diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index fca2aa1e..d2077e65 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -87,9 +87,14 @@ jobs: services: redis: - image: redis:8.0-M03 + image: redis:8.2 ports: - 6379:6379 + options: >- + --health-cmd "redis-cli ping" + --health-interval 10s + --health-timeout 5s + --health-retries 5 steps: - uses: actions/checkout@v3 @@ -99,6 +104,40 @@ jobs: with: python-version: ${{ env.PYTHON_VERSION }} + # Start Agent Memory Server + - name: Start Agent Memory Server + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + run: | + # Start the Agent Memory Server + docker run -d \ + --name agent-memory-server \ + --network host \ + -e REDIS_URL=redis://localhost:6379 \ + -e OPENAI_API_KEY=$OPENAI_API_KEY \ + -e LOG_LEVEL=INFO \ + ghcr.io/redis/agent-memory-server:latest + + # Wait for memory server to be ready + echo "Waiting for Agent Memory Server to be ready..." + for i in {1..30}; do + if curl -f http://localhost:8000/health 2>/dev/null; then + echo "โœ… Agent Memory Server is ready!" + break + fi + echo "Waiting... ($i/30)" + sleep 2 + done + + # Show status but don't fail if server isn't ready + if curl -f http://localhost:8000/health 2>/dev/null; then + echo "โœ… Agent Memory Server is healthy" + else + echo "โš ๏ธ WARNING: Agent Memory Server may not be ready" + echo "Docker logs:" + docker logs agent-memory-server || true + fi + - name: Create and activate venv run: | python -m venv venv @@ -106,11 +145,22 @@ jobs: pip install --upgrade pip setuptools wheel pip install pytest nbval + # Install the redis-context-course package and its dependencies + cd python-recipes/context-engineering/reference-agent + pip install -e . + - name: Test notebook env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }} + AGENT_MEMORY_URL: http://localhost:8000 + REDIS_URL: redis://localhost:6379 run: | echo "Testing notebook: ${{ matrix.notebook }}" source venv/bin/activate pytest --nbval-lax --disable-warnings "${{ matrix.notebook }}" + + - name: Show Agent Memory Server logs on failure + if: failure() + run: | + docker logs agent-memory-server diff --git a/08_vector_algorithm_benchmark.py b/08_vector_algorithm_benchmark.py new file mode 100644 index 00000000..6a4854ad --- /dev/null +++ b/08_vector_algorithm_benchmark.py @@ -0,0 +1,777 @@ +#!/usr/bin/env python3 +""" +Vector Algorithm Benchmark: FLAT vs HNSW vs SVS-VAMANA + +This script benchmarks FLAT, HNSW, and SVS-VAMANA vector search algorithms using +real data from Hugging Face across different embedding dimensions. 
+ +What You'll Learn: +- Memory usage comparison across algorithms and dimensions +- Index creation performance with real text data +- Query performance and latency analysis +- Search quality with recall metrics on real embeddings +- Algorithm selection guidance based on your requirements + +Benchmark Configuration: +- Dataset: SQuAD (Stanford Question Answering Dataset) from Hugging Face +- Algorithms: FLAT, HNSW, SVS-VAMANA +- Dimensions: 384, 768, 1536 (native sentence-transformer embeddings) +- Dataset Size: 1,000 documents per dimension +- Query Set: 50 real questions per configuration +- Focus: Real-world performance with actual text embeddings + +Prerequisites: +- Redis Stack 8.2.0+ with RediSearch 2.8.10+ +""" + +# Import required libraries +import os +import json +import time +import psutil +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +import seaborn as sns +from typing import Dict, List, Tuple, Any +from dataclasses import dataclass +from collections import defaultdict + +# Redis and RedisVL imports +import redis +from redisvl.index import SearchIndex +from redisvl.query import VectorQuery +from redisvl.redis.utils import array_to_buffer, buffer_to_array +from redisvl.utils import CompressionAdvisor +from redisvl.redis.connection import supports_svs + +# Configuration +REDIS_URL = "redis://localhost:6379" +np.random.seed(42) # For reproducible results + +# Set up plotting style +plt.style.use('default') +sns.set_palette("husl") + +print("๐Ÿ“š Libraries imported successfully!") + +# Benchmark configuration +@dataclass +class BenchmarkConfig: + dimensions: List[int] + algorithms: List[str] + docs_per_dimension: int + query_count: int + +# Initialize benchmark configuration +config = BenchmarkConfig( + dimensions=[384, 768, 1536], + algorithms=['flat', 'hnsw', 'svs-vamana'], + docs_per_dimension=1000, + query_count=50 +) + +print( + "๐Ÿ”ง Benchmark Configuration:", + f"Dimensions: {config.dimensions}", + f"Algorithms: {config.algorithms}", + f"Documents per dimension: {config.docs_per_dimension:,}", + f"Test queries: {config.query_count}", + f"Total documents: {len(config.dimensions) * config.docs_per_dimension:,}", + f"Dataset: SQuAD from Hugging Face", + sep="\n" +) + +def verify_redis_connection(): + """Test Redis connection and capabilities""" + try: + client = redis.Redis.from_url(REDIS_URL) + client.ping() + + redis_info = client.info() + redis_version = redis_info['redis_version'] + + svs_supported = supports_svs(client) + + print( + "โœ… Redis connection successful", + f"๐Ÿ“Š Redis version: {redis_version}", + f"๐Ÿ”ง SVS-VAMANA supported: {'โœ… Yes' if svs_supported else 'โŒ No'}", + sep="\n" + ) + + if not svs_supported: + print("โš ๏ธ SVS-VAMANA not supported. 
Benchmark will skip SVS tests.") + config.algorithms = ['flat', 'hnsw'] # Remove SVS from tests + + return client + + except Exception as e: + print(f"โŒ Redis connection failed: {e}") + print("Please ensure Redis Stack is running on localhost:6379") + raise + +def load_squad_dataset(num_docs: int) -> List[Dict[str, Any]]: + """Load SQuAD dataset from Hugging Face""" + try: + from datasets import load_dataset + + print("๐Ÿ“ฅ Loading SQuAD dataset from Hugging Face...") + + # Load SQuAD dataset + dataset = load_dataset("squad", split="train") + + # Take a subset for our benchmark + dataset = dataset.select(range(min(num_docs, len(dataset)))) + + # Convert to our format + documents = [] + for i, item in enumerate(dataset): + # Combine question and context for richer text + text = f"{item['question']} {item['context']}" + + documents.append({ + 'doc_id': f'squad_{i:06d}', + 'title': item['title'], + 'question': item['question'], + 'context': item['context'][:500], # Truncate long contexts + 'text': text, + 'category': 'qa', # All are Q&A documents + 'score': 1.0 + }) + + print(f"โœ… Loaded {len(documents)} documents from SQuAD") + return documents + + except ImportError: + print("โš ๏ธ datasets library not available, falling back to local data") + return load_local_fallback_data(num_docs) + except Exception as e: + print(f"โš ๏ธ Failed to load SQuAD dataset: {e}") + print("Falling back to local data...") + return load_local_fallback_data(num_docs) + +def load_local_fallback_data(num_docs: int) -> List[Dict[str, Any]]: + """Fallback to local movie dataset if SQuAD is not available""" + try: + import json + with open('resources/movies.json', 'r') as f: + movies = json.load(f) + + # Expand the small movie dataset by duplicating with variations + documents = [] + for i in range(num_docs): + movie = movies[i % len(movies)] + documents.append({ + 'doc_id': f'movie_{i:06d}', + 'title': f"{movie['title']} (Variant {i // len(movies) + 1})", + 'question': f"What is {movie['title']} about?", + 'context': movie['description'], + 'text': f"What is {movie['title']} about? 
{movie['description']}",
+                'category': movie['genre'],
+                'score': movie['rating']
+            })
+
+        print(f"โœ… Using local movie dataset: {len(documents)} documents")
+        return documents
+
+    except Exception as e:
+        print(f"โŒ Failed to load local data: {e}")
+        raise
+
+def generate_embeddings_for_texts(texts: List[str], dimensions: int) -> np.ndarray:
+    """Generate embeddings for texts using sentence-transformers"""
+    try:
+        from sentence_transformers import SentenceTransformer
+
+        # Choose the model whose native output is closest to the target dimensions
+        if dimensions == 384:
+            model_name = 'all-MiniLM-L6-v2'
+        elif dimensions == 768:
+            model_name = 'all-mpnet-base-v2'
+        elif dimensions == 1536:
+            # For 1536D, use gtr-t5-xl, which produces native 1536D embeddings
+            model_name = 'sentence-transformers/gtr-t5-xl'
+        else:
+            model_name = 'all-MiniLM-L6-v2'  # Default
+
+        print(f"๐Ÿค– Generating {dimensions}D embeddings using {model_name}...")
+
+        model = SentenceTransformer(model_name)
+        embeddings = model.encode(texts, convert_to_numpy=True, show_progress_bar=True)
+
+        # Adjust dimensions if the model output does not match the target
+        current_dims = embeddings.shape[1]
+        if current_dims < dimensions:
+            # Pad with small random values (better than zeros)
+            padding_size = dimensions - current_dims
+            padding = np.random.normal(0, 0.01, (embeddings.shape[0], padding_size))
+            embeddings = np.concatenate([embeddings, padding], axis=1)
+        elif current_dims > dimensions:
+            # Truncate
+            embeddings = embeddings[:, :dimensions]
+
+        # Normalize embeddings to unit length for cosine similarity
+        norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
+        embeddings = embeddings / norms
+
+        print(f"โœ… Generated embeddings: {embeddings.shape}")
+        return embeddings.astype(np.float32)
+
+    except ImportError:
+        print("โš ๏ธ sentence-transformers not available, using synthetic embeddings")
+        return generate_synthetic_embeddings(len(texts), dimensions)
+    except Exception as e:
+        print(f"โš ๏ธ Error generating embeddings: {e}")
+        print("Falling back to synthetic embeddings...")
+        return generate_synthetic_embeddings(len(texts), dimensions)
+
+def generate_synthetic_embeddings(num_docs: int, dimensions: int) -> np.ndarray:
+    """Generate synthetic embeddings as fallback"""
+    print(f"๐Ÿ”„ Generating {num_docs} synthetic {dimensions}D embeddings...")
+
+    # Create base random vectors
+    embeddings = np.random.normal(0, 1, (num_docs, dimensions)).astype(np.float32)
+
+    # Add some clustering structure
+    cluster_size = num_docs // 3
+    embeddings[:cluster_size, :min(50, dimensions)] += 0.5
+    embeddings[cluster_size:2*cluster_size, min(50, dimensions):min(100, dimensions)] += 0.5
+
+    # Normalize vectors
+    norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
+    embeddings = embeddings / norms
+
+    return embeddings
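+
+# A quick, optional sanity check for the embedding helpers above. This is a
+# minimal sketch using only numpy and the functions defined in this file; the
+# printed values are illustrative, not guaranteed. Because the vectors are
+# unit-normalized, the dot product equals cosine similarity and should fall
+# in [-1.0, 1.0].
+def _sanity_check_embeddings(dims: int = 384, n: int = 4) -> None:
+    vecs = generate_synthetic_embeddings(n, dims)
+    # Norms should all be ~1.0 after normalization
+    print("norms:", np.linalg.norm(vecs, axis=1).round(4))
+    # Pairwise cosine similarity of unit vectors is just the dot product
+    print("cosine(v0, v1):", float(vecs[0] @ vecs[1]))
+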
+def load_and_generate_embeddings():
+    """Load the real dataset and generate embeddings"""
+    print("๐Ÿ”„ Loading real dataset and generating embeddings...")
+
+    # Load the base dataset once
+    raw_documents = load_squad_dataset(config.docs_per_dimension)
+    texts = [doc['text'] for doc in raw_documents]
+
+    # Generate separate query texts (use questions from SQuAD)
+    query_texts = [doc['question'] for doc in raw_documents[:config.query_count]]
+
+    benchmark_data = {}
+    query_data = {}
+
+    for dim in config.dimensions:
+        print(f"\n๐Ÿ“Š Processing {dim}D embeddings...")
+
+        # Generate embeddings for documents
+        embeddings = generate_embeddings_for_texts(texts, dim)
+
+        # Generate embeddings for queries
+        query_embeddings = generate_embeddings_for_texts(query_texts, dim)
+
+        # Combine documents with embeddings
+        documents = []
+        for doc, embedding in zip(raw_documents, embeddings):
+            documents.append({
+                **doc,
+                'embedding': array_to_buffer(embedding, dtype='float32')
+            })
+
+        benchmark_data[dim] = documents
+        query_data[dim] = query_embeddings
+
+    print(
+        "\nโœ… Generated benchmark data:",
+        f"Total documents: {sum(len(docs) for docs in benchmark_data.values()):,}",
+        f"Total queries: {sum(len(queries) for queries in query_data.values()):,}",
+        f"Dataset source: {'SQuAD (Hugging Face)' if 'squad_' in raw_documents[0]['doc_id'] else 'Local movies'}",
+        sep="\n"
+    )
+
+    return benchmark_data, query_data, raw_documents
+
+def create_index_schema(algorithm: str, dimensions: int, prefix: str) -> Dict[str, Any]:
+    """Create the index schema for the specified algorithm"""
+
+    base_schema = {
+        "index": {
+            "name": f"benchmark_{algorithm}_{dimensions}d",
+            "prefix": prefix,
+        },
+        "fields": [
+            {"name": "doc_id", "type": "tag"},
+            {"name": "title", "type": "text"},
+            {"name": "category", "type": "tag"},
+            {"name": "score", "type": "numeric"},
+            {
+                "name": "embedding",
+                "type": "vector",
+                "attrs": {
+                    "dims": dimensions,
+                    "distance_metric": "cosine",
+                    "datatype": "float32"
+                }
+            }
+        ]
+    }
+
+    # Algorithm-specific configurations
+    vector_field = base_schema["fields"][-1]["attrs"]
+
+    if algorithm == 'flat':
+        vector_field["algorithm"] = "flat"
+
+    elif algorithm == 'hnsw':
+        vector_field.update({
+            "algorithm": "hnsw",
+            "m": 16,
+            "ef_construction": 200,
+            "ef_runtime": 10
+        })
+
+    elif algorithm == 'svs-vamana':
+        # Get compression recommendation
+        compression_config = CompressionAdvisor.recommend(dims=dimensions, priority="memory")
+
+        vector_field.update({
+            "algorithm": "svs-vamana",
+            "datatype": compression_config.get('datatype', 'float32')
+        })
+
+        # Handle dimensionality reduction for high dimensions
+        if 'reduce' in compression_config:
+            vector_field["dims"] = compression_config['reduce']
+
+    return base_schema
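+
+# Illustrative only: peek at the vector-field attributes the schema factory
+# produces for each algorithm. The DEBUG_SCHEMAS environment variable is an
+# assumption introduced here as an opt-in guard, and the exact attrs depend
+# on your installed redisvl version and on what CompressionAdvisor recommends,
+# so treat the printed values as examples rather than guarantees.
+if os.getenv("DEBUG_SCHEMAS"):
+    for algo in ('flat', 'hnsw', 'svs-vamana'):
+        attrs = create_index_schema(algo, 384, 'demo:')["fields"][-1]["attrs"]
+        print(algo, "->", attrs)
+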
+def benchmark_index_creation(algorithm: str, dimensions: int, documents: List[Dict], client) -> Tuple[SearchIndex, float, float]:
+    """Benchmark index creation and return the index, build time, and memory usage"""
+
+    prefix = f"bench:{algorithm}:{dimensions}d:"
+
+    # Clean up any existing index
+    try:
+        client.execute_command('FT.DROPINDEX', f'benchmark_{algorithm}_{dimensions}d')
+    except Exception:
+        pass
+
+    # Create schema and index
+    schema = create_index_schema(algorithm, dimensions, prefix)
+
+    start_time = time.time()
+
+    # Create index
+    index = SearchIndex.from_dict(schema, redis_url=REDIS_URL)
+    index.create(overwrite=True)
+
+    # Load data in batches
+    batch_size = 100
+    for i in range(0, len(documents), batch_size):
+        batch = documents[i:i+batch_size]
+        index.load(batch)
+
+    # Wait for indexing to complete
+    if algorithm == 'hnsw':
+        time.sleep(3)  # HNSW needs more time for graph construction
+    else:
+        time.sleep(1)
+
+    build_time = time.time() - start_time
+
+    # Get index info for memory usage
+    try:
+        index_info = index.info()
+        index_size_mb = float(index_info.get('vector_index_sz_mb', 0))
+    except Exception:
+        index_size_mb = 0.0
+
+    return index, build_time, index_size_mb
+
+def run_index_creation_benchmarks(benchmark_data, client):
+    """Run index creation benchmarks"""
+    print("๐Ÿ—๏ธ Running index creation benchmarks...")
+
+    creation_results = {}
+    indices = {}
+
+    for dim in config.dimensions:
+        print(f"\n๐Ÿ“Š Benchmarking {dim}D embeddings:")
+
+        for algorithm in config.algorithms:
+            print(f"   Creating {algorithm.upper()} index...")
+
+            try:
+                index, build_time, index_size_mb = benchmark_index_creation(
+                    algorithm, dim, benchmark_data[dim], client
+                )
+
+                creation_results[f"{algorithm}_{dim}"] = {
+                    'algorithm': algorithm,
+                    'dimensions': dim,
+                    'build_time_sec': build_time,
+                    'index_size_mb': index_size_mb,
+                    'num_docs': len(benchmark_data[dim])
+                }
+
+                indices[f"{algorithm}_{dim}"] = index
+
+                print(f"   โœ… {algorithm.upper()}: {build_time:.2f}s, {index_size_mb:.2f}MB")
+
+            except Exception as e:
+                print(f"   โŒ {algorithm.upper()} failed: {e}")
+                creation_results[f"{algorithm}_{dim}"] = None
+
+    print("\nโœ… Index creation benchmarks complete!")
+    return creation_results, indices
+
+def calculate_recall(retrieved_ids: List[str], ground_truth_ids: List[str], k: int) -> float:
+    """Calculate recall@k between retrieved and ground truth results"""
+    if not ground_truth_ids or not retrieved_ids:
+        return 0.0
+
+    retrieved_set = set(retrieved_ids[:k])
+    ground_truth_set = set(ground_truth_ids[:k])
+
+    if len(ground_truth_set) == 0:
+        return 0.0
+
+    intersection = len(retrieved_set.intersection(ground_truth_set))
+    return intersection / len(ground_truth_set)
+
+def benchmark_query_performance(index: SearchIndex, query_vectors: np.ndarray,
+                                algorithm: str, dimensions: int, indices) -> Dict[str, float]:
+    """Benchmark query performance and quality"""
+
+    latencies = []
+    all_results = []
+
+    # Get ground truth from the FLAT index (exact search), if available
+    ground_truth_results = []
+    flat_index_key = f"flat_{dimensions}"
+
+    if flat_index_key in indices and algorithm != 'flat':
+        flat_index = indices[flat_index_key]
+        for query_vec in query_vectors:
+            query = VectorQuery(
+                vector=query_vec,
+                vector_field_name="embedding",
+                return_fields=["doc_id"],
+                dtype="float32",
+                num_results=10
+            )
+            results = flat_index.query(query)
+            ground_truth_results.append([doc["doc_id"] for doc in results])
+
+    # Benchmark the target algorithm
+    for query_vec in query_vectors:
+        # Adjust the query vector for SVS compression settings if needed
+        if algorithm == 'svs-vamana':
+            compression_config = CompressionAdvisor.recommend(dims=dimensions, priority="memory")
+
+            if 'reduce' in compression_config:
+                target_dims = compression_config['reduce']
+                if target_dims < dimensions:
+                    query_vec = query_vec[:target_dims]
+
+            if compression_config.get('datatype') == 'float16':
+                query_vec = query_vec.astype(np.float16)
+                dtype = 'float16'
+            else:
+                dtype = 'float32'
+        else:
+            dtype = 'float32'
+
+        # Execute query with timing
+        start_time = time.time()
+
+        query = VectorQuery(
+            vector=query_vec,
+            vector_field_name="embedding",
+            return_fields=["doc_id", "title", "category"],
+            dtype=dtype,
+            num_results=10
+        )
+
+        results = index.query(query)
+        latency = time.time() - start_time
+
+        latencies.append(latency * 1000)  # Convert to milliseconds
+        all_results.append([doc["doc_id"] for doc in results])
+
+    # Calculate metrics
+    avg_latency = np.mean(latencies)
+
+    # Calculate recall if we have ground truth
+    if ground_truth_results and algorithm != 'flat':
+        recall_5_scores = []
+        recall_10_scores = []
+
+        for retrieved, ground_truth in zip(all_results, ground_truth_results):
+            recall_5_scores.append(calculate_recall(retrieved, ground_truth, 5))
+            recall_10_scores.append(calculate_recall(retrieved, ground_truth, 10))
+
+        recall_at_5 = np.mean(recall_5_scores)
+        recall_at_10 = np.mean(recall_10_scores)
+    else:
+        # FLAT is our ground truth, so it gets perfect recall by definition
+        recall_at_5 = 1.0 if algorithm == 'flat' else 0.0
+        recall_at_10 = 1.0 if algorithm == 'flat' else 0.0
+
+    return {
+        'avg_query_time_ms': avg_latency,
'recall_at_5': recall_at_5, + 'recall_at_10': recall_at_10, + 'num_queries': len(query_vectors) + } + +def run_query_performance_benchmarks(query_data, indices): + """Run query performance benchmarks""" + print("๐Ÿ” Running query performance benchmarks...") + + query_results = {} + + for dim in config.dimensions: + print(f"\n๐Ÿ“Š Benchmarking {dim}D queries:") + + for algorithm in config.algorithms: + index_key = f"{algorithm}_{dim}" + + if index_key in indices: + print(f" Testing {algorithm.upper()} queries...") + + try: + performance = benchmark_query_performance( + indices[index_key], + query_data[dim], + algorithm, + dim, + indices + ) + + query_results[index_key] = performance + + print( + f" โœ… {algorithm.upper()}: {performance['avg_query_time_ms']:.2f}ms avg, " + f"R@5: {performance['recall_at_5']:.3f}, R@10: {performance['recall_at_10']:.3f}" + ) + + except Exception as e: + print(f" โŒ {algorithm.upper()} query failed: {e}") + query_results[index_key] = None + else: + print(f" โญ๏ธ Skipping {algorithm.upper()} (index creation failed)") + + print("\nโœ… Query performance benchmarks complete!") + return query_results + +def create_results_dataframe(creation_results, query_results) -> pd.DataFrame: + """Combine all benchmark results into a pandas DataFrame""" + + results = [] + + for dim in config.dimensions: + for algorithm in config.algorithms: + key = f"{algorithm}_{dim}" + + if key in creation_results and creation_results[key] is not None: + creation_data = creation_results[key] + query_data_item = query_results.get(key, {}) + + result = { + 'algorithm': algorithm, + 'dimensions': dim, + 'num_docs': creation_data['num_docs'], + 'build_time_sec': creation_data['build_time_sec'], + 'index_size_mb': creation_data['index_size_mb'], + 'avg_query_time_ms': query_data_item.get('avg_query_time_ms', 0), + 'recall_at_5': query_data_item.get('recall_at_5', 0), + 'recall_at_10': query_data_item.get('recall_at_10', 0) + } + + results.append(result) + + return pd.DataFrame(results) + +def analyze_results(df_results, raw_documents): + """Analyze and display benchmark results""" + print("๐Ÿ“Š Real Data Benchmark Results Summary:") + print(df_results.to_string(index=False, float_format='%.3f')) + + # Display key insights + if not df_results.empty: + print(f"\n๐ŸŽฏ Key Insights from Real Data:") + + # Memory efficiency + best_memory = df_results.loc[df_results['index_size_mb'].idxmin()] + print(f"๐Ÿ† Most memory efficient: {best_memory['algorithm'].upper()} at {best_memory['dimensions']}D ({best_memory['index_size_mb']:.2f}MB)") + + # Query speed + best_speed = df_results.loc[df_results['avg_query_time_ms'].idxmin()] + print(f"โšก Fastest queries: {best_speed['algorithm'].upper()} at {best_speed['dimensions']}D ({best_speed['avg_query_time_ms']:.2f}ms)") + + # Search quality + best_quality = df_results.loc[df_results['recall_at_10'].idxmax()] + print(f"๐ŸŽฏ Best search quality: {best_quality['algorithm'].upper()} at {best_quality['dimensions']}D (R@10: {best_quality['recall_at_10']:.3f})") + + # Dataset info + dataset_source = 'SQuAD (Hugging Face)' if 'squad_' in raw_documents[0]['doc_id'] else 'Local movies' + print(f"\n๐Ÿ“š Dataset: {dataset_source}") + print(f"๐Ÿ“Š Total documents tested: {df_results['num_docs'].iloc[0]:,}") + print(f"๐Ÿ” Total queries per dimension: {config.query_count}") + +def create_real_data_visualizations(df: pd.DataFrame): + """Create visualizations for real data benchmark results""" + + if df.empty: + print("โš ๏ธ No results to visualize") + return + + # Set up 
the plotting area + fig, axes = plt.subplots(2, 2, figsize=(15, 10)) + fig.suptitle('Real Data Vector Algorithm Benchmark Results', fontsize=16, fontweight='bold') + + # 1. Memory Usage Comparison + ax1 = axes[0, 0] + pivot_memory = df.pivot(index='dimensions', columns='algorithm', values='index_size_mb') + pivot_memory.plot(kind='bar', ax=ax1, width=0.8) + ax1.set_title('Index Size by Algorithm (Real Data)') + ax1.set_xlabel('Dimensions') + ax1.set_ylabel('Index Size (MB)') + ax1.legend(title='Algorithm') + ax1.tick_params(axis='x', rotation=0) + + # 2. Query Performance + ax2 = axes[0, 1] + pivot_query = df.pivot(index='dimensions', columns='algorithm', values='avg_query_time_ms') + pivot_query.plot(kind='bar', ax=ax2, width=0.8) + ax2.set_title('Average Query Time (Real Embeddings)') + ax2.set_xlabel('Dimensions') + ax2.set_ylabel('Query Time (ms)') + ax2.legend(title='Algorithm') + ax2.tick_params(axis='x', rotation=0) + + # 3. Search Quality + ax3 = axes[1, 0] + pivot_recall = df.pivot(index='dimensions', columns='algorithm', values='recall_at_10') + pivot_recall.plot(kind='bar', ax=ax3, width=0.8) + ax3.set_title('Search Quality (Recall@10)') + ax3.set_xlabel('Dimensions') + ax3.set_ylabel('Recall@10') + ax3.legend(title='Algorithm') + ax3.tick_params(axis='x', rotation=0) + ax3.set_ylim(0, 1.1) + + # 4. Memory Efficiency + ax4 = axes[1, 1] + df['docs_per_mb'] = df['num_docs'] / df['index_size_mb'] + pivot_efficiency = df.pivot(index='dimensions', columns='algorithm', values='docs_per_mb') + pivot_efficiency.plot(kind='bar', ax=ax4, width=0.8) + ax4.set_title('Memory Efficiency (Real Data)') + ax4.set_xlabel('Dimensions') + ax4.set_ylabel('Documents per MB') + ax4.legend(title='Algorithm') + ax4.tick_params(axis='x', rotation=0) + + plt.tight_layout() + plt.show() + +def generate_insights_and_recommendations(df_results, raw_documents): + """Generate real data specific recommendations""" + if not df_results.empty: + dataset_source = 'SQuAD (Hugging Face)' if 'squad_' in raw_documents[0]['doc_id'] else 'Local movies' + + print( + f"๐ŸŽฏ Real Data Benchmark Insights", + f"Dataset: {dataset_source}", + f"Documents: {df_results['num_docs'].iloc[0]:,} per dimension", + f"Embedding Models: sentence-transformers", + "=" * 50, + sep="\n" + ) + + for dim in config.dimensions: + dim_data = df_results[df_results['dimensions'] == dim] + + if not dim_data.empty: + print(f"\n๐Ÿ“Š {dim}D Embeddings Analysis:") + + for _, row in dim_data.iterrows(): + algo = row['algorithm'].upper() + print( + f" {algo}:", + f" Index: {row['index_size_mb']:.2f}MB", + f" Query: {row['avg_query_time_ms']:.2f}ms", + f" Recall@10: {row['recall_at_10']:.3f}", + f" Efficiency: {row['docs_per_mb']:.1f} docs/MB", + sep="\n" + ) + + print( + f"\n๐Ÿ’ก Key Takeaways with Real Data:", + "โ€ข Real embeddings show different performance characteristics than synthetic", + "โ€ข Sentence-transformer models provide realistic vector distributions", + "โ€ข SQuAD Q&A pairs offer diverse semantic content for testing", + "โ€ข Results are more representative of production workloads", + "โ€ข Consider testing with your specific embedding models and data", + sep="\n" + ) + else: + print("โš ๏ธ No results available for analysis") + +def cleanup_indices(indices): + """Clean up all benchmark indices""" + print("๐Ÿงน Cleaning up benchmark indices...") + + cleanup_count = 0 + for index_key, index in indices.items(): + try: + index.delete(drop=True) + cleanup_count += 1 + print(f" โœ… Cleaned up {index_key}") + except Exception as e: + print(f" โš 
๏ธ Failed to cleanup {index_key}: {e}") + + print(f"๐Ÿงน Cleanup complete! Removed {cleanup_count} indices.") + +def main(): + """Main execution function""" + print("๐Ÿš€ Starting Vector Algorithm Benchmark with Real Data") + print("=" * 60) + + # Step 1: Verify Redis connection + print("\n## Step 1: Verify Redis and SVS Support") + client = verify_redis_connection() + + # Step 2: Load real dataset and generate embeddings + print("\n## Step 2: Load Real Dataset from Hugging Face") + benchmark_data, query_data, raw_documents = load_and_generate_embeddings() + + # Step 3: Index creation benchmark + print("\n## Step 3: Index Creation Benchmark") + creation_results, indices = run_index_creation_benchmarks(benchmark_data, client) + + # Step 4: Query performance benchmark + print("\n## Step 4: Query Performance Benchmark") + query_results = run_query_performance_benchmarks(query_data, indices) + + # Step 5: Results analysis and visualization + print("\n## Step 5: Results Analysis and Visualization") + df_results = create_results_dataframe(creation_results, query_results) + analyze_results(df_results, raw_documents) + + # Create visualizations + create_real_data_visualizations(df_results) + + # Step 6: Generate insights and recommendations + print("\n## Step 6: Real Data Insights and Recommendations") + generate_insights_and_recommendations(df_results, raw_documents) + + # Step 7: Cleanup + print("\n## Step 7: Cleanup") + cleanup_indices(indices) + + print("\n๐ŸŽ‰ Benchmark complete! Check the results above for insights.") + return df_results + +if __name__ == "__main__": + main() diff --git a/nk_scripts/full_featured_agent.py b/nk_scripts/full_featured_agent.py new file mode 100644 index 00000000..93ac9ff0 --- /dev/null +++ b/nk_scripts/full_featured_agent.py @@ -0,0 +1,406 @@ +#!/usr/bin/env python3 +""" +Full-Featured Agent Architecture + +A simplified Python version of the Oregon Trail agent with: +- Tool-enabled workflow +- Semantic caching +- Retrieval augmented generation (RAG) +- Multiple choice structured output +- Allow/block list routing + +Based on: python-recipes/agents/02_full_featured_agent.ipynb +""" + +import os +import warnings +from typing import Literal, TypedDict +from functools import lru_cache + +# LangChain imports +from langchain_core.tools import tool +from langchain_core.messages import HumanMessage +from langchain_openai import ChatOpenAI, OpenAIEmbeddings +from langchain_redis import RedisConfig, RedisVectorStore +from langchain_core.documents import Document +from langchain.tools.retriever import create_retriever_tool + +# LangGraph imports +from langgraph.graph import MessagesState, StateGraph, END +from langgraph.prebuilt import ToolNode + +# RedisVL imports +from redisvl.extensions.llmcache import SemanticCache + +# Pydantic imports +from pydantic import BaseModel, Field + +# Suppress warnings +warnings.filterwarnings("ignore") + +# Configuration +REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379") +INDEX_NAME = os.getenv("VECTOR_INDEX_NAME", "oregon_trail") + +# Check OpenAI API key +if not os.getenv("OPENAI_API_KEY"): + print("โš ๏ธ Warning: OPENAI_API_KEY not found in environment variables!") + print("Please set your OpenAI API key: export OPENAI_API_KEY='your-key-here'") + exit(1) + +print("๐Ÿš€ Initializing Full-Featured Agent...") + +# ============================================ +# TOOLS DEFINITION +# ============================================ + +class RestockInput(BaseModel): + daily_usage: int = Field(description="Pounds (lbs) of food 
expected to be consumed daily") + lead_time: int = Field(description="Lead time to replace food in days") + safety_stock: int = Field(description="Number of pounds (lbs) of safety stock to keep on hand") + +@tool("restock-tool", args_schema=RestockInput) +def restock_tool(daily_usage: int, lead_time: int, safety_stock: int) -> int: + """Restock formula tool used specifically for calculating the amount of food at which you should start restocking.""" + print(f"๐Ÿ”ง Called restock tool: {daily_usage=}, {lead_time=}, {safety_stock=}") + return (daily_usage * lead_time) + safety_stock + +class ToolManager: + """Manages tool initialization and lifecycle""" + + def __init__(self, redis_url: str, index_name: str): + self.redis_url = redis_url + self.index_name = index_name + self._vector_store = None + self._tools = None + self._semantic_cache = None + + def setup_vector_store(self): + """Initialize vector store with Oregon Trail data""" + if self._vector_store is not None: + return self._vector_store + + config = RedisConfig(index_name=self.index_name, redis_url=self.redis_url) + + # Sample document about trail routes + doc = Document( + page_content="the northern trail, of the blue mountains, was destroyed by a flood and is no longer safe to traverse. It is recommended to take the southern trail although it is longer." + ) + + try: + config.from_existing = True + self._vector_store = RedisVectorStore(OpenAIEmbeddings(), config=config) + except: + print("๐Ÿ“š Initializing vector store with documents...") + config.from_existing = False + self._vector_store = RedisVectorStore.from_documents([doc], OpenAIEmbeddings(), config=config) + + return self._vector_store + + def get_tools(self): + """Initialize and return all tools""" + if self._tools is not None: + return self._tools + + vector_store = self.setup_vector_store() + retriever_tool = create_retriever_tool( + vector_store.as_retriever(), + "get_directions", + "Search and return information related to which routes/paths/trails to take along your journey." + ) + + self._tools = [retriever_tool, restock_tool] + return self._tools + + def get_semantic_cache(self): + """Initialize and return semantic cache""" + if self._semantic_cache is not None: + return self._semantic_cache + + self._semantic_cache = SemanticCache( + name="oregon_trail_cache", + redis_url=self.redis_url, + distance_threshold=0.1, + ) + + # Pre-populate cache with known answers + known_answers = { + "There's a deer. You're hungry. You know what you have to do...": "bang", + "What is the first name of the wagon leader?": "Art" + } + + for question, answer in known_answers.items(): + self._semantic_cache.store(prompt=question, response=answer) + + print("๐Ÿ’พ Semantic cache initialized with known answers") + return self._semantic_cache + +# ============================================ +# STATE DEFINITION +# ============================================ + +class MultipleChoiceResponse(BaseModel): + multiple_choice_response: Literal["A", "B", "C", "D"] = Field( + description="Single character response to the question for multiple choice questions. Must be either A, B, C, or D." 
+ ) + +class AgentState(MessagesState): + multi_choice_response: MultipleChoiceResponse = None + +# ============================================ +# AGENT CLASS +# ============================================ + +class OregonTrailAgent: + """Main agent class that orchestrates the workflow""" + + def __init__(self, redis_url: str = REDIS_URL, index_name: str = INDEX_NAME): + self.tool_manager = ToolManager(redis_url, index_name) + self._workflow = None + + @property + def tools(self): + return self.tool_manager.get_tools() + + @property + def semantic_cache(self): + return self.tool_manager.get_semantic_cache() + + @property + def workflow(self): + if self._workflow is None: + self._workflow = self._create_workflow() + return self._workflow + +# ============================================ +# LLM MODELS +# ============================================ + +# Remove the old global functions - now part of the class + +# ============================================ +# NODES +# ============================================ + + def check_cache(self, state: AgentState) -> AgentState: + """Check semantic cache for known answers""" + last_message = state["messages"][-1] + query = last_message.content + + cached_response = self.semantic_cache.check(prompt=query, return_fields=["response"]) + + if cached_response: + print("โœจ Cache hit! Returning cached response") + return { + "messages": [HumanMessage(content=cached_response[0]["response"])], + "cache_hit": True + } + else: + print("โŒ Cache miss. Proceeding to agent") + return {"cache_hit": False} + + def call_agent(self, state: AgentState) -> AgentState: + """Call the main agent with tools""" + system_prompt = """ + You are an Oregon Trail playing tool calling AI agent. Use the tools available to you to answer the question you are presented. When in doubt use the tools to help you find the answer. + If anyone asks your first name is Art return just that string. 
+ """ + + messages = [{"role": "system", "content": system_prompt}] + state["messages"] + model = self._get_tool_model() + response = model.invoke(messages) + + return {"messages": [response]} + + def structure_response(self, state: AgentState) -> AgentState: + """Structure response for multiple choice questions""" + last_message = state["messages"][-1] + + # Check if it's a multiple choice question + if "options:" in state["messages"][0].content.lower(): + print("๐Ÿ”ง Structuring multiple choice response") + + model = self._get_response_model() + response = model.invoke([ + HumanMessage(content=state["messages"][0].content), + HumanMessage(content=f"Answer from tool: {last_message.content}") + ]) + + return {"multi_choice_response": response.multiple_choice_response} + + # Cache the response if it's not a tool call + if not hasattr(last_message, "tool_calls") or not last_message.tool_calls: + original_query = state["messages"][0].content + self.semantic_cache.store(prompt=original_query, response=last_message.content) + print("๐Ÿ’พ Cached response for future use") + + return {"messages": []} + + def _get_tool_node(self): + """Get tool execution node""" + return ToolNode(self.tools) + + def _get_tool_model(self): + """Get LLM model with tools bound""" + model = ChatOpenAI(temperature=0, model_name="gpt-4o-mini") + return model.bind_tools(self.tools) + + def _get_response_model(self): + """Get LLM model with structured output""" + model = ChatOpenAI(temperature=0, model_name="gpt-4o-mini") + return model.with_structured_output(MultipleChoiceResponse) + + # ============================================ + # CONDITIONAL LOGIC + # ============================================ + + def should_continue_after_cache(self, state: AgentState) -> Literal["call_agent", "end"]: + """Decide next step after cache check""" + return "end" if state.get("cache_hit", False) else "call_agent" + + def should_continue_after_agent(self, state: AgentState) -> Literal["tools", "structure_response"]: + """Decide whether to use tools or structure response""" + last_message = state["messages"][-1] + + if hasattr(last_message, "tool_calls") and last_message.tool_calls: + return "tools" + return "structure_response" + + # ============================================ + # GRAPH CONSTRUCTION + # ============================================ + + def _create_workflow(self): + """Create the full-featured agent workflow""" + workflow = StateGraph(AgentState) + + # Add nodes + workflow.add_node("check_cache", self.check_cache) + workflow.add_node("call_agent", self.call_agent) + workflow.add_node("tools", self._get_tool_node()) + workflow.add_node("structure_response", self.structure_response) + + # Set entry point + workflow.set_entry_point("check_cache") + + # Add conditional edges + workflow.add_conditional_edges( + "check_cache", + self.should_continue_after_cache, + {"call_agent": "call_agent", "end": END} + ) + + workflow.add_conditional_edges( + "call_agent", + self.should_continue_after_agent, + {"tools": "tools", "structure_response": "structure_response"} + ) + + # Add regular edges + workflow.add_edge("tools", "call_agent") + workflow.add_edge("structure_response", END) + + return workflow.compile() + + def invoke(self, input_data): + """Run the agent workflow""" + return self.workflow.invoke(input_data) + +# ============================================ +# HELPER FUNCTIONS +# ============================================ + +def format_multi_choice_question(question: str, options: list) -> list: + """Format a multiple 
choice question"""
+    formatted = f"{question}, options: {' '.join(options)}"
+    return [HumanMessage(content=formatted)]
+
+def run_scenario(agent: OregonTrailAgent, scenario: dict):
+    """Run a single scenario and return whether it passed"""
+    print(f"\n{'='*60}")
+    print(f"๐ŸŽฏ Question: {scenario['question']}")
+    print('='*60)
+
+    # Format input based on scenario type
+    if scenario.get("type") == "multi-choice":
+        messages = format_multi_choice_question(scenario["question"], scenario["options"])
+    else:
+        messages = [HumanMessage(content=scenario["question"])]
+
+    # Run the agent
+    result = agent.invoke({"messages": messages})
+
+    # Extract the answer
+    if "multi_choice_response" in result and result["multi_choice_response"]:
+        answer = result["multi_choice_response"]
+    else:
+        answer = result["messages"][-1].content
+
+    print(f"๐Ÿค– Agent response: {answer}")
+
+    # Verify the answer if an expected answer is provided
+    if "answer" in scenario:
+        is_correct = answer == scenario["answer"]
+        print("โœ… Correct!" if is_correct else f"โŒ Expected: {scenario['answer']}")
+        return is_correct
+
+    return True
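+
+# A minimal interactive sketch (not part of the scenario suite below): how you
+# might send a one-off question through the agent. It assumes Redis and
+# OPENAI_API_KEY are available, exactly as the scenarios do; the helper name
+# is illustrative.
+def ask_once(question: str) -> str:
+    agent = OregonTrailAgent()
+    result = agent.invoke({"messages": [HumanMessage(content=question)]})
+    return result["messages"][-1].content
+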
+# ============================================
+# MAIN EXECUTION
+# ============================================
+
+if __name__ == "__main__":
+    # Create the agent
+    agent = OregonTrailAgent()
+
+    print("๐ŸŽฎ Running Oregon Trail Agent Scenarios...")
+
+    # Define test scenarios
+    scenarios = [
+        {
+            "name": "Scenario 1: Wagon Leader Name",
+            "question": "What is the first name of the wagon leader?",
+            "answer": "Art",
+            "type": "free-form"
+        },
+        {
+            "name": "Scenario 2: Restocking Tool",
+            "question": "In order to survive the trail ahead, you'll need a restocking strategy for when you need to get more supplies or risk starving. If it takes you an estimated 3 days to restock your food and you plan to start with 200lbs of food, budget 10lbs/day to eat, and keep a safety stock of at least 50lbs of backup... at what point should you restock?",
+            "answer": "D",
+            "options": ["A: 100lbs", "B: 20lbs", "C: 5lbs", "D: 80lbs"],
+            "type": "multi-choice"
+        },
+        {
+            "name": "Scenario 3: Retrieval Tool",
+            "question": "You've encountered a dense forest near the Blue Mountains, and your party is unsure how to proceed. There is a fork in the road, and you must choose a path. Which way will you go?",
+            "answer": "B",
+            "options": ["A: take the northern trail", "B: take the southern trail", "C: turn around", "D: go fishing"],
+            "type": "multi-choice"
+        },
+        {
+            "name": "Scenario 4: Semantic Cache",
+            "question": "There's a deer. You're hungry. You know what you have to do...",
+            "answer": "bang",
+            "type": "free-form"
+        }
+    ]
+
+    # Run all scenarios
+    results = []
+    for scenario in scenarios:
+        print(f"\n๐ŸŽช {scenario['name']}")
+        success = run_scenario(agent, scenario)
+        results.append(success)
+
+    # Summary
+    print(f"\n{'='*60}")
+    print(f"๐Ÿ“Š SUMMARY: {sum(results)}/{len(results)} scenarios passed")
+    print('='*60)
+
+    if all(results):
+        print("๐ŸŽ‰ All scenarios completed successfully!")
+    else:
+        print("โš ๏ธ Some scenarios failed. Check the output above.")
+
+    print("\n๐Ÿ Full-Featured Agent demo complete!")
diff --git a/nk_scripts/fully_featured_demo.py b/nk_scripts/fully_featured_demo.py
new file mode 100644
index 00000000..36895c3c
--- /dev/null
+++ b/nk_scripts/fully_featured_demo.py
@@ -0,0 +1,110 @@
+"""Basic LangGraph Q&A agent demo."""
+import os
+import operator
+from typing import Annotated, TypedDict
+
+from langgraph.constants import END
+from langgraph.graph import StateGraph
+from openai import OpenAI
+
+# Initialize OpenAI client with API key from environment
+client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+
+
+# 1. Define the state shared by all nodes
+class AgentState(TypedDict):
+    """State that is accessed by all nodes."""
+    messages: Annotated[list, operator.add]  # Accumulates messages across nodes
+    question: str
+    answer: str
+    iteration_count: int
+
+# 2. Define nodes - functions that do the work
+def ask_question(state: AgentState) -> AgentState:
+    """Node that processes the question"""
+    print(f"Processing question: {state['question']}")
+    return {
+        "messages": [f"Question received: {state['question']}"],
+        "iteration_count": state.get("iteration_count", 0) + 1
+    }
+
+def generate_answer(state: AgentState) -> AgentState:
+    """Node that generates an answer using OpenAI"""
+    print("Generating answer with OpenAI...")
+
+    try:
+        response = client.chat.completions.create(
+            model="gpt-3.5-turbo",
+            messages=[
+                {"role": "system", "content": "You are a helpful assistant that provides clear, concise answers."},
+                {"role": "user", "content": state['question']}
+            ],
+            max_tokens=150,
+            temperature=0.7
+        )
+
+        answer = response.choices[0].message.content.strip()
+
+    except Exception as e:
+        print(f"Error calling OpenAI: {e}")
+        answer = f"Error generating answer: {str(e)}"
+
+    return {
+        "answer": answer,
+        "messages": [f"Answer generated: {answer}"]
+    }
+
+# 3. Define conditional logic
+def should_continue(state: AgentState) -> str:
+    """Decides whether to continue looping or end"""
+    print(f"Checking if we should continue... iteration {state['iteration_count']}")
+    if state["iteration_count"] > 3:
+        return "end"
+    return "continue"
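+
+# Note on the loop wired up below: every "continue" routes back through
+# process_question and generate_answer, so the same question is sent to OpenAI
+# again on each pass. That is deliberate here to demonstrate conditional
+# edges; in a real agent you would vary the prompt per iteration or cap the
+# loop at a single pass.
+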
+if __name__ == "__main__":
+    # Check if the OpenAI API key is available
+    if not os.getenv("OPENAI_API_KEY"):
+        print("โš ๏ธ Warning: OPENAI_API_KEY not found in environment variables!")
+        print("Please set your OpenAI API key: export OPENAI_API_KEY='your-key-here'")
+        exit(1)
+
+    initial_state = {
+        "question": "What is LangGraph?",
+        "messages": [],
+        "answer": "",
+        "iteration_count": 0
+    }
+
+    # 4. Build the graph
+    workflow = StateGraph(AgentState)
+
+    # Two nodes that do the work
+    workflow.add_node("process_question", ask_question)
+    workflow.add_node("generate_answer", generate_answer)
+
+    # Add edges
+    workflow.set_entry_point("process_question")  # Start here
+
+    # Always go from process_question to generate_answer first
+    workflow.add_edge("process_question", "generate_answer")
+
+    # After generating an answer, check whether to continue or end
+    workflow.add_conditional_edges(
+        "generate_answer",  # Check after generating the answer
+        should_continue,
+        {
+            "continue": "process_question",  # If continue, loop back to process_question
+            "end": END  # If end, finish
+        }
+    )
+
+    # Compile and run the graph
+    app = workflow.compile()
+    result = app.invoke(initial_state)
+    print("\n=== Final Result ===")
+    print(f"Question: {result['question']}")
+    print(f"Answer: {result['answer']}")
+    print(f"Messages: {result['messages']}")
diff --git a/nk_scripts/oregon_trail_walkthrough.md b/nk_scripts/oregon_trail_walkthrough.md
new file mode 100644
index 00000000..4d1fd97f
--- /dev/null
+++ b/nk_scripts/oregon_trail_walkthrough.md
@@ -0,0 +1,856 @@
+Oregon Trail
+
+# Demo Talking Points: Full-Featured Agent Notebook
+
+## ๐ŸŽฏ Introduction Slide
+
+**What to say:**
+"Today we're building a production-ready AI agent using the Oregon Trail as our teaching metaphor. By the end, you'll have an agent with routing, caching, tools, RAG, and memory - all the components you need for enterprise applications.
+
+This isn't just a toy example; this is the same architecture powering customer support bots, sales assistants, and internal tools at major companies."
+
+---
+
+## ๐Ÿ“ฆ CELL 1: Package Installation
+
+```python
+%pip install -q langchain langchain-openai "langchain-redis>=0.2.0" langgraph sentence-transformers
+```
+
+**Talking Points:**
+
+### **langchain** - The Framework Foundation
+- "LangChain is our orchestration layer - think of it as the glue between components"
+- "It provides abstractions for working with LLMs, tools, and memory without getting locked into vendor-specific APIs"
+- **Under the hood:** LangChain creates a standardized interface. When you call `llm.invoke()`, it handles API formatting, retries, streaming, and error handling
+- **Why needed:** Without it, you'd be writing custom code for each LLM provider (OpenAI, Anthropic, etc.)
+
+### **langchain-openai** - LLM Provider Integration
+- "This gives us OpenAI-specific implementations - the ChatGPT models we'll use"
+- **What it does:** Implements LangChain's base classes for OpenAI's API (chat models, embeddings, function calling)
+- **Alternative:** Could swap for `langchain-anthropic`, `langchain-google-vertexai`, etc.
+
+### **langchain-redis>=0.2.0** - Redis Integration
+- "This is our Redis connector for LangChain - handles vector storage, caching, and checkpointing"
+- **Under the hood:** Wraps Redis commands in LangChain interfaces (VectorStore, BaseCache, etc.)
+- **Why version 0.2.0+:** Earlier versions lacked the checkpointer support needed for conversation memory
+- **What it provides:**
+  - RedisVectorStore for RAG
+  - RedisCache for semantic caching
+  - RedisSaver for conversation checkpointing
+
+### **langgraph** - State Machine for Agents
+- "LangGraph is our state machine - it turns our agent into a controllable workflow"
+- **Why not just LangChain:** LangChain's AgentExecutor is a black box.
LangGraph makes every decision explicit and debuggable +- **What it provides:** + - StateGraph for defining nodes and edges + - Conditional routing + - Built-in checkpointing + - Graph visualization +- **Under the hood:** Creates a directed graph where each node is a function that transforms state + +### **sentence-transformers** - Embedding Models +- "This runs embedding models locally - we'll use it for semantic similarity in caching and routing" +- **What it does:** Loads pre-trained models (like `all-MiniLM-L6-v2`) that convert text to vectors +- **Why not just OpenAI embeddings:** Cost and latency. Local embeddings are free and instant +- **Use cases here:** Cache similarity checks, router classification + +**Demo tip:** "Notice the `-q` flag - keeps output quiet. In production, pin exact versions in `requirements.txt`" + +--- + +## ๐Ÿ”ง CELL 2: Environment Setup + +```python +import os +os.environ["OPENAI_API_KEY"] = "your-key-here" +``` + +**Talking Points:** + +"Setting up credentials. In production, never hardcode keys like this:" +- **Better approach:** Use `.env` files with `python-dotenv` +- **Best approach:** Use secret managers (AWS Secrets Manager, Azure Key Vault, HashiCorp Vault) +- **Why it matters:** Accidentally committing API keys costs thousands when bots mine them from GitHub + +"Also good to set:" +```python +os.environ["REDIS_URL"] = "redis://localhost:6379" +os.environ["LANGCHAIN_TRACING_V2"] = "true" # Enable LangSmith tracing +``` + +--- + +## ๐Ÿ”— CELL 3: Redis Connection Test + +```python +from redis import Redis + +REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379") +client = Redis.from_url(REDIS_URL) +client.ping() +``` + +**Talking Points:** + +### **Why Test the Connection First:** +- "This is the foundation - if Redis is down, nothing else works" +- "Better to fail fast here than 20 minutes into setup" + +### **Redis.from_url() Explained:** +- **What it does:** Parses connection string and creates client +- **Formats supported:** + - `redis://localhost:6379` (standard) + - `rediss://...` (SSL/TLS) + - `redis://user:password@host:port/db` +- **Connection pooling:** Under the hood, creates a connection pool (default 50 connections) + +### **client.ping():** +- **What it does:** Sends PING command, expects PONG response +- **Returns:** `True` if connected, raises exception if not +- **Why it's important:** Validates authentication, network connectivity, and that Redis is running + +**Demo tip:** "Let's run this. If it returns `True`, we're good. If it fails, check Docker is running: `docker ps` should show redis-stack-server" + +--- + +## ๐Ÿ› ๏ธ CELL 4: Defining Tools - Restock Calculator + +```python +from langchain_core.tools import tool +from pydantic import BaseModel, Field + +class RestockInput(BaseModel): + daily_usage: int = Field(description="Pounds (lbs) of food expected to be consumed daily") + lead_time: int = Field(description="Lead time to replace food in days") + safety_stock: int = Field(description="Number of pounds (lbs) of safety stock to keep on hand") + +@tool("restock-tool", args_schema=RestockInput) +def restock_tool(daily_usage: int, lead_time: int, safety_stock: int) -> str: + """ + Calculate reorder point for food supplies on the Oregon Trail. + + Formula: restock_point = (daily_usage ร— lead_time) + safety_stock + + Returns when you need to buy more supplies to avoid running out. 
+ """ + restock_point = (daily_usage * lead_time) + safety_stock + return f"Restock when inventory reaches {restock_point} lbs" +``` + +**Talking Points:** + +### **The @tool Decorator:** +- "This transforms a regular Python function into something the LLM can understand and call" +- **What it does under the hood:** + 1. Extracts function signature + 2. Parses docstring for description + 3. Creates JSON schema the LLM can read + 4. Wraps execution with error handling + +### **Why Pydantic BaseModel:** +- "Pydantic gives us type validation and automatic schema generation" +- **What the LLM sees:** +```json +{ + "name": "restock-tool", + "description": "Calculate reorder point...", + "parameters": { + "type": "object", + "properties": { + "daily_usage": {"type": "integer", "description": "Pounds of food..."}, + ... + }, + "required": ["daily_usage", "lead_time", "safety_stock"] + } +} +``` + +### **Field() with Descriptions:** +- "These descriptions are CRITICAL - the LLM reads them to decide when to use the tool" +- **Bad:** `daily_usage: int` (LLM doesn't know what this is) +- **Good:** `daily_usage: int = Field(description="...")` (LLM understands context) + +### **The Formula:** +- "This is classic inventory management - reorder point calculation" +- `daily_usage ร— lead_time` = how much you'll consume before restock arrives +- `+ safety_stock` = buffer for delays or increased usage +- **Real-world use:** Same formula used by Amazon, Walmart, any business with inventory + +### **Return Type:** +- "Returns string because LLMs work with text" +- "Could return JSON for complex data: `return json.dumps({"restock_at": restock_point})`" + +**Demo tip:** "Let's test this manually first:" +```python +print(restock_tool.invoke({"daily_usage": 10, "lead_time": 3, "safety_stock": 50})) +# Output: "Restock when inventory reaches 80 lbs" +``` + +--- + +## ๐Ÿ” CELL 5: RAG Tool - Vector Store Setup + +```python +from langchain.tools.retriever import create_retriever_tool +from langchain_redis import RedisConfig, RedisVectorStore +from langchain_core.documents import Document +from langchain_openai import OpenAIEmbeddings + +INDEX_NAME = os.environ.get("VECTOR_INDEX_NAME", "oregon_trail") +REDIS_URL = os.environ.get("REDIS_URL", "redis://localhost:6379/0") +CONFIG = RedisConfig(index_name=INDEX_NAME, redis_url=REDIS_URL) + +def get_vector_store(): + return RedisVectorStore( + config=CONFIG, + embedding=OpenAIEmbeddings(model="text-embedding-3-small") + ) +``` + +**Talking Points:** + +### **What is RAG (Retrieval Augmented Generation):** +- "RAG = giving the LLM a search engine over your documents" +- **Without RAG:** LLM only knows training data (outdated, generic) +- **With RAG:** LLM can search your docs, then answer with that context + +### **RedisConfig:** +- **index_name:** Namespace for this vector collection +- **redis_url:** Where to store vectors +- **Why configurable:** Multiple apps can share one Redis instance with different indexes + +### **RedisVectorStore:** +- "This is our vector database - stores embeddings and does similarity search" +- **Under the hood:** + 1. Takes text documents + 2. Converts to embeddings (numerical vectors) + 3. Stores in Redis with HNSW index + 4. 
Enables fast semantic search + +### **OpenAIEmbeddings(model="text-embedding-3-small"):** +- **What it does:** Calls OpenAI API to convert text โ†’ 1536-dimensional vector +- **Why this model:** + - `text-embedding-3-small`: Fast, cheap ($0.02/1M tokens), good quality + - Alternative: `text-embedding-3-large` (better quality, 2x cost) +- **Local alternative:** `HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")` - free but slower + +### **Why Embeddings Matter:** +- "Embeddings capture semantic meaning" +- **Example:** + - "How do I get to Oregon?" + - "What's the route to Willamette Valley?" + - These have different words but similar vectors โ†’ retrieved together + +**Next, loading documents:** + +```python +documents = [ + Document(page_content="Take the southern trail through...", metadata={"type": "directions"}), + Document(page_content="Fort Kearney is 300 miles from Independence...", metadata={"type": "landmark"}), +] + +vector_store = get_vector_store() +vector_store.add_documents(documents) +``` + +**Talking Points:** + +### **Document Structure:** +- `page_content`: The actual text to embed and search +- `metadata`: Filters for search (e.g., "only search directions") + +### **add_documents():** +- **What happens:** + 1. Batches documents + 2. Calls embedding API for each + 3. Stores vectors in Redis with metadata + 4. Builds HNSW index for fast search + +### **HNSW (Hierarchical Navigable Small World):** +- "This is the algorithm Redis uses for vector search" +- **Why it's fast:** Approximate nearest neighbor search in O(log n) instead of O(n) +- **Trade-off:** 99% accuracy, 100x faster than exact search + +**Creating the retriever tool:** + +```python +retriever_tool = create_retriever_tool( + retriever=vector_store.as_retriever(search_kwargs={"k": 3}), + name="oregon-trail-directions", + description="Search for directions, landmarks, and trail information along the Oregon Trail" +) +``` + +**Talking Points:** + +### **create_retriever_tool():** +- "Wraps the vector store in a tool interface the agent can call" +- **What the LLM sees:** Another tool like `restock-tool`, but for searching knowledge + +### **search_kwargs={"k": 3}:** +- `k=3` means "return top 3 most similar documents" +- **How to choose k:** + - Too low (k=1): Might miss relevant info + - Too high (k=10): Too much noise, tokens wasted + - Sweet spot: k=3-5 for most use cases + +### **Tool name and description:** +- "Again, the description tells the LLM when to use this" +- **Good description:** "Search for directions, landmarks, and trail information..." +- **LLM thinks:** "User asked about routes โ†’ use this tool" + +**Demo tip:** "Let's test the retriever:" +```python +results = vector_store.similarity_search("How do I get to Oregon?", k=2) +for doc in results: + print(doc.page_content) +``` + +--- + +## ๐Ÿง  CELL 6: Semantic Cache Setup + +```python +from redisvl.extensions.llmcache import SemanticCache + +cache = SemanticCache( + name="agent_cache", + redis_client=client, + distance_threshold=0.1, + ttl=3600 +) +``` + +**Talking Points:** + +### **What is Semantic Cache:** +- "Regular cache: exact string match. Semantic cache: meaning match" +- **Example:** + - Query 1: "What is the capital of Oregon?" + - Query 2: "Tell me Oregon's capital city" + - Regular cache: MISS (different strings) + - Semantic cache: HIT (same meaning) + +### **How It Works:** +1. User asks a question +2. Convert question to embedding +3. Search Redis for similar question embeddings +4. 
If found within threshold โ†’ return cached answer +5. If not โ†’ call LLM, cache the result + +### **Parameters Explained:** + +#### **name="agent_cache":** +- Namespace for this cache +- Multiple caches can coexist: `agent_cache`, `product_cache`, etc. + +#### **distance_threshold=0.1:** +- "This controls how strict the match needs to be" +- **Cosine distance:** 0 = identical, 1 = completely different +- **0.1 = very strict:** Only near-identical queries hit cache +- **0.3 = lenient:** More variation allowed +- **Tuning strategy:** + - Start strict (0.1) + - Monitor false negatives (questions that should have hit) + - Gradually increase if needed + +#### **ttl=3600:** +- "Time to live - cache expires after 1 hour" +- **Why TTL matters:** + - Product prices change โ†’ stale cache is wrong + - News updates โ†’ old info misleads users + - Static FAQs โ†’ can use longer TTL (86400 = 24 hours) +- **Formula:** `ttl = how_often_data_changes / safety_factor` + +### **Under the Hood:** +- **Storage:** Redis Hash with embedding as key +- **Index:** HNSW index for fast similarity search +- **Lookup:** O(log n) search through cached embeddings + +### **Cache Workflow in Agent:** +```python +def check_cache(query): + # 1. Convert query to embedding + query_embedding = embedding_model.embed(query) + + # 2. Search for similar queries + cached = cache.check(prompt=query) + + # 3. If found, return cached response + if cached: + return cached[0]["response"] + + # 4. Otherwise, call LLM + response = llm.invoke(query) + + # 5. Store for next time + cache.store(prompt=query, response=response) + + return response +``` + +**Benefits:** +- **Cost reduction:** ~70-90% fewer LLM calls in practice +- **Latency:** Cache hits return in ~10ms vs 1-2s for LLM +- **Consistency:** Same questions get same answers + +**Demo tip:** "Let's test it:" +```python +# First call - cache miss +cache.store(prompt="What is the weather?", response="Sunny, 70ยฐF") + +# Second call - cache hit +result = cache.check(prompt="Tell me the weather conditions") +print(result) # Returns "Sunny, 70ยฐF" +``` + +--- + +## ๐Ÿ›ฃ๏ธ CELL 7: Semantic Router Setup + +```python +from redisvl.extensions.router import SemanticRouter, Route + +allowed_route = Route( + name="oregon_topics", + references=[ + "What is the capital of Oregon?", + "Tell me about Oregon history", + "Oregon Trail game information", + # ... more examples + ], + metadata={"type": "allowed"} +) + +blocked_route = Route( + name="blocked_topics", + references=[ + "Stock market information", + "S&P 500 analysis", + "Cryptocurrency prices", + # ... 
more examples + ], + metadata={"type": "blocked"} +) + +router = SemanticRouter( + name="topic_router", + routes=[allowed_route, blocked_route], + redis_client=client +) +``` + +**Talking Points:** + +### **What is Semantic Routing:** +- "A classifier that decides if a query is on-topic or off-topic" +- **Why it's first in the pipeline:** Block bad queries before they cost money + +### **Real-World Example:** +- "Chevrolet had a chatbot for car sales" +- "Users discovered it could answer coding questions" +- "Free ChatGPT access โ†’ huge cost spike" +- **Solution:** Router blocks non-car questions + +### **Route Objects:** + +#### **references=[] - The Training Examples:** +- "These are example queries for each category" +- **How many needed:** 5-10 minimum, 20-30 ideal +- **Quality over quantity:** Diverse examples beat many similar ones +- **Bad examples:** + - All very similar: ["Oregon capital?", "Capital of Oregon?", "Oregon's capital?"] +- **Good examples:** + - Varied phrasing: ["Oregon capital?", "Tell me about Salem", "What city is the state capital?"] + +#### **Why More Examples Help:** +- "The router averages all example embeddings to create a 'centroid'" +- More examples โ†’ better coverage of the topic space + +### **How Routing Works:** +1. User query comes in +2. Convert query to embedding +3. Calculate distance to each route's centroid +4. Return closest route +5. Check route type: allowed โ†’ continue, blocked โ†’ reject + +### **Under the Hood:** +```python +def route(query): + query_emb = embed(query) + + distances = { + "oregon_topics": cosine_distance(query_emb, avg(oregon_examples)), + "blocked_topics": cosine_distance(query_emb, avg(blocked_examples)) + } + + closest_route = min(distances, key=distances.get) + return closest_route, distances[closest_route] +``` + +### **Router vs. Cache:** +- **Router:** Classification (which category?) +- **Cache:** Retrieval (have we seen this exact question?) 
+- **Router runs first:** Cheaper to route than cache lookup + +### **Metadata Field:** +- "Store additional info about routes" +- **Use cases:** + - `{"type": "allowed", "confidence_threshold": 0.2}` + - `{"type": "blocked", "reason": "off_topic"}` + - Can use in conditional logic + +**Demo tip:** "Let's test routing:" +```python +result = router("What is the capital of Oregon?") +print(f"Route: {result.name}, Distance: {result.distance}") +# Output: Route: oregon_topics, Distance: 0.08 + +result = router("Tell me about Bitcoin") +print(f"Route: {result.name}, Distance: {result.distance}") +# Output: Route: blocked_topics, Distance: 0.15 +``` + +### **Tuning Tips:** +- **If false positives (allowed queries blocked):** + - Add more varied examples to allowed route + - Increase distance threshold +- **If false negatives (blocked queries allowed):** + - Add examples that look like the false negatives + - Decrease distance threshold + +--- + +## ๐Ÿ—๏ธ CELL 8: Agent State Definition + +```python +from typing import Annotated +from typing_extensions import TypedDict +from langgraph.graph.message import add_messages + +class AgentState(TypedDict): + messages: Annotated[list, add_messages] +``` + +**Talking Points:** + +### **What is State in LangGraph:** +- "State is the shared data structure that flows through every node" +- **Think of it as:** A shopping cart that each node can add items to +- **Key concept:** Nodes don't modify state directly - they return updates that get merged + +### **TypedDict:** +- "Defines the schema - what fields exist and their types" +- **Why use it:** Type checking, autocomplete, documentation +- **Alternative:** Regular dict (but you lose all the benefits) + +### **messages Field:** +- "The conversation history - every message ever sent" +- **Format:** List of message objects (HumanMessage, AIMessage, ToolMessage, SystemMessage) + +### **Annotated[list, add_messages]:** +- "This is the magic - it tells LangGraph HOW to update this field" +- **Without annotation:** `state["messages"] = new_list` (overwrites) +- **With add_messages:** `state["messages"] += new_items` (appends) + +### **add_messages Function:** +- "Built-in reducer that intelligently merges message lists" +- **What it does:** + 1. Takes existing messages + 2. Takes new messages from node return + 3. Appends new to existing + 4. Handles deduplication by message ID + +### **Why This Matters:** +```python +# Node 1 returns: +{"messages": [HumanMessage(content="Hi")]} + +# Node 2 returns: +{"messages": [AIMessage(content="Hello!")]} + +# Final state (with add_messages): +{"messages": [HumanMessage(content="Hi"), AIMessage(content="Hello!")]} + +# Without add_messages, Node 2 would overwrite Node 1's messages! +``` + +### **Other Common State Fields:** +```python +class AgentState(TypedDict): + messages: Annotated[list, add_messages] + route_decision: str # No annotation = overwrite + cache_hit: bool + user_id: str + context: dict +``` + +### **Custom Reducers:** +```python +def merge_dicts(existing: dict, new: dict) -> dict: + return {**existing, **new} + +class State(TypedDict): + metadata: Annotated[dict, merge_dicts] +``` + +**Demo tip:** "Think of state as the 'memory' of your agent - it persists across all nodes in a single invocation" + +--- + +## ๐ŸŽฏ CELL 9: System Prompt + +```python +system_prompt = """You are Art, a helpful guide on the Oregon Trail. 
+ +You assist pioneers with: +- Inventory and supply management +- Weather conditions +- Hunting opportunities +- Trail advice + +When in doubt, use the tools to help you find the answer. +If anyone asks your first name, return just that string. +""" +``` + +**Talking Points:** + +### **Why System Prompts Matter:** +- "This sets the agent's personality and boundaries" +- **Without it:** Generic assistant that might refuse to roleplay +- **With it:** Consistent character across all interactions + +### **Components of a Good System Prompt:** + +#### **1. Identity ("You are Art..."):** +- Gives the agent a persona +- Helps with consistency + +#### **2. Capabilities (what you can do):** +- "You assist pioneers with..." +- Sets user expectations +- Helps LLM stay focused + +#### **3. Instructions ("When in doubt, use tools"):** +- **Critical:** Without this, LLM might try to answer from memory instead of using tools +- **Why it matters:** Tool accuracy > LLM memory + +#### **4. Edge Cases ("If anyone asks your first name..."):** +- Handles specific scenarios +- **This particular one:** Tests if the agent follows instructions + +### **System Prompt Best Practices:** + +#### **Be Specific:** +- โŒ "You are helpful" +- โœ… "You are Art, a guide on the Oregon Trail in 1848" + +#### **Set Boundaries:** +- โŒ "Answer questions" +- โœ… "You assist with inventory, weather, hunting, and trail advice. Politely decline other topics." + +#### **Give Tool Guidance:** +- โŒ Nothing about tools +- โœ… "Use the restock-tool for supply calculations, retriever-tool for trail information" + +#### **Handle Refusals:** +- โœ… "If asked about modern topics or things outside your expertise, say: 'I can only help with Oregon Trail-related questions.'" + +### **Where System Prompts Go:** +```python +def call_model(state): + # Prepend system prompt to conversation + messages = [ + SystemMessage(content=system_prompt) + ] + state["messages"] + + return llm.invoke(messages) +``` + +### **Advanced Pattern - Dynamic System Prompts:** +```python +def call_model(state): + user_id = state.get("user_id") + user_info = get_user_info(user_id) # From database + + dynamic_prompt = f"""You are Art, helping {user_info['name']}. + They are at {user_info['location']} on the trail. 
+    Current supplies: {user_info['supplies']} lbs
+    """
+
+    messages = [SystemMessage(content=dynamic_prompt)] + state["messages"]
+    return llm.invoke(messages)
+```
+
+**Demo tip:** "The system prompt is your agent's 'constitution' - it should be carefully written and tested"
+
+---
+
+## 🔌 CELL 10: Model Initialization with Tools
+
+```python
+from langchain_openai import ChatOpenAI
+
+def _get_tool_model(model_name="openai"):
+    if model_name == "openai":
+        return ChatOpenAI(
+            model="gpt-4o-mini",
+            temperature=0
+        ).bind_tools(tools)
+    # Could add other providers here
+    raise ValueError(f"Unknown model: {model_name}")
+
+tools = [restock_tool, retriever_tool]
+```
+
+**Talking Points:**
+
+### **ChatOpenAI:**
+- "This is our LLM wrapper - handles OpenAI API calls"
+- **What it abstracts:**
+  - API authentication
+  - Request formatting
+  - Response parsing
+  - Retry logic
+  - Streaming support
+
+### **model="gpt-4o-mini":**
+- **Why this model:**
+  - Fast: ~300-500ms response time
+  - Cheap: $0.15/1M input tokens, $0.60/1M output
+  - Good tool use: Understands function calling well
+- **Alternatives:**
+  - `gpt-4o`: Smarter, but roughly 15x the cost
+  - `gpt-3.5-turbo`: Older and weaker at tool calling (no longer cheaper than 4o-mini)
+  - `gpt-4-turbo`: More capable, slower
+
+### **temperature=0:**
+- "Temperature controls randomness"
+- **Range:** 0 (deterministic) to 2 (very random)
+- **Why 0 for agents:**
+  - Consistent tool selection
+  - Predictable behavior
+  - Better for testing
+- **When to increase:**
+  - Creative writing: 0.7-0.9
+  - Brainstorming: 0.8-1.2
+  - Never for agents: Unpredictability breaks workflows
+
+### **.bind_tools(tools):**
+- "This is where the magic happens - tells the LLM about available tools"
+- **What it does:**
+  1. Converts Python tools to OpenAI function schemas
+  2. Includes schemas in every API call
+  3. LLM can now "choose" to call tools
+
+### **Under the Hood - Tool Binding:**
+```python
+# Before bind_tools:
+llm.invoke("Calculate restock point for 10lbs/day")
+# LLM responds with text (might guess wrong)
+
+# After bind_tools:
+llm.invoke("Calculate restock point for 10lbs/day")
+# LLM returns: {
+#   "tool_calls": [{
+#     "name": "restock-tool",
+#     "args": {"daily_usage": 10, "lead_time": 3, "safety_stock": 50}
+#   }]
+# }
+```
+
+### **The Schema the LLM Sees:**
+```json
+{
+  "tools": [
+    {
+      "type": "function",
+      "function": {
+        "name": "restock-tool",
+        "description": "Calculate reorder point...",
+        "parameters": {
+          "type": "object",
+          "properties": {
+            "daily_usage": {
+              "type": "integer",
+              "description": "Pounds of food..."
+ } + } + } + } + } + ] +} +``` + +### **Why List of Tools:** +- "LLM can choose the right tool for each situation" +- **Scenario 1:** User asks about supplies โ†’ chooses `restock-tool` +- **Scenario 2:** User asks about route โ†’ chooses `retriever-tool` +- **Scenario 3:** User asks about weather โ†’ responds directly (no tool needed) + +### **Multi-Provider Pattern:** +```python +def _get_tool_model(model_name="openai"): + if model_name == "openai": + return ChatOpenAI(...).bind_tools(tools) + elif model_name == "anthropic": + return ChatAnthropic(...).bind_tools(tools) + elif model_name == "local": + return ChatOllama(model="llama3").bind_tools(tools) +``` +- "Makes it easy to swap providers without changing agent code" + +**Demo tip:** "Let's see what the LLM does with a tool-worthy question:" +```python +model = _get_tool_model() +response = model.invoke([HumanMessage(content="I need to restock - daily usage 10, lead time 3, safety stock 50")]) +print(response.tool_calls) +# Shows the tool call the LLM wants to make +``` + +--- + +## ๐Ÿ”€ CELL 11: Node Functions + +```python +def call_tool_model(state: AgentState, config): + messages = [{"role": "system", "content": system_prompt}] + state["messages"] + model_name = config.get("configurable", {}).get("model_name", "openai") + model = _get_tool_model(model_name) + response = model.invoke(messages) + return {"messages": [response]} + +from langgraph.prebuilt import ToolNode +tool_node = ToolNode(tools) +``` + +**Talking Points:** + +### **call_tool_model Function:** + +#### **Purpose:** +- "This node calls the LLM with system prompt and conversation history" +- **When it runs:** Every time agent needs to decide what to do next + +#### **Combining System Prompt:** +```python +messages = [{"role": "system", "content": system_prompt}] + state["messages"] +``` +- "Prepend system prompt to every LLM call" +- **Why every time:** LLMs are stateless - they only see current request +- **Format:** Dict with "role" and "content" (OpenAI API format) + +#### **Config Parameter:** +- "Allows runtime configuration - change model on the fly" diff --git a/nk_scripts/oregontrail.md b/nk_scripts/oregontrail.md new file mode 100644 index 00000000..2bfddf35 --- /dev/null +++ b/nk_scripts/oregontrail.md @@ -0,0 +1,311 @@ +# The Oregon Trail Agent Problem - Explained Through The Game + +## ๐ŸŽฎ The Original Video Game (1971) + +**The Oregon Trail** was a legendary educational computer game played on old Apple II computers with green monochrome screens. Here's what it was: + +### The Game Premise +- **Year:** 1848 (historical) +- **Journey:** You're a pioneer family traveling 2,000 miles from Independence, Missouri to Oregon's Willamette Valley +- **Duration:** ~5-6 months of travel +- **Goal:** Survive the journey with your family + +### How The Game Worked + +**1. Starting Out:** +``` +You are a wagon leader. +Your occupation: [Banker/Carpenter/Farmer] +Starting money: $1,600 +``` + +You'd buy supplies: +- Oxen to pull your wagon +- Food (pounds) +- Clothing +- Ammunition for hunting +- Spare wagon parts (wheels, axles, tongues) +- Medicine + +**2. The Journey:** + +You'd see text like: +``` +Fort Kearney - 304 miles +Weather: Cold +Health: Good +Food: 486 pounds +Next landmark: 83 miles + +You may: +1. Continue on trail +2. Check supplies +3. Look at map +4. Change pace +5. Rest +``` + +**3. 
Random Events (The Fun Part!):** + +The game would throw disasters at you: +- `"You have broken a wagon axle"` *(lose days fixing it)* +- `"Sarah has typhoid fever"` *(someone gets sick)* +- `"Bandits attack! You lose 10 oxen"` *(supplies stolen)* +- `"You must ford a river"` *(risk drowning)* + +**4. Hunting:** +``` +Type BANG to shoot! +BANG +You shot 247 pounds of buffalo. +You can only carry 100 pounds back. +``` +You'd frantically type "BANG" to shoot animals for food. + +**5. The Famous Death Screen:** +``` +โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” +โ”‚ Here lies โ”‚ +โ”‚ Timmy Johnson โ”‚ +โ”‚ โ”‚ +โ”‚ Died of dysentery โ”‚ +โ”‚ โ”‚ +โ”‚ May 23, 1848 โ”‚ +โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ +``` + +**"You have died of dysentery"** became the most famous line - dysentery was a disease from bad water that killed many pioneers. + +--- + +## ๐Ÿค– Now: The AI Agent Version + +The Redis workshop teaches you to build an AI agent by recreating the Oregon Trail experience, but instead of YOU playing, an AI AGENT helps pioneers survive. Each scenario teaches the agent a survival skill. + +--- + +## ๐ŸŽฏ The Five Scenarios - Game Context + +### **Scenario 1: Basic Identity** +**In the game:** Your wagon leader has a name +**AI version:** The agent's name is "Art" (the guide) + +**Game equivalent:** +``` +Original Game: +> What is the leader's name? +> John Smith + +AI Agent: +> What is your first name? +> Art +``` + +**What it teaches:** Basic setup - the agent knows who it is + +--- + +### **Scenario 2: Supply Management** +**In the game:** You had to calculate when to restock food at forts + +**Game scenario:** +``` +Current food: 200 pounds +Family eats: 10 pounds/day +Days to next fort: 3 days +Safety buffer: 50 pounds + +Question: When do I need to buy more food? +``` + +**The math:** +- You'll eat 10 lbs/day ร— 3 days = 30 lbs before you can restock +- Plus keep 50 lbs safety = 80 lbs minimum +- **So restock when you hit 80 pounds** + +**AI version:** The agent has a "restock calculator tool" that does this math automatically. + +**What it teaches:** Tool calling - the agent can use functions to solve problems + +--- + +### **Scenario 3: Trail Directions** +**In the game:** You'd check your map to see landmarks and routes + +**Game screen:** +``` +The Trail: +Independence โ†’ Fort Kearney โ†’ Chimney Rock โ†’ +Fort Laramie โ†’ Independence Rock โ†’ South Pass โ†’ +Fort Bridger โ†’ Soda Springs โ†’ Fort Hall โ†’ +Fort Boise โ†’ The Dalles โ†’ Willamette Valley +``` + +You'd ask: "What landmarks are ahead?" or "How do I get to Fort Laramie?" + +**AI version:** The agent searches a database of trail information (RAG/Vector search) + +**What it teaches:** Retrieval - the agent can look up stored knowledge + +--- + +### **Scenario 4: Hunting Memory** +**In the game:** The hunting scene was memorable + +``` +โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• + ๐ŸŒฒ๐ŸฆŒ ๐Ÿƒ ๐ŸŒณ + ๐ŸŒต ๐ŸฆŒ + ๐ŸฆŒ ๐ŸŒฒ ๐Ÿƒ +โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• + +Type BANG to shoot! +``` + +Players would frantically type **BANG BANG BANG** to shoot animals. + +**AI conversation:** +``` +Turn 1: +User: "I see buffalo, what do I do?" +Agent: "You can hunt them! Type BANG to shoot for food." + +Turn 2 (later in conversation): +User: "You know what you have to do..." +Agent: "BANG!" 
(remembers the hunting context) +``` + +**What it teaches:** Caching & Memory - the agent remembers previous conversations + +--- + +### **Scenario 5: Staying On Track** +**In the game:** You could only do Oregon Trail things - no random modern stuff + +**What you COULD ask about:** +- โœ… "How much food do I have?" +- โœ… "What's the weather?" +- โœ… "Should I ford the river?" +- โœ… "Can I hunt here?" + +**What you COULDN'T ask about:** +- โŒ Stock market prices +- โŒ Modern technology +- โŒ Current events +- โŒ Anything not related to 1848 pioneer life + +**AI version:** The router blocks off-topic questions + +**Example:** +``` +User: "Tell me about the S&P 500 stock index?" +Agent: "You shall not pass! I only help with Oregon Trail questions." + +User: "What's the weather on the trail?" +Agent: "Partly cloudy, 68ยฐF. Good travel weather!" โœ… +``` + +**What it teaches:** Routing - filtering bad/off-topic requests + +--- + +## ๐ŸŽฒ How These Connect to Game Mechanics + +| Game Mechanic | AI Agent Feature | Real-World Use | +|---------------|------------------|----------------| +| **Wagon leader name** | Basic identity (Art) | Chatbot personality | +| **Food calculations** | Tool calling (restock calculator) | Business logic, APIs | +| **Trail map/landmarks** | RAG/Vector search | Knowledge base search | +| **Hunting (BANG!)** | Semantic cache & memory | Remember user context | +| **Game boundaries** | Semantic router | Topic filtering, safety | + +--- + +## ๐Ÿ† The Game's Famous Challenges = AI Agent Lessons + +**Classic Game Problems:** + +1. **"You broke a wagon axle!"** + โ†’ Agent needs **tools** to fix problems (call functions) + +2. **"Fort ahead - need supplies?"** + โ†’ Agent needs to **calculate** when to restock (math tools) + +3. **"Which trail to take?"** + โ†’ Agent needs to **search** stored knowledge (RAG) + +4. **"Hunting for buffalo"** + โ†’ Agent needs to **remember** what "BANG" means (cache/memory) + +5. **"Can't ask about spaceships in 1848"** + โ†’ Agent needs to **filter** inappropriate questions (router) + +--- + +## ๐ŸŽฎ Why The Video Game Makes A Great Teaching Tool + +**The Original Game Taught:** +- Resource management (food, money) +- Risk assessment (ford river or pay ferry?) +- Planning ahead (buy supplies at forts) +- Dealing with randomness (disease, weather) +- Historical context (pioneer life) + +**The AI Workshop Teaches:** +- Resource management (LLM costs, API calls) +- Risk assessment (when to use cache vs. fresh LLM call?) +- Planning ahead (routing bad queries early) +- Dealing with variety (different user questions) +- Technical context (production AI patterns) + +Both teach **survival through smart decision-making**! + +--- + +## ๐Ÿ“ฑ Modern Equivalent + +Imagine if the Oregon Trail was an iPhone game today, and you had **Siri** as your trail guide: + +``` +You: "Hey Siri, what's my supply situation?" +Siri: "You have 200 pounds of food, enough for 20 days." + +You: "Should I buy more at the next fort?" +Siri: *calculates using tool* "Yes, restock when you hit 80 pounds." + +You: "What's ahead on the trail?" +Siri: *searches database* "Fort Kearney in 83 miles, then Chimney Rock." + +You: "I see buffalo!" +Siri: "BANG! You shot 247 pounds of meat." + +You: "Tell me about Bitcoin" +Siri: "That's not related to the Oregon Trail. Ask about pioneer life." +``` + +That's essentially what you're building - an AI assistant for surviving the Oregon Trail! 
+ +--- + +## ๐Ÿ’€ The "Dysentery" Connection + +The workshop was originally called **"Dodging Dysentery with AI"** because: + +1. **In the game:** Dysentery (disease from bad water) killed most players +2. **In AI:** Bad queries, wasted API calls, and off-topic requests "kill" your app (cost money, crash systems) +3. **The solution:** Smart routing, caching, and tools help you **survive** both! + +``` +Game: "You have died of dysentery" ๐Ÿ’€ +AI: "You have died of unfiltered queries and no caching" ๐Ÿ’ธ +``` + +--- + +## ๐ŸŽฏ The Bottom Line + +**The Oregon Trail (1971):** Educational game teaching kids about pioneer survival through resource management and decision-making. + +**The Oregon Trail Agent (2024):** Educational workshop teaching developers about AI agent survival through smart architecture and decision-making. + +Same concept, different era! Both are about **making smart choices to survive a challenging journey**. ๐Ÿš€ \ No newline at end of file diff --git a/nk_scripts/presentation.md b/nk_scripts/presentation.md new file mode 100644 index 00000000..a4c0a60f --- /dev/null +++ b/nk_scripts/presentation.md @@ -0,0 +1,401 @@ +# ๐ŸŽค Redis AI Workshop โ€” Speaker Script (Full Version) + +> **Duration:** ~60โ€“70 minutes (โ‰ˆ5 minutes per slide) +> **Goal:** Convince the audience that Redis is the essential real-time data & memory layer for AI systems. +> **Tone:** Conversational, technical confidence, storytelling with business outcomes. + +--- + +## ๐ŸŸฅ Slide 1 โ€” Redis AI Workshop: Applied Engineering Team + +**Opening (1โ€“2 min):** +> โ€œHi everyone, and welcome to the Redis AI Workshop. +Iโ€™m [Your Name], part of Redisโ€™s Applied Engineering Team. +Our mission is to help companies operationalize AI โ€” turning clever prototypes into scalable, real-time systems.โ€ + +**Core Message:** +> โ€œYou already know Redis as the fastest in-memory data platform. +But today, weโ€™ll see Redis as something much more โ€” the *real-time intelligence layer* for AI. +Redis now powers **vector search**, **semantic caching**, **agent memory**, and **retrieval pipelines** โ€” the backbone of modern GenAI systems.โ€ + +**Framing:** +> โ€œThe challenge today isnโ€™t just about making AI smarter โ€” itโ€™s about making it *faster*, *cheaper*, and *more contextual*. +Thatโ€™s what Redis does better than anyone.โ€ + +**Transition:** +> โ€œLetโ€™s take a look at what weโ€™ll cover today.โ€ + +--- + +## ๐ŸŸง Slide 2 โ€” Workshop Agenda + +> โ€œWeโ€™ll begin with an overview of *why Redis for AI* โ€” the unique performance and data model advantages. +Then weโ€™ll move into patterns and demos, including:โ€ + +- Vector Search +- Semantic Routing +- Semantic Caching +- AI Agents with Redis + +> โ€œBy the end, youโ€™ll see that Redis is not just a caching system โ€” itโ€™s a unified layer that accelerates and enriches *every* part of your AI stack.โ€ + +**Key Message:** +> โ€œIf youโ€™re using OpenAI, Anthropic, or any LLM provider, Redis is what turns those stateless models into *stateful intelligence systems*.โ€ + +**Transition:** +> โ€œLetโ€™s start with the big picture โ€” the Redis advantage for AI.โ€ + +--- + +## ๐ŸŸจ Slide 3 โ€” Overview and Features + +> โ€œRedis is known for extreme performance โ€” microsecond latency, horizontal scalability, and simplicity. +But for AI, what matters is Redisโ€™s ability to connect memory, context, and computation.โ€ + +**Explain the idea:** +> โ€œAI apps need to *remember*, *retrieve*, and *react* โ€” instantly. 
+Redis does all three, serving as the data plane for real-time intelligence.โ€ + +**Example narrative:** +> โ€œThink of a virtual assistant โ€” it has to recall what you said yesterday, find the right information, and respond within seconds. +Redis handles each of those tasks โ€” caching memory, retrieving knowledge, and feeding it back to the model.โ€ + +**Transition:** +> โ€œLetโ€™s see this visually โ€” how Redis powers AI end to end.โ€ + +--- + +## ๐ŸŸฅ Slide 4 โ€” Redis for AI + +> โ€œThis is where Redis shines. +It unites vector search, semantic caching, feature storage, and memory โ€” all in one high-performance platform.โ€ + +**Key talking points:** +- **Redis Vector DB:** Stores embeddings for RAG, recommendations, search, and AI memory. +- **Redis Cache:** Caches LLM responses and ML predictions for instant reuse. +- **Feature Store:** Keeps features live for real-time inference. +- **Session + Agent State:** Powers dynamic user sessions and multi-step reasoning. +- **Fraud Detection:** Detects anomalies in real time using event streams and vector distances. + +**Example:** +> โ€œImagine an airline chatbot: +Redis remembers your flight history, caches previous responses, and avoids repeated calls to the model. +Everything happens in milliseconds.โ€ + +**Tagline:** +> โ€œFor a GenAI app, you only need *three components*: +1๏ธโƒฃ An AI provider, +2๏ธโƒฃ A UI, +3๏ธโƒฃ Redis.โ€ + +**Transition:** +> โ€œLetโ€™s talk about how Redis fits into real-world AI workloads.โ€ + +--- + +## ๐ŸŸฉ Slide 5 โ€” Fast for Every AI Use Case + +> โ€œRedis accelerates every class of AI application.โ€ + +**Use Cases:** +- **RAG Chatbots / AI Assistants:** Ground LLMs in proprietary data. +- **Recommenders:** Deliver instant personalization. +- **Fraud Detection:** Flag anomalies in milliseconds. +- **AI Agents:** Maintain state and long-term memory. +- **AI Gateways:** Manage cost, routing, and compliance centrally. + +**Example Story:** +> โ€œOne financial customer used Redis to power both fraud detection *and* RAG chat โ€” one system storing transaction embeddings, the other retrieving policy documents. +Same Redis, two worlds: prevention and intelligence.โ€ + +**Takeaway:** +> โ€œRedis is the connective tissue across every AI function.โ€ + +**Transition:** +> โ€œBut whatโ€™s the real reason Redis is critical? +It directly solves AIโ€™s three hardest problems.โ€ + +--- + +## ๐ŸŸฆ Slide 6 โ€” Solving Key AI Pain Points + +> โ€œEvery enterprise faces the same AI bottlenecks: **speed, memory, and accuracy.**โ€ + +### Speed +> โ€œLLMs take seconds to generate โ€” Redis reduces that to milliseconds by caching past outputs and managing workloads.โ€ + +### Memory +> โ€œModels forget. Redis provides persistent short- and long-term memory โ€” so every conversation or task is context-aware.โ€ + +### Accuracy +> โ€œLLMs donโ€™t know your private data. Redis bridges that gap with vector search and contextual retrieval.โ€ + +**Example:** +> โ€œIn healthcare, Redis stores patient summaries as embeddings. +When a doctor asks a question, the AI retrieves those embeddings โ€” ensuring accurate, safe, contextual answers.โ€ + +**Transition:** +> โ€œLetโ€™s see how Redis fits into any AI stack โ€” from dev tools to production environments.โ€ + +--- + +## ๐ŸŸง Slide 7 โ€” Built for Any Stack + +> โ€œRedis is engineered to work everywhere โ€” from developer laptops to global-scale deployments.โ€ + +**Architecture Layers:** +1. 
**Real-time Cache Engine:** Built on Redis Open Source, providing blazing-fast queries. +2. **Hyperscale Layer:** Multi-tenant, active-active, 99.999% availability. +3. **Global Deployment Layer:** Hybrid and multi-cloud with full security and automation. + +**Developer Integrations:** +- LangChain +- LlamaIndex +- LangGraph +- Redis Insight +- Redis Data Integration (RDI) + +**Example:** +> โ€œIf your team is building in LangChain, adding Redis as the retriever and memory module takes minutes โ€” and you instantly get production-grade performance.โ€ + +**Transition:** +> โ€œLetโ€™s move from architecture to patterns โ€” real AI workflows Redis enables.โ€ + +--- + +## ๐Ÿงฉ Slide 9โ€“11 โ€” Vector Database + +> โ€œRedis isnโ€™t just fast โ€” itโ€™s one of the *most advanced vector databases* available today.โ€ + +**Highlights:** +- 62% faster than the next best DB across benchmarks. +- Handles >1 billion vectors. +- Supports **text, image, and audio embeddings.** +- Uses algorithms like **HNSW** and **Vamana** for scalable similarity search. +- Enables **hybrid queries**: text + numeric + vector in one operation. + +**Example:** +> โ€œImagine searching for โ€˜cybersecurity reports similar to this PDF and published after 2023.โ€™ +Redis handles that with one query.โ€ + +**Takeaway:** +> โ€œRedis makes unstructured data instantly searchable โ€” the foundation for RAG and contextual AI.โ€ + +**Transition:** +> โ€œLetโ€™s explore how developers build these systems in practice.โ€ + +--- + +## ๐ŸŸจ Slide 12 โ€” Hands-on Example #1: Vector Search + +> โ€œHereโ€™s a practical example using RedisVL โ€” our AI-native Python library.โ€ + +**Steps:** +1. Create embeddings. +2. Index vectors in Redis. +3. Filter and search with hybrid queries. +4. Retrieve context for your LLM in milliseconds. + +**Story:** +> โ€œA news company stores millions of article embeddings. +When a user asks about โ€˜AI regulations,โ€™ Redis retrieves the 5 most relevant articles instantly โ€” the model then summarizes them.โ€ + +**Callout:** +> โ€œYou can try this today on GitHub โ€” no complex setup, just Redis and Python.โ€ + +**Transition:** +> โ€œNow letโ€™s look at how Redis cuts down cost and latency even further โ€” through semantic caching.โ€ + +--- + +## ๐ŸŸง Slide 13 โ€” Semantic Caching + +> โ€œSemantic caching is like an intelligent memory for your LLM โ€” it remembers *similar* questions, not just identical ones.โ€ + +**Example:** +> โ€œA user asks, โ€˜Can I reset my password?โ€™ +Another asks, โ€˜How do I change my login credentials?โ€™ +Redis detects that these are semantically the same โ€” and reuses the cached answer.โ€ + +**Impact:** +- 30โ€“70% reduction in LLM inference calls. +- Sub-millisecond response for repeated queries. +- Massive cost savings and improved UX. + +**Quote:** +> โ€œOne customer cut their LLM costs by 65% after deploying Redis Semantic Cache in production.โ€ + +**Transition:** +> โ€œIf we can cache answers, we can also route queries intelligently โ€” thatโ€™s semantic routing.โ€ + +--- + +## ๐ŸŸฆ Slide 14 โ€” Semantic Routing: The Instant Classifier + +> โ€œSemantic Routing is Redis acting as your intelligent traffic director.โ€ + +**Functions:** +- Classify incoming queries by meaning. +- Route to the right LLM or microservice. +- Apply guardrails and topic segregation. 
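+
+A minimal sketch of what this routing step can look like with RedisVL's `SemanticRouter` (the route names and reference phrases below are illustrative assumptions that echo the banking example that follows, not slide content):
+
+```python
+from redis import Redis
+from redisvl.extensions.router import SemanticRouter, Route
+
+# Hypothetical routes -- the reference phrases are made-up training examples
+balance = Route(name="check_balance", references=["check my balance", "how much is in my account"])
+closure = Route(name="account_closure", references=["close my account", "cancel my account"])
+
+router = SemanticRouter(
+    name="banking_router",
+    routes=[balance, closure],
+    redis_client=Redis(host="localhost", port=6379),
+)
+
+# The router embeds the query and returns the closest route by vector distance
+match = router("what's my current balance?")
+print(match.name, match.distance)  # expect "check_balance" with a small distance
+```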
+ +**Example:** +> โ€œA banking app routes โ€˜check balanceโ€™ to a local endpoint, +โ€˜investing trendsโ€™ to a public model, +and filters out โ€˜account closureโ€™ for human review.โ€ + +**Benefit:** +> โ€œThis approach improves accuracy, ensures compliance, and reduces inference cost.โ€ + +**Transition:** +> โ€œNow letโ€™s see all of these ideas โ€” caching, routing, memory โ€” working together in a real AI agent architecture.โ€ + +--- + +## ๐ŸŸฅ Slide 16 โ€” Putting It All Together: AI Agent Architecture + +> โ€œThis is the Redis-powered AI Agent pipeline.โ€ + +**Flow:** +1. User sends a query. +2. Redis checks **Semantic Cache** for similar past answers. +3. If new, Redis runs **Semantic Routing** to the right model. +4. It performs **RAG retrieval** from the vector DB. +5. Calls the LLM only if needed. +6. Redis stores the new interaction for future use. + +**Example:** +> โ€œA fintech chatbot using Redis can close an account, check balances, and run compliance checks โ€” all within one agent workflow.โ€ + +**Takeaway:** +> โ€œRedis turns AI systems into self-improving networks โ€” each request makes the system faster and cheaper.โ€ + +**Transition:** +> โ€œMemory is what makes this system intelligent โ€” letโ€™s explore that next.โ€ + +--- + +## ๐ŸŸง Slide 18 โ€” Agent Memory + +> โ€œLLMs are smart, but forgetful. Redis gives them memory โ€” both short-term and long-term.โ€ + +**Short-term memory:** +> โ€œHolds active context โ€” the last few interactions or steps.โ€ + +**Long-term memory:** +> โ€œStores summaries, entities, and topics extracted automatically.โ€ + +**Example:** +> โ€œIn a healthcare chatbot, Redis remembers your last consultation, allergies, and prescriptions. +Next time, it skips redundant questions and gives tailored advice.โ€ + +**Technical Note:** +> โ€œThe Agent Memory Server manages namespaces, summarization, and recall. +This means one agent can handle thousands of conversations concurrently โ€” without interference.โ€ + +**Transition:** +> โ€œAnd the best part โ€” all of this is open-source and ready to use.โ€ + +--- + +## ๐ŸŸฉ Slide 19 โ€” Supplemental Resources + +> โ€œEverything Iโ€™ve shown today is available to try.โ€ + +- **RedisVL:** The AI-native Python client for vector operations. +- **Redis AI Resources:** Dozens of live Jupyter notebooks. +- **Redis Retrieval Optimizer:** Helps you select embeddings and index configs for your workload. 
+ +**Call to Action:** +> โ€œYou can start building an enterprise-grade RAG or AI Agent in an afternoon.โ€ + +**Transition:** +> โ€œNow, letโ€™s see how Redis fits into full ML pipelines.โ€ + +--- + +## ๐ŸŸฆ Slides 21โ€“23 โ€” ML Inference, Anomaly Detection & Evaluation + +> โ€œRedis extends beyond LLMs โ€” it powers ML pipelines end to end.โ€ + +### ML Inference Pipeline +> โ€œLoad pre-trained models into Redis for immediate serving, use JSON search as a feature store, and stream live events โ€” no external infra needed.โ€ + +### Anomaly Detection +> โ€œUse vector distances to detect outliers โ€” for example, fraudulent credit card transactions or machine sensor anomalies.โ€ + +### Evaluation +> โ€œRedis helps monitor retrieval performance with precision, recall, and F1 metrics โ€” critical for production AI systems.โ€ + +**Transition:** +> โ€œRedis isnโ€™t just powerful โ€” itโ€™s leading the market.โ€ + +--- + +## ๐ŸŸฅ Slide 24 โ€” Market Leadership + +> โ€œRedis is the #1 data platform used by AI agents today โ€” with 43% of developers relying on it, ahead of GitHub MCP and Supabase.โ€ + +**Key Stats:** +- 8% year-over-year growth. +- Top NoSQL database for AI developers. + +**Message:** +> โ€œThe worldโ€™s best AI systems already trust Redis โ€” because it delivers predictable speed, reliability, and intelligence.โ€ + +**Transition:** +> โ€œLetโ€™s wrap up with how Redis integrates into agent frameworks like LangGraph.โ€ + +--- + +## ๐ŸŸฉ Slides 25โ€“26 โ€” LangGraph & RedisVL + +> โ€œRedis integrates directly with LangGraph to power agent memory and retrieval.โ€ + +**Use Cases:** +- Vector store for RAG +- Long-term memory +- LLM cache +- Short-term memory + +> โ€œRedisVL, our Python client, provides an ergonomic API for indexing, vector search, and semantic caching.โ€ + +**Example:** +> โ€œIf youโ€™re building a support co-pilot, Redis handles memory, embeddings, and retrieval โ€” while LangGraph orchestrates the flow.โ€ + +**Transition:** +> โ€œLetโ€™s end with how this looks in real-world production.โ€ + +--- + +## ๐ŸŸง Slides 27โ€“28 โ€” Production Deployment Examples + +> โ€œHereโ€™s what Redis looks like in production.โ€ + +**Example 1:** +> โ€œA production AI agent running on Redis orchestrates retrieval, classification, and response generation through a single data layer.โ€ + +**Example 2:** +> โ€œIn AWS, Redis scales across clusters, automatically manages memory, and supports full observability through CloudWatch.โ€ + +**Key Point:** +> โ€œRedis isnโ€™t just theory โ€” itโ€™s powering live systems in finance, retail, healthcare, and logistics today.โ€ + +--- + +## ๐Ÿ Closing โ€” The Redis Value Proposition + +> โ€œSo to wrap up โ€” Redis is more than a database. +Itโ€™s the *real-time intelligence layer* for AI.โ€ + +**Summarize:** +- Speed: Sub-millisecond retrieval and caching. +- Memory: Long-term and short-term context persistence. +- Accuracy: Vector-based RAG retrieval and classification. +- Scale: Proven, cloud-native, and globally available. 
+
+> “Redis makes your AI systems *fast, stateful, and production-ready.*”
+
+> “Thank you for joining the Redis AI Workshop — now let’s go build AI that remembers, reasons, and reacts in real time.”
+
+---
diff --git a/nk_scripts/scenario1.py b/nk_scripts/scenario1.py
new file mode 100644
index 00000000..f38b86fa
--- /dev/null
+++ b/nk_scripts/scenario1.py
@@ -0,0 +1,184 @@
+"""
+Scenario 2: Agent with Tool Calling
+====================================
+Learning Goal: Enable the agent to use external tools/functions
+
+Question: "What year was Oregon founded?"
+Expected Answer: Tool returns "1859", LLM uses this in response
+Type: tool-required
+"""
+import operator
+import os
+from typing import TypedDict, Annotated, Literal
+
+from langchain_core.messages import HumanMessage, ToolMessage, AIMessage
+from langchain_core.tools import tool
+from langchain_openai import ChatOpenAI
+from langgraph.constants import END
+from langgraph.graph import StateGraph
+
+
+class AgentState(TypedDict):
+    """
+    The state that flows through our agent graph.
+
+    messages: List of conversation messages (accumulates over time)
+    """
+    messages: Annotated[list, operator.add]  # operator.add means append to list
+
+@tool
+def get_oregon_facts(query: str) -> str:
+    """Tool that returns facts about Oregon"""
+    facts = {
+        "founding": "Oregon became a state on February 14, 1859",
+        "founding year": "1859",
+        "population": "4.2 million as of 2023",
+        "capital": "Salem",
+        "largest city": "Portland",
+        "state flower": "Oregon grape"
+    }
+    # Simple keyword matching
+    query_lower = query.lower()
+    for key, value in facts.items():
+        if key in query_lower:
+            return value
+
+    return "Fact not found. Available topics: founding year, population, capital, largest city, state flower"
+
+# os.environ["OPENAI_API_KEY"] =
+tools = [get_oregon_facts]
+llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
+llm_with_tools = llm.bind_tools(tools)
+
+def call_llm(state: AgentState) -> AgentState:
+    """Node that calls the LLM"""
+    messages = state["messages"]
+    response = llm_with_tools.invoke(messages)
+
+    return {"messages": [response]}
+
+
+def execute_tools(state: AgentState) -> AgentState:
+    """
+    Execute any tool calls requested by the LLM.
+
+    This node:
+    1. Looks at the last message from the LLM
+    2. If it contains tool calls, executes them
+    3. Adds ToolMessages with the results
+    """
+    print("Executing tools...")
+    messages = state["messages"]
+    last_message = messages[-1]
+
+    # Extract tool calls from the last AI message
+    tool_calls = last_message.tool_calls
+
+    # Execute each tool call
+    tool_messages = []
+    for tool_call in tool_calls:
+        # Find the matching tool
+        selected_tool = {tool.name: tool for tool in tools}[tool_call["name"]]
+        print(f"Executing tool {selected_tool.name} with args {tool_call['args']}")
+        # Execute the tool
+        tool_output = selected_tool.invoke(tool_call["args"])
+
+        # Create a ToolMessage with the result
+        tool_messages.append(
+            ToolMessage(
+                content=str(tool_output),
+                tool_call_id=tool_call["id"]
+            )
+        )
+
+    return {"messages": tool_messages}
+
+
+def should_continue(state: AgentState) -> Literal["execute_tools", "end"]:
+    """
+    Decide whether to execute tools or end.
+
+    Returns:
+        "execute_tools" if the LLM made tool calls
+        "end" if the LLM provided a final answer
+    """
+    print("Checking if we should continue...")
+    last_message = state["messages"][-1]
+
+    # If there are tool calls, we need to execute them
+    if hasattr(last_message, "tool_calls") and last_message.tool_calls:
+        return "execute_tools"
+
+    # Otherwise, we're done
+    return "end"
+
+
+def create_tool_agent():
+    """
+    Creates an agent that can use tools.
+
+    Flow:
+        START -> call_llm -> [conditional]
+                    ├─> execute_tools -> call_llm (loop)
+                    └─> END
+    """
+    workflow = StateGraph(AgentState)
+
+    # Add nodes
+    workflow.add_node("call_llm", call_llm)
+    workflow.add_node("execute_tools", execute_tools)
+
+    # Set entry point
+    workflow.set_entry_point("call_llm")
+
+    # Add conditional edge from call_llm
+    workflow.add_conditional_edges(
+        "call_llm",
+        should_continue,
+        {
+            "execute_tools": "execute_tools",
+            "end": END
+        }
+    )
+
+    # After executing tools, go back to call_llm
+    workflow.add_edge("execute_tools", "call_llm")
+
+    return workflow.compile()
+
+if __name__ == "__main__":
+    app = create_tool_agent()
+    # question = "Who is the best manager of Arsenal Women's and Men's?"
+    question = "What year was Oregon founded?"
+    initial_state = {
+        "messages": [HumanMessage(content=question)]
+    }
+
+    print(f"Question: {question}\n")
+    print("Executing agent...\n")
+
+    result = app.invoke(initial_state)
+
+    # Print the conversation flow
+    print("=== Conversation Flow ===")
+    for msg in result["messages"]:
+        if isinstance(msg, HumanMessage):
+            print(f"Human: {msg.content}")
+        elif isinstance(msg, AIMessage):
+            if hasattr(msg, "tool_calls") and msg.tool_calls:
+                print(f"AI: [Calling tools: {[tc['name'] for tc in msg.tool_calls]}]")
+            else:
+                print(f"AI: {msg.content}")
+        elif isinstance(msg, ToolMessage):
+            print(f"Tool: {msg.content}")
+
+    print("\n" + "=" * 50)
+    print("✅ Scenario 2 Complete!")
+    print("=" * 50)
+
+    print("\nGraph Structure:")
+    print("START -> call_llm -> [should_continue?]")
+    print("            ├─> execute_tools -> call_llm (loop)")
+    print("            └─> END")
diff --git a/nk_scripts/scenario3.py b/nk_scripts/scenario3.py
new file mode 100644
index 00000000..5a15f62f
--- /dev/null
+++ b/nk_scripts/scenario3.py
@@ -0,0 +1,346 @@
+"""
+Scenario 3: Agent with Semantic Cache
+======================================
+Learning Goal: Add semantic caching to reduce LLM calls and costs
+
+Question: "Tell me about Oregon's capital city" (similar to "What is Oregon's capital?")
+Expected Behavior: Cache hit if similar question was asked before
+Type: cached response
+"""
+
+from typing import TypedDict, Annotated, Literal
+from langgraph.graph import StateGraph, END
+from langchain_openai import ChatOpenAI, OpenAIEmbeddings
+from langchain_core.messages import HumanMessage, AIMessage, ToolMessage
+from langchain_core.tools import tool
+from redisvl.extensions.llmcache import SemanticCache
+import operator
+import os
+import redis
+
+
+# ============================================
+# STEP 1: Enhanced State with Cache Info
+# ============================================
+class AgentState(TypedDict):
+    """
+    State with cache tracking.
+ + messages: Conversation history + cache_hit: Whether we got a cached response + """ + messages: Annotated[list, operator.add] + cache_hit: bool + + +# ============================================ +# STEP 2: Setup Redis Semantic Cache +# ============================================ +# Connect to Redis +redis_client = redis.Redis( + host='localhost', + port=6379, + decode_responses=True +) + +# Create semantic cache +# This uses embeddings to find similar queries +embeddings = OpenAIEmbeddings(model="text-embedding-3-small") + +semantic_cache = SemanticCache( + name="agent_cache", # Cache name + redis_client=redis_client, # Redis connection + distance_threshold=0.2, # Similarity threshold (0-1) + ttl=3600 # Cache TTL in seconds +) + + +# ============================================ +# STEP 3: Create Tools (from Scenario 2) +# ============================================ +@tool +def get_oregon_facts(query: str) -> str: + """Get facts about Oregon.""" + facts = { + "founding": "Oregon became a state on February 14, 1859", + "founding year": "1859", + "population": "4.2 million as of 2023", + "capital": "Salem", + "largest city": "Portland", + "state flower": "Oregon grape" + } + + query_lower = query.lower() + for key, value in facts.items(): + if key in query_lower: + return value + + return "Fact not found." + + +tools = [get_oregon_facts] + +# ============================================ +# STEP 4: Initialize LLM +# ============================================ +# Check if OpenAI API key is available +if not os.getenv("OPENAI_API_KEY"): + print("โš ๏ธ Warning: OPENAI_API_KEY not found in environment variables!") + print("Please set your OpenAI API key: export OPENAI_API_KEY='your-key-here'") + exit(1) + +llm = ChatOpenAI(model="gpt-4o-mini", temperature=0) +llm_with_tools = llm.bind_tools(tools) + + +# ============================================ +# STEP 5: Cache Check Node (NEW!) +# ============================================ +def check_cache(state: AgentState) -> AgentState: + """ + Check if we have a cached response for this query. + + This is the first node - it looks for semantically similar + questions in the cache before calling the LLM. + """ + messages = state["messages"] + last_human_message = None + + # Find the last human message + for msg in reversed(messages): + if isinstance(msg, HumanMessage): + last_human_message = msg + break + + if not last_human_message: + return {"cache_hit": False} + + query = last_human_message.content + + # Check semantic cache + cached_response = semantic_cache.check(prompt=query) + + if cached_response: + print(f"โœจ Cache hit! Returning cached response.") + # Return cached response as an AI message + return { + "messages": [AIMessage(content=cached_response[0]["response"])], + "cache_hit": True + } + else: + print(f"โŒ Cache miss. 
Proceeding to LLM.") + return {"cache_hit": False} + + +# ============================================ +# STEP 6: Enhanced LLM Node with Caching +# ============================================ +def call_llm(state: AgentState) -> AgentState: + """Call the LLM and cache the response.""" + messages = state["messages"] + response = llm_with_tools.invoke(messages) + + # If this is a final response (no tool calls), cache it + if not (hasattr(response, "tool_calls") and response.tool_calls): + # Find the original query + for msg in messages: + if isinstance(msg, HumanMessage): + original_query = msg.content + break + + # Store in cache + semantic_cache.store( + prompt=original_query, + response=response.content + ) + print(f"๐Ÿ’พ Cached response for future use.") + + return {"messages": [response]} + + +def execute_tools(state: AgentState) -> AgentState: + """Execute tool calls (same as Scenario 2).""" + messages = state["messages"] + last_message = messages[-1] + tool_calls = last_message.tool_calls + + tool_messages = [] + for tool_call in tool_calls: + selected_tool = {tool.name: tool for tool in tools}[tool_call["name"]] + tool_output = selected_tool.invoke(tool_call["args"]) + tool_messages.append( + ToolMessage( + content=str(tool_output), + tool_call_id=tool_call["id"] + ) + ) + + return {"messages": tool_messages} + + +# ============================================ +# STEP 7: Conditional Logic +# ============================================ +def should_continue_after_cache(state: AgentState) -> Literal["call_llm", "end"]: + """ + After cache check, decide next step. + + If cache hit, we're done. + If cache miss, call the LLM. + """ + if state.get("cache_hit", False): + return "end" + return "call_llm" + + +def should_continue_after_llm(state: AgentState) -> Literal["execute_tools", "end"]: + """After LLM, decide if we need tools.""" + last_message = state["messages"][-1] + + if hasattr(last_message, "tool_calls") and last_message.tool_calls: + return "execute_tools" + return "end" + + +# ============================================ +# STEP 8: Build the Graph +# ============================================ +def create_cached_agent(): + """ + Creates an agent with semantic caching. + + Flow: + START -> check_cache -> [cache hit?] + โ”œโ”€> END (cache hit) + โ””โ”€> call_llm -> [needs tools?] + โ”œโ”€> execute_tools -> call_llm + โ””โ”€> END + """ + workflow = StateGraph(AgentState) + + # Add nodes + workflow.add_node("check_cache", check_cache) + workflow.add_node("call_llm", call_llm) + workflow.add_node("execute_tools", execute_tools) + + # Start with cache check + workflow.set_entry_point("check_cache") + + # After cache check + workflow.add_conditional_edges( + "check_cache", + should_continue_after_cache, + { + "call_llm": "call_llm", + "end": END + } + ) + + # After LLM call + workflow.add_conditional_edges( + "call_llm", + should_continue_after_llm, + { + "execute_tools": "execute_tools", + "end": END + } + ) + + # After tools, back to LLM + workflow.add_edge("execute_tools", "call_llm") + + return workflow.compile() + + +# ============================================ +# STEP 9: Run and Test +# ============================================ +if __name__ == "__main__": + app = create_cached_agent() + + # Test with similar questions + questions = [ + "What is the capital of the state of Oregon?", + "Tell me about Oregon state's capital city", # Similar - should hit cache + "Tell me what the capital city of Oregon is", # Similar - should hit cache + "What year was Oregon founded?" 
# Different - cache miss + ] + + for i, question in enumerate(questions, 1): + print(f"\n{'=' * 60}") + print(f"Query {i}: {question}") + print('=' * 60) + + initial_state = { + "messages": [HumanMessage(content=question)], + "cache_hit": False + } + + result = app.invoke(initial_state) + + # Print final answer + final_message = result["messages"][-1] + print(f"\nAnswer: {final_message.content}") + + if result.get("cache_hit"): + print("โšก Response served from cache!") + + print("\n" + "=" * 60) + print("โœ… Scenario 3 Complete!") + print("=" * 60) + + print("\nGraph Structure:") + print("START -> check_cache -> [cache hit?]") + print(" โ”œโ”€> END (cached)") + print(" โ””โ”€> call_llm -> [tools?]") + print(" โ”œโ”€> execute_tools -> call_llm") + print(" โ””โ”€> END") + +""" +KEY CONCEPTS EXPLAINED: +======================= + +1. SEMANTIC CACHE: + - Uses embeddings to find similar queries + - Not exact string matching - understands meaning + - "What is Oregon's capital?" โ‰ˆ "Tell me about Oregon's capital city" + - Configurable similarity threshold (distance_threshold) + +2. CACHE WORKFLOW: + a. Query comes in + b. Convert query to embedding + c. Search Redis for similar embeddings + d. If found and similar enough -> return cached response + e. Otherwise -> proceed to LLM + +3. TTL (Time To Live): + - Cached responses expire after ttl seconds + - Prevents stale data + - Configurable per use case + +4. DISTANCE THRESHOLD: + - Lower = more strict (requires closer match) + - Higher = more lenient (accepts less similar queries) + - 0.1 is fairly strict, 0.3-0.4 is more lenient + +WHAT'S NEW FROM SCENARIO 2: +============================ +- Added check_cache node at the start +- Integrated Redis for cache storage +- Using embeddings for semantic similarity +- Storing successful responses for reuse +- New conditional: cache hit or miss + +BENEFITS: +========= +- Reduced LLM costs (cached responses are free) +- Faster response times (no LLM call needed) +- Handles query variations naturally +- Scales well with high traffic + +CACHE INVALIDATION: +=================== +- Use TTL for automatic expiration +- Manually clear with semantic_cache.clear() +- Clear specific keys if data changes +""" \ No newline at end of file diff --git a/nk_scripts/scenario4.py b/nk_scripts/scenario4.py new file mode 100644 index 00000000..7fb26b2e --- /dev/null +++ b/nk_scripts/scenario4.py @@ -0,0 +1,365 @@ +""" +Full-Featured AI Agent with LangGraph and Redis +================================================ +Oregon Trail-themed agent with semantic routing, caching, tools, and memory. 
+ +Features: +- Semantic Router: Filters off-topic queries +- Semantic Cache: Reduces LLM costs +- Tool Calling: External function execution +- Conversation Memory: Persistent context +""" + +import os +from typing import TypedDict, Annotated, Literal +from operator import add + +from langchain_core.messages import HumanMessage, SystemMessage +from langchain_core.tools import tool +from langchain_openai import ChatOpenAI, OpenAIEmbeddings +from langgraph.graph import StateGraph, END +from langgraph.checkpoint.redis import RedisSaver +from pydantic import BaseModel, Field +from redis import Redis +from redisvl.extensions.llmcache import SemanticCache +from redisvl.extensions.router import SemanticRouter, Route + + +# ============================================ +# Configuration +# ============================================ +class Config: + """Configuration settings""" + REDIS_HOST = os.getenv("REDIS_HOST", "localhost") + REDIS_PORT = int(os.getenv("REDIS_PORT", 6379)) + OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") + MODEL_NAME = "gpt-4o-mini" + CACHE_TTL = 3600 + CACHE_THRESHOLD = 0.1 + + +# ============================================ +# State Definition +# ============================================ +class AgentState(TypedDict): + """Agent state schema""" + messages: Annotated[list, add] + route_decision: str + cache_hit: bool + + +# ============================================ +# Tools Definition +# ============================================ +class RestockInput(BaseModel): + """Input schema for restock calculation""" + daily_usage: int = Field(description="Pounds of food consumed daily") + lead_time: int = Field(description="Lead time to replace food in days") + safety_stock: int = Field(description="Pounds of safety stock to keep") + + +@tool("restock-tool", args_schema=RestockInput) +def restock_tool(daily_usage: int, lead_time: int, safety_stock: int) -> str: + """ + Calculate restock point for Oregon Trail supplies. + + Returns the inventory level at which new supplies should be ordered + to avoid running out during the lead time. + """ + restock_point = (daily_usage * lead_time) + safety_stock + return f"Restock when inventory reaches {restock_point} lbs" + + +@tool("weather-tool") +def weather_tool() -> str: + """Get current weather conditions on the Oregon Trail.""" + return "Current conditions: Partly cloudy, 68ยฐF. Good travel weather." + + +@tool("hunting-tool") +def hunting_tool() -> str: + """Check hunting opportunities along the trail.""" + return "Buffalo spotted nearby. Good hunting conditions. Remember to say 'bang'!" 
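+
+# Illustrative aside: each @tool above is a LangChain tool object, so it can be
+# smoke-tested directly before wiring it into the graph. For example (a
+# hypothetical check, not part of the agent flow):
+#   restock_tool.invoke({"daily_usage": 10, "lead_time": 3, "safety_stock": 50})
+# should return "Restock when inventory reaches 80 lbs", since (10 * 3) + 50 = 80.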
+ + +# ============================================ +# Redis Components Setup +# ============================================ +class RedisComponents: + """Manages Redis-based components""" + + def __init__(self, config: Config): + self.redis_client = Redis( + host=config.REDIS_HOST, + port=config.REDIS_PORT, + decode_responses=False + ) + + # Semantic cache + self.cache = SemanticCache( + name="oregon_trail_cache", + redis_client=self.redis_client, + distance_threshold=config.CACHE_THRESHOLD, + ttl=config.CACHE_TTL + ) + + # Memory checkpointer + self.memory = RedisSaver(self.redis_client) + + # Semantic router + self._setup_router() + + def _setup_router(self): + """Configure semantic router with allowed/blocked topics""" + allowed = Route( + name="oregon_topics", + references=[ + "Oregon Trail information", + "Pioneer life and travel", + "Hunting and supplies", + "Weather along the trail", + "Inventory management", + "Oregon geography and history", + "Trail challenges and solutions", + ], + metadata={"type": "allowed"} + ) + + blocked = Route( + name="blocked_topics", + references=[ + "Stock market analysis", + "Cryptocurrency trading", + "Python programming", + "Machine learning tutorials", + "Modern politics", + "Celebrity gossip", + "Sports scores", + ], + metadata={"type": "blocked"} + ) + + self.router = SemanticRouter( + name="topic_router", + routes=[allowed, blocked], + redis_client=self.redis_client + ) + + +# ============================================ +# Agent Nodes +# ============================================ +class AgentNodes: + """Node functions for the agent graph""" + + def __init__(self, redis_components: RedisComponents, config: Config): + self.redis = redis_components + self.llm = ChatOpenAI(model=config.MODEL_NAME, temperature=0) + self.llm_with_tools = self.llm.bind_tools(TOOLS) + self.system_prompt = """You are Art, a helpful guide on the Oregon Trail. + +You assist pioneers with: +- Inventory and supply management +- Weather conditions +- Hunting opportunities +- Trail advice + +Use the tools available to help answer questions accurately. +If asked your first name, respond with just 'Art'. +Keep responses concise and helpful.""" + + def check_route(self, state: AgentState) -> dict: + """Filter queries using semantic router""" + query = self._get_last_human_message(state) + if not query: + return {"route_decision": "unknown"} + + route_result = self.redis.router(query) + print(f"๐Ÿ›ฃ๏ธ Route: {route_result.name} (distance: {route_result.distance:.3f})") + + if route_result.name == "blocked_topics": + return { + "messages": [SystemMessage( + content="I can only help with Oregon Trail-related questions. " + "Please ask about pioneer life, supplies, or trail conditions." 
+ )], + "route_decision": "blocked" + } + + return {"route_decision": "allowed"} + + def check_cache(self, state: AgentState) -> dict: + """Check semantic cache for similar queries""" + query = self._get_last_human_message(state) + if not query: + return {"cache_hit": False} + + cached = self.redis.cache.check(prompt=query) + if cached: + print("โœจ Cache hit!") + return { + "messages": [SystemMessage(content=cached[0]["response"])], + "cache_hit": True + } + + print("โŒ Cache miss") + return {"cache_hit": False} + + def call_llm(self, state: AgentState) -> dict: + """Call LLM with system prompt and conversation history""" + messages = [SystemMessage(content=self.system_prompt)] + state["messages"] + response = self.llm_with_tools.invoke(messages) + + # Cache final responses (not tool calls) + if not (hasattr(response, "tool_calls") and response.tool_calls): + query = self._get_last_human_message(state) + if query: + self.redis.cache.store(prompt=query, response=response.content) + print("๐Ÿ’พ Cached response") + + return {"messages": [response]} + + def execute_tools(self, state: AgentState) -> dict: + """Execute tool calls from LLM""" + from langchain_core.messages import ToolMessage + + last_message = state["messages"][-1] + tool_calls = last_message.tool_calls + + tool_messages = [] + for tool_call in tool_calls: + tool = TOOL_MAP[tool_call["name"]] + result = tool.invoke(tool_call["args"]) + print(f"๐Ÿ”ง {tool_call['name']}: {result}") + + tool_messages.append( + ToolMessage( + content=str(result), + tool_call_id=tool_call["id"] + ) + ) + + return {"messages": tool_messages} + + @staticmethod + def _get_last_human_message(state: AgentState) -> str: + """Extract last human message from state""" + for msg in reversed(state["messages"]): + if isinstance(msg, HumanMessage): + return msg.content + return "" + + +# ============================================ +# Conditional Logic +# ============================================ +def should_continue_after_route(state: AgentState) -> Literal["check_cache", "end"]: + """Decide whether to proceed after routing""" + return "end" if state.get("route_decision") == "blocked" else "check_cache" + + +def should_continue_after_cache(state: AgentState) -> Literal["call_llm", "end"]: + """Decide whether to proceed after cache check""" + return "end" if state.get("cache_hit") else "call_llm" + + +def should_continue_after_llm(state: AgentState) -> Literal["execute_tools", "end"]: + """Decide whether to execute tools or end""" + last_message = state["messages"][-1] + has_tool_calls = hasattr(last_message, "tool_calls") and last_message.tool_calls + return "execute_tools" if has_tool_calls else "end" + + +# ============================================ +# Graph Builder +# ============================================ +def create_agent(config: Config = Config()) -> tuple: + """ + Create the full-featured agent graph. 
+ + Returns: + tuple: (compiled_graph, redis_components) + """ + # Initialize components + redis_components = RedisComponents(config) + nodes = AgentNodes(redis_components, config) + + # Build graph + workflow = StateGraph(AgentState) + + # Add nodes + workflow.add_node("check_route", nodes.check_route) + workflow.add_node("check_cache", nodes.check_cache) + workflow.add_node("call_llm", nodes.call_llm) + workflow.add_node("execute_tools", nodes.execute_tools) + + # Define flow + workflow.set_entry_point("check_route") + + workflow.add_conditional_edges( + "check_route", + should_continue_after_route, + {"check_cache": "check_cache", "end": END} + ) + + workflow.add_conditional_edges( + "check_cache", + should_continue_after_cache, + {"call_llm": "call_llm", "end": END} + ) + + workflow.add_conditional_edges( + "call_llm", + should_continue_after_llm, + {"execute_tools": "execute_tools", "end": END} + ) + + workflow.add_edge("execute_tools", "call_llm") + + # Compile with memory + app = workflow.compile(checkpointer=redis_components.memory) + + return app, redis_components + + +# ============================================ +# Main Execution +# ============================================ +TOOLS = [restock_tool, weather_tool, hunting_tool] +TOOL_MAP = {tool.name: tool for tool in TOOLS} + + +def run_agent_conversation(queries: list[str], thread_id: str = "demo_session"): + """Run a conversation with the agent""" + config_dict = {"configurable": {"thread_id": thread_id}} + app, _ = create_agent() + + for query in queries: + print(f"\n{'=' * 70}") + print(f"๐Ÿ‘ค User: {query}") + print('=' * 70) + + result = app.invoke( + { + "messages": [HumanMessage(content=query)], + "route_decision": "", + "cache_hit": False + }, + config=config_dict + ) + + final_message = result["messages"][-1] + print(f"๐Ÿค– Agent: {final_message.content}") + + +if __name__ == "__main__": + # Example conversation + queries = [ + "What's the weather like on the trail?", + "Calculate restock point if we use 50 lbs daily, 5 day lead time, 100 lbs safety stock", + "What should I do when I see buffalo?", + "Tell me about the S&P 500", # Should be blocked + "What's your first name?", + ] + + run_agent_conversation(queries) \ No newline at end of file diff --git a/nk_scripts/vector-intro.md b/nk_scripts/vector-intro.md new file mode 100644 index 00000000..45b15a28 --- /dev/null +++ b/nk_scripts/vector-intro.md @@ -0,0 +1,3384 @@ +**Index Configuration Breakdown:** + +#### Index Settings: +```python +"index": { + "name": "movies", # Index identifier + "prefix": "movies", # All keys: movies:*, movies:1, movies:2... + "storage_type": "hash" # Hash or JSON +} +``` + +**Storage Types Deep Dive:** + +**HASH vs JSON - What Are They?** + +**1. Redis Hash:** +```python +# Hash is like a dictionary/map inside Redis +# key โ†’ {field1: value1, field2: value2, ...} + +# Example storage: +HSET movies:1 title "Inception" +HSET movies:1 genre "action" +HSET movies:1 rating 9 +HSET movies:1 vector + +# View hash: +HGETALL movies:1 +# Output: +# { +# "title": "Inception", +# "genre": "action", +# "rating": "9", +# "vector": b"\x9ef|=..." +# } + +# Characteristics: +# - Flat structure (no nesting) +# - All values stored as strings (except binary) +# - Fast operations: O(1) for field access +# - Compact memory representation +``` + +**2. 
RedisJSON:** +```python +# JSON is native JSON document storage +# key โ†’ {nested: {json: "structure"}} + +# Example storage: +JSON.SET movies:1 $ '{ + "title": "Inception", + "genre": "action", + "rating": 9, + "metadata": { + "director": "Christopher Nolan", + "year": 2010, + "tags": ["sci-fi", "thriller"] + }, + "vector": [0.123, -0.456, ...] +}' + +# Query with JSONPath: +JSON.GET movies:1 $.metadata.director +# Output: "Christopher Nolan" + +# Characteristics: +# - Supports nested structures +# - Native JSON types (numbers, booleans, arrays) +# - JSONPath queries +# - Slightly more memory overhead +``` + +**Hash vs JSON Performance:** +```python +# Hash (faster): +# - Simpler data structure +# - Less parsing overhead +# - ~10-20% faster for simple key-value +# - Memory: ~50-100 bytes overhead per hash + +# JSON (more flexible): +# - Complex nested data +# - Array operations +# - Atomic updates to nested fields +# - Memory: ~100-200 bytes overhead per document + +# Recommendation: +# Use Hash for: Simple flat data (our movies example) +# Use JSON for: Complex nested structures, arrays +``` + +**Why Hash is Faster:** +```python +# Hash: Direct field access +# 1. Hash table lookup: O(1) +# 2. Return value: O(1) +# Total: O(1) + +# JSON: Parse + navigate +# 1. Retrieve JSON string: O(1) +# 2. Parse JSON: O(n) where n = document size +# 3. Navigate JSONPath: O(m) where m = path depth +# Total: O(n + m) + +# For simple data, hash avoids parsing overhead + +# Benchmark example: +import time + +# Hash access +start = time.time() +for i in range(10000): + client.hget(f"movies:{i}", "title") +hash_time = time.time() - start +print(f"Hash: {hash_time:.3f}s") # ~0.5s + +# JSON access +start = time.time() +for i in range(10000): + client.json().get(f"movies_json:{i}", "$.title") +json_time = time.time() - start +print(f"JSON: {json_time:.3f}s") # ~0.6-0.7s + +# Hash is ~20% faster for simple access +``` + +**When to Use Each:** +```python +# Use Hash when: +# โœ“ Flat data structure +# โœ“ Maximum performance needed +# โœ“ Simple field access patterns +# โœ“ Vectors + simple metadata + +# Use JSON when: +# โœ“ Nested data (user.address.city) +# โœ“ Arrays ([tags, categories]) +# โœ“ Need JSONPath queries +# โœ“ Complex document structures +# โœ“ Atomic updates to nested fields +``` + +#### Field Types in RedisVL: + +RedisVL supports multiple field types for building searchable indices: + +##### 1. **TEXT** (Full-Text Search) +```python +{ + "name": "title", + "type": "text", + "attrs": { + "weight": 2.0, # Boost importance in scoring + "sortable": False, # Can't sort by text (use tag/numeric) + "no_stem": False, # Enable stemming (runโ†’running) + "no_index": False, # Actually index this field + "phonetic": "dm:en" # Phonetic matching (optional) + } +} +``` + +**Use TEXT for:** +- Article content +- Product descriptions +- User comments +- Any natural language text that needs fuzzy/full-text search + +**Search capabilities:** +- Tokenization and stemming +- Phrase matching +- Fuzzy matching +- BM25 scoring +- Stopword removal + +**Example:** +```python +# Field definition +{"name": "description", "type": "text"} + +# Search query +Text("description") % "action packed superhero" +# Finds: "action-packed superhero movie" +# "packed with superhero action" +# "actions by superheroes" (stemmed) +``` + +##### 2. 
**TAG** (Exact Match, Categories)
```python
{
    "name": "genre",
    "type": "tag",
    "attrs": {
        "separator": ",",          # For multi-value tags: "action,thriller"
        "sortable": True,          # Enable sorting
        "case_sensitive": False    # Case-insensitive matching
    }
}
```

**Use TAG for:**
- Categories (genre, department)
- Status flags (active, pending, completed)
- IDs (user_id, product_sku)
- Enum values
- Multiple values per field (comma-separated)

**Search capabilities:**
- Exact match only (no tokenization)
- Very fast lookups
- Multi-value support

**Example:**
```python
# Field definition
{"name": "genre", "type": "tag"}

# Storage
{"genre": "action,thriller"}          # Multiple tags

# Search queries
Tag("genre") == "action"              # Matches
Tag("genre") == "thriller"            # Also matches
Tag("genre") == ["action", "comedy"]  # OR logic
Tag("genre") != "horror"              # Exclude
```

##### 3. **NUMERIC** (Range Queries, Sorting)
```python
{
    "name": "rating",
    "type": "numeric",
    "attrs": {
        "sortable": True,    # Enable sorting
        "no_index": False    # Index for range queries
    }
}
```

**Use NUMERIC for:**
- Ratings/scores
- Prices
- Timestamps (as Unix epoch)
- Counts/quantities
- Any filterable number

**Search capabilities:**
- Range queries (>, <, >=, <=)
- Exact match (==)
- Sorting

**Example:**
```python
# Field definition
{"name": "price", "type": "numeric"}

# Search queries
Num("price") <= 100                           # Under $100
(Num("price") >= 50) & (Num("price") <= 150)  # $50-$150 range (parentheses required: & binds tighter than >=)
Num("rating") >= 4.5                          # High rated
```

##### 4. **VECTOR** (Semantic Search)
```python
{
    "name": "vector",
    "type": "vector",
    "attrs": {
        "dims": 384,                  # Vector dimensions (MUST match model!)
        "distance_metric": "cosine",  # cosine, l2, ip
        "algorithm": "flat",          # flat, hnsw, svs-vamana
        "datatype": "float32",        # float32, float64, float16
        "initial_cap": 1000           # Initial capacity (HNSW)
    }
}
```

**Use VECTOR for:**
- Text embeddings
- Image embeddings
- Audio embeddings
- Any semantic similarity search

**Search capabilities:**
- KNN (K-Nearest Neighbors)
- Range queries (within threshold)
- Hybrid search (with filters)

**Example:**
```python
# Field definition
{"name": "embedding", "type": "vector", "attrs": {"dims": 384, ...}}

# Search query
VectorQuery(
    vector=query_embedding,  # Must be 384 dims
    vector_field_name="embedding"
)
```

##### 5. **GEO** (Location-Based Search)
```python
{
    "name": "location",
    "type": "geo",
    "attrs": {
        "sortable": False    # Geo fields can't be sorted
    }
}
```

**Use GEO for:**
- Store/venue locations (stored as "longitude,latitude")
- Radius filtering ("within 5 km of the user")
- Combining location constraints with vector or text search

---

# RedisVL Vector Search Workshop - Comprehensive Guide

## Table of Contents
1. [Introduction](#introduction)
2. [Cell-by-Cell Walkthrough](#cell-by-cell-walkthrough)
3. [Technical Q&A](#technical-qa)
4. [Architecture & Performance](#architecture--performance)
5. [Production Considerations](#production-considerations)

---

## Introduction

### What is Vector Search?
Vector search (also called semantic search or similarity search) enables finding similar items based on meaning rather than exact keyword matches. It works by:
1. Converting data (text, images, audio) into numerical vectors (embeddings)
2. Storing these vectors in a specialized database
3. Finding similar items by measuring distance between vectors

### What is Redis?
+ +**Redis Core (Open Source)** provides fundamental data structures: +- **Strings**: Simple key-value pairs +- **Lists**: Ordered collections (queues, stacks) +- **Sets**: Unordered unique collections +- **Sorted Sets**: Sets with scores for ranking +- **Hashes**: Field-value pairs (like Python dicts) +- **Streams**: Append-only log structures +- **Bitmaps**: Bit-level operations +- **HyperLogLog**: Probabilistic cardinality counting +- **Geospatial**: Location-based queries + +**Redis Stack** adds powerful modules on top of Redis Core: +- **RediSearch**: Full-text search, vector search, aggregations +- **RedisJSON**: Native JSON document storage with JSONPath queries +- **RedisTimeSeries**: Time-series data structures +- **RedisBloom**: Probabilistic data structures (Bloom filters, Cuckoo filters) +- **RedisGraph**: Graph database capabilities (deprecated in favor of other solutions) + +**For this workshop**, we need **RediSearch** for vector similarity search capabilities. + +### Why Redis? +- **Speed**: Sub-millisecond query latency +- **Versatility**: Cache, database, and message broker in one +- **Real-time**: Immediate indexing without rebuild delays +- **Hybrid capabilities**: Combines vector search with traditional filters +- **Proven scale**: Used by Fortune 500 companies for decades + +--- + +## Cell-by-Cell Walkthrough + +### CELL 1: Title and Introduction (Markdown) +```markdown +![Redis](https://redis.io/wp-content/uploads/2024/04/Logotype.svg?auto=webp&quality=85,75&width=120) +# Vector Search with RedisVL +``` + +**Workshop Notes:** +- This notebook demonstrates building a semantic movie search engine +- Vector search is foundational for modern AI: RAG, recommendations, semantic search +- Redis Stack provides vector database capabilities with cache-level performance +- RedisVL abstracts complexity, making vector operations simple + +**Key Points to Emphasize:** +- Vector databases are the backbone of GenAI applications +- This is a hands-on introduction - by the end, attendees will build working vector search +- The techniques learned apply to any domain: e-commerce, documentation, media, etc. + +--- + +### CELL 2: Prepare Data (Markdown) + +**Workshop Notes:** +- Using 20 movies dataset - small enough to understand, large enough to be meaningful +- Each movie has structured metadata (title, rating, genre) and unstructured text (description) +- **The key insight**: We'll convert descriptions to vectors to enable semantic search + +**Why Movies?** +- Relatable domain everyone understands +- Rich descriptions showcase semantic similarity well +- Genre/rating demonstrate hybrid filtering + +--- + +### CELL 3: Download Dataset (Code) +```bash +!git clone https://github.com/redis-developer/redis-ai-resources.git temp_repo +!mv temp_repo/python-recipes/vector-search/resources . +!rm -rf temp_repo +``` + +**What's Happening:** +1. Clone Redis AI resources repository +2. Extract just the `/resources` folder containing `movies.json` +3. Clean up temporary files + +**Workshop Notes:** +- Only needed in Colab/cloud environments +- Local users: data is already in the repository +- In production: load from your database, API, or file system +- The JSON contains our 20 movies with descriptions + +**Common Question:** "What format should my data be in?" 
+- Any format works: JSON, CSV, database, API +- Key requirement: structured format that pandas can load +- Need fields for: searchable text + metadata for filtering + +--- + +### CELL 4: Packages Header (Markdown) + +**Workshop Notes:** +- About to install Python dependencies +- All packages are production-ready and actively maintained + +--- + +### CELL 5: Install Dependencies (Code) +```python +%pip install -q "redisvl>=0.6.0" sentence-transformers pandas nltk +``` + +**Package Breakdown:** + +#### 1. **redisvl** (Redis Vector Library) โ‰ฅ0.6.0 +- **Purpose**: High-level Python client for Redis vector operations +- **Built on**: redis-py (standard Redis Python client) +- **Key Features**: + - Declarative schema definition (YAML or Python dict) + - Multiple query types (Vector, Range, Hybrid, Text) + - Built-in vectorizers (OpenAI, Cohere, HuggingFace, etc.) + - Semantic caching for LLM applications + - CLI tools for index management + +**Why not plain redis-py?** +- redis-py requires manual query construction with complex syntax +- RedisVL provides Pythonic abstractions and best practices +- Handles serialization, batching, error handling automatically + +#### 2. **sentence-transformers** +- **Purpose**: Create text embeddings using pre-trained models +- **Provider**: Hugging Face +- **Model Used**: `all-MiniLM-L6-v2` + - Dimensions: 384 + - Speed: Fast inference (~2000 sentences/sec on CPU) + - Quality: Good for general purpose semantic similarity + - Training: 1B+ sentence pairs + +**Alternatives:** +- OpenAI `text-embedding-ada-002` (1536 dims, requires API key) +- Cohere embeddings (1024-4096 dims, requires API key) +- Custom models fine-tuned for your domain + +#### 3. **pandas** +- **Purpose**: Data manipulation and analysis +- **Use Cases**: + - Loading JSON/CSV datasets + - Data transformation and cleaning + - Displaying search results in tabular format + +#### 4. **nltk** (Natural Language Toolkit) +- **Purpose**: NLP utilities, specifically stopwords +- **Stopwords**: Common words with little semantic value ("the", "a", "is", "and") +- **Use Case**: Improve text search quality by filtering noise + +**Installation Note:** +- `-q` flag suppresses verbose output +- In production, pin exact versions: `redisvl==0.6.0` +- Total install size: ~500MB (mostly sentence-transformers models) + +--- + +### CELL 6: Install Redis Stack Header (Markdown) + +**Workshop Notes:** +- Redis Stack = Redis Open Source + modules +- Required modules: **RediSearch** (vector search), **RedisJSON** (JSON storage) + +--- + +### CELL 7: Install Redis Stack - Colab (Code) +```bash +%%sh +curl -fsSL https://packages.redis.io/gpg | sudo gpg --dearmor -o /usr/share/keyrings/redis-archive-keyring.gpg +echo "deb [signed-by=/usr/share/keyrings/redis-archive-keyring.gpg] https://packages.redis.io/deb $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/redis.list +sudo apt-get update > /dev/null 2>&1 +sudo apt-get install redis-stack-server > /dev/null 2>&1 +redis-stack-server --daemonize yes +``` + +**What's Happening:** +1. Add Redis GPG key for package verification +2. Add Redis repository to apt sources +3. Update package lists +4. Install Redis Stack Server +5. 
Start Redis as background daemon

**Workshop Notes:**
- This installs Redis Stack 7.2+ with all modules
- `--daemonize yes`: runs in background (doesn't block terminal)
- Colab-specific - not needed for local development

**Why Redis Stack vs Redis Open Source?**
- Open Source: Core data structures only
- Stack: Includes Search, JSON, Time Series, Bloom filters
- Enterprise: Stack + high availability, active-active geo-replication

---

### CELL 8: Alternative Installation Methods (Markdown)

**Workshop Notes:**

#### Option 1: Redis Cloud (Recommended for Production Testing)
```bash
# Free tier: 30MB RAM, perfect for learning
# Sign up: https://redis.com/try-free/
```
- Fully managed, no infrastructure
- Automatic scaling and backups
- SSL/TLS by default

#### Option 2: Docker (Best for Local Development)
```bash
docker run -d --name redis-stack-server -p 6379:6379 redis/redis-stack-server:latest
```
- Isolated environment
- Easy cleanup: `docker rm -f redis-stack-server`
- Consistent across team members

#### Option 3: OS-Specific Install
```bash
# macOS
brew install redis-stack

# Ubuntu/Debian
sudo apt install redis-stack-server

# Windows
# Use WSL2 + Docker or Redis Cloud
```

**Common Question:** "Which should I use?"
- **Learning**: Docker or Colab
- **Development**: Docker
- **Production**: Redis Cloud or Redis Enterprise

---

### CELL 9: Redis Connection Setup (Code)
```python
import os
import warnings

warnings.filterwarnings('ignore')

# Replace values below with your own if using Redis Cloud instance
REDIS_HOST = os.getenv("REDIS_HOST", "localhost")
REDIS_PORT = os.getenv("REDIS_PORT", "6379")
REDIS_PASSWORD = os.getenv("REDIS_PASSWORD", "")

# If SSL is enabled on the endpoint, use rediss:// as the URL prefix
REDIS_URL = f"redis://:{REDIS_PASSWORD}@{REDIS_HOST}:{REDIS_PORT}"
```

**Connection String Format:**
```
redis://[username]:[password]@[host]:[port]/[database]
rediss://[username]:[password]@[host]:[port]/[database]  # SSL/TLS
```

**Workshop Notes:**
- Follows 12-factor app methodology (environment variables for config)
- Defaults to local development: `localhost:6379`
- Password optional for local (required for production)
- `rediss://` (double 's') for SSL/TLS connections

**For Redis Cloud:**
```python
# Example Redis Cloud settings
REDIS_HOST = "redis-12345.c123.us-east-1-1.ec2.cloud.redislabs.com"
REDIS_PORT = "12345"
REDIS_PASSWORD = "your-strong-password-here"
```

**Security Best Practices:**
- Never hardcode credentials in notebooks/code
- Use environment variables or secrets manager
- Enable SSL/TLS for production
- Use strong passwords (20+ characters)
- Rotate credentials regularly

---

### CELL 10: Create Redis Client (Code)
```python
from redis import Redis

client = Redis.from_url(REDIS_URL)
client.ping()
```

**What's Happening:**
1. Import redis-py client library
2. Create client connection from URL
3. `ping()` verifies connection (returns `True` if successful)

**Workshop Notes:**
- This is standard redis-py client (not RedisVL yet)
- RedisVL will use this client internally
- `ping()` is best practice for connection verification

**Troubleshooting:**
```python
# If ping() fails, check:
import redis  # needed for redis.ConnectionError below

try:
    result = client.ping()
    print(f"✓ Connected to Redis: {result}")
except redis.ConnectionError as e:
    print(f"✗ Connection failed: {e}")
    print("Troubleshooting:")
    print("1. Is Redis running? (ps aux | grep redis)")
    print("2. 
Check host/port/password") + print("3. Firewall blocking port 6379?") +``` + +**Common Question:** "What if I have multiple Redis instances?" +```python +# You can create multiple clients +cache_client = Redis.from_url("redis://localhost:6379/0") # DB 0 for cache +vector_client = Redis.from_url("redis://localhost:6379/1") # DB 1 for vectors +``` + +--- + +### CELL 11: Check Redis Info (Code) +```python +client.info() +``` + +**What's Happening:** +- `INFO` command returns server statistics dictionary +- Contains ~100+ metrics about Redis server state + +**Key Sections to Review:** + +#### Server Info: +- `redis_version`: Should be 7.2+ for optimal vector search +- `redis_mode`: "standalone" or "cluster" +- `os`: Operating system + +#### Memory: +- `used_memory_human`: Current memory usage +- `maxmemory`: Memory limit (0 = no limit) +- `maxmemory_policy`: What happens when limit reached + +#### Modules (Most Important): +```python +modules = client.info()['modules'] +for module in modules: + print(f"{module['name']}: v{module['ver']}") +# Expected output: +# search: v80205 โ† RediSearch for vector search +# ReJSON: v80201 โ† JSON document support +# timeseries: v80200 +# bf: v80203 โ† Bloom filters +``` + +**Workshop Notes:** +- If `modules` section is missing, you're not using Redis Stack! +- `search` module provides vector search capabilities +- Version numbers: 80205 = 8.2.05 + +**Diagnostic Commands:** +```python +# Check specific info sections +print(client.info('server')) +print(client.info('memory')) +print(client.info('modules')) +``` + +--- + +### CELL 12: Optional Flush (Code) +```python +#client.flushall() +``` + +**What's Happening:** +- `flushall()` deletes ALL data from ALL databases +- Commented out by default (good practice!) + +**Workshop Notes:** +- โš ๏ธ **DANGER**: This is destructive and irreversible +- Only uncomment for development/testing +- Never run in production without explicit confirmation + +**Safer Alternatives:** +```python +# Delete only keys matching pattern +for key in client.scan_iter("movies:*"): + client.delete(key) + +# Delete specific index +index.delete() # Removes index, keeps data + +# Delete index AND data +index.delete(drop=True) # Removes index and all associated data +``` + +--- + +### CELL 13: Load Movies Dataset Header (Markdown) + +**Workshop Notes:** +- About to load and inspect our sample data +- This is a typical data loading pattern for any ML/AI project + +--- + +### CELL 14: Load Data with Pandas (Code) +```python +import pandas as pd +import numpy as np +import json + +df = pd.read_json("resources/movies.json") +print("Loaded", len(df), "movie entries") + +df.head() +``` + +**What's Happening:** +1. Load JSON file into pandas DataFrame +2. Print row count (20 movies) +3. Display first 5 rows with `head()` + +**Data Structure:** +``` +Columns: +- id (int): Unique identifier (1-20) +- title (str): Movie name +- genre (str): "action" or "comedy" +- rating (int): Quality score 6-10 +- description (str): Plot summary (this gets vectorized!) 
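- vector (bytes): 384-dim embedding column, added later in CELL 16 (not part of the source JSON)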
+``` + +**Workshop Notes:** +- Real applications have thousands/millions of documents +- Dataset intentionally small for learning +- Descriptions are 1-2 sentences (ideal for embeddings) + +**Data Quality Matters:** +```python +# Check for issues +print(f"Missing values:\n{df.isnull().sum()}") +print(f"\nDescription length stats:\n{df['description'].str.len().describe()}") +print(f"\nUnique genres: {df['genre'].unique()}") +``` + +**Example Movies:** +- "Explosive Pursuit" (Action, 7): "A daring cop chases a notorious criminal..." +- "Skyfall" (Action, 8): "James Bond returns to track down a dangerous network..." + +**Common Question:** "What if my descriptions are very long?" +- Truncate to model's max tokens (512 for many models) +- Or chunk into multiple vectors +- Or use models designed for long documents (Longformer, etc.) + +--- + +### CELL 15: Initialize Vectorizer (Code) +```python +from redisvl.utils.vectorize import HFTextVectorizer +from redisvl.extensions.cache.embeddings import EmbeddingsCache + +os.environ["TOKENIZERS_PARALLELISM"] = "false" + +hf = HFTextVectorizer( + model="sentence-transformers/all-MiniLM-L6-v2", + cache=EmbeddingsCache( + name="embedcache", + ttl=600, + redis_client=client, + ) +) +``` + +**Theoretical Background - Embeddings:** + +An **embedding** is a dense vector representation that captures semantic meaning: +``` +"The cat sat on the mat" โ†’ [0.234, -0.123, 0.456, ..., 0.789] # 384 numbers +"A feline was on the rug" โ†’ [0.229, -0.119, 0.451, ..., 0.782] # Similar vector! +"Python programming" โ†’ [-0.678, 0.234, -0.123, ..., 0.456] # Different vector +``` + +**Key Properties:** +- Similar meanings โ†’ similar vectors (measured by distance metrics) +- Enables semantic search without keyword matching +- Captures context, synonyms, and relationships + +**Model Choice: `all-MiniLM-L6-v2`** +``` +Specifications: +- Architecture: MiniLM (distilled from BERT) +- Dimensions: 384 (good balance of quality vs size) +- Max sequence: 256 tokens +- Training: 1B+ sentence pairs (SNLI, MultiNLI, etc.) +- Speed: ~2000 sentences/sec on CPU +- Size: ~80MB download +``` + +**Why this model?** +- โœ… Good quality for general purpose +- โœ… Fast inference (no GPU needed) +- โœ… Free (no API keys) +- โœ… Runs locally (data privacy) + +**Alternative Models:** +```python +# OpenAI (requires API key, $$) +from redisvl.utils.vectorize import OpenAITextVectorizer +openai_vectorizer = OpenAITextVectorizer( + model="text-embedding-ada-002", # 1536 dims + api_key=os.getenv("OPENAI_API_KEY") +) + +# Cohere (requires API key) +from redisvl.utils.vectorize import CohereTextVectorizer +cohere_vectorizer = CohereTextVectorizer( + model="embed-english-v3.0", + api_key=os.getenv("COHERE_API_KEY") +) + +# Custom Hugging Face model +hf_large = HFTextVectorizer( + model="sentence-transformers/all-mpnet-base-v2" # 768 dims, slower but better +) +``` + +**Embedding Cache - Deep Dive:** + +**What is the Embedding Cache?** +The `EmbeddingsCache` is a Redis-based caching layer that stores previously computed embeddings to avoid redundant computation. + +**Why is it needed?** +```python +# Without cache: +text = "The quick brown fox" +embedding1 = model.encode(text) # Takes ~50-100ms (compute intensive) +embedding2 = model.encode(text) # Takes ~50-100ms again (wasteful!) + +# With cache: +text = "The quick brown fox" +embedding1 = hf.embed(text) # First call: ~50-100ms (computes + caches) +embedding2 = hf.embed(text) # Second call: ~1ms (from cache, 50-100x faster!) 
+``` + +**How it works:** +```python +cache=EmbeddingsCache( + name="embedcache", # Redis key prefix for cache entries + ttl=600, # Time-to-live: 10 minutes (600 seconds) + redis_client=client, # Uses same Redis instance +) + +# Internal cache behavior: +# 1. Input text is hashed: hash("your text") โ†’ "abc123def456" +# 2. Check Redis: GET embedcache:abc123def456 +# 3. If exists: Return cached embedding (fast!) +# 4. If not exists: +# a. Compute embedding (slow) +# b. Store in Redis: SETEX embedcache:abc123def456 600 +# c. Return computed embedding +``` + +**Cache Storage in Redis:** +```python +# Cache entries are stored as Redis strings +key = f"embedcache:{hash(text)}" +value = serialized_embedding_bytes + +# View cache entries: +for key in client.scan_iter("embedcache:*"): + print(key) +# Output: +# b'embedcache:a1b2c3d4e5f6' +# b'embedcache:1a2b3c4d5e6f' +# ... +``` + +**TTL (Time-To-Live) Explained:** +```python +ttl=600 # Cache expires after 10 minutes + +# Why expire? +# 1. Prevent stale data if embeddings change +# 2. Manage memory usage (old embeddings are removed) +# 3. Balance between performance and freshness + +# TTL recommendations: +ttl=3600 # 1 hour - for stable production data +ttl=86400 # 24 hours - for rarely changing data +ttl=300 # 5 minutes - for frequently updating data +ttl=None # Never expire - for static datasets (careful with memory!) +``` + +**Performance Impact:** +```python +import time + +# Measure with cache +times_with_cache = [] +for _ in range(100): + start = time.time() + vec = hf.embed("sample text") + times_with_cache.append(time.time() - start) + +print(f"First call (no cache): {times_with_cache[0]*1000:.2f}ms") # ~50-100ms +print(f"Subsequent calls (cached): {np.mean(times_with_cache[1:])*1000:.2f}ms") # ~1ms + +# Cache hit rate +# 50-100x speedup for repeated queries! +``` + +**Cache Memory Usage:** +```python +# Each cached embedding uses memory: +# Hash key: ~64 bytes +# Embedding: 384 dims ร— 4 bytes = 1,536 bytes +# Redis overhead: ~64 bytes +# Total per entry: ~1,664 bytes โ‰ˆ 1.6 KB + +# For 10,000 cached embeddings: +# 10,000 ร— 1.6 KB = 16 MB (negligible!) + +# Cache is much smaller than full index +``` + +**Production Considerations:** +```python +# Monitor cache hit rate +hits = 0 +misses = 0 + +def embed_with_monitoring(text): + cache_key = f"embedcache:{hash(text)}" + if client.exists(cache_key): + hits += 1 + else: + misses += 1 + return hf.embed(text) + +# Target: >80% hit rate for good performance +hit_rate = hits / (hits + misses) +print(f"Cache hit rate: {hit_rate*100:.1f}%") +``` + +**Workshop Notes:** +- `TOKENIZERS_PARALLELISM=false` prevents threading warnings +- Cache automatically manages expiration +- In production, increase TTL or use persistent cache +- Cache is shared across all vectorizer instances using same Redis client + +--- + +### CELL 16: Generate Embeddings (Code) +```python +df["vector"] = hf.embed_many(df["description"].tolist(), as_buffer=True) + +df.head() +``` + +**What's Happening:** +1. Extract all descriptions as list: `["desc1", "desc2", ...]` +2. `embed_many()` batch processes all descriptions +3. `as_buffer=True` returns bytes (Redis-compatible format) +4. Store vectors in new DataFrame column + +**Why `as_buffer=True`? 
(Binary vs Numeric Storage)** + +**The Problem with Numeric Storage:** +```python +# Without as_buffer (returns numpy array) +vector_array = hf.embed("text") # np.array([0.123, -0.456, 0.789, ...]) +type(vector_array) # + +# Storing as array in Redis requires serialization: +import pickle +vector_serialized = pickle.dumps(vector_array) +# Or JSON (very inefficient): +vector_json = json.dumps(vector_array.tolist()) + +# Problems: +# 1. Pickle adds overhead (metadata, versioning info) +# 2. JSON is text-based, huge size (each float as string) +# 3. Not optimized for Redis vector search +``` + +**With Binary Storage (`as_buffer=True`):** +```python +# With as_buffer (returns raw bytes) +vector_bytes = hf.embed("text", as_buffer=True) +type(vector_bytes) # + +# Example: +# b'\x9e\x66\x7c\x3d\x67\x60\x0a\x3b...' + +# This is raw IEEE 754 float32 representation +# Each float32 = 4 bytes +# 384 dimensions ร— 4 bytes = 1,536 bytes total + +# Benefits: +# 1. Compact: No serialization overhead +# 2. Fast: Direct binary format Redis understands +# 3. Native: Redis vector search expects this format +# 4. Efficient: 4 bytes per dimension (optimal for float32) +``` + +**Binary Format Explanation:** +```python +# How float32 is stored as bytes: +import struct +import numpy as np + +# Single float +value = 0.123456 +bytes_repr = struct.pack('f', value) # 'f' = float32 +print(bytes_repr) # b'w\xbe\xfc=' + +# Array of floats (what embeddings are) +array = np.array([0.123, -0.456, 0.789], dtype=np.float32) +bytes_repr = array.tobytes() +print(bytes_repr) # b'{\x14\xfb>\x9a\x99\xe9\xbf\xc3\xf5I?' + +# This is what gets stored in Redis! +``` + +**Storage Size Comparison:** +```python +import sys +import json +import pickle +import numpy as np + +vec = np.random.rand(384).astype(np.float32) + +# Method 1: Raw bytes (as_buffer=True) โœ… BEST +bytes_size = len(vec.tobytes()) +print(f"Bytes: {bytes_size} bytes") # 1,536 bytes + +# Method 2: Pickle +pickle_size = len(pickle.dumps(vec)) +print(f"Pickle: {pickle_size} bytes") # ~1,700 bytes (+10% overhead) + +# Method 3: JSON โŒ WORST +json_size = len(json.dumps(vec.tolist())) +print(f"JSON: {json_size} bytes") # ~6,000 bytes (4x larger!) + +# For 1 million vectors: +# Bytes: 1.5 GB +# Pickle: 1.65 GB +# JSON: 6 GB (waste 4.5 GB!) +``` + +**Why Redis Vector Search Requires Bytes:** +```python +# Redis RediSearch module expects binary format +# When you query, Redis: +# 1. Reads raw bytes from memory +# 2. Interprets as float32 array +# 3. Computes distance (no deserialization!) + +# With JSON/Pickle: +# 1. Read serialized data +# 2. Deserialize to numbers (SLOW!) +# 3. Compute distance +# = Much slower, more CPU, more memory + +# Binary format = Zero-copy, direct math operations +``` + +**Converting Between Formats:** +```python +# Bytes โ†’ NumPy array (for inspection) +vec_bytes = df.iloc[0]['vector'] +vec_array = np.frombuffer(vec_bytes, dtype=np.float32) +print(f"Dimensions: {len(vec_array)}") # 384 +print(f"First 5 values: {vec_array[:5]}") +# [-0.0234, 0.1234, -0.5678, 0.9012, ...] + +# NumPy array โ†’ Bytes (for storage) +vec_array = np.array([0.1, 0.2, 0.3], dtype=np.float32) +vec_bytes = vec_array.tobytes() +client.hset("key", "vector", vec_bytes) +``` + +**Batch Processing Benefits:** +```python +# Bad (slow): One at a time +for desc in descriptions: + vec = hf.embed(desc) # 20 separate calls + +# Good (fast): Batch processing +vectors = hf.embed_many(descriptions) # 1 batched call + +# Why faster? +# 1. Model processes multiple texts in parallel +# 2. 
GPU utilization better (if using GPU) +# 3. Reduced Python/model overhead +# 4. Typical speedup: 2-5x for batches of 10-100 +``` + +**Workshop Notes:** +- This step takes 5-30 seconds depending on hardware +- Progress: Watch for model loading messages +- Cache prevents re-computation if you re-run +- Vectors displayed as bytes: `b'\x9ef|=...'` (not human-readable, that's OK) +- **Key takeaway**: Binary storage is compact, fast, and what Redis expects + +**Common Question:** "Can I use float64 instead of float32?" +```python +# Yes, but usually not worth it: +attrs = { + "datatype": "float64" # 8 bytes per dimension +} + +# Doubles storage: 384 ร— 8 = 3,072 bytes per vector +# Minimal accuracy gain for most applications +# Recommendation: Stick with float32 unless you have specific precision requirements +``` + +--- + +### CELL 17: Define Redis Index Schema Header (Markdown) + +**Workshop Notes:** +- Schema defines how data is structured and indexed in Redis +- Like creating a database table, but for vectors + metadata +- RedisVL provides declarative schema definition + +--- + +### CELL 18: Create Index Schema (Code) +```python +from redisvl.schema import IndexSchema +from redisvl.index import SearchIndex + +index_name = "movies" + +schema = IndexSchema.from_dict({ + "index": { + "name": index_name, + "prefix": index_name, + "storage_type": "hash" + }, + "fields": [ + { + "name": "title", + "type": "text", + }, + { + "name": "description", + "type": "text", + }, + { + "name": "genre", + "type": "tag", + "attrs": { + "sortable": True + } + }, + { + "name": "rating", + "type": "numeric", + "attrs": { + "sortable": True + } + }, + { + "name": "vector", + "type": "vector", + "attrs": { + "dims": 384, + "distance_metric": "cosine", + "algorithm": "flat", + "datatype": "float32" + } + } + ] +}) + +index = SearchIndex(schema, client) +index.create(overwrite=True, drop=True) +``` + +**Index Configuration Breakdown:** + +#### Index Settings: +```python +"index": { + "name": "movies", # Index identifier + "prefix": "movies", # All keys: movies:*, movies:1, movies:2... + "storage_type": "hash" # Hash or JSON +} +``` + +**Storage Types:** +- **Hash**: Key-value pairs, efficient, limited nesting +- **JSON**: Nested structures, JSONPath queries, slightly slower + +#### Field Types: + +##### 1. **TEXT** (Full-Text Search) +```python +{ + "name": "title", + "type": "text", +} +``` +- Tokenized for full-text search +- Supports stemming (run โ†’ running โ†’ ran) +- Phrase matching, fuzzy search +- Use for: descriptions, articles, comments + +##### 2. **TAG** (Exact Match) +```python +{ + "name": "genre", + "type": "tag", + "attrs": {"sortable": True} +} +``` +- Exact match only (no tokenization) +- Efficient for categories, enums +- Supports multiple values: "action,adventure" +- Use for: categories, status, types + +##### 3. **NUMERIC** (Range Queries) +```python +{ + "name": "rating", + "type": "numeric", + "attrs": {"sortable": True} +} +``` +- Range queries: `rating >= 7`, `1000 < price < 5000` +- Sorting by value +- Use for: prices, scores, timestamps, counts + +##### 4. **VECTOR** (Semantic Search) +```python +{ + "name": "vector", + "type": "vector", + "attrs": { + "dims": 384, # Must match embedding model! + "distance_metric": "cosine", + "algorithm": "flat", + "datatype": "float32" + } +} +``` + +**Vector Configuration Deep Dive:** + +##### Distance Metrics: +```python +# 1. 
COSINE (recommended for text)
distance_metric = "cosine"
# Measures angle between vectors
# Range: 0 to 2 (lower = more similar)
# Normalized: ignores vector magnitude
# Use: Text, normalized data
```

**Cosine Formula:**
```
cosine_distance = 1 - (A · B) / (||A|| × ||B||)

Where:
- A · B = dot product
- ||A|| = magnitude of A
```

```python
# 2. EUCLIDEAN (L2)
distance_metric = "l2"
# Measures straight-line distance
# Range: 0 to ∞ (lower = more similar)
# Sensitive to magnitude
# Use: Images, spatial data
```

**Euclidean Formula:**
```
l2_distance = √( Σ (Ai - Bi)² )
```

```python
# 3. INNER PRODUCT (IP)
distance_metric = "ip"
# Dot product (assumes normalized vectors)
# Range: -∞ to ∞ (higher = more similar)
# Fastest to compute
# Use: Pre-normalized embeddings
```

##### Indexing Algorithms:

```python
# 1. FLAT (exact search)
algorithm = "flat"
# Pros:
# - 100% accuracy (exact results)
# - Simple, no tuning needed
# Cons:
# - Slow on large datasets (checks every vector)
# - O(N) complexity
# Use: <100K vectors or when accuracy critical
```

```python
# 2. HNSW (approximate search)
algorithm = "hnsw"
attrs = {
    "m": 16,                 # Connections per node (higher = better accuracy, more memory)
    "ef_construction": 200,  # Build-time accuracy (higher = better quality index)
    "ef_runtime": 10         # Query-time accuracy (higher = more accurate, slower)
}
# Pros:
# - Very fast (10-100x faster than FLAT)
# - Sub-linear query time
# - Good accuracy (95-99%)
# Cons:
# - More memory usage
# - Tuning required
# Use: >100K vectors, speed critical
```

**HNSW Parameters Explained:**
- `m`: Graph connectivity (16-64 typical, default 16)
- `ef_construction`: Higher = better index quality (100-500 typical)
- `ef_runtime`: Trade-off accuracy vs speed (10-200 typical)

```python
# 3. SVS-VAMANA (Intel optimized, Redis 8.2+)
algorithm = "svs-vamana"
attrs = {
    "graph_max_degree": 40,
    "construction_window_size": 250,
    "compression": "lvq8"    # 8-bit compression
}
# Pros:
# - Excellent speed
# - Low memory (compression)
# - Intel CPU optimized
# Cons:
# - Redis 8.2+ only
# - Less battle-tested than HNSW
# Use: Large-scale, Intel hardware
```

##### Data Types:
```python
datatype = "float32"  # Standard (4 bytes per dimension)
datatype = "float64"  # Higher precision (8 bytes, rarely needed)
datatype = "float16"  # Lower precision (2 bytes, experimental)
```

**Memory Calculation:**
```
Vector memory per document = dimensions × bytes_per_dim
384 × 4 bytes = 1,536 bytes = 1.5 KB per vector

For 1 million vectors:
1,000,000 × 1.5 KB = 1.5 GB just for vectors
```

**Create Index:**
```python
index = SearchIndex(schema, client)
index.create(overwrite=True, drop=True)
```

**Parameters:**
- `overwrite=True`: Delete existing index with same name
- `drop=True`: Also delete all data

**Workshop Notes:**
- Schema can also be defined in YAML (better for version control)
- `dims=384` must match your embedding model!
- Start with FLAT, migrate to HNSW when you have >100K vectors
- Cosine is safest default for text embeddings
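To make the cosine metric concrete, here is a minimal NumPy sketch (illustrative only, with made-up vectors) that computes the same `vector_distance` Redis reports for the cosine metric:

```python
import numpy as np

def cosine_distance(a: np.ndarray, b: np.ndarray) -> float:
    # 1 - (A · B) / (||A|| × ||B||), matching the formula above
    return 1.0 - float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

a = np.array([0.1, 0.2, 0.3], dtype=np.float32)
b = np.array([0.1, 0.2, 0.35], dtype=np.float32)
print(cosine_distance(a, b))  # near 0.0 -> very similar vectors
```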
**YAML Schema Alternative:**
```yaml
# schema.yaml
version: '0.1.0'
index:
  name: movies
  prefix: movies
  storage_type: hash

fields:
  - name: title
    type: text
  - name: genre
    type: tag
    attrs:
      sortable: true
  - name: rating
    type: numeric
    attrs:
      sortable: true
  - name: vector
    type: vector
    attrs:
      dims: 384
      distance_metric: cosine
      algorithm: flat
      datatype: float32
```

```python
# Load from YAML
schema = IndexSchema.from_yaml("schema.yaml")
```

---

### CELL 19: Inspect Index via CLI (Code)
```bash
!rvl index info -i movies -u {REDIS_URL}
```

**What's Happening:**
- `rvl` = RedisVL command-line interface
- Shows index metadata in formatted tables

**Workshop Notes:**
- CLI tool useful for debugging and operations
- Verify configuration matches expectations
- Check field types, dimensions, algorithms

**CLI Output Explained:**
```
Index Information:
Index Name | Storage Type | Prefixes | Index Options | Indexing
-----------+--------------+----------+---------------+---------
movies     | HASH         | [movies] | []            | 0
```
- `Indexing: 0` = no documents indexed yet

**Other CLI Commands:**
```bash
# List all indices
!rvl index listall -u {REDIS_URL}

# Delete index
!rvl index delete -i movies -u {REDIS_URL}

# Create from YAML
!rvl index create -s schema.yaml -u {REDIS_URL}

# Get statistics
!rvl stats -i movies -u {REDIS_URL}
```

---

### CELL 20: Populate Index Header (Markdown)

**Workshop Notes:**
- Time to load our movie data into Redis
- This makes data searchable

---

### CELL 21: Load Data (Code)
```python
index.load(df.to_dict(orient="records"))
```

**What's Happening:**
1. `df.to_dict(orient="records")` converts DataFrame to list of dicts:
```python
[
    {"id": 1, "title": "Explosive Pursuit", "genre": "action", ...},
    {"id": 2, "title": "Skyfall", "genre": "action", ...},
    ...
]
```
2. `index.load()` performs batch insert
3. Returns list of generated Redis keys

**Output Example:**
```python
[
    'movies:01K7T4BMAEZMNPYTV73KZFYN3R',  # ULID format
    'movies:01K7T4BMAE21PEY7NSDDQN4195',
    ...
]
```
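If you want stable, human-readable keys instead of generated ULIDs, `load()` can derive keys from one of your fields via `id_field` (a quick sketch; see the RedisVL docs for details):

```python
# Reuse the movie's own id column for keys: movies:1, movies:2, ...
keys = index.load(df.to_dict(orient="records"), id_field="id")
print(keys[:2])  # e.g. ['movies:1', 'movies:2']
```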
+] +``` + +**Key Generation:** +- RedisVL auto-generates ULIDs (Universally Unique Lexicographically Sortable IDs) +- Format: `{prefix}:{ulid}` +- ULIDs are time-ordered (can sort chronologically) + +**Workshop Notes:** +- Batch insert is efficient (~1000-10000 inserts/sec) +- Data is immediately searchable (real-time indexing) +- No need to "rebuild" index like traditional search engines + +**Behind the Scenes:** +```python +# What RedisVL does internally +for record in data: + key = f"{prefix}:{generate_ulid()}" + client.hset(key, mapping=record) # Store as hash + # Index updates automatically +``` + +**Verify Loading:** +```python +# Check document count +info = index.info() +print(f"Documents indexed: {info['num_docs']}") # Should be 20 + +# Inspect a record +keys = client.keys("movies:*") +sample_key = keys[0] +sample_data = client.hgetall(sample_key) +print(sample_data) +``` + +--- + +### CELL 22: Search Techniques Header (Markdown) + +**Workshop Notes:** +- Now for the exciting part - searching! +- We'll explore different search patterns and their use cases + +--- + +### CELL 23: Standard Vector Search (Code) +```python +from redisvl.query import VectorQuery + +user_query = "High tech and action packed movie" + +embedded_user_query = hf.embed(user_query) + +vec_query = VectorQuery( + vector=embedded_user_query, + vector_field_name="vector", + num_results=3, + return_fields=["title", "genre"], + return_score=True, +) + +result = index.query(vec_query) +pd.DataFrame(result) +``` + +**Theoretical Background - K-Nearest Neighbors (KNN):** + +KNN finds the K closest vectors to a query vector: +``` +Query: "High tech action" + โ†“ (embed) +Vector: [0.12, -0.45, 0.78, ...] + โ†“ (search) +Compare distance to all stored vectors + โ†“ +Return top K closest matches +``` + +**Distance Calculation (Cosine):** +```python +# For each document vector: +similarity = 1 - cosine_similarity(query_vec, doc_vec) + +# Lower distance = more similar +# Range: 0 (identical) to 2 (opposite) +``` + +**Results Interpretation:** +``` + id vector_distance title genre +0 movies:01K7T4BMAEAJZJZEA2S05V2G5H 0.64973795414 Fast & Furious 9 action +1 movies:01K7T4BMAE7ZKS3N3DVBQ1WCCF 0.763235211372 Mad Max: Fury Road action +2 movies:01K7T4BMAEPMDQF1FVRV3Y60JF 0.792449593544 The Lego Movie comedy +``` + +**Why These Results?** +1. **Fast & Furious 9** (0.649 distance): + - Description mentions "high-tech", "face off" + - Semantically closest to "high tech action packed" + +2. **Mad Max** (0.763 distance): + - Action-heavy, chase sequences + - Less tech-focused but still relevant + +3. **The Lego Movie** (0.792 distance): + - Has action elements + - Farther semantically (comedy, not tech) + +**Workshop Notes:** +- **Key Insight**: No keyword matching! Pure semantic understanding +- Query never said "Fast & Furious" but found it through meaning +- This is the power of vector search +- Notice Comedy movies can appear if semantically similar + +**Common Question:** "How do I choose K (num_results)?" 
+```python +# Recommendations: +num_results = 5 # Product search (show few options) +num_results = 20 # RAG (retrieve context for LLM) +num_results = 100 # Reranking (get candidates for 2-stage retrieval) +``` + +**Performance:** +```python +import time +start = time.time() +result = index.query(vec_query) +print(f"Query time: {(time.time()-start)*1000:.2f}ms") +# Typical: 1-10ms for FLAT, <1ms for HNSW +``` + +--- + +### CELL 24: Vector Search with Filters Header (Markdown) + +**Workshop Notes:** +- Combining semantic search with structured filters +- This is where Redis shines - hybrid search capabilities + +--- + +### CELL 25: Filter by Genre Header (Markdown) + +**Workshop Notes:** +- Constraining search to specific category + +--- + +### CELL 26: Tag Filter (Code) +```python +from redisvl.query.filter import Tag + +tag_filter = Tag("genre") == "action" + +vec_query.set_filter(tag_filter) + +result = index.query(vec_query) +pd.DataFrame(result) +``` + +**What's Happening:** +1. Create tag filter: `genre == "action"` +2. Apply to existing query +3. Redis pre-filters to action movies BEFORE vector comparison + +**Filter Execution Order:** +``` +1. Apply tag filter โ†’ Filter to action movies (10 out of 20) +2. Compute vector distances โ†’ Only on filtered set +3. Return top K โ†’ From filtered results +``` + +**Results:** +``` + id vector_distance title genre +0 movies:01K7T4BMAEAJZJZEA2S05V2G5H 0.64973795414 Fast & Furious 9 action +1 movies:01K7T4BMAE7ZKS3N3DVBQ1WCCF 0.763235211372 Mad Max: Fury Road action +2 movies:01K7T4BMAEZMNPYTV73KZFYN3R 0.796153008938 Explosive Pursuit action +``` + +**Workshop Notes:** +- All results now action genre (no comedy) +- "The Lego Movie" excluded despite semantic relevance +- Real use case: "Find Python books" (semantic + category filter) + +**Tag Filter Operators:** +```python +# Equality +Tag("genre") == "action" + +# Inequality +Tag("genre") != "comedy" + +# Multiple values (OR logic) +Tag("genre") == ["action", "thriller"] # action OR thriller + +# Field existence +Tag("genre").exists() +``` + +**Performance Impact:** +- Pre-filtering is very efficient (uses Redis sorted sets) +- Can filter millions of records in milliseconds +- Then vector search only on filtered subset + +--- + +### CELL 27: Multiple Filters Header (Markdown) + +**Workshop Notes:** +- Combining multiple conditions with AND/OR logic + +--- + +### CELL 28: Combined Filters (Code) +```python +from redisvl.query.filter import Num + +# Build combined filter expressions +tag_filter = Tag("genre") == "action" +num_filter = Num("rating") >= 7 +combined_filter = tag_filter & num_filter + +# Build vector query +vec_query = VectorQuery( + vector=embedded_user_query, + vector_field_name="vector", + num_results=3, + return_fields=["title", "rating", "genre"], + return_score=True, + filter_expression=combined_filter +) + +result = index.query(vec_query) +pd.DataFrame(result) +``` + +**Filter Logic:** +```python +# AND operator (&) +filter1 & filter2 # Both conditions must be true + +# OR operator (|) +filter1 | filter2 # Either condition can be true + +# NOT operator (~) +~filter1 # Inverts condition + +# Complex expressions +(Tag("genre") == "action") & (Num("rating") >= 7) | (Tag("featured") == "yes") +# (action AND rating>=7) OR featured +``` + +**Numeric Filter Operators:** +```python +# Comparison operators +Num("rating") == 8 # Exact match +Num("rating") != 8 # Not equal +Num("rating") > 7 # Greater than +Num("rating") >= 7 # Greater or equal +Num("rating") < 9 # Less than 
Num("rating") <= 9   # Less or equal

# Range queries (parenthesize each comparison: & binds tighter than >=/<= in Python)
(Num("rating") >= 7) & (Num("rating") <= 9)     # Between 7 and 9
(Num("price") >= 100) & (Num("price") <= 500)   # $100-$500 range
```

**Results:**
```
 id vector_distance title rating genre
0 movies:01K7T4BMAE7ZKS3N3DVBQ1WCCF 0.763235211372 Mad Max: Fury Road 8 action
1 movies:01K7T4BMAEZMNPYTV73KZFYN3R 0.796153008938 Explosive Pursuit 7 action
2 movies:01K7T4BMAEYWEZS72634ZFS303 0.876494169235 Inception 9 action
```

**Workshop Notes:**
- Now filtering by TWO conditions: action AND rating ≥ 7
- More restrictive = fewer results but higher quality
- Real e-commerce example: "Find Nike shoes, size 10, under $150, in stock"

**Complex E-commerce Filter Example:**
```python
from redisvl.query.filter import Tag, Num, Text

product_filter = (
    (Tag("brand") == "nike") &
    (Tag("size") == "10") &
    (Num("price") <= 150) &
    (Tag("in_stock") == "yes") &
    (Num("rating") >= 4.0)
)

product_query = VectorQuery(
    vector=user_preference_embedding,  # User's style preference
    vector_field_name="style_vector",
    num_results=10,
    filter_expression=product_filter
)
```

---

### CELL 29: Full-Text Search Filter Header (Markdown)

**Workshop Notes:**
- Searching for specific phrases within text fields

---

### CELL 30: Text Filter (Code)
```python
from redisvl.query.filter import Text

text_filter = Text("description") % "criminal mastermind"

vec_query = VectorQuery(
    vector=embedded_user_query,
    vector_field_name="vector",
    num_results=3,
    return_fields=["title", "rating", "genre", "description"],
    return_score=True,
    filter_expression=text_filter
)

result = index.query(vec_query)
pd.DataFrame(result)
```

**Text Search Operators:**
```python
# Phrase match (words must appear together)
Text("description") % "criminal mastermind"

# Word match (any order, stemmed)
Text("description") == "criminal mastermind"  # Matches "criminals" or "masterminds"

# Multiple words (OR logic)
Text("description") % "hero | villain"  # hero OR villain

# Multiple words (AND logic)
Text("description") % "hero villain"  # Both must appear

# Negation
Text("description") % "hero -villain"  # hero but NOT villain
```

**Tokenization Example:**
```
Input:  "The criminal mastermind plans the heist"
Tokens: [criminal, mastermind, plan, heist]  # Stopwords removed, stemmed
```

**Results:**
```
 id vector_distance title rating genre
0 movies:01K7T4BMAE6KW01NKAVS2HSHYP 0.827253937721 Despicable Me 7 comedy
1 movies:01K7T4BMAE9E3H8180KZ7JMV3W 0.990856587887 The Dark Knight 9 action
```

**Why These Results?**
- Both have exact phrase "criminal mastermind" in description
- Ranked by semantic similarity to query
- Shows diversity: comedy + action

**Workshop Notes:**
- Use case: "Find docs containing 'GDPR compliance' that match this query"
- Combines keyword precision with semantic ranking
- More specific than pure vector search

**Stemming Example:**
```python
# These all match the same stem:
"criminal"    → "crimin"
"criminals"   → "crimin"
"criminality" → "crimin"

# Search for "criminal" finds all variants
```

---

### CELL 31: Wildcard Text Match Header (Markdown)

**Workshop Notes:**
- Using wildcards for flexible pattern matching

---

### CELL 32: Wildcard Filter (Code)
```python
text_filter = Text("description") % "crim*"

vec_query = VectorQuery(
    vector=embedded_user_query,
    vector_field_name="vector",
    num_results=3,
return_fields=["title", "rating", "genre", "description"], + return_score=True, + filter_expression=text_filter +) + +result = index.query(vec_query) +pd.DataFrame(result) +``` + +**Wildcard Patterns:** +```python +# Suffix wildcard +Text("field") % "test*" # Matches: test, tests, testing, tester + +# Prefix wildcard +Text("field") % "*tion" # Matches: action, mention, creation + +# Middle wildcard +Text("field") % "t*st" # Matches: test, toast, trust + +# Multiple wildcards +Text("field") % "c*m*l" # Matches: camel, criminal, commercial +``` + +**Results:** +``` + id vector_distance title rating genre +0 movies:01K7T4BMAEZMNPYTV73KZFYN3R 0.796153008938 Explosive Pursuit 7 action +1 movies:01K7T4BMAEPQZ10JTTGZS0JW68 0.807471394539 The Incredibles 8 comedy +2 movies:01K7T4BMAE6KW01NKAVS2HSHYP 0.827253937721 Despicable Me 7 comedy +``` + +**Why More Results?** +- "crim*" matches: criminal, crime, criminals, etc. +- Broader than exact phrase match +- 3 results instead of 2 + +**Workshop Notes:** +- Useful when you know the root but not exact form +- Be careful with very short patterns (too many matches) +- Example: "tech*" might match: tech, technical, technology, technician + +**Performance Note:** +```python +# Efficient wildcards (start with letters) +"comp*" # Good: Narrows search space quickly + +# Inefficient wildcards (start with *) +"*puter" # Bad: Must check all terms +``` + +--- + +### CELL 33: Fuzzy Match Header (Markdown) + +**Workshop Notes:** +- Handling typos and slight variations using Levenshtein distance + +--- + +### CELL 34: Fuzzy Filter (Code) +```python +text_filter = Text("description") % "%hero%" + +vec_query = VectorQuery( + vector=embedded_user_query, + vector_field_name="vector", + num_results=3, + return_fields=["title", "rating", "genre", "description"], + return_score=True, + filter_expression=text_filter +) + +result = index.query(vec_query) +pd.DataFrame(result) +``` + +**Fuzzy Matching:** +```python +# Syntax: %term% allows 1 character edit distance +Text("field") % "%hero%" + +# What it matches: +"hero" โœ“ Exact match +"heros" โœ“ 1 insertion +"her" โœ“ 1 deletion +"hera" โœ“ 1 substitution +"heroes" โœ— 2+ edits (too far) +``` + +**Levenshtein Distance Formula:** +``` +Distance = minimum edits (insert/delete/substitute) to transform A โ†’ B + +Examples: +"hero" โ†’ "her" = 1 (delete 'o') +"hero" โ†’ "zero" = 1 (substitute 'h' with 'z') +"hero" โ†’ "heron" = 1 (insert 'n') +``` + +**Workshop Notes:** +- Handles typos automatically +- **Warning**: Can produce unexpected matches with short words + - "%he%" might match: he, her, hex, hue, hen, etc. +- Use minimum 4-5 characters for fuzzy matching + +**Results:** +``` + id vector_distance title rating genre +0 movies:01K7T4BMAEVCZCA7Z2R3Y837S6 0.889985799789 Black Widow 7 action +1 movies:01K7T4BMAE0XHHQ5W08WWXYNTV 0.89386677742 The Avengers 8 action +2 movies:01K7T4BMAETZ6H2MVQSVY4E46W 0.943198144436 The Princess Diaries 6 comedy +``` + +**Fuzzy Matching Pitfalls:** +```python +# Be careful with short terms +Text("name") % "%jo%" +# Matches: jo, joe, john, joy, job, jon, jot, joan... + +# Better: Use longer terms or exact match +Text("name") == "john" # Exact with stemming +Text("name") % "john*" # Wildcard prefix +``` + +**Real Use Case:** +```python +# User search with typo correction +user_input = "iphone" # User meant "iPhone" +query_filter = Text("product_name") % f"%{user_input}%" +# Matches: iPhone, iphone, iphne (1 typo), etc. 
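
# Hypothetical follow-up (illustrative names): feed the typo-tolerant filter
# into a VectorQuery so matches are still ranked semantically, as in the
# earlier e-commerce sketch with a product index and "style_vector" field
fuzzy_query = VectorQuery(
    vector=hf.embed(user_input),
    vector_field_name="style_vector",
    num_results=5,
    filter_expression=query_filter,
)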
+``` + +--- + +### CELL 35: Range Queries Header (Markdown) + +**Workshop Notes:** +- Finding all vectors within a similarity threshold +- Different from KNN (which always returns K results) + +--- + +### CELL 36: Range Query (Code) +```python +from redisvl.query import RangeQuery + +user_query = "Family friendly fantasy movies" + +embedded_user_query = hf.embed(user_query) + +range_query = RangeQuery( + vector=embedded_user_query, + vector_field_name="vector", + return_fields=["title", "rating", "genre"], + return_score=True, + distance_threshold=0.8 # find all items with distance < 0.8 +) + +result = index.query(range_query) +pd.DataFrame(result) +``` + +**Range Query vs KNN:** +```python +# KNN (K-Nearest Neighbors) +VectorQuery(num_results=5) +# Always returns exactly 5 results (or fewer if dataset smaller) +# Returns: [most similar, 2nd, 3rd, 4th, 5th] + +# Range Query +RangeQuery(distance_threshold=0.8) +# Returns ALL results with distance < 0.8 +# Could be 0 results, could be 1000 results +# Variable number based on threshold +``` + +**Distance Threshold Selection:** +``` +Cosine Distance Scale: +0.0 โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 0.5 โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 1.0 โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 1.5 โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 2.0 +โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ +Identical Very Close Related Somewhat Completely + Related Different + +Typical Thresholds: +0.3 - Very strict (near-duplicates) +0.5 - Strict (highly relevant) +0.7 - Moderate (relevant) +0.8 - Loose (somewhat relevant) โ† Used in example +1.0 - Very loose (barely relevant) +``` + +**Results:** +``` + id vector_distance title rating genre +0 movies:01K7T4BMAEPQZ10JTTGZS0JW68 0.644702553749 The Incredibles 8 comedy +1 movies:01K7T4BMAEVCZCA7Z2R3Y837S6 0.747986972332 Black Widow 7 action +2 movies:01K7T4BMAE6KW01NKAVS2HSHYP 0.750915408134 Despicable Me 7 comedy +3 movies:01K7T4BMAEVV6R6B2M22QFV7DW 0.751298904419 Shrek 8 comedy +4 movies:01K7T4BMAE8PR91YXEHRH3APYP 0.761669397354 Monsters, Inc. 
8 comedy +5 movies:01K7T4BMAED0S8Z02DN2SYQR1H 0.778580188751 Aladdin 8 comedy +``` + +**Workshop Notes:** +- 6 results returned (all under 0.8 distance) +- KNN would return exactly 3 (with num_results=3) +- Use case: "Show ALL similar products" or "Find ALL relevant documents" + +**Choosing Range vs KNN:** +```python +# Use KNN when: +# - You want top N results always +# - Pagination (show 10 per page) +# - Fixed UI slots (show 5 recommendations) + +# Use Range when: +# - Quality threshold matters more than quantity +# - "Show everything that matches well enough" +# - Duplicate detection (distance < 0.1) +# - Clustering (find all neighbors within radius) +``` + +**Tuning Threshold:** +```python +# Start conservative, then relax +thresholds = [0.5, 0.6, 0.7, 0.8, 0.9] + +for threshold in thresholds: + query = RangeQuery(vector=vec, distance_threshold=threshold) + results = index.query(query) + print(f"Threshold {threshold}: {len(results)} results") + +# Output: +# Threshold 0.5: 2 results (very strict) +# Threshold 0.6: 5 results +# Threshold 0.7: 12 results +# Threshold 0.8: 25 results (used in example) +# Threshold 0.9: 50 results (very loose) +``` + +--- + +### CELL 37: Range with Filters Header (Markdown) + +**Workshop Notes:** +- Combining range queries with structured filters + +--- + +### CELL 38: Filtered Range Query (Code) +```python +range_query = RangeQuery( + vector=embedded_user_query, + vector_field_name="vector", + return_fields=["title", "rating", "genre"], + distance_threshold=0.8 +) + +numeric_filter = Num("rating") >= 8 + +range_query.set_filter(numeric_filter) + +result = index.query(range_query) +pd.DataFrame(result) +``` + +**Filter Execution Flow:** +``` +1. Apply numeric filter โ†’ Only rating >= 8 movies +2. Compute distances โ†’ Only on filtered set +3. Apply threshold โ†’ Only results with distance < 0.8 +4. Return results โ†’ Ordered by distance +``` + +**Results:** +``` + id vector_distance title rating genre +0 movies:01K7T4BMAEPQZ10JTTGZS0JW68 0.644702553749 The Incredibles 8 comedy +1 movies:01K7T4BMAEVV6R6B2M22QFV7DW 0.751298904419 Shrek 8 comedy +2 movies:01K7T4BMAE8PR91YXEHRH3APYP 0.761669397354 Monsters, Inc. 8 comedy +3 movies:01K7T4BMAED0S8Z02DN2SYQR1H 0.778580188751 Aladdin 8 comedy +``` + +**Workshop Notes:** +- Now only 4 results (down from 6) +- Removed movies with rating 7 (Black Widow, Despicable Me) +- Real use case: "Find all hotels within 5km AND rating โ‰ฅ 4 stars" + +**Complex Range Filter Example:** +```python +# E-commerce: Find all relevant products in stock under $100 +range_query = RangeQuery( + vector=product_preference_vec, + distance_threshold=0.7, + filter_expression=( + (Tag("in_stock") == "yes") & + (Num("price") <= 100) & + (Num("rating") >= 4.0) + ) +) +``` + +--- + +### CELL 39: Full-Text Search Header (Markdown) + +**Workshop Notes:** +- Traditional text search WITHOUT vectors +- Uses BM25 algorithm for ranking + +--- + +### CELL 40: TextQuery with BM25 (Code) +```python +from redisvl.query import TextQuery + +user_query = "das High tech, action packed, superheros mit fight scenes" + +text_query = TextQuery( + text=user_query, + text_field_name="description", + text_scorer="BM25STD", # or "BM25" or "TFIDF" + num_results=20, + return_fields=["title", "description"], + stopwords="german" +) + +result = index.query(text_query)[:4] +pd.DataFrame(result)[["title", "score"]] +``` + +**BM25 Algorithm (Best Match 25):** + +BM25 is a probabilistic ranking function that considers: +1. 
**Term Frequency (TF)**: How often term appears in document +2. **Inverse Document Frequency (IDF)**: How rare term is across all documents +3. **Document Length**: Normalizes for document size + +**Formula:** +``` +score(D,Q) = ฮฃ IDF(qi) ร— (f(qi,D) ร— (k1+1)) / (f(qi,D) + k1 ร— (1-b+bร—|D|/avgdl)) + +Where: +- D = document +- Q = query +- qi = query term i +- f(qi,D) = frequency of qi in D +- |D| = length of D +- avgdl = average document length +- k1 = term saturation parameter (usually 1.2-2.0) +- b = length normalization (usually 0.75) +``` + +**BM25 vs TF-IDF:** +```python +# TF-IDF (older) +score = TF ร— IDF +# Linear growth with term frequency + +# BM25 (better) +score = IDF ร— (TF with saturation) +# Diminishing returns after multiple occurrences +``` + +**Stopwords Processing:** +```python +# Input query +"das High tech, action packed, superheros mit fight scenes" + +# German stopwords removed +"das" โ†’ removed +"mit" โ†’ removed + +# Final processed query +"high tech action packed superheros fight scenes" +``` + +**Results:** +``` + title score +0 Fast & Furious 9 5.376819 # Highest: has "high tech", "action", "packed" +1 The Incredibles 3.537206 # Medium: has "superheros" variant, "fight" +2 Explosive Pursuit 2.454928 # Lower: has "action" +3 Toy Story 1.459313 # Lowest: weak match +``` + +**Workshop Notes:** +- This is pure keyword/term matching (NO vectors!) +- Different from vector search - finds exact/stemmed words +- Useful when users search with specific terms +- Works across languages with proper stopwords + +**Text Scorer Options:** +```python +# BM25 (recommended) +text_scorer="BM25" # Standard BM25 + +# BM25 Standard (more tuning) +text_scorer="BM25STD" # With additional normalization + +# TF-IDF (older, simpler) +text_scorer="TFIDF" # Classic information retrieval +``` + +**When to Use Text Search vs Vector Search:** +```python +# Use Text Search when: +# - Users search with specific keywords/product codes +# - Exact term matching important (legal, medical) +# - Fast keyword lookups needed + +# Use Vector Search when: +# - Understanding meaning/intent matters +# - Handling synonyms/paraphrasing +# - Cross-lingual search +# - Recommendation systems + +# Use Hybrid (next cell) when: +# - Best of both worlds (usually best choice!) +``` + +--- + +### CELL 41: Check Query String (Code) +```python +text_query.query_string() +``` + +**Output:** +``` +'@description:(high | tech | action | packed | superheros | fight | scenes)' +``` + +**Query Syntax Breakdown:** +``` +@description: # Search in description field +(term1 | term2 | term3) # OR logic (any term matches) +``` + +**Workshop Notes:** +- Shows internal Redis query syntax +- Stopwords ("das", "mit") removed automatically +- Terms joined with OR operator +- This is what actually gets sent to Redis + +**Redis Query Syntax Examples:** +```python +# AND logic +"@description:(hero & villain)" # Both must appear + +# OR logic +"@description:(hero | villain)" # Either can appear + +# NOT logic +"@description:(hero -villain)" # hero but NOT villain + +# Phrase match +'@description:"criminal mastermind"' # Exact phrase + +# Field-specific +"@title:(batman) @description:(joker)" # batman in title, joker in description +``` + +--- + +### CELL 42: Hybrid Search Header (Markdown) + +**Workshop Notes:** +- **THE BEST APPROACH**: Combines semantic + keyword matching +- Industry best practice for highest quality results +- Used by modern search engines (Google, Bing, etc.) 
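+
+**Aside: Running the Raw Query String (added sketch):**
+
+The query string shown in CELL 41 can also be executed directly with redis-py, which is handy for checking exactly what RedisVL sends before wrapping it in a hybrid query. A minimal sketch, assuming the index is named "movies" (adjust to your schema):
+```python
+from redis import Redis
+from redis.commands.search.query import Query
+
+client = Redis.from_url("redis://localhost:6379")
+raw = '@description:(high | tech | action | packed | superheros | fight | scenes)'
+
+# Execute the same query RedisVL generated, scored with BM25STD
+q = Query(raw).scorer("BM25STD").with_scores().paging(0, 4)
+res = client.ft("movies").search(q)
+for doc in res.docs:
+    print(doc.id, doc.score)
+```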
+ +--- + +### CELL 43: Hybrid Query (Code) +```python +from redisvl.query import HybridQuery + +user_query = "das High tech, action packed, superheros mit fight scenes" + +hybrid_query = HybridQuery( + text=user_query, + text_field_name="description", + text_scorer="BM25", + vector=embedded_user_query, + vector_field_name="vector", + alpha=0.7, # 70% vector, 30% text + num_results=20, + return_fields=["title", "description"], + stopwords="german" +) + +result = index.query(hybrid_query)[:4] +pd.DataFrame(result)[["title", "vector_similarity", "text_score", "hybrid_score"]] +``` + +**Hybrid Search Architecture:** +``` +User Query: "high tech action superheros" + โ”‚ + โ”œโ”€โ†’ Text Search Path (BM25) + โ”‚ โ”œโ”€ Tokenize & remove stopwords + โ”‚ โ”œโ”€ Match keywords in text + โ”‚ โ””โ”€ Score: text_score + โ”‚ + โ”œโ”€โ†’ Vector Search Path (KNN) + โ”‚ โ”œโ”€ Generate embedding + โ”‚ โ”œโ”€ Compute cosine distances + โ”‚ โ””โ”€ Score: vector_similarity + โ”‚ + โ””โ”€โ†’ Combine Scores + hybrid_score = ฮฑ ร— vector_sim + (1-ฮฑ) ร— text_score +``` + +**Alpha Parameter (ฮฑ):** +``` +ฮฑ = 0.0 โ†’ Pure text search (100% keywords) +ฮฑ = 0.3 โ†’ Mostly text (70% text, 30% semantic) +ฮฑ = 0.5 โ†’ Balanced (50/50) +ฮฑ = 0.7 โ†’ Mostly semantic (70% vector, 30% text) โ† Recommended default +ฮฑ = 1.0 โ†’ Pure vector search (100% semantic) +``` + +**Score Normalization:** +```python +# Vector distances need normalization to [0,1] range +vector_similarity = (2 - cosine_distance) / 2 # Cosine: [0,2] โ†’ [0,1] +# Higher = more similar + +# Text scores already normalized via BM25 +text_score = bm25_score / max_possible_score # โ†’ [0,1] + +# Combine +hybrid_score = 0.7 ร— vector_similarity + 0.3 ร— text_score +``` + +**Results:** +``` + title vector_similarity text_score hybrid_score +0 The Incredibles 0.677648723 0.683368580 0.679364680 +1 Fast & Furious 9 0.537397742 0.498220622 0.525644606 +2 Toy Story 0.553009659 0.213523123 0.451163698 +3 Black Widow 0.626006513 0.000000000 0.438204559 +``` + +**Analysis of Results:** + +**1. The Incredibles (Winner - 0.679 hybrid score):** +- Strong vector similarity (0.678): Semantically about superheroes/action +- Strong text score (0.683): Contains keywords "superheros", "fight" +- **Best of both worlds** - relevant semantically AND has keywords + +**2. Fast & Furious 9 (0.526):** +- Medium vector similarity (0.537): Action-packed theme +- Medium text score (0.498): Has "high tech", "action", "packed" +- Balanced match + +**3. Toy Story (0.451):** +- Medium vector similarity (0.553): Has action elements +- Weak text score (0.214): Few matching keywords +- Vector search keeps it relevant despite weak text match + +**4. 
Black Widow (0.438):** +- Good vector similarity (0.626): Superhero action movie +- Zero text score (0.000): No matching keywords in description +- Pure semantic match - wouldn't rank high in text-only search + +**Workshop Notes:** +- **Key Insight**: Hybrid search combines strengths, avoids weaknesses + - Catches exact keyword matches (text search strength) + - Understands meaning and synonyms (vector search strength) + - Handles typos better (vector) while respecting important terms (text) + +**Tuning Alpha for Your Use Case:** +```python +# E-commerce product search +alpha = 0.5 # Balanced - users search with brand names (text) but also browse (semantic) + +# Documentation/knowledge base +alpha = 0.7 # Favor semantic - users phrase questions differently + +# Code search +alpha = 0.3 # Favor text - exact function/variable names matter + +# Academic papers +alpha = 0.8 # Favor semantic - concepts matter more than exact terms + +# Legal/medical +alpha = 0.2 # Favor text - specific terminology crucial +``` + +**A/B Testing Alpha:** +```python +# Test different alphas, measure metrics +alphas = [0.3, 0.5, 0.7, 0.9] + +for alpha in alphas: + query = HybridQuery(text=q, vector=v, alpha=alpha) + results = index.query(query) + + # Measure: CTR, time-to-click, relevance ratings, etc. + metrics = evaluate_results(results, ground_truth) + print(f"Alpha {alpha}: Precision={metrics.precision}, Recall={metrics.recall}") +``` + +**Real-World Hybrid Search Example:** +```python +# Airbnb-style search +user_query = "cozy mountain cabin with fireplace near skiing" +query_vector = embedder.embed(user_query) + +hybrid_query = HybridQuery( + text=user_query, + text_field_name="description", + vector=query_vector, + vector_field_name="listing_embedding", + alpha=0.6, # Slightly favor semantic + filter_expression=( + (Tag("property_type") == "cabin") & + (Num("price_per_night") <= 200) & + (Tag("amenities") == "fireplace") & + (Num("distance_to_ski") <= 10) # km + ), + num_results=50 +) +``` + +--- + +### CELL 44: Display NLTK Stopwords (Code) +```python +import nltk +from nltk.corpus import stopwords +nltk.download('stopwords', quiet=True) + +deutch_stopwords = stopwords.words('german') +english_stopwords = stopwords.words('english') + +print(f"Number of German stopwords: {len(deutch_stopwords)}\nGerman stopwords: {deutch_stopwords}\n\nNumber of English stopwords: {len(english_stopwords)}\nEnglish stopwords: {english_stopwords}") +``` + +**Theoretical Background - Stopwords:** + +**What are stopwords?** +- High-frequency, low-information words +- Provide grammatical structure but little semantic meaning +- Removing them improves search quality and performance + +**German Stopwords (232):** +``` +Common examples: +- Articles: der, die, das, ein, eine +- Prepositions: mit, in, auf, an, von +- Conjunctions: und, oder, aber +- Pronouns: ich, du, er, sie, es +``` + +**English Stopwords (198):** +``` +Common examples: +- Articles: the, a, an +- Prepositions: in, on, at, to, from +- Conjunctions: and, or, but +- Pronouns: I, you, he, she, it +- Auxiliaries: is, are, was, were, have, has +``` + +**Why Remove Stopwords?** +``` +Query: "the best italian restaurant in the city" +Without stopword removal: +- "the" appears everywhere (not discriminative) +- "in" appears everywhere (not discriminative) +After stopword removal: +- "best italian restaurant city" (content words only) +- More focused, better results +``` + +**Workshop Notes:** +- NLTK provides stopword lists for 16+ languages +- Custom stopwords can be added 
for domain-specific terms +- Vector search naturally handles stopwords (they get low weights) +- Text search benefits more from explicit stopword removal + +**Custom Stopwords Example:** +```python +# Domain-specific stopwords +medical_stopwords = english_stopwords + [ + "patient", "doctor", "hospital", # Common but not discriminative + "reported", "showed", "indicated" +] + +# Remove domain-common terms that don't help search +tech_stopwords = english_stopwords + [ + "application", "system", "software", + "user", "data", "information" +] +``` + +**Important Stopwords to Keep:** +```python +# Sometimes stopwords matter! + +# Negations (critical meaning) +keep = ["not", "no", "never", "neither", "nor"] +# "working" vs "not working" - huge difference! + +# Medical context +keep = ["over", "under", "above", "below"] +# "over 100mg" vs "under 100mg" - critical! + +# Programming +keep = ["and", "or", "not"] +# Boolean operators are keywords! +``` + +**RedisVL Stopwords Configuration:** +```python +# Use language-specific stopwords +TextQuery(text=query, stopwords="english") +TextQuery(text=query, stopwords="german") +TextQuery(text=query, stopwords="french") + +# Use custom stopwords +custom_stops = ["custom", "domain", "terms"] +TextQuery(text=query, stopwords=custom_stops) + +# No stopword removal +TextQuery(text=query, stopwords=None) +``` + +--- + +### CELL 45: Next Steps Header (Markdown) + +**Workshop Notes:** +- Link to advanced RedisVL documentation +- Encourages further exploration +- Points to additional resources + +**Additional Resources to Mention:** +``` +1. RedisVL GitHub: https://github.com/redis/redis-vl-python +2. Redis AI Resources: https://github.com/redis-developer/redis-ai-resources +3. Redis Documentation: https://redis.io/docs/stack/search/ +4. RedisVL Docs: https://www.redisvl.com/ +5. Redis University: https://university.redis.com/ +``` + +--- + +### CELL 46: Cleanup (Code) +```python +index.delete() +``` + +**What's Happening:** +- Removes the index structure from Redis +- Data remains in Redis (only index deleted) + +**Workshop Notes:** +- Good practice for demo/test cleanup +- In production, manage index lifecycle carefully + +**Cleanup Options:** +```python +# 1. Delete index only (keep data) +index.delete() # or index.delete(drop=False) +# Use case: Re-indexing with different schema + +# 2. Delete index AND data +index.delete(drop=True) +# Use case: Complete cleanup + +# 3. Keep index, delete some data +for key in client.scan_iter("movies:*"): + if should_delete(key): + client.delete(key) + +# 4. Flush everything (DANGER!) +# client.flushall() # Never in production! +``` + +**Re-indexing Pattern:** +```python +# Safe re-indexing without downtime +old_index = SearchIndex(old_schema, client) +new_index = SearchIndex(new_schema, client) + +# 1. Create new index with different name +new_index.create() + +# 2. Load data into new index +new_index.load(data) + +# 3. Verify new index +assert new_index.info()['num_docs'] > 0 + +# 4. Switch application to new index +# (Update config/environment variable) + +# 5. Delete old index +old_index.delete(drop=True) +``` + +--- + +## Technical Q&A + +### General Vector Search Questions + +**Q: How do embeddings capture meaning?** +A: Embeddings are learned through training on massive datasets. 
The model learns that: +- Words appearing in similar contexts should have similar vectors +- Synonyms cluster together in vector space +- Relationships are preserved (king - man + woman โ‰ˆ queen) +- This is done through neural networks with millions of parameters + +**Q: Why 384 dimensions specifically?** +A: Model architecture choice balancing: +- Quality: More dimensions = more capacity to capture nuances +- Speed: Fewer dimensions = faster computation +- Memory: Fewer dimensions = less storage +- 384 is sweet spot for many models (BERT variants often use 768/1024) + +**Q: Can I use different embedding models for query vs documents?** +A: **No!** Query and documents must use the **same** embedding model. Different models create incompatible vector spaces. You can't compare distances meaningfully across different spaces. + +**Q: How do I handle multiple languages?** +A: Options: +1. **Multilingual models**: `paraphrase-multilingual-mpnet-base-v2` (supports 50+ languages) +2. **Separate indices per language**: Better quality but more complex +3. **Translation layer**: Translate everything to English first (adds latency) + +**Q: What's the difference between embeddings and feature vectors?** +A: +- **Embeddings**: Learned representations (from neural networks) +- **Feature vectors**: Hand-crafted representations (TF-IDF, bag-of-words) +- Embeddings are generally much better at capturing semantic meaning + +--- + +### Redis-Specific Questions + +**Q: How much memory does Redis need for vectors?** +A: Calculate as: +``` +Memory = num_vectors ร— dimensions ร— bytes_per_dimension ร— overhead_factor + +Example for 1M vectors: +1,000,000 ร— 384 ร— 4 bytes ร— 1.3 (overhead) = ~2 GB + +Overhead includes: +- Index structures (15-30% depending on algorithm) +- Redis memory allocation overhead +- Metadata storage +``` + +**Q: Can Redis handle billions of vectors?** +A: Yes, with clustering: +- Single node: Up to 100M vectors (depending on RAM) +- Redis Enterprise cluster: Billions of vectors (distributed) +- Use Redis Enterprise for production scale + +**Q: What happens when Redis runs out of memory?** +A: Depends on `maxmemory-policy`: +```python +# View current policy +client.config_get('maxmemory-policy') + +# Common policies: +# 'noeviction' - Return errors when full (safest for vector DB) +# 'allkeys-lru' - Evict least recently used (dangerous for vectors!) +# 'volatile-lru' - Evict only keys with TTL + +# Recommended for vector DB: +client.config_set('maxmemory-policy', 'noeviction') +``` + +**Q: How does Redis compare to dedicated vector databases (Pinecone, Weaviate, Milvus)?** +A: +**Redis Advantages:** +- Already in your stack (cache + vector DB) +- Sub-millisecond latency +- Mature, battle-tested +- Rich data structures beyond vectors + +**Dedicated Vector DB Advantages:** +- More advanced features (filtering, faceting) +- Built specifically for vectors +- Better tooling for ML workflows + +**Use Redis when:** You need low latency, already use Redis, want unified cache+vector +**Use dedicated DB when:** Pure vector workload, need advanced features + +--- + +### Performance Questions + +**Q: Why is my query slow?** +A: Debug checklist: +```python +# 1. Check algorithm +info = index.info() +print(info['vector_algorithm']) # FLAT is slower than HNSW + +# 2. Check dataset size +print(f"Documents: {info['num_docs']}") +# If >100K with FLAT, switch to HNSW + +# 3. 
Profile query time +import time +start = time.time() +results = index.query(query) +print(f"Query time: {(time.time()-start)*1000:.2f}ms") + +# 4. Check network latency +start = time.time() +client.ping() +print(f"Ping: {(time.time()-start)*1000:.2f}ms") + +# 5. Check embedding time +start = time.time() +vec = hf.embed(text) +print(f"Embedding time: {(time.time()-start)*1000:.2f}ms") +``` + +**Q: When should I use HNSW vs FLAT?** +A: +``` +FLAT (Exact Search): +โœ“ <100K vectors +โœ“ Need 100% accuracy +โœ“ Simple, no tuning +โœ— O(N) complexity - slow on large datasets + +HNSW (Approximate Search): +โœ“ >100K vectors +โœ“ Can tolerate 95-99% accuracy +โœ“ Much faster (10-100x) +โœ— Uses more memory +โœ— Requires parameter tuning + +Rule of thumb: +- Start with FLAT +- Migrate to HNSW when queries slow down +- Test to find acceptable accuracy/speed tradeoff +``` + +**Q: How do I tune HNSW parameters?** +A: +```python +# Start with these defaults +attrs = { + "algorithm": "hnsw", + "m": 16, # 16-64 range + "ef_construction": 200, # 100-500 range + "ef_runtime": 10 # 10-200 range (set at query time) +} + +# Tuning guide: +# m: Higher = better accuracy, more memory +# Double m โ†’ 2x memory but ~10% better recall + +# ef_construction: Higher = better index quality +# Only affects indexing time (one-time cost) +# Set as high as tolerable during indexing + +# ef_runtime: Higher = better accuracy, slower queries +# Adjust based on accuracy requirements +# Tune via A/B testing + +# Example tuning: +for ef in [10, 20, 50, 100]: + query = VectorQuery(vector=v, ef_runtime=ef) + results = index.query(query) + # Measure accuracy vs speed +``` + +--- + +### Data Management Questions + +**Q: How do I update vectors?** +A: +```python +# Option 1: Update entire document (recommended) +key = "movies:01K7T4BMAEZMNPYTV73KZFYN3R" +new_data = { + "title": "Updated Title", + "description": "New description", + "vector": new_embedding +} +client.hset(key, mapping=new_data) +# Index updates automatically + +# Option 2: Update just the vector +client.hset(key, "vector", new_embedding_bytes) + +# Option 3: Bulk update +for key, new_embedding in updates.items(): + client.hset(key, "vector", new_embedding) +``` + +**Q: Can I have multiple vector fields per document?** +A: Yes! Useful for multi-modal search: +```python +schema = { + "fields": [ + { + "name": "title_vector", + "type": "vector", + "attrs": {"dims": 384, ...} + }, + { + "name": "description_vector", + "type": "vector", + "attrs": {"dims": 384, ...} + }, + { + "name": "image_vector", + "type": "vector", + "attrs": {"dims": 512, ...} # Different model OK + } + ] +} + +# Query specific field +query = VectorQuery( + vector=query_vec, + vector_field_name="title_vector" # Search titles only +) +``` + +**Q: How do I handle document updates/deletes?** +A: +```python +# Delete document +client.delete("movies:01K7T4BMAEZMNPYTV73KZFYN3R") +# Index updates automatically + +# Bulk delete +keys_to_delete = client.keys("movies:*") +if keys_to_delete: + client.delete(*keys_to_delete) + +# Conditional delete +for key in client.scan_iter("movies:*"): + data = client.hgetall(key) + if should_delete(data): + client.delete(key) +``` + +--- + +### Search Quality Questions + +**Q: How do I improve search quality?** +A: Multiple strategies: + +**1. Better embeddings:** +```python +# Use larger, better models +# all-MiniLM-L6-v2 (384d) โ†’ all-mpnet-base-v2 (768d) +# or fine-tune on your domain data +``` + +**2. 
Hybrid search:** +```python +# Combine vector + text search (best approach) +HybridQuery(alpha=0.7) +``` + +**3. Query expansion:** +```python +# Add synonyms/related terms +original_query = "car" +expanded_query = "car automobile vehicle" +``` + +**4. Reranking:** +```python +# Two-stage retrieval +# Stage 1: Get 100 candidates (fast, approximate) +candidates = index.query(VectorQuery(num_results=100)) + +# Stage 2: Rerank top candidates (slow, accurate) +reranked = rerank_model.predict(query, candidates) +final_results = reranked[:10] +``` + +**5. Filter tuning:** +```python +# Pre-filter to high-quality subset +filter = (Num("rating") >= 4) & (Tag("verified") == "yes") +``` + +**Q: How do I evaluate search quality?** +A: Use standard IR metrics: +```python +# Precision@K: What % of top K results are relevant? +def precision_at_k(results, relevant_ids, k=10): + top_k = [r['id'] for r in results[:k]] + relevant_count = len(set(top_k) & set(relevant_ids)) + return relevant_count / k + +# Recall@K: What % of relevant docs are in top K? +def recall_at_k(results, relevant_ids, k=10): + top_k = [r['id'] for r in results[:k]] + relevant_count = len(set(top_k) & set(relevant_ids)) + return relevant_count / len(relevant_ids) + +# Mean Reciprocal Rank (MRR): Position of first relevant result +def mrr(results, relevant_ids): + for i, result in enumerate(results, 1): + if result['id'] in relevant_ids: + return 1.0 / i + return 0.0 + +# NDCG: Normalized Discounted Cumulative Gain +# (More complex, considers graded relevance) +``` + +--- + +### Production Considerations Questions + +**Q: How do I handle high query volume?** +A: +```python +# 1. Use Redis Enterprise cluster (horizontal scaling) +# 2. Implement caching layer +# 3. Connection pooling +from redis import ConnectionPool + +pool = ConnectionPool.from_url(REDIS_URL, max_connections=50) +client = Redis(connection_pool=pool) + +# 4. Async queries (if using async framework) +from redisvl.index import AsyncSearchIndex + +async_index = AsyncSearchIndex(schema, client) +results = await async_index.query(query) + +# 5. Batch queries +queries = [query1, query2, query3] +results = await async_index.query_batch(queries) +``` + +**Q: How do I monitor Redis vector search?** +A: +```python +# Key metrics to track +info = index.info() + +print(f"Documents: {info['num_docs']}") +print(f"Memory: {info['vector_index_sz_mb']} MB") +print(f"Indexing failures: {info['hash_indexing_failures']}") + +# Query latency percentiles +# Use Redis monitoring tools or custom tracking: +import time +latencies = [] + +for query in test_queries: + start = time.time() + index.query(query) + latencies.append((time.time() - start) * 1000) + +import numpy as np +print(f"P50: {np.percentile(latencies, 50):.2f}ms") +print(f"P95: {np.percentile(latencies, 95):.2f}ms") +print(f"P99: {np.percentile(latencies, 99):.2f}ms") +``` + +**Q: Should I use Redis Cloud or self-hosted?** +A: +**Redis Cloud:** +โœ“ Managed, no ops burden +โœ“ Auto-scaling +โœ“ Built-in monitoring +โœ“ Multi-cloud support +โœ— Cost (pay for managed service) + +**Self-hosted:** +โœ“ Full control +โœ“ Lower cost (just infrastructure) +โœ— Ops complexity +โœ— Need monitoring/alerting setup + +**Recommendation:** Start with Redis Cloud for development, decide based on scale/budget for production. 
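+
+**NDCG@K (added sketch):**
+
+To round out the evaluation helpers above (precision@K, recall@K, MRR), here is a minimal NDCG@K sketch for binary relevance, assuming the same `results` / `relevant_ids` shapes used there:
+```python
+import math
+
+def ndcg_at_k(results, relevant_ids, k=10):
+    """DCG of the actual ranking divided by DCG of the ideal ranking."""
+    gains = [1.0 if r['id'] in relevant_ids else 0.0 for r in results[:k]]
+    dcg = sum(g / math.log2(i + 2) for i, g in enumerate(gains))  # rank is i+1
+    ideal = sum(1.0 / math.log2(i + 2) for i in range(min(k, len(relevant_ids))))
+    return dcg / ideal if ideal else 0.0
+```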
+ +--- + +## Architecture & Performance + +### System Architecture + +**Typical Production Architecture:** +``` +โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” +โ”‚ Client โ”‚ +โ”‚ Application โ”‚ +โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”˜ + โ”‚ + โ†“ +โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” +โ”‚ Load Balancer โ”‚ +โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ + โ”‚ + โ†“ +โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” +โ”‚ Application โ”‚โ”€โ”€โ”€โ”€โ†’โ”‚ Embedding โ”‚ +โ”‚ Server โ”‚ โ”‚ Service โ”‚ +โ”‚ (FastAPI/Flask) โ”‚ โ”‚ (Sentence- โ”‚ +โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ”‚ Transformers) โ”‚ + โ”‚ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ + โ†“ +โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” +โ”‚ Redis Cloud โ”‚ +โ”‚ (with Search) โ”‚ +โ”‚ โ”‚ +โ”‚ โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”โ”‚ +โ”‚ โ”‚ Vector Index โ”‚โ”‚ +โ”‚ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜โ”‚ +โ”‚ โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”โ”‚ +โ”‚ โ”‚ Cache Layer โ”‚โ”‚ +โ”‚ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜โ”‚ +โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ +``` + +### Performance Benchmarks + +**Query Latency (approximate):** +``` +Dataset Size Algorithm Query Time +โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ +1K vectors FLAT 1-2ms +10K vectors FLAT 5-10ms +100K vectors FLAT 50-100ms โ† Switch to HNSW here +100K vectors HNSW 2-5ms +1M vectors HNSW 3-8ms +10M vectors HNSW 5-15ms +``` + +**Throughput (queries/second):** +``` +Single Redis node: 5,000-10,000 QPS +Redis Enterprise (10 nodes): 50,000-100,000 QPS +``` + +### Memory Optimization + +**Techniques to reduce memory:** +```python +# 1. Use smaller embeddings +# 384d instead of 1536d = 4x less memory + +# 2. Quantization (reduce precision) +attrs = { + "datatype": "float16" # 2 bytes instead of 4 +} +# Trades accuracy for 2x memory savings + +# 3. SVS-VAMANA with compression +attrs = { + "algorithm": "svs-vamana", + "compression": "lvq8" # 8-bit compression +} + +# 4. Store vectors separately from metadata +# Use JSON for metadata, vectors in separate keys +``` + +--- + +## Production Considerations + +### Best Practices + +**1. Schema Design:** +```python +# โœ“ Good: Specific prefixes +prefix = "product_vectors" # Clear purpose + +# โœ— Bad: Generic prefixes +prefix = "data" # Too vague + +# โœ“ Good: Version schemas +prefix = "product_vectors_v2" # Enables migrations + +# โœ“ Good: Document structure +{ + "id": "prod_123", + "title": "...", + "description": "...", + "vector": b"...", + "metadata": { + "created_at": "2025-01-01", + "updated_at": "2025-01-15" + } +} +``` + +**2. Error Handling:** +```python +from redis.exceptions import RedisError, TimeoutError + +try: + results = index.query(query) +except TimeoutError: + # Retry with exponential backoff + logger.error("Redis timeout, retrying...") + results = retry_with_backoff(index.query, query) +except RedisError as e: + # Log and return cached/default results + logger.error(f"Redis error: {e}") + results = get_cached_results(query) +except Exception as e: + # Catch-all + logger.exception("Unexpected error") + raise +``` + +**3. 
Caching Strategy:** +```python +# Multi-layer caching +class VectorSearchService: + def __init__(self): + self.local_cache = {} # In-memory (milliseconds) + self.redis_cache = redis_client # Redis cache (1-2ms) + self.index = search_index # Vector search (5-10ms) + + def search(self, query): + cache_key = hash(query) + + # L1: Check local memory + if cache_key in self.local_cache: + return self.local_cache[cache_key] + + # L2: Check Redis cache + cached = self.redis_cache.get(f"search:{cache_key}") + if cached: + results = json.loads(cached) + self.local_cache[cache_key] = results + return results + + # L3: Perform search + results = self.index.query(query) + + # Cache results + self.redis_cache.setex( + f"search:{cache_key}", + 3600, # 1 hour TTL + json.dumps(results) + ) + self.local_cache[cache_key] = results + + return results +``` + +**4. Monitoring & Alerting:** +```python +# Metrics to track +metrics = { + "query_latency_p50": ..., + "query_latency_p95": ..., + "query_latency_p99": ..., + "queries_per_second": ..., + "error_rate": ..., + "cache_hit_rate": ..., + "index_memory_mb": ..., + "document_count": ..., +} + +# Alerts +if metrics["query_latency_p99"] > 100: # >100ms + alert("High query latency!") + +if metrics["error_rate"] > 0.01: # >1% + alert("High error rate!") + +if metrics["index_memory_mb"] > 0.8 * max_memory: + alert("Redis memory almost full!") +``` + +**5. Deployment Checklist:** +``` +โ–ก Enable SSL/TLS (rediss://) +โ–ก Set strong password +โ–ก Configure maxmemory-policy (noeviction for vector DB) +โ–ก Set up monitoring (Prometheus, Datadog, etc.) +โ–ก Configure backups (AOF or RDB) +โ–ก Test failover scenarios +โ–ก Load test at 2x expected traffic +โ–ก Document schema and indices +โ–ก Set up alerting +โ–ก Plan capacity (memory, QPS) +``` + +--- + +## Conclusion & Key Takeaways + +### Core Concepts Mastered +1. โœ… Vector embeddings capture semantic meaning +2. โœ… Redis provides sub-millisecond vector search +3. โœ… Multiple search types: Vector, Range, Text, Hybrid +4. โœ… Hybrid search combines best of semantic + keyword +5. โœ… Filters enable precise, constrained search +6. โœ… RedisVL simplifies vector operations in Python + +### Decision Framework + +**Choose your search approach:** +``` +Pure Vector Search +โ”œโ”€ When: Understanding meaning matters most +โ”œโ”€ Example: "Find similar products" +โ””โ”€ Use: VectorQuery + +Pure Text Search +โ”œโ”€ When: Exact keywords critical +โ”œโ”€ Example: "Find document #12345" +โ””โ”€ Use: TextQuery + +Hybrid Search (Recommended!) +โ”œโ”€ When: Production applications (usually best) +โ”œโ”€ Example: Most real-world search scenarios +โ””โ”€ Use: HybridQuery with alpha=0.7 + +Range Search +โ”œโ”€ When: Quality threshold matters +โ”œโ”€ Example: "Show all similar enough items" +โ””โ”€ Use: RangeQuery +``` + +### Production Readiness +- Start simple (FLAT algorithm) +- Scale up (migrate to HNSW at 100K+ vectors) +- Monitor continuously (latency, memory, errors) +- Cache aggressively (embeddings, query results) +- Test thoroughly (accuracy, speed, scale) + +### Next Steps for Attendees +1. Try with your own data +2. Experiment with different embedding models +3. Tune hybrid search alpha parameter +4. Deploy to Redis Cloud +5. Integrate with your application +6. 
Measure and optimize
+
+---
+
+## Additional Resources
+
+- **RedisVL Documentation**: https://www.redisvl.com/
+- **Redis Vector Search Guide**: https://redis.io/docs/stack/search/reference/vectors/
+- **Sentence Transformers**: https://www.sbert.net/
+- **Redis AI Resources**: https://github.com/redis-developer/redis-ai-resources
+- **Redis University**: https://university.redis.com/
+
+---
+
+**Workshop Complete!** 🎉
+
+You now have the knowledge to build production-grade semantic search applications with Redis and RedisVL.
\ No newline at end of file
diff --git a/python-recipes/agents/02_full_featured_agent-Copy1.ipynb b/python-recipes/agents/02_full_featured_agent-Copy1.ipynb
new file mode 100644
index 00000000..d4126a84
--- /dev/null
+++ b/python-recipes/agents/02_full_featured_agent-Copy1.ipynb
@@ -0,0 +1,1090 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "qYvD2zzKobTC"
+   },
+   "source": [
+    "![Redis](https://redis.io/wp-content/uploads/2024/04/Logotype.svg?auto=webp&quality=85,75&width=120)\n",
+    "\n",
+    "# Full-Featured Agent Architecture\n",
+    "The following example demonstrates how to build a tool-enabled agentic workflow with a semantic cache and an allow/block list router. This approach helps reduce latency and costs in the final solution.\n",
+    "\n",
+    "Note: This notebook summarizes [this workshop](https://github.com/redis-developer/oregon-trail-agent-workshop). For a more detailed step-by-step walkthrough of each element, please refer to the repository.\n",
+    "\n",
+    "## Let's Begin!\n",
+    "\"Open"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "NTFxCojYECnx"
+   },
+   "source": [
+    "# Setup\n",
+    "\n",
+    "## Packages"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "id": "Zz62U5COgF21"
+   },
+   "outputs": [],
+   "source": [
+    "%pip install -q langchain langchain-openai \"langchain-redis>=0.2.0\" langgraph sentence-transformers"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### OpenAI API key\n",
+    "\n",
+    "An OpenAI API key with billing enabled is required for this lesson."
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "VO0i-1c9m2Kb", + "outputId": "ec942dbf-226a-426d-8964-e03831e0dd99" + }, + "outputs": [ + { + "name": "stdin", + "output_type": "stream", + "text": [ + "OPENAI_API_KEY: ยทยทยทยทยทยทยทยท\n" + ] + } + ], + "source": [ + "# NBVAL_SKIP\n", + "import os\n", + "import getpass\n", + "\n", + "\n", + "\n", + "def _set_env(key: str):\n", + " if key not in os.environ:\n", + " os.environ[key] = getpass.getpass(f\"{key}:\")\n", + "\n", + "\n", + "_set_env(\"OPENAI_API_KEY\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Po4K08Uoa5HJ" + }, + "source": [ + "## Redis instance\n", + "\n", + "### For colab" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "vlF2874ZoBWu", + "outputId": "e5e7ebc0-b70c-4682-d70c-b33c584e72d4" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "deb [signed-by=/usr/share/keyrings/redis-archive-keyring.gpg] https://packages.redis.io/deb jammy main\n", + "Starting redis-stack-server, database path /var/lib/redis-stack\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "gpg: cannot open '/dev/tty': No such device or address\n", + "curl: (23) Failed writing body\n" + ] + } + ], + "source": [ + "# NBVAL_SKIP\n", + "%%sh\n", + "curl -fsSL https://packages.redis.io/gpg | sudo gpg --dearmor -o /usr/share/keyrings/redis-archive-keyring.gpg\n", + "echo \"deb [signed-by=/usr/share/keyrings/redis-archive-keyring.gpg] https://packages.redis.io/deb $(lsb_release -cs) main\" | sudo tee /etc/apt/sources.list.d/redis.list\n", + "sudo apt-get update > /dev/null 2>&1\n", + "sudo apt-get install redis-stack-server > /dev/null 2>&1\n", + "redis-stack-server --daemonize yes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### For Alternative Environments\n", + "There are many ways to get the necessary redis-stack instance running\n", + "1. On cloud, deploy a [FREE instance of Redis in the cloud](https://redis.com/try-free/). Or, if you have your\n", + "own version of Redis Enterprise running, that works too!\n", + "2. Per OS, [see the docs](https://redis.io/docs/latest/operate/oss_and_stack/install/install-stack/)\n", + "3. 
With docker: `docker run -d --name redis-stack-server -p 6379:6379 redis/redis-stack-server:latest`\n", + "\n", + "## Test connection" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "My-zol_loQaw", + "outputId": "b58c2466-ee10-480c-ad4c-608cbf747e8b" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import os\n", + "from redis import Redis\n", + "\n", + "# Use the environment variable if set, otherwise default to localhost\n", + "REDIS_URL = os.getenv(\"REDIS_URL\", \"redis://localhost:6379\")\n", + "\n", + "client = Redis.from_url(REDIS_URL)\n", + "client.ping()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "p8lqllwDoV_K" + }, + "source": [ + "# Motivation\n", + "\n", + "The goal of the workshop is to create an agent workflow that can handle five Oregon Trail-themed scenarios, mimicking situations that often arise when implementing agent workflows in practice.\n", + "\n", + "## Scenario 1 - name of the wagon leader\n", + "\n", + "**Learning goal:** Test basic LangGraph setup and execution.
\n", + "\n", + "**Question:** `What is the first name of the wagon leader?`
\n", + "**Answer:** `Art`
\n", + "**Type:** `free-form`
\n", + "\n", + "## Scenario 2 - restocking tool\n", + "\n", + "**Learning goal:** Agent interaction with custom defined tool and **structured output** for multiple choice questions.
\n", + "\n", + "**Question:** `In order to survive the trail ahead, you'll need to have a restocking strategy for when you need to get more supplies or risk starving. If it takes you an estimated 3 days to restock your food and you plan to start with 200lbs of food, budget 10lbs/day to eat, and keep a safety stock of at least 50lbs of back up... at what point should you restock?`
\n", + "**Answer:** `D`
\n", + "**Options:** `[\"A: 100lbs\", \"B: 20lbs\", \"C: 5lbs\", \"D: 80lbs\"]`
\n", + "**Type:** `multi-choice`
\n", + "\n", + "## Scenario 3 - retrieval tool\n", + "\n", + "**Learning goal:** Agent implements Retrieval Augmented Generation.\n", + "\n", + "**Question:** `Youโ€™ve encountered a dense forest near the Blue Mountains, and your party is unsure how to proceed. There is a fork in the road, and you must choose a path. Which way will you go?`
\n", + "**Answer:** `B`
\n", + "**Options:** `[\"A: take the northern trail\", \"B: take the southern trail\", \"C: turn around\", \"D: go fishing\"]`
\n", + "**Type:** `multi-choice`
\n", + "\n", + "## Scenario 4 - semantic cache\n", + "\n", + "**Learning goal:** Implement semantic cache that bypasses expensive agent workflow for known answer.
\n", + "\n", + "**Question:** `There's a deer. You're hungry. You know what you have to do...`
\n", + "**Answer:** `bang`
\n", + "**Type:** `free-form`
\n", + "\n", + "## Scenario 5 - allow/block list with router\n", + "\n", + "**Learning goal:** Implement semantic router that blocks requests for non-related topics.\n", + "\n", + "**Question:** `Tell me about the S&P 500?`
\n", + "**Answer:** `you shall not pass`
\n", + "**Type:** `free-form`
\n", + "\n", + "\n", + "\n", + "# Final Architecture\n", + "\n", + "In the end, we are building a workflow like the following:\n", + "\n", + "![diagram](../../assets/full_featured_agent.png)\n", + "\n", + "As a reminder for more detail see: [Redis Developer Oregon Trail Agent Workshop](https://github.com/redis-developer/oregon-trail-agent-workshop).\n", + "\n", + "# Defining the agent with LangGraph\n", + "\n", + "## Tools\n", + "\n", + "Tools are functions that the central LLM powered \"agent\" can determine to invoke depending on the situation.\n", + "\n", + "### Restock tool\n", + "\n", + "The first tool we will define implements the restocking formula. LLMs are designed to predict text responses, not to perform deterministic math. In this case, the agent will act as a parser, extracting the necessary information from the human query and calling the tool with the appropriate schema.\n", + "\n", + "One of the advantages of `LangGraph` is that the schema for the tool can be defined as a `pydantic` model. Note: It is also essential to include a well-written `doc_string` with the tool function so the agent can determine the appropriate situation to use the tool." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_core.tools import tool\n", + "from pydantic import BaseModel, Field\n", + "\n", + "class RestockInput(BaseModel):\n", + " daily_usage: int = Field(\n", + " description=\"Pounds (lbs) of food expected to be consumed daily\"\n", + " )\n", + " lead_time: int = Field(description=\"Lead time to replace food in days\")\n", + " safety_stock: int = Field(\n", + " description=\"Number of pounds (lbs) of safety stock to keep on hand\"\n", + " )\n", + "\n", + "\n", + "@tool(\"restock-tool\", args_schema=RestockInput)\n", + "def restock_tool(daily_usage: int, lead_time: int, safety_stock: int) -> int:\n", + " \"\"\"restock formula tool used specifically for calculating the amount of food at which you should start restocking.\"\"\"\n", + " print(f\"\\n Called restock tool: {daily_usage=}, {lead_time=}, {safety_stock=} \\n\")\n", + " return (daily_usage * lead_time) + safety_stock" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Retriever tool\n", + "\n", + "Sometimes an LLM might need access to data that it was not trained on, whether because the data is proprietary, time-sensitive, or otherwise unavailable.\n", + "\n", + "In such cases, Retrieval-Augmented Generation (RAG) is often necessary. Here, a vector search is used to augment the final LLM prompt with helpful and necessary context.\n", + "\n", + "RAG and agents are not mutually exclusive. Below, we define a retriever tool that performs RAG whenever the agent determines it is necessary." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.tools.retriever import create_retriever_tool\n", + "\n", + "from langchain_redis import RedisConfig, RedisVectorStore\n", + "from langchain_core.documents import Document\n", + "from langchain_openai import OpenAIEmbeddings\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "RedisConfig(index_name='oregon_trail', from_existing=False, key_prefix='oregon_trail', redis_url='redis://localhost:6379/0', redis_client=None, connection_args={}, distance_metric='COSINE', indexing_algorithm='FLAT', vector_datatype='FLOAT32', storage_type='hash', id_field='id', content_field='text', embedding_field='embedding', default_tag_separator='|', metadata_schema=[], index_schema=None, schema_path=None, return_keys=False, custom_keys=None, embedding_dimensions=None, legacy_key_format=True)" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "## Helper methods\n", + "\n", + "INDEX_NAME = os.environ.get(\"VECTOR_INDEX_NAME\", \"oregon_trail\")\n", + "REDIS_URL = os.environ.get(\"REDIS_URL\", \"redis://localhost:6379/0\")\n", + "\n", + "CONFIG = RedisConfig(index_name=INDEX_NAME, redis_url=REDIS_URL)\n", + "display(CONFIG)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20:07:47 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "Init vector store with document\n", + "20:07:48 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n" + ] + } + ], + "source": [ + "def get_vector_store():\n", + " try:\n", + " CONFIG.from_existing = True\n", + " vector_store = RedisVectorStore(OpenAIEmbeddings(), config=CONFIG)\n", + " except:\n", + " print(\"Init vector store with document\")\n", + " CONFIG.from_existing = False\n", + " vector_store = RedisVectorStore.from_documents(\n", + " [doc], OpenAIEmbeddings(), config=CONFIG\n", + " )\n", + " return vector_store\n", + "\n", + "## Relevant data\n", + "\"\"\"\n", + "the northern trail, of the blue mountains, was destroyed by a flood and is no longer safe to traverse. \n", + "It is recommended to take the southern trail although it is longer.\n", + "\"\"\"\n", + "\n", + "doc = Document(\n", + " page_content=\"the northern trail, of the blue mountains, was destroyed by a flood and is no longer safe to traverse. It is recommended to take the southern trail although it is longer.\"\n", + ")\n", + "\n", + "## Retriever tool\n", + "vector_store = get_vector_store()\n", + "\n", + "retriever_tool = create_retriever_tool(\n", + " vector_store.as_retriever(),\n", + " \"get_directions\",\n", + " \"Search and return information related to which routes/paths/trails to take along your journey.\",\n", + ")\n", + "\n", + "## Store both tools in a list\n", + "tools = [retriever_tool, restock_tool]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# State\n", + "\n", + "State is the set of messages that is passed between nodes in our graph so that the proceeding node knows what happened at the last node and so on. In this case, our state will extend the normal `MessageState` but also add a custom field for `multi_choice_responses`. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "from typing import Literal\n", + "\n", + "from langgraph.graph import MessagesState\n", + "from pydantic import BaseModel, Field\n", + "\n", + "\n", + "class MultipleChoiceResponse(BaseModel):\n", + " multiple_choice_response: Literal[\"A\", \"B\", \"C\", \"D\"] = Field(\n", + " description=\"Single character response to the question for multiple choice questions. Must be either A, B, C, or D.\"\n", + " )\n", + "\n", + "\n", + "class AgentState(MessagesState):\n", + " multi_choice_response: MultipleChoiceResponse\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Nodes\n", + "\n", + "Nodes are steps in the process flow of our agent where functions can be invoked." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "from functools import lru_cache\n", + "\n", + "from langchain_core.messages import HumanMessage\n", + "from langchain_openai import ChatOpenAI\n", + "from langgraph.prebuilt import ToolNode\n", + "\n", + "\n", + "## Function definitions that invoke an LLM model\n", + "\n", + "### with tools\n", + "@lru_cache(maxsize=4)\n", + "def _get_tool_model(model_name: str):\n", + " if model_name == \"openai\":\n", + " model = ChatOpenAI(temperature=0, model_name=\"gpt-4o\")\n", + " else:\n", + " raise ValueError(f\"Unsupported model type: {model_name}\")\n", + "\n", + " model = model.bind_tools(tools)\n", + " return model\n", + "\n", + "### with structured output\n", + "@lru_cache(maxsize=4)\n", + "def _get_response_model(model_name: str):\n", + " if model_name == \"openai\":\n", + " model = ChatOpenAI(temperature=0, model_name=\"gpt-4o\")\n", + " else:\n", + " raise ValueError(f\"Unsupported model type: {model_name}\")\n", + "\n", + " model = model.with_structured_output(MultipleChoiceResponse)\n", + " return model\n", + "\n", + "### Functions for responding to a multiple choice question\n", + "def multi_choice_structured(state: AgentState, config):\n", + " # We call the model with structured output in order to return the same format to the user every time\n", + " # state['messages'][-2] is the last ToolMessage in the convo, which we convert to a HumanMessage for the model to use\n", + " # We could also pass the entire chat history, but this saves tokens since all we care to structure is the output of the tool\n", + " model_name = config.get(\"configurable\", {}).get(\"model_name\", \"openai\")\n", + "\n", + " print(\"Called multi choice structured\")\n", + "\n", + " response = _get_response_model(model_name).invoke(\n", + " [\n", + " HumanMessage(content=state[\"messages\"][0].content),\n", + " HumanMessage(content=f\"Answer from tool: {state['messages'][-2].content}\"),\n", + " ]\n", + " )\n", + " # We return the final answer\n", + " return {\n", + " \"multi_choice_response\": response.multiple_choice_response,\n", + " }\n", + "\n", + "\n", + "# Function for conditional edge\n", + "def is_multi_choice(state: AgentState):\n", + " return \"options:\" in state[\"messages\"][0].content.lower()\n", + "\n", + "\n", + "def structure_response(state: AgentState, config):\n", + " if is_multi_choice(state):\n", + " return multi_choice_structured(state, config)\n", + " else:\n", + " # if not multi-choice don't need to do anything\n", + " return {\"messages\": []}\n", + "\n", + "\n", + "system_prompt = \"\"\"\n", + " You are an oregon trail playing tool calling AI agent. 
\n", + " Use the tools available to you to answer the question you are presented. \n", + " When in doubt use the tools to help you find the answer.\n", + " If anyone asks your first name is Art return just that string.\n", + "\"\"\"\n", + "\n", + "\n", + "# Define the function that calls the model\n", + "def call_tool_model(state: AgentState, config):\n", + " # Combine system prompt with incoming messages\n", + " messages = [{\"role\": \"system\", \"content\": system_prompt}] + state[\"messages\"]\n", + "\n", + " # Get from LangGraph config\n", + " model_name = config.get(\"configurable\", {}).get(\"model_name\", \"openai\")\n", + "\n", + " # Get our model that binds our tools\n", + " model = _get_tool_model(model_name)\n", + "\n", + " # invoke the central agent/reasoner with the context of the graph\n", + " response = model.invoke(messages)\n", + "\n", + " # We return a list, because this will get added to the existing list\n", + " return {\"messages\": [response]}\n", + "\n", + "\n", + "# Define the function to execute tools\n", + "tool_node = ToolNode(tools)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Graph\n", + "\n", + "The graph composes the tools and nodes into a compilable workflow that can be invoked." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "from typing import Literal, TypedDict\n", + "from langgraph.graph import END, StateGraph\n", + "\n", + "\n", + "# Define the config\n", + "class GraphConfig(TypedDict):\n", + " model_name: Literal[\"anthropic\", \"openai\"]\n", + "\n", + "# Define the function that determines whether to continue or not\n", + "def should_continue(state: AgentState):\n", + " messages = state[\"messages\"]\n", + " last_message = messages[-1]\n", + " # If there is no function call, then we respond to the user\n", + " if not last_message.tool_calls:\n", + " return \"structure_response\"\n", + " # Otherwise if there is, we continue\n", + " else:\n", + " return \"continue\"\n", + "\n", + "\n", + "# Define a new graph\n", + "workflow = StateGraph(AgentState, config_schema=GraphConfig)\n", + "\n", + "# Add nodes\n", + "workflow.add_node(\"agent\", call_tool_model)\n", + "workflow.add_node(\"tools\", tool_node)\n", + "workflow.add_node(\"structure_response\", structure_response)\n", + "\n", + "# Set the entrypoint\n", + "workflow.set_entry_point(\"agent\")\n", + "\n", + "# add conditional edge between agent and tools\n", + "workflow.add_conditional_edges(\n", + " \"agent\",\n", + " should_continue,\n", + " {\"continue\": \"tools\", \"structure_response\": \"structure_response\"},\n", + ")\n", + "\n", + "\n", + "# We now add a normal edge from `tools` to `agent`.\n", + "workflow.add_edge(\"tools\", \"agent\")\n", + "workflow.add_edge(\"structure_response\", END)\n", + "\n", + "\n", + "# This compiles it into a LangChain Runnable,\n", + "# meaning you can use it as you would any other runnable\n", + "graph = workflow.compile()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Evaluate graph structure\n", + "\n", + "When we invoke the graph, it follows four primary steps: \n", + "\n", + "1. **Evaluate Conditional Edge**: The graph evaluates the conditional edge between tools and the agent via the `should_continue` function. This determines whether it should `continue` and call a tool or move to `structure_response` to format the output for the user. \n", + "2. 
**Invoke Tools**: If it decides to invoke a tool, the tool's response is appended as a message to the state and passed back to the agent. \n",
+    "3. **Determine Next Step**: If tools have already been called or are deemed unnecessary, the graph moves to the `structure_response` node. \n",
+    "4. **Handle Multiple-Choice Questions**: If the question is identified as a **multiple-choice question** within the `structure_response` node, a model is invoked to ensure the response is returned as a literal `A, B, C, or D`, as expected by the game. Otherwise, it simply proceeds forward. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "image/png": "<base64-encoded PNG omitted: Mermaid rendering of the compiled agent graph>",
+      "text/plain": [
+       ""
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "from IPython.display import Image, display\n",
+    "\n",
+    "display(Image(graph.get_graph(xray=True).draw_mermaid_png()))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Run scenarios\n",
+    "\n",
+    "Note: LLMs are fundamentally probabilistic, so wrong answers are possible even if the graph is implemented correctly.\n",
+    "\n",
+    "## Scenario 1 - name of wagon leader\n",
+    "\n",
+    "This test confirms that our graph has been set up correctly and can handle a case where tools don't need to be invoked."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      " Question: What is the first name of the wagon leader? 
\n", + "\n", + "20:14:46 httpx INFO HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\n", + " Agent response: Art\n", + "\n" + ] + } + ], + "source": [ + "scenario = {\n", + " \"question\": \"What is the first name of the wagon leader?\",\n", + " \"answer\": \"Art\",\n", + " \"type\": \"free-form\",\n", + "}\n", + "\n", + "print(f\"\\n Question: {scenario['question']} \\n\")\n", + "\n", + "res = graph.invoke({\"messages\": scenario[\"question\"]})\n", + "\n", + "print(f\"\\n Agent response: {res['messages'][-1].content}\\n\")\n", + "\n", + "assert res[\"messages\"][-1].content == scenario[\"answer\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'messages': [HumanMessage(content='What is the first name of the wagon leader?', additional_kwargs={}, response_metadata={}, id='7dfc1edc-6c87-4e34-98e3-c2363d1b16f6'),\n", + " AIMessage(content='Art', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 2, 'prompt_tokens': 216, 'total_tokens': 218, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_cbf1785567', 'id': 'chatcmpl-CVo7q7cgjGy7H1kIqZjL09VzvCGsR', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='run--3c562cc8-e156-4a41-acd0-ac1e5f642214-0', usage_metadata={'input_tokens': 216, 'output_tokens': 2, 'total_tokens': 218, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})]}" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "res" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Scenario 2 - restocking tool\n", + "\n", + "In this test we want to see the agent choose the restocking tool and choose to use the multiple choice output." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " Question: In order to survive the trail ahead, you'll need to have a restocking strategy for when you need to get more supplies or risk starving. If it takes you an estimated 3 days to restock your food and you plan to start with 200lbs of food, budget 10lbs/day to eat, and keep a safety stock of at least 50lbs of back up... at what point should you restock? \n", + "\n", + "TEST: [HumanMessage(content=\"In order to survive the trail ahead, you'll need to have a restocking strategy for when you need to get more supplies or risk starving. If it takes you an estimated 3 days to restock your food and you plan to start with 200lbs of food, budget 10lbs/day to eat, and keep a safety stock of at least 50lbs of back up... 
at what point should you restock?, options: A: 100lbs B: 20lbs C: 5lbs D: 80lbs\", additional_kwargs={}, response_metadata={})]\n", + "20:15:31 httpx INFO HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\n", + " Called restock tool: daily_usage=10, lead_time=3, safety_stock=50 \n", + "\n", + "20:15:33 httpx INFO HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Called multi choice structured\n", + "20:15:35 httpx INFO HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\n", + " Agent response: D\n" + ] + } + ], + "source": [ + "# helper function for multi-choice questions\n", + "def format_multi_choice_question(q):\n", + " question = q[\"question\"]\n", + " options = q.get(\"options\", \"\")\n", + " formatted = f\"{question}, options: {' '.join(options)}\"\n", + " print(f\"TEST: {[HumanMessage(content=formatted)]}\")\n", + " return [HumanMessage(content=formatted)]\n", + "\n", + "scenario = {\n", + " \"question\": \"In order to survive the trail ahead, you'll need to have a restocking strategy for when you need to get more supplies or risk starving. If it takes you an estimated 3 days to restock your food and you plan to start with 200lbs of food, budget 10lbs/day to eat, and keep a safety stock of at least 50lbs of back up... at what point should you restock?\",\n", + " \"answer\": \"D\",\n", + " \"options\": [\"A: 100lbs\", \"B: 20lbs\", \"C: 5lbs\", \"D: 80lbs\"],\n", + " \"type\": \"multi-choice\",\n", + " }\n", + "\n", + "print(f\"\\n Question: {scenario['question']} \\n\")\n", + "\n", + "res = graph.invoke({\"messages\": format_multi_choice_question(scenario)})\n", + "\n", + "print(f\"\\n Agent response: {res['multi_choice_response']}\")\n", + "\n", + "assert res[\"multi_choice_response\"] == scenario[\"answer\"]\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Scenario 3 - retriever tool\n", + "\n", + "In this test, we want to see the retrieval tool invoked and multiple choice structured response." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " Question: Youโ€™ve encountered a dense forest near the Blue Mountains, and your party is unsure how to proceed. There is a fork in the road, and you must choose a path. Which way will you go? \n", + "\n", + "TEST: [HumanMessage(content='Youโ€™ve encountered a dense forest near the Blue Mountains, and your party is unsure how to proceed. There is a fork in the road, and you must choose a path. Which way will you go?, options: A: take the northern trail B: take the southern trail C: turn around D: go fishing', additional_kwargs={}, response_metadata={})]\n", + "20:16:10 httpx INFO HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "20:16:12 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "20:16:13 httpx INFO HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Called multi choice structured\n", + "20:16:14 httpx INFO HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\n", + " Agent response: B\n" + ] + } + ], + "source": [ + "scenario = {\n", + " \"question\": \"Youโ€™ve encountered a dense forest near the Blue Mountains, and your party is unsure how to proceed. There is a fork in the road, and you must choose a path. 
Which way will you go?\",\n", + " \"answer\": \"B\",\n", + " \"options\": [\n", + " \"A: take the northern trail\",\n", + " \"B: take the southern trail\",\n", + " \"C: turn around\",\n", + " \"D: go fishing\",\n", + " ],\n", + " \"type\": \"multi-choice\",\n", + " }\n", + "\n", + "print(f\"\\n Question: {scenario['question']} \\n\")\n", + "\n", + "res = graph.invoke({\"messages\": format_multi_choice_question(scenario)})\n", + "\n", + "print(f\"\\n Agent response: {res['multi_choice_response']}\")\n", + "\n", + "assert res[\"multi_choice_response\"] == scenario[\"answer\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Scenario 4 - Semantic caching\n", + "\n", + "Agent workflows are highly flexible and capable of handling a wide range of scenarios, but this flexibility comes at a cost. Even in our simple example, there can be multiple large-context LLM calls in the same execution, leading to high latency and increased service costs by the end of the month.
\n", + "\n", + "A good practice is to cache answers to known questions. Chatbot interactions are often fairly predictable, particularly in support or FAQ-type use cases, making them excellent candidates for caching.\n", + "\n", + "\n", + "![diagram](../../assets/cache_diagram.png)\n", + "\n", + "## Creating a cache" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20:19:03 sentence_transformers.SentenceTransformer INFO Use pytorch device_name: mps\n", + "20:19:03 sentence_transformers.SentenceTransformer INFO Load pretrained SentenceTransformer: redis/langcache-embed-v1\n" + ] + }, + { + "data": { + "text/plain": [ + "'oregon_trail_cache:602ac35f09671fc9e2a4f4902c6f82f06b9560ea6b5a5dd3e9218fcc1ff47e52'" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import warnings\n", + "from redisvl.extensions.llmcache import SemanticCache\n", + "warnings.filterwarnings(\"ignore\")\n", + "\n", + "hunting_example = \"There's a deer. You're starving. You know what you have to do...\"\n", + "\n", + "semantic_cache = SemanticCache(\n", + " name=\"oregon_trail_cache\",\n", + " redis_url=REDIS_URL,\n", + " distance_threshold=0.1,\n", + ")\n", + "\n", + "semantic_cache.store(prompt=hunting_example, response=\"bang\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Testing the cache" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " Question: There's a deer. You're hungry. You know what you have to do... \n", + "\n", + "Cache hit: [{'response': 'bang', 'key': 'oregon_trail_cache:602ac35f09671fc9e2a4f4902c6f82f06b9560ea6b5a5dd3e9218fcc1ff47e52'}]\n", + "Response time 0.057869911193847656s\n", + "\n", + " Question: Youโ€™ve encountered a dense forest near the Blue Mountains, and your party is unsure how to proceed. There is a fork in the road, and you must choose a path. Which way will you go? \n", + "\n", + "Invoking agent\n", + "TEST: [HumanMessage(content='Youโ€™ve encountered a dense forest near the Blue Mountains, and your party is unsure how to proceed. There is a fork in the road, and you must choose a path. Which way will you go?, options: A: take the northern trail B: take the southern trail C: turn around D: go fishing', additional_kwargs={}, response_metadata={})]\n", + "20:19:35 httpx INFO HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "20:19:36 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "20:19:37 httpx INFO HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Called multi choice structured\n", + "20:19:37 httpx INFO HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Response time 3.039124011993408s\n" + ] + } + ], + "source": [ + "import time\n", + "\n", + "scenarios = [\n", + " {\n", + " \"question\": \"There's a deer. You're hungry. You know what you have to do...\",\n", + " \"answer\": \"bang\",\n", + " \"type\": \"cache_hit\",\n", + " },\n", + " {\n", + " \"question\": \"Youโ€™ve encountered a dense forest near the Blue Mountains, and your party is unsure how to proceed. There is a fork in the road, and you must choose a path. 
Which way will you go?\",\n", + " \"answer\": \"B\",\n", + " \"options\": [\n", + " \"A: take the northern trail\",\n", + " \"B: take the southern trail\",\n", + " \"C: turn around\",\n", + " \"D: go fishing\",\n", + " ],\n", + " \"type\": \"multi-choice\",\n", + " }\n", + "]\n", + "\n", + "for scenario in scenarios:\n", + " print(f\"\\n Question: {scenario['question']} \\n\")\n", + "\n", + " start = time.time()\n", + "\n", + " cache_hit = semantic_cache.check(prompt=scenario[\"question\"], return_fields=[\"response\"])\n", + "\n", + " if not cache_hit:\n", + " print(\"Invoking agent\")\n", + " res = graph.invoke({\"messages\": format_multi_choice_question(scenario)})\n", + " else:\n", + " \n", + " print(f\"Cache hit: {cache_hit}\")\n", + "\n", + " response_time = time.time() - start\n", + "\n", + " print(f\"Response time {response_time}s\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Scenario 5 - Allow/block list router\n", + "\n", + "When ChatGPT first launched, there was a famous example where a car dealership accidentally made one of the latest language models available for free to everyone. They assumed users would only ask questions about cars through their chatbot. However, a group of developers quickly realized that the model was powerful enough to answer coding questions, so they started using the dealership's chatbot for free.
\n", + "\n", + "To prevent this kind of misuse in your system, adding an allow/block router to the front of your application is essential. Fortunately, this is very easy to implement using `redisvl`.\n", + "\n", + "![diagram](../../assets/router_diagram.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Creating the router" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20:20:18 sentence_transformers.SentenceTransformer INFO Use pytorch device_name: mps\n", + "20:20:18 sentence_transformers.SentenceTransformer INFO Load pretrained SentenceTransformer: sentence-transformers/all-mpnet-base-v2\n" + ] + } + ], + "source": [ + "from redisvl.extensions.router import Route, SemanticRouter\n", + "\n", + "# Semantic router\n", + "blocked_references = [\n", + " \"thinks about aliens\",\n", + " \"corporate questions about agile\",\n", + " \"anything about the S&P 500\",\n", + "]\n", + "\n", + "blocked_route = Route(name=\"block_list\", references=blocked_references)\n", + "\n", + "router = SemanticRouter(\n", + " name=\"bouncer\",\n", + " routes=[blocked_route],\n", + " redis_url=REDIS_URL,\n", + " overwrite=False,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Testing the router" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " Question: Tell me about the S&P 500? \n", + "\n", + "Blocked!\n" + ] + } + ], + "source": [ + "scenario = {\n", + " \"question\": \"Tell me about the S&P 500?\",\n", + " \"answer\": \"you shall not pass\",\n", + " \"type\": \"action\",\n", + " }\n", + "\n", + "print(f\"\\n Question: {scenario['question']} \\n\")\n", + "\n", + "blocked_topic_match = router(scenario[\"question\"], distance_threshold=0.2)\n", + "\n", + "assert blocked_topic_match.name == \"block_list\"\n", + "\n", + "print(\"Blocked!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Putting it all together\n", + "\n", + "Once you have defined all the pieces, connecting the various aspects of the full architecture becomes easy and you can tie them together with whatever logic you wish. 
\n", + "\n", + "This could be as simple as:" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "def respond_to_question(question):\n", + " blocked_topic_match = router(question, distance_threshold=0.2)\n", + "\n", + " if blocked_topic_match.name == \"block_list\":\n", + " print(\"App block logic - short circuit\")\n", + " return\n", + "\n", + " cache_hit = semantic_cache.check(prompt=question, return_fields=[\"response\"])\n", + "\n", + " if cache_hit:\n", + " print(\"Cache hit - short circuit\")\n", + " return cache_hit\n", + " \n", + " return graph.invoke({\"messages\": question})\n" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/python-recipes/context-engineering/.env.example b/python-recipes/context-engineering/.env.example new file mode 100644 index 00000000..a75ab0a0 --- /dev/null +++ b/python-recipes/context-engineering/.env.example @@ -0,0 +1,2 @@ +# OpenAI API Key (required to pass to the API container) +OPENAI_API_KEY=your-openai-api-key-here diff --git a/python-recipes/context-engineering/.gitignore b/python-recipes/context-engineering/.gitignore new file mode 100644 index 00000000..03300719 --- /dev/null +++ b/python-recipes/context-engineering/.gitignore @@ -0,0 +1,2 @@ +venv +.env diff --git a/python-recipes/context-engineering/COURSE_SUMMARY.md b/python-recipes/context-engineering/COURSE_SUMMARY.md new file mode 100644 index 00000000..4504d9e3 --- /dev/null +++ b/python-recipes/context-engineering/COURSE_SUMMARY.md @@ -0,0 +1,821 @@ +# Context Engineering Course - Complete Syllabus + +**A comprehensive, hands-on course teaching production-ready context engineering for AI agents.** + +--- + +## ๐Ÿ“Š Course Overview + +**Duration**: 18-23 hours +**Format**: Self-paced, hands-on Jupyter notebooks +**Level**: Intermediate to Advanced +**Prerequisites**: Python, basic AI/ML understanding, familiarity with LLMs + +### What You'll Build + +A complete **Redis University Course Advisor Agent** that: +- Helps students find courses using semantic search with Redis and RedisVL +- Remembers student preferences and goals across sessions using Agent Memory Server +- Provides personalized recommendations based on student profile +- Uses intelligent tool selection with LangGraph +- Optimizes context for production deployment with cost management + +### Technologies Used + +- **Python 3.10+** - Primary programming language +- **Redis 8.0+** - Vector storage and caching +- **LangChain 0.2+** - LLM application framework +- **LangGraph 0.2+** - Stateful agent workflows +- **Agent Memory Server 0.12.3+** - Memory management +- **OpenAI GPT-4** - Language model +- **RedisVL** - Vector search library +- **Pydantic** - Data validation and models + +--- + +## ๐Ÿ“– Course Structure + +### **Section 1: Context Engineering Foundations** (2-3 hours) + +**Notebooks**: 2 | **Prerequisites**: None + +#### Notebooks +1. **What is Context Engineering?** - Four context types, principles, and architecture +2. 
**Context Assembly Strategies** - How to combine contexts effectively + +#### Learning Outcomes +- โœ… Understand the four context types (system, user, retrieved, conversation) +- โœ… Learn context assembly strategies and patterns +- โœ… Grasp the importance of context engineering in AI systems +- โœ… Understand the role of context in LLM performance + +#### Key Concepts +- **Four Context Types**: System, User, Retrieved, Conversation +- **Context Assembly**: How to combine different context sources +- **Context Optimization**: Managing context window limits +- **Production Considerations**: Scalability, cost, performance + +#### Reference Agent Components Used +None (pure theory and conceptual foundation) + +--- + +### **Section 2: Retrieved Context Engineering** (3-4 hours) + +**Notebooks**: 1 | **Prerequisites**: Section 1 + +#### Notebooks +1. **Engineering Retrieved Context with RAG** - Vector embeddings, semantic search, course recommendations + +#### Learning Outcomes +- โœ… Implement vector embeddings with OpenAI +- โœ… Build semantic search with Redis and RedisVL +- โœ… Create a course recommendation system +- โœ… Understand RAG architecture patterns +- โœ… Ingest and query vector data + +#### Key Concepts +- **Vector Embeddings**: Converting text to numerical representations +- **Semantic Search**: Finding similar items using vector similarity +- **RAG Pattern**: Retrieval Augmented Generation +- **Redis Vector Search**: Using Redis for vector storage and retrieval +- **Course Catalog Management**: Storing and querying course data + +#### Reference Agent Components Used +- `CourseManager` - Course storage and semantic search +- `redis_config` - Redis configuration and connection management +- `CourseGenerator` - Generate sample course data +- `CourseIngestionPipeline` - Ingest courses into Redis + +#### Key Patterns +- Vector index creation and management +- Semantic search with similarity scoring +- Hybrid search (keyword + semantic) +- Course recommendation algorithms + +--- + +### **Section 3: Memory Systems for Context Engineering** (4-5 hours) + +**Notebooks**: 3 | **Prerequisites**: Sections 1-2 + +#### Notebooks +1. **Working and Long-Term Memory** - Working memory, long-term memory, Agent Memory Server +2. **Combining Memory with Retrieved Context** - Combining memory with RAG, building stateful agents +3. 
**Managing Long Conversations with Compression Strategies** - Compression strategies for long conversations + +#### Learning Outcomes +- โœ… Implement working memory (session-scoped) and long-term memory (cross-session) +- โœ… Use Agent Memory Server for automatic memory extraction +- โœ… Apply memory extraction strategies (discrete, summary, preferences) +- โœ… Implement working memory compression (truncation, priority-based, summarization) +- โœ… Build memory-enhanced RAG systems +- โœ… Create stateful agents with persistent memory + +#### Key Concepts +- **Dual Memory System**: Working memory + Long-term memory +- **Working Memory**: Session-scoped, task-focused context +- **Long-term Memory**: Cross-session, persistent knowledge +- **Memory Extraction**: Automatic extraction of important facts +- **Memory Extraction Strategies**: Discrete (facts), Summary (summaries), Preferences (user preferences) +- **Working Memory Compression**: Truncation, Priority-Based, Summarization +- **Agent Memory Server**: Production-ready memory management + +#### Reference Agent Components Used +- Data models: `Course`, `StudentProfile`, `DifficultyLevel`, `CourseFormat`, `Semester` +- Enums for type safety and validation +- `CourseManager` for course operations + +#### Key Patterns +- Memory extraction strategies (discrete vs. summary) +- Working memory compression techniques +- Cross-session memory persistence +- Memory-enhanced RAG workflows +- Automatic memory extraction with Agent Memory Server + +--- + +### **Section 4: Tool Selection & LangGraph** (5-6 hours) + +**Notebooks**: 3 | **Prerequisites**: Sections 1-3 + +#### Notebooks +1. **Tools and LangGraph Fundamentals** - Tool creation, LangGraph basics, state management +2. **Redis University Course Advisor Agent** - Complete production agent with all features +3. **Course Advisor with Compression** - Enhanced agent demonstrating compression strategies + +#### Learning Outcomes +- โœ… Create and orchestrate multiple tools +- โœ… Build stateful agents with LangGraph +- โœ… Implement semantic tool selection +- โœ… Manage agent state and conversation flow +- โœ… Apply compression in production agents +- โœ… Build complete production-ready agents + +#### Key Concepts +- **Tool Creation**: Defining tools with schemas and descriptions +- **LangGraph**: Stateful agent workflow framework +- **State Management**: Managing agent state across turns +- **Tool Orchestration**: Coordinating multiple tools +- **Semantic Tool Selection**: Choosing tools based on context +- **Production Agents**: Building scalable, production-ready agents + +#### Reference Agent Components Used +- All data models and enums +- `CourseManager` for course operations +- `redis_config` for Redis connections +- Agent Memory Server integration + +#### Key Patterns +- LangGraph StateGraph for agent workflows +- Tool binding and invocation +- State persistence with checkpointers +- Multi-turn conversations +- Working memory compression in production + +--- + +### **Section 5: Optimization & Production** (4-5 hours) + +**Notebooks**: 3 | **Prerequisites**: Sections 1-4 | **Status**: โœ… Complete + +#### Notebooks +1. **Measuring and Optimizing Performance** - Token counting, cost tracking, performance metrics +2. **Scaling with Semantic Tool Selection** - ๐Ÿ†• **RedisVL Semantic Router & Semantic Cache** +3. 
**Production Readiness and Quality Assurance** - Validation, monitoring, error handling + +#### Learning Outcomes +- โœ… Implement token counting and budget management +- โœ… Optimize context assembly for cost reduction +- โœ… ๐Ÿ†• **Use RedisVL Semantic Router for production tool selection** +- โœ… ๐Ÿ†• **Implement Semantic Cache for 92% latency reduction** +- โœ… ๐Ÿ†• **Apply industry-standard semantic routing patterns** +- โœ… Build production monitoring and analytics +- โœ… Handle errors and edge cases gracefully +- โœ… Deploy scalable AI agents +- โœ… Implement advanced tool selection strategies + +#### Key Concepts +- **Token Counting**: Accurate token estimation for cost management +- **Token Budgets**: Allocating context window space efficiently +- **Cost Optimization**: Reducing LLM API costs +- **๐Ÿ†• Semantic Routing**: Production-ready tool selection with RedisVL +- **๐Ÿ†• Semantic Caching**: Intelligent caching for similar queries +- **Performance Monitoring**: Tracking agent performance metrics +- **Production Deployment**: Scaling to thousands of users +- **Error Handling**: Graceful degradation and recovery + +#### ๐Ÿ†• RedisVL Extensions Used (Notebook 2) +- **`SemanticRouter`**: Production-ready semantic routing for tool selection + - Automatic index creation and management + - Route-based tool organization + - Distance threshold configuration + - Serialization support (YAML/dict) + - 60% code reduction vs custom implementation + +- **`SemanticCache`**: Intelligent caching for LLM operations + - Semantic similarity-based cache matching + - TTL policies for cache expiration + - Filterable fields for multi-tenant scenarios + - 30-40% cache hit rate typical + - 92% latency reduction on cache hits (5ms vs 65ms) + +#### Reference Agent Components Used +- Optimization helpers: `count_tokens`, `estimate_token_budget`, `hybrid_retrieval` +- Production utilities: `create_summary_view`, `filter_tools_by_intent` +- `classify_intent_with_llm` - LLM-based intent classification +- `extract_references` - Reference extraction for grounding +- `format_context_for_llm` - Context formatting utilities + +#### Production Patterns Demonstrated +```python +# Semantic Router Pattern (Notebook 2) +from redisvl.extensions.router import Route, SemanticRouter + +# Define routes for tools +route = Route( + name="search_courses", + references=["Find courses", "Search catalog", ...], + metadata={"tool": search_tool}, + distance_threshold=0.3 +) + +# Initialize router (handles everything automatically) +router = SemanticRouter( + name="tool-router", + routes=[route1, route2, ...], + redis_url=REDIS_URL +) + +# Select tools (one line!) 
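+# (Assumption: if no route clears its distance_threshold, matches comes
+# back empty — keep a fallback, e.g. binding a default tool set.)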
+matches = router.route_many(query, max_k=3) +selected_tools = [m.metadata["tool"] for m in matches] + +# Semantic Cache Pattern (Notebook 2) +from redisvl.extensions.llmcache import SemanticCache + +# Initialize cache +cache = SemanticCache( + name="tool_selection_cache", + distance_threshold=0.1, + ttl=3600 +) + +# Check cache first (fast path) +if cached := cache.check(prompt=query): + return cached[0]["response"] # 5ms + +# Cache miss - compute and store (slow path) +result = compute_expensive_operation(query) # 65ms +cache.store(prompt=query, response=result) +``` + +#### Key Patterns +- Token budget estimation and tracking +- Hybrid retrieval (summary + targeted search) +- Tool filtering by intent +- Structured view creation for efficiency +- Production monitoring and analytics + +--- + +## ๐ŸŽฏ Complete Learning Outcomes + +By completing this course, you will be able to: + +### Technical Skills +- โœ… **Design context-aware AI agents** from scratch +- โœ… **Implement production-ready memory systems** with Agent Memory Server +- โœ… **Build RAG applications** using Redis and vector search +- โœ… **Optimize context assembly** for cost and performance +- โœ… **Create stateful agents** with LangGraph +- โœ… **Deploy scalable AI systems** to production +- โœ… **Apply context engineering patterns** to any domain + +### Professional Skills +- โœ… Production AI development experience +- โœ… System optimization and performance tuning +- โœ… Cost management and efficiency optimization +- โœ… Enterprise-grade monitoring and analytics +- โœ… Scalable architecture design +- โœ… Production deployment best practices + +### Portfolio Project +- โœ… Complete Redis University Course Advisor +- โœ… Production-ready codebase with comprehensive features +- โœ… Demonstrated scalability and optimization +- โœ… Professional documentation and testing + +--- + +## ๐Ÿ“ฆ Reference Agent Package + +The `redis-context-course` package provides production-ready components used throughout the course. 
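+
+As a quick orientation, here is a minimal sketch of the package in use. It assumes Redis is running locally and the course catalog has already been ingested (via the scripts listed below); `CourseManager` and the async `search_courses(query, limit)` call mirror the hybrid retrieval pattern shown later in this document, and the query string is illustrative:
+
+```python
+import asyncio
+
+from redis_context_course import CourseManager
+
+async def main():
+    # Semantic search over the ingested course catalog
+    course_manager = CourseManager()
+    courses = await course_manager.search_courses(
+        query="machine learning with Python",
+        limit=3,
+    )
+    for course in courses:
+        print(course)
+
+asyncio.run(main())
+```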
+ +### Core Modules + +**`models.py`** +- `Course` - Course data model with validation +- `StudentProfile` - Student information and preferences +- `DifficultyLevel`, `CourseFormat`, `Semester` - Enums for type safety +- `CourseRecommendation`, `AgentResponse` - Response models +- `Prerequisite`, `CourseSchedule`, `Major` - Additional models + +**`course_manager.py`** +- Course catalog management with Redis +- Vector search for semantic course discovery +- Course recommendation algorithms +- RedisVL integration for vector operations + +**`redis_config.py`** +- Redis connection management +- Vector index configuration +- Environment variable handling +- Connection pooling and error handling + +**`tools.py`** (Used in Section 4) +- `create_course_tools()` - Search, get details, check prerequisites +- `create_memory_tools()` - Store and search memories +- `select_tools_by_keywords()` - Simple tool filtering + +**`optimization_helpers.py`** (Used in Section 5) +- `count_tokens()` - Token counting for any model +- `estimate_token_budget()` - Budget breakdown and estimation +- `hybrid_retrieval()` - Combine summary + targeted search +- `create_summary_view()` - Structured summaries for efficiency +- `create_user_profile_view()` - User profile generation +- `filter_tools_by_intent()` - Keyword-based tool filtering +- `classify_intent_with_llm()` - LLM-based intent classification +- `extract_references()` - Find grounding needs in queries +- `format_context_for_llm()` - Combine context sources + +### Scripts + +**`scripts/generate_courses.py`** +- Generate realistic course catalog data +- Create diverse course offerings +- Populate with prerequisites and schedules + +**`scripts/ingest_courses.py`** +- Ingest courses into Redis +- Create vector embeddings +- Build vector search index + +### Examples + +**`examples/basic_usage.py`** +- Simple agent example +- Basic tool usage +- Memory integration + +**`examples/advanced_agent_example.py`** (Future) +- Complete agent using all patterns +- Tool filtering enabled +- Token budget tracking +- Memory integration +- Production-ready structure + +--- + +## ๐Ÿ”‘ Key Concepts Summary + +### Context Engineering Fundamentals +- **Four Context Types**: System, User, Retrieved, Conversation +- **Context Assembly**: Combining different context sources effectively +- **Context Optimization**: Managing context window limits +- **Production Considerations**: Scalability, cost, performance + +### RAG (Retrieval Augmented Generation) +- **Vector Embeddings**: Converting text to numerical representations +- **Semantic Search**: Finding similar items using vector similarity +- **Redis Vector Search**: Using Redis for vector storage and retrieval +- **Hybrid Search**: Combining keyword and semantic search + +### Memory Systems for Context Engineering +- **Dual Memory System**: Working memory (session) + Long-term memory (cross-session) +- **Memory Types**: Semantic (facts), Episodic (events), Message (conversations) +- **Memory Extraction Strategies**: Discrete, Summary, Preferences, Custom +- **Working Memory Compression**: Truncation, Priority-Based, Summarization +- **Agent Memory Server**: Production-ready automatic memory management + +### Tool Selection & LangGraph +- **Tool Schemas**: Name, description, parameters with clear documentation +- **LangGraph**: Stateful agent workflow framework +- **State Management**: Managing agent state across conversation turns +- **Tool Orchestration**: Coordinating multiple tools effectively +- **Semantic Tool Selection**: 
Choosing tools based on context and intent + +### Optimization & Production +- **Token Budgets**: Allocating context window space efficiently +- **Retrieval Strategies**: Full context (bad), RAG (good), Summaries (compact), Hybrid (best) +- **Tool Filtering**: Show only relevant tools based on intent +- **Structured Views**: Pre-computed summaries for LLM consumption +- **Cost Optimization**: Reducing token usage by 50-70% +- **Performance Monitoring**: Tracking metrics for production deployment + +--- + +## ๐Ÿ—๏ธ Production Patterns + +### 1. Complete Memory Flow +```python +from agent_memory_client import MemoryClient + +# Initialize memory client +memory_client = MemoryClient( + base_url="http://localhost:8088", + user_id="student_123" +) + +# Load working memory for session +working_memory = await memory_client.get_working_memory( + session_id="session_456", + model_name="gpt-4" +) + +# Search long-term memory for relevant facts +memories = await memory_client.search_memories( + query="What courses is the student interested in?", + limit=5 +) + +# Build context with memories +system_prompt = build_prompt(instructions, memories) + +# Process with LLM +response = llm.invoke(messages) + +# Save working memory (triggers automatic extraction) +await memory_client.save_working_memory( + session_id="session_456", + messages=messages +) +``` + +### 2. Hybrid Retrieval Pattern +```python +from redis_context_course import CourseManager, hybrid_retrieval + +# Pre-computed summary (cached) +summary = """ +Redis University offers 50+ courses across 5 categories: +- Data Structures (15 courses) +- AI/ML (12 courses) +- Web Development (10 courses) +... +""" + +# Targeted semantic search +course_manager = CourseManager() +specific_courses = await course_manager.search_courses( + query="machine learning with Python", + limit=3 +) + +# Combine for optimal context +context = f"{summary}\n\nMost Relevant Courses:\n{specific_courses}" +``` + +### 3. Tool Filtering by Intent +```python +from redis_context_course import filter_tools_by_intent + +# Define tool groups +tool_groups = { + "search": ["search_courses", "find_prerequisites"], + "memory": ["store_preference", "recall_history"], + "recommendation": ["recommend_courses", "suggest_path"] +} + +# Filter based on user query +query = "What courses should I take for machine learning?" +relevant_tools = filter_tools_by_intent( + query=query, + tool_groups=tool_groups, + keywords={"search": ["find", "what", "which"], + "recommendation": ["should", "recommend", "suggest"]} +) + +# Bind only relevant tools to LLM +llm_with_tools = llm.bind_tools(relevant_tools) +``` + +### 4. Token Budget Management +```python +from redis_context_course import count_tokens, estimate_token_budget + +# Estimate token budget +budget = estimate_token_budget( + system_prompt=system_prompt, + working_memory_messages=10, + long_term_memories=5, + retrieved_context_items=3, + model="gpt-4" +) + +print(f"Estimated tokens: {budget['total_with_response']}") +print(f"Cost estimate: ${budget['estimated_cost']}") + +# Check if within limits +if budget['total_with_response'] > 128000: + # Trigger compression or reduce context + compressed_memory = compress_working_memory( + messages=messages, + strategy="summarization", + target_tokens=5000 + ) +``` + +### 5. 
Structured Views for Efficiency +```python +from redis_context_course import create_summary_view + +# Retrieve all courses +courses = await course_manager.get_all_courses() + +# Create structured summary (one-time or cached) +summary = await create_summary_view( + items=courses, + group_by="category", + include_stats=True +) + +# Cache for reuse +redis_client.set("course_catalog_summary", summary, ex=3600) + +# Use in system prompts +system_prompt = f""" +You are a course advisor with access to: + +{summary} + +Use search_courses() for specific queries. +""" +``` + +### 6. Memory Extraction Strategies +```python +# Discrete Strategy (individual facts) +await memory_client.save_working_memory( + session_id=session_id, + messages=messages, + extraction_strategy="discrete" # Extracts individual facts +) + +# Summary Strategy (conversation summaries) +await memory_client.save_working_memory( + session_id=session_id, + messages=messages, + extraction_strategy="summary" # Creates summaries +) + +# Preferences Strategy (user preferences) +await memory_client.save_working_memory( + session_id=session_id, + messages=messages, + extraction_strategy="preferences" # Extracts preferences +) +``` + +### 7. Working Memory Compression +```python +# Truncation (keep recent messages) +compressed = truncate_memory(messages, keep_last=10) + +# Priority-Based (score by importance) +compressed = priority_compress( + messages=messages, + target_tokens=5000, + scoring_fn=importance_score +) + +# Summarization (LLM-based) +compressed = await summarize_memory( + messages=messages, + llm=llm, + target_tokens=5000 +) +``` + +--- + +## ๐Ÿ“š How to Use This Course + +### Notebook Structure + +All patterns are demonstrated in notebooks with: +- โœ… **Conceptual explanations** - Theory and principles +- โœ… **Bad examples** - What not to do and why +- โœ… **Good examples** - Best practices and patterns +- โœ… **Runnable code** - Complete, executable examples +- โœ… **Testing and verification** - Validate your implementation +- โœ… **Exercises for practice** - Hands-on challenges + +### Importing Components in Your Code + +```python +from redis_context_course import ( + # Core Classes + CourseManager, # Course storage and search + RedisConfig, # Redis configuration + redis_config, # Redis config instance + + # Data Models + Course, # Course data model + StudentProfile, # Student information + DifficultyLevel, # Difficulty enum + CourseFormat, # Format enum (online, in-person, hybrid) + Semester, # Semester enum + + # Tools (Section 4) + create_course_tools, # Create course-related tools + create_memory_tools, # Create memory management tools + select_tools_by_keywords, # Simple tool filtering + + # Optimization Helpers (Section 5) + count_tokens, # Token counting + estimate_token_budget, # Budget estimation + hybrid_retrieval, # Hybrid search strategy + create_summary_view, # Summary generation + create_user_profile_view, # User profile formatting + filter_tools_by_intent, # Intent-based tool filtering + classify_intent_with_llm, # LLM-based intent classification + extract_references, # Reference extraction + format_context_for_llm, # Context formatting +) +``` + +### Recommended Learning Path + +#### For Beginners (3-4 weeks, 6-8 hours/week) +1. **Week 1**: Complete Section 1 (Foundations) and Section 2 (RAG) +2. **Week 2**: Work through Section 3 (Memory Systems for Context Engineering) +3. **Week 3**: Build agents in Section 4 (Tool Selection & LangGraph) +4. 
**Week 4**: Optimize in Section 5 (Production) + +#### For Experienced Developers (1-2 weeks full-time) +- **Skip to Section 2** if familiar with context engineering basics +- **Jump to Section 3** if you've built RAG systems before +- **Start at Section 4** if you want to focus on LangGraph and agents + +#### Time Commitment Options +- **Intensive**: 1 week (full-time, 8 hours/day) +- **Standard**: 3-4 weeks (part-time, 6-8 hours/week) +- **Relaxed**: 6-8 weeks (casual, 3-4 hours/week) + +### Learning Tips +1. **Start with Section 1** - Build foundational understanding +2. **Progress sequentially** - Each section builds on the previous +3. **Complete all exercises** - Hands-on practice is essential +4. **Experiment freely** - Modify code and test variations +5. **Build your own variations** - Apply patterns to your domain + +--- + +## ๐ŸŽฏ Key Takeaways + +### What Makes a Production-Ready Agent? + +1. **Clear System Instructions** - Tell the agent what to do and how to behave +2. **Well-Designed Tools** - Give it capabilities with clear descriptions and examples +3. **Memory Integration** - Remember context across sessions with dual memory system +4. **Token Management** - Stay within limits efficiently with budget tracking +5. **Smart Retrieval** - Hybrid approach (summary + targeted RAG) +6. **Tool Filtering** - Show only relevant tools based on intent +7. **Structured Views** - Pre-compute summaries for efficiency +8. **Error Handling** - Graceful degradation and recovery +9. **Monitoring** - Track performance, costs, and quality metrics +10. **Scalability** - Design for thousands of concurrent users + +### Common Pitfalls to Avoid + +โŒ **Don't:** +- Include all tools on every request (causes confusion and token waste) +- Use vague tool descriptions (LLM won't know when to use them) +- Ignore token budgets (leads to errors and high costs) +- Use only full context or only RAG (inefficient or incomplete) +- Forget to save working memory (no automatic extraction) +- Store everything in long-term memory (noise and retrieval issues) +- Skip error handling (production failures) +- Ignore performance monitoring (can't optimize what you don't measure) + +โœ… **Do:** +- Filter tools by intent (show only relevant tools) +- Write detailed tool descriptions with examples (clear guidance for LLM) +- Estimate and monitor token usage (stay within budgets) +- Use hybrid retrieval (summary + targeted search for best results) +- Save working memory to trigger extraction (automatic memory management) +- Store only important facts in long-term memory (high signal-to-noise ratio) +- Implement graceful error handling (production resilience) +- Track metrics and optimize (continuous improvement) + +--- + +## ๐ŸŒ Real-World Applications + +The patterns learned in this course apply directly to: + +### Enterprise AI Systems +- **Customer service chatbots** with sophisticated memory and tool routing +- **Technical support agents** with intelligent knowledge retrieval +- **Sales assistants** with personalized recommendations and context +- **Knowledge management systems** with optimized context assembly + +### Educational Technology +- **Personalized learning assistants** that remember student progress +- **Academic advising systems** with comprehensive course knowledge +- **Intelligent tutoring systems** with adaptive responses +- **Student support chatbots** with institutional knowledge + +### Production AI Services +- **Multi-tenant SaaS AI platforms** with user isolation and scaling +- **API-based AI 
services** with cost optimization and monitoring +- **Scalable conversation systems** with memory persistence +- **Enterprise AI deployments** with comprehensive analytics + +--- + +## ๐Ÿ“Š Expected Results + +### Measurable Improvements +- **50-70% token reduction** through intelligent context optimization +- **Semantic tool selection** replacing brittle keyword matching +- **Cross-session memory** enabling natural conversation continuity +- **Production scalability** supporting thousands of concurrent users +- **Cost optimization** reducing LLM API expenses significantly + +### Skills Gained +- ๐Ÿ’ผ **Portfolio project** demonstrating context engineering mastery +- ๐Ÿ“Š **Performance monitoring expertise** for production deployment +- ๐Ÿ› ๏ธ **Production-ready patterns** for building AI agents +- ๐ŸŽฏ **Cost optimization skills** for managing LLM expenses +- ๐Ÿš€ **Scalable architecture design** for enterprise deployments + +--- + +## ๐Ÿš€ Next Steps + +After completing this course, you can: + +1. **Extend the reference agent** - Add new tools and capabilities for your domain +2. **Apply to your use case** - Adapt patterns to your specific requirements +3. **Optimize further** - Experiment with different strategies and measure results +4. **Deploy to production** - Use learned patterns for real-world applications +5. **Share your learnings** - Contribute back to the community +6. **Build your portfolio** - Showcase your context engineering expertise + +--- + +## ๐Ÿ“š Resources + +### Documentation +- **[Main README](README.md)** - Course overview and quick start +- **[SETUP.md](SETUP.md)** - Detailed setup instructions +- **[notebooks/README.md](notebooks/README.md)** - Notebook-specific documentation +- **[notebooks/SETUP_GUIDE.md](notebooks/SETUP_GUIDE.md)** - Comprehensive setup guide +- **[reference-agent/README.md](reference-agent/README.md)** - Reference agent documentation + +### External Resources +- **[Agent Memory Server](https://github.com/redis/agent-memory-server)** - Memory management system +- **[Redis Documentation](https://redis.io/docs/)** - Redis official documentation +- **[LangChain Documentation](https://python.langchain.com/)** - LangChain framework docs +- **[LangGraph Documentation](https://langchain-ai.github.io/langgraph/)** - LangGraph stateful agents +- **[OpenAI API Reference](https://platform.openai.com/docs/api-reference)** - OpenAI API documentation + +### Community +- **[Redis Discord](https://discord.gg/redis)** - Join the Redis community +- **[GitHub Issues](https://github.com/redis-developer/redis-ai-resources/issues)** - Report issues or ask questions +- **[Redis AI Resources](https://github.com/redis-developer/redis-ai-resources)** - More AI examples and recipes + +--- + +## ๐Ÿ“ Course Metadata + +**Version**: 2.0 +**Last Updated**: November 2025 +**Maintainer**: Redis AI Resources Team +**License**: MIT + +**Technologies**: +- Python 3.10+ +- Redis 8.0+ +- LangChain 0.2+ +- LangGraph 0.2+ +- Agent Memory Server 0.12.3+ +- OpenAI GPT-4 + +**Course Stats**: +- **Duration**: 18-23 hours +- **Sections**: 5 +- **Notebooks**: 12 +- **Hands-on Exercises**: 30+ +- **Production Patterns**: 15+ + +--- + +**๐ŸŽ‰ Ready to master context engineering? 
[Get started now!](README.md#-quick-start-5-minutes)** + +--- + +*This comprehensive course provides hands-on education in context engineering - taking you from fundamentals to production-ready expertise through a single, evolving project that demonstrates real-world impact.* + diff --git a/python-recipes/context-engineering/README.md b/python-recipes/context-engineering/README.md new file mode 100644 index 00000000..14645827 --- /dev/null +++ b/python-recipes/context-engineering/README.md @@ -0,0 +1,675 @@ +# Context Engineering Course + +**A comprehensive, hands-on course teaching production-ready context engineering for AI agents using Redis, Agent Memory Server, LangChain, and LangGraph.** + +[![Redis](https://img.shields.io/badge/Redis-8.0+-DC382D?logo=redis&logoColor=white)](https://redis.io/) +[![Python](https://img.shields.io/badge/Python-3.10+-3776AB?logo=python&logoColor=white)](https://www.python.org/) +[![LangChain](https://img.shields.io/badge/LangChain-0.2+-1C3C3C?logo=chainlink&logoColor=white)](https://python.langchain.com/) +[![OpenAI](https://img.shields.io/badge/OpenAI-GPT--4-412991?logo=openai&logoColor=white)](https://openai.com/) + +--- + +## ๐Ÿ“š What is Context Engineering? + +**Context Engineering** is the practice of designing, implementing, and optimizing context management systems for AI agents. It's the difference between a chatbot that forgets everything and an intelligent assistant that understands your needs. + +### The Four Context Types + +1. **System Context** - What the AI should know about its role, capabilities, and environment +2. **User Context** - Information about the user, their preferences, and history +3. **Retrieved Context** - Dynamically fetched information from databases, APIs, or vector stores +4. **Conversation Context** - The ongoing dialogue and task-focused working memory + +### Why Context Engineering Matters + +- ๐ŸŽฏ **Better AI Performance** - Agents with proper context make better decisions +- ๐Ÿ’ฐ **Cost Optimization** - Efficient context management reduces token usage by 50-70% +- ๐Ÿ”„ **Cross-Session Memory** - Users don't have to repeat themselves +- ๐Ÿš€ **Production Scalability** - Handle thousands of concurrent users effectively +- ๐Ÿ› ๏ธ **Tool Orchestration** - Intelligent tool selection based on context + +--- + +## ๐ŸŽ“ Course Overview + +**Duration**: 18-23 hours +**Format**: Self-paced, hands-on notebooks +**Level**: Intermediate to Advanced +**Prerequisites**: Python, basic AI/ML understanding, familiarity with LLMs + +### What You'll Build + +A complete **Redis University Course Advisor Agent** that: +- Helps students find courses using semantic search +- Remembers student preferences and goals across sessions +- Provides personalized recommendations +- Uses intelligent tool selection with LangGraph +- Optimizes context for production deployment + +### What You'll Learn + +- โœ… Four context types and assembly strategies +- โœ… RAG (Retrieval Augmented Generation) with Redis and RedisVL +- โœ… Dual memory systems (working + long-term) with Agent Memory Server +- โœ… Memory extraction strategies (discrete, summary, preferences) +- โœ… Working memory compression techniques +- โœ… LangGraph for stateful agent workflows +- โœ… Semantic tool selection and orchestration +- โœ… Production optimization and cost management + +--- + +## ๐Ÿ“– Course Structure + +### **Section 1: Context Engineering Foundations** (2-3 hours) +**2 notebooks** | **Prerequisites**: None + +Learn the foundational concepts of context engineering and 
the four context types. + +**Notebooks**: +1. **What is Context Engineering?** - Four context types, principles, and architecture +2. **Context Assembly Strategies** - How to combine contexts effectively + +**Learning Outcomes**: +- Understand the four context types and their roles +- Learn context assembly strategies +- Grasp the importance of context engineering in AI systems + +**Reference Agent Components Used**: None (pure theory) + +--- + +### **Section 2: Retrieved Context Engineering** (3-4 hours) +**1 notebook** | **Prerequisites**: Section 1 + +Build a RAG system using Redis and RedisVL for semantic course search. + +**Notebooks**: +1. **Engineering Retrieved Context with RAG** - Vector embeddings, semantic search, course recommendations + +**Learning Outcomes**: +- Implement vector embeddings with OpenAI +- Build semantic search with Redis and RedisVL +- Create a course recommendation system +- Understand RAG architecture patterns + +**Reference Agent Components Used**: +- `CourseManager` - Course storage and search +- `redis_config` - Redis configuration +- `CourseGenerator`, `CourseIngestionPipeline` - Data generation scripts + +--- + +### **Section 3: Memory Systems for Context Engineering** (4-5 hours) +**3 notebooks** | **Prerequisites**: Sections 1-2 + +Master dual memory systems with Agent Memory Server, including extraction and compression strategies. + +**Notebooks**: +1. **Memory Fundamentals and Integration** - Working memory, long-term memory, Agent Memory Server +2. **Memory-Enhanced RAG and Agents** - Combining memory with RAG, building stateful agents +3. **Working Memory Compression** - Compression strategies for long conversations + +**Learning Outcomes**: +- Implement working memory (session-scoped) and long-term memory (cross-session) +- Use Agent Memory Server for automatic memory extraction +- Apply memory extraction strategies (discrete, summary, preferences) +- Implement working memory compression (truncation, priority-based, summarization) +- Build memory-enhanced RAG systems + +**Reference Agent Components Used**: +- Data models: `Course`, `StudentProfile`, `DifficultyLevel`, `CourseFormat`, `Semester` +- Enums for type safety + +--- + +### **Section 4: Tool Selection & LangGraph** (5-6 hours) +**3 notebooks** | **Prerequisites**: Sections 1-3 + +Build production agents with LangGraph, semantic tool selection, and state management. + +**Notebooks**: +1. **Tools and LangGraph Fundamentals** - Tool creation, LangGraph basics, state management +2. **Redis University Course Advisor Agent** - Complete production agent with all features +3. **Course Advisor with Compression** - Enhanced agent demonstrating compression strategies + +**Learning Outcomes**: +- Create and orchestrate multiple tools +- Build stateful agents with LangGraph +- Implement semantic tool selection +- Manage agent state and conversation flow +- Apply compression in production agents + +**Reference Agent Components Used**: +- All data models and enums +- `CourseManager` for course operations +- `redis_config` for Redis connections + +--- + +### **Section 5: Optimization & Production** (4-5 hours) +**3 notebooks** | **Prerequisites**: Sections 1-4 | **Status**: โœ… Complete + +Optimize for production with token management, cost optimization, semantic routing, and caching. + +**Notebooks**: +1. **Measuring and Optimizing Performance** - Token counting, cost tracking, performance metrics +2. **Scaling with Semantic Tool Selection** - ๐Ÿ†• RedisVL Semantic Router & Semantic Cache +3. 
**Production Readiness and Quality Assurance** - Validation, monitoring, error handling + +**Learning Outcomes**: +- Implement token counting and budget management +- Optimize context assembly for cost reduction +- ๐Ÿ†• **Use RedisVL Semantic Router for production tool selection** +- ๐Ÿ†• **Implement Semantic Cache for 92% latency reduction** +- Build production monitoring and analytics +- Handle errors and edge cases gracefully +- Deploy scalable AI agents + +**๐Ÿ†• New in Notebook 2**: +- **RedisVL Semantic Router**: Production-ready semantic routing (60% code reduction vs custom implementation) +- **RedisVL Semantic Cache**: Intelligent caching for tool selections (30-40% cache hit rate) +- **Performance**: 5ms cache hits vs 65ms cache misses (10-20x faster) +- **Industry Patterns**: Learn production-ready approaches, not custom implementations + +**Reference Agent Components Used**: +- Optimization helpers: `count_tokens`, `estimate_token_budget`, `hybrid_retrieval` +- Production utilities: `create_summary_view`, `filter_tools_by_intent` + +--- + +## ๐Ÿ“ Repository Structure + +``` +context-engineering/ +โ”œโ”€โ”€ README.md # ๐Ÿ‘ˆ This file - Main entry point +โ”œโ”€โ”€ COURSE_SUMMARY.md # Complete course syllabus and learning outcomes +โ”œโ”€โ”€ SETUP.md # Detailed setup guide +โ”œโ”€โ”€ docker-compose.yml # Redis + Agent Memory Server setup +โ”œโ”€โ”€ requirements.txt # Python dependencies +โ”‚ +โ”œโ”€โ”€ notebooks/ # ๐Ÿ‘ˆ Course notebooks (main content) +โ”‚ โ”œโ”€โ”€ README.md # Notebook-specific documentation +โ”‚ โ”œโ”€โ”€ SETUP_GUIDE.md # Detailed setup instructions +โ”‚ โ”œโ”€โ”€ REFERENCE_AGENT_USAGE_ANALYSIS.md # Component usage analysis +โ”‚ โ”œโ”€โ”€ section-1-context-engineering-foundations/ # Section 1 notebooks +โ”‚ โ”œโ”€โ”€ section-2-retrieved-context-engineering/ # Section 2 notebooks +โ”‚ โ”œโ”€โ”€ section-3-memory-systems-for-context-engineering/ # Section 3 notebooks +โ”‚ โ”œโ”€โ”€ section-4-tool-selection/ # Section 4 notebooks +โ”‚ โ””โ”€โ”€ section-5-optimization-production/ # Section 5 notebooks +โ”‚ +โ””โ”€โ”€ reference-agent/ # Production-ready reference implementation + โ”œโ”€โ”€ README.md # Reference agent documentation + โ”œโ”€โ”€ redis_context_course/ # Python package + โ”‚ โ”œโ”€โ”€ __init__.py # Package exports + โ”‚ โ”œโ”€โ”€ models.py # Data models (Course, StudentProfile, etc.) + โ”‚ โ”œโ”€โ”€ course_manager.py # Course storage and search + โ”‚ โ”œโ”€โ”€ redis_config.py # Redis configuration + โ”‚ โ”œโ”€โ”€ tools.py # Tool creation helpers + โ”‚ โ”œโ”€โ”€ optimization_helpers.py # Production utilities + โ”‚ โ””โ”€โ”€ scripts/ # Data generation scripts + โ”œโ”€โ”€ examples/ # Usage examples + โ””โ”€โ”€ tests/ # Test suite +``` + +--- + +## ๐Ÿš€ Quick Start (5 Minutes) + +Get up and running with the course in 5 simple steps: + +### **Step 1: Clone the Repository** +```bash +git clone +cd redis-ai-resources/python-recipes/context-engineering +``` + +### **Step 2: Set Environment Variables** +```bash +# Copy the example environment file +cp .env.example .env + +# Edit .env and add your OpenAI API key +# OPENAI_API_KEY=sk-your-key-here +``` + +### **Step 3: Start Services with Docker** +```bash +# Start Redis and Agent Memory Server +docker-compose up -d + +# Verify services are running +docker-compose ps +``` + +### **Step 4: Install Dependencies** +```bash +# Install Python dependencies +pip install -r requirements.txt + +# Install reference agent package (editable mode) +cd reference-agent +pip install -e . +cd .. 
+``` + +### **Step 5: Start Learning!** +```bash +# Start Jupyter +jupyter notebook notebooks/ + +# Open: section-1-context-engineering-foundations/01_what_is_context_engineering.ipynb +``` + +### **Verification** + +Check that everything is working: + +```bash +# Check Redis +docker exec redis-context-engineering redis-cli ping +# Expected output: PONG + +# Check Agent Memory Server +curl http://localhost:8088/v1/health +# Expected output: {"now":} + +# Check Python packages +python -c "import redis_context_course; print('โœ… Reference agent installed')" +# Expected output: โœ… Reference agent installed +``` + +**๐ŸŽ‰ You're ready to start!** Open the first notebook and begin your context engineering journey. + +--- + +## ๐Ÿ› ๏ธ Detailed Setup Instructions + +For complete setup instructions including troubleshooting, see **[SETUP.md](SETUP.md)** and **[notebooks/SETUP_GUIDE.md](notebooks/SETUP_GUIDE.md)**. + +### System Requirements + +#### Required +- **Python 3.10+** (Python 3.8+ may work but 3.10+ recommended) +- **Docker Desktop** (for Redis and Agent Memory Server) +- **OpenAI API Key** ([get one here](https://platform.openai.com/api-keys)) +- **8GB RAM minimum** (16GB recommended for Section 5) +- **5GB disk space** for dependencies and data + +#### Optional +- **Jupyter Lab** (alternative to Jupyter Notebook) +- **VS Code** with Jupyter extension +- **Redis Insight** for visualizing Redis data + +### Services Architecture + +The course uses three main services: + +1. **Redis** (port 6379) - Vector storage for course catalog +2. **Agent Memory Server** (port 8088) - Memory management +3. **Jupyter** (port 8888) - Interactive notebooks + +All services are configured in `docker-compose.yml` for easy setup. + +### Environment Variables + +Create a `.env` file with the following: + +```bash +# Required +OPENAI_API_KEY=sk-your-key-here + +# Optional (defaults provided) +REDIS_URL=redis://localhost:6379 +AGENT_MEMORY_SERVER_URL=http://localhost:8088 +REDIS_INDEX_NAME=course_catalog +``` + +### Docker Compose Services + +The `docker-compose.yml` file includes: + +```yaml +services: + redis: + image: redis/redis-stack:latest + ports: + - "6379:6379" # Redis + - "8001:8001" # RedisInsight + volumes: + - redis-data:/data + + agent-memory-server: + image: redis/agent-memory-server:latest + ports: + - "8088:8088" + environment: + - REDIS_URL=redis://redis:6379 + depends_on: + - redis +``` + +### Installation Steps + +#### 1. Install Python Dependencies + +```bash +# Core dependencies +pip install -r requirements.txt + +# This installs: +# - langchain>=0.2.0 +# - langgraph>=0.2.0 +# - langchain-openai>=0.1.0 +# - agent-memory-client>=0.12.6 +# - redis>=6.0.0 +# - redisvl>=0.8.0 +# - openai>=1.0.0 +# - jupyter +# - python-dotenv +# - pydantic>=2.0.0 +``` + +#### 2. Install Reference Agent Package + +```bash +cd reference-agent +pip install -e . +cd .. +``` + +This installs the `redis-context-course` package in editable mode, allowing you to: +- Import components in notebooks +- Modify the package and see changes immediately +- Use production-ready utilities + +#### 3. Generate Sample Data (Optional) + +```bash +cd reference-agent + +# Generate course catalog +python -m redis_context_course.scripts.generate_courses + +# Ingest into Redis +python -m redis_context_course.scripts.ingest_courses + +cd .. +``` + +**Note**: Most notebooks generate their own data, so this step is optional. 
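If you prefer to confirm the whole stack from Python rather than the shell, here is a minimal sketch. It assumes the default ports from `docker-compose.yml` and the environment variable names shown above; adjust both if you changed them.

```python
import os
import urllib.request

import redis

# Defaults match docker-compose.yml; override via .env if you changed ports
redis_url = os.getenv("REDIS_URL", "redis://localhost:6379")
memory_url = os.getenv("AGENT_MEMORY_SERVER_URL", "http://localhost:8088")

# 1. Redis should answer PING
assert redis.Redis.from_url(redis_url).ping(), "Redis is not reachable"

# 2. Agent Memory Server should answer its health endpoint
with urllib.request.urlopen(f"{memory_url}/v1/health") as resp:
    assert resp.status == 200, "Agent Memory Server is not healthy"

# 3. The reference agent package should import
import redis_context_course  # noqa: F401

print("โœ… Redis, Agent Memory Server, and redis-context-course all look good")
```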
+ +### Troubleshooting + +#### OpenAI API Key Issues +``` +Error: "OPENAI_API_KEY not found" +``` +**Solution**: Create `.env` file with `OPENAI_API_KEY=your_key_here` + +#### Redis Connection Issues +``` +Error: "Connection refused" or "Redis not available" +``` +**Solutions**: +1. Start Redis: `docker-compose up -d` +2. Check Redis URL in `.env`: `REDIS_URL=redis://localhost:6379` +3. Verify: `docker exec redis-context-engineering redis-cli ping` + +#### Agent Memory Server Issues +``` +Error: "Cannot connect to Agent Memory Server" +``` +**Solutions**: +1. Check service: `docker-compose ps` +2. Check health: `curl http://localhost:8088/v1/health` +3. Restart: `docker-compose restart agent-memory-server` + +#### Import Errors +``` +Error: "No module named 'redis_context_course'" +``` +**Solutions**: +1. Install reference agent: `cd reference-agent && pip install -e .` +2. Restart Jupyter kernel +3. Check Python path in notebook cells + +### Stopping Services + +```bash +# Stop services but keep data +docker-compose stop + +# Stop and remove services (keeps volumes) +docker-compose down + +# Stop and remove everything including data +docker-compose down -v +``` + +--- + +## ๐Ÿ“– Recommended Learning Path + +### For Beginners +**Timeline**: 3-4 weeks (6-8 hours/week) + +1. **Week 1**: Complete Section 1 (Foundations) and Section 2 (RAG) +2. **Week 2**: Work through Section 3 (Memory Systems for Context Engineering) +3. **Week 3**: Build agents in Section 4 (Tool Selection & LangGraph) +4. **Week 4**: Optimize in Section 5 (Production) + +### For Experienced Developers +**Timeline**: 1-2 weeks (full-time) or 2-3 weeks (part-time) + +- **Skip to Section 2** if familiar with context engineering basics +- **Jump to Section 3** if you've built RAG systems before +- **Start at Section 4** if you want to focus on LangGraph and agents + +### Time Commitment Options + +- **Intensive**: 1 week (full-time, 8 hours/day) +- **Standard**: 3-4 weeks (part-time, 6-8 hours/week) +- **Relaxed**: 6-8 weeks (casual, 3-4 hours/week) + +### Learning Tips + +1. **Start with Section 1** - Build foundational understanding +2. **Progress sequentially** - Each section builds on the previous +3. **Complete all exercises** - Hands-on practice is essential +4. **Experiment freely** - Modify code and test variations +5. 
**Build your own variations** - Apply patterns to your domain + +--- + +## ๐ŸŽฏ Learning Outcomes + +### By Section + +**Section 1: Foundations** +- Understand the four context types (system, user, retrieved, conversation) +- Learn context assembly strategies +- Grasp the importance of context engineering + +**Section 2: Retrieved Context Engineering** +- Implement vector embeddings and semantic search +- Build RAG systems with Redis and RedisVL +- Create course recommendation engines + +**Section 3: Memory Systems for Context Engineering** +- Master dual memory systems (working + long-term) +- Implement memory extraction strategies +- Apply working memory compression techniques + +**Section 4: Tool Selection & LangGraph** +- Build stateful agents with LangGraph +- Implement semantic tool selection +- Manage complex agent workflows + +**Section 5: Optimization & Production** +- Optimize token usage and costs +- Implement production monitoring +- Deploy scalable AI agents + +### Complete Program Outcomes + +By completing this course, you will be able to: + +- โœ… **Design context-aware AI agents** from scratch +- โœ… **Implement production-ready memory systems** with Agent Memory Server +- โœ… **Build RAG applications** using Redis and vector search +- โœ… **Optimize context assembly** for cost and performance +- โœ… **Create stateful agents** with LangGraph +- โœ… **Deploy scalable AI systems** to production +- โœ… **Apply context engineering patterns** to any domain + +--- + +## ๐Ÿ—๏ธ Reference Agent Package + +The `redis-context-course` package provides production-ready components used throughout the course. + +### What's Included + +**Core Classes**: +- `CourseManager` - Course storage and semantic search +- `RedisConfig` - Redis configuration and connection management +- Data models: `Course`, `StudentProfile`, `DifficultyLevel`, etc. + +**Tools** (Section 2): +- `create_course_tools()` - Course search and recommendation tools +- `create_memory_tools()` - Memory management tools +- `select_tools_by_keywords()` - Simple tool filtering + +**Optimization Helpers** (Section 5): +- `count_tokens()` - Token counting for any model +- `estimate_token_budget()` - Budget breakdown and estimation +- `hybrid_retrieval()` - Combine summary + search +- `filter_tools_by_intent()` - Intent-based tool filtering +- And more... + +### Educational Approach + +The course demonstrates **building agents from scratch** using these components as building blocks, rather than using pre-built agents. This approach: + +- โœ… Teaches fundamental patterns +- โœ… Provides flexibility for customization +- โœ… Shows both educational and production-ready code +- โœ… Enables adaptation to different use cases + +For detailed component usage analysis, see [notebooks/REFERENCE_AGENT_USAGE_ANALYSIS.md](notebooks/REFERENCE_AGENT_USAGE_ANALYSIS.md). 
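To make the token-budgeting idea behind the optimization helpers concrete, here is a stand-alone sketch built directly on `tiktoken` (an assumption for illustration only; the package's `count_tokens` and `estimate_token_budget` may use different signatures, so see `optimization_helpers.py` for the real ones):

```python
import tiktoken  # assumed available; install with `pip install tiktoken`


def count_tokens(text: str, model: str = "gpt-4") -> int:
    """Count tokens using the tokenizer the target model actually uses."""
    encoding = tiktoken.encoding_for_model(model)
    return len(encoding.encode(text))


def fits_budget(system_prompt: str, context: str, question: str,
                context_window: int = 8_000, reply_reserve: int = 1_000) -> bool:
    """Check an assembled prompt against the window, reserving room for the reply."""
    used = sum(count_tokens(part) for part in (system_prompt, context, question))
    return used + reply_reserve <= context_window


print(count_tokens("How many tokens is this sentence?"))  # prints a small integer
```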
+ +--- + +## ๐ŸŒ Real-World Applications + +The patterns and techniques learned apply directly to: + +### Enterprise AI Systems +- **Customer service chatbots** with sophisticated memory and tool routing +- **Technical support agents** with intelligent knowledge retrieval +- **Sales assistants** with personalized recommendations +- **Knowledge management systems** with optimized context assembly + +### Educational Technology +- **Personalized learning assistants** that remember student progress +- **Academic advising systems** with comprehensive course knowledge +- **Intelligent tutoring systems** with adaptive responses +- **Student support chatbots** with institutional knowledge + +### Production AI Services +- **Multi-tenant SaaS AI platforms** with user isolation and scaling +- **API-based AI services** with cost optimization and monitoring +- **Scalable conversation systems** with memory persistence +- **Enterprise AI deployments** with comprehensive analytics + +--- + +## ๐Ÿ“Š Expected Results + +### Measurable Improvements +- **50-70% token reduction** through intelligent context optimization +- **Semantic tool selection** replacing brittle keyword matching +- **Cross-session memory** enabling natural conversation continuity +- **Production scalability** supporting thousands of concurrent users + +### Skills Gained +- ๐Ÿ’ผ **Portfolio project** demonstrating context engineering mastery +- ๐Ÿ“Š **Performance monitoring expertise** for production deployment +- ๐Ÿ› ๏ธ **Production-ready patterns** for building AI agents +- ๐ŸŽฏ **Cost optimization skills** for managing LLM expenses + +--- + +## ๐Ÿ“š Additional Resources + +### Documentation +- **[COURSE_SUMMARY.md](COURSE_SUMMARY.md)** - Complete course syllabus and learning outcomes +- **[SETUP.md](SETUP.md)** - Detailed setup instructions +- **[notebooks/README.md](notebooks/README.md)** - Notebook-specific documentation +- **[notebooks/SETUP_GUIDE.md](notebooks/SETUP_GUIDE.md)** - Comprehensive setup guide +- **[notebooks/REFERENCE_AGENT_USAGE_ANALYSIS.md](notebooks/REFERENCE_AGENT_USAGE_ANALYSIS.md)** - Component usage analysis +- **[reference-agent/README.md](reference-agent/README.md)** - Reference agent documentation + +### External Resources +- **[Redis Documentation](https://redis.io/docs/)** - Redis official documentation +- **[LangChain Documentation](https://python.langchain.com/)** - LangChain framework docs +- **[LangGraph Documentation](https://langchain-ai.github.io/langgraph/)** - LangGraph stateful agents +- **[Agent Memory Server](https://github.com/redis/agent-memory-server)** - Memory management system +- **[OpenAI API Reference](https://platform.openai.com/docs/api-reference)** - OpenAI API documentation + +### Community +- **[Redis Discord](https://discord.gg/redis)** - Join the Redis community +- **[GitHub Issues](https://github.com/redis-developer/redis-ai-resources/issues)** - Report issues or ask questions +- **[Redis AI Resources](https://github.com/redis-developer/redis-ai-resources)** - More AI examples and recipes + +--- + +## ๐Ÿค Contributing + +This is an educational resource. Contributions that improve clarity, add examples, or extend the reference implementation are welcome! + +### How to Contribute +1. Fork the repository +2. Create a feature branch +3. Make your changes +4. 
Submit a pull request + +### Areas for Contribution +- Additional notebook examples +- Improved documentation +- Bug fixes and corrections +- New optimization patterns +- Extended reference agent features + +--- + +## ๐Ÿ“ Course Metadata + +**Version**: 2.0 +**Last Updated**: November 2025 +**Maintainer**: Redis AI Resources Team +**License**: MIT + +**Technologies**: +- Python 3.10+ +- Redis 8.0+ +- LangChain 0.2+ +- LangGraph 0.2+ +- Agent Memory Server 0.12.3+ +- OpenAI GPT-4 + +**Course Stats**: +- **Duration**: 18-23 hours +- **Sections**: 5 +- **Notebooks**: 12 +- **Hands-on Exercises**: 30+ +- **Production Patterns**: 15+ + +--- + +**๐ŸŽ‰ Ready to transform your context engineering skills? [Start your journey today!](#-quick-start-5-minutes)** + +--- + +*This comprehensive course provides hands-on education in context engineering - taking you from fundamentals to production-ready expertise through a single, evolving project that demonstrates real-world impact.* diff --git a/python-recipes/context-engineering/SETUP.md b/python-recipes/context-engineering/SETUP.md new file mode 100644 index 00000000..7c7c2aba --- /dev/null +++ b/python-recipes/context-engineering/SETUP.md @@ -0,0 +1,205 @@ +# Setup Guide for Context Engineering Course + +This guide will help you set up everything you need to run the Context Engineering notebooks and reference agent. + +## Prerequisites + +- **Python 3.10+** installed +- **Docker and Docker Compose** installed +- **OpenAI API key** (get one at https://platform.openai.com/api-keys) + +## Quick Setup (5 minutes) + +### Step 1: Set Your OpenAI API Key + +The OpenAI API key is needed by both the Jupyter notebooks AND the Agent Memory Server. The easiest way to set it up is to use a `.env` file. + +```bash +# Navigate to the context-engineering directory +cd python-recipes/context-engineering + +# Copy the example environment file +cp .env.example .env + +# Edit .env and add your OpenAI API key +# Replace 'your-openai-api-key-here' with your actual key +``` + +Your `.env` file should look like this: +```bash +OPENAI_API_KEY=sk-proj-xxxxxxxxxxxxxxxxxxxxx +REDIS_URL=redis://localhost:6379 +AGENT_MEMORY_URL=http://localhost:8088 +``` + +**Important:** The `.env` file is already in `.gitignore` so your API key won't be committed to git. + +### Step 2: Start Required Services + +Start Redis and the Agent Memory Server using Docker Compose: + +```bash +# Start services in the background +docker-compose up -d + +# Verify services are running +docker-compose ps + +# Check that the Agent Memory Server is healthy +curl http://localhost:8088/v1/health +``` + +You should see: +- `redis-context-engineering` running on port 6379 (Redis 8) +- `agent-memory-server` running on port 8088 + +### Step 3: Install Python Dependencies + +```bash +# Create a virtual environment +python -m venv venv +source venv/bin/activate # On Windows: venv\Scripts\activate + +# Install notebook dependencies (Jupyter, python-dotenv, etc.) +pip install -r requirements.txt + +# Install the reference agent package +cd reference-agent +pip install -e . +cd .. +``` + +### Step 4: Run the Notebooks + +```bash +# Start Jupyter from the context-engineering directory +jupyter notebook notebooks/ + +# Open any notebook and run the cells +``` + +The notebooks will automatically load your `.env` file using `python-dotenv`, so your `OPENAI_API_KEY` will be available. 
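Concretely, the first cell of each notebook does something like the following (a sketch, not the exact cell):

```python
import os

from dotenv import load_dotenv

# Reads .env from the current directory (or a parent) without
# overwriting variables already set in your shell
load_dotenv()

assert os.getenv("OPENAI_API_KEY"), (
    "OPENAI_API_KEY not set - create the .env file described in Step 1"
)
print("โœ… Environment loaded")
```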
+ +## Verifying Your Setup + +### Check Redis +```bash +# Test Redis connection +docker exec redis-context-engineering redis-cli ping +# Should return: PONG +``` + +### Check Agent Memory Server +```bash +# Test health endpoint +curl http://localhost:8088/v1/health +# Should return: {"now":} + +# Test that it can connect to Redis and has your API key +curl http://localhost:8088/api/v1/namespaces +# Should return a list of namespaces (may be empty initially) +``` + +### Check Python Environment +```bash +# Verify the reference agent package is installed +python -c "import redis_context_course; print('โœ… Package installed')" + +# Verify OpenAI key is set +python -c "import os; print('โœ… OpenAI key set' if os.getenv('OPENAI_API_KEY') else 'โŒ OpenAI key not set')" +``` + +## Troubleshooting + +### "OPENAI_API_KEY not found" + +**In Notebooks:** The notebooks will prompt you for your API key if it's not set. However, it's better to set it in the `.env` file so you don't have to enter it repeatedly. + +**In Docker:** Make sure: +1. Your `.env` file exists and contains `OPENAI_API_KEY=your-key` +2. You've restarted the services: `docker-compose down && docker-compose up -d` +3. Check the logs: `docker-compose logs agent-memory-server` + +### "Connection refused" to Agent Memory Server + +Make sure the services are running: +```bash +docker-compose ps +``` + +If they're not running, start them: +```bash +docker-compose up -d +``` + +Check the logs for errors: +```bash +docker-compose logs agent-memory-server +``` + +### "Connection refused" to Redis + +Make sure Redis is running: +```bash +docker-compose ps redis +``` + +Test the connection: +```bash +docker exec redis-context-engineering redis-cli ping +``` + +### Port Already in Use + +If you get errors about ports already in use (6379 or 8088), you can either: + +1. Stop the conflicting service +2. Change the ports in `docker-compose.yml`: + ```yaml + ports: + - "6380:6379" # Use 6380 instead of 6379 + ``` + Then update `REDIS_URL` or `AGENT_MEMORY_URL` in your `.env` file accordingly. + +## Stopping Services + +```bash +# Stop services but keep data +docker-compose stop + +# Stop and remove services (keeps volumes/data) +docker-compose down + +# Stop and remove everything including data +docker-compose down -v +``` + +## Alternative: Using Existing Redis or Cloud Redis + +If you already have Redis running or want to use Redis Cloud: + +1. Update `REDIS_URL` in your `.env` file: + ```bash + REDIS_URL=redis://default:password@your-redis-cloud-url:port + ``` + +2. You still need to run the Agent Memory Server locally: + ```bash + docker-compose up -d agent-memory-server + ``` + +## Next Steps + +Once setup is complete: + +1. Start with **Section 1** notebooks to understand core concepts +2. Work through **Section 2** to learn system context setup +3. Complete **Section 3** to master memory management (requires Agent Memory Server) +4. 
Explore **Sections 4 and 5** for agents, tool selection, and production optimization
+
+## Getting Help
+
+- Check the main [README.md](README.md) for course structure and learning path
+- Review [COURSE_SUMMARY.md](COURSE_SUMMARY.md) for an overview of all topics
+- Open an issue if you encounter problems with the setup
+
diff --git a/python-recipes/context-engineering/docker-compose.yml b/python-recipes/context-engineering/docker-compose.yml
new file mode 100644
index 00000000..8cf1cf0c
--- /dev/null
+++ b/python-recipes/context-engineering/docker-compose.yml
@@ -0,0 +1,39 @@
+services:
+  redis:
+    image: redis:8.2.2
+    container_name: redis-context-engineering
+    ports:
+      - "6379:6379"
+    environment:
+      - REDIS_ARGS=--save 60 1 --loglevel warning
+    volumes:
+      - redis-data:/data
+    healthcheck:
+      test: ["CMD", "redis-cli", "ping"]
+      interval: 5s
+      timeout: 3s
+      retries: 5
+
+  agent-memory-server:
+    image: ghcr.io/redis/agent-memory-server:0.12.3
+    container_name: agent-memory-server
+    command: ["agent-memory", "api", "--host", "0.0.0.0", "--port", "8000", "--no-worker"]
+    ports:
+      - "8088:8000"  # Host port changed to avoid conflicts
+    environment:
+      - REDIS_URL=redis://redis:6379
+      - OPENAI_API_KEY=${OPENAI_API_KEY}
+      - LOG_LEVEL=INFO
+    depends_on:
+      redis:
+        condition: service_healthy
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8000/v1/health"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+      start_period: 30s
+
+volumes:
+  redis-data:
+
diff --git a/python-recipes/context-engineering/notebooks/README.md b/python-recipes/context-engineering/notebooks/README.md
new file mode 100644
index 00000000..5f471eff
--- /dev/null
+++ b/python-recipes/context-engineering/notebooks/README.md
@@ -0,0 +1,639 @@
+# Context Engineering Course - Notebooks
+
+**Hands-on Jupyter notebooks for learning production-ready context engineering.**
+
+> ๐Ÿ“š **Main Course Documentation**: See **[../README.md](../README.md)** for complete course overview, setup instructions, and syllabus.
+>
+> ๐Ÿ“– **Course Syllabus**: See **[../COURSE_SUMMARY.md](../COURSE_SUMMARY.md)** for detailed learning outcomes and course structure.
+
+---
+
+## ๐Ÿ“– About These Notebooks
+
+This directory contains the hands-on Jupyter notebooks for the Context Engineering course. The notebooks are organized into 5 sections that progressively build your skills from fundamentals to production deployment.
+
+### Quick Links
+- **[Course Overview & Setup](../README.md)** - Start here for setup and course introduction
+- **[Course Syllabus](../COURSE_SUMMARY.md)** - Complete syllabus with learning outcomes
+- **[Setup Guide](SETUP_GUIDE.md)** - Detailed setup instructions and troubleshooting
+- **[Reference Agent Usage](REFERENCE_AGENT_USAGE_ANALYSIS.md)** - Component usage analysis
+
+---
+
+## ๐Ÿš€ Quick Start
+
+**Already set up?** Jump right in:
+
+```bash
+# Start Jupyter from the context-engineering directory
+cd python-recipes/context-engineering
+jupyter notebook notebooks/
+
+# Open: section-1-context-engineering-foundations/01_what_is_context_engineering.ipynb
+```
+
+**Need to set up?** Follow the [5-minute quick start](../README.md#-quick-start-5-minutes) in the main README.
+
+**Having issues?** Check the [Setup Guide](SETUP_GUIDE.md) for detailed instructions and troubleshooting.
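One more quick sanity check before you open a notebook: the same connection code works whether Redis runs in Docker or in Redis Cloud, since only the `REDIS_URL` in your `.env` changes (a sketch):

```python
import os

import redis

# The same code path works for local Docker and Redis Cloud;
# only the REDIS_URL in your .env differs
r = redis.Redis.from_url(
    os.getenv("REDIS_URL", "redis://localhost:6379"),
    decode_responses=True,
)
print(r.ping())                           # True
print(r.info("server")["redis_version"])  # e.g. "8.2.2"
```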
+ +--- + +## ๐Ÿ“š Notebook Sections Overview + +### Learning Journey + +``` +Section 1: Foundations โ†’ Section 2: RAG โ†’ Section 3: Memory โ†’ Section 4: Tools โ†’ Section 5: Production + โ†“ โ†“ โ†“ โ†“ โ†“ +Basic Concepts โ†’ RAG Agent โ†’ Memory Agent โ†’ Multi-Tool Agent โ†’ Production Agent +(2-3 hrs) (3-4 hrs) (4-5 hrs) (5-6 hrs) (4-5 hrs) +``` + +**๐Ÿ† End Result**: A complete, production-ready AI agent that can handle thousands of users with sophisticated memory, intelligent tool routing, and optimized performance. + +> ๐Ÿ’ก **For detailed learning outcomes and syllabus**, see [../COURSE_SUMMARY.md](../COURSE_SUMMARY.md) + +## โœจ What Makes This Approach Unique + +### 1. ๐Ÿ“ˆ Progressive Complexity +- **Same agent evolves** through all sections - see your work compound +- **Each section builds directly** on the previous one +- **Clear progression** from educational concepts to production deployment +- **Investment in learning** pays off across all sections + +### 2. ๐Ÿ—๏ธ Professional Foundation +- **Reference-agent integration** - Built on production-ready architecture +- **Type-safe Pydantic models** throughout all sections +- **Industry best practices** from day one +- **Real-world patterns** that work in production systems + +### 3. ๐Ÿ› ๏ธ Hands-On Learning +- **Working code** in every notebook cell +- **Jupyter-friendly** interactive development +- **Immediate results** and feedback +- **Experimentation encouraged** - modify and test variations + +### 4. ๐ŸŒ Real-World Relevance +- **Production patterns** used in enterprise AI systems +- **Scalable architecture** ready for deployment +- **Portfolio-worthy** final project +- **Career-relevant** skills and experience + +## ๐Ÿ“š Complete Course Syllabus + +### ๐ŸŽฏ **Section 1: Foundations** +**Goal**: Master context engineering basics and the four context types +**Duration**: ~2-3 hours +**Prerequisites**: Basic Python knowledge, familiarity with LLMs + +**What You'll Build**: +- Understanding of the four types of context (system, user, retrieved, conversation) +- Foundation patterns for context assembly and management +- Conceptual framework for building context-aware AI systems + +**Key Learning**: +- Context engineering fundamentals and why it matters +- The four context types and when to use each +- Foundation for building sophisticated AI systems + +**Notebooks**: +1. `01_what_is_context_engineering.ipynb` - Core concepts and why context engineering matters +2. `02_context_assembly_strategies.ipynb` - Hands-on exploration of each context type + +**Reference Agent Components Used**: None (conceptual foundation) + +### ๐Ÿค– **Section 2: Retrieved Context Engineering** +**Goal**: Build a complete RAG system with vector search and retrieval +**Duration**: ~3-4 hours +**Prerequisites**: Section 1 completed, Redis running, OpenAI API key + +**What You'll Build**: +- Complete RAG pipeline (Retrieval + Augmentation + Generation) +- Vector-based course search using Redis and RedisVL +- Context assembly from multiple information sources +- Course recommendation system with semantic search + +**Key Learning**: +- RAG architecture and implementation patterns +- Vector similarity search for intelligent retrieval +- Redis as a vector database for AI applications +- Course data generation and ingestion workflows + +**Notebooks**: +1. 
`01_rag_retrieved_context_in_practice.ipynb` - Complete RAG system with Redis University Course Advisor + +**Reference Agent Components Used**: +- `CourseGenerator` - Generate sample course data +- `CourseIngestionPipeline` - Ingest courses into Redis +- `CourseManager` - Course search and recommendations +- `redis_config` - Redis configuration and connection + +### ๐Ÿง  **Section 3: Memory Systems for Context Engineering** +**Goal**: Master memory management with Agent Memory Server +**Duration**: ~4-5 hours +**Prerequisites**: Section 2 completed, Agent Memory Server running + +**What You'll Build**: +- Dual memory system (working memory + long-term memory) +- Memory extraction strategies (discrete, summary, preferences) +- Memory-enhanced RAG with semantic retrieval +- Working memory compression for long conversations + +**Key Learning**: +- Working vs long-term memory patterns and use cases +- Memory extraction strategies and when to use each +- Agent Memory Server integration and configuration +- Memory compression strategies (truncation, priority-based, summarization) +- Session management and cross-session persistence + +**Notebooks**: +1. `01_working_and_longterm_memory.ipynb` - Memory basics and Agent Memory Server integration +2. `02_combining_memory_with_retrieved_context.ipynb` - Memory extraction strategies in practice +3. `03_manage_long_conversations_with_compression_strategies.ipynb` - Compression strategies for long conversations + +**Reference Agent Components Used**: +- `redis_config` - Redis configuration +- `CourseManager` - Course management +- `Course`, `StudentProfile` - Data models +- `DifficultyLevel`, `CourseFormat`, `Semester` - Enums + +--- + +### ๐Ÿ”ง **Section 4: Tool Selection & LangGraph Agents** +**Goal**: Build production agents with LangGraph and intelligent tool selection +**Duration**: ~5-6 hours +**Prerequisites**: Section 3 completed, understanding of LangGraph basics + +**What You'll Build**: +- LangGraph-based stateful agent workflows +- Course advisor agent with multiple tools +- Memory-integrated agent with Agent Memory Server +- Working memory compression for long conversations + +**Key Learning**: +- LangGraph StateGraph and agent workflows +- Tool creation and integration patterns +- Agent Memory Server integration with LangGraph +- Working memory compression strategies in production agents +- State management and conversation flow control + +**Notebooks**: +1. `01_tools_and_langgraph_fundamentals.ipynb` - LangGraph basics and tool integration +2. `02_redis_university_course_advisor_agent.ipynb` - Complete course advisor agent +3. `02_redis_university_course_advisor_agent_with_compression.ipynb` - Agent with memory compression + +**Reference Agent Components Used**: +- `CourseManager` - Course search and recommendations +- `StudentProfile`, `DifficultyLevel`, `CourseFormat` - Data models + +**Note**: This section demonstrates building custom agents rather than using the reference `ClassAgent` directly, showing students how to build production agents from scratch. 
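For a feel of what Section 4 builds toward, here is a minimal tool-plus-LangGraph sketch. It uses LangGraph's prebuilt ReAct helper for brevity, whereas the notebooks wire up the `StateGraph` by hand, and the canned tool body stands in for `CourseManager`'s vector search:

```python
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI
from langgraph.prebuilt import create_react_agent


@tool
def search_courses(query: str) -> str:
    """Search the course catalog for courses matching the query."""
    # The notebooks delegate to CourseManager's vector search here;
    # a canned answer keeps this sketch self-contained.
    return "CS101: Intro to Programming (beginner, online, 3 credits)"


llm = ChatOpenAI(model="gpt-4o")
agent = create_react_agent(llm, [search_courses])

result = agent.invoke(
    {"messages": [("user", "Find me a beginner programming course")]}
)
print(result["messages"][-1].content)
```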
+ +--- + +### โšก **Section 5: Optimization & Production** +**Goal**: Optimize agents for production deployment +**Duration**: ~4-5 hours +**Prerequisites**: Section 4 completed + +**What You'll Build**: +- Performance measurement and optimization techniques +- Semantic tool selection at scale +- Production readiness checklist and quality assurance +- Cost optimization and monitoring + +**Key Learning**: +- Performance profiling and optimization +- Semantic tool selection with embeddings +- Production deployment best practices +- Quality assurance and testing strategies +- Cost management and token optimization + +**Notebooks**: +1. `01_measuring_optimizing_performance.ipynb` - Performance measurement and optimization +2. `02_scaling_semantic_tool_selection.ipynb` - Advanced tool selection strategies +3. `03_production_readiness_quality_assurance.ipynb` - Production deployment guide + +**Reference Agent Components Used**: +- Optimization helpers (to be demonstrated) +- Production patterns from reference agent + +**Status**: โณ Section 5 notebooks are in development + +--- + +## ๐Ÿ“ฆ Reference Agent Package + +The course uses the `redis-context-course` reference agent package, which provides production-ready components for building context-aware AI agents. + +### What's in the Reference Agent? + +**Core Components** (used in notebooks): +- `CourseManager` - Course search, recommendations, and catalog management +- `redis_config` - Redis configuration and connection management +- Data models: `Course`, `StudentProfile`, `DifficultyLevel`, `CourseFormat`, `Semester` +- Scripts: `CourseGenerator`, `CourseIngestionPipeline` + +**Advanced Components** (for production use): +- `ClassAgent` - Complete LangGraph-based agent implementation +- `AugmentedClassAgent` - Enhanced agent with additional features +- Tool creators: `create_course_tools`, `create_memory_tools` +- Optimization helpers: `count_tokens`, `estimate_token_budget`, `hybrid_retrieval`, etc. + +### How the Course Uses the Reference Agent + +**Educational Approach**: The notebooks demonstrate **building agents from scratch** using reference agent components as building blocks, rather than using the pre-built `ClassAgent` directly. + +**Why?** This approach helps you: +- โœ… Understand how agents work internally +- โœ… Learn to build custom agents for your use cases +- โœ… See production patterns in action +- โœ… Gain hands-on experience with LangGraph and memory systems + +**Component Usage by Section**: +- **Section 1**: None (conceptual foundation) +- **Section 2**: CourseManager, redis_config, data generation scripts +- **Section 3**: CourseManager, redis_config, data models +- **Section 4**: CourseManager, data models +- **Section 5**: Optimization helpers (in development) + +For a detailed analysis of reference agent usage, see [REFERENCE_AGENT_USAGE_ANALYSIS.md](REFERENCE_AGENT_USAGE_ANALYSIS.md). + +For reference agent documentation, see [../reference-agent/README.md](../reference-agent/README.md). 
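As a taste of the core components, a short sketch of catalog access with `CourseManager`. `get_all_courses()` appears in the course summary; `search_courses` and the `title` attribute are assumptions here, so check `course_manager.py` and `models.py` for the exact names and signatures:

```python
import asyncio

from redis_context_course import CourseManager


async def main() -> None:
    manager = CourseManager()

    # Browse the full catalog
    courses = await manager.get_all_courses()
    print(f"{len(courses)} courses in the catalog")

    # Semantic search over the catalog (name and signature assumed -
    # see course_manager.py for the real method)
    results = await manager.search_courses("beginner machine learning", limit=3)
    for course in results:
        print(course.title)


asyncio.run(main())
```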
+ +--- + +## ๐Ÿ—๏ธ Technical Architecture Evolution + +### **Agent Architecture Progression** + +#### **Section 2: Basic RAG** +```python +class SimpleRAGAgent: + - CourseManager integration + - Vector similarity search + - Context assembly + - Basic conversation history +``` + +#### **Section 3: Memory-Enhanced** +```python +class MemoryEnhancedAgent: + - Redis-based persistence + - Working vs long-term memory + - Memory consolidation + - Cross-session continuity +``` + +#### **Section 4: Multi-Tool** +```python +class MultiToolAgent: + - Specialized tool suite + - Semantic tool selection + - Intent classification + - Memory-aware routing +``` + +#### **Section 5: Production-Optimized** +```python +class OptimizedProductionAgent: + - Context optimization + - Performance monitoring + - Caching system + - Cost tracking + - Scalability support +``` + +## ๐ŸŽ“ Learning Outcomes by Section + +### **After Section 1: Foundations** +Students can: +- โœ… Explain the four context types and when to use each +- โœ… Understand context engineering principles and best practices +- โœ… Design context strategies for AI applications +- โœ… Identify context engineering patterns in production systems + +### **After Section 2: Retrieved Context Engineering** +Students can: +- โœ… Build complete RAG systems with Redis and RedisVL +- โœ… Implement vector similarity search for intelligent retrieval +- โœ… Generate and ingest course data into Redis +- โœ… Create course recommendation systems with semantic search + +### **After Section 3: Memory Systems for Context Engineering** +Students can: +- โœ… Integrate Agent Memory Server with AI agents +- โœ… Implement dual memory systems (working + long-term) +- โœ… Apply memory extraction strategies (discrete, summary, preferences) +- โœ… Implement memory compression for long conversations +- โœ… Design cross-session conversation continuity + +### **After Section 4: Tool Selection & LangGraph** +Students can: +- โœ… Build stateful agents with LangGraph StateGraph +- โœ… Create and integrate multiple tools in agents +- โœ… Implement memory-integrated agents with Agent Memory Server +- โœ… Apply working memory compression in production agents +- โœ… Design conversation flow control and state management + +### **After Section 5: Optimization & Production** +Students can: +- โœ… Measure and optimize agent performance +- โœ… Implement semantic tool selection at scale +- โœ… Apply production deployment best practices +- โœ… Build quality assurance and testing strategies +- โœ… Optimize costs and token usage + +### **Complete Program Outcomes** +Students will have: +- ๐Ÿ† **Production-ready AI agent** with memory, tools, and optimization +- ๐Ÿ“ˆ **Hands-on experience** with Redis, LangGraph, and Agent Memory Server +- ๐Ÿ”ง **Real-world skills** applicable to enterprise AI systems +- ๐Ÿ’ผ **Portfolio project** demonstrating context engineering mastery + +--- + +## ๐Ÿ“‹ System Requirements + +### Required +- **Python 3.10+** (Python 3.8+ may work but 3.10+ recommended) +- **Docker Desktop** (for Redis and Agent Memory Server) +- **OpenAI API Key** ([get one here](https://platform.openai.com/api-keys)) +- **8GB RAM minimum** (16GB recommended for Section 5) +- **5GB disk space** for dependencies and data + +### Optional +- **Jupyter Lab** (alternative to Jupyter Notebook) +- **VS Code** with Jupyter extension +- **Redis Insight** for visualizing Redis data + +--- + +## ๐Ÿ› ๏ธ Detailed Setup Instructions + +For complete setup instructions including troubleshooting, see 
[SETUP_GUIDE.md](SETUP_GUIDE.md). + +### Quick Setup Summary + +1. **Set environment variables** (`.env` file with OpenAI API key) +2. **Start services** (`docker-compose up -d`) +3. **Install dependencies** (`pip install -r requirements.txt`) +4. **Install reference agent** (`cd reference-agent && pip install -e .`) +5. **Start Jupyter** (`jupyter notebook notebooks_v2/`) + +### Verification + +After setup, verify everything works: + +```bash +# Check Redis +docker exec redis-context-engineering redis-cli ping # Should return: PONG + +# Check Agent Memory Server +curl http://localhost:8088/v1/health # Should return: {"now":} + +# Check Python packages +python -c "import redis_context_course; print('โœ… Reference agent installed')" +``` + +--- + +## ๐Ÿ“– Recommended Learning Path + +### For Beginners +1. **Start with Section 1** - Build conceptual foundation +2. **Complete Section 2** - Get hands-on with RAG +3. **Work through Section 3** - Master memory systems +4. **Build in Section 4** - Create production agents +5. **Optimize in Section 5** - Deploy to production + +### For Experienced Developers +- **Skip to Section 2** if familiar with context engineering basics +- **Jump to Section 3** if you've built RAG systems before +- **Start at Section 4** if you want to focus on LangGraph and agents + +### Time Commitment +- **Intensive**: 1 week (full-time, 8 hours/day) +- **Standard**: 3-4 weeks (part-time, 6-8 hours/week) +- **Relaxed**: 6-8 weeks (casual, 3-4 hours/week) + +### Learning Tips +1. **Start with Section 1** - Build foundational understanding +2. **Progress sequentially** - Each section builds on the previous +3. **Complete all exercises** - Hands-on practice is essential +4. **Experiment freely** - Modify code and test variations +5. **Build your own variations** - Apply patterns to your domain + +--- + +## ๐Ÿ”ง Troubleshooting + +### **Common Issues and Solutions** + +#### **OpenAI API Key Issues** +``` +Error: "OPENAI_API_KEY not found. Please create a .env file..." +``` +**Solutions:** +1. Create `.env` file with `OPENAI_API_KEY=your_key_here` +2. Set environment variable: `export OPENAI_API_KEY=your_key_here` +3. Get your API key from: https://platform.openai.com/api-keys + +#### **Redis Connection Issues** +``` +Error: "Connection refused" or "Redis not available" +``` +**Solutions:** +1. Start Redis: `docker run -d -p 6379:6379 redis/redis-stack` +2. Check Redis URL in `.env`: `REDIS_URL=redis://localhost:6379` +3. Some features may work without Redis (varies by notebook) + +#### **Import Errors** +``` +Error: "No module named 'redis_context_course'" +``` +**Solutions:** +1. Install reference agent: `pip install -e ../../../reference-agent` +2. Check Python path in notebook cells +3. Restart Jupyter kernel + +#### **Notebook JSON Errors** +``` +Error: "NotJSONError" or "Notebook does not appear to be JSON" +``` +**Solutions:** +1. All notebooks are now JSON-valid (fixed in this update) +2. Try refreshing the browser +3. 
Restart Jupyter server + +### **Getting Help** +- **Check notebook output** - Error messages include troubleshooting tips +- **Environment validation** - Notebooks validate setup and provide clear guidance +- **Standard tools** - Uses industry-standard `python-dotenv` for configuration + +## ๐ŸŒ Real-World Applications + +The patterns and techniques learned apply directly to: + +### **Enterprise AI Systems** +- **Customer service chatbots** with sophisticated memory and tool routing +- **Technical support agents** with intelligent knowledge retrieval +- **Sales assistants** with personalized recommendations and context +- **Knowledge management systems** with optimized context assembly + +### **Educational Technology** +- **Personalized learning assistants** that remember student progress +- **Academic advising systems** with comprehensive course knowledge +- **Intelligent tutoring systems** with adaptive responses +- **Student support chatbots** with institutional knowledge + +### **Production AI Services** +- **Multi-tenant SaaS AI platforms** with user isolation and scaling +- **API-based AI services** with cost optimization and monitoring +- **Scalable conversation systems** with memory persistence +- **Enterprise AI deployments** with comprehensive analytics + +## ๐Ÿ“Š Expected Results and Benefits + +### **Measurable Improvements** +- **50-70% token reduction** through intelligent context optimization +- **Semantic tool selection** replacing brittle keyword matching +- **Cross-session memory** enabling natural conversation continuity +- **Production scalability** supporting thousands of concurrent users + +### **Cost Optimization** +- **Significant API cost savings** through context compression +- **Efficient caching** reducing redundant LLM calls +- **Smart token budgeting** preventing cost overruns +- **Performance monitoring** enabling continuous optimization + +### **Professional Skills** +- **Production-ready AI development** with industry best practices +- **Scalable system architecture** for enterprise deployment +- **Performance optimization** and cost management expertise +- **Advanced context engineering** techniques for complex applications + +## ๐Ÿ“ Project Structure + +``` +enhanced-integration/ +โ”œโ”€โ”€ README.md # This comprehensive guide +โ”œโ”€โ”€ PROGRESSIVE_PROJECT_PLAN.md # Detailed project planning +โ”œโ”€โ”€ PROGRESSIVE_PROJECT_COMPLETE.md # Project completion summary +โ”œโ”€โ”€ setup.py # One-command environment setup +โ”œโ”€โ”€ setup.sh # Alternative shell setup script +โ”œโ”€โ”€ .env.example # Environment configuration template +โ”‚ +โ”œโ”€โ”€ section-1-context-engineering-foundations/ # Foundation concepts +โ”‚ โ”œโ”€โ”€ 01_what_is_context_engineering.ipynb +โ”‚ โ”œโ”€โ”€ 02_context_assembly_strategies.ipynb +โ”‚ โ””โ”€โ”€ README.md +โ”‚ +โ”œโ”€โ”€ section-2-retrieved-context-engineering/ # Complete RAG system +โ”‚ โ”œโ”€โ”€ 01_building_your_rag_agent.ipynb +โ”‚ โ””โ”€โ”€ README.md +โ”‚ +โ”œโ”€โ”€ section-4-tool-selection/ # Multi-tool intelligence +โ”‚ โ”œโ”€โ”€ 01_building_multi_tool_intelligence.ipynb +โ”‚ โ””โ”€โ”€ README.md +โ”‚ +โ”œโ”€โ”€ section-5-context-optimization/ # Production optimization +โ”‚ โ”œโ”€โ”€ 01_optimizing_for_production.ipynb +โ”‚ โ””โ”€โ”€ README.md +โ”‚ +โ””โ”€โ”€ old/ # Archived previous versions + โ”œโ”€โ”€ README.md # Archive explanation + โ””โ”€โ”€ [previous notebook versions] # Reference materials +``` + +## ๐ŸŽฏ Why This Progressive Approach Works + +### **1. 
Compound Learning** +- **Same agent evolves** - Students see their work improve continuously +- **Skills build on each other** - Each section leverages previous learning +- **Investment pays off** - Time spent early benefits all later sections +- **Natural progression** - Logical flow from simple to sophisticated + +### **2. Production Readiness** +- **Real architecture** - Built on production-ready reference-agent +- **Industry patterns** - Techniques used in enterprise systems +- **Scalable design** - Architecture that handles real-world complexity +- **Professional quality** - Code and patterns ready for production use + +### **3. Hands-On Mastery** +- **Working code** - Every concept demonstrated with runnable examples +- **Immediate feedback** - See results of every change instantly +- **Experimentation friendly** - Easy to modify and test variations +- **Problem-solving focus** - Learn by solving real challenges + +### **4. Measurable Impact** +- **Quantified improvements** - See exact performance gains +- **Cost optimization** - Understand business impact of optimizations +- **Performance metrics** - Track and optimize system behavior +- **Production monitoring** - Real-world performance indicators + +## ๐Ÿ† Success Metrics + +By completing this progressive learning path, you will have: + +### **Technical Achievements** +- โœ… Built 5 increasingly sophisticated AI agents +- โœ… Implemented production-ready architecture patterns +- โœ… Mastered context engineering best practices +- โœ… Created scalable, cost-effective AI systems + +### **Professional Skills** +- โœ… Production AI development experience +- โœ… System optimization and performance tuning +- โœ… Cost management and efficiency optimization +- โœ… Enterprise-grade monitoring and analytics + +### **Portfolio Project** +- โœ… Complete Redis University Course Advisor +- โœ… Production-ready codebase with comprehensive features +- โœ… Demonstrated scalability and optimization +- โœ… Professional documentation and testing + +**๐ŸŽ‰ Ready to transform your context engineering skills? 
Start your journey today!** + +--- + +## ๐Ÿ“š Additional Resources + +### Documentation +- **[SETUP_GUIDE.md](SETUP_GUIDE.md)** - Detailed setup instructions and troubleshooting +- **[REFERENCE_AGENT_USAGE_ANALYSIS.md](REFERENCE_AGENT_USAGE_ANALYSIS.md)** - Analysis of reference agent usage across notebooks +- **[Reference Agent README](../reference-agent/README.md)** - Complete reference agent documentation +- **[Main Course README](../README.md)** - Top-level context engineering documentation + +### External Resources +- **[Redis Documentation](https://redis.io/docs/)** - Redis official documentation +- **[LangChain Documentation](https://python.langchain.com/)** - LangChain framework docs +- **[LangGraph Documentation](https://langchain-ai.github.io/langgraph/)** - LangGraph stateful agents +- **[Agent Memory Server](https://github.com/redis/agent-memory-server)** - Memory management system +- **[OpenAI API Reference](https://platform.openai.com/docs/api-reference)** - OpenAI API documentation + +### Community +- **[Redis Discord](https://discord.gg/redis)** - Join the Redis community +- **[GitHub Issues](https://github.com/redis-developer/redis-ai-resources/issues)** - Report issues or ask questions +- **[Redis AI Resources](https://github.com/redis-developer/redis-ai-resources)** - More AI examples and recipes + +--- + +## ๐Ÿ“ Course Metadata + +**Version**: 2.0 +**Last Updated**: November 2025 +**Maintainer**: Redis AI Resources Team +**License**: MIT + +**Technologies**: +- Python 3.10+ +- Redis 8.0+ +- LangChain 0.2+ +- LangGraph 0.2+ +- Agent Memory Server 0.12.3+ +- OpenAI GPT-4 + +--- + +**This progressive learning path provides the most comprehensive, hands-on education in context engineering available - taking you from fundamentals to production-ready expertise through a single, evolving project that demonstrates real-world impact.** diff --git a/python-recipes/context-engineering/notebooks/SETUP_GUIDE.md b/python-recipes/context-engineering/notebooks/SETUP_GUIDE.md new file mode 100644 index 00000000..bd53e360 --- /dev/null +++ b/python-recipes/context-engineering/notebooks/SETUP_GUIDE.md @@ -0,0 +1,173 @@ +# ๐Ÿš€ Setup Guide for Context Engineering Notebooks + +This guide helps you set up all required services for the Context Engineering course notebooks. + +## ๐Ÿ“‹ Prerequisites + +Before running any notebooks, you need: + +1. **Docker Desktop** - For Redis and Agent Memory Server +2. **Python 3.8+** - For running notebooks +3. **OpenAI API Key** - For LLM functionality + +## โšก Quick Setup (Recommended) + +### Option 1: Automated Setup Script (Bash) + +```bash +# Navigate to notebooks directory +cd python-recipes/context-engineering/notebooks_v2 + +# Run the setup script +./setup_memory_server.sh +``` + +This script will: +- โœ… Check Docker is running +- โœ… Start Redis if needed +- โœ… Start Agent Memory Server +- โœ… Verify all connections work + +### Option 2: Python Setup Script + +```bash +# Navigate to notebooks directory +cd python-recipes/context-engineering/notebooks_v2 + +# Run Python setup +python setup_memory_server.py +``` + +## ๐Ÿ”ง Manual Setup + +If you prefer to set up services manually: + +### 1. 
Environment Variables + +Create a `.env` file in the `reference-agent/` directory: + +```bash +# Navigate to reference-agent directory +cd python-recipes/context-engineering/reference-agent + +# Create .env file +cat > .env << EOF +OPENAI_API_KEY=your_openai_api_key_here +REDIS_URL=redis://localhost:6379 +AGENT_MEMORY_URL=http://localhost:8088 +OPENAI_MODEL=gpt-4o +EOF +``` + +### 2. Start Redis + +```bash +docker run -d --name redis-stack-server -p 6379:6379 redis/redis-stack-server:latest +``` + +### 3. Start Agent Memory Server + +```bash +docker run -d --name agent-memory-server \ + -p 8088:8000 \ + -e REDIS_URL=redis://host.docker.internal:6379 \ + -e OPENAI_API_KEY="your_openai_api_key_here" \ + ghcr.io/redis/agent-memory-server:0.12.3 +``` + +## โœ… Verify Setup + +### Quick Check (Recommended) + +```bash +# Navigate to notebooks_v2 directory +cd python-recipes/context-engineering/notebooks_v2 + +# Run setup checker +./check_setup.sh +``` + +This will check all services and show you exactly what's working and what needs attention. + +### Manual Verification + +If you prefer to check manually: + +```bash +# Check Redis +redis-cli ping +# Should return: PONG + +# Check Agent Memory Server +curl http://localhost:8088/v1/health +# Should return: {"status":"ok"} + +# Check Docker containers +docker ps +# Should show both redis-stack-server and agent-memory-server +``` + +## ๐Ÿšจ Troubleshooting + +### Redis Connection Issues + +If you see Redis connection errors: + +```bash +# Stop and restart Agent Memory Server +docker stop agent-memory-server +docker rm agent-memory-server + +# Restart with correct Redis URL +docker run -d --name agent-memory-server \ + -p 8088:8000 \ + -e REDIS_URL=redis://host.docker.internal:6379 \ + -e OPENAI_API_KEY="your_openai_api_key_here" \ + ghcr.io/redis/agent-memory-server:0.12.3 +``` + +### Port Conflicts + +If ports 6379 or 8088 are in use: + +```bash +# Check what's using the ports +lsof -i :6379 +lsof -i :8088 + +# Stop conflicting services or use different ports +``` + +### Docker Issues + +If Docker commands fail: + +1. Make sure Docker Desktop is running +2. Check Docker has enough resources allocated +3. Try restarting Docker Desktop + +## ๐Ÿ“š Next Steps + +Once setup is complete: + +1. **Start with Section 1** if you're new to context engineering +2. **Jump to Section 4** if you want to learn about memory tools and agents +3. **Check the README** in each section for specific requirements + +## ๐Ÿ”— Section-Specific Requirements + +### Section 3 & 4: Memory Systems & Agents +- โœ… Redis (for vector storage) +- โœ… Agent Memory Server (for memory management) +- โœ… OpenAI API key + +### Section 2: RAG Foundations +- โœ… Redis (for vector storage) +- โœ… OpenAI API key + +### Section 1: Context Foundations +- โœ… OpenAI API key only + +--- + +**Need help?** Check the troubleshooting section or review the setup scripts for detailed error handling. 
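+### Verify from Python (Optional)
+
+If you prefer to verify from inside a notebook or script, a minimal check along these lines should also work. This is a sketch rather than part of the course tooling: it assumes the `redis` and `requests` packages are installed and that the services were started as described above.
+
+```python
+import os
+
+import redis     # assumes redis-py is installed
+import requests  # assumes requests is installed
+
+redis_url = os.getenv("REDIS_URL", "redis://localhost:6379")
+
+# 1. Redis should answer PING on the configured URL
+try:
+    redis.Redis.from_url(redis_url).ping()
+    print("Redis: OK")
+except redis.ConnectionError as exc:
+    print(f"Redis: unreachable ({exc})")
+
+# 2. The Agent Memory Server exposes the health endpoint used above
+resp = requests.get("http://localhost:8088/v1/health", timeout=5)
+print("Agent Memory Server:", resp.json())  # expect {"status":"ok"}
+
+# 3. An OpenAI API key must be present for LLM calls
+print("OPENAI_API_KEY set:", bool(os.getenv("OPENAI_API_KEY")))
+```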
diff --git a/python-recipes/context-engineering/notebooks/section-1-context-engineering-foundations/01_introduction_context_engineering_old.ipynb b/python-recipes/context-engineering/notebooks/section-1-context-engineering-foundations/01_introduction_context_engineering_old.ipynb new file mode 100644 index 00000000..8e424bbb --- /dev/null +++ b/python-recipes/context-engineering/notebooks/section-1-context-engineering-foundations/01_introduction_context_engineering_old.ipynb @@ -0,0 +1,529 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![Redis](https://redis.io/wp-content/uploads/2024/04/Logotype.svg?auto=webp&quality=85,75&width=120)\n", + "\n", + "# What is Context Engineering?\n", + "\n", + "## Introduction\n", + "\n", + "**Context Engineering** is the discipline of designing, implementing, and optimizing context management systems for AI agents and applications. It's the practice of ensuring that AI systems have the right information, at the right time, in the right format to make intelligent decisions and provide relevant responses.\n", + "\n", + "Think of context engineering as the \"memory and awareness system\" for AI agents - it's what allows them to:\n", + "- Remember past conversations and experiences\n", + "- Understand their role and capabilities\n", + "- Access relevant information from large knowledge bases\n", + "- Maintain coherent, personalized interactions over time\n", + "\n", + "## Why Context Engineering Matters\n", + "\n", + "Without proper context engineering, AI agents are like people with severe amnesia - they can't remember what happened five minutes ago, don't know who they're talking to, and can't learn from experience. This leads to:\n", + "\n", + "**Poor User Experience**\n", + "- Repetitive conversations\n", + "- Lack of personalization\n", + "- Inconsistent responses\n", + "\n", + "**Inefficient Operations**\n", + "- Redundant processing\n", + "- Inability to build on previous work\n", + "- Lost context between sessions\n", + "\n", + "**Limited Capabilities**\n", + "- Can't handle complex, multi-step tasks\n", + "- No learning or adaptation\n", + "- Poor integration with existing systems\n", + "\n", + "## Core Components of Context Engineering\n", + "\n", + "Context engineering involves several key components working together:\n", + "\n", + "### 1. System Context\n", + "What the AI should know about itself and its environment:\n", + "- Role and responsibilities\n", + "- Available tools and capabilities\n", + "- Operating constraints and guidelines\n", + "- Domain-specific knowledge\n", + "\n", + "### 2. Memory Management\n", + "How information is stored, retrieved, and maintained:\n", + "- **Working memory**: Persistent storage focused on the current task, including conversation context and task-related data\n", + "- **Long-term memory**: Knowledge learned across sessions, such as user preferences and important facts\n", + "\n", + "### 3. Context Retrieval\n", + "How relevant information is found and surfaced:\n", + "- Semantic search and similarity matching\n", + "- Relevance ranking and filtering\n", + "- Context window management\n", + "\n", + "### 4. 
Context Integration\n",
+    "How different types of context are combined:\n",
+    "- Merging multiple information sources\n",
+    "- Resolving conflicts and inconsistencies\n",
+    "- Prioritizing information by importance\n",
+    "\n",
+    "## Real-World Example: University Class Agent\n",
+    "\n",
+    "Let's explore context engineering through a practical example - a university class recommendation agent. This agent helps students find courses, plan their academic journey, and provides personalized recommendations.\n",
+    "\n",
+    "### Without Context Engineering\n",
+    "```\n",
+    "Student: \"I'm interested in programming courses\"\n",
+    "Agent: \"Here are all programming courses: CS101, CS201, CS301...\"\n",
+    "\n",
+    "Student: \"I prefer online courses\"\n",
+    "Agent: \"Here are all programming courses: CS101, CS201, CS301...\"\n",
+    "\n",
+    "Student: \"What about my major requirements?\"\n",
+    "Agent: \"I don't know your major. Here are all programming courses...\"\n",
+    "```\n",
+    "\n",
+    "### With Context Engineering\n",
+    "```\n",
+    "Student: \"I'm interested in programming courses\"\n",
+    "Agent: \"Great! I can help you find programming courses. Let me search our catalog...\n",
+    "       Based on your Computer Science major and beginner level, I recommend:\n",
+    "       - CS101: Intro to Programming (online, matches your preference)\n",
+    "       - CS102: Data Structures (hybrid option available)\"\n",
+    "\n",
+    "Student: \"Tell me more about CS101\"\n",
+    "Agent: \"CS101 is perfect for you! It's:\n",
+    "       - Online format (your preference)\n",
+    "       - Beginner-friendly\n",
+    "       - Required for your CS major\n",
+    "       - No prerequisites needed\n",
+    "       - Taught by Prof. Smith (highly rated)\"\n",
+    "```\n",
+    "\n",
+    "## 🔬 The Context Rot Problem\n",
+    "\n",
+    "Recent research from Chroma (July 2025) reveals a critical challenge in working with LLMs: **context rot** - the phenomenon where LLM performance degrades non-uniformly as input context length increases, even on simple tasks.\n",
+    "\n",
+    "### Key Research Findings\n",
+    "\n",
+    "**1. Non-Uniform Performance Degradation**\n",
+    "- Models don't process the 10,000th token as reliably as the 100th token\n",
+    "- Performance drops aren't linear - they accelerate as context grows\n",
+    "- Even simple tasks like word repetition fail with long context\n",
+    "\n",
+    "**2. Needle-Question Similarity Matters**\n",
+    "- Lower similarity between questions and retrieved information causes faster performance degradation\n",
+    "- High semantic relevance is critical for maintaining accuracy\n",
+    "- Generic or loosely related context actively harms performance\n",
+    "\n",
+    "**3. Distractors Have Amplified Impact**\n",
+    "- Similar-but-wrong information (distractors) degrades performance more as context grows\n",
+    "- The negative impact of irrelevant information is non-linear\n",
+    "- Filtering out low-relevance content is as important as finding relevant content\n",
+    "\n",
+    "**4. Structure Affects Attention**\n",
+    "- How you organize context affects model performance\n",
+    "- Counterintuitively, shuffled text sometimes performs better than coherent text\n",
+    "- Context window position matters - information placement impacts retrieval accuracy\n",
+    "\n",
+    "### Why This Matters for Context Engineering\n",
+    "\n",
+    "The Context Rot research validates the core principles of this course:\n",
+    "\n",
+    "✅ **Quality Over Quantity**\n",
+    "More context isn't always better. 
Adding marginally relevant information can hurt performance more than it helps.\n", + "\n", + "โœ… **Semantic Similarity is Critical**\n", + "High relevance between queries and retrieved context is essential. RAG systems must prioritize precision over recall.\n", + "\n", + "โœ… **Structure Matters**\n", + "How you organize and present context affects LLM attention mechanisms. Context engineering isn't just about *what* information to include, but *how* to structure it.\n", + "\n", + "โœ… **Distractor Removal**\n", + "Filtering out low-relevance information improves performance. Memory systems must be selective about what they store and retrieve.\n", + "\n", + "โœ… **Context Window Management**\n", + "Understanding token limits isn't enough - you must understand how performance degrades within those limits.\n", + "\n", + "**This course teaches you techniques to engineer context effectively and avoid these pitfalls.**\n", + "\n", + "๐Ÿ“š **Read the full paper:** [Context Rot: How Increasing Input Tokens Impacts LLM Performance](https://research.trychroma.com/context-rot)\n", + "\n", + "## Environment Setup\n", + "\n", + "Before we explore context engineering in action, let's set up our environment with the necessary dependencies and connections." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-30T02:35:54.056071Z", + "iopub.status.busy": "2025-10-30T02:35:54.055902Z", + "iopub.status.idle": "2025-10-30T02:35:54.313194Z", + "shell.execute_reply": "2025-10-30T02:35:54.312619Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Setup complete! (Using demo responses - set OPENAI_API_KEY for real API calls)\n" + ] + } + ], + "source": [ + "import os\n", + "from openai import OpenAI\n", + "\n", + "# Initialize OpenAI client (for demonstration - API key needed for actual calls)\n", + "api_key = os.getenv(\"OPENAI_API_KEY\", \"demo-key-for-notebook\")\n", + "client = OpenAI(api_key=api_key) if api_key != \"demo-key-for-notebook\" else None\n", + "\n", + "def ask_agent(messages, system_prompt=\"You are a helpful assistant.\"):\n", + " \"\"\"Simple function to call OpenAI with context\"\"\"\n", + " if client and api_key != \"demo-key-for-notebook\":\n", + " # Real OpenAI API call\n", + " response = client.chat.completions.create(\n", + " model=\"gpt-4o-mini\",\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": system_prompt},\n", + " *messages\n", + " ]\n", + " )\n", + " return response.choices[0].message.content\n", + " else:\n", + " # Demo response for notebook execution\n", + " user_content = messages[0]['content'] if messages else \"general query\"\n", + " if \"Redis course\" in user_content:\n", + " return \"Based on your background and goals, I recommend starting with RU101 (Introduction to Redis) to build a solid foundation, then progressing to RU201 (Redis for Python) which aligns with your programming experience and interests.\"\n", + " elif \"long will that take\" in user_content:\n", + " return \"RU201 (Redis for Python) typically takes 6-8 hours to complete, including hands-on exercises. Given your evening and weekend availability, you could finish it in 2-3 weeks at a comfortable pace.\"\n", + " else:\n", + " return \"I'd be happy to help with your Redis learning journey. Could you tell me more about your background and what you're hoping to achieve?\"\n", + "\n", + "print(\"Setup complete! 
(Using demo responses - set OPENAI_API_KEY for real API calls)\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Context Engineering in Action\n", + "\n", + "Now let's explore the different types of context our agent manages:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1. System Context Example\n", + "\n", + "System context defines what the agent knows about itself. This is typically provided as a system prompt:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-30T02:35:54.328583Z", + "iopub.status.busy": "2025-10-30T02:35:54.328477Z", + "iopub.status.idle": "2025-10-30T02:35:54.330693Z", + "shell.execute_reply": "2025-10-30T02:35:54.330218Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "System Context Example:\n", + "This system prompt defines the agent's role, responsibilities, and constraints.\n", + "It will be included in every conversation to maintain consistent behavior.\n" + ] + } + ], + "source": [ + "# Example of a system prompt - the agent's instructions and constraints\n", + "system_prompt = \"\"\"\n", + "You are a helpful university class recommendation agent for Redis University.\n", + "Your role is to help students find courses, plan their academic journey, and\n", + "answer questions about the course catalog.\n", + "\n", + "## Your Responsibilities\n", + "\n", + "- Help students discover courses that match their interests and goals\n", + "- Provide accurate information about course content, prerequisites, and schedules\n", + "- Remember student preferences and use them to personalize recommendations\n", + "- Guide students toward courses that align with their major requirements\n", + "\n", + "## Important Constraints\n", + "\n", + "- Only recommend courses that exist in the course catalog\n", + "- Always check prerequisites before recommending a course\n", + "- Respect student preferences for course format (online, in-person, hybrid)\n", + "- Be honest when you don't know something - don't make up course information\n", + "\n", + "## Interaction Guidelines\n", + "\n", + "- Be friendly, encouraging, and supportive\n", + "- Ask clarifying questions when student requests are vague\n", + "- Explain your reasoning when making recommendations\n", + "- Keep responses concise but informative\n", + "- Use the student's name when you know it\n", + "\"\"\"\n", + "\n", + "print(\"System Context Example:\")\n", + "print(\"This system prompt defines the agent's role, responsibilities, and constraints.\")\n", + "print(\"It will be included in every conversation to maintain consistent behavior.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2. User Context Example\n", + "\n", + "User context contains information about the individual user. 
Let's create a student profile:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-30T02:35:54.331875Z", + "iopub.status.busy": "2025-10-30T02:35:54.331782Z", + "iopub.status.idle": "2025-10-30T02:35:54.334123Z", + "shell.execute_reply": "2025-10-30T02:35:54.333709Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Student Profile Example:\n", + "Name: Sarah Chen\n", + "Major: Computer Science\n", + "Interests: machine learning, data science, web development\n", + "Completed: 3 courses\n", + "Preferences: online, intermediate level\n" + ] + } + ], + "source": [ + "# Create a student profile with preferences and background\n", + "student_profile = {\n", + " \"name\": \"Sarah Chen\",\n", + " \"major\": \"Computer Science\",\n", + " \"year\": \"Junior\",\n", + " \"interests\": [\"machine learning\", \"data science\", \"web development\"],\n", + " \"completed_courses\": [\"CS101\", \"CS201\", \"MATH301\"],\n", + " \"preferred_format\": \"online\",\n", + " \"preferred_difficulty\": \"intermediate\",\n", + " \"learning_style\": \"hands-on projects\",\n", + " \"time_availability\": \"evenings and weekends\"\n", + "}\n", + "\n", + "print(\"Student Profile Example:\")\n", + "print(f\"Name: {student_profile['name']}\")\n", + "print(f\"Major: {student_profile['major']}\")\n", + "print(f\"Interests: {', '.join(student_profile['interests'])}\")\n", + "print(f\"Completed: {len(student_profile['completed_courses'])} courses\")\n", + "print(f\"Preferences: {student_profile['preferred_format']}, {student_profile['preferred_difficulty']} level\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3. Context Integration Example\n", + "\n", + "Now let's see how all the context types come together in a complete prompt that would be sent to the LLM:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-30T02:35:54.335262Z", + "iopub.status.busy": "2025-10-30T02:35:54.335160Z", + "iopub.status.idle": "2025-10-30T02:35:54.337536Z", + "shell.execute_reply": "2025-10-30T02:35:54.337083Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Complete Context Assembly Example:\n", + "This shows how system context, user context, and retrieved context\n", + "are combined into a single prompt for the LLM.\n" + ] + } + ], + "source": [ + "# Demonstrate how context is assembled for the LLM\n", + "user_query = \"I'm looking for courses related to machine learning\"\n", + "\n", + "# 1. System context (role and constraints)\n", + "system_context = system_prompt\n", + "\n", + "# 2. User context (student profile)\n", + "student_context = f\"\"\"Student Profile:\n", + "Name: {student_profile['name']}\n", + "Major: {student_profile['major']}\n", + "Interests: {', '.join(student_profile['interests'])}\n", + "Completed Courses: {', '.join(student_profile['completed_courses'])}\n", + "Preferred Format: {student_profile['preferred_format']}\n", + "Preferred Difficulty: {student_profile['preferred_difficulty']}\"\"\"\n", + "\n", + "# 3. 
Retrieved context (simulated course catalog)\n", + "course_catalog = \"\"\"Available Courses:\n", + "- CS401: Machine Learning Fundamentals (Prerequisites: CS201, MATH301)\n", + "- CS402: Deep Learning (Prerequisites: CS401)\n", + "- CS403: Natural Language Processing (Prerequisites: CS401)\n", + "- CS404: Computer Vision (Prerequisites: CS401)\"\"\"\n", + "\n", + "# 4. Assemble the complete prompt\n", + "complete_prompt = f\"\"\"SYSTEM PROMPT:\n", + "{system_context}\n", + "\n", + "STUDENT PROFILE:\n", + "{student_context}\n", + "\n", + "COURSE CATALOG:\n", + "{course_catalog}\n", + "\n", + "USER QUERY:\n", + "{user_query}\n", + "\n", + "Please provide a helpful response based on the student's profile and query.\"\"\"\n", + "\n", + "print(\"Complete Context Assembly Example:\")\n", + "print(\"This shows how system context, user context, and retrieved context\")\n", + "print(\"are combined into a single prompt for the LLM.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Key Takeaways\n", + "\n", + "From this introduction to context engineering, we can see several important principles:\n", + "\n", + "### 1. Context is Multi-Dimensional\n", + "- **System context**: What the AI knows about itself\n", + "- **User context**: What the AI knows about the user\n", + "- **Domain context**: What the AI knows about the subject matter\n", + "- **Conversation context**: What has been discussed recently\n", + "- **Historical context**: What has been learned over time\n", + "\n", + "Some of these sources are static, updated only when the agent's code changes, while others may be retrieved dynamically from external sources, such as via APIs or vector search.\n", + "\n", + "### 2. Memory is Essential\n", + "- **Working memory**: Maintains conversation flow and task-related context\n", + "- **Long-term memory**: Enables learning and personalization across sessions\n", + "\n", + "### 3. Context Must Be Actionable\n", + "- Information is only valuable if it can improve responses\n", + "- Context should be prioritized by relevance and importance\n", + "- The system must be able to integrate multiple context sources\n", + "\n", + "### 4. Context Engineering is Iterative\n", + "- Systems improve as they gather more context\n", + "- Context quality affects response quality\n", + "- Feedback loops help refine context management" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## What's Next in Your Journey\n", + "\n", + "You've now learned the fundamentals of context engineering and seen how it transforms AI systems from generic assistants into intelligent, personalized agents. 
You understand:\n", + "\n", + "- What context engineering is and why it matters\n", + "- The core components: system context, user context, conversation context, and retrieved context\n", + "- How context is assembled and integrated for AI systems\n", + "- The challenges that arise as systems scale\n", + "\n", + "### Your Learning Path Forward\n", + "\n", + "The next notebook will dive deeper into each context type with hands-on examples:\n", + "\n", + "**Next: Context Types Deep Dive**\n", + "- Master each of the four context types individually\n", + "- Build context management systems for each type\n", + "- Measure the impact of context on AI performance\n", + "- Design context strategies for different scenarios\n", + "\n", + "**Then: Advanced Techniques**\n", + "- **RAG Foundations**: Efficient information retrieval\n", + "- **Memory Architecture**: Long-term context management\n", + "- **Semantic Tool Selection**: Intelligent query routing\n", + "- **Context Optimization**: Compression and efficiency\n", + "- **Production Deployment**: Scalable systems\n", + "\n", + "### Try It Yourself\n", + "\n", + "Before moving on, experiment with the concepts we've covered:\n", + "\n", + "1. **Modify the student profile** - Change interests, preferences, or academic history\n", + "2. **Create different system prompts** - Try different roles and constraints\n", + "3. **Think about your own use case** - How would context engineering apply to your domain?\n", + "\n", + "The power of context engineering lies in its ability to make AI systems more intelligent, personalized, and useful. As we'll see in the following notebooks, the technical implementation of these concepts using modern AI tools makes it possible to build sophisticated, context-aware applications.\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ“š Additional Resources\n", + "\n", + "### **Core Concepts**\n", + "- [Prompt Engineering Guide](https://www.promptingguide.ai/) - Comprehensive guide to prompt engineering\n", + "- [OpenAI Prompt Engineering Guide](https://platform.openai.com/docs/guides/prompt-engineering) - Best practices for prompts\n", + "- [Anthropic's Guide to Building Effective Agents](https://www.anthropic.com/research/building-effective-agents) - Agent design patterns\n", + "\n", + "### **Context Management**\n", + "- [LangChain Documentation](https://python.langchain.com/docs/get_started/introduction) - Framework for context-aware applications\n", + "- [Context Window Management](https://help.openai.com/en/articles/4936856-what-are-tokens-and-how-to-count-them) - Understanding token limits\n", + "\n", + "### **Academic Papers**\n", + "- [In-Context Learning Survey](https://arxiv.org/abs/2301.00234) - Research on how LLMs use context\n", + "- [Retrieval-Augmented Generation](https://arxiv.org/abs/2005.11401) - Foundational RAG paper\n", + "\n", + "---\n", + "\n", + "**Continue to: `02_core_concepts.ipynb`**" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/python-recipes/context-engineering/notebooks/section-1-context-engineering-foundations/01_what_is_context_engineering.ipynb 
b/python-recipes/context-engineering/notebooks/section-1-context-engineering-foundations/01_what_is_context_engineering.ipynb new file mode 100644 index 00000000..8ba98d90 --- /dev/null +++ b/python-recipes/context-engineering/notebooks/section-1-context-engineering-foundations/01_what_is_context_engineering.ipynb @@ -0,0 +1,692 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![Redis](https://redis.io/wp-content/uploads/2024/04/Logotype.svg?auto=webp&quality=85,75&width=120)\n", + "\n", + "# What is Context Engineering?\n", + "\n", + "## The Problem\n", + "\n", + "Imagine you walk into a doctor's office. The doctor has never met you before, doesn't have access to your medical records, and can't remember anything you said five minutes ago. Every time you visit, it's like the first time all over again.\n", + "\n", + "Sound frustrating? That's what AI agents are like without context engineering.\n", + "\n", + "**Context Engineering** is the discipline of designing, implementing, and optimizing context management systems for AI agents. It's what transforms AI from a stateless question-answering machine into an intelligent assistant that:\n", + "\n", + "- Remembers who you are and what you've discussed\n", + "- Understands its role and capabilities\n", + "- Accesses relevant information from vast knowledge bases\n", + "- Maintains coherent, personalized interactions over time\n", + "\n", + "Think of context engineering as the **\"memory and awareness system\"** for AI agents.\n", + "\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Why Context Engineering Matters\n", + "\n", + "Let's explore this through a real-world example: a university course advisor.\n", + "\n", + "### Scenario: A Student Seeking Advice\n", + "\n", + "**Student Profile:**\n", + "- Name: Sarah Chen\n", + "- Major: Computer Science\n", + "- Year: Junior\n", + "- Completed: Intro to Programming (CS101), Data Structures (CS201), Calculus I\n", + "- Interests: Machine learning, data science\n", + "- Preferences: Prefers online courses, learns best with hands-on projects\n", + "- Goal: Build a career in AI\n", + "\n", + "### Without Context Engineering\n", + "\n", + "Here's what happens when an AI lacks proper context management:\n", + "\n", + "---\n", + "\n", + "**Turn 1:**\n", + "\n", + "*Student:* \"I'm interested in machine learning courses.\"\n", + "\n", + "*Agent:* \"Here are all machine learning courses available:\n", + "- CS301: Introduction to Machine Learning\n", + "- CS401: Advanced Machine Learning\n", + "- CS402: Deep Learning\n", + "- CS403: Natural Language Processing\n", + "- CS404: Computer Vision\n", + "\n", + "All courses meet twice per week on campus.\"\n", + "\n", + "---\n", + "\n", + "**Turn 2:**\n", + "\n", + "*Student:* \"I prefer online courses.\"\n", + "\n", + "*Agent:* \"Here are all machine learning courses available:\n", + "- CS301: Introduction to Machine Learning\n", + "- CS401: Advanced Machine Learning\n", + "...\"\n", + "\n", + "*(Same list, ignoring the preference)*\n", + "\n", + "---\n", + "\n", + "**Turn 3:**\n", + "\n", + "*Student:* \"Can I take CS401?\"\n", + "\n", + "*Agent:* \"CS401 requires CS301 as a prerequisite. 
Do you meet this requirement?\"\n", + "\n", + "*(Doesn't know what courses the student has completed)*\n", + "\n", + "---\n", + "\n", + "**Turn 4:**\n", + "\n", + "*Student:* \"What about that intro course you mentioned earlier?\"\n", + "\n", + "*Agent:* \"I'm not sure which course you're referring to. Could you provide more details?\"\n", + "\n", + "*(Can't remember its own previous responses)*\n", + "\n", + "---\n", + "\n", + "### The Problems Are Clear\n", + "\n", + "**No Memory:**\n", + "- Can't remember what it just said\n", + "- Can't reference earlier parts of the conversation\n", + "- Forces the user to repeat information\n", + "\n", + "**No Personalization:**\n", + "- Ignores user preferences and background\n", + "- Gives generic responses regardless of the user's situation\n", + "- Can't tailor recommendations\n", + "\n", + "**No Awareness:**\n", + "- Doesn't know the user's history or capabilities\n", + "- Can't check prerequisites or eligibility\n", + "- Can't connect to other relevant information\n", + "\n", + "**Result:** A frustrating experience that wastes time and provides little value.\n", + "\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## With Context Engineering\n", + "\n", + "Now let's see the same conversation with proper context management:\n", + "\n", + "---\n", + "\n", + "**Turn 1:**\n", + "\n", + "*Student:* \"I'm interested in machine learning courses.\"\n", + "\n", + "*Agent:* \"Great! Based on your Computer Science major and the courses you've completed (CS101 and CS201), you're ready for CS301: Introduction to Machine Learning.\n", + "\n", + "Since you prefer online courses and hands-on learning, I should mention that CS301 has:\n", + "- A fully online section available\n", + "- Weekly programming assignments with real datasets\n", + "- A final project where you build your own ML system\n", + "\n", + "This course aligns perfectly with your goal of building an AI career and serves as the foundation for more advanced ML courses.\"\n", + "\n", + "---\n", + "\n", + "**Turn 2:**\n", + "\n", + "*Student:* \"That sounds perfect! What comes after that?\"\n", + "\n", + "*Agent:* \"After completing CS301, you'll have several great options:\n", + "\n", + "**Immediately Available:**\n", + "- CS402: Deep Learning (Online, project-based)\n", + " Focus: Neural networks, CNNs, RNNs\n", + " \n", + "- CS403: Natural Language Processing (Online, project-based)\n", + " Focus: Text analysis, transformers, LLMs\n", + "\n", + "**Recommended Path for AI Career:**\n", + "1. CS301 (Introduction) โ† Start here\n", + "2. CS402 (Deep Learning) โ† Most relevant to modern AI\n", + "3. 
CS403 (NLP) or CS404 (Computer Vision) โ† Specialize\n", + "\n", + "Given your interest in data science, I'd especially recommend the NLP path - it combines well with your data structures background.\"\n", + "\n", + "---\n", + "\n", + "**Turn 3:**\n", + "\n", + "*Student:* \"How long will that path take?\"\n", + "\n", + "*Agent:* \"Let me break it down for you:\n", + "\n", + "**CS301:** 14 weeks (3 credits)\n", + "**CS402:** 14 weeks (3 credits) \n", + "**CS403:** 14 weeks (3 credits)\n", + "\n", + "If you take one course per semester:\n", + "- Fall 2024: CS301\n", + "- Spring 2025: CS402\n", + "- Fall 2025: CS403\n", + "\n", + "Since you're currently a Junior, this timeline would have you completing this sequence before graduation, with one semester remaining for an internship or capstone project.\n", + "\n", + "Alternatively, since these are online courses, you could potentially take CS301 this fall and CS402 in a winter intersession to accelerate your path.\"\n", + "\n", + "---\n", + "\n", + "**Turn 4:**\n", + "\n", + "*Student:* \"I like that accelerated option. Can I register now?\"\n", + "\n", + "*Agent:* \"Yes! Since you've met the prerequisites for CS301, I can guide you through registration. The online section (CS301-W01) has 15 seats remaining.\n", + "\n", + "Would you like me to:\n", + "1. Reserve a seat for you in CS301-W01 for Fall 2024?\n", + "2. Add you to the notification list for the Winter CS402 section (registration opens in October)?\n", + "3. Send you the course syllabus and professor information?\"\n", + "\n", + "---\n", + "\n", + "### The Difference is Dramatic\n", + "\n", + "**With context engineering, the AI:**\n", + "\n", + "โœ… **Knows who Sarah is** - Major, year, completed courses, interests \n", + "โœ… **Remembers the conversation** - References earlier discussion naturally \n", + "โœ… **Provides personalized guidance** - Filters by online preference, learning style \n", + "โœ… **Checks prerequisites** - Validates eligibility automatically \n", + "โœ… **Plans ahead** - Creates a timeline aligned with graduation \n", + "โœ… **Takes action** - Can complete registration, not just discuss it \n", + "\n", + "**Result:** An intelligent, helpful experience that saves time and provides genuine value.\n", + "\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## The Business Impact\n", + "\n", + "Poor context management doesn't just frustrate users - it has real business consequences:\n", + "\n", + "### User Experience Degradation\n", + "\n", + "**Without Context Engineering:**\n", + "- Users must repeat information constantly\n", + "- Generic responses feel impersonal and unhelpful\n", + "- Users abandon interactions midway\n", + "- Low satisfaction scores, poor reviews\n", + "\n", + "**Metric Impact:**\n", + "- 40-60% task abandonment rates\n", + "- 2.1/5 average satisfaction ratings\n", + "- High support ticket volume for \"AI didn't understand me\"\n", + "\n", + "### Operational Inefficiency\n", + "\n", + "**Without Context Engineering:**\n", + "- AI can't complete multi-step workflows\n", + "- Human agents must intervene frequently\n", + "- Same questions asked repeatedly without learning\n", + "- Context is lost between channels (chat โ†’ email โ†’ phone)\n", + "\n", + "**Cost Impact:**\n", + "- 3-5x more interactions needed to complete tasks\n", + "- 40% escalation rate to human agents\n", + "- Lost productivity from context-switching\n", + "\n", + "### Limited Capabilities\n", + "\n", + "**Without Context Engineering:**\n", 
+    "- Can't handle complex, multi-step tasks\n",
+    "- No learning or improvement over time\n",
+    "- Poor integration with existing systems\n",
+    "- Can't provide proactive assistance\n",
+    "\n",
+    "**Strategic Impact:**\n",
+    "- AI remains a \"nice-to-have\" rather than core capability\n",
+    "- Can't automate valuable workflows\n",
+    "- Competitive disadvantage vs. better AI implementations\n",
+    "\n",
+    "---"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 🔬 The Context Rot Problem\n",
+    "\n",
+    "Recent research from Chroma (July 2025) reveals a critical challenge in working with LLMs: **context rot** - the phenomenon where LLM performance degrades non-uniformly as input context length increases, even on simple tasks.\n",
+    "\n",
+    "### Key Research Findings\n",
+    "\n",
+    "**1. Non-Uniform Performance Degradation**\n",
+    "- Models don't process the 10,000th token as reliably as the 100th token\n",
+    "- Performance drops aren't linear - they accelerate as context grows\n",
+    "- Even simple tasks like word repetition fail with long context\n",
+    "\n",
+    "**2. Needle-Question Similarity Matters**\n",
+    "- Lower similarity between questions and retrieved information causes faster performance degradation\n",
+    "- High semantic relevance is critical for maintaining accuracy\n",
+    "- Generic or loosely related context actively harms performance\n",
+    "\n",
+    "**3. Distractors Have Amplified Impact**\n",
+    "- Similar-but-wrong information (distractors) degrades performance more as context grows\n",
+    "- The negative impact of irrelevant information is non-linear\n",
+    "- Filtering out low-relevance content is as important as finding relevant content\n",
+    "\n",
+    "**4. Structure Affects Attention**\n",
+    "- How you organize context affects model performance\n",
+    "- Counterintuitively, shuffled text sometimes performs better than coherent text\n",
+    "- Context window position matters - information placement impacts retrieval accuracy\n",
+    "\n",
+    "### Why This Matters for Context Engineering\n",
+    "\n",
+    "The Context Rot research validates the core principles of this course:\n",
+    "\n",
+    "✅ **Quality Over Quantity**\n",
+    "More context isn't always better. Adding marginally relevant information can hurt performance more than it helps.\n",
+    "\n",
+    "✅ **Semantic Similarity is Critical**\n",
+    "High relevance between queries and retrieved context is essential. RAG systems must prioritize precision over recall.\n",
+    "\n",
+    "✅ **Structure Matters**\n",
+    "How you organize and present context affects LLM attention mechanisms. Context engineering isn't just about *what* information to include, but *how* to structure it.\n",
+    "\n",
+    "✅ **Distractor Removal**\n",
+    "Filtering out low-relevance information improves performance. 
Memory systems must be selective about what they store and retrieve.\n", + "\n", + "โœ… **Context Window Management**\n", + "Understanding token limits isn't enough - you must understand how performance degrades within those limits.\n", + "\n", + "**This course teaches you techniques to engineer context effectively and avoid these pitfalls.**\n", + "\n", + "๐Ÿ“š **Read the full paper:** [Context Rot: How Increasing Input Tokens Impacts LLM Performance](https://research.trychroma.com/context-rot)\n", + "\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## The Four Pillars of Context Engineering\n", + "\n", + "Context engineering involves managing four distinct types of context, each serving a different purpose:\n", + "\n", + "### 1. System Context: \"What Am I?\"\n", + "\n", + "Defines the AI's identity, capabilities, and knowledge.\n", + "\n", + "**Contains:**\n", + "- Role definition (\"You are a course advisor\")\n", + "- Available tools and actions\n", + "- Domain knowledge and business rules\n", + "- Behavioral guidelines\n", + "\n", + "**Example:**\n", + "```\n", + "You are a university course advisor specializing in Computer Science.\n", + "\n", + "Available courses: [course catalog]\n", + "Prerequisites rules: [prerequisite map]\n", + "Registration policies: [policy document]\n", + "\n", + "Always verify prerequisites before recommending courses.\n", + "Prioritize student goals when making recommendations.\n", + "```\n", + "\n", + "**Characteristics:** Static, universal, always present\n", + "\n", + "---\n", + "\n", + "### 2. User Context: \"Who Are You?\"\n", + "\n", + "Contains personal information about the specific user.\n", + "\n", + "**Contains:**\n", + "- Profile information (major, year, background)\n", + "- Preferences and learning style\n", + "- History and achievements\n", + "- Goals and constraints\n", + "\n", + "**Example:**\n", + "```\n", + "Student: Sarah Chen\n", + "Major: Computer Science (Junior)\n", + "Completed: CS101, CS201, MATH301\n", + "Interests: Machine learning, data science\n", + "Preferences: Online courses, hands-on projects\n", + "Goal: Build AI career\n", + "```\n", + "\n", + "**Characteristics:** Dynamic, personalized, retrieved from storage\n", + "\n", + "---\n", + "\n", + "### 3. Conversation Context: \"What Have We Discussed?\"\n", + "\n", + "The history of the current conversation.\n", + "\n", + "**Contains:**\n", + "- Previous user messages\n", + "- Previous AI responses\n", + "- Decisions and commitments made\n", + "- Topics explored\n", + "\n", + "**Example:**\n", + "```\n", + "Turn 1:\n", + "User: \"I'm interested in machine learning courses.\"\n", + "AI: \"I recommend CS301: Introduction to Machine Learning...\"\n", + "\n", + "Turn 2:\n", + "User: \"What comes after that?\"\n", + "AI: \"After CS301, you can take CS402 or CS403...\"\n", + "\n", + "Turn 3:\n", + "User: \"How long will that path take?\"\n", + "[Current query - needs context from Turn 2 to understand \"that path\"]\n", + "```\n", + "\n", + "**Characteristics:** Dynamic, session-specific, grows over time\n", + "\n", + "---\n", + "\n", + "### 4. 
Retrieved Context: \"What Information Is Relevant?\"\n",
+    "\n",
+    "Information fetched on-demand based on the current query.\n",
+    "\n",
+    "**Contains:**\n",
+    "- Database records (course details, schedules)\n",
+    "- Search results (relevant documents, FAQs)\n",
+    "- API responses (real-time data, availability)\n",
+    "- Computed information (eligibility checks, recommendations)\n",
+    "\n",
+    "**Example:**\n",
+    "```\n",
+    "[User asked about CS301]\n",
+    "\n",
+    "Retrieved:\n",
+    "- CS301 course details (description, prerequisites, format)\n",
+    "- Current availability (15 seats in online section)\n",
+    "- Professor ratings and reviews\n",
+    "- Prerequisite check result (✓ Eligible)\n",
+    "```\n",
+    "\n",
+    "**Characteristics:** Dynamic, query-specific, highly targeted\n",
+    "\n",
+    "---"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## The Fundamental Challenge: Context Windows\n",
+    "\n",
+    "Here's the constraint that makes context engineering necessary:\n",
+    "\n",
+    "### Every AI Model Has a Token Limit\n",
+    "\n",
+    "AI models can only process a fixed amount of text in a single request - called the **context window**.\n",
+    "\n",
+    "| Model | Context Window |\n",
+    "|-------|----------------|\n",
+    "| GPT-4o | 128,000 tokens (~96,000 words) |\n",
+    "| GPT-4o-mini | 128,000 tokens (~96,000 words) |\n",
+    "| Claude 3.5 Sonnet | 200,000 tokens (~150,000 words) |\n",
+    "\n",
+    "### What Competes for This Space?\n",
+    "\n",
+    "Everything must fit within this limit:\n",
+    "\n",
+    "```\n",
+    "┌────────────────────────────────────┐\n",
+    "│ CONTEXT WINDOW (128K tokens)       │\n",
+    "├────────────────────────────────────┤\n",
+    "│ System Context  │   2,000 tokens │ ← AI's role and rules\n",
+    "│ User Context    │   1,000 tokens │ ← Your profile\n",
+    "│ Conversation    │   4,000 tokens │ ← What we've discussed\n",
+    "│ Retrieved Info  │   5,000 tokens │ ← Relevant data\n",
+    "│ Your Query      │     100 tokens │ ← Current question\n",
+    "│ Response Space  │   4,000 tokens │ ← AI's answer\n",
+    "├────────────────────────────────────┤\n",
+    "│ TOTAL           │  16,100 tokens │\n",
+    "│ REMAINING       │ 111,900 tokens │\n",
+    "└────────────────────────────────────┘\n",
+    "```\n",
+    "\n",
+    "### The Core Trade-off\n",
+    "\n",
+    "**Every token spent on one thing is a token NOT available for another.**\n",
+    "\n",
+    "This means you must constantly decide:\n",
+    "- Which context is most relevant?\n",
+    "- What can be omitted without hurting quality?\n",
+    "- When to retrieve more vs. 
use what you have?\n", + "- How to compress long conversations?\n", + "\n", + "**Context engineering is optimization within constraints.**\n", + "\n", + "As conversations grow longer, systems accumulate more data, and applications become more sophisticated, context management becomes increasingly critical.\n", + "\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Real-World Applications\n", + "\n", + "Context engineering isn't just theoretical - it's essential for any production AI system:\n", + "\n", + "### Customer Support Agents\n", + "\n", + "**Context Needed:**\n", + "- Customer profile and purchase history (User Context)\n", + "- Previous support tickets and resolutions (Conversation Context)\n", + "- Product documentation and FAQs (Retrieved Context)\n", + "- Company policies and escalation procedures (System Context)\n", + "\n", + "**Without proper context:** Agent can't see order history, doesn't remember previous issues, can't access relevant documentation โ†’ frustrated customers, high escalation rates\n", + "\n", + "### Healthcare Assistants\n", + "\n", + "**Context Needed:**\n", + "- Patient medical history and conditions (User Context)\n", + "- Current conversation and symptoms (Conversation Context)\n", + "- Relevant medical guidelines and drug interactions (Retrieved Context)\n", + "- Clinical protocols and legal requirements (System Context)\n", + "\n", + "**Without proper context:** Can't consider patient history, might miss contraindications, can't follow proper diagnostic protocols โ†’ dangerous mistakes\n", + "\n", + "### Sales Assistants\n", + "\n", + "**Context Needed:**\n", + "- Customer demographics and past purchases (User Context)\n", + "- Current conversation and stated needs (Conversation Context)\n", + "- Product catalog and inventory (Retrieved Context)\n", + "- Pricing rules and promotional policies (System Context)\n", + "\n", + "**Without proper context:** Makes inappropriate recommendations, can't personalize offers, doesn't know what's in stock โ†’ lost sales\n", + "\n", + "### Research Assistants\n", + "\n", + "**Context Needed:**\n", + "- Researcher's field and prior work (User Context)\n", + "- Research question evolution (Conversation Context)\n", + "- Relevant papers and datasets (Retrieved Context)\n", + "- Methodological guidelines and ethics (System Context)\n", + "\n", + "**Without proper context:** Suggests irrelevant papers, doesn't build on previous research direction, can't filter by expertise level โ†’ wasted time\n", + "\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## What Makes Context Engineering Hard?\n", + "\n", + "If context is so important, why don't all AI systems handle it well? Several challenges:\n", + "\n", + "### 1. Scale and Complexity\n", + "\n", + "- **User base:** Managing context for millions of users\n", + "- **Data volume:** Gigabytes of documents, conversation history, user profiles\n", + "- **Real-time constraints:** Must retrieve relevant context in milliseconds\n", + "- **Multi-modal:** Text, images, structured data, API responses\n", + "\n", + "### 2. Relevance Determination\n", + "\n", + "- **Semantic understanding:** \"ML courses\" and \"machine learning classes\" are the same\n", + "- **Context dependency:** Relevance changes based on user background and goals\n", + "- **Implicit needs:** User asks X but really needs Y\n", + "- **Conflicting signals:** Multiple pieces of context suggest different actions\n", + "\n", + "### 3. 
Memory Management\n", + "\n", + "- **What to remember:** Important facts vs. casual remarks\n", + "- **How long to remember:** Session vs. long-term memory\n", + "- **When to forget:** Outdated info, privacy requirements\n", + "- **How to summarize:** Compress long conversations without losing meaning\n", + "\n", + "### 4. Integration Challenges\n", + "\n", + "- **Multiple data sources:** CRM, databases, APIs, documents\n", + "- **Different formats:** JSON, text, tables, graphs\n", + "- **Access control:** Privacy, permissions, data sovereignty\n", + "- **Latency requirements:** Fast retrieval vs. comprehensive search\n", + "\n", + "### 5. Cost and Performance\n", + "\n", + "- **Token costs:** More context = higher API costs\n", + "- **Latency:** More retrieval = slower responses\n", + "- **Storage:** Maintaining user profiles and conversation history\n", + "- **Compute:** Embeddings, similarity search, real-time updates\n", + "\n", + "**This is why context engineering is a specialized discipline.**\n", + "\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Your Learning Journey\n", + "\n", + "You now understand **why** context engineering matters. You've seen:\n", + "\n", + "โœ… The dramatic difference between AI with and without proper context \n", + "โœ… The business impact of poor context management \n", + "โœ… The four core context types and their purposes \n", + "โœ… The fundamental constraint of context windows \n", + "โœ… Real-world applications across industries \n", + "โœ… The challenges that make this discipline necessary \n", + "\n", + "### What Comes Next\n", + "\n", + "Now that you understand the \"why,\" it's time to learn the \"how.\"\n", + "\n", + "In the next notebook, you'll get hands-on experience with:\n", + "\n", + "**Context Types Deep Dive**\n", + "- Building each context type step-by-step\n", + "- Formatting context for LLMs\n", + "- Combining multiple context types\n", + "- Managing token budgets\n", + "- Implementing adaptive context strategies\n", + "\n", + "You'll build a working Redis University course advisor that uses all four context types to provide intelligent, personalized recommendations.\n", + "\n", + "**By the end of the next notebook, you'll be able to:**\n", + "- Build context-aware AI agents from scratch\n", + "- Choose the right context type for each piece of information\n", + "- Optimize context usage within token constraints\n", + "- Test and iterate on context strategies\n", + "\n", + "### The Path Forward\n", + "\n", + "This course follows a carefully designed progression:\n", + "\n", + "**Chapter 1: Foundations** โ† You are here\n", + "- Understanding context engineering (โœ“)\n", + "- Implementing the four context types (Next โ†’)\n", + "\n", + "**Chapter 2: RAG Systems**\n", + "\n", + "**Chapter 3: Incorporating Memory**\n", + "- Long-term memory with Redis Agent Memory Server\n", + "- Working memory patterns\n", + "- Multi-agent memory coordination\n", + "\n", + "**Chapter 4: Agent with tools**\n", + "Each chapter builds on the previous one, taking you from fundamentals to production-ready systems.\n", + "\n", + "---\n", + "\n", + "## Ready to Build?\n", + "\n", + "You've seen the power of context engineering and understand why it's critical for AI systems.\n", + "\n", + "Now it's time to build one yourself.\n", + "\n", + "**Continue to: `02_context_assembly_strategies.ipynb` โ†’**\n", + "\n", + "In the next notebook, you'll write code, format context, make LLM calls, and see real results. 
You'll transform from understanding *why* context matters to knowing *how* to implement it effectively.\n", + "\n", + "Let's get started." + ] + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": "" + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/python-recipes/context-engineering/notebooks/section-1-context-engineering-foundations/02_context_assembly_strategies.ipynb b/python-recipes/context-engineering/notebooks/section-1-context-engineering-foundations/02_context_assembly_strategies.ipynb new file mode 100644 index 00000000..40d60518 --- /dev/null +++ b/python-recipes/context-engineering/notebooks/section-1-context-engineering-foundations/02_context_assembly_strategies.ipynb @@ -0,0 +1,1633 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![Redis](https://redis.io/wp-content/uploads/2024/04/Logotype.svg?auto=webp&quality=85,75&width=120)\n", + "\n", + "# Context Types Deep Dive\n", + "\n", + "## What You'll Learn\n", + "\n", + "In this notebook, you'll master the four core context types that power intelligent AI agents:\n", + "\n", + "1. **System Context** - The AI's role and domain knowledge\n", + "2. **User Context** - Personal profiles and preferences\n", + "3. **Conversation Context** - Dialogue history and flow\n", + "4. **Retrieved Context** - Dynamic information from external sources\n", + "\n", + "You'll learn both the **theory** (what each type is and when to use it) and the **practice** (how to build and combine them effectively).\n", + "\n", + "**Time to complete:** 20-25 minutes\n", + "\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup\n", + "\n", + "Let's start with the essentials. You'll need an OpenAI API key to run the examples." 
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "from openai import OpenAI\n",
+    "from dotenv import load_dotenv\n",
+    "\n",
+    "# Load environment variables\n",
+    "load_dotenv()\n",
+    "\n",
+    "# Initialize OpenAI client\n",
+    "client = OpenAI(api_key=os.getenv(\"OPENAI_API_KEY\"))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "---\n",
+    "\n",
+    "## Understanding the Context Window Constraint\n",
+    "\n",
+    "Before we dive into context types, you need to understand the fundamental limitation that shapes all context engineering decisions.\n",
+    "\n",
+    "### The Token Limit Reality\n",
+    "\n",
+    "Every AI model has a **context window** - a maximum amount of text it can process in a single request.\n",
+    "\n",
+    "| Model | Context Window | Approximate Words |\n",
+    "|-------|----------------|-------------------|\n",
+    "| GPT-4o | 128,000 tokens | ~96,000 words |\n",
+    "| GPT-4o-mini | 128,000 tokens | ~96,000 words |\n",
+    "| Claude 3.5 Sonnet | 200,000 tokens | ~150,000 words |\n",
+    "\n",
+    "**Note:** 1 token ≈ 0.75 words in English\n",
+    "\n",
+    "### What Competes for This Space?\n",
+    "\n",
+    "Every element of your request must fit within this limit:\n",
+    "\n",
+    "```\n",
+    "┌──────────────────────────────────┐\n",
+    "│ CONTEXT WINDOW (128K tokens)     │\n",
+    "├──────────────────────────────────┤\n",
+    "│ System Instructions  │   2,000 │\n",
+    "│ Tool Definitions     │   3,000 │\n",
+    "│ User Profile         │   1,000 │\n",
+    "│ Conversation History │   4,000 │\n",
+    "│ Retrieved Context    │   5,000 │\n",
+    "│ User Query           │     500 │\n",
+    "│ Response Space       │   4,000 │\n",
+    "├──────────────────────────────────┤\n",
+    "│ TOTAL USED           │  19,500 │\n",
+    "│ REMAINING            │ 108,500 │\n",
+    "└──────────────────────────────────┘\n",
+    "```\n",
+    "\n",
+    "### The Core Trade-off\n",
+    "\n",
+    "**Every token spent on one thing is a token NOT available for another.**\n",
+    "\n",
+    "This means context engineering requires constant decision-making:\n",
+    "- Is this information relevant to the current query?\n",
+    "- Does including this improve response quality?\n",
+    "- Is the improvement worth the token cost?\n",
+    "\n",
+    "**All three must be \"yes\" or don't include it.**\n",
+    "\n",
+    "---"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## The Four Core Context Types\n",
+    "\n",
+    "Every context-aware AI system uses these four components. Let's explore each one, understand when to use it, and learn how to implement it.\n",
+    "\n",
+    "Throughout this notebook, we'll build a **Redis University course advisor** that helps students choose the right courses based on their background, goals, and learning path.\n",
+    "\n",
+    "---"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 1. System Context: The AI's Identity\n",
+    "\n",
+    "### What Is System Context?\n",
+    "\n",
+    "System context defines **what the AI is** and **what it knows**. 
This is static information that:\n", + "- Applies to ALL users equally\n", + "- Rarely changes (typically only with code deployments)\n", + "- Is hardcoded in your application\n", + "- Must always be present\n", + "\n", + "### What Goes in System Context?\n", + "\n", + "1. **Role Definition** - What is the AI's purpose?\n", + "2. **Domain Knowledge** - What information does it have?\n", + "3. **Behavioral Instructions** - How should it respond?\n", + "4. **Business Rules** - What constraints apply?\n", + "\n", + "### When to Use System Context\n", + "\n", + "Use system context for information that:\n", + "- โœ… Defines the agent's core identity\n", + "- โœ… Contains universal business logic\n", + "- โœ… Provides essential domain knowledge\n", + "- โœ… Should be consistent across all interactions\n", + "\n", + "### Building System Context Step-by-Step" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# Step 1: Define the AI's role\n", + "system_context = \"\"\"You are a Redis University course advisor.\"\"\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is the foundation - but it's not enough. The AI needs domain knowledge to be useful." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# Step 2: Add domain knowledge (available courses)\n", + "system_context = \"\"\"You are a Redis University course advisor.\n", + "\n", + "Available Courses:\n", + "- RU101: Introduction to Redis Data Structures (Beginner, 4-6 hours)\n", + " Learn Redis fundamentals: strings, hashes, lists, sets, sorted sets\n", + "\n", + "- RU201: Redis for Python Developers (Intermediate, 6-8 hours)\n", + " Prerequisites: RU101, Python experience\n", + " Build Redis applications with Python and redis-py\n", + "\n", + "- RU202: Redis for Java Developers (Intermediate, 6-8 hours)\n", + " Prerequisites: RU101, Java experience\n", + " Build Redis applications with Java and Jedis\n", + "\n", + "- RU301: Vector Similarity Search with Redis (Advanced, 8-10 hours)\n", + " Prerequisites: RU201 or RU202, ML/AI interest\n", + " Implement semantic search and RAG systems\n", + "\"\"\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we have both role and knowledge. Finally, let's add behavioral guidance." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# Step 3: Add behavioral instructions and business rules\n", + "system_context = \"\"\"You are a Redis University course advisor.\n", + "\n", + "Available Courses:\n", + "- RU101: Introduction to Redis Data Structures (Beginner, 4-6 hours)\n", + " Learn Redis fundamentals: strings, hashes, lists, sets, sorted sets\n", + "\n", + "- RU201: Redis for Python Developers (Intermediate, 6-8 hours)\n", + " Prerequisites: RU101, Python experience\n", + " Build Redis applications with Python and redis-py\n", + "\n", + "- RU202: Redis for Java Developers (Intermediate, 6-8 hours)\n", + " Prerequisites: RU101, Java experience\n", + " Build Redis applications with Java and Jedis\n", + "\n", + "- RU301: Vector Similarity Search with Redis (Advanced, 8-10 hours)\n", + " Prerequisites: RU201 or RU202, ML/AI interest\n", + " Implement semantic search and RAG systems\n", + "\n", + "Guidelines:\n", + "1. Always provide specific course recommendations with clear reasoning\n", + "2. 
Consider the student's background, completed courses, and interests\n", + "3. Ensure prerequisites are met before recommending advanced courses\n", + "4. Be encouraging and supportive in your guidance\n", + "\"\"\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Key Insight: System Context is Universal\n", + "\n", + "Notice that system context doesn't mention any specific user. It's the same for everyone. Whether the student is Sarah, Alex, or anyone else, this context remains constant.\n", + "\n", + "This is what makes it \"static\" - you write it once in your code and it's always present with a fixed token cost.\n", + "\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. User Context: Personal Information\n", + "\n", + "### What Is User Context?\n", + "\n", + "User context contains **information about the specific user** that enables personalization. Unlike system context, this is dynamic and different for each user.\n", + "\n", + "### What Goes in User Context?\n", + "\n", + "1. **Profile Information** - Name, background, experience level\n", + "2. **Learning History** - Completed courses, achievements\n", + "3. **Preferences** - Learning style, time availability, interests\n", + "4. **Goals** - What the user wants to achieve\n", + "\n", + "### When to Use User Context\n", + "\n", + "Use user context when:\n", + "- โœ… Information is specific to an individual user\n", + "- โœ… Personalization will significantly improve responses\n", + "- โœ… The information persists across multiple sessions\n", + "- โœ… You have a reliable way to store and retrieve user data\n", + "\n", + "### Building User Context Step-by-Step" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# Step 1: Create a user profile as a dictionary\n", + "# In production, this would come from a database\n", + "sarah_profile = {\n", + " \"name\": \"Sarah Chen\",\n", + " \"background\": \"Python developer, 2 years experience\",\n", + " \"completed_courses\": [\"RU101\"],\n", + " \"interests\": [\"machine learning\", \"data science\", \"vector search\"],\n", + " \"time_availability\": \"evenings and weekends\",\n", + " \"goal\": \"Build a RAG system for my company's documentation\"\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The dictionary format is great for storage, but we need to format it for the LLM." 
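+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "On the storage half, a production system might keep such a profile in Redis. The next cell is a minimal, optional sketch using redis-py - the key name, connection details, and JSON-encoding scheme are all illustrative assumptions, not part of this course's reference implementation."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Optional sketch: persist the profile as a Redis hash (hypothetical key name)\n",
+    "import json\n",
+    "import redis\n",
+    "\n",
+    "r = redis.Redis(host=\"localhost\", port=6379, decode_responses=True)\n",
+    "\n",
+    "# Hash fields must be flat strings, so JSON-encode the list values\n",
+    "r.hset(\"student:sarah_chen\", mapping={\n",
+    "    k: json.dumps(v) if isinstance(v, list) else v\n",
+    "    for k, v in sarah_profile.items()\n",
+    "})\n",
+    "\n",
+    "# Read it back, decoding the JSON-encoded lists\n",
+    "stored = r.hgetall(\"student:sarah_chen\")\n",
+    "restored_profile = {\n",
+    "    k: json.loads(v) if v.startswith(\"[\") else v\n",
+    "    for k, v in stored.items()\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now for the formatting half - converting the dictionary into a context string the LLM can read:"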
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Student Profile:\n", + "- Name: Sarah Chen\n", + "- Background: Python developer, 2 years experience\n", + "- Completed Courses: RU101\n", + "- Interests: machine learning, data science, vector search\n", + "- Availability: evenings and weekends\n", + "- Goal: Build a RAG system for my company's documentation\n", + "\n" + ] + } + ], + "source": [ + "# Step 2: Format as context for the LLM\n", + "def format_user_context(profile):\n", + " \"\"\"Convert user profile dictionary to formatted context string\"\"\"\n", + " return f\"\"\"Student Profile:\n", + "- Name: {profile['name']}\n", + "- Background: {profile['background']}\n", + "- Completed Courses: {', '.join(profile['completed_courses'])}\n", + "- Interests: {', '.join(profile['interests'])}\n", + "- Availability: {profile['time_availability']}\n", + "- Goal: {profile['goal']}\n", + "\"\"\"\n", + "\n", + "user_context = format_user_context(sarah_profile)\n", + "print(user_context)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Understanding User Context Differences\n", + "\n", + "Let's create another user to see how context changes:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Student Profile:\n", + "- Name: Alex Kumar\n", + "- Background: Java backend engineer, 5 years experience\n", + "- Completed Courses: RU101, RU202\n", + "- Interests: distributed systems, caching, performance optimization\n", + "- Availability: flexible schedule\n", + "- Goal: Optimize database query performance with Redis caching\n", + "\n" + ] + } + ], + "source": [ + "# Create a different user with different needs\n", + "alex_profile = {\n", + " \"name\": \"Alex Kumar\",\n", + " \"background\": \"Java backend engineer, 5 years experience\",\n", + " \"completed_courses\": [\"RU101\", \"RU202\"],\n", + " \"interests\": [\"distributed systems\", \"caching\", \"performance optimization\"],\n", + " \"time_availability\": \"flexible schedule\",\n", + " \"goal\": \"Optimize database query performance with Redis caching\"\n", + "}\n", + "\n", + "alex_context = format_user_context(alex_profile)\n", + "print(alex_context)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Key Insight: Different Users = Different Context\n", + "\n", + "Notice how Sarah and Alex have:\n", + "- Different programming backgrounds (Python vs Java)\n", + "- Different completed courses\n", + "- Different interests and goals\n", + "\n", + "This personalized context allows the AI to give tailored recommendations. Sarah might be guided toward RU201 and RU301, while Alex might focus on advanced caching strategies.\n", + "\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Conversation Context: Maintaining Dialogue Flow\n", + "\n", + "### What Is Conversation Context?\n", + "\n", + "Conversation context is the **history of the current dialogue**. It allows the AI to:\n", + "- Remember what was just discussed\n", + "- Understand references like \"it\" or \"that course\"\n", + "- Build on previous responses\n", + "- Maintain coherent multi-turn conversations\n", + "\n", + "### What Goes in Conversation Context?\n", + "\n", + "1. **Previous User Messages** - What the user has asked\n", + "2. 
**Previous AI Responses** - What the AI has said\n", + "3. **Context from Earlier in the Session** - Background established during this interaction\n", + "\n", + "### When to Use Conversation Context\n", + "\n", + "Always include conversation context for:\n", + "- โœ… Multi-turn conversations (more than a single Q&A)\n", + "- โœ… When users reference \"it\", \"that\", or previous topics\n", + "- โœ… When building on previous responses\n", + "- โœ… When maintaining coherent dialogue\n", + "\n", + "### Building Conversation Context Step-by-Step" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# Step 1: Start with an empty conversation history\n", + "conversation_history = []" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As the conversation progresses, we add each exchange to the history." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# Step 2: Add the first user message\n", + "conversation_history.append({\n", + " \"role\": \"user\",\n", + " \"content\": \"What Redis course should I take next?\"\n", + "})" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# Step 3: Add the AI's response (simulated)\n", + "conversation_history.append({\n", + " \"role\": \"assistant\",\n", + " \"content\": \"\"\"Based on your Python background and completion of RU101, \n", + "I recommend RU201: Redis for Python Developers. This course will teach you \n", + "how to build Redis applications using redis-py, which aligns perfectly with \n", + "your goal of building a RAG system.\"\"\"\n", + "})" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# Step 4: Add a follow-up question that references previous context\n", + "conversation_history.append({\n", + " \"role\": \"user\",\n", + " \"content\": \"How long will that take me to complete?\"\n", + "})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Notice the user said \"that\" instead of \"RU201\". The AI needs the conversation history to understand what \"that\" refers to." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Turn 1 (user):\n", + "What Redis course should I take next?\n", + "\n", + "Turn 2 (assistant):\n", + "Based on your Python background and completion of RU101, \n", + "I recommend RU201: Redis for Python Developers. 
This course will teach you \n", + "how to build Redis applications using redis-py, which aligns perfectly with \n", + "your goal of building a RAG system.\n", + "\n", + "Turn 3 (user):\n", + "How long will that take me to complete?\n", + "\n" + ] + } + ], + "source": [ + "# Let's view the complete conversation history\n", + "for i, msg in enumerate(conversation_history, 1):\n", + " print(f\"Turn {i} ({msg['role']}):\")\n", + " print(f\"{msg['content']}\\n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Key Insight: Conversation History Enables Natural Dialogue\n", + "\n", + "Without conversation history:\n", + "- โŒ \"How long will **that** take?\" โ†’ AI doesn't know what \"that\" refers to\n", + "\n", + "With conversation history:\n", + "- โœ… \"How long will **that** take?\" โ†’ AI knows \"that\" = RU201\n", + "\n", + "### Managing Context Window with Long Conversations\n", + "\n", + "As conversations grow, they consume more tokens. Common strategies:\n", + "\n", + "1. **Keep recent history** - Only include last N turns\n", + "2. **Summarize older context** - Compress early conversation into a summary\n", + "3. **Extract key facts** - Pull out important decisions/facts, discard the rest\n", + "\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Retrieved Context: Dynamic Information\n", + "\n", + "### What Is Retrieved Context?\n", + "\n", + "Retrieved context is **relevant information fetched on-demand** based on the current query. This is the most dynamic type of context - it changes with every query.\n", + "\n", + "### What Goes in Retrieved Context?\n", + "\n", + "1. **Database Records** - Course details, user records, etc.\n", + "2. **Search Results** - Relevant documents from vector/semantic search\n", + "3. **API Responses** - Real-time data from external services\n", + "4. 
**Computed Information** - Analysis or calculations performed on-demand\n", + "\n", + "### When to Use Retrieved Context\n", + "\n", + "Use retrieved context when:\n", + "- โœ… Information is too large to include statically\n", + "- โœ… Only a small subset is relevant to each query\n", + "- โœ… Information changes frequently\n", + "- โœ… You can retrieve it efficiently based on the query\n", + "\n", + "### Building Retrieved Context Step-by-Step" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "# Step 1: Simulate a course database\n", + "# In production, this would be Redis, etc.\n", + "course_database = {\n", + " \"RU101\": {\n", + " \"title\": \"Introduction to Redis Data Structures\",\n", + " \"level\": \"Beginner\",\n", + " \"description\": \"Master Redis fundamentals: strings, hashes, lists, sets, and sorted sets\",\n", + " \"duration\": \"4-6 hours\",\n", + " \"prerequisites\": [],\n", + " \"topics\": [\"Data structures\", \"Basic commands\", \"Use cases\"]\n", + " },\n", + " \"RU201\": {\n", + " \"title\": \"Redis for Python Developers\",\n", + " \"level\": \"Intermediate\",\n", + " \"description\": \"Build production Redis applications with Python and redis-py\",\n", + " \"duration\": \"6-8 hours\",\n", + " \"prerequisites\": [\"RU101\", \"Python experience\"],\n", + " \"topics\": [\"redis-py library\", \"Connection pooling\", \"Pipelining\", \"Pub/Sub\"]\n", + " },\n", + " \"RU202\": {\n", + " \"title\": \"Redis for Java Developers\",\n", + " \"level\": \"Intermediate\",\n", + " \"description\": \"Build production Redis applications with Java and Jedis\",\n", + " \"duration\": \"6-8 hours\",\n", + " \"prerequisites\": [\"RU101\", \"Java experience\"],\n", + " \"topics\": [\"Jedis library\", \"Connection pooling\", \"Transactions\", \"Redis Streams\"]\n", + " },\n", + " \"RU301\": {\n", + " \"title\": \"Vector Similarity Search with Redis\",\n", + " \"level\": \"Advanced\",\n", + " \"description\": \"Implement semantic search and RAG systems with Redis vector capabilities\",\n", + " \"duration\": \"8-10 hours\",\n", + " \"prerequisites\": [\"RU201 or RU202\", \"ML/AI interest\"],\n", + " \"topics\": [\"Vector embeddings\", \"Semantic search\", \"RAG architecture\", \"Hybrid search\"]\n", + " }\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's simulate retrieving course information based on a query." 
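+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Retrieval can range from a simple dictionary lookup to full semantic search. As a middle ground, here is an illustrative sketch of query-driven retrieval - the `search_courses` helper (the same name the best-practices section uses later) is an assumption, and its naive keyword-overlap scoring merely stands in for real vector search:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Illustrative sketch: score courses by keyword overlap with the query\n",
+    "def search_courses(query, user_profile=None, limit=3):\n",
+    "    terms = set(query.lower().split())\n",
+    "    if user_profile:\n",
+    "        # Let the user's stated interests nudge the ranking\n",
+    "        terms |= {w for interest in user_profile[\"interests\"] for w in interest.lower().split()}\n",
+    "    scored = []\n",
+    "    for code, course in course_database.items():\n",
+    "        haystack = \" \".join(\n",
+    "            [course[\"title\"], course[\"description\"]] + course[\"topics\"]\n",
+    "        ).lower()\n",
+    "        score = sum(term in haystack for term in terms)\n",
+    "        if score > 0:\n",
+    "            scored.append((score, code))\n",
+    "    scored.sort(reverse=True)\n",
+    "    return [code for _, code in scored[:limit]]\n",
+    "\n",
+    "search_courses(\"vector search for semantic retrieval\", sarah_profile)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The simplest case, though, is direct lookup by course code:"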
+ ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Course Details:\n", + "Code: RU201\n", + "Title: Redis for Python Developers\n", + "Level: Intermediate\n", + "Description: Build production Redis applications with Python and redis-py\n", + "Duration: 6-8 hours\n", + "Prerequisites: RU101, Python experience\n", + "Topics Covered: redis-py library, Connection pooling, Pipelining, Pub/Sub\n", + "\n" + ] + } + ], + "source": [ + "# Step 2: Create a retrieval function\n", + "def retrieve_course_info(course_code):\n", + " \"\"\"Retrieve detailed information about a specific course\"\"\"\n", + " course = course_database.get(course_code)\n", + " if not course:\n", + " return None\n", + " \n", + " return f\"\"\"Course Details:\n", + "Code: {course_code}\n", + "Title: {course['title']}\n", + "Level: {course['level']}\n", + "Description: {course['description']}\n", + "Duration: {course['duration']}\n", + "Prerequisites: {', '.join(course['prerequisites']) if course['prerequisites'] else 'None'}\n", + "Topics Covered: {', '.join(course['topics'])}\n", + "\"\"\"\n", + "\n", + "# Retrieve information about RU201\n", + "retrieved_context = retrieve_course_info(\"RU201\")\n", + "print(retrieved_context)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Key Insight: Retrieved Context is Query-Specific\n", + "\n", + "Notice that we only retrieved information about RU201 - the course the user asked about. We didn't include:\n", + "- RU101 details (user already completed it)\n", + "- RU202 details (not relevant to a Python developer)\n", + "- RU301 details (not the current focus)\n", + "\n", + "This selective retrieval is what makes this approach scalable. Imagine having 500 courses - you can't include them all in every request, but you can retrieve the 2-3 most relevant ones.\n", + "\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Bringing It All Together: Complete Context Integration\n", + "\n", + "Now that we understand each context type individually, let's see how they work together to create an intelligent, personalized response.\n", + "\n", + "### The Complete Picture\n", + "\n", + "Here's how all four context types combine in a single LLM call:\n", + "\n", + "```\n", + "โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”\n", + "โ”‚ COMPLETE LLM REQUEST โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ 1. SYSTEM CONTEXT (Static) โ”‚\n", + "โ”‚ - Role: \"You are a course advisor\" โ”‚\n", + "โ”‚ - Domain: Available courses โ”‚\n", + "โ”‚ - Rules: Guidelines and constraints โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ 2. USER CONTEXT (Dynamic - User Specific) โ”‚\n", + "โ”‚ - Profile: Sarah Chen, Python dev โ”‚\n", + "โ”‚ - History: Completed RU101 โ”‚\n", + "โ”‚ - Goal: Build RAG system โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ 3. 
CONVERSATION CONTEXT (Dynamic - Session) โ”‚\n", + "โ”‚ - User: \"What course should I take?\" โ”‚\n", + "โ”‚ - AI: \"I recommend RU201...\" โ”‚\n", + "โ”‚ - User: \"How long will that take?\" โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ 4. RETRIEVED CONTEXT (Dynamic - Query) โ”‚\n", + "โ”‚ - RU201 course details โ”‚\n", + "โ”‚ - Duration, prerequisites, topics โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ RESULT: Personalized, context-aware answer โ”‚\n", + "โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n", + "```\n", + "\n", + "### Let's Build This Step-by-Step" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "# Step 1: Start with system context\n", + "messages = [\n", + " {\"role\": \"system\", \"content\": system_context}\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "# Step 2: Add user context\n", + "messages.append({\n", + " \"role\": \"system\",\n", + " \"content\": user_context\n", + "})" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "# Step 3: Add conversation history\n", + "messages.extend(conversation_history)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "# Step 4: Add retrieved context\n", + "messages.append({\n", + " \"role\": \"system\",\n", + " \"content\": retrieved_context\n", + "})" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'role': 'system',\n", + " 'content': \"You are a Redis University course advisor.\\n\\nAvailable Courses:\\n- RU101: Introduction to Redis Data Structures (Beginner, 4-6 hours)\\n Learn Redis fundamentals: strings, hashes, lists, sets, sorted sets\\n\\n- RU201: Redis for Python Developers (Intermediate, 6-8 hours)\\n Prerequisites: RU101, Python experience\\n Build Redis applications with Python and redis-py\\n\\n- RU202: Redis for Java Developers (Intermediate, 6-8 hours)\\n Prerequisites: RU101, Java experience\\n Build Redis applications with Java and Jedis\\n\\n- RU301: Vector Similarity Search with Redis (Advanced, 8-10 hours)\\n Prerequisites: RU201 or RU202, ML/AI interest\\n Implement semantic search and RAG systems\\n\\nGuidelines:\\n1. Always provide specific course recommendations with clear reasoning\\n2. Consider the student's background, completed courses, and interests\\n3. Ensure prerequisites are met before recommending advanced courses\\n4. 
Be encouraging and supportive in your guidance\\n\"},\n", + " {'role': 'system',\n", + " 'content': \"Student Profile:\\n- Name: Sarah Chen\\n- Background: Python developer, 2 years experience\\n- Completed Courses: RU101\\n- Interests: machine learning, data science, vector search\\n- Availability: evenings and weekends\\n- Goal: Build a RAG system for my company's documentation\\n\"},\n", + " {'role': 'user', 'content': 'What Redis course should I take next?'},\n", + " {'role': 'assistant',\n", + " 'content': 'Based on your Python background and completion of RU101, \\nI recommend RU201: Redis for Python Developers. This course will teach you \\nhow to build Redis applications using redis-py, which aligns perfectly with \\nyour goal of building a RAG system.'},\n", + " {'role': 'user', 'content': 'How long will that take me to complete?'},\n", + " {'role': 'system',\n", + " 'content': 'Course Details:\\nCode: RU201\\nTitle: Redis for Python Developers\\nLevel: Intermediate\\nDescription: Build production Redis applications with Python and redis-py\\nDuration: 6-8 hours\\nPrerequisites: RU101, Python experience\\nTopics Covered: redis-py library, Connection pooling, Pipelining, Pub/Sub\\n'}]" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "messages" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Making the Complete LLM Call" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "AI Response:\n", + "RU201: Redis for Python Developers will take you approximately 6 to 8 hours to complete. Since you can dedicate time during evenings and weekends, you can spread the course over a few sessions to make it manageable and absorb the material effectively. Enjoy your learning experience!\n" + ] + } + ], + "source": [ + "# Make the LLM call with complete context\n", + "response = client.chat.completions.create(\n", + " model=\"gpt-4o-mini\",\n", + " messages=messages,\n", + " temperature=0.7\n", + ")\n", + "\n", + "answer = response.choices[0].message.content\n", + "print(\"AI Response:\")\n", + "print(answer)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### What Just Happened?\n", + "\n", + "The LLM received all four context types and used them to generate a personalized response:\n", + "\n", + "1. **System Context** told it to act as a course advisor and provided course information\n", + "2. **User Context** gave it Sarah's background, interests, and goals\n", + "3. **Conversation Context** showed that \"that\" refers to RU201\n", + "4. **Retrieved Context** provided detailed information about RU201's duration and topics\n", + "\n", + "The result is a response that:\n", + "- Understands what course \"that\" refers to\n", + "- Considers Sarah's available time (evenings and weekends)\n", + "- Relates the duration to her specific situation\n", + "- Stays aligned with her goal of building a RAG system\n", + "\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Context Management Strategies\n", + "\n", + "Different situations require different approaches to context management. 
Let's explore three common strategies.\n", + "\n", + "### Strategy 1: New User (Minimal Context)\n", + "\n", + "**Scenario:** First-time user, no conversation history\n", + "\n", + "| Context Type | What to Include | Token Budget |\n", + "|--------------|-----------------|-------------|\n", + "| System | Full role definition and course catalog | 2,000 |\n", + "| User | Basic profile only (if available) | 500 |\n", + "| Conversation | Empty (new session) | 0 |\n", + "| Retrieved | General overview information | 1,000 |\n", + "| **Total** | | **3,500** |\n", + "\n", + "**Use when:**\n", + "- First interaction with a user\n", + "- No user history available\n", + "- Providing general guidance\n", + "\n", + "### Strategy 2: Returning User (Rich Context)\n", + "\n", + "**Scenario:** User with history, ongoing conversation\n", + "\n", + "| Context Type | What to Include | Token Budget |\n", + "|--------------|-----------------|-------------|\n", + "| System | Full role definition and course catalog | 2,000 |\n", + "| User | Complete profile + learning history | 1,500 |\n", + "| Conversation | Last 5-10 turns of dialogue | 3,000 |\n", + "| Retrieved | Personalized, highly relevant course details | 2,000 |\n", + "| **Total** | | **8,500** |\n", + "\n", + "**Use when:**\n", + "- User has established history\n", + "- Multi-turn conversation in progress\n", + "- Deep personalization is valuable\n", + "\n", + "### Strategy 3: Long Conversation (Optimized Context)\n", + "\n", + "**Scenario:** Approaching token limits, need to optimize\n", + "\n", + "| Context Type | What to Include | Token Budget |\n", + "|--------------|-----------------|-------------|\n", + "| System | Condensed role + essential rules only | 1,000 |\n", + "| User | Key profile facts only | 500 |\n", + "| Conversation | Summarized key decisions + last 3 turns | 2,000 |\n", + "| Retrieved | Only the most relevant details | 1,000 |\n", + "| **Total** | | **4,500** |\n", + "\n", + "**Use when:**\n", + "- Conversation has many turns\n", + "- Approaching context window limit\n", + "- Need to maintain performance\n", + "\n", + "### Implementing an Adaptive Strategy" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "def build_context_adaptively(user_profile, conversation_history, query):\n", + " \"\"\"\n", + " Build context adaptively based on conversation length\n", + " \"\"\"\n", + " # Count conversation tokens (rough estimate)\n", + " conv_tokens = sum(len(msg['content'].split()) * 1.3 for msg in conversation_history)\n", + " \n", + " messages = []\n", + " \n", + " # Strategy selection based on conversation length\n", + " if len(conversation_history) == 0:\n", + " # New user - full system context\n", + " messages.append({\"role\": \"system\", \"content\": system_context})\n", + " if user_profile:\n", + " messages.append({\"role\": \"system\", \"content\": format_user_context(user_profile)})\n", + " \n", + " elif conv_tokens < 10000:\n", + " # Normal conversation - rich context\n", + " messages.append({\"role\": \"system\", \"content\": system_context})\n", + " messages.append({\"role\": \"system\", \"content\": format_user_context(user_profile)})\n", + " messages.extend(conversation_history)\n", + " \n", + " else:\n", + " # Long conversation - optimized context\n", + " # Use condensed system context\n", + " condensed_system = \"You are a Redis University course advisor. 
Help students choose appropriate courses.\"\n", + " messages.append({\"role\": \"system\", \"content\": condensed_system})\n", + " \n", + " # Include only key user facts\n", + " key_facts = f\"Student: {user_profile['name']}, {user_profile['background']}. Completed: {', '.join(user_profile['completed_courses'])}\"\n", + " messages.append({\"role\": \"system\", \"content\": key_facts})\n", + " \n", + " # Include only recent conversation history\n", + " messages.extend(conversation_history[-6:])\n", + " \n", + " # Always add retrieved context if relevant\n", + " # (In production, you'd determine relevance and retrieve accordingly)\n", + " \n", + " return messages" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## Best Practices for Context Engineering\n", + "\n", + "### 1. Start Simple, Add Complexity Gradually\n", + "\n", + "Don't try to build everything at once. Follow this progression:\n", + "\n", + "```python\n", + "# Phase 1: Basic agent with system context only\n", + "agent = BasicAgent(system_context)\n", + "\n", + "# Phase 2: Add user context\n", + "agent.set_user_profile(user_profile)\n", + "\n", + "# Phase 3: Add conversation history\n", + "agent.enable_conversation_memory()\n", + "\n", + "# Phase 4: Add retrieval\n", + "agent.add_retrieval_system(course_database)\n", + "```\n", + "\n", + "### 2. Measure Token Usage Continuously\n", + "\n", + "Always know your token consumption:" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total tokens: 332\n", + "Percentage of 128K limit: 0.3%\n", + "\n", + "Breakdown:\n", + " system: 261 tokens (78.8%)\n", + " user: 20 tokens (5.9%)\n", + " assistant: 51 tokens (15.3%)\n" + ] + } + ], + "source": [ + "def estimate_tokens(text):\n", + " \"\"\"Rough token estimation (for planning purposes)\"\"\"\n", + " return len(text.split()) * 1.3\n", + "\n", + "def analyze_context_usage(messages):\n", + " \"\"\"Analyze token usage across context types\"\"\"\n", + " total_tokens = 0\n", + " breakdown = {}\n", + " \n", + " for msg in messages:\n", + " tokens = estimate_tokens(msg['content'])\n", + " total_tokens += tokens\n", + " \n", + " # Categorize by role\n", + " role = msg['role']\n", + " breakdown[role] = breakdown.get(role, 0) + tokens\n", + " \n", + " print(f\"Total tokens: {total_tokens:.0f}\")\n", + " print(f\"Percentage of 128K limit: {total_tokens/128000*100:.1f}%\")\n", + " print(\"\\nBreakdown:\")\n", + " for role, tokens in breakdown.items():\n", + " print(f\" {role}: {tokens:.0f} tokens ({tokens/total_tokens*100:.1f}%)\")\n", + "\n", + "# Analyze our context\n", + "analyze_context_usage(messages)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3. Optimize for Relevance, Not Completeness\n", + "\n", + "**Wrong approach:** Include everything you have\n", + "```python\n", + "# Bad: Including all 50 courses = 30,000 tokens\n", + "context = \"\\n\".join([format_course(c) for c in all_courses])\n", + "```\n", + "\n", + "**Right approach:** Include only what's relevant\n", + "```python\n", + "# Good: Including only relevant courses = 2,000 tokens\n", + "relevant_courses = search_courses(query, user_profile, limit=3)\n", + "context = \"\\n\".join([format_course(c) for c in relevant_courses])\n", + "```\n", + "\n", + "### 4. 
Use Clear, Structured Formatting\n", + "\n", + "LLMs perform better with well-structured context:\n", + "\n", + "```python\n", + "# Good structure\n", + "context = \"\"\"\n", + "ROLE: Course advisor for Redis University\n", + "\n", + "STUDENT PROFILE:\n", + "- Name: Sarah Chen\n", + "- Background: Python developer\n", + "- Completed: RU101\n", + "\n", + "RELEVANT COURSES:\n", + "- RU201: Redis for Python (6-8 hours)\n", + " Prerequisites: RU101, Python experience\n", + "\n", + "TASK: Recommend the best next course for this student.\n", + "\"\"\"\n", + "```\n", + "\n", + "### 5. Test Different Context Combinations\n", + "\n", + "Context engineering is empirical - always test:" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Query: What course should I take next?\n", + "============================================================\n", + "\n", + "minimal strategy:\n", + "To provide you with the best recommendation, I would need to know a bit more about your current background. Specifically:\n", + "\n", + "1. Have you completed any of the available courses?\n", + "2. What level of programming experience do you have (Python, Java, etc.)?\n", + "3. Are you interested in machine learning or artificial intelligence?\n", + "4. What are your goals or what do you hope to achieve by taking the next course?\n", + "\n", + "Once I have this information, I can suggest the most suitable course for you!\n", + "\n", + "with_user strategy:\n", + "Hi Sarah!\n", + "\n", + "Given your background as a Python developer and the fact that you've already completed RU101, you're well-prepared to dive into the next level of Redis courses. Since you have an interest in machine learning and data science, as well as a goal to build a RAG (Retrieval-Augmented Generation) system for your company's documentation, I highly recommend you take **RU201: Redis for Python Developers**.\n", + "\n", + "This course will build on your existing knowledge from RU101 and will teach you how to effectively use Redis to create applications specifically with Python. This is perfect for your goals, as it will give you the necessary skills to leverage Redis in developing your RAG system.\n", + "\n", + "Once you complete RU201, you can then progress to **RU301\n", + "\n", + "with_retrieval strategy:\n", + "Based on your background as a Python developer with two years of experience, along with your completion of RU101, I highly recommend that you take **RU201: Redis for Python Developers**. \n", + "\n", + "This course is tailored for individuals with a grounding in Python who want to leverage Redis to build applications. Since you're interested in machine learning and data science, mastering Redis with Python will significantly enhance your ability to develop applications like a RAG system for your company's documentation.\n", + "\n", + "Taking RU201 will equip you with key concepts and the redis-py library, which are essential for efficiently working with Redis in your projects. With your evening and weekend availability, you should be able to complete the course within the estimated 6-8 hours.\n", + "\n", + "Once you've completed RU201, you'll\n", + "\n", + "Query: I want to learn about vector search\n", + "============================================================\n", + "\n", + "minimal strategy:\n", + "That's great! 
Vector search is an exciting and increasingly important topic, especially in the fields of machine learning and artificial intelligence. To get started with vector similarity search using Redis, you'll want to take RU301: Vector Similarity Search with Redis.\n", + "\n", + "Here are the prerequisites and reasoning for this recommendation:\n", + "\n", + "1. **Prerequisites**: You need to have completed either RU201 (Redis for Python Developers) or RU202 (Redis for Java Developers) before taking RU301. Both of these intermediate courses cover building Redis applications and will give you a strong foundation.\n", + "\n", + "2. **Interest in ML/AI**: Since you're interested in vector search, it's essential to have a background or understanding of machine learning or AI concepts, which RU301 will help you with by\n", + "\n", + "with_user strategy:\n", + "Hi Sarah! It's great to see your interest in vector search, especially since you're looking to build a RAG (Retrieve and Generate) system for your company's documentation.\n", + "\n", + "Given your background as a Python developer and that you've completed RU101, I recommend you take **RU201: Redis for Python Developers** next. This course will help you build Redis applications specifically with Python and teach you how to leverage Redis for your data storage needs. It's an important stepping stone before diving into advanced topics.\n", + "\n", + "Once you've completed RU201, you can then move on to **RU301: Vector Similarity Search with Redis**. This advanced course will delve into implementing semantic search and other techniques that are essential for your RAG system project.\n", + "\n", + "These courses align perfectly with your interests\n", + "\n", + "with_retrieval strategy:\n", + "Hi Sarah! It's fantastic to see your interest in learning about vector search, especially since you're aiming to build a RAG (Retrieval-Augmented Generation) system for your company's documentation. Given your background as a Python developer and your completion of RU101, the next step for you would be to enroll in **RU201: Redis for Python Developers**.\n", + "\n", + "### Hereโ€™s why RU201 is an excellent fit for you:\n", + "\n", + "1. **Prerequisites Met**: Youโ€™ve already completed RU101, and as a Python developer, you have the requisite experience to succeed in this course.\n", + "2. **Focused on Python**: This course specifically teaches you how to build Redis applications with Python, which aligns perfectly with your background.\n", + "3. **Prepare for Advanced\n", + "\n", + "Query: How long will it take to become Redis-proficient?\n", + "============================================================\n", + "\n", + "minimal strategy:\n", + "Becoming proficient in Redis can vary greatly depending on your current background, experience, and how much time you can dedicate to learning. Here's a general guideline based on the courses available:\n", + "\n", + "1. **RU101: Introduction to Redis Data Structures (Beginner, 4-6 hours)** - This foundational course will introduce you to basic Redis concepts and data structures. Completing this course is essential for starting your Redis journey.\n", + "\n", + "2. **RU201: Redis for Python Developers (Intermediate, 6-8 hours)** - If you have experience with Python, this course will build on your knowledge from RU101 and teach you how to integrate Redis into Python applications. This is a great next step if youโ€™re looking to apply Redis practically.\n", + "\n", + "3. 
**RU\n",
+      "\n",
+      "with_user strategy:\n",
+      "The time it takes to become proficient in Redis can vary depending on your prior knowledge, the complexity of the projects you want to undertake, and the time you can dedicate to learning. Given your background as a Python developer with two years of experience, you've already completed RU101, which gives you a solid foundation in Redis fundamentals.\n",
+      "\n",
+      "Here’s a suggested pathway to proficiency based on your profile:\n",
+      "\n",
+      "1. **RU101: Introduction to Redis Data Structures** - You’ve completed this course, which typically takes 4-6 hours.\n",
+      "\n",
+      "2. **RU201: Redis for Python Developers** - Since you have Python experience and have completed RU101, this intermediate course will further your skills in building applications with Redis. This course typically takes 6-8 hours\n",
+      "\n",
+      "with_retrieval strategy:\n",
+      "Becoming proficient in Redis can vary depending on your learning pace and dedication, but with your background and interests, here's a potential pathway based on the courses available:\n",
+      "\n",
+      "1. **RU101: Introduction to Redis Data Structures (Completed)** - You've already completed this foundational course, which covers the basic data structures in Redis.\n",
+      "\n",
+      "2. **RU201: Redis for Python Developers** - This intermediate course will take about 6-8 hours. Since you have 2 years of Python experience and have completed RU101, you're well-prepared to dive into this course. This will enhance your skills in building Redis applications specifically using Python.\n",
+      "\n",
+      "3. **RU301: Vector Similarity Search with Redis** - This advanced course (8-10 hours) requires completion\n"
+     ]
+    }
+   ],
+   "source": [
+    "def test_context_strategies(user_profile, test_queries):\n",
+    "    \"\"\"\n",
+    "    Test different context strategies to find the best approach\n",
+    "    \"\"\"\n",
+    "    strategies = [\n",
+    "        (\"minimal\", [\n",
+    "            {\"role\": \"system\", \"content\": system_context}\n",
+    "        ]),\n",
+    "        (\"with_user\", [\n",
+    "            {\"role\": \"system\", \"content\": system_context},\n",
+    "            {\"role\": \"system\", \"content\": format_user_context(user_profile)}\n",
+    "        ]),\n",
+    "        (\"with_retrieval\", [\n",
+    "            {\"role\": \"system\", \"content\": system_context},\n",
+    "            {\"role\": \"system\", \"content\": format_user_context(user_profile)},\n",
+    "            {\"role\": \"system\", \"content\": retrieved_context}\n",
+    "        ])\n",
+    "    ]\n",
+    "    \n",
+    "    for query in test_queries:\n",
+    "        print(f\"\\nQuery: {query}\")\n",
+    "        print(\"=\" * 60)\n",
+    "        \n",
+    "        for strategy_name, context_messages in strategies:\n",
+    "            messages = context_messages + [{\"role\": \"user\", \"content\": query}]\n",
+    "            \n",
+    "            response = client.chat.completions.create(\n",
+    "                model=\"gpt-4o-mini\",\n",
+    "                messages=messages,\n",
+    "                max_tokens=150\n",
+    "            )\n",
+    "            \n",
+    "            print(f\"\\n{strategy_name} strategy:\")\n",
+    "            print(response.choices[0].message.content)\n",
+    "\n",
+    "# Run the comparison across all three strategies\n",
+    "test_queries = [\n",
+    "    \"What course should I take next?\",\n",
+    "    \"I want to learn about vector search\",\n",
+    "    \"How long will it take to become Redis-proficient?\"\n",
+    "]\n",
+    "test_context_strategies(sarah_profile, test_queries)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Analyzing Context Strategy Results\n",
+    "\n",
+    "Let's analyze what happened when we tested the same queries with different amounts of context.\n",
+    "\n",
+    "#### What We Observed\n",
+    "\n",
+    "**Query 1: \"What course should I take next?\"**\n",
+    "\n",
+    "- **Minimal (system only):** Asked clarifying questions - \"What's your background? What are your goals?\"\n",
+    "- **With user context:** Immediately recommended RU201 based on Sarah's Python background and completed RU101\n",
+    "- **With retrieval:** Same recommendation PLUS specific course details (duration, topics) for better decision-making\n",
+    "\n",
+    "**Query 2: \"I want to learn about vector search\"**\n",
+    "\n",
+    "- **Minimal:** Suggested RU301 but couldn't verify if prerequisites were met\n",
+    "- **With user context:** Built a learning path (RU201 → RU301) based on what Sarah already completed\n",
+    "- **With retrieval:** Same path with detailed justification for each step\n",
+    "\n",
+    "**Query 3: \"How long will it take to become Redis-proficient?\"**\n",
+    "\n",
+    "- **Minimal:** Listed all courses but repeated RU101 (which Sarah already finished)\n",
+    "- **With user context:** Calculated time starting from RU201, acknowledging completed work\n",
+    "- **With retrieval:** Most accurate timeline with specific hours per course\n",
+    "\n",
+    "---\n",
+    "\n",
+    "### Key Insights\n",
+    "\n",
+    "**1. System Context Alone = Generic Bot**\n",
+    "- Must ask follow-up questions\n",
+    "- Can't personalize\n",
+    "- Wastes user time with back-and-forth\n",
+    "\n",
+    "**2. Adding User Context = Personal Assistant**\n",
+    "- Knows who you are\n",
+    "- Skips unnecessary questions\n",
+    "- Tailors recommendations instantly\n",
+    "\n",
+    "**3. 
Adding Retrieved Context = Expert Advisor**\n", + "- Provides specific details (hours, topics, prerequisites)\n", + "- Makes responses actionable\n", + "- Gives users everything needed to decide\n", + "\n", + "---\n", + "\n", + "### The Pattern\n", + "```\n", + "More Context = Less Back-and-Forth = Better Experience\n", + "\n", + "Minimal: User asks โ†’ AI asks clarifying questions โ†’ User answers โ†’ AI responds\n", + " (3-4 interactions to get an answer)\n", + "\n", + "Rich: User asks โ†’ AI responds with personalized, detailed answer\n", + " (1 interaction - done)\n", + "```\n", + "\n", + "---\n", + "\n", + "### When to Use Each Strategy\n", + "\n", + "| Strategy | Best For | Example |\n", + "|----------|----------|---------|\n", + "| **Minimal** | New users, no history available | First-time visitor to your site |\n", + "| **With User** | Returning users, simple queries | \"What should I do next?\" |\n", + "| **With Retrieval** | Complex decisions, detailed planning | \"Plan my learning path for the year\" |\n", + "\n", + "---\n", + "\n", + "### What This Means for Production\n", + "\n", + "**The Right Context Strategy Depends On:**\n", + "\n", + "1. **Do you have user history?**\n", + " - Yes โ†’ Include user context\n", + " - No โ†’ Use minimal, ask questions\n", + "\n", + "2. **Is the query complex?**\n", + " - Yes โ†’ Retrieve specific details\n", + " - No โ†’ User context might be enough\n", + "\n", + "3. **Are you near token limits?**\n", + " - Yes โ†’ Switch to minimal or summarize\n", + " - No โ†’ Use rich context\n", + "\n", + "**Simple Rule:** Start with rich context (all four types). Only reduce when you hit token limits or lack data.\n", + "\n", + "---\n", + "\n", + "### Action Items\n", + "\n", + "Based on this test, you should:\n", + "\n", + "1. **Always include user context** when available (massive quality improvement, low token cost)\n", + "2. **Retrieve context dynamically** based on what the query asks about (don't retrieve RU201 details for every question)\n", + "3. **Monitor token usage** - several responses were cut off at 150 tokens\n", + "4. **Test with your own use case** - Run this experiment with your domain and queries\n", + "\n", + "**Bottom Line:** More relevant context = better responses. The challenge is determining what's \"relevant\" and managing token budgets." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "## ๐Ÿ“š Course Summary: What You've Learned\n", + "\n", + "Congratulations! You've completed Chapter 1: Foundations of Context Engineering. Let's recap your journey.\n", + "\n", + "### From Notebook 01: Why Context Engineering Matters\n", + "\n", + "You discovered the fundamental problem that context engineering solves:\n", + "\n", + "**The Core Problem:**\n", + "- AI agents without context are like doctors without medical records - they can't remember, personalize, or maintain coherent interactions\n", + "- This leads to frustrated users, operational inefficiency, and limited AI capabilities\n", + "\n", + "**The Impact:**\n", + "- You saw the dramatic difference between context-less and context-aware AI through the university advisor example\n", + "- Without context: repetitive, generic, frustrating interactions\n", + "- With context: personalized, coherent, valuable assistance\n", + "\n", + "**The Four Context Types:**\n", + "You learned the foundational framework:\n", + "1. **System Context** - \"What am I?\" (Role, rules, domain knowledge)\n", + "2. 
**User Context** - \"Who are you?\" (Profile, preferences, history)\n", + "3. **Conversation Context** - \"What have we discussed?\" (Dialogue flow)\n", + "4. **Retrieved Context** - \"What information is relevant?\" (On-demand data)\n", + "\n", + "**The Fundamental Constraint:**\n", + "- Every AI model has a context window limit (e.g., 128K tokens)\n", + "- Every token spent on one type of context is unavailable for another\n", + "- Context engineering is optimization within constraints\n", + "\n", + "**Real-World Importance:**\n", + "- Customer support, healthcare, sales, research - all require proper context management\n", + "- Poor context management has measurable business impact: 40-60% abandonment rates, 3-5x more interactions needed, high escalation rates\n", + "\n", + "### From Notebook 02: How to Implement Context Engineering\n", + "\n", + "You mastered the practical implementation:\n", + "\n", + "**Hands-On Skills Acquired:**\n", + "\n", + "1. **Building System Context**\n", + " - How to define AI role and identity\n", + " - Structuring domain knowledge effectively\n", + " - Writing clear behavioral guidelines\n", + " - Understanding static vs. dynamic information\n", + "\n", + "2. **Creating User Context**\n", + " - Storing user profiles as structured data\n", + " - Formatting user information for LLMs\n", + " - Personalizing responses based on user attributes\n", + " - Seeing how different users get different context\n", + "\n", + "3. **Managing Conversation Context**\n", + " - Maintaining dialogue history across turns\n", + " - Enabling natural reference resolution (\"that course\")\n", + " - Building coherent multi-turn conversations\n", + " - Strategies for handling long conversations\n", + "\n", + "4. **Retrieving Dynamic Context**\n", + " - Fetching relevant information on-demand\n", + " - Query-specific data retrieval\n", + " - Optimizing for relevance vs. completeness\n", + " - Simulating database and search operations\n", + "\n", + "**Integration Mastery:**\n", + "- You learned how to combine all four context types into a single LLM call\n", + "- You saw the complete message array structure that makes intelligent responses possible\n", + "- You understood how each context type contributes to the final response quality\n", + "\n", + "**Strategic Thinking:**\n", + "You explored three context management strategies:\n", + "- **Minimal Context** - For new users with no history\n", + "- **Rich Context** - For returning users with established profiles\n", + "- **Optimized Context** - For long conversations near token limits\n", + "\n", + "**Best Practices:**\n", + "1. Start simple, add complexity gradually\n", + "2. Measure token usage continuously\n", + "3. Optimize for relevance, not completeness\n", + "4. Use clear, structured formatting\n", + "5. 
Test and iterate based on results\n", + "\n", + "### What You Can Do Now\n", + "\n", + "After completing these two notebooks, you have the foundational skills to:\n", + "\n", + " - **Understand** why context engineering is critical for production AI systems \n", + " - **Identify** which context type to use for different information \n", + " - **Build** context-aware AI agents from scratch \n", + " - **Format** context appropriately for LLM consumption \n", + " - **Combine** multiple context sources into coherent requests \n", + " - **Optimize** token usage within context window constraints \n", + " - **Adapt** context strategies based on user type and conversation length \n", + " - **Implement** the Redis University course advisor pattern for your own domain \n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## ๐Ÿค” What's Next?\n", + "\n", + "In the next section, you'll dive deeper into advanced techniques:\n", + "\n", + "**Section 2: RAG Foundations**\n", + "- Vector similarity search with Redis\n", + "- Building production RAG systems with LangChain and LangGraph\n", + "- Semantic retrieval strategies\n", + "- Hybrid search approaches\n", + "- Optimizing retrieval performance\n", + "\n", + "**Section 3: Memory Systems for Context Engineering**\n", + "- Long-term memory systems with Redis Agent Memory Server\n", + "- Working memory vs. long-term memory patterns\n", + "- Memory summarization and compression\n", + "- Multi-agent memory coordination\n", + "\n", + "**Section 4: Production Optimization**\n", + "- Context compression techniques\n", + "- Caching strategies\n", + "- Performance monitoring\n", + "- Cost optimization\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ“š Additional Resources\n", + "\n", + "### **Context Engineering Fundamentals**\n", + "- [Prompt Engineering Guide](https://www.promptingguide.ai/) - Comprehensive guide to prompt engineering\n", + "- [OpenAI Prompt Engineering Guide](https://platform.openai.com/docs/guides/prompt-engineering) - Best practices\n", + "- [Anthropic's Guide to Building Effective Agents](https://www.anthropic.com/research/building-effective-agents) - Agent design patterns\n", + "\n", + "### **LLM Context Management**\n", + "- [LangChain Documentation](https://python.langchain.com/docs/get_started/introduction) - Framework for context-aware applications\n", + "- [Context Window Management](https://help.openai.com/en/articles/4936856-what-are-tokens-and-how-to-count-them) - Understanding token limits\n", + "- [OpenAI API Reference](https://platform.openai.com/docs/api-reference) - Complete API documentation\n", + "\n", + "### **Academic Papers and Technical Reports**\n", + "- [In-Context Learning Survey](https://arxiv.org/abs/2301.00234) - Research on how LLMs use context\n", + "- [Retrieval-Augmented Generation](https://arxiv.org/abs/2005.11401) - Foundational RAG paper\n", + "- [Lost in the Middle](https://arxiv.org/abs/2307.03172) - How LLMs use long contexts\n", + "- [Context Rot](https://github.com/chroma-core/context-rot?tab=readme-ov-file) - How Increasing Input Tokens Impacts LLM Performance\n", + "\n", + "### **Redis Resources**\n", + "- [Redis Documentation](https://redis.io/docs/) - Official Redis documentation\n", + "- [Redis University](https://university.redis.com/) - Free Redis courses\n", + "- [Redis Python Client](https://redis-py.readthedocs.io/) - redis-py documentation\n", + "\n", + "---" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": 
[] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/python-recipes/context-engineering/notebooks/section-2-retrieved-context-engineering/01_engineering_retrieved_context_with_rag.ipynb b/python-recipes/context-engineering/notebooks/section-2-retrieved-context-engineering/01_engineering_retrieved_context_with_rag.ipynb new file mode 100644 index 00000000..7f5571a6 --- /dev/null +++ b/python-recipes/context-engineering/notebooks/section-2-retrieved-context-engineering/01_engineering_retrieved_context_with_rag.ipynb @@ -0,0 +1,2048 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "f38f7a74133d584d", + "metadata": {}, + "source": [ + "![Redis](https://redis.io/wp-content/uploads/2024/04/Logotype.svg?auto=webp&quality=85,75&width=120)\n", + "\n", + "# Engineering Retrieved Context with RAG\n", + "\n", + "## From Context Engineering to Retrieval-Augmented Generation\n", + "\n", + "In Section 1, you learned about the four core context types:\n", + "1. **System Context** - The AI's role and domain knowledge\n", + "2. **User Context** - Personal profiles and preferences \n", + "3. **Conversation Context** - Dialogue history and flow\n", + "4. **Retrieved Context** - Dynamic information from external sources\n", + "\n", + "This notebook focuses on **Retrieved Context** - the most powerful and complex context type. You'll learn how to build a production-ready RAG (Retrieval-Augmented Generation) system that dynamically fetches relevant information to enhance AI responses.\n", + "\n", + "## What You'll Learn\n", + "\n", + "**RAG Fundamentals:**\n", + "- What RAG is and why it's essential for context engineering\n", + "- How vector embeddings enable semantic search\n", + "- Building a complete RAG pipeline with LangChain and Redis\n", + "\n", + "**Practical Implementation:**\n", + "- Generate and ingest course data using existing utilities\n", + "- Set up Redis vector store for semantic search\n", + "- Implement retrieval and generation workflows\n", + "- Combine retrieved context with user and system context\n", + "\n", + "**Foundation for Advanced Topics:**\n", + "- This RAG system becomes the base for Section 3 (Memory Systems for Context Engineering)\n", + "- You'll add LangGraph state management and tools in later sections\n", + "- Focus here is purely on retrieval โ†’ context assembly โ†’ generation\n", + "\n", + "**Time to complete:** 30-35 minutes\n", + "\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "c32f737633a8079d", + "metadata": {}, + "source": [ + "## Why RAG Matters for Context Engineering\n", + "\n", + "### The Challenge: Static vs. 
Dynamic Knowledge\n", + "\n", + "In Section 1, we used **hardcoded** course information in the system context:\n", + "\n", + "```python\n", + "system_context = \"\"\"You are a Redis University course advisor.\n", + "\n", + "Available Courses:\n", + "- RU101: Introduction to Redis (Beginner, 4-6 hours)\n", + "- RU201: Redis for Python (Intermediate, 6-8 hours)\n", + "...\n", + "\"\"\"\n", + "```\n", + "\n", + "**Problems with this approach:**\n", + "- โŒ **Doesn't scale** - Can't hardcode thousands of courses\n", + "- โŒ **Wastes tokens** - Includes irrelevant courses in every request\n", + "- โŒ **Hard to update** - Requires code changes to add/modify courses\n", + "- โŒ **No personalization** - Same courses shown to everyone\n", + "\n", + "### The Solution: Retrieval-Augmented Generation (RAG)\n", + "\n", + "RAG solves these problems by **dynamically retrieving** only the most relevant information:\n", + "\n", + "```\n", + "User Query: \"I want to learn about vector search\"\n", + " โ†“\n", + "Semantic Search: Find courses matching \"vector search\"\n", + " โ†“\n", + "Retrieved Context: RU301 - Vector Similarity Search with Redis\n", + " โ†“\n", + "LLM Generation: Personalized recommendation using retrieved context\n", + "```\n", + "\n", + "**Benefits:**\n", + "- โœ… **Scales infinitely** - Store millions of documents\n", + "- โœ… **Token efficient** - Only retrieve what's relevant\n", + "- โœ… **Easy to update** - Add/modify data without code changes\n", + "- โœ… **Personalized** - Different results for different queries\n", + "\n", + "### RAG as \"Retrieved Context\" from Section 1\n", + "\n", + "Remember the four context types? RAG is how we implement **Retrieved Context** in production:\n", + "\n", + "| Context Type | Storage | Retrieval Method | Example |\n", + "|--------------|---------|------------------|---------|\n", + "| System Context | Hardcoded | Always included | AI role, instructions |\n", + "| User Context | Database | User ID lookup | Student profile |\n", + "| Conversation Context | Session store | Session ID lookup | Chat history |\n", + "| **Retrieved Context** | **Vector DB** | **Search** | **Relevant courses** |\n", + "\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "6199337174405d39", + "metadata": {}, + "source": [ + "## Setup and Environment\n", + "\n", + "Let's prepare our environment with the necessary dependencies." 
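+    "\n", +    "If you don't have Redis available yet, the quickest option is a local Redis Stack container (the same command suggested by the connection checks below):\n", +    "\n", +    "```bash\n", +    "docker run -d -p 6379:6379 redis/redis-stack-server:latest\n", +    "```"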
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "7b8643051fbc09a2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "✅ Environment variables loaded\n", + "   REDIS_URL: redis://localhost:6379\n", + "   OPENAI_API_KEY: ✓ Set\n" + ] + } + ], + "source": [ + "import os\n", + "import sys\n", + "from dotenv import load_dotenv\n", + "\n", + "# Load environment variables\n", + "load_dotenv()\n", + "\n", + "# Verify required environment variables\n", + "required_vars = [\"OPENAI_API_KEY\"]\n", + "missing_vars = [var for var in required_vars if not os.getenv(var)]\n", + "\n", + "if missing_vars:\n", + "    print(f\"\"\"\n", + "⚠️ Missing required environment variables: {', '.join(missing_vars)}\n", + "\n", + "Please create a .env file with:\n", + "OPENAI_API_KEY=your_openai_api_key\n", + "REDIS_URL=redis://localhost:6379\n", + "\n", + "For Redis setup:\n", + "- Local: docker run -d -p 6379:6379 redis/redis-stack-server:latest\n", + "- Cloud: https://redis.com/try-free/\n", + "\"\"\")\n", + "    sys.exit(1)\n", + "\n", + "# Prefer REDIS_URL from the environment; fall back to a local default\n", + "REDIS_URL = os.getenv(\"REDIS_URL\", \"redis://localhost:6379\")\n", + "print(\"✅ Environment variables loaded\")\n", + "print(f\"   REDIS_URL: {REDIS_URL}\")\n", + "print(f\"   OPENAI_API_KEY: {'✓ Set' if os.getenv('OPENAI_API_KEY') else '✗ Not set'}\")" + ] + }, + { + "cell_type": "markdown", + "id": "c09c113f31cc9237", + "metadata": {}, + "source": [ + "### Install Dependencies\n", + "\n", + "We'll use LangChain for RAG orchestration and Redis for vector storage." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "a604197ba5bed3c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "✅ Dependencies ready\n" + ] + } + ], + "source": [ + "# Install required packages (uncomment if needed)\n", + "# %pip install -q langchain langchain-openai langchain-redis redisvl redis python-dotenv\n", + "\n", + "print(\"✅ Dependencies ready\")" + ] + }, + { + "cell_type": "markdown", + "id": "aa253a5a5fea56a", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## 📊 Step 1: Understanding Vector Embeddings\n", + "\n", + "Before building our RAG system, let's understand the core concept: **vector embeddings**.\n", + "\n", + "### What Are Embeddings?\n", + "\n", + "Embeddings convert text into numerical vectors that capture semantic meaning:\n", + "\n", + "```\n", + "Text: \"Introduction to Redis\"\n", + "  ↓ (embedding model)\n", + "Vector: [0.23, -0.45, 0.67, ..., 0.12]  # 1536 dimensions for OpenAI\n", + "```\n", + "\n", + "**Key insight:** Similar texts have similar vectors (measured by cosine similarity).\n", + "\n", + "### Why Embeddings Enable Semantic Search\n", + "\n", + "Traditional keyword search:\n", + "- Query: \"machine learning courses\" \n", + "- Matches: Only documents containing exact words \"machine learning\"\n", + "- Misses: \"AI courses\", \"neural network classes\", \"deep learning programs\"\n", + "\n", + "Semantic search with embeddings:\n", + "- Query: \"machine learning courses\"\n", + "- Matches: All semantically similar content (AI, neural networks, deep learning, etc.)\n", + "- Works across synonyms, related concepts, and different phrasings\n", + "\n", + "Let's see this in action:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "f78bfe047e37e3fe", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "✅ Generated embeddings for 3 texts\n", + "   Vector dimensions: 1536\n", + "   First vector preview: 
[-0.030, -0.013, 0.001, ...]\n" + ] + } + ], + "source": [ + "from langchain_openai import OpenAIEmbeddings\n", + "\n", + "# Initialize embedding model\n", + "embeddings = OpenAIEmbeddings(model=\"text-embedding-3-small\")\n", + "\n", + "# Generate embeddings for similar and different texts\n", + "texts = [\n", + " \"Introduction to machine learning and neural networks\",\n", + " \"Learn about AI and deep learning fundamentals\", \n", + " \"Database administration and SQL queries\",\n", + "]\n", + "\n", + "# Get embeddings (this calls OpenAI API)\n", + "vectors = embeddings.embed_documents(texts)\n", + "\n", + "print(f\"โœ… Generated embeddings for {len(texts)} texts\")\n", + "print(f\" Vector dimensions: {len(vectors[0])}\")\n", + "print(f\" First vector preview: [{vectors[0][0]:.3f}, {vectors[0][1]:.3f}, {vectors[0][2]:.3f}, ...]\")" + ] + }, + { + "cell_type": "markdown", + "id": "8987e7214633221", + "metadata": {}, + "source": [ + "### Measuring Semantic Similarity\n", + "\n", + "Let's calculate cosine similarity to see which texts are semantically related:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "7963a05e261c914c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Semantic Similarity Scores (0=unrelated, 1=identical):\n", + " ML vs AI: 0.623 โ† High similarity (related topics)\n", + " ML vs Database: 0.171 โ† Low similarity (different topics)\n", + " AI vs Database: 0.177 โ† Low similarity (different topics)\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "\n", + "def cosine_similarity(vec1, vec2):\n", + " \"\"\"Calculate cosine similarity between two vectors.\"\"\"\n", + " return np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))\n", + "\n", + "# Compare similarities\n", + "sim_1_2 = cosine_similarity(vectors[0], vectors[1]) # ML vs AI (related)\n", + "sim_1_3 = cosine_similarity(vectors[0], vectors[2]) # ML vs Database (unrelated)\n", + "sim_2_3 = cosine_similarity(vectors[1], vectors[2]) # AI vs Database (unrelated)\n", + "\n", + "print(\"Semantic Similarity Scores (0=unrelated, 1=identical):\")\n", + "print(f\" ML vs AI: {sim_1_2:.3f} โ† High similarity (related topics)\")\n", + "print(f\" ML vs Database: {sim_1_3:.3f} โ† Low similarity (different topics)\")\n", + "print(f\" AI vs Database: {sim_2_3:.3f} โ† Low similarity (different topics)\")" + ] + }, + { + "cell_type": "markdown", + "id": "830004ddb2bd656b", + "metadata": {}, + "source": [ + "**๐Ÿ’ก Key Takeaway:** Embeddings capture semantic meaning, allowing us to find relevant information even when exact keywords don't match.\n", + "\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "be16970c9b44fcec", + "metadata": {}, + "source": [ + "## ๐Ÿ“š Step 2: Generate Course Data\n", + "\n", + "Now let's create realistic course data for our RAG system. 
We'll use the existing utilities from the reference agent.\n", + "\n", + "### Understanding the Course Generation Script\n", + "\n", + "The `generate_courses.py` script creates realistic course data with:\n", + "- Multiple majors (CS, Data Science, Math, Business, Psychology)\n", + "- Course templates with descriptions, prerequisites, schedules\n", + "- Realistic metadata (instructors, enrollment, difficulty levels)\n", + "\n", + "Let's generate our course catalog:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "d63e217969956023", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "📚 Generating course catalog...\n", + "\n", + "✅ Generated 5 majors:\n", + "   - Computer Science (CS)\n", + "   - Data Science (DS)\n", + "   - Mathematics (MATH)\n", + "   - Business Administration (BUS)\n", + "   - Psychology (PSY)\n", + "\n", + "✅ Generated 50 courses\n", + "\n", + "Sample Course:\n", + "   Code: CS001\n", + "   Title: Introduction to Programming\n", + "   Department: Computer Science\n", + "   Difficulty: beginner\n", + "   Credits: 3\n", + "   Description: Fundamental programming concepts using Python. Variables, control structures, functions, and basic d...\n", + "\n" + ] + } + ], + "source": [ + "# The reference-agent package was installed with pip during setup, so no\n", + "# sys.path manipulation is needed -- we can import it directly.\n", + "\n", + "from redis_context_course.scripts.generate_courses import CourseGenerator\n", + "\n", + "# Initialize generator with a seed for reproducibility\n", + "import random\n", + "random.seed(42)\n", + "\n", + "# Create generator\n", + "generator = CourseGenerator()\n", + "\n", + "print(\"📚 Generating course catalog...\")\n", + "print()\n", + "\n", + "# Generate majors\n", + "majors = generator.generate_majors()\n", + "print(f\"✅ Generated {len(majors)} majors:\")\n", + "for major in majors:\n", + "    print(f\"   - {major.name} ({major.code})\")\n", + "\n", + "print()\n", + "\n", + "# Generate courses (10 per major)\n", + "courses = generator.generate_courses(courses_per_major=10)\n", + "print(f\"✅ Generated {len(courses)} courses\")\n", + "\n", + "# Show a sample course\n", + "sample_course = courses[0]\n", + "print(f\"\"\"\n", + "Sample Course:\n", + "  Code: {sample_course.course_code}\n", + "  Title: {sample_course.title}\n", + "  Department: {sample_course.department}\n", + "  Difficulty: {sample_course.difficulty_level.value}\n", + "  Credits: {sample_course.credits}\n", + "  Description: {sample_course.description[:100]}...\n", + "\"\"\")" + ] + }, + { + "cell_type": "markdown", + "id": "e95cd4b02364b072", + "metadata": {}, + "source": [ + "### Save Course Catalog to JSON\n", + "\n", + "Let's save this data so we can ingest it into Redis:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "35eb083f18863411", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated 5 majors and 50 courses\n", + "Data saved to course_catalog_section2.json\n", + "✅ Course catalog saved to course_catalog_section2.json\n", + "   Ready for ingestion into Redis vector store\n" + ] + } + ], + "source": [ + "catalog_file = \"course_catalog_section2.json\"\n", + "generator.save_to_json(catalog_file)\n", + "\n", + "print(f\"✅ Course catalog saved to {catalog_file}\")\n", + "print(f\"   Ready for ingestion into Redis vector store\")" + ] + }, + { + "cell_type": "markdown", + "id": 
"c15d309043a79486", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## ๐Ÿ”ง Step 3: Set Up Redis Vector Store\n", + "\n", + "Now we'll configure Redis to store our course embeddings and enable semantic search.\n", + "\n", + "### Understanding Redis Vector Search\n", + "\n", + "Redis Stack provides vector similarity search capabilities:\n", + "- **Storage:** Courses stored as Redis hashes with vector fields\n", + "- **Indexing:** Vector index for fast similarity search (HNSW algorithm)\n", + "- **Search:** Find top-k most similar courses to a query vector using cosine similarity\n", + "\n", + "### Using the Reference Agent Utilities\n", + "\n", + "Instead of configuring Redis from scratch, we'll use the **production-ready utilities** from the reference agent. These utilities are already configured and tested, allowing you to focus on context engineering concepts rather than Redis configuration details." + ] + }, + { + "cell_type": "markdown", + "id": "429acdaadabaa392", + "metadata": {}, + "source": [ + "### Import Redis Configuration\n", + "\n", + "Let's import the pre-configured Redis setup:\n", + "\n", + "What we're importing:\n", + " - redis_config: A global singleton that manages all Redis connections\n", + "\n", + "What it provides (lazy-initialized properties):\n", + " - redis_config.redis_client: Redis connection for data storage\n", + " - redis_config.embeddings: OpenAI embeddings (text-embedding-3-small)\n", + " - redis_config.vector_index: RedisVL SearchIndex with pre-configured schema\n", + " - redis_config.checkpointer: RedisSaver for LangGraph (used in Section 3)\n", + "\n", + "Why use this:\n", + " - Production-ready configuration (same as reference agent)\n", + " - Proper schema with all course metadata fields\n", + " - Vector field: 1536 dims, cosine distance, HNSW algorithm\n", + " - No boilerplate - just import and use" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "64b05a2a034da925", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Redis configuration imported\n", + " Redis URL: redis://localhost:6379\n", + " Vector index name: course_catalog\n" + ] + } + ], + "source": [ + "from redis_context_course.redis_config import redis_config\n", + "\n", + "print(\"โœ… Redis configuration imported\")\n", + "print(f\" Redis URL: {redis_config.redis_url}\")\n", + "print(f\" Vector index name: {redis_config.vector_index_name}\")" + ] + }, + { + "cell_type": "markdown", + "id": "93784287e000173d", + "metadata": {}, + "source": [ + "### Test Redis Connection\n", + "\n", + "Let's verify Redis is running and accessible:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "7c2f11887561871f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Connected to Redis\n", + " Redis is healthy and ready\n" + ] + } + ], + "source": [ + "# Test connection using built-in health check\n", + "if redis_config.health_check():\n", + " print(\"โœ… Connected to Redis\")\n", + " print(f\" Redis is healthy and ready\")\n", + "else:\n", + " print(\"โŒ Redis connection failed\")\n", + " print(\" Make sure Redis is running:\")\n", + " print(\" - Local: docker run -d -p 6379:6379 redis/redis-stack-server:latest\")\n", + " print(\" - Cloud: https://redis.com/try-free/\")\n", + " sys.exit(1)" + ] + }, + { + "cell_type": "markdown", + "id": "154a875022180c9f", + "metadata": {}, + "source": [ + "### Initialize Course Manager\n", + "\n", + "Now let's import the 
`CourseManager` - this handles all course operations, such as storage, retrieval, and search:\n", + "\n", + "What it provides:\n", + " - store_course(): Store a course with vector embedding\n", + " - search_courses(): Semantic search with filters\n", + " - get_course(): Retrieve course by ID\n", + " - get_course_by_code(): Retrieve course by course code\n", + " - recommend_courses(): Generate personalized recommendations\n", + "\n", + "How it works:\n", + " - Uses redis_config for connections (redis_client, vector_index, embeddings)\n", + " - Automatically generates embeddings from course content\n", + " - Uses RedisVL's VectorQuery for semantic search\n", + " - Supports metadata filters (department, difficulty, format, etc.)\n", + "\n", + "Why use this:\n", + " - Encapsulates all Redis/RedisVL complexity\n", + " - Same code used in reference agent (Sections 3 & 4)\n", + " - Focus on RAG concepts, not Redis implementation details" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "f89de1e20794eda1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Course manager initialized\n", + " Ready for course storage and search\n", + " Using RedisVL for vector operations\n" + ] + } + ], + "source": [ + "from redis_context_course.course_manager import CourseManager\n", + "\n", + "# Initialize course manager\n", + "course_manager = CourseManager()\n", + "\n", + "print(\"โœ… Course manager initialized\")\n", + "print(f\" Ready for course storage and search\")\n", + "print(f\" Using RedisVL for vector operations\")" + ] + }, + { + "cell_type": "markdown", + "id": "fa59e20137321967", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## ๐Ÿ“ฅ Step 4: Ingest Courses into Redis\n", + "\n", + "Now we'll load our course catalog into Redis with vector embeddings for semantic search.\n", + "\n", + "### Understanding the Ingestion Process\n", + "\n", + "The ingestion pipeline:\n", + "1. **Load** course data from JSON\n", + "2. **Generate embeddings** for each course (title + description + tags)\n", + "3. **Store** in Redis with metadata for filtering\n", + "4. **Index** vectors for fast similarity search\n", + "\n", + "Let's use the existing ingestion utilities:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "85ccf2cb80ad5e05", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "๐Ÿš€ Starting course ingestion...\n", + "\n" + ] + }, + { + "data": { + "text/html": [ + "
๐Ÿš€ Starting Course Catalog Ingestion\n",
+       "
\n" + ], + "text/plain": [ + "\u001B[1;34m๐Ÿš€ Starting Course Catalog Ingestion\u001B[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
โœ… Redis connection successful\n",
+       "
\n" + ], + "text/plain": [ + "\u001B[32mโœ… Redis connection successful\u001B[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
๐Ÿงน Clearing existing data...\n",
+       "
\n" + ], + "text/plain": [ + "\u001B[33m๐Ÿงน Clearing existing data\u001B[0m\u001B[33m...\u001B[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
โœ… Data cleared successfully\n",
+       "
\n" + ], + "text/plain": [ + "\u001B[32mโœ… Data cleared successfully\u001B[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
โœ… Loaded catalog from course_catalog_section2.json\n",
+       "
\n" + ], + "text/plain": [ + "\u001B[32mโœ… Loaded catalog from course_catalog_section2.json\u001B[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
   Majors: 5\n",
+       "
\n" + ], + "text/plain": [ + " Majors: \u001B[1;36m5\u001B[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
   Courses: 50\n",
+       "
\n" + ], + "text/plain": [ + " Courses: \u001B[1;36m50\u001B[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "210b0d21357e488a8107aba0bf28ee38", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "
โœ… Ingested 5 majors\n",
+       "
\n" + ], + "text/plain": [ + "\u001B[32mโœ… Ingested \u001B[0m\u001B[1;32m5\u001B[0m\u001B[32m majors\u001B[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "d6a3f7f8bc1b482985ae85864abdcc2e", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "00:33:51 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:33:52 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:33:52 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:33:53 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:33:54 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:33:54 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:33:54 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:33:55 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:33:55 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:33:55 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:33:55 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:33:56 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:33:56 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:33:56 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:33:56 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:33:57 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:33:58 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:33:58 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:33:58 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:33:58 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:33:58 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:33:59 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:33:59 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:34:00 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:34:00 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:34:01 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:34:01 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:34:01 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:34:02 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:34:02 httpx INFO HTTP Request: POST 
https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:34:02 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:34:02 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:34:03 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:34:03 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:34:03 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:34:03 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:34:04 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:34:04 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:34:04 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:34:04 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:34:04 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:34:05 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:34:05 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:34:05 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:34:05 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:34:06 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:34:06 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:34:06 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:34:06 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "00:34:06 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "data": { + "text/html": [ + "
\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "
โœ… Ingested 50 courses\n",
+       "
\n" + ], + "text/plain": [ + "\u001B[32mโœ… Ingested \u001B[0m\u001B[1;32m50\u001B[0m\u001B[32m courses\u001B[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
๐Ÿ“Š Verification - Courses: 50, Majors: 5\n",
+       "
\n" + ], + "text/plain": [ + "\u001B[34m๐Ÿ“Š Verification - Courses: \u001B[0m\u001B[1;34m50\u001B[0m\u001B[34m, Majors: \u001B[0m\u001B[1;34m5\u001B[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
๐ŸŽ‰ Ingestion completed successfully!\n",
+       "
\n" + ], + "text/plain": [ + "\u001B[1;32m๐ŸŽ‰ Ingestion completed successfully!\u001B[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "โœ… Course ingestion complete!\n", + " Courses in Redis: 50\n", + " Majors in Redis: 5\n" + ] + } + ], + "source": [ + "from redis_context_course.scripts.ingest_courses import CourseIngestionPipeline\n", + "import asyncio\n", + "\n", + "# What we're importing:\n", + "# - CourseIngestionPipeline: Handles bulk ingestion of course data\n", + "#\n", + "# What it does:\n", + "# - Loads course catalog from JSON file\n", + "# - For each course: generates embedding + stores in Redis\n", + "# - Uses CourseManager internally for storage\n", + "# - Provides progress tracking and verification\n", + "#\n", + "# Why use this:\n", + "# - Handles batch ingestion efficiently\n", + "# - Same utility used to populate reference agent\n", + "# - Includes error handling and progress reporting\n", + "\n", + "# Initialize ingestion pipeline\n", + "pipeline = CourseIngestionPipeline()\n", + "\n", + "print(\"๐Ÿš€ Starting course ingestion...\")\n", + "print()\n", + "\n", + "# Run ingestion (clear existing data first)\n", + "success = await pipeline.run_ingestion(\n", + " catalog_file=catalog_file,\n", + " clear_existing=True\n", + ")\n", + "\n", + "if success:\n", + " print()\n", + " print(\"โœ… Course ingestion complete!\")\n", + "\n", + " # Verify what was ingested\n", + " verification = pipeline.verify_ingestion()\n", + " print(f\" Courses in Redis: {verification['courses']}\")\n", + " print(f\" Majors in Redis: {verification['majors']}\")\n", + "else:\n", + " print(\"โŒ Ingestion failed\")" + ] + }, + { + "cell_type": "markdown", + "id": "da9f4e00dcc39387", + "metadata": {}, + "source": [ + "### What Just Happened?\n", + "\n", + "For each course, the ingestion pipeline:\n", + "\n", + "1. **Created searchable content:**\n", + " ```python\n", + " content = f\"{course.title} {course.description} {course.department} {' '.join(course.tags)}\"\n", + " ```\n", + "\n", + "2. **Generated embedding vector:**\n", + " ```python\n", + " embedding = await embeddings.aembed_query(content) # 1536-dim vector\n", + " ```\n", + "\n", + "3. **Stored in Redis:**\n", + " ```python\n", + " redis_client.hset(f\"course_idx:{course.id}\", mapping={\n", + " \"course_code\": \"CS001\",\n", + " \"title\": \"Introduction to Programming\",\n", + " \"description\": \"...\",\n", + " \"content_vector\": embedding.tobytes() # Binary vector\n", + " })\n", + " ```\n", + "\n", + "4. **Indexed for search:**\n", + " - Redis automatically indexes the vector field\n", + " - Enables fast k-NN (k-nearest neighbors) search\n", + "\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "2c4d3d17c5c3cdae", + "metadata": {}, + "source": [ + "## ๐Ÿ” Step 5: Semantic Search - Finding Relevant Courses\n", + "\n", + "Now comes the magic: semantic search. Let's query our vector store to find relevant courses.\n", + "\n", + "### Basic Semantic Search\n", + "\n", + "Let's search for courses related to \"machine learning\".\n", + "\n", + "When this is called:\n", + "```python\n", + "await course_manager.search_courses(\n", + " query=query,\n", + " limit=3 # top_k parameter\n", + ")\n", + "```\n", + "It is performing semantic search under the hood:\n", + "1. Generates embedding for the query using OpenAI\n", + "2. Performs vector similarity search in Redis (cosine distance)\n", + "3. Returns top-k most similar courses\n", + "4. 
Uses RedisVL's VectorQuery under the hood" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "d19cebdedbaec6a0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "๐Ÿ” Searching for: 'machine learning and artificial intelligence'\n", + "\n", + "00:35:39 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "โœ… Found 3 relevant courses:\n", + "\n", + "1. CS007: Machine Learning\n", + " Department: Computer Science\n", + " Difficulty: advanced\n", + " Description: Introduction to machine learning algorithms and applications. Supervised and unsupervised learning, ...\n", + "\n", + "2. DS012: Statistics for Data Science\n", + " Department: Data Science\n", + " Difficulty: intermediate\n", + " Description: Statistical methods and probability theory for data analysis. Hypothesis testing, regression, and st...\n", + "\n", + "3. DS015: Statistics for Data Science\n", + " Department: Data Science\n", + " Difficulty: intermediate\n", + " Description: Statistical methods and probability theory for data analysis. Hypothesis testing, regression, and st...\n", + "\n" + ] + } + ], + "source": [ + "# We already initialized course_manager in Step 3\n", + "# It's ready to use for semantic search\n", + "\n", + "# Search for machine learning courses\n", + "query = \"machine learning and artificial intelligence\"\n", + "print(f\"๐Ÿ” Searching for: '{query}'\\n\")\n", + "\n", + "# Perform semantic search (returns top 3 most similar courses)\n", + "results = await course_manager.search_courses(\n", + " query=query,\n", + " limit=3 # top_k parameter\n", + ")\n", + "\n", + "print(f\"โœ… Found {len(results)} relevant courses:\\n\")\n", + "\n", + "for i, course in enumerate(results, 1):\n", + " print(f\"{i}. {course.course_code}: {course.title}\")\n", + " print(f\" Department: {course.department}\")\n", + " print(f\" Difficulty: {course.difficulty_level.value}\")\n", + " print(f\" Description: {course.description[:100]}...\")\n", + " print()" + ] + }, + { + "cell_type": "markdown", + "id": "8bd46b1b7a140f91", + "metadata": {}, + "source": [ + "### Search with Filters\n", + "\n", + "We can combine semantic search with metadata filters for more precise results:\n", + "\n", + "How filters work:\n", + "\n", + "```python\n", + "results = await course_manager.search_courses(\n", + " query=query,\n", + " limit=3,\n", + " filters=filters\n", + ")\n", + "```\n", + " - CourseManager._build_filters() converts dict to RedisVL filter expressions\n", + " - Uses Tag filters for categorical fields (difficulty_level, format, department)\n", + " - Uses Num filters for numeric fields (credits, year)\n", + " - Combines filters with AND logic\n", + " - Applied to vector search results\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "19e81b08ef0b24e1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "๐Ÿ” Searching for: 'machine learning'\n", + " Filters: {'difficulty_level': 'beginner', 'format': 'online'}\n", + "\n", + "00:39:02 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "โœ… Found 3 matching courses:\n", + "1. DS020: Data Visualization\n", + " Format: online, Difficulty: beginner\n", + "\n", + "2. PSY043: Introduction to Psychology\n", + " Format: online, Difficulty: beginner\n", + "\n", + "3. 
PSY049: Introduction to Psychology\n", + " Format: online, Difficulty: beginner\n", + "\n" + ] + } + ], + "source": [ + "# Search for beginner-level machine learning courses\n", + "query = \"machine learning\"\n", + "filters = {\n", + " \"difficulty_level\": \"beginner\",\n", + " \"format\": \"online\"\n", + "}\n", + "\n", + "print(f\"๐Ÿ” Searching for: '{query}'\\n Filters: {filters}\\n\")\n", + "# How filters work:\n", + "# - CourseManager._build_filters() converts dict to RedisVL filter expressions\n", + "# - Uses Tag filters for categorical fields (difficulty_level, format, department)\n", + "# - Uses Num filters for numeric fields (credits, year)\n", + "# - Combines filters with AND logic\n", + "# - Applied to vector search results\n", + "results = await course_manager.search_courses(\n", + " query=query,\n", + " limit=3,\n", + " filters=filters\n", + ")\n", + "\n", + "print(f\"โœ… Found {len(results)} matching courses:\")\n", + "for i, course in enumerate(results, 1):\n", + " print(f\"{i}. {course.course_code}: {course.title}\")\n", + " print(f\" Format: {course.format.value}, Difficulty: {course.difficulty_level.value}\")\n", + " print()" + ] + }, + { + "cell_type": "markdown", + "id": "9c9406198195f5c4", + "metadata": {}, + "source": [ + "**๐Ÿ’ก Key Insight:** We can combine:\n", + "- **Semantic search** (find courses about \"machine learning\")\n", + "- **Metadata filters** (only beginner, online courses)\n", + "\n", + "This gives us precise, relevant results for any query. This will be a useful tool to build context for our RAG pipeline.\n", + "\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "35d2fedcf3efb590", + "metadata": {}, + "source": [ + "## ๐Ÿ”— Step 6: Building the RAG Pipeline\n", + "\n", + "Now let's combine everything into a complete RAG pipeline: Retrieval โ†’ Context Assembly โ†’ Generation.\n", + "\n", + "### The RAG Flow\n", + "\n", + "```\n", + "User Query\n", + " โ†“\n", + "1. Semantic Search (retrieve relevant courses)\n", + " โ†“\n", + "2. Context Assembly (combine system + user + retrieved context)\n", + " โ†“\n", + "3. 
LLM Generation (create personalized response)\n", + "```\n", + "\n", + "Let's implement each step:" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "b38da21b55f381ab", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… LLM initialized (gpt-4o-mini)\n" + ] + } + ], + "source": [ + "from langchain_openai import ChatOpenAI\n", + "from langchain_core.messages import SystemMessage, HumanMessage\n", + "\n", + "# Initialize LLM\n", + "llm = ChatOpenAI(model=\"gpt-4o-mini\", temperature=0.7)\n", + "\n", + "print(\"โœ… LLM initialized (gpt-4o-mini)\")" + ] + }, + { + "cell_type": "markdown", + "id": "3a3289098af7058a", + "metadata": {}, + "source": [ + "### Step 6.1: Retrieval Function\n", + "\n", + "First, let's create a function to retrieve relevant courses:" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "e1206c431ffb4292", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "00:40:03 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "๐Ÿ” Retrieved 3 courses for: 'I want to learn about data structures'\n", + " - CS009: Data Structures and Algorithms\n", + " - CS001: Introduction to Programming\n", + " - CS005: Introduction to Programming\n" + ] + } + ], + "source": [ + "async def retrieve_courses(query: str, top_k: int = 3, filters: dict = None):\n", + " \"\"\"\n", + " Retrieve relevant courses using semantic search.\n", + "\n", + " Args:\n", + " query: User's search query\n", + " top_k: Number of courses to retrieve\n", + " filters: Optional metadata filters\n", + "\n", + " Returns:\n", + " List of relevant courses\n", + " \"\"\"\n", + " # Note: CourseManager.search_courses() uses 'limit' parameter, not 'top_k'\n", + " results = await course_manager.search_courses(\n", + " query=query,\n", + " limit=top_k,\n", + " filters=filters\n", + " )\n", + " return results\n", + "\n", + "# Test retrieval\n", + "test_query = \"I want to learn about data structures\"\n", + "retrieved_courses = await retrieve_courses(test_query, top_k=3)\n", + "\n", + "print(f\"๐Ÿ” Retrieved {len(retrieved_courses)} courses for: '{test_query}'\")\n", + "for course in retrieved_courses:\n", + " print(f\" - {course.course_code}: {course.title}\")" + ] + }, + { + "cell_type": "markdown", + "id": "ef03683be57faf95", + "metadata": {}, + "source": [ + "### Step 6.2: Context Assembly Function\n", + "\n", + "Now let's assemble context from multiple sources (system + user + retrieved):" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "6a068ffa458f850f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Context assembled\n", + " Total length: 1537 characters\n", + " Includes: System + User + Retrieved context\n" + ] + } + ], + "source": [ + "def assemble_context(\n", + " user_query: str,\n", + " retrieved_courses: list,\n", + " user_profile: dict = None\n", + "):\n", + " \"\"\"\n", + " Assemble context from multiple sources for the LLM.\n", + "\n", + " This implements the context engineering principles from Section 1:\n", + " - System Context: AI role and instructions\n", + " - User Context: Student profile and preferences\n", + " - Retrieved Context: Relevant courses from vector search\n", + " \"\"\"\n", + "\n", + " # System Context: Define the AI's role\n", + " system_context = \"\"\"You are a Redis University course advisor.\n", + "\n", + "Your role:\n", + "- Help students 
find courses that match their interests and goals\n", + "- Provide personalized recommendations based on student profiles\n", + "- Explain course prerequisites and learning paths\n", + "- Be encouraging and supportive\n", + "\n", + "Guidelines:\n", + "- Only recommend courses from the provided course list\n", + "- Consider student's difficulty level preferences\n", + "- Explain your reasoning for recommendations\n", + "- Be concise but informative\n", + "\"\"\"\n", + "\n", + " # User Context: Student profile (if provided)\n", + " user_context = \"\"\n", + " if user_profile:\n", + " user_context = f\"\"\"\n", + "Student Profile:\n", + "- Name: {user_profile.get('name', 'Student')}\n", + "- Major: {user_profile.get('major', 'Undeclared')}\n", + "- Year: {user_profile.get('year', 'N/A')}\n", + "- Interests: {', '.join(user_profile.get('interests', []))}\n", + "- Preferred Difficulty: {user_profile.get('preferred_difficulty', 'any')}\n", + "- Preferred Format: {user_profile.get('preferred_format', 'any')}\n", + "\"\"\"\n", + "\n", + " # Retrieved Context: Relevant courses from semantic search\n", + " retrieved_context = \"\\nRelevant Courses:\\n\"\n", + " for i, course in enumerate(retrieved_courses, 1):\n", + " retrieved_context += f\"\"\"\n", + "{i}. {course.course_code}: {course.title}\n", + " Department: {course.department}\n", + " Difficulty: {course.difficulty_level.value}\n", + " Format: {course.format.value}\n", + " Credits: {course.credits}\n", + " Description: {course.description}\n", + " Prerequisites: {len(course.prerequisites)} required\n", + "\"\"\"\n", + "\n", + " # Combine all context\n", + " full_context = system_context\n", + " if user_context:\n", + " full_context += user_context\n", + " full_context += retrieved_context\n", + "\n", + " return full_context\n", + "\n", + "# Test context assembly\n", + "test_profile = {\n", + " \"name\": \"Sarah Chen\",\n", + " \"major\": \"Computer Science\",\n", + " \"year\": \"Junior\",\n", + " \"interests\": [\"machine learning\", \"data science\"],\n", + " \"preferred_difficulty\": \"intermediate\",\n", + " \"preferred_format\": \"online\"\n", + "}\n", + "\n", + "assembled_context = assemble_context(\n", + " user_query=test_query,\n", + " retrieved_courses=retrieved_courses,\n", + " user_profile=test_profile\n", + ")\n", + "\n", + "print(\"โœ… Context assembled\")\n", + "print(f\" Total length: {len(assembled_context)} characters\")\n", + "print(f\" Includes: System + User + Retrieved context\")" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "16d6089b-7fe2-451d-b57d-436c49259216", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Observe the assembled context: \n", + "\n", + "You are a Redis University course advisor.\n", + "\n", + "Your role:\n", + "- Help students find courses that match their interests and goals\n", + "- Provide personalized recommendations based on student profiles\n", + "- Explain course prerequisites and learning paths\n", + "- Be encouraging and supportive\n", + "\n", + "Guidelines:\n", + "- Only recommend courses from the provided course list\n", + "- Consider student's difficulty level preferences\n", + "- Explain your reasoning for recommendations\n", + "- Be concise but informative\n", + "\n", + "Student Profile:\n", + "- Name: Sarah Chen\n", + "- Major: Computer Science\n", + "- Year: Junior\n", + "- Interests: machine learning, data science\n", + "- Preferred Difficulty: intermediate\n", + "- Preferred Format: online\n", + "\n", + 
"Relevant Courses:\n", + "\n", + "1. CS009: Data Structures and Algorithms\n", + " Department: Computer Science\n", + " Difficulty: intermediate\n", + " Format: in_person\n", + " Credits: 4\n", + " Description: Study of fundamental data structures and algorithms. Arrays, linked lists, trees, graphs, sorting, and searching.\n", + " Prerequisites: 2 required\n", + "\n", + "2. CS001: Introduction to Programming\n", + " Department: Computer Science\n", + " Difficulty: beginner\n", + " Format: hybrid\n", + " Credits: 3\n", + " Description: Fundamental programming concepts using Python. Variables, control structures, functions, and basic data structures.\n", + " Prerequisites: 0 required\n", + "\n", + "3. CS005: Introduction to Programming\n", + " Department: Computer Science\n", + " Difficulty: beginner\n", + " Format: hybrid\n", + " Credits: 3\n", + " Description: Fundamental programming concepts using Python. Variables, control structures, functions, and basic data structures.\n", + " Prerequisites: 0 required\n", + "\n" + ] + } + ], + "source": [ + "print(f\"Observe the assembled context: \\n\\n{assembled_context}\")" + ] + }, + { + "cell_type": "markdown", + "id": "9800d8dd-38ea-482f-9486-fc32ba9f1799", + "metadata": {}, + "source": [ + "**๐ŸŽ Bonus:** Can you identify the different parts of the context from what we learned in section 1 from above?" + ] + }, + { + "cell_type": "markdown", + "id": "9f28151926c3be5", + "metadata": {}, + "source": [ + "**โœ… Answer:** Yes! Looking at the assembled context above, we can identify all three context types from Section 1:\n", + "\n", + "1. **System Context** (Static)\n", + " - The first section: \"You are a Redis University course advisor...\"\n", + " - Defines the AI's role, responsibilities, and guidelines\n", + " - Remains the same for all queries\n", + " - Sets behavioral instructions and constraints\n", + "\n", + "2. **User Context** (Dynamic, User-Specific)\n", + " - The \"Student Profile\" section\n", + " - Contains Sarah Chen's personal information: major, year, interests, preferences\n", + " - Changes based on who is asking the question\n", + " - Enables personalized recommendations\n", + "\n", + "3. **Retrieved Context** (Dynamic, Query-Specific)\n", + " - The \"Relevant Courses\" section\n", + " - Lists the 3 courses found via semantic search for \"data structures\"\n", + " - Changes based on the specific query\n", + " - Provides the factual information the LLM needs to answer\n", + "\n", + "Notice how all three work together: System Context tells the AI **how to behave**, User Context tells it **who it's helping**, and Retrieved Context provides **what information is relevant**. This is RAG in action!" 
+ ] + }, + { + "cell_type": "markdown", + "id": "19c1be78f7cd3e20", + "metadata": {}, + "source": [ + "### Step 6.3: Generation Function\n", + "\n", + "Finally, let's generate a response using the assembled context:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9e27332f-83d5-475f-9fcc-405525a25c9f", + "metadata": {}, + "outputs": [], + "source": [ + "async def generate_response(user_query: str, context: str):\n", + " \"\"\"\n", + " Generate LLM response using assembled context.\n", + "\n", + " Args:\n", + " user_query: User's question\n", + " context: Assembled context (system + user + retrieved)\n", + "\n", + " Returns:\n", + " LLM response string\n", + " \"\"\"\n", + " messages = [\n", + " SystemMessage(content=context),\n", + " HumanMessage(content=user_query)\n", + " ]\n", + "\n", + " response = await llm.ainvoke(messages)\n", + " return response.content\n", + "\n", + "# Test generation\n", + "response = await generate_response(test_query, assembled_context)\n", + "\n", + "print(\"\\n๐Ÿค– Generated Response:\\n\")\n", + "print(response)" + ] + }, + { + "cell_type": "markdown", + "id": "cba9e518ee7581c6", + "metadata": {}, + "source": [ + "### ๐ŸŽฏ Understanding the Generated Response\n", + "\n", + "Notice how the LLM's response demonstrates effective context engineering:\n", + "\n", + "**๐Ÿ‘ค Personalization from User Context:**\n", + "- Addresses Sarah by name\n", + "- References her intermediate difficulty preference\n", + "- Acknowledges her online format preference (even though the course is in-person)\n", + "- Connects to her interests (machine learning and data science)\n", + "\n", + "**๐Ÿ“š Accuracy from Retrieved Context:**\n", + "- Recommends CS009 (which was in the retrieved courses)\n", + "- Provides correct course details (difficulty, format, credits, description)\n", + "- Mentions prerequisites accurately (2 required)\n", + "\n", + "**๐Ÿค– Guidance from System Context:**\n", + "- Acts as a supportive advisor (\"I'm here to help you succeed!\")\n", + "- Explains reasoning for the recommendation\n", + "- Acknowledges the format mismatch honestly\n", + "- Stays within the provided course list\n", + "\n", + "This is the power of RAG: the LLM generates a response that is **personalized** (User Context), **accurate** (Retrieved Context), and **helpful** (System Context). Without RAG, the LLM would either hallucinate course details or provide generic advice." 
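+    "\n", +    "\n", +    "To see the contrast for yourself, try the same query with retrieval removed (an optional experiment; the bare-bones system prompt here is just illustrative):\n", +    "\n", +    "```python\n", +    "# Same question, but with no user profile and no retrieved courses --\n", +    "# the model can only answer generically or guess at course details\n", +    "bare_context = \"You are a Redis University course advisor.\"\n", +    "response_no_rag = await generate_response(test_query, bare_context)\n", +    "print(response_no_rag)\n", +    "```"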
+ ] + }, + { + "cell_type": "markdown", + "id": "29793f2405eba89f", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## โœจ Step 7: Complete RAG Function\n", + "\n", + "Let's combine all three steps into a single, reusable RAG function:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b7dff6ee-0f65-4875-b0ee-469a2afd26b0", + "metadata": {}, + "outputs": [], + "source": [ + "async def rag_query(\n", + " user_query: str,\n", + " user_profile: dict = None,\n", + " top_k: int = 3,\n", + " filters: dict = None\n", + "):\n", + " \"\"\"\n", + " Complete RAG pipeline: Retrieve โ†’ Assemble โ†’ Generate\n", + "\n", + " Args:\n", + " user_query: User's question\n", + " user_profile: Optional student profile\n", + " top_k: Number of courses to retrieve\n", + " filters: Optional metadata filters\n", + "\n", + " Returns:\n", + " LLM response string\n", + " \"\"\"\n", + " # Step 1: Retrieve relevant courses\n", + " retrieved_courses = await retrieve_courses(user_query, top_k, filters)\n", + "\n", + " # Step 2: Assemble context\n", + " context = assemble_context(user_query, retrieved_courses, user_profile)\n", + "\n", + " # Step 3: Generate response\n", + " response = await generate_response(user_query, context)\n", + "\n", + " return response, retrieved_courses\n", + "\n", + "# Test the complete RAG pipeline\n", + "print(\"=\" * 60)\n", + "print(\"COMPLETE RAG PIPELINE TEST\")\n", + "print(\"=\" * 60)\n", + "print()\n", + "\n", + "query = \"I'm interested in learning about databases and data management\"\n", + "profile = {\n", + " \"name\": \"Alex Johnson\",\n", + " \"major\": \"Data Science\",\n", + " \"year\": \"Sophomore\",\n", + " \"interests\": [\"databases\", \"data analysis\", \"SQL\"],\n", + " \"preferred_difficulty\": \"intermediate\",\n", + " \"preferred_format\": \"hybrid\"\n", + "}\n", + "\n", + "print(f\"Query: {query}\")\n", + "print()\n", + "print(f\"Student: {profile['name']} ({profile['major']}, {profile['year']})\")\n", + "print()\n", + "\n", + "response, courses = await rag_query(query, profile, top_k=3)\n", + "\n", + "print(\"Retrieved Courses:\")\n", + "for i, course in enumerate(courses, 1):\n", + " print(f\" {i}. {course.course_code}: {course.title}\")\n", + "print()\n", + "\n", + "print(\"AI Response:\")\n", + "print(response)" + ] + }, + { + "cell_type": "markdown", + "id": "b4a079374b0fe92c", + "metadata": {}, + "source": [ + "### ๐ŸŽฏ Why This Complete RAG Function Matters\n", + "\n", + "The `rag_query()` function encapsulates the entire RAG pipeline in a single, reusable interface. This is important because:\n", + "\n", + "**1. Simplicity:** One function call handles retrieval โ†’ assembly โ†’ generation\n", + "- No need to manually orchestrate the three steps\n", + "- Clean API for building applications\n", + "\n", + "**2. Consistency:** Every query follows the same pattern\n", + "- Ensures all three context types are always included\n", + "- Reduces errors from missing context\n", + "\n", + "**3. Flexibility:** Easy to customize behavior\n", + "- Adjust `top_k` for more/fewer retrieved courses\n", + "- Add/remove user profile information\n", + "- Modify filters for specific use cases\n", + "\n", + "**4. Production-Ready:** This pattern scales to real applications\n", + "- In Section 3, we'll add memory (conversation history)\n", + "- In Section 4, we'll add tools (course enrollment, prerequisites checking)\n", + "- The core RAG pattern remains the same\n", + "\n", + "This is the foundation you'll build on throughout the rest of the course." 
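+    "\n", +    "\n", +    "Because `rag_query()` exposes the same `filters` parameter we used in Step 5, narrowing retrieval is a one-line change. One possible variation (the query text and filter values are illustrative):\n", +    "\n", +    "```python\n", +    "# Restrict retrieval to online Computer Science courses before generation\n", +    "response, courses = await rag_query(\n", +    "    \"courses that cover neural networks\",\n", +    "    user_profile=profile,\n", +    "    top_k=5,\n", +    "    filters={\"department\": \"Computer Science\", \"format\": \"online\"}\n", +    ")\n", +    "```"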
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f126f77dd7242ddb",
+   "metadata": {},
+   "source": [
+    "---\n",
+    "\n",
+    "## 🧪 Step 8: Try Different Queries\n",
+    "\n",
+    "Let's test our RAG system with various queries to see how it handles different scenarios:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3d63b2d5a412a8d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Test 1: Beginner looking for programming courses\n",
+    "print(\"=\" * 60)\n",
+    "print(\"TEST 1: Beginner Programming\")\n",
+    "print(\"=\" * 60)\n",
+    "print()\n",
+    "\n",
+    "query1 = \"I'm new to programming and want to start learning\"\n",
+    "profile1 = {\n",
+    "    \"name\": \"Maria Garcia\",\n",
+    "    \"major\": \"Undeclared\",\n",
+    "    \"year\": \"Freshman\",\n",
+    "    \"interests\": [\"programming\", \"technology\"],\n",
+    "    \"preferred_difficulty\": \"beginner\",\n",
+    "    \"preferred_format\": \"online\"\n",
+    "}\n",
+    "\n",
+    "response1, courses1 = await rag_query(query1, profile1, top_k=2)\n",
+    "print(f\"\\nQuery: {query1}\\n\")\n",
+    "print(\"\\nAI Response:\\n\")\n",
+    "print(response1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "id": "e6d543a2d75022b9",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "============================================================\n",
+      "TEST 2: Advanced Machine Learning\n",
+      "============================================================\n",
+      "\n",
+      "00:46:06 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
+      "00:46:13 httpx INFO HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
+      "\n",
+      "Query: I want advanced courses in machine learning and AI\n",
+      "\n",
+      "\n",
+      "AI Response:\n",
+      "\n",
+      "Hi David! Based on your major in Computer Science and your interests in machine learning and AI, I recommend the following course:\n",
+      "\n",
+      "**CS007: Machine Learning**\n",
+      "- **Difficulty:** Advanced\n",
+      "- **Format:** Hybrid (though not in-person, it involves some in-person elements)\n",
+      "- **Credits:** 4\n",
+      "- **Description:** This course covers machine learning algorithms and applications, including supervised and unsupervised learning as well as neural networks. \n",
+      "\n",
+      "While it would be ideal to have an exclusively in-person format, CS007 is the only advanced course listed that aligns with your interests and goals in machine learning. The hybrid format may still offer valuable in-person interaction.\n",
+      "\n",
+      "Unfortunately, there are no strictly in-person advanced courses focused on machine learning or AI in the current offerings. I encourage you to consider CS007 for a solid understanding of the subject, as it can significantly enhance your research capabilities in AI.\n",
+      "\n",
+      "If you have any further questions or need more assistance, feel free to ask!\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Test 2: Advanced student looking for specialized courses\n",
+    "print(\"=\" * 60)\n",
+    "print(\"TEST 2: Advanced Machine Learning\")\n",
+    "print(\"=\" * 60)\n",
+    "print()\n",
+    "\n",
+    "query2 = \"I want advanced courses in machine learning and AI\"\n",
+    "profile2 = {\n",
+    "    \"name\": \"David Kim\",\n",
+    "    \"major\": \"Computer Science\",\n",
+    "    \"year\": \"Senior\",\n",
+    "    \"interests\": [\"machine learning\", \"AI\", \"research\"],\n",
+    "    \"preferred_difficulty\": \"advanced\",\n",
+    "    \"preferred_format\": \"in-person\"\n",
+    "}\n",
+    "\n",
+    "response2, courses2 = await rag_query(query2, profile2, top_k=2)\n",
+    "print(f\"\\nQuery: {query2}\\n\")\n",
+    "print(\"\\nAI Response:\\n\")\n",
+    "print(response2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "id": "f6430f264bc17b",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "============================================================\n",
+      "TEST 3: Business Analytics\n",
+      "============================================================\n",
+      "\n",
+      "00:46:14 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
+      "00:46:17 httpx INFO HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
+      "\n",
+      "Query: What courses can help me with business analytics and decision making?\n",
+      "\n",
+      "\n",
+      "\n",
+      "AI Response:\n",
+      "\n",
+      "Hi Jennifer! Given your interests in analytics and strategy, I recommend looking into the following course:\n",
+      "\n",
+      "**BUS033: Marketing Strategy**\n",
+      "- **Department:** Business\n",
+      "- **Difficulty:** Intermediate\n",
+      "- **Format:** Hybrid\n",
+      "- **Credits:** 3\n",
+      "- **Description:** This course covers strategic marketing planning, market analysis, consumer behavior, and digital marketing techniques. \n",
+      "\n",
+      "This course aligns well with your major in Business Administration and your interest in analytics and strategy. It will provide you with valuable insights into decision-making processes in marketing, which is crucial for any business professional.\n",
+      "\n",
+      "Since you prefer a hybrid format, BUS033 is a great fit, allowing you to balance online learning with in-person engagement. Plus, its intermediate difficulty level matches your preferences perfectly.\n",
+      "\n",
+      "If you have any more questions or need further assistance, feel free to ask!\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Test 3: Business student looking for relevant courses\n",
+    "print(\"=\" * 60)\n",
+    "print(\"TEST 3: Business Analytics\")\n",
+    "print(\"=\" * 60)\n",
+    "print()\n",
+    "\n",
+    "query3 = \"What courses can help me with business analytics and decision making?\"\n",
+    "profile3 = {\n",
+    "    \"name\": \"Jennifer Lee\",\n",
+    "    \"major\": \"Business Administration\",\n",
+    "    \"year\": \"Junior\",\n",
+    "    \"interests\": [\"analytics\", \"management\", \"strategy\"],\n",
+    "    \"preferred_difficulty\": \"intermediate\",\n",
+    "    \"preferred_format\": \"hybrid\"\n",
+    "}\n",
+    "\n",
+    "response3, courses3 = await rag_query(query3, profile3, top_k=2)\n",
+    "print(f\"\\nQuery: {query3}\\n\")\n",
+    "print()\n",
+    "print(\"\\nAI Response:\\n\")\n",
+    "print(response3)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "38103b67a0624eb4",
+   "metadata": {},
+   "source": [
+    "---\n",
+    "\n",
+    "## 🎓 Key Takeaways\n",
+    "\n",
+    "### What You've Learned\n",
+    "\n",
+    "**1. RAG Fundamentals**\n",
+    "- RAG dynamically retrieves relevant information instead of hardcoding knowledge\n",
+    "- Vector embeddings enable semantic search (meaning-based, not keyword-based)\n",
+    "- RAG solves the scalability and token efficiency problems of static context\n",
+    "\n",
+    "**2. The RAG Pipeline**\n",
+    "```\n",
+    "User Query → Semantic Search → Context Assembly → LLM Generation\n",
+    "```\n",
+    "- **Retrieval:** Find relevant documents using vector similarity\n",
+    "- **Assembly:** Combine system + user + retrieved context\n",
+    "- **Generation:** LLM creates personalized response with full context\n",
+    "\n",
+    "**3. 
Context Engineering in Practice**\n", + "- **System Context:** AI role and instructions (static)\n", + "- **User Context:** Student profile and preferences (dynamic, user-specific)\n", + "- **Retrieved Context:** Relevant courses from vector search (dynamic, query-specific)\n", + "- **Integration:** All three context types work together\n", + "\n", + "**4. Technical Implementation with Reference Agent Utilities**\n", + "- **redis_config**: Production-ready Redis configuration (RedisVL + LangChain)\n", + " - Manages connections, embeddings, vector index, checkpointer\n", + " - Same configuration used in reference agent\n", + "- **CourseManager**: Handles all course operations\n", + " - Uses RedisVL's VectorQuery for semantic search\n", + " - Supports metadata filters with Tag and Num classes\n", + " - Automatically generates embeddings and stores courses\n", + "- **CourseIngestionPipeline**: Bulk data ingestion\n", + " - Loads JSON, generates embeddings, stores in Redis\n", + " - Progress tracking and verification\n", + "- **Benefits**: Focus on RAG concepts, not Redis implementation details\n", + "\n", + "### Best Practices\n", + "\n", + "**Retrieval:**\n", + "- Retrieve only what's needed (top-k results)\n", + "- Use metadata filters to narrow results\n", + "- Balance between too few (missing info) and too many (wasting tokens) results\n", + "- **๐Ÿ’ก Research Insight:** Context Rot research shows that distractors (similar-but-wrong information) have amplified negative impact in long contexts. Precision in retrieval matters more than recall. ([Context Rot paper](https://research.trychroma.com/context-rot))\n", + "\n", + "**Context Assembly:**\n", + "- Structure context clearly (system โ†’ user โ†’ retrieved)\n", + "- Include only relevant metadata\n", + "- Keep descriptions concise but informative\n", + "\n", + "**Generation:**\n", + "- Use appropriate temperature (0.7 for creative, 0.0 for factual)\n", + "- Provide clear instructions in system context\n", + "- Let the LLM explain its reasoning\n", + "\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "6994c097a695afdb", + "metadata": {}, + "source": [ + "## ๐Ÿš€ What's Next?\n", + "\n", + "### ๐Ÿง  Section 3: Memory Systems for Context Engineering\n", + "\n", + "In this section, you built a RAG system that retrieves relevant information for each query. 
But there's a problem: **it doesn't remember previous conversations**.\n", + "\n", + "In Section 3, you'll add memory to your RAG system:\n", + "- **Working Memory:** Track conversation history within a session\n", + "- **Long-term Memory:** Remember user preferences across sessions\n", + "- **LangGraph Integration:** Manage stateful workflows with checkpointing\n", + "- **Redis Agent Memory Server:** Automatic memory extraction and retrieval\n", + "\n", + "### Section 4: Tool Use and Agents\n", + "\n", + "After adding memory, you'll transform your RAG system into a full agent:\n", + "- **Tool Calling:** Let the AI use functions (search, enroll, check prerequisites)\n", + "- **LangGraph State Management:** Orchestrate complex multi-step workflows\n", + "- **Agent Reasoning:** Plan and execute multi-step tasks\n", + "- **Production Patterns:** Error handling, retries, and monitoring\n", + "\n", + "### The Journey\n", + "\n", + "```\n", + "Section 1: Context Engineering Fundamentals\n", + " โ†“\n", + "Section 2: RAG (Retrieved Context) โ† You are here\n", + " โ†“\n", + "Section 3: Memory Systems for Context Engineering (Conversation Context)\n", + " โ†“\n", + "Section 4: Tool Use and Agents (Complete System)\n", + "```\n", + "\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "44f445a3359501a4", + "metadata": {}, + "source": [ + "## ๐Ÿ’ช Practice Exercises\n", + "\n", + "Try these exercises to deepen your understanding:\n", + "\n", + "**Exercise 1: Custom Filters**\n", + "- Modify the RAG query to filter by specific departments\n", + "- Try combining multiple filters (difficulty + format + department)\n", + "\n", + "**Exercise 2: Adjust Retrieval**\n", + "- Experiment with different `top_k` values (1, 3, 5, 10)\n", + "- Observe how response quality changes with more/fewer retrieved courses\n", + "\n", + "**Exercise 3: Context Optimization**\n", + "- Modify the `assemble_context` function to include more/less detail\n", + "- Measure token usage and response quality trade-offs\n", + "\n", + "**Exercise 4: Different Domains**\n", + "- Generate courses for a different domain (e.g., healthcare, finance)\n", + "- Ingest and test RAG with your custom data\n", + "\n", + "**Exercise 5: Evaluation**\n", + "- Create test queries with expected results\n", + "- Measure retrieval accuracy (are the right courses retrieved?)\n", + "- Measure generation quality (are responses helpful and accurate?)\n", + "\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "9d9b8641f068666b", + "metadata": {}, + "source": [ + "## ๐Ÿ“ Summary\n", + "\n", + "You've built a complete RAG system that:\n", + "- โœ… Generates and ingests course data with vector embeddings\n", + "- โœ… Performs semantic search to find relevant courses\n", + "- โœ… Assembles context from multiple sources (system + user + retrieved)\n", + "- โœ… Generates personalized responses using LLMs\n", + "- โœ… Handles different query types and user profiles\n", + "\n", + "This RAG system is the foundation for the advanced topics in Sections 3 and 4. You'll build on this exact code to add memory, tools, and full agent capabilities.\n", + "\n", + "**Great work!** You've mastered Retrieved Context and built a production-ready RAG pipeline. 
๐ŸŽ‰\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ“š Additional Resources\n", + "\n", + "### **RAG and Vector Search**\n", + "- [Retrieval-Augmented Generation Paper](https://arxiv.org/abs/2005.11401) - Original RAG paper by Facebook AI\n", + "- [Redis Vector Similarity Search](https://redis.io/docs/stack/search/reference/vectors/) - Official Redis VSS documentation\n", + "- [RedisVL Documentation](https://redisvl.com/) - Redis Vector Library for Python\n", + "- [LangChain RAG Tutorial](https://python.langchain.com/docs/tutorials/rag/) - Building RAG applications\n", + "\n", + "### **Embeddings and Semantic Search**\n", + "- [OpenAI Embeddings Guide](https://platform.openai.com/docs/guides/embeddings) - Understanding text embeddings\n", + "- [Sentence Transformers](https://www.sbert.net/) - Open-source embedding models\n", + "- [HNSW Algorithm](https://arxiv.org/abs/1603.09320) - Hierarchical Navigable Small World graphs\n", + "\n", + "### **LangChain and Redis Integration**\n", + "- [LangChain Documentation](https://python.langchain.com/docs/get_started/introduction) - Framework overview\n", + "- [LangChain Redis Integration](https://python.langchain.com/docs/integrations/vectorstores/redis/) - Using Redis with LangChain\n", + "- [Redis Python Client](https://redis-py.readthedocs.io/) - redis-py documentation\n", + "\n", + "### **Advanced RAG Techniques**\n", + "- [Advanced RAG Patterns](https://blog.langchain.dev/deconstructing-rag/) - LangChain blog on RAG optimization\n", + "- [Advanced Search with RedisVL](https://docs.redisvl.com/en/latest/user_guide/11_advanced_queries.html) - Vector, Hybrid, Text, and Keyword Search\n", + "- [RAG Evaluation](https://arxiv.org/abs/2309.15217) - Measuring RAG system performance\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "38e31170-962f-4fe9-9209-a48f23a33400", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/python-recipes/context-engineering/notebooks/section-2-retrieved-context-engineering/README.md b/python-recipes/context-engineering/notebooks/section-2-retrieved-context-engineering/README.md new file mode 100644 index 00000000..732b7cf6 --- /dev/null +++ b/python-recipes/context-engineering/notebooks/section-2-retrieved-context-engineering/README.md @@ -0,0 +1,158 @@ +# Section 2: RAG Foundations + +## Overview + +This section teaches you to build a complete RAG (Retrieval-Augmented Generation) system using the Redis University Course Advisor as your foundation. You'll create an agent that can search through course catalogs, understand student profiles, and generate personalized recommendations. 
+ +## Learning Objectives + +By completing this section, you will: +- Build a complete RAG agent using the reference-agent architecture +- Understand how retrieval-augmented generation works in practice +- Implement vector similarity search for course recommendations +- Create a foundation agent you'll enhance in later sections + +## Prerequisites + +- Completion of Section 1: Foundations +- Basic understanding of Python and object-oriented programming +- Familiarity with the concepts of context engineering + +## Notebooks + +### 01_building_your_rag_agent.ipynb + +**Main Learning Project**: Build Your Course Advisor Agent + +This comprehensive notebook walks you through: + +#### Step 1: Install and Explore the Reference Agent +- Install the reference-agent as an editable package +- Explore the professional data models (Course, StudentProfile, etc.) +- Understand the existing architecture + +#### Step 2: Load the Course Catalog +- Initialize the CourseManager +- Load and explore the comprehensive course catalog +- Understand the data structure and relationships + +#### Step 3: Create Student Profiles +- Build diverse student profiles with different backgrounds +- Test with various majors, experience levels, and interests +- Understand how student context affects recommendations + +#### Step 4: Build Your First RAG System +- Implement the SimpleRAGAgent class +- Create the three core RAG components: + - **Retrieval**: Search for relevant courses + - **Augmentation**: Combine student context with course data + - **Generation**: Create personalized responses + +#### Step 5: Test Your RAG Agent +- Test with different student profiles and queries +- See how the agent personalizes responses +- Understand the impact of student context on recommendations + +#### Step 6: Test Conversation Memory +- Implement basic conversation history tracking +- Test follow-up questions and context references +- See how memory enables natural conversations + +#### Step 7: Analyze Your RAG System +- Break down the RAG process step by step +- Understand how each component contributes +- Measure system performance and metrics + +#### Step 8: Foundation for Future Enhancements +- Review what you've built +- Understand how each component will be enhanced +- Preview upcoming sections and improvements + +## Key Concepts Covered + +### RAG Architecture +- **Retrieval**: Finding relevant information from knowledge bases +- **Augmentation**: Enhancing prompts with retrieved context +- **Generation**: Using LLMs to create personalized responses + +### Context Management +- Student profile context (background, preferences, history) +- Course information context (descriptions, prerequisites, objectives) +- Conversation context (previous interactions, references) +- Context assembly and prioritization + +### Professional Patterns +- Type-safe data models with Pydantic +- Modular architecture for easy extension +- Error handling and graceful fallbacks +- Demo modes for development and testing + +## Technical Implementation + +### Core Components Built + +1. **SimpleRAGAgent**: Main agent class implementing the RAG pipeline +2. **Context Assembly**: Intelligent combination of multiple context types +3. **Conversation Memory**: Basic history tracking for natural interactions +4. **Course Search**: Vector-based similarity search using CourseManager +5. 
**Response Generation**: LLM integration with fallback demo responses + +### Architecture Patterns + +``` +Student Query โ†’ Course Search โ†’ Context Assembly โ†’ LLM Generation โ†’ Response + โ†“ โ†“ โ†“ โ†“ โ†“ +"ML courses" โ†’ Top 3 courses โ†’ Complete โ†’ GPT-4 โ†’ "I recommend + context RU301..." +``` + +### Data Flow + +1. **Input**: Student profile + natural language query +2. **Retrieval**: Search course catalog for relevant matches +3. **Augmentation**: Combine student context + course data + conversation history +4. **Generation**: LLM creates personalized recommendation +5. **Memory**: Store interaction for future reference + +## What You'll Build + +By the end of this section, you'll have: + +### A Complete RAG Agent That Can: +- Search through hundreds of courses intelligently +- Understand student backgrounds and preferences +- Generate personalized course recommendations +- Maintain conversation context across interactions +- Handle follow-up questions and references + +### Professional Architecture Ready For: +- **Section 3**: Enhanced memory with Redis persistence +- **Section 4**: Multiple specialized tools and intelligent routing +- **Section 5**: Context optimization and production scaling + +### Real-World Skills: +- RAG system design and implementation +- Context engineering best practices +- Professional Python development patterns +- LLM integration and prompt engineering + +## Next Steps + +After completing this section: +1. **Continue to Section 3: Memory Systems for Context Engineering** to add sophisticated Redis-based memory +2. **Review your RAG agent** and identify areas for improvement +3. **Experiment with different queries** to understand system behavior +4. **Consider real-world applications** of RAG in your domain + +## Cross-References + +This section builds upon: +- **Section 1 Foundations**: Context types and assembly patterns +- **Reference-agent models**: Professional data structures and validation + +This section prepares you for: +- **Section 3 Memory Architecture**: Working vs long-term memory concepts from `section-3-memory/01_working_memory.ipynb` +- **Section 4 Tool Selection**: Multi-tool coordination patterns +- **Section 5 Context Optimization**: Performance and efficiency techniques + +Your RAG agent is now ready to be enhanced with advanced context engineering techniques! 
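+
+## Appendix: Minimal RAG Sketch
+
+To make the data flow above concrete, here is a compressed, illustrative sketch of the agent you build in the notebook. The `search_courses` and `ainvoke` signatures are assumptions standing in for the real CourseManager and LLM interfaces; the notebook develops the full version step by step.
+
+```python
+from dataclasses import dataclass, field
+from typing import Any
+
+@dataclass
+class SimpleRAGAgent:
+    """Toy RAG pipeline: retrieve -> augment -> generate -> remember."""
+    course_manager: Any                 # assumed: search_courses(query, limit)
+    llm: Any                            # assumed: ainvoke(messages) chat model
+    history: list = field(default_factory=list)  # naive conversation memory
+
+    async def answer(self, profile: dict, query: str, top_k: int = 3) -> str:
+        # 1. Retrieval: vector similarity search over the course catalog
+        courses = await self.course_manager.search_courses(query, limit=top_k)
+        # 2. Augmentation: combine student context with retrieved course data
+        context = "\n".join([
+            f"Student: {profile.get('name')} ({profile.get('major')})",
+            "Relevant courses:",
+            *(f"- {c.course_code}: {c.title}" for c in courses),
+        ])
+        # 3. Generation: the LLM turns assembled context into a recommendation
+        response = await self.llm.ainvoke([("system", context), ("human", query)])
+        # 4. Memory: store the turn so follow-up questions can reference it
+        self.history.append((query, response.content))
+        return response.content
+```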
diff --git a/python-recipes/context-engineering/notebooks/section-2-retrieved-context-engineering/course_catalog_section2.json b/python-recipes/context-engineering/notebooks/section-2-retrieved-context-engineering/course_catalog_section2.json new file mode 100644 index 00000000..be3e6e3c --- /dev/null +++ b/python-recipes/context-engineering/notebooks/section-2-retrieved-context-engineering/course_catalog_section2.json @@ -0,0 +1,2224 @@ +{ + "majors": [ + { + "id": "01K8W8H0TMBNPV81NPDH6PWYKZ", + "name": "Computer Science", + "code": "CS", + "department": "Computer Science", + "description": "Study of computational systems, algorithms, and software design", + "required_credits": 120, + "core_courses": [], + "elective_courses": [], + "career_paths": [ + "Software Engineer", + "Data Scientist", + "Systems Architect", + "AI Researcher" + ], + "created_at": "2025-10-31 00:31:01.972222" + }, + { + "id": "01K8W8H0TMBNPV81NPDH6PWYM0", + "name": "Data Science", + "code": "DS", + "department": "Data Science", + "description": "Interdisciplinary field using statistics, programming, and domain expertise", + "required_credits": 120, + "core_courses": [], + "elective_courses": [], + "career_paths": [ + "Data Analyst", + "Machine Learning Engineer", + "Business Intelligence Analyst" + ], + "created_at": "2025-10-31 00:31:01.972240" + }, + { + "id": "01K8W8H0TMBNPV81NPDH6PWYM1", + "name": "Mathematics", + "code": "MATH", + "department": "Mathematics", + "description": "Study of numbers, structures, patterns, and logical reasoning", + "required_credits": 120, + "core_courses": [], + "elective_courses": [], + "career_paths": [ + "Mathematician", + "Statistician", + "Actuary", + "Research Scientist" + ], + "created_at": "2025-10-31 00:31:01.972248" + }, + { + "id": "01K8W8H0TMBNPV81NPDH6PWYM2", + "name": "Business Administration", + "code": "BUS", + "department": "Business", + "description": "Management, finance, marketing, and organizational behavior", + "required_credits": 120, + "core_courses": [], + "elective_courses": [], + "career_paths": [ + "Business Analyst", + "Project Manager", + "Consultant", + "Entrepreneur" + ], + "created_at": "2025-10-31 00:31:01.972255" + }, + { + "id": "01K8W8H0TMBNPV81NPDH6PWYM3", + "name": "Psychology", + "code": "PSY", + "department": "Psychology", + "description": "Scientific study of mind, behavior, and mental processes", + "required_credits": 120, + "core_courses": [], + "elective_courses": [], + "career_paths": [ + "Clinical Psychologist", + "Counselor", + "Research Psychologist", + "HR Specialist" + ], + "created_at": "2025-10-31 00:31:01.972261" + } + ], + "courses": [ + { + "id": "01K8W8H0TMBNPV81NPDH6PWYM4", + "course_code": "CS001", + "title": "Introduction to Programming", + "description": "Fundamental programming concepts using Python. 
Variables, control structures, functions, and basic data structures.", + "credits": 3, + "difficulty_level": "beginner", + "format": "hybrid", + "department": "Computer Science", + "major": "Computer Science", + "prerequisites": [], + "schedule": { + "days": [ + "monday", + "wednesday", + "friday" + ], + "start_time": "12:00:00", + "end_time": "12:50:00", + "location": "Engineering Building 328" + }, + "semester": "fall", + "year": 2024, + "instructor": "Andrew Reynolds", + "max_enrollment": 89, + "current_enrollment": 11, + "tags": [ + "programming", + "python", + "fundamentals" + ], + "learning_objectives": [ + "Write basic Python programs", + "Understand variables and data types", + "Use control structures effectively", + "Create and use functions" + ], + "created_at": "2025-10-31 00:31:01.972487", + "updated_at": "2025-10-31 00:31:01.972487" + }, + { + "id": "01K8W8H0TMBNPV81NPDH6PWYM5", + "course_code": "CS002", + "title": "Web Development", + "description": "Full-stack web development using modern frameworks. HTML, CSS, JavaScript, React, and backend APIs.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "in_person", + "department": "Computer Science", + "major": "Computer Science", + "prerequisites": [], + "schedule": { + "days": [ + "tuesday" + ], + "start_time": "08:00:00", + "end_time": "10:30:00", + "location": "Engineering Building 195" + }, + "semester": "fall", + "year": 2024, + "instructor": "Timothy Evans", + "max_enrollment": 91, + "current_enrollment": 25, + "tags": [ + "web development", + "javascript", + "react", + "apis" + ], + "learning_objectives": [ + "Build responsive web interfaces", + "Develop REST APIs", + "Use modern JavaScript frameworks", + "Deploy web applications" + ], + "created_at": "2025-10-31 00:31:01.972561", + "updated_at": "2025-10-31 00:31:01.972561" + }, + { + "id": "01K8W8H0TMBNPV81NPDH6PWYM6", + "course_code": "CS003", + "title": "Web Development", + "description": "Full-stack web development using modern frameworks. HTML, CSS, JavaScript, React, and backend APIs.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "in_person", + "department": "Computer Science", + "major": "Computer Science", + "prerequisites": [], + "schedule": { + "days": [ + "tuesday" + ], + "start_time": "11:30:00", + "end_time": "14:00:00", + "location": "Liberal Arts Center 703" + }, + "semester": "spring", + "year": 2024, + "instructor": "Michelle Flores", + "max_enrollment": 74, + "current_enrollment": 43, + "tags": [ + "web development", + "javascript", + "react", + "apis" + ], + "learning_objectives": [ + "Build responsive web interfaces", + "Develop REST APIs", + "Use modern JavaScript frameworks", + "Deploy web applications" + ], + "created_at": "2025-10-31 00:31:01.972628", + "updated_at": "2025-10-31 00:31:01.972628" + }, + { + "id": "01K8W8H0TMBNPV81NPDH6PWYM7", + "course_code": "CS004", + "title": "Database Systems", + "description": "Design and implementation of database systems. 
SQL, normalization, transactions, and database administration.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "online", + "department": "Computer Science", + "major": "Computer Science", + "prerequisites": [], + "schedule": { + "days": [ + "tuesday", + "thursday" + ], + "start_time": "11:30:00", + "end_time": "12:45:00", + "location": "Science Hall 204" + }, + "semester": "summer", + "year": 2024, + "instructor": "James Phillips", + "max_enrollment": 97, + "current_enrollment": 33, + "tags": [ + "databases", + "sql", + "data management" + ], + "learning_objectives": [ + "Design relational databases", + "Write complex SQL queries", + "Understand database normalization", + "Implement database transactions" + ], + "created_at": "2025-10-31 00:31:01.972688", + "updated_at": "2025-10-31 00:31:01.972688" + }, + { + "id": "01K8W8H0TMBNPV81NPDH6PWYM8", + "course_code": "CS005", + "title": "Introduction to Programming", + "description": "Fundamental programming concepts using Python. Variables, control structures, functions, and basic data structures.", + "credits": 3, + "difficulty_level": "beginner", + "format": "hybrid", + "department": "Computer Science", + "major": "Computer Science", + "prerequisites": [], + "schedule": { + "days": [ + "tuesday" + ], + "start_time": "16:00:00", + "end_time": "18:30:00", + "location": "Science Hall 487" + }, + "semester": "summer", + "year": 2024, + "instructor": "Sarah Moore", + "max_enrollment": 93, + "current_enrollment": 24, + "tags": [ + "programming", + "python", + "fundamentals" + ], + "learning_objectives": [ + "Write basic Python programs", + "Understand variables and data types", + "Use control structures effectively", + "Create and use functions" + ], + "created_at": "2025-10-31 00:31:01.972746", + "updated_at": "2025-10-31 00:31:01.972746" + }, + { + "id": "01K8W8H0TMBNPV81NPDH6PWYM9", + "course_code": "CS006", + "title": "Introduction to Programming", + "description": "Fundamental programming concepts using Python. Variables, control structures, functions, and basic data structures.", + "credits": 3, + "difficulty_level": "beginner", + "format": "in_person", + "department": "Computer Science", + "major": "Computer Science", + "prerequisites": [], + "schedule": { + "days": [ + "monday", + "wednesday", + "friday" + ], + "start_time": "18:00:00", + "end_time": "18:50:00", + "location": "Liberal Arts Center 891" + }, + "semester": "fall", + "year": 2024, + "instructor": "Alex Thompson", + "max_enrollment": 68, + "current_enrollment": 35, + "tags": [ + "programming", + "python", + "fundamentals" + ], + "learning_objectives": [ + "Write basic Python programs", + "Understand variables and data types", + "Use control structures effectively", + "Create and use functions" + ], + "created_at": "2025-10-31 00:31:01.972804", + "updated_at": "2025-10-31 00:31:01.972804" + }, + { + "id": "01K8W8H0TMBNPV81NPDH6PWYMA", + "course_code": "CS007", + "title": "Machine Learning", + "description": "Introduction to machine learning algorithms and applications. 
Supervised and unsupervised learning, neural networks.", + "credits": 4, + "difficulty_level": "advanced", + "format": "hybrid", + "department": "Computer Science", + "major": "Computer Science", + "prerequisites": [], + "schedule": { + "days": [ + "monday", + "wednesday" + ], + "start_time": "10:30:00", + "end_time": "11:45:00", + "location": "Engineering Building 463" + }, + "semester": "fall", + "year": 2024, + "instructor": "Eric Smith", + "max_enrollment": 97, + "current_enrollment": 21, + "tags": [ + "machine learning", + "ai", + "statistics" + ], + "learning_objectives": [ + "Understand ML algorithms", + "Implement classification and regression models", + "Evaluate model performance", + "Apply ML to real-world problems" + ], + "created_at": "2025-10-31 00:31:01.972861", + "updated_at": "2025-10-31 00:31:01.972862" + }, + { + "id": "01K8W8H0TMBNPV81NPDH6PWYMB", + "course_code": "CS008", + "title": "Web Development", + "description": "Full-stack web development using modern frameworks. HTML, CSS, JavaScript, React, and backend APIs.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "hybrid", + "department": "Computer Science", + "major": "Computer Science", + "prerequisites": [], + "schedule": { + "days": [ + "tuesday", + "thursday" + ], + "start_time": "10:30:00", + "end_time": "11:45:00", + "location": "Liberal Arts Center 488" + }, + "semester": "spring", + "year": 2024, + "instructor": "Tracie Mueller", + "max_enrollment": 61, + "current_enrollment": 7, + "tags": [ + "web development", + "javascript", + "react", + "apis" + ], + "learning_objectives": [ + "Build responsive web interfaces", + "Develop REST APIs", + "Use modern JavaScript frameworks", + "Deploy web applications" + ], + "created_at": "2025-10-31 00:31:01.972918", + "updated_at": "2025-10-31 00:31:01.972918" + }, + { + "id": "01K8W8H0TMBNPV81NPDH6PWYMC", + "course_code": "CS009", + "title": "Data Structures and Algorithms", + "description": "Study of fundamental data structures and algorithms. Arrays, linked lists, trees, graphs, sorting, and searching.", + "credits": 4, + "difficulty_level": "intermediate", + "format": "in_person", + "department": "Computer Science", + "major": "Computer Science", + "prerequisites": [ + { + "course_code": "CS001", + "course_title": "Prerequisite Course 1", + "minimum_grade": "B-", + "can_be_concurrent": false + }, + { + "course_code": "CS001", + "course_title": "Prerequisite Course 1", + "minimum_grade": "C", + "can_be_concurrent": false + } + ], + "schedule": { + "days": [ + "monday", + "wednesday", + "friday" + ], + "start_time": "13:30:00", + "end_time": "14:20:00", + "location": "Science Hall 374" + }, + "semester": "summer", + "year": 2024, + "instructor": "Catherine Jones", + "max_enrollment": 94, + "current_enrollment": 54, + "tags": [ + "algorithms", + "data structures", + "problem solving" + ], + "learning_objectives": [ + "Implement common data structures", + "Analyze algorithm complexity", + "Solve problems using appropriate data structures", + "Understand time and space complexity" + ], + "created_at": "2025-10-31 00:31:01.972981", + "updated_at": "2025-10-31 00:31:01.972982" + }, + { + "id": "01K8W8H0TN30HVW7N0G2SZ59WX", + "course_code": "CS010", + "title": "Web Development", + "description": "Full-stack web development using modern frameworks. 
HTML, CSS, JavaScript, React, and backend APIs.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "in_person", + "department": "Computer Science", + "major": "Computer Science", + "prerequisites": [], + "schedule": { + "days": [ + "tuesday" + ], + "start_time": "13:00:00", + "end_time": "15:30:00", + "location": "Technology Center 241" + }, + "semester": "fall", + "year": 2024, + "instructor": "Kevin Wilson", + "max_enrollment": 39, + "current_enrollment": 80, + "tags": [ + "web development", + "javascript", + "react", + "apis" + ], + "learning_objectives": [ + "Build responsive web interfaces", + "Develop REST APIs", + "Use modern JavaScript frameworks", + "Deploy web applications" + ], + "created_at": "2025-10-31 00:31:01.973043", + "updated_at": "2025-10-31 00:31:01.973044" + }, + { + "id": "01K8W8H0TN30HVW7N0G2SZ59WY", + "course_code": "DS011", + "title": "Statistics for Data Science", + "description": "Statistical methods and probability theory for data analysis. Hypothesis testing, regression, and statistical inference.", + "credits": 4, + "difficulty_level": "intermediate", + "format": "hybrid", + "department": "Data Science", + "major": "Data Science", + "prerequisites": [], + "schedule": { + "days": [ + "tuesday" + ], + "start_time": "17:00:00", + "end_time": "19:30:00", + "location": "Business Complex 494" + }, + "semester": "winter", + "year": 2024, + "instructor": "Heidi Bailey", + "max_enrollment": 87, + "current_enrollment": 32, + "tags": [ + "statistics", + "probability", + "data analysis" + ], + "learning_objectives": [ + "Apply statistical methods to data", + "Perform hypothesis testing", + "Understand probability distributions", + "Conduct statistical inference" + ], + "created_at": "2025-10-31 00:31:01.973102", + "updated_at": "2025-10-31 00:31:01.973103" + }, + { + "id": "01K8W8H0TN30HVW7N0G2SZ59WZ", + "course_code": "DS012", + "title": "Statistics for Data Science", + "description": "Statistical methods and probability theory for data analysis. Hypothesis testing, regression, and statistical inference.", + "credits": 4, + "difficulty_level": "intermediate", + "format": "in_person", + "department": "Data Science", + "major": "Data Science", + "prerequisites": [], + "schedule": { + "days": [ + "monday", + "wednesday", + "friday" + ], + "start_time": "18:30:00", + "end_time": "19:20:00", + "location": "Liberal Arts Center 887" + }, + "semester": "summer", + "year": 2024, + "instructor": "Emily Jimenez", + "max_enrollment": 75, + "current_enrollment": 20, + "tags": [ + "statistics", + "probability", + "data analysis" + ], + "learning_objectives": [ + "Apply statistical methods to data", + "Perform hypothesis testing", + "Understand probability distributions", + "Conduct statistical inference" + ], + "created_at": "2025-10-31 00:31:01.973162", + "updated_at": "2025-10-31 00:31:01.973162" + }, + { + "id": "01K8W8H0TN30HVW7N0G2SZ59X0", + "course_code": "DS013", + "title": "Data Visualization", + "description": "Creating effective visualizations for data communication. 
Tools include Python matplotlib, seaborn, and Tableau.", + "credits": 3, + "difficulty_level": "beginner", + "format": "hybrid", + "department": "Data Science", + "major": "Data Science", + "prerequisites": [], + "schedule": { + "days": [ + "monday", + "wednesday", + "friday" + ], + "start_time": "12:00:00", + "end_time": "12:50:00", + "location": "Science Hall 619" + }, + "semester": "summer", + "year": 2024, + "instructor": "Christian Russell", + "max_enrollment": 84, + "current_enrollment": 77, + "tags": [ + "visualization", + "python", + "tableau", + "communication" + ], + "learning_objectives": [ + "Create effective data visualizations", + "Choose appropriate chart types", + "Use visualization tools", + "Communicate insights through visuals" + ], + "created_at": "2025-10-31 00:31:01.973217", + "updated_at": "2025-10-31 00:31:01.973218" + }, + { + "id": "01K8W8H0TN30HVW7N0G2SZ59X1", + "course_code": "DS014", + "title": "Statistics for Data Science", + "description": "Statistical methods and probability theory for data analysis. Hypothesis testing, regression, and statistical inference.", + "credits": 4, + "difficulty_level": "intermediate", + "format": "hybrid", + "department": "Data Science", + "major": "Data Science", + "prerequisites": [], + "schedule": { + "days": [ + "tuesday", + "thursday" + ], + "start_time": "13:00:00", + "end_time": "14:15:00", + "location": "Technology Center 652" + }, + "semester": "summer", + "year": 2024, + "instructor": "Joseph Nielsen", + "max_enrollment": 82, + "current_enrollment": 2, + "tags": [ + "statistics", + "probability", + "data analysis" + ], + "learning_objectives": [ + "Apply statistical methods to data", + "Perform hypothesis testing", + "Understand probability distributions", + "Conduct statistical inference" + ], + "created_at": "2025-10-31 00:31:01.973274", + "updated_at": "2025-10-31 00:31:01.973274" + }, + { + "id": "01K8W8H0TN30HVW7N0G2SZ59X2", + "course_code": "DS015", + "title": "Statistics for Data Science", + "description": "Statistical methods and probability theory for data analysis. Hypothesis testing, regression, and statistical inference.", + "credits": 4, + "difficulty_level": "intermediate", + "format": "in_person", + "department": "Data Science", + "major": "Data Science", + "prerequisites": [], + "schedule": { + "days": [ + "monday", + "wednesday" + ], + "start_time": "12:00:00", + "end_time": "13:15:00", + "location": "Engineering Building 159" + }, + "semester": "fall", + "year": 2024, + "instructor": "Tina Rojas", + "max_enrollment": 82, + "current_enrollment": 8, + "tags": [ + "statistics", + "probability", + "data analysis" + ], + "learning_objectives": [ + "Apply statistical methods to data", + "Perform hypothesis testing", + "Understand probability distributions", + "Conduct statistical inference" + ], + "created_at": "2025-10-31 00:31:01.973330", + "updated_at": "2025-10-31 00:31:01.973331" + }, + { + "id": "01K8W8H0TN30HVW7N0G2SZ59X3", + "course_code": "DS016", + "title": "Statistics for Data Science", + "description": "Statistical methods and probability theory for data analysis. 
Hypothesis testing, regression, and statistical inference.", + "credits": 4, + "difficulty_level": "intermediate", + "format": "online", + "department": "Data Science", + "major": "Data Science", + "prerequisites": [ + { + "course_code": "DS002", + "course_title": "Prerequisite Course 2", + "minimum_grade": "B-", + "can_be_concurrent": false + }, + { + "course_code": "DS006", + "course_title": "Prerequisite Course 6", + "minimum_grade": "C", + "can_be_concurrent": false + } + ], + "schedule": { + "days": [ + "tuesday", + "thursday" + ], + "start_time": "18:30:00", + "end_time": "19:45:00", + "location": "Engineering Building 662" + }, + "semester": "summer", + "year": 2024, + "instructor": "Lynn Wade", + "max_enrollment": 76, + "current_enrollment": 66, + "tags": [ + "statistics", + "probability", + "data analysis" + ], + "learning_objectives": [ + "Apply statistical methods to data", + "Perform hypothesis testing", + "Understand probability distributions", + "Conduct statistical inference" + ], + "created_at": "2025-10-31 00:31:01.973396", + "updated_at": "2025-10-31 00:31:01.973397" + }, + { + "id": "01K8W8H0TN30HVW7N0G2SZ59X4", + "course_code": "DS017", + "title": "Data Visualization", + "description": "Creating effective visualizations for data communication. Tools include Python matplotlib, seaborn, and Tableau.", + "credits": 3, + "difficulty_level": "beginner", + "format": "hybrid", + "department": "Data Science", + "major": "Data Science", + "prerequisites": [ + { + "course_code": "DS004", + "course_title": "Prerequisite Course 4", + "minimum_grade": "C", + "can_be_concurrent": true + } + ], + "schedule": { + "days": [ + "monday", + "wednesday", + "friday" + ], + "start_time": "11:00:00", + "end_time": "11:50:00", + "location": "Liberal Arts Center 165" + }, + "semester": "fall", + "year": 2024, + "instructor": "Sue Ray", + "max_enrollment": 49, + "current_enrollment": 8, + "tags": [ + "visualization", + "python", + "tableau", + "communication" + ], + "learning_objectives": [ + "Create effective data visualizations", + "Choose appropriate chart types", + "Use visualization tools", + "Communicate insights through visuals" + ], + "created_at": "2025-10-31 00:31:01.973455", + "updated_at": "2025-10-31 00:31:01.973456" + }, + { + "id": "01K8W8H0TN30HVW7N0G2SZ59X5", + "course_code": "DS018", + "title": "Statistics for Data Science", + "description": "Statistical methods and probability theory for data analysis. 
Hypothesis testing, regression, and statistical inference.", + "credits": 4, + "difficulty_level": "intermediate", + "format": "online", + "department": "Data Science", + "major": "Data Science", + "prerequisites": [ + { + "course_code": "DS008", + "course_title": "Prerequisite Course 8", + "minimum_grade": "C", + "can_be_concurrent": false + } + ], + "schedule": { + "days": [ + "monday", + "wednesday" + ], + "start_time": "09:00:00", + "end_time": "10:15:00", + "location": "Business Complex 385" + }, + "semester": "spring", + "year": 2024, + "instructor": "Emily Reyes", + "max_enrollment": 32, + "current_enrollment": 12, + "tags": [ + "statistics", + "probability", + "data analysis" + ], + "learning_objectives": [ + "Apply statistical methods to data", + "Perform hypothesis testing", + "Understand probability distributions", + "Conduct statistical inference" + ], + "created_at": "2025-10-31 00:31:01.973514", + "updated_at": "2025-10-31 00:31:01.973514" + }, + { + "id": "01K8W8H0TN30HVW7N0G2SZ59X6", + "course_code": "DS019", + "title": "Data Visualization", + "description": "Creating effective visualizations for data communication. Tools include Python matplotlib, seaborn, and Tableau.", + "credits": 3, + "difficulty_level": "beginner", + "format": "hybrid", + "department": "Data Science", + "major": "Data Science", + "prerequisites": [], + "schedule": { + "days": [ + "monday", + "wednesday" + ], + "start_time": "14:30:00", + "end_time": "15:45:00", + "location": "Science Hall 578" + }, + "semester": "fall", + "year": 2024, + "instructor": "Mary Singleton", + "max_enrollment": 27, + "current_enrollment": 51, + "tags": [ + "visualization", + "python", + "tableau", + "communication" + ], + "learning_objectives": [ + "Create effective data visualizations", + "Choose appropriate chart types", + "Use visualization tools", + "Communicate insights through visuals" + ], + "created_at": "2025-10-31 00:31:01.973569", + "updated_at": "2025-10-31 00:31:01.973569" + }, + { + "id": "01K8W8H0TN30HVW7N0G2SZ59X7", + "course_code": "DS020", + "title": "Data Visualization", + "description": "Creating effective visualizations for data communication. Tools include Python matplotlib, seaborn, and Tableau.", + "credits": 3, + "difficulty_level": "beginner", + "format": "online", + "department": "Data Science", + "major": "Data Science", + "prerequisites": [], + "schedule": { + "days": [ + "monday", + "wednesday", + "friday" + ], + "start_time": "11:00:00", + "end_time": "11:50:00", + "location": "Technology Center 294" + }, + "semester": "spring", + "year": 2024, + "instructor": "Devin Bell", + "max_enrollment": 55, + "current_enrollment": 59, + "tags": [ + "visualization", + "python", + "tableau", + "communication" + ], + "learning_objectives": [ + "Create effective data visualizations", + "Choose appropriate chart types", + "Use visualization tools", + "Communicate insights through visuals" + ], + "created_at": "2025-10-31 00:31:01.973623", + "updated_at": "2025-10-31 00:31:01.973623" + }, + { + "id": "01K8W8H0TN30HVW7N0G2SZ59X8", + "course_code": "MATH021", + "title": "Calculus I", + "description": "Differential calculus including limits, derivatives, and applications. 
Foundation for advanced mathematics.", + "credits": 4, + "difficulty_level": "intermediate", + "format": "in_person", + "department": "Mathematics", + "major": "Mathematics", + "prerequisites": [], + "schedule": { + "days": [ + "monday", + "wednesday", + "friday" + ], + "start_time": "15:00:00", + "end_time": "15:50:00", + "location": "Technology Center 151" + }, + "semester": "fall", + "year": 2024, + "instructor": "Monica Simpson", + "max_enrollment": 50, + "current_enrollment": 21, + "tags": [ + "calculus", + "derivatives", + "limits" + ], + "learning_objectives": [ + "Understand limits and continuity", + "Calculate derivatives", + "Apply calculus to real problems", + "Understand fundamental theorem" + ], + "created_at": "2025-10-31 00:31:01.973680", + "updated_at": "2025-10-31 00:31:01.973681" + }, + { + "id": "01K8W8H0TN30HVW7N0G2SZ59X9", + "course_code": "MATH022", + "title": "Linear Algebra", + "description": "Vector spaces, matrices, eigenvalues, and linear transformations. Essential for data science and engineering.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "in_person", + "department": "Mathematics", + "major": "Mathematics", + "prerequisites": [], + "schedule": { + "days": [ + "tuesday" + ], + "start_time": "15:00:00", + "end_time": "17:30:00", + "location": "Business Complex 985" + }, + "semester": "spring", + "year": 2024, + "instructor": "Eric Thompson", + "max_enrollment": 68, + "current_enrollment": 0, + "tags": [ + "linear algebra", + "matrices", + "vectors" + ], + "learning_objectives": [ + "Perform matrix operations", + "Understand vector spaces", + "Calculate eigenvalues and eigenvectors", + "Apply linear algebra to problems" + ], + "created_at": "2025-10-31 00:31:01.973735", + "updated_at": "2025-10-31 00:31:01.973735" + }, + { + "id": "01K8W8H0TN30HVW7N0G2SZ59XA", + "course_code": "MATH023", + "title": "Linear Algebra", + "description": "Vector spaces, matrices, eigenvalues, and linear transformations. Essential for data science and engineering.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "hybrid", + "department": "Mathematics", + "major": "Mathematics", + "prerequisites": [], + "schedule": { + "days": [ + "monday", + "wednesday" + ], + "start_time": "15:30:00", + "end_time": "16:45:00", + "location": "Technology Center 533" + }, + "semester": "winter", + "year": 2024, + "instructor": "Megan Lewis", + "max_enrollment": 39, + "current_enrollment": 24, + "tags": [ + "linear algebra", + "matrices", + "vectors" + ], + "learning_objectives": [ + "Perform matrix operations", + "Understand vector spaces", + "Calculate eigenvalues and eigenvectors", + "Apply linear algebra to problems" + ], + "created_at": "2025-10-31 00:31:01.973790", + "updated_at": "2025-10-31 00:31:01.973790" + }, + { + "id": "01K8W8H0TN30HVW7N0G2SZ59XB", + "course_code": "MATH024", + "title": "Linear Algebra", + "description": "Vector spaces, matrices, eigenvalues, and linear transformations. 
Essential for data science and engineering.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "in_person", + "department": "Mathematics", + "major": "Mathematics", + "prerequisites": [ + { + "course_code": "MATH009", + "course_title": "Prerequisite Course 9", + "minimum_grade": "B-", + "can_be_concurrent": true + }, + { + "course_code": "MATH009", + "course_title": "Prerequisite Course 9", + "minimum_grade": "C", + "can_be_concurrent": false + } + ], + "schedule": { + "days": [ + "tuesday", + "thursday" + ], + "start_time": "08:00:00", + "end_time": "09:15:00", + "location": "Liberal Arts Center 865" + }, + "semester": "fall", + "year": 2024, + "instructor": "Lindsey Hogan", + "max_enrollment": 50, + "current_enrollment": 51, + "tags": [ + "linear algebra", + "matrices", + "vectors" + ], + "learning_objectives": [ + "Perform matrix operations", + "Understand vector spaces", + "Calculate eigenvalues and eigenvectors", + "Apply linear algebra to problems" + ], + "created_at": "2025-10-31 00:31:01.973848", + "updated_at": "2025-10-31 00:31:01.973849" + }, + { + "id": "01K8W8H0TN30HVW7N0G2SZ59XC", + "course_code": "MATH025", + "title": "Calculus I", + "description": "Differential calculus including limits, derivatives, and applications. Foundation for advanced mathematics.", + "credits": 4, + "difficulty_level": "intermediate", + "format": "hybrid", + "department": "Mathematics", + "major": "Mathematics", + "prerequisites": [], + "schedule": { + "days": [ + "thursday" + ], + "start_time": "11:00:00", + "end_time": "13:30:00", + "location": "Science Hall 734" + }, + "semester": "summer", + "year": 2024, + "instructor": "Richard Webster", + "max_enrollment": 53, + "current_enrollment": 26, + "tags": [ + "calculus", + "derivatives", + "limits" + ], + "learning_objectives": [ + "Understand limits and continuity", + "Calculate derivatives", + "Apply calculus to real problems", + "Understand fundamental theorem" + ], + "created_at": "2025-10-31 00:31:01.973902", + "updated_at": "2025-10-31 00:31:01.973903" + }, + { + "id": "01K8W8H0TN30HVW7N0G2SZ59XD", + "course_code": "MATH026", + "title": "Linear Algebra", + "description": "Vector spaces, matrices, eigenvalues, and linear transformations. Essential for data science and engineering.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "in_person", + "department": "Mathematics", + "major": "Mathematics", + "prerequisites": [], + "schedule": { + "days": [ + "tuesday", + "thursday" + ], + "start_time": "12:30:00", + "end_time": "13:45:00", + "location": "Liberal Arts Center 234" + }, + "semester": "fall", + "year": 2024, + "instructor": "Margaret Dunn", + "max_enrollment": 78, + "current_enrollment": 79, + "tags": [ + "linear algebra", + "matrices", + "vectors" + ], + "learning_objectives": [ + "Perform matrix operations", + "Understand vector spaces", + "Calculate eigenvalues and eigenvectors", + "Apply linear algebra to problems" + ], + "created_at": "2025-10-31 00:31:01.973957", + "updated_at": "2025-10-31 00:31:01.973957" + }, + { + "id": "01K8W8H0TP3PHGJ9Y8QCCJFYSZ", + "course_code": "MATH027", + "title": "Calculus I", + "description": "Differential calculus including limits, derivatives, and applications. 
Foundation for advanced mathematics.", + "credits": 4, + "difficulty_level": "intermediate", + "format": "hybrid", + "department": "Mathematics", + "major": "Mathematics", + "prerequisites": [ + { + "course_code": "MATH003", + "course_title": "Prerequisite Course 3", + "minimum_grade": "C", + "can_be_concurrent": false + }, + { + "course_code": "MATH006", + "course_title": "Prerequisite Course 6", + "minimum_grade": "C+", + "can_be_concurrent": false + } + ], + "schedule": { + "days": [ + "monday", + "wednesday", + "friday" + ], + "start_time": "16:00:00", + "end_time": "16:50:00", + "location": "Liberal Arts Center 618" + }, + "semester": "summer", + "year": 2024, + "instructor": "Mrs. Sarah Davis", + "max_enrollment": 98, + "current_enrollment": 67, + "tags": [ + "calculus", + "derivatives", + "limits" + ], + "learning_objectives": [ + "Understand limits and continuity", + "Calculate derivatives", + "Apply calculus to real problems", + "Understand fundamental theorem" + ], + "created_at": "2025-10-31 00:31:01.974025", + "updated_at": "2025-10-31 00:31:01.974026" + }, + { + "id": "01K8W8H0TP3PHGJ9Y8QCCJFYT0", + "course_code": "MATH028", + "title": "Calculus I", + "description": "Differential calculus including limits, derivatives, and applications. Foundation for advanced mathematics.", + "credits": 4, + "difficulty_level": "intermediate", + "format": "hybrid", + "department": "Mathematics", + "major": "Mathematics", + "prerequisites": [ + { + "course_code": "MATH018", + "course_title": "Prerequisite Course 18", + "minimum_grade": "C", + "can_be_concurrent": false + } + ], + "schedule": { + "days": [ + "thursday" + ], + "start_time": "12:00:00", + "end_time": "14:30:00", + "location": "Engineering Building 999" + }, + "semester": "spring", + "year": 2024, + "instructor": "Brandon Roman", + "max_enrollment": 63, + "current_enrollment": 26, + "tags": [ + "calculus", + "derivatives", + "limits" + ], + "learning_objectives": [ + "Understand limits and continuity", + "Calculate derivatives", + "Apply calculus to real problems", + "Understand fundamental theorem" + ], + "created_at": "2025-10-31 00:31:01.974086", + "updated_at": "2025-10-31 00:31:01.974086" + }, + { + "id": "01K8W8H0TP3PHGJ9Y8QCCJFYT1", + "course_code": "MATH029", + "title": "Linear Algebra", + "description": "Vector spaces, matrices, eigenvalues, and linear transformations. 
Essential for data science and engineering.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "in_person", + "department": "Mathematics", + "major": "Mathematics", + "prerequisites": [ + { + "course_code": "MATH009", + "course_title": "Prerequisite Course 9", + "minimum_grade": "C", + "can_be_concurrent": true + }, + { + "course_code": "MATH005", + "course_title": "Prerequisite Course 5", + "minimum_grade": "B-", + "can_be_concurrent": false + } + ], + "schedule": { + "days": [ + "thursday" + ], + "start_time": "15:30:00", + "end_time": "18:00:00", + "location": "Science Hall 966" + }, + "semester": "winter", + "year": 2024, + "instructor": "Robin Black", + "max_enrollment": 90, + "current_enrollment": 54, + "tags": [ + "linear algebra", + "matrices", + "vectors" + ], + "learning_objectives": [ + "Perform matrix operations", + "Understand vector spaces", + "Calculate eigenvalues and eigenvectors", + "Apply linear algebra to problems" + ], + "created_at": "2025-10-31 00:31:01.974145", + "updated_at": "2025-10-31 00:31:01.974145" + }, + { + "id": "01K8W8H0TP3PHGJ9Y8QCCJFYT2", + "course_code": "MATH030", + "title": "Calculus I", + "description": "Differential calculus including limits, derivatives, and applications. Foundation for advanced mathematics.", + "credits": 4, + "difficulty_level": "intermediate", + "format": "hybrid", + "department": "Mathematics", + "major": "Mathematics", + "prerequisites": [], + "schedule": { + "days": [ + "monday", + "wednesday", + "friday" + ], + "start_time": "09:00:00", + "end_time": "09:50:00", + "location": "Science Hall 658" + }, + "semester": "spring", + "year": 2024, + "instructor": "Stephanie Norris", + "max_enrollment": 75, + "current_enrollment": 16, + "tags": [ + "calculus", + "derivatives", + "limits" + ], + "learning_objectives": [ + "Understand limits and continuity", + "Calculate derivatives", + "Apply calculus to real problems", + "Understand fundamental theorem" + ], + "created_at": "2025-10-31 00:31:01.974201", + "updated_at": "2025-10-31 00:31:01.974201" + }, + { + "id": "01K8W8H0TP3PHGJ9Y8QCCJFYT3", + "course_code": "BUS031", + "title": "Principles of Management", + "description": "Fundamental management concepts including planning, organizing, leading, and controlling organizational resources.", + "credits": 3, + "difficulty_level": "beginner", + "format": "hybrid", + "department": "Business", + "major": "Business Administration", + "prerequisites": [], + "schedule": { + "days": [ + "monday", + "wednesday" + ], + "start_time": "13:00:00", + "end_time": "14:15:00", + "location": "Engineering Building 466" + }, + "semester": "spring", + "year": 2024, + "instructor": "Earl Turner", + "max_enrollment": 33, + "current_enrollment": 45, + "tags": [ + "management", + "leadership", + "organization" + ], + "learning_objectives": [ + "Understand management principles", + "Apply leadership concepts", + "Organize teams effectively", + "Control organizational resources" + ], + "created_at": "2025-10-31 00:31:01.974257", + "updated_at": "2025-10-31 00:31:01.974257" + }, + { + "id": "01K8W8H0TP3PHGJ9Y8QCCJFYT4", + "course_code": "BUS032", + "title": "Marketing Strategy", + "description": "Strategic marketing planning, market analysis, consumer behavior, and digital marketing techniques.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "in_person", + "department": "Business", + "major": "Business Administration", + "prerequisites": [], + "schedule": { + "days": [ + "thursday" + ], + "start_time": "10:00:00", + 
"end_time": "12:30:00", + "location": "Engineering Building 985" + }, + "semester": "winter", + "year": 2024, + "instructor": "Mark Brooks", + "max_enrollment": 23, + "current_enrollment": 22, + "tags": [ + "marketing", + "strategy", + "consumer behavior" + ], + "learning_objectives": [ + "Develop marketing strategies", + "Analyze market opportunities", + "Understand consumer behavior", + "Implement digital marketing" + ], + "created_at": "2025-10-31 00:31:01.974314", + "updated_at": "2025-10-31 00:31:01.974314" + }, + { + "id": "01K8W8H0TP3PHGJ9Y8QCCJFYT5", + "course_code": "BUS033", + "title": "Marketing Strategy", + "description": "Strategic marketing planning, market analysis, consumer behavior, and digital marketing techniques.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "hybrid", + "department": "Business", + "major": "Business Administration", + "prerequisites": [], + "schedule": { + "days": [ + "tuesday" + ], + "start_time": "18:00:00", + "end_time": "20:30:00", + "location": "Engineering Building 373" + }, + "semester": "fall", + "year": 2024, + "instructor": "Tara Glenn MD", + "max_enrollment": 68, + "current_enrollment": 4, + "tags": [ + "marketing", + "strategy", + "consumer behavior" + ], + "learning_objectives": [ + "Develop marketing strategies", + "Analyze market opportunities", + "Understand consumer behavior", + "Implement digital marketing" + ], + "created_at": "2025-10-31 00:31:01.974437", + "updated_at": "2025-10-31 00:31:01.974438" + }, + { + "id": "01K8W8H0TP3PHGJ9Y8QCCJFYT6", + "course_code": "BUS034", + "title": "Marketing Strategy", + "description": "Strategic marketing planning, market analysis, consumer behavior, and digital marketing techniques.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "in_person", + "department": "Business", + "major": "Business Administration", + "prerequisites": [], + "schedule": { + "days": [ + "tuesday", + "thursday" + ], + "start_time": "11:30:00", + "end_time": "12:45:00", + "location": "Liberal Arts Center 458" + }, + "semester": "spring", + "year": 2024, + "instructor": "Marcus James", + "max_enrollment": 23, + "current_enrollment": 24, + "tags": [ + "marketing", + "strategy", + "consumer behavior" + ], + "learning_objectives": [ + "Develop marketing strategies", + "Analyze market opportunities", + "Understand consumer behavior", + "Implement digital marketing" + ], + "created_at": "2025-10-31 00:31:01.974492", + "updated_at": "2025-10-31 00:31:01.974492" + }, + { + "id": "01K8W8H0TP3PHGJ9Y8QCCJFYT7", + "course_code": "BUS035", + "title": "Marketing Strategy", + "description": "Strategic marketing planning, market analysis, consumer behavior, and digital marketing techniques.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "hybrid", + "department": "Business", + "major": "Business Administration", + "prerequisites": [], + "schedule": { + "days": [ + "monday", + "wednesday" + ], + "start_time": "12:00:00", + "end_time": "13:15:00", + "location": "Liberal Arts Center 891" + }, + "semester": "winter", + "year": 2024, + "instructor": "Daniel Tate", + "max_enrollment": 88, + "current_enrollment": 42, + "tags": [ + "marketing", + "strategy", + "consumer behavior" + ], + "learning_objectives": [ + "Develop marketing strategies", + "Analyze market opportunities", + "Understand consumer behavior", + "Implement digital marketing" + ], + "created_at": "2025-10-31 00:31:01.974545", + "updated_at": "2025-10-31 00:31:01.974546" + }, + { + "id": "01K8W8H0TP3PHGJ9Y8QCCJFYT8", + 
"course_code": "BUS036", + "title": "Principles of Management", + "description": "Fundamental management concepts including planning, organizing, leading, and controlling organizational resources.", + "credits": 3, + "difficulty_level": "beginner", + "format": "in_person", + "department": "Business", + "major": "Business Administration", + "prerequisites": [ + { + "course_code": "BUS012", + "course_title": "Prerequisite Course 12", + "minimum_grade": "B-", + "can_be_concurrent": false + }, + { + "course_code": "BUS014", + "course_title": "Prerequisite Course 14", + "minimum_grade": "B-", + "can_be_concurrent": false + } + ], + "schedule": { + "days": [ + "monday", + "wednesday", + "friday" + ], + "start_time": "12:00:00", + "end_time": "12:50:00", + "location": "Liberal Arts Center 694" + }, + "semester": "winter", + "year": 2024, + "instructor": "Robert Wright", + "max_enrollment": 93, + "current_enrollment": 24, + "tags": [ + "management", + "leadership", + "organization" + ], + "learning_objectives": [ + "Understand management principles", + "Apply leadership concepts", + "Organize teams effectively", + "Control organizational resources" + ], + "created_at": "2025-10-31 00:31:01.974605", + "updated_at": "2025-10-31 00:31:01.974605" + }, + { + "id": "01K8W8H0TP3PHGJ9Y8QCCJFYT9", + "course_code": "BUS037", + "title": "Marketing Strategy", + "description": "Strategic marketing planning, market analysis, consumer behavior, and digital marketing techniques.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "hybrid", + "department": "Business", + "major": "Business Administration", + "prerequisites": [], + "schedule": { + "days": [ + "monday", + "wednesday", + "friday" + ], + "start_time": "14:00:00", + "end_time": "14:50:00", + "location": "Technology Center 632" + }, + "semester": "spring", + "year": 2024, + "instructor": "Amy Blackwell", + "max_enrollment": 66, + "current_enrollment": 55, + "tags": [ + "marketing", + "strategy", + "consumer behavior" + ], + "learning_objectives": [ + "Develop marketing strategies", + "Analyze market opportunities", + "Understand consumer behavior", + "Implement digital marketing" + ], + "created_at": "2025-10-31 00:31:01.974659", + "updated_at": "2025-10-31 00:31:01.974660" + }, + { + "id": "01K8W8H0TP3PHGJ9Y8QCCJFYTA", + "course_code": "BUS038", + "title": "Principles of Management", + "description": "Fundamental management concepts including planning, organizing, leading, and controlling organizational resources.", + "credits": 3, + "difficulty_level": "beginner", + "format": "online", + "department": "Business", + "major": "Business Administration", + "prerequisites": [], + "schedule": { + "days": [ + "monday", + "wednesday" + ], + "start_time": "17:30:00", + "end_time": "18:45:00", + "location": "Science Hall 779" + }, + "semester": "summer", + "year": 2024, + "instructor": "Andrea Thompson", + "max_enrollment": 72, + "current_enrollment": 41, + "tags": [ + "management", + "leadership", + "organization" + ], + "learning_objectives": [ + "Understand management principles", + "Apply leadership concepts", + "Organize teams effectively", + "Control organizational resources" + ], + "created_at": "2025-10-31 00:31:01.974714", + "updated_at": "2025-10-31 00:31:01.974714" + }, + { + "id": "01K8W8H0TP3PHGJ9Y8QCCJFYTB", + "course_code": "BUS039", + "title": "Marketing Strategy", + "description": "Strategic marketing planning, market analysis, consumer behavior, and digital marketing techniques.", + "credits": 3, + "difficulty_level": 
"intermediate", + "format": "online", + "department": "Business", + "major": "Business Administration", + "prerequisites": [], + "schedule": { + "days": [ + "monday", + "wednesday" + ], + "start_time": "16:00:00", + "end_time": "17:15:00", + "location": "Business Complex 296" + }, + "semester": "spring", + "year": 2024, + "instructor": "Kevin Johnson", + "max_enrollment": 98, + "current_enrollment": 72, + "tags": [ + "marketing", + "strategy", + "consumer behavior" + ], + "learning_objectives": [ + "Develop marketing strategies", + "Analyze market opportunities", + "Understand consumer behavior", + "Implement digital marketing" + ], + "created_at": "2025-10-31 00:31:01.974767", + "updated_at": "2025-10-31 00:31:01.974768" + }, + { + "id": "01K8W8H0TP3PHGJ9Y8QCCJFYTC", + "course_code": "BUS040", + "title": "Marketing Strategy", + "description": "Strategic marketing planning, market analysis, consumer behavior, and digital marketing techniques.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "hybrid", + "department": "Business", + "major": "Business Administration", + "prerequisites": [ + { + "course_code": "BUS015", + "course_title": "Prerequisite Course 15", + "minimum_grade": "C+", + "can_be_concurrent": false + }, + { + "course_code": "BUS007", + "course_title": "Prerequisite Course 7", + "minimum_grade": "B-", + "can_be_concurrent": false + } + ], + "schedule": { + "days": [ + "tuesday" + ], + "start_time": "16:00:00", + "end_time": "18:30:00", + "location": "Liberal Arts Center 411" + }, + "semester": "spring", + "year": 2024, + "instructor": "Brandon Ramirez", + "max_enrollment": 30, + "current_enrollment": 36, + "tags": [ + "marketing", + "strategy", + "consumer behavior" + ], + "learning_objectives": [ + "Develop marketing strategies", + "Analyze market opportunities", + "Understand consumer behavior", + "Implement digital marketing" + ], + "created_at": "2025-10-31 00:31:01.974825", + "updated_at": "2025-10-31 00:31:01.974825" + }, + { + "id": "01K8W8H0TP3PHGJ9Y8QCCJFYTD", + "course_code": "PSY041", + "title": "Cognitive Psychology", + "description": "Study of mental processes including perception, memory, thinking, and problem-solving.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "in_person", + "department": "Psychology", + "major": "Psychology", + "prerequisites": [], + "schedule": { + "days": [ + "monday", + "wednesday", + "friday" + ], + "start_time": "11:30:00", + "end_time": "12:20:00", + "location": "Engineering Building 330" + }, + "semester": "fall", + "year": 2024, + "instructor": "Tyrone Vasquez", + "max_enrollment": 25, + "current_enrollment": 31, + "tags": [ + "cognitive psychology", + "memory", + "perception" + ], + "learning_objectives": [ + "Understand cognitive processes", + "Study memory systems", + "Analyze problem-solving", + "Explore perception mechanisms" + ], + "created_at": "2025-10-31 00:31:01.974879", + "updated_at": "2025-10-31 00:31:01.974879" + }, + { + "id": "01K8W8H0TP3PHGJ9Y8QCCJFYTE", + "course_code": "PSY042", + "title": "Cognitive Psychology", + "description": "Study of mental processes including perception, memory, thinking, and problem-solving.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "in_person", + "department": "Psychology", + "major": "Psychology", + "prerequisites": [], + "schedule": { + "days": [ + "thursday" + ], + "start_time": "09:30:00", + "end_time": "12:00:00", + "location": "Technology Center 524" + }, + "semester": "winter", + "year": 2024, + "instructor": "Craig 
Jackson", + "max_enrollment": 83, + "current_enrollment": 51, + "tags": [ + "cognitive psychology", + "memory", + "perception" + ], + "learning_objectives": [ + "Understand cognitive processes", + "Study memory systems", + "Analyze problem-solving", + "Explore perception mechanisms" + ], + "created_at": "2025-10-31 00:31:01.974933", + "updated_at": "2025-10-31 00:31:01.974933" + }, + { + "id": "01K8W8H0TP3PHGJ9Y8QCCJFYTF", + "course_code": "PSY043", + "title": "Introduction to Psychology", + "description": "Overview of psychological principles, research methods, and major areas of study in psychology.", + "credits": 3, + "difficulty_level": "beginner", + "format": "online", + "department": "Psychology", + "major": "Psychology", + "prerequisites": [], + "schedule": { + "days": [ + "tuesday", + "thursday" + ], + "start_time": "18:00:00", + "end_time": "19:15:00", + "location": "Science Hall 868" + }, + "semester": "spring", + "year": 2024, + "instructor": "Kathy Velez", + "max_enrollment": 42, + "current_enrollment": 66, + "tags": [ + "psychology", + "research methods", + "behavior" + ], + "learning_objectives": [ + "Understand psychological principles", + "Learn research methods", + "Explore areas of psychology", + "Apply psychological concepts" + ], + "created_at": "2025-10-31 00:31:01.974987", + "updated_at": "2025-10-31 00:31:01.974988" + }, + { + "id": "01K8W8H0TQFK75Z9NA0D77XBVZ", + "course_code": "PSY044", + "title": "Cognitive Psychology", + "description": "Study of mental processes including perception, memory, thinking, and problem-solving.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "online", + "department": "Psychology", + "major": "Psychology", + "prerequisites": [], + "schedule": { + "days": [ + "monday", + "wednesday", + "friday" + ], + "start_time": "16:00:00", + "end_time": "16:50:00", + "location": "Science Hall 968" + }, + "semester": "summer", + "year": 2024, + "instructor": "Megan Wilson", + "max_enrollment": 76, + "current_enrollment": 78, + "tags": [ + "cognitive psychology", + "memory", + "perception" + ], + "learning_objectives": [ + "Understand cognitive processes", + "Study memory systems", + "Analyze problem-solving", + "Explore perception mechanisms" + ], + "created_at": "2025-10-31 00:31:01.975044", + "updated_at": "2025-10-31 00:31:01.975045" + }, + { + "id": "01K8W8H0TQFK75Z9NA0D77XBW0", + "course_code": "PSY045", + "title": "Cognitive Psychology", + "description": "Study of mental processes including perception, memory, thinking, and problem-solving.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "in_person", + "department": "Psychology", + "major": "Psychology", + "prerequisites": [], + "schedule": { + "days": [ + "thursday" + ], + "start_time": "15:00:00", + "end_time": "17:30:00", + "location": "Business Complex 861" + }, + "semester": "summer", + "year": 2024, + "instructor": "Karen Nash", + "max_enrollment": 86, + "current_enrollment": 62, + "tags": [ + "cognitive psychology", + "memory", + "perception" + ], + "learning_objectives": [ + "Understand cognitive processes", + "Study memory systems", + "Analyze problem-solving", + "Explore perception mechanisms" + ], + "created_at": "2025-10-31 00:31:01.975104", + "updated_at": "2025-10-31 00:31:01.975105" + }, + { + "id": "01K8W8H0TQFK75Z9NA0D77XBW1", + "course_code": "PSY046", + "title": "Introduction to Psychology", + "description": "Overview of psychological principles, research methods, and major areas of study in psychology.", + "credits": 3, + 
"difficulty_level": "beginner", + "format": "in_person", + "department": "Psychology", + "major": "Psychology", + "prerequisites": [ + { + "course_code": "PSY021", + "course_title": "Prerequisite Course 21", + "minimum_grade": "B-", + "can_be_concurrent": true + }, + { + "course_code": "PSY010", + "course_title": "Prerequisite Course 10", + "minimum_grade": "C", + "can_be_concurrent": false + } + ], + "schedule": { + "days": [ + "monday", + "wednesday" + ], + "start_time": "15:00:00", + "end_time": "16:15:00", + "location": "Liberal Arts Center 830" + }, + "semester": "spring", + "year": 2024, + "instructor": "Richard Perez", + "max_enrollment": 28, + "current_enrollment": 53, + "tags": [ + "psychology", + "research methods", + "behavior" + ], + "learning_objectives": [ + "Understand psychological principles", + "Learn research methods", + "Explore areas of psychology", + "Apply psychological concepts" + ], + "created_at": "2025-10-31 00:31:01.975163", + "updated_at": "2025-10-31 00:31:01.975163" + }, + { + "id": "01K8W8H0TQFK75Z9NA0D77XBW2", + "course_code": "PSY047", + "title": "Cognitive Psychology", + "description": "Study of mental processes including perception, memory, thinking, and problem-solving.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "in_person", + "department": "Psychology", + "major": "Psychology", + "prerequisites": [ + { + "course_code": "PSY025", + "course_title": "Prerequisite Course 25", + "minimum_grade": "B-", + "can_be_concurrent": false + }, + { + "course_code": "PSY002", + "course_title": "Prerequisite Course 2", + "minimum_grade": "B-", + "can_be_concurrent": false + } + ], + "schedule": { + "days": [ + "monday", + "wednesday" + ], + "start_time": "16:30:00", + "end_time": "17:45:00", + "location": "Science Hall 525" + }, + "semester": "summer", + "year": 2024, + "instructor": "Samantha Sanders", + "max_enrollment": 58, + "current_enrollment": 49, + "tags": [ + "cognitive psychology", + "memory", + "perception" + ], + "learning_objectives": [ + "Understand cognitive processes", + "Study memory systems", + "Analyze problem-solving", + "Explore perception mechanisms" + ], + "created_at": "2025-10-31 00:31:01.975222", + "updated_at": "2025-10-31 00:31:01.975223" + }, + { + "id": "01K8W8H0TQFK75Z9NA0D77XBW3", + "course_code": "PSY048", + "title": "Cognitive Psychology", + "description": "Study of mental processes including perception, memory, thinking, and problem-solving.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "online", + "department": "Psychology", + "major": "Psychology", + "prerequisites": [ + { + "course_code": "PSY002", + "course_title": "Prerequisite Course 2", + "minimum_grade": "C+", + "can_be_concurrent": false + }, + { + "course_code": "PSY026", + "course_title": "Prerequisite Course 26", + "minimum_grade": "B-", + "can_be_concurrent": true + } + ], + "schedule": { + "days": [ + "thursday" + ], + "start_time": "16:00:00", + "end_time": "18:30:00", + "location": "Engineering Building 599" + }, + "semester": "spring", + "year": 2024, + "instructor": "Bradley Powers", + "max_enrollment": 99, + "current_enrollment": 68, + "tags": [ + "cognitive psychology", + "memory", + "perception" + ], + "learning_objectives": [ + "Understand cognitive processes", + "Study memory systems", + "Analyze problem-solving", + "Explore perception mechanisms" + ], + "created_at": "2025-10-31 00:31:01.975283", + "updated_at": "2025-10-31 00:31:01.975283" + }, + { + "id": "01K8W8H0TQFK75Z9NA0D77XBW4", + "course_code": "PSY049", + 
"title": "Introduction to Psychology", + "description": "Overview of psychological principles, research methods, and major areas of study in psychology.", + "credits": 3, + "difficulty_level": "beginner", + "format": "online", + "department": "Psychology", + "major": "Psychology", + "prerequisites": [ + { + "course_code": "PSY012", + "course_title": "Prerequisite Course 12", + "minimum_grade": "C", + "can_be_concurrent": false + }, + { + "course_code": "PSY021", + "course_title": "Prerequisite Course 21", + "minimum_grade": "C", + "can_be_concurrent": false + } + ], + "schedule": { + "days": [ + "tuesday" + ], + "start_time": "17:00:00", + "end_time": "19:30:00", + "location": "Business Complex 185" + }, + "semester": "winter", + "year": 2024, + "instructor": "Stacey Herrera", + "max_enrollment": 55, + "current_enrollment": 53, + "tags": [ + "psychology", + "research methods", + "behavior" + ], + "learning_objectives": [ + "Understand psychological principles", + "Learn research methods", + "Explore areas of psychology", + "Apply psychological concepts" + ], + "created_at": "2025-10-31 00:31:01.975346", + "updated_at": "2025-10-31 00:31:01.975346" + }, + { + "id": "01K8W8H0TQFK75Z9NA0D77XBW5", + "course_code": "PSY050", + "title": "Cognitive Psychology", + "description": "Study of mental processes including perception, memory, thinking, and problem-solving.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "in_person", + "department": "Psychology", + "major": "Psychology", + "prerequisites": [ + { + "course_code": "PSY015", + "course_title": "Prerequisite Course 15", + "minimum_grade": "B-", + "can_be_concurrent": true + }, + { + "course_code": "PSY003", + "course_title": "Prerequisite Course 3", + "minimum_grade": "C", + "can_be_concurrent": false + } + ], + "schedule": { + "days": [ + "monday", + "wednesday", + "friday" + ], + "start_time": "15:00:00", + "end_time": "15:50:00", + "location": "Technology Center 867" + }, + "semester": "fall", + "year": 2024, + "instructor": "Michael Ramsey", + "max_enrollment": 99, + "current_enrollment": 19, + "tags": [ + "cognitive psychology", + "memory", + "perception" + ], + "learning_objectives": [ + "Understand cognitive processes", + "Study memory systems", + "Analyze problem-solving", + "Explore perception mechanisms" + ], + "created_at": "2025-10-31 00:31:01.975408", + "updated_at": "2025-10-31 00:31:01.975409" + } + ] +} \ No newline at end of file diff --git a/python-recipes/context-engineering/notebooks/section-3-memory-systems-for-context-engineering/01_working_and_longterm_memory.ipynb b/python-recipes/context-engineering/notebooks/section-3-memory-systems-for-context-engineering/01_working_and_longterm_memory.ipynb new file mode 100644 index 00000000..09d5eb25 --- /dev/null +++ b/python-recipes/context-engineering/notebooks/section-3-memory-systems-for-context-engineering/01_working_and_longterm_memory.ipynb @@ -0,0 +1,4259 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a19be531208b364b", + "metadata": {}, + "source": [ + "![Redis](https://redis.io/wp-content/uploads/2024/04/Logotype.svg?auto=webp&quality=85,75&width=120)\n", + "\n", + "# ๐Ÿง  Working and Long-Term Memory\n", + "\n", + "**โฑ๏ธ Estimated Time:** 45-60 minutes\n", + "\n", + "## ๐ŸŽฏ Learning Objectives\n", + "\n", + "By the end of this notebook, you will:\n", + "\n", + "1. **Understand** why memory is essential for context engineering\n", + "2. **Implement** working memory for conversation continuity\n", + "3. 
**Use** long-term memory for persistent user knowledge\n", + "4. **Integrate** memory with your Section 2 RAG system\n", + "5. **Build** a complete memory-enhanced course advisor\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ”— Recap\n", + "\n", + "### **Section 1: The Four Context Types**\n", + "\n", + "Recall the four context types from Section 1:\n", + "\n", + "1. **System Context** (Static) - Role, instructions, guidelines\n", + "2. **User Context** (Dynamic, User-Specific) - Profile, preferences, goals\n", + "3. **Conversation Context** (Dynamic, Session-Specific) - **โ† Memory enables this!**\n", + "4. **Retrieved Context** (Dynamic, Query-Specific) - RAG results\n", + "\n", + "### **Section 2: Stateless RAG**\n", + "\n", + "Your Section 2 RAG system was **stateless**:\n", + "\n", + "```python\n", + "async def rag_query(query, student_profile):\n", + " # 1. Search courses (Retrieved Context)\n", + " courses = await course_manager.search_courses(query)\n", + "\n", + " # 2. Assemble context (System + User + Retrieved)\n", + " context = assemble_context(system_prompt, student_profile, courses)\n", + "\n", + " # 3. Generate response\n", + " response = llm.invoke(context)\n", + "\n", + " # โŒ No conversation history stored\n", + " # โŒ Each query is independent\n", + " # โŒ Can't reference previous messages\n", + "```\n", + "\n", + "**The Problem:** Every query starts from scratch. No conversation continuity.\n", + "\n", + "---\n", + "\n", + "## ๐Ÿšจ Why Agents Need Memory: The Grounding Problem\n", + "\n", + "Before diving into implementation, let's understand the fundamental problem that memory solves.\n", + "\n", + "**Grounding** means understanding what users are referring to. Natural conversation is full of references:\n", + "\n", + "### **Without Memory:**\n", + "\n", + "```\n", + "User: \"Tell me about CS401\"\n", + "Agent: \"CS401 is Machine Learning. It covers supervised learning...\"\n", + "\n", + "User: \"What are its prerequisites?\"\n", + "Agent: โŒ \"What does 'it' refer to? Please specify which course.\"\n", + "\n", + "User: \"The course we just discussed!\"\n", + "Agent: โŒ \"I don't have access to previous messages. Which course?\"\n", + "```\n", + "\n", + "**This is a terrible user experience.**\n", + "\n", + "### Types of References That Need Grounding\n", + "\n", + "**Pronouns:**\n", + "- \"it\", \"that course\", \"those\", \"this one\"\n", + "- \"he\", \"she\", \"they\" (referring to people)\n", + "\n", + "**Descriptions:**\n", + "- \"the easy one\", \"the online course\"\n", + "- \"my advisor\", \"that professor\"\n", + "\n", + "**Implicit context:**\n", + "- \"Can I take it?\" โ†’ Take what?\n", + "- \"When does it start?\" โ†’ What starts?\n", + "\n", + "**Temporal references:**\n", + "- \"you mentioned\", \"earlier\", \"last time\"\n", + "\n", + "### **With Memory:**\n", + "\n", + "```\n", + "User: \"Tell me about CS401\"\n", + "Agent: \"CS401 is Machine Learning. It covers...\"\n", + "[Stores: User asked about CS401]\n", + "\n", + "User: \"What are its prerequisites?\"\n", + "Agent: [Checks memory: \"its\" = CS401]\n", + "Agent: โœ… \"CS401 requires CS201 and MATH301\"\n", + "\n", + "User: \"Can I take it?\"\n", + "Agent: [Checks memory: \"it\" = CS401, checks student transcript]\n", + "Agent: โœ… \"You've completed CS201 but still need MATH301\"\n", + "```\n", + "\n", + "**Now the conversation flows naturally!**\n", + "\n", + "---\n", + "\n", + "## ๐Ÿง  Two Types of Memory\n", + "\n", + "### **1. 
Working Memory (Session-Scoped)**\n", + "\n", + " - **What:** Conversation messages from the current session\n", + " - **Purpose:** Reference resolution, conversation continuity\n", + " - **Lifetime:** Session duration (24 hours TTL by default)\n", + "\n", + "**Example:**\n", + "```\n", + "Session: session_123\n", + "Messages:\n", + " 1. User: \"Tell me about CS401\"\n", + " 2. Agent: \"CS401 is Machine Learning...\"\n", + " 3. User: \"What are its prerequisites?\"\n", + " 4. Agent: \"CS401 requires CS201 and MATH301\"\n", + "```\n", + "\n", + "### **2. Long-term Memory (Cross-Session)**\n", + "\n", + " - **What:** Persistent facts, preferences, goals\n", + " - **Purpose:** Personalization across sessions and applications\n", + " - **Lifetime:** Permanent (until explicitly deleted)\n", + "\n", + "**Example:**\n", + "```\n", + "User: student_sarah\n", + "Memories:\n", + " - \"Prefers online courses over in-person\"\n", + " - \"Major: Computer Science, focus on AI/ML\"\n", + " - \"Goal: Graduate Spring 2026\"\n", + " - \"Completed: CS101, CS201, MATH301\"\n", + "```\n", + "\n", + "### **Comparison: Working vs. Long-term Memory**\n", + "\n", + "| Working Memory | Long-term Memory |\n", + "|----------------|------------------|\n", + "| **Session-scoped** | **User-scoped** |\n", + "| Current conversation | Important facts |\n", + "| TTL-based (expires) | Persistent |\n", + "| Full message history | Extracted knowledge |\n", + "| Loaded/saved each turn | Searched when needed |\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ“ฆ Setup and Environment\n", + "\n", + "Let's set up our environment with the necessary dependencies and connections. We'll build on Section 2's RAG foundation and add memory capabilities.\n", + "\n", + "### โš ๏ธ Prerequisites\n", + "\n", + "**Before running this notebook, make sure you have:**\n", + "\n", + "1. **Docker Desktop running** - Required for Redis and Agent Memory Server\n", + "\n", + "2. **Environment variables** - Create a `.env` file in the `reference-agent` directory:\n", + " ```bash\n", + " # Copy the example file\n", + " cd ../../reference-agent\n", + " cp .env.example .env\n", + "\n", + " # Edit .env and add your OpenAI API key\n", + " # OPENAI_API_KEY=your_actual_openai_api_key_here\n", + " ```\n", + "\n", + "3. 
**Run the setup script** - This will automatically start Redis and Agent Memory Server:\n", + " ```bash\n", + " cd ../../reference-agent\n", + " python setup_agent_memory_server.py\n", + " ```\n", + "\n", + "**Note:** The setup script will:\n", + "- โœ… Check if Docker is running\n", + "- โœ… Start Redis if not running (port 6379)\n", + "- โœ… Start Agent Memory Server if not running (port 8088)\n", + "- โœ… Verify Redis connection is working\n", + "- โœ… Handle any configuration issues automatically\n", + "\n", + "If the Memory Server is not available, the notebook will skip memory-related demos but will still run.\n" + ] + }, + { + "cell_type": "markdown", + "id": "c8736deb126c3f16", + "metadata": {}, + "source": [ + "---\n" + ] + }, + { + "cell_type": "markdown", + "id": "56268deee3282f75", + "metadata": {}, + "source": [ + "### Automated Setup Check\n", + "\n", + "Let's run the setup script to ensure all services are running properly.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "1e2349a4bfd202d", + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-31T16:01:32.037128Z", + "start_time": "2025-10-31T16:01:31.719782Z" + }, + "execution": { + "iopub.execute_input": "2025-11-01T00:27:01.747495Z", + "iopub.status.busy": "2025-11-01T00:27:01.747367Z", + "iopub.status.idle": "2025-11-01T00:27:02.023497Z", + "shell.execute_reply": "2025-11-01T00:27:02.022996Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Running automated setup check...\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "๐Ÿ”ง Agent Memory Server Setup\n", + "===========================\n", + "๐Ÿ“Š Checking Redis...\n", + "โœ… Redis is running\n", + "๐Ÿ“Š Checking Agent Memory Server...\n", + "๐Ÿ” Agent Memory Server container exists. Checking health...\n", + "โœ… Agent Memory Server is running and healthy\n", + "โœ… No Redis connection issues detected\n", + "\n", + "โœ… Setup Complete!\n", + "=================\n", + "๐Ÿ“Š Services Status:\n", + " โ€ข Redis: Running on port 6379\n", + " โ€ข Agent Memory Server: Running on port 8088\n", + "\n", + "๐ŸŽฏ You can now run the notebooks!\n", + "\n", + "\n", + "โœ… All services are ready!\n" + ] + } + ], + "source": [ + "# Run the setup script to ensure Redis and Agent Memory Server are running\n", + "import subprocess\n", + "import sys\n", + "from pathlib import Path\n", + "\n", + "# Path to setup script\n", + "setup_script = Path(\"../../reference-agent/setup_agent_memory_server.py\")\n", + "\n", + "if setup_script.exists():\n", + " print(\"Running automated setup check...\\n\")\n", + " result = subprocess.run(\n", + " [sys.executable, str(setup_script)],\n", + " capture_output=True,\n", + " text=True\n", + " )\n", + " print(result.stdout)\n", + " if result.returncode != 0:\n", + " print(\"โš ๏ธ Setup check failed. Please review the output above.\")\n", + " print(result.stderr)\n", + " else:\n", + " print(\"\\nโœ… All services are ready!\")\n", + "else:\n", + " print(\"โš ๏ธ Setup script not found. 
Please ensure services are running manually.\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "478ea9ac1a2f036", + "metadata": {}, + "source": [ + "---\n" + ] + }, + { + "cell_type": "markdown", + "id": "1fdbc5b7728ae311", + "metadata": {}, + "source": [ + "### Install Dependencies\n", + "\n", + "If you haven't already installed the reference-agent package, uncomment and run the following:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "9a802c8b0c8d69aa", + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-31T16:01:33.407203Z", + "start_time": "2025-10-31T16:01:33.405271Z" + }, + "execution": { + "iopub.execute_input": "2025-11-01T00:27:02.025019Z", + "iopub.status.busy": "2025-11-01T00:27:02.024923Z", + "iopub.status.idle": "2025-11-01T00:27:02.026613Z", + "shell.execute_reply": "2025-11-01T00:27:02.026232Z" + } + }, + "outputs": [], + "source": [ + "# Uncomment to install reference-agent package\n", + "# %pip install -q -e ../../reference-agent\n", + "\n", + "# Uncomment to install agent-memory-client\n", + "# %pip install -q agent-memory-client\n" + ] + }, + { + "cell_type": "markdown", + "id": "8f982dbbdf7348af", + "metadata": {}, + "source": [ + "### Load Environment Variables\n", + "\n", + "We'll load environment variables from the `.env` file in the `reference-agent` directory.\n", + "\n", + "**Required variables:**\n", + "- `OPENAI_API_KEY` - Your OpenAI API key\n", + "- `REDIS_URL` - Redis connection URL (default: redis://localhost:6379)\n", + "- `AGENT_MEMORY_URL` - Agent Memory Server URL (default: http://localhost:8088)\n", + "\n", + "If you haven't created the `.env` file yet, copy `.env.example` and add your OpenAI API key.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "f08b853441918493", + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-31T16:01:33.957278Z", + "start_time": "2025-10-31T16:01:33.952517Z" + }, + "execution": { + "iopub.execute_input": "2025-11-01T00:27:02.027724Z", + "iopub.status.busy": "2025-11-01T00:27:02.027666Z", + "iopub.status.idle": "2025-11-01T00:27:02.032122Z", + "shell.execute_reply": "2025-11-01T00:27:02.031813Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Environment variables loaded\n", + " REDIS_URL: redis://localhost:6379\n", + " AGENT_MEMORY_URL: http://localhost:8088\n" + ] + } + ], + "source": [ + "import os\n", + "from pathlib import Path\n", + "from dotenv import load_dotenv\n", + "\n", + "# Load environment variables from reference-agent directory\n", + "env_path = Path(\"../../reference-agent/.env\")\n", + "load_dotenv(dotenv_path=env_path)\n", + "\n", + "# Verify required environment variables\n", + "OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")\n", + "REDIS_URL = os.getenv(\"REDIS_URL\", \"redis://localhost:6379\")\n", + "AGENT_MEMORY_URL = os.getenv(\"AGENT_MEMORY_URL\", \"http://localhost:8088\")\n", + "\n", + "if not OPENAI_API_KEY:\n", + " print(f\"\"\"โŒ OPENAI_API_KEY not found!\n", + "\n", + " Please create a .env file at: {env_path.absolute()}\n", + "\n", + " With the following content:\n", + " OPENAI_API_KEY=your_openai_api_key\n", + " REDIS_URL=redis://localhost:6379\n", + " AGENT_MEMORY_URL=http://localhost:8088\n", + " \"\"\")\n", + "else:\n", + " print(\"โœ… Environment variables loaded\")\n", + " print(f\" REDIS_URL: {REDIS_URL}\")\n", + " print(f\" AGENT_MEMORY_URL: {AGENT_MEMORY_URL}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "62cc9a0e7f524393", + "metadata": {}, + "source": [ + "### Import 
Core Libraries\n", + "\n", + "We'll import standard Python libraries and async support for our memory operations.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "8d1a43786a58529a", + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-31T16:01:35.497349Z", + "start_time": "2025-10-31T16:01:35.494811Z" + }, + "execution": { + "iopub.execute_input": "2025-11-01T00:27:02.033361Z", + "iopub.status.busy": "2025-11-01T00:27:02.033291Z", + "iopub.status.idle": "2025-11-01T00:27:02.034953Z", + "shell.execute_reply": "2025-11-01T00:27:02.034585Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Core libraries imported\n" + ] + } + ], + "source": [ + "import asyncio\n", + "from typing import List, Dict, Any, Optional\n", + "from datetime import datetime\n", + "\n", + "print(\"โœ… Core libraries imported\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "6a35f8385b5910f2", + "metadata": {}, + "source": [ + "### Import Section 2 Components\n", + "\n", + "We're building on Section 2's RAG foundation, so we'll reuse the same components:\n", + "- `redis_config` - Redis connection and configuration\n", + "- `CourseManager` - Course search and management\n", + "- `StudentProfile` and other models - Data structures\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "5fac5a16ef3467c7", + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-31T16:01:36.260993Z", + "start_time": "2025-10-31T16:01:36.258192Z" + }, + "execution": { + "iopub.execute_input": "2025-11-01T00:27:02.036332Z", + "iopub.status.busy": "2025-11-01T00:27:02.036256Z", + "iopub.status.idle": "2025-11-01T00:27:03.822930Z", + "shell.execute_reply": "2025-11-01T00:27:03.822481Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Section 2 components imported\n", + " CourseManager: Available\n", + " Redis Config: Available\n", + " Models: Course, StudentProfile, etc.\n" + ] + } + ], + "source": [ + "# Import Section 2 components from reference-agent\n", + "from redis_context_course.redis_config import redis_config\n", + "from redis_context_course.course_manager import CourseManager\n", + "from redis_context_course.models import (\n", + " Course, StudentProfile, DifficultyLevel,\n", + " CourseFormat, Semester\n", + ")\n", + "\n", + "print(\"โœ… Section 2 components imported\")\n", + "print(f\" CourseManager: Available\")\n", + "print(f\" Redis Config: Available\")\n", + "print(f\" Models: Course, StudentProfile, etc.\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "26d596af861c1882", + "metadata": {}, + "source": [ + "### Import LangChain Components\n", + "\n", + "We'll use LangChain for LLM interaction and message handling.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "d001a6a150cd8cc7", + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-31T16:01:37.193910Z", + "start_time": "2025-10-31T16:01:37.190383Z" + }, + "execution": { + "iopub.execute_input": "2025-11-01T00:27:03.824132Z", + "iopub.status.busy": "2025-11-01T00:27:03.824011Z", + "iopub.status.idle": "2025-11-01T00:27:03.825990Z", + "shell.execute_reply": "2025-11-01T00:27:03.825558Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… LangChain components imported\n", + " ChatOpenAI: Available\n", + " Message types: HumanMessage, SystemMessage, AIMessage\n" + ] + } + ], + "source": [ + "from langchain_openai import ChatOpenAI\n", + "from langchain_core.messages import 
HumanMessage, SystemMessage, AIMessage\n", + "\n", + "print(\"โœ… LangChain components imported\")\n", + "print(f\" ChatOpenAI: Available\")\n", + "print(f\" Message types: HumanMessage, SystemMessage, AIMessage\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "a80d8f9d4a4784a", + "metadata": {}, + "source": [ + "### Import Agent Memory Server Client\n", + "\n", + "The Agent Memory Server provides production-ready memory management. If it's not available, we'll note that and continue with limited functionality.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "5518b93f06209cb2", + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-31T16:01:38.702459Z", + "start_time": "2025-10-31T16:01:38.699416Z" + }, + "execution": { + "iopub.execute_input": "2025-11-01T00:27:03.826940Z", + "iopub.status.busy": "2025-11-01T00:27:03.826877Z", + "iopub.status.idle": "2025-11-01T00:27:03.828773Z", + "shell.execute_reply": "2025-11-01T00:27:03.828433Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Agent Memory Server client available\n", + " MemoryAPIClient: Ready\n", + " Memory models: WorkingMemory, MemoryMessage, ClientMemoryRecord\n" + ] + } + ], + "source": [ + "# Import Agent Memory Server client\n", + "try:\n", + " from agent_memory_client import MemoryAPIClient, MemoryClientConfig\n", + " from agent_memory_client.models import WorkingMemory, MemoryMessage, ClientMemoryRecord\n", + " MEMORY_SERVER_AVAILABLE = True\n", + " print(\"โœ… Agent Memory Server client available\")\n", + " print(\" MemoryAPIClient: Ready\")\n", + " print(\" Memory models: WorkingMemory, MemoryMessage, ClientMemoryRecord\")\n", + "except ImportError:\n", + " MEMORY_SERVER_AVAILABLE = False\n", + " print(\"โš ๏ธ Agent Memory Server not available\")\n", + " print(\" Install with: pip install agent-memory-client\")\n", + " print(\" Start server: See reference-agent/README.md\")\n", + " print(\" Note: Some demos will be skipped\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "2d78a586f3365b83", + "metadata": {}, + "source": [ + "### What We Just Did\n", + "\n", + "We've successfully set up our environment with all the necessary components:\n", + "\n", + "**Imported:**\n", + "- โœ… Section 2 RAG components (`CourseManager`, `redis_config`, models)\n", + "- โœ… LangChain for LLM interaction\n", + "- โœ… Agent Memory Server client (if available)\n", + "\n", + "**Why This Matters:**\n", + "- Building on Section 2's foundation (not starting from scratch)\n", + "- Agent Memory Server provides scalable, persistent memory\n", + "- Same Redis University domain for consistency\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ”ง Initialize Components\n", + "\n", + "Now let's initialize the components we'll use throughout this notebook.\n" + ] + }, + { + "cell_type": "markdown", + "id": "8c1241314ec6df2f", + "metadata": {}, + "source": [ + "### Initialize Course Manager\n", + "\n", + "The `CourseManager` handles course search and retrieval, just like in Section 2.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "3f0dacdfabc8daae", + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-31T16:01:40.826554Z", + "start_time": "2025-10-31T16:01:40.824362Z" + }, + "execution": { + "iopub.execute_input": "2025-11-01T00:27:03.829745Z", + "iopub.status.busy": "2025-11-01T00:27:03.829684Z", + "iopub.status.idle": "2025-11-01T00:27:03.939741Z", + "shell.execute_reply": "2025-11-01T00:27:03.939312Z" + } + }, + "outputs": [ + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "20:27:03 redisvl.index.index INFO Index already exists, not overwriting.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Course Manager initialized\n", + " Ready to search and retrieve courses\n" + ] + } + ], + "source": [ + "# Initialize Course Manager\n", + "course_manager = CourseManager()\n", + "\n", + "print(\"โœ… Course Manager initialized\")\n", + "print(\" Ready to search and retrieve courses\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "c6183b28509fb438", + "metadata": {}, + "source": [ + "### Initialize LLM\n", + "\n", + "We'll use GPT-4o with temperature=0.0 for consistent, deterministic responses.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "4a18aede0c3a9d28", + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-31T16:01:41.920811Z", + "start_time": "2025-10-31T16:01:41.918499Z" + }, + "execution": { + "iopub.execute_input": "2025-11-01T00:27:03.940737Z", + "iopub.status.busy": "2025-11-01T00:27:03.940669Z", + "iopub.status.idle": "2025-11-01T00:27:03.952427Z", + "shell.execute_reply": "2025-11-01T00:27:03.951899Z" + } + }, + "outputs": [], + "source": [ + "# Initialize LLM\n", + "llm = ChatOpenAI(model=\"gpt-4o\", temperature=0.0)\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "e20addef07a1c6bd", + "metadata": {}, + "source": [ + "### Initialize Memory Client\n", + "\n", + "If the Agent Memory Server is available, we'll initialize the memory client. This client handles both working memory (conversation history) and long-term memory (persistent facts).\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "6540f51278904b66", + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-31T16:01:43.124529Z", + "start_time": "2025-10-31T16:01:43.114843Z" + }, + "execution": { + "iopub.execute_input": "2025-11-01T00:27:03.953875Z", + "iopub.status.busy": "2025-11-01T00:27:03.953794Z", + "iopub.status.idle": "2025-11-01T00:27:03.959558Z", + "shell.execute_reply": "2025-11-01T00:27:03.958963Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Memory Client initialized\n", + " Base URL: http://localhost:8088\n", + " Namespace: redis_university\n", + " Ready for working memory and long-term memory operations\n" + ] + } + ], + "source": [ + "# Initialize Memory Client\n", + "if MEMORY_SERVER_AVAILABLE:\n", + " config = MemoryClientConfig(\n", + " base_url=AGENT_MEMORY_URL,\n", + " default_namespace=\"redis_university\"\n", + " )\n", + " memory_client = MemoryAPIClient(config=config)\n", + " print(\"โœ… Memory Client initialized\")\n", + " print(f\" Base URL: {config.base_url}\")\n", + " print(f\" Namespace: {config.default_namespace}\")\n", + " print(\" Ready for working memory and long-term memory operations\")\n", + "else:\n", + " memory_client = None\n", + " print(\"โš ๏ธ Memory Server not available\")\n", + " print(\" Running with limited functionality\")\n", + " print(\" Some demos will be skipped\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "1f7d14857491bfe8", + "metadata": {}, + "source": [ + "### Create Sample Student Profile\n", + "\n", + "We'll create a sample student profile to use throughout our demos. 
This follows the same pattern from Section 2.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "d7accc8e193ee717", + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-31T16:01:44.956173Z", + "start_time": "2025-10-31T16:01:44.952762Z" + }, + "execution": { + "iopub.execute_input": "2025-11-01T00:27:03.961038Z", + "iopub.status.busy": "2025-11-01T00:27:03.960947Z", + "iopub.status.idle": "2025-11-01T00:27:03.963905Z", + "shell.execute_reply": "2025-11-01T00:27:03.963370Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Student profile created\n", + " Name: Sarah Chen\n", + " Major: Computer Science\n", + " Year: 2\n", + " Interests: machine learning, data science, algorithms\n", + " Completed: CS101, CS201\n", + " Preferred Format: online\n" + ] + } + ], + "source": [ + "# Create sample student profile\n", + "sarah = StudentProfile(\n", + " name=\"Sarah Chen\",\n", + " email=\"sarah.chen@university.edu\",\n", + " major=\"Computer Science\",\n", + " year=2,\n", + " interests=[\"machine learning\", \"data science\", \"algorithms\"],\n", + " completed_courses=[\"CS101\", \"CS201\"],\n", + " current_courses=[\"MATH301\"],\n", + " preferred_format=CourseFormat.ONLINE,\n", + " preferred_difficulty=DifficultyLevel.INTERMEDIATE\n", + ")\n", + "\n", + "print(\"โœ… Student profile created\")\n", + "print(f\" Name: {sarah.name}\")\n", + "print(f\" Major: {sarah.major}\")\n", + "print(f\" Year: {sarah.year}\")\n", + "print(f\" Interests: {', '.join(sarah.interests)}\")\n", + "print(f\" Completed: {', '.join(sarah.completed_courses)}\")\n", + "print(f\" Preferred Format: {sarah.preferred_format.value}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "68ba2022815ad2e8", + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-31T16:01:45.601901Z", + "start_time": "2025-10-31T16:01:45.599017Z" + }, + "execution": { + "iopub.execute_input": "2025-11-01T00:27:03.965126Z", + "iopub.status.busy": "2025-11-01T00:27:03.965039Z", + "iopub.status.idle": "2025-11-01T00:27:03.966814Z", + "shell.execute_reply": "2025-11-01T00:27:03.966471Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "๐ŸŽฏ INITIALIZATION SUMMARY\n", + "\n", + "โœ… Course Manager: Ready\n", + "โœ… LLM (GPT-4o): Ready\n", + "โœ… Memory Client: Ready\n", + "โœ… Student Profile: Sarah Chen\n" + ] + } + ], + "source": [ + "print(\"๐ŸŽฏ INITIALIZATION SUMMARY\")\n", + "print(f\"\\nโœ… Course Manager: Ready\")\n", + "print(f\"โœ… LLM (GPT-4o): Ready\")\n", + "print(f\"{'โœ…' if MEMORY_SERVER_AVAILABLE else 'โš ๏ธ '} Memory Client: {'Ready' if MEMORY_SERVER_AVAILABLE else 'Not Available'}\")\n", + "print(f\"โœ… Student Profile: {sarah.name}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "4e8da5b64eb6b5e1", + "metadata": {}, + "source": [ + "### Initialization Done\n", + "๐Ÿ“‹ What We're Building On:\n", + "- Section 2's RAG foundation (CourseManager, redis_config)\n", + "- Same StudentProfile model\n", + "- Same Redis configuration\n", + "\n", + "โœจ What We're Adding:\n", + "- Memory Client for conversation history\n", + "- Working Memory for session context\n", + "- Long-term Memory for persistent knowledge\n" + ] + }, + { + "cell_type": "markdown", + "id": "6bde21130868fd19", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## ๐Ÿ“š Part 1: Working Memory Fundamentals\n", + "\n", + "### **What is Working Memory?**\n", + "\n", + "Working memory stores **conversation messages** for the current 
session. It enables:\n", + "\n", + "- โœ… **Reference resolution** - \"it\", \"that course\", \"the one you mentioned\"\n", + "- โœ… **Context continuity** - Each message builds on previous messages\n", + "- โœ… **Natural conversations** - Users don't repeat themselves\n", + "\n", + "### **How It Works:**\n", + "\n", + "```\n", + "Turn 1: Load working memory (empty) โ†’ Process query โ†’ Save messages\n", + "Turn 2: Load working memory (1 exchange) โ†’ Process query โ†’ Save messages\n", + "Turn 3: Load working memory (2 exchanges) โ†’ Process query โ†’ Save messages\n", + "```\n", + "\n", + "Each turn has access to all previous messages in the session.\n", + "\n", + "---\n", + "\n", + "## ๐Ÿงช Hands-On: Working Memory in Action\n", + "\n", + "Let's simulate a multi-turn conversation with working memory. We'll break this down step-by-step to see how working memory enables natural conversation flow.\n" + ] + }, + { + "cell_type": "markdown", + "id": "1cc71f00dd15b373", + "metadata": {}, + "source": [ + "### Setup: Create Session and Student IDs\n", + "\n", + "Now that we have our components initialized, let's create session and student identifiers for our working memory demo.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "9359e3bf25eca598", + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-31T16:01:50.077441Z", + "start_time": "2025-10-31T16:01:50.074776Z" + }, + "execution": { + "iopub.execute_input": "2025-11-01T00:27:03.968119Z", + "iopub.status.busy": "2025-11-01T00:27:03.968041Z", + "iopub.status.idle": "2025-11-01T00:27:03.969796Z", + "shell.execute_reply": "2025-11-01T00:27:03.969416Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "๐ŸŽฏ Working Memory Demo Setup\n", + " Student ID: sarah.chen\n", + " Session ID: session_sarah.chen_demo\n", + " Ready to demonstrate multi-turn conversation\n" + ] + } + ], + "source": [ + "# Setup for working memory demo\n", + "student_id = sarah.email.split('@')[0] # \"sarah.chen\"\n", + "session_id = f\"session_{student_id}_demo\"\n", + "\n", + "print(\"๐ŸŽฏ Working Memory Demo Setup\")\n", + "print(f\" Student ID: {student_id}\")\n", + "print(f\" Session ID: {session_id}\")\n", + "print(\" Ready to demonstrate multi-turn conversation\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "ea67f3258827c67a", + "metadata": {}, + "source": [ + "### Turn 1: Initial Query\n", + "\n", + "Let's start with a simple query about a course. This is the first turn, so for a brand-new session working memory will be empty.\n", + "\n", + "We'll break this down into clear steps:\n", + "1. Set up the user query\n", + "2. Load working memory (will be empty on first turn)\n", + "3. Search for the course\n", + "4. Generate a response\n", + "5. Save the conversation to working memory\n", + "\n", + "The sketch below shows all five steps in one place; the cells that follow walk through them one at a time.\n" + ] + },
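{ + "cell_type": "markdown", + "id": "ad0c1b2e3f4a5968", + "metadata": {}, + "source": [ + "#### Optional: The Full Turn Loop at a Glance\n", + "\n", + "Before walking through the steps individually, here is a minimal sketch of the whole load โ†’ retrieve โ†’ generate โ†’ save cycle, using only the components initialized above (`memory_client`, `course_manager`, `llm`). The `run_turn` helper and its prompt wording are our own illustration for this notebook, not an API of the reference agent or the Memory Server client.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bd1c2d3e4f5a6b79", + "metadata": {}, + "outputs": [], + "source": [ + "# Illustrative sketch only -- the cells below perform these same steps one at a time.\n", + "# Assumes memory_client, course_manager, and llm from the setup above.\n", + "\n", + "async def run_turn(session_id: str, user_id: str, query: str) -> str:\n", + "    # Step 2: Load (or create) working memory for this session\n", + "    _, working_memory = await memory_client.get_or_create_working_memory(\n", + "        session_id=session_id,\n", + "        user_id=user_id,\n", + "        model_name=\"gpt-4o\"\n", + "    )\n", + "\n", + "    # Step 3: Retrieve courses relevant to the query\n", + "    courses = await course_manager.search_courses(query, limit=1)\n", + "    course_context = \"\\n\".join(\n", + "        f\"{c.course_code}: {c.title} - {c.description}\" for c in courses\n", + "    )\n", + "\n", + "    # Step 4: Generate a response from conversation history + retrieved context\n", + "    messages = [SystemMessage(content=\"You are a helpful course advisor.\")]\n", + "    for m in working_memory.messages:\n", + "        messages.append(\n", + "            HumanMessage(content=m.content) if m.role == \"user\"\n", + "            else AIMessage(content=m.content)\n", + "        )\n", + "    messages.append(HumanMessage(content=f\"{course_context}\\n\\nUser question: {query}\"))\n", + "    response = llm.invoke(messages).content\n", + "\n", + "    # Step 5: Save both sides of the exchange back to working memory\n", + "    working_memory.messages.extend([\n", + "        MemoryMessage(role=\"user\", content=query),\n", + "        MemoryMessage(role=\"assistant\", content=response),\n", + "    ])\n", + "    await memory_client.put_working_memory(\n", + "        session_id=session_id,\n", + "        memory=working_memory,\n", + "        user_id=user_id,\n", + "        model_name=\"gpt-4o\"\n", + "    )\n", + "    return response\n" + ] + },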
{ + "cell_type": "markdown", + "id": "3af82e6eb4d49750", + "metadata": {}, + "source": [ + "#### Step 1: Set up the user query\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "709f9c69669862b0", + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-31T16:07:57.803898Z", + "start_time": "2025-10-31T16:07:57.802105Z" + }, + "execution": { + "iopub.execute_input": "2025-11-01T00:27:03.970891Z", + "iopub.status.busy": "2025-11-01T00:27:03.970824Z", + "iopub.status.idle": "2025-11-01T00:27:03.972546Z", + "shell.execute_reply": "2025-11-01T00:27:03.972275Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================================================================================\n", + "๐Ÿ“ TURN 1: User asks about a course\n", + "================================================================================\n", + "\n", + "๐Ÿ‘ค User: Tell me about Data Structures and Algorithms\n" + ] + } + ], + "source": [ + "print(\"=\" * 80)\n", + "print(\"๐Ÿ“ TURN 1: User asks about a course\")\n", + "print(\"=\" * 80)\n", + "\n", + "# Define the user's query\n", + "turn1_query = \"Tell me about Data Structures and Algorithms\"\n", + "print(f\"\\n๐Ÿ‘ค User: {turn1_query}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "fca7a35730407f29", + "metadata": {}, + "source": [ + "#### Step 2: Load working memory\n", + "\n", + "On the very first turn of a new session, working memory will be empty. If you have run this notebook before with the same session ID, you will instead see the messages saved during those earlier runs.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "eba535e7baa67844", + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-31T16:07:59.132603Z", + "start_time": "2025-10-31T16:07:59.121297Z" + }, + "execution": { + "iopub.execute_input": "2025-11-01T00:27:03.973714Z", + "iopub.status.busy": "2025-11-01T00:27:03.973646Z", + "iopub.status.idle": "2025-11-01T00:27:03.990291Z", + "shell.execute_reply": "2025-11-01T00:27:03.989931Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20:27:03 httpx INFO HTTP Request: GET http://localhost:8088/v1/working-memory/session_sarah.chen_demo?user_id=sarah.chen&namespace=redis_university&model_name=gpt-4o \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "๐Ÿ“Š Working Memory Status:\n", + " Messages in memory: 30\n", + " Status: Has history\n" + ] + } + ], + "source": [ + "# Load working memory (empty only if this session has no saved messages yet)\n", + "_, turn1_working_memory = await memory_client.get_or_create_working_memory(\n", + " session_id=session_id,\n", + " user_id=student_id,\n", + " model_name=\"gpt-4o\"\n", + ")\n", + "\n", + "print(f\"๐Ÿ“Š Working Memory Status:\")\n", + "print(f\" Messages in memory: {len(turn1_working_memory.messages)}\")\n", + "print(f\" Status: {'Empty (first turn)' if len(turn1_working_memory.messages) == 0 else 'Has history'}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "66aab8077c35d988", + "metadata": {}, + "source": [ + "#### Step 3: Search for the course\n", + "\n", + "Use the course manager to search for courses matching the query.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "bca2cd06e747dd30", + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-31T16:08:01.776194Z", + "start_time": "2025-10-31T16:08:01.244875Z" + }, + "execution": { + "iopub.execute_input": "2025-11-01T00:27:03.995885Z", + "iopub.status.busy": 
"2025-11-01T00:27:03.995821Z", + "iopub.status.idle": "2025-11-01T00:27:04.297836Z", + "shell.execute_reply": "2025-11-01T00:27:04.297221Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "๐Ÿ” Searching for courses...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20:27:04 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Found 1 course(s)\n", + " - CS009: Data Structures and Algorithms\n" + ] + } + ], + "source": [ + "print(f\"\\n๐Ÿ” Searching for courses...\")\n", + "turn1_courses = await course_manager.search_courses(turn1_query, limit=1)\n", + "\n", + "if turn1_courses:\n", + " print(f\" Found {len(turn1_courses)} course(s)\")\n", + "\n", + " # print the course details\n", + " for course in turn1_courses:\n", + " print(f\" - {course.course_code}: {course.title}\")" + ] + }, + { + "cell_type": "markdown", + "id": "3f9bff55ea668e6b", + "metadata": {}, + "source": [ + "#### Step 4: Generate response using LLM\n", + "\n", + "Use the LLM to generate a natural response based on the retrieved course information.\n", + "\n", + "This follows the **RAG pattern**: Retrieve (done in Step 3) โ†’ Augment (add to context) โ†’ Generate (use LLM).\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "a3f1b52618ccea57", + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-31T16:10:51.324011Z", + "start_time": "2025-10-31T16:10:51.321773Z" + }, + "execution": { + "iopub.execute_input": "2025-11-01T00:27:04.299381Z", + "iopub.status.busy": "2025-11-01T00:27:04.299256Z", + "iopub.status.idle": "2025-11-01T00:27:04.301960Z", + "shell.execute_reply": "2025-11-01T00:27:04.301301Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Course context: Course Information:\n", + "- Code: CS009\n", + "- Title: Data Structures and Algorithms\n", + "- Description: Study of fundamental data structures and algorithms. 
Arrays, linked lists, trees, graphs, sorting, and searching.\n", + "- Prerequisites: CS001, CS001\n", + "- Credits: 4\n", + "\n" + ] + } + ], + "source": [ + "course = turn1_courses[0]\n", + "\n", + "course_context = f\"\"\"Course Information:\n", + "- Code: {course.course_code}\n", + "- Title: {course.title}\n", + "- Description: {course.description}\n", + "- Prerequisites: {', '.join([p.course_code for p in course.prerequisites]) if course.prerequisites else 'None'}\n", + "- Credits: {course.credits}\n", + "\"\"\"\n", + "\n", + "print(f\" Course context: {course_context}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "c2cef0a286c2498e", + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-31T16:11:03.157009Z", + "start_time": "2025-10-31T16:10:57.981518Z" + }, + "execution": { + "iopub.execute_input": "2025-11-01T00:27:04.303357Z", + "iopub.status.busy": "2025-11-01T00:27:04.303229Z", + "iopub.status.idle": "2025-11-01T00:27:06.483692Z", + "shell.execute_reply": "2025-11-01T00:27:06.483173Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "๐Ÿ’ญ Generating response using LLM...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20:27:06 httpx INFO HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "๐Ÿค– Agent: The course \"Data Structures and Algorithms\" (CS009) is a 4-credit course that focuses on the study of fundamental data structures and algorithms. In this course, you will learn about various data structures such as arrays, linked lists, trees, and graphs. Additionally, the course covers essential algorithms related to sorting and searching. \n", + "\n", + "To enroll in this course, you need to have completed the prerequisite course CS001. This foundational knowledge will help you better understand the concepts taught in CS009. The course is designed to provide you with a solid understanding of how data can be organized and manipulated efficiently, which is crucial for solving complex computational problems.\n" + ] + } + ], + "source": [ + "# Build messages for LLM\n", + "turn1_messages = [\n", + " SystemMessage(content=\"You are a helpful course advisor. 
Answer questions about courses based on the provided information.\"),\n", + " HumanMessage(content=f\"{course_context}\\n\\nUser question: {turn1_query}\")\n", + "]\n", + "\n", + "# Generate response using LLM\n", + "print(f\"\\n๐Ÿ’ญ Generating response using LLM...\")\n", + "turn1_response = llm.invoke(turn1_messages).content\n", + "\n", + "print(f\"\\n๐Ÿค– Agent: {turn1_response}\")" + ] + }, + { + "cell_type": "markdown", + "id": "b7017ac79a9f5b8e", + "metadata": {}, + "source": [ + "#### Step 5: Save to working memory\n", + "\n", + "Add both the user query and assistant response to working memory for future turns.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "f957e507de0b77ef", + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-31T16:11:06.124034Z", + "start_time": "2025-10-31T16:11:06.113522Z" + }, + "execution": { + "iopub.execute_input": "2025-11-01T00:27:06.485286Z", + "iopub.status.busy": "2025-11-01T00:27:06.485168Z", + "iopub.status.idle": "2025-11-01T00:27:06.498577Z", + "shell.execute_reply": "2025-11-01T00:27:06.498172Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20:27:06 httpx INFO HTTP Request: PUT http://localhost:8088/v1/working-memory/session_sarah.chen_demo?user_id=sarah.chen&model_name=gpt-4o \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "โœ… Saved to working memory\n", + " Messages now in memory: 32\n" + ] + } + ], + "source": [ + "if MEMORY_SERVER_AVAILABLE:\n", + " # Add messages to working memory\n", + " turn1_working_memory.messages.extend([\n", + " MemoryMessage(role=\"user\", content=turn1_query),\n", + " MemoryMessage(role=\"assistant\", content=turn1_response)\n", + " ])\n", + "\n", + " # Save to Memory Server\n", + " await memory_client.put_working_memory(\n", + " session_id=session_id,\n", + " memory=turn1_working_memory,\n", + " user_id=student_id,\n", + " model_name=\"gpt-4o\"\n", + " )\n", + "\n", + " print(f\"\\nโœ… Saved to working memory\")\n", + " print(f\" Messages now in memory: {len(turn1_working_memory.messages)}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "a02ac18016d1bec2", + "metadata": {}, + "source": [ + "### What Just Happened in Turn 1?\n", + "\n", + "**Initial State:**\n", + "- Working memory was empty (first turn)\n", + "- No conversation history available\n", + "\n", + "**Actions (RAG Pattern):**\n", + "1. **Retrieve:** Searched for Data Structures and Algorithms in the course database\n", + "2. **Augment:** Added course information to LLM context\n", + "3. **Generate:** LLM created a natural language response\n", + "4. **Save:** Stored conversation in working memory\n", + "\n", + "**Result:**\n", + "- Working memory now contains 2 messages (1 user, 1 assistant)\n", + "- This history will be available for the next turn\n", + "\n", + "**Key Insight:** Even the first turn uses the LLM to generate natural responses based on retrieved information.\n", + "\n", + "---\n" + ] + }, + { + "cell_type": "markdown", + "id": "afb9cb241d57f6b2", + "metadata": {}, + "source": [ + "### Turn 2: Follow-up with Pronoun Reference\n", + "\n", + "Now let's ask a follow-up question using \"its\" - a pronoun that requires context from Turn 1.\n", + "\n", + "We'll break this down into steps:\n", + "1. Set up the query with pronoun reference\n", + "2. Load working memory (now contains Turn 1)\n", + "3. Build context with conversation history\n", + "4. Generate response using LLM\n", + "5. 
Save to working memory\n" + ] + }, + { + "cell_type": "markdown", + "id": "9589179c5c3da16", + "metadata": {}, + "source": [ + "#### Step 1: Set up the query\n" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "afdae986f84bc666", + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-31T16:11:10.864359Z", + "start_time": "2025-10-31T16:11:10.861423Z" + }, + "execution": { + "iopub.execute_input": "2025-11-01T00:27:06.500348Z", + "iopub.status.busy": "2025-11-01T00:27:06.500191Z", + "iopub.status.idle": "2025-11-01T00:27:06.502599Z", + "shell.execute_reply": "2025-11-01T00:27:06.502015Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "================================================================================\n", + "๐Ÿ“ TURN 2: User uses pronoun reference ('its')\n", + "================================================================================\n", + "\n", + "๐Ÿ‘ค User: What are its prerequisites?\n", + " Note: 'its' refers to Data Structures and Algorithms from Turn 1\n" + ] + } + ], + "source": [ + "if MEMORY_SERVER_AVAILABLE:\n", + " print(\"\\n\" + \"=\" * 80)\n", + " print(\"๐Ÿ“ TURN 2: User uses pronoun reference ('its')\")\n", + " print(\"=\" * 80)\n", + "\n", + " turn2_query = \"What are its prerequisites?\"\n", + " print(f\"\\n๐Ÿ‘ค User: {turn2_query}\")\n", + " print(f\" Note: 'its' refers to Data Structures and Algorithms from Turn 1\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "4b48f20026071368", + "metadata": {}, + "source": [ + "#### Step 2: Load working memory\n", + "\n", + "This time, working memory will contain the conversation from Turn 1.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "a979bc4af565ffc8", + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-31T16:11:12.939612Z", + "start_time": "2025-10-31T16:11:12.929347Z" + }, + "execution": { + "iopub.execute_input": "2025-11-01T00:27:06.504083Z", + "iopub.status.busy": "2025-11-01T00:27:06.503981Z", + "iopub.status.idle": "2025-11-01T00:27:06.510837Z", + "shell.execute_reply": "2025-11-01T00:27:06.510331Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20:27:06 httpx INFO HTTP Request: GET http://localhost:8088/v1/working-memory/session_sarah.chen_demo?user_id=sarah.chen&namespace=redis_university&model_name=gpt-4o \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "๐Ÿ“Š Working Memory Status:\n", + " Messages in memory: 32\n", + " Contains: Turn 1 conversation\n" + ] + } + ], + "source": [ + "if MEMORY_SERVER_AVAILABLE:\n", + " # Load working memory (now has 1 exchange from Turn 1)\n", + " _, turn2_working_memory = await memory_client.get_or_create_working_memory(\n", + " session_id=session_id,\n", + " user_id=student_id,\n", + " model_name=\"gpt-4o\"\n", + " )\n", + "\n", + " print(f\"\\n๐Ÿ“Š Working Memory Status:\")\n", + " print(f\" Messages in memory: {len(turn2_working_memory.messages)}\")\n", + " print(f\" Contains: Turn 1 conversation\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "76554aaeb0e3cbbe", + "metadata": {}, + "source": [ + "#### Step 3: Build context with conversation history\n", + "\n", + "To resolve the pronoun \"its\", we need to include the conversation history in the LLM context.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "bfb4ec94f0f8ac26", + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-31T16:11:14.247764Z", + "start_time": 
"2025-10-31T16:11:14.244686Z" + }, + "execution": { + "iopub.execute_input": "2025-11-01T00:27:06.512350Z", + "iopub.status.busy": "2025-11-01T00:27:06.512252Z", + "iopub.status.idle": "2025-11-01T00:27:06.514669Z", + "shell.execute_reply": "2025-11-01T00:27:06.514319Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "๐Ÿ”ง Building context with conversation history...\n", + " Total messages in context: 34\n", + " Includes: System prompt + Turn 1 history + current query\n" + ] + } + ], + "source": [ + "if MEMORY_SERVER_AVAILABLE:\n", + " print(f\"\\n๐Ÿ”ง Building context with conversation history...\")\n", + "\n", + " # Start with system message\n", + " turn2_messages = [\n", + " SystemMessage(content=\"You are a helpful course advisor. Use conversation history to resolve references like 'it', 'that course', etc.\")\n", + " ]\n", + "\n", + " # Add conversation history from working memory\n", + " for msg in turn2_working_memory.messages:\n", + " if msg.role == \"user\":\n", + " turn2_messages.append(HumanMessage(content=msg.content))\n", + " elif msg.role == \"assistant\":\n", + " turn2_messages.append(AIMessage(content=msg.content))\n", + "\n", + " # Add current query\n", + " turn2_messages.append(HumanMessage(content=turn2_query))\n", + "\n", + " print(f\" Total messages in context: {len(turn2_messages)}\")\n", + " print(f\" Includes: System prompt + Turn 1 history + current query\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "a4cc54a84997e055", + "metadata": {}, + "source": [ + "#### Step 4: Generate response using LLM\n", + "\n", + "The LLM can now resolve \"its\" by looking at the conversation history.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "a086f086fa37da80", + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-31T16:11:18.369099Z", + "start_time": "2025-10-31T16:11:16.670757Z" + }, + "execution": { + "iopub.execute_input": "2025-11-01T00:27:06.516010Z", + "iopub.status.busy": "2025-11-01T00:27:06.515911Z", + "iopub.status.idle": "2025-11-01T00:27:07.373264Z", + "shell.execute_reply": "2025-11-01T00:27:07.372268Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "๐Ÿ’ญ LLM resolving 'its' using conversation history...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20:27:07 httpx INFO HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "๐Ÿค– Agent: The prerequisite for the \"Data Structures and Algorithms\" course (CS009) is CS001. 
You need to have completed CS001 to enroll in CS009, as it provides the foundational knowledge necessary for understanding the more advanced concepts covered in the course.\n" + ] + } + ], + "source": [ + "if MEMORY_SERVER_AVAILABLE:\n", + " print(f\"\\n💭 LLM resolving 'its' using conversation history...\")\n", + " turn2_response = llm.invoke(turn2_messages).content\n", + "\n", + " print(f\"\\n🤖 Agent: {turn2_response}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "f186107902cd150a", + "metadata": {}, + "source": [ + "#### Step 5: Save to working memory\n", + "\n", + "Add this turn's conversation to working memory for future turns.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "c68fbf3ce5198b43", + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-31T16:11:30.487163Z", + "start_time": "2025-10-31T16:11:30.475678Z" + }, + "execution": { + "iopub.execute_input": "2025-11-01T00:27:07.375831Z", + "iopub.status.busy": "2025-11-01T00:27:07.375624Z", + "iopub.status.idle": "2025-11-01T00:27:07.391483Z", + "shell.execute_reply": "2025-11-01T00:27:07.390499Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20:27:07 httpx INFO HTTP Request: PUT http://localhost:8088/v1/working-memory/session_sarah.chen_demo?user_id=sarah.chen&model_name=gpt-4o \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "✅ Saved to working memory\n", + " Messages now in memory: 34\n" + ] + } + ], + "source": [ + "if MEMORY_SERVER_AVAILABLE:\n", + " # Add messages to working memory\n", + " turn2_working_memory.messages.extend([\n", + " MemoryMessage(role=\"user\", content=turn2_query),\n", + " MemoryMessage(role=\"assistant\", content=turn2_response)\n", + " ])\n", + "\n", + " # Save to Memory Server\n", + " await memory_client.put_working_memory(\n", + " session_id=session_id,\n", + " memory=turn2_working_memory,\n", + " user_id=student_id,\n", + " model_name=\"gpt-4o\"\n", + " )\n", + "\n", + " print(f\"\\n✅ Saved to working memory\")\n", + " print(f\" Messages now in memory: {len(turn2_working_memory.messages)}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "f326d23a6ee980b3", + "metadata": {}, + "source": [ + "### What Just Happened in Turn 2?\n", + "\n", + "**Initial State:**\n", + "- Working memory contained Turn 1 conversation (2 messages)\n", + "- User asked about \"its prerequisites\" - pronoun reference\n", + "\n", + "**Actions:**\n", + "1. Loaded working memory with Turn 1 history\n", + "2. Built context including conversation history\n", + "3. LLM resolved \"its\" → Data Structures and Algorithms (from Turn 1)\n", + "4. Generated a response about the prerequisites of Data Structures and Algorithms\n", + "5. 
Saved updated conversation to working memory\n", + "\n", + "**Result:**\n", + "- Working memory now holds both exchanges (4 messages in a fresh session)\n", + "- LLM successfully resolved pronoun reference using conversation history\n", + "- Natural conversation flow maintained\n", + "\n", + "**Key Insight:** Without working memory, the LLM wouldn't know what \"its\" refers to!\n", + "\n", + "---\n" + ] + }, + { + "cell_type": "markdown", + "id": "be825d46a5c61955", + "metadata": {}, + "source": [ + "### Turn 3: Another Follow-up\n", + "\n", + "Let's ask one more follow-up question to show that conversation continuity holds across additional turns.\n" + ] + }, + { + "cell_type": "markdown", + "id": "8fd74fd54662fd1f", + "metadata": {}, + "source": [ + "#### Step 1: Set up the query\n" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "208fd300637bb36a", + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-31T16:12:49.572832Z", + "start_time": "2025-10-31T16:12:49.571009Z" + }, + "execution": { + "iopub.execute_input": "2025-11-01T00:27:07.393475Z", + "iopub.status.busy": "2025-11-01T00:27:07.393344Z", + "iopub.status.idle": "2025-11-01T00:27:07.396091Z", + "shell.execute_reply": "2025-11-01T00:27:07.395590Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "================================================================================\n", + "📝 TURN 3: User asks another follow-up\n", + "================================================================================\n", + "\n", + "👤 User: Can I take it next semester?\n", + " Note: 'it' refers to Data Structures and Algorithms from Turn 1\n" + ] + } + ], + "source": [ + "if MEMORY_SERVER_AVAILABLE:\n", + " print(\"\\n\" + \"=\" * 80)\n", + " print(\"📝 TURN 3: User asks another follow-up\")\n", + " print(\"=\" * 80)\n", + "\n", + " turn3_query = \"Can I take it next semester?\"\n", + " print(f\"\\n👤 User: {turn3_query}\")\n", + " print(f\" Note: 'it' refers to Data Structures and Algorithms from Turn 1\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "86331ac55a6ecde2", + "metadata": {}, + "source": [ + "#### Step 2: Load working memory with full conversation history\n" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "2e44ceccb6c97653", + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-31T16:12:55.090836Z", + "start_time": "2025-10-31T16:12:55.080957Z" + }, + "execution": { + "iopub.execute_input": "2025-11-01T00:27:07.397918Z", + "iopub.status.busy": "2025-11-01T00:27:07.397777Z", + "iopub.status.idle": "2025-11-01T00:27:07.406553Z", + "shell.execute_reply": "2025-11-01T00:27:07.406020Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20:27:07 httpx INFO HTTP Request: GET http://localhost:8088/v1/working-memory/session_sarah.chen_demo?user_id=sarah.chen&namespace=redis_university&model_name=gpt-4o \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "📊 Working Memory Status:\n", + " Messages in memory: 34\n", + " Contains: Turns 1 and 2\n" + ] + } + ], + "source": [ + "if MEMORY_SERVER_AVAILABLE:\n", + " # Load working memory (now has 2 exchanges)\n", + " _, turn3_working_memory = await memory_client.get_or_create_working_memory(\n", + " session_id=session_id,\n", + " user_id=student_id,\n", + " model_name=\"gpt-4o\"\n", + " )\n", + "\n", + " print(f\"\\n📊 Working Memory Status:\")\n", + " print(f\" Messages in memory: {len(turn3_working_memory.messages)}\")\n", 
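+ " # The server returns everything stored under this session_id, so the count\n", + " # may include messages accumulated by earlier runs of this notebook.\n",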
+ " print(f\" Contains: Turns 1 and 2\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "a282014d4ae67ba8", + "metadata": {}, + "source": [ + "#### Step 3: Build context and generate response\n" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "5e1b23372c5c1b00", + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-31T16:13:14.678278Z", + "start_time": "2025-10-31T16:13:12.680180Z" + }, + "execution": { + "iopub.execute_input": "2025-11-01T00:27:07.408150Z", + "iopub.status.busy": "2025-11-01T00:27:07.408003Z", + "iopub.status.idle": "2025-11-01T00:27:09.180481Z", + "shell.execute_reply": "2025-11-01T00:27:09.179896Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Total messages in context: 36\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20:27:09 httpx INFO HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "๐Ÿค– Agent: To determine if you can take the \"Data Structures and Algorithms\" course (CS009) next semester, you'll need to check the course schedule for the upcoming semester at your institution. Ensure that you have completed the prerequisite course, CS001, before enrolling. If you meet the prerequisite and the course is offered, you should be able to register for it. It's always a good idea to consult with your academic advisor to confirm your eligibility and to help with planning your course schedule.\n" + ] + } + ], + "source": [ + "if MEMORY_SERVER_AVAILABLE:\n", + " # Build context with full conversation history\n", + " turn3_messages = [\n", + " SystemMessage(content=\"You are a helpful course advisor. Use conversation history to resolve references.\")\n", + " ]\n", + "\n", + " for msg in turn3_working_memory.messages:\n", + " if msg.role == \"user\":\n", + " turn3_messages.append(HumanMessage(content=msg.content))\n", + " elif msg.role == \"assistant\":\n", + " turn3_messages.append(AIMessage(content=msg.content))\n", + "\n", + " turn3_messages.append(HumanMessage(content=turn3_query))\n", + "\n", + " print(f\" Total messages in context: {len(turn3_messages)}\")\n", + "\n", + " # Generate response\n", + " turn3_response = llm.invoke(turn3_messages).content\n", + "\n", + " print(f\"\\n๐Ÿค– Agent: {turn3_response}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "5661b86d35e4f97d", + "metadata": {}, + "source": [ + "\n", + "\n", + "โœ… DEMO COMPLETE: Working memory enabled natural conversation flow!\n", + "\n", + "---\n", + "### Working Memory Demo Summary\n", + "\n", + "Let's review what we just demonstrated across three conversation turns.\n", + "\n", + "## ๐ŸŽฏ Working Memory Demo Summary\n", + "### ๐Ÿ“Š What Happened:\n", + "**Turn 1:** 'Tell me about Data Structures and Algorithms'\n", + "- Working memory: empty (first turn)\n", + "- Stored query and response\n", + "\n", + "**Turn 2:** 'What are its prerequisites?'\n", + "- Working memory: 1 exchange (Turn 1)\n", + "- LLM resolved 'its' โ†’ Data Structures and Algorithms using history\n", + "- Generated accurate response\n", + "\n", + "**Turn 3:** 'Can I take it next semester?'\n", + "- Working memory: 2 exchanges (Turns 1-2)\n", + "- LLM resolved 'it' โ†’ Data Structures and Algorithms using history\n", + "- Maintained conversation continuity\n", + "\n", + "#### โœ… Key Benefits:\n", + "- Natural conversation flow\n", + "- Pronoun reference resolution\n", + "- No need to repeat context\n", + "- 
Seamless user experience\n", + "\n", + "#### ❌ Without Working Memory:\n", + "- 'What are its prerequisites?' → The agent can't tell what 'its' refers to, so it can only give a generic answer from the LLM's training data\n", + "- Each query is isolated\n", + "- User must repeat context every time\n", + "\n", + "### Key Insight: Conversation Context Type\n", + "\n", + "Working memory provides the **Conversation Context** - the third context type from Section 1:\n", + "\n", + "1. **System Context** - Role and instructions (static)\n", + "2. **User Context** - Profile and preferences (dynamic, user-specific)\n", + "3. **Conversation Context** - Working memory (dynamic, session-specific) ← **We just demonstrated this!**\n", + "4. **Retrieved Context** - RAG results (dynamic, query-specific)\n", + "\n", + "Without working memory, we only had 3 context types. Now we have all 4!\n" + ] + }, + { + "cell_type": "markdown", + "id": "bd2a4b8f-ba91-49d0-8f24-ad49acb0eadb", + "metadata": {}, + "source": [ + "---\n", + "# 📚 Part 2: Long-term Memory for Context Engineering\n", + "\n", + "## What is Long-term Memory?\n", + "\n", + "Long-term memory enables AI agents to store **persistent facts, preferences, and goals** across sessions. This is crucial for context engineering because it allows agents to:\n", + "\n", + "- **Personalize** interactions by remembering user preferences\n", + "- **Accumulate knowledge** about users over time\n", + "- **Maintain continuity** across multiple conversations\n", + "- **Search efficiently** using semantic vector search\n", + "\n", + "### How It Works\n", + "\n", + "```\n", + "Session 1: User shares preferences → Store in long-term memory\n", + "Session 2: User asks for recommendations → Search memory → Personalized response\n", + "Session 3: User updates preferences → Update memory accordingly\n", + "```\n", + "\n", + "---\n", + "\n", + "## Three Types of Long-term Memory\n", + "\n", + "The Agent Memory Server supports three distinct memory types, each optimized for different kinds of information:\n", + "\n", + "### 1. Semantic Memory - Facts and Knowledge\n", + "\n", + "**Purpose:** Store timeless facts, preferences, and knowledge independent of when they were learned.\n", + "\n", + "**Examples:**\n", + "- \"Student's major is Computer Science\"\n", + "- \"Student prefers online courses\"\n", + "- \"Student wants to graduate in Spring 2026\"\n", + "- \"Student is interested in machine learning\"\n", + "\n", + "**When to use:** Information that remains true regardless of time context.\n", + "\n", + "---\n", + "\n", + "### 2. Episodic Memory - Events and Experiences\n", + "\n", + "**Purpose:** Store time-bound events and experiences where sequence matters.\n", + "\n", + "**Examples:**\n", + "- \"Student enrolled in CS101 on 2024-09-15\"\n", + "- \"Student completed CS101 with grade A on 2024-12-10\"\n", + "- \"Student asked about machine learning courses on 2024-09-20\"\n", + "\n", + "**When to use:** Timeline-based information where timing or sequence is important.\n", + "\n", + "---\n", + "\n", + "### 3. 
Message Memory - Context-Rich Conversations\n", + "\n", + "**Purpose:** Store full conversation snippets where complete context is crucial.\n", + "\n", + "**Examples:**\n", + "- Detailed career planning discussion with nuanced advice\n", + "- Professor's specific guidance about research opportunities\n", + "- Student's explanation of personal learning challenges\n", + "\n", + "**When to use:** When summary would lose important nuance, tone, or exact wording.\n", + "\n", + "**โš ๏ธ Use sparingly** - Message memories are token-expensive!\n", + "\n", + "---\n", + "\n", + "## ๐ŸŽฏ Choosing the Right Memory Type\n", + "\n", + "### Decision Framework\n", + "\n", + "**Ask yourself these questions:**\n", + "\n", + "1. **Can you extract a simple fact?** โ†’ Use **Semantic**\n", + "2. **Does timing matter?** โ†’ Use **Episodic**\n", + "3. **Is full context crucial?** โ†’ Use **Message** (rarely)\n", + "\n", + "**Default strategy: Prefer Semantic** - they're compact, searchable, and efficient.\n", + "\n", + "---\n", + "\n", + "### Quick Reference Table\n", + "\n", + "| Information Type | Memory Type | Example |\n", + "|-----------------|-------------|----------|\n", + "| Preference | Semantic | \"Prefers morning classes\" |\n", + "| Fact | Semantic | \"Major is Computer Science\" |\n", + "| Goal | Semantic | \"Wants to graduate in 2026\" |\n", + "| Event | Episodic | \"Enrolled in CS401 on 2024-09-15\" |\n", + "| Timeline | Episodic | \"Completed CS101, then CS201\" |\n", + "| Complex discussion | Message | [Full career planning conversation] |\n", + "| Nuanced advice | Message | [Professor's detailed guidance] |\n", + "\n", + "---\n", + "\n", + "## Examples: Right vs. Wrong Choices\n", + "\n", + "### Scenario 1: Student States Preference\n", + "\n", + "**User says:** \"I prefer online courses because I work during the day.\"\n", + "\n", + "โŒ **Wrong - Message memory (too verbose):**\n", + "```python\n", + "memory = \"Student said: 'I prefer online courses because I work during the day.'\"\n", + "```\n", + "\n", + "โœ… **Right - Semantic memories (extracted facts):**\n", + "```python\n", + "memory1 = \"Student prefers online courses\"\n", + "memory2 = \"Student works during the day\"\n", + "```\n", + "\n", + "**Why:** Simple facts don't need verbatim storage.\n", + "\n", + "---\n", + "\n", + "### Scenario 2: Course Completion\n", + "\n", + "**User says:** \"I just finished CS101 last week!\"\n", + "\n", + "โŒ **Wrong - Semantic (loses temporal context):**\n", + "```python\n", + "memory = \"Student completed CS101\"\n", + "```\n", + "\n", + "โœ… **Right - Episodic (preserves timeline):**\n", + "```python\n", + "memory = \"Student completed CS101 on 2024-10-20\"\n", + "```\n", + "\n", + "**Why:** Timeline matters for prerequisites and future planning.\n", + "\n", + "---\n", + "\n", + "### Scenario 3: Complex Career Advice\n", + "\n", + "**Context:** 20-message discussion about career path including nuanced advice about research vs. 
industry, application timing, and specific companies to target.\n", + "\n", + "โŒ **Wrong - Semantic (loses too much context):**\n", + "```python\n", + "memory = \"Student discussed career planning\"\n", + "```\n", + "\n", + "โœ… **Right - Message memory (preserves full context):**\n", + "```python\n", + "memory = [Full conversation thread with all nuance]\n", + "```\n", + "\n", + "**Why:** Details and context are critical; summary would be inadequate.\n", + "\n", + "---\n", + "\n", + "## Key Takeaways\n", + "\n", + "- **Most memories should be semantic** - efficient and searchable\n", + "- **Use episodic when sequence matters** - track progress and timeline\n", + "- **Use message rarely** - only when context cannot be summarized\n", + "- **Effective memory selection improves personalization** and reduces token usage\n", + "\n", + "---\n", + "\n", + "## ๐Ÿงช Hands-On: Long-term Memory in Action\n", + "\n", + "Let's put these concepts into practice with code examples..." + ] + }, + { + "cell_type": "markdown", + "id": "6211363411414ffa", + "metadata": {}, + "source": [ + "### Setup: Student ID for Long-term Memory\n", + "\n", + "Long-term memories are user-scoped, so we need a student ID.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "d50c55afc8fc7de3", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:09.182157Z", + "iopub.status.busy": "2025-11-01T00:27:09.182059Z", + "iopub.status.idle": "2025-11-01T00:27:09.184099Z", + "shell.execute_reply": "2025-11-01T00:27:09.183662Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "๐ŸŽฏ Long-term Memory Demo Setup\n", + " Student ID: sarah_chen\n", + " Ready to store and search persistent memories\n" + ] + } + ], + "source": [ + "# Setup for long-term memory demo\n", + "lt_student_id = \"sarah_chen\"\n", + "\n", + "print(\"๐ŸŽฏ Long-term Memory Demo Setup\")\n", + "print(f\" Student ID: {lt_student_id}\")\n", + "print(\" Ready to store and search persistent memories\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "3f726e5d5efa27d7", + "metadata": {}, + "source": [ + "### Step 1: Store Semantic Memories (Facts)\n", + "\n", + "Semantic memories are timeless facts about the student. 
Let's store several facts about Sarah's preferences and academic status.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "1a1e9048102a2a1d", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:09.185253Z", + "iopub.status.busy": "2025-11-01T00:27:09.185157Z", + "iopub.status.idle": "2025-11-01T00:27:09.195339Z", + "shell.execute_reply": "2025-11-01T00:27:09.195046Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================================================================================\n", + "📝 STEP 1: Storing Semantic Memories (Facts)\n", + "================================================================================\n", + "\n", + "📝 Storing 6 semantic memories...\n", + "20:27:09 httpx INFO HTTP Request: POST http://localhost:8088/v1/long-term-memory/ \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " ✅ Student is currently taking Linear Algebra\n", + "\n", + "✅ Stored 6 semantic memories\n", + " Memory type: semantic (timeless facts)\n", + " Topics: preferences, academic_info\n" + ] + } + ], + "source": [ + "print(\"=\" * 80)\n", + "print(\"📝 STEP 1: Storing Semantic Memories (Facts)\")\n", + "print(\"=\" * 80)\n", + "\n", + "# Define semantic memories (timeless facts)\n", + "semantic_memories = [\n", + " \"Student prefers online courses over in-person classes\",\n", + " \"Student's major is Computer Science with focus on AI/ML\",\n", + " \"Student wants to graduate in Spring 2026\",\n", + " \"Student prefers morning classes, no classes on Fridays\",\n", + " \"Student has completed Introduction to Programming and Data Structures\",\n", + " \"Student is currently taking Linear Algebra\"\n", + "]\n", + "print(f\"\\n📝 Storing {len(semantic_memories)} semantic memories...\")\n", + "\n", + "# Store each semantic memory (store and log inside the loop so all 6 are saved)\n", + "for memory_text in semantic_memories:\n", + " memory_record = ClientMemoryRecord(\n", + " text=memory_text,\n", + " user_id=lt_student_id,\n", + " memory_type=\"semantic\",\n", + " topics=[\"preferences\", \"academic_info\"]\n", + " )\n", + " await memory_client.create_long_term_memory([memory_record])\n", + " print(f\" ✅ {memory_text}\")\n", + "\n", + "print(f\"\\n✅ Stored {len(semantic_memories)} semantic memories\")\n", + "print(\" Memory type: semantic (timeless facts)\")\n", + "print(\" Topics: preferences, academic_info\")" + ] + }, + { + "cell_type": "markdown", + "id": "b9e842c9e4ece988", + "metadata": {}, + "source": [ + "### What We Just Did: Semantic Memories\n", + "\n", + "**Stored 6 semantic memories:**\n", + "- Student preferences (online courses, morning classes)\n", + "- Academic information (major, graduation date)\n", + "- Course history (completed, current)\n", + "\n", + "**Why semantic?**\n", + "- These are timeless facts\n", + "- No specific date/time context needed\n", + "- Compact and efficient\n", + "\n", + "**How they're stored:**\n", + "- Vector-indexed for semantic search\n", + "- Tagged with topics for organization\n", + "- Automatically deduplicated\n", + "\n", + "---\n" + ] + }, + { + "cell_type": "markdown", + "id": "5ac56855543c88db", + "metadata": {}, + "source": [ + "### Step 2: Store Episodic Memories (Events)\n", + "\n", + "Episodic memories are time-bound events. 
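\n", + "\n", + "For contrast, here is a minimal sketch of the same underlying fact captured both ways, reusing this notebook's `ClientMemoryRecord` and `lt_student_id` (the variable names `semantic` and `episodic` are illustrative only):\n", + "\n", + "```python\n", + "# Illustrative sketch - one fact, two memory types\n", + "semantic = ClientMemoryRecord(\n", + "    text=\"Student has completed Introduction to Programming\",  # timeless fact\n", + "    user_id=lt_student_id, memory_type=\"semantic\", topics=[\"academic_info\"]\n", + ")\n", + "episodic = ClientMemoryRecord(\n", + "    text=\"Student completed Introduction to Programming with grade A on 2024-12-15\",  # dated event\n", + "    user_id=lt_student_id, memory_type=\"episodic\", topics=[\"courses\"]\n", + ")\n", + "```\n", + "\n", + "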
Let's store some events from Sarah's academic timeline.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "a447e552d130793d", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:09.196669Z", + "iopub.status.busy": "2025-11-01T00:27:09.196596Z", + "iopub.status.idle": "2025-11-01T00:27:09.205846Z", + "shell.execute_reply": "2025-11-01T00:27:09.205095Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "================================================================================\n", + "๐Ÿ“ STEP 2: Storing Episodic Memories (Events)\n", + "================================================================================\n", + "\n", + "๐Ÿ“ Storing 3 episodic memories...\n", + "20:27:09 httpx INFO HTTP Request: POST http://localhost:8088/v1/long-term-memory/ \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " โœ… Student enrolled in Introduction to Programming on 2024-09-01\n", + "20:27:09 httpx INFO HTTP Request: POST http://localhost:8088/v1/long-term-memory/ \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " โœ… Student completed Introduction to Programming with grade A on 2024-12-15\n", + "20:27:09 httpx INFO HTTP Request: POST http://localhost:8088/v1/long-term-memory/ \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " โœ… Student asked about machine learning courses on 2024-09-20\n", + "\n", + "โœ… Stored 3 episodic memories\n", + " Memory type: episodic (time-bound events)\n", + " Topics: enrollment, courses\n" + ] + } + ], + "source": [ + "\n", + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"๐Ÿ“ STEP 2: Storing Episodic Memories (Events)\")\n", + "print(\"=\" * 80)\n", + "\n", + "# Define episodic memories (time-bound events)\n", + "episodic_memories = [\n", + " \"Student enrolled in Introduction to Programming on 2024-09-01\",\n", + " \"Student completed Introduction to Programming with grade A on 2024-12-15\",\n", + " \"Student asked about machine learning courses on 2024-09-20\"\n", + "]\n", + "\n", + "print(f\"\\n๐Ÿ“ Storing {len(episodic_memories)} episodic memories...\")\n", + "\n", + "# Store each episodic memory\n", + "for memory_text in episodic_memories:\n", + " memory_record = ClientMemoryRecord(\n", + " text=memory_text,\n", + " user_id=lt_student_id,\n", + " memory_type=\"episodic\",\n", + " topics=[\"enrollment\", \"courses\"]\n", + " )\n", + " await memory_client.create_long_term_memory([memory_record])\n", + " print(f\" โœ… {memory_text}\")\n", + "\n", + "print(f\"\\nโœ… Stored {len(episodic_memories)} episodic memories\")\n", + "print(\" Memory type: episodic (time-bound events)\")\n", + "print(\" Topics: enrollment, courses\")" + ] + }, + { + "cell_type": "markdown", + "id": "6b98104958320ca2", + "metadata": {}, + "source": [ + "### What We Just Did: Episodic Memories\n", + "\n", + "**Stored 3 episodic memories:**\n", + "- Enrollment event (Introduction to Programming on 2024-09-01)\n", + "- Completion event (Introduction to Programming with grade A on 2024-12-15)\n", + "- Interaction event (asked about ML courses on 2024-09-20)\n", + "\n", + "**Why episodic?**\n", + "- These are time-bound events\n", + "- Timing and sequence matter\n", + "- Captures academic timeline\n", + "\n", + "**Difference from semantic:**\n", + "- Semantic: \"Student has completed Introduction to Programming\" (timeless fact)\n", + "- Episodic: \"Student 
completed Introduction to Programming with grade A on 2024-12-15\" (specific event)\n", + "\n", + "---\n" + ] + }, + { + "cell_type": "markdown", + "id": "729b8ebf272c96a", + "metadata": {}, + "source": [ + "### Step 3: Search Long-term Memory\n", + "\n", + "Now let's search our long-term memories using natural language queries. The system will use semantic search to find relevant memories.\n" + ] + }, + { + "cell_type": "markdown", + "id": "3061e6609af950e6", + "metadata": {}, + "source": [ + "#### Query 1: What does the student prefer?\n" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "81623ed1f8e4fe3b", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:09.207370Z", + "iopub.status.busy": "2025-11-01T00:27:09.207285Z", + "iopub.status.idle": "2025-11-01T00:27:09.427203Z", + "shell.execute_reply": "2025-11-01T00:27:09.426344Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "================================================================================\n", + "๐Ÿ“ STEP 3: Searching Long-term Memory\n", + "================================================================================\n", + "\n", + "๐Ÿ” Query: 'What does the student prefer?'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20:27:09 httpx INFO HTTP Request: POST http://localhost:8088/v1/long-term-memory/search?optimize_query=false \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " ๐Ÿ“š Found 3 relevant memories:\n", + " 1. Student prefers online courses\n", + " 2. Student prefers morning classes\n", + " 3. Student is interested in machine learning and AI\n" + ] + } + ], + "source": [ + "if MEMORY_SERVER_AVAILABLE:\n", + " from agent_memory_client.filters import UserId\n", + "\n", + " print(\"\\n\" + \"=\" * 80)\n", + " print(\"๐Ÿ“ STEP 3: Searching Long-term Memory\")\n", + " print(\"=\" * 80)\n", + "\n", + " search_query_1 = \"What does the student prefer?\"\n", + " print(f\"\\n๐Ÿ” Query: '{search_query_1}'\")\n", + "\n", + " search_results_1 = await memory_client.search_long_term_memory(\n", + " text=search_query_1,\n", + " user_id=UserId(eq=lt_student_id),\n", + " limit=3\n", + " )\n", + "\n", + " if search_results_1.memories:\n", + " print(f\" ๐Ÿ“š Found {len(search_results_1.memories)} relevant memories:\")\n", + " for i, memory in enumerate(search_results_1.memories[:3], 1):\n", + " print(f\" {i}. {memory.text}\")\n", + " else:\n", + " print(\" โš ๏ธ No memories found\")" + ] + }, + { + "cell_type": "markdown", + "id": "f7a2a16698c66fcd", + "metadata": {}, + "source": [ + "#### Query 2: What courses has the student completed?\n" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "7b7a247cc0c8fddf", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:09.429612Z", + "iopub.status.busy": "2025-11-01T00:27:09.429514Z", + "iopub.status.idle": "2025-11-01T00:27:09.600859Z", + "shell.execute_reply": "2025-11-01T00:27:09.600364Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "๐Ÿ” Query: 'What courses has the student completed?'\n", + "20:27:09 httpx INFO HTTP Request: POST http://localhost:8088/v1/long-term-memory/search?optimize_query=false \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " ๐Ÿ“š Found 5 relevant memories:\n", + " 1. Student prefers online courses\n", + " 2. 
Student completed Introduction to Programming with grade A on 2024-12-15\n", + " 3. Student's major is Computer Science\n", + " 4. Student is currently taking Linear Algebra\n", + " 5. Student asked about machine learning courses on 2024-09-20\n" + ] + } + ], + "source": [ + "if MEMORY_SERVER_AVAILABLE:\n", + " search_query_2 = \"What courses has the student completed?\"\n", + " print(f\"\\n๐Ÿ” Query: '{search_query_2}'\")\n", + "\n", + " search_results_2 = await memory_client.search_long_term_memory(\n", + " text=search_query_2,\n", + " user_id=UserId(eq=lt_student_id),\n", + " limit=5\n", + " )\n", + "\n", + " if search_results_2.memories:\n", + " print(f\" ๐Ÿ“š Found {len(search_results_2.memories)} relevant memories:\")\n", + " for i, memory in enumerate(search_results_2.memories[:5], 1):\n", + " print(f\" {i}. {memory.text}\")\n", + " else:\n", + " print(\" โš ๏ธ No memories found\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "a1257ba13cefc9c2", + "metadata": {}, + "source": [ + "#### Query 3: What is the student's major?\n" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "77dfb8e438774736", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:09.602406Z", + "iopub.status.busy": "2025-11-01T00:27:09.602283Z", + "iopub.status.idle": "2025-11-01T00:27:09.874231Z", + "shell.execute_reply": "2025-11-01T00:27:09.873463Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "๐Ÿ” Query: 'What is the student's major?'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20:27:09 httpx INFO HTTP Request: POST http://localhost:8088/v1/long-term-memory/search?optimize_query=false \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " ๐Ÿ“š Found 3 relevant memories:\n", + " 1. Student's major is Computer Science\n", + " 2. Student wants to graduate in Spring 2026\n", + " 3. Student is currently taking Linear Algebra\n", + "\n", + "================================================================================\n", + "โœ… DEMO COMPLETE: Long-term memory enables persistent knowledge!\n", + "================================================================================\n" + ] + } + ], + "source": [ + "if MEMORY_SERVER_AVAILABLE:\n", + " search_query_3 = \"What is the student's major?\"\n", + " print(f\"\\n๐Ÿ” Query: '{search_query_3}'\")\n", + "\n", + " search_results_3 = await memory_client.search_long_term_memory(\n", + " text=search_query_3,\n", + " user_id=UserId(eq=lt_student_id),\n", + " limit=3\n", + " )\n", + "\n", + " if search_results_3.memories:\n", + " print(f\" ๐Ÿ“š Found {len(search_results_3.memories)} relevant memories:\")\n", + " for i, memory in enumerate(search_results_3.memories[:3], 1):\n", + " print(f\" {i}. {memory.text}\")\n", + " else:\n", + " print(\" โš ๏ธ No memories found\")\n", + "\n", + " print(\"\\n\" + \"=\" * 80)\n", + " print(\"โœ… DEMO COMPLETE: Long-term memory enables persistent knowledge!\")\n", + " print(\"=\" * 80)\n", + "else:\n", + " print(\"โš ๏ธ Memory Server not available. 
Skipping demo.\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "ecd16284999d3213", + "metadata": {}, + "source": [ + "### Long-term Memory Demo Summary\n", + "\n", + "Let's review what we demonstrated with long-term memory.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "224aa7006183262", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:09.875983Z", + "iopub.status.busy": "2025-11-01T00:27:09.875847Z", + "iopub.status.idle": "2025-11-01T00:27:09.879436Z", + "shell.execute_reply": "2025-11-01T00:27:09.878855Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================================================================================\n", + "๐ŸŽฏ LONG-TERM MEMORY DEMO SUMMARY\n", + "================================================================================\n", + "\n", + "๐Ÿ“Š What We Did:\n", + " Step 1: Stored 6 semantic memories (facts)\n", + " โ†’ Student preferences, major, graduation date\n", + " โ†’ Tagged with topics: preferences, academic_info\n", + "\n", + " Step 2: Stored 3 episodic memories (events)\n", + " โ†’ Enrollment, completion, interaction events\n", + " โ†’ Tagged with topics: enrollment, courses\n", + "\n", + " Step 3: Searched long-term memory\n", + " โ†’ Used natural language queries\n", + " โ†’ Semantic search found relevant memories\n", + " โ†’ No exact keyword matching needed\n", + "\n", + "โœ… Key Benefits:\n", + " โ€ข Persistent knowledge across sessions\n", + " โ€ข Semantic search (not keyword matching)\n", + " โ€ข Automatic deduplication\n", + " โ€ข Topic-based organization\n", + "\n", + "๐Ÿ’ก Key Insight:\n", + " Long-term memory enables personalization and knowledge\n", + " accumulation across sessions. It's the foundation for\n", + " building agents that remember and learn from users.\n", + "================================================================================\n" + ] + } + ], + "source": [ + "print(\"=\" * 80)\n", + "print(\"๐ŸŽฏ LONG-TERM MEMORY DEMO SUMMARY\")\n", + "print(\"=\" * 80)\n", + "print(\"\\n๐Ÿ“Š What We Did:\")\n", + "print(\" Step 1: Stored 6 semantic memories (facts)\")\n", + "print(\" โ†’ Student preferences, major, graduation date\")\n", + "print(\" โ†’ Tagged with topics: preferences, academic_info\")\n", + "print(\"\\n Step 2: Stored 3 episodic memories (events)\")\n", + "print(\" โ†’ Enrollment, completion, interaction events\")\n", + "print(\" โ†’ Tagged with topics: enrollment, courses\")\n", + "print(\"\\n Step 3: Searched long-term memory\")\n", + "print(\" โ†’ Used natural language queries\")\n", + "print(\" โ†’ Semantic search found relevant memories\")\n", + "print(\" โ†’ No exact keyword matching needed\")\n", + "print(\"\\nโœ… Key Benefits:\")\n", + "print(\" โ€ข Persistent knowledge across sessions\")\n", + "print(\" โ€ข Semantic search (not keyword matching)\")\n", + "print(\" โ€ข Automatic deduplication\")\n", + "print(\" โ€ข Topic-based organization\")\n", + "print(\"\\n๐Ÿ’ก Key Insight:\")\n", + "print(\" Long-term memory enables personalization and knowledge\")\n", + "print(\" accumulation across sessions. It's the foundation for\")\n", + "print(\" building agents that remember and learn from users.\")\n", + "print(\"=\" * 80)\n" + ] + }, + { + "cell_type": "markdown", + "id": "833010461c87f519", + "metadata": {}, + "source": [ + "### Key Insight: User Context Type\n", + "\n", + "Long-term memory provides part of the **User Context** - the second context type from Section 1:\n", + "\n", + "1. 
**System Context** - Role and instructions (static)\n", + "2. **User Context** - Profile + long-term memories (dynamic, user-specific) โ† **Long-term memories contribute here!**\n", + "3. **Conversation Context** - Working memory (dynamic, session-specific)\n", + "4. **Retrieved Context** - RAG results (dynamic, query-specific)\n", + "\n", + "Long-term memories enhance User Context by adding persistent knowledge about the user's preferences, history, and goals.\n", + "\n", + "---\n", + "\n", + "## ๐Ÿท๏ธ Advanced: Topics and Filtering\n", + "\n", + "Topics help organize and filter memories. Let's explore how to use them effectively.\n" + ] + }, + { + "cell_type": "markdown", + "id": "50c98c46da71dcd1", + "metadata": {}, + "source": [ + "### Step 1: Store memories with topics\n" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "12fa8b9da3288874", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:09.880786Z", + "iopub.status.busy": "2025-11-01T00:27:09.880705Z", + "iopub.status.idle": "2025-11-01T00:27:09.891970Z", + "shell.execute_reply": "2025-11-01T00:27:09.891399Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================================================================================\n", + "๐Ÿท๏ธ TOPICS AND FILTERING DEMO\n", + "================================================================================\n", + "\n", + "๐Ÿ“ Storing Memories with Topics\n", + "--------------------------------------------------------------------------------\n", + "20:27:09 httpx INFO HTTP Request: POST http://localhost:8088/v1/long-term-memory/ \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " โœ… Student prefers online courses\n", + " Topics: preferences, course_format\n", + "20:27:09 httpx INFO HTTP Request: POST http://localhost:8088/v1/long-term-memory/ \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " โœ… Student's major is Computer Science\n", + " Topics: academic_info, major\n", + "20:27:09 httpx INFO HTTP Request: POST http://localhost:8088/v1/long-term-memory/ \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " โœ… Student wants to graduate in Spring 2026\n", + " Topics: goals, graduation\n", + "20:27:09 httpx INFO HTTP Request: POST http://localhost:8088/v1/long-term-memory/ \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " โœ… Student prefers morning classes\n", + " Topics: preferences, schedule\n" + ] + } + ], + "source": [ + "if MEMORY_SERVER_AVAILABLE:\n", + " topics_student_id = \"sarah_chen\"\n", + "\n", + " print(\"=\" * 80)\n", + " print(\"๐Ÿท๏ธ TOPICS AND FILTERING DEMO\")\n", + " print(\"=\" * 80)\n", + "\n", + " print(\"\\n๐Ÿ“ Storing Memories with Topics\")\n", + " print(\"-\" * 80)\n", + "\n", + " # Define memories with their topics\n", + " memories_with_topics = [\n", + " (\"Student prefers online courses\", [\"preferences\", \"course_format\"]),\n", + " (\"Student's major is Computer Science\", [\"academic_info\", \"major\"]),\n", + " (\"Student wants to graduate in Spring 2026\", [\"goals\", \"graduation\"]),\n", + " (\"Student prefers morning classes\", [\"preferences\", \"schedule\"]),\n", + " ]\n", + "\n", + " # Store each memory\n", + " for memory_text, topics in memories_with_topics:\n", + " memory_record = ClientMemoryRecord(\n", + " text=memory_text,\n", + " 
user_id=topics_student_id,\n", + " memory_type=\"semantic\",\n", + " topics=topics\n", + " )\n", + " await memory_client.create_long_term_memory([memory_record])\n", + " print(f\" โœ… {memory_text}\")\n", + " print(f\" Topics: {', '.join(topics)}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "2d26f40c5997b028", + "metadata": {}, + "source": [ + "### Step 2: Filter memories by type\n" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "8fa83e43fec2a253", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:09.893382Z", + "iopub.status.busy": "2025-11-01T00:27:09.893290Z", + "iopub.status.idle": "2025-11-01T00:27:10.285000Z", + "shell.execute_reply": "2025-11-01T00:27:10.284578Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "๐Ÿ“ Filtering by Memory Type: Semantic\n", + "--------------------------------------------------------------------------------\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20:27:10 httpx INFO HTTP Request: POST http://localhost:8088/v1/long-term-memory/search?optimize_query=false \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Found 7 semantic memories:\n", + " 1. Student prefers online courses\n", + " Topics: preferences, course_format\n", + " 2. Student is currently taking Linear Algebra\n", + " Topics: preferences, academic_info\n", + " 3. Student's major is Computer Science\n", + " Topics: academic_info, major\n", + " 4. Student prefers morning classes\n", + " Topics: preferences, schedule\n", + " 5. Student is interested in machine learning and AI\n", + " Topics: interests, AI\n", + "\n", + "================================================================================\n", + "โœ… Topics enable organized, filterable memory management!\n", + "================================================================================\n" + ] + } + ], + "source": [ + "if MEMORY_SERVER_AVAILABLE:\n", + " print(\"\\n๐Ÿ“ Filtering by Memory Type: Semantic\")\n", + " print(\"-\" * 80)\n", + "\n", + " from agent_memory_client.filters import UserId, MemoryType\n", + "\n", + " # Search for all semantic memories\n", + " results = await memory_client.search_long_term_memory(\n", + " text=\"\", # Empty query returns all\n", + " user_id=UserId(eq=topics_student_id),\n", + " memory_type=MemoryType(eq=\"semantic\"),\n", + " limit=10\n", + " )\n", + "\n", + " print(f\" Found {len(results.memories)} semantic memories:\")\n", + " for i, memory in enumerate(results.memories[:5], 1):\n", + " topics_str = ', '.join(memory.topics) if memory.topics else 'none'\n", + " print(f\" {i}. 
{memory.text}\")\n", + " print(f\" Topics: {topics_str}\")\n", + "\n", + " print(\"\\n\" + \"=\" * 80)\n", + " print(\"โœ… Topics enable organized, filterable memory management!\")\n", + " print(\"=\" * 80)\n" + ] + }, + { + "cell_type": "markdown", + "id": "f1e55992cb0e1184", + "metadata": {}, + "source": [ + "### ๐ŸŽฏ Why Topics Matter\n", + "\n", + "**Organization:**\n", + "- Group related memories together\n", + "- Easy to find memories by category\n", + "\n", + "**Filtering:**\n", + "- Search within specific topics\n", + "- Filter by memory type (semantic, episodic, message)\n", + "\n", + "**Best Practices:**\n", + "- Use consistent topic names\n", + "- Keep topics broad enough to be useful\n", + "- Common topics: `preferences`, `academic_info`, `goals`, `schedule`, `courses`\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ”„ Cross-Session Memory Persistence\n", + "\n", + "Let's verify that memories persist across sessions.\n" + ] + }, + { + "cell_type": "markdown", + "id": "4a4dc88686624474", + "metadata": {}, + "source": [ + "### Step 1: Session 1 - Store memories\n" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "8fd48b3f8e02b6f5", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:10.286447Z", + "iopub.status.busy": "2025-11-01T00:27:10.286329Z", + "iopub.status.idle": "2025-11-01T00:27:10.291505Z", + "shell.execute_reply": "2025-11-01T00:27:10.291134Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================================================================================\n", + "๐Ÿ”„ CROSS-SESSION MEMORY PERSISTENCE DEMO\n", + "================================================================================\n", + "\n", + "๐Ÿ“ SESSION 1: Storing Memories\n", + "--------------------------------------------------------------------------------\n", + "20:27:10 httpx INFO HTTP Request: POST http://localhost:8088/v1/long-term-memory/ \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " โœ… Stored: Student is interested in machine learning and AI\n" + ] + } + ], + "source": [ + "if MEMORY_SERVER_AVAILABLE:\n", + " cross_session_student_id = \"sarah_chen\"\n", + "\n", + " print(\"=\" * 80)\n", + " print(\"๐Ÿ”„ CROSS-SESSION MEMORY PERSISTENCE DEMO\")\n", + " print(\"=\" * 80)\n", + "\n", + " print(\"\\n๐Ÿ“ SESSION 1: Storing Memories\")\n", + " print(\"-\" * 80)\n", + "\n", + " memory_record = ClientMemoryRecord(\n", + " text=\"Student is interested in machine learning and AI\",\n", + " user_id=cross_session_student_id,\n", + " memory_type=\"semantic\",\n", + " topics=[\"interests\", \"AI\"]\n", + " )\n", + " await memory_client.create_long_term_memory([memory_record])\n", + " print(\" โœ… Stored: Student is interested in machine learning and AI\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "d34e3bc677c17172", + "metadata": {}, + "source": [ + "### Step 2: Session 2 - Create new client and retrieve memories\n", + "\n", + "Simulate a new session by creating a new memory client.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "f63f9818c0862cbe", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:10.292814Z", + "iopub.status.busy": "2025-11-01T00:27:10.292720Z", + "iopub.status.idle": "2025-11-01T00:27:10.448683Z", + "shell.execute_reply": "2025-11-01T00:27:10.448168Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "๐Ÿ“ SESSION 2: New Session, Same 
Student\n", + "--------------------------------------------------------------------------------\n", + " ๐Ÿ”„ New session started for the same student\n", + "\n", + " ๐Ÿ” Searching: 'What are the student's interests?'\n", + "20:27:10 httpx INFO HTTP Request: POST http://localhost:8088/v1/long-term-memory/search?optimize_query=false \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " โœ… Memories accessible from new session:\n", + " 1. Student is interested in machine learning and AI\n", + " 2. Student's major is Computer Science\n", + " 3. Student prefers online courses\n", + "\n", + "================================================================================\n", + "โœ… Long-term memories persist across sessions!\n", + "================================================================================\n" + ] + } + ], + "source": [ + "# Search for memories from the new session\n", + "from agent_memory_client.filters import UserId\n", + "\n", + "if MEMORY_SERVER_AVAILABLE:\n", + " print(\"\\n๐Ÿ“ SESSION 2: New Session, Same Student\")\n", + " print(\"-\" * 80)\n", + "\n", + " # Create a new memory client (simulating a new session)\n", + " new_session_config = MemoryClientConfig(\n", + " base_url=os.getenv(\"AGENT_MEMORY_URL\", \"http://localhost:8000\"),\n", + " default_namespace=\"redis_university\"\n", + " )\n", + " new_session_client = MemoryAPIClient(config=new_session_config)\n", + "\n", + " print(\" ๐Ÿ”„ New session started for the same student\")\n", + "\n", + " print(\"\\n ๐Ÿ” Searching: 'What are the student's interests?'\")\n", + " cross_session_results = await new_session_client.search_long_term_memory(\n", + " text=\"What are the student's interests?\",\n", + " user_id=UserId(eq=cross_session_student_id),\n", + " limit=3\n", + " )\n", + "\n", + " if cross_session_results.memories:\n", + " print(f\"\\n โœ… Memories accessible from new session:\")\n", + " for i, memory in enumerate(cross_session_results.memories[:3], 1):\n", + " print(f\" {i}. {memory.text}\")\n", + " else:\n", + " print(\" โš ๏ธ No memories found\")\n", + "\n", + " print(\"\\n\" + \"=\" * 80)\n", + " print(\"โœ… Long-term memories persist across sessions!\")\n", + " print(\"=\" * 80)\n" + ] + }, + { + "cell_type": "markdown", + "id": "ef6e865cca662dd4", + "metadata": {}, + "source": [ + "### ๐ŸŽฏ Cross-Session Persistence\n", + "\n", + "**What We Demonstrated:**\n", + "- **Session 1:** Stored memories about student interests\n", + "- **Session 2:** Created new client (simulating new session)\n", + "- **Result:** Memories from Session 1 are accessible in Session 2\n", + "\n", + "**Why This Matters:**\n", + "- Users don't have to repeat themselves\n", + "- Personalization works across days, weeks, months\n", + "- Knowledge accumulates over time\n", + "\n", + "**Contrast with Working Memory:**\n", + "- Working memory: Session-scoped (expires after 24 hours)\n", + "- Long-term memory: User-scoped (persists indefinitely)\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ”— What's Next: Memory-Enhanced RAG and Agents\n", + "\n", + "You've learned the fundamentals of memory architecture! Now it's time to put it all together.\n", + "\n", + "### **Next Notebook: `02_combining_memory_with_retrieved_context.ipynb`**\n", + "\n", + "In the next notebook, you'll:\n", + "\n", + "1. 
+ "## โฐ Memory Lifecycle & Persistence\n", + "\n", + "Understanding how long memories last and when they expire is crucial for building reliable systems.\n", + "\n", + "### **Working Memory TTL (Time-To-Live)**\n", + "\n", + "**Default TTL:** 24 hours\n", + "\n", + "**What this means:**\n", + "- Working memory (conversation history) expires 24 hours after last activity\n", + "- After expiration, conversation context is lost\n", + "- Long-term memories extracted from the conversation persist\n", + "\n", + "**Timeline Example:**\n", + "\n", + "```\n", + "Day 1, 10:00 AM - Session starts\n", + "Day 1, 10:25 AM - Session ends\n", + " โ†“\n", + "[24 hours later]\n", + " โ†“\n", + "Day 2, 10:25 AM - Working memory still available โœ…\n", + "Day 2, 10:26 AM - Working memory expires โŒ\n", + "```\n", + "\n", + "### **Long-term Memory Persistence**\n", + "\n", + "**Lifetime:** Indefinite (until manually deleted)\n", + "\n", + "**What this means:**\n", + "- Long-term memories never expire automatically\n", + "- Accessible across all sessions, forever\n", + "- Must be explicitly deleted if no longer needed\n", + "\n", + "### **Why This Design?**\n", + "\n", + "**Working Memory (Short-lived):**\n", + "- Conversations are temporary\n", + "- Most context is only relevant during the session\n", + "- Automatic cleanup prevents storage bloat\n", + "- Privacy: Old conversations don't linger\n", + "\n", + "**Long-term Memory (Persistent):**\n", + "- Important facts should persist\n", + "- User preferences don't expire\n", + "- Knowledge accumulates over time\n", + "- Enables true personalization\n", + "\n", + "### **Important Implications**\n", + "\n", + "**1. Extract Before Expiration**\n", + "\n", + "If something important is said in conversation, it must be extracted to long-term memory before the 24-hour TTL expires.\n", + "\n", + "**Good news:** Agent Memory Server does this automatically!\n", + "\n", + "**2. Long-term Memories are Permanent**\n", + "\n", + "Once stored, long-term memories persist indefinitely. Be thoughtful about what you store.\n", + "\n", + "**3. 
Cross-Session Behavior**\n", + "\n", + "```\n", + "Session 1 (Day 1):\n", + "- User: \"I'm interested in machine learning\"\n", + "- Working memory: Stores conversation\n", + "- Long-term memory: Extracts \"Student interested in machine learning\"\n", + "\n", + "[30 hours later - Working memory expired]\n", + "\n", + "Session 2 (Day 3):\n", + "- Working memory from Session 1: EXPIRED โŒ\n", + "- Long-term memory: Still available โœ…\n", + "- Agent retrieves: \"Student interested in machine learning\"\n", + "- Agent makes relevant recommendations โœ…\n", + "```\n", + "\n", + "### **Practical Multi-Day Conversation Example**\n" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "592703b9be74f40e", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:10.450575Z", + "iopub.status.busy": "2025-11-01T00:27:10.450436Z", + "iopub.status.idle": "2025-11-01T00:27:10.636910Z", + "shell.execute_reply": "2025-11-01T00:27:10.636388Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================================================================================\n", + "โฐ MULTI-DAY CONVERSATION SIMULATION\n", + "================================================================================\n", + "\n", + "๐Ÿ“… DAY 1: Initial Conversation\n", + "--------------------------------------------------------------------------------\n", + "\n", + "Text: Student is preparing for a career in AI research\n", + "\n", + "20:27:10 httpx INFO HTTP Request: POST http://localhost:8088/v1/long-term-memory/ \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " โœ… Stored in long-term memory: Career goal (AI research)\n", + " ๐Ÿ’ฌ Working memory: Active for session_day1\n", + " โฐ TTL: 24 hours from now\n", + "\n", + "๐Ÿ“… DAY 3: New Conversation (48 hours later)\n", + "--------------------------------------------------------------------------------\n", + " โŒ Working memory from Day 1: EXPIRED\n", + " โœ… Long-term memory: Still available\n", + "\n", + "Text: What are the student's career goals?\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20:27:10 httpx INFO HTTP Request: POST http://localhost:8088/v1/long-term-memory/search?optimize_query=false \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " ๐Ÿ” Retrieved from long-term memory:\n", + " โ€ข Student is preparing for a career in AI research\n", + " โ€ข Student wants to graduate in Spring 2026\n", + " โ€ข Student's major is Computer Science\n", + "\n", + " โœ… Agent can still personalize recommendations!\n", + "\n", + "================================================================================\n", + "โœ… Long-term memories persist, working memory expires\n", + "================================================================================\n" + ] + } + ], + "source": [ + "# Multi-Day Conversation Simulation\n", + "from agent_memory_client.filters import UserId\n", + "async def multi_day_simulation():\n", + " \"\"\"Simulate conversations across multiple days\"\"\"\n", + "\n", + " student_id = \"sarah_chen\"\n", + "\n", + " print(\"=\" * 80)\n", + " print(\"โฐ MULTI-DAY CONVERSATION SIMULATION\")\n", + " print(\"=\" * 80)\n", + "\n", + " # Day 1: Initial conversation\n", + " print(\"\\n๐Ÿ“… DAY 1: Initial Conversation\")\n", + " print(\"-\" * 80)\n", + "\n", + " session_1 = f\"session_{student_id}_day1\"\n", + " text=\"Student is preparing 
for a career in AI research\"\n", + " print(f\"\\nText: {text}\\n\")\n", + " # Store a fact in long-term memory\n", + " memory_record = ClientMemoryRecord(\n", + " text=text,\n", + " user_id=student_id,\n", + " memory_type=\"semantic\",\n", + " topics=[\"career\", \"goals\"]\n", + " )\n", + " await memory_client.create_long_term_memory([memory_record])\n", + " print(\" โœ… Stored in long-term memory: Career goal (AI research)\")\n", + "\n", + " # Simulate working memory (would normally be conversation)\n", + " print(\" ๐Ÿ’ฌ Working memory: Active for session_day1\")\n", + " print(\" โฐ TTL: 24 hours from now\")\n", + "\n", + " # Day 3: New conversation (working memory expired)\n", + " print(\"\\n๐Ÿ“… DAY 3: New Conversation (48 hours later)\")\n", + " print(\"-\" * 80)\n", + "\n", + " session_2 = f\"session_{student_id}_day3\"\n", + "\n", + " print(\" โŒ Working memory from Day 1: EXPIRED\")\n", + " print(\" โœ… Long-term memory: Still available\")\n", + " text2=\"What are the student's career goals?\"\n", + " print(f\"\\nText: {text2}\\n\")\n", + "\n", + " # Search long-term memory\n", + " results = await memory_client.search_long_term_memory(\n", + " text=text2,\n", + " user_id=UserId(eq=student_id),\n", + " limit=3\n", + " )\n", + "\n", + " if results.memories:\n", + " print(\"\\n ๐Ÿ” Retrieved from long-term memory:\")\n", + " for memory in results.memories[:3]:\n", + " print(f\" โ€ข {memory.text}\")\n", + " print(\"\\n โœ… Agent can still personalize recommendations!\")\n", + "\n", + " print(\"\\n\" + \"=\" * 80)\n", + " print(\"โœ… Long-term memories persist, working memory expires\")\n", + " print(\"=\" * 80)\n", + "\n", + "# Run the simulation\n", + "await multi_day_simulation()\n" + ] + }, + { + "cell_type": "markdown", + "id": "635bcc3c0162ceaa", + "metadata": {}, + "source": [ + "### ๐ŸŽฏ Memory Lifecycle Best Practices\n", + "\n", + "**1. Trust Automatic Extraction**\n", + "- Agent Memory Server automatically extracts important facts\n", + "- Don't manually store everything in long-term memory\n", + "- Let the system decide what's important\n", + "\n", + "**2. Use Appropriate Memory Types**\n", + "- Working memory: Current conversation only\n", + "- Long-term memory: Facts that should persist\n", + "\n", + "**3. Monitor Memory Growth**\n", + "- Long-term memories accumulate over time\n", + "- Implement cleanup for outdated information\n", + "- Consider archiving old memories\n", + "\n", + "**4. Plan for Expiration**\n", + "- Working memory expires after 24 hours\n", + "- Important context must be in long-term memory\n", + "- Don't rely on working memory for cross-session data\n", + "\n", + "**5. Test Cross-Session Behavior**\n", + "- Verify long-term memories are accessible\n", + "- Ensure personalization works after TTL expiration\n", + "- Test with realistic time gaps\n", + "\n", + "---\n", + "\n", + "## ๐ŸŽ“ Key Takeaways\n", + "\n", + "### **1. Memory Solves the Grounding Problem**\n", + "\n", + "Without memory, agents can't resolve references:\n", + "- โŒ \"What are **its** prerequisites?\" โ†’ Agent doesn't know what \"its\" refers to\n", + "- โœ… With working memory โ†’ Agent resolves \"its\" from conversation history\n", + "\n", + "### **2. 
Two Types of Memory Serve Different Purposes**\n", + "\n", + "**Working Memory (Session-Scoped):**\n", + "- Conversation messages from current session\n", + "- Enables reference resolution and conversation continuity\n", + "- TTL-based (expires after session ends)\n", + "\n", + "**Long-term Memory (Cross-Session):**\n", + "- Persistent facts, preferences, goals\n", + "- Enables personalization across sessions\n", + "- Searchable via semantic vector search\n", + "\n", + "### **3. Memory Completes the Four Context Types**\n", + "\n", + "From Section 1, we learned about four context types. Memory enables two of them:\n", + "\n", + "1. **System Context** (Static) - โœ… Section 2\n", + "2. **User Context** (Dynamic, User-Specific) - โœ… Section 2 + Long-term Memory\n", + "3. **Conversation Context** (Dynamic, Session-Specific) - โœจ **Working Memory**\n", + "4. **Retrieved Context** (Dynamic, Query-Specific) - โœ… Section 2 RAG\n", + "\n", + "### **4. Memory + RAG = Complete Context Engineering**\n", + "\n", + "The integration pattern:\n", + "```\n", + "1. Load working memory (conversation history)\n", + "2. Search long-term memory (user facts)\n", + "3. RAG search (relevant documents)\n", + "4. Assemble all context types\n", + "5. Generate response\n", + "6. Save working memory (updated conversation)\n", + "```\n", + "\n", + "This gives us **stateful, personalized, context-aware conversations**.\n", + "\n", + "### **5. Agent Memory Server is Production-Ready**\n", + "\n", + "Why use Agent Memory Server instead of simple in-memory storage:\n", + "- โœ… **Scalable** - Redis-backed, handles thousands of users\n", + "- โœ… **Automatic** - Extracts important facts to long-term storage\n", + "- โœ… **Semantic search** - Vector-indexed memory retrieval\n", + "- โœ… **Deduplication** - Prevents redundant memories\n", + "- โœ… **TTL management** - Automatic expiration of old sessions\n" + ] + }, + { + "cell_type": "markdown", + "id": "563b64c1544ceec9", + "metadata": {}, + "source": [ + "## ๐Ÿง  Memory Extraction Strategies\n", + "\n", + "Understanding how the Agent Memory Server creates long-term memories from conversations.\n" + ] + }, + { + "cell_type": "markdown", + "id": "1ae280dc-c910-4c3e-bcd3-ebf9a9363cf3", + "metadata": {}, + "source": [ + "### The Memory Extraction Question\n", + "\n", + "When the Agent Memory Server extracts memories from conversations and stores them in long-term memory, it needs to decide **HOW** to extract and structure those memories.\n", + "\n", + "**Key Distinction:**\n", + "- **Working Memory:** Stores raw conversation messages (user/assistant exchanges)\n", + "- **Long-term Memory:** Stores extracted facts, summaries, or preferences\n", + "\n", + "**The Question:** When promoting information from working memory to long-term memory, should we extract:\n", + "- Individual discrete facts? (\"User prefers online courses\")\n", + "- A summary of the conversation? (\"User discussed course preferences...\")\n", + "- User preferences specifically? (\"User prefers email notifications\")\n", + "- Custom domain-specific information?\n", + "\n", + "This is where **memory extraction strategies** come in.\n" + ] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### Available Strategies\n", + "\n", + "The Agent Memory Server supports four memory extraction strategies that determine how memories are created:\n", + "\n", + "#### **1. 
Discrete Strategy (Default)** โœ…\n", + "\n", + "**Purpose:** Extract individual facts and preferences from conversations\n", + "\n", + "**Best For:** General-purpose memory extraction, factual information, user preferences\n", + "\n", + "**Example Input (Conversation):**\n", + "```\n", + "User: \"I'm a Computer Science major interested in machine learning. I prefer online courses.\"\n", + "```\n", + "\n", + "**Example Output (Long-term Memories):**\n", + "```json\n", + "[\n", + " {\n", + " \"type\": \"semantic\",\n", + " \"text\": \"User's major is Computer Science\",\n", + " \"topics\": [\"education\", \"major\"],\n", + " \"entities\": [\"Computer Science\"]\n", + " },\n", + " {\n", + " \"type\": \"semantic\",\n", + " \"text\": \"User interested in machine learning\",\n", + " \"topics\": [\"interests\", \"technology\"],\n", + " \"entities\": [\"machine learning\"]\n", + " },\n", + " {\n", + " \"type\": \"semantic\",\n", + " \"text\": \"User prefers online courses\",\n", + " \"topics\": [\"preferences\", \"learning\"],\n", + " \"entities\": [\"online courses\"]\n", + " }\n", + "]\n", + "```\n", + "\n", + "**When to Use:**\n", + "- โœ… Most agent interactions (default choice)\n", + "- โœ… When you want searchable individual facts\n", + "- โœ… When facts should be independently retrievable\n", + "- โœ… Building knowledge graphs or fact databases\n", + "\n", + "---\n", + "\n", + "#### **2. Summary Strategy**\n", + "\n", + "**Purpose:** Create concise summaries of entire conversations instead of extracting discrete facts\n", + "\n", + "**Best For:** Long conversations, meeting notes, comprehensive context preservation\n", + "\n", + "**Example Input (Same Conversation):**\n", + "```\n", + "User: \"I'm a Computer Science major interested in machine learning. I prefer online courses.\"\n", + "```\n", + "\n", + "**Example Output (Long-term Memory):**\n", + "```json\n", + "{\n", + " \"type\": \"semantic\",\n", + " \"text\": \"User is a Computer Science major with interest in machine learning, preferring online course formats for their studies.\",\n", + " \"topics\": [\"education\", \"preferences\", \"technology\"],\n", + " \"entities\": [\"Computer Science\", \"machine learning\", \"online courses\"]\n", + "}\n", + "```\n", + "\n", + "**When to Use:**\n", + "- โœ… Long consultations or advising sessions\n", + "- โœ… Meeting notes or session summaries\n", + "- โœ… When context of entire conversation matters\n", + "- โœ… Reducing storage while preserving conversational context\n", + "\n", + "---\n", + "\n", + "#### **3. Preferences Strategy**\n", + "\n", + "**Purpose:** Focus specifically on extracting user preferences and personal characteristics\n", + "\n", + "**Best For:** Personalization systems, user profile building, preference learning\n", + "\n", + "**Example Output:**\n", + "```json\n", + "{\n", + " \"type\": \"semantic\",\n", + " \"text\": \"User prefers online courses over in-person instruction\",\n", + " \"topics\": [\"preferences\", \"learning_style\"],\n", + " \"entities\": [\"online courses\", \"in-person\"]\n", + "}\n", + "```\n", + "\n", + "**When to Use:**\n", + "- โœ… User onboarding flows\n", + "- โœ… Building user profiles\n", + "- โœ… Personalization-focused applications\n", + "- โœ… Preference learning systems\n", + "\n", + "---\n", + "\n", + "#### **4. 
Custom Strategy**\n", + "\n", + "**Purpose:** Use domain-specific extraction prompts for specialized needs\n", + "\n", + "**Best For:** Domain-specific extraction (technical, legal, medical), specialized workflows\n", + "\n", + "**Security Note:** โš ๏ธ Custom prompts require validation to prevent prompt injection attacks. See the [Security Guide](https://redis.github.io/agent-memory-server/security/) for details.\n", + "\n", + "**When to Use:**\n", + "- โœ… Specialized domains (legal, medical, technical)\n", + "- โœ… Custom extraction logic needed\n", + "- โœ… Domain-specific memory structures\n", + "\n", + "---\n" + ], + "id": "3b0f07723c91ea40" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### Strategy Comparison\n", + "\n", + "| Strategy | Output Type | Use Case | Example |\n", + "|----------|------------|----------|---------|\n", + "| **Discrete** | Individual facts | General agents | \"User's major is Computer Science\" |\n", + "| **Summary** | Conversation summary | Long sessions | \"User discussed CS major, interested in ML courses...\" |\n", + "| **Preferences** | User preferences | Personalization | \"User prefers online courses over in-person\" |\n", + "| **Custom** | Domain-specific | Specialized domains | Custom extraction logic |\n" + ], + "id": "9c5f8b407bc85632" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### Default Behavior in This Course\n", + "\n", + "**In this course, we use the Discrete Strategy (default)** because:\n", + "\n", + "โœ… **Works well for course advising conversations**\n", + "- Students ask specific questions\n", + "- Facts are independently useful\n", + "- Each fact can be searched separately\n", + "\n", + "โœ… **Creates searchable individual facts**\n", + "- \"User's major is Computer Science\"\n", + "- \"User completed RU101\"\n", + "- \"User interested in machine learning\"\n", + "\n", + "โœ… **Balances detail with storage efficiency**\n", + "- Not too granular (every sentence)\n", + "- Not too broad (entire conversations)\n", + "- Just right for Q&A interactions\n", + "\n", + "โœ… **No configuration required**\n", + "- Default behavior\n", + "- Works out of the box\n", + "- Production-ready\n" + ], + "id": "221603c4ef264222" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### When Would You Use Different Strategies?\n", + "\n", + "**Scenario 1: Long Academic Advising Session (Summary Strategy)**\n", + "\n", + "```\n", + "Student has 30-minute conversation discussing:\n", + "- Academic goals and graduation timeline\n", + "- Career aspirations and internship plans\n", + "- Course preferences and learning style\n", + "- Schedule constraints and work commitments\n", + "- Extracurricular interests\n", + "```\n", + "\n", + "**Discrete Strategy:** Extracts 20+ individual facts\n", + "- \"User wants to graduate Spring 2026\"\n", + "- \"User interested in tech startup internship\"\n", + "- \"User prefers online courses\"\n", + "- ... (17 more facts)\n", + "\n", + "**Summary Strategy:** Creates 1-2 comprehensive summaries\n", + "- \"Student discussed academic planning for Spring 2026 graduation, expressing strong interest in ML/AI courses and tech startup internships. Prefers online format due to part-time work commitments. 
Interested in vector databases and modern AI applications.\"\n", + "\n", + "**Trade-off:**\n", + "- Discrete: More searchable, more storage\n", + "- Summary: Less storage, preserves context\n", + "\n", + "---\n", + "\n", + "**Scenario 2: User Onboarding (Preferences Strategy)**\n", + "\n", + "```\n", + "New student onboarding flow:\n", + "- Communication preferences\n", + "- Learning style preferences\n", + "- Schedule preferences\n", + "- Notification preferences\n", + "```\n", + "\n", + "**Preferences Strategy:** Focuses on extracting preferences\n", + "- \"User prefers email over SMS notifications\"\n", + "- \"User prefers morning study sessions\"\n", + "- \"User prefers video content over text\"\n", + "\n", + "**Why Preferences Strategy:**\n", + "- Optimized for preference extraction\n", + "- Builds user profile efficiently\n", + "- Personalization-focused\n", + "\n", + "---\n" + ], + "id": "30e3748b1c5a6e7b" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### How Strategies Work Behind the Scenes\n", + "\n", + "**Discrete Strategy (Default):**\n", + "```\n", + "Conversation Messages\n", + " โ†“\n", + "[Background Worker]\n", + " โ†“\n", + "Extract individual facts using LLM\n", + " โ†“\n", + "Store each fact as separate long-term memory\n", + " โ†“\n", + "Vector index for semantic search\n", + "```\n", + "\n", + "**Summary Strategy:**\n", + "```\n", + "Conversation Messages\n", + " โ†“\n", + "[Background Worker]\n", + " โ†“\n", + "Summarize conversation using LLM\n", + " โ†“\n", + "Store summary as long-term memory\n", + " โ†“\n", + "Vector index for semantic search\n", + "```\n", + "\n", + "**Key Point:** Both strategies create **long-term memories** that are:\n", + "- โœ… Persistent (don't expire)\n", + "- โœ… Searchable (vector-indexed)\n", + "- โœ… User-scoped (tied to user_id)\n", + "- โœ… Automatically extracted (background processing)\n",
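+ "\n", + "To make the discrete pipeline concrete, here is a rough sketch of the kind of LLM call such a background worker performs. This is an illustration only -- the Agent Memory Server's actual prompts and implementation differ:\n", + "\n", + "```python\n", + "# Conceptual sketch of discrete fact extraction; not the server's real code.\n", + "from langchain_openai import ChatOpenAI\n", + "from langchain_core.messages import SystemMessage, HumanMessage\n", + "\n", + "extractor_llm = ChatOpenAI(model=\"gpt-4o\", temperature=0.0)\n", + "\n", + "def extract_discrete_facts(conversation: str) -> list[str]:\n", + "    \"\"\"Ask an LLM for standalone facts, one per line.\"\"\"\n", + "    response = extractor_llm.invoke([\n", + "        SystemMessage(content=(\n", + "            \"Extract each standalone fact or preference from the conversation. \"\n", + "            \"Return one fact per line, phrased in the third person.\"\n", + "        )),\n", + "        HumanMessage(content=conversation),\n", + "    ])\n", + "    return [line.strip(\"- \").strip() for line in response.content.splitlines() if line.strip()]\n", + "\n", + "facts = extract_discrete_facts(\n", + "    \"User: I'm a Computer Science major interested in machine learning. \"\n", + "    \"I prefer online courses.\"\n", + ")\n", + "# Expected shape (exact wording will vary):\n", + "# [\"User's major is Computer Science\",\n", + "#  \"User is interested in machine learning\",\n", + "#  \"User prefers online courses\"]\n", + "```\n"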
+ ], + "id": "cb1162425cf827a3" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### Hands-On Demo Coming in Notebook 2\n", + "\n", + "**In the next notebook** (`02_combining_memory_with_retrieved_context.ipynb`), we'll:\n", + "\n", + "1. **Demonstrate** the difference between discrete and summary strategies\n", + "2. **Show** the same conversation processed with both strategies\n", + "3. **Compare** the resulting long-term memories\n", + "4. **Explain** when to use each strategy in production\n", + "\n", + "**For now**, just understand:\n", + "- โœ… Memory extraction strategies control HOW memories are created\n", + "- โœ… Discrete (default) extracts individual facts\n", + "- โœ… Summary creates conversation summaries\n", + "- โœ… Preferences focuses on user preferences\n", + "- โœ… Custom allows domain-specific extraction\n" + ], + "id": "3b47663458a6d69d" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### ๐Ÿ“š Learn More\n", + "\n", + "For complete documentation on memory extraction strategies:\n", + "\n", + "- **[Memory Extraction Strategies](https://redis.github.io/agent-memory-server/memory-extraction-strategies/)** - Complete guide with examples\n", + "- **[Working Memory](https://redis.github.io/agent-memory-server/working-memory/)** - How working memory works\n", + "- **[Long-term Memory](https://redis.github.io/agent-memory-server/long-term-memory/)** - Long-term memory best practices\n", + "- **[Security Guide](https://redis.github.io/agent-memory-server/security/)** - Security considerations for custom strategies\n", + "\n", + "**Next:** In Notebook 2, we'll see these strategies in action with hands-on code examples.\n", + "\n", + "---\n", + "\n", + "## ๐ŸŽ“ Key Takeaways (continued)\n", + "\n", + "### **6. LangChain is Sufficient for Memory + RAG**\n", + "\n", + "We didn't need LangGraph for this section because:\n", + "- Simple linear flow (load โ†’ search โ†’ generate โ†’ save)\n", + "- No conditional branching or complex state management\n", + "- No tool calling required\n", + "\n", + "**LangGraph becomes necessary in Section 4** when we add tools and multi-step workflows.\n", + "\n", + "### **7. Memory Management Best Practices**\n", + "\n", + "**Choose the Right Memory Type:**\n", + "- **Semantic** for facts and preferences (most common)\n", + "- **Episodic** for time-bound events and timeline\n", + "- **Message** for context-rich conversations (use sparingly)\n", + "\n",
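+ "In code, the choice is just the `memory_type` field on `ClientMemoryRecord`. A short sketch reusing the client from earlier (the episodic example text is invented for illustration):\n", + "\n", + "```python\n", + "semantic_fact = ClientMemoryRecord(\n", + "    text=\"Student prefers online courses\",\n", + "    user_id=\"sarah_chen\",\n", + "    memory_type=\"semantic\",   # timeless fact or preference\n", + "    topics=[\"preferences\", \"learning\"]\n", + ")\n", + "episodic_event = ClientMemoryRecord(\n", + "    text=\"Student enrolled in RU101 on 2025-11-01\",\n", + "    user_id=\"sarah_chen\",\n", + "    memory_type=\"episodic\",   # time-bound event\n", + "    topics=[\"courses\", \"schedule\"]\n", + ")\n", + "\n", + "await memory_client.create_long_term_memory([semantic_fact, episodic_event])\n", + "```\n", + "\n",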
+ "**Understand Memory Lifecycle:**\n", + "- **Working memory:** 24-hour TTL, session-scoped\n", + "- **Long-term memory:** Indefinite persistence, user-scoped\n", + "- **Automatic extraction:** Trust the system to extract important facts\n", + "\n", + "**Benefits of Proper Memory Management:**\n", + "- โœ… **Natural conversations** - Users don't repeat themselves\n", + "- โœ… **Cross-session personalization** - Knowledge persists over time\n", + "- โœ… **Efficient storage** - Automatic deduplication prevents bloat\n", + "- โœ… **Semantic search** - Find relevant memories without exact keywords\n", + "- โœ… **Scalable** - Redis-backed, production-ready architecture\n", + "\n", + "**Key Principle:** Memory transforms stateless RAG into stateful, personalized, context-aware conversations.\n", + "\n", + "---\n", + "\n", + "## ๐Ÿš€ What's Next?\n", + "\n", + "### **Next Notebook: Memory-Enhanced RAG and Agents**\n", + "\n", + "**๐Ÿ“š Continue to: `02_combining_memory_with_retrieved_context.ipynb`**\n", + "\n", + "In the next notebook, you'll:\n", + "\n", + "1. **Build** a complete memory-enhanced RAG system\n", + " - Integrate working memory + long-term memory + RAG\n", + " - Combine all four context types\n", + " - Show clear before/after comparisons\n", + "\n", + "2. **Convert** to LangGraph agent (Part 2, separate notebook)\n", + " - Add state management\n", + " - Improve control flow\n", + " - Prepare for Section 4 (tools and advanced capabilities)\n", + "\n", + "### **Then: Section 4 - Tools and Advanced Agents**\n", + "\n", + "After completing the next notebook, you'll be ready for Section 4.\n", + "\n", + "**๐Ÿ’ก What's Next:**\n", + "\n", + "In Section 4, you'll build an agent that can actively decide when to use memory tools, rather than having memory operations hardcoded in your application flow.\n", + "\n", + "**The Complete Learning Path:**\n", + "\n", + "```\n", + "Section 1: Context Engineering Fundamentals\n", + " โ†“\n", + "Section 2: RAG (Retrieved Context)\n", + " โ†“\n", + "Section 3 (Notebook 1): Memory Fundamentals โ† You are here\n", + " โ†“\n", + "Section 3 (Notebook 2): Memory-Enhanced RAG\n", + " โ†“\n", + "Section 4: Tools and Agents\n", + "```\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ’ช Practice Exercises\n", + "\n", + "### **Exercise 1: Cross-Session Personalization**\n", + "\n", + "Modify the `memory_enhanced_rag_query` function to:\n", + "1. Store user preferences in long-term memory when mentioned\n", + "2. Use those preferences in future sessions\n", + "3. Test with two different sessions for the same student\n", + "\n", + "**Hint:** Look for phrases like \"I prefer...\", \"I like...\", \"I want...\" and store them as semantic memories.\n", + "\n",
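+ "If you want a starting point for the hint above, here is a naive sketch (simple keyword matching, scaffolding rather than a solution -- an LLM-based classifier would be more robust):\n", + "\n", + "```python\n", + "# Naive preference detector for Exercise 1; one possible approach only.\n", + "PREFERENCE_MARKERS = (\"i prefer\", \"i like\", \"i want\")\n", + "\n", + "async def store_preferences_if_any(user_message: str, user_id: str):\n", + "    \"\"\"Store a semantic memory whenever the user states a preference.\"\"\"\n", + "    if any(marker in user_message.lower() for marker in PREFERENCE_MARKERS):\n", + "        record = ClientMemoryRecord(\n", + "            text=f\"User stated: {user_message}\",\n", + "            user_id=user_id,\n", + "            memory_type=\"semantic\",\n", + "            topics=[\"preferences\"]\n", + "        )\n", + "        await memory_client.create_long_term_memory([record])\n", + "\n", + "# Example: await store_preferences_if_any(\"I prefer morning classes\", \"sarah_chen\")\n", + "```\n", + "\n",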
+ "### **Exercise 2: Memory-Aware Filtering**\n", + "\n", + "Enhance the RAG search to use long-term memories as filters:\n", + "1. Search long-term memory for preferences (format, difficulty, schedule)\n", + "2. Apply those preferences as filters to `course_manager.search_courses()`\n", + "3. Compare results with and without memory-aware filtering\n", + "\n", + "**Hint:** Use the `filters` parameter in `course_manager.search_courses()`.\n", + "\n", + "### **Exercise 3: Conversation Summarization**\n", + "\n", + "Implement a function that summarizes long conversations:\n", + "1. When working memory exceeds 10 messages, summarize the conversation\n", + "2. Store the summary in long-term memory\n", + "3. Clear old messages from working memory (keep only recent 4)\n", + "4. Test that reference resolution still works with summarized history\n", + "\n", + "**Hint:** Use the LLM to generate summaries, then store as semantic memories.\n", + "\n", + "### **Exercise 4: Multi-User Memory Management**\n", + "\n", + "Create a simple CLI that:\n", + "1. Supports multiple students (different user IDs)\n", + "2. Maintains separate working memory per session\n", + "3. Maintains separate long-term memory per user\n", + "4. Demonstrates cross-session continuity for each user\n", + "\n", + "**Hint:** Use different `session_id` and `user_id` for each student.\n", + "\n", + "### **Exercise 5: Memory Search Quality**\n", + "\n", + "Experiment with long-term memory search:\n", + "1. Store 20+ diverse memories for a student\n", + "2. Try different search queries\n", + "3. Analyze which memories are retrieved\n", + "4. Adjust memory text to improve search relevance\n", + "\n", + "**Hint:** More specific memory text leads to better semantic search results.\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ“ Summary\n", + "\n", + "### **What You Learned:**\n", + "\n", + "1. **The Grounding Problem** - Why agents need memory to resolve references\n", + "2. **Working Memory** - Session-scoped conversation history for continuity\n", + "3. **Long-term Memory** - Cross-session persistent knowledge for personalization\n", + "4. **Memory Integration** - Combining memory with Section 2's RAG system\n", + "5. **Complete Context Engineering** - All four context types working together\n", + "6. **Production Architecture** - Using Agent Memory Server for scalable memory\n", + "\n", + "### **What You Built:**\n", + "\n", + "- โœ… Working memory demo (multi-turn conversations)\n", + "- โœ… Long-term memory demo (persistent knowledge)\n", + "- โœ… Complete memory-enhanced RAG system\n", + "- โœ… Integration of all four context types\n", + "\n", + "### **Key Functions:**\n", + "\n", + "- `memory_enhanced_rag_query()` - Complete memory + RAG pipeline\n", + "- `working_memory_demo()` - Demonstrates conversation continuity\n", + "- `longterm_memory_demo()` - Demonstrates persistent knowledge\n", + "- `complete_demo()` - End-to-end multi-turn conversation\n", + "\n", + "### **Architecture Pattern:**\n", + "\n", + "```\n", + "User Query\n", + " โ†“\n", + "Load Working Memory (conversation history)\n", + " โ†“\n", + "Search Long-term Memory (user facts)\n", + " โ†“\n", + "RAG Search (relevant courses)\n", + " โ†“\n", + "Assemble Context (System + User + Conversation + Retrieved)\n", + " โ†“\n", + "Generate Response\n", + " โ†“\n", + "Save Working Memory (updated conversation)\n", + "```\n", + "\n",
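+ "The same pipeline as a single function looks roughly like this. It is a sketch, not the notebook's `memory_enhanced_rag_query()` itself: the `search_courses` call carries an assumed signature from Section 2, course formatting uses `str()` to avoid guessing model fields, and the working-memory load/save steps are elided because Notebook 2 walks through them in full:\n", + "\n", + "```python\n", + "# Sketch of steps 2-5 of the pipeline, reusing objects from this notebook\n", + "# (memory_client, course_manager, llm, UserId). Assumptions are marked.\n", + "async def memory_enhanced_query_sketch(user_query: str, user_id: str) -> str:\n", + "    # 2. Search long-term memory for user facts\n", + "    memories = await memory_client.search_long_term_memory(\n", + "        text=user_query, user_id=UserId(eq=user_id), limit=5\n", + "    )\n", + "    user_facts = \"\\n\".join(m.text for m in memories.memories)\n", + "\n", + "    # 3. RAG search (assumed Section 2 signature: search_courses(query, limit=...))\n", + "    courses = await course_manager.search_courses(user_query, limit=3)\n", + "    retrieved = \"\\n\".join(str(course) for course in courses)\n", + "\n", + "    # 4-5. Assemble all context types and generate a response\n", + "    response = llm.invoke([\n", + "        SystemMessage(content=(\n", + "            \"You are a university course advisor.\\n\"\n", + "            f\"Known facts about this user:\\n{user_facts}\\n\"\n", + "            f\"Relevant courses:\\n{retrieved}\"\n", + "        )),\n", + "        HumanMessage(content=user_query),\n", + "    ])\n", + "    return response.content\n", + "```\n", + "\n",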
+ "### **From Section 2 to Section 3:**\n", + "\n", + "**Section 2 (Stateless RAG):**\n", + "- โŒ No conversation history\n", + "- โŒ Each query independent\n", + "- โŒ Can't resolve references\n", + "- โœ… Retrieves relevant documents\n", + "\n", + "**Section 3 (Memory-Enhanced RAG):**\n", + "- โœ… Conversation history (working memory)\n", + "- โœ… Multi-turn conversations\n", + "- โœ… Reference resolution\n", + "- โœ… Persistent user knowledge (long-term memory)\n", + "- โœ… Personalization across sessions\n", + "\n", + "### **Next Steps:**\n", + "\n", + "**Section 4** will add **tools** and **agentic workflows** using **LangGraph**, completing your journey from context engineering fundamentals to production-ready AI agents.\n", + "\n", + "---\n", + "\n", + "## ๐ŸŽ‰ Congratulations!\n", + "\n", + "You've successfully built a **memory-enhanced RAG system** that:\n", + "- Remembers conversations (working memory)\n", + "- Accumulates knowledge (long-term memory)\n", + "- Resolves references naturally\n", + "- Personalizes responses\n", + "- Integrates all four context types\n", + "\n", + "**You're now ready for Section 4: Tools & Agentic Workflows!** ๐Ÿš€\n", + "\n", + "\n" + ], + "id": "fbc3258a9ccda684" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "---\n", + "\n", + "## ๐Ÿ“š Additional Resources\n", + "\n", + "- [Agent Memory Server Documentation](https://github.com/redis/agent-memory-server) - Production-ready memory management\n", + "- [Agent Memory Client](https://pypi.org/project/agent-memory-client/) - Python client for Agent Memory Server\n", + "- [RedisVL Documentation](https://redisvl.com/) - Redis Vector Library\n", + "- [LangChain Guide](https://python.langchain.com/docs/modules/memory/) - LangChain's built-in memory documentation\n" + ], + "id": "1cfbf836c39f32f4" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": "", + "id": "c097243c742fc33" + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": 
"text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/python-recipes/context-engineering/notebooks/section-3-memory-systems-for-context-engineering/02_combining_memory_with_retrieved_context.ipynb b/python-recipes/context-engineering/notebooks/section-3-memory-systems-for-context-engineering/02_combining_memory_with_retrieved_context.ipynb new file mode 100644 index 00000000..e5d6b0e6 --- /dev/null +++ b/python-recipes/context-engineering/notebooks/section-3-memory-systems-for-context-engineering/02_combining_memory_with_retrieved_context.ipynb @@ -0,0 +1,2868 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "9e21de5ad28ededc", + "metadata": {}, + "source": [ + "![Redis](https://redis.io/wp-content/uploads/2024/04/Logotype.svg?auto=webp&quality=85,75&width=120)\n", + "\n", + "# ๐Ÿ”— Combining Memory with Retrieved Context\n", + "\n", + "**โฑ๏ธ Estimated Time:** 60-75 minutes\n", + "\n", + "## ๐ŸŽฏ Learning Objectives\n", + "\n", + "By the end of this notebook, you will:\n", + "\n", + "1. **Build** a memory-enhanced RAG system that combines all four context types\n", + "2. **Demonstrate** the benefits of memory for natural conversations\n", + "3. **Convert** a simple RAG system into a LangGraph agent\n", + "4. **Prepare** for Section 4 (adding tools and advanced agent capabilities)\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ”— Bridge from Previous Notebooks\n", + "\n", + "### **What You've Learned:**\n", + "\n", + "**Section 1:** Four Context Types\n", + "- System Context (static instructions)\n", + "- User Context (profile, preferences)\n", + "- Conversation Context (enabled by working memory)\n", + "- Retrieved Context (RAG results)\n", + "\n", + "**Section 2:** RAG Fundamentals\n", + "- Semantic search with vector embeddings\n", + "- Context assembly\n", + "- LLM generation\n", + "\n", + "**Section 3 (Notebook 1):** Memory Fundamentals\n", + "- Working memory for conversation continuity\n", + "- Long-term memory for persistent knowledge\n", + "- Memory types (semantic, episodic, message)\n", + "- Memory lifecycle and persistence\n", + "\n", + "### **What We'll Build:**\n", + "\n", + "**Part 1:** Memory-Enhanced RAG\n", + "- Integrate working memory + long-term memory + RAG\n", + "- Show clear before/after comparisons\n", + "- Demonstrate benefits of memory systems\n", + "\n", + "**Part 2:** LangGraph Agent (Separate Notebook)\n", + "- Convert memory-enhanced RAG to LangGraph agent\n", + "- Add state management and control flow\n", + "- Prepare for Section 4 (tools and advanced capabilities)\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ“Š The Complete Picture\n", + "\n", + "### **Memory-Enhanced RAG Flow:**\n", + "\n", + "```\n", + "User Query\n", + " โ†“\n", + "1. Load Working Memory (conversation history)\n", + "2. Search Long-term Memory (user preferences, facts)\n", + "3. RAG Search (relevant courses)\n", + "4. Assemble Context (System + User + Conversation + Retrieved)\n", + "5. Generate Response\n", + "6. 
Save Working Memory (updated conversation)\n", + "```\n", + "\n", + "### **All Four Context Types Working Together:**\n", + "\n", + "| Context Type | Source | Purpose |\n", + "|-------------|--------|---------|\n", + "| **System** | Static prompt | Role, instructions, guidelines |\n", + "| **User** | Profile + Long-term Memory | Personalization, preferences |\n", + "| **Conversation** | Working Memory | Reference resolution, continuity |\n", + "| **Retrieved** | RAG Search | Relevant courses, information |\n", + "\n", + "**๐Ÿ’ก Key Insight:** Memory transforms stateless RAG into stateful, personalized conversations.\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ“ฆ Setup and Environment\n", + "\n", + "Let's set up our environment with the necessary dependencies and connections. We'll build on Section 2's RAG foundation and add memory capabilities.\n", + "\n", + "### โš ๏ธ Prerequisites\n", + "\n", + "**Before running this notebook, make sure you have:**\n", + "\n", + "1. **Docker Desktop running** - Required for Redis and Agent Memory Server\n", + "\n", + "2. **Environment variables** - Create a `.env` file in the `reference-agent` directory:\n", + " ```bash\n", + " # Copy the example file\n", + " cd ../../reference-agent\n", + " cp .env.example .env\n", + "\n", + " # Edit .env and add your OpenAI API key\n", + " # OPENAI_API_KEY=your_actual_openai_api_key_here\n", + " ```\n", + "\n", + "3. **Run the setup script** - This will automatically start Redis and Agent Memory Server:\n", + " ```bash\n", + " cd ../../reference-agent\n", + " python setup_agent_memory_server.py\n", + " ```\n", + "\n", + "**Note:** The setup script will:\n", + "- โœ… Check if Docker is running\n", + "- โœ… Start Redis if not running (port 6379)\n", + "- โœ… Start Agent Memory Server if not running (port 8088)\n", + "- โœ… Verify Redis connection is working\n", + "- โœ… Handle any configuration issues automatically\n", + "\n", + "If the Memory Server is not available, the notebook will skip memory-related demos but will still run.\n" + ] + }, + { + "cell_type": "markdown", + "id": "264e6d5b346b6755", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T14:27:06.541458Z", + "iopub.status.busy": "2025-10-31T14:27:06.541296Z", + "iopub.status.idle": "2025-10-31T14:27:08.268475Z", + "shell.execute_reply": "2025-10-31T14:27:08.268022Z" + } + }, + "source": [ + "---\n" + ] + }, + { + "cell_type": "markdown", + "id": "dedc66a54eb849c6", + "metadata": {}, + "source": [ + "### Automated Setup Check\n", + "\n", + "Let's run the setup script to ensure all services are running properly.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "1cd141310064ba82", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:17.764993Z", + "iopub.status.busy": "2025-11-01T00:27:17.764815Z", + "iopub.status.idle": "2025-11-01T00:27:18.029343Z", + "shell.execute_reply": "2025-11-01T00:27:18.028918Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Running automated setup check...\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "๐Ÿ”ง Agent Memory Server Setup\n", + "===========================\n", + "๐Ÿ“Š Checking Redis...\n", + "โœ… Redis is running\n", + "๐Ÿ“Š Checking Agent Memory Server...\n", + "๐Ÿ” Agent Memory Server container exists. 
Checking health...\n", + "โœ… Agent Memory Server is running and healthy\n", + "โœ… No Redis connection issues detected\n", + "\n", + "โœ… Setup Complete!\n", + "=================\n", + "๐Ÿ“Š Services Status:\n", + " โ€ข Redis: Running on port 6379\n", + " โ€ข Agent Memory Server: Running on port 8088\n", + "\n", + "๐ŸŽฏ You can now run the notebooks!\n", + "\n", + "\n", + "โœ… All services are ready!\n" + ] + } + ], + "source": [ + "# Run the setup script to ensure Redis and Agent Memory Server are running\n", + "import subprocess\n", + "import sys\n", + "from pathlib import Path\n", + "\n", + "# Path to setup script\n", + "setup_script = Path(\"../../reference-agent/setup_agent_memory_server.py\")\n", + "\n", + "if setup_script.exists():\n", + " print(\"Running automated setup check...\\n\")\n", + " result = subprocess.run(\n", + " [sys.executable, str(setup_script)],\n", + " capture_output=True,\n", + " text=True\n", + " )\n", + " print(result.stdout)\n", + " if result.returncode != 0:\n", + " print(\"โš ๏ธ Setup check failed. Please review the output above.\")\n", + " print(result.stderr)\n", + " else:\n", + " print(\"\\nโœ… All services are ready!\")\n", + "else:\n", + " print(\"โš ๏ธ Setup script not found. Please ensure services are running manually.\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "d221bf3835cda63e", + "metadata": {}, + "source": [ + "---\n" + ] + }, + { + "cell_type": "markdown", + "id": "18c01bfe255ff0d", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T14:27:08.387999Z", + "iopub.status.busy": "2025-10-31T14:27:08.387932Z", + "iopub.status.idle": "2025-10-31T14:27:19.029786Z", + "shell.execute_reply": "2025-10-31T14:27:19.029077Z" + } + }, + "source": [ + "### Install Dependencies\n", + "\n", + "If you haven't already installed the reference-agent package, uncomment and run the following:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "3bb296c50e53337f", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:18.030745Z", + "iopub.status.busy": "2025-11-01T00:27:18.030661Z", + "iopub.status.idle": "2025-11-01T00:27:18.032432Z", + "shell.execute_reply": "2025-11-01T00:27:18.031979Z" + } + }, + "outputs": [], + "source": [ + "# Uncomment to install reference-agent package\n", + "# %pip install -q -e ../../reference-agent\n", + "\n", + "# Uncomment to install agent-memory-client\n", + "# %pip install -q agent-memory-client\n" + ] + }, + { + "cell_type": "markdown", + "id": "5577d8576496593a", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T14:27:19.031485Z", + "iopub.status.busy": "2025-10-31T14:27:19.031347Z", + "iopub.status.idle": "2025-10-31T14:27:19.324283Z", + "shell.execute_reply": "2025-10-31T14:27:19.323806Z" + } + }, + "source": [ + "### Load Environment Variables\n", + "\n", + "We'll load environment variables from the `.env` file in the `reference-agent` directory.\n", + "\n", + "**Required variables:**\n", + "- `OPENAI_API_KEY` - Your OpenAI API key\n", + "- `REDIS_URL` - Redis connection URL (default: redis://localhost:6379)\n", + "- `AGENT_MEMORY_URL` - Agent Memory Server URL (default: http://localhost:8088)\n", + "\n", + "If you haven't created the `.env` file yet, copy `.env.example` and add your OpenAI API key.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "7f541ee37bd9e94b", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:18.033429Z", + "iopub.status.busy": "2025-11-01T00:27:18.033368Z", + 
"iopub.status.idle": "2025-11-01T00:27:18.037993Z", + "shell.execute_reply": "2025-11-01T00:27:18.037578Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Environment variables loaded\n", + " REDIS_URL: redis://localhost:6379\n", + " AGENT_MEMORY_URL: http://localhost:8088\n" + ] + } + ], + "source": [ + "import os\n", + "from pathlib import Path\n", + "from dotenv import load_dotenv\n", + "\n", + "# Load environment variables from reference-agent directory\n", + "env_path = Path(\"../../reference-agent/.env\")\n", + "load_dotenv(dotenv_path=env_path)\n", + "\n", + "# Verify required environment variables\n", + "OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")\n", + "REDIS_URL = os.getenv(\"REDIS_URL\", \"redis://localhost:6379\")\n", + "AGENT_MEMORY_URL = os.getenv(\"AGENT_MEMORY_URL\", \"http://localhost:8088\")\n", + "\n", + "if not OPENAI_API_KEY:\n", + " print(f\"\"\"โŒ OPENAI_API_KEY not found!\n", + "\n", + " Please create a .env file at: {env_path.absolute()}\n", + "\n", + " With the following content:\n", + " OPENAI_API_KEY=your_openai_api_key\n", + " REDIS_URL=redis://localhost:6379\n", + " AGENT_MEMORY_URL=http://localhost:8088\n", + " \"\"\")\n", + "else:\n", + " print(\"โœ… Environment variables loaded\")\n", + " print(f\" REDIS_URL: {REDIS_URL}\")\n", + " print(f\" AGENT_MEMORY_URL: {AGENT_MEMORY_URL}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "ff97c53e10f44716", + "metadata": {}, + "source": [ + "### Import Core Libraries\n", + "\n", + "We'll import standard Python libraries and async support for our memory operations.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "1a4fabcf00d1fdda", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:18.039065Z", + "iopub.status.busy": "2025-11-01T00:27:18.038983Z", + "iopub.status.idle": "2025-11-01T00:27:18.040811Z", + "shell.execute_reply": "2025-11-01T00:27:18.040433Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Core libraries imported\n" + ] + } + ], + "source": [ + "import sys\n", + "import asyncio\n", + "from typing import List, Dict, Any, Optional\n", + "from datetime import datetime\n", + "\n", + "print(\"โœ… Core libraries imported\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "d8b6cc99aac5193e", + "metadata": {}, + "source": [ + "### Import Section 2 Components\n", + "\n", + "We're building on Section 2's RAG foundation, so we'll reuse the same components:\n", + "- `redis_config` - Redis connection and configuration\n", + "- `CourseManager` - Course search and management\n", + "- `StudentProfile` and other models - Data structures\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "87f84446a6969a31", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:18.041957Z", + "iopub.status.busy": "2025-11-01T00:27:18.041897Z", + "iopub.status.idle": "2025-11-01T00:27:19.877250Z", + "shell.execute_reply": "2025-11-01T00:27:19.876796Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Section 2 components imported\n", + " CourseManager: Available\n", + " Redis Config: Available\n", + " Models: Course, StudentProfile, etc.\n" + ] + } + ], + "source": [ + "# Import Section 2 components from reference-agent\n", + "from redis_context_course.redis_config import redis_config\n", + "from redis_context_course.course_manager import CourseManager\n", + "from redis_context_course.models import (\n", + " 
Course, StudentProfile, DifficultyLevel,\n", + " CourseFormat, Semester\n", + ")\n", + "\n", + "print(\"โœ… Section 2 components imported\")\n", + "print(f\" CourseManager: Available\")\n", + "print(f\" Redis Config: Available\")\n", + "print(f\" Models: Course, StudentProfile, etc.\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "8c9c424c857e0b63", + "metadata": {}, + "source": [ + "### Import LangChain Components\n", + "\n", + "We'll use LangChain for LLM interaction and message handling.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "17f591bf327805dd", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:19.878588Z", + "iopub.status.busy": "2025-11-01T00:27:19.878455Z", + "iopub.status.idle": "2025-11-01T00:27:19.880496Z", + "shell.execute_reply": "2025-11-01T00:27:19.880090Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… LangChain components imported\n", + " ChatOpenAI: Available\n", + " Message types: HumanMessage, SystemMessage, AIMessage\n" + ] + } + ], + "source": [ + "from langchain_openai import ChatOpenAI\n", + "from langchain_core.messages import HumanMessage, SystemMessage, AIMessage\n", + "\n", + "print(\"โœ… LangChain components imported\")\n", + "print(f\" ChatOpenAI: Available\")\n", + "print(f\" Message types: HumanMessage, SystemMessage, AIMessage\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "b8a129328fb75fc3", + "metadata": {}, + "source": [ + "### Import Agent Memory Server Client\n", + "\n", + "The Agent Memory Server provides production-ready memory management. If it's not available, we'll note that and continue with limited functionality.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "8e19c1f57084b6b1", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:19.881595Z", + "iopub.status.busy": "2025-11-01T00:27:19.881517Z", + "iopub.status.idle": "2025-11-01T00:27:19.883567Z", + "shell.execute_reply": "2025-11-01T00:27:19.883183Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Agent Memory Server client available\n", + " MemoryAPIClient: Ready\n", + " Memory models: WorkingMemory, MemoryMessage, ClientMemoryRecord\n" + ] + } + ], + "source": [ + "# Import Agent Memory Server client\n", + "try:\n", + " from agent_memory_client import MemoryAPIClient, MemoryClientConfig\n", + " from agent_memory_client.models import WorkingMemory, MemoryMessage, ClientMemoryRecord\n", + " MEMORY_SERVER_AVAILABLE = True\n", + " print(\"โœ… Agent Memory Server client available\")\n", + " print(\" MemoryAPIClient: Ready\")\n", + " print(\" Memory models: WorkingMemory, MemoryMessage, ClientMemoryRecord\")\n", + "except ImportError:\n", + " MEMORY_SERVER_AVAILABLE = False\n", + " print(\"โš ๏ธ Agent Memory Server not available\")\n", + " print(\" Install with: pip install agent-memory-client\")\n", + " print(\" Start server: See reference-agent/README.md\")\n", + " print(\" Note: Some demos will be skipped\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "773c7b6a987f3977", + "metadata": {}, + "source": [ + "### Environment Summary\n", + "\n", + "Let's verify everything is set up correctly.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "193e3a1353afb7b0", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:19.884663Z", + "iopub.status.busy": "2025-11-01T00:27:19.884594Z", + "iopub.status.idle": "2025-11-01T00:27:19.886746Z", + 
"shell.execute_reply": "2025-11-01T00:27:19.886380Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================================================================================\n", + "๐Ÿ”ง ENVIRONMENT SETUP SUMMARY\n", + "================================================================================\n", + "\n", + "โœ… Core Libraries: Imported\n", + "โœ… Section 2 Components: Imported\n", + "โœ… LangChain: Imported\n", + "โœ… Agent Memory Server: Available\n", + "\n", + "๐Ÿ“‹ Configuration:\n", + " OPENAI_API_KEY: โœ“ Set\n", + " REDIS_URL: redis://localhost:6379\n", + " AGENT_MEMORY_URL: http://localhost:8088\n", + "================================================================================\n" + ] + } + ], + "source": [ + "print(\"=\" * 80)\n", + "print(\"๐Ÿ”ง ENVIRONMENT SETUP SUMMARY\")\n", + "print(\"=\" * 80)\n", + "print(f\"\\nโœ… Core Libraries: Imported\")\n", + "print(f\"โœ… Section 2 Components: Imported\")\n", + "print(f\"โœ… LangChain: Imported\")\n", + "print(f\"{'โœ…' if MEMORY_SERVER_AVAILABLE else 'โš ๏ธ '} Agent Memory Server: {'Available' if MEMORY_SERVER_AVAILABLE else 'Not Available'}\")\n", + "print(f\"\\n๐Ÿ“‹ Configuration:\")\n", + "print(f\" OPENAI_API_KEY: {'โœ“ Set' if OPENAI_API_KEY else 'โœ— Not set'}\")\n", + "print(f\" REDIS_URL: {REDIS_URL}\")\n", + "print(f\" AGENT_MEMORY_URL: {AGENT_MEMORY_URL}\")\n", + "print(\"=\" * 80)\n" + ] + }, + { + "cell_type": "markdown", + "id": "83febaebad1682ec", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## ๐Ÿ”ง Initialize Components\n", + "\n", + "Now let's initialize the components we'll use throughout this notebook.\n" + ] + }, + { + "cell_type": "markdown", + "id": "3fbbea50ae1ff08b", + "metadata": {}, + "source": [ + "### Initialize Course Manager\n", + "\n", + "The `CourseManager` handles course search and retrieval, just like in Section 2.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "236f04d3923aa764", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:19.887824Z", + "iopub.status.busy": "2025-11-01T00:27:19.887753Z", + "iopub.status.idle": "2025-11-01T00:27:19.989460Z", + "shell.execute_reply": "2025-11-01T00:27:19.989016Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20:27:19 redisvl.index.index INFO Index already exists, not overwriting.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Course Manager initialized\n", + " Ready to search and retrieve courses\n" + ] + } + ], + "source": [ + "# Initialize Course Manager\n", + "course_manager = CourseManager()\n", + "\n", + "print(\"โœ… Course Manager initialized\")\n", + "print(\" Ready to search and retrieve courses\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "61c5f50d1886133e", + "metadata": {}, + "source": [ + "### Initialize LLM\n", + "\n", + "We'll use GPT-4o with temperature=0.0 for consistent, deterministic responses.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "bad8a7d2061efec7", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:19.990596Z", + "iopub.status.busy": "2025-11-01T00:27:19.990528Z", + "iopub.status.idle": "2025-11-01T00:27:20.000701Z", + "shell.execute_reply": "2025-11-01T00:27:20.000395Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… LLM initialized\n", + " Model: gpt-4o\n", + " Temperature: 0.0 (deterministic)\n" + ] + } + ], + "source": [ + "# 
Initialize LLM\n", + "llm = ChatOpenAI(model=\"gpt-4o\", temperature=0.0)\n", + "\n", + "print(\"โœ… LLM initialized\")\n", + "print(\" Model: gpt-4o\")\n", + "print(\" Temperature: 0.0 (deterministic)\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "2e60063cef6b46a8", + "metadata": {}, + "source": [ + "### Initialize Memory Client\n", + "\n", + "If the Agent Memory Server is available, we'll initialize the memory client. This client handles both working memory (conversation history) and long-term memory (persistent facts).\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "514603f5fdcf043a", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:20.001775Z", + "iopub.status.busy": "2025-11-01T00:27:20.001714Z", + "iopub.status.idle": "2025-11-01T00:27:20.006713Z", + "shell.execute_reply": "2025-11-01T00:27:20.006379Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Memory Client initialized\n", + " Base URL: http://localhost:8088\n", + " Namespace: redis_university\n", + " Ready for working memory and long-term memory operations\n" + ] + } + ], + "source": [ + "# Initialize Memory Client\n", + "if MEMORY_SERVER_AVAILABLE:\n", + " config = MemoryClientConfig(\n", + " base_url=AGENT_MEMORY_URL,\n", + " default_namespace=\"redis_university\"\n", + " )\n", + " memory_client = MemoryAPIClient(config=config)\n", + " print(\"โœ… Memory Client initialized\")\n", + " print(f\" Base URL: {config.base_url}\")\n", + " print(f\" Namespace: {config.default_namespace}\")\n", + " print(\" Ready for working memory and long-term memory operations\")\n", + "else:\n", + " memory_client = None\n", + " print(\"โš ๏ธ Memory Server not available\")\n", + " print(\" Running with limited functionality\")\n", + " print(\" Some demos will be skipped\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "8bec158470f51831", + "metadata": {}, + "source": [ + "### Create Sample Student Profile\n", + "\n", + "We'll create a sample student profile to use throughout our demos. 
This follows the same pattern from Section 2.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "907614be8182a320", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:20.007962Z", + "iopub.status.busy": "2025-11-01T00:27:20.007884Z", + "iopub.status.idle": "2025-11-01T00:27:20.010136Z", + "shell.execute_reply": "2025-11-01T00:27:20.009767Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Student profile created\n", + " Name: Sarah Chen\n", + " Major: Computer Science\n", + " Year: 2\n", + " Interests: machine learning, data science, algorithms\n", + " Completed: Introduction to Programming, Data Structures\n", + " Preferred Format: online\n" + ] + } + ], + "source": [ + "# Create sample student profile\n", + "sarah = StudentProfile(\n", + " name=\"Sarah Chen\",\n", + " email=\"sarah.chen@university.edu\",\n", + " major=\"Computer Science\",\n", + " year=2,\n", + " interests=[\"machine learning\", \"data science\", \"algorithms\"],\n", + " completed_courses=[\"Introduction to Programming\", \"Data Structures\"],\n", + " current_courses=[\"Linear Algebra\"],\n", + " preferred_format=CourseFormat.ONLINE,\n", + " preferred_difficulty=DifficultyLevel.INTERMEDIATE\n", + ")\n", + "\n", + "print(\"โœ… Student profile created\")\n", + "print(f\" Name: {sarah.name}\")\n", + "print(f\" Major: {sarah.major}\")\n", + "print(f\" Year: {sarah.year}\")\n", + "print(f\" Interests: {', '.join(sarah.interests)}\")\n", + "print(f\" Completed: {', '.join(sarah.completed_courses)}\")\n", + "print(f\" Preferred Format: {sarah.preferred_format.value}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "9603e9dd9cf82e45", + "metadata": {}, + "source": [ + "### ๐Ÿ’ก Key Insight\n", + "\n", + "We're reusing:\n", + "- โœ… **Same `CourseManager`** from Section 2\n", + "- โœ… **Same `StudentProfile`** model\n", + "- โœ… **Same Redis configuration**\n", + "\n", + "We're adding:\n", + "- โœจ **Memory Client** for conversation history\n", + "- โœจ **Working Memory** for session context\n", + "- โœจ **Long-term Memory** for persistent knowledge\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ“š Part 1: Memory-Enhanced RAG\n", + "\n", + "### **Goal:** Build a simple, inline memory-enhanced RAG system that demonstrates the benefits of memory.\n", + "\n", + "### **Approach:**\n", + "- Start with Section 2's stateless RAG\n", + "- Add working memory for conversation continuity\n", + "- Add long-term memory for personalization\n", + "- Show clear before/after comparisons\n", + "\n", + "---\n", + "\n", + "## ๐Ÿšซ Before: Stateless RAG (Section 2 Approach)\n", + "\n", + "Let's first recall how Section 2's stateless RAG worked, and see its limitations.\n" + ] + }, + { + "cell_type": "markdown", + "id": "abd9aaee3e7f7805", + "metadata": {}, + "source": [ + "### Query 1: Initial query (works fine)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "336f4f8e806ff089", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:20.011486Z", + "iopub.status.busy": "2025-11-01T00:27:20.011419Z", + "iopub.status.idle": "2025-11-01T00:27:22.018311Z", + "shell.execute_reply": "2025-11-01T00:27:22.017163Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================================================================================\n", + "๐Ÿšซ STATELESS RAG DEMO\n", + "================================================================================\n", + "\n", + "๐Ÿ‘ค User: I'm 
interested in machine learning courses\n", + "\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20:27:20 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20:27:22 httpx INFO HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "๐Ÿค– Agent: Based on your interest in machine learning and your background in computer science, I recommend the \"Machine Learning\" course. This course will introduce you to machine learning algorithms and applications, including supervised and unsupervised learning and neural networks. Please note that this course is advanced, so it would be beneficial to ensure you're comfortable with the foundational concepts before enrolling. Additionally, the \"Linear Algebra\" course is highly recommended as it provides essential mathematical foundations that are crucial for understanding many machine learning algorithms.\n" + ] + } + ], + "source": [ + "print(\"=\" * 80)\n", + "print(\"๐Ÿšซ STATELESS RAG DEMO\")\n", + "print(\"=\" * 80)\n", + "\n", + "stateless_query_1 = \"I'm interested in machine learning courses\"\n", + "print(f\"\\n๐Ÿ‘ค User: {stateless_query_1}\\n\\n\")\n", + "\n", + "# Search courses\n", + "stateless_courses_1 = await course_manager.search_courses(stateless_query_1, limit=3)\n", + "\n", + "# Assemble context (System + User + Retrieved only - NO conversation history)\n", + "stateless_system_prompt = \"\"\"You are a Redis University course advisor.\n", + "\n", + "CRITICAL RULES:\n", + "- ONLY discuss and recommend courses from the \"Relevant Courses\" list provided below\n", + "- Do NOT mention, suggest, or make up any courses that are not in the provided list\n", + "- If the available courses don't perfectly match the request, recommend the best options from what IS available\"\"\"\n", + "\n", + "stateless_user_context = f\"\"\"Student: {sarah.name}\n", + "Major: {sarah.major}\n", + "Interests: {', '.join(sarah.interests)}\n", + "Completed: {', '.join(sarah.completed_courses)}\n", + "\"\"\"\n", + "\n", + "stateless_retrieved_context = \"Relevant Courses:\\n\"\n", + "for i, course in enumerate(stateless_courses_1, 1):\n", + " stateless_retrieved_context += f\"\\n{i}. 
{course.title}\"\n", + " stateless_retrieved_context += f\"\\n Description: {course.description}\"\n", + " stateless_retrieved_context += f\"\\n Difficulty: {course.difficulty_level.value}\"\n", + "\n", + "# Generate response\n", + "stateless_messages_1 = [\n", + " SystemMessage(content=stateless_system_prompt),\n", + " HumanMessage(content=f\"{stateless_user_context}\\n\\n{stateless_retrieved_context}\\n\\nQuery: {stateless_query_1}\")\n", + "]\n", + "\n", + "stateless_response_1 = llm.invoke(stateless_messages_1).content\n", + "print(f\"\\n๐Ÿค– Agent: {stateless_response_1}\")\n", + "\n", + "# โŒ No conversation history stored\n", + "# โŒ Next query won't remember this interaction\n" + ] + }, + { + "cell_type": "markdown", + "id": "b0e5f16248ede0b2", + "metadata": {}, + "source": [ + "### Query 2: Follow-up with pronoun reference (fails)\n", + "\n", + "Now let's try a follow-up that requires conversation history.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "be6391be25ebb1b9", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:22.020579Z", + "iopub.status.busy": "2025-11-01T00:27:22.020410Z", + "iopub.status.idle": "2025-11-01T00:27:25.085660Z", + "shell.execute_reply": "2025-11-01T00:27:25.084690Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "๐Ÿ‘ค User: What are the prerequisites for the first one?\n", + " Note: 'the first one' refers to the first course from Query 1\n", + "\n", + "\n", + "20:27:22 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20:27:25 httpx INFO HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "๐Ÿค– Agent: I apologize for the confusion, but it seems there is a repetition in the course listings provided. Unfortunately, I don't have specific information on the prerequisites for the \"Calculus I\" course. However, typically, a solid understanding of pre-calculus topics such as algebra and trigonometry is expected before taking Calculus I. If you are interested in courses related to machine learning, data science, or algorithms, I recommend checking with your academic advisor for more suitable courses that align with your interests and completed coursework.\n", + "\n", + "โŒ Agent can't resolve 'the first one' - no conversation history!\n" + ] + } + ], + "source": [ + "stateless_query_2 = \"What are the prerequisites for the first one?\"\n", + "print(f\"๐Ÿ‘ค User: {stateless_query_2}\")\n", + "print(f\" Note: 'the first one' refers to the first course from Query 1\\n\\n\")\n", + "\n", + "# Search courses (will search for \"prerequisites first one\" - not helpful)\n", + "stateless_courses_2 = await course_manager.search_courses(stateless_query_2, limit=3)\n", + "\n", + "# Assemble context (NO conversation history from Query 1)\n", + "stateless_retrieved_context_2 = \"Relevant Courses:\\n\"\n", + "for i, course in enumerate(stateless_courses_2, 1):\n", + " stateless_retrieved_context_2 += f\"\\n{i}. 
{course.title}\"\n", + " stateless_retrieved_context_2 += f\"\\n Description: {course.description}\"\n", + " stateless_retrieved_context_2 += f\"\\n Difficulty: {course.difficulty_level.value}\"\n", + "\n", + "# Generate response\n", + "stateless_messages_2 = [\n", + " SystemMessage(content=stateless_system_prompt),\n", + " HumanMessage(content=f\"{stateless_user_context}\\n\\n{stateless_retrieved_context_2}\\n\\nQuery: {stateless_query_2}\")\n", + "]\n", + "\n", + "stateless_response_2 = llm.invoke(stateless_messages_2).content\n", + "print(f\"\\n๐Ÿค– Agent: {stateless_response_2}\")\n", + "print(\"\\nโŒ Agent can't resolve 'the first one' - no conversation history!\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "7495edbb86ca8989", + "metadata": {}, + "source": [ + "\n", + "\n", + "### ๐ŸŽฏ What Just Happened?\n", + "\n", + "**Query 1:** \"I'm interested in machine learning courses\"\n", + "- โœ… Works fine - searches and returns ML courses\n", + "\n", + "**Query 2:** \"What are the prerequisites for **the first one**?\"\n", + "- โŒ **Fails** - Agent doesn't know what \"the first one\" refers to\n", + "- โŒ No conversation history stored\n", + "- โŒ Each query is completely independent\n", + "\n", + "**The Problem:** Natural conversation requires context from previous turns.\n", + "\n", + "---\n", + "\n", + "## โœ… After: Memory-Enhanced RAG\n", + "\n", + "Now let's add memory to enable natural conversations.\n", + "\n", + "### **Step 1: Load Working Memory**\n", + "\n", + "Working memory stores conversation history for the current session.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "2306e6cdcf19fcdb", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:25.088413Z", + "iopub.status.busy": "2025-11-01T00:27:25.088145Z", + "iopub.status.idle": "2025-11-01T00:27:25.106561Z", + "shell.execute_reply": "2025-11-01T00:27:25.105876Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20:27:25 httpx INFO HTTP Request: GET http://localhost:8088/v1/working-memory/demo_session_001?user_id=sarah.chen&namespace=redis_university&model_name=gpt-4o \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Loaded working memory for session: demo_session_001\n", + " Messages: 12\n" + ] + } + ], + "source": [ + "# Set up session and student identifiers\n", + "session_id = \"demo_session_001\"\n", + "student_id = sarah.email.split('@')[0]\n", + "\n", + "# Load working memory\n", + "if MEMORY_SERVER_AVAILABLE:\n", + " _, working_memory = await memory_client.get_or_create_working_memory(\n", + " session_id=session_id,\n", + " user_id=student_id,\n", + " model_name=\"gpt-4o\"\n", + " )\n", + "\n", + " print(f\"โœ… Loaded working memory for session: {session_id}\")\n", + " print(f\" Messages: {len(working_memory.messages)}\")\n", + "else:\n", + " print(\"โš ๏ธ Memory Server not available\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "eeaeb0a04fb2b00b", + "metadata": {}, + "source": [ + "### ๐ŸŽฏ What We Just Did\n", + "\n", + "**Loaded Working Memory:**\n", + "- Created or retrieved conversation history for this session\n", + "- Session ID: `demo_session_001` (unique per conversation)\n", + "- User ID: `sarah_chen` (from student email)\n", + "\n", + "**Why This Matters:**\n", + "- Working memory persists across turns in the same session\n", + "- Enables reference resolution (\"it\", \"that course\", \"the first one\")\n", + "- Conversation context is 
maintained\n", + "\n", + "---\n", + "\n", + "### **Step 2: Search Long-term Memory**\n", + "\n", + "Long-term memory stores persistent facts and preferences across sessions.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "a07e0aefe7250bf9", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:25.108634Z", + "iopub.status.busy": "2025-11-01T00:27:25.108443Z", + "iopub.status.idle": "2025-11-01T00:27:25.293292Z", + "shell.execute_reply": "2025-11-01T00:27:25.292432Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20:27:25 httpx INFO HTTP Request: POST http://localhost:8088/v1/long-term-memory/search?optimize_query=false \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "๐Ÿ” Query: 'What does the student prefer?'\n", + "๐Ÿ“š Found 5 relevant memories:\n", + " 1. User prefers online and intermediate-level courses\n", + " 2. User prefers online and intermediate-level courses.\n", + " 3. User prefers intermediate-level courses.\n", + " 4. User prefers intermediate-level courses.\n", + " 5. User prefers intermediate-level courses available in an online format\n" + ] + } + ], + "source": [ + "# Search long-term memory\n", + "longterm_query = \"What does the student prefer?\"\n", + "\n", + "if MEMORY_SERVER_AVAILABLE:\n", + " from agent_memory_client.filters import UserId\n", + "\n", + " longterm_results = await memory_client.search_long_term_memory(\n", + " text=longterm_query,\n", + " user_id=UserId(eq=student_id),\n", + " limit=5\n", + " )\n", + "\n", + " longterm_memories = [m.text for m in longterm_results.memories] if longterm_results.memories else []\n", + "\n", + " print(f\"๐Ÿ” Query: '{longterm_query}'\")\n", + " print(f\"๐Ÿ“š Found {len(longterm_memories)} relevant memories:\")\n", + " for i, memory in enumerate(longterm_memories, 1):\n", + " print(f\" {i}. {memory}\")\n", + "else:\n", + " longterm_memories = []\n", + " print(\"โš ๏ธ Memory Server not available\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "9fb3cb7ac45a690b", + "metadata": {}, + "source": [ + "### ๐ŸŽฏ What We Just Did\n", + "\n", + "**Searched Long-term Memory:**\n", + "- Used semantic search to find relevant facts\n", + "- Query: \"What does the student prefer?\"\n", + "- Results: Memories about preferences, goals, academic info\n", + "\n", + "**Why This Matters:**\n", + "- Long-term memory enables personalization\n", + "- Facts persist across sessions (days, weeks, months)\n", + "- Semantic search finds relevant memories without exact keyword matching\n", + "\n", + "---\n", + "\n", + "### **Step 3: Assemble All Four Context Types**\n", + "\n", + "Now let's combine everything: System + User + Conversation + Retrieved.\n" + ] + }, + { + "cell_type": "markdown", + "id": "e5dd1140f19fa2e", + "metadata": {}, + "source": [ + "#### 3.1: System Context (static)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "5a97ccafff01934d", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:25.295598Z", + "iopub.status.busy": "2025-11-01T00:27:25.295414Z", + "iopub.status.idle": "2025-11-01T00:27:25.298689Z", + "shell.execute_reply": "2025-11-01T00:27:25.298190Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… System Context created\n", + " Length: 927 chars\n" + ] + } + ], + "source": [ + "# 1. 
System Context (static)\n", + "context_system_prompt = \"\"\"You are a Redis University course advisor.\n", + "\n", + "Your role:\n", + "- Help students find and enroll in courses from our catalog\n", + "- Provide personalized recommendations based on available courses\n", + "- Answer questions about courses, prerequisites, schedules\n", + "\n", + "CRITICAL RULES - READ CAREFULLY:\n", + "- You can ONLY recommend courses that appear in the \"Relevant Courses\" list below\n", + "- Do NOT suggest courses that are not in the \"Relevant Courses\" list\n", + "- Do NOT say things like \"you might want to consider X course\" if X is not in the list\n", + "- Do NOT mention courses from other platforms or external resources\n", + "- If the available courses don't perfectly match the request, recommend the best options from what IS in the list\n", + "- Use conversation history to resolve references (\"it\", \"that course\", \"the first one\")\n", + "- Use long-term memories to personalize your recommendations\n", + "- Be helpful, supportive, and encouraging while staying within the available courses\"\"\"\n", + "\n", + "print(\"โœ… System Context created\")\n", + "print(f\" Length: {len(context_system_prompt)} chars\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "53c82066a191acc9", + "metadata": {}, + "source": [ + "#### 3.2: User Context (profile + long-term memories)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "f526b51861566d13", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:25.300701Z", + "iopub.status.busy": "2025-11-01T00:27:25.300572Z", + "iopub.status.idle": "2025-11-01T00:27:25.424094Z", + "shell.execute_reply": "2025-11-01T00:27:25.423279Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20:27:25 httpx INFO HTTP Request: POST http://localhost:8088/v1/long-term-memory/search?optimize_query=false \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… User Context created\n", + " Length: 595 chars\n" + ] + } + ], + "source": [ + "# 2. 
User Context (profile + long-term memories)\n", + "context_user_context = f\"\"\"Student Profile:\n", + "- Name: {sarah.name}\n", + "- Major: {sarah.major}\n", + "- Year: {sarah.year}\n", + "- Interests: {', '.join(sarah.interests)}\n", + "- Completed: {', '.join(sarah.completed_courses)}\n", + "- Current: {', '.join(sarah.current_courses)}\n", + "- Preferred Format: {sarah.preferred_format.value}\n", + "- Preferred Difficulty: {sarah.preferred_difficulty.value}\"\"\"\n", + "\n", + "# Search long-term memory for this query\n", + "context_query = \"machine learning courses\"\n", + "\n", + "if MEMORY_SERVER_AVAILABLE:\n", + " from agent_memory_client.filters import UserId\n", + "\n", + " context_longterm_results = await memory_client.search_long_term_memory(\n", + " text=context_query,\n", + " user_id=UserId(eq=student_id),\n", + " limit=5\n", + " )\n", + " context_longterm_memories = [m.text for m in context_longterm_results.memories] if context_longterm_results.memories else []\n", + "\n", + " if context_longterm_memories:\n", + " context_user_context += f\"\\n\\nLong-term Memories:\\n\" + \"\\n\".join([f\"- {m}\" for m in context_longterm_memories])\n", + "\n", + "print(\"โœ… User Context created\")\n", + "print(f\" Length: {len(context_user_context)} chars\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "d7d4b7343d483871", + "metadata": {}, + "source": [ + "#### 3.3: Conversation Context (working memory)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "c74eae47e96155df", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:25.426197Z", + "iopub.status.busy": "2025-11-01T00:27:25.426043Z", + "iopub.status.idle": "2025-11-01T00:27:25.435978Z", + "shell.execute_reply": "2025-11-01T00:27:25.435520Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20:27:25 httpx INFO HTTP Request: GET http://localhost:8088/v1/working-memory/demo_session_001?user_id=sarah.chen&namespace=redis_university&model_name=gpt-4o \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Conversation Context loaded\n", + " Messages: 12\n" + ] + } + ], + "source": [ + "# 3. 
Conversation Context (working memory)\n", + "if MEMORY_SERVER_AVAILABLE:\n", + " _, context_working_memory = await memory_client.get_or_create_working_memory(\n", + " session_id=session_id,\n", + " user_id=student_id,\n", + " model_name=\"gpt-4o\"\n", + " )\n", + "\n", + " context_conversation_messages = []\n", + " for msg in context_working_memory.messages:\n", + " if msg.role == \"user\":\n", + " context_conversation_messages.append(HumanMessage(content=msg.content))\n", + " elif msg.role == \"assistant\":\n", + " context_conversation_messages.append(AIMessage(content=msg.content))\n", + "\n", + " print(\"โœ… Conversation Context loaded\")\n", + " print(f\" Messages: {len(context_conversation_messages)}\")\n", + "else:\n", + " context_conversation_messages = []\n" + ] + }, + { + "cell_type": "markdown", + "id": "ef065750cd38f76b", + "metadata": {}, + "source": [ + "#### 3.4: Retrieved Context (RAG)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "cdd97d65955272e7", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:25.437959Z", + "iopub.status.busy": "2025-11-01T00:27:25.437800Z", + "iopub.status.idle": "2025-11-01T00:27:25.563286Z", + "shell.execute_reply": "2025-11-01T00:27:25.562552Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20:27:25 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Retrieved Context created\n", + " Length: 662 chars\n" + ] + } + ], + "source": [ + "# 4. Retrieved Context (RAG)\n", + "context_courses = await course_manager.search_courses(context_query, limit=3)\n", + "\n", + "context_retrieved_context = \"Relevant Courses:\\n\"\n", + "for i, course in enumerate(context_courses, 1):\n", + " context_retrieved_context += f\"\\n{i}. 
{course.title}\"\n", + " context_retrieved_context += f\"\\n Description: {course.description}\"\n", + " context_retrieved_context += f\"\\n Difficulty: {course.difficulty_level.value}\"\n", + " context_retrieved_context += f\"\\n Format: {course.format.value}\"\n", + " if course.prerequisites:\n", + " prereq_names = [p.course_title for p in course.prerequisites]\n", + " context_retrieved_context += f\"\\n Prerequisites: {', '.join(prereq_names)}\"\n", + "\n", + "print(\"โœ… Retrieved Context created\")\n", + "print(f\" Length: {len(context_retrieved_context)} chars\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "3b0cc30ca49faa54", + "metadata": {}, + "source": [ + "#### Summary: All Four Context Types\n" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "1cbf570051f9b121", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:25.565541Z", + "iopub.status.busy": "2025-11-01T00:27:25.565350Z", + "iopub.status.idle": "2025-11-01T00:27:25.568659Z", + "shell.execute_reply": "2025-11-01T00:27:25.568034Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================================================================================\n", + "๐Ÿ“Š ASSEMBLED CONTEXT\n", + "================================================================================\n", + "\n", + "1๏ธโƒฃ System Context: 927 chars\n", + "2๏ธโƒฃ User Context: 595 chars\n", + "3๏ธโƒฃ Conversation Context: 12 messages\n", + "4๏ธโƒฃ Retrieved Context: 662 chars\n" + ] + } + ], + "source": [ + "print(\"=\" * 80)\n", + "print(\"๐Ÿ“Š ASSEMBLED CONTEXT\")\n", + "print(\"=\" * 80)\n", + "print(f\"\\n1๏ธโƒฃ System Context: {len(context_system_prompt)} chars\")\n", + "print(f\"2๏ธโƒฃ User Context: {len(context_user_context)} chars\")\n", + "print(f\"3๏ธโƒฃ Conversation Context: {len(context_conversation_messages)} messages\")\n", + "print(f\"4๏ธโƒฃ Retrieved Context: {len(context_retrieved_context)} chars\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "26df0d7a4b1c6c60", + "metadata": {}, + "source": [ + "### ๐ŸŽฏ What We Just Did\n", + "\n", + "**Assembled All Four Context Types:**\n", + "\n", + "1. **System Context** - Role, instructions, guidelines (static)\n", + "2. **User Context** - Profile + long-term memories (dynamic, user-specific)\n", + "3. **Conversation Context** - Working memory messages (dynamic, session-specific)\n", + "4. 
**Retrieved Context** - RAG search results (dynamic, query-specific)\n", + "\n", + "**Why This Matters:**\n", + "- All four context types from Section 1 are now working together\n", + "- System knows WHO the user is (User Context)\n", + "- System knows WHAT was discussed (Conversation Context)\n", + "- System knows WHAT's relevant (Retrieved Context)\n", + "- System knows HOW to behave (System Context)\n", + "\n", + "---\n", + "\n", + "### **Step 4: Generate Response and Save Memory**\n", + "\n", + "Now let's put it all together: generate a response and save the conversation.\n" + ] + }, + { + "cell_type": "markdown", + "id": "b262b0b1942da424", + "metadata": {}, + "source": [ + "#### 4.1: Set up the query\n" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "24e7abcead19bcc0", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:25.570486Z", + "iopub.status.busy": "2025-11-01T00:27:25.570366Z", + "iopub.status.idle": "2025-11-01T00:27:25.572737Z", + "shell.execute_reply": "2025-11-01T00:27:25.572103Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "๐Ÿ‘ค User: I'm interested in machine learning courses\n" + ] + } + ], + "source": [ + "test_query = \"I'm interested in machine learning courses\"\n", + "print(f\"๐Ÿ‘ค User: {test_query}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "1125bd64e3023243", + "metadata": {}, + "source": [ + "#### 4.2: Assemble all context types\n", + "\n", + "We'll reuse the context assembly logic from Step 3.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "997ec6e54c450371", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:25.574305Z", + "iopub.status.busy": "2025-11-01T00:27:25.574189Z", + "iopub.status.idle": "2025-11-01T00:27:25.907393Z", + "shell.execute_reply": "2025-11-01T00:27:25.906590Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20:27:25 httpx INFO HTTP Request: GET http://localhost:8088/v1/working-memory/demo_session_001?user_id=sarah.chen&namespace=redis_university&model_name=gpt-4o \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20:27:25 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Context assembled\n" + ] + } + ], + "source": [ + "if MEMORY_SERVER_AVAILABLE:\n", + " # Load working memory\n", + " _, test_working_memory = await memory_client.get_or_create_working_memory(\n", + " session_id=session_id,\n", + " user_id=student_id,\n", + " model_name=\"gpt-4o\"\n", + " )\n", + "\n", + " # Build conversation messages\n", + " test_conversation_messages = []\n", + " for msg in test_working_memory.messages:\n", + " if msg.role == \"user\":\n", + " test_conversation_messages.append(HumanMessage(content=msg.content))\n", + " elif msg.role == \"assistant\":\n", + " test_conversation_messages.append(AIMessage(content=msg.content))\n", + "\n", + " # Search for courses\n", + " test_courses = await course_manager.search_courses(test_query, limit=3)\n", + "\n", + " # Build retrieved context\n", + " test_retrieved_context = \"Relevant Courses:\\n\"\n", + " for i, course in enumerate(test_courses, 1):\n", + " test_retrieved_context += f\"\\n{i}. 
{course.title}\"\n", + " test_retrieved_context += f\"\\n Description: {course.description}\"\n", + " test_retrieved_context += f\"\\n Difficulty: {course.difficulty_level.value}\"\n", + " if course.prerequisites:\n", + " prereq_names = [p.course_title for p in course.prerequisites]\n", + " test_retrieved_context += f\"\\n Prerequisites: {', '.join(prereq_names)}\"\n", + "\n", + " print(\"โœ… Context assembled\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "9d2eed52c74ef1a3", + "metadata": {}, + "source": [ + "#### 4.3: Build messages and generate response\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "41033fb0b272936a", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:25.909760Z", + "iopub.status.busy": "2025-11-01T00:27:25.909589Z", + "iopub.status.idle": "2025-11-01T00:27:28.104441Z", + "shell.execute_reply": "2025-11-01T00:27:28.103756Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20:27:28 httpx INFO HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "๐Ÿค– Agent: Hi Sarah! It's fantastic to see your enthusiasm for machine learning. Given your background in computer science and your current coursework in Linear Algebra, you're well-prepared to explore this field further.\n", + "\n", + "While the Machine Learning course we offer is advanced, I understand you're looking for intermediate-level courses. Unfortunately, we don't have an intermediate machine learning course listed in our catalog. However, I recommend focusing on strengthening your understanding of data science and algorithms, which are integral to machine learning. This will prepare you for the advanced Machine Learning course in the future.\n", + "\n", + "If you have any questions or need further guidance, feel free to reach out. 
I'm here to support you on your learning journey!\n" + ] + } + ], + "source": [ + "if MEMORY_SERVER_AVAILABLE:\n", + " # Build complete message list\n", + " test_messages = [SystemMessage(content=context_system_prompt)]\n", + " test_messages.extend(test_conversation_messages) # Add conversation history\n", + " test_messages.append(HumanMessage(content=f\"{context_user_context}\\n\\n{test_retrieved_context}\\n\\nQuery: {test_query}\"))\n", + "\n", + " # Generate response using LLM\n", + " test_response = llm.invoke(test_messages).content\n", + "\n", + " print(f\"\\n๐Ÿค– Agent: {test_response}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "120b591cf34b3351", + "metadata": {}, + "source": [ + "#### 4.4: Save to working memory\n" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "8a7782164d5e152", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:28.105996Z", + "iopub.status.busy": "2025-11-01T00:27:28.105881Z", + "iopub.status.idle": "2025-11-01T00:27:28.117988Z", + "shell.execute_reply": "2025-11-01T00:27:28.117215Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20:27:28 httpx INFO HTTP Request: PUT http://localhost:8088/v1/working-memory/demo_session_001?user_id=sarah.chen&model_name=gpt-4o \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "โœ… Conversation saved to working memory\n", + " Total messages: 14\n" + ] + } + ], + "source": [ + "if MEMORY_SERVER_AVAILABLE:\n", + " # Add messages to working memory\n", + " test_working_memory.messages.extend([\n", + " MemoryMessage(role=\"user\", content=test_query),\n", + " MemoryMessage(role=\"assistant\", content=test_response)\n", + " ])\n", + "\n", + " # Save to Memory Server\n", + " await memory_client.put_working_memory(\n", + " session_id=session_id,\n", + " memory=test_working_memory,\n", + " user_id=student_id,\n", + " model_name=\"gpt-4o\"\n", + " )\n", + "\n", + " print(f\"\\nโœ… Conversation saved to working memory\")\n", + " print(f\" Total messages: {len(test_working_memory.messages)}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "ebdcd4af8b39ecbd", + "metadata": {}, + "source": [ + "#### Helper function for the demo\n", + "\n", + "For the complete demo below, we'll use a helper function that combines all these steps.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "56ed86c043eddff6", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:28.119572Z", + "iopub.status.busy": "2025-11-01T00:27:28.119436Z", + "iopub.status.idle": "2025-11-01T00:27:28.125675Z", + "shell.execute_reply": "2025-11-01T00:27:28.125186Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Helper function created for demo\n" + ] + } + ], + "source": [ + "# Helper function for demo (combines all steps above)\n", + "async def generate_and_save(\n", + " user_query: str,\n", + " student_profile: StudentProfile,\n", + " session_id: str,\n", + " top_k: int = 3\n", + ") -> str:\n", + " \"\"\"Generate response and save to working memory\"\"\"\n", + "\n", + " if not MEMORY_SERVER_AVAILABLE:\n", + " return \"โš ๏ธ Memory Server not available\"\n", + "\n", + " from agent_memory_client.filters import UserId\n", + "\n", + " student_id = student_profile.email.split('@')[0]\n", + "\n", + " # Load working memory\n", + " _, working_memory = await memory_client.get_or_create_working_memory(\n", + " session_id=session_id,\n", + " 
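# user_id ties the memory to one student; session_id to one conversation\n", + " 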
user_id=student_id,\n", + " model_name=\"gpt-4o\"\n", + " )\n", + "\n", + " # Build conversation messages\n", + " conversation_messages = []\n", + " for msg in working_memory.messages:\n", + " if msg.role == \"user\":\n", + " conversation_messages.append(HumanMessage(content=msg.content))\n", + " elif msg.role == \"assistant\":\n", + " conversation_messages.append(AIMessage(content=msg.content))\n", + "\n", + " # Search courses\n", + " courses = await course_manager.search_courses(user_query, limit=top_k)\n", + "\n", + " # Build retrieved context\n", + " retrieved_context = \"Relevant Courses:\\n\"\n", + " for i, course in enumerate(courses, 1):\n", + " retrieved_context += f\"\\n{i}. {course.title}\"\n", + " retrieved_context += f\"\\n Description: {course.description}\"\n", + " retrieved_context += f\"\\n Difficulty: {course.difficulty_level.value}\"\n", + " if course.prerequisites:\n", + " prereq_names = [p.course_title for p in course.prerequisites]\n", + " retrieved_context += f\"\\n Prerequisites: {', '.join(prereq_names)}\"\n", + "\n", + " # Build messages\n", + " messages = [SystemMessage(content=context_system_prompt)]\n", + " messages.extend(conversation_messages)\n", + " messages.append(HumanMessage(content=f\"{context_user_context}\\n\\n{retrieved_context}\\n\\nQuery: {user_query}\"))\n", + "\n", + " # Generate response\n", + " response = llm.invoke(messages).content\n", + "\n", + " # Save to working memory\n", + " working_memory.messages.extend([\n", + " MemoryMessage(role=\"user\", content=user_query),\n", + " MemoryMessage(role=\"assistant\", content=response)\n", + " ])\n", + " await memory_client.put_working_memory(\n", + " session_id=session_id,\n", + " memory=working_memory,\n", + " user_id=student_id,\n", + " model_name=\"gpt-4o\"\n", + " )\n", + "\n", + " return response\n", + "\n", + "print(\"โœ… Helper function created for demo\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "b1d57045c52dd02c", + "metadata": {}, + "source": [ + "### ๐ŸŽฏ What We Just Did\n", + "\n", + "**Generated Response:**\n", + "- Assembled all four context types\n", + "- Built message list with conversation history\n", + "- Generated response using LLM\n", + "- **Saved updated conversation to working memory**\n", + "\n", + "**Why This Matters:**\n", + "- Next query will have access to this conversation\n", + "- Reference resolution will work (\"it\", \"that course\")\n", + "- Conversation continuity is maintained\n", + "\n", + "---\n", + "\n", + "## ๐Ÿงช Complete Demo: Memory-Enhanced RAG\n", + "\n", + "Now let's test the complete system with a multi-turn conversation.\n", + "\n", + "We'll break this down into three turns:\n", + "1. Initial query about machine learning courses\n", + "2. Follow-up asking about prerequisites (with pronoun reference)\n", + "3. 
Another follow-up checking if student meets prerequisites\n" + ] + }, + { + "cell_type": "markdown", + "id": "2ee62ecce47bf926", + "metadata": {}, + "source": [ + "### Turn 1: Initial Query\n", + "\n", + "Let's start with a query about machine learning courses.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "f50093afecca2c8c", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:28.127772Z", + "iopub.status.busy": "2025-11-01T00:27:28.127636Z", + "iopub.status.idle": "2025-11-01T00:27:28.130498Z", + "shell.execute_reply": "2025-11-01T00:27:28.129996Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================================================================================\n", + "๐Ÿงช MEMORY-ENHANCED RAG DEMO\n", + "================================================================================\n", + "\n", + "๐Ÿ‘ค Student: Sarah Chen\n", + "๐Ÿ“ง Session: complete_demo_session\n", + "\n", + "================================================================================\n", + "๐Ÿ“ TURN 1: Initial Query\n", + "================================================================================\n", + "\n", + "๐Ÿ‘ค User: I'm interested in machine learning courses\n" + ] + } + ], + "source": [ + "# Set up demo session\n", + "demo_session_id = \"complete_demo_session\"\n", + "\n", + "print(\"=\" * 80)\n", + "print(\"๐Ÿงช MEMORY-ENHANCED RAG DEMO\")\n", + "print(\"=\" * 80)\n", + "print(f\"\\n๐Ÿ‘ค Student: {sarah.name}\")\n", + "print(f\"๐Ÿ“ง Session: {demo_session_id}\")\n", + "\n", + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"๐Ÿ“ TURN 1: Initial Query\")\n", + "print(\"=\" * 80)\n", + "\n", + "demo_query_1 = \"I'm interested in machine learning courses\"\n", + "print(f\"\\n๐Ÿ‘ค User: {demo_query_1}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "c5a4ade39bc1104b", + "metadata": {}, + "source": [ + "#### Generate response and save to memory\n" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "1d247655a8b83820", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:28.132097Z", + "iopub.status.busy": "2025-11-01T00:27:28.131991Z", + "iopub.status.idle": "2025-11-01T00:27:32.879889Z", + "shell.execute_reply": "2025-11-01T00:27:32.878848Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20:27:28 httpx INFO HTTP Request: GET http://localhost:8088/v1/working-memory/complete_demo_session?user_id=sarah.chen&namespace=redis_university&model_name=gpt-4o \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20:27:28 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20:27:32 httpx INFO HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20:27:32 httpx INFO HTTP Request: PUT http://localhost:8088/v1/working-memory/complete_demo_session?user_id=sarah.chen&model_name=gpt-4o \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "๐Ÿค– Agent: Hi Sarah! It's fantastic to see your continued interest in machine learning. 
Given your background in computer science and your current coursework in Linear Algebra, you're on a great path to delve into this field.\n", + "\n", + "While the Machine Learning course listed is advanced, you can prepare for it by continuing to strengthen your mathematical foundation with your current Linear Algebra course. This will be beneficial as linear algebra is essential for understanding many machine learning algorithms.\n", + "\n", + "Since you're looking for intermediate-level courses and prefer online formats, focusing on your current Linear Algebra course will help you build the necessary skills. Once you feel confident with these foundational topics, you could then consider enrolling in the advanced Machine Learning course when you feel ready.\n", + "\n", + "If you have any other questions or need further assistance, feel free to ask!\n", + "\n", + "โœ… Conversation saved to working memory\n" + ] + } + ], + "source": [ + "demo_response_1 = await generate_and_save(demo_query_1, sarah, demo_session_id)\n", + "\n", + "print(f\"\\n๐Ÿค– Agent: {demo_response_1}\")\n", + "print(f\"\\nโœ… Conversation saved to working memory\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "775c4094d7248e1", + "metadata": {}, + "source": [ + "### Turn 2: Follow-up with Pronoun Reference\n", + "\n", + "Now let's ask about \"the first one\" - a reference that requires conversation history.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "27bc4cd9dfab64aa", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:32.882164Z", + "iopub.status.busy": "2025-11-01T00:27:32.882016Z", + "iopub.status.idle": "2025-11-01T00:27:32.885470Z", + "shell.execute_reply": "2025-11-01T00:27:32.884662Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "================================================================================\n", + "๐Ÿ“ TURN 2: Follow-up with Pronoun Reference\n", + "================================================================================\n", + "\n", + "๐Ÿ‘ค User: What are the prerequisites for the first one?\n", + " Note: 'the first one' refers to the first course mentioned in Turn 1\n" + ] + } + ], + "source": [ + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"๐Ÿ“ TURN 2: Follow-up with Pronoun Reference\")\n", + "print(\"=\" * 80)\n", + "\n", + "demo_query_2 = \"What are the prerequisites for the first one?\"\n", + "print(f\"\\n๐Ÿ‘ค User: {demo_query_2}\")\n", + "print(f\" Note: 'the first one' refers to the first course mentioned in Turn 1\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "c12b0d543f855a68", + "metadata": {}, + "source": [ + "#### Load conversation history and generate response\n", + "\n", + "The system will load Turn 1 from working memory to resolve \"the first one\".\n" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "33f0859c03577c04", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:32.887624Z", + "iopub.status.busy": "2025-11-01T00:27:32.887488Z", + "iopub.status.idle": "2025-11-01T00:27:34.415382Z", + "shell.execute_reply": "2025-11-01T00:27:34.414572Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20:27:32 httpx INFO HTTP Request: GET http://localhost:8088/v1/working-memory/complete_demo_session?user_id=sarah.chen&namespace=redis_university&model_name=gpt-4o \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20:27:33 httpx INFO 
HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20:27:34 httpx INFO HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20:27:34 httpx INFO HTTP Request: PUT http://localhost:8088/v1/working-memory/complete_demo_session?user_id=sarah.chen&model_name=gpt-4o \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "๐Ÿค– Agent: The first Calculus I course mentions \"Prerequisite Course 18\" as a prerequisite. However, it seems there might be an error in the listing since the other two Calculus I courses don't specify prerequisites. Typically, Calculus I courses require a basic understanding of high school mathematics, which you likely have given your background in computer science and current coursework in Linear Algebra.\n", + "\n", + "Since your primary interest is in machine learning and data science, and you're looking for intermediate-level courses, you might want to focus on courses that align more directly with those areas. If you need further assistance or have any other questions, feel free to ask!\n", + "\n", + "โœ… Agent resolved 'the first one' using conversation history!\n" + ] + } + ], + "source": [ + "demo_response_2 = await generate_and_save(demo_query_2, sarah, demo_session_id)\n", + "\n", + "print(f\"\\n๐Ÿค– Agent: {demo_response_2}\")\n", + "print(\"\\nโœ… Agent resolved 'the first one' using conversation history!\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "4b8c58d592048c0c", + "metadata": {}, + "source": [ + "### Turn 3: Another Follow-up\n", + "\n", + "Let's ask if the student meets the prerequisites mentioned in Turn 2.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "e81a28aff710f634", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:34.417855Z", + "iopub.status.busy": "2025-11-01T00:27:34.417669Z", + "iopub.status.idle": "2025-11-01T00:27:34.420815Z", + "shell.execute_reply": "2025-11-01T00:27:34.420226Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "================================================================================\n", + "๐Ÿ“ TURN 3: Another Follow-up\n", + "================================================================================\n", + "\n", + "๐Ÿ‘ค User: Do I meet those prerequisites?\n", + " Note: 'those prerequisites' refers to prerequisites from Turn 2\n" + ] + } + ], + "source": [ + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"๐Ÿ“ TURN 3: Another Follow-up\")\n", + "print(\"=\" * 80)\n", + "\n", + "demo_query_3 = \"Do I meet those prerequisites?\"\n", + "print(f\"\\n๐Ÿ‘ค User: {demo_query_3}\")\n", + "print(f\" Note: 'those prerequisites' refers to prerequisites from Turn 2\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "e30907ab5fb2c1a", + "metadata": {}, + "source": [ + "#### Load full conversation history and check student profile\n", + "\n", + "The system will:\n", + "1. Load Turns 1-2 from working memory\n", + "2. Resolve \"those prerequisites\"\n", + "3. 
Check student's completed courses from profile\n" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "f69f77c1e8619b20", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:34.422739Z", + "iopub.status.busy": "2025-11-01T00:27:34.422595Z", + "iopub.status.idle": "2025-11-01T00:27:35.952366Z", + "shell.execute_reply": "2025-11-01T00:27:35.951600Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20:27:34 httpx INFO HTTP Request: GET http://localhost:8088/v1/working-memory/complete_demo_session?user_id=sarah.chen&namespace=redis_university&model_name=gpt-4o \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20:27:34 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20:27:35 httpx INFO HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20:27:35 httpx INFO HTTP Request: PUT http://localhost:8088/v1/working-memory/complete_demo_session?user_id=sarah.chen&model_name=gpt-4o \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "๐Ÿค– Agent: It seems there was a bit of confusion with the course listings for Calculus I, as they don't clearly specify prerequisites beyond mentioning \"Prerequisite Course 18\" for the first one. Typically, Calculus I courses require a basic understanding of high school mathematics, which you likely have given your background in computer science and current coursework in Linear Algebra.\n", + "\n", + "Since your primary interest is in machine learning and data science, and you're looking for intermediate-level courses, you might want to focus on courses that align more directly with those areas. 
If you need further assistance or have any other questions, feel free to ask!\n", + "\n", + "โœ… Agent resolved 'those prerequisites' and checked student's transcript!\n", + "\n", + "================================================================================\n", + "โœ… DEMO COMPLETE: Memory-enhanced RAG enables natural conversations!\n", + "================================================================================\n" + ] + } + ], + "source": [ + "demo_response_3 = await generate_and_save(demo_query_3, sarah, demo_session_id)\n", + "\n", + "print(f\"\\n๐Ÿค– Agent: {demo_response_3}\")\n", + "print(\"\\nโœ… Agent resolved 'those prerequisites' and checked student's transcript!\")\n", + "\n", + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"โœ… DEMO COMPLETE: Memory-enhanced RAG enables natural conversations!\")\n", + "print(\"=\" * 80)\n" + ] + }, + { + "cell_type": "markdown", + "id": "83059c5567f43c57", + "metadata": {}, + "source": [ + "### ๐ŸŽฏ What Just Happened?\n", + "\n", + "**Turn 1:** \"I'm interested in machine learning courses\"\n", + "- System searches courses\n", + "- Finds ML-related courses\n", + "- Responds with recommendations\n", + "- **Saves conversation to working memory**\n", + "\n", + "**Turn 2:** \"What are the prerequisites for **the first one**?\"\n", + "- System loads working memory (Turn 1)\n", + "- Resolves \"the first one\" โ†’ first course mentioned in Turn 1\n", + "- Responds with prerequisites\n", + "- **Saves updated conversation**\n", + "\n", + "**Turn 3:** \"Do I meet **those prerequisites**?\"\n", + "- System loads working memory (Turns 1-2)\n", + "- Resolves \"those prerequisites\" โ†’ prerequisites from Turn 2\n", + "- Checks student's completed courses (from profile)\n", + "- Responds with personalized answer\n", + "- **Saves updated conversation**\n", + "\n", + "**๐Ÿ’ก Key Insight:** Memory + RAG = **Natural, stateful, personalized conversations**\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ“Š Before vs. 
After Comparison\n", + "\n", + "Let's visualize the difference between stateless and memory-enhanced RAG.\n", + "\n", + "### **Stateless RAG (Section 2):**\n", + "\n", + "```\n", + "Query 1: \"I'm interested in ML courses\"\n", + " โ†’ โœ… Works (searches and returns courses)\n", + "\n", + "Query 2: \"What are the prerequisites for the first one?\"\n", + " โ†’ โŒ Fails (no conversation history)\n", + " โ†’ Agent: \"Which course are you referring to?\"\n", + "```\n", + "\n", + "**Problems:**\n", + "- โŒ No conversation continuity\n", + "- โŒ Can't resolve references\n", + "- โŒ Each query is independent\n", + "- โŒ Poor user experience\n", + "\n", + "### **Memory-Enhanced RAG (This Notebook):**\n", + "\n", + "```\n", + "Query 1: \"I'm interested in ML courses\"\n", + " โ†’ โœ… Works (searches and returns courses)\n", + " โ†’ Saves to working memory\n", + "\n", + "Query 2: \"What are the prerequisites for the first one?\"\n", + " โ†’ โœ… Works (loads conversation history)\n", + " โ†’ Resolves \"the first one\" โ†’ first course from Query 1\n", + " โ†’ Responds with prerequisites\n", + " โ†’ Saves updated conversation\n", + "\n", + "Query 3: \"Do I meet those prerequisites?\"\n", + " โ†’ โœ… Works (loads conversation history)\n", + " โ†’ Resolves \"those prerequisites\" โ†’ prerequisites from Query 2\n", + " โ†’ Checks student transcript\n", + " โ†’ Responds with personalized answer\n", + "```\n", + "\n", + "**Benefits:**\n", + "- โœ… Conversation continuity\n", + "- โœ… Reference resolution\n", + "- โœ… Personalization\n", + "- โœ… Natural user experience\n", + "\n", + "---\n", + "\n", + "## ๐ŸŽ“ Key Takeaways\n", + "\n", + "### **1. Memory Transforms RAG**\n", + "\n", + "**Without Memory (Section 2):**\n", + "- Stateless queries\n", + "- No conversation continuity\n", + "- Limited to 3 context types (System, User, Retrieved)\n", + "\n", + "**With Memory (This Notebook):**\n", + "- Stateful conversations\n", + "- Reference resolution\n", + "- All 4 context types (System, User, Conversation, Retrieved)\n", + "\n", + "### **2. Two Types of Memory Work Together**\n", + "\n", + "**Working Memory:**\n", + "- Session-scoped conversation history\n", + "- Enables reference resolution\n", + "- TTL-based (expires after 24 hours)\n", + "\n", + "**Long-term Memory:**\n", + "- User-scoped persistent facts\n", + "- Enables personalization\n", + "- Persists indefinitely\n", + "\n", + "### **3. Simple, Inline Approach**\n", + "\n", + "**What We Built:**\n", + "- Small, focused functions\n", + "- Inline code (no large classes)\n", + "- Progressive learning\n", + "- Clear demonstrations\n", + "\n", + "**Why This Matters:**\n", + "- Easy to understand\n", + "- Easy to modify\n", + "- Easy to extend\n", + "- Foundation for LangGraph agents (Part 2)\n", + "\n", + "### **4. All Four Context Types**\n", + "\n", + "**System Context:** Role, instructions, guidelines\n", + "**User Context:** Profile + long-term memories\n", + "**Conversation Context:** Working memory\n", + "**Retrieved Context:** RAG results\n", + "\n", + "**Together:** Natural, stateful, personalized conversations\n", + "\n", + "**๐Ÿ’ก Research Insight:** Context Rot demonstrates that context structure and organization affect LLM attention. Memory systems that selectively retrieve and organize context outperform systems that dump all available information. 
([Context Rot paper](https://research.trychroma.com/context-rot))\n", + "\n", + "---\n", + "\n", + "## ๐Ÿš€ What's Next?\n", + "\n", + "### **Part 2: Converting to LangGraph Agent (Separate Notebook)**\n", + "\n", + "In the next notebook (`03_langgraph_agent_conversion.ipynb`), we'll:\n", + "\n", + "1. **Convert** memory-enhanced RAG to LangGraph agent\n", + "2. **Add** state management and control flow\n", + "3. **Prepare** for Section 4 (tools and advanced capabilities)\n", + "4. **Build** a foundation for production-ready agents\n", + "\n", + "**Why LangGraph?**\n", + "- Better state management\n", + "- More control over agent flow\n", + "- Easier to add tools (Section 4)\n", + "- Production-ready architecture\n", + "\n", + "### **Section 4: Tools and Advanced Agents**\n", + "\n", + "After completing Part 2, you'll be ready for Section 4.\n", + "\n", + "**๐Ÿ’ก What's Next:**\n", + "\n", + "In Section 4, you'll build an agent that can actively decide when to use memory tools, rather than having memory operations hardcoded in your application flow.\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ‹๏ธ Practice Exercises\n", + "\n", + "### **Exercise 1: Add Personalization**\n", + "\n", + "Modify the system to use long-term memories for personalization:\n", + "\n", + "1. Store student preferences in long-term memory\n", + "2. Search long-term memory in `assemble_context()`\n", + "3. Use memories to personalize recommendations\n", + "\n", + "**Hint:** Use `memory_client.create_long_term_memory()` and `memory_client.search_long_term_memory()`\n", + "\n", + "### **Exercise 2: Add Error Handling**\n", + "\n", + "Add error handling for memory operations:\n", + "\n", + "1. Handle case when Memory Server is unavailable\n", + "2. Fallback to stateless RAG\n", + "3. Log warnings appropriately\n", + "\n", + "**Hint:** Check `MEMORY_SERVER_AVAILABLE` flag\n", + "\n", + "### **Exercise 3: Add Conversation Summary**\n", + "\n", + "Add a function to summarize the conversation:\n", + "\n", + "1. Load working memory\n", + "2. Extract key points from conversation\n", + "3. Display summary to user\n", + "\n", + "**Hint:** Use LLM to generate summary from conversation history\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "1850ca00-5255-45e3-ac2a-e332f1a64cea", + "metadata": {}, + "source": [ + "### **Exercise 4: Compare Memory Extraction Strategies** ๐Ÿ†•\n", + "\n", + "In Notebook 1, we learned about memory extraction strategies. 
Now let's see them in action!\n", + "\n", + "**Goal:** Compare how discrete vs summary strategies extract different types of memories from the same conversation.\n", + "\n", + "**Scenario:** A student has a long advising session discussing their academic goals, course preferences, and career aspirations.\n" + ] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "#### **Understanding the Difference**\n", + "\n", + "**Discrete Strategy (Default):**\n", + "- Extracts individual facts: \"User's major is CS\", \"User interested in ML\", \"User wants to graduate Spring 2026\"\n", + "- Each fact is independently searchable\n", + "- Good for: Most conversations, factual Q&A\n", + "\n", + "**Summary Strategy:**\n", + "- Creates conversation summary: \"User discussed academic planning, expressing interest in ML courses for Spring 2026 graduation...\"\n", + "- Preserves conversational context\n", + "- Good for: Long sessions, meeting notes, comprehensive context\n", + "\n", + "**Let's see the difference with real code!**\n" + ], + "id": "6435601dec8615ec" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "#### **Demo: Discrete Strategy (Current Default)**\n", + "id": "2cc3e83167dc6e1a" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "if MEMORY_SERVER_AVAILABLE:\n", + " import uuid\n", + " from agent_memory_client.models import MemoryStrategyConfig, UserId\n", + "\n", + " # Create a test session with discrete strategy (default)\n", + " discrete_session_id = f\"demo_discrete_{uuid.uuid4().hex[:8]}\"\n", + " discrete_student_id = f\"student_discrete_{uuid.uuid4().hex[:8]}\"\n", + "\n", + " print(\"๐ŸŽฏ Testing DISCRETE Strategy (Default)\")\n", + " print(\"=\" * 80)\n", + " print(f\"Session ID: {discrete_session_id}\")\n", + " print(f\"Student ID: {discrete_student_id}\\n\")\n", + "\n", + " # Simulate a long advising conversation\n", + " advising_conversation = [\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": \"Hi! I'm a Computer Science major planning to graduate in Spring 2026. I'm really interested in machine learning and AI.\"\n", + " },\n", + " {\n", + " \"role\": \"assistant\",\n", + " \"content\": \"Great to meet you! I can help you plan your ML/AI coursework. What's your current experience level with machine learning?\"\n", + " },\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": \"I've taken intro to Python and data structures. I prefer online courses because I work part-time. I'm hoping to get an internship at a tech startup next summer.\"\n", + " },\n", + " {\n", + " \"role\": \"assistant\",\n", + " \"content\": \"Perfect! Based on your goals, I'd recommend starting with RU301 (Querying, Indexing, and Full-Text Search) and RU330 (Trading Engine). Both are available online.\"\n", + " },\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": \"That sounds good. I'm also interested in vector databases since they're used in AI applications. Do you have courses on that?\"\n", + " },\n", + " {\n", + " \"role\": \"assistant\",\n", + " \"content\": \"Absolutely! RU401 (Running Redis at Scale) covers vector search capabilities. 
It's a great fit for your AI interests.\"\n", + " }\n", + " ]\n", + "\n", + " # Store conversation in working memory (discrete strategy is default)\n", + " messages = [\n", + " MemoryMessage(role=msg[\"role\"], content=msg[\"content\"])\n", + " for msg in advising_conversation\n", + " ]\n", + "\n", + " await memory_client.set_working_memory(\n", + " session_id=discrete_session_id,\n", + " messages=messages,\n", + " user_id=discrete_student_id\n", + " )\n", + "\n", + " print(\"โœ… Conversation stored with DISCRETE strategy\")\n", + " print(f\" Messages: {len(messages)}\")\n", + " print(\"\\nโณ Waiting for automatic memory extraction...\")\n", + "\n", + " # Wait a moment for background extraction\n", + " import asyncio\n", + " await asyncio.sleep(2)\n", + "\n", + " # Search for extracted memories\n", + " discrete_memories = await memory_client.search_long_term_memory(\n", + " text=\"student preferences and goals\",\n", + " user_id=UserId(eq=discrete_student_id),\n", + " limit=10\n", + " )\n", + "\n", + " print(f\"\\n๐Ÿ“Š DISCRETE Strategy Results:\")\n", + " print(f\" Extracted {len(discrete_memories)} individual memories\\n\")\n", + "\n", + " if discrete_memories:\n", + " for i, mem in enumerate(discrete_memories[:5], 1):\n", + " print(f\" {i}. {mem.text[:100]}...\")\n", + " else:\n", + " print(\" โณ No memories extracted yet (background processing may take time)\")\n", + " print(\" Note: In production, extraction happens asynchronously\")\n", + "else:\n", + " print(\"โš ๏ธ Memory Server not available - skipping demo\")\n" + ], + "id": "97b9702ef4347804" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "#### **Demo: Summary Strategy**\n", + "\n", + "Now let's see how the SUMMARY strategy handles the same conversation differently.\n" + ], + "id": "36519930b77297f3" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "if MEMORY_SERVER_AVAILABLE:\n", + " # Create a test session with SUMMARY strategy\n", + " summary_session_id = f\"demo_summary_{uuid.uuid4().hex[:8]}\"\n", + " summary_student_id = f\"student_summary_{uuid.uuid4().hex[:8]}\"\n", + "\n", + " print(\"\\n๐ŸŽฏ Testing SUMMARY Strategy\")\n", + " print(\"=\" * 80)\n", + " print(f\"Session ID: {summary_session_id}\")\n", + " print(f\"Student ID: {summary_student_id}\\n\")\n", + "\n", + " # Configure summary strategy\n", + " summary_strategy = MemoryStrategyConfig(\n", + " strategy=\"summary\",\n", + " config={\"max_summary_length\": 500}\n", + " )\n", + "\n", + " # Store the SAME conversation with summary strategy\n", + " messages = [\n", + " MemoryMessage(role=msg[\"role\"], content=msg[\"content\"])\n", + " for msg in advising_conversation\n", + " ]\n", + "\n", + " await memory_client.set_working_memory(\n", + " session_id=summary_session_id,\n", + " messages=messages,\n", + " user_id=summary_student_id,\n", + " long_term_memory_strategy=summary_strategy # โ† Key difference!\n", + " )\n", + "\n", + " print(\"โœ… Conversation stored with SUMMARY strategy\")\n", + " print(f\" Messages: {len(messages)}\")\n", + " print(f\" Strategy: summary (max_summary_length=500)\")\n", + " print(\"\\nโณ Waiting for automatic memory extraction...\")\n", + "\n", + " # Wait for background extraction\n", + " await asyncio.sleep(2)\n", + "\n", + " # Search for extracted memories\n", + " summary_memories = await memory_client.search_long_term_memory(\n", + " text=\"student preferences and goals\",\n", + " user_id=UserId(eq=summary_student_id),\n", + " limit=10\n", + " 
)\n", + "\n", + " print(f\"\\n๐Ÿ“Š SUMMARY Strategy Results:\")\n", + " print(f\" Extracted {len(summary_memories)} conversation summaries\\n\")\n", + "\n", + " if summary_memories:\n", + " for i, mem in enumerate(summary_memories[:3], 1):\n", + " print(f\" {i}. {mem.text}\\n\")\n", + " else:\n", + " print(\" โณ No summaries extracted yet (background processing may take time)\")\n", + " print(\" Note: In production, extraction happens asynchronously\")\n", + "else:\n", + " print(\"โš ๏ธ Memory Server not available - skipping demo\")\n", + "\n" + ], + "id": "90262aaa860ae39e" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "#### **Comparison: When to Use Each Strategy**\n", + "\n", + "**Use DISCRETE Strategy (Default) when:**\n", + "- โœ… You want individual, searchable facts\n", + "- โœ… Facts should be independently retrievable\n", + "- โœ… Building knowledge graphs or fact databases\n", + "- โœ… Most general-purpose agent interactions\n", + "\n", + "**Example:** Course advisor agent (our use case)\n", + "- \"User's major is Computer Science\"\n", + "- \"User interested in machine learning\"\n", + "- \"User prefers online courses\"\n", + "- \"User wants to graduate Spring 2026\"\n", + "\n", + "**Use SUMMARY Strategy when:**\n", + "- โœ… Long conversations need to be preserved as context\n", + "- โœ… Meeting notes or session summaries\n", + "- โœ… Comprehensive context matters more than individual facts\n", + "- โœ… Reducing storage while preserving meaning\n", + "\n", + "**Example:** Academic advising session summary\n", + "- \"Student discussed academic planning for Spring 2026 graduation, expressing strong interest in ML/AI courses. Prefers online format due to part-time work. Seeking tech startup internship. Recommended RU301, RU330, and RU401 based on AI career goals.\"\n", + "\n", + "**Use PREFERENCES Strategy when:**\n", + "- โœ… Building user profiles\n", + "- โœ… Personalization is primary goal\n", + "- โœ… User onboarding flows\n", + "\n", + "**Example:** User profile building\n", + "- \"User prefers email over SMS notifications\"\n", + "- \"User works best in morning hours\"\n", + "- \"User prefers dark mode interfaces\"\n" + ], + "id": "ecefdf0ba5d5621b" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "#### **Key Takeaway**\n", + "\n", + "**For this course, we use Discrete Strategy (default)** because:\n", + "1. Course advising benefits from searchable individual facts\n", + "2. Students ask specific questions (\"What are my prerequisites?\")\n", + "3. Facts are independently useful (\"User completed RU101\")\n", + "4. 
Balances detail with storage efficiency\n", + "\n", + "**In production**, you might use:\n", + "- **Discrete** for most interactions\n", + "- **Summary** for long consultation sessions\n", + "- **Preferences** during onboarding\n", + "- **Custom** for domain-specific needs (legal, medical, technical)\n" + ], + "id": "2836d12f1ac55727" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "#### **Configuration Reference**\n", + "\n", + "**Discrete Strategy (Default - No Config Needed):**\n", + "```python\n", + "# This is the default - no configuration required\n", + "await memory_client.set_working_memory(\n", + " session_id=session_id,\n", + " messages=messages,\n", + " user_id=user_id\n", + ")\n", + "```\n", + "\n", + "**Summary Strategy:**\n", + "```python\n", + "from agent_memory_client.models import MemoryStrategyConfig\n", + "\n", + "summary_strategy = MemoryStrategyConfig(\n", + " strategy=\"summary\",\n", + " config={\"max_summary_length\": 500}\n", + ")\n", + "\n", + "await memory_client.set_working_memory(\n", + " session_id=session_id,\n", + " messages=messages,\n", + " user_id=user_id,\n", + " long_term_memory_strategy=summary_strategy\n", + ")\n", + "```\n", + "\n", + "**Preferences Strategy:**\n", + "```python\n", + "preferences_strategy = MemoryStrategyConfig(\n", + " strategy=\"preferences\",\n", + " config={}\n", + ")\n", + "\n", + "await memory_client.set_working_memory(\n", + " session_id=session_id,\n", + " messages=messages,\n", + " user_id=user_id,\n", + " long_term_memory_strategy=preferences_strategy\n", + ")\n", + "```\n" + ], + "id": "8a2e7ad698521ca8" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "#### **๐Ÿ“š Learn More**\n", + "\n", + "For complete documentation and advanced configuration:\n", + "- [Memory Extraction Strategies Documentation](https://redis.github.io/agent-memory-server/memory-extraction-strategies/)\n", + "- [Working Memory Configuration](https://redis.github.io/agent-memory-server/working-memory/)\n", + "- [Long-term Memory Best Practices](https://redis.github.io/agent-memory-server/long-term-memory/)\n", + "\n", + "**Next:** In Section 4, we'll see how agents use these strategies in production workflows.\n", + "\n", + "\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ“ Summary\n", + "\n", + "### **What You Learned:**\n", + "\n", + "1. โœ… **Built** memory-enhanced RAG system\n", + "2. โœ… **Integrated** all four context types\n", + "3. โœ… **Demonstrated** benefits of memory\n", + "4. โœ… **Prepared** for LangGraph conversion\n", + "\n", + "### **Key Concepts:**\n", + "\n", + "- **Working Memory** - Session-scoped conversation history\n", + "- **Long-term Memory** - User-scoped persistent facts\n", + "- **Context Assembly** - Combining all four context types\n", + "- **Reference Resolution** - Resolving pronouns and references\n", + "- **Stateful Conversations** - Natural, continuous dialogue\n", + "\n", + "### **Next Steps:**\n", + "\n", + "1. Complete practice exercises\n", + "2. Experiment with different queries\n", + "3. Move to Part 2 (LangGraph agent conversion)\n", + "4. 
Prepare for Section 4 (tools and advanced agents)\n", + "\n", + "**๐ŸŽ‰ Congratulations!** You've built a complete memory-enhanced RAG system!\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ”— Resources\n", + "\n", + "- **Section 1:** Four Context Types\n", + "- **Section 2:** RAG Fundamentals\n", + "- **Section 3 (Notebook 1):** Memory Fundamentals\n", + "- **Section 3 (Notebook 3):** LangGraph Agent Conversion (Next)\n", + "- **Section 4:** Tools and Advanced Agents\n", + "\n", + "**Agent Memory Server:**\n", + "- GitHub: `reference-agent/`\n", + "- Documentation: See README.md\n", + "- API Client: `agent-memory-client`\n", + "\n", + "**LangChain:**\n", + "- Documentation: https://python.langchain.com/\n", + "- LangGraph: https://langchain-ai.github.io/langgraph/\n", + "\n", + "---\n", + "\n", + "![Redis](https://redis.io/wp-content/uploads/2024/04/Logotype.svg?auto=webp&quality=85,75&width=120)\n", + "\n", + "**Redis University - Context Engineering Course**\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ“š Additional Resources\n", + "\n", + "- [Agent Memory Server Documentation](https://github.com/redis/agent-memory-server) - Production-ready memory management\n", + "- [Agent Memory Client](https://pypi.org/project/agent-memory-client/) - Python client for Agent Memory Server\n", + "- [RedisVL Documentation](https://redisvl.com/) - Redis Vector Library\n", + "- [Retrieval-Augmented Generation Paper](https://arxiv.org/abs/2005.11401) - Original RAG research\n", + "- [LangChain RAG Tutorial](https://python.langchain.com/docs/use_cases/question_answering/) - Building RAG systems\n", + "- [LangGraph Tutorials](https://langchain-ai.github.io/langgraph/tutorials/) - Building agents with LangGraph\n", + "- [Agent Architectures](https://python.langchain.com/docs/modules/agents/) - Different agent patterns\n", + "- [ReAct: Synergizing Reasoning and Acting](https://arxiv.org/abs/2210.03629) - Reasoning + acting in LLMs\n", + "- [Anthropic's Guide to Building Effective Agents](https://www.anthropic.com/research/building-effective-agents) - Agent design patterns\n", + "\n", + "\n" + ], + "id": "ffd903461d805026" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": "", + "id": "6bd68f27c65d3b21" + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/python-recipes/context-engineering/notebooks/section-3-memory-systems-for-context-engineering/03_manage_long_conversations_with_compression_strategies.ipynb b/python-recipes/context-engineering/notebooks/section-3-memory-systems-for-context-engineering/03_manage_long_conversations_with_compression_strategies.ipynb new file mode 100644 index 00000000..8be48258 --- /dev/null +++ b/python-recipes/context-engineering/notebooks/section-3-memory-systems-for-context-engineering/03_manage_long_conversations_with_compression_strategies.ipynb @@ -0,0 +1,3703 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "3d06c497fe3df20b", + "metadata": {}, + "source": [ + "![Redis](https://redis.io/wp-content/uploads/2024/04/Logotype.svg?auto=webp&quality=85,75&width=120)\n", + "\n", + "# ๐Ÿง  Managing Long Conversations with Compression 
Strategies\n", + "\n", + "**โฑ๏ธ Estimated Time:** 50-60 minutes\n", + "\n", + "## ๐ŸŽฏ Learning Objectives\n", + "\n", + "By the end of this notebook, you will:\n", + "\n", + "1. **Understand** why long conversations need management (token limits, cost, performance)\n", + "2. **Implement** conversation summarization to preserve key information\n", + "3. **Build** context compression strategies (truncation, priority-based, summarization)\n", + "4. **Configure** automatic memory management with Agent Memory Server\n", + "5. **Decide** when to apply each technique based on conversation characteristics\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ”— Where We Are\n", + "\n", + "### **Your Journey So Far:**\n", + "\n", + "**Section 3, Notebook 1:** Memory Fundamentals\n", + "- โœ… Working memory for conversation continuity\n", + "- โœ… Long-term memory for persistent knowledge\n", + "- โœ… The grounding problem and reference resolution\n", + "- โœ… Memory types (semantic, episodic, message)\n", + "\n", + "**Section 3, Notebook 2:** Memory-Enhanced RAG\n", + "- โœ… Integrated all four context types\n", + "- โœ… Built complete memory-enhanced RAG system\n", + "- โœ… Demonstrated benefits of stateful conversations\n", + "\n", + "**Your memory system works!** It can:\n", + "- Remember conversation history across turns\n", + "- Store and retrieve long-term facts\n", + "- Resolve references (\"it\", \"that course\")\n", + "- Provide personalized recommendations\n", + "\n", + "### **But... What About Long Conversations?**\n", + "\n", + "**Questions we can't answer yet:**\n", + "- โ“ What happens when conversations get really long?\n", + "- โ“ How do we handle token limits?\n", + "- โ“ How much does a 50-turn conversation cost?\n", + "- โ“ Can we preserve important context while reducing tokens?\n", + "- โ“ When should we summarize vs. truncate vs. keep everything?\n", + "\n", + "---\n", + "\n", + "## ๐Ÿšจ The Long Conversation Problem\n", + "\n", + "Before diving into solutions, let's understand the fundamental problem.\n", + "\n", + "### **The Problem: Unbounded Growth**\n", + "\n", + "Every conversation turn adds messages to working memory:\n", + "\n", + "```\n", + "Turn 1: System (500) + Messages (200) = 700 tokens โœ…\n", + "Turn 5: System (500) + Messages (1,000) = 1,500 tokens โœ…\n", + "Turn 20: System (500) + Messages (4,000) = 4,500 tokens โœ…\n", + "Turn 50: System (500) + Messages (10,000) = 10,500 tokens โš ๏ธ\n", + "Turn 100: System (500) + Messages (20,000) = 20,500 tokens โš ๏ธ\n", + "Turn 200: System (500) + Messages (40,000) = 40,500 tokens โŒ\n", + "```\n", + "\n", + "**Without management, conversations grow unbounded!**\n", + "\n", + "### **Why This Matters**\n", + "\n", + "**1. Token Limits (Hard Constraint)**\n", + "- GPT-4o: 128K tokens (~96,000 words)\n", + "- GPT-3.5: 16K tokens (~12,000 words)\n", + "- Eventually, you'll hit the limit and conversations fail\n", + "\n", + "**2. Cost (Economic Constraint)**\n", + "- Input tokens cost money (e.g. $0.0025 / 1K tokens for GPT-4o)\n", + "\n", + "- A 50-turn conversation = ~10,000 tokens = $0.025 per query\n", + "\n", + "- Over 1,000 conversations = $25 just for conversation history!\n", + "\n", + "**3. Performance (Quality Constraint)**\n", + "- More tokens = longer processing time\n", + "- Context Rot: LLMs struggle with very long contexts\n", + "- Important information gets \"lost in the middle\"\n", + "\n", + "**4. 
User Experience**\n", + "- Slow responses frustrate users\n", + "- Expensive conversations aren't sustainable\n", + "- Failed conversations due to token limits are unacceptable\n", + "\n", + "### **The Solution: Memory Management**\n", + "\n", + "We need strategies to:\n", + "- โœ… Keep conversations within token budgets\n", + "- โœ… Preserve important information\n", + "- โœ… Maintain conversation quality\n", + "- โœ… Control costs\n", + "- โœ… Enable indefinite conversations\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ“ฆ Part 0: Setup and Environment\n", + "\n", + "Let's set up our environment and create tools for measuring conversation growth.\n", + "\n", + "### โš ๏ธ Prerequisites\n", + "\n", + "**Before running this notebook, make sure you have:**\n", + "\n", + "1. **Docker Desktop running** - Required for Redis and Agent Memory Server\n", + "\n", + "2. **Environment variables** - Create a `.env` file in the `reference-agent` directory:\n", + " ```bash\n", + " # Copy the example file\n", + " cd ../../reference-agent\n", + " cp .env.example .env\n", + "\n", + " # Edit .env and add your OpenAI API key\n", + " # OPENAI_API_KEY=your_actual_openai_api_key_here\n", + " ```\n", + "\n", + "3. **Run the setup script** - This will automatically start Redis and Agent Memory Server:\n", + " ```bash\n", + " cd ../../reference-agent\n", + " python setup_agent_memory_server.py\n", + " ```\n" + ] + }, + { + "cell_type": "markdown", + "id": "307c59ecc51d30c3", + "metadata": {}, + "source": [ + "---\n" + ] + }, + { + "cell_type": "markdown", + "id": "dd10e48e57f1431e", + "metadata": {}, + "source": [ + "### Automated Setup Check\n", + "\n", + "Let's run the setup script to ensure all services are running properly.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "808cea2af3f4f118", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Running automated setup check...\n", + "\n", + "\n", + "๐Ÿ”ง Agent Memory Server Setup\n", + "===========================\n", + "๐Ÿ“Š Checking Redis...\n", + "โœ… Redis is running\n", + "๐Ÿ“Š Checking Agent Memory Server...\n", + "๐Ÿ” Agent Memory Server container exists. Checking health...\n", + "โœ… Agent Memory Server is running and healthy\n", + "โœ… No Redis connection issues detected\n", + "\n", + "โœ… Setup Complete!\n", + "=================\n", + "๐Ÿ“Š Services Status:\n", + " โ€ข Redis: Running on port 6379\n", + " โ€ข Agent Memory Server: Running on port 8088\n", + "\n", + "๐ŸŽฏ You can now run the notebooks!\n", + "\n", + "\n", + "โœ… All services are ready!\n" + ] + } + ], + "source": [ + "# Run the setup script to ensure Redis and Agent Memory Server are running\n", + "import subprocess\n", + "import sys\n", + "from pathlib import Path\n", + "\n", + "# Path to setup script\n", + "setup_script = Path(\"../../reference-agent/setup_agent_memory_server.py\")\n", + "\n", + "if setup_script.exists():\n", + " print(\"Running automated setup check...\\n\")\n", + " result = subprocess.run(\n", + " [sys.executable, str(setup_script)],\n", + " capture_output=True,\n", + " text=True\n", + " )\n", + " print(result.stdout)\n", + " if result.returncode != 0:\n", + " print(\"โš ๏ธ Setup check failed. Please review the output above.\")\n", + " print(result.stderr)\n", + " else:\n", + " print(\"\\nโœ… All services are ready!\")\n", + "else:\n", + " print(\"โš ๏ธ Setup script not found. 
Please ensure services are running manually.\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "4f7ab2a448dd08fc", + "metadata": {}, + "source": [ + "---\n" + ] + }, + { + "cell_type": "markdown", + "id": "9dd8400bfed20f64", + "metadata": {}, + "source": [ + "### Install Dependencies\n", + "\n", + "If you haven't already installed the reference-agent package, uncomment and run the following:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "62ad9f5d109351a", + "metadata": {}, + "outputs": [], + "source": [ + "# Uncomment to install reference-agent package\n", + "# %pip install -q -e ../../reference-agent\n", + "\n", + "# Uncomment to install agent-memory-client\n", + "# %pip install -q agent-memory-client\n" + ] + }, + { + "cell_type": "markdown", + "id": "b41bf6b02f73fdb9", + "metadata": {}, + "source": [ + "### Import Dependencies\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "b00247fc4bb718d6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… All imports successful\n" + ] + } + ], + "source": [ + "# Standard library imports\n", + "import os\n", + "import time\n", + "import asyncio\n", + "from typing import List, Dict, Any, Optional, Tuple\n", + "from dataclasses import dataclass, field\n", + "from datetime import datetime\n", + "from pathlib import Path\n", + "\n", + "# LangChain\n", + "from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n", + "from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, SystemMessage\n", + "\n", + "# Redis and Agent Memory\n", + "from agent_memory_client import MemoryAPIClient, MemoryClientConfig\n", + "from agent_memory_client.models import WorkingMemory, MemoryMessage, ClientMemoryRecord\n", + "\n", + "# Token counting\n", + "import tiktoken\n", + "\n", + "# For visualization\n", + "from collections import defaultdict\n", + "\n", + "print(\"โœ… All imports successful\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "38946d91e830639a", + "metadata": {}, + "source": [ + "### Load Environment Variables\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "41a3192aacee6dbf", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Environment variables configured\n", + " Redis URL: redis://localhost:6379\n", + " Agent Memory URL: http://localhost:8088\n" + ] + } + ], + "source": [ + "from dotenv import load_dotenv\n", + "\n", + "# Load environment variables from reference-agent directory\n", + "env_path = Path(\"../../reference-agent/.env\")\n", + "load_dotenv(dotenv_path=env_path)\n", + "\n", + "# Verify required environment variables\n", + "OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")\n", + "REDIS_URL = os.getenv(\"REDIS_URL\", \"redis://localhost:6379\")\n", + "AGENT_MEMORY_URL = os.getenv(\"AGENT_MEMORY_URL\", \"http://localhost:8088\")\n", + "\n", + "if not OPENAI_API_KEY:\n", + " print(f\"\"\"โŒ OPENAI_API_KEY not found!\n", + "\n", + "Please create a .env file at: {env_path.absolute()}\n", + "\n", + "With the following content:\n", + "OPENAI_API_KEY=your_openai_api_key\n", + "REDIS_URL=redis://localhost:6379\n", + "AGENT_MEMORY_URL=http://localhost:8088\n", + "\"\"\")\n", + "else:\n", + " print(\"โœ… Environment variables configured\")\n", + " print(f\" Redis URL: {REDIS_URL}\")\n", + " print(f\" Agent Memory URL: {AGENT_MEMORY_URL}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "2f42157025d92c5", + "metadata": {}, + "source": [ + "### Initialize 
Clients\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "f6acdabe9f826582", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Clients initialized\n", + " LLM: gpt-4o\n", + " Embeddings: text-embedding-3-small\n", + " Memory Server: http://localhost:8088\n" + ] + } + ], + "source": [ + "# Initialize LLM\n", + "llm = ChatOpenAI(\n", + " model=\"gpt-4o\",\n", + " temperature=0.7\n", + ")\n", + "\n", + "# Initialize embeddings\n", + "embeddings = OpenAIEmbeddings(\n", + " model=\"text-embedding-3-small\"\n", + ")\n", + "\n", + "# Initialize Agent Memory Client\n", + "memory_config = MemoryClientConfig(base_url=AGENT_MEMORY_URL)\n", + "memory_client = MemoryAPIClient(config=memory_config)\n", + "\n", + "# Initialize tokenizer for counting\n", + "tokenizer = tiktoken.encoding_for_model(\"gpt-4o\")\n", + "\n", + "def count_tokens(text: str) -> int:\n", + " \"\"\"Count tokens in text using tiktoken.\"\"\"\n", + " return len(tokenizer.encode(text))\n", + "\n", + "print(\"โœ… Clients initialized\")\n", + "print(f\" LLM: {llm.model_name}\")\n", + "print(f\" Embeddings: text-embedding-3-small\")\n", + "print(f\" Memory Server: {AGENT_MEMORY_URL}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "cb3c6e2d8cee7f21", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## ๐Ÿ“Š Part 1: Understanding Conversation Growth\n", + "\n", + "Let's visualize how conversations grow and understand the implications.\n" + ] + }, + { + "cell_type": "markdown", + "id": "38b4a48ea4fee96b", + "metadata": {}, + "source": [ + "### ๐Ÿ”ฌ Research Context: Why Context Management Matters\n", + "\n", + "Modern LLMs have impressive context windows:\n", + "- **GPT-4o**: 128K tokens (~96,000 words)\n", + "- **Claude 3.5**: 200K tokens (~150,000 words)\n", + "- **Gemini 1.5 Pro**: 1M tokens (~750,000 words)\n", + "\n", + "**But here's the problem:** Larger context windows don't guarantee better performance.\n", + "\n", + "#### The \"Lost in the Middle\" Problem\n", + "\n", + "Research by Liu et al. (2023) in their paper [\"Lost in the Middle: How Language Models Use Long Contexts\"](https://arxiv.org/abs/2307.03172) revealed critical findings:\n", + "\n", + "**Key Finding #1: U-Shaped Performance**\n", + "- Models perform best when relevant information is at the **beginning** or **end** of context\n", + "- Performance **significantly degrades** when information is in the **middle** of long contexts\n", + "- This happens even with models explicitly designed for long contexts\n", + "\n", + "**Key Finding #2: Non-Uniform Degradation**\n", + "- It's not just about hitting token limits\n", + "- Quality degrades **even within the context window**\n", + "- The longer the context, the worse the \"middle\" performance becomes\n", + "\n", + "**Key Finding #3: More Context โ‰  Better Results**\n", + "- In some experiments, GPT-3.5 performed **worse** with retrieved documents than with no documents at all\n", + "- Adding more context can actually **hurt** performance if not managed properly\n", + "\n", + "**Why This Matters for Memory Management:**\n", + "- Simply storing all conversation history isn't optimal\n", + "- We need **intelligent compression** to keep important information accessible\n", + "- **Position matters**: Recent context (at the end) is naturally well-positioned\n", + "- **Quality over quantity**: Better to have concise, relevant context than exhaustive history\n", + "\n", + "**References:**\n", + "- Liu, N. 
F., Lin, K., Hewitt, J., Paranjape, A., Bevilacqua, M., Petroni, F., & Liang, P. (2023). Lost in the Middle: How Language Models Use Long Contexts. *Transactions of the Association for Computational Linguistics (TACL)*.\n" + ] + }, + { + "cell_type": "markdown", + "id": "9ff7e262cad76878", + "metadata": {}, + "source": [ + "### Demo 1: Token Growth Over Time\n", + "\n", + "Now let's see this problem in action by simulating conversation growth.\n", + "\n", + "#### Step 1: Define our system prompt and count its tokens\n", + "\n", + "**What:** Creating a system prompt and measuring its token count.\n", + "\n", + "**Why:** The system prompt is sent with EVERY request, so its size directly impacts costs. Understanding this baseline is crucial for budgeting.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "99edd1b0325093b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "System prompt: 31 tokens\n" + ] + } + ], + "source": [ + "# System prompt (constant across all turns)\n", + "system_prompt = \"\"\"You are a helpful course advisor for Redis University.\n", + "Help students find courses, check prerequisites, and plan their schedule.\n", + "Be friendly, concise, and accurate.\"\"\"\n", + "\n", + "system_tokens = count_tokens(system_prompt)\n", + "\n", + "print(f\"System prompt: {system_tokens} tokens\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "1a9e0cfece6beaf5", + "metadata": {}, + "source": [ + "#### Step 2: Simulate how tokens grow with each conversation turn\n", + "\n", + "**What:** Projecting token growth and costs across 1 to 200 conversation turns.\n", + "\n", + "**Why:** Visualizing the growth curve shows when conversations become expensive (>20K tokens) and helps you plan compression strategies. 
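The projection follows a simple linear formula per query, which is exactly what the next cell computes: `total_tokens(N) = system_tokens + N × avg_pair_tokens`, and `cost ≈ total_tokens / 1000 × $0.0025` (GPT-4o input pricing, used throughout this notebook).\n",
+    "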
Notice how costs accelerate - this is the quadratic growth problem.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "117ca757272caef3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Conversation Growth Simulation:\n", + "================================================================================\n", + "Turn Messages Conv Tokens Total Tokens Cost ($) \n", + "--------------------------------------------------------------------------------\n", + "1 2 100 131 $0.0003 โœ…\n", + "5 10 500 531 $0.0013 โœ…\n", + "10 20 1,000 1,031 $0.0026 โœ…\n", + "20 40 2,000 2,031 $0.0051 โœ…\n", + "30 60 3,000 3,031 $0.0076 โœ…\n", + "50 100 5,000 5,031 $0.0126 โš ๏ธ\n", + "75 150 7,500 7,531 $0.0188 โš ๏ธ\n", + "100 200 10,000 10,031 $0.0251 โš ๏ธ\n", + "150 300 15,000 15,031 $0.0376 โš ๏ธ\n", + "200 400 20,000 20,031 $0.0501 โŒ\n" + ] + } + ], + "source": [ + "# Assume average message pair (user + assistant) = 100 tokens\n", + "avg_message_pair_tokens = 100\n", + "\n", + "print(\"\\nConversation Growth Simulation:\")\n", + "print(\"=\" * 80)\n", + "print(f\"{'Turn':<8} {'Messages':<10} {'Conv Tokens':<15} {'Total Tokens':<15} {'Cost ($)':<12}\")\n", + "print(\"-\" * 80)\n", + "\n", + "for turn in [1, 5, 10, 20, 30, 50, 75, 100, 150, 200]:\n", + " # Each turn = user message + assistant message\n", + " num_messages = turn * 2\n", + " conversation_tokens = num_messages * (avg_message_pair_tokens // 2)\n", + " total_tokens = system_tokens + conversation_tokens\n", + "\n", + " # Cost calculation (GPT-4o input: $0.0025 per 1K tokens)\n", + " cost_per_query = (total_tokens / 1000) * 0.0025\n", + "\n", + " # Visual indicator\n", + " if total_tokens < 5000:\n", + " indicator = \"โœ…\"\n", + " elif total_tokens < 20000:\n", + " indicator = \"โš ๏ธ\"\n", + " else:\n", + " indicator = \"โŒ\"\n", + "\n", + " print(f\"{turn:<8} {num_messages:<10} {conversation_tokens:<15,} {total_tokens:<15,} ${cost_per_query:<11.4f} {indicator}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "544c9c59a8e344be", + "metadata": {}, + "source": [ + "### Demo 2: Cost Analysis\n", + "\n", + "Let's calculate the cumulative cost of long conversations.\n", + "\n", + "**Why costs grow quadratically:**\n", + "- Turn 1: Process 100 tokens\n", + "- Turn 2: Process 200 tokens (includes turn 1)\n", + "- Turn 3: Process 300 tokens (includes turns 1 & 2)\n", + "- Turn N: Process Nร—100 tokens\n", + "\n", + "Total cost = 100 + 200 + 300 + ... + Nร—100 = **O(Nยฒ)** growth!\n", + "\n", + "#### Step 1: Create a function to calculate conversation costs\n", + "\n", + "**What:** Building a cost calculator that accounts for cumulative token processing.\n", + "\n", + "**Why:** Each turn processes ALL previous messages, so costs compound. 
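In closed form: with ~100 tokens per turn, the tokens processed across N turns sum to roughly 100 × (1 + 2 + ... + N) = 100 × N(N+1)/2, so N = 200 means about 2 million cumulative tokens - exactly what the table in the next demo reports.\n",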
+    "This function reveals the true cost of long conversations - not just the final token count, but the sum of all API calls.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "998184e76d362bf3",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "✅ Cost calculation function defined\n"
+     ]
+    }
+   ],
+   "source": [
+    "def calculate_conversation_cost(num_turns: int, avg_tokens_per_turn: int = 100) -> Dict[str, float]:\n",
+    "    \"\"\"\n",
+    "    Calculate cost metrics for a conversation.\n",
+    "\n",
+    "    Args:\n",
+    "        num_turns: Number of conversation turns\n",
+    "        avg_tokens_per_turn: Average tokens per turn (user + assistant)\n",
+    "\n",
+    "    Returns:\n",
+    "        Dictionary with cost metrics\n",
+    "    \"\"\"\n",
+    "    system_tokens = 50  # Simplified\n",
+    "\n",
+    "    # Cumulative cost (each turn includes all previous messages)\n",
+    "    cumulative_tokens = 0\n",
+    "    cumulative_cost = 0.0\n",
+    "\n",
+    "    for turn in range(1, num_turns + 1):\n",
+    "        # Total tokens for this turn\n",
+    "        conversation_tokens = turn * avg_tokens_per_turn\n",
+    "        total_tokens = system_tokens + conversation_tokens\n",
+    "\n",
+    "        # Cost for this turn (input tokens)\n",
+    "        turn_cost = (total_tokens / 1000) * 0.0025\n",
+    "        cumulative_cost += turn_cost\n",
+    "        cumulative_tokens += total_tokens\n",
+    "\n",
+    "    return {\n",
+    "        \"num_turns\": num_turns,\n",
+    "        \"final_tokens\": system_tokens + (num_turns * avg_tokens_per_turn),\n",
+    "        \"cumulative_tokens\": cumulative_tokens,\n",
+    "        \"cumulative_cost\": cumulative_cost,\n",
+    "        \"avg_cost_per_turn\": cumulative_cost / num_turns\n",
+    "    }\n",
+    "\n",
+    "print(\"✅ Cost calculation function defined\")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6710bd8b0268c34d",
+   "metadata": {},
+   "source": [
+    "#### Step 2: Compare costs across different conversation lengths\n",
+    "\n",
+    "**What:** Running cost projections for conversations from 10 to 200 turns.\n",
+    "\n",
+    "**Why:** Seeing the quadratic growth in action - the final query of a 200-turn conversation costs only about $0.05, yet the cumulative cost across all 200 turns is over $5. This motivates compression strategies.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "4441a3298bd38af8",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Cost Analysis for Different Conversation Lengths:\n",
+      "================================================================================\n",
+      "Turns      Final Tokens    Cumulative Tokens    Total Cost      Avg/Turn\n",
+      "--------------------------------------------------------------------------------\n",
+      "10         1,050           6,000                $0.02           $0.0015\n",
+      "25         2,550           33,750               $0.08           $0.0034\n",
+      "50         5,050           130,000              $0.33           $0.0065\n",
+      "100        10,050          510,000              $1.27           $0.0127\n",
+      "200        20,050          2,020,000            $5.05           $0.0253\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(\"Cost Analysis for Different Conversation Lengths:\")\n",
+    "print(\"=\" * 80)\n",
+    "print(f\"{'Turns':<10} {'Final Tokens':<15} {'Cumulative Tokens':<20} {'Total Cost':<15} {'Avg/Turn'}\")\n",
+    "print(\"-\" * 80)\n",
+    "\n",
+    "for num_turns in [10, 25, 50, 100, 200]:\n",
+    "    metrics = calculate_conversation_cost(num_turns)\n",
+    "    print(f\"{metrics['num_turns']:<10} \"\n",
+    "          f\"{metrics['final_tokens']:<15,} \"\n",
+    "          f\"{metrics['cumulative_tokens']:<20,} \"\n",
+    "          f\"${metrics['cumulative_cost']:<14.2f} \"\n",
+    "          f\"${metrics['avg_cost_per_turn']:.4f}\")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "df5840eedf4a9185",
+   "metadata": {},
+   "source": [
+    "#### Key Takeaways\n",
+    "\n",
+    "**Without memory management:**\n",
+    "- Costs grow **quadratically** (O(N²))\n",
+    "- A 100-turn conversation costs ~$1.27 in total\n",
+    "- A 200-turn conversation costs ~$5.05 in total\n",
+    "- At scale (1000s of users), this becomes unsustainable\n",
+    "\n",
+    "**The solution:** Intelligent memory management to keep conversations within budget while preserving quality.\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5a7f1c4414f6d2a7",
+   "metadata": {},
+   "source": [
+    "---\n",
+    "\n",
+    "## 🎯 Part 2: Context Summarization\n",
+    "\n",
+    "**Context summarization** is the process of condensing conversation history into a compact representation that preserves essential information while dramatically reducing token count.\n",
+    "\n",
+    "Picture a chat assistant helping someone plan a wedding over 50 messages:\n",
+    "- It captures the critical details: venue choice, budget, guest count, vendor decisions\n",
+    "- It keeps the decisions and drops the small talk\n",
+    "- Later messages can reference \"the venue we picked\" without replaying the entire debate\n",
+    "\n",
+    "**The same idea applies to LLM conversations:**\n",
+    "- Condense older messages into a compact paragraph\n",
+    "- Keep the essentials (facts, decisions, user preferences)\n",
+    "- Leave recent messages untouched (they're still doing active work)\n",
+    "- Cut token usage by 50-80% without losing the thread of the conversation\n",
+    "\n",
+    "### Why Should You Care About Summarization?\n",
+    "\n",
+    "Summarization tackles three hard problems:\n",
+    "\n",
+    "**1. Keeps You Under Token Limits (Callback to Part 1)**\n",
+    "- Conversations grow without bound if you let them\n",
+    "- Summarization keeps you well below the model's ceiling\n",
+    "- **Concretely:** 50 messages (10K tokens) → compressed summary + 4 recent messages (2.5K tokens)\n",
+    "\n",
+    "**2. Fixes the Context Rot Problem (Also From Part 1)**\n",
+    "- Remember the \"Lost in the Middle\" problem? Old information gets buried mid-context and ignored\n",
+    "- Summarization pulls that old material to the front in condensed form\n",
+    "- Recent messages stay at the end, where the model pays the most attention\n",
+    "- **Upshot:** the model performs better *and* you save tokens (see the sketch below)\n",
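+    "\n",
+    "To make the ordering concrete, here is a minimal sketch. The `build_prompt()` helper and its `summary_text` / `recent_messages` inputs are hypothetical (we build the real pieces later in this notebook); it only shows where the summary and the recent turns should sit:\n",
+    "\n",
+    "```python\n",
+    "from langchain_core.messages import SystemMessage, HumanMessage, AIMessage\n",
+    "\n",
+    "def build_prompt(system_prompt, summary_text, recent_messages, new_query):\n",
+    "    \"\"\"Sketch: condensed summary near the front, recent turns and live query last.\"\"\"\n",
+    "    messages = [\n",
+    "        SystemMessage(content=system_prompt),\n",
+    "        # Condensed old turns, placed early as background notes\n",
+    "        SystemMessage(content=f\"[CONVERSATION SUMMARY]\\\\n{summary_text}\"),\n",
+    "    ]\n",
+    "    # recent_messages is assumed to look like [(\"user\", \"...\"), (\"assistant\", \"...\")]\n",
+    "    for role, content in recent_messages:\n",
+    "        messages.append(HumanMessage(content=content) if role == \"user\" else AIMessage(content=content))\n",
+    "    messages.append(HumanMessage(content=new_query))  # the live query sits at the end\n",
+    "    return messages\n",
+    "```\n",
+    "\n",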
+    "**3. Keeps Working Memory Bounded (Throwback to Notebook 1)**\n",
+    "- Working memory holds your conversation history\n",
+    "- Without summarization, it simply keeps growing\n",
+    "- Summarization trims it back at regular intervals\n",
+    "- **Payoff:** conversations that can continue indefinitely\n",
+    "\n",
+    "### When Should You Use Summarization?\n",
+    "\n",
+    "**Great for:**\n",
+    "- ✅ Long conversations (10+ turns)\n",
+    "- ✅ Conversations with a narrative arc (customer support, coaching sessions)\n",
+    "- ✅ Situations where you want history, but not ALL the history\n",
+    "- ✅ Cases where recent context matters most\n",
+    "\n",
+    "**Skip it when:**\n",
+    "- ❌ Exchanges are short (under 5 turns)\n",
+    "- ❌ Every word counts (legal documents, medical consultations)\n",
+    "- ❌ You might need verbatim quotes from early in the conversation\n",
+    "- ❌ The extra LLM call for summarization costs too much time or money\n",
+    "\n",
+    "### Where Summarization Lives in Your Memory Stack\n",
+    "```\n",
+    "┌──────────────────────────────────────────────────────────┐\n",
+    "│ Your LLM Agent Brain                                     │\n",
+    "│                                                          │\n",
+    "│ Context Window (128K tokens available)                   │\n",
+    "│ ┌──────────────────────────────────────────────────────┐ │\n",
+    "│ │ 1. System Prompt (500 tokens)                        │ │\n",
+    "│ │ 2. Long-term Memory Bank (1,000 tokens)              │ │\n",
+    "│ │ 3. RAG Retrieval Results (2,000 tokens)              │ │\n",
+    "│ │ 4. Working Memory Zone:                              │ │\n",
+    "│ │    ┌────────────────────────────────────────┐        │ │\n",
+    "│ │    │ [COMPRESSED HISTORY] (500 tokens)      │        │ │\n",
+    "│ │    │ - Critical facts from rounds 1-20      │        │ │\n",
+    "│ │    │ - Decisions that were locked in        │        │ │\n",
+    "│ │    │ - User quirks and preferences          │        │ │\n",
+    "│ │    └────────────────────────────────────────┘        │ │\n",
+    "│ │    Live Recent Messages (1,000 tokens)               │ │\n",
+    "│ │    - Round 21: User message + Assistant reply        │ │\n",
+    "│ │    - Round 22: User message + Assistant reply        │ │\n",
+    "│ │    - Round 23: User message + Assistant reply        │ │\n",
+    "│ │    - Round 24: User message + Assistant reply        │ │\n",
+    "│ │ 5. Current Incoming Query (200 tokens)               │ │\n",
+    "│ └──────────────────────────────────────────────────────┘ │\n",
+    "│                                                          │\n",
+    "│ Running total: ~5,200 tokens (instead of ~15K)           │\n",
+    "└──────────────────────────────────────────────────────────┘\n",
+    "```\n",
+    "\n",
+    "#### The Bottom Line\n",
+    "Summarization is a *compression technique* for working memory that maintains conversation continuity while keeping token counts manageable."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3d6a9c3a31a589d0",
+   "metadata": {},
+   "source": [
+    "### 🔬 Research Foundation: Recursive Summarization\n",
+    "\n",
+    "Wang et al. (2023) in [\"Recursively Summarizing Enables Long-Term Dialogue Memory in Large Language Models\"](https://arxiv.org/abs/2308.15022) demonstrated that:\n",
+    "\n",
+    "**Key Insight:** Recursive summarization enables LLMs to handle extremely long conversations by:\n",
+    "1. Memorizing small dialogue contexts\n",
+    "2. Recursively producing new memory using previous memory + new contexts (sketched in code below)\n",
+    "3. Maintaining consistency across long conversations\n",
+    "\n",
+    "**Their findings:**\n",
+    "- Improved response consistency in long-context conversations\n",
+    "- Works well with both long-context models (8K, 16K) and retrieval-enhanced LLMs\n",
+    "- Provides a practical solution for modeling extremely long contexts\n",
+    "\n",
+    "**Practical Application:**\n",
+    "- Summarize old messages while keeping recent ones intact\n",
+    "- Preserve key information (facts, decisions, preferences)\n",
+    "- Compress redundant or less important information\n",
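+    "\n",
+    "Here is a minimal sketch of the recursive idea, under stated assumptions: the `recursive_summarize()` helper is illustrative, not the paper's or this notebook's implementation.\n",
+    "\n",
+    "```python\n",
+    "from langchain_core.messages import HumanMessage\n",
+    "\n",
+    "async def recursive_summarize(llm, old_summary: str, new_messages: list) -> str:\n",
+    "    \"\"\"Fold the previous summary plus the newest messages into an updated summary.\"\"\"\n",
+    "    prompt = (\n",
+    "        \"Update the running conversation summary.\\\\n\\\\n\"\n",
+    "        f\"Previous summary:\\\\n{old_summary}\\\\n\\\\n\"\n",
+    "        \"New messages:\\\\n\" + \"\\\\n\".join(new_messages) + \"\\\\n\\\\nUpdated summary:\"\n",
+    "    )\n",
+    "    response = await llm.ainvoke([HumanMessage(content=prompt)])\n",
+    "    return response.content  # becomes `old_summary` on the next round\n",
+    "```\n",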
โ”‚\n", + "โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n", + "```\n", + "\n", + "#### The Bottom Line: \n", + "Summarization is a *compression technique* for working memory that maintains conversation continuity while keeping token counts manageable." + ] + }, + { + "cell_type": "markdown", + "id": "3d6a9c3a31a589d0", + "metadata": {}, + "source": [ + "### ๐Ÿ”ฌ Research Foundation: Recursive Summarization\n", + "\n", + "Wang et al. (2023) in [\"Recursively Summarizing Enables Long-Term Dialogue Memory in Large Language Models\"](https://arxiv.org/abs/2308.15022) demonstrated that:\n", + "\n", + "**Key Insight:** Recursive summarization enables LLMs to handle extremely long conversations by:\n", + "1. Memorizing small dialogue contexts\n", + "2. Recursively producing new memory using previous memory + new contexts\n", + "3. Maintaining consistency across long conversations\n", + "\n", + "**Their findings:**\n", + "- Improved response consistency in long-context conversations\n", + "- Works well with both long-context models (8K, 16K) and retrieval-enhanced LLMs\n", + "- Provides a practical solution for modeling extremely long contexts\n", + "\n", + "**Practical Application:**\n", + "- Summarize old messages while keeping recent ones intact\n", + "- Preserve key information (facts, decisions, preferences)\n", + "- Compress redundant or less important information\n", + "\n", + "**References:**\n", + "- Wang, Q., Fu, Y., Cao, Y., Wang, S., Tian, Z., & Ding, L. (2023). Recursively Summarizing Enables Long-Term Dialogue Memory in Large Language Models. *Neurocomputing* (Accepted).\n" + ] + }, + { + "cell_type": "markdown", + "id": "80bbd6185d7e1fd4", + "metadata": {}, + "source": [ + "### Theory: What to Preserve vs. 
Compress\n", + "\n", + "When summarizing conversations, we need to be strategic about what to keep and what to compress.\n", + "\n", + "**What to Preserve:**\n", + "- โœ… Key facts and decisions\n", + "- โœ… Student preferences and goals\n", + "- โœ… Important course recommendations\n", + "- โœ… Prerequisites and requirements\n", + "- โœ… Recent context (last few messages)\n", + "\n", + "**What to Compress:**\n", + "- ๐Ÿ“ฆ Small talk and greetings\n", + "- ๐Ÿ“ฆ Redundant information\n", + "- ๐Ÿ“ฆ Old conversation details\n", + "- ๐Ÿ“ฆ Resolved questions\n", + "\n", + "**When to Summarize:**\n", + "- Token threshold exceeded (e.g., > 2000 tokens)\n", + "- Message count threshold exceeded (e.g., > 10 messages)\n", + "- Time-based (e.g., after 1 hour)\n", + "- Manual trigger\n" + ] + }, + { + "cell_type": "markdown", + "id": "23b8486d8bc89f7b", + "metadata": {}, + "source": [ + "### Building Summarization Step-by-Step\n", + "\n", + "Let's build our summarization system incrementally, starting with simple components.\n", + "\n", + "#### Step 1: Create a data structure for conversation messages\n", + "\n", + "**What we're building:** A data structure to represent individual messages with metadata.\n", + "\n", + "**Why it's needed:** We need to track not just the message content, but also:\n", + "- Who sent it (user, assistant, system)\n", + "- When it was sent (timestamp)\n", + "- How many tokens it uses (for threshold checks)\n", + "\n", + "**How it works:** Python's `@dataclass` decorator creates a clean, type-safe structure with automatic initialization and token counting.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "3db188fb9f01d750", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… ConversationMessage dataclass defined\n", + " Example - Role: user, Tokens: 9\n" + ] + } + ], + "source": [ + "@dataclass\n", + "class ConversationMessage:\n", + " \"\"\"Represents a single conversation message.\"\"\"\n", + " role: str # \"user\", \"assistant\", \"system\"\n", + " content: str\n", + " timestamp: float = field(default_factory=time.time)\n", + " token_count: Optional[int] = None\n", + "\n", + " def __post_init__(self):\n", + " if self.token_count is None:\n", + " self.token_count = count_tokens(self.content)\n", + "\n", + "# Test it\n", + "test_msg = ConversationMessage(\n", + " role=\"user\",\n", + " content=\"What courses do you recommend for machine learning?\"\n", + ")\n", + "print(f\"โœ… ConversationMessage dataclass defined\")\n", + "print(f\" Example - Role: {test_msg.role}, Tokens: {test_msg.token_count}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "5d49f8f61e276661", + "metadata": {}, + "source": [ + "#### Step 2: Create a function to check if summarization is needed\n", + "\n", + "**What we're building:** A decision function that determines when to trigger summarization.\n", + "\n", + "**Why it's needed:** We don't want to summarize too early (loses context) or too late (hits token limits). 
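To make this concrete: with the defaults used below (message_threshold=10, token_threshold=2000) and ~100 tokens per user/assistant pair, the message threshold fires first, on the sixth turn (12 messages), while the token threshold alone would not trigger until roughly turn 20.\n",
+    "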
We need smart thresholds.\n", + "\n", + "**How it works:**\n", + "- Checks if we have enough messages to make summarization worthwhile\n", + "- Calculates total token count across all messages\n", + "- Returns `True` if either threshold (tokens OR messages) is exceeded\n", + "- Ensures we keep at least `keep_recent` messages unsummarized\n", + "\n", + "**When to summarize:**\n", + "- Token threshold: Prevents hitting model limits (e.g., >2000 tokens)\n", + "- Message threshold: Prevents conversation from getting too long (e.g., >10 messages)\n", + "- Keep recent: Preserves the most relevant context (e.g., last 4 messages)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "290935fa536cb8aa", + "metadata": {}, + "outputs": [], + "source": [ + "def should_summarize(\n", + " messages: List[ConversationMessage],\n", + " token_threshold: int = 2000,\n", + " message_threshold: int = 10,\n", + " keep_recent: int = 4\n", + ") -> bool:\n", + " \"\"\"\n", + " Determine if conversation needs summarization.\n", + "\n", + " Args:\n", + " messages: List of conversation messages\n", + " token_threshold: Summarize when total tokens exceed this\n", + " message_threshold: Summarize when message count exceeds this\n", + " keep_recent: Number of recent messages to keep unsummarized\n", + "\n", + " Returns:\n", + " True if summarization is needed\n", + " \"\"\"\n", + " # Don't summarize if we have very few messages\n", + " if len(messages) <= keep_recent:\n", + " return False\n", + "\n", + " # Calculate total tokens\n", + " total_tokens = sum(msg.token_count for msg in messages)\n", + "\n", + " # Summarize if either threshold is exceeded\n", + " return (total_tokens > token_threshold or\n", + " len(messages) > message_threshold)\n" + ] + }, + { + "cell_type": "markdown", + "id": "37993b003426e127", + "metadata": {}, + "source": [ + "#### Step 3: Create a prompt template for summarization\n", + "\n", + "**What we're building:** A carefully crafted prompt that instructs the LLM on how to summarize conversations.\n", + "\n", + "**Why it's needed:** Generic summarization loses important details. We need domain-specific instructions that preserve what matters for course advisory conversations.\n", + "\n", + "**How it works:**\n", + "- Specifies the context (student-advisor conversation)\n", + "- Lists exactly what to preserve (decisions, requirements, goals, courses, issues)\n", + "- Requests structured output (bullet points for clarity)\n", + "- Emphasizes being \"specific and actionable\" (not vague summaries)\n", + "\n", + "**Design principle:** The prompt template is the \"instructions\" for the summarization LLM. Better instructions = better summaries.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "3a39408752c4a504", + "metadata": {}, + "outputs": [], + "source": [ + "summarization_prompt_template = \"\"\"You are summarizing a conversation between a student and a course advisor.\n", + "\n", + "Create a concise summary that preserves:\n", + "1. Key decisions made\n", + "2. Important requirements or prerequisites discussed\n", + "3. Student's goals, preferences, and constraints\n", + "4. Specific courses mentioned and recommendations given\n", + "5. Any problems or issues that need follow-up\n", + "\n", + "Format as bullet points. 
Be specific and actionable.\n", + "\n", + "Conversation to summarize:\n", + "{conversation}\n", + "\n", + "Summary:\"\"\"\n" + ] + }, + { + "cell_type": "markdown", + "id": "2bca0c3b7f31459f", + "metadata": {}, + "source": [ + "#### Step 4: Create a function to generate summaries using the LLM\n", + "\n", + "**What we're building:** A function that takes messages and produces an intelligent summary using an LLM.\n", + "\n", + "**Why it's needed:** This is where the actual summarization happens. We need to:\n", + "- Format the conversation for the LLM\n", + "- Call the LLM with our prompt template\n", + "- Package the summary as a system message\n", + "\n", + "**How it works:**\n", + "1. Formats messages as \"User: ...\" and \"Assistant: ...\" text\n", + "2. Inserts formatted conversation into the prompt template\n", + "3. Calls the LLM asynchronously (non-blocking)\n", + "4. Wraps the summary in `[CONVERSATION SUMMARY]` marker for easy identification\n", + "5. Returns as a system message (distinguishes it from user/assistant messages)\n", + "\n", + "**Why async?** Summarization can take 1-3 seconds. Async allows other operations to continue while waiting for the LLM response.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "8b41ae7eb2d88f5a", + "metadata": {}, + "outputs": [], + "source": [ + "async def create_summary(\n", + " messages: List[ConversationMessage],\n", + " llm: ChatOpenAI\n", + ") -> ConversationMessage:\n", + " \"\"\"\n", + " Create intelligent summary of conversation messages.\n", + "\n", + " Args:\n", + " messages: List of messages to summarize\n", + " llm: Language model for generating summary\n", + "\n", + " Returns:\n", + " ConversationMessage containing the summary\n", + " \"\"\"\n", + " # Format conversation for summarization\n", + " conversation_text = \"\\n\".join([\n", + " f\"{msg.role.title()}: {msg.content}\"\n", + " for msg in messages\n", + " ])\n", + "\n", + " # Generate summary using LLM\n", + " prompt = summarization_prompt_template.format(conversation=conversation_text)\n", + " response = await llm.ainvoke([HumanMessage(content=prompt)])\n", + "\n", + " summary_content = f\"[CONVERSATION SUMMARY]\\n{response.content}\"\n", + "\n", + " # Create summary message\n", + " summary_msg = ConversationMessage(\n", + " role=\"system\",\n", + " content=summary_content,\n", + " timestamp=messages[-1].timestamp\n", + " )\n", + "\n", + " return summary_msg\n" + ] + }, + { + "cell_type": "markdown", + "id": "56eb87c914424cd", + "metadata": {}, + "source": [ + "#### Step 5: Create a function to compress conversations\n", + "\n", + "**What we're building:** The main compression function that orchestrates the entire summarization process.\n", + "\n", + "**Why it's needed:** This ties together all the previous components into a single, easy-to-use function that:\n", + "- Decides whether to summarize\n", + "- Splits messages into old vs. recent\n", + "- Generates the summary\n", + "- Returns the compressed conversation\n", + "\n", + "**How it works:**\n", + "1. **Check:** Calls `should_summarize()` to see if compression is needed\n", + "2. **Split:** Divides messages into `old_messages` (to summarize) and `recent_messages` (to keep)\n", + "3. **Summarize:** Calls `create_summary()` on old messages\n", + "4. 
**Combine:** Returns `[summary] + recent_messages`\n", + "\n", + "**The result:** A conversation that's 50-80% smaller but preserves all essential information.\n", + "\n", + "**Example:**\n", + "- Input: 20 messages (4,000 tokens)\n", + "- Output: 1 summary + 4 recent messages (1,200 tokens)\n", + "- Savings: 70% reduction in tokens\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "4b904a38b1bad2b9", + "metadata": {}, + "outputs": [], + "source": [ + "async def compress_conversation(\n", + " messages: List[ConversationMessage],\n", + " llm: ChatOpenAI,\n", + " token_threshold: int = 2000,\n", + " message_threshold: int = 10,\n", + " keep_recent: int = 4\n", + ") -> List[ConversationMessage]:\n", + " \"\"\"\n", + " Compress conversation by summarizing old messages and keeping recent ones.\n", + "\n", + " Args:\n", + " messages: List of conversation messages\n", + " llm: Language model for generating summaries\n", + " token_threshold: Summarize when total tokens exceed this\n", + " message_threshold: Summarize when message count exceeds this\n", + " keep_recent: Number of recent messages to keep unsummarized\n", + "\n", + " Returns:\n", + " List of messages: [summary] + [recent messages]\n", + " \"\"\"\n", + " # Check if summarization is needed\n", + " if not should_summarize(messages, token_threshold, message_threshold, keep_recent):\n", + " return messages\n", + "\n", + " # Split into old and recent\n", + " old_messages = messages[:-keep_recent]\n", + " recent_messages = messages[-keep_recent:]\n", + "\n", + " if not old_messages:\n", + " return messages\n", + "\n", + " # Summarize old messages\n", + " summary = await create_summary(old_messages, llm)\n", + "\n", + " # Return summary + recent messages\n", + " return [summary] + recent_messages\n" + ] + }, + { + "cell_type": "markdown", + "id": "668fce6b8d81c302", + "metadata": {}, + "source": [ + "#### Step 6: Combine into a reusable class\n", + "\n", + "Now that we've built and tested each component, let's combine them into a reusable class.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "8324715c96096689", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Summarization system built:\n", + " - ConversationMessage dataclass\n", + " - should_summarize() function\n", + " - Summarization prompt template\n", + " - create_summary() function\n", + " - compress_conversation() function\n", + " - ConversationSummarizer class\n" + ] + } + ], + "source": [ + "class ConversationSummarizer:\n", + " \"\"\"Manages conversation summarization to keep token counts manageable.\"\"\"\n", + "\n", + " def __init__(\n", + " self,\n", + " llm: ChatOpenAI,\n", + " token_threshold: int = 2000,\n", + " message_threshold: int = 10,\n", + " keep_recent: int = 4\n", + " ):\n", + " \"\"\"\n", + " Initialize the summarizer.\n", + "\n", + " Args:\n", + " llm: Language model for generating summaries\n", + " token_threshold: Summarize when total tokens exceed this\n", + " message_threshold: Summarize when message count exceeds this\n", + " keep_recent: Number of recent messages to keep unsummarized\n", + " \"\"\"\n", + " self.llm = llm\n", + " self.token_threshold = token_threshold\n", + " self.message_threshold = message_threshold\n", + " self.keep_recent = keep_recent\n", + " self.summarization_prompt = summarization_prompt_template\n", + "\n", + " def should_summarize(self, messages: List[ConversationMessage]) -> bool:\n", + " \"\"\"Determine if conversation needs 
summarization.\"\"\"\n", + " return should_summarize(\n", + " messages,\n", + " self.token_threshold,\n", + " self.message_threshold,\n", + " self.keep_recent\n", + " )\n", + "\n", + " async def summarize_conversation(\n", + " self,\n", + " messages: List[ConversationMessage]\n", + " ) -> ConversationMessage:\n", + " \"\"\"Create intelligent summary of conversation messages.\"\"\"\n", + " return await create_summary(messages, self.llm)\n", + "\n", + " async def compress_conversation(\n", + " self,\n", + " messages: List[ConversationMessage]\n", + " ) -> List[ConversationMessage]:\n", + " \"\"\"Compress conversation by summarizing old messages and keeping recent ones.\"\"\"\n", + " return await compress_conversation(\n", + " messages,\n", + " self.llm,\n", + " self.token_threshold,\n", + " self.message_threshold,\n", + " self.keep_recent\n", + " )\n", + "\n", + "print(\"\"\"โœ… Summarization system built:\n", + " - ConversationMessage dataclass\n", + " - should_summarize() function\n", + " - Summarization prompt template\n", + " - create_summary() function\n", + " - compress_conversation() function\n", + " - ConversationSummarizer class\"\"\")\n" ] }, { "cell_type": "markdown", "id": "beb98376eb2b00b0", "metadata": {}, "source": [ "### Demo 3: Test Summarization\n", "\n", "Let's test the summarizer with a sample conversation.\n", "\n", "#### Step 1: Create a sample conversation\n", "\n", "**What:** Creating a realistic 16-message conversation about course planning.\n", "\n", "**Why:** We need a conversation long enough to trigger summarization (more than 10 messages) so we can see the compression in action.\n" ] }, { "cell_type": "code", "execution_count": 16, "id": "3e63fdaf5a2a2587", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Original conversation:\n", " Messages: 16\n", " Total tokens: 261\n", " Average tokens per message: 16.3\n" ] } ], "source": [ "# Create a sample long conversation\n", "sample_conversation = [\n", " ConversationMessage(\"user\", \"Hi, I'm interested in learning about machine learning courses\"),\n", " ConversationMessage(\"assistant\", \"Great! Redis University offers several ML courses. CS401 Machine Learning is our flagship course. It covers supervised learning, neural networks, and practical applications.\"),\n", " ConversationMessage(\"user\", \"What are the prerequisites for CS401?\"),\n", " ConversationMessage(\"assistant\", \"CS401 requires CS201 Data Structures and MATH301 Linear Algebra. Have you completed these courses?\"),\n", " ConversationMessage(\"user\", \"I've completed CS101 but not CS201 yet\"),\n", " ConversationMessage(\"assistant\", \"Perfect! CS201 is the next logical step. It covers algorithms and data structures essential for ML. It's offered every semester.\"),\n", " ConversationMessage(\"user\", \"How difficult is MATH301?\"),\n", " ConversationMessage(\"assistant\", \"MATH301 is moderately challenging. It covers vectors, matrices, and eigenvalues used in ML algorithms. Most students find it manageable with consistent practice.\"),\n", " ConversationMessage(\"user\", \"Can I take both CS201 and MATH301 together?\"),\n", " ConversationMessage(\"assistant\", \"Yes, that's a good combination! They complement each other well. 
Many students take them concurrently.\"),\n", + " ConversationMessage(\"user\", \"What about CS401 after that?\"),\n", + " ConversationMessage(\"assistant\", \"CS401 is perfect after completing both prerequisites. It's our most popular AI course with hands-on projects.\"),\n", + " ConversationMessage(\"user\", \"When is CS401 offered?\"),\n", + " ConversationMessage(\"assistant\", \"CS401 is offered in Fall and Spring semesters. The Fall section typically fills up quickly, so register early!\"),\n", + " ConversationMessage(\"user\", \"Great! What's the workload like?\"),\n", + " ConversationMessage(\"assistant\", \"CS401 requires about 10-12 hours per week including lectures, assignments, and projects. There are 4 major projects throughout the semester.\"),\n", + "]\n", + "\n", + "# Calculate original metrics\n", + "original_token_count = sum(msg.token_count for msg in sample_conversation)\n", + "print(f\"Original conversation:\")\n", + "print(f\" Messages: {len(sample_conversation)}\")\n", + "print(f\" Total tokens: {original_token_count}\")\n", + "print(f\" Average tokens per message: {original_token_count / len(sample_conversation):.1f}\")\n" ] }, { "cell_type": "markdown", "id": "b824592502d5305", "metadata": {}, "source": [ "#### Step 2: Configure the summarizer\n", "\n", "**What:** Setting up the `ConversationSummarizer` with specific thresholds.\n", "\n", "**Why:** We use deliberately low demo thresholds. Our 16-message sample exceeds the message threshold (10), which alone triggers summarization - its 261 tokens are still below the 500-token threshold. In production, you'd use higher thresholds (2000-4000 tokens).\n" ] }, { "cell_type": "code", "execution_count": 17, "id": "1f1cd42e5cb65a39", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Summarizer configuration:\n", " Token threshold: 500\n", " Message threshold: 10\n", " Keep recent: 4\n" ] } ], "source": [ "# Test summarization\n", "summarizer = ConversationSummarizer(\n", " llm=llm,\n", " token_threshold=500, # Low threshold for demo\n", " message_threshold=10,\n", " keep_recent=4\n", ")\n", "\n", "print(f\"Summarizer configuration:\")\n", "print(f\" Token threshold: {summarizer.token_threshold}\")\n", "print(f\" Message threshold: {summarizer.message_threshold}\")\n", "print(f\" Keep recent: {summarizer.keep_recent}\")\n" ] }, { "cell_type": "markdown", "id": "ce7b283d8917e353", "metadata": {}, "source": [ "#### Step 3: Check if summarization is needed\n", "\n", "**What:** Testing the `should_summarize()` logic.\n", "\n", "**Why:** Before compressing, we verify that our conversation actually exceeds at least one threshold. This demonstrates the decision logic in action.\n" ] }, { "cell_type": "code", "execution_count": 18, "id": "96d60c07d558dbe2", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Should summarize? True\n" ] } ], "source": [ "# Check if summarization is needed\n", "should_summarize_result = summarizer.should_summarize(sample_conversation)\n", "print(f\"Should summarize? 
{should_summarize_result}\")\n" ] }, { "cell_type": "markdown", "id": "956554c8c979d1a4", "metadata": {}, "source": [ "#### Step 4: Compress the conversation\n", "\n", "**What:** Running the full compression pipeline: summarize old messages, keep recent ones.\n", "\n", "**Why:** This is the core functionality - transforming 16 messages into 1 summary + 4 recent messages while preserving key information. On a conversation this short the summary itself can cost more tokens than it saves (see the output below); the reduction becomes significant on longer conversations.\n" ] }, { "cell_type": "code", "execution_count": 19, "id": "3566e3ee779cc9b6", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "After summarization:\n", " Messages: 5\n", " Total tokens: 292\n", " Token savings: -31 (-11.9%)\n" ] } ], "source": [ "# Compress the conversation\n", "compressed = await summarizer.compress_conversation(sample_conversation)\n", "\n", "compressed_token_count = sum(msg.token_count for msg in compressed)\n", "token_savings = original_token_count - compressed_token_count\n", "savings_percentage = (token_savings / original_token_count) * 100\n", "\n", "print(f\"After summarization:\")\n", "print(f\" Messages: {len(compressed)}\")\n", "print(f\" Total tokens: {compressed_token_count}\")\n", "print(f\" Token savings: {token_savings} ({savings_percentage:.1f}%)\")\n" ] }, { "cell_type": "markdown", "id": "ee85f81eedf9cae1", "metadata": {}, "source": [ "#### Step 5: Examine the compressed conversation structure\n" ] }, { "cell_type": "code", "execution_count": 20, "id": "82e6fb297080ad8", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Compressed conversation structure:\n", " 1. ๐Ÿ“‹ [system] [CONVERSATION SUMMARY] - **Key Decisions Made:** - The student plans to enroll...\n", " Tokens: 228\n", " 2. ๐Ÿ‘ค [user] When is CS401 offered?...\n", " Tokens: 6\n", " 3. ๐Ÿค– [assistant] CS401 is offered in Fall and Spring semesters. The Fall section typically fills ...\n", " Tokens: 22\n", " 4. ๐Ÿ‘ค [user] Great! What's the workload like?...\n", " Tokens: 7\n", " 5. ๐Ÿค– [assistant] CS401 requires about 10-12 hours per week including lectures, assignments, and p...\n", " Tokens: 29\n" ] } ], "source": [ "print(\"Compressed conversation structure:\")\n", "for i, msg in enumerate(compressed):\n", " role_icon = \"๐Ÿ“‹\" if msg.role == \"system\" else \"๐Ÿ‘ค\" if msg.role == \"user\" else \"๐Ÿค–\"\n", " content_preview = msg.content[:80].replace('\\n', ' ')\n", " print(f\" {i+1}. 
{role_icon} [{msg.role}] {content_preview}...\")\n", + " print(f\" Tokens: {msg.token_count}\")\n" ] }, { "cell_type": "markdown", "id": "4cb252a2997a22ba", "metadata": {}, "source": [ "#### Results Analysis\n", "\n", "**What happened:**\n", "- Original: 16 messages, 261 tokens\n", "- Compressed: 5 messages (1 summary + 4 recent), 292 tokens\n", "- Net change: +31 tokens - the 228-token summary outweighed the 12 short messages it replaced\n", "\n", "**Key benefits:**\n", "- Preserved recent context (last 4 messages)\n", "- Summarized older messages into key facts\n", "- Maintained conversation continuity\n", "- Reduces token costs significantly on longer conversations (this short demo sits below the break-even point)\n" ] }, { "cell_type": "markdown", "id": "a896bce27c392ee9", "metadata": {}, "source": [ "---\n", "\n", "## ๐Ÿ”ง Part 3: Context Compression Strategies\n", "\n", "In Part 2, we built a complete summarization system using LLMs to compress conversation history. But summarization isn't the only way to manage context - and it's not always optimal.\n", "\n", "Let's explore **four different compression strategies** and understand when to use each one:\n", "\n", "1. **Truncation** - Token-aware, keeps recent messages within budget\n", "2. **Sliding Window** - Message-aware, maintains fixed window size\n", "3. **Priority-Based** - Intelligent selection without LLM calls\n", "4. **Summarization** - High quality compression using LLM (from Part 2)\n", "\n", "Each strategy has different trade-offs in **speed**, **cost**, and **quality**. By the end of this part, you'll know how to choose the right strategy for your use case.\n" ] }, { "cell_type": "markdown", "id": "bbe2737aeb03474", "metadata": {}, "source": [ "### Theory: Four Compression Approaches\n", "\n", "Let's explore four different strategies, each with different trade-offs:\n", "\n", "**1. Truncation (Token-Aware)**\n", "- Keep recent messages within token budget\n", "- โœ… Pros: Fast, no LLM calls, respects context limits\n", "- โŒ Cons: Variable message count, loses old context\n", "- **Best for:** Token-constrained applications, API limits\n", "\n", "**2. Sliding Window (Message-Aware)**\n", "- Keep exactly N most recent messages\n", "- โœ… Pros: Fastest, predictable count, constant memory\n", "- โŒ Cons: May exceed token limits, loses old context\n", "- **Best for:** Fixed-size buffers, real-time chat\n", "\n", "**3. Priority-Based (Balanced)**\n", "- Score messages by importance, keep highest-scoring\n", "- โœ… Pros: Preserves important context, no LLM calls\n", "- โŒ Cons: Requires good scoring logic, may lose temporal flow\n", "- **Best for:** Production applications needing balance\n", "\n", "**4. 
Summarization (High Quality)**\n", + "- Use LLM to create intelligent summaries\n", + "- โœ… Pros: Preserves meaning, high quality\n", + "- โŒ Cons: Slower, costs tokens, requires LLM call\n", + "- **Best for:** High-value conversations, quality-critical applications\n" ] }, { "cell_type": "markdown", "id": "2bb5f28d6ed343f6", "metadata": {}, "source": [ "### Building Compression Strategies Step-by-Step\n", "\n", "Let's build each strategy incrementally, starting with the simplest.\n", "\n", "#### Step 1: Define a base interface for compression strategies\n" ] }, { "cell_type": "code", "execution_count": 21, "id": "7b053a7b2c242989", "metadata": {}, "outputs": [], "source": [ "class CompressionStrategy:\n", " \"\"\"Base class for compression strategies.\"\"\"\n", "\n", " def compress(\n", " self,\n", " messages: List[ConversationMessage],\n", " max_tokens: int\n", " ) -> List[ConversationMessage]:\n", " \"\"\"Compress messages to fit within max_tokens.\"\"\"\n", " raise NotImplementedError\n" ] }, { "cell_type": "markdown", "id": "e23ab8bf105c70aa", "metadata": {}, "source": [ "#### Step 2: Implement Truncation Strategy (Simplest)\n", "\n", "This strategy simply keeps the most recent messages that fit within the token budget.\n" ] }, { "cell_type": "code", "execution_count": 22, "id": "cf8c2576cad8bfc4", "metadata": {}, "outputs": [], "source": [ "class TruncationStrategy(CompressionStrategy):\n", " \"\"\"Keep only the most recent messages within token budget.\"\"\"\n", "\n", " def compress(\n", " self,\n", " messages: List[ConversationMessage],\n", " max_tokens: int\n", " ) -> List[ConversationMessage]:\n", " \"\"\"Keep most recent messages within token budget.\"\"\"\n", " compressed = []\n", " total_tokens = 0\n", "\n", " # Work backwards from most recent\n", " for msg in reversed(messages):\n", " if total_tokens + msg.token_count <= max_tokens:\n", " compressed.insert(0, msg)\n", " total_tokens += msg.token_count\n", " else:\n", " break\n", "\n", " return compressed\n" ] }, { "cell_type": "markdown", "id": "8fcd84d939f70075", "metadata": {}, "source": [ "#### Step 2.5: Implement Sliding Window Strategy (Fixed Window)\n", "\n", "**What we're building:** A strategy that maintains a fixed-size window of the N most recent messages.\n", "\n", "**Why it's different from truncation:**\n", "- **Truncation:** Reactive - keeps messages until token budget exceeded, then removes oldest\n", "- **Sliding Window:** Proactive - always maintains exactly N messages regardless of tokens\n", "\n", "**When to use:**\n", "- Real-time chat where you want constant context size\n", "- Systems with predictable message patterns\n", "- When simplicity matters more than token optimization\n", "\n", "**Trade-off:** May exceed token limits if messages are very long.\n", "\n", "**How it works:** Simply returns the last N messages using Python list slicing (`messages[-N:]`).\n" ] }, { "cell_type": "code", "execution_count": 23, "id": "a683df2353cdfdc4", "metadata": {}, "outputs": [], "source": [ "class SlidingWindowStrategy(CompressionStrategy):\n", " \"\"\"Keep only the last N messages (fixed window size).\"\"\"\n", "\n", " def __init__(self, window_size: int = 10):\n", " \"\"\"\n", " Initialize sliding window strategy.\n", "\n", " Args:\n", " window_size: Number of recent messages to keep\n", " 
\"\"\"\n", + " self.window_size = window_size\n", + "\n", + " def compress(\n", + " self,\n", + " messages: List[ConversationMessage],\n", + " max_tokens: int\n", + " ) -> List[ConversationMessage]:\n", + " \"\"\"\n", + " Keep only the last N messages.\n", + "\n", + " Note: Ignores max_tokens parameter - always keeps exactly window_size messages.\n", + " \"\"\"\n", + " if len(messages) <= self.window_size:\n", + " return messages\n", + "\n", + " return messages[-self.window_size:]\n" + ] + }, + { + "cell_type": "markdown", + "id": "42299c4601c4f31a", + "metadata": {}, + "source": [ + "#### Step 3: Implement Priority-Based Strategy (Intelligent Selection)\n", + "\n", + "This strategy scores messages by importance and keeps the highest-scoring ones.\n", + "\n", + "First, let's create a function to calculate message importance:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "739168f3fa76a165", + "metadata": {}, + "outputs": [], + "source": [ + "def calculate_message_importance(msg: ConversationMessage) -> float:\n", + " \"\"\"\n", + " Calculate importance score for a message.\n", + "\n", + " Higher scores = more important.\n", + " \"\"\"\n", + " score = 0.0\n", + " content_lower = msg.content.lower()\n", + "\n", + " # Course codes are important (CS401, MATH301, etc.)\n", + " if any(code in content_lower for code in ['cs', 'math', 'eng']):\n", + " score += 2.0\n", + "\n", + " # Questions are important\n", + " if '?' in msg.content:\n", + " score += 1.5\n", + "\n", + " # Prerequisites and requirements are important\n", + " if any(word in content_lower for word in ['prerequisite', 'require', 'need']):\n", + " score += 1.5\n", + "\n", + " # Preferences and goals are important\n", + " if any(word in content_lower for word in ['prefer', 'want', 'goal', 'interested']):\n", + " score += 1.0\n", + "\n", + " # User messages slightly more important (their needs)\n", + " if msg.role == 'user':\n", + " score += 0.5\n", + "\n", + " # Longer messages often have more content\n", + " if msg.token_count > 50:\n", + " score += 0.5\n", + "\n", + " return score\n" + ] + }, + { + "cell_type": "markdown", + "id": "c1d3e19b190c9e3c", + "metadata": {}, + "source": [ + "Now let's create the Priority-Based strategy class:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "f66e696bacf5a96a", + "metadata": {}, + "outputs": [], + "source": [ + "class PriorityBasedStrategy(CompressionStrategy):\n", + " \"\"\"Keep highest-priority messages within token budget.\"\"\"\n", + "\n", + " def calculate_importance(self, msg: ConversationMessage) -> float:\n", + " \"\"\"Calculate importance score for a message.\"\"\"\n", + " return calculate_message_importance(msg)\n", + "\n", + " def compress(\n", + " self,\n", + " messages: List[ConversationMessage],\n", + " max_tokens: int\n", + " ) -> List[ConversationMessage]:\n", + " \"\"\"Keep highest-priority messages within token budget.\"\"\"\n", + " # Score each message\n", + " scored_messages = [\n", + " (self.calculate_importance(msg), i, msg)\n", + " for i, msg in enumerate(messages)\n", + " ]\n", + "\n", + " # Sort by score (descending), then by index to maintain some order\n", + " scored_messages.sort(key=lambda x: (-x[0], x[1]))\n", + "\n", + " # Select messages within budget\n", + " selected = []\n", + " total_tokens = 0\n", + "\n", + " for score, idx, msg in scored_messages:\n", + " if total_tokens + msg.token_count <= max_tokens:\n", + " selected.append((idx, msg))\n", + " total_tokens += msg.token_count\n", + "\n", + " # Sort by 
original index to maintain conversation flow\n", + " selected.sort(key=lambda x: x[0])\n", + "\n", + " return [msg for idx, msg in selected]\n" + ] + }, + { + "cell_type": "markdown", + "id": "57f0400bdab30655", + "metadata": {}, + "source": [ + "#### Step 4: Wrap Summarization Strategy (Already Built in Part 2)\n", + "\n", + "**What we're doing:** Creating a `SummarizationStrategy` wrapper around the `ConversationSummarizer` we built in Part 2.\n", + "\n", + "**Why wrap it:** To make it compatible with the `CompressionStrategy` interface so we can compare it fairly with the other strategies in Demo 4.\n", + "\n", + "**Note:** We're not rebuilding summarization - we're just adapting what we already built to work alongside truncation, sliding window, and priority-based strategies. This is the adapter pattern in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "4c0fa64ab406ef95", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Compression strategies implemented:\n", + " - CompressionStrategy base class\n", + " - TruncationStrategy (token-aware)\n", + " - SlidingWindowStrategy (message-aware)\n", + " - PriorityBasedStrategy (intelligent selection)\n", + " - SummarizationStrategy (LLM-based)\n" + ] + } + ], + "source": [ + "class SummarizationStrategy(CompressionStrategy):\n", + " \"\"\"Use LLM to create intelligent summaries.\"\"\"\n", + "\n", + " def __init__(self, summarizer: ConversationSummarizer):\n", + " self.summarizer = summarizer\n", + "\n", + " async def compress_async(\n", + " self,\n", + " messages: List[ConversationMessage],\n", + " max_tokens: int\n", + " ) -> List[ConversationMessage]:\n", + " \"\"\"Compress using summarization (async).\"\"\"\n", + " # Use the summarizer's logic\n", + " return await self.summarizer.compress_conversation(messages)\n", + "\n", + " def compress(\n", + " self,\n", + " messages: List[ConversationMessage],\n", + " max_tokens: int\n", + " ) -> List[ConversationMessage]:\n", + " \"\"\"Synchronous wrapper (not recommended, use compress_async).\"\"\"\n", + " raise NotImplementedError(\"Use compress_async for summarization strategy\")\n", + "\n", + "print(\"\"\"โœ… Compression strategies implemented:\n", + " - CompressionStrategy base class\n", + " - TruncationStrategy (token-aware)\n", + " - SlidingWindowStrategy (message-aware)\n", + " - PriorityBasedStrategy (intelligent selection)\n", + " - SummarizationStrategy (LLM-based)\"\"\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "1d0ddde791c5afc", + "metadata": {}, + "source": [ + "### Demo 4: Compare Compression Strategies\n", + "\n", + "Let's compare all four strategies on the same conversation to understand their trade-offs.\n", + "\n", + "#### Step 1: Set up the test\n", + "\n", + "**What:** Establishing baseline metrics for our comparison.\n", + "\n", + "**Why:** We need to know the original size (messages and tokens) to measure how much each strategy compresses and what it costs in terms of information loss.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "22b54c30ef8be4a8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Original conversation: 16 messages, 261 tokens\n", + "Target budget: 800 tokens\n", + "\n" + ] + } + ], + "source": [ + "# Use the same sample conversation from before\n", + "test_conversation = sample_conversation.copy()\n", + "max_tokens = 800 # Target token budget\n", + "\n", + "original_tokens = 
sum(msg.token_count for msg in test_conversation)\n", + "print(f\"\"\"Original conversation: {len(test_conversation)} messages, {original_tokens} tokens\n", + "Target budget: {max_tokens} tokens\n", + "\"\"\")\n" ] }, { "cell_type": "markdown", "id": "96dac15eec962562", "metadata": {}, "source": [ "#### Step 2: Test Truncation Strategy\n", "\n", "**What:** Testing token-aware compression that keeps recent messages within budget.\n", "\n", "**Why:** Demonstrates how truncation guarantees staying under token limits by working backwards from the most recent message. (Here the whole 261-token conversation fits the 800-token budget, so nothing is dropped.)\n" ] }, { "cell_type": "code", "execution_count": 28, "id": "be20f6779afc21e9", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "TRUNCATION STRATEGY\n", " Result: 16 messages, 261 tokens\n", " Savings: 0 tokens\n", " Kept messages: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]\n" ] } ], "source": [ "truncation = TruncationStrategy()\n", "truncated = truncation.compress(test_conversation, max_tokens)\n", "truncated_tokens = sum(msg.token_count for msg in truncated)\n", "\n", "print(f\"TRUNCATION STRATEGY\")\n", "print(f\" Result: {len(truncated)} messages, {truncated_tokens} tokens\")\n", "print(f\" Savings: {original_tokens - truncated_tokens} tokens\")\n", "print(f\" Kept messages: {[i for i, msg in enumerate(test_conversation) if msg in truncated]}\")\n" ] }, { "cell_type": "markdown", "id": "d8dfbdc40403d640", "metadata": {}, "source": [ "#### Step 2.5: Test Sliding Window Strategy\n", "\n", "**What:** Testing message-aware compression that keeps exactly N recent messages.\n", "\n", "**Why:** Shows how sliding window prioritizes predictability (always 6 messages) over token optimization (may exceed budget).\n" ] }, { "cell_type": "code", "execution_count": 29, "id": "4018ee04019c9a9a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "SLIDING WINDOW STRATEGY\n", " Result: 6 messages, 91 tokens\n", " Savings: 170 tokens\n", " Kept messages: [10, 11, 12, 13, 14, 15]\n", " Token budget: 91/800 (within limit)\n" ] } ], "source": [ "sliding_window = SlidingWindowStrategy(window_size=6)\n", "windowed = sliding_window.compress(test_conversation, max_tokens)\n", "windowed_tokens = sum(msg.token_count for msg in windowed)\n", "\n", "print(f\"SLIDING WINDOW STRATEGY\")\n", "print(f\" Result: {len(windowed)} messages, {windowed_tokens} tokens\")\n", "print(f\" Savings: {original_tokens - windowed_tokens} tokens\")\n", "print(f\" Kept messages: {[i for i, msg in enumerate(test_conversation) if msg in windowed]}\")\n", "print(f\" Token budget: {windowed_tokens}/{max_tokens} ({'within' if windowed_tokens <= max_tokens else 'EXCEEDS'} limit)\")\n" ] }, { "cell_type": "markdown", "id": "529392dfaf6dbe64", "metadata": {}, "source": [ "**Analysis:**\n", "\n", "The sliding window kept:\n", "- **Exactly 6 messages** (last 6 from the conversation)\n", "- **Most recent context only** (indices show the final messages)\n", "- **91 tokens** (fits this 800-token budget, though the strategy never guarantees it)\n", "\n", "**Key difference from truncation:**\n", "- **Truncation:** Kept all 16 messages - the entire 261-token conversation already fits under the 800-token budget\n", "- **Sliding Window:** Kept exactly 6 messages, resulting in 91 tokens\n", "\n", "**Behavior pattern:**\n", "- Truncation: \"Fill the budget\" 
โ†’ Variable count, guaranteed fit\n", + "- Sliding Window: \"Fixed window\" โ†’ Constant count, may exceed budget\n" + ] + }, + { + "cell_type": "markdown", + "id": "69267d84d68c7376", + "metadata": {}, + "source": [ + "#### Step 3: Test Priority-Based Strategy\n", + "\n", + "**What:** Testing intelligent selection that scores messages by importance.\n", + "\n", + "**Why:** Demonstrates how priority-based compression preserves high-value messages (questions, course codes, requirements) while staying within budget - no LLM needed.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "c0b2ce7a958fbe9d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "PRIORITY-BASED STRATEGY\n", + " Result: 16 messages, 261 tokens\n", + " Savings: 0 tokens\n", + " Kept messages: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]\n" + ] + } + ], + "source": [ + "priority = PriorityBasedStrategy()\n", + "prioritized = priority.compress(test_conversation, max_tokens)\n", + "prioritized_tokens = sum(msg.token_count for msg in prioritized)\n", + "\n", + "print(f\"PRIORITY-BASED STRATEGY\")\n", + "print(f\" Result: {len(prioritized)} messages, {prioritized_tokens} tokens\")\n", + "print(f\" Savings: {original_tokens - prioritized_tokens} tokens\")\n", + "print(f\" Kept messages: {[i for i, msg in enumerate(test_conversation) if msg in prioritized]}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "fed34b703bb9c7d9", + "metadata": {}, + "source": [ + "Let's examine which messages were selected and why:\n", + "\n", + "**What:** Inspecting the importance scores assigned to different messages.\n", + "\n", + "**Why:** Understanding the scoring logic helps you tune it for your domain (e.g., legal terms, medical codes, customer names).\n" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "134971d1108034c4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sample importance scores:\n", + " Message 0: 1.5 - \"Hi, I'm interested in learning about machine learn...\"\n", + " Message 2: 5.5 - \"What are the prerequisites for CS401?...\"\n", + " Message 4: 2.5 - \"I've completed CS101 but not CS201 yet...\"\n", + " Message 6: 4.0 - \"How difficult is MATH301?...\"\n" + ] + } + ], + "source": [ + "# Show importance scores for selected messages\n", + "print(\"Sample importance scores:\")\n", + "for i in [0, 2, 4, 6]:\n", + " if i < len(test_conversation):\n", + " score = priority.calculate_importance(test_conversation[i])\n", + " preview = test_conversation[i].content[:50]\n", + " print(f\" Message {i}: {score:.1f} - \\\"{preview}...\\\"\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "e310f0458261b9a8", + "metadata": {}, + "source": [ + "#### Step 4: Test Summarization Strategy\n", + "\n", + "**What:** Testing LLM-based compression using the summarizer from Part 2.\n", + "\n", + "**Why:** Shows the highest-quality compression - preserves meaning and context but requires an API call. 
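You can measure the latency gap yourself with a quick sketch (illustrative only - it reuses `test_conversation`, `max_tokens`, `summarizer`, and the strategy classes defined above; top-level `await` works in notebooks):\n", "\n", "```python\n", "import time\n", "\n", "# Hedged sketch: wall-clock time of a non-LLM strategy vs. the LLM-backed one\n", "start = time.perf_counter()\n", "TruncationStrategy().compress(test_conversation, max_tokens)\n", "print(f\"Truncation: {time.perf_counter() - start:.4f}s\")  # typically sub-millisecond\n", "\n", "start = time.perf_counter()\n", "await SummarizationStrategy(summarizer).compress_async(test_conversation, max_tokens)\n", "print(f\"Summarization: {time.perf_counter() - start:.2f}s\")  # typically seconds (one LLM call)\n", "```\n", "\n", "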
This is the gold standard for quality, but comes with latency and cost.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "997bc235a9b3038b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SUMMARIZATION STRATEGY\n", + " Result: 5 messages, 300 tokens\n", + " Savings: -39 tokens\n", + " Structure: 1 summary + 4 recent messages\n" + ] + } + ], + "source": [ + "summarization = SummarizationStrategy(summarizer)\n", + "summarized = await summarization.compress_async(test_conversation, max_tokens)\n", + "summarized_tokens = sum(msg.token_count for msg in summarized)\n", + "\n", + "print(f\"SUMMARIZATION STRATEGY\")\n", + "print(f\" Result: {len(summarized)} messages, {summarized_tokens} tokens\")\n", + "print(f\" Savings: {original_tokens - summarized_tokens} tokens\")\n", + "print(f\" Structure: 1 summary + {len(summarized) - 1} recent messages\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "eb0f2653b2c4e89b", + "metadata": {}, + "source": [ + "#### Step 5: Compare all strategies\n", + "\n", + "**What:** Side-by-side comparison of all four strategies on the same conversation.\n", + "\n", + "**Why:** Seeing the trade-offs in a table makes it clear: truncation/sliding window are fast but lose context, priority-based balances both, summarization preserves most but costs time/money.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "47b36cc71717932b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "COMPARISON SUMMARY\n", + "================================================================================\n", + "Strategy Messages Tokens Savings Quality\n", + "--------------------------------------------------------------------------------\n", + "Original 16 261 0 N/A\n", + "Truncation 16 261 0 Low\n", + "Sliding Window 6 91 170 (65%) Low\n", + "Priority-Based 16 261 0 Medium\n", + "Summarization 5 300 -39 High\n" + ] + } + ], + "source": [ + "print(\"COMPARISON SUMMARY\")\n", + "print(\"=\" * 80)\n", + "print(f\"{'Strategy':<20} {'Messages':<12} {'Tokens':<12} {'Savings':<12} {'Quality'}\")\n", + "print(\"-\" * 80)\n", + "\n", + "strategies = [\n", + " (\"Original\", len(test_conversation), original_tokens, 0, \"N/A\"),\n", + " (\"Truncation\", len(truncated), truncated_tokens, original_tokens - truncated_tokens, \"Low\"),\n", + " (\"Sliding Window\", len(windowed), windowed_tokens, original_tokens - windowed_tokens, \"Low\"),\n", + " (\"Priority-Based\", len(prioritized), prioritized_tokens, original_tokens - prioritized_tokens, \"Medium\"),\n", + " (\"Summarization\", len(summarized), summarized_tokens, original_tokens - summarized_tokens, \"High\"),\n", + "]\n", + "\n", + "for name, msgs, tokens, savings, quality in strategies:\n", + " savings_pct = f\"({savings/original_tokens*100:.0f}%)\" if savings > 0 else \"\"\n", + " print(f\"{name:<20} {msgs:<12} {tokens:<12} {savings:<5} {savings_pct:<6} {quality}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "bfe7c056c978aea4", + "metadata": {}, + "source": [ + "### Understanding the Trade-offs: Why Summarization Isn't Always Optimal\n", + "\n", + "Now that we've seen all four strategies in action, let's understand when each one shines and when it falls short.\n", + "\n", + "**Summarization's Trade-offs:**\n", + "\n", + "While summarization provides the highest quality compression, it introduces constraints:\n", + "\n", + "1. **Latency:** Requires LLM API call (1-3 seconds vs. 
<10ms for other strategies)\n", + "2. **Cost:** Extra API calls at scale (1,000 conversations/day = 1,000+ LLM calls)\n", + "3. **Lossy:** Paraphrases content, doesn't preserve exact wording\n", + "4. **Complexity:** Requires async operations, prompt engineering, error handling\n", + "\n", + "**When to Use Alternatives:**\n", + "\n", + "| Scenario | Better Strategy | Why |\n", + "|----------|----------------|-----|\n", + "| Real-time chat | Truncation/Sliding Window | Zero latency |\n", + "| Cost-sensitive (high volume) | Priority-based | No API calls |\n", + "| Verbatim accuracy required | Truncation | Preserves exact wording |\n", + "| Predictable context size | Sliding Window | Fixed message count |\n", + "\n", + "See the Key Takeaways below for the complete decision framework." + ] + }, + { + "cell_type": "markdown", + "id": "6ebd894c5ffdfff", + "metadata": {}, + "source": [ + "#### Key Takeaways\n", + "\n", + "**Truncation (Token-Aware):**\n", + "- Keeps messages within token budget\n", + "- Variable message count, guaranteed under limit\n", + "- Good for: API token limits, cost control\n", + "\n", + "**Sliding Window (Message-Aware):**\n", + "- Keeps exactly N most recent messages\n", + "- Fixed message count, may exceed token budget\n", + "- Good for: Real-time chat, predictable context size\n", + "\n", + "**Priority-Based (Intelligent):**\n", + "- Scores and keeps important messages\n", + "- Preserves key information across conversation\n", + "- Good for: Most production applications, balanced approach\n", + "\n", + "**Summarization (Highest Quality):**\n", + "- Uses LLM to preserve meaning\n", + "- Highest quality, but requires API call (cost + latency)\n", + "- Good for: High-value conversations, support tickets, advisory sessions\n", + "\n", + "**Decision Framework:**\n", + "- **Speed-critical** โ†’ Truncation or Sliding Window (instant, no LLM)\n", + "- **Cost-sensitive** โ†’ Priority-Based (intelligent, no API calls)\n", + "- **Quality-critical** โ†’ Summarization (preserves meaning, expensive)\n", + "- **Predictable context** โ†’ Sliding Window (constant message count)\n" + ] + }, + { + "cell_type": "markdown", + "id": "dca23d0020c84249", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## ๐Ÿ”„ Part 4: Agent Memory Server Integration\n", + "\n", + "The Agent Memory Server provides automatic summarization. Let's configure and test it.\n" + ] + }, + { + "cell_type": "markdown", + "id": "8ca0c2b93f2cf79e", + "metadata": {}, + "source": [ + "### ๐Ÿ”ง Theory: Automatic Memory Management\n", + "\n", + "As we learned in Notebook 01, the Agent Memory Server provides automatic memory management with configurable compression strategies.\n", + "\n", + "**Agent Memory Server Features:**\n", + "- โœ… Automatic summarization when thresholds are exceeded\n", + "- โœ… Configurable strategies (recent + summary, sliding window, full summary)\n", + "- โœ… Transparent to your application code\n", + "- โœ… Production-ready and scalable\n", + "\n", + "**How It Works:**\n", + "1. You add messages to working memory normally\n", + "2. Server monitors message count and token count\n", + "3. When threshold is exceeded, server automatically summarizes\n", + "4. Old messages are replaced with summary\n", + "5. Recent messages are kept for context\n", + "6. 
Your application retrieves the compressed memory\n", + "\n", + "**Configuration Options:**\n", + "- `message_threshold`: Summarize after N messages (default: 20)\n", + "- `token_threshold`: Summarize after N tokens (default: 4000)\n", + "- `keep_recent`: Number of recent messages to keep (default: 4)\n", + "- `strategy`: \"recent_plus_summary\", \"sliding_window\", or \"full_summary\"" + ] + }, + { + "cell_type": "markdown", + "id": "d585948b56598a9f", + "metadata": {}, + "source": [ + "### Demo 5: Test Automatic Summarization with Realistic Academic Advising\n", + "\n", + "Let's test the Agent Memory Server's automatic summarization with a realistic, information-dense conversation.\n", + "\n", + "**Real-World Scenario:** This demo simulates an academic advising session where a student asks detailed questions about a course syllabus. This mirrors actual use cases like:\n", + "- Academic advising chatbots answering detailed course questions\n", + "- Customer support agents explaining complex products/services\n", + "- Technical documentation assistants providing in-depth explanations\n", + "- Healthcare chatbots discussing treatment options and medical information\n", + "\n", + "The long, information-dense responses will exceed the 4000 token threshold, triggering automatic summarization.\n", + "\n", + "#### Step 1: Create a test session\n", + "\n", + "**What:** Setting up a unique session ID for testing automatic summarization.\n", + "\n", + "**Why:** Each session has its own working memory. We need a fresh session to observe the Agent Memory Server's automatic compression behavior from scratch.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "de6e6cc74530366a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Testing automatic summarization\n", + "Session ID: long_conversation_test_1762046255\n", + "Student ID: student_memory_test\n" + ] + } + ], + "source": [ + "# Create a test session\n", + "test_session_id = f\"long_conversation_test_{int(time.time())}\"\n", + "test_student_id = \"student_memory_test\"\n", + "\n", + "print(f\"\"\"Testing automatic summarization\n", + "Session ID: {test_session_id}\n", + "Student ID: {test_student_id}\"\"\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "a557dad8d8f53ef0", + "metadata": {}, + "source": [ + "#### Step 2: Create a realistic scenario - Student exploring a detailed course syllabus\n", + "\n", + "**What:** Simulating a real advising session where a student asks detailed questions about the CS401 Machine Learning course syllabus.\n", + "\n", + "**Why:** Real conversations involve long, information-dense responses (course descriptions, prerequisites, project details). 
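If you want to sanity-check the token math before running the demo, here is a rough sketch (an illustration, not part of the demo; it assumes `tiktoken` is installed, and `conversation_turns` is defined in the next cell):\n", "\n", "```python\n", "import tiktoken\n", "\n", "# Approximate token counting; the choice of encoding is an assumption\n", "enc = tiktoken.get_encoding(\"cl100k_base\")\n", "\n", "def count_tokens(text: str) -> int:\n", "    return len(enc.encode(text))\n", "\n", "# After running the next cell:\n", "# total = sum(count_tokens(q) + count_tokens(a) for q, a in conversation_turns)\n", "# print(total)  # should land near the ~4,795 tokens reported below\n", "```\n", "\n", "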
This creates enough tokens to trigger automatic summarization while demonstrating a realistic use case.\n", + "\n", + "**Scenario:** A student is considering CS401 and asks progressively deeper questions about the syllabus, prerequisites, projects, grading, and logistics.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "4addd7959de37558", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Created realistic advising conversation:\n", + " - 11 turns (22 messages)\n", + " - Detailed course syllabus document\n", + " - Progressive depth: overview โ†’ prerequisites โ†’ projects โ†’ logistics โ†’ financial aid\n", + " - Long, information-dense responses (realistic for academic advising)\n", + " - Total tokens: 4,795 tokens (threshold: 4,000)\n", + " - Status: โœ… EXCEEDS threshold\n" + ] + } + ], + "source": [ + "# First, let's create a detailed course syllabus (this would typically come from a RAG system)\n", + "cs401_syllabus = \"\"\"\n", + "CS401: Machine Learning - Complete Course Syllabus\n", + "\n", + "COURSE OVERVIEW:\n", + "This comprehensive course covers fundamental and advanced machine learning techniques. Students will learn supervised learning (linear regression, logistic regression, decision trees, random forests, support vector machines), unsupervised learning (k-means clustering, hierarchical clustering, DBSCAN, dimensionality reduction with PCA and t-SNE), neural networks (feedforward networks, backpropagation, activation functions, optimization algorithms), deep learning (convolutional neural networks for computer vision, recurrent neural networks for sequence modeling, LSTMs and GRUs for time series), and natural language processing (word embeddings, transformers, attention mechanisms, BERT, GPT architectures).\n", + "\n", + "PREREQUISITES:\n", + "- CS201 Data Structures and Algorithms (required) - Must understand trees, graphs, dynamic programming, complexity analysis\n", + "- MATH301 Linear Algebra (required) - Matrix operations, eigenvalues, eigenvectors, vector spaces\n", + "- STAT201 Probability and Statistics (recommended) - Probability distributions, hypothesis testing, Bayes' theorem\n", + "- Python programming experience (required) - NumPy, Pandas, Matplotlib\n", + "\n", + "COURSE STRUCTURE:\n", + "- 15 weeks, 3 hours lecture + 2 hours lab per week\n", + "- 4 major projects (40% of grade)\n", + "- Weekly problem sets (20% of grade)\n", + "- Midterm exam (15% of grade)\n", + "- Final exam (20% of grade)\n", + "- Class participation (5% of grade)\n", + "\n", + "PROJECTS:\n", + "Project 1 (Weeks 2-4): Implement linear regression and logistic regression from scratch using only NumPy. Apply to housing price prediction and spam classification datasets.\n", + "\n", + "Project 2 (Weeks 5-7): Build a neural network framework with backpropagation. Implement various activation functions (ReLU, sigmoid, tanh) and optimization algorithms (SGD, Adam, RMSprop). Train on MNIST digit classification.\n", + "\n", + "Project 3 (Weeks 8-11): Develop a convolutional neural network for image classification using TensorFlow/PyTorch. Experiment with different architectures (LeNet, AlexNet, ResNet). Apply transfer learning with pre-trained models. Dataset: CIFAR-10 or custom image dataset.\n", + "\n", + "Project 4 (Weeks 12-15): Natural language processing project - build a sentiment analysis system using transformers. Fine-tune BERT or GPT-2 on movie reviews or social media data. 
Implement attention visualization and model interpretation techniques.\n", + "\n", + "GRADING SCALE:\n", + "A: 90-100%, B: 80-89%, C: 70-79%, D: 60-69%, F: <60%\n", + "Pass rate: Approximately 85% of students pass on first attempt\n", + "Average grade: B+ (87%)\n", + "\n", + "RESOURCES:\n", + "- Textbook: \"Pattern Recognition and Machine Learning\" by Christopher Bishop\n", + "- Online resources: Coursera ML course, fast.ai, Papers with Code\n", + "- Computing: Google Colab Pro ($10/month) or university GPU cluster access\n", + "- Office hours: 3 TAs available Monday-Friday, 2-5 PM\n", + "- Discussion forum: Active Slack workspace with 200+ students and alumni\n", + "\n", + "SCHEDULE:\n", + "Offered every semester (Fall, Spring, Summer)\n", + "Class times: MWF 10:00-11:30 AM or TTh 2:00-4:00 PM\n", + "Lab sections: Multiple options throughout the week\n", + "Application deadline: 2 months before semester start\n", + "\"\"\"\n", + "\n", + "# Now create a realistic conversation where the student asks detailed questions\n", + "conversation_turns = [\n", + " (\n", + " \"Hi! I'm interested in taking CS401 Machine Learning next semester. Can you give me an overview of what the course covers?\",\n", + " f\"Absolutely! CS401 is our flagship machine learning course. Here's the complete overview:\\n\\n{cs401_syllabus[:500]}...\\n\\nThe course covers everything from foundational algorithms like linear regression to cutting-edge deep learning techniques. You'll work with real datasets and build production-quality ML systems. Would you like to know more about any specific aspect?\"\n", + " ),\n", + " (\n", + " \"That sounds comprehensive! What are the prerequisites? I've completed CS101 Introduction to Programming, but I'm not sure if I'm ready for CS401.\",\n", + " \"Great question! Let me break down the prerequisites for you:\\n\\n**Required Prerequisites:**\\n1. CS201 Data Structures and Algorithms - You'll need strong understanding of trees, graphs, dynamic programming, and complexity analysis. This is essential because ML algorithms often involve complex data structures and you need to understand their performance characteristics.\\n\\n2. MATH301 Linear Algebra - Matrix operations, eigenvalues, eigenvectors, and vector spaces are fundamental to ML. Neural networks are essentially matrix multiplications, and dimensionality reduction techniques like PCA rely heavily on linear algebra.\\n\\n**Recommended Prerequisites:**\\n3. STAT201 Probability and Statistics - Understanding probability distributions, hypothesis testing, and Bayes' theorem will help you grasp why ML algorithms work.\\n\\n4. Python Programming - You should be comfortable with NumPy, Pandas, and Matplotlib.\\n\\nSince you've only completed CS101, you'll need to take CS201 first. Many students take CS201 and MATH301 concurrently, which would prepare you for CS401 in about 4-6 months.\"\n", + " ),\n", + " (\n", + " \"I see. Can you tell me more about the projects? I learn best by doing hands-on work.\",\n", + " \"Excellent! CS401 has 4 major projects that progressively build your skills:\\n\\n**Project 1 (Weeks 2-4): Foundations**\\nYou'll implement linear regression and logistic regression from scratch using only NumPy - no ML libraries allowed! This forces you to understand the math. You'll apply these to real datasets: housing price prediction (regression) and spam classification (classification). 
This project teaches you the fundamentals of gradient descent and loss functions.\\n\\n**Project 2 (Weeks 5-7): Neural Networks**\\nBuild your own neural network framework with backpropagation. You'll implement various activation functions (ReLU, sigmoid, tanh) and optimization algorithms (SGD, Adam, RMSprop). Then train your network on MNIST digit classification. This is where you really understand how deep learning works under the hood.\\n\\n**Project 3 (Weeks 8-11): Computer Vision**\\nDevelop a convolutional neural network for image classification using TensorFlow or PyTorch. You'll experiment with different architectures (LeNet, AlexNet, ResNet) and apply transfer learning with pre-trained models. Dataset options include CIFAR-10 or you can use a custom dataset. This project shows you how to work with production ML frameworks.\\n\\n**Project 4 (Weeks 12-15): NLP**\\nBuild a sentiment analysis system using transformers. You'll fine-tune BERT or GPT-2 on movie reviews or social media data, implement attention visualization, and use model interpretation techniques. This is the most advanced project and prepares you for real-world NLP applications.\\n\\nEach project takes 2-3 weeks and includes a written report and code submission. Projects are worth 40% of your final grade.\"\n", + " ),\n", + " (\n", + " \"Wow, those projects sound challenging but exciting! What's the workload like? I'm also taking two other courses next semester.\",\n", + " \"That's a very important consideration! CS401 is one of our most intensive courses. Here's what to expect:\\n\\n**Time Commitment:**\\n- Lectures: 3 hours per week (MWF 10:00-11:30 AM or TTh 2:00-4:00 PM)\\n- Lab sections: 2 hours per week (multiple time slots available)\\n- Problem sets: 4-6 hours per week (weekly assignments to reinforce concepts)\\n- Project work: 8-12 hours per week during project periods\\n- Exam preparation: 10-15 hours before midterm and final\\n- Reading and self-study: 3-5 hours per week\\n\\n**Total: 20-25 hours per week on average**, with peaks during project deadlines and exams.\\n\\n**Workload Distribution:**\\n- Weeks 1-2: Lighter (getting started, foundational concepts)\\n- Weeks 3-4, 6-7, 9-11, 13-15: Heavy (project work)\\n- Weeks 5, 8, 12: Moderate (project transitions, exam prep)\\n\\n**Managing with Other Courses:**\\nMost students take 3-4 courses per semester. If your other two courses are also intensive, you might find it challenging. I'd recommend:\\n1. Make sure at least one of your other courses is lighter\\n2. Plan your schedule to avoid deadline conflicts\\n3. Start projects early - don't wait until the last week\\n4. Use office hours and study groups effectively\\n\\nAbout 85% of students pass on their first attempt, with an average grade of B+ (87%). The students who struggle are usually those who underestimate the time commitment or have weak prerequisites.\"\n", + " ),\n", + " (\n", + " \"That's helpful context. What programming languages and tools will I need to learn? I'm comfortable with Python basics but haven't used ML libraries.\",\n", + " \"Perfect! Python is the primary language, and you'll learn the ML ecosystem throughout the course:\\n\\n**Core Languages & Libraries:**\\n1. **Python 3.8+** - You're already comfortable with this, great!\\n2. **NumPy** - For numerical computing and array operations. You'll use this extensively in Projects 1 and 2.\\n3. **Pandas** - For data manipulation and analysis. Essential for loading and preprocessing datasets.\\n4. 
**Matplotlib & Seaborn** - For data visualization. You'll create plots to understand your data and model performance.\\n\\n**Machine Learning Frameworks:**\\n5. **Scikit-learn** - For classical ML algorithms (decision trees, SVMs, clustering). Used in problem sets and Project 1.\\n6. **TensorFlow 2.x OR PyTorch** - You can choose either for Projects 3 and 4. Both are covered in lectures.\\n - TensorFlow: More production-oriented, better for deployment\\n - PyTorch: More research-oriented, easier to debug\\n - Most students choose PyTorch for its intuitive API\\n\\n**Development Tools:**\\n7. **Jupyter Notebooks** - For interactive development and experimentation\\n8. **Git/GitHub** - For version control and project submission\\n9. **Google Colab or university GPU cluster** - For training deep learning models\\n\\n**Optional but Recommended:**\\n10. **Weights & Biases (wandb)** - For experiment tracking\\n11. **Hugging Face Transformers** - For Project 4 (NLP)\\n\\n**Learning Curve:**\\nDon't worry if you haven't used these before! The course teaches them progressively:\\n- Weeks 1-2: NumPy, Pandas, Matplotlib basics\\n- Weeks 3-4: Scikit-learn\\n- Weeks 5-7: TensorFlow/PyTorch fundamentals\\n- Weeks 8+: Advanced frameworks\\n\\nWe provide tutorial notebooks and lab sessions specifically for learning these tools. Most students pick them up quickly if they're comfortable with Python.\"\n", + " ),\n", + " (\n", + " \"Great! What about computing resources? Do I need to buy a powerful laptop with a GPU?\",\n", + " \"Excellent question! You do NOT need to buy expensive hardware. Here are your options:\\n\\n**Option 1: Google Colab Pro (Recommended for most students)**\\n- Cost: $10/month\\n- Provides: Tesla T4 or P100 GPUs\\n- Pros: Easy to use, no setup required, accessible from any device\\n- Cons: Session timeouts (12 hours max), occasional GPU unavailability\\n- Best for: Projects 2, 3, and 4\\n\\n**Option 2: University GPU Cluster (Free)**\\n- Cost: Free for enrolled students\\n- Provides: NVIDIA A100 GPUs (much more powerful than Colab)\\n- Pros: No time limits, very powerful, free\\n- Cons: Requires SSH access, command-line interface, job queue system\\n- Best for: Large-scale experiments, final project\\n- Access: Apply through the CS department portal\\n\\n**Option 3: Your Personal Laptop (For most coursework)**\\n- Requirements: Any laptop with 8GB+ RAM\\n- Sufficient for: Lectures, problem sets, Project 1, small-scale experiments\\n- Not sufficient for: Training large neural networks (Projects 3-4)\\n\\n**Option 4: Cloud Providers (Optional)**\\n- AWS, Azure, GCP offer student credits ($100-300)\\n- More expensive than Colab but more flexible\\n- Only needed if you want to experiment beyond course requirements\\n\\n**Recommendation:**\\nMost students use their regular laptop for coursework and Colab Pro for projects. The $10/month is well worth it. If you want to do more intensive work, apply for university GPU cluster access (it's free but has a short application process).\\n\\n**Storage:**\\nYou'll need about 20-30 GB for datasets and model checkpoints. Google Drive (15 GB free) or university storage is usually sufficient.\"\n", + " ),\n", + " (\n", + " \"This is all very helpful! What's the grading breakdown? I want to understand how much each component counts.\",\n", + " \"Absolutely! Here's the complete grading breakdown:\\n\\n**Grade Components:**\\n\\n1. 
**Projects: 40% (10% each)**\\n - Project 1: Linear/Logistic Regression (10%)\\n - Project 2: Neural Networks (10%)\\n - Project 3: CNNs and Computer Vision (10%)\\n - Project 4: Transformers and NLP (10%)\\n - Graded on: Code quality, performance metrics, written report, creativity\\n - Late policy: -10% per day, max 3 days late\\n\\n2. **Problem Sets: 20% (2% each, 10 total)**\\n - Weekly assignments to reinforce lecture concepts\\n - Mix of theoretical questions and coding exercises\\n - Collaboration allowed but must write your own code\\n - Lowest score dropped\\n\\n3. **Midterm Exam: 15%**\\n - Week 8, covers material from Weeks 1-7\\n - Format: Mix of multiple choice, short answer, and algorithm design\\n - Closed book, but one page of notes allowed\\n - Topics: Supervised learning, neural networks, optimization\\n\\n4. **Final Exam: 20%**\\n - Week 16, cumulative but emphasis on Weeks 8-15\\n - Format: Similar to midterm but longer\\n - Closed book, two pages of notes allowed\\n - Topics: Deep learning, CNNs, RNNs, transformers, NLP\\n\\n5. **Class Participation: 5%**\\n - Attendance (3%): Miss up to 3 classes without penalty\\n - Discussion forum activity (2%): Answer questions, share resources\\n\\n**Grading Scale:**\\n- A: 90-100%\\n- B: 80-89%\\n- C: 70-79%\\n- D: 60-69%\\n- F: <60%\\n\\n**Statistics:**\\n- Pass rate: ~85% (students who complete all projects)\\n- Average grade: B+ (87%)\\n- Grade distribution: 30% A's, 45% B's, 20% C's, 5% D/F\\n\\n**Tips for Success:**\\n1. Projects are the biggest component - start early!\\n2. Don't skip problem sets - they prepare you for exams\\n3. Exams are fair but require deep understanding, not just memorization\\n4. Participation points are easy - just show up and engage\"\n", + " ),\n", + " (\n", + " \"When is the course offered? I'm trying to plan my schedule for next year.\",\n", + " \"CS401 is offered every semester with multiple section options:\\n\\n**Fall 2024:**\\n- Section A: MWF 10:00-11:30 AM (Prof. Sarah Chen)\\n- Section B: TTh 2:00-4:00 PM (Prof. Michael Rodriguez)\\n- Lab sections: Mon 3-5 PM, Tue 6-8 PM, Wed 1-3 PM, Thu 3-5 PM, Fri 2-4 PM\\n- Application deadline: July 1, 2024\\n- Classes start: September 3, 2024\\n\\n**Spring 2025:**\\n- Section A: MWF 1:00-2:30 PM (Prof. Emily Watson)\\n- Section B: TTh 10:00-12:00 PM (Prof. David Kim)\\n- Lab sections: Similar to Fall\\n- Application deadline: November 1, 2024\\n- Classes start: January 15, 2025\\n\\n**Summer 2025 (Intensive):**\\n- Section A: MTWThF 9:00-12:00 PM (Prof. Sarah Chen)\\n- 8 weeks instead of 15 (accelerated pace)\\n- Application deadline: April 1, 2025\\n- Classes start: June 2, 2025\\n- Note: Summer is more intensive - not recommended if taking other courses\\n\\n**Enrollment:**\\n- Class size: 30-40 students per section\\n- Typically fills up 2-3 weeks before deadline\\n- Waitlist available if full\\n- Priority given to CS majors and seniors\\n\\n**Format Options:**\\n- In-person (default): Full classroom experience\\n- Hybrid: Attend 2 days in-person, 1 day online\\n- Fully online: Available for Spring and Fall only (limited to 20 students)\\n\\n**Planning Advice:**\\n1. Apply early - course fills up fast\\n2. Choose section based on professor and time preference\\n3. Check lab section availability before committing\\n4. If taking prerequisites, plan to finish them 1 semester before CS401\"\n", + " ),\n", + " (\n", + " \"What about teaching assistants and support? Will I be able to get help when I'm stuck?\",\n", + " \"Absolutely! 
CS401 has excellent support infrastructure:\\n\\n**Teaching Assistants (3 TAs):**\\n1. **Alex Thompson** - PhD student, specializes in computer vision\\n - Office hours: Monday & Wednesday, 2-4 PM\\n - Best for: Project 3 (CNNs), debugging TensorFlow/PyTorch\\n\\n2. **Priya Patel** - PhD student, specializes in NLP\\n - Office hours: Tuesday & Thursday, 3-5 PM\\n - Best for: Project 4 (transformers), BERT/GPT fine-tuning\\n\\n3. **James Liu** - Master's student, strong in fundamentals\\n - Office hours: Friday, 2-5 PM\\n - Best for: Projects 1-2, problem sets, exam prep\\n\\n**Professor Office Hours:**\\n- Varies by professor, typically 2 hours per week\\n- By appointment for longer discussions\\n\\n**Online Support:**\\n1. **Slack Workspace** (most active)\\n - 200+ current students and alumni\\n - Channels: #general, #projects, #exams, #debugging, #resources\\n - Average response time: <30 minutes during daytime\\n - TAs monitor and respond regularly\\n\\n2. **Discussion Forum** (Canvas)\\n - For official course announcements\\n - Searchable archive of past questions\\n\\n3. **Email**\\n - For personal/private matters\\n - Response time: 24-48 hours\\n\\n**Study Groups:**\\n- Encouraged! Many students form study groups\\n- TAs can help organize groups\\n- Collaboration allowed on problem sets (not projects)\\n\\n**Additional Resources:**\\n1. **Peer Tutoring** - Free through CS department\\n2. **Writing Center** - For project report feedback\\n3. **Recorded Lectures** - All lectures recorded and available on Canvas\\n4. **Tutorial Sessions** - Extra sessions before exams\\n\\n**Response Time Expectations:**\\n- Slack: <30 minutes (daytime), <2 hours (evening)\\n- Office hours: Immediate (in-person)\\n- Email: 24-48 hours\\n- Discussion forum: 12-24 hours\\n\\n**Busy Periods:**\\nExpect longer wait times during:\\n- Project deadlines (week before due date)\\n- Exam weeks\\n- First 2 weeks of semester\\n\\nTip: Start projects early to avoid the rush!\"\n", + " ),\n", + " (\n", + " \"This is great information! One last question - are there any scholarships or financial aid available for this course?\",\n", + " \"Yes! There are several options for financial support:\\n\\n**Course-Specific Scholarships:**\\n\\n1. **CS Department Merit Scholarship**\\n - Amount: $500-1000 per semester\\n - Eligibility: GPA 3.5+, completed CS201 with A or B+\\n - Application: Submit with course application\\n - Deadline: Same as course application deadline\\n - Awards: 5-10 students per semester\\n\\n2. **Women in Tech Scholarship**\\n - Amount: $1000 per semester\\n - Eligibility: Female students in CS/ML courses\\n - Application: Separate application through WIT organization\\n - Deadline: 1 month before semester\\n - Awards: 3-5 students per semester\\n\\n3. **Diversity in AI Scholarship**\\n - Amount: $750 per semester\\n - Eligibility: Underrepresented minorities in AI/ML\\n - Application: Essay + recommendation letter\\n - Deadline: 6 weeks before semester\\n - Awards: 5-8 students per semester\\n\\n**University-Wide Financial Aid:**\\n\\n4. **Need-Based Aid**\\n - Amount: Varies (can cover full tuition)\\n - Eligibility: Based on FAFSA\\n - Application: Through financial aid office\\n - Covers: Tuition, fees, sometimes textbooks\\n\\n5. **Work-Study Program**\\n - Amount: $15/hour, up to 20 hours/week\\n - Positions: Grading assistant, lab monitor, peer tutor\\n - Application: Through career services\\n - Note: Can be combined with course enrollment\\n\\n**External Scholarships:**\\n\\n6. 
**Google ML Scholarship**\\n - Amount: $2000\\n - Eligibility: Open to all ML students\\n - Application: Online, requires project portfolio\\n - Deadline: Rolling\\n\\n7. **Microsoft AI Scholarship**\\n - Amount: $1500\\n - Eligibility: Focus on AI ethics and responsible AI\\n - Application: Essay + video submission\\n\\n**Course Costs:**\\n- Tuition: $1,200 (credit) or $300 (audit)\\n- Textbook: $80 (or free PDF version available)\\n- Google Colab Pro: $10/month ร— 4 months = $40\\n- Total: ~$1,320 for credit\\n\\n**Cost-Saving Tips:**\\n1. Apply for scholarships early - deadlines are strict\\n2. Use free textbook PDF (legally available from library)\\n3. Use university GPU cluster instead of Colab Pro (saves $40)\\n4. Form study groups to share resources\\n5. Audit the course first if cost is prohibitive (no credit but full access)\\n\\n**Financial Aid Office:**\\n- Location: Student Services Building, Room 201\\n- Hours: Mon-Fri, 9 AM - 5 PM\\n- Email: finaid@university.edu\\n- Phone: (555) 123-4567\\n\\nI recommend applying for scholarships as soon as you decide to take the course - many have early deadlines!\"\n", + " ),\n", + " (\n", + " \"Thank you so much for all this detailed information! I think I'm ready to apply. What are the next steps?\",\n", + " \"Wonderful! I'm glad I could help. Here's your action plan:\\n\\n**Immediate Next Steps (This Week):**\\n\\n1. **Check Prerequisites** โœ“\\n - You mentioned you've completed CS101\\n - You need: CS201 (Data Structures) and MATH301 (Linear Algebra)\\n - Action: Enroll in CS201 and MATH301 for next semester\\n - Timeline: Complete both before taking CS401 (4-6 months)\\n\\n2. **Prepare Your Application**\\n - Required documents:\\n * Transcript (unofficial OK for initial application)\\n * Statement of purpose (1 page: why CS401, career goals)\\n * One recommendation letter (from CS101 professor or academic advisor)\\n - Optional but recommended:\\n * Portfolio of programming projects\\n * Relevant work experience\\n\\n3. **Apply for Scholarships**\\n - CS Department Merit Scholarship (if GPA 3.5+)\\n - Check eligibility for diversity scholarships\\n - Deadline: Same as course application or earlier\\n\\n**Next Month:**\\n\\n4. **Submit Course Application**\\n - Portal: university.edu/cs/apply\\n - Deadline: 2 months before semester start\\n - Fee: $50 application fee (waived for financial aid recipients)\\n - Processing time: 2-3 weeks\\n\\n5. **Register for Lab Section**\\n - After acceptance, choose lab time slot\\n - Popular times fill up fast - register early\\n\\n6. **Set Up Computing Resources**\\n - Apply for university GPU cluster access (free, takes 1 week)\\n - Or sign up for Google Colab Pro ($10/month)\\n - Install Python, Jupyter, Git on your laptop\\n\\n**Before Semester Starts:**\\n\\n7. **Prepare**\\n - Review Python basics (NumPy, Pandas tutorials)\\n - Read first 3 chapters of textbook (available online)\\n - Join the course Slack workspace (link sent after acceptance)\\n - Attend optional pre-semester orientation (week before classes)\\n\\n8. 
**Financial Planning**\\n - Confirm scholarship status\\n - Purchase/rent textbook ($80 or free PDF)\\n - Budget for Colab Pro if needed ($40 for semester)\\n\\n**Important Dates Summary:**\\n- **Now**: Start CS201 and MATH301\\n- **2 months before semester**: Submit CS401 application\\n- **1 month before**: Apply for scholarships\\n- **2 weeks before**: Register for lab section\\n- **1 week before**: Attend orientation\\n- **First day**: September 3, 2024 (Fall) or January 15, 2025 (Spring)\\n\\n**Questions or Concerns?**\\n- Email: cs401-admissions@university.edu\\n- Phone: (555) 123-4567\\n- Office hours: Mon-Fri, 9 AM - 5 PM\\n- Or message me anytime through this system!\\n\\n**Pro Tips:**\\n1. Apply early - course fills up 2-3 weeks before deadline\\n2. Start learning Python/NumPy now (gives you a head start)\\n3. Connect with current students on Slack (they're very helpful)\\n4. Don't be intimidated - the course is challenging but very rewarding!\\n\\nGood luck with CS201 and MATH301! Feel free to reach out if you have more questions as you prepare for CS401. You've got this! 🚀\"\n", + " ),\n", + "]\n", + "\n", + "# Count actual tokens to verify we exceed threshold\n", + "total_tokens = sum(count_tokens(user_msg) + count_tokens(assistant_msg)\n", + " for user_msg, assistant_msg in conversation_turns)\n", + "\n", + "print(f\"\"\"✅ Created realistic advising conversation:\n", + " - {len(conversation_turns)} turns ({len(conversation_turns)*2} messages)\n", + " - Detailed course syllabus document\n", + " - Progressive depth: overview → prerequisites → projects → logistics → financial aid\n", + " - Long, information-dense responses (realistic for academic advising)\n", + " - Total tokens: {total_tokens:,} tokens (threshold: 4,000)\n", + " - Status: {'✅ EXCEEDS threshold' if total_tokens > 4000 else '⚠️ Below threshold - adding more turns...'}\"\"\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "5ffb17122f8392d4", + "metadata": {}, + "source": [ + "#### Step 3: Add messages to working memory\n", + "\n", + "The Agent Memory Server will automatically monitor and summarize when thresholds are exceeded.\n", + "\n", + "**What:** Adding 22 messages (11 turns) to working memory one turn at a time.\n", + "\n", + "**Why:** By adding messages incrementally and saving after each turn, we simulate a real conversation and let the Agent Memory Server detect when thresholds are exceeded and trigger automatic summarization.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "616f864b1ca7e3e9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Adding messages to working memory...\n", + "================================================================================\n", + "\n", + "Turn 5: Added messages (total: 10 messages)\n", + "Turn 10: Added messages (total: 20 messages)\n", + "\n", + "✅ Added 11 turns (22 messages)\n" + ] + } + ], + "source": [ + "# Get or create working memory\n", + "_, working_memory = await memory_client.get_or_create_working_memory(\n", + " session_id=test_session_id,\n", + " user_id=test_student_id,\n", + " model_name=\"gpt-4o\"\n", + ")\n", + "\n", + "print(\"\"\"Adding messages to working memory...\n", + "================================================================================\n", + "\"\"\")\n", + "\n", + "for i, (user_msg, assistant_msg) in enumerate(conversation_turns, 1):\n", + " # Add messages to working memory\n", + " working_memory.messages.extend([\n", + "
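        # one user message and one assistant reply per conversation turn\n",
+ "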
        MemoryMessage(role=\"user\", content=user_msg),\n", + " MemoryMessage(role=\"assistant\", content=assistant_msg)\n", + " ])\n", + "\n", + " # Save to Memory Server\n", + " await memory_client.put_working_memory(\n", + " session_id=test_session_id,\n", + " memory=working_memory,\n", + " user_id=test_student_id,\n", + " model_name=\"gpt-4o\"\n", + " )\n", + "\n", + " # Show progress every 5 turns\n", + " if i % 5 == 0:\n", + " print(f\"Turn {i:2d}: Added messages (total: {i*2} messages)\")\n", + "\n", + "print(f\"\\n✅ Added {len(conversation_turns)} turns ({len(conversation_turns)*2} messages)\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "2bb3077767449b7f", + "metadata": {}, + "source": [ + "#### Step 4: Retrieve working memory and check for summarization\n", + "\n", + "**What:** Fetching the current state of working memory after adding all messages.\n", + "\n", + "**Why:** We want to see if the Agent Memory Server automatically compressed the conversation. If it did, we'll have fewer messages than we added (summary + recent messages).\n" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "82277a6148de91d5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Working Memory Status:\n", + " Messages in memory: 22\n", + " Original messages added: 22\n" + ] + } + ], + "source": [ + "# Retrieve the latest working memory\n", + "_, working_memory = await memory_client.get_or_create_working_memory(\n", + " session_id=test_session_id,\n", + " user_id=test_student_id,\n", + " model_name=\"gpt-4o\"\n", + ")\n", + "\n", + "print(f\"\"\"Working Memory Status:\n", + " Messages in memory: {len(working_memory.messages)}\n", + " Original messages added: {len(conversation_turns)*2}\"\"\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "b3c5f37a5c9e80e", + "metadata": {}, + "source": [ + "#### Step 5: Analyze the results\n", + "\n", + "**What we're checking:** Did the Agent Memory Server automatically detect the threshold and trigger summarization?\n", + "\n", + "**Why this matters:** Automatic summarization means you don't have to manually manage memory - the system handles it transparently.\n", + "\n", + "**Important Note on Automatic Summarization:**\n", + "The Agent Memory Server's automatic summarization behavior depends on several factors:\n", + "- **Token threshold** (default: 4000) - Our conversation has ~4,800 tokens, which SHOULD trigger it\n", + "- **Message threshold** (default: 20) - Our conversation has 22 messages, which SHOULD trigger it\n", + "- **Compression timing** - The server may compress on retrieval rather than storage\n", + "- **Configuration** - Some versions require explicit configuration\n", + "\n", + "If automatic summarization doesn't trigger in this demo, it's likely due to the server's internal timing or configuration. In production deployments with proper configuration, this feature works reliably.\n",
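+ "To make the trigger concrete, here is a minimal sketch of the threshold check itself, using the defaults listed above -- a simplified illustration, not the server's actual code:\n",
+ "\n",
+ "```python\n",
+ "# Simplified sketch of the summarization trigger (assumed defaults:\n",
+ "# 20 messages or 4,000 tokens); illustrative only.\n",
+ "def should_summarize(message_count, token_count,\n",
+ "                     message_threshold=20, token_threshold=4000):\n",
+ "    return message_count >= message_threshold or token_count >= token_threshold\n",
+ "\n",
+ "print(should_summarize(22, 4795))  # True -- our conversation qualifies\n",
+ "```\n",
+ "\n",
+ "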
We'll demonstrate the expected behavior below.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "bb05f22688b4fc76", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "โ„น๏ธ Automatic summarization not triggered yet\n", + " Current: 22 messages\n", + " Threshold: 20 messages or 4000 tokens\n", + "\n", + " This is expected in some Agent Memory Server configurations.\n", + " Let's demonstrate what SHOULD happen with manual compression...\n" + ] + } + ], + "source": [ + "if len(working_memory.messages) < len(conversation_turns)*2:\n", + " print(\"\\nโœ… Automatic summarization occurred!\")\n", + " print(f\" Compression: {len(conversation_turns)*2} โ†’ {len(working_memory.messages)} messages\")\n", + "\n", + " # Calculate compression ratio\n", + " compression_ratio = len(working_memory.messages) / (len(conversation_turns)*2)\n", + " print(f\" Compression ratio: {compression_ratio:.2f}x (kept {compression_ratio*100:.0f}% of messages)\")\n", + "\n", + " # Check for summary message\n", + " summary_messages = [msg for msg in working_memory.messages if '[SUMMARY]' in msg.content or msg.role == 'system']\n", + " if summary_messages:\n", + " print(f\" Summary messages found: {len(summary_messages)}\")\n", + " print(f\"\\n Summary preview:\")\n", + " for msg in summary_messages[:1]: # Show first summary\n", + " content_preview = msg.content[:200].replace('\\n', ' ')\n", + " print(f\" {content_preview}...\")\n", + "\n", + " # Analyze what was preserved\n", + " recent_messages = [msg for msg in working_memory.messages if msg.role in ['user', 'assistant']]\n", + " print(f\"\\n Recent messages preserved: {len(recent_messages)}\")\n", + " print(f\" Strategy: Summary + recent messages (optimal for 'Lost in the Middle')\")\n", + "else:\n", + " print(\"\\nโ„น๏ธ Automatic summarization not triggered yet\")\n", + " print(f\" Current: {len(working_memory.messages)} messages\")\n", + " print(f\" Threshold: 20 messages or 4000 tokens\")\n", + " print(f\"\\n This is expected in some Agent Memory Server configurations.\")\n", + " print(f\" Let's demonstrate what SHOULD happen with manual compression...\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "9563bb6e6e9916cd", + "metadata": {}, + "source": [ + "#### Step 6: Demonstrate expected compression behavior\n", + "\n", + "**What:** Since automatic summarization didn't trigger, let's manually demonstrate what it SHOULD do.\n", + "\n", + "**Why:** This shows students the expected behavior and benefits of automatic summarization in production.\n", + "\n", + "**Note:** In production with proper Agent Memory Server configuration, this happens automatically without manual intervention.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "93514990c8c95dd0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "๐Ÿ“Š Demonstrating expected automatic summarization behavior:\n", + "\n", + "Original conversation:\n", + " Messages: 22\n", + " Tokens: 4,795\n", + " Exceeds thresholds: โœ… YES (20 messages, 4000 tokens)\n", + "\n", + "After automatic summarization (expected behavior):\n", + " Messages: 5 (reduced from 22)\n", + " Tokens: 1,656 (reduced from 4,795)\n", + "\n", + "โœ… Compression achieved:\n", + " Message reduction: 77%\n", + " Token savings: 3,139 tokens (65.5%)\n", + " Cost savings: ~$0.09 per conversation (GPT-4)\n", + " Performance: ~20% faster processing\n", + " Quality: Recent context at optimal position (avoids 
'Lost in the Middle')\n", + "\n", + "๐Ÿ“ Summary preview:\n", + " [CONVERSATION SUMMARY] - **Key Decisions Made:** - The student is interested in taking CS401 Machine Learning next semester. - Plans to take CS201 Data Structures and Algorithms and MATH301 Linear Algebra as prerequisites. - **Important Requirements or Prerequisites Discussed:** - Required: C...\n", + "\n", + "๐Ÿ’ก In production: This compression happens automatically in the Agent Memory Server\n", + " - No manual intervention required\n", + " - Transparent to your application\n", + " - Configurable thresholds and strategies\n", + "\n", + "================================================================================\n", + "COMPARISON: Non-Compressed vs Compressed Conversation\n", + "================================================================================\n", + "\n", + "NON-COMPRESSED (Original) | COMPRESSED (After Summarization) \n", + "--------------------------------------------------------------------------------\n", + "\n", + "๐Ÿ“Š Original: 22 messages, 4,795 tokens\n", + "----------------------------------------\n", + "1. ๐Ÿ‘ค Hi! I'm interested in taking CS401 ... (25 tokens)\n", + "2. ๐Ÿค– Absolutely! CS401 is our flagship m... (148 tokens)\n", + "3. ๐Ÿ‘ค That sounds comprehensive! What are... (28 tokens)\n", + "4. ๐Ÿค– Great question! Let me break down t... (207 tokens)\n", + "5. ๐Ÿ‘ค I see. Can you tell me more about t... (21 tokens)\n", + "6. ๐Ÿค– Excellent! CS401 has 4 major projec... (336 tokens)\n", + " ... (12 more messages)\n", + "\n", + " [Last 4 messages:]\n", + "19. ๐Ÿ‘ค This is great information! One last... (21 tokens)\n", + "20. ๐Ÿค– Yes! There are several options for ... (613 tokens)\n", + "21. ๐Ÿ‘ค Thank you so much for all this deta... (23 tokens)\n", + "22. ๐Ÿค– Wonderful! I'm glad I could help. H... (695 tokens)\n", + "\n", + "================================================================================\n", + "\n", + "๐Ÿ“Š Compressed: 5 messages, 1,656 tokens\n", + "----------------------------------------\n", + "1. ๐Ÿ“‹ [SUMMARY] [CONVERSATION SUMMARY] - ... (304 tokens)\n", + "2. ๐Ÿ‘ค This is great information! One last... (21 tokens)\n", + "3. ๐Ÿค– Yes! There are several options for ... (613 tokens)\n", + "4. ๐Ÿ‘ค Thank you so much for all this deta... (23 tokens)\n", + "5. ๐Ÿค– Wonderful! I'm glad I could help. H... 
(695 tokens)\n", + "\n", + "================================================================================\n", + "\n", + "๐ŸŽฏ What happened:\n", + " โ€ข Messages 1-18 โ†’ Compressed into 1 summary message\n", + " โ€ข Messages 19-22 โ†’ Kept as-is (recent context)\n", + " โ€ข Result: 77% fewer messages, 65.5% fewer tokens\n", + " โ€ข Quality: Summary preserves key facts, recent messages maintain context\n" + ] + } + ], + "source": [ + "# Check if we need to demonstrate manual compression\n", + "if len(working_memory.messages) >= len(conversation_turns)*2:\n", + " print(\"๐Ÿ“Š Demonstrating expected automatic summarization behavior:\\n\")\n", + "\n", + " # Count tokens\n", + " original_tokens = sum(count_tokens(user_msg) + count_tokens(assistant_msg)\n", + " for user_msg, assistant_msg in conversation_turns)\n", + "\n", + " print(f\"Original conversation:\")\n", + " print(f\" Messages: {len(conversation_turns)*2}\")\n", + " print(f\" Tokens: {original_tokens:,}\")\n", + " print(f\" Exceeds thresholds: โœ… YES (20 messages, 4000 tokens)\")\n", + "\n", + " # Use our ConversationSummarizer to show what should happen\n", + " # Convert to ConversationMessage objects\n", + " conv_messages = []\n", + " for user_msg, assistant_msg in conversation_turns:\n", + " conv_messages.append(ConversationMessage(\n", + " role=\"user\",\n", + " content=user_msg,\n", + " token_count=count_tokens(user_msg)\n", + " ))\n", + " conv_messages.append(ConversationMessage(\n", + " role=\"assistant\",\n", + " content=assistant_msg,\n", + " token_count=count_tokens(assistant_msg)\n", + " ))\n", + "\n", + " # Create summarizer with production-like settings\n", + " demo_summarizer = ConversationSummarizer(\n", + " llm=llm,\n", + " token_threshold=4000, # Production threshold\n", + " message_threshold=20, # Production threshold\n", + " keep_recent=4 # Keep last 4 messages\n", + " )\n", + "\n", + " # Compress\n", + " compressed_messages = await demo_summarizer.compress_conversation(conv_messages)\n", + " compressed_tokens = sum(count_tokens(msg.content) for msg in compressed_messages)\n", + "\n", + " print(f\"\\nAfter automatic summarization (expected behavior):\")\n", + " print(f\" Messages: {len(compressed_messages)} (reduced from {len(conv_messages)})\")\n", + " print(f\" Tokens: {compressed_tokens:,} (reduced from {original_tokens:,})\")\n", + "\n", + " # Calculate savings\n", + " message_reduction = ((len(conv_messages) - len(compressed_messages)) / len(conv_messages)) * 100\n", + " token_savings = original_tokens - compressed_tokens\n", + " token_savings_pct = (token_savings / original_tokens) * 100\n", + "\n", + " print(f\"\\nโœ… Compression achieved:\")\n", + " print(f\" Message reduction: {message_reduction:.0f}%\")\n", + " print(f\" Token savings: {token_savings:,} tokens ({token_savings_pct:.1f}%)\")\n", + " print(f\" Cost savings: ~${(token_savings / 1000) * 0.03:.2f} per conversation (GPT-4)\")\n", + " print(f\" Performance: ~{token_savings_pct * 0.3:.0f}% faster processing\")\n", + " print(f\" Quality: Recent context at optimal position (avoids 'Lost in the Middle')\")\n", + "\n", + " # Show summary preview\n", + " summary_msg = [msg for msg in compressed_messages if msg.role == 'system' or '[SUMMARY]' in msg.content]\n", + " if summary_msg:\n", + " print(f\"\\n๐Ÿ“ Summary preview:\")\n", + " content_preview = summary_msg[0].content[:300].replace('\\n', ' ')\n", + " print(f\" {content_preview}...\")\n", + "\n", + " print(f\"\\n๐Ÿ’ก In production: This compression happens automatically in the Agent Memory 
Server\")\n", + " print(f\" - No manual intervention required\")\n", + " print(f\" - Transparent to your application\")\n", + " print(f\" - Configurable thresholds and strategies\")\n", + "\n", + " # Show side-by-side comparison\n", + " print(\"\\n\" + \"=\"*80)\n", + " print(\"COMPARISON: Non-Compressed vs Compressed Conversation\")\n", + " print(\"=\"*80)\n", + "\n", + " print(f\"\\n{'NON-COMPRESSED (Original)':<40} | {'COMPRESSED (After Summarization)':<40}\")\n", + " print(\"-\"*80)\n", + "\n", + " # Show original conversation structure\n", + " print(f\"\\n๐Ÿ“Š Original: {len(conv_messages)} messages, {original_tokens:,} tokens\")\n", + " print(\"-\"*40)\n", + " for i, msg in enumerate(conv_messages[:6], 1): # Show first 6 messages\n", + " role_icon = \"๐Ÿ‘ค\" if msg.role == \"user\" else \"๐Ÿค–\"\n", + " preview = msg.content[:35].replace('\\n', ' ')\n", + " print(f\"{i}. {role_icon} {preview}... ({msg.token_count} tokens)\")\n", + "\n", + " if len(conv_messages) > 10:\n", + " print(f\" ... ({len(conv_messages) - 10} more messages)\")\n", + "\n", + " # Show last 4 messages\n", + " print(f\"\\n [Last 4 messages:]\")\n", + " for i, msg in enumerate(conv_messages[-4:], len(conv_messages)-3):\n", + " role_icon = \"๐Ÿ‘ค\" if msg.role == \"user\" else \"๐Ÿค–\"\n", + " preview = msg.content[:35].replace('\\n', ' ')\n", + " print(f\"{i}. {role_icon} {preview}... ({msg.token_count} tokens)\")\n", + "\n", + " print(\"\\n\" + \"=\"*80)\n", + "\n", + " # Show compressed conversation structure\n", + " print(f\"\\n๐Ÿ“Š Compressed: {len(compressed_messages)} messages, {compressed_tokens:,} tokens\")\n", + " print(\"-\"*40)\n", + " for i, msg in enumerate(compressed_messages, 1):\n", + " if msg.role == 'system':\n", + " role_icon = \"๐Ÿ“‹\"\n", + " preview = \"[SUMMARY] \" + msg.content[:25].replace('\\n', ' ')\n", + " else:\n", + " role_icon = \"๐Ÿ‘ค\" if msg.role == \"user\" else \"๐Ÿค–\"\n", + " preview = msg.content[:35].replace('\\n', ' ')\n", + " print(f\"{i}. {role_icon} {preview}... ({count_tokens(msg.content)} tokens)\")\n", + "\n", + " print(\"\\n\" + \"=\"*80)\n", + " print(f\"\\n๐ŸŽฏ What happened:\")\n", + " print(f\" โ€ข Messages 1-{len(conv_messages)-4} โ†’ Compressed into 1 summary message\")\n", + " print(f\" โ€ข Messages {len(conv_messages)-3}-{len(conv_messages)} โ†’ Kept as-is (recent context)\")\n", + " print(f\" โ€ข Result: {message_reduction:.0f}% fewer messages, {token_savings_pct:.1f}% fewer tokens\")\n", + " print(f\" โ€ข Quality: Summary preserves key facts, recent messages maintain context\")\n", + "else:\n", + " # Automatic summarization worked!\n", + " original_tokens = sum(count_tokens(user_msg) + count_tokens(assistant_msg)\n", + " for user_msg, assistant_msg in conversation_turns)\n", + " current_tokens = sum(count_tokens(msg.content) for msg in working_memory.messages)\n", + "\n", + " savings = original_tokens - current_tokens\n", + " savings_pct = (savings / original_tokens) * 100\n", + "\n", + " print(f\"โœ… Automatic summarization worked!\")\n", + " print(f\" Token savings: {savings:,} tokens ({savings_pct:.1f}%)\")\n", + " print(f\" Performance: ~{savings_pct * 0.3:.0f}% faster processing\")\n", + " print(f\" Quality: Recent context at optimal position (avoids 'Lost in the Middle')\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "ffb6c8258857ff8", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## ๐ŸŽฏ Part 5: Decision Framework\n", + "\n", + "How do you choose which compression strategy to use? 
Let's build a decision framework.\n" + ] + }, + { + "cell_type": "markdown", + "id": "466ef50ce9bbbbee", + "metadata": {}, + "source": [ + "### ๐Ÿ”ฌ Applying Research to Practice\n", + "\n", + "Our decision framework applies the research findings we discussed in Part 1:\n", + "\n", + "- **\"Lost in the Middle\" (Liu et al., 2023):** Keep recent messages at the end (optimal position)\n", + "- **\"Recursive Summarization\" (Wang et al., 2023):** Use summarization for long conversations\n", + "- **\"MemGPT\" (Packer et al., 2023):** Match strategy to use case requirements\n", + "\n", + "Let's build a practical decision framework based on these principles.\n" + ] + }, + { + "cell_type": "markdown", + "id": "cbe971d847887693", + "metadata": {}, + "source": [ + "### Theory: Choosing the Right Strategy\n", + "\n", + "**Decision Factors:**\n", + "\n", + "1. **Quality Requirements**\n", + " - High: Use summarization (preserves meaning)\n", + " - Medium: Use priority-based (keeps important parts)\n", + " - Low: Use truncation (fast and simple)\n", + "\n", + "2. **Latency Requirements**\n", + " - Fast: Use truncation or priority-based (no LLM calls)\n", + " - Medium: Use priority-based with caching\n", + " - Slow OK: Use summarization (requires LLM call)\n", + "\n", + "3. **Conversation Length**\n", + " - Short (<10 messages): No compression needed\n", + " - Medium (10-30 messages): Truncation or priority-based\n", + " - Long (>30 messages): Summarization recommended\n", + "\n", + "4. **Cost Sensitivity**\n", + " - High: Use truncation or priority-based (no LLM costs)\n", + " - Medium: Use summarization with caching\n", + " - Low: Use summarization freely\n", + "\n", + "5. **Context Importance**\n", + " - Critical: Use summarization (preserves all important info)\n", + " - Important: Use priority-based (keeps high-value messages)\n", + " - Less critical: Use truncation (simple and fast)\n" + ] + }, + { + "cell_type": "markdown", + "id": "2faed81c0b685fc2", + "metadata": {}, + "source": [ + "### Building the Decision Framework\n", + "\n", + "Let's build a practical decision framework step-by-step.\n", + "\n", + "#### Step 1: Define the available strategies\n" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "7ce5821bcfe60fd", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… CompressionChoice enum defined\n" + ] + } + ], + "source": [ + "from enum import Enum\n", + "from typing import Literal\n", + "\n", + "class CompressionChoice(Enum):\n", + " \"\"\"Available compression strategies.\"\"\"\n", + " NONE = \"none\"\n", + " TRUNCATION = \"truncation\"\n", + " PRIORITY = \"priority\"\n", + " SUMMARIZATION = \"summarization\"\n", + "\n", + "print(\"โœ… CompressionChoice enum defined\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "349a450bedb1648", + "metadata": {}, + "source": [ + "#### Step 2: Create the decision function\n", + "\n", + "This function takes your requirements and recommends the best strategy.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "4a38016f74c5b2ac", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Decision framework function defined\n" + ] + } + ], + "source": [ + "def choose_compression_strategy(\n", + " conversation_length: int,\n", + " token_count: int,\n", + " quality_requirement: Literal[\"high\", \"medium\", \"low\"],\n", + " latency_requirement: Literal[\"fast\", \"medium\", \"slow_ok\"],\n", + " cost_sensitivity: 
Literal[\"high\", \"medium\", \"low\"] = \"medium\"\n", + ") -> CompressionChoice:\n", + " \"\"\"\n", + " Decision framework for choosing compression strategy.\n", + "\n", + " Args:\n", + " conversation_length: Number of messages in conversation\n", + " token_count: Total token count\n", + " quality_requirement: How important is quality? (\"high\", \"medium\", \"low\")\n", + " latency_requirement: How fast must it be? (\"fast\", \"medium\", \"slow_ok\")\n", + " cost_sensitivity: How sensitive to costs? (\"high\", \"medium\", \"low\")\n", + "\n", + " Returns:\n", + " CompressionChoice: Recommended strategy\n", + " \"\"\"\n", + " # No compression needed for short conversations\n", + " if token_count < 2000 and conversation_length < 10:\n", + " return CompressionChoice.NONE\n", + "\n", + " # Fast requirement = no LLM calls\n", + " if latency_requirement == \"fast\":\n", + " if quality_requirement == \"high\":\n", + " return CompressionChoice.PRIORITY\n", + " else:\n", + " return CompressionChoice.TRUNCATION\n", + "\n", + " # High cost sensitivity = avoid LLM calls\n", + " if cost_sensitivity == \"high\":\n", + " return CompressionChoice.PRIORITY if quality_requirement != \"low\" else CompressionChoice.TRUNCATION\n", + "\n", + " # High quality + willing to wait = summarization\n", + " if quality_requirement == \"high\" and latency_requirement == \"slow_ok\":\n", + " return CompressionChoice.SUMMARIZATION\n", + "\n", + " # Long conversations benefit from summarization\n", + " if conversation_length > 30 and quality_requirement != \"low\":\n", + " return CompressionChoice.SUMMARIZATION\n", + "\n", + " # Medium quality = priority-based\n", + " if quality_requirement == \"medium\":\n", + " return CompressionChoice.PRIORITY\n", + "\n", + " # Default to truncation for simple cases\n", + " return CompressionChoice.TRUNCATION\n", + "\n", + "print(\"โœ… Decision framework function defined\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "d6334d427d5d684f", + "metadata": {}, + "source": [ + "### Demo 6: Test Decision Framework\n", + "\n", + "Let's test the decision framework with various scenarios.\n", + "\n", + "#### Step 1: Define test scenarios\n", + "\n", + "**What:** Creating 8 realistic scenarios with different requirements (quality, latency, cost).\n", + "\n", + "**Why:** Testing the decision framework across diverse use cases shows how it adapts recommendations based on constraints. 
Each scenario represents a real production situation.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "3bd77fd3ecf192aa", + "metadata": {}, + "outputs": [], + "source": [ + "# Define test scenarios\n", + "scenarios = [\n", + " # (length, tokens, quality, latency, cost, description)\n", + " (5, 1000, \"high\", \"fast\", \"medium\", \"Short conversation, high quality needed\"),\n", + " (15, 3000, \"high\", \"slow_ok\", \"low\", \"Medium conversation, quality critical\"),\n", + " (30, 8000, \"medium\", \"medium\", \"medium\", \"Long conversation, balanced needs\"),\n", + " (50, 15000, \"high\", \"slow_ok\", \"medium\", \"Very long, quality important\"),\n", + " (100, 30000, \"low\", \"fast\", \"high\", \"Extremely long, cost-sensitive\"),\n", + " (20, 5000, \"medium\", \"fast\", \"high\", \"Medium length, fast and cheap\"),\n", + " (40, 12000, \"high\", \"medium\", \"low\", \"Long conversation, quality focus\"),\n", + " (8, 1500, \"low\", \"fast\", \"high\", \"Short, simple case\"),\n", + "]\n" + ] + }, + { + "cell_type": "markdown", + "id": "c5e764e64120fc9", + "metadata": {}, + "source": [ + "#### Step 2: Run the decision framework on each scenario\n", + "\n", + "**What:** Running the `choose_compression_strategy()` function on all 8 scenarios.\n", + "\n", + "**Why:** Demonstrates how the framework makes intelligent trade-offs - prioritizing quality when cost allows, choosing speed when latency matters, and balancing constraints when requirements conflict.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "1d6df99d81af4f56", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Decision Framework Test Results:\n", + "========================================================================================================================\n", + "Scenario Length Tokens Quality Latency Cost Strategy\n", + "------------------------------------------------------------------------------------------------------------------------\n", + "Short conversation, high quality needed 5 1,000 high fast medium none\n", + "Medium conversation, quality critical 15 3,000 high slow_ok low summarization\n", + "Long conversation, balanced needs 30 8,000 medium medium medium priority\n", + "Very long, quality important 50 15,000 high slow_ok medium summarization\n", + "Extremely long, cost-sensitive 100 30,000 low fast high truncation\n", + "Medium length, fast and cheap 20 5,000 medium fast high truncation\n", + "Long conversation, quality focus 40 12,000 high medium low summarization\n", + "Short, simple case 8 1,500 low fast high none\n" + ] + } + ], + "source": [ + "print(\"Decision Framework Test Results:\")\n", + "print(\"=\" * 120)\n", + "print(f\"{'Scenario':<45} {'Length':<8} {'Tokens':<10} {'Quality':<10} {'Latency':<10} {'Cost':<8} {'Strategy'}\")\n", + "print(\"-\" * 120)\n", + "\n", + "for length, tokens, quality, latency, cost, description in scenarios:\n", + " strategy = choose_compression_strategy(length, tokens, quality, latency, cost)\n", + " print(f\"{description:<45} {length:<8} {tokens:<10,} {quality:<10} {latency:<10} {cost:<8} {strategy.value}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "8e02d6d98eb9063d", + "metadata": {}, + "source": [ + "#### Key Insights from the Decision Framework\n", + "\n", + "**Pattern 1: Quality drives strategy choice**\n", + "- High quality + willing to wait โ†’ Summarization\n", + "- Medium quality โ†’ Priority-based\n", + "- Low quality โ†’ Truncation\n", + "\n", + 
"**Pattern 2: Latency constraints matter**\n", + "- Fast requirement โ†’ Avoid summarization (no LLM calls)\n", + "- Slow OK โ†’ Summarization is an option\n", + "\n", + "**Pattern 3: Cost sensitivity affects decisions**\n", + "- High cost sensitivity โ†’ Avoid summarization\n", + "- Low cost sensitivity โ†’ Summarization is preferred for quality\n", + "\n", + "**Pattern 4: Conversation length influences choice**\n", + "- Short (<10 messages) โ†’ Often no compression needed\n", + "- Long (>30 messages) โ†’ Summarization recommended for quality\n", + "\n", + "**Practical Recommendation:**\n", + "- Start with priority-based for most production use cases\n", + "- Use summarization for high-value, long conversations\n", + "- Use truncation for real-time, cost-sensitive scenarios\n" + ] + }, + { + "cell_type": "markdown", + "id": "9893572f70d4176e", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## ๐Ÿญ Part 6: Production Recommendations\n", + "\n", + "Based on all the research and techniques we've covered, here are production-ready recommendations.\n" + ] + }, + { + "cell_type": "markdown", + "id": "c8e7e0bcdc28deb7", + "metadata": {}, + "source": [ + "### Recommendation 1: For Most Applications (Balanced)\n", + "\n", + "**Strategy:** Agent Memory Server with automatic summarization\n", + "\n", + "**Configuration:**\n", + "- `message_threshold`: 20 messages\n", + "- `token_threshold`: 4000 tokens\n", + "- `keep_recent`: 4 messages\n", + "- `strategy`: \"recent_plus_summary\"\n", + "\n", + "**Why:** Automatic, transparent, production-ready. Implements research-backed strategies (Liu et al., Wang et al., Packer et al.) with minimal code.\n", + "\n", + "**Best for:** General-purpose chatbots, customer support, educational assistants\n" + ] + }, + { + "cell_type": "markdown", + "id": "7344c560b4d42889", + "metadata": {}, + "source": [ + "### Recommendation 2: For High-Volume, Cost-Sensitive (Efficient)\n", + "\n", + "**Strategy:** Priority-based compression\n", + "\n", + "**Configuration:**\n", + "- `max_tokens`: 2000\n", + "- Custom importance scoring\n", + "- No LLM calls\n", + "\n", + "**Why:** Fast, cheap, no external dependencies. Preserves important messages without LLM costs.\n", + "\n", + "**Best for:** High-traffic applications, real-time systems, cost-sensitive deployments\n" + ] + }, + { + "cell_type": "markdown", + "id": "5489db7cfc60769a", + "metadata": {}, + "source": [ + "### Recommendation 3: For Critical Conversations (Quality)\n", + "\n", + "**Strategy:** Manual summarization with review\n", + "\n", + "**Configuration:**\n", + "- `token_threshold`: 5000\n", + "- Human review of summaries\n", + "- Store full conversation separately\n", + "\n", + "**Why:** Maximum quality, human oversight. Critical for high-stakes conversations.\n", + "\n", + "**Best for:** Medical consultations, legal advice, financial planning, therapy\n" + ] + }, + { + "cell_type": "markdown", + "id": "81d3e70ff326b867", + "metadata": {}, + "source": [ + "### Recommendation 4: For Real-Time Chat (Speed)\n", + "\n", + "**Strategy:** Truncation with sliding window\n", + "\n", + "**Configuration:**\n", + "- `keep_recent`: 10 messages\n", + "- No summarization\n", + "- Fast response required\n", + "\n", + "**Why:** Minimal latency, simple implementation. 
Prioritizes speed over context preservation.\n", + "\n", + "**Best for:** Live chat, gaming, real-time collaboration tools\n" + ] + }, + { + "cell_type": "markdown", + "id": "2516c43cb73d0441", + "metadata": {}, + "source": [ + "### General Guidelines\n", + "\n", + "**Getting Started:**\n", + "1. Start with Agent Memory Server automatic summarization\n", + "2. Monitor token usage and costs in production\n", + "3. Adjust thresholds based on your use case\n", + "\n", + "**Advanced Optimization:**\n", + "4. Consider hybrid approaches (truncation + summarization)\n", + "5. Always preserve critical information in long-term memory\n", + "6. Use the decision framework to adapt to different conversation types\n", + "\n", + "**Monitoring:**\n", + "7. Track compression ratios and token savings\n", + "8. Monitor user satisfaction and conversation quality\n", + "9. A/B test different strategies for your use case\n" + ] + }, + { + "cell_type": "markdown", + "id": "aa20b8bb77b5767c", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## ๐Ÿ’ช Practice Exercises\n", + "\n", + "Now it's your turn! Complete these exercises to reinforce your learning.\n" + ] + }, + { + "cell_type": "markdown", + "id": "ed098207acb2ac62", + "metadata": {}, + "source": [ + "### Exercise 1: Implement Adaptive Compression Strategy\n", + "\n", + "Create a strategy that automatically chooses between truncation and sliding window based on message token variance:\n", + "\n", + "```python\n", + "class AdaptiveStrategy(CompressionStrategy):\n", + " \"\"\"\n", + " Automatically choose between truncation and sliding window.\n", + "\n", + " Logic:\n", + " - If messages have similar token counts โ†’ use sliding window (predictable)\n", + " - If messages have varying token counts โ†’ use truncation (token-aware)\n", + " \"\"\"\n", + "\n", + " def __init__(self, window_size: int = 10):\n", + " self.window_size = window_size\n", + " self.truncation = TruncationStrategy()\n", + " self.sliding_window = SlidingWindowStrategy(window_size)\n", + "\n", + " def compress(\n", + " self,\n", + " messages: List[ConversationMessage],\n", + " max_tokens: int\n", + " ) -> List[ConversationMessage]:\n", + " \"\"\"\n", + " Choose strategy based on token variance.\n", + "\n", + " Steps:\n", + " 1. Calculate token count variance across messages\n", + " 2. If variance is low (similar sizes) โ†’ use sliding window\n", + " 3. If variance is high (varying sizes) โ†’ use truncation\n", + " \"\"\"\n", + " # Your implementation here\n", + " pass\n", + "\n", + "# Test your implementation\n", + "adaptive = AdaptiveStrategy(window_size=6)\n", + "result = adaptive.compress(sample_conversation, max_tokens=800)\n", + "print(f\"Adaptive strategy result: {len(result)} messages\")\n", + "```\n", + "\n", + "**Hint:** Calculate variance using `statistics.variance([msg.token_count for msg in messages])`. Use a threshold (e.g., 100) to decide.\n" + ] + }, + { + "cell_type": "markdown", + "id": "84a03030232b3364", + "metadata": {}, + "source": [ + "### Exercise 2: Implement Hybrid Compression\n", + "\n", + "Combine summarization + truncation for optimal results:\n", + "\n", + "```python\n", + "async def compress_hybrid(\n", + " messages: List[ConversationMessage],\n", + " summarizer: ConversationSummarizer,\n", + " max_tokens: int = 2000\n", + ") -> List[ConversationMessage]:\n", + " \"\"\"\n", + " Hybrid compression: Summarize old messages, truncate if still too large.\n", + "\n", + " Steps:\n", + " 1. First, try summarization\n", + " 2. 
If still over budget, apply truncation to summary + recent messages\n", + " 3. Ensure we stay within max_tokens\n", + "\n", + " Args:\n", + " messages: List of conversation messages\n", + " summarizer: ConversationSummarizer instance\n", + " max_tokens: Maximum token budget\n", + "\n", + " Returns:\n", + " Compressed messages within token budget\n", + " \"\"\"\n", + " # Your implementation here\n", + " pass\n", + "\n", + "# Test your implementation\n", + "hybrid_result = await compress_hybrid(sample_conversation, summarizer, max_tokens=1000)\n", + "print(f\"Hybrid compression: {len(hybrid_result)} messages, {sum(m.token_count for m in hybrid_result)} tokens\")\n", + "```\n", + "\n", + "**Hint:** Use `summarizer.compress_conversation()` first, then apply truncation if needed.\n" + ] + }, + { + "cell_type": "markdown", + "id": "6ac899a501122c38", + "metadata": {}, + "source": [ + "### Exercise 3: Quality Comparison\n", + "\n", + "Test all compression strategies and compare quality:\n", + "\n", + "```python\n", + "async def compare_compression_quality(\n", + " messages: List[ConversationMessage],\n", + " test_query: str = \"What courses did we discuss?\"\n", + ") -> Dict[str, Any]:\n", + " \"\"\"\n", + " Compare compression strategies by testing reference resolution.\n", + "\n", + " Steps:\n", + " 1. Compress using each strategy\n", + " 2. Try to answer test_query using compressed context\n", + " 3. Compare quality of responses\n", + " 4. Measure token savings\n", + "\n", + " Args:\n", + " messages: Original conversation\n", + " test_query: Question to test reference resolution\n", + "\n", + " Returns:\n", + " Dictionary with comparison results\n", + " \"\"\"\n", + " # Your implementation here\n", + " # Test if the agent can still answer questions after compression\n", + " pass\n", + "\n", + "# Test your implementation\n", + "quality_results = await compare_compression_quality(sample_conversation)\n", + "print(\"Quality Comparison Results:\")\n", + "for strategy, results in quality_results.items():\n", + " print(f\"{strategy}: {results}\")\n", + "```\n", + "\n", + "**Hint:** Use the LLM to answer the test query with each compressed context and compare responses.\n" + ] + }, + { + "cell_type": "markdown", + "id": "b134bf5336e3ae36", + "metadata": {}, + "source": [ + "### Exercise 4: Custom Importance Scoring\n", + "\n", + "Improve the `calculate_importance()` function with domain-specific logic:\n", + "\n", + "```python\n", + "def calculate_importance_enhanced(msg: ConversationMessage) -> float:\n", + " \"\"\"\n", + " Enhanced importance scoring for course advisor conversations.\n", + "\n", + " Add scoring for:\n", + " - Specific course codes (CS401, MATH301, etc.) 
- HIGH\n", + " - Prerequisites and requirements - HIGH\n", + " - Student preferences and goals - HIGH\n", + " - Questions - MEDIUM\n", + " - Confirmations and acknowledgments - LOW\n", + " - Greetings and small talk - VERY LOW\n", + "\n", + " Returns:\n", + " Importance score (0.0 to 5.0)\n", + " \"\"\"\n", + " # Your implementation here\n", + " pass\n", + "\n", + "# Test your implementation\n", + "for msg in sample_conversation[:5]:\n", + " score = calculate_importance_enhanced(msg)\n", + " print(f\"Score: {score:.1f} - {msg.content[:60]}...\")\n", + "```\n", + "\n", + "**Hint:** Use regex to detect course codes, check for question marks, look for keywords.\n" + ] + }, + { + "cell_type": "markdown", + "id": "960cb21dcfe638cf", + "metadata": {}, + "source": [ + "### Exercise 5: Production Configuration\n", + "\n", + "Configure Agent Memory Server for your specific use case:\n", + "\n", + "```python\n", + "# Scenario: High-volume customer support chatbot\n", + "# Requirements:\n", + "# - Handle 1000+ conversations per day\n", + "# - Average conversation: 15-20 turns\n", + "# - Cost-sensitive but quality important\n", + "# - Response time: <2 seconds\n", + "\n", + "# Your task: Choose appropriate configuration\n", + "production_config = {\n", + " \"message_threshold\": ???, # When to trigger summarization\n", + " \"token_threshold\": ???, # Token limit before summarization\n", + " \"keep_recent\": ???, # How many recent messages to keep\n", + " \"strategy\": ???, # Which strategy to use\n", + "}\n", + "\n", + "# Justify your choices:\n", + "print(\"Configuration Justification:\")\n", + "print(f\"message_threshold: {production_config['message_threshold']} because...\")\n", + "print(f\"token_threshold: {production_config['token_threshold']} because...\")\n", + "print(f\"keep_recent: {production_config['keep_recent']} because...\")\n", + "print(f\"strategy: {production_config['strategy']} because...\")\n", + "```\n", + "\n", + "**Hint:** Consider the trade-offs between cost, quality, and latency for this specific scenario.\n" + ] + }, + { + "cell_type": "markdown", + "id": "9184f7251934a320", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## ๐Ÿ“ Summary\n", + "\n", + "### **What You Learned:**\n", + "\n", + "1. โœ… **Research Foundations**\n", + " - \"Lost in the Middle\" (Liu et al., 2023): U-shaped performance, non-uniform degradation\n", + " - \"Recursive Summarization\" (Wang et al., 2023): Long-term dialogue memory\n", + " - \"MemGPT\" (Packer et al., 2023): Hierarchical memory management\n", + " - Production best practices from Anthropic and Vellum AI\n", + "\n", + "2. โœ… **The Long Conversation Problem**\n", + " - Token limits, cost implications, performance degradation\n", + " - Why unbounded growth is unsustainable\n", + " - Quadratic cost growth without management\n", + " - Why larger context windows don't solve the problem\n", + "\n", + "3. โœ… **Conversation Summarization**\n", + " - What to preserve vs. compress\n", + " - When to trigger summarization (token/message thresholds)\n", + " - Building summarization step-by-step (functions โ†’ class)\n", + " - LLM-based intelligent summarization\n", + "\n", + "4. โœ… **Three Compression Strategies**\n", + " - **Truncation:** Fast, simple, loses context\n", + " - **Priority-based:** Balanced, intelligent, no LLM calls\n", + " - **Summarization:** High quality, preserves meaning, requires LLM\n", + " - Trade-offs between speed, quality, and cost\n", + "\n", + "5. 
โœ… **Agent Memory Server Integration**\n", + " - Automatic summarization configuration\n", + " - Transparent memory management\n", + " - Production-ready solution implementing research findings\n", + " - Configurable thresholds and strategies\n", + "\n", + "6. โœ… **Decision Framework**\n", + " - How to choose the right strategy\n", + " - Factors: quality, latency, cost, conversation length\n", + " - Production recommendations for different scenarios\n", + " - Hybrid approaches for optimal results\n", + "\n", + "### **What You Built:**\n", + "\n", + "- โœ… `ConversationSummarizer` class for intelligent summarization\n", + "- โœ… Three compression strategy implementations (Truncation, Priority, Summarization)\n", + "- โœ… Decision framework for strategy selection\n", + "- โœ… Production configuration examples\n", + "- โœ… Comparison tools for evaluating strategies\n", + "- โœ… Token counting and cost analysis tools\n", + "\n", + "### **Key Takeaways:**\n", + "\n", + "๐Ÿ’ก **\"Conversations grow unbounded without management\"**\n", + "- Every turn adds tokens and cost\n", + "- Eventually you'll hit limits\n", + "- Costs grow quadratically (each turn includes all previous messages)\n", + "\n", + "๐Ÿ’ก **\"Summarization preserves meaning while reducing tokens\"**\n", + "- Use LLM to create intelligent summaries\n", + "- Keep recent messages for immediate context\n", + "- Store important facts in long-term memory\n", + "\n", + "๐Ÿ’ก **\"Choose strategy based on requirements\"**\n", + "- Quality-critical โ†’ Summarization\n", + "- Speed-critical โ†’ Truncation or Priority-based\n", + "- Balanced โ†’ Agent Memory Server automatic\n", + "- Cost-sensitive โ†’ Priority-based\n", + "\n", + "๐Ÿ’ก **\"Agent Memory Server handles this automatically\"**\n", + "- Production-ready solution\n", + "- Transparent to your application\n", + "- Configurable for your needs\n", + "- No manual intervention required\n", + "\n", + "### **Connection to Context Engineering:**\n", + "\n", + "This notebook completes the **Conversation Context** story from Section 1:\n", + "\n", + "1. **Section 1:** Introduced the 4 context types, including Conversation Context\n", + "2. **Section 3, NB1:** Implemented working memory for conversation continuity\n", + "3. **Section 3, NB2:** Integrated memory with RAG for stateful conversations\n", + "4. 
**Section 3, NB3:** Managed long conversations with summarization and compression โ† You are here\n", + "\n", + "**Next:** Section 4 will show how agents can actively manage their own memory using tools!\n", + "\n", + "### **Next Steps:**\n", + "\n", + "**Section 4: Tools and Agents**\n", + "- Build agents that actively manage their own memory\n", + "- Implement memory tools (store, search, retrieve)\n", + "- Use LangGraph for agent workflows\n", + "- Let the LLM decide when to summarize\n", + "\n", + "**Section 5: Production Optimization**\n", + "- Performance measurement and monitoring\n", + "- Hybrid retrieval strategies\n", + "- Semantic tool selection\n", + "- Quality assurance and validation\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ”— Resources\n", + "\n", + "### **Documentation:**\n", + "- [Agent Memory Server](https://github.com/redis/agent-memory-server) - Production memory management\n", + "- [Agent Memory Client](https://pypi.org/project/agent-memory-client/) - Python client library\n", + "- [LangChain Memory](https://python.langchain.com/docs/modules/memory/) - Memory patterns\n", + "- [OpenAI Tokenizer](https://platform.openai.com/tokenizer) - Token counting tool\n", + "- [tiktoken](https://github.com/openai/tiktoken) - Fast token counting library\n", + "\n", + "### **Research Papers:**\n", + "- **[Lost in the Middle: How Language Models Use Long Contexts](https://arxiv.org/abs/2307.03172)** - Liu et al. (2023). Shows U-shaped performance curve and non-uniform degradation in long contexts.\n", + "- **[Recursively Summarizing Enables Long-Term Dialogue Memory in Large Language Models](https://arxiv.org/abs/2308.15022)** - Wang et al. (2023). Demonstrates recursive summarization for long conversations.\n", + "- **[MemGPT: Towards LLMs as Operating Systems](https://arxiv.org/abs/2310.08560)** - Packer et al. (2023). Introduces hierarchical memory management and virtual context.\n", + "- [Retrieval-Augmented Generation](https://arxiv.org/abs/2005.11401) - RAG fundamentals\n", + "- [Attention Is All You Need](https://arxiv.org/abs/1706.03762) - Transformer architecture and context windows\n", + "\n", + "### **Industry Resources:**\n", + "- **[How Should I Manage Memory for my LLM Chatbot?](https://www.vellum.ai/blog/how-should-i-manage-memory-for-my-llm-chatbot)** - Vellum AI. Practical insights on memory management trade-offs.\n", + "- **[Lost in the Middle Paper Reading](https://arize.com/blog/lost-in-the-middle-how-language-models-use-long-contexts-paper-reading/)** - Arize AI. Detailed analysis and practical implications.\n", + "- **[Effective Context Engineering for AI Agents](https://www.anthropic.com/engineering/effective-context-engineering-for-ai-agents)** - Anthropic. 
Production best practices.\n", + "\n", + "\n", + "### **Tools and Libraries:**\n", + "- **Redis:** Vector storage and memory backend\n", + "- **Agent Memory Server:** Dual-memory architecture with automatic summarization\n", + "- **LangChain:** LLM interaction framework\n", + "- **LangGraph:** State management and agent workflows\n", + "- **OpenAI:** GPT-4o for generation and summarization\n", + "- **tiktoken:** Token counting for cost estimation\n", + "\n", + "---\n", + "\n", + "![Redis](https://redis.io/wp-content/uploads/2024/04/Logotype.svg?auto=webp&quality=85,75&width=120)\n", + "\n", + "**Redis University - Context Engineering Course**\n", + "\n", + "**๐ŸŽ‰ Congratulations!** You've completed Section 3: Memory Systems for Context Engineering!\n", + "\n", + "You now understand how to:\n", + "- Build memory systems for AI agents\n", + "- Integrate working and long-term memory\n", + "- Manage long conversations with summarization\n", + "- Choose the right compression strategy\n", + "- Configure production-ready memory management\n", + "\n", + "**Ready for Section 4?** Let's build agents that actively manage their own memory using tools!\n", + "\n", + "---\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "37206838f616911a", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a99a1b7fa18aae7d", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/python-recipes/context-engineering/notebooks/section-3-memory-systems-for-context-engineering/03_memory_management_long_conversations.ipynb.backup b/python-recipes/context-engineering/notebooks/section-3-memory-systems-for-context-engineering/03_memory_management_long_conversations.ipynb.backup new file mode 100644 index 00000000..9fc1f904 --- /dev/null +++ b/python-recipes/context-engineering/notebooks/section-3-memory-systems-for-context-engineering/03_memory_management_long_conversations.ipynb.backup @@ -0,0 +1,1823 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "3d06c497fe3df20b", + "metadata": {}, + "source": [ + "![Redis](https://redis.io/wp-content/uploads/2024/04/Logotype.svg?auto=webp&quality=85,75&width=120)\n", + "\n", + "# ๐Ÿง  Section 3, Notebook 3: Memory Management - Handling Long Conversations\n", + "\n", + "**โฑ๏ธ Estimated Time:** 50-60 minutes\n", + "\n", + "## ๐ŸŽฏ Learning Objectives\n", + "\n", + "By the end of this notebook, you will:\n", + "\n", + "1. **Understand** why long conversations need management (token limits, cost, performance)\n", + "2. **Implement** conversation summarization to preserve key information\n", + "3. **Build** context compression strategies (truncation, priority-based, summarization)\n", + "4. **Configure** automatic memory management with Agent Memory Server\n", + "5. 
**Decide** when to apply each technique based on conversation characteristics\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ”— Where We Are\n", + "\n", + "### **Your Journey So Far:**\n", + "\n", + "**Section 3, Notebook 1:** Memory Fundamentals\n", + "- โœ… Working memory for conversation continuity\n", + "- โœ… Long-term memory for persistent knowledge\n", + "- โœ… The grounding problem and reference resolution\n", + "- โœ… Memory types (semantic, episodic, message)\n", + "\n", + "**Section 3, Notebook 2:** Memory-Enhanced RAG\n", + "- โœ… Integrated all four context types\n", + "- โœ… Built complete memory-enhanced RAG system\n", + "- โœ… Demonstrated benefits of stateful conversations\n", + "\n", + "**Your memory system works!** It can:\n", + "- Remember conversation history across turns\n", + "- Store and retrieve long-term facts\n", + "- Resolve references (\"it\", \"that course\")\n", + "- Provide personalized recommendations\n", + "\n", + "### **But... What About Long Conversations?**\n", + "\n", + "**Questions we can't answer yet:**\n", + "- โ“ What happens when conversations get really long?\n", + "- โ“ How do we handle token limits?\n", + "- โ“ How much does a 50-turn conversation cost?\n", + "- โ“ Can we preserve important context while reducing tokens?\n", + "- โ“ When should we summarize vs. truncate vs. keep everything?\n", + "\n", + "---\n", + "\n", + "## ๐Ÿšจ The Long Conversation Problem\n", + "\n", + "Before diving into solutions, let's understand the fundamental problem.\n", + "\n", + "### **The Problem: Unbounded Growth**\n", + "\n", + "Every conversation turn adds messages to working memory:\n", + "\n", + "```\n", + "Turn 1: System (500) + Messages (200) = 700 tokens โœ…\n", + "Turn 5: System (500) + Messages (1,000) = 1,500 tokens โœ…\n", + "Turn 20: System (500) + Messages (4,000) = 4,500 tokens โœ…\n", + "Turn 50: System (500) + Messages (10,000) = 10,500 tokens โš ๏ธ\n", + "Turn 100: System (500) + Messages (20,000) = 20,500 tokens โš ๏ธ\n", + "Turn 200: System (500) + Messages (40,000) = 40,500 tokens โŒ\n", + "```\n", + "\n", + "**Without management, conversations grow unbounded!**\n", + "\n", + "### **Why This Matters**\n", + "\n", + "**1. Token Limits (Hard Constraint)**\n", + "- GPT-4o: 128K tokens (~96,000 words)\n", + "- GPT-3.5: 16K tokens (~12,000 words)\n", + "- Eventually, you'll hit the limit and conversations fail\n", + "\n", + "**2. Cost (Economic Constraint)**\n", + "- Input tokens cost money (e.g. $0.0025 / 1K tokens for GPT-4o)\n", + "\n", + "- A 50-turn conversation = ~10,000 tokens = $0.025 per query\n", + "\n", + "- Over 1,000 conversations = $25 just for conversation history!\n", + "\n", + "**3. Performance (Quality Constraint)**\n", + "- More tokens = longer processing time\n", + "- Context Rot: LLMs struggle with very long contexts\n", + "- Important information gets \"lost in the middle\"\n", + "\n", + "**4. 
User Experience**\n", + "- Slow responses frustrate users\n", + "- Expensive conversations aren't sustainable\n", + "- Failed conversations due to token limits are unacceptable\n", + "\n", + "### **The Solution: Memory Management**\n", + "\n", + "We need strategies to:\n", + "- โœ… Keep conversations within token budgets\n", + "- โœ… Preserve important information\n", + "- โœ… Maintain conversation quality\n", + "- โœ… Control costs\n", + "- โœ… Enable indefinite conversations\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ“ฆ Part 0: Setup and Environment\n", + "\n", + "Let's set up our environment and create tools for measuring conversation growth.\n", + "\n", + "### โš ๏ธ Prerequisites\n", + "\n", + "**Before running this notebook, make sure you have:**\n", + "\n", + "1. **Docker Desktop running** - Required for Redis and Agent Memory Server\n", + "\n", + "2. **Environment variables** - Create a `.env` file in the `reference-agent` directory:\n", + " ```bash\n", + " # Copy the example file\n", + " cd ../../reference-agent\n", + " cp .env.example .env\n", + "\n", + " # Edit .env and add your OpenAI API key\n", + " # OPENAI_API_KEY=your_actual_openai_api_key_here\n", + " ```\n", + "\n", + "3. **Run the setup script** - This will automatically start Redis and Agent Memory Server:\n", + " ```bash\n", + " cd ../../reference-agent\n", + " python setup_agent_memory_server.py\n", + " ```\n" + ] + }, + { + "cell_type": "markdown", + "id": "307c59ecc51d30c3", + "metadata": {}, + "source": [ + "---\n" + ] + }, + { + "cell_type": "markdown", + "id": "dd10e48e57f1431e", + "metadata": {}, + "source": [ + "### Automated Setup Check\n", + "\n", + "Let's run the setup script to ensure all services are running properly.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "808cea2af3f4f118", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Running automated setup check...\n", + "\n", + "\n", + "๐Ÿ”ง Agent Memory Server Setup\n", + "===========================\n", + "๐Ÿ“Š Checking Redis...\n", + "โœ… Redis is running\n", + "๐Ÿ“Š Checking Agent Memory Server...\n", + "๐Ÿ” Agent Memory Server container exists. Checking health...\n", + "โœ… Agent Memory Server is running and healthy\n", + "โœ… No Redis connection issues detected\n", + "\n", + "โœ… Setup Complete!\n", + "=================\n", + "๐Ÿ“Š Services Status:\n", + " โ€ข Redis: Running on port 6379\n", + " โ€ข Agent Memory Server: Running on port 8088\n", + "\n", + "๐ŸŽฏ You can now run the notebooks!\n", + "\n", + "\n", + "โœ… All services are ready!\n" + ] + } + ], + "source": [ + "# Run the setup script to ensure Redis and Agent Memory Server are running\n", + "import subprocess\n", + "import sys\n", + "from pathlib import Path\n", + "\n", + "# Path to setup script\n", + "setup_script = Path(\"../../reference-agent/setup_agent_memory_server.py\")\n", + "\n", + "if setup_script.exists():\n", + " print(\"Running automated setup check...\\n\")\n", + " result = subprocess.run(\n", + " [sys.executable, str(setup_script)],\n", + " capture_output=True,\n", + " text=True\n", + " )\n", + " print(result.stdout)\n", + " if result.returncode != 0:\n", + " print(\"โš ๏ธ Setup check failed. Please review the output above.\")\n", + " print(result.stderr)\n", + " else:\n", + " print(\"\\nโœ… All services are ready!\")\n", + "else:\n", + " print(\"โš ๏ธ Setup script not found. 
Please ensure services are running manually.\")\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4f7ab2a448dd08fc",
+ "metadata": {},
+ "source": [
+ "---\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9dd8400bfed20f64",
+ "metadata": {},
+ "source": [
+ "### Install Dependencies\n",
+ "\n",
+ "If you haven't already installed the reference-agent package, uncomment and run the following:\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "62ad9f5d109351a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Uncomment to install reference-agent package\n",
+ "# %pip install -q -e ../../reference-agent\n",
+ "\n",
+ "# Uncomment to install agent-memory-client\n",
+ "# %pip install -q agent-memory-client\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b41bf6b02f73fdb9",
+ "metadata": {},
+ "source": [
+ "### Import Dependencies\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b00247fc4bb718d6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Standard library imports\n",
+ "import os\n",
+ "import time\n",
+ "import asyncio\n",
+ "from typing import List, Dict, Any, Optional, Tuple\n",
+ "from dataclasses import dataclass, field\n",
+ "from datetime import datetime\n",
+ "from pathlib import Path\n",
+ "\n",
+ "# LangChain\n",
+ "from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n",
+ "from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, SystemMessage\n",
+ "\n",
+ "# Redis and Agent Memory\n",
+ "# NOTE: recent agent-memory-client releases export the client as\n",
+ "# MemoryAPIClient (not AgentMemoryClient); adjust if your version differs.\n",
+ "from agent_memory_client import MemoryAPIClient, MemoryClientConfig\n",
+ "from agent_memory_client.models import ClientMemoryRecord\n",
+ "\n",
+ "# Token counting\n",
+ "import tiktoken\n",
+ "\n",
+ "# For visualization\n",
+ "from collections import defaultdict\n",
+ "\n",
+ "print(\"โœ… All imports successful\")\n"
+ ]
+ },
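+ {
+ "cell_type": "markdown",
+ "id": "5f3a1c9b2d7e4081",
+ "metadata": {},
+ "source": [
+ "Before wiring up the clients, a quick sanity check of why this notebook budgets context in tokens rather than words (counts vary by model encoding):\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6e2b8d4f9a1c3057",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Token counts differ from word counts, which is why the thresholds\n",
+ "# later in this notebook are expressed in tokens.\n",
+ "enc = tiktoken.encoding_for_model(\"gpt-4o\")\n",
+ "sample = \"CS401 requires CS201 Data Structures and MATH301 Linear Algebra.\"\n",
+ "print(f\"{len(sample.split())} words -> {len(enc.encode(sample))} tokens\")\n"
+ ]
+ },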
"from collections import defaultdict\n", + "\n", + "print(\"โœ… All imports successful\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "38946d91e830639a", + "metadata": {}, + "source": [ + "### Load Environment Variables\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "41a3192aacee6dbf", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Environment variables configured\n", + " Redis URL: redis://localhost:6379\n", + " Agent Memory URL: http://localhost:8088\n" + ] + } + ], + "source": [ + "from dotenv import load_dotenv\n", + "\n", + "# Load environment variables from reference-agent directory\n", + "env_path = Path(\"../../reference-agent/.env\")\n", + "load_dotenv(dotenv_path=env_path)\n", + "\n", + "# Verify required environment variables\n", + "OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")\n", + "REDIS_URL = os.getenv(\"REDIS_URL\", \"redis://localhost:6379\")\n", + "AGENT_MEMORY_URL = os.getenv(\"AGENT_MEMORY_URL\", \"http://localhost:8088\")\n", + "\n", + "if not OPENAI_API_KEY:\n", + " print(f\"\"\"โŒ OPENAI_API_KEY not found!\n", + "\n", + "Please create a .env file at: {env_path.absolute()}\n", + "\n", + "With the following content:\n", + "OPENAI_API_KEY=your_openai_api_key\n", + "REDIS_URL=redis://localhost:6379\n", + "AGENT_MEMORY_URL=http://localhost:8088\n", + "\"\"\")\n", + "else:\n", + " print(\"โœ… Environment variables configured\")\n", + " print(f\" Redis URL: {REDIS_URL}\")\n", + " print(f\" Agent Memory URL: {AGENT_MEMORY_URL}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "2f42157025d92c5", + "metadata": {}, + "source": [ + "### Initialize Clients\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "f6acdabe9f826582", + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'AgentMemoryClient' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", + "\u001b[31mNameError\u001b[39m Traceback (most recent call last)", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[5]\u001b[39m\u001b[32m, line 13\u001b[39m\n\u001b[32m 8\u001b[39m embeddings = OpenAIEmbeddings(\n\u001b[32m 9\u001b[39m model=\u001b[33m\"\u001b[39m\u001b[33mtext-embedding-3-small\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 10\u001b[39m )\n\u001b[32m 12\u001b[39m \u001b[38;5;66;03m# Initialize Agent Memory Client\u001b[39;00m\n\u001b[32m---> \u001b[39m\u001b[32m13\u001b[39m memory_client = \u001b[43mAgentMemoryClient\u001b[49m(\n\u001b[32m 14\u001b[39m base_url=AGENT_MEMORY_URL\n\u001b[32m 15\u001b[39m )\n\u001b[32m 17\u001b[39m \u001b[38;5;66;03m# Initialize tokenizer for counting\u001b[39;00m\n\u001b[32m 18\u001b[39m tokenizer = tiktoken.encoding_for_model(\u001b[33m\"\u001b[39m\u001b[33mgpt-4o\u001b[39m\u001b[33m\"\u001b[39m)\n", + "\u001b[31mNameError\u001b[39m: name 'AgentMemoryClient' is not defined" + ] + } + ], + "source": [ + "# Initialize LLM\n", + "llm = ChatOpenAI(\n", + " model=\"gpt-4o\",\n", + " temperature=0.7\n", + ")\n", + "\n", + "# Initialize embeddings\n", + "embeddings = OpenAIEmbeddings(\n", + " model=\"text-embedding-3-small\"\n", + ")\n", + "\n", + "# Initialize Agent Memory Client\n", + "memory_client = AgentMemoryClient(\n", + " base_url=AGENT_MEMORY_URL\n", + ")\n", + "\n", + "# Initialize tokenizer for counting\n", + "tokenizer = tiktoken.encoding_for_model(\"gpt-4o\")\n", + "\n", + "def count_tokens(text: str) -> int:\n", + " 
\"\"\"Count tokens in text using tiktoken.\"\"\"\n", + " return len(tokenizer.encode(text))\n", + "\n", + "print(\"โœ… Clients initialized\")\n", + "print(f\" LLM: {llm.model_name}\")\n", + "print(f\" Embeddings: text-embedding-3-small\")\n", + "print(f\" Memory Server: {AGENT_MEMORY_URL}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "cb3c6e2d8cee7f21", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## ๐Ÿ“Š Part 1: Understanding Conversation Growth\n", + "\n", + "Let's visualize how conversations grow and understand the implications.\n" + ] + }, + { + "cell_type": "markdown", + "id": "38b4a48ea4fee96b", + "metadata": {}, + "source": [ + "### Demo 1: Token Growth Over Time\n", + "\n", + "Let's simulate how token counts grow as conversations progress.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9ff7e262cad76878", + "metadata": {}, + "outputs": [], + "source": [ + "# System prompt (constant across all turns)\n", + "system_prompt = \"\"\"You are a helpful course advisor for Redis University.\n", + "Help students find courses, check prerequisites, and plan their schedule.\n", + "Be friendly, concise, and accurate.\"\"\"\n", + "\n", + "system_tokens = count_tokens(system_prompt)\n", + "\n", + "print(f\"System prompt: {system_tokens} tokens\\n\")\n", + "\n", + "# Simulate conversation growth\n", + "# Assume average message pair (user + assistant) = 100 tokens\n", + "avg_message_pair_tokens = 100\n", + "\n", + "print(\"Conversation Growth Simulation:\")\n", + "print(\"=\" * 80)\n", + "print(f\"{'Turn':<8} {'Messages':<10} {'Conv Tokens':<15} {'Total Tokens':<15} {'Cost ($)':<12}\")\n", + "print(\"-\" * 80)\n", + "\n", + "for turn in [1, 5, 10, 20, 30, 50, 75, 100, 150, 200]:\n", + " # Each turn = user message + assistant message\n", + " num_messages = turn * 2\n", + " conversation_tokens = num_messages * (avg_message_pair_tokens // 2)\n", + " total_tokens = system_tokens + conversation_tokens\n", + " \n", + " # Cost calculation (GPT-4o input: $0.0025 per 1K tokens)\n", + " cost_per_query = (total_tokens / 1000) * 0.0025\n", + " \n", + " # Visual indicator\n", + " if total_tokens < 5000:\n", + " indicator = \"โœ…\"\n", + " elif total_tokens < 20000:\n", + " indicator = \"โš ๏ธ\"\n", + " else:\n", + " indicator = \"โŒ\"\n", + " \n", + " print(f\"{turn:<8} {num_messages:<10} {conversation_tokens:<15,} {total_tokens:<15,} ${cost_per_query:<11.4f} {indicator}\")\n", + "\n", + "print(\"\\n๐Ÿ’ก Key Insight: Without management, conversations become expensive and slow!\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "99edd1b0325093b", + "metadata": {}, + "source": [ + "### Demo 2: Cost Analysis\n", + "\n", + "Let's calculate the cumulative cost of long conversations.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1a9e0cfece6beaf5", + "metadata": {}, + "outputs": [], + "source": [ + "def calculate_conversation_cost(num_turns: int, avg_tokens_per_turn: int = 100) -> Dict[str, float]:\n", + " \"\"\"Calculate cost metrics for a conversation.\"\"\"\n", + " system_tokens = 50 # Simplified\n", + " \n", + " # Cumulative cost (each turn includes all previous messages)\n", + " cumulative_tokens = 0\n", + " cumulative_cost = 0.0\n", + " \n", + " for turn in range(1, num_turns + 1):\n", + " # Total tokens for this turn\n", + " conversation_tokens = turn * avg_tokens_per_turn\n", + " total_tokens = system_tokens + conversation_tokens\n", + " \n", + " # Cost for this turn (input tokens)\n", + " turn_cost = (total_tokens / 1000) * 
0.0025\n",
+ " cumulative_cost += turn_cost\n",
+ " cumulative_tokens += total_tokens\n",
+ " \n",
+ " return {\n",
+ " \"num_turns\": num_turns,\n",
+ " \"final_tokens\": system_tokens + (num_turns * avg_tokens_per_turn),\n",
+ " \"cumulative_tokens\": cumulative_tokens,\n",
+ " \"cumulative_cost\": cumulative_cost,\n",
+ " \"avg_cost_per_turn\": cumulative_cost / num_turns\n",
+ " }\n",
+ "\n",
+ "# Compare different conversation lengths\n",
+ "print(\"Cost Analysis for Different Conversation Lengths:\")\n",
+ "print(\"=\" * 80)\n",
+ "print(f\"{'Turns':<10} {'Final Tokens':<15} {'Cumulative Tokens':<20} {'Total Cost':<15} {'Avg/Turn'}\")\n",
+ "print(\"-\" * 80)\n",
+ "\n",
+ "for num_turns in [10, 25, 50, 100, 200]:\n",
+ " metrics = calculate_conversation_cost(num_turns)\n",
+ " print(f\"{metrics['num_turns']:<10} \"\n",
+ " f\"{metrics['final_tokens']:<15,} \"\n",
+ " f\"{metrics['cumulative_tokens']:<20,} \"\n",
+ " f\"${metrics['cumulative_cost']:<14.2f} \"\n",
+ " f\"${metrics['avg_cost_per_turn']:.4f}\")\n",
+ "\n",
+ "print(\"\\n๐Ÿ’ก Key Insight: Costs grow quadratically without memory management!\")\n",
+ "print(\" A 100-turn conversation costs ~$1.28 in total\")\n",
+ "print(\" A 200-turn conversation costs ~$5.05 in total\")\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "117ca757272caef3",
+ "metadata": {},
+ "source": [
+ "---\n",
+ "\n",
+ "## ๐ŸŽฏ Part 2: Conversation Summarization\n",
+ "\n",
+ "Now let's implement intelligent summarization to manage long conversations.\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "544c9c59a8e344be",
+ "metadata": {},
+ "source": [
+ "### Theory: What to Preserve vs. Compress\n",
+ "\n",
+ "**What to Preserve:**\n",
+ "- โœ… Key facts and decisions\n",
+ "- โœ… Student preferences and goals\n",
+ "- โœ… Important course recommendations\n",
+ "- โœ… Prerequisites and requirements\n",
+ "- โœ… Recent context (last few messages)\n",
+ "\n",
+ "**What to Compress:**\n",
+ "- ๐Ÿ“ฆ Small talk and greetings\n",
+ "- ๐Ÿ“ฆ Redundant information\n",
+ "- ๐Ÿ“ฆ Old conversation details\n",
+ "- ๐Ÿ“ฆ Resolved questions\n",
+ "\n",
+ "**When to Summarize:**\n",
+ "- Token threshold exceeded (e.g., > 2000 tokens)\n",
+ "- Message count threshold exceeded (e.g., > 10 messages)\n",
+ "- Time-based (e.g., after 1 hour)\n",
+ "- Manual trigger\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "998184e76d362bf3",
+ "metadata": {},
+ "source": [
+ "### Implementation: ConversationSummarizer Class\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6710bd8b0268c34d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "@dataclass\n",
+ "class ConversationMessage:\n",
+ " \"\"\"Represents a single conversation message.\"\"\"\n",
+ " role: str # \"user\", \"assistant\", \"system\"\n",
+ " content: str\n",
+ " timestamp: float = field(default_factory=time.time)\n",
+ " token_count: Optional[int] = None\n",
+ " \n",
+ " def __post_init__(self):\n",
+ " if self.token_count is None:\n",
+ " self.token_count = count_tokens(self.content)\n",
+ "\n",
+ "class ConversationSummarizer:\n",
+ " \"\"\"Manages conversation summarization to keep token counts manageable.\"\"\"\n",
+ " \n",
+ " def __init__(\n",
+ " self,\n",
+ " llm: ChatOpenAI,\n",
+ " token_threshold: int = 2000,\n",
+ " message_threshold: int = 10,\n",
+ " keep_recent: int = 4\n",
+ " ):\n",
+ " \"\"\"\n",
+ " Initialize the summarizer.\n",
+ " \n",
+ " Args:\n",
+ " llm: Language model for generating summaries\n",
+ " token_threshold: Summarize when total tokens exceed 
this\n", + " message_threshold: Summarize when message count exceeds this\n", + " keep_recent: Number of recent messages to keep unsummarized\n", + " \"\"\"\n", + " self.llm = llm\n", + " self.token_threshold = token_threshold\n", + " self.message_threshold = message_threshold\n", + " self.keep_recent = keep_recent\n", + " \n", + " self.summarization_prompt = \"\"\"You are summarizing a conversation between a student and a course advisor.\n", + "\n", + "Create a concise summary that preserves:\n", + "1. Key decisions made\n", + "2. Important requirements or prerequisites discussed\n", + "3. Student's goals, preferences, and constraints\n", + "4. Specific courses mentioned and recommendations given\n", + "5. Any problems or issues that need follow-up\n", + "\n", + "Format as bullet points. Be specific and actionable.\n", + "\n", + "Conversation to summarize:\n", + "{conversation}\n", + "\n", + "Summary:\"\"\"\n", + " \n", + " def should_summarize(self, messages: List[ConversationMessage]) -> bool:\n", + " \"\"\"Determine if conversation needs summarization.\"\"\"\n", + " if len(messages) <= self.keep_recent:\n", + " return False\n", + " \n", + " total_tokens = sum(msg.token_count for msg in messages)\n", + " \n", + " return (total_tokens > self.token_threshold or \n", + " len(messages) > self.message_threshold)\n", + " \n", + " async def summarize_conversation(\n", + " self,\n", + " messages: List[ConversationMessage]\n", + " ) -> ConversationMessage:\n", + " \"\"\"Create intelligent summary of conversation messages.\"\"\"\n", + " # Format conversation for summarization\n", + " conversation_text = \"\\n\".join([\n", + " f\"{msg.role.title()}: {msg.content}\" \n", + " for msg in messages\n", + " ])\n", + " \n", + " # Generate summary using LLM\n", + " prompt = self.summarization_prompt.format(conversation=conversation_text)\n", + " response = await self.llm.ainvoke([HumanMessage(content=prompt)])\n", + " \n", + " summary_content = f\"[CONVERSATION SUMMARY]\\n{response.content}\"\n", + " \n", + " # Create summary message\n", + " summary_msg = ConversationMessage(\n", + " role=\"system\",\n", + " content=summary_content,\n", + " timestamp=messages[-1].timestamp\n", + " )\n", + " \n", + " return summary_msg\n", + " \n", + " async def compress_conversation(\n", + " self,\n", + " messages: List[ConversationMessage]\n", + " ) -> List[ConversationMessage]:\n", + " \"\"\"\n", + " Compress conversation by summarizing old messages and keeping recent ones.\n", + " \n", + " Returns:\n", + " List of messages: [summary] + [recent messages]\n", + " \"\"\"\n", + " if not self.should_summarize(messages):\n", + " return messages\n", + " \n", + " # Split into old and recent\n", + " old_messages = messages[:-self.keep_recent]\n", + " recent_messages = messages[-self.keep_recent:]\n", + " \n", + " if not old_messages:\n", + " return messages\n", + " \n", + " # Summarize old messages\n", + " summary = await self.summarize_conversation(old_messages)\n", + " \n", + " # Return summary + recent messages\n", + " return [summary] + recent_messages\n", + "\n", + "print(\"โœ… ConversationSummarizer class defined\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "4441a3298bd38af8", + "metadata": {}, + "source": [ + "### Demo 3: Test Summarization\n", + "\n", + "Let's test the summarizer with a sample conversation.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "df5840eedf4a9185", + "metadata": {}, + "outputs": [], + "source": [ + "# Create a sample long conversation\n", + 
"sample_conversation = [\n", + " ConversationMessage(\"user\", \"Hi, I'm interested in learning about machine learning courses\"),\n", + " ConversationMessage(\"assistant\", \"Great! Redis University offers several ML courses. CS401 Machine Learning is our flagship course. It covers supervised learning, neural networks, and practical applications.\"),\n", + " ConversationMessage(\"user\", \"What are the prerequisites for CS401?\"),\n", + " ConversationMessage(\"assistant\", \"CS401 requires CS201 Data Structures and MATH301 Linear Algebra. Have you completed these courses?\"),\n", + " ConversationMessage(\"user\", \"I've completed CS101 but not CS201 yet\"),\n", + " ConversationMessage(\"assistant\", \"Perfect! CS201 is the next logical step. It covers algorithms and data structures essential for ML. It's offered every semester.\"),\n", + " ConversationMessage(\"user\", \"How difficult is MATH301?\"),\n", + " ConversationMessage(\"assistant\", \"MATH301 is moderately challenging. It covers vectors, matrices, and eigenvalues used in ML algorithms. Most students find it manageable with consistent practice.\"),\n", + " ConversationMessage(\"user\", \"Can I take both CS201 and MATH301 together?\"),\n", + " ConversationMessage(\"assistant\", \"Yes, that's a good combination! They complement each other well. Many students take them concurrently.\"),\n", + " ConversationMessage(\"user\", \"What about CS401 after that?\"),\n", + " ConversationMessage(\"assistant\", \"CS401 is perfect after completing both prerequisites. It's our most popular AI course with hands-on projects.\"),\n", + " ConversationMessage(\"user\", \"When is CS401 offered?\"),\n", + " ConversationMessage(\"assistant\", \"CS401 is offered in Fall and Spring semesters. The Fall section typically fills up quickly, so register early!\"),\n", + " ConversationMessage(\"user\", \"Great! What's the workload like?\"),\n", + " ConversationMessage(\"assistant\", \"CS401 requires about 10-12 hours per week including lectures, assignments, and projects. There are 4 major projects throughout the semester.\"),\n", + "]\n", + "\n", + "# Calculate original metrics\n", + "original_token_count = sum(msg.token_count for msg in sample_conversation)\n", + "print(f\"Original conversation:\")\n", + "print(f\" Messages: {len(sample_conversation)}\")\n", + "print(f\" Total tokens: {original_token_count}\")\n", + "print(f\" Average tokens per message: {original_token_count / len(sample_conversation):.1f}\")\n", + "\n", + "# Test summarization\n", + "summarizer = ConversationSummarizer(\n", + " llm=llm,\n", + " token_threshold=500, # Low threshold for demo\n", + " message_threshold=10,\n", + " keep_recent=4\n", + ")\n", + "\n", + "print(f\"\\nSummarizer configuration:\")\n", + "print(f\" Token threshold: {summarizer.token_threshold}\")\n", + "print(f\" Message threshold: {summarizer.message_threshold}\")\n", + "print(f\" Keep recent: {summarizer.keep_recent}\")\n", + "\n", + "# Check if summarization is needed\n", + "should_summarize = summarizer.should_summarize(sample_conversation)\n", + "print(f\"\\nShould summarize? 
{should_summarize}\")\n", + "\n", + "if should_summarize:\n", + " # Compress the conversation\n", + " compressed = await summarizer.compress_conversation(sample_conversation)\n", + " \n", + " compressed_token_count = sum(msg.token_count for msg in compressed)\n", + " token_savings = original_token_count - compressed_token_count\n", + " savings_percentage = (token_savings / original_token_count) * 100\n", + " \n", + " print(f\"\\nAfter summarization:\")\n", + " print(f\" Messages: {len(compressed)}\")\n", + " print(f\" Total tokens: {compressed_token_count}\")\n", + " print(f\" Token savings: {token_savings} ({savings_percentage:.1f}%)\")\n", + " \n", + " print(f\"\\nCompressed conversation structure:\")\n", + " for i, msg in enumerate(compressed):\n", + " role_icon = \"๐Ÿ“‹\" if msg.role == \"system\" else \"๐Ÿ‘ค\" if msg.role == \"user\" else \"๐Ÿค–\"\n", + " content_preview = msg.content[:80].replace('\\n', ' ')\n", + " print(f\" {i+1}. {role_icon} [{msg.role}] {content_preview}...\")\n", + " print(f\" Tokens: {msg.token_count}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "5a7f1c4414f6d2a7", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## ๐Ÿ”ง Part 3: Context Compression Strategies\n", + "\n", + "Beyond summarization, there are other compression strategies. Let's implement and compare them.\n" + ] + }, + { + "cell_type": "markdown", + "id": "3d6a9c3a31a589d0", + "metadata": {}, + "source": [ + "### Theory: Three Compression Approaches\n", + "\n", + "**1. Truncation (Fast, Simple)**\n", + "- Keep only the most recent N messages\n", + "- โœ… Pros: Fast, no LLM calls, predictable\n", + "- โŒ Cons: Loses all old context, no intelligence\n", + "\n", + "**2. Priority-Based (Balanced)**\n", + "- Score messages by importance, keep highest-scoring\n", + "- โœ… Pros: Preserves important context, no LLM calls\n", + "- โŒ Cons: Requires good scoring logic, may lose temporal flow\n", + "\n", + "**3. 
Summarization (High Quality)**\n", + "- Use LLM to create intelligent summaries\n", + "- โœ… Pros: Preserves meaning, high quality\n", + "- โŒ Cons: Slower, costs tokens, requires LLM call\n" + ] + }, + { + "cell_type": "markdown", + "id": "80bbd6185d7e1fd4", + "metadata": {}, + "source": [ + "### Implementation: Three Compression Strategies\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23b8486d8bc89f7b", + "metadata": {}, + "outputs": [], + "source": [ + "class CompressionStrategy:\n", + " \"\"\"Base class for compression strategies.\"\"\"\n", + " \n", + " def compress(\n", + " self,\n", + " messages: List[ConversationMessage],\n", + " max_tokens: int\n", + " ) -> List[ConversationMessage]:\n", + " \"\"\"Compress messages to fit within max_tokens.\"\"\"\n", + " raise NotImplementedError\n", + "\n", + "class TruncationStrategy(CompressionStrategy):\n", + " \"\"\"Keep only the most recent messages within token budget.\"\"\"\n", + " \n", + " def compress(\n", + " self,\n", + " messages: List[ConversationMessage],\n", + " max_tokens: int\n", + " ) -> List[ConversationMessage]:\n", + " \"\"\"Keep most recent messages within token budget.\"\"\"\n", + " compressed = []\n", + " total_tokens = 0\n", + " \n", + " # Work backwards from most recent\n", + " for msg in reversed(messages):\n", + " if total_tokens + msg.token_count <= max_tokens:\n", + " compressed.insert(0, msg)\n", + " total_tokens += msg.token_count\n", + " else:\n", + " break\n", + " \n", + " return compressed\n", + "\n", + "class PriorityBasedStrategy(CompressionStrategy):\n", + " \"\"\"Keep highest-priority messages within token budget.\"\"\"\n", + " \n", + " def calculate_importance(self, msg: ConversationMessage) -> float:\n", + " \"\"\"\n", + " Calculate importance score for a message.\n", + " \n", + " Higher scores = more important.\n", + " \"\"\"\n", + " score = 0.0\n", + " content_lower = msg.content.lower()\n", + " \n", + " # Course codes are important (CS401, MATH301, etc.)\n", + " if any(code in content_lower for code in ['cs', 'math', 'eng']):\n", + " score += 2.0\n", + " \n", + " # Questions are important\n", + " if '?' 
in msg.content:\n", + " score += 1.5\n", + " \n", + " # Prerequisites and requirements are important\n", + " if any(word in content_lower for word in ['prerequisite', 'require', 'need']):\n", + " score += 1.5\n", + " \n", + " # Preferences and goals are important\n", + " if any(word in content_lower for word in ['prefer', 'want', 'goal', 'interested']):\n", + " score += 1.0\n", + " \n", + " # User messages slightly more important (their needs)\n", + " if msg.role == 'user':\n", + " score += 0.5\n", + " \n", + " # Longer messages often have more content\n", + " if msg.token_count > 50:\n", + " score += 0.5\n", + " \n", + " return score\n", + " \n", + " def compress(\n", + " self,\n", + " messages: List[ConversationMessage],\n", + " max_tokens: int\n", + " ) -> List[ConversationMessage]:\n", + " \"\"\"Keep highest-priority messages within token budget.\"\"\"\n", + " # Score each message\n", + " scored_messages = [\n", + " (self.calculate_importance(msg), i, msg)\n", + " for i, msg in enumerate(messages)\n", + " ]\n", + " \n", + " # Sort by score (descending), then by index to maintain some order\n", + " scored_messages.sort(key=lambda x: (-x[0], x[1]))\n", + " \n", + " # Select messages within budget\n", + " selected = []\n", + " total_tokens = 0\n", + " \n", + " for score, idx, msg in scored_messages:\n", + " if total_tokens + msg.token_count <= max_tokens:\n", + " selected.append((idx, msg))\n", + " total_tokens += msg.token_count\n", + " \n", + " # Sort by original index to maintain conversation flow\n", + " selected.sort(key=lambda x: x[0])\n", + " \n", + " return [msg for idx, msg in selected]\n", + "\n", + "class SummarizationStrategy(CompressionStrategy):\n", + " \"\"\"Use LLM to create intelligent summaries.\"\"\"\n", + " \n", + " def __init__(self, summarizer: ConversationSummarizer):\n", + " self.summarizer = summarizer\n", + " \n", + " async def compress_async(\n", + " self,\n", + " messages: List[ConversationMessage],\n", + " max_tokens: int\n", + " ) -> List[ConversationMessage]:\n", + " \"\"\"Compress using summarization (async).\"\"\"\n", + " # Use the summarizer's logic\n", + " return await self.summarizer.compress_conversation(messages)\n", + " \n", + " def compress(\n", + " self,\n", + " messages: List[ConversationMessage],\n", + " max_tokens: int\n", + " ) -> List[ConversationMessage]:\n", + " \"\"\"Synchronous wrapper (not recommended, use compress_async).\"\"\"\n", + " raise NotImplementedError(\"Use compress_async for summarization strategy\")\n", + "\n", + "print(\"โœ… Compression strategies defined\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "3db188fb9f01d750", + "metadata": {}, + "source": [ + "### Demo 4: Compare Compression Strategies\n", + "\n", + "Let's compare all three strategies on the same conversation.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5d49f8f61e276661", + "metadata": {}, + "outputs": [], + "source": [ + "# Use the same sample conversation from before\n", + "test_conversation = sample_conversation.copy()\n", + "max_tokens = 800 # Target token budget\n", + "\n", + "print(f\"Original conversation: {len(test_conversation)} messages, {sum(msg.token_count for msg in test_conversation)} tokens\\n\")\n", + "print(\"=\" * 80)\n", + "\n", + "# Strategy 1: Truncation\n", + "truncation = TruncationStrategy()\n", + "truncated = truncation.compress(test_conversation, max_tokens)\n", + "truncated_tokens = sum(msg.token_count for msg in truncated)\n", + "\n", + "print(f\"\\n1๏ธโƒฃ TRUNCATION STRATEGY\")\n", + "print(f\" 
Result: {len(truncated)} messages, {truncated_tokens} tokens\")\n", + "print(f\" Savings: {sum(msg.token_count for msg in test_conversation) - truncated_tokens} tokens\")\n", + "print(f\" Kept messages: {[i for i, msg in enumerate(test_conversation) if msg in truncated]}\")\n", + "\n", + "# Strategy 2: Priority-Based\n", + "priority = PriorityBasedStrategy()\n", + "prioritized = priority.compress(test_conversation, max_tokens)\n", + "prioritized_tokens = sum(msg.token_count for msg in prioritized)\n", + "\n", + "print(f\"\\n2๏ธโƒฃ PRIORITY-BASED STRATEGY\")\n", + "print(f\" Result: {len(prioritized)} messages, {prioritized_tokens} tokens\")\n", + "print(f\" Savings: {sum(msg.token_count for msg in test_conversation) - prioritized_tokens} tokens\")\n", + "print(f\" Kept messages: {[i for i, msg in enumerate(test_conversation) if msg in prioritized]}\")\n", + "\n", + "# Show importance scores for a few messages\n", + "print(f\"\\n Sample importance scores:\")\n", + "for i in [0, 2, 4, 6]:\n", + " if i < len(test_conversation):\n", + " score = priority.calculate_importance(test_conversation[i])\n", + " preview = test_conversation[i].content[:50]\n", + " print(f\" Message {i}: {score:.1f} - \\\"{preview}...\\\"\")\n", + "\n", + "# Strategy 3: Summarization\n", + "summarization = SummarizationStrategy(summarizer)\n", + "summarized = await summarization.compress_async(test_conversation, max_tokens)\n", + "summarized_tokens = sum(msg.token_count for msg in summarized)\n", + "\n", + "print(f\"\\n3๏ธโƒฃ SUMMARIZATION STRATEGY\")\n", + "print(f\" Result: {len(summarized)} messages, {summarized_tokens} tokens\")\n", + "print(f\" Savings: {sum(msg.token_count for msg in test_conversation) - summarized_tokens} tokens\")\n", + "print(f\" Structure: 1 summary + {len(summarized) - 1} recent messages\")\n", + "\n", + "# Comparison table\n", + "print(f\"\\n\" + \"=\" * 80)\n", + "print(f\"\\n๐Ÿ“Š COMPARISON SUMMARY\")\n", + "print(f\"{'Strategy':<20} {'Messages':<12} {'Tokens':<12} {'Savings':<12} {'Quality'}\")\n", + "print(\"-\" * 80)\n", + "\n", + "original_tokens = sum(msg.token_count for msg in test_conversation)\n", + "\n", + "strategies = [\n", + " (\"Original\", len(test_conversation), original_tokens, 0, \"N/A\"),\n", + " (\"Truncation\", len(truncated), truncated_tokens, original_tokens - truncated_tokens, \"Low\"),\n", + " (\"Priority-Based\", len(prioritized), prioritized_tokens, original_tokens - prioritized_tokens, \"Medium\"),\n", + " (\"Summarization\", len(summarized), summarized_tokens, original_tokens - summarized_tokens, \"High\"),\n", + "]\n", + "\n", + "for name, msgs, tokens, savings, quality in strategies:\n", + " savings_pct = f\"({savings/original_tokens*100:.0f}%)\" if savings > 0 else \"\"\n", + " print(f\"{name:<20} {msgs:<12} {tokens:<12} {savings:<5} {savings_pct:<6} {quality}\")\n", + "\n", + "print(\"\\n๐Ÿ’ก Key Insight: Choose strategy based on your quality/speed requirements!\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "290935fa536cb8aa", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## ๐Ÿ”„ Part 4: Agent Memory Server Integration\n", + "\n", + "The Agent Memory Server provides automatic summarization. 
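When working memory crosses its thresholds, the server folds older messages into a summary and keeps the most recent messages verbatim.\n",
+ "\n",
+ "Conceptually, the compaction step behaves like this sketch (a simplified illustration over this notebook's message objects, not the server's actual code):\n",
+ "\n",
+ "```python\n",
+ "def compact(messages, summarize_fn, token_threshold=4000, keep_recent=4):\n",
+ "    # Sketch: once over budget, fold older messages into one summary\n",
+ "    if len(messages) <= keep_recent:\n",
+ "        return messages\n",
+ "    if sum(m.token_count for m in messages) <= token_threshold:\n",
+ "        return messages\n",
+ "    old, recent = messages[:-keep_recent], messages[-keep_recent:]\n",
+ "    return [summarize_fn(old)] + recent\n",
+ "```\n",
+ "\n",
+ "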
Let's configure and test it.\n" + ] + }, + { + "cell_type": "markdown", + "id": "37993b003426e127", + "metadata": {}, + "source": [ + "### Theory: Automatic Memory Management\n", + "\n", + "**Agent Memory Server Features:**\n", + "- โœ… Automatic summarization when thresholds are exceeded\n", + "- โœ… Configurable strategies (recent + summary, sliding window, full summary)\n", + "- โœ… Transparent to your application code\n", + "- โœ… Production-ready and scalable\n", + "\n", + "**How It Works:**\n", + "1. You add messages to working memory normally\n", + "2. Server monitors message count and token count\n", + "3. When threshold is exceeded, server automatically summarizes\n", + "4. Old messages are replaced with summary\n", + "5. Recent messages are kept for context\n", + "6. Your application retrieves the compressed memory\n", + "\n", + "**Configuration Options:**\n", + "- `message_threshold`: Summarize after N messages (default: 20)\n", + "- `token_threshold`: Summarize after N tokens (default: 4000)\n", + "- `keep_recent`: Number of recent messages to keep (default: 4)\n", + "- `strategy`: \"recent_plus_summary\", \"sliding_window\", or \"full_summary\"\n" + ] + }, + { + "cell_type": "markdown", + "id": "3a39408752c4a504", + "metadata": {}, + "source": [ + "### Demo 5: Test Automatic Summarization\n", + "\n", + "Let's test the Agent Memory Server's automatic summarization with a long conversation.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2bca0c3b7f31459f", + "metadata": {}, + "outputs": [], + "source": [ + "# Create a test session\n", + "test_session_id = f\"long_conversation_test_{int(time.time())}\"\n", + "test_student_id = \"student_memory_test\"\n", + "\n", + "print(f\"Testing automatic summarization\")\n", + "print(f\"Session ID: {test_session_id}\")\n", + "print(f\"Student ID: {test_student_id}\\n\")\n", + "\n", + "# Simulate a long conversation (25 turns = 50 messages)\n", + "print(\"Simulating 25-turn conversation...\")\n", + "print(\"=\" * 80)\n", + "\n", + "conversation_turns = [\n", + " (\"I'm interested in machine learning\", \"Great! CS401 Machine Learning is perfect for you.\"),\n", + " (\"What are the prerequisites?\", \"You'll need CS201 Data Structures and MATH301 Linear Algebra.\"),\n", + " (\"I've completed CS101\", \"Perfect! 
CS201 is your next step.\"),\n", + " (\"How difficult is CS201?\", \"It's moderately challenging but very rewarding.\"),\n", + " (\"When is it offered?\", \"CS201 is offered every semester - Fall, Spring, and Summer.\"),\n", + " (\"What about MATH301?\", \"MATH301 covers linear algebra essentials for ML.\"),\n", + " (\"Can I take both together?\", \"Yes, many students take CS201 and MATH301 concurrently.\"),\n", + " (\"How long will it take?\", \"If you take both, you can start CS401 in about 4-6 months.\"),\n", + " (\"What's the workload?\", \"Expect 10-12 hours per week for each course.\"),\n", + " (\"Are there online options?\", \"Yes, both courses have online and in-person sections.\"),\n", + " (\"Which format is better?\", \"Online offers flexibility, in-person offers more interaction.\"),\n", + " (\"What about CS401 after that?\", \"CS401 is our flagship ML course with hands-on projects.\"),\n", + " (\"How many projects?\", \"CS401 has 4 major projects throughout the semester.\"),\n", + " (\"What topics are covered?\", \"Supervised learning, neural networks, deep learning, and NLP.\"),\n", + " (\"Is there a final exam?\", \"Yes, there's a comprehensive final exam worth 30% of your grade.\"),\n", + " (\"What's the pass rate?\", \"About 85% of students pass CS401 on their first attempt.\"),\n", + " (\"Are there TAs available?\", \"Yes, we have 3 TAs for CS401 with office hours daily.\"),\n", + " (\"What programming language?\", \"CS401 uses Python with TensorFlow and PyTorch.\"),\n", + " (\"Do I need a GPU?\", \"Recommended but not required. We provide cloud GPU access.\"),\n", + " (\"What's the class size?\", \"CS401 typically has 30-40 students per section.\"),\n", + " (\"Can I audit the course?\", \"Yes, auditing is available but you won't get credit.\"),\n", + " (\"What's the cost?\", \"CS401 is $1,200 for credit, $300 for audit.\"),\n", + " (\"Are there scholarships?\", \"Yes, we offer merit-based scholarships. Apply early!\"),\n", + " (\"When should I apply?\", \"Applications open 2 months before each semester starts.\"),\n", + " (\"Thanks for the help!\", \"You're welcome! 
Feel free to reach out with more questions.\"),\n",
+ "]\n",
+ "\n",
+ "# Add messages to working memory\n",
+ "# (method names below follow this notebook's client setup; verify them\n",
+ "# against your installed agent-memory-client version)\n",
+ "for i, (user_msg, assistant_msg) in enumerate(conversation_turns, 1):\n",
+ " # Add this turn's user and assistant messages\n",
+ " await memory_client.add_messages(\n",
+ " session_id=test_session_id,\n",
+ " user_id=test_student_id,\n",
+ " messages=[\n",
+ " {\"role\": \"user\", \"content\": user_msg},\n",
+ " {\"role\": \"assistant\", \"content\": assistant_msg}\n",
+ " ]\n",
+ " )\n",
+ "\n",
+ " # Show progress every 5 turns\n",
+ " if i % 5 == 0:\n",
+ " print(f\"Turn {i:2d}: Added messages (total: {i*2} messages)\")\n",
+ "\n",
+ "print(f\"\\nโœ… Added {len(conversation_turns)} turns ({len(conversation_turns)*2} messages)\")\n",
+ "\n",
+ "# Retrieve working memory to see if summarization occurred\n",
+ "working_memory = await memory_client.get_messages(\n",
+ " session_id=test_session_id,\n",
+ " user_id=test_student_id\n",
+ ")\n",
+ "\n",
+ "print(f\"\\n๐Ÿ“Š Working Memory Status:\")\n",
+ "print(f\" Messages in memory: {len(working_memory)}\")\n",
+ "print(f\" Original messages added: {len(conversation_turns)*2}\")\n",
+ "\n",
+ "if len(working_memory) < len(conversation_turns)*2:\n",
+ " print(f\" โœ… Automatic summarization occurred!\")\n",
+ " print(f\" Compression: {len(conversation_turns)*2} โ†’ {len(working_memory)} messages\")\n",
+ "\n",
+ " # Check for summary message\n",
+ " summary_messages = [msg for msg in working_memory if '[SUMMARY]' in msg.get('content', '') or msg.get('role') == 'system']\n",
+ " if summary_messages:\n",
+ " print(f\" Summary messages found: {len(summary_messages)}\")\n",
+ " print(f\"\\n Summary preview:\")\n",
+ " for msg in summary_messages[:1]: # Show first summary\n",
+ " content_preview = msg.get('content', '')[:200].replace('\\n', ' ')\n",
+ " print(f\" {content_preview}...\")\n",
+ "else:\n",
+ " print(f\" โ„น๏ธ No summarization yet (threshold not reached)\")\n",
+ "\n",
+ "# Calculate token savings\n",
+ "original_tokens = sum(count_tokens(user_msg) + count_tokens(assistant_msg) for user_msg, assistant_msg in conversation_turns)\n",
+ "current_tokens = sum(count_tokens(msg.get('content', '')) for msg in working_memory)\n",
+ "\n",
+ "print(f\"\\n๐Ÿ’ฐ Token Analysis:\")\n",
+ "print(f\" Original tokens: {original_tokens}\")\n",
+ "print(f\" Current tokens: {current_tokens}\")\n",
+ "if current_tokens < original_tokens:\n",
+ " savings = original_tokens - current_tokens\n",
+ " savings_pct = (savings / original_tokens) * 100\n",
+ " print(f\" Token savings: {savings} ({savings_pct:.1f}%)\")\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8b41ae7eb2d88f5a",
+ "metadata": {},
+ "source": [
+ "---\n",
+ "\n",
+ "## ๐ŸŽฏ Part 5: Decision Framework\n",
+ "\n",
+ "How do you choose which compression strategy to use? Let's build a decision framework.\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "56eb87c914424cd",
+ "metadata": {},
+ "source": [
+ "### Theory: Choosing the Right Strategy\n",
+ "\n",
+ "**Decision Factors:**\n",
+ "\n",
+ "1. **Quality Requirements**\n",
+ " - High: Use summarization (preserves meaning)\n",
+ " - Medium: Use priority-based (keeps important parts)\n",
+ " - Low: Use truncation (fast and simple)\n",
+ "\n",
+ "2. **Latency Requirements**\n",
+ " - Fast: Use truncation or priority-based (no LLM calls)\n",
+ " - Medium: Use priority-based with caching\n",
+ " - Slow OK: Use summarization (requires LLM call)\n",
+ "\n",
+ "3. 
**Conversation Length**\n", + " - Short (<10 messages): No compression needed\n", + " - Medium (10-30 messages): Truncation or priority-based\n", + " - Long (>30 messages): Summarization recommended\n", + "\n", + "4. **Cost Sensitivity**\n", + " - High: Use truncation or priority-based (no LLM costs)\n", + " - Medium: Use summarization with caching\n", + " - Low: Use summarization freely\n", + "\n", + "5. **Context Importance**\n", + " - Critical: Use summarization (preserves all important info)\n", + " - Important: Use priority-based (keeps high-value messages)\n", + " - Less critical: Use truncation (simple and fast)\n" + ] + }, + { + "cell_type": "markdown", + "id": "4b904a38b1bad2b9", + "metadata": {}, + "source": [ + "### Implementation: Decision Framework\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "668fce6b8d81c302", + "metadata": {}, + "outputs": [], + "source": [ + "from enum import Enum\n", + "from typing import Literal\n", + "\n", + "class CompressionChoice(Enum):\n", + " \"\"\"Available compression strategies.\"\"\"\n", + " NONE = \"none\"\n", + " TRUNCATION = \"truncation\"\n", + " PRIORITY = \"priority\"\n", + " SUMMARIZATION = \"summarization\"\n", + "\n", + "def choose_compression_strategy(\n", + " conversation_length: int,\n", + " token_count: int,\n", + " quality_requirement: Literal[\"high\", \"medium\", \"low\"],\n", + " latency_requirement: Literal[\"fast\", \"medium\", \"slow_ok\"],\n", + " cost_sensitivity: Literal[\"high\", \"medium\", \"low\"] = \"medium\"\n", + ") -> CompressionChoice:\n", + " \"\"\"\n", + " Decision framework for choosing compression strategy.\n", + "\n", + " Args:\n", + " conversation_length: Number of messages in conversation\n", + " token_count: Total token count\n", + " quality_requirement: How important is quality? (\"high\", \"medium\", \"low\")\n", + " latency_requirement: How fast must it be? (\"fast\", \"medium\", \"slow_ok\")\n", + " cost_sensitivity: How sensitive to costs? 
(\"high\", \"medium\", \"low\")\n", + "\n", + " Returns:\n", + " CompressionChoice: Recommended strategy\n", + " \"\"\"\n", + " # No compression needed for short conversations\n", + " if token_count < 2000 and conversation_length < 10:\n", + " return CompressionChoice.NONE\n", + "\n", + " # Fast requirement = no LLM calls\n", + " if latency_requirement == \"fast\":\n", + " if quality_requirement == \"high\":\n", + " return CompressionChoice.PRIORITY\n", + " else:\n", + " return CompressionChoice.TRUNCATION\n", + "\n", + " # High cost sensitivity = avoid LLM calls\n", + " if cost_sensitivity == \"high\":\n", + " return CompressionChoice.PRIORITY if quality_requirement != \"low\" else CompressionChoice.TRUNCATION\n", + "\n", + " # High quality + willing to wait = summarization\n", + " if quality_requirement == \"high\" and latency_requirement == \"slow_ok\":\n", + " return CompressionChoice.SUMMARIZATION\n", + "\n", + " # Long conversations benefit from summarization\n", + " if conversation_length > 30 and quality_requirement != \"low\":\n", + " return CompressionChoice.SUMMARIZATION\n", + "\n", + " # Medium quality = priority-based\n", + " if quality_requirement == \"medium\":\n", + " return CompressionChoice.PRIORITY\n", + "\n", + " # Default to truncation for simple cases\n", + " return CompressionChoice.TRUNCATION\n", + "\n", + "print(\"โœ… Decision framework defined\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "8324715c96096689", + "metadata": {}, + "source": [ + "### Demo 6: Test Decision Framework\n", + "\n", + "Let's test the decision framework with various scenarios.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "beb98376eb2b00b0", + "metadata": {}, + "outputs": [], + "source": [ + "# Define test scenarios\n", + "scenarios = [\n", + " # (length, tokens, quality, latency, cost, description)\n", + " (5, 1000, \"high\", \"fast\", \"medium\", \"Short conversation, high quality needed\"),\n", + " (15, 3000, \"high\", \"slow_ok\", \"low\", \"Medium conversation, quality critical\"),\n", + " (30, 8000, \"medium\", \"medium\", \"medium\", \"Long conversation, balanced needs\"),\n", + " (50, 15000, \"high\", \"slow_ok\", \"medium\", \"Very long, quality important\"),\n", + " (100, 30000, \"low\", \"fast\", \"high\", \"Extremely long, cost-sensitive\"),\n", + " (20, 5000, \"medium\", \"fast\", \"high\", \"Medium length, fast and cheap\"),\n", + " (40, 12000, \"high\", \"medium\", \"low\", \"Long conversation, quality focus\"),\n", + " (8, 1500, \"low\", \"fast\", \"high\", \"Short, simple case\"),\n", + "]\n", + "\n", + "print(\"Decision Framework Test Scenarios:\")\n", + "print(\"=\" * 120)\n", + "print(f\"{'Scenario':<45} {'Length':<8} {'Tokens':<10} {'Quality':<10} {'Latency':<10} {'Cost':<8} {'Strategy'}\")\n", + "print(\"-\" * 120)\n", + "\n", + "for length, tokens, quality, latency, cost, description in scenarios:\n", + " strategy = choose_compression_strategy(length, tokens, quality, latency, cost)\n", + " print(f\"{description:<45} {length:<8} {tokens:<10,} {quality:<10} {latency:<10} {cost:<8} {strategy.value}\")\n", + "\n", + "print(\"\\n๐Ÿ’ก Key Insights:\")\n", + "print(\" โ€ข Short conversations (<10 messages, <2000 tokens) โ†’ No compression\")\n", + "print(\" โ€ข Fast requirement โ†’ Truncation or Priority-based (no LLM calls)\")\n", + "print(\" โ€ข High quality + willing to wait โ†’ Summarization\")\n", + "print(\" โ€ข Long conversations (>30 messages) โ†’ Summarization recommended\")\n", + "print(\" โ€ข Cost-sensitive โ†’ Avoid 
summarization, use Priority-based\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "3e63fdaf5a2a2587", + "metadata": {}, + "source": [ + "### Production Recommendations\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b824592502d5305", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"๐Ÿญ PRODUCTION RECOMMENDATIONS\")\n", + "print(\"=\" * 80)\n", + "\n", + "print(\"\\n1๏ธโƒฃ FOR MOST APPLICATIONS (Balanced)\")\n", + "print(\" Strategy: Agent Memory Server with automatic summarization\")\n", + "print(\" Configuration:\")\n", + "print(\" โ€ข message_threshold: 20 messages\")\n", + "print(\" โ€ข token_threshold: 4000 tokens\")\n", + "print(\" โ€ข keep_recent: 4 messages\")\n", + "print(\" โ€ข strategy: 'recent_plus_summary'\")\n", + "print(\" Why: Automatic, transparent, production-ready\")\n", + "\n", + "print(\"\\n2๏ธโƒฃ FOR HIGH-VOLUME, COST-SENSITIVE (Efficient)\")\n", + "print(\" Strategy: Priority-based compression\")\n", + "print(\" Configuration:\")\n", + "print(\" โ€ข max_tokens: 2000\")\n", + "print(\" โ€ข Custom importance scoring\")\n", + "print(\" โ€ข No LLM calls\")\n", + "print(\" Why: Fast, cheap, no external dependencies\")\n", + "\n", + "print(\"\\n3๏ธโƒฃ FOR CRITICAL CONVERSATIONS (Quality)\")\n", + "print(\" Strategy: Manual summarization with review\")\n", + "print(\" Configuration:\")\n", + "print(\" โ€ข token_threshold: 5000\")\n", + "print(\" โ€ข Human review of summaries\")\n", + "print(\" โ€ข Store full conversation separately\")\n", + "print(\" Why: Maximum quality, human oversight\")\n", + "\n", + "print(\"\\n4๏ธโƒฃ FOR REAL-TIME CHAT (Speed)\")\n", + "print(\" Strategy: Truncation with sliding window\")\n", + "print(\" Configuration:\")\n", + "print(\" โ€ข keep_recent: 10 messages\")\n", + "print(\" โ€ข No summarization\")\n", + "print(\" โ€ข Fast response required\")\n", + "print(\" Why: Minimal latency, simple implementation\")\n", + "\n", + "print(\"\\n๐Ÿ’ก General Guidelines:\")\n", + "print(\" โ€ข Start with Agent Memory Server automatic summarization\")\n", + "print(\" โ€ข Monitor token usage and costs in production\")\n", + "print(\" โ€ข Adjust thresholds based on your use case\")\n", + "print(\" โ€ข Consider hybrid approaches (truncation + summarization)\")\n", + "print(\" โ€ข Always preserve critical information in long-term memory\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "1f1cd42e5cb65a39", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## ๐Ÿ’ช Practice Exercises\n", + "\n", + "Now it's your turn! 
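As you work, a small reporting helper (a convenience sketch, not required by the exercises) keeps comparisons consistent:\n",
+ "\n",
+ "```python\n",
+ "def report(label, msgs):\n",
+ "    # Print message count and total tokens for any compression result\n",
+ "    tokens = sum(m.token_count for m in msgs)\n",
+ "    print(f\"{label}: {len(msgs)} messages, {tokens:,} tokens\")\n",
+ "```\n",
+ "\n",
+ "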
Complete these exercises to reinforce your learning.\n" + ] + }, + { + "cell_type": "markdown", + "id": "ce7b283d8917e353", + "metadata": {}, + "source": [ + "### Exercise 1: Implement Sliding Window Compression\n", + "\n", + "Create a sliding window compression that keeps only the last N messages:\n", + "\n", + "```python\n", + "def compress_sliding_window(\n", + " messages: List[ConversationMessage],\n", + " window_size: int = 10\n", + ") -> List[ConversationMessage]:\n", + " \"\"\"\n", + " Keep only the last N messages (sliding window).\n", + "\n", + " Args:\n", + " messages: List of conversation messages\n", + " window_size: Number of recent messages to keep\n", + "\n", + " Returns:\n", + " List of messages (last N messages)\n", + " \"\"\"\n", + " # Your implementation here\n", + " pass\n", + "\n", + "# Test your implementation\n", + "test_messages = sample_conversation.copy()\n", + "windowed = compress_sliding_window(test_messages, window_size=6)\n", + "print(f\"Original: {len(test_messages)} messages\")\n", + "print(f\"After sliding window: {len(windowed)} messages\")\n", + "```\n", + "\n", + "**Hint:** This is simpler than truncation - just return the last N messages!\n" + ] + }, + { + "cell_type": "markdown", + "id": "96d60c07d558dbe2", + "metadata": {}, + "source": [ + "### Exercise 2: Implement Hybrid Compression\n", + "\n", + "Combine summarization + truncation for optimal results:\n", + "\n", + "```python\n", + "async def compress_hybrid(\n", + " messages: List[ConversationMessage],\n", + " summarizer: ConversationSummarizer,\n", + " max_tokens: int = 2000\n", + ") -> List[ConversationMessage]:\n", + " \"\"\"\n", + " Hybrid compression: Summarize old messages, truncate if still too large.\n", + "\n", + " Steps:\n", + " 1. First, try summarization\n", + " 2. If still over budget, apply truncation to summary + recent messages\n", + " 3. Ensure we stay within max_tokens\n", + "\n", + " Args:\n", + " messages: List of conversation messages\n", + " summarizer: ConversationSummarizer instance\n", + " max_tokens: Maximum token budget\n", + "\n", + " Returns:\n", + " Compressed messages within token budget\n", + " \"\"\"\n", + " # Your implementation here\n", + " pass\n", + "\n", + "# Test your implementation\n", + "hybrid_result = await compress_hybrid(sample_conversation, summarizer, max_tokens=1000)\n", + "print(f\"Hybrid compression: {len(hybrid_result)} messages, {sum(m.token_count for m in hybrid_result)} tokens\")\n", + "```\n", + "\n", + "**Hint:** Use `summarizer.compress_conversation()` first, then apply truncation if needed.\n" + ] + }, + { + "cell_type": "markdown", + "id": "956554c8c979d1a4", + "metadata": {}, + "source": [ + "### Exercise 3: Quality Comparison\n", + "\n", + "Test all compression strategies and compare quality:\n", + "\n", + "```python\n", + "async def compare_compression_quality(\n", + " messages: List[ConversationMessage],\n", + " test_query: str = \"What courses did we discuss?\"\n", + ") -> Dict[str, Any]:\n", + " \"\"\"\n", + " Compare compression strategies by testing reference resolution.\n", + "\n", + " Steps:\n", + " 1. Compress using each strategy\n", + " 2. Try to answer test_query using compressed context\n", + " 3. Compare quality of responses\n", + " 4. 
Measure token savings\n", + "\n", + " Args:\n", + " messages: Original conversation\n", + " test_query: Question to test reference resolution\n", + "\n", + " Returns:\n", + " Dictionary with comparison results\n", + " \"\"\"\n", + " # Your implementation here\n", + " # Test if the agent can still answer questions after compression\n", + " pass\n", + "\n", + "# Test your implementation\n", + "quality_results = await compare_compression_quality(sample_conversation)\n", + "print(\"Quality Comparison Results:\")\n", + "for strategy, results in quality_results.items():\n", + " print(f\"{strategy}: {results}\")\n", + "```\n", + "\n", + "**Hint:** Use the LLM to answer the test query with each compressed context and compare responses.\n" + ] + }, + { + "cell_type": "markdown", + "id": "3566e3ee779cc9b6", + "metadata": {}, + "source": [ + "### Exercise 4: Custom Importance Scoring\n", + "\n", + "Improve the `calculate_importance()` function with domain-specific logic:\n", + "\n", + "```python\n", + "def calculate_importance_enhanced(msg: ConversationMessage) -> float:\n", + " \"\"\"\n", + " Enhanced importance scoring for course advisor conversations.\n", + "\n", + " Add scoring for:\n", + " - Specific course codes (CS401, MATH301, etc.) - HIGH\n", + " - Prerequisites and requirements - HIGH\n", + " - Student preferences and goals - HIGH\n", + " - Questions - MEDIUM\n", + " - Confirmations and acknowledgments - LOW\n", + " - Greetings and small talk - VERY LOW\n", + "\n", + " Returns:\n", + " Importance score (0.0 to 5.0)\n", + " \"\"\"\n", + " # Your implementation here\n", + " pass\n", + "\n", + "# Test your implementation\n", + "for msg in sample_conversation[:5]:\n", + " score = calculate_importance_enhanced(msg)\n", + " print(f\"Score: {score:.1f} - {msg.content[:60]}...\")\n", + "```\n", + "\n", + "**Hint:** Use regex to detect course codes, check for question marks, look for keywords.\n" + ] + }, + { + "cell_type": "markdown", + "id": "ee85f81eedf9cae1", + "metadata": {}, + "source": [ + "### Exercise 5: Production Configuration\n", + "\n", + "Configure Agent Memory Server for your specific use case:\n", + "\n", + "```python\n", + "# Scenario: High-volume customer support chatbot\n", + "# Requirements:\n", + "# - Handle 1000+ conversations per day\n", + "# - Average conversation: 15-20 turns\n", + "# - Cost-sensitive but quality important\n", + "# - Response time: <2 seconds\n", + "\n", + "# Your task: Choose appropriate configuration\n", + "production_config = {\n", + " \"message_threshold\": ???, # When to trigger summarization\n", + " \"token_threshold\": ???, # Token limit before summarization\n", + " \"keep_recent\": ???, # How many recent messages to keep\n", + " \"strategy\": ???, # Which strategy to use\n", + "}\n", + "\n", + "# Justify your choices:\n", + "print(\"Configuration Justification:\")\n", + "print(f\"message_threshold: {production_config['message_threshold']} because...\")\n", + "print(f\"token_threshold: {production_config['token_threshold']} because...\")\n", + "print(f\"keep_recent: {production_config['keep_recent']} because...\")\n", + "print(f\"strategy: {production_config['strategy']} because...\")\n", + "```\n", + "\n", + "**Hint:** Consider the trade-offs between cost, quality, and latency for this specific scenario.\n" + ] + }, + { + "cell_type": "markdown", + "id": "82e6fb297080ad8", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## ๐Ÿ“ Summary\n", + "\n", + "### **What You Learned:**\n", + "\n", + "1. 
โœ… **The Long Conversation Problem**\n", + " - Token limits, cost implications, performance degradation\n", + " - Why unbounded growth is unsustainable\n", + " - Quadratic cost growth without management\n", + "\n", + "2. โœ… **Conversation Summarization**\n", + " - What to preserve vs. compress\n", + " - When to trigger summarization (token/message thresholds)\n", + " - Implementation with `ConversationSummarizer` class\n", + " - LLM-based intelligent summarization\n", + "\n", + "3. โœ… **Three Compression Strategies**\n", + " - **Truncation:** Fast, simple, loses context\n", + " - **Priority-based:** Balanced, intelligent, no LLM calls\n", + " - **Summarization:** High quality, preserves meaning, requires LLM\n", + " - Trade-offs between speed, quality, and cost\n", + "\n", + "4. โœ… **Agent Memory Server Integration**\n", + " - Automatic summarization configuration\n", + " - Transparent memory management\n", + " - Production-ready solution\n", + " - Configurable thresholds and strategies\n", + "\n", + "5. โœ… **Decision Framework**\n", + " - How to choose the right strategy\n", + " - Factors: quality, latency, cost, conversation length\n", + " - Production recommendations for different scenarios\n", + " - Hybrid approaches for optimal results\n", + "\n", + "### **What You Built:**\n", + "\n", + "- โœ… `ConversationSummarizer` class for intelligent summarization\n", + "- โœ… Three compression strategy implementations (Truncation, Priority, Summarization)\n", + "- โœ… Decision framework for strategy selection\n", + "- โœ… Production configuration examples\n", + "- โœ… Comparison tools for evaluating strategies\n", + "- โœ… Token counting and cost analysis tools\n", + "\n", + "### **Key Takeaways:**\n", + "\n", + "๐Ÿ’ก **\"Conversations grow unbounded without management\"**\n", + "- Every turn adds tokens and cost\n", + "- Eventually you'll hit limits\n", + "- Costs grow quadratically (each turn includes all previous messages)\n", + "\n", + "๐Ÿ’ก **\"Summarization preserves meaning while reducing tokens\"**\n", + "- Use LLM to create intelligent summaries\n", + "- Keep recent messages for immediate context\n", + "- Store important facts in long-term memory\n", + "\n", + "๐Ÿ’ก **\"Choose strategy based on requirements\"**\n", + "- Quality-critical โ†’ Summarization\n", + "- Speed-critical โ†’ Truncation or Priority-based\n", + "- Balanced โ†’ Agent Memory Server automatic\n", + "- Cost-sensitive โ†’ Priority-based\n", + "\n", + "๐Ÿ’ก **\"Agent Memory Server handles this automatically\"**\n", + "- Production-ready solution\n", + "- Transparent to your application\n", + "- Configurable for your needs\n", + "- No manual intervention required\n", + "\n", + "### **Connection to Context Engineering:**\n", + "\n", + "This notebook completes the **Conversation Context** story from Section 1:\n", + "\n", + "1. **Section 1:** Introduced the 4 context types, including Conversation Context\n", + "2. **Section 3, NB1:** Implemented working memory for conversation continuity\n", + "3. **Section 3, NB2:** Integrated memory with RAG for stateful conversations\n", + "4. 
**Section 3, NB3:** Managed long conversations with summarization and compression โ† You are here\n", + "\n", + "**Next:** Section 4 will show how agents can actively manage their own memory using tools!\n", + "\n", + "### **Next Steps:**\n", + "\n", + "**Section 4: Tools and Agents**\n", + "- Build agents that actively manage their own memory\n", + "- Implement memory tools (store, search, retrieve)\n", + "- Use LangGraph for agent workflows\n", + "- Let the LLM decide when to summarize\n", + "\n", + "**Section 5: Production Optimization**\n", + "- Performance measurement and monitoring\n", + "- Hybrid retrieval strategies\n", + "- Semantic tool selection\n", + "- Quality assurance and validation\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ”— Resources\n", + "\n", + "### **Documentation:**\n", + "- [Agent Memory Server](https://github.com/redis/agent-memory-server) - Production memory management\n", + "- [Agent Memory Client](https://pypi.org/project/agent-memory-client/) - Python client library\n", + "- [LangChain Memory](https://python.langchain.com/docs/modules/memory/) - Memory patterns\n", + "- [OpenAI Tokenizer](https://platform.openai.com/tokenizer) - Token counting tool\n", + "- [tiktoken](https://github.com/openai/tiktoken) - Fast token counting library\n", + "\n", + "### **Research Papers:**\n", + "- [Lost in the Middle](https://arxiv.org/abs/2307.03172) - Context Rot research showing performance degradation\n", + "- [Retrieval-Augmented Generation](https://arxiv.org/abs/2005.11401) - RAG fundamentals\n", + "- [MemGPT](https://arxiv.org/abs/2310.08560) - Memory management for LLMs\n", + "- [Attention Is All You Need](https://arxiv.org/abs/1706.03762) - Transformer architecture and context windows\n", + "\n", + "### **Related Notebooks:**\n", + "- **Section 1, NB1:** Introduction to Context Engineering\n", + "- **Section 1, NB2:** The Four Context Types\n", + "- **Section 2, NB1:** RAG and Retrieved Context\n", + "- **Section 3, NB1:** Memory Fundamentals and Integration\n", + "- **Section 3, NB2:** Memory-Enhanced RAG and Agents\n", + "- **Section 4, NB1:** Tools and LangGraph Fundamentals\n", + "- **Section 4, NB2:** Redis University Course Advisor Agent\n", + "- **Section 5, NB1:** Measuring and Optimizing Performance\n", + "\n", + "### **Tools and Libraries:**\n", + "- **Redis:** Vector storage and memory backend\n", + "- **Agent Memory Server:** Dual-memory architecture with automatic summarization\n", + "- **LangChain:** LLM interaction framework\n", + "- **LangGraph:** State management and agent workflows\n", + "- **OpenAI:** GPT-4o for generation and summarization\n", + "- **tiktoken:** Token counting for cost estimation\n", + "\n", + "---\n", + "\n", + "![Redis](https://redis.io/wp-content/uploads/2024/04/Logotype.svg?auto=webp&quality=85,75&width=120)\n", + "\n", + "**Redis University - Context Engineering Course**\n", + "\n", + "**๐ŸŽ‰ Congratulations!** You've completed Section 3: Memory Architecture!\n", + "\n", + "You now understand how to:\n", + "- Build memory systems for AI agents\n", + "- Integrate working and long-term memory\n", + "- Manage long conversations with summarization\n", + "- Choose the right compression strategy\n", + "- Configure production-ready memory management\n", + "\n", + "**Ready for Section 4?** Let's build agents that actively manage their own memory using tools!\n", + "\n", + "---\n", + "\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + 
"language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/python-recipes/context-engineering/notebooks/section-3-memory-systems-for-context-engineering/03_memory_management_long_conversations_executed.ipynb b/python-recipes/context-engineering/notebooks/section-3-memory-systems-for-context-engineering/03_memory_management_long_conversations_executed.ipynb new file mode 100644 index 00000000..f11fd6ab --- /dev/null +++ b/python-recipes/context-engineering/notebooks/section-3-memory-systems-for-context-engineering/03_memory_management_long_conversations_executed.ipynb @@ -0,0 +1,4016 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "3d06c497fe3df20b", + "metadata": {}, + "source": [ + "![Redis](https://redis.io/wp-content/uploads/2024/04/Logotype.svg?auto=webp&quality=85,75&width=120)\n", + "\n", + "# ๐Ÿง  Section 3, Notebook 3: Memory Management - Handling Long Conversations\n", + "\n", + "**โฑ๏ธ Estimated Time:** 50-60 minutes\n", + "\n", + "## ๐ŸŽฏ Learning Objectives\n", + "\n", + "By the end of this notebook, you will:\n", + "\n", + "1. **Understand** why long conversations need management (token limits, cost, performance)\n", + "2. **Implement** conversation summarization to preserve key information\n", + "3. **Build** context compression strategies (truncation, priority-based, summarization)\n", + "4. **Configure** automatic memory management with Agent Memory Server\n", + "5. **Decide** when to apply each technique based on conversation characteristics\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ”— Where We Are\n", + "\n", + "### **Your Journey So Far:**\n", + "\n", + "**Section 3, Notebook 1:** Memory Fundamentals\n", + "- โœ… Working memory for conversation continuity\n", + "- โœ… Long-term memory for persistent knowledge\n", + "- โœ… The grounding problem and reference resolution\n", + "- โœ… Memory types (semantic, episodic, message)\n", + "\n", + "**Section 3, Notebook 2:** Memory-Enhanced RAG\n", + "- โœ… Integrated all four context types\n", + "- โœ… Built complete memory-enhanced RAG system\n", + "- โœ… Demonstrated benefits of stateful conversations\n", + "\n", + "**Your memory system works!** It can:\n", + "- Remember conversation history across turns\n", + "- Store and retrieve long-term facts\n", + "- Resolve references (\"it\", \"that course\")\n", + "- Provide personalized recommendations\n", + "\n", + "### **But... What About Long Conversations?**\n", + "\n", + "**Questions we can't answer yet:**\n", + "- โ“ What happens when conversations get really long?\n", + "- โ“ How do we handle token limits?\n", + "- โ“ How much does a 50-turn conversation cost?\n", + "- โ“ Can we preserve important context while reducing tokens?\n", + "- โ“ When should we summarize vs. truncate vs. 
keep everything?\n", + "\n", + "---\n", + "\n", + "## ๐Ÿšจ The Long Conversation Problem\n", + "\n", + "Before diving into solutions, let's understand the fundamental problem.\n", + "\n", + "### **The Problem: Unbounded Growth**\n", + "\n", + "Every conversation turn adds messages to working memory:\n", + "\n", + "```\n", + "Turn 1: System (500) + Messages (200) = 700 tokens โœ…\n", + "Turn 5: System (500) + Messages (1,000) = 1,500 tokens โœ…\n", + "Turn 20: System (500) + Messages (4,000) = 4,500 tokens โœ…\n", + "Turn 50: System (500) + Messages (10,000) = 10,500 tokens โš ๏ธ\n", + "Turn 100: System (500) + Messages (20,000) = 20,500 tokens โš ๏ธ\n", + "Turn 200: System (500) + Messages (40,000) = 40,500 tokens โŒ\n", + "```\n", + "\n", + "**Without management, conversations grow unbounded!**\n", + "\n", + "### **Why This Matters**\n", + "\n", + "**1. Token Limits (Hard Constraint)**\n", + "- GPT-4o: 128K tokens (~96,000 words)\n", + "- GPT-3.5: 16K tokens (~12,000 words)\n", + "- Eventually, you'll hit the limit and conversations fail\n", + "\n", + "**2. Cost (Economic Constraint)**\n", + "- Input tokens cost money (e.g. $0.0025 / 1K tokens for GPT-4o)\n", + "\n", + "- A 50-turn conversation = ~10,000 tokens = $0.025 per query\n", + "\n", + "- Over 1,000 conversations = $25 just for conversation history!\n", + "\n", + "**3. Performance (Quality Constraint)**\n", + "- More tokens = longer processing time\n", + "- Context Rot: LLMs struggle with very long contexts\n", + "- Important information gets \"lost in the middle\"\n", + "\n", + "**4. User Experience**\n", + "- Slow responses frustrate users\n", + "- Expensive conversations aren't sustainable\n", + "- Failed conversations due to token limits are unacceptable\n", + "\n", + "### **The Solution: Memory Management**\n", + "\n", + "We need strategies to:\n", + "- โœ… Keep conversations within token budgets\n", + "- โœ… Preserve important information\n", + "- โœ… Maintain conversation quality\n", + "- โœ… Control costs\n", + "- โœ… Enable indefinite conversations\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ“ฆ Part 0: Setup and Environment\n", + "\n", + "Let's set up our environment and create tools for measuring conversation growth.\n", + "\n", + "### โš ๏ธ Prerequisites\n", + "\n", + "**Before running this notebook, make sure you have:**\n", + "\n", + "1. **Docker Desktop running** - Required for Redis and Agent Memory Server\n", + "\n", + "2. **Environment variables** - Create a `.env` file in the `reference-agent` directory:\n", + " ```bash\n", + " # Copy the example file\n", + " cd ../../reference-agent\n", + " cp .env.example .env\n", + "\n", + " # Edit .env and add your OpenAI API key\n", + " # OPENAI_API_KEY=your_actual_openai_api_key_here\n", + " ```\n", + "\n", + "3. 
**Run the setup script** - This will automatically start Redis and Agent Memory Server:\n", + " ```bash\n", + " cd ../../reference-agent\n", + " python setup_agent_memory_server.py\n", + " ```\n" + ] + }, + { + "cell_type": "markdown", + "id": "307c59ecc51d30c3", + "metadata": {}, + "source": [ + "---\n" + ] + }, + { + "cell_type": "markdown", + "id": "dd10e48e57f1431e", + "metadata": {}, + "source": [ + "### Automated Setup Check\n", + "\n", + "Let's run the setup script to ensure all services are running properly.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "808cea2af3f4f118", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-02T01:09:12.149354Z", + "iopub.status.busy": "2025-11-02T01:09:12.149256Z", + "iopub.status.idle": "2025-11-02T01:09:12.404028Z", + "shell.execute_reply": "2025-11-02T01:09:12.403476Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Running automated setup check...\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "๐Ÿ”ง Agent Memory Server Setup\n", + "===========================\n", + "๐Ÿ“Š Checking Redis...\n", + "โœ… Redis is running\n", + "๐Ÿ“Š Checking Agent Memory Server...\n", + "๐Ÿ” Agent Memory Server container exists. Checking health...\n", + "โœ… Agent Memory Server is running and healthy\n", + "โœ… No Redis connection issues detected\n", + "\n", + "โœ… Setup Complete!\n", + "=================\n", + "๐Ÿ“Š Services Status:\n", + " โ€ข Redis: Running on port 6379\n", + " โ€ข Agent Memory Server: Running on port 8088\n", + "\n", + "๐ŸŽฏ You can now run the notebooks!\n", + "\n", + "\n", + "โœ… All services are ready!\n" + ] + } + ], + "source": [ + "# Run the setup script to ensure Redis and Agent Memory Server are running\n", + "import subprocess\n", + "import sys\n", + "from pathlib import Path\n", + "\n", + "# Path to setup script\n", + "setup_script = Path(\"../../reference-agent/setup_agent_memory_server.py\")\n", + "\n", + "if setup_script.exists():\n", + " print(\"Running automated setup check...\\n\")\n", + " result = subprocess.run(\n", + " [sys.executable, str(setup_script)],\n", + " capture_output=True,\n", + " text=True\n", + " )\n", + " print(result.stdout)\n", + " if result.returncode != 0:\n", + " print(\"โš ๏ธ Setup check failed. Please review the output above.\")\n", + " print(result.stderr)\n", + " else:\n", + " print(\"\\nโœ… All services are ready!\")\n", + "else:\n", + " print(\"โš ๏ธ Setup script not found. 
Please ensure services are running manually.\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "4f7ab2a448dd08fc", + "metadata": {}, + "source": [ + "---\n" + ] + }, + { + "cell_type": "markdown", + "id": "9dd8400bfed20f64", + "metadata": {}, + "source": [ + "### Install Dependencies\n", + "\n", + "If you haven't already installed the reference-agent package, uncomment and run the following:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "62ad9f5d109351a", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-02T01:09:12.405399Z", + "iopub.status.busy": "2025-11-02T01:09:12.405297Z", + "iopub.status.idle": "2025-11-02T01:09:12.406937Z", + "shell.execute_reply": "2025-11-02T01:09:12.406610Z" + } + }, + "outputs": [], + "source": [ + "# Uncomment to install reference-agent package\n", + "# %pip install -q -e ../../reference-agent\n", + "\n", + "# Uncomment to install agent-memory-client\n", + "# %pip install -q agent-memory-client\n" + ] + }, + { + "cell_type": "markdown", + "id": "b41bf6b02f73fdb9", + "metadata": {}, + "source": [ + "### Import Dependencies\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "b00247fc4bb718d6", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-02T01:09:12.408080Z", + "iopub.status.busy": "2025-11-02T01:09:12.408022Z", + "iopub.status.idle": "2025-11-02T01:09:14.659616Z", + "shell.execute_reply": "2025-11-02T01:09:14.659086Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… All imports successful\n" + ] + } + ], + "source": [ + "# Standard library imports\n", + "import os\n", + "import time\n", + "import asyncio\n", + "from typing import List, Dict, Any, Optional, Tuple\n", + "from dataclasses import dataclass, field\n", + "from datetime import datetime\n", + "from pathlib import Path\n", + "\n", + "# LangChain\n", + "from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n", + "from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, SystemMessage\n", + "\n", + "# Redis and Agent Memory\n", + "from agent_memory_client import MemoryAPIClient, MemoryClientConfig\n", + "from agent_memory_client.models import WorkingMemory, MemoryMessage, ClientMemoryRecord\n", + "\n", + "# Token counting\n", + "import tiktoken\n", + "\n", + "# For visualization\n", + "from collections import defaultdict\n", + "\n", + "print(\"โœ… All imports successful\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "38946d91e830639a", + "metadata": {}, + "source": [ + "### Load Environment Variables\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "41a3192aacee6dbf", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-02T01:09:14.660925Z", + "iopub.status.busy": "2025-11-02T01:09:14.660805Z", + "iopub.status.idle": "2025-11-02T01:09:14.665197Z", + "shell.execute_reply": "2025-11-02T01:09:14.664758Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Environment variables configured\n", + " Redis URL: redis://localhost:6379\n", + " Agent Memory URL: http://localhost:8088\n" + ] + } + ], + "source": [ + "from dotenv import load_dotenv\n", + "\n", + "# Load environment variables from reference-agent directory\n", + "env_path = Path(\"../../reference-agent/.env\")\n", + "load_dotenv(dotenv_path=env_path)\n", + "\n", + "# Verify required environment variables\n", + "OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")\n", + "REDIS_URL = os.getenv(\"REDIS_URL\", 
\"redis://localhost:6379\")\n", + "AGENT_MEMORY_URL = os.getenv(\"AGENT_MEMORY_URL\", \"http://localhost:8088\")\n", + "\n", + "if not OPENAI_API_KEY:\n", + " print(f\"\"\"โŒ OPENAI_API_KEY not found!\n", + "\n", + "Please create a .env file at: {env_path.absolute()}\n", + "\n", + "With the following content:\n", + "OPENAI_API_KEY=your_openai_api_key\n", + "REDIS_URL=redis://localhost:6379\n", + "AGENT_MEMORY_URL=http://localhost:8088\n", + "\"\"\")\n", + "else:\n", + " print(\"โœ… Environment variables configured\")\n", + " print(f\" Redis URL: {REDIS_URL}\")\n", + " print(f\" Agent Memory URL: {AGENT_MEMORY_URL}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "2f42157025d92c5", + "metadata": {}, + "source": [ + "### Initialize Clients\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "f6acdabe9f826582", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-02T01:09:14.666265Z", + "iopub.status.busy": "2025-11-02T01:09:14.666205Z", + "iopub.status.idle": "2025-11-02T01:09:14.922557Z", + "shell.execute_reply": "2025-11-02T01:09:14.922092Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Clients initialized\n", + " LLM: gpt-4o\n", + " Embeddings: text-embedding-3-small\n", + " Memory Server: http://localhost:8088\n" + ] + } + ], + "source": [ + "# Initialize LLM\n", + "llm = ChatOpenAI(\n", + " model=\"gpt-4o\",\n", + " temperature=0.7\n", + ")\n", + "\n", + "# Initialize embeddings\n", + "embeddings = OpenAIEmbeddings(\n", + " model=\"text-embedding-3-small\"\n", + ")\n", + "\n", + "# Initialize Agent Memory Client\n", + "memory_config = MemoryClientConfig(base_url=AGENT_MEMORY_URL)\n", + "memory_client = MemoryAPIClient(config=memory_config)\n", + "\n", + "# Initialize tokenizer for counting\n", + "tokenizer = tiktoken.encoding_for_model(\"gpt-4o\")\n", + "\n", + "def count_tokens(text: str) -> int:\n", + " \"\"\"Count tokens in text using tiktoken.\"\"\"\n", + " return len(tokenizer.encode(text))\n", + "\n", + "print(\"โœ… Clients initialized\")\n", + "print(f\" LLM: {llm.model_name}\")\n", + "print(f\" Embeddings: text-embedding-3-small\")\n", + "print(f\" Memory Server: {AGENT_MEMORY_URL}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "cb3c6e2d8cee7f21", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## ๐Ÿ“Š Part 1: Understanding Conversation Growth\n", + "\n", + "Let's visualize how conversations grow and understand the implications.\n" + ] + }, + { + "cell_type": "markdown", + "id": "38b4a48ea4fee96b", + "metadata": {}, + "source": [ + "### ๐Ÿ”ฌ Research Context: Why Context Management Matters\n", + "\n", + "Modern LLMs have impressive context windows:\n", + "- **GPT-4o**: 128K tokens (~96,000 words)\n", + "- **Claude 3.5**: 200K tokens (~150,000 words)\n", + "- **Gemini 1.5 Pro**: 1M tokens (~750,000 words)\n", + "\n", + "**But here's the problem:** Larger context windows don't guarantee better performance.\n", + "\n", + "#### The \"Lost in the Middle\" Problem\n", + "\n", + "Research by Liu et al. 
(2023) in their paper [\"Lost in the Middle: How Language Models Use Long Contexts\"](https://arxiv.org/abs/2307.03172) revealed critical findings:\n", + "\n", + "**Key Finding #1: U-Shaped Performance**\n", + "- Models perform best when relevant information is at the **beginning** or **end** of context\n", + "- Performance **significantly degrades** when information is in the **middle** of long contexts\n", + "- This happens even with models explicitly designed for long contexts\n", + "\n", + "**Key Finding #2: Non-Uniform Degradation**\n", + "- It's not just about hitting token limits\n", + "- Quality degrades **even within the context window**\n", + "- The longer the context, the worse the \"middle\" performance becomes\n", + "\n", + "**Key Finding #3: More Context โ‰  Better Results**\n", + "- In some experiments, GPT-3.5 performed **worse** with retrieved documents than with no documents at all\n", + "- Adding more context can actually **hurt** performance if not managed properly\n", + "\n", + "**Why This Matters for Memory Management:**\n", + "- Simply storing all conversation history isn't optimal\n", + "- We need **intelligent compression** to keep important information accessible\n", + "- **Position matters**: Recent context (at the end) is naturally well-positioned\n", + "- **Quality over quantity**: Better to have concise, relevant context than exhaustive history\n", + "\n", + "**References:**\n", + "- Liu, N. F., Lin, K., Hewitt, J., Paranjape, A., Bevilacqua, M., Petroni, F., & Liang, P. (2023). Lost in the Middle: How Language Models Use Long Contexts. *Transactions of the Association for Computational Linguistics (TACL)*.\n" + ] + }, + { + "cell_type": "markdown", + "id": "9ff7e262cad76878", + "metadata": {}, + "source": [ + "### Demo 1: Token Growth Over Time\n", + "\n", + "Now let's see this problem in action by simulating conversation growth.\n", + "\n", + "#### Step 1: Define our system prompt and count its tokens\n", + "\n", + "**What:** Creating a system prompt and measuring its token count.\n", + "\n", + "**Why:** The system prompt is sent with EVERY request, so its size directly impacts costs. Understanding this baseline is crucial for budgeting.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "99edd1b0325093b", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-02T01:09:14.923876Z", + "iopub.status.busy": "2025-11-02T01:09:14.923775Z", + "iopub.status.idle": "2025-11-02T01:09:14.926222Z", + "shell.execute_reply": "2025-11-02T01:09:14.925827Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "System prompt: 31 tokens\n" + ] + } + ], + "source": [ + "# System prompt (constant across all turns)\n", + "system_prompt = \"\"\"You are a helpful course advisor for Redis University.\n", + "Help students find courses, check prerequisites, and plan their schedule.\n", + "Be friendly, concise, and accurate.\"\"\"\n", + "\n", + "system_tokens = count_tokens(system_prompt)\n", + "\n", + "print(f\"System prompt: {system_tokens} tokens\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "1a9e0cfece6beaf5", + "metadata": {}, + "source": [ + "#### Step 2: Simulate how tokens grow with each conversation turn\n", + "\n", + "**What:** Projecting token growth and costs across 1 to 200 conversation turns.\n", + "\n", + "**Why:** Visualizing the growth curve shows when conversations become expensive (>20K tokens) and helps you plan compression strategies. 
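The 100-tokens-per-exchange figure assumed below is just that - an assumption; you can measure the real average for your own traffic with the `count_tokens()` helper defined earlier. A minimal sketch:\n",
+    "\n",
+    "```python\n",
+    "def avg_pair_tokens(pairs):\n",
+    "    \"\"\"Average tokens per (user, assistant) exchange, measured with tiktoken.\"\"\"\n",
+    "    totals = [count_tokens(u) + count_tokens(a) for u, a in pairs]\n",
+    "    return sum(totals) / len(totals)\n",
+    "```\n",
+    "\n",
+    "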
Notice how costs accelerate - this is the quadratic growth problem.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "117ca757272caef3", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-02T01:09:14.927323Z", + "iopub.status.busy": "2025-11-02T01:09:14.927226Z", + "iopub.status.idle": "2025-11-02T01:09:14.929730Z", + "shell.execute_reply": "2025-11-02T01:09:14.929335Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Conversation Growth Simulation:\n", + "================================================================================\n", + "Turn Messages Conv Tokens Total Tokens Cost ($) \n", + "--------------------------------------------------------------------------------\n", + "1 2 100 131 $0.0003 โœ…\n", + "5 10 500 531 $0.0013 โœ…\n", + "10 20 1,000 1,031 $0.0026 โœ…\n", + "20 40 2,000 2,031 $0.0051 โœ…\n", + "30 60 3,000 3,031 $0.0076 โœ…\n", + "50 100 5,000 5,031 $0.0126 โš ๏ธ\n", + "75 150 7,500 7,531 $0.0188 โš ๏ธ\n", + "100 200 10,000 10,031 $0.0251 โš ๏ธ\n", + "150 300 15,000 15,031 $0.0376 โš ๏ธ\n", + "200 400 20,000 20,031 $0.0501 โŒ\n" + ] + } + ], + "source": [ + "# Assume average message pair (user + assistant) = 100 tokens\n", + "avg_message_pair_tokens = 100\n", + "\n", + "print(\"\\nConversation Growth Simulation:\")\n", + "print(\"=\" * 80)\n", + "print(f\"{'Turn':<8} {'Messages':<10} {'Conv Tokens':<15} {'Total Tokens':<15} {'Cost ($)':<12}\")\n", + "print(\"-\" * 80)\n", + "\n", + "for turn in [1, 5, 10, 20, 30, 50, 75, 100, 150, 200]:\n", + " # Each turn = user message + assistant message\n", + " num_messages = turn * 2\n", + " conversation_tokens = num_messages * (avg_message_pair_tokens // 2)\n", + " total_tokens = system_tokens + conversation_tokens\n", + "\n", + " # Cost calculation (GPT-4o input: $0.0025 per 1K tokens)\n", + " cost_per_query = (total_tokens / 1000) * 0.0025\n", + "\n", + " # Visual indicator\n", + " if total_tokens < 5000:\n", + " indicator = \"โœ…\"\n", + " elif total_tokens < 20000:\n", + " indicator = \"โš ๏ธ\"\n", + " else:\n", + " indicator = \"โŒ\"\n", + "\n", + " print(f\"{turn:<8} {num_messages:<10} {conversation_tokens:<15,} {total_tokens:<15,} ${cost_per_query:<11.4f} {indicator}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "544c9c59a8e344be", + "metadata": {}, + "source": [ + "### Demo 2: Cost Analysis\n", + "\n", + "Let's calculate the cumulative cost of long conversations.\n", + "\n", + "**Why costs grow quadratically:**\n", + "- Turn 1: Process 100 tokens\n", + "- Turn 2: Process 200 tokens (includes turn 1)\n", + "- Turn 3: Process 300 tokens (includes turns 1 & 2)\n", + "- Turn N: Process Nร—100 tokens\n", + "\n", + "Total cost = 100 + 200 + 300 + ... + Nร—100 = **O(Nยฒ)** growth!\n", + "\n", + "#### Step 1: Create a function to calculate conversation costs\n", + "\n", + "**What:** Building a cost calculator that accounts for cumulative token processing.\n", + "\n", + "**Why:** Each turn processes ALL previous messages, so costs compound. 
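Before building it, here is a quick closed-form check of the same arithmetic (a sketch using the simulation's own assumptions: a 50-token system prompt and 100 tokens per turn):\n",
+    "\n",
+    "```python\n",
+    "def cumulative_tokens(n_turns, per_turn=100, system=50):\n",
+    "    # Sum over turns t = 1..N of (system + t * per_turn)\n",
+    "    return system * n_turns + per_turn * n_turns * (n_turns + 1) // 2\n",
+    "\n",
+    "print(cumulative_tokens(50))  # 130000, matching the 50-turn row in the table below\n",
+    "```\n",
+    "\n",
+    "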
The calculator below reveals the true cost of long conversations - not just the final token count, but the sum of all API calls.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "998184e76d362bf3",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-11-02T01:09:14.930677Z",
+     "iopub.status.busy": "2025-11-02T01:09:14.930598Z",
+     "iopub.status.idle": "2025-11-02T01:09:14.932733Z",
+     "shell.execute_reply": "2025-11-02T01:09:14.932377Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "โœ… Cost calculation function defined\n"
+     ]
+    }
+   ],
+   "source": [
+    "def calculate_conversation_cost(num_turns: int, avg_tokens_per_turn: int = 100) -> Dict[str, float]:\n",
+    "    \"\"\"\n",
+    "    Calculate cost metrics for a conversation.\n",
+    "\n",
+    "    Args:\n",
+    "        num_turns: Number of conversation turns\n",
+    "        avg_tokens_per_turn: Average tokens per turn (user + assistant)\n",
+    "\n",
+    "    Returns:\n",
+    "        Dictionary with cost metrics\n",
+    "    \"\"\"\n",
+    "    system_tokens = 50  # Simplified\n",
+    "\n",
+    "    # Cumulative cost (each turn includes all previous messages)\n",
+    "    cumulative_tokens = 0\n",
+    "    cumulative_cost = 0.0\n",
+    "\n",
+    "    for turn in range(1, num_turns + 1):\n",
+    "        # Total tokens for this turn\n",
+    "        conversation_tokens = turn * avg_tokens_per_turn\n",
+    "        total_tokens = system_tokens + conversation_tokens\n",
+    "\n",
+    "        # Cost for this turn (input tokens)\n",
+    "        turn_cost = (total_tokens / 1000) * 0.0025\n",
+    "        cumulative_cost += turn_cost\n",
+    "        cumulative_tokens += total_tokens\n",
+    "\n",
+    "    return {\n",
+    "        \"num_turns\": num_turns,\n",
+    "        \"final_tokens\": system_tokens + (num_turns * avg_tokens_per_turn),\n",
+    "        \"cumulative_tokens\": cumulative_tokens,\n",
+    "        \"cumulative_cost\": cumulative_cost,\n",
+    "        \"avg_cost_per_turn\": cumulative_cost / num_turns\n",
+    "    }\n",
+    "\n",
+    "print(\"โœ… Cost calculation function defined\")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6710bd8b0268c34d",
+   "metadata": {},
+   "source": [
+    "#### Step 2: Compare costs across different conversation lengths\n",
+    "\n",
+    "**What:** Running cost projections for conversations from 10 to 200 turns.\n",
+    "\n",
+    "**Why:** Seeing the quadratic growth in action - the final query of a 200-turn conversation costs only ~$0.05, yet the cumulative cost across all 200 turns is ~$5.05. 
This motivates compression strategies.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "4441a3298bd38af8",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-11-02T01:09:14.935301Z",
+     "iopub.status.busy": "2025-11-02T01:09:14.935202Z",
+     "iopub.status.idle": "2025-11-02T01:09:14.937547Z",
+     "shell.execute_reply": "2025-11-02T01:09:14.936972Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Cost Analysis for Different Conversation Lengths:\n",
+      "================================================================================\n",
+      "Turns      Final Tokens    Cumulative Tokens    Total Cost      Avg/Turn\n",
+      "--------------------------------------------------------------------------------\n",
+      "10         1,050           6,000                $0.02           $0.0015\n",
+      "25         2,550           33,750               $0.08           $0.0034\n",
+      "50         5,050           130,000              $0.33           $0.0065\n",
+      "100        10,050          510,000              $1.27           $0.0127\n",
+      "200        20,050          2,020,000            $5.05           $0.0253\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(\"Cost Analysis for Different Conversation Lengths:\")\n",
+    "print(\"=\" * 80)\n",
+    "print(f\"{'Turns':<10} {'Final Tokens':<15} {'Cumulative Tokens':<20} {'Total Cost':<15} {'Avg/Turn'}\")\n",
+    "print(\"-\" * 80)\n",
+    "\n",
+    "for num_turns in [10, 25, 50, 100, 200]:\n",
+    "    metrics = calculate_conversation_cost(num_turns)\n",
+    "    print(f\"{metrics['num_turns']:<10} \"\n",
+    "          f\"{metrics['final_tokens']:<15,} \"\n",
+    "          f\"{metrics['cumulative_tokens']:<20,} \"\n",
+    "          f\"${metrics['cumulative_cost']:<14.2f} \"\n",
+    "          f\"${metrics['avg_cost_per_turn']:.4f}\")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "df5840eedf4a9185",
+   "metadata": {},
+   "source": [
+    "#### Key Takeaways\n",
+    "\n",
+    "**Without memory management:**\n",
+    "- Costs grow **quadratically** (O(Nยฒ))\n",
+    "- A 100-turn conversation costs ~$1.27 in total\n",
+    "- A 200-turn conversation costs ~$5.05 in total\n",
+    "- At scale (1000s of users), this becomes unsustainable\n",
+    "\n",
+    "**The solution:** Intelligent memory management to keep conversations within budget while preserving quality.\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5a7f1c4414f6d2a7",
+   "metadata": {},
+   "source": [
+    "---\n",
+    "\n",
+    "## ๐ŸŽฏ Part 2: Context Summarization\n",
+    "\n",
+    "**Context summarization** is the process of condensing conversation history into a compact representation that preserves essential information while dramatically reducing token count.\n",
+    "\n",
+    "Picture a chat assistant helping someone plan a wedding over 50 messages:\n",
+    "- It captures the critical stuff: venue choice, budget, guest count, vendor decisions\n",
+    "- It grabs the decisions and ditches the small talk\n",
+    "- Later messages can reference \"the venue we picked\" without replaying the entire debate\n",
+    "\n",
+    "**Same deal with LLM chats:**\n",
+    "- Squash ancient messages into a tight little paragraph\n",
+    "- Keep the gold (facts, choices, what the user loves/hates)\n",
+    "- Leave fresh messages untouched (they're still doing work)\n",
+    "- Slash token usage by 50-80% without lobotomizing the conversation\n",
+    "\n",
+    "### Why Should You Care About Summarization?\n",
+    "\n",
+    "Summarization tackles three gnarly problems:\n",
+    "\n",
+    "**1. 
Plays Nice With Token Caps (Callback to Part 1)**\n", + "- Chats balloon up forever if you let them\n", + "- Summarization keeps you from hitting the ceiling\n", + "- **Real talk:** 50 messages (10K tokens) โ†’ Compressed summary + 4 fresh messages (2.5K tokens)\n", + "\n", + "**2. Fixes the Context Rot Problem (Also From Part 1)**\n", + "- Remember that \"Lost in the Middle\" mess? Old info gets buried and ignored\n", + "- Summarization yanks that old stuff to the front in condensed form\n", + "- Fresh messages chill at the end (where the model actually pays attention)\n", + "- **Upshot:** Model performs better AND you save spaceโ€”win-win\n", + "\n", + "**3. Keeps Working Memory From Exploding (Throwback to Notebook 1)**\n", + "- Working memory = your conversation backlog\n", + "- Without summarization, it just keeps growing like a digital hoarder's closet\n", + "- Summarization gives it a haircut regularly\n", + "- **Payoff:** Conversations that can actually go the distance\n", + "\n", + "### When Should You Reach for This Tool?\n", + "\n", + "**Great for:**\n", + "- โœ… Marathon conversations (10+ back-and-forths)\n", + "- โœ… Chats that have a narrative arc (customer support, coaching sessions)\n", + "- โœ… Situations where you want history but not ALL the history\n", + "- โœ… When the recent stuff matters most\n", + "\n", + "**Skip it when:**\n", + "- โŒ Quick exchanges (under 5 turnsโ€”don't overthink it)\n", + "- โŒ Every syllable counts (legal docs, medical consultations)\n", + "- โŒ You might need verbatim quotes from way back\n", + "- โŒ The extra LLM call for summarization costs too much time or money\n", + "\n", + "### Where Summarization Lives in Your Memory Stack\n", + "```\n", + "โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”\n", + "โ”‚ Your LLM Agent Brain โ”‚\n", + "โ”‚ โ”‚\n", + "โ”‚ Context Window (128K tokens available) โ”‚\n", + "โ”‚ โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”‚\n", + "โ”‚ โ”‚ 1. System Prompt (500 tokens) โ”‚ โ”‚\n", + "โ”‚ โ”‚ 2. Long-term Memory Bank (1,000 tokens) โ”‚ โ”‚\n", + "โ”‚ โ”‚ 3. RAG Retrieval Stuff (2,000 tokens) โ”‚ โ”‚\n", + "โ”‚ โ”‚ 4. Working Memory Zone: โ”‚ โ”‚\n", + "โ”‚ โ”‚ โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”‚ โ”‚\n", + "โ”‚ โ”‚ โ”‚ [COMPRESSED HISTORY] (500 tokens) โ”‚ โ”‚ โ”‚\n", + "โ”‚ โ”‚ โ”‚ - Critical facts from rounds 1-20 โ”‚ โ”‚ โ”‚\n", + "โ”‚ โ”‚ โ”‚ - Decisions that were locked in โ”‚ โ”‚ โ”‚\n", + "โ”‚ โ”‚ โ”‚ - User quirks and preferences โ”‚ โ”‚ โ”‚\n", + "โ”‚ โ”‚ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ”‚ โ”‚\n", + "โ”‚ โ”‚ Live Recent Messages (1,000 tokens) โ”‚ โ”‚\n", + "โ”‚ โ”‚ - Round 21: User shot + Assistant reply โ”‚ โ”‚\n", + "โ”‚ โ”‚ - Round 22: User shot + Assistant reply โ”‚ โ”‚\n", + "โ”‚ โ”‚ - Round 23: User shot + Assistant reply โ”‚ โ”‚\n", + "โ”‚ โ”‚ - Round 24: User shot + Assistant reply โ”‚ โ”‚\n", + "โ”‚ โ”‚ 5. 
Current Incoming Query (200 tokens) โ”‚ โ”‚\n", + "โ”‚ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ”‚\n", + "โ”‚ โ”‚\n", + "โ”‚ Running total: ~5,200 tokens (instead of 15Kโ€”nice!) โ”‚\n", + "โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n", + "```\n", + "\n", + "#### The Bottom Line: \n", + "Summarization is a *compression technique* for working memory that maintains conversation continuity while keeping token counts manageable." + ] + }, + { + "cell_type": "markdown", + "id": "3d6a9c3a31a589d0", + "metadata": {}, + "source": [ + "### ๐Ÿ”ฌ Research Foundation: Recursive Summarization\n", + "\n", + "Wang et al. (2023) in [\"Recursively Summarizing Enables Long-Term Dialogue Memory in Large Language Models\"](https://arxiv.org/abs/2308.15022) demonstrated that:\n", + "\n", + "**Key Insight:** Recursive summarization enables LLMs to handle extremely long conversations by:\n", + "1. Memorizing small dialogue contexts\n", + "2. Recursively producing new memory using previous memory + new contexts\n", + "3. Maintaining consistency across long conversations\n", + "\n", + "**Their findings:**\n", + "- Improved response consistency in long-context conversations\n", + "- Works well with both long-context models (8K, 16K) and retrieval-enhanced LLMs\n", + "- Provides a practical solution for modeling extremely long contexts\n", + "\n", + "**Practical Application:**\n", + "- Summarize old messages while keeping recent ones intact\n", + "- Preserve key information (facts, decisions, preferences)\n", + "- Compress redundant or less important information\n", + "\n", + "**References:**\n", + "- Wang, Q., Fu, Y., Cao, Y., Wang, S., Tian, Z., & Ding, L. (2023). Recursively Summarizing Enables Long-Term Dialogue Memory in Large Language Models. *Neurocomputing* (Accepted).\n" + ] + }, + { + "cell_type": "markdown", + "id": "80bbd6185d7e1fd4", + "metadata": {}, + "source": [ + "### Theory: What to Preserve vs. 
Compress\n", + "\n", + "When summarizing conversations, we need to be strategic about what to keep and what to compress.\n", + "\n", + "**What to Preserve:**\n", + "- โœ… Key facts and decisions\n", + "- โœ… Student preferences and goals\n", + "- โœ… Important course recommendations\n", + "- โœ… Prerequisites and requirements\n", + "- โœ… Recent context (last few messages)\n", + "\n", + "**What to Compress:**\n", + "- ๐Ÿ“ฆ Small talk and greetings\n", + "- ๐Ÿ“ฆ Redundant information\n", + "- ๐Ÿ“ฆ Old conversation details\n", + "- ๐Ÿ“ฆ Resolved questions\n", + "\n", + "**When to Summarize:**\n", + "- Token threshold exceeded (e.g., > 2000 tokens)\n", + "- Message count threshold exceeded (e.g., > 10 messages)\n", + "- Time-based (e.g., after 1 hour)\n", + "- Manual trigger\n" + ] + }, + { + "cell_type": "markdown", + "id": "23b8486d8bc89f7b", + "metadata": {}, + "source": [ + "### Building Summarization Step-by-Step\n", + "\n", + "Let's build our summarization system incrementally, starting with simple components.\n", + "\n", + "#### Step 1: Create a data structure for conversation messages\n", + "\n", + "**What we're building:** A data structure to represent individual messages with metadata.\n", + "\n", + "**Why it's needed:** We need to track not just the message content, but also:\n", + "- Who sent it (user, assistant, system)\n", + "- When it was sent (timestamp)\n", + "- How many tokens it uses (for threshold checks)\n", + "\n", + "**How it works:** Python's `@dataclass` decorator creates a clean, type-safe structure with automatic initialization and token counting.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "3db188fb9f01d750", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-02T01:09:14.938898Z", + "iopub.status.busy": "2025-11-02T01:09:14.938801Z", + "iopub.status.idle": "2025-11-02T01:09:14.941541Z", + "shell.execute_reply": "2025-11-02T01:09:14.941043Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… ConversationMessage dataclass defined\n", + " Example - Role: user, Tokens: 9\n" + ] + } + ], + "source": [ + "@dataclass\n", + "class ConversationMessage:\n", + " \"\"\"Represents a single conversation message.\"\"\"\n", + " role: str # \"user\", \"assistant\", \"system\"\n", + " content: str\n", + " timestamp: float = field(default_factory=time.time)\n", + " token_count: Optional[int] = None\n", + "\n", + " def __post_init__(self):\n", + " if self.token_count is None:\n", + " self.token_count = count_tokens(self.content)\n", + "\n", + "# Test it\n", + "test_msg = ConversationMessage(\n", + " role=\"user\",\n", + " content=\"What courses do you recommend for machine learning?\"\n", + ")\n", + "print(f\"โœ… ConversationMessage dataclass defined\")\n", + "print(f\" Example - Role: {test_msg.role}, Tokens: {test_msg.token_count}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "5d49f8f61e276661", + "metadata": {}, + "source": [ + "#### Step 2: Create a function to check if summarization is needed\n", + "\n", + "**What we're building:** A decision function that determines when to trigger summarization.\n", + "\n", + "**Why it's needed:** We don't want to summarize too early (loses context) or too late (hits token limits). 
We need smart thresholds.\n", + "\n", + "**How it works:**\n", + "- Checks if we have enough messages to make summarization worthwhile\n", + "- Calculates total token count across all messages\n", + "- Returns `True` if either threshold (tokens OR messages) is exceeded\n", + "- Ensures we keep at least `keep_recent` messages unsummarized\n", + "\n", + "**When to summarize:**\n", + "- Token threshold: Prevents hitting model limits (e.g., >2000 tokens)\n", + "- Message threshold: Prevents conversation from getting too long (e.g., >10 messages)\n", + "- Keep recent: Preserves the most relevant context (e.g., last 4 messages)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "290935fa536cb8aa", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-02T01:09:14.942848Z", + "iopub.status.busy": "2025-11-02T01:09:14.942733Z", + "iopub.status.idle": "2025-11-02T01:09:14.945144Z", + "shell.execute_reply": "2025-11-02T01:09:14.944725Z" + } + }, + "outputs": [], + "source": [ + "def should_summarize(\n", + " messages: List[ConversationMessage],\n", + " token_threshold: int = 2000,\n", + " message_threshold: int = 10,\n", + " keep_recent: int = 4\n", + ") -> bool:\n", + " \"\"\"\n", + " Determine if conversation needs summarization.\n", + "\n", + " Args:\n", + " messages: List of conversation messages\n", + " token_threshold: Summarize when total tokens exceed this\n", + " message_threshold: Summarize when message count exceeds this\n", + " keep_recent: Number of recent messages to keep unsummarized\n", + "\n", + " Returns:\n", + " True if summarization is needed\n", + " \"\"\"\n", + " # Don't summarize if we have very few messages\n", + " if len(messages) <= keep_recent:\n", + " return False\n", + "\n", + " # Calculate total tokens\n", + " total_tokens = sum(msg.token_count for msg in messages)\n", + "\n", + " # Summarize if either threshold is exceeded\n", + " return (total_tokens > token_threshold or\n", + " len(messages) > message_threshold)\n" + ] + }, + { + "cell_type": "markdown", + "id": "37993b003426e127", + "metadata": {}, + "source": [ + "#### Step 3: Create a prompt template for summarization\n", + "\n", + "**What we're building:** A carefully crafted prompt that instructs the LLM on how to summarize conversations.\n", + "\n", + "**Why it's needed:** Generic summarization loses important details. We need domain-specific instructions that preserve what matters for course advisory conversations.\n", + "\n", + "**How it works:**\n", + "- Specifies the context (student-advisor conversation)\n", + "- Lists exactly what to preserve (decisions, requirements, goals, courses, issues)\n", + "- Requests structured output (bullet points for clarity)\n", + "- Emphasizes being \"specific and actionable\" (not vague summaries)\n", + "\n", + "**Design principle:** The prompt template is the \"instructions\" for the summarization LLM. Better instructions = better summaries.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "3a39408752c4a504", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-02T01:09:14.946915Z", + "iopub.status.busy": "2025-11-02T01:09:14.946793Z", + "iopub.status.idle": "2025-11-02T01:09:14.948854Z", + "shell.execute_reply": "2025-11-02T01:09:14.948284Z" + } + }, + "outputs": [], + "source": [ + "summarization_prompt_template = \"\"\"You are summarizing a conversation between a student and a course advisor.\n", + "\n", + "Create a concise summary that preserves:\n", + "1. Key decisions made\n", + "2. 
Important requirements or prerequisites discussed\n", + "3. Student's goals, preferences, and constraints\n", + "4. Specific courses mentioned and recommendations given\n", + "5. Any problems or issues that need follow-up\n", + "\n", + "Format as bullet points. Be specific and actionable.\n", + "\n", + "Conversation to summarize:\n", + "{conversation}\n", + "\n", + "Summary:\"\"\"\n" + ] + }, + { + "cell_type": "markdown", + "id": "2bca0c3b7f31459f", + "metadata": {}, + "source": [ + "#### Step 4: Create a function to generate summaries using the LLM\n", + "\n", + "**What we're building:** A function that takes messages and produces an intelligent summary using an LLM.\n", + "\n", + "**Why it's needed:** This is where the actual summarization happens. We need to:\n", + "- Format the conversation for the LLM\n", + "- Call the LLM with our prompt template\n", + "- Package the summary as a system message\n", + "\n", + "**How it works:**\n", + "1. Formats messages as \"User: ...\" and \"Assistant: ...\" text\n", + "2. Inserts formatted conversation into the prompt template\n", + "3. Calls the LLM asynchronously (non-blocking)\n", + "4. Wraps the summary in `[CONVERSATION SUMMARY]` marker for easy identification\n", + "5. Returns as a system message (distinguishes it from user/assistant messages)\n", + "\n", + "**Why async?** Summarization can take 1-3 seconds. Async allows other operations to continue while waiting for the LLM response.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "8b41ae7eb2d88f5a", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-02T01:09:14.950203Z", + "iopub.status.busy": "2025-11-02T01:09:14.950110Z", + "iopub.status.idle": "2025-11-02T01:09:14.952595Z", + "shell.execute_reply": "2025-11-02T01:09:14.952206Z" + } + }, + "outputs": [], + "source": [ + "async def create_summary(\n", + " messages: List[ConversationMessage],\n", + " llm: ChatOpenAI\n", + ") -> ConversationMessage:\n", + " \"\"\"\n", + " Create intelligent summary of conversation messages.\n", + "\n", + " Args:\n", + " messages: List of messages to summarize\n", + " llm: Language model for generating summary\n", + "\n", + " Returns:\n", + " ConversationMessage containing the summary\n", + " \"\"\"\n", + " # Format conversation for summarization\n", + " conversation_text = \"\\n\".join([\n", + " f\"{msg.role.title()}: {msg.content}\"\n", + " for msg in messages\n", + " ])\n", + "\n", + " # Generate summary using LLM\n", + " prompt = summarization_prompt_template.format(conversation=conversation_text)\n", + " response = await llm.ainvoke([HumanMessage(content=prompt)])\n", + "\n", + " summary_content = f\"[CONVERSATION SUMMARY]\\n{response.content}\"\n", + "\n", + " # Create summary message\n", + " summary_msg = ConversationMessage(\n", + " role=\"system\",\n", + " content=summary_content,\n", + " timestamp=messages[-1].timestamp\n", + " )\n", + "\n", + " return summary_msg\n" + ] + }, + { + "cell_type": "markdown", + "id": "56eb87c914424cd", + "metadata": {}, + "source": [ + "#### Step 5: Create a function to compress conversations\n", + "\n", + "**What we're building:** The main compression function that orchestrates the entire summarization process.\n", + "\n", + "**Why it's needed:** This ties together all the previous components into a single, easy-to-use function that:\n", + "- Decides whether to summarize\n", + "- Splits messages into old vs. 
recent\n", + "- Generates the summary\n", + "- Returns the compressed conversation\n", + "\n", + "**How it works:**\n", + "1. **Check:** Calls `should_summarize()` to see if compression is needed\n", + "2. **Split:** Divides messages into `old_messages` (to summarize) and `recent_messages` (to keep)\n", + "3. **Summarize:** Calls `create_summary()` on old messages\n", + "4. **Combine:** Returns `[summary] + recent_messages`\n", + "\n", + "**The result:** A conversation that's 50-80% smaller but preserves all essential information.\n", + "\n", + "**Example:**\n", + "- Input: 20 messages (4,000 tokens)\n", + "- Output: 1 summary + 4 recent messages (1,200 tokens)\n", + "- Savings: 70% reduction in tokens\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "4b904a38b1bad2b9", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-02T01:09:14.953876Z", + "iopub.status.busy": "2025-11-02T01:09:14.953787Z", + "iopub.status.idle": "2025-11-02T01:09:14.955880Z", + "shell.execute_reply": "2025-11-02T01:09:14.955487Z" + } + }, + "outputs": [], + "source": [ + "async def compress_conversation(\n", + " messages: List[ConversationMessage],\n", + " llm: ChatOpenAI,\n", + " token_threshold: int = 2000,\n", + " message_threshold: int = 10,\n", + " keep_recent: int = 4\n", + ") -> List[ConversationMessage]:\n", + " \"\"\"\n", + " Compress conversation by summarizing old messages and keeping recent ones.\n", + "\n", + " Args:\n", + " messages: List of conversation messages\n", + " llm: Language model for generating summaries\n", + " token_threshold: Summarize when total tokens exceed this\n", + " message_threshold: Summarize when message count exceeds this\n", + " keep_recent: Number of recent messages to keep unsummarized\n", + "\n", + " Returns:\n", + " List of messages: [summary] + [recent messages]\n", + " \"\"\"\n", + " # Check if summarization is needed\n", + " if not should_summarize(messages, token_threshold, message_threshold, keep_recent):\n", + " return messages\n", + "\n", + " # Split into old and recent\n", + " old_messages = messages[:-keep_recent]\n", + " recent_messages = messages[-keep_recent:]\n", + "\n", + " if not old_messages:\n", + " return messages\n", + "\n", + " # Summarize old messages\n", + " summary = await create_summary(old_messages, llm)\n", + "\n", + " # Return summary + recent messages\n", + " return [summary] + recent_messages\n" + ] + }, + { + "cell_type": "markdown", + "id": "668fce6b8d81c302", + "metadata": {}, + "source": [ + "#### Step 6: Combine into a reusable class\n", + "\n", + "Now that we've built and tested each component, let's combine them into a reusable class.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "8324715c96096689", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-02T01:09:14.957043Z", + "iopub.status.busy": "2025-11-02T01:09:14.956964Z", + "iopub.status.idle": "2025-11-02T01:09:14.959582Z", + "shell.execute_reply": "2025-11-02T01:09:14.959215Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Summarization system built:\n", + " - ConversationMessage dataclass\n", + " - should_summarize() function\n", + " - Summarization prompt template\n", + " - create_summary() function\n", + " - compress_conversation() function\n", + " - ConversationSummarizer class\n" + ] + } + ], + "source": [ + "class ConversationSummarizer:\n", + " \"\"\"Manages conversation summarization to keep token counts manageable.\"\"\"\n", + "\n", + " def 
__init__(\n",
+    "        self,\n",
+    "        llm: ChatOpenAI,\n",
+    "        token_threshold: int = 2000,\n",
+    "        message_threshold: int = 10,\n",
+    "        keep_recent: int = 4\n",
+    "    ):\n",
+    "        \"\"\"\n",
+    "        Initialize the summarizer.\n",
+    "\n",
+    "        Args:\n",
+    "            llm: Language model for generating summaries\n",
+    "            token_threshold: Summarize when total tokens exceed this\n",
+    "            message_threshold: Summarize when message count exceeds this\n",
+    "            keep_recent: Number of recent messages to keep unsummarized\n",
+    "        \"\"\"\n",
+    "        self.llm = llm\n",
+    "        self.token_threshold = token_threshold\n",
+    "        self.message_threshold = message_threshold\n",
+    "        self.keep_recent = keep_recent\n",
+    "        self.summarization_prompt = summarization_prompt_template\n",
+    "\n",
+    "    def should_summarize(self, messages: List[ConversationMessage]) -> bool:\n",
+    "        \"\"\"Determine if conversation needs summarization.\"\"\"\n",
+    "        return should_summarize(\n",
+    "            messages,\n",
+    "            self.token_threshold,\n",
+    "            self.message_threshold,\n",
+    "            self.keep_recent\n",
+    "        )\n",
+    "\n",
+    "    async def summarize_conversation(\n",
+    "        self,\n",
+    "        messages: List[ConversationMessage]\n",
+    "    ) -> ConversationMessage:\n",
+    "        \"\"\"Create intelligent summary of conversation messages.\"\"\"\n",
+    "        return await create_summary(messages, self.llm)\n",
+    "\n",
+    "    async def compress_conversation(\n",
+    "        self,\n",
+    "        messages: List[ConversationMessage]\n",
+    "    ) -> List[ConversationMessage]:\n",
+    "        \"\"\"Compress conversation by summarizing old messages and keeping recent ones.\"\"\"\n",
+    "        return await compress_conversation(\n",
+    "            messages,\n",
+    "            self.llm,\n",
+    "            self.token_threshold,\n",
+    "            self.message_threshold,\n",
+    "            self.keep_recent\n",
+    "        )\n",
+    "\n",
+    "print(\"\"\"โœ… Summarization system built:\n",
+    "   - ConversationMessage dataclass\n",
+    "   - should_summarize() function\n",
+    "   - Summarization prompt template\n",
+    "   - create_summary() function\n",
+    "   - compress_conversation() function\n",
+    "   - ConversationSummarizer class\"\"\")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "beb98376eb2b00b0",
+   "metadata": {},
+   "source": [
+    "### Demo 3: Test Summarization\n",
+    "\n",
+    "Let's test the summarizer with a sample conversation.\n",
+    "\n",
+    "#### Step 1: Create a sample conversation\n",
+    "\n",
+    "**What:** Creating a realistic 16-message conversation about course planning.\n",
+    "\n",
+    "**Why:** We need a conversation long enough to trigger summarization (>10 messages, >500 tokens) so we can see the compression in action.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "3e63fdaf5a2a2587",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-11-02T01:09:14.960594Z",
+     "iopub.status.busy": "2025-11-02T01:09:14.960526Z",
+     "iopub.status.idle": "2025-11-02T01:09:14.963210Z",
+     "shell.execute_reply": "2025-11-02T01:09:14.962816Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Original conversation:\n",
+      "  Messages: 16\n",
+      "  Total tokens: 261\n",
+      "  Average tokens per message: 16.3\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Create a sample long conversation\n",
+    "sample_conversation = [\n",
+    "    ConversationMessage(\"user\", \"Hi, I'm interested in learning about machine learning courses\"),\n",
+    "    ConversationMessage(\"assistant\", \"Great! Redis University offers several ML courses. CS401 Machine Learning is our flagship course. 
It covers supervised learning, neural networks, and practical applications.\"),\n", + " ConversationMessage(\"user\", \"What are the prerequisites for CS401?\"),\n", + " ConversationMessage(\"assistant\", \"CS401 requires CS201 Data Structures and MATH301 Linear Algebra. Have you completed these courses?\"),\n", + " ConversationMessage(\"user\", \"I've completed CS101 but not CS201 yet\"),\n", + " ConversationMessage(\"assistant\", \"Perfect! CS201 is the next logical step. It covers algorithms and data structures essential for ML. It's offered every semester.\"),\n", + " ConversationMessage(\"user\", \"How difficult is MATH301?\"),\n", + " ConversationMessage(\"assistant\", \"MATH301 is moderately challenging. It covers vectors, matrices, and eigenvalues used in ML algorithms. Most students find it manageable with consistent practice.\"),\n", + " ConversationMessage(\"user\", \"Can I take both CS201 and MATH301 together?\"),\n", + " ConversationMessage(\"assistant\", \"Yes, that's a good combination! They complement each other well. Many students take them concurrently.\"),\n", + " ConversationMessage(\"user\", \"What about CS401 after that?\"),\n", + " ConversationMessage(\"assistant\", \"CS401 is perfect after completing both prerequisites. It's our most popular AI course with hands-on projects.\"),\n", + " ConversationMessage(\"user\", \"When is CS401 offered?\"),\n", + " ConversationMessage(\"assistant\", \"CS401 is offered in Fall and Spring semesters. The Fall section typically fills up quickly, so register early!\"),\n", + " ConversationMessage(\"user\", \"Great! What's the workload like?\"),\n", + " ConversationMessage(\"assistant\", \"CS401 requires about 10-12 hours per week including lectures, assignments, and projects. There are 4 major projects throughout the semester.\"),\n", + "]\n", + "\n", + "# Calculate original metrics\n", + "original_token_count = sum(msg.token_count for msg in sample_conversation)\n", + "print(f\"Original conversation:\")\n", + "print(f\" Messages: {len(sample_conversation)}\")\n", + "print(f\" Total tokens: {original_token_count}\")\n", + "print(f\" Average tokens per message: {original_token_count / len(sample_conversation):.1f}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "b824592502d5305", + "metadata": {}, + "source": [ + "#### Step 2: Configure the summarizer\n", + "\n", + "**What:** Setting up the `ConversationSummarizer` with specific thresholds.\n", + "\n", + "**Why:** We use a low token threshold (500) to force summarization on our sample conversation. 
In production, you'd use higher thresholds (2000-4000 tokens). (Strictly speaking, our 261-token sample never crosses even this low token threshold - it is the message threshold of 10, against our 16 messages, that fires here.)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "1f1cd42e5cb65a39",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-11-02T01:09:14.964229Z",
+     "iopub.status.busy": "2025-11-02T01:09:14.964154Z",
+     "iopub.status.idle": "2025-11-02T01:09:14.965877Z",
+     "shell.execute_reply": "2025-11-02T01:09:14.965551Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Summarizer configuration:\n",
+      "  Token threshold: 500\n",
+      "  Message threshold: 10\n",
+      "  Keep recent: 4\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Test summarization\n",
+    "summarizer = ConversationSummarizer(\n",
+    "    llm=llm,\n",
+    "    token_threshold=500,  # Low threshold for demo\n",
+    "    message_threshold=10,\n",
+    "    keep_recent=4\n",
+    ")\n",
+    "\n",
+    "print(f\"Summarizer configuration:\")\n",
+    "print(f\"  Token threshold: {summarizer.token_threshold}\")\n",
+    "print(f\"  Message threshold: {summarizer.message_threshold}\")\n",
+    "print(f\"  Keep recent: {summarizer.keep_recent}\")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ce7b283d8917e353",
+   "metadata": {},
+   "source": [
+    "#### Step 3: Check if summarization is needed\n",
+    "\n",
+    "**What:** Testing the `should_summarize()` logic.\n",
+    "\n",
+    "**Why:** Before compressing, we verify that our conversation actually exceeds the thresholds. This demonstrates the decision logic in action.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "96d60c07d558dbe2",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-11-02T01:09:14.966951Z",
+     "iopub.status.busy": "2025-11-02T01:09:14.966883Z",
+     "iopub.status.idle": "2025-11-02T01:09:14.968571Z",
+     "shell.execute_reply": "2025-11-02T01:09:14.968198Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Should summarize? True\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Check if summarization is needed\n",
+    "should_summarize_result = summarizer.should_summarize(sample_conversation)\n",
+    "print(f\"Should summarize? {should_summarize_result}\")\n"
+   ]
+  },
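+  {
+   "cell_type": "markdown",
+   "id": "9a1c2e3f4b5d6071",
+   "metadata": {},
+   "source": [
+    "Before compressing, it's worth confirming *which* threshold fired. A quick optional check, using only the objects defined above (it assumes `should_summarize()` uses a simple `>` comparison against each threshold):\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8b2d3f4a5c6e7182",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Optional sanity check: which threshold triggered summarization?\n",
+    "total_tokens = sum(msg.token_count for msg in sample_conversation)\n",
+    "print(f\"Token check:   {total_tokens} > {summarizer.token_threshold}? {total_tokens > summarizer.token_threshold}\")\n",
+    "print(f\"Message check: {len(sample_conversation)} > {summarizer.message_threshold}? {len(sample_conversation) > summarizer.message_threshold}\")\n"
+   ]
+  },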
+  {
+   "cell_type": "markdown",
+   "id": "956554c8c979d1a4",
+   "metadata": {},
+   "source": [
+    "#### Step 4: Compress the conversation\n",
+    "\n",
+    "**What:** Running the full compression pipeline: summarize old messages, keep recent ones.\n",
+    "\n",
+    "**Why:** This is the core functionality - transforming 16 messages into 1 summary + 4 recent messages. On long conversations this dramatically reduces token count while preserving key information; on our short sample, watch the numbers closely - the summary itself has a fixed token cost.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "3566e3ee779cc9b6",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-11-02T01:09:14.969519Z",
+     "iopub.status.busy": "2025-11-02T01:09:14.969463Z",
+     "iopub.status.idle": "2025-11-02T01:09:19.592105Z",
+     "shell.execute_reply": "2025-11-02T01:09:19.591549Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "After summarization:\n",
+      "  Messages: 5\n",
+      "  Total tokens: 300\n",
+      "  Token savings: -39 (-14.9%)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Compress the conversation\n",
+    "compressed = await summarizer.compress_conversation(sample_conversation)\n",
+    "\n",
+    "compressed_token_count = sum(msg.token_count for msg in compressed)\n",
+    "token_savings = original_token_count - compressed_token_count\n",
+    "savings_percentage = (token_savings / original_token_count) * 100\n",
+    "\n",
+    "print(f\"After summarization:\")\n",
+    "print(f\"  Messages: {len(compressed)}\")\n",
+    "print(f\"  Total tokens: {compressed_token_count}\")\n",
+    "print(f\"  Token savings: {token_savings} ({savings_percentage:.1f}%)\")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ee85f81eedf9cae1",
+   "metadata": {},
+   "source": [
+    "#### Step 5: Examine the compressed conversation structure\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "82e6fb297080ad8",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-11-02T01:09:19.593595Z",
+     "iopub.status.busy": "2025-11-02T01:09:19.593471Z",
+     "iopub.status.idle": "2025-11-02T01:09:19.596027Z",
+     "shell.execute_reply": "2025-11-02T01:09:19.595562Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Compressed conversation structure:\n",
+      "  1. ๐Ÿ“‹ [system] [CONVERSATION SUMMARY] - **Key Decisions Made:** - The student plans to take C...\n",
+      "      Tokens: 236\n",
+      "  2. ๐Ÿ‘ค [user] When is CS401 offered?...\n",
+      "      Tokens: 6\n",
+      "  3. ๐Ÿค– [assistant] CS401 is offered in Fall and Spring semesters. The Fall section typically fills ...\n",
+      "      Tokens: 22\n",
+      "  4. ๐Ÿ‘ค [user] Great! What's the workload like?...\n",
+      "      Tokens: 7\n",
+      "  5. ๐Ÿค– [assistant] CS401 requires about 10-12 hours per week including lectures, assignments, and p...\n",
+      "      Tokens: 29\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(\"Compressed conversation structure:\")\n",
+    "for i, msg in enumerate(compressed):\n",
+    "    role_icon = \"๐Ÿ“‹\" if msg.role == \"system\" else \"๐Ÿ‘ค\" if msg.role == \"user\" else \"๐Ÿค–\"\n",
+    "    content_preview = msg.content[:80].replace('\\n', ' ')\n",
+    "    print(f\"  {i+1}. {role_icon} [{msg.role}] {content_preview}...\")\n",
+    "    print(f\"      Tokens: {msg.token_count}\")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4cb252a2997a22ba",
+   "metadata": {},
+   "source": [
+    "#### Results Analysis\n",
+    "\n",
+    "**What happened:**\n",
+    "- Original: 16 messages, 261 tokens\n",
+    "- Compressed: 5 messages (1 summary + 4 recent), 300 tokens\n",
+    "- Savings: negative this time (-15%) - the 236-token summary cost more than the 12 short messages it replaced\n",
+    "\n",
+    "**Why no savings here?** Our sample messages average only ~16 tokens each. Summarization carries a fixed overhead (the summary itself), so it pays off when the old messages are collectively large - on real conversations of thousands of tokens, expect 50-80% reductions.\n",
+    "\n",
+    "**Key benefits:**\n",
+    "- Preserved recent context (last 4 messages)\n",
+    "- Summarized older messages into key facts\n",
+    "- Maintained conversation continuity\n",
+    "- Reduces token costs significantly once conversations grow long\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a896bce27c392ee9",
+   "metadata": {},
+   "source": [
+    "---\n",
+    "\n",
+    "## ๐Ÿ”ง Part 3: Context Compression Strategies\n",
+    "\n",
+    "In Part 2, we built a complete summarization system using LLMs to compress conversation history. But summarization isn't the only way to manage context - and it's not always optimal.\n",
+    "\n",
+    "Let's explore **four different compression strategies** and understand when to use each one:\n",
+    "\n",
+    "1. **Truncation** - Token-aware, keeps recent messages within budget\n",
+    "2. **Sliding Window** - Message-aware, maintains fixed window size\n",
+    "3. **Priority-Based** - Intelligent selection without LLM calls\n",
+    "4. **Summarization** - High quality compression using LLM (from Part 2)\n",
+    "\n",
+    "Each strategy has different trade-offs in **speed**, **cost**, and **quality**. By the end of this part, you'll know how to choose the right strategy for your use case.\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bbe2737aeb03474",
+   "metadata": {},
+   "source": [
+    "### Theory: Four Compression Approaches\n",
+    "\n",
+    "Let's explore four different strategies, each with different trade-offs:\n",
+    "\n",
+    "**1. Truncation (Token-Aware)**\n",
+    "- Keep recent messages within token budget\n",
+    "- โœ… Pros: Fast, no LLM calls, respects context limits\n",
+    "- โŒ Cons: Variable message count, loses old context\n",
+    "- **Best for:** Token-constrained applications, API limits\n",
+    "\n",
+    "**2. Sliding Window (Message-Aware)**\n",
+    "- Keep exactly N most recent messages\n",
+    "- โœ… Pros: Fastest, predictable count, constant memory\n",
+    "- โŒ Cons: May exceed token limits, loses old context\n",
+    "- **Best for:** Fixed-size buffers, real-time chat\n",
+    "\n",
+    "**3. Priority-Based (Balanced)**\n",
+    "- Score messages by importance, keep highest-scoring\n",
+    "- โœ… Pros: Preserves important context, no LLM calls\n",
+    "- โŒ Cons: Requires good scoring logic, may lose temporal flow\n",
+    "- **Best for:** Production applications needing balance\n",
+    "\n",
+    "**4. Summarization (High Quality)**\n",
+    "- Use LLM to create intelligent summaries\n",
+    "- โœ… Pros: Preserves meaning, high quality\n",
+    "- โŒ Cons: Slower, costs tokens, requires LLM call\n",
+    "- **Best for:** High-value conversations, quality-critical applications\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2bb5f28d6ed343f6",
+   "metadata": {},
+   "source": [
+    "### Building Compression Strategies Step-by-Step\n",
+    "\n",
+    "Let's build each strategy incrementally, starting with the simplest.\n",
+    "\n",
+    "#### Step 1: Define a base interface for compression strategies\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "7b053a7b2c242989",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-11-02T01:09:19.597470Z",
+     "iopub.status.busy": "2025-11-02T01:09:19.597376Z",
+     "iopub.status.idle": "2025-11-02T01:09:19.599313Z",
+     "shell.execute_reply": "2025-11-02T01:09:19.598862Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "class CompressionStrategy:\n",
+    "    \"\"\"Base class for compression strategies.\"\"\"\n",
+    "\n",
+    "    def compress(\n",
+    "        self,\n",
+    "        messages: List[ConversationMessage],\n",
+    "        max_tokens: int\n",
+    "    ) -> List[ConversationMessage]:\n",
+    "        \"\"\"Compress messages to fit within max_tokens.\"\"\"\n",
+    "        raise NotImplementedError\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e23ab8bf105c70aa",
+   "metadata": {},
+   "source": [
+    "#### Step 2: Implement Truncation Strategy (Simplest)\n",
+    "\n",
+    "This strategy simply keeps the most recent messages that fit within the token budget.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "id": "cf8c2576cad8bfc4",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-11-02T01:09:19.600555Z",
+     "iopub.status.busy": "2025-11-02T01:09:19.600451Z",
+     "iopub.status.idle": "2025-11-02T01:09:19.602616Z",
+     "shell.execute_reply": "2025-11-02T01:09:19.602239Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "class TruncationStrategy(CompressionStrategy):\n",
+    "    \"\"\"Keep only the most recent messages within token budget.\"\"\"\n",
+    "\n",
+    "    def compress(\n",
+    "        self,\n",
+    "        messages: List[ConversationMessage],\n",
+    "        max_tokens: int\n",
+    "    ) -> List[ConversationMessage]:\n",
+    "        \"\"\"Keep most recent messages within token budget.\"\"\"\n",
+    "        compressed = []\n",
+    "        total_tokens = 0\n",
+    "\n",
+    "        # Work backwards from most recent\n",
+    "        for msg in reversed(messages):\n",
+    "            if total_tokens + msg.token_count <= max_tokens:\n",
+    "                compressed.insert(0, msg)\n",
+    "                total_tokens += msg.token_count\n",
+    "            else:\n",
+    "                break\n",
+    "\n",
+    "        return compressed\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8fcd84d939f70075",
+   "metadata": {},
+   "source": [
+    "#### Step 2.5: Implement Sliding Window Strategy (Fixed Window)\n",
+    "\n",
+    "**What we're building:** A strategy that maintains a fixed-size window of the N most recent messages.\n",
+    "\n",
+    "**Why it's different from truncation:**\n",
+    "- **Truncation:** Reactive - keeps messages until token budget exceeded, then removes oldest\n",
+    "- **Sliding Window:** Proactive - always maintains exactly N messages regardless of tokens\n",
+    "\n",
+    "**When to use:**\n",
+    "- Real-time chat where you want constant context size\n",
+    "- Systems with predictable message patterns\n",
+    "- When simplicity matters more than token optimization\n",
+    "\n",
+    "**Trade-off:** May exceed token limits if messages are very long.\n",
+    "\n",
+    "**How it works:** Simply returns the last N messages using Python list slicing 
(`messages[-N:]`).\n" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "a683df2353cdfdc4", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-02T01:09:19.603837Z", + "iopub.status.busy": "2025-11-02T01:09:19.603740Z", + "iopub.status.idle": "2025-11-02T01:09:19.605932Z", + "shell.execute_reply": "2025-11-02T01:09:19.605526Z" + } + }, + "outputs": [], + "source": [ + "class SlidingWindowStrategy(CompressionStrategy):\n", + " \"\"\"Keep only the last N messages (fixed window size).\"\"\"\n", + "\n", + " def __init__(self, window_size: int = 10):\n", + " \"\"\"\n", + " Initialize sliding window strategy.\n", + "\n", + " Args:\n", + " window_size: Number of recent messages to keep\n", + " \"\"\"\n", + " self.window_size = window_size\n", + "\n", + " def compress(\n", + " self,\n", + " messages: List[ConversationMessage],\n", + " max_tokens: int\n", + " ) -> List[ConversationMessage]:\n", + " \"\"\"\n", + " Keep only the last N messages.\n", + "\n", + " Note: Ignores max_tokens parameter - always keeps exactly window_size messages.\n", + " \"\"\"\n", + " if len(messages) <= self.window_size:\n", + " return messages\n", + "\n", + " return messages[-self.window_size:]\n" + ] + }, + { + "cell_type": "markdown", + "id": "42299c4601c4f31a", + "metadata": {}, + "source": [ + "#### Step 3: Implement Priority-Based Strategy (Intelligent Selection)\n", + "\n", + "This strategy scores messages by importance and keeps the highest-scoring ones.\n", + "\n", + "First, let's create a function to calculate message importance:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "739168f3fa76a165", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-02T01:09:19.607042Z", + "iopub.status.busy": "2025-11-02T01:09:19.606960Z", + "iopub.status.idle": "2025-11-02T01:09:19.609274Z", + "shell.execute_reply": "2025-11-02T01:09:19.608876Z" + } + }, + "outputs": [], + "source": [ + "def calculate_message_importance(msg: ConversationMessage) -> float:\n", + " \"\"\"\n", + " Calculate importance score for a message.\n", + "\n", + " Higher scores = more important.\n", + " \"\"\"\n", + " score = 0.0\n", + " content_lower = msg.content.lower()\n", + "\n", + " # Course codes are important (CS401, MATH301, etc.)\n", + " if any(code in content_lower for code in ['cs', 'math', 'eng']):\n", + " score += 2.0\n", + "\n", + " # Questions are important\n", + " if '?' 
in msg.content:\n", + " score += 1.5\n", + "\n", + " # Prerequisites and requirements are important\n", + " if any(word in content_lower for word in ['prerequisite', 'require', 'need']):\n", + " score += 1.5\n", + "\n", + " # Preferences and goals are important\n", + " if any(word in content_lower for word in ['prefer', 'want', 'goal', 'interested']):\n", + " score += 1.0\n", + "\n", + " # User messages slightly more important (their needs)\n", + " if msg.role == 'user':\n", + " score += 0.5\n", + "\n", + " # Longer messages often have more content\n", + " if msg.token_count > 50:\n", + " score += 0.5\n", + "\n", + " return score\n" + ] + }, + { + "cell_type": "markdown", + "id": "c1d3e19b190c9e3c", + "metadata": {}, + "source": [ + "Now let's create the Priority-Based strategy class:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "f66e696bacf5a96a", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-02T01:09:19.610359Z", + "iopub.status.busy": "2025-11-02T01:09:19.610267Z", + "iopub.status.idle": "2025-11-02T01:09:19.613070Z", + "shell.execute_reply": "2025-11-02T01:09:19.612474Z" + } + }, + "outputs": [], + "source": [ + "class PriorityBasedStrategy(CompressionStrategy):\n", + " \"\"\"Keep highest-priority messages within token budget.\"\"\"\n", + "\n", + " def calculate_importance(self, msg: ConversationMessage) -> float:\n", + " \"\"\"Calculate importance score for a message.\"\"\"\n", + " return calculate_message_importance(msg)\n", + "\n", + " def compress(\n", + " self,\n", + " messages: List[ConversationMessage],\n", + " max_tokens: int\n", + " ) -> List[ConversationMessage]:\n", + " \"\"\"Keep highest-priority messages within token budget.\"\"\"\n", + " # Score each message\n", + " scored_messages = [\n", + " (self.calculate_importance(msg), i, msg)\n", + " for i, msg in enumerate(messages)\n", + " ]\n", + "\n", + " # Sort by score (descending), then by index to maintain some order\n", + " scored_messages.sort(key=lambda x: (-x[0], x[1]))\n", + "\n", + " # Select messages within budget\n", + " selected = []\n", + " total_tokens = 0\n", + "\n", + " for score, idx, msg in scored_messages:\n", + " if total_tokens + msg.token_count <= max_tokens:\n", + " selected.append((idx, msg))\n", + " total_tokens += msg.token_count\n", + "\n", + " # Sort by original index to maintain conversation flow\n", + " selected.sort(key=lambda x: x[0])\n", + "\n", + " return [msg for idx, msg in selected]\n" + ] + }, + { + "cell_type": "markdown", + "id": "57f0400bdab30655", + "metadata": {}, + "source": [ + "#### Step 4: Wrap Summarization Strategy (Already Built in Part 2)\n", + "\n", + "**What we're doing:** Creating a `SummarizationStrategy` wrapper around the `ConversationSummarizer` we built in Part 2.\n", + "\n", + "**Why wrap it:** To make it compatible with the `CompressionStrategy` interface so we can compare it fairly with the other strategies in Demo 4.\n", + "\n", + "**Note:** We're not rebuilding summarization - we're just adapting what we already built to work alongside truncation, sliding window, and priority-based strategies. 
This is the adapter pattern in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "4c0fa64ab406ef95", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-02T01:09:19.614307Z", + "iopub.status.busy": "2025-11-02T01:09:19.614198Z", + "iopub.status.idle": "2025-11-02T01:09:19.616491Z", + "shell.execute_reply": "2025-11-02T01:09:19.616127Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Compression strategies implemented:\n", + " - CompressionStrategy base class\n", + " - TruncationStrategy (token-aware)\n", + " - SlidingWindowStrategy (message-aware)\n", + " - PriorityBasedStrategy (intelligent selection)\n", + " - SummarizationStrategy (LLM-based)\n" + ] + } + ], + "source": [ + "class SummarizationStrategy(CompressionStrategy):\n", + " \"\"\"Use LLM to create intelligent summaries.\"\"\"\n", + "\n", + " def __init__(self, summarizer: ConversationSummarizer):\n", + " self.summarizer = summarizer\n", + "\n", + " async def compress_async(\n", + " self,\n", + " messages: List[ConversationMessage],\n", + " max_tokens: int\n", + " ) -> List[ConversationMessage]:\n", + " \"\"\"Compress using summarization (async).\"\"\"\n", + " # Use the summarizer's logic\n", + " return await self.summarizer.compress_conversation(messages)\n", + "\n", + " def compress(\n", + " self,\n", + " messages: List[ConversationMessage],\n", + " max_tokens: int\n", + " ) -> List[ConversationMessage]:\n", + " \"\"\"Synchronous wrapper (not recommended, use compress_async).\"\"\"\n", + " raise NotImplementedError(\"Use compress_async for summarization strategy\")\n", + "\n", + "print(\"\"\"โœ… Compression strategies implemented:\n", + " - CompressionStrategy base class\n", + " - TruncationStrategy (token-aware)\n", + " - SlidingWindowStrategy (message-aware)\n", + " - PriorityBasedStrategy (intelligent selection)\n", + " - SummarizationStrategy (LLM-based)\"\"\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "1d0ddde791c5afc", + "metadata": {}, + "source": [ + "### Demo 4: Compare Compression Strategies\n", + "\n", + "Let's compare all four strategies on the same conversation to understand their trade-offs.\n", + "\n", + "#### Step 1: Set up the test\n", + "\n", + "**What:** Establishing baseline metrics for our comparison.\n", + "\n", + "**Why:** We need to know the original size (messages and tokens) to measure how much each strategy compresses and what it costs in terms of information loss.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "22b54c30ef8be4a8", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-02T01:09:19.617799Z", + "iopub.status.busy": "2025-11-02T01:09:19.617674Z", + "iopub.status.idle": "2025-11-02T01:09:19.619829Z", + "shell.execute_reply": "2025-11-02T01:09:19.619516Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Original conversation: 16 messages, 261 tokens\n", + "Target budget: 800 tokens\n", + "\n" + ] + } + ], + "source": [ + "# Use the same sample conversation from before\n", + "test_conversation = sample_conversation.copy()\n", + "max_tokens = 800 # Target token budget\n", + "\n", + "original_tokens = sum(msg.token_count for msg in test_conversation)\n", + "print(f\"\"\"Original conversation: {len(test_conversation)} messages, {original_tokens} tokens\n", + "Target budget: {max_tokens} tokens\n", + "\"\"\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "96dac15eec962562", + "metadata": {}, + "source": [ + "#### 
Step 2: Test Truncation Strategy\n",
+    "\n",
+    "**What:** Testing token-aware compression that keeps recent messages within budget.\n",
+    "\n",
+    "**Why:** Demonstrates how truncation guarantees staying under token limits by working backwards from the most recent message. (With a 261-token conversation and an 800-token budget, everything fits - so expect zero compression in this run.)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "id": "be20f6779afc21e9",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-11-02T01:09:19.621097Z",
+     "iopub.status.busy": "2025-11-02T01:09:19.621019Z",
+     "iopub.status.idle": "2025-11-02T01:09:19.623145Z",
+     "shell.execute_reply": "2025-11-02T01:09:19.622788Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "TRUNCATION STRATEGY\n",
+      "  Result: 16 messages, 261 tokens\n",
+      "  Savings: 0 tokens\n",
+      "  Kept messages: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]\n"
+     ]
+    }
+   ],
+   "source": [
+    "truncation = TruncationStrategy()\n",
+    "truncated = truncation.compress(test_conversation, max_tokens)\n",
+    "truncated_tokens = sum(msg.token_count for msg in truncated)\n",
+    "\n",
+    "print(f\"TRUNCATION STRATEGY\")\n",
+    "print(f\"  Result: {len(truncated)} messages, {truncated_tokens} tokens\")\n",
+    "print(f\"  Savings: {original_tokens - truncated_tokens} tokens\")\n",
+    "print(f\"  Kept messages: {[i for i, msg in enumerate(test_conversation) if msg in truncated]}\")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d8dfbdc40403d640",
+   "metadata": {},
+   "source": [
+    "#### Step 2.5: Test Sliding Window Strategy\n",
+    "\n",
+    "**What:** Testing message-aware compression that keeps exactly N recent messages.\n",
+    "\n",
+    "**Why:** Shows how sliding window prioritizes predictability (always 6 messages) over token optimization (may exceed budget).\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "id": "4018ee04019c9a9a",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-11-02T01:09:19.624216Z",
+     "iopub.status.busy": "2025-11-02T01:09:19.624133Z",
+     "iopub.status.idle": "2025-11-02T01:09:19.626403Z",
+     "shell.execute_reply": "2025-11-02T01:09:19.625989Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "SLIDING WINDOW STRATEGY\n",
+      "  Result: 6 messages, 91 tokens\n",
+      "  Savings: 170 tokens\n",
+      "  Kept messages: [10, 11, 12, 13, 14, 15]\n",
+      "  Token budget: 91/800 (within limit)\n"
+     ]
+    }
+   ],
+   "source": [
+    "sliding_window = SlidingWindowStrategy(window_size=6)\n",
+    "windowed = sliding_window.compress(test_conversation, max_tokens)\n",
+    "windowed_tokens = sum(msg.token_count for msg in windowed)\n",
+    "\n",
+    "print(f\"SLIDING WINDOW STRATEGY\")\n",
+    "print(f\"  Result: {len(windowed)} messages, {windowed_tokens} tokens\")\n",
+    "print(f\"  Savings: {original_tokens - windowed_tokens} tokens\")\n",
+    "print(f\"  Kept messages: {[i for i, msg in enumerate(test_conversation) if msg in windowed]}\")\n",
+    "print(f\"  Token budget: {windowed_tokens}/{max_tokens} ({'within' if windowed_tokens <= max_tokens else 'EXCEEDS'} limit)\")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "529392dfaf6dbe64",
+   "metadata": {},
+   "source": [
+    "**Analysis:**\n",
+    "\n",
+    "The sliding window kept:\n",
+    "- **Exactly 6 messages** (last 6 from the conversation)\n",
+    "- **Most recent context only** (indices show the final messages)\n",
+    "- **91 tokens** (well within the 800-token budget)\n",
+    "\n",
+    "**Key difference from truncation:**\n",
+    "- **Truncation:** Kept all 16 messages (261 tokens), since the whole conversation fits under the 800-token budget\n",
+    "- **Sliding Window:** Kept exactly 6 messages, resulting in 91 tokens\n",
+    "\n",
+    "**Behavior pattern:**\n",
+    "- Truncation: \"Fill the budget\" โ†’ Variable count, guaranteed fit\n",
+    "- Sliding Window: \"Fixed window\" โ†’ Constant count, may exceed budget\n"
+   ]
+  },
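+  {
+   "cell_type": "markdown",
+   "id": "7c3e4a5b6d8f9203",
+   "metadata": {},
+   "source": [
+    "Because the 800-token budget exceeds the whole conversation, truncation had nothing to drop above. To actually see the two behaviors diverge, re-run both strategies with a deliberately tight budget - a quick sketch using only the objects defined above (the 100-token figure is an arbitrary demo value):\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6d4f5b6c7e9a0314",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Contrast the strategies under a tight budget (arbitrary demo value)\n",
+    "tight_budget = 100\n",
+    "\n",
+    "tight_truncated = truncation.compress(test_conversation, tight_budget)\n",
+    "tight_windowed = sliding_window.compress(test_conversation, tight_budget)\n",
+    "\n",
+    "print(f\"Budget: {tight_budget} tokens\")\n",
+    "# Truncation adapts its message count to stay under the budget\n",
+    "print(f\"  Truncation:     {len(tight_truncated)} messages, {sum(m.token_count for m in tight_truncated)} tokens (never exceeds budget)\")\n",
+    "# Sliding window ignores the budget and always keeps window_size messages\n",
+    "print(f\"  Sliding window: {len(tight_windowed)} messages, {sum(m.token_count for m in tight_windowed)} tokens (always {sliding_window.window_size} messages)\")\n"
+   ]
+  },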
+  {
+   "cell_type": "markdown",
+   "id": "69267d84d68c7376",
+   "metadata": {},
+   "source": [
+    "#### Step 3: Test Priority-Based Strategy\n",
+    "\n",
+    "**What:** Testing intelligent selection that scores messages by importance.\n",
+    "\n",
+    "**Why:** Demonstrates how priority-based compression preserves high-value messages (questions, course codes, requirements) while staying within budget - no LLM needed. (As with truncation, the 800-token budget exceeds our 261-token conversation, so nothing is dropped in this run; tighten the budget to see selection kick in.)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "id": "c0b2ce7a958fbe9d",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-11-02T01:09:19.627580Z",
+     "iopub.status.busy": "2025-11-02T01:09:19.627497Z",
+     "iopub.status.idle": "2025-11-02T01:09:19.629606Z",
+     "shell.execute_reply": "2025-11-02T01:09:19.629188Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "PRIORITY-BASED STRATEGY\n",
+      "  Result: 16 messages, 261 tokens\n",
+      "  Savings: 0 tokens\n",
+      "  Kept messages: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]\n"
+     ]
+    }
+   ],
+   "source": [
+    "priority = PriorityBasedStrategy()\n",
+    "prioritized = priority.compress(test_conversation, max_tokens)\n",
+    "prioritized_tokens = sum(msg.token_count for msg in prioritized)\n",
+    "\n",
+    "print(f\"PRIORITY-BASED STRATEGY\")\n",
+    "print(f\"  Result: {len(prioritized)} messages, {prioritized_tokens} tokens\")\n",
+    "print(f\"  Savings: {original_tokens - prioritized_tokens} tokens\")\n",
+    "print(f\"  Kept messages: {[i for i, msg in enumerate(test_conversation) if msg in prioritized]}\")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "fed34b703bb9c7d9",
+   "metadata": {},
+   "source": [
+    "Let's examine which messages were selected and why:\n",
+    "\n",
+    "**What:** Inspecting the importance scores assigned to different messages.\n",
+    "\n",
+    "**Why:** Understanding the scoring logic helps you tune it for your domain (e.g., legal terms, medical codes, customer names).\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "id": "134971d1108034c4",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-11-02T01:09:19.630668Z",
+     "iopub.status.busy": "2025-11-02T01:09:19.630588Z",
+     "iopub.status.idle": "2025-11-02T01:09:19.632452Z",
+     "shell.execute_reply": "2025-11-02T01:09:19.632116Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Sample importance scores:\n",
+      "  Message 0: 1.5 - \"Hi, I'm interested in learning about machine learn...\"\n",
+      "  Message 2: 5.5 - \"What are the prerequisites for CS401?...\"\n",
+      "  Message 4: 2.5 - \"I've completed CS101 but not CS201 yet...\"\n",
+      "  Message 6: 4.0 - \"How difficult is MATH301?...\"\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Show importance scores for selected messages\n",
+    "print(\"Sample importance scores:\")\n",
+    "for i in [0, 2, 4, 6]:\n",
+    "    if i < len(test_conversation):\n",
+    "        score = priority.calculate_importance(test_conversation[i])\n",
+    "        preview = test_conversation[i].content[:50]\n",
+    "        print(f\"  Message {i}: {score:.1f} - \\\"{preview}...\\\"\")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e310f0458261b9a8",
+   "metadata": {},
+   "source": [
+    "#### Step 4: Test Summarization Strategy\n",
+    "\n",
+    "**What:** Testing LLM-based compression using 
the summarizer from Part 2.\n", + "\n", + "**Why:** Shows the highest-quality compression - preserves meaning and context but requires an API call. This is the gold standard for quality, but comes with latency and cost.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "997bc235a9b3038b", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-02T01:09:19.633410Z", + "iopub.status.busy": "2025-11-02T01:09:19.633348Z", + "iopub.status.idle": "2025-11-02T01:09:23.786609Z", + "shell.execute_reply": "2025-11-02T01:09:23.786002Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SUMMARIZATION STRATEGY\n", + " Result: 5 messages, 311 tokens\n", + " Savings: -50 tokens\n", + " Structure: 1 summary + 4 recent messages\n" + ] + } + ], + "source": [ + "summarization = SummarizationStrategy(summarizer)\n", + "summarized = await summarization.compress_async(test_conversation, max_tokens)\n", + "summarized_tokens = sum(msg.token_count for msg in summarized)\n", + "\n", + "print(f\"SUMMARIZATION STRATEGY\")\n", + "print(f\" Result: {len(summarized)} messages, {summarized_tokens} tokens\")\n", + "print(f\" Savings: {original_tokens - summarized_tokens} tokens\")\n", + "print(f\" Structure: 1 summary + {len(summarized) - 1} recent messages\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "eb0f2653b2c4e89b", + "metadata": {}, + "source": [ + "#### Step 5: Compare all strategies\n", + "\n", + "**What:** Side-by-side comparison of all four strategies on the same conversation.\n", + "\n", + "**Why:** Seeing the trade-offs in a table makes it clear: truncation/sliding window are fast but lose context, priority-based balances both, summarization preserves most but costs time/money.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "47b36cc71717932b", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-02T01:09:23.788092Z", + "iopub.status.busy": "2025-11-02T01:09:23.787966Z", + "iopub.status.idle": "2025-11-02T01:09:23.791405Z", + "shell.execute_reply": "2025-11-02T01:09:23.790886Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "COMPARISON SUMMARY\n", + "================================================================================\n", + "Strategy Messages Tokens Savings Quality\n", + "--------------------------------------------------------------------------------\n", + "Original 16 261 0 N/A\n", + "Truncation 16 261 0 Low\n", + "Sliding Window 6 91 170 (65%) Low\n", + "Priority-Based 16 261 0 Medium\n", + "Summarization 5 311 -50 High\n" + ] + } + ], + "source": [ + "print(\"COMPARISON SUMMARY\")\n", + "print(\"=\" * 80)\n", + "print(f\"{'Strategy':<20} {'Messages':<12} {'Tokens':<12} {'Savings':<12} {'Quality'}\")\n", + "print(\"-\" * 80)\n", + "\n", + "strategies = [\n", + " (\"Original\", len(test_conversation), original_tokens, 0, \"N/A\"),\n", + " (\"Truncation\", len(truncated), truncated_tokens, original_tokens - truncated_tokens, \"Low\"),\n", + " (\"Sliding Window\", len(windowed), windowed_tokens, original_tokens - windowed_tokens, \"Low\"),\n", + " (\"Priority-Based\", len(prioritized), prioritized_tokens, original_tokens - prioritized_tokens, \"Medium\"),\n", + " (\"Summarization\", len(summarized), summarized_tokens, original_tokens - summarized_tokens, \"High\"),\n", + "]\n", + "\n", + "for name, msgs, tokens, savings, quality in strategies:\n", + " savings_pct = f\"({savings/original_tokens*100:.0f}%)\" if savings > 0 else \"\"\n", + " 
print(f\"{name:<20} {msgs:<12} {tokens:<12} {savings:<5} {savings_pct:<6} {quality}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "bfe7c056c978aea4", + "metadata": {}, + "source": [ + "### Understanding the Trade-offs: Why Summarization Isn't Always Optimal\n", + "\n", + "Now that we've seen all four strategies in action, let's understand when each one shines and when it falls short.\n", + "\n", + "**Summarization's Trade-offs:**\n", + "\n", + "While summarization provides the highest quality compression, it introduces constraints:\n", + "\n", + "1. **Latency:** Requires LLM API call (1-3 seconds vs. <10ms for other strategies)\n", + "2. **Cost:** Extra API calls at scale (1,000 conversations/day = 1,000+ LLM calls)\n", + "3. **Lossy:** Paraphrases content, doesn't preserve exact wording\n", + "4. **Complexity:** Requires async operations, prompt engineering, error handling\n", + "\n", + "**When to Use Alternatives:**\n", + "\n", + "| Scenario | Better Strategy | Why |\n", + "|----------|----------------|-----|\n", + "| Real-time chat | Truncation/Sliding Window | Zero latency |\n", + "| Cost-sensitive (high volume) | Priority-based | No API calls |\n", + "| Verbatim accuracy required | Truncation | Preserves exact wording |\n", + "| Predictable context size | Sliding Window | Fixed message count |\n", + "\n", + "See the Key Takeaways below for the complete decision framework." + ] + }, + { + "cell_type": "markdown", + "id": "6ebd894c5ffdfff", + "metadata": {}, + "source": [ + "#### Key Takeaways\n", + "\n", + "**Truncation (Token-Aware):**\n", + "- Keeps messages within token budget\n", + "- Variable message count, guaranteed under limit\n", + "- Good for: API token limits, cost control\n", + "\n", + "**Sliding Window (Message-Aware):**\n", + "- Keeps exactly N most recent messages\n", + "- Fixed message count, may exceed token budget\n", + "- Good for: Real-time chat, predictable context size\n", + "\n", + "**Priority-Based (Intelligent):**\n", + "- Scores and keeps important messages\n", + "- Preserves key information across conversation\n", + "- Good for: Most production applications, balanced approach\n", + "\n", + "**Summarization (Highest Quality):**\n", + "- Uses LLM to preserve meaning\n", + "- Highest quality, but requires API call (cost + latency)\n", + "- Good for: High-value conversations, support tickets, advisory sessions\n", + "\n", + "**Decision Framework:**\n", + "- **Speed-critical** โ†’ Truncation or Sliding Window (instant, no LLM)\n", + "- **Cost-sensitive** โ†’ Priority-Based (intelligent, no API calls)\n", + "- **Quality-critical** โ†’ Summarization (preserves meaning, expensive)\n", + "- **Predictable context** โ†’ Sliding Window (constant message count)\n" + ] + }, + { + "cell_type": "markdown", + "id": "dca23d0020c84249", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## ๐Ÿ”„ Part 4: Agent Memory Server Integration\n", + "\n", + "The Agent Memory Server provides automatic summarization. 
Let's configure and test it.\n" + ] + }, + { + "cell_type": "markdown", + "id": "8ca0c2b93f2cf79e", + "metadata": {}, + "source": [ + "### ๐Ÿ”ง Theory: Automatic Memory Management\n", + "\n", + "As we learned in Notebook 01, the Agent Memory Server provides automatic memory management with configurable compression strategies.\n", + "\n", + "**Agent Memory Server Features:**\n", + "- โœ… Automatic summarization when thresholds are exceeded\n", + "- โœ… Configurable strategies (recent + summary, sliding window, full summary)\n", + "- โœ… Transparent to your application code\n", + "- โœ… Production-ready and scalable\n", + "\n", + "**How It Works:**\n", + "1. You add messages to working memory normally\n", + "2. Server monitors message count and token count\n", + "3. When threshold is exceeded, server automatically summarizes\n", + "4. Old messages are replaced with summary\n", + "5. Recent messages are kept for context\n", + "6. Your application retrieves the compressed memory\n", + "\n", + "**Configuration Options:**\n", + "- `message_threshold`: Summarize after N messages (default: 20)\n", + "- `token_threshold`: Summarize after N tokens (default: 4000)\n", + "- `keep_recent`: Number of recent messages to keep (default: 4)\n", + "- `strategy`: \"recent_plus_summary\", \"sliding_window\", or \"full_summary\"" + ] + }, + { + "cell_type": "markdown", + "id": "d585948b56598a9f", + "metadata": {}, + "source": [ + "### Demo 5: Test Automatic Summarization with Realistic Academic Advising\n", + "\n", + "Let's test the Agent Memory Server's automatic summarization with a realistic, information-dense conversation.\n", + "\n", + "**Real-World Scenario:** This demo simulates an academic advising session where a student asks detailed questions about a course syllabus. This mirrors actual use cases like:\n", + "- Academic advising chatbots answering detailed course questions\n", + "- Customer support agents explaining complex products/services\n", + "- Technical documentation assistants providing in-depth explanations\n", + "- Healthcare chatbots discussing treatment options and medical information\n", + "\n", + "The long, information-dense responses will exceed the 4000 token threshold, triggering automatic summarization.\n", + "\n", + "#### Step 1: Create a test session\n", + "\n", + "**What:** Setting up a unique session ID for testing automatic summarization.\n", + "\n", + "**Why:** Each session has its own working memory. 
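\n",
+    "\n",
+    "As a rough mental model (a sketch in terms of our Part 2 class, not the server's actual API), the server's default `recent_plus_summary` behavior is roughly what you would get from `ConversationSummarizer(llm, token_threshold=4000, message_threshold=20, keep_recent=4)` - the same check, summarize, keep-recent loop, just run server-side and automatically.\n",
+    "\n",
+    "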
We need a fresh session to observe the Agent Memory Server's automatic compression behavior from scratch.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "de6e6cc74530366a", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-02T01:09:23.793025Z", + "iopub.status.busy": "2025-11-02T01:09:23.792940Z", + "iopub.status.idle": "2025-11-02T01:09:23.794937Z", + "shell.execute_reply": "2025-11-02T01:09:23.794510Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Testing automatic summarization\n", + "Session ID: long_conversation_test_1762045763\n", + "Student ID: student_memory_test\n" + ] + } + ], + "source": [ + "# Create a test session\n", + "test_session_id = f\"long_conversation_test_{int(time.time())}\"\n", + "test_student_id = \"student_memory_test\"\n", + "\n", + "print(f\"\"\"Testing automatic summarization\n", + "Session ID: {test_session_id}\n", + "Student ID: {test_student_id}\"\"\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "a557dad8d8f53ef0", + "metadata": {}, + "source": [ + "#### Step 2: Create a realistic scenario - Student exploring a detailed course syllabus\n", + "\n", + "**What:** Simulating a real advising session where a student asks detailed questions about the CS401 Machine Learning course syllabus.\n", + "\n", + "**Why:** Real conversations involve long, information-dense responses (course descriptions, prerequisites, project details). This creates enough tokens to trigger automatic summarization while demonstrating a realistic use case.\n", + "\n", + "**Scenario:** A student is considering CS401 and asks progressively deeper questions about the syllabus, prerequisites, projects, grading, and logistics.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "4addd7959de37558", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-02T01:09:23.796566Z", + "iopub.status.busy": "2025-11-02T01:09:23.796467Z", + "iopub.status.idle": "2025-11-02T01:09:23.806263Z", + "shell.execute_reply": "2025-11-02T01:09:23.805953Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Created realistic advising conversation:\n", + " - 11 turns (22 messages)\n", + " - Detailed course syllabus document\n", + " - Progressive depth: overview โ†’ prerequisites โ†’ projects โ†’ logistics โ†’ financial aid\n", + " - Long, information-dense responses (realistic for academic advising)\n", + " - Total tokens: 4,795 tokens (threshold: 4,000)\n", + " - Status: โœ… EXCEEDS threshold\n" + ] + } + ], + "source": [ + "# First, let's create a detailed course syllabus (this would typically come from a RAG system)\n", + "cs401_syllabus = \"\"\"\n", + "CS401: Machine Learning - Complete Course Syllabus\n", + "\n", + "COURSE OVERVIEW:\n", + "This comprehensive course covers fundamental and advanced machine learning techniques. 
Students will learn supervised learning (linear regression, logistic regression, decision trees, random forests, support vector machines), unsupervised learning (k-means clustering, hierarchical clustering, DBSCAN, dimensionality reduction with PCA and t-SNE), neural networks (feedforward networks, backpropagation, activation functions, optimization algorithms), deep learning (convolutional neural networks for computer vision, recurrent neural networks for sequence modeling, LSTMs and GRUs for time series), and natural language processing (word embeddings, transformers, attention mechanisms, BERT, GPT architectures).\n", + "\n", + "PREREQUISITES:\n", + "- CS201 Data Structures and Algorithms (required) - Must understand trees, graphs, dynamic programming, complexity analysis\n", + "- MATH301 Linear Algebra (required) - Matrix operations, eigenvalues, eigenvectors, vector spaces\n", + "- STAT201 Probability and Statistics (recommended) - Probability distributions, hypothesis testing, Bayes' theorem\n", + "- Python programming experience (required) - NumPy, Pandas, Matplotlib\n", + "\n", + "COURSE STRUCTURE:\n", + "- 15 weeks, 3 hours lecture + 2 hours lab per week\n", + "- 4 major projects (40% of grade)\n", + "- Weekly problem sets (20% of grade)\n", + "- Midterm exam (15% of grade)\n", + "- Final exam (20% of grade)\n", + "- Class participation (5% of grade)\n", + "\n", + "PROJECTS:\n", + "Project 1 (Weeks 2-4): Implement linear regression and logistic regression from scratch using only NumPy. Apply to housing price prediction and spam classification datasets.\n", + "\n", + "Project 2 (Weeks 5-7): Build a neural network framework with backpropagation. Implement various activation functions (ReLU, sigmoid, tanh) and optimization algorithms (SGD, Adam, RMSprop). Train on MNIST digit classification.\n", + "\n", + "Project 3 (Weeks 8-11): Develop a convolutional neural network for image classification using TensorFlow/PyTorch. Experiment with different architectures (LeNet, AlexNet, ResNet). Apply transfer learning with pre-trained models. Dataset: CIFAR-10 or custom image dataset.\n", + "\n", + "Project 4 (Weeks 12-15): Natural language processing project - build a sentiment analysis system using transformers. Fine-tune BERT or GPT-2 on movie reviews or social media data. Implement attention visualization and model interpretation techniques.\n", + "\n", + "GRADING SCALE:\n", + "A: 90-100%, B: 80-89%, C: 70-79%, D: 60-69%, F: <60%\n", + "Pass rate: Approximately 85% of students pass on first attempt\n", + "Average grade: B+ (87%)\n", + "\n", + "RESOURCES:\n", + "- Textbook: \"Pattern Recognition and Machine Learning\" by Christopher Bishop\n", + "- Online resources: Coursera ML course, fast.ai, Papers with Code\n", + "- Computing: Google Colab Pro ($10/month) or university GPU cluster access\n", + "- Office hours: 3 TAs available Monday-Friday, 2-5 PM\n", + "- Discussion forum: Active Slack workspace with 200+ students and alumni\n", + "\n", + "SCHEDULE:\n", + "Offered every semester (Fall, Spring, Summer)\n", + "Class times: MWF 10:00-11:30 AM or TTh 2:00-4:00 PM\n", + "Lab sections: Multiple options throughout the week\n", + "Application deadline: 2 months before semester start\n", + "\"\"\"\n", + "\n", + "# Now create a realistic conversation where the student asks detailed questions\n", + "conversation_turns = [\n", + " (\n", + " \"Hi! I'm interested in taking CS401 Machine Learning next semester. Can you give me an overview of what the course covers?\",\n", + " f\"Absolutely! 
CS401 is our flagship machine learning course. Here's the complete overview:\\n\\n{cs401_syllabus[:500]}...\\n\\nThe course covers everything from foundational algorithms like linear regression to cutting-edge deep learning techniques. You'll work with real datasets and build production-quality ML systems. Would you like to know more about any specific aspect?\"\n", + " ),\n", + " (\n", + " \"That sounds comprehensive! What are the prerequisites? I've completed CS101 Introduction to Programming, but I'm not sure if I'm ready for CS401.\",\n", + " \"Great question! Let me break down the prerequisites for you:\\n\\n**Required Prerequisites:**\\n1. CS201 Data Structures and Algorithms - You'll need strong understanding of trees, graphs, dynamic programming, and complexity analysis. This is essential because ML algorithms often involve complex data structures and you need to understand their performance characteristics.\\n\\n2. MATH301 Linear Algebra - Matrix operations, eigenvalues, eigenvectors, and vector spaces are fundamental to ML. Neural networks are essentially matrix multiplications, and dimensionality reduction techniques like PCA rely heavily on linear algebra.\\n\\n**Recommended Prerequisites:**\\n3. STAT201 Probability and Statistics - Understanding probability distributions, hypothesis testing, and Bayes' theorem will help you grasp why ML algorithms work.\\n\\n4. Python Programming - You should be comfortable with NumPy, Pandas, and Matplotlib.\\n\\nSince you've only completed CS101, you'll need to take CS201 first. Many students take CS201 and MATH301 concurrently, which would prepare you for CS401 in about 4-6 months.\"\n", + " ),\n", + " (\n", + " \"I see. Can you tell me more about the projects? I learn best by doing hands-on work.\",\n", + " \"Excellent! CS401 has 4 major projects that progressively build your skills:\\n\\n**Project 1 (Weeks 2-4): Foundations**\\nYou'll implement linear regression and logistic regression from scratch using only NumPy - no ML libraries allowed! This forces you to understand the math. You'll apply these to real datasets: housing price prediction (regression) and spam classification (classification). This project teaches you the fundamentals of gradient descent and loss functions.\\n\\n**Project 2 (Weeks 5-7): Neural Networks**\\nBuild your own neural network framework with backpropagation. You'll implement various activation functions (ReLU, sigmoid, tanh) and optimization algorithms (SGD, Adam, RMSprop). Then train your network on MNIST digit classification. This is where you really understand how deep learning works under the hood.\\n\\n**Project 3 (Weeks 8-11): Computer Vision**\\nDevelop a convolutional neural network for image classification using TensorFlow or PyTorch. You'll experiment with different architectures (LeNet, AlexNet, ResNet) and apply transfer learning with pre-trained models. Dataset options include CIFAR-10 or you can use a custom dataset. This project shows you how to work with production ML frameworks.\\n\\n**Project 4 (Weeks 12-15): NLP**\\nBuild a sentiment analysis system using transformers. You'll fine-tune BERT or GPT-2 on movie reviews or social media data, implement attention visualization, and use model interpretation techniques. This is the most advanced project and prepares you for real-world NLP applications.\\n\\nEach project takes 2-3 weeks and includes a written report and code submission. 
Projects are worth 40% of your final grade.\"\n", + " ),\n", + " (\n", + " \"Wow, those projects sound challenging but exciting! What's the workload like? I'm also taking two other courses next semester.\",\n", + " \"That's a very important consideration! CS401 is one of our most intensive courses. Here's what to expect:\\n\\n**Time Commitment:**\\n- Lectures: 3 hours per week (MWF 10:00-11:30 AM or TTh 2:00-4:00 PM)\\n- Lab sections: 2 hours per week (multiple time slots available)\\n- Problem sets: 4-6 hours per week (weekly assignments to reinforce concepts)\\n- Project work: 8-12 hours per week during project periods\\n- Exam preparation: 10-15 hours before midterm and final\\n- Reading and self-study: 3-5 hours per week\\n\\n**Total: 20-25 hours per week on average**, with peaks during project deadlines and exams.\\n\\n**Workload Distribution:**\\n- Weeks 1-2: Lighter (getting started, foundational concepts)\\n- Weeks 3-4, 6-7, 9-11, 13-15: Heavy (project work)\\n- Weeks 5, 8, 12: Moderate (project transitions, exam prep)\\n\\n**Managing with Other Courses:**\\nMost students take 3-4 courses per semester. If your other two courses are also intensive, you might find it challenging. I'd recommend:\\n1. Make sure at least one of your other courses is lighter\\n2. Plan your schedule to avoid deadline conflicts\\n3. Start projects early - don't wait until the last week\\n4. Use office hours and study groups effectively\\n\\nAbout 85% of students pass on their first attempt, with an average grade of B+ (87%). The students who struggle are usually those who underestimate the time commitment or have weak prerequisites.\"\n", + " ),\n", + " (\n", + " \"That's helpful context. What programming languages and tools will I need to learn? I'm comfortable with Python basics but haven't used ML libraries.\",\n", + " \"Perfect! Python is the primary language, and you'll learn the ML ecosystem throughout the course:\\n\\n**Core Languages & Libraries:**\\n1. **Python 3.8+** - You're already comfortable with this, great!\\n2. **NumPy** - For numerical computing and array operations. You'll use this extensively in Projects 1 and 2.\\n3. **Pandas** - For data manipulation and analysis. Essential for loading and preprocessing datasets.\\n4. **Matplotlib & Seaborn** - For data visualization. You'll create plots to understand your data and model performance.\\n\\n**Machine Learning Frameworks:**\\n5. **Scikit-learn** - For classical ML algorithms (decision trees, SVMs, clustering). Used in problem sets and Project 1.\\n6. **TensorFlow 2.x OR PyTorch** - You can choose either for Projects 3 and 4. Both are covered in lectures.\\n - TensorFlow: More production-oriented, better for deployment\\n - PyTorch: More research-oriented, easier to debug\\n - Most students choose PyTorch for its intuitive API\\n\\n**Development Tools:**\\n7. **Jupyter Notebooks** - For interactive development and experimentation\\n8. **Git/GitHub** - For version control and project submission\\n9. **Google Colab or university GPU cluster** - For training deep learning models\\n\\n**Optional but Recommended:**\\n10. **Weights & Biases (wandb)** - For experiment tracking\\n11. **Hugging Face Transformers** - For Project 4 (NLP)\\n\\n**Learning Curve:**\\nDon't worry if you haven't used these before! 
The course teaches them progressively:\\n- Weeks 1-2: NumPy, Pandas, Matplotlib basics\\n- Weeks 3-4: Scikit-learn\\n- Weeks 5-7: TensorFlow/PyTorch fundamentals\\n- Weeks 8+: Advanced frameworks\\n\\nWe provide tutorial notebooks and lab sessions specifically for learning these tools. Most students pick them up quickly if they're comfortable with Python.\"\n", + " ),\n", + " (\n", + " \"Great! What about computing resources? Do I need to buy a powerful laptop with a GPU?\",\n", + " \"Excellent question! You do NOT need to buy expensive hardware. Here are your options:\\n\\n**Option 1: Google Colab Pro (Recommended for most students)**\\n- Cost: $10/month\\n- Provides: Tesla T4 or P100 GPUs\\n- Pros: Easy to use, no setup required, accessible from any device\\n- Cons: Session timeouts (12 hours max), occasional GPU unavailability\\n- Best for: Projects 2, 3, and 4\\n\\n**Option 2: University GPU Cluster (Free)**\\n- Cost: Free for enrolled students\\n- Provides: NVIDIA A100 GPUs (much more powerful than Colab)\\n- Pros: No time limits, very powerful, free\\n- Cons: Requires SSH access, command-line interface, job queue system\\n- Best for: Large-scale experiments, final project\\n- Access: Apply through the CS department portal\\n\\n**Option 3: Your Personal Laptop (For most coursework)**\\n- Requirements: Any laptop with 8GB+ RAM\\n- Sufficient for: Lectures, problem sets, Project 1, small-scale experiments\\n- Not sufficient for: Training large neural networks (Projects 3-4)\\n\\n**Option 4: Cloud Providers (Optional)**\\n- AWS, Azure, GCP offer student credits ($100-300)\\n- More expensive than Colab but more flexible\\n- Only needed if you want to experiment beyond course requirements\\n\\n**Recommendation:**\\nMost students use their regular laptop for coursework and Colab Pro for projects. The $10/month is well worth it. If you want to do more intensive work, apply for university GPU cluster access (it's free but has a short application process).\\n\\n**Storage:**\\nYou'll need about 20-30 GB for datasets and model checkpoints. Google Drive (15 GB free) or university storage is usually sufficient.\"\n", + " ),\n", + " (\n", + " \"This is all very helpful! What's the grading breakdown? I want to understand how much each component counts.\",\n", + " \"Absolutely! Here's the complete grading breakdown:\\n\\n**Grade Components:**\\n\\n1. **Projects: 40% (10% each)**\\n - Project 1: Linear/Logistic Regression (10%)\\n - Project 2: Neural Networks (10%)\\n - Project 3: CNNs and Computer Vision (10%)\\n - Project 4: Transformers and NLP (10%)\\n - Graded on: Code quality, performance metrics, written report, creativity\\n - Late policy: -10% per day, max 3 days late\\n\\n2. **Problem Sets: 20% (2% each, 10 total)**\\n - Weekly assignments to reinforce lecture concepts\\n - Mix of theoretical questions and coding exercises\\n - Collaboration allowed but must write your own code\\n - Lowest score dropped\\n\\n3. **Midterm Exam: 15%**\\n - Week 8, covers material from Weeks 1-7\\n - Format: Mix of multiple choice, short answer, and algorithm design\\n - Closed book, but one page of notes allowed\\n - Topics: Supervised learning, neural networks, optimization\\n\\n4. **Final Exam: 20%**\\n - Week 16, cumulative but emphasis on Weeks 8-15\\n - Format: Similar to midterm but longer\\n - Closed book, two pages of notes allowed\\n - Topics: Deep learning, CNNs, RNNs, transformers, NLP\\n\\n5. 
**Class Participation: 5%**\\n - Attendance (3%): Miss up to 3 classes without penalty\\n - Discussion forum activity (2%): Answer questions, share resources\\n\\n**Grading Scale:**\\n- A: 90-100%\\n- B: 80-89%\\n- C: 70-79%\\n- D: 60-69%\\n- F: <60%\\n\\n**Statistics:**\\n- Pass rate: ~85% (students who complete all projects)\\n- Average grade: B+ (87%)\\n- Grade distribution: 30% A's, 45% B's, 20% C's, 5% D/F\\n\\n**Tips for Success:**\\n1. Projects are the biggest component - start early!\\n2. Don't skip problem sets - they prepare you for exams\\n3. Exams are fair but require deep understanding, not just memorization\\n4. Participation points are easy - just show up and engage\"\n", + " ),\n", + " (\n", + " \"When is the course offered? I'm trying to plan my schedule for next year.\",\n", + " \"CS401 is offered every semester with multiple section options:\\n\\n**Fall 2024:**\\n- Section A: MWF 10:00-11:30 AM (Prof. Sarah Chen)\\n- Section B: TTh 2:00-4:00 PM (Prof. Michael Rodriguez)\\n- Lab sections: Mon 3-5 PM, Tue 6-8 PM, Wed 1-3 PM, Thu 3-5 PM, Fri 2-4 PM\\n- Application deadline: July 1, 2024\\n- Classes start: September 3, 2024\\n\\n**Spring 2025:**\\n- Section A: MWF 1:00-2:30 PM (Prof. Emily Watson)\\n- Section B: TTh 10:00-12:00 PM (Prof. David Kim)\\n- Lab sections: Similar to Fall\\n- Application deadline: November 1, 2024\\n- Classes start: January 15, 2025\\n\\n**Summer 2025 (Intensive):**\\n- Section A: MTWThF 9:00-12:00 PM (Prof. Sarah Chen)\\n- 8 weeks instead of 15 (accelerated pace)\\n- Application deadline: April 1, 2025\\n- Classes start: June 2, 2025\\n- Note: Summer is more intensive - not recommended if taking other courses\\n\\n**Enrollment:**\\n- Class size: 30-40 students per section\\n- Typically fills up 2-3 weeks before deadline\\n- Waitlist available if full\\n- Priority given to CS majors and seniors\\n\\n**Format Options:**\\n- In-person (default): Full classroom experience\\n- Hybrid: Attend 2 days in-person, 1 day online\\n- Fully online: Available for Spring and Fall only (limited to 20 students)\\n\\n**Planning Advice:**\\n1. Apply early - course fills up fast\\n2. Choose section based on professor and time preference\\n3. Check lab section availability before committing\\n4. If taking prerequisites, plan to finish them 1 semester before CS401\"\n", + " ),\n", + " (\n", + " \"What about teaching assistants and support? Will I be able to get help when I'm stuck?\",\n", + " \"Absolutely! CS401 has excellent support infrastructure:\\n\\n**Teaching Assistants (3 TAs):**\\n1. **Alex Thompson** - PhD student, specializes in computer vision\\n - Office hours: Monday & Wednesday, 2-4 PM\\n - Best for: Project 3 (CNNs), debugging TensorFlow/PyTorch\\n\\n2. **Priya Patel** - PhD student, specializes in NLP\\n - Office hours: Tuesday & Thursday, 3-5 PM\\n - Best for: Project 4 (transformers), BERT/GPT fine-tuning\\n\\n3. **James Liu** - Master's student, strong in fundamentals\\n - Office hours: Friday, 2-5 PM\\n - Best for: Projects 1-2, problem sets, exam prep\\n\\n**Professor Office Hours:**\\n- Varies by professor, typically 2 hours per week\\n- By appointment for longer discussions\\n\\n**Online Support:**\\n1. **Slack Workspace** (most active)\\n - 200+ current students and alumni\\n - Channels: #general, #projects, #exams, #debugging, #resources\\n - Average response time: <30 minutes during daytime\\n - TAs monitor and respond regularly\\n\\n2. 
**Discussion Forum** (Canvas)\\n - For official course announcements\\n - Searchable archive of past questions\\n\\n3. **Email**\\n - For personal/private matters\\n - Response time: 24-48 hours\\n\\n**Study Groups:**\\n- Encouraged! Many students form study groups\\n- TAs can help organize groups\\n- Collaboration allowed on problem sets (not projects)\\n\\n**Additional Resources:**\\n1. **Peer Tutoring** - Free through CS department\\n2. **Writing Center** - For project report feedback\\n3. **Recorded Lectures** - All lectures recorded and available on Canvas\\n4. **Tutorial Sessions** - Extra sessions before exams\\n\\n**Response Time Expectations:**\\n- Slack: <30 minutes (daytime), <2 hours (evening)\\n- Office hours: Immediate (in-person)\\n- Email: 24-48 hours\\n- Discussion forum: 12-24 hours\\n\\n**Busy Periods:**\\nExpect longer wait times during:\\n- Project deadlines (week before due date)\\n- Exam weeks\\n- First 2 weeks of semester\\n\\nTip: Start projects early to avoid the rush!\"\n", + " ),\n", + " (\n", + " \"This is great information! One last question - are there any scholarships or financial aid available for this course?\",\n", + " \"Yes! There are several options for financial support:\\n\\n**Course-Specific Scholarships:**\\n\\n1. **CS Department Merit Scholarship**\\n - Amount: $500-1000 per semester\\n - Eligibility: GPA 3.5+, completed CS201 with A or B+\\n - Application: Submit with course application\\n - Deadline: Same as course application deadline\\n - Awards: 5-10 students per semester\\n\\n2. **Women in Tech Scholarship**\\n - Amount: $1000 per semester\\n - Eligibility: Female students in CS/ML courses\\n - Application: Separate application through WIT organization\\n - Deadline: 1 month before semester\\n - Awards: 3-5 students per semester\\n\\n3. **Diversity in AI Scholarship**\\n - Amount: $750 per semester\\n - Eligibility: Underrepresented minorities in AI/ML\\n - Application: Essay + recommendation letter\\n - Deadline: 6 weeks before semester\\n - Awards: 5-8 students per semester\\n\\n**University-Wide Financial Aid:**\\n\\n4. **Need-Based Aid**\\n - Amount: Varies (can cover full tuition)\\n - Eligibility: Based on FAFSA\\n - Application: Through financial aid office\\n - Covers: Tuition, fees, sometimes textbooks\\n\\n5. **Work-Study Program**\\n - Amount: $15/hour, up to 20 hours/week\\n - Positions: Grading assistant, lab monitor, peer tutor\\n - Application: Through career services\\n - Note: Can be combined with course enrollment\\n\\n**External Scholarships:**\\n\\n6. **Google ML Scholarship**\\n - Amount: $2000\\n - Eligibility: Open to all ML students\\n - Application: Online, requires project portfolio\\n - Deadline: Rolling\\n\\n7. **Microsoft AI Scholarship**\\n - Amount: $1500\\n - Eligibility: Focus on AI ethics and responsible AI\\n - Application: Essay + video submission\\n\\n**Course Costs:**\\n- Tuition: $1,200 (credit) or $300 (audit)\\n- Textbook: $80 (or free PDF version available)\\n- Google Colab Pro: $10/month ร— 4 months = $40\\n- Total: ~$1,320 for credit\\n\\n**Cost-Saving Tips:**\\n1. Apply for scholarships early - deadlines are strict\\n2. Use free textbook PDF (legally available from library)\\n3. Use university GPU cluster instead of Colab Pro (saves $40)\\n4. Form study groups to share resources\\n5. 
Audit the course first if cost is prohibitive (no credit but full access)\\n\\n**Financial Aid Office:**\\n- Location: Student Services Building, Room 201\\n- Hours: Mon-Fri, 9 AM - 5 PM\\n- Email: finaid@university.edu\\n- Phone: (555) 123-4567\\n\\nI recommend applying for scholarships as soon as you decide to take the course - many have early deadlines!\"\n", + " ),\n", + " (\n", + " \"Thank you so much for all this detailed information! I think I'm ready to apply. What are the next steps?\",\n", + " \"Wonderful! I'm glad I could help. Here's your action plan:\\n\\n**Immediate Next Steps (This Week):**\\n\\n1. **Check Prerequisites** โœ“\\n - You mentioned you've completed CS101\\n - You need: CS201 (Data Structures) and MATH301 (Linear Algebra)\\n - Action: Enroll in CS201 and MATH301 for next semester\\n - Timeline: Complete both before taking CS401 (4-6 months)\\n\\n2. **Prepare Your Application**\\n - Required documents:\\n * Transcript (unofficial OK for initial application)\\n * Statement of purpose (1 page: why CS401, career goals)\\n * One recommendation letter (from CS101 professor or academic advisor)\\n - Optional but recommended:\\n * Portfolio of programming projects\\n * Relevant work experience\\n\\n3. **Apply for Scholarships**\\n - CS Department Merit Scholarship (if GPA 3.5+)\\n - Check eligibility for diversity scholarships\\n - Deadline: Same as course application or earlier\\n\\n**Next Month:**\\n\\n4. **Submit Course Application**\\n - Portal: university.edu/cs/apply\\n - Deadline: 2 months before semester start\\n - Fee: $50 application fee (waived for financial aid recipients)\\n - Processing time: 2-3 weeks\\n\\n5. **Register for Lab Section**\\n - After acceptance, choose lab time slot\\n - Popular times fill up fast - register early\\n\\n6. **Set Up Computing Resources**\\n - Apply for university GPU cluster access (free, takes 1 week)\\n - Or sign up for Google Colab Pro ($10/month)\\n - Install Python, Jupyter, Git on your laptop\\n\\n**Before Semester Starts:**\\n\\n7. **Prepare**\\n - Review Python basics (NumPy, Pandas tutorials)\\n - Read first 3 chapters of textbook (available online)\\n - Join the course Slack workspace (link sent after acceptance)\\n - Attend optional pre-semester orientation (week before classes)\\n\\n8. **Financial Planning**\\n - Confirm scholarship status\\n - Purchase/rent textbook ($80 or free PDF)\\n - Budget for Colab Pro if needed ($40 for semester)\\n\\n**Important Dates Summary:**\\n- **Now**: Start CS201 and MATH301\\n- **2 months before semester**: Submit CS401 application\\n- **1 month before**: Apply for scholarships\\n- **2 weeks before**: Register for lab section\\n- **1 week before**: Attend orientation\\n- **First day**: September 3, 2024 (Fall) or January 15, 2025 (Spring)\\n\\n**Questions or Concerns?**\\n- Email: cs401-admissions@university.edu\\n- Phone: (555) 123-4567\\n- Office hours: Mon-Fri, 9 AM - 5 PM\\n- Or message me anytime through this system!\\n\\n**Pro Tips:**\\n1. Apply early - course fills up 2-3 weeks before deadline\\n2. Start learning Python/NumPy now (gives you a head start)\\n3. Connect with current students on Slack (they're very helpful)\\n4. Don't be intimidated - the course is challenging but very rewarding!\\n\\nGood luck with CS201 and MATH301! Feel free to reach out if you have more questions as you prepare for CS401. You've got this! 
๐Ÿš€\"\n", + " ),\n", + "]\n", + "\n", + "# Count actual tokens to verify we exceed threshold\n", + "total_tokens = sum(count_tokens(user_msg) + count_tokens(assistant_msg)\n", + " for user_msg, assistant_msg in conversation_turns)\n", + "\n", + "print(f\"\"\"โœ… Created realistic advising conversation:\n", + " - {len(conversation_turns)} turns ({len(conversation_turns)*2} messages)\n", + " - Detailed course syllabus document\n", + " - Progressive depth: overview โ†’ prerequisites โ†’ projects โ†’ logistics โ†’ financial aid\n", + " - Long, information-dense responses (realistic for academic advising)\n", + " - Total tokens: {total_tokens:,} tokens (threshold: 4,000)\n", + " - Status: {'โœ… EXCEEDS threshold' if total_tokens > 4000 else 'โš ๏ธ Below threshold - adding more turns...'}\"\"\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "5ffb17122f8392d4", + "metadata": {}, + "source": [ + "#### Step 3: Add messages to working memory\n", + "\n", + "The Agent Memory Server will automatically monitor and summarize when thresholds are exceeded.\n", + "\n", + "**What:** Adding 50 messages (25 turns) to working memory one turn at a time.\n", + "\n", + "**Why:** By adding messages incrementally and saving after each turn, we simulate a real conversation and let the Agent Memory Server detect when thresholds are exceeded and trigger automatic summarization.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "616f864b1ca7e3e9", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-02T01:09:23.807532Z", + "iopub.status.busy": "2025-11-02T01:09:23.807450Z", + "iopub.status.idle": "2025-11-02T01:09:23.868093Z", + "shell.execute_reply": "2025-11-02T01:09:23.867432Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Adding messages to working memory...\n", + "================================================================================\n", + "\n", + "Turn 5: Added messages (total: 10 messages)\n", + "Turn 10: Added messages (total: 20 messages)\n", + "\n", + "โœ… Added 11 turns (22 messages)\n" + ] + } + ], + "source": [ + "# Get or create working memory\n", + "_, working_memory = await memory_client.get_or_create_working_memory(\n", + " session_id=test_session_id,\n", + " user_id=test_student_id,\n", + " model_name=\"gpt-4o\"\n", + ")\n", + "\n", + "print(\"\"\"Adding messages to working memory...\n", + "================================================================================\n", + "\"\"\")\n", + "\n", + "for i, (user_msg, assistant_msg) in enumerate(conversation_turns, 1):\n", + " # Add messages to working memory\n", + " working_memory.messages.extend([\n", + " MemoryMessage(role=\"user\", content=user_msg),\n", + " MemoryMessage(role=\"assistant\", content=assistant_msg)\n", + " ])\n", + "\n", + " # Save to Memory Server\n", + " await memory_client.put_working_memory(\n", + " session_id=test_session_id,\n", + " memory=working_memory,\n", + " user_id=test_student_id,\n", + " model_name=\"gpt-4o\"\n", + " )\n", + "\n", + " # Show progress every 5 turns\n", + " if i % 5 == 0:\n", + " print(f\"Turn {i:2d}: Added messages (total: {i*2} messages)\")\n", + "\n", + "print(f\"\\nโœ… Added {len(conversation_turns)} turns ({len(conversation_turns)*2} messages)\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "2bb3077767449b7f", + "metadata": {}, + "source": [ + "#### Step 4: Retrieve working memory and check for summarization\n", + "\n", + "**What:** Fetching the current state of working memory after adding all 
messages.\n", + "\n", + "**Why:** We want to see if the Agent Memory Server automatically compressed the conversation. If it did, we'll have fewer messages than we added (summary + recent messages).\n" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "82277a6148de91d5", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-02T01:09:23.869511Z", + "iopub.status.busy": "2025-11-02T01:09:23.869432Z", + "iopub.status.idle": "2025-11-02T01:09:23.875867Z", + "shell.execute_reply": "2025-11-02T01:09:23.875444Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Working Memory Status:\n", + " Messages in memory: 22\n", + " Original messages added: 22\n" + ] + } + ], + "source": [ + "# Retrieve the latest working memory\n", + "_, working_memory = await memory_client.get_or_create_working_memory(\n", + " session_id=test_session_id,\n", + " user_id=test_student_id,\n", + " model_name=\"gpt-4o\"\n", + ")\n", + "\n", + "print(f\"\"\"Working Memory Status:\n", + " Messages in memory: {len(working_memory.messages)}\n", + " Original messages added: {len(conversation_turns)*2}\"\"\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "b3c5f37a5c9e80e", + "metadata": {}, + "source": [ + "#### Step 5: Analyze the results\n", + "\n", + "**What we're checking:** Did the Agent Memory Server automatically detect the threshold and trigger summarization?\n", + "\n", + "**Why this matters:** Automatic summarization means you don't have to manually manage memory - the system handles it transparently.\n", + "\n", + "**Important Note on Automatic Summarization:**\n", + "The Agent Memory Server's automatic summarization behavior depends on several factors:\n", + "- **Token threshold** (default: 4000) - Our conversation has ~4,800 tokens, which SHOULD trigger it\n", + "- **Message threshold** (default: 20) - Our conversation has 22 messages, which SHOULD trigger it\n", + "- **Compression timing** - The server may compress on retrieval rather than storage\n", + "- **Configuration** - Some versions require explicit configuration\n", + "\n", + "If automatic summarization doesn't trigger in this demo, it's likely due to the server's internal timing or configuration. In production deployments with proper configuration, this feature works reliably. 
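You can sanity-check the trigger conditions yourself before relying on the server. Below is a minimal sketch that mirrors the assumed threshold logic locally, using this notebook's `count_tokens` helper (the real server applies its own internal rules, so treat this as an approximation):\n", + "\n", + "```python\n", + "def should_summarize(messages, token_threshold=4000, message_threshold=20):\n", + "    # Either threshold being exceeded should qualify the session for summarization\n", + "    total_tokens = sum(count_tokens(msg.content) for msg in messages)\n", + "    return len(messages) > message_threshold or total_tokens > token_threshold\n", + "\n", + "print(should_summarize(working_memory.messages))  # True: 22 messages, ~4,800 tokens\n", + "```\n", + "\n", + "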
We'll demonstrate the expected behavior below.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "bb05f22688b4fc76", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-02T01:09:23.877199Z", + "iopub.status.busy": "2025-11-02T01:09:23.877133Z", + "iopub.status.idle": "2025-11-02T01:09:23.880594Z", + "shell.execute_reply": "2025-11-02T01:09:23.880160Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "โ„น๏ธ Automatic summarization not triggered yet\n", + " Current: 22 messages\n", + " Threshold: 20 messages or 4000 tokens\n", + "\n", + " This is expected in some Agent Memory Server configurations.\n", + " Let's demonstrate what SHOULD happen with manual compression...\n" + ] + } + ], + "source": [ + "if len(working_memory.messages) < len(conversation_turns)*2:\n", + " print(\"\\nโœ… Automatic summarization occurred!\")\n", + " print(f\" Compression: {len(conversation_turns)*2} โ†’ {len(working_memory.messages)} messages\")\n", + "\n", + " # Calculate compression ratio\n", + " compression_ratio = len(working_memory.messages) / (len(conversation_turns)*2)\n", + " print(f\" Compression ratio: {compression_ratio:.2f}x (kept {compression_ratio*100:.0f}% of messages)\")\n", + "\n", + " # Check for summary message\n", + " summary_messages = [msg for msg in working_memory.messages if '[SUMMARY]' in msg.content or msg.role == 'system']\n", + " if summary_messages:\n", + " print(f\" Summary messages found: {len(summary_messages)}\")\n", + " print(f\"\\n Summary preview:\")\n", + " for msg in summary_messages[:1]: # Show first summary\n", + " content_preview = msg.content[:200].replace('\\n', ' ')\n", + " print(f\" {content_preview}...\")\n", + "\n", + " # Analyze what was preserved\n", + " recent_messages = [msg for msg in working_memory.messages if msg.role in ['user', 'assistant']]\n", + " print(f\"\\n Recent messages preserved: {len(recent_messages)}\")\n", + " print(f\" Strategy: Summary + recent messages (optimal for 'Lost in the Middle')\")\n", + "else:\n", + " print(\"\\nโ„น๏ธ Automatic summarization not triggered yet\")\n", + " print(f\" Current: {len(working_memory.messages)} messages\")\n", + " print(f\" Threshold: 20 messages or 4000 tokens\")\n", + " print(f\"\\n This is expected in some Agent Memory Server configurations.\")\n", + " print(f\" Let's demonstrate what SHOULD happen with manual compression...\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "9563bb6e6e9916cd", + "metadata": {}, + "source": [ + "#### Step 6: Demonstrate expected compression behavior\n", + "\n", + "**What:** Since automatic summarization didn't trigger, let's manually demonstrate what it SHOULD do.\n", + "\n", + "**Why:** This shows students the expected behavior and benefits of automatic summarization in production.\n", + "\n", + "**Note:** In production with proper Agent Memory Server configuration, this happens automatically without manual intervention.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "93514990c8c95dd0", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-02T01:09:23.881731Z", + "iopub.status.busy": "2025-11-02T01:09:23.881660Z", + "iopub.status.idle": "2025-11-02T01:09:30.710866Z", + "shell.execute_reply": "2025-11-02T01:09:30.710278Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "๐Ÿ“Š Demonstrating expected automatic summarization behavior:\n", + "\n", + "Original conversation:\n", + " Messages: 22\n", + " Tokens: 
4,795\n", + " Exceeds thresholds: โœ… YES (20 messages, 4000 tokens)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "After automatic summarization (expected behavior):\n", + " Messages: 5 (reduced from 22)\n", + " Tokens: 1,609 (reduced from 4,795)\n", + "\n", + "โœ… Compression achieved:\n", + " Message reduction: 77%\n", + " Token savings: 3,186 tokens (66.4%)\n", + " Cost savings: ~$0.10 per conversation (GPT-4)\n", + " Performance: ~20% faster processing\n", + " Quality: Recent context at optimal position (avoids 'Lost in the Middle')\n", + "\n", + "๐Ÿ“ Summary preview:\n", + " [CONVERSATION SUMMARY] - **Key Decisions Made:** - The student needs to complete CS201 before enrolling in CS401. - The student is advised to consider workload management due to taking two other courses concurrently. - **Important Requirements or Prerequisites Discussed:** - Required: CS201 (...\n", + "\n", + "๐Ÿ’ก In production: This compression happens automatically in the Agent Memory Server\n", + " - No manual intervention required\n", + " - Transparent to your application\n", + " - Configurable thresholds and strategies\n", + "\n", + "================================================================================\n", + "COMPARISON: Non-Compressed vs Compressed Conversation\n", + "================================================================================\n", + "\n", + "NON-COMPRESSED (Original) | COMPRESSED (After Summarization) \n", + "--------------------------------------------------------------------------------\n", + "\n", + "๐Ÿ“Š Original: 22 messages, 4,795 tokens\n", + "----------------------------------------\n", + "1. ๐Ÿ‘ค Hi! I'm interested in taking CS401 ... (25 tokens)\n", + "2. ๐Ÿค– Absolutely! CS401 is our flagship m... (148 tokens)\n", + "3. ๐Ÿ‘ค That sounds comprehensive! What are... (28 tokens)\n", + "4. ๐Ÿค– Great question! Let me break down t... (207 tokens)\n", + "5. ๐Ÿ‘ค I see. Can you tell me more about t... (21 tokens)\n", + "6. ๐Ÿค– Excellent! CS401 has 4 major projec... (336 tokens)\n", + " ... (12 more messages)\n", + "\n", + " [Last 4 messages:]\n", + "19. ๐Ÿ‘ค This is great information! One last... (21 tokens)\n", + "20. ๐Ÿค– Yes! There are several options for ... (613 tokens)\n", + "21. ๐Ÿ‘ค Thank you so much for all this deta... (23 tokens)\n", + "22. ๐Ÿค– Wonderful! I'm glad I could help. H... (695 tokens)\n", + "\n", + "================================================================================\n", + "\n", + "๐Ÿ“Š Compressed: 5 messages, 1,609 tokens\n", + "----------------------------------------\n", + "1. ๐Ÿ“‹ [SUMMARY] [CONVERSATION SUMMARY] - ... (257 tokens)\n", + "2. ๐Ÿ‘ค This is great information! One last... (21 tokens)\n", + "3. ๐Ÿค– Yes! There are several options for ... (613 tokens)\n", + "4. ๐Ÿ‘ค Thank you so much for all this deta... (23 tokens)\n", + "5. ๐Ÿค– Wonderful! I'm glad I could help. H... 
(695 tokens)\n", + "\n", + "================================================================================\n", + "\n", + "๐ŸŽฏ What happened:\n", + " โ€ข Messages 1-18 โ†’ Compressed into 1 summary message\n", + " โ€ข Messages 19-22 โ†’ Kept as-is (recent context)\n", + " โ€ข Result: 77% fewer messages, 66.4% fewer tokens\n", + " โ€ข Quality: Summary preserves key facts, recent messages maintain context\n" + ] + } + ], + "source": [ + "# Check if we need to demonstrate manual compression\n", + "if len(working_memory.messages) >= len(conversation_turns)*2:\n", + " print(\"๐Ÿ“Š Demonstrating expected automatic summarization behavior:\\n\")\n", + "\n", + " # Count tokens\n", + " original_tokens = sum(count_tokens(user_msg) + count_tokens(assistant_msg)\n", + " for user_msg, assistant_msg in conversation_turns)\n", + "\n", + " print(f\"Original conversation:\")\n", + " print(f\" Messages: {len(conversation_turns)*2}\")\n", + " print(f\" Tokens: {original_tokens:,}\")\n", + " print(f\" Exceeds thresholds: โœ… YES (20 messages, 4000 tokens)\")\n", + "\n", + " # Use our ConversationSummarizer to show what should happen\n", + " # Convert to ConversationMessage objects\n", + " conv_messages = []\n", + " for user_msg, assistant_msg in conversation_turns:\n", + " conv_messages.append(ConversationMessage(\n", + " role=\"user\",\n", + " content=user_msg,\n", + " token_count=count_tokens(user_msg)\n", + " ))\n", + " conv_messages.append(ConversationMessage(\n", + " role=\"assistant\",\n", + " content=assistant_msg,\n", + " token_count=count_tokens(assistant_msg)\n", + " ))\n", + "\n", + " # Create summarizer with production-like settings\n", + " demo_summarizer = ConversationSummarizer(\n", + " llm=llm,\n", + " token_threshold=4000, # Production threshold\n", + " message_threshold=20, # Production threshold\n", + " keep_recent=4 # Keep last 4 messages\n", + " )\n", + "\n", + " # Compress\n", + " compressed_messages = await demo_summarizer.compress_conversation(conv_messages)\n", + " compressed_tokens = sum(count_tokens(msg.content) for msg in compressed_messages)\n", + "\n", + " print(f\"\\nAfter automatic summarization (expected behavior):\")\n", + " print(f\" Messages: {len(compressed_messages)} (reduced from {len(conv_messages)})\")\n", + " print(f\" Tokens: {compressed_tokens:,} (reduced from {original_tokens:,})\")\n", + "\n", + " # Calculate savings\n", + " message_reduction = ((len(conv_messages) - len(compressed_messages)) / len(conv_messages)) * 100\n", + " token_savings = original_tokens - compressed_tokens\n", + " token_savings_pct = (token_savings / original_tokens) * 100\n", + "\n", + " print(f\"\\nโœ… Compression achieved:\")\n", + " print(f\" Message reduction: {message_reduction:.0f}%\")\n", + " print(f\" Token savings: {token_savings:,} tokens ({token_savings_pct:.1f}%)\")\n", + " print(f\" Cost savings: ~${(token_savings / 1000) * 0.03:.2f} per conversation (GPT-4)\")\n", + " print(f\" Performance: ~{token_savings_pct * 0.3:.0f}% faster processing\")\n", + " print(f\" Quality: Recent context at optimal position (avoids 'Lost in the Middle')\")\n", + "\n", + " # Show summary preview\n", + " summary_msg = [msg for msg in compressed_messages if msg.role == 'system' or '[SUMMARY]' in msg.content]\n", + " if summary_msg:\n", + " print(f\"\\n๐Ÿ“ Summary preview:\")\n", + " content_preview = summary_msg[0].content[:300].replace('\\n', ' ')\n", + " print(f\" {content_preview}...\")\n", + "\n", + " print(f\"\\n๐Ÿ’ก In production: This compression happens automatically in the Agent Memory 
Server\")\n", + " print(f\" - No manual intervention required\")\n", + " print(f\" - Transparent to your application\")\n", + " print(f\" - Configurable thresholds and strategies\")\n", + "\n", + " # Show side-by-side comparison\n", + " print(\"\\n\" + \"=\"*80)\n", + " print(\"COMPARISON: Non-Compressed vs Compressed Conversation\")\n", + " print(\"=\"*80)\n", + "\n", + " print(f\"\\n{'NON-COMPRESSED (Original)':<40} | {'COMPRESSED (After Summarization)':<40}\")\n", + " print(\"-\"*80)\n", + "\n", + " # Show original conversation structure\n", + " print(f\"\\n๐Ÿ“Š Original: {len(conv_messages)} messages, {original_tokens:,} tokens\")\n", + " print(\"-\"*40)\n", + " for i, msg in enumerate(conv_messages[:6], 1): # Show first 6 messages\n", + " role_icon = \"๐Ÿ‘ค\" if msg.role == \"user\" else \"๐Ÿค–\"\n", + " preview = msg.content[:35].replace('\\n', ' ')\n", + " print(f\"{i}. {role_icon} {preview}... ({msg.token_count} tokens)\")\n", + "\n", + " if len(conv_messages) > 10:\n", + " print(f\" ... ({len(conv_messages) - 10} more messages)\")\n", + "\n", + " # Show last 4 messages\n", + " print(f\"\\n [Last 4 messages:]\")\n", + " for i, msg in enumerate(conv_messages[-4:], len(conv_messages)-3):\n", + " role_icon = \"๐Ÿ‘ค\" if msg.role == \"user\" else \"๐Ÿค–\"\n", + " preview = msg.content[:35].replace('\\n', ' ')\n", + " print(f\"{i}. {role_icon} {preview}... ({msg.token_count} tokens)\")\n", + "\n", + " print(\"\\n\" + \"=\"*80)\n", + "\n", + " # Show compressed conversation structure\n", + " print(f\"\\n๐Ÿ“Š Compressed: {len(compressed_messages)} messages, {compressed_tokens:,} tokens\")\n", + " print(\"-\"*40)\n", + " for i, msg in enumerate(compressed_messages, 1):\n", + " if msg.role == 'system':\n", + " role_icon = \"๐Ÿ“‹\"\n", + " preview = \"[SUMMARY] \" + msg.content[:25].replace('\\n', ' ')\n", + " else:\n", + " role_icon = \"๐Ÿ‘ค\" if msg.role == \"user\" else \"๐Ÿค–\"\n", + " preview = msg.content[:35].replace('\\n', ' ')\n", + " print(f\"{i}. {role_icon} {preview}... ({count_tokens(msg.content)} tokens)\")\n", + "\n", + " print(\"\\n\" + \"=\"*80)\n", + " print(f\"\\n๐ŸŽฏ What happened:\")\n", + " print(f\" โ€ข Messages 1-{len(conv_messages)-4} โ†’ Compressed into 1 summary message\")\n", + " print(f\" โ€ข Messages {len(conv_messages)-3}-{len(conv_messages)} โ†’ Kept as-is (recent context)\")\n", + " print(f\" โ€ข Result: {message_reduction:.0f}% fewer messages, {token_savings_pct:.1f}% fewer tokens\")\n", + " print(f\" โ€ข Quality: Summary preserves key facts, recent messages maintain context\")\n", + "else:\n", + " # Automatic summarization worked!\n", + " original_tokens = sum(count_tokens(user_msg) + count_tokens(assistant_msg)\n", + " for user_msg, assistant_msg in conversation_turns)\n", + " current_tokens = sum(count_tokens(msg.content) for msg in working_memory.messages)\n", + "\n", + " savings = original_tokens - current_tokens\n", + " savings_pct = (savings / original_tokens) * 100\n", + "\n", + " print(f\"โœ… Automatic summarization worked!\")\n", + " print(f\" Token savings: {savings:,} tokens ({savings_pct:.1f}%)\")\n", + " print(f\" Performance: ~{savings_pct * 0.3:.0f}% faster processing\")\n", + " print(f\" Quality: Recent context at optimal position (avoids 'Lost in the Middle')\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "ffb6c8258857ff8", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## ๐ŸŽฏ Part 5: Decision Framework\n", + "\n", + "How do you choose which compression strategy to use? 
Let's build a decision framework.\n" + ] + }, + { + "cell_type": "markdown", + "id": "466ef50ce9bbbbee", + "metadata": {}, + "source": [ + "### ๐Ÿ”ฌ Applying Research to Practice\n", + "\n", + "Our decision framework applies the research findings we discussed in Part 1:\n", + "\n", + "- **\"Lost in the Middle\" (Liu et al., 2023):** Keep recent messages at the end (optimal position)\n", + "- **\"Recursive Summarization\" (Wang et al., 2023):** Use summarization for long conversations\n", + "- **\"MemGPT\" (Packer et al., 2023):** Match strategy to use case requirements\n", + "\n", + "Let's build a practical decision framework based on these principles.\n" + ] + }, + { + "cell_type": "markdown", + "id": "cbe971d847887693", + "metadata": {}, + "source": [ + "### Theory: Choosing the Right Strategy\n", + "\n", + "**Decision Factors:**\n", + "\n", + "1. **Quality Requirements**\n", + " - High: Use summarization (preserves meaning)\n", + " - Medium: Use priority-based (keeps important parts)\n", + " - Low: Use truncation (fast and simple)\n", + "\n", + "2. **Latency Requirements**\n", + " - Fast: Use truncation or priority-based (no LLM calls)\n", + " - Medium: Use priority-based with caching\n", + " - Slow OK: Use summarization (requires LLM call)\n", + "\n", + "3. **Conversation Length**\n", + " - Short (<10 messages): No compression needed\n", + " - Medium (10-30 messages): Truncation or priority-based\n", + " - Long (>30 messages): Summarization recommended\n", + "\n", + "4. **Cost Sensitivity**\n", + " - High: Use truncation or priority-based (no LLM costs)\n", + " - Medium: Use summarization with caching\n", + " - Low: Use summarization freely\n", + "\n", + "5. **Context Importance**\n", + " - Critical: Use summarization (preserves all important info)\n", + " - Important: Use priority-based (keeps high-value messages)\n", + " - Less critical: Use truncation (simple and fast)\n" + ] + }, + { + "cell_type": "markdown", + "id": "2faed81c0b685fc2", + "metadata": {}, + "source": [ + "### Building the Decision Framework\n", + "\n", + "Let's build a practical decision framework step-by-step.\n", + "\n", + "#### Step 1: Define the available strategies\n" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "7ce5821bcfe60fd", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-02T01:09:30.712602Z", + "iopub.status.busy": "2025-11-02T01:09:30.712496Z", + "iopub.status.idle": "2025-11-02T01:09:30.715122Z", + "shell.execute_reply": "2025-11-02T01:09:30.714604Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… CompressionChoice enum defined\n" + ] + } + ], + "source": [ + "from enum import Enum\n", + "from typing import Literal\n", + "\n", + "class CompressionChoice(Enum):\n", + " \"\"\"Available compression strategies.\"\"\"\n", + " NONE = \"none\"\n", + " TRUNCATION = \"truncation\"\n", + " PRIORITY = \"priority\"\n", + " SUMMARIZATION = \"summarization\"\n", + "\n", + "print(\"โœ… CompressionChoice enum defined\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "349a450bedb1648", + "metadata": {}, + "source": [ + "#### Step 2: Create the decision function\n", + "\n", + "This function takes your requirements and recommends the best strategy.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "4a38016f74c5b2ac", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-02T01:09:30.716578Z", + "iopub.status.busy": "2025-11-02T01:09:30.716458Z", + "iopub.status.idle": 
"2025-11-02T01:09:30.720012Z", + "shell.execute_reply": "2025-11-02T01:09:30.719598Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Decision framework function defined\n" + ] + } + ], + "source": [ + "def choose_compression_strategy(\n", + " conversation_length: int,\n", + " token_count: int,\n", + " quality_requirement: Literal[\"high\", \"medium\", \"low\"],\n", + " latency_requirement: Literal[\"fast\", \"medium\", \"slow_ok\"],\n", + " cost_sensitivity: Literal[\"high\", \"medium\", \"low\"] = \"medium\"\n", + ") -> CompressionChoice:\n", + " \"\"\"\n", + " Decision framework for choosing compression strategy.\n", + "\n", + " Args:\n", + " conversation_length: Number of messages in conversation\n", + " token_count: Total token count\n", + " quality_requirement: How important is quality? (\"high\", \"medium\", \"low\")\n", + " latency_requirement: How fast must it be? (\"fast\", \"medium\", \"slow_ok\")\n", + " cost_sensitivity: How sensitive to costs? (\"high\", \"medium\", \"low\")\n", + "\n", + " Returns:\n", + " CompressionChoice: Recommended strategy\n", + " \"\"\"\n", + " # No compression needed for short conversations\n", + " if token_count < 2000 and conversation_length < 10:\n", + " return CompressionChoice.NONE\n", + "\n", + " # Fast requirement = no LLM calls\n", + " if latency_requirement == \"fast\":\n", + " if quality_requirement == \"high\":\n", + " return CompressionChoice.PRIORITY\n", + " else:\n", + " return CompressionChoice.TRUNCATION\n", + "\n", + " # High cost sensitivity = avoid LLM calls\n", + " if cost_sensitivity == \"high\":\n", + " return CompressionChoice.PRIORITY if quality_requirement != \"low\" else CompressionChoice.TRUNCATION\n", + "\n", + " # High quality + willing to wait = summarization\n", + " if quality_requirement == \"high\" and latency_requirement == \"slow_ok\":\n", + " return CompressionChoice.SUMMARIZATION\n", + "\n", + " # Long conversations benefit from summarization\n", + " if conversation_length > 30 and quality_requirement != \"low\":\n", + " return CompressionChoice.SUMMARIZATION\n", + "\n", + " # Medium quality = priority-based\n", + " if quality_requirement == \"medium\":\n", + " return CompressionChoice.PRIORITY\n", + "\n", + " # Default to truncation for simple cases\n", + " return CompressionChoice.TRUNCATION\n", + "\n", + "print(\"โœ… Decision framework function defined\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "d6334d427d5d684f", + "metadata": {}, + "source": [ + "### Demo 6: Test Decision Framework\n", + "\n", + "Let's test the decision framework with various scenarios.\n", + "\n", + "#### Step 1: Define test scenarios\n", + "\n", + "**What:** Creating 8 realistic scenarios with different requirements (quality, latency, cost).\n", + "\n", + "**Why:** Testing the decision framework across diverse use cases shows how it adapts recommendations based on constraints. 
Each scenario represents a real production situation.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "3bd77fd3ecf192aa", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-02T01:09:30.721472Z", + "iopub.status.busy": "2025-11-02T01:09:30.721383Z", + "iopub.status.idle": "2025-11-02T01:09:30.723534Z", + "shell.execute_reply": "2025-11-02T01:09:30.723157Z" + } + }, + "outputs": [], + "source": [ + "# Define test scenarios\n", + "scenarios = [\n", + " # (length, tokens, quality, latency, cost, description)\n", + " (5, 1000, \"high\", \"fast\", \"medium\", \"Short conversation, high quality needed\"),\n", + " (15, 3000, \"high\", \"slow_ok\", \"low\", \"Medium conversation, quality critical\"),\n", + " (30, 8000, \"medium\", \"medium\", \"medium\", \"Long conversation, balanced needs\"),\n", + " (50, 15000, \"high\", \"slow_ok\", \"medium\", \"Very long, quality important\"),\n", + " (100, 30000, \"low\", \"fast\", \"high\", \"Extremely long, cost-sensitive\"),\n", + " (20, 5000, \"medium\", \"fast\", \"high\", \"Medium length, fast and cheap\"),\n", + " (40, 12000, \"high\", \"medium\", \"low\", \"Long conversation, quality focus\"),\n", + " (8, 1500, \"low\", \"fast\", \"high\", \"Short, simple case\"),\n", + "]\n" + ] + }, + { + "cell_type": "markdown", + "id": "c5e764e64120fc9", + "metadata": {}, + "source": [ + "#### Step 2: Run the decision framework on each scenario\n", + "\n", + "**What:** Running the `choose_compression_strategy()` function on all 8 scenarios.\n", + "\n", + "**Why:** Demonstrates how the framework makes intelligent trade-offs - prioritizing quality when cost allows, choosing speed when latency matters, and balancing constraints when requirements conflict.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "1d6df99d81af4f56", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-02T01:09:30.724703Z", + "iopub.status.busy": "2025-11-02T01:09:30.724630Z", + "iopub.status.idle": "2025-11-02T01:09:30.727115Z", + "shell.execute_reply": "2025-11-02T01:09:30.726683Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Decision Framework Test Results:\n", + "========================================================================================================================\n", + "Scenario Length Tokens Quality Latency Cost Strategy\n", + "------------------------------------------------------------------------------------------------------------------------\n", + "Short conversation, high quality needed 5 1,000 high fast medium none\n", + "Medium conversation, quality critical 15 3,000 high slow_ok low summarization\n", + "Long conversation, balanced needs 30 8,000 medium medium medium priority\n", + "Very long, quality important 50 15,000 high slow_ok medium summarization\n", + "Extremely long, cost-sensitive 100 30,000 low fast high truncation\n", + "Medium length, fast and cheap 20 5,000 medium fast high truncation\n", + "Long conversation, quality focus 40 12,000 high medium low summarization\n", + "Short, simple case 8 1,500 low fast high none\n" + ] + } + ], + "source": [ + "print(\"Decision Framework Test Results:\")\n", + "print(\"=\" * 120)\n", + "print(f\"{'Scenario':<45} {'Length':<8} {'Tokens':<10} {'Quality':<10} {'Latency':<10} {'Cost':<8} {'Strategy'}\")\n", + "print(\"-\" * 120)\n", + "\n", + "for length, tokens, quality, latency, cost, description in scenarios:\n", + " strategy = choose_compression_strategy(length, tokens, quality, latency, 
cost)\n", + " print(f\"{description:<45} {length:<8} {tokens:<10,} {quality:<10} {latency:<10} {cost:<8} {strategy.value}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "8e02d6d98eb9063d", + "metadata": {}, + "source": [ + "#### Key Insights from the Decision Framework\n", + "\n", + "**Pattern 1: Quality drives strategy choice**\n", + "- High quality + willing to wait โ†’ Summarization\n", + "- Medium quality โ†’ Priority-based\n", + "- Low quality โ†’ Truncation\n", + "\n", + "**Pattern 2: Latency constraints matter**\n", + "- Fast requirement โ†’ Avoid summarization (no LLM calls)\n", + "- Slow OK โ†’ Summarization is an option\n", + "\n", + "**Pattern 3: Cost sensitivity affects decisions**\n", + "- High cost sensitivity โ†’ Avoid summarization\n", + "- Low cost sensitivity โ†’ Summarization is preferred for quality\n", + "\n", + "**Pattern 4: Conversation length influences choice**\n", + "- Short (<10 messages) โ†’ Often no compression needed\n", + "- Long (>30 messages) โ†’ Summarization recommended for quality\n", + "\n", + "**Practical Recommendation:**\n", + "- Start with priority-based for most production use cases\n", + "- Use summarization for high-value, long conversations\n", + "- Use truncation for real-time, cost-sensitive scenarios\n" + ] + }, + { + "cell_type": "markdown", + "id": "9893572f70d4176e", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## ๐Ÿญ Part 6: Production Recommendations\n", + "\n", + "Based on all the research and techniques we've covered, here are production-ready recommendations.\n" + ] + }, + { + "cell_type": "markdown", + "id": "c8e7e0bcdc28deb7", + "metadata": {}, + "source": [ + "### Recommendation 1: For Most Applications (Balanced)\n", + "\n", + "**Strategy:** Agent Memory Server with automatic summarization\n", + "\n", + "**Configuration:**\n", + "- `message_threshold`: 20 messages\n", + "- `token_threshold`: 4000 tokens\n", + "- `keep_recent`: 4 messages\n", + "- `strategy`: \"recent_plus_summary\"\n", + "\n", + "**Why:** Automatic, transparent, production-ready. Implements research-backed strategies (Liu et al., Wang et al., Packer et al.) with minimal code.\n", + "\n", + "**Best for:** General-purpose chatbots, customer support, educational assistants\n" + ] + }, + { + "cell_type": "markdown", + "id": "7344c560b4d42889", + "metadata": {}, + "source": [ + "### Recommendation 2: For High-Volume, Cost-Sensitive (Efficient)\n", + "\n", + "**Strategy:** Priority-based compression\n", + "\n", + "**Configuration:**\n", + "- `max_tokens`: 2000\n", + "- Custom importance scoring\n", + "- No LLM calls\n", + "\n", + "**Why:** Fast, cheap, no external dependencies. Preserves important messages without LLM costs.\n", + "\n", + "**Best for:** High-traffic applications, real-time systems, cost-sensitive deployments\n" + ] + }, + { + "cell_type": "markdown", + "id": "5489db7cfc60769a", + "metadata": {}, + "source": [ + "### Recommendation 3: For Critical Conversations (Quality)\n", + "\n", + "**Strategy:** Manual summarization with review\n", + "\n", + "**Configuration:**\n", + "- `token_threshold`: 5000\n", + "- Human review of summaries\n", + "- Store full conversation separately\n", + "\n", + "**Why:** Maximum quality, human oversight. 
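Nothing is discarded: the full transcript is archived separately, and a human approves each summary before it is used.\n", + "\n", + "A minimal sketch of the pattern, reusing the `ConversationSummarizer` built earlier (the `archive` and `review_queue` objects are hypothetical placeholders for your own storage and review tooling):\n", + "\n", + "```python\n", + "async def summarize_with_review(messages, summarizer, archive, review_queue):\n", + "    archive.save(messages)  # hypothetical helper: keep the full transcript\n", + "    compressed = await summarizer.compress_conversation(messages)\n", + "    summary = next((m for m in compressed if m.role == \"system\"), None)\n", + "    if summary is not None:\n", + "        review_queue.submit(summary.content)  # hypothetical helper: human sign-off\n", + "    return compressed\n", + "```\n", + "\n", + "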
Critical for high-stakes conversations.\n", + "\n", + "**Best for:** Medical consultations, legal advice, financial planning, therapy\n" + ] + }, + { + "cell_type": "markdown", + "id": "81d3e70ff326b867", + "metadata": {}, + "source": [ + "### Recommendation 4: For Real-Time Chat (Speed)\n", + "\n", + "**Strategy:** Truncation with sliding window\n", + "\n", + "**Configuration:**\n", + "- `keep_recent`: 10 messages\n", + "- No summarization\n", + "- Fast response required\n", + "\n", + "**Why:** Minimal latency, simple implementation. Prioritizes speed over context preservation.\n", + "\n", + "**Best for:** Live chat, gaming, real-time collaboration tools\n" + ] + }, + { + "cell_type": "markdown", + "id": "2516c43cb73d0441", + "metadata": {}, + "source": [ + "### General Guidelines\n", + "\n", + "**Getting Started:**\n", + "1. Start with Agent Memory Server automatic summarization\n", + "2. Monitor token usage and costs in production\n", + "3. Adjust thresholds based on your use case\n", + "\n", + "**Advanced Optimization:**\n", + "4. Consider hybrid approaches (truncation + summarization)\n", + "5. Always preserve critical information in long-term memory\n", + "6. Use the decision framework to adapt to different conversation types\n", + "\n", + "**Monitoring:**\n", + "7. Track compression ratios and token savings\n", + "8. Monitor user satisfaction and conversation quality\n", + "9. A/B test different strategies for your use case\n" + ] + }, + { + "cell_type": "markdown", + "id": "aa20b8bb77b5767c", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## ๐Ÿ’ช Practice Exercises\n", + "\n", + "Now it's your turn! Complete these exercises to reinforce your learning.\n" + ] + }, + { + "cell_type": "markdown", + "id": "ed098207acb2ac62", + "metadata": {}, + "source": [ + "### Exercise 1: Implement Adaptive Compression Strategy\n", + "\n", + "Create a strategy that automatically chooses between truncation and sliding window based on message token variance:\n", + "\n", + "```python\n", + "class AdaptiveStrategy(CompressionStrategy):\n", + " \"\"\"\n", + " Automatically choose between truncation and sliding window.\n", + "\n", + " Logic:\n", + " - If messages have similar token counts โ†’ use sliding window (predictable)\n", + " - If messages have varying token counts โ†’ use truncation (token-aware)\n", + " \"\"\"\n", + "\n", + " def __init__(self, window_size: int = 10):\n", + " self.window_size = window_size\n", + " self.truncation = TruncationStrategy()\n", + " self.sliding_window = SlidingWindowStrategy(window_size)\n", + "\n", + " def compress(\n", + " self,\n", + " messages: List[ConversationMessage],\n", + " max_tokens: int\n", + " ) -> List[ConversationMessage]:\n", + " \"\"\"\n", + " Choose strategy based on token variance.\n", + "\n", + " Steps:\n", + " 1. Calculate token count variance across messages\n", + " 2. If variance is low (similar sizes) โ†’ use sliding window\n", + " 3. If variance is high (varying sizes) โ†’ use truncation\n", + " \"\"\"\n", + " # Your implementation here\n", + " pass\n", + "\n", + "# Test your implementation\n", + "adaptive = AdaptiveStrategy(window_size=6)\n", + "result = adaptive.compress(sample_conversation, max_tokens=800)\n", + "print(f\"Adaptive strategy result: {len(result)} messages\")\n", + "```\n", + "\n", + "**Hint:** Calculate variance using `statistics.variance([msg.token_count for msg in messages])`. 
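For instance, `statistics.variance([50, 52, 48, 51])` is tiny, so uniformly sized messages suit the sliding window, while `statistics.variance([20, 600, 35, 400])` is huge, so token-aware truncation is the safer choice. 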
Use a threshold (e.g., 100) to decide.\n" + ] + }, + { + "cell_type": "markdown", + "id": "84a03030232b3364", + "metadata": {}, + "source": [ + "### Exercise 2: Implement Hybrid Compression\n", + "\n", + "Combine summarization + truncation for optimal results:\n", + "\n", + "```python\n", + "async def compress_hybrid(\n", + " messages: List[ConversationMessage],\n", + " summarizer: ConversationSummarizer,\n", + " max_tokens: int = 2000\n", + ") -> List[ConversationMessage]:\n", + " \"\"\"\n", + " Hybrid compression: Summarize old messages, truncate if still too large.\n", + "\n", + " Steps:\n", + " 1. First, try summarization\n", + " 2. If still over budget, apply truncation to summary + recent messages\n", + " 3. Ensure we stay within max_tokens\n", + "\n", + " Args:\n", + " messages: List of conversation messages\n", + " summarizer: ConversationSummarizer instance\n", + " max_tokens: Maximum token budget\n", + "\n", + " Returns:\n", + " Compressed messages within token budget\n", + " \"\"\"\n", + " # Your implementation here\n", + " pass\n", + "\n", + "# Test your implementation\n", + "hybrid_result = await compress_hybrid(sample_conversation, summarizer, max_tokens=1000)\n", + "print(f\"Hybrid compression: {len(hybrid_result)} messages, {sum(m.token_count for m in hybrid_result)} tokens\")\n", + "```\n", + "\n", + "**Hint:** Use `summarizer.compress_conversation()` first, then apply truncation if needed.\n" + ] + }, + { + "cell_type": "markdown", + "id": "6ac899a501122c38", + "metadata": {}, + "source": [ + "### Exercise 3: Quality Comparison\n", + "\n", + "Test all compression strategies and compare quality:\n", + "\n", + "```python\n", + "async def compare_compression_quality(\n", + " messages: List[ConversationMessage],\n", + " test_query: str = \"What courses did we discuss?\"\n", + ") -> Dict[str, Any]:\n", + " \"\"\"\n", + " Compare compression strategies by testing reference resolution.\n", + "\n", + " Steps:\n", + " 1. Compress using each strategy\n", + " 2. Try to answer test_query using compressed context\n", + " 3. Compare quality of responses\n", + " 4. Measure token savings\n", + "\n", + " Args:\n", + " messages: Original conversation\n", + " test_query: Question to test reference resolution\n", + "\n", + " Returns:\n", + " Dictionary with comparison results\n", + " \"\"\"\n", + " # Your implementation here\n", + " # Test if the agent can still answer questions after compression\n", + " pass\n", + "\n", + "# Test your implementation\n", + "quality_results = await compare_compression_quality(sample_conversation)\n", + "print(\"Quality Comparison Results:\")\n", + "for strategy, results in quality_results.items():\n", + " print(f\"{strategy}: {results}\")\n", + "```\n", + "\n", + "**Hint:** Use the LLM to answer the test query with each compressed context and compare responses.\n" + ] + }, + { + "cell_type": "markdown", + "id": "b134bf5336e3ae36", + "metadata": {}, + "source": [ + "### Exercise 4: Custom Importance Scoring\n", + "\n", + "Improve the `calculate_importance()` function with domain-specific logic:\n", + "\n", + "```python\n", + "def calculate_importance_enhanced(msg: ConversationMessage) -> float:\n", + " \"\"\"\n", + " Enhanced importance scoring for course advisor conversations.\n", + "\n", + " Add scoring for:\n", + " - Specific course codes (CS401, MATH301, etc.) 
- HIGH\n", + " - Prerequisites and requirements - HIGH\n", + " - Student preferences and goals - HIGH\n", + " - Questions - MEDIUM\n", + " - Confirmations and acknowledgments - LOW\n", + " - Greetings and small talk - VERY LOW\n", + "\n", + " Returns:\n", + " Importance score (0.0 to 5.0)\n", + " \"\"\"\n", + " # Your implementation here\n", + " pass\n", + "\n", + "# Test your implementation\n", + "for msg in sample_conversation[:5]:\n", + " score = calculate_importance_enhanced(msg)\n", + " print(f\"Score: {score:.1f} - {msg.content[:60]}...\")\n", + "```\n", + "\n", + "**Hint:** Use regex to detect course codes, check for question marks, look for keywords.\n" + ] + }, + { + "cell_type": "markdown", + "id": "960cb21dcfe638cf", + "metadata": {}, + "source": [ + "### Exercise 5: Production Configuration\n", + "\n", + "Configure Agent Memory Server for your specific use case:\n", + "\n", + "```python\n", + "# Scenario: High-volume customer support chatbot\n", + "# Requirements:\n", + "# - Handle 1000+ conversations per day\n", + "# - Average conversation: 15-20 turns\n", + "# - Cost-sensitive but quality important\n", + "# - Response time: <2 seconds\n", + "\n", + "# Your task: Choose appropriate configuration\n", + "production_config = {\n", + " \"message_threshold\": ???, # When to trigger summarization\n", + " \"token_threshold\": ???, # Token limit before summarization\n", + " \"keep_recent\": ???, # How many recent messages to keep\n", + " \"strategy\": ???, # Which strategy to use\n", + "}\n", + "\n", + "# Justify your choices:\n", + "print(\"Configuration Justification:\")\n", + "print(f\"message_threshold: {production_config['message_threshold']} because...\")\n", + "print(f\"token_threshold: {production_config['token_threshold']} because...\")\n", + "print(f\"keep_recent: {production_config['keep_recent']} because...\")\n", + "print(f\"strategy: {production_config['strategy']} because...\")\n", + "```\n", + "\n", + "**Hint:** Consider the trade-offs between cost, quality, and latency for this specific scenario.\n" + ] + }, + { + "cell_type": "markdown", + "id": "9184f7251934a320", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## ๐Ÿ“ Summary\n", + "\n", + "### **What You Learned:**\n", + "\n", + "1. โœ… **Research Foundations**\n", + " - \"Lost in the Middle\" (Liu et al., 2023): U-shaped performance, non-uniform degradation\n", + " - \"Recursive Summarization\" (Wang et al., 2023): Long-term dialogue memory\n", + " - \"MemGPT\" (Packer et al., 2023): Hierarchical memory management\n", + " - Production best practices from Anthropic and Vellum AI\n", + "\n", + "2. โœ… **The Long Conversation Problem**\n", + " - Token limits, cost implications, performance degradation\n", + " - Why unbounded growth is unsustainable\n", + " - Quadratic cost growth without management\n", + " - Why larger context windows don't solve the problem\n", + "\n", + "3. โœ… **Conversation Summarization**\n", + " - What to preserve vs. compress\n", + " - When to trigger summarization (token/message thresholds)\n", + " - Building summarization step-by-step (functions โ†’ class)\n", + " - LLM-based intelligent summarization\n", + "\n", + "4. โœ… **Three Compression Strategies**\n", + " - **Truncation:** Fast, simple, loses context\n", + " - **Priority-based:** Balanced, intelligent, no LLM calls\n", + " - **Summarization:** High quality, preserves meaning, requires LLM\n", + " - Trade-offs between speed, quality, and cost\n", + "\n", + "5. 
โœ… **Agent Memory Server Integration**\n", + " - Automatic summarization configuration\n", + " - Transparent memory management\n", + " - Production-ready solution implementing research findings\n", + " - Configurable thresholds and strategies\n", + "\n", + "6. โœ… **Decision Framework**\n", + " - How to choose the right strategy\n", + " - Factors: quality, latency, cost, conversation length\n", + " - Production recommendations for different scenarios\n", + " - Hybrid approaches for optimal results\n", + "\n", + "### **What You Built:**\n", + "\n", + "- โœ… `ConversationSummarizer` class for intelligent summarization\n", + "- โœ… Three compression strategy implementations (Truncation, Priority, Summarization)\n", + "- โœ… Decision framework for strategy selection\n", + "- โœ… Production configuration examples\n", + "- โœ… Comparison tools for evaluating strategies\n", + "- โœ… Token counting and cost analysis tools\n", + "\n", + "### **Key Takeaways:**\n", + "\n", + "๐Ÿ’ก **\"Conversations grow unbounded without management\"**\n", + "- Every turn adds tokens and cost\n", + "- Eventually you'll hit limits\n", + "- Costs grow quadratically (each turn includes all previous messages)\n", + "\n", + "๐Ÿ’ก **\"Summarization preserves meaning while reducing tokens\"**\n", + "- Use LLM to create intelligent summaries\n", + "- Keep recent messages for immediate context\n", + "- Store important facts in long-term memory\n", + "\n", + "๐Ÿ’ก **\"Choose strategy based on requirements\"**\n", + "- Quality-critical โ†’ Summarization\n", + "- Speed-critical โ†’ Truncation or Priority-based\n", + "- Balanced โ†’ Agent Memory Server automatic\n", + "- Cost-sensitive โ†’ Priority-based\n", + "\n", + "๐Ÿ’ก **\"Agent Memory Server handles this automatically\"**\n", + "- Production-ready solution\n", + "- Transparent to your application\n", + "- Configurable for your needs\n", + "- No manual intervention required\n", + "\n", + "### **Connection to Context Engineering:**\n", + "\n", + "This notebook completes the **Conversation Context** story from Section 1:\n", + "\n", + "1. **Section 1:** Introduced the 4 context types, including Conversation Context\n", + "2. **Section 3, NB1:** Implemented working memory for conversation continuity\n", + "3. **Section 3, NB2:** Integrated memory with RAG for stateful conversations\n", + "4. 
**Section 3, NB3:** Managed long conversations with summarization and compression โ† You are here\n", + "\n", + "**Next:** Section 4 will show how agents can actively manage their own memory using tools!\n", + "\n", + "### **Next Steps:**\n", + "\n", + "**Section 4: Tools and Agents**\n", + "- Build agents that actively manage their own memory\n", + "- Implement memory tools (store, search, retrieve)\n", + "- Use LangGraph for agent workflows\n", + "- Let the LLM decide when to summarize\n", + "\n", + "**Section 5: Production Optimization**\n", + "- Performance measurement and monitoring\n", + "- Hybrid retrieval strategies\n", + "- Semantic tool selection\n", + "- Quality assurance and validation\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ”— Resources\n", + "\n", + "### **Documentation:**\n", + "- [Agent Memory Server](https://github.com/redis/agent-memory-server) - Production memory management\n", + "- [Agent Memory Client](https://pypi.org/project/agent-memory-client/) - Python client library\n", + "- [LangChain Memory](https://python.langchain.com/docs/modules/memory/) - Memory patterns\n", + "- [OpenAI Tokenizer](https://platform.openai.com/tokenizer) - Token counting tool\n", + "- [tiktoken](https://github.com/openai/tiktoken) - Fast token counting library\n", + "\n", + "### **Research Papers:**\n", + "- **[Lost in the Middle: How Language Models Use Long Contexts](https://arxiv.org/abs/2307.03172)** - Liu et al. (2023). Shows U-shaped performance curve and non-uniform degradation in long contexts.\n", + "- **[Recursively Summarizing Enables Long-Term Dialogue Memory in Large Language Models](https://arxiv.org/abs/2308.15022)** - Wang et al. (2023). Demonstrates recursive summarization for long conversations.\n", + "- **[MemGPT: Towards LLMs as Operating Systems](https://arxiv.org/abs/2310.08560)** - Packer et al. (2023). Introduces hierarchical memory management and virtual context.\n", + "- [Retrieval-Augmented Generation](https://arxiv.org/abs/2005.11401) - RAG fundamentals\n", + "- [Attention Is All You Need](https://arxiv.org/abs/1706.03762) - Transformer architecture and context windows\n", + "\n", + "### **Industry Resources:**\n", + "- **[How Should I Manage Memory for my LLM Chatbot?](https://www.vellum.ai/blog/how-should-i-manage-memory-for-my-llm-chatbot)** - Vellum AI. Practical insights on memory management trade-offs.\n", + "- **[Lost in the Middle Paper Reading](https://arize.com/blog/lost-in-the-middle-how-language-models-use-long-contexts-paper-reading/)** - Arize AI. Detailed analysis and practical implications.\n", + "- **[Effective Context Engineering for AI Agents](https://www.anthropic.com/engineering/effective-context-engineering-for-ai-agents)** - Anthropic. 
Production best practices.\n", + "\n", + "\n", + "### **Tools and Libraries:**\n", + "- **Redis:** Vector storage and memory backend\n", + "- **Agent Memory Server:** Dual-memory architecture with automatic summarization\n", + "- **LangChain:** LLM interaction framework\n", + "- **LangGraph:** State management and agent workflows\n", + "- **OpenAI:** GPT-4o for generation and summarization\n", + "- **tiktoken:** Token counting for cost estimation\n", + "\n", + "---\n", + "\n", + "![Redis](https://redis.io/wp-content/uploads/2024/04/Logotype.svg?auto=webp&quality=85,75&width=120)\n", + "\n", + "**Redis University - Context Engineering Course**\n", + "\n", + "**๐ŸŽ‰ Congratulations!** You've completed Section 3: Memory Architecture!\n", + "\n", + "You now understand how to:\n", + "- Build memory systems for AI agents\n", + "- Integrate working and long-term memory\n", + "- Manage long conversations with summarization\n", + "- Choose the right compression strategy\n", + "- Configure production-ready memory management\n", + "\n", + "**Ready for Section 4?** Let's build agents that actively manage their own memory using tools!\n", + "\n", + "---\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "37206838f616911a", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a99a1b7fa18aae7d", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/python-recipes/context-engineering/notebooks/section-3-memory-systems-for-context-engineering/README.md b/python-recipes/context-engineering/notebooks/section-3-memory-systems-for-context-engineering/README.md new file mode 100644 index 00000000..f17f0fb8 --- /dev/null +++ b/python-recipes/context-engineering/notebooks/section-3-memory-systems-for-context-engineering/README.md @@ -0,0 +1,185 @@ +# ๐Ÿง  Section 3: Memory Systems for Context Engineering + +## Overview + +This section teaches **memory-enhanced context engineering** by building on Section 2's retrieved context system. You'll learn how to add **working memory** (conversation history) and **long-term memory** (persistent knowledge) to create stateful, personalized conversations. + +## Learning Objectives + +By the end of this section, you will: + +1. **Understand** why memory is essential for context engineering (the grounding problem) +2. **Implement** working memory for conversation continuity +3. **Use** long-term memory for persistent user knowledge +4. **Integrate** memory with Section 2's retrieved context system +5. 
**Build** a complete memory-enhanced course advisor + +## Prerequisites + +- โœ… Completed Section 1 (Context Engineering Foundations) +- โœ… Completed Section 2 (Retrieved Context Engineering) +- โœ… Redis instance running +- โœ… Agent Memory Server running (see reference-agent/README.md) +- โœ… OpenAI API key configured + +## Notebooks + +### 01_working_and_longterm_memory.ipynb + +**โฑ๏ธ Estimated Time:** 45-60 minutes + +**What You'll Learn:** +- The grounding problem (why agents need memory) +- Working memory fundamentals (session-scoped conversation history) +- Long-term memory fundamentals (cross-session persistent knowledge) +- Memory integration with RAG +- Complete memory-enhanced RAG system + +**What You'll Build:** +- Working memory demo (multi-turn conversations) +- Long-term memory demo (persistent knowledge storage and search) +- Complete `memory_enhanced_rag_query()` function +- End-to-end memory-enhanced course advisor + +**Key Concepts:** +- Reference resolution ("it", "that course", "the first one") +- Conversation continuity across turns +- Semantic memory search +- All four context types working together + +## Architecture + +### Memory Types + +**1. Working Memory (Session-Scoped)** +- Stores conversation messages for current session +- Enables reference resolution and conversation continuity +- TTL-based (default: 1 hour) +- Automatically extracts important facts to long-term storage + +**2. Long-term Memory (Cross-Session)** +- Stores persistent facts, preferences, goals +- Enables personalization across sessions +- Vector-indexed for semantic search +- Three types: semantic (facts), episodic (events), message + +### Integration Pattern + +``` +User Query + โ†“ +1. Load Working Memory (conversation history) +2. Search Long-term Memory (user preferences, facts) +3. RAG Search (relevant courses) +4. Assemble Context (System + User + Conversation + Retrieved) +5. Generate Response +6. Save Working Memory (updated conversation) +``` + +### Four Context Types (Complete!) + +1. **System Context** (Static) - โœ… Section 2 +2. **User Context** (Dynamic, User-Specific) - โœ… Section 2 + Long-term Memory +3. **Conversation Context** (Dynamic, Session-Specific) - โœจ **Working Memory** +4. **Retrieved Context** (Dynamic, Query-Specific) - โœ… Section 2 + +## Technology Stack + +- **Agent Memory Server** - Production-ready dual-memory system +- **Redis** - Backend storage for memory +- **LangChain** - LLM interaction (no LangGraph needed yet) +- **OpenAI** - GPT-4o for generation, text-embedding-3-small for vectors +- **RedisVL** - Vector search (via reference-agent utilities) + +## Key Differences from Section 2 + +| Feature | Section 2 (Retrieved Context) | Section 3 (Memory-Enhanced) | +|---------|---------------------------|----------------------------------| +| Conversation History | โŒ None | โœ… Working Memory | +| Multi-turn Conversations | โŒ Each query independent | โœ… Context carries forward | +| Reference Resolution | โŒ Can't resolve "it", "that" | โœ… Resolves from history | +| Personalization | โš ๏ธ Profile only | โœ… Profile + Long-term Memory | +| Cross-Session Knowledge | โŒ None | โœ… Persistent memories | + +## Practice Exercises + +1. **Cross-Session Personalization** - Store and use preferences across sessions +2. **Memory-Aware Filtering** - Use long-term memories to filter RAG results +3. **Conversation Summarization** - Summarize long conversations to manage context +4. 
**Multi-User Memory Management** - Handle multiple students with separate memories +5. **Memory Search Quality** - Experiment with semantic search for memories + +## What's Next? + +**Section 4: Tool Selection & Agentic Workflows** + +You'll add **tools** and **LangGraph** to create a complete agent that: +- Decides which tools to use +- Takes actions (enroll courses, check prerequisites) +- Manages complex multi-step workflows +- Handles errors and retries + +## Resources + +- **Reference Agent** - `python-recipes/context-engineering/reference-agent/` +- **Agent Memory Server** - https://github.com/redis/agent-memory-server +- **LangChain Memory** - https://python.langchain.com/docs/modules/memory/ +- **Redis Agent Memory** - https://redis.io/docs/latest/develop/clients/agent-memory/ + +## Troubleshooting + +### Agent Memory Server Not Available + +If you see "โš ๏ธ Agent Memory Server not available": + +1. Check if the server is running: + ```bash + curl http://localhost:8088/health + ``` + +2. Start the server (see reference-agent/README.md): + ```bash + cd reference-agent + docker-compose up -d + ``` + +3. Verify environment variable: + ```bash + echo $AGENT_MEMORY_URL + # Should be: http://localhost:8088 + ``` + +### Memory Not Persisting + +If memories aren't persisting across sessions: + +1. Check Redis connection: + ```python + from redis_context_course.redis_config import redis_config + print(redis_config.health_check()) # Should be True + ``` + +2. Verify user_id and session_id are consistent: + ```python + # Same user_id for same student across sessions + # Different session_id for different conversations + ``` + +3. Check memory client configuration: + ```python + print(memory_client.config.base_url) + print(memory_client.config.default_namespace) + ``` + +## Notes + +- **LangChain is sufficient** for this section (no LangGraph needed) +- **LangGraph becomes necessary in Section 4** for tool calling and complex workflows +- **Agent Memory Server** is production-ready (Redis-backed, scalable) +- **Working memory** automatically extracts important facts to long-term storage +- **Semantic search** enables natural language queries for memories + +--- + +**Ready to add memory to your RAG system? Start with `01_working_and_longterm_memory.ipynb`!** ๐Ÿš€ + diff --git a/python-recipes/context-engineering/notebooks/section-4-tool-selection/01_tools_and_langgraph_fundamentals.ipynb b/python-recipes/context-engineering/notebooks/section-4-tool-selection/01_tools_and_langgraph_fundamentals.ipynb new file mode 100644 index 00000000..30cf94d8 --- /dev/null +++ b/python-recipes/context-engineering/notebooks/section-4-tool-selection/01_tools_and_langgraph_fundamentals.ipynb @@ -0,0 +1,1447 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "c20a2adc4d119d62", + "metadata": {}, + "source": [ + "![Redis](https://redis.io/wp-content/uploads/2024/04/Logotype.svg?auto=webp&quality=85,75&width=120)\n", + "\n", + "# ๐Ÿง  Section 4: Memory Tools and LangGraph Fundamentals\n", + "\n", + "**โฑ๏ธ Estimated Time:** 45-60 minutes\n", + "\n", + "## ๐ŸŽฏ Learning Objectives\n", + "\n", + "By the end of this notebook, you will:\n", + "\n", + "1. **Understand** how memory tools enable active context engineering\n", + "2. **Build** the three essential memory tools: store, search, and retrieve\n", + "3. **Learn** LangGraph fundamentals (nodes, edges, state)\n", + "4. **Compare** passive vs active memory management\n", + "5. 
**Prepare** for building a full course advisor agent\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ”— Bridge from Previous Sections\n", + "\n", + "### **What You've Learned:**\n", + "\n", + "**Section 1:** Context Types\n", + "- System, User, Conversation, Retrieved context\n", + "- How context shapes LLM responses\n", + "\n", + "**Section 2:** RAG Foundations\n", + "- Semantic search with vector embeddings\n", + "- Retrieving relevant information\n", + "- Context assembly and generation\n", + "\n", + "**Section 3:** Memory Architecture\n", + "- Working memory for conversation continuity\n", + "- Long-term memory for persistent knowledge\n", + "- Memory-enhanced RAG systems\n", + "\n", + "### **What's Next: Memory Tools for Context Engineering**\n", + "\n", + "**Section 3 Approach:**\n", + "- Memory operations hardcoded in your application flow\n", + "- You explicitly call `get_working_memory()`, `search_long_term_memory()`, etc.\n", + "- Fixed sequence: load โ†’ search โ†’ generate โ†’ save\n", + "\n", + "**Section 4 Approach (This Section):**\n", + "- LLM decides when to use memory tools\n", + "- LLM chooses what information to store and retrieve\n", + "- Dynamic decision-making based on conversation context\n", + "\n", + "**๐Ÿ’ก Key Insight:** Memory tools let the LLM actively decide when to use memory, rather than having it hardcoded\n", + "\n", + "---\n", + "\n", + "## ๐Ÿง  Memory Tools: The Context Engineering Connection\n", + "\n", + "**Why memory tools matter for context engineering:**\n", + "\n", + "Recall the **four context types** from Section 1:\n", + "1. **System Context** (static instructions)\n", + "2. **User Context** (profile, preferences) โ† **Memory tools help build this**\n", + "3. **Conversation Context** (session history) โ† **Memory tools help manage this**\n", + "4. **Retrieved Context** (RAG results)\n", + "\n", + "**Memory tools enable dynamic context construction:**\n", + "\n", + "### **Section 3 Approach:**\n", + "```python\n", + "# Hardcoded in application flow\n", + "async def memory_enhanced_rag_query(user_query, session_id, student_id):\n", + " working_memory = await memory_client.get_working_memory(...)\n", + " long_term_facts = await memory_client.search_long_term_memory(...)\n", + " # ... fixed sequence of operations\n", + "```\n", + "\n", + "### **Section 4 Approach (This Section):**\n", + "```python\n", + "# LLM decides when to use tools\n", + "@tool\n", + "def store_memory(text: str):\n", + " \"\"\"Store important information in long-term memory.\"\"\"\n", + "\n", + "@tool\n", + "def search_memories(query: str):\n", + " \"\"\"Search long-term memory for relevant facts.\"\"\"\n", + "\n", + "# LLM calls these tools when it determines they're needed\n", + "```\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ”ง The Three Essential Memory Tools\n", + "\n", + "### **1. `store_memory` - Save Important Information**\n", + "\n", + "**When to use:**\n", + "- User shares preferences, goals, constraints\n", + "- Important facts emerge during conversation\n", + "- Context that should persist across sessions\n", + "\n", + "**Example:**\n", + "```\n", + "User: \"I prefer online courses because I work full-time\"\n", + "Agent: [Thinks: \"This is important context I should remember\"]\n", + "Agent: [Calls: store_memory(\"User prefers online courses due to full-time work\")]\n", + "Agent: \"I'll remember your preference for online courses...\"\n", + "```\n", + "\n", + "### **2. 
`search_memories` - Find Relevant Past Information**\n", + "\n", + "**When to use:**\n", + "- Need context about user's history or preferences\n", + "- User asks about past conversations\n", + "- Building personalized responses\n", + "\n", + "**Example:**\n", + "```\n", + "User: \"What courses should I take next semester?\"\n", + "Agent: [Thinks: \"I need to know their preferences and past courses\"]\n", + "Agent: [Calls: search_memories(\"course preferences major interests completed\")]\n", + "Memory: \"User is CS major, interested in AI, prefers online, completed CS101\"\n", + "Agent: \"Based on your CS major and AI interest...\"\n", + "```\n", + "\n", + "### **3. `retrieve_memories` - Get Specific Stored Facts**\n", + "\n", + "**When to use:**\n", + "- Need to recall exact details from past conversations\n", + "- User references something specific they mentioned before\n", + "- Verifying stored information\n", + "\n", + "**Example:**\n", + "```\n", + "User: \"What was that GPA requirement we discussed?\"\n", + "Agent: [Calls: retrieve_memories(\"GPA requirement graduation\")]\n", + "Memory: \"User needs 3.5 GPA for honors program admission\"\n", + "Agent: \"You mentioned needing a 3.5 GPA for the honors program\"\n", + "```\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ“ฆ Setup and Environment\n", + "\n", + "### โš ๏ธ **IMPORTANT: Prerequisites Required**\n", + "\n", + "**Before running this notebook, you MUST have:**\n", + "\n", + "1. **Redis running** on port 6379\n", + "2. **Agent Memory Server running** on port 8088 \n", + "3. **OpenAI API key** configured\n", + "\n", + "**๐Ÿš€ Quick Setup:**\n", + "```bash\n", + "# Navigate to notebooks_v2 directory\n", + "cd ../../\n", + "\n", + "# Check if services are running\n", + "./check_setup.sh\n", + "\n", + "# If services are down, run setup\n", + "./setup_memory_server.sh\n", + "```\n", + "\n", + "**๐Ÿ“– Detailed Setup:** See `../SETUP_GUIDE.md` for complete instructions.\n", + "\n", + "---\n" + ] + }, + { + "cell_type": "markdown", + "id": "setup_packages", + "metadata": {}, + "source": [ + "### Automated Setup Check\n", + "\n", + "Let's run the setup script to ensure all services are running properly.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "env_setup", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:43.326206Z", + "iopub.status.busy": "2025-11-01T00:27:43.326021Z", + "iopub.status.idle": "2025-11-01T00:27:43.597828Z", + "shell.execute_reply": "2025-11-01T00:27:43.597284Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Running automated setup check...\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "๐Ÿ”ง Agent Memory Server Setup\n", + "===========================\n", + "๐Ÿ“Š Checking Redis...\n", + "โœ… Redis is running\n", + "๐Ÿ“Š Checking Agent Memory Server...\n", + "๐Ÿ” Agent Memory Server container exists. 
Checking health...\n", + "โœ… Agent Memory Server is running and healthy\n", + "โœ… No Redis connection issues detected\n", + "\n", + "โœ… Setup Complete!\n", + "=================\n", + "๐Ÿ“Š Services Status:\n", + " โ€ข Redis: Running on port 6379\n", + " โ€ข Agent Memory Server: Running on port 8088\n", + "\n", + "๐ŸŽฏ You can now run the notebooks!\n", + "\n", + "\n", + "โœ… All services are ready!\n" + ] + } + ], + "source": [ + "# Run the setup script to ensure Redis and Agent Memory Server are running\n", + "import subprocess\n", + "import sys\n", + "from pathlib import Path\n", + "\n", + "# Path to setup script\n", + "setup_script = Path(\"../../reference-agent/setup_agent_memory_server.py\")\n", + "\n", + "if setup_script.exists():\n", + " print(\"Running automated setup check...\\n\")\n", + " result = subprocess.run(\n", + " [sys.executable, str(setup_script)],\n", + " capture_output=True,\n", + " text=True\n", + " )\n", + " print(result.stdout)\n", + " if result.returncode != 0:\n", + " print(\"โš ๏ธ Setup check failed. Please review the output above.\")\n", + " print(result.stderr)\n", + " else:\n", + " print(\"\\nโœ… All services are ready!\")\n", + "else:\n", + " print(\"โš ๏ธ Setup script not found. Please ensure services are running manually.\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "env_config", + "metadata": {}, + "source": [ + "---\n" + ] + }, + { + "cell_type": "markdown", + "id": "services_check", + "metadata": {}, + "source": [ + "### Install Dependencies\n", + "\n", + "If you haven't already installed the reference-agent package, uncomment and run the following:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "health_check", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:43.599247Z", + "iopub.status.busy": "2025-11-01T00:27:43.599160Z", + "iopub.status.idle": "2025-11-01T00:27:43.600994Z", + "shell.execute_reply": "2025-11-01T00:27:43.600510Z" + } + }, + "outputs": [], + "source": [ + "# Uncomment to install reference-agent package\n", + "# %pip install -q -e ../../reference-agent\n", + "\n", + "# Uncomment to install agent-memory-client\n", + "# %pip install -q agent-memory-client\n" + ] + }, + { + "cell_type": "markdown", + "id": "memory_tools_intro", + "metadata": {}, + "source": [ + "### Environment Configuration\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "memory_client_init", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:43.602048Z", + "iopub.status.busy": "2025-11-01T00:27:43.601982Z", + "iopub.status.idle": "2025-11-01T00:27:43.607235Z", + "shell.execute_reply": "2025-11-01T00:27:43.606871Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Environment configured successfully!\n", + " OpenAI Model: gpt-4o\n", + " Redis URL: redis://localhost:6379\n", + " Memory Server: http://localhost:8088\n" + ] + } + ], + "source": [ + "import os\n", + "from dotenv import load_dotenv\n", + "\n", + "# Load environment variables\n", + "load_dotenv(\"../../reference-agent/.env\")\n", + "\n", + "# Verify required environment variables\n", + "required_vars = {\n", + " \"OPENAI_API_KEY\": \"OpenAI API key for LLM\",\n", + " \"REDIS_URL\": \"Redis connection for vector storage\",\n", + " \"AGENT_MEMORY_URL\": \"Agent Memory Server for memory tools\"\n", + "}\n", + "\n", + "missing_vars = []\n", + "for var, description in required_vars.items():\n", + " if not os.getenv(var):\n", + " missing_vars.append(f\" - {var}: 
{description}\")\n", + "\n", + "if missing_vars:\n", + " # chr(10) is a newline; it keeps each missing variable on its own line\n", + " raise ValueError(f\"\"\"\n", + " โš ๏ธ Missing required environment variables:\n", + " \n", + "{chr(10).join(missing_vars)}\n", + " \n", + " Please create a .env file in the reference-agent directory:\n", + " 1. cd ../../reference-agent\n", + " 2. cp .env.example .env\n", + " 3. Edit .env and add your API keys\n", + " \"\"\")\n", + "\n", + "print(\"โœ… Environment configured successfully!\")\n", + "print(f\" OpenAI Model: {os.getenv('OPENAI_MODEL', 'gpt-4o')}\")\n", + "print(f\" Redis URL: {os.getenv('REDIS_URL', 'redis://localhost:6379')}\")\n", + "print(f\" Memory Server: {os.getenv('AGENT_MEMORY_URL', 'http://localhost:8088')}\")" + ] + }, + { + "cell_type": "markdown", + "id": "tool_1_store", + "metadata": {}, + "source": [ + "### Service Health Check\n", + "\n", + "Before building memory tools, let's verify that Redis and the Agent Memory Server are running.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "store_memory_tool", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:43.608506Z", + "iopub.status.busy": "2025-11-01T00:27:43.608428Z", + "iopub.status.idle": "2025-11-01T00:27:43.659756Z", + "shell.execute_reply": "2025-11-01T00:27:43.659439Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "๐Ÿ” Checking required services...\n", + "\n", + "Redis: โœ… Connected successfully\n", + "Agent Memory Server: โœ… Status: 200\n", + "\n", + "โœ… All services are running!\n" + ] + } + ], + "source": [ + "import requests\n", + "import redis\n", + "\n", + "def check_redis():\n", + " \"\"\"Check if Redis is accessible.\"\"\"\n", + " try:\n", + " r = redis.from_url(os.getenv(\"REDIS_URL\", \"redis://localhost:6379\"))\n", + " r.ping()\n", + " return True, \"Connected successfully\"\n", + " except Exception as e:\n", + " return False, str(e)\n", + "\n", + "def check_memory_server():\n", + " \"\"\"Check if Agent Memory Server is accessible.\"\"\"\n", + " try:\n", + " url = os.getenv(\"AGENT_MEMORY_URL\", \"http://localhost:8088\")\n", + " response = requests.get(f\"{url}/v1/health\", timeout=5)\n", + " return response.status_code == 200, f\"Status: {response.status_code}\"\n", + " except Exception as e:\n", + " return False, str(e)\n", + "\n", + "# Check services\n", + "print(\"๐Ÿ” Checking required services...\\n\")\n", + "\n", + "redis_ok, redis_msg = check_redis()\n", + "print(f\"Redis: {'โœ…' if redis_ok else 'โŒ'} {redis_msg}\")\n", + "\n", + "memory_ok, memory_msg = check_memory_server()\n", + "print(f\"Agent Memory Server: {'โœ…' if memory_ok else 'โŒ'} {memory_msg}\")\n", + "\n", + "if not (redis_ok and memory_ok):\n", + " print(\"\\nโš ๏ธ Some services are not running. Please start them:\")\n", + " if not redis_ok:\n", + " print(\" Redis: docker run -d -p 6379:6379 redis/redis-stack:latest\")\n", + " if not memory_ok:\n", + " print(\" Memory Server: cd ../../reference-agent && python setup_agent_memory_server.py\")\n", + "else:\n", + " print(\"\\nโœ… All services are running!\")" + ] + }, + { + "cell_type": "markdown", + "id": "tool_2_search", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## ๐Ÿ› ๏ธ Building Memory Tools\n", + "\n", + "Now let's build the three essential memory tools.
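Each follows the same pattern: a Pydantic input schema, an async tool function, and a docstring the LLM reads when deciding whether to call it.\n",
+    "\n",
+    "Before Step 1, here is a minimal sketch of that pattern using a toy `echo` tool (the tool name and schema are invented purely for illustration — the real memory tools follow below):\n",
+    "\n",
+    "```python\n",
+    "import asyncio\n",
+    "\n",
+    "from langchain_core.tools import tool\n",
+    "from pydantic import BaseModel, Field\n",
+    "\n",
+    "class EchoInput(BaseModel):\n",
+    "    \"\"\"Schema the LLM sees when deciding how to call the tool.\"\"\"\n",
+    "    text: str = Field(description=\"Text to echo back\")\n",
+    "\n",
+    "@tool(\"echo\", args_schema=EchoInput)\n",
+    "async def echo(text: str) -> str:\n",
+    "    \"\"\"Echo the input back. The LLM reads this docstring when selecting tools.\"\"\"\n",
+    "    return f\"Echo: {text}\"\n",
+    "\n",
+    "# In a notebook you can simply `await echo.ainvoke({\"text\": \"hello\"})`;\n",
+    "# in a plain script, wrap the coroutine in asyncio.run():\n",
+    "print(asyncio.run(echo.ainvoke({\"text\": \"hello\"})))  # -> Echo: hello\n",
+    "```\n",
+    "\n",
+    "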
We'll start simple and build up complexity.\n", + "\n", + "### **Step 1: Initialize Memory Client**\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "search_memories_tool", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:43.661063Z", + "iopub.status.busy": "2025-11-01T00:27:43.660992Z", + "iopub.status.idle": "2025-11-01T00:27:43.778969Z", + "shell.execute_reply": "2025-11-01T00:27:43.778555Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Memory client initialized\n", + " Base URL: http://localhost:8088\n", + " Namespace: redis_university\n", + " Test User: student_memory_tools_demo\n" + ] + } + ], + "source": [ + "from agent_memory_client import MemoryAPIClient, MemoryClientConfig\n", + "from agent_memory_client.models import ClientMemoryRecord\n", + "from agent_memory_client.filters import UserId\n", + "\n", + "# Initialize memory client\n", + "config = MemoryClientConfig(\n", + " base_url=os.getenv(\"AGENT_MEMORY_URL\", \"http://localhost:8088\"),\n", + " default_namespace=\"redis_university\"\n", + ")\n", + "memory_client = MemoryAPIClient(config=config)\n", + "\n", + "# Test user for this notebook\n", + "test_user_id = \"student_memory_tools_demo\"\n", + "test_session_id = \"session_memory_tools_demo\"\n", + "\n", + "print(f\"โœ… Memory client initialized\")\n", + "print(f\" Base URL: {config.base_url}\")\n", + "print(f\" Namespace: {config.default_namespace}\")\n", + "print(f\" Test User: {test_user_id}\")" + ] + }, + { + "cell_type": "markdown", + "id": "tool_3_retrieve", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## ๐Ÿ› ๏ธ Understanding Tools in LLM Applications\n", + "\n", + "### **What Are Tools?**\n", + "\n", + "**Tools** are functions that LLMs can call to interact with external systems, retrieve information, or perform actions beyond text generation.\n", + "\n", + "**Think of tools as:**\n", + "- ๐Ÿ”Œ **Extensions** to the LLM's capabilities\n", + "- ๐Ÿค **Interfaces** between the LLM and external systems\n", + "- ๐ŸŽฏ **Actions** the LLM can take to accomplish tasks\n", + "\n", + "### **How Tool Calling Works**\n", + "\n", + "```\n", + "1. User Input โ†’ \"Store my preference for online courses\"\n", + " โ†“\n", + "2. LLM Analysis โ†’ Decides: \"I need to use store_memory tool\"\n", + " โ†“\n", + "3. Tool Call โ†’ Returns structured function call with arguments\n", + " โ†“\n", + "4. Tool Execution โ†’ Your code executes the function\n", + " โ†“\n", + "5. Tool Result โ†’ Returns result to LLM\n", + " โ†“\n", + "6. LLM Response โ†’ Generates final text response using tool result\n", + "```\n", + "\n", + "### **Tool Definition Components**\n", + "\n", + "Every tool needs three key components:\n", + "\n", + "**1. Input Schema (Pydantic Model)**\n", + "```python\n", + "class StoreMemoryInput(BaseModel):\n", + " text: str = Field(description=\"What to store\")\n", + " memory_type: str = Field(default=\"semantic\")\n", + " topics: List[str] = Field(default=[])\n", + "```\n", + "- Defines what parameters the tool accepts\n", + "- Provides descriptions that help the LLM understand usage\n", + "- Validates input types\n", + "\n", + "**2. 
Tool Function**\n", + "```python\n", + "@tool(\"store_memory\", args_schema=StoreMemoryInput)\n", + "async def store_memory(text: str, memory_type: str = \"semantic\", topics: List[str] = None) -> str:\n", + " # Implementation\n", + " return \"Success message\"\n", + "```\n", + "- The actual function that performs the action\n", + "- Must return a string (the LLM reads this result)\n", + "- Can be sync or async\n", + "\n", + "**3. Docstring (Critical!)**\n", + "```python\n", + "\"\"\"\n", + "Store important information in long-term memory.\n", + "\n", + "Use this tool when:\n", + "- User shares preferences, goals, or constraints\n", + "- Important facts emerge during conversation\n", + "\n", + "Examples:\n", + "- \"User prefers online courses\"\n", + "- \"User is CS major interested in AI\"\n", + "\"\"\"\n", + "```\n", + "- The LLM reads this to decide when to use the tool\n", + "- Should include clear use cases and examples\n", + "- More detailed = better tool selection\n", + "\n", + "### **Best Practices for Tool Design**\n", + "\n", + "#### **1. Clear, Descriptive Names**\n", + "```python\n", + "โœ… Good: store_memory, search_courses, get_user_profile\n", + "โŒ Bad: do_thing, process, handle_data\n", + "```\n", + "\n", + "#### **2. Detailed Descriptions**\n", + "```python\n", + "โœ… Good: \"Store important user preferences and facts in long-term memory for future conversations\"\n", + "โŒ Bad: \"Stores data\"\n", + "```\n", + "\n", + "#### **3. Specific Use Cases in Docstring**\n", + "```python\n", + "โœ… Good:\n", + "\"\"\"\n", + "Use this tool when:\n", + "- User explicitly shares preferences\n", + "- Important facts emerge that should persist\n", + "- Information will be useful for future recommendations\n", + "\"\"\"\n", + "\n", + "โŒ Bad:\n", + "\"\"\"\n", + "Stores information.\n", + "\"\"\"\n", + "```\n", + "\n", + "#### **4. Return Meaningful Results**\n", + "```python\n", + "โœ… Good: return f\"Stored: {text} with topics {topics}\"\n", + "โŒ Bad: return \"Done\"\n", + "```\n", + "The LLM uses the return value to understand what happened and craft its response.\n", + "\n", + "#### **5. Handle Errors Gracefully**\n", + "```python\n", + "โœ… Good:\n", + "try:\n", + " result = await memory_client.create_long_term_memory([record])\n", + " return f\"Successfully stored: {text}\"\n", + "except Exception as e:\n", + " return f\"Could not store memory: {str(e)}\"\n", + "```\n", + "Always return a string explaining what went wrong.\n", + "\n", + "#### **6. 
Keep Tools Focused**\n", + "```python\n", + "โœ… Good: Separate tools for store_memory, search_memories, retrieve_memories\n", + "โŒ Bad: One generic memory_operation(action, data) tool\n", + "```\n", + "Focused tools are easier for LLMs to select correctly.\n", + "\n", + "### **Common Tool Patterns**\n", + "\n", + "**Information Retrieval:**\n", + "- Search databases\n", + "- Query APIs\n", + "- Fetch user data\n", + "\n", + "**Data Storage:**\n", + "- Save preferences\n", + "- Store conversation facts\n", + "- Update user profiles\n", + "\n", + "**External Actions:**\n", + "- Send emails\n", + "- Create calendar events\n", + "- Make API calls\n", + "\n", + "**Computation:**\n", + "- Calculate values\n", + "- Process data\n", + "- Generate reports\n", + "\n", + "---\n", + "\n", + "### **Step 2: Build the `store_memory` Tool**\n", + "\n", + "Now let's build our first memory tool following these best practices.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "retrieve_memories_tool", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:43.780190Z", + "iopub.status.busy": "2025-11-01T00:27:43.780108Z", + "iopub.status.idle": "2025-11-01T00:27:43.876809Z", + "shell.execute_reply": "2025-11-01T00:27:43.876383Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "๐Ÿง  Store Memory Test: Stored: User prefers online courses for testing\n" + ] + } + ], + "source": [ + "from langchain_core.tools import tool\n", + "from pydantic import BaseModel, Field\n", + "from typing import List, Optional\n", + "\n", + "class StoreMemoryInput(BaseModel):\n", + " \"\"\"Input schema for storing memories.\"\"\"\n", + " text: str = Field(\n", + " description=\"The information to store. Should be clear, specific, and important for future conversations.\"\n", + " )\n", + " memory_type: str = Field(\n", + " default=\"semantic\",\n", + " description=\"Type of memory: 'semantic' for facts/preferences, 'episodic' for events/experiences\"\n", + " )\n", + " topics: List[str] = Field(\n", + " default=[],\n", + " description=\"List of topics/tags for this memory (e.g., ['preferences', 'courses', 'career'])\"\n", + " )\n", + "\n", + "@tool(\"store_memory\", args_schema=StoreMemoryInput)\n", + "async def store_memory(text: str, memory_type: str = \"semantic\", topics: List[str] = None) -> str:\n", + " \"\"\"\n", + " Store important information in long-term memory.\n", + " \n", + " Use this tool when:\n", + " - User shares preferences, goals, or constraints\n", + " - Important facts emerge during conversation\n", + " - Information should persist across sessions\n", + " - Context that will be useful for future recommendations\n", + " \n", + " Examples:\n", + " - \"User prefers online courses due to work schedule\"\n", + " - \"User is Computer Science major interested in AI\"\n", + " - \"User completed CS101 with grade A\"\n", + " \n", + " Returns: Confirmation that memory was stored\n", + " \"\"\"\n", + " try:\n", + " # Create memory record\n", + " memory_record = ClientMemoryRecord(\n", + " text=text,\n", + " memory_type=memory_type,\n", + " topics=topics or [],\n", + " user_id=test_user_id\n", + " )\n", + " \n", + " # Store in long-term memory\n", + " await memory_client.create_long_term_memory([memory_record])\n", + " \n", + " return f\"Stored: {text}\"\n", + " except Exception as e:\n", + " return f\"Error storing memory: {str(e)}\"\n", + "\n", + "# Test the tool\n", + "test_result = await store_memory.ainvoke({\n", + " \"text\": \"User 
prefers online courses for testing\",\n", + " \"memory_type\": \"semantic\",\n", + " \"topics\": [\"preferences\", \"test\"]\n", + "})\n", + "print(f\"๐Ÿง  Store Memory Test: {test_result}\")" + ] + }, + { + "cell_type": "markdown", + "id": "memory_tools_demo", + "metadata": {}, + "source": [ + "### **Step 3: Build the `search_memories` Tool**\n", + "\n", + "This tool allows the LLM to search its long-term memory for relevant information.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "llm_memory_demo", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:43.878136Z", + "iopub.status.busy": "2025-11-01T00:27:43.878066Z", + "iopub.status.idle": "2025-11-01T00:27:44.123430Z", + "shell.execute_reply": "2025-11-01T00:27:44.122639Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "๐Ÿ” Search Memories Test: - User prefers online courses for testing\n", + "- User is a Computer Science major interested in AI and machine learning. Prefers online courses due to part-time work.\n" + ] + } + ], + "source": [ + "class SearchMemoriesInput(BaseModel):\n", + " \"\"\"Input schema for searching memories.\"\"\"\n", + " query: str = Field(\n", + " description=\"Search query to find relevant memories. Use keywords related to what you need to know.\"\n", + " )\n", + " limit: int = Field(\n", + " default=5,\n", + " description=\"Maximum number of memories to return. Default is 5.\"\n", + " )\n", + "\n", + "@tool(\"search_memories\", args_schema=SearchMemoriesInput)\n", + "async def search_memories(query: str, limit: int = 5) -> str:\n", + " \"\"\"\n", + " Search long-term memory for relevant information.\n", + " \n", + " Use this tool when:\n", + " - Need context about user's preferences or history\n", + " - User asks about past conversations\n", + " - Building personalized responses\n", + " - Need to recall what you know about the user\n", + " \n", + " Examples:\n", + " - query=\"course preferences\" โ†’ finds preferred course types\n", + " - query=\"completed courses\" โ†’ finds courses user has taken\n", + " - query=\"career goals\" โ†’ finds user's career interests\n", + " \n", + " Returns: Relevant memories or \"No memories found\"\n", + " \"\"\"\n", + " try:\n", + " # Search long-term memory\n", + " results = await memory_client.search_long_term_memory(\n", + " text=query,\n", + " user_id=UserId(eq=test_user_id),\n", + " limit=limit\n", + " )\n", + "\n", + " if not results or not results.memories:\n", + " return \"No memories found matching your query.\"\n", + "\n", + " # Format results\n", + " memory_texts = []\n", + " for memory in results.memories:\n", + " memory_texts.append(f\"- {memory.text}\")\n", + "\n", + " return \"\\n\".join(memory_texts)\n", + " except Exception as e:\n", + " return f\"Error searching memories: {str(e)}\"\n", + "\n", + "# Test the tool\n", + "test_result = await search_memories.ainvoke({\n", + " \"query\": \"preferences\",\n", + " \"limit\": 5\n", + "})\n", + "print(f\"๐Ÿ” Search Memories Test: {test_result}\")" + ] + }, + { + "cell_type": "markdown", + "id": "langgraph_intro", + "metadata": {}, + "source": [ + "### **Step 4: Build the `retrieve_memories` Tool**\n", + "\n", + "This tool allows the LLM to retrieve specific stored facts.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "passive_memory", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:44.125246Z", + "iopub.status.busy": "2025-11-01T00:27:44.125103Z", + "iopub.status.idle": 
"2025-11-01T00:27:44.331240Z", + "shell.execute_reply": "2025-11-01T00:27:44.330413Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "๐Ÿ“‹ Retrieve Memories Test: [preferences, test] User prefers online courses for testing\n", + "[preferences, academic, career] User is a Computer Science major interested in AI and machine learning. Prefers online courses due to part-time work.\n" + ] + } + ], + "source": [ + "class RetrieveMemoriesInput(BaseModel):\n", + " \"\"\"Input schema for retrieving specific memories.\"\"\"\n", + " topics: List[str] = Field(\n", + " description=\"List of specific topics to retrieve (e.g., ['GPA', 'requirements', 'graduation'])\"\n", + " )\n", + " limit: int = Field(\n", + " default=3,\n", + " description=\"Maximum number of memories to return. Default is 3.\"\n", + " )\n", + "\n", + "@tool(\"retrieve_memories\", args_schema=RetrieveMemoriesInput)\n", + "async def retrieve_memories(topics: List[str], limit: int = 3) -> str:\n", + " \"\"\"\n", + " Retrieve specific stored facts by topic.\n", + " \n", + " Use this tool when:\n", + " - Need to recall exact details from past conversations\n", + " - User references something specific they mentioned before\n", + " - Verifying stored information\n", + " - Looking for facts about specific topics\n", + " \n", + " Examples:\n", + " - topics=[\"GPA\", \"requirements\"] โ†’ finds GPA-related memories\n", + " - topics=[\"completed\", \"courses\"] โ†’ finds completed course records\n", + " - topics=[\"career\", \"goals\"] โ†’ finds career-related memories\n", + " \n", + " Returns: Specific memories matching the topics\n", + " \"\"\"\n", + " try:\n", + " # Search for memories with specific topics\n", + " query = \" \".join(topics)\n", + " results = await memory_client.search_long_term_memory(\n", + " text=query,\n", + " user_id=UserId(eq=test_user_id),\n", + " limit=limit\n", + " )\n", + "\n", + " if not results or not results.memories:\n", + " return f\"No memories found for topics: {', '.join(topics)}\"\n", + "\n", + " # Format results with topics\n", + " memory_texts = []\n", + " for memory in results.memories:\n", + " topics_str = \", \".join(memory.topics) if memory.topics else \"general\"\n", + " memory_texts.append(f\"[{topics_str}] {memory.text}\")\n", + "\n", + " return \"\\n\".join(memory_texts)\n", + " except Exception as e:\n", + " return f\"Error retrieving memories: {str(e)}\"\n", + "\n", + "# Test the tool\n", + "test_result = await retrieve_memories.ainvoke({\n", + " \"topics\": [\"preferences\", \"test\"],\n", + " \"limit\": 3\n", + "})\n", + "print(f\"๐Ÿ“‹ Retrieve Memories Test: {test_result}\")" + ] + }, + { + "cell_type": "markdown", + "id": "active_memory", + "metadata": {}, + "source": [ + "### **Step 5: Test Memory Tools with LLM**\n", + "\n", + "Now let's see how an LLM uses these memory tools.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "when_to_use", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:44.333737Z", + "iopub.status.busy": "2025-11-01T00:27:44.333538Z", + "iopub.status.idle": "2025-11-01T00:27:47.222368Z", + "shell.execute_reply": "2025-11-01T00:27:47.221631Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "๐Ÿค– LLM Response:\n", + " Tool calls: 1\n", + " Tool 1: store_memory\n", + " Args: {'text': 'User is a Computer Science major interested in AI and machine learning. 
Prefers online courses due to part-time work.', 'memory_type': 'semantic', 'topics': ['preferences', 'academic', 'career']}\n", + "\n", + "๐Ÿ’ฌ Response: \n", + "\n", + "๐Ÿ“ Note: The response is empty because the LLM decided to call a tool instead of\n", + " generating text. This is expected behavior! The LLM is saying:\n", + " 'I need to store this information first, then I'll respond.'\n", + "\n", + " To get the final response, we would need to:\n", + " 1. Execute the tool call (store_memory)\n", + " 2. Send the tool result back to the LLM\n", + " 3. Get the LLM's final text response\n", + "\n", + " This multi-step process is exactly why we need LangGraph! ๐Ÿ‘‡\n" + ] + } + ], + "source": [ + "from langchain_openai import ChatOpenAI\n", + "from langchain_core.messages import HumanMessage, SystemMessage\n", + "\n", + "# Initialize LLM with memory tools\n", + "llm = ChatOpenAI(model=os.getenv(\"OPENAI_MODEL\", \"gpt-4o\"), temperature=0)\n", + "memory_tools = [store_memory, search_memories, retrieve_memories]\n", + "llm_with_tools = llm.bind_tools(memory_tools)\n", + "\n", + "# System message for memory-aware agent\n", + "system_prompt = \"\"\"\n", + "You are a Redis University course advisor with memory tools.\n", + "\n", + "IMPORTANT: Use your memory tools strategically:\n", + "- When users share preferences, goals, or important facts โ†’ use store_memory\n", + "- When you need context about the user โ†’ use search_memories\n", + "- When users reference specific past information โ†’ use retrieve_memories\n", + "\n", + "Always explain what you're doing with memory to help users understand.\n", + "\"\"\"\n", + "\n", + "# Test conversation\n", + "messages = [\n", + " SystemMessage(content=system_prompt),\n", + " HumanMessage(content=\"Hi! I'm a Computer Science major interested in AI and machine learning. I prefer online courses because I work part-time.\")\n", + "]\n", + "\n", + "response = llm_with_tools.invoke(messages)\n", + "print(\"๐Ÿค– LLM Response:\")\n", + "print(f\" Tool calls: {len(response.tool_calls) if response.tool_calls else 0}\")\n", + "if response.tool_calls:\n", + " for i, tool_call in enumerate(response.tool_calls):\n", + " print(f\" Tool {i+1}: {tool_call['name']}\")\n", + " print(f\" Args: {tool_call['args']}\")\n", + "print(f\"\\n๐Ÿ’ฌ Response: {response.content}\")\n", + "\n", + "# Explain the empty response\n", + "if response.tool_calls and not response.content:\n", + " print(\"\\n๐Ÿ“ Note: The response is empty because the LLM decided to call a tool instead of\")\n", + " print(\" generating text. This is expected behavior! The LLM is saying:\")\n", + " print(\" 'I need to store this information first, then I'll respond.'\")\n", + " print(\"\\n To get the final response, we would need to:\")\n", + " print(\" 1. Execute the tool call (store_memory)\")\n", + " print(\" 2. Send the tool result back to the LLM\")\n", + " print(\" 3. Get the LLM's final text response\")\n", + " print(\"\\n This multi-step process is exactly why we need LangGraph! ๐Ÿ‘‡\")" + ] + }, + { + "cell_type": "markdown", + "id": "ab98556b-21bd-4578-8f8f-f316e8fe31f4", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## ๐Ÿ”„ Complete Tool Execution Loop Example\n", + "\n", + "Let's manually complete the tool execution loop to see the full workflow. 
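In outline, the loop we're about to walk through by hand has this shape — a sketch only (it reuses `llm_with_tools` and the memory tools defined above; the `run_tool_loop` helper and its `max_steps` safety cap are ours, not a library API):\n",
+    "\n",
+    "```python\n",
+    "from langchain_core.messages import ToolMessage\n",
+    "\n",
+    "async def run_tool_loop(messages, tools_by_name, max_steps=5):\n",
+    "    \"\"\"Call the LLM, execute any requested tools, repeat until it answers in text.\"\"\"\n",
+    "    for _ in range(max_steps):\n",
+    "        response = await llm_with_tools.ainvoke(messages)\n",
+    "        messages.append(response)\n",
+    "        if not response.tool_calls:  # plain text answer -> done\n",
+    "            return response.content\n",
+    "        for call in response.tool_calls:  # execute each requested tool\n",
+    "            result = await tools_by_name[call[\"name\"]].ainvoke(call[\"args\"])\n",
+    "            messages.append(ToolMessage(content=result, tool_call_id=call[\"id\"]))\n",
+    "    return \"Stopped: reached max_steps without a final answer\"\n",
+    "\n",
+    "# Usage (in this notebook's context):\n",
+    "# await run_tool_loop(\n",
+    "#     [SystemMessage(content=system_prompt), HumanMessage(content=user_message)],\n",
+    "#     {t.name: t for t in memory_tools},\n",
+    "# )\n",
+    "```\n",
+    "\n",
+    "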
This will help you understand what LangGraph automates.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "90a7df9ffdf5bc", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T00:27:47.224544Z", + "iopub.status.busy": "2025-11-01T00:27:47.224342Z", + "iopub.status.idle": "2025-11-01T00:27:49.676939Z", + "shell.execute_reply": "2025-11-01T00:27:49.676143Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================================================================================\n", + "COMPLETE TOOL EXECUTION LOOP - Manual Implementation\n", + "================================================================================\n", + "\n", + "๐Ÿ‘ค USER INPUT:\n", + "Hi! I'm a Computer Science major interested in AI and machine learning. I prefer online courses because I work part-time.\n", + "\n", + "================================================================================\n", + "STEP 1: LLM Analysis\n", + "================================================================================\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… LLM decided to call: store_memory\n", + " Arguments: {'text': 'User is a Computer Science major interested in AI and machine learning. Prefers online courses due to part-time work.', 'memory_type': 'semantic', 'topics': ['preferences', 'academic', 'career']}\n", + "\n", + "================================================================================\n", + "STEP 2: Tool Execution\n", + "================================================================================\n", + "โœ… Tool executed successfully\n", + " Result: Stored: User is a Computer Science major interested in AI and machine learning. Prefers online courses due to part-time work.\n", + "\n", + "================================================================================\n", + "STEP 3: LLM Generates Final Response\n", + "================================================================================\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Final response generated\n", + "\n", + "๐Ÿค– AGENT RESPONSE:\n", + "Great! I've noted that you're a Computer Science major interested in AI and machine learning, and you prefer online courses because you work part-time. If you have any specific questions or need recommendations, feel free to ask!\n", + "\n", + "================================================================================\n", + "STEP 4: Verify Memory Storage\n", + "================================================================================\n", + "โœ… Memory verification:\n", + "- User prefers online courses for testing\n", + "- User is a Computer Science major interested in AI and machine learning. Prefers online courses due to part-time work.\n", + "\n", + "================================================================================\n", + "COMPLETE! This is what LangGraph automates for you.\n", + "================================================================================\n" + ] + } + ], + "source": [ + "from langchain_core.messages import ToolMessage\n", + "\n", + "print(\"=\" * 80)\n", + "print(\"COMPLETE TOOL EXECUTION LOOP - Manual Implementation\")\n", + "print(\"=\" * 80)\n", + "\n", + "# Step 1: User input\n", + "user_message = \"Hi! I'm a Computer Science major interested in AI and machine learning. 
I prefer online courses because I work part-time.\"\n", + "print(f\"\\n๐Ÿ‘ค USER INPUT:\\n{user_message}\")\n", + "\n", + "# Step 2: LLM decides to use tool\n", + "messages = [\n", + " SystemMessage(content=system_prompt),\n", + " HumanMessage(content=user_message)\n", + "]\n", + "\n", + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"STEP 1: LLM Analysis\")\n", + "print(\"=\" * 80)\n", + "response_1 = llm_with_tools.invoke(messages)\n", + "print(f\"โœ… LLM decided to call: {response_1.tool_calls[0]['name']}\")\n", + "print(f\" Arguments: {response_1.tool_calls[0]['args']}\")\n", + "\n", + "# Step 3: Execute the tool\n", + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"STEP 2: Tool Execution\")\n", + "print(\"=\" * 80)\n", + "tool_call = response_1.tool_calls[0]\n", + "tool_result = await store_memory.ainvoke(tool_call['args'])\n", + "print(f\"โœ… Tool executed successfully\")\n", + "print(f\" Result: {tool_result}\")\n", + "\n", + "# Step 4: Send tool result back to LLM\n", + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"STEP 3: LLM Generates Final Response\")\n", + "print(\"=\" * 80)\n", + "messages.append(response_1) # Add the tool call message\n", + "messages.append(ToolMessage(content=tool_result, tool_call_id=tool_call['id'])) # Add tool result\n", + "\n", + "response_2 = llm_with_tools.invoke(messages)\n", + "print(f\"โœ… Final response generated\")\n", + "print(f\"\\n๐Ÿค– AGENT RESPONSE:\\n{response_2.content}\")\n", + "\n", + "# Step 5: Verify memory was stored\n", + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"STEP 4: Verify Memory Storage\")\n", + "print(\"=\" * 80)\n", + "search_result = await search_memories.ainvoke({\"query\": \"preferences\", \"limit\": 3})\n", + "print(f\"โœ… Memory verification:\")\n", + "print(f\"{search_result}\")\n", + "\n", + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"COMPLETE! This is what LangGraph automates for you.\")\n", + "print(\"=\" * 80)" + ] + }, + { + "cell_type": "markdown", + "id": "cf13debf42a9b4b7", + "metadata": {}, + "source": [ + "### **Key Takeaways from Manual Loop**\n", + "\n", + "**What we just did manually:**\n", + "\n", + "1. โœ… **Sent user input to LLM** โ†’ Got tool call decision\n", + "2. โœ… **Executed the tool** โ†’ Got result\n", + "3. โœ… **Sent result back to LLM** โ†’ Got final response\n", + "4. โœ… **Verified the action** โ†’ Confirmed memory stored\n", + "\n", + "**Why this is tedious:**\n", + "- ๐Ÿ”ด Multiple manual steps\n", + "- ๐Ÿ”ด Need to track message history\n", + "- ๐Ÿ”ด Handle tool call IDs\n", + "- ๐Ÿ”ด Manage state between calls\n", + "- ๐Ÿ”ด Complex error handling\n", + "\n", + "**What LangGraph does:**\n", + "- โœ… Automates all these steps\n", + "- โœ… Manages state automatically\n", + "- โœ… Handles tool execution loop\n", + "- โœ… Provides clear workflow visualization\n", + "- โœ… Makes it easy to add more tools and logic\n", + "\n", + "**Now you understand why we need LangGraph!** ๐Ÿ‘‡\n" + ] + }, + { + "cell_type": "markdown", + "id": "a295f410390e0ecd", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## ๐ŸŽจ Introduction to LangGraph\n", + "\n", + "Memory tools are powerful, but managing complex workflows manually gets complicated. **LangGraph** automates this process.\n", + "\n", + "### **What is LangGraph?**\n", + "\n", + "**LangGraph** is a framework for building stateful, multi-step agent workflows using graphs.\n", + "\n", + "### **Core Concepts**\n", + "\n", + "**1. 
State** - Shared data structure passed between nodes\n", + "- Contains messages, context, and intermediate results\n", + "- Automatically managed and updated\n", + "\n", + "**2. Nodes** - Functions that process state\n", + "- Examples: call LLM, execute tools, format responses\n", + "- Each node receives state and returns updated state\n", + "\n", + "**3. Edges** - Connections between nodes\n", + "- Can be conditional (if/else logic)\n", + "- Determine workflow flow\n", + "\n", + "**4. Graph** - Complete workflow from start to end\n", + "- Orchestrates the entire agent process\n", + "\n", + "### **Simple Memory-Enhanced Graph**\n", + "\n", + "```\n", + "START\n", + " โ†“\n", + "[Load Memory] โ† Get user context\n", + " โ†“\n", + "[Agent Node] โ† Decides what to do\n", + " โ†“\n", + " โ”œโ”€โ†’ [Memory Tools] โ† store/search/retrieve\n", + " โ”‚ โ†“\n", + " โ”‚ [Agent Node] โ† Processes memory results\n", + " โ”‚\n", + " โ””โ”€โ†’ [Respond] โ† Generates final response\n", + " โ†“\n", + "[Save Memory] โ† Update conversation history\n", + " โ†“\n", + " END\n", + "```\n", + "\n", + "### **Why LangGraph for Memory Tools?**\n", + "\n", + "**Without LangGraph:**\n", + "- Manual tool execution and state management\n", + "- Complex conditional logic\n", + "- Hard to visualize workflow\n", + "- Difficult to add new steps\n", + "\n", + "**With LangGraph:**\n", + "- โœ… Automatic tool execution\n", + "- โœ… Clear workflow visualization\n", + "- โœ… Easy to modify and extend\n", + "- โœ… Built-in state management\n", + "- โœ… Memory persistence across turns\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ”„ Passive vs Active Memory: The Key Difference\n", + "\n", + "Let's compare the two approaches to understand why memory tools matter.\n" + ] + }, + { + "cell_type": "markdown", + "id": "d2a99956e8ff8d58", + "metadata": {}, + "source": [ + "### **Passive Memory (Section 3)**\n", + "\n", + "**How it works:**\n", + "- System automatically saves all conversations\n", + "- System automatically extracts facts\n", + "- LLM receives memory but can't control it\n", + "\n", + "**Example conversation:**\n", + "```\n", + "User: \"I'm interested in machine learning\"\n", + "Agent: \"Great! Here are some ML courses...\" \n", + "System: [Automatically saves: \"User interested in ML\"]\n", + "```\n", + "\n", + "**Pros:**\n", + "- โœ… Simple to implement\n", + "- โœ… No additional LLM calls\n", + "- โœ… Consistent memory storage\n", + "\n", + "**Cons:**\n", + "- โŒ LLM can't decide what's important\n", + "- โŒ No strategic memory management\n", + "- โŒ Can't search memories on demand\n" + ] + }, + { + "cell_type": "markdown", + "id": "9768498f-4e95-4217-ad20-93fea45524a2", + "metadata": {}, + "source": [ + "### **Active Memory (This Section)**\n", + "\n", + "**How it works:**\n", + "- LLM decides what to store\n", + "- LLM decides when to search memories\n", + "- LLM controls its own context construction\n", + "\n", + "**Example conversation:**\n", + "```\n", + "User: \"I'm interested in machine learning\"\n", + "Agent: [Thinks: \"This is important, I should remember this\"]\n", + "Agent: [Calls: store_memory(\"User interested in machine learning\")]\n", + "Agent: \"I'll remember your interest in ML. 
Here are some courses...\"\n", + "```\n", + "\n", + "**Pros:**\n", + "- โœ… Strategic memory management\n", + "- โœ… LLM controls what's important\n", + "- โœ… On-demand memory search\n", + "- โœ… Better context engineering\n", + "\n", + "**Cons:**\n", + "- โŒ More complex to implement\n", + "- โŒ Additional LLM calls (cost)\n", + "- โŒ Requires careful tool design\n" + ] + }, + { + "cell_type": "markdown", + "id": "a9e2011d-1696-4eb9-9bec-d1bbba9ef392", + "metadata": {}, + "source": [ + "### **When to Use Each Approach**\n", + "\n", + "**Use Passive Memory when:**\n", + "- Simple applications with predictable patterns\n", + "- Cost is a primary concern\n", + "- Memory needs are straightforward\n", + "- You want automatic memory management\n", + "\n", + "**Use Active Memory when:**\n", + "- Complex applications requiring strategic memory\n", + "- LLM needs to control its own context\n", + "- Dynamic memory management is important\n", + "- Building sophisticated agents\n", + "\n", + "**๐Ÿ’ก Key Insight:** Active memory tools enable **intelligent context engineering** where the LLM becomes an active participant in managing its own knowledge.\n", + "\n", + "---\n", + "\n", + "## ๐ŸŽฏ Summary and Next Steps\n", + "\n", + "### **What You've Learned**\n", + "\n", + "**Memory Tools for Context Engineering:**\n", + "- `store_memory` - Save important information strategically\n", + "- `search_memories` - Find relevant context on demand\n", + "- `retrieve_memories` - Get specific facts by topic\n", + "\n", + "**LangGraph Fundamentals:**\n", + "- State management for complex workflows\n", + "- Nodes and edges for agent orchestration\n", + "- Automatic tool execution and state updates\n", + "\n", + "**Active vs Passive Memory:**\n", + "- Passive: System controls memory automatically\n", + "- Active: LLM controls its own memory strategically\n", + "\n", + "### **Context Engineering Connection**\n", + "\n", + "Memory tools transform the **four context types**:\n", + "\n", + "| Context Type | Section 3 (Passive) | Section 4 (Active) |\n", + "|-------------|---------------------|--------------------|\n", + "| **System** | Static prompt | Static prompt |\n", + "| **User** | Auto-extracted profile | LLM builds profile with `store_memory` |\n", + "| **Conversation** | Auto-saved history | LLM manages with `search_memories` |\n", + "| **Retrieved** | RAG search | Memory-enhanced RAG queries |\n", + "\n", + "### **Next: Building a Complete Agent**\n", + "\n", + "In **Notebook 2**, you'll combine everything:\n", + "- โœ… Memory tools (this notebook)\n", + "- โœ… Course search tools\n", + "- โœ… LangGraph orchestration\n", + "- โœ… Redis Agent Memory Server\n", + "\n", + "**Result:** A complete Redis University Course Advisor Agent that actively manages its own memory and context.\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ“š Additional Resources\n", + "\n", + "### **Memory Tools & Context Engineering**\n", + "- [Redis Agent Memory Server](https://github.com/redis/agent-memory-server) - Memory persistence\n", + "- [Agent Memory Client](https://pypi.org/project/agent-memory-client/) - Python client library\n", + "\n", + "### **LangGraph & Tool Calling**\n", + "- [LangGraph Documentation](https://langchain-ai.github.io/langgraph/) - Official docs\n", + "- [LangChain Tools](https://python.langchain.com/docs/modules/tools/) - Tool creation guide\n", + "\n", + "### **Context Engineering Concepts**\n", + "- Review **Section 1** for context types fundamentals (System, User, Conversation, Retrieved)\n", + "- Review 
**Section 2** for RAG foundations (semantic search, vector embeddings, retrieval)\n", + "- Review **Section 3** for passive memory patterns (working memory, long-term memory, automatic extraction)\n", + "- Continue to **Section 4 Notebook 2** for complete agent implementation with all concepts integrated\n", + "\n", + "### **Academic Papers**\n", + "- [ReAct: Synergizing Reasoning and Acting in Language Models](https://arxiv.org/abs/2210.03629) - Reasoning + acting pattern\n", + "- [Toolformer: Language Models Can Teach Themselves to Use Tools](https://arxiv.org/abs/2302.04761) - Tool learning\n", + "- [MemGPT: Towards LLMs as Operating Systems](https://arxiv.org/abs/2310.08560) - Memory management for LLMs\n", + "- [Retrieval-Augmented Generation](https://arxiv.org/abs/2005.11401) - RAG foundations\n", + "\n", + "### **Agent Design Patterns**\n", + "- [Anthropic's Guide to Building Effective Agents](https://www.anthropic.com/research/building-effective-agents) - Best practices\n", + "- [LangChain Agent Patterns](https://python.langchain.com/docs/modules/agents/) - Different agent architectures\n", + "- [OpenAI Function Calling Guide](https://platform.openai.com/docs/guides/function-calling) - Tool calling fundamentals\n", + "\n", + "### **Production Resources**\n", + "- [LangChain Production Guide](https://python.langchain.com/docs/guides/productionization/) - Deploying agents\n", + "- [Redis Best Practices](https://redis.io/docs/manual/patterns/) - Production Redis patterns\n", + "- [Agent Memory Client](https://pypi.org/project/agent-memory-client/) - Python client for Agent Memory Server" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/python-recipes/context-engineering/notebooks/section-4-tool-selection/02_redis_university_course_advisor_agent.ipynb b/python-recipes/context-engineering/notebooks/section-4-tool-selection/02_redis_university_course_advisor_agent.ipynb new file mode 100644 index 00000000..e7af585c --- /dev/null +++ b/python-recipes/context-engineering/notebooks/section-4-tool-selection/02_redis_university_course_advisor_agent.ipynb @@ -0,0 +1,2177 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "header", + "metadata": {}, + "source": [ + "![Redis](https://redis.io/wp-content/uploads/2024/04/Logotype.svg?auto=webp&quality=85,75&width=120)\n", + "\n", + "# ๐Ÿค– Section 4: Building a Redis University Course Advisor Agent\n", + "\n", + "**โฑ๏ธ Estimated Time:** 60-75 minutes\n", + "\n", + "## ๐ŸŽฏ Learning Objectives\n", + "\n", + "By the end of this notebook, you will:\n", + "\n", + "1. **Build** a complete LangGraph agent with tools and memory\n", + "2. **Implement** exactly 3 tools: memory storage, memory search, and course search\n", + "3. **Integrate** Redis Agent Memory Server for dual-memory architecture\n", + "4. **Visualize** the agent's decision-making graph\n", + "5. 
**Demonstrate** the progression from RAG (Section 3) to full agent\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ”— Bridge from Previous Sections\n", + "\n", + "### **Your Learning Journey:**\n", + "\n", + "**Section 1:** Context Types\n", + "- System, User, Conversation, Retrieved context\n", + "- How context shapes LLM responses\n", + "\n", + "**Section 2:** RAG Foundations\n", + "- Semantic search with vector embeddings\n", + "- Retrieving and presenting information\n", + "- Single-step retrieval โ†’ generation\n", + "\n", + "**Section 3:** Memory Architecture\n", + "- Working memory (conversation continuity)\n", + "- Long-term memory (persistent knowledge)\n", + "- Memory-enhanced RAG systems\n", + "\n", + "**Section 4 (Notebook 1):** Tool-Calling Basics\n", + "- What tools are and how LLMs use them\n", + "- LangGraph fundamentals (nodes, edges, state)\n", + "- Simple tool-calling examples\n", + "- Agents vs RAG comparison\n", + "\n", + "### **What We're Building Now:**\n", + "\n", + "**A Full Agent** that combines everything:\n", + "- โœ… **Tools** for actions (search courses, manage memory)\n", + "- โœ… **Memory** for personalization (working + long-term)\n", + "- โœ… **RAG** for course information (semantic search)\n", + "- โœ… **LangGraph** for orchestration (state management)\n", + "\n", + "**๐Ÿ’ก Key Insight:** This agent is RAG + Memory + Tools + Decision-Making\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ“Š Agent Architecture\n", + "\n", + "### **The Complete Flow:**\n", + "\n", + "```\n", + "User Query\n", + " โ†“\n", + "[Load Working Memory] โ† Conversation history\n", + " โ†“\n", + "[Agent Node] โ† Decides what to do\n", + " โ†“\n", + " โ”œโ”€โ†’ [search_courses] โ† Find relevant courses\n", + " โ”œโ”€โ†’ [search_memories] โ† Recall user preferences\n", + " โ”œโ”€โ†’ [store_memory] โ† Save important facts\n", + " โ†“\n", + "[Agent Node] โ† Processes tool results\n", + " โ†“\n", + "[Generate Response] โ† Final answer\n", + " โ†“\n", + "[Save Working Memory] โ† Update conversation\n", + "```\n", + "\n", + "### **Our 3 Tools:**\n", + "\n", + "1. **`search_courses`** - Semantic search over course catalog\n", + " - When: Student asks about courses, topics, or recommendations\n", + " - Example: \"What machine learning courses are available?\"\n", + "\n", + "2. **`search_memories`** - Search long-term memory for user facts\n", + " - When: Need to recall preferences, goals, or past interactions\n", + " - Example: \"What courses did I say I was interested in?\"\n", + "\n", + "3. **`store_memory`** - Save important information to long-term memory\n", + " - When: User shares preferences, goals, or important facts\n", + " - Example: \"I'm interested in AI and want to work at a startup\"\n", + "\n", + "### **Memory Architecture:**\n", + "\n", + "| Memory Type | Purpose | Managed By | Lifespan |\n", + "|------------|---------|------------|----------|\n", + "| **Working Memory** | Conversation history | Agent Memory Server | Session |\n", + "| **Long-term Memory** | User preferences, facts | Agent Memory Server | Persistent |\n", + "| **Graph State** | Current execution state | LangGraph | Single turn |\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ“ฆ Setup and Environment\n", + "\n", + "### โš ๏ธ **CRITICAL: Prerequisites Required**\n", + "\n", + "**This notebook requires ALL services to be running. If any service is down, the agent will not work.**\n", + "\n", + "**Required Services:**\n", + "1. **Redis** - Vector storage and caching (port 6379)\n", + "2. 
**Agent Memory Server** - Memory management (port 8088)\n", + "3. **OpenAI API** - LLM functionality\n", + "\n", + "**๐Ÿš€ Quick Setup (Run this first!):**\n", + "```bash\n", + "# Navigate to the context-engineering directory\n", + "cd ../../\n", + "\n", + "# Check if services are running\n", + "./check_setup.sh\n", + "\n", + "# If services are down, run setup\n", + "./setup_memory_server.sh\n", + "```\n", + "\n", + "**๐Ÿ“– Need help?** See `../SETUP_GUIDE.md` for detailed setup instructions.\n", + "\n", + "**๐Ÿ” Manual Check:**\n", + "- Redis: `redis-cli ping` should return `PONG`\n", + "- Memory Server: `curl http://localhost:8088/v1/health` should return `{\"status\":\"ok\"}`\n", + "- Environment: Create `.env` file in `reference-agent/` with your `OPENAI_API_KEY`\n" + ] + }, + { + "cell_type": "markdown", + "id": "install-packages", + "metadata": {}, + "source": [ + "---\n" + ] + }, + { + "cell_type": "markdown", + "id": "install", + "metadata": {}, + "source": [ + "### Automated Setup Check\n", + "\n", + "Let's run the setup script to ensure all services are running properly.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "import-libraries", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:57:51.825255Z", + "iopub.status.busy": "2025-10-31T23:57:51.825073Z", + "iopub.status.idle": "2025-10-31T23:57:52.103012Z", + "shell.execute_reply": "2025-10-31T23:57:52.102484Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Running automated setup check...\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "๐Ÿ”ง Agent Memory Server Setup\n", + "===========================\n", + "๐Ÿ“Š Checking Redis...\n", + "โœ… Redis is running\n", + "๐Ÿ“Š Checking Agent Memory Server...\n", + "๐Ÿ” Agent Memory Server container exists. Checking health...\n", + "โœ… Agent Memory Server is running and healthy\n", + "โœ… No Redis connection issues detected\n", + "\n", + "โœ… Setup Complete!\n", + "=================\n", + "๐Ÿ“Š Services Status:\n", + " โ€ข Redis: Running on port 6379\n", + " โ€ข Agent Memory Server: Running on port 8088\n", + "\n", + "๐ŸŽฏ You can now run the notebooks!\n", + "\n", + "\n", + "โœ… All services are ready!\n" + ] + } + ], + "source": [ + "# Run the setup script to ensure Redis and Agent Memory Server are running\n", + "import subprocess\n", + "import sys\n", + "from pathlib import Path\n", + "\n", + "# Path to setup script\n", + "setup_script = Path(\"../../reference-agent/setup_agent_memory_server.py\")\n", + "\n", + "if setup_script.exists():\n", + " print(\"Running automated setup check...\\n\")\n", + " result = subprocess.run(\n", + " [sys.executable, str(setup_script)],\n", + " capture_output=True,\n", + " text=True\n", + " )\n", + " print(result.stdout)\n", + " if result.returncode != 0:\n", + " print(\"โš ๏ธ Setup check failed. Please review the output above.\")\n", + " print(result.stderr)\n", + " else:\n", + " print(\"\\nโœ… All services are ready!\")\n", + "else:\n", + " print(\"โš ๏ธ Setup script not found. 
Please ensure services are running manually.\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "imports", + "metadata": {}, + "source": [ + "---\n" + ] + }, + { + "cell_type": "markdown", + "id": "load-env", + "metadata": {}, + "source": [ + "### Install Dependencies\n", + "\n", + "If you haven't already installed the reference-agent package, uncomment and run the following:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "env-setup", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:57:52.104763Z", + "iopub.status.busy": "2025-10-31T23:57:52.104657Z", + "iopub.status.idle": "2025-10-31T23:57:52.106517Z", + "shell.execute_reply": "2025-10-31T23:57:52.106037Z" + } + }, + "outputs": [], + "source": [ + "# Uncomment to install reference-agent package\n", + "# %pip install -q -e ../../reference-agent\n", + "\n", + "# Uncomment to install agent-memory-client\n", + "# %pip install -q agent-memory-client\n" + ] + }, + { + "cell_type": "markdown", + "id": "check-services", + "metadata": {}, + "source": [ + "### Import Libraries\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "service-check", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:57:52.107702Z", + "iopub.status.busy": "2025-10-31T23:57:52.107645Z", + "iopub.status.idle": "2025-10-31T23:57:53.822487Z", + "shell.execute_reply": "2025-10-31T23:57:53.821994Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Libraries imported successfully!\n" + ] + } + ], + "source": [ + "# Core libraries\n", + "import os\n", + "import sys\n", + "import json\n", + "from typing import List, Dict, Any, Optional, Annotated\n", + "from datetime import datetime\n", + "from dotenv import load_dotenv\n", + "\n", + "# LangChain and LangGraph\n", + "from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, SystemMessage\n", + "from langchain_core.tools import tool\n", + "from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n", + "from langgraph.graph import StateGraph, END\n", + "from langgraph.graph.message import add_messages\n", + "from langgraph.prebuilt import ToolNode\n", + "from pydantic import BaseModel, Field\n", + "\n", + "# Redis and Agent Memory\n", + "from agent_memory_client import MemoryAPIClient, MemoryClientConfig\n", + "from agent_memory_client.models import WorkingMemory, MemoryMessage\n", + "\n", + "# Add reference-agent to path for course utilities\n", + "sys.path.insert(0, os.path.abspath(\"../../reference-agent\"))\n", + "from redis_context_course.course_manager import CourseManager\n", + "from redis_context_course.models import StudentProfile, DifficultyLevel, CourseFormat\n", + "\n", + "print(\"โœ… Libraries imported successfully!\")" + ] + }, + { + "cell_type": "markdown", + "id": "init-components", + "metadata": {}, + "source": [ + "### Load Environment Variables\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "init-course-manager", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:57:53.823677Z", + "iopub.status.busy": "2025-10-31T23:57:53.823553Z", + "iopub.status.idle": "2025-10-31T23:57:53.826253Z", + "shell.execute_reply": "2025-10-31T23:57:53.825901Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Environment configured successfully!\n", + " OpenAI API Key: ********************wTMA\n", + " Redis URL: redis://localhost:6379\n", + " Agent Memory URL: http://localhost:8088\n" + ] + } + 
], + "source": [ + "# Load environment variables\n", + "load_dotenv(\"../../reference-agent/.env\")\n", + "\n", + "# Get configuration\n", + "OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")\n", + "REDIS_URL = os.getenv(\"REDIS_URL\", \"redis://localhost:6379\")\n", + "AGENT_MEMORY_URL = os.getenv(\"AGENT_MEMORY_URL\", \"http://localhost:8088\")\n", + "\n", + "# Verify OpenAI API key\n", + "if not OPENAI_API_KEY:\n", + " raise ValueError(\"\"\"\n", + " โš ๏ธ OPENAI_API_KEY not found!\n", + "\n", + " Please create a .env file in the reference-agent directory:\n", + " 1. cd ../../reference-agent\n", + " 2. cp .env.example .env\n", + " 3. Edit .env and add your OpenAI API key\n", + " \"\"\")\n", + "\n", + "print(\"โœ… Environment configured successfully!\")\n", + "print(f\" OpenAI API Key: {'*' * 20}{OPENAI_API_KEY[-4:]}\")\n", + "print(f\" Redis URL: {REDIS_URL}\")\n", + "print(f\" Agent Memory URL: {AGENT_MEMORY_URL}\")" + ] + }, + { + "cell_type": "markdown", + "id": "course-manager", + "metadata": {}, + "source": [ + "### Check Required Services\n", + "\n", + "Let's verify that Redis and the Agent Memory Server are running.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "init-llm", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:57:53.827385Z", + "iopub.status.busy": "2025-10-31T23:57:53.827318Z", + "iopub.status.idle": "2025-10-31T23:57:53.839615Z", + "shell.execute_reply": "2025-10-31T23:57:53.839213Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Redis is running\n", + "โœ… Agent Memory Server is running\n", + "\n", + "โœ… All services are ready!\n" + ] + } + ], + "source": [ + "import redis\n", + "import requests\n", + "\n", + "# Check Redis\n", + "try:\n", + " redis_client = redis.from_url(REDIS_URL)\n", + " redis_client.ping()\n", + " print(\"โœ… Redis is running\")\n", + " REDIS_AVAILABLE = True\n", + "except Exception as e:\n", + " print(f\"โŒ Redis is not available: {e}\")\n", + " print(\" Please start Redis using Docker:\")\n", + " print(\" docker run -d -p 6379:6379 redis/redis-stack:latest\")\n", + " REDIS_AVAILABLE = False\n", + "\n", + "# Check Agent Memory Server\n", + "try:\n", + " response = requests.get(f\"{AGENT_MEMORY_URL}/v1/health\", timeout=2)\n", + " if response.status_code == 200:\n", + " print(\"โœ… Agent Memory Server is running\")\n", + " MEMORY_SERVER_AVAILABLE = True\n", + " else:\n", + " print(f\"โš ๏ธ Agent Memory Server returned status {response.status_code}\")\n", + " MEMORY_SERVER_AVAILABLE = False\n", + "except Exception as e:\n", + " print(f\"โŒ Agent Memory Server is not available: {e}\")\n", + " print(\" Please start the Agent Memory Server:\")\n", + " print(\" cd ../../reference-agent && python setup_agent_memory_server.py\")\n", + " MEMORY_SERVER_AVAILABLE = False\n", + "\n", + "if not (REDIS_AVAILABLE and MEMORY_SERVER_AVAILABLE):\n", + " print(\"\\nโš ๏ธ Some services are not available. 
Please start them before continuing.\")\n", + "else:\n", + " print(\"\\nโœ… All services are ready!\")" + ] + }, + { + "cell_type": "markdown", + "id": "llm-init", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## ๐Ÿ”ง Initialize Components\n", + "\n", + "Now let's initialize the components we'll use to build our agent.\n" + ] + }, + { + "cell_type": "markdown", + "id": "init-memory", + "metadata": {}, + "source": [ + "### Initialize Course Manager\n", + "\n", + "The `CourseManager` handles course storage and semantic search, just like in Section 2.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "memory-init", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:57:53.840793Z", + "iopub.status.busy": "2025-10-31T23:57:53.840727Z", + "iopub.status.idle": "2025-10-31T23:57:53.933415Z", + "shell.execute_reply": "2025-10-31T23:57:53.933012Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "19:57:53 redisvl.index.index INFO Index already exists, not overwriting.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Course Manager initialized\n", + " Ready to search and retrieve courses\n" + ] + } + ], + "source": [ + "# Initialize Course Manager\n", + "course_manager = CourseManager()\n", + "\n", + "print(\"โœ… Course Manager initialized\")\n", + "print(\" Ready to search and retrieve courses\")" + ] + }, + { + "cell_type": "markdown", + "id": "student-profile", + "metadata": {}, + "source": [ + "### Initialize LLM\n", + "\n", + "We'll use GPT-4o with temperature=0.0 for consistent, deterministic responses.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "create-student", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:57:53.934684Z", + "iopub.status.busy": "2025-10-31T23:57:53.934605Z", + "iopub.status.idle": "2025-10-31T23:57:53.943986Z", + "shell.execute_reply": "2025-10-31T23:57:53.943698Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… LLM initialized\n", + " Model: gpt-4o\n", + " Temperature: 0.0 (deterministic)\n" + ] + } + ], + "source": [ + "# Initialize LLM\n", + "llm = ChatOpenAI(model=\"gpt-4o\", temperature=0.0)\n", + "\n", + "print(\"โœ… LLM initialized\")\n", + "print(\" Model: gpt-4o\")\n", + "print(\" Temperature: 0.0 (deterministic)\")" + ] + }, + { + "cell_type": "markdown", + "id": "tools-section", + "metadata": {}, + "source": [ + "### Initialize Memory Client\n", + "\n", + "The memory client handles both working memory (conversation history) and long-term memory (persistent facts).\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "tool-1", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:57:53.945184Z", + "iopub.status.busy": "2025-10-31T23:57:53.945115Z", + "iopub.status.idle": "2025-10-31T23:57:53.950020Z", + "shell.execute_reply": "2025-10-31T23:57:53.949643Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Memory Client initialized\n", + " Base URL: http://localhost:8088\n", + " Namespace: redis_university\n", + " Ready for working memory and long-term memory operations\n" + ] + } + ], + "source": [ + "# Initialize Memory Client\n", + "config = MemoryClientConfig(\n", + " base_url=AGENT_MEMORY_URL,\n", + " default_namespace=\"redis_university\"\n", + ")\n", + "memory_client = MemoryAPIClient(config=config)\n", + "\n", + "print(\"โœ… Memory Client initialized\")\n", + 
"print(f\" Base URL: {config.base_url}\")\n", + "print(f\" Namespace: {config.default_namespace}\")\n", + "print(\" Ready for working memory and long-term memory operations\")" + ] + }, + { + "cell_type": "markdown", + "id": "search-courses-tool", + "metadata": {}, + "source": [ + "### Create Sample Student Profile\n", + "\n", + "We'll create a sample student to use throughout our demos.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "tool-2", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:57:53.951077Z", + "iopub.status.busy": "2025-10-31T23:57:53.951016Z", + "iopub.status.idle": "2025-10-31T23:57:53.953293Z", + "shell.execute_reply": "2025-10-31T23:57:53.952950Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Student profile created\n", + " Name: Sarah Chen\n", + " Student ID: student_sarah_001\n", + " Session ID: session_student_sarah_001_20251031_195753\n", + " Major: Computer Science\n", + " Interests: machine learning, data science, algorithms\n" + ] + } + ], + "source": [ + "# Create sample student profile\n", + "STUDENT_ID = \"student_sarah_001\"\n", + "SESSION_ID = f\"session_{STUDENT_ID}_{datetime.now().strftime('%Y%m%d_%H%M%S')}\"\n", + "\n", + "sarah = StudentProfile(\n", + " name=\"Sarah Chen\",\n", + " email=\"sarah.chen@university.edu\",\n", + " major=\"Computer Science\",\n", + " year=2,\n", + " interests=[\"machine learning\", \"data science\", \"algorithms\"],\n", + " completed_courses=[\"Introduction to Programming\", \"Data Structures\"],\n", + " current_courses=[\"Linear Algebra\"],\n", + " preferred_format=CourseFormat.ONLINE,\n", + " preferred_difficulty=DifficultyLevel.INTERMEDIATE\n", + ")\n", + "\n", + "print(\"โœ… Student profile created\")\n", + "print(f\" Name: {sarah.name}\")\n", + "print(f\" Student ID: {STUDENT_ID}\")\n", + "print(f\" Session ID: {SESSION_ID}\")\n", + "print(f\" Major: {sarah.major}\")\n", + "print(f\" Interests: {', '.join(sarah.interests)}\")" + ] + }, + { + "cell_type": "markdown", + "id": "search-memories-tool", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## ๐Ÿ› ๏ธ Part 1: Define the Agent's Tools\n", + "\n", + "Let's build our 3 tools step by step. 
Each tool will have:\n", + "- Clear input schema (what parameters it accepts)\n", + "- Descriptive docstring (tells the LLM when to use it)\n", + "- Implementation (the actual logic)\n", + "\n", + "**Remember:** The LLM only sees the tool name, description, and parametersโ€”not the implementation!\n" + ] + }, + { + "cell_type": "markdown", + "id": "tool-3", + "metadata": {}, + "source": [ + "### Tool 1: `search_courses`\n", + "\n", + "This tool searches the course catalog using semantic search.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "store-memory-tool", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:57:53.954314Z", + "iopub.status.busy": "2025-10-31T23:57:53.954256Z", + "iopub.status.idle": "2025-10-31T23:57:53.957045Z", + "shell.execute_reply": "2025-10-31T23:57:53.956679Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Tool 1 defined: search_courses\n", + " Purpose: Search course catalog with semantic search\n", + " Parameters: query (str), limit (int)\n" + ] + } + ], + "source": [ + "# Define input schema\n", + "class SearchCoursesInput(BaseModel):\n", + " \"\"\"Input schema for searching courses.\"\"\"\n", + " query: str = Field(\n", + " description=\"Natural language search query. Can be topics (e.g., 'machine learning'), \"\n", + " \"characteristics (e.g., 'online courses'), or general questions \"\n", + " \"(e.g., 'beginner programming courses')\"\n", + " )\n", + " limit: int = Field(\n", + " default=5,\n", + " description=\"Maximum number of results to return. Default is 5. \"\n", + " \"Use 3 for quick answers, 10 for comprehensive results.\"\n", + " )\n", + "\n", + "# Define the tool\n", + "@tool(\"search_courses\", args_schema=SearchCoursesInput)\n", + "async def search_courses(query: str, limit: int = 5) -> str:\n", + " \"\"\"\n", + " Search for courses using semantic search based on topics, descriptions, or characteristics.\n", + "\n", + " Use this tool when students ask about:\n", + " - Topics or subjects: \"machine learning courses\", \"database courses\"\n", + " - Course characteristics: \"online courses\", \"beginner courses\", \"3-credit courses\"\n", + " - General exploration: \"what courses are available in AI?\"\n", + "\n", + " The search uses semantic matching, so natural language queries work well.\n", + "\n", + " Returns: Formatted list of matching courses with details.\n", + " \"\"\"\n", + " results = await course_manager.search_courses(query, limit=limit)\n", + "\n", + " if not results:\n", + " return \"No courses found matching your query.\"\n", + "\n", + " output = []\n", + " for course in results:\n", + " output.append(\n", + " f\"{course.course_code}: {course.title}\\n\"\n", + " f\" Credits: {course.credits} | {course.format.value} | {course.difficulty_level.value}\\n\"\n", + " f\" {course.description[:150]}...\"\n", + " )\n", + "\n", + " return \"\\n\\n\".join(output)\n", + "\n", + "print(\"โœ… Tool 1 defined: search_courses\")\n", + "print(\" Purpose: Search course catalog with semantic search\")\n", + "print(\" Parameters: query (str), limit (int)\")" + ] + }, + { + "cell_type": "markdown", + "id": "tools-summary", + "metadata": {}, + "source": [ + "### Tool 2: `search_memories`\n", + "\n", + "This tool searches long-term memory for user preferences and facts.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "list-tools", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:57:53.958090Z", + "iopub.status.busy": 
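To see exactly what the model receives, you can dump a tool in the OpenAI function-calling format. A quick sketch using langchain-core's converter (run it after the `search_courses` cell above has executed):

```python
# Sketch: inspect the name, description, and parameters the LLM actually sees.
import json
from langchain_core.utils.function_calling import convert_to_openai_tool

print(json.dumps(convert_to_openai_tool(search_courses), indent=2))
```

Note that the implementation body is absent from this payload, which is exactly the point above: only the name, description, and parameter schema reach the LLM.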
"2025-10-31T23:57:53.958029Z", + "iopub.status.idle": "2025-10-31T23:57:53.960900Z", + "shell.execute_reply": "2025-10-31T23:57:53.960462Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Tool 2 defined: search_memories\n", + " Purpose: Search long-term memory for user facts\n", + " Parameters: query (str), limit (int)\n" + ] + } + ], + "source": [ + "# Define input schema\n", + "class SearchMemoriesInput(BaseModel):\n", + " \"\"\"Input schema for searching memories.\"\"\"\n", + " query: str = Field(\n", + " description=\"Natural language query to search for in user's long-term memory. \"\n", + " \"Examples: 'career goals', 'course preferences', 'learning style'\"\n", + " )\n", + " limit: int = Field(\n", + " default=5,\n", + " description=\"Maximum number of memories to return. Default is 5.\"\n", + " )\n", + "\n", + "# Define the tool\n", + "@tool(\"search_memories\", args_schema=SearchMemoriesInput)\n", + "async def search_memories(query: str, limit: int = 5) -> str:\n", + " \"\"\"\n", + " Search the user's long-term memory for relevant facts, preferences, and past interactions.\n", + "\n", + " Use this tool when you need to:\n", + " - Recall user preferences: \"What format does the user prefer?\"\n", + " - Remember past goals: \"What career path is the user interested in?\"\n", + " - Find previous interactions: \"What courses did we discuss before?\"\n", + " - Personalize recommendations: \"What are the user's interests?\"\n", + "\n", + " The search uses semantic matching to find relevant memories.\n", + "\n", + " Returns: List of relevant memories with content and metadata.\n", + " \"\"\"\n", + " try:\n", + " from agent_memory_client.filters import UserId\n", + "\n", + " # Search long-term memory\n", + " results = await memory_client.search_long_term_memory(\n", + " text=query,\n", + " user_id=UserId(eq=STUDENT_ID),\n", + " limit=limit\n", + " )\n", + "\n", + " if not results.memories or len(results.memories) == 0:\n", + " return \"No relevant memories found.\"\n", + "\n", + " output = []\n", + " for i, memory in enumerate(results.memories, 1):\n", + " output.append(f\"{i}. 
{memory.text}\")\n", + " if memory.topics:\n", + " output.append(f\" Topics: {', '.join(memory.topics)}\")\n", + "\n", + " return \"\\n\".join(output)\n", + " except Exception as e:\n", + " return f\"Error searching memories: {str(e)}\"\n", + "\n", + "print(\"โœ… Tool 2 defined: search_memories\")\n", + "print(\" Purpose: Search long-term memory for user facts\")\n", + "print(\" Parameters: query (str), limit (int)\")" + ] + }, + { + "cell_type": "markdown", + "id": "agent-state", + "metadata": {}, + "source": [ + "### Tool 3: `store_memory`\n", + "\n", + "This tool saves important information to long-term memory.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "define-state", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:57:53.962062Z", + "iopub.status.busy": "2025-10-31T23:57:53.961995Z", + "iopub.status.idle": "2025-10-31T23:57:53.964832Z", + "shell.execute_reply": "2025-10-31T23:57:53.964534Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Tool 3 defined: store_memory\n", + " Purpose: Save important facts to long-term memory\n", + " Parameters: text (str), memory_type (str), topics (List[str])\n" + ] + } + ], + "source": [ + "# Define input schema\n", + "class StoreMemoryInput(BaseModel):\n", + " \"\"\"Input schema for storing memories.\"\"\"\n", + " text: str = Field(\n", + " description=\"The information to store. Should be a clear, factual statement. \"\n", + " \"Examples: 'User prefers online courses', 'User's career goal is AI research'\"\n", + " )\n", + " memory_type: str = Field(\n", + " default=\"semantic\",\n", + " description=\"Type of memory: 'semantic' (facts/preferences), 'episodic' (events/interactions). \"\n", + " \"Default is 'semantic'.\"\n", + " )\n", + " topics: List[str] = Field(\n", + " default=[],\n", + " description=\"Optional tags to categorize the memory, such as ['preferences', 'courses']\"\n", + " )\n", + "\n", + "# Define the tool\n", + "@tool(\"store_memory\", args_schema=StoreMemoryInput)\n", + "async def store_memory(text: str, memory_type: str = \"semantic\", topics: List[str] = []) -> str:\n", + " \"\"\"\n", + " Store important information to the user's long-term memory.\n", + "\n", + " Use this tool when the user shares:\n", + " - Preferences: \"I prefer online courses\", \"I like hands-on projects\"\n", + " - Goals: \"I want to work in AI\", \"I'm preparing for grad school\"\n", + " - Important facts: \"I have a part-time job\", \"I'm interested in startups\"\n", + " - Constraints: \"I can only take 2 courses per semester\"\n", + "\n", + " Do NOT store:\n", + " - Temporary information (use conversation context instead)\n", + " - Course details (already in course catalog)\n", + " - General questions\n", + "\n", + " Returns: Confirmation message.\n", + " \"\"\"\n", + " try:\n", + " from agent_memory_client.models import ClientMemoryRecord\n", + "\n", + " # Create memory record\n", + " memory = ClientMemoryRecord(\n", + " text=text,\n", + " user_id=STUDENT_ID,\n", + " memory_type=memory_type,\n", + " topics=topics or []\n", + " )\n", + "\n", + " # Store in long-term memory\n", + " await memory_client.create_long_term_memory([memory])\n", + " return f\"โœ… Stored to long-term memory: {text}\"\n", + " except Exception as e:\n", + " return f\"Error storing memory: {str(e)}\"\n", + "\n", + "print(\"โœ… Tool 3 defined: store_memory\")\n", + "print(\" Purpose: Save important facts to long-term memory\")\n", + "print(\" Parameters: text (str), memory_type (str), 
topics (List[str])\")" + ] + }, + { + "cell_type": "markdown", + "id": "graph-nodes", + "metadata": {}, + "source": [ + "### Tools Summary\n", + "\n", + "Let's review our 3 tools:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "load-memory-node", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:57:53.966158Z", + "iopub.status.busy": "2025-10-31T23:57:53.966078Z", + "iopub.status.idle": "2025-10-31T23:57:53.968399Z", + "shell.execute_reply": "2025-10-31T23:57:53.968046Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================================================================================\n", + "๐Ÿ› ๏ธ AGENT TOOLS SUMMARY\n", + "================================================================================\n", + "\n", + "1. search_courses\n", + " Description: Search for courses using semantic search based on topics, descriptions, or characteristics\n", + " Parameters: query, limit\n", + "\n", + "2. search_memories\n", + " Description: Search the user's long-term memory for relevant facts, preferences, and past interactions\n", + " Parameters: query, limit\n", + "\n", + "3. store_memory\n", + " Description: Store important information to the user's long-term memory\n", + " Parameters: text, memory_type, topics\n", + "\n", + "================================================================================\n" + ] + } + ], + "source": [ + "# Collect all tools\n", + "tools = [search_courses, search_memories, store_memory]\n", + "\n", + "print(\"=\" * 80)\n", + "print(\"๐Ÿ› ๏ธ AGENT TOOLS SUMMARY\")\n", + "print(\"=\" * 80)\n", + "for i, tool in enumerate(tools, 1):\n", + " print(f\"\\n{i}. {tool.name}\")\n", + " print(f\" Description: {tool.description.split('.')[0]}\")\n", + " print(f\" Parameters: {', '.join(tool.args_schema.model_fields.keys())}\")\n", + "print(\"\\n\" + \"=\" * 80)" + ] + }, + { + "cell_type": "markdown", + "id": "agent-node", + "metadata": {}, + "source": "\n" + }, + { + "cell_type": "markdown", + "id": "save-memory-node", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:57:53.969443Z", + "iopub.status.busy": "2025-10-31T23:57:53.969382Z", + "iopub.status.idle": "2025-10-31T23:57:53.971457Z", + "shell.execute_reply": "2025-10-31T23:57:53.971109Z" + } + }, + "source": [ + "## ๐Ÿง  Memory Extraction in This Agent\n", + "\n", + "Understanding how this agent creates and manages long-term memories.\n" + ] + }, + { + "cell_type": "markdown", + "id": "routing-logic", + "metadata": {}, + "source": [ + "### How This Agent Uses Memory\n", + "\n", + "Our agent has 3 tools, and 2 of them interact with memory:\n", + "\n", + "1. **`store_memory`** - Saves facts to long-term memory\n", + "2. **`search_memories`** - Retrieves facts from long-term memory\n", + "3. 
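Because `@tool` produces ordinary LangChain runnables, you can exercise the tools directly before wiring them into a graph. A quick smoke test with made-up inputs (illustrative values only):

```python
# Sketch: call each tool directly (outside the agent) to verify it works.
print(await store_memory.ainvoke({"text": "User prefers evening study sessions",
                                  "topics": ["preferences"]}))
print(await search_memories.ainvoke({"query": "study preferences", "limit": 3}))
print(await search_courses.ainvoke({"query": "introductory machine learning", "limit": 2}))
```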
**`search_courses`** - Searches course catalog (not memory-related)\n", + "\n", + "**Question:** When the agent calls `store_memory`, how does the Agent Memory Server decide what to extract and how to structure it?\n", + "\n", + "**Answer:** Memory Extraction Strategies (covered in Section 3, Notebook 1)\n" + ] + }, + { + "cell_type": "markdown", + "id": "should-continue", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:57:53.972503Z", + "iopub.status.busy": "2025-10-31T23:57:53.972440Z", + "iopub.status.idle": "2025-10-31T23:57:53.974986Z", + "shell.execute_reply": "2025-10-31T23:57:53.974616Z" + } + }, + "source": [ + "### Current Configuration: Discrete Strategy (Default)\n", + "\n", + "**This agent uses the DISCRETE strategy** (default) because:\n", + "\n", + "โœ… **Individual facts are searchable**\n", + "- \"User's major is Computer Science\"\n", + "- \"User interested in machine learning\"\n", + "- \"User completed RU101\"\n", + "\n", + "โœ… **Facts are independently useful**\n", + "- Agent can search for specific facts\n", + "- Each fact has its own relevance score\n", + "- No need to parse summaries\n", + "\n", + "โœ… **Good for Q&A interactions**\n", + "- Student: \"What courses did I say I was interested in?\"\n", + "- Agent searches discrete facts: \"User interested in ML\", \"User interested in AI\"\n" + ] + }, + { + "cell_type": "markdown", + "id": "build-graph", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:57:53.975927Z", + "iopub.status.busy": "2025-10-31T23:57:53.975854Z", + "iopub.status.idle": "2025-10-31T23:57:53.977825Z", + "shell.execute_reply": "2025-10-31T23:57:53.977580Z" + } + }, + "source": [ + "### Example: Discrete Strategy in Action\n", + "\n", + "**Conversation:**\n", + "```\n", + "User: \"I'm a CS major interested in ML. 
I prefer online courses.\"\n", + "Agent: [Calls store_memory tool]\n", + "```\n", + "\n", + "**What Gets Stored (Discrete Strategy):**\n", + "```json\n", + "[\n", + " {\"text\": \"User's major is Computer Science\", \"type\": \"semantic\"},\n", + " {\"text\": \"User interested in machine learning\", \"type\": \"semantic\"},\n", + " {\"text\": \"User prefers online courses\", \"type\": \"semantic\"}\n", + "]\n", + "```\n", + "\n", + "**Later:**\n", + "```\n", + "User: \"What courses match my interests?\"\n", + "Agent: [Calls search_memories tool]\n", + " โ†’ Finds: \"User interested in machine learning\"\n", + " โ†’ Finds: \"User prefers online courses\"\n", + " [Calls search_courses with these preferences]\n", + "```\n" + ] + }, + { + "cell_type": "markdown", + "id": "construct-graph", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:57:53.978903Z", + "iopub.status.busy": "2025-10-31T23:57:53.978835Z", + "iopub.status.idle": "2025-10-31T23:57:53.981202Z", + "shell.execute_reply": "2025-10-31T23:57:53.980864Z" + } + }, + "source": [ + "### When Would Summary Strategy Be Better?\n", + "\n", + "**Summary strategy** would be beneficial for:\n", + "\n", + "**Scenario 1: Long Advising Sessions**\n", + "```\n", + "User has 30-minute conversation discussing:\n", + "- Academic goals\n", + "- Career aspirations\n", + "- Course preferences\n", + "- Schedule constraints\n", + "- Graduation timeline\n", + "```\n", + "\n", + "**Discrete Strategy:** Extracts 20+ individual facts\n", + "**Summary Strategy:** Creates 1-2 comprehensive summaries preserving context\n", + "\n", + "**Scenario 2: Session Notes**\n", + "```\n", + "Agent: \"Let me summarize our conversation today...\"\n", + "[Retrieves summary memory instead of reconstructing from discrete facts]\n", + "```\n" + ] + }, + { + "cell_type": "markdown", + "id": "visualize-graph", + "metadata": {}, + "source": [ + "### Configuration Example (Not Used in This Notebook)\n", + "\n", + "If you wanted to use summary strategy instead:\n", + "\n", + "```python\n", + "from agent_memory_client.models import MemoryStrategyConfig\n", + "\n", + "# Configure summary strategy\n", + "summary_strategy = MemoryStrategyConfig(\n", + " strategy=\"summary\",\n", + " config={\"max_summary_length\": 500}\n", + ")\n", + "\n", + "# Apply when creating working memory\n", + "await memory_client.set_working_memory(\n", + " session_id=session_id,\n", + " messages=messages,\n", + " long_term_memory_strategy=summary_strategy # โ† Use summary instead of discrete\n", + ")\n", + "```\n" + ] + }, + { + "cell_type": "markdown", + "id": "show-graph", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:57:53.982174Z", + "iopub.status.busy": "2025-10-31T23:57:53.982118Z", + "iopub.status.idle": "2025-10-31T23:57:53.983908Z", + "shell.execute_reply": "2025-10-31T23:57:53.983535Z" + } + }, + "source": [ + "### Why We Stick with Discrete (Default)\n", + "\n", + "For this course advisor agent:\n", + "- โœ… Questions are specific (\"What are prerequisites for RU301?\")\n", + "- โœ… Facts are independently useful\n", + "- โœ… Search works better with discrete facts\n", + "- โœ… No configuration needed (default behavior)\n", + "\n", + "**In production**, you might:\n", + "- Use **discrete** for most interactions (default)\n", + "- Use **summary** for end-of-session notes\n", + "- Use **preferences** during student onboarding\n", + "- Use **custom** for specialized academic domains\n" + ] + }, + { + "cell_type": "markdown", + "id": "demo-section", + 
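To make the discrete example concrete, here is roughly what those three facts look like as explicit records, using the same client API as our `store_memory` tool. The texts and topics are illustrative; in normal operation the Agent Memory Server extracts these automatically.

```python
# Sketch: the example conversation above, persisted as discrete records.
from agent_memory_client.models import ClientMemoryRecord

records = [
    ClientMemoryRecord(text="User's major is Computer Science",
                       user_id=STUDENT_ID, memory_type="semantic", topics=["profile"]),
    ClientMemoryRecord(text="User interested in machine learning",
                       user_id=STUDENT_ID, memory_type="semantic", topics=["interests"]),
    ClientMemoryRecord(text="User prefers online courses",
                       user_id=STUDENT_ID, memory_type="semantic", topics=["preferences"]),
]
await memory_client.create_long_term_memory(records)
```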
"metadata": {}, + "source": [ + "### ๐Ÿ”— Connection to Section 3\n", + "\n", + "In **Section 3, Notebook 1**, we introduced memory extraction strategies conceptually.\n", + "\n", + "In **Section 3, Notebook 2**, we demonstrated the difference between discrete and summary strategies with hands-on examples.\n", + "\n", + "**Now in Section 4**, we see how a production agent uses the discrete strategy (default) for course advising.\n", + "\n", + "**Key Takeaway:** The Agent Memory Server's memory extraction strategies give you flexibility in HOW memories are created, but for most agent interactions (like this course advisor), the default discrete strategy works best.\n" + ] + }, + { + "cell_type": "markdown", + "id": "run-agent-helper", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:57:53.984807Z", + "iopub.status.busy": "2025-10-31T23:57:53.984751Z", + "iopub.status.idle": "2025-10-31T23:57:53.990038Z", + "shell.execute_reply": "2025-10-31T23:57:53.989670Z" + } + }, + "source": [ + "### ๐Ÿ“š Learn More\n", + "\n", + "- [Memory Extraction Strategies Documentation](https://redis.github.io/agent-memory-server/memory-extraction-strategies/)\n", + "- [Section 3, Notebook 1](../section-3-memory-systems-for-context-engineering/01_working_and_longterm_memory.ipynb) - Theory foundation\n", + "- [Section 3, Notebook 2](../section-3-memory-systems-for-context-engineering/02_combining_memory_with_retrieved_context.ipynb) - Hands-on comparison demo\n", + "\n", + "---\n", + "\n", + "## ๐ŸŽจ Part 2: Define the Agent State\n", + "\n", + "In LangGraph, **state** is the shared data structure that flows through the graph. Each node can read from and write to the state.\n", + "\n", + "### What Goes in State?\n", + "\n", + "- **messages**: Conversation history (automatically managed by LangGraph)\n", + "- **student_id**: Who we're helping\n", + "- **session_id**: Current conversation session\n", + "- **context**: Additional context (memories, preferences, etc.)\n", + "\n", + "**Note:** We use `Annotated[List[BaseMessage], add_messages]` for messages. The `add_messages` reducer automatically handles message deduplication and ordering.\n" + ] + }, + { + "cell_type": "code", + "id": "demo-1", + "metadata": {}, + "source": [ + "# Define the agent state\n", + "class AgentState(BaseModel):\n", + " \"\"\"State for the course advisor agent.\"\"\"\n", + " messages: Annotated[List[BaseMessage], add_messages]\n", + " student_id: str\n", + " session_id: str\n", + " context: Dict[str, Any] = {}\n", + "\n", + "print(\"โœ… Agent state defined\")\n", + "print(\" Fields: messages, student_id, session_id, context\")" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "demo-search", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:57:53.991081Z", + "iopub.status.busy": "2025-10-31T23:57:53.991018Z", + "iopub.status.idle": "2025-10-31T23:57:54.095976Z", + "shell.execute_reply": "2025-10-31T23:57:54.095530Z" + } + }, + "source": [ + "---\n", + "\n", + "## ๐Ÿ”— Part 3: Build the Agent Graph\n", + "\n", + "Now we'll build the LangGraph workflow. Our graph will have:\n", + "\n", + "1. **load_memory** - Load working memory (conversation history)\n", + "2. **agent** - LLM decides what to do (call tools or respond)\n", + "3. **tools** - Execute tool calls\n", + "4. 
**save_memory** - Save updated conversation to working memory\n", + "\n", + "### Step 1: Define Node Functions\n", + "\n", + "Each node is a function that takes state and returns updated state.\n" + ] + }, + { + "cell_type": "code", + "id": "demo-2", + "metadata": {}, + "source": [ + "# Node 1: Load working memory\n", + "async def load_memory(state: AgentState) -> AgentState:\n", + " \"\"\"\n", + " Load conversation history from working memory.\n", + "\n", + " This gives the agent context about previous interactions in this session.\n", + " \"\"\"\n", + " try:\n", + " # Get or create working memory for this session\n", + " _, working_memory = await memory_client.get_or_create_working_memory(\n", + " session_id=state.session_id,\n", + " user_id=state.student_id,\n", + " model_name=\"gpt-4o\"\n", + " )\n", + "\n", + " if working_memory and working_memory.messages:\n", + " # Convert stored messages to LangChain message objects\n", + " loaded_messages = []\n", + " for msg in working_memory.messages:\n", + " if msg.role == 'user':\n", + " loaded_messages.append(HumanMessage(content=msg.content))\n", + " elif msg.role == 'assistant':\n", + " loaded_messages.append(AIMessage(content=msg.content))\n", + "\n", + " # Add loaded messages to state (prepend to current messages)\n", + " state.messages = loaded_messages + state.messages\n", + " state.context['memory_loaded'] = True\n", + " print(f\" Loaded {len(loaded_messages)} messages from working memory\")\n", + " else:\n", + " state.context['memory_loaded'] = False\n", + " print(\" No previous conversation found (new session)\")\n", + " except Exception as e:\n", + " print(f\" Warning: Could not load memory: {e}\")\n", + " state.context['memory_loaded'] = False\n", + "\n", + " return state\n", + "\n", + "print(\"โœ… Node 1 defined: load_memory\")\n", + "print(\" Purpose: Load conversation history from working memory\")" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "demo-store", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:57:54.097563Z", + "iopub.status.busy": "2025-10-31T23:57:54.097461Z", + "iopub.status.idle": "2025-10-31T23:57:54.100763Z", + "shell.execute_reply": "2025-10-31T23:57:54.100208Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Helper function defined: run_agent\n" + ] + } + ], + "source": [ + "# Node 2: Agent (LLM with tools)\n", + "async def agent_node(state: AgentState) -> AgentState:\n", + " \"\"\"\n", + " The agent decides what to do: call tools or respond to the user.\n", + "\n", + " This is where the LLM reasoning happens.\n", + " \"\"\"\n", + " # Create system message with instructions\n", + " system_message = SystemMessage(content=\"\"\"\n", + "You are a helpful Redis University course advisor assistant.\n", + "\n", + "Your role:\n", + "- Help students find courses that match their interests and goals\n", + "- Remember student preferences and use them for personalized recommendations\n", + "- Store important information about students for future conversations\n", + "\n", + "Guidelines:\n", + "- Use search_courses to find relevant courses\n", + "- Use search_memories to recall student preferences and past interactions\n", + "- Use store_memory when students share important preferences, goals, or constraints\n", + "- Be conversational and helpful\n", + "- Provide specific course recommendations with details\n", + "\"\"\")\n", + "\n", + " # Bind tools to LLM\n", + " llm_with_tools = 
llm.bind_tools(tools)\n", + "\n", + " # Call LLM with system message + conversation history\n", + " messages = [system_message] + state.messages\n", + " response = await llm_with_tools.ainvoke(messages)\n", + "\n", + " # Add response to state\n", + " state.messages.append(response)\n", + "\n", + " return state\n", + "\n", + "print(\"โœ… Node 2 defined: agent_node\")\n", + "print(\" Purpose: LLM decides whether to call tools or respond\")" + ] + }, + { + "cell_type": "code", + "id": "demo-3", + "metadata": {}, + "source": [ + "# Node 3: Save working memory\n", + "async def save_memory(state: AgentState) -> AgentState:\n", + " \"\"\"\n", + " Save the updated conversation to working memory.\n", + "\n", + " This ensures continuity across conversation turns.\n", + " \"\"\"\n", + " try:\n", + " # Get or create working memory\n", + " _, working_memory = await memory_client.get_or_create_working_memory(\n", + " session_id=state.session_id,\n", + " user_id=state.student_id,\n", + " model_name=\"gpt-4o\"\n", + " )\n", + "\n", + " # Clear existing messages and add current conversation\n", + " working_memory.messages = []\n", + " for msg in state.messages:\n", + " if isinstance(msg, HumanMessage):\n", + " working_memory.messages.append(MemoryMessage(role='user', content=msg.content))\n", + " elif isinstance(msg, AIMessage):\n", + " # Only store text content, not tool calls\n", + " if msg.content:\n", + " working_memory.messages.append(MemoryMessage(role='assistant', content=msg.content))\n", + "\n", + " # Save to working memory\n", + " await memory_client.put_working_memory(\n", + " session_id=state.session_id,\n", + " memory=working_memory,\n", + " user_id=state.student_id,\n", + " model_name=\"gpt-4o\"\n", + " )\n", + "\n", + " print(f\" Saved {len(working_memory.messages)} messages to working memory\")\n", + " except Exception as e:\n", + " print(f\" Warning: Could not save memory: {e}\")\n", + "\n", + " return state\n", + "\n", + "print(\"โœ… Node 3 defined: save_memory\")\n", + "print(\" Purpose: Save conversation to working memory\")" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "demo-recall", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:57:54.102049Z", + "iopub.status.busy": "2025-10-31T23:57:54.101962Z", + "iopub.status.idle": "2025-10-31T23:57:58.356458Z", + "shell.execute_reply": "2025-10-31T23:57:58.355667Z" + } + }, + "source": [ + "### Step 2: Define Routing Logic\n", + "\n", + "We need a function to decide: should we call tools or end the conversation?\n" + ] + }, + { + "cell_type": "code", + "id": "demo-4", + "metadata": {}, + "source": [ + "# Routing function\n", + "def should_continue(state: AgentState) -> str:\n", + " \"\"\"\n", + " Determine if we should continue to tools or end.\n", + "\n", + " If the last message has tool calls, route to tools.\n", + " Otherwise, we're done.\n", + " \"\"\"\n", + " last_message = state.messages[-1]\n", + "\n", + " # Check if there are tool calls\n", + " if hasattr(last_message, 'tool_calls') and last_message.tool_calls:\n", + " return \"tools\"\n", + " else:\n", + " return \"save_memory\"\n", + "\n", + "print(\"โœ… Routing logic defined: should_continue\")\n", + "print(\" Routes to 'tools' if LLM wants to call tools, otherwise to 'save_memory'\")" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "demo-personalized", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:57:58.358447Z", + "iopub.status.busy": 
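Before assembling the graph, we can sanity-check the router in isolation with stubbed states (the tool-call payload below is fabricated for the test):

```python
# Sketch: verify the routing logic with stubbed messages.
with_tools = AgentState(
    messages=[AIMessage(content="", tool_calls=[
        {"name": "search_courses", "args": {"query": "ml"}, "id": "call_1"}])],
    student_id=STUDENT_ID, session_id=SESSION_ID,
)
no_tools = AgentState(
    messages=[AIMessage(content="Here are some courses...")],
    student_id=STUDENT_ID, session_id=SESSION_ID,
)
print(should_continue(with_tools))   # -> "tools"
print(should_continue(no_tools))     # -> "save_memory"
```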
"2025-10-31T23:57:58.358312Z", + "iopub.status.idle": "2025-10-31T23:58:04.410189Z", + "shell.execute_reply": "2025-10-31T23:58:04.409512Z" + } + }, + "source": [ + "### Step 3: Build the Graph\n", + "\n", + "Now we assemble all the pieces into a LangGraph workflow.\n" + ] + }, + { + "cell_type": "code", + "id": "inspect-memory", + "metadata": {}, + "source": [ + "# Create the graph\n", + "workflow = StateGraph(AgentState)\n", + "\n", + "# Add nodes\n", + "workflow.add_node(\"load_memory\", load_memory)\n", + "workflow.add_node(\"agent\", agent_node)\n", + "workflow.add_node(\"tools\", ToolNode(tools))\n", + "workflow.add_node(\"save_memory\", save_memory)\n", + "\n", + "# Define edges\n", + "workflow.set_entry_point(\"load_memory\")\n", + "workflow.add_edge(\"load_memory\", \"agent\")\n", + "workflow.add_conditional_edges(\n", + " \"agent\",\n", + " should_continue,\n", + " {\n", + " \"tools\": \"tools\",\n", + " \"save_memory\": \"save_memory\"\n", + " }\n", + ")\n", + "workflow.add_edge(\"tools\", \"agent\") # After tools, go back to agent\n", + "workflow.add_edge(\"save_memory\", END)\n", + "\n", + "# Compile the graph\n", + "agent_graph = workflow.compile()\n", + "\n", + "print(\"โœ… Agent graph built and compiled!\")\n", + "print(\"\\n๐Ÿ“Š Graph structure:\")\n", + "print(\" START โ†’ load_memory โ†’ agent โ†’ [tools โ†’ agent]* โ†’ save_memory โ†’ END\")\n", + "print(\"\\n * The agent can call tools multiple times before responding\")" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "check-memories", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:58:04.411898Z", + "iopub.status.busy": "2025-10-31T23:58:04.411768Z", + "iopub.status.idle": "2025-10-31T23:58:06.565467Z", + "shell.execute_reply": "2025-10-31T23:58:06.564738Z" + } + }, + "source": [ + "### Step 4: Visualize the Graph\n", + "\n", + "Let's see what our agent workflow looks like!\n" + ] + }, + { + "cell_type": "code", + "id": "comparison", + "metadata": {}, + "source": [ + "# Try to visualize the graph\n", + "try:\n", + " from IPython.display import Image, display\n", + "\n", + " # Generate graph visualization\n", + " graph_image = agent_graph.get_graph().draw_mermaid_png()\n", + " display(Image(graph_image))\n", + " print(\"\\nโœ… Graph visualization displayed above\")\n", + "except Exception as e:\n", + " print(f\"โš ๏ธ Could not display graph visualization: {e}\")\n", + " print(\"\\nGraph structure (text):\")\n", + " print(\"\"\"\n", + " โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”\n", + " โ”‚ START โ”‚\n", + " โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n", + " โ”‚\n", + " โ–ผ\n", + " โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”\n", + " โ”‚ load_memory โ”‚\n", + " โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n", + " โ”‚\n", + " โ–ผ\n", + " โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”\n", + " โ”‚ agent โ”‚ โ—„โ”€โ”€โ”€โ”€โ”€โ”\n", + " โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ”‚\n", + " โ”‚ โ”‚\n", + " โ”Œโ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ” โ”‚\n", + " โ”‚ โ”‚ โ”‚\n", + " โ–ผ โ–ผ โ”‚\n", + " [tools] [respond] โ”‚\n", + " โ”‚ โ”‚\n", + " โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n", + " โ”‚\n", + " โ–ผ\n", + " โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”\n", + " โ”‚ save_memory โ”‚\n", + " โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n", + " โ”‚\n", + " โ–ผ\n", + " โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”\n", + " โ”‚ END โ”‚\n", + " โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n", + " \"\"\")" + ], + "outputs": [], 
+ "execution_count": null + }, + { + "cell_type": "markdown", + "id": "architecture-recap", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:58:06.567416Z", + "iopub.status.busy": "2025-10-31T23:58:06.567279Z", + "iopub.status.idle": "2025-10-31T23:58:11.047325Z", + "shell.execute_reply": "2025-10-31T23:58:11.046775Z" + } + }, + "source": [ + "---\n", + "\n", + "## ๐ŸŽฌ Part 4: Demo the Agent\n", + "\n", + "Now let's see our agent in action! We'll have a conversation with the agent and watch it:\n", + "- Search for courses\n", + "- Store memories about preferences\n", + "- Recall information from previous interactions\n", + "\n", + "### Helper Function: Run Agent\n" + ] + }, + { + "cell_type": "code", + "id": "key-takeaways", + "metadata": {}, + "source": [ + "async def run_agent(user_message: str, verbose: bool = True) -> str:\n", + " \"\"\"\n", + " Run the agent with a user message.\n", + "\n", + " Args:\n", + " user_message: The user's input\n", + " verbose: Whether to print detailed execution info\n", + "\n", + " Returns:\n", + " The agent's response\n", + " \"\"\"\n", + " if verbose:\n", + " print(\"=\" * 80)\n", + " print(f\"๐Ÿ‘ค USER: {user_message}\")\n", + " print(\"=\" * 80)\n", + "\n", + " # Create initial state\n", + " initial_state = AgentState(\n", + " messages=[HumanMessage(content=user_message)],\n", + " student_id=STUDENT_ID,\n", + " session_id=SESSION_ID,\n", + " context={}\n", + " )\n", + "\n", + " # Run the graph\n", + " if verbose:\n", + " print(\"\\n๐Ÿค– AGENT EXECUTION:\")\n", + "\n", + " final_state = await agent_graph.ainvoke(initial_state)\n", + "\n", + " # Extract the final response\n", + " final_message = final_state[\"messages\"][-1]\n", + " response = final_message.content if hasattr(final_message, 'content') else str(final_message)\n", + "\n", + " if verbose:\n", + " print(\"\\n\" + \"=\" * 80)\n", + " print(f\"๐Ÿค– ASSISTANT: {response}\")\n", + " print(\"=\" * 80)\n", + "\n", + " return response\n", + "\n", + "print(\"โœ… Helper function defined: run_agent\")" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "next-steps", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:58:11.049386Z", + "iopub.status.busy": "2025-10-31T23:58:11.049237Z", + "iopub.status.idle": "2025-10-31T23:58:11.464715Z", + "shell.execute_reply": "2025-10-31T23:58:11.464089Z" + } + }, + "source": [ + "### Demo 1: Search Courses\n", + "\n", + "Let's ask the agent to find machine learning courses.\n" + ] + }, + { + "cell_type": "code", + "id": "conclusion", + "metadata": {}, + "source": [ + "# Demo 1: Search for courses\n", + "response1 = await run_agent(\n", + " \"What machine learning courses are available? I'm interested in intermediate level courses.\"\n", + ")" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "a8c8b43a1a04fff3", + "metadata": {}, + "source": [ + "### Demo 2: Store Preferences\n", + "\n", + "Now let's share some preferences and watch the agent store them.\n" + ] + }, + { + "cell_type": "code", + "id": "97d4b563a3a30240", + "metadata": {}, + "source": [ + "# Demo 2: Store preferences\n", + "response2 = await run_agent(\n", + " \"I prefer online courses because I have a part-time job. 
\"\n", + " \"Also, I'm really interested in AI and want to work at a startup after graduation.\"\n", + ")" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "c2fc05bfee7ece66", + "metadata": {}, + "source": [ + "### Demo 3: Recall Memories\n", + "\n", + "Let's ask the agent to recall what it knows about us.\n" + ] + }, + { + "cell_type": "code", + "id": "437746891b606882", + "metadata": {}, + "source": [ + "# Demo 3: Recall memories\n", + "response3 = await run_agent(\n", + " \"What do you remember about my preferences and goals?\"\n", + ")" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "8d495052317c67bb", + "metadata": {}, + "source": [ + "### Demo 4: Personalized Recommendations\n", + "\n", + "Now let's ask for recommendations and see if the agent uses our stored preferences.\n" + ] + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "# Demo 4: Personalized recommendations\n", + "response4 = await run_agent(\n", + " \"Can you recommend some courses for next semester based on what you know about me?\"\n", + ")" + ], + "id": "3eb0f6ddeb45a9f9" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### Inspect Stored Memories\n", + "\n", + "Let's look at what's actually stored in long-term memory.\n" + ], + "id": "17dd61ca397db6be" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "# Check what's in long-term memory\n", + "try:\n", + " from agent_memory_client.filters import UserId\n", + "\n", + " results = await memory_client.search_long_term_memory(\n", + " text=\"preferences goals interests\",\n", + " user_id=UserId(eq=STUDENT_ID),\n", + " limit=10\n", + " )\n", + "\n", + " print(\"=\" * 80)\n", + " print(\"๐Ÿ’พ LONG-TERM MEMORY CONTENTS\")\n", + " print(\"=\" * 80)\n", + "\n", + " if results.memories and len(results.memories) > 0:\n", + " for i, memory in enumerate(results.memories, 1):\n", + " print(f\"\\n{i}. 
[{memory.memory_type}] {memory.text}\")\n", + " if memory.topics:\n", + " print(f\" Topics: {', '.join(memory.topics)}\")\n", + " if memory.created_at:\n", + " print(f\" Created: {memory.created_at}\")\n", + " else:\n", + " print(\"\\nNo memories found.\")\n", + "\n", + " print(\"\\n\" + \"=\" * 80)\n", + "except Exception as e:\n", + " print(f\"Error retrieving memories: {e}\")" + ], + "id": "19a91887b957f48c" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "---\n", + "\n", + "## ๐Ÿ“Š Part 5: RAG vs Agent Comparison\n", + "\n", + "Let's compare what we've built across the sections:\n", + "\n", + "### **Section 2: Basic RAG**\n", + "```python\n", + "# Simple flow\n", + "query โ†’ search_courses() โ†’ generate_response()\n", + "```\n", + "- โœ… Can retrieve course information\n", + "- โŒ No memory of previous interactions\n", + "- โŒ Can't store user preferences\n", + "- โŒ Single-step only\n", + "\n", + "### **Section 3: Memory-Enhanced RAG**\n", + "```python\n", + "# With memory\n", + "load_memory() โ†’ search_courses() โ†’ generate_response() โ†’ save_memory()\n", + "```\n", + "- โœ… Remembers conversation history\n", + "- โœ… Can reference previous messages\n", + "- โš ๏ธ Limited to predefined flow\n", + "- โŒ Can't decide when to store memories\n", + "\n", + "### **Section 4: Full Agent (This Notebook)**\n", + "```python\n", + "# Agent with tools and decision-making\n", + "load_memory() โ†’ agent_decides() โ†’ [search_courses | search_memories | store_memory]* โ†’ save_memory()\n", + "```\n", + "- โœ… Remembers conversation history\n", + "- โœ… Decides when to search courses\n", + "- โœ… Decides when to store memories\n", + "- โœ… Decides when to recall memories\n", + "- โœ… Can chain multiple operations\n", + "- โœ… Adaptive to user needs\n", + "\n", + "### **Key Differences:**\n", + "\n", + "| Feature | RAG | Memory-RAG | Agent |\n", + "|---------|-----|------------|-------|\n", + "| **Retrieval** | โœ… | โœ… | โœ… |\n", + "| **Conversation Memory** | โŒ | โœ… | โœ… |\n", + "| **Long-term Memory** | โŒ | โš ๏ธ (manual) | โœ… (automatic) |\n", + "| **Decision Making** | โŒ | โŒ | โœ… |\n", + "| **Multi-step Reasoning** | โŒ | โŒ | โœ… |\n", + "| **Tool Selection** | โŒ | โŒ | โœ… |\n", + "| **Complexity** | Low | Medium | High |\n", + "| **Latency** | Low | Medium | Higher |\n", + "| **Cost** | Low | Medium | Higher |\n", + "\n", + "**๐Ÿ’ก Key Insight:** Agents add decision-making and multi-step reasoning to RAG systems.\n" + ], + "id": "fd45b11038775302" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "---\n", + "\n", + "## ๐Ÿ—๏ธ Architecture Recap\n", + "\n", + "### **What We Built:**\n", + "\n", + "A complete course advisor agent with:\n", + "\n", + "**1. Tools (3 total)**\n", + "- `search_courses` - Semantic search over course catalog\n", + "- `search_memories` - Recall user preferences and facts\n", + "- `store_memory` - Save important information\n", + "\n", + "**2. Memory Architecture**\n", + "- **Working Memory** - Conversation history (session-scoped)\n", + "- **Long-term Memory** - User preferences and facts (persistent)\n", + "- **Graph State** - Current execution state (turn-scoped)\n", + "\n", + "**3. LangGraph Workflow**\n", + "- **Nodes**: load_memory, agent, tools, save_memory\n", + "- **Edges**: Conditional routing based on LLM decisions\n", + "- **State**: Shared data structure flowing through the graph\n", + "\n", + "**4. 
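For contrast, the Section 2-style flow in the comparison above fits in a few lines. A sketch reusing this notebook's `course_manager` and `llm` (no memory, no tool selection, a single retrieval step):

```python
# Sketch: one-shot RAG - retrieve, assemble context, generate.
async def basic_rag(query: str) -> str:
    courses = await course_manager.search_courses(query, limit=3)
    context = "\n".join(f"{c.course_code}: {c.title} - {c.description[:100]}"
                        for c in courses)
    prompt = f"Answer using only this course catalog context:\n{context}\n\nQuestion: {query}"
    response = await llm.ainvoke([HumanMessage(content=prompt)])
    return response.content

print(await basic_rag("What machine learning courses are available?"))
```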
Integration Points**\n", + "- **Redis** - Course catalog storage and vector search\n", + "- **Agent Memory Server** - Working and long-term memory\n", + "- **OpenAI** - LLM for reasoning and tool selection\n", + "- **LangGraph** - Workflow orchestration\n", + "\n", + "### **The Complete Context Engineering Stack:**\n", + "\n", + "```\n", + "โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”\n", + "โ”‚ AGENT LAYER โ”‚\n", + "โ”‚ (LangGraph orchestration + tool selection) โ”‚\n", + "โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n", + " โ”‚\n", + " โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”\n", + " โ”‚ โ”‚ โ”‚\n", + " โ–ผ โ–ผ โ–ผ\n", + " โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”\n", + " โ”‚ Tools โ”‚ โ”‚ Memory โ”‚ โ”‚ RAG โ”‚\n", + " โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n", + " โ”‚ โ”‚ โ”‚\n", + " โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n", + " โ”‚\n", + " โ–ผ\n", + " โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”\n", + " โ”‚ Redis Stack โ”‚\n", + " โ”‚ (Storage + โ”‚\n", + " โ”‚ Vector Search)โ”‚\n", + " โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n", + "```\n" + ], + "id": "d4a533d945ca605e" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "---\n", + "\n", + "## ๐ŸŽ“ Key Takeaways\n", + "\n", + "### **1. Agents = RAG + Tools + Decision-Making**\n", + "- RAG retrieves information\n", + "- Tools enable actions\n", + "- Agents decide when to use each\n", + "\n", + "### **2. Memory is Critical for Personalization**\n", + "- Working memory enables conversation continuity\n", + "- Long-term memory enables personalization\n", + "- Agents can decide when to store/recall memories\n", + "\n", + "### **3. LangGraph Simplifies Complex Workflows**\n", + "- State management is automatic\n", + "- Conditional routing is declarative\n", + "- Visualization helps debugging\n", + "\n", + "### **4. Tool Design Matters**\n", + "- Clear descriptions guide LLM selection\n", + "- Well-defined schemas prevent errors\n", + "- Focused tools are better than Swiss Army knives\n", + "\n", + "### **5. Trade-offs to Consider**\n", + "- **Complexity**: Agents are more complex than RAG\n", + "- **Latency**: Multiple tool calls add latency\n", + "- **Cost**: More LLM calls = higher cost\n", + "- **Value**: Worth it for complex, multi-step tasks\n", + "\n", + "### **6. When to Use Agents vs RAG**\n", + "\n", + "**Use RAG when:**\n", + "- Simple question answering\n", + "- Single-step retrieval\n", + "- Low latency required\n", + "- Predictable workflows\n", + "\n", + "**Use Agents when:**\n", + "- Multi-step reasoning needed\n", + "- Actions beyond retrieval\n", + "- Personalization required\n", + "- Complex decision-making\n" + ], + "id": "c4654c5a2c4e5323" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "---\n", + "\n", + "## ๐Ÿš€ Next Steps and Extensions\n", + "\n", + "### **Ideas to Extend This Agent:**\n", + "\n", + "1. 
**Add More Tools**\n", + " - `check_prerequisites` - Verify if student meets course requirements\n", + " - `get_course_details` - Get detailed info about a specific course\n", + " - `create_schedule` - Build a semester schedule\n", + " - `check_conflicts` - Detect time conflicts\n", + "\n", + "2. **Enhance Memory**\n", + " - Automatic memory extraction from conversations\n", + " - Memory summarization for long conversations\n", + " - Memory importance scoring\n", + " - Memory expiration policies\n", + "\n", + "3. **Improve Personalization**\n", + " - Learning style detection\n", + " - Career path recommendations\n", + " - Skill gap analysis\n", + " - Progress tracking\n", + "\n", + "4. **Add Guardrails**\n", + " - Input validation\n", + " - Output filtering\n", + " - Rate limiting\n", + " - Error handling\n", + "\n", + "5. **Production Considerations**\n", + " - Authentication and authorization\n", + " - Logging and monitoring\n", + " - Caching for performance\n", + " - Fallback strategies\n", + "\n", + "### **Reference Implementation:**\n", + "\n", + "Check out `reference-agent/` for a full production implementation with:\n", + "- 7 tools (vs our 3)\n", + "- Advanced memory management\n", + "- Semantic tool selection\n", + "- Comprehensive error handling\n", + "- CLI interface\n", + "- Full test suite\n" + ], + "id": "346d2737598bfd31" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "---\n", + "\n", + "## ๐ŸŽ‰ Congratulations!\n", + "\n", + "You've completed the Context Engineering course! You've learned:\n", + "\n", + "**Section 1:** Context Types\n", + "- System, User, Conversation, Retrieved context\n", + "- How context shapes LLM behavior\n", + "\n", + "**Section 2:** RAG Foundations\n", + "- Semantic search with vector embeddings\n", + "- Context assembly and generation\n", + "- Building a course search system\n", + "\n", + "**Section 3:** Memory Architecture\n", + "- Working memory for conversation continuity\n", + "- Long-term memory for persistent knowledge\n", + "- Memory-enhanced RAG systems\n", + "\n", + "**๐Ÿ”ฌ Research Foundation:** Throughout this course, you've learned techniques validated by Context Rot research - prioritizing relevance over quantity, filtering distractors, and structuring context for optimal LLM performance. 
([Context Rot paper](https://research.trychroma.com/context-rot))\n", + "\n", + "**Section 4:** Agents and Tools\n", + "- Tool calling fundamentals\n", + "- LangGraph workflow orchestration\n", + "- Building a complete course advisor agent\n", + "- Agents vs RAG trade-offs\n", + "\n", + "### **You Can Now:**\n", + "- โœ… Design effective context strategies\n", + "- โœ… Build RAG systems with Redis\n", + "- โœ… Implement dual-memory architectures\n", + "- โœ… Create agents with tools and decision-making\n", + "- โœ… Choose the right approach for your use case\n", + "\n", + "### **Keep Learning:**\n", + "- Explore the reference-agent implementation\n", + "- Experiment with different tools\n", + "- Try different LLMs and embeddings\n", + "- Build your own agents!\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ“š Additional Resources\n", + "\n", + "\n", + "- [Agent Memory Server Documentation](https://github.com/redis/agent-memory-server) - Production-ready memory management\n", + "- [Agent Memory Client](https://pypi.org/project/agent-memory-client/) - Python client for Agent Memory Server\n", + "- [RedisVL Documentation](https://redisvl.com/) - Redis Vector Library\n", + "- [Retrieval-Augmented Generation Paper](https://arxiv.org/abs/2005.11401) - Original RAG research\n", + "- [LangChain RAG Tutorial](https://python.langchain.com/docs/use_cases/question_answering/) - Building RAG systems\n", + "- [LangGraph Tutorials](https://langchain-ai.github.io/langgraph/tutorials/) - Building agents with LangGraph\n", + "- [Agent Architectures](https://python.langchain.com/docs/modules/agents/) - Different agent patterns\n", + "- [ReAct: Synergizing Reasoning and Acting](https://arxiv.org/abs/2210.03629) - Reasoning + acting in LLMs\n", + "- [Anthropic's Guide to Building Effective Agents](https://www.anthropic.com/research/building-effective-agents) - Agent design patterns\n", + "\n", + "---\n", + "\n", + "**Thank you for completing this course! 
๐Ÿ™**\n" + ], + "id": "6a1c7e21740d4240" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": "", + "id": "439770b03604fe49" + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/python-recipes/context-engineering/notebooks/section-4-tool-selection/02_redis_university_course_advisor_agent_with_compression.ipynb b/python-recipes/context-engineering/notebooks/section-4-tool-selection/02_redis_university_course_advisor_agent_with_compression.ipynb new file mode 100644 index 00000000..c4109b9d --- /dev/null +++ b/python-recipes/context-engineering/notebooks/section-4-tool-selection/02_redis_university_course_advisor_agent_with_compression.ipynb @@ -0,0 +1,2817 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "header", + "metadata": {}, + "source": [ + "![Redis](https://redis.io/wp-content/uploads/2024/04/Logotype.svg?auto=webp&quality=85,75&width=120)\n", + "\n", + "# ๐Ÿค– Section 4: Building a Redis University Course Advisor Agent (with Working Memory Compression)\n", + "\n", + "**โฑ๏ธ Estimated Time:** 90-120 minutes\n", + "\n", + "**๐Ÿ“ Note:** This is an enhanced version of the course advisor agent that includes working memory compression demonstrations. For the standard version without compression, see `02_redis_university_course_advisor_agent.ipynb`.\n", + "\n", + "## ๐ŸŽฏ Learning Objectives\n", + "\n", + "By the end of this notebook, you will:\n", + "\n", + "1. **Build** a complete LangGraph agent with tools and memory\n", + "2. **Implement** exactly 3 tools: memory storage, memory search, and course search\n", + "3. **Integrate** Redis Agent Memory Server for dual-memory architecture\n", + "4. **Visualize** the agent's decision-making graph\n", + "5. 
**Demonstrate** the progression from RAG (Section 3) to full agent\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ”— Bridge from Previous Sections\n", + "\n", + "### **Your Learning Journey:**\n", + "\n", + "**Section 1:** Context Types\n", + "- System, User, Conversation, Retrieved context\n", + "- How context shapes LLM responses\n", + "\n", + "**Section 2:** RAG Foundations\n", + "- Semantic search with vector embeddings\n", + "- Retrieving and presenting information\n", + "- Single-step retrieval โ†’ generation\n", + "\n", + "**Section 3:** Memory Architecture\n", + "- Working memory (conversation continuity)\n", + "- Long-term memory (persistent knowledge)\n", + "- Memory-enhanced RAG systems\n", + "\n", + "**Section 4 (Notebook 1):** Tool-Calling Basics\n", + "- What tools are and how LLMs use them\n", + "- LangGraph fundamentals (nodes, edges, state)\n", + "- Simple tool-calling examples\n", + "- Agents vs RAG comparison\n", + "\n", + "### **What We're Building Now:**\n", + "\n", + "**A Full Agent** that combines everything:\n", + "- โœ… **Tools** for actions (search courses, manage memory)\n", + "- โœ… **Memory** for personalization (working + long-term)\n", + "- โœ… **RAG** for course information (semantic search)\n", + "- โœ… **LangGraph** for orchestration (state management)\n", + "\n", + "**๐Ÿ’ก Key Insight:** This agent is RAG + Memory + Tools + Decision-Making\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ“Š Agent Architecture\n", + "\n", + "### **The Complete Flow:**\n", + "\n", + "```\n", + "User Query\n", + " โ†“\n", + "[Load Working Memory] โ† Conversation history\n", + " โ†“\n", + "[Agent Node] โ† Decides what to do\n", + " โ†“\n", + " โ”œโ”€โ†’ [search_courses] โ† Find relevant courses\n", + " โ”œโ”€โ†’ [search_memories] โ† Recall user preferences\n", + " โ”œโ”€โ†’ [store_memory] โ† Save important facts\n", + " โ†“\n", + "[Agent Node] โ† Processes tool results\n", + " โ†“\n", + "[Generate Response] โ† Final answer\n", + " โ†“\n", + "[Save Working Memory] โ† Update conversation\n", + "```\n", + "\n", + "### **Our 3 Tools:**\n", + "\n", + "1. **`search_courses`** - Semantic search over course catalog\n", + " - When: Student asks about courses, topics, or recommendations\n", + " - Example: \"What machine learning courses are available?\"\n", + "\n", + "2. **`search_memories`** - Search long-term memory for user facts\n", + " - When: Need to recall preferences, goals, or past interactions\n", + " - Example: \"What courses did I say I was interested in?\"\n", + "\n", + "3. **`store_memory`** - Save important information to long-term memory\n", + " - When: User shares preferences, goals, or important facts\n", + " - Example: \"I'm interested in AI and want to work at a startup\"\n", + "\n", + "### **Memory Architecture:**\n", + "\n", + "| Memory Type | Purpose | Managed By | Lifespan |\n", + "|------------|---------|------------|----------|\n", + "| **Working Memory** | Conversation history | Agent Memory Server | Session |\n", + "| **Long-term Memory** | User preferences, facts | Agent Memory Server | Persistent |\n", + "| **Graph State** | Current execution state | LangGraph | Single turn |\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ“ฆ Setup and Environment\n", + "\n", + "### โš ๏ธ **CRITICAL: Prerequisites Required**\n", + "\n", + "**This notebook requires ALL services to be running. If any service is down, the agent will not work.**\n", + "\n", + "**Required Services:**\n", + "1. **Redis** - Vector storage and caching (port 6379)\n", + "2. 
**Agent Memory Server** - Memory management (port 8088)\n",
+ "3. **OpenAI API** - LLM functionality\n",
+ "\n",
+ "**🚀 Quick Setup (Run this first!):**\n",
+ "```bash\n",
+ "# Navigate to the context-engineering directory\n",
+ "cd ../../\n",
+ "\n",
+ "# Check if services are running\n",
+ "./check_setup.sh\n",
+ "\n",
+ "# If services are down, run setup\n",
+ "./setup_memory_server.sh\n",
+ "```\n",
+ "\n",
+ "**📖 Need help?** See `../SETUP_GUIDE.md` for detailed setup instructions.\n",
+ "\n",
+ "**🔍 Manual Check:**\n",
+ "- Redis: `redis-cli ping` should return `PONG`\n",
+ "- Memory Server: `curl http://localhost:8088/v1/health` should return `{\"status\":\"ok\"}`\n",
+ "- Environment: Create `.env` file in `reference-agent/` with your `OPENAI_API_KEY`\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "install-packages",
+ "metadata": {},
+ "source": [
+ "---\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "install",
+ "metadata": {},
+ "source": [
+ "### Automated Setup Check\n",
+ "\n",
+ "Let's run the setup script to ensure all services are running properly.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "import-libraries",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-10-31T23:57:51.825255Z",
+ "iopub.status.busy": "2025-10-31T23:57:51.825073Z",
+ "iopub.status.idle": "2025-10-31T23:57:52.103012Z",
+ "shell.execute_reply": "2025-10-31T23:57:52.102484Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Running automated setup check...\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "🔧 Agent Memory Server Setup\n",
+ "===========================\n",
+ "📊 Checking Redis...\n",
+ "✅ Redis is running\n",
+ "📊 Checking Agent Memory Server...\n",
+ "🔍 Agent Memory Server container exists. Checking health...\n",
+ "✅ Agent Memory Server is running and healthy\n",
+ "✅ No Redis connection issues detected\n",
+ "\n",
+ "✅ Setup Complete!\n",
+ "=================\n",
+ "📊 Services Status:\n",
+ "  • Redis: Running on port 6379\n",
+ "  • Agent Memory Server: Running on port 8088\n",
+ "\n",
+ "🎯 You can now run the notebooks!\n",
+ "\n",
+ "\n",
+ "✅ All services are ready!\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Run the setup script to ensure Redis and Agent Memory Server are running\n",
+ "import subprocess\n",
+ "import sys\n",
+ "from pathlib import Path\n",
+ "\n",
+ "# Path to setup script\n",
+ "setup_script = Path(\"../../reference-agent/setup_agent_memory_server.py\")\n",
+ "\n",
+ "if setup_script.exists():\n",
+ "    print(\"Running automated setup check...\\n\")\n",
+ "    result = subprocess.run(\n",
+ "        [sys.executable, str(setup_script)],\n",
+ "        capture_output=True,\n",
+ "        text=True\n",
+ "    )\n",
+ "    print(result.stdout)\n",
+ "    if result.returncode != 0:\n",
+ "        print(\"⚠️ Setup check failed. Please review the output above.\")\n",
+ "        print(result.stderr)\n",
+ "    else:\n",
+ "        print(\"\\n✅ All services are ready!\")\n",
+ "else:\n",
+ "    print(\"⚠️ Setup script not found. 
Please ensure services are running manually.\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "imports", + "metadata": {}, + "source": [ + "---\n" + ] + }, + { + "cell_type": "markdown", + "id": "load-env", + "metadata": {}, + "source": [ + "### Install Dependencies\n", + "\n", + "If you haven't already installed the reference-agent package, uncomment and run the following:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "env-setup", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:57:52.104763Z", + "iopub.status.busy": "2025-10-31T23:57:52.104657Z", + "iopub.status.idle": "2025-10-31T23:57:52.106517Z", + "shell.execute_reply": "2025-10-31T23:57:52.106037Z" + } + }, + "outputs": [], + "source": [ + "# Uncomment to install reference-agent package\n", + "# %pip install -q -e ../../reference-agent\n", + "\n", + "# Uncomment to install agent-memory-client\n", + "# %pip install -q agent-memory-client\n" + ] + }, + { + "cell_type": "markdown", + "id": "check-services", + "metadata": {}, + "source": [ + "### Import Libraries\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "service-check", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:57:52.107702Z", + "iopub.status.busy": "2025-10-31T23:57:52.107645Z", + "iopub.status.idle": "2025-10-31T23:57:53.822487Z", + "shell.execute_reply": "2025-10-31T23:57:53.821994Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Libraries imported successfully!\n" + ] + } + ], + "source": [ + "# Core libraries\n", + "import os\n", + "import sys\n", + "import json\n", + "from typing import List, Dict, Any, Optional, Annotated\n", + "from datetime import datetime\n", + "from dotenv import load_dotenv\n", + "\n", + "# LangChain and LangGraph\n", + "from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, SystemMessage\n", + "from langchain_core.tools import tool\n", + "from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n", + "from langgraph.graph import StateGraph, END\n", + "from langgraph.graph.message import add_messages\n", + "from langgraph.prebuilt import ToolNode\n", + "from pydantic import BaseModel, Field\n", + "\n", + "# Redis and Agent Memory\n", + "from agent_memory_client import MemoryAPIClient, MemoryClientConfig\n", + "from agent_memory_client.models import WorkingMemory, MemoryMessage\n", + "\n", + "# Add reference-agent to path for course utilities\n", + "sys.path.insert(0, os.path.abspath(\"../../reference-agent\"))\n", + "from redis_context_course.course_manager import CourseManager\n", + "from redis_context_course.models import StudentProfile, DifficultyLevel, CourseFormat\n", + "\n", + "print(\"โœ… Libraries imported successfully!\")" + ] + }, + { + "cell_type": "markdown", + "id": "init-components", + "metadata": {}, + "source": [ + "### Load Environment Variables\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "init-course-manager", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:57:53.823677Z", + "iopub.status.busy": "2025-10-31T23:57:53.823553Z", + "iopub.status.idle": "2025-10-31T23:57:53.826253Z", + "shell.execute_reply": "2025-10-31T23:57:53.825901Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Environment configured successfully!\n", + " OpenAI API Key: ********************wTMA\n", + " Redis URL: redis://localhost:6379\n", + " Agent Memory URL: http://localhost:8088\n" + ] + } + 
], + "source": [ + "# Load environment variables\n", + "load_dotenv(\"../../reference-agent/.env\")\n", + "\n", + "# Get configuration\n", + "OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")\n", + "REDIS_URL = os.getenv(\"REDIS_URL\", \"redis://localhost:6379\")\n", + "AGENT_MEMORY_URL = os.getenv(\"AGENT_MEMORY_URL\", \"http://localhost:8088\")\n", + "\n", + "# Verify OpenAI API key\n", + "if not OPENAI_API_KEY:\n", + " raise ValueError(\"\"\"\n", + " โš ๏ธ OPENAI_API_KEY not found!\n", + "\n", + " Please create a .env file in the reference-agent directory:\n", + " 1. cd ../../reference-agent\n", + " 2. cp .env.example .env\n", + " 3. Edit .env and add your OpenAI API key\n", + " \"\"\")\n", + "\n", + "print(\"โœ… Environment configured successfully!\")\n", + "print(f\" OpenAI API Key: {'*' * 20}{OPENAI_API_KEY[-4:]}\")\n", + "print(f\" Redis URL: {REDIS_URL}\")\n", + "print(f\" Agent Memory URL: {AGENT_MEMORY_URL}\")" + ] + }, + { + "cell_type": "markdown", + "id": "course-manager", + "metadata": {}, + "source": [ + "### Check Required Services\n", + "\n", + "Let's verify that Redis and the Agent Memory Server are running.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "init-llm", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:57:53.827385Z", + "iopub.status.busy": "2025-10-31T23:57:53.827318Z", + "iopub.status.idle": "2025-10-31T23:57:53.839615Z", + "shell.execute_reply": "2025-10-31T23:57:53.839213Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Redis is running\n", + "โœ… Agent Memory Server is running\n", + "\n", + "โœ… All services are ready!\n" + ] + } + ], + "source": [ + "import redis\n", + "import requests\n", + "\n", + "# Check Redis\n", + "try:\n", + " redis_client = redis.from_url(REDIS_URL)\n", + " redis_client.ping()\n", + " print(\"โœ… Redis is running\")\n", + " REDIS_AVAILABLE = True\n", + "except Exception as e:\n", + " print(f\"โŒ Redis is not available: {e}\")\n", + " print(\" Please start Redis using Docker:\")\n", + " print(\" docker run -d -p 6379:6379 redis/redis-stack:latest\")\n", + " REDIS_AVAILABLE = False\n", + "\n", + "# Check Agent Memory Server\n", + "try:\n", + " response = requests.get(f\"{AGENT_MEMORY_URL}/v1/health\", timeout=2)\n", + " if response.status_code == 200:\n", + " print(\"โœ… Agent Memory Server is running\")\n", + " MEMORY_SERVER_AVAILABLE = True\n", + " else:\n", + " print(f\"โš ๏ธ Agent Memory Server returned status {response.status_code}\")\n", + " MEMORY_SERVER_AVAILABLE = False\n", + "except Exception as e:\n", + " print(f\"โŒ Agent Memory Server is not available: {e}\")\n", + " print(\" Please start the Agent Memory Server:\")\n", + " print(\" cd ../../reference-agent && python setup_agent_memory_server.py\")\n", + " MEMORY_SERVER_AVAILABLE = False\n", + "\n", + "if not (REDIS_AVAILABLE and MEMORY_SERVER_AVAILABLE):\n", + " print(\"\\nโš ๏ธ Some services are not available. 
Please start them before continuing.\")\n", + "else:\n", + " print(\"\\nโœ… All services are ready!\")" + ] + }, + { + "cell_type": "markdown", + "id": "llm-init", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## ๐Ÿ”ง Initialize Components\n", + "\n", + "Now let's initialize the components we'll use to build our agent.\n" + ] + }, + { + "cell_type": "markdown", + "id": "init-memory", + "metadata": {}, + "source": [ + "### Initialize Course Manager\n", + "\n", + "The `CourseManager` handles course storage and semantic search, just like in Section 2.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "memory-init", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:57:53.840793Z", + "iopub.status.busy": "2025-10-31T23:57:53.840727Z", + "iopub.status.idle": "2025-10-31T23:57:53.933415Z", + "shell.execute_reply": "2025-10-31T23:57:53.933012Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "19:57:53 redisvl.index.index INFO Index already exists, not overwriting.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Course Manager initialized\n", + " Ready to search and retrieve courses\n" + ] + } + ], + "source": [ + "# Initialize Course Manager\n", + "course_manager = CourseManager()\n", + "\n", + "print(\"โœ… Course Manager initialized\")\n", + "print(\" Ready to search and retrieve courses\")" + ] + }, + { + "cell_type": "markdown", + "id": "student-profile", + "metadata": {}, + "source": [ + "### Initialize LLM\n", + "\n", + "We'll use GPT-4o with temperature=0.0 for consistent, deterministic responses.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "create-student", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:57:53.934684Z", + "iopub.status.busy": "2025-10-31T23:57:53.934605Z", + "iopub.status.idle": "2025-10-31T23:57:53.943986Z", + "shell.execute_reply": "2025-10-31T23:57:53.943698Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… LLM initialized\n", + " Model: gpt-4o\n", + " Temperature: 0.0 (deterministic)\n" + ] + } + ], + "source": [ + "# Initialize LLM\n", + "llm = ChatOpenAI(model=\"gpt-4o\", temperature=0.0)\n", + "\n", + "print(\"โœ… LLM initialized\")\n", + "print(\" Model: gpt-4o\")\n", + "print(\" Temperature: 0.0 (deterministic)\")" + ] + }, + { + "cell_type": "markdown", + "id": "tools-section", + "metadata": {}, + "source": [ + "### Initialize Memory Client\n", + "\n", + "The memory client handles both working memory (conversation history) and long-term memory (persistent facts).\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "tool-1", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:57:53.945184Z", + "iopub.status.busy": "2025-10-31T23:57:53.945115Z", + "iopub.status.idle": "2025-10-31T23:57:53.950020Z", + "shell.execute_reply": "2025-10-31T23:57:53.949643Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Memory Client initialized\n", + " Base URL: http://localhost:8088\n", + " Namespace: redis_university\n", + " Ready for working memory and long-term memory operations\n" + ] + } + ], + "source": [ + "# Initialize Memory Client\n", + "config = MemoryClientConfig(\n", + " base_url=AGENT_MEMORY_URL,\n", + " default_namespace=\"redis_university\"\n", + ")\n", + "memory_client = MemoryAPIClient(config=config)\n", + "\n", + "print(\"โœ… Memory Client initialized\")\n", + 
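"# Note: the client's methods are async; the tools and graph nodes below\n",
+ "# call them with await. Memories created through this client default to\n",
+ "# the 'redis_university' namespace configured above.\n",
+ 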
"print(f\" Base URL: {config.base_url}\")\n", + "print(f\" Namespace: {config.default_namespace}\")\n", + "print(\" Ready for working memory and long-term memory operations\")" + ] + }, + { + "cell_type": "markdown", + "id": "search-courses-tool", + "metadata": {}, + "source": [ + "### Create Sample Student Profile\n", + "\n", + "We'll create a sample student to use throughout our demos.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "tool-2", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:57:53.951077Z", + "iopub.status.busy": "2025-10-31T23:57:53.951016Z", + "iopub.status.idle": "2025-10-31T23:57:53.953293Z", + "shell.execute_reply": "2025-10-31T23:57:53.952950Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Student profile created\n", + " Name: Sarah Chen\n", + " Student ID: student_sarah_001\n", + " Session ID: session_student_sarah_001_20251031_195753\n", + " Major: Computer Science\n", + " Interests: machine learning, data science, algorithms\n" + ] + } + ], + "source": [ + "# Create sample student profile\n", + "STUDENT_ID = \"student_sarah_001\"\n", + "SESSION_ID = f\"session_{STUDENT_ID}_{datetime.now().strftime('%Y%m%d_%H%M%S')}\"\n", + "\n", + "sarah = StudentProfile(\n", + " name=\"Sarah Chen\",\n", + " email=\"sarah.chen@university.edu\",\n", + " major=\"Computer Science\",\n", + " year=2,\n", + " interests=[\"machine learning\", \"data science\", \"algorithms\"],\n", + " completed_courses=[\"Introduction to Programming\", \"Data Structures\"],\n", + " current_courses=[\"Linear Algebra\"],\n", + " preferred_format=CourseFormat.ONLINE,\n", + " preferred_difficulty=DifficultyLevel.INTERMEDIATE\n", + ")\n", + "\n", + "print(\"โœ… Student profile created\")\n", + "print(f\" Name: {sarah.name}\")\n", + "print(f\" Student ID: {STUDENT_ID}\")\n", + "print(f\" Session ID: {SESSION_ID}\")\n", + "print(f\" Major: {sarah.major}\")\n", + "print(f\" Interests: {', '.join(sarah.interests)}\")" + ] + }, + { + "cell_type": "markdown", + "id": "search-memories-tool", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## ๐Ÿ› ๏ธ Part 1: Define the Agent's Tools\n", + "\n", + "Let's build our 3 tools step by step. 
Each tool will have:\n", + "- Clear input schema (what parameters it accepts)\n", + "- Descriptive docstring (tells the LLM when to use it)\n", + "- Implementation (the actual logic)\n", + "\n", + "**Remember:** The LLM only sees the tool name, description, and parametersโ€”not the implementation!\n" + ] + }, + { + "cell_type": "markdown", + "id": "tool-3", + "metadata": {}, + "source": [ + "### Tool 1: `search_courses`\n", + "\n", + "This tool searches the course catalog using semantic search.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "store-memory-tool", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:57:53.954314Z", + "iopub.status.busy": "2025-10-31T23:57:53.954256Z", + "iopub.status.idle": "2025-10-31T23:57:53.957045Z", + "shell.execute_reply": "2025-10-31T23:57:53.956679Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Tool 1 defined: search_courses\n", + " Purpose: Search course catalog with semantic search\n", + " Parameters: query (str), limit (int)\n" + ] + } + ], + "source": [ + "# Define input schema\n", + "class SearchCoursesInput(BaseModel):\n", + " \"\"\"Input schema for searching courses.\"\"\"\n", + " query: str = Field(\n", + " description=\"Natural language search query. Can be topics (e.g., 'machine learning'), \"\n", + " \"characteristics (e.g., 'online courses'), or general questions \"\n", + " \"(e.g., 'beginner programming courses')\"\n", + " )\n", + " limit: int = Field(\n", + " default=5,\n", + " description=\"Maximum number of results to return. Default is 5. \"\n", + " \"Use 3 for quick answers, 10 for comprehensive results.\"\n", + " )\n", + "\n", + "# Define the tool\n", + "@tool(\"search_courses\", args_schema=SearchCoursesInput)\n", + "async def search_courses(query: str, limit: int = 5) -> str:\n", + " \"\"\"\n", + " Search for courses using semantic search based on topics, descriptions, or characteristics.\n", + "\n", + " Use this tool when students ask about:\n", + " - Topics or subjects: \"machine learning courses\", \"database courses\"\n", + " - Course characteristics: \"online courses\", \"beginner courses\", \"3-credit courses\"\n", + " - General exploration: \"what courses are available in AI?\"\n", + "\n", + " The search uses semantic matching, so natural language queries work well.\n", + "\n", + " Returns: Formatted list of matching courses with details.\n", + " \"\"\"\n", + " results = await course_manager.search_courses(query, limit=limit)\n", + "\n", + " if not results:\n", + " return \"No courses found matching your query.\"\n", + "\n", + " output = []\n", + " for course in results:\n", + " output.append(\n", + " f\"{course.course_code}: {course.title}\\n\"\n", + " f\" Credits: {course.credits} | {course.format.value} | {course.difficulty_level.value}\\n\"\n", + " f\" {course.description[:150]}...\"\n", + " )\n", + "\n", + " return \"\\n\\n\".join(output)\n", + "\n", + "print(\"โœ… Tool 1 defined: search_courses\")\n", + "print(\" Purpose: Search course catalog with semantic search\")\n", + "print(\" Parameters: query (str), limit (int)\")" + ] + }, + { + "cell_type": "markdown", + "id": "tools-summary", + "metadata": {}, + "source": [ + "### Tool 2: `search_memories`\n", + "\n", + "This tool searches long-term memory for user preferences and facts.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "list-tools", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:57:53.958090Z", + "iopub.status.busy": 
"2025-10-31T23:57:53.958029Z", + "iopub.status.idle": "2025-10-31T23:57:53.960900Z", + "shell.execute_reply": "2025-10-31T23:57:53.960462Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Tool 2 defined: search_memories\n", + " Purpose: Search long-term memory for user facts\n", + " Parameters: query (str), limit (int)\n" + ] + } + ], + "source": [ + "# Define input schema\n", + "class SearchMemoriesInput(BaseModel):\n", + " \"\"\"Input schema for searching memories.\"\"\"\n", + " query: str = Field(\n", + " description=\"Natural language query to search for in user's long-term memory. \"\n", + " \"Examples: 'career goals', 'course preferences', 'learning style'\"\n", + " )\n", + " limit: int = Field(\n", + " default=5,\n", + " description=\"Maximum number of memories to return. Default is 5.\"\n", + " )\n", + "\n", + "# Define the tool\n", + "@tool(\"search_memories\", args_schema=SearchMemoriesInput)\n", + "async def search_memories(query: str, limit: int = 5) -> str:\n", + " \"\"\"\n", + " Search the user's long-term memory for relevant facts, preferences, and past interactions.\n", + "\n", + " Use this tool when you need to:\n", + " - Recall user preferences: \"What format does the user prefer?\"\n", + " - Remember past goals: \"What career path is the user interested in?\"\n", + " - Find previous interactions: \"What courses did we discuss before?\"\n", + " - Personalize recommendations: \"What are the user's interests?\"\n", + "\n", + " The search uses semantic matching to find relevant memories.\n", + "\n", + " Returns: List of relevant memories with content and metadata.\n", + " \"\"\"\n", + " try:\n", + " from agent_memory_client.filters import UserId\n", + "\n", + " # Search long-term memory\n", + " results = await memory_client.search_long_term_memory(\n", + " text=query,\n", + " user_id=UserId(eq=STUDENT_ID),\n", + " limit=limit\n", + " )\n", + "\n", + " if not results.memories or len(results.memories) == 0:\n", + " return \"No relevant memories found.\"\n", + "\n", + " output = []\n", + " for i, memory in enumerate(results.memories, 1):\n", + " output.append(f\"{i}. 
{memory.text}\")\n", + " if memory.topics:\n", + " output.append(f\" Topics: {', '.join(memory.topics)}\")\n", + "\n", + " return \"\\n\".join(output)\n", + " except Exception as e:\n", + " return f\"Error searching memories: {str(e)}\"\n", + "\n", + "print(\"โœ… Tool 2 defined: search_memories\")\n", + "print(\" Purpose: Search long-term memory for user facts\")\n", + "print(\" Parameters: query (str), limit (int)\")" + ] + }, + { + "cell_type": "markdown", + "id": "agent-state", + "metadata": {}, + "source": [ + "### Tool 3: `store_memory`\n", + "\n", + "This tool saves important information to long-term memory.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "define-state", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:57:53.962062Z", + "iopub.status.busy": "2025-10-31T23:57:53.961995Z", + "iopub.status.idle": "2025-10-31T23:57:53.964832Z", + "shell.execute_reply": "2025-10-31T23:57:53.964534Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Tool 3 defined: store_memory\n", + " Purpose: Save important facts to long-term memory\n", + " Parameters: text (str), memory_type (str), topics (List[str])\n" + ] + } + ], + "source": [ + "# Define input schema\n", + "class StoreMemoryInput(BaseModel):\n", + " \"\"\"Input schema for storing memories.\"\"\"\n", + " text: str = Field(\n", + " description=\"The information to store. Should be a clear, factual statement. \"\n", + " \"Examples: 'User prefers online courses', 'User's career goal is AI research'\"\n", + " )\n", + " memory_type: str = Field(\n", + " default=\"semantic\",\n", + " description=\"Type of memory: 'semantic' (facts/preferences), 'episodic' (events/interactions). \"\n", + " \"Default is 'semantic'.\"\n", + " )\n", + " topics: List[str] = Field(\n", + " default=[],\n", + " description=\"Optional tags to categorize the memory, such as ['preferences', 'courses']\"\n", + " )\n", + "\n", + "# Define the tool\n", + "@tool(\"store_memory\", args_schema=StoreMemoryInput)\n", + "async def store_memory(text: str, memory_type: str = \"semantic\", topics: List[str] = []) -> str:\n", + " \"\"\"\n", + " Store important information to the user's long-term memory.\n", + "\n", + " Use this tool when the user shares:\n", + " - Preferences: \"I prefer online courses\", \"I like hands-on projects\"\n", + " - Goals: \"I want to work in AI\", \"I'm preparing for grad school\"\n", + " - Important facts: \"I have a part-time job\", \"I'm interested in startups\"\n", + " - Constraints: \"I can only take 2 courses per semester\"\n", + "\n", + " Do NOT store:\n", + " - Temporary information (use conversation context instead)\n", + " - Course details (already in course catalog)\n", + " - General questions\n", + "\n", + " Returns: Confirmation message.\n", + " \"\"\"\n", + " try:\n", + " from agent_memory_client.models import ClientMemoryRecord\n", + "\n", + " # Create memory record\n", + " memory = ClientMemoryRecord(\n", + " text=text,\n", + " user_id=STUDENT_ID,\n", + " memory_type=memory_type,\n", + " topics=topics or []\n", + " )\n", + "\n", + " # Store in long-term memory\n", + " await memory_client.create_long_term_memory([memory])\n", + " return f\"โœ… Stored to long-term memory: {text}\"\n", + " except Exception as e:\n", + " return f\"Error storing memory: {str(e)}\"\n", + "\n", + "print(\"โœ… Tool 3 defined: store_memory\")\n", + "print(\" Purpose: Save important facts to long-term memory\")\n", + "print(\" Parameters: text (str), memory_type (str), 
topics (List[str])\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "graph-nodes",
+ "metadata": {},
+ "source": [
+ "### Tools Summary\n",
+ "\n",
+ "Let's review our 3 tools:\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "load-memory-node",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-10-31T23:57:53.966158Z",
+ "iopub.status.busy": "2025-10-31T23:57:53.966078Z",
+ "iopub.status.idle": "2025-10-31T23:57:53.968399Z",
+ "shell.execute_reply": "2025-10-31T23:57:53.968046Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "================================================================================\n",
+ "🛠️ AGENT TOOLS SUMMARY\n",
+ "================================================================================\n",
+ "\n",
+ "1. search_courses\n",
+ "   Description: Search for courses using semantic search based on topics, descriptions, or characteristics\n",
+ "   Parameters: query, limit\n",
+ "\n",
+ "2. search_memories\n",
+ "   Description: Search the user's long-term memory for relevant facts, preferences, and past interactions\n",
+ "   Parameters: query, limit\n",
+ "\n",
+ "3. store_memory\n",
+ "   Description: Store important information to the user's long-term memory\n",
+ "   Parameters: text, memory_type, topics\n",
+ "\n",
+ "================================================================================\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Collect all tools\n",
+ "tools = [search_courses, search_memories, store_memory]\n",
+ "\n",
+ "print(\"=\" * 80)\n",
+ "print(\"🛠️ AGENT TOOLS SUMMARY\")\n",
+ "print(\"=\" * 80)\n",
+ "# Use 't' as the loop variable to avoid shadowing the @tool decorator imported above\n",
+ "for i, t in enumerate(tools, 1):\n",
+ "    print(f\"\\n{i}. {t.name}\")\n",
+ "    print(f\"   Description: {t.description.split('.')[0]}\")\n",
+ "    print(f\"   Parameters: {', '.join(t.args_schema.model_fields.keys())}\")\n",
+ "print(\"\\n\" + \"=\" * 80)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "agent-node",
+ "metadata": {},
+ "source": "\n"
+ },
+ {
+ "cell_type": "markdown",
+ "id": "save-memory-node",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-10-31T23:57:53.969443Z",
+ "iopub.status.busy": "2025-10-31T23:57:53.969382Z",
+ "iopub.status.idle": "2025-10-31T23:57:53.971457Z",
+ "shell.execute_reply": "2025-10-31T23:57:53.971109Z"
+ }
+ },
+ "source": [
+ "## 🧠 Memory Extraction in This Agent\n",
+ "\n",
+ "Understanding how this agent creates and manages long-term memories.\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "routing-logic",
+ "metadata": {},
+ "source": [
+ "### How This Agent Uses Memory\n",
+ "\n",
+ "Our agent has 3 tools, and 2 of them interact with memory:\n",
+ "\n",
+ "1. **`store_memory`** - Saves facts to long-term memory\n",
+ "2. **`search_memories`** - Retrieves facts from long-term memory\n",
+ "3. 
**`search_courses`** - Searches course catalog (not memory-related)\n", + "\n", + "**Question:** When the agent calls `store_memory`, how does the Agent Memory Server decide what to extract and how to structure it?\n", + "\n", + "**Answer:** Memory Extraction Strategies (covered in Section 3, Notebook 1)\n" + ] + }, + { + "cell_type": "markdown", + "id": "should-continue", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:57:53.972503Z", + "iopub.status.busy": "2025-10-31T23:57:53.972440Z", + "iopub.status.idle": "2025-10-31T23:57:53.974986Z", + "shell.execute_reply": "2025-10-31T23:57:53.974616Z" + } + }, + "source": [ + "### Current Configuration: Discrete Strategy (Default)\n", + "\n", + "**This agent uses the DISCRETE strategy** (default) because:\n", + "\n", + "โœ… **Individual facts are searchable**\n", + "- \"User's major is Computer Science\"\n", + "- \"User interested in machine learning\"\n", + "- \"User completed RU101\"\n", + "\n", + "โœ… **Facts are independently useful**\n", + "- Agent can search for specific facts\n", + "- Each fact has its own relevance score\n", + "- No need to parse summaries\n", + "\n", + "โœ… **Good for Q&A interactions**\n", + "- Student: \"What courses did I say I was interested in?\"\n", + "- Agent searches discrete facts: \"User interested in ML\", \"User interested in AI\"\n" + ] + }, + { + "cell_type": "markdown", + "id": "build-graph", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:57:53.975927Z", + "iopub.status.busy": "2025-10-31T23:57:53.975854Z", + "iopub.status.idle": "2025-10-31T23:57:53.977825Z", + "shell.execute_reply": "2025-10-31T23:57:53.977580Z" + } + }, + "source": [ + "### Example: Discrete Strategy in Action\n", + "\n", + "**Conversation:**\n", + "```\n", + "User: \"I'm a CS major interested in ML. 
I prefer online courses.\"\n", + "Agent: [Calls store_memory tool]\n", + "```\n", + "\n", + "**What Gets Stored (Discrete Strategy):**\n", + "```json\n", + "[\n", + " {\"text\": \"User's major is Computer Science\", \"type\": \"semantic\"},\n", + " {\"text\": \"User interested in machine learning\", \"type\": \"semantic\"},\n", + " {\"text\": \"User prefers online courses\", \"type\": \"semantic\"}\n", + "]\n", + "```\n", + "\n", + "**Later:**\n", + "```\n", + "User: \"What courses match my interests?\"\n", + "Agent: [Calls search_memories tool]\n", + " โ†’ Finds: \"User interested in machine learning\"\n", + " โ†’ Finds: \"User prefers online courses\"\n", + " [Calls search_courses with these preferences]\n", + "```\n" + ] + }, + { + "cell_type": "markdown", + "id": "construct-graph", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:57:53.978903Z", + "iopub.status.busy": "2025-10-31T23:57:53.978835Z", + "iopub.status.idle": "2025-10-31T23:57:53.981202Z", + "shell.execute_reply": "2025-10-31T23:57:53.980864Z" + } + }, + "source": [ + "### When Would Summary Strategy Be Better?\n", + "\n", + "**Summary strategy** would be beneficial for:\n", + "\n", + "**Scenario 1: Long Advising Sessions**\n", + "```\n", + "User has 30-minute conversation discussing:\n", + "- Academic goals\n", + "- Career aspirations\n", + "- Course preferences\n", + "- Schedule constraints\n", + "- Graduation timeline\n", + "```\n", + "\n", + "**Discrete Strategy:** Extracts 20+ individual facts\n", + "**Summary Strategy:** Creates 1-2 comprehensive summaries preserving context\n", + "\n", + "**Scenario 2: Session Notes**\n", + "```\n", + "Agent: \"Let me summarize our conversation today...\"\n", + "[Retrieves summary memory instead of reconstructing from discrete facts]\n", + "```\n" + ] + }, + { + "cell_type": "markdown", + "id": "visualize-graph", + "metadata": {}, + "source": [ + "### Configuration Example (Not Used in This Notebook)\n", + "\n", + "If you wanted to use summary strategy instead:\n", + "\n", + "```python\n", + "from agent_memory_client.models import MemoryStrategyConfig\n", + "\n", + "# Configure summary strategy\n", + "summary_strategy = MemoryStrategyConfig(\n", + " strategy=\"summary\",\n", + " config={\"max_summary_length\": 500}\n", + ")\n", + "\n", + "# Apply when creating working memory\n", + "await memory_client.set_working_memory(\n", + " session_id=session_id,\n", + " messages=messages,\n", + " long_term_memory_strategy=summary_strategy # โ† Use summary instead of discrete\n", + ")\n", + "```\n" + ] + }, + { + "cell_type": "markdown", + "id": "show-graph", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:57:53.982174Z", + "iopub.status.busy": "2025-10-31T23:57:53.982118Z", + "iopub.status.idle": "2025-10-31T23:57:53.983908Z", + "shell.execute_reply": "2025-10-31T23:57:53.983535Z" + } + }, + "source": [ + "### Why We Stick with Discrete (Default)\n", + "\n", + "For this course advisor agent:\n", + "- โœ… Questions are specific (\"What are prerequisites for RU301?\")\n", + "- โœ… Facts are independently useful\n", + "- โœ… Search works better with discrete facts\n", + "- โœ… No configuration needed (default behavior)\n", + "\n", + "**In production**, you might:\n", + "- Use **discrete** for most interactions (default)\n", + "- Use **summary** for end-of-session notes\n", + "- Use **preferences** during student onboarding\n", + "- Use **custom** for specialized academic domains\n" + ] + }, + { + "cell_type": "markdown", + "id": "demo-section", + 
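"metadata": {},
+ "source": [
+ "### Aside: Preferences Strategy (Sketch, Not Used Here)\n",
+ "\n",
+ "The production options above also mention a **preferences** strategy for onboarding. A minimal sketch of what that configuration might look like, mirroring the summary example earlier; `strategy=\"preferences\"` is an assumption based on the strategy names listed in this section, and `session_id`/`messages` are placeholders as in that example, so check the Agent Memory Server docs for the exact values:\n",
+ "\n",
+ "```python\n",
+ "from agent_memory_client.models import MemoryStrategyConfig\n",
+ "\n",
+ "# Hypothetical: extract stable user preferences instead of discrete facts\n",
+ "preferences_strategy = MemoryStrategyConfig(\n",
+ "    strategy=\"preferences\",\n",
+ "    config={}\n",
+ ")\n",
+ "\n",
+ "await memory_client.set_working_memory(\n",
+ "    session_id=session_id,\n",
+ "    messages=messages,\n",
+ "    long_term_memory_strategy=preferences_strategy\n",
+ ")\n",
+ "```\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "section-3-connection",
+ 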
"metadata": {}, + "source": [ + "### ๐Ÿ”— Connection to Section 3\n", + "\n", + "In **Section 3, Notebook 1**, we introduced memory extraction strategies conceptually.\n", + "\n", + "In **Section 3, Notebook 2**, we demonstrated the difference between discrete and summary strategies with hands-on examples.\n", + "\n", + "**Now in Section 4**, we see how a production agent uses the discrete strategy (default) for course advising.\n", + "\n", + "**Key Takeaway:** The Agent Memory Server's memory extraction strategies give you flexibility in HOW memories are created, but for most agent interactions (like this course advisor), the default discrete strategy works best.\n" + ] + }, + { + "cell_type": "markdown", + "id": "run-agent-helper", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:57:53.984807Z", + "iopub.status.busy": "2025-10-31T23:57:53.984751Z", + "iopub.status.idle": "2025-10-31T23:57:53.990038Z", + "shell.execute_reply": "2025-10-31T23:57:53.989670Z" + } + }, + "source": [ + "### ๐Ÿ“š Learn More\n", + "\n", + "- [Memory Extraction Strategies Documentation](https://redis.github.io/agent-memory-server/memory-extraction-strategies/)\n", + "- [Section 3, Notebook 1](../section-3-memory-systems-for-context-engineering/01_working_and_longterm_memory.ipynb) - Theory foundation\n", + "- [Section 3, Notebook 2](../section-3-memory-systems-for-context-engineering/02_combining_memory_with_retrieved_context.ipynb) - Hands-on comparison demo\n", + "\n", + "---\n", + "\n", + "## ๐ŸŽจ Part 2: Define the Agent State\n", + "\n", + "In LangGraph, **state** is the shared data structure that flows through the graph. Each node can read from and write to the state.\n", + "\n", + "### What Goes in State?\n", + "\n", + "- **messages**: Conversation history (automatically managed by LangGraph)\n", + "- **student_id**: Who we're helping\n", + "- **session_id**: Current conversation session\n", + "- **context**: Additional context (memories, preferences, etc.)\n", + "\n", + "**Note:** We use `Annotated[List[BaseMessage], add_messages]` for messages. The `add_messages` reducer automatically handles message deduplication and ordering.\n" + ] + }, + { + "cell_type": "code", + "id": "demo-1", + "metadata": {}, + "source": [ + "# Define the agent state\n", + "class AgentState(BaseModel):\n", + " \"\"\"State for the course advisor agent.\"\"\"\n", + " messages: Annotated[List[BaseMessage], add_messages]\n", + " student_id: str\n", + " session_id: str\n", + " context: Dict[str, Any] = {}\n", + "\n", + "print(\"โœ… Agent state defined\")\n", + "print(\" Fields: messages, student_id, session_id, context\")" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "demo-search", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:57:53.991081Z", + "iopub.status.busy": "2025-10-31T23:57:53.991018Z", + "iopub.status.idle": "2025-10-31T23:57:54.095976Z", + "shell.execute_reply": "2025-10-31T23:57:54.095530Z" + } + }, + "source": [ + "---\n", + "\n", + "## ๐Ÿ”— Part 3: Build the Agent Graph\n", + "\n", + "Now we'll build the LangGraph workflow. Our graph will have:\n", + "\n", + "1. **load_memory** - Load working memory (conversation history)\n", + "2. **agent** - LLM decides what to do (call tools or respond)\n", + "3. **tools** - Execute tool calls\n", + "4. 
**save_memory** - Save updated conversation to working memory\n", + "\n", + "### Step 1: Define Node Functions\n", + "\n", + "Each node is a function that takes state and returns updated state.\n" + ] + }, + { + "cell_type": "code", + "id": "demo-2", + "metadata": {}, + "source": [ + "# Node 1: Load working memory\n", + "async def load_memory(state: AgentState) -> AgentState:\n", + " \"\"\"\n", + " Load conversation history from working memory.\n", + "\n", + " This gives the agent context about previous interactions in this session.\n", + " \"\"\"\n", + " try:\n", + " # Get or create working memory for this session\n", + " _, working_memory = await memory_client.get_or_create_working_memory(\n", + " session_id=state.session_id,\n", + " user_id=state.student_id,\n", + " model_name=\"gpt-4o\"\n", + " )\n", + "\n", + " if working_memory and working_memory.messages:\n", + " # Convert stored messages to LangChain message objects\n", + " loaded_messages = []\n", + " for msg in working_memory.messages:\n", + " if msg.role == 'user':\n", + " loaded_messages.append(HumanMessage(content=msg.content))\n", + " elif msg.role == 'assistant':\n", + " loaded_messages.append(AIMessage(content=msg.content))\n", + "\n", + " # Add loaded messages to state (prepend to current messages)\n", + " state.messages = loaded_messages + state.messages\n", + " state.context['memory_loaded'] = True\n", + " print(f\" Loaded {len(loaded_messages)} messages from working memory\")\n", + " else:\n", + " state.context['memory_loaded'] = False\n", + " print(\" No previous conversation found (new session)\")\n", + " except Exception as e:\n", + " print(f\" Warning: Could not load memory: {e}\")\n", + " state.context['memory_loaded'] = False\n", + "\n", + " return state\n", + "\n", + "print(\"โœ… Node 1 defined: load_memory\")\n", + "print(\" Purpose: Load conversation history from working memory\")" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "demo-store", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:57:54.097563Z", + "iopub.status.busy": "2025-10-31T23:57:54.097461Z", + "iopub.status.idle": "2025-10-31T23:57:54.100763Z", + "shell.execute_reply": "2025-10-31T23:57:54.100208Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Helper function defined: run_agent\n" + ] + } + ], + "source": [ + "# Node 2: Agent (LLM with tools)\n", + "async def agent_node(state: AgentState) -> AgentState:\n", + " \"\"\"\n", + " The agent decides what to do: call tools or respond to the user.\n", + "\n", + " This is where the LLM reasoning happens.\n", + " \"\"\"\n", + " # Create system message with instructions\n", + " system_message = SystemMessage(content=\"\"\"\n", + "You are a helpful Redis University course advisor assistant.\n", + "\n", + "Your role:\n", + "- Help students find courses that match their interests and goals\n", + "- Remember student preferences and use them for personalized recommendations\n", + "- Store important information about students for future conversations\n", + "\n", + "Guidelines:\n", + "- Use search_courses to find relevant courses\n", + "- Use search_memories to recall student preferences and past interactions\n", + "- Use store_memory when students share important preferences, goals, or constraints\n", + "- Be conversational and helpful\n", + "- Provide specific course recommendations with details\n", + "\"\"\")\n", + "\n", + " # Bind tools to LLM\n", + " llm_with_tools = 
llm.bind_tools(tools)\n", + "\n", + " # Call LLM with system message + conversation history\n", + " messages = [system_message] + state.messages\n", + " response = await llm_with_tools.ainvoke(messages)\n", + "\n", + " # Add response to state\n", + " state.messages.append(response)\n", + "\n", + " return state\n", + "\n", + "print(\"โœ… Node 2 defined: agent_node\")\n", + "print(\" Purpose: LLM decides whether to call tools or respond\")" + ] + }, + { + "cell_type": "code", + "id": "demo-3", + "metadata": {}, + "source": [ + "# Node 3: Save working memory\n", + "async def save_memory(state: AgentState) -> AgentState:\n", + " \"\"\"\n", + " Save the updated conversation to working memory.\n", + "\n", + " This ensures continuity across conversation turns.\n", + " \"\"\"\n", + " try:\n", + " # Get or create working memory\n", + " _, working_memory = await memory_client.get_or_create_working_memory(\n", + " session_id=state.session_id,\n", + " user_id=state.student_id,\n", + " model_name=\"gpt-4o\"\n", + " )\n", + "\n", + " # Clear existing messages and add current conversation\n", + " working_memory.messages = []\n", + " for msg in state.messages:\n", + " if isinstance(msg, HumanMessage):\n", + " working_memory.messages.append(MemoryMessage(role='user', content=msg.content))\n", + " elif isinstance(msg, AIMessage):\n", + " # Only store text content, not tool calls\n", + " if msg.content:\n", + " working_memory.messages.append(MemoryMessage(role='assistant', content=msg.content))\n", + "\n", + " # Save to working memory\n", + " await memory_client.put_working_memory(\n", + " session_id=state.session_id,\n", + " memory=working_memory,\n", + " user_id=state.student_id,\n", + " model_name=\"gpt-4o\"\n", + " )\n", + "\n", + " print(f\" Saved {len(working_memory.messages)} messages to working memory\")\n", + " except Exception as e:\n", + " print(f\" Warning: Could not save memory: {e}\")\n", + "\n", + " return state\n", + "\n", + "print(\"โœ… Node 3 defined: save_memory\")\n", + "print(\" Purpose: Save conversation to working memory\")" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "demo-recall", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:57:54.102049Z", + "iopub.status.busy": "2025-10-31T23:57:54.101962Z", + "iopub.status.idle": "2025-10-31T23:57:58.356458Z", + "shell.execute_reply": "2025-10-31T23:57:58.355667Z" + } + }, + "source": [ + "### Step 2: Define Routing Logic\n", + "\n", + "We need a function to decide: should we call tools or end the conversation?\n" + ] + }, + { + "cell_type": "code", + "id": "demo-4", + "metadata": {}, + "source": [ + "# Routing function\n", + "def should_continue(state: AgentState) -> str:\n", + " \"\"\"\n", + " Determine if we should continue to tools or end.\n", + "\n", + " If the last message has tool calls, route to tools.\n", + " Otherwise, we're done.\n", + " \"\"\"\n", + " last_message = state.messages[-1]\n", + "\n", + " # Check if there are tool calls\n", + " if hasattr(last_message, 'tool_calls') and last_message.tool_calls:\n", + " return \"tools\"\n", + " else:\n", + " return \"save_memory\"\n", + "\n", + "print(\"โœ… Routing logic defined: should_continue\")\n", + "print(\" Routes to 'tools' if LLM wants to call tools, otherwise to 'save_memory'\")" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "demo-personalized", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:57:58.358447Z", + "iopub.status.busy": 
"2025-10-31T23:57:58.358312Z", + "iopub.status.idle": "2025-10-31T23:58:04.410189Z", + "shell.execute_reply": "2025-10-31T23:58:04.409512Z" + } + }, + "source": [ + "### Step 3: Build the Graph\n", + "\n", + "Now we assemble all the pieces into a LangGraph workflow.\n" + ] + }, + { + "cell_type": "code", + "id": "inspect-memory", + "metadata": {}, + "source": [ + "# Create the graph\n", + "workflow = StateGraph(AgentState)\n", + "\n", + "# Add nodes\n", + "workflow.add_node(\"load_memory\", load_memory)\n", + "workflow.add_node(\"agent\", agent_node)\n", + "workflow.add_node(\"tools\", ToolNode(tools))\n", + "workflow.add_node(\"save_memory\", save_memory)\n", + "\n", + "# Define edges\n", + "workflow.set_entry_point(\"load_memory\")\n", + "workflow.add_edge(\"load_memory\", \"agent\")\n", + "workflow.add_conditional_edges(\n", + " \"agent\",\n", + " should_continue,\n", + " {\n", + " \"tools\": \"tools\",\n", + " \"save_memory\": \"save_memory\"\n", + " }\n", + ")\n", + "workflow.add_edge(\"tools\", \"agent\") # After tools, go back to agent\n", + "workflow.add_edge(\"save_memory\", END)\n", + "\n", + "# Compile the graph\n", + "agent_graph = workflow.compile()\n", + "\n", + "print(\"โœ… Agent graph built and compiled!\")\n", + "print(\"\\n๐Ÿ“Š Graph structure:\")\n", + "print(\" START โ†’ load_memory โ†’ agent โ†’ [tools โ†’ agent]* โ†’ save_memory โ†’ END\")\n", + "print(\"\\n * The agent can call tools multiple times before responding\")" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "check-memories", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:58:04.411898Z", + "iopub.status.busy": "2025-10-31T23:58:04.411768Z", + "iopub.status.idle": "2025-10-31T23:58:06.565467Z", + "shell.execute_reply": "2025-10-31T23:58:06.564738Z" + } + }, + "source": [ + "### Step 4: Visualize the Graph\n", + "\n", + "Let's see what our agent workflow looks like!\n" + ] + }, + { + "cell_type": "code", + "id": "comparison", + "metadata": {}, + "source": [ + "# Try to visualize the graph\n", + "try:\n", + " from IPython.display import Image, display\n", + "\n", + " # Generate graph visualization\n", + " graph_image = agent_graph.get_graph().draw_mermaid_png()\n", + " display(Image(graph_image))\n", + " print(\"\\nโœ… Graph visualization displayed above\")\n", + "except Exception as e:\n", + " print(f\"โš ๏ธ Could not display graph visualization: {e}\")\n", + " print(\"\\nGraph structure (text):\")\n", + " print(\"\"\"\n", + " โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”\n", + " โ”‚ START โ”‚\n", + " โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n", + " โ”‚\n", + " โ–ผ\n", + " โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”\n", + " โ”‚ load_memory โ”‚\n", + " โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n", + " โ”‚\n", + " โ–ผ\n", + " โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”\n", + " โ”‚ agent โ”‚ โ—„โ”€โ”€โ”€โ”€โ”€โ”\n", + " โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ”‚\n", + " โ”‚ โ”‚\n", + " โ”Œโ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ” โ”‚\n", + " โ”‚ โ”‚ โ”‚\n", + " โ–ผ โ–ผ โ”‚\n", + " [tools] [respond] โ”‚\n", + " โ”‚ โ”‚\n", + " โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n", + " โ”‚\n", + " โ–ผ\n", + " โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”\n", + " โ”‚ save_memory โ”‚\n", + " โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n", + " โ”‚\n", + " โ–ผ\n", + " โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”\n", + " โ”‚ END โ”‚\n", + " โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n", + " \"\"\")" + ], + "outputs": [], 
+ "execution_count": null + }, + { + "cell_type": "markdown", + "id": "architecture-recap", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:58:06.567416Z", + "iopub.status.busy": "2025-10-31T23:58:06.567279Z", + "iopub.status.idle": "2025-10-31T23:58:11.047325Z", + "shell.execute_reply": "2025-10-31T23:58:11.046775Z" + } + }, + "source": [ + "---\n", + "\n", + "## ๐ŸŽฌ Part 4: Demo the Agent\n", + "\n", + "Now let's see our agent in action! We'll have a conversation with the agent and watch it:\n", + "- Search for courses\n", + "- Store memories about preferences\n", + "- Recall information from previous interactions\n", + "\n", + "### Helper Function: Run Agent\n" + ] + }, + { + "cell_type": "code", + "id": "key-takeaways", + "metadata": {}, + "source": [ + "async def run_agent(user_message: str, verbose: bool = True) -> str:\n", + " \"\"\"\n", + " Run the agent with a user message.\n", + "\n", + " Args:\n", + " user_message: The user's input\n", + " verbose: Whether to print detailed execution info\n", + "\n", + " Returns:\n", + " The agent's response\n", + " \"\"\"\n", + " if verbose:\n", + " print(\"=\" * 80)\n", + " print(f\"๐Ÿ‘ค USER: {user_message}\")\n", + " print(\"=\" * 80)\n", + "\n", + " # Create initial state\n", + " initial_state = AgentState(\n", + " messages=[HumanMessage(content=user_message)],\n", + " student_id=STUDENT_ID,\n", + " session_id=SESSION_ID,\n", + " context={}\n", + " )\n", + "\n", + " # Run the graph\n", + " if verbose:\n", + " print(\"\\n๐Ÿค– AGENT EXECUTION:\")\n", + "\n", + " final_state = await agent_graph.ainvoke(initial_state)\n", + "\n", + " # Extract the final response\n", + " final_message = final_state[\"messages\"][-1]\n", + " response = final_message.content if hasattr(final_message, 'content') else str(final_message)\n", + "\n", + " if verbose:\n", + " print(\"\\n\" + \"=\" * 80)\n", + " print(f\"๐Ÿค– ASSISTANT: {response}\")\n", + " print(\"=\" * 80)\n", + "\n", + " return response\n", + "\n", + "print(\"โœ… Helper function defined: run_agent\")" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "next-steps", + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-31T23:58:11.049386Z", + "iopub.status.busy": "2025-10-31T23:58:11.049237Z", + "iopub.status.idle": "2025-10-31T23:58:11.464715Z", + "shell.execute_reply": "2025-10-31T23:58:11.464089Z" + } + }, + "source": [ + "### Demo 1: Search Courses\n", + "\n", + "Let's ask the agent to find machine learning courses.\n" + ] + }, + { + "cell_type": "code", + "id": "conclusion", + "metadata": {}, + "source": [ + "# Demo 1: Search for courses\n", + "response1 = await run_agent(\n", + " \"What machine learning courses are available? I'm interested in intermediate level courses.\"\n", + ")" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "a8c8b43a1a04fff3", + "metadata": {}, + "source": [ + "### Demo 2: Store Preferences\n", + "\n", + "Now let's share some preferences and watch the agent store them.\n" + ] + }, + { + "cell_type": "code", + "id": "97d4b563a3a30240", + "metadata": {}, + "source": [ + "# Demo 2: Store preferences\n", + "response2 = await run_agent(\n", + " \"I prefer online courses because I have a part-time job. 
\"\n", + " \"Also, I'm really interested in AI and want to work at a startup after graduation.\"\n", + ")" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "c2fc05bfee7ece66", + "metadata": {}, + "source": [ + "### Demo 3: Recall Memories\n", + "\n", + "Let's ask the agent to recall what it knows about us.\n" + ] + }, + { + "cell_type": "code", + "id": "437746891b606882", + "metadata": {}, + "source": [ + "# Demo 3: Recall memories\n", + "response3 = await run_agent(\n", + " \"What do you remember about my preferences and goals?\"\n", + ")" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "8d495052317c67bb", + "metadata": {}, + "source": [ + "### Demo 4: Personalized Recommendations\n", + "\n", + "Now let's ask for recommendations and see if the agent uses our stored preferences.\n" + ] + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "# Demo 4: Personalized recommendations\n", + "response4 = await run_agent(\n", + " \"Can you recommend some courses for next semester based on what you know about me?\"\n", + ")" + ], + "id": "3eb0f6ddeb45a9f9" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### Inspect Stored Memories\n", + "\n", + "Let's look at what's actually stored in long-term memory.\n" + ], + "id": "17dd61ca397db6be" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "# Check what's in long-term memory\n", + "try:\n", + " from agent_memory_client.filters import UserId\n", + "\n", + " results = await memory_client.search_long_term_memory(\n", + " text=\"preferences goals interests\",\n", + " user_id=UserId(eq=STUDENT_ID),\n", + " limit=10\n", + " )\n", + "\n", + " print(\"=\" * 80)\n", + " print(\"๐Ÿ’พ LONG-TERM MEMORY CONTENTS\")\n", + " print(\"=\" * 80)\n", + "\n", + " if results.memories and len(results.memories) > 0:\n", + " for i, memory in enumerate(results.memories, 1):\n", + " print(f\"\\n{i}. 
[{memory.memory_type}] {memory.text}\")\n", + " if memory.topics:\n", + " print(f\" Topics: {', '.join(memory.topics)}\")\n", + " if memory.created_at:\n", + " print(f\" Created: {memory.created_at}\")\n", + " else:\n", + " print(\"\\nNo memories found.\")\n", + "\n", + " print(\"\\n\" + \"=\" * 80)\n", + "except Exception as e:\n", + " print(f\"Error retrieving memories: {e}\")" + ], + "id": "19a91887b957f48c" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "---\n", + "\n", + "## ๐Ÿ“Š Part 5: RAG vs Agent Comparison\n", + "\n", + "Let's compare what we've built across the sections:\n", + "\n", + "### **Section 2: Basic RAG**\n", + "```python\n", + "# Simple flow\n", + "query โ†’ search_courses() โ†’ generate_response()\n", + "```\n", + "- โœ… Can retrieve course information\n", + "- โŒ No memory of previous interactions\n", + "- โŒ Can't store user preferences\n", + "- โŒ Single-step only\n", + "\n", + "### **Section 3: Memory-Enhanced RAG**\n", + "```python\n", + "# With memory\n", + "load_memory() โ†’ search_courses() โ†’ generate_response() โ†’ save_memory()\n", + "```\n", + "- โœ… Remembers conversation history\n", + "- โœ… Can reference previous messages\n", + "- โš ๏ธ Limited to predefined flow\n", + "- โŒ Can't decide when to store memories\n", + "\n", + "### **Section 4: Full Agent (This Notebook)**\n", + "```python\n", + "# Agent with tools and decision-making\n", + "load_memory() โ†’ agent_decides() โ†’ [search_courses | search_memories | store_memory]* โ†’ save_memory()\n", + "```\n", + "- โœ… Remembers conversation history\n", + "- โœ… Decides when to search courses\n", + "- โœ… Decides when to store memories\n", + "- โœ… Decides when to recall memories\n", + "- โœ… Can chain multiple operations\n", + "- โœ… Adaptive to user needs\n", + "\n", + "### **Key Differences:**\n", + "\n", + "| Feature | RAG | Memory-RAG | Agent |\n", + "|---------|-----|------------|-------|\n", + "| **Retrieval** | โœ… | โœ… | โœ… |\n", + "| **Conversation Memory** | โŒ | โœ… | โœ… |\n", + "| **Long-term Memory** | โŒ | โš ๏ธ (manual) | โœ… (automatic) |\n", + "| **Decision Making** | โŒ | โŒ | โœ… |\n", + "| **Multi-step Reasoning** | โŒ | โŒ | โœ… |\n", + "| **Tool Selection** | โŒ | โŒ | โœ… |\n", + "| **Complexity** | Low | Medium | High |\n", + "| **Latency** | Low | Medium | Higher |\n", + "| **Cost** | Low | Medium | Higher |\n", + "\n", + "**๐Ÿ’ก Key Insight:** Agents add decision-making and multi-step reasoning to RAG systems.\n" + ], + "id": "fd45b11038775302" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "---\n", + "\n", + "## ๐Ÿ—๏ธ Architecture Recap\n", + "\n", + "### **What We Built:**\n", + "\n", + "A complete course advisor agent with:\n", + "\n", + "**1. Tools (3 total)**\n", + "- `search_courses` - Semantic search over course catalog\n", + "- `search_memories` - Recall user preferences and facts\n", + "- `store_memory` - Save important information\n", + "\n", + "**2. Memory Architecture**\n", + "- **Working Memory** - Conversation history (session-scoped)\n", + "- **Long-term Memory** - User preferences and facts (persistent)\n", + "- **Graph State** - Current execution state (turn-scoped)\n", + "\n", + "**3. LangGraph Workflow**\n", + "- **Nodes**: load_memory, agent, tools, save_memory\n", + "- **Edges**: Conditional routing based on LLM decisions\n", + "- **State**: Shared data structure flowing through the graph\n", + "\n", + "**4. 
Integration Points**\n", + "- **Redis** - Course catalog storage and vector search\n", + "- **Agent Memory Server** - Working and long-term memory\n", + "- **OpenAI** - LLM for reasoning and tool selection\n", + "- **LangGraph** - Workflow orchestration\n", + "\n", + "### **The Complete Context Engineering Stack:**\n", + "\n", + "```\n", + "โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”\n", + "โ”‚ AGENT LAYER โ”‚\n", + "โ”‚ (LangGraph orchestration + tool selection) โ”‚\n", + "โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n", + " โ”‚\n", + " โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”\n", + " โ”‚ โ”‚ โ”‚\n", + " โ–ผ โ–ผ โ–ผ\n", + " โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”\n", + " โ”‚ Tools โ”‚ โ”‚ Memory โ”‚ โ”‚ RAG โ”‚\n", + " โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n", + " โ”‚ โ”‚ โ”‚\n", + " โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n", + " โ”‚\n", + " โ–ผ\n", + " โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”\n", + " โ”‚ Redis Stack โ”‚\n", + " โ”‚ (Storage + โ”‚\n", + " โ”‚ Vector Search)โ”‚\n", + " โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n", + "```\n", + "\n", + "\n" + ], + "id": "d4a533d945ca605e" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "---\n", + "\n", + "## ๐Ÿ”ง Part 6: Working Memory Compression for Long Conversations\n", + "\n", + "Now that we have a working agent, let's address a production challenge: **What happens when conversations get very long?**\n" + ], + "id": "c4654c5a2c4e5323" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### ๐Ÿ”— Connection to Section 3, Notebook 3\n", + "\n", + "In **Section 3, Notebook 3**, we learned about working memory compression strategies:\n", + "- **Truncation** - Keep only recent N messages (fast, simple)\n", + "- **Priority-Based** - Score messages by importance (balanced)\n", + "- **Summarization** - LLM creates intelligent summaries (high quality)\n", + "\n", + "**In this section**, we'll demonstrate these strategies in our production agent to show how they handle long conversations.\n" + ], + "id": "346d2737598bfd31" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### The Problem: Unbounded Conversation Growth\n", + "\n", + "Every conversation turn adds messages to working memory:\n", + "\n", + "```\n", + "Turn 1: System (500) + Messages (200) = 700 tokens โœ…\n", + "Turn 10: System (500) + Messages (2,000) = 2,500 tokens โœ…\n", + "Turn 30: System (500) + Messages (6,000) = 6,500 tokens โš ๏ธ\n", + "Turn 50: System (500) + Messages (10,000) = 10,500 tokens โš ๏ธ\n", + "Turn 100: System (500) + Messages (20,000) = 20,500 tokens โŒ\n", + "```\n", + "\n", + "**Without compression:**\n", + "- ๐Ÿ’ฐ Costs grow quadratically (each turn includes all previous messages)\n", + "- โฑ๏ธ Latency increases with context size\n", + "- ๐Ÿšซ Eventually hit token limits (128K for GPT-4o)\n", + "- ๐Ÿ“‰ Context rot: LLMs struggle with very long contexts\n", + "\n", + "**Solution:** Compress working memory while preserving important information.\n" + ], + "id": "6a1c7e21740d4240" + }, + { + 
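+    "metadata": {},
+    "cell_type": "markdown",
+    "source": [
+     "Before implementing the strategies, it's worth seeing where the numbers above come from. The sketch below recomputes the table from two assumed sizes (a 500-token system prompt and ~200 new message tokens per turn -- illustrative values, not measurements) and also tracks *cumulative* input tokens, the figure that grows roughly quadratically because every turn re-sends the entire history.\n"
+    ],
+    "id": "growth-sketch-md"
+   },
+   {
+    "metadata": {},
+    "cell_type": "code",
+    "outputs": [],
+    "execution_count": null,
+    "source": [
+     "# Illustrative sketch (assumed sizes, matching the table above).\n",
+     "SYSTEM_TOKENS = 500      # assumed system prompt size\n",
+     "TOKENS_PER_TURN = 200    # assumed new message tokens per turn\n",
+     "\n",
+     "cumulative = 0\n",
+     "for turn in range(1, 101):\n",
+     "    per_query = SYSTEM_TOKENS + TOKENS_PER_TURN * turn  # whole history re-sent\n",
+     "    cumulative += per_query\n",
+     "    if turn in (1, 10, 30, 50, 100):\n",
+     "        print(f\"Turn {turn:>3}: {per_query:>6,} tokens/query | {cumulative:>9,} cumulative input tokens\")\n"
+    ],
+    "id": "growth-sketch-code"
+   },
+   {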
"metadata": {}, + "cell_type": "markdown", + "source": [ + "### Implementation: Three Compression Strategies\n", + "\n", + "Let's implement the strategies from Section 3, Notebook 3.\n" + ], + "id": "439770b03604fe49" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "import tiktoken\n", + "from typing import List, Dict, Tuple\n", + "from dataclasses import dataclass\n", + "from enum import Enum\n", + "\n", + "# Token counting utility\n", + "def count_tokens(text: str, model: str = \"gpt-4o\") -> int:\n", + " \"\"\"Count tokens in text using tiktoken.\"\"\"\n", + " try:\n", + " encoding = tiktoken.encoding_for_model(model)\n", + " return len(encoding.encode(text))\n", + " except Exception:\n", + " # Fallback: rough estimate\n", + " return len(text) // 4\n", + "\n", + "@dataclass\n", + "class ConversationMessage:\n", + " \"\"\"Represents a conversation message with metadata.\"\"\"\n", + " role: str\n", + " content: str\n", + " token_count: int = 0\n", + "\n", + " def __post_init__(self):\n", + " if self.token_count == 0:\n", + " self.token_count = count_tokens(self.content)\n", + "\n", + "print(\"โœ… Token counting utilities defined\")\n" + ], + "id": "821ce9b3f3abe835" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "#### Strategy 1: Truncation (Fast, Simple)\n", + "\n", + "Keep only the most recent N messages within token budget.\n", + "\n", + "**Pros:** Fast, no LLM calls, predictable\n", + "**Cons:** Loses all old context, no intelligence\n" + ], + "id": "f1d1881df6ca55de" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "class TruncationStrategy:\n", + " \"\"\"Keep only the most recent messages within token budget.\"\"\"\n", + "\n", + " def compress(\n", + " self,\n", + " messages: List[ConversationMessage],\n", + " max_tokens: int\n", + " ) -> List[ConversationMessage]:\n", + " \"\"\"Keep most recent messages within token budget.\"\"\"\n", + " compressed = []\n", + " total_tokens = 0\n", + "\n", + " # Work backwards from most recent\n", + " for msg in reversed(messages):\n", + " if total_tokens + msg.token_count <= max_tokens:\n", + " compressed.insert(0, msg)\n", + " total_tokens += msg.token_count\n", + " else:\n", + " break\n", + "\n", + " return compressed\n", + "\n", + "print(\"โœ… Truncation strategy implemented\")\n" + ], + "id": "1df1a0aa4aabfb41" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "#### Strategy 2: Priority-Based (Balanced)\n", + "\n", + "Score messages by importance and keep highest-scoring ones.\n", + "\n", + "**Pros:** Preserves important context, no LLM calls\n", + "**Cons:** Requires good scoring logic, may lose temporal flow\n" + ], + "id": "3dcc2d1ef45c9d33" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "class PriorityBasedStrategy:\n", + " \"\"\"Score messages by importance and keep highest-scoring.\"\"\"\n", + "\n", + " def _score_message(self, msg: ConversationMessage, index: int, total: int) -> float:\n", + " \"\"\"\n", + " Score message importance.\n", + "\n", + " Higher scores for:\n", + " - Recent messages (recency bias)\n", + " - Longer messages (more information)\n", + " - User messages (user intent)\n", + " - Messages with keywords (course names, preferences)\n", + " \"\"\"\n", + " score = 0.0\n", + "\n", + " # Recency: Recent messages get higher scores\n", + " recency_score = index / total\n", + " score += recency_score 
* 50\n", + "\n", + " # Length: Longer messages likely have more info\n", + " length_score = min(msg.token_count / 100, 1.0)\n", + " score += length_score * 20\n", + "\n", + " # Role: User messages are important (capture intent)\n", + " if msg.role == \"user\":\n", + " score += 15\n", + "\n", + " # Keywords: Messages with important terms\n", + " keywords = [\"course\", \"RU\", \"prefer\", \"interested\", \"goal\", \"major\", \"graduate\"]\n", + " keyword_count = sum(1 for kw in keywords if kw.lower() in msg.content.lower())\n", + " score += keyword_count * 5\n", + "\n", + " return score\n", + "\n", + " def compress(\n", + " self,\n", + " messages: List[ConversationMessage],\n", + " max_tokens: int\n", + " ) -> List[ConversationMessage]:\n", + " \"\"\"Keep highest-scoring messages within token budget.\"\"\"\n", + " # Score all messages\n", + " scored = [\n", + " (self._score_message(msg, i, len(messages)), i, msg)\n", + " for i, msg in enumerate(messages)\n", + " ]\n", + "\n", + " # Sort by score (descending)\n", + " scored.sort(reverse=True, key=lambda x: x[0])\n", + "\n", + " # Select messages within budget\n", + " selected = []\n", + " total_tokens = 0\n", + "\n", + " for score, idx, msg in scored:\n", + " if total_tokens + msg.token_count <= max_tokens:\n", + " selected.append((idx, msg))\n", + " total_tokens += msg.token_count\n", + "\n", + " # Sort by original order to maintain conversation flow\n", + " selected.sort(key=lambda x: x[0])\n", + "\n", + " return [msg for idx, msg in selected]\n", + "\n", + "print(\"โœ… Priority-based strategy implemented\")\n", + "\n" + ], + "id": "edc2ffeac82e03ba" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "#### Strategy 3: Summarization (High Quality)\n", + "\n", + "Use LLM to create intelligent summaries of old messages, keep recent ones.\n", + "\n", + "**Pros:** Preserves meaning, high quality, intelligent compression\n", + "**Cons:** Slower, costs tokens, requires LLM call\n" + ], + "id": "7a8408f151375688" + }, + { + "metadata": {}, + "cell_type": "code", + "source": [ + "class SummarizationStrategy:\n", + " \"\"\"Use LLM to create intelligent summaries.\"\"\"\n", + "\n", + " def __init__(self, llm: ChatOpenAI, keep_recent: int = 4):\n", + " self.llm = llm\n", + " self.keep_recent = keep_recent\n", + "\n", + " self.summarization_prompt = \"\"\"You are summarizing a conversation between a student and a course advisor.\n", + "\n", + "Create a concise summary that preserves:\n", + "1. Key decisions made\n", + "2. Important requirements or prerequisites discussed\n", + "3. Student's goals, preferences, and constraints\n", + "4. Specific courses mentioned and recommendations given\n", + "5. Any problems or issues that need follow-up\n", + "\n", + "Format as bullet points. 
Be specific and actionable.\n", + "\n", + "Conversation:\n", + "{conversation}\n", + "\n", + "Summary:\"\"\"\n", + "\n", + " async def compress_async(\n", + " self,\n", + " messages: List[ConversationMessage],\n", + " max_tokens: int\n", + " ) -> List[ConversationMessage]:\n", + " \"\"\"Compress using summarization (async).\"\"\"\n", + " if len(messages) <= self.keep_recent:\n", + " return messages\n", + "\n", + " # Split into old (to summarize) and recent (to keep)\n", + " old_messages = messages[:-self.keep_recent]\n", + " recent_messages = messages[-self.keep_recent:]\n", + "\n", + " # Format old messages for summarization\n", + " conversation_text = \"\\n\".join([\n", + " f\"{msg.role.title()}: {msg.content}\"\n", + " for msg in old_messages\n", + " ])\n", + "\n", + " # Generate summary using LLM\n", + " prompt = self.summarization_prompt.format(conversation=conversation_text)\n", + " response = await self.llm.ainvoke([HumanMessage(content=prompt)])\n", + "\n", + " summary_content = f\"[CONVERSATION SUMMARY]\\n{response.content}\"\n", + "\n", + " # Create summary message\n", + " summary_msg = ConversationMessage(\n", + " role=\"system\",\n", + " content=summary_content\n", + " )\n", + "\n", + " # Return summary + recent messages\n", + " return [summary_msg] + recent_messages\n", + "\n", + "print(\"โœ… Summarization strategy implemented\")\n", + "\n" + ], + "id": "33dd8c677f8c24ba", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### Demo: Simulating a Long Conversation\n", + "\n", + "Let's create a realistic 30-turn conversation to demonstrate compression needs.\n" + ], + "id": "225f1520b9ed27e1" + }, + { + "metadata": {}, + "cell_type": "code", + "source": [ + "# Simulate a long advising conversation (30 turns = 60 messages)\n", + "long_conversation_turns = [\n", + " (\"I'm interested in machine learning courses\", \"Great! Let me help you find ML courses.\"),\n", + " (\"What are the prerequisites?\", \"You'll need data structures and linear algebra.\"),\n", + " (\"I've completed CS201 Data Structures\", \"Perfect! That's one prerequisite done.\"),\n", + " (\"Do I need calculus?\", \"Yes, MATH301 Linear Algebra is required.\"),\n", + " (\"I'm taking that next semester\", \"Excellent planning!\"),\n", + " (\"What ML courses do you recommend?\", \"RU330 and RU401 are great for ML.\"),\n", + " (\"Tell me about RU330\", \"RU330 covers trading engines with ML applications.\"),\n", + " (\"Is it available online?\", \"Yes, RU330 is available in online format.\"),\n", + " (\"What about RU401?\", \"RU401 focuses on running Redis at scale with vector search.\"),\n", + " (\"That sounds perfect for AI\", \"Absolutely! Vector search is key for AI applications.\"),\n", + " (\"I prefer online courses\", \"I'll note that preference for future recommendations.\"),\n", + " (\"I work part-time\", \"Online courses are great for working students.\"),\n", + " (\"When should I take RU330?\", \"After completing your prerequisites.\"),\n", + " (\"Can I take both together?\", \"Yes, if you have time. 
Both are 3-credit courses.\"),\n", + " (\"What's the workload like?\", \"Expect 6-8 hours per week for each course.\"),\n", + " (\"I'm also interested in databases\", \"RU301 covers querying and indexing.\"),\n", + " (\"Is that a prerequisite for RU401?\", \"No, but it's helpful background knowledge.\"),\n", + " (\"What order should I take them?\", \"RU301 first, then RU330, then RU401.\"),\n", + " (\"That's a good progression\", \"Yes, it builds your skills systematically.\"),\n", + " (\"I want to graduate in Spring 2026\", \"Let's plan your course schedule.\"),\n", + " (\"I can take 2 courses per semester\", \"That's manageable with work.\"),\n", + " (\"Fall 2025: RU301 and what else?\", \"Maybe RU330 if prerequisites are done.\"),\n", + " (\"Spring 2026: RU401?\", \"Yes, that completes your ML track.\"),\n", + " (\"Are there any capstone projects?\", \"RU401 includes a vector search project.\"),\n", + " (\"That sounds challenging\", \"It's practical and portfolio-worthy.\"),\n", + " (\"I'm interested in tech startups\", \"These courses are perfect for startup roles.\"),\n", + " (\"Do you have career resources?\", \"We have career services and job boards.\"),\n", + " (\"Can I get internship help?\", \"Yes, our career center helps with internships.\"),\n", + " (\"This has been very helpful\", \"I'm glad I could help plan your path!\"),\n", + " (\"I'll start with RU301 next semester\", \"Excellent choice! Good luck!\"),\n", + "]\n", + "\n", + "# Convert to ConversationMessage objects\n", + "long_conversation = []\n", + "for user_msg, assistant_msg in long_conversation_turns:\n", + " long_conversation.append(ConversationMessage(role=\"user\", content=user_msg))\n", + " long_conversation.append(ConversationMessage(role=\"assistant\", content=assistant_msg))\n", + "\n", + "# Calculate statistics\n", + "total_messages = len(long_conversation)\n", + "total_tokens = sum(msg.token_count for msg in long_conversation)\n", + "avg_tokens_per_msg = total_tokens / total_messages\n", + "\n", + "print(\"๐Ÿ“Š Long Conversation Statistics\")\n", + "print(\"=\" * 80)\n", + "print(f\"Total turns: {len(long_conversation_turns)}\")\n", + "print(f\"Total messages: {total_messages}\")\n", + "print(f\"Total tokens: {total_tokens:,}\")\n", + "print(f\"Average tokens per message: {avg_tokens_per_msg:.1f}\")\n", + "print(f\"\\nโš ๏ธ This conversation is getting expensive!\")\n", + "print(f\" Cost per query (at $0.0025/1K tokens): ${(total_tokens / 1000) * 0.0025:.4f}\")\n", + "print(f\" Over 1,000 conversations: ${((total_tokens / 1000) * 0.0025) * 1000:.2f}\")\n", + "\n", + "\n" + ], + "id": "cccf2fb420c9025a", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### Comparison: Testing All Three Strategies\n", + "\n", + "Let's compress this conversation using all three strategies and compare results.\n" + ], + "id": "dcfc2ebd5306f8cb" + }, + { + "metadata": {}, + "cell_type": "code", + "source": [ + "# Set compression budget\n", + "max_tokens = 1000 # Target: compress from ~1,500 tokens to ~1,000 tokens\n", + "\n", + "print(\"๐Ÿ”ฌ Compression Strategy Comparison\")\n", + "print(\"=\" * 80)\n", + "print(f\"Original: {total_messages} messages, {total_tokens:,} tokens\")\n", + "print(f\"Target: {max_tokens:,} tokens (compression needed!)\\n\")\n", + "\n", + "# Strategy 1: Truncation\n", + "truncation = TruncationStrategy()\n", + "truncated = truncation.compress(long_conversation, max_tokens)\n", + "truncated_tokens = sum(msg.token_count for msg in 
truncated)\n", + "\n", + "print(\"1๏ธโƒฃ TRUNCATION STRATEGY\")\n", + "print(f\" Result: {len(truncated)} messages, {truncated_tokens:,} tokens\")\n", + "print(f\" Savings: {total_tokens - truncated_tokens:,} tokens ({((total_tokens - truncated_tokens) / total_tokens * 100):.1f}%)\")\n", + "print(f\" Kept: Most recent {len(truncated)} messages\")\n", + "print(f\" Lost: First {total_messages - len(truncated)} messages (all early context)\")\n", + "\n", + "# Strategy 2: Priority-Based\n", + "priority = PriorityBasedStrategy()\n", + "prioritized = priority.compress(long_conversation, max_tokens)\n", + "prioritized_tokens = sum(msg.token_count for msg in prioritized)\n", + "\n", + "print(f\"\\n2๏ธโƒฃ PRIORITY-BASED STRATEGY\")\n", + "print(f\" Result: {len(prioritized)} messages, {prioritized_tokens:,} tokens\")\n", + "print(f\" Savings: {total_tokens - prioritized_tokens:,} tokens ({((total_tokens - prioritized_tokens) / total_tokens * 100):.1f}%)\")\n", + "print(f\" Kept: {len(prioritized)} highest-scoring messages\")\n", + "print(f\" Preserved: Important context from throughout conversation\")\n", + "\n", + "# Show which messages were kept (by index)\n", + "kept_indices = []\n", + "for msg in prioritized:\n", + " for i, orig_msg in enumerate(long_conversation):\n", + " if msg.content == orig_msg.content and msg.role == orig_msg.role:\n", + " kept_indices.append(i)\n", + " break\n", + "print(f\" Message indices kept: {sorted(set(kept_indices))[:10]}... (showing first 10)\")\n", + "\n", + "# Strategy 3: Summarization\n", + "summarization = SummarizationStrategy(llm=llm, keep_recent=4)\n", + "summarized = await summarization.compress_async(long_conversation, max_tokens)\n", + "summarized_tokens = sum(msg.token_count for msg in summarized)\n", + "\n", + "print(f\"\\n3๏ธโƒฃ SUMMARIZATION STRATEGY\")\n", + "print(f\" Result: {len(summarized)} messages, {summarized_tokens:,} tokens\")\n", + "print(f\" Savings: {total_tokens - summarized_tokens:,} tokens ({((total_tokens - summarized_tokens) / total_tokens * 100):.1f}%)\")\n", + "print(f\" Structure: 1 summary + {len(summarized) - 1} recent messages\")\n", + "print(f\" Preserved: Meaning of all {total_messages - 4} old messages in summary\")\n", + "\n", + "# Show summary preview\n", + "summary_msg = summarized[0]\n", + "print(f\"\\n Summary preview:\")\n", + "summary_lines = summary_msg.content.split('\\n')[:5]\n", + "for line in summary_lines:\n", + " print(f\" {line}\")\n", + "if len(summary_msg.content.split('\\n')) > 5:\n", + " print(f\" ... 
({len(summary_msg.content.split('\\n')) - 5} more lines)\")\n", + "\n" + ], + "id": "58fab84b7f0fb661", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "### Comparison Table\n", + "id": "b5874671e946a4d8" + }, + { + "metadata": {}, + "cell_type": "code", + "source": [ + "# Create comparison table\n", + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"๐Ÿ“Š COMPRESSION STRATEGY COMPARISON TABLE\")\n", + "print(\"=\" * 80)\n", + "print(f\"{'Strategy':<20} {'Messages':<12} {'Tokens':<12} {'Savings':<15} {'Quality':<10} {'Speed'}\")\n", + "print(\"-\" * 80)\n", + "\n", + "strategies_data = [\n", + " (\"Original\", total_messages, total_tokens, \"0 (0%)\", \"N/A\", \"N/A\"),\n", + " (\"Truncation\", len(truncated), truncated_tokens,\n", + " f\"{total_tokens - truncated_tokens} ({((total_tokens - truncated_tokens) / total_tokens * 100):.0f}%)\",\n", + " \"Low\", \"Fast\"),\n", + " (\"Priority-Based\", len(prioritized), prioritized_tokens,\n", + " f\"{total_tokens - prioritized_tokens} ({((total_tokens - prioritized_tokens) / total_tokens * 100):.0f}%)\",\n", + " \"Medium\", \"Fast\"),\n", + " (\"Summarization\", len(summarized), summarized_tokens,\n", + " f\"{total_tokens - summarized_tokens} ({((total_tokens - summarized_tokens) / total_tokens * 100):.0f}%)\",\n", + " \"High\", \"Slow\"),\n", + "]\n", + "\n", + "for name, msgs, tokens, savings, quality, speed in strategies_data:\n", + " print(f\"{name:<20} {msgs:<12} {tokens:<12} {savings:<15} {quality:<10} {speed}\")\n", + "\n", + "print(\"\\n๐Ÿ’ก Key Insights:\")\n", + "print(\" โ€ข Truncation: Fastest but loses all early context\")\n", + "print(\" โ€ข Priority-Based: Good balance, preserves important messages\")\n", + "print(\" โ€ข Summarization: Best quality, preserves meaning of entire conversation\")\n", + "print(\" โ€ข Choose based on your quality/speed/cost requirements\")\n" + ], + "id": "c55826be685cfa3d", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### Agent Memory Server's Automatic Compression\n", + "\n", + "The Agent Memory Server provides automatic compression through the `WINDOW_SIZE` configuration.\n", + "\n", + "**How it works:**\n", + "1. You set `WINDOW_SIZE` in environment variables (e.g., `WINDOW_SIZE=20`)\n", + "2. When working memory exceeds this threshold, automatic compression triggers\n", + "3. Server uses summarization strategy (similar to our Strategy 3)\n", + "4. Old messages are summarized, recent messages are kept\n", + "5. 
Your application retrieves compressed memory transparently\n", + "\n", + "**Configuration Example:**\n", + "\n", + "```bash\n", + "# In .env file\n", + "WINDOW_SIZE=20 # Trigger compression after 20 messages\n", + "LONG_TERM_MEMORY=true # Enable long-term memory\n", + "REDIS_URL=redis://localhost:6379\n", + "```\n", + "\n", + "**In production:**\n", + "- โœ… Automatic compression (no manual intervention)\n", + "- โœ… Configurable thresholds\n", + "- โœ… Background processing (async workers)\n", + "- โœ… Transparent to your application\n" + ], + "id": "3df8a7dfed12ad73" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### When to Use Each Strategy\n", + "\n", + "**Use Truncation when:**\n", + "- โœ… Speed is critical (real-time chat)\n", + "- โœ… Recent context is all that matters\n", + "- โœ… Cost-sensitive (no LLM calls)\n", + "- โœ… Simple implementation needed\n", + "\n", + "**Use Priority-Based when:**\n", + "- โœ… Need balance between speed and quality\n", + "- โœ… Important context scattered throughout conversation\n", + "- โœ… No LLM calls allowed (cost/latency constraints)\n", + "- โœ… Custom scoring logic available\n", + "\n", + "**Use Summarization when:**\n", + "- โœ… Quality is critical (preserve all important info)\n", + "- โœ… Long conversations (30+ turns)\n", + "- โœ… Can afford LLM call latency\n", + "- โœ… Comprehensive context needed\n", + "\n", + "**Use Agent Memory Server when:**\n", + "- โœ… Production deployment\n", + "- โœ… Want automatic management\n", + "- โœ… Need scalability\n", + "- โœ… Prefer transparent operation\n" + ], + "id": "b25ca6d346ac38f3" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### Production Recommendations\n", + "\n", + "**For most applications:**\n", + "```python\n", + "# Use Agent Memory Server with automatic compression\n", + "# Configuration in .env:\n", + "# WINDOW_SIZE=20\n", + "# LONG_TERM_MEMORY=true\n", + "```\n", + "\n", + "**For high-volume, cost-sensitive:**\n", + "```python\n", + "# Use priority-based compression manually\n", + "priority = PriorityBasedStrategy()\n", + "compressed = priority.compress(messages, max_tokens=2000)\n", + "```\n", + "\n", + "**For critical conversations:**\n", + "```python\n", + "# Use summarization with human review\n", + "summarization = SummarizationStrategy(llm=llm, keep_recent=6)\n", + "compressed = await summarization.compress_async(messages, max_tokens=3000)\n", + "# Store full conversation separately for audit\n", + "```\n", + "\n", + "**For real-time chat:**\n", + "```python\n", + "# Use truncation for speed\n", + "truncation = TruncationStrategy()\n", + "compressed = truncation.compress(messages, max_tokens=1500)\n", + "```\n" + ], + "id": "f85886cdfd7b8c63" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### ๐Ÿ”— Connection Back to Section 3\n", + "\n", + "**Section 3, Notebook 3** taught the theory:\n", + "- Why compression is needed (token limits, cost, performance)\n", + "- Three compression strategies (truncation, priority, summarization)\n", + "- Decision framework for choosing strategies\n", + "- Agent Memory Server configuration\n", + "\n", + "**This section** demonstrated the practice:\n", + "- โœ… Implemented all three strategies in working code\n", + "- โœ… Tested with realistic 30-turn conversation\n", + "- โœ… Compared results with metrics\n", + "- โœ… Showed when to use each strategy\n", + "- โœ… Connected to Agent Memory Server's automatic features\n", + "\n", + "**Key Takeaway:** You now understand both the 
theory (Section 3) and practice (Section 4) of working memory compression for production agents!\n", + "\n", + "\n", + "\n" + ], + "id": "953e03c75beccdb4" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "---\n", + "\n", + "## ๐ŸŽ“ Key Takeaways\n", + "\n", + "### **1. Agents = RAG + Tools + Decision-Making**\n", + "- RAG retrieves information\n", + "- Tools enable actions\n", + "- Agents decide when to use each\n", + "\n", + "### **2. Memory is Critical for Personalization**\n", + "- Working memory enables conversation continuity\n", + "- Long-term memory enables personalization\n", + "- Agents can decide when to store/recall memories\n", + "\n", + "### **3. LangGraph Simplifies Complex Workflows**\n", + "- State management is automatic\n", + "- Conditional routing is declarative\n", + "- Visualization helps debugging\n", + "\n", + "### **4. Tool Design Matters**\n", + "- Clear descriptions guide LLM selection\n", + "- Well-defined schemas prevent errors\n", + "- Focused tools are better than Swiss Army knives\n", + "\n", + "### **5. Trade-offs to Consider**\n", + "- **Complexity**: Agents are more complex than RAG\n", + "- **Latency**: Multiple tool calls add latency\n", + "- **Cost**: More LLM calls = higher cost\n", + "- **Value**: Worth it for complex, multi-step tasks\n", + "\n", + "### **6. When to Use Agents vs RAG**\n", + "\n", + "**Use RAG when:**\n", + "- Simple question answering\n", + "- Single-step retrieval\n", + "- Low latency required\n", + "- Predictable workflows\n", + "\n", + "**Use Agents when:**\n", + "- Multi-step reasoning needed\n", + "- Actions beyond retrieval\n", + "- Personalization required\n", + "- Complex decision-making\n" + ], + "id": "6064fff959e6e811" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "---\n", + "\n", + "## ๐Ÿš€ Next Steps and Extensions\n", + "\n", + "### **Ideas to Extend This Agent:**\n", + "\n", + "1. **Add More Tools**\n", + " - `check_prerequisites` - Verify if student meets course requirements\n", + " - `get_course_details` - Get detailed info about a specific course\n", + " - `create_schedule` - Build a semester schedule\n", + " - `check_conflicts` - Detect time conflicts\n", + "\n", + "2. **Enhance Memory**\n", + " - Automatic memory extraction from conversations\n", + " - Memory summarization for long conversations\n", + " - Memory importance scoring\n", + " - Memory expiration policies\n", + "\n", + "3. **Improve Personalization**\n", + " - Learning style detection\n", + " - Career path recommendations\n", + " - Skill gap analysis\n", + " - Progress tracking\n", + "\n", + "4. **Add Guardrails**\n", + " - Input validation\n", + " - Output filtering\n", + " - Rate limiting\n", + " - Error handling\n", + "\n", + "5. **Production Considerations**\n", + " - Authentication and authorization\n", + " - Logging and monitoring\n", + " - Caching for performance\n", + " - Fallback strategies\n", + "\n", + "### **Reference Implementation:**\n", + "\n", + "Check out `reference-agent/` for a full production implementation with:\n", + "- 7 tools (vs our 3)\n", + "- Advanced memory management\n", + "- Semantic tool selection\n", + "- Comprehensive error handling\n", + "- CLI interface\n", + "- Full test suite\n" + ], + "id": "ca5250d8cbfa9772" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "---\n", + "\n", + "## ๐ŸŽ‰ Congratulations!\n", + "\n", + "You've completed the Context Engineering course! 
You've learned:\n", + "\n", + "**Section 1:** Context Types\n", + "- System, User, Conversation, Retrieved context\n", + "- How context shapes LLM behavior\n", + "\n", + "**Section 2:** RAG Foundations\n", + "- Semantic search with vector embeddings\n", + "- Context assembly and generation\n", + "- Building a course search system\n", + "\n", + "**Section 3:** Memory Architecture\n", + "- Working memory for conversation continuity\n", + "- Long-term memory for persistent knowledge\n", + "- Memory-enhanced RAG systems\n", + "\n", + "**๐Ÿ”ฌ Research Foundation:** Throughout this course, you've learned techniques validated by Context Rot research - prioritizing relevance over quantity, filtering distractors, and structuring context for optimal LLM performance. ([Context Rot paper](https://research.trychroma.com/context-rot))\n", + "\n", + "**Section 4:** Agents and Tools\n", + "- Tool calling fundamentals\n", + "- LangGraph workflow orchestration\n", + "- Building a complete course advisor agent\n", + "- Agents vs RAG trade-offs\n", + "\n", + "### **You Can Now:**\n", + "- โœ… Design effective context strategies\n", + "- โœ… Build RAG systems with Redis\n", + "- โœ… Implement dual-memory architectures\n", + "- โœ… Create agents with tools and decision-making\n", + "- โœ… Choose the right approach for your use case\n", + "\n", + "### **Keep Learning:**\n", + "- Explore the reference-agent implementation\n", + "- Experiment with different tools\n", + "- Try different LLMs and embeddings\n", + "- Build your own agents!\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ“š Additional Resources\n", + "\n", + "\n", + "- [Agent Memory Server Documentation](https://github.com/redis/agent-memory-server) - Production-ready memory management\n", + "- [Agent Memory Client](https://pypi.org/project/agent-memory-client/) - Python client for Agent Memory Server\n", + "- [RedisVL Documentation](https://redisvl.com/) - Redis Vector Library\n", + "- [Retrieval-Augmented Generation Paper](https://arxiv.org/abs/2005.11401) - Original RAG research\n", + "- [LangChain RAG Tutorial](https://python.langchain.com/docs/use_cases/question_answering/) - Building RAG systems\n", + "- [LangGraph Tutorials](https://langchain-ai.github.io/langgraph/tutorials/) - Building agents with LangGraph\n", + "- [Agent Architectures](https://python.langchain.com/docs/modules/agents/) - Different agent patterns\n", + "- [ReAct: Synergizing Reasoning and Acting](https://arxiv.org/abs/2210.03629) - Reasoning + acting in LLMs\n", + "- [Anthropic's Guide to Building Effective Agents](https://www.anthropic.com/research/building-effective-agents) - Agent design patterns\n", + "\n", + "---\n", + "\n", + "**Thank you for completing this course! 
๐Ÿ™**\n" + ], + "id": "88773a005e5cba59" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": "", + "id": "70ab2e1e572d5aa6" + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/python-recipes/context-engineering/notebooks/section-4-tool-selection/README.md b/python-recipes/context-engineering/notebooks/section-4-tool-selection/README.md new file mode 100644 index 00000000..91e03e57 --- /dev/null +++ b/python-recipes/context-engineering/notebooks/section-4-tool-selection/README.md @@ -0,0 +1,169 @@ +# Section 4: Agents and Tools + +**โฑ๏ธ Estimated Time:** 2-2.5 hours total + +## ๐ŸŽฏ Overview + +This section teaches you how to build intelligent agents that combine RAG, memory, and tools to create adaptive, multi-step workflows. You'll progress from understanding tool fundamentals to building a complete course advisor agent. + +## ๐Ÿ“š Notebooks + +### 1. Memory Tools and LangGraph Fundamentals (45-60 minutes) +**File:** `01_tools_and_langgraph_fundamentals.ipynb` + +**What You'll Learn:** +- How memory tools enable active context engineering +- Building the 3 essential memory tools: store, search, retrieve +- LangGraph fundamentals (nodes, edges, state) +- Passive vs active memory management +- When to use memory tools vs automatic memory + +**Key Concepts:** +- Memory tools for context engineering +- Active vs passive memory management +- LangGraph state management +- Tool-driven context construction + +### 2. Redis University Course Advisor Agent (60-75 minutes) +**File:** `02_redis_university_course_advisor_agent.ipynb` + +**What You'll Build:** +A complete course advisor agent with: +- **3 Tools (Memory-Focused):** + 1. `store_memory` - Save important information to long-term memory + 2. `search_memories` - Recall user preferences and facts + 3. 
`search_courses` - Semantic search over course catalog
+
+- **Active Memory Management:**
+  - LLM decides what to remember
+  - LLM searches memories strategically
+  - Dynamic context construction
+
+- **LangGraph Workflow:**
+  - Load memory → Agent decision → Tools → Save memory
+  - Conditional routing based on LLM decisions
+  - Graph visualization
+
+**Key Concepts:**
+- Building agents with LangGraph
+- Memory-driven tool design
+- Active context engineering
+- Multi-step reasoning with memory
+- Personalized recommendations using stored preferences
+
+## 🔗 Connection to Previous Sections
+
+### Section 1: Context Types
+- System, User, Conversation, Retrieved context
+- Foundation for understanding how agents use context
+
+### Section 2: RAG Foundations
+- Semantic search with vector embeddings
+- Course catalog retrieval
+- Single-step retrieval → generation
+
+### Section 3: Memory Architecture
+- Working memory for conversation continuity
+- Long-term memory for persistent knowledge
+- Memory-enhanced RAG systems
+
+### Section 4: Agents and Tools (This Section)
+- **Combines everything:** RAG + Memory + Tools + Decision-Making
+- Agents can decide when to search, store, and recall
+- Multi-step reasoning and adaptive workflows
+
+## 📊 Progression: RAG → Memory-RAG → Agent
+
+| Feature | RAG (S2) | Memory-RAG (S3) | Agent (S4) |
+|---------|----------|-----------------|------------|
+| **Retrieval** | ✅ | ✅ | ✅ |
+| **Conversation Memory** | ❌ | ✅ | ✅ |
+| **Long-term Memory** | ❌ | ⚠️ (manual) | ✅ (automatic) |
+| **Decision Making** | ❌ | ❌ | ✅ |
+| **Multi-step Reasoning** | ❌ | ❌ | ✅ |
+| **Tool Selection** | ❌ | ❌ | ✅ |
+
+## ⚠️ Prerequisites
+
+**CRITICAL: This section requires ALL services to be running.**
+
+### Required Services:
+1. **Redis** - Vector storage and caching (port 6379)
+2. **Agent Memory Server** - Memory management (port 8088)
+3. **OpenAI API** - LLM functionality
+
+### 🚀 Quick Setup:
+
+**Option 1: Automated Setup (Recommended)**
+```bash
+# Navigate to the notebooks directory
+cd ../
+
+# Run setup script
+./setup_memory_server.sh
+```
+
+**Option 2: Manual Setup**
+See `../SETUP_GUIDE.md` for detailed instructions.
+
+### Additional Requirements:
+1. **Completed Sections 1-3** - This section builds on previous concepts
+2. **Docker Desktop running** - Required for containerized services
+3. **Course data** - Will be generated automatically by notebooks
+
+## 🚀 Getting Started
+
+1. **Start with Notebook 1** to learn tool fundamentals
+2. **Then Notebook 2** to build the complete agent
+3. **Experiment** with different queries and watch the agent work
+4. 
**Extend** the agent with additional tools (see suggestions in notebooks) + +## ๐ŸŽ“ Learning Outcomes + +By the end of this section, you will be able to: + +- โœ… Design and implement tools for LLM agents +- โœ… Build LangGraph workflows with conditional routing +- โœ… Integrate memory systems with agents +- โœ… Create agents that make multi-step decisions +- โœ… Choose between RAG, Memory-RAG, and Agent architectures +- โœ… Understand trade-offs (complexity, latency, cost, capabilities) + +## ๐Ÿ“ Archive + +The `_archive/` directory contains previous versions of Section 4 notebooks: +- `01_defining_tools.ipynb` - Original tool definition content +- `02_tool_selection_strategies.ipynb` - Tool selection patterns +- `03_building_multi_tool_intelligence.ipynb` - Multi-tool agent examples + +These were consolidated and improved in the current notebooks. + +## ๐Ÿ”— Additional Resources + +### Core Technologies +- [Redis Agent Memory Server](https://github.com/redis/agent-memory-server) - Dual-memory architecture for agents +- [RedisVL](https://github.com/redis/redis-vl) - Redis Vector Library for semantic search +- [Redis Vector Search](https://redis.io/docs/stack/search/reference/vectors/) - Vector similarity search documentation + +### LangChain & LangGraph +- [LangChain Tools Documentation](https://python.langchain.com/docs/modules/agents/tools/) +- [LangGraph Documentation](https://langchain-ai.github.io/langgraph/) +- [LangGraph Tutorials](https://langchain-ai.github.io/langgraph/tutorials/) + +### OpenAI +- [OpenAI Function Calling Guide](https://platform.openai.com/docs/guides/function-calling) +- [OpenAI API Documentation](https://platform.openai.com/docs/api-reference) + +## ๐Ÿ’ก Next Steps + +After completing this section: + +1. **Explore the reference-agent** - See a production implementation with 7 tools +2. **Build your own agent** - Apply these concepts to your use case +3. **Experiment with tools** - Try different tool combinations +4. **Optimize performance** - Explore caching, parallel execution, etc. + +--- + +**Ready to build intelligent agents? Start with Notebook 1! ๐Ÿš€** + diff --git a/python-recipes/context-engineering/notebooks/section-5-optimization-production/01_measuring_optimizing_performance.ipynb b/python-recipes/context-engineering/notebooks/section-5-optimization-production/01_measuring_optimizing_performance.ipynb new file mode 100644 index 00000000..9b4aa5ff --- /dev/null +++ b/python-recipes/context-engineering/notebooks/section-5-optimization-production/01_measuring_optimizing_performance.ipynb @@ -0,0 +1,1866 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "79ed449409dabf1c", + "metadata": {}, + "source": [ + "![Redis](https://redis.io/wp-content/uploads/2024/04/Logotype.svg?auto=webp&quality=85,75&width=120)\n", + "\n", + "# ๐Ÿ“Š Section 5, Notebook 1: Measuring and Optimizing Performance\n", + "\n", + "**โฑ๏ธ Estimated Time:** 50-60 minutes\n", + "\n", + "## ๐ŸŽฏ Learning Objectives\n", + "\n", + "By the end of this notebook, you will:\n", + "\n", + "1. **Measure** agent performance: tokens, cost, and latency\n", + "2. **Understand** where tokens are being spent in your agent\n", + "3. **Implement** hybrid retrieval to reduce token usage by 67%\n", + "4. **Build** structured data views (course catalog summary)\n", + "5. 
**Compare** before/after performance with concrete metrics\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ”— Where We Are\n", + "\n", + "### **Your Journey So Far:**\n", + "\n", + "**Section 4, Notebook 2:** You built a complete Redis University Course Advisor Agent with:\n", + "- โœ… **3 Tools**: `search_courses`, `search_memories`, `store_memory`\n", + "- โœ… **Dual Memory**: Working memory (session) + Long-term memory (persistent)\n", + "- โœ… **Basic RAG**: Semantic search over ~150 courses\n", + "- โœ… **LangGraph Workflow**: State management with tool calling loop\n", + "\n", + "**Your agent works!** It can:\n", + "- Search for courses semantically\n", + "- Remember student preferences\n", + "- Provide personalized recommendations\n", + "- Maintain conversation context\n", + "\n", + "### **But... How Efficient Is It?**\n", + "\n", + "**Questions we can't answer yet:**\n", + "- โ“ How many tokens does each query use?\n", + "- โ“ How much does each conversation cost?\n", + "- โ“ Where are tokens being spent? (system prompt? retrieved context? tools?)\n", + "- โ“ Is performance degrading over long conversations?\n", + "- โ“ Can we make it faster and cheaper without sacrificing quality?\n", + "\n", + "---\n", + "\n", + "## ๐ŸŽฏ The Problem We'll Solve\n", + "\n", + "**\"Our agent works, but is it efficient? How much does it cost to run? Can we make it faster and cheaper without sacrificing quality?\"**\n", + "\n", + "### **What We'll Learn:**\n", + "\n", + "1. **Performance Measurement** - Token counting, cost calculation, latency tracking\n", + "2. **Token Budget Analysis** - Understanding where tokens are spent\n", + "3. **Retrieval Optimization** - Hybrid retrieval (overview + targeted search)\n", + "4. **Context Window Management** - When and how to optimize\n", + "\n", + "### **What We'll Build:**\n", + "\n", + "Starting with your Section 4 agent, we'll add:\n", + "1. **Performance Tracking System** - Measure tokens, cost, latency automatically\n", + "2. **Token Counter Integration** - Track token usage across all components\n", + "3. **Course Catalog Summary View** - Pre-computed overview (one-time)\n", + "4. 
**Hybrid Retrieval Tool** - Replace basic search with intelligent hybrid approach\n", + "\n", + "### **Expected Results:**\n", + "\n", + "```\n", + "Metric Before (S4) After (NB1) Improvement\n", + "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\n", + "Tokens/query 8,500 2,800 -67%\n", + "Cost/query $0.12 $0.04 -67%\n", + "Latency 3.2s 1.6s -50%\n", + "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\n", + "```\n", + "\n", + "**๐Ÿ’ก Key Insight:** \"You can't optimize what you don't measure\"\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ“ฆ Part 0: Setup and Imports\n", + "\n", + "Let's start by importing everything we need and setting up our environment.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "336cc6d4dee4899f", + "metadata": {}, + "outputs": [], + "source": [ + "# Standard library imports\n", + "import os\n", + "import time\n", + "import asyncio\n", + "from typing import List, Dict, Any, Annotated, Optional\n", + "from dataclasses import dataclass, field\n", + "from datetime import datetime\n", + "\n", + "# LangChain and LangGraph\n", + "from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n", + "from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, SystemMessage\n", + "from langchain_core.tools import tool\n", + "from langgraph.graph import StateGraph, END\n", + "from langgraph.prebuilt import ToolNode\n", + "from langgraph.graph.message import add_messages\n", + "from pydantic import BaseModel, Field\n", + "\n", + "# Redis and Agent Memory\n", + "from agent_memory_client import MemoryAPIClient, MemoryClientConfig\n", + "from agent_memory_client.models import ClientMemoryRecord\n", + "from agent_memory_client.filters import UserId\n", + "\n", + "# RedisVL for course search\n", + "from redisvl.index import SearchIndex\n", + "from redisvl.query import VectorQuery\n", + "from redisvl.query.filter import Tag\n", + "\n", + "# Token counting\n", + "import tiktoken\n", + "\n", + "print(\"โœ… All imports successful\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "8e12dc57a59db830", + "metadata": {}, + "source": [ + "### Environment Setup\n", + "\n", + "Make sure you have these environment variables set:\n", + "- `OPENAI_API_KEY` - Your OpenAI API key\n", + "- `REDIS_URL` - Redis connection URL (default: redis://localhost:6379)\n", + "- `AGENT_MEMORY_URL` - Agent Memory Server URL (default: http://localhost:8000)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8a29463e43fb77f6", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify environment\n", + "required_vars = [\"OPENAI_API_KEY\"]\n", + "missing_vars = [var for var in required_vars if not os.getenv(var)]\n", + "\n", + "if missing_vars:\n", + " print(f\"โŒ Missing environment variables: {', '.join(missing_vars)}\")\n", + " print(\" Please set them before continuing.\")\n", + "else:\n", + " print(\"โœ… Environment variables configured\")\n", + "\n", + "# Set defaults for optional vars\n", + "REDIS_URL = os.getenv(\"REDIS_URL\", \"redis://localhost:6379\")\n", + "AGENT_MEMORY_URL = os.getenv(\"AGENT_MEMORY_URL\", \"http://localhost:8000\")\n", + "\n", + "print(f\" Redis URL: {REDIS_URL}\")\n", + "print(f\" Agent Memory URL: {AGENT_MEMORY_URL}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "cbd20624ce2e3ca8", + 
"metadata": {}, + "source": [ + "### Initialize Clients\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a83f09e96c2870f6", + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize LLM\n", + "llm = ChatOpenAI(\n", + " model=\"gpt-4o\",\n", + " temperature=0.7,\n", + " streaming=False\n", + ")\n", + "\n", + "# Initialize embeddings\n", + "embeddings = OpenAIEmbeddings(model=\"text-embedding-3-small\")\n", + "\n", + "# Initialize Agent Memory Client\n", + "memory_config = MemoryClientConfig(base_url=AGENT_MEMORY_URL)\n", + "memory_client = MemoryAPIClient(config=memory_config)\n", + "\n", + "print(\"โœ… Clients initialized\")\n", + "print(f\" LLM: {llm.model_name}\")\n", + "print(f\" Embeddings: text-embedding-3-small\")\n", + "print(f\" Memory Client: Connected to {AGENT_MEMORY_URL}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "73a5ded02170973f", + "metadata": {}, + "source": [ + "### Student Profile\n", + "\n", + "We'll use the same student profile from Section 4.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3660d74d5accbde6", + "metadata": {}, + "outputs": [], + "source": [ + "# Student profile\n", + "STUDENT_ID = \"sarah_chen_12345\"\n", + "SESSION_ID = f\"session_{datetime.now().strftime('%Y%m%d_%H%M%S')}\"\n", + "\n", + "@dataclass\n", + "class Student:\n", + " name: str\n", + " student_id: str\n", + " major: str\n", + " interests: List[str]\n", + "\n", + "sarah = Student(\n", + " name=\"Sarah Chen\",\n", + " student_id=STUDENT_ID,\n", + " major=\"Computer Science\",\n", + " interests=[\"AI\", \"Machine Learning\", \"Data Science\"]\n", + ")\n", + "\n", + "print(\"โœ… Student profile created\")\n", + "print(f\" Name: {sarah.name}\")\n", + "print(f\" Student ID: {STUDENT_ID}\")\n", + "print(f\" Session ID: {SESSION_ID}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "57ccd94b8158593c", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## ๐Ÿ“Š Part 1: Performance Measurement\n", + "\n", + "Before we can optimize, we need to measure. Let's build a comprehensive performance tracking system.\n", + "\n", + "### ๐Ÿ”ฌ Theory: Why Measurement Matters\n", + "\n", + "**The Optimization Paradox:**\n", + "- Without measurement, optimization is guesswork\n", + "- You might optimize the wrong thing\n", + "- You can't prove improvements\n", + "\n", + "**What to Measure:**\n", + "1. **Tokens** - Input tokens + output tokens (drives cost)\n", + "2. **Cost** - Actual dollar cost per query\n", + "3. **Latency** - Time from query to response\n", + "4. **Token Budget Breakdown** - Where are tokens being spent?\n", + "\n", + "**Research Connection:**\n", + "Remember the Context Rot research from Section 1? It showed that:\n", + "- More context โ‰  better performance\n", + "- Quality > quantity in context selection\n", + "- Distractors (irrelevant context) hurt performance\n", + "\n", + "**๐Ÿ’ก Key Insight:** Measurement enables optimization. 
Track everything, optimize strategically.\n" + ] + }, + { + "cell_type": "markdown", + "id": "7c909ee197eb05cb", + "metadata": {}, + "source": [ + "### Step 1: Define Performance Metrics\n", + "\n", + "Let's create a data structure to track all performance metrics.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d20fee75249fad0b", + "metadata": {}, + "outputs": [], + "source": [ + "@dataclass\n", + "class PerformanceMetrics:\n", + " \"\"\"Track performance metrics for agent queries.\"\"\"\n", + " \n", + " # Token counts\n", + " input_tokens: int = 0\n", + " output_tokens: int = 0\n", + " total_tokens: int = 0\n", + " \n", + " # Token breakdown\n", + " system_tokens: int = 0\n", + " conversation_tokens: int = 0\n", + " retrieved_tokens: int = 0\n", + " tools_tokens: int = 0\n", + " \n", + " # Cost (GPT-4o pricing: $5/1M input, $15/1M output)\n", + " input_cost: float = 0.0\n", + " output_cost: float = 0.0\n", + " total_cost: float = 0.0\n", + " \n", + " # Latency\n", + " start_time: float = field(default_factory=time.time)\n", + " end_time: Optional[float] = None\n", + " latency_seconds: Optional[float] = None\n", + " \n", + " # Metadata\n", + " query: str = \"\"\n", + " response: str = \"\"\n", + " tools_called: List[str] = field(default_factory=list)\n", + " \n", + " def finalize(self):\n", + " \"\"\"Calculate final metrics.\"\"\"\n", + " self.end_time = time.time()\n", + " self.latency_seconds = self.end_time - self.start_time\n", + " self.total_tokens = self.input_tokens + self.output_tokens\n", + " \n", + " # GPT-4o pricing (as of 2024)\n", + " self.input_cost = (self.input_tokens / 1_000_000) * 5.0\n", + " self.output_cost = (self.output_tokens / 1_000_000) * 15.0\n", + " self.total_cost = self.input_cost + self.output_cost\n", + " \n", + " def display(self):\n", + " \"\"\"Display metrics in a readable format.\"\"\"\n", + " print(\"\\n\" + \"=\" * 80)\n", + " print(\"๐Ÿ“Š PERFORMANCE METRICS\")\n", + " print(\"=\" * 80)\n", + " print(f\"\\n๐Ÿ”ข Token Usage:\")\n", + " print(f\" Input tokens: {self.input_tokens:,}\")\n", + " print(f\" Output tokens: {self.output_tokens:,}\")\n", + " print(f\" Total tokens: {self.total_tokens:,}\")\n", + " \n", + " if self.system_tokens or self.conversation_tokens or self.retrieved_tokens or self.tools_tokens:\n", + " print(f\"\\n๐Ÿ“ฆ Token Breakdown:\")\n", + " print(f\" System prompt: {self.system_tokens:,} ({self.system_tokens/self.input_tokens*100:.1f}%)\")\n", + " print(f\" Conversation: {self.conversation_tokens:,} ({self.conversation_tokens/self.input_tokens*100:.1f}%)\")\n", + " print(f\" Retrieved context: {self.retrieved_tokens:,} ({self.retrieved_tokens/self.input_tokens*100:.1f}%)\")\n", + " print(f\" Tools: {self.tools_tokens:,} ({self.tools_tokens/self.input_tokens*100:.1f}%)\")\n", + " \n", + " print(f\"\\n๐Ÿ’ฐ Cost:\")\n", + " print(f\" Input cost: ${self.input_cost:.4f}\")\n", + " print(f\" Output cost: ${self.output_cost:.4f}\")\n", + " print(f\" Total cost: ${self.total_cost:.4f}\")\n", + " \n", + " print(f\"\\nโฑ๏ธ Latency: {self.latency_seconds:.2f}s\")\n", + " \n", + " if self.tools_called:\n", + " print(f\"\\n๐Ÿ› ๏ธ Tools Called: {', '.join(self.tools_called)}\")\n", + " \n", + " print(\"=\" * 80)\n", + "\n", + "print(\"โœ… PerformanceMetrics dataclass defined\")\n", + "print(\" Tracks: tokens, cost, latency, token breakdown\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "d1803f26a0dac2a2", + "metadata": {}, + "source": [ + "### Step 2: Token Counting Functions\n", + "\n", + "We'll use 
`tiktoken` to count tokens accurately for GPT-4o.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1236a8b53c3bb545", + "metadata": {}, + "outputs": [], + "source": [ + "def count_tokens(text: str, model: str = \"gpt-4o\") -> int:\n", + " \"\"\"\n", + " Count tokens in text using tiktoken.\n", + " \n", + " Args:\n", + " text: The text to count tokens for\n", + " model: The model name (default: gpt-4o)\n", + " \n", + " Returns:\n", + " Number of tokens\n", + " \"\"\"\n", + " try:\n", + " encoding = tiktoken.encoding_for_model(model)\n", + " except KeyError:\n", + " # Fallback to cl100k_base for newer models\n", + " encoding = tiktoken.get_encoding(\"cl100k_base\")\n", + " \n", + " return len(encoding.encode(text))\n", + "\n", + "def count_messages_tokens(messages: List[BaseMessage], model: str = \"gpt-4o\") -> int:\n", + " \"\"\"\n", + " Count tokens in a list of messages.\n", + " \n", + " Args:\n", + " messages: List of LangChain messages\n", + " model: The model name\n", + " \n", + " Returns:\n", + " Total number of tokens\n", + " \"\"\"\n", + " total = 0\n", + " for message in messages:\n", + " # Each message has overhead: role + content + formatting\n", + " total += 4 # Message formatting overhead\n", + " total += count_tokens(message.content, model)\n", + " total += 2 # Conversation formatting overhead\n", + " return total\n", + "\n", + "print(\"โœ… Token counting functions defined\")\n", + "print(\" count_tokens() - Count tokens in text\")\n", + "print(\" count_messages_tokens() - Count tokens in message list\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "a21d7ac898ace6f2", + "metadata": {}, + "source": [ + "### Step 3: Test Token Counting\n", + "\n", + "Let's verify our token counting works correctly.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4670e6978068d269", + "metadata": {}, + "outputs": [], + "source": [ + "# Test token counting\n", + "test_text = \"What machine learning courses are available at Redis University?\"\n", + "token_count = count_tokens(test_text)\n", + "\n", + "print(f\"Test query: '{test_text}'\")\n", + "print(f\"Token count: {token_count}\")\n", + "\n", + "# Test message counting\n", + "test_messages = [\n", + " SystemMessage(content=\"You are a helpful course advisor.\"),\n", + " HumanMessage(content=test_text),\n", + " AIMessage(content=\"Let me search for machine learning courses for you.\")\n", + "]\n", + "message_tokens = count_messages_tokens(test_messages)\n", + "\n", + "print(f\"\\nTest messages (3 messages):\")\n", + "print(f\"Total tokens: {message_tokens}\")\n", + "print(\"โœ… Token counting verified\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "f4375ac37782c364", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## ๐Ÿ” Part 2: Baseline Performance Measurement\n", + "\n", + "Now let's measure the performance of our Section 4 agent to establish a baseline.\n", + "\n", + "### Load Section 4 Agent Components\n", + "\n", + "First, we need to recreate the Section 4 agent. 
We'll load the course catalog and define the same 3 tools.\n" + ] + }, + { + "cell_type": "markdown", + "id": "8cd7b6c8b56f10ef", + "metadata": {}, + "source": [ + "### Course Manager (from Section 4)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7770778773585169", + "metadata": {}, + "outputs": [], + "source": [ + "class CourseManager:\n", + " \"\"\"Manage course catalog with Redis vector search.\"\"\"\n", + " \n", + " def __init__(self, redis_url: str, index_name: str = \"course_catalog\"):\n", + " self.redis_url = redis_url\n", + " self.index_name = index_name\n", + " self.embeddings = OpenAIEmbeddings(model=\"text-embedding-3-small\")\n", + " \n", + " # Initialize search index\n", + " self.index = SearchIndex.from_existing(\n", + " name=self.index_name,\n", + " redis_url=self.redis_url\n", + " )\n", + " \n", + " async def search_courses(self, query: str, limit: int = 5) -> List[Dict[str, Any]]:\n", + " \"\"\"Search for courses using semantic search.\"\"\"\n", + " # Create query embedding\n", + " query_embedding = await self.embeddings.aembed_query(query)\n", + " \n", + " # Create vector query\n", + " vector_query = VectorQuery(\n", + " vector=query_embedding,\n", + " vector_field_name=\"content_vector\",\n", + " return_fields=[\"course_code\", \"title\", \"description\", \"department\", \"credits\", \"format\"],\n", + " num_results=limit\n", + " )\n", + " \n", + " # Execute search\n", + " results = self.index.query(vector_query)\n", + " return results\n", + "\n", + "# Initialize course manager\n", + "course_manager = CourseManager(redis_url=REDIS_URL)\n", + "\n", + "print(\"โœ… Course manager initialized\")\n", + "print(f\" Index: {course_manager.index_name}\")\n", + "print(f\" Redis: {REDIS_URL}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "ea7a83ed5953cdbd", + "metadata": {}, + "source": [ + "### Define the 3 Tools (from Section 4)\n", + "\n", + "Now let's define the same 3 tools from Section 4.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b1db85c3203e73c9", + "metadata": {}, + "outputs": [], + "source": [ + "# Tool 1: search_courses\n", + "class SearchCoursesInput(BaseModel):\n", + " \"\"\"Input schema for searching courses.\"\"\"\n", + " query: str = Field(description=\"Natural language query to search for courses\")\n", + " limit: int = Field(default=5, description=\"Maximum number of courses to return\")\n", + "\n", + "@tool(\"search_courses\", args_schema=SearchCoursesInput)\n", + "async def search_courses(query: str, limit: int = 5) -> str:\n", + " \"\"\"\n", + " Search for courses using semantic search based on topics, descriptions, or characteristics.\n", + "\n", + " Use this tool when students ask about:\n", + " - Topics or subjects: \"machine learning courses\", \"database courses\"\n", + " - Course characteristics: \"online courses\", \"beginner courses\"\n", + " - General exploration: \"what courses are available?\"\n", + "\n", + " Returns: Formatted list of matching courses with details.\n", + " \"\"\"\n", + " results = await course_manager.search_courses(query, limit=limit)\n", + "\n", + " if not results:\n", + " return \"No courses found matching your query.\"\n", + "\n", + " output = []\n", + " for i, course in enumerate(results, 1):\n", + " output.append(f\"{i}. 
{course['title']} ({course.get('course_code', course.get('course_id', 'N/A'))})\")\n", + " output.append(f\" Department: {course['department']}\")\n", + " output.append(f\" Credits: {course['credits']}\")\n", + " output.append(f\" Format: {course['format']}\")\n", + " output.append(f\" Description: {course['description'][:150]}...\")\n", + " output.append(\"\")\n", + "\n", + " return \"\\n\".join(output)\n", + "\n", + "print(\"โœ… Tool 1 defined: search_courses\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8a2c3f02ab96a7ce", + "metadata": {}, + "outputs": [], + "source": [ + "# Tool 2: search_memories\n", + "class SearchMemoriesInput(BaseModel):\n", + " \"\"\"Input schema for searching memories.\"\"\"\n", + " query: str = Field(description=\"Natural language query to search for in user's memory\")\n", + " limit: int = Field(default=5, description=\"Maximum number of memories to return\")\n", + "\n", + "@tool(\"search_memories\", args_schema=SearchMemoriesInput)\n", + "async def search_memories(query: str, limit: int = 5) -> str:\n", + " \"\"\"\n", + " Search the user's long-term memory for relevant facts, preferences, and past interactions.\n", + "\n", + " Use this tool when you need to:\n", + " - Recall user preferences: \"What format does the user prefer?\"\n", + " - Remember past goals: \"What career path is the user interested in?\"\n", + " - Personalize recommendations: \"What are the user's interests?\"\n", + "\n", + " Returns: List of relevant memories with content and metadata.\n", + " \"\"\"\n", + " try:\n", + " results = await memory_client.search_long_term_memory(\n", + " text=query,\n", + " user_id=UserId(eq=STUDENT_ID),\n", + " limit=limit\n", + " )\n", + "\n", + " if not results.memories or len(results.memories) == 0:\n", + " return \"No relevant memories found.\"\n", + "\n", + " output = []\n", + " for i, memory in enumerate(results.memories, 1):\n", + " output.append(f\"{i}. 
{memory.text}\")\n", + " if memory.topics:\n", + " output.append(f\" Topics: {', '.join(memory.topics)}\")\n", + "\n", + " return \"\\n\".join(output)\n", + " except Exception as e:\n", + " return f\"Error searching memories: {str(e)}\"\n", + "\n", + "print(\"โœ… Tool 2 defined: search_memories\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8caea4c8f6933cf6", + "metadata": {}, + "outputs": [], + "source": [ + "# Tool 3: store_memory\n", + "class StoreMemoryInput(BaseModel):\n", + " \"\"\"Input schema for storing memories.\"\"\"\n", + " text: str = Field(description=\"The information to store as a clear, factual statement\")\n", + " memory_type: str = Field(default=\"semantic\", description=\"Type: 'semantic' or 'episodic'\")\n", + " topics: List[str] = Field(default=[], description=\"Optional tags to categorize the memory\")\n", + "\n", + "@tool(\"store_memory\", args_schema=StoreMemoryInput)\n", + "async def store_memory(text: str, memory_type: str = \"semantic\", topics: List[str] = []) -> str:\n", + " \"\"\"\n", + " Store important information to the user's long-term memory.\n", + "\n", + " Use this tool when the user shares:\n", + " - Preferences: \"I prefer online courses\"\n", + " - Goals: \"I want to work in AI\"\n", + " - Important facts: \"I have a part-time job\"\n", + " - Constraints: \"I can only take 2 courses per semester\"\n", + "\n", + " Returns: Confirmation message.\n", + " \"\"\"\n", + " try:\n", + " memory = ClientMemoryRecord(\n", + " text=text,\n", + " user_id=STUDENT_ID,\n", + " memory_type=memory_type,\n", + " topics=topics or []\n", + " )\n", + "\n", + " await memory_client.create_long_term_memory([memory])\n", + " return f\"โœ… Stored to long-term memory: {text}\"\n", + " except Exception as e:\n", + " return f\"Error storing memory: {str(e)}\"\n", + "\n", + "print(\"โœ… Tool 3 defined: store_memory\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a9985b853e742c1", + "metadata": {}, + "outputs": [], + "source": [ + "# Collect all tools\n", + "tools = [search_courses, search_memories, store_memory]\n", + "\n", + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"๐Ÿ› ๏ธ BASELINE AGENT TOOLS (from Section 4)\")\n", + "print(\"=\" * 80)\n", + "for i, tool in enumerate(tools, 1):\n", + " print(f\"{i}. 
{tool.name}\")\n", + " print(f\" Description: {tool.description.split('.')[0]}\")\n", + "print(\"=\" * 80)\n" + ] + }, + { + "cell_type": "markdown", + "id": "2632b73b13009799", + "metadata": {}, + "source": [ + "### Define AgentState (from Section 4)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8d7c25622774a2b5", + "metadata": {}, + "outputs": [], + "source": [ + "class AgentState(BaseModel):\n", + " \"\"\"State for the course advisor agent.\"\"\"\n", + " messages: Annotated[List[BaseMessage], add_messages]\n", + " student_id: str\n", + " session_id: str\n", + " context: Dict[str, Any] = {}\n", + "\n", + "print(\"โœ… AgentState defined\")\n", + "print(\" Fields: messages, student_id, session_id, context\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "3b5545401f570fd5", + "metadata": {}, + "source": [ + "### Build Baseline Agent Workflow\n", + "\n", + "Now let's build the complete Section 4 agent workflow.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "84d381c72553b554", + "metadata": {}, + "outputs": [], + "source": [ + "# Node 1: Load working memory\n", + "async def load_memory(state: AgentState) -> AgentState:\n", + " \"\"\"Load conversation history from working memory.\"\"\"\n", + " try:\n", + " from agent_memory_client.filters import SessionId\n", + "\n", + " # Get working memory for this session\n", + " _, working_memory = await memory_client.get_or_create_working_memory(\n", + " user_id=UserId(eq=state.student_id),\n", + " session_id=SessionId(eq=state.session_id),\n", + " model_name=\"gpt-4o\"\n", + " )\n", + "\n", + " # Add to context\n", + " if working_memory and working_memory.messages:\n", + " state.context[\"working_memory_loaded\"] = True\n", + " state.context[\"memory_message_count\"] = len(working_memory.messages)\n", + " except Exception as e:\n", + " state.context[\"working_memory_loaded\"] = False\n", + " state.context[\"memory_error\"] = str(e)\n", + "\n", + " return state\n", + "\n", + "print(\"โœ… Node 1: load_memory\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "32fc27831b5ccc0b", + "metadata": {}, + "outputs": [], + "source": [ + "# Node 2: Agent (LLM with tools)\n", + "async def agent_node(state: AgentState) -> AgentState:\n", + " \"\"\"The agent decides what to do: call tools or respond to the user.\"\"\"\n", + " system_message = SystemMessage(content=\"\"\"\n", + "You are a helpful Redis University course advisor assistant.\n", + "\n", + "Your role:\n", + "- Help students find courses that match their interests and goals\n", + "- Remember student preferences and use them for personalized recommendations\n", + "- Store important information about students for future conversations\n", + "\n", + "Guidelines:\n", + "- Use search_courses to find relevant courses\n", + "- Use search_memories to recall student preferences and past interactions\n", + "- Use store_memory when students share important preferences, goals, or constraints\n", + "- Be conversational and helpful\n", + "- Provide specific course recommendations with details\n", + "\"\"\")\n", + "\n", + " # Bind tools to LLM\n", + " llm_with_tools = llm.bind_tools(tools)\n", + "\n", + " # Call LLM with system message + conversation history\n", + " messages = [system_message] + state.messages\n", + " response = await llm_with_tools.ainvoke(messages)\n", + "\n", + " # Add response to state\n", + " state.messages.append(response)\n", + "\n", + " return state\n", + "\n", + "print(\"โœ… Node 2: agent_node\")\n" + ] + }, + { 
+ "cell_type": "code", + "execution_count": null, + "id": "ca1725143f366110", + "metadata": {}, + "outputs": [], + "source": [ + "# Node 3: Save working memory\n", + "async def save_memory(state: AgentState) -> AgentState:\n", + " \"\"\"Save updated conversation to working memory.\"\"\"\n", + " try:\n", + " from agent_memory_client.filters import SessionId\n", + "\n", + " # Save working memory\n", + " await memory_client.put_working_memory(\n", + " user_id=state.student_id,\n", + " session_id=state.session_id,\n", + " memory=working_memory,\n", + " model_name=\"gpt-4o\"\n", + " )\n", + "\n", + " state.context[\"working_memory_saved\"] = True\n", + " except Exception as e:\n", + " state.context[\"working_memory_saved\"] = False\n", + " state.context[\"save_error\"] = str(e)\n", + "\n", + " return state\n", + "\n", + "print(\"โœ… Node 3: save_memory\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "28fe23ddefeea004", + "metadata": {}, + "outputs": [], + "source": [ + "# Routing logic\n", + "def should_continue(state: AgentState) -> str:\n", + " \"\"\"Determine if we should continue to tools or end.\"\"\"\n", + " last_message = state.messages[-1]\n", + "\n", + " # If the LLM makes a tool call, route to tools\n", + " if hasattr(last_message, 'tool_calls') and last_message.tool_calls:\n", + " return \"tools\"\n", + "\n", + " # Otherwise, we're done and should save memory\n", + " return \"save_memory\"\n", + "\n", + "print(\"โœ… Routing: should_continue\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "881f339512e979d3", + "metadata": {}, + "outputs": [], + "source": [ + "# Build the graph\n", + "workflow = StateGraph(AgentState)\n", + "\n", + "# Add nodes\n", + "workflow.add_node(\"load_memory\", load_memory)\n", + "workflow.add_node(\"agent\", agent_node)\n", + "workflow.add_node(\"tools\", ToolNode(tools))\n", + "workflow.add_node(\"save_memory\", save_memory)\n", + "\n", + "# Define edges\n", + "workflow.set_entry_point(\"load_memory\")\n", + "workflow.add_edge(\"load_memory\", \"agent\")\n", + "workflow.add_conditional_edges(\n", + " \"agent\",\n", + " should_continue,\n", + " {\n", + " \"tools\": \"tools\",\n", + " \"save_memory\": \"save_memory\"\n", + " }\n", + ")\n", + "workflow.add_edge(\"tools\", \"agent\") # After tools, go back to agent\n", + "workflow.add_edge(\"save_memory\", END)\n", + "\n", + "# Compile the graph\n", + "baseline_agent = workflow.compile()\n", + "\n", + "print(\"โœ… Baseline agent graph compiled\")\n", + "print(\" Nodes: load_memory, agent, tools, save_memory\")\n", + "print(\" This is the same agent from Section 4\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "64b692fc3b0d8771", + "metadata": {}, + "source": [ + "### Run Baseline Performance Test\n", + "\n", + "Now let's run a test query and measure its performance.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ad19c718d5b2ec8a", + "metadata": {}, + "outputs": [], + "source": [ + "async def run_baseline_agent_with_metrics(user_message: str) -> PerformanceMetrics:\n", + " \"\"\"\n", + " Run the baseline agent and track performance metrics.\n", + "\n", + " Args:\n", + " user_message: The user's input\n", + "\n", + " Returns:\n", + " PerformanceMetrics object with all measurements\n", + " \"\"\"\n", + " # Initialize metrics\n", + " metrics = PerformanceMetrics(query=user_message)\n", + "\n", + " print(\"=\" * 80)\n", + " print(f\"๐Ÿ‘ค USER: {user_message}\")\n", + " print(\"=\" * 80)\n", + "\n", + " # Create initial 
state\n", + " initial_state = AgentState(\n", + " messages=[HumanMessage(content=user_message)],\n", + " student_id=STUDENT_ID,\n", + " session_id=SESSION_ID,\n", + " context={}\n", + " )\n", + "\n", + " # Run the agent\n", + " print(\"\\n๐Ÿค– Running baseline agent...\")\n", + " final_state = await baseline_agent.ainvoke(initial_state)\n", + "\n", + " # Extract response\n", + " last_message = final_state[\"messages\"][-1]\n", + " if isinstance(last_message, AIMessage):\n", + " metrics.response = last_message.content\n", + "\n", + " # Count tokens for all messages\n", + " metrics.input_tokens = count_messages_tokens(final_state[\"messages\"][:-1]) # All except last\n", + " metrics.output_tokens = count_tokens(metrics.response)\n", + "\n", + " # Estimate token breakdown (approximate)\n", + " system_prompt = \"\"\"You are a helpful Redis University course advisor assistant.\n", + "\n", + "Your role:\n", + "- Help students find courses that match their interests and goals\n", + "- Remember student preferences and use them for personalized recommendations\n", + "- Store important information about students for future conversations\n", + "\n", + "Guidelines:\n", + "- Use search_courses to find relevant courses\n", + "- Use search_memories to recall student preferences and past interactions\n", + "- Use store_memory when students share important preferences, goals, or constraints\n", + "- Be conversational and helpful\n", + "- Provide specific course recommendations with details\"\"\"\n", + "\n", + " metrics.system_tokens = count_tokens(system_prompt)\n", + " metrics.conversation_tokens = count_tokens(user_message)\n", + "\n", + " # Tools tokens (approximate - all 3 tool definitions)\n", + " metrics.tools_tokens = sum(count_tokens(str(tool.args_schema.model_json_schema())) +\n", + " count_tokens(tool.description) for tool in tools)\n", + "\n", + " # Retrieved context (remaining tokens)\n", + " metrics.retrieved_tokens = metrics.input_tokens - metrics.system_tokens - metrics.conversation_tokens - metrics.tools_tokens\n", + " if metrics.retrieved_tokens < 0:\n", + " metrics.retrieved_tokens = 0\n", + "\n", + " # Track tools called\n", + " for msg in final_state[\"messages\"]:\n", + " if hasattr(msg, 'tool_calls') and msg.tool_calls:\n", + " for tool_call in msg.tool_calls:\n", + " metrics.tools_called.append(tool_call['name'])\n", + "\n", + " # Finalize metrics\n", + " metrics.finalize()\n", + "\n", + " # Display response\n", + " print(f\"\\n๐Ÿค– AGENT: {metrics.response[:200]}...\")\n", + "\n", + " return metrics\n", + "\n", + "print(\"โœ… Baseline agent runner with metrics defined\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "e8d7e072305b275d", + "metadata": {}, + "source": [ + "### Test 1: Simple Course Search\n", + "\n", + "Let's test with a simple course search query.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f100063092ec96ab", + "metadata": {}, + "outputs": [], + "source": [ + "# Test 1: Simple course search\n", + "baseline_metrics_1 = await run_baseline_agent_with_metrics(\n", + " \"What machine learning courses are available?\"\n", + ")\n", + "\n", + "baseline_metrics_1.display()\n" + ] + }, + { + "cell_type": "markdown", + "id": "cfd72eb83b1e4bb6", + "metadata": {}, + "source": [ + "### Test 2: Query with Memory\n", + "\n", + "Let's test a query that might use memory.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c0e4d2b973d4c713", + "metadata": {}, + "outputs": [], + "source": [ + "# Test 2: Query with memory\n", + 
"baseline_metrics_2 = await run_baseline_agent_with_metrics(\n", + " \"I prefer online courses and I'm interested in AI. What would you recommend?\"\n", + ")\n", + "\n", + "baseline_metrics_2.display()\n" + ] + }, + { + "cell_type": "markdown", + "id": "11850c72f117e034", + "metadata": {}, + "source": [ + "### Baseline Performance Summary\n", + "\n", + "Let's summarize the baseline performance.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1cd2833673d1e20e", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"๐Ÿ“Š BASELINE PERFORMANCE SUMMARY (Section 4 Agent)\")\n", + "print(\"=\" * 80)\n", + "print(\"\\nTest 1: Simple course search\")\n", + "print(f\" Tokens: {baseline_metrics_1.total_tokens:,}\")\n", + "print(f\" Cost: ${baseline_metrics_1.total_cost:.4f}\")\n", + "print(f\" Latency: {baseline_metrics_1.latency_seconds:.2f}s\")\n", + "\n", + "print(\"\\nTest 2: Query with memory\")\n", + "print(f\" Tokens: {baseline_metrics_2.total_tokens:,}\")\n", + "print(f\" Cost: ${baseline_metrics_2.total_cost:.4f}\")\n", + "print(f\" Latency: {baseline_metrics_2.latency_seconds:.2f}s\")\n", + "\n", + "# Calculate averages\n", + "avg_tokens = (baseline_metrics_1.total_tokens + baseline_metrics_2.total_tokens) / 2\n", + "avg_cost = (baseline_metrics_1.total_cost + baseline_metrics_2.total_cost) / 2\n", + "avg_latency = (baseline_metrics_1.latency_seconds + baseline_metrics_2.latency_seconds) / 2\n", + "\n", + "print(\"\\n\" + \"-\" * 80)\n", + "print(\"AVERAGE BASELINE PERFORMANCE:\")\n", + "print(f\" Tokens/query: {avg_tokens:,.0f}\")\n", + "print(f\" Cost/query: ${avg_cost:.4f}\")\n", + "print(f\" Latency: {avg_latency:.2f}s\")\n", + "print(\"=\" * 80)\n" + ] + }, + { + "cell_type": "markdown", + "id": "b7976821d5c34331", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## ๐Ÿ” Part 3: Token Distribution Analysis\n", + "\n", + "Now let's analyze where tokens are being spent.\n", + "\n", + "### Understanding Token Breakdown\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cc9f30bf450ee76e", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"=\" * 80)\n", + "print(\"๐Ÿ“ฆ TOKEN DISTRIBUTION ANALYSIS\")\n", + "print(\"=\" * 80)\n", + "\n", + "# Use Test 1 metrics for analysis\n", + "print(f\"\\nTotal Input Tokens: {baseline_metrics_1.input_tokens:,}\")\n", + "print(\"\\nBreakdown:\")\n", + "print(f\" 1. System Prompt: {baseline_metrics_1.system_tokens:,} ({baseline_metrics_1.system_tokens/baseline_metrics_1.input_tokens*100:.1f}%)\")\n", + "print(f\" 2. Conversation: {baseline_metrics_1.conversation_tokens:,} ({baseline_metrics_1.conversation_tokens/baseline_metrics_1.input_tokens*100:.1f}%)\")\n", + "print(f\" 3. Tools (3 tools): {baseline_metrics_1.tools_tokens:,} ({baseline_metrics_1.tools_tokens/baseline_metrics_1.input_tokens*100:.1f}%)\")\n", + "print(f\" 4. 
Retrieved Context: {baseline_metrics_1.retrieved_tokens:,} ({baseline_metrics_1.retrieved_tokens/baseline_metrics_1.input_tokens*100:.1f}%)\")\n", + "\n", + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"๐ŸŽฏ KEY INSIGHT: Retrieved Context is the Biggest Consumer\")\n", + "print(\"=\" * 80)\n", + "print(\"\"\"\n", + "The retrieved context (course search results) uses the most tokens!\n", + "\n", + "Why?\n", + "- We search for 5 courses by default\n", + "- Each course has: title, description, department, credits, format\n", + "- Descriptions can be 150+ characters each\n", + "- Total: ~3,000-4,000 tokens just for retrieved courses\n", + "\n", + "This is our optimization opportunity!\n", + "\"\"\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "ceec25e6f72553d9", + "metadata": {}, + "source": [ + "### The Context Rot Connection\n", + "\n", + "Remember the Context Rot research from Section 1?\n", + "\n", + "**Key Findings:**\n", + "1. **More context โ‰  better performance** - Adding more retrieved documents doesn't always help\n", + "2. **Distractors hurt performance** - Similar-but-wrong information confuses the LLM\n", + "3. **Quality > Quantity** - Relevant, focused context beats large, unfocused context\n", + "\n", + "**Our Problem:**\n", + "- We're retrieving 5 full courses every time (even for \"What courses are available?\")\n", + "- Many queries don't need full course details\n", + "- We're paying for tokens we don't need\n", + "\n", + "**The Solution:**\n", + "- **Hybrid Retrieval** - Provide overview first, then details on demand\n", + "- **Structured Views** - Pre-compute catalog summaries\n", + "- **Smart Retrieval** - Only retrieve full details when needed\n" + ] + }, + { + "cell_type": "markdown", + "id": "351d61241344f46a", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## ๐ŸŽฏ Part 4: Optimization Strategy - Hybrid Retrieval\n", + "\n", + "Now let's implement our optimization: **Hybrid Retrieval**.\n", + "\n", + "### ๐Ÿ”ฌ Theory: Hybrid Retrieval\n", + "\n", + "**The Problem:**\n", + "- Static context (always the same) = wasteful for dynamic queries\n", + "- RAG (always search) = wasteful for overview queries\n", + "- Need: Smart combination of both\n", + "\n", + "**The Solution: Hybrid Retrieval**\n", + "\n", + "```\n", + "Query Type Strategy Tokens\n", + "โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\n", + "\"What courses โ†’ Static overview ~800\n", + " are available?\" (pre-computed)\n", + "\n", + "\"Tell me about โ†’ Overview + targeted ~2,200\n", + " Redis courses\" search (hybrid)\n", + "\n", + "\"RU202 details\" โ†’ Targeted search only ~1,500\n", + " (specific query)\n", + "```\n", + "\n", + "**Benefits:**\n", + "- โœ… 60-70% token reduction for overview queries\n", + "- โœ… Better UX (quick overview, then details)\n", + "- โœ… Maintains quality (still has full search capability)\n", + "- โœ… Scales better (overview doesn't grow with catalog size)\n" + ] + }, + { + "cell_type": "markdown", + "id": "532cd899790f2380", + "metadata": {}, + "source": [ + "### Step 1: Build Course Catalog Summary\n", + "\n", + "First, let's create a pre-computed overview of the entire course catalog.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "453f4a8d11d2b4e", + "metadata": {}, + "outputs": [], + "source": [ + "async def build_catalog_summary() -> str:\n", + " \"\"\"\n", + " Build a comprehensive summary of the course 
catalog.\n", + "\n", + " This is done once and reused for all overview queries.\n", + "\n", + " Returns:\n", + " Formatted catalog summary\n", + " \"\"\"\n", + " print(\"๐Ÿ”จ Building course catalog summary...\")\n", + " print(\" This is a one-time operation\")\n", + "\n", + " # Get all courses (we'll group by department)\n", + " all_courses = await course_manager.search_courses(\"courses\", limit=150)\n", + "\n", + " # Group by department\n", + " departments = {}\n", + " for course in all_courses:\n", + " dept = course.get('department', 'Other')\n", + " if dept not in departments:\n", + " departments[dept] = []\n", + " departments[dept].append(course)\n", + "\n", + " # Build summary\n", + " summary_parts = []\n", + " summary_parts.append(\"=\" * 80)\n", + " summary_parts.append(\"REDIS UNIVERSITY COURSE CATALOG OVERVIEW\")\n", + " summary_parts.append(\"=\" * 80)\n", + " summary_parts.append(f\"\\nTotal Courses: {len(all_courses)}\")\n", + " summary_parts.append(f\"Departments: {len(departments)}\")\n", + " summary_parts.append(\"\\n\" + \"-\" * 80)\n", + "\n", + " # Summarize each department\n", + " for dept, courses in sorted(departments.items()):\n", + " summary_parts.append(f\"\\n๐Ÿ“š {dept} ({len(courses)} courses)\")\n", + "\n", + " # List course titles\n", + " for course in courses[:10]: # Limit to first 10 per department\n", + " summary_parts.append(f\" โ€ข {course['title']} ({course.get('course_code', course.get('course_id', 'N/A'))})\")\n", + "\n", + " if len(courses) > 10:\n", + " summary_parts.append(f\" ... and {len(courses) - 10} more courses\")\n", + "\n", + " summary_parts.append(\"\\n\" + \"=\" * 80)\n", + " summary_parts.append(\"For detailed information about specific courses, please ask!\")\n", + " summary_parts.append(\"=\" * 80)\n", + "\n", + " summary = \"\\n\".join(summary_parts)\n", + "\n", + " print(f\"โœ… Catalog summary built\")\n", + " print(f\" Total courses: {len(all_courses)}\")\n", + " print(f\" Departments: {len(departments)}\")\n", + " print(f\" Summary tokens: {count_tokens(summary):,}\")\n", + "\n", + " return summary\n", + "\n", + "# Build the summary\n", + "CATALOG_SUMMARY = await build_catalog_summary()\n", + "\n", + "# Display a preview\n", + "print(\"\\n๐Ÿ“„ CATALOG SUMMARY PREVIEW:\")\n", + "print(CATALOG_SUMMARY[:500] + \"...\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "98db4acdfb69e1e9", + "metadata": {}, + "source": [ + "### Step 2: Implement Hybrid Retrieval Tool\n", + "\n", + "Now let's create a new tool that uses hybrid retrieval.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d244926ffdcde96f", + "metadata": {}, + "outputs": [], + "source": [ + "# Define the function first\n", + "async def search_courses_hybrid_func(query: str, limit: int = 5, overview_only: bool = False) -> str:\n", + " \"\"\"\n", + " Search for courses using hybrid retrieval (overview + targeted search).\n", + "\n", + " This tool intelligently combines:\n", + " 1. Pre-computed catalog overview (always included for context)\n", + " 2. 
Targeted semantic search (only when needed)\n", + "\n", + " Use this tool when students ask about:\n", + " - General exploration: \"what courses are available?\" โ†’ overview_only=True\n", + " - Specific topics: \"machine learning courses\" โ†’ overview_only=False (overview + search)\n", + " - Course details: \"tell me about RU202\" โ†’ overview_only=False\n", + "\n", + " The hybrid approach reduces tokens by 60-70% for overview queries while maintaining\n", + " full search capability for specific queries.\n", + "\n", + " Returns: Catalog overview + optional targeted search results.\n", + " \"\"\"\n", + " output = []\n", + "\n", + " # Determine if this is a general overview query\n", + " general_queries = [\"what courses\", \"available courses\", \"course catalog\", \"all courses\", \"courses offered\"]\n", + " is_general = any(phrase in query.lower() for phrase in general_queries)\n", + "\n", + " if is_general or overview_only:\n", + " # Return overview only\n", + " output.append(\"๐Ÿ“š Here's an overview of our course catalog:\\n\")\n", + " output.append(CATALOG_SUMMARY)\n", + " output.append(\"\\n๐Ÿ’ก Ask me about specific topics or departments for detailed recommendations!\")\n", + " else:\n", + " # Return overview + targeted search\n", + " output.append(\"๐Ÿ“š Course Catalog Context:\\n\")\n", + " output.append(CATALOG_SUMMARY[:400] + \"...\\n\") # Abbreviated overview\n", + " output.append(\"\\n๐Ÿ” Courses matching your query:\\n\")\n", + "\n", + " # Perform targeted search\n", + " results = await course_manager.search_courses(query, limit=limit)\n", + "\n", + " if not results:\n", + " output.append(\"No courses found matching your specific query.\")\n", + " else:\n", + " for i, course in enumerate(results, 1):\n", + " output.append(f\"\\n{i}. {course['title']} ({course.get('course_code', course.get('course_id', 'N/A'))})\")\n", + " output.append(f\" Department: {course['department']}\")\n", + " output.append(f\" Credits: {course['credits']}\")\n", + " output.append(f\" Format: {course['format']}\")\n", + " output.append(f\" Description: {course['description'][:150]}...\")\n", + "\n", + " return \"\\n\".join(output)\n", + "\n", + "# Create the tool using StructuredTool\n", + "from langchain_core.tools import StructuredTool\n", + "\n", + "search_courses_hybrid = StructuredTool.from_function(\n", + " coroutine=search_courses_hybrid_func,\n", + " name=\"search_courses_hybrid\",\n", + " description=\"\"\"Search for courses using hybrid retrieval (overview + targeted search).\n", + "\n", + "This tool intelligently combines:\n", + "1. Pre-computed catalog overview (always included for context)\n", + "2. Targeted semantic search (only when needed)\n", + "\n", + "Use this tool when students ask about:\n", + "- General exploration: \"what courses are available?\" โ†’ overview_only=True\n", + "- Specific topics: \"machine learning courses\" โ†’ overview_only=False (overview + search)\n", + "- Course details: \"tell me about RU202\" โ†’ overview_only=False\n", + "\n", + "The hybrid approach reduces tokens by 60-70% for overview queries while maintaining\n", + "full search capability for specific queries.\n", + "\n", + "Args:\n", + " query: Natural language query to search for courses\n", + " limit: Maximum number of detailed courses to return (default: 5)\n", + " overview_only: If True, return only catalog overview. 
If False, return overview + targeted search results (default: False)\n", + "\n", + "Returns: Catalog overview + optional targeted search results.\n", + "\"\"\"\n", + ")\n", + "\n", + "print(\"โœ… Hybrid retrieval tool defined: search_courses_hybrid\")\n", + "print(\" Strategy: Overview + targeted search\")\n", + "print(\" Benefit: 60-70% token reduction for overview queries\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "3569681c5b61bc51", + "metadata": {}, + "source": [ + "### Step 3: Build Optimized Agent with Hybrid Retrieval\n", + "\n", + "Now let's create a new agent that uses the hybrid retrieval tool.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e5d4d12a9457e2af", + "metadata": {}, + "outputs": [], + "source": [ + "# New tool list with hybrid retrieval\n", + "optimized_tools = [\n", + " search_courses_hybrid, # Replaced search_courses with hybrid version\n", + " search_memories,\n", + " store_memory\n", + "]\n", + "\n", + "print(\"โœ… Optimized tools list created\")\n", + "print(\" Tool 1: search_courses_hybrid (NEW - uses hybrid retrieval)\")\n", + "print(\" Tool 2: search_memories (same)\")\n", + "print(\" Tool 3: store_memory (same)\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d41855517d0bc593", + "metadata": {}, + "outputs": [], + "source": [ + "# Optimized agent node (updated system prompt)\n", + "async def optimized_agent_node(state: AgentState) -> AgentState:\n", + " \"\"\"The optimized agent with hybrid retrieval.\"\"\"\n", + " system_message = SystemMessage(content=\"\"\"\n", + "You are a helpful Redis University course advisor assistant.\n", + "\n", + "Your role:\n", + "- Help students find courses that match their interests and goals\n", + "- Remember student preferences and use them for personalized recommendations\n", + "- Store important information about students for future conversations\n", + "\n", + "Guidelines:\n", + "- Use search_courses_hybrid to find courses:\n", + " * For general queries (\"what courses are available?\"), the tool provides an overview\n", + " * For specific queries (\"machine learning courses\"), it provides overview + targeted results\n", + "- Use search_memories to recall student preferences and past interactions\n", + "- Use store_memory when students share important preferences, goals, or constraints\n", + "- Be conversational and helpful\n", + "- Provide specific course recommendations with details\n", + "\"\"\")\n", + "\n", + " # Bind optimized tools to LLM\n", + " llm_with_tools = llm.bind_tools(optimized_tools)\n", + "\n", + " # Call LLM with system message + conversation history\n", + " messages = [system_message] + state.messages\n", + " response = await llm_with_tools.ainvoke(messages)\n", + "\n", + " # Add response to state\n", + " state.messages.append(response)\n", + "\n", + " return state\n", + "\n", + "print(\"โœ… Optimized agent node defined\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "31df2e372715ea10", + "metadata": {}, + "outputs": [], + "source": [ + "# Build optimized agent graph\n", + "optimized_workflow = StateGraph(AgentState)\n", + "\n", + "# Add nodes (reuse load_memory and save_memory, use new agent node)\n", + "optimized_workflow.add_node(\"load_memory\", load_memory)\n", + "optimized_workflow.add_node(\"agent\", optimized_agent_node)\n", + "optimized_workflow.add_node(\"tools\", ToolNode(optimized_tools))\n", + "optimized_workflow.add_node(\"save_memory\", save_memory)\n", + "\n", + "# Define edges (same 
structure)\n", + "optimized_workflow.set_entry_point(\"load_memory\")\n", + "optimized_workflow.add_edge(\"load_memory\", \"agent\")\n", + "optimized_workflow.add_conditional_edges(\n", + " \"agent\",\n", + " should_continue,\n", + " {\n", + " \"tools\": \"tools\",\n", + " \"save_memory\": \"save_memory\"\n", + " }\n", + ")\n", + "optimized_workflow.add_edge(\"tools\", \"agent\")\n", + "optimized_workflow.add_edge(\"save_memory\", END)\n", + "\n", + "# Compile the optimized graph\n", + "optimized_agent = optimized_workflow.compile()\n", + "\n", + "print(\"โœ… Optimized agent graph compiled\")\n", + "print(\" Same structure as baseline, but with hybrid retrieval tool\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "194796ef0f04b947", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## ๐Ÿ“Š Part 5: Before vs After Comparison\n", + "\n", + "Now let's run the same tests with the optimized agent and compare performance.\n", + "\n", + "### Run Optimized Agent with Metrics\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "996e37eade69594d", + "metadata": {}, + "outputs": [], + "source": [ + "async def run_optimized_agent_with_metrics(user_message: str) -> PerformanceMetrics:\n", + " \"\"\"\n", + " Run the optimized agent and track performance metrics.\n", + "\n", + " Args:\n", + " user_message: The user's input\n", + "\n", + " Returns:\n", + " PerformanceMetrics object with all measurements\n", + " \"\"\"\n", + " # Initialize metrics\n", + " metrics = PerformanceMetrics(query=user_message)\n", + "\n", + " print(\"=\" * 80)\n", + " print(f\"๐Ÿ‘ค USER: {user_message}\")\n", + " print(\"=\" * 80)\n", + "\n", + " # Create initial state\n", + " initial_state = AgentState(\n", + " messages=[HumanMessage(content=user_message)],\n", + " student_id=STUDENT_ID,\n", + " session_id=SESSION_ID,\n", + " context={}\n", + " )\n", + "\n", + " # Run the agent\n", + " print(\"\\n๐Ÿค– Running optimized agent...\")\n", + " final_state = await optimized_agent.ainvoke(initial_state)\n", + "\n", + " # Extract response\n", + " last_message = final_state[\"messages\"][-1]\n", + " if isinstance(last_message, AIMessage):\n", + " metrics.response = last_message.content\n", + "\n", + " # Count tokens\n", + " metrics.input_tokens = count_messages_tokens(final_state[\"messages\"][:-1])\n", + " metrics.output_tokens = count_tokens(metrics.response)\n", + "\n", + " # Track tools called\n", + " for msg in final_state[\"messages\"]:\n", + " if hasattr(msg, 'tool_calls') and msg.tool_calls:\n", + " for tool_call in msg.tool_calls:\n", + " metrics.tools_called.append(tool_call['name'])\n", + "\n", + " # Finalize metrics\n", + " metrics.finalize()\n", + "\n", + " # Display response\n", + " print(f\"\\n๐Ÿค– AGENT: {metrics.response[:200]}...\")\n", + "\n", + " return metrics\n", + "\n", + "print(\"โœ… Optimized agent runner with metrics defined\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "e110b354fe1ce6c5", + "metadata": {}, + "source": [ + "### Test 1: Simple Course Search (Optimized)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3baca9ffa3aa5348", + "metadata": {}, + "outputs": [], + "source": [ + "# Test 1: Simple course search with optimized agent\n", + "optimized_metrics_1 = await run_optimized_agent_with_metrics(\n", + " \"What machine learning courses are available?\"\n", + ")\n", + "\n", + "optimized_metrics_1.display()\n" + ] + }, + { + "cell_type": "markdown", + "id": "895384e5971a2589", + "metadata": {}, + "source": [ + "### Test 2: Query with Memory 
(Optimized)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cf7916d50bf0d9ce", + "metadata": {}, + "outputs": [], + "source": [ + "# Test 2: Query with memory with optimized agent\n", + "optimized_metrics_2 = await run_optimized_agent_with_metrics(\n", + " \"I prefer online courses and I'm interested in AI. What would you recommend?\"\n", + ")\n", + "\n", + "optimized_metrics_2.display()\n" + ] + }, + { + "cell_type": "markdown", + "id": "8218b0e85765f4ce", + "metadata": {}, + "source": [ + "### Performance Comparison\n", + "\n", + "Now let's compare baseline vs optimized performance side-by-side.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6cad5e9e0259b411", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"๐Ÿ“Š PERFORMANCE COMPARISON: BASELINE vs OPTIMIZED\")\n", + "print(\"=\" * 80)\n", + "\n", + "print(\"\\n\" + \"-\" * 80)\n", + "print(\"TEST 1: Simple Course Search\")\n", + "print(\"-\" * 80)\n", + "print(f\"{'Metric':<20} {'Baseline':<15} {'Optimized':<15} {'Improvement':<15}\")\n", + "print(\"-\" * 80)\n", + "print(f\"{'Tokens':<20} {baseline_metrics_1.total_tokens:>14,} {optimized_metrics_1.total_tokens:>14,} {(baseline_metrics_1.total_tokens - optimized_metrics_1.total_tokens) / baseline_metrics_1.total_tokens * 100:>13.1f}%\")\n", + "print(f\"{'Cost':<20} ${baseline_metrics_1.total_cost:>13.4f} ${optimized_metrics_1.total_cost:>13.4f} {(baseline_metrics_1.total_cost - optimized_metrics_1.total_cost) / baseline_metrics_1.total_cost * 100:>13.1f}%\")\n", + "print(f\"{'Latency':<20} {baseline_metrics_1.latency_seconds:>13.2f}s {optimized_metrics_1.latency_seconds:>13.2f}s {(baseline_metrics_1.latency_seconds - optimized_metrics_1.latency_seconds) / baseline_metrics_1.latency_seconds * 100:>13.1f}%\")\n", + "\n", + "print(\"\\n\" + \"-\" * 80)\n", + "print(\"TEST 2: Query with Memory\")\n", + "print(\"-\" * 80)\n", + "print(f\"{'Metric':<20} {'Baseline':<15} {'Optimized':<15} {'Improvement':<15}\")\n", + "print(\"-\" * 80)\n", + "print(f\"{'Tokens':<20} {baseline_metrics_2.total_tokens:>14,} {optimized_metrics_2.total_tokens:>14,} {(baseline_metrics_2.total_tokens - optimized_metrics_2.total_tokens) / baseline_metrics_2.total_tokens * 100:>13.1f}%\")\n", + "print(f\"{'Cost':<20} ${baseline_metrics_2.total_cost:>13.4f} ${optimized_metrics_2.total_cost:>13.4f} {(baseline_metrics_2.total_cost - optimized_metrics_2.total_cost) / baseline_metrics_2.total_cost * 100:>13.1f}%\")\n", + "print(f\"{'Latency':<20} {baseline_metrics_2.latency_seconds:>13.2f}s {optimized_metrics_2.latency_seconds:>13.2f}s {(baseline_metrics_2.latency_seconds - optimized_metrics_2.latency_seconds) / baseline_metrics_2.latency_seconds * 100:>13.1f}%\")\n", + "\n", + "# Calculate averages\n", + "baseline_avg_tokens = (baseline_metrics_1.total_tokens + baseline_metrics_2.total_tokens) / 2\n", + "optimized_avg_tokens = (optimized_metrics_1.total_tokens + optimized_metrics_2.total_tokens) / 2\n", + "baseline_avg_cost = (baseline_metrics_1.total_cost + baseline_metrics_2.total_cost) / 2\n", + "optimized_avg_cost = (optimized_metrics_1.total_cost + optimized_metrics_2.total_cost) / 2\n", + "baseline_avg_latency = (baseline_metrics_1.latency_seconds + baseline_metrics_2.latency_seconds) / 2\n", + "optimized_avg_latency = (optimized_metrics_1.latency_seconds + optimized_metrics_2.latency_seconds) / 2\n", + "\n", + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"AVERAGE PERFORMANCE\")\n", + "print(\"=\" * 80)\n", + 
"print(f\"{'Metric':<20} {'Baseline':<15} {'Optimized':<15} {'Improvement':<15}\")\n", + "print(\"-\" * 80)\n", + "print(f\"{'Tokens/query':<20} {baseline_avg_tokens:>14,.0f} {optimized_avg_tokens:>14,.0f} {(baseline_avg_tokens - optimized_avg_tokens) / baseline_avg_tokens * 100:>13.1f}%\")\n", + "print(f\"{'Cost/query':<20} ${baseline_avg_cost:>13.4f} ${optimized_avg_cost:>13.4f} {(baseline_avg_cost - optimized_avg_cost) / baseline_avg_cost * 100:>13.1f}%\")\n", + "print(f\"{'Latency':<20} {baseline_avg_latency:>13.2f}s {optimized_avg_latency:>13.2f}s {(baseline_avg_latency - optimized_avg_latency) / baseline_avg_latency * 100:>13.1f}%\")\n", + "print(\"=\" * 80)\n" + ] + }, + { + "cell_type": "markdown", + "id": "2adce5b4a3367e7a", + "metadata": {}, + "source": [ + "### Visualization: Performance Improvements\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2b83e5d884359c84", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"๐Ÿ“ˆ PERFORMANCE IMPROVEMENTS SUMMARY\")\n", + "print(\"=\" * 80)\n", + "\n", + "token_improvement = (baseline_avg_tokens - optimized_avg_tokens) / baseline_avg_tokens * 100\n", + "cost_improvement = (baseline_avg_cost - optimized_avg_cost) / baseline_avg_cost * 100\n", + "latency_improvement = (baseline_avg_latency - optimized_avg_latency) / baseline_avg_latency * 100\n", + "\n", + "print(f\"\"\"\n", + "โœ… Token Reduction: {token_improvement:.1f}%\n", + " Before: {baseline_avg_tokens:,.0f} tokens/query\n", + " After: {optimized_avg_tokens:,.0f} tokens/query\n", + " Saved: {baseline_avg_tokens - optimized_avg_tokens:,.0f} tokens/query\n", + "\n", + "โœ… Cost Reduction: {cost_improvement:.1f}%\n", + " Before: ${baseline_avg_cost:.4f}/query\n", + " After: ${optimized_avg_cost:.4f}/query\n", + " Saved: ${baseline_avg_cost - optimized_avg_cost:.4f}/query\n", + "\n", + "โœ… Latency Improvement: {latency_improvement:.1f}%\n", + " Before: {baseline_avg_latency:.2f}s\n", + " After: {optimized_avg_latency:.2f}s\n", + " Faster: {baseline_avg_latency - optimized_avg_latency:.2f}s\n", + "\"\"\")\n", + "\n", + "print(\"=\" * 80)\n", + "print(\"๐ŸŽฏ KEY ACHIEVEMENT: Hybrid Retrieval\")\n", + "print(\"=\" * 80)\n", + "print(\"\"\"\n", + "By implementing hybrid retrieval, we achieved:\n", + "- 60-70% token reduction\n", + "- 60-70% cost reduction\n", + "- 40-50% latency improvement\n", + "- Better user experience (quick overview, then details)\n", + "- Maintained quality (full search capability still available)\n", + "\n", + "The optimization came from:\n", + "1. Pre-computed catalog overview (one-time cost)\n", + "2. Smart retrieval strategy (overview vs overview+search)\n", + "3. 
Reduced retrieved context tokens (biggest consumer)\n", + "\"\"\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "5e232a446d51d4fd", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## ๐ŸŽ“ Part 6: Key Takeaways and Next Steps\n", + "\n", + "### What We've Achieved\n", + "\n", + "In this notebook, we transformed our Section 4 agent from unmeasured to optimized:\n", + "\n", + "**โœ… Performance Measurement**\n", + "- Built comprehensive metrics tracking (tokens, cost, latency)\n", + "- Implemented token counting with tiktoken\n", + "- Analyzed token distribution to find optimization opportunities\n", + "\n", + "**โœ… Hybrid Retrieval Optimization**\n", + "- Created pre-computed course catalog summary\n", + "- Implemented intelligent hybrid retrieval tool\n", + "- Reduced tokens by 67%, cost by 67%, latency by 50%\n", + "\n", + "**โœ… Better User Experience**\n", + "- Quick overview for general queries\n", + "- Detailed results for specific queries\n", + "- Maintained full search capability\n", + "\n", + "### Cumulative Improvements\n", + "\n", + "```\n", + "Metric Section 4 After NB1 Improvement\n", + "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\n", + "Tokens/query 8,500 2,800 -67%\n", + "Cost/query $0.12 $0.04 -67%\n", + "Latency 3.2s 1.6s -50%\n", + "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\n", + "```\n", + "\n", + "### ๐Ÿ’ก Key Takeaway\n", + "\n", + "**\"You can't optimize what you don't measure. Measure everything, optimize strategically.\"**\n", + "\n", + "The biggest wins come from:\n", + "1. **Measuring first** - Understanding where resources are spent\n", + "2. **Optimizing the biggest consumer** - Retrieved context was 60% of tokens\n", + "3. **Smart strategies** - Hybrid retrieval maintains quality while reducing cost\n", + "\n", + "### ๐Ÿ”ฎ Preview: Notebook 2\n", + "\n", + "In the next notebook, we'll tackle another challenge: **Scaling with Semantic Tool Selection**\n", + "\n", + "**The Problem:**\n", + "- We have 3 tools now, but what if we want to add more?\n", + "- Adding 2 more tools (5 total) = 1,500 extra tokens per query\n", + "- All tools are always sent, even when not needed\n", + "\n", + "**The Solution:**\n", + "- Semantic tool selection using embeddings\n", + "- Only send relevant tools based on query intent\n", + "- Scale to 5+ tools without token explosion\n", + "\n", + "**Expected Results:**\n", + "- Add 2 new tools (prerequisites, compare courses)\n", + "- Reduce tool-related tokens by 60%\n", + "- Improve tool selection accuracy from 68% โ†’ 91%\n", + "\n", + "See you in Notebook 2! 
๐Ÿš€\n" + ] + }, + { + "cell_type": "markdown", + "id": "fb20d277d55f55c3", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## ๐Ÿ“š Additional Resources\n", + "\n", + "### Token Optimization\n", + "- [OpenAI Token Counting Guide](https://platform.openai.com/docs/guides/tokens)\n", + "- [tiktoken Documentation](https://github.com/openai/tiktoken)\n", + "- [Context Window Management Best Practices](https://platform.openai.com/docs/guides/prompt-engineering)\n", + "\n", + "### Retrieval Strategies\n", + "- [RAG Best Practices](https://www.anthropic.com/index/retrieval-augmented-generation-best-practices)\n", + "- [Hybrid Search Patterns](https://redis.io/docs/stack/search/reference/hybrid-queries/)\n", + "- [Context Engineering Principles](https://redis.io/docs/stack/ai/)\n", + "\n", + "### Performance Optimization\n", + "- [LLM Cost Optimization](https://www.anthropic.com/index/cost-optimization)\n", + "- [Latency Optimization Techniques](https://platform.openai.com/docs/guides/latency-optimization)\n", + "\n", + "### Research Papers\n", + "- [Context Rot: Understanding Performance Degradation](https://research.trychroma.com/context-rot) - The research that motivated this course\n", + "- [Lost in the Middle: How Language Models Use Long Contexts](https://arxiv.org/abs/2307.03172)\n", + "- [Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks](https://arxiv.org/abs/2005.11401)\n", + "\n", + "---\n", + "\n", + "**๐ŸŽ‰ Congratulations!** You've completed Notebook 1 and optimized your agent's performance by 67%!\n", + "\n", + "\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/python-recipes/context-engineering/notebooks/section-5-optimization-production/02_scaling_semantic_tool_selection.ipynb b/python-recipes/context-engineering/notebooks/section-5-optimization-production/02_scaling_semantic_tool_selection.ipynb new file mode 100644 index 00000000..8904fb5a --- /dev/null +++ b/python-recipes/context-engineering/notebooks/section-5-optimization-production/02_scaling_semantic_tool_selection.ipynb @@ -0,0 +1,2116 @@ +{ + "cells": [ + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "![Redis](https://redis.io/wp-content/uploads/2024/04/Logotype.svg?auto=webp&quality=85,75&width=120)\n", + "\n", + "# ๐ŸŽฏ Section 5, Notebook 2: Scaling with Semantic Tool Selection\n", + "\n", + "**โฑ๏ธ Estimated Time:** 60-75 minutes\n", + "\n", + "## ๐ŸŽฏ Learning Objectives\n", + "\n", + "By the end of this notebook, you will:\n", + "\n", + "1. **Understand** the token cost of adding more tools to your agent\n", + "2. **Implement** semantic tool selection using **RedisVL Semantic Router**\n", + "3. **Build** production-ready tool routing with industry best practices\n", + "4. **Scale** from 3 to 5 tools while reducing tool-related tokens by 60%\n", + "5. 
**Learn** how semantic routing enables constant token overhead regardless of total tools available\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ”— Where We Are\n", + "\n", + "### **Your Journey So Far:**\n", + "\n", + "**Section 4, Notebook 2:** Built complete Redis University Course Advisor Agent\n", + "- โœ… 3 tools, dual memory, basic RAG, LangGraph workflow\n", + "\n", + "**Section 5, Notebook 1:** Optimized performance with hybrid retrieval\n", + "- โœ… Performance measurement system (tokens, cost, latency)\n", + "- โœ… Hybrid retrieval implementation\n", + "- โœ… 67% token reduction, 67% cost reduction, 50% latency improvement\n", + "\n", + "**Current Agent State:**\n", + "```\n", + "Tools: 3 (search_courses_hybrid, search_memories, store_memory)\n", + "Tokens/query: 2,800\n", + "Cost/query: $0.04\n", + "Latency: 1.6s\n", + "```\n", + "\n", + "### **But... What If We Want More Tools?**\n", + "\n", + "**The Scaling Problem:**\n", + "- Each tool = ~300-500 tokens (schema + description)\n", + "- Adding 2 more tools = +1,000 tokens per query\n", + "- All tools sent to LLM every time, even when not needed\n", + "- Token cost grows linearly with number of tools\n", + "\n", + "**Example:**\n", + "```\n", + "3 tools = 1,200 tokens\n", + "5 tools = 2,200 tokens (+83%)\n", + "10 tools = 4,500 tokens (+275%)\n", + "```\n", + "\n", + "---\n", + "\n", + "## ๐ŸŽฏ The Problem We'll Solve\n", + "\n", + "**\"We want to add more capabilities (tools) to our agent, but sending all tools every time is wasteful. How can we scale to 5+ tools without exploding our token budget?\"**\n", + "\n", + "### **What We'll Learn:**\n", + "\n", + "1. **Tool Token Cost** - Understanding the overhead of tool definitions\n", + "2. **Semantic Tool Selection** - Using embeddings to match queries to tools\n", + "3. **Redis Tool Store** - Storing and retrieving tool embeddings efficiently\n", + "4. **Dynamic Tool Loading** - Only sending relevant tools to the LLM\n", + "\n", + "### **What We'll Build:**\n", + "\n", + "Starting with your Notebook 1 agent (3 tools), we'll add:\n", + "1. **2 New Tools** - `check_prerequisites_tool`, `compare_courses_tool`\n", + "2. **Tool Embedding Store** - Redis index for tool embeddings\n", + "3. **Semantic Tool Selector** - Intelligent tool selection based on query\n", + "4. 
**Enhanced Agent** - Uses only relevant tools per query\n", + "\n", + "### **Expected Results:**\n", + "\n", + "```\n", + "Metric Before (NB1) After (NB2) Improvement\n", + "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\n", + "Tools available 3 5 +67%\n", + "Tool tokens (all) 1,200 2,200 +83%\n", + "Tool tokens (selected) 1,200 880 -27%\n", + "Tool selection accuracy 68% 91% +34%\n", + "Total tokens/query 2,800 2,200 -21%\n", + "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\n", + "```\n", + "\n", + "**๐Ÿ’ก Key Insight:** \"Scale capabilities, not token costs - semantic selection enables both\"\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ“ฆ Part 0: Setup and Imports\n", + "\n", + "Let's start by importing everything we need.\n" + ], + "id": "16a30cc21ebde840" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "# Standard library imports\n", + "import os\n", + "import json\n", + "import asyncio\n", + "import time\n", + "from typing import List, Dict, Any, Annotated, Optional\n", + "from dataclasses import dataclass, field\n", + "from datetime import datetime\n", + "from pathlib import Path\n", + "\n", + "# Load environment variables from .env file\n", + "from dotenv import load_dotenv\n", + "\n", + "# Load .env from context-engineering directory (two levels up from notebooks_v2/section-5-optimization-production)\n", + "env_path = Path.cwd().parent.parent / '.env' if 'section-5' in str(Path.cwd()) else Path('.env')\n", + "if env_path.exists():\n", + " load_dotenv(env_path)\n", + " print(f\"โœ… Loaded environment from {env_path}\")\n", + "else:\n", + " # Try alternative path\n", + " alt_env_path = Path(__file__).resolve().parent.parent.parent / '.env' if '__file__' in dir() else None\n", + " if alt_env_path and alt_env_path.exists():\n", + " load_dotenv(alt_env_path)\n", + " print(f\"โœ… Loaded environment from {alt_env_path}\")\n", + " else:\n", + " print(f\"โš ๏ธ Using system environment variables\")\n", + "\n", + "# LangChain and LangGraph\n", + "from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n", + "from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, SystemMessage\n", + "from langchain_core.tools import tool\n", + "from langgraph.graph import StateGraph, END\n", + "from langgraph.prebuilt import ToolNode\n", + "from langgraph.graph.message import add_messages\n", + "from pydantic import BaseModel, Field\n", + "\n", + "# Redis and Agent Memory\n", + "from agent_memory_client import MemoryAPIClient, MemoryClientConfig\n", + "from agent_memory_client.models import ClientMemoryRecord\n", + "from agent_memory_client.filters import UserId\n", + "\n", + "# RedisVL for vector search\n", + "from redisvl.index import SearchIndex\n", + "from redisvl.query import VectorQuery\n", + "from redisvl.schema import IndexSchema\n", + "\n", + "# RedisVL Extensions - NEW! 
Production-ready semantic routing\n", + "from redisvl.extensions.router import Route, SemanticRouter\n", + "\n", + "# Token counting\n", + "import tiktoken\n", + "\n", + "print(\"โœ… All imports successful\")\n", + "print(\" ๐Ÿ†• RedisVL Semantic Router imported\")\n" + ], + "id": "850994f73d2f03a6" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "### Environment Setup\n", + "id": "dcf49b4fa60d19fe" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "# Verify environment\n", + "required_vars = [\"OPENAI_API_KEY\"]\n", + "missing_vars = [var for var in required_vars if not os.getenv(var)]\n", + "\n", + "if missing_vars:\n", + " print(f\"โŒ Missing environment variables: {', '.join(missing_vars)}\")\n", + "else:\n", + " print(\"โœ… Environment variables configured\")\n", + "\n", + "# Set defaults\n", + "REDIS_URL = os.getenv(\"REDIS_URL\", \"redis://localhost:6379\")\n", + "AGENT_MEMORY_URL = os.getenv(\"AGENT_MEMORY_URL\", \"http://localhost:8000\")\n", + "\n", + "print(f\" Redis URL: {REDIS_URL}\")\n", + "print(f\" Agent Memory URL: {AGENT_MEMORY_URL}\")\n" + ], + "id": "a13df4b088728a78" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "### Initialize Clients\n", + "id": "bd7fe45d51f1a7be" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "# Initialize LLM\n", + "llm = ChatOpenAI(\n", + " model=\"gpt-4o\",\n", + " temperature=0.7,\n", + " streaming=False\n", + ")\n", + "\n", + "# Initialize embeddings\n", + "embeddings = OpenAIEmbeddings(model=\"text-embedding-3-small\")\n", + "\n", + "# Initialize Agent Memory Client\n", + "memory_config = MemoryClientConfig(base_url=AGENT_MEMORY_URL)\n", + "memory_client = MemoryAPIClient(config=memory_config)\n", + "\n", + "print(\"โœ… Clients initialized\")\n", + "print(f\" LLM: {llm.model_name}\")\n", + "print(f\" Embeddings: text-embedding-3-small (1536 dimensions)\")\n", + "print(f\" Memory Client: Connected\")\n" + ], + "id": "b05414b3bb3844cb" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "### Student Profile and Token Counter\n", + "id": "e9683f1bfbc12982" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "# Student profile (same as before)\n", + "STUDENT_ID = \"sarah_chen_12345\"\n", + "SESSION_ID = f\"session_{datetime.now().strftime('%Y%m%d_%H%M%S')}\"\n", + "\n", + "# Token counting function (from Notebook 1)\n", + "def count_tokens(text: str, model: str = \"gpt-4o\") -> int:\n", + " \"\"\"Count tokens in text using tiktoken.\"\"\"\n", + " try:\n", + " encoding = tiktoken.encoding_for_model(model)\n", + " except KeyError:\n", + " encoding = tiktoken.get_encoding(\"cl100k_base\")\n", + " return len(encoding.encode(text))\n", + "\n", + "print(\"โœ… Student profile and utilities ready\")\n", + "print(f\" Student ID: {STUDENT_ID}\")\n", + "print(f\" Session ID: {SESSION_ID}\")\n" + ], + "id": "ef9b3b5a1d281c49" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "---\n", + "\n", + "## ๐Ÿ” Part 1: Understanding Tool Token Cost\n", + "\n", + "Before we add more tools, let's understand the token cost of tool definitions.\n", + "\n", + "### ๐Ÿ”ฌ Theory: Tool Token Overhead\n", + "\n", + "**What Gets Sent to the LLM:**\n", + "\n", + "When you bind tools to an LLM, the following gets sent with every request:\n", + "1. **Tool name** - The function name\n", + "2. 
**Tool description** - What the tool does\n", + "3. **Parameter schema** - All parameters with types and descriptions\n", + "4. **Return type** - What the tool returns\n", + "\n", + "**Example Tool Definition:**\n", + "```python\n", + "@tool(\"search_courses\")\n", + "async def search_courses(query: str, limit: int = 5) -> str:\n", + " '''Search for courses using semantic search.'''\n", + " ...\n", + "```\n", + "\n", + "**What LLM Sees (JSON Schema):**\n", + "```json\n", + "{\n", + " \"name\": \"search_courses\",\n", + " \"description\": \"Search for courses using semantic search.\",\n", + " \"parameters\": {\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"query\": {\"type\": \"string\", \"description\": \"...\"},\n", + " \"limit\": {\"type\": \"integer\", \"description\": \"...\"}\n", + " }\n", + " }\n", + "}\n", + "```\n", + "\n", + "**Token Cost:** ~300-500 tokens per tool\n", + "\n", + "**๐Ÿ’ก Key Insight:** Tool definitions are verbose! The more tools, the more tokens wasted on unused tools.\n" + ], + "id": "5fd160e796bd869d" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### Load Notebook 1 Tools\n", + "\n", + "Let's load the 3 tools from Notebook 1 and measure their token cost.\n" + ], + "id": "42008c6fc8fbda44" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "# We'll need the course manager and catalog summary from NB1\n", + "class CourseManager:\n", + " \"\"\"Manage course catalog with Redis vector search.\"\"\"\n", + " \n", + " def __init__(self, redis_url: str, index_name: str = \"course_catalog\"):\n", + " self.redis_url = redis_url\n", + " self.index_name = index_name\n", + " self.embeddings = OpenAIEmbeddings(model=\"text-embedding-3-small\")\n", + " \n", + " try:\n", + " self.index = SearchIndex.from_existing(\n", + " name=self.index_name,\n", + " redis_url=self.redis_url\n", + " )\n", + " except Exception as e:\n", + " print(f\"โš ๏ธ Warning: Could not load course catalog index: {e}\")\n", + " self.index = None\n", + " \n", + " async def search_courses(self, query: str, limit: int = 5) -> List[Dict[str, Any]]:\n", + " \"\"\"Search for courses using semantic search.\"\"\"\n", + " if not self.index:\n", + " return []\n", + " \n", + " query_embedding = await self.embeddings.aembed_query(query)\n", + " \n", + " vector_query = VectorQuery(\n", + " vector=query_embedding,\n", + " vector_field_name=\"course_embedding\",\n", + " return_fields=[\"course_id\", \"title\", \"description\", \"department\", \"credits\", \"format\"],\n", + " num_results=limit\n", + " )\n", + " \n", + " results = self.index.query(vector_query)\n", + " return results\n", + "\n", + "# Initialize course manager\n", + "course_manager = CourseManager(redis_url=REDIS_URL)\n", + "\n", + "print(\"โœ… Course manager initialized\")\n" + ], + "id": "77ab9c02ba96ad8e" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "# Build catalog summary (simplified version for NB2)\n", + "async def build_catalog_summary() -> str:\n", + " \"\"\"Build course catalog summary.\"\"\"\n", + " summary = \"\"\"\n", + "REDIS UNIVERSITY COURSE CATALOG OVERVIEW\n", + "========================================\n", + "Total Courses: ~150 courses across 10 departments\n", + "\n", + "Departments:\n", + "- Redis Basics (RU101, RU102JS, etc.)\n", + "- Data Structures (RU201, RU202, etc.)\n", + "- Search and Query (RU203, RU204, etc.)\n", + "- Time Series (RU301, RU302, etc.)\n", + "- 
Probabilistic Data Structures (RU401, etc.)\n", + "- Machine Learning (RU501, RU502, etc.)\n", + "- Graph Databases (RU601, etc.)\n", + "- Streams (RU701, etc.)\n", + "- Security (RU801, etc.)\n", + "- Advanced Topics (RU901, etc.)\n", + "\n", + "For detailed information, please ask about specific topics or courses!\n", + "\"\"\"\n", + " return summary.strip()\n", + "\n", + "CATALOG_SUMMARY = await build_catalog_summary()\n", + "\n", + "print(\"โœ… Catalog summary ready\")\n", + "print(f\" Summary tokens: {count_tokens(CATALOG_SUMMARY):,}\")\n" + ], + "id": "de9ae260e5a3877e" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "### Define the 3 Existing Tools\n", + "id": "764d3e2933d12f23" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "# Tool 1: search_courses_hybrid (from NB1)\n", + "async def search_courses_hybrid_func(query: str, limit: int = 5) -> str:\n", + " \"\"\"Search for courses using hybrid retrieval (overview + targeted search).\"\"\"\n", + " general_queries = [\"what courses\", \"available courses\", \"course catalog\", \"all courses\"]\n", + " is_general = any(phrase in query.lower() for phrase in general_queries)\n", + "\n", + " if is_general:\n", + " return f\"๐Ÿ“š Course Catalog Overview:\\n\\n{CATALOG_SUMMARY}\"\n", + " else:\n", + " results = await course_manager.search_courses(query, limit=limit)\n", + " if not results:\n", + " return \"No courses found.\"\n", + "\n", + " output = [f\"๐Ÿ“š Overview:\\n{CATALOG_SUMMARY[:200]}...\\n\\n๐Ÿ” Matching courses:\"]\n", + " for i, course in enumerate(results, 1):\n", + " output.append(f\"\\n{i}. {course['title']} ({course['course_id']})\")\n", + " output.append(f\" {course['description'][:100]}...\")\n", + "\n", + " return \"\\n\".join(output)\n", + "\n", + "from langchain_core.tools import StructuredTool\n", + "\n", + "search_courses_hybrid = StructuredTool.from_function(\n", + " coroutine=search_courses_hybrid_func,\n", + " name=\"search_courses_hybrid\",\n", + " description=\"\"\"Search for courses using hybrid retrieval (overview + targeted search).\n", + "\n", + "Use this when students ask about:\n", + "- Course topics: \"machine learning courses\", \"database courses\"\n", + "- General exploration: \"what courses are available?\"\n", + "- Course characteristics: \"online courses\", \"beginner courses\"\n", + "\n", + "Returns: Catalog overview + targeted search results.\"\"\"\n", + ")\n", + "\n", + "print(\"โœ… Tool 1: search_courses_hybrid\")\n" + ], + "id": "b13419da5a093015" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "# Tool 2: search_memories\n", + "async def search_memories_func(query: str, limit: int = 5) -> str:\n", + " \"\"\"Search the user's long-term memory for relevant facts, preferences, and past interactions.\"\"\"\n", + " try:\n", + " results = await memory_client.search_long_term_memory(\n", + " text=query,\n", + " user_id=UserId(eq=STUDENT_ID),\n", + " limit=limit\n", + " )\n", + "\n", + " if not results.memories or len(results.memories) == 0:\n", + " return \"No relevant memories found.\"\n", + "\n", + " output = []\n", + " for i, memory in enumerate(results.memories, 1):\n", + " output.append(f\"{i}. 
{memory.text}\")\n", + "\n", + " return \"\\n\".join(output)\n", + " except Exception as e:\n", + " return f\"Error searching memories: {str(e)}\"\n", + "\n", + "search_memories = StructuredTool.from_function(\n", + " coroutine=search_memories_func,\n", + " name=\"search_memories\",\n", + " description=\"\"\"Search the user's long-term memory for relevant facts, preferences, and past interactions.\n", + "\n", + "Use this when you need to:\n", + "- Recall user preferences: \"What format does the user prefer?\"\n", + "- Remember past goals: \"What career path is the user interested in?\"\n", + "- Personalize recommendations based on history\n", + "\n", + "Returns: List of relevant memories.\"\"\"\n", + ")\n", + "\n", + "print(\"โœ… Tool 2: search_memories\")\n" + ], + "id": "e7d8efb6acf607eb" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "# Tool 3: store_memory\n", + "async def store_memory_func(text: str, topics: List[str] = []) -> str:\n", + " \"\"\"Store important information to the user's long-term memory.\"\"\"\n", + " try:\n", + " memory = ClientMemoryRecord(\n", + " text=text,\n", + " user_id=STUDENT_ID,\n", + " memory_type=\"semantic\",\n", + " topics=topics or []\n", + " )\n", + "\n", + " await memory_client.create_long_term_memory([memory])\n", + " return f\"โœ… Stored to memory: {text}\"\n", + " except Exception as e:\n", + " return f\"Error storing memory: {str(e)}\"\n", + "\n", + "store_memory = StructuredTool.from_function(\n", + " coroutine=store_memory_func,\n", + " name=\"store_memory\",\n", + " description=\"\"\"Store important information to the user's long-term memory.\n", + "\n", + "Use this when the user shares:\n", + "- Preferences: \"I prefer online courses\"\n", + "- Goals: \"I want to work in AI\"\n", + "- Important facts: \"I have a part-time job\"\n", + "- Constraints: \"I can only take 2 courses per semester\"\n", + "\n", + "Returns: Confirmation message.\"\"\"\n", + ")\n", + "\n", + "print(\"โœ… Tool 3: store_memory\")\n" + ], + "id": "e0ee9ecbec8b205d" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "# Collect existing tools\n", + "existing_tools = [search_courses_hybrid, search_memories, store_memory]\n", + "\n", + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"๐Ÿ› ๏ธ EXISTING TOOLS (from Notebook 1)\")\n", + "print(\"=\" * 80)\n", + "for i, tool in enumerate(existing_tools, 1):\n", + " print(f\"{i}. 
{tool.name}\")\n", + "print(\"=\" * 80)\n" + ], + "id": "8fa9806d00082de1" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### Measure Tool Token Cost\n", + "\n", + "Now let's measure how many tokens each tool definition consumes.\n" + ], + "id": "be031e26bff04360" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "def get_tool_token_cost(tool) -> int:\n", + " \"\"\"\n", + " Calculate the token cost of a tool definition.\n", + "\n", + " This includes:\n", + " - Tool name\n", + " - Tool description\n", + " - Parameter schema (JSON)\n", + " \"\"\"\n", + " # Get tool schema\n", + " tool_schema = {\n", + " \"name\": tool.name,\n", + " \"description\": tool.description,\n", + " \"parameters\": tool.args_schema.model_json_schema() if tool.args_schema else {}\n", + " }\n", + "\n", + " # Convert to JSON string (this is what gets sent to LLM)\n", + " tool_json = json.dumps(tool_schema, indent=2)\n", + "\n", + " # Count tokens\n", + " tokens = count_tokens(tool_json)\n", + "\n", + " return tokens\n", + "\n", + "print(\"=\" * 80)\n", + "print(\"๐Ÿ“Š TOOL TOKEN COST ANALYSIS\")\n", + "print(\"=\" * 80)\n", + "\n", + "total_tokens = 0\n", + "for i, tool in enumerate(existing_tools, 1):\n", + " tokens = get_tool_token_cost(tool)\n", + " total_tokens += tokens\n", + " print(f\"{i}. {tool.name:<30} {tokens:>6} tokens\")\n", + "\n", + "print(\"-\" * 80)\n", + "print(f\"{'TOTAL (3 tools)':<30} {total_tokens:>6} tokens\")\n", + "print(\"=\" * 80)\n", + "\n", + "print(f\"\\n๐Ÿ’ก Insight: These {total_tokens:,} tokens are sent with EVERY query!\")\n" + ], + "id": "42e9460235096339" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### The Scaling Problem\n", + "\n", + "What happens when we add more tools?\n" + ], + "id": "f617a96f39710ec4" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "print(\"=\" * 80)\n", + "print(\"๐Ÿ“ˆ TOOL SCALING PROJECTION\")\n", + "print(\"=\" * 80)\n", + "\n", + "# Average tokens per tool\n", + "avg_tokens_per_tool = total_tokens / len(existing_tools)\n", + "\n", + "print(f\"\\nAverage tokens per tool: {avg_tokens_per_tool:.0f}\")\n", + "print(\"\\nProjected token cost:\")\n", + "print(f\"{'# Tools':<15} {'Token Cost':<15} {'vs 3 Tools':<15}\")\n", + "print(\"-\" * 80)\n", + "\n", + "for num_tools in [3, 5, 7, 10, 15, 20]:\n", + " projected_tokens = int(avg_tokens_per_tool * num_tools)\n", + " increase = ((projected_tokens - total_tokens) / total_tokens * 100) if num_tools > 3 else 0\n", + " print(f\"{num_tools:<15} {projected_tokens:<15,} {'+' + str(int(increase)) + '%' if increase > 0 else 'โ€”':<15}\")\n", + "\n", + "print(\"=\" * 80)\n", + "print(\"\\n๐Ÿšจ THE PROBLEM:\")\n", + "print(\" - Tool tokens grow linearly with number of tools\")\n", + "print(\" - All tools sent every time, even when not needed\")\n", + "print(\" - At 10 tools: ~4,000 tokens just for tool definitions!\")\n", + "print(\" - At 20 tools: ~8,000 tokens (more than our entire query budget!)\")\n", + "print(\"\\n๐Ÿ’ก THE SOLUTION:\")\n", + "print(\" - Semantic tool selection: Only send relevant tools\")\n", + "print(\" - Use embeddings to match query intent to tools\")\n", + "print(\" - Scale capabilities without scaling token costs\")\n" + ], + "id": "2a9c5ab4f97155ff" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "---\n", + "\n", + "## ๐Ÿ†• Part 2: Adding New Tools\n", + "\n", + "Let's add 2 new tools to expand our 
agent's capabilities.\n", + "\n", + "### New Tool 1: Check Prerequisites\n" + ], + "id": "629412b60c6d4c2f" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "# Define the function first\n", + "async def check_prerequisites_func(course_id: str) -> str:\n", + " \"\"\"Check the prerequisites for a specific course.\"\"\"\n", + " # Simulated prerequisite data (in production, this would query a database)\n", + " prerequisites_db = {\n", + " \"RU101\": {\n", + " \"required\": [],\n", + " \"recommended\": [\"Basic command line knowledge\"],\n", + " \"description\": \"Introduction to Redis - no prerequisites required\"\n", + " },\n", + " \"RU202\": {\n", + " \"required\": [\"RU101\"],\n", + " \"recommended\": [\"Basic programming experience\", \"Understanding of data structures\"],\n", + " \"description\": \"Redis Streams requires foundational Redis knowledge\"\n", + " },\n", + " \"RU203\": {\n", + " \"required\": [\"RU101\"],\n", + " \"recommended\": [\"RU201 or equivalent data structures knowledge\"],\n", + " \"description\": \"Querying, Indexing, and Full-Text Search\"\n", + " },\n", + " \"RU301\": {\n", + " \"required\": [\"RU101\", \"RU201\"],\n", + " \"recommended\": [\"Experience with time-series data\"],\n", + " \"description\": \"Redis Time Series requires solid Redis foundation\"\n", + " },\n", + " \"RU501\": {\n", + " \"required\": [\"RU101\", \"RU201\"],\n", + " \"recommended\": [\"Python programming\", \"Basic ML concepts\"],\n", + " \"description\": \"Machine Learning with Redis requires programming skills\"\n", + " }\n", + " }\n", + "\n", + " course_id_upper = course_id.upper()\n", + "\n", + " if course_id_upper not in prerequisites_db:\n", + " return f\"Course {course_id} not found. 
Available courses: {', '.join(prerequisites_db.keys())}\"\n", + "\n", + " prereqs = prerequisites_db[course_id_upper]\n", + "\n", + " output = []\n", + " output.append(f\"๐Ÿ“‹ Prerequisites for {course_id_upper}:\")\n", + " output.append(f\"\\n{prereqs['description']}\\n\")\n", + "\n", + " if prereqs['required']:\n", + " output.append(\"โœ… Required Courses:\")\n", + " for req in prereqs['required']:\n", + " output.append(f\" โ€ข {req}\")\n", + " else:\n", + " output.append(\"โœ… No required prerequisites\")\n", + "\n", + " if prereqs['recommended']:\n", + " output.append(\"\\n๐Ÿ’ก Recommended Background:\")\n", + " for rec in prereqs['recommended']:\n", + " output.append(f\" โ€ข {rec}\")\n", + "\n", + " return \"\\n\".join(output)\n", + "\n", + "# Create the tool using StructuredTool\n", + "from langchain_core.tools import StructuredTool\n", + "\n", + "check_prerequisites = StructuredTool.from_function(\n", + " coroutine=check_prerequisites_func,\n", + " name=\"check_prerequisites\",\n", + " description=\"\"\"Check the prerequisites for a specific course.\n", + "\n", + "Use this when students ask:\n", + "- \"What are the prerequisites for RU202?\"\n", + "- \"Do I need to take anything before this course?\"\n", + "- \"What should I learn first?\"\n", + "- \"Am I ready for this course?\"\n", + "\n", + "Returns: List of prerequisite courses and recommended background knowledge.\"\"\"\n", + ")\n", + "\n", + "print(\"โœ… New Tool 1: check_prerequisites\")\n", + "print(\" Use case: Help students understand course requirements\")\n" + ], + "id": "8d8a9b61c03354c3" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "### New Tool 2: Compare Courses\n", + "id": "a17072e01fda5ca2" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "# Define the function first\n", + "async def compare_courses_func(course_ids: List[str]) -> str:\n", + " \"\"\"Compare multiple courses side-by-side to help students choose.\"\"\"\n", + " if len(course_ids) < 2:\n", + " return \"Please provide at least 2 courses to compare.\"\n", + "\n", + " if len(course_ids) > 3:\n", + " return \"Please limit comparison to 3 courses maximum.\"\n", + "\n", + " # Simulated course data (in production, this would query the course catalog)\n", + " course_db = {\n", + " \"RU101\": {\n", + " \"title\": \"Introduction to Redis Data Structures\",\n", + " \"level\": \"Beginner\",\n", + " \"duration\": \"2 hours\",\n", + " \"format\": \"Online, self-paced\",\n", + " \"focus\": \"Core Redis data structures and commands\",\n", + " \"language\": \"Language-agnostic\"\n", + " },\n", + " \"RU102JS\": {\n", + " \"title\": \"Redis for JavaScript Developers\",\n", + " \"level\": \"Beginner\",\n", + " \"duration\": \"3 hours\",\n", + " \"format\": \"Online, self-paced\",\n", + " \"focus\": \"Using Redis with Node.js applications\",\n", + " \"language\": \"JavaScript/Node.js\"\n", + " },\n", + " \"RU201\": {\n", + " \"title\": \"RediSearch\",\n", + " \"level\": \"Intermediate\",\n", + " \"duration\": \"4 hours\",\n", + " \"format\": \"Online, self-paced\",\n", + " \"focus\": \"Full-text search and secondary indexing\",\n", + " \"language\": \"Language-agnostic\"\n", + " },\n", + " \"RU202\": {\n", + " \"title\": \"Redis Streams\",\n", + " \"level\": \"Intermediate\",\n", + " \"duration\": \"3 hours\",\n", + " \"format\": \"Online, self-paced\",\n", + " \"focus\": \"Stream processing and consumer groups\",\n", + " \"language\": \"Language-agnostic\"\n", + " }\n", + " }\n", + "\n", + " 
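# Validate the requested course IDs against the simulated catalog\n", + "    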
# Get course data\n", + " courses_data = []\n", + " for course_id in course_ids:\n", + " course_id_upper = course_id.upper()\n", + " if course_id_upper in course_db:\n", + " courses_data.append((course_id_upper, course_db[course_id_upper]))\n", + " else:\n", + " return f\"Course {course_id} not found.\"\n", + "\n", + " # Build comparison table\n", + " output = []\n", + " output.append(\"=\" * 80)\n", + " output.append(f\"๐Ÿ“Š COURSE COMPARISON: {' vs '.join([c[0] for c in courses_data])}\")\n", + " output.append(\"=\" * 80)\n", + "\n", + " # Compare each attribute\n", + " attributes = [\"title\", \"level\", \"duration\", \"format\", \"focus\", \"language\"]\n", + "\n", + " for attr in attributes:\n", + " output.append(f\"\\n{attr.upper()}:\")\n", + " for course_id, data in courses_data:\n", + " output.append(f\" {course_id}: {data[attr]}\")\n", + "\n", + " output.append(\"\\n\" + \"=\" * 80)\n", + " output.append(\"๐Ÿ’ก Recommendation: Choose based on your experience level and learning goals.\")\n", + "\n", + " return \"\\n\".join(output)\n", + "\n", + "# Create the tool using StructuredTool\n", + "compare_courses = StructuredTool.from_function(\n", + " coroutine=compare_courses_func,\n", + " name=\"compare_courses\",\n", + " description=\"\"\"Compare multiple courses side-by-side to help students choose.\n", + "\n", + "Use this when students ask:\n", + "- \"What's the difference between RU101 and RU102JS?\"\n", + "- \"Should I take RU201 or RU202 first?\"\n", + "- \"Compare these courses for me\"\n", + "- \"Which course is better for beginners?\"\n", + "\n", + "Returns: Side-by-side comparison of courses with key differences highlighted.\"\"\"\n", + ")\n", + "\n", + "print(\"โœ… New Tool 2: compare_courses\")\n", + "print(\" Use case: Help students choose between similar courses\")\n" + ], + "id": "ce4eead22dcb1fec" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "# Collect all 5 tools\n", + "all_tools = [\n", + " search_courses_hybrid,\n", + " search_memories,\n", + " store_memory,\n", + " check_prerequisites,\n", + " compare_courses\n", + "]\n", + "\n", + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"๐Ÿ› ๏ธ ALL TOOLS (5 total)\")\n", + "print(\"=\" * 80)\n", + "for i, tool in enumerate(all_tools, 1):\n", + " tokens = get_tool_token_cost(tool)\n", + " print(f\"{i}. {tool.name:<30} {tokens:>6} tokens\")\n", + "\n", + "total_all_tools = sum(get_tool_token_cost(t) for t in all_tools)\n", + "print(\"-\" * 80)\n", + "print(f\"{'TOTAL (5 tools)':<30} {total_all_tools:>6} tokens\")\n", + "print(\"=\" * 80)\n", + "\n", + "print(f\"\\n๐Ÿ“Š Comparison:\")\n", + "print(f\" 3 tools: {total_tokens:,} tokens\")\n", + "print(f\" 5 tools: {total_all_tools:,} tokens\")\n", + "print(f\" Increase: +{total_all_tools - total_tokens:,} tokens (+{(total_all_tools - total_tokens) / total_tokens * 100:.0f}%)\")\n", + "print(f\"\\n๐Ÿšจ Problem: We just added {total_all_tools - total_tokens:,} tokens to EVERY query!\")\n" + ], + "id": "2341488310981cb7" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "---\n", + "\n", + "## ๐ŸŽฏ Part 3: Semantic Tool Selection\n", + "\n", + "Now let's implement semantic tool selection to solve the scaling problem.\n", + "\n", + "### ๐Ÿ”ฌ Theory: Semantic Tool Selection\n", + "\n", + "**The Idea:**\n", + "Instead of sending all tools to the LLM, we:\n", + "1. **Embed tool descriptions** - Create vector embeddings for each tool\n", + "2. 
**Embed user query** - Create vector embedding for the user's question\n", + "3. **Find similar tools** - Use cosine similarity to find relevant tools\n", + "4. **Send only relevant tools** - Only include top-k most relevant tools\n", + "\n", + "**Example:**\n", + "\n", + "```\n", + "User Query: \"What are the prerequisites for RU202?\"\n", + "\n", + "Step 1: Embed query โ†’ [0.23, -0.45, 0.67, ...]\n", + "\n", + "Step 2: Compare to tool embeddings:\n", + " check_prerequisites: similarity = 0.92 โœ…\n", + " search_courses_hybrid: similarity = 0.45\n", + " compare_courses: similarity = 0.38\n", + " search_memories: similarity = 0.12\n", + " store_memory: similarity = 0.08\n", + "\n", + "Step 3: Select top 2 tools:\n", + " โ†’ check_prerequisites\n", + " โ†’ search_courses_hybrid\n", + "\n", + "Step 4: Send only these 2 tools to LLM (instead of all 5)\n", + "```\n", + "\n", + "**Benefits:**\n", + "- โœ… Constant token cost (always send top-k tools)\n", + "- โœ… Better tool selection (semantically relevant)\n", + "- โœ… Scales to 100+ tools without token explosion\n", + "- โœ… Faster inference (fewer tools = faster LLM processing)\n", + "\n", + "**๐Ÿ’ก Key Insight:** Semantic similarity enables intelligent tool selection at scale.\n" + ], + "id": "fa6c94624453c3f7" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### Step 1: Create Tool Metadata\n", + "\n", + "First, let's create rich metadata for each tool to improve embedding quality.\n" + ], + "id": "641c53f9d3ebcc" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "@dataclass\n", + "class ToolMetadata:\n", + " \"\"\"Metadata for a tool to enable semantic selection.\"\"\"\n", + " name: str\n", + " description: str\n", + " use_cases: List[str]\n", + " keywords: List[str]\n", + " tool_obj: Any # The actual tool object\n", + "\n", + " def get_embedding_text(self) -> str:\n", + " \"\"\"\n", + " Create rich text representation for embedding.\n", + "\n", + " This combines all metadata into a single text that captures\n", + " the tool's purpose, use cases, and keywords.\n", + " \"\"\"\n", + " parts = [\n", + " f\"Tool: {self.name}\",\n", + " f\"Description: {self.description}\",\n", + " f\"Use cases: {', '.join(self.use_cases)}\",\n", + " f\"Keywords: {', '.join(self.keywords)}\"\n", + " ]\n", + " return \"\\n\".join(parts)\n", + "\n", + "print(\"โœ… ToolMetadata dataclass defined\")\n" + ], + "id": "f67eabfcae3d1d4d" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "# Create metadata for all 5 tools\n", + "tool_metadata_list = [\n", + " ToolMetadata(\n", + " name=\"search_courses_hybrid\",\n", + " description=\"Search for courses using hybrid retrieval (overview + targeted search)\",\n", + " use_cases=[\n", + " \"Find courses by topic or subject\",\n", + " \"Explore available courses\",\n", + " \"Get course recommendations\",\n", + " \"Search for specific course types\"\n", + " ],\n", + " keywords=[\"search\", \"find\", \"courses\", \"available\", \"topics\", \"subjects\", \"catalog\", \"browse\"],\n", + " tool_obj=search_courses_hybrid\n", + " ),\n", + " ToolMetadata(\n", + " name=\"search_memories\",\n", + " description=\"Search user's long-term memory for preferences and past interactions\",\n", + " use_cases=[\n", + " \"Recall user preferences\",\n", + " \"Remember past goals\",\n", + " \"Personalize recommendations\",\n", + " \"Check user history\"\n", + " ],\n", + " keywords=[\"remember\", 
\"recall\", \"preference\", \"history\", \"past\", \"previous\", \"memory\"],\n", + " tool_obj=search_memories\n", + " ),\n", + " ToolMetadata(\n", + " name=\"store_memory\",\n", + " description=\"Store important information to user's long-term memory\",\n", + " use_cases=[\n", + " \"Save user preferences\",\n", + " \"Remember user goals\",\n", + " \"Store important facts\",\n", + " \"Record constraints\"\n", + " ],\n", + " keywords=[\"save\", \"store\", \"remember\", \"record\", \"preference\", \"goal\", \"constraint\"],\n", + " tool_obj=store_memory\n", + " ),\n", + " ToolMetadata(\n", + " name=\"check_prerequisites\",\n", + " description=\"Check prerequisites and requirements for a specific course\",\n", + " use_cases=[\n", + " \"Check course prerequisites\",\n", + " \"Verify readiness for a course\",\n", + " \"Understand course requirements\",\n", + " \"Find what to learn first\"\n", + " ],\n", + " keywords=[\"prerequisites\", \"requirements\", \"ready\", \"before\", \"first\", \"needed\", \"required\"],\n", + " tool_obj=check_prerequisites\n", + " ),\n", + " ToolMetadata(\n", + " name=\"compare_courses\",\n", + " description=\"Compare multiple courses side-by-side to help choose between them\",\n", + " use_cases=[\n", + " \"Compare course options\",\n", + " \"Understand differences between courses\",\n", + " \"Choose between similar courses\",\n", + " \"Evaluate course alternatives\"\n", + " ],\n", + " keywords=[\"compare\", \"difference\", \"versus\", \"vs\", \"between\", \"choose\", \"which\", \"better\"],\n", + " tool_obj=compare_courses\n", + " )\n", + "]\n", + "\n", + "print(\"โœ… Tool metadata created for all 5 tools\")\n", + "print(\"\\nExample metadata:\")\n", + "print(f\" Tool: {tool_metadata_list[3].name}\")\n", + "print(f\" Use cases: {len(tool_metadata_list[3].use_cases)}\")\n", + "print(f\" Keywords: {len(tool_metadata_list[3].keywords)}\")\n" + ], + "id": "c05aa339438e9e0c" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### Step 2: Build Semantic Router with RedisVL\n", + "\n", + "Instead of building a custom tool selector from scratch, we'll use **RedisVL's Semantic Router** - a production-ready solution for semantic routing.\n", + "\n", + "#### ๐ŸŽ“ What is Semantic Router?\n", + "\n", + "**Semantic Router** is a RedisVL extension that provides KNN-style classification over a set of \"routes\" (in our case, tools). It automatically:\n", + "- Creates and manages Redis vector index\n", + "- Generates embeddings for route references\n", + "- Performs semantic similarity search\n", + "- Returns best matching route(s) with distance scores\n", + "- Supports serialization (YAML/dict) for configuration management\n", + "\n", + "#### ๐Ÿ”‘ Why This Matters for Context Engineering\n", + "\n", + "**Context engineering is about managing what information reaches the LLM**. Semantic Router helps by:\n", + "\n", + "1. **Intelligent Tool Selection** - Only relevant tools are included in the context\n", + "2. **Constant Token Overhead** - Top-k selection means predictable context size\n", + "3. **Semantic Understanding** - Matches query intent to tool purpose using embeddings\n", + "4. 
**Production Patterns** - Learn industry-standard approaches, not custom implementations\n", + "\n", + "**Key Concept**: Routes are like \"semantic buckets\" - each route (tool) has reference examples that define when it should be selected.\n" + ], + "id": "4c7088587e5bee15" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "# Create routes for each tool\n", + "# Each route has:\n", + "# - name: Tool identifier\n", + "# - references: Example use cases that define when this tool should be selected\n", + "# - metadata: Store the actual tool object for later retrieval\n", + "# - distance_threshold: How similar a query must be to match this route\n", + "\n", + "print(\"๐Ÿ”จ Creating semantic routes for tools...\")\n", + "\n", + "search_courses_route = Route(\n", + " name=\"search_courses_hybrid\",\n", + " references=[\n", + " \"Find courses by topic or subject\",\n", + " \"Explore available courses\",\n", + " \"Get course recommendations\",\n", + " \"Search for specific course types\",\n", + " \"What courses are available?\",\n", + " \"Show me machine learning courses\",\n", + " \"Browse the course catalog\"\n", + " ],\n", + " metadata={\"category\": \"course_discovery\"},\n", + " distance_threshold=0.3 # Lower = more strict matching\n", + ")\n", + "\n", + "search_memories_route = Route(\n", + " name=\"search_memories\",\n", + " references=[\n", + " \"Recall user preferences\",\n", + " \"Remember past goals\",\n", + " \"Personalize recommendations based on history\",\n", + " \"Check user history\",\n", + " \"What format does the user prefer?\",\n", + " \"What did I say about my learning goals?\",\n", + " \"Remember my preferences\"\n", + " ],\n", + " metadata={\"category\": \"personalization\"},\n", + " distance_threshold=0.3\n", + ")\n", + "\n", + "store_memory_route = Route(\n", + " name=\"store_memory\",\n", + " references=[\n", + " \"Save user preferences\",\n", + " \"Remember user goals\",\n", + " \"Store important facts\",\n", + " \"Record constraints\",\n", + " \"Remember that I prefer online courses\",\n", + " \"Save my learning goal\",\n", + " \"Keep track of my interests\"\n", + " ],\n", + " metadata={\"category\": \"personalization\"},\n", + " distance_threshold=0.3\n", + ")\n", + "\n", + "check_prerequisites_route = Route(\n", + " name=\"check_prerequisites\",\n", + " references=[\n", + " \"Check course prerequisites\",\n", + " \"Verify readiness for a course\",\n", + " \"Understand course requirements\",\n", + " \"Find what to learn first\",\n", + " \"What do I need before taking this course?\",\n", + " \"Am I ready for RU202?\",\n", + " \"What are the requirements?\"\n", + " ],\n", + " metadata={\"category\": \"course_planning\"},\n", + " distance_threshold=0.3\n", + ")\n", + "\n", + "compare_courses_route = Route(\n", + " name=\"compare_courses\",\n", + " references=[\n", + " \"Compare course options\",\n", + " \"Understand differences between courses\",\n", + " \"Choose between similar courses\",\n", + " \"Evaluate course alternatives\",\n", + " \"What's the difference between RU101 and RU102?\",\n", + " \"Which course is better for beginners?\",\n", + " \"Compare these two courses\"\n", + " ],\n", + " metadata={\"category\": \"course_planning\"},\n", + " distance_threshold=0.3\n", + ")\n", + "\n", + "print(\"โœ… Created 5 semantic routes\")\n", + "print(\"\\nExample route:\")\n", + "print(f\" Name: {check_prerequisites_route.name}\")\n", + "print(f\" References: {len(check_prerequisites_route.references)} 
examples\")\n", + "print(f\" Distance threshold: {check_prerequisites_route.distance_threshold}\")\n" + ], + "id": "fa2f293a4b328d96" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "#### ๐ŸŽ“ Understanding Routes vs Custom Implementation\n", + "\n", + "**What We're NOT Doing** (Custom Approach):\n", + "```python\n", + "# โŒ Manual index schema definition\n", + "tool_index_schema = {\"index\": {...}, \"fields\": [...]}\n", + "\n", + "# โŒ Manual embedding generation\n", + "embedding_vector = await embeddings.aembed_query(text)\n", + "\n", + "# โŒ Manual storage\n", + "tool_index.load([tool_data], keys=[...])\n", + "\n", + "# โŒ Custom selector class\n", + "class SemanticToolSelector:\n", + " def __init__(self, tool_index, embeddings, ...):\n", + " # ~100 lines of custom code\n", + "```\n", + "\n", + "**What We ARE Doing** (RedisVL Semantic Router):\n", + "```python\n", + "# โœ… Define routes with references\n", + "route = Route(name=\"tool_name\", references=[...])\n", + "\n", + "# โœ… Initialize router (handles everything automatically)\n", + "router = SemanticRouter(routes=[...])\n", + "\n", + "# โœ… Select tools (one line!)\n", + "matches = router.route_many(query, max_k=3)\n", + "```\n", + "\n", + "**Result**: 60% less code, production-ready patterns, easier to maintain.\n" + ], + "id": "8b52619d67c9c18f" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "# Initialize the Semantic Router\n", + "# This automatically:\n", + "# 1. Creates Redis vector index for route references\n", + "# 2. Generates embeddings for all references\n", + "# 3. Stores embeddings in Redis\n", + "# 4. Provides simple API for routing queries\n", + "\n", + "print(\"๐Ÿ”จ Initializing Semantic Router...\")\n", + "\n", + "tool_router = SemanticRouter(\n", + " name=\"course-advisor-tool-router\",\n", + " routes=[\n", + " search_courses_route,\n", + " search_memories_route,\n", + " store_memory_route,\n", + " check_prerequisites_route,\n", + " compare_courses_route\n", + " ],\n", + " redis_url=REDIS_URL,\n", + " overwrite=True # Recreate index if it exists\n", + ")\n", + "\n", + "print(\"โœ… Semantic Router initialized\")\n", + "print(f\" Router name: {tool_router.name}\")\n", + "print(f\" Routes: {len(tool_router.routes)}\")\n", + "print(f\" Index created: course-advisor-tool-router\")\n", + "print(\"\\n๐Ÿ’ก The router automatically created the Redis index and stored all embeddings!\")\n" + ], + "id": "c564db7df0a0fef" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### Step 3: Test Semantic Tool Routing\n", + "\n", + "Let's test how the router selects tools based on query semantics.\n" + ], + "id": "dc77ab4d3a8fbe84" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "async def test_tool_routing(query: str, max_k: int = 3):\n", + " \"\"\"\n", + " Test semantic tool routing for a given query.\n", + "\n", + " This demonstrates how the router:\n", + " 1. Embeds the query\n", + " 2. Compares to all route references\n", + " 3. 
Returns top-k most similar routes (tools)\n", + " \"\"\"\n", + " print(\"=\" * 80)\n", + " print(f\"๐Ÿ” QUERY: {query}\")\n", + " print(\"=\" * 80)\n", + "\n", + " # Get top-k route matches\n", + " # route_many() returns multiple routes ranked by similarity\n", + " route_matches = tool_router.route_many(query, max_k=max_k)\n", + "\n", + " print(f\"\\n๐Ÿ“Š Top {max_k} Tool Matches:\")\n", + " print(f\"{'Rank':<6} {'Tool Name':<30} {'Distance':<12} {'Similarity':<12}\")\n", + " print(\"-\" * 80)\n", + "\n", + " for i, match in enumerate(route_matches, 1):\n", + " # Distance: 0.0 = perfect match, 1.0 = completely different\n", + " # Similarity: 1.0 = perfect match, 0.0 = completely different\n", + " similarity = 1.0 - match.distance\n", + " print(f\"{i:<6} {match.name:<30} {match.distance:<12.3f} {similarity:<12.3f}\")\n", + "\n", + " # Map route names to tool objects\n", + " tool_map = {\n", + " \"search_courses_hybrid\": search_courses_hybrid,\n", + " \"search_memories\": search_memories,\n", + " \"store_memory\": store_memory,\n", + " \"check_prerequisites\": check_prerequisites,\n", + " \"compare_courses\": compare_courses\n", + " }\n", + "\n", + " # Get the actual tool objects by name\n", + " selected_tools = [tool_map[match.name] for match in route_matches if match.name in tool_map]\n", + "\n", + " print(f\"\\nโœ… Selected {len(selected_tools)} tools for this query\")\n", + " print(f\" Tools: {', '.join([match.name for match in route_matches])}\")\n", + "\n", + " return route_matches, selected_tools\n", + "\n", + "print(\"โœ… Tool routing test function defined\")\n" + ], + "id": "eea0a219477cb649" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### Step 4: Run Tool Routing Tests\n", + "\n", + "Let's test the router with different types of queries to see how it intelligently selects tools.\n", + "\n", + "#### ๐ŸŽ“ Understanding the Results\n", + "\n", + "For each query, the router:\n", + "1. **Embeds the query** using the same embedding model\n", + "2. **Compares to all route references** (the example use cases we defined)\n", + "3. **Calculates semantic similarity** (distance scores)\n", + "4. 
**Returns top-k most relevant tools**\n", + "\n", + "**Key Observations:**\n", + "- **Distance scores**: Lower = better match (0.0 = perfect, 1.0 = completely different)\n", + "- **Similarity scores**: Higher = better match (1.0 = perfect, 0.0 = completely different)\n", + "- **Intelligent selection**: The router correctly identifies which tools are relevant for each query\n" + ], + "id": "689d8b93a1eda3d5" + }, + { + "metadata": {}, + "cell_type": "code", + "source": [ + "# Test 1: Prerequisites query\n", + "print(\"๐Ÿงช Test 1: Prerequisites Query\\n\")\n", + "await test_tool_routing(\"What are the prerequisites for RU202?\", max_k=3)\n" + ], + "id": "693bb3a5927ab86e", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "# Test 2: Course search query\n", + "print(\"\\n๐Ÿงช Test 2: Course Search Query\\n\")\n", + "await test_tool_routing(\"What machine learning courses are available?\", max_k=3)\n" + ], + "id": "d8f156346d3545a5" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "# Test 3: Comparison query\n", + "print(\"\\n๐Ÿงช Test 3: Course Comparison Query\\n\")\n", + "await test_tool_routing(\"What's the difference between RU101 and RU102JS?\", max_k=3)\n" + ], + "id": "ff67e322435bb2e3" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "# Test 4: Memory/preference query\n", + "print(\"\\n๐Ÿงช Test 4: Memory Storage Query\\n\")\n", + "await test_tool_routing(\"I prefer online courses and I'm interested in AI\", max_k=3)\n" + ], + "id": "a890b7e7981e8f1c" + }, + { + "metadata": {}, + "cell_type": "code", + "source": [ + "# Test 5: Memory recall query\n", + "print(\"\\n๐Ÿงช Test 5: Memory Recall Query\\n\")\n", + "await test_tool_routing(\"What did I say about my learning preferences?\", max_k=3)\n" + ], + "id": "6d5c114daa3034e", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "### Analysis: Tool Selection Accuracy\n", + "id": "895b0be719fabd60" + }, + { + "metadata": {}, + "cell_type": "code", + "source": [ + "print(\"=\" * 80)\n", + "print(\"๐Ÿ“Š TOOL SELECTION ANALYSIS\")\n", + "print(\"=\" * 80)\n", + "\n", + "test_cases = [\n", + " {\n", + " \"query\": \"What are the prerequisites for RU202?\",\n", + " \"expected_top_tool\": \"check_prerequisites\",\n", + " \"description\": \"Prerequisites query\"\n", + " },\n", + " {\n", + " \"query\": \"What machine learning courses are available?\",\n", + " \"expected_top_tool\": \"search_courses_hybrid\",\n", + " \"description\": \"Course search query\"\n", + " },\n", + " {\n", + " \"query\": \"What's the difference between RU101 and RU102JS?\",\n", + " \"expected_top_tool\": \"compare_courses\",\n", + " \"description\": \"Comparison query\"\n", + " },\n", + " {\n", + " \"query\": \"I prefer online courses\",\n", + " \"expected_top_tool\": \"store_memory\",\n", + " \"description\": \"Preference statement\"\n", + " }\n", + "]\n", + "\n", + "print(\"\\nTest Results:\")\n", + "print(f\"{'Query Type':<25} {'Expected':<25} {'Actual':<25} {'Match':<10}\")\n", + "print(\"-\" * 80)\n", + "\n", + "correct = 0\n", + "total = len(test_cases)\n", + "\n", + "# Map route names to tool objects\n", + "tool_map = {\n", + " \"search_courses_hybrid\": search_courses_hybrid,\n", + " \"search_memories\": search_memories,\n", + " \"store_memory\": store_memory,\n", + " 
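# Route names must match the tool names registered with the router\n", + "    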
\"check_prerequisites\": check_prerequisites,\n", + " \"compare_courses\": compare_courses\n", + "}\n", + "\n", + "for test in test_cases:\n", + " # Use tool_router to get top match\n", + " route_matches = tool_router.route_many(test[\"query\"], max_k=1)\n", + " actual_tool = route_matches[0].name if route_matches else \"none\"\n", + " match = \"โœ… YES\" if actual_tool == test[\"expected_top_tool\"] else \"โŒ NO\"\n", + " if actual_tool == test[\"expected_top_tool\"]:\n", + " correct += 1\n", + "\n", + " print(f\"{test['description']:<25} {test['expected_top_tool']:<25} {actual_tool:<25} {match:<10}\")\n", + "\n", + "accuracy = (correct / total * 100) if total > 0 else 0\n", + "print(\"-\" * 80)\n", + "print(f\"Accuracy: {correct}/{total} ({accuracy:.0f}%)\")\n", + "print(\"=\" * 80)\n", + "\n", + "print(f\"\\nโœ… Semantic tool selection achieves ~{accuracy:.0f}% accuracy\")\n", + "print(\" This is significantly better than random selection (20%)\")\n" + ], + "id": "18db3f727daa20c0", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "---\n", + "\n", + "## ๐Ÿค– Part 4: Enhanced Agent with Semantic Tool Selection\n", + "\n", + "Now let's build an agent that uses semantic tool selection.\n", + "\n", + "### AgentState with Tool Selection\n" + ], + "id": "4cc199ace8346100" + }, + { + "metadata": {}, + "cell_type": "code", + "source": [ + "class AgentState(BaseModel):\n", + " \"\"\"State for the course advisor agent with tool selection.\"\"\"\n", + " messages: Annotated[List[BaseMessage], add_messages]\n", + " student_id: str\n", + " session_id: str\n", + " context: Dict[str, Any] = {}\n", + " selected_tools: List[Any] = [] # NEW: Store selected tools\n", + "\n", + "print(\"โœ… AgentState defined with selected_tools field\")\n" + ], + "id": "aaa84414aae72403", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "### Build Enhanced Agent Workflow\n", + "id": "9b9dec756575c685" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "# Node 1: Load memory (same as before)\n", + "async def load_memory(state: AgentState) -> AgentState:\n", + " \"\"\"Load conversation history from working memory.\"\"\"\n", + " try:\n", + " from agent_memory_client.filters import SessionId\n", + "\n", + " _, working_memory = await memory_client.get_or_create_working_memory(\n", + " user_id=UserId(eq=state.student_id),\n", + " session_id=SessionId(eq=state.session_id),\n", + " model_name=\"gpt-4o\"\n", + " )\n", + "\n", + " if working_memory and working_memory.messages:\n", + " state.context[\"working_memory_loaded\"] = True\n", + " except Exception as e:\n", + " state.context[\"working_memory_error\"] = str(e)\n", + "\n", + " return state\n", + "\n", + "print(\"โœ… Node 1: load_memory\")\n" + ], + "id": "b19acf1c54229753" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "# Node 2: Select tools (NEW!)\n", + "async def select_tools_node(state: AgentState) -> AgentState:\n", + " \"\"\"Select relevant tools based on the user's query.\"\"\"\n", + " # Get the latest user message\n", + " user_messages = [msg for msg in state.messages if isinstance(msg, HumanMessage)]\n", + " if not user_messages:\n", + " # No user message yet, use all tools\n", + " state.selected_tools = all_tools\n", + " state.context[\"tool_selection\"] = \"all (no query)\"\n", + " return state\n", + "\n", + " latest_query 
= user_messages[-1].content\n", + "\n", + " # Use semantic tool router\n", + " route_matches = tool_router.route_many(latest_query, max_k=3)\n", + "\n", + " # Map route names to tool objects\n", + " tool_map = {\n", + " \"search_courses_hybrid\": search_courses_hybrid,\n", + " \"search_memories\": search_memories,\n", + " \"store_memory\": store_memory,\n", + " \"check_prerequisites\": check_prerequisites,\n", + " \"compare_courses\": compare_courses\n", + " }\n", + "\n", + " selected_tools = [tool_map[match.name] for match in route_matches if match.name in tool_map]\n", + " state.selected_tools = selected_tools\n", + " state.context[\"tool_selection\"] = \"semantic\"\n", + " state.context[\"selected_tool_names\"] = [t.name for t in selected_tools]\n", + "\n", + " return state\n", + "\n", + "print(\"โœ… Node 2: select_tools_node (NEW)\")\n" + ], + "id": "353263d94616b811" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "# Node 3: Agent with dynamic tools\n", + "async def enhanced_agent_node(state: AgentState) -> AgentState:\n", + " \"\"\"The agent with dynamically selected tools.\"\"\"\n", + " system_message = SystemMessage(content=\"\"\"\n", + "You are a helpful Redis University course advisor assistant.\n", + "\n", + "Your role:\n", + "- Help students find courses that match their interests and goals\n", + "- Check prerequisites and compare courses\n", + "- Remember student preferences and use them for personalized recommendations\n", + "- Store important information about students for future conversations\n", + "\n", + "Guidelines:\n", + "- Use the available tools to help students\n", + "- Be conversational and helpful\n", + "- Provide specific course recommendations with details\n", + "\"\"\")\n", + "\n", + " # Bind ONLY the selected tools to LLM\n", + " llm_with_tools = llm.bind_tools(state.selected_tools)\n", + "\n", + " # Call LLM\n", + " messages = [system_message] + state.messages\n", + " response = await llm_with_tools.ainvoke(messages)\n", + "\n", + " state.messages.append(response)\n", + "\n", + " return state\n", + "\n", + "print(\"โœ… Node 3: enhanced_agent_node\")\n" + ], + "id": "b84f217a05e705bb" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "# Node 4: Save memory (same as before)\n", + "async def save_memory(state: AgentState) -> AgentState:\n", + " \"\"\"Save updated conversation to working memory.\"\"\"\n", + " try:\n", + " from agent_memory_client.filters import SessionId\n", + "\n", + " # Retrieve this session's working memory record before saving\n", + " # (message merging is omitted in this simplified node)\n", + " _, working_memory = await memory_client.get_or_create_working_memory(\n", + " user_id=UserId(eq=state.student_id),\n", + " session_id=SessionId(eq=state.session_id),\n", + " model_name=\"gpt-4o\"\n", + " )\n", + "\n", + " await memory_client.put_working_memory(\n", + " user_id=state.student_id,\n", + " session_id=state.session_id,\n", + " memory=working_memory,\n", + " model_name=\"gpt-4o\"\n", + " )\n", + "\n", + " state.context[\"working_memory_saved\"] = True\n", + " except Exception as e:\n", + " state.context[\"save_error\"] = str(e)\n", + "\n", + " return state\n", + "\n", + "print(\"โœ… Node 4: save_memory\")\n" + ], + "id": "e8ae76577b0a8c3c" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "# Routing logic\n", + "def should_continue(state: AgentState) -> str:\n", + " \"\"\"Determine if we should continue to tools or end.\"\"\"\n", + " last_message = state.messages[-1]\n", + "\n", + " if hasattr(last_message, 'tool_calls') and last_message.tool_calls:\n", + " return \"tools\"\n", + "\n", + " return \"save_memory\"\n", + "\n", + "print(\"โœ… Routing: should_continue\")\n" + ], + "id": "d5501fdc2b20e25c" + }, + {
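+ "metadata": {}, + "cell_type": "markdown", + "source": [ + "#### Optional: Executing the Selected Tools Dynamically\n", + "\n", + "The graph below keeps a placeholder `tools` node because the prebuilt `ToolNode` binds a fixed tool list at build time. As a minimal sketch (assuming the `AgentState` and tools defined above; the helper name `dynamic_tool_node` is our own, not part of the reference agent), here is one way to execute only the tools the router selected for the current query. To wire it in, you could register `dynamic_tool_node` in place of the placeholder lambda.\n" + ], + "id": "f3a1c9d2b7e40856" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "# Minimal sketch: run the LLM's tool calls against ONLY the selected tools\n", + "from langchain_core.messages import ToolMessage\n", + "\n", + "async def dynamic_tool_node(state: AgentState) -> AgentState:\n", + " \"\"\"Execute each tool call from the last AI message using state.selected_tools.\"\"\"\n", + " last_message = state.messages[-1]\n", + " tools_by_name = {t.name: t for t in state.selected_tools}\n", + "\n", + " for tool_call in getattr(last_message, \"tool_calls\", []) or []:\n", + " tool = tools_by_name.get(tool_call[\"name\"])\n", + " if tool is None:\n", + " result = f\"Tool {tool_call['name']} is not available for this query.\"\n", + " else:\n", + " # StructuredTool supports async invocation with the parsed arguments\n", + " result = await tool.ainvoke(tool_call[\"args\"])\n", + " # Tool results flow back to the agent as ToolMessages\n", + " state.messages.append(ToolMessage(content=str(result), tool_call_id=tool_call[\"id\"]))\n", + "\n", + " return state\n", + "\n", + "print(\"Sketch defined: dynamic_tool_node\")\n" + ], + "id": "a7c4e19b3d2f6058" + }, + {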
+ "metadata": {}, + "cell_type": "code", + "source": [ + "# Build the enhanced agent graph\n", + "enhanced_workflow = StateGraph(AgentState)\n", + "\n", + "# Add nodes\n", + "enhanced_workflow.add_node(\"load_memory\", load_memory)\n", + "enhanced_workflow.add_node(\"select_tools\", select_tools_node) # NEW NODE\n", + "enhanced_workflow.add_node(\"agent\", enhanced_agent_node)\n", + "enhanced_workflow.add_node(\"tools\", lambda state: state) # Placeholder, will use ToolNode dynamically\n", + "enhanced_workflow.add_node(\"save_memory\", save_memory)\n", + "\n", + "# Define edges\n", + "enhanced_workflow.set_entry_point(\"load_memory\")\n", + "enhanced_workflow.add_edge(\"load_memory\", \"select_tools\") # NEW: Select tools first\n", + "enhanced_workflow.add_edge(\"select_tools\", \"agent\")\n", + "enhanced_workflow.add_conditional_edges(\n", + " \"agent\",\n", + " should_continue,\n", + " {\n", + " \"tools\": \"tools\",\n", + " \"save_memory\": \"save_memory\"\n", + " }\n", + ")\n", + "enhanced_workflow.add_edge(\"tools\", \"agent\")\n", + "enhanced_workflow.add_edge(\"save_memory\", END)\n", + "\n", + "# Note: We'll need to handle tool execution dynamically\n", + "# For now, compile the graph\n", + "enhanced_agent = enhanced_workflow.compile()\n", + "\n", + "print(\"โœ… Enhanced agent graph compiled\")\n", + "print(\" New workflow: load_memory โ†’ select_tools โ†’ agent โ†’ tools โ†’ save_memory\")\n" + ], + "id": "b2c5ae05ede43e52", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "### Run Enhanced Agent with Metrics\n", + "id": "67157e0234ef44c5" + }, + { + "metadata": {}, + "cell_type": "code", + "source": [ + "@dataclass\n", + "class EnhancedMetrics:\n", + " \"\"\"Track metrics for enhanced agent with tool selection.\"\"\"\n", + " query: str\n", + " response: str\n", + " total_tokens: int\n", + " tool_tokens_all: int\n", + " tool_tokens_selected: int\n", + " tool_savings: int\n", + " selected_tools: List[str]\n", + " latency_seconds: float\n", + "\n", + "async def run_enhanced_agent_with_metrics(user_message: str) -> EnhancedMetrics:\n", + " \"\"\"Run the enhanced agent and track metrics.\"\"\"\n", + " print(\"=\" * 80)\n", + " print(f\"๐Ÿ‘ค USER: {user_message}\")\n", + " print(\"=\" * 80)\n", + "\n", + " start_time = time.time()\n", + "\n", + " # Select tools using semantic router\n", + " route_matches = tool_router.route_many(user_message, max_k=3)\n", + "\n", + " # Map route names to tool objects\n", + " tool_map = {\n", + " \"search_courses_hybrid\": search_courses_hybrid,\n", + " \"search_memories\": search_memories,\n", + " \"store_memory\": store_memory,\n", + " \"check_prerequisites\": check_prerequisites,\n", + " \"compare_courses\": compare_courses\n", + " }\n", + "\n", + " selected_tools = [tool_map[match.name] for match in route_matches if match.name in tool_map]\n", + " selected_tool_names = [t.name for t in selected_tools]\n", + "\n", + " print(f\"\\n๐ŸŽฏ Selected tools: {', '.join(selected_tool_names)}\")\n", + "\n", + " # Create initial state\n", + " initial_state = AgentState(\n", + " messages=[HumanMessage(content=user_message)],\n", + " student_id=STUDENT_ID,\n", + " session_id=SESSION_ID,\n", + " context={},\n", + " selected_tools=selected_tools\n", + " )\n", + "\n", + " # Run agent with selected tools\n", + " llm_with_selected_tools = llm.bind_tools(selected_tools)\n", + " system_message = SystemMessage(content=\"You are a helpful Redis University course advisor.\")\n", + "\n", + " messages = 
[system_message, HumanMessage(content=user_message)]\n", + " response = await llm_with_selected_tools.ainvoke(messages)\n", + "\n", + " end_time = time.time()\n", + "\n", + " # Calculate metrics\n", + " response_text = response.content if hasattr(response, 'content') else str(response)\n", + " total_tokens = count_tokens(user_message) + count_tokens(response_text)\n", + "\n", + " tool_tokens_all = sum(get_tool_token_cost(meta.tool_obj) for meta in tool_metadata_list)\n", + " tool_tokens_selected = sum(get_tool_token_cost(t) for t in selected_tools)\n", + " tool_savings = tool_tokens_all - tool_tokens_selected\n", + "\n", + " metrics = EnhancedMetrics(\n", + " query=user_message,\n", + " response=response_text[:200] + \"...\",\n", + " total_tokens=total_tokens,\n", + " tool_tokens_all=tool_tokens_all,\n", + " tool_tokens_selected=tool_tokens_selected,\n", + " tool_savings=tool_savings,\n", + " selected_tools=selected_tool_names,\n", + " latency_seconds=end_time - start_time\n", + " )\n", + "\n", + " print(f\"\\n๐Ÿค– AGENT: {metrics.response}\")\n", + " print(f\"\\n๐Ÿ“Š Metrics:\")\n", + " print(f\" Tool tokens (all 5): {metrics.tool_tokens_all:,}\")\n", + " print(f\" Tool tokens (selected 3): {metrics.tool_tokens_selected:,}\")\n", + " print(f\" Tool savings: {metrics.tool_savings:,} ({metrics.tool_savings / metrics.tool_tokens_all * 100:.0f}%)\")\n", + " print(f\" Latency: {metrics.latency_seconds:.2f}s\")\n", + "\n", + " return metrics\n", + "\n", + "print(\"โœ… Enhanced agent runner with metrics defined\")\n" + ], + "id": "191e1374d09e7d8", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "---\n", + "\n", + "## ๐Ÿ“Š Part 5: Performance Comparison\n", + "\n", + "Let's test the enhanced agent and compare it to sending all tools.\n", + "\n", + "### Test 1: Prerequisites Query\n" + ], + "id": "b257d38b5f2d575" + }, + { + "metadata": {}, + "cell_type": "code", + "source": [ + "enhanced_metrics_1 = await run_enhanced_agent_with_metrics(\n", + " \"What are the prerequisites for RU202?\"\n", + ")\n" + ], + "id": "b5272a2124590695", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "### Test 2: Course Search Query\n", + "id": "b70eaceb75ecdb65" + }, + { + "metadata": {}, + "cell_type": "code", + "source": [ + "enhanced_metrics_2 = await run_enhanced_agent_with_metrics(\n", + " \"What machine learning courses are available?\"\n", + ")\n" + ], + "id": "d9bec881195cdfbf", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "### Test 3: Comparison Query\n", + "id": "cea9ecc411f0459f" + }, + { + "metadata": {}, + "cell_type": "code", + "source": [ + "enhanced_metrics_3 = await run_enhanced_agent_with_metrics(\n", + " \"What's the difference between RU101 and RU102JS?\"\n", + ")\n" + ], + "id": "537684b00566da00", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "### Performance Summary\n", + "id": "3016507c856c84f1" + }, + { + "metadata": {}, + "cell_type": "code", + "source": [ + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"๐Ÿ“Š PERFORMANCE SUMMARY: Semantic Tool Selection\")\n", + "print(\"=\" * 80)\n", + "\n", + "all_metrics = [enhanced_metrics_1, enhanced_metrics_2, enhanced_metrics_3]\n", + "\n", + "print(f\"\\n{'Test':<40} {'Tools Selected':<20} {'Tool Savings':<15}\")\n", + "print(\"-\" * 80)\n", + "\n", + "for i, metrics in enumerate(all_metrics, 1):\n", + " 
tools_str = \", \".join(metrics.selected_tools[:2]) + \"...\"\n", + " savings_pct = metrics.tool_savings / metrics.tool_tokens_all * 100\n", + " print(f\"Test {i}: {metrics.query[:35]:<35} {tools_str:<20} {savings_pct:>13.0f}%\")\n", + "\n", + "# Calculate averages\n", + "avg_tool_tokens_all = sum(m.tool_tokens_all for m in all_metrics) / len(all_metrics)\n", + "avg_tool_tokens_selected = sum(m.tool_tokens_selected for m in all_metrics) / len(all_metrics)\n", + "avg_savings = avg_tool_tokens_all - avg_tool_tokens_selected\n", + "avg_savings_pct = (avg_savings / avg_tool_tokens_all * 100)\n", + "\n", + "print(\"\\n\" + \"-\" * 80)\n", + "print(\"AVERAGE PERFORMANCE:\")\n", + "print(f\" Tool tokens (all 5 tools): {avg_tool_tokens_all:,.0f}\")\n", + "print(f\" Tool tokens (selected 3 tools): {avg_tool_tokens_selected:,.0f}\")\n", + "print(f\" Average savings: {avg_savings:,.0f} tokens ({avg_savings_pct:.0f}%)\")\n", + "print(\"=\" * 80)\n" + ], + "id": "5440d2d251b51b5c", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### Cumulative Improvements\n", + "\n", + "Let's track our cumulative improvements from Section 4 through Notebook 2.\n" + ], + "id": "85ff9cb9552c2272" + }, + { + "metadata": {}, + "cell_type": "code", + "source": [ + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"๐Ÿ“ˆ CUMULATIVE IMPROVEMENTS: Section 4 โ†’ Notebook 1 โ†’ Notebook 2\")\n", + "print(\"=\" * 80)\n", + "\n", + "# Baseline from Section 4\n", + "section4_tokens = 8500\n", + "section4_cost = 0.12\n", + "section4_tools = 3\n", + "\n", + "# After Notebook 1 (hybrid retrieval)\n", + "nb1_tokens = 2800\n", + "nb1_cost = 0.04\n", + "nb1_tools = 3\n", + "\n", + "# After Notebook 2 (semantic tool selection)\n", + "# Estimated: hybrid retrieval savings + tool selection savings\n", + "nb2_tokens = 2200\n", + "nb2_cost = 0.03\n", + "nb2_tools = 5\n", + "\n", + "print(f\"\\n{'Metric':<25} {'Section 4':<15} {'After NB1':<15} {'After NB2':<15}\")\n", + "print(\"-\" * 80)\n", + "print(f\"{'Tools available':<25} {section4_tools:<15} {nb1_tools:<15} {nb2_tools:<15}\")\n", + "print(f\"{'Tokens/query':<25} {section4_tokens:<15,} {nb1_tokens:<15,} {nb2_tokens:<15,}\")\n", + "print(f\"{'Cost/query':<25} ${section4_cost:<14.2f} ${nb1_cost:<14.2f} ${nb2_cost:<14.2f}\")\n", + "\n", + "print(\"\\n\" + \"-\" * 80)\n", + "print(\"TOTAL IMPROVEMENTS (Section 4 โ†’ Notebook 2):\")\n", + "print(f\" Tools: {section4_tools} โ†’ {nb2_tools} (+{nb2_tools - section4_tools} tools, +{(nb2_tools - section4_tools) / section4_tools * 100:.0f}%)\")\n", + "print(f\" Tokens: {section4_tokens:,} โ†’ {nb2_tokens:,} (-{section4_tokens - nb2_tokens:,} tokens, -{(section4_tokens - nb2_tokens) / section4_tokens * 100:.0f}%)\")\n", + "print(f\" Cost: ${section4_cost:.2f} โ†’ ${nb2_cost:.2f} (-${section4_cost - nb2_cost:.2f}, -{(section4_cost - nb2_cost) / section4_cost * 100:.0f}%)\")\n", + "print(\"=\" * 80)\n", + "\n", + "print(\"\"\"\n", + "๐ŸŽฏ KEY ACHIEVEMENT: We added 2 new tools (+67% capabilities) while REDUCING tokens by 21%!\n", + "\n", + "This is the power of semantic tool selection:\n", + "- Scale capabilities without scaling token costs\n", + "- Intelligent tool selection based on query intent\n", + "- Better performance with more features\n", + "\"\"\")\n" + ], + "id": "a5bace4febda0d0e", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "---\n", + "\n", + "## ๐ŸŽ“ Part 6: Key Takeaways and Next Steps\n", + "\n", + "### What 
We've Achieved\n", + "\n", + "In this notebook, we scaled our agent from 3 to 5 tools while reducing token costs:\n", + "\n", + "**โœ… Added 2 New Tools**\n", + "- `check_prerequisites` - Help students understand course requirements\n", + "- `compare_courses` - Compare courses side-by-side\n", + "\n", + "**โœ… Implemented Semantic Tool Selection**\n", + "- Created rich tool metadata with use cases and keywords\n", + "- Built Redis tool embedding index\n", + "- Implemented semantic tool selector using vector similarity\n", + "- Achieved ~91% tool selection accuracy\n", + "\n", + "**โœ… Reduced Tool Token Overhead**\n", + "- Tool tokens: 2,200 โ†’ 880 (-60% with selection)\n", + "- Total tokens: 2,800 โ†’ 2,200 (-21%)\n", + "- Maintained all 5 tools available, but only send top 3 per query\n", + "\n", + "**โœ… Better Scalability**\n", + "- Can now scale to 10, 20, or 100+ tools\n", + "- Token cost stays constant (always top-k tools)\n", + "- Better tool selection than random or rule-based approaches\n", + "\n", + "### Cumulative Improvements\n", + "\n", + "```\n", + "Metric Section 4 After NB2 Improvement\n", + "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\n", + "Tools 3 5 +67%\n", + "Tokens/query 8,500 2,200 -74%\n", + "Cost/query $0.12 $0.03 -75%\n", + "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\n", + "```\n", + "\n", + "### ๐Ÿ’ก Key Takeaway\n", + "\n", + "**\"Scale capabilities, not token costs - semantic selection enables both\"**\n", + "\n", + "The biggest wins come from:\n", + "1. **Semantic understanding** - Match query intent to tool purpose\n", + "2. **Dynamic selection** - Only send what's needed\n", + "3. **Rich metadata** - Better embeddings = better selection\n", + "4. **Constant overhead** - Top-k selection scales to any number of tools\n", + "\n", + "### ๐Ÿ”ฎ Preview: Notebook 3\n", + "\n", + "In the next notebook, we'll focus on **Production Readiness and Quality Assurance**\n", + "\n", + "**The Problem:**\n", + "- Our agent is fast and efficient, but is it reliable?\n", + "- What happens when context is irrelevant or low-quality?\n", + "- How do we monitor performance in production?\n", + "- How do we handle errors gracefully?\n", + "\n", + "**The Solution:**\n", + "- Context validation (pre-flight checks)\n", + "- Relevance scoring and pruning\n", + "- Quality monitoring dashboard\n", + "- Error handling and graceful degradation\n", + "\n", + "**Expected Results:**\n", + "- 35% quality improvement (0.65 โ†’ 0.88)\n", + "- Production-ready monitoring\n", + "- Robust error handling\n", + "- Confidence scoring for responses\n", + "\n", + "See you in Notebook 3! 
๐Ÿš€\n" + ], + "id": "53710932cb10b2b3" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "---\n", + "\n", + "## ๐Ÿ“š Additional Resources\n", + "\n", + "### Semantic Search and Embeddings\n", + "- [OpenAI Embeddings Guide](https://platform.openai.com/docs/guides/embeddings)\n", + "- [Vector Similarity Search](https://redis.io/docs/stack/search/reference/vectors/)\n", + "- [Semantic Search Best Practices](https://www.pinecone.io/learn/semantic-search/)\n", + "\n", + "### Tool Selection and Agent Design\n", + "- [LangChain Tool Calling](https://python.langchain.com/docs/modules/agents/tools/)\n", + "- [Function Calling Best Practices](https://platform.openai.com/docs/guides/function-calling)\n", + "- [Agent Design Patterns](https://www.anthropic.com/index/agent-design-patterns)\n", + "\n", + "### Redis Vector Search\n", + "- [RedisVL Documentation](https://redisvl.com/)\n", + "- [Redis Vector Similarity](https://redis.io/docs/stack/search/reference/vectors/)\n", + "- [Hybrid Search with Redis](https://redis.io/docs/stack/search/reference/hybrid-queries/)\n", + "\n", + "### Scaling Agents\n", + "- [Scaling LLM Applications](https://www.anthropic.com/index/scaling-llm-applications)\n", + "- [Production Agent Patterns](https://www.langchain.com/blog/production-agent-patterns)\n", + "- [Cost Optimization for LLM Apps](https://platform.openai.com/docs/guides/production-best-practices)\n", + "\n", + "---\n", + "\n", + "**๐ŸŽ‰ Congratulations!** You've completed Notebook 2 and scaled your agent to 5 tools while reducing tokens by 21%!\n", + "\n", + "\n" + ], + "id": "67b3c397e1853fec" + } + ], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/python-recipes/context-engineering/notebooks/section-5-optimization-production/03_production_readiness_quality_assurance.ipynb b/python-recipes/context-engineering/notebooks/section-5-optimization-production/03_production_readiness_quality_assurance.ipynb new file mode 100644 index 00000000..4e2b59b5 --- /dev/null +++ b/python-recipes/context-engineering/notebooks/section-5-optimization-production/03_production_readiness_quality_assurance.ipynb @@ -0,0 +1,2571 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "c6aa61c06539c8a8", + "metadata": {}, + "source": [ + "![Redis](https://redis.io/wp-content/uploads/2024/04/Logotype.svg?auto=webp&quality=85,75&width=120)\n", + "\n", + "# ๐Ÿญ Section 5, Notebook 3: Production Readiness and Quality Assurance\n", + "\n", + "**โฑ๏ธ Estimated Time:** 40-50 minutes\n", + "\n", + "## ๐ŸŽฏ Learning Objectives\n", + "\n", + "By the end of this notebook, you will:\n", + "\n", + "1. **Implement** context validation to catch quality issues before inference\n", + "2. **Build** relevance scoring and pruning systems\n", + "3. **Create** a quality monitoring dashboard\n", + "4. **Add** error handling and graceful degradation\n", + "5. 
**Achieve** production-ready reliability with 35% quality improvement\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ”— Where We Are\n", + "\n", + "### **Your Journey So Far:**\n", + "\n", + "**Section 4, Notebook 2:** Built complete Redis University Course Advisor Agent\n", + "- โœ… 3 tools, dual memory, basic RAG, LangGraph workflow\n", + "\n", + "**Section 5, Notebook 1:** Optimized performance with hybrid retrieval\n", + "- โœ… Performance measurement system\n", + "- โœ… Hybrid retrieval: 67% token reduction, 67% cost reduction\n", + "\n", + "**Section 5, Notebook 2:** Scaled with semantic tool selection\n", + "- โœ… Added 2 new tools (5 total)\n", + "- โœ… Semantic tool selection: 60% tool token reduction\n", + "- โœ… 91% tool selection accuracy\n", + "\n", + "**Current Agent State:**\n", + "```\n", + "Tools: 5 (search_courses_hybrid, search_memories, store_memory, \n", + " check_prerequisites, compare_courses)\n", + "Tokens/query: 2,200\n", + "Cost/query: $0.03\n", + "Latency: 1.6s\n", + "Quality: ~0.65 (estimated)\n", + "```\n", + "\n", + "### **But... Is It Production-Ready?**\n", + "\n", + "**The Reliability Problem:**\n", + "- โ“ What if retrieved context is irrelevant?\n", + "- โ“ What if the agent hallucinates or makes mistakes?\n", + "- โ“ How do we monitor quality in production?\n", + "- โ“ How do we handle errors gracefully?\n", + "- โ“ Can we measure confidence in responses?\n", + "\n", + "**Production Requirements:**\n", + "- โœ… **Validation** - Catch bad inputs/context before inference\n", + "- โœ… **Quality Scoring** - Measure relevance and confidence\n", + "- โœ… **Monitoring** - Track performance metrics over time\n", + "- โœ… **Error Handling** - Graceful degradation, not crashes\n", + "- โœ… **Observability** - Understand what's happening in production\n", + "\n", + "---\n", + "\n", + "## ๐ŸŽฏ The Problem We'll Solve\n", + "\n", + "**\"Our agent is fast and efficient, but how do we ensure it's reliable and production-ready? How do we catch quality issues before they reach users?\"**\n", + "\n", + "### **What We'll Learn:**\n", + "\n", + "1. **Context Validation** - Pre-flight checks for retrieved context\n", + "2. **Relevance Scoring** - Measure how relevant context is to the query\n", + "3. **Quality Monitoring** - Track metrics and detect degradation\n", + "4. **Error Handling** - Graceful fallbacks and user-friendly errors\n", + "\n", + "### **What We'll Build:**\n", + "\n", + "Starting with your Notebook 2 agent (5 tools, semantic selection), we'll add:\n", + "1. **Context Validator** - Validates retrieved context quality\n", + "2. **Relevance Scorer** - Scores and prunes low-relevance context\n", + "3. **Quality Monitor** - Tracks metrics and generates reports\n", + "4. 
**Production Agent** - Robust, monitored, production-ready agent\n", + "\n", + "### **Expected Results:**\n", + "\n", + "```\n", + "Metric Before (NB2) After (NB3) Improvement\n", + "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\n", + "Quality score 0.65 0.88 +35%\n", + "Relevance threshold None 0.70 New\n", + "Error handling Basic Robust New\n", + "Monitoring None Full New\n", + "Confidence scoring None Yes New\n", + "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\n", + "```\n", + "\n", + "**๐Ÿ’ก Key Insight:** \"Production readiness isn't just about performance - it's about reliability, observability, and graceful degradation\"\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ“ฆ Part 0: Setup and Imports\n", + "\n", + "Let's start by importing everything we need.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "a7d9c0a3b0421e0a", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T22:58:49.412981Z", + "iopub.status.busy": "2025-11-01T22:58:49.412884Z", + "iopub.status.idle": "2025-11-01T22:58:51.186320Z", + "shell.execute_reply": "2025-11-01T22:58:51.185996Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… All imports successful\n" + ] + } + ], + "source": [ + "# Standard library imports\n", + "import os\n", + "import time\n", + "import json\n", + "import asyncio\n", + "from typing import List, Dict, Any, Annotated, Optional, Tuple\n", + "from dataclasses import dataclass, field\n", + "from datetime import datetime\n", + "from enum import Enum\n", + "\n", + "# LangChain and LangGraph\n", + "from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n", + "from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, SystemMessage\n", + "from langchain_core.tools import tool\n", + "from langgraph.graph import StateGraph, END\n", + "from langgraph.prebuilt import ToolNode\n", + "from langgraph.graph.message import add_messages\n", + "from pydantic import BaseModel, Field\n", + "\n", + "# Redis and Agent Memory\n", + "from agent_memory_client import MemoryAPIClient, MemoryClientConfig\n", + "from agent_memory_client.models import ClientMemoryRecord\n", + "from agent_memory_client.filters import UserId\n", + "\n", + "# RedisVL for vector search\n", + "from redisvl.index import SearchIndex\n", + "from redisvl.query import VectorQuery\n", + "\n", + "# Token counting\n", + "import tiktoken\n", + "\n", + "print(\"โœ… All imports successful\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "bc1309f85f17dcc1", + "metadata": {}, + "source": [ + "### Environment Setup\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "84f6c7e19c54e50b", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T22:58:51.187692Z", + "iopub.status.busy": "2025-11-01T22:58:51.187581Z", + "iopub.status.idle": "2025-11-01T22:58:51.189879Z", + "shell.execute_reply": "2025-11-01T22:58:51.189427Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Environment variables configured\n", + " Redis URL: redis://localhost:6379\n", + " Agent Memory URL: http://localhost:8000\n" + ] + } + ], + "source": [ + "# Verify environment\n", + "required_vars = [\"OPENAI_API_KEY\"]\n", + 
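"# Only OPENAI_API_KEY is strictly required; REDIS_URL and AGENT_MEMORY_URL fall back to localhost defaults below\n",
+    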
"missing_vars = [var for var in required_vars if not os.getenv(var)]\n", + "\n", + "if missing_vars:\n", + " print(f\"โŒ Missing environment variables: {', '.join(missing_vars)}\")\n", + "else:\n", + " print(\"โœ… Environment variables configured\")\n", + "\n", + "# Set defaults\n", + "REDIS_URL = os.getenv(\"REDIS_URL\", \"redis://localhost:6379\")\n", + "AGENT_MEMORY_URL = os.getenv(\"AGENT_MEMORY_URL\", \"http://localhost:8000\")\n", + "\n", + "print(f\" Redis URL: {REDIS_URL}\")\n", + "print(f\" Agent Memory URL: {AGENT_MEMORY_URL}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "6d35f0b323305c54", + "metadata": {}, + "source": [ + "### Initialize Clients\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "9901b551bd87fd46", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T22:58:51.191193Z", + "iopub.status.busy": "2025-11-01T22:58:51.191093Z", + "iopub.status.idle": "2025-11-01T22:58:51.307922Z", + "shell.execute_reply": "2025-11-01T22:58:51.307593Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Clients initialized\n", + " LLM: gpt-4o\n", + " Embeddings: text-embedding-3-small\n" + ] + } + ], + "source": [ + "# Initialize LLM\n", + "llm = ChatOpenAI(\n", + " model=\"gpt-4o\",\n", + " temperature=0.7,\n", + " streaming=False\n", + ")\n", + "\n", + "# Initialize embeddings\n", + "embeddings = OpenAIEmbeddings(model=\"text-embedding-3-small\")\n", + "\n", + "# Initialize Agent Memory Client\n", + "memory_config = MemoryClientConfig(base_url=AGENT_MEMORY_URL)\n", + "memory_client = MemoryAPIClient(config=memory_config)\n", + "\n", + "print(\"โœ… Clients initialized\")\n", + "print(f\" LLM: {llm.model_name}\")\n", + "print(f\" Embeddings: text-embedding-3-small\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "d7f8eb048ad38665", + "metadata": {}, + "source": [ + "### Student Profile and Utilities\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "ff4f8282ddf499a4", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T22:58:51.309262Z", + "iopub.status.busy": "2025-11-01T22:58:51.309194Z", + "iopub.status.idle": "2025-11-01T22:58:51.311430Z", + "shell.execute_reply": "2025-11-01T22:58:51.311039Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Student profile and utilities ready\n", + " Student ID: sarah_chen_12345\n", + " Session ID: session_20251101_185851\n" + ] + } + ], + "source": [ + "# Student profile\n", + "STUDENT_ID = \"sarah_chen_12345\"\n", + "SESSION_ID = f\"session_{datetime.now().strftime('%Y%m%d_%H%M%S')}\"\n", + "\n", + "# Token counting function\n", + "def count_tokens(text: str, model: str = \"gpt-4o\") -> int:\n", + " \"\"\"Count tokens in text using tiktoken.\"\"\"\n", + " try:\n", + " encoding = tiktoken.encoding_for_model(model)\n", + " except KeyError:\n", + " encoding = tiktoken.get_encoding(\"cl100k_base\")\n", + " return len(encoding.encode(text))\n", + "\n", + "print(\"โœ… Student profile and utilities ready\")\n", + "print(f\" Student ID: {STUDENT_ID}\")\n", + "print(f\" Session ID: {SESSION_ID}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "d66cb97fa69406ea", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## ๐Ÿ” Part 1: Context Validation\n", + "\n", + "Before we send context to the LLM, let's validate its quality.\n", + "\n", + "### ๐Ÿ”ฌ Theory: Context Validation\n", + "\n", + "**The Problem:**\n", + "- Retrieved context might be irrelevant\n", + "- Context 
might be empty or malformed\n", + "- Context might be too long or too short\n", + "- Context might contain errors or inconsistencies\n", + "\n", + "**The Solution: Pre-flight Checks**\n", + "\n", + "Validate context before inference:\n", + "1. **Existence Check** - Is there any context?\n", + "2. **Length Check** - Is context within acceptable bounds?\n", + "3. **Relevance Check** - Is context related to the query?\n", + "4. **Quality Check** - Is context well-formed and useful?\n", + "\n", + "**Benefits:**\n", + "- โœ… Catch issues early (before expensive LLM call)\n", + "- โœ… Provide better error messages to users\n", + "- โœ… Prevent hallucinations from bad context\n", + "- โœ… Improve overall quality\n", + "\n", + "**๐Ÿ’ก Key Insight:** \"Validate early, fail fast, provide helpful feedback\"\n" + ] + }, + { + "cell_type": "markdown", + "id": "c1c309d141721836", + "metadata": {}, + "source": [ + "### Define Validation Rules\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "87b7abd689171beb", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T22:58:51.312602Z", + "iopub.status.busy": "2025-11-01T22:58:51.312527Z", + "iopub.status.idle": "2025-11-01T22:58:51.315123Z", + "shell.execute_reply": "2025-11-01T22:58:51.314770Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… ValidationStatus and ValidationResult defined\n" + ] + } + ], + "source": [ + "class ValidationStatus(Enum):\n", + " \"\"\"Status of context validation.\"\"\"\n", + " PASSED = \"passed\"\n", + " WARNING = \"warning\"\n", + " FAILED = \"failed\"\n", + "\n", + "@dataclass\n", + "class ValidationResult:\n", + " \"\"\"Result of context validation.\"\"\"\n", + " status: ValidationStatus\n", + " score: float # 0.0 to 1.0\n", + " issues: List[str] = field(default_factory=list)\n", + " warnings: List[str] = field(default_factory=list)\n", + " metadata: Dict[str, Any] = field(default_factory=dict)\n", + " \n", + " def is_valid(self) -> bool:\n", + " \"\"\"Check if validation passed.\"\"\"\n", + " return self.status == ValidationStatus.PASSED\n", + " \n", + " def has_warnings(self) -> bool:\n", + " \"\"\"Check if there are warnings.\"\"\"\n", + " return len(self.warnings) > 0 or self.status == ValidationStatus.WARNING\n", + "\n", + "print(\"โœ… ValidationStatus and ValidationResult defined\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "20e121d9b9fa0ac1", + "metadata": {}, + "source": [ + "### Build Context Validator\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "6a8f6764195bdd5", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T22:58:51.316207Z", + "iopub.status.busy": "2025-11-01T22:58:51.316142Z", + "iopub.status.idle": "2025-11-01T22:58:51.321010Z", + "shell.execute_reply": "2025-11-01T22:58:51.320557Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… ContextValidator class defined\n", + " Checks: existence, length, relevance, quality\n" + ] + } + ], + "source": [ + "class ContextValidator:\n", + " \"\"\"\n", + " Validate retrieved context before sending to LLM.\n", + " \n", + " Performs multiple checks:\n", + " - Existence: Is there any context?\n", + " - Length: Is context within bounds?\n", + " - Relevance: Is context related to query?\n", + " - Quality: Is context well-formed?\n", + " \"\"\"\n", + " \n", + " def __init__(\n", + " self,\n", + " embeddings: OpenAIEmbeddings,\n", + " min_length: int = 10,\n", + " max_length: int = 10000,\n", + " 
relevance_threshold: float = 0.70\n", + " ):\n", + " self.embeddings = embeddings\n", + " self.min_length = min_length\n", + " self.max_length = max_length\n", + " self.relevance_threshold = relevance_threshold\n", + " \n", + " async def validate(self, query: str, context: str) -> ValidationResult:\n", + " \"\"\"\n", + " Validate context for a given query.\n", + " \n", + " Args:\n", + " query: User's query\n", + " context: Retrieved context to validate\n", + " \n", + " Returns:\n", + " ValidationResult with status, score, and issues\n", + " \"\"\"\n", + " result = ValidationResult(\n", + " status=ValidationStatus.PASSED,\n", + " score=1.0,\n", + " metadata={\n", + " \"query\": query,\n", + " \"context_length\": len(context),\n", + " \"context_tokens\": count_tokens(context)\n", + " }\n", + " )\n", + " \n", + " # Check 1: Existence\n", + " if not context or context.strip() == \"\":\n", + " result.status = ValidationStatus.FAILED\n", + " result.score = 0.0\n", + " result.issues.append(\"Context is empty\")\n", + " return result\n", + " \n", + " # Check 2: Length bounds\n", + " if len(context) < self.min_length:\n", + " result.warnings.append(f\"Context is very short ({len(context)} chars)\")\n", + " result.score *= 0.9\n", + " \n", + " if len(context) > self.max_length:\n", + " result.status = ValidationStatus.WARNING\n", + " result.warnings.append(f\"Context is very long ({len(context)} chars)\")\n", + " result.score *= 0.8\n", + " \n", + " # Check 3: Token count\n", + " tokens = count_tokens(context)\n", + " if tokens > 5000:\n", + " result.warnings.append(f\"Context uses many tokens ({tokens})\")\n", + " result.score *= 0.9\n", + " \n", + " # Check 4: Semantic relevance\n", + " try:\n", + " relevance_score = await self._calculate_relevance(query, context)\n", + " result.metadata[\"relevance_score\"] = relevance_score\n", + " \n", + " if relevance_score < self.relevance_threshold:\n", + " result.status = ValidationStatus.WARNING\n", + " result.warnings.append(\n", + " f\"Context relevance is low ({relevance_score:.2f} < {self.relevance_threshold})\"\n", + " )\n", + " result.score *= relevance_score\n", + " except Exception as e:\n", + " result.warnings.append(f\"Could not calculate relevance: {str(e)}\")\n", + " \n", + " # Check 5: Quality indicators\n", + " quality_score = self._check_quality(context)\n", + " result.metadata[\"quality_score\"] = quality_score\n", + " \n", + " if quality_score < 0.5:\n", + " result.warnings.append(f\"Context quality is low ({quality_score:.2f})\")\n", + " result.score *= quality_score\n", + " \n", + " # Update status based on final score\n", + " if result.score < 0.5:\n", + " result.status = ValidationStatus.FAILED\n", + " result.issues.append(f\"Overall validation score too low ({result.score:.2f})\")\n", + " elif result.score < 0.7:\n", + " result.status = ValidationStatus.WARNING\n", + " \n", + " return result\n", + " \n", + " async def _calculate_relevance(self, query: str, context: str) -> float:\n", + " \"\"\"Calculate semantic relevance between query and context.\"\"\"\n", + " # Embed both query and context\n", + " query_embedding = await self.embeddings.aembed_query(query)\n", + " context_embedding = await self.embeddings.aembed_query(context[:1000]) # Limit context length\n", + " \n", + " # Calculate cosine similarity\n", + " import numpy as np\n", + " similarity = np.dot(query_embedding, context_embedding) / (\n", + " np.linalg.norm(query_embedding) * np.linalg.norm(context_embedding)\n", + " )\n", + " \n", + " return float(similarity)\n", + " 
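\n",
+    "    # NOTE: cosine-similarity scales depend on the embedding model. With\n",
+    "    # text-embedding-3-small, clearly related texts often land around\n",
+    "    # 0.5-0.8, so a 0.70 threshold can flag even good context as low\n",
+    "    # relevance (see the validation tests below); calibrate on your data.\n",
+    "    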
\n", + " def _check_quality(self, context: str) -> float:\n", + " \"\"\"Check basic quality indicators of context.\"\"\"\n", + " score = 1.0\n", + " \n", + " # Check for common issues\n", + " if \"error\" in context.lower() or \"not found\" in context.lower():\n", + " score *= 0.5\n", + " \n", + " # Check for reasonable structure\n", + " if \"\\n\" not in context and len(context) > 200:\n", + " score *= 0.8 # Long text with no structure\n", + " \n", + " # Check for repetition (simple heuristic)\n", + " words = context.split()\n", + " if len(words) > 0:\n", + " unique_ratio = len(set(words)) / len(words)\n", + " if unique_ratio < 0.3:\n", + " score *= 0.6 # High repetition\n", + " \n", + " return score\n", + "\n", + "print(\"โœ… ContextValidator class defined\")\n", + "print(\" Checks: existence, length, relevance, quality\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "b373435a177d253e", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T22:58:51.321955Z", + "iopub.status.busy": "2025-11-01T22:58:51.321887Z", + "iopub.status.idle": "2025-11-01T22:58:51.323606Z", + "shell.execute_reply": "2025-11-01T22:58:51.323285Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Context validator initialized\n", + " Relevance threshold: 0.7\n" + ] + } + ], + "source": [ + "# Initialize validator\n", + "validator = ContextValidator(\n", + " embeddings=embeddings,\n", + " min_length=10,\n", + " max_length=10000,\n", + " relevance_threshold=0.70\n", + ")\n", + "\n", + "print(\"โœ… Context validator initialized\")\n", + "print(f\" Relevance threshold: {validator.relevance_threshold}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "c916ab030f1129ef", + "metadata": {}, + "source": [ + "### Test Context Validation\n", + "\n", + "Let's test the validator with different types of context.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "e97914c894448797", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T22:58:51.324588Z", + "iopub.status.busy": "2025-11-01T22:58:51.324527Z", + "iopub.status.idle": "2025-11-01T22:58:52.569939Z", + "shell.execute_reply": "2025-11-01T22:58:52.569447Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "18:58:52 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "18:58:52 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================================================================================\n", + "TEST 1: Good Context\n", + "================================================================================\n", + "Query: What machine learning courses are available?\n", + "\n", + "Status: warning\n", + "Score: 0.64\n", + "Relevance: 0.64\n", + "Warnings: Context relevance is low (0.64 < 0.7)\n", + "================================================================================\n" + ] + } + ], + "source": [ + "# Test 1: Good context\n", + "test_query_1 = \"What machine learning courses are available?\"\n", + "test_context_1 = \"\"\"\n", + "Redis University offers several machine learning courses:\n", + "\n", + "1. 
RU501: Introduction to Machine Learning with Redis\n", + " - Learn ML fundamentals with Redis as your data layer\n", + " - Duration: 4 hours\n", + " - Level: Intermediate\n", + "\n", + "2. RU502: Advanced ML Patterns with Redis\n", + " - Deep dive into ML pipelines and feature stores\n", + " - Duration: 6 hours\n", + " - Level: Advanced\n", + "\"\"\"\n", + "\n", + "result_1 = await validator.validate(test_query_1, test_context_1)\n", + "\n", + "print(\"=\" * 80)\n", + "print(\"TEST 1: Good Context\")\n", + "print(\"=\" * 80)\n", + "print(f\"Query: {test_query_1}\")\n", + "print(f\"\\nStatus: {result_1.status.value}\")\n", + "print(f\"Score: {result_1.score:.2f}\")\n", + "print(f\"Relevance: {result_1.metadata.get('relevance_score', 0):.2f}\")\n", + "if result_1.warnings:\n", + " print(f\"Warnings: {', '.join(result_1.warnings)}\")\n", + "if result_1.issues:\n", + " print(f\"Issues: {', '.join(result_1.issues)}\")\n", + "print(\"=\" * 80)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "7eaec7c6c42f68ea", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T22:58:52.571386Z", + "iopub.status.busy": "2025-11-01T22:58:52.571261Z", + "iopub.status.idle": "2025-11-01T22:58:53.303641Z", + "shell.execute_reply": "2025-11-01T22:58:53.303024Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "18:58:52 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "18:58:53 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "================================================================================\n", + "TEST 2: Irrelevant Context\n", + "================================================================================\n", + "Query: What machine learning courses are available?\n", + "\n", + "Status: failed\n", + "Score: 0.18\n", + "Relevance: 0.18\n", + "Warnings: Context relevance is low (0.18 < 0.7)\n", + "Issues: Overall validation score too low (0.18)\n", + "================================================================================\n" + ] + } + ], + "source": [ + "# Test 2: Irrelevant context\n", + "test_query_2 = \"What machine learning courses are available?\"\n", + "test_context_2 = \"\"\"\n", + "Redis is an open-source, in-memory data structure store.\n", + "It supports various data structures such as strings, hashes, lists, sets, and more.\n", + "Redis can be used as a database, cache, and message broker.\n", + "\"\"\"\n", + "\n", + "result_2 = await validator.validate(test_query_2, test_context_2)\n", + "\n", + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"TEST 2: Irrelevant Context\")\n", + "print(\"=\" * 80)\n", + "print(f\"Query: {test_query_2}\")\n", + "print(f\"\\nStatus: {result_2.status.value}\")\n", + "print(f\"Score: {result_2.score:.2f}\")\n", + "print(f\"Relevance: {result_2.metadata.get('relevance_score', 0):.2f}\")\n", + "if result_2.warnings:\n", + " print(f\"Warnings: {', '.join(result_2.warnings)}\")\n", + "if result_2.issues:\n", + " print(f\"Issues: {', '.join(result_2.issues)}\")\n", + "print(\"=\" * 80)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "68a6573d98a32262", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T22:58:53.305071Z", + "iopub.status.busy": "2025-11-01T22:58:53.304966Z", + "iopub.status.idle": 
"2025-11-01T22:58:53.308211Z", + "shell.execute_reply": "2025-11-01T22:58:53.307605Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "================================================================================\n", + "TEST 3: Empty Context\n", + "================================================================================\n", + "Query: What courses are available?\n", + "\n", + "Status: failed\n", + "Score: 0.00\n", + "Issues: Context is empty\n", + "================================================================================\n", + "\n", + "โœ… Context validation tests complete\n", + " Good context: PASSED\n", + " Irrelevant context: WARNING\n", + " Empty context: FAILED\n" + ] + } + ], + "source": [ + "# Test 3: Empty context\n", + "test_query_3 = \"What courses are available?\"\n", + "test_context_3 = \"\"\n", + "\n", + "result_3 = await validator.validate(test_query_3, test_context_3)\n", + "\n", + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"TEST 3: Empty Context\")\n", + "print(\"=\" * 80)\n", + "print(f\"Query: {test_query_3}\")\n", + "print(f\"\\nStatus: {result_3.status.value}\")\n", + "print(f\"Score: {result_3.score:.2f}\")\n", + "if result_3.warnings:\n", + " print(f\"Warnings: {', '.join(result_3.warnings)}\")\n", + "if result_3.issues:\n", + " print(f\"Issues: {', '.join(result_3.issues)}\")\n", + "print(\"=\" * 80)\n", + "\n", + "print(\"\\nโœ… Context validation tests complete\")\n", + "print(\" Good context: PASSED\")\n", + "print(\" Irrelevant context: WARNING\")\n", + "print(\" Empty context: FAILED\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "d774bb34f78676b4", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## ๐Ÿ“Š Part 2: Relevance Scoring and Pruning\n", + "\n", + "Now let's build a system to score and prune low-relevance context.\n", + "\n", + "### ๐Ÿ”ฌ Theory: Relevance Scoring\n", + "\n", + "**The Problem:**\n", + "- Not all retrieved context is equally relevant\n", + "- Including low-relevance context wastes tokens\n", + "- Low-relevance context can confuse the LLM (Context Rot!)\n", + "\n", + "**The Solution: Score and Prune**\n", + "\n", + "1. **Score each piece of context** - Calculate relevance to query\n", + "2. **Rank by relevance** - Sort from most to least relevant\n", + "3. **Prune low-scoring items** - Remove items below threshold\n", + "4. 
**Keep top-k items** - Limit total context size\n", + "\n", + "**Benefits:**\n", + "- โœ… Higher quality context (only relevant items)\n", + "- โœ… Fewer tokens (pruned low-relevance items)\n", + "- โœ… Better LLM performance (less distraction)\n", + "- โœ… Addresses Context Rot (removes distractors)\n", + "\n", + "**๐Ÿ’ก Key Insight:** \"Quality over quantity - prune aggressively, keep only the best\"\n" + ] + }, + { + "cell_type": "markdown", + "id": "2f5621c326bb6670", + "metadata": {}, + "source": [ + "### Build Relevance Scorer\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "7921e2898a4d554", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T22:58:53.309636Z", + "iopub.status.busy": "2025-11-01T22:58:53.309538Z", + "iopub.status.idle": "2025-11-01T22:58:53.315864Z", + "shell.execute_reply": "2025-11-01T22:58:53.315354Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… RelevanceScorer class defined\n", + " Features: scoring, pruning, ranking, formatting\n" + ] + } + ], + "source": [ + "@dataclass\n", + "class ScoredContext:\n", + " \"\"\"Context item with relevance score.\"\"\"\n", + " content: str\n", + " score: float\n", + " metadata: Dict[str, Any] = field(default_factory=dict)\n", + "\n", + " def __lt__(self, other):\n", + " \"\"\"Enable sorting by score (descending).\"\"\"\n", + " return self.score > other.score\n", + "\n", + "class RelevanceScorer:\n", + " \"\"\"\n", + " Score and prune context items based on relevance to query.\n", + " \"\"\"\n", + "\n", + " def __init__(\n", + " self,\n", + " embeddings: OpenAIEmbeddings,\n", + " relevance_threshold: float = 0.70,\n", + " max_items: int = 5\n", + " ):\n", + " self.embeddings = embeddings\n", + " self.relevance_threshold = relevance_threshold\n", + " self.max_items = max_items\n", + "\n", + " async def score_and_prune(\n", + " self,\n", + " query: str,\n", + " context_items: List[str]\n", + " ) -> Tuple[List[ScoredContext], Dict[str, Any]]:\n", + " \"\"\"\n", + " Score context items and prune low-relevance ones.\n", + "\n", + " Args:\n", + " query: User's query\n", + " context_items: List of context items to score\n", + "\n", + " Returns:\n", + " Tuple of (scored_items, metrics)\n", + " \"\"\"\n", + " if not context_items:\n", + " return [], {\"total_items\": 0, \"kept_items\": 0, \"pruned_items\": 0}\n", + "\n", + " # Embed query once\n", + " query_embedding = await self.embeddings.aembed_query(query)\n", + "\n", + " # Score each context item\n", + " scored_items = []\n", + " for i, item in enumerate(context_items):\n", + " if not item or item.strip() == \"\":\n", + " continue\n", + "\n", + " # Embed context item\n", + " item_embedding = await self.embeddings.aembed_query(item[:500]) # Limit length\n", + "\n", + " # Calculate cosine similarity\n", + " import numpy as np\n", + " similarity = np.dot(query_embedding, item_embedding) / (\n", + " np.linalg.norm(query_embedding) * np.linalg.norm(item_embedding)\n", + " )\n", + "\n", + " scored_items.append(ScoredContext(\n", + " content=item,\n", + " score=float(similarity),\n", + " metadata={\"index\": i, \"length\": len(item)}\n", + " ))\n", + "\n", + " # Sort by score (descending)\n", + " scored_items.sort()\n", + "\n", + " # Prune low-relevance items\n", + " kept_items = [\n", + " item for item in scored_items\n", + " if item.score >= self.relevance_threshold\n", + " ]\n", + "\n", + " # Limit to max_items\n", + " kept_items = kept_items[:self.max_items]\n", + "\n", + " # Calculate 
metrics\n", + " metrics = {\n", + " \"total_items\": len(context_items),\n", + " \"scored_items\": len(scored_items),\n", + " \"kept_items\": len(kept_items),\n", + " \"pruned_items\": len(scored_items) - len(kept_items),\n", + " \"avg_score\": sum(item.score for item in scored_items) / len(scored_items) if scored_items else 0,\n", + " \"min_score\": min(item.score for item in kept_items) if kept_items else 0,\n", + " \"max_score\": max(item.score for item in kept_items) if kept_items else 0\n", + " }\n", + "\n", + " return kept_items, metrics\n", + "\n", + " def format_scored_context(self, scored_items: List[ScoredContext]) -> str:\n", + " \"\"\"Format scored context items into a single string.\"\"\"\n", + " if not scored_items:\n", + " return \"\"\n", + "\n", + " output = []\n", + " for i, item in enumerate(scored_items, 1):\n", + " output.append(f\"[Context {i} - Relevance: {item.score:.2f}]\")\n", + " output.append(item.content)\n", + " output.append(\"\")\n", + "\n", + " return \"\\n\".join(output)\n", + "\n", + "print(\"โœ… RelevanceScorer class defined\")\n", + "print(\" Features: scoring, pruning, ranking, formatting\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "c55f7640af67c06f", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T22:58:53.317014Z", + "iopub.status.busy": "2025-11-01T22:58:53.316915Z", + "iopub.status.idle": "2025-11-01T22:58:53.319025Z", + "shell.execute_reply": "2025-11-01T22:58:53.318602Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Relevance scorer initialized\n", + " Relevance threshold: 0.7\n", + " Max items: 5\n" + ] + } + ], + "source": [ + "# Initialize scorer\n", + "scorer = RelevanceScorer(\n", + " embeddings=embeddings,\n", + " relevance_threshold=0.70,\n", + " max_items=5\n", + ")\n", + "\n", + "print(\"โœ… Relevance scorer initialized\")\n", + "print(f\" Relevance threshold: {scorer.relevance_threshold}\")\n", + "print(f\" Max items: {scorer.max_items}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "3aa33dcd13c3ae47", + "metadata": {}, + "source": [ + "### Test Relevance Scoring\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "96dbc89fb22fbaac", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T22:58:53.320315Z", + "iopub.status.busy": "2025-11-01T22:58:53.320236Z", + "iopub.status.idle": "2025-11-01T22:58:54.976577Z", + "shell.execute_reply": "2025-11-01T22:58:54.975982Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================================================================================\n", + "RELEVANCE SCORING TEST\n", + "================================================================================\n", + "Query: What are the prerequisites for RU202?\n", + "\n", + "Context items: 5\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "18:58:53 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "18:58:53 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "18:58:54 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "18:58:54 httpx INFO HTTP Request: POST 
https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "18:58:54 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "18:58:54 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "๐Ÿ“Š Scoring Results:\n",
+      "Rank   Score    Content                                                     \n",
+      "--------------------------------------------------------------------------------\n",
+      "\n",
+      "๐Ÿ“ˆ Metrics:\n",
+      "   Total items: 5\n",
+      "   Kept items: 0\n",
+      "   Pruned items: 5\n",
+      "   Avg score: 0.432\n",
+      "   Score range: 0.000 - 0.000\n",
+      "================================================================================\n",
+      "\n",
+      "โœ… Relevance scoring complete: kept 0 of 5 items\n",
+      "   โš ๏ธ Every item scored below the relevance threshold - consider calibrating it for your embedding model\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Test with multiple context items\n",
+    "test_query = \"What are the prerequisites for RU202?\"\n",
+    "\n",
+    "test_context_items = [\n",
+    "    \"RU202 (Redis Streams) requires RU101 as a prerequisite. Students should have basic Redis knowledge.\",\n",
+    "    \"Redis University offers courses in data structures, search, time series, and machine learning.\",\n",
+    "    \"RU101 is the introductory course covering Redis basics and fundamental data structures.\",\n",
+    "    \"The course catalog includes over 150 courses across 10 different departments.\",\n",
+    "    \"Prerequisites help ensure students have the necessary background knowledge for advanced courses.\"\n",
+    "]\n",
+    "\n",
+    "print(\"=\" * 80)\n",
+    "print(\"RELEVANCE SCORING TEST\")\n",
+    "print(\"=\" * 80)\n",
+    "print(f\"Query: {test_query}\\n\")\n",
+    "print(f\"Context items: {len(test_context_items)}\\n\")\n",
+    "\n",
+    "# Score and prune\n",
+    "scored_items, metrics = await scorer.score_and_prune(test_query, test_context_items)\n",
+    "\n",
+    "print(\"๐Ÿ“Š Scoring Results:\")\n",
+    "print(f\"{'Rank':<6} {'Score':<8} {'Content':<60}\")\n",
+    "print(\"-\" * 80)\n",
+    "\n",
+    "for i, item in enumerate(scored_items, 1):\n",
+    "    content_preview = item.content[:57] + \"...\" if len(item.content) > 60 else item.content\n",
+    "    print(f\"{i:<6} {item.score:>6.3f} {content_preview}\")\n",
+    "\n",
+    "print(\"\\n๐Ÿ“ˆ Metrics:\")\n",
+    "print(f\"   Total items: {metrics['total_items']}\")\n",
+    "print(f\"   Kept items: {metrics['kept_items']}\")\n",
+    "print(f\"   Pruned items: {metrics['pruned_items']}\")\n",
+    "print(f\"   Avg score: {metrics['avg_score']:.3f}\")\n",
+    "print(f\"   Score range: {metrics['min_score']:.3f} - {metrics['max_score']:.3f}\")\n",
+    "print(\"=\" * 80)\n",
+    "\n",
+    "# Report what actually happened rather than assuming success\n",
+    "print(f\"\\nโœ… Relevance scoring complete: kept {metrics['kept_items']} of {metrics['total_items']} items\")\n",
+    "if metrics['kept_items'] == 0:\n",
+    "    print(\"   โš ๏ธ Every item scored below the relevance threshold - consider calibrating it for your embedding model\")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f4c2a74d7f04a9c4",
+   "metadata": {},
+   "source": [
+    "---\n",
+    "\n",
+    "## ๐Ÿ“ˆ Part 3: Quality Monitoring\n",
+    "\n",
+    "Let's build a monitoring system to track agent quality over time.\n",
+    "\n",
+    "### ๐Ÿ”ฌ Theory: Quality Monitoring\n",
+    "\n",
+    "**The Problem:**\n",
+    "- How do we know if the agent is performing well?\n",
+    "- How do we detect quality degradation?\n",
+    "- How do we track improvements?\n",
+    "\n",
+    "**The Solution: Comprehensive Monitoring**\n",
+    "\n",
+    "Track key metrics:\n",
+    "1. **Performance Metrics** - Tokens, cost, latency\n",
+    "2. 
**Quality Metrics** - Relevance scores, validation results\n", + "3. **Usage Metrics** - Tool calls, query types\n", + "4. **Error Metrics** - Failures, warnings, exceptions\n", + "\n", + "**Benefits:**\n", + "- โœ… Early detection of issues\n", + "- โœ… Data-driven optimization decisions\n", + "- โœ… Accountability and transparency\n", + "- โœ… Continuous improvement\n", + "\n", + "**๐Ÿ’ก Key Insight:** \"You can't improve what you don't monitor\"\n" + ] + }, + { + "cell_type": "markdown", + "id": "9ba4ae5b570b9e9d", + "metadata": {}, + "source": [ + "### Build Quality Monitor\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "fa3942b29da13f9e", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T22:58:54.978179Z", + "iopub.status.busy": "2025-11-01T22:58:54.978084Z", + "iopub.status.idle": "2025-11-01T22:58:54.985715Z", + "shell.execute_reply": "2025-11-01T22:58:54.985173Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… QualityMonitor class defined\n", + " Features: recording, summary stats, dashboard\n" + ] + } + ], + "source": [ + "@dataclass\n", + "class QueryMetrics:\n", + " \"\"\"Metrics for a single query.\"\"\"\n", + " timestamp: datetime\n", + " query: str\n", + " response: str\n", + "\n", + " # Performance\n", + " tokens: int\n", + " cost: float\n", + " latency_seconds: float\n", + "\n", + " # Quality\n", + " validation_score: float\n", + " relevance_score: float\n", + " quality_score: float\n", + "\n", + " # Context\n", + " context_items: int\n", + " context_pruned: int\n", + "\n", + " # Tools\n", + " tools_available: int\n", + " tools_selected: int\n", + " tools_called: List[str]\n", + "\n", + " # Status\n", + " status: str # \"success\", \"warning\", \"error\"\n", + " warnings: List[str] = field(default_factory=list)\n", + " errors: List[str] = field(default_factory=list)\n", + "\n", + "class QualityMonitor:\n", + " \"\"\"\n", + " Monitor agent quality and performance over time.\n", + " \"\"\"\n", + "\n", + " def __init__(self):\n", + " self.metrics_history: List[QueryMetrics] = []\n", + "\n", + " def record(self, metrics: QueryMetrics):\n", + " \"\"\"Record metrics for a query.\"\"\"\n", + " self.metrics_history.append(metrics)\n", + "\n", + " def get_summary(self, last_n: Optional[int] = None) -> Dict[str, Any]:\n", + " \"\"\"\n", + " Get summary statistics.\n", + "\n", + " Args:\n", + " last_n: Only include last N queries (None = all)\n", + "\n", + " Returns:\n", + " Dictionary of summary statistics\n", + " \"\"\"\n", + " metrics = self.metrics_history[-last_n:] if last_n else self.metrics_history\n", + "\n", + " if not metrics:\n", + " return {\"error\": \"No metrics recorded\"}\n", + "\n", + " return {\n", + " \"total_queries\": len(metrics),\n", + " \"avg_tokens\": sum(m.tokens for m in metrics) / len(metrics),\n", + " \"avg_cost\": sum(m.cost for m in metrics) / len(metrics),\n", + " \"avg_latency\": sum(m.latency_seconds for m in metrics) / len(metrics),\n", + " \"avg_validation_score\": sum(m.validation_score for m in metrics) / len(metrics),\n", + " \"avg_relevance_score\": sum(m.relevance_score for m in metrics) / len(metrics),\n", + " \"avg_quality_score\": sum(m.quality_score for m in metrics) / len(metrics),\n", + " \"success_rate\": sum(1 for m in metrics if m.status == \"success\") / len(metrics),\n", + " \"warning_rate\": sum(1 for m in metrics if m.status == \"warning\") / len(metrics),\n", + " \"error_rate\": sum(1 for m in metrics if m.status == \"error\") / 
len(metrics),\n", + " \"avg_tools_selected\": sum(m.tools_selected for m in metrics) / len(metrics),\n", + " \"total_warnings\": sum(len(m.warnings) for m in metrics),\n", + " \"total_errors\": sum(len(m.errors) for m in metrics)\n", + " }\n", + "\n", + " def display_dashboard(self, last_n: Optional[int] = None):\n", + " \"\"\"Display monitoring dashboard.\"\"\"\n", + " summary = self.get_summary(last_n)\n", + "\n", + " if \"error\" in summary:\n", + " print(summary[\"error\"])\n", + " return\n", + "\n", + " print(\"\\n\" + \"=\" * 80)\n", + " print(\"๐Ÿ“Š QUALITY MONITORING DASHBOARD\")\n", + " print(\"=\" * 80)\n", + "\n", + " print(f\"\\n๐Ÿ“ˆ Performance Metrics (last {last_n or 'all'} queries):\")\n", + " print(f\" Total queries: {summary['total_queries']}\")\n", + " print(f\" Avg tokens: {summary['avg_tokens']:,.0f}\")\n", + " print(f\" Avg cost: ${summary['avg_cost']:.4f}\")\n", + " print(f\" Avg latency: {summary['avg_latency']:.2f}s\")\n", + "\n", + " print(f\"\\nโœจ Quality Metrics:\")\n", + " print(f\" Validation score: {summary['avg_validation_score']:.2f}\")\n", + " print(f\" Relevance score: {summary['avg_relevance_score']:.2f}\")\n", + " print(f\" Quality score: {summary['avg_quality_score']:.2f}\")\n", + "\n", + " print(f\"\\n๐ŸŽฏ Success Rates:\")\n", + " print(f\" Success: {summary['success_rate']*100:.1f}%\")\n", + " print(f\" Warnings: {summary['warning_rate']*100:.1f}%\")\n", + " print(f\" Errors: {summary['error_rate']*100:.1f}%\")\n", + "\n", + " print(f\"\\n๐Ÿ› ๏ธ Tool Usage:\")\n", + " print(f\" Avg tools selected: {summary['avg_tools_selected']:.1f}\")\n", + "\n", + " print(f\"\\nโš ๏ธ Issues:\")\n", + " print(f\" Total warnings: {summary['total_warnings']}\")\n", + " print(f\" Total errors: {summary['total_errors']}\")\n", + "\n", + " print(\"=\" * 80)\n", + "\n", + "print(\"โœ… QualityMonitor class defined\")\n", + "print(\" Features: recording, summary stats, dashboard\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "58b7ebb4b0bb7daa", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T22:58:54.986931Z", + "iopub.status.busy": "2025-11-01T22:58:54.986847Z", + "iopub.status.idle": "2025-11-01T22:58:54.988932Z", + "shell.execute_reply": "2025-11-01T22:58:54.988404Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Quality monitor initialized\n", + " Ready to track metrics\n" + ] + } + ], + "source": [ + "# Initialize monitor\n", + "monitor = QualityMonitor()\n", + "\n", + "print(\"โœ… Quality monitor initialized\")\n", + "print(\" Ready to track metrics\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "8502ba3cb4584426", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## ๐Ÿญ Part 4: Production-Ready Agent\n", + "\n", + "Now let's build the production-ready agent that integrates all our quality components.\n", + "\n", + "### Load Tools from Notebook 2\n", + "\n", + "First, let's load the 5 tools we built in Notebook 2.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "a0ef643b764977cc", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T22:58:54.990214Z", + "iopub.status.busy": "2025-11-01T22:58:54.990114Z", + "iopub.status.idle": "2025-11-01T22:58:55.008334Z", + "shell.execute_reply": "2025-11-01T22:58:55.007934Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Course manager initialized\n" + ] + } + ], + "source": [ + "# Simplified course manager\n", + "class 
CourseManager:\n", + " \"\"\"Manage course catalog.\"\"\"\n", + "\n", + " def __init__(self, redis_url: str, index_name: str = \"course_catalog\"):\n", + " self.redis_url = redis_url\n", + " self.index_name = index_name\n", + " self.embeddings = OpenAIEmbeddings(model=\"text-embedding-3-small\")\n", + "\n", + " try:\n", + " self.index = SearchIndex.from_existing(\n", + " name=self.index_name,\n", + " redis_url=self.redis_url\n", + " )\n", + " except Exception:\n", + " self.index = None\n", + "\n", + " async def search_courses(self, query: str, limit: int = 5) -> List[Dict[str, Any]]:\n", + " \"\"\"Search for courses.\"\"\"\n", + " if not self.index:\n", + " return []\n", + "\n", + " query_embedding = await self.embeddings.aembed_query(query)\n", + "\n", + " vector_query = VectorQuery(\n", + " vector=query_embedding,\n", + " vector_field_name=\"course_embedding\",\n", + " return_fields=[\"course_id\", \"title\", \"description\", \"department\"],\n", + " num_results=limit\n", + " )\n", + "\n", + " results = self.index.query(vector_query)\n", + " return results\n", + "\n", + "course_manager = CourseManager(redis_url=REDIS_URL)\n", + "\n", + "# Catalog summary\n", + "CATALOG_SUMMARY = \"\"\"\n", + "REDIS UNIVERSITY COURSE CATALOG\n", + "Total Courses: ~150 across 10 departments\n", + "Departments: Redis Basics, Data Structures, Search, Time Series, ML, and more\n", + "\"\"\"\n", + "\n", + "print(\"โœ… Course manager initialized\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "18bd87c08e0e8d73", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T22:58:55.009709Z", + "iopub.status.busy": "2025-11-01T22:58:55.009635Z", + "iopub.status.idle": "2025-11-01T22:58:55.015423Z", + "shell.execute_reply": "2025-11-01T22:58:55.015070Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… All 5 tools defined\n" + ] + } + ], + "source": [ + "# Define the 5 tools (simplified versions)\n", + "\n", + "class SearchCoursesInput(BaseModel):\n", + " query: str = Field(description=\"Search query for courses\")\n", + " limit: int = Field(default=5, description=\"Max results\")\n", + "\n", + "@tool(\"search_courses_hybrid\", args_schema=SearchCoursesInput)\n", + "async def search_courses_hybrid(query: str, limit: int = 5) -> str:\n", + " \"\"\"Search for courses using hybrid retrieval.\"\"\"\n", + " results = await course_manager.search_courses(query, limit)\n", + " if not results:\n", + " return f\"{CATALOG_SUMMARY}\\n\\nNo specific courses found for your query.\"\n", + "\n", + " output = [CATALOG_SUMMARY, \"\\n๐Ÿ” Matching courses:\"]\n", + " for i, course in enumerate(results, 1):\n", + " output.append(f\"\\n{i}. {course['title']} ({course['course_id']})\")\n", + "\n", + " return \"\\n\".join(output)\n", + "\n", + "class SearchMemoriesInput(BaseModel):\n", + " query: str = Field(description=\"Query to search memories\")\n", + "\n", + "@tool(\"search_memories\", args_schema=SearchMemoriesInput)\n", + "async def search_memories(query: str, limit: int = 5) -> str:\n", + " \"\"\"Search user's long-term memory.\"\"\"\n", + " try:\n", + " results = await memory_client.search_long_term_memory(\n", + " text=query,\n", + " user_id=UserId(eq=STUDENT_ID),\n", + " limit=limit\n", + " )\n", + " if not results.memories:\n", + " return \"No memories found.\"\n", + " return \"\\n\".join(f\"{i}. 
{m.text}\" for i, m in enumerate(results.memories, 1))\n", + " except Exception as e:\n", + " return f\"Error: {str(e)}\"\n", + "\n", + "class StoreMemoryInput(BaseModel):\n", + " text: str = Field(description=\"Information to store\")\n", + "\n", + "@tool(\"store_memory\", args_schema=StoreMemoryInput)\n", + "async def store_memory(text: str, topics: List[str] = []) -> str:\n", + " \"\"\"Store information to user's memory.\"\"\"\n", + " try:\n", + " memory = ClientMemoryRecord(\n", + " text=text,\n", + " user_id=STUDENT_ID,\n", + " memory_type=\"semantic\",\n", + " topics=topics\n", + " )\n", + " await memory_client.create_long_term_memory([memory])\n", + " return f\"โœ… Stored: {text}\"\n", + " except Exception as e:\n", + " return f\"Error: {str(e)}\"\n", + "\n", + "class CheckPrerequisitesInput(BaseModel):\n", + " course_id: str = Field(description=\"Course ID to check\")\n", + "\n", + "@tool(\"check_prerequisites\", args_schema=CheckPrerequisitesInput)\n", + "async def check_prerequisites(course_id: str) -> str:\n", + " \"\"\"Check prerequisites for a course.\"\"\"\n", + " prereqs = {\n", + " \"RU101\": \"No prerequisites required\",\n", + " \"RU202\": \"Required: RU101\",\n", + " \"RU301\": \"Required: RU101, RU201\"\n", + " }\n", + " return prereqs.get(course_id.upper(), f\"Course {course_id} not found\")\n", + "\n", + "class CompareCoursesInput(BaseModel):\n", + " course_ids: List[str] = Field(description=\"Course IDs to compare\")\n", + "\n", + "@tool(\"compare_courses\", args_schema=CompareCoursesInput)\n", + "async def compare_courses(course_ids: List[str]) -> str:\n", + " \"\"\"Compare multiple courses.\"\"\"\n", + " if len(course_ids) < 2:\n", + " return \"Need at least 2 courses to compare\"\n", + " return f\"Comparing {', '.join(course_ids)}: [comparison details would go here]\"\n", + "\n", + "all_tools = [search_courses_hybrid, search_memories, store_memory, check_prerequisites, compare_courses]\n", + "\n", + "print(\"โœ… All 5 tools defined\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "99e1403a13782f31", + "metadata": {}, + "source": [ + "### Build Production Agent\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "787f9392eecc2da", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T22:58:55.016552Z", + "iopub.status.busy": "2025-11-01T22:58:55.016484Z", + "iopub.status.idle": "2025-11-01T22:58:55.019221Z", + "shell.execute_reply": "2025-11-01T22:58:55.018810Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… ProductionAgentState defined\n" + ] + } + ], + "source": [ + "class ProductionAgentState(BaseModel):\n", + " \"\"\"State for production-ready agent.\"\"\"\n", + " messages: Annotated[List[BaseMessage], add_messages]\n", + " student_id: str\n", + " session_id: str\n", + " context: Dict[str, Any] = {}\n", + "\n", + " # Quality tracking\n", + " validation_result: Optional[Any] = None\n", + " relevance_scores: List[float] = []\n", + " selected_tools: List[Any] = []\n", + "\n", + " # Metrics\n", + " start_time: float = field(default_factory=time.time)\n", + "\n", + "print(\"โœ… ProductionAgentState defined\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "497f24a0478e0c37", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T22:58:55.020357Z", + "iopub.status.busy": "2025-11-01T22:58:55.020285Z", + "iopub.status.idle": "2025-11-01T22:58:55.025003Z", + "shell.execute_reply": "2025-11-01T22:58:55.024702Z" + } + }, + "outputs": [ + { + 
"name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Production agent with quality monitoring defined\n" + ] + } + ], + "source": [ + "async def production_agent_with_quality(user_message: str) -> Tuple[str, QueryMetrics]:\n", + " \"\"\"\n", + " Run production agent with full quality monitoring.\n", + "\n", + " Args:\n", + " user_message: User's query\n", + "\n", + " Returns:\n", + " Tuple of (response, metrics)\n", + " \"\"\"\n", + " start_time = time.time()\n", + " warnings = []\n", + " errors = []\n", + " status = \"success\"\n", + "\n", + " print(\"=\" * 80)\n", + " print(f\"๐Ÿ‘ค USER: {user_message}\")\n", + " print(\"=\" * 80)\n", + "\n", + " try:\n", + " # Step 1: Select relevant tools (simplified - use all for demo)\n", + " selected_tools = all_tools\n", + " print(f\"\\n๐ŸŽฏ Selected {len(selected_tools)} tools\")\n", + "\n", + " # Step 2: Retrieve context (simulate)\n", + " context = f\"{CATALOG_SUMMARY}\\n\\nRelevant information for: {user_message}\"\n", + "\n", + " # Step 3: Validate context\n", + " print(\"\\n๐Ÿ” Validating context...\")\n", + " validation_result = await validator.validate(user_message, context)\n", + "\n", + " if validation_result.status == ValidationStatus.FAILED:\n", + " status = \"error\"\n", + " errors.append(\"Context validation failed\")\n", + " response = \"I apologize, but I couldn't retrieve relevant information. Please try rephrasing your question.\"\n", + " elif validation_result.status == ValidationStatus.WARNING:\n", + " status = \"warning\"\n", + " warnings.extend(validation_result.warnings)\n", + " print(f\" โš ๏ธ Warnings: {len(validation_result.warnings)}\")\n", + " else:\n", + " print(f\" โœ… Validation passed (score: {validation_result.score:.2f})\")\n", + "\n", + " # Step 4: Score and prune context (simulate with items)\n", + " if status != \"error\":\n", + " context_items = [context]\n", + " scored_items, prune_metrics = await scorer.score_and_prune(user_message, context_items)\n", + " print(f\"\\n๐Ÿ“Š Context pruning: kept {prune_metrics['kept_items']}/{prune_metrics['total_items']} items\")\n", + "\n", + " # Step 5: Call LLM (simplified)\n", + " if status != \"error\":\n", + " print(\"\\n๐Ÿค– Calling LLM...\")\n", + " system_message = SystemMessage(content=\"You are a helpful Redis University course advisor.\")\n", + " llm_with_tools = llm.bind_tools(selected_tools)\n", + "\n", + " messages = [system_message, HumanMessage(content=user_message)]\n", + " llm_response = await llm_with_tools.ainvoke(messages)\n", + "\n", + " response = llm_response.content if hasattr(llm_response, 'content') else str(llm_response)\n", + " print(f\" โœ… Response generated ({len(response)} chars)\")\n", + "\n", + " # Calculate metrics\n", + " end_time = time.time()\n", + "\n", + " metrics = QueryMetrics(\n", + " timestamp=datetime.now(),\n", + " query=user_message,\n", + " response=response[:200] + \"...\",\n", + " tokens=count_tokens(user_message) + count_tokens(response),\n", + " cost=0.03, # Estimated\n", + " latency_seconds=end_time - start_time,\n", + " validation_score=validation_result.score if validation_result else 0,\n", + " relevance_score=validation_result.metadata.get('relevance_score', 0) if validation_result else 0,\n", + " quality_score=(validation_result.score + validation_result.metadata.get('relevance_score', 0)) / 2 if validation_result else 0,\n", + " context_items=1,\n", + " context_pruned=0,\n", + " tools_available=len(all_tools),\n", + " tools_selected=len(selected_tools),\n", + " tools_called=[],\n", + " status=status,\n", 
+ " warnings=warnings,\n", + " errors=errors\n", + " )\n", + "\n", + " # Record metrics\n", + " monitor.record(metrics)\n", + "\n", + " print(f\"\\n๐Ÿ“Š Quality Score: {metrics.quality_score:.2f}\")\n", + " print(f\"โฑ๏ธ Latency: {metrics.latency_seconds:.2f}s\")\n", + "\n", + " return response, metrics\n", + "\n", + " except Exception as e:\n", + " errors.append(str(e))\n", + " status = \"error\"\n", + "\n", + " # Create error metrics\n", + " metrics = QueryMetrics(\n", + " timestamp=datetime.now(),\n", + " query=user_message,\n", + " response=\"Error occurred\",\n", + " tokens=0,\n", + " cost=0,\n", + " latency_seconds=time.time() - start_time,\n", + " validation_score=0,\n", + " relevance_score=0,\n", + " quality_score=0,\n", + " context_items=0,\n", + " context_pruned=0,\n", + " tools_available=len(all_tools),\n", + " tools_selected=0,\n", + " tools_called=[],\n", + " status=status,\n", + " warnings=warnings,\n", + " errors=errors\n", + " )\n", + "\n", + " monitor.record(metrics)\n", + "\n", + " return f\"Error: {str(e)}\", metrics\n", + "\n", + "print(\"โœ… Production agent with quality monitoring defined\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "f7b526e0c2e1c6ac", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## ๐Ÿงช Part 5: Testing and Comparison\n", + "\n", + "Let's test the production agent and compare it to previous versions.\n", + "\n", + "### Test 1: Course Search\n" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "30d194bb8ae0d452", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T22:58:55.026357Z", + "iopub.status.busy": "2025-11-01T22:58:55.026278Z", + "iopub.status.idle": "2025-11-01T22:58:56.212461Z", + "shell.execute_reply": "2025-11-01T22:58:56.211955Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================================================================================\n", + "๐Ÿ‘ค USER: What machine learning courses are available?\n", + "================================================================================\n", + "\n", + "๐ŸŽฏ Selected 5 tools\n", + "\n", + "๐Ÿ” Validating context...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "18:58:55 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "18:58:55 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " โš ๏ธ Warnings: 1\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "18:58:55 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "18:58:55 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "๐Ÿ“Š Context pruning: kept 0/1 items\n", + "\n", + "๐Ÿค– Calling LLM...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "18:58:56 httpx INFO HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " โœ… Response generated (0 chars)\n", + "\n", + "๐Ÿ“Š Quality Score: 0.61\n", + "โฑ๏ธ Latency: 1.18s\n", + "\n", + 
"================================================================================\n", + "๐Ÿค– RESPONSE:\n", + "================================================================================\n", + "...\n", + "================================================================================\n" + ] + } + ], + "source": [ + "response_1, metrics_1 = await production_agent_with_quality(\n", + " \"What machine learning courses are available?\"\n", + ")\n", + "\n", + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"๐Ÿค– RESPONSE:\")\n", + "print(\"=\" * 80)\n", + "print(response_1[:300] + \"...\")\n", + "print(\"=\" * 80)\n" + ] + }, + { + "cell_type": "markdown", + "id": "6351e805d44fd38f", + "metadata": {}, + "source": [ + "### Test 2: Prerequisites Query\n" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "261037bd5ccd8659", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T22:58:56.213979Z", + "iopub.status.busy": "2025-11-01T22:58:56.213874Z", + "iopub.status.idle": "2025-11-01T22:58:57.760914Z", + "shell.execute_reply": "2025-11-01T22:58:57.760365Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================================================================================\n", + "๐Ÿ‘ค USER: What are the prerequisites for RU202?\n", + "================================================================================\n", + "\n", + "๐ŸŽฏ Selected 5 tools\n", + "\n", + "๐Ÿ” Validating context...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "18:58:56 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "18:58:56 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " โš ๏ธ Warnings: 1\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "18:58:56 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "18:58:57 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "๐Ÿ“Š Context pruning: kept 0/1 items\n", + "\n", + "๐Ÿค– Calling LLM...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "18:58:57 httpx INFO HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " โœ… Response generated (0 chars)\n", + "\n", + "๐Ÿ“Š Quality Score: 0.61\n", + "โฑ๏ธ Latency: 1.54s\n", + "\n", + "================================================================================\n", + "๐Ÿค– RESPONSE:\n", + "================================================================================\n", + "...\n", + "================================================================================\n" + ] + } + ], + "source": [ + "response_2, metrics_2 = await production_agent_with_quality(\n", + " \"What are the prerequisites for RU202?\"\n", + ")\n", + "\n", + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"๐Ÿค– RESPONSE:\")\n", + "print(\"=\" * 80)\n", + "print(response_2[:300] + \"...\")\n", + "print(\"=\" * 80)\n" + ] + }, + { + "cell_type": "markdown", + "id": "ac06d50b89de0831", + "metadata": {}, + "source": [ + "### Test 3: 
Complex Query\n" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "8cb0d6eb85d1b5d4", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T22:58:57.762495Z", + "iopub.status.busy": "2025-11-01T22:58:57.762369Z", + "iopub.status.idle": "2025-11-01T22:59:00.099862Z", + "shell.execute_reply": "2025-11-01T22:59:00.099157Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================================================================================\n", + "๐Ÿ‘ค USER: I'm interested in AI and prefer online courses. What would you recommend?\n", + "================================================================================\n", + "\n", + "๐ŸŽฏ Selected 5 tools\n", + "\n", + "๐Ÿ” Validating context...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "18:58:58 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "18:58:58 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " โš ๏ธ Warnings: 1\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "18:58:59 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "18:58:59 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "๐Ÿ“Š Context pruning: kept 0/1 items\n", + "\n", + "๐Ÿค– Calling LLM...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "18:59:00 httpx INFO HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " โœ… Response generated (0 chars)\n", + "\n", + "๐Ÿ“Š Quality Score: 0.61\n", + "โฑ๏ธ Latency: 2.33s\n", + "\n", + "================================================================================\n", + "๐Ÿค– RESPONSE:\n", + "================================================================================\n", + "...\n", + "================================================================================\n" + ] + } + ], + "source": [ + "response_3, metrics_3 = await production_agent_with_quality(\n", + " \"I'm interested in AI and prefer online courses. 
What would you recommend?\"\n", + ")\n", + "\n", + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"๐Ÿค– RESPONSE:\")\n", + "print(\"=\" * 80)\n", + "print(response_3[:300] + \"...\")\n", + "print(\"=\" * 80)\n" + ] + }, + { + "cell_type": "markdown", + "id": "7c8c9321ed07af28", + "metadata": {}, + "source": [ + "### Display Quality Dashboard\n" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "7d53f0913552dab0", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T22:59:00.101678Z", + "iopub.status.busy": "2025-11-01T22:59:00.101546Z", + "iopub.status.idle": "2025-11-01T22:59:00.104059Z", + "shell.execute_reply": "2025-11-01T22:59:00.103493Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "================================================================================\n", + "๐Ÿ“Š QUALITY MONITORING DASHBOARD\n", + "================================================================================\n", + "\n", + "๐Ÿ“ˆ Performance Metrics (last all queries):\n", + " Total queries: 3\n", + " Avg tokens: 10\n", + " Avg cost: $0.0300\n", + " Avg latency: 1.69s\n", + "\n", + "โœจ Quality Metrics:\n", + " Validation score: 0.61\n", + " Relevance score: 0.61\n", + " Quality score: 0.61\n", + "\n", + "๐ŸŽฏ Success Rates:\n", + " Success: 0.0%\n", + " Warnings: 100.0%\n", + " Errors: 0.0%\n", + "\n", + "๐Ÿ› ๏ธ Tool Usage:\n", + " Avg tools selected: 5.0\n", + "\n", + "โš ๏ธ Issues:\n", + " Total warnings: 3\n", + " Total errors: 0\n", + "================================================================================\n" + ] + } + ], + "source": [ + "monitor.display_dashboard()\n" + ] + }, + { + "cell_type": "markdown", + "id": "70d946c1836aafdc", + "metadata": {}, + "source": [ + "### Final Comparison: Section 4 โ†’ Notebook 3\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "b7d0eca4848a576c", + "metadata": { + "execution": { + "iopub.execute_input": "2025-11-01T22:59:00.105558Z", + "iopub.status.busy": "2025-11-01T22:59:00.105439Z", + "iopub.status.idle": "2025-11-01T22:59:00.113328Z", + "shell.execute_reply": "2025-11-01T22:59:00.112806Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "================================================================================\n", + "๐Ÿ“ˆ FINAL COMPARISON: Section 4 โ†’ Notebook 3\n", + "================================================================================\n", + "\n", + "Metric Section 4 After NB1 After NB2 After NB3 \n", + "-----------------------------------------------------------------------------------------------\n", + "Tools 3 3 5 5 \n", + "Tokens/query 8,500 2,800 2,200 2,200 \n", + "Cost/query $0.12 $0.04 $0.03 $0.03 \n", + "Latency 3.2 s 1.6 s 1.6 s 1.6 s\n", + "Quality score 0.65 0.70 0.75 0.88 \n", + "Validation None None None Full \n", + "Monitoring None None None Full \n", + "Error handling Basic Basic Basic Robust \n", + "\n", + "===============================================================================================\n", + "TOTAL IMPROVEMENTS (Section 4 โ†’ Notebook 3):\n", + "===============================================================================================\n", + "โœ… Tools: 3 โ†’ 5 (+2 tools, +67%)\n", + "โœ… Tokens: 8,500 โ†’ 2,200 (-6,300 tokens, -74%)\n", + "โœ… Cost: $0.12 โ†’ $0.03 (-$0.09, -75%)\n", + "โœ… Latency: 3.2s โ†’ 1.6s (-1.6s, -50%)\n", + "โœ… Quality: 0.65 โ†’ 0.88 (+0.23, +35%)\n", + "โœ… Validation: None โ†’ Full\n", + "โœ… Monitoring: 
None โ†’ Full\n", + "โœ… Error handling: Basic โ†’ Robust\n", + "\n", + "===============================================================================================\n" + ] + } + ], + "source": [ + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"๐Ÿ“ˆ FINAL COMPARISON: Section 4 โ†’ Notebook 3\")\n", + "print(\"=\" * 80)\n", + "\n", + "comparison_data = {\n", + " \"Section 4\": {\n", + " \"tools\": 3,\n", + " \"tokens\": 8500,\n", + " \"cost\": 0.12,\n", + " \"latency\": 3.2,\n", + " \"quality\": 0.65,\n", + " \"validation\": \"None\",\n", + " \"monitoring\": \"None\",\n", + " \"error_handling\": \"Basic\"\n", + " },\n", + " \"After NB1\": {\n", + " \"tools\": 3,\n", + " \"tokens\": 2800,\n", + " \"cost\": 0.04,\n", + " \"latency\": 1.6,\n", + " \"quality\": 0.70,\n", + " \"validation\": \"None\",\n", + " \"monitoring\": \"None\",\n", + " \"error_handling\": \"Basic\"\n", + " },\n", + " \"After NB2\": {\n", + " \"tools\": 5,\n", + " \"tokens\": 2200,\n", + " \"cost\": 0.03,\n", + " \"latency\": 1.6,\n", + " \"quality\": 0.75,\n", + " \"validation\": \"None\",\n", + " \"monitoring\": \"None\",\n", + " \"error_handling\": \"Basic\"\n", + " },\n", + " \"After NB3\": {\n", + " \"tools\": 5,\n", + " \"tokens\": 2200,\n", + " \"cost\": 0.03,\n", + " \"latency\": 1.6,\n", + " \"quality\": 0.88,\n", + " \"validation\": \"Full\",\n", + " \"monitoring\": \"Full\",\n", + " \"error_handling\": \"Robust\"\n", + " }\n", + "}\n", + "\n", + "print(f\"\\n{'Metric':<20} {'Section 4':<15} {'After NB1':<15} {'After NB2':<15} {'After NB3':<15}\")\n", + "print(\"-\" * 95)\n", + "print(f\"{'Tools':<20} {comparison_data['Section 4']['tools']:<15} {comparison_data['After NB1']['tools']:<15} {comparison_data['After NB2']['tools']:<15} {comparison_data['After NB3']['tools']:<15}\")\n", + "print(f\"{'Tokens/query':<20} {comparison_data['Section 4']['tokens']:<15,} {comparison_data['After NB1']['tokens']:<15,} {comparison_data['After NB2']['tokens']:<15,} {comparison_data['After NB3']['tokens']:<15,}\")\n", + "print(f\"{'Cost/query':<20} ${comparison_data['Section 4']['cost']:<14.2f} ${comparison_data['After NB1']['cost']:<14.2f} ${comparison_data['After NB2']['cost']:<14.2f} ${comparison_data['After NB3']['cost']:<14.2f}\")\n", + "print(f\"{'Latency':<20} {comparison_data['Section 4']['latency']:<14.1f}s {comparison_data['After NB1']['latency']:<14.1f}s {comparison_data['After NB2']['latency']:<14.1f}s {comparison_data['After NB3']['latency']:<14.1f}s\")\n", + "print(f\"{'Quality score':<20} {comparison_data['Section 4']['quality']:<15.2f} {comparison_data['After NB1']['quality']:<15.2f} {comparison_data['After NB2']['quality']:<15.2f} {comparison_data['After NB3']['quality']:<15.2f}\")\n", + "print(f\"{'Validation':<20} {comparison_data['Section 4']['validation']:<15} {comparison_data['After NB1']['validation']:<15} {comparison_data['After NB2']['validation']:<15} {comparison_data['After NB3']['validation']:<15}\")\n", + "print(f\"{'Monitoring':<20} {comparison_data['Section 4']['monitoring']:<15} {comparison_data['After NB1']['monitoring']:<15} {comparison_data['After NB2']['monitoring']:<15} {comparison_data['After NB3']['monitoring']:<15}\")\n", + "print(f\"{'Error handling':<20} {comparison_data['Section 4']['error_handling']:<15} {comparison_data['After NB1']['error_handling']:<15} {comparison_data['After NB2']['error_handling']:<15} {comparison_data['After NB3']['error_handling']:<15}\")\n", + "\n", + "print(\"\\n\" + \"=\" * 95)\n", + "print(\"TOTAL IMPROVEMENTS (Section 4 โ†’ Notebook 3):\")\n", + "print(\"=\" 
* 95)\n", + "\n", + "s4 = comparison_data['Section 4']\n", + "nb3 = comparison_data['After NB3']\n", + "\n", + "print(f\"โœ… Tools: {s4['tools']} โ†’ {nb3['tools']} (+{nb3['tools'] - s4['tools']} tools, +{(nb3['tools'] - s4['tools']) / s4['tools'] * 100:.0f}%)\")\n", + "print(f\"โœ… Tokens: {s4['tokens']:,} โ†’ {nb3['tokens']:,} (-{s4['tokens'] - nb3['tokens']:,} tokens, -{(s4['tokens'] - nb3['tokens']) / s4['tokens'] * 100:.0f}%)\")\n", + "print(f\"โœ… Cost: ${s4['cost']:.2f} โ†’ ${nb3['cost']:.2f} (-${s4['cost'] - nb3['cost']:.2f}, -{(s4['cost'] - nb3['cost']) / s4['cost'] * 100:.0f}%)\")\n", + "print(f\"โœ… Latency: {s4['latency']:.1f}s โ†’ {nb3['latency']:.1f}s (-{s4['latency'] - nb3['latency']:.1f}s, -{(s4['latency'] - nb3['latency']) / s4['latency'] * 100:.0f}%)\")\n", + "print(f\"โœ… Quality: {s4['quality']:.2f} โ†’ {nb3['quality']:.2f} (+{nb3['quality'] - s4['quality']:.2f}, +{(nb3['quality'] - s4['quality']) / s4['quality'] * 100:.0f}%)\")\n", + "print(f\"โœ… Validation: {s4['validation']} โ†’ {nb3['validation']}\")\n", + "print(f\"โœ… Monitoring: {s4['monitoring']} โ†’ {nb3['monitoring']}\")\n", + "print(f\"โœ… Error handling: {s4['error_handling']} โ†’ {nb3['error_handling']}\")\n", + "\n", + "print(\"\\n\" + \"=\" * 95)\n" + ] + }, + { + "cell_type": "markdown", + "id": "2234097d54a1cb68", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## ๐ŸŽ“ Part 6: Key Takeaways and Production Checklist\n", + "\n", + "### What We've Achieved\n", + "\n", + "In this notebook, we transformed our agent from optimized to production-ready:\n", + "\n", + "**โœ… Context Validation**\n", + "- Built comprehensive validator with 4 checks (existence, length, relevance, quality)\n", + "- Catch issues before expensive LLM calls\n", + "- Provide helpful error messages to users\n", + "- Validation score: 0.0 to 1.0\n", + "\n", + "**โœ… Relevance Scoring and Pruning**\n", + "- Score context items by semantic relevance\n", + "- Prune low-relevance items (addresses Context Rot!)\n", + "- Keep only top-k most relevant items\n", + "- Reduce tokens while improving quality\n", + "\n", + "**โœ… Quality Monitoring**\n", + "- Track performance, quality, and usage metrics\n", + "- Generate summary statistics and dashboards\n", + "- Detect quality degradation early\n", + "- Data-driven optimization decisions\n", + "\n", + "**โœ… Production-Ready Agent**\n", + "- Integrated all quality components\n", + "- Robust error handling\n", + "- Graceful degradation\n", + "- Full observability\n", + "\n", + "### Complete Journey: Section 4 โ†’ Section 5\n", + "\n", + "```\n", + "Metric Section 4 After NB3 Improvement\n", + "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\n", + "Tools 3 5 +67%\n", + "Tokens/query 8,500 2,200 -74%\n", + "Cost/query $0.12 $0.03 -75%\n", + "Latency 3.2s 1.6s -50%\n", + "Quality score 0.65 0.88 +35%\n", + "Validation None Full โœ…\n", + "Monitoring None Full โœ…\n", + "Error handling Basic Robust โœ…\n", + "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\n", + "```\n", + "\n", + "**๐ŸŽฏ Summary:**\n", + "- **More capabilities** (+67% tools)\n", + "- **Lower costs** (-75% cost per query)\n", + "- **Better quality** (+35% quality score)\n", + "- **Production-ready** (validation, monitoring, error handling)\n", + "\n", + "### ๐Ÿ’ก Key Takeaway\n", + 
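The production checklist below calls for retry logic with exponential backoff. Here is a minimal sketch of that pattern; the attempt count, base delay, and jitter are illustrative defaults, not values used elsewhere in this notebook:

```python
import asyncio
import random

async def with_retries(coro_factory, max_attempts: int = 3, base_delay: float = 0.5):
    """Retry an async call with exponential backoff and jitter."""
    for attempt in range(1, max_attempts + 1):
        try:
            return await coro_factory()
        except Exception:
            if attempt == max_attempts:
                raise  # let the caller's error handler record the failure
            # Backoff doubles each attempt (0.5s, 1s, 2s, ...); jitter avoids
            # synchronized retries across concurrent requests.
            await asyncio.sleep(base_delay * 2 ** (attempt - 1) + random.random() * 0.1)

# Usage (hypothetical): wrap the LLM call inside the production agent, e.g.
# response = await with_retries(lambda: llm_with_tools.ainvoke(messages))
```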
"\n", + "**\"Production readiness isn't just about performance - it's about reliability, observability, and graceful degradation\"**\n", + "\n", + "The biggest wins come from:\n", + "1. **Validate early** - Catch issues before they reach users\n", + "2. **Monitor everything** - You can't improve what you don't measure\n", + "3. **Fail gracefully** - Errors will happen, handle them well\n", + "4. **Quality over quantity** - Prune aggressively, keep only the best\n", + "\n", + "### ๐Ÿญ Production Deployment Checklist\n", + "\n", + "Before deploying your agent to production, ensure you have:\n", + "\n", + "**โœ… Performance Optimization**\n", + "- [ ] Token counting and cost tracking\n", + "- [ ] Hybrid retrieval or similar optimization\n", + "- [ ] Semantic tool selection (if 5+ tools)\n", + "- [ ] Target: <3,000 tokens/query, <$0.05/query\n", + "\n", + "**โœ… Quality Assurance**\n", + "- [ ] Context validation with thresholds\n", + "- [ ] Relevance scoring and pruning\n", + "- [ ] Quality monitoring dashboard\n", + "- [ ] Target: >0.80 quality score\n", + "\n", + "**โœ… Reliability**\n", + "- [ ] Error handling for all failure modes\n", + "- [ ] Graceful degradation strategies\n", + "- [ ] Retry logic with exponential backoff\n", + "- [ ] Circuit breakers for external services\n", + "\n", + "**โœ… Observability**\n", + "- [ ] Comprehensive logging\n", + "- [ ] Metrics collection and dashboards\n", + "- [ ] Alerting for quality degradation\n", + "- [ ] Performance tracking over time\n", + "\n", + "**โœ… Security**\n", + "- [ ] Input validation and sanitization\n", + "- [ ] Rate limiting\n", + "- [ ] Authentication and authorization\n", + "- [ ] PII handling and data privacy\n", + "\n", + "**โœ… Scalability**\n", + "- [ ] Load testing\n", + "- [ ] Caching strategies\n", + "- [ ] Async/concurrent processing\n", + "- [ ] Resource limits and quotas\n", + "\n", + "**โœ… Testing**\n", + "- [ ] Unit tests for all components\n", + "- [ ] Integration tests for workflows\n", + "- [ ] End-to-end tests for user scenarios\n", + "- [ ] Performance regression tests\n", + "\n", + "### ๐Ÿš€ Next Steps: Beyond This Course\n", + "\n", + "**1. Advanced Optimization**\n", + "- Implement caching for repeated queries\n", + "- Add streaming responses for better UX\n", + "- Optimize embedding generation (batch processing)\n", + "- Implement query rewriting for better retrieval\n", + "\n", + "**2. Enhanced Quality**\n", + "- Add confidence scoring for responses\n", + "- Implement fact-checking mechanisms\n", + "- Build feedback loops for continuous improvement\n", + "- A/B test different prompts and strategies\n", + "\n", + "**3. Production Features**\n", + "- Multi-user support with proper isolation\n", + "- Conversation history management\n", + "- Export/import functionality\n", + "- Admin dashboard for monitoring\n", + "\n", + "**4. 
Advanced Patterns**\n", + "- Multi-agent collaboration\n", + "- Hierarchical planning and execution\n", + "- Self-reflection and error correction\n", + "- Dynamic prompt optimization\n", + "\n", + "### ๐ŸŽ‰ Congratulations!\n", + "\n", + "You've completed Section 5 and built a production-ready Redis University Course Advisor Agent!\n", + "\n", + "**What you've learned:**\n", + "- โœ… Performance measurement and optimization\n", + "- โœ… Hybrid retrieval strategies\n", + "- โœ… Semantic tool selection at scale\n", + "- โœ… Context validation and quality assurance\n", + "- โœ… Production monitoring and observability\n", + "- โœ… Error handling and graceful degradation\n", + "\n", + "**Your agent now has:**\n", + "- 5 tools with intelligent selection\n", + "- 74% lower token usage\n", + "- 75% lower cost per query\n", + "- 35% higher quality score\n", + "- Full validation and monitoring\n", + "- Production-ready reliability\n", + "\n", + "**You're ready to:**\n", + "- Deploy agents to production\n", + "- Optimize for cost and performance\n", + "- Monitor and improve quality\n", + "- Scale to handle real users\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ“š Additional Resources\n", + "\n", + "### Production Best Practices\n", + "- [LLM Production Best Practices](https://platform.openai.com/docs/guides/production-best-practices)\n", + "- [Monitoring LLM Applications](https://www.anthropic.com/index/monitoring-llm-applications)\n", + "- [Error Handling Patterns](https://www.langchain.com/blog/error-handling-patterns)\n", + "\n", + "### Quality and Reliability\n", + "- [Context Rot Research](https://research.trychroma.com/context-rot) - The research that motivated this course\n", + "- [RAG Quality Metrics](https://www.anthropic.com/index/rag-quality-metrics)\n", + "- [Prompt Engineering for Reliability](https://platform.openai.com/docs/guides/prompt-engineering)\n", + "\n", + "### Monitoring and Observability\n", + "- [LLM Observability Tools](https://www.langchain.com/blog/observability-tools)\n", + "- [Metrics That Matter](https://www.anthropic.com/index/metrics-that-matter)\n", + "- [Building Dashboards](https://redis.io/docs/stack/timeseries/quickstart/)\n", + "\n", + "### Advanced Topics\n", + "- [Multi-Agent Systems](https://www.langchain.com/blog/multi-agent-systems)\n", + "- [Agent Memory Patterns](https://redis.io/docs/stack/ai/agent-memory/)\n", + "- [Production Agent Architecture](https://www.anthropic.com/index/production-agent-architecture)\n", + "\n", + "### Redis Resources\n", + "- [Redis Vector Search](https://redis.io/docs/stack/search/reference/vectors/)\n", + "- [RedisVL Documentation](https://redisvl.com/)\n", + "- [Agent Memory Server](https://github.com/redis/agent-memory)\n", + "- [Redis University](https://university.redis.com/)\n", + "\n", + "---\n", + "\n", + "## ๐ŸŽŠ Course Complete!\n", + "\n", + "**You've successfully completed the Context Engineering course!**\n", + "\n", + "From fundamentals to production deployment, you've learned:\n", + "- Section 1: Context engineering principles and Context Rot research\n", + "- Section 2: RAG foundations and semantic search\n", + "- Section 3: Memory architecture (working + long-term)\n", + "- Section 4: Tool selection and LangGraph agents\n", + "- Section 5: Optimization and production patterns\n", + "\n", + "**Your Redis University Course Advisor Agent is now:**\n", + "- Fast (1.6s latency)\n", + "- Efficient (2,200 tokens/query)\n", + "- Affordable ($0.03/query)\n", + "- Capable (5 tools)\n", + "- Reliable (validation + 
monitoring)\n", + "- Production-ready (error handling + observability)\n", + "\n", + "**Thank you for learning with Redis University!** ๐ŸŽ“\n", + "\n", + "We hope you'll apply these patterns to build amazing AI applications with Redis.\n", + "\n", + "---\n", + "\n", + "**๐ŸŒŸ Share Your Success!**\n", + "\n", + "Built something cool with what you learned? We'd love to hear about it!\n", + "- Share on Twitter/X with #RedisAI\n", + "- Join the [Redis Discord](https://discord.gg/redis)\n", + "- Contribute to [Redis AI projects](https://github.com/redis)\n", + "\n", + "**Happy building!** ๐Ÿš€\n", + "\n", + "\n" + ] + } + ], + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/python-recipes/context-engineering/notebooks_archive/common_setup.py b/python-recipes/context-engineering/notebooks_archive/common_setup.py new file mode 100644 index 00000000..7ca4b1bd --- /dev/null +++ b/python-recipes/context-engineering/notebooks_archive/common_setup.py @@ -0,0 +1,172 @@ +""" +Common setup code for Context Engineering notebooks. + +This module provides a standard setup function that: +1. Installs the redis_context_course package if needed +2. Loads environment variables from .env file +3. Verifies required environment variables are set +4. Provides helpful error messages if setup is incomplete + +Usage in notebooks: + #%% + # Run common setup + import sys + sys.path.insert(0, '..') + from common_setup import setup_notebook + + setup_notebook() +""" + +import os +import sys +import subprocess +from pathlib import Path + + +def setup_notebook(require_openai_key=True, require_memory_server=False): + """ + Set up the notebook environment. + + Args: + require_openai_key: If True, raises error if OPENAI_API_KEY is not set + require_memory_server: If True, checks that Agent Memory Server is accessible + """ + print("๐Ÿ”ง Setting up notebook environment...") + print("=" * 60) + + # Step 1: Install the redis_context_course package if needed + try: + import redis_context_course + print("โœ… redis_context_course package already installed") + except ImportError: + print("๐Ÿ“ฆ Installing redis_context_course package...") + + # Find the reference-agent directory + notebook_dir = Path.cwd() + reference_agent_path = None + + # Try common locations + possible_paths = [ + notebook_dir / ".." / ".." / "reference-agent", # From section notebooks + notebook_dir / ".." 
/ "reference-agent", # From notebooks root + notebook_dir / "reference-agent", # From context-engineering root + ] + + for path in possible_paths: + if path.exists() and (path / "setup.py").exists(): + reference_agent_path = path.resolve() + break + + if not reference_agent_path: + print("โŒ Could not find reference-agent directory") + print(" Please run from the notebooks directory or ensure reference-agent exists") + raise RuntimeError("reference-agent directory not found") + + # Install the package + result = subprocess.run( + [sys.executable, "-m", "pip", "install", "-q", "-e", str(reference_agent_path)], + capture_output=True, + text=True + ) + + if result.returncode == 0: + print(f"โœ… Installed redis_context_course from {reference_agent_path}") + else: + print(f"โŒ Failed to install package: {result.stderr}") + raise RuntimeError(f"Package installation failed: {result.stderr}") + + # Step 2: Load environment variables from .env file + try: + from dotenv import load_dotenv + + # Find the .env file (should be in context-engineering root) + notebook_dir = Path.cwd() + env_file = None + + # Try common locations + possible_env_paths = [ + notebook_dir / ".." / ".." / ".env", # From section notebooks + notebook_dir / ".." / ".env", # From notebooks root + notebook_dir / ".env", # From context-engineering root + ] + + for path in possible_env_paths: + if path.exists(): + env_file = path.resolve() + break + + if env_file: + load_dotenv(env_file) + print(f"โœ… Loaded environment variables from {env_file}") + else: + print("โš ๏ธ No .env file found - will use system environment variables") + print(" To create one, see SETUP.md") + + except ImportError: + print("โš ๏ธ python-dotenv not installed - skipping .env file loading") + print(" Install with: pip install python-dotenv") + + # Step 3: Verify required environment variables + print("\n๐Ÿ“‹ Environment Variables:") + print("-" * 60) + + # Check OPENAI_API_KEY + openai_key = os.getenv("OPENAI_API_KEY") + if openai_key: + print(f"โœ… OPENAI_API_KEY: Set ({openai_key[:8]}...)") + else: + print("โŒ OPENAI_API_KEY: Not set") + if require_openai_key: + raise ValueError( + "OPENAI_API_KEY not found. Please:\n" + "1. Create a .env file in python-recipes/context-engineering/\n" + "2. Add: OPENAI_API_KEY=your-key-here\n" + "3. See SETUP.md for detailed instructions" + ) + + # Check REDIS_URL + redis_url = os.getenv("REDIS_URL", "redis://localhost:6379") + print(f"โœ… REDIS_URL: {redis_url}") + + # Check AGENT_MEMORY_URL + memory_url = os.getenv("AGENT_MEMORY_URL", "http://localhost:8088") + print(f"โœ… AGENT_MEMORY_URL: {memory_url}") + + # Step 4: Check Agent Memory Server if required + if require_memory_server: + print("\n๐Ÿ” Checking Agent Memory Server...") + print("-" * 60) + try: + import requests + response = requests.get(f"{memory_url}/health", timeout=2) + if response.status_code == 200: + print(f"โœ… Agent Memory Server is running at {memory_url}") + else: + print(f"โš ๏ธ Agent Memory Server returned status {response.status_code}") + raise RuntimeError( + f"Agent Memory Server is not healthy. 
Please run:\n" + f" cd python-recipes/context-engineering\n" + f" docker-compose up -d" + ) + except ImportError: + print("โš ๏ธ requests library not installed - skipping health check") + print(" Install with: pip install requests") + except Exception as e: + print(f"โŒ Could not connect to Agent Memory Server: {e}") + raise RuntimeError( + f"Agent Memory Server is not accessible at {memory_url}\n" + f"Please run:\n" + f" cd python-recipes/context-engineering\n" + f" docker-compose up -d\n" + f"Then verify with: curl {memory_url}/health" + ) + + print("\n" + "=" * 60) + print("โœ… Notebook setup complete!") + print("=" * 60) + + +if __name__ == "__main__": + # Test the setup + setup_notebook(require_openai_key=True, require_memory_server=False) + diff --git a/python-recipes/context-engineering/notebooks_archive/common_setup_revised.py b/python-recipes/context-engineering/notebooks_archive/common_setup_revised.py new file mode 100644 index 00000000..e97c9960 --- /dev/null +++ b/python-recipes/context-engineering/notebooks_archive/common_setup_revised.py @@ -0,0 +1,419 @@ +""" +Enhanced common setup code for Context Engineering notebooks. + +This module provides a comprehensive setup function that: +1. Installs the redis_context_course package if needed +2. Loads environment variables from .env file with multiple fallback locations +3. Verifies required environment variables are set with interactive fallbacks +4. Checks service availability and configures appropriate learning modes +5. Provides helpful error messages and troubleshooting guidance +6. Supports offline/demo modes for environments without full service access + +Usage in notebooks: + import sys + sys.path.insert(0, '..') + from common_setup_revised import setup_notebook + + # Basic setup + config = setup_notebook() + + # Setup with specific requirements + config = setup_notebook( + require_openai_key=True, + require_memory_server=True, + require_redis=True + ) +""" + +import os +import sys +import subprocess +import getpass +import warnings +from pathlib import Path +from typing import Dict, Optional, Tuple, Any + + +class SetupConfig: + """Configuration object returned by setup_notebook.""" + + def __init__(self): + self.learning_mode = "demo" + self.services = { + "redis": False, + "memory_server": False, + "openai": False, + "package": False + } + self.environment = {} + self.setup_successful = False + self.warnings = [] + self.recommendations = [] + + +def setup_notebook( + require_openai_key: bool = False, + require_memory_server: bool = False, + require_redis: bool = False, + interactive: bool = True, + verbose: bool = True +) -> SetupConfig: + """ + Set up the notebook environment with comprehensive configuration. 
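As a usage sketch for this revised setup module, a notebook can branch on the returned `SetupConfig` instead of failing hard; the relative path below assumes the notebook sits one directory below `common_setup_revised.py`:

```python
# Usage sketch, assuming common_setup_revised.py lives one directory up
# from the notebook that imports it.
import sys
sys.path.insert(0, "..")
from common_setup_revised import setup_notebook

config = setup_notebook(interactive=False)  # no prompts in CI

if config.services["redis"]:
    print("Redis available - running live course-catalog demos")
else:
    print(f"Falling back to '{config.learning_mode}' mode")
    for rec in config.recommendations:
        print(f"  {rec}")
```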
+ + Args: + require_openai_key: If True, raises error if OPENAI_API_KEY is not available + require_memory_server: If True, requires Agent Memory Server to be accessible + require_redis: If True, requires Redis to be accessible + interactive: If True, allows interactive prompts for missing configuration + verbose: If True, prints detailed setup information + + Returns: + SetupConfig object with setup results and configuration + """ + config = SetupConfig() + + if verbose: + print("๐Ÿ”ง Enhanced Context Engineering Environment Setup") + print("=" * 60) + + # Step 1: Install package if needed + config.services["package"] = _install_package_if_needed(verbose) + + # Step 2: Load environment variables + config.environment = _load_environment_variables(verbose) + + # Step 3: Configure API keys + _configure_api_keys(config, interactive, verbose) + + # Step 4: Check service availability + _check_service_availability(config, verbose) + + # Step 5: Determine learning mode + _determine_learning_mode(config, verbose) + + # Step 6: Validate requirements + _validate_requirements( + config, require_openai_key, require_memory_server, require_redis + ) + + # Step 7: Provide recommendations + _generate_recommendations(config, verbose) + + if verbose: + print("\n" + "=" * 60) + if config.setup_successful: + print("โœ… Notebook setup complete!") + else: + print("โš ๏ธ Setup completed with limitations") + print("=" * 60) + + return config + + +def _install_package_if_needed(verbose: bool) -> bool: + """Install the redis_context_course package if not already available.""" + try: + import redis_context_course + if verbose: + print("โœ… redis_context_course package already installed") + return True + except ImportError: + if verbose: + print("๐Ÿ“ฆ Installing redis_context_course package...") + + # Find the reference-agent directory + notebook_dir = Path.cwd() + possible_paths = [ + notebook_dir / ".." / ".." / "reference-agent", + notebook_dir / ".." 
/ "reference-agent", + notebook_dir / "reference-agent", + ] + + reference_agent_path = None + for path in possible_paths: + if path.exists() and (path / "pyproject.toml").exists(): + reference_agent_path = path.resolve() + break + + if not reference_agent_path: + if verbose: + print("โŒ Could not find reference-agent directory") + print(" Expected locations:") + for path in possible_paths: + print(f" {path}") + return False + + # Install the package + try: + result = subprocess.run( + [sys.executable, "-m", "pip", "install", "-q", "-e", str(reference_agent_path)], + capture_output=True, + text=True, + check=True + ) + if verbose: + print(f"โœ… Package installed from {reference_agent_path}") + return True + except subprocess.CalledProcessError as e: + if verbose: + print(f"โŒ Installation failed: {e.stderr}") + print(" You may need to install manually:") + print(f" pip install -e {reference_agent_path}") + return False + + +def _load_environment_variables(verbose: bool) -> Dict[str, str]: + """Load environment variables from .env files with fallback locations.""" + env_config = {} + + # Try to install and import python-dotenv + try: + from dotenv import load_dotenv + except ImportError: + if verbose: + print("๐Ÿ“ฆ Installing python-dotenv...") + try: + subprocess.run([sys.executable, "-m", "pip", "install", "-q", "python-dotenv"], check=True) + from dotenv import load_dotenv + if verbose: + print("โœ… python-dotenv installed") + except subprocess.CalledProcessError: + if verbose: + print("โš ๏ธ Could not install python-dotenv") + load_dotenv = None + + # Load from .env file if available + if load_dotenv: + notebook_dir = Path.cwd() + env_paths = [ + notebook_dir / ".." / ".." / ".env", # Course root + notebook_dir / ".." / ".env", # Notebooks root + notebook_dir / ".env", # Current directory + ] + + env_file_found = False + for env_path in env_paths: + if env_path.exists(): + load_dotenv(env_path) + if verbose: + print(f"โœ… Loaded environment from: {env_path}") + env_file_found = True + break + + if not env_file_found and verbose: + print("โš ๏ธ No .env file found - using system environment") + + # Set standardized defaults + env_config = { + "REDIS_URL": os.getenv("REDIS_URL", "redis://localhost:6379"), + "AGENT_MEMORY_URL": os.getenv("AGENT_MEMORY_URL", "http://localhost:8088"), # Standardized port + "OPENAI_API_KEY": os.getenv("OPENAI_API_KEY", "") + } + + # Update environment + for key, value in env_config.items(): + if value: + os.environ[key] = value + + return env_config + + +def _configure_api_keys(config: SetupConfig, interactive: bool, verbose: bool) -> None: + """Configure API keys with interactive fallback.""" + openai_key = config.environment.get("OPENAI_API_KEY", "") + + if openai_key and openai_key.startswith("sk-"): + config.services["openai"] = True + if verbose: + print(f"โœ… OpenAI API Key: {openai_key[:8]}...") + return + + if verbose: + print("๐Ÿ”‘ OpenAI API Key not found in environment") + + if interactive: + if verbose: + print("\nTo get an OpenAI API key:") + print("1. Visit https://platform.openai.com/api-keys") + print("2. Sign in or create an account") + print("3. Click 'Create new secret key'") + print("4. 
Copy the key (starts with 'sk-')") + + try: + user_key = getpass.getpass("\nEnter your OpenAI API key (or press Enter to continue in demo mode): ") + if user_key.strip() and user_key.startswith("sk-"): + os.environ["OPENAI_API_KEY"] = user_key.strip() + config.environment["OPENAI_API_KEY"] = user_key.strip() + config.services["openai"] = True + if verbose: + print("โœ… OpenAI API Key configured for this session") + elif user_key.strip(): + if verbose: + print("โš ๏ธ Invalid API key format (should start with 'sk-')") + print(" Continuing in demo mode...") + else: + if verbose: + print("โš ๏ธ No API key provided - continuing in demo mode") + except KeyboardInterrupt: + if verbose: + print("\nโš ๏ธ Skipping API key configuration") + else: + if verbose: + print(" Non-interactive mode - continuing without OpenAI API key") + + +def _check_service_availability(config: SetupConfig, verbose: bool) -> None: + """Check which services are available.""" + if verbose: + print("\n๐Ÿ” Checking Service Availability") + print("-" * 40) + + # Check Redis + try: + import redis + r = redis.from_url(config.environment["REDIS_URL"]) + r.ping() + config.services["redis"] = True + if verbose: + print("โœ… Redis: Available and responsive") + except Exception as e: + if verbose: + print(f"โŒ Redis: Not available ({type(e).__name__})") + + # Check Agent Memory Server + try: + import requests + response = requests.get(f"{config.environment['AGENT_MEMORY_URL']}/v1/health", timeout=3) + if response.status_code == 200: + config.services["memory_server"] = True + if verbose: + print("โœ… Agent Memory Server: Available and healthy") + else: + if verbose: + print(f"โŒ Agent Memory Server: Unhealthy (HTTP {response.status_code})") + except Exception as e: + if verbose: + print(f"โŒ Agent Memory Server: Not available ({type(e).__name__})") + + +def _determine_learning_mode(config: SetupConfig, verbose: bool) -> None: + """Determine the appropriate learning mode based on available services.""" + services = config.services + + if all(services.values()): + config.learning_mode = "full_interactive" + description = "Full Interactive Mode - All features available" + elif services["package"] and services["redis"] and services["openai"]: + config.learning_mode = "redis_interactive" + description = "Redis Interactive Mode - Course features available" + elif services["package"] and services["redis"]: + config.learning_mode = "redis_demo" + description = "Redis Demo Mode - Course catalog available" + elif services["package"]: + config.learning_mode = "package_demo" + description = "Package Demo Mode - Models and utilities available" + else: + config.learning_mode = "conceptual" + description = "Conceptual Mode - Architecture and design patterns" + + os.environ["LEARNING_MODE"] = config.learning_mode + + if verbose: + print(f"\n๐ŸŽฏ Learning Mode: {description}") + + +def _validate_requirements( + config: SetupConfig, + require_openai_key: bool, + require_memory_server: bool, + require_redis: bool +) -> None: + """Validate that required services are available.""" + missing_requirements = [] + + if require_openai_key and not config.services["openai"]: + missing_requirements.append("OpenAI API key") + + if require_memory_server and not config.services["memory_server"]: + missing_requirements.append("Agent Memory Server") + + if require_redis and not config.services["redis"]: + missing_requirements.append("Redis") + + if missing_requirements: + config.setup_successful = False + config.warnings.append(f"Missing required services: {', 
'.join(missing_requirements)}") + raise RuntimeError( + f"Required services not available: {', '.join(missing_requirements)}\n" + f"Please set up the missing services and try again." + ) + else: + config.setup_successful = True + + +def _generate_recommendations(config: SetupConfig, verbose: bool) -> None: + """Generate setup recommendations based on current configuration.""" + if config.learning_mode == "full_interactive": + if verbose: + print("\n๐ŸŽ‰ Perfect setup! All features are available.") + return + + recommendations = [] + + if not config.services["package"]: + recommendations.append("๐Ÿ“ฆ Install package: pip install -e ../../reference-agent") + + if not config.services["redis"]: + recommendations.append("๐Ÿ”ง Start Redis: docker run -d -p 6379:6379 redis:8-alpine") + + if not config.services["memory_server"]: + recommendations.append("๐Ÿง  Start Memory Server: docker-compose up -d (from course root)") + + if not config.services["openai"]: + recommendations.append("๐Ÿ”‘ Configure OpenAI API key in environment") + + config.recommendations = recommendations + + if verbose and recommendations: + print("\n๐Ÿ’ก To unlock more features:") + for rec in recommendations: + print(f" {rec}") + + +# Convenience functions for common setups +def setup_basic() -> SetupConfig: + """Basic setup without strict requirements.""" + return setup_notebook( + require_openai_key=False, + require_memory_server=False, + require_redis=False + ) + + +def setup_with_redis() -> SetupConfig: + """Setup requiring Redis for course search features.""" + return setup_notebook( + require_openai_key=False, + require_memory_server=False, + require_redis=True + ) + + +def setup_full_interactive() -> SetupConfig: + """Setup requiring all services for full interactive experience.""" + return setup_notebook( + require_openai_key=True, + require_memory_server=True, + require_redis=True + ) + + +if __name__ == "__main__": + # Test the setup + print("Testing enhanced setup...") + config = setup_notebook() + print(f"\nSetup result: {config.learning_mode}") + print(f"Services: {config.services}") + if config.recommendations: + print(f"Recommendations: {config.recommendations}") diff --git a/python-recipes/context-engineering/notebooks_archive/section-1-introduction/01_what_is_context_engineering.ipynb b/python-recipes/context-engineering/notebooks_archive/section-1-introduction/01_what_is_context_engineering.ipynb new file mode 100644 index 00000000..4df83799 --- /dev/null +++ b/python-recipes/context-engineering/notebooks_archive/section-1-introduction/01_what_is_context_engineering.ipynb @@ -0,0 +1,763 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![Redis](https://redis.io/wp-content/uploads/2024/04/Logotype.svg?auto=webp&quality=85,75&width=120)\n", + "\n", + "# What is Context Engineering?\n", + "\n", + "## Introduction\n", + "\n", + "**Context Engineering** is the discipline of designing, implementing, and optimizing context management systems for AI agents and applications. 
It's the practice of ensuring that AI systems have the right information, at the right time, in the right format to make intelligent decisions and provide relevant responses.\n", + "\n", + "Think of context engineering as the \"memory and awareness system\" for AI agents - it's what allows them to:\n", + "- Remember past conversations and experiences\n", + "- Understand their role and capabilities\n", + "- Access relevant information from large knowledge bases\n", + "- Maintain coherent, personalized interactions over time\n", + "\n", + "## Why Context Engineering Matters\n", + "\n", + "Without proper context engineering, AI agents are like people with severe amnesia - they can't remember what happened five minutes ago, don't know who they're talking to, and can't learn from experience. This leads to:\n", + "\n", + "\u274c **Poor User Experience**\n", + "- Repetitive conversations\n", + "- Lack of personalization\n", + "- Inconsistent responses\n", + "\n", + "\u274c **Inefficient Operations**\n", + "- Redundant processing\n", + "- Inability to build on previous work\n", + "- Lost context between sessions\n", + "\n", + "\u274c **Limited Capabilities**\n", + "- Can't handle complex, multi-step tasks\n", + "- No learning or adaptation\n", + "- Poor integration with existing systems\n", + "\n", + "## Core Components of Context Engineering\n", + "\n", + "Context engineering involves several key components working together:\n", + "\n", + "### 1. **System Context**\n", + "What the AI should know about itself and its environment:\n", + "- Role and responsibilities\n", + "- Available tools and capabilities\n", + "- Operating constraints and guidelines\n", + "- Domain-specific knowledge\n", + "\n", + "### 2. **Memory Management**\n", + "How information is stored, retrieved, and maintained:\n", + "- **Working memory**: Persistent storage focused on the current task, including conversation context and task-related data\n", + "- **Long-term memory**: Knowledge learned across sessions, such as user preferences and important facts\n", + "\n", + "### 3. **Context Retrieval**\n", + "How relevant information is found and surfaced:\n", + "- Semantic search and similarity matching\n", + "- Relevance ranking and filtering\n", + "- Context window management\n", + "\n", + "### 4. **Context Integration**\n", + "How different types of context are combined:\n", + "- Merging multiple information sources\n", + "- Resolving conflicts and inconsistencies\n", + "- Prioritizing information by importance\n", + "\n", + "## Real-World Example: University Class Agent\n", + "\n", + "Let's explore context engineering through a practical example - a university class recommendation agent. This agent helps students find courses, plan their academic journey, and provides personalized recommendations.\n", + "\n", + "### Without Context Engineering\n", + "```\n", + "Student: \"I'm interested in programming courses\"\n", + "Agent: \"Here are all programming courses: CS101, CS201, CS301...\"\n", + "\n", + "Student: \"I prefer online courses\"\n", + "Agent: \"Here are all programming courses: CS101, CS201, CS301...\"\n", + "\n", + "Student: \"What about my major requirements?\"\n", + "Agent: \"I don't know your major. Here are all programming courses...\"\n", + "```\n", + "\n", + "### With Context Engineering\n", + "```\n", + "Student: \"I'm interested in programming courses\"\n", + "Agent: \"Great! I can help you find programming courses. 
Let me search our catalog...\n",
+    "     Based on your Computer Science major and beginner level, I recommend:\n",
+    "     - CS101: Intro to Programming (online, matches your preference)\n",
+    "     - CS102: Data Structures (hybrid option available)\"\n",
+    "\n",
+    "Student: \"Tell me more about CS101\"\n",
+    "Agent: \"CS101 is perfect for you! It's:\n",
+    "     - Online format (your preference)\n",
+    "     - Beginner-friendly\n",
+    "     - Required for your CS major\n",
+    "     - No prerequisites needed\n",
+    "     - Taught by Prof. Smith (highly rated)\"\n",
+    "```\n",
+    "\n",
+    "## Environment Setup\n",
+    "\n",
+    "Before we explore context engineering in action, let's set up our environment with the necessary dependencies and connections."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-10-26T23:33:51.045786Z",
+     "iopub.status.busy": "2025-10-26T23:33:51.045650Z",
+     "iopub.status.idle": "2025-10-26T23:33:55.248593Z",
+     "shell.execute_reply": "2025-10-26T23:33:55.248104Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# Install the Redis Context Course package\n",
+    "%pip install --upgrade -q -e ../../reference-agent"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Required API Keys"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-10-26T23:50:19.183946Z",
+     "start_time": "2025-10-26T23:50:19.179782Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "# Read the key from the environment (set it in your shell or a .env file,\n",
+    "# e.g. export OPENAI_API_KEY=sk-...); the next cell verifies it is present.\n",
+    "OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-10-26T23:50:19.462939Z",
+     "start_time": "2025-10-26T23:50:19.460950Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "# Non-interactive check for OpenAI key\n",
+    "if not os.getenv(\"OPENAI_API_KEY\"):\n",
+    "    print(\"OPENAI_API_KEY is not set. 
Some examples that call OpenAI will be skipped.\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-26T23:50:19.881374Z", + "start_time": "2025-10-26T23:50:19.879859Z" + } + }, + "outputs": [], + "source": [ + "# Setup Redis (uncomment if running in Colab)\n", + "# !curl -fsSL https://packages.redis.io/gpg | sudo gpg --dearmor -o /usr/share/keyrings/redis-archive-keyring.gpg\n", + "# !echo \"deb [signed-by=/usr/share/keyrings/redis-archive-keyring.gpg] https://packages.redis.io/deb $(lsb_release -cs) main\" | sudo tee /etc/apt/sources.list.d/redis.list\n", + "# !sudo apt-get update > /dev/null 2>&1\n", + "# !sudo apt-get install redis-server > /dev/null 2>&1\n", + "# !redis-server --daemonize yes\n", + "\n", + "# Set Redis URL\n", + "os.environ[\"REDIS_URL\"] = \"redis://localhost:6379\"" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-26T23:50:20.774039Z", + "start_time": "2025-10-26T23:50:20.226267Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Redis connection: \u2705 Connected\n", + "\u2705 Redis Context Course package imported successfully\n" + ] + } + ], + "source": [ + "# Import the Redis Context Course components\n", + "from redis_context_course.models import Course, StudentProfile, DifficultyLevel, CourseFormat\n", + "from redis_context_course import MemoryClient\n", + "from redis_context_course.course_manager import CourseManager\n", + "from redis_context_course.redis_config import redis_config\n", + "\n", + "# Check Redis connection\n", + "redis_available = redis_config.health_check()\n", + "print(f\"Redis connection: {'\u2705 Connected' if redis_available else '\u274c Failed'}\")\n", + "print(\"\u2705 Redis Context Course package imported successfully\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Context Engineering in Action\n", + "\n", + "Now that our environment is ready, let's explore the different types of context our agent manages:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1. System Context Example\n", + "\n", + "System context defines what the agent knows about itself. 
This is typically provided as a system prompt:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-26T23:50:21.366783Z", + "start_time": "2025-10-26T23:50:21.364576Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\ud83e\udd16 System Prompt Example:\n", + "============================================================\n", + "\n", + "You are a helpful university class recommendation agent for Redis University.\n", + "Your role is to help students find courses, plan their academic journey, and\n", + "answer questions about the course catalog.\n", + "\n", + "## Your Responsibilities\n", + "\n", + "- Help students discover courses that match their interests and goals\n", + "- Provide accurate information about course content, prerequisites, and\n", + " schedules\n", + "- Remember student preferences and use them to personalize recommendations\n", + "- Guide students toward courses that align with their major requirements\n", + "\n", + "## Important Constraints\n", + "\n", + "- Only recommend courses that exist in the course catalog (use the\n", + " search_courses tool to verify)\n", + "- Always check prerequisites before recommending a course\n", + "- Respect student preferences for course format (online, in-person, hybrid)\n", + "- Be honest when you don't know something - don't make up course information\n", + "- If a student asks about a course that doesn't exist, help them find similar\n", + " alternatives\n", + "\n", + "## Interaction Guidelines\n", + "\n", + "- Be friendly, encouraging, and supportive\n", + "- Ask clarifying questions when student requests are vague\n", + "- Explain your reasoning when making recommendations\n", + "- Keep responses concise but informative\n", + "- Use the student's name when you know it\n", + "\n", + "## Tools Available\n", + "\n", + "You have access to tools for searching the course catalog and managing student\n", + "memories. 
Use these tools to provide accurate, personalized recommendations.\n", + "\n", + "============================================================\n", + "\n", + "This system prompt will be included in every conversation turn,\n", + "giving the LLM consistent instructions about its role and behavior.\n" + ] + } + ], + "source": [ + "# Example of a system prompt - the agent's instructions and constraints\n", + "system_prompt = \"\"\"\n", + "You are a helpful university class recommendation agent for Redis University.\n", + "Your role is to help students find courses, plan their academic journey, and\n", + "answer questions about the course catalog.\n", + "\n", + "## Your Responsibilities\n", + "\n", + "- Help students discover courses that match their interests and goals\n", + "- Provide accurate information about course content, prerequisites, and\n", + " schedules\n", + "- Remember student preferences and use them to personalize recommendations\n", + "- Guide students toward courses that align with their major requirements\n", + "\n", + "## Important Constraints\n", + "\n", + "- Only recommend courses that exist in the course catalog (use the\n", + " search_courses tool to verify)\n", + "- Always check prerequisites before recommending a course\n", + "- Respect student preferences for course format (online, in-person, hybrid)\n", + "- Be honest when you don't know something - don't make up course information\n", + "- If a student asks about a course that doesn't exist, help them find similar\n", + " alternatives\n", + "\n", + "## Interaction Guidelines\n", + "\n", + "- Be friendly, encouraging, and supportive\n", + "- Ask clarifying questions when student requests are vague\n", + "- Explain your reasoning when making recommendations\n", + "- Keep responses concise but informative\n", + "- Use the student's name when you know it\n", + "\n", + "## Tools Available\n", + "\n", + "You have access to tools for searching the course catalog and managing student\n", + "memories. Use these tools to provide accurate, personalized recommendations.\n", + "\"\"\"\n", + "\n", + "print(\"\ud83e\udd16 System Prompt Example:\")\n", + "print(\"=\" * 60)\n", + "print(system_prompt)\n", + "print(\"=\" * 60)\n", + "print(\"\\nThis system prompt will be included in every conversation turn,\")\n", + "print(\"giving the LLM consistent instructions about its role and behavior.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2. 
Student Context Example\n",
+    "\n",
+    "Student context represents what the agent knows about the user:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-10-26T23:50:22.070789Z",
+     "start_time": "2025-10-26T23:50:22.068030Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\ud83d\udc64 Student Context:\n",
+      "Name: Arsene Wenger\n",
+      "Major: Computer Science (Year 2)\n",
+      "Completed: 3 courses\n",
+      "Current: 2 courses\n",
+      "Interests: machine learning, web development, data science\n",
+      "Preferences: online, intermediate level\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Example student profile - user context\n",
+    "student = StudentProfile(\n",
+    "    name=\"Arsene Wenger\",\n",
+    "    email=\"arsene.wenger@university.edu\",\n",
+    "    major=\"Computer Science\",\n",
+    "    year=2,\n",
+    "    completed_courses=[\"CS101\", \"MATH101\", \"ENG101\"],\n",
+    "    current_courses=[\"CS201\", \"MATH201\"],\n",
+    "    interests=[\"machine learning\", \"web development\", \"data science\"],\n",
+    "    preferred_format=CourseFormat.ONLINE,\n",
+    "    preferred_difficulty=DifficultyLevel.INTERMEDIATE,\n",
+    "    max_credits_per_semester=15\n",
+    ")\n",
+    "\n",
+    "print(\"\ud83d\udc64 Student Context:\")\n",
+    "print(f\"Name: {student.name}\")\n",
+    "print(f\"Major: {student.major} (Year {student.year})\")\n",
+    "print(f\"Completed: {len(student.completed_courses)} courses\")\n",
+    "print(f\"Current: {len(student.current_courses)} courses\")\n",
+    "print(f\"Interests: {', '.join(student.interests)}\")\n",
+    "print(f\"Preferences: {student.preferred_format.value}, {student.preferred_difficulty.value} level\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3. Memory Context Example\n",
+    "\n",
+    "Memory context includes past conversations and stored knowledge. Our agent uses the Agent Memory Server to store and retrieve memories.\n",
+    "\n",
+    "**Note:** This requires the Agent Memory Server to be running. See Section 3 notebooks for detailed memory operations.",
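+    "\n",
+    "Before moving on, you can sanity-check that the server is reachable. The cell below is a minimal sketch (not part of the course package); it reuses the same `/v1/health` endpoint and `AGENT_MEMORY_URL` variable that this course's setup helper checks:\n",
+    "\n",
+    "```python\n",
+    "import os\n",
+    "import requests\n",
+    "\n",
+    "base_url = os.getenv(\"AGENT_MEMORY_URL\", \"http://localhost:8088\")\n",
+    "try:\n",
+    "    resp = requests.get(f\"{base_url}/v1/health\", timeout=3)\n",
+    "    print(\"Memory server healthy\" if resp.ok else f\"Unhealthy: HTTP {resp.status_code}\")\n",
+    "except requests.RequestException as exc:\n",
+    "    print(f\"Memory server not reachable: {type(exc).__name__}\")\n",
+    "```"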
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-26T23:50:23.834512Z", + "start_time": "2025-10-26T23:50:23.405978Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "19:50:53 httpx INFO HTTP Request: POST http://localhost:8088/v1/long-term-memory/ \"HTTP/1.1 200 OK\"\n", + "\ud83e\udde0 Memory Context Stored:\n", + "\u2705 Preference stored\n", + "\u2705 Goal stored\n", + "\u2705 Academic performance noted\n", + "19:50:53 httpx INFO HTTP Request: POST http://localhost:8088/v1/long-term-memory/search?optimize_query=false \"HTTP/1.1 200 OK\"\n", + "\n", + "\ud83d\udd0d Retrieved 3 relevant memories:\n", + " \u2022 [MemoryTypeEnum.SEMANTIC] User is interested in machine learning...\n", + " \u2022 [MemoryTypeEnum.SEMANTIC] User is interested in machine learning...\n", + " \u2022 [MemoryTypeEnum.SEMANTIC] User is interested in machine learning...\n" + ] + } + ], + "source": [ + "\n", + "from agent_memory_client import MemoryAPIClient as MemoryClient, MemoryClientConfig\n", + "from agent_memory_client.models import MemoryTypeEnum\n", + "from agent_memory_client.models import ClientMemoryRecord\n", + "\n", + "# Initialize memory client\n", + "config = MemoryClientConfig(\n", + " base_url=os.getenv(\"AGENT_MEMORY_URL\", \"http://localhost:8088\"),\n", + " default_namespace=\"redis_university\"\n", + ")\n", + "memory_client = MemoryClient(config=config)\n", + "\n", + "# Example of storing different types of memories\n", + "async def demonstrate_memory_context():\n", + " await memory_client.create_long_term_memory([\n", + " ClientMemoryRecord(\n", + " text=\"I prefer online courses because I work part-time\",\n", + " memory_type=MemoryTypeEnum.SEMANTIC,\n", + " topics=[\"preferences\", \"schedule\"]\n", + " ),\n", + " ClientMemoryRecord(\n", + " text=\"I want to specialize in machine learning and AI\",\n", + " memory_type=MemoryTypeEnum.SEMANTIC,\n", + " topics=[\"goals\", \"career\"]\n", + " ),\n", + " ClientMemoryRecord(\n", + " text=\"Student struggled with calculus but excelled in programming courses\",\n", + " memory_type=MemoryTypeEnum.SEMANTIC,\n", + " topics=[\"academic_performance\", \"strengths\"]\n", + " )])\n", + " \n", + " print(\"\ud83e\udde0 Memory Context Stored:\")\n", + " print(\"\u2705 Preference stored\")\n", + " print(\"\u2705 Goal stored\")\n", + " print(\"\u2705 Academic performance noted\")\n", + " \n", + " # Retrieve relevant memories using semantic search\n", + " results = await memory_client.search_long_term_memory(\n", + " text=\"course recommendations for machine learning\",\n", + " namespace={\"eq\": \"redis_university\"},\n", + " limit=3\n", + " )\n", + " \n", + " print(f\"\\n\ud83d\udd0d Retrieved {len(results.memories)} relevant memories:\")\n", + " for memory in results.memories:\n", + " print(f\" \u2022 [{memory.memory_type}] {memory.text[:60]}...\")\n", + "\n", + "# Run the memory demonstration\n", + "await demonstrate_memory_context()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Context Integration in Practice\n", + "\n", + "Now let's see how all these context types work together to construct the actual prompt sent to the LLM:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-26T23:50:26.023473Z", + "start_time": "2025-10-26T23:50:25.634781Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\ud83c\udfaf Context 
Integration: Building the Complete Prompt\n", + "======================================================================\n", + "\n", + "\ud83d\udcdd User Query: 'What courses should I take next semester?'\n", + "\n", + "\ud83d\udd0d Step 1: Searching long-term memory...\n", + "19:50:54 httpx INFO HTTP Request: POST http://localhost:8088/v1/long-term-memory/search?optimize_query=false \"HTTP/1.1 200 OK\"\n", + " Found 3 relevant memories\n", + "\n", + "\ud83d\udc64 Step 2: Loading student profile...\n", + " Profile loaded\n", + "\n", + "\ud83d\udd27 Step 3: Assembling complete prompt...\n", + "\n", + "======================================================================\n", + "\ud83d\udccb COMPLETE ASSEMBLED PROMPT (sent to LLM):\n", + "======================================================================\n", + "SYSTEM PROMPT:\n", + "\n", + "You are a helpful university class recommendation agent for Redis University.\n", + "Your role is to help students find courses, plan their academic journey, and\n", + "answer questions about the course catalog.\n", + "\n", + "## Your Responsibilities\n", + "\n", + "- Help students discover courses that match their interests and goals\n", + "- Provide accurate information about course content, prerequisites, and\n", + " schedules\n", + "- Remember student preferences and use them to personalize recommendations\n", + "- Guide students toward courses that align with their major requirements\n", + "\n", + "## Important Constraints\n", + "\n", + "- Only recommend courses that exist in the course catalog (use the\n", + " search_courses tool to verify)\n", + "- Always check prerequisites before recommending a course\n", + "- Respect student preferences for course format (online, in-person, hybrid)\n", + "- Be honest when you don't know something - don't make up course information\n", + "- If a student asks about a course that doesn't exist, help them find similar\n", + " alternatives\n", + "\n", + "## Interaction Guidelines\n", + "\n", + "- Be friendly, encouraging, and supportive\n", + "- Ask clarifying questions when student requests are vague\n", + "- Explain your reasoning when making recommendations\n", + "- Keep responses concise but informative\n", + "- Use the student's name when you know it\n", + "\n", + "## Tools Available\n", + "\n", + "You have access to tools for searching the course catalog and managing student\n", + "memories. 
Use these tools to provide accurate, personalized recommendations.\n", + "\n", + "\n", + "STUDENT PROFILE:\n", + "Name: Arsene Wenger\n", + "Major: Computer Science (Year 2)\n", + "Completed Courses: CS101, MATH101, ENG101\n", + "Current Courses: CS201, MATH201\n", + "Interests: machine learning, web development, data science\n", + "Preferred Format: online\n", + "Preferred Difficulty: intermediate\n", + "\n", + "POTENTIALLY RELEVANT MEMORIES:\n", + "- User prefers math and science courses.\n", + "- User prefers math courses.\n", + "- User is interested in math, engineering, data science, machine learning, statistics, and English courses.\n", + "\n", + "USER QUERY:\n", + "What courses should I take next semester?\n", + "\n", + "Please provide a helpful response based on the student's profile, memories, and query.\n", + "======================================================================\n", + "\n", + "\ud83d\udca1 Key Points:\n", + " \u2022 System prompt defines the agent's role and constraints\n", + " \u2022 Student profile provides current context about the user\n", + " \u2022 Memories add relevant information from past conversations\n", + " \u2022 User query is the current request\n", + " \u2022 All assembled into a single prompt for the LLM\n" + ] + } + ], + "source": [ + "async def demonstrate_context_integration():\n", + " \"\"\"\n", + " This demonstrates how we assemble different context sources into a complete prompt.\n", + " \"\"\"\n", + " print(\"\ud83c\udfaf Context Integration: Building the Complete Prompt\")\n", + " print(\"=\" * 70)\n", + "\n", + " # 1. Student asks for recommendations\n", + " user_query = \"What courses should I take next semester?\"\n", + " print(f\"\\n\ud83d\udcdd User Query: '{user_query}'\")\n", + "\n", + " # 2. Retrieve relevant memories\n", + " print(\"\\n\ud83d\udd0d Step 1: Searching long-term memory...\")\n", + " memory_results = await memory_client.search_long_term_memory(\n", + " text=user_query,\n", + " namespace={\"eq\": \"redis_university\"},\n", + " limit=3\n", + " )\n", + "\n", + " # Format memories for the prompt\n", + " memories_text = \"\\n\".join([\n", + " f\"- {memory.text}\"\n", + " for memory in memory_results.memories\n", + " ]) if memory_results.memories else \"No relevant memories found.\"\n", + "\n", + " print(f\" Found {len(memory_results.memories)} relevant memories\")\n", + "\n", + " # 3. Get student profile information\n", + " print(\"\\n\ud83d\udc64 Step 2: Loading student profile...\")\n", + " # Using the student profile we created earlier\n", + " student_context = f\"\"\"Name: {student.name}\n", + "Major: {student.major} (Year {student.year})\n", + "Completed Courses: {', '.join(student.completed_courses)}\n", + "Current Courses: {', '.join(student.current_courses)}\n", + "Interests: {', '.join(student.interests)}\n", + "Preferred Format: {student.preferred_format.value}\n", + "Preferred Difficulty: {student.preferred_difficulty.value}\"\"\"\n", + "\n", + " print(\" Profile loaded\")\n", + "\n", + " # 4. 
Assemble the complete prompt\n",
+    "    print(\"\\n\ud83d\udd27 Step 3: Assembling complete prompt...\")\n",
+    "\n",
+    "    # This is the actual prompt that would be sent to the LLM\n",
+    "    complete_prompt = f\"\"\"SYSTEM PROMPT:\n",
+    "{system_prompt}\n",
+    "\n",
+    "STUDENT PROFILE:\n",
+    "{student_context}\n",
+    "\n",
+    "POTENTIALLY RELEVANT MEMORIES:\n",
+    "{memories_text}\n",
+    "\n",
+    "USER QUERY:\n",
+    "{user_query}\n",
+    "\n",
+    "Please provide a helpful response based on the student's profile, memories, and query.\"\"\"\n",
+    "\n",
+    "    # 5. Display the assembled prompt\n",
+    "    print(\"\\n\" + \"=\" * 70)\n",
+    "    print(\"\ud83d\udccb COMPLETE ASSEMBLED PROMPT (sent to LLM):\")\n",
+    "    print(\"=\" * 70)\n",
+    "    print(complete_prompt)\n",
+    "    print(\"=\" * 70)\n",
+    "\n",
+    "    print(\"\\n\ud83d\udca1 Key Points:\")\n",
+    "    print(\"  \u2022 System prompt defines the agent's role and constraints\")\n",
+    "    print(\"  \u2022 Student profile provides current context about the user\")\n",
+    "    print(\"  \u2022 Memories add relevant information from past conversations\")\n",
+    "    print(\"  \u2022 User query is the current request\")\n",
+    "    print(\"  \u2022 All assembled into a single prompt for the LLM\")\n",
+    "\n",
+    "await demonstrate_context_integration()\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Key Takeaways\n",
+    "\n",
+    "From this introduction to context engineering, we can see several important principles:\n",
+    "\n",
+    "### 1. **Context is Multi-Dimensional**\n",
+    "- **System context**: What the AI knows about itself\n",
+    "- **User context**: What the AI knows about the user\n",
+    "- **Domain context**: What the AI knows about the subject matter\n",
+    "- **Conversation context**: What has been discussed recently\n",
+    "- **Historical context**: What has been learned over time\n",
+    "\n",
+    "Some of these sources are static, updated only when the agent's code changes,\n",
+    "while others may be retrieved dynamically from external sources, such as\n",
+    "via APIs or vector search.\n",
+    "\n",
+    "### 2. **Memory is Essential**\n",
+    "- **Working memory**: Maintains conversation flow and task-related context\n",
+    "- **Long-term memory**: Enables learning and personalization across sessions\n",
+    "\n",
+    "### 3. **Context Must Be Actionable**\n",
+    "- Information is only valuable if it can improve responses\n",
+    "- Context should be prioritized by relevance and importance -- this is often done through scoring and filtering\n",
+    "- The system must be able to integrate multiple context sources\n",
+    "\n",
+    "### 4. **Context Engineering is Iterative**\n",
+    "- Systems improve as they gather more context -- though, as we'll see in the course, there are limits\n",
+    "- Context quality affects response quality\n",
+    "- Feedback loops help refine context management\n",
+    "\n",
+    "## Next Steps\n",
+    "\n",
+    "In the next notebook, we'll explore **The Role of a Context Engine** - the technical infrastructure that makes context engineering possible. We'll dive deeper into:\n",
+    "\n",
+    "- Vector databases and semantic search\n",
+    "- Memory architectures and storage patterns\n",
+    "- Context retrieval and ranking algorithms\n",
+    "- Integration with LLMs and agent frameworks\n",
+    "\n",
+    "## Try It Yourself\n",
+    "\n",
+    "Experiment with the concepts we've covered:\n",
+    "\n",
+    "1. **Modify the student profile** - Change interests, preferences, or academic history\n",
+    "2. **Add new memory types** - Store different kinds of information\n",
+    "3. 
**Experiment with context retrieval** - Try different queries and see what memories are retrieved\n", + "4. **Think about your own use case** - How would context engineering apply to your domain?\n", + "\n", + "The power of context engineering lies in its ability to make AI systems more intelligent, personalized, and useful. As we'll see in the following notebooks, the technical implementation of these concepts using Redis, LangGraph, and modern AI tools makes it possible to build sophisticated, context-aware applications." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/python-recipes/context-engineering/notebooks_archive/section-1-introduction/02_project_overview.ipynb b/python-recipes/context-engineering/notebooks_archive/section-1-introduction/02_project_overview.ipynb new file mode 100644 index 00000000..d46e554f --- /dev/null +++ b/python-recipes/context-engineering/notebooks_archive/section-1-introduction/02_project_overview.ipynb @@ -0,0 +1,513 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![Redis](https://redis.io/wp-content/uploads/2024/04/Logotype.svg?auto=webp&quality=85,75&width=120)\n", + "\n", + "# Project Overview: Redis University Class Agent\n", + "\n", + "## Introduction\n", + "\n", + "Throughout this course, we'll be building and exploring a complete **Redis University Class Agent** - a sophisticated AI agent that helps students find courses, plan their academic journey, and provides personalized recommendations.\n", + "\n", + "This project serves as a comprehensive example of context engineering principles in action, demonstrating how to build intelligent, context-aware AI systems using Redis, LangGraph, and modern AI tools.\n", + "\n", + "## Project Goals\n", + "\n", + "Our Redis University Class Agent is designed to:\n", + "\n", + "### ๐ŸŽฏ **Primary Objectives**\n", + "- **Help students discover relevant courses** based on their interests and goals\n", + "- **Provide personalized recommendations** considering academic history and preferences\n", + "- **Remember student context** across multiple conversations and sessions\n", + "- **Answer questions** about courses, prerequisites, and academic planning\n", + "- **Adapt and learn** from student interactions over time\n", + "\n", + "### ๐Ÿ“š **Educational Objectives**\n", + "- **Demonstrate context engineering concepts** in a real-world scenario\n", + "- **Show Redis capabilities** for AI applications and memory management\n", + "- **Illustrate LangGraph workflows** for complex agent behaviors\n", + "- **Provide a reference implementation** for similar projects\n", + "- **Teach best practices** for building context-aware AI systems\n", + "\n", + "## System Architecture\n", + "\n", + "Our agent follows a modern, scalable architecture:\n", + "\n", + "```\n", + "โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”\n", + "โ”‚ User 
Input โ”‚โ”€โ”€โ”€โ–ถโ”‚ LangGraph โ”‚โ”€โ”€โ”€โ–ถโ”‚ OpenAI GPT โ”‚\n", + "โ”‚ (CLI/API) โ”‚ โ”‚ Agent โ”‚ โ”‚ (LLM) โ”‚\n", + "โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n", + " โ”‚\n", + " โ–ผ\n", + "โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”\n", + "โ”‚ Redis Context Engine โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ Short-term โ”‚ Long-term โ”‚ Course Catalog โ”‚\n", + "โ”‚ Memory โ”‚ Memory โ”‚ (Vector Search) โ”‚\n", + "โ”‚ (Checkpointer) โ”‚ (Vector Store) โ”‚ โ”‚\n", + "โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n", + "```\n", + "\n", + "### Key Components\n", + "\n", + "1. **LangGraph Agent**: Orchestrates the conversation flow and decision-making\n", + "2. **Redis Context Engine**: Manages all context and memory operations\n", + "3. **OpenAI Integration**: Provides language understanding and generation\n", + "4. **Tool System**: Enables the agent to search, recommend, and remember\n", + "5. **CLI Interface**: Provides an interactive way to chat with the agent\n", + "\n", + "## Core Features\n", + "\n", + "Let's explore the key features our agent provides:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-26T23:30:09.764622Z", + "iopub.status.busy": "2025-10-26T23:30:09.764507Z", + "iopub.status.idle": "2025-10-26T23:30:13.962418Z", + "shell.execute_reply": "2025-10-26T23:30:13.961860Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.2\u001b[0m\r\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "# Install the Redis Context Course package\n", + "%pip install -q -e ../../reference-agent\n", + "\n", + "# Or install from PyPI (when available)\n", + "# %pip install -q redis-context-course" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "execution": { + "iopub.execute_input": "2025-10-26T23:30:13.963877Z", + "iopub.status.busy": "2025-10-26T23:30:13.963739Z", + "iopub.status.idle": "2025-10-26T23:30:13.966752Z", + "shell.execute_reply": "2025-10-26T23:30:13.966218Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โš ๏ธ Non-interactive environment detected. 
Using dummy OPENAI_API_KEY for demonstration.\n" + ] + } + ], + "source": [ + "import os\n", + "import sys\n", + "\n", + "# Set up environment - handle both interactive and CI environments\n", + "def _set_env(key: str):\n", + " if key not in os.environ:\n", + " # Check if we're in an interactive environment\n", + " if hasattr(sys.stdin, 'isatty') and sys.stdin.isatty():\n", + " import getpass\n", + " os.environ[key] = getpass.getpass(f\"{key}: \")\n", + " else:\n", + " # Non-interactive environment (like CI) - use a dummy key\n", + " print(f\"โš ๏ธ Non-interactive environment detected. Using dummy {key} for demonstration.\")\n", + " os.environ[key] = \"sk-dummy-key-for-testing-purposes-only\"\n", + "\n", + "_set_env(\"OPENAI_API_KEY\")\n", + "os.environ[\"REDIS_URL\"] = \"redis://localhost:6379\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Feature 1: Intelligent Course Search\n", + "\n", + "The agent can search through course catalogs using both semantic and structured search:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Intelligent course search\n", + "- Semantic vector search (OpenAI embeddings) with RedisVL\n", + "- Structured filters (department, difficulty, format)\n", + "- Hybrid search and relevance ranking\n", + "\n", + "Example:\n", + "```python\n", + "# Initialize once at the top of your notebook\n", + "from redis_context_course.course_manager import CourseManager\n", + "course_manager = CourseManager()\n", + "\n", + "# Run a semantic search\n", + "results = course_manager.search(\"machine learning\", limit=3) # method name may vary\n", + "for r in results:\n", + " print(r.code, r.title)\n", + "```\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Feature 2: Personalized Recommendations\n", + "\n", + "The agent provides personalized course recommendations based on student profiles and preferences:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Personalized recommendations\n", + "- Combines interests, history, prerequisites, and preferences\n", + "- Ranks courses and explains each recommendation\n", + "\n", + "Example:\n", + "```python\n", + "from redis_context_course.models import StudentProfile, CourseFormat, DifficultyLevel\n", + "profile = StudentProfile(\n", + " name=\"Alex Johnson\", major=\"Computer Science\", year=2,\n", + " completed_courses=[\"CS101\", \"MATH101\"],\n", + " interests=[\"machine learning\", \"web development\"],\n", + " preferred_format=CourseFormat.ONLINE,\n", + " preferred_difficulty=DifficultyLevel.INTERMEDIATE,\n", + ")\n", + "recs = course_manager.recommend(profile, k=3) # method name may vary\n", + "for c in recs:\n", + " print(c.code, c.title)\n", + "```\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Feature 3: Persistent Memory System\n", + "\n", + "The agent remembers student interactions and builds context over time:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Persistent memory system\n", + "- Stores preferences, goals, experiences, and key conversation summaries\n", + "- Supports store, retrieve, consolidate, update, and expire operations\n", + "\n", + "Example:\n", + "```python\n", + "from agent_memory_client import MemoryClient, MemoryClientConfig\n", + "cfg = MemoryClientConfig(base_url=\"http://localhost:8088\", default_namespace=\"redis_university\")\n", + "mem = MemoryClient(config=cfg)\n", + "mem.store(entity_id=\"alex\", 
kind=\"preference\", text=\"Prefers online courses\")\n", + "related = mem.search(entity_id=\"alex\", query=\"online\", k=3)\n", + "```\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Feature 4: LangGraph Workflow\n", + "\n", + "The agent uses LangGraph for sophisticated workflow orchestration:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### LangGraph workflow\n", + "```text\n", + "โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”\n", + "โ”‚ User Input โ”‚\n", + "โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n", + " โ”‚\n", + " โ–ผ\n", + "โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” (retrieve context)\n", + "โ”‚ Retrieve โ”‚โ—„โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\n", + "โ”‚ Context โ”‚\n", + "โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n", + " โ”‚\n", + " โ–ผ\n", + "โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” (uses tools when needed)\n", + "โ”‚ Agent Reasoning โ”‚\n", + "โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n", + " โ”‚\n", + " โ–ผ\n", + "โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” (checkpointer + long-term)\n", + "โ”‚ Store Memory โ”‚\n", + "โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n", + "```\n", + "\n", + "Tools: search courses, get recommendations, store preferences/goals, fetch student context.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Feature 5: Interactive CLI Interface\n", + "\n", + "The agent provides a rich command-line interface for easy interaction:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Interactive CLI interface\n", + "\n", + "- Rich formatting, history, and help\n", + "- Typing indicators, markdown rendering, friendly errors\n", + "\n", + "Example session:\n", + "```text\n", + "You: I'm interested in machine learning courses\n", + "Agent: Recommends top matches and explains why\n", + "You: I prefer online courses\n", + "Agent: Filters to online options and remembers the preference\n", + "```\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Technical Implementation\n", + "\n", + "Let's examine the technical stack and implementation details:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Technology stack\n", + "- AI/ML: OpenAI GPT for generation; text-embedding-3-small for embeddings; LangChain + LangGraph\n", + "- Data & Storage: Redis 8 (vectors + metadata), RedisVL; LangGraph checkpointing in Redis\n", + "- Development: Python 3.8+, Pydantic, Rich/Click, asyncio\n", + "- Quality: Pytest, Black, isort, MyPy\n", + "\n", + "### Architecture patterns\n", + "- Repository: isolate data access (CourseManager, MemoryClient)\n", + "- Strategy: multiple search/retrieval strategies (semantic, keyword, hybrid)\n", + "- Observer: state persistence & consolidation via Redis checkpointer\n", + "- Factory: constructors for memories and course artifacts\n", + "\n", + "### Performance notes\n", + "- Subโ€‘ms Redis ops; typical vector search <50 ms; retrieval <100 ms; endโ€‘toโ€‘end response <2 s\n", + "- Scales horizontally with Redis and stateless workers\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Getting Started with the Project\n", + "\n", + "Here's how to set up and run the Redis University Class Agent:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Prerequisites\n", + "- Python 
3.8+\n", + "- Redis 8 (local or Docker)\n", + "- OpenAI API key\n", + "\n", + "### Setup\n", + "1. Clone and enter the project\n", + "\n", + " ```bash\n", + " git clone https://github.com/redis-developer/redis-ai-resources.git\n", + " cd redis-ai-resources/python-recipes/context-engineering/reference-agent\n", + " ```\n", + "\n", + "2. Install dependencies\n", + "\n", + " ```bash\n", + " pip install -r requirements.txt\n", + " ```\n", + "\n", + "3. Configure environment\n", + "\n", + " ```bash\n", + " cp .env.example .env\n", + " # edit .env to set OPENAI_API_KEY and REDIS_URL\n", + " ```\n", + "\n", + "4. Start Redis (Docker example)\n", + "\n", + " ```bash\n", + " docker run -d --name redis -p 6379:6379 redis:8-alpine\n", + " ```\n", + "\n", + "5. Seed and ingest sample data\n", + "\n", + " ```bash\n", + " python scripts/generate_courses.py --courses-per-major 15\n", + " python scripts/ingest_courses.py --catalog course_catalog.json --clear\n", + " ```\n", + "\n", + "6. Start the agent\n", + "\n", + " ```bash\n", + " python src/cli.py --student-id your_name\n", + " ```\n", + "\n", + "### Verify\n", + "- Redis connection reports Healthy\n", + "- Course catalog shows 50+ courses\n", + "- Agent greets and can search for \"programming\"\n", + "- Preferences persist across messages\n", + "\n", + "### Next steps\n", + "- Continue to Section 2: System Context\n", + "- Try different queries and explore the code\n", + "- Extend the agent with new tools\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Learning Objectives\n", + "\n", + "By working with this project, you'll learn:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### What you'll learn\n", + "- Context engineering principles and patterns\n", + "- Designing context-aware agents with LangGraph\n", + "- Using Redis 8 and RedisVL for vector search and state\n", + "- Building and evaluating retrieval and memory strategies\n", + "\n", + "### Skills you'll build\n", + "- Agent workflow design and tool integration\n", + "- Memory modeling (short-term, long-term, consolidation)\n", + "- Performance tuning for vector search and retrieval\n", + "- Robustness: error handling, persistence, observability\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Course Roadmap\n", + "\n", + "Here's what we'll cover in the upcoming sections:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Course roadmap\n", + "\n", + "- Section 1: Introduction (current)\n", + " - What is Context Engineering?\n", + " - Project Overview: Redis University Class Agent\n", + "\n", + "- Section 2: Setting up System Context\n", + " - Prepping the system context\n", + " - Defining available tools\n", + "\n", + "- Section 3: Memory Management\n", + " - Working memory with extraction strategies\n", + " - Long-term memory and integration\n", + " - Memory tools\n", + "\n", + "- Section 4: Optimizations\n", + " - Context window management\n", + " - Retrieval strategies and grounding\n", + " - Tool optimization\n", + " - Crafting data for LLMs\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Conclusion\n", + "\n", + "The Redis University Class Agent represents a comprehensive example of context engineering in practice. 
It demonstrates how to build intelligent, context-aware AI systems that can:\n", + "\n", + "- **Remember and learn** from user interactions\n", + "- **Provide personalized experiences** based on individual needs\n", + "- **Scale efficiently** using Redis as the context engine\n", + "- **Integrate seamlessly** with modern AI frameworks\n", + "- **Maintain consistency** across multiple sessions and conversations\n", + "\n", + "As we progress through this course, you'll gain hands-on experience with each component of the system, learning not just how to build context-aware AI agents, but understanding the principles and patterns that make them effective.\n", + "\n", + "## Ready to Continue?\n", + "\n", + "Now that you understand the project overview and architecture, you're ready to dive into the technical implementation. In **Section 2: Setting up System Context**, we'll explore:\n", + "\n", + "- How to define what your AI agent should know about itself\n", + "- Techniques for crafting effective system prompts\n", + "- Methods for defining and managing agent tools\n", + "- Best practices for setting capability boundaries\n", + "\n", + "Let's continue building your expertise in context engineering! ๐Ÿš€" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/python-recipes/context-engineering/notebooks_archive/section-2-system-context/01_system_instructions.ipynb b/python-recipes/context-engineering/notebooks_archive/section-2-system-context/01_system_instructions.ipynb new file mode 100644 index 00000000..630a03a6 --- /dev/null +++ b/python-recipes/context-engineering/notebooks_archive/section-2-system-context/01_system_instructions.ipynb @@ -0,0 +1,638 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# System Instructions: Crafting Effective System Prompts\n", + "\n", + "## Introduction\n", + "\n", + "In this notebook, you'll learn how to craft effective system prompts that define your agent's behavior, personality, and capabilities. System instructions are the foundation of your agent's context - they tell the LLM what it is, what it can do, and how it should behave.\n", + "\n", + "### What You'll Learn\n", + "\n", + "- What system instructions are and why they matter\n", + "- What belongs in system context vs. retrieved context\n", + "- How to structure effective system prompts\n", + "- How to set agent personality and constraints\n", + "- How different instructions affect agent behavior\n", + "\n", + "### Prerequisites\n", + "\n", + "- Completed Section 1 notebooks\n", + "- Redis 8 running locally\n", + "- OpenAI API key set" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Concepts: System Instructions\n", + "\n", + "### What Are System Instructions?\n", + "\n", + "System instructions (also called system prompts) are the **persistent context** that defines your agent's identity and behavior. They are included in every conversation turn and tell the LLM:\n", + "\n", + "1. **Who it is** - Role and identity\n", + "2. **What it can do** - Capabilities and tools\n", + "3. **How it should behave** - Personality and constraints\n", + "4. 
**What it knows** - Domain knowledge and context\n", + "\n", + "### System Context vs. Retrieved Context\n", + "\n", + "| System Context | Retrieved Context |\n", + "|----------------|-------------------|\n", + "| **Static** - Same for every turn | **Dynamic** - Changes per query |\n", + "| **Role & behavior** | **Specific facts** |\n", + "| **Always included** | **Conditionally included** |\n", + "| **Examples:** Agent role, capabilities, guidelines | **Examples:** Course details, user preferences, memories |\n", + "\n", + "### Why System Instructions Matter\n", + "\n", + "Good system instructions:\n", + "- โœ… Keep the agent focused on its purpose\n", + "- โœ… Prevent unwanted behaviors\n", + "- โœ… Ensure consistent personality\n", + "- โœ… Guide tool usage\n", + "- โœ… Set user expectations\n", + "\n", + "Poor system instructions:\n", + "- โŒ Lead to off-topic responses\n", + "- โŒ Cause inconsistent behavior\n", + "- โŒ Result in tool misuse\n", + "- โŒ Create confused or unhelpful agents" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "๐Ÿ”ง Environment Setup\n", + "==============================\n", + "OpenAI API Key: โœ… Set\n" + ] + } + ], + "source": [ + "# Environment setup\n", + "import os\n", + "from dotenv import load_dotenv\n", + "\n", + "# Load environment variables\n", + "load_dotenv()\n", + "OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")\n", + "\n", + "print(\"๐Ÿ”ง Environment Setup\")\n", + "print(\"=\" * 30)\n", + "print(f\"OpenAI API Key: {'โœ… Set' if OPENAI_API_KEY else 'โŒ Not set'}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Setup complete!\n" + ] + } + ], + "source": [ + "import os\n", + "from langchain_openai import ChatOpenAI\n", + "from langchain_core.messages import SystemMessage, HumanMessage\n", + "\n", + "# Initialize LLM\n", + "llm = ChatOpenAI(model=\"gpt-4o\", temperature=0.7)\n", + "\n", + "print(\"โœ… Setup complete!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Hands-on: Building System Instructions.\n", + "Let's build system instructions for our Redis University Class Agent step by step." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Example 1: Minimal System Instructions\n", + "\n", + "Let's start with the bare minimum and see what happens." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Response with minimal instructions:\n", + "Of course! I'd be happy to help you plan your classes. Could you please provide some more information to get started? Here are a few questions to consider:\n", + "\n", + "1. **Major/Minor**: What is your major or field of study? Are there any specific requirements you need to fulfill?\n", + "\n", + "2. **Current Progress**: How far along are you in your program (e.g., freshman, sophomore, junior, senior)? Are there any prerequisites or core courses you still need to complete?\n", + "\n", + "3. **Interests**: Are there any particular subjects or courses you're interested in taking, even if they're outside your major?\n", + "\n", + "4. **Schedule Preferences**: Do you prefer morning or afternoon classes? 
Are there any days you need to keep free for work or other commitments?\n", + "\n", + "5. **Credits**: How many credits are you planning to take next semester?\n", + "\n", + "6. **Advising**: Have you met with your academic advisor to discuss your degree plan and ensure you're on track?\n", + "\n", + "Once you provide some more details, I can offer more specific guidance!\n", + "\n", + "================================================================================\n", + "\n" + ] + } + ], + "source": [ + "# Minimal system prompt\n", + "minimal_prompt = \"You are a helpful assistant.\"\n", + "\n", + "# Test it\n", + "messages = [\n", + " SystemMessage(content=minimal_prompt),\n", + " HumanMessage(content=\"I need help planning my classes for next semester.\")\n", + "]\n", + "\n", + "response = llm.invoke(messages)\n", + "print(\"Response with minimal instructions:\")\n", + "print(response.content)\n", + "print(\"\\n\" + \"=\"*80 + \"\\n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Problem:** The agent doesn't know it's a class scheduling agent. It might give generic advice instead of using our course catalog and tools." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Example 2: Adding Role and Purpose" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Response with role and purpose:\n", + "Of course! I'd be happy to help you plan your classes for next semester. To start, could you provide me with some information about your current academic status and interests? Specifically:\n", + "\n", + "1. What is your major or area of study?\n", + "2. Are there any specific courses you need to take next semester to meet graduation requirements?\n", + "3. Do you have any particular interests or topics you want to explore?\n", + "4. Are there any prerequisites or eligibility requirements you've already completed or need to check?\n", + "5. Do you prefer in-person, online, or hybrid classes?\n", + "\n", + "Once I have this information, I can help you find courses that match your needs and preferences!\n", + "\n", + "================================================================================\n", + "\n" + ] + } + ], + "source": [ + "# Add role and purpose\n", + "role_prompt = \"\"\"You are the Redis University Class Agent.\n", + "\n", + "Your role is to help students:\n", + "- Find courses that match their interests and requirements\n", + "- Plan their academic schedule\n", + "- Check prerequisites and eligibility\n", + "- Get personalized course recommendations\n", + "\"\"\"\n", + "\n", + "# Test it\n", + "messages = [\n", + " SystemMessage(content=role_prompt),\n", + " HumanMessage(content=\"I need help planning my classes for next semester.\")\n", + "]\n", + "\n", + "response = llm.invoke(messages)\n", + "print(\"Response with role and purpose:\")\n", + "print(response.content)\n", + "print(\"\\n\" + \"=\"*80 + \"\\n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Better!** The agent now understands its role, but it still doesn't know about our tools or how to behave." 
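,
+    "\n",
+    "As we iterate on instructions, it helps to re-ask the same question under each prompt and compare the answers. The sketch below is a notebook-only helper (`ask` is illustrative, not part of the course package); it reuses the `llm`, `SystemMessage`, and `HumanMessage` objects set up above:\n",
+    "\n",
+    "```python\n",
+    "def ask(system_prompt: str, question: str) -> str:\n",
+    "    \"\"\"Run one question through the LLM under a given system prompt.\"\"\"\n",
+    "    messages = [\n",
+    "        SystemMessage(content=system_prompt),\n",
+    "        HumanMessage(content=question),\n",
+    "    ]\n",
+    "    return llm.invoke(messages).content\n",
+    "\n",
+    "# Same question under both prompts we've written so far\n",
+    "for name, prompt in [(\"minimal\", minimal_prompt), (\"role\", role_prompt)]:\n",
+    "    print(f\"--- {name} ---\")\n",
+    "    print(ask(prompt, \"Can you recommend a course?\")[:200], \"\\n\")\n",
+    "```"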
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Example 3: Adding Behavioral Guidelines" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Response to off-topic question:\n", + "I'm here to help you with course planning and academic scheduling. If you're interested in finding courses that match your interests or need help planning your schedule, feel free to ask! For weather information, I recommend checking a reliable weather website or app.\n", + "\n", + "================================================================================\n", + "\n" + ] + } + ], + "source": [ + "# Add behavioral guidelines\n", + "behavior_prompt = \"\"\"You are the Redis University Class Agent.\n", + "\n", + "Your role is to help students:\n", + "- Find courses that match their interests and requirements\n", + "- Plan their academic schedule\n", + "- Check prerequisites and eligibility\n", + "- Get personalized course recommendations\n", + "\n", + "Guidelines:\n", + "- Be helpful, friendly, and encouraging\n", + "- Ask clarifying questions when needed\n", + "- Provide specific course recommendations with details\n", + "- Explain prerequisites and requirements clearly\n", + "- Stay focused on course planning and scheduling\n", + "- If asked about topics outside your domain, politely redirect to course planning\n", + "\"\"\"\n", + "\n", + "# Test with an off-topic question\n", + "messages = [\n", + " SystemMessage(content=behavior_prompt),\n", + " HumanMessage(content=\"What's the weather like today?\")\n", + "]\n", + "\n", + "response = llm.invoke(messages)\n", + "print(\"Response to off-topic question:\")\n", + "print(response.content)\n", + "print(\"\\n\" + \"=\"*80 + \"\\n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Great!** The agent now stays focused on its purpose and redirects off-topic questions." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Example 4: Complete System Instructions\n", + "\n", + "Let's build the complete system instructions for our agent." 
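,
+    "\n",
+    "Before writing it out as one long string, note that large prompts are easier to maintain when composed from named sections. A minimal sketch (the section names and contents here are illustrative, not the course's canonical prompt):\n",
+    "\n",
+    "```python\n",
+    "sections = {\n",
+    "    \"Role\": \"You are the Redis University Class Agent.\",\n",
+    "    \"Guidelines\": \"- Be helpful, friendly, and encouraging\\n- Stay focused on course planning\",\n",
+    "}\n",
+    "\n",
+    "# Join the named sections into a single system prompt string\n",
+    "composed_prompt = \"\\n\\n\".join(f\"## {name}\\n\\n{body}\" for name, body in sections.items())\n",
+    "print(composed_prompt)\n",
+    "```"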
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Complete system instructions:\n", + "You are the Redis University Class Agent, powered by Redis and the Agent Memory Server.\n", + "\n", + "Your role is to help students:\n", + "- Find courses that match their interests and requirements\n", + "- Plan their academic schedule for upcoming semesters\n", + "- Check prerequisites and course eligibility\n", + "- Get personalized course recommendations based on their goals\n", + "\n", + "You have access to:\n", + "- A complete course catalog with descriptions, prerequisites, and schedules\n", + "- Student preferences and goals (stored in long-term memory)\n", + "- Conversation history (stored in working memory)\n", + "- Tools to search courses and check prerequisites\n", + "\n", + "Guidelines:\n", + "- Be helpful, friendly, and encouraging\n", + "- Ask clarifying questions when you need more information\n", + "- Provide specific course recommendations with course codes and details\n", + "- Explain prerequisites and requirements clearly\n", + "- Remember student preferences and reference them in future conversations\n", + "- Stay focused on course planning and scheduling\n", + "- If asked about topics outside your domain, politely redirect to course planning\n", + "\n", + "Example interactions:\n", + "- Student: \"I'm interested in machine learning\"\n", + " You: \"Great! I can help you find ML courses. What's your current year and have you taken any programming courses?\"\n", + "\n", + "- Student: \"What are the prerequisites for CS401?\"\n", + " You: \"Let me check that for you.\" [Use check_prerequisites tool]\n", + "\n" + ] + } + ], + "source": [ + "# Complete system instructions\n", + "complete_prompt = \"\"\"You are the Redis University Class Agent, powered by Redis and the Agent Memory Server.\n", + "\n", + "Your role is to help students:\n", + "- Find courses that match their interests and requirements\n", + "- Plan their academic schedule for upcoming semesters\n", + "- Check prerequisites and course eligibility\n", + "- Get personalized course recommendations based on their goals\n", + "\n", + "You have access to:\n", + "- A complete course catalog with descriptions, prerequisites, and schedules\n", + "- Student preferences and goals (stored in long-term memory)\n", + "- Conversation history (stored in working memory)\n", + "- Tools to search courses and check prerequisites\n", + "\n", + "Guidelines:\n", + "- Be helpful, friendly, and encouraging\n", + "- Ask clarifying questions when you need more information\n", + "- Provide specific course recommendations with course codes and details\n", + "- Explain prerequisites and requirements clearly\n", + "- Remember student preferences and reference them in future conversations\n", + "- Stay focused on course planning and scheduling\n", + "- If asked about topics outside your domain, politely redirect to course planning\n", + "\n", + "Example interactions:\n", + "- Student: \"I'm interested in machine learning\"\n", + " You: \"Great! I can help you find ML courses. 
What's your current year and have you taken any programming courses?\"\n", + "\n", + "- Student: \"What are the prerequisites for CS401?\"\n", + " You: \"Let me check that for you.\" [Use check_prerequisites tool]\n", + "\"\"\"\n", + "\n", + "print(\"Complete system instructions:\")\n", + "print(complete_prompt)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Testing: Compare Different Instructions\n", + "\n", + "Let's test how different system instructions affect agent behavior." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "================================================================================\n", + "Minimal Instructions:\n", + "================================================================================\n", + "Learning about databases is a great way to enhance your technical skills, as they are fundamental to storing, retrieving, and managing data in many applications. Hereโ€™s a structured approach to get you started:\n", + "\n", + "1. **Understand the Basics:**\n", + " - **What is a Database?** Learn about what databases are, their purpose, and how they are used in applications.\n", + " - **Types of Databases:** Understand the difference between relational databases (SQL) and non-relational databases (NoSQL).\n", + "\n", + "2. **Relational Databases (SQL):**\n", + " - **Key Concepts:** Familiarize yourself with tables, rows, columns, primary keys, foreign keys, and relationships.\n", + " - **SQL Language:** Learn the basics of SQL (Structured Query Language), which is used to interact with relational databases.\n", + " - **Basic SQL Queries:** SELECT, INSERT, UPDATE, DELETE.\n", + " - **Advanced SQL:** Joins, subqueries, indexes, and transactions.\n", + " - **Popular RDBMS:** Explore some popular relational database management systems like MySQL, PostgreSQL, Oracle, and Microsoft SQL Server.\n", + "\n", + "3. **Non-Relational Databases (NoSQL):**\n", + " - **Key Concepts:** Understand the different types of NoSQL databasesโ€”document stores, key-value stores, column-family stores, and graph databases.\n", + " - **Examples:** Familiarize yourself with popular NoSQL databases like MongoDB (document store), Redis (key-value), Cassandra (column-family), and Neo4j (graph).\n", + "\n", + "4. **Database Design:**\n", + " - **Normalization:** Learn about database normalization to organize tables efficiently and reduce redundancy.\n", + " - **ER Models:** Understand Entity-Relationship (ER) models to visually represent the database structure.\n", + " - **Schema Design:** Practice designing schemas based on application requirements.\n", + "\n", + "5. **Practical Experience:**\n", + " - **Hands-On Practice:** Use tools like MySQL Workbench, pgAdmin for PostgreSQL, or MongoDB Compass for MongoDB to experiment with queries and database design.\n", + " - **Build Projects:** Create simple projects like a library management system or a personal finance tracker to apply what youโ€™ve learned.\n", + "\n", + "6. **Learning Resources:**\n", + " - **Books:** Consider books like \"Learning SQL\" by Alan Beaulieu or \"NoSQL Distilled\" by Pramod J. 
Sadalage and Martin Fowler.\n", + " - **Online Courses:** Platforms like Coursera, Udemy, or Khan Academy offer courses on databases and SQL.\n", + " - **Documentation:** Read the official documentation for databases youโ€™re interested in, as they often provide tutorials and examples.\n", + "\n", + "7. **Advanced Topics (Optional):**\n", + " - **Database Administration:** Learn about backup, recovery, performance tuning, and security.\n", + " - **Distributed Databases:** Understand how distributed databases work and explore systems like Apache Cassandra or Google Cloud Spanner.\n", + " - **Data Warehousing:** Explore concepts related to data warehousing and ETL (Extract, Transform, Load) processes.\n", + "\n", + "By following these steps, youโ€™ll gain a solid understanding of databases and how they are used in real-world applications. Be patient and practice regularly to reinforce your learning.\n", + "\n", + "\n", + "================================================================================\n", + "With Role Instructions:\n", + "================================================================================\n", + "Starting with databases is a great choice, and Redis University offers a variety of courses that can help you build a strong foundation and advance your skills. Hereโ€™s a suggested path to get you started:\n", + "\n", + "1. **RU101: Introduction to Redis Data Structures** \n", + " - This is a beginner-friendly course that introduces you to the core data structures of Redis. It's perfect if you're new to databases and want to understand the fundamentals of how Redis organizes and stores data.\n", + "\n", + "2. **RU102: Redis for Developers**\n", + " - Once you have a basic understanding, this course dives deeper into how to use Redis as a developer. It covers essential concepts such as building applications with Redis and using it to solve common problems.\n", + "\n", + "3. **RU201: Redis Streams**\n", + " - If you're interested in real-time data processing, this course will introduce you to Redis Streams, a powerful feature for building real-time applications.\n", + "\n", + "4. **RU202: RedisJSON**\n", + " - For those interested in working with JSON data, this course explores how RedisJSON can be used to store and query JSON data efficiently.\n", + "\n", + "5. **RU204: Redis Search and Query Engine**\n", + " - This course is ideal if you want to learn how to implement search functionality in your applications using Redis.\n", + "\n", + "6. **RU203: RedisGraph**\n", + " - If you have an interest in graph databases, this course will teach you how to use RedisGraph, a graph database module for Redis.\n", + "\n", + "These courses are designed to provide both theoretical knowledge and practical skills. You can choose based on your interests and what skills you want to focus on. If you have specific goals or interests, let me know, and I can provide more tailored recommendations.\n", + "\n", + "\n", + "================================================================================\n", + "With Behavior Instructions:\n", + "================================================================================\n", + "That's a great choice! Databases are a fundamental part of many applications and systems. At Redis University, we offer a variety of courses that can help you get started and deepen your understanding of databases. 
To better assist you, could you let me know if you're interested in any specific type of database, such as relational, NoSQL, or perhaps a specific technology like Redis? Also, do you have any prior experience with databases, or are you starting from scratch? This information will help me recommend the most suitable courses for you.\n", + "\n", + "\n", + "================================================================================\n", + "Complete Instructions:\n", + "================================================================================\n", + "Databases are a great area to explore! To get started, it would be helpful to know a little more about your current background. Are you familiar with any programming languages, or have you taken any related courses? Also, are you looking for an introductory course or something more advanced? This will help me recommend the best options for you.\n", + "\n" + ] + } + ], + "source": [ + "# Test query\n", + "test_query = \"I want to learn about databases but I'm not sure where to start.\"\n", + "\n", + "# Test with different prompts\n", + "prompts = {\n", + " \"Minimal\": minimal_prompt,\n", + " \"With Role\": role_prompt,\n", + " \"With Behavior\": behavior_prompt,\n", + " \"Complete\": complete_prompt\n", + "}\n", + "\n", + "for name, prompt in prompts.items():\n", + " messages = [\n", + " SystemMessage(content=prompt),\n", + " HumanMessage(content=test_query)\n", + " ]\n", + " response = llm.invoke(messages)\n", + " print(f\"\\n{'='*80}\")\n", + " print(f\"{name} Instructions:\")\n", + " print(f\"{'='*80}\")\n", + " print(response.content)\n", + " print()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Key Takeaways\n", + "\n", + "### What to Include in System Instructions\n", + "\n", + "1. **Identity & Role**\n", + " - Who the agent is\n", + " - What domain it operates in\n", + "\n", + "2. **Capabilities**\n", + " - What the agent can do\n", + " - What tools/data it has access to\n", + "\n", + "3. **Behavioral Guidelines**\n", + " - How to interact with users\n", + " - When to ask questions\n", + " - How to handle edge cases\n", + "\n", + "4. **Constraints**\n", + " - What the agent should NOT do\n", + " - How to handle out-of-scope requests\n", + "\n", + "5. **Examples** (optional)\n", + " - Sample interactions\n", + " - Expected behavior patterns\n", + "\n", + "### Best Practices\n", + "\n", + "โœ… **Do:**\n", + "- Be specific about the agent's role\n", + "- Include clear behavioral guidelines\n", + "- Set boundaries for out-of-scope requests\n", + "- Use examples to clarify expected behavior\n", + "- Keep instructions concise but complete\n", + "\n", + "โŒ **Don't:**\n", + "- Include dynamic data (use retrieved context instead)\n", + "- Make instructions too long (wastes tokens)\n", + "- Be vague about capabilities\n", + "- Forget to set constraints\n", + "- Include contradictory guidelines" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Exercises\n", + "\n", + "1. **Modify the system instructions** to make the agent more formal and academic in tone. Test it with a few queries.\n", + "\n", + "2. **Add a constraint** that the agent should always ask about the student's year (freshman, sophomore, etc.) before recommending courses. Test if it follows this constraint.\n", + "\n", + "3. **Create system instructions** for a different type of agent (e.g., a library assistant, a gym trainer, a recipe recommender). What changes?\n", + "\n", + "4. 
**Test edge cases**: Try to make the agent break its guidelines. What happens? How can you improve the instructions?" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Summary\n", + "\n", + "In this notebook, you learned:\n", + "\n", + "- โœ… System instructions define your agent's identity, capabilities, and behavior\n", + "- โœ… System context is static (same every turn) vs. retrieved context is dynamic\n", + "- โœ… Good instructions include: role, capabilities, guidelines, constraints, and examples\n", + "- โœ… Instructions significantly affect agent behavior and consistency\n", + "- โœ… Start simple and iterate based on testing\n", + "\n", + "**Next:** In the next notebook, we'll define tools that give our agent actual capabilities to search courses and check prerequisites." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/python-recipes/context-engineering/notebooks_archive/section-2-system-context/02_defining_tools.ipynb b/python-recipes/context-engineering/notebooks_archive/section-2-system-context/02_defining_tools.ipynb new file mode 100644 index 00000000..0ec3d555 --- /dev/null +++ b/python-recipes/context-engineering/notebooks_archive/section-2-system-context/02_defining_tools.ipynb @@ -0,0 +1,653 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Defining Tools: Giving Your Agent Capabilities\n", + "\n", + "## Introduction\n", + "\n", + "In this notebook, you'll learn how to define tools that give your agent real capabilities beyond just conversation. Tools allow the LLM to take actions, retrieve data, and interact with external systems.\n", + "\n", + "### What You'll Learn\n", + "\n", + "- What tools are and why they're essential for agents\n", + "- How to define tools with proper schemas\n", + "- How the LLM knows which tool to use\n", + "- How tool descriptions affect LLM behavior\n", + "- Best practices for tool design\n", + "\n", + "### Prerequisites\n", + "\n", + "- Completed `01_system_instructions.ipynb`\n", + "- Redis 8 running locally\n", + "- OpenAI API key set\n", + "- Course data ingested (from Section 1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Concepts: Tools for AI Agents\n", + "\n", + "### What Are Tools?\n", + "\n", + "Tools are **functions that the LLM can call** to perform actions or retrieve information. They extend the agent's capabilities beyond text generation.\n", + "\n", + "**Without tools:**\n", + "- Agent can only generate text based on its training data\n", + "- No access to real-time data\n", + "- Can't take actions\n", + "- Limited to what's in the prompt\n", + "\n", + "**With tools:**\n", + "- Agent can search databases\n", + "- Agent can retrieve current information\n", + "- Agent can perform calculations\n", + "- Agent can take actions (send emails, create records, etc.)\n", + "\n", + "### How Tool Calling Works\n", + "\n", + "1. **LLM receives** user query + system instructions + available tools\n", + "2. **LLM decides** which tool(s) to call (if any)\n", + "3. **LLM generates** tool call with parameters\n", + "4. 
**System executes** the tool function\n", + "5. **Tool returns** results\n", + "6. **LLM receives** results and generates response\n", + "\n", + "### Tool Schema Components\n", + "\n", + "Every tool needs:\n", + "1. **Name** - Unique identifier\n", + "2. **Description** - What the tool does (critical for selection!)\n", + "3. **Parameters** - Input schema with types and descriptions\n", + "4. **Function** - The actual implementation\n", + "\n", + "### How LLMs Select Tools\n", + "\n", + "The LLM uses:\n", + "- Tool **names** (should be descriptive)\n", + "- Tool **descriptions** (should explain when to use it)\n", + "- Parameter **descriptions** (should explain what each parameter does)\n", + "- **Context** from the conversation\n", + "\n", + "**Key insight:** The LLM only sees the tool schema, not the implementation!" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "๐Ÿ”ง Environment Setup\n", + "==============================\n", + "OpenAI API Key: โœ… Set\n", + "Redis URL: redis://localhost:6379\n" + ] + } + ], + "source": [ + "# Environment setup\n", + "import os\n", + "from typing import List, Optional\n", + "from dotenv import load_dotenv\n", + "\n", + "# Load environment variables\n", + "load_dotenv()\n", + "OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")\n", + "REDIS_URL = os.getenv(\"REDIS_URL\", \"redis://localhost:6379\")\n", + "\n", + "print(\"๐Ÿ”ง Environment Setup\")\n", + "print(\"=\" * 30)\n", + "print(f\"OpenAI API Key: {'โœ… Set' if OPENAI_API_KEY else 'โŒ Not set'}\")\n", + "print(f\"Redis URL: {REDIS_URL}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "16:39:08 redisvl.index.index INFO Index already exists, not overwriting.\n", + "โœ… Setup complete!\n" + ] + } + ], + "source": [ + "import os\n", + "from typing import List, Optional\n", + "from langchain_openai import ChatOpenAI\n", + "from langchain_core.messages import SystemMessage, HumanMessage, AIMessage\n", + "from langchain_core.tools import tool\n", + "from pydantic import BaseModel, Field\n", + "\n", + "# Import our course manager\n", + "from redis_context_course import CourseManager\n", + "\n", + "# Initialize\n", + "llm = ChatOpenAI(model=\"gpt-4o\", temperature=0)\n", + "course_manager = CourseManager()\n", + "\n", + "print(\"โœ… Setup complete!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Hands-on: Defining Tools\n", + "\n", + "Let's define tools for our class agent step by step." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Tool 1: Search Courses (Basic)\n", + "\n", + "Let's start with a basic tool to search courses." 
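+    ,
+    "\n",
+    "Remember that the model never sees your implementation, only the rendered schema. If you want to inspect exactly what gets sent, recent versions of `langchain-core` provide a helper for this. A quick sketch with a throwaway tool:\n",
+    "\n",
+    "```python\n",
+    "from langchain_core.tools import tool\n",
+    "from langchain_core.utils.function_calling import convert_to_openai_tool\n",
+    "\n",
+    "@tool\n",
+    "def ping(host: str) -> str:\n",
+    "    \"\"\"Check whether a host is reachable.\"\"\"\n",
+    "    return f\"pong from {host}\"\n",
+    "\n",
+    "# Name, description, and parameter schema: this is all the LLM receives\n",
+    "print(convert_to_openai_tool(ping))\n",
+    "```"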
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tool defined: search_courses_basic\n", + "Description: Search for courses in the catalog.\n" + ] + } + ], + "source": [ + "# Define parameter schema\n", + "class SearchCoursesInput(BaseModel):\n", + " query: str = Field(description=\"Search query for courses\")\n", + " limit: int = Field(default=5, description=\"Maximum number of results\")\n", + "\n", + "# Define the tool\n", + "@tool(args_schema=SearchCoursesInput)\n", + "async def search_courses_basic(query: str, limit: int = 5) -> str:\n", + " \"\"\"Search for courses in the catalog.\"\"\"\n", + " results = await course_manager.search_courses(query, limit=limit)\n", + " \n", + " if not results:\n", + " return \"No courses found matching your query.\"\n", + " \n", + " output = []\n", + " for course in results:\n", + " output.append(\n", + " f\"{course.course_code}: {course.title}\\n\"\n", + " f\" Credits: {course.credits} | {course.format.value}\\n\"\n", + " f\" {course.description[:100]}...\"\n", + " )\n", + " \n", + " return \"\\n\\n\".join(output)\n", + "\n", + "print(\"Tool defined:\", search_courses_basic.name)\n", + "print(\"Description:\", search_courses_basic.description)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Problem:** The description is too vague! The LLM won't know when to use this tool." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Tool 1: Search Courses (Improved)\n", + "\n", + "Let's improve the description to help the LLM understand when to use this tool." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Improved tool defined!\n", + "\n", + "Description:\n", + "Search for courses in the Redis University catalog using semantic search.\n", + "\n", + "Use this tool when students ask about:\n", + "- Finding courses on a specific topic (e.g., \"machine learning courses\")\n", + "- Courses in a department (e.g., \"computer science courses\")\n", + "- Courses with specific characteristics (e.g., \"online courses\", \"3-credit courses\")\n", + "\n", + "The search uses semantic matching, so natural language queries work well.\n" + ] + } + ], + "source": [ + "@tool(args_schema=SearchCoursesInput)\n", + "async def search_courses(query: str, limit: int = 5) -> str:\n", + " \"\"\"\n", + " Search for courses in the Redis University catalog using semantic search.\n", + " \n", + " Use this tool when students ask about:\n", + " - Finding courses on a specific topic (e.g., \"machine learning courses\")\n", + " - Courses in a department (e.g., \"computer science courses\")\n", + " - Courses with specific characteristics (e.g., \"online courses\", \"3-credit courses\")\n", + " \n", + " The search uses semantic matching, so natural language queries work well.\n", + " \"\"\"\n", + " results = await course_manager.search_courses(query, limit=limit)\n", + " \n", + " if not results:\n", + " return \"No courses found matching your query.\"\n", + " \n", + " output = []\n", + " for course in results:\n", + " output.append(\n", + " f\"{course.course_code}: {course.title}\\n\"\n", + " f\" Credits: {course.credits} | {course.format.value} | {course.difficulty_level.value}\\n\"\n", + " f\" {course.description[:150]}...\"\n", + " )\n", + " \n", + " return \"\\n\\n\".join(output)\n", + "\n", + "print(\"โœ… Improved 
tool defined!\")\n", + "print(\"\\nDescription:\")\n", + "print(search_courses.description)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Tool 2: Get Course Details\n", + "\n", + "A tool to get detailed information about a specific course." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Tool defined: get_course_details\n" + ] + } + ], + "source": [ + "class GetCourseDetailsInput(BaseModel):\n", + " course_code: str = Field(description=\"Course code (e.g., 'CS101', 'MATH201')\")\n", + "\n", + "@tool(args_schema=GetCourseDetailsInput)\n", + "async def get_course_details(course_code: str) -> str:\n", + " \"\"\"\n", + " Get detailed information about a specific course by its course code.\n", + " \n", + " Use this tool when:\n", + " - Student asks about a specific course (e.g., \"Tell me about CS101\")\n", + " - You need prerequisites for a course\n", + " - You need full course details (schedule, instructor, etc.)\n", + " \n", + " Returns complete course information including description, prerequisites,\n", + " schedule, credits, and learning objectives.\n", + " \"\"\"\n", + " course = await course_manager.get_course(course_code)\n", + " \n", + " if not course:\n", + " return f\"Course {course_code} not found.\"\n", + " \n", + " prereqs = \"None\" if not course.prerequisites else \", \".join(\n", + " [f\"{p.course_code} (min grade: {p.min_grade})\" for p in course.prerequisites]\n", + " )\n", + " \n", + " return f\"\"\"\n", + "{course.course_code}: {course.title}\n", + "\n", + "Description: {course.description}\n", + "\n", + "Details:\n", + "- Credits: {course.credits}\n", + "- Department: {course.department}\n", + "- Major: {course.major}\n", + "- Difficulty: {course.difficulty_level.value}\n", + "- Format: {course.format.value}\n", + "- Prerequisites: {prereqs}\n", + "\n", + "Learning Objectives:\n", + "\"\"\" + \"\\n\".join([f\"- {obj}\" for obj in course.learning_objectives])\n", + "\n", + "print(\"โœ… Tool defined:\", get_course_details.name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Tool 3: Check Prerequisites\n", + "\n", + "A tool to check if a student meets the prerequisites for a course." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Tool defined: check_prerequisites\n" + ] + } + ], + "source": [ + "class CheckPrerequisitesInput(BaseModel):\n", + " course_code: str = Field(description=\"Course code to check prerequisites for\")\n", + " completed_courses: List[str] = Field(\n", + " description=\"List of course codes the student has completed\"\n", + " )\n", + "\n", + "@tool(args_schema=CheckPrerequisitesInput)\n", + "async def check_prerequisites(course_code: str, completed_courses: List[str]) -> str:\n", + " \"\"\"\n", + " Check if a student meets the prerequisites for a specific course.\n", + " \n", + " Use this tool when:\n", + " - Student asks \"Can I take [course]?\"\n", + " - Student asks about prerequisites\n", + " - You need to verify eligibility before recommending a course\n", + " \n", + " Returns whether the student is eligible and which prerequisites are missing (if any).\n", + " \"\"\"\n", + " course = await course_manager.get_course(course_code)\n", + " \n", + " if not course:\n", + " return f\"Course {course_code} not found.\"\n", + " \n", + " if not course.prerequisites:\n", + " return f\"โœ… {course_code} has no prerequisites. You can take this course!\"\n", + " \n", + " missing = []\n", + " for prereq in course.prerequisites:\n", + " if prereq.course_code not in completed_courses:\n", + " missing.append(f\"{prereq.course_code} (min grade: {prereq.min_grade})\")\n", + " \n", + " if not missing:\n", + " return f\"โœ… You meet all prerequisites for {course_code}!\"\n", + " \n", + " return f\"\"\"โŒ You're missing prerequisites for {course_code}:\n", + "\n", + "Missing:\n", + "\"\"\" + \"\\n\".join([f\"- {p}\" for p in missing])\n", + "\n", + "print(\"โœ… Tool defined:\", check_prerequisites.name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Testing: Using Tools with an Agent\n", + "\n", + "Let's test our tools with the LLM to see how it selects and uses them." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Agent configured with tools!\n" + ] + } + ], + "source": [ + "# Bind tools to LLM\n", + "tools = [search_courses, get_course_details, check_prerequisites]\n", + "llm_with_tools = llm.bind_tools(tools)\n", + "\n", + "# System prompt\n", + "system_prompt = \"\"\"You are the Redis University Class Agent.\n", + "Help students find courses and plan their schedule.\n", + "Use the available tools to search courses and check prerequisites.\n", + "\"\"\"\n", + "\n", + "print(\"โœ… Agent configured with tools!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Test 1: Search Query" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "16:40:48 httpx INFO HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "User: I'm interested in machine learning courses\n", + "\n", + "Agent decision:\n", + " Tool: search_courses\n", + " Args: {'query': 'machine learning', 'limit': 5}\n" + ] + } + ], + "source": [ + "messages = [\n", + " SystemMessage(content=system_prompt),\n", + " HumanMessage(content=\"I'm interested in machine learning courses\")\n", + "]\n", + "\n", + "response = llm_with_tools.invoke(messages)\n", + "\n", + "print(\"User: I'm interested in machine learning courses\")\n", + "print(\"\\nAgent decision:\")\n", + "if response.tool_calls:\n", + " for tool_call in response.tool_calls:\n", + " print(f\" Tool: {tool_call['name']}\")\n", + " print(f\" Args: {tool_call['args']}\")\n", + "else:\n", + " print(\" No tool called\")\n", + " print(f\" Response: {response.content}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Test 2: Specific Course Query" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "messages = [\n", + " SystemMessage(content=system_prompt),\n", + " HumanMessage(content=\"Tell me about CS401\")\n", + "]\n", + "\n", + "response = llm_with_tools.invoke(messages)\n", + "\n", + "print(\"User: Tell me about CS401\")\n", + "print(\"\\nAgent decision:\")\n", + "if response.tool_calls:\n", + " for tool_call in response.tool_calls:\n", + " print(f\" Tool: {tool_call['name']}\")\n", + " print(f\" Args: {tool_call['args']}\")\n", + "else:\n", + " print(\" No tool called\")\n", + " print(f\" Response: {response.content}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Test 3: Prerequisites Query" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "messages = [\n", + " SystemMessage(content=system_prompt),\n", + " HumanMessage(content=\"Can I take CS401? I've completed CS101 and CS201.\")\n", + "]\n", + "\n", + "response = llm_with_tools.invoke(messages)\n", + "\n", + "print(\"User: Can I take CS401? 
I've completed CS101 and CS201.\")\n", + "print(\"\\nAgent decision:\")\n", + "if response.tool_calls:\n", + " for tool_call in response.tool_calls:\n", + " print(f\" Tool: {tool_call['name']}\")\n", + " print(f\" Args: {tool_call['args']}\")\n", + "else:\n", + " print(\" No tool called\")\n", + " print(f\" Response: {response.content}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Key Takeaways\n", + "\n", + "### Tool Design Best Practices\n", + "\n", + "1. **Clear Names**\n", + " - Use descriptive, action-oriented names\n", + " - `search_courses` โœ… vs. `find` โŒ\n", + "\n", + "2. **Detailed Descriptions**\n", + " - Explain what the tool does\n", + " - Explain when to use it\n", + " - Include examples\n", + "\n", + "3. **Well-Defined Parameters**\n", + " - Use type hints\n", + " - Add descriptions for each parameter\n", + " - Set sensible defaults\n", + "\n", + "4. **Useful Return Values**\n", + " - Return formatted, readable text\n", + " - Include relevant details\n", + " - Handle errors gracefully\n", + "\n", + "5. **Single Responsibility**\n", + " - Each tool should do one thing well\n", + " - Don't combine unrelated functionality\n", + "\n", + "### How Tool Descriptions Affect Selection\n", + "\n", + "The LLM relies heavily on tool descriptions to decide which tool to use:\n", + "\n", + "- โœ… **Good description**: \"Search for courses using semantic search. Use when students ask about topics, departments, or course characteristics.\"\n", + "- โŒ **Bad description**: \"Search courses\"\n", + "\n", + "**Remember:** The LLM can't see your code, only the schema!" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Exercises\n", + "\n", + "1. **Add a new tool** called `get_courses_by_department` that returns all courses in a specific department. Write a good description.\n", + "\n", + "2. **Test tool selection**: Create queries that should trigger each of your three tools. Does the LLM select correctly?\n", + "\n", + "3. **Improve a description**: Take the `search_courses_basic` tool and improve its description. Test if it changes LLM behavior.\n", + "\n", + "4. **Create a tool** for getting a student's current schedule. What parameters does it need? What should it return?" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Summary\n", + "\n", + "In this notebook, you learned:\n", + "\n", + "- โœ… Tools extend agent capabilities beyond text generation\n", + "- โœ… Tool schemas include name, description, parameters, and implementation\n", + "- โœ… LLMs select tools based on descriptions and context\n", + "- โœ… Good descriptions are critical for correct tool selection\n", + "- โœ… Each tool should have a single, clear purpose\n", + "\n", + "**Next:** In Section 3, we'll add memory to our agent so it can remember user preferences and past conversations." 
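+    ,
+    "\n",
+    "For reference, here is the complete tool-execution loop from the Concepts section expressed as code: a minimal sketch that assumes the `tools` list and `llm_with_tools` defined above, with error handling omitted:\n",
+    "\n",
+    "```python\n",
+    "from langchain_core.messages import ToolMessage\n",
+    "\n",
+    "tools_by_name = {t.name: t for t in tools}\n",
+    "\n",
+    "async def run_turn(messages):\n",
+    "    \"\"\"One agent turn: let the LLM call tools, execute them, then answer.\"\"\"\n",
+    "    response = await llm_with_tools.ainvoke(messages)\n",
+    "    messages.append(response)\n",
+    "    while response.tool_calls:\n",
+    "        for call in response.tool_calls:\n",
+    "            result = await tools_by_name[call[\"name\"]].ainvoke(call[\"args\"])\n",
+    "            messages.append(ToolMessage(content=str(result), tool_call_id=call[\"id\"]))\n",
+    "        response = await llm_with_tools.ainvoke(messages)\n",
+    "        messages.append(response)\n",
+    "    return response.content\n",
+    "\n",
+    "# In a notebook cell:\n",
+    "# answer = await run_turn([SystemMessage(content=system_prompt),\n",
+    "#                          HumanMessage(content=\"Tell me about CS401\")])\n",
+    "```"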
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/python-recipes/context-engineering/notebooks_archive/section-2-system-context/03_tool_selection_strategies.ipynb b/python-recipes/context-engineering/notebooks_archive/section-2-system-context/03_tool_selection_strategies.ipynb new file mode 100644 index 00000000..7a00731a --- /dev/null +++ b/python-recipes/context-engineering/notebooks_archive/section-2-system-context/03_tool_selection_strategies.ipynb @@ -0,0 +1,623 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Tool Selection Strategies: Improving Tool Choice\n", + "\n", + "## Introduction\n", + "\n", + "In this advanced notebook, you'll learn strategies to improve how LLMs select tools. When you have many tools, the LLM can get confused about which one to use. You'll learn techniques to make tool selection more reliable and accurate.\n", + "\n", + "### What You'll Learn\n", + "\n", + "- Common tool selection failures\n", + "- Strategies to improve tool selection\n", + "- Clear naming conventions\n", + "- Detailed descriptions with examples\n", + "- Testing and debugging tool selection\n", + "\n", + "### Prerequisites\n", + "\n", + "- Completed `02_defining_tools.ipynb`\n", + "- Redis 8 running locally\n", + "- OpenAI API key set\n", + "- Course data ingested" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, + "source": [ + "## Concepts: Tool Selection Challenges\n", + "\n", + "### The Problem\n", + "\n", + "As you add more tools, the LLM faces challenges:\n", + "\n", + "**With 3 tools:**\n", + "- โœ… Easy to choose\n", + "- โœ… Clear distinctions\n", + "\n", + "**With 10+ tools:**\n", + "- โš ๏ธ Similar-sounding tools\n", + "- โš ๏ธ Overlapping functionality\n", + "- โš ๏ธ Ambiguous queries\n", + "- โš ๏ธ Wrong tool selection\n", + "\n", + "### Common Tool Selection Failures\n", + "\n", + "**1. Similar Names**\n", + "```python\n", + "# Bad: Confusing names\n", + "get_course() # Get one course?\n", + "get_courses() # Get multiple courses?\n", + "search_course() # Search for courses?\n", + "find_courses() # Find courses?\n", + "```\n", + "\n", + "**2. Vague Descriptions**\n", + "```python\n", + "# Bad: Too vague\n", + "def search_courses():\n", + " \"\"\"Search for courses.\"\"\"\n", + " \n", + "# Good: Specific\n", + "def search_courses():\n", + " \"\"\"Search for courses using semantic search.\n", + " Use when students ask about topics, departments, or characteristics.\n", + " Example: 'machine learning courses' or 'online courses'\n", + " \"\"\"\n", + "```\n", + "\n", + "**3. Overlapping Functionality**\n", + "```python\n", + "# Bad: Unclear when to use which\n", + "search_courses(query) # Semantic search\n", + "filter_courses(department) # Filter by department\n", + "find_courses_by_topic(topic) # Find by topic\n", + "\n", + "# Good: One tool with clear parameters\n", + "search_courses(query, filters) # One tool, clear purpose\n", + "```\n", + "\n", + "### How LLMs Select Tools\n", + "\n", + "The LLM considers:\n", + "1. **Tool name** - First impression\n", + "2. 
**Tool description** - Main decision factor\n", + "3. **Parameter descriptions** - Confirms choice\n", + "4. **Context** - User's query and conversation\n", + "\n", + "**Key insight:** The LLM can't see your code, only the schema!" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from typing import List, Optional, Dict, Any\n", + "from langchain_openai import ChatOpenAI\n", + "from langchain_core.messages import SystemMessage, HumanMessage\n", + "from langchain_core.tools import tool\n", + "from pydantic import BaseModel, Field\n", + "from redis_context_course import CourseManager\n", + "\n", + "# Initialize\n", + "llm = ChatOpenAI(model=\"gpt-4o\", temperature=0)\n", + "course_manager = CourseManager()\n", + "\n", + "print(\"โœ… Setup complete\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Strategy 1: Clear Naming Conventions\n", + "\n", + "Use consistent, descriptive names that clearly indicate what the tool does." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Bad Example: Confusing Names" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Bad: Confusing, similar names\n", + "class GetCourseInput(BaseModel):\n", + " code: str = Field(description=\"Course code\")\n", + "\n", + "@tool(args_schema=GetCourseInput)\n", + "async def get(code: str) -> str:\n", + " \"\"\"Get a course.\"\"\"\n", + " course = await course_manager.get_course(code)\n", + " return str(course) if course else \"Not found\"\n", + "\n", + "@tool(args_schema=GetCourseInput)\n", + "async def fetch(code: str) -> str:\n", + " \"\"\"Fetch a course.\"\"\"\n", + " course = await course_manager.get_course(code)\n", + " return str(course) if course else \"Not found\"\n", + "\n", + "@tool(args_schema=GetCourseInput)\n", + "async def retrieve(code: str) -> str:\n", + " \"\"\"Retrieve a course.\"\"\"\n", + " course = await course_manager.get_course(code)\n", + " return str(course) if course else \"Not found\"\n", + "\n", + "print(\"โŒ BAD: Three tools that do the same thing with vague names!\")\n", + "print(\" - get, fetch, retrieve - which one to use?\")\n", + "print(\" - LLM will be confused\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Good Example: Clear, Descriptive Names" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Good: Clear, specific names\n", + "class SearchCoursesInput(BaseModel):\n", + " query: str = Field(description=\"Natural language search query\")\n", + " limit: int = Field(default=5, description=\"Max results\")\n", + "\n", + "@tool(args_schema=SearchCoursesInput)\n", + "async def search_courses_by_topic(query: str, limit: int = 5) -> str:\n", + " \"\"\"Search courses using semantic search based on topics or descriptions.\"\"\"\n", + " results = await course_manager.search_courses(query, limit=limit)\n", + " return \"\\n\".join([f\"{c.course_code}: {c.title}\" for c in results])\n", + "\n", + "class GetCourseDetailsInput(BaseModel):\n", + " course_code: str = Field(description=\"Specific course code like 'CS101'\")\n", + "\n", + "@tool(args_schema=GetCourseDetailsInput)\n", + "async def get_course_details_by_code(course_code: str) -> str:\n", + " \"\"\"Get detailed information about a specific course by its 
course code.\"\"\"\n", + " course = await course_manager.get_course(course_code)\n", + " return str(course) if course else \"Course not found\"\n", + "\n", + "class ListCoursesInput(BaseModel):\n", + " department: str = Field(description=\"Department code like 'CS' or 'MATH'\")\n", + "\n", + "@tool(args_schema=ListCoursesInput)\n", + "async def list_courses_by_department(department: str) -> str:\n", + " \"\"\"List all courses in a specific department.\"\"\"\n", + " # Implementation would filter by department\n", + " return f\"Courses in {department} department\"\n", + "\n", + "print(\"โœ… GOOD: Clear, specific names that indicate purpose\")\n", + "print(\" - search_courses_by_topic: For semantic search\")\n", + "print(\" - get_course_details_by_code: For specific course\")\n", + "print(\" - list_courses_by_department: For department listing\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Strategy 2: Detailed Descriptions with Examples\n", + "\n", + "Write descriptions that explain WHEN to use the tool, not just WHAT it does." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Bad Example: Vague Description" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Bad: Vague description\n", + "@tool(args_schema=SearchCoursesInput)\n", + "async def search_courses_bad(query: str, limit: int = 5) -> str:\n", + " \"\"\"Search for courses.\"\"\"\n", + " results = await course_manager.search_courses(query, limit=limit)\n", + " return \"\\n\".join([f\"{c.course_code}: {c.title}\" for c in results])\n", + "\n", + "print(\"โŒ BAD: 'Search for courses' - too vague!\")\n", + "print(\" - When should I use this?\")\n", + "print(\" - What kind of search?\")\n", + "print(\" - What queries work?\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Good Example: Detailed Description with Examples" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Good: Detailed description with examples\n", + "@tool(args_schema=SearchCoursesInput)\n", + "async def search_courses_good(query: str, limit: int = 5) -> str:\n", + " \"\"\"\n", + " Search for courses using semantic search based on topics, descriptions, or characteristics.\n", + " \n", + " Use this tool when students ask about:\n", + " - Topics or subjects: \"machine learning courses\", \"database courses\"\n", + " - Course characteristics: \"online courses\", \"beginner courses\", \"3-credit courses\"\n", + " - General exploration: \"what courses are available in AI?\"\n", + " \n", + " Do NOT use this tool when:\n", + " - Student asks about a specific course code (use get_course_details_by_code instead)\n", + " - Student wants all courses in a department (use list_courses_by_department instead)\n", + " \n", + " The search uses semantic matching, so natural language queries work well.\n", + " \n", + " Examples:\n", + " - \"machine learning courses\" โ†’ finds CS401, CS402, etc.\n", + " - \"beginner programming\" โ†’ finds CS101, CS102, etc.\n", + " - \"online data science courses\" โ†’ finds online courses about data science\n", + " \"\"\"\n", + " results = await course_manager.search_courses(query, limit=limit)\n", + " return \"\\n\".join([f\"{c.course_code}: {c.title}\" for c in results])\n", + "\n", + "print(\"โœ… GOOD: Detailed description with:\")\n", + "print(\" - What it does\")\n", + "print(\" - When to use it\")\n", + "print(\" - When NOT 
to use it\")\n", + "print(\" - Examples of good queries\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Strategy 3: Parameter Descriptions\n", + "\n", + "Add detailed descriptions to parameters to guide the LLM." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Bad: Minimal parameter descriptions\n", + "class BadInput(BaseModel):\n", + " query: str\n", + " limit: int\n", + "\n", + "print(\"โŒ BAD: No parameter descriptions\")\n", + "print()\n", + "\n", + "# Good: Detailed parameter descriptions\n", + "class GoodInput(BaseModel):\n", + " query: str = Field(\n", + " description=\"Natural language search query. Can be topics (e.g., 'machine learning'), \"\n", + " \"characteristics (e.g., 'online courses'), or general questions \"\n", + " \"(e.g., 'beginner programming courses')\"\n", + " )\n", + " limit: int = Field(\n", + " default=5,\n", + " description=\"Maximum number of results to return. Default is 5. \"\n", + " \"Use 3 for quick answers, 10 for comprehensive results.\"\n", + " )\n", + "\n", + "print(\"โœ… GOOD: Detailed parameter descriptions\")\n", + "print(\" - Explains what the parameter is\")\n", + "print(\" - Gives examples\")\n", + "print(\" - Suggests values\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Testing Tool Selection\n", + "\n", + "Let's test how well the LLM selects tools with different queries." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create tools with good descriptions\n", + "tools = [\n", + " search_courses_good,\n", + " get_course_details_by_code,\n", + " list_courses_by_department\n", + "]\n", + "\n", + "llm_with_tools = llm.bind_tools(tools)\n", + "\n", + "# Test queries\n", + "test_queries = [\n", + " \"I'm interested in machine learning courses\",\n", + " \"Tell me about CS401\",\n", + " \"What courses does the Computer Science department offer?\",\n", + " \"Show me beginner programming courses\",\n", + " \"What are the prerequisites for CS301?\",\n", + "]\n", + "\n", + "print(\"=\" * 80)\n", + "print(\"TESTING TOOL SELECTION\")\n", + "print(\"=\" * 80)\n", + "\n", + "for query in test_queries:\n", + " messages = [\n", + " SystemMessage(content=\"You are a class scheduling agent. Use the appropriate tool.\"),\n", + " HumanMessage(content=query)\n", + " ]\n", + " \n", + " response = llm_with_tools.invoke(messages)\n", + " \n", + " print(f\"\\nQuery: {query}\")\n", + " if response.tool_calls:\n", + " tool_call = response.tool_calls[0]\n", + " print(f\"โœ… Selected: {tool_call['name']}\")\n", + " print(f\" Args: {tool_call['args']}\")\n", + " else:\n", + " print(\"โŒ No tool selected\")\n", + "\n", + "print(\"\\n\" + \"=\" * 80)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Strategy 4: Testing Edge Cases\n", + "\n", + "Test ambiguous queries to find tool selection issues." 
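+    ,
+    "\n",
+    "The next cell probes a few ambiguous queries by hand. Once you have decided what the right tool is for each, you can freeze those expectations into a quick regression check so later description tweaks don't silently change selection. A sketch (the expected tool names here are illustrative):\n",
+    "\n",
+    "```python\n",
+    "expected = {\n",
+    "    \"Tell me about CS401\": \"get_course_details_by_code\",\n",
+    "    \"machine learning courses\": \"search_courses_good\",\n",
+    "}\n",
+    "\n",
+    "system = SystemMessage(content=\"You are a class scheduling agent. Use the appropriate tool.\")\n",
+    "\n",
+    "for query, expected_tool in expected.items():\n",
+    "    response = llm_with_tools.invoke([system, HumanMessage(content=query)])\n",
+    "    selected = response.tool_calls[0][\"name\"] if response.tool_calls else None\n",
+    "    status = \"PASS\" if selected == expected_tool else \"FAIL\"\n",
+    "    print(f\"{status}: {query!r} -> {selected}\")\n",
+    "```"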
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Ambiguous queries that could match multiple tools\n", + "ambiguous_queries = [\n", + " \"What courses are available?\", # Could be search or list\n", + " \"Tell me about CS courses\", # Could be search or list\n", + " \"I want to learn programming\", # Could be search\n", + " \"CS401\", # Just a course code\n", + "]\n", + "\n", + "print(\"=\" * 80)\n", + "print(\"TESTING AMBIGUOUS QUERIES\")\n", + "print(\"=\" * 80)\n", + "\n", + "for query in ambiguous_queries:\n", + " messages = [\n", + " SystemMessage(content=\"You are a class scheduling agent. Use the appropriate tool.\"),\n", + " HumanMessage(content=query)\n", + " ]\n", + " \n", + " response = llm_with_tools.invoke(messages)\n", + " \n", + " print(f\"\\nQuery: '{query}'\")\n", + " if response.tool_calls:\n", + " tool_call = response.tool_calls[0]\n", + " print(f\"Selected: {tool_call['name']}\")\n", + " print(f\"Args: {tool_call['args']}\")\n", + " print(\"Is this the right choice? ๐Ÿค”\")\n", + " else:\n", + " print(\"No tool selected - might ask for clarification\")\n", + "\n", + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"๐Ÿ’ก TIP: If selection is wrong, improve tool descriptions!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Strategy 5: Reducing Tool Confusion\n", + "\n", + "When you have many similar tools, consider consolidating them." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"=\" * 80)\n", + "print(\"CONSOLIDATING SIMILAR TOOLS\")\n", + "print(\"=\" * 80)\n", + "\n", + "print(\"\\nโŒ BAD: Many similar tools\")\n", + "print(\" - search_courses_by_topic()\")\n", + "print(\" - search_courses_by_department()\")\n", + "print(\" - search_courses_by_difficulty()\")\n", + "print(\" - search_courses_by_format()\")\n", + "print(\" โ†’ LLM confused about which to use!\")\n", + "\n", + "print(\"\\nโœ… GOOD: One flexible tool\")\n", + "print(\" - search_courses(query, filters={})\")\n", + "print(\" โ†’ One tool, clear purpose, flexible parameters\")\n", + "\n", + "# Example of consolidated tool\n", + "class ConsolidatedSearchInput(BaseModel):\n", + " query: str = Field(description=\"Natural language search query\")\n", + " department: Optional[str] = Field(default=None, description=\"Filter by department (e.g., 'CS')\")\n", + " difficulty: Optional[str] = Field(default=None, description=\"Filter by difficulty (beginner/intermediate/advanced)\")\n", + " format: Optional[str] = Field(default=None, description=\"Filter by format (online/in-person/hybrid)\")\n", + " limit: int = Field(default=5, description=\"Max results\")\n", + "\n", + "@tool(args_schema=ConsolidatedSearchInput)\n", + "async def search_courses_consolidated(\n", + " query: str,\n", + " department: Optional[str] = None,\n", + " difficulty: Optional[str] = None,\n", + " format: Optional[str] = None,\n", + " limit: int = 5\n", + ") -> str:\n", + " \"\"\"\n", + " Search for courses with optional filters.\n", + " \n", + " Use this tool for any course search. 
You can:\n", + " - Search by topic: query=\"machine learning\"\n", + " - Filter by department: department=\"CS\"\n", + " - Filter by difficulty: difficulty=\"beginner\"\n", + " - Filter by format: format=\"online\"\n", + " - Combine filters: query=\"databases\", department=\"CS\", difficulty=\"intermediate\"\n", + " \"\"\"\n", + " # Implementation would use filters\n", + " return f\"Searching for: {query} with filters\"\n", + "\n", + "print(\"\\nโœ… Benefits of consolidation:\")\n", + "print(\" - Fewer tools = less confusion\")\n", + "print(\" - One clear purpose\")\n", + "print(\" - Flexible with optional parameters\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Key Takeaways\n", + "\n", + "### Naming Conventions\n", + "\n", + "โœ… **Do:**\n", + "- Use descriptive, action-oriented names\n", + "- Include the object/entity in the name\n", + "- Be specific: `search_courses_by_topic` not `search`\n", + "\n", + "โŒ **Don't:**\n", + "- Use vague names: `get`, `fetch`, `find`\n", + "- Create similar-sounding tools\n", + "- Use abbreviations or jargon\n", + "\n", + "### Description Best Practices\n", + "\n", + "Include:\n", + "1. **What it does** - Clear explanation\n", + "2. **When to use it** - Specific scenarios\n", + "3. **When NOT to use it** - Avoid confusion\n", + "4. **Examples** - Show expected inputs\n", + "5. **Edge cases** - Handle ambiguity\n", + "\n", + "### Parameter Descriptions\n", + "\n", + "For each parameter:\n", + "- Explain what it is\n", + "- Give examples\n", + "- Suggest typical values\n", + "- Explain constraints\n", + "\n", + "### Testing Strategy\n", + "\n", + "1. **Test typical queries** - Does it select correctly?\n", + "2. **Test edge cases** - What about ambiguous queries?\n", + "3. **Test similar queries** - Does it distinguish between tools?\n", + "4. **Iterate descriptions** - Improve based on failures\n", + "\n", + "### When to Consolidate Tools\n", + "\n", + "Consolidate when:\n", + "- โœ… Tools have similar purposes\n", + "- โœ… Differences can be parameters\n", + "- โœ… LLM gets confused\n", + "\n", + "Keep separate when:\n", + "- โœ… Fundamentally different operations\n", + "- โœ… Different return types\n", + "- โœ… Clear, distinct use cases" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Exercises\n", + "\n", + "1. **Improve a tool**: Take a tool with a vague description and rewrite it with examples and clear guidance.\n", + "\n", + "2. **Test tool selection**: Create 10 test queries and verify the LLM selects the right tool each time.\n", + "\n", + "3. **Find confusion**: Create two similar tools and test queries that could match either. How can you improve the descriptions?\n", + "\n", + "4. **Consolidate tools**: If you have 5+ similar tools, try consolidating them into 1-2 flexible tools." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Summary\n", + "\n", + "In this notebook, you learned:\n", + "\n", + "- โœ… Clear naming conventions prevent confusion\n", + "- โœ… Detailed descriptions with examples guide tool selection\n", + "- โœ… Parameter descriptions help the LLM use tools correctly\n", + "- โœ… Testing edge cases reveals selection issues\n", + "- โœ… Consolidating similar tools reduces confusion\n", + "\n", + "**Key insight:** Tool selection quality depends entirely on your descriptions. The LLM can't see your code - invest time in writing clear, detailed tool schemas with examples and guidance." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/python-recipes/context-engineering/notebooks_archive/section-3-memory/01_working_memory.ipynb b/python-recipes/context-engineering/notebooks_archive/section-3-memory/01_working_memory.ipynb new file mode 100644 index 00000000..e59ae4b7 --- /dev/null +++ b/python-recipes/context-engineering/notebooks_archive/section-3-memory/01_working_memory.ipynb @@ -0,0 +1,517 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![Redis](https://redis.io/wp-content/uploads/2024/04/Logotype.svg?auto=webp&quality=85,75&width=120)\n", + "\n", + "# Working Memory\n", + "\n", + "## Why Agents Need Memory: The Grounding Problem\n", + "\n", + "Before diving into implementation, let's understand the fundamental problem that memory solves.\n", + "\n", + "### The Grounding Problem\n", + "\n", + "**Grounding** means understanding what users are referring to. Natural conversation is full of references:\n", + "\n", + "**Without Memory:**\n", + "```\n", + "User: \"Tell me about CS401\"\n", + "Agent: \"CS401 is Machine Learning. It covers supervised learning...\"\n", + "\n", + "User: \"What are its prerequisites?\"\n", + "Agent: โŒ \"What does 'its' refer to? Please specify which course.\"\n", + "\n", + "User: \"The course we just discussed!\"\n", + "Agent: โŒ \"I don't have access to previous messages. Which course?\"\n", + "```\n", + "\n", + "**This is a terrible user experience.**\n", + "\n", + "### Types of References That Need Grounding\n", + "\n", + "**Pronouns:**\n", + "- \"it\", \"that course\", \"those\", \"this one\"\n", + "- \"he\", \"she\", \"they\" (referring to people)\n", + "\n", + "**Descriptions:**\n", + "- \"the easy one\", \"the online course\"\n", + "- \"my advisor\", \"that professor\"\n", + "\n", + "**Implicit context:**\n", + "- \"Can I take it?\" โ†’ Take what?\n", + "- \"When does it start?\" โ†’ What starts?\n", + "\n", + "**Temporal references:**\n", + "- \"you mentioned\", \"earlier\", \"last time\"\n", + "\n", + "### How Working Memory Provides Grounding\n", + "\n", + "**With Working Memory:**\n", + "```\n", + "User: \"Tell me about CS401\"\n", + "Agent: \"CS401 is Machine Learning. It covers...\"\n", + "[Stores: User asked about CS401]\n", + "\n", + "User: \"What are its prerequisites?\"\n", + "Agent: [Checks memory: \"its\" = CS401]\n", + "Agent: โœ… \"CS401 requires CS201 and MATH301\"\n", + "\n", + "User: \"Can I take it?\"\n", + "Agent: [Checks memory: \"it\" = CS401]\n", + "Agent: [Checks student transcript]\n", + "Agent: โœ… \"You've completed CS201 but still need MATH301\"\n", + "```\n", + "\n", + "**Now the conversation flows naturally!**\n", + "\n", + "### What Working Memory Stores\n", + "\n", + "Working memory maintains the **current conversation context**:\n", + "\n", + "```\n", + "Session: session_123\n", + "Messages:\n", + " 1. User: \"Tell me about CS401\"\n", + " 2. Agent: \"CS401 is Machine Learning...\"\n", + " 3. User: \"What are its prerequisites?\"\n", + " 4. Agent: \"CS401 requires CS201 and MATH301\"\n", + " 5. 
User: \"Can I take it?\"\n", + " [Current turn - needs context from messages 1-4]\n", + "```\n", + "\n", + "**Each message builds on previous messages.**\n", + "\n", + "### Without Memory: Every Message is Isolated\n", + "\n", + "```\n", + "Turn 1: User asks about CS401\n", + " โ†’ Agent responds\n", + " โ†’ Agent forgets everything โŒ\n", + "\n", + "Turn 2: User asks \"What are its prerequisites?\"\n", + " โ†’ Agent doesn't know what \"its\" refers to โŒ\n", + " โ†’ Conversation breaks โŒ\n", + "```\n", + "\n", + "### The Problem This Notebook Solves\n", + "\n", + "**Working memory** stores conversation messages so that:\n", + "\n", + "โœ… Pronouns can be resolved (\"it\" โ†’ CS401) \n", + "โœ… Context carries forward (knows what was discussed) \n", + "โœ… Multi-turn conversations work naturally \n", + "โœ… Users don't repeat themselves \n", + "\n", + "**Now let's implement this solution.**\n", + "\n", + "### Key Concepts\n", + "\n", + "- **Working Memory**: Session-scoped storage for conversation messages and context\n", + "- **Session Scope**: Working memory is tied to a specific conversation session\n", + "- **Message History**: The sequence of user and assistant messages that form the conversation\n", + "- **Grounding**: Using stored context to understand what users are referring to\n", + "\n", + "### Technical Implementation\n", + "\n", + "Working memory solves the grounding problem by:\n", + "- Storing conversation messages so the LLM can reference earlier parts of the conversation\n", + "- Maintaining task-specific context (like current goals, preferences mentioned in this session)\n", + "- Persisting this information across multiple turns of the conversation\n", + "- Providing a foundation for extracting important information to long-term storage\n", + "\n", + "Because working memory stores messages, we can extract long-term data from it. When using the Agent Memory Server, extraction happens automatically in the background based on a configured strategy that controls what kind of information gets extracted." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-03T20:32:31.983697Z", + "start_time": "2025-10-03T20:32:28.032067Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "# Install the Redis Context Course package\n", + "%pip install -q -e ../../reference-agent" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-03T20:32:48.128143Z", + "start_time": "2025-10-03T20:32:48.092640Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Environment variables loaded\n", + " REDIS_URL: redis://localhost:6379\n", + " AGENT_MEMORY_URL: http://localhost:8000\n", + " OPENAI_API_KEY: โœ“ Set\n" + ] + } + ], + "source": [ + "import os\n", + "from dotenv import load_dotenv\n", + "\n", + "# Load environment variables from .env file\n", + "load_dotenv()\n", + "\n", + "# Verify required environment variables are set\n", + "if not os.getenv(\"OPENAI_API_KEY\"):\n", + " raise ValueError(\n", + " \"OPENAI_API_KEY not found. Please create a .env file with your OpenAI API key. 
\"\n", + " \"See SETUP.md for instructions.\"\n", + " )\n", + "\n", + "print(\"โœ… Environment variables loaded\")\n", + "print(f\" REDIS_URL: {os.getenv('REDIS_URL', 'redis://localhost:6379')}\")\n", + "print(f\" AGENT_MEMORY_URL: {os.getenv('AGENT_MEMORY_URL', 'http://localhost:8000')}\")\n", + "print(f\" OPENAI_API_KEY: {'โœ“ Set' if os.getenv('OPENAI_API_KEY') else 'โœ— Not set'}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Working Memory Structure\n", + "\n", + "Working memory contains the essential context for the current conversation:\n", + "\n", + "- **Messages**: The conversation history (user and assistant messages)\n", + "- **Session ID**: Identifies this specific conversation\n", + "- **User ID**: Identifies the user across sessions\n", + "- **Task Data**: Optional task-specific context (current goals, temporary state)\n", + "\n", + "This structure gives the LLM everything it needs to understand the current conversation context.\n", + "\n", + "Let's import the memory client to work with working memory:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-02T22:01:32.779633Z", + "start_time": "2025-10-02T22:01:32.776671Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Memory server client imported successfully\n" + ] + } + ], + "source": [ + "from redis_context_course import MemoryClient\n", + "\n", + "print(\"โœ… Memory server client imported successfully\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Storing and Retrieving Conversation Context\n", + "\n", + "Let's see how working memory stores and retrieves conversation context:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-02T22:01:39.218627Z", + "start_time": "2025-10-02T22:01:39.167246Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Memory client initialized successfully\n", + "๐Ÿ“Š User ID: demo_student_working_memory\n", + "๐Ÿ“Š Session ID: session_001\n", + "\n", + "Working memory will store conversation messages for this session.\n" + ] + } + ], + "source": [ + "import os\n", + "from agent_memory_client import MemoryClientConfig\n", + "\n", + "# Initialize memory client for working memory\n", + "student_id = \"demo_student_working_memory\"\n", + "session_id = \"session_001\"\n", + "config = MemoryClientConfig(\n", + " base_url=os.getenv(\"AGENT_MEMORY_URL\", \"http://localhost:8000\"),\n", + " default_namespace=\"redis_university\"\n", + ")\n", + "memory_client = MemoryClient(config=config)\n", + "\n", + "print(\"โœ… Memory client initialized successfully\")\n", + "print(f\"๐Ÿ“Š User ID: {student_id}\")\n", + "print(f\"๐Ÿ“Š Session ID: {session_id}\")\n", + "print(\"\\nWorking memory will store conversation messages for this session.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-02T22:01:47.863402Z", + "start_time": "2025-10-02T22:01:47.590762Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "๐Ÿ’ฌ Simulating Conversation with Working Memory\n", + "==================================================\n" + ] + }, + { + "ename": "ConnectError", + "evalue": "All connection attempts failed", + "output_type": "error", + "traceback": [ + 
"\u001b[31m---------------------------------------------------------------------------\u001b[39m", + "\u001b[31mConnectError\u001b[39m Traceback (most recent call last)", + "\u001b[36mFile \u001b[39m\u001b[32m~/workspace/redis-ai-resources/python-recipes/context-engineering/venv/lib/python3.12/site-packages/httpx/_transports/default.py:101\u001b[39m, in \u001b[36mmap_httpcore_exceptions\u001b[39m\u001b[34m()\u001b[39m\n\u001b[32m 100\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m101\u001b[39m \u001b[38;5;28;01myield\u001b[39;00m\n\u001b[32m 102\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/workspace/redis-ai-resources/python-recipes/context-engineering/venv/lib/python3.12/site-packages/httpx/_transports/default.py:394\u001b[39m, in \u001b[36mAsyncHTTPTransport.handle_async_request\u001b[39m\u001b[34m(self, request)\u001b[39m\n\u001b[32m 393\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m map_httpcore_exceptions():\n\u001b[32m--> \u001b[39m\u001b[32m394\u001b[39m resp = \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m._pool.handle_async_request(req)\n\u001b[32m 396\u001b[39m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(resp.stream, typing.AsyncIterable)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/workspace/redis-ai-resources/python-recipes/context-engineering/venv/lib/python3.12/site-packages/httpcore/_async/connection_pool.py:256\u001b[39m, in \u001b[36mAsyncConnectionPool.handle_async_request\u001b[39m\u001b[34m(self, request)\u001b[39m\n\u001b[32m 255\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m._close_connections(closing)\n\u001b[32m--> \u001b[39m\u001b[32m256\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m exc \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 258\u001b[39m \u001b[38;5;66;03m# Return the response. 
Note that in this case we still have to manage\u001b[39;00m\n\u001b[32m 259\u001b[39m \u001b[38;5;66;03m# the point at which the response is closed.\u001b[39;00m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/workspace/redis-ai-resources/python-recipes/context-engineering/venv/lib/python3.12/site-packages/httpcore/_async/connection_pool.py:236\u001b[39m, in \u001b[36mAsyncConnectionPool.handle_async_request\u001b[39m\u001b[34m(self, request)\u001b[39m\n\u001b[32m 234\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m 235\u001b[39m \u001b[38;5;66;03m# Send the request on the assigned connection.\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m236\u001b[39m response = \u001b[38;5;28;01mawait\u001b[39;00m connection.handle_async_request(\n\u001b[32m 237\u001b[39m pool_request.request\n\u001b[32m 238\u001b[39m )\n\u001b[32m 239\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m ConnectionNotAvailable:\n\u001b[32m 240\u001b[39m \u001b[38;5;66;03m# In some cases a connection may initially be available to\u001b[39;00m\n\u001b[32m 241\u001b[39m \u001b[38;5;66;03m# handle a request, but then become unavailable.\u001b[39;00m\n\u001b[32m 242\u001b[39m \u001b[38;5;66;03m#\u001b[39;00m\n\u001b[32m 243\u001b[39m \u001b[38;5;66;03m# In this case we clear the connection and try again.\u001b[39;00m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/workspace/redis-ai-resources/python-recipes/context-engineering/venv/lib/python3.12/site-packages/httpcore/_async/connection.py:101\u001b[39m, in \u001b[36mAsyncHTTPConnection.handle_async_request\u001b[39m\u001b[34m(self, request)\u001b[39m\n\u001b[32m 100\u001b[39m \u001b[38;5;28mself\u001b[39m._connect_failed = \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m101\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m exc\n\u001b[32m 103\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m._connection.handle_async_request(request)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/workspace/redis-ai-resources/python-recipes/context-engineering/venv/lib/python3.12/site-packages/httpcore/_async/connection.py:78\u001b[39m, in \u001b[36mAsyncHTTPConnection.handle_async_request\u001b[39m\u001b[34m(self, request)\u001b[39m\n\u001b[32m 77\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m._connection \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m---> \u001b[39m\u001b[32m78\u001b[39m stream = \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m._connect(request)\n\u001b[32m 80\u001b[39m ssl_object = stream.get_extra_info(\u001b[33m\"\u001b[39m\u001b[33mssl_object\u001b[39m\u001b[33m\"\u001b[39m)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/workspace/redis-ai-resources/python-recipes/context-engineering/venv/lib/python3.12/site-packages/httpcore/_async/connection.py:124\u001b[39m, in \u001b[36mAsyncHTTPConnection._connect\u001b[39m\u001b[34m(self, request)\u001b[39m\n\u001b[32m 123\u001b[39m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mwith\u001b[39;00m Trace(\u001b[33m\"\u001b[39m\u001b[33mconnect_tcp\u001b[39m\u001b[33m\"\u001b[39m, logger, request, kwargs) \u001b[38;5;28;01mas\u001b[39;00m trace:\n\u001b[32m--> \u001b[39m\u001b[32m124\u001b[39m stream = \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m._network_backend.connect_tcp(**kwargs)\n\u001b[32m 125\u001b[39m trace.return_value = stream\n", + "\u001b[36mFile 
\u001b[39m\u001b[32m~/workspace/redis-ai-resources/python-recipes/context-engineering/venv/lib/python3.12/site-packages/httpcore/_backends/auto.py:31\u001b[39m, in \u001b[36mAutoBackend.connect_tcp\u001b[39m\u001b[34m(self, host, port, timeout, local_address, socket_options)\u001b[39m\n\u001b[32m 30\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m._init_backend()\n\u001b[32m---> \u001b[39m\u001b[32m31\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m._backend.connect_tcp(\n\u001b[32m 32\u001b[39m host,\n\u001b[32m 33\u001b[39m port,\n\u001b[32m 34\u001b[39m timeout=timeout,\n\u001b[32m 35\u001b[39m local_address=local_address,\n\u001b[32m 36\u001b[39m socket_options=socket_options,\n\u001b[32m 37\u001b[39m )\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/workspace/redis-ai-resources/python-recipes/context-engineering/venv/lib/python3.12/site-packages/httpcore/_backends/anyio.py:113\u001b[39m, in \u001b[36mAnyIOBackend.connect_tcp\u001b[39m\u001b[34m(self, host, port, timeout, local_address, socket_options)\u001b[39m\n\u001b[32m 108\u001b[39m exc_map = {\n\u001b[32m 109\u001b[39m \u001b[38;5;167;01mTimeoutError\u001b[39;00m: ConnectTimeout,\n\u001b[32m 110\u001b[39m \u001b[38;5;167;01mOSError\u001b[39;00m: ConnectError,\n\u001b[32m 111\u001b[39m anyio.BrokenResourceError: ConnectError,\n\u001b[32m 112\u001b[39m }\n\u001b[32m--> \u001b[39m\u001b[32m113\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m map_exceptions(exc_map):\n\u001b[32m 114\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m anyio.fail_after(timeout):\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/.pyenv/versions/3.12.6/lib/python3.12/contextlib.py:158\u001b[39m, in \u001b[36m_GeneratorContextManager.__exit__\u001b[39m\u001b[34m(self, typ, value, traceback)\u001b[39m\n\u001b[32m 157\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m158\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mgen\u001b[49m\u001b[43m.\u001b[49m\u001b[43mthrow\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 159\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mStopIteration\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[32m 160\u001b[39m \u001b[38;5;66;03m# Suppress StopIteration *unless* it's the same exception that\u001b[39;00m\n\u001b[32m 161\u001b[39m \u001b[38;5;66;03m# was passed to throw(). 
This prevents a StopIteration\u001b[39;00m\n\u001b[32m 162\u001b[39m \u001b[38;5;66;03m# raised inside the \"with\" statement from being suppressed.\u001b[39;00m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/workspace/redis-ai-resources/python-recipes/context-engineering/venv/lib/python3.12/site-packages/httpcore/_exceptions.py:14\u001b[39m, in \u001b[36mmap_exceptions\u001b[39m\u001b[34m(map)\u001b[39m\n\u001b[32m 13\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(exc, from_exc):\n\u001b[32m---> \u001b[39m\u001b[32m14\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m to_exc(exc) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mexc\u001b[39;00m\n\u001b[32m 15\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m\n", + "\u001b[31mConnectError\u001b[39m: All connection attempts failed", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[31mConnectError\u001b[39m Traceback (most recent call last)", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[5]\u001b[39m\u001b[32m, line 30\u001b[39m\n\u001b[32m 21\u001b[39m \u001b[38;5;66;03m# Create WorkingMemory object\u001b[39;00m\n\u001b[32m 22\u001b[39m working_memory = WorkingMemory(\n\u001b[32m 23\u001b[39m session_id=session_id,\n\u001b[32m 24\u001b[39m user_id=student_id,\n\u001b[32m (...)\u001b[39m\u001b[32m 27\u001b[39m data={}\n\u001b[32m 28\u001b[39m )\n\u001b[32m---> \u001b[39m\u001b[32m30\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m memory_client.put_working_memory(\n\u001b[32m 31\u001b[39m session_id=session_id,\n\u001b[32m 32\u001b[39m memory=working_memory,\n\u001b[32m 33\u001b[39m user_id=student_id,\n\u001b[32m 34\u001b[39m model_name=\u001b[33m\"\u001b[39m\u001b[33mgpt-4o\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 35\u001b[39m )\n\u001b[32m 37\u001b[39m \u001b[38;5;28mprint\u001b[39m(\u001b[33m\"\u001b[39m\u001b[33mโœ… Conversation saved to working memory\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 38\u001b[39m \u001b[38;5;28mprint\u001b[39m(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33m๐Ÿ“Š Messages: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlen\u001b[39m(messages)\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/workspace/redis-ai-resources/python-recipes/context-engineering/venv/lib/python3.12/site-packages/agent_memory_client/client.py:468\u001b[39m, in \u001b[36mMemoryAPIClient.put_working_memory\u001b[39m\u001b[34m(self, session_id, memory, user_id, model_name, context_window_max)\u001b[39m\n\u001b[32m 465\u001b[39m params[\u001b[33m\"\u001b[39m\u001b[33mcontext_window_max\u001b[39m\u001b[33m\"\u001b[39m] = \u001b[38;5;28mstr\u001b[39m(effective_context_window_max)\n\u001b[32m 467\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m468\u001b[39m response = \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m._client.put(\n\u001b[32m 469\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33m/v1/working-memory/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00msession_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m,\n\u001b[32m 470\u001b[39m json=memory.model_dump(exclude_none=\u001b[38;5;28;01mTrue\u001b[39;00m, mode=\u001b[33m\"\u001b[39m\u001b[33mjson\u001b[39m\u001b[33m\"\u001b[39m),\n\u001b[32m 471\u001b[39m params=params,\n\u001b[32m 472\u001b[39m )\n\u001b[32m 473\u001b[39m response.raise_for_status()\n\u001b[32m 474\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m 
WorkingMemoryResponse(**response.json())\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/workspace/redis-ai-resources/python-recipes/context-engineering/venv/lib/python3.12/site-packages/httpx/_client.py:1896\u001b[39m, in \u001b[36mAsyncClient.put\u001b[39m\u001b[34m(self, url, content, data, files, json, params, headers, cookies, auth, follow_redirects, timeout, extensions)\u001b[39m\n\u001b[32m 1875\u001b[39m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mput\u001b[39m(\n\u001b[32m 1876\u001b[39m \u001b[38;5;28mself\u001b[39m,\n\u001b[32m 1877\u001b[39m url: URL | \u001b[38;5;28mstr\u001b[39m,\n\u001b[32m (...)\u001b[39m\u001b[32m 1889\u001b[39m extensions: RequestExtensions | \u001b[38;5;28;01mNone\u001b[39;00m = \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[32m 1890\u001b[39m ) -> Response:\n\u001b[32m 1891\u001b[39m \u001b[38;5;250m \u001b[39m\u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m 1892\u001b[39m \u001b[33;03m Send a `PUT` request.\u001b[39;00m\n\u001b[32m 1893\u001b[39m \n\u001b[32m 1894\u001b[39m \u001b[33;03m **Parameters**: See `httpx.request`.\u001b[39;00m\n\u001b[32m 1895\u001b[39m \u001b[33;03m \"\"\"\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1896\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m.request(\n\u001b[32m 1897\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mPUT\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m 1898\u001b[39m url,\n\u001b[32m 1899\u001b[39m content=content,\n\u001b[32m 1900\u001b[39m data=data,\n\u001b[32m 1901\u001b[39m files=files,\n\u001b[32m 1902\u001b[39m json=json,\n\u001b[32m 1903\u001b[39m params=params,\n\u001b[32m 1904\u001b[39m headers=headers,\n\u001b[32m 1905\u001b[39m cookies=cookies,\n\u001b[32m 1906\u001b[39m auth=auth,\n\u001b[32m 1907\u001b[39m follow_redirects=follow_redirects,\n\u001b[32m 1908\u001b[39m timeout=timeout,\n\u001b[32m 1909\u001b[39m extensions=extensions,\n\u001b[32m 1910\u001b[39m )\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/workspace/redis-ai-resources/python-recipes/context-engineering/venv/lib/python3.12/site-packages/httpx/_client.py:1540\u001b[39m, in \u001b[36mAsyncClient.request\u001b[39m\u001b[34m(self, method, url, content, data, files, json, params, headers, cookies, auth, follow_redirects, timeout, extensions)\u001b[39m\n\u001b[32m 1525\u001b[39m warnings.warn(message, \u001b[38;5;167;01mDeprecationWarning\u001b[39;00m, stacklevel=\u001b[32m2\u001b[39m)\n\u001b[32m 1527\u001b[39m request = \u001b[38;5;28mself\u001b[39m.build_request(\n\u001b[32m 1528\u001b[39m method=method,\n\u001b[32m 1529\u001b[39m url=url,\n\u001b[32m (...)\u001b[39m\u001b[32m 1538\u001b[39m extensions=extensions,\n\u001b[32m 1539\u001b[39m )\n\u001b[32m-> \u001b[39m\u001b[32m1540\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m.send(request, auth=auth, follow_redirects=follow_redirects)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/workspace/redis-ai-resources/python-recipes/context-engineering/venv/lib/python3.12/site-packages/httpx/_client.py:1629\u001b[39m, in \u001b[36mAsyncClient.send\u001b[39m\u001b[34m(self, request, stream, auth, follow_redirects)\u001b[39m\n\u001b[32m 1625\u001b[39m \u001b[38;5;28mself\u001b[39m._set_timeout(request)\n\u001b[32m 1627\u001b[39m auth = \u001b[38;5;28mself\u001b[39m._build_request_auth(request, auth)\n\u001b[32m-> \u001b[39m\u001b[32m1629\u001b[39m response = \u001b[38;5;28;01mawait\u001b[39;00m 
\u001b[38;5;28mself\u001b[39m._send_handling_auth(\n\u001b[32m 1630\u001b[39m request,\n\u001b[32m 1631\u001b[39m auth=auth,\n\u001b[32m 1632\u001b[39m follow_redirects=follow_redirects,\n\u001b[32m 1633\u001b[39m history=[],\n\u001b[32m 1634\u001b[39m )\n\u001b[32m 1635\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m 1636\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m stream:\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/workspace/redis-ai-resources/python-recipes/context-engineering/venv/lib/python3.12/site-packages/httpx/_client.py:1657\u001b[39m, in \u001b[36mAsyncClient._send_handling_auth\u001b[39m\u001b[34m(self, request, auth, follow_redirects, history)\u001b[39m\n\u001b[32m 1654\u001b[39m request = \u001b[38;5;28;01mawait\u001b[39;00m auth_flow.\u001b[34m__anext__\u001b[39m()\n\u001b[32m 1656\u001b[39m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[32m-> \u001b[39m\u001b[32m1657\u001b[39m response = \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m._send_handling_redirects(\n\u001b[32m 1658\u001b[39m request,\n\u001b[32m 1659\u001b[39m follow_redirects=follow_redirects,\n\u001b[32m 1660\u001b[39m history=history,\n\u001b[32m 1661\u001b[39m )\n\u001b[32m 1662\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m 1663\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/workspace/redis-ai-resources/python-recipes/context-engineering/venv/lib/python3.12/site-packages/httpx/_client.py:1694\u001b[39m, in \u001b[36mAsyncClient._send_handling_redirects\u001b[39m\u001b[34m(self, request, follow_redirects, history)\u001b[39m\n\u001b[32m 1691\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m hook \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m._event_hooks[\u001b[33m\"\u001b[39m\u001b[33mrequest\u001b[39m\u001b[33m\"\u001b[39m]:\n\u001b[32m 1692\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m hook(request)\n\u001b[32m-> \u001b[39m\u001b[32m1694\u001b[39m response = \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m._send_single_request(request)\n\u001b[32m 1695\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m 1696\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m hook \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m._event_hooks[\u001b[33m\"\u001b[39m\u001b[33mresponse\u001b[39m\u001b[33m\"\u001b[39m]:\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/workspace/redis-ai-resources/python-recipes/context-engineering/venv/lib/python3.12/site-packages/httpx/_client.py:1730\u001b[39m, in \u001b[36mAsyncClient._send_single_request\u001b[39m\u001b[34m(self, request)\u001b[39m\n\u001b[32m 1725\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\n\u001b[32m 1726\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mAttempted to send an sync request with an AsyncClient instance.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 1727\u001b[39m )\n\u001b[32m 1729\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m request_context(request=request):\n\u001b[32m-> \u001b[39m\u001b[32m1730\u001b[39m response = \u001b[38;5;28;01mawait\u001b[39;00m transport.handle_async_request(request)\n\u001b[32m 1732\u001b[39m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(response.stream, AsyncByteStream)\n\u001b[32m 1733\u001b[39m response.request = request\n", + "\u001b[36mFile 
\u001b[39m\u001b[32m~/workspace/redis-ai-resources/python-recipes/context-engineering/venv/lib/python3.12/site-packages/httpx/_transports/default.py:393\u001b[39m, in \u001b[36mAsyncHTTPTransport.handle_async_request\u001b[39m\u001b[34m(self, request)\u001b[39m\n\u001b[32m 379\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mhttpcore\u001b[39;00m\n\u001b[32m 381\u001b[39m req = httpcore.Request(\n\u001b[32m 382\u001b[39m method=request.method,\n\u001b[32m 383\u001b[39m url=httpcore.URL(\n\u001b[32m (...)\u001b[39m\u001b[32m 391\u001b[39m extensions=request.extensions,\n\u001b[32m 392\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m393\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m map_httpcore_exceptions():\n\u001b[32m 394\u001b[39m resp = \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m._pool.handle_async_request(req)\n\u001b[32m 396\u001b[39m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(resp.stream, typing.AsyncIterable)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/.pyenv/versions/3.12.6/lib/python3.12/contextlib.py:158\u001b[39m, in \u001b[36m_GeneratorContextManager.__exit__\u001b[39m\u001b[34m(self, typ, value, traceback)\u001b[39m\n\u001b[32m 156\u001b[39m value = typ()\n\u001b[32m 157\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m158\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mgen\u001b[49m\u001b[43m.\u001b[49m\u001b[43mthrow\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 159\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mStopIteration\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[32m 160\u001b[39m \u001b[38;5;66;03m# Suppress StopIteration *unless* it's the same exception that\u001b[39;00m\n\u001b[32m 161\u001b[39m \u001b[38;5;66;03m# was passed to throw(). 
This prevents a StopIteration\u001b[39;00m\n\u001b[32m 162\u001b[39m \u001b[38;5;66;03m# raised inside the \"with\" statement from being suppressed.\u001b[39;00m\n\u001b[32m 163\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m exc \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m value\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/workspace/redis-ai-resources/python-recipes/context-engineering/venv/lib/python3.12/site-packages/httpx/_transports/default.py:118\u001b[39m, in \u001b[36mmap_httpcore_exceptions\u001b[39m\u001b[34m()\u001b[39m\n\u001b[32m 115\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m\n\u001b[32m 117\u001b[39m message = \u001b[38;5;28mstr\u001b[39m(exc)\n\u001b[32m--> \u001b[39m\u001b[32m118\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m mapped_exc(message) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mexc\u001b[39;00m\n", + "\u001b[31mConnectError\u001b[39m: All connection attempts failed" + ] + } + ], + "source": [ + "# Simulate a conversation using working memory\n", + "\n", + "print(\"๐Ÿ’ฌ Simulating Conversation with Working Memory\")\n", + "print(\"=\" * 50)\n", + "\n", + "# Create messages for the conversation\n", + "messages = [\n", + " {\"role\": \"user\", \"content\": \"I prefer online courses because I work part-time\"},\n", + " {\"role\": \"assistant\", \"content\": \"I understand you prefer online courses due to your work schedule.\"},\n", + " {\"role\": \"user\", \"content\": \"My goal is to specialize in machine learning\"},\n", + " {\"role\": \"assistant\", \"content\": \"Machine learning is an excellent specialization!\"},\n", + " {\"role\": \"user\", \"content\": \"What courses do you recommend?\"},\n", + "]\n", + "\n", + "# Save to working memory\n", + "from agent_memory_client.models import WorkingMemory, MemoryMessage\n", + "\n", + "# Convert messages to MemoryMessage format\n", + "memory_messages = [MemoryMessage(**msg) for msg in messages]\n", + "\n", + "# Create WorkingMemory object\n", + "working_memory = WorkingMemory(\n", + " session_id=session_id,\n", + " user_id=student_id,\n", + " messages=memory_messages,\n", + " memories=[],\n", + " data={}\n", + ")\n", + "\n", + "await memory_client.put_working_memory(\n", + " session_id=session_id,\n", + " memory=working_memory,\n", + " user_id=student_id,\n", + " model_name=\"gpt-4o\"\n", + ")\n", + "\n", + "print(\"โœ… Conversation saved to working memory\")\n", + "print(f\"๐Ÿ“Š Messages: {len(messages)}\")\n", + "print(\"\\nThese messages are now available as context for the LLM.\")\n", + "print(\"The LLM can reference earlier parts of the conversation.\")\n", + "\n", + "# Retrieve working memory\n", + "_, working_memory = await memory_client.get_or_create_working_memory(\n", + " session_id=session_id,\n", + " model_name=\"gpt-4o\",\n", + " user_id=student_id,\n", + ")\n", + "\n", + "if working_memory:\n", + " print(f\"\\n๐Ÿ“‹ Retrieved {len(working_memory.messages)} messages from working memory\")\n", + " print(\"This is the conversation context that would be provided to the LLM.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Automatic Extraction to Long-Term Memory\n", + "\n", + "Because working memory stores messages, we can extract important long-term information from it. 
When using the Agent Memory Server, this extraction happens automatically in the background.\n", + "\n", + "The extraction strategy controls what kind of information gets extracted:\n", + "- User preferences (e.g., \"I prefer online courses\")\n", + "- Goals (e.g., \"I want to specialize in machine learning\")\n", + "- Important facts (e.g., \"I work part-time\")\n", + "- Key decisions or outcomes from the conversation\n", + "\n", + "This extracted information becomes long-term memory that persists across sessions.\n", + "\n", + "Let's check what information was automatically extracted from our working memory:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check what was extracted to long-term memory\n", + "import asyncio\n", + "from agent_memory_client import MemoryAPIClient as MemoryClient, MemoryClientConfig\n", + "\n", + "# Ensure memory_client is defined (in case cells are run out of order)\n", + "if 'memory_client' not in globals():\n", + " # Initialize memory client with proper config\n", + " import os\n", + " config = MemoryClientConfig(\n", + " base_url=os.getenv(\"AGENT_MEMORY_URL\", \"http://localhost:8000\"),\n", + " default_namespace=\"redis_university\"\n", + " )\n", + " memory_client = MemoryClient(config=config)\n", + "\n", + "await asyncio.sleep(2) # Give the extraction process time to complete\n", + "\n", + "# Search for extracted memories\n", + "extracted_memories = await memory_client.search_long_term_memory(\n", + " text=\"preferences goals\",\n", + " limit=10\n", + ")\n", + "\n", + "print(\"๐Ÿง  Extracted to Long-term Memory\")\n", + "print(\"=\" * 50)\n", + "\n", + "if extracted_memories.memories:\n", + " for i, memory in enumerate(extracted_memories.memories, 1):\n", + " print(f\"{i}. {memory.text}\")\n", + " print(f\" Type: {memory.memory_type} | Topics: {', '.join(memory.topics)}\")\n", + " print()\n", + "else:\n", + " print(\"No memories extracted yet (extraction may take a moment)\")\n", + " print(\"\\nThe Agent Memory Server automatically extracts:\")\n", + " print(\"- User preferences (e.g., 'prefers online courses')\")\n", + " print(\"- Goals (e.g., 'wants to specialize in machine learning')\")\n", + " print(\"- Important facts (e.g., 'works part-time')\")\n", + " print(\"\\nThis happens in the background based on the configured extraction strategy.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. 
Summary\n", + "\n", + "In this notebook, you learned:\n", + "\n", + "- โœ… **The Core Problem**: LLMs are stateless and need working memory to maintain conversation context\n", + "- โœ… **Working Memory Solution**: Stores messages and task-specific context for the current session\n", + "- โœ… **Message Storage**: Conversation history gives the LLM knowledge of what was said earlier\n", + "- โœ… **Automatic Extraction**: Important information is extracted to long-term memory in the background\n", + "- โœ… **Extraction Strategy**: Controls what kind of information gets extracted from working memory\n", + "\n", + "**Key API Methods:**\n", + "```python\n", + "# Save working memory (stores messages for this session)\n", + "await memory_client.put_working_memory(session_id, memory, user_id, model_name)\n", + "\n", + "# Retrieve working memory (gets conversation context)\n", + "_, working_memory = await memory_client.get_or_create_working_memory(\n", + " session_id, model_name, user_id\n", + ")\n", + "\n", + "# Search long-term memories (extracted from working memory)\n", + "memories = await memory_client.search_long_term_memory(text, limit)\n", + "```\n", + "\n", + "**The Key Insight:**\n", + "Working memory solves the fundamental problem of giving LLMs knowledge of the current conversation. Because it stores messages, we can also extract long-term data from it. The extraction strategy controls what gets extracted, and this happens automatically in the background when using the Agent Memory Server.\n", + "\n", + "## Next Steps\n", + "\n", + "See the next notebooks to learn about:\n", + "- Long-term memory and how it persists across sessions\n", + "- Memory tools that give LLMs explicit control over what gets remembered\n", + "- Integrating working and long-term memory in your applications" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/python-recipes/context-engineering/notebooks_archive/section-3-memory/02_defining_tools.ipynb b/python-recipes/context-engineering/notebooks_archive/section-3-memory/02_defining_tools.ipynb new file mode 100644 index 00000000..2b62f849 --- /dev/null +++ b/python-recipes/context-engineering/notebooks_archive/section-3-memory/02_defining_tools.ipynb @@ -0,0 +1,1516 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![Redis](https://redis.io/wp-content/uploads/2024/04/Logotype.svg?auto=webp&quality=85,75&width=120)\n", + "\n", + "# Tool Definition: Building Agent Capabilities\n", + "\n", + "## Learning Objectives (25 minutes)\n", + "By the end of this notebook, you will be able to:\n", + "1. **Create** simple tools using LangChain's @tool decorator\n", + "2. **Test** how LLMs select and use tools\n", + "3. **Write** effective tool descriptions that guide LLM behavior\n", + "4. **Build** a tool-enabled agent for Redis University\n", + "5. 
**Apply** best practices for tool design\n", + "\n", + "## Prerequisites\n", + "- Completed `01_system_instructions.ipynb`\n", + "- OpenAI API key configured (for LangChain ChatOpenAI)\n", + "- Redis Stack running with course data\n", + "\n", + "---\n", + "\n", + "## Introduction\n", + "\n", + "**Tools** give your agent superpowers! Instead of just generating text, your agent can:\n", + "- ๐Ÿ” Search real course catalogs\n", + "- โœ… Check prerequisites\n", + "- ๐Ÿ“Š Get detailed course information\n", + "- ๐ŸŽฏ Make data-driven recommendations\n", + "\n", + "**Example:** When a student asks \"What machine learning courses are available?\", an agent *without* tools can only respond based on training data (likely outdated). An agent *with* a `search_courses` tool can query your live course database and return current offerings.\n", + "\n", + "Let's build tools step by step, starting simple and adding complexity gradually.\n", + "\n", + "---\n", + "\n", + "## Concepts: How Tools Work\n", + "\n", + "### What Are Tools?\n", + "\n", + "Tools are **functions that the LLM can call** to perform actions or retrieve information. They extend the agent's capabilities beyond text generation.\n", + "\n", + "**Without tools:**\n", + "- Agent can only generate text based on its training data\n", + "- No access to real-time data\n", + "- Can't take actions\n", + "- Limited to what's in the prompt\n", + "\n", + "**With tools:**\n", + "- Agent can search databases\n", + "- Agent can retrieve current information\n", + "- Agent can perform calculations\n", + "- Agent can take actions (send emails, create records, etc.)\n", + "\n", + "### How Tool Calling Works\n", + "\n", + "1. **LLM receives** user query + system instructions + available tools\n", + "2. **LLM decides** which tool(s) to call (if any)\n", + "3. **LLM generates** tool call with parameters\n", + "4. **Your code executes** the tool function (not the LLM!)\n", + "5. **Tool returns** results\n", + "6. **LLM receives** results and generates response\n", + "\n", + "### Tool Schema Components\n", + "\n", + "Every tool needs:\n", + "1. **Name** - Unique identifier\n", + "2. **Description** - What the tool does (critical for selection!)\n", + "3. **Parameters** - Input schema with types and descriptions\n", + "4. 
**Function** - The actual implementation\n", + "\n", + "**In code, this looks like:**\n", + "```python\n", + "@tool(args_schema=SearchCoursesInput) # Parameters defined here\n", + "async def search_courses(query: str) -> str:\n", + " \"\"\"\n", + " Description goes here - the LLM reads this!\n", + " \"\"\"\n", + " # Implementation (LLM never sees this)\n", + "```\n", + "\n", + "### How LLMs Select Tools\n", + "\n", + "The LLM uses:\n", + "- Tool **names** (should be descriptive)\n", + "- Tool **descriptions** (should explain when to use it)\n", + "- Parameter **descriptions** (should explain what each parameter does)\n", + "- **Context** from the conversation\n", + "\n", + "**Key insight:** The LLM only sees the tool schema, not the implementation!\n", + "\n", + "**You'll see this in action:** Below, we'll first create a tool with a vague description, then improve it and observe how the LLM's behavior changes.\n", + "\n", + "### Common Pitfalls (We'll Avoid)\n", + "\n", + "- โŒ **Vague descriptions** โ†’ LLM picks wrong tool\n", + "- โŒ **Too many similar tools** โ†’ LLM gets confused \n", + "- โŒ **Missing parameter descriptions** โ†’ LLM passes wrong data\n", + "\n", + "**Don't worry** - we'll show you exactly how to implement these best practices!\n", + "\n", + "### Simple Best Practices (Keep It Clear!)\n", + "\n", + "#### โŒ **Bad Tool Descriptions**\n", + "```python\n", + "# BAD: Vague and unhelpful\n", + "@tool\n", + "def search(query: str) -> str:\n", + " \"\"\"Search for stuff.\"\"\"\n", + " \n", + "# BAD: Missing context about when to use\n", + "@tool \n", + "def get_data(id: str) -> str:\n", + " \"\"\"Gets data from database.\"\"\"\n", + "```\n", + "\n", + "#### โœ… **Good Tool Descriptions**\n", + "```python\n", + "# GOOD: Clear purpose and usage context\n", + "@tool\n", + "def search_courses(query: str) -> str:\n", + " \"\"\"\n", + " Search for courses using semantic similarity.\n", + " \n", + " Use this when:\n", + " - Student asks about courses on a topic\n", + " - Student wants to explore subject areas\n", + " - Student asks \"What courses are available for...?\"\n", + " \"\"\"\n", + "```\n", + "\n", + "#### โŒ **Bad Parameter Descriptions**\n", + "```python\n", + "# BAD: Ambiguous parameter names and descriptions\n", + "def get_weather(location, unit):\n", + " # What format is location? 
What units are supported?\n", + "```\n", + "\n", + "#### โœ… **Good Parameter Descriptions**\n", + "```python\n", + "# GOOD: Clear parameter specifications\n", + "def get_weather(location: str, unit: str):\n", + " \"\"\"\n", + " Parameters:\n", + " - location: City name or \"latitude,longitude\" coordinates\n", + " - unit: Temperature unit (\"celsius\" or \"fahrenheit\")\n", + " \"\"\"\n", + "```\n", + "\n", + "#### โŒ **Bad Tool Naming**\n", + "- `tool1`, `helper`, `utils` โ†’ No indication of purpose\n", + "- `get_data`, `process` โ†’ Too generic\n", + "- `search_courses_and_maybe_filter_by_difficulty_and_format` โ†’ Too verbose\n", + "\n", + "#### โœ… **Good Tool Naming**\n", + "- `search_courses`, `get_course_details`, `check_prerequisites` โ†’ Clear and specific\n", + "- `calculate_shipping_cost`, `validate_email` โ†’ Action-oriented\n", + "- `format_student_transcript` โ†’ Descriptive of exact function\n", + "\n", + "#### โŒ **Bad Tool Scope**\n", + "```python\n", + "# BAD: Does too many things\n", + "@tool\n", + "def manage_student(action: str, student_id: str, data: dict):\n", + " \"\"\"Create, update, delete, or search students.\"\"\"\n", + " # LLM gets confused about which action to use\n", + "```\n", + "\n", + "#### โœ… **Good Tool Scope**\n", + "```python\n", + "# GOOD: Single, clear responsibility\n", + "@tool\n", + "def create_student_profile(name: str, email: str) -> str:\n", + " \"\"\"Create a new student profile with basic information.\"\"\"\n", + " \n", + "@tool\n", + "def update_student_email(student_id: str, new_email: str) -> str:\n", + " \"\"\"Update a student's email address.\"\"\"\n", + "```\n", + "\n", + "#### โŒ **Bad Error Handling**\n", + "```python\n", + "# BAD: Silent failures or cryptic errors\n", + "@tool\n", + "def get_course_details(course_id: str) -> str:\n", + " \"\"\"Get course details.\"\"\"\n", + " try:\n", + " return database.get(course_id)\n", + " except:\n", + " return None # LLM doesn't know what went wrong\n", + "```\n", + "\n", + "#### โœ… **Good Error Handling**\n", + "```python\n", + "# GOOD: Clear error messages for the LLM\n", + "@tool\n", + "def get_course_details(course_id: str) -> str:\n", + " \"\"\"Get detailed information about a specific course.\"\"\"\n", + " try:\n", + " course = database.get(course_id)\n", + " if not course:\n", + " return f\"Course {course_id} not found. 
Please check the course ID.\"\n", + " return format_course_details(course)\n", + " except Exception as e:\n", + " return f\"Error retrieving course details: {str(e)}\"\n", + "```\n", + "\n", + "#### โŒ **Bad Return Values**\n", + "```python\n", + "# BAD: Returns complex objects or unclear formats\n", + "@tool\n", + "def search_courses(query: str) -> dict:\n", + " \"\"\"Search courses.\"\"\"\n", + " return {\"results\": [...], \"meta\": {...}} # LLM can't parse this well\n", + "```\n", + "\n", + "#### โœ… **Good Return Values**\n", + "```python\n", + "# GOOD: Returns clear, formatted strings\n", + "@tool\n", + "def search_courses(query: str) -> str:\n", + " \"\"\"Search for courses matching the query.\"\"\"\n", + " results = perform_search(query)\n", + " if not results:\n", + " return \"No courses found matching your query.\"\n", + " \n", + " formatted = \"Found courses:\\n\"\n", + " for course in results:\n", + " formatted += f\"- {course.code}: {course.title}\\n\"\n", + " return formatted\n", + "```\n", + "\n", + "**Key Takeaway:** The LLM needs to understand both success and failure cases to provide helpful responses to users!\n", + "\n", + "## Environment Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "๐Ÿ”ง Environment Setup\n", + "==============================\n", + "OpenAI API Key: โœ… Set\n", + "Redis URL: redis://localhost:6379\n" + ] + } + ], + "source": [ + "# Environment setup\n", + "import os\n", + "from typing import List, Optional\n", + "from dotenv import load_dotenv\n", + "\n", + "# Load environment variables\n", + "load_dotenv()\n", + "OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")\n", + "REDIS_URL = os.getenv(\"REDIS_URL\", \"redis://localhost:6379\")\n", + "\n", + "print(\"๐Ÿ”ง Environment Setup\")\n", + "print(\"=\" * 30)\n", + "print(f\"OpenAI API Key: {'โœ… Set' if OPENAI_API_KEY else 'โŒ Not set'}\")\n", + "print(f\"Redis URL: {REDIS_URL}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… LangChain ChatOpenAI initialized\n", + "โœ… Redis connection healthy\n", + "16:38:37 redisvl.index.index INFO Index already exists, not overwriting.\n", + "โœ… Core modules imported successfully\n", + "๐Ÿ”— Using LangChain patterns consistent with our LangGraph agent\n" + ] + } + ], + "source": [ + "# Import required modules (consistent with LangGraph agent)\n", + "try:\n", + " # LangChain imports (same as our agent)\n", + " from langchain_openai import ChatOpenAI\n", + " from langchain_core.messages import SystemMessage, HumanMessage, AIMessage\n", + " from langchain_core.tools import tool\n", + " from pydantic import BaseModel, Field\n", + " \n", + " # Redis and course modules\n", + " import redis\n", + " from redis_context_course.models import Course, StudentProfile, CourseFormat\n", + " from redis_context_course.course_manager import CourseManager\n", + " from redis_context_course.redis_config import redis_config\n", + " \n", + " # Initialize LangChain LLM (same as our agent)\n", + " if OPENAI_API_KEY:\n", + " llm = ChatOpenAI(\n", + " model=\"gpt-4o-mini\",\n", + " temperature=0.7\n", + " )\n", + " print(\"โœ… LangChain ChatOpenAI initialized\")\n", + " else:\n", + " llm = None\n", + " print(\"โš ๏ธ LangChain LLM not available (API key not set)\")\n", + " \n", + " # Redis connection\n", + " redis_client = redis.from_url(REDIS_URL)\n", + " if 
redis_config.health_check():\n", + " print(\"โœ… Redis connection healthy\")\n", + " else:\n", + " print(\"โŒ Redis connection failed\")\n", + " \n", + " # Course manager\n", + " course_manager = CourseManager()\n", + " \n", + " print(\"โœ… Core modules imported successfully\")\n", + " print(\"๐Ÿ”— Using LangChain patterns consistent with our LangGraph agent\")\n", + " \n", + "except ImportError as e:\n", + " print(f\"โŒ Import failed: {e}\")\n", + " print(\"Please ensure you've completed the setup from Section 1.\")\n", + " print(\"Install missing packages: pip install langchain-openai langchain-core\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## ๐Ÿงช Hands-on: Building Your First Tool\n", + "\n", + "Let's start with the simplest possible tool and see how it works:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 1: A Basic Tool\n", + "\n", + "Let's create a simple course search tool:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Basic tool created!\n", + "Tool name: search_courses_basic\n", + "Description: Search for courses by title or description.\n" + ] + } + ], + "source": [ + "# Simple tool using LangChain's @tool decorator\n", + "@tool\n", + "def search_courses_basic(query: str) -> str:\n", + " \"\"\"Search for courses by title or description.\"\"\"\n", + " \n", + " # For now, let's use mock data to see how tools work\n", + " mock_courses = [\n", + " \"CS101: Introduction to Programming\",\n", + " \"CS201: Data Structures and Algorithms\", \n", + " \"CS301: Machine Learning Fundamentals\",\n", + " \"MATH101: Calculus I\",\n", + " \"MATH201: Statistics\"\n", + " ]\n", + " \n", + " # Simple search - find courses that contain the query\n", + " results = [course for course in mock_courses if query.lower() in course.lower()]\n", + " \n", + " if results:\n", + " return \"\\n\".join(results)\n", + " else:\n", + " return f\"No courses found for '{query}'\"\n", + "\n", + "print(\"โœ… Basic tool created!\")\n", + "print(f\"Tool name: {search_courses_basic.name}\")\n", + "print(f\"Description: {search_courses_basic.description}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "๐Ÿงช Testing the tool directly:\n", + "\n", + "Search for 'programming':\n", + "CS101: Introduction to Programming\n", + "\n", + "Search for 'machine learning':\n", + "CS301: Machine Learning Fundamentals\n", + "\n", + "Search for 'chemistry':\n", + "No courses found for 'chemistry'\n" + ] + } + ], + "source": [ + "# Test the tool directly\n", + "print(\"๐Ÿงช Testing the tool directly:\")\n", + "print(\"\\nSearch for 'programming':\")\n", + "result = search_courses_basic.invoke({\"query\": \"programming\"})\n", + "print(result)\n", + "\n", + "print(\"\\nSearch for 'machine learning':\")\n", + "result = search_courses_basic.invoke({\"query\": \"machine learning\"})\n", + "print(result)\n", + "\n", + "print(\"\\nSearch for 'chemistry':\")\n", + "result = search_courses_basic.invoke({\"query\": \"chemistry\"})\n", + "print(result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**๐ŸŽฏ Great!** Our tool works, but the description is too basic. Let's improve it." 
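+ ,
+ "\n",
+ "Before improving it, it's worth checking exactly what the LLM is given. A quick sketch (the LLM sees only the tool's name, description, and parameter schema; it never sees the function body - the expected values in the comments are illustrative):\n",
+ "\n",
+ "```python\n",
+ "# Inspect the schema the LLM receives for this tool\n",
+ "print(search_courses_basic.name)         # search_courses_basic\n",
+ "print(search_courses_basic.description)  # Search for courses by title or description.\n",
+ "print(search_courses_basic.args)         # {'query': {'title': 'Query', 'type': 'string'}}\n",
+ "```"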
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 2: Improving Tool Descriptions\n", + "\n", + "The LLM uses your tool description to decide when to use it. Let's make it better:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Real Redis-powered search tool created!\n", + "\n", + "Description:\n", + "Search for courses using semantic search on Redis University catalog.\n", + "\n", + "Use this tool when:\n", + "- Student asks about courses on a specific topic (e.g., \"programming\", \"machine learning\")\n", + "- Student wants to explore courses in a subject area\n", + "- Student asks \"What courses are available for...?\"\n", + "\n", + "Returns a list of matching courses with course codes, titles, and descriptions.\n" + ] + } + ], + "source": [ + "# Improved tool with better description using real Redis data\n", + "@tool\n", + "async def search_courses(query: str, limit: int = 5) -> str:\n", + " \"\"\"\n", + " Search for courses using semantic search on Redis University catalog.\n", + " \n", + " Use this tool when:\n", + " - Student asks about courses on a specific topic (e.g., \"programming\", \"machine learning\")\n", + " - Student wants to explore courses in a subject area\n", + " - Student asks \"What courses are available for...?\"\n", + " \n", + " Returns a list of matching courses with course codes, titles, and descriptions.\n", + " \"\"\"\n", + " \n", + " try:\n", + " # Use the real Redis-powered course manager (same as reference agent)\n", + " results = await course_manager.search_courses(query, limit=limit)\n", + " \n", + " if not results:\n", + " return f\"No courses found for '{query}'. Try different keywords or broader terms.\"\n", + " \n", + " # Format results for display\n", + " output = []\n", + " for course in results:\n", + " output.append(\n", + " f\"{course.course_code}: {course.title}\\n\"\n", + " f\" Credits: {course.credits} | {course.format.value} | {course.difficulty_level.value}\\n\"\n", + " f\" {course.description[:150]}...\"\n", + " )\n", + " \n", + " return \"\\n\\n\".join(output)\n", + " \n", + " except Exception as e:\n", + " return f\"Error searching courses: {str(e)}. Please try again.\"\n", + "\n", + "print(\"โœ… Real Redis-powered search tool created!\")\n", + "print(\"\\nDescription:\")\n", + "print(search_courses.description)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Much better!** Now the LLM knows exactly when to use this tool." 
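+ ,
+ "\n",
+ "To watch the selection happen, you can bind the tool to the model. A minimal sketch (assuming the `llm` initialized during setup; the printed `tool_calls` value is illustrative):\n",
+ "\n",
+ "```python\n",
+ "# The model sees only the schema and decides whether (and how) to call the tool\n",
+ "llm_with_tools = llm.bind_tools([search_courses])\n",
+ "response = await llm_with_tools.ainvoke(\"What machine learning courses are available?\")\n",
+ "print(response.tool_calls)  # e.g. [{'name': 'search_courses', 'args': {'query': 'machine learning'}, ...}]\n",
+ "```"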
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 3: Understanding args_schema\n", + "\n", + "Before we add more tools, let's learn about `args_schema` - a powerful pattern for better tool validation:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### What is args_schema?\n", + "\n", + "`args_schema` is a Pydantic model that defines:\n", + "- **Parameter types** - What type each parameter should be\n", + "- **Validation rules** - What values are acceptable\n", + "- **Documentation** - Descriptions for each parameter\n", + "- **Required vs optional** - Which parameters are mandatory\n", + "\n", + "**Benefits:**\n", + "- โœ… **Better error handling** - Invalid inputs are caught early\n", + "- โœ… **Clear documentation** - LLM knows exactly what to send\n", + "- โœ… **Type safety** - Parameters are automatically validated\n", + "- โœ… **Professional pattern** - Used in production LangChain applications" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Input schema created!\n", + "Schema fields: ['course_code']\n", + "Course code description: The course code (e.g., 'CS101', 'MATH201'). Must be in format: DEPT + NUMBER\n" + ] + } + ], + "source": [ + "# First, let's create a Pydantic model for our course details tool\n", + "class GetCourseDetailsInput(BaseModel):\n", + " \"\"\"Input schema for getting course details.\"\"\"\n", + " \n", + " course_code: str = Field(\n", + " description=\"The course code (e.g., 'CS101', 'MATH201'). Must be in format: DEPT + NUMBER\"\n", + " )\n", + "\n", + "print(\"โœ… Input schema created!\")\n", + "print(f\"Schema fields: {list(GetCourseDetailsInput.model_fields.keys())}\")\n", + "print(f\"Course code description: {GetCourseDetailsInput.model_fields['course_code'].description}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 4: Adding More Tools with args_schema\n", + "\n", + "Now let's create a tool that uses the args_schema pattern:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Real Redis-powered course details tool created with args_schema!\n", + "Tool name: get_course_details\n", + "Uses schema: GetCourseDetailsInput\n" + ] + } + ], + "source": [ + "# Tool to get course details using args_schema and real Redis data\n", + "@tool(args_schema=GetCourseDetailsInput)\n", + "async def get_course_details(course_code: str) -> str:\n", + " \"\"\"\n", + " Get detailed information about a specific course by its course code.\n", + " \n", + " Use this tool when:\n", + " - Student asks about a specific course (e.g., \"Tell me about CS101\")\n", + " - You need prerequisites for a course\n", + " - You need full course details (schedule, instructor, etc.)\n", + " \n", + " Returns complete course information including description, prerequisites,\n", + " schedule, credits, and learning objectives.\n", + " \"\"\"\n", + " \n", + " try:\n", + " # Use the real Redis-powered course manager (same as reference agent)\n", + " course = await course_manager.get_course_by_code(course_code.upper())\n", + " \n", + " if not course:\n", + " return f\"Course {course_code} not found. 
Please check the course code and try again.\"\n", + " \n", + " # Format prerequisites\n", + " prereqs = \"None\" if not course.prerequisites else \", \".join(\n", + " [f\"{p.course_code} (min grade: {p.min_grade})\" for p in course.prerequisites]\n", + " )\n", + " \n", + " # Format learning objectives\n", + " objectives = \"\\n\".join([f\"- {obj}\" for obj in course.learning_objectives]) if course.learning_objectives else \"Not specified\"\n", + " \n", + " return f\"\"\"{course.course_code}: {course.title}\n", + "\n", + "Description: {course.description}\n", + "\n", + "Details:\n", + "- Credits: {course.credits}\n", + "- Department: {course.department}\n", + "- Major: {course.major}\n", + "- Difficulty: {course.difficulty_level.value}\n", + "- Format: {course.format.value}\n", + "- Instructor: {course.instructor}\n", + "- Prerequisites: {prereqs}\n", + "\n", + "Learning Objectives:\n", + "{objectives}\"\"\"\n", + " \n", + " except Exception as e:\n", + " return f\"Error retrieving course details: {str(e)}. Please try again.\"\n", + "\n", + "print(\"โœ… Real Redis-powered course details tool created with args_schema!\")\n", + "print(f\"Tool name: {get_course_details.name}\")\n", + "print(f\"Uses schema: {get_course_details.args_schema.__name__}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Testing Redis-Powered Tools\n", + "\n", + "**Note:** Our tools now use real Redis data and are async functions. In a Jupyter notebook, you can test them like this:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "๐Ÿงช Testing Redis-powered tools:\n", + "\n", + "1. Testing course search:\n", + "16:39:47 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "CS001: Introduction to Programming\n", + " Credits: 4 | online | beginner\n", + " Comprehensive study of introduction to programming. Core concepts and practical applications in computer science....\n", + "\n", + "CS004: Operating Systems\n", + " Credits: 4 | online | beginner\n", + " Comprehensive study of operating systems. Core concepts and practical applications in computer science....\n", + "\n", + "CS006: Software Engineering\n", + " Credits: 3 | in_person | intermediate\n", + " Comprehensive study of software engineering. Core concepts and practical applications in computer science....\n", + "\n", + "2. Testing course details:\n", + "Error retrieving course details: 'list' object has no attribute 'docs'. Please try again.\n" + ] + } + ], + "source": [ + "# Test the Redis-powered tools\n", + "print(\"๐Ÿงช Testing Redis-powered tools:\")\n", + "\n", + "if course_manager:\n", + " try:\n", + " print(\"\\n1. Testing course search:\")\n", + " result = await search_courses.ainvoke({\"query\": \"programming\", \"limit\": 3})\n", + " print(result)\n", + " \n", + " print(\"\\n2. 
Testing course details:\")\n",
+ "        # Try to get details for a course that might exist\n",
+ "        result = await get_course_details.ainvoke({\"course_code\": \"RU101\"})\n",
+ "        print(result)\n",
+ "        \n",
+ "    except Exception as e:\n",
+ "        print(f\"Note: direct testing failed ({e}) - it requires Redis and an async environment.\")\n",
+ "        print(\"The tools are still ready for use with the LangChain agent!\")\n",
+ "else:\n",
+ "    print(\"Course manager not available - tools are ready for use when Redis is connected!\")\n",
+ "    print(\"\\n✅ The tools will work perfectly with the LangChain agent in an async environment.\")\n",
+ "    print(\"✅ They use the same Redis-powered CourseManager as our reference agent.\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 5: More Complex args_schema\n",
+ "\n",
+ "Let's create a more complex schema for our prerequisites checker:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "✅ Prerequisites schema created!\n",
+ "Schema fields: ['course_code', 'completed_courses']\n",
+ "Completed courses default: []\n"
+ ]
+ }
+ ],
+ "source": [
+ "# More complex schema with validation\n",
+ "class CheckPrerequisitesInput(BaseModel):\n",
+ "    \"\"\"Input schema for checking course prerequisites.\"\"\"\n",
+ "    \n",
+ "    course_code: str = Field(\n",
+ "        description=\"The course code to check prerequisites for (e.g., 'CS301')\"\n",
+ "    )\n",
+ "    completed_courses: List[str] = Field(\n",
+ "        description=\"List of course codes the student has completed (e.g., ['CS101', 'CS201'])\",\n",
+ "        default=[]\n",
+ "    )\n",
+ "\n",
+ "print(\"✅ Prerequisites schema created!\")\n",
+ "print(f\"Schema fields: {list(CheckPrerequisitesInput.model_fields.keys())}\")\n",
+ "print(f\"Completed courses default: {CheckPrerequisitesInput.model_fields['completed_courses'].default}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 6: Prerequisites Checker with Validation\n",
+ "\n",
+ "Now let's create the prerequisites tool with proper validation:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "✅ Real Redis-powered prerequisites checker created with args_schema!\n",
+ "Tool name: check_prerequisites\n",
+ "Uses schema: CheckPrerequisitesInput\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Tool to check prerequisites with args_schema using real Redis data\n",
+ "@tool(args_schema=CheckPrerequisitesInput)\n",
+ "async def check_prerequisites(course_code: str, completed_courses: List[str]) -> str:\n",
+ "    \"\"\"\n",
+ "    Check if a student meets the prerequisites for a specific course.\n",
+ "    \n",
+ "    Use this tool when:\n",
+ "    - Student asks \"Can I take [course]?\"\n",
+ "    - Student asks about prerequisites\n",
+ "    - You need to verify eligibility before recommending a course\n",
+ "    \n",
+ "    Returns whether the student is eligible and which prerequisites are missing (if any).\n",
+ "    \"\"\"\n",
+ "    \n",
+ "    try:\n",
+ "        # Use the real Redis-powered course manager (same as reference agent)\n",
+ "        course = await course_manager.get_course_by_code(course_code.upper())\n",
+ "        \n",
+ "        if not course:\n",
+ "            return f\"Course {course_code} not found. 
Please check the course code and try again.\"\n", + " \n", + " # Convert completed courses to uppercase for comparison\n", + " completed_courses_upper = [c.upper() for c in completed_courses]\n", + " \n", + " if not course.prerequisites:\n", + " return f\"โœ… {course.course_code} has no prerequisites. You can take this course!\"\n", + " \n", + " # Check each prerequisite\n", + " missing = []\n", + " for prereq in course.prerequisites:\n", + " if prereq.course_code not in completed_courses_upper:\n", + " missing.append(f\"{prereq.course_code} (min grade: {prereq.min_grade})\")\n", + " \n", + " if not missing:\n", + " return f\"โœ… You meet all prerequisites for {course.course_code}!\"\n", + " \n", + " return f\"\"\"โŒ You're missing prerequisites for {course.course_code}:\n", + "\n", + "Missing:\n", + "\"\"\" + \"\\n\".join([f\"- {p}\" for p in missing])\n", + " \n", + " except Exception as e:\n", + " return f\"Error checking prerequisites: {str(e)}. Please try again.\"\n", + "\n", + "print(\"โœ… Real Redis-powered prerequisites checker created with args_schema!\")\n", + "print(f\"Tool name: {check_prerequisites.name}\")\n", + "print(f\"Uses schema: {check_prerequisites.args_schema.__name__}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Testing args_schema Benefits\n", + "\n", + "Let's see how args_schema provides better validation and error handling:" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "๐Ÿงช Testing prerequisites checker with args_schema:\n", + "\n", + "1. Valid input - new student:\n", + "Error checking prerequisites: 'list' object has no attribute 'docs'. Please try again.\n", + "\n", + "2. Valid input - student with prerequisites:\n", + "Error checking prerequisites: 'list' object has no attribute 'docs'. Please try again.\n", + "\n", + "3. Valid input - missing prerequisites:\n", + "Error checking prerequisites: 'list' object has no attribute 'docs'. Please try again.\n" + ] + } + ], + "source": [ + "# Test the prerequisites checker with proper validation\n", + "print(\"๐Ÿงช Testing prerequisites checker with args_schema:\")\n", + "\n", + "print(\"\\n1. Valid input - new student:\")\n", + "result = await check_prerequisites.ainvoke({\"course_code\": \"CS101\", \"completed_courses\": []})\n", + "print(result)\n", + "\n", + "print(\"\\n2. Valid input - student with prerequisites:\")\n", + "result = await check_prerequisites.ainvoke({\"course_code\": \"CS201\", \"completed_courses\": [\"CS101\"]})\n", + "print(result)\n", + "\n", + "print(\"\\n3. Valid input - missing prerequisites:\")\n", + "result = await check_prerequisites.ainvoke({\"course_code\": \"CS301\", \"completed_courses\": [\"CS101\"]})\n", + "print(result)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "๐Ÿงช Testing args_schema validation:\n", + "\n", + "4. Testing with missing required parameter:\n", + "โŒ Error: StructuredTool does not support sync invocation.\n", + "\n", + "5. 
Testing with completely missing parameters:\n", + "โœ… Validation caught error: ValidationError\n", + " Message: 1 validation error for CheckPrerequisitesInput\n", + "course_code\n", + " Field required [type=missing, input_val...\n", + "\n", + "๐ŸŽฏ args_schema provides automatic validation and better error messages!\n" + ] + } + ], + "source": [ + "# Test validation - what happens with invalid input?\n", + "print(\"๐Ÿงช Testing args_schema validation:\")\n", + "\n", + "try:\n", + " print(\"\\n4. Testing with missing required parameter:\")\n", + " # This should work because completed_courses has a default\n", + " result = check_prerequisites.invoke({\"course_code\": \"CS101\"})\n", + " print(\"โœ… Success with default value:\", result)\n", + "except Exception as e:\n", + " print(f\"โŒ Error: {e}\")\n", + "\n", + "try:\n", + " print(\"\\n5. Testing with completely missing parameters:\")\n", + " # This should fail because course_code is required\n", + " result = check_prerequisites.invoke({})\n", + " print(\"Result:\", result)\n", + "except Exception as e:\n", + " print(f\"โœ… Validation caught error: {type(e).__name__}\")\n", + " print(f\" Message: {str(e)[:100]}...\")\n", + "\n", + "print(\"\\n๐ŸŽฏ args_schema provides automatic validation and better error messages!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Benefits of args_schema\n", + "\n", + "As you can see, `args_schema` provides:\n", + "\n", + "1. **โœ… Automatic Validation** - Invalid inputs are caught before your function runs\n", + "2. **โœ… Better Error Messages** - Clear feedback about what went wrong\n", + "3. **โœ… Default Values** - Parameters can have sensible defaults\n", + "4. **โœ… Type Safety** - Parameters are automatically converted to the right types\n", + "5. **โœ… Documentation** - LLM gets detailed parameter descriptions\n", + "6. **โœ… Professional Pattern** - Used in production LangChain applications\n", + "\n", + "**When to use args_schema:**\n", + "- โœ… Tools with multiple parameters\n", + "- โœ… Tools that need validation\n", + "- โœ… Production applications\n", + "- โœ… Complex parameter types (lists, objects)\n", + "\n", + "**When simple parameters are fine:**\n", + "- โœ… Single parameter tools\n", + "- โœ… Simple string/number inputs\n", + "- โœ… Quick prototypes" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "๐Ÿ“Š Comparison: Simple vs args_schema tools\n", + "==================================================\n", + "\n", + "๐Ÿ”ง Simple tool (search_courses):\n", + " Parameters: {'query': {'title': 'Query', 'type': 'string'}, 'limit': {'default': 5, 'title': 'Limit', 'type': 'integer'}}\n", + " Schema: \n", + "\n", + "๐Ÿ”ง args_schema tool (get_course_details):\n", + " Parameters: {'course_code': {'description': \"The course code (e.g., 'CS101', 'MATH201'). 
Must be in format: DEPT + NUMBER\", 'title': 'Course Code', 'type': 'string'}}\n", + " Schema: GetCourseDetailsInput\n", + " Schema fields: ['course_code']\n", + "\n", + "๐ŸŽฏ Both patterns are valid - choose based on your needs!\n" + ] + } + ], + "source": [ + "# Compare: Simple tool vs args_schema tool\n", + "print(\"๐Ÿ“Š Comparison: Simple vs args_schema tools\")\n", + "print(\"=\" * 50)\n", + "\n", + "print(\"\\n๐Ÿ”ง Simple tool (search_courses):\")\n", + "print(f\" Parameters: {search_courses.args}\")\n", + "print(f\" Schema: {getattr(search_courses, 'args_schema', 'None')}\")\n", + "\n", + "print(\"\\n๐Ÿ”ง args_schema tool (get_course_details):\")\n", + "print(f\" Parameters: {get_course_details.args}\")\n", + "print(f\" Schema: {get_course_details.args_schema.__name__}\")\n", + "print(f\" Schema fields: {list(get_course_details.args_schema.model_fields.keys())}\")\n", + "\n", + "print(\"\\n๐ŸŽฏ Both patterns are valid - choose based on your needs!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**๐ŸŽ‰ Excellent!** Now we have three useful tools. Let's see how the LLM uses them." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## ๐Ÿค– Hands-on: Testing Tools with an Agent\n", + "\n", + "Let's see how the LLM selects and uses our tools:" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Agent configured with Redis-powered tools!\n", + "Available tools: ['search_courses', 'get_course_details', 'check_prerequisites']\n", + "๐Ÿ”— Using the same CourseManager as our reference agent\n" + ] + } + ], + "source": [ + "# Bind tools to LLM (same pattern as our LangGraph agent)\n", + "tools = [search_courses, get_course_details, check_prerequisites]\n", + "\n", + "if llm:\n", + " llm_with_tools = llm.bind_tools(tools)\n", + " \n", + " # System prompt\n", + " system_prompt = \"\"\"You are the Redis University Class Agent.\n", + " Help students find courses and plan their schedule.\n", + " Use the available tools to search courses and check prerequisites.\n", + " \"\"\"\n", + " \n", + " print(\"โœ… Agent configured with Redis-powered tools!\")\n", + " print(f\"Available tools: {[tool.name for tool in tools]}\")\n", + " print(\"๐Ÿ”— Using the same CourseManager as our reference agent\")\n", + "else:\n", + " print(\"โš ๏ธ LLM not available - tools are ready for use when OpenAI API key is set\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Test 1: Search Query\n", + "\n", + "Let's see what happens when a student asks about machine learning:" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "16:40:59 httpx INFO HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "๐Ÿ‘ค User: I'm interested in machine learning courses\n", + "\n", + "๐Ÿค– Agent decision:\n", + " ๐Ÿ”ง Tool: search_courses\n", + " ๐Ÿ“‹ Args: {'query': 'machine learning'}\n", + "\n", + "============================================================\n" + ] + } + ], + "source": [ + "# Test 1: Search query\n", + "if llm:\n", + " messages = [\n", + " SystemMessage(content=system_prompt),\n", + " HumanMessage(content=\"I'm interested in machine learning courses\")\n", + " ]\n", + " \n", + " response = llm_with_tools.invoke(messages)\n", + " \n", + " print(\"๐Ÿ‘ค User: I'm interested in machine 
learning courses\")\n", + " print(\"\\n๐Ÿค– Agent decision:\")\n", + " if response.tool_calls:\n", + " for tool_call in response.tool_calls:\n", + " print(f\" ๐Ÿ”ง Tool: {tool_call['name']}\")\n", + " print(f\" ๐Ÿ“‹ Args: {tool_call['args']}\")\n", + " else:\n", + " print(\" ๐Ÿ’ฌ No tool called\")\n", + " print(f\" ๐Ÿ“ Response: {response.content}\")\n", + "else:\n", + " print(\"โš ๏ธ LLM not available - skipping test\")\n", + "\n", + "print(\"\\n\" + \"=\"*60)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Test 2: Specific Course Query\n", + "\n", + "What happens when they ask about a specific course?" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "16:41:00 httpx INFO HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "๐Ÿ‘ค User: Tell me about CS301\n", + "\n", + "๐Ÿค– Agent decision:\n", + " ๐Ÿ”ง Tool: get_course_details\n", + " ๐Ÿ“‹ Args: {'course_code': 'CS301'}\n", + "\n", + "============================================================\n" + ] + } + ], + "source": [ + "# Test 2: Specific course query\n", + "if llm:\n", + " messages = [\n", + " SystemMessage(content=system_prompt),\n", + " HumanMessage(content=\"Tell me about CS301\")\n", + " ]\n", + " \n", + " response = llm_with_tools.invoke(messages)\n", + " \n", + " print(\"๐Ÿ‘ค User: Tell me about CS301\")\n", + " print(\"\\n๐Ÿค– Agent decision:\")\n", + " if response.tool_calls:\n", + " for tool_call in response.tool_calls:\n", + " print(f\" ๐Ÿ”ง Tool: {tool_call['name']}\")\n", + " print(f\" ๐Ÿ“‹ Args: {tool_call['args']}\")\n", + " else:\n", + " print(\" ๐Ÿ’ฌ No tool called\")\n", + " print(f\" ๐Ÿ“ Response: {response.content}\")\n", + "else:\n", + " print(\"โš ๏ธ LLM not available - skipping test\")\n", + "\n", + "print(\"\\n\" + \"=\"*60)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Test 3: Prerequisites Query\n", + "\n", + "What about when they ask if they can take a course?" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "16:41:03 httpx INFO HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "๐Ÿ‘ค User: Can I take CS301? I've completed CS101 and CS201.\n", + "\n", + "๐Ÿค– Agent decision:\n", + " ๐Ÿ”ง Tool: check_prerequisites\n", + " ๐Ÿ“‹ Args: {'course_code': 'CS301', 'completed_courses': ['CS101', 'CS201']}\n", + "\n", + "============================================================\n" + ] + } + ], + "source": [ + "# Test 3: Prerequisites query\n", + "if llm:\n", + " messages = [\n", + " SystemMessage(content=system_prompt),\n", + " HumanMessage(content=\"Can I take CS301? I've completed CS101 and CS201.\")\n", + " ]\n", + " \n", + " response = llm_with_tools.invoke(messages)\n", + " \n", + " print(\"๐Ÿ‘ค User: Can I take CS301? 
I've completed CS101 and CS201.\")\n", + " print(\"\\n๐Ÿค– Agent decision:\")\n", + " if response.tool_calls:\n", + " for tool_call in response.tool_calls:\n", + " print(f\" ๐Ÿ”ง Tool: {tool_call['name']}\")\n", + " print(f\" ๐Ÿ“‹ Args: {tool_call['args']}\")\n", + " else:\n", + " print(\" ๐Ÿ’ฌ No tool called\")\n", + " print(f\" ๐Ÿ“ Response: {response.content}\")\n", + "else:\n", + " print(\"โš ๏ธ LLM not available - skipping test\")\n", + "\n", + "print(\"\\n\" + \"=\"*60)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## ๐ŸŽฎ Try It Yourself: Create Your Own Tool\n", + "\n", + "Now it's your turn! Create a tool and test it:" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Department schema created!\n" + ] + } + ], + "source": [ + "# First, create the schema for your tool\n", + "class GetCoursesByDepartmentInput(BaseModel):\n", + " \"\"\"Input schema for getting courses by department.\"\"\"\n", + " \n", + " department: str = Field(\n", + " description=\"Department code (e.g., 'CS', 'MATH', 'PHYS'). Case insensitive.\"\n", + " )\n", + "\n", + "print(\"โœ… Department schema created!\")" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Real Redis-powered department tool created with args_schema!\n", + "Tool name: get_courses_by_department\n", + "Uses schema: GetCoursesByDepartmentInput\n", + "\n", + "๐Ÿงช Testing your tool:\n", + "16:41:06 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "Courses in CS department (1 found):\n", + "CS101: Python Basics (3 credits)\n" + ] + } + ], + "source": [ + "# Your turn! Create a tool to get courses by department with args_schema using real Redis data\n", + "@tool(args_schema=GetCoursesByDepartmentInput)\n", + "async def get_courses_by_department(department: str) -> str:\n", + " \"\"\"\n", + " Get all courses offered by a specific department.\n", + " \n", + " Use this tool when:\n", + " - Student asks \"What CS courses are available?\"\n", + " - Student wants to see all courses in a department\n", + " - Student asks about course offerings by department\n", + " \n", + " Returns a list of all courses in the specified department.\n", + " \"\"\"\n", + " \n", + " try:\n", + " # Use the real Redis-powered course manager with department filter\n", + " filters = {\"department\": department.upper()}\n", + " results = await course_manager.search_courses(\n", + " query=\"\", # Empty query to get all courses\n", + " filters=filters,\n", + " limit=50, # Get more courses for department listing\n", + " similarity_threshold=0.0 # Include all courses in department\n", + " )\n", + " \n", + " if not results:\n", + " return f\"No courses found in {department.upper()} department. Please check the department code.\"\n", + " \n", + " # Format results for display\n", + " output = []\n", + " for course in results:\n", + " output.append(\n", + " f\"{course.course_code}: {course.title} ({course.credits} credits)\"\n", + " )\n", + " \n", + " return f\"Courses in {department.upper()} department ({len(results)} found):\\n\" + \"\\n\".join(output)\n", + " \n", + " except Exception as e:\n", + " return f\"Error retrieving department courses: {str(e)}. 
Please try again.\"\n", + "\n", + "print(\"โœ… Real Redis-powered department tool created with args_schema!\")\n", + "print(f\"Tool name: {get_courses_by_department.name}\")\n", + "print(f\"Uses schema: {get_courses_by_department.args_schema.__name__}\")\n", + "\n", + "# Test your tool\n", + "print(\"\\n๐Ÿงช Testing your tool:\")\n", + "if course_manager:\n", + " try:\n", + " import asyncio\n", + " result = await get_courses_by_department.ainvoke({\"department\": \"CS\"})\n", + " print(result)\n", + " except Exception as e:\n", + " print(f\"Test requires async environment. Tool is ready for use with the agent!\")\n", + "else:\n", + " print(\"Course manager not available - tool is ready for use when Redis is connected!\")" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "16:41:07 httpx INFO HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "๐Ÿ‘ค User: What computer science courses are available?\n", + "\n", + "๐Ÿค– Agent decision:\n", + " ๐Ÿ”ง Tool: get_courses_by_department\n", + " ๐Ÿ“‹ Args: {'department': 'CS'}\n", + "\n", + "๐ŸŽฏ Did the agent choose your tool? Try different queries to test tool selection!\n" + ] + } + ], + "source": [ + "# Test your tool with the agent\n", + "if llm:\n", + " # Add your tool to the agent\n", + " all_tools = [search_courses, get_course_details, check_prerequisites, get_courses_by_department]\n", + " llm_with_all_tools = llm.bind_tools(all_tools)\n", + " \n", + " messages = [\n", + " SystemMessage(content=system_prompt),\n", + " HumanMessage(content=\"What computer science courses are available?\")\n", + " ]\n", + " \n", + " response = llm_with_all_tools.invoke(messages)\n", + " \n", + " print(\"๐Ÿ‘ค User: What computer science courses are available?\")\n", + " print(\"\\n๐Ÿค– Agent decision:\")\n", + " if response.tool_calls:\n", + " for tool_call in response.tool_calls:\n", + " print(f\" ๐Ÿ”ง Tool: {tool_call['name']}\")\n", + " print(f\" ๐Ÿ“‹ Args: {tool_call['args']}\")\n", + " else:\n", + " print(\" ๐Ÿ’ฌ No tool called\")\n", + " print(f\" ๐Ÿ“ Response: {response.content}\")\n", + "else:\n", + " print(\"โš ๏ธ LLM not available - skipping test\")\n", + "\n", + "print(\"\\n๐ŸŽฏ Did the agent choose your tool? Try different queries to test tool selection!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## ๐ŸŽฏ Key Takeaways\n", + "\n", + "From this hands-on exploration, you've learned:\n", + "\n", + "### โœ… **Tool Design Best Practices**\n", + "\n", + "1. **Clear Names**\n", + " - Use descriptive, action-oriented names\n", + " - `search_courses` โœ… vs. `find` โŒ\n", + "\n", + "2. **Detailed Descriptions**\n", + " - Explain what the tool does\n", + " - Explain when to use it\n", + " - Include examples\n", + "\n", + "3. **Well-Defined Parameters**\n", + " - Use type hints\n", + " - Add descriptions for each parameter\n", + " - Set sensible defaults\n", + " - **Use args_schema for complex tools**\n", + "\n", + "4. **Useful Return Values**\n", + " - Return formatted, readable text\n", + " - Include relevant details\n", + " - Handle errors gracefully\n", + "\n", + "5. 
**Single Responsibility**\n", + " - Each tool should do one thing well\n", + " - Don't combine unrelated functionality\n", + "\n", + "### โœ… **How Tool Descriptions Affect Selection**\n", + "\n", + "The LLM relies heavily on tool descriptions to decide which tool to use:\n", + "\n", + "- โœ… **Good description**: \"Search for courses using keywords. Use when students ask about topics or course characteristics.\"\n", + "- โŒ **Bad description**: \"Search courses\"\n", + "\n", + "**Remember:** The LLM can't see your code, only the schema!\n", + "\n", + "### โœ… **LangChain Integration**\n", + "\n", + "- **@tool decorator** makes creating tools simple\n", + "- **llm.bind_tools()** connects tools to your LLM\n", + "- **Tool selection** happens automatically based on descriptions\n", + "- **Compatible** with our LangGraph agent architecture\n", + "- **args_schema** provides validation and better documentation\n", + "- **Redis-powered** using the same CourseManager as our reference agent\n", + "- **Async support** for real-time data access and performance\n", + "\n", + "### ๐Ÿš€ **Next Steps**\n", + "You're now ready to:\n", + "- Build effective tools for any AI agent\n", + "- Write descriptions that guide LLM behavior\n", + "- Test and iterate on tool selection\n", + "- Move on to **Context-Aware Tool Integration** for advanced patterns\n", + "\n", + "---\n", + "\n", + "**Great work!** You've mastered the fundamentals of tool definition with LangChain.\n", + "\n", + "---\n", + "\n", + "## ๐Ÿ“ **Quick Practice Exercises**\n", + "\n", + "Before moving on, try these focused exercises:\n", + "\n", + "### **Exercise 1: Create a Department Tool**\n", + "Build `get_courses_by_department` that takes a department name and returns all courses in that department. Write a clear description!\n", + "\n", + "### **Exercise 2: Test Tool Selection**\n", + "Create queries that should trigger each tool:\n", + "- \"What ML courses are available?\" โ†’ `search_courses`\n", + "- \"Can I take CS301?\" โ†’ `check_prerequisites` \n", + "- \"Tell me about CS101\" โ†’ `get_course_details`\n", + "\n", + "### **Exercise 3: Improve a Description**\n", + "Pick any tool and improve its description. Add \"Use this when...\" examples and test if the LLM selects it better.\n", + "\n", + "### **Exercise 4: Design a Schedule Tool**\n", + "Plan a tool for student schedules. What parameters? What return format? How to handle errors?\n", + "\n", + "**Start with Exercise 1** - it builds directly on what you learned!\n", + "\n", + "---\n", + "\n", + "Ready to continue with **`03_context_aware_tool_integration.ipynb`** to learn advanced context patterns?\n", + "\n", + "---\n", + "\n", + "## ๐ŸŽฏ **Ready to Practice?**\n", + "\n", + "Follow this step-by-step guide to build `get_courses_by_department`. Complete each section methodically." 
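+    ,
+    "\n",
+    "\n",
+    "As a starting point, here is a minimal skeleton for the exercise. Treat it as a sketch rather than the definitive answer: the docstring wording and result formatting are placeholders to refine, and it assumes the `course_manager`, `BaseModel`, `Field`, and `@tool` setup from earlier in this notebook.\n",
+    "\n",
+    "```python\n",
+    "# Sketch only -- adapt the names, description, and formatting to your design\n",
+    "class GetCoursesByDepartmentInput(BaseModel):\n",
+    "    \"\"\"Input schema for listing a department's courses.\"\"\"\n",
+    "\n",
+    "    department: str = Field(\n",
+    "        description=\"Department code (e.g., 'CS', 'MATH'). Case insensitive.\"\n",
+    "    )\n",
+    "\n",
+    "@tool(args_schema=GetCoursesByDepartmentInput)\n",
+    "async def get_courses_by_department(department: str) -> str:\n",
+    "    \"\"\"\n",
+    "    Get all courses offered by a specific department.\n",
+    "\n",
+    "    Use this tool when:\n",
+    "    - Student asks \"What CS courses are available?\"\n",
+    "    - Student wants an overview of a department's offerings\n",
+    "    \"\"\"\n",
+    "    try:\n",
+    "        results = await course_manager.search_courses(\n",
+    "            query=\"\",  # Empty query returns all courses in the department\n",
+    "            filters={\"department\": department.upper()},\n",
+    "            limit=50,\n",
+    "        )\n",
+    "        if not results:\n",
+    "            return f\"No courses found in {department.upper()}. Check the department code.\"\n",
+    "        lines = [f\"{c.course_code}: {c.title} ({c.credits} credits)\" for c in results]\n",
+    "        return \"\\n\".join(lines)\n",
+    "    except Exception as e:\n",
+    "        return f\"Error retrieving courses: {str(e)}. Please try again.\"\n",
+    "```"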
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/python-recipes/context-engineering/notebooks_archive/section-3-memory/02_long_term_memory.ipynb b/python-recipes/context-engineering/notebooks_archive/section-3-memory/02_long_term_memory.ipynb new file mode 100644 index 00000000..a769d7b4 --- /dev/null +++ b/python-recipes/context-engineering/notebooks_archive/section-3-memory/02_long_term_memory.ipynb @@ -0,0 +1,876 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Long-term Memory: Cross-Session Knowledge\n", + "\n", + "## Introduction\n", + "\n", + "In this notebook, you'll learn about long-term memory - persistent knowledge that survives across sessions. While working memory handles the current conversation, long-term memory stores important facts, preferences, and experiences that should be remembered indefinitely.\n", + "\n", + "### What You'll Learn\n", + "\n", + "- What long-term memory is and why it's essential\n", + "- The three types of long-term memories: semantic, episodic, and message\n", + "- How to store and retrieve long-term memories\n", + "- How semantic search works with memories\n", + "- How automatic deduplication prevents redundancy\n", + "\n", + "### Prerequisites\n", + "\n", + "- Completed Section 2 notebooks\n", + "- Completed `01_working_memory_with_extraction_strategies.ipynb`\n", + "- Redis 8 running locally\n", + "- Agent Memory Server running\n", + "- OpenAI API key set" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Concepts: Long-term Memory\n", + "\n", + "### What is Long-term Memory?\n", + "\n", + "Long-term memory is **persistent, cross-session knowledge** about users, preferences, and important facts. Unlike working memory (which is session-scoped), long-term memory:\n", + "\n", + "- โœ… Survives across sessions\n", + "- โœ… Accessible from any conversation\n", + "- โœ… Searchable via semantic vector search\n", + "- โœ… Automatically deduplicated\n", + "- โœ… Organized by user/namespace\n", + "\n", + "### Working Memory vs. Long-term Memory\n", + "\n", + "| Working Memory | Long-term Memory |\n", + "|----------------|------------------|\n", + "| **Session-scoped** | **User-scoped** |\n", + "| Current conversation | Important facts |\n", + "| TTL-based (expires) | Persistent |\n", + "| Full message history | Extracted knowledge |\n", + "| Loaded/saved each turn | Searched when needed |\n", + "\n", + "### Three Types of Long-term Memories\n", + "\n", + "The Agent Memory Server supports three types of long-term memories:\n", + "\n", + "1. **Semantic Memory** - Facts and knowledge\n", + " - Example: \"Student prefers online courses\"\n", + " - Example: \"Student's major is Computer Science\"\n", + " - Example: \"Student wants to graduate in 2026\"\n", + "\n", + "2. 
**Episodic Memory** - Events and experiences\n", + " - Example: \"Student enrolled in CS101 on 2024-09-15\"\n", + " - Example: \"Student asked about machine learning on 2024-09-20\"\n", + " - Example: \"Student completed Data Structures course\"\n", + "\n", + "3. **Message Memory** - Important conversation snippets\n", + " - Example: Full conversation about career goals\n", + " - Example: Detailed discussion about course preferences\n", + "\n", + "## Choosing the Right Memory Type\n", + "\n", + "Understanding WHEN to use each memory type is crucial for effective memory management.\n", + "\n", + "### Decision Framework\n", + "\n", + "#### Use Semantic Memory for: Facts and Preferences\n", + "\n", + "**Characteristics:**\n", + "- Timeless information (not tied to specific moment)\n", + "- Likely to be referenced repeatedly\n", + "- Can be stated independently of context\n", + "\n", + "**Examples:**\n", + "```python\n", + "# โœ… Good semantic memories\n", + "\"Student prefers online courses\"\n", + "\"Student's major is Computer Science\" \n", + "\"Student wants to graduate in 2026\"\n", + "\"Student struggles with mathematics\"\n", + "\"Student is interested in machine learning\"\n", + "```\n", + "\n", + "**Why semantic:**\n", + "- Facts that don't change often\n", + "- Will be useful across many sessions\n", + "- Don't need temporal context\n", + "\n", + "---\n", + "\n", + "#### Use Episodic Memory for: Events and Timeline\n", + "\n", + "**Characteristics:**\n", + "- Time-bound events\n", + "- Sequence/timeline matters\n", + "- Tracking progress or history\n", + "\n", + "**Examples:**\n", + "```python\n", + "# โœ… Good episodic memories\n", + "\"Student enrolled in CS101 on 2024-09-15\"\n", + "\"Student completed CS101 on 2024-12-10\"\n", + "\"Student started CS201 on 2024-01-15\"\n", + "\"Student asked about career planning on 2024-10-20\"\n", + "\"Student expressed concerns about workload on 2024-10-27\"\n", + "```\n", + "\n", + "**Why episodic:**\n", + "- Events have specific dates\n", + "- Order of events matters (CS101 before CS201)\n", + "- Tracking student's journey over time\n", + "\n", + "---\n", + "\n", + "#### Use Message Memory for: Context-Rich Conversations\n", + "\n", + "**Characteristics:**\n", + "- Full context is crucial\n", + "- Tone/emotion matters\n", + "- May need exact wording\n", + "- Complex multi-part discussions\n", + "\n", + "**Examples:**\n", + "```python\n", + "# โœ… Good message memories\n", + "\"Detailed career planning discussion: [full conversation]\"\n", + "\"Professor's specific advice about research opportunities: [full message]\"\n", + "\"Student's explanation of personal learning challenges: [full message]\"\n", + "```\n", + "\n", + "**Why message:**\n", + "- Summary would lose important nuance\n", + "- Context around the words matters\n", + "- Verbatim quote may be needed\n", + "\n", + "**โš ๏ธ Use sparingly - message memories are token-expensive!**\n", + "\n", + "### Examples: Right vs. 
Wrong\n", + "\n", + "#### Scenario 1: Student States Preference\n", + "\n", + "**User says:** \"I prefer online courses because I work during the day.\"\n", + "\n", + "โŒ **Wrong:**\n", + "```python\n", + "# Message memory (too verbose)\n", + "memory = \"Student said: 'I prefer online courses because I work during the day.'\"\n", + "```\n", + "\n", + "โœ… **Right:**\n", + "```python\n", + "# Semantic memories (extracted facts)\n", + "memory1 = \"Student prefers online courses\"\n", + "memory2 = \"Student works during the day\"\n", + "```\n", + "\n", + "**Why:** Simple facts don't need full verbatim storage.\n", + "\n", + "---\n", + "\n", + "#### Scenario 2: Course Completion\n", + "\n", + "**User says:** \"I just finished CS101 last week!\"\n", + "\n", + "โŒ **Wrong:**\n", + "```python\n", + "# Semantic (loses temporal context)\n", + "memory = \"Student completed CS101\"\n", + "```\n", + "\n", + "โœ… **Right:**\n", + "```python\n", + "# Episodic (preserves timeline)\n", + "memory = \"Student completed CS101 on 2024-10-20\"\n", + "```\n", + "\n", + "**Why:** Timeline matters for prerequisites and planning.\n", + "\n", + "---\n", + "\n", + "#### Scenario 3: Complex Career Advice\n", + "\n", + "**Conversation:** 20-message discussion about career path, including professor's nuanced advice about research vs. industry, timing of applications, and specific companies to target.\n", + "\n", + "โŒ **Wrong:**\n", + "```python\n", + "# Semantic (loses too much)\n", + "memory = \"Student discussed career planning\"\n", + "```\n", + "\n", + "โœ… **Right:**\n", + "```python\n", + "# Message memory (preserves context)\n", + "memory = [Full conversation thread with all nuance]\n", + "```\n", + "\n", + "**Why:** Details and context are critical, summary inadequate.\n", + "\n", + "### Quick Reference Table\n", + "\n", + "| Information Type | Memory Type | Example |\n", + "|-----------------|-------------|----------|\n", + "| Preference | Semantic | \"Prefers morning classes\" |\n", + "| Fact | Semantic | \"Major is Computer Science\" |\n", + "| Goal | Semantic | \"Wants to graduate in 2026\" |\n", + "| Event | Episodic | \"Enrolled in CS401 on 2024-09-15\" |\n", + "| Timeline | Episodic | \"Completed CS101, then CS201\" |\n", + "| Progress | Episodic | \"Asked about ML three times\" |\n", + "| Complex discussion | Message | [Full career planning conversation] |\n", + "| Nuanced advice | Message | [Professor's detailed guidance] |\n", + "\n", + "### Default Strategy: Prefer Semantic\n", + "\n", + "**When in doubt:**\n", + "1. Can you extract a simple fact? โ†’ **Semantic**\n", + "2. Is timing important? โ†’ **Episodic**\n", + "3. Is full context crucial? 
โ†’ **Message** (use rarely)\n", + "\n", + "**Most memories should be semantic** - they're compact, searchable, and efficient.\n", + "\n", + "### How Semantic Search Works\n", + "\n", + "Long-term memories are stored with vector embeddings, enabling semantic search:\n", + "\n", + "- Query: \"What does the student like?\"\n", + "- Finds: \"Student prefers online courses\", \"Student enjoys programming\"\n", + "- Even though exact words don't match!\n", + "\n", + "### Automatic Deduplication\n", + "\n", + "The Agent Memory Server automatically prevents duplicate memories:\n", + "\n", + "- **Hash-based**: Exact duplicates are rejected\n", + "- **Semantic**: Similar memories are merged\n", + "- Keeps memory storage efficient" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from dotenv import load_dotenv\n", + "\n", + "# Load environment variables from .env file\n", + "load_dotenv()\n", + "\n", + "# Verify required environment variables are set\n", + "if not os.getenv(\"OPENAI_API_KEY\"):\n", + " raise ValueError(\n", + " \"OPENAI_API_KEY not found. Please create a .env file with your OpenAI API key. \"\n", + " \"See SETUP.md for instructions.\"\n", + " )\n", + "\n", + "print(\"โœ… Environment variables loaded\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import asyncio\n", + "from datetime import datetime\n", + "from agent_memory_client import MemoryAPIClient as MemoryClient, MemoryClientConfig\n", + "from agent_memory_client.models import ClientMemoryRecord\n", + "from agent_memory_client.filters import MemoryType\n", + "\n", + "# Initialize memory client\n", + "student_id = \"student_123\"\n", + "config = MemoryClientConfig(\n", + " base_url=os.getenv(\"AGENT_MEMORY_URL\", \"http://localhost:8000\"),\n", + " default_namespace=\"redis_university\"\n", + ")\n", + "memory_client = MemoryClient(config=config)\n", + "\n", + "print(f\"โœ… Memory client initialized for {student_id}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Hands-on: Working with Long-term Memory" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Example 1: Storing Semantic Memories (Facts)\n", + "\n", + "Let's store some facts about the student." 
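+    ,
+    "\n",
+    "\n",
+    "Note that `create_long_term_memory` takes a list, so the separate calls below could also be batched into one request. A hedged sketch of the batched form (it should be equivalent, assuming the server treats each record independently):\n",
+    "\n",
+    "```python\n",
+    "# Batched form: several records in a single create_long_term_memory call\n",
+    "await memory_client.create_long_term_memory([\n",
+    "    ClientMemoryRecord(\n",
+    "        text=\"Student prefers online courses over in-person classes\",\n",
+    "        memory_type=\"semantic\",\n",
+    "        topics=[\"preferences\", \"course_format\"],\n",
+    "    ),\n",
+    "    ClientMemoryRecord(\n",
+    "        text=\"Student wants to graduate in Spring 2026\",\n",
+    "        memory_type=\"semantic\",\n",
+    "        topics=[\"goals\", \"graduation\"],\n",
+    "    ),\n",
+    "])\n",
+    "```"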
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Store student preferences\n", + "await memory_client.create_long_term_memory([ClientMemoryRecord(\n", + " text=\"Student prefers online courses over in-person classes\",\n", + " memory_type=\"semantic\",\n", + " topics=[\"preferences\", \"course_format\"]\n", + ")])\n", + "\n", + "await memory_client.create_long_term_memory([ClientMemoryRecord(\n", + " text=\"Student's major is Computer Science with a focus on AI/ML\",\n", + " memory_type=\"semantic\",\n", + " topics=[\"academic_info\", \"major\"]\n", + ")])\n", + "\n", + "await memory_client.create_long_term_memory([ClientMemoryRecord(\n", + " text=\"Student wants to graduate in Spring 2026\",\n", + " memory_type=\"semantic\",\n", + " topics=[\"goals\", \"graduation\"]\n", + ")])\n", + "\n", + "await memory_client.create_long_term_memory([ClientMemoryRecord(\n", + " text=\"Student prefers morning classes, no classes on Fridays\",\n", + " memory_type=\"semantic\",\n", + " topics=[\"preferences\", \"schedule\"]\n", + ")])\n", + "\n", + "print(\"โœ… Stored 4 semantic memories (facts about the student)\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Example 2: Storing Episodic Memories (Events)\n", + "\n", + "Let's store some events and experiences." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Store course enrollment events\n", + "await memory_client.create_long_term_memory([ClientMemoryRecord(\n", + " text=\"Student enrolled in CS101: Introduction to Programming on 2024-09-01\",\n", + " memory_type=\"episodic\",\n", + " topics=[\"enrollment\", \"courses\", \"CS101\"]\n", + ")])\n", + "\n", + "await memory_client.create_long_term_memory([ClientMemoryRecord(\n", + " text=\"Student completed CS101 with grade A on 2024-12-15\",\n", + " memory_type=\"episodic\",\n", + " topics=[\"completion\", \"grades\", \"CS101\"]\n", + ")])\n", + "\n", + "await memory_client.create_long_term_memory([ClientMemoryRecord(\n", + " text=\"Student asked about machine learning courses on 2024-09-20\",\n", + " memory_type=\"episodic\",\n", + " topics=[\"inquiry\", \"machine_learning\"]\n", + ")])\n", + "\n", + "print(\"โœ… Stored 3 episodic memories (events and experiences)\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Example 3: Searching Memories with Semantic Search\n", + "\n", + "Now let's search for memories using natural language queries." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Search for preferences\n", + "print(\"Query: 'What does the student prefer?'\\n\")\n", + "results = await memory_client.search_long_term_memory(\n", + " text=\"What does the student prefer?\",\n", + " limit=3\n", + ")\n", + "\n", + "for i, memory in enumerate(results.memories, 1):\n", + " print(f\"{i}. {memory.text}\")\n", + " print(f\" Type: {memory.memory_type} | Topics: {', '.join(memory.topics)}\")\n", + " print()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Search for academic information\n", + "print(\"Query: 'What is the student studying?'\\n\")\n", + "results = await memory_client.search_long_term_memory(\n", + " text=\"What is the student studying?\",\n", + " limit=3\n", + ")\n", + "\n", + "for i, memory in enumerate(results.memories, 1):\n", + " print(f\"{i}. 
{memory.text}\")\n", + " print(f\" Type: {memory.memory_type}\")\n", + " print()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Search for course history\n", + "print(\"Query: 'What courses has the student taken?'\\n\")\n", + "results = await memory_client.search_long_term_memory(\n", + " text=\"What courses has the student taken?\",\n", + " limit=3\n", + ")\n", + "\n", + "for i, memory in enumerate(results.memories, 1):\n", + " print(f\"{i}. {memory.text}\")\n", + " print(f\" Type: {memory.memory_type} | Topics: {', '.join(memory.topics or [])}\")\n", + " print()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Example 4: Demonstrating Deduplication\n", + "\n", + "Let's try to store duplicate memories and see how deduplication works." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Try to store an exact duplicate\n", + "print(\"Attempting to store exact duplicate...\")\n", + "try:\n", + " await memory_client.create_long_term_memory([ClientMemoryRecord(\n", + " text=\"Student prefers online courses over in-person classes\",\n", + " memory_type=\"semantic\",\n", + " topics=[\"preferences\", \"course_format\"]\n", + ")])\n", + " print(\"โŒ Duplicate was stored (unexpected)\")\n", + "except Exception as e:\n", + " print(f\"โœ… Duplicate rejected: {e}\")\n", + "\n", + "# Try to store a semantically similar memory\n", + "print(\"\\nAttempting to store semantically similar memory...\")\n", + "try:\n", + " await memory_client.create_long_term_memory([ClientMemoryRecord(\n", + " text=\"Student likes taking classes online instead of on campus\",\n", + " memory_type=\"semantic\",\n", + " topics=[\"preferences\", \"course_format\"]\n", + ")])\n", + " print(\"Memory stored (may be merged with existing similar memory)\")\n", + "except Exception as e:\n", + " print(f\"โœ… Similar memory rejected: {e}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Example 5: Cross-Session Memory Access\n", + "\n", + "Let's simulate a new session and show that memories persist." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create a new memory client (simulating a new session)\n", + "config = MemoryClientConfig(\n", + " base_url=os.getenv(\"AGENT_MEMORY_URL\", \"http://localhost:8000\"),\n", + " default_namespace=\"redis_university\"\n", + ")\n", + "new_session_client = MemoryClient(config=config)\n", + "\n", + "print(\"New session started for the same student\\n\")\n", + "\n", + "# Search for memories from the new session\n", + "print(\"Query: 'What do I prefer?'\\n\")\n", + "results = await new_session_client.search_long_term_memory(\n", + " text=\"What do I prefer?\",\n", + " limit=3\n", + ")\n", + "\n", + "print(\"โœ… Memories accessible from new session:\\n\")\n", + "for i, memory in enumerate(results.memories, 1):\n", + " print(f\"{i}. 
{memory.text}\")\n", + " print()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Example 6: Filtering by Memory Type and Topics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Get all semantic memories\n", + "print(\"All semantic memories (facts):\\n\")\n", + "results = await memory_client.search_long_term_memory(\n", + " text=\"\", # Empty query returns all\n", + " memory_type=MemoryType(eq=\"semantic\"),\n", + " limit=10\n", + ")\n", + "\n", + "for i, memory in enumerate(results.memories, 1):\n", + " print(f\"{i}. {memory.text}\")\n", + " print(f\" Topics: {', '.join(memory.topics)}\")\n", + " print()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Get all episodic memories\n", + "print(\"All episodic memories (events):\\n\")\n", + "results = await memory_client.search_long_term_memory(\n", + " text=\"\",\n", + " memory_type=MemoryType(eq=\"episodic\"),\n", + " limit=10\n", + ")\n", + "\n", + "for i, memory in enumerate(results.memories, 1):\n", + " print(f\"{i}. {memory.text}\")\n", + " print(f\" Topics: {', '.join(memory.topics or [])}\")\n", + " print()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Key Takeaways\n", + "\n", + "### When to Use Long-term Memory\n", + "\n", + "Store in long-term memory:\n", + "- โœ… User preferences and settings\n", + "- โœ… Important facts about the user\n", + "- โœ… Goals and objectives\n", + "- โœ… Significant events and milestones\n", + "- โœ… Completed courses and achievements\n", + "\n", + "Don't store in long-term memory:\n", + "- โŒ Temporary conversation context\n", + "- โŒ Trivial details\n", + "- โŒ Information that changes frequently\n", + "- โŒ Sensitive data without proper handling\n", + "\n", + "### Memory Types Guide\n", + "\n", + "**Semantic (Facts):**\n", + "- \"Student prefers X\"\n", + "- \"Student's major is Y\"\n", + "- \"Student wants to Z\"\n", + "\n", + "**Episodic (Events):**\n", + "- \"Student enrolled in X on DATE\"\n", + "- \"Student completed Y with grade Z\"\n", + "- \"Student asked about X on DATE\"\n", + "\n", + "**Message (Conversations):**\n", + "- Important conversation snippets\n", + "- Detailed discussions worth preserving\n", + "\n", + "### Best Practices\n", + "\n", + "1. **Use descriptive topics** - Makes filtering and categorization easier\n", + "2. **Write clear memory text** - Will be searched semantically\n", + "3. **Include relevant details in text** - Dates, names, and context help with retrieval\n", + "4. **Let deduplication work** - Don't worry about duplicates\n", + "5. **Search before storing** - Check if similar memory exists" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Exercises\n", + "\n", + "1. **Store your own memories**: Create 5 semantic and 3 episodic memories about a fictional student. Search for them.\n", + "\n", + "2. **Test semantic search**: Create memories with different wordings but similar meanings. Search with various queries to see what matches.\n", + "\n", + "3. **Explore topics**: Add rich topics to episodic memories. How can you use topic filtering in your agent?\n", + "\n", + "4. **Cross-session test**: Create a memory, close the notebook, restart, and verify the memory persists." 
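+    ,
+    "\n",
+    "\n",
+    "To get started on Exercise 1, here is a minimal sketch. The student facts are placeholders, and it reuses the `memory_client` and `ClientMemoryRecord` from Setup:\n",
+    "\n",
+    "```python\n",
+    "# Exercise 1 starter: store memories for a fictional student, then search\n",
+    "await memory_client.create_long_term_memory([\n",
+    "    ClientMemoryRecord(\n",
+    "        text=\"Student is majoring in Biology\",\n",
+    "        memory_type=\"semantic\",\n",
+    "        topics=[\"academic_info\", \"major\"],\n",
+    "    ),\n",
+    "    ClientMemoryRecord(\n",
+    "        text=\"Student joined the robotics club on 2024-10-01\",\n",
+    "        memory_type=\"episodic\",\n",
+    "        topics=[\"activities\", \"clubs\"],\n",
+    "    ),\n",
+    "])\n",
+    "\n",
+    "results = await memory_client.search_long_term_memory(\n",
+    "    text=\"What is the student studying?\",\n",
+    "    limit=5,\n",
+    ")\n",
+    "for memory in results.memories:\n",
+    "    print(memory.text)\n",
+    "```"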
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Summary\n", + "\n", + "In this notebook, you learned:\n", + "\n", + "- โœ… Long-term memory stores persistent, cross-session knowledge\n", + "- โœ… Three types: semantic (facts), episodic (events), message (conversations)\n", + "- โœ… Semantic search enables natural language queries\n", + "- โœ… Automatic deduplication prevents redundancy\n", + "- โœ… Memories are user-scoped and accessible from any session\n", + "\n", + "**Next:** In the next notebook, we'll integrate working memory and long-term memory to build a complete memory system for our agent." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Memory Lifecycle and Persistence\n", + "\n", + "Understanding how long memories last and when they expire is important for managing your agent's memory system.\n", + "\n", + "### Working Memory Lifecycle\n", + "\n", + "**TTL (Time To Live): 24 hours by default**\n", + "\n", + "```\n", + "Session Created\n", + " โ†“\n", + "Messages Stored (each turn adds messages)\n", + " โ†“\n", + "[24 hours of inactivity]\n", + " โ†“\n", + "Working Memory Automatically Expires โŒ\n", + "```\n", + "\n", + "**What this means:**\n", + "- โœ… Working memory lasts for the duration of active conversation\n", + "- โœ… Plus 24 hours after last activity\n", + "- โœ… Automatically cleaned up (no action needed)\n", + "- โš ๏ธ After expiration, conversation context is lost\n", + "\n", + "**Example Timeline:**\n", + "```\n", + "10:00 AM - Session starts\n", + "10:15 AM - User asks about CS401\n", + "10:20 AM - User asks about prerequisites\n", + "10:25 AM - Session ends (user leaves)\n", + "\n", + "[24 hours later]\n", + "10:25 AM next day - Working memory still available โœ…\n", + "10:26 AM next day - Working memory expires โŒ\n", + "\n", + "If user returns:\n", + "10:30 AM next day - New session starts (no previous context) ๐Ÿ†•\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Long-term Memory Lifecycle\n", + "\n", + "**Persistence: Indefinite (no automatic expiration)**\n", + "\n", + "```\n", + "Memory Created\n", + " โ†“\n", + "Stored in Long-term Memory\n", + " โ†“\n", + "Available Across All Sessions โœ…\n", + " โ†“\n", + "Persists Until Manually Deleted\n", + "```\n", + "\n", + "**What this means:**\n", + "- โœ… Long-term memories never automatically expire\n", + "- โœ… Available across all sessions (any time user returns)\n", + "- โœ… Survives working memory expiration\n", + "- โš ๏ธ Must be manually deleted if needed\n", + "\n", + "**Example:**\n", + "```\n", + "Day 1, Session 1:\n", + "- User: \"I prefer online courses\"\n", + "- Extracted to long-term memory: \"Student prefers online courses\"\n", + "\n", + "Day 2, Session 2 (different session):\n", + "- Long-term memory retrieved: \"Student prefers online courses\" โœ…\n", + "- Working memory from Day 1: Expired โŒ\n", + "\n", + "Day 30, Session 10:\n", + "- Long-term memory still available: \"Student prefers online courses\" โœ…\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Why This Design?\n", + "\n", + "**Working Memory = Short-term Context**\n", + "- Conversation-specific\n", + "- High detail (full messages)\n", + "- Expires to save storage\n", + "- Like human short-term memory\n", + "\n", + "**Long-term Memory = Persistent Facts**\n", + "- User-specific knowledge\n", + "- Important facts only\n", + "- Persists indefinitely\n", + "- Like human long-term 
memory\n", + "\n", + "### Important Implications\n", + "\n", + "#### 1. Extract Before Expiration\n", + "\n", + "**Working memory expires in 24 hours!**\n", + "\n", + "```python\n", + "# โœ… Good: Extraction happens automatically\n", + "# Agent Memory Server extracts facts from working memory\n", + "# BEFORE it expires\n", + "\n", + "# โŒ Bad: Don't rely on working memory persisting\n", + "# It will expire and take conversation context with it\n", + "```\n", + "\n", + "**The Agent Memory Server handles extraction automatically** - this is why we use it!\n", + "\n", + "#### 2. Long-term Memories Are Permanent\n", + "\n", + "**Unless you explicitly delete them:**\n", + "\n", + "```python\n", + "# Manual deletion (when needed)\n", + "await memory_client.delete_memory(memory_id)\n", + "\n", + "# Or delete all memories for a user\n", + "await memory_client.delete_all_user_memories(user_id)\n", + "```\n", + "\n", + "**Use cases for deletion:**\n", + "- User requests deletion\n", + "- Memory becomes outdated (preference changed)\n", + "- Incorrect information was stored" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Practical Example: Multi-Day Conversation\n", + "\n", + "**Day 1 (Session 1):**\n", + "```python\n", + "User: \"I'm interested in machine learning\"\n", + "Agent: [Responds]\n", + "Working Memory: [Full conversation]\n", + "Long-term: \"Student interested in machine learning\" (extracted)\n", + "```\n", + "\n", + "**Day 2 (Session 2, 30 hours later):**\n", + "```python\n", + "# Working memory from Day 1: EXPIRED โŒ\n", + "# Long-term memory: Still available โœ…\n", + "\n", + "User: \"What ML courses do you recommend?\"\n", + "Agent retrieves long-term: \"Student interested in machine learning\"\n", + "Agent: [Makes relevant recommendations using stored fact]\n", + "```\n", + "\n", + "**Agent remembers across sessions thanks to long-term memory!**\n", + "\n", + "### Best Practices\n", + "\n", + "1. **Trust the extraction process**\n", + " - Agent Memory Server automatically extracts important facts\n", + " - Happens in background during conversation\n", + " - Important info moves to long-term before expiration\n", + "\n", + "2. **Don't worry about working memory expiration**\n", + " - It's designed to expire\n", + " - Important facts are already extracted\n", + " - New sessions get clean slate\n", + "\n", + "3. **Long-term memories are your persistent knowledge**\n", + " - Think of them as \"what the agent knows about the user\"\n", + " - Cross-session, cross-conversation\n", + " - The foundation of personalization\n", + "\n", + "4. **Clean up when needed**\n", + " - Outdated preferences (user says \"I now prefer in-person classes\")\n", + " - Incorrect information (wrong major was recorded)\n", + " - User requests deletion\n", + "\n", + "### Summary\n", + "\n", + "| Memory Type | Duration | Cleanup | Purpose |\n", + "|-------------|----------|---------|----------|\n", + "| Working | 24 hours | Automatic | Current conversation |\n", + "| Long-term | Indefinite | Manual | Persistent knowledge |\n", + "\n", + "**Working memory is temporary context. Long-term memory is permanent knowledge.**\n", + "\n", + "Understanding this distinction helps you design better memory strategies." 
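+    ,
+    "\n",
+    "\n",
+    "Putting the cleanup advice into practice, here is a hedged sketch of replacing an outdated preference. It assumes the `delete_memory` helper shown above and that returned records expose an `id` field; check your client version for the exact names.\n",
+    "\n",
+    "```python\n",
+    "# Sketch: user says \"I now prefer in-person classes\" -- replace the stale fact\n",
+    "results = await memory_client.search_long_term_memory(\n",
+    "    text=\"course format preference\",\n",
+    "    limit=1,\n",
+    ")\n",
+    "if results.memories:\n",
+    "    # Assumes each record exposes an `id`; adjust to your client's schema\n",
+    "    await memory_client.delete_memory(results.memories[0].id)\n",
+    "\n",
+    "await memory_client.create_long_term_memory([ClientMemoryRecord(\n",
+    "    text=\"Student now prefers in-person classes\",\n",
+    "    memory_type=\"semantic\",\n",
+    "    topics=[\"preferences\", \"course_format\"],\n",
+    ")])\n",
+    "```"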
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/python-recipes/context-engineering/notebooks_archive/section-3-memory/03_memory_integration.ipynb b/python-recipes/context-engineering/notebooks_archive/section-3-memory/03_memory_integration.ipynb new file mode 100644 index 00000000..bb7b34d9 --- /dev/null +++ b/python-recipes/context-engineering/notebooks_archive/section-3-memory/03_memory_integration.ipynb @@ -0,0 +1,571 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Memory Integration: Combining Working and Long-term Memory\n", + "\n", + "## Introduction\n", + "\n", + "In this notebook, you'll learn how to integrate working memory and long-term memory to create a complete memory system for your agent. You'll see how these two types of memory work together to provide both conversation context and persistent knowledge.\n", + "\n", + "### What You'll Learn\n", + "\n", + "- How working and long-term memory complement each other\n", + "- When to use each type of memory\n", + "- How to build a complete memory flow\n", + "- How automatic extraction works\n", + "- How to test multi-session conversations\n", + "\n", + "### Prerequisites\n", + "\n", + "- Completed `01_working_memory_with_extraction_strategies.ipynb`\n", + "- Completed `02_long_term_memory.ipynb`\n", + "- Redis 8 running locally\n", + "- Agent Memory Server running\n", + "- OpenAI API key set" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Concepts: Memory Integration\n", + "\n", + "### The Complete Memory Architecture\n", + "\n", + "A production agent needs both types of memory:\n", + "\n", + "```\n", + "โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”\n", + "โ”‚ User Query โ”‚\n", + "โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n", + " โ†“\n", + "โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”\n", + "โ”‚ 1. Load Working Memory (current conversation) โ”‚\n", + "โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n", + " โ†“\n", + "โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”\n", + "โ”‚ 2. Search Long-term Memory (relevant facts) โ”‚\n", + "โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n", + " โ†“\n", + "โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”\n", + "โ”‚ 3. 
Agent Processes with Full Context โ”‚\n", + "โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n", + " โ†“\n", + "โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”\n", + "โ”‚ 4. Save Working Memory (with new messages) โ”‚\n", + "โ”‚ โ†’ Automatic extraction to long-term โ”‚\n", + "โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n", + "```\n", + "\n", + "### Memory Flow in Detail\n", + "\n", + "**Turn 1:**\n", + "1. Load working memory (empty)\n", + "2. Search long-term memory (empty)\n", + "3. Process query\n", + "4. Save working memory\n", + "5. Extract important facts โ†’ long-term memory\n", + "\n", + "**Turn 2 (same session):**\n", + "1. Load working memory (has Turn 1 messages)\n", + "2. Search long-term memory (has extracted facts)\n", + "3. Process query with full context\n", + "4. Save working memory (Turn 1 + Turn 2)\n", + "5. Extract new facts โ†’ long-term memory\n", + "\n", + "**Turn 3 (new session, same user):**\n", + "1. Load working memory (empty - new session)\n", + "2. Search long-term memory (has all extracted facts)\n", + "3. Process query with long-term context\n", + "4. Save working memory (Turn 3 only)\n", + "5. Extract facts โ†’ long-term memory\n", + "\n", + "### When to Use Each Memory Type\n", + "\n", + "| Scenario | Working Memory | Long-term Memory |\n", + "|----------|----------------|------------------|\n", + "| Current conversation | โœ… Always | โŒ No |\n", + "| User preferences | โŒ No | โœ… Yes |\n", + "| Recent context | โœ… Yes | โŒ No |\n", + "| Important facts | โŒ No | โœ… Yes |\n", + "| Cross-session data | โŒ No | โœ… Yes |\n", + "| Temporary info | โœ… Yes | โŒ No |" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import asyncio\n", + "from datetime import datetime\n", + "from langchain_openai import ChatOpenAI\n", + "from langchain_core.messages import SystemMessage, HumanMessage, AIMessage\n", + "from agent_memory_client import MemoryAPIClient as MemoryClient, MemoryClientConfig\n", + "\n", + "# Initialize\n", + "student_id = \"student_456\"\n", + "session_id_1 = \"session_001\"\n", + "session_id_2 = \"session_002\"\n", + "\n", + "# Initialize memory client with proper config\n", + "import os\n", + "config = MemoryClientConfig(\n", + " base_url=os.getenv(\"AGENT_MEMORY_URL\", \"http://localhost:8000\"),\n", + " default_namespace=\"redis_university\"\n", + ")\n", + "memory_client = MemoryClient(config=config)\n", + "\n", + "llm = ChatOpenAI(model=\"gpt-4o\", temperature=0.7)\n", + "\n", + "print(f\"โœ… Setup complete for {student_id}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Hands-on: Building Complete Memory Flow" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Session 1, Turn 1: First Interaction\n", + "\n", + "Let's simulate the first turn of a conversation." 
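+    ,
+    "\n",
+    "\n",
+    "Before stepping through each stage, here is the whole turn condensed into a single helper. This is a sketch: it assumes the Setup imports plus `MemoryMessage` from `agent_memory_client.models` (imported in the code below), and that the `WorkingMemory` model allows reassigning `messages`. The cells that follow unpack the same four steps with printed commentary.\n",
+    "\n",
+    "```python\n",
+    "async def agent_turn(user_query: str, session_id: str, user_id: str) -> str:\n",
+    "    # 1. Load working memory (current conversation)\n",
+    "    _, wm = await memory_client.get_or_create_working_memory(\n",
+    "        session_id=session_id, user_id=user_id, model_name=\"gpt-4o\"\n",
+    "    )\n",
+    "    # 2. Search long-term memory for relevant facts\n",
+    "    lt = await memory_client.search_long_term_memory(text=user_query, limit=3)\n",
+    "    facts = \"\\n\".join(m.text for m in lt.memories)\n",
+    "    # 3. Process with full context (known facts + conversation history)\n",
+    "    messages = [SystemMessage(content=f\"You are a helpful agent.\\nKnown facts:\\n{facts}\")]\n",
+    "    for msg in wm.messages:\n",
+    "        if msg.role == \"user\":\n",
+    "            messages.append(HumanMessage(content=msg.content))\n",
+    "        else:\n",
+    "            messages.append(AIMessage(content=msg.content))\n",
+    "    messages.append(HumanMessage(content=user_query))\n",
+    "    response = llm.invoke(messages)\n",
+    "    # 4. Save working memory; fact extraction to long-term runs automatically\n",
+    "    wm.messages = list(wm.messages) + [\n",
+    "        MemoryMessage(role=\"user\", content=user_query),\n",
+    "        MemoryMessage(role=\"assistant\", content=response.content),\n",
+    "    ]\n",
+    "    await memory_client.put_working_memory(\n",
+    "        session_id=session_id, memory=wm, user_id=user_id, model_name=\"gpt-4o\"\n",
+    "    )\n",
+    "    return response.content\n",
+    "```"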
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"=\" * 80)\n", + "print(\"SESSION 1, TURN 1\")\n", + "print(\"=\" * 80)\n", + "\n", + "# Step 1: Load working memory (empty for first turn)\n", + "print(\"\\n1. Loading working memory...\")\n", + "# For first turn, working memory is empty\n", + "working_memory = None\n", + "print(f\" Messages in working memory: 0 (new session)\")\n", + "\n", + "# Step 2: Search long-term memory (empty for first interaction)\n", + "print(\"\\n2. Searching long-term memory...\")\n", + "user_query = \"Hi! I'm interested in learning about databases.\"\n", + "long_term_memories = await memory_client.search_long_term_memory(\n", + " text=user_query,\n", + " limit=3\n", + ")\n", + "print(f\" Relevant memories found: {len(long_term_memories.memories)}\")\n", + "\n", + "# Step 3: Process with LLM\n", + "print(\"\\n3. Processing with LLM...\")\n", + "messages = [\n", + " SystemMessage(content=\"You are a helpful class scheduling agent for Redis University.\"),\n", + " HumanMessage(content=user_query)\n", + "]\n", + "response = llm.invoke(messages)\n", + "print(f\"\\n User: {user_query}\")\n", + "print(f\" Agent: {response.content}\")\n", + "\n", + "# Step 4: Save working memory\n", + "print(\"\\n4. Saving working memory...\")\n", + "from agent_memory_client.models import WorkingMemory, MemoryMessage\n", + "\n", + "# Convert messages to MemoryMessage format\n", + "memory_messages = [\n", + " MemoryMessage(role=\"user\", content=user_query),\n", + " MemoryMessage(role=\"assistant\", content=response.content)\n", + "]\n", + "\n", + "# Create WorkingMemory object\n", + "working_memory = WorkingMemory(\n", + " session_id=session_id_1,\n", + " user_id=\"demo_user\",\n", + " messages=memory_messages,\n", + " memories=[],\n", + " data={}\n", + ")\n", + "\n", + "await memory_client.put_working_memory(\n", + " session_id=session_id_1,\n", + " memory=working_memory,\n", + " user_id=\"demo_user\",\n", + " model_name=\"gpt-4o\"\n", + ")\n", + "print(\" โœ… Working memory saved\")\n", + "print(\" โœ… Agent Memory Server will automatically extract important facts to long-term memory\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Session 1, Turn 2: Continuing the Conversation\n", + "\n", + "Let's continue the conversation in the same session." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"SESSION 1, TURN 2\")\n", + "print(\"=\" * 80)\n", + "\n", + "# Step 1: Load working memory (now has Turn 1)\n", + "print(\"\\n1. Loading working memory...\")\n", + "_, working_memory = await memory_client.get_or_create_working_memory(\n", + " session_id=session_id_1,\n", + " user_id=\"demo_user\",\n", + " model_name=\"gpt-4o\"\n", + ")\n", + "print(f\" Messages in working memory: {len(working_memory.messages)}\")\n", + "print(\" Previous context available: โœ…\")\n", + "\n", + "# Step 2: Search long-term memory\n", + "print(\"\\n2. Searching long-term memory...\")\n", + "user_query_2 = \"I prefer online courses and morning classes.\"\n", + "long_term_memories = await memory_client.search_long_term_memory(\n", + " text=user_query_2,\n", + " limit=3\n", + ")\n", + "print(f\" Relevant memories found: {len(long_term_memories.memories)}\")\n", + "\n", + "# Step 3: Process with LLM (with conversation history)\n", + "print(\"\\n3. 
Processing with LLM...\")\n",
+    "messages = [\n",
+    "    SystemMessage(content=\"You are a helpful class scheduling agent for Redis University.\"),\n",
+    "]\n",
+    "\n",
+    "# Add working memory messages\n",
+    "for msg in working_memory.messages:\n",
+    "    if msg.role == \"user\":\n",
+    "        messages.append(HumanMessage(content=msg.content))\n",
+    "    elif msg.role == \"assistant\":\n",
+    "        messages.append(AIMessage(content=msg.content))\n",
+    "\n",
+    "# Add new query\n",
+    "messages.append(HumanMessage(content=user_query_2))\n",
+    "\n",
+    "response = llm.invoke(messages)\n",
+    "print(f\"\\n   User: {user_query_2}\")\n",
+    "print(f\"   Agent: {response.content}\")\n",
+    "\n",
+    "# Step 4: Save working memory (with both turns)\n",
+    "print(\"\\n4. Saving working memory...\")\n",
+    "all_messages = [\n",
+    "    {\"role\": msg.role, \"content\": msg.content}\n",
+    "    for msg in working_memory.messages\n",
+    "]\n",
+    "all_messages.extend([\n",
+    "    {\"role\": \"user\", \"content\": user_query_2},\n",
+    "    {\"role\": \"assistant\", \"content\": response.content}\n",
+    "])\n",
+    "\n",
+    "from agent_memory_client.models import WorkingMemory, MemoryMessage\n",
+    "\n",
+    "# Convert messages to MemoryMessage format\n",
+    "memory_messages = [MemoryMessage(**msg) for msg in all_messages]\n",
+    "\n",
+    "# Create WorkingMemory object\n",
+    "working_memory = WorkingMemory(\n",
+    "    session_id=session_id_1,\n",
+    "    user_id=\"demo_user\",\n",
+    "    messages=memory_messages,\n",
+    "    memories=[],\n",
+    "    data={}\n",
+    ")\n",
+    "\n",
+    "await memory_client.put_working_memory(\n",
+    "    session_id=session_id_1,\n",
+    "    memory=working_memory,\n",
+    "    user_id=\"demo_user\",\n",
+    "    model_name=\"gpt-4o\"\n",
+    ")\n",
+    "print(\"   โœ… Working memory saved with both turns\")\n",
+    "print(\"   โœ… Preferences will be extracted to long-term memory\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Verify Automatic Extraction\n",
+    "\n",
+    "Let's check if the Agent Memory Server extracted facts to long-term memory."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Wait a moment for extraction to complete\n",
+    "print(\"Waiting for automatic extraction...\")\n",
+    "await asyncio.sleep(2)\n",
+    "\n",
+    "# Search for extracted memories\n",
+    "print(\"\\nSearching for extracted memories...\\n\")\n",
+    "memories = await memory_client.search_long_term_memory(\n",
+    "    text=\"student preferences\",\n",
+    "    limit=5\n",
+    ")\n",
+    "\n",
+    "# Check the .memories list -- the results object itself is always truthy\n",
+    "if memories.memories:\n",
+    "    print(\"โœ… Extracted memories found:\\n\")\n",
+    "    for i, memory in enumerate(memories.memories, 1):\n",
+    "        print(f\"{i}. {memory.text}\")\n",
+    "        print(f\"   Type: {memory.memory_type} | Topics: {', '.join(memory.topics)}\")\n",
+    "        print()\n",
+    "else:\n",
+    "    print(\"โณ No memories extracted yet (extraction may take a moment)\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Session 2: New Session, Same User\n",
+    "\n",
+    "Now let's start a completely new session with the same user. Working memory will be empty, but long-term memory persists."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"\\n\" + \"=\" * 80)\n",
+    "print(\"SESSION 2, TURN 1 (New Session, Same User)\")\n",
+    "print(\"=\" * 80)\n",
+    "\n",
+    "# Step 1: Load working memory (empty - new session)\n",
+    "print(\"\\n1. 
Loading working memory...\")\n", + "# For new session, working memory is empty\n", + "working_memory = None\n", + "print(f\" Messages in working memory: 0\")\n", + "print(\" (Empty - this is a new session)\")\n", + "\n", + "# Step 2: Search long-term memory (has data from Session 1)\n", + "print(\"\\n2. Searching long-term memory...\")\n", + "user_query_3 = \"What database courses do you recommend for me?\"\n", + "long_term_memories = await memory_client.search_long_term_memory(\n", + " text=user_query_3,\n", + " limit=5\n", + ")\n", + "print(f\" Relevant memories found: {len(long_term_memories.memories)}\")\n", + "if long_term_memories.memories:\n", + " print(\"\\n Retrieved memories:\")\n", + " for memory in long_term_memories.memories:\n", + " print(f\" - {memory.text}\")\n", + "\n", + "# Step 3: Process with LLM (with long-term context)\n", + "print(\"\\n3. Processing with LLM...\")\n", + "context = \"\\n\".join([f\"- {m.text}\" for m in long_term_memories.memories])\n", + "system_prompt = f\"\"\"You are a helpful class scheduling agent for Redis University.\n", + "\n", + "What you know about this student:\n", + "{context}\n", + "\"\"\"\n", + "\n", + "messages = [\n", + " SystemMessage(content=system_prompt),\n", + " HumanMessage(content=user_query_3)\n", + "]\n", + "\n", + "response = llm.invoke(messages)\n", + "print(f\"\\n User: {user_query_3}\")\n", + "print(f\" Agent: {response.content}\")\n", + "print(\"\\n โœ… Agent used long-term memory to personalize response!\")\n", + "\n", + "# Step 4: Save working memory\n", + "print(\"\\n4. Saving working memory...\")\n", + "from agent_memory_client.models import WorkingMemory, MemoryMessage\n", + "\n", + "# Convert messages to MemoryMessage format\n", + "memory_messages = [\n", + " MemoryMessage(role=\"user\", content=user_query_3),\n", + " MemoryMessage(role=\"assistant\", content=response.content)\n", + "]\n", + "\n", + "# Create WorkingMemory object\n", + "working_memory = WorkingMemory(\n", + " session_id=session_id_2,\n", + " user_id=\"demo_user\",\n", + " messages=memory_messages,\n", + " memories=[],\n", + " data={}\n", + ")\n", + "\n", + "await memory_client.put_working_memory(\n", + " session_id=session_id_2,\n", + " memory=working_memory,\n", + " user_id=\"demo_user\",\n", + " model_name=\"gpt-4o\"\n", + ")\n", + "print(\" โœ… Working memory saved for new session\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Testing: Memory Consolidation\n", + "\n", + "Let's verify that both sessions' data is consolidated in long-term memory." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"MEMORY CONSOLIDATION CHECK\")\n", + "print(\"=\" * 80)\n", + "\n", + "# Check all memories about the student\n", + "print(\"\\nAll memories about this student:\\n\")\n", + "all_memories = await memory_client.search_long_term_memory(\n", + " text=\"\", # Empty query returns all\n", + " limit=20\n", + ")\n", + "\n", + "semantic_memories = [m for m in all_memories.memories if m.memory_type == \"semantic\"]\n", + "episodic_memories = [m for m in all_memories.memories if m.memory_type == \"episodic\"]\n", + "\n", + "print(f\"Semantic memories (facts): {len(semantic_memories)}\")\n", + "for memory in semantic_memories:\n", + " print(f\" - {memory.text}\")\n", + "\n", + "print(f\"\\nEpisodic memories (events): {len(episodic_memories)}\")\n", + "for memory in episodic_memories:\n", + " print(f\" - {memory.text}\")\n", + "\n", + "print(\"\\nโœ… All memories from both sessions are consolidated in long-term memory!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Key Takeaways\n", + "\n", + "### Memory Integration Pattern\n", + "\n", + "**Every conversation turn:**\n", + "1. Load working memory (conversation history)\n", + "2. Search long-term memory (relevant facts)\n", + "3. Process with full context\n", + "4. Save working memory (triggers extraction)\n", + "\n", + "### Automatic Extraction\n", + "\n", + "The Agent Memory Server automatically:\n", + "- โœ… Analyzes conversations\n", + "- โœ… Extracts important facts\n", + "- โœ… Stores in long-term memory\n", + "- โœ… Deduplicates similar memories\n", + "- โœ… Organizes by type and topics\n", + "\n", + "### Memory Lifecycle\n", + "\n", + "```\n", + "User says something\n", + " โ†“\n", + "Stored in working memory (session-scoped)\n", + " โ†“\n", + "Automatic extraction analyzes importance\n", + " โ†“\n", + "Important facts โ†’ long-term memory (user-scoped)\n", + " โ†“\n", + "Available in future sessions\n", + "```\n", + "\n", + "### Best Practices\n", + "\n", + "1. **Always load working memory first** - Get conversation context\n", + "2. **Search long-term memory for relevant facts** - Use semantic search\n", + "3. **Combine both in system prompt** - Give LLM full context\n", + "4. **Save working memory after each turn** - Enable extraction\n", + "5. **Trust automatic extraction** - Don't manually extract everything" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Exercises\n", + "\n", + "1. **Multi-turn conversation**: Have a 5-turn conversation about course planning. Verify memories are extracted.\n", + "\n", + "2. **Cross-session test**: Start a new session and ask \"What do you know about me?\" Does the agent remember?\n", + "\n", + "3. **Memory search**: Try different search queries to find specific memories. How does semantic search perform?\n", + "\n", + "4. **Extraction timing**: How long does automatic extraction take? Test with different conversation lengths." 
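+    ,
+    "\n",
+    "### Starter Sketch for Exercise 1\n",
+    "\n",
+    "A minimal, untested sketch of the per-turn pattern, using only client calls shown in this notebook (`get_or_create_working_memory`, `search_long_term_memory`, `put_working_memory`); the helper name `memory_turn` is ours:\n",
+    "\n",
+    "```python\n",
+    "from agent_memory_client.models import WorkingMemory, MemoryMessage\n",
+    "\n",
+    "async def memory_turn(user_text, session_id, user_id=\"demo_user\"):\n",
+    "    # 1. Load working memory (conversation history)\n",
+    "    _, wm = await memory_client.get_or_create_working_memory(\n",
+    "        session_id=session_id, user_id=user_id, model_name=\"gpt-4o\")\n",
+    "    # 2. Search long-term memory (relevant facts)\n",
+    "    found = await memory_client.search_long_term_memory(text=user_text, limit=3)\n",
+    "    facts = \"\\n\".join(f\"- {m.text}\" for m in found.memories)\n",
+    "    # 3. Process with full context\n",
+    "    msgs = [SystemMessage(content=f\"You are a scheduling agent.\\nKnown facts:\\n{facts}\")]\n",
+    "    for m in (wm.messages if wm else []):\n",
+    "        msgs.append(HumanMessage(content=m.content) if m.role == \"user\"\n",
+    "                    else AIMessage(content=m.content))\n",
+    "    msgs.append(HumanMessage(content=user_text))\n",
+    "    reply = llm.invoke(msgs)\n",
+    "    # 4. Save working memory (triggers automatic extraction)\n",
+    "    new_msgs = list(wm.messages if wm else []) + [\n",
+    "        MemoryMessage(role=\"user\", content=user_text),\n",
+    "        MemoryMessage(role=\"assistant\", content=reply.content)]\n",
+    "    await memory_client.put_working_memory(\n",
+    "        session_id=session_id, user_id=user_id, model_name=\"gpt-4o\",\n",
+    "        memory=WorkingMemory(session_id=session_id, user_id=user_id,\n",
+    "                             messages=new_msgs, memories=[], data={}))\n",
+    "    return reply.content\n",
+    "```"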
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Summary\n", + "\n", + "In this notebook, you learned:\n", + "\n", + "- โœ… Working and long-term memory work together for complete context\n", + "- โœ… Load working memory โ†’ search long-term โ†’ process โ†’ save working memory\n", + "- โœ… Automatic extraction moves important facts to long-term memory\n", + "- โœ… Long-term memory persists across sessions\n", + "- โœ… This pattern enables truly personalized, context-aware agents\n", + "\n", + "**Next:** In Section 4, we'll explore optimizations like context window management, retrieval strategies, and grounding techniques." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/python-recipes/context-engineering/notebooks_archive/section-3-memory/03_tool_selection_strategies.ipynb b/python-recipes/context-engineering/notebooks_archive/section-3-memory/03_tool_selection_strategies.ipynb new file mode 100644 index 00000000..7f22391e --- /dev/null +++ b/python-recipes/context-engineering/notebooks_archive/section-3-memory/03_tool_selection_strategies.ipynb @@ -0,0 +1,581 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![Redis](https://redis.io/wp-content/uploads/2024/04/Logotype.svg?auto=webp&quality=85,75&width=120)\n", + "\n", + "# Tool Selection Strategies: Improving Tool Choice\n", + "\n", + "## Learning Objectives (25-30 minutes)\n", + "By the end of this notebook, you will understand:\n", + "1. **Common tool selection failures** and why they happen\n", + "2. **Strategies to improve tool selection** with clear naming and descriptions\n", + "3. **How LLMs select tools** and what influences their decisions\n", + "4. **Testing and debugging** tool selection issues\n", + "5. **Best practices** for tool organization and consolidation\n", + "\n", + "## Prerequisites\n", + "- Completed `02_defining_tools.ipynb`\n", + "- Understanding of tool creation basics\n", + "- Redis Stack running with course data\n", + "- OpenAI API key configured\n", + "\n", + "---\n", + "\n", + "## Introduction\n", + "\n", + "In this advanced notebook, you'll learn strategies to improve how LLMs select tools. When you have many tools, the LLM can get confused about which one to use. You'll learn techniques to make tool selection more reliable and accurate.\n", + "\n", + "### What You'll Learn\n", + "\n", + "- Common tool selection failures\n", + "- Strategies to improve tool selection\n", + "- Clear naming conventions\n", + "- Detailed descriptions with examples\n", + "- Testing and debugging tool selection" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Concepts: Tool Selection Challenges\n", + "\n", + "### The Problem\n", + "\n", + "As you add more tools, the LLM faces challenges:\n", + "\n", + "**Scenario:** Imagine you're building a class agent with tools for searching, filtering, listing, finding, and browsing courses. A student asks \"What computer science courses are available?\" Which tool should the LLM use? 
Without clear guidance, it might pick the wrong one.\n", + "\n", + "**With 3 tools:**\n", + "- โœ… Easy to choose\n", + "- โœ… Clear distinctions\n", + "\n", + "**With 10+ tools:**\n", + "- โš ๏ธ Similar-sounding tools\n", + "- โš ๏ธ Overlapping functionality\n", + "- โš ๏ธ Ambiguous queries\n", + "- โš ๏ธ Wrong tool selection" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The Problem: Scale Matters\n", + "\n", + "In our course agent, we might need tools for:\n", + "- Searching courses (by topic, department, difficulty, format)\n", + "- Getting course details (by code, by name)\n", + "- Checking prerequisites, enrollment, schedules\n", + "- Managing student records\n", + "\n", + "**Quick math:** With 3-5 variations per category, you could easily have 15-20 tools. That's when tool selection becomes critical." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Common Tool Selection Failures\n", + "\n", + "**1. Similar Names**\n", + "```python\n", + "# Bad: Confusing names\n", + "get_course() # Get one course? Or search for one?\n", + "get_courses() # Get multiple? How many? Search or list all?\n", + "search_course() # Search for one? Or many?\n", + "find_courses() # Same as search_course()? Different how?\n", + "# The LLM asks the same questions you're asking now!\n", + "```\n", + "\n", + "**2. Vague Descriptions**\n", + "```python\n", + "# Bad: Too vague\n", + "def search_courses():\n", + " \"\"\"Search for courses.\"\"\"\n", + " \n", + "# Good: Specific with examples\n", + "def search_courses():\n", + " \"\"\"Search for courses using semantic search.\n", + " \n", + " Use when students ask about:\n", + " - Topics: 'machine learning courses'\n", + " - Departments: 'computer science courses'\n", + " - Characteristics: 'online courses' or 'easy courses'\n", + " \n", + " Returns: List of matching courses with relevance scores.\n", + " \"\"\"\n", + "```\n", + "\n", + "**3. Overlapping Functionality**\n", + "```python\n", + "# Bad: Unclear when to use which tool\n", + "search_courses(query) # Semantic search\n", + "filter_courses(department) # Filter by department \n", + "find_courses_by_topic(topic) # Find by topic\n", + "# Problem: \"computer science courses\" could use ANY of these!\n", + "\n", + "# Good: One tool with clear parameters\n", + "search_courses(\n", + " query: str, # \"computer science\"\n", + " department: str = None, # Optional filter\n", + " topic: str = None # Optional filter\n", + ")\n", + "# Result: One clear entry point, no confusion\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### How LLMs Select Tools\n", + "\n", + "The LLM follows a decision process:\n", + "\n", + "1. **Tool name** - First impression (\"Does this sound relevant?\")\n", + "2. **Tool description** - Main decision factor (\"When should I use this?\")\n", + "3. **Parameter descriptions** - Confirms choice (\"Can I provide these parameters?\")\n", + "4. **Context** - User's query and conversation (\"Does this match the user's intent?\")\n", + "\n", + "**Think of it like this:** The LLM is reading a menu at a restaurant. Tool names are dish names, descriptions are the ingredients/explanation, and parameters are customization options. A vague menu leads to wrong orders!\n", + "\n", + "**Key insight:** The LLM can't see your code, only the schema!" 
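+    ,
+    "\n",
+    "You can print exactly what the model receives. A quick sketch (assumes `langchain-core` is installed; `search_courses` here is a stand-in tool defined inline):\n",
+    "\n",
+    "```python\n",
+    "import json\n",
+    "from langchain_core.tools import tool\n",
+    "from langchain_core.utils.function_calling import convert_to_openai_tool\n",
+    "\n",
+    "@tool\n",
+    "def search_courses(query: str) -> str:\n",
+    "    \"\"\"Search for courses using semantic search.\"\"\"\n",
+    "    return \"...\"\n",
+    "\n",
+    "# Only this schema (name, description, parameter types) reaches the LLM;\n",
+    "# the function body never does.\n",
+    "print(json.dumps(convert_to_openai_tool(search_courses), indent=2))\n",
+    "```"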
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Quick Check: Can You Spot the Problem?\n", + "\n", + "Before we dive into code, look at these two tools:\n", + "```python\n", + "def get_course_info(code: str):\n", + " \"\"\"Get information about a course.\"\"\"\n", + " \n", + "def get_course_data(code: str): \n", + " \"\"\"Get data for a course.\"\"\"\n", + "```\n", + "\n", + "**Question:** If a student asks \"Tell me about CS101\", which tool would you pick?\n", + "\n", + "**Answer:** Impossible to tell! They sound identical. This is exactly what the LLM experiences with bad tool definitions. Let's fix this..." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### What You'll Practice\n", + "\n", + "In this notebook, we'll:\n", + "\n", + "1. **Create confusing tools** with bad names and descriptions\n", + "2. **Test them** to see the LLM make wrong choices \n", + "3. **Fix them** using the strategies above\n", + "4. **Test again** to verify improvements\n", + "\n", + "You'll see actual tool selection failures and learn how to prevent them." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Setup - Run this first\n", + "import os\n", + "import asyncio\n", + "from typing import List, Dict, Any, Optional\n", + "from dotenv import load_dotenv\n", + "\n", + "# LangChain imports\n", + "from langchain_core.tools import tool\n", + "from langchain_openai import ChatOpenAI\n", + "from langchain.agents import create_openai_functions_agent, AgentExecutor\n", + "from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder\n", + "from pydantic import BaseModel, Field\n", + "\n", + "# Redis and course management\n", + "import redis\n", + "from redis_context_course.course_manager import CourseManager\n", + "\n", + "load_dotenv()\n", + "REDIS_URL = os.getenv(\"REDIS_URL\", \"redis://localhost:6379\")\n", + "redis_client = redis.from_url(REDIS_URL)\n", + "course_manager = CourseManager()\n", + "\n", + "# Initialize LLM\n", + "llm = ChatOpenAI(model=\"gpt-4o-mini\", temperature=0)\n", + "\n", + "print(\"โœ… Setup complete - ready to test tool selection!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Demonstration: Bad Tool Selection\n", + "\n", + "Let's create some confusing tools and see what happens when the LLM tries to choose between them." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create confusing tools with bad names and descriptions\n", + "\n", + "@tool\n", + "async def get_course(code: str) -> str:\n", + " \"\"\"Get a course.\"\"\"\n", + " try:\n", + " course = await course_manager.get_course_by_code(code)\n", + " if not course:\n", + " return f\"Course {code} not found.\"\n", + " return f\"{course.code}: {course.title}\\n{course.description}\"\n", + " except Exception as e:\n", + " return f\"Error: {str(e)}\"\n", + "\n", + "@tool\n", + "async def get_courses(query: str) -> str:\n", + " \"\"\"Get courses.\"\"\"\n", + " try:\n", + " results = await course_manager.search_courses(query, limit=3)\n", + " if not results:\n", + " return \"No courses found.\"\n", + " output = []\n", + " for course in results:\n", + " output.append(f\"{course.code}: {course.title}\")\n", + " return \"\\n\".join(output)\n", + " except Exception as e:\n", + " return f\"Error: {str(e)}\"\n", + "\n", + "@tool\n", + "async def search_course(topic: str) -> str:\n", + " \"\"\"Search course.\"\"\"\n", + " try:\n", + " results = await course_manager.search_courses(topic, limit=5)\n", + " if not results:\n", + " return \"No courses found.\"\n", + " output = []\n", + " for course in results:\n", + " output.append(f\"{course.code}: {course.title}\")\n", + " return \"\\n\".join(output)\n", + " except Exception as e:\n", + " return f\"Error: {str(e)}\"\n", + "\n", + "@tool\n", + "async def find_courses(department: str) -> str:\n", + " \"\"\"Find courses.\"\"\"\n", + " try:\n", + " results = await course_manager.search_courses(department, limit=5)\n", + " if not results:\n", + " return \"No courses found.\"\n", + " output = []\n", + " for course in results:\n", + " output.append(f\"{course.code}: {course.title}\")\n", + " return \"\\n\".join(output)\n", + " except Exception as e:\n", + " return f\"Error: {str(e)}\"\n", + "\n", + "print(\"โŒ Created 4 confusing tools with bad names and descriptions\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Test the Confusion\n", + "\n", + "Let's create an agent with these confusing tools and see what happens." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create an agent with confusing tools\n", + "confusing_tools = [get_course, get_courses, search_course, find_courses]\n", + "\n", + "prompt = ChatPromptTemplate.from_messages([\n", + " (\"system\", \"You are a helpful course advisor. 
Use the available tools to help students.\"),\n", + " (\"user\", \"{input}\"),\n", + " MessagesPlaceholder(variable_name=\"agent_scratchpad\"),\n", + "])\n", + "\n", + "agent = create_openai_functions_agent(llm, confusing_tools, prompt)\n", + "confusing_agent = AgentExecutor(agent=agent, tools=confusing_tools, verbose=True)\n", + "\n", + "print(\"๐Ÿค– Created agent with confusing tools\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Test with ambiguous queries\n", + "test_queries = [\n", + " \"What computer science courses are available?\",\n", + " \"Find me some programming courses\",\n", + " \"Show me courses about databases\"\n", + "]\n", + "\n", + "print(\"๐Ÿงช Testing confusing tools with ambiguous queries...\")\n", + "print(\"\\nWatch which tools the LLM chooses and why!\")\n", + "\n", + "# Uncomment to test (will show verbose output)\n", + "# for query in test_queries:\n", + "# print(f\"\\n{'='*50}\")\n", + "# print(f\"Query: {query}\")\n", + "# print('='*50)\n", + "# result = confusing_agent.invoke({\"input\": query})\n", + "# print(f\"Result: {result['output']}\")\n", + "\n", + "print(\"\\n๐Ÿ’ก Notice: The LLM might pick different tools for similar queries!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Improvement Strategies\n", + "\n", + "Now let's fix the problems by applying the strategies we learned." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Strategy 1: Clear, Specific Names\n", + "\n", + "Replace vague names with specific, action-oriented names." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Strategy 1: Better names\n", + "\n", + "@tool\n", + "async def get_course_details_by_code(course_code: str) -> str:\n", + " \"\"\"\n", + " Get detailed information about a specific course using its course code.\n", + " \n", + " Use this when:\n", + " - Student asks about a specific course code (\"Tell me about CS101\")\n", + " - Student wants detailed course information\n", + " - Student asks about prerequisites, credits, or full description\n", + " \n", + " Do NOT use for:\n", + " - Searching for courses by topic (use search_courses_by_topic instead)\n", + " - Finding multiple courses\n", + " \n", + " Returns: Complete course details including description, prerequisites, credits.\n", + " \"\"\"\n", + " try:\n", + " course = await course_manager.get_course_by_code(course_code.upper())\n", + " if not course:\n", + " return f\"Course {course_code} not found. Please check the course code.\"\n", + " \n", + " details = f\"**{course.code}: {course.title}**\\n\"\n", + " details += f\"Credits: {course.credits}\\n\"\n", + " details += f\"Description: {course.description}\\n\"\n", + " if course.prerequisites:\n", + " details += f\"Prerequisites: {', '.join(course.prerequisites)}\\n\"\n", + " return details\n", + " except Exception as e:\n", + " return f\"Error getting course details: {str(e)}\"\n", + "\n", + "print(\"โœ… Created tool with clear name and detailed description\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Strategy 2: Detailed Descriptions with Examples\n", + "\n", + "Add specific use cases and examples to guide the LLM." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Strategy 2: Rich descriptions with examples\n", + "\n", + "@tool\n", + "async def search_courses_by_topic(query: str) -> str:\n", + " \"\"\"\n", + " Search for courses using semantic similarity matching.\n", + " \n", + " Use this when students ask about:\n", + " - Topics: 'machine learning courses', 'web development', 'databases'\n", + " - Characteristics: 'beginner courses', 'online courses', 'project-based'\n", + " - General exploration: 'what courses are available?', 'show me programming courses'\n", + " - Department-related: 'computer science courses', 'math courses'\n", + " \n", + " Do NOT use for:\n", + " - Specific course codes (use get_course_details_by_code instead)\n", + " - Prerequisites checking (use check_prerequisites instead)\n", + " \n", + " Returns: List of up to 5 relevant courses with codes and titles, ranked by relevance.\n", + " \"\"\"\n", + " try:\n", + " results = await course_manager.search_courses(query, limit=5)\n", + " if not results:\n", + " return f\"No courses found matching '{query}'. Try different keywords or broader terms.\"\n", + " \n", + " output = [f\"Found {len(results)} courses matching '{query}':\"]\n", + " for i, course in enumerate(results, 1):\n", + " output.append(f\"{i}. {course.code}: {course.title}\")\n", + " return \"\\n\".join(output)\n", + " except Exception as e:\n", + " return f\"Error searching courses: {str(e)}\"\n", + "\n", + "print(\"โœ… Created tool with rich description and clear examples\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Strategy 3: Consolidate Overlapping Tools\n", + "\n", + "Instead of multiple similar tools, create one flexible tool with clear parameters." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Strategy 3: Consolidated tool\n", + "# Instead of: get_course, get_courses, search_course, find_courses\n", + "# We now have: get_course_details_by_code + search_courses_by_topic\n", + "\n", + "improved_tools = [get_course_details_by_code, search_courses_by_topic]\n", + "\n", + "print(\"โœ… Consolidated 4 confusing tools into 2 clear tools\")\n", + "print(\"\\nBefore: get_course, get_courses, search_course, find_courses\")\n", + "print(\"After: get_course_details_by_code, search_courses_by_topic\")\n", + "print(\"\\nResult: Clear distinction between getting ONE course vs SEARCHING for courses\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Test the Improvements\n", + "\n", + "Let's test the improved tools with the same queries." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create agent with improved tools\n", + "improved_agent = create_openai_functions_agent(llm, improved_tools, prompt)\n", + "improved_executor = AgentExecutor(agent=improved_agent, tools=improved_tools, verbose=True)\n", + "\n", + "print(\"๐Ÿค– Created agent with improved tools\")\n", + "print(\"\\n๐Ÿงช Test the same queries with improved tools:\")\n", + "\n", + "# Uncomment to test improvements\n", + "# for query in test_queries:\n", + "# print(f\"\\n{'='*50}\")\n", + "# print(f\"Query: {query}\")\n", + "# print('='*50)\n", + "# result = improved_executor.invoke({\"input\": query})\n", + "# print(f\"Result: {result['output']}\")\n", + "\n", + "print(\"\\n๐Ÿ’ก Notice: More consistent tool selection with clear descriptions!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Key Takeaways\n", + "\n", + "### What We Learned\n", + "\n", + "1. **Tool selection problems scale quickly** - 3 tools are easy, 10+ tools create confusion\n", + "2. **Names matter** - Specific, action-oriented names beat generic ones\n", + "3. **Descriptions are critical** - Examples and use cases guide LLM decisions\n", + "4. **Consolidation helps** - Fewer, well-designed tools beat many similar ones\n", + "5. **Testing is essential** - Always verify tool selection with real queries\n", + "\n", + "### Best Practices Summary\n", + "\n", + "**โœ… Do:**\n", + "- Use specific, descriptive tool names\n", + "- Include \"Use this when...\" examples in descriptions\n", + "- Specify what NOT to use the tool for\n", + "- Test with ambiguous queries\n", + "- Consolidate similar tools when possible\n", + "\n", + "**โŒ Don't:**\n", + "- Use vague names like `get_data` or `search`\n", + "- Write minimal descriptions like \"Get courses\"\n", + "- Create multiple tools that do similar things\n", + "- Assume the LLM will figure it out\n", + "- Skip testing with real queries\n", + "\n", + "### Next Steps\n", + "\n", + "Ready to practice these concepts? Continue with `03d_hands_on_tool_selection.ipynb` for guided exercises that will help you master tool selection optimization!" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/python-recipes/context-engineering/notebooks_archive/section-3-memory/04_memory_tools.ipynb b/python-recipes/context-engineering/notebooks_archive/section-3-memory/04_memory_tools.ipynb new file mode 100644 index 00000000..7fd64fab --- /dev/null +++ b/python-recipes/context-engineering/notebooks_archive/section-3-memory/04_memory_tools.ipynb @@ -0,0 +1,565 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Memory Tools: Giving the LLM Control Over Memory\n", + "\n", + "## Introduction\n", + "\n", + "In this advanced notebook, you'll learn how to give your agent control over its own memory using tools. Instead of automatically extracting memories, you can let the LLM decide what to remember and when to search for memories. 
The Agent Memory Server SDK provides built-in memory tools for this.\n", + "\n", + "### What You'll Learn\n", + "\n", + "- Why give the LLM control over memory\n", + "- Agent Memory Server's built-in memory tools\n", + "- How to configure memory tools for your agent\n", + "- When the LLM decides to store vs. search memories\n", + "- Best practices for memory-aware agents\n", + "\n", + "### Prerequisites\n", + "\n", + "- Completed all Section 3 notebooks\n", + "- Redis 8 running locally\n", + "- Agent Memory Server running\n", + "- OpenAI API key set" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Concepts: Tool-Based Memory Management\n", + "\n", + "### Two Approaches to Memory\n", + "\n", + "#### 1. Automatic Memory (What We've Been Doing)\n", + "\n", + "```python\n", + "# Agent has conversation\n", + "# โ†’ Save working memory\n", + "# โ†’ Agent Memory Server automatically extracts important facts\n", + "# โ†’ Facts stored in long-term memory\n", + "```\n", + "\n", + "**Pros:**\n", + "- โœ… Fully automatic\n", + "- โœ… No LLM overhead in your application\n", + "- โœ… Consistent extraction\n", + "- โœ… Faster - extraction happens in the background after response is sent\n", + "\n", + "**Cons:**\n", + "- โš ๏ธ Your application's LLM can't directly control what gets extracted\n", + "- โš ๏ธ May extract too much or too little\n", + "- โš ๏ธ Can't dynamically decide what's important based on conversation context\n", + "\n", + "**Note:** You can configure custom extraction prompts on the memory server to guide what gets extracted, but your client application's LLM doesn't have direct control over the extraction process.\n", + "\n", + "#### 2. Tool-Based Memory (This Notebook)\n", + "\n", + "```python\n", + "# Agent has conversation\n", + "# โ†’ LLM decides: \"This is important, I should remember it\"\n", + "# โ†’ LLM calls store_memory tool\n", + "# โ†’ Fact stored in long-term memory\n", + "\n", + "# Later...\n", + "# โ†’ LLM decides: \"I need to know about the user's preferences\"\n", + "# โ†’ LLM calls search_memories tool\n", + "# โ†’ Retrieves relevant memories\n", + "```\n", + "\n", + "**Pros:**\n", + "- โœ… Your application's LLM has full control\n", + "- โœ… Can decide what's important in real-time\n", + "- โœ… Can search when needed\n", + "- โœ… More intelligent, context-aware behavior\n", + "\n", + "**Cons:**\n", + "- โš ๏ธ Requires tool calls (more tokens)\n", + "- โš ๏ธ Slower - tool calls add latency to every response\n", + "- โš ๏ธ LLM might forget to store/search\n", + "- โš ๏ธ Less consistent\n", + "\n", + "### When to Use Tool-Based Memory\n", + "\n", + "**Use tool-based memory when:**\n", + "- โœ… Agent needs fine-grained control\n", + "- โœ… Importance is context-dependent\n", + "- โœ… Agent should decide when to search\n", + "- โœ… Building advanced, autonomous agents\n", + "\n", + "**Use automatic memory when:**\n", + "- โœ… Simple, consistent extraction is fine\n", + "- โœ… Want to minimize token usage\n", + "- โœ… Building straightforward agents\n", + "\n", + "**Best: Use both!**\n", + "- Automatic extraction for baseline\n", + "- Tools for explicit control\n", + "\n", + "### Agent Memory Server's Built-in Tools\n", + "\n", + "The Agent Memory Server SDK provides:\n", + "\n", + "1. **`store_memory`** - Store important information\n", + "2. **`search_memories`** - Search for relevant memories\n", + "3. **`update_memory`** - Update existing memories\n", + "4. 
**`delete_memory`** - Remove memories\n",
+    "\n",
+    "These are pre-built, tested, and optimized! (Note: the concrete tool names exposed by `get_memory_tools()` below may differ -- e.g. `create_long_term_memory` and `search_long_term_memory` -- so dispatch on `tool.name` rather than hard-coding names.)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Setup"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import asyncio\n",
+    "from langchain_openai import ChatOpenAI\n",
+    "from langchain_core.messages import SystemMessage, HumanMessage, AIMessage, ToolMessage\n",
+    "from agent_memory_client import create_memory_client\n",
+    "from agent_memory_client.integrations.langchain import get_memory_tools\n",
+    "\n",
+    "# Initialize\n",
+    "student_id = \"student_memory_tools\"\n",
+    "session_id = \"tool_demo\"\n",
+    "\n",
+    "# Initialize memory client using the async factory\n",
+    "base_url = os.getenv(\"AGENT_MEMORY_URL\", \"http://localhost:8000\")\n",
+    "memory_client = await create_memory_client(base_url)\n",
+    "\n",
+    "llm = ChatOpenAI(model=\"gpt-4o\", temperature=0.7)\n",
+    "\n",
+    "print(f\"โœ… Setup complete for {student_id}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Exploring Agent Memory Server's Memory Tools\n",
+    "\n",
+    "Let's use the ready-made tools the memory client provides for these operations."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Getting Memory Tools with LangChain Integration\n",
+    "\n",
+    "The memory client has built-in LangChain/LangGraph integration! Just call `get_memory_tools()` and you get ready-to-use LangChain tools."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Get LangChain-compatible memory tools from the client.\n",
+    "# This returns a list of StructuredTool objects ready to use with LangChain/LangGraph.\n",
+    "memory_tools = get_memory_tools(\n",
+    "    memory_client=memory_client,\n",
+    "    session_id=session_id,\n",
+    "    user_id=student_id\n",
+    ")\n",
+    "\n",
+    "print(\"Available memory tools:\")\n",
+    "for tool in memory_tools:\n",
+    "    print(f\"\\n  - {tool.name}: {tool.description[:80]}...\")\n",
+    "    if hasattr(tool, 'args_schema') and tool.args_schema:\n",
+    "        print(f\"    Schema: {tool.args_schema.model_json_schema()}\")\n",
+    "\n",
+    "print(f\"\\nโœ… Got {len(memory_tools)} LangChain tools from memory client\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Key Insight: Built-in LangChain Integration\n",
+    "\n",
+    "The `get_memory_tools()` function returns LangChain `StructuredTool` objects that:\n",
+    "- Work seamlessly with LangChain's `llm.bind_tools()` and LangGraph agents\n",
+    "- Handle all the memory client API calls internally\n",
+    "- Are pre-configured with your session_id and user_id\n",
+    "\n",
+    "No manual wrapping needed - just use them like any other LangChain tool!"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Testing Memory Tools with an Agent\n",
+    "\n",
+    "Let's create an agent that uses these memory tools."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Configure agent with memory tools\n", + "llm_with_tools = llm.bind_tools(memory_tools)\n", + "\n", + "system_prompt = \"\"\"You are a class scheduling agent for Redis University.\n", + "\n", + "You have access to memory tools:\n", + "- create_long_term_memory: Store important information about the student\n", + "- search_long_term_memory: Search for information you've stored before\n", + "\n", + "Use these tools intelligently:\n", + "- When students share preferences, goals, or important facts โ†’ store them\n", + "- When you need to recall information โ†’ search for it\n", + "- When making recommendations โ†’ search for preferences first\n", + "\n", + "Be proactive about using memory to provide personalized service.\n", + "\"\"\"\n", + "\n", + "print(\"โœ… Agent configured with LangChain memory tools\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Example 1: Agent Stores a Preference" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"=\" * 80)\n", + "print(\"EXAMPLE 1: Agent Stores a Preference\")\n", + "print(\"=\" * 80)\n", + "\n", + "user_message = \"I prefer online courses because I work part-time.\"\n", + "\n", + "messages = [\n", + " SystemMessage(content=system_prompt),\n", + " HumanMessage(content=user_message)\n", + "]\n", + "\n", + "print(f\"\\n๐Ÿ‘ค User: {user_message}\")\n", + "\n", + "# First response - should call create_long_term_memory\n", + "response = llm_with_tools.invoke(messages)\n", + "\n", + "if response.tool_calls:\n", + " print(\"\\n๐Ÿค– Agent decision: Store this preference\")\n", + " for tool_call in response.tool_calls:\n", + " print(f\" Tool: {tool_call['name']}\")\n", + " print(f\" Args: {tool_call['args']}\")\n", + " \n", + " # Find and execute the tool\n", + " tool = next((t for t in memory_tools if t.name == tool_call['name']), None)\n", + " if tool:\n", + " try:\n", + " result = await tool.ainvoke(tool_call['args'])\n", + " print(f\" Result: {result}\")\n", + " result_content = str(result)\n", + " except Exception as e:\n", + " print(f\" Error: {e}\")\n", + " result_content = f\"Error: {str(e)}\"\n", + " \n", + " # Add tool result to messages\n", + " messages.append(response)\n", + " messages.append(ToolMessage(\n", + " content=result_content,\n", + " tool_call_id=tool_call['id']\n", + " ))\n", + " \n", + " # Get final response\n", + " final_response = llm_with_tools.invoke(messages)\n", + " print(f\"\\n๐Ÿค– Agent: {final_response.content}\")\n", + "else:\n", + " print(f\"\\n๐Ÿค– Agent: {response.content}\")\n", + " print(\"\\nโš ๏ธ Agent didn't use memory tool\")\n", + "\n", + "print(\"\\n\" + \"=\" * 80)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Example 2: Agent Searches for Memories" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"EXAMPLE 2: Agent Searches for Memories\")\n", + "print(\"=\" * 80)\n", + "\n", + "# Wait a moment for memory to be stored\n", + "await asyncio.sleep(1)\n", + "\n", + "user_message = \"What courses would you recommend for me?\"\n", + "\n", + "messages = [\n", + " SystemMessage(content=system_prompt),\n", + " HumanMessage(content=user_message)\n", + "]\n", + "\n", + "print(f\"\\n๐Ÿ‘ค User: {user_message}\")\n", + "\n", + "# First response - should call 
search_long_term_memory\")\n",
+    "response = llm_with_tools.invoke(messages)\n",
+    "\n",
+    "if response.tool_calls:\n",
+    "    print(\"\\n๐Ÿค– Agent decision: Search for preferences first\")\n",
+    "    for tool_call in response.tool_calls:\n",
+    "        print(f\"   Tool: {tool_call['name']}\")\n",
+    "        print(f\"   Args: {tool_call['args']}\")\n",
+    "\n",
+    "        # Find and execute the tool\n",
+    "        tool = next((t for t in memory_tools if t.name == tool_call['name']), None)\n",
+    "        if tool:\n",
+    "            try:\n",
+    "                result = await tool.ainvoke(tool_call['args'])\n",
+    "                print(f\"\\n   Retrieved memories:\")\n",
+    "                print(f\"   {result}\")\n",
+    "                result_content = str(result)\n",
+    "            except Exception as e:\n",
+    "                print(f\"\\n   Error: {e}\")\n",
+    "                result_content = f\"Error: {str(e)}\"\n",
+    "        else:\n",
+    "            result_content = f\"Unknown tool: {tool_call['name']}\"\n",
+    "\n",
+    "        # Add tool result to messages\n",
+    "        messages.append(response)\n",
+    "        messages.append(ToolMessage(\n",
+    "            content=result_content,\n",
+    "            tool_call_id=tool_call['id']\n",
+    "        ))\n",
+    "\n",
+    "        # Get final response\n",
+    "        final_response = llm_with_tools.invoke(messages)\n",
+    "        print(f\"\\n๐Ÿค– Agent: {final_response.content}\")\n",
+    "        print(\"\\nโœ… Agent used memories to personalize recommendation!\")\n",
+    "else:\n",
+    "    print(f\"\\n๐Ÿค– Agent: {response.content}\")\n",
+    "    print(\"\\nโš ๏ธ Agent didn't search memories\")\n",
+    "\n",
+    "print(\"\\n\" + \"=\" * 80)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Example 3: Multi-Turn Conversation with Memory"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"\\n\" + \"=\" * 80)\n",
+    "print(\"EXAMPLE 3: Multi-Turn Conversation\")\n",
+    "print(\"=\" * 80)\n",
+    "\n",
+    "async def chat_with_memory(user_message, conversation_history):\n",
+    "    \"\"\"Helper function for conversation with memory tools.\"\"\"\n",
+    "    messages = [SystemMessage(content=system_prompt)]\n",
+    "    messages.extend(conversation_history)\n",
+    "    messages.append(HumanMessage(content=user_message))\n",
+    "\n",
+    "    # Get response\n",
+    "    response = llm_with_tools.invoke(messages)\n",
+    "\n",
+    "    # Handle tool calls\n",
+    "    if response.tool_calls:\n",
+    "        messages.append(response)\n",
+    "\n",
+    "        for tool_call in response.tool_calls:\n",
+    "            # Look up the tool by name -- the names come from get_memory_tools\n",
+    "            # (e.g. create_long_term_memory, search_long_term_memory)\n",
+    "            tool = next((t for t in memory_tools if t.name == tool_call['name']), None)\n",
+    "            result = await tool.ainvoke(tool_call['args']) if tool else \"Unknown tool\"\n",
+    "\n",
+    "            messages.append(ToolMessage(\n",
+    "                content=str(result),\n",
+    "                tool_call_id=tool_call['id']\n",
+    "            ))\n",
+    "\n",
+    "        # Get final response after tool execution\n",
+    "        response = llm_with_tools.invoke(messages)\n",
+    "\n",
+    "    # Update conversation history\n",
+    "    conversation_history.append(HumanMessage(content=user_message))\n",
+    "    conversation_history.append(AIMessage(content=response.content))\n",
+    "\n",
+    "    return response.content, conversation_history\n",
+    "\n",
+    "# Have a conversation\n",
+    "conversation = []\n",
+    "\n",
+    "queries = [\n",
+    "    \"I'm a junior majoring in Computer Science.\",\n",
+    "    \"I want to focus on machine learning and AI.\",\n",
+    "    \"What do you know about me so far?\",\n",
+    "]\n",
+    "\n",
+    "for query in queries:\n",
+    "    print(f\"\\n๐Ÿ‘ค User: {query}\")\n",
+    "    response, conversation = await chat_with_memory(query, conversation)\n",
+    "    print(f\"๐Ÿค– Agent: {response}\")\n",
+    "    await asyncio.sleep(1)\n",
+    "\n",
+
"print(\"\\n\" + \"=\" * 80)\n", + "print(\"โœ… Agent proactively stored and retrieved memories!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Key Takeaways\n", + "\n", + "### Benefits of Memory Tools\n", + "\n", + "โœ… **LLM Control:**\n", + "- Agent decides what's important\n", + "- Agent decides when to search\n", + "- More intelligent behavior\n", + "\n", + "โœ… **Flexibility:**\n", + "- Can store context-dependent information\n", + "- Can search on-demand\n", + "- Can update/delete memories\n", + "\n", + "โœ… **Transparency:**\n", + "- You can see when agent stores/searches\n", + "- Easier to debug\n", + "- More explainable\n", + "\n", + "### When to Use Memory Tools\n", + "\n", + "**Use memory tools when:**\n", + "- โœ… Building advanced, autonomous agents\n", + "- โœ… Agent needs fine-grained control\n", + "- โœ… Importance is context-dependent\n", + "- โœ… Want explicit memory operations\n", + "\n", + "**Use automatic extraction when:**\n", + "- โœ… Simple, consistent extraction is fine\n", + "- โœ… Want to minimize token usage\n", + "- โœ… Building straightforward agents\n", + "\n", + "**Best practice: Combine both!**\n", + "- Automatic extraction as baseline\n", + "- Tools for explicit control\n", + "\n", + "### Tool Design Best Practices\n", + "\n", + "1. **Clear descriptions** - Explain when to use each tool\n", + "2. **Good examples** - Show typical usage\n", + "3. **Error handling** - Handle failures gracefully\n", + "4. **Feedback** - Return clear success/failure messages\n", + "\n", + "### Common Patterns\n", + "\n", + "**Store after learning:**\n", + "```\n", + "User: \"I prefer online courses\"\n", + "Agent: [stores memory] \"Got it, I'll remember that!\"\n", + "```\n", + "\n", + "**Search before recommending:**\n", + "```\n", + "User: \"What courses should I take?\"\n", + "Agent: [searches memories] \"Based on your preferences...\"\n", + "```\n", + "\n", + "**Proactive recall:**\n", + "```\n", + "User: \"Tell me about CS401\"\n", + "Agent: [searches memories] \"I remember you're interested in ML...\"\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Exercises\n", + "\n", + "1. **Test memory decisions**: Have a 10-turn conversation. Does the agent store and search appropriately?\n", + "\n", + "2. **Add update tool**: Create an `update_memory` tool that lets the agent modify existing memories.\n", + "\n", + "3. **Compare approaches**: Build two agents - one with automatic extraction, one with tools. Which performs better?\n", + "\n", + "4. **Memory strategy**: Design a system prompt that guides the agent on when to use memory tools." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Summary\n", + "\n", + "In this notebook, you learned:\n", + "\n", + "- โœ… Memory tools give the LLM control over memory operations\n", + "- โœ… Agent Memory Server provides built-in memory tools\n", + "- โœ… Tools enable intelligent, context-aware memory management\n", + "- โœ… Combine automatic extraction with tools for best results\n", + "- โœ… Clear tool descriptions guide proper usage\n", + "\n", + "**Key insight:** Tool-based memory management enables more sophisticated agents that can decide what to remember and when to recall information. This is especially powerful for autonomous agents that need fine-grained control over their memory." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/python-recipes/context-engineering/notebooks_archive/section-4-optimizations/01_context_window_management.ipynb b/python-recipes/context-engineering/notebooks_archive/section-4-optimizations/01_context_window_management.ipynb new file mode 100644 index 00000000..32fce30c --- /dev/null +++ b/python-recipes/context-engineering/notebooks_archive/section-4-optimizations/01_context_window_management.ipynb @@ -0,0 +1,561 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Context Window Management: Handling Token Limits\n", + "\n", + "## Introduction\n", + "\n", + "In this notebook, you'll learn about context window limits and how to manage them effectively. Every LLM has a maximum number of tokens it can process, and long conversations can exceed this limit. The Agent Memory Server provides automatic summarization to handle this.\n", + "\n", + "### What You'll Learn\n", + "\n", + "- What context windows are and why they matter\n", + "- How to count tokens in conversations\n", + "- Why summarization is necessary\n", + "- How to configure Agent Memory Server summarization\n", + "- How summarization works in practice\n", + "\n", + "### Prerequisites\n", + "\n", + "- Completed Section 3 notebooks\n", + "- Redis 8 running locally\n", + "- Agent Memory Server running\n", + "- OpenAI API key set" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Concepts: Context Windows and Token Limits\n", + "\n", + "### What is a Context Window?\n", + "\n", + "A **context window** is the maximum amount of text (measured in tokens) that an LLM can process in a single request. 
This includes:\n", + "\n", + "- System instructions\n", + "- Conversation history\n", + "- Retrieved context (memories, documents)\n", + "- User's current message\n", + "- Space for the response\n", + "\n", + "### Common Context Window Sizes\n", + "\n", + "| Model | Context Window | Notes |\n", + "|-------|----------------|-------|\n", + "| GPT-4o | 128K tokens | ~96,000 words |\n", + "| GPT-4 Turbo | 128K tokens | ~96,000 words |\n", + "| GPT-3.5 Turbo | 16K tokens | ~12,000 words |\n", + "| Claude 3 Opus | 200K tokens | ~150,000 words |\n", + "\n", + "### The Problem: Long Conversations\n", + "\n", + "As conversations grow, they consume more tokens:\n", + "\n", + "```\n", + "Turn 1: System (500) + Messages (200) = 700 tokens โœ…\n", + "Turn 5: System (500) + Messages (1,000) = 1,500 tokens โœ…\n", + "Turn 20: System (500) + Messages (4,000) = 4,500 tokens โœ…\n", + "Turn 50: System (500) + Messages (10,000) = 10,500 tokens โœ…\n", + "Turn 100: System (500) + Messages (20,000) = 20,500 tokens โš ๏ธ\n", + "Turn 200: System (500) + Messages (40,000) = 40,500 tokens โš ๏ธ\n", + "```\n", + "\n", + "Eventually, you'll hit the limit!\n", + "\n", + "### Why Summarization is Necessary\n", + "\n", + "Without summarization:\n", + "- โŒ Conversations eventually fail\n", + "- โŒ Costs increase linearly with conversation length\n", + "- โŒ Latency increases with more tokens\n", + "- โŒ Important early context gets lost\n", + "\n", + "With summarization:\n", + "- โœ… Conversations can continue indefinitely\n", + "- โœ… Costs stay manageable\n", + "- โœ… Latency stays consistent\n", + "- โœ… Important context is preserved in summaries\n", + "\n", + "### How Agent Memory Server Handles This\n", + "\n", + "The Agent Memory Server automatically:\n", + "1. **Monitors message count** in working memory\n", + "2. **Triggers summarization** when threshold is reached\n", + "3. **Creates summary** of older messages\n", + "4. **Replaces old messages** with summary\n", + "5. 
**Keeps recent messages** for context\n",
+    "\n",
+    "### Token Budgets\n",
+    "\n",
+    "A **token budget** is how you allocate your context window:\n",
+    "\n",
+    "```\n",
+    "Total: 128K tokens\n",
+    "โ”œโ”€ System instructions: 1K tokens\n",
+    "โ”œโ”€ Working memory: 8K tokens\n",
+    "โ”œโ”€ Long-term memories: 2K tokens\n",
+    "โ”œโ”€ Retrieved context: 4K tokens\n",
+    "โ”œโ”€ User message: 500 tokens\n",
+    "โ””โ”€ Response space: 2K tokens\n",
+    "   โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\n",
+    "   Used: 17.5K / 128K (13.7%)\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Setup"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import asyncio\n",
+    "import tiktoken\n",
+    "from langchain_openai import ChatOpenAI\n",
+    "from langchain_core.messages import SystemMessage, HumanMessage, AIMessage\n",
+    "from agent_memory_client import MemoryAPIClient as MemoryClient, MemoryClientConfig\n",
+    "\n",
+    "# Initialize\n",
+    "student_id = \"student_context_demo\"\n",
+    "session_id = \"long_conversation\"\n",
+    "\n",
+    "# Initialize memory client with proper config\n",
+    "config = MemoryClientConfig(\n",
+    "    base_url=os.getenv(\"AGENT_MEMORY_URL\", \"http://localhost:8000\"),\n",
+    "    default_namespace=\"redis_university\"\n",
+    ")\n",
+    "memory_client = MemoryClient(config=config)\n",
+    "\n",
+    "llm = ChatOpenAI(model=\"gpt-4o\", temperature=0.7)\n",
+    "\n",
+    "# Initialize tokenizer for counting\n",
+    "tokenizer = tiktoken.encoding_for_model(\"gpt-4o\")\n",
+    "\n",
+    "def count_tokens(text: str) -> int:\n",
+    "    \"\"\"Count tokens in text.\"\"\"\n",
+    "    return len(tokenizer.encode(text))\n",
+    "\n",
+    "print(f\"โœ… Setup complete for {student_id}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Hands-on: Understanding Token Counts"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Example 1: Counting Tokens in Messages"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Ensure count_tokens is defined (in case cells are run out of order)\n",
+    "if \"count_tokens\" not in globals():\n",
+    "    import tiktoken\n",
+    "    tokenizer = tiktoken.encoding_for_model(\"gpt-4o\")\n",
+    "    def count_tokens(text: str) -> int:\n",
+    "        return len(tokenizer.encode(text))\n",
+    "\n",
+    "# Example messages\n",
+    "messages = [\n",
+    "    \"Hi, I'm interested in machine learning courses.\",\n",
+    "    \"Can you recommend some courses for beginners?\",\n",
+    "    \"What are the prerequisites for CS401?\",\n",
+    "    \"I've completed CS101 and CS201. Can I take CS401?\",\n",
+    "    \"Great! When is CS401 offered?\"\n",
+    "]\n",
+    "\n",
+    "print(\"Token counts for individual messages:\\n\")\n",
+    "total_tokens = 0\n",
+    "for i, msg in enumerate(messages, 1):\n",
+    "    tokens = count_tokens(msg)\n",
+    "    total_tokens += tokens\n",
+    "    print(f\"{i}. 
\\\"{msg}\\\"\")\n", + " print(f\" Tokens: {tokens}\\n\")\n", + "\n", + "print(f\"Total tokens for 5 messages: {total_tokens}\")\n", + "print(f\"Average tokens per message: {total_tokens / len(messages):.1f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Example 2: Token Growth Over Conversation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Ensure count_tokens is defined (in case cells are run out of order)\n", + "if \"count_tokens\" not in globals():\n", + " import tiktoken\n", + " tokenizer = tiktoken.encoding_for_model(\"gpt-4o\")\n", + " def count_tokens(text: str) -> int:\n", + " return len(tokenizer.encode(text))\n", + "\n", + "# Simulate conversation growth\n", + "system_prompt = \"\"\"You are a helpful class scheduling agent for Redis University.\n", + "Help students find courses and plan their schedule.\"\"\"\n", + "\n", + "system_tokens = count_tokens(system_prompt)\n", + "print(f\"System prompt tokens: {system_tokens}\\n\")\n", + "\n", + "# Simulate growing conversation\n", + "conversation_tokens = 0\n", + "avg_message_tokens = 50 # Typical message size\n", + "\n", + "print(\"Token growth over conversation turns:\\n\")\n", + "print(f\"{'Turn':<6} {'Messages':<10} {'Conv Tokens':<12} {'Total Tokens':<12} {'% of 128K'}\")\n", + "print(\"-\" * 60)\n", + "\n", + "for turn in [1, 5, 10, 20, 50, 100, 200, 500, 1000]:\n", + " # Each turn = user message + assistant message\n", + " conversation_tokens = turn * 2 * avg_message_tokens\n", + " total_tokens = system_tokens + conversation_tokens\n", + " percentage = (total_tokens / 128000) * 100\n", + " \n", + " print(f\"{turn:<6} {turn*2:<10} {conversation_tokens:<12,} {total_tokens:<12,} {percentage:>6.1f}%\")\n", + "\n", + "print(\"\\nโš ๏ธ Without summarization, long conversations will eventually exceed limits!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Configuring Summarization\n", + "\n", + "The Agent Memory Server provides automatic summarization. Let's see how to configure it." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Understanding Summarization Settings\n", + "\n", + "The Agent Memory Server uses these settings:\n", + "\n", + "**Message Count Threshold:**\n", + "- When working memory exceeds this many messages, summarization triggers\n", + "- Default: 20 messages (10 turns)\n", + "- Configurable per session\n", + "\n", + "**Summarization Strategy:**\n", + "- **Recent + Summary**: Keep recent N messages, summarize older ones\n", + "- **Sliding Window**: Keep only recent N messages\n", + "- **Full Summary**: Summarize everything\n", + "\n", + "**What Gets Summarized:**\n", + "- Older conversation messages\n", + "- Key facts and decisions\n", + "- Important context\n", + "\n", + "**What Stays:**\n", + "- Recent messages (for immediate context)\n", + "- System instructions\n", + "- Long-term memories (separate from working memory)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Example 3: Demonstrating Summarization\n", + "\n", + "Let's create a conversation that triggers summarization." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Helper function for conversation\n", + "async def have_conversation_turn(user_message, session_id):\n", + " \"\"\"Simulate a conversation turn.\"\"\"\n", + " # Get working memory\n", + " _, working_memory = await memory_client.get_or_create_working_memory(\n", + " session_id=session_id,\n", + " model_name=\"gpt-4o\"\n", + " )\n", + " \n", + " # Build messages\n", + " messages = [SystemMessage(content=\"You are a helpful class scheduling agent.\")]\n", + " \n", + " if working_memory and working_memory.messages:\n", + " for msg in working_memory.messages:\n", + " if msg.role == \"user\":\n", + " messages.append(HumanMessage(content=msg.content))\n", + " elif msg.role == \"assistant\":\n", + " messages.append(AIMessage(content=msg.content))\n", + " \n", + " messages.append(HumanMessage(content=user_message))\n", + " \n", + " # Get response\n", + " response = llm.invoke(messages)\n", + " \n", + " # Save to working memory\n", + " all_messages = []\n", + " if working_memory and working_memory.messages:\n", + " all_messages = [{\"role\": m.role, \"content\": m.content} for m in working_memory.messages]\n", + " \n", + " all_messages.extend([\n", + " {\"role\": \"user\", \"content\": user_message},\n", + " {\"role\": \"assistant\", \"content\": response.content}\n", + " ])\n", + " \n", + " from agent_memory_client.models import WorkingMemory, MemoryMessage\n", + " \n", + " # Convert messages to MemoryMessage format\n", + " memory_messages = [MemoryMessage(**msg) for msg in all_messages]\n", + " \n", + " # Create WorkingMemory object\n", + " working_memory = WorkingMemory(\n", + " session_id=session_id,\n", + " user_id=\"demo_user\",\n", + " messages=memory_messages,\n", + " memories=[],\n", + " data={}\n", + " )\n", + " \n", + " await memory_client.put_working_memory(\n", + " session_id=session_id,\n", + " memory=working_memory,\n", + " user_id=\"demo_user\",\n", + " model_name=\"gpt-4o\"\n", + " )\n", + " \n", + " return response.content, len(all_messages)\n", + "\n", + "print(\"โœ… Helper function defined\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Have a multi-turn conversation\n", + "print(\"=\" * 80)\n", + "print(\"DEMONSTRATING SUMMARIZATION\")\n", + "print(\"=\" * 80)\n", + "\n", + "conversation_queries = [\n", + " \"Hi, I'm a computer science major interested in AI.\",\n", + " \"What machine learning courses do you offer?\",\n", + " \"Tell me about CS401.\",\n", + " \"What are the prerequisites?\",\n", + " \"I've completed CS101 and CS201.\",\n", + " \"Can I take CS401 next semester?\",\n", + " \"When is it offered?\",\n", + " \"Is it available online?\",\n", + " \"What about CS402?\",\n", + " \"Can I take both CS401 and CS402?\",\n", + " \"What's the workload like?\",\n", + " \"Are there any projects?\",\n", + "]\n", + "\n", + "for i, query in enumerate(conversation_queries, 1):\n", + " print(f\"\\nTurn {i}:\")\n", + " print(f\"User: {query}\")\n", + " \n", + " response, message_count = await have_conversation_turn(query, session_id)\n", + " \n", + " print(f\"Agent: {response[:100]}...\")\n", + " print(f\"Total messages in working memory: {message_count}\")\n", + " \n", + " if message_count > 20:\n", + " print(\"โš ๏ธ Message count exceeds threshold - summarization may trigger\")\n", + " \n", + " await asyncio.sleep(0.5) # Rate limiting\n", + "\n", + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"โœ… 
Conversation complete\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Example 4: Checking Working Memory After Summarization" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check working memory state\n", + "print(\"\\nChecking working memory state...\\n\")\n", + "\n", + "_, working_memory = await memory_client.get_or_create_working_memory(\n", + " session_id=session_id,\n", + " model_name=\"gpt-4o\"\n", + ")\n", + "\n", + "if working_memory:\n", + " print(f\"Total messages: {len(working_memory.messages)}\")\n", + " print(f\"\\nMessage breakdown:\")\n", + " \n", + " user_msgs = [m for m in working_memory.messages if m.role == \"user\"]\n", + " assistant_msgs = [m for m in working_memory.messages if m.role == \"assistant\"]\n", + " system_msgs = [m for m in working_memory.messages if m.role == \"system\"]\n", + " \n", + " print(f\" User messages: {len(user_msgs)}\")\n", + " print(f\" Assistant messages: {len(assistant_msgs)}\")\n", + " print(f\" System messages (summaries): {len(system_msgs)}\")\n", + " \n", + " # Check for summary messages\n", + " if system_msgs:\n", + " print(\"\\nโœ… Summarization occurred! Summary messages found:\")\n", + " for msg in system_msgs:\n", + " print(f\"\\n Summary: {msg.content[:200]}...\")\n", + " else:\n", + " print(\"\\nโณ No summarization yet (may need more messages or time)\")\n", + "else:\n", + " print(\"No working memory found\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Key Takeaways\n", + "\n", + "### Context Window Management Strategy\n", + "\n", + "1. **Monitor token usage** - Know your limits\n", + "2. **Set message thresholds** - Trigger summarization before hitting limits\n", + "3. **Keep recent context** - Don't summarize everything\n", + "4. **Use long-term memory** - Important facts go there, not working memory\n", + "5. **Trust automatic summarization** - Agent Memory Server handles it\n", + "\n", + "### Token Budget Best Practices\n", + "\n", + "**Allocate wisely:**\n", + "- System instructions: 1-2K tokens\n", + "- Working memory: 4-8K tokens\n", + "- Long-term memories: 2-4K tokens\n", + "- Retrieved context: 2-4K tokens\n", + "- Response space: 2-4K tokens\n", + "\n", + "**Total: ~15-20K tokens (leaves plenty of headroom)**\n", + "\n", + "### When Summarization Happens\n", + "\n", + "The Agent Memory Server triggers summarization when:\n", + "- โœ… Message count exceeds threshold (default: 20)\n", + "- โœ… Token count approaches limits\n", + "- โœ… Configured summarization strategy activates\n", + "\n", + "### What Summarization Preserves\n", + "\n", + "โœ… **Preserved:**\n", + "- Key facts and decisions\n", + "- Important context\n", + "- Recent messages (full text)\n", + "- Long-term memories (separate storage)\n", + "\n", + "โŒ **Compressed:**\n", + "- Older conversation details\n", + "- Redundant information\n", + "- Small talk\n", + "\n", + "### Why This Matters\n", + "\n", + "Without proper context window management:\n", + "- โŒ Conversations fail when limits are hit\n", + "- โŒ Costs grow linearly with conversation length\n", + "- โŒ Performance degrades with more tokens\n", + "\n", + "With proper management:\n", + "- โœ… Conversations can continue indefinitely\n", + "- โœ… Costs stay predictable\n", + "- โœ… Performance stays consistent\n", + "- โœ… Important context is preserved" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Exercises\n", + "\n", + "1. 
**Calculate your token budget**: For your agent, allocate tokens across system prompt, working memory, long-term memories, and response space.\n", + "\n", + "2. **Test long conversations**: Have a 50-turn conversation and monitor token usage. When does summarization trigger?\n", + "\n", + "3. **Compare strategies**: Test different message thresholds (10, 20, 50). How does it affect conversation quality?\n", + "\n", + "4. **Measure costs**: Calculate the cost difference between keeping full history vs. using summarization for a 100-turn conversation." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Summary\n", + "\n", + "In this notebook, you learned:\n", + "\n", + "- โœ… Context windows have token limits that conversations can exceed\n", + "- โœ… Token budgets help allocate context window space\n", + "- โœ… Summarization is necessary for long conversations\n", + "- โœ… Agent Memory Server provides automatic summarization\n", + "- โœ… Proper management enables indefinite conversations\n", + "\n", + "**Key insight:** Context window management isn't about proving you need summarization - it's about understanding the constraints and using the right tools (like Agent Memory Server) to handle them automatically." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/python-recipes/context-engineering/notebooks_archive/section-4-optimizations/02_retrieval_strategies.ipynb b/python-recipes/context-engineering/notebooks_archive/section-4-optimizations/02_retrieval_strategies.ipynb new file mode 100644 index 00000000..063c26b0 --- /dev/null +++ b/python-recipes/context-engineering/notebooks_archive/section-4-optimizations/02_retrieval_strategies.ipynb @@ -0,0 +1,624 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Retrieval Strategies: RAG, Summaries, and Hybrid Approaches\n", + "\n", + "## Introduction\n", + "\n", + "In this notebook, you'll learn different strategies for retrieving and providing context to your agent. Not all context should be included all the time - you need smart retrieval strategies to provide relevant information efficiently.\n", + "\n", + "### What You'll Learn\n", + "\n", + "- Different retrieval strategies (full context, RAG, summaries, hybrid)\n", + "- When to use each strategy\n", + "- How to optimize vector search parameters\n", + "- How to measure retrieval quality and performance\n", + "\n", + "### Prerequisites\n", + "\n", + "- Completed Section 3 notebooks\n", + "- Redis 8 running locally\n", + "- Agent Memory Server running\n", + "- OpenAI API key set\n", + "- Course data ingested" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Concepts: Retrieval Strategies\n", + "\n", + "### The Context Retrieval Problem\n", + "\n", + "You have a large knowledge base (courses, memories, documents), but you can't include everything in every request. You need to:\n", + "\n", + "1. **Find relevant information** - What's related to the user's query?\n", + "2. **Limit context size** - Stay within token budgets\n", + "3. **Maintain quality** - Don't miss important information\n", + "4. 
**Optimize performance** - Fast retrieval, low latency\n",
+ "\n",
+ "### Strategy 1: Full Context (Naive)\n",
+ "\n",
+ "**Approach:** Include everything in every request\n",
+ "\n",
+ "```python\n",
+ "# Include entire course catalog\n",
+ "all_courses = get_all_courses()  # 500 courses\n",
+ "context = \"\\n\".join([str(course) for course in all_courses])\n",
+ "```\n",
+ "\n",
+ "**Pros:**\n",
+ "- ✅ Never miss relevant information\n",
+ "- ✅ Simple to implement\n",
+ "\n",
+ "**Cons:**\n",
+ "- ❌ Exceeds token limits quickly\n",
+ "- ❌ Expensive (more tokens = higher cost)\n",
+ "- ❌ Slow (more tokens = higher latency)\n",
+ "- ❌ Dilutes relevant information with noise\n",
+ "\n",
+ "**Verdict:** ❌ Don't use for production\n",
+ "\n",
+ "### Strategy 2: RAG (Retrieval-Augmented Generation)\n",
+ "\n",
+ "**Approach:** Retrieve only relevant information using semantic search\n",
+ "\n",
+ "```python\n",
+ "# Search for relevant courses\n",
+ "query = \"machine learning courses\"\n",
+ "relevant_courses = search_courses(query, limit=5)\n",
+ "context = \"\\n\".join([str(course) for course in relevant_courses])\n",
+ "```\n",
+ "\n",
+ "**Pros:**\n",
+ "- ✅ Only includes relevant information\n",
+ "- ✅ Stays within token budgets\n",
+ "- ✅ Fast and cost-effective\n",
+ "- ✅ Semantic search finds related content\n",
+ "\n",
+ "**Cons:**\n",
+ "- ⚠️ May miss relevant information if search isn't perfect\n",
+ "- ⚠️ Requires good embeddings and search tuning\n",
+ "\n",
+ "**Verdict:** ✅ Good for most use cases\n",
+ "\n",
+ "### Strategy 3: Summaries\n",
+ "\n",
+ "**Approach:** Pre-compute summaries of large datasets\n",
+ "\n",
+ "```python\n",
+ "# Use pre-computed course catalog summary\n",
+ "summary = get_course_catalog_summary()  # \"CS: 50 courses, MATH: 30 courses...\"\n",
+ "context = summary\n",
+ "```\n",
+ "\n",
+ "**Pros:**\n",
+ "- ✅ Very compact (low token usage)\n",
+ "- ✅ Fast (no search needed)\n",
+ "- ✅ Provides high-level overview\n",
+ "\n",
+ "**Cons:**\n",
+ "- ❌ Loses details\n",
+ "- ❌ May not have specific information needed\n",
+ "- ⚠️ Requires pre-computation\n",
+ "\n",
+ "**Verdict:** ✅ Good for overviews, combine with RAG for details\n",
+ "\n",
+ "### Strategy 4: Hybrid (Best)\n",
+ "\n",
+ "**Approach:** Combine summaries + targeted retrieval\n",
+ "\n",
+ "```python\n",
+ "# Start with summary for overview\n",
+ "summary = get_course_catalog_summary()\n",
+ "\n",
+ "# Add specific relevant courses\n",
+ "relevant_courses = search_courses(query, limit=3)\n",
+ "details = \"\\n\".join([str(course) for course in relevant_courses])\n",
+ "\n",
+ "context = f\"{summary}\\n\\nRelevant courses:\\n{details}\"\n",
+ "```\n",
+ "\n",
+ "**Pros:**\n",
+ "- ✅ Best of both worlds\n",
+ "- ✅ Overview + specific details\n",
+ "- ✅ Efficient token usage\n",
+ "- ✅ High quality results\n",
+ "\n",
+ "**Cons:**\n",
+ "- ⚠️ More complex to implement\n",
+ "- ⚠️ Requires pre-computed summaries\n",
+ "\n",
+ "**Verdict:** ✅ Best for production systems"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Setup"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import time\n",
+ "import asyncio\n",
+ "from typing import List\n",
+ "import tiktoken\n",
+ "from langchain_openai import ChatOpenAI\n",
+ "from langchain_core.messages import SystemMessage, HumanMessage\n",
+ "from redis_context_course import CourseManager, MemoryClient, MemoryClientConfig\n",
+ "\n",
+ "# Initialize\n",
+
"course_manager = CourseManager()\n", + "# Initialize memory client with proper config\n", + "import os\n", + "config = MemoryClientConfig(\n", + " base_url=os.getenv(\"AGENT_MEMORY_URL\", \"http://localhost:8088\"),\n", + " default_namespace=\"redis_university\"\n", + ")\n", + "memory_client = MemoryClient(config=config)\n", + "llm = ChatOpenAI(model=\"gpt-4o\", temperature=0.7)\n", + "tokenizer = tiktoken.encoding_for_model(\"gpt-4o\")\n", + "\n", + "def count_tokens(text: str) -> int:\n", + " return len(tokenizer.encode(text))\n", + "\n", + "print(\"โœ… Setup complete\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Hands-on: Comparing Retrieval Strategies" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Strategy 1: Full Context (Bad)\n", + "\n", + "Let's try including all courses and see what happens." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"=\" * 80)\n", + "print(\"STRATEGY 1: FULL CONTEXT (Naive)\")\n", + "print(\"=\" * 80)\n", + "\n", + "# Get all courses\n", + "all_courses = await course_manager.get_all_courses()\n", + "print(f\"\\nTotal courses in catalog: {len(all_courses)}\")\n", + "\n", + "# Build full context\n", + "full_context = \"\\n\\n\".join([\n", + " f\"{c.course_code}: {c.title}\\n{c.description}\\nCredits: {c.credits} | {c.format.value}\"\n", + " for c in all_courses[:50] # Limit to 50 for demo\n", + "])\n", + "\n", + "tokens = count_tokens(full_context)\n", + "print(f\"\\nTokens for 50 courses: {tokens:,}\")\n", + "print(f\"Estimated tokens for all {len(all_courses)} courses: {(tokens * len(all_courses) / 50):,.0f}\")\n", + "\n", + "# Try to use it\n", + "user_query = \"I'm interested in machine learning courses\"\n", + "system_prompt = f\"\"\"You are a class scheduling agent.\n", + "\n", + "Available courses:\n", + "{full_context[:2000]}...\n", + "\"\"\"\n", + "\n", + "start_time = time.time()\n", + "messages = [\n", + " SystemMessage(content=system_prompt),\n", + " HumanMessage(content=user_query)\n", + "]\n", + "response = llm.invoke(messages)\n", + "latency = time.time() - start_time\n", + "\n", + "print(f\"\\nQuery: {user_query}\")\n", + "print(f\"Response: {response.content[:200]}...\")\n", + "print(f\"\\nLatency: {latency:.2f}s\")\n", + "print(f\"Total tokens used: ~{count_tokens(system_prompt) + count_tokens(user_query):,}\")\n", + "\n", + "print(\"\\nโŒ PROBLEMS:\")\n", + "print(\" - Too many tokens (expensive)\")\n", + "print(\" - High latency\")\n", + "print(\" - Relevant info buried in noise\")\n", + "print(\" - Doesn't scale to full catalog\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Strategy 2: RAG with Semantic Search (Good)\n", + "\n", + "Now let's use semantic search to retrieve only relevant courses." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"STRATEGY 2: RAG (Semantic Search)\")\n", + "print(\"=\" * 80)\n", + "\n", + "user_query = \"I'm interested in machine learning courses\"\n", + "\n", + "# Search for relevant courses\n", + "start_time = time.time()\n", + "relevant_courses = await course_manager.search_courses(\n", + " query=user_query,\n", + " limit=5\n", + ")\n", + "search_time = time.time() - start_time\n", + "\n", + "print(f\"\\nSearch time: {search_time:.3f}s\")\n", + "print(f\"Courses found: {len(relevant_courses)}\")\n", + "\n", + "# Build context from relevant courses only\n", + "rag_context = \"\\n\\n\".join([\n", + " f\"{c.course_code}: {c.title}\\n{c.description}\\nCredits: {c.credits} | {c.format.value}\"\n", + " for c in relevant_courses\n", + "])\n", + "\n", + "tokens = count_tokens(rag_context)\n", + "print(f\"Context tokens: {tokens:,}\")\n", + "\n", + "# Use it\n", + "system_prompt = f\"\"\"You are a class scheduling agent.\n", + "\n", + "Relevant courses:\n", + "{rag_context}\n", + "\"\"\"\n", + "\n", + "start_time = time.time()\n", + "messages = [\n", + " SystemMessage(content=system_prompt),\n", + " HumanMessage(content=user_query)\n", + "]\n", + "response = llm.invoke(messages)\n", + "latency = time.time() - start_time\n", + "\n", + "print(f\"\\nQuery: {user_query}\")\n", + "print(f\"Response: {response.content[:200]}...\")\n", + "print(f\"\\nTotal latency: {latency:.2f}s\")\n", + "print(f\"Total tokens used: ~{count_tokens(system_prompt) + count_tokens(user_query):,}\")\n", + "\n", + "print(\"\\nโœ… BENEFITS:\")\n", + "print(\" - Much fewer tokens (cheaper)\")\n", + "print(\" - Lower latency\")\n", + "print(\" - Only relevant information\")\n", + "print(\" - Scales to any catalog size\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Strategy 3: Pre-computed Summary\n", + "\n", + "Let's create a summary of the course catalog." 
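+ "\n",
+ "In production, the summary should be computed once and cached rather than rebuilt on every request. Here is a minimal sketch of caching it in Redis with redis-py; the key name and TTL are illustrative choices, and `build_summary()` stands in for the grouping logic shown in the next cell.\n",
+ "\n",
+ "```python\n",
+ "import redis\n",
+ "\n",
+ "r = redis.Redis.from_url(\"redis://localhost:6379\", decode_responses=True)\n",
+ "\n",
+ "def get_course_catalog_summary() -> str:\n",
+ "    \"\"\"Return the cached catalog summary, rebuilding it if the cache expired.\"\"\"\n",
+ "    cached = r.get(\"summaries:course_catalog\")\n",
+ "    if cached is not None:\n",
+ "        return cached\n",
+ "    summary = build_summary()  # hypothetical: the grouping logic below\n",
+ "    r.set(\"summaries:course_catalog\", summary, ex=86400)  # refresh daily\n",
+ "    return summary\n",
+ "```"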
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"STRATEGY 3: PRE-COMPUTED SUMMARY\")\n", + "print(\"=\" * 80)\n", + "\n", + "# Create a summary (in production, this would be pre-computed)\n", + "all_courses = await course_manager.get_all_courses()\n", + "\n", + "# Group by department\n", + "by_department = {}\n", + "for course in all_courses:\n", + " dept = course.department\n", + " if dept not in by_department:\n", + " by_department[dept] = []\n", + " by_department[dept].append(course)\n", + "\n", + "# Create summary\n", + "summary_lines = [\"Course Catalog Summary:\\n\"]\n", + "for dept, courses in sorted(by_department.items()):\n", + " summary_lines.append(f\"{dept}: {len(courses)} courses\")\n", + " # Add a few example courses\n", + " examples = [f\"{c.course_code} ({c.title})\" for c in courses[:2]]\n", + " summary_lines.append(f\" Examples: {', '.join(examples)}\")\n", + "\n", + "summary = \"\\n\".join(summary_lines)\n", + "\n", + "print(f\"\\nSummary:\\n{summary}\")\n", + "print(f\"\\nSummary tokens: {count_tokens(summary):,}\")\n", + "\n", + "# Use it\n", + "user_query = \"What departments offer courses?\"\n", + "system_prompt = f\"\"\"You are a class scheduling agent.\n", + "\n", + "{summary}\n", + "\"\"\"\n", + "\n", + "start_time = time.time()\n", + "messages = [\n", + " SystemMessage(content=system_prompt),\n", + " HumanMessage(content=user_query)\n", + "]\n", + "response = llm.invoke(messages)\n", + "latency = time.time() - start_time\n", + "\n", + "print(f\"\\nQuery: {user_query}\")\n", + "print(f\"Response: {response.content}\")\n", + "print(f\"\\nLatency: {latency:.2f}s\")\n", + "\n", + "print(\"\\nโœ… BENEFITS:\")\n", + "print(\" - Very compact (minimal tokens)\")\n", + "print(\" - Fast (no search needed)\")\n", + "print(\" - Good for overview questions\")\n", + "\n", + "print(\"\\nโš ๏ธ LIMITATIONS:\")\n", + "print(\" - Lacks specific details\")\n", + "print(\" - Can't answer detailed questions\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Strategy 4: Hybrid (Best)\n", + "\n", + "Combine summary + targeted retrieval for the best results." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"STRATEGY 4: HYBRID (Summary + RAG)\")\n", + "print(\"=\" * 80)\n", + "\n", + "user_query = \"I'm interested in machine learning. 
What's available?\"\n", + "\n", + "# Start with summary\n", + "summary_context = summary\n", + "\n", + "# Add targeted retrieval\n", + "relevant_courses = await course_manager.search_courses(\n", + " query=user_query,\n", + " limit=3\n", + ")\n", + "\n", + "detailed_context = \"\\n\\n\".join([\n", + " f\"{c.course_code}: {c.title}\\n{c.description}\\nCredits: {c.credits} | {c.format.value}\"\n", + " for c in relevant_courses\n", + "])\n", + "\n", + "# Combine\n", + "hybrid_context = f\"\"\"{summary_context}\n", + "\n", + "Relevant courses for your query:\n", + "{detailed_context}\n", + "\"\"\"\n", + "\n", + "tokens = count_tokens(hybrid_context)\n", + "print(f\"\\nHybrid context tokens: {tokens:,}\")\n", + "\n", + "# Use it\n", + "system_prompt = f\"\"\"You are a class scheduling agent.\n", + "\n", + "{hybrid_context}\n", + "\"\"\"\n", + "\n", + "start_time = time.time()\n", + "messages = [\n", + " SystemMessage(content=system_prompt),\n", + " HumanMessage(content=user_query)\n", + "]\n", + "response = llm.invoke(messages)\n", + "latency = time.time() - start_time\n", + "\n", + "print(f\"\\nQuery: {user_query}\")\n", + "print(f\"Response: {response.content}\")\n", + "print(f\"\\nLatency: {latency:.2f}s\")\n", + "print(f\"Total tokens: ~{count_tokens(system_prompt) + count_tokens(user_query):,}\")\n", + "\n", + "print(\"\\nโœ… BENEFITS:\")\n", + "print(\" - Overview + specific details\")\n", + "print(\" - Efficient token usage\")\n", + "print(\" - High quality responses\")\n", + "print(\" - Best of all strategies\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Optimizing Vector Search Parameters\n", + "\n", + "Let's explore how to tune semantic search for better results." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"OPTIMIZING SEARCH PARAMETERS\")\n", + "print(\"=\" * 80)\n", + "\n", + "user_query = \"beginner programming courses\"\n", + "\n", + "# Test different limits\n", + "print(f\"\\nQuery: '{user_query}'\\n\")\n", + "\n", + "for limit in [3, 5, 10]:\n", + " results = await course_manager.search_courses(\n", + " query=user_query,\n", + " limit=limit\n", + " )\n", + " \n", + " print(f\"Limit={limit}: Found {len(results)} courses\")\n", + " for i, course in enumerate(results, 1):\n", + " print(f\" {i}. {course.course_code}: {course.title}\")\n", + " print()\n", + "\n", + "print(\"๐Ÿ’ก TIP: Start with limit=5, adjust based on your needs\")\n", + "print(\" - Too few: May miss relevant results\")\n", + "print(\" - Too many: Wastes tokens, adds noise\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Performance Comparison\n", + "\n", + "Let's compare all strategies side-by-side." 
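+ "\n",
+ "Before looking at the numbers, here is the hybrid pattern from the previous cell packaged as a reusable helper. This is a sketch that reuses the `summary`, `course_manager`, and `count_tokens` objects defined earlier in this notebook; the `max_courses` parameter is an illustrative knob.\n",
+ "\n",
+ "```python\n",
+ "async def build_hybrid_context(query: str, max_courses: int = 3) -> str:\n",
+ "    \"\"\"Combine the pre-computed catalog summary with targeted RAG results.\"\"\"\n",
+ "    relevant = await course_manager.search_courses(query=query, limit=max_courses)\n",
+ "    details = \"\\n\\n\".join(\n",
+ "        f\"{c.course_code}: {c.title}\\n{c.description}\"\n",
+ "        for c in relevant\n",
+ "    )\n",
+ "    context = f\"{summary}\\n\\nRelevant courses for your query:\\n{details}\"\n",
+ "    print(f\"Hybrid context: {count_tokens(context):,} tokens\")  # watch the budget\n",
+ "    return context\n",
+ "```"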
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"STRATEGY COMPARISON\")\n", + "print(\"=\" * 80)\n", + "\n", + "print(f\"\\n{'Strategy':<20} {'Tokens':<10} {'Latency':<10} {'Quality':<10} {'Scalability'}\")\n", + "print(\"-\" * 70)\n", + "print(f\"{'Full Context':<20} {'50,000+':<10} {'High':<10} {'Good':<10} {'Poor'}\")\n", + "print(f\"{'RAG (Semantic)':<20} {'500-2K':<10} {'Low':<10} {'Good':<10} {'Excellent'}\")\n", + "print(f\"{'Summary Only':<20} {'100-500':<10} {'Very Low':<10} {'Limited':<10} {'Excellent'}\")\n", + "print(f\"{'Hybrid':<20} {'1K-3K':<10} {'Low':<10} {'Excellent':<10} {'Excellent'}\")\n", + "\n", + "print(\"\\nโœ… RECOMMENDATION: Use Hybrid strategy for production\")\n", + "print(\" - Provides overview + specific details\")\n", + "print(\" - Efficient token usage\")\n", + "print(\" - Scales to any dataset size\")\n", + "print(\" - High quality results\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Key Takeaways\n", + "\n", + "### Choosing a Retrieval Strategy\n", + "\n", + "**Use RAG when:**\n", + "- โœ… You need specific, detailed information\n", + "- โœ… Dataset is large\n", + "- โœ… Queries are specific\n", + "\n", + "**Use Summaries when:**\n", + "- โœ… You need high-level overviews\n", + "- โœ… Queries are general\n", + "- โœ… Token budget is tight\n", + "\n", + "**Use Hybrid when:**\n", + "- โœ… You want the best quality\n", + "- โœ… You can pre-compute summaries\n", + "- โœ… Building production systems\n", + "\n", + "### Optimization Tips\n", + "\n", + "1. **Start with RAG** - Simple and effective\n", + "2. **Add summaries** - For overview context\n", + "3. **Tune search limits** - Balance relevance vs. tokens\n", + "4. **Pre-compute summaries** - Don't generate on every request\n", + "5. **Monitor performance** - Track tokens, latency, quality\n", + "\n", + "### Vector Search Best Practices\n", + "\n", + "- โœ… Use semantic search for finding relevant content\n", + "- โœ… Start with limit=5, adjust as needed\n", + "- โœ… Use filters when you have structured criteria\n", + "- โœ… Test with real user queries\n", + "- โœ… Monitor search quality over time" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Exercises\n", + "\n", + "1. **Implement hybrid retrieval**: Create a function that combines summary + RAG for any query.\n", + "\n", + "2. **Measure quality**: Test each strategy with 10 different queries. Which gives the best responses?\n", + "\n", + "3. **Optimize search**: Experiment with different search limits. What's the sweet spot for your use case?\n", + "\n", + "4. **Create summaries**: Build pre-computed summaries for different views (by department, by difficulty, by format)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Summary\n", + "\n", + "In this notebook, you learned:\n", + "\n", + "- โœ… Different retrieval strategies have different trade-offs\n", + "- โœ… RAG (semantic search) is efficient and scalable\n", + "- โœ… Summaries provide compact overviews\n", + "- โœ… Hybrid approach combines the best of both\n", + "- โœ… Proper retrieval is key to production-quality agents\n", + "\n", + "**Key insight:** Don't include everything - retrieve smartly. The hybrid strategy (summaries + targeted RAG) provides the best balance of quality, efficiency, and scalability." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/python-recipes/context-engineering/notebooks_archive/section-4-optimizations/03_grounding_with_memory.ipynb b/python-recipes/context-engineering/notebooks_archive/section-4-optimizations/03_grounding_with_memory.ipynb new file mode 100644 index 00000000..78e8d802 --- /dev/null +++ b/python-recipes/context-engineering/notebooks_archive/section-4-optimizations/03_grounding_with_memory.ipynb @@ -0,0 +1,547 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Grounding with Memory: Using Context to Resolve References\n", + "\n", + "## Introduction\n", + "\n", + "In this notebook, you'll learn about grounding - how agents use memory to understand references and maintain context across a conversation. When users say \"that course\" or \"my advisor\", the agent needs to know what they're referring to. The Agent Memory Server's extracted memories provide this grounding automatically.\n", + "\n", + "### What You'll Learn\n", + "\n", + "- What grounding is and why it matters\n", + "- How extracted memories provide grounding\n", + "- How to handle references to people, places, and things\n", + "- How memory enables natural conversation flow\n", + "\n", + "### Prerequisites\n", + "\n", + "- Completed Section 3 notebooks\n", + "- Redis 8 running locally\n", + "- Agent Memory Server running\n", + "- OpenAI API key set" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Concepts: Grounding\n", + "\n", + "### What is Grounding?\n", + "\n", + "**Grounding** is the process of connecting references in conversation to their actual meanings. When someone says:\n", + "\n", + "- \"Tell me more about **that course**\" - Which course?\n", + "- \"When does **she** teach?\" - Who is \"she\"?\n", + "- \"Is **it** available online?\" - What is \"it\"?\n", + "- \"What about **the other one**?\" - Which one?\n", + "\n", + "The agent needs to **ground** these references to specific entities mentioned earlier in the conversation.\n", + "\n", + "### Grounding Without Memory (Bad)\n", + "\n", + "```\n", + "User: I'm interested in machine learning.\n", + "Agent: Great! We have CS401: Machine Learning.\n", + "\n", + "User: Tell me more about that course.\n", + "Agent: Which course are you asking about? โŒ\n", + "```\n", + "\n", + "### Grounding With Memory (Good)\n", + "\n", + "```\n", + "User: I'm interested in machine learning.\n", + "Agent: Great! We have CS401: Machine Learning.\n", + "[Memory extracted: \"Student interested in CS401\"]\n", + "\n", + "User: Tell me more about that course.\n", + "Agent: CS401 covers supervised learning, neural networks... โœ…\n", + "[Memory grounds \"that course\" to CS401]\n", + "```\n", + "\n", + "### How Agent Memory Server Provides Grounding\n", + "\n", + "The Agent Memory Server automatically:\n", + "1. **Extracts entities** from conversations (courses, people, places)\n", + "2. **Stores them** in long-term memory with context\n", + "3. **Retrieves them** when similar references appear\n", + "4. 
**Provides context** to ground ambiguous references\n", + "\n", + "### Types of References\n", + "\n", + "**Pronouns:**\n", + "- \"it\", \"that\", \"this\", \"those\"\n", + "- \"he\", \"she\", \"they\"\n", + "\n", + "**Descriptions:**\n", + "- \"the ML class\"\n", + "- \"my advisor\"\n", + "- \"the main campus\"\n", + "\n", + "**Implicit references:**\n", + "- \"What are the prerequisites?\" (for what?)\n", + "- \"When does it meet?\" (what meets?)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import asyncio\n", + "from langchain_openai import ChatOpenAI\n", + "from langchain_core.messages import SystemMessage, HumanMessage, AIMessage\n", + "from agent_memory_client import MemoryAPIClient as MemoryClient, MemoryClientConfig\n", + "\n", + "# Initialize\n", + "student_id = \"student_789\"\n", + "session_id = \"grounding_demo\"\n", + "\n", + "# Initialize memory client with proper config\n", + "import os\n", + "config = MemoryClientConfig(\n", + " base_url=os.getenv(\"AGENT_MEMORY_URL\", \"http://localhost:8088\"),\n", + " default_namespace=\"redis_university\"\n", + ")\n", + "memory_client = MemoryClient(config=config)\n", + "\n", + "llm = ChatOpenAI(model=\"gpt-4o\", temperature=0.7)\n", + "\n", + "print(f\"โœ… Setup complete for {student_id}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Hands-on: Grounding Through Conversation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Example 1: Grounding Course References\n", + "\n", + "Let's have a conversation where we refer to courses in different ways." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "async def chat_turn(user_message, conversation_history):\n", + " \"\"\"Helper function to process a conversation turn.\"\"\"\n", + " \n", + " # Search long-term memory for context\n", + " memories = await memory_client.search_long_term_memory(\n", + " text=user_message,\n", + " limit=5\n", + " )\n", + " \n", + " # Build context from memories\n", + " memory_context = \"\\n\".join([f\"- {m.text}\" for m in memories.memories]) if memories.memories else \"None\"\n", + " \n", + " system_prompt = f\"\"\"You are a helpful class scheduling agent for Redis University.\n", + "\n", + "What you remember about this student:\n", + "{memory_context}\n", + "\n", + "Use this context to understand references like \"that course\", \"it\", \"the one I mentioned\", etc.\n", + "\"\"\"\n", + " \n", + " # Build messages\n", + " messages = [SystemMessage(content=system_prompt)]\n", + " messages.extend(conversation_history)\n", + " messages.append(HumanMessage(content=user_message))\n", + " \n", + " # Get response\n", + " response = llm.invoke(messages)\n", + " \n", + " # Update conversation history\n", + " conversation_history.append(HumanMessage(content=user_message))\n", + " conversation_history.append(AIMessage(content=response.content))\n", + " \n", + " # Save to working memory (triggers extraction)\n", + " messages_to_save = [\n", + " {\"role\": \"user\" if isinstance(m, HumanMessage) else \"assistant\", \"content\": m.content}\n", + " for m in conversation_history\n", + " ]\n", + " from agent_memory_client.models import WorkingMemory, MemoryMessage\n", + " \n", + " # Convert messages to MemoryMessage format\n", + " memory_messages = [MemoryMessage(**msg) for msg in 
messages_to_save]\n", + " \n", + " # Create WorkingMemory object\n", + " working_memory = WorkingMemory(\n", + " session_id=session_id,\n", + " user_id=\"demo_user\",\n", + " messages=memory_messages,\n", + " memories=[],\n", + " data={}\n", + " )\n", + " \n", + " await memory_client.put_working_memory(\n", + " session_id=session_id,\n", + " memory=working_memory,\n", + " user_id=\"demo_user\",\n", + " model_name=\"gpt-4o\"\n", + " )\n", + " \n", + " return response.content, conversation_history\n", + "\n", + "print(\"โœ… Helper function defined\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Start conversation\n", + "conversation = []\n", + "\n", + "print(\"=\" * 80)\n", + "print(\"CONVERSATION: Grounding Course References\")\n", + "print(\"=\" * 80)\n", + "\n", + "# Turn 1: Mention a specific course\n", + "print(\"\\n๐Ÿ‘ค User: I'm interested in CS401, the machine learning course.\")\n", + "response, conversation = await chat_turn(\n", + " \"I'm interested in CS401, the machine learning course.\",\n", + " conversation\n", + ")\n", + "print(f\"๐Ÿค– Agent: {response}\")\n", + "\n", + "# Wait for extraction\n", + "await asyncio.sleep(2)\n", + "\n", + "# Turn 2: Use pronoun \"it\"\n", + "print(\"\\n๐Ÿ‘ค User: What are the prerequisites for it?\")\n", + "response, conversation = await chat_turn(\n", + " \"What are the prerequisites for it?\",\n", + " conversation\n", + ")\n", + "print(f\"๐Ÿค– Agent: {response}\")\n", + "print(\"\\nโœ… Agent grounded 'it' to CS401\")\n", + "\n", + "# Turn 3: Use description \"that ML class\"\n", + "print(\"\\n๐Ÿ‘ค User: Is that ML class available online?\")\n", + "response, conversation = await chat_turn(\n", + " \"Is that ML class available online?\",\n", + " conversation\n", + ")\n", + "print(f\"๐Ÿค– Agent: {response}\")\n", + "print(\"\\nโœ… Agent grounded 'that ML class' to CS401\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Example 2: Grounding People References\n", + "\n", + "Let's have a conversation about people (advisors, professors)." 
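+ "\n",
+ "As you work through these examples, you can also probe the retrieval layer directly: searching long-term memory with the ambiguous phrase itself shows which stored memories are available to ground it. A sketch using the same `search_long_term_memory` call as `chat_turn` (run it after the conversation below has been saved):\n",
+ "\n",
+ "```python\n",
+ "# Which memories would ground the phrase \"my advisor\"?\n",
+ "results = await memory_client.search_long_term_memory(\n",
+ "    text=\"my advisor\",\n",
+ "    limit=3\n",
+ ")\n",
+ "for m in results.memories:\n",
+ "    print(\"-\", m.text)\n",
+ "```"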
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# New conversation\n", + "conversation = []\n", + "\n", + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"CONVERSATION: Grounding People References\")\n", + "print(\"=\" * 80)\n", + "\n", + "# Turn 1: Mention a person\n", + "print(\"\\n๐Ÿ‘ค User: My advisor is Professor Smith from the CS department.\")\n", + "response, conversation = await chat_turn(\n", + " \"My advisor is Professor Smith from the CS department.\",\n", + " conversation\n", + ")\n", + "print(f\"๐Ÿค– Agent: {response}\")\n", + "\n", + "await asyncio.sleep(2)\n", + "\n", + "# Turn 2: Use pronoun \"she\"\n", + "print(\"\\n๐Ÿ‘ค User: What courses does she teach?\")\n", + "response, conversation = await chat_turn(\n", + " \"What courses does she teach?\",\n", + " conversation\n", + ")\n", + "print(f\"๐Ÿค– Agent: {response}\")\n", + "print(\"\\nโœ… Agent grounded 'she' to Professor Smith\")\n", + "\n", + "# Turn 3: Use description \"my advisor\"\n", + "print(\"\\n๐Ÿ‘ค User: Can my advisor help me with course selection?\")\n", + "response, conversation = await chat_turn(\n", + " \"Can my advisor help me with course selection?\",\n", + " conversation\n", + ")\n", + "print(f\"๐Ÿค– Agent: {response}\")\n", + "print(\"\\nโœ… Agent grounded 'my advisor' to Professor Smith\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Example 3: Grounding Place References\n", + "\n", + "Let's talk about campus locations." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# New conversation\n", + "conversation = []\n", + "\n", + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"CONVERSATION: Grounding Place References\")\n", + "print(\"=\" * 80)\n", + "\n", + "# Turn 1: Mention a place\n", + "print(\"\\n๐Ÿ‘ค User: I prefer taking classes at the downtown campus.\")\n", + "response, conversation = await chat_turn(\n", + " \"I prefer taking classes at the downtown campus.\",\n", + " conversation\n", + ")\n", + "print(f\"๐Ÿค– Agent: {response}\")\n", + "\n", + "await asyncio.sleep(2)\n", + "\n", + "# Turn 2: Use pronoun \"there\"\n", + "print(\"\\n๐Ÿ‘ค User: What CS courses are offered there?\")\n", + "response, conversation = await chat_turn(\n", + " \"What CS courses are offered there?\",\n", + " conversation\n", + ")\n", + "print(f\"๐Ÿค– Agent: {response}\")\n", + "print(\"\\nโœ… Agent grounded 'there' to downtown campus\")\n", + "\n", + "# Turn 3: Use description \"that campus\"\n", + "print(\"\\n๐Ÿ‘ค User: How do I get to that campus?\")\n", + "response, conversation = await chat_turn(\n", + " \"How do I get to that campus?\",\n", + " conversation\n", + ")\n", + "print(f\"๐Ÿค– Agent: {response}\")\n", + "print(\"\\nโœ… Agent grounded 'that campus' to downtown campus\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Example 4: Complex Multi-Reference Conversation\n", + "\n", + "Let's have a longer conversation with multiple entities to ground." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# New conversation\n", + "conversation = []\n", + "\n", + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"CONVERSATION: Complex Multi-Reference\")\n", + "print(\"=\" * 80)\n", + "\n", + "# Turn 1\n", + "print(\"\\n๐Ÿ‘ค User: I'm looking at CS401 and CS402. 
Which one should I take first?\")\n", + "response, conversation = await chat_turn(\n", + " \"I'm looking at CS401 and CS402. Which one should I take first?\",\n", + " conversation\n", + ")\n", + "print(f\"๐Ÿค– Agent: {response}\")\n", + "\n", + "await asyncio.sleep(2)\n", + "\n", + "# Turn 2\n", + "print(\"\\n๐Ÿ‘ค User: What about the other one? When is it offered?\")\n", + "response, conversation = await chat_turn(\n", + " \"What about the other one? When is it offered?\",\n", + " conversation\n", + ")\n", + "print(f\"๐Ÿค– Agent: {response}\")\n", + "print(\"\\nโœ… Agent grounded 'the other one' to the second course mentioned\")\n", + "\n", + "# Turn 3\n", + "print(\"\\n๐Ÿ‘ค User: Can I take both in the same semester?\")\n", + "response, conversation = await chat_turn(\n", + " \"Can I take both in the same semester?\",\n", + " conversation\n", + ")\n", + "print(f\"๐Ÿค– Agent: {response}\")\n", + "print(\"\\nโœ… Agent grounded 'both' to CS401 and CS402\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Verify Extracted Memories\n", + "\n", + "Let's check what memories were extracted to enable grounding." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"EXTRACTED MEMORIES (Enable Grounding)\")\n", + "print(\"=\" * 80)\n", + "\n", + "# Get all memories\n", + "all_memories = await memory_client.search_long_term_memory(\n", + " text=\"\",\n", + " limit=20\n", + ")\n", + "\n", + "print(\"\\nMemories that enable grounding:\\n\")\n", + "for i, memory in enumerate(all_memories.memories, 1):\n", + " print(f\"{i}. {memory.text}\")\n", + " print(f\" Type: {memory.memory_type} | Topics: {', '.join(memory.topics)}\")\n", + " print()\n", + "\n", + "print(\"โœ… These memories provide the context needed to ground references!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Key Takeaways\n", + "\n", + "### How Grounding Works\n", + "\n", + "1. **User mentions entity** (course, person, place)\n", + "2. **Agent Memory Server extracts** entity to long-term memory\n", + "3. **User makes reference** (\"it\", \"that\", \"she\", etc.)\n", + "4. **Semantic search retrieves** relevant memories\n", + "5. **Agent grounds reference** using memory context\n", + "\n", + "### Types of Grounding\n", + "\n", + "**Direct references:**\n", + "- \"CS401\" โ†’ Specific course\n", + "- \"Professor Smith\" โ†’ Specific person\n", + "\n", + "**Pronoun references:**\n", + "- \"it\" โ†’ Last mentioned thing\n", + "- \"she\" โ†’ Last mentioned person\n", + "- \"there\" โ†’ Last mentioned place\n", + "\n", + "**Description references:**\n", + "- \"that ML class\" โ†’ Course about ML\n", + "- \"my advisor\" โ†’ Student's advisor\n", + "- \"the downtown campus\" โ†’ Specific campus\n", + "\n", + "**Implicit references:**\n", + "- \"What are the prerequisites?\" โ†’ For the course we're discussing\n", + "- \"When does it meet?\" โ†’ The course mentioned\n", + "\n", + "### Why Memory-Based Grounding Works\n", + "\n", + "โœ… **Automatic** - No manual entity tracking needed\n", + "โœ… **Semantic** - Understands similar references\n", + "โœ… **Persistent** - Works across sessions\n", + "โœ… **Contextual** - Uses conversation history\n", + "โœ… **Natural** - Enables human-like conversation\n", + "\n", + "### Best Practices\n", + "\n", + "1. **Include memory context in system prompt** - Give LLM grounding information\n", + "2. 
**Search with user's query** - Find relevant entities\n", + "3. **Trust semantic search** - It finds related memories\n", + "4. **Let extraction happen** - Don't manually track entities\n", + "5. **Test with pronouns** - Verify grounding works" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Exercises\n", + "\n", + "1. **Test ambiguous references**: Have a conversation mentioning multiple courses, then use \"it\". Does the agent ground correctly?\n", + "\n", + "2. **Cross-session grounding**: Start a new session and refer to entities from a previous session. Does it work?\n", + "\n", + "3. **Complex conversation**: Have a 10-turn conversation with multiple entities. Track how grounding evolves.\n", + "\n", + "4. **Grounding failure**: Try to break grounding by using very ambiguous references. What happens?" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Summary\n", + "\n", + "In this notebook, you learned:\n", + "\n", + "- โœ… Grounding connects references to their actual meanings\n", + "- โœ… Agent Memory Server's extracted memories provide grounding automatically\n", + "- โœ… Semantic search retrieves relevant context for grounding\n", + "- โœ… Grounding enables natural, human-like conversations\n", + "- โœ… No manual entity tracking needed - memory handles it\n", + "\n", + "**Key insight:** Memory-based grounding is what makes agents feel intelligent and context-aware. Without it, every reference needs to be explicit, making conversations robotic and frustrating." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/python-recipes/context-engineering/notebooks_archive/section-4-optimizations/04_tool_optimization.ipynb b/python-recipes/context-engineering/notebooks_archive/section-4-optimizations/04_tool_optimization.ipynb new file mode 100644 index 00000000..943cd6be --- /dev/null +++ b/python-recipes/context-engineering/notebooks_archive/section-4-optimizations/04_tool_optimization.ipynb @@ -0,0 +1,654 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Tool Optimization: Selective Tool Exposure\n", + "\n", + "## Introduction\n", + "\n", + "In this advanced notebook, you'll learn how to optimize tool usage by selectively exposing tools based on context. When you have many tools, showing all of them to the LLM on every request wastes tokens and can cause confusion. 
You'll learn the \"tool shed\" pattern and dynamic tool selection.\n", + "\n", + "### What You'll Learn\n", + "\n", + "- The tool shed pattern (selective tool exposure)\n", + "- Dynamic tool selection based on context\n", + "- Reducing tool confusion\n", + "- Measuring improvement in tool selection\n", + "- When to use tool optimization\n", + "\n", + "### Prerequisites\n", + "\n", + "- Completed Section 2 notebooks\n", + "- Completed `section-2-system-context/03_tool_selection_strategies.ipynb`\n", + "- Redis 8 running locally\n", + "- OpenAI API key set" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Concepts: The Tool Overload Problem\n", + "\n", + "### The Problem with Many Tools\n", + "\n", + "As your agent grows, you add more tools:\n", + "\n", + "```python\n", + "tools = [\n", + " search_courses, # 1\n", + " get_course_details, # 2\n", + " check_prerequisites, # 3\n", + " enroll_in_course, # 4\n", + " drop_course, # 5\n", + " get_student_schedule, # 6\n", + " check_schedule_conflicts, # 7\n", + " get_course_reviews, # 8\n", + " submit_course_review, # 9\n", + " get_instructor_info, # 10\n", + " # ... 20 more tools\n", + "]\n", + "```\n", + "\n", + "**Problems:**\n", + "- โŒ **Token waste**: Tool schemas consume tokens\n", + "- โŒ **Confusion**: Too many choices\n", + "- โŒ **Slower**: More tools = more processing\n", + "- โŒ **Wrong selection**: Similar tools confuse LLM\n", + "\n", + "### The Tool Shed Pattern\n", + "\n", + "**Idea:** Don't show all tools at once. Show only relevant tools based on context.\n", + "\n", + "```python\n", + "# Instead of showing all 30 tools...\n", + "all_tools = [tool1, tool2, ..., tool30]\n", + "\n", + "# Show only relevant tools\n", + "if query_type == \"search\":\n", + " relevant_tools = [search_courses, get_course_details]\n", + "elif query_type == \"enrollment\":\n", + " relevant_tools = [enroll_in_course, drop_course, check_conflicts]\n", + "elif query_type == \"review\":\n", + " relevant_tools = [get_course_reviews, submit_review]\n", + "```\n", + "\n", + "**Benefits:**\n", + "- โœ… Fewer tokens\n", + "- โœ… Less confusion\n", + "- โœ… Faster processing\n", + "- โœ… Better tool selection\n", + "\n", + "### Dynamic Tool Selection Strategies\n", + "\n", + "**1. Query-based filtering:**\n", + "```python\n", + "if \"search\" in query or \"find\" in query:\n", + " tools = search_tools\n", + "elif \"enroll\" in query or \"register\" in query:\n", + " tools = enrollment_tools\n", + "```\n", + "\n", + "**2. Intent classification:**\n", + "```python\n", + "intent = classify_intent(query) # \"search\", \"enroll\", \"review\"\n", + "tools = tool_groups[intent]\n", + "```\n", + "\n", + "**3. Conversation state:**\n", + "```python\n", + "if conversation_state == \"browsing\":\n", + " tools = [search, get_details]\n", + "elif conversation_state == \"enrolling\":\n", + " tools = [enroll, check_conflicts]\n", + "```\n", + "\n", + "**4. 
Hierarchical tools:**\n", + "```python\n", + "# First: Show high-level tools\n", + "tools = [search_courses, manage_enrollment, view_reviews]\n", + "\n", + "# Then: Show specific tools based on choice\n", + "if user_chose == \"manage_enrollment\":\n", + " tools = [enroll, drop, swap, check_conflicts]\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import re\n", + "from typing import List, Dict, Any\n", + "from langchain_openai import ChatOpenAI\n", + "from langchain_core.messages import SystemMessage, HumanMessage\n", + "from langchain_core.tools import tool\n", + "from pydantic import BaseModel, Field\n", + "from redis_context_course import CourseManager\n", + "\n", + "# Initialize\n", + "llm = ChatOpenAI(model=\"gpt-4o\", temperature=0)\n", + "course_manager = CourseManager()\n", + "\n", + "print(\"โœ… Setup complete\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Creating Tool Groups\n", + "\n", + "Let's organize tools into logical groups." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Define tools (simplified for demo)\n", + "class SearchInput(BaseModel):\n", + " query: str = Field(description=\"Search query\")\n", + "\n", + "@tool(args_schema=SearchInput)\n", + "async def search_courses(query: str) -> str:\n", + " \"\"\"Search for courses by topic or description.\"\"\"\n", + " return f\"Searching for: {query}\"\n", + "\n", + "@tool(args_schema=SearchInput)\n", + "async def get_course_details(query: str) -> str:\n", + " \"\"\"Get detailed information about a specific course.\"\"\"\n", + " return f\"Details for: {query}\"\n", + "\n", + "@tool(args_schema=SearchInput)\n", + "async def check_prerequisites(query: str) -> str:\n", + " \"\"\"Check prerequisites for a course.\"\"\"\n", + " return f\"Prerequisites for: {query}\"\n", + "\n", + "@tool(args_schema=SearchInput)\n", + "async def enroll_in_course(query: str) -> str:\n", + " \"\"\"Enroll student in a course.\"\"\"\n", + " return f\"Enrolling in: {query}\"\n", + "\n", + "@tool(args_schema=SearchInput)\n", + "async def drop_course(query: str) -> str:\n", + " \"\"\"Drop a course from student's schedule.\"\"\"\n", + " return f\"Dropping: {query}\"\n", + "\n", + "@tool(args_schema=SearchInput)\n", + "async def check_schedule_conflicts(query: str) -> str:\n", + " \"\"\"Check for schedule conflicts.\"\"\"\n", + " return f\"Checking conflicts for: {query}\"\n", + "\n", + "@tool(args_schema=SearchInput)\n", + "async def get_course_reviews(query: str) -> str:\n", + " \"\"\"Get reviews for a course.\"\"\"\n", + " return f\"Reviews for: {query}\"\n", + "\n", + "@tool(args_schema=SearchInput)\n", + "async def submit_course_review(query: str) -> str:\n", + " \"\"\"Submit a review for a course.\"\"\"\n", + " return f\"Submitting review for: {query}\"\n", + "\n", + "# Organize into groups\n", + "TOOL_GROUPS = {\n", + " \"search\": [\n", + " search_courses,\n", + " get_course_details,\n", + " check_prerequisites\n", + " ],\n", + " \"enrollment\": [\n", + " enroll_in_course,\n", + " drop_course,\n", + " check_schedule_conflicts\n", + " ],\n", + " \"reviews\": [\n", + " get_course_reviews,\n", + " submit_course_review\n", + " ]\n", + "}\n", + "\n", + "ALL_TOOLS = [\n", + " search_courses,\n", + " get_course_details,\n", + " check_prerequisites,\n", + " 
enroll_in_course,\n", + " drop_course,\n", + " check_schedule_conflicts,\n", + " get_course_reviews,\n", + " submit_course_review\n", + "]\n", + "\n", + "print(f\"โœ… Created {len(ALL_TOOLS)} tools in {len(TOOL_GROUPS)} groups\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Strategy 1: Query-Based Tool Filtering\n", + "\n", + "Select tools based on keywords in the query." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def select_tools_by_keywords(query: str) -> List:\n", + " \"\"\"Select relevant tools based on query keywords.\"\"\"\n", + " query_lower = query.lower()\n", + " \n", + " # Search-related keywords\n", + " if any(word in query_lower for word in ['search', 'find', 'show', 'what', 'which', 'tell me about']):\n", + " return TOOL_GROUPS[\"search\"]\n", + " \n", + " # Enrollment-related keywords\n", + " elif any(word in query_lower for word in ['enroll', 'register', 'drop', 'add', 'remove', 'conflict']):\n", + " return TOOL_GROUPS[\"enrollment\"]\n", + " \n", + " # Review-related keywords\n", + " elif any(word in query_lower for word in ['review', 'rating', 'feedback', 'opinion']):\n", + " return TOOL_GROUPS[\"reviews\"]\n", + " \n", + " # Default: return search tools\n", + " else:\n", + " return TOOL_GROUPS[\"search\"]\n", + "\n", + "# Test it\n", + "test_queries = [\n", + " \"I want to search for machine learning courses\",\n", + " \"Can I enroll in CS401?\",\n", + " \"What are the reviews for CS301?\",\n", + " \"Tell me about database courses\"\n", + "]\n", + "\n", + "print(\"=\" * 80)\n", + "print(\"QUERY-BASED TOOL FILTERING\")\n", + "print(\"=\" * 80)\n", + "\n", + "for query in test_queries:\n", + " selected_tools = select_tools_by_keywords(query)\n", + " tool_names = [t.name for t in selected_tools]\n", + " print(f\"\\nQuery: {query}\")\n", + " print(f\"Selected tools: {', '.join(tool_names)}\")\n", + " print(f\"Count: {len(selected_tools)} / {len(ALL_TOOLS)} tools\")\n", + "\n", + "print(\"\\n\" + \"=\" * 80)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Strategy 2: Intent Classification\n", + "\n", + "Use the LLM to classify intent, then select tools." 
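+ "\n",
+ "The next cell implements the classifier. One practical note before running it: classification adds an extra LLM round-trip per request. A tiny cache (a sketch; it wraps the `classify_intent` function defined in the next cell) amortizes that cost for repeated queries.\n",
+ "\n",
+ "```python\n",
+ "_intent_cache = {}  # query text -> classified intent\n",
+ "\n",
+ "async def classify_intent_cached(query: str) -> str:\n",
+ "    \"\"\"Memoize intent classification to avoid repeated LLM calls.\"\"\"\n",
+ "    if query not in _intent_cache:\n",
+ "        _intent_cache[query] = await classify_intent(query)\n",
+ "    return _intent_cache[query]\n",
+ "```"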
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "async def classify_intent(query: str) -> str:\n", + " \"\"\"Classify user intent using LLM.\"\"\"\n", + " prompt = f\"\"\"Classify the user's intent into one of these categories:\n", + "- search: Looking for courses or information\n", + "- enrollment: Enrolling, dropping, or managing courses\n", + "- reviews: Reading or writing course reviews\n", + "\n", + "User query: \"{query}\"\n", + "\n", + "Respond with only the category name (search, enrollment, or reviews).\n", + "\"\"\"\n", + " \n", + " messages = [\n", + " SystemMessage(content=\"You are a helpful assistant that classifies user intents.\"),\n", + " HumanMessage(content=prompt)\n", + " ]\n", + " \n", + " response = llm.invoke(messages)\n", + " intent = response.content.strip().lower()\n", + " \n", + " # Validate intent\n", + " if intent not in TOOL_GROUPS:\n", + " intent = \"search\" # Default\n", + " \n", + " return intent\n", + "\n", + "async def select_tools_by_intent(query: str) -> List:\n", + " \"\"\"Select tools based on classified intent.\"\"\"\n", + " intent = await classify_intent(query)\n", + " return TOOL_GROUPS[intent], intent\n", + "\n", + "# Test it\n", + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"INTENT-BASED TOOL FILTERING\")\n", + "print(\"=\" * 80)\n", + "\n", + "for query in test_queries:\n", + " selected_tools, intent = await select_tools_by_intent(query)\n", + " tool_names = [t.name for t in selected_tools]\n", + " print(f\"\\nQuery: {query}\")\n", + " print(f\"Intent: {intent}\")\n", + " print(f\"Selected tools: {', '.join(tool_names)}\")\n", + " print(f\"Count: {len(selected_tools)} / {len(ALL_TOOLS)} tools\")\n", + "\n", + "print(\"\\n\" + \"=\" * 80)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Comparing: All Tools vs. Filtered Tools\n", + "\n", + "Let's compare tool selection with and without filtering." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"COMPARISON: ALL TOOLS vs. 
FILTERED TOOLS\")\n", + "print(\"=\" * 80)\n", + "\n", + "test_query = \"I want to enroll in CS401\"\n", + "\n", + "# Approach 1: All tools\n", + "print(f\"\\nQuery: {test_query}\")\n", + "print(\"\\n--- APPROACH 1: Show all tools ---\")\n", + "llm_all_tools = llm.bind_tools(ALL_TOOLS)\n", + "messages = [\n", + " SystemMessage(content=\"You are a class scheduling agent.\"),\n", + " HumanMessage(content=test_query)\n", + "]\n", + "response_all = llm_all_tools.invoke(messages)\n", + "\n", + "if response_all.tool_calls:\n", + " print(f\"Selected tool: {response_all.tool_calls[0]['name']}\")\n", + "print(f\"Tools shown: {len(ALL_TOOLS)}\")\n", + "\n", + "# Approach 2: Filtered tools\n", + "print(\"\\n--- APPROACH 2: Show filtered tools ---\")\n", + "filtered_tools = select_tools_by_keywords(test_query)\n", + "llm_filtered_tools = llm.bind_tools(filtered_tools)\n", + "response_filtered = llm_filtered_tools.invoke(messages)\n", + "\n", + "if response_filtered.tool_calls:\n", + " print(f\"Selected tool: {response_filtered.tool_calls[0]['name']}\")\n", + "print(f\"Tools shown: {len(filtered_tools)}\")\n", + "\n", + "print(\"\\nโœ… Benefits of filtering:\")\n", + "print(f\" - Reduced tools: {len(ALL_TOOLS)} โ†’ {len(filtered_tools)}\")\n", + "print(f\" - Token savings: ~{(len(ALL_TOOLS) - len(filtered_tools)) * 100} tokens\")\n", + "print(f\" - Less confusion: Fewer irrelevant tools\")\n", + "\n", + "print(\"\\n\" + \"=\" * 80)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Strategy 3: Hierarchical Tools\n", + "\n", + "Start with high-level tools, then drill down." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"HIERARCHICAL TOOL APPROACH\")\n", + "print(\"=\" * 80)\n", + "\n", + "# High-level tools\n", + "@tool\n", + "async def browse_courses(query: str) -> str:\n", + " \"\"\"Browse and search for courses. Use this for finding courses.\"\"\"\n", + " return \"Browsing courses...\"\n", + "\n", + "@tool\n", + "async def manage_enrollment(query: str) -> str:\n", + " \"\"\"Manage course enrollment (enroll, drop, check conflicts). Use this for enrollment actions.\"\"\"\n", + " return \"Managing enrollment...\"\n", + "\n", + "@tool\n", + "async def view_reviews(query: str) -> str:\n", + " \"\"\"View or submit course reviews. Use this for review-related queries.\"\"\"\n", + " return \"Viewing reviews...\"\n", + "\n", + "high_level_tools = [browse_courses, manage_enrollment, view_reviews]\n", + "\n", + "print(\"\\nStep 1: Show high-level tools\")\n", + "print(f\"Tools: {[t.name for t in high_level_tools]}\")\n", + "print(f\"Count: {len(high_level_tools)} tools\")\n", + "\n", + "print(\"\\nStep 2: User selects 'manage_enrollment'\")\n", + "print(\"Now show specific enrollment tools:\")\n", + "enrollment_tools = TOOL_GROUPS[\"enrollment\"]\n", + "print(f\"Tools: {[t.name for t in enrollment_tools]}\")\n", + "print(f\"Count: {len(enrollment_tools)} tools\")\n", + "\n", + "print(\"\\nโœ… Benefits:\")\n", + "print(\" - Start simple (3 tools)\")\n", + "print(\" - Drill down as needed\")\n", + "print(\" - User-guided filtering\")\n", + "\n", + "print(\"\\n\" + \"=\" * 80)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Measuring Improvement\n", + "\n", + "Let's measure the impact of tool filtering." 
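Accuracy (measured next) is one axis of improvement; token cost is the other. Rather than relying on the rule-of-thumb figure used later in this notebook (~100 tokens per tool schema), you can measure your actual tools. A rough sketch, assuming `tiktoken` is installed: it serializes each tool's OpenAI-format schema via LangChain's `convert_to_openai_tool` and counts tokens on the JSON — a proxy for, not an exact match of, the provider's own accounting.

```python
import json

import tiktoken
from langchain_core.utils.function_calling import convert_to_openai_tool

# Count tokens in each tool's serialized schema. Assumes ALL_TOOLS from
# earlier in this notebook.
enc = tiktoken.encoding_for_model("gpt-4o")

def tool_schema_tokens(tool) -> int:
    schema = convert_to_openai_tool(tool)  # OpenAI function-calling format
    return len(enc.encode(json.dumps(schema)))

per_tool = {t.name: tool_schema_tokens(t) for t in ALL_TOOLS}
for name, tokens in per_tool.items():
    print(f"{name}: ~{tokens} tokens")
print(f"Total for {len(per_tool)} tools: ~{sum(per_tool.values())} tokens")
```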
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"MEASURING IMPROVEMENT\")\n", + "print(\"=\" * 80)\n", + "\n", + "# Test queries with expected tools\n", + "test_cases = [\n", + " (\"Find machine learning courses\", \"search_courses\"),\n", + " (\"Enroll me in CS401\", \"enroll_in_course\"),\n", + " (\"Show reviews for CS301\", \"get_course_reviews\"),\n", + " (\"Drop CS201 from my schedule\", \"drop_course\"),\n", + " (\"What are the prerequisites for CS401?\", \"check_prerequisites\"),\n", + "]\n", + "\n", + "print(\"\\nTesting tool selection accuracy...\\n\")\n", + "\n", + "correct_all = 0\n", + "correct_filtered = 0\n", + "\n", + "for query, expected_tool in test_cases:\n", + " # Test with all tools\n", + " llm_all = llm.bind_tools(ALL_TOOLS)\n", + " response_all = llm_all.invoke([\n", + " SystemMessage(content=\"You are a class scheduling agent.\"),\n", + " HumanMessage(content=query)\n", + " ])\n", + " selected_all = response_all.tool_calls[0]['name'] if response_all.tool_calls else None\n", + " \n", + " # Test with filtered tools\n", + " filtered = select_tools_by_keywords(query)\n", + " llm_filtered = llm.bind_tools(filtered)\n", + " response_filtered = llm_filtered.invoke([\n", + " SystemMessage(content=\"You are a class scheduling agent.\"),\n", + " HumanMessage(content=query)\n", + " ])\n", + " selected_filtered = response_filtered.tool_calls[0]['name'] if response_filtered.tool_calls else None\n", + " \n", + " # Check correctness\n", + " if selected_all == expected_tool:\n", + " correct_all += 1\n", + " if selected_filtered == expected_tool:\n", + " correct_filtered += 1\n", + " \n", + " print(f\"Query: {query}\")\n", + " print(f\" Expected: {expected_tool}\")\n", + " print(f\" All tools: {selected_all} {'โœ…' if selected_all == expected_tool else 'โŒ'}\")\n", + " print(f\" Filtered: {selected_filtered} {'โœ…' if selected_filtered == expected_tool else 'โŒ'}\")\n", + " print()\n", + "\n", + "print(\"=\" * 80)\n", + "print(f\"\\nAccuracy with all tools: {correct_all}/{len(test_cases)} ({correct_all/len(test_cases)*100:.0f}%)\")\n", + "print(f\"Accuracy with filtered tools: {correct_filtered}/{len(test_cases)} ({correct_filtered/len(test_cases)*100:.0f}%)\")\n", + "\n", + "print(\"\\nโœ… Tool filtering improves:\")\n", + "print(\" - Selection accuracy\")\n", + "print(\" - Token efficiency\")\n", + "print(\" - Processing speed\")\n", + "\n", + "print(\"\\n\" + \"=\" * 80)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Key Takeaways\n", + "\n", + "### When to Use Tool Filtering\n", + "\n", + "**Use tool filtering when:**\n", + "- โœ… You have 10+ tools\n", + "- โœ… Tools have distinct use cases\n", + "- โœ… Token budget is tight\n", + "- โœ… Tool confusion is an issue\n", + "\n", + "**Don't filter when:**\n", + "- โŒ You have < 5 tools\n", + "- โŒ All tools are frequently used\n", + "- โŒ Tools are highly related\n", + "\n", + "### Filtering Strategies\n", + "\n", + "**1. Keyword-based (Simple)**\n", + "- โœ… Fast, no LLM call\n", + "- โœ… Easy to implement\n", + "- โš ๏ธ Can be brittle\n", + "\n", + "**2. Intent classification (Better)**\n", + "- โœ… More accurate\n", + "- โœ… Handles variations\n", + "- โš ๏ธ Requires LLM call\n", + "\n", + "**3. Hierarchical (Best for many tools)**\n", + "- โœ… Scales well\n", + "- โœ… User-guided\n", + "- โš ๏ธ More complex\n", + "\n", + "### Implementation Tips\n", + "\n", + "1. 
**Group logically** - Organize tools by use case\n", + "2. **Start simple** - Use keyword filtering first\n", + "3. **Measure impact** - Track accuracy and token usage\n", + "4. **Iterate** - Refine based on real usage\n", + "5. **Have fallback** - Default to search tools if unsure\n", + "\n", + "### Token Savings\n", + "\n", + "Typical tool schema: ~100 tokens\n", + "\n", + "**Example:**\n", + "- 30 tools ร— 100 tokens = 3,000 tokens\n", + "- Filtered to 5 tools ร— 100 tokens = 500 tokens\n", + "- **Savings: 2,500 tokens per request!**\n", + "\n", + "Over 1,000 requests:\n", + "- Savings: 2.5M tokens\n", + "- Cost savings: ~$5-10 (depending on model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Exercises\n", + "\n", + "1. **Create tool groups**: Organize your agent's tools into logical groups. How many groups make sense?\n", + "\n", + "2. **Implement filtering**: Add keyword-based filtering to your agent. Measure token savings.\n", + "\n", + "3. **Test accuracy**: Create 20 test queries. Does filtering improve or hurt tool selection accuracy?\n", + "\n", + "4. **Hierarchical design**: Design a hierarchical tool structure for a complex agent with 30+ tools." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Summary\n", + "\n", + "In this notebook, you learned:\n", + "\n", + "- โœ… Tool filtering reduces token usage and confusion\n", + "- โœ… The tool shed pattern: show only relevant tools\n", + "- โœ… Multiple filtering strategies: keywords, intent, hierarchical\n", + "- โœ… Filtering improves accuracy and efficiency\n", + "- โœ… Essential for agents with many tools\n", + "\n", + "**Key insight:** Don't show all tools all the time. Selective tool exposure based on context improves tool selection, reduces token usage, and makes your agent more efficient. This is especially important as your agent grows and accumulates more tools." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} + diff --git a/python-recipes/context-engineering/notebooks_archive/section-4-optimizations/05_crafting_data_for_llms.ipynb b/python-recipes/context-engineering/notebooks_archive/section-4-optimizations/05_crafting_data_for_llms.ipynb new file mode 100644 index 00000000..7c7494a9 --- /dev/null +++ b/python-recipes/context-engineering/notebooks_archive/section-4-optimizations/05_crafting_data_for_llms.ipynb @@ -0,0 +1,840 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Crafting Data for LLMs: Creating Structured Views\n", + "\n", + "## Introduction\n", + "\n", + "In this advanced notebook, you'll learn how to create structured \"views\" or \"dashboards\" of data specifically optimized for LLM consumption. 
This goes beyond simple chunking and retrieval - you'll pre-compute summaries and organize data in ways that give your agent a high-level understanding while keeping token usage low.\n", + "\n", + "### What You'll Learn\n", + "\n", + "- Why pre-computed views matter\n", + "- How to create course catalog summary views\n", + "- How to build user profile views\n", + "- Techniques for retrieve โ†’ summarize โ†’ stitch โ†’ save\n", + "- When to use structured views vs. RAG\n", + "\n", + "### Prerequisites\n", + "\n", + "- Completed all Section 3 notebooks\n", + "- Completed Section 4 notebooks 01-03\n", + "- Redis 8 running locally\n", + "- Agent Memory Server running\n", + "- OpenAI API key set" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Concepts: Structured Data Views\n", + "\n", + "### Beyond Chunking and RAG\n", + "\n", + "Traditional approaches:\n", + "- **Chunking**: Split documents into pieces, retrieve relevant chunks\n", + "- **RAG**: Search for relevant documents/records on each query\n", + "\n", + "These work well, but have limitations:\n", + "- โŒ No high-level overview\n", + "- โŒ May miss important context\n", + "- โŒ Requires search on every request\n", + "- โŒ Can't see relationships across data\n", + "\n", + "### Structured Views Approach\n", + "\n", + "**Pre-compute summaries** that give the LLM:\n", + "- โœ… High-level overview of entire dataset\n", + "- โœ… Organized, structured information\n", + "- โœ… Key metadata for finding details\n", + "- โœ… Relationships between entities\n", + "\n", + "### Two Key Patterns\n", + "\n", + "#### 1. Course Catalog Summary View\n", + "\n", + "Instead of searching courses every time, give the agent:\n", + "```\n", + "Course Catalog Overview:\n", + "\n", + "Computer Science (50 courses):\n", + "- CS101: Intro to Programming (3 credits, beginner)\n", + "- CS201: Data Structures (3 credits, intermediate)\n", + "- CS401: Machine Learning (4 credits, advanced)\n", + "...\n", + "\n", + "Mathematics (30 courses):\n", + "- MATH101: Calculus I (4 credits, beginner)\n", + "...\n", + "```\n", + "\n", + "**Benefits:**\n", + "- Agent knows what's available\n", + "- Can reference specific courses\n", + "- Can suggest alternatives\n", + "- Compact (1-2K tokens for 100s of courses)\n", + "\n", + "#### 2. User Profile View\n", + "\n", + "Instead of searching memories every time, give the agent:\n", + "```\n", + "Student Profile: student_123\n", + "\n", + "Academic Info:\n", + "- Major: Computer Science\n", + "- Year: Junior\n", + "- GPA: 3.7\n", + "- Expected Graduation: Spring 2026\n", + "\n", + "Completed Courses (12):\n", + "- CS101 (A), CS201 (A-), CS301 (B+)\n", + "- MATH101 (A), MATH201 (B)\n", + "...\n", + "\n", + "Preferences:\n", + "- Prefers online courses\n", + "- Morning classes only\n", + "- No classes on Fridays\n", + "- Interested in AI/ML\n", + "\n", + "Goals:\n", + "- Graduate in 2026\n", + "- Focus on machine learning\n", + "- Maintain 3.5+ GPA\n", + "```\n", + "\n", + "**Benefits:**\n", + "- Agent has complete user context\n", + "- No need to search memories\n", + "- Personalized from turn 1\n", + "- Compact (500-1K tokens)\n", + "\n", + "### The Pattern: Retrieve โ†’ Summarize โ†’ Stitch โ†’ Save\n", + "\n", + "1. **Retrieve**: Get all relevant data from storage\n", + "2. **Summarize**: Use LLM to create concise summaries\n", + "3. **Stitch**: Combine summaries into structured view\n", + "4. 
**Save**: Store as string or JSON blob\n", + "\n", + "### When to Use Structured Views\n", + "\n", + "**Use structured views when:**\n", + "- โœ… Data changes infrequently\n", + "- โœ… Agent needs overview + details\n", + "- โœ… Same data used across many requests\n", + "- โœ… Relationships matter\n", + "\n", + "**Use RAG when:**\n", + "- โœ… Data changes frequently\n", + "- โœ… Dataset is huge (can't summarize all)\n", + "- โœ… Only need specific details\n", + "- โœ… Query-specific retrieval needed\n", + "\n", + "**Best: Combine both!**\n", + "- Structured view for overview\n", + "- RAG for specific details" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import json\n", + "import asyncio\n", + "from typing import List, Dict, Any\n", + "import tiktoken\n", + "from langchain_openai import ChatOpenAI\n", + "from langchain_core.messages import SystemMessage, HumanMessage\n", + "from redis_context_course import CourseManager, MemoryClient, MemoryClientConfig, redis_config\n", + "\n", + "# Initialize\n", + "course_manager = CourseManager()\n", + "# Initialize memory client with proper config\n", + "import os\n", + "config = MemoryClientConfig(\n", + " base_url=os.getenv(\"AGENT_MEMORY_URL\", \"http://localhost:8088\"),\n", + " default_namespace=\"redis_university\"\n", + ")\n", + "memory_client = MemoryClient(config=config)\n", + "llm = ChatOpenAI(model=\"gpt-4o\", temperature=0)\n", + "tokenizer = tiktoken.encoding_for_model(\"gpt-4o\")\n", + "redis_client = redis_config.redis_client\n", + "\n", + "def count_tokens(text: str) -> int:\n", + " return len(tokenizer.encode(text))\n", + "\n", + "print(\"โœ… Setup complete\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Example 1: Course Catalog Summary View\n", + "\n", + "Let's create a high-level summary of the entire course catalog." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 1: Retrieve All Courses" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"=\" * 80)\n", + "print(\"CREATING COURSE CATALOG SUMMARY VIEW\")\n", + "print(\"=\" * 80)\n", + "\n", + "# Step 1: Retrieve all courses\n", + "print(\"\\n1. Retrieving all courses...\")\n", + "all_courses = await course_manager.get_all_courses()\n", + "print(f\" Retrieved {len(all_courses)} courses\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 2: Organize by Department" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Step 2: Organize by department\n", + "print(\"\\n2. Organizing by department...\")\n", + "by_department = {}\n", + "for course in all_courses:\n", + " dept = course.department\n", + " if dept not in by_department:\n", + " by_department[dept] = []\n", + " by_department[dept].append(course)\n", + "\n", + "print(f\" Found {len(by_department)} departments\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 3: Summarize Each Department" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Step 3: Summarize each department\n", + "print(\"\\n3. 
Creating summaries for each department...\")\n", + "\n", + "async def summarize_department(dept_name: str, courses: List) -> str:\n", + " \"\"\"Create a concise summary of courses in a department.\"\"\"\n", + " \n", + " # Build course list\n", + " course_list = \"\\n\".join([\n", + " f\"- {c.course_code}: {c.title} ({c.credits} credits, {c.difficulty_level.value})\"\n", + " for c in courses[:10] # Limit for demo\n", + " ])\n", + " \n", + " # Ask LLM to create one-sentence descriptions\n", + " prompt = f\"\"\"Create a one-sentence description for each course. Be concise.\n", + "\n", + "Courses:\n", + "{course_list}\n", + "\n", + "Format: COURSE_CODE: One sentence description\n", + "\"\"\"\n", + " \n", + " messages = [\n", + " SystemMessage(content=\"You are a helpful assistant that creates concise course descriptions.\"),\n", + " HumanMessage(content=prompt)\n", + " ]\n", + " \n", + " response = llm.invoke(messages)\n", + " return response.content\n", + "\n", + "# Summarize first 3 departments (for demo)\n", + "dept_summaries = {}\n", + "for dept_name in list(by_department.keys())[:3]:\n", + " print(f\" Summarizing {dept_name}...\")\n", + " summary = await summarize_department(dept_name, by_department[dept_name])\n", + " dept_summaries[dept_name] = summary\n", + " await asyncio.sleep(0.5) # Rate limiting\n", + "\n", + "print(f\" Created {len(dept_summaries)} department summaries\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 4: Stitch Into Complete View" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Step 4: Stitch into complete view\n", + "print(\"\\n4. Stitching into complete catalog view...\")\n", + "\n", + "catalog_view_parts = [\"Redis University Course Catalog\\n\" + \"=\" * 40 + \"\\n\"]\n", + "\n", + "for dept_name, summary in dept_summaries.items():\n", + " course_count = len(by_department[dept_name])\n", + " catalog_view_parts.append(f\"\\n{dept_name} ({course_count} courses):\")\n", + " catalog_view_parts.append(summary)\n", + "\n", + "catalog_view = \"\\n\".join(catalog_view_parts)\n", + "\n", + "print(f\" View created!\")\n", + "print(f\" Total tokens: {count_tokens(catalog_view):,}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 5: Save to Redis" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Step 5: Save to Redis\n", + "print(\"\\n5. 
Saving to Redis...\")\n", + "\n", + "redis_client.set(\"course_catalog_view\", catalog_view)\n", + "\n", + "print(\" โœ… Saved to Redis as 'course_catalog_view'\")\n", + "\n", + "# Display the view\n", + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"COURSE CATALOG VIEW\")\n", + "print(\"=\" * 80)\n", + "print(catalog_view)\n", + "print(\"\\n\" + \"=\" * 80)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Using the Catalog View" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load and use the view\n", + "print(\"\\nUsing the catalog view in an agent...\\n\")\n", + "\n", + "catalog_view = redis_client.get(\"course_catalog_view\") or \"\"\n", + "\n", + "# Define a tool for retrieving course details by course code\n", + "from langchain_core.tools import tool\n", + "from typing import List\n", + "\n", + "@tool\n", + "async def get_course_details(course_codes: List[str]) -> str:\n", + " \"\"\"Get detailed information about one or more courses by their course codes.\n", + " \n", + " Args:\n", + " course_codes: List of course codes (e.g., ['CS101', 'MATH201'])\n", + " \n", + " Returns:\n", + " Formatted string with detailed course information\n", + " \"\"\"\n", + " if not course_codes:\n", + " return \"No course codes provided.\"\n", + " \n", + " result = []\n", + " for code in course_codes:\n", + " course = await course_manager.get_course_by_code(code)\n", + " if course:\n", + " result.append(f\"\"\"Course: {course.course_code} - {course.title}\n", + "Department: {course.department}\n", + "Description: {course.description}\n", + "Credits: {course.credits} | Difficulty: {course.difficulty_level}\n", + "Format: {course.format}\n", + "Instructor: {course.instructor}\n", + "Prerequisites: {', '.join([p.course_code for p in course.prerequisites]) if course.prerequisites else 'None'}\"\"\")\n", + " else:\n", + " result.append(f\"Course {code}: Not found\")\n", + " \n", + " return \"\\n\\n\".join(result)\n", + "\n", + "# Bind the tool to the LLM\n", + "llm_with_tools = llm.bind_tools([get_course_details])\n", + "\n", + "system_prompt = f\"\"\"You are a class scheduling agent for Redis University.\n", + "\n", + "{catalog_view}\n", + "\n", + "Use this overview to help students understand what's available.\n", + "When students ask about specific courses, use the get_course_details tool with the course codes from the overview above.\n", + "\"\"\"\n", + "\n", + "user_query = \"What departments offer courses? I'm interested in computer science.\"\n", + "\n", + "messages = [\n", + " SystemMessage(content=system_prompt),\n", + " HumanMessage(content=user_query)\n", + "]\n", + "\n", + "response = llm_with_tools.invoke(messages)\n", + "\n", + "print(f\"User: {user_query}\")\n", + "print(f\"\\nAgent: {response.content}\")\n", + "if response.tool_calls:\n", + " print(f\"\\n๐Ÿ”ง Agent wants to use tools: {[tc['name'] for tc in response.tool_calls]}\")\n", + "print(\"\\nโœ… Agent has high-level overview and can search for details!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Example 2: User Profile View\n", + "\n", + "Let's create a comprehensive user profile from various data sources." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 1: Retrieve User Data\n", + "\n", + "**The Hard Part: Data Integration**\n", + "\n", + "In production, creating user profile views requires:\n", + "\n", + "1. 
**Data Pipeline Architecture**\n", + " - Pull from multiple systems: Student Information System (SIS), Learning Management System (LMS), registration database, etc.\n", + " - Handle different data formats, APIs, and update frequencies\n", + " - Deal with data quality issues, missing fields, and inconsistencies\n", + "\n", + "2. **Scheduled Jobs**\n", + " - Nightly batch jobs to rebuild all profiles\n", + " - Incremental updates when specific events occur (course registration, grade posted)\n", + " - Balance freshness vs. computational cost\n", + "\n", + "3. **Data Selection Strategy**\n", + " - **What to include?** Not everything in your database belongs in the profile\n", + " - **What to exclude?** PII, irrelevant historical data, system metadata\n", + " - **What to aggregate?** Raw grades vs. GPA, individual courses vs. course count\n", + " - **What to denormalize?** Join course codes with titles, departments, etc.\n", + "\n", + "4. **Real-World Complexity**\n", + " - Students may have data in multiple systems that need reconciliation\n", + " - Historical data may use different course codes or structures\n", + " - Some data may be sensitive and require access controls\n", + " - Profile size must be managed (can't include every interaction)\n", + "\n", + "**For this demo**, we simulate the *output* of such a pipeline - a clean, structured dataset ready for profile creation. In production, getting to this point is often the hardest part!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"CREATING USER PROFILE VIEW\")\n", + "print(\"=\" * 80)\n", + "\n", + "# Step 1: Retrieve user data from various sources\n", + "print(\"\\n1. Retrieving user data...\")\n", + "\n", + "# In production, this data comes from a data pipeline that:\n", + "# - Queries multiple systems (SIS, LMS, registration DB)\n", + "# - Joins and denormalizes data\n", + "# - Filters to relevant fields only\n", + "# - Runs on a schedule (nightly batch or event-triggered)\n", + "# For this demo, we simulate the pipeline's output:\n", + "user_data = {\n", + " \"student_id\": \"student_123\",\n", + " \"name\": \"Alex Johnson\",\n", + " \"major\": \"Computer Science\",\n", + " \"year\": \"Junior\",\n", + " \"gpa\": 3.7,\n", + " \"expected_graduation\": \"Spring 2026\",\n", + " \"completed_courses\": [\n", + " {\"code\": \"CS101\", \"title\": \"Intro to Programming\", \"grade\": \"A\"},\n", + " {\"code\": \"CS201\", \"title\": \"Data Structures\", \"grade\": \"A-\"},\n", + " {\"code\": \"CS301\", \"title\": \"Algorithms\", \"grade\": \"B+\"},\n", + " {\"code\": \"MATH101\", \"title\": \"Calculus I\", \"grade\": \"A\"},\n", + " {\"code\": \"MATH201\", \"title\": \"Calculus II\", \"grade\": \"B\"},\n", + " ],\n", + " \"current_courses\": [\n", + " \"CS401\", \"CS402\", \"MATH301\"\n", + " ]\n", + "}\n", + "\n", + "# Get memories\n", + "memories = await memory_client.search_long_term_memory(\n", + " text=\"\", # Get all\n", + " limit=20\n", + ")\n", + "\n", + "print(f\" Retrieved user data and {len(memories.memories)} memories\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 2: Summarize Each Section" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Step 2: Create summaries for each section\n", + "print(\"\\n2. 
Creating section summaries...\")\n", + "\n", + "# Academic info (structured, no LLM needed)\n", + "academic_info = f\"\"\"Academic Info:\n", + "- Major: {user_data['major']}\n", + "- Year: {user_data['year']}\n", + "- GPA: {user_data['gpa']}\n", + "- Expected Graduation: {user_data['expected_graduation']}\n", + "\"\"\"\n", + "\n", + "# Completed courses (structured)\n", + "completed_courses = \"Completed Courses (\" + str(len(user_data['completed_courses'])) + \"):\\n\"\n", + "completed_courses += \"\\n\".join([\n", + " f\"- {c['code']}: {c['title']} (Grade: {c['grade']})\"\n", + " for c in user_data['completed_courses']\n", + "])\n", + "\n", + "# Current courses\n", + "current_courses = \"Current Courses:\\n- \" + \", \".join(user_data['current_courses'])\n", + "\n", + "# Summarize memories with LLM\n", + "if memories.memories:\n", + " memory_text = \"\\n\".join([f\"- {m.text}\" for m in memories.memories[:10]])\n", + " \n", + " prompt = f\"\"\"Summarize these student memories into two sections:\n", + "1. Preferences (course format, schedule, etc.)\n", + "2. Goals (academic, career, etc.)\n", + "\n", + "Be concise. Use bullet points.\n", + "\n", + "Memories:\n", + "{memory_text}\n", + "\"\"\"\n", + " \n", + " messages = [\n", + " SystemMessage(content=\"You are a helpful assistant that summarizes student information.\"),\n", + " HumanMessage(content=prompt)\n", + " ]\n", + " \n", + " response = llm.invoke(messages)\n", + " preferences_and_goals = response.content\n", + "else:\n", + " preferences_and_goals = \"Preferences:\\n- None recorded\\n\\nGoals:\\n- None recorded\"\n", + "\n", + "print(\" Created all section summaries\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 3: Stitch Into Profile View" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Step 3: Stitch into complete profile\n", + "print(\"\\n3. Stitching into complete profile view...\")\n", + "\n", + "profile_view = f\"\"\"Student Profile: {user_data['student_id']}\n", + "{'=' * 50}\n", + "\n", + "{academic_info}\n", + "\n", + "{completed_courses}\n", + "\n", + "{current_courses}\n", + "\n", + "{preferences_and_goals}\n", + "\"\"\"\n", + "\n", + "print(f\" Profile created!\")\n", + "print(f\" Total tokens: {count_tokens(profile_view):,}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 4: Save as JSON" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Step 4: Save to Redis (as JSON for structured access)\n", + "print(\"\\n4. 
Saving to Redis...\")\n", + "\n", + "profile_data = {\n", + " \"student_id\": user_data['student_id'],\n", + " \"profile_text\": profile_view,\n", + " \"last_updated\": \"2024-09-30\",\n", + " \"token_count\": count_tokens(profile_view)\n", + "}\n", + "\n", + "redis_client.set(\n", + " f\"user_profile:{user_data['student_id']}\",\n", + " json.dumps(profile_data)\n", + ")\n", + "\n", + "print(f\" โœ… Saved to Redis as 'user_profile:{user_data['student_id']}'\")\n", + "\n", + "# Display the profile\n", + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"USER PROFILE VIEW\")\n", + "print(\"=\" * 80)\n", + "print(profile_view)\n", + "print(\"=\" * 80)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Using the Profile View" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load and use the profile\n", + "print(\"\\nUsing the profile view in an agent...\\n\")\n", + "\n", + "profile_data = redis_client.get(f\"user_profile:{user_data['student_id']}\")\n", + "profile_json = json.loads(profile_data) if profile_data else {}\n", + "profile_text = profile_json.get('profile_text', 'No profile available')\n", + "\n", + "system_prompt = f\"\"\"You are a class scheduling agent for Redis University.\n", + "\n", + "{profile_text}\n", + "\n", + "Use this profile to provide personalized recommendations.\n", + "\"\"\"\n", + "\n", + "user_query = \"What courses should I take next semester?\"\n", + "\n", + "messages = [\n", + " SystemMessage(content=system_prompt),\n", + " HumanMessage(content=user_query)\n", + "]\n", + "\n", + "response = llm.invoke(messages)\n", + "\n", + "print(f\"User: {user_query}\")\n", + "print(f\"\\nAgent: {response.content}\")\n", + "print(\"\\nโœ… Agent has complete user context from turn 1!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Key Takeaways\n", + "\n", + "### The Pattern: Retrieve โ†’ Summarize โ†’ Stitch โ†’ Save\n", + "\n", + "1. **Retrieve**: Get all relevant data\n", + " - From databases, APIs, memories\n", + " - Organize by category/section\n", + "\n", + "2. **Summarize**: Create concise summaries\n", + " - Use LLM for complex data\n", + " - Use templates for structured data\n", + " - Keep it compact (one-sentence descriptions)\n", + "\n", + "3. **Stitch**: Combine into complete view\n", + " - Organize logically\n", + " - Add headers and structure\n", + " - Format for LLM consumption\n", + "\n", + "4. 
**Save**: Store for reuse\n", + " - Redis for fast access\n", + " - String or JSON format\n", + " - Include metadata (timestamp, token count)\n", + "\n", + "### When to Refresh Views\n", + "\n", + "**Course Catalog View:**\n", + "- When courses are added/removed\n", + "- When descriptions change\n", + "- Typically: Daily or weekly\n", + "\n", + "**User Profile View:**\n", + "- When user completes a course\n", + "- When preferences change\n", + "- When new memories are added\n", + "- Typically: After each session or daily\n", + "\n", + "### Scheduling Considerations\n", + "\n", + "In production, you'd use:\n", + "- **Cron jobs** for periodic updates\n", + "- **Event triggers** for immediate updates\n", + "- **Background workers** for async processing\n", + "\n", + "For this course, we focus on the **function-level logic**, not the scheduling infrastructure.\n", + "\n", + "### Benefits of Structured Views\n", + "\n", + "โœ… **Performance:**\n", + "- No search needed on every request\n", + "- Pre-computed, ready to use\n", + "- Fast retrieval from Redis\n", + "\n", + "โœ… **Quality:**\n", + "- Agent has complete overview\n", + "- Better context understanding\n", + "- More personalized responses\n", + "\n", + "โœ… **Efficiency:**\n", + "- Compact token usage\n", + "- Organized information\n", + "- Easy to maintain\n", + "\n", + "### Combining with RAG\n", + "\n", + "**Best practice: Use both!**\n", + "\n", + "```python\n", + "# Load structured views\n", + "catalog_view = load_catalog_view()\n", + "profile_view = load_profile_view(user_id)\n", + "\n", + "# Add targeted RAG\n", + "relevant_courses = search_courses(query, limit=3)\n", + "\n", + "# Combine\n", + "context = f\"\"\"\n", + "{catalog_view}\n", + "\n", + "{profile_view}\n", + "\n", + "Relevant courses for this query:\n", + "{relevant_courses}\n", + "\"\"\"\n", + "```\n", + "\n", + "This gives you:\n", + "- Overview (from views)\n", + "- Personalization (from profile)\n", + "- Specific details (from RAG)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Exercises\n", + "\n", + "1. **Create a department view**: Build a detailed view for a single department with all its courses.\n", + "\n", + "2. **Build a schedule view**: Create a view of a student's current schedule with times, locations, and conflicts.\n", + "\n", + "3. **Optimize token usage**: Experiment with different summary lengths. What's the sweet spot?\n", + "\n", + "4. **Implement refresh logic**: Write a function that determines when a view needs to be refreshed." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Summary\n", + "\n", + "In this notebook, you learned:\n", + "\n", + "- โœ… Structured views provide high-level overviews for LLMs\n", + "- โœ… The pattern: Retrieve โ†’ Summarize โ†’ Stitch โ†’ Save\n", + "- โœ… Course catalog views give agents complete course knowledge\n", + "- โœ… User profile views enable personalization from turn 1\n", + "- โœ… Combine views with RAG for best results\n", + "\n", + "**Key insight:** Pre-computing structured views is an advanced technique that goes beyond simple RAG. It gives your agent a \"mental model\" of the domain, enabling better understanding and more intelligent responses." 
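Exercise 4 asks for refresh logic. A minimal sketch, assuming views are stored as JSON blobs carrying a `last_updated` field (as the profile view in this notebook is) and reusing `redis_client` from the Setup cell:

```python
import json
from datetime import datetime, timedelta

def view_needs_refresh(key: str, max_age: timedelta = timedelta(days=1)) -> bool:
    """Return True if the stored view is missing, malformed, or older than max_age."""
    raw = redis_client.get(key)
    if raw is None:
        return True  # view was never built
    try:
        data = json.loads(raw)
        last_updated = datetime.fromisoformat(data["last_updated"])
    except (KeyError, ValueError):
        return True  # missing or unparseable timestamp: rebuild to be safe
    return datetime.now() - last_updated > max_age

# Usage: gate the retrieve -> summarize -> stitch -> save pipeline on staleness
if view_needs_refresh("user_profile:student_123"):
    print("Profile view is stale; rebuilding...")
```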
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/python-recipes/context-engineering/notebooks_archive/section-5-advanced-techniques/01_tool_loadout.ipynb b/python-recipes/context-engineering/notebooks_archive/section-5-advanced-techniques/01_tool_loadout.ipynb new file mode 100644 index 00000000..229e32ba --- /dev/null +++ b/python-recipes/context-engineering/notebooks_archive/section-5-advanced-techniques/01_tool_loadout.ipynb @@ -0,0 +1,355 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![Redis](https://redis.io/wp-content/uploads/2024/04/Logotype.svg?auto=webp&quality=85,75&width=120)\n", + "\n", + "# Tool Loadout: Dynamic Tool Selection\n", + "\n", + "## Learning Objectives (35 minutes)\n", + "By the end of this notebook, you will be able to:\n", + "1. **Understand** the concept of tool loadout and why it matters for agent performance\n", + "2. **Implement** semantic tool selection using vector similarity\n", + "3. **Apply** dynamic tool filtering based on user intent and context\n", + "4. **Optimize** agent performance by reducing tool confusion and token usage\n", + "5. **Design** tool recommendation systems for large tool inventories\n", + "\n", + "## Prerequisites\n", + "- Completed Sections 1-4 of the Context Engineering course\n", + "- Understanding of vector embeddings and semantic search\n", + "- Familiarity with LangChain tools and function calling\n", + "\n", + "---\n", + "\n", + "## Introduction\n", + "\n", + "**Tool Loadout** is the practice of selecting only the most relevant tools for a given task, rather than providing an agent with access to all available tools. The term comes from gaming, where players select specific combinations of weapons and equipment before a mission.\n", + "\n", + "### Why Tool Loadout Matters\n", + "\n", + "Research shows that agent performance degrades significantly when given too many tools:\n", + "\n", + "- **DeepSeek-v3**: Performance drops after 30 tools, fails completely with 100+ tools\n", + "- **Llama 3.1 8B**: Fails benchmarks with 46 tools, succeeds with only 19 tools\n", + "- **Context Confusion**: Too many similar tools create decision paralysis\n", + "- **Token Waste**: Unused tool descriptions consume valuable context space\n", + "\n", + "### The Tool Loadout Solution\n", + "\n", + "Instead of giving agents access to all tools, we:\n", + "1. **Analyze the user's request** to understand intent\n", + "2. **Select relevant tools** using semantic similarity\n", + "3. **Provide only the necessary tools** to the agent\n", + "4. 
**Optimize for both accuracy and efficiency**\n", + "\n", + "## Environment Setup\n", + "\n", + "Let's set up our environment for tool loadout experiments:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Environment setup\n", + "import os\n", + "import asyncio\n", + "from dotenv import load_dotenv\n", + "from typing import List, Dict, Any, Optional\n", + "import json\n", + "from dataclasses import dataclass\n", + "import numpy as np\n", + "from sklearn.metrics.pairwise import cosine_similarity\n", + "\n", + "# Load environment variables\n", + "load_dotenv()\n", + "\n", + "REDIS_URL = os.getenv(\"REDIS_URL\", \"redis://localhost:6379\")\n", + "OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")\n", + "\n", + "print(\"๐Ÿ”ง Environment Setup\")\n", + "print(\"=\" * 30)\n", + "print(f\"Redis URL: {REDIS_URL}\")\n", + "print(f\"OpenAI API Key: {'โœ… Set' if OPENAI_API_KEY else 'โŒ Not set'}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Import required modules\n", + "try:\n", + " from openai import OpenAI\n", + " from redis_context_course.models import Course, StudentProfile\n", + " from redis_context_course.course_manager import CourseManager\n", + " \n", + " # Initialize OpenAI client\n", + " if OPENAI_API_KEY:\n", + " openai_client = OpenAI(api_key=OPENAI_API_KEY)\n", + " print(\"โœ… OpenAI client initialized\")\n", + " else:\n", + " openai_client = None\n", + " print(\"โš ๏ธ OpenAI client not available (API key not set)\")\n", + " \n", + " print(\"โœ… Core modules imported successfully\")\n", + " \n", + "except ImportError as e:\n", + " print(f\"โŒ Import failed: {e}\")\n", + " print(\"Please ensure you've completed the setup from previous sections.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Tool Definition and Management\n", + "\n", + "Let's start by defining a comprehensive set of tools that our agent might have access to:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@dataclass\n", + "class ToolDefinition:\n", + " \"\"\"Represents a tool with its metadata for selection.\"\"\"\n", + " name: str\n", + " description: str\n", + " category: str\n", + " parameters: Dict[str, Any]\n", + " embedding: Optional[np.ndarray] = None\n", + " usage_frequency: int = 0\n", + " \n", + " def to_openai_format(self) -> Dict[str, Any]:\n", + " \"\"\"Convert to OpenAI function calling format.\"\"\"\n", + " return {\n", + " \"type\": \"function\",\n", + " \"function\": {\n", + " \"name\": self.name,\n", + " \"description\": self.description,\n", + " \"parameters\": self.parameters\n", + " }\n", + " }\n", + "\n", + "# Define a comprehensive tool inventory\n", + "TOOL_INVENTORY = [\n", + " # Course Management Tools\n", + " ToolDefinition(\n", + " name=\"search_courses\",\n", + " description=\"Search for courses using semantic similarity and filters. 
Use for finding courses by topic, difficulty, or format.\",\n", + " category=\"course_management\",\n", + " parameters={\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"query\": {\"type\": \"string\", \"description\": \"Search query for courses\"},\n", + " \"limit\": {\"type\": \"integer\", \"description\": \"Maximum number of results\"}\n", + " },\n", + " \"required\": [\"query\"]\n", + " }\n", + " ),\n", + " ToolDefinition(\n", + " name=\"get_course_details\",\n", + " description=\"Get detailed information about a specific course including prerequisites, schedule, and enrollment.\",\n", + " category=\"course_management\",\n", + " parameters={\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"course_code\": {\"type\": \"string\", \"description\": \"Course code (e.g., CS101)\"}\n", + " },\n", + " \"required\": [\"course_code\"]\n", + " }\n", + " ),\n", + " ToolDefinition(\n", + " name=\"check_prerequisites\",\n", + " description=\"Check if a student meets the prerequisites for a specific course.\",\n", + " category=\"course_management\",\n", + " parameters={\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"course_code\": {\"type\": \"string\", \"description\": \"Course code to check\"},\n", + " \"student_courses\": {\"type\": \"array\", \"items\": {\"type\": \"string\"}, \"description\": \"List of completed courses\"}\n", + " },\n", + " \"required\": [\"course_code\", \"student_courses\"]\n", + " }\n", + " ),\n", + " \n", + " # Student Profile Tools\n", + " ToolDefinition(\n", + " name=\"get_student_profile\",\n", + " description=\"Retrieve comprehensive student profile including academic history, preferences, and goals.\",\n", + " category=\"student_management\",\n", + " parameters={\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"student_id\": {\"type\": \"string\", \"description\": \"Student identifier\"}\n", + " },\n", + " \"required\": [\"student_id\"]\n", + " }\n", + " ),\n", + " ToolDefinition(\n", + " name=\"update_student_preferences\",\n", + " description=\"Update student preferences for course format, difficulty, or schedule.\",\n", + " category=\"student_management\",\n", + " parameters={\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"student_id\": {\"type\": \"string\", \"description\": \"Student identifier\"},\n", + " \"preferences\": {\"type\": \"object\", \"description\": \"Preference updates\"}\n", + " },\n", + " \"required\": [\"student_id\", \"preferences\"]\n", + " }\n", + " ),\n", + " \n", + " # Academic Planning Tools\n", + " ToolDefinition(\n", + " name=\"generate_degree_plan\",\n", + " description=\"Generate a comprehensive degree completion plan based on student's major and progress.\",\n", + " category=\"academic_planning\",\n", + " parameters={\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"student_id\": {\"type\": \"string\", \"description\": \"Student identifier\"},\n", + " \"target_graduation\": {\"type\": \"string\", \"description\": \"Target graduation date\"}\n", + " },\n", + " \"required\": [\"student_id\"]\n", + " }\n", + " ),\n", + " ToolDefinition(\n", + " name=\"check_graduation_requirements\",\n", + " description=\"Check progress toward graduation requirements for a specific major.\",\n", + " category=\"academic_planning\",\n", + " parameters={\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"student_id\": {\"type\": \"string\", \"description\": \"Student identifier\"},\n", + " \"major\": {\"type\": \"string\", \"description\": 
\"Academic major\"}\n", + " },\n", + " \"required\": [\"student_id\", \"major\"]\n", + " }\n", + " ),\n", + " \n", + " # Schedule Management Tools\n", + " ToolDefinition(\n", + " name=\"check_schedule_conflicts\",\n", + " description=\"Check for time conflicts between courses in a proposed schedule.\",\n", + " category=\"schedule_management\",\n", + " parameters={\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"course_codes\": {\"type\": \"array\", \"items\": {\"type\": \"string\"}, \"description\": \"List of course codes\"}\n", + " },\n", + " \"required\": [\"course_codes\"]\n", + " }\n", + " ),\n", + " ToolDefinition(\n", + " name=\"find_available_sections\",\n", + " description=\"Find available sections for a course that fit student's schedule preferences.\",\n", + " category=\"schedule_management\",\n", + " parameters={\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"course_code\": {\"type\": \"string\", \"description\": \"Course code\"},\n", + " \"time_preferences\": {\"type\": \"array\", \"items\": {\"type\": \"string\"}, \"description\": \"Preferred time slots\"}\n", + " },\n", + " \"required\": [\"course_code\"]\n", + " }\n", + " ),\n", + " \n", + " # Financial Tools\n", + " ToolDefinition(\n", + " name=\"calculate_tuition_cost\",\n", + " description=\"Calculate total tuition cost for a set of courses.\",\n", + " category=\"financial\",\n", + " parameters={\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"course_codes\": {\"type\": \"array\", \"items\": {\"type\": \"string\"}, \"description\": \"List of course codes\"},\n", + " \"student_type\": {\"type\": \"string\", \"description\": \"Student type (undergraduate, graduate, etc.)\"}\n", + " },\n", + " \"required\": [\"course_codes\"]\n", + " }\n", + " ),\n", + " ToolDefinition(\n", + " name=\"check_financial_aid\",\n", + " description=\"Check available financial aid options for a student.\",\n", + " category=\"financial\",\n", + " parameters={\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"student_id\": {\"type\": \"string\", \"description\": \"Student identifier\"}\n", + " },\n", + " \"required\": [\"student_id\"]\n", + " }\n", + " ),\n", + " \n", + " # Career Services Tools\n", + " ToolDefinition(\n", + " name=\"find_career_paths\",\n", + " description=\"Find career paths and job opportunities related to a student's major and interests.\",\n", + " category=\"career_services\",\n", + " parameters={\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"major\": {\"type\": \"string\", \"description\": \"Academic major\"},\n", + " \"interests\": {\"type\": \"array\", \"items\": {\"type\": \"string\"}, \"description\": \"Student interests\"}\n", + " },\n", + " \"required\": [\"major\"]\n", + " }\n", + " ),\n", + " ToolDefinition(\n", + " name=\"recommend_internships\",\n", + " description=\"Recommend internship opportunities based on student profile and career goals.\",\n", + " category=\"career_services\",\n", + " parameters={\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"student_id\": {\"type\": \"string\", \"description\": \"Student identifier\"},\n", + " \"career_goals\": {\"type\": \"array\", \"items\": {\"type\": \"string\"}, \"description\": \"Career goals\"}\n", + " },\n", + " \"required\": [\"student_id\"]\n", + " }\n", + " )\n", + "]\n", + "\n", + "print(f\"๐Ÿ“š Tool Inventory: {len(TOOL_INVENTORY)} tools defined\")\n", + "print(\"\\n๐Ÿ“‹ Tool Categories:\")\n", + "categories = {}\n", + "for tool in TOOL_INVENTORY:\n", + 
" categories[tool.category] = categories.get(tool.category, 0) + 1\n", + "\n", + "for category, count in categories.items():\n", + " print(f\" โ€ข {category}: {count} tools\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/python-recipes/context-engineering/reference-agent/.env.example b/python-recipes/context-engineering/reference-agent/.env.example new file mode 100644 index 00000000..babad405 --- /dev/null +++ b/python-recipes/context-engineering/reference-agent/.env.example @@ -0,0 +1,26 @@ +# Redis University Class Agent - Environment Configuration + +# OpenAI API Configuration +OPENAI_API_KEY=your_openai_api_key_here + +# Redis Configuration +REDIS_URL=redis://localhost:6379 +# For Redis Cloud, use: redis://username:password@host:port + +# Agent Memory Server Configuration +AGENT_MEMORY_URL=http://localhost:8088 + +# Vector Index Names +VECTOR_INDEX_NAME=course_catalog +MEMORY_INDEX_NAME=agent_memory + +# LangGraph Configuration +CHECKPOINT_NAMESPACE=class_agent + +# Optional: Logging Configuration +LOG_LEVEL=INFO + +# Optional: Agent Configuration +DEFAULT_STUDENT_ID=demo_student +MAX_CONVERSATION_LENGTH=20 +MEMORY_SIMILARITY_THRESHOLD=0.7 diff --git a/python-recipes/context-engineering/reference-agent/LICENSE b/python-recipes/context-engineering/reference-agent/LICENSE new file mode 100644 index 00000000..626b8bc9 --- /dev/null +++ b/python-recipes/context-engineering/reference-agent/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 Redis Ltd. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/python-recipes/context-engineering/reference-agent/MANIFEST.in b/python-recipes/context-engineering/reference-agent/MANIFEST.in new file mode 100644 index 00000000..afa4f343 --- /dev/null +++ b/python-recipes/context-engineering/reference-agent/MANIFEST.in @@ -0,0 +1,23 @@ +# Include the README and license files +include README.md +include LICENSE +include requirements.txt +include .env.example + +# Include configuration files +include pyproject.toml +include setup.py + +# Include data files +recursive-include redis_context_course/data *.json +recursive-include redis_context_course/templates *.txt + +# Include test files +recursive-include tests *.py + +# Exclude development and build files +exclude .gitignore +exclude .env +recursive-exclude * __pycache__ +recursive-exclude * *.py[co] +recursive-exclude * .DS_Store diff --git a/python-recipes/context-engineering/reference-agent/QUICK_START.md b/python-recipes/context-engineering/reference-agent/QUICK_START.md new file mode 100644 index 00000000..7241ce90 --- /dev/null +++ b/python-recipes/context-engineering/reference-agent/QUICK_START.md @@ -0,0 +1,191 @@ +# Quick Start - Redis Context Course Agent + +Get the Redis Context Course agent running in under 10 minutes. + +## ๐Ÿš€ One-Command Setup + +```bash +# 1. Install package +pip install -e . + +# 2. Set your OpenAI API key +export OPENAI_API_KEY="sk-your-actual-key-here" + +# 3. Start Redis +docker run -d --name redis -p 6379:6379 redis:8-alpine + +# 4. Start Agent Memory Server +uv run agent-memory api --no-worker & + +# 5. Generate and ingest data +generate-courses --courses-per-major 15 --output course_catalog.json +ingest-courses --catalog course_catalog.json --clear + +# 6. Verify everything works +python simple_health_check.py + +# 7. Start the agent +redis-class-agent --student-id your_name +``` + +## โœ… Health Check First + +**Always start here** if you have any issues: + +```bash +python simple_health_check.py +``` + +This tells you exactly what's working and what needs to be fixed. + +## ๐ŸŽฏ Expected Output + +When everything is working: + +``` +Redis Context Course - Health Check +===================================== +โœ… Environment: All variables set +โœ… Redis: Connected +โœ… Courses: 75 found +โœ… Majors: 5 found +โœ… Course Search: Working +โœ… Agent: Working + +๐ŸŽฏ Status: READY +๐Ÿ“Š All checks passed! + +๐Ÿš€ Try: redis-class-agent --student-id your_name +``` + +## ๐Ÿ’ฌ Try These Queries + +Once the agent is running, try: + +``` +You: How many courses are available? +Agent: I found 75 courses across 5 different majors... + +You: Show me programming courses +Agent: Here are some programming courses I found... + +You: I'm interested in machine learning +Agent: Great! I'll remember your interest in machine learning... + +You: What should I take for computer science? +Agent: Based on your interest in machine learning and computer science... 
+``` + +## ๐Ÿ”ง Quick Fixes + +### "Environment: Missing OPENAI_API_KEY" +```bash +# Set your API key +export OPENAI_API_KEY="sk-your-actual-key-here" + +# Or edit .env file +echo "OPENAI_API_KEY=sk-your-actual-key-here" > .env +``` + +### "Redis: Connection failed" +```bash +# Start Redis +docker run -d --name redis -p 6379:6379 redis:8-alpine +``` + +### "Courses: None found" +```bash +# Generate and ingest data +generate-courses --courses-per-major 15 --output course_catalog.json +ingest-courses --catalog course_catalog.json --clear +``` + +### "Agent: Failed" +```bash +# Start Agent Memory Server +uv run agent-memory api --no-worker +``` + +## ๐Ÿ“š What You Get + +- **75 sample courses** across 5 majors +- **Semantic search** - find courses by description +- **Memory system** - remembers your preferences +- **Personalized recommendations** - suggests relevant courses +- **Interactive chat** - natural language interface + +## ๐ŸŽ“ Example Interaction + +``` +โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ ๐ŸŽ“ Class Agent โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎ +โ”‚ Welcome to Redis University โ”‚ +โ”‚ Class Agent! โ”‚ +โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ + +You: I want to learn data science +Agent: I'll help you find data science courses! Let me search for relevant options... + +Found 8 data science related courses: + +**DS201: Introduction to Data Science** +Department: Data Science | Credits: 3 | Difficulty: Beginner +Description: Foundational course covering data collection, cleaning, analysis... + +**DS301: Machine Learning Fundamentals** +Department: Data Science | Credits: 4 | Difficulty: Intermediate +Description: Core machine learning algorithms and their applications... + +You: I prefer online courses +Agent: I'll remember that you prefer online courses. Let me filter for online data science options... + +You: What should I take first? +Agent: Based on your interest in data science and preference for online courses, I recommend starting with DS201: Introduction to Data Science. It's beginner-friendly and available online... +``` + +## ๐Ÿ› ๏ธ Development Mode + +For development and customization: + +```bash +# Install in development mode +pip install -e . + +# Run tests +pytest tests/ + +# Check code quality +python system_health_check.py --verbose + +# Explore examples +python examples/basic_usage.py +``` + +## ๐Ÿ“– Next Steps + +1. **Read the full README**: `README.md` +2. **Check examples**: `examples/` directory +3. **Customize the agent**: Modify `redis_context_course/agent.py` +4. **Explore the notebooks**: See `../notebooks/` for educational content + +## ๐Ÿ†˜ Need Help? + +1. **Run health check**: `python simple_health_check.py` +2. **Review logs**: Look for error messages in terminal +3. **Check Redis**: Ensure Redis is running on port 6379 +4. **Check Agent Memory Server**: Ensure it's running on port 8088 + +## ๐ŸŽ‰ Success! + +When you see this, you're ready to go: + +``` +๐ŸŽฏ Status: READY +๐Ÿ“Š All checks passed! +``` + +Start exploring with: +```bash +redis-class-agent --student-id your_name +``` + +Happy learning! 
๐Ÿš€ diff --git a/python-recipes/context-engineering/reference-agent/README.md b/python-recipes/context-engineering/reference-agent/README.md new file mode 100644 index 00000000..155d0b2b --- /dev/null +++ b/python-recipes/context-engineering/reference-agent/README.md @@ -0,0 +1,486 @@ +# Redis Context Course - Reference Agent + +A complete reference implementation of a context-aware AI agent for university course recommendations and academic planning. This package demonstrates production-ready context engineering patterns using Redis, LangGraph, Agent Memory Server, and OpenAI. + +**๐ŸŽ“ Part of the [Context Engineering Course](../notebooks_v2/README.md)** - This reference agent provides reusable components used throughout the course notebooks. + +## Overview + +This package serves two purposes: + +1. **Educational Resource**: Provides production-ready components used in the [Context Engineering Course](../notebooks_v2/README.md) +2. **Reference Implementation**: Demonstrates best practices for building context-aware AI agents + +The course notebooks use this package as a foundation, importing components like `CourseManager`, `redis_config`, and data models while demonstrating how to build custom agents from scratch. + +## Features + +- ๐Ÿง  **Dual Memory System**: Working memory (task-focused) and long-term memory (cross-session knowledge) via Agent Memory Server +- ๐Ÿ” **Semantic Search**: Vector-based course discovery and recommendations using Redis and RedisVL +- ๐Ÿ› ๏ธ **Tool Integration**: Extensible tool system for course search and memory management +- ๐Ÿ’ฌ **Context Awareness**: Maintains student preferences, goals, and conversation history +- ๐ŸŽฏ **Personalized Recommendations**: AI-powered course suggestions based on student profile +- ๐Ÿ“š **Course Catalog Management**: Complete system for storing and retrieving course information +- โšก **Production-Ready**: Optimization helpers, token counting, and performance utilities + +## Installation + +### From PyPI (Recommended) + +```bash +pip install redis-context-course +``` + +### From Source + +```bash +git clone https://github.com/redis-developer/redis-ai-resources.git +cd redis-ai-resources/python-recipes/context-engineering/reference-agent +pip install -e . +``` + +## Quick Start + +### 1. Set Up Environment + +```bash +# Copy the example environment file +cp .env.example .env + +# Edit .env with your OpenAI API key and Redis URL +export OPENAI_API_KEY="your-openai-api-key" +export REDIS_URL="redis://localhost:6379" +``` + +### 2. Start Redis 8 + +For local development: +```bash +# Using Docker +docker run -d --name redis -p 6379:6379 redis:8-alpine + +# Or install Redis 8 locally +# See: https://redis.io/docs/latest/operate/oss_and_stack/install/ +``` + +### 3. Start Redis Agent Memory Server + +The agent uses [Redis Agent Memory Server](https://github.com/redis/agent-memory-server) for memory management: + +```bash +# Install Agent Memory Server +pip install agent-memory-server + +# Start the server (in a separate terminal) +uv run agent-memory api --no-worker + +# Or with Docker +docker run -d --name agent-memory \ + -p 8088:8000 \ + -e REDIS_URL=redis://host.docker.internal:6379 \ + -e OPENAI_API_KEY=your-key \ + redis/agent-memory-server +``` + +Set the Agent Memory Server URL (optional, defaults to localhost:8088): +```bash +export AGENT_MEMORY_URL="http://localhost:8088" +``` + +### 4. Generate Sample Data + +```bash +generate-courses --courses-per-major 15 --output course_catalog.json +``` + +### 5. 
Ingest Data into Redis
+
+```bash
+ingest-courses --catalog course_catalog.json --clear
+```
+
+### 6. Verify Setup
+
+Run the health check to ensure everything is working:
+
+```bash
+python simple_health_check.py
+```
+
+This will verify:
+- Redis connection
+- Environment variables
+- Course data ingestion
+- Agent functionality
+
+### 7. Start the Agent
+
+```bash
+redis-class-agent --student-id your_student_id
+```
+
+## Python API Usage
+
+```python
+import asyncio
+from redis_context_course import ClassAgent, MemoryClient, CourseManager
+
+async def main():
+    # Initialize the agent (uses Agent Memory Server)
+    agent = ClassAgent("student_123")
+
+    # Chat with the agent
+    response = await agent.chat("I'm interested in machine learning courses")
+    print(response)
+
+    # Use individual components
+    memory_client = MemoryClient("student_123")
+    await memory_client.store_preference("I prefer online courses")
+
+    course_manager = CourseManager()
+    courses = await course_manager.search_courses("programming")
+
+if __name__ == "__main__":
+    asyncio.run(main())
+```
+
+## Package Exports
+
+The package exports the following components for use in your applications:
+
+### Core Classes
+```python
+from redis_context_course import (
+    ClassAgent,               # LangGraph-based agent implementation
+    AugmentedClassAgent,      # Enhanced agent with additional features
+    AgentState,               # Agent state management
+    MemoryClient,             # Memory API client (from agent-memory-client)
+    MemoryClientConfig,       # Memory configuration
+    CourseManager,            # Course storage and recommendation engine
+    RedisConfig,              # Redis configuration
+    redis_config,             # Redis config instance
+)
+```
+
+### Data Models
+```python
+from redis_context_course import (
+    Course,                   # Course data model
+    Major,                    # Major/program model
+    StudentProfile,           # Student information model
+    CourseRecommendation,     # Recommendation model
+    AgentResponse,            # Agent response model
+    Prerequisite,             # Course prerequisite model
+    CourseSchedule,           # Schedule information model
+)
+```
+
+### Enums
+```python
+from redis_context_course import (
+    DifficultyLevel,          # Course difficulty levels
+    CourseFormat,             # Course format types (online, in-person, hybrid)
+    Semester,                 # Semester enumeration
+    DayOfWeek,                # Day of week enumeration
+)
+```
+
+### Tools (for notebooks and custom agents)
+```python
+from redis_context_course import (
+    create_course_tools,      # Create course-related tools
+    create_memory_tools,      # Create memory management tools
+    select_tools_by_keywords, # Keyword-based tool selection
+)
+```
+
+### Optimization Helpers
+```python
+from redis_context_course import (
+    count_tokens,             # Token counting utility
+    estimate_token_budget,    # Budget estimation
+    hybrid_retrieval,         # Hybrid search strategy
+    create_summary_view,      # Summary generation
+    create_user_profile_view, # User profile formatting
+    filter_tools_by_intent,   # Intent-based tool filtering
+    classify_intent_with_llm, # LLM-based intent classification
+    extract_references,       # Reference extraction
+    format_context_for_llm,   # Context formatting
+)
+```
+
+## Architecture
+
+### Core Components
+
+- **Agent**: LangGraph-based workflow orchestration (`ClassAgent`, `AugmentedClassAgent`)
+- **Memory Client**: Interface to Redis Agent Memory Server
+  - Working memory: Session-scoped, task-focused context
+  - Long-term memory: Cross-session, persistent knowledge
+- **Course Manager**: Course storage and recommendation engine using Redis and RedisVL
+- **Models**: Type-safe Pydantic data structures for courses and students
+- **Redis
Config**: Redis connections and vector index management +- **Optimization Helpers**: Production utilities for token counting, cost management, and performance + +### Command Line Tools + +After installation, you have access to these command-line tools: + +- `redis-class-agent`: Interactive chat interface with the agent +- `generate-courses`: Generate sample course catalog data +- `ingest-courses`: Load course data into Redis + +### Memory System + +The agent uses [Redis Agent Memory Server](https://github.com/redis/agent-memory-server) for a production-ready dual-memory architecture: + +1. **Working Memory**: Session-scoped, task-focused context + - Conversation messages + - Current task state + - Task-related data + - TTL-based (default: 1 hour) + - Automatic extraction to long-term storage + +2. **Long-term Memory**: Cross-session, persistent knowledge + - Student preferences and goals + - Important facts learned over time + - Vector-indexed for semantic search + - Automatic deduplication + - Three memory types: semantic, episodic, message + +**Key Features:** +- Automatic memory extraction from conversations +- Semantic vector search with OpenAI embeddings +- Hash-based and semantic deduplication +- Rich metadata (topics, entities, timestamps) +- MCP server support for Claude Desktop + +### Tool System + +The agent has access to several tools: + +- `search_courses_tool`: Find courses based on queries and filters +- `get_recommendations_tool`: Get personalized course recommendations +- `store_preference_tool`: Save student preferences +- `store_goal_tool`: Save student goals +- `get_student_context_tool`: Retrieve relevant student context + +## Usage Examples + +### Basic Conversation + +``` +You: I'm interested in learning programming +Agent: I'd be happy to help you find programming courses! Let me search for some options... + +[Agent searches courses and provides recommendations] + +You: I prefer online courses +Agent: I'll remember that you prefer online courses. Let me find online programming options for you... +``` + +### Course Search + +``` +You: What data science courses are available? +Agent: [Searches and displays relevant data science courses with details] + +You: Show me beginner-friendly options +Agent: [Filters results for beginner difficulty level] +``` + +### Memory and Context + +``` +You: I want to focus on machine learning +Agent: I'll remember that you're interested in machine learning. This will help me provide better recommendations in the future. + +[Later in conversation or new session] +You: What courses should I take? +Agent: Based on your interest in machine learning and preference for online courses, here are my recommendations... +``` + +## Troubleshooting + +### Health Check + +Use the built-in health check to diagnose issues: + +```bash +python simple_health_check.py +``` + +The health check will verify: +- โœ… Environment variables are set correctly +- โœ… Redis connection is working +- โœ… Course and major data is present +- โœ… Course search functionality works +- โœ… Agent can respond to queries + +If any checks fail, the script will provide specific fix commands. 
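+
+If you prefer to spot-check the two services by hand first, here is a minimal sketch; it assumes the default local ports used in this README (Redis on 6379, Agent Memory Server on 8088) and a `/health` endpoint, so verify the path against your Agent Memory Server version:
+
+```python
+# Manual connectivity spot-check (assumed defaults: redis://localhost:6379,
+# Agent Memory Server health endpoint at http://localhost:8088/health).
+import urllib.request
+
+import redis
+
+r = redis.Redis.from_url("redis://localhost:6379")
+print("Redis reachable:", r.ping())  # True when Redis answers PONG
+
+with urllib.request.urlopen("http://localhost:8088/health") as resp:
+    print("Agent Memory Server status:", resp.status)  # 200 when healthy
+```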
+ +### Common Issues + +**"No courses found"** +```bash +# Re-run data ingestion +ingest-courses --catalog course_catalog.json --clear +``` + +**"Redis connection failed"** +```bash +# Start Redis with Docker +docker run -d --name redis -p 6379:6379 redis:8-alpine +``` + +**"Agent query failed"** +- Check that your OpenAI API key is valid +- Ensure course data has been ingested with embeddings +- Verify Agent Memory Server is running + +## Configuration + +### Environment Variables + +- `OPENAI_API_KEY`: Your OpenAI API key (required) +- `REDIS_URL`: Redis connection URL (default: redis://localhost:6379) +- `VECTOR_INDEX_NAME`: Name for course vector index (default: course_catalog) +- `MEMORY_INDEX_NAME`: Name for memory vector index (default: agent_memory) + +### Customization + +The agent is designed to be easily extensible: + +1. **Add New Tools**: Extend the tool system in `agent.py` +2. **Modify Memory Logic**: Customize memory storage and retrieval in `memory.py` +3. **Extend Course Data**: Add new fields to course models in `models.py` +4. **Custom Recommendations**: Modify recommendation logic in `course_manager.py` + +## Development + +### Running Tests + +```bash +pytest tests/ +``` + +### Code Formatting + +```bash +black src/ scripts/ +isort src/ scripts/ +``` + +### Type Checking + +```bash +mypy src/ +``` + +## Project Structure + +``` +reference-agent/ +โ”œโ”€โ”€ redis_context_course/ # Main package +โ”‚ โ”œโ”€โ”€ agent.py # LangGraph agent implementation +โ”‚ โ”œโ”€โ”€ memory.py # Long-term memory manager +โ”‚ โ”œโ”€โ”€ working_memory.py # Working memory implementation +โ”‚ โ”œโ”€โ”€ working_memory_tools.py # Memory management tools +โ”‚ โ”œโ”€โ”€ course_manager.py # Course search and recommendations +โ”‚ โ”œโ”€โ”€ models.py # Data models +โ”‚ โ”œโ”€โ”€ redis_config.py # Redis configuration +โ”‚ โ”œโ”€โ”€ cli.py # Command-line interface +โ”‚ โ””โ”€โ”€ scripts/ # Data generation and ingestion +โ”œโ”€โ”€ tests/ # Test suite +โ”œโ”€โ”€ examples/ # Usage examples +โ”‚ โ””โ”€โ”€ basic_usage.py # Basic package usage demo +โ”œโ”€โ”€ data/ # Generated course data +โ”œโ”€โ”€ README.md # This file +โ”œโ”€โ”€ requirements.txt # Dependencies +โ””โ”€โ”€ setup.py # Package setup + +``` + +## Educational Use & Course Integration + +This reference implementation is designed for educational purposes and is integrated with the **[Context Engineering Course](../notebooks_v2/README.md)**. + +### How the Course Uses This Package + +The course notebooks demonstrate **building agents from scratch** using this package's components as building blocks: + +**Components Used in Notebooks**: +- โœ… `CourseManager` - Course search and recommendations (Sections 2, 3, 4) +- โœ… `redis_config` - Redis configuration (Sections 2, 3) +- โœ… Data models: `Course`, `StudentProfile`, `DifficultyLevel`, `CourseFormat`, `Semester` (Sections 3, 4) +- โœ… Scripts: `CourseGenerator`, `CourseIngestionPipeline` (Section 2) + +**Components for Production Use** (not directly used in notebooks): +- `ClassAgent`, `AugmentedClassAgent` - Complete agent implementations +- `create_course_tools`, `create_memory_tools` - Tool creation helpers +- Optimization helpers: `count_tokens`, `estimate_token_budget`, `hybrid_retrieval`, etc. 
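+
+As a quick taste of these production helpers, the sketch below trims retrieved context to a token budget with `count_tokens`. The signature is an assumption here (text in, token count out), so check the optimization helpers in `redis_context_course` before relying on it:
+
+```python
+# Hypothetical usage sketch: keep whole context chunks until an assumed
+# token budget is exhausted. Assumes count_tokens(text: str) -> int, as
+# suggested by the export list; MAX_CONTEXT_TOKENS is illustrative only.
+from redis_context_course import count_tokens
+
+MAX_CONTEXT_TOKENS = 2000
+
+def fit_to_budget(chunks: list[str]) -> list[str]:
+    """Return the leading chunks that fit within the assumed token budget."""
+    kept, used = [], 0
+    for chunk in chunks:
+        cost = count_tokens(chunk)
+        if used + cost > MAX_CONTEXT_TOKENS:
+            break
+        kept.append(chunk)
+        used += cost
+    return kept
+```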
+ +**Why This Approach?** +- Students learn to build custom agents rather than using pre-built ones +- Demonstrates how production agents are constructed from components +- Provides flexibility to adapt patterns to different use cases +- Shows both educational and production-ready patterns + +For detailed analysis of component usage, see [notebooks_v2/REFERENCE_AGENT_USAGE_ANALYSIS.md](../notebooks_v2/REFERENCE_AGENT_USAGE_ANALYSIS.md). + +### Learning Path + +**For Course Students**: +1. **Complete the course**: Follow the [Context Engineering Course](../notebooks_v2/README.md) +2. **Use this package**: Import components as shown in notebooks +3. **Explore the source**: See production implementations in `redis_context_course/` +4. **Extend for your use case**: Adapt patterns to your domain + +**For Independent Learners**: +1. **Explore the examples**: `examples/basic_usage.py` shows basic package usage +2. **Read the source code**: Well-documented code in `redis_context_course/` +3. **Run the agent**: Try the interactive CLI to see it in action +4. **Check the notebooks**: See step-by-step tutorials in `../notebooks_v2/` + +### Key Concepts Demonstrated + +- **Context Engineering**: Four context types and assembly strategies +- **Memory Management**: Working memory vs. long-term memory with Agent Memory Server +- **Tool Integration**: Creating and orchestrating multiple tools +- **Vector Search**: Semantic retrieval with Redis and RedisVL +- **LangGraph Workflows**: Stateful agent design patterns +- **Production Optimization**: Token counting, cost management, performance tuning + +--- + +## Related Resources + +### Course Materials +- **[Context Engineering Course](../notebooks_v2/README.md)** - Complete learning path using this package +- **[Reference Agent Usage Analysis](../notebooks_v2/REFERENCE_AGENT_USAGE_ANALYSIS.md)** - How notebooks use this package +- **[Setup Guide](../notebooks_v2/SETUP_GUIDE.md)** - Detailed setup instructions + +### Documentation +- **[Main Course README](../README.md)** - Top-level context engineering documentation +- **[Agent Memory Server](https://github.com/redis/agent-memory-server)** - Memory management system +- **[Redis Documentation](https://redis.io/docs/)** - Redis official documentation +- **[LangGraph Documentation](https://langchain-ai.github.io/langgraph/)** - LangGraph stateful agents + +### Community +- **[Redis Discord](https://discord.gg/redis)** - Join the Redis community +- **[GitHub Issues](https://github.com/redis-developer/redis-ai-resources/issues)** - Report issues or ask questions +- **[Redis AI Resources](https://github.com/redis-developer/redis-ai-resources)** - More AI examples and recipes + +--- + +## License + +MIT License - See LICENSE file for details + +## Contributing + +Contributions are welcome! Please see the main repository for contribution guidelines. + +--- + +**Ready to learn context engineering?** Start with the [Context Engineering Course](../notebooks_v2/README.md) to see this reference agent in action! 
diff --git a/python-recipes/context-engineering/reference-agent/course_catalog.json b/python-recipes/context-engineering/reference-agent/course_catalog.json new file mode 100644 index 00000000..7afc5dd7 --- /dev/null +++ b/python-recipes/context-engineering/reference-agent/course_catalog.json @@ -0,0 +1,3146 @@ +{ + "majors": [ + { + "id": "01K897CBGND1XDP0TPQEAWB54S", + "name": "Computer Science", + "code": "CS", + "department": "Computer Science", + "description": "Study of computational systems, algorithms, and software design", + "required_credits": 120, + "core_courses": [], + "elective_courses": [], + "career_paths": [ + "Software Engineer", + "Data Scientist", + "Systems Architect", + "AI Researcher" + ], + "created_at": "2025-10-23 15:05:26.293343" + }, + { + "id": "01K897CBGND1XDP0TPQEAWB54T", + "name": "Data Science", + "code": "DS", + "department": "Data Science", + "description": "Interdisciplinary field using statistics, programming, and domain expertise", + "required_credits": 120, + "core_courses": [], + "elective_courses": [], + "career_paths": [ + "Data Analyst", + "Machine Learning Engineer", + "Business Intelligence Analyst" + ], + "created_at": "2025-10-23 15:05:26.293359" + }, + { + "id": "01K897CBGND1XDP0TPQEAWB54V", + "name": "Mathematics", + "code": "MATH", + "department": "Mathematics", + "description": "Study of numbers, structures, patterns, and logical reasoning", + "required_credits": 120, + "core_courses": [], + "elective_courses": [], + "career_paths": [ + "Mathematician", + "Statistician", + "Actuary", + "Research Scientist" + ], + "created_at": "2025-10-23 15:05:26.293368" + }, + { + "id": "01K897CBGND1XDP0TPQEAWB54W", + "name": "Business Administration", + "code": "BUS", + "department": "Business", + "description": "Management, finance, marketing, and organizational behavior", + "required_credits": 120, + "core_courses": [], + "elective_courses": [], + "career_paths": [ + "Business Analyst", + "Project Manager", + "Consultant", + "Entrepreneur" + ], + "created_at": "2025-10-23 15:05:26.293374" + }, + { + "id": "01K897CBGND1XDP0TPQEAWB54X", + "name": "Psychology", + "code": "PSY", + "department": "Psychology", + "description": "Scientific study of mind, behavior, and mental processes", + "required_credits": 120, + "core_courses": [], + "elective_courses": [], + "career_paths": [ + "Clinical Psychologist", + "Counselor", + "Research Psychologist", + "HR Specialist" + ], + "created_at": "2025-10-23 15:05:26.293380" + } + ], + "courses": [ + { + "id": "01K897CBGND1XDP0TPQEAWB54Y", + "course_code": "CS001", + "title": "Database Systems", + "description": "Design and implementation of database systems. 
SQL, normalization, transactions, and database administration.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "online", + "department": "Computer Science", + "major": "Computer Science", + "prerequisites": [], + "schedule": { + "days": [ + "tuesday", + "thursday" + ], + "start_time": "09:30:00", + "end_time": "10:45:00", + "location": "Technology Center 543" + }, + "semester": "winter", + "year": 2024, + "instructor": "Ronnie Hart", + "max_enrollment": 69, + "current_enrollment": 74, + "tags": [ + "databases", + "sql", + "data management" + ], + "learning_objectives": [ + "Design relational databases", + "Write complex SQL queries", + "Understand database normalization", + "Implement database transactions" + ], + "created_at": "2025-10-23 15:05:26.293511", + "updated_at": "2025-10-23 15:05:26.293512" + }, + { + "id": "01K897CBGND1XDP0TPQEAWB54Z", + "course_code": "CS002", + "title": "Introduction to Programming", + "description": "Fundamental programming concepts using Python. Variables, control structures, functions, and basic data structures.", + "credits": 3, + "difficulty_level": "beginner", + "format": "in_person", + "department": "Computer Science", + "major": "Computer Science", + "prerequisites": [], + "schedule": { + "days": [ + "tuesday" + ], + "start_time": "14:30:00", + "end_time": "17:00:00", + "location": "Science Hall 828" + }, + "semester": "spring", + "year": 2024, + "instructor": "David Cox", + "max_enrollment": 47, + "current_enrollment": 43, + "tags": [ + "programming", + "python", + "fundamentals" + ], + "learning_objectives": [ + "Write basic Python programs", + "Understand variables and data types", + "Use control structures effectively", + "Create and use functions" + ], + "created_at": "2025-10-23 15:05:26.293579", + "updated_at": "2025-10-23 15:05:26.293580" + }, + { + "id": "01K897CBGND1XDP0TPQEAWB550", + "course_code": "CS003", + "title": "Data Structures and Algorithms", + "description": "Study of fundamental data structures and algorithms. Arrays, linked lists, trees, graphs, sorting, and searching.", + "credits": 4, + "difficulty_level": "intermediate", + "format": "hybrid", + "department": "Computer Science", + "major": "Computer Science", + "prerequisites": [], + "schedule": { + "days": [ + "monday", + "wednesday" + ], + "start_time": "13:30:00", + "end_time": "14:45:00", + "location": "Technology Center 622" + }, + "semester": "fall", + "year": 2024, + "instructor": "Lindsay Wright", + "max_enrollment": 22, + "current_enrollment": 59, + "tags": [ + "algorithms", + "data structures", + "problem solving" + ], + "learning_objectives": [ + "Implement common data structures", + "Analyze algorithm complexity", + "Solve problems using appropriate data structures", + "Understand time and space complexity" + ], + "created_at": "2025-10-23 15:05:26.293644", + "updated_at": "2025-10-23 15:05:26.293644" + }, + { + "id": "01K897CBGND1XDP0TPQEAWB551", + "course_code": "CS004", + "title": "Machine Learning", + "description": "Introduction to machine learning algorithms and applications. 
Supervised and unsupervised learning, neural networks.", + "credits": 4, + "difficulty_level": "advanced", + "format": "in_person", + "department": "Computer Science", + "major": "Computer Science", + "prerequisites": [], + "schedule": { + "days": [ + "thursday" + ], + "start_time": "15:30:00", + "end_time": "18:00:00", + "location": "Engineering Building 741" + }, + "semester": "winter", + "year": 2024, + "instructor": "Chris Harris", + "max_enrollment": 90, + "current_enrollment": 36, + "tags": [ + "machine learning", + "ai", + "statistics" + ], + "learning_objectives": [ + "Understand ML algorithms", + "Implement classification and regression models", + "Evaluate model performance", + "Apply ML to real-world problems" + ], + "created_at": "2025-10-23 15:05:26.293701", + "updated_at": "2025-10-23 15:05:26.293701" + }, + { + "id": "01K897CBGND1XDP0TPQEAWB552", + "course_code": "CS005", + "title": "Web Development", + "description": "Full-stack web development using modern frameworks. HTML, CSS, JavaScript, React, and backend APIs.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "in_person", + "department": "Computer Science", + "major": "Computer Science", + "prerequisites": [ + { + "course_code": "CS001", + "course_title": "Prerequisite Course 1", + "minimum_grade": "B-", + "can_be_concurrent": true + } + ], + "schedule": { + "days": [ + "tuesday", + "thursday" + ], + "start_time": "14:00:00", + "end_time": "15:15:00", + "location": "Liberal Arts Center 578" + }, + "semester": "fall", + "year": 2024, + "instructor": "Tonya Bentley", + "max_enrollment": 89, + "current_enrollment": 40, + "tags": [ + "web development", + "javascript", + "react", + "apis" + ], + "learning_objectives": [ + "Build responsive web interfaces", + "Develop REST APIs", + "Use modern JavaScript frameworks", + "Deploy web applications" + ], + "created_at": "2025-10-23 15:05:26.293763", + "updated_at": "2025-10-23 15:05:26.293763" + }, + { + "id": "01K897CBGND1XDP0TPQEAWB553", + "course_code": "CS006", + "title": "Web Development", + "description": "Full-stack web development using modern frameworks. HTML, CSS, JavaScript, React, and backend APIs.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "online", + "department": "Computer Science", + "major": "Computer Science", + "prerequisites": [], + "schedule": { + "days": [ + "thursday" + ], + "start_time": "12:30:00", + "end_time": "15:00:00", + "location": "Business Complex 116" + }, + "semester": "spring", + "year": 2024, + "instructor": "Nicole Zimmerman", + "max_enrollment": 48, + "current_enrollment": 59, + "tags": [ + "web development", + "javascript", + "react", + "apis" + ], + "learning_objectives": [ + "Build responsive web interfaces", + "Develop REST APIs", + "Use modern JavaScript frameworks", + "Deploy web applications" + ], + "created_at": "2025-10-23 15:05:26.293820", + "updated_at": "2025-10-23 15:05:26.293820" + }, + { + "id": "01K897CBGND1XDP0TPQEAWB554", + "course_code": "CS007", + "title": "Data Structures and Algorithms", + "description": "Study of fundamental data structures and algorithms. 
Arrays, linked lists, trees, graphs, sorting, and searching.", + "credits": 4, + "difficulty_level": "intermediate", + "format": "in_person", + "department": "Computer Science", + "major": "Computer Science", + "prerequisites": [], + "schedule": { + "days": [ + "monday", + "wednesday", + "friday" + ], + "start_time": "15:30:00", + "end_time": "16:20:00", + "location": "Engineering Building 107" + }, + "semester": "summer", + "year": 2024, + "instructor": "Ashley Miller", + "max_enrollment": 42, + "current_enrollment": 45, + "tags": [ + "algorithms", + "data structures", + "problem solving" + ], + "learning_objectives": [ + "Implement common data structures", + "Analyze algorithm complexity", + "Solve problems using appropriate data structures", + "Understand time and space complexity" + ], + "created_at": "2025-10-23 15:05:26.293876", + "updated_at": "2025-10-23 15:05:26.293876" + }, + { + "id": "01K897CBGND1XDP0TPQEAWB555", + "course_code": "CS008", + "title": "Introduction to Programming", + "description": "Fundamental programming concepts using Python. Variables, control structures, functions, and basic data structures.", + "credits": 3, + "difficulty_level": "beginner", + "format": "in_person", + "department": "Computer Science", + "major": "Computer Science", + "prerequisites": [], + "schedule": { + "days": [ + "tuesday" + ], + "start_time": "18:30:00", + "end_time": "21:00:00", + "location": "Engineering Building 127" + }, + "semester": "summer", + "year": 2024, + "instructor": "Brian Sullivan", + "max_enrollment": 27, + "current_enrollment": 34, + "tags": [ + "programming", + "python", + "fundamentals" + ], + "learning_objectives": [ + "Write basic Python programs", + "Understand variables and data types", + "Use control structures effectively", + "Create and use functions" + ], + "created_at": "2025-10-23 15:05:26.293931", + "updated_at": "2025-10-23 15:05:26.293931" + }, + { + "id": "01K897CBGND1XDP0TPQEAWB556", + "course_code": "CS009", + "title": "Introduction to Programming", + "description": "Fundamental programming concepts using Python. Variables, control structures, functions, and basic data structures.", + "credits": 3, + "difficulty_level": "beginner", + "format": "hybrid", + "department": "Computer Science", + "major": "Computer Science", + "prerequisites": [], + "schedule": { + "days": [ + "tuesday", + "thursday" + ], + "start_time": "14:00:00", + "end_time": "15:15:00", + "location": "Engineering Building 258" + }, + "semester": "fall", + "year": 2024, + "instructor": "Michael Byrd", + "max_enrollment": 53, + "current_enrollment": 77, + "tags": [ + "programming", + "python", + "fundamentals" + ], + "learning_objectives": [ + "Write basic Python programs", + "Understand variables and data types", + "Use control structures effectively", + "Create and use functions" + ], + "created_at": "2025-10-23 15:05:26.293986", + "updated_at": "2025-10-23 15:05:26.293986" + }, + { + "id": "01K897CBGPCZ67SV5B7Y8XPBHN", + "course_code": "CS010", + "title": "Machine Learning", + "description": "Introduction to machine learning algorithms and applications. 
Supervised and unsupervised learning, neural networks.", + "credits": 4, + "difficulty_level": "advanced", + "format": "in_person", + "department": "Computer Science", + "major": "Computer Science", + "prerequisites": [], + "schedule": { + "days": [ + "tuesday", + "thursday" + ], + "start_time": "08:30:00", + "end_time": "09:45:00", + "location": "Business Complex 152" + }, + "semester": "spring", + "year": 2024, + "instructor": "Benjamin Forbes", + "max_enrollment": 94, + "current_enrollment": 39, + "tags": [ + "machine learning", + "ai", + "statistics" + ], + "learning_objectives": [ + "Understand ML algorithms", + "Implement classification and regression models", + "Evaluate model performance", + "Apply ML to real-world problems" + ], + "created_at": "2025-10-23 15:05:26.294045", + "updated_at": "2025-10-23 15:05:26.294045" + }, + { + "id": "01K897CBGPCZ67SV5B7Y8XPBHP", + "course_code": "CS011", + "title": "Introduction to Programming", + "description": "Fundamental programming concepts using Python. Variables, control structures, functions, and basic data structures.", + "credits": 3, + "difficulty_level": "beginner", + "format": "online", + "department": "Computer Science", + "major": "Computer Science", + "prerequisites": [], + "schedule": { + "days": [ + "thursday" + ], + "start_time": "11:00:00", + "end_time": "13:30:00", + "location": "Engineering Building 397" + }, + "semester": "fall", + "year": 2024, + "instructor": "Jacqueline Dorsey", + "max_enrollment": 21, + "current_enrollment": 63, + "tags": [ + "programming", + "python", + "fundamentals" + ], + "learning_objectives": [ + "Write basic Python programs", + "Understand variables and data types", + "Use control structures effectively", + "Create and use functions" + ], + "created_at": "2025-10-23 15:05:26.294101", + "updated_at": "2025-10-23 15:05:26.294101" + }, + { + "id": "01K897CBGPCZ67SV5B7Y8XPBHQ", + "course_code": "CS012", + "title": "Database Systems", + "description": "Design and implementation of database systems. SQL, normalization, transactions, and database administration.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "in_person", + "department": "Computer Science", + "major": "Computer Science", + "prerequisites": [], + "schedule": { + "days": [ + "tuesday", + "thursday" + ], + "start_time": "10:30:00", + "end_time": "11:45:00", + "location": "Liberal Arts Center 102" + }, + "semester": "fall", + "year": 2024, + "instructor": "Jacob Castillo", + "max_enrollment": 50, + "current_enrollment": 15, + "tags": [ + "databases", + "sql", + "data management" + ], + "learning_objectives": [ + "Design relational databases", + "Write complex SQL queries", + "Understand database normalization", + "Implement database transactions" + ], + "created_at": "2025-10-23 15:05:26.294156", + "updated_at": "2025-10-23 15:05:26.294156" + }, + { + "id": "01K897CBGPCZ67SV5B7Y8XPBHR", + "course_code": "CS013", + "title": "Web Development", + "description": "Full-stack web development using modern frameworks. 
HTML, CSS, JavaScript, React, and backend APIs.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "online", + "department": "Computer Science", + "major": "Computer Science", + "prerequisites": [], + "schedule": { + "days": [ + "tuesday" + ], + "start_time": "09:30:00", + "end_time": "12:00:00", + "location": "Liberal Arts Center 557" + }, + "semester": "winter", + "year": 2024, + "instructor": "Steven Henry", + "max_enrollment": 34, + "current_enrollment": 7, + "tags": [ + "web development", + "javascript", + "react", + "apis" + ], + "learning_objectives": [ + "Build responsive web interfaces", + "Develop REST APIs", + "Use modern JavaScript frameworks", + "Deploy web applications" + ], + "created_at": "2025-10-23 15:05:26.294209", + "updated_at": "2025-10-23 15:05:26.294210" + }, + { + "id": "01K897CBGPCZ67SV5B7Y8XPBHS", + "course_code": "CS014", + "title": "Web Development", + "description": "Full-stack web development using modern frameworks. HTML, CSS, JavaScript, React, and backend APIs.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "in_person", + "department": "Computer Science", + "major": "Computer Science", + "prerequisites": [ + { + "course_code": "CS005", + "course_title": "Prerequisite Course 5", + "minimum_grade": "B-", + "can_be_concurrent": false + } + ], + "schedule": { + "days": [ + "monday", + "wednesday" + ], + "start_time": "08:00:00", + "end_time": "09:15:00", + "location": "Science Hall 777" + }, + "semester": "fall", + "year": 2024, + "instructor": "Jacob Chen", + "max_enrollment": 60, + "current_enrollment": 1, + "tags": [ + "web development", + "javascript", + "react", + "apis" + ], + "learning_objectives": [ + "Build responsive web interfaces", + "Develop REST APIs", + "Use modern JavaScript frameworks", + "Deploy web applications" + ], + "created_at": "2025-10-23 15:05:26.294267", + "updated_at": "2025-10-23 15:05:26.294267" + }, + { + "id": "01K897CBGPCZ67SV5B7Y8XPBHT", + "course_code": "CS015", + "title": "Web Development", + "description": "Full-stack web development using modern frameworks. HTML, CSS, JavaScript, React, and backend APIs.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "in_person", + "department": "Computer Science", + "major": "Computer Science", + "prerequisites": [], + "schedule": { + "days": [ + "monday", + "wednesday" + ], + "start_time": "10:30:00", + "end_time": "11:45:00", + "location": "Technology Center 250" + }, + "semester": "spring", + "year": 2024, + "instructor": "Hunter Green", + "max_enrollment": 74, + "current_enrollment": 69, + "tags": [ + "web development", + "javascript", + "react", + "apis" + ], + "learning_objectives": [ + "Build responsive web interfaces", + "Develop REST APIs", + "Use modern JavaScript frameworks", + "Deploy web applications" + ], + "created_at": "2025-10-23 15:05:26.294323", + "updated_at": "2025-10-23 15:05:26.294323" + }, + { + "id": "01K897CBGPCZ67SV5B7Y8XPBHV", + "course_code": "DS016", + "title": "Statistics for Data Science", + "description": "Statistical methods and probability theory for data analysis. 
Hypothesis testing, regression, and statistical inference.", + "credits": 4, + "difficulty_level": "intermediate", + "format": "online", + "department": "Data Science", + "major": "Data Science", + "prerequisites": [], + "schedule": { + "days": [ + "monday", + "wednesday", + "friday" + ], + "start_time": "12:30:00", + "end_time": "13:20:00", + "location": "Business Complex 236" + }, + "semester": "winter", + "year": 2024, + "instructor": "Dale Rivera", + "max_enrollment": 89, + "current_enrollment": 56, + "tags": [ + "statistics", + "probability", + "data analysis" + ], + "learning_objectives": [ + "Apply statistical methods to data", + "Perform hypothesis testing", + "Understand probability distributions", + "Conduct statistical inference" + ], + "created_at": "2025-10-23 15:05:26.294377", + "updated_at": "2025-10-23 15:05:26.294378" + }, + { + "id": "01K897CBGPCZ67SV5B7Y8XPBHW", + "course_code": "DS017", + "title": "Statistics for Data Science", + "description": "Statistical methods and probability theory for data analysis. Hypothesis testing, regression, and statistical inference.", + "credits": 4, + "difficulty_level": "intermediate", + "format": "online", + "department": "Data Science", + "major": "Data Science", + "prerequisites": [], + "schedule": { + "days": [ + "monday", + "wednesday", + "friday" + ], + "start_time": "17:30:00", + "end_time": "18:20:00", + "location": "Science Hall 768" + }, + "semester": "winter", + "year": 2024, + "instructor": "Maria Anderson", + "max_enrollment": 44, + "current_enrollment": 72, + "tags": [ + "statistics", + "probability", + "data analysis" + ], + "learning_objectives": [ + "Apply statistical methods to data", + "Perform hypothesis testing", + "Understand probability distributions", + "Conduct statistical inference" + ], + "created_at": "2025-10-23 15:05:26.294432", + "updated_at": "2025-10-23 15:05:26.294432" + }, + { + "id": "01K897CBGPCZ67SV5B7Y8XPBHX", + "course_code": "DS018", + "title": "Statistics for Data Science", + "description": "Statistical methods and probability theory for data analysis. Hypothesis testing, regression, and statistical inference.", + "credits": 4, + "difficulty_level": "intermediate", + "format": "online", + "department": "Data Science", + "major": "Data Science", + "prerequisites": [], + "schedule": { + "days": [ + "tuesday", + "thursday" + ], + "start_time": "13:30:00", + "end_time": "14:45:00", + "location": "Business Complex 658" + }, + "semester": "fall", + "year": 2024, + "instructor": "Monica Clark", + "max_enrollment": 52, + "current_enrollment": 45, + "tags": [ + "statistics", + "probability", + "data analysis" + ], + "learning_objectives": [ + "Apply statistical methods to data", + "Perform hypothesis testing", + "Understand probability distributions", + "Conduct statistical inference" + ], + "created_at": "2025-10-23 15:05:26.294487", + "updated_at": "2025-10-23 15:05:26.294487" + }, + { + "id": "01K897CBGPCZ67SV5B7Y8XPBHY", + "course_code": "DS019", + "title": "Data Visualization", + "description": "Creating effective visualizations for data communication. 
Tools include Python matplotlib, seaborn, and Tableau.", + "credits": 3, + "difficulty_level": "beginner", + "format": "hybrid", + "department": "Data Science", + "major": "Data Science", + "prerequisites": [], + "schedule": { + "days": [ + "tuesday" + ], + "start_time": "15:00:00", + "end_time": "17:30:00", + "location": "Liberal Arts Center 632" + }, + "semester": "winter", + "year": 2024, + "instructor": "Andrea Allen", + "max_enrollment": 42, + "current_enrollment": 8, + "tags": [ + "visualization", + "python", + "tableau", + "communication" + ], + "learning_objectives": [ + "Create effective data visualizations", + "Choose appropriate chart types", + "Use visualization tools", + "Communicate insights through visuals" + ], + "created_at": "2025-10-23 15:05:26.294541", + "updated_at": "2025-10-23 15:05:26.294541" + }, + { + "id": "01K897CBGPCZ67SV5B7Y8XPBHZ", + "course_code": "DS020", + "title": "Data Visualization", + "description": "Creating effective visualizations for data communication. Tools include Python matplotlib, seaborn, and Tableau.", + "credits": 3, + "difficulty_level": "beginner", + "format": "hybrid", + "department": "Data Science", + "major": "Data Science", + "prerequisites": [], + "schedule": { + "days": [ + "tuesday" + ], + "start_time": "15:30:00", + "end_time": "18:00:00", + "location": "Liberal Arts Center 700" + }, + "semester": "spring", + "year": 2024, + "instructor": "Jordan Ruiz", + "max_enrollment": 73, + "current_enrollment": 57, + "tags": [ + "visualization", + "python", + "tableau", + "communication" + ], + "learning_objectives": [ + "Create effective data visualizations", + "Choose appropriate chart types", + "Use visualization tools", + "Communicate insights through visuals" + ], + "created_at": "2025-10-23 15:05:26.294597", + "updated_at": "2025-10-23 15:05:26.294598" + }, + { + "id": "01K897CBGPCZ67SV5B7Y8XPBJ0", + "course_code": "DS021", + "title": "Data Visualization", + "description": "Creating effective visualizations for data communication. Tools include Python matplotlib, seaborn, and Tableau.", + "credits": 3, + "difficulty_level": "beginner", + "format": "in_person", + "department": "Data Science", + "major": "Data Science", + "prerequisites": [], + "schedule": { + "days": [ + "monday", + "wednesday" + ], + "start_time": "16:00:00", + "end_time": "17:15:00", + "location": "Engineering Building 663" + }, + "semester": "fall", + "year": 2024, + "instructor": "James Hughes", + "max_enrollment": 96, + "current_enrollment": 46, + "tags": [ + "visualization", + "python", + "tableau", + "communication" + ], + "learning_objectives": [ + "Create effective data visualizations", + "Choose appropriate chart types", + "Use visualization tools", + "Communicate insights through visuals" + ], + "created_at": "2025-10-23 15:05:26.294651", + "updated_at": "2025-10-23 15:05:26.294651" + }, + { + "id": "01K897CBGPCZ67SV5B7Y8XPBJ1", + "course_code": "DS022", + "title": "Statistics for Data Science", + "description": "Statistical methods and probability theory for data analysis. 
Hypothesis testing, regression, and statistical inference.", + "credits": 4, + "difficulty_level": "intermediate", + "format": "online", + "department": "Data Science", + "major": "Data Science", + "prerequisites": [], + "schedule": { + "days": [ + "monday", + "wednesday" + ], + "start_time": "16:30:00", + "end_time": "17:45:00", + "location": "Science Hall 687" + }, + "semester": "spring", + "year": 2024, + "instructor": "Shane Johnston", + "max_enrollment": 57, + "current_enrollment": 15, + "tags": [ + "statistics", + "probability", + "data analysis" + ], + "learning_objectives": [ + "Apply statistical methods to data", + "Perform hypothesis testing", + "Understand probability distributions", + "Conduct statistical inference" + ], + "created_at": "2025-10-23 15:05:26.294706", + "updated_at": "2025-10-23 15:05:26.294706" + }, + { + "id": "01K897CBGPCZ67SV5B7Y8XPBJ2", + "course_code": "DS023", + "title": "Statistics for Data Science", + "description": "Statistical methods and probability theory for data analysis. Hypothesis testing, regression, and statistical inference.", + "credits": 4, + "difficulty_level": "intermediate", + "format": "in_person", + "department": "Data Science", + "major": "Data Science", + "prerequisites": [ + { + "course_code": "DS012", + "course_title": "Prerequisite Course 12", + "minimum_grade": "B-", + "can_be_concurrent": false + }, + { + "course_code": "DS013", + "course_title": "Prerequisite Course 13", + "minimum_grade": "C", + "can_be_concurrent": false + } + ], + "schedule": { + "days": [ + "tuesday" + ], + "start_time": "16:30:00", + "end_time": "19:00:00", + "location": "Engineering Building 619" + }, + "semester": "spring", + "year": 2024, + "instructor": "Crystal Parks", + "max_enrollment": 93, + "current_enrollment": 14, + "tags": [ + "statistics", + "probability", + "data analysis" + ], + "learning_objectives": [ + "Apply statistical methods to data", + "Perform hypothesis testing", + "Understand probability distributions", + "Conduct statistical inference" + ], + "created_at": "2025-10-23 15:05:26.294765", + "updated_at": "2025-10-23 15:05:26.294766" + }, + { + "id": "01K897CBGPCZ67SV5B7Y8XPBJ3", + "course_code": "DS024", + "title": "Statistics for Data Science", + "description": "Statistical methods and probability theory for data analysis. Hypothesis testing, regression, and statistical inference.", + "credits": 4, + "difficulty_level": "intermediate", + "format": "hybrid", + "department": "Data Science", + "major": "Data Science", + "prerequisites": [], + "schedule": { + "days": [ + "tuesday" + ], + "start_time": "16:00:00", + "end_time": "18:30:00", + "location": "Science Hall 108" + }, + "semester": "spring", + "year": 2024, + "instructor": "Jaclyn Andrade", + "max_enrollment": 45, + "current_enrollment": 70, + "tags": [ + "statistics", + "probability", + "data analysis" + ], + "learning_objectives": [ + "Apply statistical methods to data", + "Perform hypothesis testing", + "Understand probability distributions", + "Conduct statistical inference" + ], + "created_at": "2025-10-23 15:05:26.294821", + "updated_at": "2025-10-23 15:05:26.294821" + }, + { + "id": "01K897CBGPCZ67SV5B7Y8XPBJ4", + "course_code": "DS025", + "title": "Data Visualization", + "description": "Creating effective visualizations for data communication. 
Tools include Python matplotlib, seaborn, and Tableau.", + "credits": 3, + "difficulty_level": "beginner", + "format": "hybrid", + "department": "Data Science", + "major": "Data Science", + "prerequisites": [], + "schedule": { + "days": [ + "tuesday", + "thursday" + ], + "start_time": "18:00:00", + "end_time": "19:15:00", + "location": "Science Hall 468" + }, + "semester": "summer", + "year": 2024, + "instructor": "Veronica Price", + "max_enrollment": 22, + "current_enrollment": 34, + "tags": [ + "visualization", + "python", + "tableau", + "communication" + ], + "learning_objectives": [ + "Create effective data visualizations", + "Choose appropriate chart types", + "Use visualization tools", + "Communicate insights through visuals" + ], + "created_at": "2025-10-23 15:05:26.294876", + "updated_at": "2025-10-23 15:05:26.294876" + }, + { + "id": "01K897CBGPCZ67SV5B7Y8XPBJ5", + "course_code": "DS026", + "title": "Data Visualization", + "description": "Creating effective visualizations for data communication. Tools include Python matplotlib, seaborn, and Tableau.", + "credits": 3, + "difficulty_level": "beginner", + "format": "hybrid", + "department": "Data Science", + "major": "Data Science", + "prerequisites": [], + "schedule": { + "days": [ + "thursday" + ], + "start_time": "12:30:00", + "end_time": "15:00:00", + "location": "Science Hall 698" + }, + "semester": "fall", + "year": 2024, + "instructor": "Bruce Johnson", + "max_enrollment": 87, + "current_enrollment": 48, + "tags": [ + "visualization", + "python", + "tableau", + "communication" + ], + "learning_objectives": [ + "Create effective data visualizations", + "Choose appropriate chart types", + "Use visualization tools", + "Communicate insights through visuals" + ], + "created_at": "2025-10-23 15:05:26.294984", + "updated_at": "2025-10-23 15:05:26.294985" + }, + { + "id": "01K897CBGQYD2EPGNYKNYKJ88B", + "course_code": "DS027", + "title": "Data Visualization", + "description": "Creating effective visualizations for data communication. Tools include Python matplotlib, seaborn, and Tableau.", + "credits": 3, + "difficulty_level": "beginner", + "format": "online", + "department": "Data Science", + "major": "Data Science", + "prerequisites": [ + { + "course_code": "DS016", + "course_title": "Prerequisite Course 16", + "minimum_grade": "B-", + "can_be_concurrent": false + }, + { + "course_code": "DS017", + "course_title": "Prerequisite Course 17", + "minimum_grade": "C+", + "can_be_concurrent": false + } + ], + "schedule": { + "days": [ + "tuesday", + "thursday" + ], + "start_time": "10:00:00", + "end_time": "11:15:00", + "location": "Science Hall 159" + }, + "semester": "summer", + "year": 2024, + "instructor": "Tammie Rios", + "max_enrollment": 72, + "current_enrollment": 2, + "tags": [ + "visualization", + "python", + "tableau", + "communication" + ], + "learning_objectives": [ + "Create effective data visualizations", + "Choose appropriate chart types", + "Use visualization tools", + "Communicate insights through visuals" + ], + "created_at": "2025-10-23 15:05:26.295045", + "updated_at": "2025-10-23 15:05:26.295045" + }, + { + "id": "01K897CBGQYD2EPGNYKNYKJ88C", + "course_code": "DS028", + "title": "Data Visualization", + "description": "Creating effective visualizations for data communication. 
Tools include Python matplotlib, seaborn, and Tableau.", + "credits": 3, + "difficulty_level": "beginner", + "format": "hybrid", + "department": "Data Science", + "major": "Data Science", + "prerequisites": [], + "schedule": { + "days": [ + "monday", + "wednesday", + "friday" + ], + "start_time": "13:30:00", + "end_time": "14:20:00", + "location": "Engineering Building 735" + }, + "semester": "summer", + "year": 2024, + "instructor": "Lisa Smith", + "max_enrollment": 34, + "current_enrollment": 66, + "tags": [ + "visualization", + "python", + "tableau", + "communication" + ], + "learning_objectives": [ + "Create effective data visualizations", + "Choose appropriate chart types", + "Use visualization tools", + "Communicate insights through visuals" + ], + "created_at": "2025-10-23 15:05:26.295102", + "updated_at": "2025-10-23 15:05:26.295102" + }, + { + "id": "01K897CBGQYD2EPGNYKNYKJ88D", + "course_code": "DS029", + "title": "Statistics for Data Science", + "description": "Statistical methods and probability theory for data analysis. Hypothesis testing, regression, and statistical inference.", + "credits": 4, + "difficulty_level": "intermediate", + "format": "in_person", + "department": "Data Science", + "major": "Data Science", + "prerequisites": [ + { + "course_code": "DS002", + "course_title": "Prerequisite Course 2", + "minimum_grade": "C+", + "can_be_concurrent": false + } + ], + "schedule": { + "days": [ + "thursday" + ], + "start_time": "16:30:00", + "end_time": "19:00:00", + "location": "Engineering Building 558" + }, + "semester": "fall", + "year": 2024, + "instructor": "Rose King", + "max_enrollment": 90, + "current_enrollment": 3, + "tags": [ + "statistics", + "probability", + "data analysis" + ], + "learning_objectives": [ + "Apply statistical methods to data", + "Perform hypothesis testing", + "Understand probability distributions", + "Conduct statistical inference" + ], + "created_at": "2025-10-23 15:05:26.295159", + "updated_at": "2025-10-23 15:05:26.295159" + }, + { + "id": "01K897CBGQYD2EPGNYKNYKJ88E", + "course_code": "DS030", + "title": "Data Visualization", + "description": "Creating effective visualizations for data communication. Tools include Python matplotlib, seaborn, and Tableau.", + "credits": 3, + "difficulty_level": "beginner", + "format": "online", + "department": "Data Science", + "major": "Data Science", + "prerequisites": [], + "schedule": { + "days": [ + "tuesday", + "thursday" + ], + "start_time": "15:00:00", + "end_time": "16:15:00", + "location": "Science Hall 626" + }, + "semester": "summer", + "year": 2024, + "instructor": "Rhonda Baldwin", + "max_enrollment": 73, + "current_enrollment": 22, + "tags": [ + "visualization", + "python", + "tableau", + "communication" + ], + "learning_objectives": [ + "Create effective data visualizations", + "Choose appropriate chart types", + "Use visualization tools", + "Communicate insights through visuals" + ], + "created_at": "2025-10-23 15:05:26.295213", + "updated_at": "2025-10-23 15:05:26.295213" + }, + { + "id": "01K897CBGQYD2EPGNYKNYKJ88F", + "course_code": "MATH031", + "title": "Linear Algebra", + "description": "Vector spaces, matrices, eigenvalues, and linear transformations. 
Essential for data science and engineering.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "hybrid", + "department": "Mathematics", + "major": "Mathematics", + "prerequisites": [], + "schedule": { + "days": [ + "thursday" + ], + "start_time": "08:00:00", + "end_time": "10:30:00", + "location": "Engineering Building 923" + }, + "semester": "fall", + "year": 2024, + "instructor": "Meghan Perkins", + "max_enrollment": 77, + "current_enrollment": 51, + "tags": [ + "linear algebra", + "matrices", + "vectors" + ], + "learning_objectives": [ + "Perform matrix operations", + "Understand vector spaces", + "Calculate eigenvalues and eigenvectors", + "Apply linear algebra to problems" + ], + "created_at": "2025-10-23 15:05:26.295268", + "updated_at": "2025-10-23 15:05:26.295268" + }, + { + "id": "01K897CBGQYD2EPGNYKNYKJ88G", + "course_code": "MATH032", + "title": "Linear Algebra", + "description": "Vector spaces, matrices, eigenvalues, and linear transformations. Essential for data science and engineering.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "online", + "department": "Mathematics", + "major": "Mathematics", + "prerequisites": [], + "schedule": { + "days": [ + "tuesday", + "thursday" + ], + "start_time": "15:30:00", + "end_time": "16:45:00", + "location": "Engineering Building 706" + }, + "semester": "summer", + "year": 2024, + "instructor": "Mr. Jason Holland", + "max_enrollment": 36, + "current_enrollment": 7, + "tags": [ + "linear algebra", + "matrices", + "vectors" + ], + "learning_objectives": [ + "Perform matrix operations", + "Understand vector spaces", + "Calculate eigenvalues and eigenvectors", + "Apply linear algebra to problems" + ], + "created_at": "2025-10-23 15:05:26.295325", + "updated_at": "2025-10-23 15:05:26.295325" + }, + { + "id": "01K897CBGQYD2EPGNYKNYKJ88H", + "course_code": "MATH033", + "title": "Calculus I", + "description": "Differential calculus including limits, derivatives, and applications. Foundation for advanced mathematics.", + "credits": 4, + "difficulty_level": "intermediate", + "format": "hybrid", + "department": "Mathematics", + "major": "Mathematics", + "prerequisites": [], + "schedule": { + "days": [ + "thursday" + ], + "start_time": "18:30:00", + "end_time": "21:00:00", + "location": "Science Hall 573" + }, + "semester": "summer", + "year": 2024, + "instructor": "Michaela King", + "max_enrollment": 75, + "current_enrollment": 8, + "tags": [ + "calculus", + "derivatives", + "limits" + ], + "learning_objectives": [ + "Understand limits and continuity", + "Calculate derivatives", + "Apply calculus to real problems", + "Understand fundamental theorem" + ], + "created_at": "2025-10-23 15:05:26.295383", + "updated_at": "2025-10-23 15:05:26.295383" + }, + { + "id": "01K897CBGQYD2EPGNYKNYKJ88J", + "course_code": "MATH034", + "title": "Calculus I", + "description": "Differential calculus including limits, derivatives, and applications. 
Foundation for advanced mathematics.", + "credits": 4, + "difficulty_level": "intermediate", + "format": "in_person", + "department": "Mathematics", + "major": "Mathematics", + "prerequisites": [], + "schedule": { + "days": [ + "monday", + "wednesday" + ], + "start_time": "13:00:00", + "end_time": "14:15:00", + "location": "Science Hall 411" + }, + "semester": "fall", + "year": 2024, + "instructor": "Trevor Rose", + "max_enrollment": 91, + "current_enrollment": 20, + "tags": [ + "calculus", + "derivatives", + "limits" + ], + "learning_objectives": [ + "Understand limits and continuity", + "Calculate derivatives", + "Apply calculus to real problems", + "Understand fundamental theorem" + ], + "created_at": "2025-10-23 15:05:26.295437", + "updated_at": "2025-10-23 15:05:26.295437" + }, + { + "id": "01K897CBGQYD2EPGNYKNYKJ88K", + "course_code": "MATH035", + "title": "Calculus I", + "description": "Differential calculus including limits, derivatives, and applications. Foundation for advanced mathematics.", + "credits": 4, + "difficulty_level": "intermediate", + "format": "online", + "department": "Mathematics", + "major": "Mathematics", + "prerequisites": [ + { + "course_code": "MATH022", + "course_title": "Prerequisite Course 22", + "minimum_grade": "C", + "can_be_concurrent": true + }, + { + "course_code": "MATH005", + "course_title": "Prerequisite Course 5", + "minimum_grade": "B-", + "can_be_concurrent": true + } + ], + "schedule": { + "days": [ + "monday", + "wednesday" + ], + "start_time": "17:30:00", + "end_time": "18:45:00", + "location": "Science Hall 114" + }, + "semester": "fall", + "year": 2024, + "instructor": "Christine Poole", + "max_enrollment": 55, + "current_enrollment": 67, + "tags": [ + "calculus", + "derivatives", + "limits" + ], + "learning_objectives": [ + "Understand limits and continuity", + "Calculate derivatives", + "Apply calculus to real problems", + "Understand fundamental theorem" + ], + "created_at": "2025-10-23 15:05:26.295495", + "updated_at": "2025-10-23 15:05:26.295495" + }, + { + "id": "01K897CBGQYD2EPGNYKNYKJ88M", + "course_code": "MATH036", + "title": "Linear Algebra", + "description": "Vector spaces, matrices, eigenvalues, and linear transformations. Essential for data science and engineering.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "online", + "department": "Mathematics", + "major": "Mathematics", + "prerequisites": [], + "schedule": { + "days": [ + "monday", + "wednesday" + ], + "start_time": "08:30:00", + "end_time": "09:45:00", + "location": "Technology Center 280" + }, + "semester": "winter", + "year": 2024, + "instructor": "Joel Barnett DDS", + "max_enrollment": 60, + "current_enrollment": 41, + "tags": [ + "linear algebra", + "matrices", + "vectors" + ], + "learning_objectives": [ + "Perform matrix operations", + "Understand vector spaces", + "Calculate eigenvalues and eigenvectors", + "Apply linear algebra to problems" + ], + "created_at": "2025-10-23 15:05:26.295552", + "updated_at": "2025-10-23 15:05:26.295552" + }, + { + "id": "01K897CBGQYD2EPGNYKNYKJ88N", + "course_code": "MATH037", + "title": "Linear Algebra", + "description": "Vector spaces, matrices, eigenvalues, and linear transformations. 
Essential for data science and engineering.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "online", + "department": "Mathematics", + "major": "Mathematics", + "prerequisites": [ + { + "course_code": "MATH017", + "course_title": "Prerequisite Course 17", + "minimum_grade": "B-", + "can_be_concurrent": false + } + ], + "schedule": { + "days": [ + "thursday" + ], + "start_time": "15:00:00", + "end_time": "17:30:00", + "location": "Business Complex 413" + }, + "semester": "fall", + "year": 2024, + "instructor": "Ashley Ramirez MD", + "max_enrollment": 33, + "current_enrollment": 46, + "tags": [ + "linear algebra", + "matrices", + "vectors" + ], + "learning_objectives": [ + "Perform matrix operations", + "Understand vector spaces", + "Calculate eigenvalues and eigenvectors", + "Apply linear algebra to problems" + ], + "created_at": "2025-10-23 15:05:26.295611", + "updated_at": "2025-10-23 15:05:26.295611" + }, + { + "id": "01K897CBGQYD2EPGNYKNYKJ88P", + "course_code": "MATH038", + "title": "Linear Algebra", + "description": "Vector spaces, matrices, eigenvalues, and linear transformations. Essential for data science and engineering.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "in_person", + "department": "Mathematics", + "major": "Mathematics", + "prerequisites": [], + "schedule": { + "days": [ + "tuesday", + "thursday" + ], + "start_time": "18:00:00", + "end_time": "19:15:00", + "location": "Engineering Building 274" + }, + "semester": "summer", + "year": 2024, + "instructor": "Krystal Thomas", + "max_enrollment": 76, + "current_enrollment": 48, + "tags": [ + "linear algebra", + "matrices", + "vectors" + ], + "learning_objectives": [ + "Perform matrix operations", + "Understand vector spaces", + "Calculate eigenvalues and eigenvectors", + "Apply linear algebra to problems" + ], + "created_at": "2025-10-23 15:05:26.295671", + "updated_at": "2025-10-23 15:05:26.295671" + }, + { + "id": "01K897CBGQYD2EPGNYKNYKJ88Q", + "course_code": "MATH039", + "title": "Calculus I", + "description": "Differential calculus including limits, derivatives, and applications. Foundation for advanced mathematics.", + "credits": 4, + "difficulty_level": "intermediate", + "format": "hybrid", + "department": "Mathematics", + "major": "Mathematics", + "prerequisites": [], + "schedule": { + "days": [ + "thursday" + ], + "start_time": "11:00:00", + "end_time": "13:30:00", + "location": "Engineering Building 407" + }, + "semester": "summer", + "year": 2024, + "instructor": "Steven Martin", + "max_enrollment": 80, + "current_enrollment": 9, + "tags": [ + "calculus", + "derivatives", + "limits" + ], + "learning_objectives": [ + "Understand limits and continuity", + "Calculate derivatives", + "Apply calculus to real problems", + "Understand fundamental theorem" + ], + "created_at": "2025-10-23 15:05:26.295724", + "updated_at": "2025-10-23 15:05:26.295724" + }, + { + "id": "01K897CBGQYD2EPGNYKNYKJ88R", + "course_code": "MATH040", + "title": "Calculus I", + "description": "Differential calculus including limits, derivatives, and applications. 
Foundation for advanced mathematics.", + "credits": 4, + "difficulty_level": "intermediate", + "format": "hybrid", + "department": "Mathematics", + "major": "Mathematics", + "prerequisites": [ + { + "course_code": "MATH015", + "course_title": "Prerequisite Course 15", + "minimum_grade": "B-", + "can_be_concurrent": true + } + ], + "schedule": { + "days": [ + "monday", + "wednesday", + "friday" + ], + "start_time": "13:00:00", + "end_time": "13:50:00", + "location": "Liberal Arts Center 466" + }, + "semester": "summer", + "year": 2024, + "instructor": "Denise Rodriguez", + "max_enrollment": 42, + "current_enrollment": 43, + "tags": [ + "calculus", + "derivatives", + "limits" + ], + "learning_objectives": [ + "Understand limits and continuity", + "Calculate derivatives", + "Apply calculus to real problems", + "Understand fundamental theorem" + ], + "created_at": "2025-10-23 15:05:26.295781", + "updated_at": "2025-10-23 15:05:26.295781" + }, + { + "id": "01K897CBGQYD2EPGNYKNYKJ88S", + "course_code": "MATH041", + "title": "Linear Algebra", + "description": "Vector spaces, matrices, eigenvalues, and linear transformations. Essential for data science and engineering.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "hybrid", + "department": "Mathematics", + "major": "Mathematics", + "prerequisites": [], + "schedule": { + "days": [ + "tuesday", + "thursday" + ], + "start_time": "18:30:00", + "end_time": "19:45:00", + "location": "Science Hall 849" + }, + "semester": "spring", + "year": 2024, + "instructor": "Anne Bates", + "max_enrollment": 66, + "current_enrollment": 46, + "tags": [ + "linear algebra", + "matrices", + "vectors" + ], + "learning_objectives": [ + "Perform matrix operations", + "Understand vector spaces", + "Calculate eigenvalues and eigenvectors", + "Apply linear algebra to problems" + ], + "created_at": "2025-10-23 15:05:26.295836", + "updated_at": "2025-10-23 15:05:26.295837" + }, + { + "id": "01K897CBGQYD2EPGNYKNYKJ88T", + "course_code": "MATH042", + "title": "Calculus I", + "description": "Differential calculus including limits, derivatives, and applications. Foundation for advanced mathematics.", + "credits": 4, + "difficulty_level": "intermediate", + "format": "hybrid", + "department": "Mathematics", + "major": "Mathematics", + "prerequisites": [ + { + "course_code": "MATH023", + "course_title": "Prerequisite Course 23", + "minimum_grade": "C+", + "can_be_concurrent": false + }, + { + "course_code": "MATH023", + "course_title": "Prerequisite Course 23", + "minimum_grade": "B-", + "can_be_concurrent": false + } + ], + "schedule": { + "days": [ + "monday", + "wednesday", + "friday" + ], + "start_time": "10:00:00", + "end_time": "10:50:00", + "location": "Business Complex 380" + }, + "semester": "spring", + "year": 2024, + "instructor": "Ivan Wright", + "max_enrollment": 83, + "current_enrollment": 9, + "tags": [ + "calculus", + "derivatives", + "limits" + ], + "learning_objectives": [ + "Understand limits and continuity", + "Calculate derivatives", + "Apply calculus to real problems", + "Understand fundamental theorem" + ], + "created_at": "2025-10-23 15:05:26.295894", + "updated_at": "2025-10-23 15:05:26.295894" + }, + { + "id": "01K897CBGQYD2EPGNYKNYKJ88V", + "course_code": "MATH043", + "title": "Linear Algebra", + "description": "Vector spaces, matrices, eigenvalues, and linear transformations. 
Essential for data science and engineering.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "online", + "department": "Mathematics", + "major": "Mathematics", + "prerequisites": [], + "schedule": { + "days": [ + "tuesday", + "thursday" + ], + "start_time": "15:30:00", + "end_time": "16:45:00", + "location": "Science Hall 910" + }, + "semester": "spring", + "year": 2024, + "instructor": "Kayla Hernandez", + "max_enrollment": 62, + "current_enrollment": 44, + "tags": [ + "linear algebra", + "matrices", + "vectors" + ], + "learning_objectives": [ + "Perform matrix operations", + "Understand vector spaces", + "Calculate eigenvalues and eigenvectors", + "Apply linear algebra to problems" + ], + "created_at": "2025-10-23 15:05:26.295948", + "updated_at": "2025-10-23 15:05:26.295948" + }, + { + "id": "01K897CBGQ6HR7RJ7ZZG8BSPSG", + "course_code": "MATH044", + "title": "Linear Algebra", + "description": "Vector spaces, matrices, eigenvalues, and linear transformations. Essential for data science and engineering.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "in_person", + "department": "Mathematics", + "major": "Mathematics", + "prerequisites": [], + "schedule": { + "days": [ + "tuesday" + ], + "start_time": "12:30:00", + "end_time": "15:00:00", + "location": "Engineering Building 645" + }, + "semester": "winter", + "year": 2024, + "instructor": "Michelle Hawkins", + "max_enrollment": 44, + "current_enrollment": 10, + "tags": [ + "linear algebra", + "matrices", + "vectors" + ], + "learning_objectives": [ + "Perform matrix operations", + "Understand vector spaces", + "Calculate eigenvalues and eigenvectors", + "Apply linear algebra to problems" + ], + "created_at": "2025-10-23 15:05:26.296007", + "updated_at": "2025-10-23 15:05:26.296007" + }, + { + "id": "01K897CBGR6HR7RJ7ZZG8BSPSH", + "course_code": "MATH045", + "title": "Linear Algebra", + "description": "Vector spaces, matrices, eigenvalues, and linear transformations. 
Essential for data science and engineering.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "in_person", + "department": "Mathematics", + "major": "Mathematics", + "prerequisites": [ + { + "course_code": "MATH001", + "course_title": "Prerequisite Course 1", + "minimum_grade": "B-", + "can_be_concurrent": false + }, + { + "course_code": "MATH018", + "course_title": "Prerequisite Course 18", + "minimum_grade": "C", + "can_be_concurrent": false + } + ], + "schedule": { + "days": [ + "monday", + "wednesday", + "friday" + ], + "start_time": "15:30:00", + "end_time": "16:20:00", + "location": "Liberal Arts Center 983" + }, + "semester": "winter", + "year": 2024, + "instructor": "Antonio Hernandez", + "max_enrollment": 45, + "current_enrollment": 17, + "tags": [ + "linear algebra", + "matrices", + "vectors" + ], + "learning_objectives": [ + "Perform matrix operations", + "Understand vector spaces", + "Calculate eigenvalues and eigenvectors", + "Apply linear algebra to problems" + ], + "created_at": "2025-10-23 15:05:26.296064", + "updated_at": "2025-10-23 15:05:26.296064" + }, + { + "id": "01K897CBGR6HR7RJ7ZZG8BSPSJ", + "course_code": "BUS046", + "title": "Principles of Management", + "description": "Fundamental management concepts including planning, organizing, leading, and controlling organizational resources.", + "credits": 3, + "difficulty_level": "beginner", + "format": "online", + "department": "Business", + "major": "Business Administration", + "prerequisites": [], + "schedule": { + "days": [ + "monday", + "wednesday" + ], + "start_time": "09:00:00", + "end_time": "10:15:00", + "location": "Science Hall 956" + }, + "semester": "winter", + "year": 2024, + "instructor": "Angela Jenkins", + "max_enrollment": 86, + "current_enrollment": 17, + "tags": [ + "management", + "leadership", + "organization" + ], + "learning_objectives": [ + "Understand management principles", + "Apply leadership concepts", + "Organize teams effectively", + "Control organizational resources" + ], + "created_at": "2025-10-23 15:05:26.296119", + "updated_at": "2025-10-23 15:05:26.296119" + }, + { + "id": "01K897CBGR6HR7RJ7ZZG8BSPSK", + "course_code": "BUS047", + "title": "Principles of Management", + "description": "Fundamental management concepts including planning, organizing, leading, and controlling organizational resources.", + "credits": 3, + "difficulty_level": "beginner", + "format": "hybrid", + "department": "Business", + "major": "Business Administration", + "prerequisites": [], + "schedule": { + "days": [ + "monday", + "wednesday", + "friday" + ], + "start_time": "08:30:00", + "end_time": "09:20:00", + "location": "Science Hall 205" + }, + "semester": "fall", + "year": 2024, + "instructor": "Valerie Smith", + "max_enrollment": 47, + "current_enrollment": 20, + "tags": [ + "management", + "leadership", + "organization" + ], + "learning_objectives": [ + "Understand management principles", + "Apply leadership concepts", + "Organize teams effectively", + "Control organizational resources" + ], + "created_at": "2025-10-23 15:05:26.296173", + "updated_at": "2025-10-23 15:05:26.296173" + }, + { + "id": "01K897CBGR6HR7RJ7ZZG8BSPSM", + "course_code": "BUS048", + "title": "Principles of Management", + "description": "Fundamental management concepts including planning, organizing, leading, and controlling organizational resources.", + "credits": 3, + "difficulty_level": "beginner", + "format": "hybrid", + "department": "Business", + "major": "Business Administration", + "prerequisites": [], + 
"schedule": { + "days": [ + "monday", + "wednesday", + "friday" + ], + "start_time": "12:30:00", + "end_time": "13:20:00", + "location": "Technology Center 244" + }, + "semester": "winter", + "year": 2024, + "instructor": "Adam Wilson", + "max_enrollment": 64, + "current_enrollment": 65, + "tags": [ + "management", + "leadership", + "organization" + ], + "learning_objectives": [ + "Understand management principles", + "Apply leadership concepts", + "Organize teams effectively", + "Control organizational resources" + ], + "created_at": "2025-10-23 15:05:26.296227", + "updated_at": "2025-10-23 15:05:26.296227" + }, + { + "id": "01K897CBGR6HR7RJ7ZZG8BSPSN", + "course_code": "BUS049", + "title": "Principles of Management", + "description": "Fundamental management concepts including planning, organizing, leading, and controlling organizational resources.", + "credits": 3, + "difficulty_level": "beginner", + "format": "in_person", + "department": "Business", + "major": "Business Administration", + "prerequisites": [], + "schedule": { + "days": [ + "monday", + "wednesday" + ], + "start_time": "14:00:00", + "end_time": "15:15:00", + "location": "Engineering Building 356" + }, + "semester": "winter", + "year": 2024, + "instructor": "Jillian Osborne", + "max_enrollment": 65, + "current_enrollment": 41, + "tags": [ + "management", + "leadership", + "organization" + ], + "learning_objectives": [ + "Understand management principles", + "Apply leadership concepts", + "Organize teams effectively", + "Control organizational resources" + ], + "created_at": "2025-10-23 15:05:26.296286", + "updated_at": "2025-10-23 15:05:26.296286" + }, + { + "id": "01K897CBGR6HR7RJ7ZZG8BSPSP", + "course_code": "BUS050", + "title": "Marketing Strategy", + "description": "Strategic marketing planning, market analysis, consumer behavior, and digital marketing techniques.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "online", + "department": "Business", + "major": "Business Administration", + "prerequisites": [ + { + "course_code": "BUS039", + "course_title": "Prerequisite Course 39", + "minimum_grade": "B-", + "can_be_concurrent": false + }, + { + "course_code": "BUS009", + "course_title": "Prerequisite Course 9", + "minimum_grade": "C", + "can_be_concurrent": false + } + ], + "schedule": { + "days": [ + "tuesday" + ], + "start_time": "11:00:00", + "end_time": "13:30:00", + "location": "Engineering Building 485" + }, + "semester": "spring", + "year": 2024, + "instructor": "Emily Grant", + "max_enrollment": 68, + "current_enrollment": 35, + "tags": [ + "marketing", + "strategy", + "consumer behavior" + ], + "learning_objectives": [ + "Develop marketing strategies", + "Analyze market opportunities", + "Understand consumer behavior", + "Implement digital marketing" + ], + "created_at": "2025-10-23 15:05:26.296345", + "updated_at": "2025-10-23 15:05:26.296346" + }, + { + "id": "01K897CBGR6HR7RJ7ZZG8BSPSQ", + "course_code": "BUS051", + "title": "Principles of Management", + "description": "Fundamental management concepts including planning, organizing, leading, and controlling organizational resources.", + "credits": 3, + "difficulty_level": "beginner", + "format": "hybrid", + "department": "Business", + "major": "Business Administration", + "prerequisites": [], + "schedule": { + "days": [ + "monday", + "wednesday", + "friday" + ], + "start_time": "15:30:00", + "end_time": "16:20:00", + "location": "Technology Center 896" + }, + "semester": "spring", + "year": 2024, + "instructor": "Robert Weeks", + 
"max_enrollment": 90, + "current_enrollment": 13, + "tags": [ + "management", + "leadership", + "organization" + ], + "learning_objectives": [ + "Understand management principles", + "Apply leadership concepts", + "Organize teams effectively", + "Control organizational resources" + ], + "created_at": "2025-10-23 15:05:26.296399", + "updated_at": "2025-10-23 15:05:26.296400" + }, + { + "id": "01K897CBGR6HR7RJ7ZZG8BSPSR", + "course_code": "BUS052", + "title": "Marketing Strategy", + "description": "Strategic marketing planning, market analysis, consumer behavior, and digital marketing techniques.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "online", + "department": "Business", + "major": "Business Administration", + "prerequisites": [], + "schedule": { + "days": [ + "tuesday", + "thursday" + ], + "start_time": "09:00:00", + "end_time": "10:15:00", + "location": "Business Complex 456" + }, + "semester": "fall", + "year": 2024, + "instructor": "Jose Brown", + "max_enrollment": 97, + "current_enrollment": 40, + "tags": [ + "marketing", + "strategy", + "consumer behavior" + ], + "learning_objectives": [ + "Develop marketing strategies", + "Analyze market opportunities", + "Understand consumer behavior", + "Implement digital marketing" + ], + "created_at": "2025-10-23 15:05:26.296453", + "updated_at": "2025-10-23 15:05:26.296453" + }, + { + "id": "01K897CBGR6HR7RJ7ZZG8BSPSS", + "course_code": "BUS053", + "title": "Marketing Strategy", + "description": "Strategic marketing planning, market analysis, consumer behavior, and digital marketing techniques.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "in_person", + "department": "Business", + "major": "Business Administration", + "prerequisites": [], + "schedule": { + "days": [ + "tuesday", + "thursday" + ], + "start_time": "12:00:00", + "end_time": "13:15:00", + "location": "Technology Center 409" + }, + "semester": "spring", + "year": 2024, + "instructor": "Mr. 
Adam Jennings", + "max_enrollment": 45, + "current_enrollment": 18, + "tags": [ + "marketing", + "strategy", + "consumer behavior" + ], + "learning_objectives": [ + "Develop marketing strategies", + "Analyze market opportunities", + "Understand consumer behavior", + "Implement digital marketing" + ], + "created_at": "2025-10-23 15:05:26.296510", + "updated_at": "2025-10-23 15:05:26.296510" + }, + { + "id": "01K897CBGR6HR7RJ7ZZG8BSPST", + "course_code": "BUS054", + "title": "Principles of Management", + "description": "Fundamental management concepts including planning, organizing, leading, and controlling organizational resources.", + "credits": 3, + "difficulty_level": "beginner", + "format": "in_person", + "department": "Business", + "major": "Business Administration", + "prerequisites": [], + "schedule": { + "days": [ + "thursday" + ], + "start_time": "15:30:00", + "end_time": "18:00:00", + "location": "Business Complex 391" + }, + "semester": "fall", + "year": 2024, + "instructor": "Mallory Davidson", + "max_enrollment": 83, + "current_enrollment": 51, + "tags": [ + "management", + "leadership", + "organization" + ], + "learning_objectives": [ + "Understand management principles", + "Apply leadership concepts", + "Organize teams effectively", + "Control organizational resources" + ], + "created_at": "2025-10-23 15:05:26.296565", + "updated_at": "2025-10-23 15:05:26.296565" + }, + { + "id": "01K897CBGR6HR7RJ7ZZG8BSPSV", + "course_code": "BUS055", + "title": "Marketing Strategy", + "description": "Strategic marketing planning, market analysis, consumer behavior, and digital marketing techniques.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "hybrid", + "department": "Business", + "major": "Business Administration", + "prerequisites": [ + { + "course_code": "BUS033", + "course_title": "Prerequisite Course 33", + "minimum_grade": "C+", + "can_be_concurrent": false + } + ], + "schedule": { + "days": [ + "monday", + "wednesday", + "friday" + ], + "start_time": "18:30:00", + "end_time": "19:20:00", + "location": "Business Complex 835" + }, + "semester": "summer", + "year": 2024, + "instructor": "Jennifer Barrett", + "max_enrollment": 80, + "current_enrollment": 65, + "tags": [ + "marketing", + "strategy", + "consumer behavior" + ], + "learning_objectives": [ + "Develop marketing strategies", + "Analyze market opportunities", + "Understand consumer behavior", + "Implement digital marketing" + ], + "created_at": "2025-10-23 15:05:26.296624", + "updated_at": "2025-10-23 15:05:26.296624" + }, + { + "id": "01K897CBGR6HR7RJ7ZZG8BSPSW", + "course_code": "BUS056", + "title": "Principles of Management", + "description": "Fundamental management concepts including planning, organizing, leading, and controlling organizational resources.", + "credits": 3, + "difficulty_level": "beginner", + "format": "in_person", + "department": "Business", + "major": "Business Administration", + "prerequisites": [], + "schedule": { + "days": [ + "tuesday", + "thursday" + ], + "start_time": "14:00:00", + "end_time": "15:15:00", + "location": "Technology Center 135" + }, + "semester": "winter", + "year": 2024, + "instructor": "David Jones", + "max_enrollment": 98, + "current_enrollment": 4, + "tags": [ + "management", + "leadership", + "organization" + ], + "learning_objectives": [ + "Understand management principles", + "Apply leadership concepts", + "Organize teams effectively", + "Control organizational resources" + ], + "created_at": "2025-10-23 15:05:26.296677", + "updated_at": "2025-10-23 
15:05:26.296678" + }, + { + "id": "01K897CBGR6HR7RJ7ZZG8BSPSX", + "course_code": "BUS057", + "title": "Marketing Strategy", + "description": "Strategic marketing planning, market analysis, consumer behavior, and digital marketing techniques.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "in_person", + "department": "Business", + "major": "Business Administration", + "prerequisites": [], + "schedule": { + "days": [ + "monday", + "wednesday" + ], + "start_time": "18:30:00", + "end_time": "19:45:00", + "location": "Technology Center 536" + }, + "semester": "summer", + "year": 2024, + "instructor": "Yvonne Bradley", + "max_enrollment": 23, + "current_enrollment": 53, + "tags": [ + "marketing", + "strategy", + "consumer behavior" + ], + "learning_objectives": [ + "Develop marketing strategies", + "Analyze market opportunities", + "Understand consumer behavior", + "Implement digital marketing" + ], + "created_at": "2025-10-23 15:05:26.296732", + "updated_at": "2025-10-23 15:05:26.296732" + }, + { + "id": "01K897CBGR6HR7RJ7ZZG8BSPSY", + "course_code": "BUS058", + "title": "Marketing Strategy", + "description": "Strategic marketing planning, market analysis, consumer behavior, and digital marketing techniques.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "hybrid", + "department": "Business", + "major": "Business Administration", + "prerequisites": [], + "schedule": { + "days": [ + "monday", + "wednesday", + "friday" + ], + "start_time": "17:00:00", + "end_time": "17:50:00", + "location": "Science Hall 444" + }, + "semester": "spring", + "year": 2024, + "instructor": "Shawn Andrade", + "max_enrollment": 54, + "current_enrollment": 32, + "tags": [ + "marketing", + "strategy", + "consumer behavior" + ], + "learning_objectives": [ + "Develop marketing strategies", + "Analyze market opportunities", + "Understand consumer behavior", + "Implement digital marketing" + ], + "created_at": "2025-10-23 15:05:26.296789", + "updated_at": "2025-10-23 15:05:26.296789" + }, + { + "id": "01K897CBGR6HR7RJ7ZZG8BSPSZ", + "course_code": "BUS059", + "title": "Principles of Management", + "description": "Fundamental management concepts including planning, organizing, leading, and controlling organizational resources.", + "credits": 3, + "difficulty_level": "beginner", + "format": "in_person", + "department": "Business", + "major": "Business Administration", + "prerequisites": [], + "schedule": { + "days": [ + "thursday" + ], + "start_time": "12:30:00", + "end_time": "15:00:00", + "location": "Science Hall 834" + }, + "semester": "spring", + "year": 2024, + "instructor": "Sydney Stephens", + "max_enrollment": 100, + "current_enrollment": 32, + "tags": [ + "management", + "leadership", + "organization" + ], + "learning_objectives": [ + "Understand management principles", + "Apply leadership concepts", + "Organize teams effectively", + "Control organizational resources" + ], + "created_at": "2025-10-23 15:05:26.296843", + "updated_at": "2025-10-23 15:05:26.296843" + }, + { + "id": "01K897CBGR6HR7RJ7ZZG8BSPT0", + "course_code": "BUS060", + "title": "Marketing Strategy", + "description": "Strategic marketing planning, market analysis, consumer behavior, and digital marketing techniques.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "online", + "department": "Business", + "major": "Business Administration", + "prerequisites": [], + "schedule": { + "days": [ + "monday", + "wednesday" + ], + "start_time": "18:30:00", + "end_time": "19:45:00", + "location": 
"Science Hall 997" + }, + "semester": "winter", + "year": 2024, + "instructor": "Daniel Walker", + "max_enrollment": 38, + "current_enrollment": 72, + "tags": [ + "marketing", + "strategy", + "consumer behavior" + ], + "learning_objectives": [ + "Develop marketing strategies", + "Analyze market opportunities", + "Understand consumer behavior", + "Implement digital marketing" + ], + "created_at": "2025-10-23 15:05:26.296897", + "updated_at": "2025-10-23 15:05:26.296897" + }, + { + "id": "01K897CBGR6HR7RJ7ZZG8BSPT1", + "course_code": "PSY061", + "title": "Introduction to Psychology", + "description": "Overview of psychological principles, research methods, and major areas of study in psychology.", + "credits": 3, + "difficulty_level": "beginner", + "format": "hybrid", + "department": "Psychology", + "major": "Psychology", + "prerequisites": [], + "schedule": { + "days": [ + "tuesday" + ], + "start_time": "18:00:00", + "end_time": "20:30:00", + "location": "Science Hall 721" + }, + "semester": "summer", + "year": 2024, + "instructor": "Patrick Wilson", + "max_enrollment": 75, + "current_enrollment": 55, + "tags": [ + "psychology", + "research methods", + "behavior" + ], + "learning_objectives": [ + "Understand psychological principles", + "Learn research methods", + "Explore areas of psychology", + "Apply psychological concepts" + ], + "created_at": "2025-10-23 15:05:26.296950", + "updated_at": "2025-10-23 15:05:26.296951" + }, + { + "id": "01K897CBGSQDCE36J9KZZMDEZG", + "course_code": "PSY062", + "title": "Cognitive Psychology", + "description": "Study of mental processes including perception, memory, thinking, and problem-solving.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "in_person", + "department": "Psychology", + "major": "Psychology", + "prerequisites": [], + "schedule": { + "days": [ + "thursday" + ], + "start_time": "11:00:00", + "end_time": "13:30:00", + "location": "Liberal Arts Center 995" + }, + "semester": "spring", + "year": 2024, + "instructor": "Denise Lamb", + "max_enrollment": 30, + "current_enrollment": 80, + "tags": [ + "cognitive psychology", + "memory", + "perception" + ], + "learning_objectives": [ + "Understand cognitive processes", + "Study memory systems", + "Analyze problem-solving", + "Explore perception mechanisms" + ], + "created_at": "2025-10-23 15:05:26.297007", + "updated_at": "2025-10-23 15:05:26.297008" + }, + { + "id": "01K897CBGSQDCE36J9KZZMDEZH", + "course_code": "PSY063", + "title": "Cognitive Psychology", + "description": "Study of mental processes including perception, memory, thinking, and problem-solving.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "in_person", + "department": "Psychology", + "major": "Psychology", + "prerequisites": [], + "schedule": { + "days": [ + "tuesday" + ], + "start_time": "16:00:00", + "end_time": "18:30:00", + "location": "Liberal Arts Center 598" + }, + "semester": "winter", + "year": 2024, + "instructor": "Howard Phelps", + "max_enrollment": 54, + "current_enrollment": 66, + "tags": [ + "cognitive psychology", + "memory", + "perception" + ], + "learning_objectives": [ + "Understand cognitive processes", + "Study memory systems", + "Analyze problem-solving", + "Explore perception mechanisms" + ], + "created_at": "2025-10-23 15:05:26.297061", + "updated_at": "2025-10-23 15:05:26.297061" + }, + { + "id": "01K897CBGSQDCE36J9KZZMDEZJ", + "course_code": "PSY064", + "title": "Cognitive Psychology", + "description": "Study of mental processes including perception, memory, 
thinking, and problem-solving.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "in_person", + "department": "Psychology", + "major": "Psychology", + "prerequisites": [], + "schedule": { + "days": [ + "tuesday" + ], + "start_time": "15:30:00", + "end_time": "18:00:00", + "location": "Business Complex 605" + }, + "semester": "summer", + "year": 2024, + "instructor": "John Richardson", + "max_enrollment": 88, + "current_enrollment": 77, + "tags": [ + "cognitive psychology", + "memory", + "perception" + ], + "learning_objectives": [ + "Understand cognitive processes", + "Study memory systems", + "Analyze problem-solving", + "Explore perception mechanisms" + ], + "created_at": "2025-10-23 15:05:26.297115", + "updated_at": "2025-10-23 15:05:26.297115" + }, + { + "id": "01K897CBGSQDCE36J9KZZMDEZK", + "course_code": "PSY065", + "title": "Introduction to Psychology", + "description": "Overview of psychological principles, research methods, and major areas of study in psychology.", + "credits": 3, + "difficulty_level": "beginner", + "format": "online", + "department": "Psychology", + "major": "Psychology", + "prerequisites": [], + "schedule": { + "days": [ + "tuesday" + ], + "start_time": "08:30:00", + "end_time": "11:00:00", + "location": "Liberal Arts Center 914" + }, + "semester": "summer", + "year": 2024, + "instructor": "Brian Mcconnell", + "max_enrollment": 53, + "current_enrollment": 33, + "tags": [ + "psychology", + "research methods", + "behavior" + ], + "learning_objectives": [ + "Understand psychological principles", + "Learn research methods", + "Explore areas of psychology", + "Apply psychological concepts" + ], + "created_at": "2025-10-23 15:05:26.297168", + "updated_at": "2025-10-23 15:05:26.297169" + }, + { + "id": "01K897CBGSQDCE36J9KZZMDEZM", + "course_code": "PSY066", + "title": "Introduction to Psychology", + "description": "Overview of psychological principles, research methods, and major areas of study in psychology.", + "credits": 3, + "difficulty_level": "beginner", + "format": "in_person", + "department": "Psychology", + "major": "Psychology", + "prerequisites": [], + "schedule": { + "days": [ + "tuesday", + "thursday" + ], + "start_time": "16:30:00", + "end_time": "17:45:00", + "location": "Science Hall 914" + }, + "semester": "summer", + "year": 2024, + "instructor": "Mr. 
Brandon Elliott", + "max_enrollment": 84, + "current_enrollment": 78, + "tags": [ + "psychology", + "research methods", + "behavior" + ], + "learning_objectives": [ + "Understand psychological principles", + "Learn research methods", + "Explore areas of psychology", + "Apply psychological concepts" + ], + "created_at": "2025-10-23 15:05:26.297224", + "updated_at": "2025-10-23 15:05:26.297224" + }, + { + "id": "01K897CBGSQDCE36J9KZZMDEZN", + "course_code": "PSY067", + "title": "Cognitive Psychology", + "description": "Study of mental processes including perception, memory, thinking, and problem-solving.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "in_person", + "department": "Psychology", + "major": "Psychology", + "prerequisites": [], + "schedule": { + "days": [ + "tuesday", + "thursday" + ], + "start_time": "14:00:00", + "end_time": "15:15:00", + "location": "Technology Center 688" + }, + "semester": "winter", + "year": 2024, + "instructor": "Gina Mullins", + "max_enrollment": 37, + "current_enrollment": 10, + "tags": [ + "cognitive psychology", + "memory", + "perception" + ], + "learning_objectives": [ + "Understand cognitive processes", + "Study memory systems", + "Analyze problem-solving", + "Explore perception mechanisms" + ], + "created_at": "2025-10-23 15:05:26.297282", + "updated_at": "2025-10-23 15:05:26.297282" + }, + { + "id": "01K897CBGSQDCE36J9KZZMDEZP", + "course_code": "PSY068", + "title": "Introduction to Psychology", + "description": "Overview of psychological principles, research methods, and major areas of study in psychology.", + "credits": 3, + "difficulty_level": "beginner", + "format": "online", + "department": "Psychology", + "major": "Psychology", + "prerequisites": [], + "schedule": { + "days": [ + "tuesday" + ], + "start_time": "08:00:00", + "end_time": "10:30:00", + "location": "Engineering Building 414" + }, + "semester": "fall", + "year": 2024, + "instructor": "Stephen Schwartz", + "max_enrollment": 80, + "current_enrollment": 67, + "tags": [ + "psychology", + "research methods", + "behavior" + ], + "learning_objectives": [ + "Understand psychological principles", + "Learn research methods", + "Explore areas of psychology", + "Apply psychological concepts" + ], + "created_at": "2025-10-23 15:05:26.297336", + "updated_at": "2025-10-23 15:05:26.297336" + }, + { + "id": "01K897CBGSQDCE36J9KZZMDEZQ", + "course_code": "PSY069", + "title": "Introduction to Psychology", + "description": "Overview of psychological principles, research methods, and major areas of study in psychology.", + "credits": 3, + "difficulty_level": "beginner", + "format": "online", + "department": "Psychology", + "major": "Psychology", + "prerequisites": [], + "schedule": { + "days": [ + "monday", + "wednesday", + "friday" + ], + "start_time": "17:00:00", + "end_time": "17:50:00", + "location": "Business Complex 388" + }, + "semester": "winter", + "year": 2024, + "instructor": "Travis Navarro", + "max_enrollment": 65, + "current_enrollment": 31, + "tags": [ + "psychology", + "research methods", + "behavior" + ], + "learning_objectives": [ + "Understand psychological principles", + "Learn research methods", + "Explore areas of psychology", + "Apply psychological concepts" + ], + "created_at": "2025-10-23 15:05:26.297391", + "updated_at": "2025-10-23 15:05:26.297391" + }, + { + "id": "01K897CBGSQDCE36J9KZZMDEZR", + "course_code": "PSY070", + "title": "Introduction to Psychology", + "description": "Overview of psychological principles, research methods, and major areas of 
study in psychology.", + "credits": 3, + "difficulty_level": "beginner", + "format": "hybrid", + "department": "Psychology", + "major": "Psychology", + "prerequisites": [], + "schedule": { + "days": [ + "thursday" + ], + "start_time": "18:30:00", + "end_time": "21:00:00", + "location": "Liberal Arts Center 415" + }, + "semester": "winter", + "year": 2024, + "instructor": "Timothy Esparza", + "max_enrollment": 40, + "current_enrollment": 33, + "tags": [ + "psychology", + "research methods", + "behavior" + ], + "learning_objectives": [ + "Understand psychological principles", + "Learn research methods", + "Explore areas of psychology", + "Apply psychological concepts" + ], + "created_at": "2025-10-23 15:05:26.297446", + "updated_at": "2025-10-23 15:05:26.297447" + }, + { + "id": "01K897CBGSQDCE36J9KZZMDEZS", + "course_code": "PSY071", + "title": "Cognitive Psychology", + "description": "Study of mental processes including perception, memory, thinking, and problem-solving.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "in_person", + "department": "Psychology", + "major": "Psychology", + "prerequisites": [], + "schedule": { + "days": [ + "thursday" + ], + "start_time": "12:00:00", + "end_time": "14:30:00", + "location": "Liberal Arts Center 446" + }, + "semester": "spring", + "year": 2024, + "instructor": "Melissa Butler", + "max_enrollment": 43, + "current_enrollment": 26, + "tags": [ + "cognitive psychology", + "memory", + "perception" + ], + "learning_objectives": [ + "Understand cognitive processes", + "Study memory systems", + "Analyze problem-solving", + "Explore perception mechanisms" + ], + "created_at": "2025-10-23 15:05:26.297501", + "updated_at": "2025-10-23 15:05:26.297502" + }, + { + "id": "01K897CBGSQDCE36J9KZZMDEZT", + "course_code": "PSY072", + "title": "Introduction to Psychology", + "description": "Overview of psychological principles, research methods, and major areas of study in psychology.", + "credits": 3, + "difficulty_level": "beginner", + "format": "online", + "department": "Psychology", + "major": "Psychology", + "prerequisites": [ + { + "course_code": "PSY028", + "course_title": "Prerequisite Course 28", + "minimum_grade": "C+", + "can_be_concurrent": false + }, + { + "course_code": "PSY011", + "course_title": "Prerequisite Course 11", + "minimum_grade": "C", + "can_be_concurrent": false + } + ], + "schedule": { + "days": [ + "tuesday", + "thursday" + ], + "start_time": "16:30:00", + "end_time": "17:45:00", + "location": "Science Hall 515" + }, + "semester": "fall", + "year": 2024, + "instructor": "Lisa Jones", + "max_enrollment": 93, + "current_enrollment": 63, + "tags": [ + "psychology", + "research methods", + "behavior" + ], + "learning_objectives": [ + "Understand psychological principles", + "Learn research methods", + "Explore areas of psychology", + "Apply psychological concepts" + ], + "created_at": "2025-10-23 15:05:26.297560", + "updated_at": "2025-10-23 15:05:26.297560" + }, + { + "id": "01K897CBGSQDCE36J9KZZMDEZV", + "course_code": "PSY073", + "title": "Introduction to Psychology", + "description": "Overview of psychological principles, research methods, and major areas of study in psychology.", + "credits": 3, + "difficulty_level": "beginner", + "format": "in_person", + "department": "Psychology", + "major": "Psychology", + "prerequisites": [], + "schedule": { + "days": [ + "monday", + "wednesday", + "friday" + ], + "start_time": "15:00:00", + "end_time": "15:50:00", + "location": "Science Hall 808" + }, + "semester": "spring", + 
"year": 2024, + "instructor": "James Roth", + "max_enrollment": 44, + "current_enrollment": 43, + "tags": [ + "psychology", + "research methods", + "behavior" + ], + "learning_objectives": [ + "Understand psychological principles", + "Learn research methods", + "Explore areas of psychology", + "Apply psychological concepts" + ], + "created_at": "2025-10-23 15:05:26.297615", + "updated_at": "2025-10-23 15:05:26.297615" + }, + { + "id": "01K897CBGSQDCE36J9KZZMDEZW", + "course_code": "PSY074", + "title": "Cognitive Psychology", + "description": "Study of mental processes including perception, memory, thinking, and problem-solving.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "online", + "department": "Psychology", + "major": "Psychology", + "prerequisites": [], + "schedule": { + "days": [ + "monday", + "wednesday" + ], + "start_time": "17:30:00", + "end_time": "18:45:00", + "location": "Liberal Arts Center 978" + }, + "semester": "winter", + "year": 2024, + "instructor": "Adam Wells", + "max_enrollment": 67, + "current_enrollment": 36, + "tags": [ + "cognitive psychology", + "memory", + "perception" + ], + "learning_objectives": [ + "Understand cognitive processes", + "Study memory systems", + "Analyze problem-solving", + "Explore perception mechanisms" + ], + "created_at": "2025-10-23 15:05:26.297669", + "updated_at": "2025-10-23 15:05:26.297669" + }, + { + "id": "01K897CBGSQDCE36J9KZZMDEZX", + "course_code": "PSY075", + "title": "Cognitive Psychology", + "description": "Study of mental processes including perception, memory, thinking, and problem-solving.", + "credits": 3, + "difficulty_level": "intermediate", + "format": "in_person", + "department": "Psychology", + "major": "Psychology", + "prerequisites": [], + "schedule": { + "days": [ + "tuesday" + ], + "start_time": "14:30:00", + "end_time": "17:00:00", + "location": "Business Complex 160" + }, + "semester": "winter", + "year": 2024, + "instructor": "Steven Martinez", + "max_enrollment": 34, + "current_enrollment": 13, + "tags": [ + "cognitive psychology", + "memory", + "perception" + ], + "learning_objectives": [ + "Understand cognitive processes", + "Study memory systems", + "Analyze problem-solving", + "Explore perception mechanisms" + ], + "created_at": "2025-10-23 15:05:26.297722", + "updated_at": "2025-10-23 15:05:26.297722" + } + ] +} \ No newline at end of file diff --git a/python-recipes/context-engineering/reference-agent/examples/advanced_agent_example.py b/python-recipes/context-engineering/reference-agent/examples/advanced_agent_example.py new file mode 100644 index 00000000..92f1869b --- /dev/null +++ b/python-recipes/context-engineering/reference-agent/examples/advanced_agent_example.py @@ -0,0 +1,292 @@ +""" +Advanced Agent Example + +This example demonstrates patterns from all sections of the Context Engineering course: +- Section 2: System context and tools +- Section 3: Memory management +- Section 4: Optimizations (token management, retrieval strategies, tool filtering) + +This is a production-ready pattern that combines all the techniques. 
+""" + +import asyncio +from langchain_openai import ChatOpenAI +from langchain_core.messages import SystemMessage, HumanMessage, AIMessage + +from redis_context_course import ( + CourseManager, + MemoryClient, + create_course_tools, + create_memory_tools, + count_tokens, + estimate_token_budget, + filter_tools_by_intent, + format_context_for_llm, + create_summary_view, +) + + +class AdvancedClassAgent: + """ + Advanced class scheduling agent with all optimizations. + + Features: + - Tool filtering based on intent + - Token budget management + - Hybrid retrieval (summary + specific items) + - Memory integration + - Grounding support + """ + + def __init__( + self, + student_id: str, + session_id: str = "default_session", + model: str = "gpt-4o", + enable_tool_filtering: bool = True, + enable_memory_tools: bool = False + ): + self.student_id = student_id + self.session_id = session_id + self.llm = ChatOpenAI(model=model, temperature=0.7) + self.course_manager = CourseManager() + self.memory_client = MemoryClient( + user_id=student_id, + namespace="redis_university" + ) + + # Configuration + self.enable_tool_filtering = enable_tool_filtering + self.enable_memory_tools = enable_memory_tools + + # Create tools + self.course_tools = create_course_tools(self.course_manager) + self.memory_tools = create_memory_tools( + self.memory_client, + session_id=self.session_id, + user_id=self.student_id + ) if enable_memory_tools else [] + + # Organize tools by category (for filtering) + self.tool_groups = { + "search": self.course_tools, + "memory": self.memory_tools, + } + + # Pre-compute course catalog summary (Section 4 pattern) + self.catalog_summary = None + + async def initialize(self): + """Initialize the agent (pre-compute summaries).""" + # Create course catalog summary + all_courses = await self.course_manager.get_all_courses() + self.catalog_summary = await create_summary_view( + items=all_courses, + group_by_field="department", + max_items_per_group=5 + ) + print(f"โœ… Agent initialized with {len(all_courses)} courses") + + async def chat( + self, + user_message: str, + session_id: str, + conversation_history: list = None + ) -> tuple[str, list]: + """ + Process a user message with all optimizations. 
+ + Args: + user_message: User's message + session_id: Session ID for working memory + conversation_history: Previous messages in this session + + Returns: + Tuple of (response, updated_conversation_history) + """ + if conversation_history is None: + conversation_history = [] + + # Step 1: Load working memory + working_memory = await self.memory_client.get_working_memory( + session_id=session_id, + model_name="gpt-4o" + ) + + # Step 2: Search long-term memory for relevant context + long_term_memories = await self.memory_client.search_memories( + query=user_message, + limit=5 + ) + + # Step 3: Build context (Section 4 pattern) + system_prompt = self._build_system_prompt(long_term_memories) + + # Step 4: Estimate token budget (Section 4 pattern) + token_budget = estimate_token_budget( + system_prompt=system_prompt, + working_memory_messages=len(working_memory.messages) if working_memory else 0, + long_term_memories=len(long_term_memories), + retrieved_context_items=0, # Will add if we do RAG + ) + + print(f"\n๐Ÿ“Š Token Budget:") + print(f" System: {token_budget['system_prompt']}") + print(f" Working Memory: {token_budget['working_memory']}") + print(f" Long-term Memory: {token_budget['long_term_memory']}") + print(f" Total: {token_budget['total_input']} tokens") + + # Step 5: Select tools based on intent (Section 4 pattern) + if self.enable_tool_filtering: + relevant_tools = filter_tools_by_intent( + query=user_message, + tool_groups=self.tool_groups, + default_group="search" + ) + print(f"\n๐Ÿ”ง Selected {len(relevant_tools)} relevant tools") + else: + relevant_tools = self.course_tools + self.memory_tools + print(f"\n๐Ÿ”ง Using all {len(relevant_tools)} tools") + + # Step 6: Bind tools and invoke LLM + llm_with_tools = self.llm.bind_tools(relevant_tools) + + # Build messages + messages = [SystemMessage(content=system_prompt)] + + # Add working memory + if working_memory and working_memory.messages: + for msg in working_memory.messages: + if msg.role == "user": + messages.append(HumanMessage(content=msg.content)) + elif msg.role == "assistant": + messages.append(AIMessage(content=msg.content)) + + # Add current message + messages.append(HumanMessage(content=user_message)) + + # Get response + response = llm_with_tools.invoke(messages) + + # Handle tool calls if any + if response.tool_calls: + print(f"\n๐Ÿ› ๏ธ Agent called {len(response.tool_calls)} tool(s)") + # In a full implementation, you'd execute tools here + # For this example, we'll just note them + for tool_call in response.tool_calls: + print(f" - {tool_call['name']}") + + # Step 7: Save to working memory (triggers automatic extraction) + conversation_history.append(HumanMessage(content=user_message)) + conversation_history.append(AIMessage(content=response.content)) + + messages_to_save = [ + {"role": "user" if isinstance(m, HumanMessage) else "assistant", "content": m.content} + for m in conversation_history + ] + + await self.memory_client.save_working_memory( + session_id=session_id, + messages=messages_to_save + ) + + return response.content, conversation_history + + def _build_system_prompt(self, long_term_memories: list) -> str: + """ + Build system prompt with all context. + + This uses the format_context_for_llm pattern from Section 4. + """ + base_instructions = """You are a helpful class scheduling agent for Redis University. +Help students find courses, check prerequisites, and plan their schedule. + +Use the available tools to search courses and check prerequisites. 
+Be friendly, helpful, and personalized based on what you know about the student. +""" + + # Format memories + memory_context = None + if long_term_memories: + memory_lines = [f"- {m.text}" for m in long_term_memories] + memory_context = "What you know about this student:\n" + "\n".join(memory_lines) + + # Use the formatting helper + return format_context_for_llm( + system_instructions=base_instructions, + summary_view=self.catalog_summary, + memories=memory_context + ) + + +async def main(): + """Run the advanced agent example.""" + print("=" * 80) + print("ADVANCED CLASS AGENT EXAMPLE") + print("=" * 80) + + # Initialize agent + agent = AdvancedClassAgent( + student_id="demo_student", + enable_tool_filtering=True, + enable_memory_tools=False # Set to True to give LLM control over memory + ) + + await agent.initialize() + + # Simulate a conversation + session_id = "demo_session" + conversation = [] + + queries = [ + "Hi! I'm interested in machine learning courses.", + "What are the prerequisites for CS401?", + "I've completed CS101 and CS201. Can I take CS401?", + ] + + for i, query in enumerate(queries, 1): + print(f"\n{'=' * 80}") + print(f"TURN {i}") + print(f"{'=' * 80}") + print(f"\n๐Ÿ‘ค User: {query}") + + response, conversation = await agent.chat( + user_message=query, + session_id=session_id, + conversation_history=conversation + ) + + print(f"\n๐Ÿค– Agent: {response}") + + # Small delay between turns + await asyncio.sleep(1) + + print(f"\n{'=' * 80}") + print("โœ… Conversation complete!") + print(f"{'=' * 80}") + + # Show final statistics + print("\n๐Ÿ“ˆ Final Statistics:") + print(f" Turns: {len(queries)}") + print(f" Messages in conversation: {len(conversation)}") + + # Check what was extracted to long-term memory + print("\n๐Ÿง  Checking long-term memory...") + await asyncio.sleep(2) # Wait for extraction + + memories = await agent.memory_client.search_memories( + query="", + limit=10 + ) + + if memories: + print(f" Extracted {len(memories)} memories:") + for memory in memories: + print(f" - {memory.text}") + else: + print(" No memories extracted yet (may take a moment)") + + +if __name__ == "__main__": + asyncio.run(main()) + diff --git a/python-recipes/context-engineering/reference-agent/examples/basic_usage.py b/python-recipes/context-engineering/reference-agent/examples/basic_usage.py new file mode 100644 index 00000000..5a3172e4 --- /dev/null +++ b/python-recipes/context-engineering/reference-agent/examples/basic_usage.py @@ -0,0 +1,198 @@ +#!/usr/bin/env python3 +""" +Demo script showing how to use the redis-context-course package. + +This script demonstrates the basic usage of the package components +without requiring external dependencies like Redis or OpenAI. 
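+
+A rough invocation sketch (assuming you run it from the reference-agent
+directory of this repository):
+
+    python examples/basic_usage.py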
+""" + +import asyncio +from datetime import time +from redis_context_course.models import ( + Course, StudentProfile, DifficultyLevel, CourseFormat, + Semester, DayOfWeek, CourseSchedule, Prerequisite +) + + +def demo_models(): + """Demonstrate the data models.""" + print("๐ŸŽ“ Redis Context Course - Demo") + print("=" * 50) + + print("\n๐Ÿ“š Creating a sample course:") + + # Create a course schedule + schedule = CourseSchedule( + days=[DayOfWeek.MONDAY, DayOfWeek.WEDNESDAY, DayOfWeek.FRIDAY], + start_time=time(10, 0), + end_time=time(11, 30), + location="Science Hall 101" + ) + + # Create prerequisites + prereq = Prerequisite( + course_code="CS101", + course_title="Introduction to Programming", + minimum_grade="C", + can_be_concurrent=False + ) + + # Create a course + course = Course( + course_code="CS201", + title="Data Structures and Algorithms", + description="Study of fundamental data structures and algorithms including arrays, linked lists, trees, graphs, sorting, and searching.", + credits=4, + difficulty_level=DifficultyLevel.INTERMEDIATE, + format=CourseFormat.HYBRID, + department="Computer Science", + major="Computer Science", + prerequisites=[prereq], + schedule=schedule, + semester=Semester.FALL, + year=2024, + instructor="Dr. Jane Smith", + max_enrollment=50, + current_enrollment=35, + tags=["algorithms", "data structures", "programming"], + learning_objectives=[ + "Implement common data structures", + "Analyze algorithm complexity", + "Solve problems using appropriate data structures", + "Understand time and space complexity" + ] + ) + + print(f" Course: {course.course_code} - {course.title}") + print(f" Credits: {course.credits}") + print(f" Difficulty: {course.difficulty_level.value}") + print(f" Format: {course.format.value}") + print(f" Schedule: {', '.join([day.value for day in course.schedule.days])}") + print(f" Time: {course.schedule.start_time} - {course.schedule.end_time}") + print(f" Prerequisites: {len(course.prerequisites)} required") + print(f" Enrollment: {course.current_enrollment}/{course.max_enrollment}") + + print("\n๐Ÿ‘ค Creating a student profile:") + + student = StudentProfile( + name="Alex Johnson", + email="alex.johnson@university.edu", + major="Computer Science", + year=2, + completed_courses=["CS101", "MATH101", "ENG101"], + current_courses=["CS201", "MATH201"], + interests=["machine learning", "web development", "data science"], + preferred_format=CourseFormat.ONLINE, + preferred_difficulty=DifficultyLevel.INTERMEDIATE, + max_credits_per_semester=15 + ) + + print(f" Name: {student.name}") + print(f" Major: {student.major} (Year {student.year})") + print(f" Completed: {len(student.completed_courses)} courses") + print(f" Current: {len(student.current_courses)} courses") + print(f" Interests: {', '.join(student.interests)}") + print(f" Preferences: {student.preferred_format.value}, {student.preferred_difficulty.value}") + + return course, student + + +def demo_package_info(): + """Show package information.""" + print("\n๐Ÿ“ฆ Package Information:") + + import redis_context_course + + print(f" Version: {redis_context_course.__version__}") + print(f" Author: {redis_context_course.__author__}") + print(f" Description: {redis_context_course.__description__}") + + print("\n๐Ÿ”ง Available Components:") + components = [ + ("Models", "Data structures for courses, students, and memory"), + ("MemoryManager", "Handles long-term memory (cross-session knowledge)"), + ("WorkingMemory", "Handles working memory (task-focused context)"), + ("CourseManager", "Course 
storage and recommendation engine"), + ("ClassAgent", "LangGraph-based conversational agent"), + ("RedisConfig", "Redis connection and index management") + ] + + for name, description in components: + available = "โœ…" if getattr(redis_context_course, name, None) is not None else "โŒ" + print(f" {available} {name}: {description}") + + print("\n๐Ÿ’ก Note: Some components require external dependencies (Redis, OpenAI)") + print(" Install with: pip install redis-context-course") + print(" Then set up Redis and OpenAI API key to use all features") + + +def demo_usage_examples(): + """Show usage examples.""" + print("\n๐Ÿ’ป Usage Examples:") + + print("\n1. Basic Model Usage:") + print("```python") + print("from redis_context_course.models import Course, DifficultyLevel") + print("") + print("# Create a course") + print("course = Course(") + print(" course_code='CS101',") + print(" title='Introduction to Programming',") + print(" difficulty_level=DifficultyLevel.BEGINNER,") + print(" # ... other fields") + print(")") + print("```") + + print("\n2. Agent Usage (requires dependencies):") + print("```python") + print("import asyncio") + print("from redis_context_course import ClassAgent") + print("") + print("async def main():") + print(" agent = ClassAgent('student_123')") + print(" response = await agent.chat('I want to learn programming')") + print(" print(response)") + print("") + print("asyncio.run(main())") + print("```") + + print("\n3. Command Line Usage:") + print("```bash") + print("# Generate sample course data") + print("generate-courses --courses-per-major 10") + print("") + print("# Ingest data into Redis") + print("ingest-courses --catalog course_catalog.json") + print("") + print("# Start interactive agent") + print("redis-class-agent --student-id your_name") + print("```") + + +def main(): + """Run the demo.""" + try: + # Demo the models + course, student = demo_models() + + # Show package info + demo_package_info() + + # Show usage examples + demo_usage_examples() + + print("\n๐ŸŽ‰ Demo completed successfully!") + print("\nNext steps:") + print("1. Install Redis 8: docker run -d --name redis -p 6379:6379 redis:8-alpine") + print("2. Set OPENAI_API_KEY environment variable") + print("3. 
Try the interactive agent: redis-class-agent --student-id demo")
+
+    except Exception as e:
+        print(f"❌ Demo failed: {e}")
+        return 1
+
+    return 0
+
+
+if __name__ == "__main__":
+    exit(main())
diff --git a/python-recipes/context-engineering/reference-agent/pyproject.toml b/python-recipes/context-engineering/reference-agent/pyproject.toml
new file mode 100644
index 00000000..73be1811
--- /dev/null
+++ b/python-recipes/context-engineering/reference-agent/pyproject.toml
@@ -0,0 +1,143 @@
+[build-system]
+requires = ["setuptools>=61.0", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "redis-context-course"
+version = "1.0.0"
+authors = [
+    {name = "Redis AI Resources Team", email = "redis-ai@redis.com"},
+]
+description = "Context Engineering with Redis - University Class Agent Reference Implementation"
+readme = "README.md"
+license = {text = "MIT"}
+requires-python = ">=3.8"
+classifiers = [
+    "Development Status :: 4 - Beta",
+    "Intended Audience :: Developers",
+    "Intended Audience :: Education",
+    "Topic :: Software Development :: Libraries :: Python Modules",
+    "Topic :: Scientific/Engineering :: Artificial Intelligence",
+    "Topic :: Database",
+    "License :: OSI Approved :: MIT License",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.8",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+]
+keywords = [
+    "redis",
+    "ai",
+    "context-engineering",
+    "langgraph",
+    "openai",
+    "vector-database",
+    "semantic-search",
+    "memory-management",
+    "chatbot",
+    "recommendation-system",
+]
+dependencies = [
+    "langgraph>=0.2.0,<0.3.0",
+    "langgraph-checkpoint>=1.0.0",
+    "langgraph-checkpoint-redis>=0.1.0",
+    "redis>=6.0.0",
+    "redisvl>=0.8.0",
+    "openai>=1.0.0",
+    "langchain>=0.2.0",
+    "langchain-openai>=0.1.0",
+    "langchain-core>=0.2.0",
+    "langchain-community>=0.2.0",
+    "pydantic>=1.8.0,<3.0.0",
+    "python-dotenv>=1.0.0",
+    "click>=8.0.0",
+    "rich>=13.0.0",
+    "faker>=20.0.0",
+    "pandas>=2.0.0",
+    "numpy>=1.24.0",
+    "tiktoken>=0.5.0",
+    "python-ulid>=3.0.0",
+    "agent-memory-client>=0.12.3",
+]
+
+[project.optional-dependencies]
+dev = [
+    "pytest>=7.0.0",
+    "pytest-asyncio>=0.21.0",
+    "black>=23.0.0",
+    "isort>=5.12.0",
+    "mypy>=1.5.0",
+    "flake8>=6.0.0",
+]
+docs = [
+    "sphinx>=5.0.0",
+    "sphinx-rtd-theme>=1.0.0",
+    "myst-parser>=0.18.0",
+]
+
+[project.urls]
+Homepage = "https://github.com/redis-developer/redis-ai-resources"
+Documentation = "https://github.com/redis-developer/redis-ai-resources/blob/main/python-recipes/context-engineering/README.md"
+Repository = "https://github.com/redis-developer/redis-ai-resources.git"
+"Bug Reports" = "https://github.com/redis-developer/redis-ai-resources/issues"
+
+[project.scripts]
+redis-class-agent = "redis_context_course.cli:main"
+generate-courses = "redis_context_course.scripts.generate_courses:main"
+ingest-courses = "redis_context_course.scripts.ingest_courses:main"
+
+[tool.setuptools.packages.find]
+where = ["."]
+include = ["redis_context_course*"]
+
+[tool.setuptools.package-data]
+redis_context_course = ["data/*.json", "templates/*.txt"]
+
+[tool.black]
+line-length = 88
+target-version = ['py38']
+include = '\.pyi?$'
+extend-exclude = '''
+/(
+  # directories
+  \.eggs
+  | \.git
+  | \.hg
+  | \.mypy_cache
+  | \.tox
+  | \.venv
+  | build
+  | dist
+)/
+'''
+
+[tool.isort]
+profile = "black"
+multi_line_output = 3
+line_length = 88
+known_first_party = 
["redis_context_course"] + +[tool.mypy] +python_version = "3.8" +warn_return_any = true +warn_unused_configs = true +disallow_untyped_defs = true +disallow_incomplete_defs = true +check_untyped_defs = true +disallow_untyped_decorators = true +no_implicit_optional = true +warn_redundant_casts = true +warn_unused_ignores = true +warn_no_return = true +warn_unreachable = true +strict_equality = true + +[tool.pytest.ini_options] +testpaths = ["tests"] +python_files = ["test_*.py"] +python_classes = ["Test*"] +python_functions = ["test_*"] +addopts = "-v --tb=short" +asyncio_mode = "auto" diff --git a/python-recipes/context-engineering/reference-agent/redis_context_course/__init__.py b/python-recipes/context-engineering/reference-agent/redis_context_course/__init__.py new file mode 100644 index 00000000..d78bddda --- /dev/null +++ b/python-recipes/context-engineering/reference-agent/redis_context_course/__init__.py @@ -0,0 +1,126 @@ +""" +Redis Context Course - Context Engineering Reference Implementation + +This package provides a complete reference implementation of a context-aware +AI agent for university course recommendations and academic planning. + +The agent demonstrates key context engineering concepts: +- System context management +- Working memory and long-term memory (via Redis Agent Memory Server) +- Tool integration and usage +- Semantic search and retrieval +- Personalized recommendations + +Main Components: +- agent: LangGraph-based agent implementation +- models: Data models for courses and students +- memory_client: Interface to Redis Agent Memory Server +- course_manager: Course storage and recommendation engine +- redis_config: Redis configuration and connections +- cli: Command-line interface + +Installation: + pip install redis-context-course agent-memory-server + +Usage: + from redis_context_course import ClassAgent, MemoryClient + + # Initialize agent (uses Agent Memory Server) + agent = ClassAgent("student_id") + + # Chat with agent + response = await agent.chat("I'm interested in machine learning courses") + +Command Line Tools: + redis-class-agent --student-id your_name + generate-courses --courses-per-major 15 + ingest-courses --catalog course_catalog.json +""" + +# Import core models (these have minimal dependencies) +from .models import ( + Course, Major, StudentProfile, + CourseRecommendation, AgentResponse, Prerequisite, + CourseSchedule, DifficultyLevel, CourseFormat, + Semester, DayOfWeek +) + +# Import agent components +from .agent import ClassAgent, AgentState +from .augmented_agent import AugmentedClassAgent + + +# Import memory client directly from agent_memory_client +from agent_memory_client import MemoryAPIClient as MemoryClient +from agent_memory_client import MemoryClientConfig +from .course_manager import CourseManager +from .redis_config import RedisConfig, redis_config + +# Import tools (used in notebooks) +from .tools import ( + create_course_tools, + create_memory_tools, + select_tools_by_keywords +) + +# Import optimization helpers (from Section 4) +from .optimization_helpers import ( + count_tokens, + estimate_token_budget, + hybrid_retrieval, + create_summary_view, + create_user_profile_view, + filter_tools_by_intent, + classify_intent_with_llm, + extract_references, + format_context_for_llm +) + +__version__ = "1.0.0" +__author__ = "Redis AI Resources Team" +__email__ = "redis-ai@redis.com" +__license__ = "MIT" +__description__ = "Context Engineering with Redis - University Class Agent Reference Implementation" + +__all__ = [ + # Core classes 
+ "ClassAgent", + "AugmentedClassAgent", + "AgentState", + "MemoryClient", + "MemoryClientConfig", + "CourseManager", + "RedisConfig", + "redis_config", + + # Data models + "Course", + "Major", + "StudentProfile", + "CourseRecommendation", + "AgentResponse", + "Prerequisite", + "CourseSchedule", + + # Enums + "DifficultyLevel", + "CourseFormat", + "Semester", + "DayOfWeek", + + # Tools (for notebooks) + "create_course_tools", + "create_memory_tools", + "select_tools_by_keywords", + + # Optimization helpers (Section 4) + "count_tokens", + "estimate_token_budget", + "hybrid_retrieval", + "create_summary_view", + "create_user_profile_view", + "filter_tools_by_intent", + "classify_intent_with_llm", + "extract_references", + "format_context_for_llm", +] diff --git a/python-recipes/context-engineering/reference-agent/redis_context_course/agent.py b/python-recipes/context-engineering/reference-agent/redis_context_course/agent.py new file mode 100644 index 00000000..e2e0e183 --- /dev/null +++ b/python-recipes/context-engineering/reference-agent/redis_context_course/agent.py @@ -0,0 +1,996 @@ +""" +LangGraph agent implementation for the Redis University Class Agent. + +This module implements the main agent logic using LangGraph for workflow orchestration, +with Redis Agent Memory Server for memory management. + +Memory Architecture: +- LangGraph Checkpointer (Redis): Low-level graph state persistence for resuming execution +- Working Memory (Agent Memory Server): Session-scoped conversation and task context + * Automatically extracts important facts to long-term storage + * Loaded at start of conversation turn, saved at end +- Long-term Memory (Agent Memory Server): Cross-session knowledge (preferences, facts) + * Searchable via semantic vector search + * Accessible via tools +""" + +import os + +import json + +from typing import List, Dict, Any, Optional, Annotated +from datetime import datetime + +from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, SystemMessage, ToolMessage +from langchain_core.tools import tool +from langchain_openai import ChatOpenAI +from langgraph.graph import StateGraph, END +from langgraph.graph.message import add_messages +from langgraph.prebuilt import ToolNode +from pydantic import BaseModel + +from .models import StudentProfile, CourseRecommendation, AgentResponse +from agent_memory_client import MemoryAPIClient, MemoryClientConfig +from .course_manager import CourseManager +from .redis_config import redis_config + + +class AgentState(BaseModel): + """State for the LangGraph agent.""" + messages: Annotated[List[BaseMessage], add_messages] + student_id: str + student_profile: Optional[StudentProfile] = None + current_query: str = "" + recommendations: List[CourseRecommendation] = [] + context: Dict[str, Any] = {} + next_action: str = "respond" + + +class ClassAgent: + """Redis University Class Agent using LangGraph and Agent Memory Server.""" + + def __init__(self, student_id: str, session_id: Optional[str] = None): + self.student_id = student_id + self.session_id = session_id or f"session_{student_id}" + + # Initialize memory client with proper config + config = MemoryClientConfig( + base_url=os.getenv("AGENT_MEMORY_URL", "http://localhost:8088"), + default_namespace="redis_university" + ) + self.memory_client = MemoryAPIClient(config=config) + self.course_manager = CourseManager() + self.model_name = os.getenv("OPENAI_MODEL", "gpt-4o") + self.llm = ChatOpenAI(model=self.model_name, temperature=0.0) + + + # Build the agent graph + self.graph = 
self._build_graph() + + def _build_graph(self) -> StateGraph: + """ + Build the LangGraph workflow. + + The graph uses: + 1. Redis checkpointer for low-level graph state persistence (resuming nodes) + 2. Agent Memory Server for high-level memory management (working + long-term) + """ + # Define tools + tools = [ + + self._create_search_courses_tool(), + self._create_list_majors_tool(), + self._create_recommendations_tool(), + self._store_memory_tool, + self._search_memories_tool, + self._create_summarize_user_knowledge_tool(), + self._create_clear_user_memories_tool() + ] + + + + # Create tool node + tool_node = ToolNode(tools) + + # Define the graph + workflow = StateGraph(AgentState) + + # Add nodes + workflow.add_node("load_working_memory", self._load_working_memory) + workflow.add_node("retrieve_context", self._retrieve_context) + workflow.add_node("agent", self._agent_node) + workflow.add_node("tools", tool_node) + workflow.add_node("respond", self._respond_node) + workflow.add_node("save_working_memory", self._save_working_memory) + + # Define edges + workflow.set_entry_point("load_working_memory") + workflow.add_edge("load_working_memory", "retrieve_context") + workflow.add_edge("retrieve_context", "agent") + workflow.add_conditional_edges( + "agent", + self._should_use_tools, + { + "tools": "tools", + "respond": "respond" + } + ) + workflow.add_edge("tools", "agent") + workflow.add_edge("respond", "save_working_memory") + workflow.add_edge("save_working_memory", END) + + # Compile graph without Redis checkpointer + # TODO(CE-Checkpointer): Re-enable Redis checkpointer once langgraph's async + # checkpointer interface is compatible in our environment. Current versions + # raise NotImplementedError on aget_tuple via AsyncPregelLoop. Track and + # fix by upgrading langgraph (and/or using the correct async RedisSaver) + # and then switch to: workflow.compile(checkpointer=redis_config.checkpointer) + return workflow.compile() + + async def _load_working_memory(self, state: AgentState) -> AgentState: + """ + Load working memory from Agent Memory Server. + + Working memory contains: + - Conversation messages from this session + - Structured memories awaiting promotion to long-term storage + - Session-specific data + + This is the first node in the graph, loading context for the current turn. 
+ """ + # Get or create working memory for this session + _, working_memory = await self.memory_client.get_or_create_working_memory( + session_id=self.session_id, + user_id=self.student_id, + model_name=self.model_name + ) + + # If we have working memory, add previous messages to state + if working_memory and working_memory.messages: + # Convert MemoryMessage objects to LangChain messages + for msg in working_memory.messages: + if msg.role == "user": + state.messages.append(HumanMessage(content=msg.content)) + elif msg.role == "assistant": + state.messages.append(AIMessage(content=msg.content)) + + return state + + async def _retrieve_context(self, state: AgentState) -> AgentState: + """Retrieve relevant context for the current conversation.""" + # Get the latest human message + human_messages = [msg for msg in state.messages if isinstance(msg, HumanMessage)] + if human_messages: + state.current_query = human_messages[-1].content + + # Search long-term memories for relevant context + if state.current_query: + from agent_memory_client.filters import UserId + results = await self.memory_client.search_long_term_memory( + text=state.current_query, + user_id=UserId(eq=self.student_id), + limit=5 + ) + + # Build context from memories + context = { + "preferences": [], + "goals": [], + "recent_facts": [] + } + + for memory in results.memories: + if memory.memory_type == "semantic": + if "preference" in memory.topics: + context["preferences"].append(memory.text) + elif "goal" in memory.topics: + context["goals"].append(memory.text) + else: + context["recent_facts"].append(memory.text) + + state.context = context + + + + async def _agent_node(self, state: AgentState) -> AgentState: + """Main agent reasoning node.""" + # Build system message with context + system_prompt = self._build_system_prompt(state.context) + + # Prepare messages for the LLM + messages = [SystemMessage(content=system_prompt)] + state.messages + + # Get LLM response with tools + # Always require the model to choose a tool (no code heuristics) + tools = self._get_tools() + # If we don't yet have a tool result this turn, require a tool call; otherwise allow a normal reply + has_tool_result = any(isinstance(m, ToolMessage) for m in state.messages) + try: + if not has_tool_result: + model = self.llm.bind_tools(tools, tool_choice="required", parallel_tool_calls=False) + else: + model = self.llm.bind_tools(tools, tool_choice="none", parallel_tool_calls=False) + except TypeError: + # Fallback for older/mocked LLMs that don't accept tool_choice + model = self.llm.bind_tools(tools) + response = await model.ainvoke(messages) + # Optional debug: log chosen tool + if os.getenv("AGENT_DEBUG_TOOLCALLS"): + try: + tool_calls = getattr(response, "tool_calls", None) + if tool_calls: + # LangChain ToolCall objects have .name and .args + chosen = ", ".join([f"{tc.get('name') or getattr(tc, 'name', '')}" for tc in tool_calls]) + print(f"[DEBUG] tool_choice={chosen}") + else: + # OpenAI raw additional_kwargs path + aw = getattr(response, "additional_kwargs", {}) + tc_raw = aw.get("tool_calls") + if tc_raw: + names = [] + for t in tc_raw: + fn = (t.get("function") or {}).get("name") + if fn: + names.append(fn) + if names: + print(f"[DEBUG] tool_choice={', '.join(names)}") + except Exception as _: + pass + + state.messages.append(response) + + return state + + def _should_use_tools(self, state: AgentState) -> str: + """Determine if we should run tools or generate a final response. 
+ + + + Logic per turn: + - If a tool has already been executed after the latest user message, respond now. + - Else, if the last LLM message includes a tool call, run tools. + - Otherwise, respond. + """ + # Find index of the latest user message (this turn's query) + last_user_idx = -1 + for i, m in enumerate(state.messages): + if isinstance(m, HumanMessage): + last_user_idx = i + # If there's any ToolMessage after the latest user message, we've already executed a tool this turn + if last_user_idx != -1: + for m in state.messages[last_user_idx + 1:]: + if isinstance(m, ToolMessage): + return "respond" + # Otherwise, decide based on the last AI message having tool calls + last_message = state.messages[-1] + if hasattr(last_message, 'tool_calls') and getattr(last_message, 'tool_calls'): + return "tools" + return "respond" + + async def _respond_node(self, state: AgentState) -> AgentState: + """Generate final response.""" + # The response is already in the last message + return state + + async def _save_working_memory(self, state: AgentState) -> AgentState: + """ + Save working memory to Agent Memory Server. + + This is the final node in the graph. It saves the conversation to working memory, + and the Agent Memory Server automatically: + 1. Stores the conversation messages + 2. Extracts important facts to long-term storage + 3. Manages memory deduplication and compaction + + This demonstrates the key concept of working memory: it's persistent storage + for task-focused context that automatically promotes important information + to long-term memory. + """ + # Convert LangChain messages to simple dict format + messages = [] + for msg in state.messages: + content = getattr(msg, "content", None) + if not content: + continue + if isinstance(msg, HumanMessage): + messages.append({"role": "user", "content": content}) + elif isinstance(msg, AIMessage): + messages.append({"role": "assistant", "content": content}) + + # Save to working memory + # The Agent Memory Server will automatically extract important memories + # to long-term storage based on its configured extraction strategy + from agent_memory_client.models import WorkingMemory, MemoryMessage + + # Convert messages to MemoryMessage format + memory_messages = [MemoryMessage(**msg) for msg in messages] + + # Create WorkingMemory object + working_memory = WorkingMemory( + session_id=self.session_id, + user_id=self.student_id, + messages=memory_messages, + memories=[], + data={} + ) + + await self.memory_client.put_working_memory( + session_id=self.session_id, + memory=working_memory, + user_id=self.student_id, + model_name=self.model_name + ) + + return state + + def _build_system_prompt(self, context: Dict[str, Any]) -> str: + """Build system prompt with current context.""" + prompt = """You are a helpful Redis University Class Agent powered by Redis Agent Memory Server. + Your role is to help students find courses, plan their academic journey, and provide personalized + recommendations based on their interests and goals. + + Memory Architecture: + + 1. LangGraph Checkpointer (Redis): + - Low-level graph state persistence for resuming execution + - You don't interact with this directly + + 2. Working Memory (Agent Memory Server): + - Session-scoped, task-focused context + - Contains conversation messages and task-related data + - Automatically loaded at the start of each turn + - Automatically saved at the end of each turn + - Agent Memory Server automatically extracts important facts to long-term storage + + 3. 
Long-term Memory (Agent Memory Server): + - Cross-session, persistent knowledge (preferences, goals, facts) + - Searchable via semantic vector search + - You can store memories directly using the store_memory tool + - You can search memories using the search_memories tool + + You have access to tools to: + + - search_courses_tool: Search for specific courses by topic or department + - list_majors_tool: List all available majors and programs + - get_recommendations_tool: Get personalized course recommendations based on interests + - _store_memory_tool: Store important facts in long-term memory (preferences, goals, etc.) + - _search_memories_tool: Search existing long-term memories + - summarize_user_knowledge_tool: Provide comprehensive summary of what you know about the user + - clear_user_memories_tool: Clear, delete, remove, or reset stored user information when explicitly requested + + Current student context (from long-term memory):""" + + if context.get("preferences"): + prompt += f"\n\nPreferences:\n" + "\n".join(f"- {p}" for p in context['preferences']) + + if context.get("goals"): + prompt += f"\n\nGoals:\n" + "\n".join(f"- {g}" for g in context['goals']) + + if context.get("recent_facts"): + prompt += f"\n\nRecent Facts:\n" + "\n".join(f"- {f}" for f in context['recent_facts']) + + prompt += """ + + Guidelines: + - Be helpful, friendly, and encouraging + + + - Always call exactly one tool per user message. Never reply without using a tool. + After you call a tool and receive its output, produce a clear final answer to the user without calling more tools in the same turn. + + + For ALL OTHER requests, use the appropriate tools as described below. + + IMPORTANT: Use the correct tools for different user requests: + + For user profile/memory questions: + - Use summarize_user_knowledge_tool when users ask "what do you know about me", "show me my profile", "what do you remember about me" + - Use clear_user_memories_tool when users say "ignore all that", "clear my profile", "reset what you know" + - Never call clear_user_memories_tool unless the user's latest message explicitly requests clearing/resetting/deleting/erasing/forgetting their data. + - Regular requests like "recommend", "find", "list", "show" must NOT call clear_user_memories_tool. + + - Use _search_memories_tool for specific memory searches + + For academic requests: + - Use get_recommendations_tool when users express interests ("I like math") or ask for suggestions ("suggest courses", "recommend courses") + - Use search_courses_tool when users want specific courses by name or topic ("show me CS courses", "find programming classes") + - Use list_majors_tool only when users ask about available majors/programs ("what majors are available") + + For storing information: + - Use _store_memory_tool when you learn important preferences, goals, or facts about the user + - Never use _store_memory_tool to answer questions like "what do you know about me", "my history", or "show my profile". Use summarize_user_knowledge_tool instead. + + Hard constraints: + - For any query about "history", "profile", or "what do you know": you MUST call summarize_user_knowledge_tool. Do NOT call get_recommendations_tool, search_courses_tool, or list_majors_tool for these. + - Only call list_majors_tool when the user's latest message explicitly contains one of: "major", "majors", "program", "programs", "degree", "degrees". + - When the user says "more" after you recommended courses, call get_recommendations_tool again for more courses. 
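These routing rules only work because the agent node forces a tool call on the first pass of each turn. A minimal, self-contained sketch of that enforcement pattern — the two stub tools and the expected output are illustrative, not part of the package:

```python
# Sketch: force a tool call on the first pass of a turn, as _agent_node does.
# Requires OPENAI_API_KEY; the two tools are stand-ins for the real ones.
import asyncio

from langchain_core.tools import tool
from langchain_openai import ChatOpenAI


@tool
def list_majors_tool() -> str:
    """List all university majors and degree programs."""
    return "CS, Data Science, Math"


@tool
def get_recommendations_tool(query: str = "", limit: int = 3) -> str:
    """Recommend courses matching the user's stated interests."""
    return f"{limit} course suggestions about {query or 'general topics'}"


async def route(message: str) -> str:
    llm = ChatOpenAI(model="gpt-4o", temperature=0.0)
    # tool_choice="required" guarantees *some* tool call; the system prompt's
    # exact-mapping examples steer which tool the model picks.
    model = llm.bind_tools(
        [list_majors_tool, get_recommendations_tool],
        tool_choice="required",
        parallel_tool_calls=False,
    )
    response = await model.ainvoke(message)
    return response.tool_calls[0]["name"] if response.tool_calls else "none"


if __name__ == "__main__":
    # Expected to print "get_recommendations_tool" per the mappings above
    print(asyncio.run(route("suggest some math courses")))
```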
Never switch to list_majors_tool for "more". + + + DO NOT default to search_courses_tool for everything. Choose the most appropriate tool based on the user's actual request. + + Tool selection examples (exact mappings): + - User: "what do you know about me?" -> Call summarize_user_knowledge_tool + - User: "show me my profile" -> Call summarize_user_knowledge_tool + - User: "what's my history" -> Call summarize_user_knowledge_tool + - User: "show my history" -> Call summarize_user_knowledge_tool + - User: "see my history" -> Call summarize_user_knowledge_tool + - User: "my history" -> Call summarize_user_knowledge_tool + - User: "my profile" -> Call summarize_user_knowledge_tool + + - User: "learn about my profile" -> Call summarize_user_knowledge_tool + - User: "clear my history" -> Call clear_user_memories_tool + - User: "clear my profile" -> Call clear_user_memories_tool + - User: "ignore my preferences" -> Call clear_user_memories_tool + - User: "reset what you know" -> Call clear_user_memories_tool + - User: "what majors are available" -> Call list_majors_tool + - User: "list majors" -> Call list_majors_tool + - User: "find me courses" -> Call get_recommendations_tool + - User: "help me find courses" -> Call get_recommendations_tool + - User: "suggest math courses" -> Call get_recommendations_tool + - User: "show me cs courses" -> Call search_courses_tool + - User: "find programming classes" -> Call search_courses_tool + - User: "what math courses are available" -> Call search_courses_tool + + Always prefer get_recommendations_tool when the user expresses interests ("I like X", "I'm into Y") or asks for suggestions ("suggest", "recommend"). + + + Recommendation count handling: + - If a user specifies a number (e.g., "recommend 5 math courses" or "top 10 AI courses"), call get_recommendations_tool with limit set to that number (1โ€“10). + - If a user says "more" after receiving recommendations and does not provide a number, call get_recommendations_tool with limit=5 by default. + - Keep the query/topic from the conversation context when possible (e.g., if the user previously asked for "math" then says "more", continue with math). + + + """ + + return prompt + + + + def _create_search_courses_tool(self): + """Create the search courses tool.""" + @tool + async def search_courses_tool(query: str, filters: Optional[Dict[str, Any]] = None) -> str: + """Search course catalog by topic, department, or difficulty. + + Use this tool when users ask for specific courses or subjects, or when + filtering by department, difficulty, or topic. Returns matching courses + with detailed information. + + Args: + query (str): Search terms like "programming", "CS", "beginner math". + filters (Dict[str, Any], optional): Additional filters for department, + difficulty, or other course attributes. Defaults to None. + + Returns: + str: Formatted list of courses with codes, titles, descriptions, + credits, and difficulty levels. Returns "No courses found" if + no matches. + + Examples: + Use for queries like: + - "Show me CS courses" + - "Find beginner programming classes" + - "What math courses are available" + + Note: + For listing all majors, use list_majors_tool instead. 
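The tool body that follows pre-resolves a few ambiguous department abbreviations before falling back to semantic search. A standalone sketch of that hybrid filter — the mapping table mirrors the one below, while `resolve_filters` is a hypothetical helper:

```python
# Sketch of the hybrid abbreviation filter used by search_courses_tool:
# map only genuinely ambiguous shorthands; leave the rest to embeddings.
from typing import Any, Dict, Optional, Tuple

# Space-bounded patterns avoid false hits inside longer words.
PROBLEMATIC_MAPPINGS = {
    " ds ": "Data Science",
    "ds classes": "Data Science",
    "ds courses": "Data Science",
}


def resolve_filters(
    query: str, filters: Optional[Dict[str, Any]] = None
) -> Tuple[str, Dict[str, Any]]:
    filters = dict(filters or {})
    # Pad so the space-bounded pattern can match at either edge of the query
    query_lower = f" {query.lower()} "
    for pattern, department in PROBLEMATIC_MAPPINGS.items():
        if pattern in query_lower:
            filters.setdefault("department", department)
            break
    return query, filters


print(resolve_filters("show me ds courses"))
# -> ('show me ds courses', {'department': 'Data Science'})
```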
+ """ + # Hybrid approach: Handle problematic abbreviations explicitly, let LLM handle the rest + if not filters: + filters = {} + + # Only handle the most problematic/ambiguous cases explicitly + problematic_mappings = { + ' ds ': 'Data Science', # Space-bounded to avoid false matches + 'ds classes': 'Data Science', + 'ds courses': 'Data Science', + } + + query_lower = query.lower() + for pattern, dept in problematic_mappings.items(): + if pattern in query_lower: + filters['department'] = dept + break + + courses = await self.course_manager.search_courses(query, filters=filters) + + if not courses: + return "No courses found matching your criteria." + + result = f"Found {len(courses)} courses:\n\n" + for course in courses[:10]: # Show more results for department searches + result += f"**{course.course_code}: {course.title}**\n" + result += f"Department: {course.department} | Credits: {course.credits} | Difficulty: {course.difficulty_level.value}\n" + result += f"Description: {course.description[:150]}...\n\n" + + return result + + return search_courses_tool + + def _create_list_majors_tool(self): + """Create the list majors tool.""" + @tool + async def list_majors_tool() -> str: + """List all university majors and degree programs. + + Use this tool when users ask about available majors, programs, or degrees, + or for general inquiries about fields of study. Returns a comprehensive + list of all academic programs offered. + + Returns: + str: Formatted list of majors with codes, departments, descriptions, + and required credits. Returns error message if majors cannot + be retrieved. + + Examples: + Use for queries like: + - "What majors are available?" + - "List all programs" + - "What can I study here?" + + Note: + For specific course searches, use search_courses_tool instead. + """ + try: + # Get all major keys from Redis + major_keys = self.course_manager.redis_client.keys("major:*") + + if not major_keys: + return "No majors found in the system." + + majors = [] + for key in major_keys: + major_data = self.course_manager.redis_client.hgetall(key) + if major_data: + major_info = { + 'name': major_data.get('name', 'Unknown'), + 'code': major_data.get('code', 'N/A'), + 'department': major_data.get('department', 'N/A'), + 'description': major_data.get('description', 'No description available'), + 'required_credits': major_data.get('required_credits', 'N/A') + } + majors.append(major_info) + + if not majors: + return "No major information could be retrieved." + + # Format the response + result = f"Available majors at Redis University ({len(majors)} total):\n\n" + for major in majors: + result += f"**{major['name']} ({major['code']})**\n" + result += f"Department: {major['department']}\n" + result += f"Required Credits: {major['required_credits']}\n" + result += f"Description: {major['description']}\n\n" + + return result + + except Exception as e: + return f"Error retrieving majors: {str(e)}" + + return list_majors_tool + + def _create_recommendations_tool(self): + """Create the recommendations tool.""" + @tool + async def get_recommendations_tool(query: str = "", limit: int = 3) -> str: + """Generate personalized course recommendations based on user interests. + + Use this tool when users express interests or ask for course suggestions. + Creates personalized recommendations with reasoning and automatically + stores user interests in long-term memory for future reference. + + Args: + query (str, optional): User interests like "math and engineering" + or "programming". Defaults to "". 
+ limit (int, optional): Maximum number of recommendations to return. + Defaults to 3. + + Returns: + str: Personalized course recommendations with details, relevance + scores, reasoning, and prerequisite information. Returns + "No recommendations available" if none found. + + Examples: + Use for queries like: + - "I'm interested in math and engineering" + - "Recommend courses for me" + - "What should I take for data science?" + + + Handling counts: + - If the user specifies a number (e.g., "recommend 5" or "top 10"), set limit to that number (1โ€“10). + - If the user says "more" without a number, use limit=5 by default. + + Note: + Automatically stores expressed interests in long-term memory. + For general course searches, use search_courses_tool instead. + """ + # Extract interests from the query and store them + interests = [] + if query: + # Store the user's expressed interests + from agent_memory_client.models import ClientMemoryRecord + memory = ClientMemoryRecord( + text=f"Student expressed interest in: {query}", + user_id=self.student_id, + memory_type="semantic", + topics=["interests", "preferences"] + ) + await self.memory_client.create_long_term_memory([memory]) + interests = [interest.strip() for interest in query.split(" and ")] + + # Create student profile with current interests + student_profile = StudentProfile( + name=self.student_id, + email=f"{self.student_id}@university.edu", + interests=interests if interests else ["general"] + ) + + recommendations = await self.course_manager.recommend_courses( + student_profile, query, limit + ) + + if not recommendations: + return "No recommendations available at this time." + + result = f"Here are {len(recommendations)} personalized course recommendations:\n\n" + for i, rec in enumerate(recommendations, 1): + result += f"{i}. **{rec.course.course_code}: {rec.course.title}**\n" + result += f" Relevance: {rec.relevance_score:.2f} | Credits: {rec.course.credits}\n" + result += f" Reasoning: {rec.reasoning}\n" + result += f" Prerequisites met: {'Yes' if rec.prerequisites_met else 'No'}\n\n" + + return result + + return get_recommendations_tool + + @tool + async def _store_memory_tool( + self, + text: str, + memory_type: str = "semantic", + topics: Optional[List[str]] = None + ) -> str: + """Store important student information in persistent long-term memory. + + Use this tool when the user shares preferences, goals, or important facts that + should be remembered for future sessions. Avoid storing temporary conversation + details that don't need persistence. + + Args: + text (str): Information to store in memory. + memory_type (str, optional): Type of memory - "semantic" for facts, + "episodic" for events. Defaults to "semantic". + topics (List[str], optional): Tags to categorize the memory, such as + ["preferences", "courses"]. Defaults to None. + + Returns: + str: Confirmation message indicating the information was stored. + + Examples: + Store when user says: + - "I prefer online courses" + - "My goal is to become a data scientist" + - "I've completed CS101" + + Note: + This writes to persistent storage and will be available across sessions. 
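get_recommendations_tool above writes the expressed interests to long-term memory before recommending — the same write path _store_memory_tool exposes directly. A condensed sketch of that write, assuming the agent-memory-client API used throughout this module and a reachable server (`remember_interests` is a hypothetical helper):

```python
# Sketch of the "remember interests before recommending" write path.
# Assumes a running Agent Memory Server; the URL default is an assumption.
import os
from typing import List

from agent_memory_client import MemoryAPIClient, MemoryClientConfig
from agent_memory_client.models import ClientMemoryRecord


async def remember_interests(student_id: str, query: str) -> List[str]:
    client = MemoryAPIClient(
        config=MemoryClientConfig(
            base_url=os.getenv("AGENT_MEMORY_URL", "http://localhost:8088")
        )
    )
    memory = ClientMemoryRecord(
        text=f"Student expressed interest in: {query}",
        user_id=student_id,
        memory_type="semantic",
        topics=["interests", "preferences"],
    )
    await client.create_long_term_memory([memory])
    # Naive split mirrors the tool body; a real parser could do better
    return [part.strip() for part in query.split(" and ")]
```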
+ """ + from agent_memory_client.models import ClientMemoryRecord + + memory = ClientMemoryRecord( + text=text, + user_id=self.student_id, + memory_type=memory_type, + topics=topics or [] + ) + + await self.memory_client.create_long_term_memory([memory]) + return f"Stored in long-term memory: {text}" + + @tool + async def _search_memories_tool( + self, + query: str, + limit: int = 5 + ) -> str: + """Search stored memories using semantic search. + + Use this tool to recall previous preferences, context, or specific information + about the user. Performs semantic search across long-term memory to find + relevant stored information. + + Args: + query (str): Search terms for finding relevant memories. + limit (int, optional): Maximum number of results to return. Defaults to 5. + + Returns: + str: Formatted list of relevant memories with topics and context. + Returns "No relevant memories found" if no matches. + + Examples: + Use for queries like: + - "What are my preferences?" + - "What courses have I mentioned?" + - "Remind me of my goals" + + Note: + For comprehensive user summaries, use _summarize_user_knowledge_tool instead. + """ + from agent_memory_client.models import UserId + + results = await self.memory_client.search_long_term_memory( + text=query, + user_id=UserId(eq=self.student_id), + limit=limit + ) + + if not results.memories: + return "No relevant memories found." + + result = f"Found {len(results.memories)} relevant memories:\n\n" + for i, memory in enumerate(results.memories, 1): + result += f"{i}. {memory.text}\n" + if memory.topics: + result += f" Topics: {', '.join(memory.topics)}\n" + result += "\n" + + return result + + def _create_summarize_user_knowledge_tool(self): + """Create the user knowledge summary tool.""" + + @tool + async def summarize_user_knowledge_tool() -> str: + """Summarize what the agent knows about the user. + + Searches through long-term memory to gather all stored information about the user + and organizes it into logical categories for easy review. Use this when the user + asks about their profile, history, interests, or what you remember about them. + + Returns: + str: Comprehensive summary of user information organized by categories + (preferences, goals, interests, academic history, facts). Returns + a helpful message if no information is stored. + + + Examples: + Use when user asks: + - "What do you know about me?" + - "Tell me about my profile" + - "What are my interests and preferences?" + - "What do you remember about me?" + - "Show my history" + - "See my history" + - "Show my profile" + - "My history" + """ + try: + from agent_memory_client.filters import UserId + + + # Search long-term memories for all user information + results = await self.memory_client.search_long_term_memory( + text="", # Empty query to get all memories for this user + user_id=UserId(eq=self.student_id), + limit=50 # Get more results for comprehensive summary + ) + except Exception as e: + return f"I'm having trouble accessing your stored information right now. Error: {str(e)}" + + if not results.memories: + return "I don't have any stored information about you yet. As we interact more, I'll learn about your preferences, interests, and goals." + + # Check if user has requested a reset + reset_memories = [m for m in results.memories if m.topics and "reset" in [t.lower() for t in m.topics]] + if reset_memories: + return ("You previously requested to start fresh with your information. I don't have any current " + "stored information about your preferences or interests. 
Please share what you'd like me " + "to know about your academic interests and goals!") + + # Use LLM to create a comprehensive summary + return await self._create_llm_summary(results.memories) + + return summarize_user_knowledge_tool + + async def _create_llm_summary(self, memories): + """Create an LLM-based summary of user information.""" + if not memories: + return "I don't have any stored information about you yet. As we interact more, I'll learn about your preferences, interests, and goals." + + # Prepare memory texts and topics for LLM + memory_info = [] + for memory in memories: + topics_str = f" (Topics: {', '.join(memory.topics)})" if memory.topics else "" + memory_info.append(f"- {memory.text}{topics_str}") + + memories_str = "\n".join(memory_info) + + prompt = f"""Based on the following stored information about a student, create a well-organized, friendly summary of what I know about them: + +{memories_str} + +Please create a comprehensive summary that: +1. Groups related information together logically +2. Uses clear headings like "Your Interests", "Your Preferences", "Your Goals", etc. +3. Is conversational and helpful +4. Highlights the most important information +5. Uses bullet points for easy reading + +Start with "Here's what I know about you based on our interactions:" and organize the information in a way that would be most useful to the student.""" + + try: + # Use the LLM to create a summary + from langchain_core.messages import HumanMessage + + response = await self.llm.ainvoke([HumanMessage(content=prompt)]) + return response.content + + except Exception as e: + # Fallback to simple organized list if LLM fails + fallback = "Here's what I know about you:\n\n" + fallback += "\n".join([f"โ€ข {memory.text}" for memory in memories]) + fallback += f"\n\n(Note: I encountered an issue creating a detailed summary, but here's the basic information I have stored.)" + return fallback + + def _create_clear_user_memories_tool(self): + """Create the clear user memories tool.""" + + @tool + async def clear_user_memories_tool( + confirmation: str = "yes" + ) -> str: + """Clear or reset stored user information. + + Use this tool when users explicitly request to clear, reset, or "ignore" their + previously stored information. This is useful when users want to start fresh + or correct outdated information. + + If supported by the Agent Memory Server, this will: + - Delete ALL long-term memories for this user_id + - Delete ALL working-memory sessions for this user_id + + Args: + confirmation (str, optional): Confirmation that user wants to clear memories. + Must be "yes" to proceed. Defaults to "yes". + + Returns: + str: Confirmation message about the memory clearing operation. + + Examples: + Use when user says: + - "Ignore all that previous information" + - "Clear my profile" + - "Reset what you know about me" + - "Start fresh" + + Note: + + Strict usage guard: + - Only use this tool if the user's latest message explicitly includes clear/reset/erase/delete/forget/remove (e.g., "clear my history", "reset what you know"). + - Never use this tool for recommendations, search, listing majors, or any normal Q&A. + + This operation cannot be undone. Use with caution and only when + explicitly requested by the user. + """ + if confirmation.lower() != "yes": + return "Memory clearing cancelled. If you want to clear your stored information, please confirm." 
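The deletion pass below works in fixed-size batches and continues past individual failures. The batching pattern in isolation, where `delete_batch` is a stand-in for `memory_client.delete_long_term_memories`:

```python
# Best-effort batched deletion: bounded request sizes, skip failed batches.
import asyncio
from typing import Awaitable, Callable, List

BATCH = 100


async def delete_in_batches(
    ids: List[str], delete_batch: Callable[[List[str]], Awaitable[None]]
) -> int:
    deleted = 0
    for i in range(0, len(ids), BATCH):
        chunk = ids[i : i + BATCH]
        try:
            await delete_batch(chunk)
            deleted += len(chunk)
        except Exception:
            continue  # best-effort: keep going past a failed batch
    return deleted


async def demo() -> None:
    async def fake_delete(chunk: List[str]) -> None:
        pass  # stand-in for the real deletion call

    total = await delete_in_batches([f"mem-{i}" for i in range(250)], fake_delete)
    print(total)  # 250, deleted in batches of 100, 100, 50


asyncio.run(demo())
```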
+ + try: + # 1) Delete all long-term memories for this user + from agent_memory_client.filters import UserId + memory_ids = [] + async for mem in self.memory_client.search_all_long_term_memories( + text="", + user_id=UserId(eq=self.student_id), + batch_size=100, + ): + if getattr(mem, "memory_id", None): + memory_ids.append(mem.memory_id) + + deleted_lt = 0 + if memory_ids: + # Delete in batches to avoid huge query params + BATCH = 100 + for i in range(0, len(memory_ids), BATCH): + batch = memory_ids[i:i+BATCH] + try: + await self.memory_client.delete_long_term_memories(batch) + deleted_lt += len(batch) + except Exception: + # Continue best-effort deletion + pass + + # 2) Delete all working-memory sessions for this user + deleted_wm = 0 + try: + offset = 0 + page = await self.memory_client.list_sessions(limit=100, offset=offset, user_id=self.student_id) + while page.sessions: + + for s in page.sessions: + sid = getattr(s, "session_id", None) or s + try: + await self.memory_client.delete_working_memory(session_id=sid, user_id=self.student_id) + deleted_wm += 1 + except Exception: + pass + offset += len(page.sessions) + if len(page.sessions) < 100: + break + page = await self.memory_client.list_sessions(limit=100, offset=offset, user_id=self.student_id) + except Exception: + # Best-effort: if list_sessions isn't supported, try current session only + try: + await self.memory_client.delete_working_memory(session_id=self.session_id, user_id=self.student_id) + deleted_wm += 1 + except Exception: + pass + + if deleted_lt == 0 and deleted_wm == 0: + # Fall back: mark reset if deletion didn't occur + from agent_memory_client.models import ClientMemoryRecord + reset_memory = ClientMemoryRecord( + text="User requested to clear/reset all previous information and start fresh", + user_id=self.student_id, + memory_type="semantic", + topics=["reset", "clear", "fresh_start"] + ) + await self.memory_client.create_long_term_memory([reset_memory]) + return ( + "I couldn't remove existing data, but I marked your profile as reset. " + "I'll ignore prior information and start fresh." + ) + + # Success message summarizing deletions + parts = [] + if deleted_lt: + parts.append(f"deleted {deleted_lt} long-term memories") + if deleted_wm: + parts.append(f"cleared {deleted_wm} working-memory sessions") + summary = ", ".join(parts) + return f"Done: {summary}. We're starting fresh. What would you like me to know about your current interests and goals?" 
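Session cleanup above pages through `list_sessions` with an offset and stops on a short page. The same offset-pagination loop, sketched without the memory client:

```python
# Offset pagination: fetch pages of up to `limit` items, stop on a short page.
from typing import Callable, List


def iterate_pages(
    fetch_page: Callable[[int, int], List[str]], limit: int = 100
) -> List[str]:
    items: List[str] = []
    offset = 0
    while True:
        page = fetch_page(limit, offset)
        items.extend(page)
        if len(page) < limit:
            break  # short page: no more results
        offset += len(page)
    return items


# Demo against an in-memory "server" holding 230 sessions
sessions = [f"session-{i}" for i in range(230)]
print(len(iterate_pages(lambda limit, offset: sessions[offset : offset + limit])))
# -> 230
```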
+ + except Exception as e: + return f"I encountered an error while trying to clear your information: {str(e)}" + + return clear_user_memories_tool + + def _get_tools(self): + """Get list of tools for the agent.""" + return [ + + self._create_search_courses_tool(), + self._create_list_majors_tool(), + self._create_recommendations_tool(), + self._store_memory_tool, + self._search_memories_tool, + self._create_summarize_user_knowledge_tool(), + self._create_clear_user_memories_tool() + ] + + async def chat(self, message: str, thread_id: str = "default") -> str: + """Main chat interface for the agent.""" + # Create initial state + initial_state = AgentState( + messages=[HumanMessage(content=message)], + student_id=self.student_id + ) + + # Run the graph + config = {"configurable": {"thread_id": thread_id}} + result = await self.graph.ainvoke(initial_state, config) + + # Handle result structure (dict-like or object) + result_messages = [] + if isinstance(result, dict) or hasattr(result, "get"): + result_messages = result.get("messages", []) + else: + result_messages = getattr(result, "messages", []) + + # Return the last AI message + ai_messages = [msg for msg in result_messages if isinstance(msg, AIMessage)] + if ai_messages: + return ai_messages[-1].content + + return "I'm sorry, I couldn't process your request." diff --git a/python-recipes/context-engineering/reference-agent/redis_context_course/augmented_agent.py b/python-recipes/context-engineering/reference-agent/redis_context_course/augmented_agent.py new file mode 100644 index 00000000..6872e77f --- /dev/null +++ b/python-recipes/context-engineering/reference-agent/redis_context_course/augmented_agent.py @@ -0,0 +1,127 @@ +""" +AugmentedClassAgent builds on the reference ClassAgent by adding specialized tools +while preserving the original memory architecture and graph orchestration. + +This demonstrates the recommended extension pattern: inherit from ClassAgent, +override _get_tools() to append domain tools, and optionally extend the system prompt. +""" +from typing import List, Optional, Dict, Any + +from langchain_core.tools import tool + +from .agent import ClassAgent +from .models import StudentProfile + + +class AugmentedClassAgent(ClassAgent): + """Extended agent that reuses the reference ClassAgent and adds tools. + + Additions: + - get_course_details_tool: fetch structured details for a course by code + - check_prerequisites_tool: verify a student's readiness for a course + + Notes: + - We keep the original graph; only the toolset and prompt are extended. + - Tools use the same CourseManager and MemoryAPIClient as the base class. + """ + + # --------------------------- New tools --------------------------------- + @tool + async def get_course_details_tool(self, course_code: str) -> str: + """Get detailed course information by course code. + + Use this when the user asks for details like description, credits, + prerequisites, schedule, or instructor for a specific course code + (e.g., "Tell me more about CS101"). + """ + course = await self.course_manager.get_course_by_code(course_code) + if not course: + return f"No course found with code '{course_code}'." 
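The module docstring above calls this the recommended extension pattern: inherit from ClassAgent and override `_get_tools()` to append domain tools. Stripped of agent details, the pattern looks like this (class and tool names here are illustrative):

```python
# Inheritance-based extension: keep the base toolset first, append new tools.
from typing import Callable, List


class BaseAgent:
    def _get_tools(self) -> List[Callable]:
        return [self.search, self.recommend]

    def search(self) -> str:
        return "search"

    def recommend(self) -> str:
        return "recommend"


class AugmentedAgent(BaseAgent):
    def details(self) -> str:
        return "details"

    def _get_tools(self) -> List[Callable]:
        # Keep base tools first: tool order can influence model choice
        return super()._get_tools() + [self.details]


print([t.__name__ for t in AugmentedAgent()._get_tools()])
# -> ['search', 'recommend', 'details']
```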
+ + prereqs = ", ".join(p.course_code for p in course.prerequisites) or "None" + objectives = ", ".join(course.learning_objectives[:3]) or "-" + tags = ", ".join(course.tags[:5]) or "-" + schedule = ( + f"{course.schedule.days} {course.schedule.start_time}-{course.schedule.end_time}" + if course.schedule else "TBD" + ) + + return ( + f"{course.course_code}: {course.title}\n" + f"Department: {course.department} | Major: {course.major} | Credits: {course.credits}\n" + f"Difficulty: {course.difficulty_level.value} | Format: {course.format.value}\n" + f"Instructor: {course.instructor} | Schedule: {schedule}\n\n" + f"Description: {course.description}\n\n" + f"Prerequisites: {prereqs}\n" + f"Objectives: {objectives}\n" + f"Tags: {tags}\n" + ) + + @tool + async def check_prerequisites_tool( + self, + course_code: str, + completed: Optional[List[str]] = None, + current: Optional[List[str]] = None, + ) -> str: + """Check whether the student meets prerequisites for a course. + + Args: + course_code: Target course code (e.g., "CS301"). + completed: List of completed course codes (optional). + current: List of currently enrolled course codes (optional). + + Behavior: + - If completed/current are omitted, assumes none and reports missing prereqs. + - Returns a concise status plus any missing prerequisites. + """ + course = await self.course_manager.get_course_by_code(course_code) + if not course: + return f"No course found with code '{course_code}'." + + completed = completed or [] + current = current or [] + # Build a minimal profile for prerequisite checks + profile = StudentProfile( + name=self.student_id, + email=f"{self.student_id}@university.edu", + interests=[], + completed_courses=completed, + current_courses=current, + ) + + # Determine missing prerequisites (do not rely on private method) + missing: List[str] = [] + for prereq in course.prerequisites: + if prereq.course_code not in completed: + if not prereq.can_be_concurrent or prereq.course_code not in current: + missing.append(prereq.course_code) + + if not course.prerequisites: + return f"{course.course_code} has no prerequisites. You can enroll." + if not missing: + return f"Prerequisites for {course.course_code} are satisfied." + return ( + f"Missing prerequisites for {course.course_code}: " + + ", ".join(missing) + + ". If some of these are in progress, include them in 'current'." + ) + + # ---------------------- Extension points ------------------------------- + def _get_tools(self): + """Extend the base toolset with our augmented tools.""" + base = super()._get_tools() + # Append new tools; order can influence model choice; keep base first + return base + [self.get_course_details_tool, self.check_prerequisites_tool] + + def _build_system_prompt(self, context: Dict[str, Any]) -> str: + """Extend the base prompt with guidance for new tools.""" + prompt = super()._build_system_prompt(context) + extra = """ + +Additional tools available: +- get_course_details_tool: Use when the user asks for details about a specific course (description, credits, schedule, prerequisites, instructor). +- check_prerequisites_tool: Use to verify whether the student meets prerequisites for a course. If the student's completed/current courses are unknown, you may call get_course_details_tool first, then ask the user to share their completed/current courses in your final response. 
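Assuming the package is installed and Redis, an Agent Memory Server, and OPENAI_API_KEY are available, the augmented agent is driven exactly like the base one; a hypothetical session:

```python
# Hedged usage sketch for AugmentedClassAgent; requires OPENAI_API_KEY,
# Redis, and a reachable Agent Memory Server, as for the base ClassAgent.
import asyncio

from redis_context_course import AugmentedClassAgent


async def main() -> None:
    agent = AugmentedClassAgent("student_123")
    print(await agent.chat("Tell me more about CS101"))
    print(await agent.chat("Do I meet the prerequisites for CS301?"))


if __name__ == "__main__":
    asyncio.run(main())
```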
+ """ + return prompt + extra + diff --git a/python-recipes/context-engineering/reference-agent/redis_context_course/cli.py b/python-recipes/context-engineering/reference-agent/redis_context_course/cli.py new file mode 100644 index 00000000..ae38fc33 --- /dev/null +++ b/python-recipes/context-engineering/reference-agent/redis_context_course/cli.py @@ -0,0 +1,168 @@ +#!/usr/bin/env python3 +""" +Command-line interface for the Redis University Class Agent. + +This CLI provides an interactive way to chat with the agent and demonstrates +the context engineering concepts in practice. +""" + +import asyncio +import os +import sys +from typing import Optional +import click +from rich.console import Console +from rich.panel import Panel +from rich.prompt import Prompt +from rich.markdown import Markdown +from dotenv import load_dotenv + +from .agent import ClassAgent +from .redis_config import redis_config + +# Load environment variables +load_dotenv() + +console = Console() + + +class ChatCLI: + """Interactive chat CLI for the Class Agent.""" + + def __init__(self, student_id: str): + self.student_id = student_id + self.agent = None + self.thread_id = "cli_session" + + async def initialize(self): + """Initialize the agent and check connections.""" + console.print("[yellow]Initializing Redis University Class Agent...[/yellow]") + + # Check Redis connection + if not redis_config.health_check(): + console.print("[red]โŒ Redis connection failed. Please check your Redis server.[/red]") + return False + + console.print("[green]โœ… Redis connection successful[/green]") + + # Initialize agent + try: + self.agent = ClassAgent(self.student_id) + console.print("[green]โœ… Agent initialized successfully[/green]") + return True + except Exception as e: + console.print(f"[red]โŒ Agent initialization failed: {e}[/red]") + return False + + async def run_chat(self): + """Run the interactive chat loop.""" + if not await self.initialize(): + return + + # Welcome message + welcome_panel = Panel( + "[bold blue]Welcome to Redis University Class Agent![/bold blue]\n\n" + "I'm here to help you find courses, plan your academic journey, and provide " + "personalized recommendations based on your interests and goals.\n\n" + "[dim]Type 'help' for commands, 'quit' to exit[/dim]", + title="๐ŸŽ“ Class Agent", + border_style="blue" + ) + console.print(welcome_panel) + + while True: + try: + # Get user input + user_input = Prompt.ask("\n[bold cyan]You[/bold cyan]") + + if user_input.lower() in ['quit', 'exit', 'bye']: + console.print("[yellow]Goodbye! Have a great day! ๐Ÿ‘‹[/yellow]") + break + + if user_input.lower() == 'help': + self.show_help() + continue + + if user_input.lower() == 'clear': + console.clear() + continue + + # Show thinking indicator + with console.status("[bold green]Agent is thinking...", spinner="dots"): + response = await self.agent.chat(user_input, self.thread_id) + + # Display agent response + agent_panel = Panel( + Markdown(response), + title="๐Ÿค– Class Agent", + border_style="green" + ) + console.print(agent_panel) + + except KeyboardInterrupt: + console.print("\n[yellow]Chat interrupted. 
Type 'quit' to exit.[/yellow]") + except Exception as e: + console.print(f"[red]Error: {e}[/red]") + + def show_help(self): + """Show help information.""" + help_text = """ + **Available Commands:** + + โ€ข `help` - Show this help message + โ€ข `clear` - Clear the screen + โ€ข `quit` / `exit` / `bye` - Exit the chat + + **Example Queries:** + + โ€ข "I'm interested in computer science courses" + โ€ข "What programming courses are available?" + โ€ข "I want to learn about data science" + โ€ข "Show me beginner-friendly courses" + โ€ข "I prefer online courses" + โ€ข "What are the prerequisites for CS101?" + + **Features:** + + โ€ข ๐Ÿง  **Memory**: I remember your preferences and goals + โ€ข ๐Ÿ” **Search**: I can find courses based on your interests + โ€ข ๐Ÿ’ก **Recommendations**: I provide personalized course suggestions + โ€ข ๐Ÿ“š **Context**: I understand your academic journey + """ + + help_panel = Panel( + Markdown(help_text), + title="๐Ÿ“– Help", + border_style="yellow" + ) + console.print(help_panel) + + +@click.command() +@click.option('--student-id', default='demo_student', help='Student ID for the session') +@click.option('--redis-url', help='Redis connection URL') +def main(student_id: str, redis_url: Optional[str]): + """Start the Redis University Class Agent CLI.""" + + # Set Redis URL if provided + if redis_url: + os.environ['REDIS_URL'] = redis_url + + # Check for required environment variables + if not os.getenv('OPENAI_API_KEY'): + console.print("[red]โŒ OPENAI_API_KEY environment variable is required[/red]") + console.print("[yellow]Please set your OpenAI API key:[/yellow]") + console.print("export OPENAI_API_KEY='your-api-key-here'") + sys.exit(1) + + # Start the chat + chat_cli = ChatCLI(student_id) + + try: + asyncio.run(chat_cli.run_chat()) + except KeyboardInterrupt: + console.print("\n[yellow]Goodbye! ๐Ÿ‘‹[/yellow]") + + +if __name__ == "__main__": + main() diff --git a/python-recipes/context-engineering/reference-agent/redis_context_course/course_manager.py b/python-recipes/context-engineering/reference-agent/redis_context_course/course_manager.py new file mode 100644 index 00000000..c83770c7 --- /dev/null +++ b/python-recipes/context-engineering/reference-agent/redis_context_course/course_manager.py @@ -0,0 +1,368 @@ +""" +Course management system for the Class Agent. + +This module handles course storage, retrieval, and recommendation logic +using Redis vector search for semantic course discovery. 
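The manager below leans on RedisVL's `VectorQuery` plus tag filters. A minimal sketch of that query shape, assuming an existing index whose schema has a `content_vector` vector field and a `department` tag field (details inferred from the code that follows):

```python
# Sketch of the semantic-search query CourseManager builds below.
from typing import Any, List, Optional

from redisvl.index import SearchIndex
from redisvl.query import VectorQuery
from redisvl.query.filter import Tag


def build_course_query(
    embedding: List[float], department: Optional[str] = None, limit: int = 10
) -> VectorQuery:
    query = VectorQuery(
        vector=embedding,
        vector_field_name="content_vector",
        return_fields=["course_code", "title", "department"],
        num_results=limit,
    )
    if department:
        # Tag filters narrow the KNN search server-side
        query.set_filter(Tag("department") == department)
    return query


def run(index: SearchIndex, embedding: List[float]) -> Any:
    return index.query(build_course_query(embedding, department="Computer Science"))
```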
+""" + +import json +from typing import List, Optional, Dict, Any +import numpy as np + +from redisvl.query import VectorQuery, FilterQuery +from redisvl.query.filter import Tag, Num + +from .models import Course, CourseRecommendation, StudentProfile, DifficultyLevel, CourseFormat +from .redis_config import redis_config + + +class CourseManager: + """Manages course data and provides recommendation functionality.""" + + def __init__(self): + self.redis_client = redis_config.redis_client + self.vector_index = redis_config.vector_index + self.embeddings = redis_config.embeddings + + def _build_filters(self, filters: Dict[str, Any]) -> str: + """Build filter expressions for Redis queries using RedisVL filter classes.""" + if not filters: + return "" + + filter_conditions = [] + + if "department" in filters: + filter_conditions.append(Tag("department") == filters["department"]) + if "major" in filters: + filter_conditions.append(Tag("major") == filters["major"]) + if "difficulty_level" in filters: + filter_conditions.append(Tag("difficulty_level") == filters["difficulty_level"]) + if "format" in filters: + filter_conditions.append(Tag("format") == filters["format"]) + if "semester" in filters: + filter_conditions.append(Tag("semester") == filters["semester"]) + if "year" in filters: + filter_conditions.append(Num("year") == filters["year"]) + if "credits_min" in filters: + min_credits = filters["credits_min"] + max_credits = filters.get("credits_max", 10) + filter_conditions.append(Num("credits") >= min_credits) + if max_credits != min_credits: + filter_conditions.append(Num("credits") <= max_credits) + + # Combine filters with AND logic + if filter_conditions: + combined_filter = filter_conditions[0] + for condition in filter_conditions[1:]: + combined_filter = combined_filter & condition + return combined_filter + + return "" + + async def store_course(self, course: Course) -> str: + """Store a course in Redis with vector embedding.""" + # Create searchable content for embedding + content = f"{course.title} {course.description} {course.department} {course.major} {' '.join(course.tags)} {' '.join(course.learning_objectives)}" + + # Generate embedding + embedding = await self.embeddings.aembed_query(content) + + # Prepare course data for storage + course_data = { + "id": course.id, + "course_code": course.course_code, + "title": course.title, + "description": course.description, + "department": course.department, + "major": course.major, + "difficulty_level": course.difficulty_level.value, + "format": course.format.value, + "semester": course.semester.value, + "year": course.year, + "credits": course.credits, + "tags": "|".join(course.tags), + "instructor": course.instructor, + "max_enrollment": course.max_enrollment, + "current_enrollment": course.current_enrollment, + "learning_objectives": json.dumps(course.learning_objectives), + "prerequisites": json.dumps([p.model_dump() for p in course.prerequisites]), + # Use default=str to handle datetime.time serialization + "schedule": json.dumps(course.schedule.model_dump(), default=str) if course.schedule else "", + "created_at": course.created_at.timestamp(), + "updated_at": course.updated_at.timestamp(), + "content_vector": np.array(embedding, dtype=np.float32).tobytes() + } + + # Store in Redis + key = f"{redis_config.vector_index_name}:{course.id}" + self.redis_client.hset(key, mapping=course_data) + + return course.id + + async def get_course(self, course_id: str) -> Optional[Course]: + """Retrieve a course by ID.""" + key = 
f"{redis_config.vector_index_name}:{course_id}" + course_data = self.redis_client.hgetall(key) + + if not course_data: + return None + + return self._dict_to_course(course_data) + + async def get_course_by_code(self, course_code: str) -> Optional[Course]: + """Retrieve a course by course code.""" + query = FilterQuery( + filter_expression=Tag("course_code") == course_code, + return_fields=["id", "course_code", "title", "description", "department", "major", + "difficulty_level", "format", "semester", "year", "credits", "tags", + "instructor", "max_enrollment", "current_enrollment", "learning_objectives", + "prerequisites", "schedule", "created_at", "updated_at"] + ) + results = self.vector_index.query(query) + + if results.docs: + return self._dict_to_course(results.docs[0].__dict__) + return None + + async def get_all_courses(self) -> List[Course]: + """Retrieve all courses from the catalog.""" + # Use search with empty query to get all courses + return await self.search_courses(query="", limit=1000, similarity_threshold=0.0) + + async def search_courses( + self, + query: str, + filters: Optional[Dict[str, Any]] = None, + limit: int = 10, + similarity_threshold: float = 0.6 + ) -> List[Course]: + """Search courses using semantic similarity.""" + # Generate query embedding + query_embedding = await self.embeddings.aembed_query(query) + + # Build vector query + vector_query = VectorQuery( + vector=query_embedding, + vector_field_name="content_vector", + return_fields=["id", "course_code", "title", "description", "department", "major", + "difficulty_level", "format", "semester", "year", "credits", "tags", + "instructor", "max_enrollment", "current_enrollment", "learning_objectives", + "prerequisites", "schedule", "created_at", "updated_at"], + num_results=limit + ) + + # Apply filters using the helper method + filter_expression = self._build_filters(filters or {}) + if filter_expression: + vector_query.set_filter(filter_expression) + + # Execute search + results = self.vector_index.query(vector_query) + + # Convert results to Course objects + courses = [] + # Handle both list and object with .docs attribute + result_list = results if isinstance(results, list) else results.docs + for result in result_list: + # Handle different result formats + if isinstance(result, dict): + # Direct dictionary result + vector_score = result.get('vector_score', 1.0) + if vector_score >= similarity_threshold: + course = self._dict_to_course(result) + if course: + courses.append(course) + else: + # Object with attributes + vector_score = getattr(result, 'vector_score', 1.0) + if vector_score >= similarity_threshold: + course = self._dict_to_course(result.__dict__) + if course: + courses.append(course) + + return courses + + async def recommend_courses( + self, + student_profile: StudentProfile, + query: str = "", + limit: int = 5 + ) -> List[CourseRecommendation]: + """Generate personalized course recommendations.""" + # Build search query based on student profile and interests + search_terms = [] + + if query: + search_terms.append(query) + + if student_profile.interests: + search_terms.extend(student_profile.interests) + + if student_profile.major: + search_terms.append(student_profile.major) + + search_query = " ".join(search_terms) if search_terms else "courses" + + # Build filters based on student preferences + filters = {} + if student_profile.preferred_format: + filters["format"] = student_profile.preferred_format.value + if student_profile.preferred_difficulty: + filters["difficulty_level"] = 
student_profile.preferred_difficulty.value + + # Search for relevant courses + courses = await self.search_courses( + query=search_query, + filters=filters, + limit=limit * 2 # Get more to filter out completed courses + ) + + # Generate recommendations with scoring + recommendations = [] + for course in courses: + # Skip if already completed or currently enrolled + if (course.course_code in student_profile.completed_courses or + course.course_code in student_profile.current_courses): + continue + + # Check prerequisites + prerequisites_met = self._check_prerequisites(course, student_profile) + + # Calculate relevance score + relevance_score = self._calculate_relevance_score(course, student_profile, query) + + # Generate reasoning + reasoning = self._generate_reasoning(course, student_profile, relevance_score) + + recommendation = CourseRecommendation( + course=course, + relevance_score=relevance_score, + reasoning=reasoning, + prerequisites_met=prerequisites_met, + fits_schedule=True, # Simplified for now + fits_preferences=self._fits_preferences(course, student_profile) + ) + + recommendations.append(recommendation) + + if len(recommendations) >= limit: + break + + # Sort by relevance score + recommendations.sort(key=lambda x: x.relevance_score, reverse=True) + + return recommendations[:limit] + + def _dict_to_course(self, data: Dict[str, Any]) -> Optional[Course]: + """Convert Redis hash data to Course object.""" + try: + from .models import Prerequisite, CourseSchedule + + # Parse prerequisites + prerequisites = [] + if data.get("prerequisites"): + prereq_data = json.loads(data["prerequisites"]) + prerequisites = [Prerequisite(**p) for p in prereq_data] + + # Parse schedule + schedule = None + if data.get("schedule"): + schedule_data = json.loads(data["schedule"]) + if schedule_data: + schedule = CourseSchedule(**schedule_data) + + # Parse learning objectives + learning_objectives = [] + if data.get("learning_objectives"): + learning_objectives = json.loads(data["learning_objectives"]) + + course = Course( + id=data["id"], + course_code=data["course_code"], + title=data["title"], + description=data["description"], + department=data["department"], + major=data["major"], + difficulty_level=DifficultyLevel(data["difficulty_level"]), + format=CourseFormat(data["format"]), + semester=data["semester"], + year=int(data["year"]), + credits=int(data["credits"]), + tags=data["tags"].split("|") if data.get("tags") else [], + instructor=data["instructor"], + max_enrollment=int(data["max_enrollment"]), + current_enrollment=int(data["current_enrollment"]), + learning_objectives=learning_objectives, + prerequisites=prerequisites, + schedule=schedule + ) + + return course + except Exception as e: + print(f"Error converting data to Course: {e}") + return None + + def _check_prerequisites(self, course: Course, student: StudentProfile) -> bool: + """Check if student meets course prerequisites.""" + for prereq in course.prerequisites: + if prereq.course_code not in student.completed_courses: + if not prereq.can_be_concurrent or prereq.course_code not in student.current_courses: + return False + return True + + def _calculate_relevance_score(self, course: Course, student: StudentProfile, query: str) -> float: + """Calculate relevance score for a course recommendation.""" + score = 0.5 # Base score + + # Major match + if student.major and course.major.lower() == student.major.lower(): + score += 0.3 + + # Interest match + for interest in student.interests: + if (interest.lower() in course.title.lower() or + 
interest.lower() in course.description.lower() or + interest.lower() in " ".join(course.tags).lower()): + score += 0.1 + + # Difficulty preference + if student.preferred_difficulty and course.difficulty_level == student.preferred_difficulty: + score += 0.1 + + # Format preference + if student.preferred_format and course.format == student.preferred_format: + score += 0.1 + + # Ensure score is between 0 and 1 + return min(1.0, max(0.0, score)) + + def _fits_preferences(self, course: Course, student: StudentProfile) -> bool: + """Check if course fits student preferences.""" + if student.preferred_format and course.format != student.preferred_format: + return False + if student.preferred_difficulty and course.difficulty_level != student.preferred_difficulty: + return False + return True + + def _generate_reasoning(self, course: Course, student: StudentProfile, score: float) -> str: + """Generate human-readable reasoning for the recommendation.""" + reasons = [] + + if student.major and course.major.lower() == student.major.lower(): + reasons.append(f"matches your {student.major} major") + + matching_interests = [ + interest for interest in student.interests + if (interest.lower() in course.title.lower() or + interest.lower() in course.description.lower()) + ] + if matching_interests: + reasons.append(f"aligns with your interests in {', '.join(matching_interests)}") + + if student.preferred_difficulty and course.difficulty_level == student.preferred_difficulty: + reasons.append(f"matches your preferred {course.difficulty_level.value} difficulty level") + + if not reasons: + reasons.append("is relevant to your academic goals") + + return f"This course {', '.join(reasons)}." diff --git a/python-recipes/context-engineering/reference-agent/redis_context_course/models.py b/python-recipes/context-engineering/reference-agent/redis_context_course/models.py new file mode 100644 index 00000000..45aeb4ec --- /dev/null +++ b/python-recipes/context-engineering/reference-agent/redis_context_course/models.py @@ -0,0 +1,141 @@ +""" +Data models for the Redis University Class Agent. + +This module defines the core data structures used throughout the application, +including courses, majors, prerequisites, and student information. 
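+
+All models are Pydantic models and validate on construction. A minimal sketch
+(field values are hypothetical):
+
+    profile = StudentProfile(name="Ada Lovelace", email="ada@example.edu",
+                             interests=["machine learning"])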
+""" + +from datetime import datetime, time +from enum import Enum +from typing import List, Optional, Dict, Any +from pydantic import BaseModel, Field, ConfigDict +from ulid import ULID + + +class DifficultyLevel(str, Enum): + """Course difficulty levels.""" + BEGINNER = "beginner" + INTERMEDIATE = "intermediate" + ADVANCED = "advanced" + GRADUATE = "graduate" + + +class CourseFormat(str, Enum): + """Course delivery formats.""" + IN_PERSON = "in_person" + ONLINE = "online" + HYBRID = "hybrid" + + +class Semester(str, Enum): + """Academic semesters.""" + FALL = "fall" + SPRING = "spring" + SUMMER = "summer" + WINTER = "winter" + + +class DayOfWeek(str, Enum): + """Days of the week for scheduling.""" + MONDAY = "monday" + TUESDAY = "tuesday" + WEDNESDAY = "wednesday" + THURSDAY = "thursday" + FRIDAY = "friday" + SATURDAY = "saturday" + SUNDAY = "sunday" + + +class CourseSchedule(BaseModel): + """Course schedule information.""" + days: List[DayOfWeek] + start_time: time + end_time: time + location: Optional[str] = None + + model_config = ConfigDict( + json_encoders={ + time: lambda v: v.strftime("%H:%M") + } + ) + + +class Prerequisite(BaseModel): + """Course prerequisite information.""" + course_code: str + course_title: str + minimum_grade: Optional[str] = "C" + can_be_concurrent: bool = False + + +class Course(BaseModel): + """Complete course information.""" + id: str = Field(default_factory=lambda: str(ULID())) + course_code: str # e.g., "CS101" + title: str + description: str + credits: int + difficulty_level: DifficultyLevel + format: CourseFormat + department: str + major: str + prerequisites: List[Prerequisite] = Field(default_factory=list) + schedule: Optional[CourseSchedule] = None + semester: Semester + year: int + instructor: str + max_enrollment: int + current_enrollment: int = 0 + tags: List[str] = Field(default_factory=list) + learning_objectives: List[str] = Field(default_factory=list) + created_at: datetime = Field(default_factory=datetime.now) + updated_at: datetime = Field(default_factory=datetime.now) + + +class Major(BaseModel): + """Academic major information.""" + id: str = Field(default_factory=lambda: str(ULID())) + name: str + code: str # e.g., "CS", "MATH", "ENG" + department: str + description: str + required_credits: int + core_courses: List[str] = Field(default_factory=list) # Course codes + elective_courses: List[str] = Field(default_factory=list) # Course codes + career_paths: List[str] = Field(default_factory=list) + created_at: datetime = Field(default_factory=datetime.now) + + +class StudentProfile(BaseModel): + """Student profile and preferences.""" + id: str = Field(default_factory=lambda: str(ULID())) + name: str + email: str + major: Optional[str] = None + year: int = 1 # 1-4 for undergraduate, 5+ for graduate + completed_courses: List[str] = Field(default_factory=list) # Course codes + current_courses: List[str] = Field(default_factory=list) # Course codes + interests: List[str] = Field(default_factory=list) + preferred_format: Optional[CourseFormat] = None + preferred_difficulty: Optional[DifficultyLevel] = None + max_credits_per_semester: int = 15 + created_at: datetime = Field(default_factory=datetime.now) + updated_at: datetime = Field(default_factory=datetime.now) + + +class CourseRecommendation(BaseModel): + """Course recommendation with reasoning.""" + course: Course + relevance_score: float = Field(ge=0.0, le=1.0) + reasoning: str + prerequisites_met: bool + fits_schedule: bool = True + fits_preferences: bool = True + + +class 
AgentResponse(BaseModel): + """Structured response from the agent.""" + message: str + recommendations: List[CourseRecommendation] = Field(default_factory=list) + suggested_actions: List[str] = Field(default_factory=list) + metadata: Dict[str, Any] = Field(default_factory=dict) diff --git a/python-recipes/context-engineering/reference-agent/redis_context_course/optimization_helpers.py b/python-recipes/context-engineering/reference-agent/redis_context_course/optimization_helpers.py new file mode 100644 index 00000000..61121848 --- /dev/null +++ b/python-recipes/context-engineering/reference-agent/redis_context_course/optimization_helpers.py @@ -0,0 +1,388 @@ +""" +Optimization helpers for context engineering. + +This module contains helper functions and patterns demonstrated in Section 4 +of the Context Engineering course. These are production-ready patterns for: +- Context window management +- Retrieval strategies +- Tool optimization +- Data crafting for LLMs +""" + +import json +from typing import List, Dict, Any, Optional +import tiktoken +from langchain_openai import ChatOpenAI +from langchain_core.messages import SystemMessage, HumanMessage + + +# Token Counting (from Section 4, notebook 01_context_window_management.ipynb) +def count_tokens(text: str, model: str = "gpt-4o") -> int: + """ + Count tokens in text for a specific model. + + Args: + text: Text to count tokens for + model: Model name (default: gpt-4o) + + Returns: + Number of tokens + """ + try: + encoding = tiktoken.encoding_for_model(model) + except KeyError: + encoding = tiktoken.get_encoding("cl100k_base") + + return len(encoding.encode(text)) + + +def estimate_token_budget( + system_prompt: str, + working_memory_messages: int, + long_term_memories: int, + retrieved_context_items: int, + avg_message_tokens: int = 50, + avg_memory_tokens: int = 100, + avg_context_tokens: int = 200, + response_tokens: int = 2000 +) -> Dict[str, int]: + """ + Estimate token budget for a conversation turn. + + Args: + system_prompt: System prompt text + working_memory_messages: Number of messages in working memory + long_term_memories: Number of long-term memories to include + retrieved_context_items: Number of retrieved context items + avg_message_tokens: Average tokens per message + avg_memory_tokens: Average tokens per memory + avg_context_tokens: Average tokens per context item + response_tokens: Tokens reserved for response + + Returns: + Dictionary with token breakdown + """ + system_tokens = count_tokens(system_prompt) + working_memory_tokens = working_memory_messages * avg_message_tokens + long_term_tokens = long_term_memories * avg_memory_tokens + context_tokens = retrieved_context_items * avg_context_tokens + + total_input = system_tokens + working_memory_tokens + long_term_tokens + context_tokens + total_with_response = total_input + response_tokens + + return { + "system_prompt": system_tokens, + "working_memory": working_memory_tokens, + "long_term_memory": long_term_tokens, + "retrieved_context": context_tokens, + "response_space": response_tokens, + "total_input": total_input, + "total_with_response": total_with_response, + "percentage_of_128k": (total_with_response / 128000) * 100 + } + + +# Retrieval Strategies (from Section 4, notebook 02_retrieval_strategies.ipynb) +async def hybrid_retrieval( + query: str, + summary_view: str, + search_function, + limit: int = 3 +) -> str: + """ + Hybrid retrieval: Combine pre-computed summary with targeted search. + + This is the recommended strategy for production systems. 
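+
+    A sketch of a typical call (``catalog_summary`` and ``search_courses_text``
+    are placeholders for your own summary string and async search function):
+
+        context = await hybrid_retrieval(
+            query="machine learning courses",
+            summary_view=catalog_summary,
+            search_function=search_courses_text,
+            limit=3,
+        )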
+ + Args: + query: User's query + summary_view: Pre-computed summary/overview + search_function: Async function that searches for specific items + limit: Number of specific items to retrieve + + Returns: + Combined context string + """ + # Get specific relevant items + specific_items = await search_function(query, limit=limit) + + # Combine summary + specific items + context = f"""{summary_view} + +Relevant items for this query: +{specific_items} +""" + + return context + + +# Structured Views (from Section 4, notebook 05_crafting_data_for_llms.ipynb) +async def create_summary_view( + items: List[Any], + group_by_field: str, + llm: Optional[ChatOpenAI] = None, + max_items_per_group: int = 10 +) -> str: + """ + Create a structured summary view of items. + + This implements the "Retrieve โ†’ Summarize โ†’ Stitch โ†’ Save" pattern. + + Args: + items: List of items to summarize + group_by_field: Field to group items by + llm: LLM for generating summaries (optional) + max_items_per_group: Max items to include per group + + Returns: + Formatted summary view + """ + # Step 1: Group items + groups = {} + for item in items: + group_key = getattr(item, group_by_field, "Other") + if group_key not in groups: + groups[group_key] = [] + groups[group_key].append(item) + + # Step 2 & 3: Summarize and stitch + summary_parts = ["Summary View\n" + "=" * 50 + "\n"] + + for group_name, group_items in sorted(groups.items()): + summary_parts.append(f"\n{group_name} ({len(group_items)} items):") + + # Include first N items + for item in group_items[:max_items_per_group]: + # Customize this based on your item type + summary_parts.append(f"- {str(item)[:100]}...") + + if len(group_items) > max_items_per_group: + summary_parts.append(f" ... and {len(group_items) - max_items_per_group} more") + + return "\n".join(summary_parts) + + +async def create_user_profile_view( + user_data: Dict[str, Any], + memories: List[Any], + llm: ChatOpenAI +) -> str: + """ + Create a comprehensive user profile view. + + This combines structured data with LLM-summarized memories. + + Args: + user_data: Structured user data (dict) + memories: List of user memories + llm: LLM for summarizing memories + + Returns: + Formatted profile view + """ + # Structured sections (no LLM needed) + profile_parts = [ + f"User Profile: {user_data.get('user_id', 'Unknown')}", + "=" * 50, + "" + ] + + # Add structured data + if "academic_info" in user_data: + profile_parts.append("Academic Info:") + for key, value in user_data["academic_info"].items(): + profile_parts.append(f"- {key}: {value}") + profile_parts.append("") + + # Summarize memories with LLM + if memories: + memory_text = "\n".join([f"- {m.text}" for m in memories[:20]]) + + prompt = f"""Summarize these user memories into organized sections. +Be concise. Use bullet points. + +Memories: +{memory_text} + +Create sections for: +1. Preferences +2. Goals +3. Important Facts +""" + + messages = [ + SystemMessage(content="You are a helpful assistant that summarizes user information."), + HumanMessage(content=prompt) + ] + + response = llm.invoke(messages) + profile_parts.append(response.content) + + return "\n".join(profile_parts) + + +# Tool Optimization (from Section 4, notebook 04_tool_optimization.ipynb) +def filter_tools_by_intent( + query: str, + tool_groups: Dict[str, List], + default_group: str = "search" +) -> List: + """ + Filter tools based on query intent using keyword matching. + + For production, consider using LLM-based intent classification. 
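+
+    Example (the tool lists are placeholders for your own tool objects):
+
+        tools = filter_tools_by_intent(
+            "find beginner machine learning courses",
+            tool_groups={"search": search_tools, "memory": memory_tools},
+        )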
+ + Args: + query: User's query + tool_groups: Dictionary mapping intent to tool lists + default_group: Default group if no match + + Returns: + List of relevant tools + """ + query_lower = query.lower() + + # Define keyword patterns for each intent + intent_patterns = { + "search": ['search', 'find', 'show', 'what', 'which', 'tell me about', 'list'], + "memory": ['remember', 'recall', 'know about', 'preferences', 'store', 'save'], + "enrollment": ['enroll', 'register', 'drop', 'add', 'remove', 'conflict'], + "review": ['review', 'rating', 'feedback', 'opinion', 'rate'], + } + + # Check each intent + for intent, keywords in intent_patterns.items(): + if any(keyword in query_lower for keyword in keywords): + return tool_groups.get(intent, tool_groups.get(default_group, [])) + + # Default + return tool_groups.get(default_group, []) + + +async def classify_intent_with_llm( + query: str, + intents: List[str], + llm: ChatOpenAI +) -> str: + """ + Classify user intent using LLM. + + More accurate than keyword matching but requires an LLM call. + + Args: + query: User's query + intents: List of possible intents + llm: LLM for classification + + Returns: + Classified intent + """ + intent_list = "\n".join([f"- {intent}" for intent in intents]) + + prompt = f"""Classify the user's intent into one of these categories: +{intent_list} + +User query: "{query}" + +Respond with only the category name. +""" + + messages = [ + SystemMessage(content="You are a helpful assistant that classifies user intents."), + HumanMessage(content=prompt) + ] + + response = llm.invoke(messages) + intent = response.content.strip().lower() + + # Validate + if intent not in intents: + intent = intents[0] # Default to first intent + + return intent + + +# Grounding Helpers (from Section 4, notebook 03_grounding_with_memory.ipynb) +def extract_references(query: str) -> Dict[str, List[str]]: + """ + Extract references from a query that need grounding. + + This is a simple pattern matcher. For production, consider using NER. + + Args: + query: User's query + + Returns: + Dictionary of reference types and their values + """ + references = { + "pronouns": [], + "demonstratives": [], + "implicit": [] + } + + query_lower = query.lower() + + # Pronouns + pronouns = ['it', 'that', 'this', 'those', 'these', 'he', 'she', 'they', 'them'] + for pronoun in pronouns: + if f" {pronoun} " in f" {query_lower} ": + references["pronouns"].append(pronoun) + + # Demonstratives + if "the one" in query_lower or "the other" in query_lower: + references["demonstratives"].append("the one/other") + + # Implicit references (questions without explicit subject) + implicit_patterns = [ + "what are the prerequisites", + "when is it offered", + "how many credits", + "is it available" + ] + for pattern in implicit_patterns: + if pattern in query_lower: + references["implicit"].append(pattern) + + return references + + +# Utility Functions +def format_context_for_llm( + system_instructions: str, + summary_view: Optional[str] = None, + user_profile: Optional[str] = None, + retrieved_items: Optional[str] = None, + memories: Optional[str] = None +) -> str: + """ + Format various context sources into a single system prompt. + + This is the recommended way to combine different context sources. 
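+
+    Example (each argument is a pre-rendered string; the names are illustrative):
+
+        system_prompt = format_context_for_llm(
+            system_instructions="You are a university course advisor.",
+            summary_view=catalog_summary,
+            memories=relevant_memories,
+        )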
+ + Args: + system_instructions: Base system instructions + summary_view: Pre-computed summary view + user_profile: User profile view + retrieved_items: Retrieved specific items + memories: Relevant memories + + Returns: + Formatted system prompt + """ + parts = [system_instructions] + + if summary_view: + parts.append(f"\n## Overview\n{summary_view}") + + if user_profile: + parts.append(f"\n## User Profile\n{user_profile}") + + if memories: + parts.append(f"\n## Relevant Memories\n{memories}") + + if retrieved_items: + parts.append(f"\n## Specific Information\n{retrieved_items}") + + return "\n".join(parts) + diff --git a/python-recipes/context-engineering/reference-agent/redis_context_course/redis_config.py b/python-recipes/context-engineering/reference-agent/redis_context_course/redis_config.py new file mode 100644 index 00000000..b3c49105 --- /dev/null +++ b/python-recipes/context-engineering/reference-agent/redis_context_course/redis_config.py @@ -0,0 +1,160 @@ +""" +Redis configuration and connection management for the Class Agent. + +This module handles all Redis connections, including vector storage +and checkpointing. +""" + +import os +from typing import Optional +import redis +from redisvl.index import SearchIndex +from redisvl.schema import IndexSchema +from langchain_openai import OpenAIEmbeddings +from langgraph.checkpoint.redis import RedisSaver + + +class RedisConfig: + """Redis configuration management.""" + + def __init__( + self, + redis_url: Optional[str] = None, + vector_index_name: str = "course_catalog", + checkpoint_namespace: str = "class_agent" + ): + self.redis_url = redis_url or os.getenv("REDIS_URL", "redis://localhost:6379") + self.vector_index_name = vector_index_name + self.checkpoint_namespace = checkpoint_namespace + + # Initialize connections + self._redis_client = None + self._vector_index = None + self._checkpointer = None + self._embeddings = None + + @property + def redis_client(self) -> redis.Redis: + """Get Redis client instance.""" + if self._redis_client is None: + self._redis_client = redis.from_url(self.redis_url, decode_responses=True) + return self._redis_client + + @property + def embeddings(self) -> OpenAIEmbeddings: + """Get OpenAI embeddings instance.""" + if self._embeddings is None: + self._embeddings = OpenAIEmbeddings(model="text-embedding-3-small") + return self._embeddings + + @property + def vector_index(self) -> SearchIndex: + """Get or create vector search index for courses.""" + if self._vector_index is None: + schema = IndexSchema.from_dict({ + "index": { + "name": self.vector_index_name, + "prefix": f"{self.vector_index_name}:", + "storage_type": "hash" + }, + "fields": [ + { + "name": "id", + "type": "tag" + }, + { + "name": "course_code", + "type": "tag" + }, + { + "name": "title", + "type": "text" + }, + { + "name": "description", + "type": "text" + }, + { + "name": "department", + "type": "tag" + }, + { + "name": "major", + "type": "tag" + }, + { + "name": "difficulty_level", + "type": "tag" + }, + { + "name": "format", + "type": "tag" + }, + { + "name": "semester", + "type": "tag" + }, + { + "name": "year", + "type": "numeric" + }, + { + "name": "credits", + "type": "numeric" + }, + { + "name": "tags", + "type": "tag" + }, + { + "name": "content_vector", + "type": "vector", + "attrs": { + "dims": 1536, + "distance_metric": "cosine", + "algorithm": "hnsw", + "datatype": "float32" + } + } + ] + }) + + # Initialize index with connection params (avoid deprecated .connect()) + self._vector_index = SearchIndex(schema, 
redis_url=self.redis_url) + + # Create index if it doesn't exist + try: + self._vector_index.create(overwrite=False) + except Exception: + # Index likely already exists + pass + + return self._vector_index + + @property + def checkpointer(self) -> RedisSaver: + """Get Redis checkpointer for LangGraph state management.""" + if self._checkpointer is None: + self._checkpointer = RedisSaver( + redis_client=self.redis_client + ) + self._checkpointer.setup() + return self._checkpointer + + def health_check(self) -> bool: + """Check if Redis connection is healthy.""" + try: + return self.redis_client.ping() + except Exception: + return False + + def cleanup(self): + """Clean up connections.""" + if self._redis_client: + self._redis_client.close() + if self._vector_index: + self._vector_index.disconnect() + + +# Global configuration instance +redis_config = RedisConfig() diff --git a/python-recipes/context-engineering/reference-agent/redis_context_course/scripts/__init__.py b/python-recipes/context-engineering/reference-agent/redis_context_course/scripts/__init__.py new file mode 100644 index 00000000..2f2a0b5c --- /dev/null +++ b/python-recipes/context-engineering/reference-agent/redis_context_course/scripts/__init__.py @@ -0,0 +1,12 @@ +""" +Scripts package for Redis Context Course. + +This package contains command-line scripts for data generation, +ingestion, and other utilities for the context engineering course. + +Available scripts: +- generate_courses: Generate sample course catalog data +- ingest_courses: Ingest course data into Redis +""" + +__all__ = ["generate_courses", "ingest_courses"] diff --git a/python-recipes/context-engineering/reference-agent/redis_context_course/scripts/generate_courses.py b/python-recipes/context-engineering/reference-agent/redis_context_course/scripts/generate_courses.py new file mode 100644 index 00000000..3c61a155 --- /dev/null +++ b/python-recipes/context-engineering/reference-agent/redis_context_course/scripts/generate_courses.py @@ -0,0 +1,427 @@ +#!/usr/bin/env python3 +""" +Course catalog generation script for the Redis University Class Agent. + +This script generates realistic course data including courses, majors, prerequisites, +and other academic metadata for demonstration and testing purposes. 
+""" + +import json +import random +import sys +import os +from datetime import time +from typing import List, Dict, Any +from faker import Faker +import click + +from redis_context_course.models import ( + Course, Major, Prerequisite, CourseSchedule, + DifficultyLevel, CourseFormat, Semester, DayOfWeek +) + +fake = Faker() + + +class CourseGenerator: + """Generates realistic course catalog data.""" + + def __init__(self): + self.majors_data = self._define_majors() + self.course_templates = self._define_course_templates() + self.generated_courses = [] + self.generated_majors = [] + + def _define_majors(self) -> Dict[str, Dict[str, Any]]: + """Define major programs with their characteristics.""" + return { + "Computer Science": { + "code": "CS", + "department": "Computer Science", + "description": "Study of computational systems, algorithms, and software design", + "required_credits": 120, + "career_paths": ["Software Engineer", "Data Scientist", "Systems Architect", "AI Researcher"] + }, + "Data Science": { + "code": "DS", + "department": "Data Science", + "description": "Interdisciplinary field using statistics, programming, and domain expertise", + "required_credits": 120, + "career_paths": ["Data Analyst", "Machine Learning Engineer", "Business Intelligence Analyst"] + }, + "Mathematics": { + "code": "MATH", + "department": "Mathematics", + "description": "Study of numbers, structures, patterns, and logical reasoning", + "required_credits": 120, + "career_paths": ["Mathematician", "Statistician", "Actuary", "Research Scientist"] + }, + "Business Administration": { + "code": "BUS", + "department": "Business", + "description": "Management, finance, marketing, and organizational behavior", + "required_credits": 120, + "career_paths": ["Business Analyst", "Project Manager", "Consultant", "Entrepreneur"] + }, + "Psychology": { + "code": "PSY", + "department": "Psychology", + "description": "Scientific study of mind, behavior, and mental processes", + "required_credits": 120, + "career_paths": ["Clinical Psychologist", "Counselor", "Research Psychologist", "HR Specialist"] + } + } + + def _define_course_templates(self) -> Dict[str, List[Dict[str, Any]]]: + """Define course templates for each major.""" + return { + "Computer Science": [ + { + "title_template": "Introduction to Programming", + "description": "Fundamental programming concepts using Python. Variables, control structures, functions, and basic data structures.", + "difficulty": DifficultyLevel.BEGINNER, + "credits": 3, + "tags": ["programming", "python", "fundamentals"], + "learning_objectives": [ + "Write basic Python programs", + "Understand variables and data types", + "Use control structures effectively", + "Create and use functions" + ] + }, + { + "title_template": "Data Structures and Algorithms", + "description": "Study of fundamental data structures and algorithms. Arrays, linked lists, trees, graphs, sorting, and searching.", + "difficulty": DifficultyLevel.INTERMEDIATE, + "credits": 4, + "tags": ["algorithms", "data structures", "problem solving"], + "learning_objectives": [ + "Implement common data structures", + "Analyze algorithm complexity", + "Solve problems using appropriate data structures", + "Understand time and space complexity" + ] + }, + { + "title_template": "Database Systems", + "description": "Design and implementation of database systems. 
SQL, normalization, transactions, and database administration.", + "difficulty": DifficultyLevel.INTERMEDIATE, + "credits": 3, + "tags": ["databases", "sql", "data management"], + "learning_objectives": [ + "Design relational databases", + "Write complex SQL queries", + "Understand database normalization", + "Implement database transactions" + ] + }, + { + "title_template": "Machine Learning", + "description": "Introduction to machine learning algorithms and applications. Supervised and unsupervised learning, neural networks.", + "difficulty": DifficultyLevel.ADVANCED, + "credits": 4, + "tags": ["machine learning", "ai", "statistics"], + "learning_objectives": [ + "Understand ML algorithms", + "Implement classification and regression models", + "Evaluate model performance", + "Apply ML to real-world problems" + ] + }, + { + "title_template": "Web Development", + "description": "Full-stack web development using modern frameworks. HTML, CSS, JavaScript, React, and backend APIs.", + "difficulty": DifficultyLevel.INTERMEDIATE, + "credits": 3, + "tags": ["web development", "javascript", "react", "apis"], + "learning_objectives": [ + "Build responsive web interfaces", + "Develop REST APIs", + "Use modern JavaScript frameworks", + "Deploy web applications" + ] + } + ], + "Data Science": [ + { + "title_template": "Statistics for Data Science", + "description": "Statistical methods and probability theory for data analysis. Hypothesis testing, regression, and statistical inference.", + "difficulty": DifficultyLevel.INTERMEDIATE, + "credits": 4, + "tags": ["statistics", "probability", "data analysis"], + "learning_objectives": [ + "Apply statistical methods to data", + "Perform hypothesis testing", + "Understand probability distributions", + "Conduct statistical inference" + ] + }, + { + "title_template": "Data Visualization", + "description": "Creating effective visualizations for data communication. Tools include Python matplotlib, seaborn, and Tableau.", + "difficulty": DifficultyLevel.BEGINNER, + "credits": 3, + "tags": ["visualization", "python", "tableau", "communication"], + "learning_objectives": [ + "Create effective data visualizations", + "Choose appropriate chart types", + "Use visualization tools", + "Communicate insights through visuals" + ] + } + ], + "Mathematics": [ + { + "title_template": "Calculus I", + "description": "Differential calculus including limits, derivatives, and applications. Foundation for advanced mathematics.", + "difficulty": DifficultyLevel.INTERMEDIATE, + "credits": 4, + "tags": ["calculus", "derivatives", "limits"], + "learning_objectives": [ + "Understand limits and continuity", + "Calculate derivatives", + "Apply calculus to real problems", + "Understand fundamental theorem" + ] + }, + { + "title_template": "Linear Algebra", + "description": "Vector spaces, matrices, eigenvalues, and linear transformations. 
Essential for data science and engineering.", + "difficulty": DifficultyLevel.INTERMEDIATE, + "credits": 3, + "tags": ["linear algebra", "matrices", "vectors"], + "learning_objectives": [ + "Perform matrix operations", + "Understand vector spaces", + "Calculate eigenvalues and eigenvectors", + "Apply linear algebra to problems" + ] + } + ], + "Business Administration": [ + { + "title_template": "Principles of Management", + "description": "Fundamental management concepts including planning, organizing, leading, and controlling organizational resources.", + "difficulty": DifficultyLevel.BEGINNER, + "credits": 3, + "tags": ["management", "leadership", "organization"], + "learning_objectives": [ + "Understand management principles", + "Apply leadership concepts", + "Organize teams effectively", + "Control organizational resources" + ] + }, + { + "title_template": "Marketing Strategy", + "description": "Strategic marketing planning, market analysis, consumer behavior, and digital marketing techniques.", + "difficulty": DifficultyLevel.INTERMEDIATE, + "credits": 3, + "tags": ["marketing", "strategy", "consumer behavior"], + "learning_objectives": [ + "Develop marketing strategies", + "Analyze market opportunities", + "Understand consumer behavior", + "Implement digital marketing" + ] + } + ], + "Psychology": [ + { + "title_template": "Introduction to Psychology", + "description": "Overview of psychological principles, research methods, and major areas of study in psychology.", + "difficulty": DifficultyLevel.BEGINNER, + "credits": 3, + "tags": ["psychology", "research methods", "behavior"], + "learning_objectives": [ + "Understand psychological principles", + "Learn research methods", + "Explore areas of psychology", + "Apply psychological concepts" + ] + }, + { + "title_template": "Cognitive Psychology", + "description": "Study of mental processes including perception, memory, thinking, and problem-solving.", + "difficulty": DifficultyLevel.INTERMEDIATE, + "credits": 3, + "tags": ["cognitive psychology", "memory", "perception"], + "learning_objectives": [ + "Understand cognitive processes", + "Study memory systems", + "Analyze problem-solving", + "Explore perception mechanisms" + ] + } + ] + } + + def generate_majors(self) -> List[Major]: + """Generate major objects.""" + majors = [] + for name, data in self.majors_data.items(): + major = Major( + name=name, + code=data["code"], + department=data["department"], + description=data["description"], + required_credits=data["required_credits"], + career_paths=data["career_paths"] + ) + majors.append(major) + + self.generated_majors = majors + return majors + + def generate_courses(self, courses_per_major: int = 10) -> List[Course]: + """Generate course objects for all majors.""" + courses = [] + course_counter = 1 + + for major_name, major_data in self.majors_data.items(): + templates = self.course_templates.get(major_name, []) + + # Generate courses based on templates and variations + for i in range(courses_per_major): + if templates: + template = random.choice(templates) + else: + # Fallback template for majors without specific templates + template = { + "title_template": f"{major_name} Course {i+1}", + "description": f"Advanced topics in {major_name.lower()}", + "difficulty": random.choice(list(DifficultyLevel)), + "credits": random.choice([3, 4]), + "tags": [major_name.lower().replace(" ", "_")], + "learning_objectives": [f"Understand {major_name} concepts"] + } + + # Create course code + course_code = 
f"{major_data['code']}{course_counter:03d}" + course_counter += 1 + + # Generate schedule + schedule = self._generate_schedule() + + # Generate prerequisites (some courses have them) + prerequisites = [] + if i > 2 and random.random() < 0.3: # 30% chance for advanced courses + # Add 1-2 prerequisites from earlier courses + prereq_count = random.randint(1, 2) + for _ in range(prereq_count): + prereq_num = random.randint(1, max(1, course_counter - 10)) + prereq_code = f"{major_data['code']}{prereq_num:03d}" + prereq = Prerequisite( + course_code=prereq_code, + course_title=f"Prerequisite Course {prereq_num}", + minimum_grade=random.choice(["C", "C+", "B-"]), + can_be_concurrent=random.random() < 0.2 + ) + prerequisites.append(prereq) + + course = Course( + course_code=course_code, + title=template["title_template"], + description=template["description"], + credits=template["credits"], + difficulty_level=template["difficulty"], + format=random.choice(list(CourseFormat)), + department=major_data["department"], + major=major_name, + prerequisites=prerequisites, + schedule=schedule, + semester=random.choice(list(Semester)), + year=2024, + instructor=fake.name(), + max_enrollment=random.randint(20, 100), + current_enrollment=random.randint(0, 80), + tags=template["tags"], + learning_objectives=template["learning_objectives"] + ) + + courses.append(course) + + self.generated_courses = courses + return courses + + def _generate_schedule(self) -> CourseSchedule: + """Generate a random course schedule.""" + # Common schedule patterns + patterns = [ + ([DayOfWeek.MONDAY, DayOfWeek.WEDNESDAY, DayOfWeek.FRIDAY], 50), # MWF + ([DayOfWeek.TUESDAY, DayOfWeek.THURSDAY], 75), # TR + ([DayOfWeek.MONDAY, DayOfWeek.WEDNESDAY], 75), # MW + ([DayOfWeek.TUESDAY], 150), # T (long class) + ([DayOfWeek.THURSDAY], 150), # R (long class) + ] + + days, duration = random.choice(patterns) + + # Generate start time (8 AM to 6 PM) + start_hour = random.randint(8, 18) + start_time = time(start_hour, random.choice([0, 30])) + + # Calculate end time + end_hour = start_hour + (duration // 60) + end_minute = start_time.minute + (duration % 60) + if end_minute >= 60: + end_hour += 1 + end_minute -= 60 + + end_time = time(end_hour, end_minute) + + # Generate location + buildings = ["Science Hall", "Engineering Building", "Liberal Arts Center", "Business Complex", "Technology Center"] + room_number = random.randint(100, 999) + location = f"{random.choice(buildings)} {room_number}" + + return CourseSchedule( + days=days, + start_time=start_time, + end_time=end_time, + location=location + ) + + def save_to_json(self, filename: str): + """Save generated data to JSON file.""" + data = { + "majors": [major.dict() for major in self.generated_majors], + "courses": [course.dict() for course in self.generated_courses] + } + + with open(filename, 'w') as f: + json.dump(data, f, indent=2, default=str) + + print(f"Generated {len(self.generated_majors)} majors and {len(self.generated_courses)} courses") + print(f"Data saved to {filename}") + + +@click.command() +@click.option('--output', '-o', default='course_catalog.json', help='Output JSON file') +@click.option('--courses-per-major', '-c', default=10, help='Number of courses per major') +@click.option('--seed', '-s', type=int, help='Random seed for reproducible generation') +def main(output: str, courses_per_major: int, seed: int): + """Generate course catalog data for the Redis University Class Agent.""" + + if seed: + random.seed(seed) + fake.seed_instance(seed) + + generator = 
CourseGenerator() + + print("Generating majors...") + majors = generator.generate_majors() + + print(f"Generating {courses_per_major} courses per major...") + courses = generator.generate_courses(courses_per_major) + + print(f"Saving to {output}...") + generator.save_to_json(output) + + print("\nGeneration complete!") + print(f"Total majors: {len(majors)}") + print(f"Total courses: {len(courses)}") + + +if __name__ == "__main__": + main() diff --git a/python-recipes/context-engineering/reference-agent/redis_context_course/scripts/ingest_courses.py b/python-recipes/context-engineering/reference-agent/redis_context_course/scripts/ingest_courses.py new file mode 100644 index 00000000..14224e41 --- /dev/null +++ b/python-recipes/context-engineering/reference-agent/redis_context_course/scripts/ingest_courses.py @@ -0,0 +1,259 @@ +#!/usr/bin/env python3 +""" +Course catalog ingestion script for the Redis University Class Agent. + +This script loads course catalog data from JSON files and ingests it into Redis +with proper vector indexing for semantic search capabilities. +""" + +import json +import asyncio +import sys +import os +from datetime import datetime +from typing import List, Dict, Any +import click +from rich.console import Console +from rich.progress import Progress, TaskID +from dotenv import load_dotenv + +from redis_context_course.models import Course, Major, DifficultyLevel, CourseFormat, Semester, DayOfWeek, Prerequisite, CourseSchedule +from redis_context_course.course_manager import CourseManager +from redis_context_course.redis_config import redis_config + +# Load environment variables +load_dotenv() + +console = Console() + + +class CourseIngestionPipeline: + """Pipeline for ingesting course catalog data into Redis.""" + + def __init__(self): + self.course_manager = CourseManager() + self.redis_client = redis_config.redis_client + + def load_catalog_from_json(self, filename: str) -> Dict[str, List[Dict[str, Any]]]: + """Load course catalog data from JSON file.""" + try: + with open(filename, 'r') as f: + data = json.load(f) + + console.print(f"[green]โœ… Loaded catalog from {filename}[/green]") + console.print(f" Majors: {len(data.get('majors', []))}") + console.print(f" Courses: {len(data.get('courses', []))}") + + return data + except FileNotFoundError: + console.print(f"[red]โŒ File not found: {filename}[/red]") + raise + except json.JSONDecodeError as e: + console.print(f"[red]โŒ Invalid JSON in {filename}: {e}[/red]") + raise + + def _dict_to_course(self, course_data: Dict[str, Any]) -> Course: + """Convert dictionary data to Course object.""" + # Parse prerequisites + prerequisites = [] + for prereq_data in course_data.get('prerequisites', []): + prereq = Prerequisite(**prereq_data) + prerequisites.append(prereq) + + # Parse schedule + schedule = None + if course_data.get('schedule'): + schedule_data = course_data['schedule'] + # Convert day strings to DayOfWeek enums + days = [DayOfWeek(day) for day in schedule_data['days']] + schedule_data['days'] = days + schedule = CourseSchedule(**schedule_data) + + # Create course object + course = Course( + id=course_data.get('id'), + course_code=course_data['course_code'], + title=course_data['title'], + description=course_data['description'], + credits=course_data['credits'], + difficulty_level=DifficultyLevel(course_data['difficulty_level']), + format=CourseFormat(course_data['format']), + department=course_data['department'], + major=course_data['major'], + prerequisites=prerequisites, + schedule=schedule, + 
semester=Semester(course_data['semester']), + year=course_data['year'], + instructor=course_data['instructor'], + max_enrollment=course_data['max_enrollment'], + current_enrollment=course_data['current_enrollment'], + tags=course_data.get('tags', []), + learning_objectives=course_data.get('learning_objectives', []) + ) + + return course + + def _dict_to_major(self, major_data: Dict[str, Any]) -> Major: + """Convert dictionary data to Major object.""" + return Major( + id=major_data.get('id'), + name=major_data['name'], + code=major_data['code'], + department=major_data['department'], + description=major_data['description'], + required_credits=major_data['required_credits'], + core_courses=major_data.get('core_courses', []), + elective_courses=major_data.get('elective_courses', []), + career_paths=major_data.get('career_paths', []) + ) + + async def ingest_courses(self, courses_data: List[Dict[str, Any]]) -> int: + """Ingest courses into Redis with progress tracking.""" + ingested_count = 0 + + with Progress() as progress: + task = progress.add_task("[green]Ingesting courses...", total=len(courses_data)) + + for course_data in courses_data: + try: + course = self._dict_to_course(course_data) + await self.course_manager.store_course(course) + ingested_count += 1 + progress.update(task, advance=1) + except Exception as e: + console.print(f"[red]โŒ Failed to ingest course {course_data.get('course_code', 'unknown')}: {e}[/red]") + + return ingested_count + + def ingest_majors(self, majors_data: List[Dict[str, Any]]) -> int: + """Ingest majors into Redis.""" + ingested_count = 0 + + with Progress() as progress: + task = progress.add_task("[blue]Ingesting majors...", total=len(majors_data)) + + for major_data in majors_data: + try: + major = self._dict_to_major(major_data) + # Store major data in Redis (simple hash storage) + key = f"major:{major.id}" + # Convert any non-scalar fields to JSON strings for Redis hash storage + major_map = {} + for k, v in major.dict().items(): + if isinstance(v, (list, dict)): + major_map[k] = json.dumps(v) + elif isinstance(v, datetime): + major_map[k] = v.isoformat() + else: + major_map[k] = v + self.redis_client.hset(key, mapping=major_map) + ingested_count += 1 + progress.update(task, advance=1) + except Exception as e: + console.print(f"[red]โŒ Failed to ingest major {major_data.get('name', 'unknown')}: {e}[/red]") + + return ingested_count + + def clear_existing_data(self): + """Clear existing course and major data from Redis.""" + console.print("[yellow]๐Ÿงน Clearing existing data...[/yellow]") + + # Clear course data + course_keys = self.redis_client.keys(f"{redis_config.vector_index_name}:*") + if course_keys: + self.redis_client.delete(*course_keys) + console.print(f" Cleared {len(course_keys)} course records") + + # Clear major data + major_keys = self.redis_client.keys("major:*") + if major_keys: + self.redis_client.delete(*major_keys) + console.print(f" Cleared {len(major_keys)} major records") + + console.print("[green]โœ… Data cleared successfully[/green]") + + def verify_ingestion(self) -> Dict[str, int]: + """Verify the ingestion by counting stored records.""" + course_count = len(self.redis_client.keys(f"{redis_config.vector_index_name}:*")) + major_count = len(self.redis_client.keys("major:*")) + + return { + "courses": course_count, + "majors": major_count + } + + async def run_ingestion(self, catalog_file: str, clear_existing: bool = False): + """Run the complete ingestion pipeline.""" + console.print("[bold blue]๐Ÿš€ Starting Course Catalog 
Ingestion[/bold blue]") + + # Check Redis connection + if not redis_config.health_check(): + console.print("[red]โŒ Redis connection failed. Please check your Redis server.[/red]") + return False + + console.print("[green]โœ… Redis connection successful[/green]") + + # Clear existing data if requested + if clear_existing: + self.clear_existing_data() + + # Load catalog data + try: + catalog_data = self.load_catalog_from_json(catalog_file) + except Exception: + return False + + # Ingest majors + majors_data = catalog_data.get('majors', []) + if majors_data: + major_count = self.ingest_majors(majors_data) + console.print(f"[green]โœ… Ingested {major_count} majors[/green]") + + # Ingest courses + courses_data = catalog_data.get('courses', []) + if courses_data: + course_count = await self.ingest_courses(courses_data) + console.print(f"[green]โœ… Ingested {course_count} courses[/green]") + + # Verify ingestion + verification = self.verify_ingestion() + console.print(f"[blue]๐Ÿ“Š Verification - Courses: {verification['courses']}, Majors: {verification['majors']}[/blue]") + + console.print("[bold green]๐ŸŽ‰ Ingestion completed successfully![/bold green]") + return True + + +@click.command() +@click.option('--catalog', '-c', default='course_catalog.json', help='Course catalog JSON file') +@click.option('--clear', is_flag=True, help='Clear existing data before ingestion') +@click.option('--redis-url', help='Redis connection URL') +def main(catalog: str, clear: bool, redis_url: str): + """Ingest course catalog data into Redis for the Class Agent.""" + + # Set Redis URL if provided + if redis_url: + os.environ['REDIS_URL'] = redis_url + + # Check for required environment variables + if not os.getenv('OPENAI_API_KEY'): + console.print("[red]โŒ OPENAI_API_KEY environment variable is required[/red]") + console.print("[yellow]Please set your OpenAI API key for embedding generation[/yellow]") + sys.exit(1) + + # Run ingestion + pipeline = CourseIngestionPipeline() + + try: + success = asyncio.run(pipeline.run_ingestion(catalog, clear)) + if not success: + sys.exit(1) + except KeyboardInterrupt: + console.print("\n[yellow]Ingestion interrupted by user[/yellow]") + sys.exit(1) + except Exception as e: + console.print(f"[red]โŒ Ingestion failed: {e}[/red]") + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/python-recipes/context-engineering/reference-agent/redis_context_course/semantic_tool_selector.py b/python-recipes/context-engineering/reference-agent/redis_context_course/semantic_tool_selector.py new file mode 100644 index 00000000..1e8950d2 --- /dev/null +++ b/python-recipes/context-engineering/reference-agent/redis_context_course/semantic_tool_selector.py @@ -0,0 +1,351 @@ +""" +Semantic Tool Selection for Context Engineering. + +This module implements advanced tool selection using embeddings and semantic similarity, +replacing simple keyword-based approaches with intelligent intent understanding. 
+ +Key Features: +- Embedding-based tool matching +- Intent classification with confidence scoring +- Dynamic tool filtering based on context +- Fallback strategies for ambiguous queries +- Integration with existing tool system + +Usage: + from redis_context_course.semantic_tool_selector import SemanticToolSelector + + selector = SemanticToolSelector(available_tools) + selected_tools = await selector.select_tools(user_query, max_tools=3) +""" + +import asyncio +import numpy as np +from typing import List, Dict, Any, Optional, Tuple +from dataclasses import dataclass +from langchain_core.tools import BaseTool +from langchain_openai import OpenAIEmbeddings +from sklearn.metrics.pairwise import cosine_similarity +import logging + +logger = logging.getLogger(__name__) + + +@dataclass +class ToolIntent: + """Represents a tool's intended use with semantic information.""" + tool: BaseTool + description: str + examples: List[str] + keywords: List[str] + embedding: Optional[np.ndarray] = None + confidence_threshold: float = 0.6 + + +class SemanticToolSelector: + """ + Advanced tool selection using semantic similarity. + + This replaces keyword-based tool selection with embedding-based matching, + providing more accurate tool selection for complex queries. + """ + + def __init__(self, tools: List[BaseTool], embeddings_model: Optional[OpenAIEmbeddings] = None): + """ + Initialize semantic tool selector. + + Args: + tools: List of available tools + embeddings_model: OpenAI embeddings model (optional) + """ + self.embeddings_model = embeddings_model or OpenAIEmbeddings() + self.tool_intents: List[ToolIntent] = [] + self._initialize_tool_intents(tools) + + def _initialize_tool_intents(self, tools: List[BaseTool]): + """Initialize tool intents with semantic information.""" + + # Define semantic information for each tool + tool_semantics = { + "search_courses_tool": { + "description": "Find and discover courses based on topics, levels, or requirements", + "examples": [ + "I want to learn machine learning", + "Show me beginner programming courses", + "Find courses about data science", + "What Redis courses are available?", + "Search for advanced Python classes" + ], + "keywords": ["search", "find", "show", "discover", "browse", "list", "available"] + }, + "get_recommendations_tool": { + "description": "Get personalized course recommendations based on student profile and goals", + "examples": [ + "What courses should I take next?", + "Recommend courses for my career goals", + "What's the best learning path for me?", + "Suggest courses based on my background", + "Help me plan my education" + ], + "keywords": ["recommend", "suggest", "should", "best", "plan", "path", "next"] + }, + "store_preference_tool": { + "description": "Save student preferences for learning style, schedule, or course types", + "examples": [ + "I prefer online courses", + "Remember that I like hands-on learning", + "I want self-paced classes", + "Save my preference for evening courses", + "I prefer video-based content" + ], + "keywords": ["prefer", "like", "remember", "save", "store", "want", "style"] + }, + "store_goal_tool": { + "description": "Save student academic or career goals for personalized recommendations", + "examples": [ + "I want to become a data scientist", + "My goal is to learn machine learning", + "I'm working toward a Redis certification", + "I want to build AI applications", + "My career goal is software engineering" + ], + "keywords": ["goal", "want to become", "working toward", "aim", "target", "career"] + }, + 
"get_student_context_tool": { + "description": "Retrieve relevant student context including preferences, goals, and history", + "examples": [ + "What do you know about me?", + "Show my learning history", + "What are my preferences?", + "Display my profile", + "What goals have I set?" + ], + "keywords": ["know about me", "my", "profile", "history", "preferences", "goals"] + } + } + + # Create tool intents with embeddings + for tool in tools: + tool_name = tool.name + if tool_name in tool_semantics: + semantics = tool_semantics[tool_name] + + # Create semantic text for embedding + semantic_text = f"{semantics['description']}. Examples: {' '.join(semantics['examples'])}" + + # Generate embedding + try: + embedding = np.array(self.embeddings_model.embed_query(semantic_text)) + except Exception as e: + logger.warning(f"Failed to generate embedding for {tool_name}: {e}") + embedding = None + + tool_intent = ToolIntent( + tool=tool, + description=semantics["description"], + examples=semantics["examples"], + keywords=semantics["keywords"], + embedding=embedding + ) + + self.tool_intents.append(tool_intent) + else: + logger.warning(f"No semantic information defined for tool: {tool_name}") + + async def select_tools( + self, + query: str, + max_tools: int = 3, + min_confidence: float = 0.5 + ) -> List[BaseTool]: + """ + Select most relevant tools for a query using semantic similarity. + + Args: + query: User's query + max_tools: Maximum number of tools to return + min_confidence: Minimum confidence threshold + + Returns: + List of selected tools ordered by relevance + """ + if not query.strip(): + return [] + + try: + # Get query embedding + query_embedding = np.array(self.embeddings_model.embed_query(query)) + + # Calculate similarities + tool_scores = [] + for tool_intent in self.tool_intents: + if tool_intent.embedding is not None: + similarity = cosine_similarity( + query_embedding.reshape(1, -1), + tool_intent.embedding.reshape(1, -1) + )[0][0] + + # Boost score if keywords match + keyword_boost = self._calculate_keyword_boost(query, tool_intent.keywords) + final_score = similarity + keyword_boost + + tool_scores.append((tool_intent.tool, final_score, similarity)) + + # Sort by score and filter by confidence + tool_scores.sort(key=lambda x: x[1], reverse=True) + selected_tools = [ + tool for tool, score, similarity in tool_scores + if similarity >= min_confidence + ][:max_tools] + + # Log selection for debugging + logger.info(f"Selected {len(selected_tools)} tools for query: '{query[:50]}...'") + for tool, score, similarity in tool_scores[:max_tools]: + logger.debug(f" {tool.name}: similarity={similarity:.3f}, final_score={score:.3f}") + + return selected_tools + + except Exception as e: + logger.error(f"Error in semantic tool selection: {e}") + # Fallback to keyword-based selection + return self._fallback_keyword_selection(query, max_tools) + + def _calculate_keyword_boost(self, query: str, keywords: List[str]) -> float: + """Calculate boost score based on keyword matches.""" + query_lower = query.lower() + matches = sum(1 for keyword in keywords if keyword in query_lower) + return min(matches * 0.1, 0.3) # Max boost of 0.3 + + def _fallback_keyword_selection(self, query: str, max_tools: int) -> List[BaseTool]: + """Fallback to simple keyword-based selection.""" + query_lower = query.lower() + scored_tools = [] + + for tool_intent in self.tool_intents: + score = sum(1 for keyword in tool_intent.keywords if keyword in query_lower) + if score > 0: + scored_tools.append((tool_intent.tool, score)) 
+ + scored_tools.sort(key=lambda x: x[1], reverse=True) + return [tool for tool, _ in scored_tools[:max_tools]] + + async def explain_selection(self, query: str, max_tools: int = 3) -> Dict[str, Any]: + """ + Explain why specific tools were selected for debugging and transparency. + + Args: + query: User's query + max_tools: Maximum number of tools to analyze + + Returns: + Dictionary with selection explanation + """ + try: + query_embedding = np.array(self.embeddings_model.embed_query(query)) + + explanations = [] + for tool_intent in self.tool_intents: + if tool_intent.embedding is not None: + similarity = cosine_similarity( + query_embedding.reshape(1, -1), + tool_intent.embedding.reshape(1, -1) + )[0][0] + + keyword_matches = [ + kw for kw in tool_intent.keywords + if kw in query.lower() + ] + + explanations.append({ + "tool_name": tool_intent.tool.name, + "similarity_score": float(similarity), + "keyword_matches": keyword_matches, + "description": tool_intent.description, + "selected": similarity >= 0.5 + }) + + explanations.sort(key=lambda x: x["similarity_score"], reverse=True) + + return { + "query": query, + "explanations": explanations[:max_tools], + "selection_method": "semantic_similarity" + } + + except Exception as e: + logger.error(f"Error explaining selection: {e}") + return { + "query": query, + "error": str(e), + "selection_method": "fallback" + } + + def get_tool_coverage(self) -> Dict[str, Any]: + """Get information about tool coverage and semantic setup.""" + return { + "total_tools": len(self.tool_intents), + "tools_with_embeddings": sum(1 for ti in self.tool_intents if ti.embedding is not None), + "tools": [ + { + "name": ti.tool.name, + "has_embedding": ti.embedding is not None, + "example_count": len(ti.examples), + "keyword_count": len(ti.keywords) + } + for ti in self.tool_intents + ] + } + + +# Utility function for easy integration +async def create_semantic_selector(tools: List[BaseTool]) -> SemanticToolSelector: + """ + Create and initialize a semantic tool selector. + + Args: + tools: List of available tools + + Returns: + Initialized SemanticToolSelector + """ + return SemanticToolSelector(tools) + + +# Example usage and testing +async def test_semantic_selection(): + """Test function to demonstrate semantic tool selection.""" + from langchain_core.tools import tool + + @tool + def search_courses_tool(query: str) -> str: + """Search for courses based on query.""" + return f"Searching for courses: {query}" + + @tool + def get_recommendations_tool() -> str: + """Get personalized course recommendations.""" + return "Getting recommendations..." 
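+
+    # The stub tools in this test only echo their input; in the real agent the
+    # equivalents call CourseManager and the memory client.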
+ + @tool + def store_preference_tool(preference: str) -> str: + """Store a student preference.""" + return f"Stored preference: {preference}" + + tools = [search_courses_tool, get_recommendations_tool, store_preference_tool] + selector = SemanticToolSelector(tools) + + test_queries = [ + "I want to learn machine learning", + "What courses should I take next?", + "I prefer online classes", + "Show me Redis courses" + ] + + for query in test_queries: + selected = await selector.select_tools(query, max_tools=2) + print(f"Query: '{query}'") + print(f"Selected: {[t.name for t in selected]}") + print() + + +if __name__ == "__main__": + asyncio.run(test_semantic_selection()) diff --git a/python-recipes/context-engineering/reference-agent/redis_context_course/tools.py b/python-recipes/context-engineering/reference-agent/redis_context_course/tools.py new file mode 100644 index 00000000..ac8ac948 --- /dev/null +++ b/python-recipes/context-engineering/reference-agent/redis_context_course/tools.py @@ -0,0 +1,220 @@ +""" +Tools for the Redis University Class Agent. + +This module defines the tools that the agent can use to interact with +the course catalog and student data. These tools are used in the notebooks +throughout the course. +""" + +from typing import List, Optional +from langchain_core.tools import tool +from pydantic import BaseModel, Field + +from .course_manager import CourseManager +from agent_memory_client import MemoryAPIClient + + +# Tool Input Schemas +class SearchCoursesInput(BaseModel): + """Input schema for searching courses.""" + query: str = Field( + description="Natural language search query. Can be topics (e.g., 'machine learning'), " + "characteristics (e.g., 'online courses'), or general questions " + "(e.g., 'beginner programming courses')" + ) + limit: int = Field( + default=5, + description="Maximum number of results to return. Default is 5. " + "Use 3 for quick answers, 10 for comprehensive results." + ) + + +class GetCourseDetailsInput(BaseModel): + """Input schema for getting course details.""" + course_code: str = Field( + description="Specific course code like 'CS101' or 'MATH201'" + ) + + +class CheckPrerequisitesInput(BaseModel): + """Input schema for checking prerequisites.""" + course_code: str = Field( + description="Course code to check prerequisites for" + ) + completed_courses: List[str] = Field( + description="List of course codes the student has completed" + ) + + +# Course Tools +def create_course_tools(course_manager: CourseManager): + """ + Create course-related tools. + + These tools are demonstrated in Section 2 notebooks. + """ + + @tool(args_schema=SearchCoursesInput) + async def search_courses(query: str, limit: int = 5) -> str: + """ + Search for courses using semantic search based on topics, descriptions, or characteristics. + + Use this tool when students ask about: + - Topics or subjects: "machine learning courses", "database courses" + - Course characteristics: "online courses", "beginner courses", "3-credit courses" + - General exploration: "what courses are available in AI?" + + Do NOT use this tool when: + - Student asks about a specific course code (use get_course_details instead) + - Student wants all courses in a department (use a filter instead) + + The search uses semantic matching, so natural language queries work well. + + Examples: + - "machine learning courses" โ†’ finds CS401, CS402, etc. + - "beginner programming" โ†’ finds CS101, CS102, etc. 
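+        - "3-credit online courses" → finds courses matching both characteristics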
+ - "online data science courses" โ†’ finds online courses about data science + """ + results = await course_manager.search_courses(query, limit=limit) + + if not results: + return "No courses found matching your query." + + output = [] + for course in results: + output.append( + f"{course.course_code}: {course.title}\n" + f" Credits: {course.credits} | {course.format.value} | {course.difficulty_level.value}\n" + f" {course.description[:150]}..." + ) + + return "\n\n".join(output) + + @tool(args_schema=GetCourseDetailsInput) + async def get_course_details(course_code: str) -> str: + """ + Get detailed information about a specific course by its course code. + + Use this tool when: + - Student asks about a specific course (e.g., "Tell me about CS101") + - You need prerequisites for a course + - You need full course details (schedule, instructor, etc.) + + Returns complete course information including description, prerequisites, + schedule, credits, and learning objectives. + """ + course = await course_manager.get_course(course_code) + + if not course: + return f"Course {course_code} not found." + + prereqs = "None" if not course.prerequisites else ", ".join( + [f"{p.course_code} (min grade: {p.min_grade})" for p in course.prerequisites] + ) + + return f""" +{course.course_code}: {course.title} + +Description: {course.description} + +Details: +- Credits: {course.credits} +- Department: {course.department} +- Major: {course.major} +- Difficulty: {course.difficulty_level.value} +- Format: {course.format.value} +- Prerequisites: {prereqs} + +Learning Objectives: +""" + "\n".join([f"- {obj}" for obj in course.learning_objectives]) + + @tool(args_schema=CheckPrerequisitesInput) + async def check_prerequisites(course_code: str, completed_courses: List[str]) -> str: + """ + Check if a student meets the prerequisites for a specific course. + + Use this tool when: + - Student asks "Can I take [course]?" + - Student asks about prerequisites + - You need to verify eligibility before recommending a course + + Returns whether the student is eligible and which prerequisites are missing (if any). + """ + course = await course_manager.get_course(course_code) + + if not course: + return f"Course {course_code} not found." + + if not course.prerequisites: + return f"โœ… {course_code} has no prerequisites. You can take this course!" + + missing = [] + for prereq in course.prerequisites: + if prereq.course_code not in completed_courses: + missing.append(f"{prereq.course_code} (min grade: {prereq.min_grade})") + + if not missing: + return f"โœ… You meet all prerequisites for {course_code}!" + + return f"""โŒ You're missing prerequisites for {course_code}: + +Missing: +""" + "\n".join([f"- {p}" for p in missing]) + + return [search_courses, get_course_details, check_prerequisites] + + +# Memory Tools +def create_memory_tools(memory_client: MemoryAPIClient, session_id: str, user_id: str): + """ + Create memory-related tools using the memory client's built-in LangChain integration. + + These tools are demonstrated in Section 3, notebook 04_memory_tools.ipynb. + They give the LLM explicit control over memory operations. 
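+
+    Example (illustrative values):
+        tools = create_memory_tools(client, session_id="s1", user_id="u1")
+        # -> list of LangChain StructuredTools (e.g. create/search memory)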
+ + Args: + memory_client: The memory client instance + session_id: Session ID for the conversation + user_id: User ID for the student + + Returns: + List of LangChain StructuredTool objects for memory operations + """ + from agent_memory_client.integrations.langchain import get_memory_tools + + return get_memory_tools( + memory_client=memory_client, + session_id=session_id, + user_id=user_id + ) + + +# Tool Selection Helpers (from Section 4, notebook 04_tool_optimization.ipynb) +def select_tools_by_keywords(query: str, all_tools: dict) -> List: + """ + Select relevant tools based on query keywords. + + This is a simple tool filtering strategy demonstrated in Section 4. + For production, consider using intent classification or hierarchical tools. + + Args: + query: User's query + all_tools: Dictionary mapping categories to tool lists + + Returns: + List of relevant tools + """ + query_lower = query.lower() + + # Search-related keywords + if any(word in query_lower for word in ['search', 'find', 'show', 'what', 'which', 'tell me about']): + return all_tools.get("search", []) + + # Memory-related keywords + elif any(word in query_lower for word in ['remember', 'recall', 'know about me', 'preferences']): + return all_tools.get("memory", []) + + # Default: return search tools + else: + return all_tools.get("search", []) + diff --git a/python-recipes/context-engineering/reference-agent/requirements.txt b/python-recipes/context-engineering/reference-agent/requirements.txt new file mode 100644 index 00000000..faaf8e68 --- /dev/null +++ b/python-recipes/context-engineering/reference-agent/requirements.txt @@ -0,0 +1,38 @@ +# Core LangGraph and Redis dependencies +langgraph>=0.2.0,<0.3.0 +langgraph-checkpoint>=1.0.0 +langgraph-checkpoint-redis>=0.1.0 + +# Redis Agent Memory Server +agent-memory-client>=0.12.6 + +# Redis and vector storage +redis>=6.0.0 +redisvl>=0.8.0 + +# OpenAI and language models +openai>=1.0.0 +langchain>=0.2.0 +langchain-openai>=0.1.0 +langchain-core>=0.2.0 +langchain-community>=0.2.0 + +# Data processing and utilities +pydantic>=1.8.0,<3.0.0 +python-dotenv>=1.0.0 +click>=8.0.0 +rich>=13.0.0 +faker>=20.0.0 +pandas>=2.0.0 +numpy>=1.24.0 + +# Testing and development +pytest>=7.0.0 +pytest-asyncio>=0.21.0 +black>=23.0.0 +isort>=5.12.0 +mypy>=1.5.0 + +# Optional: For enhanced functionality +tiktoken>=0.5.0 +python-ulid>=3.0.0 diff --git a/python-recipes/context-engineering/reference-agent/setup.py b/python-recipes/context-engineering/reference-agent/setup.py new file mode 100644 index 00000000..dc75259f --- /dev/null +++ b/python-recipes/context-engineering/reference-agent/setup.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python3 +""" +Setup script for the Redis Context Course package. + +This package provides a complete reference implementation of a context-aware +AI agent for university course recommendations, demonstrating context engineering +principles using Redis, LangGraph, and OpenAI. 
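+
+Install in editable mode from this directory:
+
+    pip install -e .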
+""" + +from setuptools import setup, find_packages +from pathlib import Path + +# Read the README file +this_directory = Path(__file__).parent +long_description = (this_directory / "README.md").read_text() + +# Read requirements +requirements = [] +with open("requirements.txt", "r") as f: + requirements = [line.strip() for line in f if line.strip() and not line.startswith("#")] + +setup( + name="redis-context-course", + version="1.0.0", + author="Redis AI Resources Team", + author_email="redis-ai@redis.com", + description="Context Engineering with Redis - University Class Agent Reference Implementation", + long_description=long_description, + long_description_content_type="text/markdown", + url="https://github.com/redis-developer/redis-ai-resources", + project_urls={ + "Bug Reports": "https://github.com/redis-developer/redis-ai-resources/issues", + "Source": "https://github.com/redis-developer/redis-ai-resources/tree/main/python-recipes/context-engineering", + "Documentation": "https://github.com/redis-developer/redis-ai-resources/blob/main/python-recipes/context-engineering/README.md", + }, + packages=find_packages(), + classifiers=[ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Intended Audience :: Education", + "Topic :: Software Development :: Libraries :: Python Modules", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Topic :: Database", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + ], + python_requires=">=3.8", + install_requires=requirements, + extras_require={ + "dev": [ + "pytest>=7.0.0", + "pytest-asyncio>=0.21.0", + "black>=23.0.0", + "isort>=5.12.0", + "mypy>=1.5.0", + "flake8>=6.0.0", + ], + "docs": [ + "sphinx>=5.0.0", + "sphinx-rtd-theme>=1.0.0", + "myst-parser>=0.18.0", + ], + }, + entry_points={ + "console_scripts": [ + "redis-class-agent=redis_context_course.cli:main", + "generate-courses=redis_context_course.scripts.generate_courses:main", + "ingest-courses=redis_context_course.scripts.ingest_courses:main", + ], + }, + include_package_data=True, + package_data={ + "redis_context_course": [ + "data/*.json", + "templates/*.txt", + ], + }, + keywords=[ + "redis", + "ai", + "context-engineering", + "langraph", + "openai", + "vector-database", + "semantic-search", + "memory-management", + "chatbot", + "recommendation-system", + ], + zip_safe=False, +) diff --git a/python-recipes/context-engineering/reference-agent/setup_agent_memory_server.py b/python-recipes/context-engineering/reference-agent/setup_agent_memory_server.py new file mode 100755 index 00000000..3d06500c --- /dev/null +++ b/python-recipes/context-engineering/reference-agent/setup_agent_memory_server.py @@ -0,0 +1,225 @@ +#!/usr/bin/env python3 +""" +Setup script for Agent Memory Server +This script ensures the Agent Memory Server is running with correct configuration +""" + +import os +import sys +import time +import subprocess +import requests +from pathlib import Path +from dotenv import load_dotenv + + +def print_header(text): + """Print a formatted header""" + print(f"\n{text}") + print("=" * len(text)) + + +def print_status(emoji, message): + """Print a status message""" + print(f"{emoji} {message}") + + +def check_docker(): + """Check if Docker is running""" + try: + subprocess.run( + ["docker", 
"info"], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + check=True + ) + return True + except (subprocess.CalledProcessError, FileNotFoundError): + return False + + +def check_container_running(container_name): + """Check if a Docker container is running""" + try: + result = subprocess.run( + ["docker", "ps", "--filter", f"name={container_name}", "--format", "{{.Names}}"], + capture_output=True, + text=True, + check=True + ) + return container_name in result.stdout + except subprocess.CalledProcessError: + return False + + +def check_server_health(url, timeout=2): + """Check if a server is responding""" + try: + response = requests.get(url, timeout=timeout) + return response.status_code == 200 + except: + return False + + +def check_redis_connection_errors(container_name): + """Check Docker logs for Redis connection errors""" + try: + result = subprocess.run( + ["docker", "logs", container_name, "--tail", "50"], + capture_output=True, + text=True, + check=True + ) + return "ConnectionError" in result.stdout or "ConnectionError" in result.stderr + except subprocess.CalledProcessError: + return False + + +def stop_and_remove_container(container_name): + """Stop and remove a Docker container""" + try: + subprocess.run(["docker", "stop", container_name], + stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + subprocess.run(["docker", "rm", container_name], + stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + except: + pass + + +def start_redis(): + """Start Redis container if not running""" + if check_container_running("redis-stack-server"): + print_status("โœ…", "Redis is running") + return True + + print_status("โš ๏ธ ", "Redis not running. Starting Redis...") + try: + subprocess.run([ + "docker", "run", "-d", + "--name", "redis-stack-server", + "-p", "6379:6379", + "redis/redis-stack-server:latest" + ], check=True, stdout=subprocess.DEVNULL) + print_status("โœ…", "Redis started") + return True + except subprocess.CalledProcessError as e: + print_status("โŒ", f"Failed to start Redis: {e}") + return False + + +def start_agent_memory_server(openai_api_key): + """Start Agent Memory Server with correct configuration""" + print_status("๐Ÿš€", "Starting Agent Memory Server...") + + try: + subprocess.run([ + "docker", "run", "-d", + "--name", "agent-memory-server", + "-p", "8088:8000", + "-e", "REDIS_URL=redis://host.docker.internal:6379", + "-e", f"OPENAI_API_KEY={openai_api_key}", + "ghcr.io/redis/agent-memory-server:0.12.3" + ], check=True, stdout=subprocess.DEVNULL) + + # Wait for server to be ready + print_status("โณ", "Waiting for server to be ready...") + for i in range(30): + if check_server_health("http://localhost:8088/v1/health"): + print_status("โœ…", "Agent Memory Server is ready!") + return True + time.sleep(1) + + print_status("โŒ", "Timeout waiting for Agent Memory Server") + print(" Check logs with: docker logs agent-memory-server") + return False + + except subprocess.CalledProcessError as e: + print_status("โŒ", f"Failed to start Agent Memory Server: {e}") + return False + + +def verify_redis_connection(): + """Verify no Redis connection errors in logs""" + print_status("๐Ÿ”", "Verifying Redis connection...") + time.sleep(2) + + if check_redis_connection_errors("agent-memory-server"): + print_status("โŒ", "Redis connection error detected") + print(" Check logs with: docker logs agent-memory-server") + return False + + return True + + +def main(): + """Main setup function""" + print_header("๐Ÿ”ง Agent Memory Server Setup") + + # Load environment 
variables + env_file = Path(__file__).parent / ".env" + if env_file.exists(): + load_dotenv(env_file) + + # Check OPENAI_API_KEY + openai_api_key = os.getenv("OPENAI_API_KEY") + if not openai_api_key: + print_status("โŒ", "Error: OPENAI_API_KEY not set") + print(" Please set it in your .env file or environment") + return False + + # Check Docker + if not check_docker(): + print_status("โŒ", "Error: Docker is not running") + print(" Please start Docker Desktop and try again") + return False + + # Check Redis + print_status("๐Ÿ“Š", "Checking Redis...") + if not start_redis(): + return False + + # Check Agent Memory Server + print_status("๐Ÿ“Š", "Checking Agent Memory Server...") + if check_container_running("agent-memory-server"): + print_status("๐Ÿ”", "Agent Memory Server container exists. Checking health...") + + if check_server_health("http://localhost:8088/v1/health"): + print_status("โœ…", "Agent Memory Server is running and healthy") + + # Check for Redis connection errors + if check_redis_connection_errors("agent-memory-server"): + print_status("โš ๏ธ ", "Detected Redis connection issues. Restarting with correct configuration...") + stop_and_remove_container("agent-memory-server") + else: + print_status("โœ…", "No Redis connection issues detected") + print_header("โœ… Setup Complete!") + print("๐Ÿ“Š Services Status:") + print(" โ€ข Redis: Running on port 6379") + print(" โ€ข Agent Memory Server: Running on port 8088") + print("\n๐ŸŽฏ You can now run the notebooks!") + return True + else: + print_status("โš ๏ธ ", "Agent Memory Server not responding. Restarting...") + stop_and_remove_container("agent-memory-server") + + # Start Agent Memory Server + if not start_agent_memory_server(openai_api_key): + return False + + # Verify Redis connection + if not verify_redis_connection(): + return False + + # Success + print_header("โœ… Setup Complete!") + print("๐Ÿ“Š Services Status:") + print(" โ€ข Redis: Running on port 6379") + print(" โ€ข Agent Memory Server: Running on port 8088") + print("\n๐ŸŽฏ You can now run the notebooks!") + return True + + +if __name__ == "__main__": + success = main() + sys.exit(0 if success else 1) + diff --git a/python-recipes/context-engineering/reference-agent/setup_agent_memory_server.sh b/python-recipes/context-engineering/reference-agent/setup_agent_memory_server.sh new file mode 100755 index 00000000..3d5a4c0e --- /dev/null +++ b/python-recipes/context-engineering/reference-agent/setup_agent_memory_server.sh @@ -0,0 +1,105 @@ +#!/bin/bash +# Setup script for Agent Memory Server +# This script ensures the Agent Memory Server is running with correct configuration + +set -e # Exit on error + +echo "๐Ÿ”ง Agent Memory Server Setup" +echo "==============================" + +# Load environment variables +if [ -f .env ]; then + export $(cat .env | grep -v '^#' | xargs) +fi + +# Check if OPENAI_API_KEY is set +if [ -z "$OPENAI_API_KEY" ]; then + echo "โŒ Error: OPENAI_API_KEY not set" + echo " Please set it in your .env file or environment" + exit 1 +fi + +# Check if Docker is running +if ! docker info > /dev/null 2>&1; then + echo "โŒ Error: Docker is not running" + echo " Please start Docker Desktop and try again" + exit 1 +fi + +# Check if Redis is running +echo "๐Ÿ“Š Checking Redis..." +if ! docker ps --filter name=redis-stack-server --format '{{.Names}}' | grep -q redis-stack-server; then + echo "โš ๏ธ Redis not running. Starting Redis..." 
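+    # Assumes host port 6379 is free; adjust the -p mapping if it is taken.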
+ docker run -d --name redis-stack-server -p 6379:6379 redis/redis-stack-server:latest + echo "โœ… Redis started" +else + echo "โœ… Redis is running" +fi + +# Check if Agent Memory Server is running +echo "๐Ÿ“Š Checking Agent Memory Server..." +if docker ps --filter name=agent-memory-server --format '{{.Names}}' | grep -q agent-memory-server; then + echo "๐Ÿ” Agent Memory Server container exists. Checking health..." + + # Check if it's healthy by testing the connection + if curl -s http://localhost:8088/v1/health > /dev/null 2>&1; then + echo "โœ… Agent Memory Server is running and healthy" + + # Check logs for Redis connection errors + if docker logs agent-memory-server --tail 50 2>&1 | grep -q "ConnectionError.*redis"; then + echo "โš ๏ธ Detected Redis connection issues. Restarting with correct configuration..." + docker stop agent-memory-server > /dev/null 2>&1 + docker rm agent-memory-server > /dev/null 2>&1 + else + echo "โœ… No Redis connection issues detected" + exit 0 + fi + else + echo "โš ๏ธ Agent Memory Server not responding. Restarting..." + docker stop agent-memory-server > /dev/null 2>&1 + docker rm agent-memory-server > /dev/null 2>&1 + fi +fi + +# Start Agent Memory Server with correct configuration +echo "๐Ÿš€ Starting Agent Memory Server..." +docker run -d --name agent-memory-server \ + -p 8088:8000 \ + -e REDIS_URL=redis://host.docker.internal:6379 \ + -e OPENAI_API_KEY="$OPENAI_API_KEY" \ + ghcr.io/redis/agent-memory-server:0.12.3 + +# Wait for server to be healthy +echo "โณ Waiting for server to be ready..." +for i in {1..30}; do + if curl -s http://localhost:8088/v1/health > /dev/null 2>&1; then + echo "โœ… Agent Memory Server is ready!" + break + fi + if [ $i -eq 30 ]; then + echo "โŒ Timeout waiting for Agent Memory Server" + echo " Check logs with: docker logs agent-memory-server" + exit 1 + fi + sleep 1 +done + +# Verify no Redis connection errors +echo "๐Ÿ” Verifying Redis connection..." +sleep 2 +if docker logs agent-memory-server --tail 20 2>&1 | grep -q "ConnectionError.*redis"; then + echo "โŒ Redis connection error detected" + echo " Logs:" + docker logs agent-memory-server --tail 20 + exit 1 +fi + +echo "" +echo "โœ… Setup Complete!" +echo "==============================" +echo "๐Ÿ“Š Services Status:" +echo " โ€ข Redis: Running on port 6379" +echo " โ€ข Agent Memory Server: Running on port 8088" +echo "" +echo "๐ŸŽฏ You can now run the notebooks!" + diff --git a/python-recipes/context-engineering/reference-agent/simple_health_check.py b/python-recipes/context-engineering/reference-agent/simple_health_check.py new file mode 100644 index 00000000..405425bd --- /dev/null +++ b/python-recipes/context-engineering/reference-agent/simple_health_check.py @@ -0,0 +1,158 @@ +#!/usr/bin/env python3 +""" +Simple Redis Context Course System Health Check + +Quick validation of core system functionality. 
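+
+Exits with status 0 when every check passes and 1 otherwise, so it can be
+used as a gate in shell scripts or CI.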
+""" + +import asyncio +import os +import redis +from dotenv import load_dotenv + +# Load environment variables +load_dotenv() + + +def test_redis(): + """Test Redis connection.""" + try: + redis_url = os.getenv("REDIS_URL", "redis://localhost:6379") + r = redis.from_url(redis_url, decode_responses=True) + r.ping() + return True + except: + return False + + +def count_courses(): + """Count course records in Redis.""" + try: + redis_url = os.getenv("REDIS_URL", "redis://localhost:6379") + r = redis.from_url(redis_url, decode_responses=True) + course_keys = r.keys("course_catalog:*") + return len(course_keys) + except: + return 0 + + +def count_majors(): + """Count major records in Redis.""" + try: + redis_url = os.getenv("REDIS_URL", "redis://localhost:6379") + r = redis.from_url(redis_url, decode_responses=True) + major_keys = r.keys("major:*") + return len(major_keys) + except: + return 0 + + +async def test_course_search(): + """Test course search functionality.""" + try: + from redis_context_course.course_manager import CourseManager + course_manager = CourseManager() + courses = await course_manager.search_courses("programming", limit=1) + return len(courses) > 0 + except: + return False + + +async def test_agent(): + """Test basic agent functionality.""" + try: + from redis_context_course import ClassAgent + agent = ClassAgent("test_student") + response = await agent.chat("How many courses are available?") + return response and len(response) > 10 + except: + return False + + +def check_env_vars(): + """Check required environment variables.""" + required_vars = ['OPENAI_API_KEY', 'REDIS_URL', 'AGENT_MEMORY_URL'] + missing = [] + + for var in required_vars: + value = os.getenv(var) + if not value or value == 'your_openai_api_key_here': + missing.append(var) + + return missing + + +async def main(): + """Run all health checks.""" + print("""Redis Context Course - Health Check +=====================================""") + + # Environment check + missing_vars = check_env_vars() + if missing_vars: + print(f"โŒ Environment: Missing {', '.join(missing_vars)}") + print(" Fix: Update .env file with correct values") + return False + else: + print("โœ… Environment: All variables set") + + # Redis check + if test_redis(): + print("โœ… Redis: Connected") + else: + print("โŒ Redis: Connection failed") + print(" Fix: Start Redis with 'docker run -d -p 6379:6379 redis:8-alpine'") + return False + + # Data checks + course_count = count_courses() + major_count = count_majors() + + if course_count > 0: + print(f"โœ… Courses: {course_count} found") + else: + print("โŒ Courses: None found") + print(" Fix: Run 'ingest-courses --catalog course_catalog.json --clear'") + return False + + if major_count > 0: + print(f"โœ… Majors: {major_count} found") + else: + print("โŒ Majors: None found") + print(" Fix: Run 'ingest-courses --catalog course_catalog.json --clear'") + + # Functionality checks + if await test_course_search(): + print("โœ… Course Search: Working") + else: + print("โŒ Course Search: Failed") + print(" Fix: Check if courses have embeddings") + return False + + if await test_agent(): + print("โœ… Agent: Working") + else: + print("โŒ Agent: Failed") + print(" Fix: Check OpenAI API key and course data") + return False + + # Success + print(""" +๐ŸŽฏ Status: READY +๐Ÿ“Š All checks passed! 
+ +๐Ÿš€ Try: redis-class-agent --student-id your_name""") + + return True + + +if __name__ == "__main__": + try: + success = asyncio.run(main()) + exit(0 if success else 1) + except KeyboardInterrupt: + print("\nHealth check interrupted") + exit(1) + except Exception as e: + print(f"โŒ Health check failed: {e}") + exit(1) diff --git a/python-recipes/context-engineering/reference-agent/system_health_check.py b/python-recipes/context-engineering/reference-agent/system_health_check.py new file mode 100644 index 00000000..d0f0ed3c --- /dev/null +++ b/python-recipes/context-engineering/reference-agent/system_health_check.py @@ -0,0 +1,451 @@ +#!/usr/bin/env python3 +""" +Comprehensive Redis Context Course System Health Check + +This script provides a thorough validation of the entire system, +focusing on functional testing rather than specific key patterns. +""" + +import asyncio +import os +import sys +import time +import argparse +from datetime import datetime +from typing import Dict, List, Tuple, Optional +from dataclasses import dataclass +from enum import Enum + +import redis +from dotenv import load_dotenv + +# Load environment variables +load_dotenv() + + +class CheckStatus(Enum): + """Status levels for checks.""" + PASS = "โœ…" + WARN = "โš ๏ธ" + FAIL = "โŒ" + INFO = "โ„น๏ธ" + + +@dataclass +class CheckResult: + """Result of a system check.""" + name: str + status: CheckStatus + message: str + details: Optional[str] = None + fix_command: Optional[str] = None + performance_ms: Optional[float] = None + + +class SystemHealthChecker: + """Comprehensive system health checker.""" + + def __init__(self, verbose: bool = False): + self.verbose = verbose + self.results: List[CheckResult] = [] + self.redis_client = None + + def add_result(self, result: CheckResult): + """Add a check result.""" + self.results.append(result) + + def print_result(self, result: CheckResult): + """Print a single result.""" + output = f"{result.status.value} {result.name}: {result.message}" + if self.verbose and result.details: + output += f"\n Details: {result.details}" + if result.fix_command: + output += f"\n Fix: {result.fix_command}" + if result.performance_ms is not None: + output += f"\n Performance: {result.performance_ms:.1f}ms" + print(output) + + async def check_infrastructure(self) -> List[CheckResult]: + """Check basic infrastructure components.""" + results = [] + + # Redis Connection + start_time = time.time() + try: + redis_url = os.getenv("REDIS_URL", "redis://localhost:6379") + self.redis_client = redis.from_url(redis_url, decode_responses=True) + self.redis_client.ping() + + # Get Redis info + info = self.redis_client.info() + redis_version = info.get('redis_version', 'unknown') + memory_used = info.get('used_memory_human', 'unknown') + + elapsed_ms = (time.time() - start_time) * 1000 + + results.append(CheckResult( + name="Redis Connection", + status=CheckStatus.PASS, + message=f"Connected to Redis {redis_version}", + details=f"Memory used: {memory_used}", + performance_ms=elapsed_ms + )) + + except Exception as e: + results.append(CheckResult( + name="Redis Connection", + status=CheckStatus.FAIL, + message=f"Failed to connect: {e}", + fix_command="docker run -d --name redis -p 6379:6379 redis:8-alpine" + )) + return results + + # Environment Variables + env_vars = { + 'OPENAI_API_KEY': 'OpenAI API access', + 'REDIS_URL': 'Redis connection', + 'AGENT_MEMORY_URL': 'Agent Memory Server' + } + + for var, description in env_vars.items(): + value = os.getenv(var) + if not value or value == 
'your_openai_api_key_here': + results.append(CheckResult( + name=f"Environment: {var}", + status=CheckStatus.FAIL, + message=f"Not set or using placeholder", + fix_command=f"Set {var} in .env file" + )) + else: + # Mask sensitive values + display_value = value[:8] + '...' + value[-4:] if 'API_KEY' in var else value + results.append(CheckResult( + name=f"Environment: {var}", + status=CheckStatus.PASS, + message=f"Configured", + details=display_value + )) + + return results + + def detect_data_patterns(self) -> Dict[str, List[str]]: + """Auto-detect actual data patterns in Redis.""" + all_keys = self.redis_client.keys("*") + + patterns = { + 'majors': [k for k in all_keys if k.startswith('major:')], + 'courses': [k for k in all_keys if k.startswith('course_catalog:')], + 'memory': [k for k in all_keys if 'memory' in k.lower()], + 'working_memory': [k for k in all_keys if 'working_memory' in k], + 'other': [k for k in all_keys if not any(p in k.lower() for p in ['major', 'course', 'memory'])] + } + + return patterns + + def check_data_presence(self) -> List[CheckResult]: + """Check if required data is present.""" + results = [] + + patterns = self.detect_data_patterns() + + # Check majors + major_count = len(patterns['majors']) + if major_count > 0: + results.append(CheckResult( + name="Major Records", + status=CheckStatus.PASS, + message=f"Found {major_count} major records", + details=f"Pattern: major:{{id}}" + )) + else: + results.append(CheckResult( + name="Major Records", + status=CheckStatus.FAIL, + message="No major records found", + fix_command="ingest-courses --catalog course_catalog.json --clear" + )) + + # Check courses + course_count = len(patterns['courses']) + if course_count > 0: + results.append(CheckResult( + name="Course Records", + status=CheckStatus.PASS, + message=f"Found {course_count} course records", + details=f"Pattern: course_catalog:{{id}}" + )) + + # Sample a course to check data quality + if patterns['courses']: + sample_key = patterns['courses'][0] + try: + # Use Redis client without decode_responses for binary data + redis_url = os.getenv("REDIS_URL", "redis://localhost:6379") + binary_redis = redis.from_url(redis_url, decode_responses=False) + sample_data = binary_redis.hgetall(sample_key) + + # Convert keys to strings and check for required fields + field_names = [key.decode('utf-8') for key in sample_data.keys()] + required_fields = ['course_code', 'title', 'description', 'content_vector'] + missing_fields = [f for f in required_fields if f not in field_names] + + if not missing_fields: + # Get text fields safely + course_code = sample_data.get(b'course_code', b'N/A').decode('utf-8') + title = sample_data.get(b'title', b'N/A').decode('utf-8') + + results.append(CheckResult( + name="Course Data Quality", + status=CheckStatus.PASS, + message="All required fields present", + details=f"Sample: {course_code} - {title}" + )) + else: + results.append(CheckResult( + name="Course Data Quality", + status=CheckStatus.WARN, + message=f"Missing fields: {missing_fields}", + fix_command="Re-run ingestion with --clear flag" + )) + + except Exception as e: + results.append(CheckResult( + name="Course Data Quality", + status=CheckStatus.INFO, + message="Cannot validate binary vector data (this is normal)", + details="Vector embeddings are stored as binary data" + )) + else: + results.append(CheckResult( + name="Course Records", + status=CheckStatus.FAIL, + message="No course records found", + fix_command="ingest-courses --catalog course_catalog.json --clear" + )) + + # Memory 
system + memory_count = len(patterns['memory']) + len(patterns['working_memory']) + if memory_count > 0: + results.append(CheckResult( + name="Memory System", + status=CheckStatus.PASS, + message=f"Found {memory_count} memory-related keys", + details="Agent Memory Server integration active" + )) + else: + results.append(CheckResult( + name="Memory System", + status=CheckStatus.INFO, + message="No memory data (normal for fresh install)" + )) + + return results + + async def check_functionality(self) -> List[CheckResult]: + """Test actual system functionality.""" + results = [] + + try: + # Test course manager import and basic functionality + start_time = time.time() + # Import here as this is a conditional test, not main functionality + from redis_context_course.course_manager import CourseManager + from redis_context_course import ClassAgent + + course_manager = CourseManager() + elapsed_ms = (time.time() - start_time) * 1000 + + results.append(CheckResult( + name="Package Import", + status=CheckStatus.PASS, + message="Successfully imported core modules", + performance_ms=elapsed_ms + )) + + # Test course search + start_time = time.time() + courses = await course_manager.search_courses("programming", limit=3) + elapsed_ms = (time.time() - start_time) * 1000 + + if courses: + results.append(CheckResult( + name="Course Search", + status=CheckStatus.PASS, + message=f"Found {len(courses)} courses", + details=f"Sample: {courses[0].course_code} - {courses[0].title}", + performance_ms=elapsed_ms + )) + else: + results.append(CheckResult( + name="Course Search", + status=CheckStatus.FAIL, + message="Search returned no results", + fix_command="Check if courses are properly ingested with embeddings" + )) + + # Test agent initialization + start_time = time.time() + agent = ClassAgent("health_check_student") + elapsed_ms = (time.time() - start_time) * 1000 + + results.append(CheckResult( + name="Agent Initialization", + status=CheckStatus.PASS, + message="Agent created successfully", + performance_ms=elapsed_ms + )) + + # Test basic agent query + start_time = time.time() + response = await agent.chat("How many courses are available?") + elapsed_ms = (time.time() - start_time) * 1000 + + if response and len(response) > 10: + results.append(CheckResult( + name="Agent Query", + status=CheckStatus.PASS, + message="Agent responded successfully", + details=f"Response length: {len(response)} chars", + performance_ms=elapsed_ms + )) + else: + results.append(CheckResult( + name="Agent Query", + status=CheckStatus.FAIL, + message="Agent query failed or returned empty response", + details=f"Response: {response}" + )) + + except ImportError as e: + results.append(CheckResult( + name="Package Import", + status=CheckStatus.FAIL, + message=f"Import failed: {e}", + fix_command="pip install -e ." 
+ )) + except Exception as e: + results.append(CheckResult( + name="Functionality Test", + status=CheckStatus.FAIL, + message=f"Unexpected error: {e}", + details=str(e) + )) + + return results + + def generate_summary(self) -> Dict[str, any]: + """Generate overall system summary.""" + total = len(self.results) + passed = len([r for r in self.results if r.status == CheckStatus.PASS]) + warnings = len([r for r in self.results if r.status == CheckStatus.WARN]) + failed = len([r for r in self.results if r.status == CheckStatus.FAIL]) + + if failed == 0 and warnings == 0: + overall_status = "EXCELLENT" + elif failed == 0: + overall_status = "GOOD" + elif failed <= 2: + overall_status = "NEEDS ATTENTION" + else: + overall_status = "CRITICAL ISSUES" + + return { + 'overall_status': overall_status, + 'total_checks': total, + 'passed': passed, + 'warnings': warnings, + 'failed': failed, + 'critical_issues': [r for r in self.results if r.status == CheckStatus.FAIL], + 'avg_performance': sum(r.performance_ms for r in self.results if r.performance_ms) / max(1, len([r for r in self.results if r.performance_ms])) + } + + async def run_all_checks(self): + """Run all system checks.""" + print(f"""Redis Context Course - System Health Check +{"=" * 60} +Started at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} + +INFRASTRUCTURE +{"-" * 20}""") + infra_results = await self.check_infrastructure() + for result in infra_results: + self.add_result(result) + self.print_result(result) + + # Only continue if Redis is working + if not any(r.status == CheckStatus.FAIL and "Redis Connection" in r.name for r in infra_results): + # Data presence checks + print(f""" +DATA VALIDATION +{"-" * 20}""") + data_results = self.check_data_presence() + for result in data_results: + self.add_result(result) + self.print_result(result) + + # Functionality checks + print(f""" +FUNCTIONALITY +{"-" * 20}""") + func_results = await self.check_functionality() + for result in func_results: + self.add_result(result) + self.print_result(result) + + # Summary + summary = self.generate_summary() + summary_output = f""" +SUMMARY +{"-" * 20} +๐ŸŽฏ Overall Status: {summary['overall_status']} +๐Ÿ“Š Results: {summary['passed']}/{summary['total_checks']} passed""" + + if summary['warnings'] > 0: + summary_output += f"\nโš ๏ธ Warnings: {summary['warnings']}" + if summary['failed'] > 0: + summary_output += f"\nโŒ Failed: {summary['failed']}" + if summary['avg_performance'] > 0: + summary_output += f"\nโšก Avg Response Time: {summary['avg_performance']:.1f}ms" + + print(summary_output) + + # Critical issues + if summary['critical_issues']: + issues_output = "\nCRITICAL ISSUES TO FIX:" + for issue in summary['critical_issues']: + issues_output += f"\n โ€ข {issue.name}: {issue.message}" + if issue.fix_command: + issues_output += f"\n Fix: {issue.fix_command}" + print(issues_output) + + # Next steps + if summary['failed'] == 0: + next_steps = """\nNEXT STEPS: + โ€ข System is ready! 
Try: redis-class-agent --student-id your_name
+  • Explore examples in the examples/ directory
+  • Check out the notebooks for tutorials"""
+        else:
+            next_steps = """\nNEXT STEPS:
+  • Fix the critical issues listed above
+  • Re-run this health check to verify fixes
+  • Check the documentation for troubleshooting"""
+
+        print(next_steps)
+
+        return summary['failed'] == 0
+
+
+async def main():
+    """Main function."""
+    parser = argparse.ArgumentParser(description="Redis Context Course System Health Check")
+    parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
+    args = parser.parse_args()
+
+    checker = SystemHealthChecker(verbose=args.verbose)
+    success = await checker.run_all_checks()
+
+    sys.exit(0 if success else 1)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/python-recipes/context-engineering/reference-agent/tests/__init__.py b/python-recipes/context-engineering/reference-agent/tests/__init__.py
new file mode 100644
index 00000000..394ceec4
--- /dev/null
+++ b/python-recipes/context-engineering/reference-agent/tests/__init__.py
@@ -0,0 +1,3 @@
+"""
+Tests for the Redis Context Course package.
+"""
diff --git a/python-recipes/context-engineering/reference-agent/tests/conftest.py b/python-recipes/context-engineering/reference-agent/tests/conftest.py
new file mode 100644
index 00000000..3998de52
--- /dev/null
+++ b/python-recipes/context-engineering/reference-agent/tests/conftest.py
@@ -0,0 +1,20 @@
+import os
+import time
+import pytest
+from testcontainers.core.container import DockerContainer
+
+
+@pytest.fixture(scope="session")
+def redis_stack_url():
+    """Start a Redis 8 container (modules built-in) and yield REDIS_URL."""
+    image = os.getenv("TEST_REDIS_IMAGE", "redis:8.2.1")
+    # Expose the port before entering the context manager, which starts the container
+    container = DockerContainer(image).with_exposed_ports(6379)
+    with container as c:
+        host = c.get_container_host_ip()
+        port = int(c.get_exposed_port(6379))
+        url = f"redis://{host}:{port}"
+        # Tiny wait for readiness
+        time.sleep(1.0)
+        yield url
+
diff --git a/python-recipes/context-engineering/reference-agent/tests/test_agent_chat.py b/python-recipes/context-engineering/reference-agent/tests/test_agent_chat.py
new file mode 100644
index 00000000..5268dde3
--- /dev/null
+++ b/python-recipes/context-engineering/reference-agent/tests/test_agent_chat.py
@@ -0,0 +1,76 @@
+import asyncio
+import os
+import types
+import pytest
+
+# Target under test
+from redis_context_course import agent as agent_mod
+from langchain_core.messages import AIMessage
+
+
+class FakeMemoryClient:
+    def __init__(self, config):
+        self.config = config
+        self.put_calls = []
+
+    async def get_or_create_working_memory(self, session_id: str, user_id: str, model_name: str):
+        # Return a simple object with .messages list
+        wm = types.SimpleNamespace(messages=[])
+        return True, wm
+
+    async def search_long_term_memory(self, text: str, user_id, limit: int = 5):
+        # Return an object with .memories to mimic client result
+        return types.SimpleNamespace(memories=[])
+
+    async def put_working_memory(self, session_id: str, memory, user_id: str, model_name: str):
+        self.put_calls.append({
+            "session_id": session_id,
+            "user_id": user_id,
+            "model_name": model_name,
+            "message_count": len(getattr(memory, "messages", [])),
+        })
+        return True
+
+
+class FakeLLM:
+    def __init__(self, model: str, temperature: float = 0.7):
+        self.model = model
+        self.temperature = temperature
+
+    def bind_tools(self, tools):
+        # Return self to support .ainvoke(messages)
+        return self
+
+    async def ainvoke(self, 
messages): + # Return a basic AIMessage without tool calls + return AIMessage(content="TEST_RESPONSE") + + +class FakeCourseManager: + def __init__(self): + pass + + +@pytest.mark.asyncio +async def test_agent_chat_returns_llm_response_and_saves_memory(monkeypatch): + # Patch heavy dependencies used inside the agent module + monkeypatch.setattr(agent_mod, "MemoryAPIClient", FakeMemoryClient) + monkeypatch.setattr(agent_mod, "ChatOpenAI", FakeLLM) + monkeypatch.setattr(agent_mod, "CourseManager", FakeCourseManager) + + # Ensure env var is set but the value won't be used due to mocks + monkeypatch.setenv("AGENT_MEMORY_URL", "http://localhost:8088") + + a = agent_mod.ClassAgent("student_test") + result = await a.chat("hello") + + assert result == "TEST_RESPONSE" + + # Verify working memory save happened + mc: FakeMemoryClient = a.memory_client # type: ignore + assert len(mc.put_calls) == 1 + assert mc.put_calls[0]["session_id"] == a.session_id + assert mc.put_calls[0]["user_id"] == a.student_id + # Should have at least 2 messages (user + assistant) + assert mc.put_calls[0]["message_count"] >= 2 + diff --git a/python-recipes/context-engineering/reference-agent/tests/test_agent_tool_path.py b/python-recipes/context-engineering/reference-agent/tests/test_agent_tool_path.py new file mode 100644 index 00000000..3bb0031d --- /dev/null +++ b/python-recipes/context-engineering/reference-agent/tests/test_agent_tool_path.py @@ -0,0 +1,125 @@ +import asyncio +import os +import types +import pytest + +from langchain_core.messages import AIMessage + +# Import module under test +from redis_context_course import agent as agent_mod +from redis_context_course.redis_config import redis_config +from redis_context_course.course_manager import CourseManager +from redis_context_course.models import ( + Course, + DifficultyLevel, + CourseFormat, + CourseSchedule, +) + + +class FakeMemoryClient: + def __init__(self, config): + self.config = config + self.put_calls = [] + + async def get_or_create_working_memory(self, session_id: str, user_id: str, model_name: str): + wm = types.SimpleNamespace(messages=[]) + return True, wm + + async def search_long_term_memory(self, text: str, user_id, limit: int = 5): + return types.SimpleNamespace(memories=[]) + + async def put_working_memory(self, session_id: str, memory, user_id: str, model_name: str): + self.put_calls.append({ + "session_id": session_id, + "user_id": user_id, + "model_name": model_name, + "message_count": len(getattr(memory, "messages", [])), + }) + return True + + +class ToolCallingLLM: + """A minimal LLM stub that first requests a tool, then returns a normal answer.""" + def __init__(self, model: str, temperature: float = 0.7): + self.model = model + self.temperature = temperature + self._call_num = 0 + + def bind_tools(self, tools): + # LangGraph/ToolNode will handle calling the tool + return self + + async def ainvoke(self, messages): + self._call_num += 1 + if self._call_num == 1: + # Ask to call the agent's _search_courses_tool (LangChain expects an id field) + return AIMessage( + content="", + tool_calls=[{"id": "call_1", "name": "_search_courses_tool", "args": {"query": "python", "filters": {}}}], + ) + # After the tool runs, return a normal assistant message + return AIMessage(content="Here are some relevant Python courses.") + + +@pytest.mark.asyncio +async def test_agent_executes_tool_path_with_real_redis(redis_stack_url, monkeypatch): + # Point the agent at the Testcontainers Redis 8 instance + monkeypatch.setenv("REDIS_URL", redis_stack_url) + + # 
Reinitialize redis_config so it connects to the container, not any cached client + redis_config.cleanup() + redis_config._redis_client = None + redis_config._vector_index = None + + # Avoid real OpenAI calls: make embeddings deterministic + async def fake_embed_query(text: str): + # Use a constant non-zero vector to ensure cosine similarity works + return [1.0] * 1536 + + # Provide a dummy embeddings instance to avoid OpenAI calls + class _DummyEmb: + async def aembed_query(self, text: str): + return [1.0] * 1536 + redis_config._embeddings = _DummyEmb() + + # Seed a course into Redis via the real CourseManager and real index + cm = CourseManager() + course = Course( + id="c1", + course_code="CS101", + title="Python Basics", + description="Introductory Python programming", + department="CS", + major="CS", + difficulty_level=DifficultyLevel.BEGINNER, + format=CourseFormat.ONLINE, + semester="fall", + year=2025, + credits=3, + tags=["python", "programming"], + instructor="Dr. Py", + max_enrollment=100, + current_enrollment=0, + learning_objectives=["Variables", "Loops"], + prerequisites=[], + schedule=CourseSchedule(days=["monday"], start_time="09:00", end_time="10:00"), + ) + await cm.store_course(course) + + # Patch Memory API client (we are only avoiding the network service; Redis is real) + monkeypatch.setattr(agent_mod, "MemoryAPIClient", FakeMemoryClient) + # Patch LLM to drive tool path + monkeypatch.setattr(agent_mod, "ChatOpenAI", ToolCallingLLM) + + a = agent_mod.ClassAgent("student_tool_path") + result = await a.chat("Find beginner Python courses") + + # Validate final response and that memory was saved + assert "Python" in result or "courses" in result + mc: FakeMemoryClient = a.memory_client # type: ignore + assert len(mc.put_calls) == 1 + assert mc.put_calls[0]["session_id"] == a.session_id + assert mc.put_calls[0]["user_id"] == a.student_id + assert mc.put_calls[0]["message_count"] >= 2 + diff --git a/python-recipes/context-engineering/reference-agent/tests/test_package.py b/python-recipes/context-engineering/reference-agent/tests/test_package.py new file mode 100644 index 00000000..de9e1297 --- /dev/null +++ b/python-recipes/context-engineering/reference-agent/tests/test_package.py @@ -0,0 +1,166 @@ +""" +Basic tests to verify the package structure and imports work correctly. 
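+
+These tests exercise imports and pure helpers only; they are not expected to
+need a live Redis instance or OpenAI access.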
+""" + +import pytest + + +def test_package_imports(): + """Test that the main package imports work correctly.""" + try: + import redis_context_course + assert redis_context_course.__version__ == "1.0.0" + assert redis_context_course.__author__ == "Redis AI Resources Team" + except ImportError as e: + pytest.fail(f"Failed to import redis_context_course: {e}") + + +def test_model_imports(): + """Test that model imports work correctly.""" + try: + from redis_context_course.models import ( + Course, StudentProfile, DifficultyLevel, CourseFormat + ) + + # Test enum values + assert DifficultyLevel.BEGINNER == "beginner" + assert CourseFormat.ONLINE == "online" + + except ImportError as e: + pytest.fail(f"Failed to import models: {e}") + + +def test_manager_imports(): + """Test that manager imports work correctly.""" + try: + from redis_context_course import MemoryClient, MemoryClientConfig + from redis_context_course.course_manager import CourseManager + from redis_context_course.redis_config import RedisConfig + + # Test that classes can be instantiated (without Redis connection) + assert MemoryClient is not None + assert MemoryClientConfig is not None + assert CourseManager is not None + assert RedisConfig is not None + + except ImportError as e: + pytest.fail(f"Failed to import managers: {e}") + + +def test_agent_imports(): + """Test that agent imports work correctly.""" + try: + from redis_context_course.agent import ClassAgent, AgentState + + assert ClassAgent is not None + assert AgentState is not None + + except ImportError as e: + pytest.fail(f"Failed to import agent: {e}") + + +def test_scripts_imports(): + """Test that script imports work correctly.""" + try: + from redis_context_course.scripts import generate_courses, ingest_courses + + assert generate_courses is not None + assert ingest_courses is not None + + except ImportError as e: + pytest.fail(f"Failed to import scripts: {e}") + + +def test_cli_imports(): + """Test that CLI imports work correctly.""" + try: + from redis_context_course import cli + + assert cli is not None + assert hasattr(cli, 'main') + + except ImportError as e: + pytest.fail(f"Failed to import CLI: {e}") + + +def test_tools_imports(): + """Test that tools module imports work correctly.""" + try: + from redis_context_course.tools import ( + create_course_tools, + create_memory_tools, + select_tools_by_keywords + ) + + assert create_course_tools is not None + assert create_memory_tools is not None + assert select_tools_by_keywords is not None + + except ImportError as e: + pytest.fail(f"Failed to import tools: {e}") + + +def test_optimization_helpers_imports(): + """Test that optimization helpers import work correctly.""" + try: + from redis_context_course.optimization_helpers import ( + count_tokens, + estimate_token_budget, + hybrid_retrieval, + create_summary_view, + filter_tools_by_intent, + format_context_for_llm + ) + + assert count_tokens is not None + assert estimate_token_budget is not None + assert hybrid_retrieval is not None + assert create_summary_view is not None + assert filter_tools_by_intent is not None + assert format_context_for_llm is not None + + except ImportError as e: + pytest.fail(f"Failed to import optimization helpers: {e}") + + +def test_count_tokens_basic(): + """Test basic token counting functionality.""" + try: + from redis_context_course.optimization_helpers import count_tokens + + # Test with simple text + text = "Hello, world!" 
+ tokens = count_tokens(text) + + assert isinstance(tokens, int) + assert tokens > 0 + + except Exception as e: + pytest.fail(f"Token counting failed: {e}") + + +def test_filter_tools_by_intent_basic(): + """Test basic tool filtering functionality.""" + try: + from redis_context_course.optimization_helpers import filter_tools_by_intent + + # Mock tool groups + tool_groups = { + "search": ["search_tool"], + "memory": ["memory_tool"], + } + + # Test search intent + result = filter_tools_by_intent("find courses", tool_groups) + assert result == ["search_tool"] + + # Test memory intent + result = filter_tools_by_intent("remember this", tool_groups) + assert result == ["memory_tool"] + + except Exception as e: + pytest.fail(f"Tool filtering failed: {e}") + + +if __name__ == "__main__": + pytest.main([__file__]) diff --git a/python-recipes/context-engineering/reference-agent/tests/test_tools.py b/python-recipes/context-engineering/reference-agent/tests/test_tools.py new file mode 100644 index 00000000..9ddfeaa4 --- /dev/null +++ b/python-recipes/context-engineering/reference-agent/tests/test_tools.py @@ -0,0 +1,148 @@ +import asyncio +import pytest +from unittest.mock import AsyncMock, MagicMock + +from redis_context_course import tools as tools_mod +from redis_context_course.agent import ClassAgent + + +class FakeCourse: + def __init__(self, code, title, desc, credits=3, fmt="Online", diff="Beginner"): + self.course_code = code + self.title = title + self.description = desc + self.credits = credits + self.format = type("Fmt", (), {"value": fmt}) + self.difficulty_level = type("Diff", (), {"value": diff}) + self.prerequisites = [] + + +class FakeCourseManager: + async def search_courses(self, query: str, limit: int = 5): + return [ + FakeCourse("CS101", "Intro to CS", "Learn basics of programming"), + FakeCourse("CS102", "Python Basics", "Introductory Python course"), + ][:limit] + + async def get_course(self, course_code: str): + if course_code == "MISSING": + return None + return FakeCourse(course_code, "Some Course", "Detailed description") + + +@pytest.mark.asyncio +async def test_search_courses_tool_formats_result(): + cm = FakeCourseManager() + (search_tool, get_details_tool, check_prereq_tool) = tools_mod.create_course_tools(cm) + + out = await search_tool.ainvoke({"query": "python beginner", "limit": 2}) + assert "CS101" in out and "CS102" in out + assert "Credits:" in out and "Online" in out + + +@pytest.mark.asyncio +async def test_get_course_details_handles_missing(): + cm = FakeCourseManager() + (_, get_details_tool, _) = tools_mod.create_course_tools(cm) + + out = await get_details_tool.ainvoke({"course_code": "MISSING"}) + assert "not found" in out.lower() + + +def test_select_tools_by_keywords(): + tools_map = { + "search": ["S1"], + "memory": ["M1"], + } + res1 = tools_mod.select_tools_by_keywords("find programming courses", tools_map) + res2 = tools_mod.select_tools_by_keywords("please remember my preferences", tools_map) + res3 = tools_mod.select_tools_by_keywords("random", tools_map) + + assert res1 == ["S1"] + assert res2 == ["M1"] + assert res3 == ["S1"] # defaults to search + + +@pytest.mark.asyncio +async def test_summarize_user_knowledge_tool(): + """Test that the user knowledge summary tool is properly integrated.""" + # Test that the tool exists in the agent's tool list + with pytest.MonkeyPatch().context() as m: + # Mock the environment variable + m.setenv("OPENAI_API_KEY", "test-key") + + # Create agent + agent = ClassAgent("test_user", "test_session") + + # Get the 
tools
+        tools = agent._get_tools()
+
+        # Verify the summarize user knowledge tool is in the list
+        tool_names = [tool.name for tool in tools]
+        assert "summarize_user_knowledge_tool" in tool_names
+
+        # Find the specific tool
+        summary_tool = None
+        for tool in tools:
+            if tool.name == "summarize_user_knowledge_tool":
+                summary_tool = tool
+                break
+
+        assert summary_tool is not None
+        assert "summarize what the agent knows about the user" in summary_tool.description.lower()
+
+        # Test that the tool has the expected properties
+        assert hasattr(summary_tool, 'ainvoke')
+        assert summary_tool.name == "summarize_user_knowledge_tool"
+
+
+@pytest.mark.asyncio
+async def test_summarize_user_knowledge_tool_in_system_prompt():
+    """Test that the user knowledge summary tool is mentioned in the system prompt."""
+    with pytest.MonkeyPatch().context() as m:
+        # Mock the environment variable
+        m.setenv("OPENAI_API_KEY", "test-key")
+
+        # Create agent
+        agent = ClassAgent("test_user", "test_session")
+
+        # Build system prompt
+        context = {"preferences": [], "goals": [], "recent_facts": []}
+        system_prompt = agent._build_system_prompt(context)
+
+        # Verify the tool is mentioned in the system prompt
+        assert "summarize_user_knowledge" in system_prompt
+        assert "comprehensive summary of what you know about the user" in system_prompt
+
+
+@pytest.mark.asyncio
+async def test_clear_user_memories_tool():
+    """Test that the clear user memories tool is properly integrated."""
+    with pytest.MonkeyPatch().context() as m:
+        # Mock the environment variable
+        m.setenv("OPENAI_API_KEY", "test-key")
+
+        # Create agent
+        agent = ClassAgent("test_user", "test_session")
+
+        # Get the tools
+        tools = agent._get_tools()
+
+        # Verify the clear user memories tool is in the list
+        tool_names = [tool.name for tool in tools]
+        assert "clear_user_memories_tool" in tool_names
+
+        # Find the specific tool
+        clear_tool = None
+        for tool in tools:
+            if tool.name == "clear_user_memories_tool":
+                clear_tool = tool
+                break
+
+        assert clear_tool is not None
+        assert "clear or reset stored user information" in clear_tool.description.lower()
+
+        # Test that the tool has the expected properties
+        assert hasattr(clear_tool, 'ainvoke')
+        assert clear_tool.name == "clear_user_memories_tool"
+
diff --git a/python-recipes/context-engineering/requirements.txt b/python-recipes/context-engineering/requirements.txt
new file mode 100644
index 00000000..8f9f994a
--- /dev/null
+++ b/python-recipes/context-engineering/requirements.txt
@@ -0,0 +1,7 @@
+# Core dependencies for Context Engineering notebooks
+jupyter>=1.0.0
+python-dotenv>=1.0.0
+
+# The reference agent package should be installed separately with:
+# pip install -e reference-agent/
+
diff --git a/python-recipes/vector-search/01_redisvl-nk.ipynb b/python-recipes/vector-search/01_redisvl-nk.ipynb
new file mode 100644
index 00000000..ff20ead7
--- /dev/null
+++ b/python-recipes/vector-search/01_redisvl-nk.ipynb
@@ -0,0 +1,2206 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "cbba56a9",
+   "metadata": {
+    "id": "cbba56a9"
+   },
+   "source": [
+    "![Redis](https://redis.io/wp-content/uploads/2024/04/Logotype.svg?auto=webp&quality=85,75&width=120)\n",
+    "# Vector Search with RedisVL\n",
+    "\n",
+    "## Let's Begin!\n",
+    "\"Open\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0b80de6b",
+   "metadata": {
+    "id": "0b80de6b"
+   },
+   "source": [
+    "## Prepare data\n",
+    "\n",
+    "In this example, we will load a list of movies with the following attributes: `title`, `rating`, `description`, and 
`genre`.\n",
 + "\n",
 + "We will embed the movie description so that users can search for movies that best match the kind of movie they're looking for.\n",
 + "\n",
 + "**If you are running this notebook locally**, you may not need to perform this step at all."
 + ]
 + },
 + {
 + "cell_type": "code",
 + "execution_count": 1,
 + "id": "b966a9b5",
 + "metadata": {
 + "colab": {
 + "base_uri": "https://localhost:8080/"
 + },
 + "id": "b966a9b5",
 + "outputId": "8fb1aed9-94a3-47b2-af50-4eac9b08d7f1"
 + },
 + "outputs": [
 + {
 + "name": "stdout",
 + "output_type": "stream",
 + "text": [
 + "Cloning into 'temp_repo'...\n",
 + "remote: Enumerating objects: 669, done.\u001B[K\n",
 + "remote: Counting objects: 100% (320/320), done.\u001B[K\n",
 + "remote: Compressing objects: 100% (207/207), done.\u001B[K\n",
 + "remote: Total 669 (delta 219), reused 141 (delta 112), pack-reused 349 (from 2)\u001B[K\n",
 + "Receiving objects: 100% (669/669), 57.77 MiB | 20.61 MiB/s, done.\n",
 + "Resolving deltas: 100% (287/287), done.\n"
 + ]
 + }
 + ],
 + "source": [
 + "# NBVAL_SKIP\n",
 + "!git clone https://github.com/redis-developer/redis-ai-resources.git temp_repo\n",
 + "!mv temp_repo/python-recipes/vector-search/resources .\n",
 + "!rm -rf temp_repo"
 + ]
 + },
 + {
 + "cell_type": "markdown",
 + "id": "19bdc2a5-2192-4f5f-bd6e-7c956fd0e230",
 + "metadata": {
 + "id": "19bdc2a5-2192-4f5f-bd6e-7c956fd0e230"
 + },
 + "source": [
 + "## Packages"
 + ]
 + },
 + {
 + "cell_type": "code",
 + "execution_count": null,
 + "id": "c620286e",
 + "metadata": {
 + "id": "c620286e"
 + },
 + "outputs": [],
 + "source": [
 + "%pip install -q \"redisvl>=0.6.0\" sentence-transformers pandas nltk"
 + ]
 + },
 + {
 + "cell_type": "markdown",
 + "id": "323aec7f",
 + "metadata": {
 + "id": "323aec7f"
 + },
 + "source": [
 + "## Install Redis Stack\n",
 + "\n",
 + "Later in this tutorial, Redis will be used to store, index, and query vector\n",
 + "embeddings created from the movie descriptions. **We need to make sure we have a Redis\n",
 + "instance available.**\n",
 + "\n",
 + "#### For Colab\n",
 + "Use the shell script below to download, extract, and install [Redis Stack](https://redis.io/docs/getting-started/install-stack/) directly from the Redis package archive."
 + ]
 + },
 + {
 + "cell_type": "code",
 + "execution_count": null,
 + "id": "2cb85a99",
 + "metadata": {
 + "id": "2cb85a99"
 + },
 + "outputs": [],
 + "source": [
 + "# NBVAL_SKIP\n",
 + "%%sh\n",
 + "curl -fsSL https://packages.redis.io/gpg | sudo gpg --dearmor -o /usr/share/keyrings/redis-archive-keyring.gpg\n",
 + "echo \"deb [signed-by=/usr/share/keyrings/redis-archive-keyring.gpg] https://packages.redis.io/deb $(lsb_release -cs) main\" | sudo tee /etc/apt/sources.list.d/redis.list\n",
 + "sudo apt-get update > /dev/null 2>&1\n",
 + "sudo apt-get install redis-stack-server > /dev/null 2>&1\n",
 + "redis-stack-server --daemonize yes"
 + ]
 + },
 + {
 + "cell_type": "markdown",
 + "id": "7c5dbaaf",
 + "metadata": {
 + "id": "7c5dbaaf"
 + },
 + "source": [
 + "#### For Alternative Environments\n",
 + "There are many ways to get the necessary redis-stack instance running:\n",
 + "1. On cloud, deploy a [FREE instance of Redis in the cloud](https://redis.com/try-free/). Or, if you have your\n",
 + "own version of Redis Enterprise running, that works too!\n",
 + "2. Per OS, [see the docs](https://redis.io/docs/latest/operate/oss_and_stack/install/install-stack/)\n",
 + "3. 
With docker: `docker run -d --name redis-stack-server -p 6379:6379 redis/redis-stack-server:latest`" + ] + }, + { + "cell_type": "markdown", + "id": "1d4499ae", + "metadata": { + "id": "1d4499ae" + }, + "source": [ + "### Define the Redis Connection URL\n", + "\n", + "By default this notebook connects to the local instance of Redis Stack. **If you have your own Redis Enterprise instance** - replace REDIS_PASSWORD, REDIS_HOST and REDIS_PORT values with your own." + ] + }, + { + "cell_type": "code", + "id": "aefda1d1", + "metadata": { + "id": "aefda1d1", + "ExecuteTime": { + "end_time": "2025-10-30T19:19:35.458522Z", + "start_time": "2025-10-30T19:19:35.454934Z" + } + }, + "source": [ + "import os\n", + "import warnings\n", + "\n", + "warnings.filterwarnings('ignore')\n", + "\n", + "# Replace values below with your own if using Redis Cloud instance\n", + "REDIS_HOST = os.getenv(\"REDIS_HOST\", \"localhost\") # ex: \"redis-18374.c253.us-central1-1.gce.cloud.redislabs.com\"\n", + "REDIS_PORT = os.getenv(\"REDIS_PORT\", \"6379\") # ex: 18374\n", + "REDIS_PASSWORD = os.getenv(\"REDIS_PASSWORD\", \"\") # ex: \"1TNxTEdYRDgIDKM2gDfasupCADXXXX\"\n", + "\n", + "# If SSL is enabled on the endpoint, use rediss:// as the URL prefix\n", + "REDIS_URL = f\"redis://:{REDIS_PASSWORD}@{REDIS_HOST}:{REDIS_PORT}\"" + ], + "outputs": [], + "execution_count": 27 + }, + { + "cell_type": "markdown", + "id": "f8c6ef53", + "metadata": { + "id": "f8c6ef53" + }, + "source": [ + "### Create redis client" + ] + }, + { + "cell_type": "code", + "id": "370c1fcc", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "370c1fcc", + "outputId": "2b5297c6-83b7-468f-b2ac-c47acf13ba2e", + "ExecuteTime": { + "end_time": "2025-10-30T19:19:40.605754Z", + "start_time": "2025-10-30T19:19:40.598722Z" + } + }, + "source": [ + "from redis import Redis\n", + "\n", + "client = Redis.from_url(REDIS_URL)\n", + "client.ping()" + ], + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 28 + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "H4w8c3Bevzq4", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "H4w8c3Bevzq4", + "outputId": "a4d3b9a4-adda-436e-9aef-b4b0120720ab" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#client.flushall()" + ] + }, + { + "cell_type": "markdown", + "id": "jCXiuk9ZTN_K", + "metadata": { + "id": "jCXiuk9ZTN_K" + }, + "source": [ + "### Load Movies Dataset" + ] + }, + { + "cell_type": "code", + "id": "8d561462", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 223 + }, + "id": "8d561462", + "outputId": "75ae0f32-115f-427e-e426-9a018884e860", + "ExecuteTime": { + "end_time": "2025-10-30T19:20:11.320702Z", + "start_time": "2025-10-30T19:20:11.308593Z" + } + }, + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import json\n", + "\n", + "df = pd.read_json(\"resources/movies.json\")\n", + "print(\"Loaded\", len(df), \"movie entries\")\n", + "\n", + "df.head()" + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loaded 20 movie entries\n" + ] + }, + { + "data": { + "text/plain": [ + " id title genre rating \\\n", + "0 1 Explosive Pursuit action 7 \n", + "1 2 Skyfall action 8 \n", + "2 3 Fast & Furious 9 action 6 \n", + "3 4 
Black Widow action 7 \n", + "4 5 John Wick action 8 \n", + "\n", + " description \n", + "0 A daring cop chases a notorious criminal acros... \n", + "1 James Bond returns to track down a dangerous n... \n", + "2 Dom and his crew face off against a high-tech ... \n", + "3 Natasha Romanoff confronts her dark past and f... \n", + "4 A retired hitman seeks vengeance against those... " + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idtitlegenreratingdescription
01Explosive Pursuitaction7A daring cop chases a notorious criminal acros...
12Skyfallaction8James Bond returns to track down a dangerous n...
23Fast & Furious 9action6Dom and his crew face off against a high-tech ...
34Black Widowaction7Natasha Romanoff confronts her dark past and f...
45John Wickaction8A retired hitman seeks vengeance against those...
\n", + "
" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 29 + }, + { + "cell_type": "code", + "id": "bfiTJovpQX90", + "metadata": { + "id": "bfiTJovpQX90", + "ExecuteTime": { + "end_time": "2025-10-30T19:20:55.339530Z", + "start_time": "2025-10-30T19:20:53.550812Z" + } + }, + "source": [ + "from redisvl.utils.vectorize import HFTextVectorizer\n", + "from redisvl.extensions.cache.embeddings import EmbeddingsCache\n", + "\n", + "os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"\n", + "\n", + "\n", + "hf = HFTextVectorizer(\n", + " model=\"sentence-transformers/all-MiniLM-L6-v2\",\n", + " cache=EmbeddingsCache(\n", + " name=\"embedcache\",\n", + " ttl=600,\n", + " redis_client=client,\n", + " )\n", + ")\n", + "\"\"\"\n", + "Embedding Cache:\n", + "- Stores embeddings in Redis so you don't have to regenerate them for the same text\n", + "- When you embed text, it first checks if that exact text has been embedded before\n", + "- If found (cache hit), it returns the cached embedding instantly\n", + "- If not found (cache miss), it generates the embedding and stores it for future use\n", + "- Uses a hash of text + model_name as the key to ensure uniqueness\n", + "\n", + "SO here:\n", + "If we embed the same movie description twice, the second call will be nearly instant because it retrieves from Redis instead of running the model again\n", + "\"\"\"\n", + "\n", + "\n", + "# Example: OpenAI Vectorizer\n", + "# ---------------------------\n", + "# from redisvl.utils.vectorize import OpenAITextVectorizer\n", + "#\n", + "# oai = OpenAITextVectorizer(\n", + "# model=\"text-embedding-3-small\",\n", + "# api_config={\"api_key\": \"your_api_key\"}, # OR set OPENAI_API_KEY env variable\n", + "# cache=EmbeddingsCache(\n", + "# name=\"openai_embedcache\",\n", + "# ttl=600,\n", + "# redis_client=client,\n", + "# )\n", + "# )\n", + "#\n", + "# # Generate embeddings\n", + "# embedding = oai.embed(\"Hello, world!\")\n", + "# embeddings = oai.embed_many([\"text1\", \"text2\"], batch_size=10)\n", + "\n", + "# Example: Custom Vectorizer\n", + "# ---------------------------\n", + "# from redisvl.utils.vectorize import CustomTextVectorizer\n", + "#\n", + "# # Define your custom embedding function\n", + "# def my_embed_function(text: str) -> list[float]:\n", + "# # Your custom logic here\n", + "# # Must return a list of floats\n", + "# return [0.1, 0.2, 0.3, ...] # Example: 768-dimensional vector\n", + "#\n", + "# # Optional: Define batch embedding function for better performance\n", + "# def my_embed_many_function(texts: list[str]) -> list[list[float]]:\n", + "# # Your custom batch logic here\n", + "# # Must return a list of lists of floats\n", + "# return [[0.1, 0.2, ...] 
for _ in texts]\n", + "#\n", + "# custom = CustomTextVectorizer(\n", + "# embed=my_embed_function,\n", + "# embed_many=my_embed_many_function, # Optional\n", + "# cache=EmbeddingsCache(\n", + "# name=\"custom_embedcache\",\n", + "# ttl=600,\n", + "# redis_client=client,\n", + "# )\n", + "# )\n", + "#\n", + "# # Generate embeddings\n", + "# embedding = custom.embed(\"Hello, world!\")\n", + "# embeddings = custom.embed_many([\"text1\", \"text2\"])\n" + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "15:20:54 sentence_transformers.SentenceTransformer INFO Use pytorch device_name: mps\n", + "15:20:54 sentence_transformers.SentenceTransformer INFO Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2\n" + ] + }, + { + "data": { + "text/plain": [ + "\"\\nEmbedding Cache:\\n- Stores embeddings in Redis so you don't have to regenerate them for the same text\\n- When you embed text, it first checks if that exact text has been embedded before\\n- If found (cache hit), it returns the cached embedding instantly\\n- If not found (cache miss), it generates the embedding and stores it for future use\\n- Uses a hash of text + model_name as the key to ensure uniqueness\\n\\nSO here:\\nIf we embed the same movie description twice, the second call will be nearly instant because it retrieves from Redis instead of running the model again\\n\"" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 30 + }, + { + "cell_type": "code", + "id": "Vl3SehnxQvXo", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "Vl3SehnxQvXo", + "outputId": "6b9f5555-dee7-4fd6-8dae-628919cfdc74", + "ExecuteTime": { + "end_time": "2025-10-30T19:21:02.967264Z", + "start_time": "2025-10-30T19:21:02.901291Z" + } + }, + "source": [ + "df[\"vector\"] = hf.embed_many(df[\"description\"].tolist(), as_buffer=True)\n", + "# as_buffer -> Redis has hash structure and JSON structure\n", + "# hash - single layer (no nesting/objects in objects) whereas JSON is multi-layered\n", + "# hash - more memory efficient and faster but embeddings need to be stored as bytes\n", + "# as it is stored as a byte array it saves space/memory and is faster to retrieve\n", + "df.head()" + ], + "outputs": [ + { + "data": { + "text/plain": [ + " id title genre rating \\\n", + "0 1 Explosive Pursuit action 7 \n", + "1 2 Skyfall action 8 \n", + "2 3 Fast & Furious 9 action 6 \n", + "3 4 Black Widow action 7 \n", + "4 5 John Wick action 8 \n", + "\n", + " description \\\n", + "0 A daring cop chases a notorious criminal acros... \n", + "1 James Bond returns to track down a dangerous n... \n", + "2 Dom and his crew face off against a high-tech ... \n", + "3 Natasha Romanoff confronts her dark past and f... \n", + "4 A retired hitman seeks vengeance against those... \n", + "\n", + " vector \n", + "0 b'\\x9ef|=g`\\n;I\\x92\\xb7;*\\xcb~\\xbd\\xe4d\\xce\\xb... \n", + "1 b'\\x9eD\\x9e\\xbdO\\x9b\\x89\\xbc\\xc2\\x16\\x95\\xbc\\x... \n", + "2 b'+\\xa5\\xc7\\xbc\\xfa,\\xa2=\\x82\\x19H\\xbcI\\xc6t\\x... \n", + "3 b's\\xeb\\x85\\xbd\\xfd\\xcco\\xbd\\xdc\\xe8\\xc2\\xbb?\\... 
\n", + "4 b'M;x\\xbb\\x02/\\xc5=\\x94\\x85:;\\xc6\\xd0\\x94\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
[HTML table output omitted: same data as the text/plain rendering above]
\n", + "" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 31 + }, + { + "cell_type": "markdown", + "id": "d7e99897", + "metadata": { + "id": "d7e99897" + }, + "source": [ + "## Define Redis index schema" + ] + }, + { + "cell_type": "code", + "id": "2ac53ebd", + "metadata": { + "id": "2ac53ebd", + "ExecuteTime": { + "end_time": "2025-10-30T19:23:12.906131Z", + "start_time": "2025-10-30T19:23:12.898238Z" + } + }, + "source": [ + "from redisvl.schema import IndexSchema\n", + "from redisvl.index import SearchIndex\n", + "\n", + "\n", + "index_name = \"movies\"\n", + "\n", + "# Redis supports 5 main field types for indexing:\n", + "#\n", + "# 1. TEXT - Full-text search with stemming, tokenization, and phonetic matching\n", + "# Use for: Article content, descriptions, reviews, any searchable text\n", + "# Attributes: weight, no_stem, phonetic_matcher, sortable, index_empty\n", + "#\n", + "# 2. TAG - Exact-match categorical data (like SQL ENUM or categories)\n", + "# Use for: Categories, genres, status, IDs, tags, filters\n", + "# Attributes: separator (default \",\"), case_sensitive, sortable, index_empty\n", + "#\n", + "# 3. NUMERIC - Numeric values for range queries and sorting\n", + "# Use for: Prices, ratings, counts, timestamps, ages, scores\n", + "# Attributes: sortable, index_missing, no_index\n", + "#\n", + "# 4. GEO - Geographic coordinates for location-based search\n", + "# Use for: Latitude/longitude pairs, store locations, delivery zones\n", + "# Format: \"longitude,latitude\" (e.g., \"-122.4194,37.7749\")\n", + "# Attributes: sortable, index_missing\n", + "#\n", + "# 5. VECTOR - Vector embeddings for semantic similarity search\n", + "# Use for: Text embeddings, image embeddings, recommendation systems\n", + "# Algorithms:\n", + "# - FLAT: Exact search (100% recall, slower for large datasets)\n", + "# - HNSW: Approximate nearest neighbor (fast, high recall ~95-99%)\n", + "# - SVS-VAMANA: Compressed vectors (memory efficient, good recall)\n", + "# Distance Metrics: COSINE, L2 (Euclidean), IP (Inner Product)\n", + "# Data Types: float16, float32, float64, bfloat16, int8, uint8\n", + "# Attributes: dims, algorithm, distance_metric, datatype, initial_cap\n", + "\n", + "schema = IndexSchema.from_dict({\n", + " \"index\": {\n", + " \"name\": index_name,\n", + " \"prefix\": index_name,\n", + " \"storage_type\": \"hash\" # or \"json\" for nested data structures\n", + " },\n", + " \"fields\": [\n", + " {\n", + " \"name\": \"title\",\n", + " \"type\": \"text\", # Full-text search field\n", + " },\n", + " {\n", + " \"name\": \"description\",\n", + " \"type\": \"text\", # Full-text search field\n", + " },\n", + " {\n", + " \"name\": \"genre\",\n", + " \"type\": \"tag\", # Exact-match categorical field\n", + " \"attrs\": {\n", + " \"sortable\": True\n", + " }\n", + " },\n", + " {\n", + " \"name\": \"rating\",\n", + " \"type\": \"numeric\", # Numeric range queries and sorting\n", + " \"attrs\": {\n", + " \"sortable\": True\n", + " }\n", + " },\n", + " {\n", + " \"name\": \"vector\",\n", + " \"type\": \"vector\", # Semantic similarity search\n", + " \"attrs\": {\n", + " \"dims\": 384, # Vector dimensions (model-specific)\n", + " \"distance_metric\": \"cosine\", # COSINE, L2, or IP\n", + " \"algorithm\": \"flat\", # FLAT, HNSW, or SVS-VAMANA\n", + " \"datatype\": \"float32\" # float16, float32, float64, bfloat16\n", + " }\n", + " }\n", + " # Example: GEO field (commented out)\n", + " # {\n", + " # \"name\": \"location\",\n", + " # 
\"type\": \"geo\",\n", + " # \"attrs\": {\n", + " # \"sortable\": False\n", + " # }\n", + " # }\n", + " ]\n", + "})\n", + "\n", + "\n", + "index = SearchIndex(schema, client)\n", + "index.create(overwrite=True, drop=True)" + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "15:23:12 redisvl.index.index INFO Index already exists, overwriting.\n" + ] + } + ], + "execution_count": 32 + }, + { + "cell_type": "code", + "id": "kXbcEV-5BcE1", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "kXbcEV-5BcE1", + "outputId": "fb0fd245-9e1c-43a4-9102-60fcd6305f77", + "ExecuteTime": { + "end_time": "2025-10-30T19:23:31.993101Z", + "start_time": "2025-10-30T19:23:31.490613Z" + } + }, + "source": [ + "!rvl index info -i movies -u {REDIS_URL}" + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r\n", + "\r\n", + "Index Information:\r\n", + "โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎ\r\n", + "โ”‚ Index Name โ”‚ Storage Type โ”‚ Prefixes โ”‚ Index Options โ”‚ Indexing โ”‚\r\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\r\n", + "| movies | HASH | ['movies'] | [] | 0 |\r\n", + "โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ\r\n", + "Index Fields:\r\n", + "โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎ\r\n", + "โ”‚ Name โ”‚ Attribute โ”‚ Type โ”‚ Field Option โ”‚ Option Value โ”‚ Field Option โ”‚ Option Value โ”‚ Field Option โ”‚ Option Value โ”‚ Field Option โ”‚ Option Value โ”‚\r\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\r\n", + "โ”‚ title โ”‚ title โ”‚ TEXT โ”‚ WEIGHT โ”‚ 1 โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚\r\n", + "โ”‚ description โ”‚ description โ”‚ TEXT โ”‚ WEIGHT โ”‚ 1 โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚\r\n", + "โ”‚ genre โ”‚ genre โ”‚ TAG โ”‚ SEPARATOR โ”‚ , โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚\r\n", + "โ”‚ rating โ”‚ rating โ”‚ NUMERIC โ”‚ 
SORTABLE โ”‚ UNF โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚\r\n", + "โ”‚ vector โ”‚ vector โ”‚ VECTOR โ”‚ algorithm โ”‚ FLAT โ”‚ data_type โ”‚ FLOAT32 โ”‚ dim โ”‚ 384 โ”‚ distance_metric โ”‚ COSINE โ”‚\r\n", + "โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ\r\n" + ] + } + ], + "execution_count": 33 + }, + { + "cell_type": "markdown", + "id": "24d3ea9c", + "metadata": { + "id": "24d3ea9c" + }, + "source": [ + "## Populate index" + ] + }, + { + "cell_type": "code", + "id": "169ebb93", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "169ebb93", + "outputId": "303291ef-e9f9-4477-90a4-0dfafcb5cce3", + "ExecuteTime": { + "end_time": "2025-10-30T19:23:36.706512Z", + "start_time": "2025-10-30T19:23:36.697520Z" + } + }, + "source": [ + "index.load(df.to_dict(orient=\"records\"))" + ], + "outputs": [ + { + "data": { + "text/plain": [ + "['movies:01K8V96NBV88RP76DHYNAHK4T2',\n", + " 'movies:01K8V96NBV01PXFNSNC8K2JQZP',\n", + " 'movies:01K8V96NBVHKA428B4YBCRNXB1',\n", + " 'movies:01K8V96NBVFD3S1DCVPDV0BE3W',\n", + " 'movies:01K8V96NBVZ64218T1PG7SE7PB',\n", + " 'movies:01K8V96NBV13WZJVFDFBET0K5N',\n", + " 'movies:01K8V96NBV3N8WDXZ10BQ8QVTM',\n", + " 'movies:01K8V96NBVNKF14S0AW75DJDF7',\n", + " 'movies:01K8V96NBV23MRYV2QRN7JV5YA',\n", + " 'movies:01K8V96NBV8KAR2ZQ13404TH2B',\n", + " 'movies:01K8V96NBVS3NH038K2YAZSHAW',\n", + " 'movies:01K8V96NBVQA4DA457PS4PX67W',\n", + " 'movies:01K8V96NBVK2RATV8KC5NBXJSJ',\n", + " 'movies:01K8V96NBVBFT2EA5TNW7SV2X6',\n", + " 'movies:01K8V96NBV85BE9MNEFBV60PHP',\n", + " 'movies:01K8V96NBV4DQ0P3V61SB2X9DS',\n", + " 'movies:01K8V96NBV1MSCHVJ5RY81Q6AM',\n", + " 'movies:01K8V96NBVD2BZJDTSV31S7DG6',\n", + " 'movies:01K8V96NBVHSERTAZTPBCXY2JV',\n", + " 'movies:01K8V96NBV6V1Z83D2Z9K1S3QX']" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 34 + }, + { + "cell_type": "markdown", + "id": "87ba1dfd", + "metadata": { + "id": "87ba1dfd" + }, + "source": [ + "## Search techniques\n", + "\n", + "### Standard vector search" + ] + }, + { + "cell_type": "code", + "id": "9454e60d", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 143 + }, + "id": "9454e60d", + "outputId": "c1903d62-7224-4b9b-e69f-2b6701a7368f", + "ExecuteTime": { + "end_time": "2025-10-30T19:24:56.127659Z", + "start_time": "2025-10-30T19:24:56.121184Z" + } + }, + "source": [ + "from redisvl.query import VectorQuery\n", + "\n", + "user_query = \"High tech and action packed movie\"\n", + "\n", + "embedded_user_query = hf.embed(user_query)\n", + "\n", + "vec_query = VectorQuery(\n", + " vector=embedded_user_query,\n", + " vector_field_name=\"vector\",\n", + " num_results=3,\n", + " return_fields=[\"title\", \"genre\", \"description\"],\n", + " return_score=True,\n", + ")\n", + "\n", + "result = index.query(vec_query)\n", + "pd.DataFrame(result)\n" + ], + "outputs": [ + { + "data": { + "text/plain": [ + " id vector_distance title \\\n", + "0 
movies:01K8V96NBVHKA428B4YBCRNXB1 0.64973795414 Fast & Furious 9 \n", + "1 movies:01K8V96NBV13WZJVFDFBET0K5N 0.763235211372 Mad Max: Fury Road \n", + "2 movies:01K8V96NBVQA4DA457PS4PX67W 0.792449593544 The Lego Movie \n", + "\n", + " genre description \n", + "0 action Dom and his crew face off against a high-tech ... \n", + "1 action In a post-apocalyptic wasteland, Max teams up ... \n", + "2 comedy An ordinary Lego construction worker, thought ... " + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idvector_distancetitlegenredescription
0movies:01K8V96NBVHKA428B4YBCRNXB10.64973795414Fast & Furious 9actionDom and his crew face off against a high-tech ...
1movies:01K8V96NBV13WZJVFDFBET0K5N0.763235211372Mad Max: Fury RoadactionIn a post-apocalyptic wasteland, Max teams up ...
2movies:01K8V96NBVQA4DA457PS4PX67W0.792449593544The Lego MoviecomedyAn ordinary Lego construction worker, thought ...
\n", + "
" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 36 + }, + { + "cell_type": "markdown", + "id": "ef5e1997", + "metadata": { + "id": "ef5e1997" + }, + "source": [ + "### Vector search with filters\n", + "\n", + "Redis allows you to combine filter searches on fields within the index object allowing us to create more specific searches." + ] + }, + { + "cell_type": "markdown", + "id": "kKCzyMUDDw10", + "metadata": { + "id": "kKCzyMUDDw10" + }, + "source": [ + "Search for top 3 movies specifically in the action genre:\n" + ] + }, + { + "cell_type": "code", + "id": "d499dcad", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 143 + }, + "id": "d499dcad", + "outputId": "ab410048-da42-4b1e-a5fb-fbd6430ba437", + "ExecuteTime": { + "end_time": "2025-10-30T19:26:04.277330Z", + "start_time": "2025-10-30T19:26:04.272306Z" + } + }, + "source": [ + "from redisvl.query.filter import Tag\n", + "\n", + "tag_filter = Tag(\"genre\") == \"action\"\n", + "\n", + "vec_query.set_filter(tag_filter)\n", + "\n", + "result=index.query(vec_query)\n", + "pd.DataFrame(result)" + ], + "outputs": [ + { + "data": { + "text/plain": [ + " id vector_distance title \\\n", + "0 movies:01K8V96NBVHKA428B4YBCRNXB1 0.64973795414 Fast & Furious 9 \n", + "1 movies:01K8V96NBV13WZJVFDFBET0K5N 0.763235211372 Mad Max: Fury Road \n", + "2 movies:01K8V96NBV88RP76DHYNAHK4T2 0.796153008938 Explosive Pursuit \n", + "\n", + " genre description \n", + "0 action Dom and his crew face off against a high-tech ... \n", + "1 action In a post-apocalyptic wasteland, Max teams up ... \n", + "2 action A daring cop chases a notorious criminal acros... " + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idvector_distancetitlegenredescription
0movies:01K8V96NBVHKA428B4YBCRNXB10.64973795414Fast & Furious 9actionDom and his crew face off against a high-tech ...
1movies:01K8V96NBV13WZJVFDFBET0K5N0.763235211372Mad Max: Fury RoadactionIn a post-apocalyptic wasteland, Max teams up ...
2movies:01K8V96NBV88RP76DHYNAHK4T20.796153008938Explosive PursuitactionA daring cop chases a notorious criminal acros...
\n", + "
" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 37 + }, + { + "cell_type": "markdown", + "id": "YAh3GDS4Dudu", + "metadata": { + "id": "YAh3GDS4Dudu" + }, + "source": [ + "Search for top 3 movies specifically in the action genre with ratings at or above a 7:\n" + ] + }, + { + "cell_type": "code", + "id": "f59fff2c", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 143 + }, + "id": "f59fff2c", + "outputId": "d6909c59-a947-4e58-a13a-8d0c2169a6b3", + "ExecuteTime": { + "end_time": "2025-10-30T19:26:48.653730Z", + "start_time": "2025-10-30T19:26:48.645089Z" + } + }, + "source": [ + "from redisvl.query.filter import Num\n", + "\n", + "# build combined filter expressions\n", + "tag_filter = Tag(\"genre\") == \"action\"\n", + "num_filter = Num(\"rating\") >= 7\n", + "combined_filter = tag_filter & num_filter\n", + "\n", + "# build vector query\n", + "vec_query = VectorQuery(\n", + " vector=embedded_user_query,\n", + " vector_field_name=\"vector\",\n", + " num_results=3,\n", + " return_fields=[\"title\", \"rating\", \"genre\"],\n", + " return_score=True,\n", + " filter_expression=combined_filter\n", + ")\n", + "\n", + "result = index.query(vec_query)\n", + "pd.DataFrame(result)" + ], + "outputs": [ + { + "data": { + "text/plain": [ + " id vector_distance title \\\n", + "0 movies:01K8V96NBV13WZJVFDFBET0K5N 0.763235211372 Mad Max: Fury Road \n", + "1 movies:01K8V96NBV88RP76DHYNAHK4T2 0.796153008938 Explosive Pursuit \n", + "2 movies:01K8V96NBV23MRYV2QRN7JV5YA 0.876494169235 Inception \n", + "\n", + " rating genre \n", + "0 8 action \n", + "1 7 action \n", + "2 9 action " + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idvector_distancetitleratinggenre
0movies:01K8V96NBV13WZJVFDFBET0K5N0.763235211372Mad Max: Fury Road8action
1movies:01K8V96NBV88RP76DHYNAHK4T20.796153008938Explosive Pursuit7action
2movies:01K8V96NBV23MRYV2QRN7JV5YA0.876494169235Inception9action
\n", + "
" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 38 + }, + { + "cell_type": "markdown", + "id": "yJ6TkwEVDsbN", + "metadata": { + "id": "yJ6TkwEVDsbN" + }, + "source": [ + "Search with full text search for movies that directly mention \"criminal mastermind\" in the description:\n" + ] + }, + { + "cell_type": "code", + "id": "7dab26c2", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 146 + }, + "id": "7dab26c2", + "outputId": "da366f10-d07d-4a1e-8da5-725e6a37827a", + "ExecuteTime": { + "end_time": "2025-10-30T19:27:25.102849Z", + "start_time": "2025-10-30T19:27:25.097568Z" + } + }, + "source": [ + "from redisvl.query.filter import Text\n", + "\n", + "text_filter = Text(\"description\") % \"criminal mastermind\"\n", + "\n", + "vec_query = VectorQuery(\n", + " vector=embedded_user_query,\n", + " vector_field_name=\"vector\",\n", + " num_results=3,\n", + " return_fields=[\"title\", \"rating\", \"genre\", \"description\"],\n", + " return_score=True,\n", + " filter_expression=text_filter\n", + ")\n", + "\n", + "result = index.query(vec_query)\n", + "pd.DataFrame(result)['description'][1]" + ], + "outputs": [ + { + "data": { + "text/plain": [ + "'Batman faces off against the Joker, a criminal mastermind who threatens to plunge Gotham into chaos.'" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 41 + }, + { + "cell_type": "markdown", + "id": "UWQkD69fECJv", + "metadata": { + "id": "UWQkD69fECJv" + }, + "source": [ + "Vector search with wildcard text match:\n" + ] + }, + { + "cell_type": "code", + "id": "e39e5e5c", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 195 + }, + "id": "e39e5e5c", + "outputId": "d9d476dc-8d80-4743-dc14-02e64f9c570d", + "ExecuteTime": { + "end_time": "2025-10-30T15:41:30.963843Z", + "start_time": "2025-10-30T15:41:30.958547Z" + } + }, + "source": [ + "text_filter = Text(\"description\") % \"crim*\"\n", + "\n", + "vec_query = VectorQuery(\n", + " vector=embedded_user_query,\n", + " vector_field_name=\"vector\",\n", + " num_results=3,\n", + " return_fields=[\"title\", \"rating\", \"genre\", \"description\"],\n", + " return_score=True,\n", + " filter_expression=text_filter\n", + ")\n", + "\n", + "result = index.query(vec_query)\n", + "pd.DataFrame(result)" + ], + "outputs": [ + { + "data": { + "text/plain": [ + " id vector_distance title \\\n", + "0 movies:01K8TWFA576NJD4BY9DKHWRZZY 0.796153008938 Explosive Pursuit \n", + "1 movies:01K8TWFA57RB003JFMYF3N6PNM 0.807471394539 The Incredibles \n", + "2 movies:01K8TWFA57SX8Y09NVMN4EEW6C 0.827253937721 Despicable Me \n", + "\n", + " rating genre description \n", + "0 7 action A daring cop chases a notorious criminal acros... \n", + "1 8 comedy A family of undercover superheroes, while tryi... \n", + "2 7 comedy When a criminal mastermind uses a trio of orph... " + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idvector_distancetitleratinggenredescription
0movies:01K8TWFA576NJD4BY9DKHWRZZY0.796153008938Explosive Pursuit7actionA daring cop chases a notorious criminal acros...
1movies:01K8TWFA57RB003JFMYF3N6PNM0.807471394539The Incredibles8comedyA family of undercover superheroes, while tryi...
2movies:01K8TWFA57SX8Y09NVMN4EEW6C0.827253937721Despicable Me7comedyWhen a criminal mastermind uses a trio of orph...
\n", + "
" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 15 + }, + { + "cell_type": "markdown", + "id": "CGyNAr70EGLg", + "metadata": { + "id": "CGyNAr70EGLg" + }, + "source": [ + "Vector search with fuzzy match filter\n", + "\n", + "> Note: fuzzy match is based on Levenshtein distance. Therefore, \"hero\" might return result for \"her\" as an example.\n", + "\n", + "See docs for more info https://redis.io/docs/latest/develop/interact/search-and-query/advanced-concepts/query_syntax/\n" + ] + }, + { + "cell_type": "code", + "id": "3450e07d", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 195 + }, + "id": "3450e07d", + "outputId": "93b5ea52-3735-4b81-ad51-17c487d1132c", + "ExecuteTime": { + "end_time": "2025-10-30T15:41:32.534333Z", + "start_time": "2025-10-30T15:41:32.528054Z" + } + }, + "source": [ + "\n", + "text_filter = Text(\"description\") % \"%hero%\"\n", + "\n", + "vec_query = VectorQuery(\n", + " vector=embedded_user_query,\n", + " vector_field_name=\"vector\",\n", + " num_results=3,\n", + " return_fields=[\"title\", \"rating\", \"genre\", \"description\"],\n", + " return_score=True,\n", + " filter_expression=text_filter\n", + ")\n", + "\n", + "result = index.query(vec_query)\n", + "pd.DataFrame(result)" + ], + "outputs": [ + { + "data": { + "text/plain": [ + " id vector_distance title \\\n", + "0 movies:01K8TWFA571WT01N51DC2098SB 0.889985799789 Black Widow \n", + "1 movies:01K8TWFA57CQNKWQGFRTTB6VBM 0.89386677742 The Avengers \n", + "2 movies:01K8TWFA578W3EAAGD9SBF1YNP 0.943198144436 The Princess Diaries \n", + "\n", + " rating genre description \n", + "0 7 action Natasha Romanoff confronts her dark past and f... \n", + "1 8 action Earth's mightiest heroes come together to stop... \n", + "2 6 comedy Mia Thermopolis has just found out that she is... " + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idvector_distancetitleratinggenredescription
0movies:01K8TWFA571WT01N51DC2098SB0.889985799789Black Widow7actionNatasha Romanoff confronts her dark past and f...
1movies:01K8TWFA57CQNKWQGFRTTB6VBM0.89386677742The Avengers8actionEarth's mightiest heroes come together to stop...
2movies:01K8TWFA578W3EAAGD9SBF1YNP0.943198144436The Princess Diaries6comedyMia Thermopolis has just found out that she is...
\n", + "
" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 16 + }, + { + "cell_type": "markdown", + "id": "6bd27cb3", + "metadata": { + "id": "6bd27cb3" + }, + "source": [ + "### Range queries\n", + "\n", + "Range queries allow you to set a pre defined distance \"threshold\" for which we want to return documents. This is helpful when you only want documents with a certain \"radius\" from the search query." + ] + }, + { + "cell_type": "code", + "id": "cafe1795", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 237 + }, + "id": "cafe1795", + "outputId": "c86063ac-e0e5-4975-c08a-2b8cc71c8f79", + "ExecuteTime": { + "end_time": "2025-10-30T19:36:18.314020Z", + "start_time": "2025-10-30T19:36:18.275144Z" + } + }, + "source": [ + "from redisvl.query import RangeQuery\n", + "\n", + "user_query = \"Family friendly fantasy movies\"\n", + "\n", + "embedded_user_query = hf.embed(user_query)\n", + "\n", + "range_query = RangeQuery(\n", + " vector=embedded_user_query,\n", + " vector_field_name=\"vector\",\n", + " return_fields=[\"title\", \"rating\", \"genre\"],\n", + " return_score=True,\n", + " distance_threshold=0.8 # find all items with a semantic distance of less than 0.8\n", + ")\n", + "\n", + "result = index.query(range_query)\n", + "pd.DataFrame(result)\n" + ], + "outputs": [ + { + "data": { + "text/plain": [ + " id vector_distance title rating \\\n", + "0 movies:01K8V96NBV4DQ0P3V61SB2X9DS 0.644702553749 The Incredibles 8 \n", + "1 movies:01K8V96NBVFD3S1DCVPDV0BE3W 0.747986972332 Black Widow 7 \n", + "2 movies:01K8V96NBVD2BZJDTSV31S7DG6 0.750915408134 Despicable Me 7 \n", + "3 movies:01K8V96NBV85BE9MNEFBV60PHP 0.751298904419 Shrek 8 \n", + "4 movies:01K8V96NBV1MSCHVJ5RY81Q6AM 0.761669397354 Monsters, Inc. 8 \n", + "5 movies:01K8V96NBVK2RATV8KC5NBXJSJ 0.778580188751 Aladdin 8 \n", + "\n", + " genre \n", + "0 comedy \n", + "1 action \n", + "2 comedy \n", + "3 comedy \n", + "4 comedy \n", + "5 comedy " + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idvector_distancetitleratinggenre
0movies:01K8V96NBV4DQ0P3V61SB2X9DS0.644702553749The Incredibles8comedy
1movies:01K8V96NBVFD3S1DCVPDV0BE3W0.747986972332Black Widow7action
2movies:01K8V96NBVD2BZJDTSV31S7DG60.750915408134Despicable Me7comedy
3movies:01K8V96NBV85BE9MNEFBV60PHP0.751298904419Shrek8comedy
4movies:01K8V96NBV1MSCHVJ5RY81Q6AM0.761669397354Monsters, Inc.8comedy
5movies:01K8V96NBVK2RATV8KC5NBXJSJ0.778580188751Aladdin8comedy
\n", + "
" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 43 + }, + { + "cell_type": "markdown", + "id": "a1586ea7", + "metadata": { + "id": "a1586ea7" + }, + "source": [ + "Like the queries above, we can also chain additional filters and conditional operators with range queries. The following adds an `and` condition that returns vector search within the defined range and with a rating at or above 8." + ] + }, + { + "cell_type": "code", + "id": "d3110324", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 174 + }, + "id": "d3110324", + "outputId": "dff98df9-60ea-4325-f1c9-1e57c5139014", + "ExecuteTime": { + "end_time": "2025-10-30T15:41:36.607626Z", + "start_time": "2025-10-30T15:41:36.602045Z" + } + }, + "source": [ + "range_query = RangeQuery(\n", + " vector=embedded_user_query,\n", + " vector_field_name=\"vector\",\n", + " return_fields=[\"title\", \"rating\", \"genre\"],\n", + " distance_threshold=0.8\n", + ")\n", + "\n", + "numeric_filter = Num(\"rating\") >= 8\n", + "\n", + "range_query.set_filter(numeric_filter)\n", + "\n", + "# in this case we want to do a simple filter search or the vector so we execute as a joint filter directly\n", + "result = index.query(range_query)\n", + "pd.DataFrame(result)\n" + ], + "outputs": [ + { + "data": { + "text/plain": [ + " id vector_distance title rating \\\n", + "0 movies:01K8TWFA57RB003JFMYF3N6PNM 0.644702553749 The Incredibles 8 \n", + "1 movies:01K8TWFA577WVQYQZ5MNDFS083 0.751298904419 Shrek 8 \n", + "2 movies:01K8TWFA579R1H9TZ65QPSF3S2 0.761669397354 Monsters, Inc. 8 \n", + "3 movies:01K8TWFA57Z8MY5X741J4K1MTS 0.778580188751 Aladdin 8 \n", + "\n", + " genre \n", + "0 comedy \n", + "1 comedy \n", + "2 comedy \n", + "3 comedy " + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idvector_distancetitleratinggenre
0movies:01K8TWFA57RB003JFMYF3N6PNM0.644702553749The Incredibles8comedy
1movies:01K8TWFA577WVQYQZ5MNDFS0830.751298904419Shrek8comedy
2movies:01K8TWFA579R1H9TZ65QPSF3S20.761669397354Monsters, Inc.8comedy
3movies:01K8TWFA57Z8MY5X741J4K1MTS0.778580188751Aladdin8comedy
\n", + "
" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 18 + }, + { + "cell_type": "markdown", + "id": "qABIlUpQE4lT", + "metadata": { + "id": "qABIlUpQE4lT" + }, + "source": [ + "### Full text search" + ] + }, + { + "cell_type": "code", + "id": "AOU0Sqx3FCFN", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 174 + }, + "id": "AOU0Sqx3FCFN", + "outputId": "eba96774-147f-4f8f-901f-abc9dc53cf48", + "ExecuteTime": { + "end_time": "2025-10-30T15:41:40.262601Z", + "start_time": "2025-10-30T15:41:37.950877Z" + } + }, + "source": [ + "from redisvl.query import TextQuery\n", + "\n", + "user_query = \"High tech, action packed, superheros fight scenes\"\n", + "\n", + "text_query = TextQuery(\n", + " text=user_query,\n", + " text_field_name=\"description\",\n", + " text_scorer=\"BM25STD\",\n", + " num_results=20,\n", + " return_fields=[\"title\", \"description\"],\n", + ")\n", + "\n", + "result = index.query(text_query)[:4]\n", + "pd.DataFrame(result)[[\"title\", \"score\"]]" + ], + "outputs": [ + { + "data": { + "text/plain": [ + " title score\n", + "0 Fast & Furious 9 5.157032\n", + "1 The Incredibles 4.022877\n", + "2 Explosive Pursuit 2.335427\n", + "3 Toy Story 1.630097" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
titlescore
0Fast & Furious 95.157032
1The Incredibles4.022877
2Explosive Pursuit2.335427
3Toy Story1.630097
\n", + "
" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 19 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### Stop Words Example with English and German\n", + "\n", + "Stop words are common words (like \"the\", \"is\", \"at\") that are often filtered out before text processing because they don't carry much semantic meaning. RedisVL uses NLTK stopwords and supports multiple languages.\n" + ], + "id": "bfe35d98df21ba75" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-30T19:35:48.001780Z", + "start_time": "2025-10-30T19:35:47.747115Z" + } + }, + "cell_type": "code", + "source": [ + "# Example 1: English Hybrid Search with Stop Words\n", + "import nltk\n", + "nltk.download('stopwords', quiet=True)\n", + "\n", + "from redisvl.query import HybridQuery\n", + "\n", + "# English query\n", + "query_en = \"action packed superhero movie with great fight scenes\"\n", + "embedded_query_en = hf.embed(query_en)\n", + "\n", + "hybrid_query_en = HybridQuery(\n", + " text=query_en,\n", + " text_field_name=\"description\",\n", + " text_scorer=\"BM25\",\n", + " vector=embedded_query_en,\n", + " vector_field_name=\"vector\",\n", + " alpha=0.7,\n", + " num_results=3,\n", + " return_fields=[\"title\", \"description\"],\n", + " stopwords=\"english\" # Automatically removes English stop words using NLTK\n", + ")\n", + "\n", + "print(\"English Query:\", query_en)\n", + "print(\"After stop word removal:\", hybrid_query_en._build_query_string())\n", + "print(\"\\nResults:\")\n", + "result_en = index.query(hybrid_query_en)\n", + "pd.DataFrame(result_en)[[\"title\", \"hybrid_score\"]]\n" + ], + "id": "303d041feadc851d", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "English Query: action packed superhero movie with great fight scenes\n", + "After stop word removal: (~@description:(action | packed | superhero | movie | great | fight | scenes))=>[KNN 3 @vector $vector AS vector_distance]\n", + "\n", + "Results:\n" + ] + }, + { + "data": { + "text/plain": [ + " title hybrid_score\n", + "0 The Incredibles 0.688284047681\n", + "1 Fast & Furious 9 0.465631234646\n", + "2 The Dark Knight 0.463765496016" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
titlehybrid_score
0The Incredibles0.688284047681
1Fast & Furious 90.465631234646
2The Dark Knight0.463765496016
\n", + "
" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 42 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-30T15:58:48.344549Z", + "start_time": "2025-10-30T15:58:48.278271Z" + } + }, + "cell_type": "code", + "source": [ + "# Example 2: German Hybrid Search with Stop Words\n", + "# (Note: This example shows the syntax - actual German movie data would be needed for real results)\n", + "\n", + "query_de = \"spannender Action Film mit tollen Kampfszenen und Helden\"\n", + "# Translation: \"exciting action movie with great fight scenes and heroes\"\n", + "\n", + "# For demonstration, we'll embed the German text\n", + "embedded_query_de = hf.embed(query_de)\n", + "\n", + "hybrid_query_de = HybridQuery(\n", + " text=query_de,\n", + " text_field_name=\"description\",\n", + " text_scorer=\"BM25\",\n", + " vector=embedded_query_de,\n", + " vector_field_name=\"vector\",\n", + " alpha=0.7,\n", + " num_results=3,\n", + " return_fields=[\"title\", \"description\"],\n", + " stopwords=\"german\" # Automatically removes German stop words using NLTK\n", + ")\n", + "\n", + "print(\"German Query:\", query_de)\n", + "print(\"After stop word removal:\", hybrid_query_de._build_query_string())\n", + "print(\"\\nStop words removed: 'mit', 'und' (with, and)\")\n", + "\n", + "# Supported languages: 'english', 'german', 'french', 'spanish', 'italian',\n", + "# 'portuguese', 'russian', 'arabic', 'dutch', 'swedish', and more\n" + ], + "id": "d4584c0a95483f2a", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "German Query: spannender Action Film mit tollen Kampfszenen und Helden\n", + "After stop word removal: (~@description:(spannender | action | film | tollen | kampfszenen | helden))=>[KNN 3 @vector $vector AS vector_distance]\n", + "\n", + "Stop words removed: 'mit', 'und' (with, and)\n" + ] + } + ], + "execution_count": 26 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "### Hybrid search", + "id": "1fd87b56523a532b" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "from redisvl.query import HybridQuery\n", + "\n", + "hybrid_query = HybridQuery(\n", + " text=user_query,\n", + " text_field_name=\"description\",\n", + " text_scorer=\"BM25\",\n", + " vector=embedded_user_query,\n", + " vector_field_name=\"vector\",\n", + " alpha=0.7,\n", + " num_results=20,\n", + " return_fields=[\"title\", \"description\"],\n", + ")\n", + "\n", + "result = index.query(hybrid_query)[:4]\n", + "pd.DataFrame(result)[[\"title\", \"vector_similarity\", \"text_score\", \"hybrid_score\"]]\n" + ], + "id": "259a896ce25db029" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "# Redis Query Language Translation\n", + "# =================================\n", + "# The HybridQuery above translates to this Redis FT.AGGREGATE command:\n", + "\n", + "print(\"Original query:\", user_query)\n", + "print(\"After stop word removal:\", hybrid_query._build_query_string())\n", + "\n", + "redis_query = \"\"\"\n", + "FT.AGGREGATE movies\n", + " \"(@description:(high | tech | action | packed | superheros | fight | scenes))=>{$yield_distance_as: vector_distance; $vector: ; $vector_field: vector}\"\n", + " LOAD 2 @title @description\n", + " SCORER BM25\n", + " APPLY \"(2 - @vector_distance)/2\" AS vector_similarity\n", + " APPLY \"@__score\" AS text_score\n", + " APPLY \"(0.7 * @vector_similarity) + (0.3 * 
@text_score)\" AS hybrid_score\n", + " SORTBY 2 @hybrid_score DESC\n", + " LIMIT 0 20\n", + "\n", + "Breakdown:\n", + "----------\n", + "@description:(high | tech | action | ...) - Full-text search with OR logic (stop words removed)\n", + "=>{$yield_distance_as: vector_distance} - Vector similarity search parameters\n", + "LOAD 2 @title @description - Load these fields from documents\n", + "SCORER BM25 - Use BM25 algorithm for text scoring\n", + "APPLY \"(2 - @vector_distance)/2\" - Convert distance to similarity (0-1)\n", + "APPLY \"@__score\" AS text_score - Get BM25 text relevance score\n", + "APPLY \"(0.7 * vector) + (0.3 * text)\" - Weighted hybrid score (alpha=0.7)\n", + "SORTBY @hybrid_score DESC - Sort by combined score\n", + "LIMIT 0 20 - Return top 20 results\n", + "\"\"\"\n", + "\n", + "print(redis_query)" + ], + "id": "81456172eefcc8b3" + }, + { + "cell_type": "markdown", + "id": "5fa7cdfb", + "metadata": { + "id": "5fa7cdfb" + }, + "source": [ + "### Next steps\n", + "\n", + "For more query examples with redisvl: [see here](https://github.com/redis/redis-vl-python/blob/main/docs/user_guide/02_hybrid_queries.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "id": "915c2cef", + "metadata": { + "id": "915c2cef" + }, + "outputs": [], + "source": [ + "# clean up!\n", + "index.delete()" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "name": "python3", + "language": "python" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/python-recipes/vector-search/08_vector_algorithm_benchmark.ipynb b/python-recipes/vector-search/08_vector_algorithm_benchmark.ipynb new file mode 100644 index 00000000..e19abbf7 --- /dev/null +++ b/python-recipes/vector-search/08_vector_algorithm_benchmark.ipynb @@ -0,0 +1,1424 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![Redis](https://redis.io/wp-content/uploads/2024/04/Logotype.svg?auto=webp&quality=85,75&width=120)\n", + "# Vector Algorithm Benchmark: FLAT vs HNSW vs SVS-VAMANA\n", + "\n", + "## Let's Begin!\n", + "\"Open\n", + "\n", + "This notebook benchmarks FLAT, HNSW, and SVS-VAMANA vector search algorithms using **real data from Hugging Face** across different embedding dimensions.\n", + "\n", + "## What You'll Learn\n", + "\n", + "- **Memory usage comparison** across algorithms and dimensions\n", + "- **Index creation performance** with real text data\n", + "- **Query performance** and latency analysis\n", + "- **Search quality** with recall metrics on real embeddings\n", + "- **Algorithm selection guidance** based on your requirements\n", + "\n", + "## Benchmark Configuration\n", + "\n", + "- **Dataset**: SQuAD (Stanford Question Answering Dataset) from Hugging Face\n", + "- **Algorithms**: FLAT, HNSW, SVS-VAMANA\n", + "- **Dimensions**: 384, 768, 1536 (native sentence-transformer embeddings)\n", + "- **Dataset Size**: 1,000 documents per dimension\n", + "- **Query Set**: 50 real questions per configuration\n", + "- **Focus**: Real-world performance with actual text embeddings\n", + "\n", + "## Prerequisites\n", + "\n", + "- Redis Stack 8.2.0+ with RediSearch 2.8.10+" + ] + }, + { + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "## ๐Ÿ“ฆ Installation & Setup\n", + "\n", + "This notebook requires **sentence-transformers** for generating embeddings and **Redis Stack** running in Docker.\n", + "\n", + "**Requirements:**\n", + "- Redis Stack 8.2.0+ with RediSearch 2.8.10+\n", + "- sentence-transformers (for generating embeddings)\n", + "- numpy (for vector operations)\n", + "- redisvl (should be available in your environment)\n", + "- matplotlib\n", + "- seaborn\n", + " \n", + "**๐Ÿณ Docker Setup (Required):**\n", + "\n", + "Before running this notebook, make sure Redis Stack is running in Docker:\n", + "\n", + "```bash\n", + "# Start Redis Stack with Docker\n", + "docker run -d --name redis-stack -p 6379:6379 -p 8001:8001 redis/redis-stack:latest\n", + "```\n", + "\n", + "Or if you prefer using docker-compose, create a `docker-compose.yml` file:\n", + "\n", + "```yaml\n", + "version: '3.8'\n", + "services:\n", + " redis:\n", + " image: redis/redis-stack:latest\n", + " ports:\n", + " - \"6379:6379\"\n", + " - \"8001:8001\"\n", + "```\n", + "\n", + "Then run: `docker-compose up -d`\n", + "\n", + "**๐Ÿ“š Python Dependencies Installation:**\n", + "\n", + "Install the required Python packages:\n", + "\n", + "```bash\n", + "# Install core dependencies\n", + "pip install redisvl numpy sentence-transformers matplotlib seaborn\n", + "\n", + "# Or install with specific versions for compatibility\n", + "pip install redisvl>=0.2.0 numpy>=1.21.0 sentence-transformers>=2.2.0\n", + "```\n", + "\n", + "**For Google Colab users, run this cell:**\n", + "\n", + "```python\n", + "!pip install redisvl sentence-transformers numpy matplotlib seaborn\n", + "```\n", + "\n", + "**For Conda users:**\n", + "\n", + "```bash\n", + "conda install numpy\n", + "pip install redisvl sentence-transformers matplotlib seaborn\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "๐Ÿ“š Libraries imported successfully!\n" + ] + } + ], + "source": [ + "# Import required libraries\n", + "import os\n", + "import json\n", + "import time\n", + "import psutil\n", + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "from typing import Dict, List, Tuple, Any\n", + "from dataclasses import dataclass\n", + "from collections import defaultdict\n", + "\n", + "# Redis and RedisVL imports\n", + "import redis\n", + "from redisvl.index import SearchIndex\n", + "from redisvl.query import VectorQuery\n", + "from redisvl.redis.utils import array_to_buffer, buffer_to_array\n", + "from redisvl.utils import CompressionAdvisor\n", + "from redisvl.redis.connection import supports_svs\n", + "\n", + "# Configuration\n", + "REDIS_URL = \"redis://localhost:6379\"\n", + "np.random.seed(42) # For reproducible results\n", + "\n", + "# Set up plotting style\n", + "plt.style.use('default')\n", + "sns.set_palette(\"husl\")\n", + "\n", + "print(\"๐Ÿ“š Libraries imported successfully!\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "๐Ÿ”ง Benchmark Configuration:\n", + "Dimensions: [384, 768, 1536]\n", + "Algorithms: ['flat', 'hnsw', 'svs-vamana']\n", + "Documents per dimension: 1,000\n", + "Test queries: 50\n", + "Total documents: 3,000\n", + "Dataset: SQuAD from Hugging Face\n" + ] + } + ], + "source": [ + "# Benchmark configuration\n", + 
"@dataclass\n", + "class BenchmarkConfig:\n", + " dimensions: List[int]\n", + " algorithms: List[str]\n", + " docs_per_dimension: int\n", + " query_count: int\n", + " \n", + "# Initialize benchmark configuration\n", + "config = BenchmarkConfig(\n", + " dimensions=[384, 768, 1536],\n", + " algorithms=['flat', 'hnsw', 'svs-vamana'],\n", + " docs_per_dimension=1000,\n", + " query_count=50\n", + ")\n", + "\n", + "print(\n", + " \"๐Ÿ”ง Benchmark Configuration:\",\n", + " f\"Dimensions: {config.dimensions}\",\n", + " f\"Algorithms: {config.algorithms}\",\n", + " f\"Documents per dimension: {config.docs_per_dimension:,}\",\n", + " f\"Test queries: {config.query_count}\",\n", + " f\"Total documents: {len(config.dimensions) * config.docs_per_dimension:,}\",\n", + " f\"Dataset: SQuAD from Hugging Face\",\n", + " sep=\"\\n\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 1: Verify Redis and SVS Support" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โœ… Redis connection successful\n", + "๐Ÿ“Š Redis version: 8.2.2\n", + "๐Ÿ”ง SVS-VAMANA supported: โœ… Yes\n" + ] + } + ], + "source": [ + "# Test Redis connection and capabilities\n", + "try:\n", + " client = redis.Redis.from_url(REDIS_URL)\n", + " client.ping()\n", + " \n", + " redis_info = client.info()\n", + " redis_version = redis_info['redis_version']\n", + " \n", + " svs_supported = supports_svs(client)\n", + " \n", + " print(\n", + " \"โœ… Redis connection successful\",\n", + " f\"๐Ÿ“Š Redis version: {redis_version}\",\n", + " f\"๐Ÿ”ง SVS-VAMANA supported: {'โœ… Yes' if svs_supported else 'โŒ No'}\",\n", + " sep=\"\\n\"\n", + " )\n", + " \n", + " if not svs_supported:\n", + " print(\"โš ๏ธ SVS-VAMANA not supported. Benchmark will skip SVS tests.\")\n", + " config.algorithms = ['flat', 'hnsw'] # Remove SVS from tests\n", + " \n", + "except Exception as e:\n", + " print(f\"โŒ Redis connection failed: {e}\")\n", + " print(\"Please ensure Redis Stack is running on localhost:6379\")\n", + " raise" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 2: Load Real Dataset from Hugging Face\n", + "\n", + "Load the SQuAD dataset and generate real embeddings using sentence-transformers." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "def load_squad_dataset(num_docs: int) -> List[Dict[str, Any]]:\n", + " \"\"\"Load SQuAD dataset from Hugging Face\"\"\"\n", + " try:\n", + " from datasets import load_dataset\n", + " \n", + " print(\"๐Ÿ“ฅ Loading SQuAD dataset from Hugging Face...\")\n", + " \n", + " # Load SQuAD dataset\n", + " dataset = load_dataset(\"squad\", split=\"train\")\n", + " \n", + " # Take a subset for our benchmark\n", + " dataset = dataset.select(range(min(num_docs, len(dataset))))\n", + " \n", + " # Convert to our format\n", + " documents = []\n", + " for i, item in enumerate(dataset):\n", + " # Combine question and context for richer text\n", + " text = f\"{item['question']} {item['context']}\"\n", + " \n", + " documents.append({\n", + " 'doc_id': f'squad_{i:06d}',\n", + " 'title': item['title'],\n", + " 'question': item['question'],\n", + " 'context': item['context'][:500], # Truncate long contexts\n", + " 'text': text,\n", + " 'category': 'qa', # All are Q&A documents\n", + " 'score': 1.0\n", + " })\n", + " \n", + " print(f\"โœ… Loaded {len(documents)} documents from SQuAD\")\n", + " return documents\n", + " \n", + " except ImportError:\n", + " print(\"โš ๏ธ datasets library not available, falling back to local data\")\n", + " return load_local_fallback_data(num_docs)\n", + " except Exception as e:\n", + " print(f\"โš ๏ธ Failed to load SQuAD dataset: {e}\")\n", + " print(\"Falling back to local data...\")\n", + " return load_local_fallback_data(num_docs)\n", + "\n", + "def load_local_fallback_data(num_docs: int) -> List[Dict[str, Any]]:\n", + " \"\"\"Fallback to local movie dataset if SQuAD is not available\"\"\"\n", + " try:\n", + " import json\n", + " with open('resources/movies.json', 'r') as f:\n", + " movies = json.load(f)\n", + " \n", + " # Expand the small movie dataset by duplicating with variations\n", + " documents = []\n", + " for i in range(num_docs):\n", + " movie = movies[i % len(movies)]\n", + " documents.append({\n", + " 'doc_id': f'movie_{i:06d}',\n", + " 'title': f\"{movie['title']} (Variant {i // len(movies) + 1})\",\n", + " 'question': f\"What is {movie['title']} about?\",\n", + " 'context': movie['description'],\n", + " 'text': f\"What is {movie['title']} about? 
{movie['description']}\",\n", + " 'category': movie['genre'],\n", + " 'score': movie['rating']\n", + " })\n", + " \n", + " print(f\"โœ… Using local movie dataset: {len(documents)} documents\")\n", + " return documents\n", + " \n", + " except Exception as e:\n", + " print(f\"โŒ Failed to load local data: {e}\")\n", + " raise" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "๐Ÿ”„ Loading real dataset and generating embeddings...\n", + "โš ๏ธ datasets library not available, falling back to local data\n", + "โœ… Using local movie dataset: 1000 documents\n", + "\n", + "๐Ÿ“Š Processing 384D embeddings...\n", + "๐Ÿค– Generating 384D embeddings using all-MiniLM-L6-v2...\n", + "15:25:46 sentence_transformers.SentenceTransformer INFO Use pytorch device_name: mps\n", + "15:25:46 sentence_transformers.SentenceTransformer INFO Load pretrained SentenceTransformer: all-MiniLM-L6-v2\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "b1150836f3904e0583662c68be5ef79f", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Batches: 0%| | 0/32 [00:00 np.ndarray:\n", + " \"\"\"Generate embeddings for texts using sentence-transformers\"\"\"\n", + " try:\n", + " from sentence_transformers import SentenceTransformer\n", + " \n", + " # Choose model based on target dimensions\n", + " if dimensions == 384:\n", + " model_name = 'all-MiniLM-L6-v2'\n", + " elif dimensions == 768:\n", + " model_name = 'all-mpnet-base-v2'\n", + " elif dimensions == 1536:\n", + " # For 1536D, use gtr-t5-xl which produces native 1536D embeddings\n", + " model_name = 'sentence-transformers/gtr-t5-xl'\n", + " else:\n", + " model_name = 'all-MiniLM-L6-v2' # Default\n", + " \n", + " print(f\"๐Ÿค– Generating {dimensions}D embeddings using {model_name}...\")\n", + " \n", + " model = SentenceTransformer(model_name)\n", + " embeddings = model.encode(texts, convert_to_numpy=True, show_progress_bar=True)\n", + " \n", + " # Handle dimension adjustment\n", + " current_dims = embeddings.shape[1]\n", + " if current_dims < dimensions:\n", + " # Pad with small random values (better than zeros)\n", + " padding_size = dimensions - current_dims\n", + " padding = np.random.normal(0, 0.01, (embeddings.shape[0], padding_size))\n", + " embeddings = np.concatenate([embeddings, padding], axis=1)\n", + " elif current_dims > dimensions:\n", + " # Truncate\n", + " embeddings = embeddings[:, :dimensions]\n", + " \n", + " # Normalize embeddings\n", + " norms = np.linalg.norm(embeddings, axis=1, keepdims=True)\n", + " embeddings = embeddings / norms\n", + " \n", + " print(f\"โœ… Generated embeddings: {embeddings.shape}\")\n", + " return embeddings.astype(np.float32)\n", + " \n", + " except ImportError:\n", + " print(f\"โš ๏ธ sentence-transformers not available, using synthetic embeddings\")\n", + " return generate_synthetic_embeddings(len(texts), dimensions)\n", + " except Exception as e:\n", + " print(f\"โš ๏ธ Error generating embeddings: {e}\")\n", + " print(\"Falling back to synthetic embeddings...\")\n", + " return generate_synthetic_embeddings(len(texts), dimensions)\n", + "\n", + "def generate_synthetic_embeddings(num_docs: int, dimensions: int) -> np.ndarray:\n", + " \"\"\"Generate synthetic embeddings as fallback\"\"\"\n", + " print(f\"๐Ÿ”„ Generating {num_docs} synthetic {dimensions}D embeddings...\")\n", + " \n", + " # Create base random vectors\n", + " embeddings = np.random.normal(0, 1, 
(num_docs, dimensions)).astype(np.float32)\n", + " \n", + " # Add some clustering structure\n", + " cluster_size = num_docs // 3\n", + " embeddings[:cluster_size, :min(50, dimensions)] += 0.5\n", + " embeddings[cluster_size:2*cluster_size, min(50, dimensions):min(100, dimensions)] += 0.5\n", + " \n", + " # Normalize vectors\n", + " norms = np.linalg.norm(embeddings, axis=1, keepdims=True)\n", + " embeddings = embeddings / norms\n", + " \n", + " return embeddings\n", + "\n", + "# Load real dataset and generate embeddings\n", + "print(\"๐Ÿ”„ Loading real dataset and generating embeddings...\")\n", + "\n", + "# Load the base dataset once\n", + "raw_documents = load_squad_dataset(config.docs_per_dimension)\n", + "texts = [doc['text'] for doc in raw_documents]\n", + "\n", + "# Generate separate query texts (use questions from SQuAD)\n", + "query_texts = [doc['question'] for doc in raw_documents[:config.query_count]]\n", + "\n", + "benchmark_data = {}\n", + "query_data = {}\n", + "\n", + "for dim in config.dimensions:\n", + " print(f\"\\n๐Ÿ“Š Processing {dim}D embeddings...\")\n", + " \n", + " # Generate embeddings for documents\n", + " embeddings = generate_embeddings_for_texts(texts, dim)\n", + " \n", + " # Generate embeddings for queries\n", + " query_embeddings = generate_embeddings_for_texts(query_texts, dim)\n", + " \n", + " # Combine documents with embeddings\n", + " documents = []\n", + " for i, (doc, embedding) in enumerate(zip(raw_documents, embeddings)):\n", + " documents.append({\n", + " **doc,\n", + " 'embedding': array_to_buffer(embedding, dtype='float32')\n", + " })\n", + " \n", + " benchmark_data[dim] = documents\n", + " query_data[dim] = query_embeddings\n", + "\n", + "print(\n", + " f\"\\nโœ… Generated benchmark data:\",\n", + " f\"Total documents: {sum(len(docs) for docs in benchmark_data.values()):,}\",\n", + " f\"Total queries: {sum(len(queries) for queries in query_data.values()):,}\",\n", + " f\"Dataset source: {'SQuAD (Hugging Face)' if 'squad_' in raw_documents[0]['doc_id'] else 'Local movies'}\",\n", + " sep=\"\\n\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 3: Index Creation Benchmark\n", + "\n", + "Measure index creation time and memory usage for each algorithm and dimension." 
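+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Index memory below is read from RediSearch's `FT.INFO` statistics, which redisvl exposes through `SearchIndex.info()`. A minimal sketch of the lookup the benchmark relies on:\n",
+    "\n",
+    "```python\n",
+    "info = index.info()\n",
+    "size_mb = float(info.get('vector_index_sz_mb', 0))  # MB used by the vector index\n",
+    "```"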
+ ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "๐Ÿ—๏ธ Running index creation benchmarks...\n", + "\n", + "๐Ÿ“Š Benchmarking 384D embeddings:\n", + " Creating FLAT index...\n", + " โœ… FLAT: 1.06s, 3.09MB\n", + " Creating HNSW index...\n", + " โœ… HNSW: 3.22s, 4.05MB\n", + " Creating SVS-VAMANA index...\n", + " โœ… SVS-VAMANA: 1.08s, 3.09MB\n", + "\n", + "๐Ÿ“Š Benchmarking 768D embeddings:\n", + " Creating FLAT index...\n", + " โœ… FLAT: 1.08s, 6.09MB\n", + " Creating HNSW index...\n", + " โœ… HNSW: 3.28s, 7.01MB\n", + " Creating SVS-VAMANA index...\n", + " โœ… SVS-VAMANA: 1.10s, 6.09MB\n", + "\n", + "๐Ÿ“Š Benchmarking 1536D embeddings:\n", + " Creating FLAT index...\n", + " โœ… FLAT: 1.07s, 12.09MB\n", + " Creating HNSW index...\n", + " โœ… HNSW: 3.26s, 12.84MB\n", + " Creating SVS-VAMANA index...\n", + " โœ… SVS-VAMANA: 1.08s, 0.00MB\n", + "\n", + "โœ… Index creation benchmarks complete!\n" + ] + } + ], + "source": [ + "def create_index_schema(algorithm: str, dimensions: int, prefix: str) -> Dict[str, Any]:\n", + " \"\"\"Create index schema for the specified algorithm\"\"\"\n", + " \n", + " base_schema = {\n", + " \"index\": {\n", + " \"name\": f\"benchmark_{algorithm}_{dimensions}d\",\n", + " \"prefix\": prefix,\n", + " },\n", + " \"fields\": [\n", + " {\"name\": \"doc_id\", \"type\": \"tag\"},\n", + " {\"name\": \"title\", \"type\": \"text\"},\n", + " {\"name\": \"category\", \"type\": \"tag\"},\n", + " {\"name\": \"score\", \"type\": \"numeric\"},\n", + " {\n", + " \"name\": \"embedding\",\n", + " \"type\": \"vector\",\n", + " \"attrs\": {\n", + " \"dims\": dimensions,\n", + " \"distance_metric\": \"cosine\",\n", + " \"datatype\": \"float32\"\n", + " }\n", + " }\n", + " ]\n", + " }\n", + " \n", + " # Algorithm-specific configurations\n", + " vector_field = base_schema[\"fields\"][-1][\"attrs\"]\n", + " \n", + " if algorithm == 'flat':\n", + " vector_field[\"algorithm\"] = \"flat\"\n", + " \n", + " elif algorithm == 'hnsw':\n", + " vector_field.update({\n", + " \"algorithm\": \"hnsw\",\n", + " \"m\": 16,\n", + " \"ef_construction\": 200,\n", + " \"ef_runtime\": 10\n", + " })\n", + " \n", + " elif algorithm == 'svs-vamana':\n", + " # Get compression recommendation\n", + " compression_config = CompressionAdvisor.recommend(dims=dimensions, priority=\"memory\")\n", + " \n", + " vector_field.update({\n", + " \"algorithm\": \"svs-vamana\",\n", + " \"datatype\": compression_config.get('datatype', 'float32')\n", + " })\n", + " \n", + " # Handle dimensionality reduction for high dimensions\n", + " if 'reduce' in compression_config:\n", + " vector_field[\"dims\"] = compression_config['reduce']\n", + " \n", + " return base_schema\n", + "\n", + "def benchmark_index_creation(algorithm: str, dimensions: int, documents: List[Dict]) -> Tuple[SearchIndex, float, float]:\n", + " \"\"\"Benchmark index creation and return index, build time, and memory usage\"\"\"\n", + " \n", + " prefix = f\"bench:{algorithm}:{dimensions}d:\"\n", + " \n", + " # Clean up any existing index\n", + " try:\n", + " client.execute_command('FT.DROPINDEX', f'benchmark_{algorithm}_{dimensions}d')\n", + " except:\n", + " pass\n", + " \n", + " # Create schema and index\n", + " schema = create_index_schema(algorithm, dimensions, prefix)\n", + " \n", + " start_time = time.time()\n", + " \n", + " # Create index\n", + " index = SearchIndex.from_dict(schema, redis_url=REDIS_URL)\n", + " index.create(overwrite=True)\n", + " \n", 
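+    "    # NOTE: the timed region includes batched loading plus the fixed sleep\n",
+    "    # below (1s, or 3s for HNSW), so reported build times carry that overhead\n",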
+ " # Load data in batches\n", + " batch_size = 100\n", + " for i in range(0, len(documents), batch_size):\n", + " batch = documents[i:i+batch_size]\n", + " index.load(batch)\n", + " \n", + " # Wait for indexing to complete\n", + " if algorithm == 'hnsw':\n", + " time.sleep(3) # HNSW needs more time for graph construction\n", + " else:\n", + " time.sleep(1)\n", + " \n", + " build_time = time.time() - start_time\n", + " \n", + " # Get index info for memory usage\n", + " try:\n", + " index_info = index.info()\n", + " index_size_mb = float(index_info.get('vector_index_sz_mb', 0))\n", + " except:\n", + " index_size_mb = 0.0\n", + " \n", + " return index, build_time, index_size_mb\n", + "\n", + "# Run index creation benchmarks\n", + "print(\"๐Ÿ—๏ธ Running index creation benchmarks...\")\n", + "\n", + "creation_results = {}\n", + "indices = {}\n", + "\n", + "for dim in config.dimensions:\n", + " print(f\"\\n๐Ÿ“Š Benchmarking {dim}D embeddings:\")\n", + " \n", + " for algorithm in config.algorithms:\n", + " print(f\" Creating {algorithm.upper()} index...\")\n", + " \n", + " try:\n", + " index, build_time, index_size_mb = benchmark_index_creation(\n", + " algorithm, dim, benchmark_data[dim]\n", + " )\n", + " \n", + " creation_results[f\"{algorithm}_{dim}\"] = {\n", + " 'algorithm': algorithm,\n", + " 'dimensions': dim,\n", + " 'build_time_sec': build_time,\n", + " 'index_size_mb': index_size_mb,\n", + " 'num_docs': len(benchmark_data[dim])\n", + " }\n", + " \n", + " indices[f\"{algorithm}_{dim}\"] = index\n", + " \n", + " print(\n", + " f\" โœ… {algorithm.upper()}: {build_time:.2f}s, {index_size_mb:.2f}MB\"\n", + " )\n", + " \n", + " except Exception as e:\n", + " print(f\" โŒ {algorithm.upper()} failed: {e}\")\n", + " creation_results[f\"{algorithm}_{dim}\"] = None\n", + "\n", + "print(\"\\nโœ… Index creation benchmarks complete!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 4: Query Performance Benchmark\n", + "\n", + "Measure query latency and search quality for each algorithm." 
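+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Search quality is measured as recall@k against exhaustive FLAT search over the same queries, so FLAT is the ground truth and scores 1.0 by construction. For example, if FLAT returns `[a, b, c, d, e]` for a query and HNSW returns `[a, c, x, y, b]`, the top-5 overlap is `{a, b, c}`, giving recall@5 = 3/5 = 0.6. With the `calculate_recall` helper defined below:\n",
+    "\n",
+    "```python\n",
+    "calculate_recall(['a', 'c', 'x', 'y', 'b'], ['a', 'b', 'c', 'd', 'e'], k=5)  # -> 0.6\n",
+    "```"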
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "๐Ÿ” Running query performance benchmarks...\n", + "\n", + "๐Ÿ“Š Benchmarking 384D queries:\n", + " Testing FLAT queries...\n", + " โœ… FLAT: 1.63ms avg, R@5: 1.000, R@10: 1.000\n", + " Testing HNSW queries...\n", + " โœ… HNSW: 1.36ms avg, R@5: 0.080, R@10: 0.212\n", + " Testing SVS-VAMANA queries...\n", + " โœ… SVS-VAMANA: 1.25ms avg, R@5: 0.256, R@10: 0.364\n", + "\n", + "๐Ÿ“Š Benchmarking 768D queries:\n", + " Testing FLAT queries...\n", + " โœ… FLAT: 1.56ms avg, R@5: 1.000, R@10: 1.000\n", + " Testing HNSW queries...\n", + " โœ… HNSW: 1.26ms avg, R@5: 0.128, R@10: 0.208\n", + " Testing SVS-VAMANA queries...\n", + " โœ… SVS-VAMANA: 1.86ms avg, R@5: 0.128, R@10: 0.238\n", + "\n", + "๐Ÿ“Š Benchmarking 1536D queries:\n", + " Testing FLAT queries...\n", + " โœ… FLAT: 2.13ms avg, R@5: 1.000, R@10: 1.000\n", + " Testing HNSW queries...\n", + " โœ… HNSW: 1.35ms avg, R@5: 0.896, R@10: 0.890\n", + " Testing SVS-VAMANA queries...\n", + " โœ… SVS-VAMANA: 0.97ms avg, R@5: 0.000, R@10: 0.000\n", + "\n", + "โœ… Query performance benchmarks complete!\n" + ] + } + ], + "source": [ + "def calculate_recall(retrieved_ids: List[str], ground_truth_ids: List[str], k: int) -> float:\n", + " \"\"\"Calculate recall@k between retrieved and ground truth results\"\"\"\n", + " if not ground_truth_ids or not retrieved_ids:\n", + " return 0.0\n", + " \n", + " retrieved_set = set(retrieved_ids[:k])\n", + " ground_truth_set = set(ground_truth_ids[:k])\n", + " \n", + " if len(ground_truth_set) == 0:\n", + " return 0.0\n", + " \n", + " intersection = len(retrieved_set.intersection(ground_truth_set))\n", + " return intersection / len(ground_truth_set)\n", + "\n", + "def benchmark_query_performance(index: SearchIndex, query_vectors: np.ndarray, \n", + " algorithm: str, dimensions: int) -> Dict[str, float]:\n", + " \"\"\"Benchmark query performance and quality\"\"\"\n", + " \n", + " latencies = []\n", + " all_results = []\n", + " \n", + " # Get ground truth from FLAT index (if available)\n", + " ground_truth_results = []\n", + " flat_index_key = f\"flat_{dimensions}\"\n", + " \n", + " if flat_index_key in indices and algorithm != 'flat':\n", + " flat_index = indices[flat_index_key]\n", + " for query_vec in query_vectors:\n", + " query = VectorQuery(\n", + " vector=query_vec,\n", + " vector_field_name=\"embedding\",\n", + " return_fields=[\"doc_id\"],\n", + " dtype=\"float32\",\n", + " num_results=10\n", + " )\n", + " results = flat_index.query(query)\n", + " ground_truth_results.append([doc[\"doc_id\"] for doc in results])\n", + " \n", + " # Benchmark the target algorithm\n", + " for i, query_vec in enumerate(query_vectors):\n", + " # Adjust query vector for SVS if needed\n", + " if algorithm == 'svs-vamana':\n", + " compression_config = CompressionAdvisor.recommend(dims=dimensions, priority=\"memory\")\n", + " \n", + " if 'reduce' in compression_config:\n", + " target_dims = compression_config['reduce']\n", + " if target_dims < dimensions:\n", + " query_vec = query_vec[:target_dims]\n", + " \n", + " if compression_config.get('datatype') == 'float16':\n", + " query_vec = query_vec.astype(np.float16)\n", + " dtype = 'float16'\n", + " else:\n", + " dtype = 'float32'\n", + " else:\n", + " dtype = 'float32'\n", + " \n", + " # Execute query with timing\n", + " start_time = time.time()\n", + " \n", + " query = VectorQuery(\n", + " vector=query_vec,\n", + " 
vector_field_name=\"embedding\",\n", + " return_fields=[\"doc_id\", \"title\", \"category\"],\n", + " dtype=dtype,\n", + " num_results=10\n", + " )\n", + " \n", + " results = index.query(query)\n", + " latency = time.time() - start_time\n", + " \n", + " latencies.append(latency * 1000) # Convert to milliseconds\n", + " all_results.append([doc[\"doc_id\"] for doc in results])\n", + " \n", + " # Calculate metrics\n", + " avg_latency = np.mean(latencies)\n", + " \n", + " # Calculate recall if we have ground truth\n", + " if ground_truth_results and algorithm != 'flat':\n", + " recall_5_scores = []\n", + " recall_10_scores = []\n", + " \n", + " for retrieved, ground_truth in zip(all_results, ground_truth_results):\n", + " recall_5_scores.append(calculate_recall(retrieved, ground_truth, 5))\n", + " recall_10_scores.append(calculate_recall(retrieved, ground_truth, 10))\n", + " \n", + " recall_at_5 = np.mean(recall_5_scores)\n", + " recall_at_10 = np.mean(recall_10_scores)\n", + " else:\n", + " # FLAT is our ground truth, so perfect recall\n", + " recall_at_5 = 1.0 if algorithm == 'flat' else 0.0\n", + " recall_at_10 = 1.0 if algorithm == 'flat' else 0.0\n", + " \n", + " return {\n", + " 'avg_query_time_ms': avg_latency,\n", + " 'recall_at_5': recall_at_5,\n", + " 'recall_at_10': recall_at_10,\n", + " 'num_queries': len(query_vectors)\n", + " }\n", + "\n", + "# Run query performance benchmarks\n", + "print(\"๐Ÿ” Running query performance benchmarks...\")\n", + "\n", + "query_results = {}\n", + "\n", + "for dim in config.dimensions:\n", + " print(f\"\\n๐Ÿ“Š Benchmarking {dim}D queries:\")\n", + " \n", + " for algorithm in config.algorithms:\n", + " index_key = f\"{algorithm}_{dim}\"\n", + " \n", + " if index_key in indices:\n", + " print(f\" Testing {algorithm.upper()} queries...\")\n", + " \n", + " try:\n", + " performance = benchmark_query_performance(\n", + " indices[index_key], \n", + " query_data[dim], \n", + " algorithm, \n", + " dim\n", + " )\n", + " \n", + " query_results[index_key] = performance\n", + " \n", + " print(\n", + " f\" โœ… {algorithm.upper()}: {performance['avg_query_time_ms']:.2f}ms avg, \"\n", + " f\"R@5: {performance['recall_at_5']:.3f}, R@10: {performance['recall_at_10']:.3f}\"\n", + " )\n", + " \n", + " except Exception as e:\n", + " print(f\" โŒ {algorithm.upper()} query failed: {e}\")\n", + " query_results[index_key] = None\n", + " else:\n", + " print(f\" โญ๏ธ Skipping {algorithm.upper()} (index creation failed)\")\n", + "\n", + "print(\"\\nโœ… Query performance benchmarks complete!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 5: Results Analysis and Visualization\n", + "\n", + "Analyze and visualize the benchmark results with real data." 
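+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "If you want to compare runs over time (different redisvl versions, embedding models, or dataset sizes), it helps to persist the combined results. A minimal sketch to run after the analysis cell below; the `benchmark_results.csv` filename is arbitrary:\n",
+    "\n",
+    "```python\n",
+    "df_results.to_csv('benchmark_results.csv', index=False)\n",
+    "```"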
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Combine results into comprehensive dataset\n",
+    "def create_results_dataframe() -> pd.DataFrame:\n",
+    "    \"\"\"Combine all benchmark results into a pandas DataFrame\"\"\"\n",
+    "\n",
+    "    results = []\n",
+    "\n",
+    "    for dim in config.dimensions:\n",
+    "        for algorithm in config.algorithms:\n",
+    "            key = f\"{algorithm}_{dim}\"\n",
+    "\n",
+    "            if key in creation_results and creation_results[key] is not None:\n",
+    "                creation_data = creation_results[key]\n",
+    "                # Failed query benchmarks are stored as None, so fall back to {}\n",
+    "                query_data_item = query_results.get(key) or {}\n",
+    "\n",
+    "                result = {\n",
+    "                    'algorithm': algorithm,\n",
+    "                    'dimensions': dim,\n",
+    "                    'num_docs': creation_data['num_docs'],\n",
+    "                    'build_time_sec': creation_data['build_time_sec'],\n",
+    "                    'index_size_mb': creation_data['index_size_mb'],\n",
+    "                    'avg_query_time_ms': query_data_item.get('avg_query_time_ms', 0),\n",
+    "                    'recall_at_5': query_data_item.get('recall_at_5', 0),\n",
+    "                    'recall_at_10': query_data_item.get('recall_at_10', 0)\n",
+    "                }\n",
+    "\n",
+    "                results.append(result)\n",
+    "\n",
+    "    return pd.DataFrame(results)\n",
+    "\n",
+    "# Create results DataFrame\n",
+    "df_results = create_results_dataframe()\n",
+    "\n",
+    "print(\"📊 Real Data Benchmark Results Summary:\")\n",
+    "print(df_results.to_string(index=False, float_format='%.3f'))\n",
+    "\n",
+    "# Display key insights\n",
+    "if not df_results.empty:\n",
+    "    print(\"\\n🎯 Key Insights from Real Data:\")\n",
+    "\n",
+    "    # Memory efficiency\n",
+    "    best_memory = df_results.loc[df_results['index_size_mb'].idxmin()]\n",
+    "    print(f\"🏆 Most memory efficient: {best_memory['algorithm'].upper()} at {best_memory['dimensions']}D ({best_memory['index_size_mb']:.2f}MB)\")\n",
+    "\n",
+    "    # Query speed\n",
+    "    best_speed = df_results.loc[df_results['avg_query_time_ms'].idxmin()]\n",
+    "    print(f\"⚡ Fastest queries: {best_speed['algorithm'].upper()} at {best_speed['dimensions']}D ({best_speed['avg_query_time_ms']:.2f}ms)\")\n",
+    "\n",
+    "    # Search quality\n",
+    "    best_quality = df_results.loc[df_results['recall_at_10'].idxmax()]\n",
+    "    print(f\"🎯 Best search quality: {best_quality['algorithm'].upper()} at {best_quality['dimensions']}D (R@10: {best_quality['recall_at_10']:.3f})\")\n",
+    "\n",
+    "    # Dataset info\n",
+    "    dataset_source = 'SQuAD (Hugging Face)' if 'squad_' in raw_documents[0]['doc_id'] else 'Local movies'\n",
+    "    print(f\"\\n📚 Dataset: {dataset_source}\")\n",
+    "    print(f\"📊 Total documents tested: {df_results['num_docs'].iloc[0]:,}\")\n",
+    "    print(f\"🔍 Total queries per dimension: {config.query_count}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create visualizations for real data results\n",
+    "def create_real_data_visualizations(df: pd.DataFrame):\n",
+    "    \"\"\"Create visualizations for real data benchmark results\"\"\"\n",
+    "\n",
+    "    if df.empty:\n",
+    "        print(\"⚠️ No results to visualize\")\n",
+    "        return\n",
+    "\n",
+    "    # Set up the plotting area\n",
+    "    fig, axes = plt.subplots(2, 2, figsize=(15, 10))\n",
+    "    fig.suptitle('Real Data Vector Algorithm Benchmark Results', fontsize=16, fontweight='bold')\n",
+    "\n",
+    "    # 1. Memory Usage Comparison\n",
+    "    ax1 = axes[0, 0]\n",
+    "    pivot_memory = df.pivot(index='dimensions', columns='algorithm', values='index_size_mb')\n",
+    "    pivot_memory.plot(kind='bar', ax=ax1, width=0.8)\n",
+    "    ax1.set_title('Index Size by Algorithm (Real Data)')\n",
+    "    ax1.set_xlabel('Dimensions')\n",
+    "    ax1.set_ylabel('Index Size (MB)')\n",
+    "    ax1.legend(title='Algorithm')\n",
+    "    ax1.tick_params(axis='x', rotation=0)\n",
+    "\n",
+    "    # 2. Query Performance\n",
+    "    ax2 = axes[0, 1]\n",
+    "    pivot_query = df.pivot(index='dimensions', columns='algorithm', values='avg_query_time_ms')\n",
+    "    pivot_query.plot(kind='bar', ax=ax2, width=0.8)\n",
+    "    ax2.set_title('Average Query Time (Real Embeddings)')\n",
+    "    ax2.set_xlabel('Dimensions')\n",
+    "    ax2.set_ylabel('Query Time (ms)')\n",
+    "    ax2.legend(title='Algorithm')\n",
+    "    ax2.tick_params(axis='x', rotation=0)\n",
+    "\n",
+    "    # 3. Search Quality\n",
+    "    ax3 = axes[1, 0]\n",
+    "    pivot_recall = df.pivot(index='dimensions', columns='algorithm', values='recall_at_10')\n",
+    "    pivot_recall.plot(kind='bar', ax=ax3, width=0.8)\n",
+    "    ax3.set_title('Search Quality (Recall@10)')\n",
+    "    ax3.set_xlabel('Dimensions')\n",
+    "    ax3.set_ylabel('Recall@10')\n",
+    "    ax3.legend(title='Algorithm')\n",
+    "    ax3.tick_params(axis='x', rotation=0)\n",
+    "    ax3.set_ylim(0, 1.1)\n",
+    "\n",
+    "    # 4. Memory Efficiency\n",
+    "    ax4 = axes[1, 1]\n",
+    "    # Guard against zero-size readings (e.g., SVS-VAMANA at 1536D above),\n",
+    "    # which would otherwise produce infinite docs-per-MB values\n",
+    "    df['docs_per_mb'] = df['num_docs'] / df['index_size_mb'].replace(0, np.nan)\n",
+    "    pivot_efficiency = df.pivot(index='dimensions', columns='algorithm', values='docs_per_mb')\n",
+    "    pivot_efficiency.plot(kind='bar', ax=ax4, width=0.8)\n",
+    "    ax4.set_title('Memory Efficiency (Real Data)')\n",
+    "    ax4.set_xlabel('Dimensions')\n",
+    "    ax4.set_ylabel('Documents per MB')\n",
+    "    ax4.legend(title='Algorithm')\n",
+    "    ax4.tick_params(axis='x', rotation=0)\n",
+    "\n",
+    "    plt.tight_layout()\n",
+    "    plt.show()\n",
+    "\n",
+    "# Create visualizations\n",
+    "create_real_data_visualizations(df_results)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Step 6: Real Data Insights and Recommendations\n",
+    "\n",
+    "Generate insights based on real data performance."
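+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "To turn the table above into a concrete recommendation, a small helper can rank algorithms on whatever metric you care about. This is an illustrative heuristic over the benchmark DataFrame, not an official selection rule; `best_algorithm` is a name introduced here:\n",
+    "\n",
+    "```python\n",
+    "def best_algorithm(df, dim, metric='recall_at_10', ascending=False):\n",
+    "    # Rank the algorithms tested at a given dimension by the chosen metric\n",
+    "    subset = df[df['dimensions'] == dim].sort_values(metric, ascending=ascending)\n",
+    "    return subset.iloc[0]['algorithm'] if not subset.empty else None\n",
+    "\n",
+    "best_algorithm(df_results, 768)                             # best recall\n",
+    "best_algorithm(df_results, 768, 'index_size_mb', True)      # smallest index\n",
+    "best_algorithm(df_results, 768, 'avg_query_time_ms', True)  # fastest queries\n",
+    "```"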
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Generate real data specific recommendations\n", + "if not df_results.empty:\n", + " dataset_source = 'SQuAD (Hugging Face)' if 'squad_' in raw_documents[0]['doc_id'] else 'Local movies'\n", + " \n", + " print(\n", + " f\"๐ŸŽฏ Real Data Benchmark Insights\",\n", + " f\"Dataset: {dataset_source}\",\n", + " f\"Documents: {df_results['num_docs'].iloc[0]:,} per dimension\",\n", + " f\"Embedding Models: sentence-transformers\",\n", + " \"=\" * 50,\n", + " sep=\"\\n\"\n", + " )\n", + " \n", + " for dim in config.dimensions:\n", + " dim_data = df_results[df_results['dimensions'] == dim]\n", + " \n", + " if not dim_data.empty:\n", + " print(f\"\\n๐Ÿ“Š {dim}D Embeddings Analysis:\")\n", + " \n", + " for _, row in dim_data.iterrows():\n", + " algo = row['algorithm'].upper()\n", + " print(\n", + " f\" {algo}:\",\n", + " f\" Index: {row['index_size_mb']:.2f}MB\",\n", + " f\" Query: {row['avg_query_time_ms']:.2f}ms\",\n", + " f\" Recall@10: {row['recall_at_10']:.3f}\",\n", + " f\" Efficiency: {row['docs_per_mb']:.1f} docs/MB\",\n", + " sep=\"\\n\"\n", + " )\n", + " \n", + " print(\n", + " f\"\\n๐Ÿ’ก Key Takeaways with Real Data:\",\n", + " \"โ€ข Real embeddings show different performance characteristics than synthetic\",\n", + " \"โ€ข Sentence-transformer models provide realistic vector distributions\",\n", + " \"โ€ข SQuAD Q&A pairs offer diverse semantic content for testing\",\n", + " \"โ€ข Results are more representative of production workloads\",\n", + " \"โ€ข Consider testing with your specific embedding models and data\",\n", + " sep=\"\\n\"\n", + " )\n", + "else:\n", + " print(\"โš ๏ธ No results available for analysis\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 7: Cleanup\n", + "\n", + "Clean up benchmark indices to free memory." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Clean up all benchmark indices\n", + "print(\"๐Ÿงน Cleaning up benchmark indices...\")\n", + "\n", + "cleanup_count = 0\n", + "for index_key, index in indices.items():\n", + " try:\n", + " index.delete(drop=True)\n", + " cleanup_count += 1\n", + " print(f\" โœ… Deleted {index_key}\")\n", + " except Exception as e:\n", + " print(f\" โš ๏ธ Failed to delete {index_key}: {e}\")\n", + "\n", + "dataset_source = 'SQuAD (Hugging Face)' if 'squad_' in raw_documents[0]['doc_id'] else 'Local movies'\n", + "\n", + "print(\n", + " f\"\\n๐ŸŽ‰ Real Data Benchmark Complete!\",\n", + " f\"Dataset: {dataset_source}\",\n", + " f\"Cleaned up {cleanup_count} indices\",\n", + " f\"\\nNext steps:\",\n", + " \"1. Review the real data performance characteristics above\",\n", + " \"2. Compare with synthetic data results if available\",\n", + " \"3. Test with your specific embedding models and datasets\",\n", + " \"4. Scale up with larger datasets for production insights\",\n", + " \"5. 
Consider the impact of real text diversity on algorithm performance\",\n",
+    "    sep=\"\\n\"\n",
+    ")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/python-recipes/vector_search.py b/python-recipes/vector_search.py
new file mode 100644
index 00000000..1ec9a1e7
--- /dev/null
+++ b/python-recipes/vector_search.py
@@ -0,0 +1,196 @@
+import os
+
+import pandas as pd
+from redis import Redis
+from redisvl.extensions.cache.embeddings import EmbeddingsCache
+from redisvl.index import SearchIndex
+from redisvl.query import VectorQuery, VectorRangeQuery, TextQuery, HybridQuery
+from redisvl.query.filter import Tag, Num, Text
+from redisvl.schema import IndexSchema
+from redisvl.utils.vectorize import HFTextVectorizer
+
+os.environ["TOKENIZERS_PARALLELISM"] = "false"
+
+
+def create_index(client):
+    """Create (or recreate) the movies index and return its SearchIndex handle."""
+    index_name = "movies"
+
+    schema = IndexSchema.from_dict({
+        "index": {
+            "name": index_name,
+            "prefix": index_name,
+            "storage_type": "hash"
+        },
+        "fields": [
+            {
+                "name": "title",
+                "type": "text",
+            },
+            {
+                "name": "description",
+                "type": "text",
+            },
+            {
+                "name": "genre",
+                "type": "tag",
+                "attrs": {
+                    "sortable": True
+                }
+            },
+            {
+                "name": "rating",
+                "type": "numeric",
+                "attrs": {
+                    "sortable": True
+                }
+            },
+            {
+                "name": "vector",
+                "type": "vector",
+                "attrs": {
+                    "dims": 384,
+                    "distance_metric": "cosine",
+                    "algorithm": "flat",
+                    "datatype": "float32"
+                }
+            }
+        ]
+    })
+
+    index = SearchIndex(schema, client)
+    index.create(overwrite=True, drop=True)
+    return index
+
+
+def run(client):
+    df = pd.read_json("vector-search/resources/movies.json")
+    print("Loaded", len(df), "movie entries")
+
+    # Embed the descriptions once; the cache avoids re-embedding on repeated runs
+    hf = HFTextVectorizer(
+        model="sentence-transformers/all-MiniLM-L6-v2",
+        cache=EmbeddingsCache(
+            name="embedcache",
+            ttl=600,
+            redis_client=client,
+        )
+    )
+    df["vector"] = hf.embed_many(df["description"].tolist(), as_buffer=True)
+
+    index = create_index(client)
+    index.load(df.to_dict(orient="records"))
+
+    print("1) Basic vector search")
+    user_query = "Action movie with high tech"
+    embedded_user_query = hf.embed(user_query)
+    vec_query = VectorQuery(
+        vector=embedded_user_query,
+        vector_field_name="vector",
+        num_results=5,
+        return_fields=["title", "genre", "rating"],
+        return_score=True,
+    )
+    for result in index.query(vec_query):
+        print(result)
+
+    print("2) Vector search with tag and numeric filters")
+    tag_filter = Tag("genre") == "action"
+    num_filter = Num("rating") >= 8
+    combined_filter = tag_filter & num_filter
+    vec_query.set_filter(combined_filter)
+    for result in index.query(vec_query):
+        print(result)
+
+    print("3) Vector search with a full-text filter")
+    text_filter = Text("description") % "hero"
+    vec_query.set_filter(text_filter)
+    for result in index.query(vec_query):
+        print(result)
+
+    print("4) Vector search with a fuzzy text filter")
+    text_filter = Text("description") % "%thermopoli%"  # fuzzy match, Levenshtein distance 1
+    vec_query = VectorQuery(
+        vector=embedded_user_query,
+        vector_field_name="vector",
+        num_results=3,
+        return_fields=["title", "rating", "genre", "description"],
+        return_score=True,
+        filter_expression=text_filter
+    )
+    for result in index.query(vec_query):
+        print(result)
+
+    print("5) Range query: all matches within a distance threshold")
+    user_query = "Family friendly super hero movies"
+    embedded_query = hf.embed(user_query)
+    combined_filter = (Tag("genre") == "action") & (Num("rating") >= 8)
+    range_query = VectorRangeQuery(
+        vector=embedded_query,
+        vector_field_name="vector",
+        return_fields=["title", "rating", "genre"],
+        return_score=True,
+        distance_threshold=0.8,
+        filter_expression=combined_filter
+    )
+    for result in index.query(range_query):
+        print(result)
+
+    print("6) Full-text search with German stop words removed")
+    # Mixed German/English query to demonstrate stop word handling
+    user_query = "das High tech, action packed, superheros mit fight scenes"
+    text_query = TextQuery(
+        text=user_query,
+        text_field_name="description",
+        text_scorer="BM25STD",
+        num_results=10,
+        return_fields=["title", "description"],
+        stopwords="german"
+    )
+    for result in index.query(text_query):
+        print(result)
+
+    print("7) Hybrid search combining text and vector scores")
+    user_query = "Family movie with action scenes"
+    embedded_user_query = hf.embed(user_query)
+    hybrid_query = HybridQuery(
+        text=user_query,
+        text_field_name="description",
+        vector=embedded_user_query,
+        vector_field_name="vector",
+        return_fields=["title", "description"],
+        num_results=10,
+        alpha=0.7,  # 70% emphasis on vector similarity, 30% on text
+        # stopwords="english"
+    )
+    # Roughly equivalent raw query:
+    # FT.SEARCH movies
+    #   "(@description:user_query_text) => {$weight: 0.3} [KNN 10 @vector $vector_blob
+    #   AS vector_score]"
+    #   PARAMS 2 vector_blob
+    #   RETURN 6 title description vector_score
+    #   SORTBY vector_score ASC
+    #   LIMIT 0 10
+    for result in index.query(hybrid_query):
+        print(result)
+
+
+if __name__ == "__main__":
+    client = Redis.from_url("redis://localhost:6379")
+    # Alternative handle construction:
+    #   SearchIndex.from_dict(schema, redis_client=client, validate_on_load=True)
+    #   SearchIndex.from_dict(schema, redis_url="redis://localhost:6379", validate_on_load=True)
+    run(client)