Commit a0a742e

Force fresh package install with cache busting
1 parent b743bc4 commit a0a742e
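Note that the diff below only adds two test scripts; the cache-busting package reinstall mentioned in the commit title is not itself part of the diff. As a rough sketch of what such a step might look like with uv (the installer hinted at in test_direct_evaluator.py), with the exact flags being an assumption rather than something shown in this commit:

uv pip install --reinstall --no-cache git+https://...@feature/user-friendly-setup

pip's closest equivalents would be --force-reinstall and --no-cache-dir.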

File tree

test_direct_evaluator.py
test_rl_query.py

2 files changed: +99 -0 lines changed


test_direct_evaluator.py

Lines changed: 51 additions & 0 deletions
@@ -0,0 +1,51 @@
#!/usr/bin/env python3
"""
Quick test of Direct evaluator with tools.py
"""
import asyncio
import os
from dotenv import load_dotenv

load_dotenv()

async def test_direct_evaluator():
    # Import from the CI/CD package
    try:
        from ld_aic_cicd.evaluators.direct import DirectEvaluator
    except ImportError:
        print("❌ ld-aic-cicd package not installed")
        print("Install with: uv pip install git+https://...@feature/user-friendly-setup")
        return

    print("✅ DirectEvaluator imported successfully")

    # Initialize evaluator
    try:
        evaluator = DirectEvaluator()
        print("✅ DirectEvaluator initialized")
    except Exception as e:
        print(f"❌ Failed to initialize: {e}")
        return

    # Test with support-agent config
    print("\n🧪 Testing support-agent with tools...")
    result = await evaluator.evaluate_case(
        config_key="support-agent",
        test_input="What is LaunchDarkly?",
        context_attributes={
            "key": "test-user",
            "country": "US",
            "plan": "free"
        }
    )

    print("\n📊 Result:")
    print(f"  Variation: {result.variation}")
    print(f"  Latency: {result.latency_ms:.0f}ms")
    print(f"  Error: {result.error}")
    print(f"  Response preview: {result.response[:200]}...")

    await evaluator.cleanup()

if __name__ == "__main__":
    asyncio.run(test_direct_evaluator())
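
To run this check, the script presumably only needs whatever credentials DirectEvaluator expects to find in the environment, loaded here from a local .env file via load_dotenv() (the .env itself is not part of this commit):

python test_direct_evaluator.py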

test_rl_query.py

Lines changed: 48 additions & 0 deletions
@@ -0,0 +1,48 @@
#!/usr/bin/env python3
"""Quick test to verify RL knowledge base queries work"""

import requests
import json

API_URL = "http://localhost:8000"

# Test query from new test data
query = "What is a Markov Decision Process and why is it important in reinforcement learning?"

print(f"Testing RL query: {query}\n")

try:
    response = requests.post(
        f"{API_URL}/chat",
        json={
            "message": query,
            "user_id": "test_user",
            "user_context": {"country": "US", "plan": "paid"}
        },
        timeout=30
    )

    if response.status_code == 200:
        data = response.json()
        print("✅ Success!")
        print(f"\nResponse: {data['response'][:500]}...")
        print(f"\nVariation: {data.get('variation_key')}")
        print(f"Model: {data.get('model')}")

        # Check if response mentions RL concepts
        response_lower = data['response'].lower()
        rl_terms = ['markov', 'mdp', 'state', 'action', 'reward', 'reinforcement']
        found_terms = [term for term in rl_terms if term in response_lower]

        print(f"\nRL terms found: {found_terms}")

        if len(found_terms) >= 3:
            print("✅ Response appears to be about RL!")
        else:
            print("⚠️ Response may not be about RL")
    else:
        print(f"❌ Error: {response.status_code}")
        print(response.text)

except Exception as e:
    print(f"❌ Error: {e}")
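
This script assumes the project's chat API is already running locally at http://localhost:8000 (the API_URL at the top); with that server up it can be run the same way:

python test_rl_query.py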
