diff --git a/.gitignore b/.gitignore
index 6d9877f9..bb7659aa 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,4 +3,5 @@ node_modules
 *.tgz
 /testagent
 .vscode
-.opencode
\ No newline at end of file
+.opencode
+.DS_Store
\ No newline at end of file
diff --git a/CHANGELOG.md b/CHANGELOG.md
index c08ec028..344ecf74 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,11 @@
 # @agentuity/sdk Changelog
 
+## 0.0.157
+
+### Patch Changes
+
+- Add support for eval running
+
 ## [0.0.156] - 2025-10-15
 
 ### Added
diff --git a/package-lock.json b/package-lock.json
index 812e80a0..cc09753f 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,12 +1,12 @@
 {
 	"name": "@agentuity/sdk",
-	"version": "0.0.156",
+	"version": "0.0.157",
 	"lockfileVersion": 3,
 	"requires": true,
 	"packages": {
 		"": {
 			"name": "@agentuity/sdk",
-			"version": "0.0.156",
+			"version": "0.0.157",
 			"license": "Apache-2.0",
 			"dependencies": {
 				"@opentelemetry/api": "^1.9.0",
@@ -6824,6 +6824,7 @@
 			"version": "2.3.3",
 			"resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz",
 			"integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==",
+			"dev": true,
 			"hasInstallScript": true,
 			"license": "MIT",
 			"optional": true,
diff --git a/package.json b/package.json
index bb17c23d..00367907 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@agentuity/sdk",
-	"version": "0.0.156",
+	"version": "0.0.157",
 	"description": "The Agentuity SDK for NodeJS and Bun",
 	"license": "Apache-2.0",
 	"public": true,
@@ -97,4 +97,4 @@
 		"mailparser": "^3.7.4",
 		"nodemailer": "^7.0.3"
 	}
-}
+}
\ No newline at end of file
diff --git a/src/apis/api.ts b/src/apis/api.ts
index 024ac48f..3991f111 100644
--- a/src/apis/api.ts
+++ b/src/apis/api.ts
@@ -29,7 +29,12 @@ interface ApiRequestWithUrl {
 
 type ApiRequestOptions = ApiRequestWithPath | ApiRequestWithUrl;
 
-export type ServiceName = 'vector' | 'keyvalue' | 'stream' | 'objectstore';
+export type ServiceName =
+	| 'vector'
+	| 'keyvalue'
+	| 'stream'
+	| 'objectstore'
+	| 'eval';
 
 interface ApiRequestBase {
 	method: 'POST' | 'GET' | 'PUT' | 'DELETE';
@@ -102,6 +107,10 @@ export const getBaseUrlForService = (service?: ServiceName) => {
 				process.env.AGENTUITY_OBJECTSTORE_URL ||
 				process.env.AGENTUITY_TRANSPORT_URL;
 			break;
+		case 'eval':
+			value =
+				process.env.AGENTUITY_EVAL_URL || process.env.AGENTUITY_TRANSPORT_URL;
+			break;
 		default:
 			break;
 	}
diff --git a/src/apis/eval.ts b/src/apis/eval.ts
new file mode 100644
index 00000000..106eb751
--- /dev/null
+++ b/src/apis/eval.ts
@@ -0,0 +1,467 @@
+import fs from 'node:fs';
+import path from 'node:path';
+import { pathToFileURL } from 'node:url';
+import { internal } from '../logger/internal';
+import { POST } from './api';
+
+// Eval SDK types
+export interface EvalRequest {
+	input: string;
+	output: string;
+	sessionId: string;
+}
+
+export interface EvalResponse {
+	pass: (
+		value: boolean,
+		metadata?: { reasoning?: string; [key: string]: unknown }
+	) => void;
+	score: (
+		value: number,
+		metadata?: { reasoning?: string; [key: string]: unknown }
+	) => void; // 0 to 1
+}
+
+export interface EvalContext {
+	// optional for now
+	[key: string]: unknown;
+}
+export type EvalRunResultMetadata = {
+	reason: string;
+	// biome-ignore lint/suspicious/noExplicitAny: metadata can contain any type of data
+	[key: string]: any;
+};
+
+type BaseEvalRunResult = {
+	success: boolean;
+	metadata?: EvalRunResultMetadata;
+};
+
+export type EvalRunResultBinary = BaseEvalRunResult & {
+	success: true;
+	passed: boolean;
+	metadata: EvalRunResultMetadata;
+};
+
+export type EvalRunResultScore = BaseEvalRunResult & {
+	success: true;
+	score: number; // 0-1 range
+	metadata: EvalRunResultMetadata;
+};
+
+export type EvalRunResultError = BaseEvalRunResult & {
+	success: false;
+	error: string;
+};
+
+export type EvalRunResult =
+	| EvalRunResultBinary
+	| EvalRunResultScore
+	| EvalRunResultError;
+
+export type CreateEvalRunRequest = {
+	projectId: string;
+	sessionId: string;
+	spanId: string;
+	result: EvalRunResult;
+	evalId: string;
+	promptHash?: string;
+};
+
+type EvalFunction = (
+	ctx: EvalContext,
+	req: EvalRequest,
+	res: EvalResponse
+) => Promise<void>;
+
+type CreateEvalRunResponse =
+	| {
+			success: true;
+			data: {
+				id: string;
+			};
+	  }
+	| {
+			success: false;
+			message: string;
+	  };
+
+export default class EvalAPI {
+	private evalsDir: string;
+	private isBundled: boolean;
+
+	constructor(evalsDir?: string) {
+		// Check if we're running from bundled code (.agentuity directory)
+		const bundledDir = path.join(process.cwd(), '.agentuity', 'src', 'evals');
+		const sourceDir = path.join(process.cwd(), 'src', 'evals');
+		this.isBundled = fs.existsSync(bundledDir);
+
+		// Use .agentuity/src/evals for bundled code, src/evals for development
+		this.evalsDir = evalsDir || (this.isBundled ? bundledDir : sourceDir);
+
+		internal.debug(
+			`EvalAPI initialized with evalsDir: ${this.evalsDir}, isBundled: ${this.isBundled}`
+		);
+	}
+
+	/**
+	 * Load eval function and metadata by name/slug
+	 * Scans through all eval files to find the one with matching slug
+	 */
+	async loadEvalByName(evalName: string): Promise<{
+		evalFn: EvalFunction;
+		metadata: { id: string; slug: string; name: string; description: string };
+	}> {
+		internal.debug(`Loading eval by name: ${evalName}`);
+
+		try {
+			// Get all files in the evals directory
+			const files = fs.readdirSync(this.evalsDir);
+
+			for (const file of files) {
+				// Skip index files and non-eval files
+				if (file === 'index.ts' || file === 'index.js') {
+					continue;
+				}
+
+				// Check file extension based on bundled state
+				const expectedExt = this.isBundled ? '.js' : '.ts';
+				if (!file.endsWith(expectedExt)) {
+					continue;
+				}
+
+				const filePath = path.join(this.evalsDir, file);
+
+				try {
+					// Convert to file URL for proper ESM import
+					const fileUrl = pathToFileURL(filePath).href;
+					const module = await import(fileUrl);
+
+					// Check if this module has the matching slug
+					if (module.metadata && module.metadata.slug === evalName) {
+						internal.debug(`Found eval with slug ${evalName} in file ${file}`);
+						return {
+							evalFn: module.default,
+							metadata: module.metadata,
+						};
+					}
+				} catch (error) {
+					// Skip files that can't be imported (might not be eval files)
+					internal.debug(`Skipping file ${file} due to import error: ${error}`);
+				}
+			}
+
+			throw new Error(`No eval found with slug: ${evalName}`);
+		} catch (error) {
+			throw new Error(`Failed to load eval by name ${evalName}: ${error}`);
+		}
+	}
+
+	/**
+	 * Load eval function and metadata by ID
+	 * Scans through all eval files to find the one with matching ID
+	 */
+	async loadEvalById(evalId: string): Promise<{
+		evalFn: EvalFunction;
+		metadata?: { id: string; slug: string; name: string; description: string };
+	}> {
+		internal.debug(`Loading eval by ID: ${evalId}`);
+
+		try {
+			// Get all files in the evals directory
+			const files = fs.readdirSync(this.evalsDir);
+
+			for (const file of files) {
+				// Skip index files and non-eval files
+				if (file === 'index.ts' || file === 'index.js') {
+					continue;
+				}
+
+				// Check file extension based on bundled state
+				const expectedExt = this.isBundled ? '.js' : '.ts';
+				if (!file.endsWith(expectedExt)) {
+					continue;
+				}
+
+				const filePath = path.join(this.evalsDir, file);
+
+				try {
+					// Convert to file URL for proper ESM import
+					const fileUrl = pathToFileURL(filePath).href;
+					const module = await import(fileUrl);
+
+					// Check if this module has the matching ID
+					if (module.metadata && module.metadata.id === evalId) {
+						internal.debug(`Found eval with ID ${evalId} in file ${file}`);
+						return {
+							evalFn: module.default,
+							metadata: module.metadata,
+						};
+					}
+				} catch (error) {
+					// Skip files that can't be imported (might not be eval files)
+					internal.debug(`Skipping file ${file} due to import error: ${error}`);
+				}
+			}
+
+			throw new Error(`No eval found with ID: ${evalId}`);
+		} catch (error) {
+			throw new Error(`Failed to load eval by ID ${evalId}: ${error}`);
+		}
+	}
+
+	/**
+	 * Run eval with input/output/sessionId/spanId
+	 */
+	async runEval(
+		evalName: string,
+		input: string,
+		output: string,
+		sessionId: string,
+		spanId: string,
+		promptHash?: string
+	): Promise<CreateEvalRunResponse> {
+		internal.debug(`Running eval ${evalName} for session ${sessionId}`);
+		// Get project ID from environment
+		const projectId = process.env.AGENTUITY_CLOUD_PROJECT_ID || '';
+
+		const request: EvalRequest = {
+			input,
+			output,
+			sessionId,
+		};
+
+		let createEvalRunRequest: CreateEvalRunRequest | null = null;
+		const evalContext: EvalContext = {};
+
+		// Load and run eval function
+		internal.debug('loading eval function');
+
+		try {
+			// Load eval by name/slug
+			const { evalFn, metadata } = await this.loadEvalByName(evalName);
+
+			if (!metadata?.id) {
+				throw new Error('Eval metadata not found');
+			}
+
+			const response: EvalResponse = {
+				pass: (
+					value: boolean,
+					meta?: { reasoning?: string; [key: string]: unknown }
+				) => {
+					createEvalRunRequest = {
+						projectId,
+						sessionId,
+						spanId,
+						result: {
+							success: true,
+							passed: value,
+							metadata: {
+								reason: meta?.reasoning || '',
+								...meta,
+							},
+						},
+						evalId: metadata.id,
+						promptHash,
+					};
+				},
+				score: (
+					val: number,
+					meta?: { reasoning?: string; [key: string]: unknown }
+				) => {
+					createEvalRunRequest = {
+						projectId,
+						sessionId,
+						spanId,
+						result: {
+							success: true,
+							score: val,
+							metadata: {
+								reason: meta?.reasoning || '',
+								...meta,
+							},
+						},
+						evalId: metadata.id,
+						promptHash,
+					};
+				},
+			};
+
+			await evalFn(evalContext, request, response);
+
+			// If no result was set, create an error result
+			if (!createEvalRunRequest) {
+				throw new Error('Eval function did not call res.pass() or res.score()');
+			}
+
+			const r = createEvalRunRequest as CreateEvalRunRequest;
+
+			internal.debug(`✅ Eval '${evalName}' completed successfully: %j`, {
+				resultType: r.result.success ? 'success' : 'error',
+				passed: 'passed' in r.result ? r.result.passed : undefined,
+				score: 'score' in r.result ? r.result.score : undefined,
+				metadata: r.result.metadata,
+			});
+			const resp = await POST<CreateEvalRunResponse>(
+				'/evalrun/finish',
+				JSON.stringify(createEvalRunRequest),
+				{
+					'Content-Type': 'application/json',
+				},
+				undefined,
+				undefined,
+				'eval'
+			);
+
+			if (!resp.status) {
+				internal.debug('Failed to update the database with the eval run', {
+					status: resp.status,
+					evalName,
+					evalId: r.evalId,
+					sessionId: r.sessionId,
+				});
+			} else {
+				internal.debug('Eval run updated in the database', {
+					status: resp.status,
+					evalName,
+					evalId: r.evalId,
+					sessionId: r.sessionId,
+				});
+			}
+
+			return {
+				success: true,
+				data: {
+					id: `local-${Date.now()}-${Math.random().toString(36).substring(2, 9)}`,
+				},
+			};
+		} catch (error) {
+			// Return error result if eval function throws
+			return {
+				success: false,
+				message: error instanceof Error ? error.message : String(error),
+			};
+		}
+	}
+
+	/**
+	 * Load eval metadata map from eval files (slug -> ID mapping)
+	 * Scans through all eval files to find metadata and build mapping
+	 */
+	async loadEvalMetadataMap(): Promise<Map<string, string>> {
+		internal.debug(`🔍 Loading eval metadata map from: ${this.evalsDir}`);
+
+		// Check if evals directory exists
+		if (!fs.existsSync(this.evalsDir)) {
+			internal.debug(`📁 Evals directory not found: ${this.evalsDir}`);
+			return new Map();
+		}
+
+		const files = fs.readdirSync(this.evalsDir);
+		const slugToIDMap = new Map<string, string>();
+		let processedFiles = 0;
+
+		internal.debug(`📂 Scanning ${files.length} files in evals directory`);
+
+		for (const file of files) {
+			const ext = path.extname(file);
+			if (
+				file === 'index.ts' ||
+				file === 'index.js' ||
+				(ext !== '.ts' && ext !== '.js')
+			) {
+				internal.debug(`⏭️  Skipping file: ${file}`);
+				continue;
+			}
+
+			const filePath = path.join(this.evalsDir, file);
+			processedFiles++;
+
+			try {
+				const content = fs.readFileSync(filePath, 'utf-8');
+				const metadata = this.parseEvalMetadata(content);
+
+				if (metadata && metadata.slug && metadata.id) {
+					slugToIDMap.set(metadata.slug, metadata.id);
+					internal.debug(
+						`✅ Mapped eval slug '${metadata.slug}' to ID '${metadata.id}' from ${file}`
+					);
+				} else {
+					internal.debug(`⚠️  No valid metadata found in ${file}`);
+				}
+			} catch (error) {
+				internal.warn(`❌ Failed to parse metadata from ${file}: ${error}`);
+			}
+		}
+
+		internal.debug(
+			`📚 Loaded ${slugToIDMap.size} eval mappings from ${processedFiles} files`
+		);
+		return slugToIDMap;
+	}
+
+	/**
+	 * Parse eval metadata from file content
+	 * Similar to CLI's ParseEvalMetadata but in TypeScript
+	 */
+	private parseEvalMetadata(
+		content: string
+	): { id: string; slug: string; name: string; description: string } | null {
+		// Find the metadata export pattern
+		const metadataRegex = /export\s+const\s+metadata\s*=\s*\{/;
+		const metadataMatch = content.match(metadataRegex);
+		if (!metadataMatch) {
+			return null;
+		}
+
+		// Find the opening brace position
+		const braceStart = metadataMatch.index! + metadataMatch[0].length - 1;
+		if (braceStart >= content.length || content[braceStart] !== '{') {
+			return null;
+		}
+
+		// Count braces to find the matching closing brace
+		let braceCount = 0;
+		let braceEnd = -1;
+		for (let i = braceStart; i < content.length; i++) {
+			if (content[i] === '{') {
+				braceCount++;
+			} else if (content[i] === '}') {
+				braceCount--;
+				if (braceCount === 0) {
+					braceEnd = i;
+					break;
+				}
+			}
+		}
+
+		if (braceEnd === -1) {
+			return null;
+		}
+
+		// Extract the object content
+		const objectContent = content.slice(braceStart, braceEnd + 1);
+
+		// Replace single quotes with double quotes for valid JSON
+		let jsonStr = objectContent.replace(/'([^']*)'/g, '"$1"');
+
+		// Clean up the JSON string
+		jsonStr = jsonStr.replace(/\s+/g, ' ');
+		jsonStr = jsonStr.replace(/\s*{\s*/g, '{');
+		jsonStr = jsonStr.replace(/\s*}\s*/g, '}');
+		jsonStr = jsonStr.replace(/\s*:\s*/g, ':');
+		jsonStr = jsonStr.replace(/\s*,\s*/g, ',');
+		jsonStr = jsonStr.replace(/,\s*}/g, '}');
+
+		// Quote the object keys
+		jsonStr = jsonStr.replace(/(\w+):/g, '"$1":');
+
+		try {
+			return JSON.parse(jsonStr);
+		} catch (error) {
+			internal.debug(`Failed to parse metadata JSON: ${error}`);
+			return null;
+		}
+	}
+}
diff --git a/src/apis/evaljobscheduler.ts b/src/apis/evaljobscheduler.ts
new file mode 100644
index 00000000..2fedb322
--- /dev/null
+++ b/src/apis/evaljobscheduler.ts
@@ -0,0 +1,176 @@
+import { internal } from '../logger/internal';
+import type { PromptAttributes } from '../utils/promptMetadata';
+
+// Global instance storage to ensure true singleton across all module contexts
+declare global {
+	var __evalJobSchedulerInstance: EvalJobScheduler | undefined;
+}
+
+export interface PendingEvalJob {
+	spanId: string;
+	sessionId: string;
+	promptMetadata: PromptAttributes[];
+	output?: string;
+	createdAt: string;
+}
+
+export interface JobFilter {
+	sessionId?: string;
+}
+
+/**
+ * Singleton class for EvalJobScheduler
+ */
+export default class EvalJobScheduler {
+	private pendingJobs: Map<string, PendingEvalJob> = new Map();
+	private instanceId: string;
+
+	private constructor() {
+		// Private constructor to prevent direct instantiation
+		this.instanceId = `EvalJobScheduler-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
+		internal.debug(
+			'🏗️ EvalJobScheduler constructor called, instanceId:',
+			this.instanceId
+		);
+	}
+
+	/**
+	 * Get the singleton instance of EvalJobScheduler
+	 */
+	public static async getInstance(): Promise<EvalJobScheduler> {
+		internal.debug('🔍 EvalJobScheduler.getInstance() called');
+		internal.debug(
+			'🔍 globalThis.__evalJobSchedulerInstance exists:',
+			!!globalThis.__evalJobSchedulerInstance
+		);
+
+		if (!globalThis.__evalJobSchedulerInstance) {
+			globalThis.__evalJobSchedulerInstance = new EvalJobScheduler();
+			internal.debug(
+				'🆕 Created new EvalJobScheduler instance, ID:',
+				globalThis.__evalJobSchedulerInstance.instanceId
+			);
+		} else {
+			internal.debug(
+				'♻️ Returning existing EvalJobScheduler instance, ID:',
+				globalThis.__evalJobSchedulerInstance.instanceId
+			);
+		}
+		return globalThis.__evalJobSchedulerInstance;
+	}
+
+	/**
+	 * Create a new eval job
+	 */
+	public createJob(
+		spanId: string,
+		sessionId: string,
+		promptMetadata: PromptAttributes[]
+	): string {
+		internal.debug('🔍 EvalJobScheduler.createJob() called with:', {
+			spanId,
+			sessionId,
+			promptMetadataCount: promptMetadata.length,
+		});
+
+		const now = new Date().toISOString();
+
+		// Check if job already exists
+		if (this.pendingJobs.has(spanId)) {
+			internal.debug('⚠️ Job already exists, overwriting:', spanId);
+		}
+
+		// Count total evals across all prompt metadata
+		const totalEvals = promptMetadata.reduce(
+			(count, meta) => count + (meta.evals?.length || 0),
+			0
+		);
+		internal.debug(
+			`📦 Creating eval job ${spanId} for session ${sessionId} with ${totalEvals} evals`
+		);
+
+		const job: PendingEvalJob = {
+			spanId,
+			sessionId,
+			promptMetadata,
+			createdAt: now,
+		};
+
+		this.pendingJobs.set(spanId, job);
+		internal.debug('✅ Job created successfully:', spanId);
+		internal.debug('📊 Total jobs:', this.pendingJobs.size);
+
+		return spanId;
+	}
+
+	/**
+	 * Remove a job
+	 */
+	public removeJob(spanId: string): boolean {
+		internal.debug('🔍 EvalJobScheduler.removeJob() called with:', spanId);
+		const removed = this.pendingJobs.delete(spanId);
+		internal.debug('🔍 Job removed:', removed);
+		return removed;
+	}
+
+	/**
+	 * Get jobs with optional filtering
+	 */
+	public getJobs(filter?: JobFilter): PendingEvalJob[] {
+		internal.debug('🔍 EvalJobScheduler.getJobs() called with filter:', filter);
+		internal.debug(
+			'📊 Total pending jobs in scheduler:',
+			this.pendingJobs.size
+		);
+
+		let jobs = Array.from(this.pendingJobs.values());
+
+		if (filter?.sessionId) {
+			jobs = jobs.filter((job) => job.sessionId === filter.sessionId);
+			internal.debug(
+				`🔍 Filtered jobs for session ${filter.sessionId}:`,
+				jobs.length
+			);
+		}
+
+		internal.debug('📋 Returning jobs:', jobs.length);
+		if (jobs.length > 0) {
+			jobs.forEach((job, index) => {
+				internal.debug(`📦 Job ${index + 1}:`, {
+					spanId: job.spanId,
+					sessionId: job.sessionId,
+					promptMetadataCount: job.promptMetadata?.length || 0,
+					hasOutput: !!job.output,
+					createdAt: job.createdAt,
+				});
+			});
+		}
+		return jobs;
+	}
+
+	/**
+	 * Print out the whole state of the EvalJobScheduler
+	 */
+	public printState(): void {
+		internal.debug('🔍 EvalJobScheduler.printState() called');
+		internal.debug('🔍 Instance ID:', this.instanceId);
+		internal.debug('🔍 EvalJobScheduler State:');
+		internal.debug('📊 Total jobs:', this.pendingJobs.size);
+		internal.debug('📋 Job IDs:', Array.from(this.pendingJobs.keys()));
+		internal.debug(
+			'📦 All jobs:',
+			JSON.stringify(Array.from(this.pendingJobs.values()), null, 2)
+		);
+		internal.debug(
+			'🔍 Global instance check:',
+			globalThis.__evalJobSchedulerInstance === this
+		);
+	}
+
+	/**
+	 * Get the instance ID
+	 */
+	public getInstanceId(): string {
+		return this.instanceId;
+	}
+}
diff --git a/src/apis/patchportal.ts b/src/apis/patchportal.ts
index 6010b356..31078202 100644
--- a/src/apis/patchportal.ts
+++ b/src/apis/patchportal.ts
@@ -14,7 +14,7 @@ export default class PatchPortal {
 
 	private constructor() {
 		// Private constructor to prevent direct instantiation
-		this.instanceId = `PatchPortal-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
+		this.instanceId = `PatchPortal-${Date.now()}-${Math.random().toString(36).substring(2, 9)}`;
 		internal.debug(
 			'🏗️ PatchPortal constructor called, instanceId:',
 			this.instanceId
diff --git a/src/apis/prompt/index.ts b/src/apis/prompt/index.ts
index b08a4a91..4b1228f9 100644
--- a/src/apis/prompt/index.ts
+++ b/src/apis/prompt/index.ts
@@ -88,7 +88,6 @@ export default class PromptAPI {
 
 	// Method to load prompts dynamically (called by context)
 	public async loadPrompts(): Promise<void> {
-		internal.debug('loadPrompts() called');
 		try {
 			// Try multiple possible paths for the generated prompts
 			let generatedModule: unknown;
@@ -97,10 +96,13 @@ export default class PromptAPI {
 			const possiblePaths = await this.resolveGeneratedPaths();
 
 			internal.debug('Trying absolute paths:', possiblePaths);
+			const attemptErrors: string[] = [];
 			for (const possiblePath of possiblePaths) {
 				internal.debug('  Checking:', possiblePath);
 				try {
 					await fs.access(possiblePath);
+					internal.debug('  ✓ File exists:', possiblePath);
+
 					// Get file stats for cache-busting
 					const stats = await fs.stat(possiblePath);
 					const mtime = stats.mtime.getTime();
@@ -110,13 +112,20 @@ export default class PromptAPI {
 
 					// Use ESM dynamic import instead of require
 					generatedModule = await import(fileUrl);
-					internal.debug('  Successfully loaded from:', possiblePath);
+					internal.debug('  ✓ Successfully loaded from:', possiblePath);
 					break;
-				} catch {}
+				} catch (error) {
+					const errorMsg =
+						error instanceof Error ? error.message : String(error);
+					internal.debug(`  ✗ Failed to load ${possiblePath}: ${errorMsg}`);
+					attemptErrors.push(`${possiblePath}: ${errorMsg}`);
+				}
 			}
 
 			if (!generatedModule) {
-				throw new Error('Generated prompts file not found');
+				throw new Error(
+					`Generated prompts file not found. Tried:\n${attemptErrors.join('\n')}`
+				);
 			}
 
 			// Type guard to ensure generatedModule has expected shape
diff --git a/src/index.ts b/src/index.ts
index 7ca7bfdd..f28dda38 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -1,6 +1,8 @@
+export * from './apis/eval';
 export * from './logger';
 export * from './server';
 export * from './types';
+export { hash, hashSync } from './utils/hash';
 export * from './utils/interpolate';
 export {
 	type PromptAttributes,
@@ -11,10 +13,20 @@ export {
 import DiscordAPI from './apis/discord';
 // Export APIs
 import EmailAPI from './apis/email';
+import EvalAPI from './apis/eval';
+import EvalJobScheduler from './apis/evaljobscheduler';
 import PatchPortal from './apis/patchportal';
 import PromptAPI from './apis/prompt';
 import StreamAPIImpl from './apis/stream';
-export { EmailAPI, DiscordAPI, PatchPortal, PromptAPI, StreamAPIImpl };
+export {
+	EmailAPI,
+	DiscordAPI,
+	EvalAPI,
+	EvalJobScheduler,
+	PatchPortal,
+	PromptAPI,
+	StreamAPIImpl,
+};
 
 import { TeamsActivityHandler } from 'botbuilder';
 import { run, type UserOpenTelemetryConfig } from './autostart';
diff --git a/src/router/context.ts b/src/router/context.ts
index c8dd8828..3b4310ed 100644
--- a/src/router/context.ts
+++ b/src/router/context.ts
@@ -1,11 +1,15 @@
 import {
 	context,
 	SpanStatusCode,
-	trace,
 	type Tracer,
+	trace,
 } from '@opentelemetry/api';
+import EvalAPI from '../apis/eval';
+import EvalJobScheduler from '../apis/evaljobscheduler';
 import { markSessionCompleted } from '../apis/session';
 import type { Logger } from '../logger';
+import { internal } from '../logger/internal';
+import type { PromptAttributes } from '../utils/promptMetadata';
 
 let running = 0;
 export function isIdle(): boolean {
@@ -68,19 +72,32 @@ export default class AgentContextWaitUntilHandler {
 	}
 
 	public async waitUntilAll(logger: Logger, sessionId: string): Promise<void> {
+		internal.debug(`🔍 waitUntilAll() called for session ${sessionId}`);
+
 		if (this.hasCalledWaitUntilAll) {
 			throw new Error('waitUntilAll can only be called once per instance');
 		}
 		this.hasCalledWaitUntilAll = true;
 
 		if (this.promises.length === 0) {
+			internal.debug('No promises to wait for, executing evals directly');
+			await this.executeEvalsForSession(logger, sessionId);
 			return;
 		}
+
+		internal.debug(
+			`⏳ Waiting for ${this.promises.length} promises to complete...`
+		);
 		try {
 			// Promises are already executing, just wait for them to complete
 			await Promise.all(this.promises);
 			const duration = Date.now() - (this.started as number);
+			internal.debug('✅ All promises completed, marking session completed');
 			await markSessionCompleted(sessionId, duration);
+
+			// Execute evals after session completion
+			internal.debug('🚀 Starting eval execution after session completion');
+			await this.executeEvalsForSession(logger, sessionId);
 		} catch (ex) {
 			logger.error('error sending session completed', ex);
 		} finally {
@@ -88,4 +105,141 @@ export default class AgentContextWaitUntilHandler {
 			this.promises.length = 0;
 		}
 	}
+
+	/**
+	 * Execute evals for the completed session
+	 */
+	private async executeEvalsForSession(
+		logger: Logger,
+		sessionId: string
+	): Promise<void> {
+		try {
+			internal.debug(`🔍 Starting eval execution for session ${sessionId}`);
+
+			// Get pending eval jobs for this session
+			internal.debug('🔍 Getting EvalJobScheduler instance...');
+			const evalJobScheduler = await EvalJobScheduler.getInstance();
+			internal.debug('✅ EvalJobScheduler instance obtained');
+
+			internal.debug(`🔍 Querying jobs for session ${sessionId}...`);
+			const jobs = evalJobScheduler.getJobs({ sessionId });
+
+			if (jobs.length === 0) {
+				internal.debug(`📭 No eval jobs found for session ${sessionId}`);
+				return;
+			}
+
+			internal.debug(
+				`📋 Found ${jobs.length} eval jobs for session ${sessionId}`
+			);
+
+			// Load eval metadata map
+			internal.debug('🔧 Loading eval metadata map...');
+			const evalAPI = new EvalAPI();
+			const evalMetadataMap = await evalAPI.loadEvalMetadataMap();
+			internal.debug(`📚 Loaded ${evalMetadataMap.size} eval mappings`);
+
+			// Execute evals for each job
+			let totalEvalsExecuted = 0;
+			for (let i = 0; i < jobs.length; i++) {
+				const job = jobs[i];
+				internal.debug(
+					`🎯 Processing job ${i + 1}/${jobs.length} (spanId: ${job.spanId})`
+				);
+				const evalsInJob = await this.executeEvalsForJob(
+					logger,
+					job,
+					evalAPI,
+					evalMetadataMap
+				);
+				totalEvalsExecuted += evalsInJob;
+				internal.debug(
+					`✅ Completed job ${i + 1}/${jobs.length}: ${evalsInJob} evals executed`
+				);
+			}
+
+			internal.debug(
+				`✅ Completed eval execution for session ${sessionId}: ${totalEvalsExecuted} evals executed`
+			);
+
+			// Clean up completed jobs
+			internal.debug(`🧹 Cleaning up ${jobs.length} completed jobs...`);
+			for (const job of jobs) {
+				evalJobScheduler.removeJob(job.spanId);
+			}
+			internal.debug(`✅ Cleaned up ${jobs.length} completed jobs`);
+		} catch (error) {
+			logger.error('❌ Error executing evals for session:', error);
+		}
+	}
+
+	/**
+	 * Execute evals for a specific job
+	 */
+	private async executeEvalsForJob(
+		logger: Logger,
+		job: {
+			spanId: string;
+			sessionId: string;
+			promptMetadata: PromptAttributes[];
+			input?: string;
+			output?: string;
+		},
+		evalAPI: EvalAPI,
+		evalMetadataMap: Map<string, string>
+	): Promise<number> {
+		let evalsExecuted = 0;
+
+		internal.debug(
+			`🎯 Processing job ${job.spanId} with ${job.promptMetadata.length} prompt metadata entries`
+		);
+
+		for (const promptMeta of job.promptMetadata || []) {
+			if (!promptMeta.evals || promptMeta.evals.length === 0) {
+				logger.debug('⏭️  Skipping prompt metadata with no evals');
+				continue;
+			}
+
+			internal.debug(
+				`📝 Found ${promptMeta.evals.length} evals for prompt: ${promptMeta.evals.join(', ')}`
+			);
+
+			for (const evalSlug of promptMeta.evals) {
+				try {
+					internal.debug(
+						`🚀 Running eval '${evalSlug}' for session ${job.sessionId}`
+					);
+
+					internal.debug(`🔑 Template hash: ${promptMeta.templateHash}`);
+					internal.debug(`🔑 Compiled hash: ${promptMeta.compiledHash}`);
+
+					const result = await evalAPI.runEval(
+						evalSlug,
+						job.input || '',
+						job.output || '',
+						job.sessionId,
+						job.spanId,
+						promptMeta.templateHash
+					);
+
+					if (result.success) {
+						internal.debug(`✅ Successfully executed eval '${evalSlug}'`);
+						evalsExecuted++;
+					} else {
+						logger.warn(
+							`⚠️  Eval '${evalSlug}' completed but returned error: ${result.message}`
+						);
+					}
+				} catch (error) {
+					logger.error(`❌ Failed to execute eval '${evalSlug}':`, error);
+					// Continue with other evals even if one fails
+				}
+			}
+		}
+
+		internal.debug(
+			`📊 Job ${job.spanId} completed: ${evalsExecuted} evals executed`
+		);
+		return evalsExecuted;
+	}
 }
diff --git a/src/router/router.ts b/src/router/router.ts
index abd9109d..12855225 100644
--- a/src/router/router.ts
+++ b/src/router/router.ts
@@ -413,8 +413,14 @@ export function createRouter(config: RouterConfig): ServerRoute['handler'] {
 								throw err;
 							}
 						}
-					).then((r) => {
-						contextHandler.waitUntilAll(logger, sessionId);
+					).then(async (r) => {
+						internal.info(
+							`🔍 Router calling waitUntilAll for session ${sessionId}`
+						);
+						await contextHandler.waitUntilAll(logger, sessionId);
+						internal.info(
+							`✅ Router completed waitUntilAll for session ${sessionId}`
+						);
 						return r;
 					});
 				});
diff --git a/src/server/bun.ts b/src/server/bun.ts
index f339138d..b2aefd5f 100644
--- a/src/server/bun.ts
+++ b/src/server/bun.ts
@@ -1,5 +1,7 @@
 import type { ReadableStream } from 'node:stream/web';
 import { context, SpanKind, SpanStatusCode, trace } from '@opentelemetry/api';
+import EvalJobScheduler from '../apis/evaljobscheduler';
+import { isIdle } from '../router/context';
 import type {
 	AgentResponseData,
 	AgentWelcomeResult,
@@ -17,7 +19,6 @@ import {
 	shouldIgnoreStaticFile,
 	toWelcomePrompt,
 } from './util';
-import { isIdle } from '../router/context';
 
 const idleTimeout = 255; // expressed in seconds
 
@@ -59,6 +60,10 @@ export class BunServer implements Server {
 
 		const devmode = process.env.AGENTUITY_SDK_DEV_MODE === 'true';
 		const { sdkVersion, logger } = this.config;
+
+		// Initialize EvalJobScheduler globally for patches
+		await EvalJobScheduler.getInstance();
+
 		const hostname =
 			process.env.AGENTUITY_ENV === 'development' ? '127.0.0.1' : '0.0.0.0';
 
diff --git a/src/server/node.ts b/src/server/node.ts
index 75193a2d..705e41b2 100644
--- a/src/server/node.ts
+++ b/src/server/node.ts
@@ -5,7 +5,9 @@ import {
 import { Readable } from 'node:stream';
 import type { ReadableStream } from 'node:stream/web';
 import { context, SpanKind, SpanStatusCode, trace } from '@opentelemetry/api';
+import EvalJobScheduler from '../apis/evaljobscheduler';
 import type { Logger } from '../logger';
+import { isIdle } from '../router/context';
 import type { AgentResponseData, AgentWelcomeResult } from '../types';
 import {
 	extractTraceContextFromNodeRequest,
@@ -21,7 +23,6 @@ import {
 	shouldIgnoreStaticFile,
 	toWelcomePrompt,
 } from './util';
-import { isIdle } from '../router/context';
 
 export const MAX_REQUEST_TIMEOUT = 60_000 * 10;
 
@@ -104,6 +105,10 @@ export class NodeServer implements Server {
 	async start(): Promise<void> {
 		const sdkVersion = this.sdkVersion;
 		const devmode = process.env.AGENTUITY_SDK_DEV_MODE === 'true';
+
+		// Initialize EvalJobScheduler globally for patches
+		await EvalJobScheduler.getInstance();
+
 		this.server = createHttpServer(async (req, res) => {
 			if (req.method === 'GET' && req.url === '/_health') {
 				res.writeHead(200, {
diff --git a/src/utils/hash.ts b/src/utils/hash.ts
new file mode 100644
index 00000000..a227d66f
--- /dev/null
+++ b/src/utils/hash.ts
@@ -0,0 +1,29 @@
+import crypto from 'crypto';
+
+/**
+ * Convert an ArrayBuffer to a hexadecimal string
+ */
+function arrayBufferToHex(arrayBuffer: ArrayBuffer): string {
+	const array = Array.from(new Uint8Array(arrayBuffer));
+	const hex = array.map((byte) => byte.toString(16).padStart(2, '0')).join('');
+	return hex;
+}
+
+/**
+ * Hash a string using SHA-256 and return the hexadecimal representation (async)
+ */
+export async function hash(value: string): Promise<string> {
+	const ctBuffer = await crypto.subtle.digest(
+		'SHA-256',
+		new TextEncoder().encode(value)
+	);
+	return arrayBufferToHex(ctBuffer);
+}
+
+/**
+ * Hash a string using SHA-256 and return the hexadecimal representation (sync)
+ * Uses Node.js crypto for synchronous operation
+ */
+export function hashSync(value: string): string {
+	return crypto.createHash('sha256').update(value).digest('hex');
+}
diff --git a/src/utils/promptMetadata.ts b/src/utils/promptMetadata.ts
index da074e64..58f624ea 100644
--- a/src/utils/promptMetadata.ts
+++ b/src/utils/promptMetadata.ts
@@ -1,12 +1,14 @@
 import crypto from 'node:crypto';
 import PatchPortal from '../apis/patchportal.js';
 import { internal } from '../logger/internal';
+import { hashSync } from './hash.js';
 
 export interface PromptAttributesParams {
 	slug: string;
 	compiled: string;
 	template: string;
 	variables?: Record<string, string>;
+	evals?: string[];
 }
 
 export interface PromptAttributes extends PromptAttributesParams {
@@ -26,23 +28,17 @@ export async function processPromptMetadata(
 		template: `${attributes.template?.substring(0, 50)}...`,
 		compiled: `${attributes.compiled?.substring(0, 50)}...`,
 		variables: attributes.variables,
+		evals: attributes.evals,
 	});
 
 	const patchPortal = await PatchPortal.getInstance();
 	internal.debug('✅ PatchPortal instance obtained');
 
 	// Generate hash
-	const templateHash = crypto
-		.createHash('sha256')
-		.update(attributes.template)
-		.digest('hex');
-
+	const templateHash = hashSync(attributes.template);
 	internal.debug('🔑 Template hash:', templateHash);
 
-	const compiledHash = crypto
-		.createHash('sha256')
-		.update(attributes.compiled)
-		.digest('hex');
+	const compiledHash = hashSync(attributes.compiled);
 
 	internal.debug('🔑 Compiled hash:', compiledHash);
 
@@ -57,6 +53,7 @@ export async function processPromptMetadata(
 		slug: metadata.slug,
 		templateHash: metadata.templateHash,
 		compiledHash: metadata.compiledHash,
+		evals: metadata.evals,
 		timestamp: new Date().toISOString(),
 	});
 
diff --git a/test/utils/hash.test.ts b/test/utils/hash.test.ts
new file mode 100644
index 00000000..aca7f640
--- /dev/null
+++ b/test/utils/hash.test.ts
@@ -0,0 +1,61 @@
+import { describe, expect, it } from 'bun:test';
+import { hash, hashSync } from '../../src/utils/hash';
+
+describe('Hash Functions', () => {
+	it('should produce the same hash for both async and sync functions', async () => {
+		const testCases = [
+			'Hello, World!',
+			'',
+			'This is a longer string with special characters: !@#$%^&*()',
+			'Unicode test: 🚀 🌟 🎉',
+			'Multiple\nlines\nwith\ttabs',
+			'Very long string '.repeat(100),
+		];
+
+		for (const testString of testCases) {
+			const asyncHash = await hash(testString);
+			const syncHash = hashSync(testString);
+
+			expect(asyncHash).toBe(syncHash);
+			expect(asyncHash).toMatch(/^[a-f0-9]{64}$/); // SHA-256 produces 64 hex characters
+			expect(syncHash).toMatch(/^[a-f0-9]{64}$/);
+		}
+	});
+
+	it('should produce consistent hashes for the same input', async () => {
+		const testString = 'Consistent hash test';
+
+		const hash1 = await hash(testString);
+		const hash2 = await hash(testString);
+		const syncHash1 = hashSync(testString);
+		const syncHash2 = hashSync(testString);
+
+		expect(hash1).toBe(hash2);
+		expect(syncHash1).toBe(syncHash2);
+		expect(hash1).toBe(syncHash1);
+	});
+
+	it('should produce different hashes for different inputs', async () => {
+		const string1 = 'Hello';
+		const string2 = 'World';
+
+		const hash1 = await hash(string1);
+		const hash2 = await hash(string2);
+		const syncHash1 = hashSync(string1);
+		const syncHash2 = hashSync(string2);
+
+		expect(hash1).not.toBe(hash2);
+		expect(syncHash1).not.toBe(syncHash2);
+		expect(hash1).toBe(syncHash1);
+		expect(hash2).toBe(syncHash2);
+	});
+
+	it('should handle empty string', async () => {
+		const emptyString = '';
+		const asyncHash = await hash(emptyString);
+		const syncHash = hashSync(emptyString);
+
+		expect(asyncHash).toBe(syncHash);
+		expect(asyncHash).toMatch(/^[a-f0-9]{64}$/);
+	});
+});