diff --git a/.gitignore b/.gitignore index 6d9877f9..bb7659aa 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,5 @@ node_modules *.tgz /testagent .vscode -.opencode \ No newline at end of file +.opencode +.DS_Store \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index c08ec028..344ecf74 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # @agentuity/sdk Changelog +## 0.0.157 + +### Patch Changes + +- Add support for eval running + ## [0.0.156] - 2025-10-15 ### Added diff --git a/package-lock.json b/package-lock.json index 812e80a0..cc09753f 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@agentuity/sdk", - "version": "0.0.156", + "version": "0.0.157", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@agentuity/sdk", - "version": "0.0.156", + "version": "0.0.157", "license": "Apache-2.0", "dependencies": { "@opentelemetry/api": "^1.9.0", @@ -6824,6 +6824,7 @@ "version": "2.3.3", "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", + "dev": true, "hasInstallScript": true, "license": "MIT", "optional": true, diff --git a/package.json b/package.json index bb17c23d..00367907 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@agentuity/sdk", - "version": "0.0.156", + "version": "0.0.157", "description": "The Agentuity SDK for NodeJS and Bun", "license": "Apache-2.0", "public": true, @@ -97,4 +97,4 @@ "mailparser": "^3.7.4", "nodemailer": "^7.0.3" } -} +} \ No newline at end of file diff --git a/src/apis/api.ts b/src/apis/api.ts index 024ac48f..3991f111 100644 --- a/src/apis/api.ts +++ b/src/apis/api.ts @@ -29,7 +29,12 @@ interface ApiRequestWithUrl { type ApiRequestOptions = ApiRequestWithPath | ApiRequestWithUrl; -export type ServiceName = 'vector' | 'keyvalue' | 'stream' | 'objectstore'; +export type ServiceName = + | 'vector' + | 'keyvalue' + | 'stream' + | 'objectstore' + | 'eval'; interface ApiRequestBase { method: 'POST' | 'GET' | 'PUT' | 'DELETE'; @@ -102,6 +107,10 @@ export const getBaseUrlForService = (service?: ServiceName) => { process.env.AGENTUITY_OBJECTSTORE_URL || process.env.AGENTUITY_TRANSPORT_URL; break; + case 'eval': + value = + process.env.AGENTUITY_EVAL_URL || process.env.AGENTUITY_TRANSPORT_URL; + break; default: break; } diff --git a/src/apis/eval.ts b/src/apis/eval.ts new file mode 100644 index 00000000..106eb751 --- /dev/null +++ b/src/apis/eval.ts @@ -0,0 +1,467 @@ +import fs from 'node:fs'; +import path from 'node:path'; +import { pathToFileURL } from 'node:url'; +import { internal } from '../logger/internal'; +import { POST } from './api'; + +// Eval SDK types +export interface EvalRequest { + input: string; + output: string; + sessionId: string; +} + +export interface EvalResponse { + pass: ( + value: boolean, + metadata?: { reasoning?: string; [key: string]: unknown } + ) => void; + score: ( + value: number, + metadata?: { reasoning?: string; [key: string]: unknown } + ) => void; // 0 to 1 +} + +export interface EvalContext { + // optional for now + [key: string]: unknown; +} +export type EvalRunResultMetadata = { + reason: string; + // biome-ignore lint/suspicious/noExplicitAny: metadata can contain any type of data + [key: string]: any; +}; + +type BaseEvalRunResult = { + success: boolean; + metadata?: EvalRunResultMetadata; +}; + +export type EvalRunResultBinary = BaseEvalRunResult & { + success: true; + passed: boolean; + metadata: EvalRunResultMetadata; +}; + +export type EvalRunResultScore = BaseEvalRunResult & { + success: true; + score: number; // 0-1 range + metadata: EvalRunResultMetadata; +}; + +export type EvalRunResultError = BaseEvalRunResult & { + success: false; + error: string; +}; + +export type EvalRunResult = + | EvalRunResultBinary + | EvalRunResultScore + | EvalRunResultError; + +export type CreateEvalRunRequest = { + projectId: string; + sessionId: string; + spanId: string; + result: EvalRunResult; + evalId: string; + promptHash?: string; +}; + +type EvalFunction = ( + ctx: EvalContext, + req: EvalRequest, + res: EvalResponse +) => Promise; + +type CreateEvalRunResponse = + | { + success: true; + data: { + id: string; + }; + } + | { + success: false; + message: string; + }; + +export default class EvalAPI { + private evalsDir: string; + private isBundled: boolean; + + constructor(evalsDir?: string) { + // Check if we're running from bundled code (.agentuity directory) + const bundledDir = path.join(process.cwd(), '.agentuity', 'src', 'evals'); + const sourceDir = path.join(process.cwd(), 'src', 'evals'); + this.isBundled = fs.existsSync(bundledDir); + + // Use .agentuity/src/evals for bundled code, src/evals for development + this.evalsDir = evalsDir || (this.isBundled ? bundledDir : sourceDir); + + internal.debug( + `EvalAPI initialized with evalsDir: ${this.evalsDir}, isBundled: ${this.isBundled}` + ); + } + + /** + * Load eval function and metadata by name/slug + * Scans through all eval files to find the one with matching slug + */ + async loadEvalByName(evalName: string): Promise<{ + evalFn: EvalFunction; + metadata: { id: string; slug: string; name: string; description: string }; + }> { + internal.debug(`Loading eval by name: ${evalName}`); + + try { + // Get all files in the evals directory + const files = fs.readdirSync(this.evalsDir); + + for (const file of files) { + // Skip index files and non-eval files + if (file === 'index.ts' || file === 'index.js') { + continue; + } + + // Check file extension based on bundled state + const expectedExt = this.isBundled ? '.js' : '.ts'; + if (!file.endsWith(expectedExt)) { + continue; + } + + const filePath = path.join(this.evalsDir, file); + + try { + // Convert to file URL for proper ESM import + const fileUrl = pathToFileURL(filePath).href; + const module = await import(fileUrl); + + // Check if this module has the matching slug + if (module.metadata && module.metadata.slug === evalName) { + internal.debug(`Found eval with slug ${evalName} in file ${file}`); + return { + evalFn: module.default, + metadata: module.metadata, + }; + } + } catch (error) { + // Skip files that can't be imported (might not be eval files) + internal.debug(`Skipping file ${file} due to import error: ${error}`); + } + } + + throw new Error(`No eval found with slug: ${evalName}`); + } catch (error) { + throw new Error(`Failed to load eval by name ${evalName}: ${error}`); + } + } + + /** + * Load eval function and metadata by ID + * Scans through all eval files to find the one with matching ID + */ + async loadEvalById(evalId: string): Promise<{ + evalFn: EvalFunction; + metadata?: { id: string; slug: string; name: string; description: string }; + }> { + internal.debug(`Loading eval by ID: ${evalId}`); + + try { + // Get all files in the evals directory + const files = fs.readdirSync(this.evalsDir); + + for (const file of files) { + // Skip index files and non-eval files + if (file === 'index.ts' || file === 'index.js') { + continue; + } + + // Check file extension based on bundled state + const expectedExt = this.isBundled ? '.js' : '.ts'; + if (!file.endsWith(expectedExt)) { + continue; + } + + const filePath = path.join(this.evalsDir, file); + + try { + // Convert to file URL for proper ESM import + const fileUrl = pathToFileURL(filePath).href; + const module = await import(fileUrl); + + // Check if this module has the matching ID + if (module.metadata && module.metadata.id === evalId) { + internal.debug(`Found eval with ID ${evalId} in file ${file}`); + return { + evalFn: module.default, + metadata: module.metadata, + }; + } + } catch (error) { + // Skip files that can't be imported (might not be eval files) + internal.debug(`Skipping file ${file} due to import error: ${error}`); + } + } + + throw new Error(`No eval found with ID: ${evalId}`); + } catch (error) { + throw new Error(`Failed to load eval by ID ${evalId}: ${error}`); + } + } + + /** + * Run eval with input/output/sessionId/spanId + */ + async runEval( + evalName: string, + input: string, + output: string, + sessionId: string, + spanId: string, + promptHash?: string + ): Promise { + internal.debug(`Running eval ${evalName} for session ${sessionId}`); + // Get project ID from environment + const projectId = process.env.AGENTUITY_CLOUD_PROJECT_ID || ''; + + const request: EvalRequest = { + input, + output, + sessionId, + }; + + let createEvalRunRequest: CreateEvalRunRequest | null = null; + const evalContext: EvalContext = {}; + + // Load and run eval function + internal.debug('loading eval function'); + + try { + // Load eval by name/slug + const { evalFn, metadata } = await this.loadEvalByName(evalName); + + if (!metadata?.id) { + throw new Error('Eval metadata not found'); + } + + const response: EvalResponse = { + pass: ( + value: boolean, + meta?: { reasoning?: string; [key: string]: unknown } + ) => { + createEvalRunRequest = { + projectId, + sessionId, + spanId, + result: { + success: true, + passed: value, + metadata: { + reason: meta?.reasoning || '', + ...meta, + }, + }, + evalId: metadata.id, + promptHash, + }; + }, + score: ( + val: number, + meta?: { reasoning?: string; [key: string]: unknown } + ) => { + createEvalRunRequest = { + projectId, + sessionId, + spanId, + result: { + success: true, + score: val, + metadata: { + reason: meta?.reasoning || '', + ...meta, + }, + }, + evalId: metadata.id, + promptHash, + }; + }, + }; + + await evalFn(evalContext, request, response); + + // If no result was set, create an error result + if (!createEvalRunRequest) { + throw new Error('Eval function did not call res.pass() or res.score()'); + } + + const r = createEvalRunRequest as CreateEvalRunRequest; + + internal.debug(`โœ… Eval '${evalName}' completed successfully: %j`, { + resultType: r.result.success ? 'success' : 'error', + passed: 'passed' in r.result ? r.result.passed : undefined, + score: 'score' in r.result ? r.result.score : undefined, + metadata: r.result.metadata, + }); + const resp = await POST( + '/evalrun/finish', + JSON.stringify(createEvalRunRequest), + { + 'Content-Type': 'application/json', + }, + undefined, + undefined, + 'eval' + ); + + if (!resp.status) { + internal.debug('Failed to update the database with the eval run', { + status: resp.status, + evalName, + evalId: r.evalId, + sessionId: r.sessionId, + }); + } else { + internal.debug('Eval run updated in the database', { + status: resp.status, + evalName, + evalId: r.evalId, + sessionId: r.sessionId, + }); + } + + return { + success: true, + data: { + id: `local-${Date.now()}-${Math.random().toString(36).substring(2, 9)}`, + }, + }; + } catch (error) { + // Return error result if eval function throws + return { + success: false, + message: error instanceof Error ? error.message : String(error), + }; + } + } + + /** + * Load eval metadata map from eval files (slug -> ID mapping) + * Scans through all eval files to find metadata and build mapping + */ + async loadEvalMetadataMap(): Promise> { + internal.debug(`๐Ÿ” Loading eval metadata map from: ${this.evalsDir}`); + + // Check if evals directory exists + if (!fs.existsSync(this.evalsDir)) { + internal.debug(`๐Ÿ“ Evals directory not found: ${this.evalsDir}`); + return new Map(); + } + + const files = fs.readdirSync(this.evalsDir); + const slugToIDMap = new Map(); + let processedFiles = 0; + + internal.debug(`๐Ÿ“‚ Scanning ${files.length} files in evals directory`); + + for (const file of files) { + const ext = path.extname(file); + if ( + file === 'index.ts' || + file === 'index.js' || + (ext !== '.ts' && ext !== '.js') + ) { + internal.debug(`โญ๏ธ Skipping file: ${file}`); + continue; + } + + const filePath = path.join(this.evalsDir, file); + processedFiles++; + + try { + const content = fs.readFileSync(filePath, 'utf-8'); + const metadata = this.parseEvalMetadata(content); + + if (metadata && metadata.slug && metadata.id) { + slugToIDMap.set(metadata.slug, metadata.id); + internal.debug( + `โœ… Mapped eval slug '${metadata.slug}' to ID '${metadata.id}' from ${file}` + ); + } else { + internal.debug(`โš ๏ธ No valid metadata found in ${file}`); + } + } catch (error) { + internal.warn(`โŒ Failed to parse metadata from ${file}: ${error}`); + } + } + + internal.debug( + `๐Ÿ“š Loaded ${slugToIDMap.size} eval mappings from ${processedFiles} files` + ); + return slugToIDMap; + } + + /** + * Parse eval metadata from file content + * Similar to CLI's ParseEvalMetadata but in TypeScript + */ + private parseEvalMetadata( + content: string + ): { id: string; slug: string; name: string; description: string } | null { + // Find the metadata export pattern + const metadataRegex = /export\s+const\s+metadata\s*=\s*\{/; + const metadataMatch = content.match(metadataRegex); + if (!metadataMatch) { + return null; + } + + // Find the opening brace position + const braceStart = metadataMatch.index! + metadataMatch[0].length - 1; + if (braceStart >= content.length || content[braceStart] !== '{') { + return null; + } + + // Count braces to find the matching closing brace + let braceCount = 0; + let braceEnd = -1; + for (let i = braceStart; i < content.length; i++) { + if (content[i] === '{') { + braceCount++; + } else if (content[i] === '}') { + braceCount--; + if (braceCount === 0) { + braceEnd = i; + break; + } + } + } + + if (braceEnd === -1) { + return null; + } + + // Extract the object content + const objectContent = content.slice(braceStart, braceEnd + 1); + + // Replace single quotes with double quotes for valid JSON + let jsonStr = objectContent.replace(/'([^']*)'/g, '"$1"'); + + // Clean up the JSON string + jsonStr = jsonStr.replace(/\s+/g, ' '); + jsonStr = jsonStr.replace(/\s*{\s*/g, '{'); + jsonStr = jsonStr.replace(/\s*}\s*/g, '}'); + jsonStr = jsonStr.replace(/\s*:\s*/g, ':'); + jsonStr = jsonStr.replace(/\s*,\s*/g, ','); + jsonStr = jsonStr.replace(/,\s*}/g, '}'); + + // Quote the object keys + jsonStr = jsonStr.replace(/(\w+):/g, '"$1":'); + + try { + return JSON.parse(jsonStr); + } catch (error) { + internal.debug(`Failed to parse metadata JSON: ${error}`); + return null; + } + } +} diff --git a/src/apis/evaljobscheduler.ts b/src/apis/evaljobscheduler.ts new file mode 100644 index 00000000..2fedb322 --- /dev/null +++ b/src/apis/evaljobscheduler.ts @@ -0,0 +1,176 @@ +import { internal } from '../logger/internal'; +import type { PromptAttributes } from '../utils/promptMetadata'; + +// Global instance storage to ensure true singleton across all module contexts +declare global { + var __evalJobSchedulerInstance: EvalJobScheduler | undefined; +} + +export interface PendingEvalJob { + spanId: string; + sessionId: string; + promptMetadata: PromptAttributes[]; + output?: string; + createdAt: string; +} + +export interface JobFilter { + sessionId?: string; +} + +/** + * Singleton class for EvalJobScheduler + */ +export default class EvalJobScheduler { + private pendingJobs: Map = new Map(); + private instanceId: string; + + private constructor() { + // Private constructor to prevent direct instantiation + this.instanceId = `EvalJobScheduler-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`; + internal.debug( + '๐Ÿ—๏ธ EvalJobScheduler constructor called, instanceId:', + this.instanceId + ); + } + + /** + * Get the singleton instance of EvalJobScheduler + */ + public static async getInstance(): Promise { + internal.debug('๐Ÿ” EvalJobScheduler.getInstance() called'); + internal.debug( + '๐Ÿ” globalThis.__evalJobSchedulerInstance exists:', + !!globalThis.__evalJobSchedulerInstance + ); + + if (!globalThis.__evalJobSchedulerInstance) { + globalThis.__evalJobSchedulerInstance = new EvalJobScheduler(); + internal.debug( + '๐Ÿ†• Created new EvalJobScheduler instance, ID:', + globalThis.__evalJobSchedulerInstance.instanceId + ); + } else { + internal.debug( + 'โ™ป๏ธ Returning existing EvalJobScheduler instance, ID:', + globalThis.__evalJobSchedulerInstance.instanceId + ); + } + return globalThis.__evalJobSchedulerInstance; + } + + /** + * Create a new eval job + */ + public createJob( + spanId: string, + sessionId: string, + promptMetadata: PromptAttributes[] + ): string { + internal.debug('๐Ÿ” EvalJobScheduler.createJob() called with:', { + spanId, + sessionId, + promptMetadataCount: promptMetadata.length, + }); + + const now = new Date().toISOString(); + + // Check if job already exists + if (this.pendingJobs.has(spanId)) { + internal.debug('โš ๏ธ Job already exists, overwriting:', spanId); + } + + // Count total evals across all prompt metadata + const totalEvals = promptMetadata.reduce( + (count, meta) => count + (meta.evals?.length || 0), + 0 + ); + internal.debug( + `๐Ÿ“ฆ Creating eval job ${spanId} for session ${sessionId} with ${totalEvals} evals` + ); + + const job: PendingEvalJob = { + spanId, + sessionId, + promptMetadata, + createdAt: now, + }; + + this.pendingJobs.set(spanId, job); + internal.debug('โœ… Job created successfully:', spanId); + internal.debug('๐Ÿ“Š Total jobs:', this.pendingJobs.size); + + return spanId; + } + + /** + * Remove a job + */ + public removeJob(spanId: string): boolean { + internal.debug('๐Ÿ” EvalJobScheduler.removeJob() called with:', spanId); + const removed = this.pendingJobs.delete(spanId); + internal.debug('๐Ÿ” Job removed:', removed); + return removed; + } + + /** + * Get jobs with optional filtering + */ + public getJobs(filter?: JobFilter): PendingEvalJob[] { + internal.debug('๐Ÿ” EvalJobScheduler.getJobs() called with filter:', filter); + internal.debug( + '๐Ÿ“Š Total pending jobs in scheduler:', + this.pendingJobs.size + ); + + let jobs = Array.from(this.pendingJobs.values()); + + if (filter?.sessionId) { + jobs = jobs.filter((job) => job.sessionId === filter.sessionId); + internal.debug( + `๐Ÿ” Filtered jobs for session ${filter.sessionId}:`, + jobs.length + ); + } + + internal.debug('๐Ÿ“‹ Returning jobs:', jobs.length); + if (jobs.length > 0) { + jobs.forEach((job, index) => { + internal.debug(`๐Ÿ“ฆ Job ${index + 1}:`, { + spanId: job.spanId, + sessionId: job.sessionId, + promptMetadataCount: job.promptMetadata?.length || 0, + hasOutput: !!job.output, + createdAt: job.createdAt, + }); + }); + } + return jobs; + } + + /** + * Print out the whole state of the EvalJobScheduler + */ + public printState(): void { + internal.debug('๐Ÿ” EvalJobScheduler.printState() called'); + internal.debug('๐Ÿ” Instance ID:', this.instanceId); + internal.debug('๐Ÿ” EvalJobScheduler State:'); + internal.debug('๐Ÿ“Š Total jobs:', this.pendingJobs.size); + internal.debug('๐Ÿ“‹ Job IDs:', Array.from(this.pendingJobs.keys())); + internal.debug( + '๐Ÿ“ฆ All jobs:', + JSON.stringify(Array.from(this.pendingJobs.values()), null, 2) + ); + internal.debug( + '๐Ÿ” Global instance check:', + globalThis.__evalJobSchedulerInstance === this + ); + } + + /** + * Get the instance ID + */ + public getInstanceId(): string { + return this.instanceId; + } +} diff --git a/src/apis/patchportal.ts b/src/apis/patchportal.ts index 6010b356..31078202 100644 --- a/src/apis/patchportal.ts +++ b/src/apis/patchportal.ts @@ -14,7 +14,7 @@ export default class PatchPortal { private constructor() { // Private constructor to prevent direct instantiation - this.instanceId = `PatchPortal-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`; + this.instanceId = `PatchPortal-${Date.now()}-${Math.random().toString(36).substring(2, 9)}`; internal.debug( '๐Ÿ—๏ธ PatchPortal constructor called, instanceId:', this.instanceId diff --git a/src/apis/prompt/index.ts b/src/apis/prompt/index.ts index b08a4a91..4b1228f9 100644 --- a/src/apis/prompt/index.ts +++ b/src/apis/prompt/index.ts @@ -88,7 +88,6 @@ export default class PromptAPI { // Method to load prompts dynamically (called by context) public async loadPrompts(): Promise { - internal.debug('loadPrompts() called'); try { // Try multiple possible paths for the generated prompts let generatedModule: unknown; @@ -97,10 +96,13 @@ export default class PromptAPI { const possiblePaths = await this.resolveGeneratedPaths(); internal.debug('Trying absolute paths:', possiblePaths); + const attemptErrors: string[] = []; for (const possiblePath of possiblePaths) { internal.debug(' Checking:', possiblePath); try { await fs.access(possiblePath); + internal.debug(' โœ“ File exists:', possiblePath); + // Get file stats for cache-busting const stats = await fs.stat(possiblePath); const mtime = stats.mtime.getTime(); @@ -110,13 +112,20 @@ export default class PromptAPI { // Use ESM dynamic import instead of require generatedModule = await import(fileUrl); - internal.debug(' Successfully loaded from:', possiblePath); + internal.debug(' โœ“ Successfully loaded from:', possiblePath); break; - } catch {} + } catch (error) { + const errorMsg = + error instanceof Error ? error.message : String(error); + internal.debug(` โœ— Failed to load ${possiblePath}: ${errorMsg}`); + attemptErrors.push(`${possiblePath}: ${errorMsg}`); + } } if (!generatedModule) { - throw new Error('Generated prompts file not found'); + throw new Error( + `Generated prompts file not found. Tried:\n${attemptErrors.join('\n')}` + ); } // Type guard to ensure generatedModule has expected shape diff --git a/src/index.ts b/src/index.ts index 7ca7bfdd..f28dda38 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,6 +1,8 @@ +export * from './apis/eval'; export * from './logger'; export * from './server'; export * from './types'; +export { hash, hashSync } from './utils/hash'; export * from './utils/interpolate'; export { type PromptAttributes, @@ -11,10 +13,20 @@ export { import DiscordAPI from './apis/discord'; // Export APIs import EmailAPI from './apis/email'; +import EvalAPI from './apis/eval'; +import EvalJobScheduler from './apis/evaljobscheduler'; import PatchPortal from './apis/patchportal'; import PromptAPI from './apis/prompt'; import StreamAPIImpl from './apis/stream'; -export { EmailAPI, DiscordAPI, PatchPortal, PromptAPI, StreamAPIImpl }; +export { + EmailAPI, + DiscordAPI, + EvalAPI, + EvalJobScheduler, + PatchPortal, + PromptAPI, + StreamAPIImpl, +}; import { TeamsActivityHandler } from 'botbuilder'; import { run, type UserOpenTelemetryConfig } from './autostart'; diff --git a/src/router/context.ts b/src/router/context.ts index c8dd8828..3b4310ed 100644 --- a/src/router/context.ts +++ b/src/router/context.ts @@ -1,11 +1,15 @@ import { context, SpanStatusCode, - trace, type Tracer, + trace, } from '@opentelemetry/api'; +import EvalAPI from '../apis/eval'; +import EvalJobScheduler from '../apis/evaljobscheduler'; import { markSessionCompleted } from '../apis/session'; import type { Logger } from '../logger'; +import { internal } from '../logger/internal'; +import type { PromptAttributes } from '../utils/promptMetadata'; let running = 0; export function isIdle(): boolean { @@ -68,19 +72,32 @@ export default class AgentContextWaitUntilHandler { } public async waitUntilAll(logger: Logger, sessionId: string): Promise { + internal.debug(`๐Ÿ” waitUntilAll() called for session ${sessionId}`); + if (this.hasCalledWaitUntilAll) { throw new Error('waitUntilAll can only be called once per instance'); } this.hasCalledWaitUntilAll = true; if (this.promises.length === 0) { + internal.debug('No promises to wait for, executing evals directly'); + await this.executeEvalsForSession(logger, sessionId); return; } + + internal.debug( + `โณ Waiting for ${this.promises.length} promises to complete...` + ); try { // Promises are already executing, just wait for them to complete await Promise.all(this.promises); const duration = Date.now() - (this.started as number); + internal.debug('โœ… All promises completed, marking session completed'); await markSessionCompleted(sessionId, duration); + + // Execute evals after session completion + internal.debug('๐Ÿš€ Starting eval execution after session completion'); + await this.executeEvalsForSession(logger, sessionId); } catch (ex) { logger.error('error sending session completed', ex); } finally { @@ -88,4 +105,141 @@ export default class AgentContextWaitUntilHandler { this.promises.length = 0; } } + + /** + * Execute evals for the completed session + */ + private async executeEvalsForSession( + logger: Logger, + sessionId: string + ): Promise { + try { + internal.debug(`๐Ÿ” Starting eval execution for session ${sessionId}`); + + // Get pending eval jobs for this session + internal.debug('๐Ÿ” Getting EvalJobScheduler instance...'); + const evalJobScheduler = await EvalJobScheduler.getInstance(); + internal.debug('โœ… EvalJobScheduler instance obtained'); + + internal.debug(`๐Ÿ” Querying jobs for session ${sessionId}...`); + const jobs = evalJobScheduler.getJobs({ sessionId }); + + if (jobs.length === 0) { + internal.debug(`๐Ÿ“ญ No eval jobs found for session ${sessionId}`); + return; + } + + internal.debug( + `๐Ÿ“‹ Found ${jobs.length} eval jobs for session ${sessionId}` + ); + + // Load eval metadata map + internal.debug('๐Ÿ”ง Loading eval metadata map...'); + const evalAPI = new EvalAPI(); + const evalMetadataMap = await evalAPI.loadEvalMetadataMap(); + internal.debug(`๐Ÿ“š Loaded ${evalMetadataMap.size} eval mappings`); + + // Execute evals for each job + let totalEvalsExecuted = 0; + for (let i = 0; i < jobs.length; i++) { + const job = jobs[i]; + internal.debug( + `๐ŸŽฏ Processing job ${i + 1}/${jobs.length} (spanId: ${job.spanId})` + ); + const evalsInJob = await this.executeEvalsForJob( + logger, + job, + evalAPI, + evalMetadataMap + ); + totalEvalsExecuted += evalsInJob; + internal.debug( + `โœ… Completed job ${i + 1}/${jobs.length}: ${evalsInJob} evals executed` + ); + } + + internal.debug( + `โœ… Completed eval execution for session ${sessionId}: ${totalEvalsExecuted} evals executed` + ); + + // Clean up completed jobs + internal.debug(`๐Ÿงน Cleaning up ${jobs.length} completed jobs...`); + for (const job of jobs) { + evalJobScheduler.removeJob(job.spanId); + } + internal.debug(`โœ… Cleaned up ${jobs.length} completed jobs`); + } catch (error) { + logger.error('โŒ Error executing evals for session:', error); + } + } + + /** + * Execute evals for a specific job + */ + private async executeEvalsForJob( + logger: Logger, + job: { + spanId: string; + sessionId: string; + promptMetadata: PromptAttributes[]; + input?: string; + output?: string; + }, + evalAPI: EvalAPI, + evalMetadataMap: Map + ): Promise { + let evalsExecuted = 0; + + internal.debug( + `๐ŸŽฏ Processing job ${job.spanId} with ${job.promptMetadata.length} prompt metadata entries` + ); + + for (const promptMeta of job.promptMetadata || []) { + if (!promptMeta.evals || promptMeta.evals.length === 0) { + logger.debug('โญ๏ธ Skipping prompt metadata with no evals'); + continue; + } + + internal.debug( + `๐Ÿ“ Found ${promptMeta.evals.length} evals for prompt: ${promptMeta.evals.join(', ')}` + ); + + for (const evalSlug of promptMeta.evals) { + try { + internal.debug( + `๐Ÿš€ Running eval '${evalSlug}' for session ${job.sessionId}` + ); + + internal.debug(`๐Ÿ”‘ Template hash: ${promptMeta.templateHash}`); + internal.debug(`๐Ÿ”‘ Compiled hash: ${promptMeta.compiledHash}`); + + const result = await evalAPI.runEval( + evalSlug, + job.input || '', + job.output || '', + job.sessionId, + job.spanId, + promptMeta.templateHash + ); + + if (result.success) { + internal.debug(`โœ… Successfully executed eval '${evalSlug}'`); + evalsExecuted++; + } else { + logger.warn( + `โš ๏ธ Eval '${evalSlug}' completed but returned error: ${result.message}` + ); + } + } catch (error) { + logger.error(`โŒ Failed to execute eval '${evalSlug}':`, error); + // Continue with other evals even if one fails + } + } + } + + internal.debug( + `๐Ÿ“Š Job ${job.spanId} completed: ${evalsExecuted} evals executed` + ); + return evalsExecuted; + } } diff --git a/src/router/router.ts b/src/router/router.ts index abd9109d..12855225 100644 --- a/src/router/router.ts +++ b/src/router/router.ts @@ -413,8 +413,14 @@ export function createRouter(config: RouterConfig): ServerRoute['handler'] { throw err; } } - ).then((r) => { - contextHandler.waitUntilAll(logger, sessionId); + ).then(async (r) => { + internal.info( + `๐Ÿ” Router calling waitUntilAll for session ${sessionId}` + ); + await contextHandler.waitUntilAll(logger, sessionId); + internal.info( + `โœ… Router completed waitUntilAll for session ${sessionId}` + ); return r; }); }); diff --git a/src/server/bun.ts b/src/server/bun.ts index f339138d..b2aefd5f 100644 --- a/src/server/bun.ts +++ b/src/server/bun.ts @@ -1,5 +1,7 @@ import type { ReadableStream } from 'node:stream/web'; import { context, SpanKind, SpanStatusCode, trace } from '@opentelemetry/api'; +import EvalJobScheduler from '../apis/evaljobscheduler'; +import { isIdle } from '../router/context'; import type { AgentResponseData, AgentWelcomeResult, @@ -17,7 +19,6 @@ import { shouldIgnoreStaticFile, toWelcomePrompt, } from './util'; -import { isIdle } from '../router/context'; const idleTimeout = 255; // expressed in seconds @@ -59,6 +60,10 @@ export class BunServer implements Server { const devmode = process.env.AGENTUITY_SDK_DEV_MODE === 'true'; const { sdkVersion, logger } = this.config; + + // Initialize EvalJobScheduler globally for patches + await EvalJobScheduler.getInstance(); + const hostname = process.env.AGENTUITY_ENV === 'development' ? '127.0.0.1' : '0.0.0.0'; diff --git a/src/server/node.ts b/src/server/node.ts index 75193a2d..705e41b2 100644 --- a/src/server/node.ts +++ b/src/server/node.ts @@ -5,7 +5,9 @@ import { import { Readable } from 'node:stream'; import type { ReadableStream } from 'node:stream/web'; import { context, SpanKind, SpanStatusCode, trace } from '@opentelemetry/api'; +import EvalJobScheduler from '../apis/evaljobscheduler'; import type { Logger } from '../logger'; +import { isIdle } from '../router/context'; import type { AgentResponseData, AgentWelcomeResult } from '../types'; import { extractTraceContextFromNodeRequest, @@ -21,7 +23,6 @@ import { shouldIgnoreStaticFile, toWelcomePrompt, } from './util'; -import { isIdle } from '../router/context'; export const MAX_REQUEST_TIMEOUT = 60_000 * 10; @@ -104,6 +105,10 @@ export class NodeServer implements Server { async start(): Promise { const sdkVersion = this.sdkVersion; const devmode = process.env.AGENTUITY_SDK_DEV_MODE === 'true'; + + // Initialize EvalJobScheduler globally for patches + await EvalJobScheduler.getInstance(); + this.server = createHttpServer(async (req, res) => { if (req.method === 'GET' && req.url === '/_health') { res.writeHead(200, { diff --git a/src/utils/hash.ts b/src/utils/hash.ts new file mode 100644 index 00000000..a227d66f --- /dev/null +++ b/src/utils/hash.ts @@ -0,0 +1,29 @@ +import crypto from 'crypto'; + +/** + * Convert an ArrayBuffer to a hexadecimal string + */ +function arrayBufferToHex(arrayBuffer: ArrayBuffer): string { + const array = Array.from(new Uint8Array(arrayBuffer)); + const hex = array.map((byte) => byte.toString(16).padStart(2, '0')).join(''); + return hex; +} + +/** + * Hash a string using SHA-256 and return the hexadecimal representation (async) + */ +export async function hash(value: string): Promise { + const ctBuffer = await crypto.subtle.digest( + 'SHA-256', + new TextEncoder().encode(value) + ); + return arrayBufferToHex(ctBuffer); +} + +/** + * Hash a string using SHA-256 and return the hexadecimal representation (sync) + * Uses Node.js crypto for synchronous operation + */ +export function hashSync(value: string): string { + return crypto.createHash('sha256').update(value).digest('hex'); +} diff --git a/src/utils/promptMetadata.ts b/src/utils/promptMetadata.ts index da074e64..58f624ea 100644 --- a/src/utils/promptMetadata.ts +++ b/src/utils/promptMetadata.ts @@ -1,12 +1,14 @@ import crypto from 'node:crypto'; import PatchPortal from '../apis/patchportal.js'; import { internal } from '../logger/internal'; +import { hashSync } from './hash.js'; export interface PromptAttributesParams { slug: string; compiled: string; template: string; variables?: Record; + evals?: string[]; } export interface PromptAttributes extends PromptAttributesParams { @@ -26,23 +28,17 @@ export async function processPromptMetadata( template: `${attributes.template?.substring(0, 50)}...`, compiled: `${attributes.compiled?.substring(0, 50)}...`, variables: attributes.variables, + evals: attributes.evals, }); const patchPortal = await PatchPortal.getInstance(); internal.debug('โœ… PatchPortal instance obtained'); // Generate hash - const templateHash = crypto - .createHash('sha256') - .update(attributes.template) - .digest('hex'); - + const templateHash = hashSync(attributes.template); internal.debug('๐Ÿ”‘ Template hash:', templateHash); - const compiledHash = crypto - .createHash('sha256') - .update(attributes.compiled) - .digest('hex'); + const compiledHash = hashSync(attributes.compiled); internal.debug('๐Ÿ”‘ Compiled hash:', compiledHash); @@ -57,6 +53,7 @@ export async function processPromptMetadata( slug: metadata.slug, templateHash: metadata.templateHash, compiledHash: metadata.compiledHash, + evals: metadata.evals, timestamp: new Date().toISOString(), }); diff --git a/test/utils/hash.test.ts b/test/utils/hash.test.ts new file mode 100644 index 00000000..aca7f640 --- /dev/null +++ b/test/utils/hash.test.ts @@ -0,0 +1,61 @@ +import { describe, expect, it } from 'bun:test'; +import { hash, hashSync } from '../../src/utils/hash'; + +describe('Hash Functions', () => { + it('should produce the same hash for both async and sync functions', async () => { + const testCases = [ + 'Hello, World!', + '', + 'This is a longer string with special characters: !@#$%^&*()', + 'Unicode test: ๐Ÿš€ ๐ŸŒŸ ๐ŸŽ‰', + 'Multiple\nlines\nwith\ttabs', + 'Very long string '.repeat(100), + ]; + + for (const testString of testCases) { + const asyncHash = await hash(testString); + const syncHash = hashSync(testString); + + expect(asyncHash).toBe(syncHash); + expect(asyncHash).toMatch(/^[a-f0-9]{64}$/); // SHA-256 produces 64 hex characters + expect(syncHash).toMatch(/^[a-f0-9]{64}$/); + } + }); + + it('should produce consistent hashes for the same input', async () => { + const testString = 'Consistent hash test'; + + const hash1 = await hash(testString); + const hash2 = await hash(testString); + const syncHash1 = hashSync(testString); + const syncHash2 = hashSync(testString); + + expect(hash1).toBe(hash2); + expect(syncHash1).toBe(syncHash2); + expect(hash1).toBe(syncHash1); + }); + + it('should produce different hashes for different inputs', async () => { + const string1 = 'Hello'; + const string2 = 'World'; + + const hash1 = await hash(string1); + const hash2 = await hash(string2); + const syncHash1 = hashSync(string1); + const syncHash2 = hashSync(string2); + + expect(hash1).not.toBe(hash2); + expect(syncHash1).not.toBe(syncHash2); + expect(hash1).toBe(syncHash1); + expect(hash2).toBe(syncHash2); + }); + + it('should handle empty string', async () => { + const emptyString = ''; + const asyncHash = await hash(emptyString); + const syncHash = hashSync(emptyString); + + expect(asyncHash).toBe(syncHash); + expect(asyncHash).toMatch(/^[a-f0-9]{64}$/); + }); +});