diff --git a/src/ensemble/arbiter.js b/src/ensemble/arbiter.js new file mode 100644 index 000000000..245528a4c --- /dev/null +++ b/src/ensemble/arbiter.js @@ -0,0 +1,156 @@ +import { commandExists } from '../agent/commands/index.js'; + +export class Arbiter { + /** + * @param {Object} config + * @param {string} config.strategy - "heuristic" (Phase 1) or "llm_judge" (Phase 2) + * @param {number} config.majority_bonus - score boost for majority command (default 0.2) + * @param {number} config.latency_penalty_per_sec - penalty per second of latency (default 0.02) + */ + constructor(config = {}) { + this.strategy = config.strategy || 'heuristic'; + this.majorityBonus = config.majority_bonus ?? 0.2; + this.latencyPenalty = config.latency_penalty_per_sec ?? 0.02; + this._confidenceThreshold = config.confidence_threshold ?? 0.08; + this._lastConfidence = 1.0; // set after each pick() + } + + /** + * Confidence threshold for triggering LLM judge. + * If top 2 scores are within this margin, it's "low confidence". + */ + get confidenceThreshold() { + return this._confidenceThreshold ?? 0.08; + } + + /** + * Pick the best proposal from the panel's responses. + * Also sets `this._lastConfidence` for the controller to check. + * @param {Proposal[]} proposals - all proposals (may include failures) + * @returns {Proposal} - the winning proposal with `score` and `winReason` set + */ + pick(proposals) { + const successful = proposals.filter(p => p.status === 'success'); + + if (successful.length === 0) { + return { + agentId: 'none', + modelName: 'none', + response: "I'm having trouble thinking right now. Let me try again in a moment.", + command: null, + commandArgs: null, + preCommandText: '', + latencyMs: 0, + status: 'error', + error: 'All panel members failed', + score: 0, + winReason: 'fallback' + }; + } + + // Score each proposal + for (const p of successful) { + p.score = this._scoreProposal(p); + } + + // Find majority command and apply bonus + const majorityCommand = this._findMajorityCommand(successful); + if (majorityCommand) { + for (const p of successful) { + if (p.command === majorityCommand) { + p.score += this.majorityBonus; + } + } + } + + // Apply latency penalty (tiebreaker) + for (const p of successful) { + p.score -= this.latencyPenalty * (p.latencyMs / 1000); + } + + // Sort: highest score first, then fastest (lowest latency) + successful.sort((a, b) => { + if (Math.abs(b.score - a.score) > 0.001) return b.score - a.score; + return a.latencyMs - b.latencyMs; + }); + + const winner = successful[0]; + winner.winReason = majorityCommand && winner.command === majorityCommand + ? 'majority+highest_score' + : 'highest_score'; + + // Compute confidence: margin between top 2 scores + this._lastConfidence = successful.length >= 2 + ? successful[0].score - successful[1].score + : 1.0; + + return winner; + } + + /** + * Returns true if the last pick() result had low confidence + * and an LLM judge should be consulted. + */ + isLowConfidence() { + return this._lastConfidence < this._confidenceThreshold; + } + + /** + * Compute heuristic score for a proposal. + * @param {Proposal} proposal + * @returns {number} score between 0.0 and ~1.0 + */ + _scoreProposal(proposal) { + let score = 0; + const r = proposal.response || ''; + + // Non-empty response + if (r.trim().length > 0) score += 0.10; + + // Contains a command + if (proposal.command) score += 0.25; + + // Command exists in the game's registry + if (proposal.command && commandExists(proposal.command)) score += 0.15; + + // No hallucination markers + const hallucinations = ['(FROM OTHER BOT)', 'My brain disconnected', 'Error:']; + if (!hallucinations.some(h => r.includes(h))) score += 0.15; + + // Reasonable length (not too short, not too long) + if (r.length > 5 && r.length < 2000) score += 0.10; + + // Not a tab-only or whitespace-only response + if (r.trim().length > 1) score += 0.10; + + // Has pre-command reasoning text (shows the model "thought") + if (proposal.preCommandText && proposal.preCommandText.trim().length > 0) score += 0.05; + + // Response contains actual content words (not just a command) + if (r.replace(/![a-zA-Z]+\(.*?\)/g, '').trim().length > 3) score += 0.10; + + return score; + } + + /** + * Find the command that appears most among proposals. + * @param {Proposal[]} proposals - successful proposals only + * @returns {string|null} majority command or null + */ + _findMajorityCommand(proposals) { + const commands = proposals.map(p => p.command).filter(Boolean); + if (commands.length === 0) return null; + + const counts = {}; + for (const c of commands) { + counts[c] = (counts[c] || 0) + 1; + } + + const sorted = Object.entries(counts).sort((a, b) => b[1] - a[1]); + // Majority: top command appears more than once AND strictly more than runner-up + if (sorted[0][1] > 1 && (sorted.length === 1 || sorted[0][1] > sorted[1][1])) { + return sorted[0][0]; + } + return null; + } +} diff --git a/src/ensemble/controller.js b/src/ensemble/controller.js new file mode 100644 index 000000000..01c96f2f1 --- /dev/null +++ b/src/ensemble/controller.js @@ -0,0 +1,190 @@ +import { Panel } from './panel.js'; +import { Arbiter } from './arbiter.js'; +import { LLMJudge } from './judge.js'; +import { EnsembleLogger } from './logger.js'; +import { FeedbackCollector } from './feedback.js'; + +/** + * EnsembleModel — implements the same interface as any single model class + * (Gemini, Grok, etc.) so it can be used as a drop-in replacement for chat_model + * in the Prompter class. + * + * Instead of a single LLM call, it queries a panel of models in parallel, + * runs an arbiter to pick the best response, and returns the winning string. + */ +export class EnsembleModel { + static prefix = 'ensemble'; + + /** + * @param {Object} ensembleConfig - the profile.ensemble configuration block + * @param {Object} profile - the full bot profile (for context/name) + */ + constructor(ensembleConfig, profile) { + this.model_name = 'ensemble'; + this.profile = profile; + + this.panel = new Panel( + ensembleConfig.panel, + ensembleConfig.timeout_ms || 15000 + ); + this.arbiter = new Arbiter(ensembleConfig.arbiter || {}); + this.judge = ensembleConfig.judge !== false + ? new LLMJudge(ensembleConfig.judge || {}) + : null; + this.logger = new EnsembleLogger(profile.name); + this.feedback = new FeedbackCollector(); + + this.minResponses = ensembleConfig.min_responses || 2; + this.logDecisions = ensembleConfig.log_decisions !== false; + + // Usage tracking compatibility (Prompter reads this after each call) + this._lastUsage = null; + this._lastUsageByModel = null; + + console.log(`[Ensemble] Initialized for ${profile.name}: ${this.panel.members.length} panel members`); + } + + /** + * Phase 3: inject the shared embedding model into FeedbackCollector. + * Called by Prompter after both chat_model and embedding_model are ready. + */ + setEmbeddingModel(embeddingModel) { + this.feedback.setEmbeddingModel(embeddingModel); + } + + /** + * Standard model interface — called by Prompter.promptConvo(). + * Queries all panel members, arbitrates, returns winning response. + * + * @param {Array<{role:string, content:string}>} turns - conversation history + * @param {string} systemMessage - the built system prompt + * @returns {Promise} - the winning response text + */ + async sendRequest(turns, systemMessage) { + const startTime = Date.now(); + + // Phase 3: retrieve similar past experiences to augment context + let augmentedSystem = systemMessage; + if (this.feedback.isReady) { + const situationText = turns.filter(t => t.role === 'user').slice(-2) + .map(t => t.content).join(' '); + const experiences = await this.feedback.getSimilar(situationText, 3); + if (experiences.length > 0) { + const memBlock = experiences.map(e => { + const m = e.metadata; + const outcome = m.outcome && m.outcome !== 'pending' ? ` (outcome: ${m.outcome})` : ''; + return `- Situation: "${e.document.slice(0, 120)}" → action: ${m.winner_command || 'chat'}${outcome}`; + }).join('\n'); + augmentedSystem = systemMessage + `\n\n[PAST EXPERIENCE - similar situations]\n${memBlock}`; + console.log(`[Ensemble] Injected ${experiences.length} past experience(s) into context`); + } + } + + // Query all panel members in parallel + const proposals = await this.panel.queryAll(turns, augmentedSystem); + + const successful = proposals.filter(p => p.status === 'success'); + const failed = proposals.filter(p => p.status !== 'success'); + + if (failed.length > 0) { + const failSummary = failed.map(p => `${p.agentId}:${p.status}`).join(', '); + console.log(`[Ensemble] Panel failures: ${failSummary}`); + } + + if (successful.length < this.minResponses) { + console.warn(`[Ensemble] Only ${successful.length}/${this.panel.members.length} responses (need ${this.minResponses})`); + if (successful.length === 0) { + this._lastUsage = null; + return "I'm having trouble processing right now. Let me try again."; + } + } + + // Heuristic arbiter — always runs first + let winner = this.arbiter.pick(proposals); + let judgeUsed = false; + + // Phase 2: LLM judge fallback when heuristic confidence is low + if (this.judge && this.arbiter.isLowConfidence() && successful.length >= 2) { + console.log(`[Ensemble] Low confidence (margin=${this.arbiter._lastConfidence.toFixed(3)}), consulting LLM judge...`); + try { + const judgeId = await this.judge.judge(successful, systemMessage, turns); + if (judgeId) { + const judgeWinner = successful.find(p => p.agentId === judgeId); + if (judgeWinner) { + judgeWinner.winReason = 'llm_judge'; + winner = judgeWinner; + judgeUsed = true; + console.log(`[Ensemble] Judge overruled heuristic: winner=${judgeId}`); + } + } + } catch (err) { + console.warn(`[Ensemble] Judge error, keeping heuristic winner: ${err.message}`); + } + } + + const totalMs = Date.now() - startTime; + console.log( + `[Ensemble] Decision in ${totalMs}ms: ` + + `${successful.length}/${this.panel.members.length} responded, ` + + `winner=${winner.agentId} (${winner.command || 'chat'}, score=${winner.score?.toFixed(2)})` + + (judgeUsed ? ' [judge]' : '') + ); + + // Log decision + if (this.logDecisions) { + this.logger.logDecision(proposals, winner); + } + + // Phase 3: Record decision in ChromaDB for continuous learning + const situationText = turns.filter(t => t.role === 'user').slice(-2) + .map(t => t.content).join(' '); + this.feedback.recordDecision({ + winner, + proposals, + timestamp: Date.now(), + situationText + }); + + // Aggregate usage from all successful members + this._lastUsage = this._aggregateUsage(successful); + this._lastUsageByModel = this._buildUsageBreakdown(successful); + + return winner.response; + } + + /** + * Embeddings are not supported by the ensemble — the Prompter uses + * a separate embedding model configured in the profile. + */ + async embed(_text) { + throw new Error('Embeddings not supported by EnsembleModel. Configure a separate embedding model in the profile.'); + } + + /** + * Sum token usage across all panel members for cost tracking. + */ + _aggregateUsage(proposals) { + let prompt = 0, completion = 0; + for (const p of proposals) { + if (p.usage) { + prompt += p.usage.prompt_tokens || 0; + completion += p.usage.completion_tokens || 0; + } + } + if (prompt === 0 && completion === 0) return null; + return { prompt_tokens: prompt, completion_tokens: completion, total_tokens: prompt + completion }; + } + + _buildUsageBreakdown(proposals) { + const breakdown = []; + for (const p of proposals) { + if (!p.usage) continue; + breakdown.push({ + modelName: p.modelName || 'unknown', + provider: p.provider || 'unknown', + usage: p.usage + }); + } + return breakdown.length > 0 ? breakdown : null; + } +} diff --git a/src/ensemble/feedback.js b/src/ensemble/feedback.js new file mode 100644 index 000000000..f784d2050 --- /dev/null +++ b/src/ensemble/feedback.js @@ -0,0 +1,178 @@ +import { ChromaClient } from 'chromadb'; + +const COLLECTION_NAME = 'ensemble_memory'; +const CHROMADB_URL = process.env.CHROMADB_URL || 'http://localhost:8000'; + +/** Ensure embedding is a flat array of numbers */ +function flattenEmbedding(raw) { + if (!raw) return null; + // Already a flat number array + if (Array.isArray(raw) && (raw.length === 0 || typeof raw[0] === 'number')) return raw; + // Array of objects with values (e.g. [{values:[...]}]) + if (Array.isArray(raw) && raw[0]?.values) return raw[0].values; + // Object with values + if (raw.values && Array.isArray(raw.values)) return raw.values; + // Object with embedding.values + if (raw.embedding?.values) return raw.embedding.values; + return null; +} + +export class FeedbackCollector { + constructor() { + this._client = null; + this._collection = null; + this._ready = false; + this._embedFn = null; + this._decisionCount = 0; + this._lastDecisionId = null; + this._initAsync(); + } + + setEmbeddingModel(model) { + this._embedFn = async (text) => { + const raw = await model.embed(text); + return flattenEmbedding(raw); + }; + } + + async _initAsync() { + try { + this._client = new ChromaClient({ path: CHROMADB_URL }); + this._collection = await this._client.getOrCreateCollection({ + name: COLLECTION_NAME, + metadata: { 'hnsw:space': 'cosine' } + }); + this._ready = true; + console.log(`[Feedback] ChromaDB ready at ${CHROMADB_URL}, collection: ${COLLECTION_NAME}`); + } catch (err) { + console.warn(`[Feedback] ChromaDB unavailable (${err.message}). Running without vector memory.`); + this._ready = false; + } + } + + async recordDecision(decision) { + if (!this._ready || !this._embedFn) return; + + // Hoist variables so the catch block can access them for retry + let id = null; + let cleanEmb = null; + let text = ''; + let meta = null; + + try { + const { winner, proposals, situationText } = decision; + text = situationText || ''; + if (text.trim().length < 5) return; + + const embedding = await this._embedFn(text.slice(0, 512)); + if (!embedding || !Array.isArray(embedding) || embedding.length === 0) { + console.warn('[Feedback] Invalid embedding, skipping storage'); + return; + } + // Ensure all values are numbers + cleanEmb = embedding.map(v => Number(v)); + if (cleanEmb.some(v => !isFinite(v))) { + console.warn('[Feedback] Embedding contains non-finite values, skipping'); + return; + } + + this._decisionCount++; + const ts = Date.now(); + id = 'dec_' + ts + '_' + this._decisionCount; + this._lastDecisionId = id; + + const successful = proposals.filter(p => p.status === 'success'); + const rawCmd = winner.command; + const rawScore = winner.score; + + meta = { + winner_id: String(winner.agentId || 'unknown'), + winner_command: (rawCmd && typeof rawCmd === 'string') ? rawCmd : '', + winner_score: (typeof rawScore === 'number' && isFinite(rawScore)) ? rawScore : 0, + win_reason: String(winner.winReason || 'highest_score'), + panel_size: Number(proposals.length), + responders: Number(successful.length), + timestamp: Number(ts), + outcome: 'pending' + }; + + await this._collection.add({ + ids: [id], + embeddings: [cleanEmb], + documents: [text.slice(0, 512)], + metadatas: [meta] + }); + console.log('[Feedback] Decision stored in ChromaDB:', id); + } catch (err) { + if (err.message?.includes('already exists')) { + // Skip duplicate IDs + } else if (err.message?.includes('dimension') || err.message?.includes('shape') || err.message?.includes('mismatch')) { + // Dimension mismatch: delete and recreate collection + console.warn('[Feedback] Embedding dimension mismatch, recreating collection'); + try { + await this._client.deleteCollection({ name: COLLECTION_NAME }); + this._collection = await this._client.createCollection({ + name: COLLECTION_NAME, + metadata: { 'hnsw:space': 'cosine' } + }); + // Retry the add + await this._collection.add({ + ids: [id], + embeddings: [cleanEmb], + documents: [text.slice(0, 512)], + metadatas: [meta] + }); + console.log('[Feedback] Decision stored after collection recreation:', id); + } catch (retryErr) { + console.warn('[Feedback] Failed to recreate collection and store decision:', retryErr.message); + } + } else { + console.warn('[Feedback] Failed to record decision:', err.message); + } + } + } + + async recordOutcome(outcome, details) { + if (!this._ready || !this._lastDecisionId) return; + try { + await this._collection.update({ + ids: [this._lastDecisionId], + metadatas: [{ outcome: String(outcome), outcome_detail: String(details || '').slice(0, 200) }] + }); + } catch (err) { + console.warn('[Feedback] Failed to update outcome:', err.message); + } + } + + async getSimilar(situationText, topK) { + if (!this._ready || !this._embedFn) return []; + if (!situationText || situationText.trim().length < 5) return []; + try { + const k = topK || 3; + const embedding = await this._embedFn(situationText.slice(0, 512)); + if (!embedding || !Array.isArray(embedding)) return []; + const cleanEmb = embedding.map(v => Number(v)); + + const results = await this._collection.query({ + queryEmbeddings: [cleanEmb], + nResults: Math.min(k, 10), + include: ['documents', 'metadatas', 'distances'] + }); + + const docs = results.documents?.[0] || []; + const metas = results.metadatas?.[0] || []; + const dists = results.distances?.[0] || []; + + return docs.map((doc, i) => ({ + document: doc, + metadata: metas[i] || {}, + similarity: 1 - (dists[i] || 0) + })).filter(r => r.similarity > 0.6); + } catch (err) { + console.warn('[Feedback] Failed to query similar:', err.message); + return []; + } + } + + get isReady() { return this._ready; } +} diff --git a/src/ensemble/judge.js b/src/ensemble/judge.js new file mode 100644 index 000000000..a8edbe420 --- /dev/null +++ b/src/ensemble/judge.js @@ -0,0 +1,92 @@ +import { selectAPI, createModel } from '../models/_model_map.js'; + +/** + * LLM-as-Judge: when the heuristic arbiter has low confidence, + * a fast judge model reviews all proposals and picks the best one. + */ +export class LLMJudge { + /** + * @param {Object} config + * @param {string} config.model - model name to use as judge (e.g. "gemini-2.5-flash") + * @param {number} config.timeout_ms - max ms to wait for judge (default 10000) + */ + constructor(config = {}) { + this.modelName = config.model || 'gemini-2.5-flash'; + this.timeoutMs = config.timeout_ms || 10000; + this._model = null; + } + + _getModel() { + if (!this._model) { + const profile = selectAPI(this.modelName); + this._model = createModel(profile); + } + return this._model; + } + + /** + * Ask the judge to pick the best proposal. + * @param {Proposal[]} proposals - successful proposals only + * @param {string} systemMessage - the original system prompt (abbreviated) + * @param {Array} turns - last few conversation turns for context + * @returns {Promise} winning agentId, or null if judge fails + */ + async judge(proposals, systemMessage, turns) { + if (proposals.length === 0) return null; + if (proposals.length === 1) return proposals[0].agentId; + + const model = this._getModel(); + + // Build a concise judgment prompt + const lastUserMsg = [...turns].reverse().find(t => t.role === 'user')?.content || ''; + + const proposalText = proposals.map((p, _i) => + `[${p.agentId}] (${p.modelName})\n${p.response}` + ).join('\n\n---\n\n'); + + const judgeSystem = [ + 'You are an expert judge evaluating Minecraft bot AI responses.', + 'Pick the SINGLE best response for the current game situation.', + 'Consider: command correctness, relevance to context, clarity, and safety.', + 'Respond with ONLY the agent ID (e.g. "gemini_a"). No explanation.' + ].join('\n'); + + const judgePrompt = [ + `Current situation: ${lastUserMsg.slice(0, 300)}`, + '', + 'Responses to evaluate:', + proposalText, + '', + `Valid agent IDs: ${proposals.map(p => p.agentId).join(', ')}`, + 'Which response is best? Reply with only the agent ID.' + ].join('\n'); + + const judgeTurns = [{ role: 'user', content: judgePrompt }]; + + try { + const result = await Promise.race([ + model.sendRequest(judgeTurns, judgeSystem), + new Promise((_, reject) => + setTimeout(() => reject(new Error('judge timeout')), this.timeoutMs) + ) + ]); + + // Parse: extract first matching agent ID from the response + const validIds = proposals.map(p => p.agentId); + const trimmedResult = result.trim(); + // Try exact match first (model responded with just the ID) + if (validIds.includes(trimmedResult)) return trimmedResult; + // Fall back to word-boundary regex match + for (const id of validIds) { + const pattern = new RegExp(`\\b${id.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\b`); + if (pattern.test(result)) return id; + } + + console.warn(`[Judge] Could not parse agent ID from response: "${result.slice(0, 100)}"`); + return null; + } catch (err) { + console.warn(`[Judge] Failed: ${err.message}`); + return null; + } + } +} diff --git a/src/ensemble/logger.js b/src/ensemble/logger.js new file mode 100644 index 000000000..50b67bc67 --- /dev/null +++ b/src/ensemble/logger.js @@ -0,0 +1,114 @@ +import { writeFile, readFile, mkdir } from 'fs/promises'; +import { existsSync } from 'fs'; +import path from 'path'; + +const MAX_ENTRIES = 500; +const TRIM_TO = 400; + +export class EnsembleLogger { + constructor(agentName) { + this.agentName = agentName; + this.dir = `./bots/${agentName}`; + this.filePath = path.join(this.dir, 'ensemble_log.json'); + this.decisionCount = 0; + + this._ready = !existsSync(this.dir) + ? mkdir(this.dir, { recursive: true }) + : Promise.resolve(); + } + + async logDecision(allProposals, winner) { + this.decisionCount++; + + const successful = allProposals.filter(p => p.status === 'success'); + const commands = successful.map(p => p.command).filter(Boolean); + const uniqueCommands = [...new Set(commands)]; + const agreement = uniqueCommands.length <= 1 && commands.length > 0 + ? 1.0 + : commands.length > 0 + ? Math.max(...uniqueCommands.map(c => commands.filter(x => x === c).length)) / commands.length + : 0; + + const entry = { + timestamp: new Date().toISOString(), + decision_id: this.decisionCount, + proposals: allProposals.map(p => ({ + agent_id: p.agentId, + model: p.modelName, + status: p.status, + command: p.command || null, + pre_text: p.preCommandText ? p.preCommandText.slice(0, 100) : '', + score: p.score ?? null, + latency_ms: p.latencyMs, + error: p.error || null + })), + winner: winner ? { + agent_id: winner.agentId, + command: winner.command, + score: winner.score, + reason: winner.winReason || 'highest_score' + } : null, + majority_command: this._findMajority(commands), + panel_agreement: Math.round(agreement * 100) / 100 + }; + + await this._ready; + let log = await this._readLog(); + log.push(entry); + + if (log.length > MAX_ENTRIES) { + log = log.slice(log.length - TRIM_TO); + } + + try { + await writeFile(this.filePath, JSON.stringify(log, null, 2)); + } catch (err) { + console.error(`[Ensemble] Failed to write log: ${err.message}`); + } + } + + async getStats() { + const log = await this._readLog(); + const wins = {}; + let totalLatency = 0; + let latencyCount = 0; + + for (const entry of log) { + if (entry.winner?.agent_id) { + wins[entry.winner.agent_id] = (wins[entry.winner.agent_id] || 0) + 1; + } + for (const p of entry.proposals) { + if (p.status === 'success' && p.latency_ms) { + totalLatency += p.latency_ms; + latencyCount++; + } + } + } + + return { + total_decisions: log.length, + per_member_wins: wins, + avg_latency_ms: latencyCount > 0 ? Math.round(totalLatency / latencyCount) : 0 + }; + } + + async _readLog() { + try { + const raw = await readFile(this.filePath, 'utf8'); + return JSON.parse(raw); + } catch { + // file missing or corrupted — start fresh + } + return []; + } + + _findMajority(commands) { + if (commands.length === 0) return null; + const counts = {}; + for (const c of commands) { + counts[c] = (counts[c] || 0) + 1; + } + const sorted = Object.entries(counts).sort((a, b) => b[1] - a[1]); + return sorted[0][1] > 1 ? sorted[0][0] : null; + } +} diff --git a/src/ensemble/panel.js b/src/ensemble/panel.js new file mode 100644 index 000000000..4a7732722 --- /dev/null +++ b/src/ensemble/panel.js @@ -0,0 +1,146 @@ +import { selectAPI, createModel } from '../models/_model_map.js'; +import { containsCommand } from '../agent/commands/index.js'; + +/** + * @typedef {Object} Proposal + * @property {string} agentId - Panel member ID (e.g., "gemini_a") + * @property {string} modelName - Model name (e.g., "gemini-2.5-pro") + * @property {string} provider - Provider prefix (e.g., "gemini", "xai") + * @property {string} response - Raw response string from the model + * @property {string|null} command - Extracted command (e.g., "!attackEntity") or null + * @property {string} preCommandText - Text before the first command + * @property {number} latencyMs - Time taken for this model's response + * @property {string} status - "success" | "error" | "timeout" + * @property {string|null} error - Error message if status !== "success" + * @property {number|null} score - Set by Arbiter + */ + +export class Panel { + /** + * @param {Array<{id: string, model: string}>} memberConfigs - panel member definitions + * @param {number} timeoutMs - per-model timeout in ms (default 15000) + */ + constructor(memberConfigs, timeoutMs = 15000) { + this.timeoutMs = timeoutMs; + this.members = []; + + for (const config of memberConfigs) { + try { + const profile = selectAPI(config.model); + const model = createModel(profile); + this.members.push({ + id: config.id, + model: model, + modelName: config.model + }); + console.log(`[Ensemble Panel] Loaded: ${config.id} → ${config.model}`); + } catch (err) { + console.error(`[Ensemble Panel] Failed to load ${config.id} (${config.model}): ${err.message}`); + } + } + + if (this.members.length === 0) { + throw new Error('[Ensemble Panel] No panel members loaded. Check profile.ensemble.panel config.'); + } + + console.log(`[Ensemble Panel] Ready: ${this.members.length} members, ${this.timeoutMs}ms timeout`); + } + + /** + * Query all panel members in parallel with timeout. + * Uses Promise.allSettled — one failure won't block others. + * + * @param {Array} turns - conversation turns [{role, content}] + * @param {string} systemMessage - the system prompt + * @returns {Promise} - all proposals (includes failures) + */ + async queryAll(turns, systemMessage) { + const promises = this.members.map(member => this._queryMember(member, turns, systemMessage)); + const results = await Promise.allSettled(promises); + + return results.map((result, i) => { + if (result.status === 'fulfilled') { + return result.value; + } + // Promise rejected (shouldn't happen since _queryMember catches, but just in case) + return { + agentId: this.members[i].id, + modelName: this.members[i].modelName, + response: '', + command: null, + preCommandText: '', + latencyMs: this.timeoutMs, + status: 'error', + error: result.reason?.message || 'Unknown error', + score: null + }; + }); + } + + /** + * Query a single panel member with timeout protection. + * @param {Object} member - {id, model, modelName} + * @param {Array} turns + * @param {string} systemMessage + * @returns {Promise} + */ + async _queryMember(member, turns, systemMessage) { + const startTime = Date.now(); + let timer = null; + + const timeoutPromise = new Promise((_, reject) => { + timer = setTimeout(() => reject(new Error('timeout')), this.timeoutMs); + }); + + try { + const response = await Promise.race([ + member.model.sendRequest(turns, systemMessage), + timeoutPromise + ]); + + clearTimeout(timer); + const latencyMs = Date.now() - startTime; + const responseStr = typeof response === 'string' ? response : String(response || ''); + const command = containsCommand(responseStr); + + // Extract text before the command (pre-command reasoning) + let preCommandText = ''; + if (command) { + const cmdIndex = responseStr.indexOf(command); + if (cmdIndex > 0) { + preCommandText = responseStr.slice(0, cmdIndex).trim(); + } + } + + return { + agentId: member.id, + modelName: member.modelName, + provider: member.model.constructor?.prefix || 'unknown', + response: responseStr, + command: command, + preCommandText: preCommandText, + latencyMs: latencyMs, + status: 'success', + error: null, + score: null, + usage: member.model._lastUsage || null + }; + } catch (err) { + clearTimeout(timer); + const latencyMs = Date.now() - startTime; + const isTimeout = err.message === 'timeout'; + + return { + agentId: member.id, + modelName: member.modelName, + response: '', + command: null, + preCommandText: '', + latencyMs: latencyMs, + status: isTimeout ? 'timeout' : 'error', + error: err.message, + score: null + }; + } + } +}