diff --git a/leanring-buddy/CompanionManager.swift b/leanring-buddy/CompanionManager.swift
index 0234cf19..181e15a0 100644
--- a/leanring-buddy/CompanionManager.swift
+++ b/leanring-buddy/CompanionManager.swift
@@ -76,6 +76,13 @@ final class CompanionManager: ObservableObject {
         return ClaudeAPI(proxyURL: "\(Self.workerBaseURL)/chat", model: selectedModel)
     }()
 
+    /// Streaming client for the worker's `/openrouter-chat` route.
+    /// Built on first access with the selected OpenRouter model, or `anthropic/claude-sonnet-4` if none is selected.
+    private lazy var openRouterAPI: OpenRouterAPI = {
+        let initialModelID = selectedOpenRouterModel ?? "anthropic/claude-sonnet-4"
+        return OpenRouterAPI(proxyURL: "\(Self.workerBaseURL)/openrouter-chat", model: initialModelID)
+    }()
+
     private lazy var elevenLabsTTSClient: ElevenLabsTTSClient = {
         return ElevenLabsTTSClient(proxyURL: "\(Self.workerBaseURL)/tts")
     }()
@@ -110,12 +117,33 @@ final class CompanionManager: ObservableObject {
     /// The Claude model used for voice responses. Persisted to UserDefaults.
     @Published var selectedModel: String = UserDefaults.standard.string(forKey: "selectedClaudeModel") ?? "claude-sonnet-4-6"
 
+    /// OpenRouter model ID picked by the user, or `nil` when using Claude direct.
+    /// While non-nil, voice requests route through OpenRouter instead of the Anthropic API.
+    /// The initial value is read from UserDefaults.
+    @Published var selectedOpenRouterModel: String? = UserDefaults.standard.string(forKey: "selectedOpenRouterModel")
+
+    /// `true` while an OpenRouter model is selected, meaning voice requests go through OpenRouter.
+    var isUsingOpenRouter: Bool {
+        selectedOpenRouterModel != nil
+    }
+
     func setSelectedModel(_ model: String) {
         selectedModel = model
+        // Choosing a Claude model turns OpenRouter routing off, in memory and in UserDefaults.
+        selectedOpenRouterModel = nil
         UserDefaults.standard.set(model, forKey: "selectedClaudeModel")
+        UserDefaults.standard.removeObject(forKey: "selectedOpenRouterModel")
         claudeAPI.model = model
     }
 
+    /// Switches voice requests to the given OpenRouter model.
+    /// Publishes the selection, persists it to UserDefaults, and points `openRouterAPI` at the new model.
+    func setSelectedOpenRouterModel(_ openRouterModelID: String) {
+        selectedOpenRouterModel = openRouterModelID
+        UserDefaults.standard.set(openRouterModelID, forKey: "selectedOpenRouterModel")
+        openRouterAPI.model = openRouterModelID
+    }
+
     /// User preference for whether the Clicky cursor should be shown.
     /// When toggled off, the overlay is hidden and push-to-talk is disabled.
     /// Persisted to UserDefaults so the choice survives app restarts.
@@ -610,7 +638,7 @@ final class CompanionManager: ObservableObject {
                     (userPlaceholder: entry.userTranscript, assistantResponse: entry.assistantResponse)
                 }
 
-                let (fullResponseText, _) = try await claudeAPI.analyzeImageStreaming(
+                let (fullResponseText, _) = try await analyzeImageStreamingWithActiveProvider(
                     images: labeledImages,
                     systemPrompt: Self.companionVoiceResponseSystemPrompt,
                     conversationHistory: historyForAPI,
@@ -982,7 +1010,7 @@ final class CompanionManager: ObservableObject {
                 let dimensionInfo = " (image dimensions: \(cursorScreenCapture.screenshotWidthInPixels)x\(cursorScreenCapture.screenshotHeightInPixels) pixels)"
                 let labeledImages = [(data: cursorScreenCapture.imageData, label: cursorScreenCapture.label + dimensionInfo)]
 
-                let (fullResponseText, _) = try await claudeAPI.analyzeImageStreaming(
+                let (fullResponseText, _) = try await analyzeImageStreamingWithActiveProvider(
                     images: labeledImages,
                     systemPrompt: Self.onboardingDemoSystemPrompt,
                     userPrompt: "look around my screen and find something interesting to point at",
@@ -1023,4 +1051,34 @@ final class CompanionManager: ObservableObject {
             }
         }
     }
+
+    // MARK: - Provider Routing
+
+    /// Sends a streaming vision request through whichever provider is active:
+    /// OpenRouter while an OpenRouter model is selected, otherwise Claude direct.
+    private func analyzeImageStreamingWithActiveProvider(
+        images: [(data: Data, label: String)],
+        systemPrompt: String,
+        conversationHistory: [(userPlaceholder: String, assistantResponse: String)] = [],
+        userPrompt: String,
+        onTextChunk: @MainActor @Sendable (String) -> Void
+    ) async throws -> (text: String, duration: TimeInterval) {
+        // Both clients take the same arguments, so the branches differ only in the receiver;
+        // OpenRouter is used while an OpenRouter model is selected, Claude direct otherwise.
+        let providerResult: (text: String, duration: TimeInterval)
+        if isUsingOpenRouter {
+            providerResult = try await openRouterAPI.analyzeImageStreaming(
+                images: images, systemPrompt: systemPrompt,
+                conversationHistory: conversationHistory,
+                userPrompt: userPrompt, onTextChunk: onTextChunk
+            )
+        } else {
+            providerResult = try await claudeAPI.analyzeImageStreaming(
+                images: images, systemPrompt: systemPrompt,
+                conversationHistory: conversationHistory,
+                userPrompt: userPrompt, onTextChunk: onTextChunk
+            )
+        }
+        return providerResult
+    }
 }
diff --git a/leanring-buddy/CompanionPanelView.swift b/leanring-buddy/CompanionPanelView.swift
index 76789b4c..dd458771 100644
--- a/leanring-buddy/CompanionPanelView.swift
+++ b/leanring-buddy/CompanionPanelView.swift
@@ -31,6 +31,9 @@ struct CompanionPanelView: View {
 
                 modelPickerRow
                     .padding(.horizontal, 16)
+
+                openRouterModelPickerRow
+                    .padding(.horizontal, 16)
             }
 
             if !companionManager.allPermissionsGranted {
@@ -623,7 +626,7 @@ struct CompanionPanelView: View {
     }
 
     private func modelOptionButton(label: String, modelID: String) -> some View {
-        let isSelected = companionManager.selectedModel == modelID
+        let isSelected = companionManager.selectedModel == modelID && !companionManager.isUsingOpenRouter
         return Button(action: {
             companionManager.setSelectedModel(modelID)
         }) {
@@ -641,6 +644,87 @@ struct CompanionPanelView: View {
         .pointerCursor()
     }
 
+    // MARK: - OpenRouter Model Picker
+
+    /// Models offered in the OpenRouter menu, as (menu label, OpenRouter model ID) pairs.
+    private static let openRouterModels: [(label: String, modelID: String)] = [
+        (label: "Sonnet", modelID: "anthropic/claude-sonnet-4"),
+        (label: "Opus", modelID: "anthropic/claude-opus-4"),
+        (label: "ChatGPT", modelID: "openai/gpt-4.1"),
+        (label: "Grok", modelID: "x-ai/grok-3"),
+        (label: "Gemini", modelID: "google/gemini-2.5-pro"),
+        (label: "Qwen", modelID: "qwen/qwen3-235b-a22b"),
+    ]
+
+    /// Settings row whose menu picks an OpenRouter model, or "Off" to go back to Claude direct.
+    private var openRouterModelPickerRow: some View {
+        HStack {
+            Text("OpenRouter")
+                .font(.system(size: 13, weight: .medium))
+                .foregroundColor(DS.Colors.textSecondary)
+
+            Spacer()
+            Menu {
+                // "Off" re-applies the current Claude model, which is how the OpenRouter selection gets cleared.
+                Button {
+                    companionManager.setSelectedModel(companionManager.selectedModel)
+                } label: {
+                    if companionManager.isUsingOpenRouter {
+                        Text("Off")
+                    } else {
+                        Label("Off", systemImage: "checkmark")
+                    }
+                }
+
+                Divider()
+
+                ForEach(Self.openRouterModels, id: \.modelID) { option in
+                    Button {
+                        companionManager.setSelectedOpenRouterModel(option.modelID)
+                    } label: {
+                        if companionManager.selectedOpenRouterModel == option.modelID {
+                            Label(option.label, systemImage: "checkmark")
+                        } else {
+                            Text(option.label)
+                        }
+                    }
+                }
+            } label: {
+                HStack(spacing: 4) {
+                    Text(openRouterDisplayLabel)
+                        .font(.system(size: 11, weight: .medium))
+                        .foregroundColor(companionManager.isUsingOpenRouter ? DS.Colors.textPrimary : DS.Colors.textTertiary)
+
+                    Image(systemName: "chevron.up.chevron.down")
+                        .font(.system(size: 8, weight: .medium))
+                        .foregroundColor(DS.Colors.textTertiary)
+                }
+                .padding(.horizontal, 10)
+                .padding(.vertical, 5)
+                .background(
+                    RoundedRectangle(cornerRadius: 5, style: .continuous)
+                        .fill(companionManager.isUsingOpenRouter ? Color.white.opacity(0.1) : Color.clear)
+                )
+                .overlay(
+                    RoundedRectangle(cornerRadius: 5, style: .continuous)
+                        .stroke(DS.Colors.borderSubtle, lineWidth: 0.5)
+                )
+            }
+            .menuStyle(.borderlessButton)
+            .fixedSize()
+            .pointerCursor()
+        }
+        .padding(.vertical, 4)
+    }
+
+    /// Text for the menu button: the selected model's short label (or its raw ID if unlisted), or "Off" when OpenRouter is not in use.
+    private var openRouterDisplayLabel: String {
+        guard let selectedOpenRouterModelID = companionManager.selectedOpenRouterModel else {
+            return "Off"
+        }
+        return Self.openRouterModels.first(where: { $0.modelID == selectedOpenRouterModelID })?.label ?? selectedOpenRouterModelID
+    }
+
     // MARK: - DM Farza Button
 
     private var dmFarzaButton: some View {
diff --git a/leanring-buddy/OpenRouterAPI.swift b/leanring-buddy/OpenRouterAPI.swift
new file mode 100644
index 00000000..dc62789a
--- /dev/null
+++ b/leanring-buddy/OpenRouterAPI.swift
@@ -0,0 +1,228 @@
+//
+//  OpenRouterAPI.swift
+//  OpenRouter API Implementation with streaming support (OpenAI-compatible format)
+//
+
+import Foundation
+
+/// OpenRouter API helper with streaming for progressive text display.
+/// Uses the OpenAI-compatible chat completions format that OpenRouter exposes.
+class OpenRouterAPI {
+    /// Domain of every `NSError` thrown by this class.
+    private static let errorDomain = "OpenRouterAPI"
+
+    // The warm-up flag is static, so the warm-up request is sent at most once per process.
+    private static let tlsWarmupLock = NSLock()
+    private static var hasStartedTLSWarmup = false
+
+    private let apiURL: URL
+    /// Model ID sent as the `model` field of each request body.
+    var model: String
+    private let session: URLSession
+
+    /// Creates a client that POSTs chat requests to `proxyURL`.
+    /// - Parameters:
+    ///   - proxyURL: URL of the chat endpoint. A string that cannot be parsed as a URL is
+    ///     treated as a programmer error and stops the process.
+    ///   - model: Model ID used until `model` is reassigned.
+    init(proxyURL: String, model: String) {
+        guard let parsedURL = URL(string: proxyURL) else {
+            preconditionFailure("OpenRouterAPI: invalid proxy URL \"\(proxyURL)\"")
+        }
+        self.apiURL = parsedURL
+        self.model = model
+
+        let config = URLSessionConfiguration.default
+        config.timeoutIntervalForRequest = 120
+        config.timeoutIntervalForResource = 300
+        config.waitsForConnectivity = true
+        config.urlCache = nil
+        config.httpCookieStorage = nil
+        self.session = URLSession(configuration: config)
+
+        warmUpTLSConnectionIfNeeded()
+    }
+
+    /// Builds the JSON POST request used for chat calls.
+    private func makeAPIRequest() -> URLRequest {
+        var request = URLRequest(url: apiURL)
+        request.httpMethod = "POST"
+        request.timeoutInterval = 120
+        request.setValue("application/json", forHTTPHeaderField: "Content-Type")
+        return request
+    }
+
+    /// Detects the MIME type of image data by inspecting the first bytes.
+    /// Data beginning with the PNG signature is reported as PNG; anything else is assumed to be JPEG.
+    private func detectImageMediaType(for imageData: Data) -> String {
+        let pngSignature: [UInt8] = [0x89, 0x50, 0x4E, 0x47]
+        return imageData.starts(with: pngSignature) ? "image/png" : "image/jpeg"
+    }
+
+    /// Fires a single HEAD request at the endpoint's root path the first time any instance is created.
+    /// The response is ignored; per the method name the goal is to warm up the TLS connection.
+    private func warmUpTLSConnectionIfNeeded() {
+        Self.tlsWarmupLock.lock()
+        let isFirstWarmupAttempt = !Self.hasStartedTLSWarmup
+        Self.hasStartedTLSWarmup = true
+        Self.tlsWarmupLock.unlock()
+
+        guard isFirstWarmupAttempt,
+              var warmupURLComponents = URLComponents(url: apiURL, resolvingAgainstBaseURL: false) else {
+            return
+        }
+        warmupURLComponents.path = "/"
+        warmupURLComponents.query = nil
+        warmupURLComponents.fragment = nil
+
+        guard let warmupURL = warmupURLComponents.url else { return }
+
+        var warmupRequest = URLRequest(url: warmupURL)
+        warmupRequest.httpMethod = "HEAD"
+        warmupRequest.timeoutInterval = 10
+        session.dataTask(with: warmupRequest) { _, _, _ in }.resume()
+    }
+
+    /// Builds the chat-completions body: the system message, the replayed history, and a final user
+    /// message holding each image (its label, then the image as a data URI) followed by `userPrompt`.
+    private func makeRequestBody(
+        images: [(data: Data, label: String)],
+        systemPrompt: String,
+        conversationHistory: [(userPlaceholder: String, assistantResponse: String)],
+        userPrompt: String
+    ) -> [String: Any] {
+        // OpenAI-compatible messages format: the system prompt is a message with role "system".
+        var messages: [[String: Any]] = [
+            ["role": "system", "content": systemPrompt]
+        ]
+
+        for (userPlaceholder, assistantResponse) in conversationHistory {
+            messages.append(["role": "user", "content": userPlaceholder])
+            messages.append(["role": "assistant", "content": assistantResponse])
+        }
+
+        // Build current message with images as data URIs + text
+        var contentBlocks: [[String: Any]] = []
+        for image in images {
+            let mediaType = detectImageMediaType(for: image.data)
+            contentBlocks.append(["type": "text", "text": image.label])
+            contentBlocks.append([
+                "type": "image_url",
+                "image_url": [
+                    "url": "data:\(mediaType);base64,\(image.data.base64EncodedString())"
+                ]
+            ])
+        }
+        contentBlocks.append(["type": "text", "text": userPrompt])
+        messages.append(["role": "user", "content": contentBlocks])
+
+        return [
+            "model": model,
+            "max_tokens": 1024,
+            "stream": true,
+            "messages": messages
+        ]
+    }
+
+    /// Send a vision request via OpenRouter with streaming.
+    /// Uses the OpenAI chat completions format: system message, image_url content blocks,
+    /// and SSE events with choices[0].delta.content.
+    /// - Parameters:
+    ///   - images: Images for the current turn; each is sent as its label followed by a data URI.
+    ///   - systemPrompt: Sent as the leading message with role "system".
+    ///   - conversationHistory: Earlier turns, replayed as user/assistant text messages.
+    ///   - userPrompt: Text placed after the images in the current user message.
+    ///   - onTextChunk: Called after each text delta with ALL text received so far, not just the delta.
+    /// - Returns: The full response text and the seconds elapsed since the call began.
+    /// - Throws: An `NSError` in the "OpenRouterAPI" domain when the response is not HTTP, the status is
+    ///   outside 200...299, or the stream carries an error event; other errors are rethrown as-is.
+    func analyzeImageStreaming(
+        images: [(data: Data, label: String)],
+        systemPrompt: String,
+        conversationHistory: [(userPlaceholder: String, assistantResponse: String)] = [],
+        userPrompt: String,
+        onTextChunk: @MainActor @Sendable (String) -> Void
+    ) async throws -> (text: String, duration: TimeInterval) {
+        let startTime = Date()
+
+        var request = makeAPIRequest()
+        let body = makeRequestBody(
+            images: images,
+            systemPrompt: systemPrompt,
+            conversationHistory: conversationHistory,
+            userPrompt: userPrompt
+        )
+
+        let bodyData = try JSONSerialization.data(withJSONObject: body)
+        request.httpBody = bodyData
+        let payloadMB = Double(bodyData.count) / 1_048_576.0
+        print("🌐 OpenRouter streaming request: \(String(format: "%.1f", payloadMB))MB, \(images.count) image(s), model: \(model)")
+
+        let (byteStream, response) = try await session.bytes(for: request)
+
+        guard let httpResponse = response as? HTTPURLResponse else {
+            throw NSError(
+                domain: Self.errorDomain,
+                code: -1,
+                userInfo: [NSLocalizedDescriptionKey: "Invalid HTTP response"]
+            )
+        }
+
+        guard (200...299).contains(httpResponse.statusCode) else {
+            // The error payload arrives on the same byte stream; collect it for the message.
+            var errorBodyChunks: [String] = []
+            for try await line in byteStream.lines {
+                errorBodyChunks.append(line)
+            }
+            let errorBody = errorBodyChunks.joined(separator: "\n")
+            throw NSError(
+                domain: Self.errorDomain,
+                code: httpResponse.statusCode,
+                userInfo: [NSLocalizedDescriptionKey: "API Error (\(httpResponse.statusCode)): \(errorBody)"]
+            )
+        }
+
+        // Parse SSE stream — OpenAI format: choices[0].delta.content
+        var accumulatedResponseText = ""
+
+        for try await line in byteStream.lines {
+            // In SSE the space after "data:" is optional, so accept both spellings.
+            // Lines without the prefix are skipped.
+            guard line.hasPrefix("data:") else { continue }
+            var jsonString = line.dropFirst(5)
+            if jsonString.hasPrefix(" ") {
+                jsonString = jsonString.dropFirst()
+            }
+
+            guard jsonString != "[DONE]" else { break }
+
+            guard let eventPayload = try? JSONSerialization.jsonObject(with: Data(jsonString.utf8)) as? [String: Any] else {
+                continue
+            }
+
+            // A failure after the 200 response has started arrives as an event with a top-level
+            // "error" object. Throw rather than return the truncated text as if it were complete.
+            if let streamError = eventPayload["error"] as? [String: Any] {
+                let message = streamError["message"] as? String ?? "Unknown error"
+                throw NSError(
+                    domain: Self.errorDomain,
+                    code: streamError["code"] as? Int ?? -2,
+                    userInfo: [NSLocalizedDescriptionKey: "Stream Error: \(message)"]
+                )
+            }
+
+            guard let choices = eventPayload["choices"] as? [[String: Any]],
+                  let delta = choices.first?["delta"] as? [String: Any],
+                  let textChunk = delta["content"] as? String else {
+                continue
+            }
+
+            accumulatedResponseText += textChunk
+            let currentAccumulatedText = accumulatedResponseText
+            await onTextChunk(currentAccumulatedText)
+        }
+
+        let duration = Date().timeIntervalSince(startTime)
+        return (text: accumulatedResponseText, duration: duration)
+    }
+}
diff --git a/worker/src/index.ts b/worker/src/index.ts
index 2e3e9345..9b969c48 100644
--- a/worker/src/index.ts
+++ b/worker/src/index.ts
@@ -14,6 +14,7 @@ interface Env {
   ELEVENLABS_API_KEY: string;
   ELEVENLABS_VOICE_ID: string;
   ASSEMBLYAI_API_KEY: string;
+  OPENROUTER_API_KEY: string;
 }
 
 export default {
@@ -36,6 +37,10 @@ export default {
       if (url.pathname === "/transcribe-token") {
         return await handleTranscribeToken(env);
       }
+
+      if (url.pathname === "/openrouter-chat") {
+        return await handleOpenRouterChat(request, env);
+      }
     } catch (error) {
       console.error(`[${url.pathname}] Unhandled error:`, error);
       return new Response(
@@ -106,6 +111,41 @@ async function handleTranscribeToken(env: Env): Promise<Response> {
   });
 }
 
+/**
+ * Proxies a chat-completions request to OpenRouter, attaching the API key server-side.
+ * The request body is forwarded unchanged. Upstream errors are logged and returned with
+ * their original status; successful responses are streamed back as received.
+ */
+async function handleOpenRouterChat(request: Request, env: Env): Promise<Response> {
+  const body = await request.text();
+
+  const response = await fetch("https://openrouter.ai/api/v1/chat/completions", {
+    method: "POST",
+    headers: {
+      "Authorization": `Bearer ${env.OPENROUTER_API_KEY}`,
+      "content-type": "application/json",
+    },
+    body,
+  });
+
+  if (!response.ok) {
+    const errorBody = await response.text();
+    console.error(`[/openrouter-chat] OpenRouter API error ${response.status}: ${errorBody}`);
+    return new Response(errorBody, {
+      status: response.status,
+      headers: { "content-type": "application/json" },
+    });
+  }
+
+  return new Response(response.body, {
+    status: response.status,
+    headers: {
+      "content-type": response.headers.get("content-type") || "text/event-stream",
+      "cache-control": "no-cache",
+    },
+  });
+}
+
 async function handleTTS(request: Request, env: Env): Promise<Response> {
   const body = await request.text();
   const voiceId = env.ELEVENLABS_VOICE_ID;