Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 53 additions & 2 deletions leanring-buddy/CompanionManager.swift
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,10 @@ final class CompanionManager: ObservableObject {
return ClaudeAPI(proxyURL: "\(Self.workerBaseURL)/chat", model: selectedModel)
}()

/// API client for OpenRouter-routed voice requests, created on first use.
/// Seeds its model from the persisted selection, falling back to Sonnet.
private lazy var openRouterAPI = OpenRouterAPI(
    proxyURL: "\(Self.workerBaseURL)/openrouter-chat",
    model: selectedOpenRouterModel ?? "anthropic/claude-sonnet-4"
)

private lazy var elevenLabsTTSClient: ElevenLabsTTSClient = {
return ElevenLabsTTSClient(proxyURL: "\(Self.workerBaseURL)/tts")
}()
Expand Down Expand Up @@ -110,12 +114,29 @@ final class CompanionManager: ObservableObject {
/// The Claude model used for voice responses. Persisted to UserDefaults.
@Published var selectedModel: String = UserDefaults.standard.string(forKey: "selectedClaudeModel") ?? "claude-sonnet-4-6"

/// The selected OpenRouter model ID, or nil if using Claude direct.
/// When set, voice requests route through OpenRouter instead of the Anthropic API.
@Published var selectedOpenRouterModel: String? = UserDefaults.standard.string(forKey: "selectedOpenRouterModel")

/// True when an OpenRouter model is selected — i.e. voice requests should
/// route through OpenRouter instead of Claude direct.
var isUsingOpenRouter: Bool { selectedOpenRouterModel != nil }

/// Switches voice responses to a direct Claude model.
/// Clears any OpenRouter selection (reverting routing to the Anthropic API)
/// and persists both changes so they survive app restarts.
func setSelectedModel(_ model: String) {
    selectedModel = model
    selectedOpenRouterModel = nil

    let defaults = UserDefaults.standard
    defaults.set(model, forKey: "selectedClaudeModel")
    defaults.removeObject(forKey: "selectedOpenRouterModel")

    claudeAPI.model = model
}

/// Selects an OpenRouter model for voice responses and persists the choice.
/// While this is set, requests route through OpenRouter instead of Claude direct.
func setSelectedOpenRouterModel(_ openRouterModelID: String) {
    let persistenceKey = "selectedOpenRouterModel"

    selectedOpenRouterModel = openRouterModelID
    UserDefaults.standard.set(openRouterModelID, forKey: persistenceKey)
    openRouterAPI.model = openRouterModelID
}

/// User preference for whether the Clicky cursor should be shown.
/// When toggled off, the overlay is hidden and push-to-talk is disabled.
/// Persisted to UserDefaults so the choice survives app restarts.
Expand Down Expand Up @@ -610,7 +631,7 @@ final class CompanionManager: ObservableObject {
(userPlaceholder: entry.userTranscript, assistantResponse: entry.assistantResponse)
}

let (fullResponseText, _) = try await claudeAPI.analyzeImageStreaming(
let (fullResponseText, _) = try await analyzeImageStreamingWithActiveProvider(
images: labeledImages,
systemPrompt: Self.companionVoiceResponseSystemPrompt,
conversationHistory: historyForAPI,
Expand Down Expand Up @@ -982,7 +1003,7 @@ final class CompanionManager: ObservableObject {
let dimensionInfo = " (image dimensions: \(cursorScreenCapture.screenshotWidthInPixels)x\(cursorScreenCapture.screenshotHeightInPixels) pixels)"
let labeledImages = [(data: cursorScreenCapture.imageData, label: cursorScreenCapture.label + dimensionInfo)]

let (fullResponseText, _) = try await claudeAPI.analyzeImageStreaming(
let (fullResponseText, _) = try await analyzeImageStreamingWithActiveProvider(
images: labeledImages,
systemPrompt: Self.onboardingDemoSystemPrompt,
userPrompt: "look around my screen and find something interesting to point at",
Expand Down Expand Up @@ -1023,4 +1044,34 @@ final class CompanionManager: ObservableObject {
}
}
}

// MARK: - Provider Routing

/// Forwards a streaming vision request to whichever provider is active:
/// OpenRouter when an OpenRouter model is selected, Claude direct otherwise.
/// Both providers share the same streaming signature, so this is a pure
/// dispatch layer with no transformation of arguments or results.
private func analyzeImageStreamingWithActiveProvider(
    images: [(data: Data, label: String)],
    systemPrompt: String,
    conversationHistory: [(userPlaceholder: String, assistantResponse: String)] = [],
    userPrompt: String,
    onTextChunk: @MainActor @Sendable (String) -> Void
) async throws -> (text: String, duration: TimeInterval) {
    guard isUsingOpenRouter else {
        return try await claudeAPI.analyzeImageStreaming(
            images: images,
            systemPrompt: systemPrompt,
            conversationHistory: conversationHistory,
            userPrompt: userPrompt,
            onTextChunk: onTextChunk
        )
    }
    return try await openRouterAPI.analyzeImageStreaming(
        images: images,
        systemPrompt: systemPrompt,
        conversationHistory: conversationHistory,
        userPrompt: userPrompt,
        onTextChunk: onTextChunk
    )
}
}
86 changes: 85 additions & 1 deletion leanring-buddy/CompanionPanelView.swift
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ struct CompanionPanelView: View {

modelPickerRow
.padding(.horizontal, 16)

openRouterModelPickerRow
.padding(.horizontal, 16)
}

if !companionManager.allPermissionsGranted {
Expand Down Expand Up @@ -623,7 +626,7 @@ struct CompanionPanelView: View {
}

private func modelOptionButton(label: String, modelID: String) -> some View {
let isSelected = companionManager.selectedModel == modelID
let isSelected = companionManager.selectedModel == modelID && !companionManager.isUsingOpenRouter
return Button(action: {
companionManager.setSelectedModel(modelID)
}) {
Expand All @@ -641,6 +644,87 @@ struct CompanionPanelView: View {
.pointerCursor()
}

// MARK: - OpenRouter Model Picker

/// Available OpenRouter models shown in the dropdown.
/// Each entry pairs a short display label with the OpenRouter model ID
/// (provider/model-slug format) that is sent to the proxy.
private static let openRouterModels: [(label: String, modelID: String)] = [
    ("Sonnet", "anthropic/claude-sonnet-4"),
    ("Opus", "anthropic/claude-opus-4"),
    ("ChatGPT", "openai/gpt-4.1"),
    ("Grok", "x-ai/grok-3"),
    ("Gemini", "google/gemini-2.5-pro"),
    ("Qwen", "qwen/qwen3-235b-a22b"),
]

/// Row with an "OpenRouter" label and a dropdown menu for picking an
/// OpenRouter model, or "Off" to fall back to the direct Claude model.
private var openRouterModelPickerRow: some View {
    HStack {
        Text("OpenRouter")
            .font(.system(size: 13, weight: .medium))
            .foregroundColor(DS.Colors.textSecondary)

        Spacer()

        Menu {
            // "Off" option to switch back to Claude direct.
            // NOTE(review): re-selecting the current Claude model is a
            // side-effect trick — setSelectedModel(_:) clears the OpenRouter
            // selection. Consider a dedicated clearOpenRouterModel() API.
            Button(action: {
                companionManager.setSelectedModel(companionManager.selectedModel)
            }) {
                if !companionManager.isUsingOpenRouter {
                    Label("Off", systemImage: "checkmark")
                } else {
                    Text("Off")
                }
            }

            Divider()

            // One menu entry per model; the active one gets a checkmark.
            ForEach(Self.openRouterModels, id: \.modelID) { model in
                Button(action: {
                    companionManager.setSelectedOpenRouterModel(model.modelID)
                }) {
                    if companionManager.selectedOpenRouterModel == model.modelID {
                        Label(model.label, systemImage: "checkmark")
                    } else {
                        Text(model.label)
                    }
                }
            }
        } label: {
            // Compact pill showing the current selection; highlighted
            // (brighter text, subtle fill) while OpenRouter routing is active.
            HStack(spacing: 4) {
                Text(openRouterDisplayLabel)
                    .font(.system(size: 11, weight: .medium))
                    .foregroundColor(companionManager.isUsingOpenRouter ? DS.Colors.textPrimary : DS.Colors.textTertiary)

                Image(systemName: "chevron.up.chevron.down")
                    .font(.system(size: 8, weight: .medium))
                    .foregroundColor(DS.Colors.textTertiary)
            }
            .padding(.horizontal, 10)
            .padding(.vertical, 5)
            .background(
                RoundedRectangle(cornerRadius: 5, style: .continuous)
                    .fill(companionManager.isUsingOpenRouter ? Color.white.opacity(0.1) : Color.clear)
            )
            .overlay(
                RoundedRectangle(cornerRadius: 5, style: .continuous)
                    .stroke(DS.Colors.borderSubtle, lineWidth: 0.5)
            )
        }
        .menuStyle(.borderlessButton)
        .fixedSize()
        .pointerCursor()
    }
    .padding(.vertical, 4)
}

/// Display label for the currently selected OpenRouter model, or "Off"
/// when requests route through Claude direct. Falls back to the raw model
/// ID if the selection is not in the known-models list.
private var openRouterDisplayLabel: String {
    if let selectedOpenRouterModelID = companionManager.selectedOpenRouterModel {
        let knownModel = Self.openRouterModels.first { $0.modelID == selectedOpenRouterModelID }
        return knownModel?.label ?? selectedOpenRouterModelID
    }
    return "Off"
}

// MARK: - DM Farza Button

private var dmFarzaButton: some View {
Expand Down
188 changes: 188 additions & 0 deletions leanring-buddy/OpenRouterAPI.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
//
// OpenRouterAPI.swift
// OpenRouter API Implementation with streaming support (OpenAI-compatible format)
//

import Foundation

/// OpenRouter API helper with streaming for progressive text display.
/// Uses the OpenAI-compatible chat completions format that OpenRouter exposes,
/// reached through a proxy worker (no API key is handled client-side).
class OpenRouterAPI {
    // Guards the one-time TLS warmup so concurrent inits don't race.
    private static let tlsWarmupLock = NSLock()
    private static var hasStartedTLSWarmup = false

    private let apiURL: URL
    /// OpenRouter model ID (e.g. "anthropic/claude-sonnet-4"). Mutable so the
    /// user's selection can change without rebuilding the client.
    var model: String
    private let session: URLSession

    /// - Parameters:
    ///   - proxyURL: Absolute URL of the proxy worker's OpenRouter chat endpoint.
    ///     Must parse as a URL; an invalid value is a programmer error and traps
    ///     with a diagnostic (previously an anonymous force-unwrap crash).
    ///   - model: Initial OpenRouter model ID.
    init(proxyURL: String, model: String) {
        guard let url = URL(string: proxyURL) else {
            preconditionFailure("OpenRouterAPI: invalid proxy URL: \(proxyURL)")
        }
        self.apiURL = url
        self.model = model

        // Generous timeouts: the request payload carries base64 images and the
        // response is a long-lived SSE stream.
        let config = URLSessionConfiguration.default
        config.timeoutIntervalForRequest = 120
        config.timeoutIntervalForResource = 300
        config.waitsForConnectivity = true
        config.urlCache = nil
        config.httpCookieStorage = nil
        self.session = URLSession(configuration: config)

        warmUpTLSConnectionIfNeeded()
    }

    /// Builds a JSON POST request against the proxy endpoint.
    private func makeAPIRequest() -> URLRequest {
        var request = URLRequest(url: apiURL)
        request.httpMethod = "POST"
        request.timeoutInterval = 120
        request.setValue("application/json", forHTTPHeaderField: "Content-Type")
        return request
    }

    /// Detects the MIME type of image data by inspecting its magic bytes.
    /// Recognizes the PNG signature; everything else (including JPEG) falls
    /// back to "image/jpeg", preserving the original default.
    private func detectImageMediaType(for imageData: Data) -> String {
        let pngSignature: [UInt8] = [0x89, 0x50, 0x4E, 0x47]
        if imageData.starts(with: pngSignature) {
            return "image/png"
        }
        return "image/jpeg"
    }

    /// Fires a one-time, fire-and-forget HEAD request at the proxy host root so
    /// the TLS handshake is already done when the first real request is sent.
    private func warmUpTLSConnectionIfNeeded() {
        // Claim the warmup under the lock; only the first instance proceeds.
        Self.tlsWarmupLock.lock()
        let shouldStartTLSWarmup = !Self.hasStartedTLSWarmup
        if shouldStartTLSWarmup {
            Self.hasStartedTLSWarmup = true
        }
        Self.tlsWarmupLock.unlock()

        guard shouldStartTLSWarmup else { return }

        // Hit the host root ("/") so no endpoint handler runs server-side.
        guard var warmupURLComponents = URLComponents(url: apiURL, resolvingAgainstBaseURL: false) else {
            return
        }
        warmupURLComponents.path = "/"
        warmupURLComponents.query = nil
        warmupURLComponents.fragment = nil

        guard let warmupURL = warmupURLComponents.url else { return }

        var warmupRequest = URLRequest(url: warmupURL)
        warmupRequest.httpMethod = "HEAD"
        warmupRequest.timeoutInterval = 10
        // Result intentionally ignored — this exists only for the handshake.
        session.dataTask(with: warmupRequest) { _, _, _ in }.resume()
    }

    /// Sends a vision request via OpenRouter with SSE streaming.
    ///
    /// Messages use the OpenAI chat-completions shape: a "system" role message,
    /// prior turns as plain user/assistant strings, and the current turn as
    /// content blocks mixing `text` and base64 data-URI `image_url` entries.
    /// Text deltas arrive as SSE events at `choices[0].delta.content`.
    ///
    /// - Parameters:
    ///   - images: Image payloads, each preceded by its text label.
    ///   - systemPrompt: Content of the system message.
    ///   - conversationHistory: Prior (user, assistant) turns, oldest first.
    ///   - userPrompt: Text of the current user turn, placed after the images.
    ///   - maxTokens: Completion token cap. Defaults to 1024, the previously
    ///     hard-coded value, so existing call sites are unaffected.
    ///   - onTextChunk: Invoked on the main actor with the FULL accumulated
    ///     text after each delta (not just the new fragment).
    /// - Returns: The final accumulated text and the wall-clock duration.
    /// - Throws: `NSError` (domain "OpenRouterAPI") for an invalid or non-2xx
    ///   response, plus any URLSession transport errors.
    func analyzeImageStreaming(
        images: [(data: Data, label: String)],
        systemPrompt: String,
        conversationHistory: [(userPlaceholder: String, assistantResponse: String)] = [],
        userPrompt: String,
        maxTokens: Int = 1024,
        onTextChunk: @MainActor @Sendable (String) -> Void
    ) async throws -> (text: String, duration: TimeInterval) {
        let startTime = Date()

        var request = makeAPIRequest()

        // OpenAI-compatible messages array, starting with the system message.
        var messages: [[String: Any]] = [
            ["role": "system", "content": systemPrompt]
        ]

        for (userPlaceholder, assistantResponse) in conversationHistory {
            messages.append(["role": "user", "content": userPlaceholder])
            messages.append(["role": "assistant", "content": assistantResponse])
        }

        // Current turn: label text + data-URI image pairs, then the prompt.
        var contentBlocks: [[String: Any]] = []
        for image in images {
            let mediaType = detectImageMediaType(for: image.data)
            contentBlocks.append([
                "type": "text",
                "text": image.label
            ])
            contentBlocks.append([
                "type": "image_url",
                "image_url": [
                    "url": "data:\(mediaType);base64,\(image.data.base64EncodedString())"
                ]
            ])
        }
        contentBlocks.append([
            "type": "text",
            "text": userPrompt
        ])
        messages.append(["role": "user", "content": contentBlocks])

        let body: [String: Any] = [
            "model": model,
            "max_tokens": maxTokens,
            "stream": true,
            "messages": messages
        ]

        let bodyData = try JSONSerialization.data(withJSONObject: body)
        request.httpBody = bodyData
        let payloadMB = Double(bodyData.count) / 1_048_576.0
        print("🌐 OpenRouter streaming request: \(String(format: "%.1f", payloadMB))MB, \(images.count) image(s), model: \(model)")

        let (byteStream, response) = try await session.bytes(for: request)

        guard let httpResponse = response as? HTTPURLResponse else {
            throw NSError(
                domain: "OpenRouterAPI",
                code: -1,
                userInfo: [NSLocalizedDescriptionKey: "Invalid HTTP response"]
            )
        }

        guard (200...299).contains(httpResponse.statusCode) else {
            // Drain the body so the thrown error carries the server's message.
            var errorBodyChunks: [String] = []
            for try await line in byteStream.lines {
                errorBodyChunks.append(line)
            }
            let errorBody = errorBodyChunks.joined(separator: "\n")
            throw NSError(
                domain: "OpenRouterAPI",
                code: httpResponse.statusCode,
                userInfo: [NSLocalizedDescriptionKey: "API Error (\(httpResponse.statusCode)): \(errorBody)"]
            )
        }

        // Parse the SSE stream — OpenAI format: choices[0].delta.content.
        // Per the SSE spec the "data:" field name may be followed by at most
        // one optional space, so accept both "data: {…}" and "data:{…}".
        var accumulatedResponseText = ""

        for try await line in byteStream.lines {
            guard line.hasPrefix("data:") else { continue }
            var jsonString = String(line.dropFirst(5))
            if jsonString.hasPrefix(" ") { jsonString.removeFirst() }

            // OpenRouter terminates the stream with a literal [DONE] sentinel.
            guard jsonString != "[DONE]" else { break }

            // Malformed or non-delta events (e.g. keep-alives) are skipped.
            guard let jsonData = jsonString.data(using: .utf8),
                  let eventPayload = try? JSONSerialization.jsonObject(with: jsonData) as? [String: Any],
                  let choices = eventPayload["choices"] as? [[String: Any]],
                  let firstChoice = choices.first,
                  let delta = firstChoice["delta"] as? [String: Any],
                  let textChunk = delta["content"] as? String else {
                continue
            }

            accumulatedResponseText += textChunk
            // Copy into a let so the @Sendable closure captures an immutable value.
            let currentAccumulatedText = accumulatedResponseText
            await onTextChunk(currentAccumulatedText)
        }

        let duration = Date().timeIntervalSince(startTime)
        return (text: accumulatedResponseText, duration: duration)
    }
}
Loading