From fd61912606d456afc7994d3b5dc1296b1ae7ec3d Mon Sep 17 00:00:00 2001 From: roberto Date: Fri, 20 Mar 2026 19:46:07 -0400 Subject: [PATCH] Add multi-provider support with model selection and custom model input --- Sources/APIProvider.swift | 276 ++++++++++++++++++++++++++++ Sources/AppContextService.swift | 8 +- Sources/AppState.swift | 133 ++++++++++++-- Sources/PostProcessingService.swift | 5 +- Sources/SettingsView.swift | 193 ++++++++++++++++--- Sources/SetupView.swift | 212 +++++++++++++++++++-- Sources/TranscriptionService.swift | 5 +- 7 files changed, 768 insertions(+), 64 deletions(-) create mode 100644 Sources/APIProvider.swift diff --git a/Sources/APIProvider.swift b/Sources/APIProvider.swift new file mode 100644 index 0000000..7a40cae --- /dev/null +++ b/Sources/APIProvider.swift @@ -0,0 +1,276 @@ +import Foundation + +// MARK: - API Provider + +enum APIProvider: String, CaseIterable, Identifiable, Codable { + case groq + case openai + case togetherAI = "together_ai" + case fireworks + case nvidia + case custom + + var id: String { rawValue } + + var displayName: String { + switch self { + case .groq: return "Groq" + case .openai: return "OpenAI" + case .togetherAI: return "Together AI" + case .fireworks: return "Fireworks" + case .nvidia: return "NVIDIA NIM" + case .custom: return "Custom" + } + } + + var defaultBaseURL: String { + switch self { + case .groq: return "https://api.groq.com/openai/v1" + case .openai: return "https://api.openai.com/v1" + case .togetherAI: return "https://api.together.xyz/v1" + case .fireworks: return "https://audio-prod.api.fireworks.ai/v1" + case .nvidia: return "https://integrate.api.nvidia.com/v1" + case .custom: return "" + } + } + + var availableTranscriptionModels: [TranscriptionModel] { + switch self { + case .groq: + return [ + TranscriptionModel( + id: "whisper-large-v3", + displayName: "Whisper Large V3", + description: "Default. Highest accuracy, best for detailed transcription." 
+ ), + TranscriptionModel( + id: "whisper-large-v3-turbo", + displayName: "Whisper Large V3 Turbo", + description: "Faster and cheaper. Slightly less accurate but great for most use cases." + ), + ] + case .openai: + return [ + TranscriptionModel( + id: "whisper-1", + displayName: "Whisper 1", + description: "Default. OpenAI's general-purpose speech recognition model." + ), + TranscriptionModel( + id: "gpt-4o-transcribe", + displayName: "GPT-4o Transcribe", + description: "Higher accuracy using GPT-4o. Better at handling accents and noisy audio." + ), + TranscriptionModel( + id: "gpt-4o-mini-transcribe", + displayName: "GPT-4o Mini Transcribe", + description: "Cheaper GPT-4o variant. Good balance of cost and quality." + ), + ] + case .togetherAI: + return [ + TranscriptionModel( + id: "openai/whisper-large-v3", + displayName: "Whisper Large V3", + description: "Default. High accuracy Whisper model hosted on Together AI." + ), + TranscriptionModel( + id: "openai/whisper-large-v3-turbo", + displayName: "Whisper Large V3 Turbo", + description: "Faster variant with slightly reduced accuracy." + ), + TranscriptionModel( + id: "deepgram/deepgram-nova-3", + displayName: "Deepgram Nova 3", + description: "Deepgram's latest model. Optimized for real-time, low-latency transcription." + ), + TranscriptionModel( + id: "nvidia/parakeet-tdt-0.6b-v3", + displayName: "NVIDIA Parakeet", + description: "Lightweight 0.6B parameter model. Very fast with good English accuracy." + ), + ] + case .fireworks: + return [ + TranscriptionModel( + id: "whisper-v3", + displayName: "Whisper V3", + description: "Default. Full Whisper V3 model for highest accuracy." + ), + TranscriptionModel( + id: "whisper-v3-turbo", + displayName: "Whisper V3 Turbo", + description: "Faster variant. Good for shorter recordings where speed matters." + ), + ] + case .nvidia: + return [ + TranscriptionModel( + id: "openai/whisper-large-v3", + displayName: "Whisper Large V3", + description: "Default. 
Whisper Large V3 running on NVIDIA GPU infrastructure." + ), + ] + case .custom: + return [ + TranscriptionModel( + id: "whisper-large-v3", + displayName: "Whisper Large V3", + description: "Default. Common model ID for OpenAI-compatible endpoints." + ), + TranscriptionModel( + id: "whisper-large-v3-turbo", + displayName: "Whisper Large V3 Turbo", + description: "Faster variant. Common model ID for OpenAI-compatible endpoints." + ), + ] + } + } + + var availableChatModels: [ChatModel] { + switch self { + case .groq: + return [ + ChatModel( + id: "meta-llama/llama-4-scout-17b-16e-instruct", + displayName: "Llama 4 Scout 17B", + description: "Default. Fast and capable model with vision support for context analysis." + ), + ] + case .openai: + return [ + ChatModel( + id: "gpt-4o-mini", + displayName: "GPT-4o Mini", + description: "Default. Fast and affordable. Great for post-processing transcriptions." + ), + ChatModel( + id: "gpt-4o", + displayName: "GPT-4o", + description: "Most capable. Better at complex context but slower and more expensive." + ), + ] + case .togetherAI: + return [ + ChatModel( + id: "meta-llama/Llama-3.3-70B-Instruct-Turbo", + displayName: "Llama 3.3 70B Turbo", + description: "Default. High quality with fast inference. Best for accurate post-processing." + ), + ChatModel( + id: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + displayName: "Llama 3.1 8B Turbo", + description: "Smaller and faster. Good enough for simple dictation cleanup." + ), + ] + case .fireworks: + return [ + ChatModel( + id: "accounts/fireworks/models/llama-v3p3-70b-instruct", + displayName: "Llama 3.3 70B", + description: "Default. High quality Llama model on Fireworks infrastructure." + ), + ] + case .nvidia: + return [ + ChatModel( + id: "meta/llama-3.1-8b-instruct", + displayName: "Llama 3.1 8B", + description: "Default. Lightweight model running on NVIDIA GPU infrastructure." 
+ ), + ] + case .custom: + return [ + ChatModel( + id: "meta-llama/llama-4-scout-17b-16e-instruct", + displayName: "Llama 4 Scout 17B", + description: "Default. Common model ID for OpenAI-compatible endpoints." + ), + ] + } + } + + var defaultTranscriptionModel: TranscriptionModel { + availableTranscriptionModels.first! + } + + var defaultChatModel: ChatModel { + availableChatModels.first! + } + + var apiKeyStorageKey: String { + switch self { + case .groq: return "groq_api_key" + case .openai: return "openai_api_key" + case .togetherAI: return "together_api_key" + case .fireworks: return "fireworks_api_key" + case .nvidia: return "nvidia_api_key" + case .custom: return "custom_api_key" + } + } + + var apiKeyPlaceholder: String { + switch self { + case .groq: return "Paste your Groq API key" + case .openai: return "Paste your OpenAI API key" + case .togetherAI: return "Paste your Together AI API key" + case .fireworks: return "Paste your Fireworks API key" + case .nvidia: return "Paste your NVIDIA API key" + case .custom: return "Paste your API key" + } + } + + var keyInstructionURL: URL? 
{ + switch self { + case .groq: return URL(string: "https://console.groq.com/keys") + case .openai: return URL(string: "https://platform.openai.com/api-keys") + case .togetherAI: return URL(string: "https://api.together.ai/settings/api-keys") + case .fireworks: return URL(string: "https://fireworks.ai/account/api-keys") + case .nvidia: return URL(string: "https://build.nvidia.com/") + case .custom: return nil + } + } + + var keyInstructionDisplayURL: String { + switch self { + case .groq: return "console.groq.com/keys" + case .openai: return "platform.openai.com/api-keys" + case .togetherAI: return "api.together.ai/settings/api-keys" + case .fireworks: return "fireworks.ai/account/api-keys" + case .nvidia: return "build.nvidia.com" + case .custom: return "" + } + } +} + +// MARK: - Model Types + +struct TranscriptionModel: Identifiable, Codable, Equatable, Hashable { + let id: String + let displayName: String + var description: String = "" + + /// Sentinel value representing a user-entered custom model ID + static let customPlaceholder = TranscriptionModel( + id: "__custom__", + displayName: "Custom", + description: "Enter your own model ID." + ) + + var isCustom: Bool { id == "__custom__" } +} + +struct ChatModel: Identifiable, Codable, Equatable, Hashable { + let id: String + let displayName: String + var description: String = "" + + /// Sentinel value representing a user-entered custom model ID + static let customPlaceholder = ChatModel( + id: "__custom__", + displayName: "Custom", + description: "Enter your own model ID." + ) + + var isCustom: Bool { id == "__custom__" } +} diff --git a/Sources/AppContextService.swift b/Sources/AppContextService.swift index f177028..a95b68e 100644 --- a/Sources/AppContextService.swift +++ b/Sources/AppContextService.swift @@ -31,15 +31,17 @@ Return only two sentences, no labels, no markdown, no extra commentary. 
private let apiKey: String private let baseURL: String private let customContextPrompt: String - private let fallbackTextModel = "meta-llama/llama-4-scout-17b-16e-instruct" - private let visionModel = "meta-llama/llama-4-scout-17b-16e-instruct" + private let fallbackTextModel: String + private let visionModel: String private let maxScreenshotDataURILength = 500_000 private let screenshotCompressionPrimary = 0.5 private let screenshotMaxDimension: CGFloat = 1024 - init(apiKey: String, baseURL: String = "https://api.groq.com/openai/v1", customContextPrompt: String = "") { + init(apiKey: String, baseURL: String = "https://api.groq.com/openai/v1", chatModel: String = "meta-llama/llama-4-scout-17b-16e-instruct", visionModel: String = "meta-llama/llama-4-scout-17b-16e-instruct", customContextPrompt: String = "") { self.apiKey = apiKey self.baseURL = baseURL + self.fallbackTextModel = chatModel + self.visionModel = visionModel self.customContextPrompt = customContextPrompt } diff --git a/Sources/AppState.swift b/Sources/AppState.swift index f4200ce..89f6b68 100644 --- a/Sources/AppState.swift +++ b/Sources/AppState.swift @@ -35,7 +35,9 @@ enum SettingsTab: String, CaseIterable, Identifiable { } final class AppState: ObservableObject, @unchecked Sendable { - private let apiKeyStorageKey = "groq_api_key" + private let selectedProviderStorageKey = "selected_provider" + private let selectedTranscriptionModelStorageKey = "selected_transcription_model" + private let selectedChatModelStorageKey = "selected_chat_model" private let apiBaseURLStorageKey = "api_base_url" private let holdShortcutStorageKey = "hold_shortcut" private let toggleShortcutStorageKey = "toggle_shortcut" @@ -58,17 +60,78 @@ final class AppState: ObservableObject, @unchecked Sendable { } } + @Published var selectedProvider: APIProvider { + didSet { + UserDefaults.standard.set(selectedProvider.rawValue, forKey: selectedProviderStorageKey) + apiKey = Self.loadStoredAPIKey(account: 
selectedProvider.apiKeyStorageKey) + if selectedProvider != .custom { + apiBaseURL = selectedProvider.defaultBaseURL + } + // Reset models to provider defaults + selectedTranscriptionModel = selectedProvider.defaultTranscriptionModel + selectedChatModel = selectedProvider.defaultChatModel + rebuildContextService() + } + } + + @Published var selectedTranscriptionModel: TranscriptionModel { + didSet { + if let data = try? JSONEncoder().encode(selectedTranscriptionModel) { + UserDefaults.standard.set(data, forKey: selectedTranscriptionModelStorageKey) + } + } + } + + @Published var selectedChatModel: ChatModel { + didSet { + if let data = try? JSONEncoder().encode(selectedChatModel) { + UserDefaults.standard.set(data, forKey: selectedChatModelStorageKey) + } + rebuildContextService() + } + } + + @Published var customTranscriptionModelID: String { + didSet { + UserDefaults.standard.set(customTranscriptionModelID, forKey: "custom_transcription_model_id") + } + } + + @Published var customChatModelID: String { + didSet { + UserDefaults.standard.set(customChatModelID, forKey: "custom_chat_model_id") + // The context service captures the chat model ID when it is built, so rebuild it whenever the custom ID changes + rebuildContextService() + } + } + + /// Returns the transcription model ID to send to the API: a custom entry is trimmed, and a blank + /// custom entry falls back to the selected provider's default model rather than a hard-coded ID + var effectiveTranscriptionModelID: String { + guard selectedTranscriptionModel.isCustom else { return selectedTranscriptionModel.id } + let trimmed = customTranscriptionModelID.trimmingCharacters(in: .whitespacesAndNewlines) + return trimmed.isEmpty ? selectedProvider.defaultTranscriptionModel.id : trimmed + } + + /// Returns the chat model ID to send to the API: a custom entry is trimmed, and a blank + /// custom entry falls back to the selected provider's default chat model + var effectiveChatModelID: String { + guard selectedChatModel.isCustom else { return selectedChatModel.id } + let trimmed = customChatModelID.trimmingCharacters(in: .whitespacesAndNewlines) + return trimmed.isEmpty ? 
selectedProvider.defaultChatModel.id : trimmed + } + @Published var apiKey: String { didSet { persistAPIKey(apiKey) - contextService = AppContextService(apiKey: apiKey, baseURL: apiBaseURL, customContextPrompt: customContextPrompt) + rebuildContextService() } } @Published var apiBaseURL: String { didSet { persistAPIBaseURL(apiBaseURL) - contextService = AppContextService(apiKey: apiKey, baseURL: apiBaseURL, customContextPrompt: customContextPrompt) + rebuildContextService() } } @@ -113,7 +176,7 @@ final class AppState: ObservableObject, @unchecked Sendable { @Published var customContextPrompt: String { didSet { UserDefaults.standard.set(customContextPrompt, forKey: customContextPromptStorageKey) - contextService = AppContextService(apiKey: apiKey, baseURL: apiBaseURL, customContextPrompt: customContextPrompt) + rebuildContextService() } } @@ -192,8 +255,32 @@ final class AppState: ObservableObject, @unchecked Sendable { init() { let hasCompletedSetup = UserDefaults.standard.bool(forKey: "hasCompletedSetup") - let apiKey = Self.loadStoredAPIKey(account: apiKeyStorageKey) - let apiBaseURL = Self.loadStoredAPIBaseURL(account: "api_base_url") + let selectedProvider = APIProvider(rawValue: UserDefaults.standard.string(forKey: "selected_provider") ?? "") ?? .groq + let apiKey = Self.loadStoredAPIKey(account: selectedProvider.apiKeyStorageKey) + let apiBaseURL: String + if selectedProvider == .custom { + apiBaseURL = Self.loadStoredAPIBaseURL(account: "api_base_url") + } else { + apiBaseURL = selectedProvider.defaultBaseURL + } + let selectedTranscriptionModel: TranscriptionModel + if let data = UserDefaults.standard.data(forKey: "selected_transcription_model"), + let model = try? 
JSONDecoder().decode(TranscriptionModel.self, from: data), + model.isCustom || selectedProvider.availableTranscriptionModels.contains(where: { $0.id == model.id }) { + selectedTranscriptionModel = model + } else { + selectedTranscriptionModel = selectedProvider.defaultTranscriptionModel + } + let selectedChatModel: ChatModel + if let data = UserDefaults.standard.data(forKey: "selected_chat_model"), + let model = try? JSONDecoder().decode(ChatModel.self, from: data), + model.isCustom || selectedProvider.availableChatModels.contains(where: { $0.id == model.id }) { + selectedChatModel = model + } else { + selectedChatModel = selectedProvider.defaultChatModel + } + let customTranscriptionModelID = UserDefaults.standard.string(forKey: "custom_transcription_model_id") ?? "" + let customChatModelID = UserDefaults.standard.string(forKey: "custom_chat_model_id") ?? "" let shortcuts = Self.loadShortcutConfiguration( holdKey: holdShortcutStorageKey, toggleKey: toggleShortcutStorageKey @@ -224,8 +311,21 @@ final class AppState: ObservableObject, @unchecked Sendable { let selectedMicrophoneID = UserDefaults.standard.string(forKey: selectedMicrophoneStorageKey) ?? "default" - self.contextService = AppContextService(apiKey: apiKey, baseURL: apiBaseURL, customContextPrompt: customContextPrompt) + // Resolve the chat model ID for the context service before self is fully initialized, + // because the effectiveChatModelID computed property cannot be read until every stored property is set + // A custom entry is trimmed, and a blank one falls back to the selected provider's default chat model + // The transcription model ID is not resolved here; it is read when the transcription service is created + let trimmedCustomChatID = customChatModelID.trimmingCharacters(in: .whitespacesAndNewlines) + let effectiveChatID = selectedChatModel.isCustom + ? (trimmedCustomChatID.isEmpty ? 
selectedProvider.defaultChatModel.id : trimmedCustomChatID) + : selectedChatModel.id + self.contextService = AppContextService(apiKey: apiKey, baseURL: apiBaseURL, chatModel: effectiveChatID, visionModel: effectiveChatID, customContextPrompt: customContextPrompt) self.hasCompletedSetup = hasCompletedSetup + self.selectedProvider = selectedProvider + self.selectedTranscriptionModel = selectedTranscriptionModel + self.selectedChatModel = selectedChatModel + self.customTranscriptionModelID = customTranscriptionModelID + self.customChatModelID = customChatModelID self.apiKey = apiKey self.apiBaseURL = apiBaseURL self.holdShortcut = shortcuts.hold @@ -292,12 +392,22 @@ final class AppState: ObservableObject, @unchecked Sendable { private func persistAPIKey(_ value: String) { let trimmed = value.trimmingCharacters(in: .whitespacesAndNewlines) if trimmed.isEmpty { - AppSettingsStorage.delete(account: apiKeyStorageKey) + AppSettingsStorage.delete(account: selectedProvider.apiKeyStorageKey) } else { - AppSettingsStorage.save(trimmed, account: apiKeyStorageKey) + AppSettingsStorage.save(trimmed, account: selectedProvider.apiKeyStorageKey) } } + private func rebuildContextService() { + contextService = AppContextService( + apiKey: apiKey, + baseURL: apiBaseURL, + chatModel: effectiveChatModelID, + visionModel: effectiveChatModelID, + customContextPrompt: customContextPrompt + ) + } + private static let defaultAPIBaseURL = "https://api.groq.com/openai/v1" private struct StoredShortcutConfiguration { @@ -904,9 +1014,10 @@ final class AppState: ObservableObject, @unchecked Sendable { let transcriptionService = TranscriptionService( apiKey: apiKey, baseURL: apiBaseURL, - forceHTTP2: forceHTTP2Transcription + forceHTTP2: forceHTTP2Transcription, + transcriptionModel: effectiveTranscriptionModelID ) - let postProcessingService = PostProcessingService(apiKey: apiKey, baseURL: apiBaseURL) + let 
postProcessingService = PostProcessingService(apiKey: apiKey, baseURL: apiBaseURL, chatModel: effectiveChatModelID) Task { do { diff --git a/Sources/PostProcessingService.swift b/Sources/PostProcessingService.swift index 4830ff0..46fa0b1 100644 --- a/Sources/PostProcessingService.swift +++ b/Sources/PostProcessingService.swift @@ -42,12 +42,13 @@ Output rules: private let apiKey: String private let baseURL: String - private let defaultModel = "meta-llama/llama-4-scout-17b-16e-instruct" + private let defaultModel: String private let postProcessingTimeoutSeconds: TimeInterval = 20 - init(apiKey: String, baseURL: String = "https://api.groq.com/openai/v1") { + init(apiKey: String, baseURL: String = "https://api.groq.com/openai/v1", chatModel: String = "meta-llama/llama-4-scout-17b-16e-instruct") { self.apiKey = apiKey self.baseURL = baseURL + self.defaultModel = chatModel } func postProcess( diff --git a/Sources/SettingsView.swift b/Sources/SettingsView.swift index dcce169..3099161 100644 --- a/Sources/SettingsView.swift +++ b/Sources/SettingsView.swift @@ -228,9 +228,15 @@ struct GeneralSettingsView: View { SettingsCard("Updates", icon: "arrow.triangle.2.circlepath") { updatesSection } + SettingsCard("API Provider", icon: "server.rack") { + providerSection + } SettingsCard("API Key", icon: "key.fill") { apiKeySection } + SettingsCard("Models", icon: "cpu") { + modelSection + } SettingsCard("Dictation Shortcuts", icon: "keyboard.fill") { hotkeySection } @@ -394,16 +400,70 @@ struct GeneralSettingsView: View { } } + // MARK: API Provider + + private var providerSection: some View { + VStack(alignment: .leading, spacing: 10) { + Picker("Provider", selection: $appState.selectedProvider) { + ForEach(APIProvider.allCases) { provider in + Text(provider.displayName).tag(provider) + } + } + .onChange(of: appState.selectedProvider) { _ in + apiKeyInput = appState.apiKey + apiBaseURLInput = appState.apiBaseURL + keyValidationError = nil + keyValidationSuccess = false + } + + if 
appState.selectedProvider == .custom { + Divider() + + Text("Custom API Base URL") + .font(.caption.weight(.semibold)) + + Text("Enter the base URL of your OpenAI-compatible API provider.") + .font(.caption) + .foregroundStyle(.secondary) + + HStack(spacing: 8) { + TextField("https://api.example.com/v1", text: $apiBaseURLInput) + .textFieldStyle(.roundedBorder) + .font(.system(.body, design: .monospaced)) + .onChange(of: apiBaseURLInput) { newValue in + let trimmed = newValue.trimmingCharacters(in: .whitespacesAndNewlines) + if !trimmed.isEmpty { + appState.apiBaseURL = trimmed + } + } + } + } + + if let url = appState.selectedProvider.keyInstructionURL { + HStack(spacing: 4) { + Image(systemName: "link") + .font(.caption2) + Button(appState.selectedProvider.keyInstructionDisplayURL) { + openURL(url) + } + .buttonStyle(.plain) + .foregroundStyle(.blue) + .font(.caption) + } + } + } + } + // MARK: API Key private var apiKeySection: some View { VStack(alignment: .leading, spacing: 10) { - Text("FreeFlow uses Groq's whisper-large-v3 model for transcription.") + Text("Enter your \(appState.selectedProvider.displayName) API key for transcription and post-processing.") .font(.caption) .foregroundStyle(.secondary) HStack(spacing: 8) { - SecureField("Enter your Groq API key", text: $apiKeyInput) + SecureField(appState.selectedProvider.apiKeyPlaceholder, text: $apiKeyInput) .textFieldStyle(.roundedBorder) .font(.system(.body, design: .monospaced)) .disabled(isValidatingKey) @@ -430,55 +490,128 @@ struct GeneralSettingsView: View { Divider() - Text("API Base URL") - .font(.caption.weight(.semibold)) + Toggle(isOn: $appState.forceHTTP2Transcription) { + VStack(alignment: .leading, spacing: 4) { + Text("Force HTTP/2 for Transcription") + .font(.caption.weight(.semibold)) + Text("Uses `curl --http2` for audio transcription uploads. 
Leave this off unless the default transport is failing.") + .font(.caption) + .foregroundStyle(.secondary) + } + } + .toggleStyle(.switch) + } + } - Text("Change this to use a different OpenAI-compatible API provider.") - .font(.caption) - .foregroundStyle(.secondary) + // MARK: Models - HStack(spacing: 8) { - TextField("https://api.groq.com/openai/v1", text: $apiBaseURLInput) - .textFieldStyle(.roundedBorder) - .font(.system(.body, design: .monospaced)) - .onChange(of: apiBaseURLInput) { newValue in - let trimmed = newValue.trimmingCharacters(in: .whitespacesAndNewlines) - if !trimmed.isEmpty { - appState.apiBaseURL = trimmed + private var modelSection: some View { + VStack(alignment: .leading, spacing: 10) { + VStack(alignment: .leading, spacing: 6) { + Text("Transcription Model") + .font(.caption.weight(.semibold)) + + ForEach(appState.selectedProvider.availableTranscriptionModels + [.customPlaceholder]) { model in + Button { + appState.selectedTranscriptionModel = model + } label: { + HStack(alignment: .top, spacing: 8) { + Image(systemName: appState.selectedTranscriptionModel == model ? "checkmark.circle.fill" : "circle") + .foregroundStyle(appState.selectedTranscriptionModel == model ? .blue : .secondary) + .font(.caption) + .padding(.top, 1) + VStack(alignment: .leading, spacing: 1) { + Text(model.displayName) + .font(.caption.weight(.medium)) + .foregroundStyle(.primary) + if !model.description.isEmpty && !model.isCustom { + Text(model.description) + .font(.caption2) + .foregroundStyle(.tertiary) + } + } + Spacer() } + .padding(.vertical, 3) } + .buttonStyle(.plain) + } + + if appState.selectedTranscriptionModel.isCustom { + TextField("Enter model ID (e.g. 
whisper-large-v3)", text: $appState.customTranscriptionModelID) + .textFieldStyle(.roundedBorder) + .font(.system(.caption, design: .monospaced)) + } - Button("Reset to Default") { - apiBaseURLInput = "https://api.groq.com/openai/v1" - appState.apiBaseURL = "https://api.groq.com/openai/v1" + if !appState.selectedTranscriptionModel.isCustom { + Text("Model ID: \(appState.selectedTranscriptionModel.id)") + .font(.caption2) + .foregroundStyle(.tertiary) + .textSelection(.enabled) } - .font(.caption) } Divider() - Toggle(isOn: $appState.forceHTTP2Transcription) { - VStack(alignment: .leading, spacing: 4) { - Text("Force HTTP/2 for Transcription") - .font(.caption.weight(.semibold)) - Text("Uses `curl --http2` for audio transcription uploads. Leave this off unless the default transport is failing.") - .font(.caption) - .foregroundStyle(.secondary) + VStack(alignment: .leading, spacing: 6) { + Text("Chat / Post-Processing Model") + .font(.caption.weight(.semibold)) + + Text("Used for context analysis and cleaning up transcriptions.") + .font(.caption) + .foregroundStyle(.secondary) + + ForEach(appState.selectedProvider.availableChatModels + [.customPlaceholder]) { model in + Button { + appState.selectedChatModel = model + } label: { + HStack(alignment: .top, spacing: 8) { + Image(systemName: appState.selectedChatModel == model ? "checkmark.circle.fill" : "circle") + .foregroundStyle(appState.selectedChatModel == model ? .blue : .secondary) + .font(.caption) + .padding(.top, 1) + VStack(alignment: .leading, spacing: 1) { + Text(model.displayName) + .font(.caption.weight(.medium)) + .foregroundStyle(.primary) + if !model.description.isEmpty && !model.isCustom { + Text(model.description) + .font(.caption2) + .foregroundStyle(.tertiary) + } + } + Spacer() + } + .padding(.vertical, 3) + } + .buttonStyle(.plain) + } + + if appState.selectedChatModel.isCustom { + TextField("Enter model ID (e.g. 
gpt-4o-mini)", text: $appState.customChatModelID) + .textFieldStyle(.roundedBorder) + .font(.system(.caption, design: .monospaced)) + } + + if !appState.selectedChatModel.isCustom { + Text("Model ID: \(appState.selectedChatModel.id)") + .font(.caption2) + .foregroundStyle(.tertiary) + .textSelection(.enabled) } } - .toggleStyle(.switch) } } private func validateAndSaveKey() { let key = apiKeyInput.trimmingCharacters(in: .whitespacesAndNewlines) - let baseURL = apiBaseURLInput.trimmingCharacters(in: .whitespacesAndNewlines) + let baseURL = appState.apiBaseURL.trimmingCharacters(in: .whitespacesAndNewlines) isValidatingKey = true keyValidationError = nil keyValidationSuccess = false Task { - let valid = await TranscriptionService.validateAPIKey(key, baseURL: baseURL.isEmpty ? "https://api.groq.com/openai/v1" : baseURL) + let valid = await TranscriptionService.validateAPIKey(key, baseURL: baseURL.isEmpty ? appState.selectedProvider.defaultBaseURL : baseURL) await MainActor.run { isValidatingKey = false if valid { @@ -1354,7 +1487,7 @@ struct RunLogEntryView: View { title: "Transcribe Audio", content: { VStack(alignment: .leading, spacing: 4) { - Text("Sent audio to Groq whisper-large-v3") + Text("Sent audio to \(appState.selectedProvider.displayName) \(appState.effectiveTranscriptionModelID)") .font(.caption) .foregroundStyle(.secondary) .textSelection(.enabled) diff --git a/Sources/SetupView.swift b/Sources/SetupView.swift index c363ae2..132c8c0 100644 --- a/Sources/SetupView.swift +++ b/Sources/SetupView.swift @@ -11,7 +11,9 @@ struct SetupView: View { private let freeflowRepoURL = URL(string: "https://github.com/zachlatta/freeflow")! 
private enum SetupStep: Int, CaseIterable { case welcome = 0 + case provider case apiKey + case modelSelection case micPermission case accessibility case screenRecording @@ -155,8 +157,12 @@ struct SetupView: View { switch currentStep { case .welcome: welcomeStep + case .provider: + providerStep case .apiKey: apiKeyStep + case .modelSelection: + modelSelectionStep case .micPermission: micPermissionStep case .accessibility: @@ -304,36 +310,38 @@ struct SetupView: View { .font(.system(size: 60)) .foregroundStyle(.blue) - Text("Groq API Key") + Text("\(appState.selectedProvider.displayName) API Key") .font(.title) .fontWeight(.bold) - Text("FreeFlow uses Groq for fast, high-accuracy transcription.") + Text("FreeFlow uses \(appState.selectedProvider.displayName) for transcription and post-processing.") .multilineTextAlignment(.center) .foregroundStyle(.secondary) .fixedSize(horizontal: false, vertical: true) VStack(alignment: .leading, spacing: 10) { - VStack(alignment: .leading, spacing: 4) { - Text("How to get a free API key:") - .font(.subheadline.weight(.semibold)) - VStack(alignment: .leading, spacing: 2) { - instructionRow(number: "1", text: "Go to [console.groq.com/keys](https://console.groq.com/keys)") - instructionRow(number: "2", text: "Create a free account (if you don't have one)") - instructionRow(number: "3", text: "Click **Create API Key** and copy it") + if let url = appState.selectedProvider.keyInstructionURL { + VStack(alignment: .leading, spacing: 4) { + Text("How to get an API key:") + .font(.subheadline.weight(.semibold)) + VStack(alignment: .leading, spacing: 2) { + instructionRow(number: "1", text: "Go to [\(appState.selectedProvider.keyInstructionDisplayURL)](\(url.absoluteString))") + instructionRow(number: "2", text: "Create an account (if you don't have one)") + instructionRow(number: "3", text: "Click **Create API Key** and copy it") + } } + .padding(10) + .frame(maxWidth: .infinity, alignment: .leading) + .background( + 
RoundedRectangle(cornerRadius: 8) + .fill(Color.blue.opacity(0.06)) + ) } - .padding(10) - .frame(maxWidth: .infinity, alignment: .leading) - .background( - RoundedRectangle(cornerRadius: 8) - .fill(Color.blue.opacity(0.06)) - ) VStack(alignment: .leading, spacing: 6) { Text("API Key") .font(.headline) - SecureField("Paste your Groq API key", text: $apiKeyInput) + SecureField(appState.selectedProvider.apiKeyPlaceholder, text: $apiKeyInput) .textFieldStyle(.roundedBorder) .font(.system(.body, design: .monospaced)) .disabled(isValidatingKey) @@ -347,11 +355,183 @@ struct SetupView: View { .font(.caption) } } + + if appState.selectedProvider == .custom { + VStack(alignment: .leading, spacing: 6) { + Text("API Base URL") + .font(.headline) + TextField("https://api.example.com/v1", text: Binding( + get: { appState.apiBaseURL }, + set: { appState.apiBaseURL = $0 } + )) + .textFieldStyle(.roundedBorder) + .font(.system(.body, design: .monospaced)) + } + } } } } + var providerStep: some View { + VStack(spacing: 20) { + Image(systemName: "server.rack") + .font(.system(size: 60)) + .foregroundStyle(.blue) + + Text("API Provider") + .font(.title) + .fontWeight(.bold) + + Text("Choose which API provider to use for\ntranscription and post-processing.") + .multilineTextAlignment(.center) + .foregroundStyle(.secondary) + .fixedSize(horizontal: false, vertical: true) + + VStack(spacing: 8) { + ForEach(APIProvider.allCases) { provider in + Button { + appState.selectedProvider = provider + apiKeyInput = appState.apiKey + } label: { + HStack { + Image(systemName: appState.selectedProvider == provider ? "checkmark.circle.fill" : "circle") + .foregroundStyle(appState.selectedProvider == provider ? .blue : .secondary) + Text(provider.displayName) + .foregroundStyle(.primary) + Spacer() + } + .padding(.horizontal, 16) + .padding(.vertical, 10) + .background( + RoundedRectangle(cornerRadius: 8) + .fill(appState.selectedProvider == provider ? 
Color.blue.opacity(0.1) : Color.clear) + .overlay( + RoundedRectangle(cornerRadius: 8) + .stroke(appState.selectedProvider == provider ? Color.blue.opacity(0.4) : Color.primary.opacity(0.1), lineWidth: 1) + ) + ) + } + .buttonStyle(.plain) + } + } + } + } + + var modelSelectionStep: some View { + VStack(spacing: 20) { + Image(systemName: "cpu") + .font(.system(size: 60)) + .foregroundStyle(.blue) + + Text("Choose Models") + .font(.title) + .fontWeight(.bold) + + Text("Select which models to use. Defaults work great for most users.") + .multilineTextAlignment(.center) + .foregroundStyle(.secondary) + .fixedSize(horizontal: false, vertical: true) + + VStack(alignment: .leading, spacing: 14) { + VStack(alignment: .leading, spacing: 6) { + Text("Transcription Model") + .font(.subheadline.weight(.semibold)) + + VStack(spacing: 4) { + ForEach(appState.selectedProvider.availableTranscriptionModels + [.customPlaceholder]) { model in + Button { + appState.selectedTranscriptionModel = model + } label: { + HStack(alignment: .top) { + Image(systemName: appState.selectedTranscriptionModel == model ? "checkmark.circle.fill" : "circle") + .foregroundStyle(appState.selectedTranscriptionModel == model ? .blue : .secondary) + .padding(.top, 2) + VStack(alignment: .leading, spacing: 2) { + Text(model.displayName) + .foregroundStyle(.primary) + .font(.subheadline.weight(.medium)) + if !model.description.isEmpty { + Text(model.description) + .font(.caption) + .foregroundStyle(.secondary) + } + } + Spacer() + } + .padding(.horizontal, 12) + .padding(.vertical, 6) + .background( + RoundedRectangle(cornerRadius: 6) + .fill(appState.selectedTranscriptionModel == model ? 
Color.blue.opacity(0.08) : Color.clear) + ) + } + .buttonStyle(.plain) + } + + if appState.selectedTranscriptionModel.isCustom { + TextField("Enter model ID", text: $appState.customTranscriptionModelID) + .textFieldStyle(.roundedBorder) + .font(.system(.caption, design: .monospaced)) + .padding(.horizontal, 12) + } + } + } + + Divider() + + VStack(alignment: .leading, spacing: 6) { + Text("Chat / Post-Processing Model") + .font(.subheadline.weight(.semibold)) + + VStack(spacing: 4) { + ForEach(appState.selectedProvider.availableChatModels + [.customPlaceholder]) { model in + Button { + appState.selectedChatModel = model + } label: { + HStack(alignment: .top) { + Image(systemName: appState.selectedChatModel == model ? "checkmark.circle.fill" : "circle") + .foregroundStyle(appState.selectedChatModel == model ? .blue : .secondary) + .padding(.top, 2) + VStack(alignment: .leading, spacing: 2) { + Text(model.displayName) + .foregroundStyle(.primary) + .font(.subheadline.weight(.medium)) + if !model.description.isEmpty { + Text(model.description) + .font(.caption) + .foregroundStyle(.secondary) + } + } + Spacer() + } + .padding(.horizontal, 12) + .padding(.vertical, 6) + .background( + RoundedRectangle(cornerRadius: 6) + .fill(appState.selectedChatModel == model ? 
Color.blue.opacity(0.08) : Color.clear) + ) + } + .buttonStyle(.plain) + } + + if appState.selectedChatModel.isCustom { + TextField("Enter model ID", text: $appState.customChatModelID) + .textFieldStyle(.roundedBorder) + .font(.system(.caption, design: .monospaced)) + .padding(.horizontal, 12) + } + } + } + } + .padding(10) + .background( + RoundedRectangle(cornerRadius: 8) + .fill(Color.blue.opacity(0.04)) + ) + } + } + var micPermissionStep: some View { VStack(spacing: 20) { Image(systemName: "mic.fill") diff --git a/Sources/TranscriptionService.swift b/Sources/TranscriptionService.swift index 649bd70..f928866 100644 --- a/Sources/TranscriptionService.swift +++ b/Sources/TranscriptionService.swift @@ -8,15 +8,16 @@ class TranscriptionService { private let apiKey: String private let baseURL: String private let forceHTTP2: Bool - private let transcriptionModel = "whisper-large-v3" + private let transcriptionModel: String private let transcriptionTimeoutSeconds: TimeInterval = 20 private let uploadSampleRate = 16_000.0 private let uploadChannelCount: AVAudioChannelCount = 1 - init(apiKey: String, baseURL: String = "https://api.groq.com/openai/v1", forceHTTP2: Bool = false) { + init(apiKey: String, baseURL: String = "https://api.groq.com/openai/v1", forceHTTP2: Bool = false, transcriptionModel: String = "whisper-large-v3") { self.apiKey = apiKey self.baseURL = baseURL self.forceHTTP2 = forceHTTP2 + self.transcriptionModel = transcriptionModel } // Validate API key by hitting a lightweight endpoint