From 5345e495dc73b5d3b0ba809184b3dc5d182361f8 Mon Sep 17 00:00:00 2001 From: Reid Date: Sat, 14 Mar 2026 12:38:56 -0700 Subject: [PATCH 1/2] OpenAI API --- Sources/APIProvider.swift | 77 +++++++++++++++++++++ Sources/AppContextService.swift | 8 ++- Sources/AppState.swift | 50 +++++++++++--- Sources/PostProcessingService.swift | 7 +- Sources/SettingsView.swift | 77 +++++++++++++-------- Sources/SetupView.swift | 103 +++++++++++++++++++++++----- Sources/TranscriptionService.swift | 7 +- 7 files changed, 260 insertions(+), 69 deletions(-) create mode 100644 Sources/APIProvider.swift diff --git a/Sources/APIProvider.swift b/Sources/APIProvider.swift new file mode 100644 index 0000000..a27a344 --- /dev/null +++ b/Sources/APIProvider.swift @@ -0,0 +1,77 @@ +import Foundation + +enum APIProvider: String, CaseIterable, Identifiable { + case groq + case openai + case custom + + var id: String { rawValue } + + var displayName: String { + switch self { + case .groq: return "Groq" + case .openai: return "OpenAI" + case .custom: return "Custom" + } + } + + var defaultBaseURL: String { + switch self { + case .groq: return "https://api.groq.com/openai/v1" + case .openai: return "https://api.openai.com/v1" + case .custom: return "" + } + } + + var transcriptionModel: String { + switch self { + case .groq: return "whisper-large-v3" + case .openai: return "whisper-1" + case .custom: return "whisper-large-v3" + } + } + + var chatModel: String { + switch self { + case .groq: return "meta-llama/llama-4-scout-17b-16e-instruct" + case .openai: return "gpt-4o-mini" + case .custom: return "meta-llama/llama-4-scout-17b-16e-instruct" + } + } + + var visionModel: String { + chatModel + } + + var apiKeyStorageKey: String { + switch self { + case .groq: return "groq_api_key" + case .openai: return "openai_api_key" + case .custom: return "custom_api_key" + } + } + + var apiKeyPlaceholder: String { + switch self { + case .groq: return "Paste your Groq API key" + case .openai: return "Paste your 
OpenAI API key" + case .custom: return "Paste your API key" + } + } + + var keyInstructionURL: String { + switch self { + case .groq: return "https://console.groq.com/keys" + case .openai: return "https://platform.openai.com/api-keys" + case .custom: return "" + } + } + + var keyInstructionDisplayURL: String { + switch self { + case .groq: return "console.groq.com/keys" + case .openai: return "platform.openai.com/api-keys" + case .custom: return "" + } + } +} diff --git a/Sources/AppContextService.swift b/Sources/AppContextService.swift index f177028..87c1a51 100644 --- a/Sources/AppContextService.swift +++ b/Sources/AppContextService.swift @@ -31,15 +31,17 @@ Return only two sentences, no labels, no markdown, no extra commentary. private let apiKey: String private let baseURL: String private let customContextPrompt: String - private let fallbackTextModel = "meta-llama/llama-4-scout-17b-16e-instruct" - private let visionModel = "meta-llama/llama-4-scout-17b-16e-instruct" + private let fallbackTextModel: String + private let visionModel: String private let maxScreenshotDataURILength = 500_000 private let screenshotCompressionPrimary = 0.5 private let screenshotMaxDimension: CGFloat = 1024 - init(apiKey: String, baseURL: String = "https://api.groq.com/openai/v1", customContextPrompt: String = "") { + init(apiKey: String, baseURL: String, chatModel: String, visionModel: String, customContextPrompt: String = "") { self.apiKey = apiKey self.baseURL = baseURL + self.fallbackTextModel = chatModel + self.visionModel = visionModel self.customContextPrompt = customContextPrompt } diff --git a/Sources/AppState.swift b/Sources/AppState.swift index 2592012..84d098d 100644 --- a/Sources/AppState.swift +++ b/Sources/AppState.swift @@ -35,8 +35,8 @@ enum SettingsTab: String, CaseIterable, Identifiable { } final class AppState: ObservableObject, @unchecked Sendable { - private let apiKeyStorageKey = "groq_api_key" private let apiBaseURLStorageKey = "api_base_url" + private let 
selectedProviderStorageKey = "selected_provider" private let customVocabularyStorageKey = "custom_vocabulary" private let selectedMicrophoneStorageKey = "selected_microphone_id" private let customSystemPromptStorageKey = "custom_system_prompt" @@ -52,17 +52,32 @@ final class AppState: ObservableObject, @unchecked Sendable { } } + @Published var selectedProvider: APIProvider { + didSet { + UserDefaults.standard.set(selectedProvider.rawValue, forKey: selectedProviderStorageKey) + // Assigning apiKey and apiBaseURL runs their didSet observers, which rebuild contextService. + apiKey = Self.loadStoredAPIKey(account: selectedProvider.apiKeyStorageKey) + // Custom restores its saved endpoint; presets always use their fixed default URL. + apiBaseURL = selectedProvider == .custom + ? Self.loadStoredAPIBaseURL(account: apiBaseURLStorageKey) + : selectedProvider.defaultBaseURL + } + } + @Published var apiKey: String { didSet { persistAPIKey(apiKey) - contextService = AppContextService(apiKey: apiKey, baseURL: apiBaseURL, customContextPrompt: customContextPrompt) + rebuildContextService() } } @Published var apiBaseURL: String { didSet { - persistAPIBaseURL(apiBaseURL) - contextService = AppContextService(apiKey: apiKey, baseURL: apiBaseURL, customContextPrompt: customContextPrompt) + // Persist only the custom endpoint so switching to a preset never overwrites it. + if selectedProvider == .custom { + persistAPIBaseURL(apiBaseURL) + } + rebuildContextService() } } @@ -88,7 +99,7 @@ final class AppState: ObservableObject, @unchecked Sendable { @Published var customContextPrompt: String { didSet { UserDefaults.standard.set(customContextPrompt, forKey: customContextPromptStorageKey) - contextService = AppContextService(apiKey: apiKey, baseURL: apiBaseURL, customContextPrompt: customContextPrompt) + rebuildContextService() } } @@ -149,8 +160,14 @@ final class AppState: ObservableObject, @unchecked Sendable { init() { let hasCompletedSetup = UserDefaults.standard.bool(forKey: "hasCompletedSetup") - let apiKey = Self.loadStoredAPIKey(account: apiKeyStorageKey) - let apiBaseURL = Self.loadStoredAPIBaseURL(account: "api_base_url") + let selectedProvider = APIProvider(rawValue: UserDefaults.standard.string(forKey: "selected_provider") ?? "") ??
.groq + let apiKey = Self.loadStoredAPIKey(account: selectedProvider.apiKeyStorageKey) + let apiBaseURL: String + if selectedProvider == .custom { + apiBaseURL = Self.loadStoredAPIBaseURL(account: "api_base_url") + } else { + apiBaseURL = selectedProvider.defaultBaseURL + } let selectedHotkey = HotkeyOption(rawValue: UserDefaults.standard.string(forKey: "hotkey_option") ?? "fn") ?? .fnKey let customVocabulary = UserDefaults.standard.string(forKey: customVocabularyStorageKey) ?? "" let customSystemPrompt = UserDefaults.standard.string(forKey: customSystemPromptStorageKey) ?? "" @@ -172,8 +189,9 @@ final class AppState: ObservableObject, @unchecked Sendable { let selectedMicrophoneID = UserDefaults.standard.string(forKey: selectedMicrophoneStorageKey) ?? "default" - self.contextService = AppContextService(apiKey: apiKey, baseURL: apiBaseURL, customContextPrompt: customContextPrompt) + self.contextService = AppContextService(apiKey: apiKey, baseURL: apiBaseURL, chatModel: selectedProvider.chatModel, visionModel: selectedProvider.visionModel, customContextPrompt: customContextPrompt) self.hasCompletedSetup = hasCompletedSetup + self.selectedProvider = selectedProvider self.apiKey = apiKey self.apiBaseURL = apiBaseURL self.selectedHotkey = selectedHotkey @@ -222,9 +240,9 @@ final class AppState: ObservableObject, @unchecked Sendable { private func persistAPIKey(_ value: String) { let trimmed = value.trimmingCharacters(in: .whitespacesAndNewlines) if trimmed.isEmpty { - AppSettingsStorage.delete(account: apiKeyStorageKey) + AppSettingsStorage.delete(account: selectedProvider.apiKeyStorageKey) } else { - AppSettingsStorage.save(trimmed, account: apiKeyStorageKey) + AppSettingsStorage.save(trimmed, account: selectedProvider.apiKeyStorageKey) } } @@ -246,6 +264,16 @@ final class AppState: ObservableObject, @unchecked Sendable { } } + private func rebuildContextService() { + contextService = AppContextService( + apiKey: apiKey, + baseURL: apiBaseURL, + chatModel: 
selectedProvider.chatModel, + visionModel: selectedProvider.visionModel, + customContextPrompt: customContextPrompt + ) + } + static func audioStorageDirectory() -> URL { let appSupport = FileManager.default.urls(for: .applicationSupportDirectory, in: .userDomainMask).first! let appName = Bundle.main.object(forInfoDictionaryKey: "CFBundleName") as? String ?? "FreeFlow" @@ -630,8 +658,8 @@ final class AppState: ObservableObject, @unchecked Sendable { } catch {} } - let transcriptionService = TranscriptionService(apiKey: apiKey, baseURL: apiBaseURL) - let postProcessingService = PostProcessingService(apiKey: apiKey, baseURL: apiBaseURL) + let transcriptionService = TranscriptionService(apiKey: apiKey, baseURL: apiBaseURL, transcriptionModel: selectedProvider.transcriptionModel) + let postProcessingService = PostProcessingService(apiKey: apiKey, baseURL: apiBaseURL, model: selectedProvider.chatModel) Task { do { diff --git a/Sources/PostProcessingService.swift b/Sources/PostProcessingService.swift index 4830ff0..80b6b62 100644 --- a/Sources/PostProcessingService.swift +++ b/Sources/PostProcessingService.swift @@ -42,12 +42,13 @@ Output rules: private let apiKey: String private let baseURL: String - private let defaultModel = "meta-llama/llama-4-scout-17b-16e-instruct" + private let model: String private let postProcessingTimeoutSeconds: TimeInterval = 20 - init(apiKey: String, baseURL: String = "https://api.groq.com/openai/v1") { + init(apiKey: String, baseURL: String, model: String) { self.apiKey = apiKey self.baseURL = baseURL + self.model = model } func postProcess( @@ -67,7 +68,7 @@ Output rules: return try await self.process( transcript: transcript, contextSummary: context.contextSummary, - model: defaultModel, + model: model, customVocabulary: vocabularyTerms, customSystemPrompt: customSystemPrompt ) diff --git a/Sources/SettingsView.swift b/Sources/SettingsView.swift index d095f86..de17518 100644 --- a/Sources/SettingsView.swift +++ b/Sources/SettingsView.swift 
@@ -228,8 +228,8 @@ struct GeneralSettingsView: View { SettingsCard("Updates", icon: "arrow.triangle.2.circlepath") { updatesSection } - SettingsCard("API Key", icon: "key.fill") { - apiKeySection + SettingsCard("API Provider", icon: "server.rack") { + apiProviderSection } SettingsCard("Push-to-Talk Key", icon: "keyboard.fill") { hotkeySection @@ -394,16 +394,32 @@ struct GeneralSettingsView: View { } } - // MARK: API Key + // MARK: API Provider - private var apiKeySection: some View { + private var apiProviderSection: some View { VStack(alignment: .leading, spacing: 10) { - Text("FreeFlow uses Groq's whisper-large-v3 model for transcription.") + Picker("Provider", selection: Binding( + get: { appState.selectedProvider }, + set: { newProvider in + appState.selectedProvider = newProvider + apiKeyInput = appState.apiKey + apiBaseURLInput = appState.apiBaseURL + keyValidationError = nil + keyValidationSuccess = false + } + )) { + ForEach(APIProvider.allCases) { provider in + Text(provider.displayName).tag(provider) + } + } + .pickerStyle(.segmented) + + Text("FreeFlow uses \(appState.selectedProvider.displayName) for transcription and post-processing.") .font(.caption) .foregroundStyle(.secondary) HStack(spacing: 8) { - SecureField("Enter your Groq API key", text: $apiKeyInput) + SecureField(appState.selectedProvider.apiKeyPlaceholder, text: $apiKeyInput) .textFieldStyle(.roundedBorder) .font(.system(.body, design: .monospaced)) .disabled(isValidatingKey) @@ -418,6 +434,12 @@ struct GeneralSettingsView: View { .disabled(apiKeyInput.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty || isValidatingKey) } + if !appState.selectedProvider.keyInstructionURL.isEmpty { + Text("Get an API key at [\(appState.selectedProvider.keyInstructionDisplayURL)](\(appState.selectedProvider.keyInstructionURL))") + .font(.caption) + .tint(.blue) + } + if let error = keyValidationError { Label(error, systemImage: "xmark.circle.fill") .foregroundStyle(.red) @@ -428,44 +450,37 @@ struct 
GeneralSettingsView: View { .font(.caption) } - Divider() - - Text("API Base URL") - .font(.caption.weight(.semibold)) + if appState.selectedProvider == .custom { + Divider() - Text("Change this to use a different OpenAI-compatible API provider.") - .font(.caption) - .foregroundStyle(.secondary) + Text("API Base URL") + .font(.caption.weight(.semibold)) - HStack(spacing: 8) { - TextField("https://api.groq.com/openai/v1", text: $apiBaseURLInput) - .textFieldStyle(.roundedBorder) - .font(.system(.body, design: .monospaced)) - .onChange(of: apiBaseURLInput) { newValue in - let trimmed = newValue.trimmingCharacters(in: .whitespacesAndNewlines) - if !trimmed.isEmpty { - appState.apiBaseURL = trimmed + HStack(spacing: 8) { + TextField("https://api.example.com/v1", text: $apiBaseURLInput) + .textFieldStyle(.roundedBorder) + .font(.system(.body, design: .monospaced)) + .onChange(of: apiBaseURLInput) { newValue in + let trimmed = newValue.trimmingCharacters(in: .whitespacesAndNewlines) + if !trimmed.isEmpty { + appState.apiBaseURL = trimmed + } } - } - - Button("Reset to Default") { - apiBaseURLInput = "https://api.groq.com/openai/v1" - appState.apiBaseURL = "https://api.groq.com/openai/v1" } - .font(.caption) } } } private func validateAndSaveKey() { let key = apiKeyInput.trimmingCharacters(in: .whitespacesAndNewlines) - let baseURL = apiBaseURLInput.trimmingCharacters(in: .whitespacesAndNewlines) isValidatingKey = true keyValidationError = nil keyValidationSuccess = false + let baseURL = appState.apiBaseURL + Task { - let valid = await TranscriptionService.validateAPIKey(key, baseURL: baseURL.isEmpty ? 
"https://api.groq.com/openai/v1" : baseURL) + let valid = await TranscriptionService.validateAPIKey(key, baseURL: baseURL) await MainActor.run { isValidatingKey = false if valid { @@ -881,7 +896,7 @@ struct PromptsSettingsView: View { systemTestError = nil systemTestPrompt = nil - let service = PostProcessingService(apiKey: appState.apiKey, baseURL: appState.apiBaseURL) + let service = PostProcessingService(apiKey: appState.apiKey, baseURL: appState.apiBaseURL, model: appState.selectedProvider.chatModel) let input = systemTestInput let customPrompt = appState.customSystemPrompt let vocabulary = appState.customVocabulary @@ -1097,6 +1112,8 @@ struct PromptsSettingsView: View { let service = AppContextService( apiKey: appState.apiKey, baseURL: appState.apiBaseURL, + chatModel: appState.selectedProvider.chatModel, + visionModel: appState.selectedProvider.visionModel, customContextPrompt: appState.customContextPrompt ) diff --git a/Sources/SetupView.swift b/Sources/SetupView.swift index 4fd1fb0..f89bc9d 100644 --- a/Sources/SetupView.swift +++ b/Sources/SetupView.swift @@ -11,6 +11,7 @@ struct SetupView: View { private let freeflowRepoURL = URL(string: "https://github.com/zachlatta/freeflow")! 
private enum SetupStep: Int, CaseIterable { case welcome = 0 + case provider case apiKey case micPermission case accessibility @@ -53,6 +54,8 @@ struct SetupView: View { switch currentStep { case .welcome: welcomeStep + case .provider: + providerStep case .apiKey: apiKeyStep case .micPermission: @@ -96,7 +99,7 @@ struct SetupView: View { validateAndContinue() } .keyboardShortcut(.defaultAction) - .disabled(apiKeyInput.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty || isValidatingKey) + .disabled(apiKeyInput.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty || isValidatingKey || (appState.selectedProvider == .custom && appState.apiBaseURL.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty)) } else if currentStep == .vocabulary { Button("Continue") { saveCustomVocabularyAndContinue() @@ -275,42 +278,89 @@ struct SetupView: View { } } + var providerStep: some View { + VStack(spacing: 20) { + Image(systemName: "server.rack") + .font(.system(size: 60)) + .foregroundStyle(.blue) + + Text("API Provider") + .font(.title) + .fontWeight(.bold) + + Text("Choose which API provider to use for\ntranscription and post-processing.") + .multilineTextAlignment(.center) + .foregroundStyle(.secondary) + .fixedSize(horizontal: false, vertical: true) + + VStack(spacing: 8) { + ForEach(APIProvider.allCases) { provider in + Button { + appState.selectedProvider = provider + apiKeyInput = appState.apiKey + } label: { + HStack { + Image(systemName: appState.selectedProvider == provider ? "checkmark.circle.fill" : "circle") + .foregroundStyle(appState.selectedProvider == provider ? .blue : .secondary) + Text(provider.displayName) + .foregroundStyle(.primary) + Spacer() + } + .padding(12) + .background(appState.selectedProvider == provider ? Color.blue.opacity(0.1) : Color(nsColor: .controlBackgroundColor)) + .cornerRadius(8) + .overlay( + RoundedRectangle(cornerRadius: 8) + .stroke(appState.selectedProvider == provider ? 
Color.blue : Color.clear, lineWidth: 1.5) + ) + } + .buttonStyle(.plain) + } + } + .padding(.top, 10) + + stepIndicator + } + } + var apiKeyStep: some View { VStack(spacing: 20) { Image(systemName: "key.fill") .font(.system(size: 60)) .foregroundStyle(.blue) - Text("Groq API Key") + Text("\(appState.selectedProvider.displayName) API Key") .font(.title) .fontWeight(.bold) - Text("FreeFlow uses Groq for fast, high-accuracy transcription.") + Text("FreeFlow uses \(appState.selectedProvider.displayName) for transcription and post-processing.") .multilineTextAlignment(.center) .foregroundStyle(.secondary) .fixedSize(horizontal: false, vertical: true) VStack(alignment: .leading, spacing: 10) { - VStack(alignment: .leading, spacing: 4) { - Text("How to get a free API key:") - .font(.subheadline.weight(.semibold)) - VStack(alignment: .leading, spacing: 2) { - instructionRow(number: "1", text: "Go to [console.groq.com/keys](https://console.groq.com/keys)") - instructionRow(number: "2", text: "Create a free account (if you don't have one)") - instructionRow(number: "3", text: "Click **Create API Key** and copy it") + if !appState.selectedProvider.keyInstructionURL.isEmpty { + VStack(alignment: .leading, spacing: 4) { + Text("How to get an API key:") + .font(.subheadline.weight(.semibold)) + VStack(alignment: .leading, spacing: 2) { + instructionRow(number: "1", text: "Go to [\(appState.selectedProvider.keyInstructionDisplayURL)](\(appState.selectedProvider.keyInstructionURL))") + instructionRow(number: "2", text: "Create an account (if you don't have one)") + instructionRow(number: "3", text: "Click **Create API Key** and copy it") + } } + .padding(10) + .frame(maxWidth: .infinity, alignment: .leading) + .background( + RoundedRectangle(cornerRadius: 8) + .fill(Color.blue.opacity(0.06)) + ) } - .padding(10) - .frame(maxWidth: .infinity, alignment: .leading) - .background( - RoundedRectangle(cornerRadius: 8) - .fill(Color.blue.opacity(0.06)) - ) VStack(alignment: .leading, 
spacing: 6) { Text("API Key") .font(.headline) - SecureField("Paste your Groq API key", text: $apiKeyInput) + SecureField(appState.selectedProvider.apiKeyPlaceholder, text: $apiKeyInput) .textFieldStyle(.roundedBorder) .font(.system(.body, design: .monospaced)) .disabled(isValidatingKey) @@ -324,6 +374,19 @@ struct SetupView: View { .font(.caption) } } + + if appState.selectedProvider == .custom { + VStack(alignment: .leading, spacing: 6) { + Text("API Base URL") + .font(.headline) + TextField("https://api.example.com/v1", text: Binding( + get: { appState.apiBaseURL }, + set: { appState.apiBaseURL = $0 } + )) + .textFieldStyle(.roundedBorder) + .font(.system(.body, design: .monospaced)) + } + } } stepIndicator @@ -791,8 +854,10 @@ struct SetupView: View { isValidatingKey = true keyValidationError = nil + let baseURL = appState.apiBaseURL + Task { - let valid = await TranscriptionService.validateAPIKey(key, baseURL: appState.apiBaseURL) + let valid = await TranscriptionService.validateAPIKey(key, baseURL: baseURL) await MainActor.run { isValidatingKey = false if valid { @@ -920,7 +985,7 @@ struct SetupView: View { Task { do { - let service = TranscriptionService(apiKey: appState.apiKey, baseURL: appState.apiBaseURL) + let service = TranscriptionService(apiKey: appState.apiKey, baseURL: appState.apiBaseURL, transcriptionModel: appState.selectedProvider.transcriptionModel) let transcript = try await service.transcribe(fileURL: url) await MainActor.run { testTranscript = transcript diff --git a/Sources/TranscriptionService.swift b/Sources/TranscriptionService.swift index c6717b2..667b0e1 100644 --- a/Sources/TranscriptionService.swift +++ b/Sources/TranscriptionService.swift @@ -3,16 +3,17 @@ import Foundation class TranscriptionService { private let apiKey: String private let baseURL: String - private let transcriptionModel = "whisper-large-v3" + private let transcriptionModel: String private let transcriptionTimeoutSeconds: TimeInterval = 20 - init(apiKey: String, 
baseURL: String = "https://api.groq.com/openai/v1") { + init(apiKey: String, baseURL: String, transcriptionModel: String) { self.apiKey = apiKey self.baseURL = baseURL + self.transcriptionModel = transcriptionModel } // Validate API key by hitting a lightweight endpoint - static func validateAPIKey(_ key: String, baseURL: String = "https://api.groq.com/openai/v1") async -> Bool { + static func validateAPIKey(_ key: String, baseURL: String) async -> Bool { let trimmed = key.trimmingCharacters(in: .whitespacesAndNewlines) guard !trimmed.isEmpty else { return false } From e94b1b06c221a4868735401e8c35391021a891ad Mon Sep 17 00:00:00 2001 From: Reid Date: Sat, 14 Mar 2026 19:54:13 -0700 Subject: [PATCH 2/2] use gpt 5 mini instead of 4o mini --- Sources/APIProvider.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sources/APIProvider.swift b/Sources/APIProvider.swift index a27a344..4ff79a9 100644 --- a/Sources/APIProvider.swift +++ b/Sources/APIProvider.swift @@ -34,7 +34,7 @@ enum APIProvider: String, CaseIterable, Identifiable { var chatModel: String { switch self { case .groq: return "meta-llama/llama-4-scout-17b-16e-instruct" - case .openai: return "gpt-4o-mini" + case .openai: return "gpt-5-mini-2025-08-07" case .custom: return "meta-llama/llama-4-scout-17b-16e-instruct" } }