diff --git a/firebaseai/FirebaseAIExample.xcodeproj/project.pbxproj b/firebaseai/FirebaseAIExample.xcodeproj/project.pbxproj index 24a92e20c..b33bc51ae 100644 --- a/firebaseai/FirebaseAIExample.xcodeproj/project.pbxproj +++ b/firebaseai/FirebaseAIExample.xcodeproj/project.pbxproj @@ -7,6 +7,34 @@ objects = { /* Begin PBXBuildFile section */ + 0EE94F252E9599B800CEFD69 /* TranscriptView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0EE94F212E9599B800CEFD69 /* TranscriptView.swift */; }; + 0EE94F262E9599B800CEFD69 /* ConnectButton.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0EE94F1D2E9599B800CEFD69 /* ConnectButton.swift */; }; + 0EE94F272E9599B800CEFD69 /* TranscriptViewModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0EE94F1B2E9599B800CEFD69 /* TranscriptViewModel.swift */; }; + 0EE94F282E9599B800CEFD69 /* AudioPlayer.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0EE94F132E9599B800CEFD69 /* AudioPlayer.swift */; }; + 0EE94F292E9599B800CEFD69 /* LiveAudioScreen.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0EE94F182E9599B800CEFD69 /* LiveAudioScreen.swift */; }; + 0EE94F2A2E9599B800CEFD69 /* LiveViewModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0EE94F1A2E9599B800CEFD69 /* LiveViewModel.swift */; }; + 0EE94F2B2E9599B800CEFD69 /* TranscriptLine.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0EE94F162E9599B800CEFD69 /* TranscriptLine.swift */; }; + 0EE94F2C2E9599B800CEFD69 /* ModelPhoto.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0EE94F202E9599B800CEFD69 /* ModelPhoto.swift */; }; + 0EE94F2D2E9599B800CEFD69 /* LiveErrorView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0EE94F1F2E9599B800CEFD69 /* LiveErrorView.swift */; }; + 0EE94F2E2E9599B800CEFD69 /* Microphone.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0EE94F142E9599B800CEFD69 /* Microphone.swift */; }; + 0EE94F2F2E9599B800CEFD69 /* LiveErrorDetailsView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0EE94F1E2E9599B800CEFD69 /* LiveErrorDetailsView.swift */; }; + 0EE94F302E9599B800CEFD69 /* AudioController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0EE94F122E9599B800CEFD69 /* AudioController.swift */; }; + 0EE94F312E9599B800CEFD69 /* AudioBufferHelpers.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0EE94F112E9599B800CEFD69 /* AudioBufferHelpers.swift */; }; + 0EE94F322E9599B800CEFD69 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 0EE94F232E9599B800CEFD69 /* Assets.xcassets */; }; + 0EE94F332E9599B800CEFD69 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 0EE94F232E9599B800CEFD69 /* Assets.xcassets */; }; + 0EE94F342E9599B800CEFD69 /* TranscriptView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0EE94F212E9599B800CEFD69 /* TranscriptView.swift */; }; + 0EE94F352E9599B800CEFD69 /* ConnectButton.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0EE94F1D2E9599B800CEFD69 /* ConnectButton.swift */; }; + 0EE94F362E9599B800CEFD69 /* TranscriptViewModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0EE94F1B2E9599B800CEFD69 /* TranscriptViewModel.swift */; }; + 0EE94F372E9599B800CEFD69 /* AudioPlayer.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0EE94F132E9599B800CEFD69 /* AudioPlayer.swift */; }; + 0EE94F382E9599B800CEFD69 /* LiveAudioScreen.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0EE94F182E9599B800CEFD69 /* LiveAudioScreen.swift */; }; + 0EE94F392E9599B800CEFD69 /* LiveViewModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0EE94F1A2E9599B800CEFD69 /* 
LiveViewModel.swift */; }; + 0EE94F3A2E9599B800CEFD69 /* TranscriptLine.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0EE94F162E9599B800CEFD69 /* TranscriptLine.swift */; }; + 0EE94F3B2E9599B800CEFD69 /* ModelPhoto.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0EE94F202E9599B800CEFD69 /* ModelPhoto.swift */; }; + 0EE94F3C2E9599B800CEFD69 /* LiveErrorView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0EE94F1F2E9599B800CEFD69 /* LiveErrorView.swift */; }; + 0EE94F3D2E9599B800CEFD69 /* Microphone.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0EE94F142E9599B800CEFD69 /* Microphone.swift */; }; + 0EE94F3E2E9599B800CEFD69 /* LiveErrorDetailsView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0EE94F1E2E9599B800CEFD69 /* LiveErrorDetailsView.swift */; }; + 0EE94F3F2E9599B800CEFD69 /* AudioController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0EE94F122E9599B800CEFD69 /* AudioController.swift */; }; + 0EE94F402E9599B800CEFD69 /* AudioBufferHelpers.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0EE94F112E9599B800CEFD69 /* AudioBufferHelpers.swift */; }; 860F09212E8C4179002D85D0 /* FirebaseAILogic.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = 860F09142E8C4171002D85D0 /* FirebaseAILogic.xcframework */; }; 860F09222E8C4179002D85D0 /* FirebaseAILogic.xcframework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = 860F09142E8C4171002D85D0 /* FirebaseAILogic.xcframework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; }; 860F09242E8C417A002D85D0 /* FirebaseAppCheckInterop.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = 860F09152E8C4171002D85D0 /* FirebaseAppCheckInterop.xcframework */; }; @@ -93,6 +121,20 @@ /* End PBXCopyFilesBuildPhase section */ /* Begin PBXFileReference section */ + 0EE94F112E9599B800CEFD69 /* AudioBufferHelpers.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AudioBufferHelpers.swift; sourceTree = ""; }; + 0EE94F122E9599B800CEFD69 /* AudioController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AudioController.swift; sourceTree = ""; }; + 0EE94F132E9599B800CEFD69 /* AudioPlayer.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AudioPlayer.swift; sourceTree = ""; }; + 0EE94F142E9599B800CEFD69 /* Microphone.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Microphone.swift; sourceTree = ""; }; + 0EE94F162E9599B800CEFD69 /* TranscriptLine.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TranscriptLine.swift; sourceTree = ""; }; + 0EE94F182E9599B800CEFD69 /* LiveAudioScreen.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LiveAudioScreen.swift; sourceTree = ""; }; + 0EE94F1A2E9599B800CEFD69 /* LiveViewModel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LiveViewModel.swift; sourceTree = ""; }; + 0EE94F1B2E9599B800CEFD69 /* TranscriptViewModel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TranscriptViewModel.swift; sourceTree = ""; }; + 0EE94F1D2E9599B800CEFD69 /* ConnectButton.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConnectButton.swift; sourceTree = ""; }; + 0EE94F1E2E9599B800CEFD69 /* LiveErrorDetailsView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LiveErrorDetailsView.swift; sourceTree = ""; }; + 0EE94F1F2E9599B800CEFD69 /* LiveErrorView.swift */ 
= {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LiveErrorView.swift; sourceTree = ""; }; + 0EE94F202E9599B800CEFD69 /* ModelPhoto.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ModelPhoto.swift; sourceTree = ""; }; + 0EE94F212E9599B800CEFD69 /* TranscriptView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TranscriptView.swift; sourceTree = ""; }; + 0EE94F232E9599B800CEFD69 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = ""; }; 860F09142E8C4171002D85D0 /* FirebaseAILogic.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; path = FirebaseAILogic.xcframework; sourceTree = ""; }; 860F09152E8C4171002D85D0 /* FirebaseAppCheckInterop.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; path = FirebaseAppCheckInterop.xcframework; sourceTree = ""; }; 860F09162E8C4171002D85D0 /* FirebaseAuthInterop.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; path = FirebaseAuthInterop.xcframework; sourceTree = ""; }; @@ -163,6 +205,67 @@ /* End PBXFrameworksBuildPhase section */ /* Begin PBXGroup section */ + 0EE94F152E9599B800CEFD69 /* Audio */ = { + isa = PBXGroup; + children = ( + 0EE94F112E9599B800CEFD69 /* AudioBufferHelpers.swift */, + 0EE94F122E9599B800CEFD69 /* AudioController.swift */, + 0EE94F132E9599B800CEFD69 /* AudioPlayer.swift */, + 0EE94F142E9599B800CEFD69 /* Microphone.swift */, + ); + path = Audio; + sourceTree = ""; + }; + 0EE94F172E9599B800CEFD69 /* Models */ = { + isa = PBXGroup; + children = ( + 0EE94F162E9599B800CEFD69 /* TranscriptLine.swift */, + ); + path = Models; + sourceTree = ""; + }; + 0EE94F192E9599B800CEFD69 /* Screens */ = { + isa = PBXGroup; + children = ( + 0EE94F182E9599B800CEFD69 /* LiveAudioScreen.swift */, + ); + path = Screens; + sourceTree = ""; + }; + 0EE94F1C2E9599B800CEFD69 /* ViewModels */ = { + isa = PBXGroup; + children = ( + 0EE94F1A2E9599B800CEFD69 /* LiveViewModel.swift */, + 0EE94F1B2E9599B800CEFD69 /* TranscriptViewModel.swift */, + ); + path = ViewModels; + sourceTree = ""; + }; + 0EE94F222E9599B800CEFD69 /* Views */ = { + isa = PBXGroup; + children = ( + 0EE94F1D2E9599B800CEFD69 /* ConnectButton.swift */, + 0EE94F1E2E9599B800CEFD69 /* LiveErrorDetailsView.swift */, + 0EE94F1F2E9599B800CEFD69 /* LiveErrorView.swift */, + 0EE94F202E9599B800CEFD69 /* ModelPhoto.swift */, + 0EE94F212E9599B800CEFD69 /* TranscriptView.swift */, + ); + path = Views; + sourceTree = ""; + }; + 0EE94F242E9599B800CEFD69 /* LiveAudioExample */ = { + isa = PBXGroup; + children = ( + 0EE94F152E9599B800CEFD69 /* Audio */, + 0EE94F172E9599B800CEFD69 /* Models */, + 0EE94F192E9599B800CEFD69 /* Screens */, + 0EE94F1C2E9599B800CEFD69 /* ViewModels */, + 0EE94F222E9599B800CEFD69 /* Views */, + 0EE94F232E9599B800CEFD69 /* Assets.xcassets */, + ); + path = LiveAudioExample; + sourceTree = ""; + }; 860F091A2E8C4171002D85D0 /* Firebase */ = { isa = PBXGroup; children = ( @@ -244,6 +347,7 @@ 8848C8262B0D04BC007B434F = { isa = PBXGroup; children = ( + 0EE94F242E9599B800CEFD69 /* LiveAudioExample */, DEFECAA82D7B4CCD00EF9621 /* ImagenScreen */, 88B8A9352B0FCBA700424728 /* GenerativeAIUIComponents */, 869200B22B879C4F00482873 /* GoogleService-Info.plist */, @@ -492,6 +596,7 @@ files = ( 86BB56022E8B2D6D0054B8B5 /* Preview Assets.xcassets in Resources */, 86BB56032E8B2D6D0054B8B5 /* Assets.xcassets in Resources */, + 0EE94F322E9599B800CEFD69 
/* Assets.xcassets in Resources */, 86BB56042E8B2D6D0054B8B5 /* GoogleService-Info.plist in Resources */, ); runOnlyForDeploymentPostprocessing = 0; @@ -502,6 +607,7 @@ files = ( 8848C83A2B0D04BD007B434F /* Preview Assets.xcassets in Resources */, 8848C8372B0D04BD007B434F /* Assets.xcassets in Resources */, + 0EE94F332E9599B800CEFD69 /* Assets.xcassets in Resources */, 869200B32B879C4F00482873 /* GoogleService-Info.plist in Resources */, ); runOnlyForDeploymentPostprocessing = 0; @@ -519,6 +625,19 @@ 86BB55ED2E8B2D6D0054B8B5 /* ChatMessage.swift in Sources */, 86BB55EE2E8B2D6D0054B8B5 /* ErrorDetailsView.swift in Sources */, 86BB55EF2E8B2D6D0054B8B5 /* ContentView.swift in Sources */, + 0EE94F252E9599B800CEFD69 /* TranscriptView.swift in Sources */, + 0EE94F262E9599B800CEFD69 /* ConnectButton.swift in Sources */, + 0EE94F272E9599B800CEFD69 /* TranscriptViewModel.swift in Sources */, + 0EE94F282E9599B800CEFD69 /* AudioPlayer.swift in Sources */, + 0EE94F292E9599B800CEFD69 /* LiveAudioScreen.swift in Sources */, + 0EE94F2A2E9599B800CEFD69 /* LiveViewModel.swift in Sources */, + 0EE94F2B2E9599B800CEFD69 /* TranscriptLine.swift in Sources */, + 0EE94F2C2E9599B800CEFD69 /* ModelPhoto.swift in Sources */, + 0EE94F2D2E9599B800CEFD69 /* LiveErrorView.swift in Sources */, + 0EE94F2E2E9599B800CEFD69 /* Microphone.swift in Sources */, + 0EE94F2F2E9599B800CEFD69 /* LiveErrorDetailsView.swift in Sources */, + 0EE94F302E9599B800CEFD69 /* AudioController.swift in Sources */, + 0EE94F312E9599B800CEFD69 /* AudioBufferHelpers.swift in Sources */, 86BB55F02E8B2D6D0054B8B5 /* GenerateContentScreen.swift in Sources */, 86BB55F12E8B2D6D0054B8B5 /* FirebaseAIExampleApp.swift in Sources */, 86BB55F22E8B2D6D0054B8B5 /* ConversationViewModel.swift in Sources */, @@ -545,6 +664,19 @@ 886F95DE2B17D5010036F07A /* ChatMessage.swift in Sources */, 88263BF12B239C11008AB09B /* ErrorDetailsView.swift in Sources */, 8848C8352B0D04BC007B434F /* ContentView.swift in Sources */, + 0EE94F342E9599B800CEFD69 /* TranscriptView.swift in Sources */, + 0EE94F352E9599B800CEFD69 /* ConnectButton.swift in Sources */, + 0EE94F362E9599B800CEFD69 /* TranscriptViewModel.swift in Sources */, + 0EE94F372E9599B800CEFD69 /* AudioPlayer.swift in Sources */, + 0EE94F382E9599B800CEFD69 /* LiveAudioScreen.swift in Sources */, + 0EE94F392E9599B800CEFD69 /* LiveViewModel.swift in Sources */, + 0EE94F3A2E9599B800CEFD69 /* TranscriptLine.swift in Sources */, + 0EE94F3B2E9599B800CEFD69 /* ModelPhoto.swift in Sources */, + 0EE94F3C2E9599B800CEFD69 /* LiveErrorView.swift in Sources */, + 0EE94F3D2E9599B800CEFD69 /* Microphone.swift in Sources */, + 0EE94F3E2E9599B800CEFD69 /* LiveErrorDetailsView.swift in Sources */, + 0EE94F3F2E9599B800CEFD69 /* AudioController.swift in Sources */, + 0EE94F402E9599B800CEFD69 /* AudioBufferHelpers.swift in Sources */, 886F95D52B17BA010036F07A /* GenerateContentScreen.swift in Sources */, 8848C8332B0D04BC007B434F /* FirebaseAIExampleApp.swift in Sources */, 886F95E02B17D5010036F07A /* ConversationViewModel.swift in Sources */, @@ -755,6 +887,7 @@ ENABLE_PREVIEWS = YES; ENABLE_USER_SCRIPT_SANDBOXING = NO; GENERATE_INFOPLIST_FILE = YES; + INFOPLIST_KEY_NSMicrophoneUsageDescription = "Communicating with the model through the Live Audio screen"; INFOPLIST_KEY_UIApplicationSceneManifest_Generation = YES; INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES; INFOPLIST_KEY_UILaunchScreen_Generation = YES; @@ -785,6 +918,7 @@ ENABLE_PREVIEWS = YES; ENABLE_USER_SCRIPT_SANDBOXING = NO; GENERATE_INFOPLIST_FILE = YES; + 
INFOPLIST_KEY_NSMicrophoneUsageDescription = "Communicating with the model through the Live Audio screen"; INFOPLIST_KEY_UIApplicationSceneManifest_Generation = YES; INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES; INFOPLIST_KEY_UILaunchScreen_Generation = YES; diff --git a/firebaseai/FirebaseAIExample/ContentView.swift b/firebaseai/FirebaseAIExample/ContentView.swift index 1714a0653..9a9b0aa48 100644 --- a/firebaseai/FirebaseAIExample/ContentView.swift +++ b/firebaseai/FirebaseAIExample/ContentView.swift @@ -50,6 +50,11 @@ struct ContentView: View { } Section("Examples") { + NavigationLink { + LiveAudioScreen(firebaseService: firebaseService, backend: selectedBackend) + } label: { + Label("Live Audio", systemImage: "microphone") + } NavigationLink { GenerateContentScreen(firebaseService: firebaseService) } label: { diff --git a/firebaseai/FirebaseAIExample/FirebaseAIExampleApp.swift b/firebaseai/FirebaseAIExample/FirebaseAIExampleApp.swift index 1d59440ea..e1714ce4d 100644 --- a/firebaseai/FirebaseAIExample/FirebaseAIExampleApp.swift +++ b/firebaseai/FirebaseAIExample/FirebaseAIExampleApp.swift @@ -22,6 +22,9 @@ class AppDelegate: NSObject, UIApplicationDelegate { // Recommendation: Protect your Vertex AI API resources from abuse by preventing unauthorized // clients using App Check; see https://firebase.google.com/docs/app-check#get_started. + // let providerFactor = AppCheckDebugProviderFactory() + // AppCheck.setAppCheckProviderFactory(providerFactor) + FirebaseApp.configure() if let firebaseApp = FirebaseApp.app(), firebaseApp.options.projectID == "mockproject-1234" { diff --git a/firebaseai/LiveAudioExample/Assets.xcassets/Contents.json b/firebaseai/LiveAudioExample/Assets.xcassets/Contents.json new file mode 100644 index 000000000..73c00596a --- /dev/null +++ b/firebaseai/LiveAudioExample/Assets.xcassets/Contents.json @@ -0,0 +1,6 @@ +{ + "info" : { + "author" : "xcode", + "version" : 1 + } +} diff --git a/firebaseai/LiveAudioExample/Assets.xcassets/gemini-logo.imageset/Contents.json b/firebaseai/LiveAudioExample/Assets.xcassets/gemini-logo.imageset/Contents.json new file mode 100644 index 000000000..8d93c4b5c --- /dev/null +++ b/firebaseai/LiveAudioExample/Assets.xcassets/gemini-logo.imageset/Contents.json @@ -0,0 +1,35 @@ +{ + "images" : [ + { + "filename" : "gemini-logo.png", + "idiom" : "universal", + "resizing" : { + "cap-insets" : { + "bottom" : 512, + "left" : 511, + "right" : 512, + "top" : 511 + }, + "center" : { + "height" : 1, + "mode" : "tile", + "width" : 1 + }, + "mode" : "9-part" + }, + "scale" : "1x" + }, + { + "idiom" : "universal", + "scale" : "2x" + }, + { + "idiom" : "universal", + "scale" : "3x" + } + ], + "info" : { + "author" : "xcode", + "version" : 1 + } +} diff --git a/firebaseai/LiveAudioExample/Assets.xcassets/gemini-logo.imageset/gemini-logo.png b/firebaseai/LiveAudioExample/Assets.xcassets/gemini-logo.imageset/gemini-logo.png new file mode 100644 index 000000000..11ef95cdd Binary files /dev/null and b/firebaseai/LiveAudioExample/Assets.xcassets/gemini-logo.imageset/gemini-logo.png differ diff --git a/firebaseai/LiveAudioExample/Audio/AudioBufferHelpers.swift b/firebaseai/LiveAudioExample/Audio/AudioBufferHelpers.swift new file mode 100644 index 000000000..e61d6512c --- /dev/null +++ b/firebaseai/LiveAudioExample/Audio/AudioBufferHelpers.swift @@ -0,0 +1,90 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import AVFoundation + +extension AVAudioPCMBuffer { + /// Creates a new `AVAudioPCMBuffer` from a `Data` struct. + /// + /// Only works with interleaved data. + static func fromInterleavedData(data: Data, format: AVAudioFormat) -> AVAudioPCMBuffer? { + guard format.isInterleaved else { + fatalError("Only interleaved data is supported") + } + + let frameCapacity = AVAudioFrameCount(data + .count / Int(format.streamDescription.pointee.mBytesPerFrame)) + guard let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: frameCapacity) else { + return nil + } + + buffer.frameLength = frameCapacity + data.withUnsafeBytes { bytes in + guard let baseAddress = bytes.baseAddress else { return } + let dst = buffer.mutableAudioBufferList.pointee.mBuffers + dst.mData?.copyMemory(from: baseAddress, byteCount: Int(dst.mDataByteSize)) + } + + return buffer + } + + /// Gets the underlying `Data` in this buffer. + /// + /// Will throw an error if this buffer doesn't hold int16 data. + func int16Data() -> Data { + guard let bufferPtr = audioBufferList.pointee.mBuffers.mData else { + fatalError("Missing audio buffer list") + } + + let audioBufferLength = Int(audioBufferList.pointee.mBuffers.mDataByteSize) + return Data(bytes: bufferPtr, count: audioBufferLength) + } +} + +extension AVAudioConverter { + /// Uses the converter to convert the provided `buffer`. + /// + /// Will handle determining the proper frame capacity, ensuring formats align, and propagating any errors that occur. + /// + /// - Returns: A new buffer, with the converted data. + func convertBuffer(_ buffer: AVAudioPCMBuffer) -> AVAudioPCMBuffer { + if buffer.format == outputFormat { return buffer } + guard buffer.format == inputFormat else { + fatalError("The buffer's format was different than the converter's input format") + } + + let frameCapacity = AVAudioFrameCount( + ceil(Double(buffer.frameLength) * outputFormat.sampleRate / inputFormat.sampleRate) + ) + + guard let output = AVAudioPCMBuffer( + pcmFormat: outputFormat, + frameCapacity: frameCapacity + ) else { + fatalError("Failed to create output buffer") + } + + var error: NSError? + convert(to: output, error: &error) { _, status in + status.pointee = .haveData + return buffer + } + + if let error { + fatalError("Failed to convert buffer: \(error.localizedDescription)") + } + + return output + } +} diff --git a/firebaseai/LiveAudioExample/Audio/AudioController.swift b/firebaseai/LiveAudioExample/Audio/AudioController.swift new file mode 100644 index 000000000..27ebec979 --- /dev/null +++ b/firebaseai/LiveAudioExample/Audio/AudioController.swift @@ -0,0 +1,256 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and +// limitations under the License. + +import AVFoundation + +/// Controls audio playback and recording. +actor AudioController { + /// Data processed from the microphone. + private let microphoneData: AsyncStream<AVAudioPCMBuffer> + private let microphoneDataQueue: AsyncStream<AVAudioPCMBuffer>.Continuation + private var audioPlayer: AudioPlayer? + private var audioEngine: AVAudioEngine? + private var microphone: Microphone? + private var listenTask: Task<Void, Never>? + private var routeTask: Task<Void, Never>? + + /// Port types that are considered "headphones" for our use-case. + /// + /// More specifically, airpods are considered bluetooth ports instead of headphones, so + /// this array is necessary. + private let headphonePortTypes: [AVAudioSession.Port] = [ + .headphones, + .bluetoothA2DP, + .bluetoothLE, + .bluetoothHFP, + ] + + private let modelInputFormat: AVAudioFormat + private let modelOutputFormat: AVAudioFormat + + private var stopped = false + + public init() async throws { + let session = AVAudioSession.sharedInstance() + try session.setCategory( + .playAndRecord, + mode: .voiceChat, + options: [.defaultToSpeaker, .allowBluetooth, .duckOthers, + .interruptSpokenAudioAndMixWithOthers, .allowBluetoothA2DP] + ) + try session.setPreferredIOBufferDuration(0.01) + try session.setActive(true) + + guard let modelInputFormat = AVAudioFormat( + commonFormat: .pcmFormatInt16, + sampleRate: 16000, + channels: 1, + interleaved: false + ) else { + fatalError("Failed to create model input format") + } + + guard let modelOutputFormat = AVAudioFormat( + commonFormat: .pcmFormatInt16, + sampleRate: 24000, + channels: 1, + interleaved: true + ) else { + fatalError("Failed to create model output format") + } + + self.modelInputFormat = modelInputFormat + self.modelOutputFormat = modelOutputFormat + + let (processedData, dataQueue) = AsyncStream<AVAudioPCMBuffer>.makeStream() + microphoneData = processedData + microphoneDataQueue = dataQueue + + listenForRouteChange() + } + + deinit { + stopped = true + listenTask?.cancel() + // audio engine needs to be stopped before disconnecting nodes + audioEngine?.pause() + audioEngine?.stop() + if let audioEngine { + do { + // the VP IO leaves behind artifacts, so we need to disable it to properly clean up + if audioEngine.inputNode.isVoiceProcessingEnabled { + try audioEngine.inputNode.setVoiceProcessingEnabled(false) + } + } catch { + print("Failed to disable voice processing: \(error.localizedDescription)") + } + } + microphone?.stop() + audioPlayer?.stop() + microphoneDataQueue.finish() + routeTask?.cancel() + } + + /// Kicks off audio processing, and returns a stream of recorded microphone audio data. + public func listenToMic() throws -> AsyncStream<AVAudioPCMBuffer> { + spawnAudioProcessingThread() + return microphoneData + } + + /// Permanently stop all audio processing. + /// + /// To start again, create a new instance of ``AudioController``. + public func stop() { + stopped = true + stopListeningAndPlayback() + microphoneDataQueue.finish() + routeTask?.cancel() + } + + /// Queues audio for playback. + public func playAudio(audio: Data) { + audioPlayer?.play(audio) + } + + /// Interrupts and clears the currently pending audio playback queue.
+ public func interrupt() { + audioPlayer?.interrupt() + } + + private func stopListeningAndPlayback() { + listenTask?.cancel() + // audio engine needs to be stopped before disconnecting nodes + audioEngine?.pause() + audioEngine?.stop() + if let audioEngine { + do { + // the VP IO leaves behind artifacts, so we need to disable it to properly clean up + if audioEngine.inputNode.isVoiceProcessingEnabled { + try audioEngine.inputNode.setVoiceProcessingEnabled(false) + } + } catch { + print("Failed to disable voice processing: \(error.localizedDescription)") + } + } + microphone?.stop() + audioPlayer?.stop() + } + + /// Start audio processing functionality. + /// + /// Will stop any currently running audio processing. + /// + /// This function is also called whenever the input or output device changes, + /// so it needs to be able to set up the audio processing without disrupting + /// the consumer of the microphone data. + private func spawnAudioProcessingThread() { + if stopped { return } + + stopListeningAndPlayback() + + // we need to start a new audio engine if the output device changed, so we might as well do it regardless + let audioEngine = AVAudioEngine() + self.audioEngine = audioEngine + + setupAudioPlayback(audioEngine) + setupVoiceProcessing(audioEngine) + + do { + try audioEngine.start() + } catch { + fatalError("Failed to start audio engine: \(error.localizedDescription)") + } + + setupMicrophone(audioEngine) + } + + private func setupMicrophone(_ engine: AVAudioEngine) { + let microphone = Microphone(engine: engine) + self.microphone = microphone + + microphone.start() + + let micFormat = engine.inputNode.outputFormat(forBus: 0) + guard let converter = AVAudioConverter(from: micFormat, to: modelInputFormat) else { + fatalError("Failed to create audio converter") + } + + listenTask = Task { + for await audio in microphone.audio { + microphoneDataQueue.yield(converter.convertBuffer(audio)) + } + } + } + + private func setupAudioPlayback(_ engine: AVAudioEngine) { + let playbackFormat = engine.outputNode.outputFormat(forBus: 0) + audioPlayer = AudioPlayer( + engine: engine, + inputFormat: modelOutputFormat, + outputFormat: playbackFormat + ) + } + + /// Sets up the voice processing I/O, if it needs to be set up. + private func setupVoiceProcessing(_ engine: AVAudioEngine) { + do { + let headphonesConnected = headphonesConnected() + let vpEnabled = engine.inputNode.isVoiceProcessingEnabled + + if !vpEnabled, !headphonesConnected { + try engine.inputNode.setVoiceProcessingEnabled(true) + } else if headphonesConnected, vpEnabled { + // bluetooth headphones have integrated AEC, so if we don't disable VP IO we get muted output + try engine.inputNode.setVoiceProcessingEnabled(false) + } + } catch { + fatalError("Failed to enable voice processing: \(error.localizedDescription)") + } + } + + /// When the output device changes, ensure the audio playback and recording classes are properly restarted. + private func listenForRouteChange() { + routeTask?.cancel() + routeTask = Task { [weak self] in + for await notification in NotificationCenter.default.notifications( + named: AVAudioSession.routeChangeNotification + ) { + await self?.handleRouteChange(notification: notification) + } + } + } + + private func handleRouteChange(notification: Notification) { + guard let userInfo = notification.userInfo, + let reasonValue = userInfo[AVAudioSessionRouteChangeReasonKey] as?
UInt, + let reason = AVAudioSession.RouteChangeReason(rawValue: reasonValue) else { + return + } + + switch reason { + case .newDeviceAvailable, .oldDeviceUnavailable: + spawnAudioProcessingThread() + default: () + } + } + + /// Checks if the current audio route is a headphone. + /// + /// This includes airpods. + private func headphonesConnected() -> Bool { + return AVAudioSession.sharedInstance().currentRoute.outputs.contains { + headphonePortTypes.contains($0.portType) + } + } +} diff --git a/firebaseai/LiveAudioExample/Audio/AudioPlayer.swift b/firebaseai/LiveAudioExample/Audio/AudioPlayer.swift new file mode 100644 index 000000000..069cd2a8e --- /dev/null +++ b/firebaseai/LiveAudioExample/Audio/AudioPlayer.swift @@ -0,0 +1,85 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import AVFoundation +import Foundation + +/// Plays back audio through the primary output device. +class AudioPlayer { + private let engine: AVAudioEngine + private let inputFormat: AVAudioFormat + private let outputFormat: AVAudioFormat + private let playbackNode: AVAudioPlayerNode + private var formatConverter: AVAudioConverter + + init(engine: AVAudioEngine, inputFormat: AVAudioFormat, outputFormat: AVAudioFormat) { + self.engine = engine + + guard let formatConverter = AVAudioConverter(from: inputFormat, to: outputFormat) else { + fatalError("Failed to create the audio converter") + } + + let playbackNode = AVAudioPlayerNode() + + engine.attach(playbackNode) + engine.connect(playbackNode, to: engine.mainMixerNode, format: outputFormat) + + self.inputFormat = inputFormat + self.outputFormat = outputFormat + self.formatConverter = formatConverter + self.playbackNode = playbackNode + } + + deinit { + stop() + } + + /// Queue audio to be played through the output device. + /// + /// Note that in a real app, you'd ideally schedule the data before converting it, and then mark data as consumed after it's been played + /// back. That way, if the audio route changes during playback, you can requeue the buffer on the new output device. + /// + /// For the sake of simplicity, that is not implemented here; a route change will prevent the currently queued conversation from + /// being played through the output device. + public func play(_ audio: Data) { + guard engine.isRunning else { + print("Audio engine needs to be running to play audio.") + return + } + + guard let inputBuffer = AVAudioPCMBuffer.fromInterleavedData( + data: audio, + format: inputFormat + ) else { + fatalError("Failed to create input buffer for playback") + } + + let buffer = formatConverter.convertBuffer(inputBuffer) + + playbackNode.scheduleBuffer(buffer, at: nil) + playbackNode.play() + } + + /// Stops the current audio playing. + public func interrupt() { + playbackNode.stop() + } + + /// Permanently stop all audio playback.
+ public func stop() { + interrupt() + engine.disconnectNodeInput(playbackNode) + engine.disconnectNodeOutput(playbackNode) + } +} diff --git a/firebaseai/LiveAudioExample/Audio/Microphone.swift b/firebaseai/LiveAudioExample/Audio/Microphone.swift new file mode 100644 index 000000000..7d182bad6 --- /dev/null +++ b/firebaseai/LiveAudioExample/Audio/Microphone.swift @@ -0,0 +1,62 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation +import AVFoundation + +/// Microphone bindings using Apple's AudioEngine API. +class Microphone { + /// Data recorded from the microphone. + public let audio: AsyncStream<AVAudioPCMBuffer> + private let audioQueue: AsyncStream<AVAudioPCMBuffer>.Continuation + + private let inputNode: AVAudioInputNode + private let audioEngine: AVAudioEngine + + private var isRunning = false + + init(engine: AVAudioEngine) { + let (audio, audioQueue) = AsyncStream<AVAudioPCMBuffer>.makeStream() + + self.audio = audio + self.audioQueue = audioQueue + inputNode = engine.inputNode + audioEngine = engine + } + + deinit { + stop() + } + + public func start() { + guard !isRunning else { return } + isRunning = true + + // 50ms buffer size for balancing latency and cpu overhead + let targetBufferSize = UInt32(inputNode.outputFormat(forBus: 0).sampleRate / 20) + inputNode + .installTap(onBus: 0, bufferSize: targetBufferSize, format: nil) { [weak self] buffer, _ in + guard let self else { return } + audioQueue.yield(buffer) + } + } + + public func stop() { + audioQueue.finish() + if isRunning { + isRunning = false + inputNode.removeTap(onBus: 0) + } + } +} diff --git a/firebaseai/LiveAudioExample/Models/TranscriptLine.swift b/firebaseai/LiveAudioExample/Models/TranscriptLine.swift new file mode 100644 index 000000000..0e9447c9d --- /dev/null +++ b/firebaseai/LiveAudioExample/Models/TranscriptLine.swift @@ -0,0 +1,28 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import FirebaseAI +import Foundation + +/// A line of a transcript. +struct TranscriptLine: Identifiable, Equatable { + let id = UUID().uuidString + var message: String = "" + + /// Whether text should be added or not. + /// + /// When a transcript line is final, no further text should be + /// added to `message`.
+ var isFinal: Bool = false +} diff --git a/firebaseai/LiveAudioExample/Screens/LiveAudioScreen.swift b/firebaseai/LiveAudioExample/Screens/LiveAudioScreen.swift new file mode 100644 index 000000000..9cf8756aa --- /dev/null +++ b/firebaseai/LiveAudioExample/Screens/LiveAudioScreen.swift @@ -0,0 +1,56 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import MarkdownUI +import SwiftUI +import GenerativeAIUIComponents +import FirebaseAI +import AVFoundation +import Accelerate +import Charts + +struct LiveAudioScreen: View { + let firebaseService: FirebaseAI + @StateObject var viewModel: LiveViewModel + + init(firebaseService: FirebaseAI, backend: BackendOption) { + self.firebaseService = firebaseService + _viewModel = + StateObject(wrappedValue: LiveViewModel(firebaseService: firebaseService, backend: backend)) + } + + var body: some View { + VStack(spacing: 20) { + ModelPhoto(isConnected: viewModel.state == .connected) + TranscriptView(vm: viewModel.transcriptViewModel) + + Spacer() + if let error = viewModel.error { + LiveErrorView(error: error) + } + ConnectButton( + state: viewModel.state, + onConnect: viewModel.connect, + onDisconnect: viewModel.disconnect + ) + } + .padding() + .navigationTitle("Live Audio") + .background(viewModel.backgroundColor ?? .clear) + } +} + +#Preview { + LiveAudioScreen(firebaseService: FirebaseAI.firebaseAI(), backend: .googleAI) +} diff --git a/firebaseai/LiveAudioExample/ViewModels/LiveViewModel.swift b/firebaseai/LiveAudioExample/ViewModels/LiveViewModel.swift new file mode 100644 index 000000000..84ed1802f --- /dev/null +++ b/firebaseai/LiveAudioExample/ViewModels/LiveViewModel.swift @@ -0,0 +1,295 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import FirebaseAI +import Foundation +import OSLog +import AVFoundation +import SwiftUI +import AVKit + +enum LiveViewModelState { + case idle + case connecting + case connected +} + +@MainActor +class LiveViewModel: ObservableObject { + private var logger = Logger(subsystem: Bundle.main.bundleIdentifier!, category: "generative-ai") + + @Published + var error: Error? + + @Published + var state: LiveViewModelState = .idle + + @Published + var transcriptViewModel: TranscriptViewModel = TranscriptViewModel() + + @Published + var backgroundColor: Color? = nil + + private var model: LiveGenerativeModel? + private var liveSession: LiveSession? + + private var audioController: AudioController? 
+ private var microphoneTask = Task {} + + init(firebaseService: FirebaseAI, backend: BackendOption) { + model = firebaseService.liveModel( + modelName: (backend == .googleAI) ? "gemini-live-2.5-flash-preview" : "gemini-2.0-flash-exp", + generationConfig: LiveGenerationConfig( + responseModalities: [.audio], + speech: SpeechConfig(voiceName: "Zephyr", languageCode: "en-US"), + outputAudioTranscription: AudioTranscriptionConfig() + ), + tools: [ + .functionDeclarations([ + FunctionDeclaration( + name: "changeBackgroundColor", + description: "Changes the background color to the specified hex color.", + parameters: [ + "color": .string( + description: "Hex code of the color to change to. (eg, #F54927)" + ), + ], + ), + FunctionDeclaration( + name: "clearBackgroundColor", + description: "Removes the background color.", + parameters: [:] + ), + ]), + ] + ) + } + + /// Start a connection to the model. + /// + /// If a connection is already active, you'll need to call ``LiveViewModel/disconnect()`` first. + func connect() async { + guard let model, state == .idle else { + return + } + + guard await requestRecordPermission() else { + logger.warning("The user denied us permission to record the microphone.") + return + } + + state = .connecting + transcriptViewModel.restart() + + do { + liveSession = try await model.connect() + audioController = try await AudioController() + + try await startRecording() + + state = .connected + try await startProcessingResponses() + } catch { + logger.error("\(String(describing: error))") + self.error = error + await disconnect() + } + } + + /// Disconnects the model. + /// + /// Will stop any pending playback, and the recording of the mic. + func disconnect() async { + await audioController?.stop() + await liveSession?.close() + microphoneTask.cancel() + state = .idle + liveSession = nil + transcriptViewModel.clearPending() + + withAnimation { + backgroundColor = nil + } + } + + /// Starts recording data from the user's microphone, and sends it to the model. + private func startRecording() async throws { + guard let audioController, let liveSession else { return } + + let stream = try await audioController.listenToMic() + microphoneTask = Task { + for await audioBuffer in stream { + await liveSession.sendAudioRealtime(audioBuffer.int16Data()) + } + } + } + + /// Starts queuing responses from the model for parsing. + private func startProcessingResponses() async throws { + guard let liveSession else { return } + + for try await response in liveSession.responses { + await processServerMessage(response) + } + } + + /// Requests permission to record the user's microphone, returning the result. + /// + /// This is a requirement on iOS devices, on top of needing the proper recording + /// intents. + private func requestRecordPermission() async -> Bool { + await withCheckedContinuation { cont in + if #available(iOS 17.0, *) { + Task { + let ok = await AVAudioApplication.requestRecordPermission() + cont.resume(with: .success(ok)) + } + } else { + AVAudioSession.sharedInstance().requestRecordPermission { ok in + cont.resume(with: .success(ok)) + } + } + } + } + + private func processServerMessage(_ message: LiveServerMessage) async { + switch message.payload { + case let .content(content): + await processServerContent(content) + case let .toolCall(toolCall): + await processFunctionCalls(functionCalls: toolCall.functionCalls ?? 
[]) + case .toolCallCancellation: + // we don't have any long running functions to cancel + return + case let .goingAwayNotice(goingAwayNotice): + let time = goingAwayNotice.timeLeft?.description ?? "soon" + logger.warning("Going away in: \(time)") + } + } + + private func processServerContent(_ content: LiveServerContent) async { + if let message = content.modelTurn { + await processAudioMessages(message) + } + + if content.isTurnComplete { + // add a space, so the next time a transcript comes in, it's not squished with the previous one + transcriptViewModel.appendTranscript(" ") + } + + if content.wasInterrupted { + logger.warning("Model was interrupted") + await audioController?.interrupt() + transcriptViewModel.clearPending() + // adds an em dash to indicate that the model was cut off + transcriptViewModel.appendTranscript("— ") + } else if let transcript = content.outputAudioTranscription?.text { + appendAudioTranscript(transcript) + } + } + + private func processAudioMessages(_ content: ModelContent) async { + for part in content.parts { + if let part = part as? InlineDataPart { + if part.mimeType.starts(with: "audio/pcm") { + await audioController?.playAudio(audio: part.data) + } else { + logger.warning("Received non audio inline data part: \(part.mimeType)") + } + } + } + } + + private func processFunctionCalls(functionCalls: [FunctionCallPart]) async { + let responses = functionCalls.map { functionCall in + switch functionCall.name { + case "changeBackgroundColor": + return changeBackgroundColor(args: functionCall.args, id: functionCall.functionId) + case "clearBackgroundColor": + return clearBackgroundColor(id: functionCall.functionId) + default: + logger.debug("Function call: \(String(describing: functionCall))") + fatalError("Unknown function named \"\(functionCall.name)\".") + } + } + + await liveSession?.sendFunctionResponses(responses) + } + + private func appendAudioTranscript(_ transcript: String) { + transcriptViewModel.appendTranscript(transcript) + } + + private func changeBackgroundColor(args: JSONObject, id: String?) -> FunctionResponsePart { + guard case let .string(color) = args["color"] else { + logger.debug("Function arguments: \(String(describing: args))") + fatalError("Missing `color` parameter.") + } + + withAnimation { + backgroundColor = Color(hex: color) + } + + if backgroundColor == nil { + logger.warning("The model sent us an invalid hex color: \(color)") + } + + return FunctionResponsePart( + name: "changeBackgroundColor", + response: JSONObject(), + functionId: id + ) + } + + private func clearBackgroundColor(id: String?) -> FunctionResponsePart { + withAnimation { + backgroundColor = nil + } + + return FunctionResponsePart( + name: "clearBackgroundColor", + response: JSONObject(), + functionId: id + ) + } +} + +extension Color { + /// Creates a new `Color` instance from a hex string. + /// + /// Supports both RGB and RGBA hex strings.
+ init?(hex: String) { + let hex = hex.replacingOccurrences(of: "#", with: "").uppercased() + + var rgb: UInt64 = 0 + guard Scanner(string: hex).scanHexInt64(&rgb) else { return nil } + + var r: CGFloat = 0, g: CGFloat = 0, b: CGFloat = 0, a: CGFloat = 1 + + if hex.count == 6 { + r = CGFloat((rgb & 0xFF0000) >> 16) / 255.0 + g = CGFloat((rgb & 0x00FF00) >> 8) / 255.0 + b = CGFloat(rgb & 0x0000FF) / 255.0 + } else if hex.count == 8 { + r = CGFloat((rgb & 0xFF00_0000) >> 24) / 255.0 + g = CGFloat((rgb & 0x00FF_0000) >> 16) / 255.0 + b = CGFloat((rgb & 0x0000_FF00) >> 8) / 255.0 + a = CGFloat(rgb & 0x0000_00FF) / 255.0 + } else { + return nil + } + + self.init(red: r, green: g, blue: b, opacity: a) + } +} diff --git a/firebaseai/LiveAudioExample/ViewModels/TranscriptViewModel.swift b/firebaseai/LiveAudioExample/ViewModels/TranscriptViewModel.swift new file mode 100644 index 000000000..4b4c81214 --- /dev/null +++ b/firebaseai/LiveAudioExample/ViewModels/TranscriptViewModel.swift @@ -0,0 +1,150 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import SwiftUI +import Foundation + +/// How long to wait (in milliseconds) between showing the next character. +private let CharDelayMS = 65 + +/// The intended number of characters in a line. +/// +/// Can exceed this if the line doesn't end in a space or punctuation. +private let LineCharacterLength = 20 + +/// The max number of lines to hold references for at a time. +private let MaxLines = 3 + +/// Creates lines of transcripts to display, populated in a typewriter manner. +@MainActor +class TranscriptViewModel: ObservableObject { + /// Lines of characters to display. + @Published + var audioTranscripts: [TranscriptLine] = [] + + private var pendingText = [Character]() + private var processTextTask: Task<Void, Never>? + + init() { + processTask() + } + + deinit { + processTextTask?.cancel() + } + + /// Queues text to show. + /// + /// Since the text is queued, the text won't be displayed until the previous + /// pending text is populated. + func appendTranscript(_ text: String) { + pendingText.append(contentsOf: text) + } + + /// Clears any text from the queue that is pending being added to a transcript line. + func clearPending() { + pendingText.removeAll() + } + + /// Restarts the class to be a fresh instance. + /// + /// Effectively, this removes all the currently tracked transcript lines, + /// and any pending text. + func restart() { + clearPending() + audioTranscripts.removeAll() + } + + /// Long running task for processing characters. + private func processTask() { + processTextTask = Task { + var delay = CharDelayMS + while !Task.isCancelled { + try?
await Task.sleep(for: .milliseconds(delay)) + + delay = processNextCharacter() + } + } + } + + private func processNextCharacter() -> Int { + guard !pendingText.isEmpty else { + return CharDelayMS // Default delay if no text is pending + } + + let char = pendingText.removeFirst() + var line = popCurrentLine() + line.message.append(char) + + let nextDelay = determineNextDelayAndFinalize(for: char, in: &line) + + updateTranscripts(with: line) + + return nextDelay + } + + /// Determines the delay for the next character, finalizing the line as needed. + /// + /// We don't have a delay when outputting whitespace or the end of a sentence. + /// + /// We also don't mark a line as "complete" unless it ends in whitespace or some + /// punctuation; as this helps avoid weird situations where words are split across lines. + /// + /// - Returns: The MS delay before working on the next character in the queue. + private func determineNextDelayAndFinalize(for char: Character, + in line: inout TranscriptLine) -> Int { + if char.isWhitespace || char.isEndOfSentence { + if line.message.count >= LineCharacterLength { + line.isFinal = true + } + + return 0 + } + + return CharDelayMS + } + + /// Updates `audioTranscripts` with the current line. + /// + /// Will remove the oldest line if we exceed `MaxLines`. + private func updateTranscripts(with line: TranscriptLine) { + audioTranscripts.append(line) + + if audioTranscripts.count > MaxLines { + // fade out the removal; makes it less jumpy during rendering when lines are moved up + withAnimation { + _ = audioTranscripts.removeFirst() + } + } + } + + /// Removes the last line from `audioTranscripts`. + /// + /// If the last line is already finalized, a new line will be returned instead. + private func popCurrentLine() -> TranscriptLine { + if audioTranscripts.last?.isFinal != false { + return TranscriptLine() + } + return audioTranscripts.removeLast() + } +} + +extension Character { + /// Marker for punctuation that dictates the end of a sentence. + /// + /// Namely, this checks for `.`, `!` and `?`. + var isEndOfSentence: Bool { + self == "." || self == "!" || self == "?" + } +} diff --git a/firebaseai/LiveAudioExample/Views/ConnectButton.swift b/firebaseai/LiveAudioExample/Views/ConnectButton.swift new file mode 100644 index 000000000..48189ce36 --- /dev/null +++ b/firebaseai/LiveAudioExample/Views/ConnectButton.swift @@ -0,0 +1,103 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import SwiftUI + +struct ConnectButton: View { + var state: LiveViewModelState + var onConnect: () async -> Void + var onDisconnect: () async -> Void + + @State private var gradientAngle: Angle = .zero + + private var isConnected: Bool { state == .connected } + + private var title: String { + switch state { + case .connected: "Stop" + case .connecting: "Connecting..." 
+ case .idle: "Start" + } + } + + private var image: String { + switch state { + case .connected: "stop.fill" + case .connecting: "wifi.square.fill" + case .idle: "play.square.fill" + } + } + + private var color: Color { + switch state { + case .connected: Color.red + case .connecting: Color.secondary + case .idle: Color.accentColor + } + } + + private var gradientColors: [Color] { + switch state { + case .connected: [] + case .connecting: [.secondary, .white] + case .idle: [.red, .blue, .green, .yellow, .red] + } + } + + var body: some View { + Button(action: onClick) { + Label(title, systemImage: image) + .font(.title2.bold()) + .frame(maxWidth: .infinity) + .padding() + .background(color) + .foregroundStyle(.white) + .clipShape(RoundedRectangle(cornerRadius: 12)) + }.disabled(state == .connecting).overlay( + RoundedRectangle(cornerRadius: 12) + .stroke( + AngularGradient( + gradient: Gradient(colors: gradientColors), + center: .center, + startAngle: gradientAngle, + endAngle: gradientAngle + .degrees(360) + ), + lineWidth: 3 + ) + ) + .onAppear { + withAnimation(.linear(duration: 5).repeatForever(autoreverses: false)) { + self.gradientAngle = .degrees(360) + } + } + } + + private func onClick() { + Task { + if isConnected { + await onDisconnect() + } else { + await onConnect() + } + } + } +} + +#Preview { + VStack(spacing: 30) { + ConnectButton(state: .idle, onConnect: {}, onDisconnect: {}) + ConnectButton(state: .connecting, onConnect: {}, onDisconnect: {}) + ConnectButton(state: .connected, onConnect: {}, onDisconnect: {}) + }.padding(.horizontal) +} diff --git a/firebaseai/LiveAudioExample/Views/LiveErrorDetailsView.swift b/firebaseai/LiveAudioExample/Views/LiveErrorDetailsView.swift new file mode 100644 index 000000000..4f325d774 --- /dev/null +++ b/firebaseai/LiveAudioExample/Views/LiveErrorDetailsView.swift @@ -0,0 +1,85 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +@testable import FirebaseAI +import MarkdownUI +import SwiftUI + +struct LiveErrorDetailsView: View { + var error: Error + + var body: some View { + NavigationView { + Form { + if let title = error.title { + Section("Error type") { + Text(title) + } + } + + Section("Details") { + SubtitleFormRow(title: "Error description", value: error.localizedDescription) + } + } + .navigationTitle("Error details") + .navigationBarTitleDisplayMode(.inline) + } + } +} + +private struct SubtitleFormRow: View { + var title: String + var value: String + + var body: some View { + VStack(alignment: .leading) { + Text(title).font(.subheadline) + Text(value) + } + } +} + +private extension Error { + var title: String? 
{ + switch self { + case _ as LiveSessionSetupError: + "Failed to set up live session" + case _ as LiveSessionLostConnectionError: + "Lost connection to the model" + case _ as LiveSessionUnexpectedClosureError: + "Session was closed" + case _ as LiveSessionUnsupportedMessageError: + "Unsupported model message" + default: + nil + } + } +} + +#Preview("Live error") { + let cause = NSError(domain: "network.api", code: 1, userInfo: [ + NSLocalizedDescriptionKey: "Network timed out.", + ]) + let error = LiveSessionLostConnectionError(underlyingError: cause) + + LiveErrorDetailsView(error: error) +} + +#Preview("Unexpected error") { + let error = NSError(domain: "network.api", code: 1, userInfo: [ + NSLocalizedDescriptionKey: "Network timed out.", + ]) + + LiveErrorDetailsView(error: error) +} diff --git a/firebaseai/LiveAudioExample/Views/LiveErrorView.swift b/firebaseai/LiveAudioExample/Views/LiveErrorView.swift new file mode 100644 index 000000000..8e3aafaa2 --- /dev/null +++ b/firebaseai/LiveAudioExample/Views/LiveErrorView.swift @@ -0,0 +1,44 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +@testable import FirebaseAI +import SwiftUI + +struct LiveErrorView: View { + var error: Error + @State private var isDetailsSheetPresented = false + + var body: some View { + HStack { + Text("An error occurred.") + Button(action: { isDetailsSheetPresented.toggle() }) { + Image(systemName: "info.circle") + }.foregroundStyle(.red) + } + .frame(maxWidth: .infinity, alignment: .center) + .listRowSeparator(.hidden) + .sheet(isPresented: $isDetailsSheetPresented) { + LiveErrorDetailsView(error: error) + } + } +} + +#Preview { + let cause = NSError(domain: "network.api", code: 1, userInfo: [ + NSLocalizedDescriptionKey: "Network timed out.", + ]) + let error = LiveSessionLostConnectionError(underlyingError: cause) + + LiveErrorView(error: error) +} diff --git a/firebaseai/LiveAudioExample/Views/ModelPhoto.swift b/firebaseai/LiveAudioExample/Views/ModelPhoto.swift new file mode 100644 index 000000000..a2e8a427e --- /dev/null +++ b/firebaseai/LiveAudioExample/Views/ModelPhoto.swift @@ -0,0 +1,65 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +import SwiftUI + +struct ModelPhoto: View { + var isConnected = false + + @State private var gradientAngle: Angle = .zero + + var colors: [Color] { + if isConnected { + [.red, .blue, .green, .yellow, .red] + } else { + [Color(red: 0.5, green: 0.5, blue: 0.5, opacity: 0.3)] + } + } + + var body: some View { + Image("gemini-logo").resizable().aspectRatio(contentMode: .fit).padding().colorMultiply(.black) + .maskedOverlay { + AngularGradient( + gradient: Gradient(colors: colors), + center: .leading, + startAngle: gradientAngle, + endAngle: gradientAngle + .degrees(360) + ) + }.onAppear { + withAnimation(.linear(duration: 10).repeatForever(autoreverses: false)) { + self.gradientAngle = .degrees(360) + } + } + } +} + +extension View { + /// Creates an overlay which takes advantage of a mask to respect the size of the view. + /// + /// Especially useful when you want to create an overlay of a view with a non-standard + /// size. + @ViewBuilder + func maskedOverlay(mask: () -> some View) -> some View { + overlay { + mask().mask { self } + } + } +} + +#Preview { + VStack { + ModelPhoto(isConnected: true) + ModelPhoto(isConnected: false) + } +} diff --git a/firebaseai/LiveAudioExample/Views/TranscriptView.swift b/firebaseai/LiveAudioExample/Views/TranscriptView.swift new file mode 100644 index 000000000..aa151586c --- /dev/null +++ b/firebaseai/LiveAudioExample/Views/TranscriptView.swift @@ -0,0 +1,42 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import SwiftUI + +struct TranscriptView: View { + @ObservedObject var vm: TranscriptViewModel + + var body: some View { + VStack { + ForEach(vm.audioTranscripts) { transcript in + Text(transcript.message) + .bold() + .font(.title) + .frame(maxWidth: .infinity, alignment: .leading) + .transition(.opacity) + .padding(.horizontal) + } + } + } +} + +#Preview { + let vm = TranscriptViewModel() + TranscriptView(vm: vm).onAppear { + vm + .appendTranscript( + "The sky is blue primarily because of a phenomenon called Rayleigh scattering, where tiny molecules of gas (mainly nitrogen and oxygen) in Earth's atmosphere scatter sunlight in all directions." + ) + } +}