diff --git a/AudioCap.xcodeproj/project.pbxproj b/AudioCap.xcodeproj/project.pbxproj
index 629b31c..471b220 100644
--- a/AudioCap.xcodeproj/project.pbxproj
+++ b/AudioCap.xcodeproj/project.pbxproj
@@ -7,34 +7,40 @@
 	objects = {
 
 /* Begin PBXBuildFile section */
+		CC537FC52E09E7CA00503A96 /* RealtimeAudioMonitor.swift in Sources */ = {isa = PBXBuildFile; fileRef = CC537FC42E09E7CA00503A96 /* RealtimeAudioMonitor.swift */; };
+		CC537FC72E09E7D500503A96 /* VoiceActivityDetector.swift in Sources */ = {isa = PBXBuildFile; fileRef = CC537FC62E09E7D500503A96 /* VoiceActivityDetector.swift */; };
+		CC98B1A62E17370900A4EDAB /* ProcessSelectionView.swift in Sources */ = {isa = PBXBuildFile; fileRef = CC98B1A02E17370900A4EDAB /* ProcessSelectionView.swift */; };
+		CC98B1A72E17370900A4EDAB /* RootView.swift in Sources */ = {isa = PBXBuildFile; fileRef = CC98B1A42E17370900A4EDAB /* RootView.swift */; };
+		CC98B1A82E17370900A4EDAB /* RecordingIndicator.swift in Sources */ = {isa = PBXBuildFile; fileRef = CC98B1A22E17370900A4EDAB /* RecordingIndicator.swift */; };
+		CC98B1A92E17370900A4EDAB /* FileProxyView.swift in Sources */ = {isa = PBXBuildFile; fileRef = CC98B19F2E17370900A4EDAB /* FileProxyView.swift */; };
+		CC98B1AA2E17370900A4EDAB /* RecordingView.swift in Sources */ = {isa = PBXBuildFile; fileRef = CC98B1A32E17370900A4EDAB /* RecordingView.swift */; };
+		CC98B1AB2E17370900A4EDAB /* RealtimeVADView.swift in Sources */ = {isa = PBXBuildFile; fileRef = CC98B1A12E17370900A4EDAB /* RealtimeVADView.swift */; };
 		F431728D2BF68C0C00D918A3 /* AudioRecordingPermission.swift in Sources */ = {isa = PBXBuildFile; fileRef = F431728C2BF68C0C00D918A3 /* AudioRecordingPermission.swift */; };
 		F43172902BF6A92900D918A3 /* AudioProcessController.swift in Sources */ = {isa = PBXBuildFile; fileRef = F431728F2BF6A92900D918A3 /* AudioProcessController.swift */; };
-		F43172922BF6ABB000D918A3 /* ProcessSelectionView.swift in Sources */ = {isa = PBXBuildFile; fileRef = F43172912BF6ABB000D918A3 /* ProcessSelectionView.swift */; };
 		F43172942BF6B01000D918A3 /* ProcessTap.swift in Sources */ = {isa = PBXBuildFile; fileRef = F43172932BF6B01000D918A3 /* ProcessTap.swift */; };
 		F43172962BF787C300D918A3 /* CoreAudioUtils.swift in Sources */ = {isa = PBXBuildFile; fileRef = F43172952BF787C300D918A3 /* CoreAudioUtils.swift */; };
-		F431729A2BF7A51A00D918A3 /* RecordingView.swift in Sources */ = {isa = PBXBuildFile; fileRef = F43172992BF7A51A00D918A3 /* RecordingView.swift */; };
-		F431729C2BF7B66A00D918A3 /* FileProxyView.swift in Sources */ = {isa = PBXBuildFile; fileRef = F431729B2BF7B66A00D918A3 /* FileProxyView.swift */; };
-		F43172A12BF7BF1300D918A3 /* RecordingIndicator.swift in Sources */ = {isa = PBXBuildFile; fileRef = F43172A02BF7BF1300D918A3 /* RecordingIndicator.swift */; };
 		F47AD9422BF5B61E005B75AC /* AudioCapApp.swift in Sources */ = {isa = PBXBuildFile; fileRef = F47AD9412BF5B61E005B75AC /* AudioCapApp.swift */; };
-		F47AD9442BF5B61E005B75AC /* RootView.swift in Sources */ = {isa = PBXBuildFile; fileRef = F47AD9432BF5B61E005B75AC /* RootView.swift */; };
 		F47AD9462BF5B61F005B75AC /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = F47AD9452BF5B61F005B75AC /* Assets.xcassets */; };
 		F47AD9492BF5B61F005B75AC /* Preview Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = F47AD9482BF5B61F005B75AC /* Preview Assets.xcassets */; };
 /* End PBXBuildFile section */
 
 /* Begin PBXFileReference section */
+		CC537FC42E09E7CA00503A96 /* RealtimeAudioMonitor.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RealtimeAudioMonitor.swift; sourceTree = "<group>"; };
+		CC537FC62E09E7D500503A96 /* VoiceActivityDetector.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = VoiceActivityDetector.swift; sourceTree = "<group>"; };
+		CC98B19F2E17370900A4EDAB /* FileProxyView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FileProxyView.swift; sourceTree = "<group>"; };
+		CC98B1A02E17370900A4EDAB /* ProcessSelectionView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ProcessSelectionView.swift; sourceTree = "<group>"; };
+		CC98B1A12E17370900A4EDAB /* RealtimeVADView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RealtimeVADView.swift; sourceTree = "<group>"; };
+		CC98B1A22E17370900A4EDAB /* RecordingIndicator.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RecordingIndicator.swift; sourceTree = "<group>"; };
+		CC98B1A32E17370900A4EDAB /* RecordingView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RecordingView.swift; sourceTree = "<group>"; };
+		CC98B1A42E17370900A4EDAB /* RootView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RootView.swift; sourceTree = "<group>"; };
 		F431728C2BF68C0C00D918A3 /* AudioRecordingPermission.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AudioRecordingPermission.swift; sourceTree = "<group>"; };
 		F431728E2BF691D900D918A3 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist; path = Info.plist; sourceTree = "<group>"; };
 		F431728F2BF6A92900D918A3 /* AudioProcessController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AudioProcessController.swift; sourceTree = "<group>"; };
-		F43172912BF6ABB000D918A3 /* ProcessSelectionView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ProcessSelectionView.swift; sourceTree = "<group>"; };
 		F43172932BF6B01000D918A3 /* ProcessTap.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ProcessTap.swift; sourceTree = "<group>"; };
 		F43172952BF787C300D918A3 /* CoreAudioUtils.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CoreAudioUtils.swift; sourceTree = "<group>"; };
-		F43172992BF7A51A00D918A3 /* RecordingView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RecordingView.swift; sourceTree = "<group>"; };
-		F431729B2BF7B66A00D918A3 /* FileProxyView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FileProxyView.swift; sourceTree = "<group>"; };
 		F431729E2BF7BB0E00D918A3 /* Main.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; path = Main.xcconfig; sourceTree = "<group>"; };
-		F43172A02BF7BF1300D918A3 /* RecordingIndicator.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RecordingIndicator.swift; sourceTree = "<group>"; };
 		F47AD93E2BF5B61E005B75AC /* AudioCap.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = AudioCap.app; sourceTree = BUILT_PRODUCTS_DIR; };
 		F47AD9412BF5B61E005B75AC /* AudioCapApp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AudioCapApp.swift; sourceTree = "<group>"; };
-		F47AD9432BF5B61E005B75AC /* RootView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RootView.swift; sourceTree = "<group>"; };
 		F47AD9452BF5B61F005B75AC /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; };
 		F47AD9482BF5B61F005B75AC /* Preview Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = "Preview Assets.xcassets"; sourceTree = "<group>"; };
 		F47AD94A2BF5B61F005B75AC /* AudioCap.entitlements */ = {isa = PBXFileReference; lastKnownFileType = text.plist.entitlements; path = AudioCap.entitlements; sourceTree = "<group>"; };
@@ -51,6 +57,19 @@
 /* End PBXFrameworksBuildPhase section */
 
 /* Begin PBXGroup section */
+		CC98B1A52E17370900A4EDAB /* Views */ = {
+			isa = PBXGroup;
+			children = (
+				CC98B19F2E17370900A4EDAB /* FileProxyView.swift */,
+				CC98B1A02E17370900A4EDAB /* ProcessSelectionView.swift */,
+				CC98B1A12E17370900A4EDAB /* RealtimeVADView.swift */,
+				CC98B1A22E17370900A4EDAB /* RecordingIndicator.swift */,
+				CC98B1A32E17370900A4EDAB /* RecordingView.swift */,
+				CC98B1A42E17370900A4EDAB /* RootView.swift */,
+			);
+			path = Views;
+			sourceTree = "<group>";
+		};
 		F431729D2BF7BB0600D918A3 /* Config */ = {
 			isa = PBXGroup;
 			children = (
@@ -64,6 +83,8 @@
 			children = (
 				F431728C2BF68C0C00D918A3 /* AudioRecordingPermission.swift */,
 				F431728F2BF6A92900D918A3 /* AudioProcessController.swift */,
+				CC537FC42E09E7CA00503A96 /* RealtimeAudioMonitor.swift */,
+				CC537FC62E09E7D500503A96 /* VoiceActivityDetector.swift */,
 				F43172932BF6B01000D918A3 /* ProcessTap.swift */,
 				F43172952BF787C300D918A3 /* CoreAudioUtils.swift */,
 			);
@@ -92,11 +113,7 @@
 				F431729D2BF7BB0600D918A3 /* Config */,
 				F431729F2BF7BD4700D918A3 /* ProcessTap */,
 				F47AD9412BF5B61E005B75AC /* AudioCapApp.swift */,
-				F47AD9432BF5B61E005B75AC /* RootView.swift */,
-				F43172912BF6ABB000D918A3 /* ProcessSelectionView.swift */,
-				F43172992BF7A51A00D918A3 /* RecordingView.swift */,
-				F431729B2BF7B66A00D918A3 /* FileProxyView.swift */,
-				F43172A02BF7BF1300D918A3 /* RecordingIndicator.swift */,
+				CC98B1A52E17370900A4EDAB /* Views */,
 				F431728E2BF691D900D918A3 /* Info.plist */,
 				F47AD9452BF5B61F005B75AC /* Assets.xcassets */,
 				F47AD94A2BF5B61F005B75AC /* AudioCap.entitlements */,
@@ -141,7 +158,7 @@
 			attributes = {
 				BuildIndependentTargetsInParallel = 1;
 				LastSwiftUpdateCheck = 1530;
-				LastUpgradeCheck = 1530;
+				LastUpgradeCheck = 1640;
 				TargetAttributes = {
 					F47AD93D2BF5B61E005B75AC = {
 						CreatedOnToolsVersion = 15.3;
@@ -183,16 +200,19 @@
 			isa = PBXSourcesBuildPhase;
 			buildActionMask = 2147483647;
 			files = (
+				CC98B1A62E17370900A4EDAB /* ProcessSelectionView.swift in Sources */,
+				CC98B1A72E17370900A4EDAB /* RootView.swift in Sources */,
+				CC98B1A82E17370900A4EDAB /* RecordingIndicator.swift in Sources */,
+				CC98B1A92E17370900A4EDAB /* FileProxyView.swift in Sources */,
+				CC98B1AA2E17370900A4EDAB /* RecordingView.swift in Sources */,
+				CC98B1AB2E17370900A4EDAB /* RealtimeVADView.swift in Sources */,
+				CC537FC52E09E7CA00503A96 /* RealtimeAudioMonitor.swift in Sources */,
 				F43172902BF6A92900D918A3 /* AudioProcessController.swift in Sources */,
 				F43172942BF6B01000D918A3 /* ProcessTap.swift in Sources */,
-				F43172922BF6ABB000D918A3 /* ProcessSelectionView.swift in Sources */,
-				F47AD9442BF5B61E005B75AC /* RootView.swift in Sources */,
+				CC537FC72E09E7D500503A96 /* VoiceActivityDetector.swift in Sources */,
 				F47AD9422BF5B61E005B75AC /* AudioCapApp.swift in Sources */,
 				F431728D2BF68C0C00D918A3 /* AudioRecordingPermission.swift in Sources */,
-				F431729A2BF7A51A00D918A3 /* RecordingView.swift in Sources */,
-				F43172A12BF7BF1300D918A3 /* RecordingIndicator.swift in Sources */,
 				F43172962BF787C300D918A3 /* CoreAudioUtils.swift in Sources */,
-				F431729C2BF7B66A00D918A3 /* FileProxyView.swift in Sources */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
@@ -234,7 +254,9 @@
 			CLANG_WARN_UNREACHABLE_CODE = YES;
 			CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
 			COPY_PHASE_STRIP = NO;
+			DEAD_CODE_STRIPPING = YES;
 			DEBUG_INFORMATION_FORMAT = dwarf;
+			DEVELOPMENT_TEAM = BD72FKWLAY;
 			ENABLE_STRICT_OBJC_MSGSEND = YES;
 			ENABLE_TESTABILITY = YES;
 			ENABLE_USER_SCRIPT_SANDBOXING = YES;
@@ -299,7 +321,9 @@
 			CLANG_WARN_UNREACHABLE_CODE = YES;
 			CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
 			COPY_PHASE_STRIP = NO;
+			DEAD_CODE_STRIPPING = YES;
 			DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
+			DEVELOPMENT_TEAM = BD72FKWLAY;
 			ENABLE_NS_ASSERTIONS = NO;
 			ENABLE_STRICT_OBJC_MSGSEND = YES;
 			ENABLE_USER_SCRIPT_SANDBOXING = YES;
@@ -328,8 +352,8 @@
 			CODE_SIGN_ENTITLEMENTS = AudioCap/AudioCap.entitlements;
 			CODE_SIGN_STYLE = Automatic;
 			COMBINE_HIDPI_IMAGES = YES;
+			DEAD_CODE_STRIPPING = YES;
 			DEVELOPMENT_ASSET_PATHS = "\"AudioCap/Preview Content\"";
-			DEVELOPMENT_TEAM = 8C7439RJLG;
 			ENABLE_HARDENED_RUNTIME = YES;
 			ENABLE_PREVIEWS = YES;
 			GENERATE_INFOPLIST_FILE = YES;
@@ -354,8 +378,8 @@
 			CODE_SIGN_ENTITLEMENTS = AudioCap/AudioCap.entitlements;
 			CODE_SIGN_STYLE = Automatic;
 			COMBINE_HIDPI_IMAGES = YES;
+			DEAD_CODE_STRIPPING = YES;
 			DEVELOPMENT_ASSET_PATHS = "\"AudioCap/Preview Content\"";
-			DEVELOPMENT_TEAM = 8C7439RJLG;
 			ENABLE_HARDENED_RUNTIME = YES;
 			ENABLE_PREVIEWS = YES;
 			GENERATE_INFOPLIST_FILE = YES;
diff --git a/AudioCap.xcodeproj/xcshareddata/xcschemes/AudioCap.xcscheme b/AudioCap.xcodeproj/xcshareddata/xcschemes/AudioCap.xcscheme
index c985a06..1f546fc 100644
--- a/AudioCap.xcodeproj/xcshareddata/xcschemes/AudioCap.xcscheme
+++ b/AudioCap.xcodeproj/xcshareddata/xcschemes/AudioCap.xcscheme
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <Scheme
-   LastUpgradeVersion = "1530"
+   LastUpgradeVersion = "1640"
    version = "1.5">
    <BuildAction
       parallelizeBuildables = "YES"
diff --git a/AudioCap/ProcessTap/VoiceActivityDetector.swift b/AudioCap/ProcessTap/VoiceActivityDetector.swift
new file mode 100644
--- /dev/null
+++ b/AudioCap/ProcessTap/VoiceActivityDetector.swift
+        guard let floatChannelData = buffer.floatChannelData, buffer.frameLength > 0 else {
+            logger.warning("Invalid audio buffer received")
+            return
+        }
+
+        let frameLength = Int(buffer.frameLength)
+        let channelCount = Int(buffer.format.channelCount)
+        let sampleRate = buffer.format.sampleRate
+
+        // Convert to mono if stereo (mix channels)
+        var monoSamples: [Float]
+        if channelCount == 1 {
+            monoSamples = Array(UnsafeBufferPointer(start: floatChannelData[0], count: frameLength))
+        } else {
+            // Mix stereo to mono
+            monoSamples = []
+            monoSamples.reserveCapacity(frameLength)
+
+            for i in 0..<frameLength {
+                var sum: Float = 0
+                for channel in 0..<channelCount {
+                    sum += floatChannelData[channel][i]
+                }
+                monoSamples.append(sum / Float(channelCount))
+            }
+        }
+
+        // Accumulate samples and slice off complete analysis frames
+        accumulatedSamples.append(contentsOf: monoSamples)
+
+        while accumulatedSamples.count >= actualFrameSize {
+            let frameData = Array(accumulatedSamples.prefix(actualFrameSize))
+            accumulatedSamples.removeFirst(actualFrameSize)
+
+            analyzeFrame(frameData, sampleRate: sampleRate)
+        }
+    }
+
+    /// Analyze a single frame of audio data
+    private func analyzeFrame(_ samples: [Float], sampleRate: Double) {
+        let rms = calculateRMS(samples)
+        let isSpeech = rms > rmsThreshold
+        let timestamp = CFAbsoluteTimeGetCurrent()
+
+        // Update current state
+        currentRMS = rms
+        isSpeechDetected = isSpeech
+        totalFramesProcessed += 1
+
+        if isSpeech {
+            speechFrameCount += 1
+        }
+
+        // Create analysis result
+        let analysis = AudioFrameAnalysis(
+            timestamp: timestamp,
+            rmsValue: rms,
+            isSpeech: isSpeech,
+            frameSize: samples.count,
+            sampleRate: sampleRate
+        )
+
+        // Add to recent frames (keep only latest frames)
+        recentFrames.append(analysis)
+        if recentFrames.count > maxRecentFrames {
+            recentFrames.removeFirst()
+        }
+
+        logger.debug("Frame analyzed: \(analysis.description)")
+    }
+
+    /// Calculate RMS (Root Mean Square) of audio samples
+    private func calculateRMS(_ samples: [Float]) -> Float {
+        guard !samples.isEmpty else { return 0.0 }
+
+        var rms: Float = 0.0
+
+        // Use Accelerate framework for efficient computation
+        vDSP_rmsqv(samples, 1, &rms, vDSP_Length(samples.count))
+
+        return rms
+    }
+
+    /// Reset analysis state
+    func reset() {
+        accumulatedSamples.removeAll()
+        recentFrames.removeAll()
+        currentRMS = 0.0
+        isSpeechDetected = false
+        totalFramesProcessed = 0
+        speechFrameCount = 0
+        logger.debug("VAD state reset")
+    }
+
+    /// Get speech activity percentage
+    var speechActivityPercentage: Float {
+        guard totalFramesProcessed > 0 else { return 0.0 }
+        return Float(speechFrameCount) / Float(totalFramesProcessed) * 100.0
+    }
+}
\ No newline at end of file
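The file header above is reconstructed (the path follows the ProcessTap group in the pbxproj; the index hashes are unrecoverable), and the opening declarations of `VoiceActivityDetector.swift` (imports, the `AudioFrameAnalysis` struct, the class and its stored properties) did not survive the markup stripping. The sketch below is inferred purely from how the surviving code uses those names: the UI copy pins `rmsThreshold` at 0.01 and `actualFrameSize` at 4800, while the `maxRecentFrames` value, the logger strings, and the ingest method's name are assumptions.

```swift
import Foundation
import AVFoundation
import Accelerate
import OSLog

/// Field names and types come from the surviving initializer call; Identifiable
/// is implied by the plain `ForEach` in RealtimeVADView, and `description` by
/// the `logger.debug` call in `analyzeFrame`.
struct AudioFrameAnalysis: Identifiable {
    let id = UUID()
    let timestamp: CFAbsoluteTime
    let rmsValue: Float
    let isSpeech: Bool
    let frameSize: Int
    let sampleRate: Double

    var description: String {
        String(format: "rms=%.4f speech=%@ (%d samples @ %.0f Hz)",
               rmsValue, isSpeech ? "yes" : "no", frameSize, sampleRate)
    }
}

@Observable
final class VoiceActivityDetector {
    // Constants pinned by the UI copy ("4800-sample frames", "RMS threshold of 0.01").
    let rmsThreshold: Float = 0.01
    let actualFrameSize = 4800          // 0.1 s of audio at 48 kHz

    // Value assumed; only the property's existence is known from analyzeFrame.
    private let maxRecentFrames = 100

    private(set) var currentRMS: Float = 0
    private(set) var isSpeechDetected = false
    private(set) var totalFramesProcessed = 0
    private(set) var speechFrameCount = 0
    private(set) var recentFrames: [AudioFrameAnalysis] = []
    private var accumulatedSamples: [Float] = []

    // Subsystem/category strings are assumptions.
    private let logger = Logger(subsystem: "AudioCap", category: "VoiceActivityDetector")

    // The surviving guard-and-mixdown body in the diff above belongs to a
    // buffer-ingest method whose declaration was lost; plausibly:
    // func processBuffer(_ buffer: AVAudioPCMBuffer) { ... }
}
```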
diff --git a/AudioCap/FileProxyView.swift b/AudioCap/Views/FileProxyView.swift
similarity index 100%
rename from AudioCap/FileProxyView.swift
rename to AudioCap/Views/FileProxyView.swift
diff --git a/AudioCap/ProcessSelectionView.swift b/AudioCap/Views/ProcessSelectionView.swift
similarity index 52%
rename from AudioCap/ProcessSelectionView.swift
rename to AudioCap/Views/ProcessSelectionView.swift
index b9262c4..8e4667d 100644
--- a/AudioCap/ProcessSelectionView.swift
+++ b/AudioCap/Views/ProcessSelectionView.swift
@@ -5,6 +5,7 @@ struct ProcessSelectionView: View {
     @State private var processController = AudioProcessController()
     @State private var tap: ProcessTap?
     @State private var recorder: ProcessTapRecorder?
+    @State private var realtimeMonitor: RealtimeAudioMonitor?
 
     @State private var selectedProcess: AudioProcess?
@@ -39,8 +40,10 @@ struct ProcessSelectionView: View {
 
                     if let newValue {
                         setupRecording(for: newValue)
+                        setupRealtimeMonitoring(for: newValue)
                     } else if oldValue == tap?.process {
                         teardownTap()
+                        teardownRealtimeMonitoring()
                     }
                 }
             } header: {
@@ -63,6 +66,69 @@ struct ProcessSelectionView: View {
                 }
             }
         }
+
+        // Real-time VAD monitoring section
+        if let monitor = realtimeMonitor {
+            Section {
+                VStack(spacing: 12) {
+                    HStack {
+                        if monitor.isMonitoring {
+                            Button("Stop Real-time Analysis") {
+                                monitor.stopMonitoring()
+                            }
+                            .buttonStyle(.bordered)
+                        } else {
+                            Button("Start Real-time Analysis") {
+                                handlingErrors {
+                                    try monitor.startMonitoring()
+                                }
+                            }
+                            .buttonStyle(.borderedProminent)
+                        }
+
+                        Spacer()
+
+                        if monitor.isMonitoring {
+                            HStack {
+                                Circle()
+                                    .fill(Color.green)
+                                    .frame(width: 8, height: 8)
+                                    .opacity(0.8)
+                                    .animation(.easeInOut(duration: 1.0).repeatForever(autoreverses: true), value: monitor.isMonitoring)
+
+                                Text("Live")
+                                    .font(.caption)
+                                    .foregroundColor(.green)
+                            }
+                        }
+                    }
+
+                    if monitor.isMonitoring {
+                        RealtimeVADView(vad: monitor.voiceActivityDetector)
+                    } else {
+                        Text("Real-time voice activity detection analyzes audio in 4800-sample frames (0.1s at 48kHz) using RMS threshold of 0.01")
+                            .font(.caption)
+                            .foregroundColor(.secondary)
+                            .multilineTextAlignment(.center)
+                            .padding(.vertical, 8)
+                    }
+                }
+            } header: {
+                HStack {
+                    Image(systemName: "waveform.badge.magnifyingglass")
+                        .foregroundColor(.blue)
+                    Text("Real-time Analysis")
+                        .font(.headline)
+                }
+            }
+
+            if let errorMessage = monitor.errorMessage {
+                Text(errorMessage)
+                    .font(.caption)
+                    .foregroundStyle(.red)
+                    .padding(.top, 4)
+            }
+        }
     }
 
     private func setupRecording(for process: AudioProcess) {
@@ -86,6 +152,24 @@ struct ProcessSelectionView: View {
     private func teardownTap() {
         tap = nil
     }
+
+    private func setupRealtimeMonitoring(for process: AudioProcess) {
+        let monitor = RealtimeAudioMonitor(process: process)
+        self.realtimeMonitor = monitor
+    }
+
+    private func teardownRealtimeMonitoring() {
+        realtimeMonitor?.stopMonitoring()
+        realtimeMonitor = nil
+    }
+
+    private func handlingErrors(perform block: () throws -> Void) {
+        do {
+            try block()
+        } catch {
+            NSAlert(error: error).runModal()
+        }
+    }
 }
 
 extension URL {
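The diff for the new `RealtimeAudioMonitor.swift` was lost to the same markup stripping; only its pbxproj registration survives. The sketch below is inferred entirely from the call sites in ProcessSelectionView above (`init(process:)`, `startMonitoring() throws`, `stopMonitoring()`, `isMonitoring`, `voiceActivityDetector`, `errorMessage`). The body comments describe what a tap-driven implementation would plausibly do, not what the PR actually shipped.

```swift
import Foundation

/// Hypothetical reconstruction of the monitor's interface, matching every
/// property and method ProcessSelectionView uses. @Observable/@MainActor are
/// assumptions consistent with how the SwiftUI views read it.
@Observable
@MainActor
final class RealtimeAudioMonitor {
    private let process: AudioProcess
    let voiceActivityDetector = VoiceActivityDetector()

    private(set) var isMonitoring = false
    private(set) var errorMessage: String?

    init(process: AudioProcess) {
        self.process = process
    }

    func startMonitoring() throws {
        guard !isMonitoring else { return }
        // Presumably: create and activate a ProcessTap for `process`, then
        // forward each delivered AVAudioPCMBuffer into the detector's ingest
        // method (whatever it is named in the lost file).
        isMonitoring = true
    }

    func stopMonitoring() {
        guard isMonitoring else { return }
        // Presumably invalidates the tap so buffer delivery stops; calling
        // reset() here is an assumption, but the detector does expose it.
        voiceActivityDetector.reset()
        isMonitoring = false
    }
}
```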
diff --git a/AudioCap/Views/RealtimeVADView.swift b/AudioCap/Views/RealtimeVADView.swift
new file mode 100644
index 0000000..2cb161f
--- /dev/null
+++ b/AudioCap/Views/RealtimeVADView.swift
@@ -0,0 +1,174 @@
+import SwiftUI
+
+@MainActor
+struct RealtimeVADView: View {
+    let vad: VoiceActivityDetector
+
+    var body: some View {
+        Section {
+            VStack(alignment: .leading, spacing: 12) {
+                // Current Status
+                HStack {
+                    Circle()
+                        .fill(vad.isSpeechDetected ? Color.green : Color.gray)
+                        .frame(width: 12, height: 12)
+                        .animation(.easeInOut(duration: 0.2), value: vad.isSpeechDetected)
+
+                    Text(vad.isSpeechDetected ? "Speech Detected" : "No Speech")
+                        .font(.headline)
+                        .foregroundColor(vad.isSpeechDetected ? .green : .secondary)
+
+                    Spacer()
+
+                    Text("RMS: \(String(format: "%.4f", vad.currentRMS))")
+                        .font(.caption)
+                        .monospaced()
+                        .foregroundColor(.secondary)
+                }
+
+                // Statistics
+                HStack {
+                    VStack(alignment: .leading) {
+                        Text("Frames Processed")
+                            .font(.caption)
+                            .foregroundColor(.secondary)
+                        Text("\(vad.totalFramesProcessed)")
+                            .font(.title3)
+                            .fontWeight(.medium)
+                    }
+
+                    Spacer()
+
+                    VStack(alignment: .trailing) {
+                        Text("Speech Activity")
+                            .font(.caption)
+                            .foregroundColor(.secondary)
+                        Text("\(String(format: "%.1f", vad.speechActivityPercentage))%")
+                            .font(.title3)
+                            .fontWeight(.medium)
+                            .foregroundColor(vad.speechActivityPercentage > 0 ? .green : .secondary)
+                    }
+                }
+                .padding(.vertical, 4)
+
+                // RMS Level Indicator
+                VStack(alignment: .leading, spacing: 4) {
+                    HStack {
+                        Text("RMS Level")
+                            .font(.caption)
+                            .foregroundColor(.secondary)
+
+                        Spacer()
+
+                        Text("Threshold: 0.01")
+                            .font(.caption)
+                            .foregroundColor(.secondary)
+                    }
+
+                    GeometryReader { geometry in
+                        ZStack(alignment: .leading) {
+                            // Background
+                            Rectangle()
+                                .fill(Color.gray.opacity(0.2))
+                                .frame(height: 8)
+                                .cornerRadius(4)
+
+                            // Threshold line
+                            Rectangle()
+                                .fill(Color.orange)
+                                .frame(width: 2, height: 12)
+                                .offset(x: geometry.size.width * (0.01 / 0.1)) // Assuming max scale of 0.1
+
+                            // Current level
+                            Rectangle()
+                                .fill(vad.isSpeechDetected ? Color.green : Color.blue)
+                                .frame(width: max(2, geometry.size.width * min(1.0, Double(vad.currentRMS) / 0.1)), height: 8)
+                                .cornerRadius(4)
+                                .animation(.easeOut(duration: 0.1), value: vad.currentRMS)
+                        }
+                    }
+                    .frame(height: 12)
+                }
+
+                // Frame List
+                if !vad.recentFrames.isEmpty {
+                    Divider()
+
+                    VStack(alignment: .leading, spacing: 8) {
+                        Text("Recent Frames")
+                            .font(.caption)
+                            .foregroundColor(.secondary)
+
+                        ScrollView {
+                            LazyVStack(alignment: .leading, spacing: 2) {
+                                ForEach(vad.recentFrames.suffix(15).reversed()) { frame in
+                                    FrameAnalysisRow(frame: frame)
+                                }
+                            }
+                        }
+                        .frame(maxHeight: 200)
+                    }
+                }
+            }
+            .padding(.vertical, 4)
+        } header: {
+            HStack {
+                Image(systemName: "waveform.circle.fill")
+                    .foregroundColor(vad.isSpeechDetected ? .green : .secondary)
+                    .animation(.easeInOut(duration: 0.2), value: vad.isSpeechDetected)
+
+                Text("Real-time Voice Activity Detection")
+                    .font(.headline)
+            }
+        }
+    }
+}
+
+struct FrameAnalysisRow: View {
+    let frame: AudioFrameAnalysis
+
+    var body: some View {
+        HStack(spacing: 8) {
+            // Speech indicator
+            Circle()
+                .fill(frame.isSpeech ? Color.green : Color.gray.opacity(0.3))
+                .frame(width: 6, height: 6)
+
+            // RMS value
+            Text(String(format: "%.4f", frame.rmsValue))
+                .font(.caption)
+                .monospaced()
+                .frame(width: 60, alignment: .trailing)
+                .foregroundColor(frame.isSpeech ? .primary : .secondary)
+
+            // Speech status
+            Text(frame.isSpeech ? "SPEECH" : "SILENCE")
+                .font(.caption)
+                .fontWeight(frame.isSpeech ? .medium : .regular)
+                .frame(width: 60, alignment: .leading)
+                .foregroundColor(frame.isSpeech ? .green : .secondary)
+
+            Spacer()
+
+            // Frame info
+            Text("\(frame.frameSize) samples")
+                .font(.caption)
+                .foregroundColor(.secondary)
+        }
+        .padding(.horizontal, 4)
+        .padding(.vertical, 1)
+        .background(
+            RoundedRectangle(cornerRadius: 4)
+                .fill(frame.isSpeech ? Color.green.opacity(0.1) : Color.clear)
+        )
+    }
+}
+
+#if DEBUG
+#Preview {
+    Form {
+        RealtimeVADView(vad: VoiceActivityDetector())
+    }
+    .formStyle(.grouped)
+}
+#endif
\ No newline at end of file
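One detail worth spelling out in the level bar above: both the threshold tick and the fill width map RMS linearly onto an assumed full scale of 0.1, so the 0.01 threshold sits at exactly 10% of the bar. A sketch of that mapping factored into a helper (the function name is mine, not the PR's):

```swift
import CoreGraphics

/// Linear RMS-to-bar-fraction mapping used by the indicator, with the view's
/// assumed full-scale RMS of 0.1. Hypothetical helper, not part of the diff.
func normalizedLevel(rms: Float, fullScale: Float = 0.1) -> CGFloat {
    CGFloat(min(1, max(0, rms / fullScale)))
}

// The orange threshold tick lands at normalizedLevel(rms: 0.01) == 0.1,
// i.e. 10% of the bar's width; a frame at exactly the threshold just reaches it.
```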
"SPEECH" : "SILENCE") + .font(.caption) + .fontWeight(frame.isSpeech ? .medium : .regular) + .frame(width: 60, alignment: .leading) + .foregroundColor(frame.isSpeech ? .green : .secondary) + + Spacer() + + // Frame info + Text("\(frame.frameSize) samples") + .font(.caption) + .foregroundColor(.secondary) + } + .padding(.horizontal, 4) + .padding(.vertical, 1) + .background( + RoundedRectangle(cornerRadius: 4) + .fill(frame.isSpeech ? Color.green.opacity(0.1) : Color.clear) + ) + } +} + +#if DEBUG +#Preview { + Form { + RealtimeVADView(vad: VoiceActivityDetector()) + } + .formStyle(.grouped) +} +#endif \ No newline at end of file diff --git a/AudioCap/RecordingIndicator.swift b/AudioCap/Views/RecordingIndicator.swift similarity index 100% rename from AudioCap/RecordingIndicator.swift rename to AudioCap/Views/RecordingIndicator.swift diff --git a/AudioCap/RecordingView.swift b/AudioCap/Views/RecordingView.swift similarity index 100% rename from AudioCap/RecordingView.swift rename to AudioCap/Views/RecordingView.swift diff --git a/AudioCap/RootView.swift b/AudioCap/Views/RootView.swift similarity index 100% rename from AudioCap/RootView.swift rename to AudioCap/Views/RootView.swift diff --git a/README.md b/README.md index 13fd28e..5ce566d 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,16 @@ This project is provided as documentation for this new API to help developers of https://github.com/insidegui/AudioCap/assets/67184/95d72d1f-a4d6-4544-9d2f-a2ab99507cfc + + + +https://github.com/user-attachments/assets/efc03340-1d1c-46bc-92b4-ee6e1e763dbc + + + + + + ## API Description Here’s a brief summary of the new API added in macOS 14.4 and how to put everything together.