Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 17 additions & 16 deletions OpenOats/Sources/OpenOats/Audio/AudioRecorder.swift
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
@preconcurrency import AVFoundation
import os

/// Records mic and system audio to temporary CAF files during a session,
/// then merges and encodes them into a single M4A (AAC) file on finalization.
Expand Down Expand Up @@ -67,14 +68,14 @@ final class AudioRecorder: @unchecked Sendable {
guard let monoFormat = AVAudioFormat(
standardFormatWithSampleRate: buffer.format.sampleRate, channels: 1
) else {
diagLog("[RECORDER] mic file SKIP: cannot create mono format at \(buffer.format.sampleRate)Hz")
Log.recorder.error("Mic file SKIP: cannot create mono format at \(buffer.format.sampleRate, privacy: .public)Hz")
return
}
do {
micFile = try AVAudioFile(forWriting: url, settings: monoFormat.settings)
diagLog("[RECORDER] mic file created: \(url.lastPathComponent) mono at \(buffer.format.sampleRate)Hz")
Log.recorder.info("Mic file created: \(url.lastPathComponent, privacy: .private(mask: .hash)) mono at \(buffer.format.sampleRate, privacy: .public)Hz")
} catch {
diagLog("[RECORDER] mic file creation FAILED: \(error)")
Log.recorder.error("Mic file creation failed: \(error.localizedDescription, privacy: .public)")
return
}
}
Expand Down Expand Up @@ -156,19 +157,19 @@ final class AudioRecorder: @unchecked Sendable {
}
}
} else {
diagLog("[RECORDER] mic write SKIP: unsupported buffer format \(buffer.format.commonFormat.rawValue)")
Log.recorder.error("Mic write SKIP: unsupported buffer format \(buffer.format.commonFormat.rawValue, privacy: .public)")
return
}

micWriteCount += 1
if micWriteCount <= 5 || micWriteCount % 100 == 0 {
let peak = Self.peakLevel(monoBuf)
diagLog("[RECORDER] mic write #\(micWriteCount): frames=\(frames) peak=\(peak)")
Log.recorder.debug("Mic write #\(self.micWriteCount, privacy: .public): frames=\(frames, privacy: .public) peak=\(peak, privacy: .public)")
}
do {
try micFile?.write(from: monoBuf)
} catch {
diagLog("[RECORDER] mic write ERROR: \(error)")
Log.recorder.error("Mic write error: \(error.localizedDescription, privacy: .public)")
}
}
}
Expand All @@ -185,7 +186,7 @@ final class AudioRecorder: @unchecked Sendable {
interleaved: buffer.format.isInterleaved
)
} catch {
diagLog("[RECORDER] sys file creation FAILED: \(error)")
Log.recorder.error("Sys file creation failed: \(error.localizedDescription, privacy: .public)")
return
}
}
Expand All @@ -204,7 +205,7 @@ final class AudioRecorder: @unchecked Sendable {
do {
try sysFile?.write(from: buffer)
} catch {
diagLog("[RECORDER] sys write ERROR: \(error)")
Log.recorder.error("Sys write error: \(error.localizedDescription, privacy: .public)")
}
}
}
Expand Down Expand Up @@ -310,20 +311,20 @@ final class AudioRecorder: @unchecked Sendable {
}()

guard micReader != nil || sysReader != nil else {
diagLog("[RECORDER] No audio data recorded")
Log.recorder.info("No audio data recorded")
return
}

let targetRate: Double = 48_000
guard let targetFormat = AVAudioFormat(standardFormatWithSampleRate: targetRate, channels: 1) else { return }

if let mic = micReader {
diagLog("[RECORDER] mic temp: \(mic.length) frames, format=\(mic.processingFormat)")
Log.recorder.info("Mic temp: \(mic.length, privacy: .public) frames, format=\(mic.processingFormat, privacy: .public)")
}
if let sys = sysReader {
diagLog("[RECORDER] sys temp: \(sys.length) frames, format=\(sys.processingFormat)")
Log.recorder.info("Sys temp: \(sys.length, privacy: .public) frames, format=\(sys.processingFormat, privacy: .public)")
if let eff = sysEffectiveRate {
diagLog("[RECORDER] sys effective sample rate: \(eff) Hz (declared: \(sys.processingFormat.sampleRate) Hz)")
Log.recorder.info("Sys effective sample rate: \(eff, privacy: .public) Hz (declared: \(sys.processingFormat.sampleRate, privacy: .public) Hz)")
}
}

Expand All @@ -334,7 +335,7 @@ final class AudioRecorder: @unchecked Sendable {
let effectiveRate = sysEffectiveRate,
abs(effectiveRate - sysReader.processingFormat.sampleRate) > 1000
{
diagLog("[RECORDER] sys rate mismatch: effective=\(effectiveRate) vs declared=\(sysReader.processingFormat.sampleRate), resampling from effective rate")
Log.recorder.info("Sys rate mismatch: effective=\(effectiveRate, privacy: .public) vs declared=\(sysReader.processingFormat.sampleRate, privacy: .public), resampling from effective rate")
sysSamples = Self.readAllMono(
file: sysReader,
targetRate: targetRate,
Expand All @@ -347,7 +348,7 @@ final class AudioRecorder: @unchecked Sendable {

let micPeak = micSamples.reduce(Float(0)) { max($0, abs($1)) }
let sysPeak = sysSamples.reduce(Float(0)) { max($0, abs($1)) }
diagLog("[RECORDER] after readAllMono: micSamples=\(micSamples.count) micPeak=\(micPeak) sysSamples=\(sysSamples.count) sysPeak=\(sysPeak)")
Log.recorder.info("After readAllMono: micSamples=\(micSamples.count, privacy: .public) micPeak=\(micPeak, privacy: .public) sysSamples=\(sysSamples.count, privacy: .public) sysPeak=\(sysPeak, privacy: .public)")

let length = max(micSamples.count, sysSamples.count)
guard length > 0 else { return }
Expand All @@ -364,7 +365,7 @@ final class AudioRecorder: @unchecked Sendable {
commonFormat: .pcmFormatFloat32,
interleaved: false
) else {
diagLog("[RECORDER] Failed to create output file")
Log.recorder.error("Failed to create output file")
return
}

Expand All @@ -387,7 +388,7 @@ final class AudioRecorder: @unchecked Sendable {
offset += count
}

diagLog("[RECORDER] Saved \(outputURL.lastPathComponent) (\(length) frames)")
Log.recorder.info("Saved \(outputURL.lastPathComponent, privacy: .private(mask: .hash)) (\(length, privacy: .public) frames)")
}

private static func readAllMono(
Expand Down
41 changes: 20 additions & 21 deletions OpenOats/Sources/OpenOats/Audio/MicCapture.swift
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import CoreAudio
import Foundation
import os

private let micLog = Logger(subsystem: "com.openoats", category: "MicCapture")

/// Captures microphone audio via AVAudioEngine and streams PCM buffers.
final class MicCapture: @unchecked Sendable {
Expand Down Expand Up @@ -55,21 +54,21 @@ final class MicCapture: @unchecked Sendable {
errorHolder.value = nil
self._hasCapturedFrames.value = false

diagLog("[MIC-1] bufferStream called, deviceID=\(String(describing: deviceID))")
Log.mic.info("bufferStream called, deviceID=\(String(describing: deviceID), privacy: .public)")

let engine = self.makeFreshEngine()
diagLog("[MIC-1a] fresh engine created")
Log.mic.info("Fresh engine created")

let inputNode = engine.inputNode
diagLog("[MIC-1b] input node ready")
Log.mic.info("Input node ready")

// Enable voice processing (AEC + noise suppression) if requested
if echoCancellation {
do {
try inputNode.setVoiceProcessingEnabled(true)
diagLog("[MIC-1c] voice processing (AEC) enabled")
Log.mic.info("Voice processing (AEC) enabled")
} catch {
diagLog("[MIC-1c] failed to enable voice processing: \(error.localizedDescription)")
Log.mic.error("Failed to enable voice processing: \(error.localizedDescription, privacy: .public)")
}
}

Expand All @@ -78,7 +77,7 @@ final class MicCapture: @unchecked Sendable {
if let id = deviceID {
guard let inAU = inputNode.audioUnit else {
let msg = "inputNode has no audio unit after prepare"
diagLog("[MIC-2-FAIL] \(msg)")
Log.mic.error("\(msg, privacy: .public)")
errorHolder.value = msg
continuation.finish()
return
Expand All @@ -92,10 +91,10 @@ final class MicCapture: @unchecked Sendable {
&devID,
UInt32(MemoryLayout<AudioDeviceID>.size)
)
diagLog("[MIC-2] setInputDevice status=\(inStatus) (0=ok)")
Log.mic.info("setInputDevice status=\(inStatus, privacy: .public) (0=ok)")
resolvedDeviceID = id
} else {
diagLog("[MIC-2] no deviceID, using system default")
Log.mic.info("No deviceID, using system default")
resolvedDeviceID = Self.defaultInputDeviceID()
}

Expand All @@ -108,15 +107,15 @@ final class MicCapture: @unchecked Sendable {
if let devID = resolvedDeviceID,
let hwRate = Self.deviceNominalSampleRate(for: devID),
hwRate > 0, hwRate != sampleRate {
diagLog("[MIC-3] hardware sr=\(hwRate) differs from inputNode sr=\(sampleRate), using hardware rate")
Log.mic.info("Hardware sr=\(hwRate, privacy: .public) differs from inputNode sr=\(sampleRate, privacy: .public), using hardware rate")
sampleRate = hwRate
}

diagLog("[MIC-3] inputNode format: sr=\(format.sampleRate) ch=\(format.channelCount) interleaved=\(format.isInterleaved) commonFormat=\(format.commonFormat.rawValue), effective sr=\(sampleRate)")
Log.mic.info("inputNode format: sr=\(format.sampleRate, privacy: .public) ch=\(format.channelCount, privacy: .public) interleaved=\(format.isInterleaved, privacy: .public) commonFormat=\(format.commonFormat.rawValue, privacy: .public), effective sr=\(sampleRate, privacy: .public)")

guard sampleRate > 0 && format.channelCount > 0 else {
let msg = "Invalid audio format: sr=\(sampleRate) ch=\(format.channelCount)"
diagLog("[MIC-3-FAIL] \(msg)")
Log.mic.error("\(msg, privacy: .public)")
errorHolder.value = msg
continuation.finish()
return
Expand All @@ -130,14 +129,14 @@ final class MicCapture: @unchecked Sendable {
tapFormat = f
} else if sampleRate != format.sampleRate,
let f = AVAudioFormat(standardFormatWithSampleRate: format.sampleRate, channels: format.channelCount) {
diagLog("[MIC-4] hardware-rate format failed, using node rate \(format.sampleRate)")
Log.mic.info("Hardware-rate format failed, using node rate \(format.sampleRate, privacy: .public)")
tapFormat = f
} else {
diagLog("[MIC-4] standard formats failed, using native input format")
Log.mic.info("Standard formats failed, using native input format")
tapFormat = format
}

diagLog("[MIC-4] tapFormat: sr=\(tapFormat.sampleRate) ch=\(tapFormat.channelCount)")
Log.mic.info("tapFormat: sr=\(tapFormat.sampleRate, privacy: .public) ch=\(tapFormat.channelCount, privacy: .public)")

let muted = self._muted
var tapCallCount = 0
Expand All @@ -148,29 +147,29 @@ final class MicCapture: @unchecked Sendable {
level.value = min(rms * 25, 1.0)

if tapCallCount <= 5 || tapCallCount % 100 == 0 {
diagLog("[MIC-6] tap #\(tapCallCount): frames=\(buffer.frameLength) rms=\(rms) level=\(level.value)")
Log.mic.debug("tap #\(tapCallCount, privacy: .public): frames=\(buffer.frameLength, privacy: .public) rms=\(rms, privacy: .public) level=\(level.value, privacy: .public)")
}

guard !muted.value else { return }
continuation.yield(buffer)
}
self.hasTapInstalled = true

diagLog("[MIC-5] tap installed, preparing engine...")
Log.mic.info("Tap installed, preparing engine")

continuation.onTermination = { _ in
diagLog("[MIC-TERM] stream terminated")
Log.mic.info("Stream terminated")
// Audio hardware teardown handled by stop() — not here,
// so finishStream() can drain without premature engine shutdown.
}

do {
diagLog("[MIC-7] engine prepared, starting...")
Log.mic.info("Engine prepared, starting")
try engine.start()
diagLog("[MIC-8] engine started successfully, isRunning=\(engine.isRunning)")
Log.mic.info("Engine started successfully, isRunning=\(engine.isRunning, privacy: .public)")
} catch {
let msg = "Mic failed: \(error.localizedDescription)"
print("[MIC-8-FAIL] \(msg)")
Log.mic.error("\(msg, privacy: .public)")
errorHolder.value = msg
self.hasTapInstalled = false
continuation.finish()
Expand Down
12 changes: 7 additions & 5 deletions OpenOats/Sources/OpenOats/Models/TranscriptStore.swift
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import Foundation
import Observation
import os

@Observable
@MainActor
Expand Down Expand Up @@ -203,11 +204,12 @@ final class TranscriptStore {

guard similarity >= acousticEchoSimilarityThreshold || containsOther else { continue }

diagLog(
"[TRANSCRIPT-ECHO] dropped mic utterance as system-audio echo " +
"dt=\(String(format: "%.2f", timeDelta)) " +
"similarity=\(String(format: "%.2f", similarity)) " +
"you='\(utterance.text.prefix(80))' them='\(candidate.text.prefix(80))'"
let dtFormatted = String(format: "%.2f", timeDelta)
let simFormatted = String(format: "%.2f", similarity)
let youSnippet = String(utterance.text.prefix(80))
let themSnippet = String(candidate.text.prefix(80))
Log.transcript.info(
"Dropped mic utterance as system-audio echo dt=\(dtFormatted, privacy: .public) similarity=\(simFormatted, privacy: .public) you='\(youSnippet, privacy: .private)' them='\(themSnippet, privacy: .private)'"
)
return true
}
Expand Down
12 changes: 7 additions & 5 deletions OpenOats/Sources/OpenOats/Transcription/AcousticEchoFilter.swift
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import Foundation
import os

/// Shared acoustic echo suppression logic.
/// Detects when mic (YOU) utterances are echoes of system (THEM) audio based on
Expand Down Expand Up @@ -37,11 +38,12 @@ enum AcousticEchoFilter {
normalizedThem.contains(normalizedYou)

if similarity >= similarityThreshold || containsOther {
diagLog(
"[ECHO-FILTER] suppressed mic record as echo " +
"dt=\(String(format: "%.2f", timeDelta)) " +
"sim=\(String(format: "%.2f", similarity)) " +
"mic='\(micRecord.text.prefix(80))' sys='\(sysRecord.text.prefix(80))'"
let dtFormatted = String(format: "%.2f", timeDelta)
let simFormatted = String(format: "%.2f", similarity)
let micSnippet = String(micRecord.text.prefix(80))
let sysSnippet = String(sysRecord.text.prefix(80))
Log.echo.info(
"Suppressed mic record as echo dt=\(dtFormatted, privacy: .public) sim=\(simFormatted, privacy: .public) mic='\(micSnippet, privacy: .private)' sys='\(sysSnippet, privacy: .private)'"
)
return true
}
Expand Down
Loading
Loading