diff --git a/OpenOats/Sources/OpenOats/App/AppCoordinator.swift b/OpenOats/Sources/OpenOats/App/AppCoordinator.swift index 24b784ff..1b96f5bb 100644 --- a/OpenOats/Sources/OpenOats/App/AppCoordinator.swift +++ b/OpenOats/Sources/OpenOats/App/AppCoordinator.swift @@ -254,6 +254,17 @@ final class AppCoordinator { let title = transcriptStore.conversationState.currentTopic.isEmpty ? nil : transcriptStore.conversationState.currentTopic + // Extract meeting app name from state machine metadata (available in .ending state) + let meetingAppName: String? + if case .ending(let metadata) = state { + meetingAppName = metadata.detectionContext?.meetingApp?.name + } else { + meetingAppName = nil + } + + // Capture the ASR engine name from current settings + let engineName = settings?.transcriptionModel.rawValue + let index = SessionIndex( id: sessionID, startedAt: transcriptStore.utterances.first?.timestamp ?? Date(), @@ -261,13 +272,26 @@ final class AppCoordinator { templateSnapshot: sessionTemplateSnapshot, title: title, utteranceCount: utteranceCount, - hasNotes: false + hasNotes: false, + meetingApp: meetingAppName, + engine: engineName ) let sidecar = SessionSidecar(index: index, notes: nil) // 4. Write sidecar await sessionStore.writeSidecar(sidecar) + // 4b. Generate structured Markdown file from JSONL (has refined text after backfill) + let jsonlRecords = await sessionStore.loadTranscript(sessionID: sessionID) + if !jsonlRecords.isEmpty, let settings { + let outputDir = URL(fileURLWithPath: settings.notesFolderPath) + MarkdownMeetingWriter.write( + metadata: .init(from: index), + records: jsonlRecords, + outputDirectory: outputDir + ) + } + // 5. 
Close JSONL file await sessionStore.endSession() diff --git a/OpenOats/Sources/OpenOats/App/OpenOatsApp.swift b/OpenOats/Sources/OpenOats/App/OpenOatsApp.swift index f7850031..6de8925b 100644 --- a/OpenOats/Sources/OpenOats/App/OpenOatsApp.swift +++ b/OpenOats/Sources/OpenOats/App/OpenOatsApp.swift @@ -1,6 +1,7 @@ import SwiftUI import AppKit import Sparkle +import UserNotifications public struct OpenOatsRootApp: App { @NSApplicationDelegateAdaptor(AppDelegate.self) var appDelegate @@ -28,7 +29,14 @@ public struct OpenOatsRootApp: App { .defaultAppStorage(defaults) .onAppear { appDelegate.coordinator = coordinator + appDelegate.settings = settings appDelegate.defaults = defaults + appDelegate.runtime = runtime + appDelegate.setupMenuBarIfNeeded( + coordinator: coordinator, + settings: settings, + showMainWindow: { [self] in showMainWindow() } + ) settings.applyScreenShareVisibility() } .onOpenURL { url in @@ -84,42 +92,56 @@ public struct OpenOatsRootApp: App { } extension OpenOatsRootApp { + static let mainWindowID = "main" + private func openNotesWindow() { openWindow(id: "notes") } + + private func showMainWindow() { + NSApp.setActivationPolicy(.regular) + NSApp.activate(ignoringOtherApps: true) + if let window = NSApp.windows.first(where: { $0.identifier?.rawValue == Self.mainWindowID }) { + window.makeKeyAndOrderFront(nil) + } else { + openWindow(id: Self.mainWindowID) + } + } } -/// Observes new window creation and applies screen-share visibility setting. @MainActor -final class AppDelegate: NSObject, NSApplicationDelegate { +final class AppDelegate: NSObject, NSApplicationDelegate, NSWindowDelegate { private var windowObserver: Any? + private var menuBarController: MenuBarController? + private var isTerminating = false var coordinator: AppCoordinator? + var settings: AppSettings? + var runtime: AppRuntime? var defaults: UserDefaults = .standard - func applicationDockMenu(_ sender: NSApplication) -> NSMenu? 
{ - guard let coordinator else { return nil } - let menu = NSMenu() - if coordinator.isRecording { - let item = NSMenuItem(title: "Stop Recording", action: #selector(stopRecording), keyEquivalent: "") - item.target = self - menu.addItem(item) - } else { - let item = NSMenuItem(title: "Start Recording", action: #selector(startRecording), keyEquivalent: "") - item.target = self - menu.addItem(item) - } - return menu - } + func setupMenuBarIfNeeded( + coordinator: AppCoordinator, + settings: AppSettings, + showMainWindow: @escaping () -> Void + ) { + guard menuBarController == nil else { return } - @objc private func startRecording() { - coordinator?.queueExternalCommand(.startSession) - } + runtime?.ensureServicesInitialized(settings: settings, coordinator: coordinator) - @objc private func stopRecording() { - coordinator?.queueExternalCommand(.stopSession) + let controller = MenuBarController( + coordinator: coordinator, + settings: settings + ) + controller.onShowMainWindow = showMainWindow + controller.onQuitApp = { [weak self] in + self?.handleQuit() + } + menuBarController = controller } func applicationDidFinishLaunching(_ notification: Notification) { + NSApp.setActivationPolicy(.regular) + let hidden = defaults.object(forKey: "hideFromScreenShare") == nil ? true : defaults.bool(forKey: "hideFromScreenShare") @@ -127,9 +149,9 @@ final class AppDelegate: NSObject, NSApplicationDelegate { for window in NSApp.windows { window.sharingType = sharingType + window.delegate = self } - // Watch for new windows being created (e.g. Settings window) windowObserver = NotificationCenter.default.addObserver( forName: NSWindow.didBecomeKeyNotification, object: nil, @@ -142,8 +164,99 @@ final class AppDelegate: NSObject, NSApplicationDelegate { let type: NSWindow.SharingType = hide ? 
.none : .readOnly for window in NSApp.windows { window.sharingType = type + if window.delegate == nil || window.delegate === self { + window.delegate = self + } } } } } + + func applicationShouldTerminate(_ sender: NSApplication) -> NSApplication.TerminateReply { + guard let coordinator else { return .terminateNow } + + if isTerminating { + return .terminateNow + } + + guard coordinator.isRecording else { + return .terminateNow + } + + let alert = NSAlert() + alert.messageText = "Recording in Progress" + alert.informativeText = "Stop recording and quit?" + alert.alertStyle = .warning + alert.addButton(withTitle: "Stop & Quit") + alert.addButton(withTitle: "Cancel") + + let response = alert.runModal() + guard response == .alertFirstButtonReturn else { + return .terminateCancel + } + + isTerminating = true + coordinator.handle(.userStopped, settings: settings) + + Task { @MainActor [weak self] in + let deadline = Date().addingTimeInterval(30) + while Date() < deadline { + if case .idle = coordinator.state { break } + try? 
await Task.sleep(for: .milliseconds(100)) + } + self?.isTerminating = true + NSApp.reply(toApplicationShouldTerminate: true) + } + return .terminateLater + } + + func applicationShouldTerminateAfterLastWindowClosed(_ sender: NSApplication) -> Bool { + false + } + + // MARK: - NSWindowDelegate + + func windowShouldClose(_ sender: NSWindow) -> Bool { + let isMainWindow = sender.identifier?.rawValue == OpenOatsRootApp.mainWindowID + + if isMainWindow { + sender.orderOut(nil) + NSApp.setActivationPolicy(.accessory) + showBackgroundModeHintIfNeeded() + return false + } + return true + } + + // MARK: - One-Shot Background Notification + + private func showBackgroundModeHintIfNeeded() { + guard !defaults.bool(forKey: "hasShownBackgroundModeHint") else { return } + guard settings?.meetingAutoDetectEnabled == true else { return } + + defaults.set(true, forKey: "hasShownBackgroundModeHint") + + Task { + let center = UNUserNotificationCenter.current() + let granted = try? await center.requestAuthorization(options: [.alert]) + guard granted == true else { return } + + let content = UNMutableNotificationContent() + content.title = "OpenOats is still running" + content.body = "Meeting detection is active. Click the menu bar icon to access controls." + + let request = UNNotificationRequest( + identifier: "background-mode-hint", + content: content, + trigger: nil + ) + try? 
await center.add(request) + } + } + + // MARK: - Quit + + private func handleQuit() { + NSApp.terminate(nil) + } } diff --git a/OpenOats/Sources/OpenOats/Audio/AudioRecorder.swift b/OpenOats/Sources/OpenOats/Audio/AudioRecorder.swift index f8ccfa92..6c652e41 100644 --- a/OpenOats/Sources/OpenOats/Audio/AudioRecorder.swift +++ b/OpenOats/Sources/OpenOats/Audio/AudioRecorder.swift @@ -40,11 +40,11 @@ final class AudioRecorder: @unchecked Sendable { func writeMicBuffer(_ buffer: AVAudioPCMBuffer) { lock.withLock { - guard buffer.frameLength > 0, let src = buffer.floatChannelData else { return } + guard buffer.frameLength > 0 else { return } let frames = Int(buffer.frameLength) let channels = Int(buffer.format.channelCount) - // Lazily create file as mono 48kHz (avoids deinterleaved format issues) + // Lazily create file as mono at the source sample rate if micFile == nil, let url = micTempURL { let monoFormat = AVAudioFormat( standardFormatWithSampleRate: buffer.format.sampleRate, channels: 1 @@ -53,7 +53,7 @@ final class AudioRecorder: @unchecked Sendable { diagLog("[RECORDER] mic file created: \(url.lastPathComponent) mono at \(buffer.format.sampleRate)Hz") } - // Downmix to mono inline + // Downmix to mono inline — handle float32, int16, and int32 formats guard let monoFormat = AVAudioFormat( standardFormatWithSampleRate: buffer.format.sampleRate, channels: 1 ), @@ -61,15 +61,70 @@ final class AudioRecorder: @unchecked Sendable { let dst = monoBuf.floatChannelData?[0] else { return } monoBuf.frameLength = buffer.frameLength - if channels == 1 { - memcpy(dst, src[0], frames * MemoryLayout.size) - } else { - let scale = 1.0 / Float(channels) - for i in 0...size) + } else { + memcpy(dst, src[0], frames * MemoryLayout.size) + } + } else { + let scale = 1.0 / Float(channels) + if buffer.format.isInterleaved { + for i in 0.. AsyncStream { + func bufferStream(deviceID: AudioDeviceID? 
= nil, echoCancellation: Bool = false) -> AsyncStream { // Defensive cleanup of any prior state _streamContinuation.withLock { $0?.finish(); $0 = nil } engine.inputNode.removeTap(onBus: 0) @@ -55,6 +55,16 @@ final class MicCapture: @unchecked Sendable { let inputNode = engine.inputNode diagLog("[MIC-1b] input node ready") + // Enable voice processing (AEC + noise suppression) if requested + if echoCancellation { + do { + try inputNode.setVoiceProcessingEnabled(true) + diagLog("[MIC-1c] voice processing (AEC) enabled") + } catch { + diagLog("[MIC-1c] failed to enable voice processing: \(error.localizedDescription)") + } + } + // Set input device before accessing inputNode format var resolvedDeviceID: AudioDeviceID? if let id = deviceID { @@ -104,15 +114,19 @@ final class MicCapture: @unchecked Sendable { return } - guard let tapFormat = AVAudioFormat( - standardFormatWithSampleRate: sampleRate, - channels: format.channelCount - ) else { - let msg = "Failed to build tap format from input format" - diagLog("[MIC-4-FAIL] \(msg)") - errorHolder.value = msg - continuation.finish() - return + // Try multiple tap formats — some devices report formats that don't + // round-trip through AVAudioFormat(standardFormat:). Fall back to the + // native input format as a last resort. 
+ let tapFormat: AVAudioFormat + if let f = AVAudioFormat(standardFormatWithSampleRate: sampleRate, channels: format.channelCount) { + tapFormat = f + } else if sampleRate != format.sampleRate, + let f = AVAudioFormat(standardFormatWithSampleRate: format.sampleRate, channels: format.channelCount) { + diagLog("[MIC-4] hardware-rate format failed, using node rate \(format.sampleRate)") + tapFormat = f + } else { + diagLog("[MIC-4] standard formats failed, using native input format") + tapFormat = format } diagLog("[MIC-4] tapFormat: sr=\(tapFormat.sampleRate) ch=\(tapFormat.channelCount)") diff --git a/OpenOats/Sources/OpenOats/Info.plist b/OpenOats/Sources/OpenOats/Info.plist index daa86b58..ffdb4a60 100644 --- a/OpenOats/Sources/OpenOats/Info.plist +++ b/OpenOats/Sources/OpenOats/Info.plist @@ -43,5 +43,7 @@ SUPublicEDKey RMSxoyjk8gEBvqmECGMvBLYQ7Kl7ptbacDBWSbsdgeI= + LSUIElement + diff --git a/OpenOats/Sources/OpenOats/Intelligence/MarkdownMeetingWriter.swift b/OpenOats/Sources/OpenOats/Intelligence/MarkdownMeetingWriter.swift new file mode 100644 index 00000000..1051a6e3 --- /dev/null +++ b/OpenOats/Sources/OpenOats/Intelligence/MarkdownMeetingWriter.swift @@ -0,0 +1,583 @@ +import Foundation +import os + +private let writerLogger = Logger(subsystem: "com.openoats.app", category: "MarkdownMeetingWriter") + +/// Produces spec-compliant openoats/v1 Markdown files from session data. +/// +/// The writer is stateless: call `write(...)` with session metadata and transcript records, +/// and it returns the URL of the generated `.md` file. All I/O is synchronous and runs +/// on the caller's context (designed for `nonisolated static` or actor-isolated use). +enum MarkdownMeetingWriter { + + // MARK: - Public API + + /// Metadata needed to produce the Markdown file, extracted from SessionIndex + sidecar. + struct Metadata: Sendable { + let sessionID: String + let title: String? + let startedAt: Date + let endedAt: Date? + let meetingApp: String? + let engine: String? 
+
+        /// Copies the fields the writer needs out of a `SessionIndex`.
+        init(from index: SessionIndex) {
+            self.sessionID = index.id
+            self.title = index.title
+            self.startedAt = index.startedAt
+            self.endedAt = index.endedAt
+            self.meetingApp = index.meetingApp
+            self.engine = index.engine
+        }
+    }
+
+    /// Write a spec-compliant `.md` file to the output directory.
+    ///
+    /// The output directory is created if needed. The file is written atomically and its
+    /// POSIX permissions are then set to `0o600`.
+    ///
+    /// - Parameters:
+    ///   - metadata: Session metadata (title, dates, app, engine).
+    ///   - records: The transcript records from the JSONL session store.
+    ///   - outputDirectory: The directory to write into (e.g. `~/Documents/OpenOats/`).
+    /// - Returns: The URL of the written file, or `nil` when `records` is empty or the write fails.
+    @discardableResult
+    static func write(
+        metadata: Metadata,
+        records: [SessionRecord],
+        outputDirectory: URL
+    ) -> URL? {
+        guard !records.isEmpty else {
+            writerLogger.warning("MarkdownMeetingWriter: no records, skipping write")
+            return nil
+        }
+
+        let fm = FileManager.default
+        // Best effort: if this fails the write below fails too and reports `nil`,
+        // but log the underlying reason instead of discarding it.
+        do {
+            try fm.createDirectory(at: outputDirectory, withIntermediateDirectories: true)
+        } catch {
+            writerLogger.warning("Could not create output directory: \(error.localizedDescription, privacy: .public)")
+        }
+
+        // Build the Markdown content
+        let content = buildMarkdown(metadata: metadata, records: records)
+
+        // Generate filename with collision handling
+        let fileURL = resolveFilename(
+            title: metadata.title,
+            startedAt: metadata.startedAt,
+            directory: outputDirectory
+        )
+
+        // Write with restricted permissions
+        do {
+            try content.write(to: fileURL, atomically: true, encoding: .utf8)
+            try fm.setAttributes([.posixPermissions: 0o600], ofItemAtPath: fileURL.path)
+            writerLogger.info("Wrote meeting markdown: \(fileURL.lastPathComponent, privacy: .public)")
+            return fileURL
+        } catch {
+            writerLogger.error("Failed to write markdown: \(error.localizedDescription, privacy: .public)")
+            return nil
+        }
+    }
+
+    // MARK: - Markdown Assembly
+
+    /// Assemble the full document: YAML frontmatter, a newline, then the Markdown body.
+    ///
+    /// A missing or empty title is replaced by `"Meeting"` in both the frontmatter and the H1.
+    static func buildMarkdown(metadata: Metadata, records: [SessionRecord]) -> String {
+        // Bind the optional once instead of testing it and then force-unwrapping it.
+        let resolvedTitle: String
+        if let title = metadata.title, !title.isEmpty {
+            resolvedTitle = title
+        } else {
+            resolvedTitle = "Meeting"
+        }
+        let frontmatter = buildFrontmatter(metadata: metadata, records: records, title: resolvedTitle)
+        let body = buildBody(title: resolvedTitle, records: records, startedAt: metadata.startedAt)
+        return frontmatter + "\n" + body
+    }
+
+    // MARK: - YAML Frontmatter
+
+    /// Build the `---`-delimited YAML frontmatter block.
+    ///
+    /// - Parameters:
+    ///   - metadata: Source of the date, engine, meeting app and session ID fields.
+    ///   - records: Passed through to `computeDuration` for the `duration` field.
+    ///   - title: The already-resolved (non-empty) title; always emitted quoted.
+    /// - Returns: The frontmatter including both delimiter lines, without a trailing newline.
+    static func buildFrontmatter(
+        metadata: Metadata,
+        records: [SessionRecord],
+        title: String
+    ) -> String {
+        var lines: [String] = ["---"]
+
+        lines.append("schema: openoats/v1")
+        lines.append("title: \(yamlQuote(title))")
+        lines.append("date: \(formatISO8601(metadata.startedAt))")
+        lines.append("duration: \(computeDuration(records: records, metadata: metadata))")
+
+        // Participants - always You/Them for now
+        lines.append("participants:")
+        lines.append(" - You")
+        lines.append(" - Them")
+
+        // Recorder (system user's full name)
+        let recorderName = NSFullUserName()
+        if !recorderName.isEmpty {
+            lines.append("recorder: \(yamlQuote(recorderName))")
+        }
+
+        // Engine
+        if let engine = metadata.engine, !engine.isEmpty {
+            lines.append("engine: \(engine)")
+        }
+
+        // Meeting app (lowercase per spec)
+        if let app = metadata.meetingApp, !app.isEmpty {
+            lines.append("app: \(normalizeAppName(app))")
+        }
+
+        // Extension: link back to session ID
+        lines.append("x_openoats_session: \(yamlQuote(metadata.sessionID))")
+
+        lines.append("---")
+        return lines.joined(separator: "\n")
+    }
+
+    // MARK: - Body
+
+    /// Build the Markdown body: an H1 title followed by the `## Transcript` section.
+    static func buildBody(title: String, records: [SessionRecord], startedAt: Date) -> String {
+        var parts: [String] = []
+
+        // H1 title
+        parts.append("# \(title)")
+        parts.append("")
+
+        // Transcript section
+        parts.append("## Transcript")
+        parts.append("")
+
+        let transcriptLines = formatTranscriptLines(records: records, startedAt: startedAt)
+        parts.append(transcriptLines)
+
+        return parts.joined(separator: "\n")
+    }
+
+    // MARK: - Transcript Formatting
+
+    /// Render each record as `[HH:MM:SS] **Speaker:** text`, each followed by a blank line.
+    ///
+    /// The refined text is used when a record has one; otherwise the raw text is used.
+    static func formatTranscriptLines(records: [SessionRecord], startedAt: Date) -> String {
+        var lines: [String] = []
+
+        for record in records {
+            let relativeTimestamp = formatRelativeTimestamp(
+                record.timestamp,
+                relativeTo: startedAt
+            )
+            let speaker = speakerLabel(record.speaker)
+            let text = record.refinedText ?? record.text
+            lines.append("[\(relativeTimestamp)] **\(speaker):** \(text)")
+            lines.append("")
+        }
+
+        return lines.joined(separator: "\n")
+    }
+
+    // MARK: - Timestamp Helpers
+
+    /// Format a date as a relative timestamp `HH:MM:SS` from the meeting start.
+    ///
+    /// Timestamps earlier than `start` are clamped to `00:00:00`; the offset is rounded
+    /// to the nearest whole second.
+    static func formatRelativeTimestamp(_ timestamp: Date, relativeTo start: Date) -> String {
+        let interval = max(0, timestamp.timeIntervalSince(start))
+        let totalSeconds = Int(interval.rounded())
+        let hours = totalSeconds / 3600
+        let minutes = (totalSeconds % 3600) / 60
+        let seconds = totalSeconds % 60
+        return String(format: "%02d:%02d:%02d", hours, minutes, seconds)
+    }
+
+    /// Format a date as ISO 8601 with timezone offset.
+    ///
+    /// The offset is that of the current time zone at formatting time.
+    static func formatISO8601(_ date: Date) -> String {
+        let formatter = ISO8601DateFormatter()
+        formatter.formatOptions = [.withInternetDateTime]
+        formatter.timeZone = TimeZone.current
+        return formatter.string(from: date)
+    }
+
+    // MARK: - Duration
+
+    /// Compute meeting duration in minutes from transcript records, rounded to nearest minute.
+    /// Minimum is 1 minute. 
+    ///
+    /// - Parameters:
+    ///   - records: Transcript records; consulted only when `metadata.endedAt` is `nil`.
+    ///   - metadata: Session metadata providing `startedAt` and the optional `endedAt`.
+    /// - Returns: The duration in whole minutes, never less than 1.
+    static func computeDuration(records: [SessionRecord], metadata: Metadata) -> Int {
+        // Prefer endedAt from metadata if available
+        if let endedAt = metadata.endedAt {
+            let seconds = endedAt.timeIntervalSince(metadata.startedAt)
+            return max(1, Int((seconds / 60.0).rounded()))
+        }
+
+        // Fallback: difference between first and last record timestamps
+        guard let first = records.first, let last = records.last else { return 1 }
+        let seconds = last.timestamp.timeIntervalSince(first.timestamp)
+        return max(1, Int((seconds / 60.0).rounded()))
+    }
+
+    // MARK: - Speaker Label
+
+    /// The display label written into the transcript for a speaker.
+    static func speakerLabel(_ speaker: Speaker) -> String {
+        switch speaker {
+        case .you: return "You"
+        case .them: return "Them"
+        }
+    }
+
+    // MARK: - YAML Quoting
+
+    /// Quote a YAML string value. Per spec, title MUST always be quoted.
+    ///
+    /// Wraps the value in double quotes and escapes backslashes, double quotes, newlines,
+    /// tabs and carriage returns. Any other C0 control character (and DEL) is written as a
+    /// `\xNN` escape, so the result always stays on one line.
+    ///
+    /// - Parameter value: The raw string to embed in the frontmatter.
+    /// - Returns: `value` as a YAML double-quoted scalar, including the surrounding quotes.
+    static func yamlQuote(_ value: String) -> String {
+        var escaped = ""
+        // Walk scalars in a single pass so an already-escaped backslash is never re-escaped.
+        for scalar in value.unicodeScalars {
+            switch scalar {
+            case "\\": escaped += "\\\\"
+            case "\"": escaped += "\\\""
+            case "\n": escaped += "\\n"
+            case "\t": escaped += "\\t"
+            case "\r": escaped += "\\r"
+            default:
+                if scalar.value < 0x20 || scalar.value == 0x7F {
+                    // Remaining control characters are not printable in YAML; hex-escape them.
+                    escaped += String(format: "\\x%02X", scalar.value)
+                } else {
+                    escaped.unicodeScalars.append(scalar)
+                }
+            }
+        }
+        return "\"\(escaped)\""
+    }
+
+    // MARK: - App Name Normalization
+
+    /// Normalize meeting app display name to a lowercase slug for the `app` frontmatter field.
+    /// Maps known display names to standard short names per spec. 
+    ///
+    /// - Parameter name: Display name of the meeting app (e.g. "Microsoft Teams").
+    /// - Returns: A known short identifier, or the kebab-cased name when nothing matches.
+    static func normalizeAppName(_ name: String) -> String {
+        let lower = name.lowercased()
+        // Map well-known display names to their spec identifiers
+        if lower.contains("zoom") { return "zoom" }
+        if lower.contains("teams") { return "teams" }
+        if lower.contains("meet") && lower.contains("google") { return "meet" }
+        if lower.contains("facetime") { return "facetime" }
+        if lower.contains("slack") { return "slack" }
+        if lower.contains("discord") { return "discord" }
+        if lower.contains("webex") { return "webex" }
+        if lower.contains("whatsapp") { return "whatsapp" }
+        if lower.contains("tuple") { return "tuple" }
+        if lower.contains("around") { return "around" }
+        // Fallback: kebab-case the name
+        return toKebabCase(lower)
+    }
+
+    // MARK: - Kebab Case
+
+    /// Convert a string to kebab-case: lowercase, ASCII-only, hyphens for separators.
+    /// Non-ASCII characters are stripped. Multiple hyphens are collapsed.
+    /// Leading/trailing hyphens are trimmed.
+    ///
+    /// - Parameter input: Any string, typically a meeting title or app name.
+    /// - Returns: A slug of at most 60 characters, or `"meeting"` when nothing usable remains.
+    static func toKebabCase(_ input: String) -> String {
+        let lowered = input.lowercased()
+
+        // Replace non-alphanumeric ASCII with hyphens, strip non-ASCII
+        var result = ""
+        for scalar in lowered.unicodeScalars {
+            if scalar.isASCII {
+                let char = Character(scalar)
+                if char.isLetter || char.isNumber {
+                    result.append(char)
+                } else {
+                    result.append("-")
+                }
+            }
+            // Non-ASCII characters are silently dropped
+        }
+
+        // Collapse multiple hyphens
+        while result.contains("--") {
+            result = result.replacingOccurrences(of: "--", with: "-")
+        }
+
+        // Trim leading/trailing hyphens
+        result = result.trimmingCharacters(in: CharacterSet(charactersIn: "-"))
+
+        // Truncate to 60 characters per spec
+        if result.count > 60 {
+            result = String(result.prefix(60))
+            // Don't end on a hyphen after truncation
+            result = result.trimmingCharacters(in: CharacterSet(charactersIn: "-"))
+        }
+
+        // If nothing remains, use fallback
+        return result.isEmpty ? "meeting" : result
+    }
+
+    // MARK: - Filename Generation
+
+    /// Generate the filename: `YYYY-MM-DD-HHMM-kebab-title.md`
+    /// Handles collisions by appending -2, -3, etc.
+    ///
+    /// - Parameters:
+    ///   - title: Meeting title; `nil` is treated as `"meeting"`.
+    ///   - startedAt: Meeting start, rendered in the current time zone.
+    ///   - directory: Directory that is probed for existing files of the same name.
+    /// - Returns: A URL inside `directory` that does not currently exist.
+    static func resolveFilename(title: String?, startedAt: Date, directory: URL) -> URL {
+        let dateFmt = DateFormatter()
+        // A fixed-format pattern must be pinned to en_US_POSIX; otherwise the user's
+        // calendar and numbering preferences leak into the filename.
+        dateFmt.locale = Locale(identifier: "en_US_POSIX")
+        dateFmt.dateFormat = "yyyy-MM-dd-HHmm"
+        dateFmt.timeZone = TimeZone.current
+        let datePrefix = dateFmt.string(from: startedAt)
+
+        let titleSlug = toKebabCase(title ?? "meeting")
+        let baseName = "\(datePrefix)-\(titleSlug)"
+
+        let fm = FileManager.default
+        var candidate = directory.appendingPathComponent("\(baseName).md")
+        var counter = 2
+
+        while fm.fileExists(atPath: candidate.path) {
+            candidate = directory.appendingPathComponent("\(baseName)-\(counter).md")
+            counter += 1
+        }
+
+        return candidate
+    }
+
+    // MARK: - Stage 3: Insert LLM Sections
+
+    /// Insert LLM-generated sections (Summary, Action Items, Decisions) into an existing
+    /// Stage 1+2 Markdown file. Updates frontmatter title and tags if provided.
+    ///
+    /// The body is rebuilt as: H1 title, the extracted LLM sections, then everything from
+    /// the `## Transcript` heading onwards. An empty or whitespace-only `newTitle` is
+    /// treated as "no new title" so it cannot blank out the heading or the `title:` field.
+    ///
+    /// - Parameters:
+    ///   - fileURL: The existing `.md` file to update.
+    ///   - llmMarkdown: The raw LLM-generated markdown (may contain ## Summary, ## Action Items, ## Decisions).
+    ///   - newTitle: An optional new title from the LLM.
+    ///   - tags: Optional tags array from the LLM.
+    /// - Returns: The (possibly renamed) URL of the updated file, or `nil` on failure.
+    @discardableResult
+    static func insertLLMSections(
+        fileURL: URL,
+        llmMarkdown: String,
+        newTitle: String? = nil,
+        tags: [String]? = nil
+    ) -> URL? {
+        guard let content = try? String(contentsOf: fileURL, encoding: .utf8) else {
+            writerLogger.error("Failed to read file for LLM insertion: \(fileURL.lastPathComponent, privacy: .public)")
+            return nil
+        }
+
+        // Parse frontmatter and body. Only lines that consist solely of `---` count as
+        // delimiters, so a `---` inside the title or the transcript cannot split the file.
+        func isDelimiter(_ line: String) -> Bool {
+            line.trimmingCharacters(in: .whitespaces) == "---"
+        }
+        let allLines = content.components(separatedBy: "\n")
+        guard let openIndex = allLines.firstIndex(where: { !$0.trimmingCharacters(in: .whitespaces).isEmpty }),
+              isDelimiter(allLines[openIndex]),
+              let closeIndex = allLines[(openIndex + 1)...].firstIndex(where: isDelimiter)
+        else {
+            writerLogger.error("No valid frontmatter in file: \(fileURL.lastPathComponent, privacy: .public)")
+            return nil
+        }
+
+        let originalFrontmatterLines = allLines[(openIndex + 1)..<closeIndex].filter { !$0.isEmpty }
+        let bodyLines = Array(allLines[(closeIndex + 1)...])
+
+        // Ignore a blank replacement title.
+        let effectiveTitle = newTitle.flatMap {
+            $0.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty ? nil : $0
+        }
+        let existingTitle = extractTitle(from: originalFrontmatterLines).flatMap { $0.isEmpty ? nil : $0 }
+        let resolvedTitle = effectiveTitle ?? existingTitle ?? "Meeting"
+        let updatedFrontmatter = rebuildFrontmatterWithUpdates(
+            originalLines: originalFrontmatterLines,
+            newTitle: effectiveTitle,
+            tags: tags
+        )
+
+        // Parse body to find ## Transcript
+        let transcriptStartIndex = bodyLines.firstIndex {
+            $0.trimmingCharacters(in: .whitespaces) == "## Transcript"
+        }
+
+        // Build new body: # Title + LLM sections + ## Transcript
+        var newBody: [String] = []
+        newBody.append("# \(resolvedTitle)")
+        newBody.append("")
+
+        // Insert LLM-generated sections
+        let llmSections = extractLLMSections(from: llmMarkdown)
+        if !llmSections.isEmpty {
+            newBody.append(llmSections)
+            newBody.append("")
+        }
+
+        // Append transcript section (everything from ## Transcript onwards)
+        if let transcriptStart = transcriptStartIndex {
+            let transcriptContent = bodyLines[transcriptStart...].joined(separator: "\n")
+            newBody.append(transcriptContent)
+        } else {
+            // Without the heading nothing of the previous body is carried over; make that visible.
+            writerLogger.warning("No transcript section found in file: \(fileURL.lastPathComponent, privacy: .public)")
+        }
+
+        let finalContent = "---\n\(updatedFrontmatter)\n---\n\n\(newBody.joined(separator: "\n"))"
+
+        // Write updated content
+        do {
+            try finalContent.write(to: fileURL, atomically: true, encoding: .utf8)
+        } catch {
+            writerLogger.error("Failed to write LLM sections: \(error.localizedDescription, privacy: .public)")
+            return nil
+        }
+
+        // Rename file if title changed
+        guard let effectiveTitle else { return fileURL }
+
+        let directory = fileURL.deletingLastPathComponent()
+        // Extract date from existing filename
+        let existingName = fileURL.deletingPathExtension().lastPathComponent
+        guard existingName.count >= 15 else { return fileURL }
+        let datePrefix = String(existingName.prefix(15)) // YYYY-MM-DD-HHMM
+
+        let newSlug = toKebabCase(effectiveTitle)
+        let newBaseName = "\(datePrefix)-\(newSlug)"
+        var newURL = directory.appendingPathComponent("\(newBaseName).md")
+
+        // Don't rename to self
+        if newURL.lastPathComponent == fileURL.lastPathComponent {
+            return fileURL
+        }
+
+        // Handle collision
+        var counter = 2
+        while FileManager.default.fileExists(atPath: newURL.path) {
+            newURL = directory.appendingPathComponent("\(newBaseName)-\(counter).md")
+            counter += 1
+        }
+
+        do {
+            try FileManager.default.moveItem(at: fileURL, to: newURL)
+            writerLogger.info("Renamed meeting file to: \(newURL.lastPathComponent, privacy: .public)")
+            return newURL
+        } catch {
+            writerLogger.warning("Failed to rename file: \(error.localizedDescription, privacy: .public)")
+            return fileURL
+        }
+    }
+
+    // MARK: 
- Stage 3 Helpers
+
+    /// Extract the title from frontmatter lines.
+    ///
+    /// - Parameter lines: The frontmatter lines between the two `---` delimiters.
+    /// - Returns: The value of the first `title:` line with surrounding double quotes removed
+    ///   and escapes reversed, or `nil` when there is no `title:` line.
+    private static func extractTitle(from lines: [String]) -> String? {
+        for line in lines {
+            let trimmed = line.trimmingCharacters(in: .whitespaces)
+            guard trimmed.hasPrefix("title:") else { continue }
+
+            let value = String(trimmed.dropFirst("title:".count)).trimmingCharacters(in: .whitespaces)
+            // Remove quotes
+            if value.count >= 2 && value.hasPrefix("\"") && value.hasSuffix("\"") {
+                return unescapeDoubleQuoted(String(value.dropFirst().dropLast()))
+            }
+            return value
+        }
+        return nil
+    }
+
+    /// Reverse the backslash escapes `yamlQuote` produces (`\\`, `\"`, `\n`, `\t`) plus `\r`.
+    /// Unknown escape sequences are kept verbatim.
+    private static func unescapeDoubleQuoted(_ value: String) -> String {
+        var result = ""
+        var iterator = value.makeIterator()
+        // Scan left to right so `\\n` decodes to a backslash followed by `n`, not a newline.
+        while let char = iterator.next() {
+            guard char == "\\" else {
+                result.append(char)
+                continue
+            }
+            guard let escapedChar = iterator.next() else {
+                result.append(char)
+                break
+            }
+            switch escapedChar {
+            case "\\": result.append("\\")
+            case "\"": result.append("\"")
+            case "n": result.append("\n")
+            case "t": result.append("\t")
+            case "r": result.append("\r")
+            default:
+                result.append(char)
+                result.append(escapedChar)
+            }
+        }
+        return result
+    }
+
+    /// Rebuild frontmatter with optional title and tags updates.
+    ///
+    /// - Parameters:
+    ///   - originalLines: Existing frontmatter lines, without the `---` delimiters.
+    ///   - newTitle: When non-nil, replaces the value of the `title:` line.
+    ///   - tags: When non-nil, any existing `tags:` block is dropped; when also non-empty,
+    ///     a new block is appended after the last non-blank line.
+    /// - Returns: The updated frontmatter lines joined with newlines.
+    private static func rebuildFrontmatterWithUpdates(
+        originalLines: [String],
+        newTitle: String?,
+        tags: [String]?
+    ) -> String {
+        var result: [String] = []
+        var insideTags = false
+        // Tags are re-inserted at the end after stripping originals
+
+        for line in originalLines {
+            let trimmed = line.trimmingCharacters(in: .whitespaces)
+
+            // Track whether we are inside the multi-line `tags:` array. Any other key
+            // (a non-empty line that is not a `- ` item) ends it.
+            if trimmed.hasPrefix("tags:") {
+                insideTags = true
+            } else if !trimmed.hasPrefix("- ") && !trimmed.isEmpty {
+                insideTags = false
+            }
+
+            // Skip existing tags (we'll re-add them)
+            if tags != nil && (trimmed.hasPrefix("tags:") || (insideTags && trimmed.hasPrefix("- "))) {
+                continue
+            }
+
+            // Update title
+            if let newTitle, trimmed.hasPrefix("title:") {
+                result.append("title: \(yamlQuote(newTitle))")
+                continue
+            }
+
+            result.append(line)
+        }
+
+        // Insert tags before the end
+        if let tags, !tags.isEmpty {
+            // Insert right after the last non-blank line
+            let lastContentIndex = result.lastIndex {
+                !$0.trimmingCharacters(in: .whitespaces).isEmpty
+            }
+            let insertIndex = lastContentIndex.map { $0 + 1 } ?? result.count
+
+            // Tags come from the LLM, so quote them: a `:` or `#` in a bare scalar would
+            // otherwise break the YAML.
+            let tagLines = ["tags:"] + tags.map { " - \(yamlQuote($0))" }
+            result.insert(contentsOf: tagLines, at: insertIndex)
+        }
+
+        return result.joined(separator: "\n")
+    }
+
+    /// Extract ## Summary, ## Action Items, ## Decisions sections from LLM markdown.
+    /// Returns the sections as a single string block ready for insertion.
+    ///
+    /// Sections are kept in the order they appear in `markdown`. A section ends at the next
+    /// `# ` or `## ` heading; content under any other heading is dropped.
+    static func extractLLMSections(from markdown: String) -> String {
+        let knownHeaders = ["## Summary", "## Action Items", "## Decisions"]
+        var sections: [String] = []
+        var currentSection: [String]?
+
+        // Close the section being collected, keeping it only if it has content.
+        func flushCurrentSection() {
+            guard let section = currentSection else { return }
+            let content = section.joined(separator: "\n")
+                .trimmingCharacters(in: .whitespacesAndNewlines)
+            if !content.isEmpty {
+                sections.append(content)
+            }
+            currentSection = nil
+        }
+
+        for line in markdown.components(separatedBy: "\n") {
+            let trimmed = line.trimmingCharacters(in: .whitespaces)
+
+            if knownHeaders.contains(where: { trimmed.hasPrefix($0) }) {
+                // A known heading starts a new section
+                flushCurrentSection()
+                currentSection = [line]
+            } else if trimmed.hasPrefix("## ") || trimmed.hasPrefix("# ") {
+                // End of a known section, hit an unknown heading
+                flushCurrentSection()
+            } else {
+                currentSection?.append(line)
+            }
+        }
+
+        // Flush last section
+        flushCurrentSection()
+
+        return sections.joined(separator: "\n\n")
+    }
+
+    // MARK: - Find Markdown File for Session
+
+    /// Find the `.md` file for a given session ID in the output directory.
+    /// Searches by the `x_openoats_session` frontmatter field.
+    ///
+    /// - Returns: The first matching file, or `nil` when the directory cannot be listed or
+    ///   no file matches.
+    static func findMarkdownFile(sessionID: String, in directory: URL) -> URL? {
+        let fm = FileManager.default
+        guard let files = try? fm.contentsOfDirectory(at: directory, includingPropertiesForKeys: nil) else {
+            return nil
+        }
+
+        // Build the needle with the same quoting the writer uses for this field, so IDs
+        // containing quotes or backslashes still match.
+        let needle = "x_openoats_session: \(yamlQuote(sessionID))"
+
+        for file in files where file.pathExtension == "md" {
+            guard let content = try? String(contentsOf: file, encoding: .utf8) else { continue }
+            if content.contains(needle) {
+                return file
+            }
+        }
+
+        return nil
+    }
+}
diff --git a/OpenOats/Sources/OpenOats/Models/Models.swift b/OpenOats/Sources/OpenOats/Models/Models.swift index a63ed93a..e050500e 100644 --- a/OpenOats/Sources/OpenOats/Models/Models.swift +++ b/OpenOats/Sources/OpenOats/Models/Models.swift @@ -257,6 +257,10 @@ struct SessionIndex: Identifiable, Codable, Sendable { var title: String? var utteranceCount: Int var hasNotes: Bool + /// The detected meeting application name (e.g. "Zoom", "Microsoft Teams"). + var meetingApp: String? + /// The ASR engine used for transcription (e.g. "parakeetV2"). + var engine: String? } struct SessionSidecar: Codable, Sendable { diff --git a/OpenOats/Sources/OpenOats/Settings/AppSettings.swift b/OpenOats/Sources/OpenOats/Settings/AppSettings.swift index 4ade36d5..a3ca365e 100644 --- a/OpenOats/Sources/OpenOats/Settings/AppSettings.swift +++ b/OpenOats/Sources/OpenOats/Settings/AppSettings.swift @@ -416,6 +416,19 @@ final class AppSettings { } } + /// When true, Apple's voice-processing IO is enabled on the mic input to cancel + /// speaker echo and reduce double-transcription when using built-in speakers + mic. 
+ @ObservationIgnored nonisolated(unsafe) private var _enableEchoCancellation: Bool + var enableEchoCancellation: Bool { + get { access(keyPath: \.enableEchoCancellation); return _enableEchoCancellation } + set { + withMutation(keyPath: \.enableEchoCancellation) { + _enableEchoCancellation = newValue + defaults.set(newValue, forKey: "enableEchoCancellation") + } + } + } + /// When true, uses the LLM to clean up filler words and fix punctuation in real-time. @ObservationIgnored nonisolated(unsafe) private var _enableTranscriptRefinement: Bool var enableTranscriptRefinement: Bool { @@ -555,6 +568,13 @@ final class AppSettings { self._saveAudioRecording = defaults.bool(forKey: "saveAudioRecording") self._enableTranscriptRefinement = defaults.bool(forKey: "enableTranscriptRefinement") + // Echo cancellation — default to enabled + if defaults.object(forKey: "enableEchoCancellation") == nil { + self._enableEchoCancellation = true + } else { + self._enableEchoCancellation = defaults.bool(forKey: "enableEchoCancellation") + } + // Default to true (shown) if key has never been set if defaults.object(forKey: "showLiveTranscript") == nil { self._showLiveTranscript = true diff --git a/OpenOats/Sources/OpenOats/Storage/SessionStore.swift b/OpenOats/Sources/OpenOats/Storage/SessionStore.swift index 564b8aee..66ad0de0 100644 --- a/OpenOats/Sources/OpenOats/Storage/SessionStore.swift +++ b/OpenOats/Sources/OpenOats/Storage/SessionStore.swift @@ -128,28 +128,33 @@ actor SessionStore { } } - /// Rewrite the current JSONL file, backfilling `refinedText` from the in-memory TranscriptStore. - /// - /// The 5-second delayed write often captures `refinedText` as nil because the LLM refinement - /// call hasn't finished yet. This method is called after both the refinement engine and pending - /// writes have drained, so the TranscriptStore now has the final refined text for all utterances. + /// Backfill refined text into the current session's JSONL from the in-memory TranscriptStore. 
func backfillRefinedText(from utterances: [Utterance]) { guard let currentFile else { return } - // Close the file handle so we can read/rewrite the file safely try? fileHandle?.close() fileHandle = nil - guard let content = try? String(contentsOf: currentFile, encoding: .utf8) else { return } + rewriteJSONLWithRefinedText(file: currentFile, utterances: utterances) + + fileHandle = try? FileHandle(forWritingTo: currentFile) + } + + /// Backfill refined text into a past session's JSONL. + func backfillRefinedText(sessionID: String, from utterances: [Utterance]) { + rewriteJSONLWithRefinedText(file: jsonlURL(for: sessionID), utterances: utterances) + } + + @discardableResult + private func rewriteJSONLWithRefinedText(file: URL, utterances: [Utterance]) -> Bool { + guard let content = try? String(contentsOf: file, encoding: .utf8) else { return false } let decoder = JSONDecoder() decoder.dateDecodingStrategy = .iso8601 let lines = content.components(separatedBy: "\n").filter { !$0.isEmpty } - guard !lines.isEmpty else { return } + guard !lines.isEmpty else { return false } - // Build a lookup from (timestamp, speaker) -> refinedText - // Uses ISO8601 string representation of the date for reliable matching let iso8601Formatter = ISO8601DateFormatter() iso8601Formatter.formatOptions = [.withInternetDateTime, .withFractionalSeconds] var refinedLookup: [String: String] = [:] @@ -159,11 +164,7 @@ actor SessionStore { refinedLookup[key] = refined } - guard !refinedLookup.isEmpty else { - // No refined text to backfill; reopen file handle and return - fileHandle = try? 
FileHandle(forWritingTo: currentFile) - return - } + guard !refinedLookup.isEmpty else { return false } var updatedLines: [String] = [] var anyUpdated = false @@ -175,7 +176,6 @@ actor SessionStore { continue } - // Only backfill if the record doesn't already have refinedText if record.refinedText == nil { let key = "\(iso8601Formatter.string(from: record.timestamp))|\(record.speaker.rawValue)" if let refined = refinedLookup[key] { @@ -204,11 +204,10 @@ actor SessionStore { if anyUpdated { let newContent = updatedLines.joined(separator: "\n") + "\n" - try? newContent.write(to: currentFile, atomically: true, encoding: .utf8) + try? newContent.write(to: file, atomically: true, encoding: .utf8) } - // Reopen file handle for any subsequent writes before endSession() - fileHandle = try? FileHandle(forWritingTo: currentFile) + return anyUpdated } func endSession() { @@ -360,7 +359,9 @@ actor SessionStore { templateSnapshot: idx.templateSnapshot, title: idx.title, utteranceCount: idx.utteranceCount, - hasNotes: true + hasNotes: true, + meetingApp: idx.meetingApp, + engine: idx.engine ), notes: notes ) @@ -391,7 +392,9 @@ actor SessionStore { templateSnapshot: idx.templateSnapshot, title: newTitle.isEmpty ? nil : newTitle, utteranceCount: idx.utteranceCount, - hasNotes: idx.hasNotes + hasNotes: idx.hasNotes, + meetingApp: idx.meetingApp, + engine: idx.engine ), notes: sidecar.notes ) diff --git a/OpenOats/Sources/OpenOats/Transcription/ParakeetBackend.swift b/OpenOats/Sources/OpenOats/Transcription/ParakeetBackend.swift index e7c86b99..537f5a5e 100644 --- a/OpenOats/Sources/OpenOats/Transcription/ParakeetBackend.swift +++ b/OpenOats/Sources/OpenOats/Transcription/ParakeetBackend.swift @@ -23,6 +23,11 @@ final class ParakeetBackend: TranscriptionBackend, @unchecked Sendable { return exists ? .ready : .needsDownload(prompt: "Transcription requires a one-time model download.") } + func clearModelCache() { + let cacheDir = AsrModels.defaultCacheDirectory(for: version) + try? 
FileManager.default.removeItem(at: cacheDir) + } + func prepare(onStatus: @Sendable (String) -> Void) async throws { onStatus("Downloading \(displayName)...") let models = try await AsrModels.downloadAndLoad(version: version) diff --git a/OpenOats/Sources/OpenOats/Transcription/Qwen3Backend.swift b/OpenOats/Sources/OpenOats/Transcription/Qwen3Backend.swift index a8db2da8..6e365550 100644 --- a/OpenOats/Sources/OpenOats/Transcription/Qwen3Backend.swift +++ b/OpenOats/Sources/OpenOats/Transcription/Qwen3Backend.swift @@ -12,6 +12,11 @@ final class Qwen3Backend: TranscriptionBackend, @unchecked Sendable { return exists ? .ready : .needsDownload(prompt: "Qwen3 ASR requires a one-time model download.") } + func clearModelCache() { + let cacheDir = Qwen3AsrModels.defaultCacheDirectory() + try? FileManager.default.removeItem(at: cacheDir) + } + func prepare(onStatus: @Sendable (String) -> Void) async throws { onStatus("Downloading \(displayName)...") let modelsDirectory = try await Qwen3AsrModels.download() diff --git a/OpenOats/Sources/OpenOats/Transcription/TranscriptionBackend.swift b/OpenOats/Sources/OpenOats/Transcription/TranscriptionBackend.swift index 6ce3bc67..863816b1 100644 --- a/OpenOats/Sources/OpenOats/Transcription/TranscriptionBackend.swift +++ b/OpenOats/Sources/OpenOats/Transcription/TranscriptionBackend.swift @@ -25,6 +25,13 @@ protocol TranscriptionBackend: Sendable { /// Transcribe a segment of Float32 audio samples at 16kHz mono. /// Returns the transcribed text, or empty string if no speech detected. func transcribe(_ samples: [Float], locale: Locale) async throws -> String + + /// Remove cached model files so the next prepare() triggers a fresh download. 
+ func clearModelCache() +} + +extension TranscriptionBackend { + func clearModelCache() {} } enum TranscriptionBackendError: Error { diff --git a/OpenOats/Sources/OpenOats/Transcription/TranscriptionEngine.swift b/OpenOats/Sources/OpenOats/Transcription/TranscriptionEngine.swift index 5f63ef5b..42fb7748 100644 --- a/OpenOats/Sources/OpenOats/Transcription/TranscriptionEngine.swift +++ b/OpenOats/Sources/OpenOats/Transcription/TranscriptionEngine.swift @@ -212,6 +212,11 @@ final class TranscriptionEngine { lastError = msg assetStatus = "Ready" isRunning = false + // Clear corrupt cache so the next attempt triggers a fresh download + settings.transcriptionModel.makeBackend().clearModelCache() + diagLog("[ENGINE-2-FAIL] cleared model cache for \(settings.transcriptionModel.rawValue)") + needsModelDownload = true + downloadConfirmed = false return } @@ -228,11 +233,21 @@ final class TranscriptionEngine { return } currentMicDeviceID = targetMicID - diagLog("[ENGINE-3] starting mic capture, targetMicID=\(String(describing: targetMicID))") + // AEC (voice processing) conflicts with system audio capture on macOS — + // both cause CoreAudio aggregate-device reconfiguration that can stall the + // mic stream. Since system audio capture is always active during recording, + // AEC must be disabled to prevent capture failures. 
+ let useAEC = false + if settings.enableEchoCancellation { + diagLog("[ENGINE-3] AEC disabled — conflicts with system audio capture") + } + + diagLog("[ENGINE-3] starting mic capture, targetMicID=\(String(describing: targetMicID)), aec=\(useAEC)") startMicStream( locale: locale, vadManager: vadManager, - deviceID: targetMicID + deviceID: targetMicID, + echoCancellation: useAEC ) // Check for immediate mic capture failure @@ -241,13 +256,28 @@ final class TranscriptionEngine { lastError = micError } - // Health check: warn if mic produces no audio within 5 seconds + // Health check: if mic produces no audio within 5 seconds, retry once + // without AEC before surfacing the error. Task { @MainActor [weak self] in try? await Task.sleep(for: .seconds(5)) guard let self, self.isRunning else { return } if !self.micCapture.hasCapturedFrames && self.micCapture.captureError == nil { - diagLog("[ENGINE-HEALTH] no mic audio after 5s") - self.lastError = "Microphone is not producing audio. Check your input device in System Settings." + if useAEC { + diagLog("[ENGINE-HEALTH] no mic audio after 5s with AEC, retrying without") + self.micCapture.finishStream() + await self.micTask?.value + self.micTask = nil + self.micCapture.stop() + self.startMicStream( + locale: locale, + vadManager: vadManager, + deviceID: targetMicID, + echoCancellation: false + ) + } else { + diagLog("[ENGINE-HEALTH] no mic audio after 5s") + self.lastError = "Microphone is not producing audio. Check your input device in System Settings." 
+ } } } @@ -545,9 +575,10 @@ final class TranscriptionEngine { private func startMicStream( locale: Locale, vadManager: VadManager, - deviceID: AudioDeviceID + deviceID: AudioDeviceID, + echoCancellation: Bool = false ) { - var micStream = micCapture.bufferStream(deviceID: deviceID) + var micStream = micCapture.bufferStream(deviceID: deviceID, echoCancellation: echoCancellation) if let recorder = audioRecorder { micStream = Self.tappedStream(micStream) { buffer in recorder.writeMicBuffer(buffer) diff --git a/OpenOats/Sources/OpenOats/Transcription/WhisperKitBackend.swift b/OpenOats/Sources/OpenOats/Transcription/WhisperKitBackend.swift index f872404f..2add2611 100644 --- a/OpenOats/Sources/OpenOats/Transcription/WhisperKitBackend.swift +++ b/OpenOats/Sources/OpenOats/Transcription/WhisperKitBackend.swift @@ -19,6 +19,20 @@ final class WhisperKitBackend: TranscriptionBackend, @unchecked Sendable { ) } + func clearModelCache() { + let fm = FileManager.default + guard let documentsDir = fm.urls(for: .documentDirectory, in: .userDomainMask).first else { return } + let hfCacheDir = documentsDir + .appendingPathComponent("huggingface") + .appendingPathComponent("models") + .appendingPathComponent("argmaxinc") + .appendingPathComponent("whisperkit-coreml") + guard let contents = try? fm.contentsOfDirectory(atPath: hfCacheDir.path) else { return } + for entry in contents where entry.contains("whisper-\(variant.rawValue)") { + try? 
fm.removeItem(at: hfCacheDir.appendingPathComponent(entry)) + } + } + func prepare(onStatus: @Sendable (String) -> Void) async throws { onStatus("Downloading \(displayName)...") let manager = WhisperKitManager(variant: variant) diff --git a/OpenOats/Sources/OpenOats/Views/ControlBar.swift b/OpenOats/Sources/OpenOats/Views/ControlBar.swift index 4ccd94df..cacd05ae 100644 --- a/OpenOats/Sources/OpenOats/Views/ControlBar.swift +++ b/OpenOats/Sources/OpenOats/Views/ControlBar.swift @@ -16,7 +16,7 @@ struct ControlBar: View { // Error banner if let error = errorMessage { Text(error) - .font(.system(size: 10)) + .font(.system(size: 12)) .foregroundStyle(.red) .frame(maxWidth: .infinity, alignment: .leading) .padding(.horizontal, 16) @@ -45,9 +45,9 @@ struct ControlBar: View { if let status = statusMessage, status != "Ready" { HStack(spacing: 6) { ProgressView() - .controlSize(.mini) + .controlSize(.small) Text(status) - .font(.system(size: 11)) + .font(.system(size: 12)) .foregroundStyle(.secondary) .accessibilityIdentifier("app.controlBar.status") } diff --git a/OpenOats/Sources/OpenOats/Views/NotesView.swift b/OpenOats/Sources/OpenOats/Views/NotesView.swift index 22e29a57..13f085fa 100644 --- a/OpenOats/Sources/OpenOats/Views/NotesView.swift +++ b/OpenOats/Sources/OpenOats/Views/NotesView.swift @@ -12,23 +12,32 @@ struct NotesView: View { @State private var sessionToDelete: String? 
@State private var showDeleteConfirmation = false + enum DetailViewMode: String, CaseIterable { + case transcript = "Transcript" + case notes = "Notes" + } + + @State private var detailViewMode: DetailViewMode = .transcript + @State private var showingOriginal = false + var body: some View { - NavigationSplitView { + HStack(spacing: 0) { sidebar - } detail: { + .frame(width: 250) + Divider() detailContent + .frame(maxWidth: .infinity, maxHeight: .infinity) } .task { await coordinator.loadHistory() if let requested = coordinator.consumeRequestedSessionSelection() { selectedSessionID = requested + detailViewMode = .notes } else if let last = coordinator.lastEndedSession { selectedSessionID = last.id } } .onChange(of: coordinator.lastEndedSession?.id) { - // When a new session ends (even if Notes window is already open), - // refresh history and auto-select it if let last = coordinator.lastEndedSession { Task { await coordinator.loadHistory() @@ -39,11 +48,10 @@ struct NotesView: View { .onChange(of: coordinator.requestedSessionSelectionID) { if let requested = coordinator.consumeRequestedSessionSelection() { selectedSessionID = requested + // Deep links target notes, so default to the Notes tab + detailViewMode = .notes } } - .onChange(of: coordinator.sessionHistory.count) { - // Refresh sidebar when history changes - } } // MARK: - Sidebar @@ -102,9 +110,8 @@ struct NotesView: View { } } } - .navigationTitle("Sessions") - .frame(minWidth: 200) - .accessibilityIdentifier("notes.sessionList") + .listStyle(.sidebar) + .frame(maxHeight: .infinity) .onChange(of: selectedSessionID) { loadSelectedSession() } @@ -126,143 +133,316 @@ struct NotesView: View { private var detailContent: some View { if let sessionID = selectedSessionID { VStack(spacing: 0) { - if coordinator.notesEngine.isGenerating { - generatingView - } else if let notes = loadedNotes { - notesReadyView(notes) - } else { - noNotesView(sessionID: sessionID) + detailToolbar + Divider() + detailBody(sessionID: 
sessionID) + } + .background { + Group { + Button("") { detailViewMode = .transcript } + .keyboardShortcut("1", modifiers: .command) + Button("") { detailViewMode = .notes } + .keyboardShortcut("2", modifiers: .command) } + .frame(width: 0, height: 0) + .opacity(0) + .accessibilityHidden(true) } } else { ContentUnavailableView("Select a Session", systemImage: "doc.text", description: Text("Choose a session from the sidebar to view or generate notes.")) } } + private enum CleanupState { + case notCleaned + case inProgress + case partiallyCleaned + case cleaned + } + + private var cleanupState: CleanupState { + if coordinator.cleanupEngine.isCleaningUp { return .inProgress } + guard !loadedTranscript.isEmpty else { return .notCleaned } + let hasAnyRefined = loadedTranscript.contains(where: { $0.refinedText != nil }) + if !hasAnyRefined { return .notCleaned } + let allRefined = !loadedTranscript.contains(where: { $0.refinedText == nil }) + return allRefined ? .cleaned : .partiallyCleaned + } + + @ViewBuilder + private var detailToolbar: some View { + HStack(spacing: 8) { + Picker("View", selection: $detailViewMode) { + ForEach(DetailViewMode.allCases, id: \.self) { mode in + Text(mode.rawValue).tag(mode) + } + } + .pickerStyle(.segmented) + .frame(minWidth: 120, maxWidth: 220) + .layoutPriority(1) + + Spacer(minLength: 4) + + if detailViewMode == .transcript { + transcriptToolbarActions + } else if detailViewMode == .notes { + notesToolbarActions + } + + Button { + copyCurrentContent() + } label: { + Label("Copy", systemImage: "doc.on.doc") + .font(.system(size: 12)) + } + .labelStyle(.iconOnly) + .buttonStyle(.bordered) + .disabled(copyContentIsEmpty) + .help("Copy to clipboard") + } + .padding(.horizontal, 16) + .padding(.vertical, 10) + } + + @ViewBuilder + private var transcriptToolbarActions: some View { + switch cleanupState { + case .notCleaned: + Button { + cleanUpTranscript() + } label: { + Label("Clean Up", systemImage: "sparkles") + .font(.system(size: 12)) 
+ } + .buttonStyle(.borderedProminent) + .disabled(loadedTranscript.isEmpty) + .help("Remove filler words and fix punctuation") + + case .inProgress: + HStack(spacing: 6) { + Text("\(coordinator.cleanupEngine.chunksCompleted)/\(coordinator.cleanupEngine.totalChunks) cleaning...") + .font(.system(size: 12)) + .foregroundStyle(.secondary) + Button("Cancel") { + coordinator.cleanupEngine.cancel() + } + .buttonStyle(.bordered) + .font(.system(size: 11)) + .controlSize(.small) + } + + case .partiallyCleaned: + Button { + cleanUpTranscript() + } label: { + Label("Clean Up", systemImage: "sparkles") + .font(.system(size: 12)) + } + .buttonStyle(.borderedProminent) + .help("Clean up remaining utterances") + + Button { + showingOriginal.toggle() + } label: { + Label("Show Original", systemImage: showingOriginal ? "text.badge.checkmark" : "text.badge.minus") + .font(.system(size: 12)) + } + .buttonStyle(.bordered) + .tint(showingOriginal ? .accentColor : nil) + .help(showingOriginal ? "Showing original transcript" : "Show original transcript") + + case .cleaned: + Button { + showingOriginal.toggle() + } label: { + Label("Show Original", systemImage: showingOriginal ? "text.badge.checkmark" : "text.badge.minus") + .font(.system(size: 12)) + } + .buttonStyle(.bordered) + .tint(showingOriginal ? .accentColor : nil) + .help(showingOriginal ? 
"Showing original transcript" : "Show original transcript") + } + } + + @ViewBuilder + private var notesToolbarActions: some View { + if let notes = loadedNotes { + Menu { + ForEach(coordinator.templateStore.templates) { template in + Button { + regenerateNotes(with: template) + } label: { + Label(template.name, systemImage: template.icon) + } + .disabled(notes.template.id == template.id) + } + } label: { + Label(notes.template.name, systemImage: notes.template.icon) + .font(.system(size: 12)) + } primaryAction: { + regenerateNotes() + } + .menuStyle(.button) + .buttonStyle(.bordered) + .fixedSize() + .help("Click to regenerate, or pick a different template") + } + } + + @ViewBuilder + private func detailBody(sessionID: String) -> some View { + Group { + switch detailViewMode { + case .transcript: + transcriptView + case .notes: + notesTab(sessionID: sessionID) + } + } + .frame(maxWidth: .infinity, maxHeight: .infinity) + } + + @ViewBuilder + private func notesTab(sessionID: String) -> some View { + if coordinator.notesEngine.isGenerating { + generatingView + } else if let notes = loadedNotes { + notesContentView(notes) + } else { + notesEmptyState(sessionID: sessionID) + } + } + private var generatingView: some View { ScrollView { VStack(alignment: .leading, spacing: 12) { HStack { ProgressView() - .scaleEffect(0.8) + .controlSize(.small) Text("Generating notes...") - .font(.system(size: 13)) + .font(.system(size: 12)) .foregroundStyle(.secondary) .accessibilityIdentifier("notes.generating") Spacer() Button("Cancel") { coordinator.notesEngine.cancel() } - .buttonStyle(.plain) - .foregroundStyle(.red) + .buttonStyle(.bordered) + .font(.system(size: 11)) } markdownContent(coordinator.notesEngine.generatedMarkdown) } - .padding(20) + .padding(16) } } - private func notesReadyView(_ notes: EnhancedNotes) -> some View { - VStack(spacing: 0) { - // Toolbar - HStack { - Label(notes.template.name, systemImage: notes.template.icon) - .font(.system(size: 12)) - 
.foregroundStyle(.secondary) - Spacer() - Text("Generated \(notes.generatedAt, style: .relative) ago") - .font(.system(size: 11)) - .foregroundStyle(.tertiary) - - Button { - NSPasteboard.general.clearContents() - NSPasteboard.general.setString(notes.markdown, forType: .string) - } label: { - Label("Copy", systemImage: "doc.on.doc") - .font(.system(size: 12)) - } - .buttonStyle(.bordered) - - Button { - regenerateNotes() - } label: { - Label("Regenerate", systemImage: "arrow.clockwise") - .font(.system(size: 12)) - } - .buttonStyle(.bordered) - } - .padding(.horizontal, 20) - .padding(.vertical, 10) - - Divider() - - ScrollView { - markdownContent(notes.markdown) - .padding(20) - .accessibilityIdentifier("notes.renderedMarkdown") - } + private func notesContentView(_ notes: EnhancedNotes) -> some View { + ScrollView { + markdownContent(notes.markdown) + .padding(16) + .accessibilityIdentifier("notes.renderedMarkdown") } } - private func noNotesView(sessionID: String) -> some View { - VStack(spacing: 16) { + private func notesEmptyState(sessionID: String) -> some View { + ContentUnavailableView { + Label("Generate Notes", systemImage: "sparkles") + } description: { + Text("Summarize this transcript into structured meeting notes.") + } actions: { if let error = coordinator.notesEngine.error { Text(error) .foregroundStyle(.red) .font(.system(size: 12)) } - if !loadedTranscript.isEmpty { - // Transcript preview - ScrollView { - VStack(alignment: .leading, spacing: 4) { - ForEach(Array(loadedTranscript.prefix(20).enumerated()), id: \.offset) { _, record in - HStack(alignment: .top, spacing: 8) { - Text(record.speaker == .you ? "You" : "Them") - .font(.system(size: 11, weight: .semibold, design: .monospaced)) - .foregroundStyle(record.speaker == .you ? .blue : .green) - .frame(width: 35, alignment: .trailing) - Text(record.text) - .font(.system(size: 12)) - .foregroundStyle(.primary) - } - } - if loadedTranscript.count > 20 { - Text("... 
and \(loadedTranscript.count - 20) more utterances") - .font(.system(size: 11)) - .foregroundStyle(.tertiary) - .padding(.top, 4) - } - } - .padding(16) - } - .frame(maxHeight: 300) - .background(.quaternary.opacity(0.3)) - .clipShape(RoundedRectangle(cornerRadius: 8)) + Button { + generateNotes(sessionID: sessionID) + } label: { + Label("Generate Notes", systemImage: "sparkles") } + .buttonStyle(.borderedProminent) + .disabled(loadedTranscript.isEmpty) + .accessibilityIdentifier("notes.generateButton") + } + } - // Template picker for generation - HStack { - Picker("Template", selection: $selectedTemplateForGeneration) { - ForEach(coordinator.templateStore.templates) { template in - Label(template.name, systemImage: template.icon).tag(Optional(template)) + // MARK: - Transcript Views + + @ViewBuilder + private var transcriptView: some View { + if loadedTranscript.isEmpty { + ContentUnavailableView("No Transcript", systemImage: "waveform", description: Text("This session has no recorded utterances.")) + } else { + ScrollView { + if coordinator.cleanupEngine.isCleaningUp { + cleanupProgressBanner + } + if let cleanupError = coordinator.cleanupEngine.error { + Text(cleanupError) + .font(.system(size: 12)) + .foregroundStyle(.red) + .frame(maxWidth: .infinity, alignment: .leading) + .padding(.horizontal, 16) + .padding(.vertical, 4) + } + LazyVStack(alignment: .leading, spacing: 8) { + let isCleaning = coordinator.cleanupEngine.isCleaningUp + ForEach(Array(loadedTranscript.enumerated()), id: \.offset) { _, record in + transcriptRow(record: record, isCleaning: isCleaning) } } - .frame(maxWidth: 200) + .padding(16) + } + } + } - Button { - generateNotes(sessionID: sessionID) - } label: { - Label("Generate Notes", systemImage: "sparkles") - } - .buttonStyle(.borderedProminent) - .disabled(loadedTranscript.isEmpty) - .accessibilityIdentifier("notes.generateButton") + private var cleanupProgressBanner: some View { + HStack(spacing: 8) { + ProgressView() + 
.controlSize(.small) + Text("Cleaning up transcript... \(coordinator.cleanupEngine.chunksCompleted)/\(coordinator.cleanupEngine.totalChunks) sections") + .font(.system(size: 12)) + .lineLimit(1) + .foregroundStyle(.secondary) + Spacer() + Button("Cancel") { + coordinator.cleanupEngine.cancel() } + .buttonStyle(.bordered) + .font(.system(size: 11)) + } + .padding(.horizontal, 16) + .padding(.vertical, 8) + .background(.bar) + } + + @ViewBuilder + private func transcriptRow(record: SessionRecord, isCleaning: Bool) -> some View { + HStack(alignment: .firstTextBaseline, spacing: 8) { + Text(record.speaker == .you ? "You" : "Them") + .font(.system(size: 11, weight: .semibold)) + .foregroundStyle(record.speaker == .you ? Color.youColor : Color.themColor) + .frame(width: 36, alignment: .trailing) + + let displayText = showingOriginal ? record.text : (record.refinedText ?? record.text) + Text(displayText) + .font(.system(size: 13)) + .foregroundStyle( + isCleaning && record.refinedText == nil ? .secondary : .primary + ) + .textSelection(.enabled) + } + } + + private var copyContentIsEmpty: Bool { + switch detailViewMode { + case .transcript: + return loadedTranscript.isEmpty + case .notes: + return loadedNotes == nil } - .padding(20) - .frame(maxWidth: .infinity, maxHeight: .infinity) } // MARK: - Markdown Rendering @@ -334,7 +514,6 @@ struct NotesView: View { } } - // Final section if currentHeading != nil || !currentBody.isEmpty { sections.append(MarkdownSection(heading: currentHeading, level: currentLevel, body: currentBody.joined(separator: "\n").trimmingCharacters(in: .whitespacesAndNewlines))) } @@ -344,6 +523,23 @@ struct NotesView: View { // MARK: - Actions + private func copyCurrentContent() { + let text: String + switch detailViewMode { + case .transcript: + text = loadedTranscript.map { record in + let label = record.speaker == .you ? "You" : "Them" + let content = showingOriginal ? record.text : (record.refinedText ?? 
record.text) + return "[\(Self.transcriptTimeFormatter.string(from: record.timestamp))] \(label): \(content)" + }.joined(separator: "\n") + case .notes: + text = loadedNotes?.markdown ?? "" + } + + NSPasteboard.general.clearContents() + NSPasteboard.general.setString(text, forType: .string) + } + private func loadSelectedSession() { guard let sessionID = selectedSessionID else { loadedNotes = nil @@ -351,11 +547,20 @@ struct NotesView: View { return } + loadedNotes = nil + loadedTranscript = [] + showingOriginal = false + coordinator.cleanupEngine.cancel() + Task { - loadedNotes = await coordinator.sessionStore.loadNotes(sessionID: sessionID) - loadedTranscript = await coordinator.sessionStore.loadTranscript(sessionID: sessionID) + let notes = await coordinator.sessionStore.loadNotes(sessionID: sessionID) + let transcript = await coordinator.sessionStore.loadTranscript(sessionID: sessionID) + + guard selectedSessionID == sessionID else { return } + + loadedNotes = notes + loadedTranscript = transcript - // Default template for generation let session = coordinator.sessionHistory.first { $0.id == sessionID } if let snapID = session?.templateSnapshot?.id { selectedTemplateForGeneration = coordinator.templateStore.template(for: snapID) @@ -377,7 +582,6 @@ struct NotesView: View { settings: settings ) - // Save completed notes if !coordinator.notesEngine.generatedMarkdown.isEmpty { let notes = EnhancedNotes( template: coordinator.templateStore.snapshot(of: template), @@ -387,7 +591,18 @@ struct NotesView: View { await coordinator.sessionStore.saveNotes(sessionID: sessionID, notes: notes) loadedNotes = notes - // Refresh history to update hasNotes + // Update the structured Markdown file with LLM-generated sections + let outputDir = URL(fileURLWithPath: settings.notesFolderPath) + if let mdFile = MarkdownMeetingWriter.findMarkdownFile( + sessionID: sessionID, + in: outputDir + ) { + MarkdownMeetingWriter.insertLLMSections( + fileURL: mdFile, + llmMarkdown: 
coordinator.notesEngine.generatedMarkdown + ) + } + await coordinator.loadHistory() } } @@ -413,9 +628,42 @@ struct NotesView: View { } } - private func regenerateNotes() { + private func regenerateNotes(with template: MeetingTemplate? = nil) { guard let sessionID = selectedSessionID else { return } + if let template { + selectedTemplateForGeneration = template + } loadedNotes = nil generateNotes(sessionID: sessionID) } + + private static let transcriptTimeFormatter: DateFormatter = { + let f = DateFormatter() + f.dateFormat = "HH:mm:ss" + return f + }() + + private func cleanUpTranscript() { + guard let sessionID = selectedSessionID, !loadedTranscript.isEmpty else { return } + + Task { + let updated = await coordinator.cleanupEngine.cleanup( + records: loadedTranscript, + settings: settings + ) + + let utterances = updated.map { record in + Utterance( + text: record.text, + speaker: record.speaker, + timestamp: record.timestamp, + refinedText: record.refinedText + ) + } + await coordinator.sessionStore.backfillRefinedText(sessionID: sessionID, from: utterances) + + guard selectedSessionID == sessionID else { return } + loadedTranscript = await coordinator.sessionStore.loadTranscript(sessionID: sessionID) + } + } } diff --git a/OpenOats/Sources/OpenOats/Views/SettingsView.swift b/OpenOats/Sources/OpenOats/Views/SettingsView.swift index 0e645650..ed1b31a1 100644 --- a/OpenOats/Sources/OpenOats/Views/SettingsView.swift +++ b/OpenOats/Sources/OpenOats/Views/SettingsView.swift @@ -149,6 +149,12 @@ struct SettingsView: View { Text("Save a local audio file (.m4a) alongside each transcript. Audio never leaves your device.") .font(.system(size: 11)) .foregroundStyle(.secondary) + + Toggle("Echo cancellation", isOn: $settings.enableEchoCancellation) + .font(.system(size: 12)) + Text("Reduces duplicate transcription when using speakers and microphone simultaneously. 
Currently disabled during recording because it conflicts with system audio capture on macOS.") + .font(.system(size: 11)) + .foregroundStyle(.secondary) } Section("Transcription") { @@ -179,9 +185,9 @@ struct SettingsView: View { .font(.system(size: 11)) .foregroundStyle(.secondary) - Toggle("Refine transcript", isOn: $settings.enableTranscriptRefinement) + Toggle("Clean up transcript during recording", isOn: $settings.enableTranscriptRefinement) .font(.system(size: 12)) - Text("Uses your LLM provider to clean up filler words and fix punctuation in real-time. Original text is preserved.") + Text("Automatically removes filler words and fixes punctuation as you record. You can always clean up past transcripts manually from the Notes window.") .font(.system(size: 11)) .foregroundStyle(.secondary) diff --git a/OpenOats/Tests/OpenOatsTests/MarkdownMeetingWriterTests.swift b/OpenOats/Tests/OpenOatsTests/MarkdownMeetingWriterTests.swift new file mode 100644 index 00000000..1e518e59 --- /dev/null +++ b/OpenOats/Tests/OpenOatsTests/MarkdownMeetingWriterTests.swift @@ -0,0 +1,554 @@ +import XCTest +@testable import OpenOatsKit + +final class MarkdownMeetingWriterTests: XCTestCase { + + // MARK: - Kebab Case Conversion + + func testKebabCaseBasic() { + XCTAssertEqual(MarkdownMeetingWriter.toKebabCase("Weekly Product Sync"), "weekly-product-sync") + } + + func testKebabCaseWithSpecialCharacters() { + XCTAssertEqual(MarkdownMeetingWriter.toKebabCase("Q1 Launch: Planning!"), "q1-launch-planning") + } + + func testKebabCaseCollapsesMultipleHyphens() { + XCTAssertEqual(MarkdownMeetingWriter.toKebabCase("hello---world"), "hello-world") + } + + func testKebabCaseTrimsEdgeHyphens() { + XCTAssertEqual(MarkdownMeetingWriter.toKebabCase("--hello--"), "hello") + } + + func testKebabCaseEmptyInputReturnsMeeting() { + XCTAssertEqual(MarkdownMeetingWriter.toKebabCase(""), "meeting") + } + + func testKebabCaseNonASCIIStripped() { + // Non-ASCII characters are stripped; if only non-ASCII 
remains, returns "meeting" + XCTAssertEqual(MarkdownMeetingWriter.toKebabCase("spotkanie"), "spotkanie") + // Pure non-ASCII (e.g., Chinese) should fallback + XCTAssertEqual(MarkdownMeetingWriter.toKebabCase("\u{4F1A}\u{8BAE}"), "meeting") + } + + func testKebabCaseTruncatesLongStrings() { + let longTitle = String(repeating: "a", count: 100) + let result = MarkdownMeetingWriter.toKebabCase(longTitle) + XCTAssertLessThanOrEqual(result.count, 60) + } + + func testKebabCaseMixedASCIIAndNonASCII() { + XCTAssertEqual(MarkdownMeetingWriter.toKebabCase("Meeting z klientem"), "meeting-z-klientem") + } + + // MARK: - YAML Quoting + + func testYamlQuoteSimpleString() { + XCTAssertEqual(MarkdownMeetingWriter.yamlQuote("Meeting"), "\"Meeting\"") + } + + func testYamlQuoteStringWithColons() { + XCTAssertEqual( + MarkdownMeetingWriter.yamlQuote("Feature Flag Rollout: New Editor"), + "\"Feature Flag Rollout: New Editor\"" + ) + } + + func testYamlQuoteStringWithDoubleQuotes() { + XCTAssertEqual( + MarkdownMeetingWriter.yamlQuote("The \"big\" meeting"), + "\"The \\\"big\\\" meeting\"" + ) + } + + func testYamlQuoteStringWithBackslashes() { + XCTAssertEqual( + MarkdownMeetingWriter.yamlQuote("path\\to\\file"), + "\"path\\\\to\\\\file\"" + ) + } + + func testYamlQuoteYesNoBooleanSafety() { + // "yes" unquoted would be parsed as boolean true in YAML + let result = MarkdownMeetingWriter.yamlQuote("yes") + XCTAssertEqual(result, "\"yes\"") + } + + // MARK: - Relative Timestamp Conversion + + func testRelativeTimestampZero() { + let start = Date() + XCTAssertEqual( + MarkdownMeetingWriter.formatRelativeTimestamp(start, relativeTo: start), + "00:00:00" + ) + } + + func testRelativeTimestampMinutesAndSeconds() { + let start = Date() + let later = start.addingTimeInterval(65) // 1 min 5 sec + XCTAssertEqual( + MarkdownMeetingWriter.formatRelativeTimestamp(later, relativeTo: start), + "00:01:05" + ) + } + + func testRelativeTimestampOverOneHour() { + let start = Date() + let later = 
start.addingTimeInterval(3661) // 1 hour, 1 min, 1 sec + XCTAssertEqual( + MarkdownMeetingWriter.formatRelativeTimestamp(later, relativeTo: start), + "01:01:01" + ) + } + + func testRelativeTimestampNegativeClampedToZero() { + let start = Date() + let earlier = start.addingTimeInterval(-30) // 30 sec before start + XCTAssertEqual( + MarkdownMeetingWriter.formatRelativeTimestamp(earlier, relativeTo: start), + "00:00:00" + ) + } + + // MARK: - Transcript Line Formatting + + func testTranscriptLineFormat() { + let start = Date() + let records = [ + SessionRecord(speaker: .you, text: "Hello", timestamp: start), + SessionRecord(speaker: .them, text: "Hi there", timestamp: start.addingTimeInterval(5)), + ] + + let output = MarkdownMeetingWriter.formatTranscriptLines(records: records, startedAt: start) + + XCTAssertTrue(output.contains("[00:00:00] **You:** Hello")) + XCTAssertTrue(output.contains("[00:00:05] **Them:** Hi there")) + } + + func testTranscriptLinePrefersRefinedText() { + let start = Date() + let record = SessionRecord( + speaker: .them, + text: "um uh like hello", + timestamp: start, + refinedText: "Hello." 
+ ) + + let output = MarkdownMeetingWriter.formatTranscriptLines(records: [record], startedAt: start) + XCTAssertTrue(output.contains("**Them:** Hello.")) + XCTAssertFalse(output.contains("um uh like hello")) + } + + func testTranscriptLineBlankLineSeparation() { + let start = Date() + let records = [ + SessionRecord(speaker: .you, text: "One", timestamp: start), + SessionRecord(speaker: .them, text: "Two", timestamp: start.addingTimeInterval(3)), + ] + + let output = MarkdownMeetingWriter.formatTranscriptLines(records: records, startedAt: start) + let lines = output.components(separatedBy: "\n") + + // Should be: line1, blank, line2, blank (trailing) + XCTAssertTrue(lines.count >= 4) + XCTAssertTrue(lines[0].hasPrefix("[00:00:00]")) + XCTAssertEqual(lines[1], "") + XCTAssertTrue(lines[2].hasPrefix("[00:00:03]")) + } + + // MARK: - Frontmatter Generation + + func testFrontmatterContainsRequiredFields() { + let start = Date() + let metadata = MarkdownMeetingWriter.Metadata( + from: SessionIndex( + id: "session_2026-03-20_14-00-06", + startedAt: start, + endedAt: start.addingTimeInterval(1920), // 32 minutes + utteranceCount: 10, + hasNotes: false, + engine: "parakeetV2" + ) + ) + + let records = [ + SessionRecord(speaker: .you, text: "Hello", timestamp: start), + SessionRecord(speaker: .them, text: "Hi", timestamp: start.addingTimeInterval(1920)), + ] + + let frontmatter = MarkdownMeetingWriter.buildFrontmatter( + metadata: metadata, records: records, title: "Meeting" + ) + + XCTAssertTrue(frontmatter.hasPrefix("---")) + XCTAssertTrue(frontmatter.hasSuffix("---")) + XCTAssertTrue(frontmatter.contains("schema: openoats/v1")) + XCTAssertTrue(frontmatter.contains("title: \"Meeting\"")) + XCTAssertTrue(frontmatter.contains("duration: 32")) + XCTAssertTrue(frontmatter.contains("participants:")) + XCTAssertTrue(frontmatter.contains(" - You")) + XCTAssertTrue(frontmatter.contains(" - Them")) + XCTAssertTrue(frontmatter.contains("engine: parakeetV2")) + } + + func 
testFrontmatterIncludesMeetingApp() { + let start = Date() + let metadata = MarkdownMeetingWriter.Metadata( + from: SessionIndex( + id: "test", + startedAt: start, + endedAt: start.addingTimeInterval(60), + utteranceCount: 1, + hasNotes: false, + meetingApp: "Zoom", + engine: "parakeetV2" + ) + ) + + let records = [SessionRecord(speaker: .you, text: "Hi", timestamp: start)] + let frontmatter = MarkdownMeetingWriter.buildFrontmatter( + metadata: metadata, records: records, title: "Meeting" + ) + + XCTAssertTrue(frontmatter.contains("app: zoom")) + } + + func testFrontmatterIncludesSessionExtension() { + let start = Date() + let metadata = MarkdownMeetingWriter.Metadata( + from: SessionIndex( + id: "session_2026-03-20_14-00-06", + startedAt: start, + utteranceCount: 1, + hasNotes: false + ) + ) + + let records = [SessionRecord(speaker: .you, text: "Hi", timestamp: start)] + let frontmatter = MarkdownMeetingWriter.buildFrontmatter( + metadata: metadata, records: records, title: "Meeting" + ) + + XCTAssertTrue(frontmatter.contains("x_openoats_session: \"session_2026-03-20_14-00-06\"")) + } + + // MARK: - Duration Computation + + func testDurationFromEndedAt() { + let start = Date() + let metadata = MarkdownMeetingWriter.Metadata( + from: SessionIndex( + id: "test", + startedAt: start, + endedAt: start.addingTimeInterval(1920), // 32 minutes + utteranceCount: 2, + hasNotes: false + ) + ) + + let records = [ + SessionRecord(speaker: .you, text: "a", timestamp: start), + SessionRecord(speaker: .them, text: "b", timestamp: start.addingTimeInterval(60)), + ] + + XCTAssertEqual(MarkdownMeetingWriter.computeDuration(records: records, metadata: metadata), 32) + } + + func testDurationMinimumIsOne() { + let start = Date() + let metadata = MarkdownMeetingWriter.Metadata( + from: SessionIndex( + id: "test", + startedAt: start, + endedAt: start.addingTimeInterval(10), // 10 seconds + utteranceCount: 1, + hasNotes: false + ) + ) + + let records = [SessionRecord(speaker: .you, text: 
"a", timestamp: start)] + XCTAssertEqual(MarkdownMeetingWriter.computeDuration(records: records, metadata: metadata), 1) + } + + // MARK: - App Name Normalization + + func testNormalizeAppNameZoom() { + XCTAssertEqual(MarkdownMeetingWriter.normalizeAppName("Zoom"), "zoom") + } + + func testNormalizeAppNameMicrosoftTeams() { + XCTAssertEqual(MarkdownMeetingWriter.normalizeAppName("Microsoft Teams"), "teams") + } + + func testNormalizeAppNameFaceTime() { + XCTAssertEqual(MarkdownMeetingWriter.normalizeAppName("FaceTime"), "facetime") + } + + func testNormalizeAppNameGoogleMeetPWA() { + XCTAssertEqual(MarkdownMeetingWriter.normalizeAppName("Google Meet (PWA)"), "meet") + } + + func testNormalizeAppNameUnknown() { + XCTAssertEqual(MarkdownMeetingWriter.normalizeAppName("MyVideoApp"), "myvideoapp") + } + + // MARK: - Full Markdown Output + + func testBuildMarkdownProducesCompleteFile() { + let start = Date() + let metadata = MarkdownMeetingWriter.Metadata( + from: SessionIndex( + id: "session_2026-03-20_14-00-06", + startedAt: start, + endedAt: start.addingTimeInterval(120), + utteranceCount: 2, + hasNotes: false, + meetingApp: "Zoom", + engine: "parakeetV2" + ) + ) + + let records = [ + SessionRecord(speaker: .you, text: "Hello world", timestamp: start), + SessionRecord( + speaker: .them, text: "raw text", + timestamp: start.addingTimeInterval(5), + refinedText: "Refined text here." 
+ ), + ] + + let markdown = MarkdownMeetingWriter.buildMarkdown(metadata: metadata, records: records) + + // Verify structure + XCTAssertTrue(markdown.hasPrefix("---\n")) + XCTAssertTrue(markdown.contains("schema: openoats/v1")) + XCTAssertTrue(markdown.contains("# Meeting")) + XCTAssertTrue(markdown.contains("## Transcript")) + XCTAssertTrue(markdown.contains("[00:00:00] **You:** Hello world")) + XCTAssertTrue(markdown.contains("[00:00:05] **Them:** Refined text here.")) + // Refined text should be used, not raw + XCTAssertFalse(markdown.contains("raw text")) + } + + // MARK: - File Writing + + func testWriteCreatesFileOnDisk() { + let tmpDir = FileManager.default.temporaryDirectory + .appendingPathComponent("OpenOatsTest-\(UUID().uuidString)") + defer { try? FileManager.default.removeItem(at: tmpDir) } + + let start = Date() + let metadata = MarkdownMeetingWriter.Metadata( + from: SessionIndex( + id: "test_session", + startedAt: start, + endedAt: start.addingTimeInterval(60), + utteranceCount: 1, + hasNotes: false, + engine: "parakeetV2" + ) + ) + + let records = [SessionRecord(speaker: .you, text: "Test", timestamp: start)] + + let fileURL = MarkdownMeetingWriter.write( + metadata: metadata, + records: records, + outputDirectory: tmpDir + ) + + XCTAssertNotNil(fileURL) + XCTAssertTrue(FileManager.default.fileExists(atPath: fileURL!.path)) + XCTAssertTrue(fileURL!.lastPathComponent.hasSuffix(".md")) + + // Verify content + let content = try! String(contentsOf: fileURL!, encoding: .utf8) + XCTAssertTrue(content.contains("schema: openoats/v1")) + } + + func testWriteHandlesFilenameCollision() { + let tmpDir = FileManager.default.temporaryDirectory + .appendingPathComponent("OpenOatsTest-\(UUID().uuidString)") + defer { try? 
FileManager.default.removeItem(at: tmpDir) } + + let start = Date() + let metadata = MarkdownMeetingWriter.Metadata( + from: SessionIndex( + id: "test_session", + startedAt: start, + endedAt: start.addingTimeInterval(60), + utteranceCount: 1, + hasNotes: false + ) + ) + + let records = [SessionRecord(speaker: .you, text: "Test", timestamp: start)] + + // Write first file + let first = MarkdownMeetingWriter.write( + metadata: metadata, records: records, outputDirectory: tmpDir + )! + + // Write second file with same metadata (collision) + let second = MarkdownMeetingWriter.write( + metadata: metadata, records: records, outputDirectory: tmpDir + )! + + XCTAssertNotEqual(first.lastPathComponent, second.lastPathComponent) + XCTAssertTrue(second.lastPathComponent.contains("-2")) + } + + func testWriteReturnsNilForEmptyRecords() { + let tmpDir = FileManager.default.temporaryDirectory + .appendingPathComponent("OpenOatsTest-\(UUID().uuidString)") + defer { try? FileManager.default.removeItem(at: tmpDir) } + + let metadata = MarkdownMeetingWriter.Metadata( + from: SessionIndex( + id: "test", startedAt: Date(), utteranceCount: 0, hasNotes: false + ) + ) + + let result = MarkdownMeetingWriter.write( + metadata: metadata, records: [], outputDirectory: tmpDir + ) + + XCTAssertNil(result) + } + + // MARK: - LLM Section Extraction + + func testExtractLLMSectionsAllPresent() { + let llmOutput = """ + ## Summary + + The team discussed the launch timeline and decided to move it up. 
+ + ## Action Items + + - [ ] Update the timeline [owner:: You] [due:: 2026-03-25] + - [ ] Run load testing [owner:: Them] + + ## Decisions + + - Launch date set to April 15 + - Collaborative editing deferred to v1.1 + """ + + let sections = MarkdownMeetingWriter.extractLLMSections(from: llmOutput) + + XCTAssertTrue(sections.contains("## Summary")) + XCTAssertTrue(sections.contains("## Action Items")) + XCTAssertTrue(sections.contains("## Decisions")) + XCTAssertTrue(sections.contains("[owner:: You]")) + } + + func testExtractLLMSectionsOnlySummary() { + let llmOutput = """ + ## Summary + + A brief discussion about the product roadmap. + + ## Key Points + + - Point one + - Point two + """ + + let sections = MarkdownMeetingWriter.extractLLMSections(from: llmOutput) + + XCTAssertTrue(sections.contains("## Summary")) + // Key Points is not a recognized section, should not be included + XCTAssertFalse(sections.contains("## Key Points")) + } + + // MARK: - Find Markdown File + + func testFindMarkdownFileBySessionID() { + let tmpDir = FileManager.default.temporaryDirectory + .appendingPathComponent("OpenOatsTest-\(UUID().uuidString)") + defer { try? 
FileManager.default.removeItem(at: tmpDir) } + + let start = Date() + let sessionID = "session_2026-03-20_14-00-06" + let metadata = MarkdownMeetingWriter.Metadata( + from: SessionIndex( + id: sessionID, + startedAt: start, + endedAt: start.addingTimeInterval(60), + utteranceCount: 1, + hasNotes: false + ) + ) + + let records = [SessionRecord(speaker: .you, text: "Test", timestamp: start)] + MarkdownMeetingWriter.write( + metadata: metadata, records: records, outputDirectory: tmpDir + ) + + let found = MarkdownMeetingWriter.findMarkdownFile(sessionID: sessionID, in: tmpDir) + XCTAssertNotNil(found) + } + + // MARK: - Filename Format + + func testFilenameFormatMatchesSpec() { + let calendar = Calendar.current + var components = DateComponents() + components.year = 2026 + components.month = 3 + components.day = 20 + components.hour = 14 + components.minute = 0 + let date = calendar.date(from: components)! + + let tmpDir = FileManager.default.temporaryDirectory + .appendingPathComponent("OpenOatsTest-\(UUID().uuidString)") + try? FileManager.default.createDirectory(at: tmpDir, withIntermediateDirectories: true) + defer { try? FileManager.default.removeItem(at: tmpDir) } + + let url = MarkdownMeetingWriter.resolveFilename( + title: "Weekly Product Sync", + startedAt: date, + directory: tmpDir + ) + + XCTAssertEqual(url.lastPathComponent, "2026-03-20-1400-weekly-product-sync.md") + } + + func testFilenameWithNilTitleUsesMeeting() { + let tmpDir = FileManager.default.temporaryDirectory + .appendingPathComponent("OpenOatsTest-\(UUID().uuidString)") + try? FileManager.default.createDirectory(at: tmpDir, withIntermediateDirectories: true) + defer { try? 
FileManager.default.removeItem(at: tmpDir) } + + let url = MarkdownMeetingWriter.resolveFilename( + title: nil, + startedAt: Date(), + directory: tmpDir + ) + + XCTAssertTrue(url.lastPathComponent.contains("-meeting.md")) + } + + // MARK: - Speaker Label + + func testSpeakerLabelYou() { + XCTAssertEqual(MarkdownMeetingWriter.speakerLabel(.you), "You") + } + + func testSpeakerLabelThem() { + XCTAssertEqual(MarkdownMeetingWriter.speakerLabel(.them), "Them") + } + + // MARK: - ISO 8601 Formatting + + func testISO8601IncludesTimezone() { + let result = MarkdownMeetingWriter.formatISO8601(Date()) + // Should contain a timezone offset like +01:00 or -05:00 or Z + let hasTimezone = result.contains("+") || result.contains("Z") || result.hasSuffix("00") + XCTAssertTrue(hasTimezone, "ISO 8601 date should include timezone: \(result)") + } +} diff --git a/assets/openoats-wordmark.png b/assets/openoats-wordmark.png new file mode 100644 index 00000000..81e1783d Binary files /dev/null and b/assets/openoats-wordmark.png differ diff --git a/docs/example-transcript.md b/docs/example-transcript.md new file mode 100644 index 00000000..27e881d4 --- /dev/null +++ b/docs/example-transcript.md @@ -0,0 +1,110 @@ +--- +schema: openoats/v1 +title: "Notification System: Scope and Launch Plan" +date: 2026-03-18T10:30:00+01:00 +duration: 11 +participants: + - You + - Them +recorder: Szymon Sypniewicz +tags: + - product + - notifications + - launch +language: en +engine: parakeet-tdt-v2 +app: meet +--- + +# Notification System: Scope and Launch Plan + +## Summary + +Discussed the scope and timeline for shipping the in-app notification system. The original plan included real-time push notifications, email digests, and in-app alerts, but the team decided to cut email digests from v1 to avoid the deliverability rabbit hole (SPF, DKIM, reputation management). The notification system will ship with in-app alerts and optional browser push notifications only. Target launch is March 28. 
A soft rollout to beta users will happen on March 25, with three days of monitoring before the public release. The backend will use a simple polling architecture rather than WebSockets to keep infrastructure costs flat. + +## Action Items + +- [ ] Write the notification preferences UI component [owner:: You] [due:: 2026-03-21] +- [ ] Set up the notifications database table and API endpoints [owner:: Them] [due:: 2026-03-22] +- [ ] Deploy notification service to staging [owner:: Them] [due:: 2026-03-24] +- [ ] Draft the changelog entry for the notification feature [owner:: You] [due:: 2026-03-25] +- [ ] Run load test simulating 500 concurrent users polling for notifications [owner:: Them] [due:: 2026-03-25] +- [ ] Coordinate with beta users for soft rollout [owner:: You] [due:: 2026-03-25] + +## Decisions + +- Email digests cut from v1, will revisit in v1.1 +- Polling architecture instead of WebSockets for notifications +- 30-second polling interval as default, configurable per user +- Soft rollout to beta users on March 25, public launch March 28 +- Notifications auto-expire after 30 days + +## Transcript + +[00:00:00] **You:** Morning. I wanted to nail down the notification system scope before the weekend so we can start building Monday. + +[00:00:06] **Them:** Good timing. I was actually sketching out the data model last night. I think we are overcomplicating this. + +[00:00:12] **You:** How so? + +[00:00:14] **Them:** The original spec has three channels: in-app alerts, browser push, and email digests. The first two are straightforward. Email digests are a completely different beast. We need a transactional email provider, SPF records, DKIM signing, domain reputation management. It is a whole project on its own. + +[00:00:32] **You:** Yeah, I had that thought too. The email setup alone could take a week if we hit deliverability issues. + +[00:00:38] **Them:** Exactly. And honestly, who reads email digests? Our users live in the app. 
If we ship in-app alerts and browser push, that covers 95% of the use case. + +[00:00:48] **You:** I agree. Let's cut email digests from v1. We can revisit it in v1.1 if users actually ask for it. + +[00:00:55] **Them:** Good. Now, for the delivery mechanism. I know WebSockets are the trendy choice, but I think simple polling is better for us right now. + +[00:01:04] **You:** Because of infrastructure cost? + +[00:01:07] **Them:** Partly. WebSocket connections are persistent. If we have a thousand users online, that is a thousand open connections our server is maintaining. Polling lets us stay on a basic HTTP setup. No special infrastructure, no connection management, no reconnection logic on the client. + +[00:01:22] **You:** What polling interval are you thinking? + +[00:01:25] **Them:** 30 seconds default. Fast enough that notifications feel responsive, infrequent enough that we are not hammering the server. We can let power users configure it down to 10 seconds if they want. + +[00:01:37] **You:** That sounds reasonable. At 30 seconds, even with a few thousand active users, the load is trivial. + +[00:01:44] **Them:** Right. And if we ever need real-time, we can swap polling for WebSockets later without changing the notification data model. The upgrade path is clean. + +[00:01:53] **You:** Perfect. Let's talk timeline. We said end of March originally. Is that still realistic with the reduced scope? + +[00:02:00] **Them:** More than realistic. Without email digests, I think we can have the backend done by the 22nd. That gives us time to test and do a soft rollout. + +[00:02:09] **You:** I want to do a soft rollout to our beta users before the public launch. Maybe three days of monitoring. + +[00:02:16] **Them:** So beta on the 25th, public on the 28th? + +[00:02:19] **You:** Exactly. That gives us the weekend as a buffer too. If something breaks during beta, we have Monday and Tuesday to fix it. + +[00:02:27] **Them:** Works for me. 
One thing I want to decide now: notification expiry. Do they stay forever or auto-delete? + +[00:02:34] **You:** Auto-expire. Stale notifications are worse than no notifications. What is a reasonable window? + +[00:02:40] **Them:** 30 days. Long enough that people do not miss things on vacation, short enough that the database does not grow forever. + +[00:02:48] **You:** 30 days. Done. Let's split the work. I will take the frontend: the notification bell, the preferences panel, the dropdown list. You take the backend: database schema, API endpoints, the polling service. + +[00:03:01] **Them:** Agreed. I will have the database table and endpoints ready by the 22nd so you can start integrating the frontend against real data. + +[00:03:10] **You:** Good. And I need to write the changelog entry for this feature. I will do that on the 25th once we have the final build. + +[00:03:18] **Them:** One more thing. We should run a load test before the public launch. I want to simulate 500 concurrent users polling at 30-second intervals and make sure response times stay under 200ms. + +[00:03:30] **You:** Absolutely. Can you set that up as part of the staging deploy? + +[00:03:34] **Them:** Yeah. I will deploy to staging on the 24th and run the load test on the 25th, same day as the beta rollout. + +[00:03:42] **You:** Great. I will reach out to the beta group today and give them a heads up about the March 25th date. + +[00:03:49] **Them:** Sounds good. I think we are in good shape. + +[00:03:52] **You:** Agreed. Nice call on cutting the email digests. That would have derailed the whole timeline. + +[00:03:58] **Them:** Every feature you do not build is a feature that cannot break. + +[00:04:02] **You:** Words to live by. Talk Monday. + +[00:04:05] **Them:** See you then. 
diff --git a/docs/meeting-format-spec.md b/docs/meeting-format-spec.md new file mode 100644 index 00000000..3105770c --- /dev/null +++ b/docs/meeting-format-spec.md @@ -0,0 +1,796 @@ +# OpenOats Meeting Format Specification + +**Version:** 1.0 +**Status:** Draft +**License:** MIT + +The key words "MUST", "MUST NOT", "SHOULD", "SHOULD NOT", and "MAY" in this document are to be interpreted as described in RFC 2119. + +--- + +## Table of Contents + +- [Overview](#overview) +- [File Naming](#file-naming) +- [YAML Frontmatter](#yaml-frontmatter) +- [Processing Stages](#processing-stages) +- [Body Structure](#body-structure) +- [Transcript Line Format](#transcript-line-format) +- [Speaker Model](#speaker-model) +- [Extensibility](#extensibility) +- [Parsing Guide](#parsing-guide) +- [Versioning](#versioning) +- [Complete Example: Stage 1+2 File (No LLM)](#complete-example-stage-12-file-no-llm) +- [Complete Example: Stage 1+2+3 File (After LLM Processing)](#complete-example-stage-123-file-after-llm-processing) +- [Conformance](#conformance) +- [Security Considerations](#security-considerations) +- [Design Rationale (Non-normative)](#design-rationale-non-normative) +- [Acknowledgments](#acknowledgments) + +--- + +## Overview + +The OpenOats Meeting Format (`.md`) is a structured Markdown format for meeting transcripts. It replaces OpenOats' plain `.txt` output with a file that is simultaneously human-readable, grep-friendly, Obsidian-native, and parseable by LLM agents. + +### Goals + +1. **Human-readable** in any text editor, Obsidian, or GitHub preview +2. **Agent-ready** for LLM consumption (Claude Code, Cursor, RAG pipelines) +3. **CLI-friendly** with predictable patterns for grep/ripgrep +4. **Obsidian-native** with Dataview-queryable frontmatter and Dataview TASK-queryable action items +5. **Adoptable** by other tools as a shared standard for meeting transcripts +6. 
**Incrementally structured** so files are useful at every processing stage + +### Non-goals + +- Replacing the JSONL session store (that stays for word-level data, RAG hits, etc.) +- Encoding audio playback offsets with sub-second precision (use JSONL for that) +- Handling real-time streaming (this format describes the finished artifact) + +--- + +## File Naming + +Filenames MUST be valid UTF-8. The kebab-case title portion MUST contain only ASCII characters `[a-z0-9-]`. If the title contains non-ASCII characters that produce an empty slug after conversion, use the fallback title `meeting`. The title portion SHOULD NOT exceed 60 characters. + +``` +YYYY-MM-DD-HHMM-kebab-case-title.md +``` + +Examples: +``` +2026-03-20-1400-weekly-product-sync.md +2026-03-20-0930-investor-update.md +2026-03-21-1600-onboarding-call.md +``` + +Rules: +- Date and time MUST be the meeting start time in local time +- Time MUST be 24-hour, no separator between hours and minutes +- Title MUST be kebab-case, lowercase, ASCII only +- Filenames MUST NOT contain spaces (CLI-friendly) +- Lexical sort = chronological sort +- The filename is the file's unique identifier. No UUID field needed. +- If a file with the generated name already exists, implementations SHOULD append `-2`, `-3`, etc. before `.md`. + +When the app cannot determine a title (no LLM post-processing, no calendar integration), use a fallback: +``` +2026-03-20-1400-meeting.md +``` + +--- + +## YAML Frontmatter + +Every file starts with a YAML frontmatter block. + +### Field Reference + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `schema` | string | Yes | Format identifier. Always `openoats/v1` for this version. | +| `title` | string | Yes | Meeting title. Auto-generated from conversation topic, calendar event, or user edit. The H1 heading in the body MUST be identical to the `title` frontmatter value (after YAML string parsing). 
| +| `date` | ISO 8601 datetime | Yes | Meeting start time. Include timezone offset when available (e.g., `2026-03-20T14:00:00+01:00`). Omit timezone only if unknown. | +| `duration` | integer | Yes | Meeting duration in minutes, rounded to nearest minute. MUST be a positive integer (>= 1). | +| `participants` | string array | Yes | List of participant names. Default: `["You", "Them"]`. MUST contain at least one entry. See Speaker Model below. | +| `recorder` | string | No | Name of the person who recorded the meeting. Maps the speaker `You` to a real identity. The app SHOULD set this automatically from the system user name. | +| `tags` | string array | No | Topic tags. Auto-generated by LLM or user-assigned. Plain strings, no `#` prefix. | +| `language` | string | No | BCP 47 language code (e.g., `en`, `pl`, `de`). Defaults to `en` if omitted. | +| `engine` | string | No | ASR backend used for transcription (e.g., `parakeet-tdt-v2`, `qwen3-asr`, `whisper-large-v3`, `11labs-scribe-2`). | +| `app` | string | No | Detected meeting application, lowercase (e.g., `zoom`, `teams`, `meet`, `slack`, `facetime`). Omit if not detected. | +| `x_*` | any | No | Extension namespace. Any field prefixed with `x_` is valid. See Extensibility. | + +### Frontmatter Rules + +1. **UTF-8 without BOM, LF line endings.** Files MUST be encoded as UTF-8 without BOM. Lines MUST use LF (`\n`) line endings. +2. **YAML arrays, never comma-separated strings.** Array fields MUST use YAML array syntax (either inline `tags: [product, roadmap]` or block list with `- item` entries), not a comma-separated string like `tags: "product, roadmap"`. +3. **Plain text in frontmatter, wikilinks in body.** Frontmatter fields MUST NOT contain wikilink syntax (`[[Person Name]]`). +4. **Flat structure, no nesting.** Frontmatter MUST NOT use nested YAML objects. Dataview cannot query nested YAML objects without DataviewJS. +5. 
**Keep under 20 lines.** Frontmatter SHOULD be scannable and SHOULD NOT exceed 20 lines. +6. **The `title` field MUST always be quoted in YAML.** Unquoted titles risk silent coercion by YAML parsers (`yes` becomes boolean, `null` becomes empty, `#` starts a comment that truncates the value). +7. **Consistent types across files.** If `participants` is an array in one file, it MUST be an array in all files. + +### Minimal Frontmatter (Stage 1+2, no LLM) + +```yaml +--- +schema: openoats/v1 +title: "Meeting" +date: 2026-03-20T14:00:00+01:00 +duration: 32 +participants: + - You + - Them +engine: parakeet-tdt-v2 +--- +``` + +### Full Frontmatter (Stage 3, after LLM processing) + +```yaml +--- +schema: openoats/v1 +title: "Q1 Launch Planning" +date: 2026-03-20T14:00:00+01:00 +duration: 47 +participants: + - You + - Them +recorder: Szymon Sypniewicz +tags: + - product + - launch + - roadmap +language: en +engine: parakeet-tdt-v2 +app: zoom +--- +``` + +--- + +## Processing Stages + +OpenOats produces this file in stages. Stage 2 refines Stage 1 output in-place (filler removal, punctuation, speaker correction). Stage 3 inserts new sections without modifying what Stage 1+2 wrote. + +### Stage 1: Transcription + +Raw ASR output. Produces a file with: +- Frontmatter: `schema`, `title` (fallback), `date`, `duration`, `participants` (`You`/`Them`), `engine` +- Body: `# Title` and `## Transcript` only + +### Stage 2: Post-processing + +Cleanup applied to Stage 1 output: +- Filler word removal (uh, um, like, you know) +- Punctuation and capitalization correction +- Speaker attribution corrections (echo cancellation artifacts) +- Enrichment from available context (meeting app name from detection, title from conversation state) + +Stage 2 modifies the transcript text in-place and may update frontmatter fields (`title`, `app`). The file structure is identical to Stage 1. + +### Stage 3: Intelligence (optional, LLM) + +User triggers LLM post-processing. 
The LLM reads the Stage 1+2 file and generates: +- `## Summary` section +- `## Action Items` section +- `## Decisions` section (if applicable) +- `tags` array in frontmatter + +These sections are inserted between `# Title` and `## Transcript`. The transcript itself is not modified. + +--- + +## Body Structure + +The body follows the title-first, transcript-last principle. Synthesized content (summary, action items, decisions) goes at the top because LLMs weight the beginning and end of documents more heavily, and humans scanning the file want the high-signal content first. + +### Section Order + +``` +# Title + +## Summary <- Stage 3 only (LLM-generated) + +## Action Items <- Stage 3 only (LLM-generated) + +## Decisions <- Stage 3 only (LLM-generated, optional) + +## Transcript <- Stage 1+2 (always present) +``` + +### Required vs Optional Sections + +| Section | Required | Added by | +|---------|----------|----------| +| `# Title` | Yes | Stage 1 | +| `## Transcript` | Yes | Stage 1+2 | +| `## Summary` | No | Stage 3 (LLM) | +| `## Action Items` | No | Stage 3 (LLM) | +| `## Decisions` | No | Stage 3 (LLM) | + +A Stage 1+2 file (no LLM post-processing) contains only the title heading and the transcript section. This is a complete, valid file. + +A Stage 3 file has the Summary, Action Items, and Decisions sections inserted between the title and the transcript. + +Custom `## ` sections are permitted. They MUST appear between `## Decisions` (or `# Title` if no Stage 3 sections exist) and `## Transcript`. Parsers MUST ignore sections they do not recognize. + +### Section Details + +#### `# Title` + +The H1 heading matches the `title` frontmatter field. It appears once, at the top of the body. + +```markdown +# Q1 Launch Planning +``` + +#### `## Summary` + +One to three paragraphs. No bullet points in the summary. Written in past tense, describing what happened and what was decided. 
+ +```markdown +## Summary + +The team discussed moving the v1.0 launch from May 1 to April 15 to stay ahead of +TranscriptPro's desktop release. Engineering confirmed the encryption module is +production-ready but recommended deferring collaborative editing to v1.1 due to CRDT +complexity. The marketing site will lead with privacy-first messaging. +``` + +#### `## Action Items` + +Standard Markdown checkboxes with Obsidian Dataview inline fields for `owner` and `due`. + +```markdown +## Action Items + +- [ ] Finalize launch announcement blog post [owner:: You] [due:: 2026-03-25] +- [ ] Run load testing on SQLite concurrency [owner:: Them] [due:: 2026-03-28] +- [ ] Send revised timeline to stakeholders [owner:: You] [due:: 2026-03-22] +``` + +Format per line: +``` +- [ ] {task description} [owner:: {name}] [due:: {YYYY-MM-DD}] +``` + +Rules: +- `owner` MUST use the same name that appears in the `participants` array +- `due` MUST be an ISO 8601 date. Omit the `[due:: ...]` field entirely if no due date was discussed +- Completed items MUST use `[x]` or `[X]` instead of `[ ]` +- Each item MUST be a single line (no multi-line tasks) +- When both `owner` and `due` are present, `owner` MUST precede `due` +- Task descriptions MUST NOT contain Dataview inline field syntax (`[field:: value]`). All metadata goes in the trailing inline fields. + +#### `## Decisions` + +A flat bullet list of decisions made during the meeting. No IDs, no sub-structure. + +```markdown +## Decisions + +- Launch date set to April 15, moved up from May 1 +- Collaborative editing deferred to v1.1 +- Marketing hero copy leads with privacy, not open source +``` + +This section is optional even in Stage 3 output. If the LLM determines no decisions were made, omit the section entirely. + +#### `## Transcript` + +The raw transcript. Every utterance is a single line following the transcript line format described below. 
+ +--- + +## Transcript Line Format + +Each line in the `## Transcript` section follows this pattern: + +``` +[HH:MM:SS] **Speaker Name:** Utterance text here. +``` + +Each utterance MUST be a single line, regardless of length. Line wrapping within an utterance is not supported. + +### Formal Pattern + +``` +[{timestamp}] **{speaker}:** {text} +``` + +| Component | Format | Example | +|-----------|--------|---------| +| Timestamp | `HH:MM:SS` - hours, minutes, seconds, zero-padded | `[00:05:23]` | +| Speaker | Bold Markdown, followed by colon | `**You:**` | +| Text | Free text, single line | `I think we should launch earlier.` | + +### Regex for Parsing + +```regex +^\[(\d{2}:\d{2}:\d{2})\] \*\*(.+?):\*\* (.*)$ +``` + +Capture groups: +1. Timestamp (`00:05:23`) +2. Speaker name (`You`) +3. Utterance text (`I think we should launch earlier.`), may be empty + +> **Parser note:** Parsers SHOULD normalize speaker names by stripping Markdown bold markers (`**`) before comparison. + +### Timestamp Rules + +- Timestamps MUST be relative to meeting start, not wall-clock time +- Wall-clock time is in the frontmatter `date` field +- Format MUST always be `HH:MM:SS`, zero-padded (e.g., `00:01:05`, not `0:1:5`) +- For meetings over 24 hours (unlikely), hours MAY exceed 23: `[25:00:00]` + +### Blank Lines + +Utterances are separated by blank lines for readability: + +```markdown +[00:00:00] **You:** Let's get started. The main topic is the launch timeline. + +[00:00:08] **Them:** Sure. I looked at the latest metrics and usage is up significantly. + +[00:00:15] **You:** That matches what I'm hearing from users too. +``` + +Parsers SHOULD treat blank lines between transcript entries as cosmetic. They carry no semantic meaning. 
+ +--- + +## Speaker Model + +### Current State: You/Them + +OpenOats captures two audio streams: +- **Microphone** (your voice) mapped to speaker `You` +- **System audio** (remote participants) mapped to speaker `Them` + +There is no diarization between multiple remote speakers. All remote audio is attributed to `Them`. + +In Stage 1+2 output, `participants` is always `["You", "Them"]` and the transcript uses `**You:**` and `**Them:**`. + +### Future State: Named Participants + +When OpenOats gains participant identification (via calendar integration, manual labeling, or diarization), the format supports named speakers with no structural changes: + +```yaml +participants: + - Alice Chen + - Bob Martinez + - Carol Wu +``` + +```markdown +[00:00:00] **Alice Chen:** Let's get started. + +[00:00:08] **Bob Martinez:** Sure, I've got the slides ready. +``` + +The transition is seamless: `You`/`Them` are just speaker names. Named participants are also just speaker names. No format change required. + +### Speaker Rules + +1. Speaker names in transcript lines MUST match an entry in the `participants` array +2. Speaker names MUST be compared case-sensitively +3. Speaker names MUST NOT contain `*`, `:`, `[`, or `]` characters +4. When the app cannot identify individual remote speakers, all remote audio MUST use `Them` +5. The app SHOULD NOT invent names. Use `You`/`Them` until reliable identification exists + +--- + +## Extensibility + +Any field prefixed with `x_` in the frontmatter is a valid extension field. This namespace is reserved for tool-specific or user-specific metadata that is outside the core schema. + +### Examples + +```yaml +x_openoats_session: "session_2026-03-20_14-00-06" +x_openoats_template: "customer-discovery" +x_calendar_event_id: "abc123def456" +x_project: "OpenOats v1.0" +x_confidence: 0.92 +``` + +### Extension Rules + +1. Extension fields MUST start with `x_` +2. Extension fields are always optional. 
Implementations MUST NOT require any `x_` field for conformance. +3. Parsers MUST ignore extension fields they do not recognize +4. Tools SHOULD namespace their extensions: `x_toolname_field` (e.g., `x_openoats_session`) +5. Extension fields MUST follow all other frontmatter rules (flat structure, consistent types) + +--- + +## Parsing Guide + +### Error Handling + +- If the `schema` field is missing or unrecognized, parsers SHOULD emit a warning and attempt best-effort parsing. +- Transcript lines that do not match the regex SHOULD be preserved as-is but excluded from structured output. +- Parsers MUST NOT reject a file due to unknown `x_` extension fields or unknown `## ` sections. +- Parsers SHOULD emit a warning if a `[due:: ...]` field appears before `[owner:: ...]` in an action item line, as this violates the ordering rule and will not be captured correctly by the reference regex. + +### Reading Frontmatter (Python) + +```python +import warnings + +import yaml + +def parse_openoats(filepath): + with open(filepath) as f: + content = f.read() + + # Split frontmatter from body + parts = content.split("---", 2) + if len(parts) < 3: + raise ValueError("No YAML frontmatter found") + + meta = yaml.safe_load(parts[1]) + body = parts[2].strip() + + if meta.get("schema") != "openoats/v1": + # Missing or unrecognized schema: warn and continue with best-effort parsing + warnings.warn(f"Unrecognized schema: {meta.get('schema')}") + return meta, body +``` + +### Extracting Transcript Lines (Python) + +```python +import re + +TRANSCRIPT_RE = re.compile( + r"^\[(\d{2}:\d{2}:\d{2})\] \*\*(.+?):\*\* (.*)$" +) + +def extract_transcript(body): + lines = [] + for line in body.splitlines(): + match = TRANSCRIPT_RE.match(line) + if match: + lines.append({ + "timestamp": match.group(1), + "speaker": match.group(2), + "text": match.group(3), + }) + return lines +``` + +### Extracting Action Items (Python) + +```python +import re + +ACTION_RE = re.compile( + r"^- \[([ xX])\] (.+?)(?:\s*\[owner:: ([^\]]+)\])?(?:\s*\[due:: ([^\]]+)\])?\s*$" +) + +def extract_actions(body): + items
= [] + for line in body.splitlines(): + match = ACTION_RE.match(line) + if match: + items.append({ + "completed": match.group(1) in ("x", "X"), + "task": match.group(2).strip(), + "owner": match.group(3), + "due": match.group(4), + }) + return items +``` + +### Extracting Transcript Lines (JavaScript/Node) + +```javascript +const TRANSCRIPT_RE = /^\[(\d{2}:\d{2}:\d{2})\] \*\*(.+?):\*\* (.*)$/; + +function extractTranscript(body) { + return body.split("\n") + .map(line => TRANSCRIPT_RE.exec(line)) + .filter(Boolean) + .map(m => ({ timestamp: m[1], speaker: m[2], text: m[3] })); +} +``` + +### Extracting Action Items (JavaScript/Node) + +```javascript +const ACTION_RE = /^- \[([ xX])\] (.+?)(?:\s*\[owner:: ([^\]]+)\])?(?:\s*\[due:: ([^\]]+)\])?\s*$/; + +function extractActions(body) { + return body.split("\n") + .map(line => ACTION_RE.exec(line)) + .filter(Boolean) + .map(m => ({ + completed: m[1] === "x" || m[1] === "X", + task: m[2].trim(), + owner: m[3] || null, + due: m[4] || null, + })); +} +``` + +### Grep/Ripgrep Recipes + +```bash +# All meetings with a specific speaker +rg '\*\*Alice Chen:\*\*' ~/Documents/OpenOats/ + +# Everything "Them" said in a specific meeting +rg '\*\*Them:\*\*' ~/Documents/OpenOats/2026-03-20-1400-meeting.md + +# All open action items across all meetings +rg '^\- \[ \]' ~/Documents/OpenOats/ + +# Open action items assigned to You +rg '\[ \].*\[owner:: You\]' ~/Documents/OpenOats/ + +# Decisions (approximate - prints the 10 lines after each heading, so long lists +# are truncated and short ones spill into the following Transcript section) +# For precise extraction, parse the section between "## Decisions" and the next "## " heading +rg '## Decisions' -A 10 ~/Documents/OpenOats/ + +# Meetings tagged with a specific topic (works with both YAML array styles) +rg 'tags:.*product|^\s+- product$' ~/Documents/OpenOats/ + +# Meetings that used Zoom +rg '^app: zoom' ~/Documents/OpenOats/ + +# Meetings 60 minutes or longer +rg '^duration: [6-9][0-9]$|^duration: [1-9][0-9]{2,}$' ~/Documents/OpenOats/ + +# Find what was said about a topic +rg -i 'launch date' 
~/Documents/OpenOats/ + +# List all meeting files chronologically (filenames sort naturally) +ls ~/Documents/OpenOats/*.md +``` + +### Obsidian Dataview Queries + +List all meetings: +```dataview +TABLE date, duration, participants +FROM "OpenOats" +WHERE schema = "openoats/v1" +SORT date DESC +``` + +Meetings tagged with a specific topic: +```dataview +TABLE date, title, duration +FROM "OpenOats" +WHERE contains(tags, "product") +SORT date DESC +``` + +All open action items assigned to You: +```dataview +TASK +FROM "OpenOats" +WHERE !completed AND contains(text, "owner:: You") +``` + +All action items due this week: +```dataview +TASK +FROM "OpenOats" +WHERE !completed AND date(due) >= date(today) AND date(due) <= date(today) + dur(7 days) +SORT due ASC +``` + +Meetings using a specific ASR engine: +```dataview +TABLE date, title, duration +FROM "OpenOats" +WHERE engine = "parakeet-tdt-v2" +SORT date DESC +``` + +Today's meetings (for daily notes embeds): +```dataview +TABLE title, duration +FROM "OpenOats" +WHERE dateformat(date(date), "yyyy-MM-dd") = dateformat(date(today), "yyyy-MM-dd") +SORT date ASC +``` + +> **Note:** The `date` frontmatter field includes time and timezone. Use `dateformat()` to compare date-only portions. + +### Graph View + +Meeting files connect to other vault notes through wikilinks in the body. The Stage 3 Summary section MAY contain wikilinks to people notes (e.g., `[[Alice Chen]]`) and project notes. Parsers MUST preserve wikilink syntax when present. + +--- + +## Versioning + +The `schema` field identifies the format version. The current version is `openoats/v1`. + +### Compatibility Promise + +- **Patch changes** (bug fixes, clarifications) do not change the schema identifier +- **Minor additions** (new optional fields, new optional sections) do not change the schema identifier. Parsers built for `openoats/v1` will continue to work. 
+- **Breaking changes** (removing fields, changing required fields, changing the transcript line format) increment the version: `openoats/v2` + +### Migration + +When a new version is released, the specification will include migration notes describing what changed and how to update existing files. + +--- + +## Complete Example: Stage 1+2 File (No LLM) + +This is what the app outputs immediately after a meeting, before any LLM post-processing. It is a complete, valid file. + +```markdown +--- +schema: openoats/v1 +title: "Meeting" +date: 2026-03-20T14:00:00+01:00 +duration: 2 +participants: + - You + - Them +engine: parakeet-tdt-v2 +app: zoom +--- + +# Meeting + +## Transcript + +[00:00:00] **You:** Hey, thanks for jumping on. I wanted to talk through the feature flag rollout before we commit to the timeline. + +[00:00:07] **Them:** Sure. I spent the morning looking at the current implementation and I have some concerns about the gradual rollout approach. + +[00:00:14] **You:** What kind of concerns? + +[00:00:16] **Them:** The percentage-based rollout is fine for stateless features, but for the new editor it creates a split-brain problem. Users who got the new editor early will have documents in the new format. If we roll them back, those documents break. + +[00:00:31] **You:** Right. So we need a migration path regardless. + +[00:00:35] **Them:** Exactly. My suggestion is we skip the gradual rollout entirely for this one. Ship it behind a manual opt-in toggle, let power users find the bugs, then flip it to default once we are confident. + +[00:00:48] **You:** That makes sense. How long do you think the opt-in period needs to be? + +[00:00:53] **Them:** Two weeks minimum. We need at least one full sprint cycle of feedback before we go default. + +[00:01:02] **You:** Okay. Let's do that. I will update the rollout plan and send it around by end of day. + +[00:01:09] **Them:** Sounds good. One more thing. 
Can we add a telemetry event for when someone toggles the feature on? I want to track adoption rate during the opt-in phase. + +[00:01:19] **You:** Yeah, that's easy. I will add it to the ticket. + +[00:01:24] **Them:** Great. I think that covers it. + +[00:01:27] **You:** Agreed. Thanks for flagging the split-brain issue, that would have bitten us. + +[00:01:33] **Them:** No problem. Talk soon. +``` + +## Complete Example: Stage 1+2+3 File (After LLM Processing) + +This is the same meeting after the user runs LLM post-processing. The Summary, Action Items, and Decisions sections have been inserted. The transcript is unchanged. + +```markdown +--- +schema: openoats/v1 +title: "Feature Flag Rollout: New Editor" +date: 2026-03-20T14:00:00+01:00 +duration: 2 +participants: + - You + - Them +recorder: Szymon Sypniewicz +tags: + - engineering + - feature-flags + - editor +engine: parakeet-tdt-v2 +app: zoom +--- + +# Feature Flag Rollout: New Editor + +## Summary + +Discussed the rollout strategy for the new editor feature. The original plan for a percentage-based gradual rollout was rejected because documents created in the new format would break if users were rolled back, creating a split-brain problem. The team decided to use a manual opt-in toggle instead, with a minimum two-week opt-in period before switching to default. A telemetry event will be added to track adoption during the opt-in phase. + +## Action Items + +- [ ] Update rollout plan and circulate by end of day [owner:: You] [due:: 2026-03-20] +- [ ] Add telemetry event for feature toggle [owner:: You] +- [ ] Review opt-in feedback after two-week period [owner:: Them] [due:: 2026-04-03] + +## Decisions + +- Skip percentage-based gradual rollout for the new editor +- Use manual opt-in toggle instead +- Minimum two-week opt-in period before switching to default + +## Transcript + +[00:00:00] **You:** Hey, thanks for jumping on. I wanted to talk through the feature flag rollout before we commit to the timeline. 
+ +[00:00:07] **Them:** Sure. I spent the morning looking at the current implementation and I have some concerns about the gradual rollout approach. + +[00:00:14] **You:** What kind of concerns? + +[00:00:16] **Them:** The percentage-based rollout is fine for stateless features, but for the new editor it creates a split-brain problem. Users who got the new editor early will have documents in the new format. If we roll them back, those documents break. + +[00:00:31] **You:** Right. So we need a migration path regardless. + +[00:00:35] **Them:** Exactly. My suggestion is we skip the gradual rollout entirely for this one. Ship it behind a manual opt-in toggle, let power users find the bugs, then flip it to default once we are confident. + +[00:00:48] **You:** That makes sense. How long do you think the opt-in period needs to be? + +[00:00:53] **Them:** Two weeks minimum. We need at least one full sprint cycle of feedback before we go default. + +[00:01:02] **You:** Okay. Let's do that. I will update the rollout plan and send it around by end of day. + +[00:01:09] **Them:** Sounds good. One more thing. Can we add a telemetry event for when someone toggles the feature on? I want to track adoption rate during the opt-in phase. + +[00:01:19] **You:** Yeah, that's easy. I will add it to the ticket. + +[00:01:24] **Them:** Great. I think that covers it. + +[00:01:27] **You:** Agreed. Thanks for flagging the split-brain issue, that would have bitten us. + +[00:01:33] **Them:** No problem. Talk soon. +``` + +--- + +## Conformance + +A conformant file MUST include all required frontmatter fields, a `# Title` heading, and a `## Transcript` section with at least one valid transcript line. + +A conformant parser MUST be able to extract frontmatter, transcript lines, and action items from any conformant file. Parsers MUST ignore unknown frontmatter fields and unknown `## ` sections. 
+ +--- + +## Security Considerations + +Meeting transcript files may contain confidential business information, PII, or privileged communications. Implementations SHOULD NOT expose transcript files to untrusted parties without explicit user consent. + +--- + +## Design Rationale (Non-normative) + +### Why full names in transcript, not speaker IDs + +Speaker IDs (`AC`, `BM`, `S1`) save a few bytes per line but require a mental lookup table. Full bold names are self-documenting, grep-friendly without consulting frontmatter, and LLMs parse them without the indirection layer. + +### Why summary at top, transcript at bottom + +LLMs weight the beginning and end of context windows more heavily than the middle ("lost in the middle" effect). Putting synthesized insights first means agents get high-signal content before raw transcript. Humans scanning the file also want the summary first. + +### Why relative timestamps, not wall-clock + +`[00:01:24]` (1 minute 24 seconds into the meeting) is more useful than `[14:01:24]` because audio playback tools use relative offsets, the absolute start time is already in the `date` frontmatter field, and relative times are shorter and scan faster. + +### Why simple participant arrays, not structured objects + +`participants: [You, Them]` vs `participants: [{name: You, role: host}]`. The simple array keeps frontmatter under 20 lines, works with basic Dataview queries without DataviewJS, and does not force OpenOats to know information it does not have (email, role). Rich participant data can live in `x_` extension fields. + +### Why Dataview inline fields for action items + +`[owner:: You] [due:: 2026-03-25]` looks slightly unusual in raw Markdown, but it renders cleanly (brackets are unobtrusive), Obsidian Dataview queries work natively (`TASK WHERE owner = "You"`), the Obsidian Tasks plugin aggregates them across the vault, and grep works (`rg 'owner:: You' meetings/`). Any non-Obsidian user just sees mildly-decorated checkboxes. 
+ +### Why no UUID + +The filename is the identifier. `2026-03-20-1400-weekly-product-sync.md` is unique, human-readable, and does not require a generator. If cross-system referencing is needed later, use `x_uuid` as an extension field. + +### Why `schema: openoats/v1` not `schema_version: "1.0"` + +A namespaced identifier (`openoats/v1`) is more specific than a bare version number. If another tool adopts this format, it can use `openoats/v1` to signal compatibility. Future versions (`openoats/v2`) can include migration notes. + +--- + +## Acknowledgments + +This specification was informed by analysis of output formats from whisper.cpp, WhisperX, AssemblyAI, Deepgram, Granola, Meetily, and Screenpipe, as well as Obsidian community conventions for meeting notes and PKM frontmatter patterns.