diff --git a/Funnel/Funnel/AudioRecorderManager.swift b/Funnel/Funnel/AudioRecorderManager.swift index 7882315..b711a14 100644 --- a/Funnel/Funnel/AudioRecorderManager.swift +++ b/Funnel/Funnel/AudioRecorderManager.swift @@ -12,7 +12,7 @@ class AudioRecorderManager: NSObject, ObservableObject { private var levelTimer: Timer? private(set) var currentRecordingURL: URL? private var recordingCompletion: ((Result) -> Void)? - + // Live streaming properties private var audioEngine = AVAudioEngine() private var webSocket: URLSessionWebSocketTask? @@ -46,10 +46,10 @@ class AudioRecorderManager: NSObject, ObservableObject { func startRecording(completion: @escaping (Result) -> Void) { print("AudioRecorderManager: startRecording called") - + // For testing live streaming - set this to true to use live streaming let useLiveStreaming = true - + if useLiveStreaming { startLiveStreaming { result in switch result { @@ -58,7 +58,7 @@ class AudioRecorderManager: NSObject, ObservableObject { let documentsPath = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0] let dummyURL = documentsPath.appendingPathComponent("live-stream-\(self.recordingId ?? 
"unknown").m4a") completion(.success(dummyURL)) - case .failure(let error): + case let .failure(error): completion(.failure(error)) } } @@ -154,99 +154,100 @@ class AudioRecorderManager: NSObject, ObservableObject { self.audioLevel = curvedLevel } } - + // MARK: - Live Streaming Methods - + func startLiveStreaming(completion: @escaping (Result) -> Void) { print("AudioRecorderManager: Starting live streaming") - + recordingId = UUID().uuidString isLiveStreaming = true print("AudioRecorderManager: Generated recording ID: \(recordingId!)") - + // Setup WebSocket connection setupWebSocket { [weak self] result in switch result { case .success: self?.startAudioEngine(completion: completion) - case .failure(let error): + case let .failure(error): completion(.failure(error)) } } } - + private func setupWebSocket(completion: @escaping (Result) -> Void) { guard let recordingId = recordingId else { completion(.failure(FunnelError.recordingFailed(reason: "No recording ID"))) return } - + // Create URLSession for WebSocket let config = URLSessionConfiguration.default urlSession = URLSession(configuration: config, delegate: self, delegateQueue: OperationQueue()) - + // Use APIClient to construct WebSocket URL guard let url = APIClient.shared.webSocketURL(for: "/api/recordings/\(recordingId)/stream") else { completion(.failure(FunnelError.recordingFailed(reason: "Invalid WebSocket URL"))) return } - + webSocket = urlSession?.webSocketTask(with: url) webSocket?.resume() - + // Listen for messages receiveWebSocketMessage() - + // Send audio format configuration after connection DispatchQueue.main.asyncAfter(deadline: .now() + 0.2) { [weak self] in guard let self = self else { return } - + // Send configuration message to indicate PCM format let config: [String: Any] = [ "type": "config", "format": "pcm16", "sampleRate": Int(AVAudioSession.sharedInstance().sampleRate), // Send actual device sample rate - "channels": 1 + "channels": 1, ] - + if let jsonData = try? 
JSONSerialization.data(withJSONObject: config), - let jsonString = String(data: jsonData, encoding: .utf8) { + let jsonString = String(data: jsonData, encoding: .utf8) + { self.webSocket?.send(.string(jsonString)) { error in if let error = error { print("Failed to send config: \(error)") } } } - + completion(.success(())) } } - + private func receiveWebSocketMessage() { webSocket?.receive { [weak self] result in switch result { - case .success(let message): + case let .success(message): switch message { - case .string(let text): + case let .string(text): print("WebSocket received text: \(text)") - // Handle transcript responses here if needed - case .data(let data): + // Handle transcript responses here if needed + case let .data(data): print("WebSocket received data: \(data.count) bytes") @unknown default: break } // Continue listening self?.receiveWebSocketMessage() - case .failure(let error): + case let .failure(error): print("WebSocket receive error: \(error)") } } } - + private func startAudioEngine(completion: @escaping (Result) -> Void) { let inputNode = audioEngine.inputNode let inputFormat = inputNode.inputFormat(forBus: 0) - + // Create output format - PCM 16-bit as recommended by Deepgram guard let outputFormat = AVAudioFormat( commonFormat: .pcmFormatInt16, @@ -257,43 +258,43 @@ class AudioRecorderManager: NSObject, ObservableObject { completion(.failure(FunnelError.recordingFailed(reason: "Failed to create audio format"))) return } - + // Create converter node let converterNode = AVAudioMixerNode() let sinkNode = AVAudioMixerNode() - + audioEngine.attach(converterNode) audioEngine.attach(sinkNode) - + // Install tap to capture audio converterNode.installTap(onBus: 0, bufferSize: 1024, format: converterNode.outputFormat(forBus: 0)) { [weak self] buffer, _ in self?.processAudioBuffer(buffer) } - + // Connect nodes audioEngine.connect(inputNode, to: converterNode, format: inputFormat) audioEngine.connect(converterNode, to: sinkNode, format: outputFormat) 
- + // Prepare and start engine audioEngine.prepare() - + do { try AVAudioSession.sharedInstance().setCategory(.record) try audioEngine.start() - + isRecording = true recordingTime = 0 - + // Start timers timer = Timer.scheduledTimer(withTimeInterval: 0.1, repeats: true) { _ in self.recordingTime += 0.1 } - + // For live streaming, we'll calculate audio levels from the buffer levelTimer = Timer.scheduledTimer(withTimeInterval: 0.05, repeats: true) { _ in // Audio level will be updated in processAudioBuffer } - + print("AudioRecorderManager: Audio engine started successfully") completion(.success(())) } catch { @@ -301,33 +302,33 @@ class AudioRecorderManager: NSObject, ObservableObject { completion(.failure(error)) } } - + private func processAudioBuffer(_ buffer: AVAudioPCMBuffer) { guard let channelData = buffer.int16ChannelData else { return } - + let channelDataValue = channelData.pointee let channelDataValueArray = stride(from: 0, to: Int(buffer.frameLength), by: buffer.stride) .map { channelDataValue[$0] } - + // Calculate audio level for visualization let rms = sqrt(channelDataValueArray .map { Double($0) * Double($0) } .reduce(0, +) / Double(channelDataValueArray.count)) - + let avgPower = 20 * log10(rms / 32768.0) // Convert to dB let minDb: Float = -50 let maxDb: Float = -10 let normalizedLevel = Float((avgPower - Double(minDb)) / Double(maxDb - minDb)) let clampedLevel = max(0, min(1, normalizedLevel)) let curvedLevel = pow(clampedLevel, 2.5) - + DispatchQueue.main.async { [weak self] in self?.audioLevel = curvedLevel } - + // Convert buffer to Data for WebSocket let data = toData(buffer: buffer) - + // Send data through WebSocket if let data = data { webSocket?.send(.data(data)) { error in @@ -337,30 +338,30 @@ class AudioRecorderManager: NSObject, ObservableObject { } } } - + private func toData(buffer: AVAudioPCMBuffer) -> Data? 
{ let audioBuffer = buffer.audioBufferList.pointee.mBuffers return Data(bytes: audioBuffer.mData!, count: Int(audioBuffer.mDataByteSize)) } - + private func stopLiveStreaming() { print("AudioRecorderManager: Stopping live streaming") print("AudioRecorderManager: Recording ID at stop: \(recordingId ?? "nil")") - + // Stop audio engine audioEngine.stop() // Remove tap from all attached nodes - audioEngine.attachedNodes.forEach { node in + for node in audioEngine.attachedNodes { node.removeTap(onBus: 0) } - + // Stop timers timer?.invalidate() levelTimer?.invalidate() - + // Close WebSocket webSocket?.cancel(with: .goingAway, reason: nil) - + // Reset state isRecording = false isLiveStreaming = false @@ -372,11 +373,11 @@ class AudioRecorderManager: NSObject, ObservableObject { } extension AudioRecorderManager: URLSessionWebSocketDelegate { - func urlSession(_ session: URLSession, webSocketTask: URLSessionWebSocketTask, didOpenWithProtocol protocol: String?) { + func urlSession(_: URLSession, webSocketTask _: URLSessionWebSocketTask, didOpenWithProtocol protocol: String?) { print("WebSocket connected with protocol: \(String(describing: `protocol`))") } - - func urlSession(_ session: URLSession, webSocketTask: URLSessionWebSocketTask, didCloseWith closeCode: URLSessionWebSocketTask.CloseCode, reason: Data?) { + + func urlSession(_: URLSession, webSocketTask _: URLSessionWebSocketTask, didCloseWith closeCode: URLSessionWebSocketTask.CloseCode, reason _: Data?) 
{ print("WebSocket closed with code: \(closeCode)") } } diff --git a/Funnel/Funnel/Extensions/String+Extensions.swift b/Funnel/Funnel/Extensions/String+Extensions.swift index 3018339..87d0d03 100644 --- a/Funnel/Funnel/Extensions/String+Extensions.swift +++ b/Funnel/Funnel/Extensions/String+Extensions.swift @@ -4,4 +4,4 @@ extension String { func prepending(_ prefix: String) -> String { return prefix + self } -} \ No newline at end of file +} diff --git a/Funnel/Funnel/FunnelApp.swift b/Funnel/Funnel/FunnelApp.swift index 95a7bbc..b39fc97 100644 --- a/Funnel/Funnel/FunnelApp.swift +++ b/Funnel/Funnel/FunnelApp.swift @@ -4,7 +4,7 @@ import SwiftUI @main struct FunnelApp: App { @StateObject private var debugSettings = DebugSettings() - + var sharedModelContainer: ModelContainer = { let schema = Schema([ Recording.self, diff --git a/Funnel/Funnel/Models/APIModels.swift b/Funnel/Funnel/Models/APIModels.swift index e76b199..489e4e9 100644 --- a/Funnel/Funnel/Models/APIModels.swift +++ b/Funnel/Funnel/Models/APIModels.swift @@ -12,8 +12,10 @@ struct ErrorResponse: Codable { struct ProcessedRecording: Codable { let transcript: String + let editedTranscript: String let duration: Double let bulletSummary: [String] + let thingsToThinkAbout: [String] let diagram: DiagramData struct DiagramData: Codable { diff --git a/Funnel/Funnel/Models/Recording.swift b/Funnel/Funnel/Models/Recording.swift index 7f7f0e0..801e123 100644 --- a/Funnel/Funnel/Models/Recording.swift +++ b/Funnel/Funnel/Models/Recording.swift @@ -27,7 +27,9 @@ final class Recording { // Processed content var transcript: String? + var editedTranscript: String? var bulletSummary: [String]? + var thingsToThinkAbout: [String]? var diagramTitle: String? var diagramDescription: String? var diagramContent: String? 
diff --git a/Funnel/Funnel/Preview Content/PreviewContainer.swift b/Funnel/Funnel/Preview Content/PreviewContainer.swift index 1e01a74..eda0f98 100644 --- a/Funnel/Funnel/Preview Content/PreviewContainer.swift +++ b/Funnel/Funnel/Preview Content/PreviewContainer.swift @@ -47,7 +47,13 @@ extension ModelContainer { if processingStatus == .completed { recording.transcript = sampleTranscripts[index] + recording.editedTranscript = "## Key Ideas\n\n\(sampleTranscripts[index])\n\n### Action Items\n\n- Follow up on main points\n- Schedule next steps" recording.bulletSummary = sampleBulletSummaries[index] + recording.thingsToThinkAbout = [ + "What would be the biggest challenge in implementing this idea?", + "How might this impact our current users?", + "What resources would we need to make this successful?", + ] recording.diagramTitle = sampleDiagramTitles[index] recording.diagramDescription = "A visual representation of the key concepts discussed" recording.diagramContent = """ diff --git a/Funnel/Funnel/Services/APIClient.swift b/Funnel/Funnel/Services/APIClient.swift index c43ac71..d11c282 100644 --- a/Funnel/Funnel/Services/APIClient.swift +++ b/Funnel/Funnel/Services/APIClient.swift @@ -162,17 +162,17 @@ class APIClient { } // MARK: - WebSocket URL Generation - + func webSocketURL(for endpoint: String) -> URL? { let scheme = baseURL.hasPrefix("https") ? 
"wss" : "ws" let host = baseURL .replacingOccurrences(of: "https://", with: "") .replacingOccurrences(of: "http://", with: "") - + let wsURLString = "\(scheme)://\(host)\(endpoint)" return URL(string: wsURLString) } - + // MARK: - Helper Methods private func mimeType(for pathExtension: String) -> String { diff --git a/Funnel/Funnel/Services/FunnelAPIService.swift b/Funnel/Funnel/Services/FunnelAPIService.swift index c0143c5..6a58418 100644 --- a/Funnel/Funnel/Services/FunnelAPIService.swift +++ b/Funnel/Funnel/Services/FunnelAPIService.swift @@ -17,9 +17,9 @@ class FunnelAPIService { fieldName: "audio" ) } - + // MARK: - Live Streaming - + /// Finalize a live-streamed recording func finalizeRecording(recordingId: String) async throws -> ProcessedRecording { print("FunnelAPIService: Finalizing recording with ID: \(recordingId)") diff --git a/Funnel/Funnel/Services/RecordingManager.swift b/Funnel/Funnel/Services/RecordingManager.swift index b8c76c8..ec615d8 100644 --- a/Funnel/Funnel/Services/RecordingManager.swift +++ b/Funnel/Funnel/Services/RecordingManager.swift @@ -34,13 +34,13 @@ class RecordingManager: ObservableObject { private func processRecordingSteps(recording: Recording, modelContext: ModelContext, recordingId: String? = nil, isLiveStreaming: Bool = false) async { do { let processedData: ProcessedRecording - + if isLiveStreaming, let recordingId = recordingId { // For live streaming, skip upload and just finalize recording.processingStatus = .transcribing processingStatus = "Finalizing recording..." try? 
modelContext.save() - + print("RecordingManager: Calling finalize endpoint for recording ID: \(recordingId)") processedData = try await apiService.finalizeRecording(recordingId: recordingId) print("RecordingManager: Received processed data from finalize endpoint:") @@ -62,8 +62,10 @@ class RecordingManager: ObservableObject { } recording.transcript = processedData.transcript + recording.editedTranscript = processedData.editedTranscript recording.duration = processedData.duration recording.bulletSummary = processedData.bulletSummary + recording.thingsToThinkAbout = processedData.thingsToThinkAbout recording.diagramTitle = processedData.diagram.title recording.diagramDescription = processedData.diagram.description recording.diagramContent = processedData.diagram.content diff --git a/Funnel/Funnel/Views/CardOptions.swift b/Funnel/Funnel/Views/CardOptions.swift index b86b7a3..5accaf5 100644 --- a/Funnel/Funnel/Views/CardOptions.swift +++ b/Funnel/Funnel/Views/CardOptions.swift @@ -4,40 +4,47 @@ enum CardType { case bulletSummary([String]) case diagram(Recording.Diagram?) case transcript(String) - + case thingsToThinkAbout([String]) + var shareContent: String { switch self { - case .bulletSummary(let bullets): + case let .bulletSummary(bullets): return bullets.joined(separator: "\n• ").prepending("• ") - case .diagram(let diagram): + case let .diagram(diagram): if let diagram = diagram { return "\(diagram.title)\n\n\(diagram.description)\n\n\(diagram.content)" } return "" - case .transcript(let text): + case let .transcript(text): return text + case let .thingsToThinkAbout(questions): + return questions.enumerated().map { index, question in + "\(index + 1). 
\(question)" + }.joined(separator: "\n\n") } } - + var copyContent: String { return shareContent } - + var canShare: Bool { switch self { - case .bulletSummary(let bullets): + case let .bulletSummary(bullets): return !bullets.isEmpty - case .diagram(let diagram): + case let .diagram(diagram): return diagram != nil - case .transcript(let text): + case let .transcript(text): return !text.isEmpty + case let .thingsToThinkAbout(questions): + return !questions.isEmpty } } } struct CardOptions: View { let cardType: CardType - + var body: some View { HStack(spacing: 0) { Button { @@ -47,7 +54,7 @@ struct CardOptions: View { } .disabled(!cardType.canShare) .opacity(cardType.canShare ? 1.0 : 0.5) - + if cardType.canShare { ShareLink(item: cardType.shareContent) { Image("share-btn") @@ -62,5 +69,4 @@ struct CardOptions: View { #Preview { CardOptions(cardType: .diagram(nil)) .background(GradientBackground()) - } diff --git a/Funnel/Funnel/Views/Components/CheckboxToggleStyle.swift b/Funnel/Funnel/Views/Components/CheckboxToggleStyle.swift index 0d9e06f..aa0bfa5 100644 --- a/Funnel/Funnel/Views/Components/CheckboxToggleStyle.swift +++ b/Funnel/Funnel/Views/Components/CheckboxToggleStyle.swift @@ -11,4 +11,4 @@ struct CheckboxToggleStyle: ToggleStyle { configuration.label } } -} \ No newline at end of file +} diff --git a/Funnel/Funnel/Views/NewRecordingView.swift b/Funnel/Funnel/Views/NewRecordingView.swift index ea51447..51fb037 100644 --- a/Funnel/Funnel/Views/NewRecordingView.swift +++ b/Funnel/Funnel/Views/NewRecordingView.swift @@ -32,7 +32,7 @@ struct NewRecordingView: View { FunnelLogo() .padding(.leading, 30) Spacer() - + // Blur toggle for debugging // Toggle("Blur", isOn: $debugSettings.blurEnabled) // .toggleStyle(CheckboxToggleStyle()) @@ -189,7 +189,7 @@ struct RecordingControlsView: View { let wasLiveStreaming = audioRecorder.isLiveStreaming let recordingId = audioRecorder.recordingId let recordingURL = audioRecorder.currentRecordingURL - + 
print("RecordingControlsView: Stopping recording - wasLiveStreaming: \(wasLiveStreaming), recordingId: \(recordingId ?? "nil")") audioRecorder.stopRecording() diff --git a/Funnel/Funnel/Views/SwipeableCardsView.swift b/Funnel/Funnel/Views/SwipeableCardsView.swift index b066fae..ef27287 100644 --- a/Funnel/Funnel/Views/SwipeableCardsView.swift +++ b/Funnel/Funnel/Views/SwipeableCardsView.swift @@ -61,9 +61,13 @@ struct SwipeableCardsView: View { .frame(width: cardWidth) .id(1) - TranscriptCard(transcript: recording.transcript ?? "") + EditedTranscriptCard(editedTranscript: recording.editedTranscript ?? recording.transcript ?? "") .frame(width: cardWidth) .id(2) + + ThingsToThinkAboutCard(questions: recording.thingsToThinkAbout ?? []) + .frame(width: cardWidth) + .id(3) } .scrollTargetLayout() } @@ -97,6 +101,8 @@ struct SwipeableCardsView: View { gradientManager.setTheme(.pinkRed) case 2: gradientManager.setTheme(.blueTeal) + case 3: + gradientManager.setTheme(.purplePink) default: gradientManager.setTheme(.defaultTheme) } @@ -233,12 +239,12 @@ struct DiagramCard: View { } } -struct TranscriptCard: View { - let transcript: String +struct EditedTranscriptCard: View { + let editedTranscript: String var body: some View { ScrollView { - Text(transcript) + Text((try? 
AttributedString(markdown: editedTranscript)) ?? AttributedString(editedTranscript)) /* fall back to plain text instead of crashing when the server-supplied markdown cannot be parsed */ .funnelCallout() .foregroundColor(.white.opacity(0.9)) .frame(maxWidth: .infinity, alignment: .leading) @@ -273,7 +279,61 @@ struct TranscriptCard: View { .liveGlassmorphicCell(cornerRadius: 9) .padding(.bottom, 100) .overlay(alignment: .bottomLeading) { - CardOptions(cardType: .transcript(transcript)) + CardOptions(cardType: .transcript(editedTranscript)) + } + } +} + +struct ThingsToThinkAboutCard: View { + let questions: [String] + + var body: some View { + VStack(spacing: 20) { + Text("Things to Think About") + .funnelBodyBold() + .foregroundColor(.white) + .frame(maxWidth: .infinity) + + VStack(alignment: .leading, spacing: 16) { + ForEach(Array(questions.enumerated()), id: \.offset) { index, question in + VStack(alignment: .leading, spacing: 8) { + Text("\(index + 1).") + .funnelCallout() + .foregroundColor(.white.opacity(0.6)) + + Text(question) + .funnelCallout() + .foregroundColor(.white.opacity(0.9)) + .fixedSize(horizontal: false, vertical: true) + } + } + } + + Spacer() + } + .padding(25) + .frame(maxWidth: .infinity, maxHeight: .infinity) + .overlay( + RoundedRectangle(cornerRadius: 10) + .stroke( + LinearGradient( + stops: [ + .init(color: Color.white, location: 0), + .init(color: Color.white.opacity(0), location: 0.434), + .init(color: Color.white, location: 1), + ], + startPoint: .topLeading, + endPoint: .bottomTrailing + ), + lineWidth: 1 + ) + ) + .clipShape(RoundedRectangle(cornerRadius: 10)) + .shadow(color: .black.opacity(0.12), radius: 12, x: 0, y: 4) + .liveGlassmorphicCell(cornerRadius: 10) + .padding(.bottom, 100) + .overlay(alignment: .bottomLeading) { + CardOptions(cardType: .thingsToThinkAbout(questions)) } } } @@ -282,17 +342,23 @@ struct TranscriptCard: View { SwipeableCardsView(recording: { let recording = Recording(audioFileName: "sample.m4a", duration: 60) 
recording.transcript = "This is a sample transcript that demonstrates the text content..." 
+ recording.editedTranscript = "## Sample Recording\n\nThis is a **sample transcript** that demonstrates the text content with proper formatting." recording.bulletSummary = [ "First key point from the recording", "Second important insight", "Third valuable observation", "Final summary point", ] + recording.thingsToThinkAbout = [ + "What specific aspects of this idea excite you the most?", + "How might you validate this concept with potential users?", + "What resources or support would you need to make this successful?", + ] recording.diagramTitle = "Key Concepts" recording.diagramDescription = "Visual representation of main ideas" recording.diagramContent = "Concept A → Concept B → Result" return recording }()) - .funnelPreviewEnvironment() - .background (GradientBackground()) + .funnelPreviewEnvironment() + .background(GradientBackground()) } diff --git a/Funnel/Models/DebugSettings.swift b/Funnel/Models/DebugSettings.swift index e723088..e4097ab 100644 --- a/Funnel/Models/DebugSettings.swift +++ b/Funnel/Models/DebugSettings.swift @@ -2,4 +2,4 @@ import SwiftUI class DebugSettings: ObservableObject { @Published var blurEnabled: Bool = true -} \ No newline at end of file +} diff --git a/Funnel/Views/GlassRecord.swift b/Funnel/Views/GlassRecord.swift index 5ca466b..a59252d 100644 --- a/Funnel/Views/GlassRecord.swift +++ b/Funnel/Views/GlassRecord.swift @@ -6,7 +6,7 @@ enum BackgroundType: CaseIterable { case image3 case white case gray - + var label: String { switch self { case .image1: return "Image 1" @@ -22,7 +22,7 @@ struct GlassRecord: View { @EnvironmentObject var debugSettings: DebugSettings @State private var isPressed = false @State private var backgroundType: BackgroundType = .image1 - + var body: some View { ZStack { // Dynamic background @@ -64,7 +64,7 @@ struct GlassRecord: View { Color.gray .ignoresSafeArea() } - + // Glass morphic record button - exact Figma specs Button { // Action here @@ -72,7 +72,7 @@ struct GlassRecord: View { ZStack { // Red 
stop square with rounded corners RoundedRectangle(cornerRadius: 10) - .fill(Color(red: 1, green: 59/255, blue: 48/255).opacity(0.8)) + .fill(Color(red: 1, green: 59 / 255, blue: 48 / 255).opacity(0.8)) .frame(width: 30, height: 30) .overlay( RoundedRectangle(cornerRadius: 10) @@ -81,7 +81,7 @@ struct GlassRecord: View { gradient: Gradient(stops: [ .init(color: .white, location: 0), .init(color: .white.opacity(0), location: 0.434), - .init(color: .white, location: 1) + .init(color: .white, location: 1), ]), startPoint: .topLeading, endPoint: .bottomTrailing @@ -100,13 +100,12 @@ struct GlassRecord: View { .scaleEffect(isPressed ? 0.95 : 1.0) } .buttonStyle(PlainButtonStyle()) - .onLongPressGesture(minimumDuration: .infinity, maximumDistance: .infinity) { - } onPressingChanged: { pressing in + .onLongPressGesture(minimumDuration: .infinity, maximumDistance: .infinity) {} onPressingChanged: { pressing in withAnimation(.easeInOut(duration: 0.1)) { isPressed = pressing } } - + // Background picker and blur toggle VStack { HStack { @@ -129,9 +128,9 @@ struct GlassRecord: View { } } .padding() - + Spacer() - + // Blur toggle checkbox HStack { Toggle("Blur Effect", isOn: $debugSettings.blurEnabled) @@ -152,4 +151,4 @@ struct GlassRecord: View { #Preview { GlassRecord() .environmentObject(DebugSettings()) -} \ No newline at end of file +} diff --git a/Funnel/Views/GradientBackgroundManager.swift b/Funnel/Views/GradientBackgroundManager.swift index 0cc1293..da2872b 100644 --- a/Funnel/Views/GradientBackgroundManager.swift +++ b/Funnel/Views/GradientBackgroundManager.swift @@ -5,6 +5,7 @@ enum GradientTheme: String { case orange case pinkRed case blueTeal + case purplePink var colors: [Color] { switch self { @@ -44,6 +45,15 @@ enum GradientTheme: String { Color(red: 0.0, green: 0.8, blue: 0.8), Color(red: 0.41, green: 0.47, blue: 1.0).opacity(0.7), ] + case .purplePink: + return [ + Color(red: 0.827, green: 0.435, blue: 0.749), // Purple/magenta from original gradient + 
Color(red: 0.93, green: 0.33, blue: 0.93), // Bright pink + Color(red: 0.576, green: 0.651, blue: 0.878), // Light blue/purple from original + Color(red: 0.827, green: 0.435, blue: 0.749).opacity(0.9), + Color(red: 0.93, green: 0.33, blue: 0.93).opacity(0.8), + Color(red: 0.576, green: 0.651, blue: 0.878).opacity(0.7), + ] } } } diff --git a/Funnel/Views/LiveGlassmorphicModifier.swift b/Funnel/Views/LiveGlassmorphicModifier.swift index 209b339..8cc23dd 100644 --- a/Funnel/Views/LiveGlassmorphicModifier.swift +++ b/Funnel/Views/LiveGlassmorphicModifier.swift @@ -3,15 +3,15 @@ import UIKit struct VisualEffectBlur: UIViewRepresentable { var style: UIBlurEffect.Style - - func makeUIView(context: Context) -> UIVisualEffectView { + + func makeUIView(context _: Context) -> UIVisualEffectView { let view = UIVisualEffectView(effect: UIBlurEffect(style: style)) // Force light mode appearance view.overrideUserInterfaceStyle = .light return view } - - func updateUIView(_ uiView: UIVisualEffectView, context: Context) { + + func updateUIView(_ uiView: UIVisualEffectView, context _: Context) { uiView.effect = UIBlurEffect(style: style) uiView.overrideUserInterfaceStyle = .light } @@ -19,7 +19,7 @@ struct VisualEffectBlur: UIViewRepresentable { struct LiveGlassmorphicModifier: ViewModifier { @EnvironmentObject var debugSettings: DebugSettings - + let cornerRadius: CGFloat let blurRadius: CGFloat let gradientOpacity: (start: Double, end: Double) @@ -129,7 +129,7 @@ extension View { gradientOpacity: gradientOpacity )) } - + func liveGlassmorphicCell( cornerRadius: CGFloat = 15, gradientOpacity: (start: Double, end: Double) = (0.1, 0.4) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 515cc3b..bfe7bf4 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -4,6 +4,26 @@ All notable changes to the Funnel project will be documented in this file. 
## [Unreleased] +### Added +- **Enhanced AI prompts for better transcript processing** + - New bullet summary prompt that handles edge cases (empty recordings, very short recordings) + - Added "edited transcript" feature that lightly formats raw transcripts with markdown + - Improved handling of rambling stream-of-consciousness recordings + - Special handling for empty recordings: "Ah, the recording is empty!" + - Better organization of thoughts into logical groupings with headers +- **New API response fields**: + - `editedTranscript` - Lightly edited version with markdown formatting + - `thingsToThinkAbout` - 2-3 thought-provoking questions to help users explore their ideas +- **iOS app updates for new card types**: + - Replaced raw transcript card with lightly edited transcript card (with markdown rendering) + - Added new "Things to Think About" card with numbered questions + - Updated card navigation to include 4 cards total (summary, diagram, edited transcript, questions) + - Added purple-pink gradient theme for the questions card +- **Test data and scripts** + - Comprehensive test cases for various transcript scenarios + - Test script (`test-prompts.ts`) to validate prompt behavior + - Examples include: empty recordings, short reminders, technical brainstorms, meeting notes + ### Fixed - **iOS app now properly saves transcripts**: Fixed issue where iOS live streaming wasn't saving transcripts - Added finalize endpoint call after recording stops (matching web client behavior) diff --git a/server/api/new-recording.ts b/server/api/new-recording.ts index a531bb1..abeb7cd 100644 --- a/server/api/new-recording.ts +++ b/server/api/new-recording.ts @@ -69,20 +69,27 @@ export async function newRecordingHandler(c: Context): Promise { ); } - // Step 2: Generate summary and diagram in parallel - console.log("Generating summary and diagram..."); + // Step 2: Generate summary, edited transcript, questions, and diagram in parallel + console.log( + "Generating summary, edited 
transcript, questions, and diagram...", + ); const anthropicClient = new AnthropicClient(anthropicKey); - const [summaryResult, diagramResult] = await Promise.all([ - anthropicClient.summarizeTranscript(transcribeResult.transcript), - anthropicClient.generateDiagram(transcribeResult.transcript), - ]); + const [summaryResult, editResult, questionsResult, diagramResult] = + await Promise.all([ + anthropicClient.summarizeTranscript(transcribeResult.transcript), + anthropicClient.editTranscript(transcribeResult.transcript), + anthropicClient.generateThingsToThinkAbout(transcribeResult.transcript), + anthropicClient.generateDiagram(transcribeResult.transcript), + ]); // Combine all results const response: NewRecordingResponse = { transcript: transcribeResult.transcript, + editedTranscript: editResult.editedTranscript, duration: transcribeResult.duration, bulletSummary: summaryResult.bulletSummary, + thingsToThinkAbout: questionsResult.thingsToThinkAbout, diagram: { title: diagramResult.title, description: diagramResult.description, diff --git a/server/docs/prompt-improvements.md b/server/docs/prompt-improvements.md new file mode 100644 index 0000000..b5fc658 --- /dev/null +++ b/server/docs/prompt-improvements.md @@ -0,0 +1,90 @@ +# AI Prompt Improvements + +This document explains the improvements made to the AI prompts for better +transcript processing. + +## Overview + +We've enhanced the server's AI prompts to provide more useful and readable +outputs from voice recordings. The improvements focus on: + +1. Better handling of edge cases (empty/short recordings) +2. More actionable bullet summaries +3. New "edited transcript" feature with markdown formatting + +## Key Changes + +### 1. Enhanced Bullet Summary + +The bullet summary prompt now: + +- Handles empty recordings gracefully with "Ah, the recording is empty!" 
+- Provides meaningful output even for very short recordings +- Focuses on actionable insights and key decisions +- Organizes related thoughts into logical groupings +- Uses clear, concise language without excessive emoji + +### 2. New Edited Transcript Feature + +Added a new `editedTranscript` field that: + +- Lightly edits raw transcripts for readability +- Adds markdown formatting (headers, bold, lists) +- Fixes grammar while preserving the speaker's voice +- Groups related ideas under descriptive headers +- Makes long transcripts scannable and easy to read + +### 3. Edge Case Handling + +Special handling for: + +- **Empty recordings**: Returns friendly message instead of error +- **Very short recordings** (5 words or fewer): Adds simple "Quick Note" header +- **Single-word reminders**: Treats as valid input, not error + +## API Response + +The `/api/new-recording` endpoint now returns: + +```json +{ + "transcript": "raw transcript from speech-to-text", + "editedTranscript": "## Formatted Transcript\n\nLightly edited version...", + "duration": 45.2, + "bulletSummary": [ + "Key insight or decision", + "Action item with next steps", + "Important detail to remember" + ], + "thingsToThinkAbout": [ + "Open question that pushes the idea further", + "Question about how to validate or act on it" + ], + "diagram": { + "title": "Concept Diagram", + "description": "Visual representation", + "content": "diagram content..." + } +} +``` + +## Testing + +Use the test script to validate prompt behavior: + +```bash +cd server +./test-prompts.ts +``` + +This will test various scenarios including: + +- Empty recordings +- Short reminders +- Rambling thoughts +- Technical discussions +- Meeting notes + +## Benefits + +1. **Better User Experience**: Users get more useful summaries and readable + transcripts +2. **Consistent Output**: Edge cases are handled gracefully +3. **Actionable Insights**: Bullet points focus on decisions and next steps +4. 
**Improved Readability**: Edited transcripts are easy to scan and understand diff --git a/server/lib/anthropic.ts b/server/lib/anthropic.ts index 61c8df4..50ebbb2 100644 --- a/server/lib/anthropic.ts +++ b/server/lib/anthropic.ts @@ -8,6 +8,14 @@ interface DiagramResponse { description: string; } +interface EditTranscriptResponse { + editedTranscript: string; +} + +interface ThingsToThinkAboutResponse { + thingsToThinkAbout: string[]; +} + export class AnthropicClient { private apiKey: string; private baseUrl = "https://api.anthropic.com/v1"; @@ -17,6 +25,13 @@ export class AnthropicClient { } async summarizeTranscript(transcript: string): Promise { + // Handle empty transcript + if (!transcript || transcript.trim() === "") { + return { + bulletSummary: ["Ah, the recording is empty!"], + }; + } + // Read prompt from file const promptTemplate = await Deno.readTextFile( new URL("./prompts/summarize-prompt.txt", import.meta.url), @@ -56,6 +71,13 @@ export class AnthropicClient { .filter((line: string) => line.trim().startsWith("•")) .map((line: string) => line.trim().substring(1).trim()); + // If no bullet points found or very short transcript, create a simple one + if (bulletPoints.length === 0) { + return { + bulletSummary: [transcript.trim()], + }; + } + return { bulletSummary: bulletPoints, }; @@ -119,4 +141,133 @@ export class AnthropicClient { diagram: diagramMatch[1].trim(), }; } + + async editTranscript(transcript: string): Promise<EditTranscriptResponse> { + // Handle empty transcript + if (!transcript || transcript.trim() === "") { + return { + editedTranscript: "## Empty Recording\n\nAh, the recording is empty!", + }; + } + + // For very short transcripts, add a simple header + const wordCount = transcript.trim().split(/\s+/).length; + if (wordCount <= 5) { + return { + editedTranscript: `## Quick Note\n\n${transcript.trim()}${/[.!?]$/.test(transcript.trim()) ? 
"" : "."}`, /* add a period only when the note lacks end punctuation */ + }; + } + + // Read prompt from file + const promptTemplate = await Deno.readTextFile( + new URL("./prompts/edit-transcript-prompt.txt", 
import.meta.url), + ); + const prompt = promptTemplate.replace("{{transcript}}", () => transcript); /* function replacer: "$&"-style sequences in the transcript are inserted literally */ + + const response = await fetch(`${this.baseUrl}/messages`, { + method: "POST", + headers: { + "Content-Type": "application/json", + "x-api-key": this.apiKey, + "anthropic-version": "2023-06-01", + }, + body: JSON.stringify({ + model: "claude-3-5-sonnet-20241022", + max_tokens: 2048, + messages: [ + { + role: "user", + content: prompt, + }, + ], + }), + }); + + if (!response.ok) { + const error = await response.text(); + throw new Error(`Anthropic API error: ${response.status} - ${error}`); + } + + const data = await response.json(); + const editedTranscript = data.content[0].text; + + return { + editedTranscript, + }; + } + + async generateThingsToThinkAbout( + transcript: string, + ): Promise<ThingsToThinkAboutResponse> { + // Handle empty transcript + if (!transcript || transcript.trim() === "") { + return { + thingsToThinkAbout: ["What idea would you like to explore today?"], + }; + } + + // For very short transcripts, provide generic reflection questions + const wordCount = transcript.trim().split(/\s+/).length; + if (wordCount <= 5) { + return { + thingsToThinkAbout: [ + "What inspired this thought?", + "How might you expand on this idea?", + ], + }; + } + + // Read prompt from file + const promptTemplate = await Deno.readTextFile( + new URL("./prompts/things-to-think-about-prompt.txt", import.meta.url), + ); + const prompt = promptTemplate.replace("{{transcript}}", () => transcript); /* function replacer: "$&"-style sequences in the transcript are inserted literally */ + + const response = await fetch(`${this.baseUrl}/messages`, { + method: "POST", + headers: { + "Content-Type": "application/json", + "x-api-key": this.apiKey, + "anthropic-version": "2023-06-01", + }, + body: JSON.stringify({ + model: "claude-3-5-sonnet-20241022", + max_tokens: 1024, + messages: [ + { + 
role: "user", + content: prompt, + }, + ], + }), + }); + + if (!response.ok) { + const error = await response.text(); + throw new Error(`Anthropic API error: ${response.status} - ${error}`); + } + + const data = await
response.json(); + const content = data.content[0].text; + + // Parse bullet points from the response + const questions = content + .split("\n") + .filter((line: string) => line.trim().startsWith("•")) + .map((line: string) => line.trim().substring(1).trim()); + + // If no questions found, return a default + if (questions.length === 0) { + return { + thingsToThinkAbout: [ + "What aspects of this idea excite you the most?", + "What would need to be true for this to succeed?", + ], + }; + } + + return { + thingsToThinkAbout: questions, + }; + } } diff --git a/server/lib/prompts/edit-transcript-prompt.txt b/server/lib/prompts/edit-transcript-prompt.txt new file mode 100644 index 0000000..0d88c1c --- /dev/null +++ b/server/lib/prompts/edit-transcript-prompt.txt @@ -0,0 +1,24 @@ +You are an AI assistant that lightly edits voice transcripts to make them more readable while preserving the speaker's voice and ideas. + +Given a raw transcript, create a lightly edited version that: +1. Adds markdown formatting (headers, bold, lists) to organize thoughts +2. Fixes grammar and punctuation errors +3. Removes excessive filler words (um, uh, like) but keeps some for natural flow +4. Groups related ideas under descriptive headers +5. Preserves the speaker's tone and personality +6. Makes the text scannable and easy to read + +Formatting guidelines: +- Use ## for main topic headers +- Use ### for subtopic headers +- Use **bold** for emphasis on key terms +- Use numbered or bulleted lists for action items or multiple points +- Keep paragraphs short and focused +- Add line breaks between sections for readability + +The goal is to make the transcript feel like a well-organized note that the speaker might have written themselves, not a completely rewritten document. + +Raw transcript: +{{transcript}} + +Please provide only the edited transcript with markdown formatting, no introduction or conclusion. 
\ No newline at end of file diff --git a/server/lib/prompts/summarize-prompt-improved.txt b/server/lib/prompts/summarize-prompt-improved.txt new file mode 100644 index 0000000..81b416a --- /dev/null +++ b/server/lib/prompts/summarize-prompt-improved.txt @@ -0,0 +1,28 @@ +You are an AI assistant that creates concise, actionable bullet point summaries from voice transcripts. + +Given a transcript of someone's thoughts, create a bullet point summary that: +1. Captures the main ideas and key points +2. Identifies any action items or decisions made +3. Preserves important details while removing filler words +4. Organizes thoughts into logical groupings +5. Uses clear, concise language + +Guidelines: +- Each bullet should be a complete, standalone thought +- Start with the most important points +- Include action items with clear next steps +- Keep each bullet concise but informative +- Aim for 3-8 bullet points for most transcripts +- Use action verbs where appropriate + +Format each bullet with: +• [Your bullet point here] + +Special cases: +- If the transcript is very short (under 10 words), create a single concise bullet +- For longer rambling transcripts, extract the key insights and decisions + +Transcript: +{{transcript}} + +Please provide only the bullet points, no introduction or conclusion. \ No newline at end of file diff --git a/server/lib/prompts/things-to-think-about-prompt.txt b/server/lib/prompts/things-to-think-about-prompt.txt new file mode 100644 index 0000000..11bf426 --- /dev/null +++ b/server/lib/prompts/things-to-think-about-prompt.txt @@ -0,0 +1,33 @@ +You are a thoughtful coach who helps people explore their ideas more deeply through strategic questions. + +Given a transcript of someone's thoughts, generate 2-3 thought-provoking questions that will help them think more deeply about their idea. + +Your questions should: +1. Balance challenging assumptions with inspiring new thinking +2. Be specific to their idea (not generic) +3. 
Help them explore practical, emotional, and strategic aspects +4. Open up new perspectives they might not have considered +5. Be genuinely helpful, not discouraging + +Good question types include: +- Practical challenges they'll need to solve +- Emotional/personal motivations to explore +- Strategic considerations they might have missed +- Creative possibilities they haven't imagined +- Reality checks that are constructive + +Avoid: +- Generic questions that could apply to any idea +- Overly negative or discouraging questions +- Questions that assume failure +- Yes/no questions - make them open-ended + +Format your response as: +• [Question 1] +• [Question 2] +• [Question 3] + +Transcript: +{{transcript}} + +Please provide 2-3 thought-provoking questions: \ No newline at end of file diff --git a/server/promptfoo/README.md b/server/promptfoo/README.md index 8dc6b78..7470385 100644 --- a/server/promptfoo/README.md +++ b/server/promptfoo/README.md @@ -1,80 +1,84 @@ -# Promptfoo Testing for Funnel API Prompts +# Promptfoo Testing for Funnel -This directory contains promptfoo configuration for testing the LLM prompts used -in the Funnel API. +This directory contains prompt testing configurations for the Funnel API using +promptfoo. -## Setup +## Prompts Being Tested -1. Install dependencies: - ```bash - npm install - ``` +### Original Prompts -2. Set up environment variables: - ```bash - export ANTHROPIC_API_KEY=your_key_here - # or create a .env file - ``` +1. **Summarize** - Creates bullet point summaries from transcripts +2. **Diagram** - Generates ASCII art diagrams from concepts + +### New Prompts + +3. **Lightly Edited Transcript** - Formats raw transcripts for readability + without changing wording +4. 
**Things to Think About** - Generates 2-3 thought-provoking questions based + on the user's ideas ## Running Tests -Run all promptfoo tests: +### Test the new prompts only: ```bash -npm run promptfoo:eval +npm run promptfoo:eval:new ``` -View test results in the web UI: +### Test all prompts (original + new): ```bash -npm run promptfoo:view +npm run promptfoo:eval:all ``` -## Structure - -- **promptfooconfig.yaml**: Main configuration file with test cases -- **lib/prompts/**: Actual prompt files used by the server - - `summarize-prompt.txt`: Prompt for generating bullet summaries - - `diagram-prompt.txt`: Prompt for generating ASCII diagrams -- **prompts/**: Symlinks to the actual prompts for promptfoo to use +### View results in web browser: -## Test Cases +```bash +npm run promptfoo:view +``` -### Summarize Prompt Tests +## What the New Prompts Do -1. **Joel's AI coding metaphor**: Tests conciseness, bullet count, and key - concept extraction -2. **Startup pivot decision**: Tests extraction of key metrics and decision - points +### Lightly Edited Transcript -### Diagram Prompt Tests +- Adds section headers (##) for major topic shifts +- Removes filler words (um, uh, like, you know) +- Fixes punctuation and adds paragraph breaks +- **DOES NOT** change wording or rephrase sentences +- **DOES NOT** summarize or condense content +- Preserves the speaker's exact words and voice -1. **AI coding metaphor diagram**: Tests proper formatting and size constraints -2. 
**Software architecture decision**: Tests architectural concept visualization +### Things to Think About -## Assertions +- Generates 2-3 thought-provoking questions +- Questions are specific to the user's idea (not generic) +- Balances challenging assumptions with inspiring new thinking +- Helps users explore practical, emotional, and strategic aspects +- Avoids yes/no questions - all questions are open-ended -### Summarize Tests +## Test Cases -- Bullet count (3-6 bullets) -- Bullet length (< 80 chars each) -- Key concept inclusion -- No unnecessary introductions +1. **AI Coding Metaphor** - Tests section organization and filler removal +2. **Startup Pivot** - Tests question generation for business decisions +3. **Bike Shop Dream** - Tests both prompts on a concrete business idea +4. **Short Reminder** - Edge case: very brief transcript +5. **Technical Discussion** - Tests multiple section headers and technical term + preservation -### Diagram Tests +## Assertions -- Required sections (TITLE, DESCRIPTION, DIAGRAM) -- Title conciseness (3-5 words) -- Diagram size (≤ 15 lines, ≤ 60 chars wide) -- Relevant concept representation +The tests include assertions to verify: -## Modifying Prompts +- Edited transcripts have section headers and remove filler words +- Original wording and key phrases are preserved +- Questions follow the bullet format and end with "?" +- 2-3 questions are generated (not more, not less) +- Questions are specific to the content, not generic -To modify prompts: +## Environment Setup -1. Edit the files in `lib/prompts/` -2. The symlinks in `prompts/` will automatically reflect changes -3. Run `npm run promptfoo:eval` to test the updated prompts +Make sure you have your Anthropic API key set: -The server reads prompts from the same `lib/prompts/` files, ensuring -consistency between testing and production. 
+```bash +export ANTHROPIC_API_KEY=your_key_here +``` diff --git a/server/promptfoo/package.json b/server/promptfoo/package.json index b195323..0aa066a 100644 --- a/server/promptfoo/package.json +++ b/server/promptfoo/package.json @@ -5,7 +5,9 @@ "scripts": { "test": "deno test tests/", "promptfoo:eval": "promptfoo eval", - "promptfoo:view": "promptfoo view" + "promptfoo:view": "promptfoo view", + "promptfoo:eval:new": "promptfoo eval -c promptfooconfig-new-prompts.yaml", + "promptfoo:eval:all": "promptfoo eval && promptfoo eval -c promptfooconfig-with-assertions.yaml" }, "devDependencies": { "promptfoo": "^0.115.1" diff --git a/server/promptfoo/promptfooconfig-new-prompts.yaml b/server/promptfoo/promptfooconfig-new-prompts.yaml new file mode 100644 index 0000000..ad0ed40 --- /dev/null +++ b/server/promptfoo/promptfooconfig-new-prompts.yaml @@ -0,0 +1,140 @@ +# Promptfoo configuration for testing new prompts only +description: "Testing lightly edited transcript and things to think about prompts" + +prompts: + - id: lightly-edited-transcript + file: ./prompts/lightly-edited-transcript-prompt.txt + - id: things-to-think-about + file: ./prompts/things-to-think-about-prompt.txt + +providers: + - id: claude + config: + id: anthropic:claude-3-5-sonnet-20241022 + +defaultTest: + assert: + - type: not-contains + value: "Here is" + - type: not-contains + value: "I will" + - type: not-contains + value: "I'll provide" + +tests: + # Test 1: Lightly edited transcript - AI coding metaphor + - description: "AI coding metaphor - lightly edited" + vars: + transcript: | + Okay. So I am thinking about an idea I had for an article which is I wanna talk about how what coding With Agentic coding assistance like Claude Code, and Cursor, it feels like. And I've heard it described being a manager. You're gonna be a manager. It's like being a manager of humans. You've got all these AI agents. 
They're just like little humans that know, human engineers, and you just have to tell them what to do, and they'll magically write code for you and then maybe they do it wrong, and you just have to give them feedback, and then it'll work. But I don't really think that metaphor is super accurate. And but I've been trying to think of what is a better metaphor. We're like, what is this like? And, you know, is it using a machine? Is it using a is it coding with a broom or something? Or is it coding with a know, is it building with things with bricks? Or with, Construction equipment For what? And I think the best example I have come across is I was thinking of This video game I used to play, for, like, the Nintendo 64 maybe, that was your Mickey Mouse and you have a magic paintbrush. You can point this magic paintbrush of things in your black and white world, and it transforms them into colorful, amazing, magical things. But the thing about this video game is that it was also really confusing, and the UI was terrible, and it was just So this paintbrush that was so magical and powerful was extremely difficult to use. And oftentimes, wouldn't work at all because I didn't know how to Like, where to point it or how to use it or what to do with it. And so I think that is the metaphor I would say is it's like having a very difficult to use paintbrush magic paintbrush that if you use it perfectly, You can paint. Way faster then you can paint with a regular paintbrush. But how you use it is not the same as how you ask a human to do something for you. 
+ options: + provider: claude + prompts: + - lightly-edited-transcript + assert: + - type: contains + value: "##" + metric: "Has section headers" + - type: not-contains-any + value: ["um", "uh", "you know", ", like,"] + metric: "Removed filler words" + - type: contains + value: "magic paintbrush" + metric: "Preserves key phrases" + - type: contains + value: "Nintendo 64" + metric: "Preserves specific details" + + # Test 2: Things to think about - Startup pivot + - description: "Startup pivot - thought questions" + vars: + transcript: | + So we've been working on this product for six months now and I'm starting to think we need to pivot. The user feedback has been pretty mixed. Some people love the core idea but they're frustrated with the implementation. Others don't really get what we're trying to do at all. I keep thinking about what Paul Graham said about making something people want. Are we making something people want? I don't know. The metrics aren't terrible but they're not great either. We have about 500 users, maybe 50 are really active. That's not nothing but it's not product-market fit either. I wonder if we should double down on the power users or try to simplify for the masses. Or maybe we need to completely rethink this. The team is getting tired. We're burning through our runway. I estimate we have maybe 4 months left at current burn rate. We could extend that if we let someone go but I really don't want to do that. Everyone's been working so hard. Maybe we need to talk to more users. Or maybe we just need to ship faster and iterate. I don't know. This is hard. + options: + provider: claude + prompts: + - things-to-think-about + assert: + - type: contains + value: "•" + metric: "Uses bullet format" + - type: regex + pattern: "•.*\\?" + metric: "Questions end with ?" 
+ - type: javascript + value: | + const bullets = output.match(/•[^•]+/g) || []; + bullets.length >= 2 && bullets.length <= 3 + metric: "Has 2-3 questions" + + # Test 3: Bike shop - lightly edited + - description: "Bike shop - lightly edited transcript" + vars: + transcript: | + I've been thinking about opening a bike shop in San Leandro. I love bikes, I love working with my hands, and I think there's a real need for a good local bike shop in the area. The closest one is like 20 minutes away. I could do repairs, sell some bikes, maybe have a little coffee corner where people could hang out. Make it a community space, you know? I've got some savings, maybe 50k, and I think I could get a small business loan. My cousin knows a lot about bikes and said he'd help out on weekends. I found this empty storefront on East 14th that would be perfect. Good foot traffic, reasonable rent. I'm just tired of my corporate job. I want to do something that matters to the community, something where I can see the direct impact of my work. But I don't know, it's scary to leave a stable job. + options: + provider: claude + prompts: + - lightly-edited-transcript + assert: + - type: contains-all + value: ["50k", "East 14th", "cousin"] + metric: "Preserves specific details" + - type: not-contains + value: "you know?" 
+ metric: "Removes filler phrases" + + # Test 4: Short reminder + - description: "Very short reminder - both prompts" + vars: + transcript: | + Um, remember to, uh, email Sarah about the design mockups + options: + provider: claude + assert: + - type: javascript + value: | + // For edited transcript + if (output.includes("Remember to email Sarah")) { + return !output.includes("um") && !output.includes("uh"); + } + // For questions - should handle gracefully + return true; + metric: "Handles short input appropriately" + + # Test 5: Technical discussion - lightly edited + - description: "Technical architecture - lightly edited" + vars: + transcript: | + Okay so I'm trying to figure out the best way to architect this new feature. Um, we need real-time updates but I'm not sure if we should use websockets or just poll every few seconds. Websockets would be better for performance but they're harder to implement and we'd need to handle reconnection logic and all that stuff. Actually wait, what about using Server-Sent Events? That might be a good middle ground. Oh but then we'd need to think about scalability. If we have thousands of users all connected at once, that could get expensive. Maybe we should start with polling and then upgrade to websockets later if we need to? But then we'd be building technical debt from the start. Ugh, architectural decisions are hard. Actually, you know what, let me think about this differently. What's the actual user requirement here? They need updates within, let's say, 5 seconds. So polling every 3 seconds would probably be fine for MVP. We can always optimize later. 
+ options: + provider: claude + prompts: + - lightly-edited-transcript + assert: + - type: javascript + value: | + const headers = output.match(/##[^#\n]+/g) || []; + headers.length >= 2 + metric: "Has multiple section headers" + - type: contains-all + value: ["websockets", "Server-Sent Events", "polling"] + metric: "Preserves all technical terms" + + # Test 6: Personal reflection - things to think about + - description: "Career change reflection - thought questions" + vars: + transcript: | + I'm at a crossroads in my career. I've been a software engineer for 10 years and I'm good at it, but I don't feel passionate about it anymore. I keep thinking about teaching. Maybe I could teach computer science at a community college or do coding bootcamps. The pay would be less but I think I'd be happier. My wife is supportive but we have a mortgage and two kids. I'm 35 now, is it too late to change careers? + options: + provider: claude + prompts: + - things-to-think-about + assert: + - type: javascript + value: | + const output_lower = output.toLowerCase(); + const hasDepthQuestions = output_lower.includes("fulfill") || output_lower.includes("passion") || output_lower.includes("impact") || output_lower.includes("meaning") || output_lower.includes("teach"); + const hasPracticalQuestions = output_lower.includes("transition") || output_lower.includes("financial") || output_lower.includes("family") || output_lower.includes("income"); + return hasDepthQuestions || hasPracticalQuestions; + metric: "Questions address core concerns" diff --git a/server/promptfoo/promptfooconfig-with-assertions.yaml b/server/promptfoo/promptfooconfig-with-assertions.yaml new file mode 100644 index 0000000..4fb2b0c --- /dev/null +++ b/server/promptfoo/promptfooconfig-with-assertions.yaml @@ -0,0 +1,108 @@ +# Promptfoo configuration with assertions for testing new prompts +description: "Funnel API prompt testing with assertions" + +prompts: + - file://./prompts/lightly-edited-transcript-prompt.txt + 
- file://./prompts/things-to-think-about-prompt.txt + +providers: + - anthropic:claude-3-5-sonnet-20241022 + +defaultTest: + assert: + - type: not-contains + value: "Here is" + - type: not-contains + value: "I will" + - type: not-contains + value: "I'll provide" + +tests: + # Test 1: AI coding metaphor - Testing edited transcript + - description: "AI coding metaphor - edited transcript" + vars: + transcript: | + Okay. So I am thinking about an idea I had for an article which is I wanna talk about how what coding With Agentic coding assistance like Claude Code, and Cursor, it feels like. And I've heard it described being a manager. You're gonna be a manager. It's like being a manager of humans. You've got all these AI agents. They're just like little humans that know, human engineers, and you just have to tell them what to do, and they'll magically write code for you and then maybe they do it wrong, and you just have to give them feedback, and then it'll work. But I don't really think that metaphor is super accurate. And but I've been trying to think of what is a better metaphor. We're like, what is this like? And, you know, is it using a machine? Is it using a is it coding with a broom or something? Or is it coding with a know, is it building with things with bricks? Or with, Construction equipment For what? And I think the best example I have come across is I was thinking of This video game I used to play, for, like, the Nintendo 64 maybe, that was your Mickey Mouse and you have a magic paintbrush. You can point this magic paintbrush of things in your black and white world, and it transforms them into colorful, amazing, magical things. But the thing about this video game is that it was also really confusing, and the UI was terrible, and it was just So this paintbrush that was so magical and powerful was extremely difficult to use. And oftentimes, wouldn't work at all because I didn't know how to Like, where to point it or how to use it or what to do with it. 
And so I think that is the metaphor I would say is it's like having a very difficult to use paintbrush magic paintbrush that if you use it perfectly, You can paint. Way faster then you can paint with a regular paintbrush. But how you use it is not the same as how you ask a human to do something for you. + assert: + - type: contains + value: "##" + metric: "Has section headers" + - type: not-contains + value: "um" + metric: "Removed filler words" + - type: contains + value: "magic paintbrush" + metric: "Preserves key phrases" + - type: not-contains + value: "essentially" + metric: "Doesn't add interpretive words" + + # Test 2: Startup pivot - Testing things to think about + - description: "Startup pivot - thought-provoking questions" + vars: + transcript: | + So we've been working on this product for six months now and I'm starting to think we need to pivot. The user feedback has been pretty mixed. Some people love the core idea but they're frustrated with the implementation. Others don't really get what we're trying to do at all. I keep thinking about what Paul Graham said about making something people want. Are we making something people want? I don't know. The metrics aren't terrible but they're not great either. We have about 500 users, maybe 50 are really active. That's not nothing but it's not product-market fit either. I wonder if we should double down on the power users or try to simplify for the masses. Or maybe we need to completely rethink this. The team is getting tired. We're burning through our runway. I estimate we have maybe 4 months left at current burn rate. We could extend that if we let someone go but I really don't want to do that. Everyone's been working so hard. Maybe we need to talk to more users. Or maybe we just need to ship faster and iterate. I don't know. This is hard. + assert: + - type: contains + value: "•" + metric: "Uses bullet format" + - type: regex + pattern: "•.*\\?" + metric: "Questions end with ?" 
+ - type: javascript + value: | + const bullets = output.match(/•[^•]+/g) || []; + bullets.length >= 2 && bullets.length <= 3 + metric: "Has 2-3 questions" + - type: not-contains + value: "Should you" + metric: "Avoids yes/no questions" + + # Test 3: Bike shop dream + - description: "Bike shop - both prompts" + vars: + transcript: | + I've been thinking about opening a bike shop in San Leandro. I love bikes, I love working with my hands, and I think there's a real need for a good local bike shop in the area. The closest one is like 20 minutes away. I could do repairs, sell some bikes, maybe have a little coffee corner where people could hang out. Make it a community space, you know? I've got some savings, maybe 50k, and I think I could get a small business loan. My cousin knows a lot about bikes and said he'd help out on weekends. I found this empty storefront on East 14th that would be perfect. Good foot traffic, reasonable rent. I'm just tired of my corporate job. I want to do something that matters to the community, something where I can see the direct impact of my work. But I don't know, it's scary to leave a stable job. 
+ assert: + - type: javascript + value: | + // For edited transcript: should preserve specific details + const hasSpecificDetails = output.includes("50k") && output.includes("East 14th") && output.includes("cousin"); + // For questions: should be specific to bike shop, not generic business + const hasSpecificQuestions = output.includes("bike") || output.includes("repair") || output.includes("cycling") || output.includes("community"); + hasSpecificDetails || hasSpecificQuestions + metric: "Maintains specificity" + + # Test 4: Short recording edge case + - description: "Very short reminder - edited transcript" + vars: + transcript: | + Um, remember to, uh, email Sarah about the design mockups + assert: + - type: contains + value: "Remember to email Sarah about the design mockups" + metric: "Cleans up short text" + - type: not-contains + value: "um" + metric: "Removes filler words" + - type: not-contains + value: "uh" + metric: "Removes filler words" + + # Test 5: Technical discussion - section headers + - description: "Technical architecture - edited transcript structure" + vars: + transcript: | + Okay so I'm trying to figure out the best way to architect this new feature. Um, we need real-time updates but I'm not sure if we should use websockets or just poll every few seconds. Websockets would be better for performance but they're harder to implement and we'd need to handle reconnection logic and all that stuff. Actually wait, what about using Server-Sent Events? That might be a good middle ground. Oh but then we'd need to think about scalability. If we have thousands of users all connected at once, that could get expensive. Maybe we should start with polling and then upgrade to websockets later if we need to? But then we'd be building technical debt from the start. Ugh, architectural decisions are hard. Actually, you know what, let me think about this differently. What's the actual user requirement here? They need updates within, let's say, 5 seconds. 
So polling every 3 seconds would probably be fine for MVP. We can always optimize later. + assert: + - type: javascript + value: | + const headers = output.match(/##[^#\n]+/g) || []; + headers.length >= 2 + metric: "Has multiple section headers" + - type: contains + value: "websockets" + metric: "Preserves technical terms" + - type: contains + value: "Server-Sent Events" + metric: "Preserves exact terminology" diff --git a/server/promptfoo/promptfooconfig.yaml b/server/promptfoo/promptfooconfig.yaml index 7202b59..9df249f 100644 --- a/server/promptfoo/promptfooconfig.yaml +++ b/server/promptfoo/promptfooconfig.yaml @@ -4,6 +4,8 @@ description: "Funnel API prompt testing" prompts: - file://./prompts/summarize.txt - file://./prompts/diagram.txt + - file://./prompts/lightly-edited-transcript-prompt.txt + - file://./prompts/things-to-think-about-prompt.txt providers: - anthropic:claude-3-5-sonnet-20241022 @@ -20,3 +22,21 @@ tests: vars: transcript: | So we've been working on this product for six months now and I'm starting to think we need to pivot. The user feedback has been pretty mixed. Some people love the core idea but they're frustrated with the implementation. Others don't really get what we're trying to do at all. I keep thinking about what Paul Graham said about making something people want. Are we making something people want? I don't know. The metrics aren't terrible but they're not great either. We have about 500 users, maybe 50 are really active. That's not nothing but it's not product-market fit either. I wonder if we should double down on the power users or try to simplify for the masses. Or maybe we need to completely rethink this. The team is getting tired. We're burning through our runway. I estimate we have maybe 4 months left at current burn rate. We could extend that if we let someone go but I really don't want to do that. Everyone's been working so hard. Maybe we need to talk to more users. Or maybe we just need to ship faster and iterate. 
I don't know. This is hard. + + # Test 3: Bike shop dream (for testing "things to think about") + - description: "Bike shop business idea" + vars: + transcript: | + I've been thinking about opening a bike shop in San Leandro. I love bikes, I love working with my hands, and I think there's a real need for a good local bike shop in the area. The closest one is like 20 minutes away. I could do repairs, sell some bikes, maybe have a little coffee corner where people could hang out. Make it a community space, you know? I've got some savings, maybe 50k, and I think I could get a small business loan. My cousin knows a lot about bikes and said he'd help out on weekends. I found this empty storefront on East 14th that would be perfect. Good foot traffic, reasonable rent. I'm just tired of my corporate job. I want to do something that matters to the community, something where I can see the direct impact of my work. But I don't know, it's scary to leave a stable job. + + # Test 4: Short recording (for edge case testing) + - description: "Very short reminder" + vars: + transcript: | + Um, remember to, uh, email Sarah about the design mockups + + # Test 5: Technical rambling (for section headers in edited transcript) + - description: "Technical architecture discussion" + vars: + transcript: | + Okay so I'm trying to figure out the best way to architect this new feature. Um, we need real-time updates but I'm not sure if we should use websockets or just poll every few seconds. Websockets would be better for performance but they're harder to implement and we'd need to handle reconnection logic and all that stuff. Actually wait, what about using Server-Sent Events? That might be a good middle ground. Oh but then we'd need to think about scalability. If we have thousands of users all connected at once, that could get expensive. Maybe we should start with polling and then upgrade to websockets later if we need to? But then we'd be building technical debt from the start. 
Ugh, architectural decisions are hard. Actually, you know what, let me think about this differently. What's the actual user requirement here? They need updates within, let's say, 5 seconds. So polling every 3 seconds would probably be fine for MVP. We can always optimize later. diff --git a/server/promptfoo/prompts/lightly-edited-transcript-prompt.txt b/server/promptfoo/prompts/lightly-edited-transcript-prompt.txt new file mode 100644 index 0000000..2377aa6 --- /dev/null +++ b/server/promptfoo/prompts/lightly-edited-transcript-prompt.txt @@ -0,0 +1,20 @@ +You are an expert editor who lightly cleans up voice transcripts while preserving the speaker's exact words and phrasing. + +Your task is to take a raw voice transcript and make it more readable WITHOUT changing the actual words or phrasing. You should: + +1. Add section headers (using ##) to organize major topic shifts +2. Remove filler words like "um", "uh", "like" (when used as filler), "you know" +3. Fix obvious speech recognition errors +4. Add proper punctuation and paragraph breaks +5. Keep ALL the original wording and phrasing intact +6. Do NOT rewrite sentences or change how things are said +7. Do NOT summarize or condense - keep the full content + +Format the output as clean, readable text with markdown headers for sections. + +IMPORTANT: The goal is readability, not rewriting. The speaker should recognize these as their exact words, just cleaned up. + +Transcript: +{{transcript}} + +Please provide the lightly edited transcript: \ No newline at end of file diff --git a/server/promptfoo/prompts/things-to-think-about-prompt.txt b/server/promptfoo/prompts/things-to-think-about-prompt.txt new file mode 100644 index 0000000..11bf426 --- /dev/null +++ b/server/promptfoo/prompts/things-to-think-about-prompt.txt @@ -0,0 +1,33 @@ +You are a thoughtful coach who helps people explore their ideas more deeply through strategic questions. 
+ +Given a transcript of someone's thoughts, generate 2-3 thought-provoking questions that will help them think more deeply about their idea. + +Your questions should: +1. Balance challenging assumptions with inspiring new thinking +2. Be specific to their idea (not generic) +3. Help them explore practical, emotional, and strategic aspects +4. Open up new perspectives they might not have considered +5. Be genuinely helpful, not discouraging + +Good question types include: +- Practical challenges they'll need to solve +- Emotional/personal motivations to explore +- Strategic considerations they might have missed +- Creative possibilities they haven't imagined +- Reality checks that are constructive + +Avoid: +- Generic questions that could apply to any idea +- Overly negative or discouraging questions +- Questions that assume failure +- Yes/no questions - make them open-ended + +Format your response as: +• [Question 1] +• [Question 2] +• [Question 3] + +Transcript: +{{transcript}} + +Please provide 2-3 thought-provoking questions: \ No newline at end of file diff --git a/server/promptfoo/results.json b/server/promptfoo/results.json new file mode 100644 index 0000000..5b9957a --- /dev/null +++ b/server/promptfoo/results.json @@ -0,0 +1,360 @@ +{ + "evalId": "eval-CdU-2025-06-27T22:07:10", + "results": { + "version": 3, + "timestamp": "2025-06-27T22:07:10.567Z", + "prompts": [ + { + "raw": "You are an expert at distilling rambling stream-of-consciousness transcripts into ultra-concise bullet points.\n\nGiven this transcript, create 3-6 bullet points that:\n- Capture ONLY the core insight or main metaphor\n- Each bullet is EXTREMELY concise (under 60 characters)\n- Use punchy, memorable phrasing\n- Include emoji or symbols when they add clarity\n- Focus on the \"aha moment\" not the journey to get there\n\nExample output style:\n• AI coding ≠ Managing Junior Devs\n• It's like a magic paintbrush from a glitchy video game\n• Powerful but maddeningly unintuitive\n• 
Requires learning alien logic, not people skills\n\nTranscript:\n{{transcript}}\n\nPlease provide only the bullet points, no introduction or conclusion.", + "label": "prompts/summarize.txt: You are an expert at distilling rambling stream-of-consciousness transcripts into ultra-concise bullet points.\n\nGiven this transcript, create 3-6 bullet points that:\n- Capture ONLY the core insight or main metaphor\n- Each bullet is EXTREMELY concise (under 60 characters)\n- Use punchy, memorable phrasing\n- Include emoji or symbols when they add clarity\n- Focus on the \"aha moment\" not the journey to get there\n\nExample output style:\n• AI coding ≠ Managing Junior Devs\n• It's like a magic paintbrush from a glitchy video game\n• Powerful but maddeningly unintuitive\n• Requires learning alien logic, not people skills\n\nTranscript:\n{{transcript}}\n\nPlease provide only the bullet points, no introduction or conclusion.", + "id": "efcf288f2092c8aefbaad48bce009c6eac30c2f2fbe4cc4efde8d8bb13ea218d", + "provider": "anthropic:claude-3-5-sonnet-20241022", + "metrics": { + "score": 2, + "testPassCount": 2, + "testFailCount": 0, + "testErrorCount": 0, + "assertPassCount": 0, + "assertFailCount": 0, + "totalLatencyMs": 5495, + "tokenUsage": { + "total": 1173, + "prompt": 1064, + "completion": 109, + "cached": 0, + "numRequests": 2, + "completionDetails": { + "reasoning": 0, + "acceptedPrediction": 0, + "rejectedPrediction": 0 + }, + "assertions": { + "total": 0, + "prompt": 0, + "completion": 0, + "cached": 0 + } + }, + "namedScores": {}, + "namedScoresCount": {}, + "cost": 0.004827 + } + }, + { + "raw": "You are an expert at creating simple, clear ASCII art diagrams that capture the essence of complex ideas.\n\nGiven this transcript, create a napkin-style diagram that:\n- Uses ASCII art to visualize the main concept or metaphor\n- Keeps it simple and clear (max 15 lines, 60 chars wide)\n- Captures the core relationship or transformation described\n- Uses arrows, boxes, or simple 
shapes to show connections\n\nAlso provide:\n- A short title (3-5 words)\n- A one-sentence description of what the diagram shows\n\nFormat your response exactly like this:\nTITLE: [Your title here]\nDESCRIPTION: [Your one-sentence description here]\nDIAGRAM:\n[Your ASCII art diagram here]\n\nTranscript:\n{{transcript}}", + "label": "prompts/diagram.txt: You are an expert at creating simple, clear ASCII art diagrams that capture the essence of complex ideas.\n\nGiven this transcript, create a napkin-style diagram that:\n- Uses ASCII art to visualize the main concept or metaphor\n- Keeps it simple and clear (max 15 lines, 60 chars wide)\n- Captures the core relationship or transformation described\n- Uses arrows, boxes, or simple shapes to show connections\n\nAlso provide:\n- A short title (3-5 words)\n- A one-sentence description of what the diagram shows\n\nFormat your response exactly like this:\nTITLE: [Your title here]\nDESCRIPTION: [Your one-sentence description here]\nDIAGRAM:\n[Your ASCII art diagram here]\n\nTranscript:\n{{transcript}}", + "id": "65291442696fd531edc49e1f267d27746dad991408b8fec9496eeee51e6898fe", + "provider": "anthropic:claude-3-5-sonnet-20241022", + "metrics": { + "score": 2, + "testPassCount": 2, + "testFailCount": 0, + "testErrorCount": 0, + "assertPassCount": 0, + "assertFailCount": 0, + "totalLatencyMs": 11443, + "tokenUsage": { + "total": 1439, + "prompt": 1040, + "completion": 399, + "cached": 0, + "numRequests": 2, + "completionDetails": { + "reasoning": 0, + "acceptedPrediction": 0, + "rejectedPrediction": 0 + }, + "assertions": { + "total": 0, + "prompt": 0, + "completion": 0, + "cached": 0 + } + }, + "namedScores": {}, + "namedScoresCount": {}, + "cost": 0.009105 + } + } + ], + "results": [ + { + "cost": 0.002061, + "gradingResult": { + "pass": true, + "score": 1, + "reason": "No assertions", + "tokensUsed": { + "total": 0, + "prompt": 0, + "completion": 0, + "cached": 0 + }, + "assertion": null + }, + "id": 
"3a938ff4-48e5-48d8-a436-1dc715670443", + "latencyMs": 2396, + "namedScores": {}, + "prompt": { + "raw": "You are an expert at distilling rambling stream-of-consciousness transcripts into ultra-concise bullet points.\n\nGiven this transcript, create 3-6 bullet points that:\n- Capture ONLY the core insight or main metaphor\n- Each bullet is EXTREMELY concise (under 60 characters)\n- Use punchy, memorable phrasing\n- Include emoji or symbols when they add clarity\n- Focus on the \"aha moment\" not the journey to get there\n\nExample output style:\n• AI coding ≠ Managing Junior Devs\n• It's like a magic paintbrush from a glitchy video game\n• Powerful but maddeningly unintuitive\n• Requires learning alien logic, not people skills\n\nTranscript:\nSo we've been working on this product for six months now and I'm starting to think we need to pivot. The user feedback has been pretty mixed. Some people love the core idea but they're frustrated with the implementation. Others don't really get what we're trying to do at all. I keep thinking about what Paul Graham said about making something people want. Are we making something people want? I don't know. The metrics aren't terrible but they're not great either. We have about 500 users, maybe 50 are really active. That's not nothing but it's not product-market fit either. I wonder if we should double down on the power users or try to simplify for the masses. Or maybe we need to completely rethink this. The team is getting tired. We're burning through our runway. I estimate we have maybe 4 months left at current burn rate. We could extend that if we let someone go but I really don't want to do that. Everyone's been working so hard. Maybe we need to talk to more users. Or maybe we just need to ship faster and iterate. I don't know. 
This is hard.\n\nPlease provide only the bullet points, no introduction or conclusion.", + "label": "prompts/summarize.txt: You are an expert at distilling rambling stream-of-consciousness transcripts into ultra-concise bullet points.\n\nGiven this transcript, create 3-6 bullet points that:\n- Capture ONLY the core insight or main metaphor\n- Each bullet is EXTREMELY concise (under 60 characters)\n- Use punchy, memorable phrasing\n- Include emoji or symbols when they add clarity\n- Focus on the \"aha moment\" not the journey to get there\n\nExample output style:\n• AI coding ≠ Managing Junior Devs\n• It's like a magic paintbrush from a glitchy video game\n• Powerful but maddeningly unintuitive\n• Requires learning alien logic, not people skills\n\nTranscript:\n{{transcript}}\n\nPlease provide only the bullet points, no introduction or conclusion." + }, + "promptId": "efcf288f2092c8aefbaad48bce009c6eac30c2f2fbe4cc4efde8d8bb13ea218d", + "promptIdx": 0, + "provider": { + "id": "anthropic:claude-3-5-sonnet-20241022", + "label": "" + }, + "response": { + "output": "• 📉 500 users ≠ Product-Market Fit\n• 🎯 Caught between power users and mass appeal\n• ⏳ 4 months of runway left to find answers", + "tokenUsage": { + "total": 479, + "prompt": 427, + "completion": 52 + }, + "cost": 0.002061 + }, + "score": 1, + "success": true, + "testCase": { + "description": "Startup pivot decision", + "vars": { + "transcript": "So we've been working on this product for six months now and I'm starting to think we need to pivot. The user feedback has been pretty mixed. Some people love the core idea but they're frustrated with the implementation. Others don't really get what we're trying to do at all. I keep thinking about what Paul Graham said about making something people want. Are we making something people want? I don't know. The metrics aren't terrible but they're not great either. We have about 500 users, maybe 50 are really active. 
That's not nothing but it's not product-market fit either. I wonder if we should double down on the power users or try to simplify for the masses. Or maybe we need to completely rethink this. The team is getting tired. We're burning through our runway. I estimate we have maybe 4 months left at current burn rate. We could extend that if we let someone go but I really don't want to do that. Everyone's been working so hard. Maybe we need to talk to more users. Or maybe we just need to ship faster and iterate. I don't know. This is hard." + }, + "assert": [], + "options": {}, + "metadata": {} + }, + "testIdx": 1, + "vars": { + "transcript": "So we've been working on this product for six months now and I'm starting to think we need to pivot. The user feedback has been pretty mixed. Some people love the core idea but they're frustrated with the implementation. Others don't really get what we're trying to do at all. I keep thinking about what Paul Graham said about making something people want. Are we making something people want? I don't know. The metrics aren't terrible but they're not great either. We have about 500 users, maybe 50 are really active. That's not nothing but it's not product-market fit either. I wonder if we should double down on the power users or try to simplify for the masses. Or maybe we need to completely rethink this. The team is getting tired. We're burning through our runway. I estimate we have maybe 4 months left at current burn rate. We could extend that if we let someone go but I really don't want to do that. Everyone's been working so hard. Maybe we need to talk to more users. Or maybe we just need to ship faster and iterate. I don't know. This is hard." 
+ }, + "metadata": { + "_promptfooFileMetadata": {} + }, + "failureReason": 0 + }, + { + "cost": 0.0027660000000000002, + "gradingResult": { + "pass": true, + "score": 1, + "reason": "No assertions", + "tokensUsed": { + "total": 0, + "prompt": 0, + "completion": 0, + "cached": 0 + }, + "assertion": null + }, + "id": "44f6f531-df91-44a9-a088-dce20f8dd272", + "latencyMs": 3099, + "namedScores": {}, + "prompt": { + "raw": "You are an expert at distilling rambling stream-of-consciousness transcripts into ultra-concise bullet points.\n\nGiven this transcript, create 3-6 bullet points that:\n- Capture ONLY the core insight or main metaphor\n- Each bullet is EXTREMELY concise (under 60 characters)\n- Use punchy, memorable phrasing\n- Include emoji or symbols when they add clarity\n- Focus on the \"aha moment\" not the journey to get there\n\nExample output style:\n• AI coding ≠ Managing Junior Devs\n• It's like a magic paintbrush from a glitchy video game\n• Powerful but maddeningly unintuitive\n• Requires learning alien logic, not people skills\n\nTranscript:\nOkay. So I am thinking about an idea I had for an article which is I wanna talk about how what coding With Agentic coding assistance like Claude Code, and Cursor, it feels like. And I've heard it described being a manager. You're gonna be a manager. It's like being a manager of humans. You've got all these AI agents. They're just like little humans that know, human engineers, and you just have to tell them what to do, and they'll magically write code for you and then maybe they do it wrong, and you just have to give them feedback, and then it'll work. But I don't really think that metaphor is super accurate. And but I've been trying to think of what is a better metaphor. We're like, what is this like? And, you know, is it using a machine? Is it using a is it coding with a broom or something? Or is it coding with a know, is it building with things with bricks? Or with, Construction equipment For what? 
And I think the best example I have come across is I was thinking of This video game I used to play, for, like, the Nintendo 64 maybe, that was your Mickey Mouse and you have a magic paintbrush. You can point this magic paintbrush of things in your black and white world, and it transforms them into colorful, amazing, magical things. But the thing about this video game is that it was also really confusing, and the UI was terrible, and it was just So this paintbrush that was so magical and powerful was extremely difficult to use. And oftentimes, wouldn't work at all because I didn't know how to Like, where to point it or how to use it or what to do with it. And so I think that is the metaphor I would say is it's like having a very difficult to use paintbrush magic paintbrush that if you use it perfectly, You can paint. Way faster then you can paint with a regular paintbrush. But how you use it is not the same as how you ask a human to do something for you.\n\nPlease provide only the bullet points, no introduction or conclusion.", + "label": "prompts/summarize.txt: You are an expert at distilling rambling stream-of-consciousness transcripts into ultra-concise bullet points.\n\nGiven this transcript, create 3-6 bullet points that:\n- Capture ONLY the core insight or main metaphor\n- Each bullet is EXTREMELY concise (under 60 characters)\n- Use punchy, memorable phrasing\n- Include emoji or symbols when they add clarity\n- Focus on the \"aha moment\" not the journey to get there\n\nExample output style:\n• AI coding ≠ Managing Junior Devs\n• It's like a magic paintbrush from a glitchy video game\n• Powerful but maddeningly unintuitive\n• Requires learning alien logic, not people skills\n\nTranscript:\n{{transcript}}\n\nPlease provide only the bullet points, no introduction or conclusion." 
+ }, + "promptId": "efcf288f2092c8aefbaad48bce009c6eac30c2f2fbe4cc4efde8d8bb13ea218d", + "promptIdx": 0, + "provider": { + "id": "anthropic:claude-3-5-sonnet-20241022", + "label": "" + }, + "response": { + "output": "• 🎨 AI coding = glitchy magic paintbrush\n• Not managing humans, more like wielding magic\n• Powerful but frustratingly hard to control\n• ✨ Right spell = fast results, wrong spell = nothing", + "tokenUsage": { + "total": 694, + "prompt": 637, + "completion": 57 + }, + "cost": 0.0027660000000000002 + }, + "score": 1, + "success": true, + "testCase": { + "description": "AI coding metaphor summary", + "vars": { + "transcript": "Okay. So I am thinking about an idea I had for an article which is I wanna talk about how what coding With Agentic coding assistance like Claude Code, and Cursor, it feels like. And I've heard it described being a manager. You're gonna be a manager. It's like being a manager of humans. You've got all these AI agents. They're just like little humans that know, human engineers, and you just have to tell them what to do, and they'll magically write code for you and then maybe they do it wrong, and you just have to give them feedback, and then it'll work. But I don't really think that metaphor is super accurate. And but I've been trying to think of what is a better metaphor. We're like, what is this like? And, you know, is it using a machine? Is it using a is it coding with a broom or something? Or is it coding with a know, is it building with things with bricks? Or with, Construction equipment For what? And I think the best example I have come across is I was thinking of This video game I used to play, for, like, the Nintendo 64 maybe, that was your Mickey Mouse and you have a magic paintbrush. You can point this magic paintbrush of things in your black and white world, and it transforms them into colorful, amazing, magical things. 
But the thing about this video game is that it was also really confusing, and the UI was terrible, and it was just So this paintbrush that was so magical and powerful was extremely difficult to use. And oftentimes, wouldn't work at all because I didn't know how to Like, where to point it or how to use it or what to do with it. And so I think that is the metaphor I would say is it's like having a very difficult to use paintbrush magic paintbrush that if you use it perfectly, You can paint. Way faster then you can paint with a regular paintbrush. But how you use it is not the same as how you ask a human to do something for you." + }, + "assert": [], + "options": {}, + "metadata": {} + }, + "testIdx": 0, + "vars": { + "transcript": "Okay. So I am thinking about an idea I had for an article which is I wanna talk about how what coding With Agentic coding assistance like Claude Code, and Cursor, it feels like. And I've heard it described being a manager. You're gonna be a manager. It's like being a manager of humans. You've got all these AI agents. They're just like little humans that know, human engineers, and you just have to tell them what to do, and they'll magically write code for you and then maybe they do it wrong, and you just have to give them feedback, and then it'll work. But I don't really think that metaphor is super accurate. And but I've been trying to think of what is a better metaphor. We're like, what is this like? And, you know, is it using a machine? Is it using a is it coding with a broom or something? Or is it coding with a know, is it building with things with bricks? Or with, Construction equipment For what? And I think the best example I have come across is I was thinking of This video game I used to play, for, like, the Nintendo 64 maybe, that was your Mickey Mouse and you have a magic paintbrush. You can point this magic paintbrush of things in your black and white world, and it transforms them into colorful, amazing, magical things. 
But the thing about this video game is that it was also really confusing, and the UI was terrible, and it was just So this paintbrush that was so magical and powerful was extremely difficult to use. And oftentimes, wouldn't work at all because I didn't know how to Like, where to point it or how to use it or what to do with it. And so I think that is the metaphor I would say is it's like having a very difficult to use paintbrush magic paintbrush that if you use it perfectly, You can paint. Way faster then you can paint with a regular paintbrush. But how you use it is not the same as how you ask a human to do something for you." + }, + "metadata": { + "_promptfooFileMetadata": {} + }, + "failureReason": 0 + }, + { + "cost": 0.00408, + "gradingResult": { + "pass": true, + "score": 1, + "reason": "No assertions", + "tokensUsed": { + "total": 0, + "prompt": 0, + "completion": 0, + "cached": 0 + }, + "assertion": null + }, + "id": "8f172c9e-8af4-4936-a567-5ef6b8db8f96", + "latencyMs": 5502, + "namedScores": {}, + "prompt": { + "raw": "You are an expert at creating simple, clear ASCII art diagrams that capture the essence of complex ideas.\n\nGiven this transcript, create a napkin-style diagram that:\n- Uses ASCII art to visualize the main concept or metaphor\n- Keeps it simple and clear (max 15 lines, 60 chars wide)\n- Captures the core relationship or transformation described\n- Uses arrows, boxes, or simple shapes to show connections\n\nAlso provide:\n- A short title (3-5 words)\n- A one-sentence description of what the diagram shows\n\nFormat your response exactly like this:\nTITLE: [Your title here]\nDESCRIPTION: [Your one-sentence description here]\nDIAGRAM:\n[Your ASCII art diagram here]\n\nTranscript:\nOkay. So I am thinking about an idea I had for an article which is I wanna talk about how what coding With Agentic coding assistance like Claude Code, and Cursor, it feels like. And I've heard it described being a manager. You're gonna be a manager. 
It's like being a manager of humans. You've got all these AI agents. They're just like little humans that know, human engineers, and you just have to tell them what to do, and they'll magically write code for you and then maybe they do it wrong, and you just have to give them feedback, and then it'll work. But I don't really think that metaphor is super accurate. And but I've been trying to think of what is a better metaphor. We're like, what is this like? And, you know, is it using a machine? Is it using a is it coding with a broom or something? Or is it coding with a know, is it building with things with bricks? Or with, Construction equipment For what? And I think the best example I have come across is I was thinking of This video game I used to play, for, like, the Nintendo 64 maybe, that was your Mickey Mouse and you have a magic paintbrush. You can point this magic paintbrush of things in your black and white world, and it transforms them into colorful, amazing, magical things. But the thing about this video game is that it was also really confusing, and the UI was terrible, and it was just So this paintbrush that was so magical and powerful was extremely difficult to use. And oftentimes, wouldn't work at all because I didn't know how to Like, where to point it or how to use it or what to do with it. And so I think that is the metaphor I would say is it's like having a very difficult to use paintbrush magic paintbrush that if you use it perfectly, You can paint. Way faster then you can paint with a regular paintbrush. 
But how you use it is not the same as how you ask a human to do something for you.", + "label": "prompts/diagram.txt: You are an expert at creating simple, clear ASCII art diagrams that capture the essence of complex ideas.\n\nGiven this transcript, create a napkin-style diagram that:\n- Uses ASCII art to visualize the main concept or metaphor\n- Keeps it simple and clear (max 15 lines, 60 chars wide)\n- Captures the core relationship or transformation described\n- Uses arrows, boxes, or simple shapes to show connections\n\nAlso provide:\n- A short title (3-5 words)\n- A one-sentence description of what the diagram shows\n\nFormat your response exactly like this:\nTITLE: [Your title here]\nDESCRIPTION: [Your one-sentence description here]\nDIAGRAM:\n[Your ASCII art diagram here]\n\nTranscript:\n{{transcript}}" + }, + "promptId": "65291442696fd531edc49e1f267d27746dad991408b8fec9496eeee51e6898fe", + "promptIdx": 1, + "provider": { + "id": "anthropic:claude-3-5-sonnet-20241022", + "label": "" + }, + "response": { + "output": "TITLE: The Magic Coding Brush\nDESCRIPTION: Shows how AI coding assistants are like a powerful but tricky-to-master magic paintbrush, transforming basic input into enhanced output.\n\nDIAGRAM:\n```\n Magic AI Brush\n ___/\\___\nUser --> | ?? | --> Enhanced Code\nInput | | Output\n |(...!)|\n ‾\\____/‾\n ^ ^\n | |\n Simple, Powerful but\n Direct Requires\n Commands Skill & Practice\n\n [Black & White] => [Full Color]\n```", + "tokenUsage": { + "total": 772, + "prompt": 625, + "completion": 147 + }, + "cost": 0.00408 + }, + "score": 1, + "success": true, + "testCase": { + "description": "AI coding metaphor summary", + "vars": { + "transcript": "Okay. So I am thinking about an idea I had for an article which is I wanna talk about how what coding With Agentic coding assistance like Claude Code, and Cursor, it feels like. And I've heard it described being a manager. You're gonna be a manager. It's like being a manager of humans. 
You've got all these AI agents. They're just like little humans that know, human engineers, and you just have to tell them what to do, and they'll magically write code for you and then maybe they do it wrong, and you just have to give them feedback, and then it'll work. But I don't really think that metaphor is super accurate. And but I've been trying to think of what is a better metaphor. We're like, what is this like? And, you know, is it using a machine? Is it using a is it coding with a broom or something? Or is it coding with a know, is it building with things with bricks? Or with, Construction equipment For what? And I think the best example I have come across is I was thinking of This video game I used to play, for, like, the Nintendo 64 maybe, that was your Mickey Mouse and you have a magic paintbrush. You can point this magic paintbrush of things in your black and white world, and it transforms them into colorful, amazing, magical things. But the thing about this video game is that it was also really confusing, and the UI was terrible, and it was just So this paintbrush that was so magical and powerful was extremely difficult to use. And oftentimes, wouldn't work at all because I didn't know how to Like, where to point it or how to use it or what to do with it. And so I think that is the metaphor I would say is it's like having a very difficult to use paintbrush magic paintbrush that if you use it perfectly, You can paint. Way faster then you can paint with a regular paintbrush. But how you use it is not the same as how you ask a human to do something for you." + }, + "assert": [], + "options": {}, + "metadata": {} + }, + "testIdx": 0, + "vars": { + "transcript": "Okay. So I am thinking about an idea I had for an article which is I wanna talk about how what coding With Agentic coding assistance like Claude Code, and Cursor, it feels like. And I've heard it described being a manager. You're gonna be a manager. It's like being a manager of humans. 
You've got all these AI agents. They're just like little humans that know, human engineers, and you just have to tell them what to do, and they'll magically write code for you and then maybe they do it wrong, and you just have to give them feedback, and then it'll work. But I don't really think that metaphor is super accurate. And but I've been trying to think of what is a better metaphor. We're like, what is this like? And, you know, is it using a machine? Is it using a is it coding with a broom or something? Or is it coding with a know, is it building with things with bricks? Or with, Construction equipment For what? And I think the best example I have come across is I was thinking of This video game I used to play, for, like, the Nintendo 64 maybe, that was your Mickey Mouse and you have a magic paintbrush. You can point this magic paintbrush of things in your black and white world, and it transforms them into colorful, amazing, magical things. But the thing about this video game is that it was also really confusing, and the UI was terrible, and it was just So this paintbrush that was so magical and powerful was extremely difficult to use. And oftentimes, wouldn't work at all because I didn't know how to Like, where to point it or how to use it or what to do with it. And so I think that is the metaphor I would say is it's like having a very difficult to use paintbrush magic paintbrush that if you use it perfectly, You can paint. Way faster then you can paint with a regular paintbrush. But how you use it is not the same as how you ask a human to do something for you." 
+ }, + "metadata": { + "_promptfooFileMetadata": {} + }, + "failureReason": 0 + }, + { + "cost": 0.005025, + "gradingResult": { + "pass": true, + "score": 1, + "reason": "No assertions", + "tokensUsed": { + "total": 0, + "prompt": 0, + "completion": 0, + "cached": 0 + }, + "assertion": null + }, + "id": "8e7135a6-b4b4-496c-889c-422672606ec9", + "latencyMs": 5941, + "namedScores": {}, + "prompt": { + "raw": "You are an expert at creating simple, clear ASCII art diagrams that capture the essence of complex ideas.\n\nGiven this transcript, create a napkin-style diagram that:\n- Uses ASCII art to visualize the main concept or metaphor\n- Keeps it simple and clear (max 15 lines, 60 chars wide)\n- Captures the core relationship or transformation described\n- Uses arrows, boxes, or simple shapes to show connections\n\nAlso provide:\n- A short title (3-5 words)\n- A one-sentence description of what the diagram shows\n\nFormat your response exactly like this:\nTITLE: [Your title here]\nDESCRIPTION: [Your one-sentence description here]\nDIAGRAM:\n[Your ASCII art diagram here]\n\nTranscript:\nSo we've been working on this product for six months now and I'm starting to think we need to pivot. The user feedback has been pretty mixed. Some people love the core idea but they're frustrated with the implementation. Others don't really get what we're trying to do at all. I keep thinking about what Paul Graham said about making something people want. Are we making something people want? I don't know. The metrics aren't terrible but they're not great either. We have about 500 users, maybe 50 are really active. That's not nothing but it's not product-market fit either. I wonder if we should double down on the power users or try to simplify for the masses. Or maybe we need to completely rethink this. The team is getting tired. We're burning through our runway. I estimate we have maybe 4 months left at current burn rate. 
We could extend that if we let someone go but I really don't want to do that. Everyone's been working so hard. Maybe we need to talk to more users. Or maybe we just need to ship faster and iterate. I don't know. This is hard.", + "label": "prompts/diagram.txt: You are an expert at creating simple, clear ASCII art diagrams that capture the essence of complex ideas.\n\nGiven this transcript, create a napkin-style diagram that:\n- Uses ASCII art to visualize the main concept or metaphor\n- Keeps it simple and clear (max 15 lines, 60 chars wide)\n- Captures the core relationship or transformation described\n- Uses arrows, boxes, or simple shapes to show connections\n\nAlso provide:\n- A short title (3-5 words)\n- A one-sentence description of what the diagram shows\n\nFormat your response exactly like this:\nTITLE: [Your title here]\nDESCRIPTION: [Your one-sentence description here]\nDIAGRAM:\n[Your ASCII art diagram here]\n\nTranscript:\n{{transcript}}" + }, + "promptId": "65291442696fd531edc49e1f267d27746dad991408b8fec9496eeee51e6898fe", + "promptIdx": 1, + "provider": { + "id": "anthropic:claude-3-5-sonnet-20241022", + "label": "" + }, + "response": { + "output": "TITLE: Product-Market Fit Journey\nDESCRIPTION: Shows the current state of a startup struggling between different paths while running out of time and resources.\n\nDIAGRAM:\n```\n RUNWAY LEFT: 4 months\n $$$$$>----------------->x\n \n 500 USERS DECISION POINT\n ┌─────┐ │\n │░░░░░│ ┌────┴────┐\n │░█░░░│───────>│ PIVOT? 
│\n │░░░░░│ └────┬────┘\n └─────┘ │\n (50 active) ╔════╧════╗\n ║ OPTIONS ║\n ╚═══╤════╝\n ┌───┴───┐\n ┌─────────┤PATHS ├─────────┐\n ▼ └───────┘ ▼\n Double down Simplify for\n on power users the masses\n```", + "tokenUsage": { + "total": 667, + "prompt": 415, + "completion": 252 + }, + "cost": 0.005025 + }, + "score": 1, + "success": true, + "testCase": { + "description": "Startup pivot decision", + "vars": { + "transcript": "So we've been working on this product for six months now and I'm starting to think we need to pivot. The user feedback has been pretty mixed. Some people love the core idea but they're frustrated with the implementation. Others don't really get what we're trying to do at all. I keep thinking about what Paul Graham said about making something people want. Are we making something people want? I don't know. The metrics aren't terrible but they're not great either. We have about 500 users, maybe 50 are really active. That's not nothing but it's not product-market fit either. I wonder if we should double down on the power users or try to simplify for the masses. Or maybe we need to completely rethink this. The team is getting tired. We're burning through our runway. I estimate we have maybe 4 months left at current burn rate. We could extend that if we let someone go but I really don't want to do that. Everyone's been working so hard. Maybe we need to talk to more users. Or maybe we just need to ship faster and iterate. I don't know. This is hard." + }, + "assert": [], + "options": {}, + "metadata": {} + }, + "testIdx": 1, + "vars": { + "transcript": "So we've been working on this product for six months now and I'm starting to think we need to pivot. The user feedback has been pretty mixed. Some people love the core idea but they're frustrated with the implementation. Others don't really get what we're trying to do at all. I keep thinking about what Paul Graham said about making something people want. Are we making something people want? 
I don't know. The metrics aren't terrible but they're not great either. We have about 500 users, maybe 50 are really active. That's not nothing but it's not product-market fit either. I wonder if we should double down on the power users or try to simplify for the masses. Or maybe we need to completely rethink this. The team is getting tired. We're burning through our runway. I estimate we have maybe 4 months left at current burn rate. We could extend that if we let someone go but I really don't want to do that. Everyone's been working so hard. Maybe we need to talk to more users. Or maybe we just need to ship faster and iterate. I don't know. This is hard." + }, + "metadata": { + "_promptfooFileMetadata": {} + }, + "failureReason": 0 + } + ], + "stats": { + "successes": 4, + "failures": 0, + "errors": 0, + "tokenUsage": { + "cached": 0, + "completion": 508, + "prompt": 2104, + "total": 2612, + "numRequests": 4, + "completionDetails": { + "reasoning": 0, + "acceptedPrediction": 0, + "rejectedPrediction": 0 + }, + "assertions": { + "total": 0, + "prompt": 0, + "completion": 0, + "cached": 0 + } + } + } + }, + "config": { + "description": "Funnel API prompt testing", + "prompts": [ + "file://./prompts/summarize.txt", + "file://./prompts/diagram.txt" + ], + "providers": [ + "anthropic:claude-3-5-sonnet-20241022" + ], + "tests": [ + { + "description": "AI coding metaphor summary", + "vars": { + "transcript": "Okay. So I am thinking about an idea I had for an article which is I wanna talk about how what coding With Agentic coding assistance like Claude Code, and Cursor, it feels like. And I've heard it described being a manager. You're gonna be a manager. It's like being a manager of humans. You've got all these AI agents. They're just like little humans that know, human engineers, and you just have to tell them what to do, and they'll magically write code for you and then maybe they do it wrong, and you just have to give them feedback, and then it'll work. 
But I don't really think that metaphor is super accurate. And but I've been trying to think of what is a better metaphor. We're like, what is this like? And, you know, is it using a machine? Is it using a is it coding with a broom or something? Or is it coding with a know, is it building with things with bricks? Or with, Construction equipment For what? And I think the best example I have come across is I was thinking of This video game I used to play, for, like, the Nintendo 64 maybe, that was your Mickey Mouse and you have a magic paintbrush. You can point this magic paintbrush of things in your black and white world, and it transforms them into colorful, amazing, magical things. But the thing about this video game is that it was also really confusing, and the UI was terrible, and it was just So this paintbrush that was so magical and powerful was extremely difficult to use. And oftentimes, wouldn't work at all because I didn't know how to Like, where to point it or how to use it or what to do with it. And so I think that is the metaphor I would say is it's like having a very difficult to use paintbrush magic paintbrush that if you use it perfectly, You can paint. Way faster then you can paint with a regular paintbrush. But how you use it is not the same as how you ask a human to do something for you.\n" + } + }, + { + "description": "Startup pivot decision", + "vars": { + "transcript": "So we've been working on this product for six months now and I'm starting to think we need to pivot. The user feedback has been pretty mixed. Some people love the core idea but they're frustrated with the implementation. Others don't really get what we're trying to do at all. I keep thinking about what Paul Graham said about making something people want. Are we making something people want? I don't know. The metrics aren't terrible but they're not great either. We have about 500 users, maybe 50 are really active. That's not nothing but it's not product-market fit either. 
I wonder if we should double down on the power users or try to simplify for the masses. Or maybe we need to completely rethink this. The team is getting tired. We're burning through our runway. I estimate we have maybe 4 months left at current burn rate. We could extend that if we let someone go but I really don't want to do that. Everyone's been working so hard. Maybe we need to talk to more users. Or maybe we just need to ship faster and iterate. I don't know. This is hard.\n" + } + } + ], + "sharing": true, + "outputPath": [ + "results.json" + ], + "extensions": [] + }, + "shareableUrl": null +} diff --git a/server/promptfoo/test-cases/edge-cases.yaml b/server/promptfoo/test-cases/edge-cases.yaml new file mode 100644 index 0000000..5694e2b --- /dev/null +++ b/server/promptfoo/test-cases/edge-cases.yaml @@ -0,0 +1,85 @@ +# Edge case tests for new prompts + +tests: + # Empty recording + - description: "Empty transcript" + vars: + transcript: "" + assert: + - type: not-empty + metric: "Handles empty input gracefully" + + # Very short (2-3 words) + - description: "Ultra short transcript" + vars: + transcript: "Buy milk" + assert: + - type: not-contains + value: "##" + metric: "No headers for very short text" + - type: contains + value: "Buy milk" + metric: "Preserves exact text" + + # All filler words + - description: "Transcript with many filler words" + vars: + transcript: "Um, so, like, you know, I was thinking, uh, that we should, um, maybe, like, consider, you know, implementing the new feature" + assert: + - type: not-contains + value: "um" + metric: "Removes 'um'" + - type: not-contains + value: "uh" + metric: "Removes 'uh'" + - type: contains + value: "implementing the new feature" + metric: "Preserves meaningful content" + + # Multiple topic changes + - description: "Transcript with clear topic changes" + vars: + transcript: | + Let me talk about the user interface first. We need to make sure the buttons are accessible and the colors have good contrast. 
The current design is too cluttered. + + Now switching to the backend architecture. I think we should use microservices instead of a monolith. This will help with scaling and deployment. + + Finally, about the timeline. We're looking at about 3 months for the MVP if we have 2 developers full time. + assert: + - type: javascript + value: | + const headers = output.match(/##[^#\n]+/g) || []; + headers.length >= 3 + metric: "Creates headers for each topic" + + # Personal reflection (for questions) + - description: "Personal career reflection" + vars: + transcript: | + I'm at a crossroads in my career. I've been a software engineer for 10 years and I'm good at it, but I don't feel passionate about it anymore. I keep thinking about teaching. Maybe I could teach computer science at a community college or do coding bootcamps. The pay would be less but I think I'd be happier. My wife is supportive but we have a mortgage and two kids. I'm 35 now, is it too late to change careers? + assert: + - type: javascript + value: | + // Should ask about deeper motivations, not just surface level + const hasDepthQuestions = output.includes("fulfill") || output.includes("passion") || output.includes("impact") || output.includes("meaning"); + const hasPracticalQuestions = output.includes("transition") || output.includes("financial") || output.includes("family"); + hasDepthQuestions && hasPracticalQuestions + metric: "Questions balance emotional and practical aspects" + + # Technical jargon preservation + - description: "Technical transcript with specific terms" + vars: + transcript: "We need to implement OAuth2 with PKCE flow for the mobile app. The JWT tokens should have a 15 minute expiry and we'll use refresh tokens stored in the keychain." 
+ assert: + - type: contains + value: "OAuth2" + metric: "Preserves OAuth2" + - type: contains + value: "PKCE" + metric: "Preserves PKCE" + - type: contains + value: "JWT" + metric: "Preserves JWT" + - type: contains + value: "15 minute" + metric: "Preserves specific numbers" diff --git a/server/prompts/bullet-summary-prompt.ts b/server/prompts/bullet-summary-prompt.ts new file mode 100644 index 0000000..ea1737c --- /dev/null +++ b/server/prompts/bullet-summary-prompt.ts @@ -0,0 +1,33 @@ +export const BULLET_SUMMARY_PROMPT = + `You are an AI assistant that creates concise, actionable bullet point summaries from voice transcripts. + +Given a transcript of someone's thoughts, create a bullet point summary that: +1. Captures the main ideas and key points +2. Identifies any action items or decisions made +3. Preserves important details while removing filler words +4. Organizes thoughts into logical groupings +5. Uses clear, concise language + +Special cases: +- If the transcript is empty or contains no words, return a single bullet: "Ah, the recording is empty!" 
+- If the transcript is very short (just a few words), return those words as a single bullet point +- Even the shortest recording deserves at least one bullet point + +Format: +- Return a JSON array of bullet points as strings +- Each bullet should be a complete, standalone thought +- Start with the most important points +- Include action items with clear next steps +- Keep each bullet concise but informative + +Example output: +["Main idea or decision", "Key insight or observation", "Action: Specific next step", "Important detail to remember"]`; + +export function createBulletSummaryPrompt(transcript: string): string { + return `${BULLET_SUMMARY_PROMPT} + +Transcript: +${transcript} + +Create a bullet point summary:`; +} diff --git a/server/prompts/edited-transcript-prompt.ts b/server/prompts/edited-transcript-prompt.ts new file mode 100644 index 0000000..edf8b83 --- /dev/null +++ b/server/prompts/edited-transcript-prompt.ts @@ -0,0 +1,34 @@ +export const EDITED_TRANSCRIPT_PROMPT = + `You are an AI assistant that lightly edits voice transcripts to make them more readable while preserving the speaker's voice and ideas. + +Given a raw transcript, create a lightly edited version that: +1. Adds markdown formatting (headers, bold, lists) to organize thoughts +2. Fixes grammar and punctuation errors +3. Removes excessive filler words (um, uh, like) but keeps some for natural flow +4. Groups related ideas under descriptive headers +5. Preserves the speaker's tone and personality +6. Makes the text scannable and easy to read + +Formatting guidelines: +- Use ## for main topic headers +- Use ### for subtopic headers +- Use **bold** for emphasis on key terms +- Use numbered or bulleted lists for action items or multiple points +- Keep paragraphs short and focused +- Add line breaks between sections for readability + +Special cases: +- If the transcript is empty, return: "## Empty Recording\\n\\nAh, the recording is empty!" 
+- If the transcript is very short, add a "## Quick Note" or "## Quick Reminder" header +- Always add at least one header to structure the content + +The goal is to make the transcript feel like a well-organized note that the speaker might have written themselves, not a completely rewritten document.`; + +export function createEditedTranscriptPrompt(transcript: string): string { + return `${EDITED_TRANSCRIPT_PROMPT} + +Raw transcript: +${transcript} + +Create a lightly edited version with markdown formatting:`; +} diff --git a/server/scripts/update-prompts.ts b/server/scripts/update-prompts.ts new file mode 100644 index 0000000..b5788c9 --- /dev/null +++ b/server/scripts/update-prompts.ts @@ -0,0 +1,57 @@ +#!/usr/bin/env -S deno run --allow-read --allow-write + +/** + * Script to optionally update prompt files to use improved versions + * Run with --apply flag to actually make changes + */ + +const apply = Deno.args.includes("--apply"); + +async function updatePrompts() { + console.log("Prompt Update Tool"); + console.log("==================\n"); + + if (!apply) { + console.log("Running in preview mode. 
Use --apply to make changes.\n"); + } + + // Check if improved prompt exists + const improvedPromptPath = "./lib/prompts/summarize-prompt-improved.txt"; + const currentPromptPath = "./lib/prompts/summarize-prompt.txt"; + + try { + const improvedContent = await Deno.readTextFile(improvedPromptPath); + const currentContent = await Deno.readTextFile(currentPromptPath); + + console.log("Current summarize prompt:"); + console.log("-".repeat(40)); + console.log(currentContent.substring(0, 200) + "..."); + console.log("\nImproved summarize prompt:"); + console.log("-".repeat(40)); + console.log(improvedContent.substring(0, 200) + "..."); + + if (apply) { + // Backup current prompt + await Deno.writeTextFile(currentPromptPath + ".backup", currentContent); + console.log(`\n✓ Created backup at ${currentPromptPath}.backup`); + + // You can uncomment this to replace the current prompt with improved version + // await Deno.writeTextFile(currentPromptPath, improvedContent); + // console.log(`✓ Updated ${currentPromptPath} with improved version`); + + console.log( + "\nNote: The improved prompt focuses more on actionable insights and handles edge cases better.", + ); + console.log( + "The current prompt is more focused on ultra-concise summaries with emojis.", + ); + console.log("Choose based on your users' preferences."); + } else { + console.log("\n→ Run with --apply to update the prompts"); + } + } catch (error) { + console.error("Error:", error.message); + } +} + +await updatePrompts(); diff --git a/server/test-data/transcript-examples.json b/server/test-data/transcript-examples.json new file mode 100644 index 0000000..ee976a5 --- /dev/null +++ b/server/test-data/transcript-examples.json @@ -0,0 +1,91 @@ +{ + "testCases": [ + { + "id": "rambling-idea", + "description": "Typical rambling stream of consciousness about a startup idea", + "transcript": "So I've been thinking about this idea for like the past week and it's been keeping me up at night. 
What if we created an app that helps people, you know, actually stick to their habits? I know there's a million habit trackers out there but hear me out. The problem with all of them is that they're too complicated or they make you feel guilty when you miss a day. So what if instead of tracking streaks, we focused on, um, momentum? Like, the app would understand that life happens and missing a day doesn't mean you failed. It would be more like a supportive friend rather than a strict teacher. Oh and another thing - it could use AI to suggest the best time to do your habit based on your calendar and past behavior. Like if you usually go for runs in the morning but you have an early meeting, it could suggest doing it at lunch instead. And maybe it could even connect with your friends so you could have accountability buddies but in a fun way, not a judgmental way. I don't know, maybe this is stupid but I really think there's something here.", + "expectedBulletSummary": [ + "Create a habit tracking app focused on momentum rather than streaks", + "Core differentiator: Acts as a supportive friend vs strict teacher", + "Doesn't guilt users for missing days - understands that life happens", + "AI-powered scheduling: Suggests optimal times based on calendar and past behavior", + "Social accountability features with friends, but designed to be encouraging not judgmental", + "Problem it solves: Current habit trackers are too complicated or guilt-inducing" + ], + "expectedEditedTranscript": "## Habit Tracking App Idea\n\nI've been thinking about this idea for the past week and it's been keeping me up at night.\n\n### The Problem\n\nWhat if we created an app that helps people actually stick to their habits? I know there's a million habit trackers out there, but hear me out. 
The problem with all of them is that they're too complicated or they make you feel guilty when you miss a day.\n\n### The Solution: Focus on Momentum\n\nWhat if instead of tracking streaks, we focused on momentum? The app would understand that life happens and missing a day doesn't mean you failed. It would be more like a supportive friend rather than a strict teacher.\n\n### Key Features\n\n**AI-Powered Scheduling**: It could use AI to suggest the best time to do your habit based on your calendar and past behavior. For example, if you usually go for runs in the morning but you have an early meeting, it could suggest doing it at lunch instead.\n\n**Social Accountability**: Maybe it could connect with your friends so you could have accountability buddies, but in a fun way, not a judgmental way.\n\nI really think there's something here." + }, + { + "id": "technical-brainstorm", + "description": "Technical brainstorming session about architecture", + "transcript": "Okay so I'm trying to figure out the best way to architect this new feature. We need real-time updates but I'm not sure if we should use websockets or just poll every few seconds. Websockets would be better for performance but they're harder to implement and we'd need to handle reconnection logic and all that stuff. Actually wait, what about using Server-Sent Events? That might be a good middle ground. Oh but then we'd need to think about scalability. If we have thousands of users all connected at once, that could get expensive. Maybe we should start with polling and then upgrade to websockets later if we need to? But then we'd be building technical debt from the start. Ugh, architectural decisions are hard. Actually, you know what, let me think about this differently. What's the actual user requirement here? They need updates within, let's say, 5 seconds. So polling every 3 seconds would probably be fine for MVP. 
We can always optimize later.", + "expectedBulletSummary": [ + "Need to implement real-time updates for new feature", + "Considering three options: WebSockets, polling, or Server-Sent Events", + "WebSockets: Better performance but complex implementation and reconnection logic", + "Server-Sent Events: Potential middle ground solution", + "Scalability concern: Thousands of concurrent connections could be expensive", + "User requirement: Updates needed within 5 seconds", + "MVP recommendation: Start with 3-second polling, optimize later if needed" + ], + "expectedEditedTranscript": "## Architecture Decision: Real-Time Updates\n\nI'm trying to figure out the best way to architect this new feature. We need real-time updates, but I'm not sure if we should use WebSockets or just poll every few seconds.\n\n### Option 1: WebSockets\nWebSockets would be better for performance, but they're harder to implement and we'd need to handle reconnection logic and all that complexity.\n\n### Option 2: Server-Sent Events\nWhat about using Server-Sent Events? That might be a good middle ground.\n\n### Scalability Concerns\nWe need to think about scalability. If we have thousands of users all connected at once, that could get expensive.\n\n### Pragmatic Approach\nMaybe we should start with polling and then upgrade to WebSockets later if we need to? But then we'd be building technical debt from the start.\n\n### Final Decision\nLet me think about this differently. What's the actual user requirement here? They need updates within 5 seconds. So polling every 3 seconds would probably be fine for MVP. We can always optimize later." + }, + { + "id": "short-reminder", + "description": "Very short recording with just a quick reminder", + "transcript": "Remember to email Sarah about the design mockups", + "expectedBulletSummary": [ + "Email Sarah about the design mockups" + ], + "expectedEditedTranscript": "## Quick Reminder\n\nEmail Sarah about the design mockups." 
+ }, + { + "id": "empty-recording", + "description": "Empty or silent recording", + "transcript": "", + "expectedBulletSummary": [ + "Ah, the recording is empty!" + ], + "expectedEditedTranscript": "## Empty Recording\n\nAh, the recording is empty!" + }, + { + "id": "meeting-notes", + "description": "Rambling meeting notes with action items", + "transcript": "Just got out of the product meeting and wow there's a lot to unpack. So first thing, James mentioned that the new onboarding flow is causing confusion. Users are dropping off at the third step where we ask for their preferences. I think maybe we're asking for too much information upfront? We should probably just get them into the app first and then gradually collect preferences. Oh and then Maria brought up that customer support is getting a lot of questions about the pricing. Apparently people don't understand the difference between the pro and enterprise tiers. We need to make that clearer on the pricing page. Action items - I need to redesign the onboarding flow, probably by next sprint. And someone needs to update the pricing page copy, I'll ask the marketing team about that. Oh also, almost forgot, we're pushing the launch date back by two weeks because engineering found some performance issues with the real-time sync. 
That's probably for the best honestly, gives us more time to polish things.", + "expectedBulletSummary": [ + "Onboarding flow causing confusion - users dropping at step 3 (preferences)", + "Recommendation: Get users into app first, collect preferences gradually", + "Customer support receiving many pricing questions - pro vs enterprise unclear", + "Action: Redesign onboarding flow by next sprint", + "Action: Update pricing page copy (assign to marketing team)", + "Launch date pushed back 2 weeks due to performance issues with real-time sync", + "Silver lining: More time to polish the product" + ], + "expectedEditedTranscript": "## Product Meeting Recap\n\nJust got out of the product meeting and there's a lot to unpack.\n\n### Onboarding Flow Issues\n\nJames mentioned that the new onboarding flow is causing confusion. Users are dropping off at the third step where we ask for their preferences. I think we're asking for too much information upfront. We should probably just get them into the app first and then gradually collect preferences.\n\n### Pricing Clarity\n\nMaria brought up that customer support is getting a lot of questions about the pricing. Apparently, people don't understand the difference between the pro and enterprise tiers. We need to make that clearer on the pricing page.\n\n### Action Items\n\n1. **Redesign the onboarding flow** - I need to complete this by next sprint\n2. **Update pricing page copy** - I'll ask the marketing team about this\n\n### Launch Update\n\nWe're pushing the launch date back by two weeks because engineering found some performance issues with the real-time sync. That's probably for the best honestly - gives us more time to polish things." + }, + { + "id": "personal-reflection", + "description": "Personal reflection on work-life balance", + "transcript": "Man I've been working way too much lately. It's like 9pm and I'm still thinking about work stuff. I need to set better boundaries. 
Maybe I should try that thing where you have a hard stop at 6pm no matter what. But then again there's so much to do and the team is counting on me. I don't know, it's hard to balance everything. I love what I'm working on but I also miss having time for hobbies. When was the last time I went climbing? Must be at least a month. You know what, I'm going to block out Wednesday evenings for climbing. Non-negotiable. And maybe Saturday mornings for reading. I used to read so much and now I barely finish a book a month. Work will always be there but I need to take care of myself too.", + "expectedBulletSummary": [ + "Working too much lately - still thinking about work at 9pm", + "Need to set better boundaries with work", + "Considering a hard stop at 6pm daily", + "Struggling to balance work demands with personal time", + "Haven't gone climbing in at least a month", + "Decision: Block Wednesday evenings for climbing (non-negotiable)", + "Decision: Reserve Saturday mornings for reading", + "Reminder: Work will always be there, self-care is important" + ], + "expectedEditedTranscript": "## Work-Life Balance Reflection\n\nI've been working way too much lately. It's 9pm and I'm still thinking about work stuff.\n\n### The Problem\n\nI need to set better boundaries. Maybe I should try that thing where you have a hard stop at 6pm no matter what. But then again, there's so much to do and the team is counting on me. It's hard to balance everything.\n\n### What I'm Missing\n\nI love what I'm working on, but I also miss having time for hobbies. When was the last time I went climbing? Must be at least a month. I used to read so much and now I barely finish a book a month.\n\n### Commitments\n\n1. **Wednesday evenings**: Blocked for climbing (non-negotiable)\n2. **Saturday mornings**: Reserved for reading\n\nWork will always be there, but I need to take care of myself too." 
+ }, + { + "id": "very-short", + "description": "Just a few words", + "transcript": "Buy milk", + "expectedBulletSummary": [ + "Buy milk" + ], + "expectedEditedTranscript": "## Quick Note\n\nBuy milk." + } + ] +} diff --git a/server/test-prompts.ts b/server/test-prompts.ts new file mode 100755 index 0000000..8510488 --- /dev/null +++ b/server/test-prompts.ts @@ -0,0 +1,72 @@ +#!/usr/bin/env -S deno run --allow-read --allow-env --allow-net + +import { load } from "@std/dotenv"; +import { AnthropicClient } from "./lib/anthropic.ts"; + +// Load environment variables +await load({ export: true }); + +const testCases = [ + { + name: "Empty recording", + transcript: "", + }, + { + name: "Very short reminder", + transcript: "Buy milk", + }, + { + name: "Short reminder with context", + transcript: "Remember to email Sarah about the design mockups", + }, + { + name: "Rambling startup idea", + transcript: + "So I've been thinking about this idea for like the past week and it's been keeping me up at night. What if we created an app that helps people, you know, actually stick to their habits? I know there's a million habit trackers out there but hear me out. The problem with all of them is that they're too complicated or they make you feel guilty when you miss a day. So what if instead of tracking streaks, we focused on, um, momentum? Like, the app would understand that life happens and missing a day doesn't mean you failed. It would be more like a supportive friend rather than a strict teacher. Oh and another thing - it could use AI to suggest the best time to do your habit based on your calendar and past behavior. Like if you usually go for runs in the morning but you have an early meeting, it could suggest doing it at lunch instead. And maybe it could even connect with your friends so you could have accountability buddies but in a fun way, not a judgmental way. 
I don't know, maybe this is stupid but I really think there's something here.", + }, + { + name: "Technical brainstorm", + transcript: + "Okay so I'm trying to figure out the best way to architect this new feature. We need real-time updates but I'm not sure if we should use websockets or just poll every few seconds. Websockets would be better for performance but they're harder to implement and we'd need to handle reconnection logic and all that stuff. Actually wait, what about using Server-Sent Events? That might be a good middle ground. Oh but then we'd need to think about scalability. If we have thousands of users all connected at once, that could get expensive. Maybe we should start with polling and then upgrade to websockets later if we need to? But then we'd be building technical debt from the start. Ugh, architectural decisions are hard. Actually, you know what, let me think about this differently. What's the actual user requirement here? They need updates within, let's say, 5 seconds. So polling every 3 seconds would probably be fine for MVP. 
We can always optimize later.", + }, +]; + +async function testPrompts() { + const anthropicKey = Deno.env.get("ANTHROPIC_API_KEY"); + if (!anthropicKey) { + console.error("Error: ANTHROPIC_API_KEY not set"); + Deno.exit(1); + } + + const client = new AnthropicClient(anthropicKey); + + for (const testCase of testCases) { + console.log(`\n${"=".repeat(80)}`); + console.log(`Test Case: ${testCase.name}`); + console.log(`${"=".repeat(80)}`); + console.log(`\nTranscript: "${testCase.transcript}"\n`); + + try { + // Test bullet summary + console.log("Generating bullet summary..."); + const summaryResult = await client.summarizeTranscript( + testCase.transcript, + ); + console.log("\nBullet Summary:"); + summaryResult.bulletSummary.forEach((bullet) => { + console.log(` • ${bullet}`); + }); + + // Test edited transcript + console.log("\nGenerating edited transcript..."); + const editResult = await client.editTranscript(testCase.transcript); + console.log("\nEdited Transcript:"); + console.log(editResult.editedTranscript); + } catch (error) { + console.error(`Error: ${error.message}`); + } + } +} + +// Run the tests +await testPrompts(); diff --git a/server/tests/api-integration.test.ts b/server/tests/api-integration.test.ts new file mode 100644 index 0000000..a4423a1 --- /dev/null +++ b/server/tests/api-integration.test.ts @@ -0,0 +1,63 @@ +import { assertEquals, assertExists } from "@std/assert"; +import { app } from "../main.ts"; + +Deno.test("POST /api/new-recording - returns edited transcript", async () => { + // Create a simple test audio file + const audioContent = new Uint8Array([0, 1, 2, 3, 4]); // Dummy audio data + const audioFile = new File([audioContent], "test.m4a", { type: "audio/m4a" }); + + const formData = new FormData(); + formData.append("audio", audioFile); + + const req = new Request("http://localhost/api/new-recording", { + method: "POST", + body: formData, + }); + + const res = await app.fetch(req); + + // This will fail without real API keys, but 
we're checking the structure + if (res.status === 200) { + const data = await res.json(); + + // Check that all expected fields are present + assertExists(data.transcript, "Response should include transcript"); + assertExists( + data.editedTranscript, + "Response should include editedTranscript", + ); + assertExists(data.duration, "Response should include duration"); + assertExists(data.bulletSummary, "Response should include bulletSummary"); + assertExists(data.diagram, "Response should include diagram"); + + // Check types + assertEquals(typeof data.transcript, "string"); + assertEquals(typeof data.editedTranscript, "string"); + assertEquals(typeof data.duration, "number"); + assertEquals(Array.isArray(data.bulletSummary), true); + assertEquals(typeof data.diagram.title, "string"); + assertEquals(typeof data.diagram.description, "string"); + assertEquals(typeof data.diagram.content, "string"); + } +}); + +Deno.test("Anthropic client handles empty transcripts correctly", async () => { + const { AnthropicClient } = await import("../lib/anthropic.ts"); + + // This test doesn't require API key since we handle empty transcripts locally + const client = new AnthropicClient("dummy-key"); + + // Test empty transcript + const emptyResult = await client.summarizeTranscript(""); + assertEquals(emptyResult.bulletSummary, ["Ah, the recording is empty!"]); + + const emptyEditResult = await client.editTranscript(""); + assertEquals( + emptyEditResult.editedTranscript, + "## Empty Recording\n\nAh, the recording is empty!", + ); + + // Test very short transcript + const shortEditResult = await client.editTranscript("Buy milk"); + assertEquals(shortEditResult.editedTranscript, "## Quick Note\n\nBuy milk."); +}); diff --git a/server/types/api.ts b/server/types/api.ts index 9ce0aad..47e0968 100644 --- a/server/types/api.ts +++ b/server/types/api.ts @@ -1,7 +1,9 @@ export interface NewRecordingResponse { transcript: string; + editedTranscript: string; duration: number; bulletSummary: 
string[]; + thingsToThinkAbout: string[]; diagram: { title: string; description: string;