Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions .github/MiniWhisper-wordmark.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
68 changes: 68 additions & 0 deletions .github/icon.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
8 changes: 7 additions & 1 deletion Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,20 @@ let package = Package(
.executableTarget(
name: "MiniWhisper",
dependencies: [
"FluidAudio"
"FluidAudio",
"whisper"
],
path: "Sources/MiniWhisper",
exclude: ["Resources"],
swiftSettings: [
.enableExperimentalFeature("StrictConcurrency")
]
),
.binaryTarget(
name: "whisper",
url: "https://github.com/andyhtran/MiniWhisper/releases/download/whisper-xcframework-1.0/whisper.xcframework.zip",
checksum: "866b43e4a3f31d1f898c7300d36e786841723e7be5a0fcdaa5879daea2f4389d"
),
.testTarget(
name: "MiniWhisperTests",
dependencies: ["MiniWhisper"],
Expand Down
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# MiniWhisper

A minimal macOS menu bar app for voice-to-text using the [Parakeet](https://github.com/FluidInference/FluidAudio) model. Press a hotkey, speak, and the transcription is automatically pasted into the active app.
A minimal macOS menu bar app for voice-to-text with fast English transcription via [Parakeet](https://github.com/FluidInference/FluidAudio) and multilingual transcription via [whisper.cpp](https://github.com/ggml-org/whisper.cpp). Press a hotkey, speak, and the transcription is automatically pasted into the active app.

![macOS 14.0+](https://img.shields.io/badge/macOS-14.0%2B-blue)
![Swift 6.0+](https://img.shields.io/badge/Swift-6.0%2B-orange)
Expand Down Expand Up @@ -51,7 +51,8 @@ just dev
- **Text replacements** — auto-correct words or phrases after transcription
- **Recording history** — browse and copy recent transcriptions
- **Usage stats** — track recordings, speaking time, word count, and average WPM
- **On-device** — all processing happens locally via the Parakeet model, nothing leaves your Mac
- **Multiple models** — switch between fast English-only (Parakeet) and multilingual auto-detect (whisper.cpp)
- **On-device** — all processing happens locally on your Mac; nothing leaves your device

## Build commands

Expand Down
16 changes: 16 additions & 0 deletions Scripts/build-app.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,22 @@ mkdir -p "$APP_BUNDLE/Contents/Resources"

cp "$BUILD_DIR/$APP_NAME" "$APP_BUNDLE/Contents/MacOS/"

# Embed whisper.framework with proper macOS versioned structure
# (Versions/A holds the real content; Versions/Current and the top-level
# entries are relative symlinks into it).
FRAMEWORKS_DIR="$APP_BUNDLE/Contents/Frameworks"
if [ -d "$BUILD_DIR/whisper.framework" ]; then
    fw_src="$BUILD_DIR/whisper.framework/Versions/A"
    fw_dst="$FRAMEWORKS_DIR/whisper.framework"

    mkdir -p "$fw_dst/Versions/A"
    for item in Headers Modules Resources; do
        cp -R "$fw_src/$item" "$fw_dst/Versions/A/"
    done
    cp "$fw_src/whisper" "$fw_dst/Versions/A/"

    # -n makes ln replace an existing symlink itself instead of following it.
    # Without it, re-running over an existing bundle would follow the old
    # "Current"/"Headers"/... links (which point at directories) and create
    # the new link *inside* the target directory.
    ln -sfn A "$fw_dst/Versions/Current"
    for item in Headers Modules Resources whisper; do
        ln -sfn "Versions/Current/$item" "$fw_dst/$item"
    done

    # Best effort: let the executable find the framework under
    # Contents/Frameworks. Failures are deliberately ignored (presumably
    # because the rpath may already be present — confirm).
    install_name_tool -add_rpath "@executable_path/../Frameworks" "$APP_BUNDLE/Contents/MacOS/$APP_NAME" 2>/dev/null || true
fi

cat > "$APP_BUNDLE/Contents/Info.plist" << PLIST
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
Expand Down
5 changes: 5 additions & 0 deletions Scripts/sign-and-notarize.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@ SIGNING_ID="${CODESIGN_IDENTITY:?Set CODESIGN_IDENTITY to your Developer ID Appl
echo "==> Building release..."
bash "$ROOT/Scripts/build-app.sh" release

echo "==> Signing embedded frameworks..."
# Sign each embedded framework before the app bundle itself is signed, so the
# outer signature is computed over already-signed nested code.
for fw in "$APP_BUNDLE"/Contents/Frameworks/*.framework; do
    # If nothing matches, the glob is left as a literal pattern (unless
    # nullglob is set); the -d test filters that out. An explicit `if` keeps
    # the loop's exit status at zero in that case, unlike `[ ... ] && cmd`.
    if [ -d "$fw" ]; then
        codesign --force --timestamp --options runtime --sign "$SIGNING_ID" "$fw"
    fi
done

echo "==> Signing with: $SIGNING_ID"
codesign --force --timestamp --options runtime \
--sign "$SIGNING_ID" \
Expand Down
71 changes: 63 additions & 8 deletions Sources/MiniWhisper/AppState.swift
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,15 @@ import UserNotifications
final class AppState: Sendable {
let recorder = AudioRecorder()
let parakeet = ParakeetProvider()
// Second transcription provider; consulted when `transcriptionMode` is `.multilingual`.
let whisper = WhisperProvider()
let recordingStore = RecordingStore()
let analyticsStore = AnalyticsStore()
let permissions = PermissionsManager()
let pasteboard = PasteboardService()
let toast = ToastWindowController.shared

var replacementSettings = ReplacementSettings.load()
// Currently selected mode, restored through `TranscriptionModeStorage.load()` when AppState is created.
var transcriptionMode: TranscriptionMode = TranscriptionModeStorage.load()

let maxRecordingDuration: TimeInterval = 600.0 // 10 minutes
var warningDuration: TimeInterval { maxRecordingDuration * 0.8 } // 8 minutes
Expand All @@ -23,18 +25,65 @@ final class AppState: Sendable {
private var durationCheckTimer: Timer?
private var currentRecordingId: String?

// Callbacks for HotkeyManager to track recording state
var onRecordingStarted: (() -> Void)?
var onRecordingEnded: (() -> Void)?

/// Whether the provider that backs the current `transcriptionMode` reports itself as initialized.
///
/// Only the active provider is consulted: Parakeet for `.english`, Whisper for `.multilingual`.
var isModelLoaded: Bool {
    switch transcriptionMode {
    case .english: parakeet.isInitialized
    case .multilingual: whisper.isInitialized
    }
}

/// Forwards `whisper.isDownloading`; only the Whisper provider is consulted for download state here.
var isModelDownloading: Bool { whisper.isDownloading }
/// Forwards `whisper.downloadProgress`.
/// NOTE(review): the value range (0...1 vs. percent) is not visible here — confirm in WhisperProvider.
var modelDownloadProgress: Double { whisper.downloadProgress }

// MARK: - Initialization

/// Starts loading the model for the current `transcriptionMode` in an unstructured task.
///
/// Only the provider for the active mode is initialized. Any error thrown while
/// initializing is surfaced to the user as a "Model Load Failed" toast.
func preloadModel() {
    Task {
        do {
            switch transcriptionMode {
            case .english:
                try await parakeet.initialize()
            case .multilingual:
                // Skip initialization when `whisper.modelExists` is false
                // (presumably the model has not been downloaded yet — confirm in WhisperProvider).
                guard whisper.modelExists else { return }
                try await whisper.initialize()
            }
        } catch {
            toast.showError(title: "Model Load Failed", message: error.localizedDescription)
        }
    }
}

func switchTranscriptionMode(to mode: TranscriptionMode) {
guard mode != transcriptionMode else { return }

if recorder.state.isRecording {
toast.showError(title: "Cannot Switch", message: "Stop recording before switching models.")
return
}

if recorder.state == .processing {
return
}

switch transcriptionMode {
case .english: parakeet.unload()
case .multilingual: whisper.unload()
}

transcriptionMode = mode
TranscriptionModeStorage.save(mode)

Task {
do {
switch mode {
case .english:
try await parakeet.initialize()
case .multilingual:
try await whisper.initialize()
}
} catch {
toast.showError(title: "Model Load Failed", message: error.localizedDescription)
}
Expand All @@ -53,7 +102,7 @@ final class AppState: Sendable {

func startRecording() {
guard recorder.state.isIdle else { return }
guard parakeet.isInitialized else {
guard isModelLoaded else {
toast.showError(title: "Model Not Ready", message: "Please wait for the model to finish loading.")
return
}
Expand Down Expand Up @@ -113,7 +162,13 @@ final class AppState: Sendable {

private func transcribe(audioURL: URL, recordingId: String, duration: TimeInterval, sampleRate: Double) async {
do {
let result = try await parakeet.transcribe(audioURL: audioURL)
let result: TranscriptionResult
switch transcriptionMode {
case .english:
result = try await parakeet.transcribe(audioURL: audioURL)
case .multilingual:
result = try await whisper.transcribe(audioURL: audioURL)
}

// Guard against stale callback: if the user rapid-tapped and started a new
// recording while transcription was in-flight, the state has moved on.
Expand Down Expand Up @@ -156,8 +211,8 @@ final class AppState: Sendable {
transcriptionDuration: result.duration
),
configuration: RecordingConfiguration(
voiceModel: "Parakeet",
language: "en"
voiceModel: result.model,
language: result.language
)
)

Expand Down Expand Up @@ -186,7 +241,7 @@ final class AppState: Sendable {
),
transcription: nil,
configuration: RecordingConfiguration(
voiceModel: "Parakeet",
voiceModel: transcriptionMode == .english ? "Parakeet" : "Whisper",
language: "en"
)
)
Expand Down
10 changes: 5 additions & 5 deletions Sources/MiniWhisper/Models/CustomShortcut.swift
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,10 @@ struct CustomShortcut: Codable, Equatable, Hashable {

var compactDisplayString: String {
var str = ""
if control { str += "" }
if option { str += "" }
if shift { str += "" }
if command { str += "" }
if control { str += "Ctrl+" }
if option { str += "Option+" }
if shift { str += "Shift+" }
if command { str += "Cmd+" }
if fn { str += "Fn+" }
str += keyCodeDisplayName
return str
Expand Down Expand Up @@ -215,7 +215,7 @@ final class CustomShortcutStorage {

/// Returns the built-in shortcut for each shortcut name.
///
/// Every name must appear exactly once: a dictionary literal that repeats a key
/// traps at runtime.
static func defaultShortcuts() -> [CustomShortcutName: CustomShortcut] {
    [
        .toggleRecording: CustomShortcut(keyCode: UInt16(kVK_ANSI_R), option: true, shift: true), // Option + Shift + R
        .cancelRecording: CustomShortcut(keyCode: UInt16(kVK_Escape)), // Escape
    ]
}
Expand Down
22 changes: 22 additions & 0 deletions Sources/MiniWhisper/Models/TranscriptionMode.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import Foundation

/// Selects which transcription provider the app uses.
///
/// The raw string value of each case is what gets written to and read back
/// from `UserDefaults`, so case names must stay stable.
enum TranscriptionMode: String, Codable, Sendable {
    case english, multilingual
}

/// Reads and writes the selected `TranscriptionMode` in `UserDefaults.standard`.
struct TranscriptionModeStorage: Sendable {
    private static let storageKey = "TranscriptionMode"

    /// Returns the stored mode, falling back to `.english` when no value is
    /// stored or the stored string is not a valid raw value.
    static func load() -> TranscriptionMode {
        let stored = UserDefaults.standard.string(forKey: storageKey)
        return stored.flatMap(TranscriptionMode.init(rawValue:)) ?? .english
    }

    /// Stores the raw value of `mode` under the storage key.
    static func save(_ mode: TranscriptionMode) {
        let defaults = UserDefaults.standard
        defaults.set(mode.rawValue, forKey: storageKey)
    }
}
5 changes: 5 additions & 0 deletions Sources/MiniWhisper/Services/ParakeetProvider.swift
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@ final class ParakeetProvider {

/// `true` while `asrManager` is non-nil.
var isInitialized: Bool { asrManager != nil }

/// Clears the stored initialization task and the ASR manager, so
/// `isInitialized` reports `false` afterwards.
///
/// NOTE(review): this only drops the reference to `initializationTask`. Whether
/// an initialization already in flight can still assign `asrManager` later is
/// not visible here — confirm against `initialize()`.
func unload() {
    initializationTask = nil
    asrManager = nil
}

func initialize() async throws {
if asrManager != nil { return }

Expand Down
Loading
Loading