diff --git a/Cargo.lock b/Cargo.lock index b901bb1..3254efb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2073,6 +2073,7 @@ dependencies = [ "serde_json", "serde_yaml", "signal-hook", + "similar", "sys-locale", "toml", "tracing", @@ -4082,6 +4083,12 @@ version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" +[[package]] +name = "similar" +version = "2.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa" + [[package]] name = "slab" version = "0.4.11" diff --git a/Cargo.toml b/Cargo.toml index f23f928..c8d4853 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -42,6 +42,7 @@ serde_json = "1.0" toml = "0.8" serde_yaml = "0.9" image = { version = "0.24", default-features = false, features = ["png", "jpeg"] } +similar = "2.6" # GUI egui = "0.32" diff --git a/docs/llm-postprocess.html b/docs/llm-postprocess.html new file mode 100644 index 0000000..42b7f6e --- /dev/null +++ b/docs/llm-postprocess.html @@ -0,0 +1,190 @@ + + + + + + HootVoice — LLM Post-processing Guide + + + + + +
+
日本語
+ +
+

LLM Post-processing Guide

+

Configure Ollama or LM Studio so HootVoice can clean up, summarize, and rephrase Whisper transcripts using a local LLM.

+
+
+
+ + +
+

Overview

+

When LLM post-processing is enabled, HootVoice sends each Whisper transcript to a local LLM endpoint so the text can be polished, made polite, or summarized automatically. The feature expects an OpenAI-compatible API such as Ollama or LM Studio.

+

The toggle is disabled by default. Open Settings → LLM and turn on “Enable LLM post-processing”, then configure the API base URL and model name to match your local server. In that tab we recommend starting with gemma-3-12b-it quantized to Q4_K_M for proofreading-focused workflows.

+
+ +
+

How it Works

+
    +
  1. HootVoice transcribes your recording locally with Whisper.
  2. +
  3. Once transcription finishes, it calls /v1/chat/completions on the configured LLM endpoint.
  4. +
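  5. The raw transcript and the LLM response are both written to the log. When “Use LLM output for auto paste” is on, the LLM response replaces the Whisper text that is copied to the clipboard; otherwise the Whisper text is used.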
  6. +
  7. If auto-paste is enabled, the processed text is inserted into the frontmost app.
  8. +
+

If the API fails or times out, HootVoice falls back to the original Whisper text. Logs capture HTTP status codes and any error payloads for quick diagnostics.

+
+ +
+

Setup Checklist

+ +
+ +
+

Using Ollama

+

Ollama exposes an OpenAI-compatible REST API at http://localhost:11434/v1, which matches the default value in HootVoice.

+

macOS

+
    +
  1. Install via brew install ollama (requires Homebrew).
  2. +
  3. Run ollama run llama3.1:8b to download and cache the model.
  4. +
  5. Keep the background service running with ollama serve or the Ollama menu bar app.
  6. +
+

Windows

+
    +
  1. Download the installer from ollama.com and complete setup.
  2. +
  3. Open PowerShell and run ollama run llama3.1:8b to fetch the model.
  4. +
  5. The service stays active in the background; manage it from the system tray.
  6. +
+

Linux

+
    +
  1. Run curl -fsSL https://ollama.com/install.sh | sh.
  2. +
  3. Enable the user service with systemctl --user enable --now ollama.
  4. +
  5. Download a model via ollama run llama3.1:8b and verify the API responds.
  6. +
+

Test connectivity with:

+
curl http://localhost:11434/v1/models
+
+ +
+

Using LM Studio

+

LM Studio offers a GUI for managing models and ships with an OpenAI-compatible server. The default port is 1234, so set HootVoice’s base URL to http://localhost:1234/v1.

+

macOS

+
    +
  1. Download the DMG from the LM Studio website and install it.
  2. +
  3. Open “Download Models” and grab the models you want.
  4. +
  5. Click “Start Server” and enable the “OpenAI Compatible Server” option.
  6. +
+

Windows

+
    +
  1. Run the Windows installer with the default options.
  2. +
  3. Download models from within the app, then switch to the “Server” tab.
  4. +
  5. Press “Start Server” and enable auto-start if you need it on boot.
  6. +
+

Linux

+
    +
  1. Launch the AppImage or install the Debian package.
  2. +
  3. Download a model, then toggle the server switch in the top-right corner.
  4. +
  5. Allow inbound traffic on port 1234 if your firewall prompts.
  6. +
+

Confirm the server is reachable:

+
curl http://localhost:1234/v1/models
+
+ +
+

Recommended Models

+ + + + + + + + + + + + + + + + + + + + + + + + + +
Use case | Model | Notes
Japanese polishing & polite tone | llama3.1:8b (Ollama), Meta-Llama-3-8B-Instruct (LM Studio) | Multilingual, runs on ~8 GB RAM/VRAM.
English summaries | qwen2.5:7b-instruct, Phi-3.5-mini-instruct | Fast responses with concise outputs; ideal for meeting notes.
Maximum accuracy | llama3.1:70b or other large instruction-tuned models | Requires high-end GPU/VRAM; tune OLLAMA_NUM_PARALLEL as needed.
+

Make sure the model identifier matches your runtime. Ollama lists models via ollama list, while LM Studio shows the identifier in the “Local Models” panel.

+
+ +
+

Local Resource Requirements

+

Running Gemma-3-12B locally in 4-bit/QAT mode for proofreading tasks generally requires the following resources:

+ +
+ +
+

Troubleshooting

+ +

If issues persist, copy the log entry with the failing request/response and share it with the HootVoice team.

+
+
+ + + + diff --git a/docs/llm-postprocess.ja.html b/docs/llm-postprocess.ja.html new file mode 100644 index 0000000..b825795 --- /dev/null +++ b/docs/llm-postprocess.ja.html @@ -0,0 +1,161 @@ + + + + + + HootVoice — LLM 後処理セットアップガイド + + + + + +
+
English
+ +
+

LLM 後処理セットアップガイド

+

Whisper の文字起こし結果を LLM で整形・要約する仕組みと、Ollama / LM Studio を使った導入手順をまとめました。

+
+
+
+ + +
+

概要

+

LLM 後処理機能を有効にすると、Whisper が出力したテキストをローカルの LLM API に送り、句読点整形・敬体化・要約などを自動で行えます。API は OpenAI 互換エンドポイントを想定しており、Ollama や LM Studio と組み合わせて利用します。

+

初期設定では無効になっているため、アプリの「設定 → LLM」で「LLM による後処理を有効化」をオンにしたうえで、API ベース URL やモデル名を指定してください。

+
+ +
+

仕組み

+
    +
  1. HootVoice が音声を Whisper で文字起こしします。
  2. +
  3. 文字起こしが完了すると、指定した LLM API に /v1/chat/completions リクエストを送信します。
  4. +
  5. LLM が整形したテキストが戻り、ログに結果が記録されます。
  6. +
  7. 自動ペーストが有効な場合は、LLM の結果がそのまま前面アプリに貼り付けられます。
  8. +
+

API が応答しない場合やエラーが発生した場合は、従来どおり Whisper の生テキストを使用します。ログには HTTP ステータスやエラーメッセージが記録されるため、問題の切り分けに活用できます。

+
+ +
+

導入チェックリスト

+ +
+ +
+

Ollama を使う

+

Ollama はシンプルな CLI/サービスで、http://localhost:11434/v1 に OpenAI 互換の REST API を提供します。HootVoice の既定値とも一致します。

+

macOS

+
    +
  1. brew install ollama を実行(Homebrew が必要)。
  2. +
  3. ollama run llama3.1:8b などで初回モデルをダウンロードして動作確認。
  4. +
  5. 常駐させる場合は ollama serve を起動するか、Ollama.app をログイン項目に追加。
  6. +
+

Windows

+
    +
  1. Ollama for Windows をダウンロードしてインストール。
  2. +
  3. インストール後に PowerShell で ollama run llama3.1:8b を実行しモデルを取得。
  4. +
  5. サービスは自動的にバックグラウンドで起動します。必要に応じてタスクトレイから制御してください。
  6. +
+

Linux

+
    +
  1. curl -fsSL https://ollama.com/install.sh | sh を実行。
  2. +
  3. systemctl --user enable --now ollama でユーザーサービスとして常駐させます。
  4. +
  5. ollama run llama3.1:8b でモデルをダウンロードし、API が応答するか確認。
  6. +
+

接続テストには以下のコマンドが利用できます。

+
curl http://localhost:11434/v1/models
+
+ +
+

LM Studio を使う

+

LM Studio は GUI ベースでモデル管理が行いやすく、OpenAI 互換サーバーも同梱されています。既定ポートは 1234 なので、HootVoice の URL を http://localhost:1234/v1 に変更してください。

+

macOS

+
    +
  1. 公式サイト から DMG をダウンロードしてインストール。
  2. +
  3. 起動後、「Download Models」から利用したいモデルを追加。
  4. +
  5. 画面右上の「Start Server」を押し、「OpenAI Compatible Server」を有効化。
  6. +
+

Windows

+
    +
  1. インストーラーをダウンロードし実行。既定設定で問題ありません。
  2. +
  3. アプリ内でモデルをダウンロード後、「Server」タブからサーバーを起動。
  4. +
  5. 必要に応じてスタートアップ登録し、自動起動を有効にします。
  6. +
+

Linux

+
    +
  1. AppImage または Debian パッケージを入手して実行。
  2. +
  3. モデルをダウンロードしたら、右上のサーバースイッチをオンにします。
  4. +
  5. 初回はファイアウォールでポート 1234 へのアクセス許可が必要な場合があります。
  6. +
+

API が起動しているかどうかは以下で確認できます。

+
curl http://localhost:1234/v1/models
+
+ +
+

推奨モデル

+ + + + + + + + + + + + + + + + + + + + + + + + + +
用途 | モデル | 備考
日本語の整形・敬体化 | llama3.1:8b(Ollama)、Meta-Llama-3-8B-Instruct(LM Studio) | 軽量で多言語対応。メモリ 8GB 前後で稼働。
英語中心の要約 | qwen2.5:7b-instruct / Phi-3.5-mini-instruct | 高速レスポンス。要約プロンプトと相性良好。
精度重視 | llama3.1:70b などの大型モデル | 高性能 GPU/VRAM が必要。Ollama では OLLAMA_NUM_PARALLEL で調整。
+

モデル名は API に合わせて指定する必要があります。Ollama の場合は ollama list で確認でき、LM Studio では「Local Models」一覧の識別子を利用します。

+
+ +
+

トラブルシューティング

+ +

それでも解決しない場合は、アプリのログウィンドウから該当するリクエスト/レスポンスをコピーし、開発チームまで共有してください。

+
+
+ + + + diff --git a/docs/manual.html b/docs/manual.html index 863bf3b..fec1ee6 100644 --- a/docs/manual.html +++ b/docs/manual.html @@ -26,6 +26,7 @@

Contents

  • Quick Start
  • Floating Toolbar
  • Settings
  • +
  • LLM Post-processing
  • Recording Flow
  • Tips
  • Troubleshooting
  • @@ -109,6 +110,16 @@

    Settings

    Changes are saved automatically. Apply model/dictionary changes via the Apply button.

    +
    +

    LLM Post-processing

    +

    Open the LLM tab in Settings and turn on “Enable LLM post-processing” to send Whisper transcripts to a local LLM that can clean up punctuation, switch tone, or generate summaries.

    + +

    See the LLM post-processing setup guide for full platform instructions and recommended models.

    +

    Recording Flow

      diff --git a/docs/manual.ja.html b/docs/manual.ja.html index 1dec0ba..023d273 100644 --- a/docs/manual.ja.html +++ b/docs/manual.ja.html @@ -26,6 +26,7 @@

      目次

    1. クイックスタート
    2. フローティングツールバー
    3. 設定
    4. +
    5. LLM 後処理
    6. 録音の流れ
    7. ヒント
    8. トラブルシューティング
    9. @@ -109,6 +110,16 @@

      設定

      変更は自動保存されます。モデル/辞書は「適用」ボタンでコアに反映します。

    +
    +

    LLM 後処理

    +

    設定ウィンドウの「LLM」タブで「LLM による後処理を有効化」をオンにすると、Whisper の文字起こし結果をローカルの LLM に送り、整形・敬体化・要約などを自動で適用できます。

    + +

    詳しいセットアップ手順や推奨モデルは LLM 後処理セットアップガイド を確認してください。

    +

    録音の流れ

      diff --git a/i18n/en/app.ftl b/i18n/en/app.ftl index cd68e67..384ab32 100644 --- a/i18n/en/app.ftl +++ b/i18n/en/app.ftl @@ -2,6 +2,8 @@ tab-general = General tab-devices = Devices tab-speech-model = Speech Model tab-dictionary = Dictionary +tab-llm = LLM +tab-history = History tab-logs = Logs section-devices = Input & Output Devices @@ -77,6 +79,101 @@ option-auto-os = Auto (match OS) option-english = English option-japanese = Japanese label-auto-paste = Auto Paste (copy only when OFF) +heading-llm = LLM Post-processing +llm-description = Sends Whisper transcripts to a local LLM-compatible API so you can polish the text, apply custom transformations, and reuse the processed result. +label-llm-enable = Enable LLM post-processing +link-llm-doc = Read the LLM post-processing spec… +label-llm-api-base = API Base URL: +placeholder-llm-api-base = e.g. http://localhost:11434/v1 +note-llm-api-base-local = Ollama default: http://localhost:11434/v1 / LM Studio: http://localhost:1234/v1 +label-llm-model = Model: +placeholder-llm-model = e.g. llama3.1:8b +llm-model-dropdown-placeholder = Pick from fetched models +btn-llm-fetch-models = Fetch models +tooltip-llm-fetch-models = Query the API for available chat-completion models +msg-llm-fetch-empty = No model list yet. Click "Fetch models" if you need suggestions. +label-llm-mode = Output mode: +llm-mode-format = Formatting (punctuation + polite tone) +llm-mode-summary = Summary (short bullet list) +llm-mode-custom = Custom prompt +llm-mode-custom-add = Add custom mode +llm-mode-custom-draft = Custom mode (draft) +llm-mode-custom-empty-hint = Click “Add custom mode” to create one +label-llm-custom-prompt = Custom prompt (injects `{{transcript}}`): +placeholder-llm-custom-prompt = Example: Please polish the transcript in polite Japanese. 
+label-llm-custom-name = Mode name: +btn-llm-save-custom = Save as new mode +tooltip-llm-save-custom = Add this prompt to the output mode list +btn-llm-delete-custom = Delete mode +tooltip-llm-delete-custom = Remove the selected custom mode +btn-llm-duplicate-custom = Duplicate as new mode +tooltip-llm-duplicate-custom = Save the current prompt as an additional custom mode +msg-llm-custom-invalid = Enter both a name and a prompt before saving. +msg-llm-custom-delete-failed = Could not delete the custom mode. +msg-llm-custom-update-failed = Could not update the custom mode. +label-llm-profile = Prompt profile: +btn-llm-profile-duplicate = Duplicate preset +tooltip-llm-profile-duplicate = Copy the preset into an editable profile +btn-llm-profile-edit = Edit profile +tooltip-llm-profile-edit = Open the prompt editor for this profile +btn-llm-profile-delete = Delete profile +tooltip-llm-profile-delete = Remove the selected custom profile +title-llm-profile-editor = Edit Prompt Profile +label-llm-profile-locale = Locale: +btn-llm-profile-load-locale = Load/Create locale +tooltip-llm-profile-load-locale = Load the locale if it exists or create a new entry +label-llm-profile-system = System prompt (optional): +label-llm-profile-user = User prompt: +btn-llm-profile-remove-locale = Delete locale +tooltip-llm-profile-remove-locale = Remove this locale from the profile +btn-llm-profile-cancel = Cancel +btn-llm-profile-save = Save +msg-llm-profile-error-no-locale = Please enter a locale code. +msg-llm-profile-error-user-empty = User prompt cannot be empty. +msg-llm-profile-error-profile-missing = Profile not found. +msg-llm-profile-error-not-editable = This profile cannot be edited. +msg-llm-profile-error-remove-locale = Failed to remove locale from profile. 
+label-llm-max-input = Max input characters: +label-llm-timeout = Timeout: +note-llm-timeout-unit = (seconds) +label-llm-apply-autopaste = Use LLM output for auto paste +btn-llm-test-connection = Test connection +msg-llm-test-running = Testing… (may take a few seconds) +label-llm-test-enable = Show LLM Post-process Test +label-llm-test-no-history = No transcription history available. Record something first. +label-llm-test-select-transcript = Select transcript to test +note-llm-test-not-saved = Test results are not stored in history +label-llm-test-transcript = Transcript +btn-llm-test-run = Run test +msg-llm-prompt-test-running = Running LLM post-processing test… +label-llm-test-duration = Duration: +label-llm-test-truncated = Input was truncated to the maximum length +label-llm-test-output = LLM Output +btn-llm-test-copy-output = Copy +msg-llm-history-load-failed = Failed to read LLM history +heading-llm-history = History +label-llm-history-empty = No history yet +label-llm-history-details = Details +label-llm-history-select-entry = Select an entry from the list above +label-llm-history-timestamp = Date: +label-llm-history-model = LLM Model: +label-llm-history-mode = LLM Mode: +label-llm-history-base-url = LLM API Base: +label-llm-history-language = Language: +label-llm-history-language-auto = Auto (Whisper setting) +label-llm-history-latency = LLM Processing Time: +label-llm-history-truncated = Input was truncated to the maximum length +label-llm-history-transcript = Transcript +label-llm-history-output = LLM Output +btn-llm-history-copy-transcript = Copy Transcript +btn-llm-history-copy-output = Copy LLM Output +tooltip-llm-history-copy-transcript = Copy the transcript to the clipboard +tooltip-llm-history-copy-output = Copy the LLM output to the clipboard +tooltip-llm-history-select-row = Show details +label-llm-history-duration-column = Duration +label-llm-history-custom-prompts = Custom prompts used +label-llm-history-custom-system = System prompt 
+label-llm-history-custom-user = User prompt label-dev-tools = Developer Tools: btn-launch-setup-wizard = Launch Initial Setup Wizard btn-reset-defaults = Reset All Settings to Default @@ -222,6 +319,7 @@ status-idle = Idle status-recording = Recording status-processing = Processing status-busy = Busy +status-post-processing = LLM post-processing btn-toggle-recording = Start/Stop Recording btn-show-floating = Show Floating title-debug-log = Debug Log @@ -263,3 +361,5 @@ model-note-base = Lightweight and fast. Sufficient for short notes. model-note-small = Balanced. Safe choice for meetings and general transcription. model-note-medium = Accuracy-oriented. Good for long texts and technical terms. model-note-large = Highest accuracy. Recommended when speed/memory allow. +label-llm-system = System prompt (optional): +label-llm-user = User prompt: diff --git a/i18n/ja/app.ftl b/i18n/ja/app.ftl index 304e332..6e95305 100644 --- a/i18n/ja/app.ftl +++ b/i18n/ja/app.ftl @@ -2,6 +2,8 @@ tab-general = 一般 tab-devices = デバイス tab-speech-model = 音声モデル tab-dictionary = 辞書 +tab-llm = LLM後処理 +tab-history = 履歴 tab-logs = ログ section-devices = 入出力デバイス @@ -77,6 +79,101 @@ option-auto-os = 自動(OSに合わせる) option-english = English option-japanese = 日本語 label-auto-paste = 自動ペースト(OFFのときはコピーのみ) +heading-llm = LLM後処理 +llm-description = Whisper の書き起こし結果をローカル LLM API に送信し、文章を自然に整えたり特定の変換を適用した結果を活用できます。 +label-llm-enable = LLM による後処理を有効化 +link-llm-doc = LLM後処理のセットアップ方法を確認... 
+label-llm-api-base = API Base URL: +placeholder-llm-api-base = 例: http://localhost:11434/v1 +note-llm-api-base-local = Ollama 既定: http://localhost:11434/v1 / LM Studio: http://localhost:1234/v1 +label-llm-model = モデル名: +placeholder-llm-model = 例: llama3.1:8b +llm-model-dropdown-placeholder = モデル候補を選択 +btn-llm-fetch-models = モデル一覧取得 +tooltip-llm-fetch-models = API からチャット対応モデル一覧を読み込みます +msg-llm-fetch-empty = モデル候補は未取得です。必要な場合は「モデル一覧取得」を押してください。 +label-llm-mode = 出力モード: +llm-mode-format = 整形 (句読点・敬体化) +llm-mode-summary = 要約 (短い箇条書き) +llm-mode-custom = カスタムプロンプト +llm-mode-custom-add = カスタムモードを追加 +llm-mode-custom-draft = カスタムモード(編集中) +llm-mode-custom-empty-hint = 「カスタムモードを追加」で新しいモードを作成できます +label-llm-custom-prompt = カスタムプロンプト (`{{transcript}}` を入力文に展開): +placeholder-llm-custom-prompt = 例: 以下の議事録を敬体の丁寧な文章に整形してください。 +label-llm-custom-name = モード名: +btn-llm-save-custom = 新しいモードとして保存 +tooltip-llm-save-custom = このプロンプトを出力モードとして追加します +btn-llm-delete-custom = モードを削除 +tooltip-llm-delete-custom = 選択中のカスタムモードを削除します +btn-llm-duplicate-custom = 複製して新規作成 +tooltip-llm-duplicate-custom = 現在の内容を別のカスタムモードとして保存します +msg-llm-custom-invalid = モード名とプロンプトを入力してください。 +msg-llm-custom-delete-failed = カスタムモードを削除できませんでした。 +msg-llm-custom-update-failed = カスタムモードを更新できませんでした。 +label-llm-profile = プロンプトプロファイル: +btn-llm-profile-duplicate = プリセットを複製 +tooltip-llm-profile-duplicate = プリセットを編集可能なプロファイルとして複製します +btn-llm-profile-edit = プロファイルを編集 +tooltip-llm-profile-edit = このプロファイルのプロンプト編集ダイアログを開きます +btn-llm-profile-delete = プロファイルを削除 +tooltip-llm-profile-delete = 選択したカスタムプロファイルを削除します +title-llm-profile-editor = プロンプトプロファイルの編集 +label-llm-profile-locale = ロケール: +btn-llm-profile-load-locale = ロケールを読み込み/作成 +tooltip-llm-profile-load-locale = 既存のロケールを読み込むか新しく作成します +label-llm-profile-system = システムプロンプト(任意): +label-llm-profile-user = ユーザープロンプト: +btn-llm-profile-remove-locale = ロケールを削除 +tooltip-llm-profile-remove-locale = このプロファイルからロケールを削除します +btn-llm-profile-cancel = キャンセル +btn-llm-profile-save = 保存 
+msg-llm-profile-error-no-locale = ロケールコードを入力してください。 +msg-llm-profile-error-user-empty = ユーザープロンプトを入力してください。 +msg-llm-profile-error-profile-missing = プロファイルが見つかりません。 +msg-llm-profile-error-not-editable = このプロファイルは編集できません。 +msg-llm-profile-error-remove-locale = ロケールを削除できませんでした。 +label-llm-max-input = 最大入力文字数: +label-llm-timeout = タイムアウト: +note-llm-timeout-unit = (秒) +label-llm-apply-autopaste = LLM 出力を自動ペーストに使用 +btn-llm-test-connection = 接続テスト +msg-llm-test-running = テスト中…(数秒かかる場合があります) +label-llm-test-enable = LLM後処理テストを表示 +label-llm-test-no-history = 書き起こし履歴がありません。まず録音を行ってください。 +label-llm-test-select-transcript = テストする書き起こしを選択 +note-llm-test-not-saved = ※ テスト結果は履歴に保存されません +label-llm-test-transcript = 書き起こしテキスト +btn-llm-test-run = テスト実行 +msg-llm-prompt-test-running = LLM後処理をテストしています… +label-llm-test-duration = 処理時間: +label-llm-test-truncated = 入力は最大文字数で切り詰められました +label-llm-test-output = LLM出力 +btn-llm-test-copy-output = コピー +msg-llm-history-load-failed = 履歴の読み込みに失敗しました +heading-llm-history = 履歴 +label-llm-history-empty = まだ履歴はありません +label-llm-history-details = 詳細 +label-llm-history-select-entry = 上の一覧から履歴を選択してください +label-llm-history-timestamp = 日付: +label-llm-history-model = LLMモデル: +label-llm-history-mode = LLMモード: +label-llm-history-base-url = LLM API Base: +label-llm-history-language = 言語設定: +label-llm-history-language-auto = 自動 (Whisper設定) +label-llm-history-latency = LLM後処理時間: +label-llm-history-truncated = 入力は最大文字数で切り詰められました +label-llm-history-transcript = 書き起こしテキスト +label-llm-history-output = LLM出力 +btn-llm-history-copy-transcript = 書き起こしをコピー +btn-llm-history-copy-output = LLM出力をコピー +tooltip-llm-history-copy-transcript = 書き起こしテキストをクリップボードにコピーします +tooltip-llm-history-copy-output = LLM出力をクリップボードにコピーします +tooltip-llm-history-select-row = 詳細を表示 +label-llm-history-duration-column = 処理時間 +label-llm-history-custom-prompts = 使用したカスタムプロンプト +label-llm-history-custom-system = システムプロンプト +label-llm-history-custom-user = ユーザープロンプト label-dev-tools = 開発ツール: 
btn-launch-setup-wizard = 初回セットアップウィザードを起動 btn-reset-defaults = 全設定をデフォルトに戻す @@ -222,6 +319,7 @@ status-idle = 待機中 status-recording = 録音中 status-processing = 処理中 status-busy = ビジー +status-post-processing = LLM 後処理中 btn-toggle-recording = 録音開始/停止 btn-show-floating = フローティング表示 title-debug-log = デバッグログ @@ -263,3 +361,5 @@ model-note-base = 軽量で速い。短文のメモなら十分 model-note-small = バランス良好。会議や一般的な書き起こしに無難 model-note-medium = 精度重視。長文や専門用語に強い model-note-large = 最高精度。速度/メモリに余裕がある場合に推奨 +label-llm-system = システムプロンプト(任意): +label-llm-user = ユーザープロンプト: diff --git a/src/core.rs b/src/core.rs index 3079b34..fe08e9a 100644 --- a/src/core.rs +++ b/src/core.rs @@ -11,9 +11,11 @@ use whisper_rs::{WhisperContext, WhisperContextParameters}; mod audio_io; mod output; +mod postprocess; mod transcriber; use crate::audio::VadStrategy; use crate::dictionary::DictionaryEntry; +use crate::llm::{LlmPostProcessSettings, LlmPostProcessor}; use crate::transcription::ensure_model; use crate::transcription::WhisperOptimizationParams; use crate::utils::sound; @@ -26,6 +28,7 @@ pub enum SimpleRecState { Idle, Recording, Processing, + PostProcessing, Busy, } @@ -41,6 +44,7 @@ pub struct WhisperCore { processing_thread: Arc>>>, current_model_path: Arc>, preferred_output_device: Arc>>, + llm_settings: Arc>, #[cfg(target_os = "macos")] front_app_before_paste: Arc>>, @@ -91,6 +95,13 @@ impl WhisperCore { let current_session = Arc::new(std::sync::atomic::AtomicU64::new(0)); let auto_stop_silence_secs = Arc::new(Mutex::new(10.0)); let max_record_secs = Arc::new(Mutex::new(600.0)); + let llm_settings = Arc::new(Mutex::new(LlmPostProcessSettings::default())); + let llm_processor = Arc::new(LlmPostProcessor::new()); + let postprocess_engine = postprocess::PostProcessEngine::new( + llm_settings.clone(), + llm_processor.clone(), + state.clone(), + ); #[cfg(target_os = "macos")] let front_app_before_paste = Arc::new(Mutex::new(None)); @@ -117,6 +128,8 @@ impl WhisperCore { dictionary_entries.clone(), 
auto_stop_silence_secs.clone(), max_record_secs.clone(), + postprocess_engine.clone(), + state.clone(), ); let out = output::OutputBehavior::new( behavior.clone(), @@ -134,6 +147,7 @@ impl WhisperCore { processing_thread, current_model_path, preferred_output_device, + llm_settings, #[cfg(target_os = "macos")] front_app_before_paste, audio, @@ -209,6 +223,10 @@ impl WhisperCore { self.log("[Warning] Already processing"); SimpleRecState::Processing } + SimpleRecState::PostProcessing => { + self.log("[Warning] Already processing"); + SimpleRecState::PostProcessing + } SimpleRecState::Busy => { self.log("[Warning] State busy"); SimpleRecState::Busy @@ -270,6 +288,11 @@ impl WhisperCore { self.trans.set_auto_stop_params(silence_secs, max_secs); } + pub fn set_llm_postprocess_settings(&self, settings: LlmPostProcessSettings) { + *self.llm_settings.lock().unwrap() = settings.clone(); + self.trans.set_llm_settings(settings); + } + // Behavior options reflected from GUI settings pub fn set_behavior_options(&self, use_clipboard: bool, auto_paste: bool) { self.out.set_behavior_options(use_clipboard, auto_paste); diff --git a/src/core/postprocess.rs b/src/core/postprocess.rs new file mode 100644 index 0000000..a3d862f --- /dev/null +++ b/src/core/postprocess.rs @@ -0,0 +1,209 @@ +use crate::core::{LogCallback, SimpleRecState}; +use crate::llm::{ + history_file_path, record_history, LlmPostProcessSettings, LlmPostProcessor, + MAX_HISTORY_ENTRIES, +}; +use similar::{ChangeTag, TextDiff}; +use std::sync::{Arc, Mutex}; + +pub struct PostProcessResult { + pub final_text: String, + pub llm_latency_secs: f32, +} + +#[derive(Clone)] +pub struct PostProcessEngine { + settings: Arc>, + processor: Arc, + state: Arc>, +} + +impl PostProcessEngine { + pub fn new( + settings: Arc>, + processor: Arc, + state: Arc>, + ) -> Self { + Self { + settings, + processor, + state, + } + } + + pub fn set_settings(&self, settings: LlmPostProcessSettings) { + *self.settings.lock().unwrap() = settings; + 
} + + pub fn process( + &self, + base_text: &str, + dictionary_hint: Option<&str>, + language_hint: Option<&str>, + log: &Arc>>, + ) -> PostProcessResult { + let snapshot = self.settings.lock().unwrap().clone(); + if !snapshot.enabled { + return PostProcessResult { + final_text: base_text.to_string(), + llm_latency_secs: 0.0, + }; + } + + log_message(log, &format!("[llm][input] {}", base_text)); + log_message( + log, + &format!( + "[llm] Processing via {} (model: {}).", + snapshot.effective_base_url(), + snapshot.model + ), + ); + + if let Ok(mut state) = self.state.lock() { + *state = SimpleRecState::PostProcessing; + } + + let mut final_text = base_text.to_string(); + let mut llm_latency_secs = 0.0f32; + let mut llm_output_for_diff: Option = None; + + let mut history_payload: Option<(String, bool, u128)> = None; + match self + .processor + .process(&snapshot, base_text, dictionary_hint, language_hint) + { + Ok(outcome) => { + let content = outcome.content; + let truncated_input = outcome.truncated_input; + let latency_ms = outcome.latency_ms; + history_payload = Some((content.clone(), truncated_input, latency_ms)); + if outcome.truncated_input { + log_message( + log, + &format!( + "[llm] Input truncated to {} chars.", + snapshot.max_input_chars + ), + ); + } + llm_latency_secs = latency_ms as f32 / 1000.0; + log_message( + log, + &format!("[llm] Completed in {:.2}s.", llm_latency_secs), + ); + log_message(log, &format!("[llm][output] {}", content)); + llm_output_for_diff = Some(content.clone()); + if snapshot.apply_to_autopaste { + final_text = content.clone(); + } else { + log_message( + log, + "[llm] Auto paste uses Whisper text (setting disabled).", + ); + } + } + Err(err) => { + let mut message = err.message; + if let Some(status) = err.status { + message = format!("{} (status {})", message, status); + } + if let Some(wait) = err.retry_after_secs { + message = format!("{} (retry after {}s)", message, wait); + } + log_message(log, &format!("[llm][error] {}", 
message)); + log_message(log, "[llm] Falling back to Whisper text."); + } + } + + if let Some((llm_output, truncated_input, latency_ms)) = history_payload { + match record_history( + base_text, + &llm_output, + truncated_input, + latency_ms, + &snapshot, + ) { + Ok(outcome) => { + log_message( + log, + &format!( + "[llm][history] Saved entry {}/{} → {}", + outcome.total_entries, + MAX_HISTORY_ENTRIES, + history_file_path().display() + ), + ); + } + Err(err) => { + log_message( + log, + &format!("[Warning] Failed to persist LLM history: {}", err), + ); + } + } + } + + if let Some(output_text) = llm_output_for_diff.as_ref() { + if output_text == base_text { + log_message(log, "[llm] Output identical to Whisper text."); + } else { + let diff = TextDiff::from_lines(base_text, output_text); + let mut diff_lines = Vec::new(); + let mut truncated = false; + for change in diff.iter_all_changes() { + let prefix = match change.tag() { + ChangeTag::Delete => "-", + ChangeTag::Insert => "+", + ChangeTag::Equal => continue, + }; + if diff_lines.len() >= 120 { + truncated = true; + break; + } + let mut line = change.value().trim_end_matches('\n').to_string(); + if line.chars().count() > 200 { + let mut truncated = String::with_capacity(201); + truncated.extend(line.chars().take(200)); + truncated.push_str("…"); + line = truncated; + } + diff_lines.push(format!("{}{}", prefix, line)); + } + + if diff_lines.is_empty() { + log_message(log, "[llm] Output identical to Whisper text."); + } else { + if truncated { + diff_lines.push("... 
(diff truncated)".to_string()); + } + log_message(log, &format!("[llm][diff]\n{}", diff_lines.join("\n"))); + } + } + } + + if let Ok(mut state) = self.state.lock() { + *state = SimpleRecState::Processing; + } + + PostProcessResult { + final_text, + llm_latency_secs, + } + } +} + +fn log_message(log_callback: &Arc>>, message: &str) { + if let Some(ref callback) = *log_callback.lock().unwrap() { + callback(message); + } + if let Some(rest) = message.strip_prefix("[Error]") { + tracing::error!("{}", rest.trim()); + } else if let Some(rest) = message.strip_prefix("[Warning]") { + tracing::warn!("{}", rest.trim()); + } else if let Some(rest) = message.strip_prefix("[Info]") { + tracing::info!("{}", rest.trim()); + } else { + tracing::info!("{}", message); + } +} diff --git a/src/core/transcriber.rs b/src/core/transcriber.rs index 642a708..8e564e9 100644 --- a/src/core/transcriber.rs +++ b/src/core/transcriber.rs @@ -4,10 +4,15 @@ use std::time::Instant; use whisper_rs::WhisperContext; +use super::{ + postprocess::{PostProcessEngine, PostProcessResult}, + SimpleRecState, +}; use crate::app::chunk_processor::ChunkProcessor; use crate::audio::VadStrategy; use crate::core::LogCallback; use crate::dictionary::{apply_pairs, flatten_sorted_with_context, DictionaryEntry}; +use crate::llm::LlmPostProcessSettings; use crate::transcription::WhisperOptimizationParams; #[derive(Clone)] @@ -26,6 +31,8 @@ pub struct Transcriber { pub auto_stop_silence_secs: Arc>, // 0 disables pub max_record_secs: Arc>, // 0 disables + pub postprocess: PostProcessEngine, + pub state: Arc>, } impl Transcriber { @@ -42,6 +49,8 @@ impl Transcriber { dictionary_entries: Arc>>, auto_stop_silence_secs: Arc>, max_record_secs: Arc>, + postprocess: PostProcessEngine, + state: Arc>, ) -> Self { Self { ctx, @@ -55,6 +64,8 @@ impl Transcriber { dictionary_entries, auto_stop_silence_secs, max_record_secs, + postprocess, + state, } } @@ -277,7 +288,13 @@ impl Transcriber { Self::log_with_callback(log, 
&format!("[Whisper] Combined result: {}", full_text)); // Dictionary - let corrected_text = self.apply_dictionary_to_text(&full_text); + let dictionary_snapshot = self.dictionary_entries.lock().unwrap().clone(); + let pairs = flatten_sorted_with_context(&dictionary_snapshot, &full_text); + let corrected_text = if pairs.is_empty() { + full_text.clone() + } else { + apply_pairs(&full_text, &pairs) + }; if corrected_text != full_text { Self::log_with_callback( log, @@ -286,8 +303,22 @@ impl Transcriber { } else { Self::log_with_callback(log, "[Dictionary] No change (no matches)"); } + let dictionary_prompt = Self::dictionary_prompt_text(&dictionary_snapshot); + + let language_setting = self.language.lock().unwrap().clone(); + let language_hint = language_setting.as_deref(); + let PostProcessResult { + final_text, + llm_latency_secs, + } = self.postprocess.process( + &corrected_text, + dictionary_prompt.as_deref(), + language_hint, + log, + ); - output.apply_output(&corrected_text); + output.apply_output(&final_text); + crate::utils::sound::stop_loop("processing"); // Performance info let recording_duration = { @@ -306,7 +337,13 @@ impl Transcriber { log, &format!(" 🔄 Whisper processing: {:.2}s", whisper_processing_time), ); - let total = whisper_processing_time + 0.0; + if llm_latency_secs > 0.0 { + Self::log_with_callback( + log, + &format!(" 🤖 LLM processing: {:.2}s", llm_latency_secs), + ); + } + let total = whisper_processing_time + llm_latency_secs; Self::log_with_callback(log, &format!(" ⏱️ Total processing time: {:.2}s", total)); if recording_duration > 0.0 { Self::log_with_callback( @@ -319,15 +356,41 @@ impl Transcriber { } else { Self::log_with_callback(log, " ⚡ RTF (Real Time Factor): N/A\n"); } + + if let Ok(mut state) = self.state.lock() { + *state = SimpleRecState::Idle; + } } - fn apply_dictionary_to_text(&self, text: &str) -> String { - let entries = self.dictionary_entries.lock().unwrap(); - if entries.is_empty() { - return text.to_string(); + fn 
dictionary_prompt_text(entries: &[DictionaryEntry]) -> Option { + const MAX_LINES: usize = 40; + let mut lines = Vec::new(); + for entry in entries { + if entry.aliases.is_empty() { + continue; + } + let mut line = format!("- {}: {}", entry.canonical, entry.aliases.join(", ")); + if !entry.include.is_empty() { + line.push_str(" (context: "); + line.push_str(&entry.include.join(", ")); + line.push(')'); + } + lines.push(line); + if lines.len() >= MAX_LINES { + break; + } } - let pairs = flatten_sorted_with_context(&entries, text); - apply_pairs(text, &pairs) + if lines.is_empty() { + None + } else { + let mut prompt = String::from("User dictionary replacements:\n"); + prompt.push_str(&lines.join("\n")); + Some(prompt) + } + } + + pub fn set_llm_settings(&self, settings: LlmPostProcessSettings) { + self.postprocess.set_settings(settings); } fn log_with_callback(log_callback: &Arc>>, message: &str) { diff --git a/src/gui/app.rs b/src/gui/app.rs index fa87420..c37c16f 100644 --- a/src/gui/app.rs +++ b/src/gui/app.rs @@ -13,6 +13,7 @@ use crate::audio::VadStrategy; use crate::core::{SimpleRecState, WhisperCore}; use crate::hotkey::HotkeyManager; use crate::i18n; +use crate::llm::LlmPostProcessSettings; use crate::utils::app_config_dir; use egui::FontFamily; use lucide_icons::Icon; @@ -29,6 +30,8 @@ enum TabView { Devices, SpeechModel, Dictionary, + Llm, + History, Logs, } @@ -51,6 +54,7 @@ pub struct WhisperApp { main_minimized_by_app: bool, // Keep global hotkey manager alive for app lifetime hotkey_manager: Option, + llm_was_enabled: bool, } #[derive(Clone, Debug)] @@ -71,6 +75,7 @@ struct LiveSettingsSnapshot { max_record_secs: f32, sound_enabled: bool, sound_volume_percent: f32, + llm_postprocess: LlmPostProcessSettings, } // File I/O helpers moved to utils::logfile @@ -98,6 +103,7 @@ impl WhisperApp { max_record_secs: s0.max_record_secs, sound_enabled: s0.sound_enabled, sound_volume_percent: s0.sound_volume_percent, + llm_postprocess: s0.llm_postprocess.clone(), 
})); crate::utils::sound::set_enabled(s0.sound_enabled); crate::utils::sound::set_volume_percent(s0.sound_volume_percent); @@ -128,6 +134,7 @@ impl WhisperApp { if !settings_window.dict_entries.is_empty() { core.set_dictionary_entries(settings_window.dict_entries.clone()); } + core.set_llm_postprocess_settings(s0.llm_postprocess.clone()); // Settings UI log integration not required let settings_requested = Arc::new(std::sync::atomic::AtomicBool::new(false)); @@ -155,6 +162,7 @@ impl WhisperApp { main_hidden_by_app: false, main_minimized_by_app: false, hotkey_manager: None, + llm_was_enabled: s0.llm_postprocess.enabled, }; // removed: system tray @@ -223,6 +231,7 @@ impl WhisperApp { core_for_hotkey.set_chunk_split_strategy(s.chunk_split_strategy); core_for_hotkey .set_auto_stop_params(s.auto_stop_silence_secs, s.max_record_secs); + core_for_hotkey.set_llm_postprocess_settings(s.llm_postprocess.clone()); } core_for_hotkey.toggle_recording(); }) { @@ -241,7 +250,6 @@ impl WhisperApp { } } } - // SIGUSR1/SIGUSR2 signal handling (Linux/macOS only) #[cfg(unix)] { @@ -290,6 +298,7 @@ impl WhisperApp { core_for_signal.set_chunk_split_strategy(s.chunk_split_strategy); core_for_signal .set_auto_stop_params(s.auto_stop_silence_secs, s.max_record_secs); + core_for_signal.set_llm_postprocess_settings(s.llm_postprocess.clone()); } core_for_signal.toggle_recording(); } else if sig == SIGUSR2 { @@ -338,6 +347,8 @@ impl WhisperApp { // 自動停止 self.core .set_auto_stop_params(s.auto_stop_silence_secs, s.max_record_secs); + self.core + .set_llm_postprocess_settings(s.llm_postprocess.clone()); } pub fn show_floating_window(&mut self) { @@ -408,6 +419,7 @@ impl eframe::App for WhisperApp { // Each frame, reflect latest UI settings to a snapshot { let s = self.settings_window.get_settings(); + let llm_enabled_now = s.llm_postprocess.enabled; if let Ok(mut snap) = self.live_settings.lock() { snap.whisper_language = s.whisper_language.clone(); snap.input_device = s.input_device.clone(); @@ 
-425,7 +437,12 @@ impl eframe::App for WhisperApp { snap.max_record_secs = s.max_record_secs; snap.sound_enabled = s.sound_enabled; snap.sound_volume_percent = s.sound_volume_percent; + snap.llm_postprocess = s.llm_postprocess.clone(); + } + if llm_enabled_now && !self.llm_was_enabled { + self.add_log("LLM post-processing enabled."); } + self.llm_was_enabled = llm_enabled_now; } // system tray removed @@ -445,7 +462,10 @@ impl eframe::App for WhisperApp { // 待機中は10秒ごとに更新 ctx.request_repaint_after(std::time::Duration::from_secs(10)); } - SimpleRecState::Recording | SimpleRecState::Processing | SimpleRecState::Busy => { + SimpleRecState::Recording + | SimpleRecState::Processing + | SimpleRecState::PostProcessing + | SimpleRecState::Busy => { // アクティブな処理中は1秒ごとに更新 ctx.request_repaint_after(std::time::Duration::from_secs(1)); } @@ -502,6 +522,15 @@ impl eframe::App for WhisperApp { Icon::Loader.unicode(), ) } + SimpleRecState::PostProcessing => { + ctx.request_repaint_after(std::time::Duration::from_millis(500)); + ( + i18n::tr("status-post-processing"), + egui::Color32::from_rgb(75, 154, 242), + egui::Color32::WHITE, + Icon::Loader.unicode(), + ) + } SimpleRecState::Busy => { ctx.request_repaint_after(std::time::Duration::from_millis(500)); ( @@ -556,6 +585,12 @@ impl eframe::App for WhisperApp { self.add_log("[Record] Stopped recording; started processing"); i18n::tr("msg-processing") } + SimpleRecState::PostProcessing => { + self.add_log( + "[Record] Stopped recording; running post-processing", + ); + i18n::tr("status-post-processing") + } SimpleRecState::Idle => { self.add_log("[Record] Recording stopped"); i18n::tr("msg-recording-stopped") @@ -632,6 +667,16 @@ impl eframe::App for WhisperApp { add_tab(Icon::Library, &label, TabView::Dictionary, ui); } ui.add_space(6.0); + { + let label = i18n::tr("tab-llm"); + add_tab(Icon::Wand, &label, TabView::Llm, ui); + } + ui.add_space(6.0); + { + let label = i18n::tr("tab-history"); + add_tab(Icon::Clock, &label, 
TabView::History, ui); + } + ui.add_space(6.0); { let label = i18n::tr("tab-logs"); add_tab(Icon::FileText, &label, TabView::Logs, ui); @@ -676,6 +721,12 @@ impl eframe::App for WhisperApp { TabView::Dictionary => { self.settings_window.ui_dictionary_section(ui); } + TabView::Llm => { + self.settings_window.ui_section_llm(ui); + } + TabView::History => { + self.settings_window.ui_section_history(ui); + } TabView::Logs => { // ログビュー ui.horizontal(|ui| { @@ -731,7 +782,9 @@ impl eframe::App for WhisperApp { egui::Color32::from_rgb(100, 200, 255) } else if log.contains("[Process]") { egui::Color32::from_rgb(255, 255, 100) - } else if log.contains("[Whisper]") { + } else if log.contains("[Whisper]") + || log.contains("[llm]") + { egui::Color32::from_rgb(200, 150, 255) } else if log.contains("[Startup]") || log.contains("[Tray]") @@ -762,6 +815,7 @@ impl eframe::App for WhisperApp { let s = self.settings_window.get_settings(); // Clipboard always enabled; toggle only auto-paste self.core.set_behavior_options(true, s.auto_paste); + let llm_settings_snapshot = s.llm_postprocess.clone(); // Apply Whisper language (auto: None) let lang_opt = if s.whisper_language == "auto" { None @@ -806,6 +860,9 @@ impl eframe::App for WhisperApp { if let Some(entries) = self.settings_window.take_dictionary_to_apply() { self.core.set_dictionary_entries(entries); } + + self.core + .set_llm_postprocess_settings(llm_settings_snapshot); } // Drain SettingsWindow-generated update logs into Debug Log tab diff --git a/src/gui/floating.rs b/src/gui/floating.rs index b7c104b..9138c8d 100644 --- a/src/gui/floating.rs +++ b/src/gui/floating.rs @@ -122,6 +122,7 @@ impl FloatingWindow { SimpleRecState::Idle => Icon::Pause, SimpleRecState::Recording => Icon::Mic, SimpleRecState::Processing => Icon::Loader, + SimpleRecState::PostProcessing => Icon::Loader, SimpleRecState::Busy => Icon::Loader, } .unicode(); @@ -130,6 +131,9 @@ impl FloatingWindow { SimpleRecState::Idle => egui::Color32::from_rgb(40, 167, 
69), // green SimpleRecState::Recording => egui::Color32::from_rgb(220, 53, 69), // red SimpleRecState::Processing => egui::Color32::from_rgb(255, 193, 7), // yellow + SimpleRecState::PostProcessing => { + egui::Color32::from_rgb(75, 154, 242) + } // blue SimpleRecState::Busy => egui::Color32::from_rgb(108, 117, 125), // gray }; @@ -146,6 +150,7 @@ impl FloatingWindow { .clicked(); if rec_clicked && state != SimpleRecState::Processing + && state != SimpleRecState::PostProcessing && state != SimpleRecState::Busy { self.core.toggle_recording(); diff --git a/src/gui/settings.rs b/src/gui/settings.rs index ac9be8a..b6b579b 100644 --- a/src/gui/settings.rs +++ b/src/gui/settings.rs @@ -3,15 +3,21 @@ use serde::{Deserialize, Serialize}; use std::path::PathBuf; // ProjectDirs and utility imports moved to submodules use crate::i18n; +use crate::llm::{ + builtin_prompt_preview, history_modified_time, load_history_entries, ConnectionTestOutcome, + LlmHistoryEntry, LlmModelInfo, LlmPostProcessSettings, LlmPostProcessor, PostProcessOutcome, + DEFAULT_LOCAL_BASE_URL, MODE_ID_CUSTOM_DRAFT, PRESET_ID_FORMAT, PRESET_ID_SUMMARY, +}; use crate::transcription::SUPPORTED_MODELS; use crate::utils::update::{releases_latest_url, spawn_check_update, AvailableUpdate, UpdateState}; -use crate::utils::{open::open_url, reveal_in_file_manager, update}; -use std::sync::{Arc, Mutex}; +use crate::utils::{open::open_url, update}; +use std::sync::{mpsc, Arc, Mutex}; // (kept above) use std::sync::atomic::{AtomicBool, Ordering}; use crate::audio::VadStrategy; use std::sync::atomic::AtomicBool; // device trait usage moved to submodules -use std::time::Instant; +use chrono::Local; +use std::time::{Instant, SystemTime}; // moved audio test helpers into submodule; keep imports local there // removed: correction feature @@ -33,6 +39,33 @@ const THIRD_PARTY_LICENSES_MD: &str = include_str!(concat!( "/assets/THIRD_PARTY_LICENSES.md" )); +#[derive(Clone, Debug)] +struct LlmModelOption { + id: String, + 
label: String, +} + +#[derive(Default)] +struct LlmPromptTestState { + open: bool, + in_progress: bool, + selected_entry: Option, + output: Option, + latency_ms: Option, + truncated_input: bool, + error: Option, +} + +fn is_builtin_mode_id(id: &str) -> bool { + matches!(id, PRESET_ID_FORMAT | PRESET_ID_SUMMARY) +} + +enum LlmUiMessage { + TestResult(Result), + ModelList(Result, String>), + PromptTest(Result), +} + // Legacy tab enum removed (unused) #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] @@ -53,6 +86,7 @@ pub struct Settings { pub use_clipboard: bool, pub floating_opacity: f32, pub floating_always_on_top: bool, + pub llm_postprocess: LlmPostProcessSettings, // Last floating window position (screen coords after OS scale) pub floating_position: Option<[f32; 2]>, pub whisper_no_timestamps: bool, @@ -92,6 +126,7 @@ impl Default for Settings { use_clipboard: true, floating_opacity: 1.0, floating_always_on_top: true, + llm_postprocess: LlmPostProcessSettings::default(), floating_position: None, whisper_no_timestamps: true, whisper_token_timestamps: false, @@ -164,6 +199,22 @@ pub struct SettingsWindow { pub(crate) dict_editor_includes: Vec, // Dictionary list search filter pub(crate) dict_filter_text: String, + // LLM post-processing UI state + llm_model_options: Vec, + llm_fetching_models: bool, + llm_fetch_error: Option, + llm_test_in_progress: bool, + llm_test_message: Option, + llm_test_error: Option, + llm_async_tx: mpsc::Sender, + llm_async_rx: mpsc::Receiver, + llm_custom_error: Option, + llm_mode_loaded_id: Option, + llm_prompt_test: LlmPromptTestState, + llm_history_entries: Vec, + llm_history_error: Option, + llm_history_last_modified: Option, + llm_history_selected: Option, // Update check state (GitHub Releases) update_state: Arc>, update_downloading: Arc>, @@ -177,6 +228,7 @@ pub struct SettingsWindow { impl SettingsWindow { pub fn new() -> Self { let settings = Self::load_settings().unwrap_or_default(); + let (llm_async_tx, llm_async_rx) 
= mpsc::channel(); let mut this = Self { hotkey_input: settings.hotkey_recording.clone(), original_settings: settings.clone(), @@ -219,6 +271,21 @@ impl SettingsWindow { dict_editor_aliases: Vec::new(), dict_editor_includes: Vec::new(), dict_filter_text: String::new(), + llm_model_options: Vec::new(), + llm_fetching_models: false, + llm_fetch_error: None, + llm_test_in_progress: false, + llm_test_message: None, + llm_test_error: None, + llm_async_tx, + llm_async_rx, + llm_custom_error: None, + llm_mode_loaded_id: None, + llm_prompt_test: LlmPromptTestState::default(), + llm_history_entries: Vec::new(), + llm_history_error: None, + llm_history_last_modified: None, + llm_history_selected: None, update_state: Arc::new(Mutex::new(UpdateState::Checking)), update_downloading: Arc::new(Mutex::new(false)), update_progress: Arc::new(Mutex::new(None)), @@ -259,13 +326,170 @@ impl SettingsWindow { this.dict_dirty = false; } } + this.settings.llm_postprocess.ensure_mode_valid(); + this.sync_llm_custom_editor(); // Initialize UI language (switch Fluent based on the setting) crate::i18n::set_ui_language_preference(&this.settings.ui_language); // Kick off one-shot update check (background) spawn_check_update(this.update_state.clone(), Some(this.update_logs.clone())); + this.reload_llm_history(); this } + fn poll_llm_messages(&mut self) { + loop { + match self.llm_async_rx.try_recv() { + Ok(LlmUiMessage::TestResult(result)) => { + self.llm_test_in_progress = false; + match result { + Ok(outcome) => { + let seconds = outcome.duration_ms as f32 / 1000.0; + self.llm_test_message = Some(format!( + "{} (status: {}, {:.2}s)", + outcome.message, + outcome + .status + .map(|s| s.to_string()) + .unwrap_or_else(|| "n/a".to_string()), + seconds + )); + self.llm_test_error = None; + } + Err(err) => { + self.llm_test_error = Some(err); + self.llm_test_message = None; + } + } + } + Ok(LlmUiMessage::ModelList(result)) => { + self.llm_fetching_models = false; + match result { + Ok(models) => { + 
self.llm_model_options = models + .into_iter() + .map(|info| LlmModelOption { + id: info.id, + label: info.label, + }) + .collect(); + self.llm_fetch_error = None; + } + Err(err) => { + self.llm_fetch_error = Some(err); + } + } + } + Ok(LlmUiMessage::PromptTest(result)) => { + self.llm_prompt_test.in_progress = false; + match result { + Ok(outcome) => { + self.llm_prompt_test.output = Some(outcome.content); + self.llm_prompt_test.latency_ms = Some(outcome.latency_ms); + self.llm_prompt_test.truncated_input = outcome.truncated_input; + self.llm_prompt_test.error = None; + } + Err(err) => { + self.llm_prompt_test.error = Some(err); + self.llm_prompt_test.output = None; + self.llm_prompt_test.latency_ms = None; + self.llm_prompt_test.truncated_input = false; + } + } + } + Err(std::sync::mpsc::TryRecvError::Empty) => break, + Err(std::sync::mpsc::TryRecvError::Disconnected) => break, + } + } + } + + fn reload_llm_history(&mut self) { + match load_history_entries() { + Ok(entries) => { + self.llm_history_entries = entries; + self.llm_history_error = None; + } + Err(err) => { + self.llm_history_entries.clear(); + self.llm_history_error = Some(format!( + "{}: {}", + i18n::tr("msg-llm-history-load-failed"), + err + )); + } + } + self.llm_history_last_modified = history_modified_time(); + let len = self.llm_history_entries.len(); + if len == 0 { + self.llm_history_selected = None; + self.llm_prompt_test.selected_entry = None; + } else if let Some(sel) = self.llm_history_selected { + if sel >= len { + self.llm_history_selected = Some(len - 1); + } + if let Some(test_sel) = self.llm_prompt_test.selected_entry { + if test_sel >= len { + self.llm_prompt_test.selected_entry = Some(len - 1); + } + } else { + self.llm_prompt_test.selected_entry = Some(len - 1); + } + } else { + self.llm_history_selected = Some(len - 1); + if self.llm_prompt_test.selected_entry.is_none() { + self.llm_prompt_test.selected_entry = Some(len - 1); + } + } + } + + fn refresh_llm_history_if_needed(&mut 
self) { + let current = history_modified_time(); + let should_reload = match (self.llm_history_last_modified, current) { + (None, None) => self.llm_history_entries.is_empty() || self.llm_history_error.is_some(), + (None, Some(_)) => true, + (Some(_), None) => true, + (Some(prev), Some(cur)) => prev != cur, + }; + if should_reload { + self.reload_llm_history(); + } + } + + fn format_history_timestamp(ts: &str) -> String { + chrono::DateTime::parse_from_rfc3339(ts) + .map(|dt| { + dt.with_timezone(&Local) + .format("%Y-%m-%d %H:%M:%S") + .to_string() + }) + .unwrap_or_else(|_| ts.to_string()) + } + + fn format_history_language(lang: &Option) -> String { + match lang { + Some(code) if !code.trim().is_empty() => code.clone(), + _ => i18n::tr("label-llm-history-language-auto"), + } + } + + fn history_entry_preview(entry: &LlmHistoryEntry) -> String { + let timestamp = Self::format_history_timestamp(&entry.timestamp); + let mut preview: String = entry.transcript.chars().take(32).collect(); + preview = preview.replace('\n', " "); + let trimmed = preview.trim(); + if trimmed.is_empty() { + timestamp + } else { + format!("{} • {}", timestamp, trimmed) + } + } + + fn clear_llm_prompt_test_result(&mut self) { + self.llm_prompt_test.output = None; + self.llm_prompt_test.error = None; + self.llm_prompt_test.latency_ms = None; + self.llm_prompt_test.truncated_input = false; + } + // Persist flag when mic preflight succeeds #[cfg(target_os = "macos")] pub fn mark_mic_preflight_done(&mut self) { @@ -1066,6 +1290,936 @@ impl SettingsWindow { } } + pub fn ui_section_llm(&mut self, ui: &mut egui::Ui) { + let heading = egui::RichText::new(i18n::tr("heading-llm")) + .color(ui.visuals().strong_text_color()) + .strong(); + ui.heading(heading); + ui.add_space(5.0); + self.poll_llm_messages(); + + egui::Frame::default() + .fill(ui.visuals().faint_bg_color) + .corner_radius(egui::CornerRadius::same(6)) + .inner_margin(egui::Margin::symmetric(16, 12)) + .show(ui, |ui| { + 
ui.set_min_width(ui.available_width()); + ui.add(egui::Label::new(i18n::tr("llm-description")).wrap()); + ui.add_space(8.0); + if ui.link(i18n::tr("link-llm-doc")).clicked() { + open_url("https://hootvoice.com/llm-postprocess.html"); + } + ui.add_space(8.0); + + let mut enabled = self.settings.llm_postprocess.enabled; + if ui + .checkbox(&mut enabled, i18n::tr("label-llm-enable")) + .changed() + { + self.settings.llm_postprocess.enabled = enabled; + self.check_changes(); + } + + if self.settings.llm_postprocess.enabled { + self.ui_llm_details(ui); + } + ui.add_space(10.0); + }); + } + + fn ui_llm_details(&mut self, ui: &mut egui::Ui) { + ui.add_space(8.0); + + let mut base = self.settings.llm_postprocess.api_base_url.clone(); + let mut base_changed = false; + ui.horizontal(|ui| { + ui.label(i18n::tr("label-llm-api-base")); + if ui + .add( + egui::TextEdit::singleline(&mut base) + .desired_width(260.0) + .hint_text(i18n::tr("placeholder-llm-api-base")), + ) + .changed() + { + base_changed = true; + } + }); + if base_changed { + if base.trim().is_empty() { + self.settings.llm_postprocess.api_base_url = DEFAULT_LOCAL_BASE_URL.to_string(); + } else { + self.settings.llm_postprocess.api_base_url = base; + } + self.check_changes(); + } + ui.small(i18n::tr("note-llm-api-base-local")); + if let Some(msg) = &self.llm_test_message { + ui.colored_label(egui::Color32::LIGHT_GREEN, msg); + } + if let Some(err) = &self.llm_test_error { + ui.colored_label(egui::Color32::YELLOW, err); + } + + ui.add_space(6.0); + let mut model = self.settings.llm_postprocess.model.clone(); + let mut model_changed = false; + ui.horizontal(|ui| { + ui.label(i18n::tr("label-llm-model")); + if ui + .add( + egui::TextEdit::singleline(&mut model) + .desired_width(200.0) + .hint_text(i18n::tr("placeholder-llm-model")), + ) + .changed() + { + model_changed = true; + } + if self.llm_fetching_models { + ui.spinner(); + } else if !self.llm_model_options.is_empty() { + 
egui::ComboBox::from_id_salt("llm_model_candidates") + .selected_text(i18n::tr("llm-model-dropdown-placeholder")) + .show_ui(ui, |ui| { + for opt in &self.llm_model_options { + if ui.selectable_label(false, &opt.label).clicked() { + model = opt.id.clone(); + model_changed = true; + } + } + }); + } + if ui + .button(i18n::tr("btn-llm-fetch-models")) + .on_hover_text(i18n::tr("tooltip-llm-fetch-models")) + .clicked() + { + self.request_llm_model_list(); + } + ui.add_space(6.0); + if ui + .add_enabled( + !self.llm_test_in_progress, + egui::Button::new(i18n::tr("btn-llm-test-connection")), + ) + .clicked() + { + self.request_llm_connection_test(); + } + if self.llm_test_in_progress { + ui.spinner(); + } + }); + if let Some(err) = &self.llm_fetch_error { + ui.colored_label(egui::Color32::YELLOW, err); + } else if self.llm_model_options.is_empty() && !self.llm_fetching_models { + ui.small(i18n::tr("msg-llm-fetch-empty")); + } + if model_changed { + self.settings.llm_postprocess.model = model; + self.check_changes(); + } + + ui.add_space(6.0); + let language_hint = self.llm_language_hint(); + let mut mode_id = self.settings.llm_postprocess.mode_id.clone(); + let original_mode_id = mode_id.clone(); + let mut mode_changed = false; + let mut new_custom_requested = false; + let mut mode_options = vec![ + (PRESET_ID_FORMAT.to_string(), i18n::tr("llm-mode-format")), + (PRESET_ID_SUMMARY.to_string(), i18n::tr("llm-mode-summary")), + ]; + for custom in &self.settings.llm_postprocess.custom_prompts { + mode_options.push((custom.id.clone(), custom.name.clone())); + } + mode_options.push(( + MODE_ID_CUSTOM_DRAFT.to_string(), + i18n::tr("llm-mode-custom-add"), + )); + + let current_label = if mode_id == MODE_ID_CUSTOM_DRAFT { + i18n::tr("llm-mode-custom-draft") + } else { + mode_options + .iter() + .find(|(id, _)| id == &mode_id) + .map(|(_, label)| label.clone()) + .unwrap_or_else(|| mode_id.clone()) + }; + + ui.horizontal(|ui| { + ui.label(i18n::tr("label-llm-mode")); + 
egui::ComboBox::from_id_salt("llm_mode_combo") + .selected_text(current_label) + .show_ui(ui, |ui| { + for (id, label) in &mode_options { + let selected = mode_id == *id; + if ui.selectable_label(selected, label).clicked() { + if mode_id != *id { + mode_id = id.clone(); + mode_changed = true; + if id == MODE_ID_CUSTOM_DRAFT { + new_custom_requested = true; + } + } + } + } + }); + if self.settings.llm_postprocess.custom_prompts.is_empty() { + ui.label( + egui::RichText::new(i18n::tr("llm-mode-custom-empty-hint")) + .italics() + .color(ui.visuals().weak_text_color()), + ); + } + }); + + if mode_changed { + self.llm_custom_error = None; + self.llm_mode_loaded_id = None; + self.settings.llm_postprocess.mode_id = mode_id.clone(); + if mode_id == MODE_ID_CUSTOM_DRAFT + && (new_custom_requested || original_mode_id != MODE_ID_CUSTOM_DRAFT) + { + self.settings + .llm_postprocess + .begin_custom_draft(language_hint.as_deref()); + } else { + self.settings.llm_postprocess.ensure_mode_valid(); + } + self.sync_llm_custom_editor(); + self.check_changes(); + } else { + self.sync_llm_custom_editor(); + } + + let is_custom_mode = !is_builtin_mode_id(&mode_id); + if !is_custom_mode { + let locales = self + .settings + .llm_postprocess + .locale_priority(language_hint.as_deref()); + if let Some((system_preview, user_preview)) = builtin_prompt_preview(&mode_id, &locales) + { + ui.add_space(4.0); + ui.label(i18n::tr("label-llm-system")); + let mut system_display = system_preview.clone(); + ui.add( + egui::TextEdit::multiline(&mut system_display) + .desired_rows(3) + .desired_width(f32::INFINITY) + .interactive(false), + ); + ui.add_space(4.0); + ui.label(i18n::tr("label-llm-user")); + let mut user_display = user_preview.clone(); + ui.add( + egui::TextEdit::multiline(&mut user_display) + .desired_rows(6) + .desired_width(f32::INFINITY) + .interactive(false), + ); + } + } + + if is_custom_mode { + ui.add_space(4.0); + if let Some(err) = &self.llm_custom_error { + 
ui.colored_label(egui::Color32::from_rgb(220, 80, 80), err); + ui.add_space(4.0); + } + + let mut name_changed = false; + ui.horizontal(|ui| { + ui.label(i18n::tr("label-llm-custom-name")); + if ui + .text_edit_singleline(&mut self.settings.llm_postprocess.custom_prompt_name) + .changed() + { + name_changed = true; + } + }); + ui.add_space(4.0); + ui.label(i18n::tr("label-llm-system")); + let system_changed = ui + .add( + egui::TextEdit::multiline( + &mut self.settings.llm_postprocess.custom_prompt_system, + ) + .desired_rows(3) + .desired_width(f32::INFINITY), + ) + .changed(); + ui.add_space(4.0); + ui.label(i18n::tr("label-llm-user")); + let user_changed = ui + .add( + egui::TextEdit::multiline(&mut self.settings.llm_postprocess.custom_prompt) + .desired_rows(6) + .desired_width(f32::INFINITY) + .hint_text(i18n::tr("placeholder-llm-custom-prompt")), + ) + .changed(); + + if name_changed || system_changed || user_changed { + self.check_changes(); + if mode_id != MODE_ID_CUSTOM_DRAFT { + let name_trimmed = self + .settings + .llm_postprocess + .custom_prompt_name + .trim() + .to_string(); + let system_trimmed = self + .settings + .llm_postprocess + .custom_prompt_system + .trim() + .to_string(); + let user_trimmed = self + .settings + .llm_postprocess + .custom_prompt + .trim() + .to_string(); + if name_trimmed.is_empty() || user_trimmed.is_empty() { + self.llm_custom_error = Some(i18n::tr("msg-llm-custom-invalid")); + } else if self + .settings + .llm_postprocess + .update_custom_mode( + &mode_id, + name_trimmed.as_str(), + system_trimmed.as_str(), + user_trimmed.as_str(), + ) + .is_ok() + { + self.llm_custom_error = None; + self.llm_mode_loaded_id = Some(mode_id.clone()); + } else { + self.llm_custom_error = Some(i18n::tr("msg-llm-custom-update-failed")); + } + } + } + ui.with_layout(egui::Layout::right_to_left(egui::Align::TOP), |ui| { + let name_trimmed = self + .settings + .llm_postprocess + .custom_prompt_name + .trim() + .to_string(); + let system_trimmed = 
self + .settings + .llm_postprocess + .custom_prompt_system + .trim() + .to_string(); + let user_trimmed = self + .settings + .llm_postprocess + .custom_prompt + .trim() + .to_string(); + + if mode_id == MODE_ID_CUSTOM_DRAFT { + if ui + .button(i18n::tr("btn-llm-save-custom")) + .on_hover_text(i18n::tr("tooltip-llm-save-custom")) + .clicked() + { + if name_trimmed.is_empty() || user_trimmed.is_empty() { + self.llm_custom_error = Some(i18n::tr("msg-llm-custom-invalid")); + } else { + let new_id = self.settings.llm_postprocess.create_custom_mode( + name_trimmed.as_str(), + system_trimmed.as_str(), + user_trimmed.as_str(), + ); + self.settings.llm_postprocess.mode_id = new_id.clone(); + self.llm_mode_loaded_id = Some(new_id); + self.llm_custom_error = None; + self.sync_llm_custom_editor(); + self.check_changes(); + } + } + } else { + let duplicate_clicked = ui + .button(i18n::tr("btn-llm-duplicate-custom")) + .on_hover_text(i18n::tr("tooltip-llm-duplicate-custom")) + .clicked(); + let delete_clicked = ui + .button(i18n::tr("btn-llm-delete-custom")) + .on_hover_text(i18n::tr("tooltip-llm-delete-custom")) + .clicked(); + + if delete_clicked { + if self.settings.llm_postprocess.remove_custom_mode(&mode_id) { + self.llm_custom_error = None; + self.llm_mode_loaded_id = None; + self.settings.llm_postprocess.ensure_mode_valid(); + self.sync_llm_custom_editor(); + self.check_changes(); + } else { + self.llm_custom_error = Some(i18n::tr("msg-llm-custom-delete-failed")); + } + } + + if duplicate_clicked { + if name_trimmed.is_empty() || user_trimmed.is_empty() { + self.llm_custom_error = Some(i18n::tr("msg-llm-custom-invalid")); + } else { + let new_id = self.settings.llm_postprocess.create_custom_mode( + name_trimmed.as_str(), + system_trimmed.as_str(), + user_trimmed.as_str(), + ); + self.llm_custom_error = None; + self.llm_mode_loaded_id = Some(new_id.clone()); + self.settings.llm_postprocess.mode_id = new_id; + self.sync_llm_custom_editor(); + self.check_changes(); + } + } 
+ } + }); + } else { + self.llm_custom_error = None; + } + + ui.add_space(6.0); + let mut max_chars = self.settings.llm_postprocess.max_input_chars as i32; + let mut max_changed = false; + ui.horizontal(|ui| { + ui.label(i18n::tr("label-llm-max-input")); + if ui + .add(egui::Slider::new(&mut max_chars, 500..=8000).show_value(true)) + .changed() + { + max_changed = true; + } + }); + if max_changed { + self.settings.llm_postprocess.max_input_chars = max_chars.clamp(500, 8000) as usize; + self.check_changes(); + } + + ui.add_space(4.0); + let mut timeout = self.settings.llm_postprocess.timeout_secs as i32; + let mut timeout_changed = false; + ui.horizontal(|ui| { + ui.label(i18n::tr("label-llm-timeout")); + if ui + .add(egui::Slider::new(&mut timeout, 3..=60).show_value(true)) + .changed() + { + timeout_changed = true; + } + ui.small(i18n::tr("note-llm-timeout-unit")); + }); + if timeout_changed { + self.settings.llm_postprocess.timeout_secs = timeout.clamp(3, 60) as u64; + self.check_changes(); + } + + ui.add_space(4.0); + let mut apply_autopaste = self.settings.llm_postprocess.apply_to_autopaste; + if ui + .checkbox(&mut apply_autopaste, i18n::tr("label-llm-apply-autopaste")) + .changed() + { + self.settings.llm_postprocess.apply_to_autopaste = apply_autopaste; + self.check_changes(); + } + + ui.add_space(10.0); + self.ui_llm_prompt_test_section(ui); + } + + fn ui_llm_prompt_test_section(&mut self, ui: &mut egui::Ui) { + let mut open_flag = self.llm_prompt_test.open; + if ui + .checkbox(&mut open_flag, i18n::tr("label-llm-test-enable")) + .changed() + { + if open_flag { + self.refresh_llm_history_if_needed(); + let len = self.llm_history_entries.len(); + if len == 0 { + self.llm_prompt_test.selected_entry = None; + } else if self + .llm_prompt_test + .selected_entry + .map_or(true, |idx| idx >= len) + { + self.llm_prompt_test.selected_entry = Some(len.saturating_sub(1)); + } + self.clear_llm_prompt_test_result(); + self.llm_prompt_test.open = true; + } else { + 
self.llm_prompt_test.open = false; + } + } + if !self.llm_prompt_test.open { + return; + } + self.refresh_llm_history_if_needed(); + ui.add_space(6.0); + egui::Frame::group(ui.style()).show(ui, |ui| { + ui.set_min_width(ui.available_width().max(360.0)); + let history_len = self.llm_history_entries.len(); + if history_len == 0 { + ui.label(i18n::tr("label-llm-test-no-history")); + } else { + let mut selected_idx = self + .llm_prompt_test + .selected_entry + .unwrap_or(history_len - 1); + if selected_idx >= history_len { + selected_idx = history_len - 1; + self.llm_prompt_test.selected_entry = Some(selected_idx); + } + ui.label(i18n::tr("label-llm-test-select-transcript")); + let mut combo_selection = selected_idx; + let selected_text = + Self::history_entry_preview(&self.llm_history_entries[selected_idx]); + egui::ComboBox::from_id_salt("llm_prompt_test_history") + .selected_text(selected_text) + .show_ui(ui, |cb| { + for (idx, entry) in self.llm_history_entries.iter().enumerate().rev() { + let label = Self::history_entry_preview(entry); + if cb + .selectable_label(combo_selection == idx, label.clone()) + .clicked() + { + combo_selection = idx; + } + } + }); + if combo_selection != selected_idx { + selected_idx = combo_selection; + self.llm_prompt_test.selected_entry = Some(selected_idx); + self.clear_llm_prompt_test_result(); + } + if self.llm_prompt_test.selected_entry.is_none() { + self.llm_prompt_test.selected_entry = Some(selected_idx); + } + ui.add_space(6.0); + ui.small(i18n::tr("note-llm-test-not-saved")); + ui.add_space(6.0); + let transcript = self.llm_history_entries[selected_idx].transcript.clone(); + let base_text_height = ui.text_style_height(&egui::TextStyle::Body); + let text_box_height = (base_text_height * 6.0).max(90.0); + let text_box_width = ui.available_width(); + ui.label(i18n::tr("label-llm-test-transcript")); + let mut transcript_text = transcript.clone(); + let transcript_widget = egui::TextEdit::multiline(&mut transcript_text) + 
.desired_rows(6) + .desired_width(text_box_width) + .interactive(false); + ui.add_sized([text_box_width, text_box_height], transcript_widget); + ui.add_space(6.0); + let transcript_for_test = transcript.clone(); + ui.horizontal(|ui| { + if ui + .add_enabled( + !self.llm_prompt_test.in_progress, + egui::Button::new(i18n::tr("btn-llm-test-run")), + ) + .clicked() + { + self.request_llm_prompt_test(transcript_for_test.clone()); + } + if self.llm_prompt_test.in_progress { + ui.spinner(); + } + }); + if self.llm_prompt_test.in_progress { + ui.label(i18n::tr("msg-llm-prompt-test-running")); + } + if let Some(err) = &self.llm_prompt_test.error { + ui.colored_label(ui.visuals().warn_fg_color, err); + } + if let Some(latency_ms) = self.llm_prompt_test.latency_ms { + let seconds = latency_ms as f32 / 1000.0; + ui.label(format!( + "{} {:.2}s", + i18n::tr("label-llm-test-duration"), + seconds + )); + } + if self.llm_prompt_test.truncated_input { + ui.colored_label( + ui.visuals().warn_fg_color, + i18n::tr("label-llm-test-truncated"), + ); + } + if let Some(output) = &self.llm_prompt_test.output { + ui.add_space(6.0); + ui.label(i18n::tr("label-llm-test-output")); + let mut output_text = output.clone(); + let output_widget = egui::TextEdit::multiline(&mut output_text) + .desired_rows(6) + .desired_width(text_box_width) + .interactive(false); + ui.add_sized([text_box_width, text_box_height], output_widget); + ui.add_space(4.0); + if ui.button(i18n::tr("btn-llm-test-copy-output")).clicked() { + ui.ctx().copy_text(output.clone()); + } + } + } + }); + } + + pub fn ui_section_history(&mut self, ui: &mut egui::Ui) { + self.refresh_llm_history_if_needed(); + ui.add_space(8.0); + let heading = egui::RichText::new(i18n::tr("heading-llm-history")) + .color(ui.visuals().strong_text_color()) + .strong(); + ui.heading(heading); + ui.add_space(4.0); + + if let Some(err) = &self.llm_history_error { + ui.colored_label(ui.visuals().warn_fg_color, err); + return; + } + + if 
self.llm_history_entries.is_empty() { + ui.label(i18n::tr("label-llm-history-empty")); + return; + } + + let base_height = ui.text_style_height(&egui::TextStyle::Body); + let row_height = base_height.max(18.0) * 1.3; + let list_height = row_height * 5.0; + + let mut new_selection = self.llm_history_selected; + let time_column_width = 140.0; + egui::ScrollArea::vertical() + .id_salt("llm_history_scroll") + .max_height(list_height) + .auto_shrink([false; 2]) + .show(ui, |ui| { + for (display_pos, entry) in self.llm_history_entries.iter().enumerate().rev() { + let is_selected = new_selection == Some(display_pos); + ui.horizontal(|row| { + row.set_min_height(row_height); + let preview: String = entry.transcript.chars().take(20).collect(); + let preview = preview.replace('\n', " "); + let time_text = format!( + "{} {:.2}s", + i18n::tr("label-llm-history-duration-column"), + entry.llm_latency_ms as f32 / 1000.0 + ); + let spacing = row.spacing().item_spacing.x; + let available = row.available_width(); + let button_width = (available - time_column_width - spacing).max(60.0); + let mut button_text = format!( + "{} {}", + Self::format_history_timestamp(&entry.timestamp), + preview + ); + let max_chars = 40; + if button_text.chars().count() > max_chars { + let mut truncated = String::with_capacity(max_chars); + for (idx, ch) in button_text.chars().enumerate() { + if idx >= max_chars - 1 { + break; + } + truncated.push(ch); + } + truncated.push('…'); + button_text = truncated; + } + let btn_inner = row.allocate_ui_with_layout( + egui::vec2(button_width, row_height), + egui::Layout::left_to_right(egui::Align::Min), + |ui_btn| { + #[allow(deprecated)] + ui_btn.selectable_label(is_selected, button_text.clone()) + }, + ); + let response = btn_inner.response.union(btn_inner.inner); + if response + .on_hover_text(i18n::tr("tooltip-llm-history-select-row")) + .clicked() + { + new_selection = Some(display_pos); + } + row.add_space(spacing); + row.allocate_ui_with_layout( + 
egui::vec2(time_column_width, row_height), + egui::Layout::left_to_right(egui::Align::Min), + |label_ui| { + label_ui.label(egui::RichText::new(time_text).monospace()); + }, + ); + }); + ui.separator(); + } + }); + + if new_selection.is_none() { + new_selection = self.llm_history_entries.len().checked_sub(1); + } + self.llm_history_selected = new_selection; + + ui.add_space(6.0); + ui.separator(); + ui.add_space(6.0); + + let Some(selected_index) = self.llm_history_selected else { + ui.label(i18n::tr("label-llm-history-select-entry")); + return; + }; + + let Some(entry) = self.llm_history_entries.get(selected_index) else { + ui.label(i18n::tr("label-llm-history-select-entry")); + return; + }; + + ui.heading( + egui::RichText::new(i18n::tr("label-llm-history-details")) + .color(ui.visuals().strong_text_color()) + .strong(), + ); + ui.add_space(4.0); + + ui.label(format!( + "{} {}", + i18n::tr("label-llm-history-timestamp"), + Self::format_history_timestamp(&entry.timestamp) + )); + ui.label(format!( + "{} {}", + i18n::tr("label-llm-history-language"), + Self::format_history_language(&entry.settings.language_override) + )); + ui.label(format!( + "{} {}", + i18n::tr("label-llm-history-model"), + entry.settings.model + )); + ui.label(format!( + "{} {}", + i18n::tr("label-llm-history-mode"), + entry.settings.mode_label + )); + ui.label(format!( + "{} {}", + i18n::tr("label-llm-history-base-url"), + entry.settings.api_base_url + )); + ui.label(format!( + "{} {:.2}s", + i18n::tr("label-llm-history-latency"), + entry.llm_latency_ms as f32 / 1000.0 + )); + if entry.truncated_input { + ui.colored_label( + ui.visuals().warn_fg_color, + i18n::tr("label-llm-history-truncated"), + ); + } + + ui.add_space(6.0); + ui.horizontal(|ui| { + if ui + .button(i18n::tr("btn-llm-history-copy-transcript")) + .on_hover_text(i18n::tr("tooltip-llm-history-copy-transcript")) + .clicked() + { + ui.ctx().copy_text(entry.transcript.clone()); + } + if ui + 
.button(i18n::tr("btn-llm-history-copy-output")) + .on_hover_text(i18n::tr("tooltip-llm-history-copy-output")) + .clicked() + { + ui.ctx().copy_text(entry.llm_output.clone()); + } + }); + + ui.add_space(6.0); + let base_text_height = ui.text_style_height(&egui::TextStyle::Body); + let text_box_height = (base_text_height * 6.0).max(90.0); + + ui.label(i18n::tr("label-llm-history-transcript")); + let mut transcript_text = entry.transcript.clone(); + let text_box_width = ui.available_width(); + let transcript_widget = egui::TextEdit::multiline(&mut transcript_text) + .desired_rows(6) + .desired_width(text_box_width) + .clip_text(true) + .interactive(false); + ui.add_sized([text_box_width, text_box_height], transcript_widget); + + ui.add_space(6.0); + ui.label(i18n::tr("label-llm-history-output")); + let mut output_text = entry.llm_output.clone(); + let output_widget = egui::TextEdit::multiline(&mut output_text) + .desired_rows(6) + .desired_width(text_box_width) + .clip_text(true) + .interactive(false); + ui.add_sized([text_box_width, text_box_height], output_widget); + + if entry.settings.custom_prompt_system.is_some() + || entry.settings.custom_prompt_user.is_some() + { + ui.add_space(6.0); + egui::CollapsingHeader::new(i18n::tr("label-llm-history-custom-prompts")) + .id_salt(format!("llm_hist_prompts_{}", entry.timestamp)) + .default_open(false) + .show(ui, |ui| { + if let Some(system) = &entry.settings.custom_prompt_system { + ui.label(i18n::tr("label-llm-history-custom-system")); + let mut text = system.clone(); + ui.add( + egui::TextEdit::multiline(&mut text) + .desired_rows(3) + .desired_width(f32::INFINITY) + .interactive(false), + ); + ui.add_space(4.0); + } + if let Some(user) = &entry.settings.custom_prompt_user { + ui.label(i18n::tr("label-llm-history-custom-user")); + let mut text = user.clone(); + ui.add( + egui::TextEdit::multiline(&mut text) + .desired_rows(4) + .desired_width(f32::INFINITY) + .interactive(false), + ); + } + }); + } + } + + fn 
request_llm_connection_test(&mut self) { + if self.llm_test_in_progress { + return; + } + let tx = self.llm_async_tx.clone(); + let settings = self.settings.llm_postprocess.clone(); + self.llm_test_in_progress = true; + self.llm_test_error = None; + self.llm_test_message = Some(i18n::tr("msg-llm-test-running")); + std::thread::spawn(move || { + let result = crate::llm::run_connection_test(&settings).map_err(|e| e.to_string()); + let _ = tx.send(LlmUiMessage::TestResult(result)); + }); + } + + fn request_llm_prompt_test(&mut self, transcript: String) { + if self.llm_prompt_test.in_progress { + return; + } + let tx = self.llm_async_tx.clone(); + let settings = self.settings.llm_postprocess.clone(); + let language_hint = self.llm_language_hint(); + self.clear_llm_prompt_test_result(); + self.llm_prompt_test.in_progress = true; + std::thread::spawn(move || { + let processor = LlmPostProcessor::new(); + let result = processor + .process(&settings, &transcript, None, language_hint.as_deref()) + .map_err(|err| { + let mut msg = err.message; + if let Some(status) = err.status { + msg = format!("{} (status: {})", msg, status); + } + if let Some(wait) = err.retry_after_secs { + msg = format!("{} (retry after {}s)", msg, wait); + } + msg + }); + let _ = tx.send(LlmUiMessage::PromptTest(result)); + }); + } + + fn request_llm_model_list(&mut self) { + if self.llm_fetching_models { + return; + } + let tx = self.llm_async_tx.clone(); + let settings = self.settings.llm_postprocess.clone(); + self.llm_fetching_models = true; + self.llm_fetch_error = None; + std::thread::spawn(move || { + let result = crate::llm::fetch_models(&settings).map_err(|e| e.to_string()); + let _ = tx.send(LlmUiMessage::ModelList(result)); + }); + } + + fn sync_llm_custom_editor(&mut self) { + let mode_id = self.settings.llm_postprocess.mode_id.clone(); + if mode_id == MODE_ID_CUSTOM_DRAFT { + self.llm_mode_loaded_id = None; + if self + .settings + .llm_postprocess + .custom_prompt_system + .trim() + 
.is_empty() + && self.settings.llm_postprocess.custom_prompt.trim() == "{{transcript}}" + { + let language_hint = self.llm_language_hint(); + self.settings + .llm_postprocess + .begin_custom_draft(language_hint.as_deref()); + self.check_changes(); + } + return; + } + if is_builtin_mode_id(&mode_id) { + self.llm_mode_loaded_id = None; + return; + } + if self.llm_mode_loaded_id.as_deref() == Some(mode_id.as_str()) { + return; + } + if let Some(custom) = self + .settings + .llm_postprocess + .custom_prompt(&mode_id) + .cloned() + { + self.settings.llm_postprocess.custom_prompt_name = custom.name; + self.settings.llm_postprocess.custom_prompt_system = + custom.system_prompt.unwrap_or_default(); + self.settings.llm_postprocess.custom_prompt = custom.user_prompt; + self.llm_mode_loaded_id = Some(mode_id); + } else { + self.llm_mode_loaded_id = None; + self.settings.llm_postprocess.mode_id = MODE_ID_CUSTOM_DRAFT.to_string(); + } + } + + fn llm_language_hint(&self) -> Option { + if let Some(explicit) = self + .settings + .llm_postprocess + .language_override + .as_deref() + .map(str::trim) + .filter(|s| !s.is_empty()) + { + return Some(explicit.to_string()); + } + let wl = self.settings.whisper_language.trim(); + if wl.is_empty() || wl == "auto" { + None + } else { + Some(wl.to_string()) + } + } + fn ui_update_status(&mut self, ui: &mut egui::Ui) { let state = self.update_state.lock().unwrap().clone(); match state { diff --git a/src/gui/waybar.rs b/src/gui/waybar.rs index fdc77a3..170c287 100644 --- a/src/gui/waybar.rs +++ b/src/gui/waybar.rs @@ -33,6 +33,13 @@ pub fn write_status(state: SimpleRecState) { "processing", "proc", ), + SimpleRecState::PostProcessing => ( + "●", + &i18n::tr("status-post-processing"), + "#4b9af2", + "post_processing", + "llm", + ), SimpleRecState::Busy => ("●", &i18n::tr("status-busy"), "#6c757d", "busy", "busy"), }; let json = format!( diff --git a/src/lib.rs b/src/lib.rs index 0653bba..317ab6b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,6 
+5,7 @@ pub mod dictionary; pub mod gui; pub mod hotkey; pub mod i18n; +pub mod llm; pub mod transcription; pub mod utils; diff --git a/src/llm/history.rs b/src/llm/history.rs new file mode 100644 index 0000000..d35c897 --- /dev/null +++ b/src/llm/history.rs @@ -0,0 +1,168 @@ +use crate::llm::LlmPostProcessSettings; +use crate::utils::app_config_dir; +use chrono::Local; +use once_cell::sync::Lazy; +use serde::{Deserialize, Serialize}; +use std::fs; +use std::io::Write; +use std::path::PathBuf; +use std::sync::Mutex; + +pub const HISTORY_FILENAME: &str = "llm_history.yaml"; +pub const MAX_HISTORY_ENTRIES: usize = 20; + +static HISTORY_LOCK: Lazy> = Lazy::new(|| Mutex::new(())); + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct LlmHistoryEntry { + pub timestamp: String, + pub transcript: String, + pub llm_output: String, + pub truncated_input: bool, + pub llm_latency_ms: u64, + pub settings: LlmHistorySettingsSnapshot, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct LlmHistorySettingsSnapshot { + pub api_base_url: String, + pub model: String, + pub mode_id: String, + pub mode_label: String, + pub language_override: Option, + pub custom_prompt_system: Option, + pub custom_prompt_user: Option, +} + +#[derive(Debug, Clone, Copy)] +pub struct HistorySaveOutcome { + pub total_entries: usize, +} + +#[derive(Debug, Serialize, Deserialize, Default)] +struct HistoryFile { + #[serde(default)] + entries: Vec, +} + +fn history_path() -> PathBuf { + app_config_dir().join(HISTORY_FILENAME) +} + +fn mode_label(snapshot: &LlmPostProcessSettings) -> String { + if snapshot.mode_id == crate::llm::PRESET_ID_FORMAT { + "format".to_string() + } else if snapshot.mode_id == crate::llm::PRESET_ID_SUMMARY { + "summary".to_string() + } else { + snapshot + .custom_prompts + .iter() + .find(|p| p.id == snapshot.mode_id) + .map(|p| p.name.clone()) + .unwrap_or_else(|| snapshot.mode_id.clone()) + } +} + +fn build_settings_snapshot(settings: 
&LlmPostProcessSettings) -> LlmHistorySettingsSnapshot { + let custom_prompts = &settings.custom_prompts; + let (custom_system, custom_user) = + if let Some(custom) = custom_prompts.iter().find(|p| p.id == settings.mode_id) { + ( + custom.system_prompt.clone(), + Some(custom.user_prompt.clone()), + ) + } else if settings.mode_id == crate::llm::MODE_ID_CUSTOM_DRAFT { + ( + if settings.custom_prompt_system.trim().is_empty() { + None + } else { + Some(settings.custom_prompt_system.clone()) + }, + if settings.custom_prompt.trim().is_empty() { + None + } else { + Some(settings.custom_prompt.clone()) + }, + ) + } else { + (None, None) + }; + + LlmHistorySettingsSnapshot { + api_base_url: settings.effective_base_url(), + model: settings.model.clone(), + mode_id: settings.mode_id.clone(), + mode_label: mode_label(settings), + language_override: settings.language_override.clone(), + custom_prompt_system: custom_system, + custom_prompt_user: custom_user, + } +} + +pub fn record_entry( + transcript: &str, + llm_output: &str, + truncated_input: bool, + llm_latency_ms: u128, + settings: &LlmPostProcessSettings, +) -> anyhow::Result { + let _guard = HISTORY_LOCK.lock().unwrap(); + + let path = history_path(); + if let Some(parent) = path.parent() { + fs::create_dir_all(parent)?; + } + + let existing = if path.exists() { + let yaml = fs::read_to_string(&path)?; + serde_yaml::from_str::(&yaml).unwrap_or_default() + } else { + HistoryFile::default() + }; + + let mut entries = existing.entries; + let entry = LlmHistoryEntry { + timestamp: Local::now().to_rfc3339(), + transcript: transcript.to_string(), + llm_output: llm_output.to_string(), + truncated_input, + llm_latency_ms: llm_latency_ms.min(u64::MAX as u128) as u64, + settings: build_settings_snapshot(settings), + }; + entries.push(entry); + if entries.len() > MAX_HISTORY_ENTRIES { + let remove_count = entries.len() - MAX_HISTORY_ENTRIES; + entries.drain(0..remove_count); + } + let total_entries = entries.len(); + + let file = 
HistoryFile { entries }; + // Keep newest entries last on disk for chronological order + let yaml = serde_yaml::to_string(&file)?; + let tmp_path = path.with_extension("yaml.tmp"); + let mut fh = fs::File::create(&tmp_path)?; + fh.write_all(yaml.as_bytes())?; + fh.flush()?; + fs::rename(tmp_path, path)?; + Ok(HistorySaveOutcome { total_entries }) +} + +pub fn load_entries() -> anyhow::Result> { + let path = history_path(); + if !path.exists() { + return Ok(Vec::new()); + } + let yaml = fs::read_to_string(&path)?; + let file = serde_yaml::from_str::(&yaml).unwrap_or_default(); + Ok(file.entries) +} + +pub fn history_modified_time() -> Option { + let path = history_path(); + fs::metadata(path).and_then(|m| m.modified()).ok() +} + +pub fn history_file_path() -> PathBuf { + history_path() +} diff --git a/src/llm/mod.rs b/src/llm/mod.rs new file mode 100644 index 0000000..1883805 --- /dev/null +++ b/src/llm/mod.rs @@ -0,0 +1,1165 @@ +use anyhow::{anyhow, Context, Result}; +use reqwest::blocking::Client; +use reqwest::header::{HeaderMap, HeaderValue, CONTENT_TYPE, USER_AGENT}; +use reqwest::StatusCode; +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use std::cmp::min; +use std::collections::HashSet; +use std::sync::Mutex; +use std::time::{Duration, Instant}; + +mod history; + +/// Default API base URL for OpenAI 互換ローカルエンドポイント (例: Ollama)。 +pub const DEFAULT_LOCAL_BASE_URL: &str = "http://localhost:11434/v1"; +/// Local provider default model. +pub const DEFAULT_LOCAL_MODEL: &str = "llama3.1:8b"; +/// Default maximum number of input characters sent to the LLM. +pub const DEFAULT_MAX_INPUT_CHARS: usize = 4_000; +/// Default request timeout in seconds. 
+pub const DEFAULT_TIMEOUT_SECS: u64 = 30; + +const USER_AGENT_VALUE: &str = concat!("hootvoice/", env!("CARGO_PKG_VERSION")); +const CHAT_COMPLETIONS_PATH: &str = "chat/completions"; +const MODELS_PATH: &str = "models"; +const BACKOFF_FAILURES: u32 = 3; +const BACKOFF_SECS: u64 = 60; +const MAX_ERROR_BODY_PREVIEW: usize = 300; +const GLOBAL_LOCALE: &str = "global"; +const LOCALE_JA_JP: &str = "ja-JP"; +const LOCALE_EN_US: &str = "en-US"; +const PLACEHOLDER_TRANSCRIPT: &str = "{{transcript}}"; +const PLACEHOLDER_DICTIONARY: &str = "{{dictionary}}"; +const FORMAT_SYSTEM_JA: &str = "ユーザーは文字起こしされたテキストを送ってくるので内容を確認して、文字起こしで欠損したり誤変換した単語などを全体の文脈を考慮して修正してください。段落ごとに改行や空行を積極的に使って、読みやすい構造にしてください。結果は修正後のテキストのみを返却します。修正が必要ない場合は元の文章のみを返します。出力する文字列には校正後の文章以外は一切含まないこと。「えーと」「あー」などの人が話す際に発した不要な情報は除去します。"; +const FORMAT_SYSTEM_EN: &str = "You receive an automatic transcript. Fix recognition mistakes, add punctuation, keep a neutral narrator style, and remove filler words such as \"um\" or \"uh\". Return only the corrected text."; +const FORMAT_SYSTEM_GLOBAL: &str = "You receive an automatic transcript. Clean it up, fix recognition mistakes, add punctuation, and remove filler words. Return only the corrected text in the same language as the input."; +const SUMMARY_SYSTEM_JA: &str = "以下の文字起こしを最大5つの簡潔な箇条書きで日本語のまま要約してください。各行は \"- \" で開始し、余計な前置きや感想は入れないでください。"; +const SUMMARY_SYSTEM_EN: &str = "Summarize the transcript into at most five concise bullet points written in English. Start each bullet with \"- \" and avoid any commentary."; +const SUMMARY_SYSTEM_GLOBAL: &str = "Summarize the transcript into at most five concise bullet points. Prefer the transcript language when obvious, otherwise use English. 
Start each bullet with \"- \"."; +const FORMAT_USER_JA: &str = "校正対象:\n{{transcript}}"; +const FORMAT_USER_EN: &str = "Transcript to revise:\n{{transcript}}"; +const FORMAT_USER_GLOBAL: &str = "Transcript:\n{{transcript}}"; +const SUMMARY_USER_DEFAULT: &str = "{{transcript}}"; +pub const PRESET_ID_FORMAT: &str = "preset:format"; +pub const PRESET_ID_SUMMARY: &str = "preset:summary"; +pub const MODE_ID_CUSTOM_DRAFT: &str = "custom:draft"; + +pub use history::{ + history_file_path, history_modified_time, load_entries as load_history_entries, + record_entry as record_history, LlmHistoryEntry, MAX_HISTORY_ENTRIES, +}; + +fn default_mode_id() -> String { + PRESET_ID_FORMAT.to_string() +} + +fn is_builtin_mode(id: &str) -> bool { + matches!(id, PRESET_ID_FORMAT | PRESET_ID_SUMMARY) +} + +fn generate_custom_mode_id(existing: &HashSet, name: &str) -> String { + let mut slug = name + .trim() + .chars() + .map(|c| { + if c.is_ascii_alphanumeric() { + c.to_ascii_lowercase() + } else if c.is_whitespace() || c == '-' || c == '_' { + '-' + } else { + '-' + } + }) + .collect::(); + + if slug.trim_matches('-').is_empty() { + slug = "custom-mode".to_string(); + } else { + while slug.contains("--") { + slug = slug.replace("--", "-"); + } + slug = slug.trim_matches('-').to_string(); + if slug.is_empty() { + slug = "custom-mode".to_string(); + } + } + + let mut candidate = format!("custom:{}", slug); + let mut counter = 2; + while existing.contains(&candidate) { + candidate = format!("custom:{}-{}", slug, counter); + counter += 1; + } + candidate +} + +pub fn builtin_prompt_preview(mode_id: &str, locales: &[String]) -> Option<(String, String)> { + if !is_builtin_mode(mode_id) { + return None; + } + match mode_id { + PRESET_ID_FORMAT => Some(format_prompt_strings(locales)), + PRESET_ID_SUMMARY => Some(summary_prompt_strings(locales)), + _ => None, + } +} + +/// User configurable LLM post processing settings persisted in settings.toml. 
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(default)] +pub struct LlmPostProcessSettings { + pub enabled: bool, + pub api_base_url: String, + pub model: String, + #[serde(default = "default_mode_id", alias = "mode")] + pub mode_id: String, + #[serde(default)] + pub custom_prompts: Vec, + #[serde(default)] + pub custom_prompt_name: String, + pub language_override: Option, + #[serde(default)] + pub custom_prompt_system: String, + #[serde(default)] + pub custom_prompt: String, + pub max_input_chars: usize, + pub timeout_secs: u64, + pub apply_to_autopaste: bool, +} + +/// User defined custom prompt mode stored in settings. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct CustomPromptMode { + pub id: String, + pub name: String, + pub system_prompt: Option, + pub user_prompt: String, +} + +impl Default for LlmPostProcessSettings { + fn default() -> Self { + Self { + enabled: false, + api_base_url: DEFAULT_LOCAL_BASE_URL.to_string(), + model: DEFAULT_LOCAL_MODEL.to_string(), + mode_id: default_mode_id(), + custom_prompts: Vec::new(), + custom_prompt_name: "Custom prompt".to_string(), + language_override: None, + custom_prompt_system: String::new(), + custom_prompt: "{{transcript}}".to_string(), + max_input_chars: DEFAULT_MAX_INPUT_CHARS, + timeout_secs: DEFAULT_TIMEOUT_SECS, + apply_to_autopaste: true, + } + } +} + +/// Metadata describing a chat-completion capable model. +#[derive(Debug, Clone)] +pub struct LlmModelInfo { + pub id: String, + pub label: String, +} + +/// Result of running a connection test against the configured endpoint. +#[derive(Debug, Clone)] +pub struct ConnectionTestOutcome { + pub status: Option, + pub duration_ms: u128, + pub message: String, +} + +/// Successful response from the LLM post-processing step. 
+#[derive(Debug, Clone)] +pub struct PostProcessOutcome { + pub content: String, + pub truncated_input: bool, + pub latency_ms: u128, +} + +/// Error information returned when an LLM request fails. +#[derive(Debug, Clone)] +pub struct LlmRequestError { + pub message: String, + pub status: Option, + pub retry_after_secs: Option, +} + +pub type LlmResult = std::result::Result; + +#[derive(Default, Debug)] +struct RetryState { + consecutive_failures: u32, + next_retry_at: Option, +} + +/// Simple retry/backoff manager for LLM post-processing calls. +#[derive(Debug, Default)] +pub struct LlmPostProcessor { + state: Mutex, +} + +impl LlmPostProcessSettings { + /// Returns the effective API base URL, forcing the OpenAI default when that provider is used. + pub fn effective_base_url(&self) -> String { + if self.api_base_url.trim().is_empty() { + DEFAULT_LOCAL_BASE_URL.to_string() + } else { + self.api_base_url.trim().to_string() + } + } + + /// Returns the effective model placeholder default. + pub fn default_model() -> &'static str { + DEFAULT_LOCAL_MODEL + } + + /// Returns the model name ensuring defaults when settings still carry a previous provider. + pub fn effective_model(&self) -> String { + if self.model.trim().is_empty() { + Self::default_model().to_string() + } else { + self.model.trim().to_string() + } + } + + /// Returns locale priority list for prompt resolution. 
+ pub fn locale_priority(&self, language_hint: Option<&str>) -> Vec { + let mut locales = Vec::new(); + if let Some(locale) = self + .language_override + .as_deref() + .and_then(normalize_locale_code) + { + locales.push(locale); + } else if let Some(locale) = language_hint.and_then(normalize_locale_code) { + locales.push(locale); + } + if !locales + .iter() + .any(|l| l.eq_ignore_ascii_case(GLOBAL_LOCALE)) + { + locales.push(GLOBAL_LOCALE.to_string()); + } + locales + } + + fn unique_custom_name(&self, raw_name: &str) -> String { + let base = { + let trimmed = raw_name.trim(); + if trimmed.is_empty() { + "Custom prompt".to_string() + } else { + trimmed.to_string() + } + }; + + if !self.custom_prompts.iter().any(|mode| mode.name == base) { + return base; + } + + let mut counter = 2; + loop { + let candidate = format!("{} ({})", base, counter); + if !self + .custom_prompts + .iter() + .any(|mode| mode.name == candidate) + { + return candidate; + } + counter += 1; + } + } + + pub fn custom_prompt(&self, id: &str) -> Option<&CustomPromptMode> { + self.custom_prompts.iter().find(|mode| mode.id == id) + } + + pub fn custom_prompt_mut(&mut self, id: &str) -> Option<&mut CustomPromptMode> { + self.custom_prompts.iter_mut().find(|mode| mode.id == id) + } + + pub fn ensure_mode_valid(&mut self) { + if self.mode_id.is_empty() { + self.mode_id = default_mode_id(); + } + if self.mode_id == "custom" || self.mode_id == "preset:custom" { + self.mode_id = MODE_ID_CUSTOM_DRAFT.to_string(); + } + if self.mode_id != MODE_ID_CUSTOM_DRAFT + && !is_builtin_mode(&self.mode_id) + && self.custom_prompt(&self.mode_id).is_none() + { + self.mode_id = MODE_ID_CUSTOM_DRAFT.to_string(); + } + } + + pub fn begin_custom_draft(&mut self, language_hint: Option<&str>) { + self.mode_id = MODE_ID_CUSTOM_DRAFT.to_string(); + self.custom_prompt_name.clear(); + let locales = self.locale_priority(language_hint); + let (system, user) = format_prompt_strings(&locales); + self.custom_prompt_system = system; + 
self.custom_prompt = user; + } + + pub fn create_custom_mode( + &mut self, + name: &str, + system_prompt: &str, + user_prompt: &str, + ) -> String { + let final_name = self.unique_custom_name(name); + let mut existing: HashSet = self + .custom_prompts + .iter() + .map(|mode| mode.id.clone()) + .collect(); + existing.insert(PRESET_ID_FORMAT.to_string()); + existing.insert(PRESET_ID_SUMMARY.to_string()); + existing.insert(MODE_ID_CUSTOM_DRAFT.to_string()); + + let id = generate_custom_mode_id(&existing, &final_name); + let mode = CustomPromptMode { + id: id.clone(), + name: final_name, + system_prompt: if system_prompt.trim().is_empty() { + None + } else { + Some(system_prompt.to_string()) + }, + user_prompt: user_prompt.to_string(), + }; + self.custom_prompts.push(mode); + id + } + + pub fn update_custom_mode( + &mut self, + id: &str, + name: &str, + system_prompt: &str, + user_prompt: &str, + ) -> Result<(), ()> { + if let Some(mode) = self.custom_prompt_mut(id) { + mode.name = name.to_string(); + mode.system_prompt = if system_prompt.trim().is_empty() { + None + } else { + Some(system_prompt.to_string()) + }; + mode.user_prompt = user_prompt.to_string(); + Ok(()) + } else { + Err(()) + } + } + + pub fn remove_custom_mode(&mut self, id: &str) -> bool { + let before = self.custom_prompts.len(); + self.custom_prompts.retain(|mode| mode.id != id); + if before != self.custom_prompts.len() { + if self.mode_id == id { + self.mode_id = default_mode_id(); + } + true + } else { + false + } + } +} + +fn normalize_locale_code(raw: &str) -> Option { + let trimmed = raw.trim(); + if trimmed.is_empty() { + return None; + } + let lower = trimmed.to_lowercase(); + if lower == "auto" { + return None; + } + let parts: Vec<&str> = trimmed + .split(|c| c == '-' || c == '_') + .filter(|p| !p.is_empty()) + .collect(); + if parts.is_empty() { + return None; + } + let language = parts[0].to_lowercase(); + if parts.len() >= 2 { + let region = parts[1].to_uppercase(); + return 
Some(format!("{}-{}", language, region)); + } + match language.as_str() { + "ja" => Some(LOCALE_JA_JP.to_string()), + "en" => Some(LOCALE_EN_US.to_string()), + _ => Some(language), + } +} + +impl LlmPostProcessor { + pub fn new() -> Self { + Self::default() + } + + pub fn process( + &self, + settings: &LlmPostProcessSettings, + transcript: &str, + dictionary_hint: Option<&str>, + language_hint: Option<&str>, + ) -> LlmResult { + if !settings.enabled { + return Err(LlmRequestError { + message: "LLM post-processing is disabled".to_string(), + status: None, + retry_after_secs: None, + }); + } + + if let Some(wait) = self.check_backoff() { + return Err(LlmRequestError { + message: format!("Backoff active. Retry after {}s.", wait), + status: None, + retry_after_secs: Some(wait), + }); + } + + let trimmed = transcript.trim(); + if trimmed.is_empty() { + return Err(LlmRequestError { + message: "Transcript is empty".to_string(), + status: None, + retry_after_secs: None, + }); + } + + let (prepared, truncated) = prepare_transcript(trimmed, settings.max_input_chars); + if prepared.is_empty() { + return Err(LlmRequestError { + message: "Transcript is empty after trimming".to_string(), + status: None, + retry_after_secs: None, + }); + } + + let payload = build_chat_payload(settings, &prepared, dictionary_hint, language_hint); + match execute_chat_completion(settings, &payload) { + Ok((response, status, latency_ms)) => { + if let Some(content) = extract_first_choice_text(&response) { + self.note_success(); + let polished = content.trim().to_string(); + return Ok(PostProcessOutcome { + content: if polished.is_empty() { + content + } else { + polished + }, + truncated_input: truncated, + latency_ms, + }); + } + let mut err = LlmRequestError { + message: "Missing content field".to_string(), + status: Some(status.as_u16()), + retry_after_secs: None, + }; + if let Some(wait) = self.register_failure() { + err.retry_after_secs = err.retry_after_secs.or(Some(wait)); + } + Err(err) + } 
+ Err(mut err) => { + if let Some(wait) = self.register_failure() { + err.retry_after_secs = err.retry_after_secs.or(Some(wait)); + } + Err(err) + } + } + } + + fn check_backoff(&self) -> Option { + let mut state = self.state.lock().unwrap(); + if let Some(next) = state.next_retry_at { + if let Some(remaining) = next.checked_duration_since(Instant::now()) { + return Some(remaining.as_secs().max(1)); + } + state.next_retry_at = None; + } + None + } + + fn register_failure(&self) -> Option { + let mut state = self.state.lock().unwrap(); + state.consecutive_failures += 1; + if state.consecutive_failures >= BACKOFF_FAILURES { + state.consecutive_failures = 0; + let wait = Duration::from_secs(BACKOFF_SECS); + state.next_retry_at = Some(Instant::now() + wait); + Some(wait.as_secs()) + } else { + None + } + } + + fn note_success(&self) { + let mut state = self.state.lock().unwrap(); + state.consecutive_failures = 0; + state.next_retry_at = None; + } +} + +#[derive(Serialize)] +struct ChatCompletionPayload { + model: String, + messages: Vec, + temperature: f32, + #[serde(skip_serializing_if = "Option::is_none")] + max_tokens: Option, +} + +#[derive(Serialize)] +struct ChatMessagePayload { + role: &'static str, + content: String, +} + +#[derive(Deserialize)] +struct ChatCompletionResponse { + choices: Vec, +} + +#[derive(Deserialize)] +struct ChatChoice { + message: Option, +} + +#[derive(Deserialize)] +struct ChatMessage { + content: Option, +} + +fn prepare_transcript(text: &str, max_chars: usize) -> (String, bool) { + let trimmed = text.trim(); + if trimmed.is_empty() { + return (String::new(), false); + } + + if max_chars == 0 { + return (trimmed.to_string(), false); + } + + let mut out = String::with_capacity(min(trimmed.len(), max_chars)); + let mut truncated = false; + for (idx, ch) in trimmed.chars().enumerate() { + if idx == max_chars { + truncated = true; + break; + } + out.push(ch); + } + (out, truncated) +} + +struct PromptTemplateResolved { + system: Option, + 
user: String, +} + +impl PromptTemplateResolved { + fn apply_dictionary(mut self, dictionary: &str) -> Self { + if let Some(ref mut system) = self.system { + let replaced = inject_dictionary(std::mem::take(system), dictionary); + *system = replaced; + } + self.user = inject_dictionary(self.user, dictionary); + self + } +} + +fn build_chat_payload( + settings: &LlmPostProcessSettings, + transcript: &str, + dictionary_hint: Option<&str>, + language_hint: Option<&str>, +) -> ChatCompletionPayload { + let dictionary = dictionary_hint.unwrap_or_default(); + let resolved = resolve_prompt(settings, transcript, dictionary, language_hint); + ChatCompletionPayload { + model: settings.effective_model(), + messages: prompt_to_messages(resolved), + temperature: 0.2, + max_tokens: Some(1024), + } +} + +fn resolve_prompt( + settings: &LlmPostProcessSettings, + transcript: &str, + dictionary: &str, + language_hint: Option<&str>, +) -> PromptTemplateResolved { + let mut mode_id = settings.mode_id.trim(); + if mode_id.is_empty() { + mode_id = PRESET_ID_FORMAT; + } + + if let Some(resolved) = + resolve_builtin_prompt(mode_id, settings, transcript, dictionary, language_hint) + { + return resolved; + } + + if mode_id == MODE_ID_CUSTOM_DRAFT { + return custom_prompt_to_resolved( + settings.custom_prompt_system.as_str(), + settings.custom_prompt.as_str(), + transcript, + dictionary, + ); + } + + if let Some(custom) = settings.custom_prompt(mode_id) { + return custom_prompt_to_resolved( + custom.system_prompt.as_deref().unwrap_or_default(), + custom.user_prompt.as_str(), + transcript, + dictionary, + ); + } + + custom_prompt_to_resolved( + settings.custom_prompt_system.as_str(), + settings.custom_prompt.as_str(), + transcript, + dictionary, + ) +} + +fn resolve_builtin_prompt( + mode_id: &str, + settings: &LlmPostProcessSettings, + transcript: &str, + dictionary: &str, + language_hint: Option<&str>, +) -> Option { + if !is_builtin_mode(mode_id) { + return None; + } + let locales = 
settings.locale_priority(language_hint); + let resolved = match mode_id { + PRESET_ID_FORMAT => format_prompt_for_locales(&locales, transcript), + PRESET_ID_SUMMARY => summary_prompt_for_locales(&locales, transcript), + _ => return None, + }; + Some(resolved.apply_dictionary(dictionary)) +} + +fn format_prompt_for_locales(locales: &[String], transcript: &str) -> PromptTemplateResolved { + for locale in locales { + if let Some(resolved) = try_format_prompt(locale, transcript) { + return resolved; + } + } + PromptTemplateResolved { + system: Some(FORMAT_SYSTEM_GLOBAL.to_string()), + user: format!("Transcript:\n{}", transcript), + } +} + +fn format_prompt_strings(locales: &[String]) -> (String, String) { + for locale in locales { + if let Some(pair) = format_prompt_template_for_locale(locale) { + return pair; + } + } + ( + FORMAT_SYSTEM_GLOBAL.to_string(), + FORMAT_USER_GLOBAL.to_string(), + ) +} + +fn format_prompt_template_for_locale(locale: &str) -> Option<(String, String)> { + let normalized = locale.to_ascii_lowercase(); + match normalized.as_str() { + "ja-jp" => Some((FORMAT_SYSTEM_JA.to_string(), FORMAT_USER_JA.to_string())), + "en-us" => Some((FORMAT_SYSTEM_EN.to_string(), FORMAT_USER_EN.to_string())), + "global" => Some(( + FORMAT_SYSTEM_GLOBAL.to_string(), + FORMAT_USER_GLOBAL.to_string(), + )), + _ => None, + } +} + +fn try_format_prompt(locale: &str, transcript: &str) -> Option { + let normalized = locale.to_ascii_lowercase(); + match normalized.as_str() { + "ja-jp" => Some(PromptTemplateResolved { + system: Some(FORMAT_SYSTEM_JA.to_string()), + user: format!("校正対象:\n{}", transcript), + }), + "en-us" => Some(PromptTemplateResolved { + system: Some(FORMAT_SYSTEM_EN.to_string()), + user: format!("Transcript to revise:\n{}", transcript), + }), + "global" => Some(PromptTemplateResolved { + system: Some(FORMAT_SYSTEM_GLOBAL.to_string()), + user: format!("Transcript:\n{}", transcript), + }), + _ => None, + } +} + +fn summary_prompt_for_locales(locales: 
&[String], transcript: &str) -> PromptTemplateResolved { + for locale in locales { + if let Some(resolved) = try_summary_prompt(locale, transcript) { + return resolved; + } + } + PromptTemplateResolved { + system: Some(SUMMARY_SYSTEM_GLOBAL.to_string()), + user: transcript.to_string(), + } +} + +fn summary_prompt_strings(locales: &[String]) -> (String, String) { + for locale in locales { + if let Some(pair) = summary_prompt_template_for_locale(locale) { + return pair; + } + } + ( + SUMMARY_SYSTEM_GLOBAL.to_string(), + SUMMARY_USER_DEFAULT.to_string(), + ) +} + +fn summary_prompt_template_for_locale(locale: &str) -> Option<(String, String)> { + let normalized = locale.to_ascii_lowercase(); + match normalized.as_str() { + "ja-jp" => Some(( + SUMMARY_SYSTEM_JA.to_string(), + SUMMARY_USER_DEFAULT.to_string(), + )), + "en-us" => Some(( + SUMMARY_SYSTEM_EN.to_string(), + SUMMARY_USER_DEFAULT.to_string(), + )), + "global" => Some(( + SUMMARY_SYSTEM_GLOBAL.to_string(), + SUMMARY_USER_DEFAULT.to_string(), + )), + _ => None, + } +} + +fn try_summary_prompt(locale: &str, transcript: &str) -> Option { + let normalized = locale.to_ascii_lowercase(); + match normalized.as_str() { + "ja-jp" => Some(PromptTemplateResolved { + system: Some(SUMMARY_SYSTEM_JA.to_string()), + user: transcript.to_string(), + }), + "en-us" => Some(PromptTemplateResolved { + system: Some(SUMMARY_SYSTEM_EN.to_string()), + user: transcript.to_string(), + }), + "global" => Some(PromptTemplateResolved { + system: Some(SUMMARY_SYSTEM_GLOBAL.to_string()), + user: transcript.to_string(), + }), + _ => None, + } +} + +fn custom_prompt_to_resolved( + system_prompt: &str, + user_prompt: &str, + transcript: &str, + dictionary: &str, +) -> PromptTemplateResolved { + let system = if system_prompt.trim().is_empty() { + None + } else { + Some(render_with_placeholders(system_prompt, transcript, dictionary).0) + }; + + let (rendered_user, had_transcript) = + render_with_placeholders(user_prompt, transcript, dictionary); + 
if had_transcript { + PromptTemplateResolved { + system, + user: rendered_user, + } + } else { + let mut content = rendered_user; + if content.trim().is_empty() { + content.push_str("Transcript:\n"); + } else { + content.push_str("\n\nTranscript:\n"); + } + content.push_str(transcript); + PromptTemplateResolved { + system, + user: content, + } + } +} + +fn prompt_to_messages(resolved: PromptTemplateResolved) -> Vec { + let mut messages = Vec::new(); + if let Some(system) = resolved.system { + messages.push(ChatMessagePayload { + role: "system", + content: system, + }); + } + messages.push(ChatMessagePayload { + role: "user", + content: resolved.user, + }); + messages +} + +fn render_with_placeholders(template: &str, transcript: &str, dictionary: &str) -> (String, bool) { + let mut rendered = template.to_string(); + let has_transcript = rendered.contains(PLACEHOLDER_TRANSCRIPT); + if has_transcript { + rendered = rendered.replace(PLACEHOLDER_TRANSCRIPT, transcript); + } + if rendered.contains(PLACEHOLDER_DICTIONARY) { + rendered = rendered.replace(PLACEHOLDER_DICTIONARY, dictionary); + } + (rendered, has_transcript) +} + +fn inject_dictionary(text: String, dictionary: &str) -> String { + if text.contains(PLACEHOLDER_DICTIONARY) { + text.replace(PLACEHOLDER_DICTIONARY, dictionary) + } else { + text + } +} + +fn execute_chat_completion( + settings: &LlmPostProcessSettings, + payload: &ChatCompletionPayload, +) -> LlmResult<(ChatCompletionResponse, StatusCode, u128)> { + let client = build_client_with_timeout(settings.timeout_secs).map_err(|e| LlmRequestError { + message: format!("Failed to create HTTP client: {}", e), + status: None, + retry_after_secs: None, + })?; + + let headers = create_headers(settings, true).map_err(|e| LlmRequestError { + message: e.to_string(), + status: None, + retry_after_secs: None, + })?; + + let url = join_url(&settings.effective_base_url(), CHAT_COMPLETIONS_PATH); + let start = Instant::now(); + let response = client + .post(&url) + 
.headers(headers) + .json(payload) + .send() + .map_err(map_reqwest_error)?; + + let status = response.status(); + let headers_snapshot = response.headers().clone(); + let elapsed_ms = start.elapsed().as_millis(); + let body = response.text().unwrap_or_default(); + + if !status.is_success() { + let mut retry_after_secs = parse_retry_after_secs(&headers_snapshot); + if status.as_u16() == 429 && retry_after_secs.is_none() { + retry_after_secs = Some(BACKOFF_SECS); + } + let snippet = preview_body(&body); + return Err(LlmRequestError { + message: format!("HTTP {} {}", status.as_u16(), snippet), + status: Some(status.as_u16()), + retry_after_secs, + }); + } + + let parsed: ChatCompletionResponse = + serde_json::from_str(&body).map_err(|e| LlmRequestError { + message: format!("Failed to parse JSON: {}", e), + status: Some(status.as_u16()), + retry_after_secs: None, + })?; + + Ok((parsed, status, elapsed_ms)) +} + +fn extract_first_choice_text(resp: &ChatCompletionResponse) -> Option { + resp.choices + .get(0) + .and_then(|choice| choice.message.as_ref()) + .and_then(|msg| msg.content.as_ref()) + .and_then(extract_content_value) +} + +fn extract_content_value(value: &Value) -> Option { + match value { + Value::String(s) => Some(s.trim().to_string()), + Value::Array(parts) => { + let mut buf = String::new(); + for part in parts { + if let Some(text) = part.get("text").and_then(|v| v.as_str()) { + buf.push_str(text); + buf.push('\n'); + } else if let Some(text) = part.get("content").and_then(|v| v.as_str()) { + buf.push_str(text); + buf.push('\n'); + } + } + if buf.is_empty() { + None + } else { + Some(buf.trim().to_string()) + } + } + Value::Object(map) => { + if let Some(text) = map.get("text").and_then(|v| v.as_str()) { + return Some(text.trim().to_string()); + } + if let Some(content) = map.get("content").and_then(|v| v.as_str()) { + return Some(content.trim().to_string()); + } + if let Some(resp) = map.get("response").and_then(|v| v.as_str()) { + return 
Some(resp.trim().to_string()); + } + None + } + _ => None, + } +} + +#[derive(Deserialize)] +struct ModelsResponse { + data: Vec, +} + +#[derive(Deserialize, Default)] +struct ModelItem { + id: String, + owned_by: Option, + hidden: Option, + format: Option, + details: Option, +} + +#[derive(Deserialize, Default)] +struct ModelDetails { + #[serde(default)] + parameter_size: Option, +} + +pub fn run_connection_test(settings: &LlmPostProcessSettings) -> Result { + run_connection_test_local(settings) +} + +fn run_connection_test_chat(settings: &LlmPostProcessSettings) -> Result { + let payload = ChatCompletionPayload { + model: settings.effective_model(), + messages: vec![ + ChatMessagePayload { + role: "system", + content: "Reply with the word 'pong'.".to_string(), + }, + ChatMessagePayload { + role: "user", + content: "ping".to_string(), + }, + ], + temperature: 0.0, + max_tokens: Some(8), + }; + + let (response, status, latency_ms) = + execute_chat_completion(settings, &payload).map_err(|err| anyhow!(err.message))?; + + let content = + extract_first_choice_text(&response).ok_or_else(|| anyhow!("Missing content field"))?; + let message = format!("Chat completion OK: {}", preview_body(&content)); + + Ok(ConnectionTestOutcome { + status: Some(status.as_u16()), + duration_ms: latency_ms, + message, + }) +} + +fn run_connection_test_local(settings: &LlmPostProcessSettings) -> Result { + let start = Instant::now(); + match fetch_models(settings) { + Ok(models) => { + let mut message = format!("Model list OK ({} models)", models.len()); + if let Some(first) = models.first() { + message = format!("Model list OK (first: {})", first.id); + } + Ok(ConnectionTestOutcome { + status: Some(200), + duration_ms: start.elapsed().as_millis(), + message, + }) + } + Err(first_err) => { + let first_msg = first_err.to_string(); + match run_connection_test_chat(settings) { + Ok(mut outcome) => { + outcome.message = format!( + "Chat completion OK (model fetch failed: {})", + 
preview_body(&first_msg) + ); + Ok(outcome) + } + Err(second_err) => Err(first_err.context(second_err)), + } + } + } +} + +pub fn fetch_models(settings: &LlmPostProcessSettings) -> Result> { + let client = build_client_with_timeout(settings.timeout_secs)?; + let headers = create_headers(settings, false)?; + let url = join_url(&settings.effective_base_url(), MODELS_PATH); + let response = client + .get(&url) + .headers(headers) + .send() + .with_context(|| format!("GET {}", url))?; + let status = response.status(); + if !status.is_success() { + let body = response.text().unwrap_or_default(); + return Err(anyhow!("HTTP {} {}", status.as_u16(), preview_body(&body))); + } + + let parsed: ModelsResponse = response.json().context("parse models response")?; + let mut models: Vec = parsed.data.iter().filter_map(map_model_item).collect(); + models.sort_by(|a, b| a.id.cmp(&b.id)); + if models.is_empty() { + return Err(anyhow!("No chat-capable models reported")); + } + Ok(models) +} + +fn map_model_item(item: &ModelItem) -> Option { + if item.id.trim().is_empty() { + return None; + } + + if item.hidden.unwrap_or(false) { + return None; + } + if let Some(format) = item.format.as_ref() { + let lower = format.to_ascii_lowercase(); + if lower.contains("embed") { + return None; + } + } + + let mut label = item.id.clone(); + if let Some(owner) = item.owned_by.as_ref() { + if !owner.is_empty() { + label = format!("{} ({})", item.id, owner); + } + } + + if let Some(details) = item.details.as_ref() { + if let Some(size) = details.parameter_size.as_ref() { + if !size.is_empty() { + label = format!("{} [{}]", label, size); + } + } + } + + Some(LlmModelInfo { + id: item.id.clone(), + label, + }) +} + +fn build_client_with_timeout(timeout_secs: u64) -> Result { + let secs = timeout_secs.max(3).min(120); + Client::builder() + .timeout(Duration::from_secs(secs)) + .build() + .context("create HTTP client") +} + +fn create_headers( + _settings: &LlmPostProcessSettings, + include_content_type: 
bool, +) -> Result { + let mut headers = HeaderMap::new(); + headers.insert(USER_AGENT, HeaderValue::from_static(USER_AGENT_VALUE)); + if include_content_type { + headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json")); + } + + Ok(headers) +} + +fn join_url(base: &str, path: &str) -> String { + let mut joined = base.trim_end_matches('/').to_string(); + joined.push('/'); + joined.push_str(path.trim_start_matches('/')); + joined +} + +fn map_reqwest_error(err: reqwest::Error) -> LlmRequestError { + if err.is_timeout() { + return LlmRequestError { + message: "LLM request timed out".to_string(), + status: None, + retry_after_secs: None, + }; + } + if err.is_connect() { + return LlmRequestError { + message: format!("Failed to connect: {}", err), + status: None, + retry_after_secs: None, + }; + } + LlmRequestError { + message: format!("HTTP request failed: {}", err), + status: err.status().map(|s| s.as_u16()), + retry_after_secs: None, + } +} + +fn parse_retry_after_secs(headers: &HeaderMap) -> Option { + headers + .get("Retry-After") + .and_then(|value| value.to_str().ok()) + .and_then(|s| s.trim().parse::().ok()) +} + +fn preview_body(body: &str) -> String { + let mut out = String::new(); + let mut count = 0usize; + let mut truncated = false; + for ch in body.chars() { + if count >= MAX_ERROR_BODY_PREVIEW { + truncated = true; + break; + } + out.push(ch); + count += 1; + } + let trimmed = out.trim(); + if truncated { + format!("{}…", trimmed) + } else { + trimmed.to_string() + } +} diff --git a/src/main.rs b/src/main.rs index d0d77b4..cfe6227 100644 --- a/src/main.rs +++ b/src/main.rs @@ -15,6 +15,7 @@ mod dictionary; mod gui; mod hotkey; mod i18n; +mod llm; mod transcription; mod utils; // removed updater module (unused)