ayutaz · ayutaz · Jan 15, 2026 · Jan 15, 2026
diff --git a/Assets/uCosyVoice/Runtime/Inference/FlowRunner.cs b/Assets/uCosyVoice/Runtime/Inference/FlowRunner.cs
@@ -218,7 +218,7 @@ public Tensor<float> ProcessWithPrompt(
             var xFinal = xBatch.DownloadToArray();
             xBatch.Dispose();
 
-            // Extract only the generated portion (excluding prompt)
+            // Extract generated portion only (first batch, skip prompt mel frames)
             var melData = new float[MEL_CHANNELS * generatedMelLen];
             for (int c = 0; c < MEL_CHANNELS; c++)
             {

diff --git a/Assets/uCosyVoice/Samples/TTSDemo.cs b/Assets/uCosyVoice/Samples/TTSDemo.cs
@@ -61,9 +61,9 @@ private void Start()
                 _stopButton.gameObject.SetActive(false);
             }
 
-            // Set default text
+            // Set default text (same as Python comparison test)
             if (_textInput != null && string.IsNullOrEmpty(_textInput.text))
-                _textInput.text = "Hello, this is a test of CosyVoice text to speech synthesis.";
+                _textInput.text = "Hello, this is a test of CosyVoice.";
 
             SetStatus("Click 'Load Models' to initialize TTS.");
             SetStats("");
@@ -189,18 +189,21 @@ private void DoSynthesizeZeroShot(string text)
 
             try
             {
-                // Load prompt audio if not cached
-                if (_promptAudio == null)
+                // Prompt audio is re-extracted on every call (no caching), so a clip must be assigned
+                if (_promptAudioClip == null)
                 {
-                    if (_promptAudioClip == null)
-                    {
-                        SetStatus("Error: No prompt audio clip assigned in Inspector.");
-                        return;
-                    }
+                    SetStatus("Error: No prompt audio clip assigned in Inspector.");
+                    return;
+                }
 
-                    SetStatus("Processing reference voice...");
-                    _promptAudio = ExtractAndResampleAudio(_promptAudioClip, 16000);
-                    Debug.Log($"[TTSDemo] Prompt audio: {_promptAudio.Length} samples at 16kHz ({_promptAudio.Length / 16000f:F2}s)");
+                SetStatus("Processing reference voice...");
+                _promptAudio = ExtractAndResampleAudio(_promptAudioClip, 16000);
+                Debug.Log($"[TTSDemo] Prompt audio: {_promptAudio.Length} samples at 16kHz ({_promptAudio.Length / 16000f:F2}s)");
+
+                if (_promptAudio == null || _promptAudio.Length == 0)
+                {
+                    SetStatus("Error: Failed to extract prompt audio.");
+                    return;
                 }
 
                 SetStatus("Synthesizing with voice cloning... (UI may freeze)");