Skip to content
15 changes: 7 additions & 8 deletions OpenUtau.Core/Voicevox/Phonemizers/VoicevoxPhonemizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,14 @@ public override void SetUp(Note[][] notes, UProject project, UTrack track) {
for (int i = 0; i < notes.Length; i++) {
var currentLyric = notes[i][0].lyric.Normalize();
var lyricList = currentLyric.Split(" ");
if (lyricList.Length > 1) {
currentLyric = lyricList[1];
}
if (!VoicevoxUtils.IsSyllableVowelExtensionNote(currentLyric)) {
if (VoicevoxUtils.IsDicPau(currentLyric)) {
if (!VoicevoxUtils.IsSyllableVowelExtensionNote(lyricList[^1])) {
if (VoicevoxUtils.IsPau(lyricList[^1])) {
currentLyric = string.Empty;
} else if (VoicevoxUtils.dic.IsDic(lyricList[^1])) {
currentLyric = VoicevoxUtils.dic.Lyrictodic(lyricList[^1]);
} else if (!VoicevoxUtils.IsKana(lyricList[^1])) {
currentLyric = string.Empty;
} else if (VoicevoxUtils.dic.IsDic(currentLyric)) {
currentLyric = VoicevoxUtils.dic.Lyrictodic(currentLyric);
} else if (!VoicevoxUtils.IsDicKana(currentLyric)) {
} else {
currentLyric = string.Empty;
Copy link

Copilot AI Jan 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this mapping logic, the else branch sets currentLyric to string.Empty even when lyricList[^1] is a valid Kana and not in the dictionary, which means plain Kana lyrics are discarded instead of being passed through. This likely should keep the Kana value (as done in BuildVNotes in VoicevoxRenderer) so that non-dictionary Kana notes are still synthesized rather than treated as empty/rest notes.

Suggested change
currentLyric = string.Empty;
currentLyric = lyricList[^1];

Copilot uses AI. Check for mistakes.
}
}
Expand Down
186 changes: 102 additions & 84 deletions OpenUtau.Core/Voicevox/VoicevoxRenderer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,15 @@
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using K4os.Hash.xxHash;
using NAudio.Wave;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using OpenUtau.Core.Format;
using OpenUtau.Core.Render;
using OpenUtau.Core.Ustx;
using Serilog;
using SharpCompress;
using ThirdParty;

/*
Expand Down Expand Up @@ -65,7 +67,8 @@ public Task<RenderResult> Render(RenderPhrase phrase, Progress progress, int tra
}
string progressInfo = $"Track {trackNo + 1}: {this} \"{string.Join(" ", phrase.phones.Select(p => p.phoneme))}\"";
progress.Complete(0, progressInfo);
var wavPath = Path.Join(PathManager.Inst.CachePath, $"vv-{phrase.hash:x16}.wav");
ulong hash = HashPhraseGroups(phrase);
var wavPath = Path.Join(PathManager.Inst.CachePath, $"vv-{phrase.hash:x16}_{hash:x8}.wav");
phrase.AddCacheFile(wavPath);
var result = Layout(phrase);
if (!File.Exists(wavPath)) {
Expand All @@ -75,7 +78,8 @@ public Task<RenderResult> Render(RenderPhrase phrase, Progress progress, int tra
VoicevoxUtils.Loaddic(singer);
}
try {
VoicevoxSynthParams vsParams = PhraseToVoicevoxSynthParams(phrase, phrase.singer as VoicevoxSinger);
Log.Information($"Starting Voicevox synthesis");
VoicevoxSynthParams vsParams = PhraseToVoicevoxSynthParams(phrase, phrase.singer as VoicevoxSinger, false);

int vvTotalFrames = 0;
double frameMs = (1000d / VoicevoxUtils.fps);
Expand Down Expand Up @@ -106,18 +110,21 @@ public Task<RenderResult> Render(RenderPhrase phrase, Progress progress, int tra
result.positionMs = phrase.positionMs - phrase.timeAxis.TickPosToMsPos((vsParams.phonemes.First().frame_length / VoicevoxUtils.fps) * 1000d);
}

int speaker = 0;
int speakerID = 0;
singer.voicevoxConfig.styles.ForEach(style => {
if (style.name.Equals(phrase.singer.Subbanks[1].Suffix) && style.type.Equals("frame_decode")) {
speaker = style.id;
speakerID = style.id;
}
// Apply the voice color setting value
if (style.name.Equals(phrase.phones[0].suffix) && style.type.Equals("frame_decode")) {
speaker = style.id;
} else if ((style.name + "_" + style.type).Equals(phrase.phones[0].suffix)) {
speaker = style.id;
speakerID = style.id;
} else // Supports styles with the same name but different types
if ((style.name + "_" + style.type).Equals(phrase.phones[0].suffix)) {
speakerID = style.id;
}
});
var queryurl = new VoicevoxURL() { method = "POST", path = "/frame_synthesis", query = new Dictionary<string, string> { { "speaker", speaker.ToString() } }, body = JsonConvert.SerializeObject(vsParams), accept = "audio/wav" };
VoicevoxUtils.InitializedSpeaker(speakerID.ToString(), false);
var queryurl = new VoicevoxURL() { method = "POST", path = "/frame_synthesis", query = new Dictionary<string, string> { { "speaker", speakerID.ToString() } }, body = JsonConvert.SerializeObject(vsParams), accept = "audio/wav" };
var response = VoicevoxClient.Inst.SendRequest(queryurl);
byte[] bytes = null;
if (!response.Item2.Equals(null)) {
Expand All @@ -132,7 +139,11 @@ public Task<RenderResult> Render(RenderPhrase phrase, Progress progress, int tra
File.WriteAllBytes(wavPath, bytes);
}
} catch (Exception e) {
Log.Error(e, "Failed to create a voice base.");
if (e is VoicevoxException) {
throw new MessageCustomizableException("Failed to create the audio.", "Failed to create the audio.", e);
} else {
Log.Error(e.Message);
}
}
if (cancellation.IsCancellationRequested) {
return new RenderResult();
Expand All @@ -155,85 +166,34 @@ public Task<RenderResult> Render(RenderPhrase phrase, Progress progress, int tra
return task;
}

private VoicevoxSynthParams PhraseToVoicevoxSynthParams(RenderPhrase phrase, VoicevoxSinger singer) {
private VoicevoxSynthParams PhraseToVoicevoxSynthParams(RenderPhrase phrase, VoicevoxSinger singer, bool pitch_slur) {

VoicevoxSynthParams vsParams = new VoicevoxSynthParams();
//Prepare for future additions of Teacher Singer.
string baseSingerID = VoicevoxUtils.getBaseSingerID(singer);

if (phrase.phones.All(p => VoicevoxUtils.IsDicKana(p.phoneme) || VoicevoxUtils.IsDicPau(p.phoneme))) {
// TODO: slur support
List<VoicevoxNote> vnotes = new List<VoicevoxNote>();
//if (slur) {
for (int i = 0; i < phrase.phones.Length; i++) {
vnotes.Add(new VoicevoxNote() {
lyric = phrase.phones[i].phoneme,
positionMs = phrase.phones[i].positionMs,
durationMs = phrase.phones[i].durationMs,
tone = (int)(phrase.phones[i].tone + phrase.phones[i].toneShift)
});
}
//} else {
//}

VoicevoxQueryMain vqMain = VoicevoxUtils.NoteGroupsToVQuery(vnotes.ToArray(), phrase.timeAxis);

VoicevoxUtils.InitializedSpeaker(baseSingerID, true);
List<VoicevoxNote> vNotes = BuildVNotes(phrase, baseSingerID);

//Match the phonemes in the synthesis parameters to the scores in the score to update F0 and volume
//Create parameters for the update source.
VoicevoxQueryMain vqMain = VoicevoxUtils.NoteGroupsToVQuery(vNotes.ToArray(), phrase.timeAxis);
VoicevoxSynthParams vsParams;
if (IsPhonemeNoteCountMatch(phrase)) {
vsParams = VoicevoxUtils.VoicevoxVoiceBase(vqMain, baseSingerID);
} else if (phrase.phones.All(p => VoicevoxUtils.IsVowel(p.phoneme) || VoicevoxUtils.IsConsonant(p.phoneme))) {
List<VoicevoxNote> vnotes = new List<VoicevoxNote>();
for (int i = 0; i < phrase.notes.Length; i++) {
var durationMs = phrase.notes[i].durationMs;
var currentLyric = phrase.notes[i].lyric.Normalize();
var lyricList = currentLyric.Split(" ");
var shiftTone = phrase.phones[0].toneShift;
if (lyricList.Length > 1) {
currentLyric = lyricList[1];
}
if (!VoicevoxUtils.IsSyllableVowelExtensionNote(currentLyric)) {
if (VoicevoxUtils.IsDicPau(currentLyric)) {
currentLyric = string.Empty;
} else if (VoicevoxUtils.dic.IsDic(currentLyric)) {
currentLyric = VoicevoxUtils.dic.Lyrictodic(currentLyric);
} else if (!VoicevoxUtils.IsDicKana(currentLyric)) {
currentLyric = string.Empty;
}
} else if (vnotes.Count >= i - 1 && 0 <= i - 1) {
// TODO: slur support
//if (slur) {
var tempNote = vnotes[vnotes.Count - 1];
tempNote.durationMs += durationMs;
vnotes[vnotes.Count - 1] = tempNote;
continue;
//} else {
// if (VoicevoxUtils.phoneme_List.kanas.TryGetValue(vnotes[i - 1].lyric, out string str)) {
// currentLyric = str;
// }
//}
}
vnotes.Add(new VoicevoxNote() {
lyric = currentLyric,
positionMs = phrase.notes[i].positionMs,
durationMs = durationMs,
tone = (int)(phrase.notes[i].tone + shiftTone)
});
}
//Match the phonemes in the synthesis parameters to the scores in the score to update F0 and volume
//Create parameters for the update source.
VoicevoxQueryMain vqMain = VoicevoxUtils.NoteGroupsToVQuery(vnotes.ToArray(), phrase.timeAxis);
VoicevoxSynthParams vsParams_1 = VoicevoxUtils.VoicevoxVoiceBase(vqMain, baseSingerID);
} else {
//vsParamsServer is a parameter to hold phonemes generated from note lyrics
VoicevoxSynthParams vsParamsServer = VoicevoxUtils.VoicevoxVoiceBase(vqMain, baseSingerID);

//Create parameters for the update destination.
vsParams = PhonemeToVoicevoxSynthParams(phrase);
VoicevoxSynthParams vsParams_2 = vsParams.Clone();


if (vsParams.phonemes.Count == vsParams_1.phonemes.Count) {
for (int i = 0; i < vsParams_1.phonemes.Count; i++) {
//vsParamsUser is a parameter to hold phonemes changed by the user
VoicevoxSynthParams vsParamsUser = vsParams.Clone();
if (vsParams.phonemes.Count == vsParamsServer.phonemes.Count) {
for (int i = 0; i < vsParamsServer.phonemes.Count; i++) {
// TODO: Develop a VOICEVOX engine dedicated to OpenUtau so that synthesis parameters are updated when phonemes are changed.
//var flag = phrase.phones[i].flags.FirstOrDefault(f => f.Item1 == VoicevoxUtils.REPM);
//if (flag != null) {
// if (flag.Item3.Equals(VoicevoxUtils.REPLACE)) {
vsParams.phonemes[i].phoneme = vsParams_1.phonemes[i].phoneme;
vsParams.phonemes[i].phoneme = vsParamsServer.phonemes[i].phoneme;
// }
//}
}
Expand All @@ -242,22 +202,62 @@ private VoicevoxSynthParams PhraseToVoicevoxSynthParams(RenderPhrase phrase, Voi
vsParams.f0 = VoicevoxUtils.QueryToF0(vqMain, vsParams, baseSingerID);
vsParams.volume = VoicevoxUtils.QueryToVolume(vqMain, vsParams, baseSingerID);
//Update phoneme
for (int i = 0; i < vsParams_2.phonemes.Count; i++) {
for (int i = 0; i < vsParamsUser.phonemes.Count; i++) {
//var flag = phrase.phones[i].flags.FirstOrDefault(f => f.Item1 == VoicevoxUtils.REPM);
//if (flag != null) {
// if (flag.Item3.Equals(VoicevoxUtils.REPLACE)) {
vsParams.phonemes[i].phoneme = vsParams_2.phonemes[i].phoneme;
vsParams.phonemes[i].phoneme = vsParamsUser.phonemes[i].phoneme;
// }
//}
}
} else {
throw new MessageCustomizableException(
$"Failed to create a voice base. The phoneme is not supported by the VOICEVOX engine.\n{string.Join(" ", phrase.phones.Select(p => p.phoneme))}",
$"You are confusing phonemes and hiragana.\n{string.Join(" ", phrase.phones.Select(p => p.phoneme))}", new VoicevoxException());
}
if (pitch_slur) {
VoicevoxUtils.AdjustF0ForSlur(VoicevoxUtils.NoteGroupsToVQuery(vNotes.ToArray(), phrase.timeAxis, true), vsParams.f0);
}
return vsParams;
}

/// <summary>
/// Builds the VOICEVOX note list for a phrase, mapping each note's lyric to a
/// form the engine accepts: dictionary entries are converted, plain kana pass
/// through, pauses and unsupported lyrics become empty (rest) lyrics, and
/// vowel-extension ("+") lyrics are kept as-is for downstream handling.
/// </summary>
/// <param name="phrase">The render phrase whose notes are converted.</param>
/// <param name="baseSingerID">NOTE(review): currently unused in this method;
/// kept to preserve the call signature — confirm whether a future use is
/// planned before removing it.</param>
/// <returns>One <c>VoicevoxNote</c> per input note, in score order.</returns>
/// <exception cref="MessageCustomizableException">Wraps any failure while
/// converting the phrase (e.g. unsupported phonemes or invalid input).</exception>
private List<VoicevoxNote> BuildVNotes(RenderPhrase phrase, string baseSingerID) {
    List<VoicevoxNote> vNotes = new List<VoicevoxNote>();
    try {
        // The tone shift is read from the first phone and applied to every
        // note, so it is loop-invariant; hoist it. The guard keeps the
        // original behavior for an empty notes array (phones[0] untouched),
        // and it stays inside the try so a missing phone still surfaces as
        // the custom exception below.
        var shiftTone = phrase.notes.Length > 0 ? phrase.phones[0].toneShift : 0;
        for (int i = 0; i < phrase.notes.Length; i++) {
            var durationMs = phrase.notes[i].durationMs;
            var currentLyric = phrase.notes[i].lyric.Normalize();
            // Multi-token lyrics (e.g. "hint lyric") are matched on the last token.
            var lyricList = currentLyric.Split(" ");
            var token = lyricList[^1];
            if (!VoicevoxUtils.IsSyllableVowelExtensionNote(token)) {
                if (VoicevoxUtils.IsPau(token)) {
                    // Pause/rest notes are sent with an empty lyric.
                    currentLyric = string.Empty;
                } else if (VoicevoxUtils.dic.IsDic(token)) {
                    // Dictionary lyrics are converted to their engine form.
                    currentLyric = VoicevoxUtils.dic.Lyrictodic(token);
                } else if (VoicevoxUtils.phoneme_List.kanas.ContainsKey(token)) {
                    // Plain kana lyrics pass through unchanged.
                    currentLyric = token;
                } else {
                    // Unsupported lyrics become rests rather than failing.
                    currentLyric = string.Empty;
                }
            }
            vNotes.Add(new VoicevoxNote() {
                lyric = currentLyric,
                positionMs = phrase.notes[i].positionMs,
                durationMs = durationMs,
                tone = (int)(phrase.notes[i].tone + shiftTone)
            });
        }
    } catch (Exception e) {
        var phonemeText = string.Join(" ", phrase.phones.Select(p => p.phoneme));
        throw new MessageCustomizableException(
            $"Failed to create a voice base. One or more phonemes may not be supported by the VOICEVOX engine.\n{phonemeText}",
            $"An error occurred while creating a voice base from the current phrase. This may be caused by unsupported phonemes, invalid input, or a mismatch between phonemes and hiragana.\nPhonemes: {phonemeText}\nDetails: {e.Message}",
            new VoicevoxException());
    }

    return vNotes;
}

/// <summary>
/// Returns true when the phrase's phone count equals the number of notes that
/// are not vowel-extension ("+") notes AND every phoneme is a known kana.
/// Used to decide whether phonemes can be mapped 1:1 onto score notes.
/// </summary>
private bool IsPhonemeNoteCountMatch(RenderPhrase phrase) {
    // Count(predicate) avoids the intermediate Where() enumeration.
    int mappableNotes = phrase.notes.Count(note => !VoicevoxUtils.IsSyllableVowelExtensionNote(note.lyric));
    return phrase.phones.Length == mappableNotes
        && phrase.phones.All(p => VoicevoxUtils.phoneme_List.kanas.ContainsKey(p.phoneme));
}

private VoicevoxSynthParams PhonemeToVoicevoxSynthParams(RenderPhrase phrase) {
VoicevoxSynthParams vsParams = new VoicevoxSynthParams();
int headFrames = (int)Math.Round((VoicevoxUtils.headS * VoicevoxUtils.fps), MidpointRounding.AwayFromZero);
Expand Down Expand Up @@ -407,7 +407,7 @@ RenderPitchResult IRenderer.LoadRenderedPitch(RenderPhrase phrase) {
if (singer != null) {

string baseSingerID = VoicevoxUtils.getBaseSingerID(singer);
Copy link

Copilot AI Jan 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The local variable baseSingerID is computed here but never used in the rest of LoadRenderedPitch, which can confuse readers into thinking it affects the pitch-loading logic. Consider removing this unused variable (or using it where intended) to keep the method focused on its actual dependencies.

Suggested change
string baseSingerID = VoicevoxUtils.getBaseSingerID(singer);

Copilot uses AI. Check for mistakes.
VoicevoxSynthParams vsParams = PhraseToVoicevoxSynthParams(phrase, phrase.singer as VoicevoxSinger/*, true*/);
VoicevoxSynthParams vsParams = PhraseToVoicevoxSynthParams(phrase, phrase.singer as VoicevoxSinger, true);
double frameMs = (1000d / VoicevoxUtils.fps);
int vvTotalFrames = 0;
vsParams.phonemes.ForEach(x => vvTotalFrames += x.frame_length);
Expand Down Expand Up @@ -441,5 +441,23 @@ RenderPitchResult IRenderer.LoadRenderedPitch(RenderPhrase phrase) {
}
return null;
}

// Computes a supplementary cache hash over the inputs that affect synthesis:
// the pre-effect hash, the leading phone's tone / direct flag / volume, and
// the tone-shift values (first phone only in direct mode, otherwise every
// phone participates).
ulong HashPhraseGroups(RenderPhrase phrase) {
    using (var buffer = new MemoryStream()) {
        using (var writer = new BinaryWriter(buffer)) {
            var firstPhone = phrase.phones[0];
            writer.Write(phrase.preEffectHash);
            writer.Write(firstPhone.tone);
            writer.Write(firstPhone.direct);
            if (firstPhone.direct) {
                // Direct mode: only the first phone's shift matters.
                writer.Write(firstPhone.toneShift);
            } else {
                // Otherwise each phone's shift is folded into the hash.
                foreach (var phone in phrase.phones) {
                    writer.Write(phone.toneShift);
                }
            }
            writer.Write(firstPhone.volume);
            return XXH64.DigestOf(buffer.ToArray());
        }
    }
}

}
}
Loading
Loading