Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion LrcParser.Tests/Parser/Lrc/Lines/LrcLyricParserTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ public void TestDecode(string lyric, LrcLyric expected)
{
Text = "帰り道は",
StartTimes = [17000],
TimeTags = TestCaseTagHelper.ParseTimeTags(["[1,start]:1000", "[2,start]:2000", "[3,start]:3000", "[3,end]:4000"]),
// [0,start]:17000 is created from the line time tag
TimeTags = TestCaseTagHelper.ParseTimeTags(["[0,start]:17000", "[1,start]:1000", "[2,start]:2000", "[3,start]:3000", "[3,end]:4000"]),
},
],
[
Expand Down
61 changes: 54 additions & 7 deletions LrcParser.Tests/Parser/Lrc/Utils/LrcTimedTextUtilsTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,25 +13,72 @@ public class LrcTimedTextUtilsTest
#region Decode

// Each case: raw timed text, the lyric text expected once the word time tags are stripped,
// and the expected time tags in "[index,state]:milliseconds" form.
[TestCase("<00:17.97>帰<00:18.37>り<00:18.55>道<00:18.94>は<00:19.22>", "帰り道は", new[] { "[0,start]:17970", "[1,start]:18370", "[2,start]:18550", "[3,start]:18940", "[3,end]:19220" })]
[TestCase(" <00:17.97>帰<00:18.37>り<00:18.55>道<00:18.94>は<00:19.22>", "帰り道は", new[] { "[0,start]:17970", "[1,start]:18370", "[2,start]:18550", "[3,start]:18940", "[3,end]:19220" })]
[TestCase("<00:17.97>帰<00:18.37>り<00:18.55>道<00:18.94>は<00:19.22> ", "帰り道は", new[] { "[0,start]:17970", "[1,start]:18370", "[2,start]:18550", "[3,start]:18940", "[3,end]:19220" })]
[TestCase("帰<00:18.37>り<00:18.55>道<00:18.94>は<00:19.22>", "帰り道は", new[] { "[0,start]:0", "[1,start]:18370", "[2,start]:18550", "[3,start]:18940", "[3,end]:19220" })]
[TestCase("<00:17.97>帰<00:18.37>り<00:18.55>道<00:18.94>は", "帰り道は", new[] { "[0,start]:17970", "[1,start]:18370", "[2,start]:18550", "[3,start]:18940" })]
[TestCase("帰り道は", "帰り道は", new string[] { })]
[TestCase("", "", new string[] { })]
[TestCase(" ", "", new string[] { })]
[TestCase(null, "", new string[] { })]
[TestCase("<00:51.00> <01:29.99><01:48.29> <02:31.00> <02:41.99>You gotta fight !", "You gotta fight !", new[] { "[0,start]:161990" })] // multiple empty tags
// Surrounding time tags
[TestCase(
"<00:06.84> Every <00:07.20> <00:07.56> night <00:07.87> <00:08.19> that <00:08.46> <00:08.79> goes <00:09.19> <00:09.59> between",
"Every night that goes between",
new[] { "[0,start]:6840", "[4,end]:7200", "[6,start]:7560", "[10,end]:7870", "[12,start]:8190", "[15,end]:8460", "[17,start]:8790", "[20,end]:9190", "[22,start]:9590" }
)]
// Alternating time tags, spaced on both sides
[TestCase(
"<00:06.84> Every <00:07.56> night <00:08.19> that <00:08.79> goes <00:09.59> between", "Every night that goes between",
new[] { "[0,start]:6840", "[6,start]:7560", "[12,start]:8190", "[17,start]:8790", "[22,start]:9590" }
)]
// Alternating time tags, unspaced
[TestCase(
"<00:06.84>Every<00:07.56>night<00:08.19>that<00:08.79>goes<00:09.59>between", "Everynightthatgoesbetween",
new[] { "[0,start]:6840", "[5,start]:7560", "[10,start]:8190", "[14,start]:8790", "[18,start]:9590" }
)]
[TestCase(
"Every<00:07.56>night<00:08.19>that<00:08.79>goes<00:09.59>between", "Everynightthatgoesbetween",
new[] { "[0,start]:0", "[5,start]:7560", "[10,start]:8190", "[14,start]:8790", "[18,start]:9590" }
)]
// Alternating time tags, prefix spaced
[TestCase(
"<00:06.84> Every<00:07.56> night<00:08.19> that<00:08.79> goes<00:09.59> between", "Every night that goes between",
new[] { "[0,start]:6840", "[6,start]:7560", "[12,start]:8190", "[17,start]:8790", "[22,start]:9590" }
)]
[TestCase(
"Every<00:07.56> night<00:08.19> that<00:08.79> goes<00:09.59> between", "Every night that goes between",
new[] { "[0,start]:0", "[6,start]:7560", "[12,start]:8190", "[17,start]:8790", "[22,start]:9590" }
)]
// Alternating time tags, postfix spaced
[TestCase(
"<00:06.84>Every <00:07.56>night <00:08.19>that <00:08.79>goes <00:09.59>between", "Every night that goes between",
new[] { "[0,start]:6840", "[6,start]:7560", "[12,start]:8190", "[17,start]:8790", "[22,start]:9590" }
)]
[TestCase(
"Every <00:07.56>night <00:08.19>that <00:08.79>goes <00:09.59>between", "Every night that goes between",
new[] { "[0,start]:0", "[6,start]:7560", "[12,start]:8190", "[17,start]:8790", "[22,start]:9590" }
)]
public void TestDecode(string text, string expectedText, string[] expectedTimeTags)
{
    // The line start time is fixed at 0 here, which is why cases whose text does not begin
    // with a word time tag expect "[0,start]:0" for the first segment.
    var (actualText, actualTimeTags) = LrcTimedTextUtils.TimedTextToObject(text, 0);

    Assert.That(actualText, Is.EqualTo(expectedText));
    Assert.That(actualTimeTags, Is.EqualTo(TestCaseTagHelper.ParseTimeTags(expectedTimeTags)));
}

[TestCase("<00:51.00><01:29.99><01:48.29><02:31.00><02:41.99>You gotta fight !", "You gotta fight !", new[] { "[0,start]:51000" })] // decode with invalid format.
public void TestDecodeWithInvalidFormat(string text, string expectedText, string[] expectedTimeTags)
[TestCase(
"<00:06.84>Every<00:07.56>night<00:08.19>that<00:08.79>goes<00:09.59>between", 6840, "Everynightthatgoesbetween",
new[] { "[0,start]:6840", "[5,start]:7560", "[10,start]:8190", "[14,start]:8790", "[18,start]:9590" }
)]
[TestCase(
"Every<00:07.56>night<00:08.19>that<00:08.79>goes<00:09.59>between", 6840, "Everynightthatgoesbetween",
new[] { "[0,start]:6840", "[5,start]:7560", "[10,start]:8190", "[14,start]:8790", "[18,start]:9590" }
)]
public void TestDecodeWithStartTime(string text, int lineStartTime, string expectedText, string[] expectedTimeTags)
{
var (actualText, actualTimeTags) = LrcTimedTextUtils.TimedTextToObject(text);
var (actualText, actualTimeTags) = LrcTimedTextUtils.TimedTextToObject(text, lineStartTime);

Assert.That(actualText, Is.EqualTo(expectedText));
Assert.That(actualTimeTags, Is.EqualTo(TestCaseTagHelper.ParseTimeTags(expectedTimeTags)));
Expand Down
2 changes: 1 addition & 1 deletion LrcParser/Parser/Lrc/Lines/LrcLyricParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ public override LrcLyric Decode(string text)
};
}

var (lyric, timeTags) = LrcTimedTextUtils.TimedTextToObject(rawLyric);
var (lyric, timeTags) = LrcTimedTextUtils.TimedTextToObject(rawLyric, startTimes[0]);

return new LrcLyric
{
Expand Down
105 changes: 79 additions & 26 deletions LrcParser/Parser/Lrc/Utils/LrcTimedTextUtils.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Copyright (c) karaoke.dev <contact@karaoke.dev>. Licensed under the MIT Licence.
// See the LICENCE file in the repository root for full licence text.

using System.Text;
using LrcParser.Model;
using LrcParser.Utils;
using static LrcParser.Parser.Lrc.Utils.TimeTagMode;
Expand All @@ -10,53 +11,105 @@ namespace LrcParser.Parser.Lrc.Utils;
internal static class LrcTimedTextUtils
{
/// <summary>
/// Parses the passed text for word time tags, returning the lyric text with the tags removed
/// together with the time tags keyed by their position in that text.
/// </summary>
/// <param name="timedText">
/// Lyric text that may contain word time tags (anything matched by
/// <see cref="TimeTagUtils.WORD_TIME_TAG_REGEX"/>). May be null, empty or whitespace.
/// </param>
/// <param name="lineStartTime">
/// Time used as the start of the first lyric segment when no word time tag precedes it.
/// It is stored next to values produced by <see cref="TimeTagUtils.ConvertTimeTagToMilliseconds"/>,
/// so it is expected to be in milliseconds as well.
/// </param>
/// <returns>
/// A tuple of the lyric text and its time tags. When the input is null/whitespace the text is
/// empty; when it contains no word time tags the input is returned unchanged with no time tags.
/// Otherwise every text segment is trimmed and segments are joined by at most a single space.
/// </returns>
internal static Tuple<string, SortedDictionary<TextIndex, int>> TimedTextToObject(string timedText, int lineStartTime)
{
    if (string.IsNullOrWhiteSpace(timedText))
    {
        return new Tuple<string, SortedDictionary<TextIndex, int>>("", new SortedDictionary<TextIndex, int>());
    }

    var textLength = timedText.Length;
    var lyricText = new StringBuilder();
    var timeTags = new SortedDictionary<TextIndex, int>();

    var timeTagMatches = TimeTagUtils.WORD_TIME_TAG_REGEX.Matches(timedText);

    if (timeTagMatches.Count == 0)
    {
        // no word time tags, return lyric as-is
        return new Tuple<string, SortedDictionary<TextIndex, int>>(timedText, new SortedDictionary<TextIndex, int>());
    }

    // Time that applies to the next non-empty segment; starts as the line time and is
    // replaced by each word time tag as it is consumed.
    var lastTimeTag = lineStartTime;
    var segmentStartIndex = 0;
    var insertSpace = false;
    var lastTagWasStartTag = false;

    foreach (var match in timeTagMatches.ToArray())
    {
        // Segment ends at the start of the next time tag
        var segmentEndIndex = match.Index;

        var segment = timedText[segmentStartIndex..segmentEndIndex];

        // Update next start index
        segmentStartIndex = segmentEndIndex + match.Length;

        if (string.IsNullOrWhiteSpace(segment))
        {
            // The last segment was a start tag, and the next segment is empty, insert end tag.
            // lastTagWasStartTag is only set after text was appended, so Length - 1 is >= 0 here.
            if (lastTagWasStartTag)
            {
                timeTags.TryAdd(new TextIndex(lyricText.Length - 1, IndexState.End), lastTimeTag);
                lastTagWasStartTag = false;
            }

            // Skip empty lyric, update start time
            lastTimeTag = TimeTagUtils.ConvertTimeTagToMilliseconds(match.Value, WordTimeTag);

            // Segment contains only whitespace but isn't empty, insert a space before an upcoming valid segment.
            if (segment.Length > 0)
            {
                insertSpace = true;
            }

            continue;
        }

        // If the last segment ended with whitespace, or the current starts with whitespace,
        // insert a single space before the next segment.
        if ((char.IsWhiteSpace(segment[0]) || insertSpace) && lyricText.Length > 0)
        {
            lyricText.Append(' ');
        }

        // Add start time tag for next lyric.
        // TryAdd is used because a tag may already exist at this position.
        timeTags.TryAdd(new TextIndex(lyricText.Length), lastTimeTag);
        lastTagWasStartTag = true;

        // Append lyric segment without surrounding whitespace
        lyricText.Append(segment.Trim());

        // Update start time for the next segment
        lastTimeTag = TimeTagUtils.ConvertTimeTagToMilliseconds(match.Value, WordTimeTag);

        // Reset insertSpace flag after adding a segment,
        // and instead track whether this new segment ends with whitespace
        insertSpace = char.IsWhiteSpace(segment[^1]);
    }

    // Text to the right of the final time tag.
    var remaining = timedText[segmentStartIndex..textLength];

    if (!string.IsNullOrWhiteSpace(remaining))
    {
        if ((char.IsWhiteSpace(remaining[0]) || insertSpace) && lyricText.Length > 0)
        {
            // Add space before the next segment
            lyricText.Append(' ');
        }

        // Add remaining text with start time tag
        timeTags.TryAdd(new TextIndex(lyricText.Length), lastTimeTag);
        lyricText.Append(remaining.Trim());
    }
    else if (lyricText.Length > 0)
    {
        // No remaining text, last time tag was end tag.
        // Guarded on Length > 0: input made only of time tags and whitespace appends no lyric
        // text, and an end tag would otherwise be emitted at index -1 for an empty lyric.
        timeTags.TryAdd(new TextIndex(lyricText.Length - 1, IndexState.End), lastTimeTag);
    }

    return new Tuple<string, SortedDictionary<TextIndex, int>>(lyricText.ToString(), timeTags);
}

internal static string ToTimedText(string text, SortedDictionary<TextIndex, int> timeTags)
Expand Down