From 3dec5ac34a7795bea5352209144cc090b47e49a3 Mon Sep 17 00:00:00 2001 From: Peter Chapman Date: Thu, 19 Mar 2026 15:15:31 +1300 Subject: [PATCH 1/5] Porting usability score computation from confidence scores --- .../QualityEstimation/QualityEstimation.cs | 235 ++++++++++++++++++ .../QualityEstimation/Scores/BookScores.cs | 29 +++ .../QualityEstimation/Scores/ChapterScores.cs | 53 ++++ .../QualityEstimation/Scores/Score.cs | 15 ++ .../QualityEstimation/Scores/SequenceScore.cs | 21 ++ .../QualityEstimation/Scores/TxtFileScores.cs | 32 +++ .../QualityEstimation/Scores/VerseScore.cs | 15 ++ .../Thresholds/BookThresholds.cs | 9 + .../Thresholds/ChapterThresholds.cs | 9 + .../Thresholds/Thresholds.cs | 14 ++ .../Thresholds/VerseThresholds.cs | 9 + .../Usability/BookUsability.cs | 7 + .../Usability/ChapterUsability.cs | 7 + .../Usability/SequenceUsability.cs | 7 + .../Usability/TxtFileUsability.cs | 7 + .../Usability/UsabilityBase.cs | 11 + .../Usability/VerseUsability.cs | 7 + .../QualityEstimation/UsabilityLabel.cs | 9 + .../QualityEstimation/UsabilityParameters.cs | 30 +++ 19 files changed, 526 insertions(+) create mode 100644 src/SIL.Machine/QualityEstimation/QualityEstimation.cs create mode 100644 src/SIL.Machine/QualityEstimation/Scores/BookScores.cs create mode 100644 src/SIL.Machine/QualityEstimation/Scores/ChapterScores.cs create mode 100644 src/SIL.Machine/QualityEstimation/Scores/Score.cs create mode 100644 src/SIL.Machine/QualityEstimation/Scores/SequenceScore.cs create mode 100644 src/SIL.Machine/QualityEstimation/Scores/TxtFileScores.cs create mode 100644 src/SIL.Machine/QualityEstimation/Scores/VerseScore.cs create mode 100644 src/SIL.Machine/QualityEstimation/Thresholds/BookThresholds.cs create mode 100644 src/SIL.Machine/QualityEstimation/Thresholds/ChapterThresholds.cs create mode 100644 src/SIL.Machine/QualityEstimation/Thresholds/Thresholds.cs create mode 100644 src/SIL.Machine/QualityEstimation/Thresholds/VerseThresholds.cs create mode 100644 src/SIL.Machine/QualityEstimation/Usability/BookUsability.cs create mode 100644 src/SIL.Machine/QualityEstimation/Usability/ChapterUsability.cs create mode 100644 src/SIL.Machine/QualityEstimation/Usability/SequenceUsability.cs create mode 100644 src/SIL.Machine/QualityEstimation/Usability/TxtFileUsability.cs create mode 100644 src/SIL.Machine/QualityEstimation/Usability/UsabilityBase.cs create mode 100644 src/SIL.Machine/QualityEstimation/Usability/VerseUsability.cs create mode 100644 src/SIL.Machine/QualityEstimation/UsabilityLabel.cs create mode 100644 src/SIL.Machine/QualityEstimation/UsabilityParameters.cs diff --git a/src/SIL.Machine/QualityEstimation/QualityEstimation.cs b/src/SIL.Machine/QualityEstimation/QualityEstimation.cs new file mode 100644 index 00000000..2f42fc7a --- /dev/null +++ b/src/SIL.Machine/QualityEstimation/QualityEstimation.cs @@ -0,0 +1,235 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using SIL.Machine.QualityEstimation.Scores; +using SIL.Machine.QualityEstimation.Thresholds; +using SIL.Machine.QualityEstimation.Usability; +using SIL.Scripture; + +namespace SIL.Machine.QualityEstimation +{ + public class QualityEstimation + { + public BookThresholds BookThresholds { get; set; } = new BookThresholds(); + + public ChapterThresholds ChapterThresholds { get; set; } = new ChapterThresholds(); + + public VerseThresholds VerseThresholds { get; set; } = new VerseThresholds(); + + public UsabilityParameters Usable { get; set; } = UsabilityParameters.Usable; + + public UsabilityParameters Unusable { get; set; } = UsabilityParameters.Unusable; + + public List UsabilityBooks { get; } = new List(); + + public List UsabilityChapters { get; } = new List(); + + public List UsabilitySequences { get; } = new List(); + + public List UsabilityTxtFiles { get; } = new List(); + + public List UsabilityVerses { get; } = new List(); + + public double CalculateUsableProbability(double chrF3) + { + double usableWeight = Math.Exp(-Math.Pow(chrF3 - Usable.Mean, 2) / (2 * Usable.Variance)) * Usable.Count; + double unusableWeight = + Math.Exp(-Math.Pow(chrF3 - Unusable.Mean, 2) / (2 * Unusable.Variance)) * Unusable.Count; + return usableWeight / (usableWeight + unusableWeight); + } + + public void ComputeBookUsability(BookScores bookScores) + { + foreach (string book in bookScores.Scores.Keys) + { + Score score = bookScores.GetScore(book); + if (score is null) + { + continue; + } + + List bookUsabilities = bookScores.GetVerseUsabilities(book); + double averageProbability = bookUsabilities.Average(); + UsabilityBooks.Add( + new BookUsability + { + Book = book, + Usability = averageProbability, + ProjectedChrF3 = score.ProjectedChrF3, + Label = BookThresholds.ReturnLabel(averageProbability), + } + ); + } + } + + public void ComputeChapterUsability(ChapterScores chapterScores) + { + foreach (KeyValuePair> chapterScoresByBook in chapterScores.Scores) + { + string book = chapterScoresByBook.Key; + foreach (int chapter in chapterScoresByBook.Value.Keys) + { + Score score = chapterScores.GetScore(book, chapter); + if (score is null) + { + continue; + } + + List chapterUsabilities = chapterScores.GetVerseUsabilities(book, chapter); + double averageProbability = chapterUsabilities.Average(); + UsabilityChapters.Add( + new ChapterUsability + { + Book = book, + Chapter = chapter, + Usability = averageProbability, + ProjectedChrF3 = score.ProjectedChrF3, + Label = ChapterThresholds.ReturnLabel(averageProbability), + } + ); + } + } + } + + public void ComputeTxtFileUsability(TxtFileScores txtFileScores) + { + foreach (string targetDraftFileStem in txtFileScores.Scores.Keys) + { + Score score = txtFileScores.GetScore(targetDraftFileStem); + if (score is null) + { + continue; + } + + List txtFileUsabilities = txtFileScores.GetSequenceUsabilities(targetDraftFileStem); + double averageProbability = txtFileUsabilities.Average(); + UsabilityTxtFiles.Add( + new TxtFileUsability + { + TargetDraftFile = targetDraftFileStem, + Usability = averageProbability, + ProjectedChrF3 = score.ProjectedChrF3, + Label = VerseThresholds.ReturnLabel(averageProbability), + } + ); + } + } + + public void ComputeUsableProportions( + List verseScores, + ref ChapterScores chapterScores, + ref BookScores bookScores + ) + { + foreach (VerseScore verseScore in verseScores.Where(v => v.VerseRef.VerseNum > 0)) + { + double probability = CalculateUsableProbability(verseScore.ProjectedChrF3); + chapterScores.AppendVerseUsability( + verseScore.VerseRef.Book, + verseScore.VerseRef.ChapterNum, + probability + ); + bookScores.AppendVerseUsability(verseScore.VerseRef.Book, probability); + UsabilityVerses.Add( + new VerseUsability + { + Book = verseScore.VerseRef.Book, + Chapter = verseScore.VerseRef.ChapterNum, + Verse = verseScore.VerseRef.Verse, + Usability = probability, + ProjectedChrF3 = verseScore.ProjectedChrF3, + Label = VerseThresholds.ReturnLabel(probability), + } + ); + } + + ComputeChapterUsability(chapterScores); + ComputeBookUsability(bookScores); + } + + public void ComputeUsableProportions(List sequenceScores, ref TxtFileScores txtFileScores) + { + foreach (SequenceScore sequenceScore in sequenceScores) + { + double probability = CalculateUsableProbability(sequenceScore.ProjectedChrF3); + txtFileScores.AppendSequenceUsability(sequenceScore.TargetDraftFileStem, probability); + UsabilitySequences.Add( + new SequenceUsability + { + TargetDraftFile = sequenceScore.TargetDraftFileStem, + SequenceNumber = sequenceScore.SequenceNumber, + Usability = probability, + ProjectedChrF3 = sequenceScore.ProjectedChrF3, + Label = VerseThresholds.ReturnLabel(probability), + } + ); + } + + ComputeTxtFileUsability(txtFileScores); + } + + public void EstimateQuality(double slope, double intercept, Dictionary confidences) + { + var sequenceScores = new List(); + var txtFileScores = new TxtFileScores(); + ProjectChrF3(slope, intercept, confidences, ref sequenceScores, ref txtFileScores); + ComputeUsableProportions(sequenceScores, ref txtFileScores); + } + + public void EstimateQuality(double slope, double intercept, Dictionary confidences) + { + var verseScores = new List(); + var chapterScores = new ChapterScores(); + var bookScores = new BookScores(); + ProjectChrF3(slope, intercept, confidences, ref verseScores, ref chapterScores, ref bookScores); + ComputeUsableProportions(verseScores, ref chapterScores, ref bookScores); + } + + public void ProjectChrF3( + double slope, + double intercept, + Dictionary confidences, + ref List sequenceScores, + ref TxtFileScores txtFileScores + ) + { + foreach (KeyValuePair confidence in confidences) + { + string[] keyParts = confidence.Key.Split(':'); + if (keyParts.Length == 2 && int.TryParse(keyParts[1], out int sequenceNumber)) + { + string targetDraftFileStem = keyParts[0]; + var score = new SequenceScore( + slope, + confidence.Value, + intercept, + sequenceNumber, + targetDraftFileStem + ); + sequenceScores.Add(score); + txtFileScores.AddScore(targetDraftFileStem, score); + } + } + } + + public void ProjectChrF3( + double slope, + double intercept, + Dictionary confidences, + ref List verseScores, + ref ChapterScores chapterScores, + ref BookScores bookScores + ) + { + foreach (KeyValuePair confidence in confidences) + { + var score = new VerseScore(slope, confidence.Value, intercept, confidence.Key); + verseScores.Add(score); + string book = confidence.Key.Book; + int chapter = confidence.Key.ChapterNum; + chapterScores.AddScore(book, chapter, score); + bookScores.AddScore(book, score); + } + } + } +} diff --git a/src/SIL.Machine/QualityEstimation/Scores/BookScores.cs b/src/SIL.Machine/QualityEstimation/Scores/BookScores.cs new file mode 100644 index 00000000..d76dd676 --- /dev/null +++ b/src/SIL.Machine/QualityEstimation/Scores/BookScores.cs @@ -0,0 +1,29 @@ +using System.Collections.Generic; + +namespace SIL.Machine.QualityEstimation.Scores +{ + public class BookScores + { + private readonly Dictionary> _verseUsabilities = new Dictionary>(); + + public readonly Dictionary Scores = new Dictionary(); + + public void AddScore(string book, Score score) => Scores[book] = score; + + public Score GetScore(string book) => Scores.TryGetValue(book, out Score score) ? score : null; + + public void AppendVerseUsability(string book, double usability) + { + if (!_verseUsabilities.TryGetValue(book, out List list)) + { + list = new List(); + _verseUsabilities[book] = list; + } + + list.Add(usability); + } + + public List GetVerseUsabilities(string book) => + _verseUsabilities.TryGetValue(book, out List list) ? new List(list) : new List(); + } +} diff --git a/src/SIL.Machine/QualityEstimation/Scores/ChapterScores.cs b/src/SIL.Machine/QualityEstimation/Scores/ChapterScores.cs new file mode 100644 index 00000000..147a4b57 --- /dev/null +++ b/src/SIL.Machine/QualityEstimation/Scores/ChapterScores.cs @@ -0,0 +1,53 @@ +using System.Collections.Generic; + +namespace SIL.Machine.QualityEstimation.Scores +{ + public class ChapterScores + { + private readonly Dictionary>> _verseUsabilities = + new Dictionary>>(); + + public readonly Dictionary> Scores = + new Dictionary>(); + + public void AddScore(string book, int chapter, Score score) + { + if (!Scores.TryGetValue(book, out Dictionary chapters)) + { + chapters = new Dictionary(); + Scores[book] = chapters; + } + + chapters[chapter] = score; + } + + public Score GetScore(string book, int chapter) => + Scores.TryGetValue(book, out Dictionary chapters) + && chapters.TryGetValue(chapter, out Score score) + ? score + : null; + + public void AppendVerseUsability(string book, int chapter, double usability) + { + if (!_verseUsabilities.TryGetValue(book, out Dictionary> chapters)) + { + chapters = new Dictionary>(); + _verseUsabilities[book] = chapters; + } + + if (!chapters.TryGetValue(chapter, out List list)) + { + list = new List(); + chapters[chapter] = list; + } + + list.Add(usability); + } + + public List GetVerseUsabilities(string book, int chapter) => + _verseUsabilities.TryGetValue(book, out Dictionary> chapters) + && chapters.TryGetValue(chapter, out List list) + ? new List(list) + : new List(); + } +} diff --git a/src/SIL.Machine/QualityEstimation/Scores/Score.cs b/src/SIL.Machine/QualityEstimation/Scores/Score.cs new file mode 100644 index 00000000..5173e5cb --- /dev/null +++ b/src/SIL.Machine/QualityEstimation/Scores/Score.cs @@ -0,0 +1,15 @@ +namespace SIL.Machine.QualityEstimation.Scores +{ + public class Score + { + public Score(double slope, double confidence, double intercept) + { + Confidence = confidence; + ProjectedChrF3 = slope * confidence + intercept; + } + + public double Confidence { get; } + + public double ProjectedChrF3 { get; } + } +} diff --git a/src/SIL.Machine/QualityEstimation/Scores/SequenceScore.cs b/src/SIL.Machine/QualityEstimation/Scores/SequenceScore.cs new file mode 100644 index 00000000..51560663 --- /dev/null +++ b/src/SIL.Machine/QualityEstimation/Scores/SequenceScore.cs @@ -0,0 +1,21 @@ +namespace SIL.Machine.QualityEstimation.Scores +{ + public class SequenceScore : Score + { + public SequenceScore( + double slope, + double confidence, + double intercept, + int sequenceNumber, + string targetDraftFileStem + ) + : base(slope, confidence, intercept) + { + SequenceNumber = sequenceNumber; + TargetDraftFileStem = targetDraftFileStem; + } + + public int SequenceNumber { get; } + public string TargetDraftFileStem { get; } + } +} diff --git a/src/SIL.Machine/QualityEstimation/Scores/TxtFileScores.cs b/src/SIL.Machine/QualityEstimation/Scores/TxtFileScores.cs new file mode 100644 index 00000000..eb3c6895 --- /dev/null +++ b/src/SIL.Machine/QualityEstimation/Scores/TxtFileScores.cs @@ -0,0 +1,32 @@ +using System.Collections.Generic; + +namespace SIL.Machine.QualityEstimation.Scores +{ + public class TxtFileScores + { + private readonly Dictionary> _sequenceUsabilities = new Dictionary>(); + + public readonly Dictionary Scores = new Dictionary(); + + public void AddScore(string targetDraftFileStem, Score score) => Scores[targetDraftFileStem] = score; + + public Score GetScore(string targetDraftFileStem) => + Scores.TryGetValue(targetDraftFileStem, out Score score) ? score : null; + + public void AppendSequenceUsability(string targetDraftFileStem, double usability) + { + if (!_sequenceUsabilities.TryGetValue(targetDraftFileStem, out List list)) + { + list = new List(); + _sequenceUsabilities[targetDraftFileStem] = list; + } + + list.Add(usability); + } + + public List GetSequenceUsabilities(string targetDraftFileStem) => + _sequenceUsabilities.TryGetValue(targetDraftFileStem, out List list) + ? new List(list) + : new List(); + } +} diff --git a/src/SIL.Machine/QualityEstimation/Scores/VerseScore.cs b/src/SIL.Machine/QualityEstimation/Scores/VerseScore.cs new file mode 100644 index 00000000..c8bdaf29 --- /dev/null +++ b/src/SIL.Machine/QualityEstimation/Scores/VerseScore.cs @@ -0,0 +1,15 @@ +using SIL.Scripture; + +namespace SIL.Machine.QualityEstimation.Scores +{ + public class VerseScore : Score + { + public VerseScore(double slope, double confidence, double intercept, VerseRef verseRef) + : base(slope, confidence, intercept) + { + VerseRef = verseRef; + } + + public VerseRef VerseRef { get; } + } +} diff --git a/src/SIL.Machine/QualityEstimation/Thresholds/BookThresholds.cs b/src/SIL.Machine/QualityEstimation/Thresholds/BookThresholds.cs new file mode 100644 index 00000000..7d2d0067 --- /dev/null +++ b/src/SIL.Machine/QualityEstimation/Thresholds/BookThresholds.cs @@ -0,0 +1,9 @@ +namespace SIL.Machine.QualityEstimation.Thresholds +{ + public class BookThresholds : Thresholds + { + public override double GreenThreshold => 0.745; + + public override double YellowThreshold => 0.62; + } +} diff --git a/src/SIL.Machine/QualityEstimation/Thresholds/ChapterThresholds.cs b/src/SIL.Machine/QualityEstimation/Thresholds/ChapterThresholds.cs new file mode 100644 index 00000000..387cfb7a --- /dev/null +++ b/src/SIL.Machine/QualityEstimation/Thresholds/ChapterThresholds.cs @@ -0,0 +1,9 @@ +namespace SIL.Machine.QualityEstimation.Thresholds +{ + public class ChapterThresholds : Thresholds + { + public override double GreenThreshold => 0.745; + + public override double YellowThreshold => 0.62; + } +} diff --git a/src/SIL.Machine/QualityEstimation/Thresholds/Thresholds.cs b/src/SIL.Machine/QualityEstimation/Thresholds/Thresholds.cs new file mode 100644 index 00000000..4205d463 --- /dev/null +++ b/src/SIL.Machine/QualityEstimation/Thresholds/Thresholds.cs @@ -0,0 +1,14 @@ +namespace SIL.Machine.QualityEstimation.Thresholds +{ + public abstract class Thresholds + { + public abstract double GreenThreshold { get; } + + public abstract double YellowThreshold { get; } + + public UsabilityLabel ReturnLabel(double probability) => + probability >= GreenThreshold ? UsabilityLabel.Green + : probability >= YellowThreshold ? UsabilityLabel.Yellow + : UsabilityLabel.Red; + } +} diff --git a/src/SIL.Machine/QualityEstimation/Thresholds/VerseThresholds.cs b/src/SIL.Machine/QualityEstimation/Thresholds/VerseThresholds.cs new file mode 100644 index 00000000..9312e55e --- /dev/null +++ b/src/SIL.Machine/QualityEstimation/Thresholds/VerseThresholds.cs @@ -0,0 +1,9 @@ +namespace SIL.Machine.QualityEstimation.Thresholds +{ + public class VerseThresholds : Thresholds + { + public override double GreenThreshold => 0.745; + + public override double YellowThreshold => 0.62; + } +} diff --git a/src/SIL.Machine/QualityEstimation/Usability/BookUsability.cs b/src/SIL.Machine/QualityEstimation/Usability/BookUsability.cs new file mode 100644 index 00000000..69ed2ff2 --- /dev/null +++ b/src/SIL.Machine/QualityEstimation/Usability/BookUsability.cs @@ -0,0 +1,7 @@ +namespace SIL.Machine.QualityEstimation.Usability +{ + public class BookUsability : UsabilityBase + { + public string Book { get; set; } + } +} diff --git a/src/SIL.Machine/QualityEstimation/Usability/ChapterUsability.cs b/src/SIL.Machine/QualityEstimation/Usability/ChapterUsability.cs new file mode 100644 index 00000000..ab46cc27 --- /dev/null +++ b/src/SIL.Machine/QualityEstimation/Usability/ChapterUsability.cs @@ -0,0 +1,7 @@ +namespace SIL.Machine.QualityEstimation.Usability +{ + public class ChapterUsability : BookUsability + { + public int Chapter { get; set; } + } +} diff --git a/src/SIL.Machine/QualityEstimation/Usability/SequenceUsability.cs b/src/SIL.Machine/QualityEstimation/Usability/SequenceUsability.cs new file mode 100644 index 00000000..8295e70f --- /dev/null +++ b/src/SIL.Machine/QualityEstimation/Usability/SequenceUsability.cs @@ -0,0 +1,7 @@ +namespace SIL.Machine.QualityEstimation.Usability +{ + public class SequenceUsability : TxtFileUsability + { + public int SequenceNumber { get; set; } + } +} diff --git a/src/SIL.Machine/QualityEstimation/Usability/TxtFileUsability.cs b/src/SIL.Machine/QualityEstimation/Usability/TxtFileUsability.cs new file mode 100644 index 00000000..1688b68e --- /dev/null +++ b/src/SIL.Machine/QualityEstimation/Usability/TxtFileUsability.cs @@ -0,0 +1,7 @@ +namespace SIL.Machine.QualityEstimation.Usability +{ + public class TxtFileUsability : UsabilityBase + { + public string TargetDraftFile { get; set; } + } +} diff --git a/src/SIL.Machine/QualityEstimation/Usability/UsabilityBase.cs b/src/SIL.Machine/QualityEstimation/Usability/UsabilityBase.cs new file mode 100644 index 00000000..178e233a --- /dev/null +++ b/src/SIL.Machine/QualityEstimation/Usability/UsabilityBase.cs @@ -0,0 +1,11 @@ +namespace SIL.Machine.QualityEstimation.Usability +{ + public abstract class UsabilityBase + { + public UsabilityLabel Label { get; set; } + + public double ProjectedChrF3 { get; set; } + + public double Usability { get; set; } + } +} diff --git a/src/SIL.Machine/QualityEstimation/Usability/VerseUsability.cs b/src/SIL.Machine/QualityEstimation/Usability/VerseUsability.cs new file mode 100644 index 00000000..16ca3ea6 --- /dev/null +++ b/src/SIL.Machine/QualityEstimation/Usability/VerseUsability.cs @@ -0,0 +1,7 @@ +namespace SIL.Machine.QualityEstimation.Usability +{ + public class VerseUsability : ChapterUsability + { + public string Verse { get; set; } + } +} diff --git a/src/SIL.Machine/QualityEstimation/UsabilityLabel.cs b/src/SIL.Machine/QualityEstimation/UsabilityLabel.cs new file mode 100644 index 00000000..0b207384 --- /dev/null +++ b/src/SIL.Machine/QualityEstimation/UsabilityLabel.cs @@ -0,0 +1,9 @@ +namespace SIL.Machine.QualityEstimation +{ + public enum UsabilityLabel + { + Red, + Yellow, + Green, + } +} diff --git a/src/SIL.Machine/QualityEstimation/UsabilityParameters.cs b/src/SIL.Machine/QualityEstimation/UsabilityParameters.cs new file mode 100644 index 00000000..0181af76 --- /dev/null +++ b/src/SIL.Machine/QualityEstimation/UsabilityParameters.cs @@ -0,0 +1,30 @@ +namespace SIL.Machine.QualityEstimation +{ + public class UsabilityParameters + { + public static readonly UsabilityParameters Unusable = new UsabilityParameters( + count: 97, + mean: 45.85, + variance: 99.91 + ); + + public static readonly UsabilityParameters Usable = new UsabilityParameters( + count: 263, + mean: 51.4, + variance: 95.19 + ); + + public UsabilityParameters(double count, double mean, double variance) + { + Count = count; + Mean = mean; + Variance = variance; + } + + public double Count { get; } + + public double Mean { get; } + + public double Variance { get; } + } +} From 5e5db10778f6c71a8a1dad5be5cecd9437109225 Mon Sep 17 00:00:00 2001 From: Peter Chapman Date: Tue, 24 Mar 2026 11:46:35 +1300 Subject: [PATCH 2/5] Refactored quality estimation --- .../QualityEstimation/QualityEstimation.cs | 175 +++++++++++------- .../QualityEstimation/Thresholds.cs | 20 ++ .../Thresholds/BookThresholds.cs | 9 - .../Thresholds/ChapterThresholds.cs | 9 - .../Thresholds/Thresholds.cs | 14 -- .../Thresholds/VerseThresholds.cs | 9 - 6 files changed, 124 insertions(+), 112 deletions(-) create mode 100644 src/SIL.Machine/QualityEstimation/Thresholds.cs delete mode 100644 src/SIL.Machine/QualityEstimation/Thresholds/BookThresholds.cs delete mode 100644 src/SIL.Machine/QualityEstimation/Thresholds/ChapterThresholds.cs delete mode 100644 src/SIL.Machine/QualityEstimation/Thresholds/Thresholds.cs delete mode 100644 src/SIL.Machine/QualityEstimation/Thresholds/VerseThresholds.cs diff --git a/src/SIL.Machine/QualityEstimation/QualityEstimation.cs b/src/SIL.Machine/QualityEstimation/QualityEstimation.cs index 2f42fc7a..ee3bea96 100644 --- a/src/SIL.Machine/QualityEstimation/QualityEstimation.cs +++ b/src/SIL.Machine/QualityEstimation/QualityEstimation.cs @@ -2,35 +2,102 @@ using System.Collections.Generic; using System.Linq; using SIL.Machine.QualityEstimation.Scores; -using SIL.Machine.QualityEstimation.Thresholds; using SIL.Machine.QualityEstimation.Usability; using SIL.Scripture; namespace SIL.Machine.QualityEstimation { + /// + /// Provides chrF3 quality estimation support for pre-translations. + /// public class QualityEstimation { - public BookThresholds BookThresholds { get; set; } = new BookThresholds(); + private readonly BookScores _bookScores = new BookScores(); + private readonly ChapterScores _chapterScores = new ChapterScores(); + private readonly double _intercept; + private readonly List _sequenceScores = new List(); + private readonly double _slope; + private readonly TxtFileScores _txtFileScores = new TxtFileScores(); + private readonly List _verseScores = new List(); - public ChapterThresholds ChapterThresholds { get; set; } = new ChapterThresholds(); + public QualityEstimation(double slope, double intercept) + { + _slope = slope; + _intercept = intercept; + } + + /// + /// The threshold values used to calculate the usability label for every book. + /// + public Thresholds BookThresholds { get; set; } = new Thresholds(greenThreshold: 0.745, yellowThreshold: 0.62); + + /// + /// The threshold values used to calculate the usability label for every chapter. + /// + public Thresholds ChapterThresholds { get; set; } = + new Thresholds(greenThreshold: 0.745, yellowThreshold: 0.62); - public VerseThresholds VerseThresholds { get; set; } = new VerseThresholds(); + /// + /// The threshold values used to calculate the usability label for every verse. + /// + public Thresholds VerseThresholds { get; set; } = new Thresholds(greenThreshold: 0.745, yellowThreshold: 0.62); + /// + /// The usable parameters to calculate the usable probabilities. + /// public UsabilityParameters Usable { get; set; } = UsabilityParameters.Usable; + /// + /// The unusable parameters to calculate the usable probabilities. + /// public UsabilityParameters Unusable { get; set; } = UsabilityParameters.Unusable; + /// + /// The usability scores for every book. + /// public List UsabilityBooks { get; } = new List(); + /// + /// The usability scores for every chapter. + /// public List UsabilityChapters { get; } = new List(); + /// + /// The usability scores for every line in a text file. + /// public List UsabilitySequences { get; } = new List(); + /// + /// The usability scores for every text file. + /// public List UsabilityTxtFiles { get; } = new List(); + /// + /// The usability scores for every verse. + /// public List UsabilityVerses { get; } = new List(); - public double CalculateUsableProbability(double chrF3) + /// + /// Estimate the quality of the pre-translations from text files. + /// + /// The confidence values. + public void EstimateQuality(Dictionary confidences) + { + ProjectChrF3(confidences); + ComputeUsableProportionsForTxtFiles(); + } + + /// + /// Estimate the quality of the pre-translations from USFM files. + /// + /// The confidence values. + public void EstimateQuality(Dictionary confidences) + { + ProjectChrF3(confidences); + ComputeUsableProportionsForVerses(); + } + + private double CalculateUsableProbability(double chrF3) { double usableWeight = Math.Exp(-Math.Pow(chrF3 - Usable.Mean, 2) / (2 * Usable.Variance)) * Usable.Count; double unusableWeight = @@ -38,17 +105,17 @@ public double CalculateUsableProbability(double chrF3) return usableWeight / (usableWeight + unusableWeight); } - public void ComputeBookUsability(BookScores bookScores) + private void ComputeBookUsability() { - foreach (string book in bookScores.Scores.Keys) + foreach (string book in _bookScores.Scores.Keys) { - Score score = bookScores.GetScore(book); + Score score = _bookScores.GetScore(book); if (score is null) { continue; } - List bookUsabilities = bookScores.GetVerseUsabilities(book); + List bookUsabilities = _bookScores.GetVerseUsabilities(book); double averageProbability = bookUsabilities.Average(); UsabilityBooks.Add( new BookUsability @@ -62,20 +129,20 @@ public void ComputeBookUsability(BookScores bookScores) } } - public void ComputeChapterUsability(ChapterScores chapterScores) + public void ComputeChapterUsability() { - foreach (KeyValuePair> chapterScoresByBook in chapterScores.Scores) + foreach (KeyValuePair> chapterScoresByBook in _chapterScores.Scores) { string book = chapterScoresByBook.Key; foreach (int chapter in chapterScoresByBook.Value.Keys) { - Score score = chapterScores.GetScore(book, chapter); + Score score = _chapterScores.GetScore(book, chapter); if (score is null) { continue; } - List chapterUsabilities = chapterScores.GetVerseUsabilities(book, chapter); + List chapterUsabilities = _chapterScores.GetVerseUsabilities(book, chapter); double averageProbability = chapterUsabilities.Average(); UsabilityChapters.Add( new ChapterUsability @@ -91,17 +158,17 @@ public void ComputeChapterUsability(ChapterScores chapterScores) } } - public void ComputeTxtFileUsability(TxtFileScores txtFileScores) + private void ComputeTxtFileUsability() { - foreach (string targetDraftFileStem in txtFileScores.Scores.Keys) + foreach (string targetDraftFileStem in _txtFileScores.Scores.Keys) { - Score score = txtFileScores.GetScore(targetDraftFileStem); + Score score = _txtFileScores.GetScore(targetDraftFileStem); if (score is null) { continue; } - List txtFileUsabilities = txtFileScores.GetSequenceUsabilities(targetDraftFileStem); + List txtFileUsabilities = _txtFileScores.GetSequenceUsabilities(targetDraftFileStem); double averageProbability = txtFileUsabilities.Average(); UsabilityTxtFiles.Add( new TxtFileUsability @@ -115,21 +182,17 @@ public void ComputeTxtFileUsability(TxtFileScores txtFileScores) } } - public void ComputeUsableProportions( - List verseScores, - ref ChapterScores chapterScores, - ref BookScores bookScores - ) + private void ComputeUsableProportionsForVerses() { - foreach (VerseScore verseScore in verseScores.Where(v => v.VerseRef.VerseNum > 0)) + foreach (VerseScore verseScore in _verseScores.Where(v => v.VerseRef.VerseNum > 0)) { double probability = CalculateUsableProbability(verseScore.ProjectedChrF3); - chapterScores.AppendVerseUsability( + _chapterScores.AppendVerseUsability( verseScore.VerseRef.Book, verseScore.VerseRef.ChapterNum, probability ); - bookScores.AppendVerseUsability(verseScore.VerseRef.Book, probability); + _bookScores.AppendVerseUsability(verseScore.VerseRef.Book, probability); UsabilityVerses.Add( new VerseUsability { @@ -143,16 +206,16 @@ ref BookScores bookScores ); } - ComputeChapterUsability(chapterScores); - ComputeBookUsability(bookScores); + ComputeChapterUsability(); + ComputeBookUsability(); } - public void ComputeUsableProportions(List sequenceScores, ref TxtFileScores txtFileScores) + private void ComputeUsableProportionsForTxtFiles() { - foreach (SequenceScore sequenceScore in sequenceScores) + foreach (SequenceScore sequenceScore in _sequenceScores) { double probability = CalculateUsableProbability(sequenceScore.ProjectedChrF3); - txtFileScores.AppendSequenceUsability(sequenceScore.TargetDraftFileStem, probability); + _txtFileScores.AppendSequenceUsability(sequenceScore.TargetDraftFileStem, probability); UsabilitySequences.Add( new SequenceUsability { @@ -165,33 +228,10 @@ public void ComputeUsableProportions(List sequenceScores, ref Txt ); } - ComputeTxtFileUsability(txtFileScores); - } - - public void EstimateQuality(double slope, double intercept, Dictionary confidences) - { - var sequenceScores = new List(); - var txtFileScores = new TxtFileScores(); - ProjectChrF3(slope, intercept, confidences, ref sequenceScores, ref txtFileScores); - ComputeUsableProportions(sequenceScores, ref txtFileScores); - } - - public void EstimateQuality(double slope, double intercept, Dictionary confidences) - { - var verseScores = new List(); - var chapterScores = new ChapterScores(); - var bookScores = new BookScores(); - ProjectChrF3(slope, intercept, confidences, ref verseScores, ref chapterScores, ref bookScores); - ComputeUsableProportions(verseScores, ref chapterScores, ref bookScores); + ComputeTxtFileUsability(); } - public void ProjectChrF3( - double slope, - double intercept, - Dictionary confidences, - ref List sequenceScores, - ref TxtFileScores txtFileScores - ) + private void ProjectChrF3(Dictionary confidences) { foreach (KeyValuePair confidence in confidences) { @@ -200,35 +240,28 @@ ref TxtFileScores txtFileScores { string targetDraftFileStem = keyParts[0]; var score = new SequenceScore( - slope, + _slope, confidence.Value, - intercept, + _intercept, sequenceNumber, targetDraftFileStem ); - sequenceScores.Add(score); - txtFileScores.AddScore(targetDraftFileStem, score); + _sequenceScores.Add(score); + _txtFileScores.AddScore(targetDraftFileStem, score); } } } - public void ProjectChrF3( - double slope, - double intercept, - Dictionary confidences, - ref List verseScores, - ref ChapterScores chapterScores, - ref BookScores bookScores - ) + private void ProjectChrF3(Dictionary confidences) { foreach (KeyValuePair confidence in confidences) { - var score = new VerseScore(slope, confidence.Value, intercept, confidence.Key); - verseScores.Add(score); + var score = new VerseScore(_slope, confidence.Value, _intercept, confidence.Key); + _verseScores.Add(score); string book = confidence.Key.Book; int chapter = confidence.Key.ChapterNum; - chapterScores.AddScore(book, chapter, score); - bookScores.AddScore(book, score); + _chapterScores.AddScore(book, chapter, score); + _bookScores.AddScore(book, score); } } } diff --git a/src/SIL.Machine/QualityEstimation/Thresholds.cs b/src/SIL.Machine/QualityEstimation/Thresholds.cs new file mode 100644 index 00000000..095c9178 --- /dev/null +++ b/src/SIL.Machine/QualityEstimation/Thresholds.cs @@ -0,0 +1,20 @@ +namespace SIL.Machine.QualityEstimation +{ + public class Thresholds + { + public Thresholds(double greenThreshold, double yellowThreshold) + { + GreenThreshold = greenThreshold; + YellowThreshold = yellowThreshold; + } + + public double GreenThreshold { get; } + + public double YellowThreshold { get; } + + public UsabilityLabel ReturnLabel(double probability) => + probability >= GreenThreshold ? UsabilityLabel.Green + : probability >= YellowThreshold ? UsabilityLabel.Yellow + : UsabilityLabel.Red; + } +} diff --git a/src/SIL.Machine/QualityEstimation/Thresholds/BookThresholds.cs b/src/SIL.Machine/QualityEstimation/Thresholds/BookThresholds.cs deleted file mode 100644 index 7d2d0067..00000000 --- a/src/SIL.Machine/QualityEstimation/Thresholds/BookThresholds.cs +++ /dev/null @@ -1,9 +0,0 @@ -namespace SIL.Machine.QualityEstimation.Thresholds -{ - public class BookThresholds : Thresholds - { - public override double GreenThreshold => 0.745; - - public override double YellowThreshold => 0.62; - } -} diff --git a/src/SIL.Machine/QualityEstimation/Thresholds/ChapterThresholds.cs b/src/SIL.Machine/QualityEstimation/Thresholds/ChapterThresholds.cs deleted file mode 100644 index 387cfb7a..00000000 --- a/src/SIL.Machine/QualityEstimation/Thresholds/ChapterThresholds.cs +++ /dev/null @@ -1,9 +0,0 @@ -namespace SIL.Machine.QualityEstimation.Thresholds -{ - public class ChapterThresholds : Thresholds - { - public override double GreenThreshold => 0.745; - - public override double YellowThreshold => 0.62; - } -} diff --git a/src/SIL.Machine/QualityEstimation/Thresholds/Thresholds.cs b/src/SIL.Machine/QualityEstimation/Thresholds/Thresholds.cs deleted file mode 100644 index 4205d463..00000000 --- a/src/SIL.Machine/QualityEstimation/Thresholds/Thresholds.cs +++ /dev/null @@ -1,14 +0,0 @@ -namespace SIL.Machine.QualityEstimation.Thresholds -{ - public abstract class Thresholds - { - public abstract double GreenThreshold { get; } - - public abstract double YellowThreshold { get; } - - public UsabilityLabel ReturnLabel(double probability) => - probability >= GreenThreshold ? UsabilityLabel.Green - : probability >= YellowThreshold ? UsabilityLabel.Yellow - : UsabilityLabel.Red; - } -} diff --git a/src/SIL.Machine/QualityEstimation/Thresholds/VerseThresholds.cs b/src/SIL.Machine/QualityEstimation/Thresholds/VerseThresholds.cs deleted file mode 100644 index 9312e55e..00000000 --- a/src/SIL.Machine/QualityEstimation/Thresholds/VerseThresholds.cs +++ /dev/null @@ -1,9 +0,0 @@ -namespace SIL.Machine.QualityEstimation.Thresholds -{ - public class VerseThresholds : Thresholds - { - public override double GreenThreshold => 0.745; - - public override double YellowThreshold => 0.62; - } -} From c477a488bdb44b30735b99447e7e86e791cf1578 Mon Sep 17 00:00:00 2001 From: Peter Chapman Date: Tue, 24 Mar 2026 13:02:25 +1300 Subject: [PATCH 3/5] Add calculation of book and chapter confidences by geometric mean --- .../QualityEstimation/QualityEstimation.cs | 94 +++++++++++++++++-- .../QualityEstimationTests.cs | 56 +++++++++++ 2 files changed, 142 insertions(+), 8 deletions(-) create mode 100644 tests/SIL.Machine.Tests/QualityEstimation/QualityEstimationTests.cs diff --git a/src/SIL.Machine/QualityEstimation/QualityEstimation.cs b/src/SIL.Machine/QualityEstimation/QualityEstimation.cs index ee3bea96..0abd9cba 100644 --- a/src/SIL.Machine/QualityEstimation/QualityEstimation.cs +++ b/src/SIL.Machine/QualityEstimation/QualityEstimation.cs @@ -84,7 +84,7 @@ public QualityEstimation(double slope, double intercept) public void EstimateQuality(Dictionary confidences) { ProjectChrF3(confidences); - ComputeUsableProportionsForTxtFiles(); + ComputeSequenceUsability(); } /// @@ -94,7 +94,23 @@ public void EstimateQuality(Dictionary confidences) public void EstimateQuality(Dictionary confidences) { ProjectChrF3(confidences); - ComputeUsableProportionsForVerses(); + ComputeVerseUsability(); + } + + /// + /// Calculates the geometric mean for a collection of values. + /// + /// + /// The geometric mean. + private static double GeometricMean(IList values) + { + // Geometric mean requires positive values + if (values == null || !values.Any() || values.Any(x => x <= 0)) + return 0; + + // Compute the sum of the natural logarithms of all values, + // and divide by the count of numbers and take the exponential + return Math.Exp(values.Sum(Math.Log) / values.Count); } private double CalculateUsableProbability(double chrF3) @@ -129,7 +145,7 @@ private void ComputeBookUsability() } } - public void ComputeChapterUsability() + private void ComputeChapterUsability() { foreach (KeyValuePair> chapterScoresByBook in _chapterScores.Scores) { @@ -182,7 +198,7 @@ private void ComputeTxtFileUsability() } } - private void ComputeUsableProportionsForVerses() + private void ComputeVerseUsability() { foreach (VerseScore verseScore in _verseScores.Where(v => v.VerseRef.VerseNum > 0)) { @@ -210,7 +226,7 @@ private void ComputeUsableProportionsForVerses() ComputeBookUsability(); } - private void ComputeUsableProportionsForTxtFiles() + private void ComputeSequenceUsability() { foreach (SequenceScore sequenceScore in _sequenceScores) { @@ -233,6 +249,7 @@ private void ComputeUsableProportionsForTxtFiles() private void ProjectChrF3(Dictionary confidences) { + var confidencesByTxtFile = new Dictionary>(); foreach (KeyValuePair confidence in confidences) { string[] keyParts = confidence.Key.Split(':'); @@ -247,21 +264,82 @@ private void ProjectChrF3(Dictionary confidences) targetDraftFileStem ); _sequenceScores.Add(score); - _txtFileScores.AddScore(targetDraftFileStem, score); + + // Record the confidence by text file + if (!confidencesByTxtFile.TryGetValue(targetDraftFileStem, out List txtFileConfidences)) + { + txtFileConfidences = new List(); + confidencesByTxtFile[targetDraftFileStem] = txtFileConfidences; + } + + txtFileConfidences.Add(confidence.Value); } } + + foreach (KeyValuePair> txtFileConfidences in confidencesByTxtFile) + { + _txtFileScores.AddScore( + txtFileConfidences.Key, + new Score(_slope, confidence: GeometricMean(txtFileConfidences.Value), _intercept) + ); + } } private void ProjectChrF3(Dictionary confidences) { + var confidencesByBook = new Dictionary>(); + var confidencesByBookAndChapter = new Dictionary<(string, int), List>(); foreach (KeyValuePair confidence in confidences) { var score = new VerseScore(_slope, confidence.Value, _intercept, confidence.Key); _verseScores.Add(score); string book = confidence.Key.Book; int chapter = confidence.Key.ChapterNum; - _chapterScores.AddScore(book, chapter, score); - _bookScores.AddScore(book, score); + + // Record the confidence by and chapter + if ( + !confidencesByBookAndChapter.TryGetValue( + (book, chapter), + out List bookAndChapterConfidences + ) + ) + { + bookAndChapterConfidences = new List(); + confidencesByBookAndChapter[(book, chapter)] = bookAndChapterConfidences; + } + + bookAndChapterConfidences.Add(confidence.Value); + + // Record the confidence by book + if (!confidencesByBook.TryGetValue(book, out List bookConfidences)) + { + bookConfidences = new List(); + confidencesByBook[book] = bookConfidences; + } + + bookConfidences.Add(confidence.Value); + } + + foreach (KeyValuePair> bookConfidences in confidencesByBook) + { + _bookScores.AddScore( + bookConfidences.Key, + new Score(_slope, confidence: GeometricMean(bookConfidences.Value), _intercept) + ); + } + + foreach ( + KeyValuePair< + (string Book, int Chapter), + List + > bookAndChapterConfidences in confidencesByBookAndChapter + ) + { + _chapterScores.AddScore( + bookAndChapterConfidences.Key.Book, + bookAndChapterConfidences.Key.Chapter, + new Score(_slope, confidence: GeometricMean(bookAndChapterConfidences.Value), _intercept) + ); } } } diff --git a/tests/SIL.Machine.Tests/QualityEstimation/QualityEstimationTests.cs b/tests/SIL.Machine.Tests/QualityEstimation/QualityEstimationTests.cs new file mode 100644 index 00000000..0b1558e0 --- /dev/null +++ b/tests/SIL.Machine.Tests/QualityEstimation/QualityEstimationTests.cs @@ -0,0 +1,56 @@ +using NUnit.Framework; +using SIL.Scripture; + +namespace SIL.Machine.QualityEstimation; + +[TestFixture] +public class QualityEstimationTests +{ + [Test] + public void QualityEstimation_TxtFiles() + { + var qualityEstimation = new QualityEstimation(slope: 0.6, intercept: 1.0); + var confidences = new Dictionary + { + ["MAT.txt:1"] = 85.0, + ["MAT.txt:2"] = 80.0, + ["MRK.txt:1"] = 60.0, + }; + qualityEstimation.EstimateQuality(confidences); + using (Assert.EnterMultipleScope()) + { + Assert.That(qualityEstimation.UsabilitySequences, Has.Count.EqualTo(3)); + Assert.That(qualityEstimation.UsabilitySequences[0].Label, Is.EqualTo(UsabilityLabel.Green)); + Assert.That(qualityEstimation.UsabilitySequences[1].Label, Is.EqualTo(UsabilityLabel.Yellow)); + Assert.That(qualityEstimation.UsabilitySequences[2].Label, Is.EqualTo(UsabilityLabel.Red)); + Assert.That(qualityEstimation.UsabilityTxtFiles, Has.Count.EqualTo(2)); + Assert.That(qualityEstimation.UsabilityTxtFiles[0].Label, Is.EqualTo(UsabilityLabel.Green)); + Assert.That(qualityEstimation.UsabilityTxtFiles[1].Label, Is.EqualTo(UsabilityLabel.Red)); + } + } + + [Test] + public void QualityEstimation_Verses() + { + var qualityEstimation = new QualityEstimation(slope: 0.6, intercept: 1.0); + var confidences = new Dictionary + { + [new VerseRef(1, 1, 1)] = 85.0, + [new VerseRef(1, 1, 2)] = 80.0, + [new VerseRef(1, 2, 1)] = 60.0, + }; + qualityEstimation.EstimateQuality(confidences); + using (Assert.EnterMultipleScope()) + { + Assert.That(qualityEstimation.UsabilityVerses, Has.Count.EqualTo(3)); + Assert.That(qualityEstimation.UsabilityVerses[0].Label, Is.EqualTo(UsabilityLabel.Green)); + Assert.That(qualityEstimation.UsabilityVerses[1].Label, Is.EqualTo(UsabilityLabel.Yellow)); + Assert.That(qualityEstimation.UsabilityVerses[2].Label, Is.EqualTo(UsabilityLabel.Red)); + Assert.That(qualityEstimation.UsabilityChapters, Has.Count.EqualTo(2)); + Assert.That(qualityEstimation.UsabilityChapters[0].Label, Is.EqualTo(UsabilityLabel.Green)); + Assert.That(qualityEstimation.UsabilityChapters[1].Label, Is.EqualTo(UsabilityLabel.Red)); + Assert.That(qualityEstimation.UsabilityBooks, Has.Count.EqualTo(1)); + Assert.That(qualityEstimation.UsabilityBooks[0].Label, Is.EqualTo(UsabilityLabel.Yellow)); + } + } +} From 1c18dbed4e56d8f59f0eee6eae5732d9166453c1 Mon Sep 17 00:00:00 2001 From: Peter Chapman Date: Thu, 26 Mar 2026 07:55:27 +1300 Subject: [PATCH 4/5] Removed namespaces, made usabilities immutable, refactored confidences --- .../{Scores => }/BookScores.cs | 4 +- .../QualityEstimation/BookUsability.cs | 13 ++ .../{Scores => }/ChapterScores.cs | 4 +- .../QualityEstimation/ChapterUsability.cs | 13 ++ ...stimation.cs => ChrF3QualityEstimation.cs} | 132 ++++++++---------- .../QualityEstimation/{Scores => }/Score.cs | 4 +- .../QualityEstimation/Scores/VerseScore.cs | 15 -- .../{Scores => }/SequenceScore.cs | 4 +- .../QualityEstimation/SequenceUsability.cs | 19 +++ .../{Scores => }/TxtFileScores.cs | 4 +- .../QualityEstimation/TxtFileUsability.cs | 13 ++ .../Usability/BookUsability.cs | 7 - .../Usability/ChapterUsability.cs | 7 - .../Usability/SequenceUsability.cs | 7 - .../Usability/TxtFileUsability.cs | 7 - .../Usability/UsabilityBase.cs | 11 -- .../Usability/VerseUsability.cs | 7 - .../QualityEstimation/UsabilityBase.cs | 18 +++ .../QualityEstimation/VerseScore.cs | 15 ++ .../QualityEstimation/VerseUsability.cs | 20 +++ ...ests.cs => ChrF3QualityEstimationTests.cs} | 35 ++--- 21 files changed, 195 insertions(+), 164 deletions(-) rename src/SIL.Machine/QualityEstimation/{Scores => }/BookScores.cs (92%) create mode 100644 src/SIL.Machine/QualityEstimation/BookUsability.cs rename src/SIL.Machine/QualityEstimation/{Scores => }/ChapterScores.cs (95%) create mode 100644 src/SIL.Machine/QualityEstimation/ChapterUsability.cs rename src/SIL.Machine/QualityEstimation/{QualityEstimation.cs => ChrF3QualityEstimation.cs} (72%) rename src/SIL.Machine/QualityEstimation/{Scores => }/Score.cs (80%) delete mode 100644 src/SIL.Machine/QualityEstimation/Scores/VerseScore.cs rename src/SIL.Machine/QualityEstimation/{Scores => }/SequenceScore.cs (84%) create mode 100644 src/SIL.Machine/QualityEstimation/SequenceUsability.cs rename src/SIL.Machine/QualityEstimation/{Scores => }/TxtFileScores.cs (93%) create mode 100644 src/SIL.Machine/QualityEstimation/TxtFileUsability.cs delete mode 100644 src/SIL.Machine/QualityEstimation/Usability/BookUsability.cs delete mode 100644 src/SIL.Machine/QualityEstimation/Usability/ChapterUsability.cs delete mode 100644 src/SIL.Machine/QualityEstimation/Usability/SequenceUsability.cs delete mode 100644 src/SIL.Machine/QualityEstimation/Usability/TxtFileUsability.cs delete mode 100644 src/SIL.Machine/QualityEstimation/Usability/UsabilityBase.cs delete mode 100644 src/SIL.Machine/QualityEstimation/Usability/VerseUsability.cs create mode 100644 src/SIL.Machine/QualityEstimation/UsabilityBase.cs create mode 100644 src/SIL.Machine/QualityEstimation/VerseScore.cs create mode 100644 src/SIL.Machine/QualityEstimation/VerseUsability.cs rename tests/SIL.Machine.Tests/QualityEstimation/{QualityEstimationTests.cs => ChrF3QualityEstimationTests.cs} (69%) diff --git a/src/SIL.Machine/QualityEstimation/Scores/BookScores.cs b/src/SIL.Machine/QualityEstimation/BookScores.cs similarity index 92% rename from src/SIL.Machine/QualityEstimation/Scores/BookScores.cs rename to src/SIL.Machine/QualityEstimation/BookScores.cs index d76dd676..0e6623f0 100644 --- a/src/SIL.Machine/QualityEstimation/Scores/BookScores.cs +++ b/src/SIL.Machine/QualityEstimation/BookScores.cs @@ -1,8 +1,8 @@ using System.Collections.Generic; -namespace SIL.Machine.QualityEstimation.Scores +namespace SIL.Machine.QualityEstimation { - public class BookScores + internal class BookScores { private readonly Dictionary> _verseUsabilities = new Dictionary>(); diff --git a/src/SIL.Machine/QualityEstimation/BookUsability.cs b/src/SIL.Machine/QualityEstimation/BookUsability.cs new file mode 100644 index 00000000..645cc0a0 --- /dev/null +++ b/src/SIL.Machine/QualityEstimation/BookUsability.cs @@ -0,0 +1,13 @@ +namespace SIL.Machine.QualityEstimation +{ + public class BookUsability : UsabilityBase + { + public BookUsability(string book, UsabilityLabel label, double projectedChrF3, double usability) + : base(label, projectedChrF3, usability) + { + Book = book; + } + + public string Book { get; } + } +} diff --git a/src/SIL.Machine/QualityEstimation/Scores/ChapterScores.cs b/src/SIL.Machine/QualityEstimation/ChapterScores.cs similarity index 95% rename from src/SIL.Machine/QualityEstimation/Scores/ChapterScores.cs rename to src/SIL.Machine/QualityEstimation/ChapterScores.cs index 147a4b57..0f76a804 100644 --- a/src/SIL.Machine/QualityEstimation/Scores/ChapterScores.cs +++ b/src/SIL.Machine/QualityEstimation/ChapterScores.cs @@ -1,8 +1,8 @@ using System.Collections.Generic; -namespace SIL.Machine.QualityEstimation.Scores +namespace SIL.Machine.QualityEstimation { - public class ChapterScores + internal class ChapterScores { private readonly Dictionary>> _verseUsabilities = new Dictionary>>(); diff --git a/src/SIL.Machine/QualityEstimation/ChapterUsability.cs b/src/SIL.Machine/QualityEstimation/ChapterUsability.cs new file mode 100644 index 00000000..012f2a15 --- /dev/null +++ b/src/SIL.Machine/QualityEstimation/ChapterUsability.cs @@ -0,0 +1,13 @@ +namespace SIL.Machine.QualityEstimation +{ + public class ChapterUsability : BookUsability + { + public ChapterUsability(string book, int chapter, UsabilityLabel label, double projectedChrF3, double usability) + : base(book, label, projectedChrF3, usability) + { + Chapter = chapter; + } + + public int Chapter { get; } + } +} diff --git a/src/SIL.Machine/QualityEstimation/QualityEstimation.cs b/src/SIL.Machine/QualityEstimation/ChrF3QualityEstimation.cs similarity index 72% rename from src/SIL.Machine/QualityEstimation/QualityEstimation.cs rename to src/SIL.Machine/QualityEstimation/ChrF3QualityEstimation.cs index 0abd9cba..88bf0e91 100644 --- a/src/SIL.Machine/QualityEstimation/QualityEstimation.cs +++ b/src/SIL.Machine/QualityEstimation/ChrF3QualityEstimation.cs @@ -1,16 +1,14 @@ using System; using System.Collections.Generic; using System.Linq; -using SIL.Machine.QualityEstimation.Scores; -using SIL.Machine.QualityEstimation.Usability; -using SIL.Scripture; +using SIL.Machine.Corpora; namespace SIL.Machine.QualityEstimation { /// /// Provides chrF3 quality estimation support for pre-translations. /// - public class QualityEstimation + public class ChrF3QualityEstimation { private readonly BookScores _bookScores = new BookScores(); private readonly ChapterScores _chapterScores = new ChapterScores(); @@ -20,7 +18,7 @@ public class QualityEstimation private readonly TxtFileScores _txtFileScores = new TxtFileScores(); private readonly List _verseScores = new List(); - public QualityEstimation(double slope, double intercept) + public ChrF3QualityEstimation(double slope, double intercept) { _slope = slope; _intercept = intercept; @@ -81,7 +79,7 @@ public QualityEstimation(double slope, double intercept) /// Estimate the quality of the pre-translations from text files. /// /// The confidence values. - public void EstimateQuality(Dictionary confidences) + public void EstimateQuality(IEnumerable<(MultiKeyRef key, double confidence)> confidences) { ProjectChrF3(confidences); ComputeSequenceUsability(); @@ -91,7 +89,7 @@ public void EstimateQuality(Dictionary confidences) /// Estimate the quality of the pre-translations from USFM files. /// /// The confidence values. - public void EstimateQuality(Dictionary confidences) + public void EstimateQuality(IEnumerable<(ScriptureRef key, double confidence)> confidences) { ProjectChrF3(confidences); ComputeVerseUsability(); @@ -127,20 +125,17 @@ private void ComputeBookUsability() { Score score = _bookScores.GetScore(book); if (score is null) - { continue; - } List bookUsabilities = _bookScores.GetVerseUsabilities(book); double averageProbability = bookUsabilities.Average(); UsabilityBooks.Add( - new BookUsability - { - Book = book, - Usability = averageProbability, - ProjectedChrF3 = score.ProjectedChrF3, - Label = BookThresholds.ReturnLabel(averageProbability), - } + new BookUsability( + book, + label: BookThresholds.ReturnLabel(averageProbability), + usability: averageProbability, + projectedChrF3: score.ProjectedChrF3 + ) ); } } @@ -154,21 +149,18 @@ private void ComputeChapterUsability() { Score score = _chapterScores.GetScore(book, chapter); if (score is null) - { continue; - } List chapterUsabilities = _chapterScores.GetVerseUsabilities(book, chapter); double averageProbability = chapterUsabilities.Average(); UsabilityChapters.Add( - new ChapterUsability - { - Book = book, - Chapter = chapter, - Usability = averageProbability, - ProjectedChrF3 = score.ProjectedChrF3, - Label = ChapterThresholds.ReturnLabel(averageProbability), - } + new ChapterUsability( + book, + chapter, + label: ChapterThresholds.ReturnLabel(averageProbability), + usability: averageProbability, + projectedChrF3: score.ProjectedChrF3 + ) ); } } @@ -180,45 +172,41 @@ private void ComputeTxtFileUsability() { Score score = _txtFileScores.GetScore(targetDraftFileStem); if (score is null) - { continue; - } List txtFileUsabilities = _txtFileScores.GetSequenceUsabilities(targetDraftFileStem); double averageProbability = txtFileUsabilities.Average(); UsabilityTxtFiles.Add( - new TxtFileUsability - { - TargetDraftFile = targetDraftFileStem, - Usability = averageProbability, - ProjectedChrF3 = score.ProjectedChrF3, - Label = VerseThresholds.ReturnLabel(averageProbability), - } + new TxtFileUsability( + targetDraftFileStem, + label: BookThresholds.ReturnLabel(averageProbability), + usability: averageProbability, + projectedChrF3: score.ProjectedChrF3 + ) ); } } private void ComputeVerseUsability() { - foreach (VerseScore verseScore in _verseScores.Where(v => v.VerseRef.VerseNum > 0)) + foreach (VerseScore verseScore in _verseScores.Where(v => v.ScriptureRef.VerseNum > 0)) { double probability = CalculateUsableProbability(verseScore.ProjectedChrF3); _chapterScores.AppendVerseUsability( - verseScore.VerseRef.Book, - verseScore.VerseRef.ChapterNum, + verseScore.ScriptureRef.Book, + verseScore.ScriptureRef.ChapterNum, probability ); - _bookScores.AppendVerseUsability(verseScore.VerseRef.Book, probability); + _bookScores.AppendVerseUsability(verseScore.ScriptureRef.Book, probability); UsabilityVerses.Add( - new VerseUsability - { - Book = verseScore.VerseRef.Book, - Chapter = verseScore.VerseRef.ChapterNum, - Verse = verseScore.VerseRef.Verse, - Usability = probability, - ProjectedChrF3 = verseScore.ProjectedChrF3, - Label = VerseThresholds.ReturnLabel(probability), - } + new VerseUsability( + book: verseScore.ScriptureRef.Book, + chapter: verseScore.ScriptureRef.ChapterNum, + verse: verseScore.ScriptureRef.Verse, + label: VerseThresholds.ReturnLabel(probability), + usability: probability, + projectedChrF3: verseScore.ProjectedChrF3 + ) ); } @@ -233,36 +221,28 @@ private void ComputeSequenceUsability() double probability = CalculateUsableProbability(sequenceScore.ProjectedChrF3); _txtFileScores.AppendSequenceUsability(sequenceScore.TargetDraftFileStem, probability); UsabilitySequences.Add( - new SequenceUsability - { - TargetDraftFile = sequenceScore.TargetDraftFileStem, - SequenceNumber = sequenceScore.SequenceNumber, - Usability = probability, - ProjectedChrF3 = sequenceScore.ProjectedChrF3, - Label = VerseThresholds.ReturnLabel(probability), - } + new SequenceUsability( + targetDraftFile: sequenceScore.TargetDraftFileStem, + sequenceNumber: sequenceScore.SequenceNumber, + label: VerseThresholds.ReturnLabel(probability), + usability: probability, + projectedChrF3: sequenceScore.ProjectedChrF3 + ) ); } ComputeTxtFileUsability(); } - private void ProjectChrF3(Dictionary confidences) + private void ProjectChrF3(IEnumerable<(MultiKeyRef, double)> confidences) { var confidencesByTxtFile = new Dictionary>(); - foreach (KeyValuePair confidence in confidences) + foreach ((MultiKeyRef key, double confidence) in confidences) { - string[] keyParts = confidence.Key.Split(':'); - if (keyParts.Length == 2 && int.TryParse(keyParts[1], out int sequenceNumber)) + if (key.Keys.Count >= 0 && int.TryParse(key.Keys[0].ToString(), out int sequenceNumber)) { - string targetDraftFileStem = keyParts[0]; - var score = new SequenceScore( - _slope, - confidence.Value, - _intercept, - sequenceNumber, - targetDraftFileStem - ); + string targetDraftFileStem = key.TextId; + var score = new SequenceScore(_slope, confidence, _intercept, sequenceNumber, targetDraftFileStem); _sequenceScores.Add(score); // Record the confidence by text file @@ -272,7 +252,7 @@ private void ProjectChrF3(Dictionary confidences) confidencesByTxtFile[targetDraftFileStem] = txtFileConfidences; } - txtFileConfidences.Add(confidence.Value); + txtFileConfidences.Add(confidence); } } @@ -285,16 +265,16 @@ private void ProjectChrF3(Dictionary confidences) } } - private void ProjectChrF3(Dictionary confidences) + private void ProjectChrF3(IEnumerable<(ScriptureRef, double)> confidences) { var confidencesByBook = new Dictionary>(); var confidencesByBookAndChapter = new Dictionary<(string, int), List>(); - foreach (KeyValuePair confidence in confidences) + foreach ((ScriptureRef key, double confidence) in confidences) { - var score = new VerseScore(_slope, confidence.Value, _intercept, confidence.Key); + var score = new VerseScore(_slope, confidence, _intercept, key); _verseScores.Add(score); - string book = confidence.Key.Book; - int chapter = confidence.Key.ChapterNum; + string book = key.Book; + int chapter = key.ChapterNum; // Record the confidence by and chapter if ( @@ -308,7 +288,7 @@ out List bookAndChapterConfidences confidencesByBookAndChapter[(book, chapter)] = bookAndChapterConfidences; } - bookAndChapterConfidences.Add(confidence.Value); + bookAndChapterConfidences.Add(confidence); // Record the confidence by book if (!confidencesByBook.TryGetValue(book, out List bookConfidences)) @@ -317,7 +297,7 @@ out List bookAndChapterConfidences confidencesByBook[book] = bookConfidences; } - bookConfidences.Add(confidence.Value); + bookConfidences.Add(confidence); } foreach (KeyValuePair> bookConfidences in confidencesByBook) diff --git a/src/SIL.Machine/QualityEstimation/Scores/Score.cs b/src/SIL.Machine/QualityEstimation/Score.cs similarity index 80% rename from src/SIL.Machine/QualityEstimation/Scores/Score.cs rename to src/SIL.Machine/QualityEstimation/Score.cs index 5173e5cb..ee98dd6a 100644 --- a/src/SIL.Machine/QualityEstimation/Scores/Score.cs +++ b/src/SIL.Machine/QualityEstimation/Score.cs @@ -1,6 +1,6 @@ -namespace SIL.Machine.QualityEstimation.Scores +namespace SIL.Machine.QualityEstimation { - public class Score + internal class Score { public Score(double slope, double confidence, double intercept) { diff --git a/src/SIL.Machine/QualityEstimation/Scores/VerseScore.cs b/src/SIL.Machine/QualityEstimation/Scores/VerseScore.cs deleted file mode 100644 index c8bdaf29..00000000 --- a/src/SIL.Machine/QualityEstimation/Scores/VerseScore.cs +++ /dev/null @@ -1,15 +0,0 @@ -using SIL.Scripture; - -namespace SIL.Machine.QualityEstimation.Scores -{ - public class VerseScore : Score - { - public VerseScore(double slope, double confidence, double intercept, VerseRef verseRef) - : base(slope, confidence, intercept) - { - VerseRef = verseRef; - } - - public VerseRef VerseRef { get; } - } -} diff --git a/src/SIL.Machine/QualityEstimation/Scores/SequenceScore.cs b/src/SIL.Machine/QualityEstimation/SequenceScore.cs similarity index 84% rename from src/SIL.Machine/QualityEstimation/Scores/SequenceScore.cs rename to src/SIL.Machine/QualityEstimation/SequenceScore.cs index 51560663..d85821b3 100644 --- a/src/SIL.Machine/QualityEstimation/Scores/SequenceScore.cs +++ b/src/SIL.Machine/QualityEstimation/SequenceScore.cs @@ -1,6 +1,6 @@ -namespace SIL.Machine.QualityEstimation.Scores +namespace SIL.Machine.QualityEstimation { - public class SequenceScore : Score + internal class SequenceScore : Score { public SequenceScore( double slope, diff --git a/src/SIL.Machine/QualityEstimation/SequenceUsability.cs b/src/SIL.Machine/QualityEstimation/SequenceUsability.cs new file mode 100644 index 00000000..828dc266 --- /dev/null +++ b/src/SIL.Machine/QualityEstimation/SequenceUsability.cs @@ -0,0 +1,19 @@ +namespace SIL.Machine.QualityEstimation +{ + public class SequenceUsability : TxtFileUsability + { + public SequenceUsability( + string targetDraftFile, + int sequenceNumber, + UsabilityLabel label, + double projectedChrF3, + double usability + ) + : base(targetDraftFile, label, projectedChrF3, usability) + { + SequenceNumber = sequenceNumber; + } + + public int SequenceNumber { get; } + } +} diff --git a/src/SIL.Machine/QualityEstimation/Scores/TxtFileScores.cs b/src/SIL.Machine/QualityEstimation/TxtFileScores.cs similarity index 93% rename from src/SIL.Machine/QualityEstimation/Scores/TxtFileScores.cs rename to src/SIL.Machine/QualityEstimation/TxtFileScores.cs index eb3c6895..8f5e563e 100644 --- a/src/SIL.Machine/QualityEstimation/Scores/TxtFileScores.cs +++ b/src/SIL.Machine/QualityEstimation/TxtFileScores.cs @@ -1,8 +1,8 @@ using System.Collections.Generic; -namespace SIL.Machine.QualityEstimation.Scores +namespace SIL.Machine.QualityEstimation { - public class TxtFileScores + internal class TxtFileScores { private readonly Dictionary> _sequenceUsabilities = new Dictionary>(); diff --git a/src/SIL.Machine/QualityEstimation/TxtFileUsability.cs b/src/SIL.Machine/QualityEstimation/TxtFileUsability.cs new file mode 100644 index 00000000..0c23a325 --- /dev/null +++ b/src/SIL.Machine/QualityEstimation/TxtFileUsability.cs @@ -0,0 +1,13 @@ +namespace SIL.Machine.QualityEstimation +{ + public class TxtFileUsability : UsabilityBase + { + public TxtFileUsability(string targetDraftFile, UsabilityLabel label, double projectedChrF3, double usability) + : base(label, projectedChrF3, usability) + { + TargetDraftFile = targetDraftFile; + } + + public string TargetDraftFile { get; } + } +} diff --git a/src/SIL.Machine/QualityEstimation/Usability/BookUsability.cs b/src/SIL.Machine/QualityEstimation/Usability/BookUsability.cs deleted file mode 100644 index 69ed2ff2..00000000 --- a/src/SIL.Machine/QualityEstimation/Usability/BookUsability.cs +++ /dev/null @@ -1,7 +0,0 @@ -namespace SIL.Machine.QualityEstimation.Usability -{ - public class BookUsability : UsabilityBase - { - public string Book { get; set; } - } -} diff --git a/src/SIL.Machine/QualityEstimation/Usability/ChapterUsability.cs b/src/SIL.Machine/QualityEstimation/Usability/ChapterUsability.cs deleted file mode 100644 index ab46cc27..00000000 --- a/src/SIL.Machine/QualityEstimation/Usability/ChapterUsability.cs +++ /dev/null @@ -1,7 +0,0 @@ -namespace SIL.Machine.QualityEstimation.Usability -{ - public class ChapterUsability : BookUsability - { - public int Chapter { get; set; } - } -} diff --git a/src/SIL.Machine/QualityEstimation/Usability/SequenceUsability.cs b/src/SIL.Machine/QualityEstimation/Usability/SequenceUsability.cs deleted file mode 100644 index 8295e70f..00000000 --- a/src/SIL.Machine/QualityEstimation/Usability/SequenceUsability.cs +++ /dev/null @@ -1,7 +0,0 @@ -namespace SIL.Machine.QualityEstimation.Usability -{ - public class SequenceUsability : TxtFileUsability - { - public int SequenceNumber { get; set; } - } -} diff --git a/src/SIL.Machine/QualityEstimation/Usability/TxtFileUsability.cs b/src/SIL.Machine/QualityEstimation/Usability/TxtFileUsability.cs deleted file mode 100644 index 1688b68e..00000000 --- a/src/SIL.Machine/QualityEstimation/Usability/TxtFileUsability.cs +++ /dev/null @@ -1,7 +0,0 @@ -namespace SIL.Machine.QualityEstimation.Usability -{ - public class TxtFileUsability : UsabilityBase - { - public string TargetDraftFile { get; set; } - } -} diff --git a/src/SIL.Machine/QualityEstimation/Usability/UsabilityBase.cs b/src/SIL.Machine/QualityEstimation/Usability/UsabilityBase.cs deleted file mode 100644 index 178e233a..00000000 --- a/src/SIL.Machine/QualityEstimation/Usability/UsabilityBase.cs +++ /dev/null @@ -1,11 +0,0 @@ -namespace SIL.Machine.QualityEstimation.Usability -{ - public abstract class UsabilityBase - { - public UsabilityLabel Label { get; set; } - - public double ProjectedChrF3 { get; set; } - - public double Usability { get; set; } - } -} diff --git a/src/SIL.Machine/QualityEstimation/Usability/VerseUsability.cs b/src/SIL.Machine/QualityEstimation/Usability/VerseUsability.cs deleted file mode 100644 index 16ca3ea6..00000000 --- a/src/SIL.Machine/QualityEstimation/Usability/VerseUsability.cs +++ /dev/null @@ -1,7 +0,0 @@ -namespace SIL.Machine.QualityEstimation.Usability -{ - public class VerseUsability : ChapterUsability - { - public string Verse { get; set; } - } -} diff --git a/src/SIL.Machine/QualityEstimation/UsabilityBase.cs b/src/SIL.Machine/QualityEstimation/UsabilityBase.cs new file mode 100644 index 00000000..da7ae05c --- /dev/null +++ b/src/SIL.Machine/QualityEstimation/UsabilityBase.cs @@ -0,0 +1,18 @@ +namespace SIL.Machine.QualityEstimation +{ + public abstract class UsabilityBase + { + protected UsabilityBase(UsabilityLabel label, double projectedChrF3, double usability) + { + Label = label; + ProjectedChrF3 = projectedChrF3; + Usability = usability; + } + + public UsabilityLabel Label { get; } + + public double ProjectedChrF3 { get; } + + public double Usability { get; } + } +} diff --git a/src/SIL.Machine/QualityEstimation/VerseScore.cs b/src/SIL.Machine/QualityEstimation/VerseScore.cs new file mode 100644 index 00000000..baa5d0a0 --- /dev/null +++ b/src/SIL.Machine/QualityEstimation/VerseScore.cs @@ -0,0 +1,15 @@ +using SIL.Machine.Corpora; + +namespace SIL.Machine.QualityEstimation +{ + internal class VerseScore : Score + { + public VerseScore(double slope, double confidence, double intercept, ScriptureRef scriptureRef) + : base(slope, confidence, intercept) + { + ScriptureRef = scriptureRef; + } + + public ScriptureRef ScriptureRef { get; } + } +} diff --git a/src/SIL.Machine/QualityEstimation/VerseUsability.cs b/src/SIL.Machine/QualityEstimation/VerseUsability.cs new file mode 100644 index 00000000..77c9c271 --- /dev/null +++ b/src/SIL.Machine/QualityEstimation/VerseUsability.cs @@ -0,0 +1,20 @@ +namespace SIL.Machine.QualityEstimation +{ + public class VerseUsability : ChapterUsability + { + public VerseUsability( + string book, + int chapter, + string verse, + UsabilityLabel label, + double projectedChrF3, + double usability + ) + : base(book, chapter, label, projectedChrF3, usability) + { + Verse = verse; + } + + public string Verse { get; } + } +} diff --git a/tests/SIL.Machine.Tests/QualityEstimation/QualityEstimationTests.cs b/tests/SIL.Machine.Tests/QualityEstimation/ChrF3QualityEstimationTests.cs similarity index 69% rename from tests/SIL.Machine.Tests/QualityEstimation/QualityEstimationTests.cs rename to tests/SIL.Machine.Tests/QualityEstimation/ChrF3QualityEstimationTests.cs index 0b1558e0..39db8af5 100644 --- a/tests/SIL.Machine.Tests/QualityEstimation/QualityEstimationTests.cs +++ b/tests/SIL.Machine.Tests/QualityEstimation/ChrF3QualityEstimationTests.cs @@ -1,21 +1,22 @@ using NUnit.Framework; +using SIL.Machine.Corpora; using SIL.Scripture; namespace SIL.Machine.QualityEstimation; [TestFixture] -public class QualityEstimationTests +public class ChrF3QualityEstimationTests { [Test] - public void QualityEstimation_TxtFiles() + public void ChrF3QualityEstimation_TxtFiles() { - var qualityEstimation = new QualityEstimation(slope: 0.6, intercept: 1.0); - var confidences = new Dictionary - { - ["MAT.txt:1"] = 85.0, - ["MAT.txt:2"] = 80.0, - ["MRK.txt:1"] = 60.0, - }; + var qualityEstimation = new ChrF3QualityEstimation(slope: 0.6, intercept: 1.0); + List<(MultiKeyRef Key, double Confidence)> confidences = + [ + (new MultiKeyRef("MAT.txt", 1), 85.0), + (new MultiKeyRef("MAT.txt", 2), 80.0), + (new MultiKeyRef("MRK.txt", 1), 60.0), + ]; qualityEstimation.EstimateQuality(confidences); using (Assert.EnterMultipleScope()) { @@ -30,15 +31,15 @@ public void QualityEstimation_TxtFiles() } [Test] - public void QualityEstimation_Verses() + public void ChrF3QualityEstimation_Verses() { - var qualityEstimation = new QualityEstimation(slope: 0.6, intercept: 1.0); - var confidences = new Dictionary - { - [new VerseRef(1, 1, 1)] = 85.0, - [new VerseRef(1, 1, 2)] = 80.0, - [new VerseRef(1, 2, 1)] = 60.0, - }; + var qualityEstimation = new ChrF3QualityEstimation(slope: 0.6, intercept: 1.0); + List<(ScriptureRef key, double confidence)> confidences = + [ + (new ScriptureRef(new VerseRef(1, 1, 1)), 85.0), + (new ScriptureRef(new VerseRef(1, 1, 2)), 80.0), + (new ScriptureRef(new VerseRef(1, 2, 1)), 60.0), + ]; qualityEstimation.EstimateQuality(confidences); using (Assert.EnterMultipleScope()) { From 8e5fbe86ee6b09538be5b96484ee7c34b5b6677c Mon Sep 17 00:00:00 2001 From: Peter Chapman Date: Thu, 26 Mar 2026 09:03:14 +1300 Subject: [PATCH 5/5] Refactored quality estimation to use tuples --- .../ChrF3QualityEstimation.cs | 111 ++++++++---------- .../ChrF3QualityEstimationTests.cs | 41 ++++--- 2 files changed, 75 insertions(+), 77 deletions(-) diff --git a/src/SIL.Machine/QualityEstimation/ChrF3QualityEstimation.cs b/src/SIL.Machine/QualityEstimation/ChrF3QualityEstimation.cs index 88bf0e91..e3f7d7b5 100644 --- a/src/SIL.Machine/QualityEstimation/ChrF3QualityEstimation.cs +++ b/src/SIL.Machine/QualityEstimation/ChrF3QualityEstimation.cs @@ -50,49 +50,32 @@ public ChrF3QualityEstimation(double slope, double intercept) /// public UsabilityParameters Unusable { get; set; } = UsabilityParameters.Unusable; - /// - /// The usability scores for every book. - /// - public List UsabilityBooks { get; } = new List(); - - /// - /// The usability scores for every chapter. - /// - public List UsabilityChapters { get; } = new List(); - - /// - /// The usability scores for every line in a text file. - /// - public List UsabilitySequences { get; } = new List(); - - /// - /// The usability scores for every text file. - /// - public List UsabilityTxtFiles { get; } = new List(); - - /// - /// The usability scores for every verse. - /// - public List UsabilityVerses { get; } = new List(); - /// /// Estimate the quality of the pre-translations from text files. /// /// The confidence values. - public void EstimateQuality(IEnumerable<(MultiKeyRef key, double confidence)> confidences) + /// The usability scores for every line in the text files, and for the text files. + public (List usabilitySequences, List usabilityTxtFiles) EstimateQuality( + IEnumerable<(MultiKeyRef key, double confidence)> confidences + ) { ProjectChrF3(confidences); - ComputeSequenceUsability(); + return ComputeSequenceUsability(); } /// /// Estimate the quality of the pre-translations from USFM files. /// /// The confidence values. - public void EstimateQuality(IEnumerable<(ScriptureRef key, double confidence)> confidences) + /// The usability scores for every verse, chapter, and book. + public ( + List usabilityVerses, + List usabilityChapters, + List usabilityBooks + ) EstimateQuality(IEnumerable<(ScriptureRef key, double confidence)> confidences) { ProjectChrF3(confidences); - ComputeVerseUsability(); + return ComputeVerseUsability(); } /// @@ -119,8 +102,9 @@ private double CalculateUsableProbability(double chrF3) return usableWeight / (usableWeight + unusableWeight); } - private void ComputeBookUsability() + private List ComputeBookUsability() { + var usabilityBooks = new List(); foreach (string book in _bookScores.Scores.Keys) { Score score = _bookScores.GetScore(book); @@ -129,7 +113,7 @@ private void ComputeBookUsability() List bookUsabilities = _bookScores.GetVerseUsabilities(book); double averageProbability = bookUsabilities.Average(); - UsabilityBooks.Add( + usabilityBooks.Add( new BookUsability( book, label: BookThresholds.ReturnLabel(averageProbability), @@ -138,10 +122,13 @@ private void ComputeBookUsability() ) ); } + + return usabilityBooks; } - private void ComputeChapterUsability() + private List ComputeChapterUsability() { + var usabilityChapters = new List(); foreach (KeyValuePair> chapterScoresByBook in _chapterScores.Scores) { string book = chapterScoresByBook.Key; @@ -153,7 +140,7 @@ private void ComputeChapterUsability() List chapterUsabilities = _chapterScores.GetVerseUsabilities(book, chapter); double averageProbability = chapterUsabilities.Average(); - UsabilityChapters.Add( + usabilityChapters.Add( new ChapterUsability( book, chapter, @@ -164,10 +151,34 @@ private void ComputeChapterUsability() ); } } + + return usabilityChapters; + } + + private (List, List) ComputeSequenceUsability() + { + var usabilitySequences = new List(); + foreach (SequenceScore sequenceScore in _sequenceScores) + { + double probability = CalculateUsableProbability(sequenceScore.ProjectedChrF3); + _txtFileScores.AppendSequenceUsability(sequenceScore.TargetDraftFileStem, probability); + usabilitySequences.Add( + new SequenceUsability( + targetDraftFile: sequenceScore.TargetDraftFileStem, + sequenceNumber: sequenceScore.SequenceNumber, + label: VerseThresholds.ReturnLabel(probability), + usability: probability, + projectedChrF3: sequenceScore.ProjectedChrF3 + ) + ); + } + + return (usabilitySequences, ComputeTxtFileUsability()); } - private void ComputeTxtFileUsability() + private List ComputeTxtFileUsability() { + var usabilityTxtFiles = new List(); foreach (string targetDraftFileStem in _txtFileScores.Scores.Keys) { Score score = _txtFileScores.GetScore(targetDraftFileStem); @@ -176,7 +187,7 @@ private void ComputeTxtFileUsability() List txtFileUsabilities = _txtFileScores.GetSequenceUsabilities(targetDraftFileStem); double averageProbability = txtFileUsabilities.Average(); - UsabilityTxtFiles.Add( + usabilityTxtFiles.Add( new TxtFileUsability( targetDraftFileStem, label: BookThresholds.ReturnLabel(averageProbability), @@ -185,10 +196,13 @@ private void ComputeTxtFileUsability() ) ); } + + return usabilityTxtFiles; } - private void ComputeVerseUsability() + private (List, List, List) ComputeVerseUsability() { + var usabilityVerses = new List(); foreach (VerseScore verseScore in _verseScores.Where(v => v.ScriptureRef.VerseNum > 0)) { double probability = CalculateUsableProbability(verseScore.ProjectedChrF3); @@ -198,7 +212,7 @@ private void ComputeVerseUsability() probability ); _bookScores.AppendVerseUsability(verseScore.ScriptureRef.Book, probability); - UsabilityVerses.Add( + usabilityVerses.Add( new VerseUsability( book: verseScore.ScriptureRef.Book, chapter: verseScore.ScriptureRef.ChapterNum, @@ -210,28 +224,7 @@ private void ComputeVerseUsability() ); } - ComputeChapterUsability(); - ComputeBookUsability(); - } - - private void ComputeSequenceUsability() - { - foreach (SequenceScore sequenceScore in _sequenceScores) - { - double probability = CalculateUsableProbability(sequenceScore.ProjectedChrF3); - _txtFileScores.AppendSequenceUsability(sequenceScore.TargetDraftFileStem, probability); - UsabilitySequences.Add( - new SequenceUsability( - targetDraftFile: sequenceScore.TargetDraftFileStem, - sequenceNumber: sequenceScore.SequenceNumber, - label: VerseThresholds.ReturnLabel(probability), - usability: probability, - projectedChrF3: sequenceScore.ProjectedChrF3 - ) - ); - } - - ComputeTxtFileUsability(); + return (usabilityVerses, ComputeChapterUsability(), ComputeBookUsability()); } private void ProjectChrF3(IEnumerable<(MultiKeyRef, double)> confidences) diff --git a/tests/SIL.Machine.Tests/QualityEstimation/ChrF3QualityEstimationTests.cs b/tests/SIL.Machine.Tests/QualityEstimation/ChrF3QualityEstimationTests.cs index 39db8af5..da09a074 100644 --- a/tests/SIL.Machine.Tests/QualityEstimation/ChrF3QualityEstimationTests.cs +++ b/tests/SIL.Machine.Tests/QualityEstimation/ChrF3QualityEstimationTests.cs @@ -17,16 +17,17 @@ public void ChrF3QualityEstimation_TxtFiles() (new MultiKeyRef("MAT.txt", 2), 80.0), (new MultiKeyRef("MRK.txt", 1), 60.0), ]; - qualityEstimation.EstimateQuality(confidences); + (List usabilitySequences, List usabilityTxtFiles) = + qualityEstimation.EstimateQuality(confidences); using (Assert.EnterMultipleScope()) { - Assert.That(qualityEstimation.UsabilitySequences, Has.Count.EqualTo(3)); - Assert.That(qualityEstimation.UsabilitySequences[0].Label, Is.EqualTo(UsabilityLabel.Green)); - Assert.That(qualityEstimation.UsabilitySequences[1].Label, Is.EqualTo(UsabilityLabel.Yellow)); - Assert.That(qualityEstimation.UsabilitySequences[2].Label, Is.EqualTo(UsabilityLabel.Red)); - Assert.That(qualityEstimation.UsabilityTxtFiles, Has.Count.EqualTo(2)); - Assert.That(qualityEstimation.UsabilityTxtFiles[0].Label, Is.EqualTo(UsabilityLabel.Green)); - Assert.That(qualityEstimation.UsabilityTxtFiles[1].Label, Is.EqualTo(UsabilityLabel.Red)); + Assert.That(usabilitySequences, Has.Count.EqualTo(3)); + Assert.That(usabilitySequences[0].Label, Is.EqualTo(UsabilityLabel.Green)); + Assert.That(usabilitySequences[1].Label, Is.EqualTo(UsabilityLabel.Yellow)); + Assert.That(usabilitySequences[2].Label, Is.EqualTo(UsabilityLabel.Red)); + Assert.That(usabilityTxtFiles, Has.Count.EqualTo(2)); + Assert.That(usabilityTxtFiles[0].Label, Is.EqualTo(UsabilityLabel.Green)); + Assert.That(usabilityTxtFiles[1].Label, Is.EqualTo(UsabilityLabel.Red)); } } @@ -40,18 +41,22 @@ public void ChrF3QualityEstimation_Verses() (new ScriptureRef(new VerseRef(1, 1, 2)), 80.0), (new ScriptureRef(new VerseRef(1, 2, 1)), 60.0), ]; - qualityEstimation.EstimateQuality(confidences); + ( + List usabilityVerses, + List usabilityChapters, + List usabilityBooks + ) = qualityEstimation.EstimateQuality(confidences); using (Assert.EnterMultipleScope()) { - Assert.That(qualityEstimation.UsabilityVerses, Has.Count.EqualTo(3)); - Assert.That(qualityEstimation.UsabilityVerses[0].Label, Is.EqualTo(UsabilityLabel.Green)); - Assert.That(qualityEstimation.UsabilityVerses[1].Label, Is.EqualTo(UsabilityLabel.Yellow)); - Assert.That(qualityEstimation.UsabilityVerses[2].Label, Is.EqualTo(UsabilityLabel.Red)); - Assert.That(qualityEstimation.UsabilityChapters, Has.Count.EqualTo(2)); - Assert.That(qualityEstimation.UsabilityChapters[0].Label, Is.EqualTo(UsabilityLabel.Green)); - Assert.That(qualityEstimation.UsabilityChapters[1].Label, Is.EqualTo(UsabilityLabel.Red)); - Assert.That(qualityEstimation.UsabilityBooks, Has.Count.EqualTo(1)); - Assert.That(qualityEstimation.UsabilityBooks[0].Label, Is.EqualTo(UsabilityLabel.Yellow)); + Assert.That(usabilityVerses, Has.Count.EqualTo(3)); + Assert.That(usabilityVerses[0].Label, Is.EqualTo(UsabilityLabel.Green)); + Assert.That(usabilityVerses[1].Label, Is.EqualTo(UsabilityLabel.Yellow)); + Assert.That(usabilityVerses[2].Label, Is.EqualTo(UsabilityLabel.Red)); + Assert.That(usabilityChapters, Has.Count.EqualTo(2)); + Assert.That(usabilityChapters[0].Label, Is.EqualTo(UsabilityLabel.Green)); + Assert.That(usabilityChapters[1].Label, Is.EqualTo(UsabilityLabel.Red)); + Assert.That(usabilityBooks, Has.Count.EqualTo(1)); + Assert.That(usabilityBooks[0].Label, Is.EqualTo(UsabilityLabel.Yellow)); } } }