From 952f3f03e59b40f10f6c934e4fba4d0aea70565d Mon Sep 17 00:00:00 2001
From: mahdibaghbanzadeh
Date: Wed, 12 Mar 2025 16:09:53 -0400
Subject: [PATCH] Fix Filtering Logic for Normalized WER Calculation

---
 chapters/en/chapter5/evaluation.mdx | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/chapters/en/chapter5/evaluation.mdx b/chapters/en/chapter5/evaluation.mdx
index 88875db..e2f2e7b 100644
--- a/chapters/en/chapter5/evaluation.mdx
+++ b/chapters/en/chapter5/evaluation.mdx
@@ -348,18 +348,16 @@ normalizer = BasicTextNormalizer()
 all_predictions_norm = [normalizer(pred) for pred in all_predictions]
 all_references_norm = [normalizer(label) for label in common_voice_test["sentence"]]
 
-# filtering step to only evaluate the samples that correspond to non-zero references
-all_predictions_norm = [
-    all_predictions_norm[i]
-    for i in range(len(all_predictions_norm))
-    if len(all_references_norm[i]) > 0
-]
-all_references_norm = [
-    all_references_norm[i]
+# Filtering step to only evaluate the samples that correspond to non-zero references
+filtered_pairs = [
+    (all_predictions_norm[i], all_references_norm[i])
     for i in range(len(all_references_norm))
     if len(all_references_norm[i]) > 0
 ]
 
+# Unpack filtered lists
+all_predictions_norm, all_references_norm = zip(*filtered_pairs) if filtered_pairs else ([], [])
+
 wer = 100 * wer_metric.compute(
     references=all_references_norm, predictions=all_predictions_norm
 )