Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 33 additions & 4 deletions src/jiwer/alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
pairs.
"""

from typing import List, Union
from typing import List, Union, Optional

from jiwer.process import CharacterOutput, WordOutput, AlignmentChunk

Expand All @@ -32,6 +32,7 @@ def visualize_alignment(
output: Union[WordOutput, CharacterOutput],
show_measures: bool = True,
skip_correct: bool = True,
line_width: Optional[int] = None,
) -> str:
"""
Visualize the output of [jiwer.process_words][process.process_words] and
Expand All @@ -45,6 +46,7 @@ def visualize_alignment(
show_measures: If enabled, the visualization will include measures like the WER
or CER
skip_correct: If enabled, the visualization will exclude correct reference and hypothesis pairs
line_width: If set, try, on a best-effort basis, to split sentences into multiple lines if they exceed the width.

Returns:
(str): The visualization as a string
Expand Down Expand Up @@ -95,6 +97,18 @@ def visualize_alignment(
HYP: quite * bit of an even longest sentence here
D I I S I
```

When setting `line_width=80`, the following output will be split into multiple lines:

```txt
sentence 1
REF: This is a very long sentence that is *** much longer than the previous one
HYP: This is a very loong sentence that is not much longer than the previous one
S I
REF: or the one before that
HYP: or *** one before that
D
```
"""
references = output.references
hypothesis = output.hypotheses
Expand All @@ -110,7 +124,7 @@ def visualize_alignment(

final_str += f"sentence {idx+1}\n"
final_str += _construct_comparison_string(
gt, hp, chunks, include_space_seperator=not is_cer
gt, hp, chunks, include_space_seperator=not is_cer, line_width=line_width
)
final_str += "\n"

Expand Down Expand Up @@ -140,10 +154,12 @@ def _construct_comparison_string(
hypothesis: List[str],
ops: List[AlignmentChunk],
include_space_seperator: bool = False,
line_width: Optional[int] = None,
) -> str:
ref_str = "REF: "
hyp_str = "HYP: "
op_str = " "
agg_str = "" # accumulates completed REF/HYP/op line groups when splitting on line_width

for op in ops:
if op.type == "equal" or op.type == "substitute":
Expand All @@ -165,6 +181,19 @@ def _construct_comparison_string(
for rf, hp, c in zip(ref, hyp, op_chars):
str_len = max(len(rf), len(hp), len(c))

if line_width is not None:
if len(ref_str) + str_len > line_width:
# aggregate the strings
if include_space_seperator:
agg_str += f"{ref_str[:-1]}\n{hyp_str[:-1]}\n{op_str[:-1]}\n\n"
else:
agg_str += f"{ref_str}\n{hyp_str}\n{op_str}\n\n"

# reset the strings
ref_str = "REF: "
hyp_str = "HYP: "
op_str = " "

if rf == "*":
rf = "".join(["*"] * str_len)
elif hp == "*":
Expand All @@ -181,6 +210,6 @@ def _construct_comparison_string(

if include_space_seperator:
# remove last space
return f"{ref_str[:-1]}\n{hyp_str[:-1]}\n{op_str[:-1]}\n"
return agg_str + f"{ref_str[:-1]}\n{hyp_str[:-1]}\n{op_str[:-1]}\n"
else:
return f"{ref_str}\n{hyp_str}\n{op_str}\n"
return agg_str + f"{ref_str}\n{hyp_str}\n{op_str}\n"
39 changes: 39 additions & 0 deletions tests/test_alignment.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import unittest
import jiwer
from jiwer import visualize_alignment


class TestAlignmentVisualizationWords(unittest.TestCase):
Expand Down Expand Up @@ -143,6 +144,25 @@ def test_empty_ref_with_hyp_deletion(self):
)
self.assertEqual(alignment, correct_alignment)

def test_line_width(self):
correct = """sentence 1
REF: this sentence could be
HYP: this sentence will be
S

REF: split ** **
HYP: split by ai
I I
"""
alignment = visualize_alignment(
jiwer.process_words(
"this sentence could be split", "this sentence will be split by ai"
),
line_width=30,
show_measures=False,
)
self.assertEqual(correct, alignment)


class TestAlignmentVisualizationCharacters(unittest.TestCase):
def test_insertion(self):
Expand Down Expand Up @@ -263,3 +283,22 @@ def test_empty_ref_with_hyp_deletion(self):
show_measures=False,
)
self.assertEqual(alignment, correct_alignment)

def test_line_width(self):
correct = """sentence 1
REF: this sentence could be sp
HYP: this sentence will* be sp
SSS D

REF: lit******
HYP: lit by ai
IIIIII
"""
alignment = visualize_alignment(
jiwer.process_characters(
"this sentence could be split", "this sentence will be split by ai"
),
line_width=30,
show_measures=False,
)
self.assertEqual(correct, alignment)