src/jiwer/alignment.py

            
                      Original file line number
                      Diff line number
                      Diff line change
                  
    @@ -21,7 +21,7 @@
  
    pairs.

    """

    from typing import List, Union

    from typing import List, Union, Optional

    from jiwer.process import CharacterOutput, WordOutput, AlignmentChunk

    @@ -32,6 +32,7 @@ def visualize_alignment(
  
        output: Union[WordOutput, CharacterOutput],

        show_measures: bool = True,

        skip_correct: bool = True,

        line_width: Optional[int] = None,

    ) -> str:

        """

        Visualize the output of [jiwer.process_words][process.process_words] and

    @@ -45,6 +46,7 @@ def visualize_alignment(
  
            show_measures: If enabled, the visualization will include measures like the WER

                           or CER

            skip_correct: If enabled, the visualization will exclude correct reference and hypothesis pairs

            line_width: If set, try, at best effort, to split sentences into multiple lines if they exceed the width.

        Returns:

            (str): The visualization as a string

    @@ -95,6 +97,18 @@ def visualize_alignment(
  
            HYP: quite * bit of an even longest sentence here

                       D         I    I       S             I

            ```

            When setting `line_width=80`, the following output will be split into multiple lines:

            ```txt

            sentence 1

            REF: This is a very  long sentence that is *** much longer than the previous one

            HYP: This is a very loong sentence that is not much longer than the previous one

                                    S                    I

            REF: or the one before that

            HYP: or *** one before that

                      D

            ```

        """

        references = output.references

        hypothesis = output.hypotheses

    @@ -110,7 +124,7 @@ def visualize_alignment(
  
            final_str += f"sentence {idx+1}\n"

            final_str += _construct_comparison_string(

                gt, hp, chunks, include_space_seperator=not is_cer

                gt, hp, chunks, include_space_seperator=not is_cer, line_width=line_width

            )

            final_str += "\n"

    @@ -140,10 +154,12 @@ def _construct_comparison_string(
  
        hypothesis: List[str],

        ops: List[AlignmentChunk],

        include_space_seperator: bool = False,

        line_width: Optional[int] = None,

    ) -> str:

        ref_str = "REF: "

        hyp_str = "HYP: "

        op_str = "     "

        agg_str = ""  # aggregates completed REF/HYP/op line groups when splitting on line_width

        for op in ops:

            if op.type == "equal" or op.type == "substitute":

    @@ -165,6 +181,19 @@ def _construct_comparison_string(
  
            for rf, hp, c in zip(ref, hyp, op_chars):

                str_len = max(len(rf), len(hp), len(c))

                if line_width is not None:

                    if len(ref_str) + str_len > line_width:

                        # aggregate the strings

                        if include_space_seperator:

                            agg_str += f"{ref_str[:-1]}\n{hyp_str[:-1]}\n{op_str[:-1]}\n\n"

                        else:

                            agg_str += f"{ref_str}\n{hyp_str}\n{op_str}\n\n"

                        # reset the strings

                        ref_str = "REF: "

                        hyp_str = "HYP: "

                        op_str = "     "

                if rf == "*":

                    rf = "".join(["*"] * str_len)

                elif hp == "*":

    @@ -181,6 +210,6 @@ def _construct_comparison_string(
  
        if include_space_seperator:

            # remove last space

            return f"{ref_str[:-1]}\n{hyp_str[:-1]}\n{op_str[:-1]}\n"

            return agg_str + f"{ref_str[:-1]}\n{hyp_str[:-1]}\n{op_str[:-1]}\n"

        else:

            return f"{ref_str}\n{hyp_str}\n{op_str}\n"

            return agg_str + f"{ref_str}\n{hyp_str}\n{op_str}\n"

tests/test_alignment.py

-Original file line number
+Diff line change
@@ -1,5 +1,6 @@
     import unittest
     import jiwer
+    from jiwer import visualize_alignment
     class TestAlignmentVisualizationWords(unittest.TestCase):
@@ Expand Down Expand Up / @@ -143,6 +144,25 @@ def test_empty_ref_with_hyp_deletion(self): @@
             )
             self.assertEqual(alignment, correct_alignment)
+        def test_line_width(self):
+            correct = """sentence 1
+    REF: this sentence could be
+    HYP: this sentence  will be
+                           S
+    REF: split ** **
+    HYP: split by ai
+                I  I
+    """
+            alignment = visualize_alignment(
+                jiwer.process_words(
+                    "this sentence could be split", "this sentence will be split by ai"
+                ),
+                line_width=30,
+                show_measures=False,
+            )
+            self.assertEqual(correct, alignment)
     class TestAlignmentVisualizationCharacters(unittest.TestCase):
         def test_insertion(self):
@@ Expand Down Expand Up / @@ -263,3 +283,22 @@ def test_empty_ref_with_hyp_deletion(self): @@
                 show_measures=False,
             )
             self.assertEqual(alignment, correct_alignment)
+        def test_line_width(self):
+            correct = """sentence 1
+    REF: this sentence could be sp
+    HYP: this sentence will* be sp
+                       SSS D
+    REF: lit******
+    HYP: lit by ai
+            IIIIII
+    """
+            alignment = visualize_alignment(
+                jiwer.process_characters(
+                    "this sentence could be split", "this sentence will be split by ai"
+                ),
+                line_width=30,
+                show_measures=False,
+            )
+            self.assertEqual(correct, alignment)

feat: optionally split alignment visualization #110

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged

nikvaessen merged 2 commits into jitsi:master from nikvaessen:split_lines

Feb 2, 2025

-Original file line number
+Diff line change
@@ -1,5 +1,6 @@
     import unittest
     import jiwer
+    from jiwer import visualize_alignment
     class TestAlignmentVisualizationWords(unittest.TestCase):
@@ Expand Down Expand Up / @@ -143,6 +144,25 @@ def test_empty_ref_with_hyp_deletion(self): @@
             )
             self.assertEqual(alignment, correct_alignment)
+        def test_line_width(self):
+            correct = """sentence 1
+    REF: this sentence could be
+    HYP: this sentence  will be
+                           S
+    REF: split ** **
+    HYP: split by ai
+                I  I
+    """
+            alignment = visualize_alignment(
+                jiwer.process_words(
+                    "this sentence could be split", "this sentence will be split by ai"
+                ),
+                line_width=30,
+                show_measures=False,
+            )
+            self.assertEqual(correct, alignment)
     class TestAlignmentVisualizationCharacters(unittest.TestCase):
         def test_insertion(self):
@@ Expand Down Expand Up / @@ -263,3 +283,22 @@ def test_empty_ref_with_hyp_deletion(self): @@
                 show_measures=False,
             )
             self.assertEqual(alignment, correct_alignment)
+        def test_line_width(self):
+            correct = """sentence 1
+    REF: this sentence could be sp
+    HYP: this sentence will* be sp
+                       SSS D
+    REF: lit******
+    HYP: lit by ai
+            IIIIII
+    """
+            alignment = visualize_alignment(
+                jiwer.process_characters(
+                    "this sentence could be split", "this sentence will be split by ai"
+                ),
+                line_width=30,
+                show_measures=False,
+            )
+            self.assertEqual(correct, alignment)

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

feat: optionally split alignment visualization #110

Uh oh!

Diff view

Diff view

There are no files selected for viewing