Skip to content

Commit a33b713

Browse files
committed
adapt tests
1 parent ca5de57 commit a33b713

8 files changed

+14
-14
lines changed

src/dinglehopper/tests/test_character_error_rate.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -14,9 +14,9 @@ def test_character_error_rate():
1414
assert character_error_rate("Foo", "") == 3 / 3
1515

1616
assert character_error_rate("", "") == 0
17-
assert math.isinf(character_error_rate("", "Foo"))
17+
assert character_error_rate("", "Foo") == 3 / 3
1818

19-
assert character_error_rate("Foo", "Food") == 1 / 3
19+
assert character_error_rate("Foo", "Food") == 1 / 4
2020
assert character_error_rate("Fnord", "Food") == 2 / 5
2121
assert character_error_rate("Müll", "Mull") == 1 / 4
2222
assert character_error_rate("Abstand", "Sand") == 4 / 7

src/dinglehopper/tests/test_edit_distance.py

+3-3
Original file line number | Diff line number | Diff line change
@@ -6,8 +6,8 @@
66

77

88
def test_distance():
9-
assert distance("Fnord", "Food") == 2
10-
assert distance("Müll", "Mull") == 1
9+
assert distance("Fnord", "Food") == 2 / 5
10+
assert distance("Müll", "Mull") == 1 / 4
1111

1212
word1 = unicodedata.normalize("NFC", "Schlyñ")
1313
word2 = unicodedata.normalize("NFD", "Schlyñ") # Different, decomposed!
@@ -21,4 +21,4 @@ def test_distance():
2121
assert (
2222
len(word2) == 7
2323
) # This, OTOH, ends with LATIN SMALL LETTER M + COMBINING TILDE, 7 code points
24-
assert distance(word1, word2) == 1
24+
assert distance(word1, word2) == 1 / 6

src/dinglehopper/tests/test_integ_character_error_rate_ocr.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -56,4 +56,4 @@ def test_character_error_rate_between_page_alto_2():
5656
)
5757
)
5858

59-
assert character_error_rate(gt, ocr) == 8 / 591 # Manually verified
59+
assert character_error_rate(gt, ocr) == 8 / 594 # Manually verified

src/dinglehopper/tests/test_integ_cli_valid_json.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -32,11 +32,11 @@ def test_cli_json_cer_is_infinity(tmp_path):
3232

3333
with working_directory(tmp_path):
3434
with open("gt.txt", "w") as gtf:
35-
gtf.write("") # Empty to yield CER == inf
35+
gtf.write("")
3636
with open("ocr.txt", "w") as ocrf:
3737
ocrf.write("Not important")
3838

3939
process("gt.txt", "ocr.txt", "report")
4040
with open("report.json", "r") as jsonf:
4141
j = json.load(jsonf)
42-
assert j["cer"] == pytest.approx(float("inf"))
42+
assert j["cer"] == pytest.approx(1.0)

src/dinglehopper/tests/test_integ_edit_distance_ocr.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ def test_distance_between_page_files():
1717
# → 2 differences
1818
gt = page_text(ET.parse(os.path.join(data_dir, "test-gt.page2018.xml")))
1919
ocr = page_text(ET.parse(os.path.join(data_dir, "test-fake-ocr.page2018.xml")))
20-
assert distance(gt, ocr) == 2
20+
assert distance(gt, ocr) == 2 / 827
2121

2222

2323
@pytest.mark.integration
@@ -52,4 +52,4 @@ def test_distance_between_page_alto_2():
5252
)
5353
)
5454

55-
assert distance(gt, ocr) == 8 # Manually verified
55+
assert distance(gt, ocr) == 8 / 594 # Manually verified

src/dinglehopper/tests/test_integ_empty_files.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -12,9 +12,9 @@
1212
@pytest.mark.parametrize(
1313
"gt_file_content,ocr_file_content,cer_expected",
1414
[
15-
("", "Lorem ipsum", math.inf),
15+
("", "Lorem ipsum", 1.0),
1616
("Lorem ipsum", "", 1.0),
17-
("\ufeff", "Lorem ipsum", math.inf),
17+
("\ufeff", "Lorem ipsum", 1.0),
1818
("Lorem ipsum", "\ufeff", 1.0),
1919
("", "", 0.0),
2020
("\ufeff", "", 0.0),

src/dinglehopper/tests/test_integ_word_error_rate_ocr.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -64,5 +64,5 @@ def test_word_error_rate_between_page_alto_2():
6464
)
6565

6666
assert (
67-
word_error_rate(gt, ocr) == 7 / gt_word_count
67+
word_error_rate(gt, ocr) == 7 / (gt_word_count + 1)
6868
) # Manually verified, 6 words are wrong, 1 got split (=2 errors)

src/dinglehopper/tests/test_word_error_rate.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -76,7 +76,7 @@ def test_word_error_rate():
7676
)
7777

7878
assert word_error_rate("Dies ist ein Beispielsatz!", "") == 4 / 4
79-
assert math.isinf(word_error_rate("", "Dies ist ein Beispielsatz!"))
79+
assert word_error_rate("", "Dies ist ein Beispielsatz!") == 4 / 4
8080
assert word_error_rate("", "") == 0
8181

8282
assert (

0 commit comments

Comments
 (0)