|
| 1 | +import numpy as np |
| 2 | +from collections import Counter |
| 3 | + |
def meteor_score(reference, candidate, alpha=0.9, beta=3, gamma=0.5):
    """Compute a simplified METEOR score using exact unigram matches.

    The score is the parameterized harmonic mean of unigram precision
    and recall, scaled by a fragmentation penalty that grows with the
    number of contiguous match "chunks".

    Args:
        reference: Reference sentence (non-empty string).
        candidate: Candidate sentence (non-empty string).
        alpha: Recall/precision weighting in the harmonic mean.
        beta: Exponent of the fragmentation penalty.
        gamma: Scale of the fragmentation penalty.

    Returns:
        METEOR score rounded to 3 decimal places (0.0 when nothing matches).

    Raises:
        ValueError: If either input string is empty.
    """
    if not reference or not candidate:
        raise ValueError("Reference and candidate cannot be empty")

    # Case-insensitive whitespace tokenization.
    ref_words = reference.lower().split()
    cand_words = candidate.lower().split()

    # Clipped unigram overlap: each reference occurrence matches at most once.
    matches = sum((Counter(ref_words) & Counter(cand_words)).values())
    if matches == 0:
        return 0.0

    # matches > 0 implies both token lists are non-empty, so no zero division.
    precision = matches / len(cand_words)
    recall = matches / len(ref_words)
    fmean = (precision * recall) / (alpha * precision + (1 - alpha) * recall)

    # Index every reference word's positions (ascending order).
    slots = {}
    for idx, word in enumerate(ref_words):
        slots.setdefault(word, []).append(idx)

    # Greedy alignment: each candidate word claims the earliest unused
    # reference position for that word.
    aligned = []
    for word in cand_words:
        available = slots.get(word)
        if available:
            aligned.append(available.pop(0))

    # A new chunk starts wherever the aligned reference positions stop
    # being consecutive.
    chunks = 0
    prev = None
    for pos in aligned:
        if prev is None or pos != prev + 1:
            chunks += 1
        prev = pos

    # Fragmentation penalty: more chunks per match -> larger penalty.
    penalty = gamma * ((chunks / matches) ** beta)

    return round(fmean * (1 - penalty), 3)
| 59 | + |
def test_meteor_score():
    """Self-tests for meteor_score.

    Expected values are computed from the formulas in meteor_score
    (alpha=0.9, beta=3, gamma=0.5) and rounded to 3 decimal places.
    The previous expectations (1.0, 0.991, 0.667, 0.933) did not match
    what the implementation actually returns.
    """
    # Test Case 1: Identical translations.
    # Even a perfect match keeps the fragmentation penalty
    # 0.5 * (1/6) ** 3, so the score is just below 1.0.
    ref_test1 = "The cat sits on the mat"
    cand_test1 = "The cat sits on the mat"
    expected1 = 0.998
    assert meteor_score(ref_test1, cand_test1) == expected1, "Test Case 1 Failed"

    # Test Case 2: Similar translations.
    # 7 of 9 unigrams match in 2 chunks: fmean = 7/9,
    # penalty = 0.5 * (2/7) ** 3 -> score 0.769.
    ref_test2 = "The quick brown fox jumps over the lazy dog"
    cand_test2 = "A quick brown fox jumps over a lazy dog"
    expected2 = 0.769
    assert meteor_score(ref_test2, cand_test2) == expected2, "Test Case 2 Failed"

    # Test Case 3: Completely different translations.
    # Candidate deliberately shares no unigram with the reference
    # (the old candidate "Dogs run in the park" shared "the").
    ref_test3 = "The cat sits on the mat"
    cand_test3 = "Dogs run in a park"
    expected3 = 0.0
    assert meteor_score(ref_test3, cand_test3) == expected3, "Test Case 3 Failed"

    # Test Case 4: Partially matching translations.
    # "machine learning" matches: P = 2/5, R = 2/6, one chunk,
    # penalty = 0.5 * (1/2) ** 3 -> score 0.318.
    ref_test4 = "Machine learning is an exciting field"
    cand_test4 = "Machine learning algorithms are fascinating"
    expected4 = 0.318
    assert meteor_score(ref_test4, cand_test4) == expected4, "Test Case 4 Failed"

    # Test Case 5: Empty input must raise ValueError.
    try:
        meteor_score("", "Some text")
        assert False, "Test Case 5 Failed"
    except ValueError:
        pass

    # Test Case 6: Same words, reordered.
    # All 6 unigrams match in 3 chunks: penalty = 0.5 * (3/6) ** 3
    # -> round(0.9375, 3) = 0.938.
    ref_test6 = "The cat sits on the mat"
    cand_test6 = "The cat on the mat sits"
    expected6 = 0.938
    assert meteor_score(ref_test6, cand_test6) == expected6, "Test Case 6 Failed"
| 97 | + |
# Script entry point: run the self-tests and report success.
# Any failing assertion inside test_meteor_score aborts before the print.
if __name__ == "__main__":
    test_meteor_score()
    print("All Test Cases Passed!")