9
9
10
10
11
11
@multimethod
12
- def distance (seq1 : List [str ], seq2 : List [str ]) -> int :
12
+ def distance (seq1 : List [str ], seq2 : List [str ]) -> float :
13
13
"""Compute the Levenshtein edit distance between two lists of grapheme clusters.
14
14
15
15
This assumes that the grapheme clusters are already normalized.
16
16
17
17
Use distance(str, str) instead if you need to compare two Unicode strings.
18
18
"""
19
- return Levenshtein .distance (seq1 , seq2 )
19
+ return Levenshtein .normalized_distance (seq1 , seq2 )
20
20
21
21
22
22
@distance .register
23
- def _ (s1 : str , s2 : str ) -> int :
23
+ def _ (s1 : str , s2 : str ) -> float :
24
24
"""Compute the Levenshtein edit distance between two Unicode strings
25
25
26
26
Note that this is different from levenshtein() as this function knows about Unicode
@@ -29,12 +29,12 @@ def _(s1: str, s2: str) -> int:
29
29
"""
30
30
seq1 = list (grapheme_clusters (unicodedata .normalize ("NFC" , s1 )))
31
31
seq2 = list (grapheme_clusters (unicodedata .normalize ("NFC" , s2 )))
32
- return Levenshtein .distance (seq1 , seq2 )
32
+ return Levenshtein .normalized_distance (seq1 , seq2 )
33
33
34
34
35
35
@distance .register
36
- def _ (s1 : ExtractedText , s2 : ExtractedText ) -> int :
37
- return Levenshtein .distance (s1 .grapheme_clusters , s2 .grapheme_clusters )
36
+ def _ (s1 : ExtractedText , s2 : ExtractedText ) -> float :
37
+ return Levenshtein .normalized_distance (s1 .grapheme_clusters , s2 .grapheme_clusters )
38
38
39
39
40
40
def editops (word1 , word2 ):
0 commit comments