Skip to content

Commit a108ca9

Browse files
committed
Update core.py
1 parent b546f89 commit a108ca9

File tree

1 file changed

+10
-2
lines changed

1 file changed

+10
-2
lines changed

pythainlp/tokenize/core.py

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,11 @@ def clause_tokenize(doc: List[str]) -> List[List[str]]:
4646
return segment(doc)
4747

4848

49-
def word_detokenize(segments: Union[List[List[str]], List[str]], output: str = "str") -> Union[str, List[str]]:
49+
def word_detokenize(
50+
segments: Union[List[List[str]],
51+
List[str]],
52+
output: str = "str"
53+
) -> Union[str, List[str]]:
5054
"""
5155
Word detokenizer.
5256
@@ -68,7 +72,11 @@ def word_detokenize(segments: Union[List[List[str]], List[str]], output: str = "
6872
# previous word
6973
p_w = s[j-1]
7074
# if w is number or other language and not be space
71-
if w[0] not in thai_characters and not w.isspace() and not p_w.isspace():
75+
if (
76+
w[0] not in thai_characters
77+
and not w.isspace()
78+
and not p_w.isspace()
79+
):
7280
_list_sents.append(" ")
7381
_add_index.append(j)
7482
# if previous word is number or other language and not be space

0 commit comments

Comments (0)