Skip to content

Commit ae452a2

Browse files
committed
Update core.py
1 parent 695df0a commit ae452a2

File tree

1 file changed

+6
-6
lines changed

1 file changed

+6
-6
lines changed

pythainlp/tokenize/core.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,8 @@ def word_detokenize(segments: Union[List[List[str]], List[str]], output: str = "
63 63
for i, s in enumerate(segments):
64 64
_list_sents = []
65 65
_add_index = []
66+
_space_index = []
67+
_mark_index = []
66 68
for j, w in enumerate(s):
67 69
if j > 0:
68 70
# previous word
@@ -83,12 +85,10 @@ def word_detokenize(segments: Union[List[List[str]], List[str]], output: str = "
83 85
elif w == "ๆ":
84 86
if not p_w.isspace():
85 87
_list_sents.append(" ")
86-
_add_index.append(j)
87-
else:
88-
_add_index.append(j)
89-
elif w.isspace():
90-
_add_index.append(j)
91-
elif j-1 in _add_index:
88+
_mark_index.append(j)
89+
elif w.isspace() and j-1 not in _space_index:
90+
_space_index.append(j)
91+
elif j-1 in _mark_index:
92 92
_list_sents.append(" ")
93 93
_list_sents.append(w)
94 94
_list_all.append(_list_sents)

0 commit comments

Comments
 (0)