Skip to content

Commit 55556b1

Browse files
Update crfcut.py
Formatting checks handled
1 parent c37670d commit 55556b1

File tree

1 file changed

+9
-9
lines changed

1 file changed

+9
-9
lines changed

pythainlp/tokenize/crfcut.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -198,21 +198,21 @@ def segment(text: str) -> List[str]:
198198
feat = extract_features(toks)
199199
labs = _tagger.tag(feat)
200200
labs[-1] = "E" # make sure it cuts the last sentence
201-
202-
#To ensure splitting of sentences using Terminal Punctuation
201+
202+
# Force a sentence boundary after any token ending in terminal punctuation ('!', '.', '?').
203203
for idx, _ in enumerate(toks):
204-
if(toks[idx].strip().endswith(('!', '.', '?'))):
205-
labs[idx] = "E"
206-
207-
#Spaces or empty strings would no longer be treated as end of the sentence.
208-
elif(toks[idx].strip() == ""):
209-
labs[idx] = "I"
204+
if(toks[idx].strip().endswith(('!', '.', '?'))):
205+
labs[idx] = "E"
206+
207+
# Whitespace-only or empty tokens are labelled "I" so they never end a sentence.
208+
elif(toks[idx].strip() == ""):
209+
labs[idx] = "I"
210210

211211
sentences = []
212212
sentence = ""
213213
for i, w in enumerate(toks):
214214
sentence = sentence + w
215-
#Constraining empty strings to get added, to avoid any sort of unusual behaviour due to empty strings.
215+
# Empty strings should not be part of output.
216216
if labs[i] == "E" and sentence != '':
217217
sentences.append(sentence)
218218
sentence = ""

0 commit comments

Comments
 (0)