Skip to content

Commit 485b12d

Browse files
committed
Add new karun handler to khavee matra checker
1 parent baa645f commit 485b12d

File tree

1 file changed

+20
-5
lines changed

1 file changed

+20
-5
lines changed

pythainlp/khavee/core.py

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -214,11 +214,7 @@ def check_marttra(self, word: str) -> str:
214214
"""
215215
if word[-1] == 'ร' and word[-2] in ['ต','ท'] :
216216
word = word[:-1]
217-
if '์' in word[-1]:
218-
if 'ิ' in word[-2] or 'ุ' in word[-2]:
219-
word = word[:-3]
220-
else:
221-
word = word[:-2]
217+
word = self.handle_karun_sound_silenced(word)
222218
if 'ำ' in word or ('ํ' in word and 'า' in word) or 'ไ' in word or 'ใ' in word:
223219
return 'กา'
224220
elif word[-1] in ['า','ะ','ิ','ี','ุ','ู','อ'] or ('ี' in word and 'ย' in word[-1]) or ('ื' in word and 'อ' in word[-1]):
@@ -451,3 +447,22 @@ def check_aek_too(self, text: Union[List[str], str], dead_syllable_as_aek:bool =
451447
return 'aek'
452448
else:
453449
return False
450+
451+
def handle_karun_sound_silence(text: str) -> str:
    """
    Strip the silenced tail of a Thai word that ends with the karun mark ('์').

    If ``text`` does not end with '์' it is returned unchanged. Otherwise the
    karun is removed together with the one or two characters immediately
    before it. Two characters are removed when the character three positions
    before the karun exists and is a Thai consonant; otherwise one is removed.

    :param str text: Thai word
    :return: Thai word with the silenced characters and the karun stripped
    :rtype: str
    """
    # NOTE(review): the visible call site in check_marttra is written as
    # `self.handle_karun_sound_silenced(word)`, a different name invoked
    # through `self`. Reconcile the call site with this definition.
    karun = '์'
    if not text.endswith(karun):
        return text

    thai_consonants = "กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรลวศษสหฬอฮ"

    # The karun is the last character, so the character it sits on is at
    # len(text) - 2.
    locate_silenced = len(text) - 2

    # Look two positions further back. Guard against negative indexes so that
    # short words neither wrap around to the end of the string nor raise
    # IndexError.
    look_behind = locate_silenced - 2
    can_silence_two = look_behind >= 0 and text[look_behind] in thai_consonants

    cut_off = 2 if can_silence_two else 1
    # Clamp at 0 so a degenerate input (e.g. a lone karun) yields "" rather
    # than a negative slice bound.
    keep_until = max(locate_silenced + 1 - cut_off, 0)
    return text[:keep_until]

0 commit comments

Comments
 (0)