|
16 | 16 | from pythainlp.tokenize import subword_tokenize |
17 | 17 | from pythainlp.util import sound_syllable |
18 | 18 |
|
19 | | - |
20 | 19 | class KhaveeVerifier: |
21 | 20 | def __init__(self): |
22 | 21 | """ |
@@ -215,11 +214,7 @@ def check_marttra(self, word: str) -> str: |
215 | 214 | """ |
216 | 215 | if word[-1] == 'ร' and word[-2] in ['ต','ท'] : |
217 | 216 | word = word[:-1] |
218 | | - if '์' in word[-1]: |
219 | | - if 'ิ' in word[-2] or 'ุ' in word[-2]: |
220 | | - word = word[:-3] |
221 | | - else: |
222 | | - word = word[:-2] |
| 217 | + word = self.handle_karun_sound_silence(word) |
223 | 218 | if 'ำ' in word or ('ํ' in word and 'า' in word) or 'ไ' in word or 'ใ' in word: |
224 | 219 | return 'กา' |
225 | 220 | elif word[-1] in ['า','ะ','ิ','ี','ุ','ู','อ'] or ('ี' in word and 'ย' in word[-1]) or ('ื' in word and 'อ' in word[-1]): |
@@ -417,7 +412,6 @@ def check_klon(self, text: str,k_type: int=8) -> Union[List[str], str]: |
417 | 412 | def check_aek_too(self, text: Union[List[str], str], dead_syllable_as_aek:bool = False) -> Union[List[bool], List[str], bool, str]: |
418 | 413 | """ |
419 | 414 | Thai tonal word checker |
420 | | -
|
421 | 415 | :param Union[List[str], str] text: Thai word or list of Thai words |
422 | 416 | :param bool dead_syllable_as_aek: if True, dead syllable will be considered as aek |
423 | 417 | :return: whether the word is aek or too, or False if it is neither; a list of such results if the input is a list |
@@ -453,3 +447,22 @@ def check_aek_too(self, text: Union[List[str], str], dead_syllable_as_aek:bool = |
453 | 447 | return 'aek' |
454 | 448 | else: |
455 | 449 | return False |
| 450 | + |
| 451 | + def handle_karun_sound_silence(self, word: str) -> str: |
| 452 | + """ |
| 453 | + Handle a silenced sound in a Thai word marked with the '์' character (Karun) |
| 454 | + by stripping the trailing Karun together with the preceding characters that it silences |
| 455 | +
|
| 456 | + :param str word: Thai word |
| 457 | + :return: Thai word with the silenced characters stripped |
| 458 | + :rtype: str |
| 459 | + """ |
| 460 | + sound_silenced = True if word.endswith('์') else False |
| 461 | + if not sound_silenced: |
| 462 | + return word |
| 463 | + thai_consonants = "กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรลวศษสหฬอฮ" |
| 464 | + locate_silenced = word.rfind('์') - 1 |
| 465 | + can_silence_two = True if word[locate_silenced-2] in thai_consonants else False |
| 466 | + cut_off = 2 if can_silence_two else 1 |
| 467 | + word = word[:locate_silenced + 1 - cut_off] |
| 468 | + return word |
0 commit comments