Skip to content

Commit b3c1143

Browse files
authored
Merge pull request #891 from PyThaiNLP/add-thai-morse
Add pythainlp.util.morse
2 parents dd11578 + 5079d3f commit b3c1143

File tree

3 files changed

+232
-16
lines changed

3 files changed

+232
-16
lines changed

docs/api/util.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,3 +277,13 @@ Modules
277277
:members:
278278

279279
The `Trie` class is a data structure for efficient dictionary operations. It's a valuable resource for managing and searching word lists and dictionaries in a structured and efficient manner.
280+
281+
.. autofunction:: pythainlp.util.morse.morse_encode
282+
:noindex:
283+
284+
The `pythainlp.util.morse.morse_encode` function converts text to Morse code.
285+
286+
.. autofunction:: pythainlp.util.morse.morse_decode
287+
:noindex:
288+
289+
The `pythainlp.util.morse.morse_decode` function converts Morse code to text.

pythainlp/util/morse.py

Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
# -*- coding: utf-8 -*-
2+
# SPDX-FileCopyrightText: Copyright 2016-2023 PyThaiNLP Project
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
# Encoding table: Thai character (or character sequence) -> Morse code written
# with "." for a dot and "-" for a dash.
#
# NOTE: several characters deliberately share one code (for example "ค"/"ฆ",
# "ศ"/"ษ"/"ส", "ไ"/"ใ").  The reverse table built further down in this module
# iterates this dict in insertion order, so for a shared code the entry listed
# LAST here is the one decoding returns.  Do not reorder entries casually.
THAI_MORSE_CODE = {
    "ก": "--.",
    "ข": "-.-.",
    "ค": "-.-",
    "ฆ": "-.-",
    "ง": "-.--.",
    "จ": "-..-.",
    "ฉ": "----",
    "ช": "-..-",
    "ฌ": "-..-",
    "ซ": "--..",
    "ญ": ".---",
    "ด": "-..",
    "ถ": "-.-..",
    "ฐ": "-.-..",
    "ฑ": "-..--",
    "ฒ": "-..--",
    "ท": "-..--",
    "ธ": "-..--",
    "ณ": "-.",
    "น": "-.",
    "บ": "-...",
    "ป": ".--.",
    "ผ": "--.-",
    "ฝ": "-.-.-",
    "พ": ".--..",
    "ภ": ".--..",
    "ฟ": "..-.",
    "ม": "--",
    "ย": "-.--",
    "ร": ".-.",
    "ล": ".-..",
    "ฬ": ".-..",
    "ว": ".--",
    "ศ": "...",
    "ษ": "...",
    "ส": "...",
    "ห": "....",
    "ฮ": "--.--",
    # These repeat codes already used above ("ด" and "ต"); being listed later,
    # they are what the reverse table yields for "-.." and "-".
    "ฎ": "-..",
    "ต": "-",
    "ฏ": "-",
    "ฤ": ".-.--",
    # Combining marks written above the base character.
    "่": "..-",
    "้": "...-",
    "๊": "--...",
    "๋": ".-.-.",
    "ั": ".--.-",
    "็": "---..",
    "์": "--..-",
    # Multi-character key (two code points): a lookup made one character at a
    # time can never match it; it is only reachable through the reverse table.
    "ั้": ".---.",
    "ฯ": "--.-.",
    # Multi-character key (three code points); shares its code with "ๆ" below,
    # so decoding "---.-" returns "ๆ".
    "ฯลฯ": "---.-",
    "ๆ": "---.-",
    "ะ": ".-...",
    "า": ".-",
    "ิ": "..-..",
    "ี": "..",
    "ึ": "..--.",
    "ื": "..--",
    "ุ": "..-.-",
    "ู": "---.",
    "เ": ".",
    "แ": ".-.-",
    "โ": "---",
    "ไ": ".-..-",
    "ใ": ".-..-",
    "ำ": "...-.",
    "อ": "-...-",
}
75+
76+
# Encoding table: upper-case Latin letter, digit or punctuation mark ->
# International Morse code written with "." for a dot and "-" for a dash.
# Every code in this table is unique, so the reverse (decoding) table built
# from it is unambiguous.
ENGLISH_MORSE_CODE = {
    # Letters
    "A": ".-",
    "B": "-...",
    "C": "-.-.",
    "D": "-..",
    "E": ".",
    "F": "..-.",
    "G": "--.",
    "H": "....",
    "I": "..",
    "J": ".---",
    "K": "-.-",
    "L": ".-..",
    "M": "--",
    "N": "-.",
    "O": "---",
    "P": ".--.",
    "Q": "--.-",
    "R": ".-.",
    "S": "...",
    "T": "-",
    "U": "..-",
    "V": "...-",
    "W": ".--",
    "X": "-..-",
    "Y": "-.--",
    "Z": "--..",
    # Digits interleaved with punctuation (original ordering kept)
    "0": "-----",
    ",": "--..--",
    "1": ".----",
    ".": ".-.-.-",
    "2": "..---",
    "?": "..--..",
    "3": "...--",
    ";": "-.-.-.",
    "4": "....-",
    ":": "---...",
    "5": ".....",
    "'": ".----.",
    "6": "-....",
    "-": "-....-",
    "7": "--...",
    "/": "-..-.",
    "8": "---..",
    # Fixed: "(" used to carry "-.--.-", which is the code for ")".
    "(": "-.--.",
    # Added: the digit 9 and the closing parenthesis were missing, so they
    # were silently encoded as a blank and could not be decoded at all.
    "9": "----.",
    ")": "-.--.-",
}
122+
123+
# Reverse lookup tables (Morse code -> character), built once at import time.
# When several characters share one code, the character listed last in the
# encoding table wins, because later items overwrite earlier ones.
# Comprehensions are used so that no loop variables are left behind as
# module-level names.
decodingeng = {code: char for char, code in ENGLISH_MORSE_CODE.items()}

decodingthai = {
    code.replace(" ", ""): char for char, code in THAI_MORSE_CODE.items()
}

# Strip any spaces embedded in the Thai codes so that encoding emits the same
# compact form that decodingthai is keyed on.  The replacement values are
# computed into a separate dict first and then applied with update(), so the
# table is never written to while it is being iterated and the dict object
# itself (which other modules may have imported) stays the same.
THAI_MORSE_CODE.update(
    {char: code.replace(" ", "") for char, code in THAI_MORSE_CODE.items()}
)
133+
134+
135+
def morse_encode(text: str, lang: str = "th") -> str:
    """
    Encode text as Morse code (Thai and English are supported).

    The text is upper-cased and looked up one character at a time. The codes
    are joined with a single space. A character that has no entry in the
    selected table (including a space) contributes a single space instead
    of a code.

    :param str text: Text to encode
    :param str lang: Language code (*th* is Thai and *en* is English)
    :return: Morse code
    :rtype: str
    :raises NotImplementedError: if *lang* is neither *th* nor *en*

    :Example:
    ::

        from pythainlp.util.morse import morse_encode

        print(morse_encode("แมว", lang="th"))
        # output: .-.- -- .--

        print(morse_encode("cat", lang="en"))
        # output: -.-. .- -
    """
    # Pick the encoding table first; everything after that is shared.
    if lang == "th":  # Thai
        table = THAI_MORSE_CODE
    elif lang == "en":  # English
        table = ENGLISH_MORSE_CODE
    else:
        raise NotImplementedError(f"This function doesn't support {lang}.")

    return " ".join(table.get(char, " ") for char in text.upper())
163+
164+
165+
def morse_decode(morse_text: str, lang: str = "th") -> str:
    """
    Decode Morse code back to text (simple table lookup).

    The input is split on single spaces and every piece is looked up in the
    reverse table of the selected language.

    * Thai: unknown pieces are dropped and the result contains no whitespace.
      Several Thai characters share one code, so the decoded text may contain
      a different character from the one that was encoded; a spell corrector
      can be used to repair such output.
    * English: unknown pieces become a space, and runs of whitespace in the
      result are collapsed to one space with the ends trimmed. Letters are
      returned in upper case.

    :param str morse_text: Morse code, with codes separated by a space
    :param str lang: Language code (*th* is Thai and *en* is English)
    :return: Text
    :rtype: str
    :raises NotImplementedError: if *lang* is neither *th* nor *en*

    :Example:
    ::

        from pythainlp.util.morse import morse_decode

        print(morse_decode(".-.- -- .--", lang="th"))
        # output: แมว

        print(morse_decode("-.-. .- -", lang="en"))
        # output: CAT
    """
    # Reject unsupported languages before touching the input.
    if lang not in ("th", "en"):
        raise NotImplementedError(f"This function doesn't support {lang}.")

    pieces = morse_text.split(" ")

    if lang == "th":
        decoded = "".join(decodingthai.get(piece, "") for piece in pieces)
        # Remove every whitespace character from the decoded Thai text.
        return "".join(decoded.split())

    decoded = "".join(decodingeng.get(piece, " ") for piece in pieces)
    # Collapse the gaps left by empty/unknown pieces into single spaces.
    return " ".join(decoded.split())

tests/test_util.py

Lines changed: 25 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -18,54 +18,55 @@
1818
arabic_digit_to_thai_digit,
1919
bahttext,
2020
collate,
21-
countthai,
21+
convert_years,
2222
count_thai_chars,
23+
countthai,
2324
dict_trie,
2425
display_thai_char,
2526
digit_to_text,
2627
emoji_to_thai,
2728
eng_to_thai,
2829
find_keyword,
30+
ipa_to_rtgs,
2931
is_native_thai,
3032
isthai,
3133
isthaichar,
3234
normalize,
3335
now_reign_year,
3436
num_to_thaiword,
3537
maiyamok,
38+
nectec_to_ipa,
3639
rank,
3740
reign_year_to_ad,
3841
remove_dangling,
3942
remove_dup_spaces,
43+
remove_tone_ipa,
4044
remove_tonemark,
45+
remove_trailing_repeat_consonants,
4146
remove_zw,
4247
rhyme,
4348
text_to_arabic_digit,
49+
text_to_num,
4450
text_to_thai_digit,
45-
thaiword_to_date,
4651
thai_digit_to_arabic_digit,
52+
thai_keyboard_dist,
53+
thai_to_eng,
4754
thai_strftime,
55+
thai_strptime,
56+
thai_word_tone_detector,
57+
thaiword_to_date,
58+
thaiword_to_num,
4859
thaiword_to_time,
4960
time_to_thaiword,
50-
thai_to_eng,
61+
tis620_to_utf8,
5162
to_idna,
52-
thaiword_to_num,
53-
thai_keyboard_dist,
54-
text_to_num,
55-
words_to_num,
63+
tone_detector,
5664
sound_syllable,
5765
syllable_length,
5866
syllable_open_close_detector,
59-
tone_detector,
60-
thai_word_tone_detector,
61-
convert_years,
62-
thai_strptime,
63-
nectec_to_ipa,
64-
ipa_to_rtgs,
65-
remove_tone_ipa,
66-
tis620_to_utf8,
67-
remove_trailing_repeat_consonants,
67+
words_to_num,
6868
)
69+
from pythainlp.util.morse import morse_decode, morse_encode
6970
from pythainlp.util.spell_words import spell_word
7071

7172

@@ -835,5 +836,13 @@ def test_remove_repeat_consonants(self):
835836
"อืมมม คุณมีบุคลิกที่เริ่ด ฉันจะให้เกรดดีกับคุณ\nนี่เป็นความลับ",
836837
)
837838

839+
def test_morse_encode(self):
    """morse_encode reproduces the documented Thai and English examples."""
    expectations = (
        ("แมว", "th", ".-.- -- .--"),
        ("cat", "en", "-.-. .- -"),
    )
    for text, lang, morse in expectations:
        self.assertEqual(morse_encode(text, lang=lang), morse)
842+
843+
def test_morse_decode(self):
    """morse_decode reproduces the documented Thai and English examples."""
    expectations = (
        (".-.- -- .--", "th", "แมว"),
        ("-.-. .- -", "en", "CAT"),
    )
    for morse, lang, text in expectations:
        self.assertEqual(morse_decode(morse, lang=lang), text)
846+
838847
# def test_abbreviation_to_full_text(self):
839848
# self.assertIsInstance(abbreviation_to_full_text("รร.ของเราน่าอยู่", list))

0 commit comments

Comments
 (0)