|
1 | | -# -*- coding: utf-8 -*- |
2 | | -__version__ = "2.3.0dev0" |
3 | | - |
4 | | -thai_consonants = "กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรลวศษสหฬอฮ" # 44 chars |
5 | | - |
6 | | -thai_vowels = ( |
7 | | - "\u0e24\u0e26\u0e30\u0e31\u0e32\u0e33\u0e34\u0e35\u0e36\u0e37" |
8 | | - + "\u0e38\u0e39\u0e40\u0e41\u0e42\u0e43\u0e44\u0e45\u0e4d\u0e47" |
9 | | -) # 20 |
10 | | -thai_lead_vowels = "\u0e40\u0e41\u0e42\u0e43\u0e44" # 5 |
11 | | -thai_follow_vowels = "\u0e30\u0e32\u0e33\u0e45" # 4 |
12 | | -thai_above_vowels = "\u0e31\u0e34\u0e35\u0e36\u0e37\u0e4d\u0e47" # 7 |
13 | | -thai_below_vowels = "\u0e38\u0e39" # 2 |
14 | | - |
15 | | -thai_tonemarks = "\u0e48\u0e49\u0e4a\u0e4b" # 4 |
16 | | - |
17 | | -# Paiyannoi, Maiyamok, Phinthu, Thanthakhat, Nikhahit, Yamakkan: |
18 | | -# These signs can be part of a word |
19 | | -thai_signs = "\u0e2f\u0e3a\u0e46\u0e4c\u0e4d\u0e4e" # 6 chars |
20 | | - |
21 | | -# Any Thai character that can be part of a word |
22 | | -thai_letters = "".join( |
23 | | - [thai_consonants, thai_vowels, thai_tonemarks, thai_signs] |
24 | | -) # 74 |
25 | | - |
26 | | -# Fongman, Angkhankhu, Khomut: |
27 | | -# These characters are section markers |
28 | | -thai_punctuations = "\u0e4f\u0e5a\u0e5b" # 3 chars |
29 | | - |
30 | | -thai_digits = "๐๑๒๓๔๕๖๗๘๙" # 10 |
31 | | -thai_symbols = "\u0e3f" # Thai Bath ฿ |
32 | | - |
33 | | -# All Thai characters that presented in Unicode |
34 | | -thai_characters = "".join( |
35 | | - [thai_letters, thai_punctuations, thai_digits, thai_symbols] |
36 | | -) |
37 | | - |
38 | | - |
39 | | -from pythainlp.soundex import soundex |
40 | | -from pythainlp.spell import correct, spell |
41 | | -from pythainlp.tag import pos_tag |
42 | | -from pythainlp.tokenize import ( |
43 | | - Tokenizer, |
44 | | - sent_tokenize, |
45 | | - subword_tokenize, |
46 | | - syllable_tokenize, |
47 | | - word_tokenize, |
48 | | -) |
49 | | -from pythainlp.transliterate import romanize, transliterate |
50 | | -from pythainlp.util import collate, thai_strftime |
| 1 | +# -*- coding: utf-8 -*- |
| 2 | +__version__ = "2.3.0-dev1" |
| 3 | + |
| 4 | +thai_consonants = "กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรลวศษสหฬอฮ" # 44 chars |
| 5 | + |
| 6 | +thai_vowels = ( |
| 7 | + "\u0e24\u0e26\u0e30\u0e31\u0e32\u0e33\u0e34\u0e35\u0e36\u0e37" |
| 8 | + + "\u0e38\u0e39\u0e40\u0e41\u0e42\u0e43\u0e44\u0e45\u0e4d\u0e47" |
| 9 | +) # 20 |
| 10 | +thai_lead_vowels = "\u0e40\u0e41\u0e42\u0e43\u0e44" # 5 |
| 11 | +thai_follow_vowels = "\u0e30\u0e32\u0e33\u0e45" # 4 |
| 12 | +thai_above_vowels = "\u0e31\u0e34\u0e35\u0e36\u0e37\u0e4d\u0e47" # 7 |
| 13 | +thai_below_vowels = "\u0e38\u0e39" # 2 |
| 14 | + |
| 15 | +thai_tonemarks = "\u0e48\u0e49\u0e4a\u0e4b" # 4 |
| 16 | + |
| 17 | +# Paiyannoi, Maiyamok, Phinthu, Thanthakhat, Nikhahit, Yamakkan: |
| 18 | +# These signs can be part of a word |
| 19 | +thai_signs = "\u0e2f\u0e3a\u0e46\u0e4c\u0e4d\u0e4e" # 6 chars |
| 20 | + |
| 21 | +# Any Thai character that can be part of a word |
| 22 | +thai_letters = "".join( |
| 23 | + [thai_consonants, thai_vowels, thai_tonemarks, thai_signs] |
| 24 | +) # 74 chars; Nikhahit (U+0E4D) is counted twice, in thai_vowels and thai_signs |
| 25 | + |
| 26 | +# Fongman, Angkhankhu, Khomut: |
| 27 | +# These characters are section markers |
| 28 | +thai_punctuations = "\u0e4f\u0e5a\u0e5b" # 3 chars |
| 29 | + |
| 30 | +thai_digits = "๐๑๒๓๔๕๖๗๘๙" # 10 |
| 31 | +thai_symbols = "\u0e3f" # Thai Baht ฿ |
| 32 | + |
| 33 | +# All Thai characters that are present in Unicode |
| 34 | +thai_characters = "".join( |
| 35 | + [thai_letters, thai_punctuations, thai_digits, thai_symbols] |
| 36 | +) |
| 37 | + |
| 38 | + |
| 39 | +from pythainlp.soundex import soundex |
| 40 | +from pythainlp.spell import correct, spell |
| 41 | +from pythainlp.tag import pos_tag |
| 42 | +from pythainlp.tokenize import ( |
| 43 | + Tokenizer, |
| 44 | + sent_tokenize, |
| 45 | + subword_tokenize, |
| 46 | + syllable_tokenize, |
| 47 | + word_tokenize, |
| 48 | +) |
| 49 | +from pythainlp.transliterate import romanize, transliterate |
| 50 | +from pythainlp.util import collate, thai_strftime |
0 commit comments