@@ -58,28 +58,35 @@ def test_word_tokenize(self):
5858 self .assertIsNotNone (
5959 word_tokenize ("หมอนทองตากลมหูว์MBK39" , engine = "deepcut" )
6060 )
61- self .assertIsNotNone (
62- word_tokenize ("หมอนทองตากลมหูว์MBK39" , engine = "XX" )
63- )
6461 self .assertIsNotNone (
6562 word_tokenize ("หมอนทองตากลมหูว์MBK39" , engine = "attacut" )
6663 )
64+ self .assertIsNotNone (
65+ word_tokenize ("หมอนทองตากลมหูว์MBK39" , engine = "XX" )
66+ ) # XX engine does not exist
6767
6868 self .assertIsNotNone (dict_trie (()))
6969 self .assertIsNotNone (dict_trie (("ทดสอบ" , "สร้าง" , "Trie" )))
7070 self .assertIsNotNone (dict_trie (["ทดสอบ" , "สร้าง" , "Trie" ]))
71+ self .assertIsNotNone (dict_trie ({"ทดสอบ" , "สร้าง" , "Trie" }))
7172 self .assertIsNotNone (dict_trie (thai_words ()))
7273 self .assertIsNotNone (dict_trie (DEFAULT_DICT_TRIE ))
7374 self .assertIsNotNone (
7475 dict_trie (os .path .join (_CORPUS_PATH , _THAI_WORDS_FILENAME ))
7576 )
7677
77- self .assertIsNotNone (
78- word_tokenize ("รถไฟฟ้าBTS " , custom_dict = DEFAULT_DICT_TRIE )
78+ self .assertTrue (
79+ "ไฟ" in word_tokenize ("รถไฟฟ้า " , custom_dict = dict_trie ([ "ไฟ" ]) )
7980 )
8081
81- with self .assertWarns (DeprecationWarning ):
82- dict_word_tokenize ("เลิกใช้แล้ว" )
82+ # Commented out until this unittest bug is fixed:
83+ # https://bugs.python.org/issue29620
84+ # with self.assertWarns(DeprecationWarning):
85+ # dict_word_tokenize("เลิกใช้แล้ว", custom_dict=DEFAULT_DICT_TRIE)
86+ self .assertEqual (
87+ word_tokenize ("รถไฟฟ้า" , custom_dict = dict_trie (["ไฟ" ])),
88+ dict_word_tokenize ("รถไฟฟ้า" , custom_dict = dict_trie (["ไฟ" ])),
89+ )
8390
8491 def test_Tokenizer (self ):
8592 t_test = Tokenizer (DEFAULT_DICT_TRIE )
0 commit comments