@@ -90,27 +90,27 @@ def revise_wordset(
9090
9191 :Example::
9292 ::
93-
94- from pythainlp.corpus import thai_words
95- from pythainlp.corpus.util import revise_wordset
96- from pythainlp.tokenize.longest import segment
97-
98- base_words = thai_words()
99- more_words = {
100- "ถวิล อุดล", "ทองอินทร์ ภูริพัฒน์", "เตียง ศิริขันธ์", "จำลอง ดาวเรือง"
101- }
102- base_words = base_words.union(more_words)
103- dict_trie = Trie(wordlist)
104-
105- tokenize = lambda text: segment(text, dict_trie)
106-
107- training_data = [
108- [str, str, str. ...],
109- [str, str, str, str, ...],
110- ...
111- ]
112-
113- revised_words = revise_wordset(tokenize, wordlist, training_data)
93+
94+ from pythainlp.corpus import thai_words
95+ from pythainlp.corpus.util import revise_wordset
96+ from pythainlp.tokenize.longest import segment
97+
98+ base_words = thai_words()
99+ more_words = {
100+ "ถวิล อุดล", "ทองอินทร์ ภูริพัฒน์", "เตียง ศิริขันธ์", "จำลอง ดาวเรือง"
101+ }
102+ base_words = base_words.union(more_words)
103+ dict_trie = Trie(base_words)
104+
105+ tokenize = lambda text: segment(text, dict_trie)
106+
107+ training_data = [
108+ [str, str, str, ...],
109+ [str, str, str, str, ...],
110+ ...
111+ ]
112+
113+ revised_words = revise_wordset(tokenize, base_words, training_data)
114114 """
115115 bad_words = find_badwords (tokenize , training_data )
116116 return set (orig_words ) - bad_words
0 commit comments