File tree Expand file tree Collapse file tree 3 files changed +20
-27
lines changed Expand file tree Collapse file tree 3 files changed +20
-27
lines changed Original file line number Diff line number Diff line change 736736 {
737737 "data" : {
738738 "text/plain" : [
739- " [('จะ ', 51681 ),\n " ,
740- " ('เป็น ', 51273 ),\n " ,
741- " ('ไป ', 46567 ),\n " ,
742- " ('ก็ ', 46409 ),\n " ,
743- " ('ไม่ ', 45895 ),\n " ,
744- " ('มี ', 44899 ),\n " ,
745- " ('ได้ ', 44513 ),\n " ,
746- " ('ว่า ', 40290 ),\n " ,
747- " ('ให้ ', 38715 )]"
739+ " [('งวงช้าง ', 12 ),\n " ,
740+ " ('เทิบทาบ ', 7 ),\n " ,
741+ " ('กริน ', 3 ),\n " ,
742+ " ('นาภี ', 2 ),\n " ,
743+ " ('แด่วๆ ', 3 ),\n " ,
744+ " ('คู่ใจ ', 7 ),\n " ,
745+ " ('คุณพ่อ ', 732 ),\n " ,
746+ " ('สิ้น ', 755 ),\n " ,
747+ " ('เยาะ ', 150 )]"
748748 ]
749749 },
750750 "execution_count" : 28 ,
Original file line number Diff line number Diff line change 55Credit: Korakot Chaovavanich
66https://www.facebook.com/photo.php?fbid=363640477387469&set=gm.434330506948445&type=3&permPage=1
77"""
8- import os
98import re
109
11- from pythainlp .corpus import download as download_data
12- from pythainlp .corpus import get_corpus
13- from pythainlp .tools import get_full_data_path
1410import requests
11+ from pythainlp .corpus import get_corpus
12+
1513__all__ = ["word_freq" , "word_freqs" ]
1614
15+ _FILENAME = "tnc_freq.txt"
16+
1717
1818def word_freq (word , domain = "all" ):
1919 """
@@ -56,10 +56,10 @@ def word_freqs():
5656 """
5757 Get word frequency from Thai National Corpus (TNC)
5858 """
59- lines = list (get_corpus ("tnc_freq.txt" ))
59+ lines = list (get_corpus (_FILENAME ))
6060 listword = []
6161 for line in lines :
62- listindata = line .split (" " )
62+ listindata = line .split ("\t " )
6363 listword .append ((listindata [0 ], int (listindata [1 ])))
6464
6565 return listword
Original file line number Diff line number Diff line change 55Credit: Korakot Chaovavanich
66https://www.facebook.com/photo.php?fbid=363640477387469&set=gm.434330506948445&type=3&permPage=1
77"""
8- import os
98
10- from pythainlp .corpus import download as download_data
11- from pythainlp .tools import get_full_data_path
9+ from pythainlp .corpus import get_corpus
1210
1311__all__ = ["word_freqs" ]
1412
13+ _FILENAME = "ttc_freq.txt"
14+
1515
1616def word_freqs ():
1717 """
1818 Get word frequency from Thai Textbook Corpus (TTC)
1919 """
20- path = get_full_data_path ("ttc_freq.txt" ) # try local copy first
21- if not os .path .exists (path ): # if fail, download from internet
22- download_data ("ttc" )
23-
24- with open (path , "r" , encoding = "utf8" ) as f :
25- lines = f .read ().splitlines ()
26- f .close ()
27-
20+ lines = list (get_corpus (_FILENAME ))
2821 listword = []
2922 for line in lines :
30- listindata = line .split (" " )
23+ listindata = line .split ("\t " )
3124 listword .append ((listindata [0 ], int (listindata [1 ])))
3225
3326 return listword
You can’t perform that action at this time.
0 commit comments