@@ -23,8 +23,10 @@ def word_freq(word: str, domain: str = "all") -> int:
2323 This function will make a query to the server of Thai National Corpus.
2424 Internet connection is required.
2525
26- **IMPORTANT:** Currently (as of 29 April 2019) always return 0,
27- as the service URL has been changed and the code is not updated yet.
26+ **IMPORTANT:** Currently (as of 29 April 2019) it is likely to return 0,
27+ regardless of the word, because the service URL has changed and the code
28+ has not been updated yet.
29+ The new URL is http://www.arts.chula.ac.th/~ling/tnc3/
2830
2931 :param string word: word
3032 :param string domain: domain
@@ -42,8 +44,7 @@ def word_freq(word: str, domain: str = "all") -> int:
4244 "leisure" : "9" ,
4345 "others" : "0" ,
4446 }
45- url = "http://www.arts.chula.ac.th/~ling/TNCII/corp.php"
46- # New URL is http://www.arts.chula.ac.th/~ling/tnc3/
47+ url = "http://www.arts.chula.ac.th/~ling/tnc3/"
4748 data = {"genre[]" : "" , "domain[]" : listdomain [domain ], "sortby" : "perc" , "p" : word }
4849
4950 r = requests .post (url , data = data )
@@ -63,9 +64,10 @@ def word_freqs() -> List[Tuple[str, int]]:
6364 Get word frequency from Thai National Corpus (TNC)
6465 """
6566 lines = list (get_corpus (_FILENAME ))
66- listword = []
67+ word_freqs = []
6768 for line in lines :
68- listindata = line .split ("\t " )
69- listword .append ((listindata [0 ], int (listindata [1 ])))
69+ word_freq = line .split ("\t " )
70+ if len (word_freq ) >= 2 :
71+ word_freqs .append ((word_freq [0 ], int (word_freq [1 ])))
7072
71- return listword
73+ return word_freqs
0 commit comments