Skip to content

Commit 0f4c517

Browse files
authored
Merge pull request #304 from PyThaiNLP/remove_tnc_word_freq
Remove pythainlp.corpus.tnc.word_freq
2 parents 5bcda6c + 9cf870b commit 0f4c517

File tree

3 files changed

Lines changed: 2 additions & 54 deletions

3 files changed

Lines changed: 2 additions & 54 deletions

docs/api/corpus.rst

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ Modules
2222
TNC
2323
---
2424

25-
.. autofunction:: pythainlp.corpus.tnc.word_freq
2625
.. autofunction:: pythainlp.corpus.tnc.word_freqs
2726

2827
TTC
@@ -51,4 +50,4 @@ Definition
5150
++++++++++
5251

5352
Synset
54-
a set of synonyms that share a common meaning.
53+
a set of synonyms that share a common meaning.

pythainlp/corpus/tnc.py

Lines changed: 1 addition & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -5,64 +5,14 @@
55
Credit: Korakot Chaovavanich
66
https://www.facebook.com/photo.php?fbid=363640477387469&set=gm.434330506948445&type=3&permPage=1
77
"""
8-
import re
98
from typing import List, Tuple
109

11-
import requests
1210
from pythainlp.corpus import get_corpus
1311

14-
__all__ = ["word_freq", "word_freqs"]
12+
__all__ = ["word_freqs"]
1513

1614
_FILENAME = "tnc_freq.txt"
1715

18-
19-
def word_freq(word: str, domain: str = "all") -> int:
20-
"""
21-
22-
.. note::
23-
**Not officially supported.**
24-
Get word frequency of a word by domain.
25-
This function will make a query to the server of
26-
Thai National Corpus.
27-
Internet connection is required.
28-
29-
.. warning::
30-
Currently (as of 29 April 2019) it is likely to return 0,
31-
regardless of the word, as the service URL has been changed
32-
and the code is not updated yet.
33-
New URL is http://www.arts.chula.ac.th/~ling/tnc3/
34-
35-
:param string word: word
36-
:param string domain: domain
37-
"""
38-
listdomain = {
39-
"all": "",
40-
"imaginative": "1",
41-
"natural-pure-science": "2",
42-
"applied-science": "3",
43-
"social-science": "4",
44-
"world-affairs-history": "5",
45-
"commerce-finance": "6",
46-
"arts": "7",
47-
"belief-thought": "8",
48-
"leisure": "9",
49-
"others": "0",
50-
}
51-
url = "http://www.arts.chula.ac.th/~ling/tnc3/"
52-
data = {"genre[]": "", "domain[]": listdomain[domain], "sortby": "perc", "p": word}
53-
54-
r = requests.post(url, data=data)
55-
56-
pat = re.compile(r'TOTAL</font>.*?#ffffff">(.*?)</font>', flags=re.DOTALL)
57-
match = pat.search(r.text)
58-
59-
n = 0
60-
if match:
61-
n = int(match.group(1).strip())
62-
63-
return n
64-
65-
6616
def word_freqs() -> List[Tuple[str, int]]:
6717
"""
6818
Get word frequency from Thai National Corpus (TNC)

tests/test_corpus.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@ def test_corpus(self):
4242

4343
def test_tnc(self):
4444
self.assertIsNotNone(tnc.word_freqs())
45-
self.assertIsNotNone(tnc.word_freq("นก"))
4645

4746
def test_ttc(self):
4847
self.assertIsNotNone(ttc.word_freqs())

0 commit comments

Comments (0)