Skip to content

Commit 277d4a4

Browse files
authored
Merge pull request #215 from bact/dev
Fix various small issues
2 parents e9a300b + 1f56415 commit 277d4a4

File tree

5 files changed

+41
-12
lines changed

5 files changed

+41
-12
lines changed

appveyor.yml

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,37 @@
1+
# Use unofficial Windows Binaries for Python Extension Packages from
2+
# https://www.lfd.uci.edu/~gohlke/pythonlibs/
3+
14
build: off
25

36
environment:
47
matrix:
8+
# - PYTHON: "C:/Python36"
9+
# PYTHON_VERSION: "3.6"
10+
# PYTHON_ARCH: "32"
11+
# ARTAGGER_PKG: "https://github.com/wannaphongcom/artagger/tarball/master#egg=artagger"
12+
# PYICU_PKG: "https://www.dropbox.com/s/pahorbq29y9cura/PyICU-2.3.1-cp36-cp36m-win32.whl?dl=1"
13+
514
- PYTHON: "C:/Python36-x64"
615
PYTHON_VERSION: "3.6"
716
PYTHON_ARCH: "64"
817
ARTAGGER_PKG: "https://github.com/wannaphongcom/artagger/tarball/master#egg=artagger"
18+
PYICU_PKG: "https://www.dropbox.com/s/7t0rrxwckqbgivi/PyICU-2.3.1-cp36-cp36m-win_amd64.whl?dl=1"
19+
20+
# - PYTHON: "C:/Python37"
21+
# PYTHON_VERSION: "3.7"
22+
# PYTHON_ARCH: "32"
23+
# ARTAGGER_PKG: "https://github.com/wannaphongcom/artagger/tarball/master#egg=artagger"
24+
# PYICU_PKG: "https://www.dropbox.com/s/3xwdnwhdcu619x4/PyICU-2.3.1-cp37-cp37m-win32.whl?dl=1"
25+
26+
- PYTHON: "C:/Python37-x64"
27+
PYTHON_VERSION: "3.7"
28+
PYTHON_ARCH: "64"
29+
ARTAGGER_PKG: "https://github.com/wannaphongcom/artagger/tarball/master#egg=artagger"
30+
PYICU_PKG: "https://www.dropbox.com/s/le5dckc3231opqt/PyICU-2.3.1-cp37-cp37m-win_amd64.whl?dl=1"
931

1032
init:
1133
- "ECHO %PYTHON% %PYTHON_VERSION% %PYTHON_ARCH%"
12-
- ps: "ls C:/Python*"
34+
# - ps: "ls C:/Python*"
1335

1436
install:
1537
- "chcp 65001"
@@ -19,10 +41,10 @@ install:
1941
- "%PYTHON%/python.exe -m pip install --upgrade pip"
2042
- "%PYTHON%/python.exe -m pip install coveralls[yaml]"
2143
- "%PYTHON%/python.exe -m pip install coverage"
22-
- "%PYTHON%/python.exe -m pip install https://www.dropbox.com/s/g84479l8yhv5ohi/PyICU-2.2-cp36-cp36m-win_amd64.whl?dl=1"
44+
- "%PYTHON%/python.exe -m pip install %PYICU_PKG%"
2345
- "%PYTHON%/python.exe -m pip install %ARTAGGER_PKG%"
2446
- "%PYTHON%/python.exe -m pip install -e .[artagger,icu,ipa,ner,thai2fit,deepcut]"
2547

2648
test_script:
2749
- "%PYTHON%/python.exe -m pip --version"
28-
- "%PYTHON%/python.exe -m coverage run --source=pythainlp setup.py test"
50+
- "%PYTHON%/python.exe -m coverage run --source=pythainlp setup.py test"

pythainlp/corpus/__init__.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# -*- coding: utf-8 -*-
22

33
import os
4+
from typing import NoReturn, Union
45
from urllib.request import urlopen
56

67
import requests
@@ -14,9 +15,9 @@
1415
_CORPUS_PATH = os.path.join(get_pythainlp_path(), _CORPUS_DIRNAME)
1516

1617
_CORPUS_DB_URL = (
17-
"https://raw.githubusercontent.com/" +
18-
"PyThaiNLP/pythainlp-corpus/" +
19-
"master/db.json"
18+
"https://raw.githubusercontent.com/"
19+
+ "PyThaiNLP/pythainlp-corpus/"
20+
+ "master/db.json"
2021
)
2122

2223
_CORPUS_DB_FILENAME = "db.json"
@@ -51,7 +52,7 @@ def get_corpus(filename: str) -> frozenset:
5152
return frozenset(lines)
5253

5354

54-
def get_corpus_path(name: str) -> [str, None]:
55+
def get_corpus_path(name: str) -> Union[str, None]:
5556
"""
5657
Get corpus path
5758
@@ -72,18 +73,21 @@ def get_corpus_path(name: str) -> [str, None]:
7273
return None
7374

7475

75-
def _download(url: str, dst: str):
76+
def _download(url: str, dst: str) -> int:
7677
"""
7778
@param: url to download file
7879
@param: dst place to put the file
7980
"""
8081
file_size = int(urlopen(url).info().get("Content-Length", -1))
82+
8183
if os.path.exists(dst):
8284
first_byte = os.path.getsize(dst)
8385
else:
8486
first_byte = 0
87+
8588
if first_byte >= file_size:
8689
return file_size
90+
8791
header = {"Range": "bytes=%s-%s" % (first_byte, file_size)}
8892
pbar = tqdm(
8993
total=file_size,
@@ -99,10 +103,11 @@ def _download(url: str, dst: str):
99103
f.write(chunk)
100104
pbar.update(1024)
101105
pbar.close()
102-
# return file_size
103106

107+
return file_size
104108

105-
def download(name: str, force: bool = False):
109+
110+
def download(name: str, force: bool = False) -> NoReturn:
106111
"""
107112
Download corpus
108113
@@ -113,6 +118,7 @@ def download(name: str, force: bool = False):
113118
temp = Query()
114119
data = requests.get(corpus_db_url())
115120
data_json = data.json()
121+
116122
if name in list(data_json.keys()):
117123
temp_name = data_json[name]
118124
print("Download:", name)

pythainlp/corpus/tnc.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ def word_freq(word: str, domain: str = "all") -> int:
4848

4949
r = requests.post(url, data=data)
5050

51-
pat = re.compile(r'TOTAL</font>(?s).*?#ffffff">(.*?)</font>')
51+
pat = re.compile(r'TOTAL</font>.*?#ffffff">(.*?)</font>', flags=re.DOTALL)
5252
match = pat.search(r.text)
5353

5454
n = 0

pythainlp/spell/pn.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ def _keep(
3434
min_len: int,
3535
max_len: int,
3636
dict_filter: Callable[[str], bool],
37-
):
37+
) -> Callable[[str], bool]:
3838
"""
3939
Keep only Thai words with at least min_freq frequency
4040
and has length between min_len and max_len characters

tests/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -577,6 +577,7 @@ def test_thai_strftime(self):
577577
thai_strftime(date, "%Aที่ %d %B พ.ศ. %Y เวลา %H:%Mน. (%a %d-%b-%y) %% %"),
578578
"วันพุธที่ 06 ตุลาคม พ.ศ. 2519 เวลา 01:40น. (พ 06-ต.ค.-19) % %",
579579
)
580+
self.assertIsNotNone(thai_strftime(date, "%A%a%B%b%C%c%D%F%G%g%v%X%x%Y%y%+"))
580581

581582
# ### pythainlp.util.normalize
582583

0 commit comments

Comments
 (0)