11# -*- coding: utf-8 -*-
22
33import os
4+ from typing import NoReturn , Union
45from urllib .request import urlopen
56
67import requests
1415_CORPUS_PATH = os .path .join (get_pythainlp_path (), _CORPUS_DIRNAME )
1516
1617_CORPUS_DB_URL = (
17- "https://raw.githubusercontent.com/" +
18- "PyThaiNLP/pythainlp-corpus/" +
19- "master/db.json"
18+ "https://raw.githubusercontent.com/"
19+ + "PyThaiNLP/pythainlp-corpus/"
20+ + "master/db.json"
2021)
2122
2223_CORPUS_DB_FILENAME = "db.json"
@@ -51,7 +52,7 @@ def get_corpus(filename: str) -> frozenset:
5152 return frozenset (lines )
5253
5354
54- def get_corpus_path (name : str ) -> [str , None ]:
55+ def get_corpus_path (name : str ) -> Union [str , None ]:
5556 """
5657 Get corpus path
5758
@@ -72,18 +73,21 @@ def get_corpus_path(name: str) -> [str, None]:
7273 return None
7374
7475
75- def _download (url : str , dst : str ):
76+ def _download (url : str , dst : str ) -> int :
7677 """
7778 @param: url to download file
7879 @param: dst place to put the file
7980 """
8081 file_size = int (urlopen (url ).info ().get ("Content-Length" , - 1 ))
82+
8183 if os .path .exists (dst ):
8284 first_byte = os .path .getsize (dst )
8385 else :
8486 first_byte = 0
87+
8588 if first_byte >= file_size :
8689 return file_size
90+
8791 header = {"Range" : "bytes=%s-%s" % (first_byte , file_size )}
8892 pbar = tqdm (
8993 total = file_size ,
@@ -99,10 +103,11 @@ def _download(url: str, dst: str):
99103 f .write (chunk )
100104 pbar .update (1024 )
101105 pbar .close ()
102- # return file_size
103106
107+ return file_size
104108
105- def download (name : str , force : bool = False ):
109+
110+ def download (name : str , force : bool = False ) -> NoReturn :
106111 """
107112 Download corpus
108113
@@ -113,6 +118,7 @@ def download(name: str, force: bool = False):
113118 temp = Query ()
114119 data = requests .get (corpus_db_url ())
115120 data_json = data .json ()
121+
116122 if name in list (data_json .keys ()):
117123 temp_name = data_json [name ]
118124 print ("Download:" , name )
0 commit comments