@@ -189,7 +189,7 @@ def _check_hash(dst: str, md5: str) -> None:
189189 raise Exception ("Hash does not match expected." )
190190
191191
192- def download (name : str , force : bool = False , url : str = None ) -> bool :
192+ def download (name : str , force : bool = False , url : str = None , version : str = None ) -> bool :
193193 """
194194 Download corpus.
195195
@@ -199,6 +199,7 @@ def download(name: str, force: bool = False, url: str = None) -> bool:
199199 :param str name: corpus name
200200 :param bool force: force download
201201 :param str url: URL of the corpus catalog
202+ :param str version: Version of the corpus
202203 :return: **True** if the corpus is found and successfully downloaded.
203204 Otherwise, it returns **False**.
204205 :rtype: bool
@@ -233,39 +234,49 @@ def download(name: str, force: bool = False, url: str = None) -> bool:
233234 local_db = TinyDB (corpus_db_path ())
234235 query = Query ()
235236
236- corpus = corpus_db [name ]
237+ corpus = corpus_db [name . lower () ]
237238 print ("Corpus:" , name )
238- found = local_db .search (query .name == name )
239+ if version is None :
240+ version = corpus ['latest_version' ]
241+ corpus_versions = corpus ["versions" ][version ]
242+ file_name = corpus_versions ["filename" ]
243+ found = local_db .search ((query .name == name ) & (query .version == version ))
239244
240245 # If not found in local, download
241246 if force or not found :
242- print (f"- Downloading: { name } { corpus ['version' ]} " )
243- _download (corpus ["download" ], corpus ["file_name" ])
244- _check_hash (corpus ["file_name" ], corpus ["md5" ])
247+ print (f"- Downloading: { name } { version } " )
248+ _download (
249+ corpus_versions ["download_url" ],
250+ file_name ,
251+ )
252+ _check_hash (
253+ file_name ,
254+ corpus_versions ["md5" ],
255+ )
245256
246257 if found :
247258 local_db .update (
248- {"version" : corpus [ " version" ] }, query .name == name
259+ {"version" : version }, query .name == name
249260 )
250261 else :
251262 local_db .insert (
252263 {
253264 "name" : name ,
254- "version" : corpus [ " version" ] ,
255- "file_name" : corpus [ " file_name" ] ,
265+ "version" : version ,
266+ "file_name" : file_name ,
256267 }
257268 )
258269 else :
259270 if local_db .search (
260- query .name == name and query .version == corpus [ " version" ]
271+ query .name == name and query .version == version
261272 ):
262273 # Already has the same version
263274 print ("- Already up to date." )
264275 else :
265276 # Has the corpus but different version
266277 current_ver = local_db .search (query .name == name )[0 ]["version" ]
267278 print (f"- Existing version: { current_ver } " )
268- print (f"- New version available: { corpus [ ' version' ] } " )
279+ print (f"- New version available: { version } " )
269280 print ("- Use download(data_name, force=True) to update" )
270281
271282 local_db .close ()
0 commit comments