@@ -708,22 +708,47 @@ def download_files(self, files, savedir=None, cache=True,
708708 if savedir is None :
709709 savedir = self .cache_location
710710 for fileLink in unique (files ):
711+ log .debug ("Downloading {0} to {1}" .format (fileLink , savedir ))
711712 try :
712- log .debug ("Downloading {0} to {1}" .format (fileLink , savedir ))
713713 check_filename = self ._request ('HEAD' , fileLink , auth = auth ,
714714 stream = True )
715715 check_filename .raise_for_status ()
716- if 'text/html' in check_filename .headers ['Content-Type' ]:
717- raise ValueError ("Bad query. This can happen if you "
718- "attempt to download proprietary "
719- "data when not logged in" )
720-
721- filename = self ._request ("GET" , fileLink , save = True ,
722- savedir = savedir ,
723- timeout = self .TIMEOUT ,
724- cache = cache ,
725- auth = auth ,
726- continuation = continuation )
716+ except requests .HTTPError as ex :
717+ if ex .response .status_code == 401 :
718+ if skip_unauthorized :
719+ log .info ("Access denied to {url}. Skipping to"
720+ " next file" .format (url = fileLink ))
721+ continue
722+ else :
723+ raise (ex )
724+
725+ if 'text/html' in check_filename .headers ['Content-Type' ]:
726+ raise ValueError ("Bad query. This can happen if you "
727+ "attempt to download proprietary "
728+ "data when not logged in" )
729+
730+ try :
731+ filename = re .search ("filename=(.*)" ,
732+ check_filename .headers ['Content-Disposition' ]).groups ()[0 ]
733+ except KeyError :
734+ log .info (f"Unable to find filename for { fileLink } "
735+ "(missing Content-Disposition in header). "
736+ "Skipping to next file." )
737+
738+ if savedir is not None :
739+ filename = os .path .join (savedir ,
740+ filename )
741+
742+ try :
743+ self ._download_file (fileLink ,
744+ filename ,
745+ timeout = self .TIMEOUT ,
746+ auth = auth ,
747+ cache = cache ,
748+ method = 'GET' ,
749+ head_safe = True ,
750+ continuation = continuation )
751+
727752 downloaded_files .append (filename )
728753 except requests .HTTPError as ex :
729754 if ex .response .status_code == 401 :
@@ -744,12 +769,15 @@ def download_files(self, files, savedir=None, cache=True,
744769 raise ex
745770 elif ex .response .status_code == 500 :
746771 # empirically, this works the second time most of the time...
747- filename = self ._request ("GET" , fileLink , save = True ,
748- savedir = savedir ,
749- timeout = self .TIMEOUT ,
750- cache = cache ,
751- auth = auth ,
752- continuation = continuation )
772+ self ._download_file (fileLink ,
773+ filename ,
774+ timeout = self .TIMEOUT ,
775+ auth = auth ,
776+ cache = cache ,
777+ method = 'GET' ,
778+ head_safe = True ,
779+ continuation = continuation )
780+
753781 downloaded_files .append (filename )
754782 else :
755783 raise ex
0 commit comments