7
7
8
8
import httpx
9
9
import pandas as pd
10
+ import stamina
10
11
from erddapy import ERDDAP
11
12
from erddapy .core .url import urlopen
12
13
28
29
_server = "https://gliders.ioos.us/erddap"
29
30
30
31
@stamina.retry(on=httpx.HTTPError, attempts=3)
def _call_erddapy(glider_grab: "GliderDataFetcher") -> pd.DataFrame:
    """Download the currently configured dataset as a DataFrame.

    Thin pass-through to ``glider_grab.fetcher.to_pandas()`` that retries
    up to three times on ``httpx.HTTPError``.
    Temporary workaround until we move optional stamina to erddapy.
    """
    return glider_grab.fetcher.to_pandas()
31
38
@functools .lru_cache (maxsize = 128 )
32
- def _to_pandas_multiple (glider_grab : "GliderDataFetcher" ) -> pd .DataFrame :
39
+ def _to_pandas (
40
+ glider_grab : "GliderDataFetcher" ,
41
+ * ,
42
+ query : OptionalBool = True ,
43
+ ) -> pd .DataFrame :
33
44
"""Thin wrapper to cache results when multiple datasets are requested."""
34
45
df_all = {}
35
46
glider_grab_copy = copy (glider_grab )
36
- for dataset_id in glider_grab_copy .datasets ["Dataset ID" ]:
47
+ if query :
48
+ dataset_ids = glider_grab_copy .datasets ["Dataset ID" ]
49
+ else :
50
+ dataset_ids = glider_grab_copy .dataset_ids
51
+
52
+ for dataset_id in dataset_ids :
37
53
glider_grab_copy .fetcher .dataset_id = dataset_id
38
- glider_df = glider_grab_copy . fetcher . to_pandas ( )
54
+ glider_df = _call_erddapy ( glider_grab_copy )
39
55
dataset_url = glider_grab_copy .fetcher .get_download_url ().split ("?" )[0 ]
40
56
glider_df = standardise_df (glider_df , dataset_url )
41
57
df_all .update ({dataset_id : glider_df })
@@ -81,7 +97,7 @@ def __init__(
81
97
protocol = "tabledap" ,
82
98
)
83
99
self .fetcher .variables = server_vars [server ]
84
- self .fetcher . dataset_id : OptionalStr = None
100
+ self .dataset_ids : OptionalList = None
85
101
self .datasets : OptionalDF = None
86
102
87
103
def to_pandas (self : "GliderDataFetcher" ) -> pd .DataFrame :
@@ -90,21 +106,20 @@ def to_pandas(self: "GliderDataFetcher") -> pd.DataFrame:
90
106
:return: pandas a dataframe with datetime UTC as index,
91
107
multiple dataset_ids dataframes are stored in a dictionary
92
108
"""
93
- if self .fetcher .dataset_id :
94
- glider_df = self .fetcher .to_pandas ()
95
- elif not self .fetcher .dataset_id and self .datasets is not None :
96
- glider_df = _to_pandas_multiple (self )
97
- # We need to reset to avoid fetching a single dataset_id when
98
- # making multiple requests.
99
- self .fetcher .dataset_id = None
100
- return glider_df
109
+ if self .dataset_ids is not None :
110
+ query = False # Passing known dataset_ids
111
+ elif self .dataset_ids is None and self .datasets is not None :
112
+ query = True # Passing an ERDDAP query
101
113
else :
102
114
msg = "Must provide a dataset_id or query terms to download data."
103
115
raise ValueError (msg )
104
116
105
- # Standardize variable names for the single dataset_id.
106
- dataset_url = self .fetcher .get_download_url ().split ("?" )[0 ]
107
- return standardise_df (glider_df , dataset_url )
117
+ glider_df = _to_pandas (self , query = query )
118
+ # We need to reset to avoid fetching a single dataset_id when
119
+ # making multiple requests.
120
+ self .fetcher .dataset_id = None
121
+
122
+ return glider_df
108
123
109
124
def query ( # noqa: PLR0913
110
125
self : "GliderDataFetcher" ,
@@ -196,8 +211,10 @@ class DatasetList:
196
211
197
212
def __init__ (
198
213
self : "DatasetList" ,
214
+ * ,
199
215
server : OptionalStr = _server ,
200
216
search_for : OptionalStr = None ,
217
+ delayed : OptionalBool = False ,
201
218
) -> None :
202
219
"""Instantiate main class attributes.
203
220
@@ -216,6 +233,7 @@ def __init__(
216
233
protocol = "tabledap" ,
217
234
)
218
235
self .search_for = search_for
236
+ self .delayed = delayed
219
237
220
238
def get_ids (self : "DatasetList" ) -> list :
221
239
"""Return the allDatasets list for the glider server."""
@@ -229,5 +247,10 @@ def get_ids(self: "DatasetList") -> list:
229
247
self .e .dataset_id = "allDatasets"
230
248
dataset_ids = self .e .to_pandas ()["datasetID" ].to_list ()
231
249
dataset_ids .remove ("allDatasets" )
232
- self .dataset_ids = dataset_ids
250
+ if not self .delayed :
251
+ self .dataset_ids = [
252
+ dataset_id
253
+ for dataset_id in dataset_ids
254
+ if not dataset_id .endswith ("-delayed" )
255
+ ]
233
256
return self .dataset_ids
0 commit comments