Skip to content

Commit 62a2a44

Browse files
authored
Merge pull request #146 from ocefpaf/summary
Add a summary method
2 parents b2151f9 + 247b5ff commit 62a2a44

File tree

9 files changed

+57359
-27
lines changed

9 files changed

+57359
-27
lines changed

gliderpy/__init__.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
"""Easier access to glider data."""
22

3+
import pandas as pd
4+
from pandas_flavor import register_dataframe_method
5+
36
try:
47
from ._version import __version__
58
except ImportError:
@@ -15,3 +18,36 @@
1518
"plot_transect",
1619
"plot_ts",
1720
]
21+
22+
23+
def _num_profiles(df: pd.DataFrame) -> int:
24+
"""Compute the number of unique glider pofiles."""
25+
return len(df[["latitude", "longitude"]].value_counts())
26+
27+
28+
def _days(df: pd.DataFrame) -> pd.Timedelta:
29+
"""Compute the glider days."""
30+
return df.index.dropna()[-1].ceil("D") - df.index.dropna()[0].floor("D")
31+
32+
33+
def _deployment_lat(df: pd.DataFrame) -> dict:
34+
"""Return the glider deployment latitude."""
35+
return df["latitude"].to_list()[0]
36+
37+
38+
def _deployment_lon(df: pd.DataFrame) -> dict:
39+
"""Return the glider deployment longitude."""
40+
return df["longitude"].to_list()[0]
41+
42+
43+
@register_dataframe_method
def summary(df: pd.DataFrame) -> pd.Series:
    """Return the summary for a set of gliders.

    Registered as a DataFrame accessor via pandas_flavor, so it can be
    called as ``df.summary()`` on any glider DataFrame.

    :param df: glider DataFrame indexed by datetime with "latitude" and
        "longitude" columns.
    :return: a ``pd.Series`` (the original annotation said DataFrame, but a
        Series is what is built and returned) with entries:
        num_profiles, days, deployment_lat, deployment_lon.
    """
    summ = {
        "num_profiles": _num_profiles(df),
        "days": _days(df),
        "deployment_lat": _deployment_lat(df),
        "deployment_lon": _deployment_lon(df),
    }
    return pd.Series(summ)

gliderpy/fetchers.py

Lines changed: 39 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
import httpx
99
import pandas as pd
10+
import stamina
1011
from erddapy import ERDDAP
1112
from erddapy.core.url import urlopen
1213

@@ -28,14 +29,29 @@
2829
_server = "https://gliders.ioos.us/erddap"
2930

3031

32+
# Retry the ERDDAP request up to 3 times when an httpx.HTTPError is raised.
@stamina.retry(on=httpx.HTTPError, attempts=3)
def _call_erddapy(glider_grab: "GliderDataFetcher") -> pd.DataFrame:
    """Temporary workaround until we move optional stamina to erddapy.

    Delegates to the fetcher's ``to_pandas`` for whatever ``dataset_id`` is
    currently set on ``glider_grab.fetcher``.
    """
    return glider_grab.fetcher.to_pandas()
36+
37+
3138
@functools.lru_cache(maxsize=128)
32-
def _to_pandas_multiple(glider_grab: "GliderDataFetcher") -> pd.DataFrame:
39+
def _to_pandas(
40+
glider_grab: "GliderDataFetcher",
41+
*,
42+
query: OptionalBool = True,
43+
) -> pd.DataFrame:
3344
"""Thin wrapper to cache results when multiple datasets are requested."""
3445
df_all = {}
3546
glider_grab_copy = copy(glider_grab)
36-
for dataset_id in glider_grab_copy.datasets["Dataset ID"]:
47+
if query:
48+
dataset_ids = glider_grab_copy.datasets["Dataset ID"]
49+
else:
50+
dataset_ids = glider_grab_copy.dataset_ids
51+
52+
for dataset_id in dataset_ids:
3753
glider_grab_copy.fetcher.dataset_id = dataset_id
38-
glider_df = glider_grab_copy.fetcher.to_pandas()
54+
glider_df = _call_erddapy(glider_grab_copy)
3955
dataset_url = glider_grab_copy.fetcher.get_download_url().split("?")[0]
4056
glider_df = standardise_df(glider_df, dataset_url)
4157
df_all.update({dataset_id: glider_df})
@@ -81,7 +97,7 @@ def __init__(
8197
protocol="tabledap",
8298
)
8399
self.fetcher.variables = server_vars[server]
84-
self.fetcher.dataset_id: OptionalStr = None
100+
self.dataset_ids: OptionalList = None
85101
self.datasets: OptionalDF = None
86102

87103
def to_pandas(self: "GliderDataFetcher") -> pd.DataFrame:
@@ -90,21 +106,20 @@ def to_pandas(self: "GliderDataFetcher") -> pd.DataFrame:
90106
:return: a pandas dataframe with datetime UTC as index,
91107
multiple dataset_ids dataframes are stored in a dictionary
92108
"""
93-
if self.fetcher.dataset_id:
94-
glider_df = self.fetcher.to_pandas()
95-
elif not self.fetcher.dataset_id and self.datasets is not None:
96-
glider_df = _to_pandas_multiple(self)
97-
# We need to reset to avoid fetching a single dataset_id when
98-
# making multiple requests.
99-
self.fetcher.dataset_id = None
100-
return glider_df
109+
if self.dataset_ids is not None:
110+
query = False # Passing known dataset_ids
111+
elif self.dataset_ids is None and self.datasets is not None:
112+
query = True # Passing an ERDDAP query
101113
else:
102114
msg = "Must provide a dataset_id or query terms to download data."
103115
raise ValueError(msg)
104116

105-
# Standardize variable names for the single dataset_id.
106-
dataset_url = self.fetcher.get_download_url().split("?")[0]
107-
return standardise_df(glider_df, dataset_url)
117+
glider_df = _to_pandas(self, query=query)
118+
# We need to reset to avoid fetching a single dataset_id when
119+
# making multiple requests.
120+
self.fetcher.dataset_id = None
121+
122+
return glider_df
108123

109124
def query( # noqa: PLR0913
110125
self: "GliderDataFetcher",
@@ -196,8 +211,10 @@ class DatasetList:
196211

197212
def __init__(
198213
self: "DatasetList",
214+
*,
199215
server: OptionalStr = _server,
200216
search_for: OptionalStr = None,
217+
delayed: OptionalBool = False,
201218
) -> None:
202219
"""Instantiate main class attributes.
203220
@@ -216,6 +233,7 @@ def __init__(
216233
protocol="tabledap",
217234
)
218235
self.search_for = search_for
236+
self.delayed = delayed
219237

220238
def get_ids(self: "DatasetList") -> list:
221239
"""Return the allDatasets list for the glider server."""
@@ -229,5 +247,10 @@ def get_ids(self: "DatasetList") -> list:
229247
self.e.dataset_id = "allDatasets"
230248
dataset_ids = self.e.to_pandas()["datasetID"].to_list()
231249
dataset_ids.remove("allDatasets")
232-
self.dataset_ids = dataset_ids
250+
if not self.delayed:
251+
self.dataset_ids = [
252+
dataset_id
253+
for dataset_id in dataset_ids
254+
if not dataset_id.endswith("-delayed")
255+
]
233256
return self.dataset_ids

notebooks/00-quick_intro.ipynb

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,9 @@
7171
"\n",
7272
"glider_grab = GliderDataFetcher()\n",
7373
"\n",
74-
"glider_grab.fetcher.dataset_id = \"whoi_406-20160902T1700\"\n",
75-
"df = glider_grab.to_pandas()\n",
74+
"glider_grab.dataset_ids = [\"whoi_406-20160902T1700\"]\n",
75+
"dfs = glider_grab.to_pandas()\n",
76+
"df = dfs[\"whoi_406-20160902T1700\"]\n",
7677
"df.head()"
7778
]
7879
},
@@ -174,7 +175,7 @@
174175
"name": "python",
175176
"nbconvert_exporter": "python",
176177
"pygments_lexer": "ipython3",
177-
"version": "3.10.13"
178+
"version": "3.13.3"
178179
}
179180
},
180181
"nbformat": 4,

notebooks/01-plotting_intro.ipynb

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@
2020
"\n",
2121
"glider_grab = GliderDataFetcher()\n",
2222
"\n",
23-
"glider_grab.fetcher.dataset_id = \"whoi_406-20160902T1700\"\n",
24-
"df = glider_grab.to_pandas()"
23+
"glider_grab.dataset_ids = [\"whoi_406-20160902T1700\"]\n",
24+
"dfs = glider_grab.to_pandas()"
2525
]
2626
},
2727
{
@@ -39,6 +39,8 @@
3939
"metadata": {},
4040
"outputs": [],
4141
"source": [
42+
"df = dfs[\"whoi_406-20160902T1700\"]\n",
43+
"\n",
4244
"fig, ax = df.plot_track()"
4345
]
4446
},
@@ -141,7 +143,7 @@
141143
"name": "python",
142144
"nbconvert_exporter": "python",
143145
"pygments_lexer": "ipython3",
144-
"version": "3.12.4"
146+
"version": "3.13.3"
145147
}
146148
},
147149
"nbformat": 4,

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ dynamic = [
2828
"version",
2929
]
3030

31-
dependencies = [ "erddapy", "httpx", "pandas", "pandas-flavor", "xarray" ]
31+
dependencies = [ "erddapy", "httpx", "pandas", "pandas-flavor", "stamina", "xarray" ]
3232

3333
optional-dependencies.docs = [ "jupyter", "nbconvert", "nbsphinx", "palettable", "sphinx" ]
3434
optional-dependencies.plotting = [ "cartopy", "gsw", "matplotlib" ]

0 commit comments

Comments
 (0)