Skip to content

Commit cea25ab

Browse files
committed
define function to convert json obs to dict grouped by location and epiweek
1 parent 9bf3c82 commit cea25ab

File tree

1 file changed

+61
-36
lines changed

1 file changed

+61
-36
lines changed

src/acquisition/flusurv/flusurv.py

Lines changed: 61 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
from datetime import datetime
4141
import json
4242
import time
43+
from warnings import warn
4344

4445
# third party
4546
import requests
@@ -154,50 +155,74 @@ def mmwrid_to_epiweek(mmwrid):
154155
return epiweek_200340.add_weeks(mmwrid - mmwrid_200340).get_ew()
155156

156157

157-
def extract_from_object(data_in):
158+
def reformat_to_nested(data):
158159
"""
159-
Given a FluSurv data object, return hospitalization rates.
160+
Convert the default data object into a dictionary grouped by location and epiweek
160161
161-
The returned object is indexed first by epiweek, then by zero-indexed age
162-
group.
162+
Arg data is the GRASP API response object; its 'default_data' value is a list of dictionaries of the format
163+
[
164+
{'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 4.3, 'weeklyrate': 1.7, 'mmwrid': 2493},
165+
{'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 20.3, 'weeklyrate': 0.1, 'mmwrid': 2513},
166+
{'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 20.6, 'weeklyrate': 0.1, 'mmwrid': 2516},
167+
...
168+
]
169+
170+
This object is stored as the value associated with the 'default_data' key in the
171+
GRASP API response object, as fetched with 'fetch_flusurv_object()'
172+
173+
Returns a dictionary of the format
174+
{
175+
<location>: {
176+
<epiweek>: {
177+
<ageid1>: <value>,
178+
...
179+
<raceid2>: <value>,
180+
...
181+
}
182+
...
183+
}
184+
...
185+
}
163186
"""
187+
# Sanity check the input. We expect to see some epiweeks
188+
if len(data["default_data"]) == 0:
189+
raise Exception("no data found")
164190

165191
# Create output object
166-
# First layer of keys is epiweeks. Second layer of keys is age groups
167-
# (by id, not age).
192+
# First layer of keys is locations. Second layer of keys is epiweeks.
193+
# Third layer of keys is groups (by id, not age in years, sex abbr, etc).
168194
#
169195
# If a top-level key doesn't already exist, create a new empty dict.
170-
# If a secondary key doesn't already exist, create a new dict. Default
171-
# value is None if not provided.
172-
data_out = defaultdict(lambda: defaultdict(lambda: None))
173-
174-
# iterate over all seasons and age groups
175-
for obj in data_in["busdata"]["dataseries"]:
176-
age_group = obj["age"]
177-
if age_group in (10, 11, 12):
178-
# TODO(https://github.com/cmu-delphi/delphi-epidata/issues/242):
179-
# capture as-of-yet undefined age groups 10, 11, and 12
180-
continue
181-
# iterate over weeks
182-
for mmwrid, _, _, rate in obj["data"]:
183-
epiweek = mmwrid_to_epiweek(mmwrid)
184-
prev_rate = data_out[epiweek][age_group]
185-
if prev_rate is None:
186-
# this is the first time to see a rate for this epiweek-age
187-
# group combo
188-
data_out[epiweek][age_group] = rate
189-
elif prev_rate != rate:
190-
# a different rate was already found for this epiweek-age
191-
# group combo
192-
format_args = (epiweek, age_group, prev_rate, rate)
193-
print("warning: %d %d %f != %f" % format_args)
194-
195-
# Sanity check the result. We expect to have seen some epiweeks
196+
# If a secondary key doesn't already exist, create a new empty dict.
197+
# If a tertiary key doesn't already exist, create a new key with a
198+
# default value of None if not provided.
199+
data_out = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: None)))
200+
201+
for obs in data["default_data"]:
202+
epiweek = mmwrid_to_epiweek(obs["mmwrid"])
203+
location = code_to_location[(obs["networkid"], obs["catchmentid"])]
204+
groupname = groupids_to_name((obs["ageid"], obs["sexid"], obs["raceid"]))
205+
206+
prev_rate = data_out[location][epiweek][groupname]
207+
if prev_rate is None:
208+
# this is the first time to see a rate for this location-epiweek-
209+
# group combo
210+
data_out[location][epiweek][groupname] = rate
211+
elif prev_rate != rate:
212+
# Skip and warn
213+
# a different rate was already found for this location-epiweek-
214+
# group combo
215+
warn((f"warning: Multiple rates seen for {location} {epiweek} "
216+
f"{groupname}, but previous value {prev_rate} does not "
217+
f"equal new value {rate}. Using the first value."))
218+
219+
# Sanity check the output. We expect to have populated our dictionary
196220
if len(data_out.keys()) == 0:
197-
raise Exception("no data found")
221+
raise Exception("no data loaded")
222+
223+
print(f"found data for {len(data_out.keys())} locations")
224+
print(f"found data for {len(data_out[location].keys())} epiweeks for {location}")
198225

199-
# print the result and return flu data
200-
print(f"found data for {len(data_out)} weeks")
201226
return data_out
202227

203228

@@ -216,7 +241,7 @@ def get_data(location_code):
216241

217242
# extract
218243
print("[extracting values...]")
219-
data_out = extract_from_object(data_in)
244+
data_out = reformat_to_nested(data_in)
220245

221246
# return
222247
print("[scraped successfully]")

0 commit comments

Comments
 (0)