Skip to content

Commit 24dc088

Browse files
committed
auto-map from valueids to ordinal and label-based group names
1 parent cea25ab commit 24dc088

File tree

1 file changed

+87
-15
lines changed

1 file changed

+87
-15
lines changed

src/acquisition/flusurv/flusurv.py

Lines changed: 87 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -159,16 +159,8 @@ def reformat_to_nested(data):
159159
"""
160160
Convert the default data object into a dictionary grouped by location and epiweek
161161
162-
Arg data is a list of dictionaries of the format
163-
[
164-
{'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 4.3, 'weeklyrate': 1.7, 'mmwrid': 2493},
165-
{'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 20.3, 'weeklyrate': 0.1, 'mmwrid': 2513},
166-
{'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 20.6, 'weeklyrate': 0.1, 'mmwrid': 2516},
167-
...
168-
]
169-
170-
This object is stored as the value associated with the 'default_data' key in the
171-
GRASP API response object, as fetched with 'fetch_flusurv_object()'
162+
Args:
163+
A GRASP API response object, as fetched with 'fetch_flusurv_object()'
172164
173165
Returns a dictionary of the format
174166
{
@@ -188,6 +180,8 @@ def reformat_to_nested(data):
188180
if len(data["default_data"]) == 0:
189181
raise Exception("no data found")
190182

183+
id_label_map = make_id_label_map(data)
184+
191185
# Create output object
192186
# First layer of keys is locations. Second layer of keys is epiweeks.
193187
# Third layer of keys is groups (by id, not age in years, sex abbr, etc).
@@ -198,20 +192,30 @@ def reformat_to_nested(data):
198192
# default value of None if not provided.
199193
data_out = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: None)))
200194

195+
# data["default_data"] is a list of dictionaries, with the format
196+
# [
197+
# {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 4.3, 'weeklyrate': 1.7, 'mmwrid': 2493},
198+
# {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 20.3, 'weeklyrate': 0.1, 'mmwrid': 2513},
199+
# {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 20.6, 'weeklyrate': 0.1, 'mmwrid': 2516},
200+
# ...
201+
# ]
201202
for obs in data["default_data"]:
202203
epiweek = mmwrid_to_epiweek(obs["mmwrid"])
203204
location = code_to_location[(obs["networkid"], obs["catchmentid"])]
204-
groupname = groupids_to_name((obs["ageid"], obs["sexid"], obs["raceid"]))
205+
groupname = groupids_to_name(
206+
ageid = obs["ageid"], sexid = obs["sexid"], raceid = obs["raceid"],
207+
id_label_map = id_label_map
208+
)
205209

210+
rate = obs["weeklyrate"]
206211
prev_rate = data_out[location][epiweek][groupname]
207212
if prev_rate is None:
208-
# this is the first time to see a rate for this location-epiweek-
213+
# This is the first time to see a rate for this location-epiweek-
209214
# group combo
210215
data_out[location][epiweek][groupname] = rate
211216
elif prev_rate != rate:
212-
# Skip and warn
213-
# a different rate was already found for this location-epiweek-
214-
# group combo
217+
# Skip and warn; a different rate was already found for this
218+
# location-epiweek-group combo
215219
warn((f"warning: Multiple rates seen for {location} {epiweek} "
216220
f"{groupname}, but previous value {prev_rate} does not "
217221
f"equal new value {rate}. Using the first value."))
@@ -221,6 +225,8 @@ def reformat_to_nested(data):
221225
raise Exception("no data loaded")
222226

223227
print(f"found data for {len(data_out.keys())} locations")
228+
# Just check one location to avoid iterating through the entire
229+
# dictionary.
224230
print(f"found data for {len(data_out[location].keys())} epiweeks for {location}")
225231

226232
return data_out
@@ -259,3 +265,69 @@ def get_current_issue(data):
259265

260266
# convert and return
261267
return EpiDate(date.year, date.month, date.day).get_ew()
268+
269+
270+
def make_id_label_map(data):
    """Build a lookup from group variable and valueid to a compact label.

    Args:
        data: mapping with a "master_lookup" entry holding an iterable of
            records, each with "Variable", "valueid" and "Label" keys.

    Returns:
        A two-level defaultdict indexed as result[variable][valueid]. Labels
        are normalized by dropping spaces and slashes, turning "-" into "t",
        removing "yr", and lowercasing. Unknown variables or valueids yield
        None instead of raising.
    """
    # One pass handles the three single-character substitutions; none of the
    # replacement characters is itself a key, so this matches doing them one
    # after another.
    char_table = str.maketrans({" ": None, "/": None, "-": "t"})

    labels_by_variable = defaultdict(lambda: defaultdict(lambda: None))
    for entry in data["master_lookup"]:
        variable = entry["Variable"]
        # Records without a variable describe the "overall" group; skip them.
        if variable is None:
            continue

        compact_label = entry["Label"].translate(char_table)
        compact_label = compact_label.replace("yr", "").lower()
        labels_by_variable[variable][entry["valueid"]] = compact_label

    return labels_by_variable
288+
289+
290+
def _group_label(id_label_map, variable, valueid):
    """Return the label stored for `valueid` under `variable`.

    Raises:
        ValueError: if the map yields None for this id, meaning no label is
            known for it.
    """
    label = id_label_map[variable][valueid]
    if label is None:
        raise ValueError(
            f"No {variable} label found for id {valueid}; "
            "please check for changes in the API"
        )
    return label


def groupids_to_name(ageid, sexid, raceid, id_label_map):
    """Convert a set of group ids into the matching rate column name.

    Args:
        ageid: age group id; 0 means no age stratification.
        sexid: sex group id; 0 means no sex stratification.
        raceid: race group id; 0 means no race stratification.
        id_label_map: nested mapping indexed as
            id_label_map[variable][valueid], with variables "Age", "Sex"
            and "Race", giving the label to use in the column name.

    Returns:
        "rate_overall" when all ids are 0, otherwise "rate_age_<x>",
        "rate_sex_<label>" or "rate_race_<label>" for the single non-zero id.

    Raises:
        AssertionError: if more than one id is non-zero.
        ValueError: if ageid is 6, or if the map has no label for the id.
    """
    # Expect at least 2 of three ids to be 0. Raise explicitly instead of
    # using `assert` so the check still runs under `python -O`.
    zero_count = (ageid, sexid, raceid).count(0)
    if zero_count < 2:
        raise AssertionError("At most one groupid can be non-zero")

    if zero_count == 3:
        group = "overall"
    elif ageid != 0:
        # The column names used in the DB for the original age groups
        # are ordinal, such that:
        #   "rate_age_0" corresponds to age group 1, 0-4 yr
        #   "rate_age_1" corresponds to age group 2, 5-17 yr
        #   "rate_age_2" corresponds to age group 3, 18-49 yr
        #   "rate_age_3" corresponds to age group 4, 50-64 yr
        #   "rate_age_4" corresponds to age group 5, 65+ yr
        #   "rate_age_5" corresponds to age group 7, 65-74 yr
        #   "rate_age_6" corresponds to age group 8, 75-84 yr
        #   "rate_age_7" corresponds to age group 9, 85+ yr
        #
        # Group 6 was the "overall" category and not included in the
        # ordinal naming scheme. Because of that, groups 1-5 have column
        # ids equal to the ageid - 1; groups 7-9 have column ids equal
        # to ageid - 2.
        #
        # Automatically map from ageids 1-9 to column ids to match
        # the historical convention.
        if ageid <= 5:
            age_group = str(ageid - 1)
        elif ageid == 6:
            # Ageid of 6 used to be used for the "overall" category.
            # Now "overall" is represented by a valueid of 0, and ageid of 6
            # is not used for any group. If we see an ageid of 6, something
            # has gone wrong.
            raise ValueError("Ageid cannot be 6; please check for changes in the API")
        elif ageid <= 9:
            age_group = str(ageid - 2)
        else:
            # Newer age groups have no ordinal column; name them by label.
            age_group = _group_label(id_label_map, "Age", ageid)
        group = "age_" + age_group
    elif sexid != 0:
        group = "sex_" + _group_label(id_label_map, "Sex", sexid)
    else:
        # Exactly one id is non-zero and it is neither age nor sex.
        group = "race_" + _group_label(id_label_map, "Race", raceid)

    return "rate_" + group

0 commit comments

Comments
 (0)