Skip to content

Commit 9bf3c82

Browse files
committed
store extracted rates by strata name rather than position
Previously, age strata were numbered sequentially, which allowed us to store rate values by position in a list. With the introduction of the new strata, this system is not robust enough to track all the different groups (e.g., age IDs are no longer sequential, and there are now race and sex groupings with separate numbering systems).
1 parent 9cb1da4 commit 9bf3c82

File tree

2 files changed

+21
-20
lines changed

2 files changed

+21
-20
lines changed

src/acquisition/flusurv/flusurv.py

Lines changed: 20 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
"""
3737

3838
# standard library
39+
from collections import defaultdict
3940
from datetime import datetime
4041
import json
4142
import time
@@ -161,38 +162,38 @@ def extract_from_object(data_in):
161162
group.
162163
"""
163164

164-
# an object to hold the result
165-
data_out = {}
165+
# Create output object
166+
# First layer of keys is epiweeks. Second layer of keys is age groups
167+
# (by id, not age).
168+
#
169+
# If a top-level key doesn't already exist, create a new empty dict.
170+
# If a secondary key doesn't already exist, it is created with a default
171+
# value of None.
172+
data_out = defaultdict(lambda: defaultdict(lambda: None))
166173

167174
# iterate over all seasons and age groups
168175
for obj in data_in["busdata"]["dataseries"]:
169-
if obj["age"] in (10, 11, 12):
176+
age_group = obj["age"]
177+
if age_group in (10, 11, 12):
170178
# TODO(https://github.com/cmu-delphi/delphi-epidata/issues/242):
171179
# capture as-of-yet undefined age groups 10, 11, and 12
172180
continue
173-
age_index = obj["age"] - 1
174181
# iterate over weeks
175182
for mmwrid, _, _, rate in obj["data"]:
176183
epiweek = mmwrid_to_epiweek(mmwrid)
177-
if epiweek not in data_out:
178-
# weekly rate of each age group
179-
# TODO what is this magic constant? Maybe total # of age
180-
# groups?? Appears to be assuming that age groups are
181-
# numbered sequentially. Better to store data_out in a
182-
# dictionary of dictionaries, given new age group ids
183-
# (e.g. 99, 21, etc)
184-
data_out[epiweek] = [None] * 9
185-
prev_rate = data_out[epiweek][age_index]
184+
prev_rate = data_out[epiweek][age_group]
186185
if prev_rate is None:
187-
# this is the first time to see a rate for this epiweek/age
188-
data_out[epiweek][age_index] = rate
186+
# this is the first time to see a rate for this epiweek-age
187+
# group combo
188+
data_out[epiweek][age_group] = rate
189189
elif prev_rate != rate:
190-
# a different rate was already found for this epiweek/age
191-
format_args = (epiweek, obj["age"], prev_rate, rate)
190+
# a different rate was already found for this epiweek-age
191+
# group combo
192+
format_args = (epiweek, age_group, prev_rate, rate)
192193
print("warning: %d %d %f != %f" % format_args)
193194

194-
# sanity check the result
195-
if len(data_out) == 0:
195+
# Sanity check the result. We expect to have seen some epiweeks
196+
if len(data_out.keys()) == 0:
196197
raise Exception("no data found")
197198

198199
# print the result and return flu data

src/acquisition/flusurv/flusurv_update.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ def update(issue, location, test_mode=False):
142142
continue
143143
args_meta = [release_date, issue, epiweek, location, lag]
144144
# List of values in order of columns specified in sql statement above
145-
args_insert = data[epiweek]
145+
args_insert = [week_rate_tuple[1] for week_rate_tuple in sorted(data[epiweek].items())]
146146
args_update = [release_date] + args_insert
147147
cur.execute(sql, tuple(args_meta + args_insert + args_update))
148148

0 commit comments

Comments
 (0)