|
36 | 36 | """ |
37 | 37 |
|
38 | 38 | # standard library |
| 39 | +from collections import defaultdict |
39 | 40 | from datetime import datetime |
40 | 41 | import json |
41 | 42 | import time |
@@ -161,38 +162,38 @@ def extract_from_object(data_in): |
161 | 162 | group. |
162 | 163 | """ |
163 | 164 |
|
164 | | - # an object to hold the result |
165 | | - data_out = {} |
| 165 | + # Create output object |
| 166 | + # First layer of keys is epiweeks. Second layer of keys is age groups |
| 167 | + # (by id, not age). |
| 168 | + # |
| 169 | + # If a top-level key doesn't already exist, create a new empty dict. |
| 170 | + # If a secondary key doesn't already exist, it is created on first |
| 171 | + # lookup with a default value of None. |
| 172 | + data_out = defaultdict(lambda: defaultdict(lambda: None)) |
166 | 173 |
|
167 | 174 | # iterate over all seasons and age groups |
168 | 175 | for obj in data_in["busdata"]["dataseries"]: |
169 | | - if obj["age"] in (10, 11, 12): |
| 176 | + age_group = obj["age"] |
| 177 | + if age_group in (10, 11, 12): |
170 | 178 | # TODO(https://github.com/cmu-delphi/delphi-epidata/issues/242): |
171 | 179 | # capture as-of-yet undefined age groups 10, 11, and 12 |
172 | 180 | continue |
173 | | - age_index = obj["age"] - 1 |
174 | 181 | # iterate over weeks |
175 | 182 | for mmwrid, _, _, rate in obj["data"]: |
176 | 183 | epiweek = mmwrid_to_epiweek(mmwrid) |
177 | | - if epiweek not in data_out: |
178 | | - # weekly rate of each age group |
179 | | - # TODO what is this magic constant? Maybe total # of age |
180 | | - # groups?? Appears to be assuming that age groups are |
181 | | - # numbered sequentially. Better to store data_out in a |
182 | | - # dictionary of dictionaries, given new age group ids |
183 | | - # (e.g. 99, 21, etc) |
184 | | - data_out[epiweek] = [None] * 9 |
185 | | - prev_rate = data_out[epiweek][age_index] |
| 184 | + prev_rate = data_out[epiweek][age_group] |
186 | 185 | if prev_rate is None: |
187 | | - # this is the first time to see a rate for this epiweek/age |
188 | | - data_out[epiweek][age_index] = rate |
| 186 | + # this is the first time to see a rate for this epiweek-age |
| 187 | + # group combo |
| 188 | + data_out[epiweek][age_group] = rate |
189 | 189 | elif prev_rate != rate: |
190 | | - # a different rate was already found for this epiweek/age |
191 | | - format_args = (epiweek, obj["age"], prev_rate, rate) |
| 190 | + # a different rate was already found for this epiweek-age |
| 191 | + # group combo |
| 192 | + format_args = (epiweek, age_group, prev_rate, rate) |
192 | 193 | print("warning: %d %d %f != %f" % format_args) |
193 | 194 |
|
194 | | - # sanity check the result |
195 | | - if len(data_out) == 0: |
| 195 | + # Sanity check the result. We expect to have seen some epiweeks |
| 196 | + if len(data_out.keys()) == 0: |
196 | 197 | raise Exception("no data found") |
197 | 198 |
|
198 | 199 | # print the result and return flu data |
|
0 commit comments