4040from datetime import datetime
4141import json
4242import time
43+ from warnings import warn
4344
4445# third party
4546import requests
@@ -154,50 +155,74 @@ def mmwrid_to_epiweek(mmwrid):
154155 return epiweek_200340 .add_weeks (mmwrid - mmwrid_200340 ).get_ew ()
155156
156157
def reformat_to_nested(data):
    """
    Convert the default data object into a dictionary grouped by location and epiweek

    Arg data is the GRASP API response object, as fetched with
    'fetch_flusurv_object()'. Only the value associated with its
    'default_data' key is read. That value is a list of dictionaries of the
    format
        [
            {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 4.3, 'weeklyrate': 1.7, 'mmwrid': 2493},
            {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 20.3, 'weeklyrate': 0.1, 'mmwrid': 2513},
            {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 20.6, 'weeklyrate': 0.1, 'mmwrid': 2516},
            ...
        ]

    Returns a dictionary (nested defaultdicts) of the format
        {
            <location>: {
                <epiweek>: {
                    <ageid1>: <value>,
                    ...
                    <raceid2>: <value>,
                    ...
                }
                ...
            }
            ...
        }

    Looking up a group that was never seen returns None (and, because the
    innermost level is a defaultdict, inserts that key).

    Raises Exception if 'default_data' is empty, or if nothing was loaded
    into the output dictionary.
    """
    observations = data["default_data"]

    # Sanity check the input. We expect to see some epiweeks
    if not observations:
        raise Exception("no data found")

    # Create output object
    # First layer of keys is locations. Second layer of keys is epiweeks.
    # Third layer of keys is groups (by id, not age in years, sex abbr, etc).
    #
    # If a top-level key doesn't already exist, create a new empty dict.
    # If a secondary key doesn't already exist, create a new empty dict.
    # If a tertiary key doesn't already exist, create a new key with a
    # default value of None if not provided.
    data_out = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: None)))

    for obs in observations:
        epiweek = mmwrid_to_epiweek(obs["mmwrid"])
        location = code_to_location[(obs["networkid"], obs["catchmentid"])]
        groupname = groupids_to_name((obs["ageid"], obs["sexid"], obs["raceid"]))

        # The rate must be read from the observation; without this assignment
        # the comparisons below fail with a NameError.
        # NOTE(review): each observation carries both 'rate' and 'weeklyrate';
        # 'weeklyrate' is presumed to be the per-epiweek value wanted here --
        # confirm against the downstream consumer.
        rate = obs["weeklyrate"]

        prev_rate = data_out[location][epiweek][groupname]
        if prev_rate is None:
            # this is the first time to see a rate for this location-epiweek-
            # group combo
            data_out[location][epiweek][groupname] = rate
        elif prev_rate != rate:
            # Skip and warn
            # a different rate was already found for this location-epiweek-
            # group combo
            warn((f"warning: Multiple rates seen for {location} {epiweek} "
                  f"{groupname}, but previous value {prev_rate} does not "
                  f"equal new value {rate}. Using the first value."))

    # Sanity check the result. We expect to have populated our dictionary
    if not data_out:
        raise Exception("no data loaded")

    # `location` still holds the location of the last observation processed;
    # it is used as a representative sample for the per-location summary.
    print(f"found data for {len(data_out)} locations")
    print(f"found data for {len(data_out[location])} epiweeks for {location}")

    return data_out
202227
203228
@@ -216,7 +241,7 @@ def get_data(location_code):
216241
217242 # extract
218243 print ("[extracting values...]" )
219- data_out = extract_from_object (data_in )
244+ data_out = reformat_to_nested (data_in )
220245
221246 # return
222247 print ("[scraped successfully]" )
0 commit comments