Skip to content

Commit 26eff97

Browse files
committed
add new strata to sql insert statement by name, not order
1 parent 24dc088 commit 26eff97

File tree

1 file changed

+157
-19
lines changed

1 file changed

+157
-19
lines changed

src/acquisition/flusurv/flusurv_update.py

Lines changed: 157 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -112,39 +112,177 @@ def update(issue, location, test_mode=False):
112112
# SQL for insert/update
113113
sql = """
114114
INSERT INTO `flusurv` (
115-
`release_date`, `issue`, `epiweek`, `location`, `lag`, `rate_age_0`,
116-
`rate_age_1`, `rate_age_2`, `rate_age_3`, `rate_age_4`, `rate_overall`,
117-
`rate_age_5`, `rate_age_6`, `rate_age_7`
115+
`release_date`,
116+
`issue`,
117+
`epiweek`,
118+
`location`,
119+
`lag`,
120+
121+
`rate_overall`,
122+
123+
`rate_age_0`,
124+
`rate_age_1`,
125+
`rate_age_2`,
126+
`rate_age_3`,
127+
`rate_age_4`,
128+
`rate_age_5`,
129+
`rate_age_6`,
130+
`rate_age_7`,
131+
132+
`rate_age_18t29`,
133+
`rate_age_30t39`,
134+
`rate_age_40t49`,
135+
`rate_age_5t11`,
136+
`rate_age_12t17`,
137+
`rate_age_lt18`,
138+
`rate_age_gte18`,
139+
140+
`rate_race_white`,
141+
`rate_race_black`,
142+
`rate_race_hisp`,
143+
`rate_race_asian`,
144+
`rate_race_natamer`,
145+
146+
`rate_sex_male`,
147+
`rate_sex_female`
118148
)
119149
VALUES (
120-
%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s
150+
%(release_date)s,
151+
%(issue)s,
152+
%(epiweek)s,
153+
%(location)s,
154+
%(lag)s,
155+
156+
%(rate_overall)s,
157+
158+
%(rate_age_0)s,
159+
%(rate_age_1)s,
160+
%(rate_age_2)s,
161+
%(rate_age_3)s,
162+
%(rate_age_4)s,
163+
%(rate_age_5)s,
164+
%(rate_age_6)s,
165+
%(rate_age_7)s,
166+
167+
%(rate_age_18t29)s,
168+
%(rate_age_30t39)s,
169+
%(rate_age_40t49)s,
170+
%(rate_age_5t11)s,
171+
%(rate_age_12t17)s,
172+
%(rate_age_<18)s,
173+
%(rate_age_>=18)s,
174+
175+
%(rate_race_white)s,
176+
%(rate_race_black)s,
177+
%(rate_race_hispaniclatino)s,
178+
%(rate_race_asianpacificislander)s,
179+
%(rate_race_americanindianalaskanative)s,
180+
181+
%(rate_sex_male)s,
182+
%(rate_sex_female)s
121183
)
122184
ON DUPLICATE KEY UPDATE
123-
`release_date` = least(`release_date`, %s),
124-
`rate_age_0` = coalesce(%s, `rate_age_0`),
125-
`rate_age_1` = coalesce(%s, `rate_age_1`),
126-
`rate_age_2` = coalesce(%s, `rate_age_2`),
127-
`rate_age_3` = coalesce(%s, `rate_age_3`),
128-
`rate_age_4` = coalesce(%s, `rate_age_4`),
129-
`rate_overall` = coalesce(%s, `rate_overall`),
130-
`rate_age_5` = coalesce(%s, `rate_age_5`),
131-
`rate_age_6` = coalesce(%s, `rate_age_6`),
132-
`rate_age_7` = coalesce(%s, `rate_age_7`)
185+
`release_date` = least(`release_date`, %(release_date)s),
186+
`rate_overall` = coalesce(%(rate_overall)s, `rate_overall`),
187+
188+
`rate_age_0` = coalesce(%(rate_age_0)s, `rate_age_0`),
189+
`rate_age_1` = coalesce(%(rate_age_1)s, `rate_age_1`),
190+
`rate_age_2` = coalesce(%(rate_age_2)s, `rate_age_2`),
191+
`rate_age_3` = coalesce(%(rate_age_3)s, `rate_age_3`),
192+
`rate_age_4` = coalesce(%(rate_age_4)s, `rate_age_4`),
193+
`rate_age_5` = coalesce(%(rate_age_5)s, `rate_age_5`),
194+
`rate_age_6` = coalesce(%(rate_age_6)s, `rate_age_6`),
195+
`rate_age_7` = coalesce(%(rate_age_7)s, `rate_age_7`),
196+
197+
`rate_age_18t29` = coalesce(%(rate_age_18t29)s, `rate_age_18t29`),
198+
`rate_age_30t39` = coalesce(%(rate_age_30t39)s, `rate_age_30t39`),
199+
`rate_age_40t49` = coalesce(%(rate_age_40t49)s, `rate_age_40t49`),
200+
`rate_age_5t11` = coalesce(%(rate_age_5t11)s, `rate_age_5t11`),
201+
`rate_age_12t17` = coalesce(%(rate_age_12t17)s, `rate_age_12t17`),
202+
`rate_age_lt18` = coalesce(%(rate_age_<18)s, `rate_age_lt18`),
203+
`rate_age_gte18` = coalesce(%(rate_age_>=18)s, `rate_age_gte18`),
204+
205+
`rate_race_white` = coalesce(%(rate_race_white)s, `rate_race_white`),
206+
`rate_race_black` = coalesce(%(rate_race_black)s, `rate_race_black`),
207+
`rate_race_hisp` = coalesce(%(rate_race_hispaniclatino)s, `rate_race_hisp`),
208+
`rate_race_asian` = coalesce(%(rate_race_asianpacificislander)s, `rate_race_asian`),
209+
`rate_race_natamer` = coalesce(%(rate_race_americanindianalaskanative)s, `rate_race_natamer`),
210+
211+
`rate_sex_male` = coalesce(%(rate_sex_male)s, `rate_sex_male`),
212+
`rate_sex_female` = coalesce(%(rate_sex_female)s, `rate_sex_female`)
133213
"""
134214

135215
# insert/update each row of data (one per epiweek)
136216
for epiweek in epiweeks:
217+
# As of Sept 2023, we expect to see these 24 groups, as described in
218+
# the top-level "master_lookup" element of the new GRASP API
219+
# (https://gis.cdc.gov/GRASP/Flu3/PostPhase03DataTool) response
220+
# object:
221+
# 'master_lookup' = [
222+
# {'Variable': 'Age', 'valueid': 1, 'parentid': 97, 'Label': '0-4 yr', 'Color_HexValue': '#d19833', 'Enabled': True},
223+
# {'Variable': 'Age', 'valueid': 2, 'parentid': 97, 'Label': '5-17 yr', 'Color_HexValue': '#707070', 'Enabled': True},
224+
# {'Variable': 'Age', 'valueid': 3, 'parentid': 98, 'Label': '18-49 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True},
225+
# {'Variable': 'Age', 'valueid': 4, 'parentid': 98, 'Label': '50-64 yr', 'Color_HexValue': '#516889', 'Enabled': True},
226+
# {'Variable': 'Age', 'valueid': 5, 'parentid': 98, 'Label': '65+ yr', 'Color_HexValue': '#cc5e56', 'Enabled': True},
227+
# {'Variable': 'Age', 'valueid': 7, 'parentid': 5, 'Label': '65-74 yr', 'Color_HexValue': '#cc5e56', 'Enabled': True},
228+
# {'Variable': 'Age', 'valueid': 8, 'parentid': 5, 'Label': '75-84 yr', 'Color_HexValue': '#cc5e56', 'Enabled': True},
229+
# {'Variable': 'Age', 'valueid': 9, 'parentid': 5, 'Label': '85+', 'Color_HexValue': '#cc5e56', 'Enabled': True},
230+
# {'Variable': 'Age', 'valueid': 10, 'parentid': 3, 'Label': '18-29 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True},
231+
# {'Variable': 'Age', 'valueid': 11, 'parentid': 3, 'Label': '30-39 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True},
232+
# {'Variable': 'Age', 'valueid': 12, 'parentid': 3, 'Label': '40-49 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True},
233+
# {'Variable': 'Age', 'valueid': 21, 'parentid': 2, 'Label': '5-11 yr', 'Color_HexValue': '#707070', 'Enabled': True},
234+
# {'Variable': 'Age', 'valueid': 22, 'parentid': 2, 'Label': '12-17 yr', 'Color_HexValue': '#707070', 'Enabled': True}
235+
# {'Variable': 'Age', 'valueid': 97, 'parentid': 0, 'Label': '< 18', 'Color_HexValue': '#000000', 'Enabled': True},
236+
# {'Variable': 'Age', 'valueid': 98, 'parentid': 0, 'Label': '>= 18', 'Color_HexValue': '#000000', 'Enabled': True},
237+
#
238+
# {'Variable': 'Race', 'valueid': 1, 'parentid': None, 'Label': 'White', 'Color_HexValue': '#516889', 'Enabled': True},
239+
# {'Variable': 'Race', 'valueid': 2, 'parentid': None, 'Label': 'Black', 'Color_HexValue': '#44b3c6', 'Enabled': True},
240+
# {'Variable': 'Race', 'valueid': 3, 'parentid': None, 'Label': 'Hispanic/Latino', 'Color_HexValue': '#d19833', 'Enabled': True},
241+
# {'Variable': 'Race', 'valueid': 4, 'parentid': None, 'Label': 'Asian/Pacific Islander', 'Color_HexValue': '#cc5e56', 'Enabled': True},
242+
# {'Variable': 'Race', 'valueid': 5, 'parentid': None, 'Label': 'American Indian/Alaska Native', 'Color_HexValue': '#007d8e', 'Enabled': True},
243+
#
244+
# {'Variable': 'Sex', 'valueid': 1, 'parentid': None, 'Label': 'Male', 'Color_HexValue': '#44b3c6', 'Enabled': True},
245+
# {'Variable': 'Sex', 'valueid': 2, 'parentid': None, 'Label': 'Female', 'Color_HexValue': '#F2775F', 'Enabled': True},
246+
#
247+
# {'Variable': None, 'valueid': 0, 'parentid': 0, 'Label': 'Overall', 'Color_HexValue': '#000000', 'Enabled': True},
248+
# ]
249+
#
250+
# The previous version of the GRASP API
251+
# (https://gis.cdc.gov/GRASP/Flu3/GetPhase03InitApp)
252+
# used a different age group-id mapping, as described in the
253+
# top-level "ages" element:
254+
# 'ages' = [
255+
# {'label': '0-4 yr', 'ageid': 1, 'color_hexvalue': '#1B9E77'},
256+
# {'label': '5-17 yr', 'ageid': 2, 'color_hexvalue': '#D95F02'},
257+
# {'label': '18-49 yr', 'ageid': 3, 'color_hexvalue': '#4A298B'},
258+
# {'label': '50-64 yr', 'ageid': 4, 'color_hexvalue': '#E7298A'},
259+
# {'label': '65+ yr', 'ageid': 5, 'color_hexvalue': '#6AA61E'},
260+
# {'label': 'Overall', 'ageid': 6, 'color_hexvalue': '#000000'},
261+
# {'label': '65-74 yr', 'ageid': 7, 'color_hexvalue': '#A6CEE3'},
262+
# {'label': '75-84 yr', 'ageid': 8, 'color_hexvalue': '#CAB2D6'},
263+
# {'label': '85+', 'ageid': 9, 'color_hexvalue': '#1f78b4'}
264+
# ]
265+
#
266+
# In addition to the new age, race, and sex breakdowns, the
267+
# group id for overall reporting has changed from 6 to 0.
268+
n_max_expected_groups = 24
269+
assert len(epiweek.keys()) == n_max_expected_groups, \
270+
f"{location} {epiweek} data does not contain the expected {n_max_expected_groups} groups"
271+
137272
lag = delta_epiweeks(epiweek, issue)
138273
if lag > 52:
139274
# Ignore values older than one year, as (1) they are assumed not to
140275
# change, and (2) it would adversely affect database performance if all
141276
# values (including duplicates) were stored on each run.
142277
continue
143-
args_meta = [release_date, issue, epiweek, location, lag]
144-
# List of values in order of columns specified in sql statement above
145-
args_insert = [week_rate_tuple[1] for week_rate_tuple in sorted(data[epiweek].items())]
146-
args_update = [release_date] + args_insert
147-
cur.execute(sql, tuple(args_meta + args_insert + args_update))
278+
args_meta = {
279+
"release_date": release_date,
280+
"issue": issue,
281+
"epiweek": epiweek,
282+
"location": location,
283+
"lag": lag
284+
}
285+
cur.execute(sql, {**args_meta, **data[epiweek]})
148286

149287
# commit and disconnect
150288
rows2 = get_rows(cur)

0 commit comments

Comments
 (0)