Skip to content

Commit fc7fa58

Browse files
authored
Merge pull request #1407 from cmu-delphi/krivard/fix-hhs-prop
Refactor pop prop calculations to fix #1399
2 parents 267aa73 + 356d6f3 commit fc7fa58

File tree

2 files changed

+57
-42
lines changed

2 files changed

+57
-42
lines changed

hhs_hosp/delphi_hhs/run.py

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -109,19 +109,8 @@ def run_module(params):
109109
geo_res = geo,
110110
sensor = sensor,
111111
smoother = smoother)
112-
df = geo_mapper.add_geocode(make_signal(all_columns, sensor),
113-
"state_id",
114-
"state_code",
115-
from_col="state")
116-
if sensor.endswith("_prop"):
117-
df=pop_proportion(df, geo_mapper)
118-
df = make_geo(df, geo, geo_mapper)
119-
df = smooth_values(df, smoother[0])
120-
# Fix N/A MA values, see issue #1360
121-
if geo == "state" and sensor.startswith(CONFIRMED_FLU):
122-
ma_filter = df.val.isna() & (df.geo_id == "ma") & (df.timestamp > "08-01-2021") & \
123-
(df.timestamp.dt.day_name() == "Tuesday")
124-
df = df[~ma_filter]
112+
df = make_signal(all_columns, sensor)
113+
df = transform_signal(sensor, smoother, geo, df, geo_mapper)
125114
if df.empty:
126115
continue
127116
sensor_name = sensor + smoother[1]
@@ -155,17 +144,32 @@ def smooth_values(df, smoother):
155144
)
156145
return df
157146

158-
def pop_proportion(df,geo_mapper):
159-
"""Get the population-proportionate variants as the dataframe val."""
160-
pop_val=geo_mapper.add_population_column(df, "state_code")
161-
df["val"]=round(df["val"]/pop_val["population"]*100000, 7)
162-
pop_val.drop("population", axis=1, inplace=True)
147+
def transform_signal(sensor, smoother, geo, df, geo_mapper):
148+
"""Transform base df into specified geo/smoothing/prop configuration."""
149+
df = geo_mapper.add_geocode(df, "state_id", "state_code", from_col="state")
150+
# handling population:
151+
# add population column
152+
# sum admission counts *and* population counts during make_geo
153+
# *then* divide counts by population to get the proportion
154+
if sensor.endswith("_prop"):
155+
df=geo_mapper.add_population_column(df, "state_code")
156+
df = make_geo(df, geo, geo_mapper)
157+
if sensor.endswith("_prop"):
158+
df["val"]=round(df["val"]/df["population"]*100000, 7)
159+
df.drop("population", axis=1, inplace=True)
160+
df = smooth_values(df, smoother[0])
161+
# Fix N/A MA values, see issue #1360
162+
if geo == "state" and sensor.startswith(CONFIRMED_FLU):
163+
ma_filter = df.val.isna() & (df.geo_id == "ma") & (df.timestamp > "08-01-2021") & \
164+
(df.timestamp.dt.day_name() == "Tuesday")
165+
df = df[~ma_filter]
163166
return df
164167

165168
def make_geo(state, geo, geo_mapper):
166169
"""Transform incoming geo (state) to another geo."""
167170
if geo == "state":
168171
exported = state.rename(columns={"state": "geo_id"})
172+
exported = exported.drop(columns="state_code")
169173
else:
170174
exported = geo_mapper.replace_geocode(state, "state_code", geo, new_col="geo_id")
171175
exported["se"] = np.nan

hhs_hosp/tests/test_run.py

Lines changed: 35 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import os
66

77
from delphi_hhs.run import _date_to_int, int_date_to_previous_day_datetime, generate_date_ranges, \
8-
make_signal, make_geo, run_module, pop_proportion
8+
make_signal, make_geo, run_module, transform_signal
99
from delphi_hhs.constants import SMOOTHERS, GEOS, SIGNALS, \
1010
CONFIRMED, SUM_CONF_SUSP, CONFIRMED_FLU, CONFIRMED_PROP, SUM_CONF_SUSP_PROP, CONFIRMED_FLU_PROP
1111
from delphi_utils.geomap import GeoMapper
@@ -89,40 +89,51 @@ def test_make_signal():
8989
with pytest.raises(Exception):
9090
make_signal(data, "zig")
9191

92-
def test_pop_proportion():
92+
def test_transform_signal_pop():
9393
geo_mapper = GeoMapper()
94-
state_pop = geo_mapper.get_crosswalk("state_code", "pop")
94+
state_pop = geo_mapper.get_crosswalk("state_id", "pop")
95+
identity_smoother = SMOOTHERS[0]
96+
hundo_k = 100000
9597

9698
test_df = pd.DataFrame({
97-
'state': ['PA'],
98-
'state_code': [42],
99-
'timestamp': [datetime(year=2020, month=1, day=1)],
100-
'val': [15.],})
99+
'state': ['pa', 'wv'],
100+
'timestamp': [datetime(year=2020, month=1, day=1)]*2,
101+
'val': [15., 150.],})
101102

102-
pa_pop = int(state_pop.loc[state_pop.state_code == "42", "pop"])
103+
pa_pop = int(state_pop.loc[state_pop.state_id == "pa", "pop"])
104+
wv_pop = int(state_pop.loc[state_pop.state_id == "wv", "pop"])
103105
pd.testing.assert_frame_equal(
104-
pop_proportion(test_df, geo_mapper),
106+
transform_signal(
107+
CONFIRMED_PROP,
108+
identity_smoother,
109+
'state',
110+
test_df.copy(),
111+
geo_mapper),
105112
pd.DataFrame({
106-
'state': ['PA'],
107-
'state_code': [42],
108-
'timestamp': [datetime(year=2020, month=1, day=1)],
109-
'val': [15/pa_pop*100000],})
113+
'geo_id': ['pa', 'wv'],
114+
'timestamp': [datetime(year=2020, month=1, day=1)]*2,
115+
'val': [15/pa_pop*hundo_k, 150/wv_pop*hundo_k],
116+
'se': [None]*2,
117+
'sample_size': [None]*2,}),
118+
check_dtype=False,
119+
check_like=True
110120
)
111121

112-
test_df= pd.DataFrame({
113-
'state': ['WV'],
114-
'state_code': [54],
115-
'timestamp': [datetime(year=2020, month=1, day=1)],
116-
'val': [150.],})
117-
118-
wv_pop = int(state_pop.loc[state_pop.state_code == "54", "pop"])
119122
pd.testing.assert_frame_equal(
120-
pop_proportion(test_df, geo_mapper),
123+
transform_signal(
124+
CONFIRMED_PROP,
125+
identity_smoother,
126+
'nation',
127+
test_df.copy(),
128+
geo_mapper),
121129
pd.DataFrame({
122-
'state': ['WV'],
123-
'state_code': [54],
130+
'geo_id': ['us'],
124131
'timestamp': [datetime(year=2020, month=1, day=1)],
125-
'val': [150/wv_pop*100000],})
132+
'val': [165/(pa_pop+wv_pop)*hundo_k],
133+
'se': [None],
134+
'sample_size': [None],}),
135+
check_dtype=False,
136+
check_like=True
126137
)
127138

128139
def test_make_geo():

0 commit comments

Comments
 (0)