Skip to content

Commit 2677a9e

Browse files
committed
Refactor population-proportion ("pop prop") calculations to fix #1399
Add population, replace geo, *then* divide by population. Includes test of aggregation.
1 parent 8e6c13d commit 2677a9e

File tree

2 files changed

+61
-43
lines changed

2 files changed

+61
-43
lines changed

hhs_hosp/delphi_hhs/run.py

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -109,19 +109,8 @@ def run_module(params):
109109
geo_res = geo,
110110
sensor = sensor,
111111
smoother = smoother)
112-
df = geo_mapper.add_geocode(make_signal(all_columns, sensor),
113-
"state_id",
114-
"state_code",
115-
from_col="state")
116-
if sensor.endswith("_prop"):
117-
df=pop_proportion(df, geo_mapper)
118-
df = make_geo(df, geo, geo_mapper)
119-
df = smooth_values(df, smoother[0])
120-
# Fix N/A MA values, see issue #1360
121-
if geo == "state" and sensor.startswith(CONFIRMED_FLU):
122-
ma_filter = df.val.isna() & (df.geo_id == "ma") & (df.timestamp > "08-01-2021") & \
123-
(df.timestamp.dt.day_name() == "Tuesday")
124-
df = df[~ma_filter]
112+
df = make_signal(all_columns, sensor)
113+
df = transform_signal(sensor, smoother, geo, df, geo_mapper)
125114
if df.empty:
126115
continue
127116
sensor_name = sensor + smoother[1]
@@ -155,17 +144,28 @@ def smooth_values(df, smoother):
155144
)
156145
return df
157146

158-
def pop_proportion(df,geo_mapper):
159-
"""Get the population-proportionate variants as the dataframe val."""
160-
pop_val=geo_mapper.add_population_column(df, "state_code")
161-
df["val"]=round(df["val"]/pop_val["population"]*100000, 7)
162-
pop_val.drop("population", axis=1, inplace=True)
def transform_signal(sensor, smoother, geo, df, geo_mapper):
    """Convert the base state-level frame into the requested signal variant.

    Steps, in order: attach a ``state_code`` column, optionally attach state
    populations (``*_prop`` sensors only), map to the target ``geo``, divide by
    population *after* the geo mapping for ``*_prop`` sensors, then smooth.

    Parameters
    ----------
    sensor : str
        Signal name; a ``_prop`` suffix selects the per-100,000 variant.
    smoother : sequence
        Only ``smoother[0]`` is used here; it is handed to ``smooth_values``.
    geo : str
        Target geography passed to ``make_geo``.
    df : DataFrame
        Base frame with a ``state`` column (state ids) and a ``val`` column.
    geo_mapper : GeoMapper
        Supplies ``add_geocode`` and ``add_population_column``.

    Returns
    -------
    DataFrame
        The transformed frame.
    """
    wants_proportion = sensor.endswith("_prop")

    df = geo_mapper.add_geocode(df, "state_id", "state_code", from_col="state")
    if wants_proportion:
        # Population is attached before the geo mapping so that it travels
        # through make_geo together with val.
        df = geo_mapper.add_population_column(df, "state_code")

    df = make_geo(df, geo, geo_mapper)

    if wants_proportion:
        per_capita = df["val"] / df["population"] * 100000
        df["val"] = per_capita.round(7)
        df.drop("population", axis=1, inplace=True)

    df = smooth_values(df, smoother[0])

    if not (geo == "state" and sensor.startswith(CONFIRMED_FLU)):
        return df

    # Fix N/A MA values, see issue #1360
    missing_val = df.val.isna()
    is_ma = df.geo_id == "ma"
    after_cutoff = df.timestamp > "08-01-2021"
    on_tuesday = df.timestamp.dt.day_name() == "Tuesday"
    ma_filter = missing_val & is_ma & after_cutoff & on_tuesday
    return df[~ma_filter]
164163

165164
def make_geo(state, geo, geo_mapper):
166165
"""Transform incoming geo (state) to another geo."""
167166
if geo == "state":
168167
exported = state.rename(columns={"state": "geo_id"})
168+
exported = exported.drop(columns="state_code")
169169
else:
170170
exported = geo_mapper.replace_geocode(state, "state_code", geo, new_col="geo_id")
171171
exported["se"] = np.nan

hhs_hosp/tests/test_run.py

Lines changed: 43 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import os
66

77
from delphi_hhs.run import _date_to_int, int_date_to_previous_day_datetime, generate_date_ranges, \
8-
make_signal, make_geo, run_module, pop_proportion
8+
make_signal, make_geo, run_module, transform_signal
99
from delphi_hhs.constants import SMOOTHERS, GEOS, SIGNALS, \
1010
CONFIRMED, SUM_CONF_SUSP, CONFIRMED_FLU, CONFIRMED_PROP, SUM_CONF_SUSP_PROP, CONFIRMED_FLU_PROP
1111
from delphi_utils.geomap import GeoMapper
@@ -89,40 +89,58 @@ def test_make_signal():
8989
with pytest.raises(Exception):
9090
make_signal(data, "zig")
9191

def test_transform_signal_pop():
    """Check the population-proportion output of transform_signal.

    Two states (pa, wv) with known counts are pushed through
    ``transform_signal`` for the CONFIRMED_PROP sensor:

    * at ``state`` level each value must equal count / state population
      * 100,000;
    * at ``nation`` level the value must equal the summed counts divided by
      the summed populations * 100,000 (i.e. aggregation happens before the
      division).
    """
    geo_mapper = GeoMapper()
    state_pop = geo_mapper.get_crosswalk("state_id", "pop")
    identity_smoother = SMOOTHERS[0]
    hundo_k = 100000
    timestamp = datetime(year=2020, month=1, day=1)

    test_df = pd.DataFrame({
        'state': ['pa', 'wv'],
        'timestamp': [timestamp]*2,
        'val': [15., 150.],})

    # Pull the scalar out with .iloc[0]: calling int() directly on a
    # one-element Series is deprecated in recent pandas releases.
    pa_pop = int(state_pop.loc[state_pop.state_id == "pa", "pop"].iloc[0])
    wv_pop = int(state_pop.loc[state_pop.state_id == "wv", "pop"].iloc[0])

    # State level: every state is divided by its own population.
    pd.testing.assert_frame_equal(
        transform_signal(
            CONFIRMED_PROP,
            identity_smoother,
            'state',
            test_df.copy(),
            geo_mapper),
        pd.DataFrame({
            'geo_id': ['pa', 'wv'],
            'timestamp': [timestamp]*2,
            'val': [15/pa_pop*hundo_k, 150/wv_pop*hundo_k],
            'se': [None]*2,
            'sample_size': [None]*2,}),
        check_dtype=False,
        check_like=True
    )

    # Nation level: counts and populations are summed first, then divided.
    pd.testing.assert_frame_equal(
        transform_signal(
            CONFIRMED_PROP,
            identity_smoother,
            'nation',
            test_df.copy(),
            geo_mapper),
        pd.DataFrame({
            'geo_id': ['us'],
            'timestamp': [timestamp],
            'val': [165/(pa_pop+wv_pop)*hundo_k],
            'se': [None],
            'sample_size': [None],}),
        check_dtype=False,
        check_like=True
    )
127145

128146
def test_make_geo():

0 commit comments

Comments
 (0)