Commit 0383c61

Merge pull request #1293 from cmu-delphi/krivard/chng-empty-files
Fix CHNG output so that changes in startdate change which files are created, but nothing else
2 parents 5891f72 + 5ce253b commit 0383c61

File tree

3 files changed: +53 -10 lines changed

changehc/delphi_changehc/config.py

Lines changed: 1 addition & 1 deletion

@@ -19,7 +19,7 @@ class Config:
     BURN_IN_PERIOD = timedelta(days=1)
 
     # shift dates forward for labeling purposes
-    DAY_SHIFT = timedelta(days=1)
+    DAY_SHIFT = timedelta(days=0)
 
     ## data columns
     COVID_COL = "COVID"
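
The only code change in this file drops the one-day forward shift applied when labeling dates. A minimal sketch of the effect, assuming (per the comment in the hunk above) that DAY_SHIFT is simply added to each raw timestamp before it appears in output date labels; this is an illustration, not code from the package:

    # Illustrative only: relabeling a raw date with the old vs. new DAY_SHIFT,
    # assuming the updater adds DAY_SHIFT to each timestamp when naming output.
    from datetime import date, timedelta

    raw_date = date(2020, 3, 1)
    old_label = raw_date + timedelta(days=1)  # previous DAY_SHIFT -> "20200302"
    new_label = raw_date + timedelta(days=0)  # new DAY_SHIFT      -> "20200301"
    print(old_label.strftime("%Y%m%d"), new_label.strftime("%Y%m%d"))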

changehc/delphi_changehc/update_sensor.py

Lines changed: 5 additions & 3 deletions

@@ -155,11 +155,13 @@ def geo_reindex(self, data):
                 Config.MIN_DEN,
                 Config.MAX_BACKFILL_WINDOW,
                 thr_col="den",
-                mega_col=geo)
+                mega_col=geo,
+                date_col=Config.DATE_COL)
         elif geo == "state":
-            data_frame = gmpr.replace_geocode(data, "fips", "state_id", new_col="state")
+            data_frame = gmpr.replace_geocode(data, "fips", "state_id", new_col="state",
+                                              date_col=Config.DATE_COL)
         else:
-            data_frame = gmpr.replace_geocode(data, "fips", geo)
+            data_frame = gmpr.replace_geocode(data, "fips", geo, date_col=Config.DATE_COL)
 
         unique_geo_ids = pd.unique(data_frame[geo])
         data_frame.set_index([geo, Config.DATE_COL],inplace=True)
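
All three geographic aggregation calls now pass the date column explicitly. As far as the diff shows, the point is to group by the new geography and by date rather than collapsing the whole date range into one row per location. A rough standalone pandas sketch of that idea (not the GeoMapper implementation; column names and values are placeholders):

    # Rough illustration of why the date column matters when re-coding geographies:
    # grouping by (state_id, timestamp) keeps one row per date, while grouping by
    # state_id alone collapses the date range into a single row.
    import pandas as pd

    df = pd.DataFrame({
        "fips": ["01001", "01003", "01001", "01003"],
        "state_id": ["al"] * 4,
        "timestamp": pd.to_datetime(["2020-03-01", "2020-03-01", "2020-03-02", "2020-03-02"]),
        "num": [1, 2, 3, 4],
        "den": [10, 20, 30, 40],
    })

    with_dates = df.groupby(["state_id", "timestamp"], as_index=False)[["num", "den"]].sum()
    without_dates = df.groupby("state_id", as_index=False)[["num", "den"]].sum()
    assert len(with_dates) == 2 and len(without_dates) == 1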

changehc/tests/test_update_sensor.py

Lines changed: 47 additions & 6 deletions

@@ -132,6 +132,47 @@ def test_update_sensor(self):
         assert outputs["20200319_hhs_smoothed_outpatient_covid.csv"].empty
         assert outputs["20200319_nation_smoothed_outpatient_covid.csv"].empty
 
+    def test_update_sensor_output_daterange(self):
+        """Tests that output does not change when data range changes"""
+        small_test_data = pd.DataFrame({
+            "num": [0, 100, 200, 300, 400, 500, 600, 100, 200, 300, 400, 500, 600] * 2,
+            "fips": ["01001"] * 13 + ["42003"] * 13,
+            "den": [30, 50, 50, 10, 1, 5, 5, 50, 50, 50, 0, 0, 0] * 2,
+            "timestamp": list(pd.date_range("20200301", "20200313")) * 2
+        }).set_index(["fips", "timestamp"])
+        startdates = ["2020-03-01", "2020-03-05"]
+        outputs = {s:{} for s in startdates}
+        for startdate in startdates:
+            for geo in ["county", "state", "hhs", "nation"]:
+                td = TemporaryDirectory()
+                su_inst = CHCSensorUpdater(
+                    startdate,
+                    "03-22-2020",
+                    "03-27-2020",
+                    geo,
+                    self.parallel,
+                    self.weekday,
+                    self.numtype,
+                    self.se,
+                    "",
+                    TEST_LOGGER
+                )
+                su_inst.update_sensor(small_test_data.copy(), td.name)
+                for f in os.listdir(td.name):
+                    outputs[startdate][f] = pd.read_csv(os.path.join(td.name, f))
+                assert len(os.listdir(td.name)) == len(su_inst.sensor_dates),\
+                    f"failed {geo} update sensor test"
+                td.cleanup()
+
+        def pretty(key):
+            return "\n".join(f"{s}[{key}]: {len(outputs[s][key])}" for s in startdates)
+        for f in outputs[startdates[-1]]:
+            assert len(outputs[startdates[0]][f]) == len(outputs[startdates[1]][f]), \
+                f"\n{pretty(f)}"
+            assert np.array_equal(
+                outputs[startdates[0]][f].val.values,
+                outputs[startdates[1]][f].val.values
+            ), f
 
 class TestWriteToCsv:
     """Tests for writing output files to CSV."""
@@ -141,7 +182,7 @@ def test_write_to_csv_results(self):
             "val": [0.1, 0.5, 1.5] + [1, 2, 3],
             "se": [0.1, 1, 1.1] + [0.5, np.nan, 0.5],
             "sample_size": [np.nan] * 6,
-            "timestamp": pd.to_datetime(["2020-05-01", "2020-05-02", "2020-05-04"] * 2),
+            "timestamp": pd.to_datetime(["2020-05-02", "2020-05-03", "2020-05-05"] * 2),
             "include": [True, True, True] + [True, False, True],
             "geo_id": ["a"] * 3 + ["b"] * 3,
         })
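
The fixture timestamps in this write_to_csv test move forward by one day, and the same one-day shift repeats in the remaining hunks of this file. That lines up with DAY_SHIFT dropping from one day to zero above: with no shift applied in code, the fixtures presumably now carry the dates that the shift used to produce. A quick check of that one-day relationship (illustrative, not from the repo):

    # The old fixture dates plus the removed one-day shift equal the new fixture dates.
    import pandas as pd

    old_fixture = pd.to_datetime(["2020-05-01", "2020-05-02", "2020-05-04"])
    new_fixture = pd.to_datetime(["2020-05-02", "2020-05-03", "2020-05-05"])
    assert (old_fixture + pd.Timedelta(days=1)).equals(new_fixture)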
@@ -197,7 +238,7 @@ def test_write_to_csv_with_se_results(self):
             "val": [0.1, 0.5, 1.5] + [1, 2, 3],
             "se": [0.1, 1, 1.1] + [0.5, np.nan, 0.5],
             "sample_size": [np.nan] * 6,
-            "timestamp": pd.to_datetime(["2020-05-01", "2020-05-02", "2020-05-04"] * 2),
+            "timestamp": pd.to_datetime(["2020-05-02", "2020-05-03", "2020-05-05"] * 2),
             "include": [True, True, True] + [True, False, True],
             "geo_id": ["a"] * 3 + ["b"] * 3,
         })
@@ -231,7 +272,7 @@ def test_write_to_csv_wrong_results(self):
             "val": [0.1, 0.5, 1.5] + [1, 2, 3],
             "se": [0.1, 1, 1.1] + [0.5, 0.5, 0.5],
             "sample_size": [np.nan] * 6,
-            "timestamp": pd.to_datetime(["2020-05-01", "2020-05-02", "2020-05-04"] * 2),
+            "timestamp": pd.to_datetime(["2020-05-02", "2020-05-03", "2020-05-05"] * 2),
             "include": [True, True, True] + [True, False, True],
             "geo_id": ["a"] * 3 + ["b"] * 3,
         }).set_index(["timestamp", "geo_id"]).sort_index()
@@ -241,7 +282,7 @@ def test_write_to_csv_wrong_results(self):
         # nan value for included loc-date
         res1 = res0.copy()
         res1 = res1[res1['include']]
-        res1.loc[("2020-05-01", "a"), "val"] = np.nan
+        res1.loc[("2020-05-02", "a"), "val"] = np.nan
         res1.reset_index(inplace=True)
         with pytest.raises(AssertionError):
             write_to_csv(
@@ -257,7 +298,7 @@ def test_write_to_csv_wrong_results(self):
         # nan se for included loc-date
         res2 = res0.copy()
         res2 = res2[res2['include']]
-        res2.loc[("2020-05-01", "a"), "se"] = np.nan
+        res2.loc[("2020-05-02", "a"), "se"] = np.nan
         res2.reset_index(inplace=True)
         with pytest.raises(AssertionError):
             write_to_csv(
@@ -273,7 +314,7 @@ def test_write_to_csv_wrong_results(self):
         # large se value
        res3 = res0.copy()
         res3 = res3[res3['include']]
-        res3.loc[("2020-05-01", "a"), "se"] = 10
+        res3.loc[("2020-05-02", "a"), "se"] = 10
         res3.reset_index(inplace=True)
         with pytest.raises(AssertionError):
             write_to_csv(
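
To exercise just the new date-range test added in this file, something like the following should work, assuming the changehc package and its test dependencies are installed and paths are adjusted to however the repo's test setup expects; the invocation is a suggestion, not part of the commit:

    # Hypothetical: run only the new date-range test via pytest's Python entry point.
    import pytest

    pytest.main(["changehc/tests/test_update_sensor.py",
                 "-k", "test_update_sensor_output_daterange"])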
