Skip to content

Commit 5f76fe0

Browse files
authored
Merge pull request #1370 from cmu-delphi/release/indicators_v0.2.6_utils_v0.2.4
Release covidcast-indicators 0.2.6
2 parents a382f85 + 28db784 commit 5f76fe0

File tree

7 files changed

+94
-22
lines changed

7 files changed

+94
-22
lines changed

.bumpversion.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[bumpversion]
2-
current_version = 0.2.5
2+
current_version = 0.2.6
33
commit = True
44
message = chore: bump covidcast-indicators to {new_version}
55
tag = False

changehc/delphi_changehc/config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ class Config:
1919
BURN_IN_PERIOD = timedelta(days=1)
2020

2121
# shift dates forward for labeling purposes
22-
DAY_SHIFT = timedelta(days=1)
22+
DAY_SHIFT = timedelta(days=0)
2323

2424
## data columns
2525
COVID_COL = "COVID"

changehc/delphi_changehc/update_sensor.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -155,11 +155,13 @@ def geo_reindex(self, data):
155155
Config.MIN_DEN,
156156
Config.MAX_BACKFILL_WINDOW,
157157
thr_col="den",
158-
mega_col=geo)
158+
mega_col=geo,
159+
date_col=Config.DATE_COL)
159160
elif geo == "state":
160-
data_frame = gmpr.replace_geocode(data, "fips", "state_id", new_col="state")
161+
data_frame = gmpr.replace_geocode(data, "fips", "state_id", new_col="state",
162+
date_col=Config.DATE_COL)
161163
else:
162-
data_frame = gmpr.replace_geocode(data, "fips", geo)
164+
data_frame = gmpr.replace_geocode(data, "fips", geo, date_col=Config.DATE_COL)
163165

164166
unique_geo_ids = pd.unique(data_frame[geo])
165167
data_frame.set_index([geo, Config.DATE_COL],inplace=True)

changehc/tests/test_update_sensor.py

Lines changed: 47 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,47 @@ def test_update_sensor(self):
132132
assert outputs["20200319_hhs_smoothed_outpatient_covid.csv"].empty
133133
assert outputs["20200319_nation_smoothed_outpatient_covid.csv"].empty
134134

135+
def test_update_sensor_output_daterange(self):
136+
        """Tests that output does not change when the date range changes"""
137+
small_test_data = pd.DataFrame({
138+
"num": [0, 100, 200, 300, 400, 500, 600, 100, 200, 300, 400, 500, 600] * 2,
139+
"fips": ["01001"] * 13 + ["42003"] * 13,
140+
"den": [30, 50, 50, 10, 1, 5, 5, 50, 50, 50, 0, 0, 0] * 2,
141+
"timestamp": list(pd.date_range("20200301", "20200313")) * 2
142+
}).set_index(["fips", "timestamp"])
143+
startdates = ["2020-03-01", "2020-03-05"]
144+
outputs = {s:{} for s in startdates}
145+
for startdate in startdates:
146+
for geo in ["county", "state", "hhs", "nation"]:
147+
td = TemporaryDirectory()
148+
su_inst = CHCSensorUpdater(
149+
startdate,
150+
"03-22-2020",
151+
"03-27-2020",
152+
geo,
153+
self.parallel,
154+
self.weekday,
155+
self.numtype,
156+
self.se,
157+
"",
158+
TEST_LOGGER
159+
)
160+
su_inst.update_sensor(small_test_data.copy(), td.name)
161+
for f in os.listdir(td.name):
162+
outputs[startdate][f] = pd.read_csv(os.path.join(td.name, f))
163+
assert len(os.listdir(td.name)) == len(su_inst.sensor_dates),\
164+
f"failed {geo} update sensor test"
165+
td.cleanup()
166+
167+
def pretty(key):
168+
return "\n".join(f"{s}[{key}]: {len(outputs[s][key])}" for s in startdates)
169+
for f in outputs[startdates[-1]]:
170+
assert len(outputs[startdates[0]][f]) == len(outputs[startdates[1]][f]), \
171+
f"\n{pretty(f)}"
172+
assert np.array_equal(
173+
outputs[startdates[0]][f].val.values,
174+
outputs[startdates[1]][f].val.values
175+
), f
135176

136177
class TestWriteToCsv:
137178
"""Tests for writing output files to CSV."""
@@ -141,7 +182,7 @@ def test_write_to_csv_results(self):
141182
"val": [0.1, 0.5, 1.5] + [1, 2, 3],
142183
"se": [0.1, 1, 1.1] + [0.5, np.nan, 0.5],
143184
"sample_size": [np.nan] * 6,
144-
"timestamp": pd.to_datetime(["2020-05-01", "2020-05-02", "2020-05-04"] * 2),
185+
"timestamp": pd.to_datetime(["2020-05-02", "2020-05-03", "2020-05-05"] * 2),
145186
"include": [True, True, True] + [True, False, True],
146187
"geo_id": ["a"] * 3 + ["b"] * 3,
147188
})
@@ -197,7 +238,7 @@ def test_write_to_csv_with_se_results(self):
197238
"val": [0.1, 0.5, 1.5] + [1, 2, 3],
198239
"se": [0.1, 1, 1.1] + [0.5, np.nan, 0.5],
199240
"sample_size": [np.nan] * 6,
200-
"timestamp": pd.to_datetime(["2020-05-01", "2020-05-02", "2020-05-04"] * 2),
241+
"timestamp": pd.to_datetime(["2020-05-02", "2020-05-03", "2020-05-05"] * 2),
201242
"include": [True, True, True] + [True, False, True],
202243
"geo_id": ["a"] * 3 + ["b"] * 3,
203244
})
@@ -231,7 +272,7 @@ def test_write_to_csv_wrong_results(self):
231272
"val": [0.1, 0.5, 1.5] + [1, 2, 3],
232273
"se": [0.1, 1, 1.1] + [0.5, 0.5, 0.5],
233274
"sample_size": [np.nan] * 6,
234-
"timestamp": pd.to_datetime(["2020-05-01", "2020-05-02", "2020-05-04"] * 2),
275+
"timestamp": pd.to_datetime(["2020-05-02", "2020-05-03", "2020-05-05"] * 2),
235276
"include": [True, True, True] + [True, False, True],
236277
"geo_id": ["a"] * 3 + ["b"] * 3,
237278
}).set_index(["timestamp", "geo_id"]).sort_index()
@@ -241,7 +282,7 @@ def test_write_to_csv_wrong_results(self):
241282
# nan value for included loc-date
242283
res1 = res0.copy()
243284
res1 = res1[res1['include']]
244-
res1.loc[("2020-05-01", "a"), "val"] = np.nan
285+
res1.loc[("2020-05-02", "a"), "val"] = np.nan
245286
res1.reset_index(inplace=True)
246287
with pytest.raises(AssertionError):
247288
write_to_csv(
@@ -257,7 +298,7 @@ def test_write_to_csv_wrong_results(self):
257298
# nan se for included loc-date
258299
res2 = res0.copy()
259300
res2 = res2[res2['include']]
260-
res2.loc[("2020-05-01", "a"), "se"] = np.nan
301+
res2.loc[("2020-05-02", "a"), "se"] = np.nan
261302
res2.reset_index(inplace=True)
262303
with pytest.raises(AssertionError):
263304
write_to_csv(
@@ -273,7 +314,7 @@ def test_write_to_csv_wrong_results(self):
273314
# large se value
274315
res3 = res0.copy()
275316
res3 = res3[res3['include']]
276-
res3.loc[("2020-05-01", "a"), "se"] = 10
317+
res3.loc[("2020-05-02", "a"), "se"] = 10
277318
res3.reset_index(inplace=True)
278319
with pytest.raises(AssertionError):
279320
write_to_csv(

facebook/delphiFacebook/R/responses.R

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,25 @@
1818
#' @export
1919
load_responses_all <- function(params, contingency_run = FALSE) {
2020
msg_plain(paste0("Loading ", length(params$input), " CSVs"))
21-
21+
2222
map_fn <- if (params$parallel) { mclapply } else { lapply }
2323
input_data <- map_fn(seq_along(params$input), function(i) {
2424
load_response_one(params$input[i], params, contingency_run)
2525
})
2626

2727
msg_plain(paste0("Finished loading CSVs"))
28+
29+
which_errors <- unlist(lapply(input_data, inherits, "try-error"))
30+
if (any( which_errors )) {
31+
errored_filenames <- paste(params$input[which_errors], collapse=", ")
32+
stop(
33+
"ingestion and field creation failed for at least one of input data file(s) ",
34+
errored_filenames,
35+
" with error(s)\n",
36+
unique(input_data[which_errors])
37+
)
38+
}
39+
2840
input_data <- bind_rows(input_data)
2941
msg_plain(paste0("Finished combining CSVs"))
3042
return(input_data)

facebook/delphiFacebook/integration-tests/testthat/test-integration.R

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -385,3 +385,10 @@ test_that("testing national aggregation", {
385385
}
386386

387387
})
388+
389+
test_that("testing load_responses behavior for missing input", {
390+
params <- relativize_params(read_params(test_path("params-test.json")))
391+
params$input <- c(params$input, "file-does-not-exist.csv")
392+
params$parallel <- TRUE
393+
expect_error(load_responses_all(params), regexp="ingestion and field creation failed")
394+
})

facebook/micro/monthly-archive.sh

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,23 @@ else
88
fi
99
echo ${MONTH}
1010
R_MONTH=${MONTH#*_}; R_MONTH=${R_MONTH#0}
11-
BATCH="cd fb-public-results\nls -1 cvid_responses_${MONTH}*.gz"
12-
sftp -b <(echo -e "${BATCH}") -P 2222 fb-automation@ftp.delphi.cmu.edu 2>/dev/null | \
13-
grep "^cvid" | \
14-
awk -F_ 'BEGIN{print "cd fb-public-results"} {key=$3 $4 $5; if (key!=last && last!="") {print record} last=key; record=$0} END{print record}' | \
15-
sed '/^cvid/ s/^/get /' >fetch.sftp
16-
sftp -b fetch.sftp -P 2222 fb-automation@ftp.delphi.cmu.edu
17-
OUT=${MONTH/_/-}
18-
Rscript ../monthly-files.R ${MONTH%_*} ${R_MONTH} . >${OUT}.csv
19-
gzip ${OUT}.csv
20-
sftp -b <(echo -e "cd fb-public-results\nput ${OUT}.csv.gz") -P 2222 fb-automation@ftp.delphi.cmu.edu
11+
12+
perform_rollup_and_post ()
13+
{
14+
BATCH="cd $1\nls -1 cvid_responses_${MONTH}*.gz"
15+
sftp -b <(echo -e "${BATCH}") -P 2222 fb-automation@ftp.delphi.cmu.edu 2>/dev/null | \
16+
grep "^cvid" | \
17+
awk -F_ -vDIR="$1" 'BEGIN{print "cd " DIR} {key=$3 $4 $5; if (key!=last && last!="") {print record} last=key; record=$0} END{print record}' | \
18+
sed '/^cvid/ s/^/get /' >fetch.sftp
19+
sftp -b fetch.sftp -P 2222 fb-automation@ftp.delphi.cmu.edu
20+
OUT=${MONTH/_/-}$2
21+
Rscript ../monthly-files.R ${MONTH%_*} ${R_MONTH} . >${OUT}.csv
22+
gzip ${OUT}.csv
23+
sftp -b <(echo -e "cd $1\nput ${OUT}.csv.gz") -P 2222 fb-automation@ftp.delphi.cmu.edu
24+
rm -rf $1
25+
mkdir $1
26+
mv *.gz $1/
27+
}
28+
29+
perform_rollup_and_post "fb-public-results" ""
30+
perform_rollup_and_post "protected-race-ethnicity-data" "-race-ethnicity"

0 commit comments

Comments (0)