Skip to content

Commit 8b3bcc7

Browse files
committed
Add lag calculation
1 parent 05bffea commit 8b3bcc7

File tree

3 files changed

+16
-14
lines changed

3 files changed

+16
-14
lines changed

integrations/acquisition/covidcast_nowcast/test_csv_uploading.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,8 @@ def test_uploading(self):
7575

7676
# valid
7777
with open(receiving_dir + '20200419_state_sig.csv', 'w') as f:
78-
f.write('sensor_name,geo_value,value,lag,issue\n')
79-
f.write('testsensor,ca,1,2,20200421\n')
78+
f.write('sensor_name,geo_value,value,issue\n')
79+
f.write('testsensor,ca,1,20200421\n')
8080

8181
# invalid filename
8282
with open(receiving_dir + 'hello.csv', 'w') as f:
@@ -129,12 +129,12 @@ def test_duplicate_row(self):
129129
os.makedirs(receiving_dir, exist_ok=True)
130130

131131
with open(receiving_dir + '20200419_state_sig.csv', 'w') as f:
132-
f.write('sensor_name,geo_value,value,lag,issue\n')
133-
f.write('testsensor,ca,1,2,20200415\n')
132+
f.write('sensor_name,geo_value,value,issue\n')
133+
f.write('testsensor,ca,1,20200425\n')
134134
main()
135135
with open(receiving_dir + '20200419_state_sig.csv', 'w') as f:
136-
f.write('sensor_name,geo_value,value,lag,issue\n')
137-
f.write('testsensor,ca,2,2,20200415\n')
136+
f.write('sensor_name,geo_value,value,issue\n')
137+
f.write('testsensor,ca,2,20200425\n')
138138
main()
139139

140140
# most most recent value is the one stored
@@ -146,8 +146,8 @@ def test_duplicate_row(self):
146146
'time_value': 20200419,
147147
'geo_value': 'ca',
148148
'value': 2,
149-
'issue': 20200415,
150-
'lag': 2,
149+
'issue': 20200425,
150+
'lag': 6,
151151
'signal': 'sig',
152152
}],
153153
'message': 'success',

src/acquisition/covidcast_nowcast/load_sensors.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from shutil import move
2+
from datetime import datetime
23
import os
34
import time
45

@@ -12,7 +13,7 @@
1213
FAIL_DIR = "archive/failed"
1314
TABLE_NAME = "covidcast_nowcast"
1415
DB_NAME = "epidata"
15-
CSV_DTYPES = {"sensor_name": str, "geo_value": str, "value": float, "lag": int, "issue": int}
16+
CSV_DTYPES = {"sensor_name": str, "geo_value": str, "value": float, "issue": int}
1617

1718

1819
def main(csv_path: str = SENSOR_CSV_PATH) -> None:
@@ -75,7 +76,8 @@ def load_and_prepare_file(filepath: str, attributes: tuple) -> pd.DataFrame:
7576
data["time_type"] = time_type
7677
data["geo_type"] = geo_type
7778
data["time_value"] = time_value
78-
# we don't use the lag and issue calculation since it's specified in the data.
79+
data["lag"] = [(datetime.strptime(str(i), "%Y%m%d") - datetime.strptime(str(j), "%Y%m%d")).days
80+
for i, j in zip(data["issue"], data["time_value"])]
7981
data["value_updated_timestamp"] = int(time.time())
8082
return data
8183

tests/acquisition/covidcast_nowcast/test_load_sensors.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,21 +24,21 @@ def test_load_and_prepare_file(self):
2424
"test_signal",
2525
"test_time_type",
2626
"test_geo_type",
27-
"test_time_value",
27+
20201231,
2828
"test_issue_value",
2929
"test_lag_value")
3030

31-
test_df = load_and_prepare_file(StringIO("sensor_name,geo_value,value,lag,issue\ntestname,01001,1.5,2,20200101"), test_attributes)
31+
test_df = load_and_prepare_file(StringIO("sensor_name,geo_value,value,lag,issue\ntestname,01001,1.5,2,20210102"), test_attributes)
3232
pd.testing.assert_frame_equal(test_df,
3333
pd.DataFrame({"sensor_name": ["testname"],
3434
"geo_value": ["01001"],
3535
"value": [1.5],
3636
"lag": [2],
37-
"issue": [20200101],
37+
"issue": [20210102],
3838
"source": ["test_source"],
3939
"signal": ["test_signal"],
4040
"time_type": ["test_time_type"],
4141
"geo_type": ["test_geo_type"],
42-
"time_value": ["test_time_value"],
42+
"time_value": [20201231],
4343
"value_updated_timestamp": [12345]})
4444
)

0 commit comments

Comments
 (0)