Skip to content

Commit c5c177f

Browse files
committed
use issue specific CSV files
1 parent 5c829b9 commit c5c177f

File tree

3 files changed

+20
-19
lines changed

3 files changed

+20
-19
lines changed

integrations/acquisition/covidcast_nowcast/test_csv_uploading.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -68,15 +68,15 @@ def test_uploading(self):
6868
# print full diff if something unexpected comes out
6969
self.maxDiff=None
7070

71-
receiving_dir = '/common/covidcast_nowcast/receiving/src/'
72-
success_dir = '/common/covidcast_nowcast/archive/successful/src/'
73-
failed_dir = '/common/covidcast_nowcast/archive/failed/src/'
71+
receiving_dir = '/common/covidcast_nowcast/receiving/issue_20200421/src/'
72+
success_dir = '/common/covidcast_nowcast/archive/successful/issue_20200421/src/'
73+
failed_dir = '/common/covidcast_nowcast/archive/failed/issue_20200421/src/'
7474
os.makedirs(receiving_dir, exist_ok=True)
7575

7676
# valid
7777
with open(receiving_dir + '20200419_state_sig.csv', 'w') as f:
78-
f.write('sensor_name,geo_value,value,issue\n')
79-
f.write('testsensor,ca,1,20200421\n')
78+
f.write('sensor_name,geo_value,value\n')
79+
f.write('testsensor,ca,1\n')
8080

8181
# invalid filename
8282
with open(receiving_dir + 'hello.csv', 'w') as f:
@@ -125,16 +125,16 @@ def test_duplicate_row(self):
125125
# print full diff if something unexpected comes out
126126
self.maxDiff=None
127127

128-
receiving_dir = '/common/covidcast_nowcast/receiving/src/'
128+
receiving_dir = '/common/covidcast_nowcast/receiving/issue_20200425/src/'
129129
os.makedirs(receiving_dir, exist_ok=True)
130130

131131
with open(receiving_dir + '20200419_state_sig.csv', 'w') as f:
132-
f.write('sensor_name,geo_value,value,issue\n')
133-
f.write('testsensor,ca,1,20200425\n')
132+
f.write('sensor_name,geo_value,value\n')
133+
f.write('testsensor,ca,1\n')
134134
main()
135135
with open(receiving_dir + '20200419_state_sig.csv', 'w') as f:
136-
f.write('sensor_name,geo_value,value,issue\n')
137-
f.write('testsensor,ca,2,20200425\n')
136+
f.write('sensor_name,geo_value,value\n')
137+
f.write('testsensor,ca,2\n')
138138
main()
139139

140140
# most most recent value is the one stored

src/acquisition/covidcast_nowcast/load_sensors.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
FAIL_DIR = "archive/failed"
1414
TABLE_NAME = "covidcast_nowcast"
1515
DB_NAME = "epidata"
16-
CSV_DTYPES = {"sensor_name": str, "geo_value": str, "value": float, "issue": int}
16+
CSV_DTYPES = {"sensor_name": str, "geo_value": str, "value": float}
1717

1818

1919
def main(csv_path: str = SENSOR_CSV_PATH) -> None:
@@ -37,7 +37,8 @@ def main(csv_path: str = SENSOR_CSV_PATH) -> None:
3737
"""
3838
user, pw = secrets.db.epi
3939
engine = sqlalchemy.create_engine(f"mysql+pymysql://{user}:{pw}@{secrets.db.host}/{DB_NAME}")
40-
for filepath, attribute in CsvImporter.find_csv_files(csv_path):
40+
# for filepath, attribute in CsvImporter.find_csv_files(csv_path):
41+
for filepath, attribute in CsvImporter.find_issue_specific_csv_files(csv_path):
4142
if attribute is None:
4243
_move_after_processing(filepath, success=False)
4344
continue
@@ -76,8 +77,8 @@ def load_and_prepare_file(filepath: str, attributes: tuple) -> pd.DataFrame:
7677
data["time_type"] = time_type
7778
data["geo_type"] = geo_type
7879
data["time_value"] = time_value
79-
data["lag"] = [(datetime.strptime(str(i), "%Y%m%d") - datetime.strptime(str(j), "%Y%m%d")).days
80-
for i, j in zip(data["issue"], data["time_value"])]
80+
data["issue"] = issue_value
81+
data["lag"] = lag_value
8182
data["value_updated_timestamp"] = int(time.time())
8283
return data
8384

tests/acquisition/covidcast_nowcast/test_load_sensors.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,20 +25,20 @@ def test_load_and_prepare_file(self):
2525
"test_time_type",
2626
"test_geo_type",
2727
20201231,
28-
"test_issue_value",
29-
"test_lag_value")
28+
20210102,
29+
3)
3030

31-
test_df = load_and_prepare_file(StringIO("sensor_name,geo_value,value,issue\ntestname,01001,1.5,20210102"), test_attributes)
31+
test_df = load_and_prepare_file(StringIO("sensor_name,geo_value,value\ntestname,01001,1.5"), test_attributes)
3232
pd.testing.assert_frame_equal(test_df,
3333
pd.DataFrame({"sensor_name": ["testname"],
3434
"geo_value": ["01001"],
3535
"value": [1.5],
36-
"issue": [20210102],
3736
"source": ["test_source"],
3837
"signal": ["test_signal"],
3938
"time_type": ["test_time_type"],
4039
"geo_type": ["test_geo_type"],
4140
"time_value": [20201231],
42-
"lag": [2],
41+
"issue": [20210102],
42+
"lag": [3],
4343
"value_updated_timestamp": [12345]})
4444
)

0 commit comments

Comments
 (0)