Skip to content

Commit 9dfdf52

Browse files
authored
Merge pull request #862 from cmu-delphi/ds/archive-differ-fix
Allow NAs in the `missing_*` columns (`missing_val`, `missing_se`, `missing_sample_size`)
2 parents 315960e + 3ece7c1 commit 9dfdf52

File tree

2 files changed: 19 additions & 11 deletions

2 files changed: 19 additions & 11 deletions

src/acquisition/covidcast/csv_importer.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,9 @@ class CsvImporter:
4343
"val": float,
4444
"se": float,
4545
"sample_size": float,
46-
"missing_val": int,
47-
"missing_se": int,
48-
"missing_sample_size": int
46+
"missing_val": "Int64",
47+
"missing_se": "Int64",
48+
"missing_sample_size": "Int64"
4949
}
5050

5151
# NOTE: this should be a Python 3.7+ `dataclass`, but the server is on 3.4

tests/acquisition/covidcast/test_csv_importer.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -278,13 +278,13 @@ def test_load_csv_with_valid_header(self):
278278

279279
# now with missing values!
280280
data = {
281-
'geo_id': ['ca', 'tx', 'fl', 'ak'],
282-
'val': [np.nan, '1.2', '1.3', '1.4'],
283-
'se': ['2.1', "na", '2.3', '2.4'],
284-
'sample_size': ['301', '302', None, '304'],
285-
'missing_value': [Nans.NOT_APPLICABLE] + [Nans.NOT_MISSING] * 3,
286-
'missing_stderr': [Nans.NOT_MISSING, Nans.REGION_EXCEPTION, Nans.NOT_MISSING, Nans.NOT_MISSING],
287-
'missing_sample_size': [Nans.NOT_MISSING] * 2 + [Nans.REGION_EXCEPTION] * 2
281+
'geo_id': ['ca', 'tx', 'fl', 'ak', 'wa'],
282+
'val': [np.nan, '1.2', '1.3', '1.4', '1.5'],
283+
'se': ['2.1', "na", '2.3', '2.4', '2.5'],
284+
'sample_size': ['301', '302', None, '304', None],
285+
'missing_value': [Nans.NOT_APPLICABLE] + [Nans.NOT_MISSING] * 3 + [None],
286+
'missing_stderr': [Nans.NOT_MISSING, Nans.REGION_EXCEPTION, Nans.NOT_MISSING, Nans.NOT_MISSING] + [None],
287+
'missing_sample_size': [Nans.NOT_MISSING] * 2 + [Nans.REGION_EXCEPTION] * 2 + [None]
288288
}
289289
mock_pandas = MagicMock()
290290
mock_pandas.read_csv.return_value = pandas.DataFrame(data=data)
@@ -295,7 +295,7 @@ def test_load_csv_with_valid_header(self):
295295

296296
self.assertTrue(mock_pandas.read_csv.called)
297297
self.assertTrue(mock_pandas.read_csv.call_args[0][0], filepath)
298-
self.assertEqual(len(rows), 4)
298+
self.assertEqual(len(rows), 5)
299299

300300
self.assertEqual(rows[0].geo_value, 'ca')
301301
self.assertIsNone(rows[0].value)
@@ -328,3 +328,11 @@ def test_load_csv_with_valid_header(self):
328328
self.assertEqual(rows[3].missing_value, Nans.NOT_MISSING)
329329
self.assertEqual(rows[3].missing_stderr, Nans.NOT_MISSING)
330330
self.assertEqual(rows[3].missing_sample_size, Nans.NOT_MISSING)
331+
332+
self.assertEqual(rows[4].geo_value, 'wa')
333+
self.assertEqual(rows[4].value, 1.5)
334+
self.assertEqual(rows[4].stderr, 2.5)
335+
self.assertEqual(rows[4].sample_size, None)
336+
self.assertEqual(rows[4].missing_value, Nans.NOT_MISSING)
337+
self.assertEqual(rows[4].missing_stderr, Nans.NOT_MISSING)
338+
self.assertEqual(rows[4].missing_sample_size, Nans.OTHER)

0 commit comments

Comments
 (0)