Commit a12f696

replace nan with None to match expected output
1 parent 6b272b6 commit a12f696
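
For context on the commit message: pandas stores missing values as the float NaN, which is not equal to itself and does not serialize to JSON null, whereas the Epidata API returns missing fields as None/null. A minimal sketch of the swap the tests below rely on (the tiny frame here is illustrative, not data from this repo):

    import numpy as np
    import pandas as pd

    # A toy frame with a gap, as read_csv would produce it.
    df = pd.DataFrame({"geo_value": ["nl", "nb"], "sarscov2_tests": [10.0, np.nan]})

    # Map every NaN to None; affected columns become dtype object as a side effect.
    df = df.replace({np.nan: None})

    print(df.to_dict(orient="records"))
    # [{'geo_value': 'nl', 'sarscov2_tests': 10.0},
    #  {'geo_value': 'nb', 'sarscov2_tests': None}]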

File tree

4 files changed, +137 -79 lines changed

integrations/acquisition/rvdss/test_scenarios.py

Lines changed: 94 additions & 45 deletions
@@ -11,9 +11,12 @@
 from delphi_utils import get_structured_logger
 
 # third party
-import mysql.connector
+import mysql.connector
+from mysql.connector.errors import IntegrityError
 import pandas as pd
+import numpy as np
 from pathlib import Path
+import pdb
 
 # py3tester coverage target (equivalent to `import *`)
 # __test_target__ = 'delphi.epidata.acquisition.covid_hosp.facility.update'
@@ -68,8 +71,47 @@ def test_rvdss_repiratory_detections(self, mock_sql):
     TEST_DIR = Path(__file__).parent.parent.parent.parent
     detection_data = pd.read_csv(str(TEST_DIR) + "/testdata/acquisition/rvdss/RVD_CurrentWeekTable_Formatted.csv")
     detection_data['time_type'] = "week"
-    detection_subset = detection_data[(detection_data['geo_value'].isin(['nl', 'nb'])) & (detection_data['time_value'].isin([20240831, 20240907]))]
+    detection_data = detection_data.replace({np.nan: None})
+    #detection_data = detection_data.replace({float('nan'): None})
 
+    pdb.set_trace()
+    # take a small subset just for testing insertion
+    detection_subset = detection_data[(detection_data['geo_value'].isin(['nl', 'nb'])) & (detection_data['time_value'].isin([20240831, 20240907]))]
+
+    # get the expected response when calling the API
+    # the dataframe needs to add the missing columns and replace nan with None
+    # since that is what is returned from the API
+    df = detection_subset.reindex(rvdss_cols, axis=1)
+    df = df.replace({np.nan: None}).sort_values(by=["epiweek", "geo_value"])
+    df = df.to_dict(orient="records")
+
+    expected_response = {"epidata": df,
+                         "result": 1,
+                         "message": "success",
+                         }
+
+    # get the rest of the data not in the subset to test more calling options
+    detection_subset2 = detection_data[(detection_data['geo_value'].isin(['nu', 'nt'])) & (detection_data['time_value'].isin([20240831, 20240907]))]
+
+    df2 = detection_subset2.reindex(rvdss_cols, axis=1)
+    df2 = df2.replace({np.nan: None}).sort_values(by=["epiweek", "geo_value"])
+    df2 = df2.to_dict(orient="records")
+
+    expected_response2 = {"epidata": df2,
+                          "result": 1,
+                          "message": "success",
+                          }
+
+    # after two acquisitions
+    df_full = pd.concat([detection_subset, detection_subset2], ignore_index=True).reindex(rvdss_cols, axis=1)
+    df_full = df_full.replace({np.nan: None}).sort_values(by=["epiweek", "geo_value"])
+    df_full = df_full.to_dict(orient="records")
+
+    expected_response_full = {"epidata": df_full,
+                              "result": 1,
+                              "message": "success",
+                              }
+
     # make sure the data does not yet exist
     with self.subTest(name='no data yet'):
       response = Epidata.rvdss(geo_type='province',
@@ -92,47 +134,54 @@ def test_rvdss_repiratory_detections(self, mock_sql):
       response = Epidata.rvdss(geo_type='province',
                                time_values= [202435, 202436],
                                geo_value = ['nl','nb'])
+
+      self.assertEqual(response, expected_response)
+
+    with self.subTest(name='duplicate aquisition'):
+      # The main run function checks if the update has already been fetched/updated,
+      # so it should never run twice, and duplicate acquisitions should never
+      # occur. Running the update twice will result in an error.
+
+      # When the MagicMock connection's `cursor()` method is called, return
+      # a real cursor made from the current open connection `cnx`.
+      connection_mock.cursor.return_value = self.cnx.cursor()
+      # Commit via the current open connection `cnx`, from which the cursor
+      # is derived
+      connection_mock.commit = self.cnx.commit
+      mock_sql.return_value = connection_mock
+
+      with self.assertRaises(mysql.connector.errors.IntegrityError):
+        update(detection_subset, self.logger)
+
+    # TODO: test with exact column order
+    with self.subTest(name='exact column order'):
+      rvdss_cols_subset = [col for col in detection_subset2.columns if col in rvdss_cols]
+      ordered_cols = [col for col in rvdss_cols if col in rvdss_cols_subset]
+      ordered_df = detection_subset2[ordered_cols]
+
+      connection_mock.cursor.return_value = self.cnx.cursor()
+      connection_mock.commit = self.cnx.commit
+      mock_sql.return_value = connection_mock
+
+      pdb.set_trace()
+      update(ordered_df, self.logger)
+      pdb.set_trace()
+
+      response = Epidata.rvdss(geo_type='province',
+                               time_values= [202435, 202436],
+                               geo_value = ['nt','nu'])
+
+      self.assertEqual(response, expected_response2)
+
+
+    # TODO: check requesting by issue
+    # with self.subTest(name='issue request'):
+    #   response = Epidata.rvdss(geo_type='province',
+    #                            time_values= [202435, 202436],
+    #                            geo_value = ['nl','nb'],
+    #                            issues = [])
+
+
+    # # TODO: check requesting individual lists
+    # with self.subTest(name='duplicate aquisition'):
 
-      self.assertEqual(response['result'], 1)
-
-
-    # # make sure the data now exists
-    # with self.subTest(name='initial data checks'):
-    #   expected_spotchecks = {
-    #     "hospital_pk": "450822",
-    #     "collection_week": 20201030,
-    #     "publication_date": 20210315,
-    #     "previous_day_total_ed_visits_7_day_sum": 536,
-    #     "total_personnel_covid_vaccinated_doses_all_7_day_sum": 18,
-    #     "total_beds_7_day_avg": 69.3,
-    #     "previous_day_admission_influenza_confirmed_7_day_sum": -999999
-    #   }
-    #   response = Epidata.covid_hosp_facility(
-    #     '450822', Epidata.range(20200101, 20210101))
-    #   self.assertEqual(response['result'], 1)
-    #   self.assertEqual(len(response['epidata']), 2)
-    #   row = response['epidata'][0]
-    #   for k,v in expected_spotchecks.items():
-    #     self.assertTrue(
-    #       k in row,
-    #       f"no '{k}' in row:\n{NEWLINE.join(sorted(row.keys()))}"
-    #     )
-    #     if isinstance(v, float):
-    #       self.assertAlmostEqual(row[k], v, f"row[{k}] is {row[k]} not {v}")
-    #     else:
-    #       self.assertEqual(row[k], v, f"row[{k}] is {row[k]} not {v}")
-
-    # # expect 113 fields per row (114 database columns, except `id`)
-    # self.assertEqual(len(row), 113)
-
-    # # re-acquisition of the same dataset should be a no-op
-    # with self.subTest(name='second acquisition'):
-    #   acquired = Update.run(network=mock_network)
-    #   self.assertFalse(acquired)
-
-    # # make sure the data still exists
-    # with self.subTest(name='final data checks'):
-    #   response = Epidata.covid_hosp_facility(
-    #     '450822', Epidata.range(20200101, 20210101))
-    #   self.assertEqual(response['result'], 1)
-    #   self.assertEqual(len(response['epidata']), 2)
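
The 'duplicate aquisition' subtest above hinges on patching mysql.connector.connect so the code under test commits through the test's already-open connection instead of opening its own. A stripped-down sketch of that wiring, with a hypothetical acquire() standing in for the real update() and a test connection supplied by the caller:

    from unittest.mock import MagicMock, patch
    import mysql.connector

    def acquire(records):
        # Stand-in for the acquisition step: opens its own connection and inserts.
        cnx = mysql.connector.connect(user="u", password="p", database="epidata")
        cur = cnx.cursor()
        cur.executemany(
            "INSERT INTO `rvdss` (`geo_value`, `epiweek`) VALUES (%(geo_value)s, %(epiweek)s)",
            records,
        )
        cnx.commit()

    def acquire_through_test_connection(test_cnx, records):
        connection_mock = MagicMock()
        # Hand the code under test a real cursor from the already-open test connection...
        connection_mock.cursor.return_value = test_cnx.cursor()
        # ...and route commit() through that same connection.
        connection_mock.commit = test_cnx.commit
        with patch("mysql.connector.connect", return_value=connection_mock):
            acquire(records)
            # A second acquire() with the same records would hit any unique-key
            # constraint on the table and raise mysql.connector.errors.IntegrityError,
            # which is what the subtest asserts.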

integrations/server/test_rvdss.py

Lines changed: 0 additions & 1 deletion
@@ -66,7 +66,6 @@ def test_rvdss_repiratory_detections(self):
         "sarscov2_pct_positive":10.0,
         "sarscov2_positive_tests":1.0,
         "sarscov2_tests":10.0
-
       }
     ],
     "result": 1,

src/acquisition/rvdss/database.py

Lines changed: 39 additions & 32 deletions
@@ -18,58 +18,60 @@
 
 # third party
 import mysql.connector
+import pdb
 
 # first party
 import delphi.operations.secrets as secrets
 
 rvdss_cols= (
+  "geo_type",
+  'geo_value',
+  "time_type",
   "epiweek",
   "time_value",
-  "time_type",
   "issue",
-  "geo_type",
-  "geo_value",
-  "sarscov2_tests",
-  "sarscov2_positive_tests",
-  "sarscov2_pct_positive",
-  "flu_tests",
-  "flu_positive_tests",
+  "year",
+  "adv_pct_positive",
+  "adv_positive_tests",
+  "adv_tests",
+  "evrv_pct_positive",
+  "evrv_positive_tests",
+  "evrv_tests",
   "flu_pct_positive",
+  "flu_positive_tests",
+  "flu_tests",
+  "flua_pct_positive",
+  "flua_positive_tests",
+  "flua_tests",
   "fluah1n1pdm09_positive_tests",
   "fluah3_positive_tests",
   "fluauns_positive_tests",
-  "flua_positive_tests",
-  "flua_tests",
-  "flua_pct_positive",
+  "flub_pct_positive",
   "flub_positive_tests",
   "flub_tests",
-  "flub_pct_positive",
-  "rsv_tests",
-  "rsv_positive_tests",
-  "rsv_pct_positive",
-  "hpiv_tests",
+  "hcov_pct_positive",
+  "hcov_positive_tests",
+  "hcov_tests",
+  "hmpv_pct_positive",
+  "hmpv_positive_tests",
+  "hmpv_tests",
   "hpiv1_positive_tests",
   "hpiv2_positive_tests",
   "hpiv3_positive_tests",
   "hpiv4_positive_tests",
-  "hpivother_positive_tests",
-  "hpiv_positive_tests",
   "hpiv_pct_positive",
-  "adv_tests",
-  "adv_positive_tests",
-  "adv_pct_positive",
-  "hmpv_tests",
-  "hmpv_positive_tests",
-  "hmpv_pct_positive",
-  "evrv_tests",
-  "evrv_positive_tests",
-  "evrv_pct_positive",
-  "hcov_tests",
-  "hcov_positive_tests",
-  "hcov_pct_positive",
-  "year"
+  "hpiv_positive_tests",
+  "hpiv_tests",
+  "hpivother_positive_tests",
+  "rsv_pct_positive",
+  "rsv_positive_tests",
+  "rsv_tests",
+  "sarscov2_pct_positive",
+  "sarscov2_positive_tests",
+  "sarscov2_tests"
 )
 
+
 def get_num_rows(cursor):
   cursor.execute("SELECT count(1) `num` FROM `rvdss`")
   for (num,) in cursor:
@@ -79,8 +81,13 @@ def get_num_rows(cursor):
 def update(data, logger):
   # connect to the database
   u, p = secrets.db.epi
-  cnx = mysql.connector.connect(user=u, password=p, database="epidata")
+  cnx = mysql.connector.connect(user=u,
+                                password=p,
+                                host = secrets.db.host,
+                                database="epidata")
   cur = cnx.cursor()
+
+  pdb.set_trace()
 
   rvdss_cols_subset = [col for col in data.columns if col in rvdss_cols]
   data = data.to_dict(orient = "records")
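
The hunk ends just as update() has normalized its input: the column names shared by the frame and rvdss_cols, plus the rows as dicts. What the rest of the function does is not shown here; a plausible way to finish the job, offered as an assumption rather than the repository's actual SQL, is to build a parameterized INSERT from the column list and hand the record dicts to executemany:

    # Sketch only: the column subset and records mirror the two lines above,
    # but the INSERT statement itself is an assumption, not the repo's code.
    rvdss_cols_subset = ["geo_type", "geo_value", "epiweek", "sarscov2_tests"]
    records = [
        {"geo_type": "province", "geo_value": "nl", "epiweek": 202435, "sarscov2_tests": None},
        {"geo_type": "province", "geo_value": "nb", "epiweek": 202435, "sarscov2_tests": 10.0},
    ]

    col_names = ", ".join(f"`{c}`" for c in rvdss_cols_subset)
    placeholders = ", ".join(f"%({c})s" for c in rvdss_cols_subset)
    sql = f"INSERT INTO `rvdss` ({col_names}) VALUES ({placeholders})"

    # cur.executemany(sql, records)  # None values arrive in MySQL as NULL
    # cnx.commit()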

src/acquisition/rvdss/utils.py

Lines changed: 4 additions & 1 deletion
@@ -262,7 +262,10 @@ def get_detections_data(base_url,headers,update_date):
 
   return(df_detections.set_index(['epiweek', 'time_value', 'issue', 'geo_type', 'geo_value'],verify_integrity=True))
 
-def expand_detections_columns(new_data):
+def expand_detections_columns(data):
+  new_data = data.copy(deep=True)
+  new_data = new_data.reset_index()
+
   # add extra columns - percent positivities
   if "adv_positive_tests" in new_data.columns and "adv_tests" in new_data.columns:
     new_data["adv_pct_positive"] = np.divide(new_data["adv_positive_tests"], new_data["adv_tests"],
