 from delphi_utils import get_structured_logger

 # third party
-import mysql.connector
+import mysql.connector
+from mysql.connector.errors import IntegrityError
 import pandas as pd
+import numpy as np
 from pathlib import Path
 
 # py3tester coverage target (equivalent to `import *`)
 # __test_target__ = 'delphi.epidata.acquisition.covid_hosp.facility.update'
@@ -68,8 +71,47 @@ def test_rvdss_repiratory_detections(self, mock_sql):
         TEST_DIR = Path(__file__).parent.parent.parent.parent
         detection_data = pd.read_csv(str(TEST_DIR) + "/testdata/acquisition/rvdss/RVD_CurrentWeekTable_Formatted.csv")
         detection_data['time_type'] = "week"
-        detection_subset = detection_data[(detection_data['geo_value'].isin(['nl', 'nb'])) & (detection_data['time_value'].isin([20240831, 20240907]))]
+        detection_data = detection_data.replace({np.nan: None})
 
+        # take a small subset just for testing insertion
+        detection_subset = detection_data[(detection_data['geo_value'].isin(['nl', 'nb'])) & (detection_data['time_value'].isin([20240831, 20240907]))]
+
+        # build the expected response for the API call;
+        # the dataframe needs the missing columns added and NaN replaced with
+        # None, since that is what the API returns
+        df = detection_subset.reindex(rvdss_cols, axis=1)
+        df = df.replace({np.nan: None}).sort_values(by=["epiweek", "geo_value"])
+        df = df.to_dict(orient="records")
+
+        expected_response = {"epidata": df,
+                             "result": 1,
+                             "message": "success",
+                             }
+
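+        # The reindex/replace/sort/to_dict sequence above repeats for each
+        # expected payload below; a minimal local helper (a sketch, not part
+        # of the module under test) could keep the three payloads consistent:
+        #
+        #     def as_epidata(frame):
+        #         out = frame.reindex(rvdss_cols, axis=1)
+        #         out = out.replace({np.nan: None})
+        #         out = out.sort_values(by=["epiweek", "geo_value"])
+        #         return out.to_dict(orient="records")
+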
+        # get the rest of the data not in the subset to test more calling options
+        detection_subset2 = detection_data[(detection_data['geo_value'].isin(['nu', 'nt'])) & (detection_data['time_value'].isin([20240831, 20240907]))]
+
+        df2 = detection_subset2.reindex(rvdss_cols, axis=1)
+        df2 = df2.replace({np.nan: None}).sort_values(by=["epiweek", "geo_value"])
+        df2 = df2.to_dict(orient="records")
+
+        expected_response2 = {"epidata": df2,
+                              "result": 1,
+                              "message": "success",
+                              }
+
+        # expected response after two acquisitions (both subsets inserted)
+        df_full = pd.concat([detection_subset, detection_subset2], ignore_index=True).reindex(rvdss_cols, axis=1)
+        df_full = df_full.replace({np.nan: None}).sort_values(by=["epiweek", "geo_value"])
+        df_full = df_full.to_dict(orient="records")
+
+        expected_response_full = {"epidata": df_full,
+                                  "result": 1,
+                                  "message": "success",
+                                  }
+
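+        # Once both subsets have been inserted, a query spanning all four
+        # provinces should match expected_response_full; a sketch of that
+        # check (assuming Epidata.rvdss accepts a list of geo_values, as the
+        # calls below do) would be:
+        #
+        #     response = Epidata.rvdss(geo_type='province',
+        #                              time_values=[202435, 202436],
+        #                              geo_value=['nl', 'nb', 'nt', 'nu'])
+        #     self.assertEqual(response, expected_response_full)
+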
         # make sure the data does not yet exist
         with self.subTest(name='no data yet'):
             response = Epidata.rvdss(geo_type='province',
@@ -92,47 +134,54 @@ def test_rvdss_repiratory_detections(self, mock_sql):
             response = Epidata.rvdss(geo_type='province',
                                      time_values=[202435, 202436],
                                      geo_value=['nl', 'nb'])
+
+            self.assertEqual(response, expected_response)
+
+        with self.subTest(name='duplicate acquisition'):
+            # The main run function checks whether the update has already been
+            # fetched, so it should never run twice and duplicate acquisitions
+            # should never occur. Running the update twice will result in an error.
+
+            # When the MagicMock connection's `cursor()` method is called, return
+            # a real cursor made from the current open connection `cnx`.
+            connection_mock.cursor.return_value = self.cnx.cursor()
+            # Commit via the current open connection `cnx`, from which the cursor
+            # is derived.
+            connection_mock.commit = self.cnx.commit
+            mock_sql.return_value = connection_mock
+
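+            # A second insert of the same rows is assumed to violate the
+            # rvdss table's unique key (the exact key columns are not shown
+            # here), so the INSERT surfaces as an IntegrityError instead of
+            # silently duplicating rows.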
+            with self.assertRaises(IntegrityError):
+                update(detection_subset, self.logger)
+
+        # TODO: test with exact column order
+        with self.subTest(name='exact column order'):
+            rvdss_cols_subset = [col for col in detection_subset2.columns if col in rvdss_cols]
+            ordered_cols = [col for col in rvdss_cols if col in rvdss_cols_subset]
+            ordered_df = detection_subset2[ordered_cols]
+
+            connection_mock.cursor.return_value = self.cnx.cursor()
+            connection_mock.commit = self.cnx.commit
+            mock_sql.return_value = connection_mock
+
+            update(ordered_df, self.logger)
+
+            response = Epidata.rvdss(geo_type='province',
+                                     time_values=[202435, 202436],
+                                     geo_value=['nt', 'nu'])
+
+            self.assertEqual(response, expected_response2)
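+            # Both subsets are in the database at this point; the geo_value
+            # filter above isolates the second subset, which is why
+            # expected_response2, rather than expected_response_full, is the
+            # right comparison here.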
+
+        # TODO: check requesting by issue
+        # with self.subTest(name='issue request'):
+        #     response = Epidata.rvdss(geo_type='province',
+        #                              time_values=[202435, 202436],
+        #                              geo_value=['nl', 'nb'],
+        #                              issues=[])
+
+        # TODO: check requesting individual lists
+        # with self.subTest(name='individual lists'):
 
-            self.assertEqual(response['result'], 1)
-
-
-        # # make sure the data now exists
-        # with self.subTest(name='initial data checks'):
-        #     expected_spotchecks = {
-        #         "hospital_pk": "450822",
-        #         "collection_week": 20201030,
-        #         "publication_date": 20210315,
-        #         "previous_day_total_ed_visits_7_day_sum": 536,
-        #         "total_personnel_covid_vaccinated_doses_all_7_day_sum": 18,
-        #         "total_beds_7_day_avg": 69.3,
-        #         "previous_day_admission_influenza_confirmed_7_day_sum": -999999
-        #     }
-        #     response = Epidata.covid_hosp_facility(
-        #         '450822', Epidata.range(20200101, 20210101))
-        #     self.assertEqual(response['result'], 1)
-        #     self.assertEqual(len(response['epidata']), 2)
-        #     row = response['epidata'][0]
-        #     for k, v in expected_spotchecks.items():
-        #         self.assertTrue(
-        #             k in row,
-        #             f"no '{k}' in row:\n{NEWLINE.join(sorted(row.keys()))}"
-        #         )
-        #         if isinstance(v, float):
-        #             self.assertAlmostEqual(row[k], v, f"row[{k}] is {row[k]} not {v}")
-        #         else:
-        #             self.assertEqual(row[k], v, f"row[{k}] is {row[k]} not {v}")
-
-        #     # expect 113 fields per row (114 database columns, except `id`)
-        #     self.assertEqual(len(row), 113)
-
-        # # re-acquisition of the same dataset should be a no-op
-        # with self.subTest(name='second acquisition'):
-        #     acquired = Update.run(network=mock_network)
-        #     self.assertFalse(acquired)
-
-        # # make sure the data still exists
-        # with self.subTest(name='final data checks'):
-        #     response = Epidata.covid_hosp_facility(
-        #         '450822', Epidata.range(20200101, 20210101))
-        #     self.assertEqual(response['result'], 1)
-        #     self.assertEqual(len(response['epidata']), 2)