Skip to content

Commit e6715c0

Browse files
authored
Merge pull request #388 from chinandrew/sql-columns
Add sql column names to csv column mapping
2 parents 4068ddf + cdd97a1 commit e6715c0

File tree

11 files changed

+378
-243
lines changed

11 files changed

+378
-243
lines changed

src/acquisition/covid_hosp/common/database.py

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ def __init__(self,
1717
connection,
1818
table_name=None,
1919
columns_and_types=None,
20-
additional_fields=tuple()):
20+
additional_fields=None):
2121
"""Create a new Database object.
2222
2323
Parameters
@@ -26,20 +26,20 @@ def __init__(self,
2626
An open connection to a database.
2727
table_name : str
2828
The name of the table which holds the dataset.
29-
columns_and_types : list[tuple[str, Callable[str, ...]]]
30-
List of CSV columns in order of appearance in the database. The first
31-
element of each tuple is the CSV column name, and the second element is a
32-
function which converts a string into the appropriate datatype for the
33-
column.
29+
columns_and_types : list[tuple[str, str, Callable]]
30+
List of 3-tuples of (CSV header name, SQL column name, data type) for
31+
all the columns in the CSV file.
3432
additional_fields : list[tuple[Any, str]]
35-
Tuple of additional fields to include at the end of the row which are not
36-
present in the CSV data.
33+
List of 2-tuples of (value, SQL column name) for additional fields to include
34+
at the end of the row which are not present in the CSV data.
3735
"""
3836

3937
self.connection = connection
4038
self.table_name = table_name
39+
self.publication_col_name = "issue" if table_name == 'covid_hosp_state_timeseries' else \
40+
'publication_date'
4141
self.columns_and_types = columns_and_types
42-
self.additional_fields = additional_fields
42+
self.additional_fields = additional_fields if additional_fields is not None else []
4343

4444
@classmethod
4545
@contextmanager
@@ -152,16 +152,19 @@ def insert_dataset(self, publication_date, dataframe):
152152

153153
num_columns = 2 + len(self.columns_and_types) + len(self.additional_fields)
154154
value_placeholders = ', '.join(['%s'] * num_columns)
155-
sql = f'INSERT INTO `{self.table_name}` VALUES ({value_placeholders})'
156-
155+
columns = ', '.join(f'`{i[1]}`' for i in self.columns_and_types + self.additional_fields)
156+
sql = f'INSERT INTO `{self.table_name}` (`id`, `{self.publication_col_name}`, {columns}) ' \
157+
f'VALUES ({value_placeholders})'
157158
id_and_publication_date = (0, publication_date)
158159
with self.new_cursor() as cursor:
159160
for _, row in dataframe.iterrows():
160161
values = []
161-
for name, dtype in self.columns_and_types:
162+
for name, _, dtype in self.columns_and_types:
162163
if isinstance(row[name], float) and math.isnan(row[name]):
163164
values.append(None)
164165
else:
165166
values.append(dtype(row[name]))
166167
cursor.execute(sql,
167-
id_and_publication_date + tuple(values) + self.additional_fields)
168+
id_and_publication_date +
169+
tuple(values) +
170+
tuple(i[0] for i in self.additional_fields))

src/acquisition/covid_hosp/facility/database.py

Lines changed: 155 additions & 96 deletions
Large diffs are not rendered by default.

src/acquisition/covid_hosp/state_daily/database.py

Lines changed: 92 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -8,75 +8,103 @@ class Database(BaseDatabase):
88
# note we share a database with state_timeseries
99
TABLE_NAME = 'covid_hosp_state_timeseries'
1010

11-
# These are the names that appear in the CSV header, in order of appearance
12-
# in the database table, along with corresponding data type converters.
13-
# However, note that the corresponding database column names may be shorter
11+
# These are 3-tuples of (CSV header name, SQL db column name, data type) for
12+
# all the columns in the CSV file.
13+
# Note that the corresponding database column names may be shorter
1414
# due to constraints on the length of column names. See
1515
# /src/ddl/covid_hosp.sql for more information.
1616
# Additionally, all column names below are shared with state_timeseries,
1717
# except for reporting_cutoff_start (here) and date (there). If you need
1818
# to update a column name, do it in both places.
1919
ORDERED_CSV_COLUMNS = [
20-
('state', str),
21-
('reporting_cutoff_start', Utils.int_from_date),
22-
('critical_staffing_shortage_today_yes', int),
23-
('critical_staffing_shortage_today_no', int),
24-
('critical_staffing_shortage_today_not_reported', int),
25-
('critical_staffing_shortage_anticipated_within_week_yes', int),
26-
('critical_staffing_shortage_anticipated_within_week_no', int),
27-
('critical_staffing_shortage_anticipated_within_week_not_reported', int),
28-
('hospital_onset_covid', int),
29-
('hospital_onset_covid_coverage', int),
30-
('inpatient_beds', int),
31-
('inpatient_beds_coverage', int),
32-
('inpatient_beds_used', int),
33-
('inpatient_beds_used_coverage', int),
34-
('inpatient_beds_used_covid', int),
35-
('inpatient_beds_used_covid_coverage', int),
36-
('previous_day_admission_adult_covid_confirmed', int),
37-
('previous_day_admission_adult_covid_confirmed_coverage', int),
38-
('previous_day_admission_adult_covid_suspected', int),
39-
('previous_day_admission_adult_covid_suspected_coverage', int),
40-
('previous_day_admission_pediatric_covid_confirmed', int),
41-
('previous_day_admission_pediatric_covid_confirmed_coverage', int),
42-
('previous_day_admission_pediatric_covid_suspected', int),
43-
('previous_day_admission_pediatric_covid_suspected_coverage', int),
44-
('staffed_adult_icu_bed_occupancy', int),
45-
('staffed_adult_icu_bed_occupancy_coverage', int),
46-
('staffed_icu_adult_patients_confirmed_and_suspected_covid', int),
47-
('staffed_icu_adult_patients_confirmed_and_suspected_covid_coverage', int),
48-
('staffed_icu_adult_patients_confirmed_covid', int),
49-
('staffed_icu_adult_patients_confirmed_covid_coverage', int),
50-
('total_adult_patients_hospitalized_confirmed_and_suspected_covid', int),
51-
('total_adult_patients_hospitalized_confirmed_and_suspected_covid_coverage', int),
52-
('total_adult_patients_hospitalized_confirmed_covid', int),
53-
('total_adult_patients_hospitalized_confirmed_covid_coverage', int),
54-
('total_pediatric_patients_hospitalized_confirmed_and_suspected_covid', int),
55-
('total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_coverage', int),
56-
('total_pediatric_patients_hospitalized_confirmed_covid', int),
57-
('total_pediatric_patients_hospitalized_confirmed_covid_coverage', int),
58-
('total_staffed_adult_icu_beds', int),
59-
('total_staffed_adult_icu_beds_coverage', int),
60-
('inpatient_beds_utilization', float),
61-
('inpatient_beds_utilization_coverage', int),
62-
('inpatient_beds_utilization_numerator', int),
63-
('inpatient_beds_utilization_denominator', int),
64-
('percent_of_inpatients_with_covid', float),
65-
('percent_of_inpatients_with_covid_coverage', int),
66-
('percent_of_inpatients_with_covid_numerator', int),
67-
('percent_of_inpatients_with_covid_denominator', int),
68-
('inpatient_bed_covid_utilization', float),
69-
('inpatient_bed_covid_utilization_coverage', int),
70-
('inpatient_bed_covid_utilization_numerator', int),
71-
('inpatient_bed_covid_utilization_denominator', int),
72-
('adult_icu_bed_covid_utilization', float),
73-
('adult_icu_bed_covid_utilization_coverage', int),
74-
('adult_icu_bed_covid_utilization_numerator', int),
75-
('adult_icu_bed_covid_utilization_denominator', int),
76-
('adult_icu_bed_utilization', float),
77-
('adult_icu_bed_utilization_coverage', int),
78-
('adult_icu_bed_utilization_numerator', int),
79-
('adult_icu_bed_utilization_denominator', int),
20+
('state', 'state', str),
21+
('reporting_cutoff_start', 'reporting_cutoff_start', Utils.int_from_date),
22+
('critical_staffing_shortage_today_yes', 'critical_staffing_shortage_today_yes', int),
23+
('critical_staffing_shortage_today_no', 'critical_staffing_shortage_today_no', int),
24+
('critical_staffing_shortage_today_not_reported',
25+
'critical_staffing_shortage_today_not_reported', int),
26+
('critical_staffing_shortage_anticipated_within_week_yes',
27+
'critical_staffing_shortage_anticipated_within_week_yes', int),
28+
('critical_staffing_shortage_anticipated_within_week_no',
29+
'critical_staffing_shortage_anticipated_within_week_no', int),
30+
('critical_staffing_shortage_anticipated_within_week_not_reported',
31+
'critical_staffing_shortage_anticipated_within_week_not_reported', int),
32+
('hospital_onset_covid', 'hospital_onset_covid', int),
33+
('hospital_onset_covid_coverage', 'hospital_onset_covid_coverage', int),
34+
('inpatient_beds', 'inpatient_beds', int),
35+
('inpatient_beds_coverage', 'inpatient_beds_coverage', int),
36+
('inpatient_beds_used', 'inpatient_beds_used', int),
37+
('inpatient_beds_used_coverage', 'inpatient_beds_used_coverage', int),
38+
('inpatient_beds_used_covid', 'inpatient_beds_used_covid', int),
39+
('inpatient_beds_used_covid_coverage', 'inpatient_beds_used_covid_coverage', int),
40+
('previous_day_admission_adult_covid_confirmed', 'previous_day_admission_adult_covid_confirmed',
41+
int),
42+
('previous_day_admission_adult_covid_confirmed_coverage',
43+
'previous_day_admission_adult_covid_confirmed_coverage', int),
44+
('previous_day_admission_adult_covid_suspected', 'previous_day_admission_adult_covid_suspected',
45+
int),
46+
('previous_day_admission_adult_covid_suspected_coverage',
47+
'previous_day_admission_adult_covid_suspected_coverage', int),
48+
('previous_day_admission_pediatric_covid_confirmed',
49+
'previous_day_admission_pediatric_covid_confirmed', int),
50+
('previous_day_admission_pediatric_covid_confirmed_coverage',
51+
'previous_day_admission_pediatric_covid_confirmed_coverage', int),
52+
('previous_day_admission_pediatric_covid_suspected',
53+
'previous_day_admission_pediatric_covid_suspected', int),
54+
('previous_day_admission_pediatric_covid_suspected_coverage',
55+
'previous_day_admission_pediatric_covid_suspected_coverage', int),
56+
('staffed_adult_icu_bed_occupancy', 'staffed_adult_icu_bed_occupancy', int),
57+
('staffed_adult_icu_bed_occupancy_coverage', 'staffed_adult_icu_bed_occupancy_coverage', int),
58+
('staffed_icu_adult_patients_confirmed_and_suspected_covid',
59+
'staffed_icu_adult_patients_confirmed_suspected_covid', int),
60+
('staffed_icu_adult_patients_confirmed_and_suspected_covid_coverage',
61+
'staffed_icu_adult_patients_confirmed_suspected_covid_coverage', int),
62+
('staffed_icu_adult_patients_confirmed_covid', 'staffed_icu_adult_patients_confirmed_covid',
63+
int),
64+
('staffed_icu_adult_patients_confirmed_covid_coverage',
65+
'staffed_icu_adult_patients_confirmed_covid_coverage', int),
66+
('total_adult_patients_hospitalized_confirmed_and_suspected_covid',
67+
'total_adult_patients_hosp_confirmed_suspected_covid', int),
68+
('total_adult_patients_hospitalized_confirmed_and_suspected_covid_coverage',
69+
'total_adult_patients_hosp_confirmed_suspected_covid_coverage', int),
70+
('total_adult_patients_hospitalized_confirmed_covid',
71+
'total_adult_patients_hosp_confirmed_covid', int),
72+
('total_adult_patients_hospitalized_confirmed_covid_coverage',
73+
'total_adult_patients_hosp_confirmed_covid_coverage', int),
74+
('total_pediatric_patients_hospitalized_confirmed_and_suspected_covid',
75+
'total_pediatric_patients_hosp_confirmed_suspected_covid', int),
76+
('total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_coverage',
77+
'total_pediatric_patients_hosp_confirmed_suspected_covid_coverage', int),
78+
('total_pediatric_patients_hospitalized_confirmed_covid',
79+
'total_pediatric_patients_hosp_confirmed_covid', int),
80+
('total_pediatric_patients_hospitalized_confirmed_covid_coverage',
81+
'total_pediatric_patients_hosp_confirmed_covid_coverage', int),
82+
('total_staffed_adult_icu_beds', 'total_staffed_adult_icu_beds', int),
83+
('total_staffed_adult_icu_beds_coverage', 'total_staffed_adult_icu_beds_coverage', int),
84+
('inpatient_beds_utilization', 'inpatient_beds_utilization', float),
85+
('inpatient_beds_utilization_coverage', 'inpatient_beds_utilization_coverage', int),
86+
('inpatient_beds_utilization_numerator', 'inpatient_beds_utilization_numerator', int),
87+
('inpatient_beds_utilization_denominator', 'inpatient_beds_utilization_denominator', int),
88+
('percent_of_inpatients_with_covid', 'percent_of_inpatients_with_covid', float),
89+
('percent_of_inpatients_with_covid_coverage', 'percent_of_inpatients_with_covid_coverage', int),
90+
('percent_of_inpatients_with_covid_numerator', 'percent_of_inpatients_with_covid_numerator',
91+
int),
92+
('percent_of_inpatients_with_covid_denominator', 'percent_of_inpatients_with_covid_denominator',
93+
int),
94+
('inpatient_bed_covid_utilization', 'inpatient_bed_covid_utilization', float),
95+
('inpatient_bed_covid_utilization_coverage', 'inpatient_bed_covid_utilization_coverage', int),
96+
('inpatient_bed_covid_utilization_numerator', 'inpatient_bed_covid_utilization_numerator', int),
97+
('inpatient_bed_covid_utilization_denominator', 'inpatient_bed_covid_utilization_denominator',
98+
int),
99+
('adult_icu_bed_covid_utilization', 'adult_icu_bed_covid_utilization', float),
100+
('adult_icu_bed_covid_utilization_coverage', 'adult_icu_bed_covid_utilization_coverage', int),
101+
('adult_icu_bed_covid_utilization_numerator', 'adult_icu_bed_covid_utilization_numerator', int),
102+
('adult_icu_bed_covid_utilization_denominator', 'adult_icu_bed_covid_utilization_denominator',
103+
int),
104+
('adult_icu_bed_utilization', 'adult_icu_bed_utilization', float),
105+
('adult_icu_bed_utilization_coverage', 'adult_icu_bed_utilization_coverage', int),
106+
('adult_icu_bed_utilization_numerator', 'adult_icu_bed_utilization_numerator', int),
107+
('adult_icu_bed_utilization_denominator', 'adult_icu_bed_utilization_denominator', int),
80108
]
81109

82110
def __init__(self, *args, **kwargs):
@@ -85,4 +113,4 @@ def __init__(self, *args, **kwargs):
85113
**kwargs,
86114
table_name=Database.TABLE_NAME,
87115
columns_and_types=Database.ORDERED_CSV_COLUMNS,
88-
additional_fields=('D',))
116+
additional_fields=[('D', 'record_type')])

0 commit comments

Comments
 (0)