Skip to content

Commit 70f4b43

Browse files
committed
Change CHNG and claims_hosp to use timestamp as the date_col
1 parent af6c0c2 commit 70f4b43

File tree

9 files changed, with 34 additions (+34) and 32 deletions (-32).

changehc/delphi_changehc/config.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@ class Config:
2929
FLU_LIKE_COL = "Flu-like"
3030
COVID_LIKE_COL = "Covid-like"
3131
COUNT_COLS = [COVID_COL,DENOM_COL,FLU_COL,MIXED_COL,FLU_LIKE_COL,COVID_LIKE_COL]
32-
DATE_COL = "date"
32+
DATE_COL = "timestamp"
3333
GEO_COL = "fips"
3434
ID_COLS = [DATE_COL] + [GEO_COL]
3535
FILT_COLS = ID_COLS + COUNT_COLS

changehc/tests/test_load_data.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@ def test_base_unit(self):
4545

4646
def test_denom_columns(self):
4747
assert "fips" in self.denom_data.index.names
48-
assert "date" in self.denom_data.index.names
48+
assert "timestamp" in self.denom_data.index.names
4949

5050
expected_denom_columns = ["Denominator"]
5151
for col in expected_denom_columns:
@@ -54,7 +54,7 @@ def test_denom_columns(self):
5454

5555
def test_claims_columns(self):
5656
assert "fips" in self.covid_data.index.names
57-
assert "date" in self.covid_data.index.names
57+
assert "timestamp" in self.covid_data.index.names
5858

5959
expected_covid_columns = ["COVID"]
6060
for col in expected_covid_columns:
@@ -63,7 +63,7 @@ def test_claims_columns(self):
6363

6464
def test_combined_columns(self):
6565
assert "fips" in self.combined_data.index.names
66-
assert "date" in self.combined_data.index.names
66+
assert "timestamp" in self.combined_data.index.names
6767

6868
expected_combined_columns = ["num", "den"]
6969
for col in expected_combined_columns:
@@ -75,16 +75,16 @@ def test_edge_values(self):
7575
for data in [self.denom_data,
7676
self.covid_data,
7777
self.combined_data]:
78-
assert data.index.get_level_values('date').max() >= Config.FIRST_DATA_DATE
79-
assert data.index.get_level_values('date').min() < DROP_DATE
78+
assert data.index.get_level_values("timestamp").max() >= Config.FIRST_DATA_DATE
79+
assert data.index.get_level_values("timestamp").min() < DROP_DATE
8080

8181
def test_fips_values(self):
8282
for data in [self.denom_data,
8383
self.covid_data,
8484
self.combined_data]:
8585
assert (
8686
len(data.index.get_level_values(
87-
'fips').unique()) <= len(self.gmpr.get_geo_values("fips"))
87+
"fips").unique()) <= len(self.gmpr.get_geo_values("fips"))
8888
)
8989

9090
def test_combined_fips_values(self):

changehc/tests/test_update_sensor.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@ class TestCHCSensorUpdator:
4040
"num": [0, 100, 200, 300, 400, 500, 600, 100, 200, 300, 400, 500, 600],
4141
"fips": ['01001'] * 7 + ['04007'] * 6,
4242
"den": [1000] * 7 + [2000] * 6,
43-
"date": [pd.Timestamp(f'03-{i}-2020') for i in range(1, 14)]}).set_index(["fips","date"])
43+
"timestamp": [pd.Timestamp(f'03-{i}-2020') for i in range(1, 14)]}).set_index(["fips","timestamp"])
4444

4545
def test_shift_dates(self):
4646
"""Tests that dates in the data are shifted according to the burn-in and lag."""
@@ -84,7 +84,7 @@ def test_geo_reindex(self):
8484
"num": [0, 100, 200, 300, 400, 500, 600, 100, 200, 300, 400, 500, 600],
8585
"fips": ['01001'] * 7 + ['04007'] * 6,
8686
"den": [1000] * 7 + [2000] * 6,
87-
"date": [pd.Timestamp(f'03-{i}-2020') for i in range(1, 14)]})
87+
"timestamp": [pd.Timestamp(f'03-{i}-2020') for i in range(1, 14)]})
8888
data_frame = su_inst.geo_reindex(test_data)
8989
assert data_frame.shape[0] == multiple*len(su_inst.fit_dates)
9090
assert (data_frame.sum() == (4200,19000)).all()
@@ -113,8 +113,8 @@ def test_update_sensor(self):
113113
"num": [0, 100, 200, 300, 400, 500, 600, 100, 200, 300, 400, 500, 600] * 2,
114114
"fips": ["01001"] * 13 + ["42003"] * 13,
115115
"den": [30, 50, 50, 10, 1, 5, 5, 50, 50, 50, 0, 0, 0] * 2,
116-
"date": list(pd.date_range("20200301", "20200313")) * 2
117-
}).set_index(["fips", "date"])
116+
"timestamp": list(pd.date_range("20200301", "20200313")) * 2
117+
}).set_index(["fips", "timestamp"])
118118
su_inst.update_sensor(small_test_data, td.name)
119119
for f in os.listdir(td.name):
120120
outputs[f] = pd.read_csv(os.path.join(td.name, f))

claims_hosp/delphi_claims_hosp/config.py

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -32,8 +32,13 @@ class Config:
3232
# data columns
3333
CLAIMS_COUNT_COLS = ["Denominator", "Covid_like"]
3434
CLAIMS_DATE_COL = "ServiceDate"
35-
CLAIMS_RENAME_COLS = {"Pat HRR ID": "hrr", "ServiceDate": "date",
36-
"PatCountyFIPS": "fips", "PatAgeGroup": "age_group"}
35+
FIPS_COL = "fips"
36+
DATE_COL = "timestamp"
37+
AGE_COL = "age_group"
38+
HRR_COL = "hrr"
39+
40+
CLAIMS_RENAME_COLS = {"Pat HRR ID": HRR_COL, "ServiceDate": DATE_COL,
41+
"PatCountyFIPS": FIPS_COL, "PatAgeGroup": AGE_COL}
3742
CLAIMS_DTYPES = {
3843
"ServiceDate": str,
3944
"PatCountyFIPS": str,
@@ -43,10 +48,7 @@ class Config:
4348
"Pat HRR ID": str,
4449
}
4550

46-
FIPS_COL = "fips"
47-
DATE_COL = "date"
48-
AGE_COL = "age_group"
49-
HRR_COL = "hrr"
51+
5052

5153
SMOOTHER_BANDWIDTH = 100 # bandwidth for the linear left Gaussian filter
5254
MIN_DEN = 100 # number of total visits needed to produce a sensor

claims_hosp/delphi_claims_hosp/load_data.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,7 @@ def load_claims_data(claims_filepath, dropdate, base_geo):
4848
), "Claims counts must be nonnegative"
4949

5050
# aggregate age groups (so data is unique by date and base geography)
51-
claims_data = claims_data.groupby([base_geo, "date"]).sum()
51+
claims_data = claims_data.groupby([base_geo, Config.DATE_COL]).sum()
5252
claims_data.dropna(inplace=True) # drop rows with any missing entries
5353

5454
return claims_data

claims_hosp/delphi_claims_hosp/update_indicator.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -120,11 +120,11 @@ def geo_reindex(self, data):
120120
return False
121121

122122
unique_geo_ids = pd.unique(data_frame[self.geo])
123-
data_frame.set_index([self.geo, 'date'], inplace=True)
123+
data_frame.set_index([self.geo, "timestamp"], inplace=True)
124124

125125
# for each location, fill in all missing dates with 0 values
126126
multiindex = pd.MultiIndex.from_product((unique_geo_ids, self.fit_dates),
127-
names=[self.geo, "date"])
127+
names=[self.geo, Config.DATE_COL])
128128
assert (
129129
len(multiindex) <= (GeoConstants.MAX_GEO[self.geo] * len(self.fit_dates))
130130
), "more loc-date pairs than maximum number of geographies x number of dates"

claims_hosp/tests/test_indicator.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -56,7 +56,7 @@ def test_backwards_pad(self):
5656
def test_fit_fips(self):
5757
date_range = pd.date_range("2020-05-01", "2020-05-20")
5858
all_fips = self.fips_data.fips.unique()
59-
loc_index_fips_data = self.fips_data.set_index(["fips", "date"])
59+
loc_index_fips_data = self.fips_data.set_index(["fips", "timestamp"])
6060
sample_fips = nr.choice(all_fips, 10)
6161

6262
for fips in sample_fips:
@@ -79,7 +79,7 @@ def test_fit_fips(self):
7979
def test_fit_hrrs(self):
8080
date_range = pd.date_range("2020-05-01", "2020-05-20")
8181
all_hrrs = self.hrr_data.hrr.unique()
82-
loc_index_hrr_data = self.hrr_data.set_index(["hrr", "date"])
82+
loc_index_hrr_data = self.hrr_data.set_index(["hrr", "timestamp"])
8383
sample_hrrs = nr.choice(all_hrrs, 10)
8484

8585
for hrr in sample_hrrs:

claims_hosp/tests/test_load_data.py

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -34,8 +34,8 @@ def test_base_unit(self):
3434
def test_claims_columns(self):
3535
assert "hrr" in self.hrr_claims_data.index.names
3636
assert "fips" in self.fips_claims_data.index.names
37-
assert "date" in self.hrr_claims_data.index.names
38-
assert "date" in self.fips_claims_data.index.names
37+
assert "timestamp" in self.hrr_claims_data.index.names
38+
assert "timestamp" in self.fips_claims_data.index.names
3939

4040
expected_claims_columns = ["Denominator", "Covid_like"]
4141
for col in expected_claims_columns:
@@ -47,8 +47,8 @@ def test_claims_columns(self):
4747
def test_data_columns(self):
4848
assert "hrr" in self.hrr_data.columns
4949
assert "fips" in self.fips_data.columns
50-
assert "date" in self.hrr_data.columns
51-
assert "date" in self.fips_data.columns
50+
assert "timestamp" in self.hrr_data.columns
51+
assert "timestamp" in self.fips_data.columns
5252

5353
expected_columns = ["num", "den"]
5454
for col in expected_columns:
@@ -57,12 +57,12 @@ def test_data_columns(self):
5757

5858
def test_edge_values(self):
5959
for data in [self.hrr_claims_data, self.fips_claims_data]:
60-
assert data.index.get_level_values('date').max() >= Config.FIRST_DATA_DATE
61-
assert data.index.get_level_values('date').min() < DROP_DATE
60+
assert data.index.get_level_values("timestamp").max() >= Config.FIRST_DATA_DATE
61+
assert data.index.get_level_values("timestamp").min() < DROP_DATE
6262

6363
for data in [self.hrr_data, self.fips_data]:
64-
assert data.date.max() >= Config.FIRST_DATA_DATE
65-
assert data.date.min() < DROP_DATE
64+
assert data["timestamp"].max() >= Config.FIRST_DATA_DATE
65+
assert data["timestamp"].min() < DROP_DATE
6666

6767
def test_hrrs_values(self):
6868
assert len(self.hrr_data.hrr.unique()) <= CONSTANTS.NUM_HRRS

claims_hosp/tests/test_update_indicator.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -37,8 +37,8 @@ class TestClaimsHospIndicatorUpdater:
3737
"num": [0, 100, 200, 300, 400, 500, 600, 100, 200, 300, 400, 500, 600],
3838
"hrr": [1.0] * 7 + [2.0] * 6,
3939
"den": [1000] * 7 + [2000] * 6,
40-
"date": [pd.Timestamp(f'03-{i}-2020') for i in range(1, 14)]}).set_index(
41-
["hrr", "date"])
40+
"timestamp": [pd.Timestamp(f'03-{i}-2020') for i in range(1, 14)]}).set_index(
41+
["hrr", "timestamp"])
4242

4343
def test_shift_dates(self):
4444
updater = ClaimsHospIndicatorUpdater(

0 commit comments

Comments
 (0)