|
| 1 | +import logging |
1 | 2 | import os |
2 | 3 | import glob |
3 | 4 | from datetime import datetime |
| 5 | +from pathlib import Path |
| 6 | +import shutil |
4 | 7 |
|
5 | 8 | # third party |
6 | 9 | import pandas as pd |
7 | 10 | import pytest |
8 | 11 |
|
9 | 12 | # first party |
10 | 13 | from delphi_claims_hosp.config import Config, GeoConstants |
11 | | -from delphi_claims_hosp.backfill import store_backfill_file, merge_backfill_file |
| 14 | +from delphi_claims_hosp.backfill import store_backfill_file, merge_backfill_file, merge_existing_backfill_files |
12 | 15 |
|
CONFIG = Config()
CONSTANTS = GeoConstants()
# Directory containing this test module; anchors fixture paths so the tests
# work regardless of the pytest invocation directory.
TEST_PATH = Path(__file__).parent

# Indicator parameters shared by all tests in this module.
_INDICATOR = {
    "input_file": f"{TEST_PATH}/test_data/SYNEDI_AGG_INPATIENT_11062020_1451CDT.csv.gz",
    "backfill_dir": f"{TEST_PATH}/backfill",
    "drop_date": "2020-06-11",
}
PARAMS = {"indicator": _INDICATOR}
DATA_FILEPATH = PARAMS["indicator"]["input_file"]
DROP_DATE = pd.to_datetime(PARAMS["indicator"]["drop_date"])
backfill_dir = PARAMS["indicator"]["backfill_dir"]
# Root logger; passed to merge_existing_backfill_files in the tests below.
TEST_LOGGER = logging.getLogger()
25 | 30 |
|
26 | 31 | class TestBackfill: |
27 | 32 |
|
@@ -95,3 +100,67 @@ def test_merge_backfill_file(self): |
95 | 100 |
|
96 | 101 | os.remove(backfill_dir + "/" + fn) |
97 | 102 | assert fn not in os.listdir(backfill_dir) |
| 103 | + |
| 104 | + def test_merge_existing_backfill_files(self): |
| 105 | + issue_date = datetime(year=2020, month=6, day=13) |
| 106 | + issue_date_str = issue_date.strftime("%Y%m%d") |
| 107 | + def prep_backfill_data(): |
| 108 | + # Generate backfill daily files |
| 109 | + for d in range(11, 15): |
| 110 | + dropdate = datetime(2020, 6, d) |
| 111 | + store_backfill_file(DATA_FILEPATH, dropdate, backfill_dir) |
| 112 | + |
| 113 | + today = datetime(2020, 6, 14) |
| 114 | + # creating expected file |
| 115 | + merge_backfill_file(backfill_dir, today.weekday(), today, |
| 116 | + test_mode=True, check_nd=2) |
| 117 | + original = f"{backfill_dir}/claims_hosp_from_20200611_to_20200614.parquet" |
| 118 | + os.rename(original, f"{backfill_dir}/expected.parquet") |
| 119 | + |
| 120 | + # creating backfill without issue date |
| 121 | + os.remove(f"{backfill_dir}/claims_hosp_as_of_{issue_date_str}.parquet") |
| 122 | + today = datetime(2020, 6, 14) |
| 123 | + merge_backfill_file(backfill_dir, today.weekday(), today, |
| 124 | + test_mode=True, check_nd=2) |
| 125 | + |
| 126 | + old_files = glob.glob(backfill_dir + "/claims_hosp_as_of_*") |
| 127 | + for file in old_files: |
| 128 | + os.remove(file) |
| 129 | + |
| 130 | + prep_backfill_data() |
| 131 | + file_to_add = store_backfill_file(DATA_FILEPATH, issue_date, backfill_dir) |
| 132 | + merge_existing_backfill_files(backfill_dir, file_to_add, issue_date, TEST_LOGGER) |
| 133 | + |
| 134 | + expected = pd.read_parquet(f"{backfill_dir}/expected.parquet") |
| 135 | + merged = pd.read_parquet(f"{backfill_dir}/claims_hosp_from_20200611_to_20200614.parquet") |
| 136 | + |
| 137 | + check_diff = expected.merge(merged, how='left', indicator=True) |
| 138 | + assert check_diff[check_diff["_merge"] == "both"].shape[0] == expected.shape[0] |
| 139 | + for file in glob.glob(backfill_dir + "/*.parquet"): |
| 140 | + os.remove(file) |
| 141 | + |
| 142 | + |
| 143 | + def test_merge_existing_backfill_files_no_call(self): |
| 144 | + issue_date = datetime(year=2020, month=6, day=20) |
| 145 | + issue_date_str = issue_date.strftime("%Y%m%d") |
| 146 | + def prep_backfill_data(): |
| 147 | + # Generate backfill daily files |
| 148 | + for d in range(11, 15): |
| 149 | + dropdate = datetime(2020, 6, d) |
| 150 | + store_backfill_file(DATA_FILEPATH, dropdate, backfill_dir) |
| 151 | + |
| 152 | + today = datetime(2020, 6, 14) |
| 153 | + # creating expected file |
| 154 | + merge_backfill_file(backfill_dir, today.weekday(), today, |
| 155 | + test_mode=True, check_nd=8) |
| 156 | + |
| 157 | + prep_backfill_data() |
| 158 | + file_to_add = store_backfill_file(DATA_FILEPATH, issue_date, backfill_dir) |
| 159 | + merge_existing_backfill_files(backfill_dir, file_to_add, issue_date, TEST_LOGGER) |
| 160 | + |
| 161 | + old_files = glob.glob(backfill_dir + "*.parquet") |
| 162 | + for file in old_files: |
| 163 | + os.remove(file) |
| 164 | + |
| 165 | + |
| 166 | + |
0 commit comments