|
19 | 19 | FILENAME_REGEX = re.compile( |
20 | 20 | r'^(?P<date>\d{8})_(?P<geo_type>\w+?)_(?P<signal>\w+)\.csv$') |
21 | 21 |
|
22 | | -def _parse_datetimes(date_int: int, time_type: str, date_format: str = "%Y%m%d") -> Union[pd.Timestamp, None]: |
23 | | - """Convert a date or epiweeks string into timestamp objects. |
24 | | -
|
25 | | - Datetimes (length 8) are converted to their corresponding date, while epiweeks (length 6) |
26 | | - are converted to the date of the start of the week. Returns None otherwise. |
27 | | -
|
28 | | - Epiweeks use the CDC format. |
29 | | -
|
30 | | - date_int: Int representation of date. |
31 | | - time_type: The temporal resolution to request this data. Most signals |
32 | | - are available at the "day" resolution (the default); some are only |
33 | | - available at the "week" resolution, representing an MMWR week ("epiweek"). |
34 | | - date_format: String of the date format to parse. |
35 | | - :returns: Timestamp. |
36 | | - """ |
37 | | - date_str = str(date_int) |
38 | | - if time_type == "day": |
39 | | - return pd.to_datetime(date_str, format=date_format) |
40 | | - if time_type == "week": |
41 | | - epiwk = Week(int(date_str[:4]), int(date_str[-2:])) |
42 | | - return pd.to_datetime(epiwk.startdate()) |
43 | | - return None |
44 | | - |
45 | 22 | def make_date_filter(start_date, end_date): |
46 | 23 | """ |
47 | 24 | Create a function to filter dates in the specified date range (inclusive). |
@@ -150,8 +127,9 @@ def get_geo_signal_combos(data_source, api_key): |
150 | 127 | raise RuntimeError("Error when fetching metadata from the API", response["message"]) |
151 | 128 |
|
152 | 129 | meta = pd.DataFrame.from_dict(response["epidata"]) |
153 | | - meta["min_time"] = meta.apply(lambda x: _parse_datetimes(x.min_time, x.time_type), axis=1) |
154 | | - meta["max_time"] = meta.apply(lambda x: _parse_datetimes(x.max_time, x.time_type), axis=1) |
| 130 | + # note: this will fail for signals with weekly data (YYYYWW epiweeks do not match the %Y%m%d format), but weekly signals are currently not supported for validation |
| 131 | + meta["min_time"] = meta.apply(lambda x: pd.to_datetime(str(x.min_time), format="%Y%m%d"), axis=1) |
| 132 | + meta["max_time"] = meta.apply(lambda x: pd.to_datetime(str(x.max_time), format="%Y%m%d"), axis=1) |
155 | 133 | meta["last_update"] = pd.to_datetime(meta["last_update"], unit="s") |
156 | 134 |
|
157 | 135 | source_meta = meta[meta['data_source'] == data_source] |
|
0 commit comments