Skip to content

Commit 062a642

Browse files
authored
Merge branch 'main' into main
2 parents 3264e61 + 9bf67da commit 062a642

File tree

171 files changed

+13427
-995
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

171 files changed

+13427
-995
lines changed

Python-packages/covidcast-py/covidcast/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,5 @@
1515
from .covidcast import signal, metadata, aggregate_signals
1616
from .plotting import plot, plot_choropleth, get_geo_df, animate
1717
from .geography import (fips_to_name, cbsa_to_name, abbr_to_name,
18-
name_to_abbr, name_to_cbsa, name_to_fips)
18+
name_to_abbr, name_to_cbsa, name_to_fips,
19+
fips_to_abbr, abbr_to_fips)

Python-packages/covidcast-py/covidcast/covidcast.py

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
"""This is the client side library for accessing the COVIDcast API."""
22
import warnings
33
from datetime import timedelta, date
4-
from typing import Union, Iterable, Tuple, List
54
from functools import reduce
5+
from typing import Union, Iterable, Tuple, List
66

77
import pandas as pd
88
from delphi_epidata import Epidata
@@ -213,21 +213,21 @@ def metadata() -> pd.DataFrame:
213213
``signal``
214214
Signal name.
215215
216-
``min_time``
217-
First day for which this signal is available.
218-
219-
``max_time``
220-
Most recent day for which this signal is available.
216+
``time_type``
217+
Temporal resolution at which this signal is reported. "day", for
218+
example, means the signal is reported daily.
221219
222220
``geo_type``
223221
Geographic level for which this signal is available, such as county,
224222
state, msa, or hrr. Most signals are available at multiple geographic
225223
levels and will hence be listed in multiple rows with their own
226224
metadata.
227225
228-
``time_type``
229-
Temporal resolution at which this signal is reported. "day", for
230-
example, means the signal is reported daily.
226+
``min_time``
227+
First day for which this signal is available.
228+
229+
``max_time``
230+
Most recent day for which this signal is available.
231231
232232
``num_locations``
233233
Number of distinct geographic locations available for this signal. For
@@ -246,6 +246,17 @@ def metadata() -> pd.DataFrame:
246246
``stdev_value``
247247
The sample standard deviation of all reported values.
248248
249+
``last_update``
250+
The UTC datetime for when the signal value was last updated.
251+
252+
``max_issue``
253+
Most recent date data was issued.
254+
255+
``min_lag``
256+
Smallest lag from observation to issue, in days.
257+
258+
``max_lag``
259+
Largest lag from observation to issue, in days.
249260
"""
250261
meta = Epidata.covidcast_meta()
251262

@@ -257,7 +268,7 @@ def metadata() -> pd.DataFrame:
257268
meta_df = pd.DataFrame.from_dict(meta["epidata"])
258269
meta_df["min_time"] = pd.to_datetime(meta_df["min_time"], format="%Y%m%d")
259270
meta_df["max_time"] = pd.to_datetime(meta_df["max_time"], format="%Y%m%d")
260-
271+
meta_df["last_update"] = pd.to_datetime(meta_df["last_update"], unit="s")
261272
return meta_df
262273

263274

Python-packages/covidcast-py/covidcast/geography.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,11 @@
1616
# Filter undesired rows from CSVs.
1717
# They're not removed from the files to keep them identical to rda files.
1818
STATE_CENSUS = STATE_CENSUS.loc[STATE_CENSUS.STATE != "0"]
19+
# pad to 2 characters with leading 0s
20+
STATE_CENSUS["STATE"] = STATE_CENSUS["STATE"].str.zfill(2)
21+
# add 000 to the end to get a 5 digit code
22+
STATE_CENSUS["STATE"] = STATE_CENSUS["STATE"].str.pad(width=5, fillchar="0", side="right")
23+
# filter out micropolitan areas
1924
MSA_CENSUS = MSA_CENSUS.loc[MSA_CENSUS.LSAD == "Metropolitan Statistical Area"]
2025

2126

@@ -128,6 +133,34 @@ def name_to_abbr(name: Union[str, Iterable],
128133
return _lookup(name, STATE_CENSUS.NAME, STATE_CENSUS.ABBR, ignore_case, fixed, ties_method)
129134

130135

136+
def fips_to_abbr(code: Union[str, Iterable],
137+
ignore_case: bool = False,
138+
fixed: bool = False,
139+
ties_method: str = "first") -> list:
140+
"""Look up state abbreviation by FIPS codes with regular expression support.
141+
142+
Given an individual or list of FIPS codes or regular expressions, look up the corresponding
143+
state abbreviation. FIPS codes can be the 2 digit code (``covidcast.fips_to_abbr("12")``) or
144+
the 2 digit code with 000 appended to the end (``covidcast.fips_to_abbr("12000")``).
145+
146+
:param code: Individual or list of FIPS codes or regular expressions.
147+
:param ignore_case: Boolean for whether or not to be case insensitive in the regular expression.
148+
If ``fixed=True``, this argument is ignored. Defaults to ``False``.
149+
:param fixed: Conduct an exact case sensitive match with the input string.
150+
Defaults to ``False``.
151+
:param ties_method: Method for determining how to deal with multiple outputs for a given input.
152+
Must be one of ``"all"`` or ``"first"``. If ``"first"``, then only the first match for each
153+
code is returned. If ``"all"``, then all matches for each code are returned.
154+
Defaults to ``first``.
155+
:return: If ``ties_method="first"``, returns a list of the first value found for each input key.
156+
If ``ties_method="all"``, returns a list of dicts, one for each input, with keys
157+
corresponding to all matched input keys and values corresponding to the list of state abbreviations.
158+
The returned list will be the same length as the input, with ``None`` or ``{}`` if no values
159+
are found for ``ties_method="first"`` and ``ties_method="all"``, respectively.
160+
"""
161+
return _lookup(code, STATE_CENSUS.STATE, STATE_CENSUS.ABBR, ignore_case, fixed, ties_method)
162+
163+
131164
def name_to_cbsa(name: Union[str, Iterable],
132165
ignore_case: bool = False,
133166
fixed: bool = False,
@@ -162,6 +195,34 @@ def name_to_cbsa(name: Union[str, Iterable],
162195
return _lookup(name, df.NAME, df.CBSA, ignore_case, fixed, ties_method)
163196

164197

198+
def abbr_to_fips(code: Union[str, Iterable],
199+
ignore_case: bool = False,
200+
fixed: bool = False,
201+
ties_method: str = "first") -> list:
202+
"""Look up state FIPS codes by abbreviation with regular expression support.
203+
204+
Given an individual or list of state abbreviations or regular expressions,
205+
look up the corresponding state FIPS codes. The returned codes are 5 digits: the
206+
2 digit state FIPS with 000 appended to the end.
207+
208+
:param code: Individual or list of abbreviations or regular expressions.
209+
:param ignore_case: Boolean for whether or not to be case insensitive in the regular expression.
210+
If ``fixed=True``, this argument is ignored. Defaults to ``False``.
211+
:param fixed: Conduct an exact case sensitive match with the input string.
212+
Defaults to ``False``.
213+
:param ties_method: Method for determining how to deal with multiple outputs for a given input.
214+
Must be one of ``"all"`` or ``"first"``. If ``"first"``, then only the first match for each
215+
code is returned. If ``"all"``, then all matches for each code are returned.
216+
Defaults to ``first``.
217+
:return: If ``ties_method="first"``, returns a list of the first value found for each input key.
218+
If ``ties_method="all"``, returns a list of dicts, one for each input, with keys
219+
corresponding to all matched input keys and values corresponding to the list of state FIPS codes.
220+
The returned list will be the same length as the input, with ``None`` or ``{}`` if no values
221+
are found for ``ties_method="first"`` and ``ties_method="all"``, respectively.
222+
"""
223+
return _lookup(code, STATE_CENSUS.ABBR, STATE_CENSUS.STATE, ignore_case, fixed, ties_method)
224+
225+
165226
def name_to_fips(name: Union[str, Iterable],
166227
ignore_case: bool = False,
167228
fixed: bool = False,

Python-packages/covidcast-py/docs/signals.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,3 +61,7 @@ States
6161
.. autofunction:: covidcast.abbr_to_name
6262

6363
.. autofunction:: covidcast.name_to_abbr
64+
65+
.. autofunction:: covidcast.abbr_to_fips
66+
67+
.. autofunction:: covidcast.fips_to_abbr

Python-packages/covidcast-py/tests/covidcast/test_covidcast.py

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -77,19 +77,21 @@ def test_signal(mock_covidcast, mock_metadata):
7777
@patch("delphi_epidata.Epidata.covidcast_meta")
7878
def test_metadata(mock_covidcast_meta):
7979
# not generating full DF since most attributes used
80-
mock_covidcast_meta.side_effect = [{"result": 1, # successful API response
81-
"epidata": [{"max_time": 20200622, "min_time": 20200421},
82-
{"max_time": 20200724, "min_time": 20200512}],
83-
"message": "success"},
84-
{"result": 0, # unsuccessful API response
85-
"epidata": [{"max_time": 20200622, "min_time": 20200421},
86-
{"max_time": 20200724, "min_time": 20200512}],
87-
"message": "error: failed"}]
88-
80+
mock_covidcast_meta.side_effect = [
81+
{"result": 1, # successful API response
82+
"epidata": [{"max_time": 20200622, "min_time": 20200421, "last_update": 12345},
83+
{"max_time": 20200724, "min_time": 20200512, "last_update": 99999}],
84+
"message": "success"},
85+
{"result": 0, # unsuccessful API response
86+
"epidata": [{"max_time": 20200622, "min_time": 20200421},
87+
{"max_time": 20200724, "min_time": 20200512}],
88+
"message": "error: failed"}]
8989
# test happy path
9090
response = covidcast.metadata()
91-
expected = pd.DataFrame({"max_time": [datetime(2020, 6, 22), datetime(2020, 7, 24)],
92-
"min_time": [datetime(2020, 4, 21), datetime(2020, 5, 12)]})
91+
expected = pd.DataFrame({
92+
"max_time": [datetime(2020, 6, 22), datetime(2020, 7, 24)],
93+
"min_time": [datetime(2020, 4, 21), datetime(2020, 5, 12)],
94+
"last_update": [datetime(1970, 1, 1, 3, 25, 45), datetime(1970, 1, 2, 3, 46, 39)]})
9395
assert sort_df(response).equals(sort_df(expected))
9496

9597
# test failed response raises RuntimeError

Python-packages/covidcast-py/tests/covidcast/test_geography.py

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,32 @@ def test_name_to_abbr(test_key, test_kwargs, expected):
118118
assert geography.name_to_abbr(test_key, **test_kwargs) == expected
119119

120120

121+
@pytest.mark.parametrize("test_key, test_kwargs, expected", [
122+
(
123+
"12",
124+
{},
125+
["FL"]
126+
),
127+
(
128+
"7",
129+
{"ties_method": "all"},
130+
[{'17000': ['IL'],
131+
'27000': ['MN'],
132+
'37000': ['NC'],
133+
'47000': ['TN'],
134+
'72000': ['PR']}]
135+
136+
),
137+
(
138+
["ABC"],
139+
{},
140+
[None]
141+
),
142+
])
143+
def test_fips_to_abbr(test_key, test_kwargs, expected):
144+
assert geography.fips_to_abbr(test_key, **test_kwargs) == expected
145+
146+
121147
@pytest.mark.parametrize("test_key, test_kwargs, expected", [
122148
(
123149
"Pittsburgh",
@@ -139,6 +165,27 @@ def test_name_to_cbsa(test_key, test_kwargs, expected):
139165
assert geography.name_to_cbsa(test_key, **test_kwargs) == expected
140166

141167

168+
@pytest.mark.parametrize("test_key, test_kwargs, expected", [
169+
(
170+
"PA",
171+
{},
172+
["42000"]
173+
),
174+
(
175+
"New",
176+
{},
177+
[None]
178+
),
179+
(
180+
["PA", "ca"],
181+
{"ignore_case": True},
182+
["42000", "06000"]
183+
)
184+
])
185+
def test_abbr_to_fips(test_key, test_kwargs, expected):
186+
assert geography.abbr_to_fips(test_key, **test_kwargs) == expected
187+
188+
142189
@pytest.mark.parametrize("test_key, test_kwargs, expected", [
143190
(
144191
"Allegheny",
@@ -317,11 +364,11 @@ def test__lookup(test_args, test_kwargs, expected):
317364
None
318365
)
319366
])
367+
320368
def test__get_first_tie(test_dict_list, expected_return, warn, expected_warning):
321369
if warn:
322370
with pytest.warns(UserWarning) as record:
323371
assert geography._get_first_tie(test_dict_list) == expected_return
324372
assert record[0].message.args[0] == expected_warning
325373
else:
326374
assert geography._get_first_tie(test_dict_list) == expected_return
327-

R-notebooks/dashboard_functions.R

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
make_covidcast_signal <- function(destination, source, geo_type) {
2+
destination$time_value = source$time_value[1]
3+
destination$issue = source$issue[1]
4+
attributes(destination)$geo_type = geo_type
5+
class(destination) = c("covidcast_signal", "data.frame")
6+
return(destination)
7+
}
8+
9+
plot_28_day_frequency_state <- function(df_to_plot) {
10+
states_present = df_to_plot %>%
11+
group_by(geo_value) %>%
12+
summarize(value = n())
13+
14+
covidcast_signal_to_plot = make_covidcast_signal(states_present, df_to_plot, "state")
15+
16+
plot(covidcast_signal_to_plot,
17+
title = sprintf(
18+
"State frequency in last 28 days (start date: %s)",
19+
covidcast_signal_to_plot$time_value[1]
20+
),
21+
range = c(0, 28))
22+
}
23+
24+
plot_28_day_frequency_county <- function(df_to_plot) {
25+
counties_present = df_to_plot %>%
26+
group_by(geo_value) %>%
27+
summarize(value = n()) %>% ungroup() %>%
28+
filter(substr(geo_value, 3, 5) != "000")
29+
30+
covidcast_signal_to_plot = make_covidcast_signal(counties_present, df_to_plot, "county")
31+
32+
plot(
33+
covidcast_signal_to_plot,
34+
title = sprintf(
35+
"County frequency in last 28 days (start date: %s)",
36+
covidcast_signal_to_plot$time_value[1]
37+
),
38+
range = c(0, 28)
39+
)
40+
}
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
---
2+
title: "Doctor Visits dashboard"
3+
author: "Delphi Lab"
4+
date: "`r format(Sys.time(), '%B %d, %Y')`"
5+
output:
6+
html_document:
7+
code_folding: hide
8+
---
9+
10+
```{r, include = FALSE}
11+
knitr::opts_chunk$set(message = FALSE, warning = FALSE)
12+
```
13+
14+
### Coverage
15+
16+
This measures how much county coverage we have in the samples (i.e., how many unique counties are present each day), and how it has recently changed over time.
17+
18+
```{r, fig.width = 7, fig.height = 5}
19+
20+
library(covidcast)
21+
library(dplyr)
22+
library(ggplot2)
23+
24+
date_scale <-
25+
scale_x_date(date_breaks = "1 week", date_labels = "%b %d")
26+
27+
twenty_eight_days_ago = Sys.Date() - 28
28+
29+
# Sampling coverage
30+
df_doctor_visits_counties = covidcast_signal("doctor-visits",
31+
"smoothed_cli",
32+
start_day = twenty_eight_days_ago,
33+
geo_type = "county")
34+
counties_per_day = df_doctor_visits_counties %>%
35+
group_by(time_value) %>%
36+
summarize(n = n())
37+
38+
ggplot(counties_per_day, aes(x = time_value, y = n)) +
39+
geom_line() + geom_point() + theme_bw() +
40+
labs(
41+
x = "Date",
42+
y = "Number of Counties",
43+
title = sprintf(
44+
"Unique Counties: %i, mean per day: %i",
45+
length(unique(df_doctor_visits_counties$geo_value)),
46+
round(mean(counties_per_day$n))
47+
)
48+
) + date_scale
49+
```
50+
51+
## County Coverage Map
52+
53+
This visualizes the county coverage -- how frequently does each county show up in the data over the last 28 days?
54+
55+
```{r, fig.width = 10, fig.height = 8}
56+
57+
source("dashboard_functions.R")
58+
plot_28_day_frequency_county(df_doctor_visits_counties)
59+
```

0 commit comments

Comments
 (0)