cmu-delphi
diff --git a/‎Python-packages/covidcast-py/covidcast/__init__.py‎
Lines changed: 2 additions & 1 deletion b/‎Python-packages/covidcast-py/covidcast/__init__.py‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎Python-packages/covidcast-py/covidcast/covidcast.py‎
Lines changed: 21 additions & 10 deletions b/‎Python-packages/covidcast-py/covidcast/covidcast.py‎
Lines changed: 21 additions & 10 deletions
diff --git a/‎Python-packages/covidcast-py/covidcast/geography.py‎
Lines changed: 61 additions & 0 deletions b/‎Python-packages/covidcast-py/covidcast/geography.py‎
Lines changed: 61 additions & 0 deletions
diff --git a/‎Python-packages/covidcast-py/docs/signals.rst‎
Lines changed: 4 additions & 0 deletions b/‎Python-packages/covidcast-py/docs/signals.rst‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎Python-packages/covidcast-py/tests/covidcast/test_covidcast.py‎
Lines changed: 13 additions & 11 deletions b/‎Python-packages/covidcast-py/tests/covidcast/test_covidcast.py‎
Lines changed: 13 additions & 11 deletions
diff --git a/‎Python-packages/covidcast-py/tests/covidcast/test_geography.py‎
Lines changed: 48 additions & 1 deletion b/‎Python-packages/covidcast-py/tests/covidcast/test_geography.py‎
Lines changed: 48 additions & 1 deletion
diff --git a/‎R-notebooks/dashboard_functions.R‎
Lines changed: 40 additions & 0 deletions b/‎R-notebooks/dashboard_functions.R‎
Lines changed: 40 additions & 0 deletions
diff --git a/‎R-notebooks/doctor_visits_dashboard.Rmd‎
Lines changed: 59 additions & 0 deletions b/‎R-notebooks/doctor_visits_dashboard.Rmd‎
Lines changed: 59 additions & 0 deletions
@@ -15,4 +15,5 @@
 from .covidcast import signal, metadata, aggregate_signals
 from .plotting import plot, plot_choropleth, get_geo_df, animate
 from .geography import (fips_to_name, cbsa_to_name, abbr_to_name,
-                        name_to_abbr, name_to_cbsa, name_to_fips)
+                        name_to_abbr, name_to_cbsa, name_to_fips,
+                        fips_to_abbr, abbr_to_fips)
@@ -1,8 +1,8 @@
 """This is the client side library for accessing the COVIDcast API."""
 import warnings
 from datetime import timedelta, date
-from typing import Union, Iterable, Tuple, List
 from functools import reduce
+from typing import Union, Iterable, Tuple, List
 
 import pandas as pd
 from delphi_epidata import Epidata
@@ -213,21 +213,21 @@ def metadata() -> pd.DataFrame:
       ``signal``
         Signal name.
 
-      ``min_time``
-        First day for which this signal is available.
-
-      ``max_time``
-        Most recent day for which this signal is available.
+      ``time_type``
+        Temporal resolution at which this signal is reported. "day", for
+        example, means the signal is reported daily.
 
       ``geo_type``
         Geographic level for which this signal is available, such as county,
         state, msa, or hrr. Most signals are available at multiple geographic
         levels and will hence be listed in multiple rows with their own
         metadata.
 
-      ``time_type``
-        Temporal resolution at which this signal is reported. "day", for
-        example, means the signal is reported daily.
+      ``min_time``
+        First day for which this signal is available.
+
+      ``max_time``
+        Most recent day for which this signal is available.
 
       ``num_locations``
         Number of distinct geographic locations available for this signal. For
@@ -246,6 +246,17 @@ def metadata() -> pd.DataFrame:
       ``stdev_value``
         The sample standard deviation of all reported values.
 
+      ``last_update``
+        The UTC datetime for when the signal value was last updated.
+
+      ``max_issue``
+        Most recent date data was issued.
+
+      ``min_lag``
+        Smallest lag from observation to issue, in days.
+
+      ``max_lag``
+        Largest lag from observation to issue, in days.
     """
     meta = Epidata.covidcast_meta()
 
@@ -257,7 +268,7 @@ def metadata() -> pd.DataFrame:
     meta_df = pd.DataFrame.from_dict(meta["epidata"])
     meta_df["min_time"] = pd.to_datetime(meta_df["min_time"], format="%Y%m%d")
     meta_df["max_time"] = pd.to_datetime(meta_df["max_time"], format="%Y%m%d")
-
+    meta_df["last_update"] = pd.to_datetime(meta_df["last_update"], unit="s")
     return meta_df
 
 
 
@@ -16,6 +16,11 @@
 # Filter undesired rows from CSVs.
 # They're not removed from the files to keep them identical to rda files.
 STATE_CENSUS = STATE_CENSUS.loc[STATE_CENSUS.STATE != "0"]
+# pad to 2 characters with leading 0s
+STATE_CENSUS["STATE"] = STATE_CENSUS["STATE"].str.zfill(2)
+# add 000 to the end to get a 5 digit code
+STATE_CENSUS["STATE"] = STATE_CENSUS["STATE"].str.pad(width=5, fillchar="0", side="right")
+# filter out micropolitan areas
 MSA_CENSUS = MSA_CENSUS.loc[MSA_CENSUS.LSAD == "Metropolitan Statistical Area"]
 
 
@@ -128,6 +133,34 @@ def name_to_abbr(name: Union[str, Iterable],
     return _lookup(name, STATE_CENSUS.NAME, STATE_CENSUS.ABBR, ignore_case, fixed, ties_method)
 
 
+def fips_to_abbr(code: Union[str, Iterable],
+                 ignore_case: bool = False,
+                 fixed: bool = False,
+                 ties_method: str = "first") -> list:
+    """Look up state abbreviation by FIPS codes with regular expression support.
+
+    Given an individual or list of FIPS codes or regular expressions, look up the corresponding
+    state abbreviation. FIPS codes can be the 2 digit code (``covidcast.fips_to_abbr("12")``) or
+    the 2 digit code with 000 appended to the end (``covidcast.fips_to_abbr("12000")``).
+
+    :param code: Individual or list of FIPS codes or regular expressions.
+    :param ignore_case: Boolean for whether or not to be case insensitive in the regular expression.
+      If ``fixed=True``, this argument is ignored. Defaults to ``False``.
+    :param fixed: Conduct an exact case sensitive match with the input string.
+      Defaults to ``False``.
+    :param ties_method: Method for determining how to deal with multiple outputs for a given input.
+      Must be one of ``"all"`` or ``"first"``. If ``"first"``, then only the first match for each
+      code is returned. If ``"all"``, then all matches for each code are returned.
+      Defaults to ``first``.
+    :return: If ``ties_method="first"``, returns a list of the first value found for each input key.
+      If ``ties_method="all"``, returns a list of dicts, one for each input, with keys
+      corresponding to all matched FIPS codes and values listing their state abbreviations.
+      The returned list will be the same length as the input, with ``None`` or ``{}`` if no values
+      are found for ``ties_method="first"`` and ``ties_method="all"``, respectively.
+    """
+    return _lookup(code, STATE_CENSUS.STATE, STATE_CENSUS.ABBR, ignore_case, fixed, ties_method)
+
+
 def name_to_cbsa(name: Union[str, Iterable],
                  ignore_case: bool = False,
                  fixed: bool = False,
@@ -162,6 +195,34 @@ def name_to_cbsa(name: Union[str, Iterable],
     return _lookup(name, df.NAME, df.CBSA, ignore_case, fixed, ties_method)
 
 
+def abbr_to_fips(code: Union[str, Iterable],
+                 ignore_case: bool = False,
+                 fixed: bool = False,
+                 ties_method: str = "first") -> list:
+    """Look up state FIPS codes by abbreviation with regular expression support.
+
+    Given an individual or list of state abbreviations or regular expressions,
+    look up the corresponding state FIPS codes. The returned codes are 5 digits: the
+    2 digit state FIPS with 000 appended to the end.
+
+    :param code: Individual or list of abbreviations or regular expressions.
+    :param ignore_case: Boolean for whether or not to be case insensitive in the regular expression.
+      If ``fixed=True``, this argument is ignored. Defaults to ``False``.
+    :param fixed: Conduct an exact case sensitive match with the input string.
+      Defaults to ``False``.
+    :param ties_method: Method for determining how to deal with multiple outputs for a given input.
+      Must be one of ``"all"`` or ``"first"``. If ``"first"``, then only the first match for each
+      code is returned. If ``"all"``, then all matches for each code are returned.
+      Defaults to ``first``.
+    :return: If ``ties_method="first"``, returns a list of the first value found for each input key.
+      If ``ties_method="all"``, returns a list of dicts, one for each input, with keys
+      corresponding to all matched abbreviations and values listing their state FIPS codes.
+      The returned list will be the same length as the input, with ``None`` or ``{}`` if no values
+      are found for ``ties_method="first"`` and ``ties_method="all"``, respectively.
+    """
+    return _lookup(code, STATE_CENSUS.ABBR, STATE_CENSUS.STATE, ignore_case, fixed, ties_method)
+
+
 def name_to_fips(name: Union[str, Iterable],
                  ignore_case: bool = False,
                  fixed: bool = False,
 
@@ -61,3 +61,7 @@ States
 .. autofunction:: covidcast.abbr_to_name
 
 .. autofunction:: covidcast.name_to_abbr
+
+.. autofunction:: covidcast.abbr_to_fips
+
+.. autofunction:: covidcast.fips_to_abbr
@@ -77,19 +77,21 @@ def test_signal(mock_covidcast, mock_metadata):
 @patch("delphi_epidata.Epidata.covidcast_meta")
 def test_metadata(mock_covidcast_meta):
     # not generating full DF since most attributes used
-    mock_covidcast_meta.side_effect = [{"result": 1,  # successful API response
-                                        "epidata": [{"max_time": 20200622, "min_time": 20200421},
-                                                    {"max_time": 20200724, "min_time": 20200512}],
-                                        "message": "success"},
-                                       {"result": 0,  # unsuccessful API response
-                                        "epidata": [{"max_time": 20200622, "min_time": 20200421},
-                                                    {"max_time": 20200724, "min_time": 20200512}],
-                                        "message": "error: failed"}]
-
+    mock_covidcast_meta.side_effect = [
+        {"result": 1,  # successful API response
+         "epidata": [{"max_time": 20200622, "min_time": 20200421, "last_update": 12345},
+                     {"max_time": 20200724, "min_time": 20200512, "last_update": 99999}],
+         "message": "success"},
+        {"result": 0,  # unsuccessful API response
+         "epidata": [{"max_time": 20200622, "min_time": 20200421},
+                     {"max_time": 20200724, "min_time": 20200512}],
+         "message": "error: failed"}]
     # test happy path
     response = covidcast.metadata()
-    expected = pd.DataFrame({"max_time": [datetime(2020, 6, 22), datetime(2020, 7, 24)],
-                             "min_time": [datetime(2020, 4, 21), datetime(2020, 5, 12)]})
+    expected = pd.DataFrame({
+        "max_time": [datetime(2020, 6, 22), datetime(2020, 7, 24)],
+        "min_time": [datetime(2020, 4, 21), datetime(2020, 5, 12)],
+        "last_update": [datetime(1970, 1, 1, 3, 25, 45), datetime(1970, 1, 2, 3, 46, 39)]})
     assert sort_df(response).equals(sort_df(expected))
 
     # test failed response raises RuntimeError
 
@@ -118,6 +118,32 @@ def test_name_to_abbr(test_key, test_kwargs, expected):
     assert geography.name_to_abbr(test_key, **test_kwargs) == expected
 
 
+@pytest.mark.parametrize("test_key, test_kwargs, expected", [
+    (
+            "12",
+            {},
+            ["FL"]
+    ),
+    (
+            "7",
+            {"ties_method": "all"},
+            [{'17000': ['IL'],
+              '27000': ['MN'],
+              '37000': ['NC'],
+              '47000': ['TN'],
+              '72000': ['PR']}]
+
+    ),
+    (
+            ["ABC"],
+            {},
+            [None]
+    ),
+])
+def test_fips_to_abbr(test_key, test_kwargs, expected):
+    assert geography.fips_to_abbr(test_key, **test_kwargs) == expected
+
+
 @pytest.mark.parametrize("test_key, test_kwargs, expected", [
     (
             "Pittsburgh",
@@ -139,6 +165,27 @@ def test_name_to_cbsa(test_key, test_kwargs, expected):
     assert geography.name_to_cbsa(test_key, **test_kwargs) == expected
 
 
+@pytest.mark.parametrize("test_key, test_kwargs, expected", [
+    (
+            "PA",
+            {},
+            ["42000"]
+    ),
+    (
+            "New",
+            {},
+            [None]
+    ),
+    (
+            ["PA", "ca"],
+            {"ignore_case": True},
+            ["42000", "06000"]
+    )
+])
+def test_abbr_to_fips(test_key, test_kwargs, expected):
+    assert geography.abbr_to_fips(test_key, **test_kwargs) == expected
+
+
 @pytest.mark.parametrize("test_key, test_kwargs, expected", [
     (
             "Allegheny",
@@ -317,11 +364,11 @@ def test__lookup(test_args, test_kwargs, expected):
             None
     )
 ])
+
 def test__get_first_tie(test_dict_list, expected_return, warn, expected_warning):
     if warn:
         with pytest.warns(UserWarning) as record:
             assert geography._get_first_tie(test_dict_list) == expected_return
             assert record[0].message.args[0] == expected_warning
     else:
         assert geography._get_first_tie(test_dict_list) == expected_return
-
 
@@ -0,0 +1,40 @@
+make_covidcast_signal <- function(destination, source, geo_type) {
+  destination$time_value = source$time_value[1]
+  destination$issue = source$issue[1]
+  attributes(destination)$geo_type = geo_type
+  class(destination) = c("covidcast_signal", "data.frame")
+  return(destination)
+}
+
+plot_28_day_frequency_state  <- function(df_to_plot) {
+  states_present = df_to_plot %>%
+    group_by(geo_value) %>%
+    summarize(value = n())
+  
+  covidcast_signal_to_plot = make_covidcast_signal(states_present, df_to_plot, "state")
+  
+  plot(covidcast_signal_to_plot,
+       title = sprintf(
+         "State frequency in last 28 days (start date: %s)",
+         covidcast_signal_to_plot$time_value[1]
+       ),
+       range = c(0, 28))
+}
+
+plot_28_day_frequency_county  <- function(df_to_plot) {
+  counties_present = df_to_plot %>%
+    group_by(geo_value) %>%
+    summarize(value = n()) %>% ungroup() %>%
+    filter(substr(geo_value, 3, 5) != "000")
+  
+  covidcast_signal_to_plot = make_covidcast_signal(counties_present, df_to_plot, "county")
+  
+  plot(
+    covidcast_signal_to_plot,
+    title = sprintf(
+      "County frequency in last 28 days (start date: %s)",
+      covidcast_signal_to_plot$time_value[1]
+    ),
+    range = c(0, 28)
+  )
+}
@@ -0,0 +1,59 @@
+---
+title: "Doctor Visits dashboard"
+author: "Delphi Lab"
+date: "`r format(Sys.time(), '%B %d, %Y')`"
+output:
+  html_document:
+    code_folding: hide
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(message = FALSE, warning = FALSE)
+```
+
+## Coverage
+
+This measures how much county coverage we have in the samples (i.e., how many unique counties are present each day), and how it has recently changed over time.
+
+```{r, fig.width = 7, fig.height = 5}
+
+library(covidcast)
+library(dplyr)
+library(ggplot2)
+
+date_scale <-
+  scale_x_date(date_breaks = "1 week", date_labels = "%b %d")
+
+twenty_eight_days_ago = Sys.Date() - 28
+
+# Sampling coverage
+df_doctor_visits_counties = covidcast_signal("doctor-visits",
+                                             "smoothed_cli",
+                                             start_day = twenty_eight_days_ago,
+                                             geo_type = "county")
+counties_per_day = df_doctor_visits_counties %>%
+  group_by(time_value) %>%
+  summarize(n = n())
+
+ggplot(counties_per_day, aes(x = time_value, y = n)) +
+  geom_line() + geom_point() + theme_bw() +
+  labs(
+    x = "Date",
+    y = "Number of Counties",
+    title = sprintf(
+      "Unique Counties: %i, mean per day: %i",
+      length(unique(df_doctor_visits_counties$geo_value)),
+      round(mean(counties_per_day$n))
+    )
+  ) + date_scale
+```
+
+## County Coverage Map
+
+This visualizes the county coverage -- how frequently does each county show up in the data over the last 28 days?
+
+```{r, fig.width = 10, fig.height = 8}
+
+source("dashboard_functions.R")
+plot_28_day_frequency_county(df_doctor_visits_counties)
+```