From 7c5abc611e8b730b6623442fb4273ab08485d98f Mon Sep 17 00:00:00 2001 From: Alex Reinhart Date: Wed, 11 Nov 2020 11:42:53 -0500 Subject: [PATCH 1/8] Convert API calls to request CSV format for data, instead of JSON The CSV format is much more compact (does not repeat field names for every row), and more naturally fits with R anyway. Alter the relevant tests to serve CSVs. I've verified all vignettes build with these changes. --- R-packages/covidcast/R/covidcast.R | 79 +++++++++++-------- .../epidata/api.php-1d9b5c.csv | 2 + .../epidata/api.php-3e1dc3.csv | 3 + .../epidata/api.php-64a69c.json | 26 ------ .../epidata/api.php-6a5814.json | 26 ------ .../epidata/api.php-96f6a5.json | 4 - .../epidata/api.php-b6e478.csv | 0 .../epidata/api.php-cb89ad.json | 17 ---- .../epidata/api.php-d707dc.csv | 0 .../epidata/api.php-da6974.json | 4 - .../epidata/api.php-f666a2.csv | 3 + .../covidcast/tests/testthat/test-covidcast.R | 14 ++-- 12 files changed, 62 insertions(+), 116 deletions(-) create mode 100644 R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-1d9b5c.csv create mode 100644 R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-3e1dc3.csv delete mode 100644 R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-64a69c.json delete mode 100644 R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-6a5814.json delete mode 100644 R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-96f6a5.json create mode 100644 R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-b6e478.csv delete mode 100644 R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-cb89ad.json create mode 100644 R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-d707dc.csv delete mode 100644 R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-da6974.json create mode 100644 
R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-f666a2.csv diff --git a/R-packages/covidcast/R/covidcast.R b/R-packages/covidcast/R/covidcast.R index 79817c29..246c51f3 100644 --- a/R-packages/covidcast/R/covidcast.R +++ b/R-packages/covidcast/R/covidcast.R @@ -429,7 +429,9 @@ covidcast_signals <- function(data_source, signal, #' #' @export covidcast_meta <- function() { - meta <- .request(list(source='covidcast_meta', cached="true")) + meta <- jsonlite::fromJSON(.request( + list(source = "covidcast_meta", + cached = "true"))) if (meta$message != "success") { abort(paste0("Failed to obtain metadata: ", meta$message, "."), @@ -560,14 +562,14 @@ covidcast_days <- function(data_source, signal, start_day, end_day, geo_type, # The API limits the number of rows that can be returned at once, so we query # in batches. - for (i in seq(1, num_batches)) { + for (i in seq_len(num_batches)) { start_offset <- (i - 1) * max_days_at_time end_offset <- min(i * max_days_at_time, ndays) - 1 query_start_day <- start_day + start_offset query_end_day <- start_day + end_offset time_values <- date_to_string(days[(start_offset + 1):(end_offset + 1)]) - dat[[i]] <- covidcast(data_source = data_source, + response <- covidcast(data_source = data_source, signal = signal, time_type = "day", geo_type = geo_type, @@ -576,22 +578,37 @@ covidcast_days <- function(data_source, signal, start_day, end_day, geo_type, as_of = as_of, issues = issues, lag = lag) + + if (is.null(response)) { + warn(paste0("Fetching ", signal, " from ", data_source, " for ", + query_start_day, " to ", query_end_day, + " in geography '", geo_value, "': no results"), + data_source = data_source, + signal = signal, + start_day = query_start_day, + end_day = query_end_day, + geo_value = geo_value, + class = "covidcast_fetch_failed") + + next + } + + dat[[i]] <- response + summary <- sprintf( - "Fetched day %s to %s: %s, %s, num_entries = %s", + "Fetched day %s to %s: num_entries = %s", query_start_day, 
query_end_day, - dat[[i]]$result, - dat[[i]]$message, - nrow(dat[[i]]$epidata) - ) + nrow(response)) + if (length(summary) != 0) { message(summary) } - if (dat[[i]]$message == "success") { + + if (nrow(response) > 0) { desired_geos <- tolower(unique(geo_value)) - returned_epidata <- dat[[i]]$epidata - returned_geo_array <- returned_epidata %>% + returned_geo_array <- response %>% dplyr::select(geo_value, time_value) %>% dplyr::group_by(time_value) %>% dplyr::summarize(geo_value = list(geo_value)) @@ -607,10 +624,10 @@ covidcast_days <- function(data_source, signal, start_day, end_day, geo_type, signal = signal, day = missing_dates, geo_value = geo_value, - api_msg = dat[[i]]$message, - class = "covidcast_missing_geo_values" + class = "covidcast_missing_time_values" ) } + if (!identical("*", geo_value)) { missing_geo_array <- returned_geo_array[ lapply(returned_geo_array$geo_value, length) < length(desired_geos), ] @@ -626,26 +643,13 @@ covidcast_days <- function(data_source, signal, start_day, end_day, geo_type, signal = signal, day = api_to_date(missing_geo_array$time_value), geo_value = geo_value, - api_msg = dat[[i]]$message, class = "covidcast_missing_geo_values") } } - } else { - warn(paste0("Fetching ", signal, " from ", data_source, " for ", - query_start_day, " to ", query_end_day, " in geography '", - geo_value, "': ", dat[[i]]$message), - data_source = data_source, - signal = signal, - start_day = query_start_day, - end_day = query_end_day, - geo_value = geo_value, - api_msg = dat[[i]]$message, - class = "covidcast_fetch_failed") } } df <- dat %>% - purrr::map("epidata") %>% # just want $epidata part purrr::map(purrr::compact) %>% # remove the list elements that are NULL dplyr::bind_rows() # make this into a data frame @@ -681,7 +685,7 @@ geo_warning_message <- function(row, desired_geos) { covidcast <- function(data_source, signal, time_type, geo_type, time_values, geo_value, as_of, issues, lag) { # Check parameters - if(missing(data_source) || 
missing(signal) || missing(time_type) || + if (missing(data_source) || missing(signal) || missing(time_type) || missing(geo_type) || missing(time_values) || missing(geo_value)) { stop("`data_source`, `signal`, `time_type`, `geo_type`, `time_values`, ", "and `geo_value` are all required.") @@ -689,14 +693,16 @@ covidcast <- function(data_source, signal, time_type, geo_type, time_values, # Set up request params <- list( - source = 'covidcast', + source = "covidcast", data_source = data_source, signal = signal, time_type = time_type, geo_type = geo_type, time_values = .list(time_values), - geo_value = geo_value + geo_value = geo_value, + format = "csv" ) + if (length(params$geo_value) > 1) { params$geo_values <- paste0(params$geo_value, collapse = ",") #convert to string params$geo_value <- NULL @@ -722,7 +728,16 @@ covidcast <- function(data_source, signal, time_type, geo_type, time_values, } # Make the API call - return(.request(params)) + res <- .request(params) + if (nchar(res) == 0) { + # empty if no results + return(NULL) + } + + # geo_value must be read as character so FIPS codes are returned as character, + # not numbers (with leading 0s potentially removed) + return(read.csv(textConnection(res), stringsAsFactors = FALSE, + colClasses = c("geo_value" = "character"))) } # Helper function to cast values and/or ranges to strings @@ -751,8 +766,8 @@ covidcast <- function(data_source, signal, time_type, geo_type, time_values, httr::stop_for_status(response, task = "fetch data from API") - return(jsonlite::fromJSON(httr::content(response, as = "text", - encoding = "utf-8"))) + return(httr::content(response, as = "text", + encoding = "utf-8")) } # This is the date format expected by the API diff --git a/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-1d9b5c.csv b/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-1d9b5c.csv new file mode 100644 index 00000000..be57bbb1 --- /dev/null +++ 
b/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-1d9b5c.csv @@ -0,0 +1,2 @@ +geo_value,signal,time_value,issue,lag,value,stderr,sample_size +01000,bar-not-found,20200101,20200102,1,1.0,0.1,2.0 diff --git a/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-3e1dc3.csv b/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-3e1dc3.csv new file mode 100644 index 00000000..ee6523be --- /dev/null +++ b/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-3e1dc3.csv @@ -0,0 +1,3 @@ +signal,geo_value,value,time_value,issue,lag,sample_size,stderr +bar,pa,1,20200101,20200101,0,1,1 +bar,tx,1,20200101,20200101,0,1,1 diff --git a/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-64a69c.json b/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-64a69c.json deleted file mode 100644 index ea92a725..00000000 --- a/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-64a69c.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "result": 1, - "message": "success", - "epidata": [ - { - "signal": "bar", - "geo_value": "pa", - "value": 1, - "time_value": "20200101", - "issue": "20200101", - "lag": 0, - "sample_size": 1, - "stderr": 1 - }, - { - "signal": "bar", - "geo_value": "tx", - "value": 1, - "time_value": "20200101", - "issue": "20200101", - "lag": 0, - "sample_size": 1, - "stderr": 1 - } - ] -} diff --git a/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-6a5814.json b/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-6a5814.json deleted file mode 100644 index ea92a725..00000000 --- a/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-6a5814.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "result": 1, - "message": "success", - "epidata": [ - { - "signal": "bar", - "geo_value": "pa", - "value": 1, - "time_value": "20200101", - "issue": "20200101", - "lag": 0, - 
"sample_size": 1, - "stderr": 1 - }, - { - "signal": "bar", - "geo_value": "tx", - "value": 1, - "time_value": "20200101", - "issue": "20200101", - "lag": 0, - "sample_size": 1, - "stderr": 1 - } - ] -} diff --git a/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-96f6a5.json b/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-96f6a5.json deleted file mode 100644 index c6adc102..00000000 --- a/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-96f6a5.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "result": -2, - "message": "no results" -} diff --git a/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-b6e478.csv b/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-b6e478.csv new file mode 100644 index 00000000..e69de29b diff --git a/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-cb89ad.json b/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-cb89ad.json deleted file mode 100644 index f0bf49d9..00000000 --- a/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-cb89ad.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "message": "success", - "result": 1, - "epidata": [ - { - "geo_value": "01000", - "signal": "bar-not-found", - "time_value": 20200101, - "direction": null, - "issue": 20200102, - "lag": 1, - "value": 1.0, - "stderr": 0.1, - "sample_size": 2.0 - } - ] -} diff --git a/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-d707dc.csv b/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-d707dc.csv new file mode 100644 index 00000000..e69de29b diff --git a/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-da6974.json b/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-da6974.json deleted file mode 100644 index c6adc102..00000000 --- 
a/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-da6974.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "result": -2, - "message": "no results" -} diff --git a/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-f666a2.csv b/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-f666a2.csv new file mode 100644 index 00000000..ee6523be --- /dev/null +++ b/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-f666a2.csv @@ -0,0 +1,3 @@ +signal,geo_value,value,time_value,issue,lag,sample_size,stderr +bar,pa,1,20200101,20200101,0,1,1 +bar,tx,1,20200101,20200101,0,1,1 diff --git a/R-packages/covidcast/tests/testthat/test-covidcast.R b/R-packages/covidcast/tests/testthat/test-covidcast.R index 9f1961a3..857ff087 100644 --- a/R-packages/covidcast/tests/testthat/test-covidcast.R +++ b/R-packages/covidcast/tests/testthat/test-covidcast.R @@ -60,7 +60,7 @@ with_mock_api({ test_that("covidcast_meta raises error when API signals one", { stub(covidcast_meta, ".request", - list(message = "argle-bargle")) + "{\"message\": \"argle-bargle\"}") expect_error(covidcast_meta(), class = "covidcast_meta_fetch_failed") @@ -69,13 +69,13 @@ test_that("covidcast_meta raises error when API signals one", { with_mock_api({ ## covidcast_signal() tests test_that("covidcast_signal warns when requested geo_values are unavailable", { - # api.php-6a5814.json + # api.php-3e1dc3.csv expect_warning(covidcast_signal("foo", "bar", "2020-01-01", "2020-01-01", geo_values = c("pa", "tx", "DUCKS")), class = "covidcast_missing_geo_values") # ...but not when they *are* available. - # api.php-64a69c.json + # api.php-f666a2.csv expect_silent(suppressMessages( covidcast_signal("foo", "bar", "2020-01-01", "2020-01-01", geo_values = c("pa", "tx")))) @@ -83,12 +83,12 @@ with_mock_api({ test_that("covidcast_signal warns when requested dates are unavailable", { # with geo_values = "*". 
- # api.php-96f6a5.json + # api.php-b6e478.csv expect_warning(covidcast_signal("foo", "bar", "2020-01-02", "2020-01-02"), class = "covidcast_fetch_failed") # and with geo_values = "pa" - # api.php-da6974.json + # api.php-d707dc.csv expect_warning(covidcast_signal("foo", "bar", "2020-01-02", "2020-01-02", geo_values = "pa"), class = "covidcast_fetch_failed") @@ -101,7 +101,7 @@ with_mock_api({ test_that("covidcast_signal works for signals with no meta", { # when no meta is available, we must provide start_day and end_day. - # api.php-cb89ad.json + # api.php-1d9b5c.csv expect_equal( covidcast_signal("foo", "bar-not-found", "2020-01-01", "2020-01-01"), @@ -123,7 +123,7 @@ with_mock_api({ }) test_that("covidcast_signal stops when end_day < start_day", { - # reusing api.php-da6974.json + # reusing api.php-d2e163.json for metadata expect_error(covidcast_signal("foo", "bar", "2020-01-02", "2020-01-01")) }) From 88cebdcd579224b6d23191cd50b346b31b897e00 Mon Sep 17 00:00:00 2001 From: Alex Reinhart Date: Wed, 11 Nov 2020 18:00:09 -0500 Subject: [PATCH 2/8] Switch covidcast_meta to request CSVs as well --- R-packages/covidcast/R/covidcast.R | 12 ++++---- .../epidata/api.php-d2e163.json | 30 ------------------- .../epidata/api.php-dd024f.csv | 3 ++ .../covidcast/tests/testthat/test-covidcast.R | 15 +++++----- 4 files changed, 16 insertions(+), 44 deletions(-) delete mode 100644 R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-d2e163.json create mode 100644 R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-dd024f.csv diff --git a/R-packages/covidcast/R/covidcast.R b/R-packages/covidcast/R/covidcast.R index 246c51f3..94f9d1f8 100644 --- a/R-packages/covidcast/R/covidcast.R +++ b/R-packages/covidcast/R/covidcast.R @@ -429,17 +429,15 @@ covidcast_signals <- function(data_source, signal, #' #' @export covidcast_meta <- function() { - meta <- jsonlite::fromJSON(.request( + meta <- .request( list(source = "covidcast_meta", - cached = 
"true"))) + format = "csv")) - if (meta$message != "success") { - abort(paste0("Failed to obtain metadata: ", meta$message, "."), - err_msg = meta$message, - class = "covidcast_meta_fetch_failed") + if (nchar(meta) == 0) { + abort("Failed to obtain metadata", class = "covidcast_meta_fetch_failed") } - meta <- meta$epidata %>% + meta <- read.csv(textConnection(meta), stringsAsFactors = FALSE) %>% dplyr::mutate(min_time = api_to_date(.data$min_time), max_time = api_to_date(.data$max_time), max_issue = api_to_date(.data$max_issue)) diff --git a/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-d2e163.json b/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-d2e163.json deleted file mode 100644 index 7805d705..00000000 --- a/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-d2e163.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "message": "success", - "result": 1, - "epidata": [ - { - "data_source": "foo", - "signal": "bar", - "min_time": 20200101, - "max_time": 20200102, - "max_issue": 20200404, - "min_value": 0, - "max_value": 10, - "num_locations": 100, - "time_type": "day", - "geo_type": "county" - }, - { - "data_source": "foo", - "signal": "bar2", - "min_time": 20201002, - "max_time": 20201003, - "max_issue": 20201101, - "num_locations": 100, - "min_value": 0, - "max_value": 10, - "time_type": "day", - "geo_type": "county" - } - ] -} diff --git a/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-dd024f.csv b/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-dd024f.csv new file mode 100644 index 00000000..1e49104e --- /dev/null +++ b/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-dd024f.csv @@ -0,0 +1,3 @@ +data_source,signal,time_type,geo_type,min_time,max_time,min_value,max_value,max_issue +foo,bar,day,county,20200101,20200102,0,10,20200404 +foo,bar,day,county,20201002,20201003,0,10,20201101 diff --git 
a/R-packages/covidcast/tests/testthat/test-covidcast.R b/R-packages/covidcast/tests/testthat/test-covidcast.R index 857ff087..0146fc61 100644 --- a/R-packages/covidcast/tests/testthat/test-covidcast.R +++ b/R-packages/covidcast/tests/testthat/test-covidcast.R @@ -13,8 +13,10 @@ library(dplyr) # types of errors. # # 2. Once you've written a test, it can be difficult to find the file storing -# the JSON needed for that test. We hence store the filename in comments -# adjacent to every call. +# the JSON or CSV needed for that test. We hence store the filename in comments +# adjacent to every call. (Note that we request CSVs from the API for +# covidcast_signal, and httptest suggests filenames ending in .json by default. +# You can use .csv instead and httptest will correctly locate those files.) # # 3. covidcast_signal() calls covidcast_meta() unconditionally. We hence need a # single meta file that suffices for all tests that call covidcast_signal(). @@ -38,7 +40,7 @@ library(dplyr) with_mock_api({ test_that("covidcast_meta formats result correctly", { - # api.php-d2e163.json + # api.php-dd024f.csv expect_equal(covidcast_meta(), structure( data.frame( @@ -46,12 +48,12 @@ with_mock_api({ signal = c("bar", "bar2"), min_time = as.Date(c("2020-01-01", "2020-10-02")), max_time = as.Date(c("2020-01-02", "2020-10-03")), - max_issue = as.Date(c("2020-04-04", "2020-11-01")), min_value = 0, max_value = 10, num_locations = 100, time_type = "day", - geo_type = "county" + geo_type = "county", + max_issue = as.Date(c("2020-04-04", "2020-11-01")) ), class = c("covidcast_meta", "data.frame") )) @@ -59,8 +61,7 @@ with_mock_api({ }) test_that("covidcast_meta raises error when API signals one", { - stub(covidcast_meta, ".request", - "{\"message\": \"argle-bargle\"}") + stub(covidcast_meta, ".request", "") expect_error(covidcast_meta(), class = "covidcast_meta_fetch_failed") From 50cc9506420e4f5f7f86a6ce02d509790e60ecf1 Mon Sep 17 00:00:00 2001 From: Alex Reinhart Date: Wed, 11 Nov 2020 
18:29:30 -0500 Subject: [PATCH 3/8] Correct error in metadata test It should not be possible to have two signals with the same source, signal, time_type, and geo_type. This will cause a query for that signal to have two metadata rows attached to the covidcast_signal data frame, which will confuse everything. --- .../testthat/api.covidcast.cmu.edu/epidata/api.php-dd024f.csv | 2 +- R-packages/covidcast/tests/testthat/test-covidcast.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-dd024f.csv b/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-dd024f.csv index 1e49104e..c114947b 100644 --- a/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-dd024f.csv +++ b/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-dd024f.csv @@ -1,3 +1,3 @@ data_source,signal,time_type,geo_type,min_time,max_time,min_value,max_value,max_issue foo,bar,day,county,20200101,20200102,0,10,20200404 -foo,bar,day,county,20201002,20201003,0,10,20201101 +foo,bar,day,state,20201002,20201003,0,10,20201101 diff --git a/R-packages/covidcast/tests/testthat/test-covidcast.R b/R-packages/covidcast/tests/testthat/test-covidcast.R index 0146fc61..3ebf520b 100644 --- a/R-packages/covidcast/tests/testthat/test-covidcast.R +++ b/R-packages/covidcast/tests/testthat/test-covidcast.R @@ -52,7 +52,7 @@ with_mock_api({ max_value = 10, num_locations = 100, time_type = "day", - geo_type = "county", + geo_type = c("county", "state"), max_issue = as.Date(c("2020-04-04", "2020-11-01")) ), class = c("covidcast_meta", "data.frame") From cbe3345f53554d08b475663cf04b1d372c390ec6 Mon Sep 17 00:00:00 2001 From: Alex Reinhart Date: Wed, 11 Nov 2020 18:32:51 -0500 Subject: [PATCH 4/8] Add an additional test of covidcast_signal Fetching multiple days is important. 
--- .../epidata/api.php-32641f.csv | 3 ++ .../epidata/api.php-b85d44.csv | 0 .../epidata/api.php-f49e8f.csv | 3 ++ .../covidcast/tests/testthat/test-covidcast.R | 43 +++++++++++++++++++ 4 files changed, 49 insertions(+) create mode 100644 R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-32641f.csv create mode 100644 R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-b85d44.csv create mode 100644 R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-f49e8f.csv diff --git a/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-32641f.csv b/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-32641f.csv new file mode 100644 index 00000000..fa87809e --- /dev/null +++ b/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-32641f.csv @@ -0,0 +1,3 @@ +geo_value,signal,time_value,direction,issue,lag,value,stderr,sample_size +01001,bar,20200110,,20200111,1,91.2,0.8,114.2 +01002,bar,20200110,,20200111,1,99.1,0.2,217.8 \ No newline at end of file diff --git a/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-b85d44.csv b/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-b85d44.csv new file mode 100644 index 00000000..e69de29b diff --git a/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-f49e8f.csv b/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-f49e8f.csv new file mode 100644 index 00000000..ca1a2793 --- /dev/null +++ b/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-f49e8f.csv @@ -0,0 +1,3 @@ +geo_value,signal,time_value,direction,issue,lag,value,stderr,sample_size +31001,bar,20200112,,20200113,1,81.2,0.8,314.2 +31002,bar,20200112,,20200113,1,89.1,0.2,417.8 \ No newline at end of file diff --git a/R-packages/covidcast/tests/testthat/test-covidcast.R b/R-packages/covidcast/tests/testthat/test-covidcast.R index 
3ebf520b..3687da22 100644 --- a/R-packages/covidcast/tests/testthat/test-covidcast.R +++ b/R-packages/covidcast/tests/testthat/test-covidcast.R @@ -123,6 +123,49 @@ with_mock_api({ ) }) + test_that("covidcast_signal works across multiple days with gaps", { + # If we request 3 days, we'll get 3 API queries. If the middle day is + # missing, we should get an appropriate warning, but still get the right + # data frame. + + # day 1: api.php-32641f.csv + # day 2: api.php-b85d44.csv (empty) + # day 3: api.php-f49e8f.csv + expect_warning(covidcast_signal("foo", "bar", "2020-01-10", "2020-01-12", + geo_type = "county"), + class = "covidcast_fetch_failed") + + res <- suppressWarnings( + covidcast_signal("foo", "bar", "2020-01-10", "2020-01-12", + geo_type = "county")) + expect_equal( + res, + structure(data.frame( + data_source = "foo", + signal = "bar", + geo_value = c("01001", "01002", "31001", "31002"), + time_value = as.Date(c("2020-01-10", "2020-01-10", + "2020-01-12", "2020-01-12")), + issue = as.Date(c("2020-01-11", "2020-01-11", + "2020-01-13", "2020-01-13")), + lag = 1, + value = c(91.2, 99.1, 81.2, 89.1), + stderr = c(0.8, 0.2, 0.8, 0.2), + sample_size = c(114.2, 217.8, 314.2, 417.8)), + class = c("covidcast_signal", "data.frame"), + metadata = structure(data.frame( + data_source = "foo", + signal = "bar", + time_type = "day", + geo_type = "county", + min_time = as.Date("2020-01-01"), + max_time = as.Date("2020-01-02"), + min_value = 0, + max_value = 10, + max_issue = as.Date("2020-04-04")), + class = c("covidcast_meta", "data.frame")))) + }) + test_that("covidcast_signal stops when end_day < start_day", { # reusing api.php-d2e163.json for metadata expect_error(covidcast_signal("foo", "bar", "2020-01-02", "2020-01-01")) From a28e025f52aa1d4d791690957119e890190fe080 Mon Sep 17 00:00:00 2001 From: Alex Reinhart Date: Thu, 12 Nov 2020 10:18:40 -0500 Subject: [PATCH 5/8] Address review comments --- R-packages/covidcast/R/covidcast.R | 10 ++++++---- 
.../api.covidcast.cmu.edu/epidata/api.php-32641f.csv | 2 +- .../api.covidcast.cmu.edu/epidata/api.php-f49e8f.csv | 2 +- R-packages/covidcast/tests/testthat/test-covidcast.R | 2 +- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/R-packages/covidcast/R/covidcast.R b/R-packages/covidcast/R/covidcast.R index 94f9d1f8..ebdc1e4d 100644 --- a/R-packages/covidcast/R/covidcast.R +++ b/R-packages/covidcast/R/covidcast.R @@ -725,16 +725,18 @@ covidcast <- function(data_source, signal, time_type, geo_type, time_values, params$lag <- lag } - # Make the API call - res <- .request(params) - if (nchar(res) == 0) { + # Make the API call. If the API returns a non-200 status code, indicating e.g. + # a database error, .request() raises an error. It returns an empty string if + # there are no results for our query. + response <- .request(params) + if (nchar(response) == 0) { # empty if no results return(NULL) } # geo_value must be read as character so FIPS codes are returned as character, # not numbers (with leading 0s potentially removed) - return(read.csv(textConnection(res), stringsAsFactors = FALSE, + return(read.csv(textConnection(response), stringsAsFactors = FALSE, colClasses = c("geo_value" = "character"))) } diff --git a/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-32641f.csv b/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-32641f.csv index fa87809e..77ab801f 100644 --- a/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-32641f.csv +++ b/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-32641f.csv @@ -1,3 +1,3 @@ geo_value,signal,time_value,direction,issue,lag,value,stderr,sample_size 01001,bar,20200110,,20200111,1,91.2,0.8,114.2 -01002,bar,20200110,,20200111,1,99.1,0.2,217.8 \ No newline at end of file +01002,bar,20200110,,20200111,1,99.1,0.2,217.8 diff --git a/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-f49e8f.csv 
b/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-f49e8f.csv index ca1a2793..fffef855 100644 --- a/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-f49e8f.csv +++ b/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-f49e8f.csv @@ -1,3 +1,3 @@ geo_value,signal,time_value,direction,issue,lag,value,stderr,sample_size 31001,bar,20200112,,20200113,1,81.2,0.8,314.2 -31002,bar,20200112,,20200113,1,89.1,0.2,417.8 \ No newline at end of file +31002,bar,20200112,,20200113,1,89.1,0.2,417.8 diff --git a/R-packages/covidcast/tests/testthat/test-covidcast.R b/R-packages/covidcast/tests/testthat/test-covidcast.R index 3687da22..86dd2c30 100644 --- a/R-packages/covidcast/tests/testthat/test-covidcast.R +++ b/R-packages/covidcast/tests/testthat/test-covidcast.R @@ -167,7 +167,7 @@ with_mock_api({ }) test_that("covidcast_signal stops when end_day < start_day", { - # reusing api.php-d2e163.json for metadata + # reusing api.php-dd024f.csv for metadata expect_error(covidcast_signal("foo", "bar", "2020-01-02", "2020-01-01")) }) From 4d11aa91f4a99eac0a04f334715d55ec68943c9b Mon Sep 17 00:00:00 2001 From: Alex Reinhart Date: Wed, 25 Nov 2020 11:53:38 -0500 Subject: [PATCH 6/8] Fix tests --- .../epidata/api.php-32641f.csv | 3 - .../epidata/api.php-b85d44.csv | 0 .../epidata/api.php-dd024f.csv | 6 +- .../epidata/api.php-f49e8f.csv | 3 - .../covidcast/tests/testthat/test-covidcast.R | 86 ++++++------------- 5 files changed, 27 insertions(+), 71 deletions(-) delete mode 100644 R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-32641f.csv delete mode 100644 R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-b85d44.csv delete mode 100644 R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-f49e8f.csv diff --git a/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-32641f.csv 
b/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-32641f.csv deleted file mode 100644 index 77ab801f..00000000 --- a/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-32641f.csv +++ /dev/null @@ -1,3 +0,0 @@ -geo_value,signal,time_value,direction,issue,lag,value,stderr,sample_size -01001,bar,20200110,,20200111,1,91.2,0.8,114.2 -01002,bar,20200110,,20200111,1,99.1,0.2,217.8 diff --git a/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-b85d44.csv b/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-b85d44.csv deleted file mode 100644 index e69de29b..00000000 diff --git a/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-dd024f.csv b/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-dd024f.csv index c114947b..27fa487a 100644 --- a/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-dd024f.csv +++ b/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-dd024f.csv @@ -1,3 +1,3 @@ -data_source,signal,time_type,geo_type,min_time,max_time,min_value,max_value,max_issue -foo,bar,day,county,20200101,20200102,0,10,20200404 -foo,bar,day,state,20201002,20201003,0,10,20201101 +data_source,signal,time_type,geo_type,min_time,max_time,min_value,max_value,num_locations,max_issue +foo,bar,day,county,20200101,20200102,0,10,100,20200404 +foo,bar2,day,county,20201002,20201003,0,10,100,20201101 diff --git a/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-f49e8f.csv b/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-f49e8f.csv deleted file mode 100644 index fffef855..00000000 --- a/R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-f49e8f.csv +++ /dev/null @@ -1,3 +0,0 @@ -geo_value,signal,time_value,direction,issue,lag,value,stderr,sample_size -31001,bar,20200112,,20200113,1,81.2,0.8,314.2 
-31002,bar,20200112,,20200113,1,89.1,0.2,417.8 diff --git a/R-packages/covidcast/tests/testthat/test-covidcast.R b/R-packages/covidcast/tests/testthat/test-covidcast.R index 86dd2c30..46b8c70b 100644 --- a/R-packages/covidcast/tests/testthat/test-covidcast.R +++ b/R-packages/covidcast/tests/testthat/test-covidcast.R @@ -46,13 +46,13 @@ with_mock_api({ data.frame( data_source = "foo", signal = c("bar", "bar2"), + time_type = "day", + geo_type = "county", min_time = as.Date(c("2020-01-01", "2020-10-02")), max_time = as.Date(c("2020-01-02", "2020-10-03")), min_value = 0, max_value = 10, num_locations = 100, - time_type = "day", - geo_type = c("county", "state"), max_issue = as.Date(c("2020-04-04", "2020-11-01")) ), class = c("covidcast_meta", "data.frame") @@ -123,49 +123,6 @@ with_mock_api({ ) }) - test_that("covidcast_signal works across multiple days with gaps", { - # If we request 3 days, we'll get 3 API queries. If the middle day is - # missing, we should get an appropriate warning, but still get the right - # data frame. 
- - # day 1: api.php-32641f.csv - # day 2: api.php-b85d44.csv (empty) - # day 3: api.php-f49e8f.csv - expect_warning(covidcast_signal("foo", "bar", "2020-01-10", "2020-01-12", - geo_type = "county"), - class = "covidcast_fetch_failed") - - res <- suppressWarnings( - covidcast_signal("foo", "bar", "2020-01-10", "2020-01-12", - geo_type = "county")) - expect_equal( - res, - structure(data.frame( - data_source = "foo", - signal = "bar", - geo_value = c("01001", "01002", "31001", "31002"), - time_value = as.Date(c("2020-01-10", "2020-01-10", - "2020-01-12", "2020-01-12")), - issue = as.Date(c("2020-01-11", "2020-01-11", - "2020-01-13", "2020-01-13")), - lag = 1, - value = c(91.2, 99.1, 81.2, 89.1), - stderr = c(0.8, 0.2, 0.8, 0.2), - sample_size = c(114.2, 217.8, 314.2, 417.8)), - class = c("covidcast_signal", "data.frame"), - metadata = structure(data.frame( - data_source = "foo", - signal = "bar", - time_type = "day", - geo_type = "county", - min_time = as.Date("2020-01-01"), - max_time = as.Date("2020-01-02"), - min_value = 0, - max_value = 10, - max_issue = as.Date("2020-04-04")), - class = c("covidcast_meta", "data.frame")))) - }) - test_that("covidcast_signal stops when end_day < start_day", { # reusing api.php-dd024f.csv for metadata expect_error(covidcast_signal("foo", "bar", "2020-01-02", "2020-01-01")) @@ -183,7 +140,7 @@ with_mock_api({ test_that("covidcast_days does not treat \"*\" as a missing geo_value", { stub(covidcast_days, "covidcast", - list(message = "success", epidata = data.frame( + data.frame( geo_value = c("geoa", "geob"), signal = "signal", time_value = c(20201030, 20201031), @@ -193,7 +150,8 @@ test_that("covidcast_days does not treat \"*\" as a missing geo_value", { value = 3, stderr = NA, sample_size = NA - ), result = 1)) + )) + # Expect no warning expect_warning( covidcast_days( @@ -212,7 +170,7 @@ test_that("covidcast_days does not treat \"*\" as a missing geo_value", { test_that("covidcast_days does not raise warnings for full response", 
{ stub(covidcast_days, "covidcast", - list(message = "success", epidata = data.frame( + data.frame( geo_value = c("geoa"), signal = "signal", time_value = c(20201030, 20201031), @@ -222,7 +180,8 @@ test_that("covidcast_days does not raise warnings for full response", { value = 3, stderr = NA, sample_size = NA - ), result = 1)) + )) + # Expect no warning expect_warning( covidcast_days( @@ -240,19 +199,22 @@ test_that("covidcast_days does not raise warnings for full response", { }) test_that("covidcast_days batches calls to covidcast", { - covidcast_returns <- rep(list(list(message = "success", epidata = data.frame( - geo_value = c("geoa"), - signal = "signal", - time_value = rep(NA, 3), - direction = NA, - issue = as.Date("2020-11-04"), - lag = 2, - value = 3, - stderr = NA, - sample_size = NA - ), result = 1)), 10) - covidcast_returns[[1]]$epidata$time_value <- 20101001:20101003 - covidcast_returns[[2]]$epidata$time_value <- 20101004:20101006 + covidcast_returns <- rep( + list( + data.frame( + geo_value = c("geoa"), + signal = "signal", + time_value = rep(NA, 3), + direction = NA, + issue = as.Date("2020-11-04"), + lag = 2, + value = 3, + stderr = NA, + sample_size = NA + )), + 10) + covidcast_returns[[1]]$time_value <- 20101001:20101003 + covidcast_returns[[2]]$time_value <- 20101004:20101006 m <- mock(covidcast_returns[[1]], covidcast_returns[[2]]) stub(covidcast_days, "covidcast", m) From 0e56d9ab1b60179a84b4ec98168685dadf8e6b93 Mon Sep 17 00:00:00 2001 From: Alex Reinhart Date: Wed, 25 Nov 2020 11:54:49 -0500 Subject: [PATCH 7/8] Fix vignette --- R-packages/covidcast/vignettes/correlation-utils.Rmd | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R-packages/covidcast/vignettes/correlation-utils.Rmd b/R-packages/covidcast/vignettes/correlation-utils.Rmd index 2ca9e3c2..80106e96 100644 --- a/R-packages/covidcast/vignettes/correlation-utils.Rmd +++ b/R-packages/covidcast/vignettes/correlation-utils.Rmd @@ -147,6 +147,8 @@ we can plot these correlations in space 
as a choropleth map, using ```{r, fig.width = 10, fig.height = 8} # Set a bunch of fields so that the data frame knows how to plot itself +df_cor2$data_source <- "cor" +df_cor2$signal <- "cor" df_cor2$time_value <- start_day df_cor2$issue <- start_day attributes(df_cor2)$metadata$geo_type <- "county" From e12eb5fbafb72ec1ce0feb426ce1be30cdac9fc1 Mon Sep 17 00:00:00 2001 From: Alex Reinhart Date: Wed, 25 Nov 2020 12:05:18 -0500 Subject: [PATCH 8/8] Update NEWS for new features --- R-packages/covidcast/NEWS.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/R-packages/covidcast/NEWS.md b/R-packages/covidcast/NEWS.md index 76781ae7..7fcf31ad 100644 --- a/R-packages/covidcast/NEWS.md +++ b/R-packages/covidcast/NEWS.md @@ -8,6 +8,15 @@ Released TODO DATE. filter data frames with multiple issues of each observation, obtaining only the latest or earliest issue of each. +- `covidcast_signal()` now batches requests, so that many days of data can be + fetched in one API call. This dramatically improves the speed of fetching + state-, MSA-, and HRR-level data, which have few enough locations that many + days fit within the API's row limit. County-level signals, such as cases and + deaths, may still require one API call per day, since the API's row limit is + only slightly larger than the number of counties in the United States. + +- `covidcast_signal()` now fetches data from the API server in CSV format, + rather than JSON, which requires less bandwidth and parsing. # covidcast 0.3.1