Skip to content

Commit 7c5abc6

Browse files
committed
Convert API calls to request CSV format for data, instead of JSON
The CSV format is much more compact (it does not repeat field names for every row) and is a more natural fit for R anyway. The relevant tests have been altered to serve CSVs, and I've verified that all vignettes build with these changes.
1 parent 1ef8ff1 commit 7c5abc6

File tree

12 files changed

+62
-116
lines changed

12 files changed

+62
-116
lines changed

R-packages/covidcast/R/covidcast.R

Lines changed: 47 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -429,7 +429,9 @@ covidcast_signals <- function(data_source, signal,
429429
#'
430430
#' @export
431431
covidcast_meta <- function() {
432-
meta <- .request(list(source='covidcast_meta', cached="true"))
432+
meta <- jsonlite::fromJSON(.request(
433+
list(source = "covidcast_meta",
434+
cached = "true")))
433435

434436
if (meta$message != "success") {
435437
abort(paste0("Failed to obtain metadata: ", meta$message, "."),
@@ -560,14 +562,14 @@ covidcast_days <- function(data_source, signal, start_day, end_day, geo_type,
560562

561563
# The API limits the number of rows that can be returned at once, so we query
562564
# in batches.
563-
for (i in seq(1, num_batches)) {
565+
for (i in seq_len(num_batches)) {
564566
start_offset <- (i - 1) * max_days_at_time
565567
end_offset <- min(i * max_days_at_time, ndays) - 1
566568
query_start_day <- start_day + start_offset
567569
query_end_day <- start_day + end_offset
568570

569571
time_values <- date_to_string(days[(start_offset + 1):(end_offset + 1)])
570-
dat[[i]] <- covidcast(data_source = data_source,
572+
response <- covidcast(data_source = data_source,
571573
signal = signal,
572574
time_type = "day",
573575
geo_type = geo_type,
@@ -576,22 +578,37 @@ covidcast_days <- function(data_source, signal, start_day, end_day, geo_type,
576578
as_of = as_of,
577579
issues = issues,
578580
lag = lag)
581+
582+
if (is.null(response)) {
583+
warn(paste0("Fetching ", signal, " from ", data_source, " for ",
584+
query_start_day, " to ", query_end_day,
585+
" in geography '", geo_value, "': no results"),
586+
data_source = data_source,
587+
signal = signal,
588+
start_day = query_start_day,
589+
end_day = query_end_day,
590+
geo_value = geo_value,
591+
class = "covidcast_fetch_failed")
592+
593+
next
594+
}
595+
596+
dat[[i]] <- response
597+
579598
summary <- sprintf(
580-
"Fetched day %s to %s: %s, %s, num_entries = %s",
599+
"Fetched day %s to %s: num_entries = %s",
581600
query_start_day,
582601
query_end_day,
583-
dat[[i]]$result,
584-
dat[[i]]$message,
585-
nrow(dat[[i]]$epidata)
586-
)
602+
nrow(response))
603+
587604
if (length(summary) != 0) {
588605
message(summary)
589606
}
590-
if (dat[[i]]$message == "success") {
607+
608+
if (nrow(response) > 0) {
591609
desired_geos <- tolower(unique(geo_value))
592610

593-
returned_epidata <- dat[[i]]$epidata
594-
returned_geo_array <- returned_epidata %>%
611+
returned_geo_array <- response %>%
595612
dplyr::select(geo_value, time_value) %>%
596613
dplyr::group_by(time_value) %>%
597614
dplyr::summarize(geo_value = list(geo_value))
@@ -607,10 +624,10 @@ covidcast_days <- function(data_source, signal, start_day, end_day, geo_type,
607624
signal = signal,
608625
day = missing_dates,
609626
geo_value = geo_value,
610-
api_msg = dat[[i]]$message,
611-
class = "covidcast_missing_geo_values"
627+
class = "covidcast_missing_time_values"
612628
)
613629
}
630+
614631
if (!identical("*", geo_value)) {
615632
missing_geo_array <- returned_geo_array[
616633
lapply(returned_geo_array$geo_value, length) < length(desired_geos), ]
@@ -626,26 +643,13 @@ covidcast_days <- function(data_source, signal, start_day, end_day, geo_type,
626643
signal = signal,
627644
day = api_to_date(missing_geo_array$time_value),
628645
geo_value = geo_value,
629-
api_msg = dat[[i]]$message,
630646
class = "covidcast_missing_geo_values")
631647
}
632648
}
633-
} else {
634-
warn(paste0("Fetching ", signal, " from ", data_source, " for ",
635-
query_start_day, " to ", query_end_day, " in geography '",
636-
geo_value, "': ", dat[[i]]$message),
637-
data_source = data_source,
638-
signal = signal,
639-
start_day = query_start_day,
640-
end_day = query_end_day,
641-
geo_value = geo_value,
642-
api_msg = dat[[i]]$message,
643-
class = "covidcast_fetch_failed")
644649
}
645650
}
646651

647652
df <- dat %>%
648-
purrr::map("epidata") %>% # just want $epidata part
649653
purrr::map(purrr::compact) %>% # remove the list elements that are NULL
650654
dplyr::bind_rows() # make this into a data frame
651655

@@ -681,22 +685,24 @@ geo_warning_message <- function(row, desired_geos) {
681685
covidcast <- function(data_source, signal, time_type, geo_type, time_values,
682686
geo_value, as_of, issues, lag) {
683687
# Check parameters
684-
if(missing(data_source) || missing(signal) || missing(time_type) ||
688+
if (missing(data_source) || missing(signal) || missing(time_type) ||
685689
missing(geo_type) || missing(time_values) || missing(geo_value)) {
686690
stop("`data_source`, `signal`, `time_type`, `geo_type`, `time_values`, ",
687691
"and `geo_value` are all required.")
688692
}
689693

690694
# Set up request
691695
params <- list(
692-
source = 'covidcast',
696+
source = "covidcast",
693697
data_source = data_source,
694698
signal = signal,
695699
time_type = time_type,
696700
geo_type = geo_type,
697701
time_values = .list(time_values),
698-
geo_value = geo_value
702+
geo_value = geo_value,
703+
format = "csv"
699704
)
705+
700706
if (length(params$geo_value) > 1) {
701707
params$geo_values <- paste0(params$geo_value, collapse = ",") #convert to string
702708
params$geo_value <- NULL
@@ -722,7 +728,16 @@ covidcast <- function(data_source, signal, time_type, geo_type, time_values,
722728
}
723729

724730
# Make the API call
725-
return(.request(params))
731+
res <- .request(params)
732+
if (nchar(res) == 0) {
733+
# empty if no results
734+
return(NULL)
735+
}
736+
737+
# geo_value must be read as character so FIPS codes are returned as character,
738+
# not numbers (with leading 0s potentially removed)
739+
return(read.csv(textConnection(res), stringsAsFactors = FALSE,
740+
colClasses = c("geo_value" = "character")))
726741
}
727742

728743
# Helper function to cast values and/or ranges to strings
@@ -751,8 +766,8 @@ covidcast <- function(data_source, signal, time_type, geo_type, time_values,
751766

752767
httr::stop_for_status(response, task = "fetch data from API")
753768

754-
return(jsonlite::fromJSON(httr::content(response, as = "text",
755-
encoding = "utf-8")))
769+
return(httr::content(response, as = "text",
770+
encoding = "utf-8"))
756771
}
757772

758773
# This is the date format expected by the API
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
geo_value,signal,time_value,issue,lag,value,stderr,sample_size
2+
01000,bar-not-found,20200101,20200102,1,1.0,0.1,2.0
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
signal,geo_value,value,time_value,issue,lag,sample_size,stderr
2+
bar,pa,1,20200101,20200101,0,1,1
3+
bar,tx,1,20200101,20200101,0,1,1

R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-64a69c.json

Lines changed: 0 additions & 26 deletions
This file was deleted.

R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-6a5814.json

Lines changed: 0 additions & 26 deletions
This file was deleted.

R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-96f6a5.json

Lines changed: 0 additions & 4 deletions
This file was deleted.

R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-b6e478.csv

Whitespace-only changes.

R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-cb89ad.json

Lines changed: 0 additions & 17 deletions
This file was deleted.

R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-d707dc.csv

Whitespace-only changes.

R-packages/covidcast/tests/testthat/api.covidcast.cmu.edu/epidata/api.php-da6974.json

Lines changed: 0 additions & 4 deletions
This file was deleted.

0 commit comments

Comments
 (0)