@@ -126,6 +126,7 @@ signal_sheet <- suppressMessages(read_csv("delphi-eng-covidcast-data-sources-sig
126126# Fields we want to add.
127127new_fields <- c(
128128 " Geographic Scope" ,
129+ " Delphi-Aggregated Geography" ,
129130 " Temporal Scope Start" ,
130131 " Temporal Scope End" ,
131132 " Reporting Cadence" ,
@@ -344,74 +345,169 @@ geo_scope <- c(
344345source_updated [, col ] <- geo_scope [source_updated $ data_source ]
345346
346347
348+
349+
347350col <- " Available Geography"
348- # List all available geo-levels. If a geo-level was created by Delphi
349- # aggregation (as opposed to being ingested directly from the data source),
350- # indicate this as per this example: county, state (by Delphi), National
351- # (by Delphi).
352-
353- # Tool: Create lists of geos for each data source-signal combo based on what is reported in metadata (does not include quidel, at least with).
354- metadata_factorgeo <- metadata
355- metadata_factorgeo $ geo_type <- factor (metadata_factorgeo $ geo_type , levels = c(" county" , " hrr" , " msa" , " dma" , " state" , " hhs" , " nation" ))
356- auto_geo_list_by_signal <- arrange(
357- metadata_factorgeo ,
358- geo_type
359- ) %> %
360- group_by(
361- data_source ,
362- signal
363- ) %> %
364- summarize(
365- geos_list = paste(geo_type , collapse = " , " ),
366- .groups = " keep"
367- ) %> %
368- ungroup()
351+ # List all available geo-levels, e.g. county,state,nation
352+
353+ # # Tool: Create lists of geos for each data source-signal combo based on what is
354+ # # reported in metadata (does not include quidel).
355+ # metadata_factorgeo <- metadata
356+ # metadata_factorgeo$geo_type <- factor(metadata_factorgeo$geo_type, levels = c("county", "hrr", "msa", "dma", "state", "hhs", "nation"))
357+ # auto_geo_list_by_signal <- arrange(
358+ # metadata_factorgeo,
359+ # geo_type
360+ # ) %>%
361+ # group_by(
362+ # data_source,
363+ # signal
364+ # ) %>%
365+ # summarize(
366+ # geos_list = paste(geo_type, collapse = ", "),
367+ # .groups = "keep"
368+ # ) %>%
369+ # ungroup()
370+
371+ # # Tool: Are there any data sources where geos_list is different for different signals?
372+ # different_geos_by_signal <- count(auto_geo_list_by_signal, data_source, geos_list, name = "n_signals")
373+ # # different_geos_by_signal
374+ # # which(duplicated(select(different_geos_by_signal, data_source)))
375+
376+ # # Keep most common geos_list for each data source.
377+ # most_common_geos_list <- group_by(different_geos_by_signal, data_source) %>%
378+ # slice_max(n_signals, with_ties = FALSE)
379+ # # most_common_geos_list
380+ # leftover_datasource_geos <- anti_join(different_geos_by_signal, most_common_geos_list)
381+ # # leftover_datasource_geos
382+ # leftover_signal_geos <- semi_join(auto_geo_list_by_signal, leftover_datasource_geos)
383+ # # leftover_signal_geos
384+
385+ # These values are applied first. They are the default (most common) geos for each data source.
386+ avail_geos <- c(
387+ " chng" = glue(" county,hrr,msa,state,hhs,nation" ),
388+ " covid-act-now" = glue(" county,hrr,msa,state,hhs,nation" ),
389+ " doctor-visits" = glue(" county,hrr,msa,state,hhs,nation" ),
390+ " dsew-cpr" = glue(" county,msa,state,hhs,nation" ),
391+ " fb-survey" = glue(" county,hrr,msa,state,nation" ),
392+ " ght" = glue(" hrr,msa,dma,state" ),
393+ " google-survey" = glue(" county,hrr,msa,state" ),
394+ " google-symptoms" = glue(" county,hrr,msa,state,hhs,nation" ),
395+ " hhs" = glue(" state,hhs,nation" ),
396+ " hospital-admissions" = glue(" county,hrr,msa,state,hhs,nation" ),
397+ " indicator-combination" = glue(" county,hrr,msa,state,hhs,nation" ),
398+ " jhu-csse" = glue(" county,hrr,msa,state,hhs,nation" ),
399+ " nchs-mortality" = glue(" state,nation" ),
400+ # Quidel non-flu signals
401+ " quidel" = glue(" county,hrr,msa,state,hhs,nation" ),
402+ " safegraph" = glue(" county,hrr,msa,state,hhs,nation" ),
403+ " usa-facts" = glue(" county,hrr,msa,state,hhs,nation" ),
404+ " youtube-survey" = " state"
405+ )
406+
407+ # These are signal-specific geo lists. These are less common and are applied as a patch.
408+ dsew_geos <- glue(" state,hhs,nation" )
409+ fb_geos1 <- glue(" county,state,nation" )
410+ fb_geos2 <- glue(" county,msa,state,nation" )
411+ hosp_geos <- glue(" county,hrr,msa,state" )
412+ combo_geos <- glue(" county,msa,state" )
413+ quidel_geos <- glue(" msa,state" )
414+ leftover_signal_geos_manual <- tibble :: tribble(
415+ ~ data_source , ~ signal , ~ geos_list ,
416+ " chng" , " 7dav_inpatient_covid" , " state" ,
417+ " chng" , " 7dav_outpatient_covid" , " state" ,
418+
419+ " dsew-cpr" , " booster_doses_admin_7dav" , dsew_geos ,
420+ " dsew-cpr" , " doses_admin_7dav" , dsew_geos ,
421+ " dsew-cpr" , " people_booster_doses" , dsew_geos ,
422+
423+ " fb-survey" , " smoothed_vaccine_barrier_appointment_location_tried" , fb_geos1 ,
424+ " fb-survey" , " smoothed_vaccine_barrier_other_tried" , fb_geos1 ,
425+ " fb-survey" , " smoothed_wvaccine_barrier_appointment_location_tried" , fb_geos1 ,
426+ " fb-survey" , " smoothed_wvaccine_barrier_other_tried" , fb_geos1 ,
427+
428+ " fb-survey" , " smoothed_vaccine_barrier_appointment_time_tried" , fb_geos2 ,
429+ " fb-survey" , " smoothed_vaccine_barrier_childcare_tried" , fb_geos2 ,
430+ " fb-survey" , " smoothed_vaccine_barrier_document_tried" , fb_geos2 ,
431+ " fb-survey" , " smoothed_vaccine_barrier_eligible_tried" , fb_geos2 ,
432+ " fb-survey" , " smoothed_vaccine_barrier_language_tried" , fb_geos2 ,
433+ " fb-survey" , " smoothed_vaccine_barrier_no_appointments_tried" , fb_geos2 ,
434+ " fb-survey" , " smoothed_vaccine_barrier_none_tried" , fb_geos2 ,
435+ " fb-survey" , " smoothed_vaccine_barrier_technical_difficulties_tried" , fb_geos2 ,
436+ " fb-survey" , " smoothed_vaccine_barrier_technology_access_tried" , fb_geos2 ,
437+ " fb-survey" , " smoothed_vaccine_barrier_time_tried" , fb_geos2 ,
438+ " fb-survey" , " smoothed_vaccine_barrier_travel_tried" , fb_geos2 ,
439+ " fb-survey" , " smoothed_vaccine_barrier_type_tried" , fb_geos2 ,
440+ " fb-survey" , " smoothed_wvaccine_barrier_appointment_time_tried" , fb_geos2 ,
441+ " fb-survey" , " smoothed_wvaccine_barrier_childcare_tried" , fb_geos2 ,
442+ " fb-survey" , " smoothed_wvaccine_barrier_document_tried" , fb_geos2 ,
443+ " fb-survey" , " smoothed_wvaccine_barrier_eligible_tried" , fb_geos2 ,
444+ " fb-survey" , " smoothed_wvaccine_barrier_language_tried" , fb_geos2 ,
445+ " fb-survey" , " smoothed_wvaccine_barrier_no_appointments_tried" , fb_geos2 ,
446+ " fb-survey" , " smoothed_wvaccine_barrier_none_tried" , fb_geos2 ,
447+ " fb-survey" , " smoothed_wvaccine_barrier_technical_difficulties_tried" , fb_geos2 ,
448+ " fb-survey" , " smoothed_wvaccine_barrier_technology_access_tried" , fb_geos2 ,
449+ " fb-survey" , " smoothed_wvaccine_barrier_time_tried" , fb_geos2 ,
450+ " fb-survey" , " smoothed_wvaccine_barrier_travel_tried" , fb_geos2 ,
451+ " fb-survey" , " smoothed_wvaccine_barrier_type_tried" , fb_geos2 ,
452+
453+ " hospital-admissions" , " smoothed_adj_covid19" , hosp_geos ,
454+ " hospital-admissions" , " smoothed_covid19" , hosp_geos ,
455+
456+ " indicator-combination" , " nmf_day_doc_fbc_fbs_ght" , combo_geos ,
457+ " indicator-combination" , " nmf_day_doc_fbs_ght" , combo_geos ,
458+
459+ # Quidel flu signals
460+ " quidel" , " raw_pct_negative" , quidel_geos ,
461+ " quidel" , " smoothed_pct_negative" , quidel_geos ,
462+ " quidel" , " raw_tests_per_device" , quidel_geos ,
463+ " quidel" , " smoothed_tests_per_device" , quidel_geos
464+ )
369465
370- # Tool: Are there any data sources where geos_list is different for different signal?
371- different_geos_by_signal <- count(auto_geo_list_by_signal , data_source , geos_list , name = " n_signals" )
372- # different_geos_by_signal
373- # which(duplicated(select(different_geos_by_signal, data_source)))
466+ source_updated [, col ] <- coalesce(avail_geos [source_updated $ data_source ], source_updated [[col ]])
467+
468+ source_updated <- left_join(
469+ source_updated , leftover_signal_geos_manual ,
470+ by = c(" Signal" = " signal" , " data_source" )
471+ ) %> %
472+ mutate(`Available Geography` = coalesce(geos_list , `Available Geography` )) %> %
473+ select(- geos_list )
374474
375- # Keep most common geos_list for each data source.
376- most_common_geos_list <- group_by(different_geos_by_signal , data_source ) %> %
377- slice_max(n_signals , with_ties = FALSE )
378- # most_common_geos_list
379- leftover_datasource_geos <- anti_join(different_geos_by_signal , most_common_geos_list )
380- # leftover_datasource_geos
381- leftover_signal_geos <- semi_join(auto_geo_list_by_signal , leftover_datasource_geos )
382- # leftover_signal_geos
383475
384- delphi_agg_text <- " (by Delphi)"
476+ col <- " Delphi-Aggregated Geography"
477+ # List the available geo-levels that Delphi created by aggregation (as opposed
478+ # to ingesting them directly from the data source). For example, if a signal is
479+ # available at the county, state, and nation levels, but Delphi aggregated state
480+ # and nation from the provided county data, the value is: state,nation
385481
386482# These values are applied first. They are the default (most common) geos for each data source.
387483avail_geos <- c(
388- " chng" = glue(" county, hrr{delphi_agg_text}, msa{delphi_agg_text}, state{delphi_agg_text}, hhs{delphi_agg_text}, nation{delphi_agg_text} " ),
389- " covid-act-now" = glue(" county, hrr{delphi_agg_text}, msa{delphi_agg_text}, state{delphi_agg_text}, hhs{delphi_agg_text}, nation{delphi_agg_text} " ),
390- " doctor-visits" = glue(" county, hrr{delphi_agg_text}, msa{delphi_agg_text}, state{delphi_agg_text}, hhs{delphi_agg_text}, nation{delphi_agg_text} " ),
391- " dsew-cpr" = glue(" county, msa, state, hhs, nation{delphi_agg_text} " ),
392- " fb-survey" = glue(" county{delphi_agg_text}, hrr{delphi_agg_text}, msa{delphi_agg_text}, state{delphi_agg_text}, nation{delphi_agg_text} " ),
393- " ght" = glue(" hrr{delphi_agg_text}, msa{delphi_agg_text}, dma, state " ),
394- " google-survey" = glue(" county{delphi_agg_text}, hrr{delphi_agg_text}, msa{delphi_agg_text}, state{delphi_agg_text} " ),
395- " google-symptoms" = glue(" county, hrr{delphi_agg_text}, msa{delphi_agg_text}, state, hhs{delphi_agg_text}, nation{delphi_agg_text} " ),
396- " hhs" = glue(" state, hhs{delphi_agg_text}, nation{delphi_agg_text} " ),
397- " hospital-admissions" = glue(" county{delphi_agg_text}, hrr{delphi_agg_text}, msa{delphi_agg_text}, state{delphi_agg_text}, hhs{delphi_agg_text}, nation{delphi_agg_text} " ),
398- " indicator-combination" = glue(" county{delphi_agg_text}, hrr{delphi_agg_text}, msa{delphi_agg_text}, state{delphi_agg_text}, hhs{delphi_agg_text}, nation{delphi_agg_text} " ),
399- " jhu-csse" = glue(" county, hrr{delphi_agg_text}, msa{delphi_agg_text}, state{delphi_agg_text}, hhs{delphi_agg_text}, nation{delphi_agg_text} " ),
400- " nchs-mortality" = glue( " state, nation " ) ,
484+ " chng" = glue(" hrr, msa, state, hhs, nation" ),
485+ " covid-act-now" = glue(" hrr, msa, state, hhs, nation" ),
486+ " doctor-visits" = glue(" hrr, msa, state, hhs, nation" ),
487+ " dsew-cpr" = glue(" nation" ),
488+ " fb-survey" = glue(" county, hrr, msa, state, nation" ),
489+ " ght" = glue(" hrr, msa" ),
490+ " google-survey" = glue(" county, hrr, msa, state" ),
491+ " google-symptoms" = glue(" hrr, msa, hhs, nation" ),
492+ " hhs" = glue(" hhs, nation" ),
493+ " hospital-admissions" = glue(" county, hrr, msa, state, hhs, nation" ),
494+ " indicator-combination" = glue(" county, hrr, msa, state, hhs, nation" ),
495+ " jhu-csse" = glue(" hrr, msa, state, hhs, nation" ),
496+ " nchs-mortality" = NA_character_ ,
401497 # Quidel non-flu signals
402- " quidel" = glue(" county{delphi_agg_text}, hrr{delphi_agg_text}, msa{delphi_agg_text}, state{delphi_agg_text}, hhs{delphi_agg_text}, nation{delphi_agg_text} " ),
403- " safegraph" = glue(" county{delphi_agg_text}, hrr{delphi_agg_text}, msa{delphi_agg_text}, state{delphi_agg_text}, hhs{delphi_agg_text}, nation{delphi_agg_text} " ),
404- " usa-facts" = glue(" county, hrr{delphi_agg_text}, msa{delphi_agg_text}, state{delphi_agg_text}, hhs{delphi_agg_text}, nation{delphi_agg_text} " ),
405- " youtube-survey" = " state{delphi_agg_text} "
498+ " quidel" = glue(" county, hrr, msa, state, hhs, nation" ),
499+ " safegraph" = glue(" county, hrr, msa, state, hhs, nation" ),
500+ " usa-facts" = glue(" hrr, msa, state, hhs, nation" ),
501+ " youtube-survey" = " state"
406502)
407503
408504# These are signal-specific geo lists. These are less common and are applied as a patch.
409- dsew_geos <- glue(" state, hhs, nation{delphi_agg_text} " )
410- fb_geos1 <- glue(" county{delphi_agg_text}, state{delphi_agg_text}, nation{delphi_agg_text} " )
411- fb_geos2 <- glue(" county{delphi_agg_text}, msa{delphi_agg_text}, state{delphi_agg_text}, nation{delphi_agg_text} " )
412- hosp_geos <- glue(" county{delphi_agg_text}, hrr{delphi_agg_text}, msa{delphi_agg_text}, state{delphi_agg_text} " )
413- combo_geos <- glue(" county{delphi_agg_text}, msa{delphi_agg_text}, state{delphi_agg_text} " )
414- quidel_geos <- glue(" msa{delphi_agg_text}, state{delphi_agg_text} " )
505+ dsew_geos <- glue(" nation" )
506+ fb_geos1 <- glue(" county, state, nation" )
507+ fb_geos2 <- glue(" county, msa, state, nation" )
508+ hosp_geos <- glue(" county, hrr, msa, state" )
509+ combo_geos <- glue(" county, msa, state" )
510+ quidel_geos <- glue(" msa, state" )
415511leftover_signal_geos_manual <- tibble :: tribble(
416512 ~ data_source , ~ signal , ~ geos_list ,
417513 " chng" , " 7dav_inpatient_covid" , " state" ,
@@ -470,10 +566,11 @@ source_updated <- left_join(
470566 source_updated , leftover_signal_geos_manual ,
471567 by = c(" Signal" = " signal" , " data_source" )
472568) %> %
473- mutate(`Available Geography` = coalesce(geos_list , `Available Geography` )) %> %
569+ mutate(`Delphi-Aggregated Geography` = coalesce(geos_list , `Delphi-Aggregated Geography` )) %> %
474570 select(- geos_list )
475571
476572
573+
477574# Temporal Scope Start
478575# Above. YYYY-MM-DD, with epiweeks as YYYY-WW. Formatted as a string
479576
0 commit comments