@@ -130,6 +130,9 @@ load_response_one <- function(input_filename, params, contingency_run) {
130130 Q80 = col_integer(),
131131 I5 = col_character(),
132132 I7 = col_character(),
133+ V1alt = col_character(),
134+ V15c = col_character(),
135+ P6 = col_character(),
133136 E2_1 = col_integer(),
134137 E2_2 = col_integer()
135138 ),
@@ -162,6 +165,8 @@ load_response_one <- function(input_filename, params, contingency_run) {
162165 assert(length(wave ) == 1 , " can only code one wave at a time" )
163166
164167 input_data <- module_assignment(input_data , wave )
168+ input_data <- experimental_arm_assignment(input_data , wave )
169+
165170 input_data <- bodge_v4_translation(input_data , wave )
166171 input_data <- bodge_C6_C8(input_data , wave )
167172 input_data <- bodge_B13(input_data , wave )
@@ -364,7 +369,8 @@ filter_data_for_aggregation <- function(df, params, lead_days = 12L)
364369 dplyr :: between(.data $ hh_number_sick , 0L , 30L ),
365370 dplyr :: between(.data $ hh_number_total , 1L , 30L ),
366371 .data $ hh_number_sick < = .data $ hh_number_total ,
367- .data $ day > = (as.Date(params $ start_date ) - lead_days )
372+ .data $ day > = (as.Date(params $ start_date ) - lead_days ),
373+ .data $ wave != 12.5 # Ignore experimental Wave 12 data
368374 )
369375
370376 msg_plain(paste0(" Finished filtering data for aggregations" ))
@@ -503,6 +509,28 @@ module_assignment <- function(input_data, wave) {
503509 return (input_data )
504510}
505511
512+ # ' Label arms of experimental Wave 12.
513+ # '
514+ # ' @param input_data data frame of responses, before subsetting to select
515+ # ' variables
516+ # ' @param wave integer indicating survey version
517+ # '
518+ # ' @return data frame with new `module` column
519+ # ' @importFrom dplyr case_when
520+ experimental_arm_assignment <- function (input_data , wave ) {
521+ if (wave == 12.5 ) {
522+ assert( " random_number_exp" %in% names(input_data ) )
523+ input_data $ w12_treatment <- case_when(
524+ input_data $ random_number_exp > = 0.6666 ~ 1 , # demographics placed after symptom items
525+ input_data $ random_number_exp > = 0.3333 ~ 2 , # demographics placed after vaccine items
526+ input_data $ random_number_exp < 0.3333 ~ 3 , # alternative wording to V1
527+ TRUE ~ NA_real_
528+ )
529+ }
530+
531+ return (input_data )
532+ }
533+
506534# ' Create dataset for sharing with research partners
507535# '
508536# ' Different survey waves may have different sets of questions. Here we report
@@ -511,11 +539,13 @@ module_assignment <- function(input_data, wave) {
511539# '
512540# ' @param input_data data frame of responses
513541# ' @param county_crosswalk crosswalk mapping ZIP5 to counties
542+ # ' @param params list containing `produce_individual_raceeth`, indicating
543+ # ' whether or not to issue microdata with race-ethnicity field
514544# ' @importFrom stringi stri_trim stri_replace_all
515545# ' @importFrom dplyr left_join group_by filter ungroup select rename
516546# '
517547# ' @export
518- create_complete_responses <- function (input_data , county_crosswalk )
548+ create_complete_responses <- function (input_data , county_crosswalk , params )
519549{
520550 cols_to_report <- c(
521551 " start_dt" , " end_dt" , " date" ,
@@ -541,9 +571,10 @@ create_complete_responses <- function(input_data, county_crosswalk)
541571 " B10c" , " B13" , " C18a" , " C18b" , " C7a" , " D12" , " E4" ,
542572 " G1" , " G2" , " G3" , " H1" , " H2" , " H3" , " I1" , " I2" , " I3" , " I4" , " I5" ,
543573 " I6_1" , " I6_2" , " I6_3" , " I6_4" , " I6_5" , " I6_6" , " I6_7" , " I6_8" ,
544- " I7" , " K1" , " K2" , " V11a" , " V12a" , " V15a" , " V15b" , " V16" , " V3a" , " module" , # added in Wave 11
574+ " I7" , " K1" , " K2" , " V11a" , " V12a" , " V15a" , " V15b" , " V16" , " V3a" , # added in Wave 11
575+ " V1alt" , " B13a" , " V15c" , " P1" , " P2" , " P3" , " P4" , " P5" , " P6" , # added in experimental Wave 12
545576
546- " raceethnicity" , " token" , " wave" , " UserLanguage" ,
577+ " raceethnicity" , " token" , " wave" , " w12_treatment " , " module " , " UserLanguage" ,
547578 " zip5" # temporarily; we'll filter by this column later and then drop it before writing
548579 )
549580
@@ -617,7 +648,10 @@ surveyID_to_wave <- Vectorize(function(surveyID) {
617648 " SV_ddjHkcYrrLWgM2V" = 7 ,
618649 " SV_ewAVaX7Wz3l0UqG" = 8 ,
619650 " SV_6PADB8DyF9SIyXk" = 10 ,
620- " SV_4VEaeffqQtDo33M" = 11 )
651+ " SV_4VEaeffqQtDo33M" = 11 ,
652+ " SV_3TL0r243mLkDzCK" = 12.5 , # experimental version of Wave 12
653+ " TBD finalized version" = 12 # finalized version of Wave 12
654+ )
621655
622656 if ( any(names(waves ) == surveyID ) ) {
623657 return (waves [[surveyID ]])
@@ -667,9 +701,11 @@ filter_complete_responses <- function(data_full, params)
667701 data_full <- select(data_full , - .data $ zip5 )
668702
669703 # 9 includes StartDatetime, EndDatetime, Date, token, wave, geo_id,
670- # UserLanguage + two questions (ignore raceethnicity field which may or may
671- # not exist, depending on params)
672- valid_row_filter <- rowSums( ! is.na(data_full [, names(data_full ) != " raceethnicity" ]) ) > = 9
704+ # UserLanguage + two questions (ignore raceethnicity, module, and
705+ # w12_assignment fields which may or may not exist, depending on params and
706+ # survey version)
707+ ignore_cols <- c(" raceethnicity" , " w12_assignment" , " module" )
708+ valid_row_filter <- rowSums( ! is.na(data_full [, ! (names(data_full ) %in% ignore_cols )]) ) > = 9
673709 data_full <- data_full [valid_row_filter , ]
674710
675711 return (data_full )
0 commit comments