diff --git a/DESCRIPTION b/DESCRIPTION index 45978d0..ab83089 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: qualcontrol Title: Quality Control -Version: 1.3.8 +Version: 1.3.8.9000 Authors@R: person("Vegard", "Lysne", , "vegard.lysne@helsedir.no", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-0816-5075")) diff --git a/NEWS.md b/NEWS.md index 3e83922..00f6eef 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,34 @@ +# qualcontrol (development version) + +## Bugfix and other changes +1. In `plot_timeseries()`, the input data is limited to rows with actual data, which was causing a bug if all geo was missing data on specific years. These years was omitted from the plots. + - Now the data is rectangularized on year to ensure all years are included. + - The mechanism to limit number of years plotted filtered out 2 more years than requested, which is now fixed. +2. `check_friskvik()` changes + - gains a comment column + - Implemented `fix_case_insensitive_match` to fix case-insensitive matching of column names (e.g. aar vs AAR) + - Implemented `read_friskvik_spec()` to read spec file if it exists + - When checking if cube file is in publication folder, ignore file extension. + - Include a check of whether the indicator is included on page 4 of the profiles ([issue #54](https://github.com/helseprofil/qualcontrol/issues/54)) + - UNGDATA-indicators excluded from periode_bm/nn-check +3. `plot_boxplot` + - More effective calculation of baseplotdata using collapse + - Calculates device size based on file with max number of panels, insted of always using the first file ([issue #53](https://github.com/helseprofil/qualcontrol/issues/53)) +4. Options are now checked (and updated) when the package is attached, not just when loaded ([issue #51](https://github.com/helseprofil/qualcontrol/issues/51)) +5. In `comparecube_summary`, new_prikk and expired_prikk are now only reported for SPVFLAGG, and based on 0 vs non-0 ([issue #41](https://github.com/helseprofil/qualcontrol/issues/41)) +6. `plot_timeseries_bydel` + - Optimized using the same strategy as other plotting functions. Unneccessary panel text is removed and caption added ([issue #47](https://github.com/helseprofil/qualcontrol/issues/47)). + - Strata with only 1 observations is filtered out ([issue #52](https://github.com/helseprofil/qualcontrol/issues/52)) +7. `aggregate_cube` now uses collapse, and keeps unique and max values of columns that cannot be aggregated. +8. `read_files` now collects naboprikk-columns into one 0|1-column, and adds missing censor-columns +9. `identify_coltypes` now collects censor-columns, and does not include `any_diffs`/`newrow`/`exprow` as values +10. New function `explore_different_censoring`, filtering out strata with differing censoring across the new and old file. +11. New function `diffvals_which_levels`, which identify the levels of a dimension which are different on a specific value column. Used to e.g. identify GEO-codes with TELLER/NEVNER-diffs. + +X. Code cleaning + - Removed all use of get/mget + - Implemented the env-argument in data.table for cleaner syntax + # qualcontrol 1.3.8 (2026-01-20) ## Changes diff --git a/R/attack-secondary-censoring.R b/R/attack-secondary-censoring.R index 327b021..605af86 100644 --- a/R/attack-secondary-censoring.R +++ b/R/attack-secondary-censoring.R @@ -8,14 +8,14 @@ attack_naboprikk <- function(cubefile = NULL, cubepath = NULL){ naboprikkdims <- grep("nabopr", names(specs), value = T) naboprikkdims <- setNames(naboprikkdims, toupper(gsub("Stata_nabopr([^_]+).*$", "\\1", naboprikkdims))) - out_pvern1 <- data.table::copy(d[0, ..dims]) + out_pvern1 <- data.table::copy(d[0, .SD, .SDcols = dims]) for(dim in names(naboprikkdims)){ restdims <- setdiff(dims, dim) triangles <- decode_triangles(dim = dim, naboprikkdims = naboprikkdims, specs = specs) for(i in 1:length(triangles)){ - d_subset <- d[get(dim) %in% triangles[[i]]] + d_subset <- d[x %in% triangles[[i]], env = list(x = dim)] pvern1 <- d_subset[, .(n_pvern = sum(pvern, na.rm = T), n_prikkok = sum(prikket_ok, na.rm = T)), by = restdims][, (dim) := paste0("{", paste(triangles[[i]], collapse = ","), "}")][n_pvern > 0 & n_prikkok == 1][, let(n_pvern = NULL, n_prikkok = NULL)] if(ncol(pvern1) > 0) out_pvern1 <- data.table::rbindlist(list(out_pvern1, pvern1), use.names = T) } diff --git a/R/barometer.R b/R/barometer.R index d9d1839..d86b66b 100644 --- a/R/barometer.R +++ b/R/barometer.R @@ -91,10 +91,10 @@ check_barometer <- function(type = c("FHP", "OVP"), } cat("... \n") withVar <- c("Aar", indV1) - bar[ind, (withVar) := mget(withVar), on = c(stedskode_string = "Sted_kode", LPnr = "LPnr")] + bar <- collapse::join(bar, ind, on = c(stedskode_string = "Sted_kode", LPnr = "LPnr"), verbose = 0, overid = 2)[, .SD, .SDcols = c(names(bar), withVar)] verdiCol <- ifelse(geo == "fylke", "Verdi_mellomGeonivaa", "Verdi_lavesteGeonivaa") - outDT <- bar[get(verdiCol) == Verdi_referansenivaa, ][ + outDT <- bar[x == Verdi_referansenivaa, env = list(x = verdiCol)][ !is.na(roede) | !is.na(groenne) | !is.na(hvitMprikk)] return(outDT) diff --git a/R/check-censoring.R b/R/check-censoring.R index 0a5c3f3..0a21b76 100644 --- a/R/check-censoring.R +++ b/R/check-censoring.R @@ -40,7 +40,7 @@ check_censoring <- function(dt = newcube){ } else { cat(paste0("\nTELLER variable controlled: ", tellerval)) cat(paste0("\nCriteria: No values <= ", lim_teller)) - notcensored_teller <- dt[SPVFLAGG == 0 & get(tellerval) <= lim_teller] + notcensored_teller <- dt[SPVFLAGG == 0 & x <= lim_teller, env = list(x = tellerval)] } if(!is.null(notcensored_teller)){ @@ -61,7 +61,7 @@ check_censoring <- function(dt = newcube){ } else { cat(paste0("\nNEVNER variable controlled: ", nevnerval)) cat(paste0("\nCriteria: No values <= ", lim_nevner)) - notcensored_nevner <- dt[SPVFLAGG == 0 & get(nevnerval) <= lim_nevner] + notcensored_nevner <- dt[SPVFLAGG == 0 & x <= lim_nevner, env = list(x = nevnerval)] } if(!is.null(notcensored_nevner)){ @@ -75,6 +75,37 @@ check_censoring <- function(dt = newcube){ } } +#' @title explore_different_censoring +#' @description filters out strata with different censoring and prints out censor-info columns to identify the reason for differences. +#' @param compare comparecube +#' @param dt_new newcube +#' @param dt_old oldcube +#' @export +explore_different_censoring <- function(compare = comparecube, dt_new = newcube, dt_old = oldcube){ + if(is.null(dt_old)){ + message("No old cube, check not possible") + return(invisible(NULL)) + } + colinfo <- identify_coltypes(dt_new, dt_old) + dims <- colinfo$commondims + diffs <- compare[(SPVFLAGG_new == 0 & SPVFLAGG_old > 0) | (SPVFLAGG_new > 0 & SPVFLAGG_old == 0), .SD, .SDcols = c(dims, "SPVFLAGG_new", "SPVFLAGG_old")] + colorder <- names(diffs) + censor_new <- colinfo$censor.new + + diffs <- collapse::join(diffs, dt_new, on = dims, overid = 2, verbose = 0)[, .SD, .SDcols = c(names(diffs), censor_new)] + data.table::setnames(diffs, censor_new, paste0(censor_new, "_new")) + + censor_old <- colinfo$censor.old + if(length(censor_old) > 0){ + diffs <- collapse::join(diffs, dt_old, on = dims, overid = 2, verbose = 0)[, .SD, .SDcols = c(names(diffs), censor_old)] + data.table::setnames(diffs, censor_old, paste0(censor_old, "_old")) + for(col in intersect(censor_new, censor_old)) colorder <- c(colorder, paste0(col, c("_new", "_old"))) + } + + data.table::setcolorder(diffs, colorder) + return(diffs) +} + #' @@title compare_censoring #' @description #' Calculate number of censored observations and calculate absolute and relative difference @@ -168,8 +199,8 @@ compare_censoring_timeseries <- function(cube.new = newcube, } if(!is.null(cube.old)){ - d <- data.table::rbindlist(list(data.table::copy(cube.new)[!grepl("99$", GEO), mget(colinfo$commoncols)][, cube := "New"], - data.table::copy(cube.old)[!grepl("99$", GEO), mget(colinfo$commoncols)][, cube := "Old"])) + d <- data.table::rbindlist(list(data.table::copy(cube.new)[!grepl("99$", GEO), .SD, .SDcols = colinfo$commoncols][, cube := "New"], + data.table::copy(cube.old)[!grepl("99$", GEO), .SD, .SDcols = colinfo$commoncols][, cube := "Old"])) groupdims <- grep("^AAR$", c(colinfo$commondims), invert = T, value = T) d <- d[, .(N_censored = sum(SPVFLAGG != 0, na.rm = T)), by = c("cube", groupdims)] diff --git a/R/check-comparecube.R b/R/check-comparecube.R index f5eca5e..33f08c8 100644 --- a/R/check-comparecube.R +++ b/R/check-comparecube.R @@ -9,8 +9,7 @@ #' #' @return a DT output table #' @export -comparecube_summary <- function(dt = comparecube, - save = TRUE){ +comparecube_summary <- function(dt = comparecube, save = TRUE){ if(is.null(dt)){ cat("comparecube is NULL, no check performed") return(invisible(NULL)) @@ -129,8 +128,7 @@ plot_diff_timetrends <- function(dt = comparecube, d <- data.table::copy(dt[newrow == 0 & SPVFLAGG_new == 0 & SPVFLAGG_old == 0]) |> translate_geoniv() - d[, let(Absolute = get(diffval), - Relative = get(reldiffval))] + d[, let(Absolute = x, Relative = y), env = list(x = diffval, y = reldiffval)] d <- data.table::melt(d, measure.vars = c("Absolute", "Relative"))[, .(GEOniv, AAR, variable, value)] allyears <- get_all_combinations(d, c("GEOniv", "AAR", "variable")) d <- d[!(variable == "Absolute" & value == 0 | variable == "Relative" & value == 1)] @@ -147,6 +145,28 @@ plot_diff_timetrends <- function(dt = comparecube, } } +#' @title diffvals_which_levels +#' @description Prints a list of unique values of a dimensions with diffs of a specified value +#' @param dt comparecube +#' @param dim dimension to list levels with diffs +#' @param val value column to identify diffs +#' @export +diffvals_which_levels <- function(dt = comparecube, dim = "GEO", val = "sumTELLER_uprikk"){ + if(is.null(dt)){ + cat("comparecube is NULL, no check performed") + return(invisible(NULL)) + } + colinfo <- identify_coltypes(comparecube) + available_vals <- unique(sub("_new$|_old$", "", grep("_diff$|_reldiff$", colinfo$vals.new, invert = T, value = T))) + + if(dim %notin% colinfo$dims.new) stop("dim må være en dimensjon i filen: ", paste(colinfo$dims.new, collapse = ", ")) + if(val %notin% available_vals) stop("val må være en verdikolonne i filen: ", paste(available_vals, collapse = ", ")) + + diffval <- paste0(val, "_diff") + cat("Unike verdier for ", dim, " som har differ for ", val, ":\n") + qc_round(dt)[any_diffs == 1 & x != 0, unique(y), env = list(x = diffval, y = dim)] +} + ## ---- HELPER FUNCTIONS ---- #' @title summarise_diffvals @@ -169,10 +189,15 @@ summarise_diffvals <- function(out, reldiff <- paste0(value, "_reldiff") calculate_reldiff <- reldiff %in% names(subset) - identical <- subset[get(diff) == 0, .N] - different <- subset[get(diff) != 0, .N] - newprikk <- subset[is.na(get(new)) & !is.na(get(old)), .N] - expprikk <- subset[!is.na(get(new)) & is.na(get(old)), .N] + identical <- subset[x == 0, .N, env = list(x = diff)] + different <- subset[x != 0, .N, env = list(x = diff)] + + newprikk <- expprikk <- NA_real_ + if(value == "SPVFLAGG"){ + newprikk <- subset[x > 0 & y == 0, .N, env = list(x = new, y = old)] + expprikk <- subset[x == 0 & y > 0, .N, env = list(x = new, y = old)] + } + out[GEOniv == geolevel & Value == value, let(Identical = identical, Different = different, @@ -180,7 +205,7 @@ summarise_diffvals <- function(out, Expired_prikk = expprikk)] if(different > 0){ - diffdata <- subset[get(diff) != 0 & !is.na(get(new)) & !is.na(get(old))] + diffdata <- subset[x != 0 & !is.na(y) & !is.na(z), env = list(x = diff, y = new, z = old)] out[GEOniv == geolevel & Value == value, let(Mean_diff = round(mean(diffdata[[diff]], na.rm = T), 3), Min_diff = round(min(diffdata[[diff]], na.rm = T), 3), Max_diff = round(max(diffdata[[diff]], na.rm = T), 3))] diff --git a/R/check-geolevel.R b/R/check-geolevel.R index ae9f687..2716d8c 100644 --- a/R/check-geolevel.R +++ b/R/check-geolevel.R @@ -54,11 +54,11 @@ compare_geolevels <- function(dt = newcube, by <- c("GEOniv", groupdims) # Estimate sum and diffs - d <- d[, .("sum" = collapse::fsum(get(teller_val))), keyby = by] + d <- d[, .("sum" = sum(x)), by = by, env = list(x = teller_val)] d <- data.table::dcast(d, ... ~ GEOniv, value.var = "sum") data.table::setcolorder(d, c(groupdims, outcols[1], outcols[2])) - d[, let(Absolute = get(outcols[1])-get(outcols[2]), - Relative = round(get(outcols[1])/get(outcols[2]), 3))] + d[, let(Absolute = GEOh-GEOl, Relative = round(GEOh/GEOl, 3)), + env = list(GEOh = outcols[[1]], GEOl = outcols[[2]])] # Format output d[, (groupdims) := lapply(.SD, as.factor), .SDcols = groupdims] @@ -93,6 +93,11 @@ unknown_bydel <- function(dt = newcube, return(invisible(NULL)) } + # Filter relevant rows + d <- d[GEO %in% c(301, 1103, 4601, 5001) | GEOniv == "B"] + contains_bydel <- d[GEOniv == "B" & SPVFLAGG == 0, unique(AAR)] + d <- d[AAR %in% contains_bydel] + cubefile <- get_cubefilename(dt) savepath <- get_table_savefolder(get_cubename(dt)) suffix <- paste0("unknown_bydel_", type) @@ -110,12 +115,9 @@ unknown_bydel <- function(dt = newcube, return(invisible(NULL)) } - # Filter and format data - d <- d[GEO %in% c(301, 1103, 4601, 5001) | GEOniv == "B"] - contains_bydel <- d[GEOniv == "B" & SPVFLAGG == 0, unique(AAR)] - d <- d[AAR %in% contains_bydel] + # Format data add_kommune(d) - d <- d[, mget(c("KOMMUNE", "GEOniv", colinfo$dims.new, targets, "SPVFLAGG"))] + d <- d[, .SD, .SDcols = c("KOMMUNE", "GEOniv", colinfo$dims.new, targets, "SPVFLAGG")] d <- data.table::melt(d, measure.vars = targets, variable.name = "TARGET") d <- get_complete_strata(d, c("KOMMUNE", "TARGET", bydims), type = type, "value") @@ -129,7 +131,7 @@ unknown_bydel <- function(dt = newcube, d <- data.table::dcast(d, ... ~ GEOniv, value.var = "sum") d[, UNKNOWN := round(100*(1 - B/K), 2)] d[B == 0 & K == 0, UNKNOWN := NA_real_] - d <- d[order(-UNKNOWN)] + data.table::setorder(d, -UNKNOWN, na.last = T) data.table::setcolorder(d, c("KOMMUNE", bydims, "TARGET", "K", "B", "UNKNOWN")) data.table::setnames(d, c("K", "B", "UNKNOWN"), c("Kommune", "Bydel", "UNKNOWN, %")) diff --git a/R/friskvik.R b/R/friskvik.R index 6b89fff..e81d555 100644 --- a/R/friskvik.R +++ b/R/friskvik.R @@ -22,8 +22,8 @@ check_friskvik <- function(profile = c("FHP", "OVP"), on.exit(RODBC::odbcClose(con), add = TRUE) paths <- friskvik_create_path(profile = profile, geolevel = geolevel, profileyear = profileyear, test = test) friskvikfiles <- list.files(paths$godkjent, pattern = ".csv") - outcols <- c("Friskvik", "Kube", "ALT_OK", "FIL_I_STATBANK", "FRISKVIK_ETAB", "KUBE_KJONN", "KUBE_ALDER", "KUBE_UTDANN", "KUBE_INNVKAT", "KUBE_LANDBAK", - "FRISKVIK_AAR", "SISTE_AAR", "Periode_bm", "Periode_nn", "IDENTISK_PRIKKING", "MATCHER_KOLONNE", "Different_kubecol", "Enhet", "REFVERDI_VP", "VALID_kombo") + outcols <- c("Friskvik", "Kube", "ALT_OK", "FIL_I_STATBANK", "SIDE 1", "SIDE 4", "FRISKVIK_ETAB", "KUBE_KJONN", "KUBE_ALDER", "KUBE_UTDANN", "KUBE_INNVKAT", "KUBE_LANDBAK", + "FRISKVIK_AAR", "SISTE_AAR", "Periode_bm", "Periode_nn", "IDENTISK_PRIKKING", "MATCHER_KOLONNE", "Different_kubecol", "Enhet", "REFVERDI_VP", "VALID_standardisering_kombo", "Kommentar") out_format <- data.table::setDT(as.list(setNames(rep(NA_character_, length(outcols)), outcols))) out <- data.table::copy(out_format[0, ]) @@ -34,37 +34,58 @@ check_friskvik <- function(profile = c("FHP", "OVP"), tryload <- try(friskvik_read_file(filename = file, geolevel = geolevel, profile = profile, profileyear = profileyear, friskvikpath = paths$godkjent, con = con), silent = T) if(!("try-error" %in% class(tryload))){ + FRISKVIK <- .GlobalEnv$datasets$FRISKVIK + KUBE <- .GlobalEnv$datasets$KUBE + SPEC <- .GlobalEnv$datasets$SPEC + indikatornavn <- sub("(.*)(_\\d{4}-\\d{2}-\\d{2}-\\d{2}-\\d{2}.*)", "\\1", file) newline[["Kube"]] <- attributes(KUBE)$Filename newline[["FIL_I_STATBANK"]] <- friskvik_in_publication(KUBE, profileyear) + newline[["SIDE 1"]] <- friskvik_read_access(con, "Side1", "FRISKVIK", indikatornavn, profile, geolevel, profileyear) + newline[["SIDE 4"]] <- friskvik_read_access(con, "Side4", "FRISKVIK", indikatornavn, profile, geolevel, profileyear) newline[["FRISKVIK_ETAB"]] <- friskvik_unique_level(FRISKVIK, "ETAB") - newline[["KUBE_KJONN"]] <- friskvik_unique_level(KUBE, "KJONN") - newline[["KUBE_ALDER"]] <- friskvik_unique_level(KUBE, "ALDER") - newline[["KUBE_UTDANN"]] <- friskvik_unique_level(KUBE, "UTDANN") - newline[["KUBE_INNVKAT"]] <- friskvik_unique_level(KUBE, "INNVKAT") - newline[["KUBE_LANDBAK"]] <- friskvik_unique_level(KUBE, "LANDBAK") newline[["FRISKVIK_AAR"]] <- friskvik_unique_level(FRISKVIK, "AAR") - newline[["SISTE_AAR"]] <- friskvik_last_year() - Periode_bm <- friskvik_read_access(con, "Periode_bm", "FRISKVIK", indikatornavn, profile, geolevel, profileyear) - newline[["Periode_bm"]] <- ifelse(length(Periode_bm) == 0 || is.na(Periode_bm), "!! empty", Periode_bm) Periode_nn <- friskvik_read_access(con, "Periode_nn", "FRISKVIK", indikatornavn, profile, geolevel, profileyear) + newline[["Periode_bm"]] <- ifelse(length(Periode_bm) == 0 || is.na(Periode_bm), "!! empty", Periode_bm) newline[["Periode_nn"]] <- ifelse(length(Periode_nn) == 0 || is.na(Periode_nn), "!! empty", Periode_nn) - - newline[["IDENTISK_PRIKKING"]] <- friskvik_compare_prikk() - compvals <- friskvik_compare_val() - newline[["MATCHER_KOLONNE"]] <- compvals$matches - newline[["Different_kubecol"]] <- compvals$different - + isungdata <- grepl("UNGDATA", newline[["Kube"]]) + if(isungdata) newline[["Periode_bm"]] <- newline[["Periode_nn"]] <- "-" ENHET <- friskvik_read_access(con, "Enhet", "FRISKVIK", indikatornavn, profile, geolevel, profileyear) newline[["Enhet"]] <- ifelse(length(ENHET) == 0 || is.na(ENHET), "!!MISSING", ENHET) - REFVERDI_VP <- SPEC[Kolonne == "REFVERDI_VP", Innhold] - newline[["REFVERDI_VP"]] <- ifelse(length(REFVERDI_VP) == 0 || is.na(REFVERDI_VP), "!! MISSING from SPECS-file", REFVERDI_VP) - isAK <- grepl("\\([ak,]+\\)", newline[["Enhet"]]) - isPD <- newline[["REFVERDI_VP"]] %in% c("P", "D") - isMEIS <- "MEIS" %in% newline[["MATCHER_KOLONNE"]] - newline[["VALID_kombo"]] <- ifelse(all(isAK, isPD, isMEIS) | !(any(isAK, isPD, isMEIS)), "Yes", "!! No!!") + + if(nrow(KUBE) > 0){ + newline[["KUBE_KJONN"]] <- friskvik_unique_level(KUBE, "KJONN") + newline[["KUBE_ALDER"]] <- friskvik_unique_level(KUBE, "ALDER") + newline[["KUBE_UTDANN"]] <- friskvik_unique_level(KUBE, "UTDANN") + newline[["KUBE_INNVKAT"]] <- friskvik_unique_level(KUBE, "INNVKAT") + newline[["KUBE_LANDBAK"]] <- friskvik_unique_level(KUBE, "LANDBAK") + newline[["SISTE_AAR"]] <- friskvik_last_year(data1 = FRISKVIK, data2 = KUBE) + + # Filtrer KUBE til å bare inneholde samme år som i friskvik + KUBE <- KUBE[AAR %in% unique(FRISKVIK$AAR)] + isequal <- KUBE[,.N] == FRISKVIK[, .N] + if(!isequal) newline[["Kommentar"]] <- "Klarer ikke filtrere KUBE til å matche FRISKVIK, kan skyldes 'rare' kolonnenavn. Noen sjekker ikke mulig." + newline[["IDENTISK_PRIKKING"]] <- friskvik_compare_prikk(data1 = FRISKVIK, data2 = KUBE) + compvals <- friskvik_compare_val(data1 = FRISKVIK, data2 = KUBE) + newline[["MATCHER_KOLONNE"]] <- compvals$matches + newline[["Different_kubecol"]] <- compvals$different + REFVERDI_VP <- ifelse(nrow(SPEC) > 0, SPEC[Kolonne == "REFVERDI_VP", Innhold], "SPEC finnes ikke") + newline[["REFVERDI_VP"]] <- ifelse(length(REFVERDI_VP) == 0 || is.na(REFVERDI_VP), "!! MISSING from SPECS-file", REFVERDI_VP) + + if(newline$REFVERDI_VP %in% c("V", "P", "D") && !grepl("!!", newline$MATCHER_KOLONNE)){ + isAK <- grepl("\\([ak,]+\\)", newline[["Enhet"]]) + isPD <- newline[["REFVERDI_VP"]] %in% c("P", "D") + isMEIS <- "MEIS" %in% newline[["MATCHER_KOLONNE"]] + newline[["VALID_standardisering_kombo"]] <- ifelse(all(isAK, isPD, isMEIS) | !(any(isAK, isPD, isMEIS)), "Ja", "!! NEI !!") + } else { + newline[["VALID_standardisering_kombo"]] <- "Vurdering ikke mulig" + } + } else{ + newline[["Kommentar"]] <- "Ingen rader i filtrert KUBE, kan skyldes 'rare' kolonnenavn. Noen sjekker ikke mulig." + } } + rm(tryload) out <- data.table::rbindlist(list(out, newline)) } @@ -75,8 +96,9 @@ check_friskvik <- function(profile = c("FHP", "OVP"), grepl("!!", Periode_nn) | grepl("!!", IDENTISK_PRIKKING) | grepl("!!", MATCHER_KOLONNE) | - grepl("!!", VALID_kombo), - ALT_OK := "!! KANSKJE IKKE !!"] + grepl("!!", VALID_standardisering_kombo), + ALT_OK := "!! KANSKJE IKKE, utslag på noen tester !!"] + out[!is.na(Kommentar), ALT_OK := "!! KANSKJE IKKE, se kommentar !!"] assign(paste("FRISKVIKSJEKK",profile,geolevel, sep = "_"), out, envir = .GlobalEnv) @@ -94,13 +116,8 @@ check_friskvik <- function(profile = c("FHP", "OVP"), #' @param geolevel One of "B", "K", or "F" #' @param profileyear 4-digit profileyear #' @param friskvikpath can provide full path, defaults to NULL -friskvik_read_file <- function(filename = NULL, - profile = NULL, - geolevel = NULL, - profileyear = NULL, - friskvikpath = NULL, - con = NULL){ - clean_friskvik_environment() +friskvik_read_file <- function(filename = NULL, profile = NULL, geolevel = NULL, profileyear = NULL, friskvikpath = NULL, con = NULL){ + .GlobalEnv[["datasets"]] <- list() if(is.null(filename)) stop("file not selected") if(is.null(friskvikpath)){ if(is.null(profile)) stop("profile must be provided") @@ -118,15 +135,14 @@ friskvik_read_file <- function(filename = NULL, if(length(friskvikfile) > 1) stop("> 1 FRISKVIK files with the same name identified", cat(friskvikfile, sep = "\n")) friskvikfilepath <- file.path(friskvikpath, friskvikfile) friskvikdatetag <- sub(".*(\\d{4}-\\d{2}-\\d{2}-\\d{2}-\\d{2})(.csv$)", "\\1", friskvikfilepath) - FRISKVIK <<- read_friskvik(path = friskvikfilepath) + FRISKVIK <- read_friskvik(path = friskvikfilepath) correctcube <- friskvik_read_access(con, "KUBE_NAVN", "FRISKVIK", friskvikindikator, profile, geolevel, profileyear) cubefilepath <- get_cube_path(datetag = friskvikdatetag, correctcube = correctcube) specfilepath <- get_specfile_path(datetag = friskvikdatetag, correctcube = correctcube) KUBE <- read_friskvik_cube(path = cubefilepath) - SPEC <<- data.table::fread(specfilepath) - cat(paste0("\nSPEC loaded: ", basename(specfilepath))) + SPEC <- read_friskvik_spec(path = specfilepath) colinfo <- identify_coltypes(FRISKVIK, KUBE) KUBE <- filter_cube_to_friskvik(cube = KUBE, friskvik = FRISKVIK, colinfo = colinfo, friskvikindikator = friskvikindikator) @@ -134,28 +150,44 @@ friskvik_read_file <- function(filename = NULL, data.table::setkeyv(KUBE, colinfo$commondims) data.table::setkeyv(FRISKVIK, colinfo$commondims) - KUBE <<- KUBE + .GlobalEnv[["datasets"]][["FRISKVIK"]] <- FRISKVIK + .GlobalEnv[["datasets"]][["KUBE"]] <- KUBE + .GlobalEnv[["datasets"]][["SPEC"]] <- SPEC } -clean_friskvik_environment <- function(){ - .GlobalEnv[["FRISKVIK"]] <- NULL - .GlobalEnv[["KUBE"]] <- NULL - .GlobalEnv[["SPEC"]] <- NULL -} +# clean_friskvik_environment <- function(){ +# .GlobalEnv[["datasets"]] <- NULL +# .GlobalEnv[["FRISKVIK"]] <- NULL +# .GlobalEnv[["KUBE"]] <- NULL +# .GlobalEnv[["SPEC"]] <- NULL +# } read_friskvik <- function(path){ file <- data.table::fread(path) + fix_case_insensitive_match(file = file) data.table::setattr(file, "Filename", basename(path)) cat(paste0("\nFRISKVIK loaded: ", sub("(.*KUBER/)(.*)", "\\2", path))) return(file) } +fix_case_insensitive_match <- function(file){ + friskvikcols <- getOption("qualcontrol.friskvikcols") + wrongnames <- names(file)[!names(file) %in% friskvikcols] + if(length(wrongnames) == 0) return(invisible(NULL)) + ci_idx <- match(toupper(wrongnames), toupper(friskvikcols)) + ci_match <- wrongnames[!is.na(ci_idx)] + if(length(ci_match) == 0) return(invisi) + ci_correct <- friskvikcols[ci_idx[!is.na(ci_idx)]] + data.table::setnames(file, ci_match, ci_correct) +} + read_friskvik_cube <- function(path){ if(length(path) < 1) stop("corresponding KUBE file not found, check arguments") if(length(path) > 1) stop("> 1 KUBE files with the same name and dato tag identified", cat(path, sep = "\n")) if(grepl(".parquet$", path)){ KUBE <- data.table::setDT(arrow::read_parquet(path)) + KUBE[, GEO := as.integer(GEO)] } else if (grepl(".csv$", path)){ KUBE <- data.table::fread(path) } @@ -165,6 +197,17 @@ read_friskvik_cube <- function(path){ return(KUBE) } +read_friskvik_spec <- function(path){ + if(length(path) < 1){ + SPEC <- data.table::data.table() + cat(paste0("\nSPEC not found")) + } else { + SPEC <- data.table::fread(path) + cat(paste0("\nSPEC loaded: ", basename(path))) + } + return(SPEC) +} + get_cube_path <- function(datetag, correctcube){ base <- file.path(getOption("qualcontrol.root"), getOption("qualcontrol.cubefiles")) kubepath <- file.path(base, "DATERT/parquet") @@ -184,12 +227,17 @@ get_specfile_path <- function(datetag, correctcube){ return(path) } +#' @title filter_cube_to_friskvik +#' @description +#' Filters KUBE file to match FRISKVIK on all dimensions except AAR +#' @noRd filter_cube_to_friskvik <- function(cube, friskvik, colinfo, friskvikindikator){ - friskvikindikator <- - ETAB <- friskvik[, unique(ETAB)] - if(!is.na(ETAB)) cube <- cube[eval(parse(text = ETAB))] + if("ETAB" %in% names(friskvik)){ + ETAB <- friskvik[, unique(ETAB)] + if(!is.na(ETAB)) cube <- cube[eval(parse(text = ETAB))] + } filtercols <- grep("^AAR$", colinfo$commondims, invert = T, value = T) - for(i in filtercols) cube <- cube[get(i) %in% friskvik[, unique(get(i))]] + for(i in filtercols) cube <- cube[x %in% unique(friskvik[[i]]), env = list(x = i)] if("INNVKAT" %in% colinfo$expdims) cube <- cube[INNVKAT == 0] if("LANDBAK" %in% colinfo$expdims){ @@ -210,17 +258,16 @@ filter_cube_to_friskvik <- function(cube, friskvik, colinfo, friskvikindikator){ #' @noRd #' @param data1 FRISKVIK #' @param data2 KUBE -friskvik_last_year <- function(data1 = FRISKVIK, - data2 = KUBE){ +friskvik_last_year <- function(data1 = FRISKVIK, data2 = KUBE){ if(length(data1[, unique(AAR)]) > 1){ lastyear <- max(data1[, unique(AAR)]) - out <- data.table::fcase(lastyear == max(data2[, unique(AAR)]), "Yes", - default = "!!NO!!") + out <- data.table::fcase(lastyear == max(data2[, unique(AAR)]), "Ja", + default = "!! NEI !!") } else if(data1[, unique(AAR)] == max(data2[, unique(AAR)])){ - out <- "Yes" + out <- "Ja" } else { - out <- "!!NO!!" + out <- "!! NEI !!" } return(out) @@ -231,18 +278,15 @@ friskvik_last_year <- function(data1 = FRISKVIK, #' @noRd #' @param data1 FRISKVIK #' @param data2 KUBE -friskvik_compare_prikk <- function(data1 = FRISKVIK, - data2 = KUBE){ - - # Only include years included in FRISKVIK - data2 <- data2[AAR %in% data1[, unique(AAR)]] +friskvik_compare_prikk <- function(data1 = FRISKVIK,data2 = KUBE){ + if(data2[, .N] != data1[, .N]) return("Kan ikke beregnes") # Compare values censored in FRISKVIK with values censored in KUBE if(isTRUE(all.equal(is.na(data1$MEIS), data2[, SPVFLAGG > 0]))){ - "Yes" + "Ja" } else { geodiff <- data1[is.na(data1$MEIS) != data2[, SPVFLAGG > 0], GEO] - paste("!!NO!! Diff for GEO:", paste0(geodiff, collapse = ", ")) + paste("!! NEI !! Diff for GEO:", paste0(geodiff, collapse = ", ")) } } @@ -253,9 +297,6 @@ friskvik_compare_prikk <- function(data1 = FRISKVIK, #' @param data2 KUBE friskvik_compare_val <- function(data1 = FRISKVIK, data2 = KUBE){ - # Only include years included in FRISKVIK - data2 <- data2[AAR %in% data1[, unique(AAR)]] - # Find value columns in KUBE colinfo <- identify_coltypes(data1, data2) kubevals <- grep("RATE.n|SPVFLAGG", colinfo$vals.old, invert = TRUE, value = T) @@ -263,9 +304,14 @@ friskvik_compare_val <- function(data1 = FRISKVIK, data2 = KUBE){ matches <- character() different <- character() + if(data1[,.N] != data2[, .N]){ + matches <- different <- "Kan ikke beregnes" + return(list(matches = matches, different = different)) + } + # Map over value columns in KUBE, find the column(s) matching FRISKVIK$MEIS for(i in kubevals){ - if(isTRUE(all.equal(data1$MEIS, data2[, get(i)]))){ + if(isTRUE(all.equal(data1$MEIS, data2[[i]]))){ matches <- c(matches, i) } else { different <- c(different, i) @@ -327,7 +373,9 @@ friskvik_create_path <- function(profile, geolevel, profileyear, test = FALSE){ friskvik_in_publication <- function(file, year){ pub_folder <- paste0("STATBANK/STATBANK_", year) pub_folder <- file.path(getOption("qualcontrol.root"),getOption("qualcontrol.files"), pub_folder) - attributes(KUBE)$Filename %in% list.files(pub_folder, pattern = ".csv") + pub_files <- sub(pattern = "\\.csv$|\\.parquet$", "", list.files(pub_folder, pattern = "\\.csv$|\\.parquet$")) + cubename <- sub(pattern = "\\.csv$|\\.parquet$", "", attributes(.GlobalEnv[["datasets"]][["KUBE"]])$Filename) + cubename %in% pub_files } #' @title friskvik_unique_level @@ -335,7 +383,7 @@ friskvik_in_publication <- function(file, year){ #' @noRd friskvik_unique_level <- function(data, dim = NULL){ if(!dim %in% names(data)) return(NA_character_) - paste(data[, unique(get(dim))], collapse = ", ") + paste(unique(data[[dim]]), collapse = ", ") } #' @title friskvik_read_access diff --git a/R/make_comparecube.R b/R/make_comparecube.R index ab31c25..b8396c4 100644 --- a/R/make_comparecube.R +++ b/R/make_comparecube.R @@ -107,10 +107,7 @@ exclude_lks_for_compare <- function(dt){ #' @examples #' # flag_rows(cube.new, cube.old, colinfo, "newrow") #' # flag_rows(cube.new, cube.old, colinfo, "exprow") -flag_rows <- function(cube.new, - cube.old = NULL, - colinfo, - flag = c("newrow", "exprow")){ +flag_rows <- function(cube.new, cube.old = NULL, colinfo, flag = c("newrow", "exprow")){ flag <- match.arg(flag) if(flag == "newrow" & is.null(cube.new)) stop("cube.new cannot be NULL when flag = 'newrow'") @@ -123,24 +120,12 @@ flag_rows <- function(cube.new, ref <- switch(flag, newrow = data.table::copy(cube.old), exprow = data.table::copy(cube.new)) - new <- switch(flag, - newrow = colinfo$newdims, - exprow = colinfo$expdims) dt[, (flag) := 0L] for(dim in colinfo$commondims){ flaglevels <- unique(ref[[dim]]) - dt[get(dim) %notin% flaglevels, (flag) := 1L] - } - - for(dim in new){ - total <- find_total(dt, dim) - if(!is.na(total)){ - dt[get(dim) != total & get(flag) == 0, (flag) := 1L] - } else { - dt[get(flag) == 0, (flag) := 1L] - } + dt[x %notin% flaglevels, (flag) := 1L, env = list(x = dim)] } return(dt) @@ -156,11 +141,10 @@ flag_rows <- function(cube.new, #' @param cube data file #' @param outlierval Which value is used to detect outliers? Selected with [qualcontrol::select_outlier_pri()] #' @return cube with outlier information -flag_outliers <- function(cube, - outlierval){ +flag_outliers <- function(cube, outlierval){ dt <- data.table::copy(cube) split_kommuneniv(dt) - dims <- grep("^AAR$", names(dt)[names(dt) %in% getOption("qualcontrol.alldimensions")], invert = T, value = T) + dims <- setdiff(names(dt)[names(dt) %in% getOption("qualcontrol.alldimensions")], "AAR") keyvars <- c(dims, "AAR") data.table::setkeyv(dt, keyvars) @@ -187,19 +171,17 @@ flag_outliers <- function(cube, #' @param dt data file ordered by #' @param val value column to calculate change from #' @param by dimensions to group the change value by -add_changeval <- function(dt, - val, - by){ +add_changeval <- function(dt, val, by){ if(length(unique(dt$AAR)) < 2) return(dt) if(!all.equal(data.table::key(dt), c(by, "AAR"))) data.table::setkeyv(dt, c(by, "AAR")) changevalue <- paste0("change_", val) - min_nonzeroval <- dt[get(val) != 0, collapse::fmin(get(val))] + min_nonzeroval <- collapse::fmin(data.table::fifelse(dt[[val]] != 0, dt[[val]], NA_real_)) g <- collapse::GRP(dt, by) dt[, (changevalue) := collapse::flag(dt[[val]], g = g)] - dt[, (changevalue) := zoo::na.locf(get(changevalue), na.rm = F), by = by] - dt[get(changevalue) == 0, (changevalue) := min_nonzeroval/2] - dt[, (changevalue) := 100*(get(val)/get(changevalue)-1)] + dt[, (changevalue) := zoo::na.locf(x, na.rm = F), by = by, env = list(x = changevalue)] + dt[x == 0, x := min_nonzeroval/2, env = list(x = changevalue)] + dt[, y := 100*(x/y-1), env = list(x = val, y = changevalue)] return(dt) } @@ -220,7 +202,6 @@ add_outlier <- function(dt, val, by, change = FALSE){ if(isTRUE(change) && length(unique(dt$AAR)) < 2) return(dt) if(isTRUE(change)) val <- paste0("change_", val) - missgeoniv <- dt[is.na(GEOniv)] dt <- dt[!is.na(GEOniv)] by <- sub("^GEO$", "GEOniv", by) @@ -253,9 +234,8 @@ add_outlier <- function(dt, val, by, change = FALSE){ } dt <- collapse::join(dt, cutoffs, on = by, verbose = 0, overid = 2) - dt[get(val) > get(highcutoff), (outliercol) := 1] - dt[get(val) < get(lowcutoff), (outliercol) := 1] - # dt[get(val) > get(lowcutoff) & get(val) < get(highcutoff), (outliercol) := 0] + dt[x > y, (outliercol) := 1, env = list(x = val, y = highcutoff)] + dt[x < y, (outliercol) := 1, env = list(x = val, y = lowcutoff)] return(dt) } @@ -305,25 +285,25 @@ combine_cubes <- function(newcube_flag, oldcube_flag, colinfo){ valuecolumns <- c("TELLER", "NEVNER", "sumTELLER", "sumNEVNER", "RATE.n") for(col in valuecolumns){ - if(col %in% names(d_old) & !(col %in% names(d_new)) & paste0(col, "_uprikk") %in% names(d_new)){ - d_new[, (col) := get(paste0(col, "_uprikk"))] + if(col %in% names(d_old) && !(col %in% names(d_new)) && paste0(col, "_uprikk") %in% names(d_new)){ + data.table::set(d_new, j = col, value = d_new[[paste0(col, "_uprikk")]]) d_new[SPVFLAGG != 0, (col) := NA_real_] commonvals <- c(commonvals, col) } } - d_new <- d_new[, c(..colinfo[["commondims"]], ..commonvals, "newrow", "GEOniv")] + d_new <- d_new[, .SD, .SDcols = c(colinfo[["commondims"]], commonvals, "newrow", "GEOniv")] data.table::setnames(d_new, commonvals, paste0(commonvals, "_new")) - # Handle new (add total to d_old) and expired (aggregate d_old) dimensions - if(length(colinfo$expdims) > 0) aggregate_cube_multi(d_old, colinfo$expdims) - if(length(colinfo$newdims) > 0) { - for(dim in colinfo$newdims){ - d_old[, (dim) := find_total(d_new, dim)] - } - } + # # Handle new (add total to d_old) and expired (aggregate d_old) dimensions + # if(length(colinfo$expdims) > 0) aggregate_cube_multi(d_old, colinfo$expdims) + # if(length(colinfo$newdims) > 0) { + # for(dim in colinfo$newdims){ + # d_old[, (dim) := find_total(d_new, dim)] + # } + # } - d_old <- d_old[, c(..colinfo[["commondims"]], ..commonvals)] + d_old <- d_old[, .SD, .SDcols = c(colinfo[["commondims"]], commonvals)] data.table::setnames(d_old, commonvals, paste0(commonvals, "_old")) compare <- collapse::join(d_new, d_old, on = colinfo[["commondims"]], how = "full", verbose = 0, overid = 2) @@ -358,22 +338,25 @@ add_totals_for_missing_dims <- function(dt, ref, dimlist){ #' @param comparecube combined new and old cube with _new and _old valuecolumns, created by [qualcontrol::combine_cubes] #' @param valuecolumns vector containing value columns to calculate diff columns #' @return comparecube with diff columns -add_diffcolumns <- function(comparecube, - valuecolumns){ +add_diffcolumns <- function(comparecube, valuecolumns){ for(val in valuecolumns){ new <- paste0(val, "_new") old <- paste0(val, "_old") diff <- paste0(val, "_diff") reldiff <- paste0(val, "_reldiff") - comparecube[, (diff) := get(new) - get(old)] - comparecube[, (reldiff) := get(new) / get(old)] + data.table::set(comparecube, j = diff, value = comparecube[[new]] - comparecube[[old]]) + data.table::set(comparecube, j = reldiff, value = comparecube[[new]] / comparecube[[old]]) + # For rows with missing new or old values, set _diff and _reldiff to NA - comparecube[is.na(get(new)) + is.na(get(old)) == 1, (diff) := NA_real_] - comparecube[is.na(get(new)) + is.na(get(old)) == 1, (reldiff) := NA_real_] + idx_one_na <- which(is.na(comparecube[[new]]) + is.na(comparecube[[old]]) == 1L) + data.table::set(comparecube, i = idx_one_na, j = diff, value = NA_real_) + data.table::set(comparecube, i = idx_one_na, j = reldiff, value = NA_real_) + # For rows with missing old AND new, set _diff = 0, and _reldiff = 1 - comparecube[is.na(get(new)) & is.na(get(old)), (diff) := 0] - comparecube[is.na(get(new)) & is.na(get(old)), (reldiff) := 1] + idx_both_na <- which(is.na(comparecube[[new]]) & is.na(comparecube[[old]])) + data.table::set(comparecube, i = idx_both_na, j = diff, value = 0) + data.table::set(comparecube, i = idx_both_na, j = reldiff, value = 1) } for(val in c("SPVFLAGG", "RATE.n")){ @@ -385,7 +368,7 @@ add_diffcolumns <- function(comparecube, diffcolumns <- grep("_diff$", names(comparecube), value = T) comparecube[, let(any_diffs = 0L)] - comparecube[rowSums(abs(comparecube[, ..diffcolumns]) > 0.1, na.rm = T) > 0, let(any_diffs = 1L)] + comparecube[rowSums(abs(comparecube[, .SD, .SDcols = diffcolumns]) > 0.1, na.rm = T) > 0, let(any_diffs = 1L)] } #' @title get_dump_folder @@ -464,9 +447,9 @@ qc_round <- function(dt){ round1 <- values[grepl("TELLER|NEVNER", values) & !grepl("_reldiff", values)] round2 <- values[grepl("RATE|SMR|MEIS|MIN$|MAX$|LOW$|HIGH$|.*wq\\d{2}$", values, perl = T) | grepl("_reldiff", values)] - for(val in round0){ dt[, (val) := round(get(val), 0)] } - for(val in round1){ dt[, (val) := round(get(val), 1)] } - for(val in round2){ dt[, (val) := round(get(val), 2)] } + for(val in round0) data.table::set(dt, j = val, value = round(dt[[val]], 0)) + for(val in round1) data.table::set(dt, j = val, value = round(dt[[val]], 1)) + for(val in round2) data.table::set(dt, j = val, value = round(dt[[val]], 2)) return(dt) } diff --git a/R/plot_boxplot.R b/R/plot_boxplot.R index 383d8d8..e2ea491 100644 --- a/R/plot_boxplot.R +++ b/R/plot_boxplot.R @@ -27,14 +27,17 @@ plot_boxplot <- function(dt = newcube_flag, onlynew = TRUE, change = FALSE, save # Extract baseplotdata bycols <- c("GEOniv", grep("^GEO$|^AAR$", colinfo$dims.new, invert = T, value = T)) - g <- collapse::GRP(d, c(bycols, quantiles, limits)) - - bpdata <- collapse::join(g[["groups"]], - d[, .(N_obs = collapse::fsum(!is.na(get(plotvalue))), - MINABOVELOW = collapse::fmin(get(plotvalue)[get(plotvalue) >= get(limits[1])]), - MAXBELOWHIGH = collapse::fmax(get(plotvalue)[get(plotvalue) <= get(limits[2])])), - by = bycols], - verbose = 0, overid = 2) + + g <- collapse::GRP(d, bycols) + val <- d[[plotvalue]] + lowlim <- d[[limits[1]]] + highlim <- d[[limits[2]]] + + bpdata <- collapse::add_vars(g[["groups"]], + collapse::ffirst(collapse::get_vars(d, c(quantiles, limits)), g = g), + N_obs = collapse::fnobs(d[[plotvalue]], g = g), + MINABOVELOW = collapse::fmin(ifelse(val > lowlim, val, NA_real_), g = g), + MAXBELOWHIGH = collapse::fmax(ifelse(val < highlim, val, NA_real_), g = g)) bpdata[, (limits) := NULL] panels <- grep("^GEOniv$", bycols, invert = T, value = T) @@ -54,8 +57,6 @@ plot_boxplot <- function(dt = newcube_flag, onlynew = TRUE, change = FALSE, save } else { oldata <- d[x == 1, env = list(x = outlier)] } - # oldata[, let(label = paste0(GEO, "'", sub(".*(\\d{2}$)", "\\1", AAR),"'(", round(x, 0), ")"), - # yval = x), env = list(x = plotvalue)] oldata[, let(label = paste0(GEO, "'", sub(".*(\\d{2}$)", "\\1", AAR)), yval = x), env = list(x = plotvalue)] oldata <- oldata[, .SD, .SDcols = c(bycols, "label", "yval")] @@ -68,7 +69,15 @@ plot_boxplot <- function(dt = newcube_flag, onlynew = TRUE, change = FALSE, save plotargs[["ylab"]] <- ifelse(change, paste0(sub("change_", "", plotvalue), ", (% change)"), plotvalue) dpi = 220 - size <- compute_device_size_px(plot_boxplot_plotfun(collect_boxplot_plotdata(bpdata, oldata, filter, 1), plotargs), dpi = dpi) + maxpanelsfile <- 1 + if(length(filter) > 1){ + n_panels <- integer() + for(i in seq_along(filter)){ + n_panels <- c(n_panels, bpdata[x, env = list(x = str2lang(filter[[i]]))][N_obs > 2, length(unique(panels))]) + } + maxpanelsfile <- which.max(n_panels) + } + size <- compute_device_size_px(plot_boxplot_plotfun(collect_boxplot_plotdata(bpdata, oldata, filter, maxpanelsfile), plotargs), dpi = dpi) metadata <- data.table::data.table(file = seq_len(length(filter)), filter = filter) suffix <- character() @@ -86,10 +95,10 @@ plot_boxplot <- function(dt = newcube_flag, onlynew = TRUE, change = FALSE, save } for(i in metadata$file){ - plotdata = collect_boxplot_plotdata(bpdata, oldata, filter, i) + plotdata <- collect_boxplot_plotdata(bpdata, oldata, filter, i) plotargs[["subtitle"]] <- character() for(dim in filedims) plotargs$subtitle <- c(plotargs$subtitle, paste0("\n", dim, ": ", unique(plotdata$bp[[dim]]))) - plot <- plot_boxplot_plotfun(plotdata, plotargs = plotargs) + plot <- plot_boxplot_plotfun(plotdata = plotdata, plotargs = plotargs) if(save) print(plot) pb$tick() } @@ -104,6 +113,9 @@ plot_boxplot <- function(dt = newcube_flag, onlynew = TRUE, change = FALSE, save } } +#' @title collect_boxplot_plotdata +#' @keywords internal +#' @noRd collect_boxplot_plotdata <- function(bpdata, oldata, filter, file){ data <- list() data[["bp"]] <- bpdata[x, env = list(x = str2lang(filter[[file]]))][N_obs > 2] @@ -134,9 +146,9 @@ plot_boxplot_plotfun <- function(plotdata, plotargs){ ggplot2::coord_flip() + ggplot2::geom_boxplot(data = plotdata$bp, ggplot2::aes(ymin = MINABOVELOW, - lower = get(plotargs$quantiles[1]), - middle = get(plotargs$quantiles[2]), - upper = get(plotargs$quantiles[3]), + lower = .data[[plotargs$quantiles[1]]], + middle = .data[[plotargs$quantiles[2]]], + upper = .data[[plotargs$quantiles[3]]], ymax = MAXBELOWHIGH), stat = "identity") + ggplot2::geom_text(data = plotdata$ol, @@ -149,7 +161,8 @@ plot_boxplot_plotfun <- function(plotdata, plotargs){ ggh4x::force_panelsizes(cols = ggplot2::unit(7, "cm"), rows = ggplot2::unit(5, "cm")) + theme_qc() + - ggplot2::theme(plot.subtitle = ggplot2::element_text(size = 12), + ggplot2::theme(plot.title = ggplot2::element_text(size = 12, family = "sans", hjust = 1), + plot.subtitle = ggplot2::element_text(size = 12), plot.caption = ggplot2::element_text(size = 12), axis.title = ggplot2::element_text(size = 12), axis.text = ggplot2::element_text(size = 8), diff --git a/R/plot_timeseries.R b/R/plot_timeseries.R index 9286a23..7b0d749 100644 --- a/R/plot_timeseries.R +++ b/R/plot_timeseries.R @@ -41,15 +41,17 @@ isnewoutlier <- newoutlier %in% names(dt) if(onlynew & !isnewoutlier) onlynew <- FALSE - keepcols <- intersect(unique(c(colinfo$dims.new, unlist(tscols, use.names = F), teller)), names(dt)) + dt[, AARh := sub("\\d{4}_(\\d{4})", "\\1", AAR)] + keepcols <- sub("AAR", "AARh", intersect(unique(c(colinfo$dims.new, unlist(tscols, use.names = F), teller)), names(dt))) complete <- dt[, !is.na(GEOniv) & !is.na(x), env = list(x = as.name(plotvalue))] d <- dt[complete, .SD, .SDcols = keepcols] - d[, AARh := sub("\\d{4}_(\\d{4})", "\\1", AAR)] if(!is.null(show_n_years)){ - incl_aar <- (max(as.numeric(d$AARh)) - show_n_years - 1):max(as.numeric(d$AARh)) + incl_aar <- (max(as.numeric(d$AARh)) - show_n_years + 1):max(as.numeric(d$AARh)) d <- d[AARh %in% incl_aar] } + allyears <- d[, unique(AARh)] + allyears <- min(allyears):max(allyears) bycols <- c("GEO", setdiff(colinfo$dims.new, c("GEO", "AAR"))) data.table::setkeyv(d, c(bycols, "AARh")) @@ -81,7 +83,8 @@ strata[, let(page = ((.I - 1L) %/% 25) + 1L, # max 25 paneler per side panels = interaction(.SD, drop = TRUE, sep = ",", lex.order = T)), .SDcols = bycols] - plotdata <- collapse::join(strata, d, on = bycols, how = "left", multiple = TRUE, verbose = 0, overid = 2) + strata <- strata[, .(AARh = allyears), by = names(strata)] + plotdata <- collapse::join(strata, d, on = c(bycols, "AARh"), how = "left", multiple = TRUE, verbose = 0, overid = 2) plotdata[, let(yval = x, tv = round(y,0), ol = z), env=list(x = plotvalue, y = teller, z = outlier)] plotdata[ol == 1, let(ollabel = "New outlier")] if(isnewoutlier){ @@ -131,28 +134,12 @@ } } -#' @keywords internal -#' @noRd -compute_device_size_px <- function(p, dpi = 160) { - - g <- ggplot2::ggplotGrob(p) - - total_w_cm <- as.numeric(grid::convertWidth(sum(g$widths), "cm", valueOnly = TRUE)) - total_h_cm <- as.numeric(grid::convertHeight(sum(g$heights), "cm", valueOnly = TRUE)) - - width_px <- ceiling(total_w_cm * dpi / 2.54) - height_px <- ceiling(total_h_cm * dpi / 2.54) - - list(width_px = width_px, height_px = height_px) -} - #' @keywords internal #' @noRd collect_timeseries_plotdata <- function(plotdata, page){ plot_d <- list() plot_d[["base"]] <- plotdata[[page]] - # plot_d[["ol"]] <- plot_d[["base"]][ol == 1] - plot_d[["line"]] <- plot_d[["base"]][n_obs > 1] + plot_d[["line"]] <- plot_d[["base"]][n_obs > 1 & !is.na(yval)] return(plot_d) } @@ -165,18 +152,18 @@ plot_timeseries_plotfun <- function(datasets, plotargs){ plot <- ggplot2::ggplot(datasets$base, ggplot2::aes(x = AARh, y = yval)) + ggplot2::facet_wrap(facets = ggplot2::vars(panels), scales = "free_y", ncol = 5) + - ggplot2::geom_point(ggplot2::aes(color = ollabel), size = 1.5, show.legend = TRUE) + + ggplot2::geom_point(ggplot2::aes(color = ollabel), size = 1.5, show.legend = TRUE, na.rm = T) + ggplot2::scale_color_manual(values = c("Normal" = "grey40", "Previous outlier" = "blue", "New outlier" = "red"), limits = c("Normal", "Previous outlier", "New outlier"), breaks = c("Previous outlier", "New outlier"), drop = FALSE) + ggplot2::guides(color = ggplot2::guide_legend(title = NULL)) + - ggplot2::geom_line(data = datasets$line, ggplot2::aes(group = panels), linewidth = 0.3, na.rm = T) + + ggplot2::geom_line(data = datasets$line, ggplot2::aes(group = panels), linewidth = 0.3) + theme_qc() if(!is.na(plotargs$teller)){ plot <- plot + - ggplot2::geom_text(ggplot2::aes(label = tv, y = y_middle), hjust = 0.5, angle = 90, size = 9/ggplot2::.pt) + ggplot2::geom_text(ggplot2::aes(label = tv, y = y_middle), hjust = 0.5, angle = 90, size = 9/ggplot2::.pt, na.rm = T) } plot <- plot + @@ -186,7 +173,7 @@ plot_timeseries_plotfun <- function(datasets, plotargs){ y = plotargs$ylab, caption = plotargs$caption) + ggplot2::theme(text = ggplot2::element_text(family = "sans"), - plot.title = ggplot2::element_text(size = 20), + plot.title = ggplot2::element_text(size = 12, family = "sans", hjust = 1), axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5, size = 8), strip.text = ggplot2::element_text(hjust = 0, size = 9)) diff --git a/R/plot_timeseries_bydel.R b/R/plot_timeseries_bydel.R index f5afb27..89b2bf5 100644 --- a/R/plot_timeseries_bydel.R +++ b/R/plot_timeseries_bydel.R @@ -12,26 +12,39 @@ plot_timeseries_bydel <- function(dt = newcube_flag, save = TRUE){ cat("Only one unique year in the file, time series not plotted") return(invisible(NULL)) } - d <- data.table::copy(dt) + + d <- data.table::copy(dt)[(GEO %in% c(301, 1103, 4601, 5001) | GEOniv == "B")] + colinfo <- identify_coltypes(d) + plotvalue <- select_outlier_pri(d, colinfo = colinfo) cubename <- get_cubename(d) cubefile <- get_cubefilename(d) savepath <- get_plotsavefolder(cubename, "TimeSeries_bydel") if(save) archive_old_files(savepath, ".png") - colinfo <- identify_coltypes(d) - plotvalue <- select_outlier_pri(d, colinfo = colinfo) - d <- d[(GEO %in% c(301, 1103, 4601, 5001) | GEOniv == "B") & !is.na(get(plotvalue))] contains_bydel <- d[GEOniv == "B" & SPVFLAGG == 0, unique(AAR)] - d <- d[AAR %in% contains_bydel] + d <- d[AAR %in% contains_bydel & !is.na(x), .SD, + .SDcols = c(colinfo$dims.new, plotvalue, "GEOniv"), + env = list(x = plotvalue)] add_kommune(d) bycols <- c("KOMMUNE", grep("^GEO$", colinfo$dims.new, invert = T, value = T)) + d[, N_obs := .N, by = c("GEO", setdiff(bycols, c("KOMMUNE", "AAR")))] panels <- grep("^KOMMUNE$|^AAR$", bycols, invert = T, value = T) filedims <- get_plot_subset(d, panels, maxpanels = 5) - if(length(filedims > 0)) panels <- panels[panels %notin% filedims] + if(length(filedims) > 0) panels <- panels[panels %notin% filedims] filter <- get_plot_filter(d, filedims) d[, let(allpanels = "alle")] - if(length(panels > 0)) d[, allpanels := interaction(mget(panels))] + subtitlepanels <- character() + for(dim in panels){ + if(length(unique(dt[[dim]])) == 1){ + subtitlepanels <- c(subtitlepanels, paste0(dim, " = ", unique(dt[[dim]]))) + panels <- setdiff(panels, dim) + } + } + + if(length(panels) > 0){ + d[, allpanels := interaction(.SD), .SDcols = panels] + } trends <- plot_timeseries_bydel_trendlines(d, bycols, filedims, plotvalue) @@ -39,29 +52,70 @@ plot_timeseries_bydel <- function(dt = newcube_flag, save = TRUE){ plotargs <- list() plotargs$plotvalue <- plotvalue plotargs$title <- paste0("File: ", attributes(dt)$Filename, ", Plotting date: ", Sys.Date()) - plotargs$subtitle <- paste0("Variable plotted: ", plotvalue) + plotargs$subtitlepanels <- character() + if(length(subtitlepanels) > 0) plotargs$subtitlepanels <- paste(subtitlepanels, collapse = "\n") plotargs$allplotdims <- get_all_combinations(d, c("KOMMUNE", "allpanels")) plotargs$anyrows <- ifelse(length(panels) > 0, 1, 0) + if(plotargs$anyrows == 1){ + plotargs$caption <- paste0("Rader fordelt på ", paste(panels, collapse = ", ")) + } rows <- nrow(plotargs$allplotdims[, .N, by = allpanels]) - for(i in filter){ - cat("\nSaving file", which(filter == i), "/", length(filter)) - plotdata <- d[eval(parse(text = i))] - trenddata <- trends[eval(parse(text = i))] - - if(nrow(plotdata)>0){ - suffix <- get_multifile_plot_suffix(plotdata, filedims) - plotargs$subtitle_full <- plotargs$subtitle - for(i in filedims){ - plotargs$subtitle_full <- paste0(plotargs$subtitle_full, "\n", i, ": ", unique(plotdata[[i]])) - } - plot <- plot_timeseries_bydel_plotfun(plotdata, trenddata, plotargs) - if(save) plot_timeseries_bydel_savefun(plot, savepath, cubefile, suffix, rows) - print(plot) + metadata <- data.table::data.table(file = seq_len(length(filter)), filter = filter) + suffix <- character() + for(i in metadata$file) suffix <- c(suffix, get_multifile_plot_suffix(d[x, env = list(x = str2lang(filter[[i]]))], filedims)) + metadata[, let(tmp_name = sprintf("plot-%04d.png", file), + filename = paste0(cubefile, "_", suffix, ".png"))] + n_plot <- metadata[, .N] + + pb <- progress::progress_bar$new(format = "Plotter :current / :total filer. [:bar] Estimert ferdig om: :eta", + total = n_plot, clear = FALSE) + + dpi = 220 + maxrowfile <- 1 + if(length(filter) > 1){ + n_panels <- integer() + for(i in seq_along(filter)){ + n_panels <- c(n_panels, d[x, env = list(x = str2lang(filter[[i]]))][, length(unique(panels))]) + } + maxrowfile <- which.max(n_panels) + } + size <- compute_device_size_px(plot_timeseries_bydel_plotfun(collect_tsb_plotdata(d, trends, filter, maxrowfile), plotargs), dpi = dpi) + + if(save){ + ragg::agg_png(filename = file.path(savepath, "plot-%04d.png"), res = dpi, width = size$width_px, height = size$height_px, units = "px") + } + + for(i in metadata$file){ + plotdata <- collect_tsb_plotdata(d, trends, filter, i) + plotargs$subtitle <- character() + for(dim in filedims) plotargs$subtitle <- paste0(plotargs$subtitle, paste0(dim, ": ", unique(plotdata$pd[[dim]]), collapse = "\n")) + if(length(plotargs$subtitlepanels) > 0) plotargs$subtitle <- paste0(plotargs$subtitlepanels, "\n", plotargs$subtitle) + plot <- plot_timeseries_bydel_plotfun(plotdata, plotargs) + if(save) print(plot) + pb$tick() + } + + if(save){ + dev.off() + for (k in 1:30) { + if (all(file.exists(file.path(savepath, metadata$tmp_name)))) break + Sys.sleep(0.1) } + invisible(file.rename(file.path(savepath, metadata$tmp_name),file.path(savepath, metadata$filename))) } } +#' @title collect_tsb_plotdata +#' @keywords internal +#' @noRd +collect_tsb_plotdata <- function(plotdata, trenddata, filter, file){ + data <- list() + data[["pd"]] <- plotdata[x, env = list(x = str2lang(filter[[file]]))][N_obs > 1] + data[["td"]] <- trenddata[x, env = list(x = str2lang(filter[[file]]))] + return(data) +} + #' @title plot_boxplot_plotfun #' @description #' Plotting function for [qualcontrol::plot_timeseries_bydel()] @@ -70,9 +124,7 @@ plot_timeseries_bydel <- function(dt = newcube_flag, save = TRUE){ #' @param pd plotdata #' @param td trenddata #' @param plotargs list of plot arguments -plot_timeseries_bydel_plotfun <- function(pd, - td, - plotargs){ +plot_timeseries_bydel_plotfun <- function(plotdata, plotargs){ plot <- ggplot2::ggplot(plotargs$allplotdims) + ggplot2::facet_grid(cols = ggplot2::vars(KOMMUNE), @@ -80,25 +132,31 @@ plot_timeseries_bydel_plotfun <- function(pd, switch = "y", scales = "free_y") + ggplot2::labs(title = plotargs$title, - subtitle = plotargs$subtitle_full, + subtitle = plotargs$subtitle, y = plotargs$plotvalue) + - ggplot2::geom_line(data = td, + ggplot2::geom_line(data = plotdata$td, ggplot2::aes(x = AAR, y = y, color = type, group = type), linewidth = 1.5) + ggplot2::scale_color_manual(values = c("red", "blue")) + - ggplot2::geom_line(data = pd, - ggplot2::aes(x = AAR, y = get(plotargs$plotvalue), group = GEO), linetype = 2) + - ggplot2::geom_point(data = pd, - ggplot2::aes(x = AAR, y = get(plotargs$plotvalue)), + ggplot2::geom_line(data = plotdata$pd, + ggplot2::aes(x = AAR, y = .data[[plotargs$plotvalue]], group = GEO), linetype = 2) + + ggplot2::geom_point(data = plotdata$pd, + ggplot2::aes(x = AAR, y = .data[[plotargs$plotvalue]]), size = 3, shape = 1) + ggplot2::guides(color = ggplot2::guide_legend(title = NULL)) + theme_qc() + - ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5)) + ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5), + plot.title = ggplot2::element_text(size = 12, family = "sans", hjust = 1)) + + ggh4x::force_panelsizes(rows = ggplot2::unit(4.5, "cm"), + cols = ggplot2::unit(7, "cm")) if(plotargs$anyrows == 0){ plot <- plot + ggplot2::theme(strip.background.y = ggplot2::element_blank(), strip.text.y = ggplot2::element_blank()) + } else { + plot <- plot + + ggplot2::labs(caption = plotargs$caption) } return(plot) diff --git a/R/plot_utils.R b/R/plot_utils.R index 5830806..d4002a9 100644 --- a/R/plot_utils.R +++ b/R/plot_utils.R @@ -156,3 +156,17 @@ get_multifile_plot_suffix <- function(dt, files = files){ return(suffix) } +#' @keywords internal +#' @noRd +compute_device_size_px <- function(p, dpi = 160) { + + g <- ggplot2::ggplotGrob(p) + + total_w_cm <- as.numeric(grid::convertWidth(sum(g$widths), "cm", valueOnly = TRUE)) + total_h_cm <- as.numeric(grid::convertHeight(sum(g$heights), "cm", valueOnly = TRUE)) + + width_px <- ceiling(total_w_cm * dpi / 2.54) + height_px <- ceiling(total_h_cm * dpi / 2.54) + + list(width_px = width_px, height_px = height_px) +} diff --git a/R/population.R b/R/population.R index 8ac32c2..9e5cc09 100644 --- a/R/population.R +++ b/R/population.R @@ -23,7 +23,7 @@ check_befvekst <- function(statusfolder = NULL){ befolk <- .GlobalEnv$oldcube dims <- identify_coltypes(befvekst, befolk)$commondims for(dim in dims){ - befolk <- befolk[get(dim) %in% unique(befvekst[[dim]])] + befolk <- befolk[x %in% unique(befvekst[[dim]]), env = list(x = dim)] } data.table::setkeyv(befvekst, dims) data.table::setkeyv(befolk, dims) diff --git a/R/read-files.R b/R/read-files.R index b1449e2..35abddc 100644 --- a/R/read-files.R +++ b/R/read-files.R @@ -26,12 +26,14 @@ readfiles <- function(cube.new = NULL, newcube <- read_cube(path.new, type = "New") newcube <- recode_geo(newcube, recode.new) newcube <- add_geoparams(newcube) + collect_censor_information(dt = newcube) if(!is.null(cube.old)){ path.old <- find_cube(cube.old) oldcube <- read_cube(path.old, type = "Old") oldcube <- recode_geo(oldcube, recode.old) oldcube <- add_geoparams(oldcube) + collect_censor_information(dt = oldcube) } newcube <<- newcube @@ -204,6 +206,42 @@ add_geoparams <- function(dt){ return(dt) } +#' @title collect_censor_information +#' @description +#' If information on secondary censoring is only provided splitted into naboprikketIOmgX-columns, +#' collect them into column naboprikket (0|1). If naboprikket exists, only keep this column. If no +#' secondary censoring column exist, add naboprikket = NA_real_ +#' @param dt +#' @keywords internal +#' @noRd +collect_censor_information <- function(dt){ + for(col in c("pvern", "serieprikket")){ + if(col %in% names(dt)){ + dt[, (col) := as.integer(x), env = list(x = col)] + } else { + dt[, (col) := NA_integer_] + } + } + + naboprikkcols <- grep("^naboprikketIomg", names(dt), value = T) + if("naboprikket" %in% names(dt)){ # Future standard from khfunctions + dt[, naboprikket := as.integer(naboprikket)] + if(length(naboprikkcols) > 0) dt[, (naboprikkcols) := NULL] + return(invisible(NULL)) + } + + if(length(naboprikkcols) > 0){ + dt[, naboprikket := 0L] + idx <- which(rowSums(dt[, .SD, .SDcols = naboprikkcols]) > 0) + data.table::set(dt, i = idx, j = "naboprikket", value = 1L) + dt[, (naboprikkcols) := NULL] + return(invisible(NULL)) + } + + dt[, naboprikket := NA_integer_] + return(invisible(NULL)) +} + #' @keywords internal #' @noRd add_csv <- function(string){ diff --git a/R/ungdata.R b/R/ungdata.R index 2379076..1f171c3 100644 --- a/R/ungdata.R +++ b/R/ungdata.R @@ -5,8 +5,7 @@ #' #' @param dt cube file #' @export -check_nevner_change <- function(dt = newcube, - save = TRUE){ +check_nevner_change <- function(dt = newcube, save = TRUE){ cubefile <- get_cubefilename(dt) savepath <- get_table_savefolder(get_cubename(dt)) @@ -18,9 +17,9 @@ check_nevner_change <- function(dt = newcube, d <- aggregate_cube_multi(d, aggdims) nevnercol <- select_nevner_pri(colinfo$vals.new) if(is.na(nevnercol)) return("no nevner in file") - d <- d[!is.na(get(nevnercol))] + d <- d[!is.na(x), env = list(x = nevnercol)] bycols <- grep("AAR", colinfo$dims.new, invert = T, value = T) - d <- d[, mget(c("AAR", bycols, nevnercol))] + d <- d[, .SD, .SDcols = c("AAR", bycols, nevnercol)] data.table::setkeyv(d, c(bycols, "AAR")) g <- collapse::GRP(d, bycols) @@ -28,13 +27,13 @@ check_nevner_change <- function(dt = newcube, d[, sumNEVNER_last := collapse::flag(d[[nevnercol]], g = g)] d[, sumNEVNER_last := zoo::na.locf(sumNEVNER_last, na.rm = F), by = bycols] d[, sumNEVNER_max := collapse::fmax(d[[nevnercol]], g = g, TRA = 1)] - d[, sumNEVNER_vs_last := round(get(nevnercol)/sumNEVNER_last, 2)] - d[, sumNEVNER_vs_max := round(get(nevnercol)/sumNEVNER_max, 2)] + d[, sumNEVNER_vs_last := round(x/sumNEVNER_last, 2), env = list(x = nevnercol)] + d[, sumNEVNER_vs_max := round(x/sumNEVNER_max, 2), env = list(x = nevnercol)] dims <- c("GEO", "AAR") if("ALDER" %in% names(d)) dims <- c(dims, "ALDER") - d <- d[, mget(c(dims, nevnercol, "sumNEVNER_max", "sumNEVNER_last", "sumNEVNER_vs_last", "sumNEVNER_vs_max"))] + d <- d[, .SD, .SDcols = c(dims, nevnercol, "sumNEVNER_max", "sumNEVNER_last", "sumNEVNER_vs_last", "sumNEVNER_vs_max")] convert_coltype(d, dims, "factor") if(save) save_table_output(table = d, savepath = savepath, cubefile = cubefile, suffix = suffix) diff --git a/R/utils.R b/R/utils.R index 7075e34..c85fc0e 100644 --- a/R/utils.R +++ b/R/utils.R @@ -17,26 +17,34 @@ aggregate_cube <- function(cube, dim){ total <- find_total(cube, dim) if(!is.na(total)){ - return(cube[get(dim) == total]) + return(cube[x == total, env = list(x = dim)]) } if(is.na(total)){ colinfo <- identify_coltypes(cube) - vals <- colinfo$vals.new - vals <- grep("SPVFLAGG", vals, value = T, invert = T) + vals <- grep("SPVFLAGG", colinfo$vals.new, value = T, invert = T) cube[, (vals) := lapply(.SD, as.numeric), .SDcols = vals] sumvals <- grep("TELLER", vals, value = T) - avgvals <- grep("TELLER", vals, value = T, invert = T) - groupdims <- grep(dim, colinfo$dims.new, value = T, invert = T) - data.table::setkeyv(cube, groupdims) - - cube[, (avgvals) := lapply(.SD, mean, na.rm = T), .SDcols = avgvals, by = groupdims] - cube[, (sumvals) := lapply(.SD, sum, na.rm = T), .SDcols = sumvals, by = groupdims] - for(i in avgvals){cube[is.nan(get(i)), (i) := NA_real_]} - cube[, (dim) := "Total"] - cube <- cube[, .SD[1], by = groupdims] - data.table::setcolorder(cube, colorder) - return(cube) + avgvals <- setdiff(vals, sumvals) + groupdims <- setdiff(colinfo$dims.new, dim) + uniquevals <- intersect(names(cube), c("GEOniv", "WEIGHTS")) + maxvals <- setdiff(names(cube), c(sumvals, avgvals, groupdims, uniquevals, dim)) + + + g <- collapse::GRP(cube, groupdims) + + agg <- collapse::add_vars( + g[["groups"]], + collapse::fsum(collapse::get_vars(cube, sumvals), g = g), + collapse::fmean(collapse::get_vars(cube, avgvals), g = g), + collapse::fmax(collapse::get_vars(cube, maxvals), g = g), + collapse::ffirst(collapse::get_vars(cube, uniquevals), g = g) + ) + + for(j in avgvals) data.table::set(agg, i = which(is.nan(agg[[j]])), j = j, value = NA_real_) + agg[, (dim) := "Total"] + data.table::setcolorder(agg, colorder) + return(agg) } } @@ -152,10 +160,7 @@ find_total <- function(cube, dim){ #' @param cube.old old file #' @param dimtable table generated with [qualcontrol::compare_dimensions()] #' @param filter "new" or "old", indicating whether the file to filter is the new or old file -filter_cube <- function(cube.new, - cube.old, - dimtable, - filter = c("new", "old")){ +filter_cube <- function(cube.new, cube.old, dimtable, filter = c("new", "old")){ filter <- match.arg(filter) filteron <- switch(filter, new = "New levels", @@ -166,11 +171,11 @@ filter_cube <- function(cube.new, refcube <- switch(filter, new = data.table::copy(cube.old), old = data.table::copy(cube.new)) - filterdims <- data.table::copy(dimtable)[get(filteron) != ""]$Dimension + filterdims <- data.table::copy(dimtable)[x != "", env = list(x = filteron)][["Dimension"]] if(length(filterdims) > 0){ for(dim in filterdims){ - filtercube <- filtercube[get(dim) %in% unique(refcube[[dim]])] + filtercube <- filtercube[x %in% unique(refcube[[dim]]), env = list(x = dim)] } } @@ -267,10 +272,7 @@ get_all_combinations <- function(dt, #' # get_complete_strata(data, by = bycols, type = "censored") #' # Actually filter data #' # data <- get_complete_strata(data, by = bycols, type = "censored") -get_complete_strata <- function(data, - by, - type = c("censored", "missing"), - valuecolumn = NULL){ +get_complete_strata <- function(data, by, type = c("censored", "missing"), valuecolumn = NULL){ if("GEO" %in% by) by <- grep("^GEO$", by, invert = T, value = T) if(type == "missing" && (is.null(valuecolumn) || valuecolumn %notin% names(data))){ @@ -279,7 +281,7 @@ get_complete_strata <- function(data, switch(type, censored = data[, let(n_censored = sum(SPVFLAGG != 0)), by = by], - missing = data[, let(n_censored = sum(is.na(get(valuecolumn)))), by = by]) + missing = data[, let(n_censored = sum(is.na(x))), by = by, env = list(x = valuecolumn)]) data <- data[n_censored == 0] data[, let(n_censored = NULL)] return(data) @@ -333,18 +335,20 @@ identify_coltypes <- function(cube.new = NULL, if(is.null(cube.new)) stop("cube.new must be provided") - misc_cols <- c("origgeo", "GEOniv", "KOMMUNE", "WEIGHTS") - prikkeparams <- "^pvern$|^serieprikket$|^naboprikket" + misc_cols <- c("origgeo", "GEOniv", "KOMMUNE", "WEIGHTS", "any_diffs", "newrow", "exprow") + censorparams <- "^pvern$|^serieprikket$|^naboprikket" out <- list() allcolsnew <- names(cube.new) out[["dims.new"]] <- intersect(allcolsnew, getOption("qualcontrol.alldimensions")) - out[["vals.new"]] <- setdiff(allcolsnew, c(out$dims.new, misc_cols, grep(prikkeparams, allcolsnew, value = T))) + out[["vals.new"]] <- setdiff(allcolsnew, c(out$dims.new, misc_cols, grep(censorparams, allcolsnew, value = T))) + out[["censor.new"]] <- grep(censorparams, allcolsnew, value = T) if(!is.null(cube.old)){ allcolsold <- names(cube.old) out[["dims.old"]] <- intersect(allcolsold, getOption("qualcontrol.alldimensions")) - out[["vals.old"]] <- setdiff(allcolsold, c(out$dims.old, misc_cols, grep(prikkeparams, allcolsold, value = T))) + out[["vals.old"]] <- setdiff(allcolsold, c(out$dims.old, misc_cols, grep(censorparams, allcolsold, value = T))) + out[["censor.old"]] <- grep(censorparams, allcolsold, value = T) out[["commondims"]] <- intersect(out$dims.new, out$dims.old) out[["commonvals"]] <- intersect(out$vals.new, out$vals.old) out[["commoncols"]] <- c(out$commondims, out$commonvals) diff --git a/R/zzz.R b/R/zzz.R index 92e8108..38be9f1 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -35,6 +35,20 @@ opt.qualcontrol <- orgdata:::is_globs("qualcontrol") orgdata::update_khpackage("qualcontrol") } } + + optqualcontrol <- orgdata:::is_globs("qualcontrol") + orgDT <- !(names(optqualcontrol) %in% names(options())) + if (any(orgDT)) options(optqualcontrol[orgDT]) + + corrglobs <- orgdata:::is_correct_globs(optqualcontrol) + if(!isTRUE(corrglobs)){ + x <- utils::menu(title = "Options are not the same as in the config file, update options now?", + choices = c("Yes", "No")) + if(x == 1){ + orgdata:::update_globs("qualcontrol") + } + } + packageStartupMessage("qualcontrol version: ", utils::packageDescription("qualcontrol")[["Version"]], "\n- Population file used for weighting and geo-level: ", attributes(.popinfo)$popfile, diff --git a/dev/benchmarking.R b/dev/benchmarking.R index 70a9539..b15d551 100644 --- a/dev/benchmarking.R +++ b/dev/benchmarking.R @@ -2,12 +2,10 @@ # # microbenchmark( # Old = { -# PlotTimeDiff(comparecube) +# fun2(d, bycols, quantiles, limits) # }, # # New = { -# plot_diff_timetrends(comparecube, save = F) +# fun1(d, bycols, quantiles, limits) # }, # times = 25) -# -#