From f7479295955a0fdc1130baf22ff090960576eb9c Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Wed, 7 May 2025 19:43:24 -0400 Subject: [PATCH 1/2] smarter mapping --- NAMESPACE | 1 + R/cleanPedigree.R | 38 ++++++++++++++++++++++++-------------- man/standardizeColnames.Rd | 5 +++-- vignettes/ASOIAF.Rmd | 9 ++++++++- 4 files changed, 36 insertions(+), 17 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index d739dd51..9680db70 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -42,6 +42,7 @@ export(related_coef) export(repairSex) export(resample) export(simulatePedigree) +export(standardizeColnames) export(summariseFamilies) export(summariseMatrilines) export(summarisePatrilines) diff --git a/R/cleanPedigree.R b/R/cleanPedigree.R index 788d38c0..86067960 100644 --- a/R/cleanPedigree.R +++ b/R/cleanPedigree.R @@ -7,23 +7,33 @@ #' #' @param df A dataframe whose column names need to be standardized. #' @param verbose A logical indicating whether to print progress messages. +#' @param mapping A list of mapping options for customizing the renaming process. #' @return A dataframe with standardized column names. 
#' -#' @keywords internal -standardizeColnames <- function(df, verbose = FALSE) { +#' @export +standardizeColnames <- function(df, verbose = FALSE, mapping = list()) { # Internal mapping of standardized names to possible variants - mapping <- list( - "famID" = "^(?:fam(?:ily)?[\\.\\-_]?(?:id)?)", - "ID" = "^(?:i(?:d$|ndiv(?:idual)?)|p(?:erson)?[\\.\\-_]?id)", - "gen" = "^(?:gen(?:s|eration)?)", - "dadID" = "^(?:d(?:ad)?id|paid|fatherid|pid[\\.\\-_]?fath[er]*|sire)", - "patID" = "^(?:dat[\\.\\-_]?id|pat[\\.\\-_]?id|paternal[\\.\\-_]?(?:id)?)", - "momID" = "^(?:m(?:om|a|other)?[\\.\\-_]?id|pid[\\.\\-_]?moth[er]*|dame)", - "matID" = "^(?:mat[\\.\\-_]?id|maternal[\\.\\-_]?(?:id)?)", - "spID" = "^(?:s(?:pt)?id|spouse[\\.\\-_]?(?:id)?|partner[\\.\\-_]?(?:id)?|husb(?:and)?[\\.\\-_]?id|wife[\\.\\-_]?(?:id)?|pid[\\.\\-_]?spouse1?)", - "twinID" = "^(?:twin[\\.\\-_]?(?:id)?)", - "sex" = "^(?:sex|gender|female|m(?:a(?:le|n)|en)|wom[ae]n)" - ) + + # default mapping + default_mapping <- list( + "famID" = "^(?:fam(?:ily)?[\\.\\-_]?(?:id)?)", + "ID" = "^(?:i(?:d$|ndiv(?:idual)?)|p(?:erson)?[\\.\\-_]?id)", + "gen" = "^(?:gen(?:s|eration)?)", + "dadID" = "^(?:d(?:ad)?id|paid|fatherid|pid[\\.\\-_]?fath[er]*|sire)", + "patID" = "^(?:dat[\\.\\-_]?id|pat[\\.\\-_]?id|paternal[\\.\\-_]?(?:id)?)", + "momID" = "^(?:m(?:om|a|other)?[\\.\\-_]?id|pid[\\.\\-_]?moth[er]*|dame)", + "matID" = "^(?:mat[\\.\\-_]?id|maternal[\\.\\-_]?(?:id)?)", + "spID" = "^(?:s(?:pt)?id|spouse[\\.\\-_]?(?:id)?|partner[\\.\\-_]?(?:id)?|husb(?:and)?[\\.\\-_]?id|wife[\\.\\-_]?(?:id)?|pid[\\.\\-_]?spouse1?)", + "twinID" = "^(?:twin[\\.\\-_]?(?:id)?)", + "sex" = "^(?:sex|gender|female|m(?:a(?:le|n)|en)|wom[ae]n)" + ) + + # Merge the supplied mapping over default_mapping: supplied entries win, missing ones keep the default + + mapping <- utils::modifyList(default_mapping, mapping) + + + if (verbose) { print("Standardizing column names...") } diff --git a/man/standardizeColnames.Rd b/man/standardizeColnames.Rd index a6e38f3a..118108bf 100644 
--- a/man/standardizeColnames.Rd +++ b/man/standardizeColnames.Rd @@ -4,12 +4,14 @@ \alias{standardizeColnames} \title{Standardize Column Names in a Dataframe (Internal)} \usage{ -standardizeColnames(df, verbose = FALSE) +standardizeColnames(df, verbose = FALSE, mapping = list()) } \arguments{ \item{df}{A dataframe whose column names need to be standardized.} \item{verbose}{A logical indicating whether to print progress messages.} + +\item{mapping}{A list of mapping options for customizing the renaming process.} } \value{ A dataframe with standardized column names. @@ -20,4 +22,3 @@ It utilizes regular expressions and the `tolower()` function to match column nam against a list of predefined standard names. The approach is case-insensitive and allows for flexible matching of column names. } -\keyword{internal} diff --git a/vignettes/ASOIAF.Rmd b/vignettes/ASOIAF.Rmd index 00211864..977abea7 100644 --- a/vignettes/ASOIAF.Rmd +++ b/vignettes/ASOIAF.Rmd @@ -21,6 +21,8 @@ We begin by loading the required libraries and examining the structure of the bu ```{r echo=TRUE, message=FALSE, warning=FALSE} library(BGmisc) library(tidyverse) +library(ggpedigree) + data(ASOIAF) ``` @@ -137,7 +139,7 @@ Many real-world and fictional pedigrees contain individuals with unknown or part - Create "phantom" placeholders for the missing parent --Optionally repair and harmonize parent fields +- Optionally repair and harmonize parent fields To facilitate plotting, we check for individuals with one known parent but a missing other. For those cases, we assign a placeholder ID to the missing parent. @@ -166,4 +168,9 @@ We can now visualize the repaired pedigree using the `plotPedigree()` function. 
```{r, message=FALSE, warning=FALSE} plotPedigree(df_repaired, affected = df_repaired$affected, verbose = FALSE) + +ggPedigree(df_repaired, status_col = "affected", personID_col = "ID", code_male = "M", + config = list(unaffected = 0,affected = 1, + ped_width=15)) + ``` From 1922c8194c8393b482c06cb22965f3d4cd7a1668 Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Sat, 17 May 2025 21:09:07 -0400 Subject: [PATCH 2/2] Update documentData.R --- R/documentData.R | 250 +++++++++++++++++++++++------------------------ 1 file changed, 125 insertions(+), 125 deletions(-) diff --git a/R/documentData.R b/R/documentData.R index cf589584..013f2c76 100644 --- a/R/documentData.R +++ b/R/documentData.R @@ -1,10 +1,10 @@ -##' Artificial pedigree data on eight families with inbreeding -##' -##' A dataset created purely from imagination that includes several types of inbreeding. -##' Different kinds of inbreeding occur in each extended family. -##' -##' The types of inbreeding are as follows: -##' +#' Artificial pedigree data on eight families with inbreeding +#' +#' A dataset created purely from imagination that includes several types of inbreeding. +#' Different kinds of inbreeding occur in each extended family. +#' +#' The types of inbreeding are as follows: +#' #' \itemize{ #' \item Extended Family 1: Sister wives - Children with the same father and different mothers who are sisters. #' \item Extended Family 2: Full siblings have children. @@ -15,131 +15,131 @@ #' \item Extended Family 7: Uncle-niece and Aunt-nephew have children. #' \item Extended Family 8: A father-son pairs has children with a corresponding mother-daughter pair. #' } -##' -##' Although not all of the above structures are technically inbreeding, they aim to test pedigree diagramming and path tracing algorithms. 
-##' -##' The variables are as follows: -##' -##' \itemize{ -##' \item \code{ID}: Person identification variable -##' \item \code{sex}: Sex of the ID: 1 is female; 0 is male -##' \item \code{dadID}: ID of the father -##' \item \code{momID}: ID of the mother -##' \item \code{FamID}: ID of the extended family -##' \item \code{Gen}: Generation of the person -##' \item \code{proband}: Always FALSE -##' } -##' -##' @docType data -##' @keywords datasets -##' @name inbreeding -##' @usage data(inbreeding) -##' @format A data frame (and ped object) with 134 rows and 7 variables +#' +#' Although not all of the above structures are technically inbreeding, they aim to test pedigree diagramming and path tracing algorithms. +#' +#' The variables are as follows: +#' +#' \itemize{ +#' \item \code{ID}: Person identification variable +#' \item \code{sex}: Sex of the ID: 1 is female; 0 is male +#' \item \code{dadID}: ID of the father +#' \item \code{momID}: ID of the mother +#' \item \code{FamID}: ID of the extended family +#' \item \code{Gen}: Generation of the person +#' \item \code{proband}: Always FALSE +#' } +#' +#' @docType data +#' @keywords datasets +#' @name inbreeding +#' @usage data(inbreeding) +#' @format A data frame (and ped object) with 134 rows and 7 variables NULL -##' Simulated pedigree with two extended families and an age-related hazard -##' -##' A dataset simulated to have an age-related hazard. -##' There are two extended families that are sampled from the same population. -##' -##' The variables are as follows: -##' -##' \itemize{ -##' \item \code{FamID}: ID of the extended family -##' \item \code{ID}: Person identification variable -##' \item \code{sex}: Sex of the ID: 1 is female; 0 is male -##' \item \code{dadID}: ID of the father -##' \item \code{momID}: ID of the mother -##' \item \code{affected}: logical. 
Whether the person is affected or not -##' \item \code{DA1}: Binary variable signifying the meaninglessness of life -##' \item \code{DA2}: Binary variable signifying the fundamental unknowability of existence -##' \item \code{birthYr}: Birth year for person -##' \item \code{onsetYr}: Year of onset for person -##' \item \code{deathYr}: Death year for person -##' \item \code{available}: logical. Whether -##' \item \code{Gen}: Generation of the person -##' \item \code{proband}: logical. Whether the person is a proband or not -##' } -##' -##' @docType data -##' @keywords datasets -##' @name hazard -##' @usage data(hazard) -##' @format A data frame with 43 rows and 14 variables +#' Simulated pedigree with two extended families and an age-related hazard +#' +#' A dataset simulated to have an age-related hazard. +#' There are two extended families that are sampled from the same population. +#' +#' The variables are as follows: +#' +#' \itemize{ +#' \item \code{FamID}: ID of the extended family +#' \item \code{ID}: Person identification variable +#' \item \code{sex}: Sex of the ID: 1 is female; 0 is male +#' \item \code{dadID}: ID of the father +#' \item \code{momID}: ID of the mother +#' \item \code{affected}: logical. Whether the person is affected or not +#' \item \code{DA1}: Binary variable signifying the meaninglessness of life +#' \item \code{DA2}: Binary variable signifying the fundamental unknowability of existence +#' \item \code{birthYr}: Birth year for person +#' \item \code{onsetYr}: Year of onset for person +#' \item \code{deathYr}: Death year for person +#' \item \code{available}: logical. Whether +#' \item \code{Gen}: Generation of the person +#' \item \code{proband}: logical. 
Whether the person is a proband or not +#' } +#' +#' @docType data +#' @keywords datasets +#' @name hazard +#' @usage data(hazard) +#' @format A data frame with 43 rows and 14 variables NULL -##' Fictional pedigree data on a wizarding family -##' -##' A dataset created purely from imagination that includes a subset of the Potter extended family. -##' -##' The variables are as follows: -##' -##' \itemize{ -##' \item \code{personID}: Person identification variable -##' \item \code{famID}: Family identification variable -##' \item \code{name}: Name of the person -##' \item \code{gen}: Generation of the person -##' \item \code{momID}: ID of the mother -##' \item \code{dadID}: ID of the father -##' \item \code{spouseID}: ID of the spouse -##' \item \code{sex}: Sex of the ID: 1 is male; 0 is female -##' -##' } -##' -##' IDs in the 100s \code{momID}s and \code{dadID}s are for people not in the dataset. -##' -##' @docType data -##' @keywords datasets -##' @name potter -##' @usage data(potter) -##' @format A data frame (and ped object) with 36 rows and 8 variables +#' Fictional pedigree data on a wizarding family +#' +#' A dataset created purely from imagination that includes a subset of the Potter extended family. +#' +#' The variables are as follows: +#' +#' \itemize{ +#' \item \code{personID}: Person identification variable +#' \item \code{famID}: Family identification variable +#' \item \code{name}: Name of the person +#' \item \code{gen}: Generation of the person +#' \item \code{momID}: ID of the mother +#' \item \code{dadID}: ID of the father +#' \item \code{spouseID}: ID of the spouse +#' \item \code{sex}: Sex of the ID: 1 is male; 0 is female +#' +#' } +#' +#' IDs in the 100s \code{momID}s and \code{dadID}s are for people not in the dataset. 
+#' +#' @docType data +#' @keywords datasets +#' @name potter +#' @usage data(potter) +#' @format A data frame (and ped object) with 36 rows and 8 variables NULL -##' Royal pedigree data from 1992 -##' -##' A dataset created by Denis Reid from the Royal Families of Europe. -##' -##' The variables are as follows: -##' id,momID,dadID,name,sex,birth_date,death_date,attribute_title -##' \itemize{ -##' \item \code{id}: Person identification variable -##' \item \code{momID}: ID of the mother -##' \item \code{dadID}: ID of the father -##' \item \code{name}: Name of the person -##' \item \code{sex}: Biological sex -##' \item \code{birth_date}: Date of birth -##' \item \code{death_date}: Date of death -##' \item \code{attribute_title}: Title of the person -##' -##' } -##' -##' -##' @docType data -##' @keywords datasets -##' @name royal92 -##' @usage data(royal92) -##' @format A data frame with 3110 observations +#' Royal pedigree data from 1992 +#' +#' A dataset created by Denis Reid from the Royal Families of Europe. +#' +#' The variables are as follows: +#' id,momID,dadID,name,sex,birth_date,death_date,attribute_title +#' \itemize{ +#' \item \code{id}: Person identification variable +#' \item \code{momID}: ID of the mother +#' \item \code{dadID}: ID of the father +#' \item \code{name}: Name of the person +#' \item \code{sex}: Biological sex +#' \item \code{birth_date}: Date of birth +#' \item \code{death_date}: Date of death +#' \item \code{attribute_title}: Title of the person +#' +#' } +#' +#' +#' @docType data +#' @keywords datasets +#' @name royal92 +#' @usage data(royal92) +#' @format A data frame with 3110 observations NULL -##' A song of ice and fire pedigree data -##' -##' A dataset created from the Song of Ice and Fire series by George R. R. Martin. Core data is from the [Westeros.org forum](https://asoiaf.westeros.org/index.php?/topic/88863-all-the-family-trees/). 
-##' -##' -##' -##' The variables are as follows: -##' \itemize{ -##' \item \code{id}: Person identification variable -##' \item \code{momID}: ID of the mother -##' \item \code{dadID}: ID of the father -##' \item \code{name}: Name of the person -##' \item \code{sex}: Biological sex -##' } -##' -##' @docType data -##' @keywords datasets -##' @name ASOIAF -##' @usage data(ASOIAF) -##' @format A data frame with 501 observations +#' A song of ice and fire pedigree data +#' +#' A dataset created from the Song of Ice and Fire series by George R. R. Martin. Core data is from the [Westeros.org forum](https://asoiaf.westeros.org/index.php?/topic/88863-all-the-family-trees/). +#' +#' +#' +#' The variables are as follows: +#' \itemize{ +#' \item \code{id}: Person identification variable +#' \item \code{momID}: ID of the mother +#' \item \code{dadID}: ID of the father +#' \item \code{name}: Name of the person +#' \item \code{sex}: Biological sex +#' } +#' +#' @docType data +#' @keywords datasets +#' @name ASOIAF +#' @usage data(ASOIAF) +#' @format A data frame with 501 observations NULL