From f7479295955a0fdc1130baf22ff090960576eb9c Mon Sep 17 00:00:00 2001
From: Mason Garrison <garrissm@wfu.edu>
Date: Wed, 7 May 2025 19:43:24 -0400
Subject: [PATCH 1/2] smarter mapping

---
 NAMESPACE                  |  1 +
 R/cleanPedigree.R          | 38 ++++++++++++++++++++++++--------------
 man/standardizeColnames.Rd |  5 +++--
 vignettes/ASOIAF.Rmd       |  9 ++++++++-
 4 files changed, 36 insertions(+), 17 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index d739dd51..9680db70 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -42,6 +42,7 @@ export(related_coef)
 export(repairSex)
 export(resample)
 export(simulatePedigree)
+export(standardizeColnames)
 export(summariseFamilies)
 export(summariseMatrilines)
 export(summarisePatrilines)
diff --git a/R/cleanPedigree.R b/R/cleanPedigree.R
index 788d38c0..86067960 100644
--- a/R/cleanPedigree.R
+++ b/R/cleanPedigree.R
@@ -7,23 +7,33 @@
 #'
 #' @param df A dataframe whose column names need to be standardized.
 #' @param verbose A logical indicating whether to print progress messages.
+#' @param mapping A named list mapping standardized column names to regular expressions; entries override or extend the built-in defaults.
 #' @return A dataframe with standardized column names.
 #'
-#' @keywords internal
-standardizeColnames <- function(df, verbose = FALSE) {
+#' @export
+standardizeColnames <- function(df, verbose = FALSE, mapping = list()) {
   # Internal mapping of standardized names to possible variants
-  mapping <- list(
-    "famID" = "^(?:fam(?:ily)?[\\.\\-_]?(?:id)?)",
-    "ID" = "^(?:i(?:d$|ndiv(?:idual)?)|p(?:erson)?[\\.\\-_]?id)",
-    "gen" = "^(?:gen(?:s|eration)?)",
-    "dadID" = "^(?:d(?:ad)?id|paid|fatherid|pid[\\.\\-_]?fath[er]*|sire)",
-    "patID" = "^(?:dat[\\.\\-_]?id|pat[\\.\\-_]?id|paternal[\\.\\-_]?(?:id)?)",
-    "momID" = "^(?:m(?:om|a|other)?[\\.\\-_]?id|pid[\\.\\-_]?moth[er]*|dame)",
-    "matID" = "^(?:mat[\\.\\-_]?id|maternal[\\.\\-_]?(?:id)?)",
-    "spID" = "^(?:s(?:pt)?id|spouse[\\.\\-_]?(?:id)?|partner[\\.\\-_]?(?:id)?|husb(?:and)?[\\.\\-_]?id|wife[\\.\\-_]?(?:id)?|pid[\\.\\-_]?spouse1?)",
-    "twinID" = "^(?:twin[\\.\\-_]?(?:id)?)",
-    "sex" = "^(?:sex|gender|female|m(?:a(?:le|n)|en)|wom[ae]n)"
-  )
+
+    # default config
+    default_mapping <- list(
+      "famID" = "^(?:fam(?:ily)?[\\.\\-_]?(?:id)?)",
+      "ID" = "^(?:i(?:d$|ndiv(?:idual)?)|p(?:erson)?[\\.\\-_]?id)",
+      "gen" = "^(?:gen(?:s|eration)?)",
+      "dadID" = "^(?:d(?:ad)?id|paid|fatherid|pid[\\.\\-_]?fath[er]*|sire)",
+      "patID" = "^(?:dat[\\.\\-_]?id|pat[\\.\\-_]?id|paternal[\\.\\-_]?(?:id)?)",
+      "momID" = "^(?:m(?:om|a|other)?[\\.\\-_]?id|pid[\\.\\-_]?moth[er]*|dame)",
+      "matID" = "^(?:mat[\\.\\-_]?id|maternal[\\.\\-_]?(?:id)?)",
+      "spID" = "^(?:s(?:pt)?id|spouse[\\.\\-_]?(?:id)?|partner[\\.\\-_]?(?:id)?|husb(?:and)?[\\.\\-_]?id|wife[\\.\\-_]?(?:id)?|pid[\\.\\-_]?spouse1?)",
+      "twinID" = "^(?:twin[\\.\\-_]?(?:id)?)",
+      "sex" = "^(?:sex|gender|female|m(?:a(?:le|n)|en)|wom[ae]n)"
+    )
+
+    # Fill in default_mapping entries for any names the user-supplied mapping does not override
+
+  mapping <- utils::modifyList(default_mapping, mapping)
+
+
+
   if (verbose) {
     print("Standardizing column names...")
   }
diff --git a/man/standardizeColnames.Rd b/man/standardizeColnames.Rd
index a6e38f3a..118108bf 100644
--- a/man/standardizeColnames.Rd
+++ b/man/standardizeColnames.Rd
@@ -4,12 +4,14 @@
 \alias{standardizeColnames}
 \title{Standardize Column Names in a Dataframe (Internal)}
 \usage{
-standardizeColnames(df, verbose = FALSE)
+standardizeColnames(df, verbose = FALSE, mapping = list())
 }
 \arguments{
 \item{df}{A dataframe whose column names need to be standardized.}
 
 \item{verbose}{A logical indicating whether to print progress messages.}
+
+\item{mapping}{A named list mapping standardized column names to regular expressions; entries override or extend the built-in defaults.}
 }
 \value{
 A dataframe with standardized column names.
@@ -20,4 +22,3 @@ It utilizes regular expressions and the `tolower()` function to match column nam
 against a list of predefined standard names. The approach is case-insensitive and
 allows for flexible matching of column names.
 }
-\keyword{internal}
diff --git a/vignettes/ASOIAF.Rmd b/vignettes/ASOIAF.Rmd
index 00211864..977abea7 100644
--- a/vignettes/ASOIAF.Rmd
+++ b/vignettes/ASOIAF.Rmd
@@ -21,6 +21,8 @@ We begin by loading the required libraries and examining the structure of the bu
 ```{r echo=TRUE, message=FALSE, warning=FALSE}
 library(BGmisc)
 library(tidyverse)
+library(ggpedigree)
+
 data(ASOIAF)
 ```
 
@@ -137,7 +139,7 @@ Many real-world and fictional pedigrees contain individuals with unknown or part
 
 - Create "phantom" placeholders for the missing parent
 
--Optionally repair and harmonize parent fields
+- Optionally repair and harmonize parent fields
 
 To facilitate plotting, we check for individuals with one known parent but a missing other. For those cases, we assign a placeholder ID to the missing parent.
 
@@ -166,4 +168,9 @@ We can now visualize the repaired pedigree using the `plotPedigree()` function.
 
 ```{r, message=FALSE, warning=FALSE}
 plotPedigree(df_repaired, affected = df_repaired$affected, verbose = FALSE)
+
+ggPedigree(df_repaired,  status_col = "affected",  personID_col = "ID",  code_male = "M", 
+           config = list(unaffected =  0,affected = 1,
+                         ped_width=15))
+
 ```

From 1922c8194c8393b482c06cb22965f3d4cd7a1668 Mon Sep 17 00:00:00 2001
From: Mason Garrison <garrissm@wfu.edu>
Date: Sat, 17 May 2025 21:09:07 -0400
Subject: [PATCH 2/2] Update documentData.R

---
 R/documentData.R | 250 +++++++++++++++++++++++------------------------
 1 file changed, 125 insertions(+), 125 deletions(-)

diff --git a/R/documentData.R b/R/documentData.R
index cf589584..013f2c76 100644
--- a/R/documentData.R
+++ b/R/documentData.R
@@ -1,10 +1,10 @@
-##' Artificial pedigree data on eight families with inbreeding
-##'
-##' A dataset created purely from imagination that includes several types of inbreeding.
-##' Different kinds of inbreeding occur in each extended family.
-##'
-##' The types of inbreeding are as follows:
-##'
+#' Artificial pedigree data on eight families with inbreeding
+#'
+#' A dataset created purely from imagination that includes several types of inbreeding.
+#' Different kinds of inbreeding occur in each extended family.
+#'
+#' The types of inbreeding are as follows:
+#'
 #' \itemize{
 #'     \item Extended Family 1: Sister wives - Children with the same father and different mothers who are sisters.
 #'     \item Extended Family 2: Full siblings have children.
@@ -15,131 +15,131 @@
 #'     \item Extended Family 7: Uncle-niece and Aunt-nephew have children.
 #'     \item Extended Family 8: A father-son pairs has children with a corresponding mother-daughter pair.
 #' }
-##'
-##' Although not all of the above structures are technically inbreeding, they aim to test pedigree diagramming and path tracing algorithms.
-##'
-##' The variables are as follows:
-##'
-##' \itemize{
-##'   \item \code{ID}:  Person identification variable
-##'   \item \code{sex}:  Sex of the ID: 1 is female; 0 is male
-##'   \item \code{dadID}:  ID of the father
-##'   \item \code{momID}:  ID of the mother
-##'   \item \code{FamID}:  ID of the extended family
-##'   \item \code{Gen}:  Generation of the person
-##'   \item \code{proband}:  Always FALSE
-##' }
-##'
-##' @docType data
-##' @keywords datasets
-##' @name inbreeding
-##' @usage data(inbreeding)
-##' @format A data frame (and ped object) with 134 rows and 7 variables
+#'
+#' Although not all of the above structures are technically inbreeding, they aim to test pedigree diagramming and path tracing algorithms.
+#'
+#' The variables are as follows:
+#'
+#' \itemize{
+#'   \item \code{ID}:  Person identification variable
+#'   \item \code{sex}:  Sex of the ID: 1 is female; 0 is male
+#'   \item \code{dadID}:  ID of the father
+#'   \item \code{momID}:  ID of the mother
+#'   \item \code{FamID}:  ID of the extended family
+#'   \item \code{Gen}:  Generation of the person
+#'   \item \code{proband}:  Always FALSE
+#' }
+#'
+#' @docType data
+#' @keywords datasets
+#' @name inbreeding
+#' @usage data(inbreeding)
+#' @format A data frame (and ped object) with 134 rows and 7 variables
 NULL
 
-##' Simulated pedigree with two extended families and an age-related hazard
-##'
-##' A dataset simulated to have an age-related hazard.
-##' There are two extended families that are sampled from the same population.
-##'
-##' The variables are as follows:
-##'
-##' \itemize{
-##'   \item \code{FamID}:  ID of the extended family
-##'   \item \code{ID}:  Person identification variable
-##'   \item \code{sex}:  Sex of the ID: 1 is female; 0 is male
-##'   \item \code{dadID}:  ID of the father
-##'   \item \code{momID}:  ID of the mother
-##'   \item \code{affected}:  logical.  Whether the person is affected or not
-##'   \item \code{DA1}:  Binary variable signifying the meaninglessness of life
-##'   \item \code{DA2}:  Binary variable signifying the fundamental unknowability of existence
-##'   \item \code{birthYr}:  Birth year for person
-##'   \item \code{onsetYr}:  Year of onset for person
-##'   \item \code{deathYr}:  Death year for person
-##'   \item \code{available}:  logical.  Whether
-##'   \item \code{Gen}:  Generation of the person
-##'   \item \code{proband}:  logical.  Whether the person is a proband or not
-##' }
-##'
-##' @docType data
-##' @keywords datasets
-##' @name hazard
-##' @usage data(hazard)
-##' @format A data frame with 43 rows and 14 variables
+#' Simulated pedigree with two extended families and an age-related hazard
+#'
+#' A dataset simulated to have an age-related hazard.
+#' There are two extended families that are sampled from the same population.
+#'
+#' The variables are as follows:
+#'
+#' \itemize{
+#'   \item \code{FamID}:  ID of the extended family
+#'   \item \code{ID}:  Person identification variable
+#'   \item \code{sex}:  Sex of the ID: 1 is female; 0 is male
+#'   \item \code{dadID}:  ID of the father
+#'   \item \code{momID}:  ID of the mother
+#'   \item \code{affected}:  logical.  Whether the person is affected or not
+#'   \item \code{DA1}:  Binary variable signifying the meaninglessness of life
+#'   \item \code{DA2}:  Binary variable signifying the fundamental unknowability of existence
+#'   \item \code{birthYr}:  Birth year for person
+#'   \item \code{onsetYr}:  Year of onset for person
+#'   \item \code{deathYr}:  Death year for person
+#'   \item \code{available}:  logical.  Whether
+#'   \item \code{Gen}:  Generation of the person
+#'   \item \code{proband}:  logical.  Whether the person is a proband or not
+#' }
+#'
+#' @docType data
+#' @keywords datasets
+#' @name hazard
+#' @usage data(hazard)
+#' @format A data frame with 43 rows and 14 variables
 NULL
 
-##' Fictional pedigree data on a wizarding family
-##'
-##' A dataset created purely from imagination that includes a subset of the Potter extended family.
-##'
-##' The variables are as follows:
-##'
-##' \itemize{
-##'   \item \code{personID}:  Person identification variable
-##'   \item \code{famID}: Family identification variable
-##'   \item \code{name}:  Name of the person
-##'   \item \code{gen}: Generation of the person
-##'   \item \code{momID}:  ID of the mother
-##'   \item \code{dadID}:  ID of the father
-##'   \item \code{spouseID}: ID of the spouse
-##'   \item \code{sex}:  Sex of the ID: 1 is male; 0 is female
-##'
-##' }
-##'
-##' IDs in the 100s \code{momID}s and \code{dadID}s are for people not in the dataset.
-##'
-##' @docType data
-##' @keywords datasets
-##' @name potter
-##' @usage data(potter)
-##' @format A data frame (and ped object) with 36 rows and 8 variables
+#' Fictional pedigree data on a wizarding family
+#'
+#' A dataset created purely from imagination that includes a subset of the Potter extended family.
+#'
+#' The variables are as follows:
+#'
+#' \itemize{
+#'   \item \code{personID}:  Person identification variable
+#'   \item \code{famID}: Family identification variable
+#'   \item \code{name}:  Name of the person
+#'   \item \code{gen}: Generation of the person
+#'   \item \code{momID}:  ID of the mother
+#'   \item \code{dadID}:  ID of the father
+#'   \item \code{spouseID}: ID of the spouse
+#'   \item \code{sex}:  Sex of the ID: 1 is male; 0 is female
+#'
+#' }
+#'
+#' IDs in the 100s \code{momID}s and \code{dadID}s are for people not in the dataset.
+#'
+#' @docType data
+#' @keywords datasets
+#' @name potter
+#' @usage data(potter)
+#' @format A data frame (and ped object) with 36 rows and 8 variables
 NULL
 
 
-##' Royal pedigree data from 1992
-##'
-##' A dataset created by Denis Reid from the Royal Families of Europe.
-##'
-##' The variables are as follows:
-##' id,momID,dadID,name,sex,birth_date,death_date,attribute_title
-##' \itemize{
-##'   \item \code{id}:  Person identification variable
-##'   \item \code{momID}:  ID of the mother
-##'   \item \code{dadID}:  ID of the father
-##'   \item \code{name}:  Name of the person
-##'   \item \code{sex}: Biological sex
-##'   \item \code{birth_date}:  Date of birth
-##'   \item \code{death_date}:  Date of death
-##'   \item \code{attribute_title}:  Title of the person
-##'
-##' }
-##'
-##'
-##' @docType data
-##' @keywords datasets
-##' @name royal92
-##' @usage data(royal92)
-##' @format A data frame with 3110 observations
+#' Royal pedigree data from 1992
+#'
+#' A dataset created by Denis Reid from the Royal Families of Europe.
+#'
+#' The variables are as follows:
+#' id,momID,dadID,name,sex,birth_date,death_date,attribute_title
+#' \itemize{
+#'   \item \code{id}:  Person identification variable
+#'   \item \code{momID}:  ID of the mother
+#'   \item \code{dadID}:  ID of the father
+#'   \item \code{name}:  Name of the person
+#'   \item \code{sex}: Biological sex
+#'   \item \code{birth_date}:  Date of birth
+#'   \item \code{death_date}:  Date of death
+#'   \item \code{attribute_title}:  Title of the person
+#'
+#' }
+#'
+#'
+#' @docType data
+#' @keywords datasets
+#' @name royal92
+#' @usage data(royal92)
+#' @format A data frame with 3110 observations
 NULL
 
-##' A song of ice and fire pedigree data
-##'
-##' A dataset created from the Song of Ice and Fire series by George R. R. Martin. Core data is from the [Westeros.org forum](https://asoiaf.westeros.org/index.php?/topic/88863-all-the-family-trees/).
-##'
-##'
-##'
-##' The variables are as follows:
-##' \itemize{
-##'   \item \code{id}:  Person identification variable
-##'   \item \code{momID}:  ID of the mother
-##'   \item \code{dadID}:  ID of the father
-##'   \item \code{name}:  Name of the person
-##'   \item \code{sex}: Biological sex
-##'   }
-##'
-##' @docType data
-##' @keywords datasets
-##' @name ASOIAF
-##' @usage data(ASOIAF)
-##' @format A data frame with 501 observations
+#' A Song of Ice and Fire pedigree data
+#'
+#' A dataset created from the Song of Ice and Fire series by George R. R. Martin. Core data is from the [Westeros.org forum](https://asoiaf.westeros.org/index.php?/topic/88863-all-the-family-trees/).
+#'
+#'
+#'
+#' The variables are as follows:
+#' \itemize{
+#'   \item \code{id}:  Person identification variable
+#'   \item \code{momID}:  ID of the mother
+#'   \item \code{dadID}:  ID of the father
+#'   \item \code{name}:  Name of the person
+#'   \item \code{sex}: Biological sex
+#'   }
+#'
+#' @docType data
+#' @keywords datasets
+#' @name ASOIAF
+#' @usage data(ASOIAF)
+#' @format A data frame with 501 observations
 NULL