From c05ce0a1aefa0c58c52b6d9982e7aaa0c6d1f6ac Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 14 Oct 2025 15:17:23 +0000 Subject: [PATCH 1/4] Initial plan From 4232e8c6b5a6afb6e202aaadee9e3dece3a87517 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 14 Oct 2025 15:23:11 +0000 Subject: [PATCH 2/4] Add describeRelatedness function with comprehensive tests and documentation Co-authored-by: smasongarrison <6001608+smasongarrison@users.noreply.github.com> --- NAMESPACE | 1 + R/describeRelatedness.R | 417 ++++++++++++++++++++++ man/describeRelatedness.Rd | 107 ++++++ tests/testthat/test-describeRelatedness.R | 387 ++++++++++++++++++++ 4 files changed, 912 insertions(+) create mode 100644 R/describeRelatedness.R create mode 100644 man/describeRelatedness.Rd create mode 100644 tests/testthat/test-describeRelatedness.R diff --git a/NAMESPACE b/NAMESPACE index e5cbba65..1ed0ce21 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -15,6 +15,7 @@ export(com2links) export(comp2vech) export(computeParentAdjacency) export(createGenDataFrame) +export(describeRelatedness) export(determineSex) export(dropLink) export(evenInsert) diff --git a/R/describeRelatedness.R b/R/describeRelatedness.R new file mode 100644 index 00000000..be294c58 --- /dev/null +++ b/R/describeRelatedness.R @@ -0,0 +1,417 @@ +#' Describe Genetic Relatedness with Textual Labels +#' +#' @description +#' This function takes a flat/vectorized relatedness matrix and generates textual +#' descriptions of the genetic relationships based on additive relatedness coefficients +#' and generation differences. +#' +#' @param rel_df A data.frame with columns for pairwise relatedness information. +#' Required columns: ID1, ID2, addRel (additive relatedness coefficient). +#' Optional columns: gen1, gen2 (generation numbers), sex1, sex2 (biological sex). +#' @param add_col Character. Name of the column containing additive relatedness coefficients. +#' Default is "addRel". +#' @param gen1_col Character. Name of the column containing generation for person 1. +#' Default is "gen1". +#' @param gen2_col Character. Name of the column containing generation for person 2. +#' Default is "gen2". +#' @param sex1_col Character. Name of the column containing sex for person 1. +#' Default is "sex1". +#' @param sex2_col Character. Name of the column containing sex for person 2. +#' Default is "sex2". +#' @param code_male The value used to denote males. Default is 1. +#' @param code_female The value used to denote females. Default is 0. +#' @param use_sex Logical. If TRUE and sex columns are available, generate sex-specific +#' relationship labels (e.g., "mother-child" vs "father-child"). Default is FALSE. +#' @param return_list Logical. If TRUE, return a list with both the original data and +#' the relationship descriptions. If FALSE, return a data.frame with an added +#' "relationship" column. Default is FALSE. +#' +#' @details +#' The function uses the additive relatedness coefficient (`addRel`) and generation +#' difference to infer relationship types. Common relationships and their expected +#' values are: +#' \itemize{ +#' \item addRel = 1.0, gen1 == gen2: "self" (same individual) +#' \item addRel = 0.5, gen1 == gen2: "full siblings" +#' \item addRel = 0.5, |gen1 - gen2| == 1: "parent-child" +#' \item addRel = 0.25, gen1 == gen2: "half siblings" +#' \item addRel = 0.25, |gen1 - gen2| == 1: "aunt/uncle-niece/nephew" +#' \item addRel = 0.25, |gen1 - gen2| == 2: "grandparent-grandchild" +#' \item addRel = 0.125, gen1 == gen2: "first cousins" +#' \item addRel = 0.0625, gen1 == gen2: "second cousins" +#' \item And relationships with various degrees of removal for cousins +#' } +#' +#' When `use_sex = TRUE` and sex information is available, parent-child relationships +#' can be further specified as "mother-child", "father-child", "mother-son", +#' "mother-daughter", "father-son", or "father-daughter". +#' +#' @return +#' If `return_list = FALSE` (default), returns a data.frame identical to `rel_df` +#' with an additional "relationship" column containing textual descriptions. +#' If `return_list = TRUE`, returns a list with two elements: +#' \itemize{ +#' \item `data`: The original data.frame with the relationship column added +#' \item `relationships`: A character vector of relationship descriptions +#' } +#' +#' @examples +#' \dontrun{ +#' # Create example relatedness data +#' rel_data <- data.frame( +#' ID1 = c(1, 1, 1, 2), +#' ID2 = c(2, 3, 4, 3), +#' addRel = c(0.5, 0.25, 0.125, 0.5), +#' gen1 = c(1, 1, 1, 2), +#' gen2 = c(2, 3, 1, 2) +#' ) +#' +#' # Get relationship descriptions +#' described <- describeRelatedness(rel_data) +#' print(described) +#' +#' # With sex-specific labels +#' rel_data$sex1 <- c(0, 0, 1, 1) +#' rel_data$sex2 <- c(1, 0, 0, 1) +#' described_sex <- describeRelatedness(rel_data, use_sex = TRUE) +#' print(described_sex) +#' } +#' +#' @export +describeRelatedness <- function(rel_df, + add_col = "addRel", + gen1_col = "gen1", + gen2_col = "gen2", + sex1_col = "sex1", + sex2_col = "sex2", + code_male = 1, + code_female = 0, + use_sex = FALSE, + return_list = FALSE) { + # Validate input + if (!is.data.frame(rel_df)) { + stop("rel_df must be a data.frame") + } + + if (!"ID1" %in% names(rel_df) || !"ID2" %in% names(rel_df)) { + stop("rel_df must contain 'ID1' and 'ID2' columns") + } + + if (!add_col %in% names(rel_df)) { + stop(paste0("Column '", add_col, "' not found in rel_df")) + } + + # Check for generation columns + has_gen <- gen1_col %in% names(rel_df) && gen2_col %in% names(rel_df) + + # Check for sex columns + has_sex <- sex1_col %in% names(rel_df) && sex2_col %in% names(rel_df) + + if (use_sex && !has_sex) { + warning("use_sex is TRUE but sex columns are not available. Proceeding without sex-specific labels.") + use_sex <- FALSE + } + + # Extract relatedness coefficients + addRel <- rel_df[[add_col]] + + # Extract generation differences if available + if (has_gen) { + gen1 <- rel_df[[gen1_col]] + gen2 <- rel_df[[gen2_col]] + gen_diff <- gen2 - gen1 # Positive means person 2 is in a younger generation + gen_abs_diff <- abs(gen_diff) + } else { + gen1 <- NULL + gen2 <- NULL + gen_diff <- NULL + gen_abs_diff <- NULL + } + + # Extract sex if needed + if (use_sex && has_sex) { + sex1 <- rel_df[[sex1_col]] + sex2 <- rel_df[[sex2_col]] + } else { + sex1 <- NULL + sex2 <- NULL + } + + # Initialize relationship vector + n <- nrow(rel_df) + relationships <- character(n) + + # Classify each relationship + for (i in 1:n) { + r <- addRel[i] + relationships[i] <- classify_relationship( + r = r, + gen_diff = if (has_gen) gen_diff[i] else NA, + gen_abs_diff = if (has_gen) gen_abs_diff[i] else NA, + sex1 = if (use_sex) sex1[i] else NA, + sex2 = if (use_sex) sex2[i] else NA, + code_male = code_male, + code_female = code_female, + use_sex = use_sex + ) + } + + # Prepare output + result_df <- rel_df + result_df$relationship <- relationships + + if (return_list) { + return(list( + data = result_df, + relationships = relationships + )) + } else { + return(result_df) + } +} + + +#' Classify a single relationship based on relatedness and generation +#' +#' @param r Numeric. Additive relatedness coefficient +#' @param gen_diff Numeric. Generation difference (gen2 - gen1) +#' @param gen_abs_diff Numeric. Absolute generation difference +#' @param sex1 Sex of person 1 +#' @param sex2 Sex of person 2 +#' @param code_male Code for male +#' @param code_female Code for female +#' @param use_sex Logical. Whether to use sex-specific labels +#' +#' @return Character string describing the relationship +#' @keywords internal +classify_relationship <- function(r, gen_diff, gen_abs_diff, sex1, sex2, + code_male, code_female, use_sex) { + # Tolerance for floating point comparison + tol <- 0.001 + + # Handle self (same person or identical twins) + if (abs(r - 1.0) < tol) { + if (!is.na(gen_abs_diff) && gen_abs_diff == 0) { + return("self/identical twin") + } + return("self") + } + + # Handle unrelated (r = 0) + if (abs(r) < tol) { + return("unrelated") + } + + # For relationships with generation information + if (!is.na(gen_abs_diff) && !is.na(gen_diff)) { + # Same generation relationships + if (gen_abs_diff == 0) { + if (abs(r - 0.5) < tol) { + return("full siblings") + } else if (abs(r - 0.25) < tol) { + return("half siblings") + } else if (abs(r - 0.125) < tol) { + return("first cousins") + } else if (abs(r - 0.0625) < tol) { + return("second cousins") + } else if (abs(r - 0.03125) < tol) { + return("third cousins") + } + } + + # One generation apart + if (gen_abs_diff == 1) { + if (abs(r - 0.5) < tol) { + # Parent-child relationship + if (use_sex && !is.na(sex1) && !is.na(sex2)) { + return(get_parent_child_label(gen_diff, sex1, sex2, code_male, code_female)) + } + return("parent-child") + } else if (abs(r - 0.25) < tol) { + # Aunt/uncle - niece/nephew + if (use_sex && !is.na(sex1) && !is.na(sex2)) { + return(get_avuncular_label(gen_diff, sex1, sex2, code_male, code_female)) + } + return("aunt/uncle-niece/nephew") + } else if (abs(r - 0.125) < tol) { + return("first cousins once removed") + } else if (abs(r - 0.0625) < tol) { + return("second cousins once removed") + } + } + + # Two generations apart + if (gen_abs_diff == 2) { + if (abs(r - 0.25) < tol) { + if (use_sex && !is.na(sex1) && !is.na(sex2)) { + return(get_grandparent_label(gen_diff, sex1, sex2, code_male, code_female)) + } + return("grandparent-grandchild") + } else if (abs(r - 0.125) < tol) { + return("first cousins twice removed") + } else if (abs(r - 0.0625) < tol) { + return("second cousins twice removed") + } + } + + # Three generations apart + if (gen_abs_diff == 3) { + if (abs(r - 0.125) < tol) { + return("great-grandparent-great-grandchild") + } + } + + # Four generations apart + if (gen_abs_diff == 4) { + if (abs(r - 0.0625) < tol) { + return("great-great-grandparent-great-great-grandchild") + } + } + } + + # Fallback: describe by coefficient only if we can't determine specific relationship + if (abs(r - 0.5) < tol) { + return("unknown (r=0.5)") + } else if (abs(r - 0.25) < tol) { + return("unknown (r=0.25)") + } else if (abs(r - 0.125) < tol) { + return("unknown (r=0.125)") + } else if (abs(r - 0.0625) < tol) { + return("unknown (r=0.0625)") + } else { + return(paste0("unknown (r=", round(r, 4), ")")) + } +} + + +#' Get sex-specific parent-child relationship label +#' +#' @param gen_diff Generation difference +#' @param sex1 Sex of person 1 +#' @param sex2 Sex of person 2 +#' @param code_male Code for male +#' @param code_female Code for female +#' +#' @return Character string +#' @keywords internal +get_parent_child_label <- function(gen_diff, sex1, sex2, code_male, code_female) { + # gen_diff > 0 means person 1 is parent (older generation) + # gen_diff < 0 means person 2 is parent (older generation) + + if (gen_diff > 0) { + # Person 1 is the parent + if (sex1 == code_female && sex2 == code_male) { + return("mother-son") + } else if (sex1 == code_female && sex2 == code_female) { + return("mother-daughter") + } else if (sex1 == code_male && sex2 == code_male) { + return("father-son") + } else if (sex1 == code_male && sex2 == code_female) { + return("father-daughter") + } else if (sex1 == code_female) { + return("mother-child") + } else if (sex1 == code_male) { + return("father-child") + } + } else { + # Person 2 is the parent + if (sex2 == code_female && sex1 == code_male) { + return("son-mother") + } else if (sex2 == code_female && sex1 == code_female) { + return("daughter-mother") + } else if (sex2 == code_male && sex1 == code_male) { + return("son-father") + } else if (sex2 == code_male && sex1 == code_female) { + return("daughter-father") + } else if (sex2 == code_female) { + return("child-mother") + } else if (sex2 == code_male) { + return("child-father") + } + } + return("parent-child") +} + + +#' Get sex-specific avuncular relationship label +#' +#' @param gen_diff Generation difference +#' @param sex1 Sex of person 1 +#' @param sex2 Sex of person 2 +#' @param code_male Code for male +#' @param code_female Code for female +#' +#' @return Character string +#' @keywords internal +get_avuncular_label <- function(gen_diff, sex1, sex2, code_male, code_female) { + if (gen_diff > 0) { + # Person 1 is the aunt/uncle + if (sex1 == code_female && sex2 == code_male) { + return("aunt-nephew") + } else if (sex1 == code_female && sex2 == code_female) { + return("aunt-niece") + } else if (sex1 == code_male && sex2 == code_male) { + return("uncle-nephew") + } else if (sex1 == code_male && sex2 == code_female) { + return("uncle-niece") + } + } else { + # Person 2 is the aunt/uncle + if (sex2 == code_female && sex1 == code_male) { + return("nephew-aunt") + } else if (sex2 == code_female && sex1 == code_female) { + return("niece-aunt") + } else if (sex2 == code_male && sex1 == code_male) { + return("nephew-uncle") + } else if (sex2 == code_male && sex1 == code_female) { + return("niece-uncle") + } + } + return("aunt/uncle-niece/nephew") +} + + +#' Get sex-specific grandparent relationship label +#' +#' @param gen_diff Generation difference +#' @param sex1 Sex of person 1 +#' @param sex2 Sex of person 2 +#' @param code_male Code for male +#' @param code_female Code for female +#' +#' @return Character string +#' @keywords internal +get_grandparent_label <- function(gen_diff, sex1, sex2, code_male, code_female) { + if (gen_diff > 0) { + # Person 1 is the grandparent + if (sex1 == code_female && sex2 == code_male) { + return("grandmother-grandson") + } else if (sex1 == code_female && sex2 == code_female) { + return("grandmother-granddaughter") + } else if (sex1 == code_male && sex2 == code_male) { + return("grandfather-grandson") + } else if (sex1 == code_male && sex2 == code_female) { + return("grandfather-granddaughter") + } else if (sex1 == code_female) { + return("grandmother-grandchild") + } else if (sex1 == code_male) { + return("grandfather-grandchild") + } + } else { + # Person 2 is the grandparent + if (sex2 == code_female && sex1 == code_male) { + return("grandson-grandmother") + } else if (sex2 == code_female && sex1 == code_female) { + return("granddaughter-grandmother") + } else if (sex2 == code_male && sex1 == code_male) { + return("grandson-grandfather") + } else if (sex2 == code_male && sex1 == code_female) { + return("granddaughter-grandfather") + } else if (sex2 == code_female) { + return("grandchild-grandmother") + } else if (sex2 == code_male) { + return("grandchild-grandfather") + } + } + return("grandparent-grandchild") +} diff --git a/man/describeRelatedness.Rd b/man/describeRelatedness.Rd new file mode 100644 index 00000000..38a8f43c --- /dev/null +++ b/man/describeRelatedness.Rd @@ -0,0 +1,107 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/describeRelatedness.R +\name{describeRelatedness} +\alias{describeRelatedness} +\title{Describe Genetic Relatedness with Textual Labels} +\usage{ +describeRelatedness( + rel_df, + add_col = "addRel", + gen1_col = "gen1", + gen2_col = "gen2", + sex1_col = "sex1", + sex2_col = "sex2", + code_male = 1, + code_female = 0, + use_sex = FALSE, + return_list = FALSE +) +} +\arguments{ +\item{rel_df}{A data.frame with columns for pairwise relatedness information. +Required columns: ID1, ID2, addRel (additive relatedness coefficient). +Optional columns: gen1, gen2 (generation numbers), sex1, sex2 (biological sex).} + +\item{add_col}{Character. Name of the column containing additive relatedness coefficients. +Default is "addRel".} + +\item{gen1_col}{Character. Name of the column containing generation for person 1. +Default is "gen1".} + +\item{gen2_col}{Character. Name of the column containing generation for person 2. +Default is "gen2".} + +\item{sex1_col}{Character. Name of the column containing sex for person 1. +Default is "sex1".} + +\item{sex2_col}{Character. Name of the column containing sex for person 2. +Default is "sex2".} + +\item{code_male}{The value used to denote males. Default is 1.} + +\item{code_female}{The value used to denote females. Default is 0.} + +\item{use_sex}{Logical. If TRUE and sex columns are available, generate sex-specific +relationship labels (e.g., "mother-child" vs "father-child"). Default is FALSE.} + +\item{return_list}{Logical. If TRUE, return a list with both the original data and +the relationship descriptions. If FALSE, return a data.frame with an added +"relationship" column. Default is FALSE.} +} +\value{ +If \code{return_list = FALSE} (default), returns a data.frame identical to \code{rel_df} +with an additional "relationship" column containing textual descriptions. +If \code{return_list = TRUE}, returns a list with two elements: +\itemize{ +\item \code{data}: The original data.frame with the relationship column added +\item \code{relationships}: A character vector of relationship descriptions +} +} +\description{ +This function takes a flat/vectorized relatedness matrix and generates textual +descriptions of the genetic relationships based on additive relatedness coefficients +and generation differences. +} +\details{ +The function uses the additive relatedness coefficient (\code{addRel}) and generation +difference to infer relationship types. Common relationships and their expected +values are: +\itemize{ +\item addRel = 1.0, gen1 == gen2: "self" (same individual) +\item addRel = 0.5, gen1 == gen2: "full siblings" +\item addRel = 0.5, |gen1 - gen2| == 1: "parent-child" +\item addRel = 0.25, gen1 == gen2: "half siblings" +\item addRel = 0.25, |gen1 - gen2| == 1: "aunt/uncle-niece/nephew" +\item addRel = 0.25, |gen1 - gen2| == 2: "grandparent-grandchild" +\item addRel = 0.125, gen1 == gen2: "first cousins" +\item addRel = 0.0625, gen1 == gen2: "second cousins" +\item And relationships with various degrees of removal for cousins +} + +When \code{use_sex = TRUE} and sex information is available, parent-child relationships +can be further specified as "mother-child", "father-child", "mother-son", +"mother-daughter", "father-son", or "father-daughter". +} +\examples{ +\dontrun{ +# Create example relatedness data +rel_data <- data.frame( + ID1 = c(1, 1, 1, 2), + ID2 = c(2, 3, 4, 3), + addRel = c(0.5, 0.25, 0.125, 0.5), + gen1 = c(1, 1, 1, 2), + gen2 = c(2, 3, 1, 2) +) + +# Get relationship descriptions +described <- describeRelatedness(rel_data) +print(described) + +# With sex-specific labels +rel_data$sex1 <- c(0, 0, 1, 1) +rel_data$sex2 <- c(1, 0, 0, 1) +described_sex <- describeRelatedness(rel_data, use_sex = TRUE) +print(described_sex) +} + +} diff --git a/tests/testthat/test-describeRelatedness.R b/tests/testthat/test-describeRelatedness.R new file mode 100644 index 00000000..5f5d66db --- /dev/null +++ b/tests/testthat/test-describeRelatedness.R @@ -0,0 +1,387 @@ +# Tests for describeRelatedness function + +test_that("describeRelatedness handles basic relationships without generation info", { + rel_data <- data.frame( + ID1 = c(1, 2, 3), + ID2 = c(2, 3, 4), + addRel = c(0.5, 0.25, 0.0) + ) + + result <- describeRelatedness(rel_data) + + expect_true("relationship" %in% names(result)) + expect_equal(nrow(result), 3) + expect_equal(result$relationship[1], "unknown (r=0.5)") + expect_equal(result$relationship[2], "unknown (r=0.25)") + expect_equal(result$relationship[3], "unrelated") +}) + +test_that("describeRelatedness identifies full siblings correctly", { + rel_data <- data.frame( + ID1 = c(1), + ID2 = c(2), + addRel = c(0.5), + gen1 = c(1), + gen2 = c(1) + ) + + result <- describeRelatedness(rel_data) + expect_equal(result$relationship[1], "full siblings") +}) + +test_that("describeRelatedness identifies half siblings correctly", { + rel_data <- data.frame( + ID1 = c(1), + ID2 = c(2), + addRel = c(0.25), + gen1 = c(1), + gen2 = c(1) + ) + + result <- describeRelatedness(rel_data) + expect_equal(result$relationship[1], "half siblings") +}) + +test_that("describeRelatedness identifies parent-child relationships", { + rel_data <- data.frame( + ID1 = c(1, 2), + ID2 = c(2, 1), + addRel = c(0.5, 0.5), + gen1 = c(1, 2), + gen2 = c(2, 1) + ) + + result <- describeRelatedness(rel_data) + expect_equal(result$relationship[1], "parent-child") + expect_equal(result$relationship[2], "parent-child") +}) + +test_that("describeRelatedness identifies grandparent-grandchild relationships", { + rel_data <- data.frame( + ID1 = c(1, 3), + ID2 = c(3, 1), + addRel = c(0.25, 0.25), + gen1 = c(1, 3), + gen2 = c(3, 1) + ) + + result <- describeRelatedness(rel_data) + expect_equal(result$relationship[1], "grandparent-grandchild") + expect_equal(result$relationship[2], "grandparent-grandchild") +}) + +test_that("describeRelatedness identifies aunt/uncle-niece/nephew relationships", { + rel_data <- data.frame( + ID1 = c(1), + ID2 = c(2), + addRel = c(0.25), + gen1 = c(1), + gen2 = c(2) + ) + + result <- describeRelatedness(rel_data) + expect_equal(result$relationship[1], "aunt/uncle-niece/nephew") +}) + +test_that("describeRelatedness identifies first cousins", { + rel_data <- data.frame( + ID1 = c(1), + ID2 = c(2), + addRel = c(0.125), + gen1 = c(2), + gen2 = c(2) + ) + + result <- describeRelatedness(rel_data) + expect_equal(result$relationship[1], "first cousins") +}) + +test_that("describeRelatedness identifies second cousins", { + rel_data <- data.frame( + ID1 = c(1), + ID2 = c(2), + addRel = c(0.0625), + gen1 = c(3), + gen2 = c(3) + ) + + result <- describeRelatedness(rel_data) + expect_equal(result$relationship[1], "second cousins") +}) + +test_that("describeRelatedness identifies third cousins", { + rel_data <- data.frame( + ID1 = c(1), + ID2 = c(2), + addRel = c(0.03125), + gen1 = c(4), + gen2 = c(4) + ) + + result <- describeRelatedness(rel_data) + expect_equal(result$relationship[1], "third cousins") +}) + +test_that("describeRelatedness identifies cousins once removed", { + rel_data <- data.frame( + ID1 = c(1, 2), + ID2 = c(2, 3), + addRel = c(0.125, 0.0625), + gen1 = c(2, 3), + gen2 = c(3, 4) + ) + + result <- describeRelatedness(rel_data) + expect_equal(result$relationship[1], "first cousins once removed") + expect_equal(result$relationship[2], "second cousins once removed") +}) + +test_that("describeRelatedness identifies cousins twice removed", { + rel_data <- data.frame( + ID1 = c(1, 2), + ID2 = c(2, 3), + addRel = c(0.125, 0.0625), + gen1 = c(2, 3), + gen2 = c(4, 5) + ) + + result <- describeRelatedness(rel_data) + expect_equal(result$relationship[1], "first cousins twice removed") + expect_equal(result$relationship[2], "second cousins twice removed") +}) + +test_that("describeRelatedness identifies self", { + rel_data <- data.frame( + ID1 = c(1, 2), + ID2 = c(1, 2), + addRel = c(1.0, 1.0), + gen1 = c(1, 2), + gen2 = c(1, 2) + ) + + result <- describeRelatedness(rel_data) + expect_equal(result$relationship[1], "self/identical twin") + expect_equal(result$relationship[2], "self/identical twin") +}) + +test_that("describeRelatedness handles sex-specific parent-child labels", { + rel_data <- data.frame( + ID1 = c(1, 2, 3, 4), + ID2 = c(5, 6, 7, 8), + addRel = c(0.5, 0.5, 0.5, 0.5), + gen1 = c(1, 1, 1, 1), + gen2 = c(2, 2, 2, 2), + sex1 = c(0, 0, 1, 1), # 0 = female, 1 = male + sex2 = c(0, 1, 0, 1) + ) + + result <- describeRelatedness(rel_data, use_sex = TRUE) + expect_equal(result$relationship[1], "mother-daughter") + expect_equal(result$relationship[2], "mother-son") + expect_equal(result$relationship[3], "father-daughter") + expect_equal(result$relationship[4], "father-son") +}) + +test_that("describeRelatedness handles sex-specific child-parent labels", { + rel_data <- data.frame( + ID1 = c(1, 2, 3, 4), + ID2 = c(5, 6, 7, 8), + addRel = c(0.5, 0.5, 0.5, 0.5), + gen1 = c(2, 2, 2, 2), + gen2 = c(1, 1, 1, 1), + sex1 = c(0, 1, 0, 1), # 0 = female, 1 = male + sex2 = c(0, 0, 1, 1) + ) + + result <- describeRelatedness(rel_data, use_sex = TRUE) + expect_equal(result$relationship[1], "daughter-mother") + expect_equal(result$relationship[2], "son-mother") + expect_equal(result$relationship[3], "daughter-father") + expect_equal(result$relationship[4], "son-father") +}) + +test_that("describeRelatedness handles sex-specific aunt/uncle labels", { + rel_data <- data.frame( + ID1 = c(1, 2, 3, 4), + ID2 = c(5, 6, 7, 8), + addRel = c(0.25, 0.25, 0.25, 0.25), + gen1 = c(1, 1, 1, 1), + gen2 = c(2, 2, 2, 2), + sex1 = c(0, 0, 1, 1), # 0 = female, 1 = male + sex2 = c(0, 1, 0, 1) + ) + + result <- describeRelatedness(rel_data, use_sex = TRUE) + expect_equal(result$relationship[1], "aunt-niece") + expect_equal(result$relationship[2], "aunt-nephew") + expect_equal(result$relationship[3], "uncle-niece") + expect_equal(result$relationship[4], "uncle-nephew") +}) + +test_that("describeRelatedness handles sex-specific grandparent labels", { + rel_data <- data.frame( + ID1 = c(1, 2, 3, 4), + ID2 = c(5, 6, 7, 8), + addRel = c(0.25, 0.25, 0.25, 0.25), + gen1 = c(1, 1, 1, 1), + gen2 = c(3, 3, 3, 3), + sex1 = c(0, 0, 1, 1), # 0 = female, 1 = male + sex2 = c(0, 1, 0, 1) + ) + + result <- describeRelatedness(rel_data, use_sex = TRUE) + expect_equal(result$relationship[1], "grandmother-granddaughter") + expect_equal(result$relationship[2], "grandmother-grandson") + expect_equal(result$relationship[3], "grandfather-granddaughter") + expect_equal(result$relationship[4], "grandfather-grandson") +}) + +test_that("describeRelatedness custom column names work", { + rel_data <- data.frame( + ID1 = c(1), + ID2 = c(2), + my_addRel = c(0.5), + my_gen1 = c(1), + my_gen2 = c(1) + ) + + result <- describeRelatedness( + rel_data, + add_col = "my_addRel", + gen1_col = "my_gen1", + gen2_col = "my_gen2" + ) + expect_equal(result$relationship[1], "full siblings") +}) + +test_that("describeRelatedness return_list option works", { + rel_data <- data.frame( + ID1 = c(1), + ID2 = c(2), + addRel = c(0.5), + gen1 = c(1), + gen2 = c(1) + ) + + result <- describeRelatedness(rel_data, return_list = TRUE) + expect_true(is.list(result)) + expect_true("data" %in% names(result)) + expect_true("relationships" %in% names(result)) + expect_equal(result$relationships[1], "full siblings") +}) + +test_that("describeRelatedness validates input correctly", { + # Not a data.frame + expect_error( + describeRelatedness(c(1, 2, 3)), + "rel_df must be a data.frame" + ) + + # Missing ID columns + expect_error( + describeRelatedness(data.frame(a = 1, b = 2)), + "rel_df must contain 'ID1' and 'ID2' columns" + ) + + # Missing addRel column + expect_error( + describeRelatedness(data.frame(ID1 = 1, ID2 = 2)), + "Column 'addRel' not found in rel_df" + ) +}) + +test_that("describeRelatedness handles use_sex warning when sex columns missing", { + rel_data <- data.frame( + ID1 = c(1), + ID2 = c(2), + addRel = c(0.5), + gen1 = c(1), + gen2 = c(2) + ) + + expect_warning( + result <- describeRelatedness(rel_data, use_sex = TRUE), + "use_sex is TRUE but sex columns are not available" + ) + expect_equal(result$relationship[1], "parent-child") +}) + +test_that("describeRelatedness handles custom sex codes", { + rel_data <- data.frame( + ID1 = c(1, 2), + ID2 = c(3, 4), + addRel = c(0.5, 0.5), + gen1 = c(1, 1), + gen2 = c(2, 2), + sex1 = c("F", "M"), + sex2 = c("M", "F") + ) + + result <- describeRelatedness( + rel_data, + use_sex = TRUE, + code_male = "M", + code_female = "F" + ) + expect_equal(result$relationship[1], "mother-son") + expect_equal(result$relationship[2], "father-daughter") +}) + +test_that("describeRelatedness handles great-grandparent relationships", { + rel_data <- data.frame( + ID1 = c(1), + ID2 = c(2), + addRel = c(0.125), + gen1 = c(1), + gen2 = c(4) + ) + + result <- describeRelatedness(rel_data) + expect_equal(result$relationship[1], "great-grandparent-great-grandchild") +}) + +test_that("describeRelatedness handles great-great-grandparent relationships", { + rel_data <- data.frame( + ID1 = c(1), + ID2 = c(2), + addRel = c(0.0625), + gen1 = c(1), + gen2 = c(5) + ) + + result <- describeRelatedness(rel_data) + expect_equal(result$relationship[1], "great-great-grandparent-great-great-grandchild") +}) + +test_that("describeRelatedness handles multiple relationships in one call", { + rel_data <- data.frame( + ID1 = c(1, 1, 1, 2, 2, 3), + ID2 = c(2, 3, 4, 3, 4, 4), + addRel = c(0.5, 0.25, 0.125, 0.5, 0.25, 0.5), + gen1 = c(1, 1, 1, 1, 1, 2), + gen2 = c(1, 2, 2, 1, 3, 2) + ) + + result <- describeRelatedness(rel_data) + expect_equal(nrow(result), 6) + expect_equal(result$relationship[1], "full siblings") + expect_equal(result$relationship[2], "aunt/uncle-niece/nephew") + expect_equal(result$relationship[3], "first cousins once removed") + expect_equal(result$relationship[4], "full siblings") + expect_equal(result$relationship[5], "grandparent-grandchild") + expect_equal(result$relationship[6], "parent-child") +}) + +test_that("describeRelatedness handles unknown relationships gracefully", { + rel_data <- data.frame( + ID1 = c(1, 2), + ID2 = c(2, 3), + addRel = c(0.15, 0.333), + gen1 = c(1, 1), + gen2 = c(1, 1) + ) + + result <- describeRelatedness(rel_data) + expect_true(grepl("unknown", result$relationship[1])) + expect_true(grepl("unknown", result$relationship[2])) +}) From 74bd8270fefac33f01c0df2d8598cdc56428b641 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 14 Oct 2025 15:38:14 +0000 Subject: [PATCH 3/4] Revise describeRelatedness to align with BGmisc data structures and workflows Co-authored-by: smasongarrison <6001608+smasongarrison@users.noreply.github.com> --- R/describeRelatedness.R | 195 +++++++++++++------- man/describeRelatedness.Rd | 110 ++++++------ tests/testthat/test-describeRelatedness.R | 205 +++++++++++++--------- 3 files changed, 313 insertions(+), 197 deletions(-) diff --git a/R/describeRelatedness.R b/R/describeRelatedness.R index be294c58..7354a0f1 100644 --- a/R/describeRelatedness.R +++ b/R/describeRelatedness.R @@ -1,89 +1,99 @@ #' Describe Genetic Relatedness with Textual Labels #' #' @description -#' This function takes a flat/vectorized relatedness matrix and generates textual -#' descriptions of the genetic relationships based on additive relatedness coefficients -#' and generation differences. +#' This function takes a flat/vectorized relatedness matrix (as produced by +#' \code{\link{com2links}}) and generates textual descriptions of the genetic +#' relationships based on additive relatedness coefficients and generation differences. #' #' @param rel_df A data.frame with columns for pairwise relatedness information. -#' Required columns: ID1, ID2, addRel (additive relatedness coefficient). -#' Optional columns: gen1, gen2 (generation numbers), sex1, sex2 (biological sex). +#' Required columns: ID1, ID2 (or custom names via \code{ID1_col}/\code{ID2_col}). +#' Typically produced by \code{\link{com2links}}. +#' @param ped Optional data.frame containing pedigree information. If provided, sex +#' and generation information will be automatically joined. Should contain columns +#' specified by \code{personID}, \code{sex_col}, and optionally \code{gen_col}. +#' @param personID Character. Name of the column in \code{ped} containing person IDs. +#' Default is "personID". +#' @param sex_col Character. Name of the column in \code{ped} containing sex information. +#' Default is "sex". +#' @param gen_col Character. Name of the column in \code{ped} containing generation numbers. +#' If NULL (default), relationships will be described without generation information. +#' @param ID1_col Character. Name of the column in \code{rel_df} for person 1 ID. +#' Default is "ID1". +#' @param ID2_col Character. Name of the column in \code{rel_df} for person 2 ID. +#' Default is "ID2". #' @param add_col Character. Name of the column containing additive relatedness coefficients. #' Default is "addRel". -#' @param gen1_col Character. Name of the column containing generation for person 1. -#' Default is "gen1". -#' @param gen2_col Character. Name of the column containing generation for person 2. -#' Default is "gen2". -#' @param sex1_col Character. Name of the column containing sex for person 1. -#' Default is "sex1". -#' @param sex2_col Character. Name of the column containing sex for person 2. -#' Default is "sex2". -#' @param code_male The value used to denote males. Default is 1. -#' @param code_female The value used to denote females. Default is 0. -#' @param use_sex Logical. If TRUE and sex columns are available, generate sex-specific +#' @param code_male The value used to denote males in the sex column. Default is 1. +#' @param code_female The value used to denote females in the sex column. Default is 0. +#' @param use_sex Logical. If TRUE and sex information is available, generate sex-specific #' relationship labels (e.g., "mother-child" vs "father-child"). Default is FALSE. #' @param return_list Logical. If TRUE, return a list with both the original data and #' the relationship descriptions. If FALSE, return a data.frame with an added #' "relationship" column. Default is FALSE. #' #' @details -#' The function uses the additive relatedness coefficient (`addRel`) and generation +#' The function uses the additive relatedness coefficient (\code{addRel}) and generation #' difference to infer relationship types. Common relationships and their expected #' values are: #' \itemize{ -#' \item addRel = 1.0, gen1 == gen2: "self" (same individual) -#' \item addRel = 0.5, gen1 == gen2: "full siblings" -#' \item addRel = 0.5, |gen1 - gen2| == 1: "parent-child" -#' \item addRel = 0.25, gen1 == gen2: "half siblings" -#' \item addRel = 0.25, |gen1 - gen2| == 1: "aunt/uncle-niece/nephew" -#' \item addRel = 0.25, |gen1 - gen2| == 2: "grandparent-grandchild" -#' \item addRel = 0.125, gen1 == gen2: "first cousins" -#' \item addRel = 0.0625, gen1 == gen2: "second cousins" +#' \item addRel = 1.0, same generation: "self" (same individual) +#' \item addRel = 0.5, same generation: "full siblings" +#' \item addRel = 0.5, 1 generation apart: "parent-child" +#' \item addRel = 0.25, same generation: "half siblings" +#' \item addRel = 0.25, 1 generation apart: "aunt/uncle-niece/nephew" +#' \item addRel = 0.25, 2 generations apart: "grandparent-grandchild" +#' \item addRel = 0.125, same generation: "first cousins" +#' \item addRel = 0.0625, same generation: "second cousins" #' \item And relationships with various degrees of removal for cousins #' } #' -#' When `use_sex = TRUE` and sex information is available, parent-child relationships -#' can be further specified as "mother-child", "father-child", "mother-son", -#' "mother-daughter", "father-son", or "father-daughter". +#' When \code{use_sex = TRUE} and sex information is available (either from \code{ped} +#' or already present in \code{rel_df}), parent-child relationships can be further +#' specified as "mother-child", "father-child", "mother-son", "mother-daughter", +#' "father-son", or "father-daughter". Similar sex-specific labels are provided for +#' grandparent and avuncular relationships. #' #' @return -#' If `return_list = FALSE` (default), returns a data.frame identical to `rel_df` +#' If \code{return_list = FALSE} (default), returns a data.frame identical to \code{rel_df} #' with an additional "relationship" column containing textual descriptions. -#' If `return_list = TRUE`, returns a list with two elements: +#' If \code{return_list = TRUE}, returns a list with two elements: #' \itemize{ -#' \item `data`: The original data.frame with the relationship column added -#' \item `relationships`: A character vector of relationship descriptions +#' \item \code{data}: The original data.frame with the relationship column added +#' \item \code{relationships}: A character vector of relationship descriptions #' } #' #' @examples #' \dontrun{ -#' # Create example relatedness data -#' rel_data <- data.frame( -#' ID1 = c(1, 1, 1, 2), -#' ID2 = c(2, 3, 4, 3), -#' addRel = c(0.5, 0.25, 0.125, 0.5), -#' gen1 = c(1, 1, 1, 2), -#' gen2 = c(2, 3, 1, 2) -#' ) -#' -#' # Get relationship descriptions -#' described <- describeRelatedness(rel_data) -#' print(described) -#' -#' # With sex-specific labels -#' rel_data$sex1 <- c(0, 0, 1, 1) -#' rel_data$sex2 <- c(1, 0, 0, 1) -#' described_sex <- describeRelatedness(rel_data, use_sex = TRUE) -#' print(described_sex) +#' # Basic usage with com2links output +#' library(BGmisc) +#' data(potter) +#' +#' # Compute additive relatedness matrix +#' add_matrix <- ped2add(potter, sparse = FALSE) +#' +#' # Convert to pairwise format +#' rel_pairs <- com2links(ad_ped_matrix = add_matrix, writetodisk = FALSE) +#' +#' # Describe relationships (without generation info) +#' described <- describeRelatedness(rel_pairs) +#' head(described) +#' +#' # With pedigree for sex-specific labels +#' # (assumes potter has a 'gen' column for generation) +#' described_sex <- describeRelatedness(rel_pairs, ped = potter, +#' use_sex = TRUE, gen_col = "gen") +#' head(described_sex) #' } #' #' @export describeRelatedness <- function(rel_df, + ped = NULL, + personID = "personID", + sex_col = "sex", + gen_col = NULL, + ID1_col = "ID1", + ID2_col = "ID2", add_col = "addRel", - gen1_col = "gen1", - gen2_col = "gen2", - sex1_col = "sex1", - sex2_col = "sex2", code_male = 1, code_female = 0, use_sex = FALSE, @@ -93,19 +103,76 @@ describeRelatedness <- function(rel_df, stop("rel_df must be a data.frame") } - if (!"ID1" %in% names(rel_df) || !"ID2" %in% names(rel_df)) { - stop("rel_df must contain 'ID1' and 'ID2' columns") + if (!ID1_col %in% names(rel_df) || !ID2_col %in% names(rel_df)) { + stop(paste0("rel_df must contain '", ID1_col, "' and '", ID2_col, "' columns")) } if (!add_col %in% names(rel_df)) { stop(paste0("Column '", add_col, "' not found in rel_df")) } - # Check for generation columns - has_gen <- gen1_col %in% names(rel_df) && gen2_col %in% names(rel_df) + # If pedigree is provided, join sex and generation information + if (!is.null(ped)) { + if (!is.data.frame(ped)) { + stop("ped must be a data.frame") + } + + if (!personID %in% names(ped)) { + stop(paste0("Column '", personID, "' not found in ped")) + } + + # Prepare pedigree data for joining + ped_for_join <- ped[, personID, drop = FALSE] + + # Add sex if available and requested + if (use_sex && sex_col %in% names(ped)) { + ped_for_join[[sex_col]] <- ped[[sex_col]] + } else if (use_sex && !sex_col %in% names(ped)) { + warning(paste0("use_sex is TRUE but '", sex_col, "' column not found in ped. Proceeding without sex-specific labels.")) + use_sex <- FALSE + } + + # Add generation if available + if (!is.null(gen_col) && gen_col %in% names(ped)) { + ped_for_join[[gen_col]] <- ped[[gen_col]] + } + + # Join sex and generation info for ID1 + names(ped_for_join)[1] <- ID1_col + if (use_sex && sex_col %in% names(ped_for_join)) { + names(ped_for_join)[names(ped_for_join) == sex_col] <- "sex1" + } + if (!is.null(gen_col) && gen_col %in% names(ped_for_join)) { + names(ped_for_join)[names(ped_for_join) == gen_col] <- "gen1" + } + + rel_df <- merge(rel_df, ped_for_join, by = ID1_col, all.x = TRUE, sort = FALSE) + + # Join sex and generation info for ID2 + ped_for_join2 <- ped[, personID, drop = FALSE] + if (use_sex && sex_col %in% names(ped)) { + ped_for_join2[[sex_col]] <- ped[[sex_col]] + } + if (!is.null(gen_col) && gen_col %in% names(ped)) { + ped_for_join2[[gen_col]] <- ped[[gen_col]] + } + + names(ped_for_join2)[1] <- ID2_col + if (use_sex && sex_col %in% names(ped_for_join2)) { + names(ped_for_join2)[names(ped_for_join2) == sex_col] <- "sex2" + } + if (!is.null(gen_col) && gen_col %in% names(ped_for_join2)) { + names(ped_for_join2)[names(ped_for_join2) == gen_col] <- "gen2" + } + + rel_df <- merge(rel_df, ped_for_join2, by = ID2_col, all.x = TRUE, sort = FALSE) + } + + # Check for generation columns (either from ped join or already in rel_df) + has_gen <- "gen1" %in% names(rel_df) && "gen2" %in% names(rel_df) - # Check for sex columns - has_sex <- sex1_col %in% names(rel_df) && sex2_col %in% names(rel_df) + # Check for sex columns (either from ped join or already in rel_df) + has_sex <- "sex1" %in% names(rel_df) && "sex2" %in% names(rel_df) if (use_sex && !has_sex) { warning("use_sex is TRUE but sex columns are not available. Proceeding without sex-specific labels.") @@ -117,8 +184,8 @@ describeRelatedness <- function(rel_df, # Extract generation differences if available if (has_gen) { - gen1 <- rel_df[[gen1_col]] - gen2 <- rel_df[[gen2_col]] + gen1 <- rel_df[["gen1"]] + gen2 <- rel_df[["gen2"]] gen_diff <- gen2 - gen1 # Positive means person 2 is in a younger generation gen_abs_diff <- abs(gen_diff) } else { @@ -130,8 +197,8 @@ describeRelatedness <- function(rel_df, # Extract sex if needed if (use_sex && has_sex) { - sex1 <- rel_df[[sex1_col]] - sex2 <- rel_df[[sex2_col]] + sex1 <- rel_df[["sex1"]] + sex2 <- rel_df[["sex2"]] } else { sex1 <- NULL sex2 <- NULL diff --git a/man/describeRelatedness.Rd b/man/describeRelatedness.Rd index 38a8f43c..855b41a0 100644 --- a/man/describeRelatedness.Rd +++ b/man/describeRelatedness.Rd @@ -6,11 +6,13 @@ \usage{ describeRelatedness( rel_df, + ped = NULL, + personID = "personID", + sex_col = "sex", + gen_col = NULL, + ID1_col = "ID1", + ID2_col = "ID2", add_col = "addRel", - gen1_col = "gen1", - gen2_col = "gen2", - sex1_col = "sex1", - sex2_col = "sex2", code_male = 1, code_female = 0, use_sex = FALSE, @@ -19,29 +21,36 @@ describeRelatedness( } \arguments{ \item{rel_df}{A data.frame with columns for pairwise relatedness information. -Required columns: ID1, ID2, addRel (additive relatedness coefficient). -Optional columns: gen1, gen2 (generation numbers), sex1, sex2 (biological sex).} +Required columns: ID1, ID2 (or custom names via \code{ID1_col}/\code{ID2_col}). +Typically produced by \code{\link{com2links}}.} -\item{add_col}{Character. Name of the column containing additive relatedness coefficients. -Default is "addRel".} +\item{ped}{Optional data.frame containing pedigree information. If provided, sex +and generation information will be automatically joined. Should contain columns +specified by \code{personID}, \code{sex_col}, and optionally \code{gen_col}.} + +\item{personID}{Character. Name of the column in \code{ped} containing person IDs. +Default is "personID".} -\item{gen1_col}{Character. Name of the column containing generation for person 1. -Default is "gen1".} +\item{sex_col}{Character. Name of the column in \code{ped} containing sex information. +Default is "sex".} -\item{gen2_col}{Character. Name of the column containing generation for person 2. -Default is "gen2".} +\item{gen_col}{Character. Name of the column in \code{ped} containing generation numbers. +If NULL (default), relationships will be described without generation information.} -\item{sex1_col}{Character. Name of the column containing sex for person 1. -Default is "sex1".} +\item{ID1_col}{Character. Name of the column in \code{rel_df} for person 1 ID. +Default is "ID1".} -\item{sex2_col}{Character. Name of the column containing sex for person 2. -Default is "sex2".} +\item{ID2_col}{Character. Name of the column in \code{rel_df} for person 2 ID. +Default is "ID2".} -\item{code_male}{The value used to denote males. Default is 1.} +\item{add_col}{Character. Name of the column containing additive relatedness coefficients. +Default is "addRel".} -\item{code_female}{The value used to denote females. Default is 0.} +\item{code_male}{The value used to denote males in the sex column. Default is 1.} -\item{use_sex}{Logical. If TRUE and sex columns are available, generate sex-specific +\item{code_female}{The value used to denote females in the sex column. Default is 0.} + +\item{use_sex}{Logical. If TRUE and sex information is available, generate sex-specific relationship labels (e.g., "mother-child" vs "father-child"). Default is FALSE.} \item{return_list}{Logical. If TRUE, return a list with both the original data and @@ -58,50 +67,53 @@ If \code{return_list = TRUE}, returns a list with two elements: } } \description{ -This function takes a flat/vectorized relatedness matrix and generates textual -descriptions of the genetic relationships based on additive relatedness coefficients -and generation differences. +This function takes a flat/vectorized relatedness matrix (as produced by +\code{\link{com2links}}) and generates textual descriptions of the genetic +relationships based on additive relatedness coefficients and generation differences. } \details{ The function uses the additive relatedness coefficient (\code{addRel}) and generation difference to infer relationship types. Common relationships and their expected values are: \itemize{ -\item addRel = 1.0, gen1 == gen2: "self" (same individual) -\item addRel = 0.5, gen1 == gen2: "full siblings" -\item addRel = 0.5, |gen1 - gen2| == 1: "parent-child" -\item addRel = 0.25, gen1 == gen2: "half siblings" -\item addRel = 0.25, |gen1 - gen2| == 1: "aunt/uncle-niece/nephew" -\item addRel = 0.25, |gen1 - gen2| == 2: "grandparent-grandchild" -\item addRel = 0.125, gen1 == gen2: "first cousins" -\item addRel = 0.0625, gen1 == gen2: "second cousins" +\item addRel = 1.0, same generation: "self" (same individual) +\item addRel = 0.5, same generation: "full siblings" +\item addRel = 0.5, 1 generation apart: "parent-child" +\item addRel = 0.25, same generation: "half siblings" +\item addRel = 0.25, 1 generation apart: "aunt/uncle-niece/nephew" +\item addRel = 0.25, 2 generations apart: "grandparent-grandchild" +\item addRel = 0.125, same generation: "first cousins" +\item addRel = 0.0625, same generation: "second cousins" \item And relationships with various degrees of removal for cousins } -When \code{use_sex = TRUE} and sex information is available, parent-child relationships -can be further specified as "mother-child", "father-child", "mother-son", -"mother-daughter", "father-son", or "father-daughter". +When \code{use_sex = TRUE} and sex information is available (either from \code{ped} +or already present in \code{rel_df}), parent-child relationships can be further +specified as "mother-child", "father-child", "mother-son", "mother-daughter", +"father-son", or "father-daughter". Similar sex-specific labels are provided for +grandparent and avuncular relationships. } \examples{ \dontrun{ -# Create example relatedness data -rel_data <- data.frame( - ID1 = c(1, 1, 1, 2), - ID2 = c(2, 3, 4, 3), - addRel = c(0.5, 0.25, 0.125, 0.5), - gen1 = c(1, 1, 1, 2), - gen2 = c(2, 3, 1, 2) -) +# Basic usage with com2links output +library(BGmisc) +data(potter) + +# Compute additive relatedness matrix +add_matrix <- ped2add(potter, sparse = FALSE) + +# Convert to pairwise format +rel_pairs <- com2links(ad_ped_matrix = add_matrix, writetodisk = FALSE) -# Get relationship descriptions -described <- describeRelatedness(rel_data) -print(described) +# Describe relationships (without generation info) +described <- describeRelatedness(rel_pairs) +head(described) -# With sex-specific labels -rel_data$sex1 <- c(0, 0, 1, 1) -rel_data$sex2 <- c(1, 0, 0, 1) -described_sex <- describeRelatedness(rel_data, use_sex = TRUE) -print(described_sex) +# With pedigree for sex-specific labels +# (assumes potter has a 'gen' column for generation) +described_sex <- describeRelatedness(rel_pairs, ped = potter, + use_sex = TRUE, gen_col = "gen") +head(described_sex) } } diff --git a/tests/testthat/test-describeRelatedness.R b/tests/testthat/test-describeRelatedness.R index 5f5d66db..60692c51 100644 --- a/tests/testthat/test-describeRelatedness.R +++ b/tests/testthat/test-describeRelatedness.R @@ -16,14 +16,33 @@ test_that("describeRelatedness handles basic relationships without generation in expect_equal(result$relationship[3], "unrelated") }) +test_that("describeRelatedness works with pedigree data", { + # Create simple pedigree + ped <- data.frame( + personID = 1:4, + sex = c(0, 1, 0, 1), + gen = c(1, 1, 1, 1) + ) + + rel_data <- data.frame( + ID1 = c(1, 3), + ID2 = c(2, 4), + addRel = c(0.5, 0.5) + ) + + result <- describeRelatedness(rel_data, ped = ped, gen_col = "gen") + expect_equal(result$relationship[1], "full siblings") + expect_equal(result$relationship[2], "full siblings") +}) + test_that("describeRelatedness identifies full siblings correctly", { rel_data <- data.frame( ID1 = c(1), ID2 = c(2), - addRel = c(0.5), - gen1 = c(1), - gen2 = c(1) + addRel = c(0.5) ) + rel_data$gen1 <- 1 + rel_data$gen2 <- 1 result <- describeRelatedness(rel_data) expect_equal(result$relationship[1], "full siblings") @@ -33,10 +52,10 @@ test_that("describeRelatedness identifies half siblings correctly", { rel_data <- data.frame( ID1 = c(1), ID2 = c(2), - addRel = c(0.25), - gen1 = c(1), - gen2 = c(1) + addRel = c(0.25) ) + rel_data$gen1 <- 1 + rel_data$gen2 <- 1 result <- describeRelatedness(rel_data) expect_equal(result$relationship[1], "half siblings") @@ -46,24 +65,42 @@ test_that("describeRelatedness identifies parent-child relationships", { rel_data <- data.frame( ID1 = c(1, 2), ID2 = c(2, 1), - addRel = c(0.5, 0.5), - gen1 = c(1, 2), - gen2 = c(2, 1) + addRel = c(0.5, 0.5) ) + rel_data$gen1 <- c(1, 2) + rel_data$gen2 <- c(2, 1) result <- describeRelatedness(rel_data) expect_equal(result$relationship[1], "parent-child") expect_equal(result$relationship[2], "parent-child") }) +test_that("describeRelatedness works with pedigree for sex-specific labels", { + ped <- data.frame( + personID = 1:4, + sex = c(0, 1, 0, 1), + gen = c(1, 2, 1, 2) + ) + + rel_data <- data.frame( + ID1 = c(1, 1), + ID2 = c(2, 4), + addRel = c(0.5, 0.5) + ) + + result <- describeRelatedness(rel_data, ped = ped, use_sex = TRUE, gen_col = "gen") + expect_equal(result$relationship[1], "mother-son") + expect_equal(result$relationship[2], "mother-son") +}) + test_that("describeRelatedness identifies grandparent-grandchild relationships", { rel_data <- data.frame( ID1 = c(1, 3), ID2 = c(3, 1), - addRel = c(0.25, 0.25), - gen1 = c(1, 3), - gen2 = c(3, 1) + addRel = c(0.25, 0.25) ) + rel_data$gen1 <- c(1, 3) + rel_data$gen2 <- c(3, 1) result <- describeRelatedness(rel_data) expect_equal(result$relationship[1], "grandparent-grandchild") @@ -74,10 +111,10 @@ test_that("describeRelatedness identifies aunt/uncle-niece/nephew relationships" rel_data <- data.frame( ID1 = c(1), ID2 = c(2), - addRel = c(0.25), - gen1 = c(1), - gen2 = c(2) + addRel = c(0.25) ) + rel_data$gen1 <- 1 + rel_data$gen2 <- 2 result <- describeRelatedness(rel_data) expect_equal(result$relationship[1], "aunt/uncle-niece/nephew") @@ -87,10 +124,10 @@ test_that("describeRelatedness identifies first cousins", { rel_data <- data.frame( ID1 = c(1), ID2 = c(2), - addRel = c(0.125), - gen1 = c(2), - gen2 = c(2) + addRel = c(0.125) ) + rel_data$gen1 <- 2 + rel_data$gen2 <- 2 result <- describeRelatedness(rel_data) expect_equal(result$relationship[1], "first cousins") @@ -100,10 +137,10 @@ test_that("describeRelatedness identifies second cousins", { rel_data <- data.frame( ID1 = c(1), ID2 = c(2), - addRel = c(0.0625), - gen1 = c(3), - gen2 = c(3) + addRel = c(0.0625) ) + rel_data$gen1 <- 3 + rel_data$gen2 <- 3 result <- describeRelatedness(rel_data) expect_equal(result$relationship[1], "second cousins") @@ -113,10 +150,10 @@ test_that("describeRelatedness identifies third cousins", { rel_data <- data.frame( ID1 = c(1), ID2 = c(2), - addRel = c(0.03125), - gen1 = c(4), - gen2 = c(4) + addRel = c(0.03125) ) + rel_data$gen1 <- 4 + rel_data$gen2 <- 4 result <- describeRelatedness(rel_data) expect_equal(result$relationship[1], "third cousins") @@ -126,10 +163,10 @@ test_that("describeRelatedness identifies cousins once removed", { rel_data <- data.frame( ID1 = c(1, 2), ID2 = c(2, 3), - addRel = c(0.125, 0.0625), - gen1 = c(2, 3), - gen2 = c(3, 4) + addRel = c(0.125, 0.0625) ) + rel_data$gen1 <- c(2, 3) + rel_data$gen2 <- c(3, 4) result <- describeRelatedness(rel_data) expect_equal(result$relationship[1], "first cousins once removed") @@ -140,10 +177,10 @@ test_that("describeRelatedness identifies cousins twice removed", { rel_data <- data.frame( ID1 = c(1, 2), ID2 = c(2, 3), - addRel = c(0.125, 0.0625), - gen1 = c(2, 3), - gen2 = c(4, 5) + addRel = c(0.125, 0.0625) ) + rel_data$gen1 <- c(2, 3) + rel_data$gen2 <- c(4, 5) result <- describeRelatedness(rel_data) expect_equal(result$relationship[1], "first cousins twice removed") @@ -154,10 +191,10 @@ test_that("describeRelatedness identifies self", { rel_data <- data.frame( ID1 = c(1, 2), ID2 = c(1, 2), - addRel = c(1.0, 1.0), - gen1 = c(1, 2), - gen2 = c(1, 2) + addRel = c(1.0, 1.0) ) + rel_data$gen1 <- c(1, 2) + rel_data$gen2 <- c(1, 2) result <- describeRelatedness(rel_data) expect_equal(result$relationship[1], "self/identical twin") @@ -168,12 +205,12 @@ test_that("describeRelatedness handles sex-specific parent-child labels", { rel_data <- data.frame( ID1 = c(1, 2, 3, 4), ID2 = c(5, 6, 7, 8), - addRel = c(0.5, 0.5, 0.5, 0.5), - gen1 = c(1, 1, 1, 1), - gen2 = c(2, 2, 2, 2), - sex1 = c(0, 0, 1, 1), # 0 = female, 1 = male - sex2 = c(0, 1, 0, 1) + addRel = c(0.5, 0.5, 0.5, 0.5) ) + rel_data$gen1 <- c(1, 1, 1, 1) + rel_data$gen2 <- c(2, 2, 2, 2) + rel_data$sex1 <- c(0, 0, 1, 1) # 0 = female, 1 = male + rel_data$sex2 <- c(0, 1, 0, 1) result <- describeRelatedness(rel_data, use_sex = TRUE) expect_equal(result$relationship[1], "mother-daughter") @@ -186,12 +223,12 @@ test_that("describeRelatedness handles sex-specific child-parent labels", { rel_data <- data.frame( ID1 = c(1, 2, 3, 4), ID2 = c(5, 6, 7, 8), - addRel = c(0.5, 0.5, 0.5, 0.5), - gen1 = c(2, 2, 2, 2), - gen2 = c(1, 1, 1, 1), - sex1 = c(0, 1, 0, 1), # 0 = female, 1 = male - sex2 = c(0, 0, 1, 1) + addRel = c(0.5, 0.5, 0.5, 0.5) ) + rel_data$gen1 <- c(2, 2, 2, 2) + rel_data$gen2 <- c(1, 1, 1, 1) + rel_data$sex1 <- c(0, 1, 0, 1) # 0 = female, 1 = male + rel_data$sex2 <- c(0, 0, 1, 1) result <- describeRelatedness(rel_data, use_sex = TRUE) expect_equal(result$relationship[1], "daughter-mother") @@ -204,12 +241,12 @@ test_that("describeRelatedness handles sex-specific aunt/uncle labels", { rel_data <- data.frame( ID1 = c(1, 2, 3, 4), ID2 = c(5, 6, 7, 8), - addRel = c(0.25, 0.25, 0.25, 0.25), - gen1 = c(1, 1, 1, 1), - gen2 = c(2, 2, 2, 2), - sex1 = c(0, 0, 1, 1), # 0 = female, 1 = male - sex2 = c(0, 1, 0, 1) + addRel = c(0.25, 0.25, 0.25, 0.25) ) + rel_data$gen1 <- c(1, 1, 1, 1) + rel_data$gen2 <- c(2, 2, 2, 2) + rel_data$sex1 <- c(0, 0, 1, 1) # 0 = female, 1 = male + rel_data$sex2 <- c(0, 1, 0, 1) result <- describeRelatedness(rel_data, use_sex = TRUE) expect_equal(result$relationship[1], "aunt-niece") @@ -222,12 +259,12 @@ test_that("describeRelatedness handles sex-specific grandparent labels", { rel_data <- data.frame( ID1 = c(1, 2, 3, 4), ID2 = c(5, 6, 7, 8), - addRel = c(0.25, 0.25, 0.25, 0.25), - gen1 = c(1, 1, 1, 1), - gen2 = c(3, 3, 3, 3), - sex1 = c(0, 0, 1, 1), # 0 = female, 1 = male - sex2 = c(0, 1, 0, 1) + addRel = c(0.25, 0.25, 0.25, 0.25) ) + rel_data$gen1 <- c(1, 1, 1, 1) + rel_data$gen2 <- c(3, 3, 3, 3) + rel_data$sex1 <- c(0, 0, 1, 1) # 0 = female, 1 = male + rel_data$sex2 <- c(0, 1, 0, 1) result <- describeRelatedness(rel_data, use_sex = TRUE) expect_equal(result$relationship[1], "grandmother-granddaughter") @@ -238,18 +275,18 @@ test_that("describeRelatedness handles sex-specific grandparent labels", { test_that("describeRelatedness custom column names work", { rel_data <- data.frame( - ID1 = c(1), - ID2 = c(2), - my_addRel = c(0.5), - my_gen1 = c(1), - my_gen2 = c(1) + person1 = c(1), + person2 = c(2), + r_coef = c(0.5) ) + rel_data$gen1 <- 1 + rel_data$gen2 <- 1 result <- describeRelatedness( rel_data, - add_col = "my_addRel", - gen1_col = "my_gen1", - gen2_col = "my_gen2" + ID1_col = "person1", + ID2_col = "person2", + add_col = "r_coef" ) expect_equal(result$relationship[1], "full siblings") }) @@ -258,10 +295,10 @@ test_that("describeRelatedness return_list option works", { rel_data <- data.frame( ID1 = c(1), ID2 = c(2), - addRel = c(0.5), - gen1 = c(1), - gen2 = c(1) + addRel = c(0.5) ) + rel_data$gen1 <- 1 + rel_data$gen2 <- 1 result <- describeRelatedness(rel_data, return_list = TRUE) expect_true(is.list(result)) @@ -294,10 +331,10 @@ test_that("describeRelatedness handles use_sex warning when sex columns missing" rel_data <- data.frame( ID1 = c(1), ID2 = c(2), - addRel = c(0.5), - gen1 = c(1), - gen2 = c(2) + addRel = c(0.5) ) + rel_data$gen1 <- 1 + rel_data$gen2 <- 2 expect_warning( result <- describeRelatedness(rel_data, use_sex = TRUE), @@ -310,12 +347,12 @@ test_that("describeRelatedness handles custom sex codes", { rel_data <- data.frame( ID1 = c(1, 2), ID2 = c(3, 4), - addRel = c(0.5, 0.5), - gen1 = c(1, 1), - gen2 = c(2, 2), - sex1 = c("F", "M"), - sex2 = c("M", "F") + addRel = c(0.5, 0.5) ) + rel_data$gen1 <- c(1, 1) + rel_data$gen2 <- c(2, 2) + rel_data$sex1 <- c("F", "M") + rel_data$sex2 <- c("M", "F") result <- describeRelatedness( rel_data, @@ -331,10 +368,10 @@ test_that("describeRelatedness handles great-grandparent relationships", { rel_data <- data.frame( ID1 = c(1), ID2 = c(2), - addRel = c(0.125), - gen1 = c(1), - gen2 = c(4) + addRel = c(0.125) ) + rel_data$gen1 <- 1 + rel_data$gen2 <- 4 result <- describeRelatedness(rel_data) expect_equal(result$relationship[1], "great-grandparent-great-grandchild") @@ -344,10 +381,10 @@ test_that("describeRelatedness handles great-great-grandparent relationships", { rel_data <- data.frame( ID1 = c(1), ID2 = c(2), - addRel = c(0.0625), - gen1 = c(1), - gen2 = c(5) + addRel = c(0.0625) ) + rel_data$gen1 <- 1 + rel_data$gen2 <- 5 result <- describeRelatedness(rel_data) expect_equal(result$relationship[1], "great-great-grandparent-great-great-grandchild") @@ -357,10 +394,10 @@ test_that("describeRelatedness handles multiple relationships in one call", { rel_data <- data.frame( ID1 = c(1, 1, 1, 2, 2, 3), ID2 = c(2, 3, 4, 3, 4, 4), - addRel = c(0.5, 0.25, 0.125, 0.5, 0.25, 0.5), - gen1 = c(1, 1, 1, 1, 1, 2), - gen2 = c(1, 2, 2, 1, 3, 2) + addRel = c(0.5, 0.25, 0.125, 0.5, 0.25, 0.5) ) + rel_data$gen1 <- c(1, 1, 1, 1, 1, 2) + rel_data$gen2 <- c(1, 2, 2, 1, 3, 2) result <- describeRelatedness(rel_data) expect_equal(nrow(result), 6) @@ -376,10 +413,10 @@ test_that("describeRelatedness handles unknown relationships gracefully", { rel_data <- data.frame( ID1 = c(1, 2), ID2 = c(2, 3), - addRel = c(0.15, 0.333), - gen1 = c(1, 1), - gen2 = c(1, 1) + addRel = c(0.15, 0.333) ) + rel_data$gen1 <- c(1, 1) + rel_data$gen2 <- c(1, 1) result <- describeRelatedness(rel_data) expect_true(grepl("unknown", result$relationship[1])) From a04b4f7df3d0a8b021ce4973d79ee5addc1f9f88 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 17 Oct 2025 16:21:35 +0000 Subject: [PATCH 4/4] Optimize describeRelatedness: vectorize, use lookup tables, eliminate dataframe duplication Co-authored-by: smasongarrison <6001608+smasongarrison@users.noreply.github.com> --- R/describeRelatedness.R | 574 ++++++++++++++++++++++++---------------- 1 file changed, 352 insertions(+), 222 deletions(-) diff --git a/R/describeRelatedness.R b/R/describeRelatedness.R index 7354a0f1..aeb5fc5d 100644 --- a/R/describeRelatedness.R +++ b/R/describeRelatedness.R @@ -121,51 +121,45 @@ describeRelatedness <- function(rel_df, stop(paste0("Column '", personID, "' not found in ped")) } - # Prepare pedigree data for joining - ped_for_join <- ped[, personID, drop = FALSE] - - # Add sex if available and requested + # Determine which columns to extract from pedigree + cols_needed <- personID if (use_sex && sex_col %in% names(ped)) { - ped_for_join[[sex_col]] <- ped[[sex_col]] + cols_needed <- c(cols_needed, sex_col) } else if (use_sex && !sex_col %in% names(ped)) { warning(paste0("use_sex is TRUE but '", sex_col, "' column not found in ped. Proceeding without sex-specific labels.")) use_sex <- FALSE } - # Add generation if available if (!is.null(gen_col) && gen_col %in% names(ped)) { - ped_for_join[[gen_col]] <- ped[[gen_col]] - } - - # Join sex and generation info for ID1 - names(ped_for_join)[1] <- ID1_col - if (use_sex && sex_col %in% names(ped_for_join)) { - names(ped_for_join)[names(ped_for_join) == sex_col] <- "sex1" - } - if (!is.null(gen_col) && gen_col %in% names(ped_for_join)) { - names(ped_for_join)[names(ped_for_join) == gen_col] <- "gen1" + cols_needed <- c(cols_needed, gen_col) } - rel_df <- merge(rel_df, ped_for_join, by = ID1_col, all.x = TRUE, sort = FALSE) + # Extract only needed columns once + ped_subset <- ped[, cols_needed, drop = FALSE] - # Join sex and generation info for ID2 - ped_for_join2 <- ped[, personID, drop = FALSE] - if (use_sex && sex_col %in% names(ped)) { - ped_for_join2[[sex_col]] <- ped[[sex_col]] + # Join for ID1 + ped_for_id1 <- ped_subset + names(ped_for_id1)[1] <- ID1_col + if (use_sex && sex_col %in% names(ped_subset)) { + names(ped_for_id1)[names(ped_for_id1) == sex_col] <- "sex1" } - if (!is.null(gen_col) && gen_col %in% names(ped)) { - ped_for_join2[[gen_col]] <- ped[[gen_col]] + if (!is.null(gen_col) && gen_col %in% names(ped_subset)) { + names(ped_for_id1)[names(ped_for_id1) == gen_col] <- "gen1" } - names(ped_for_join2)[1] <- ID2_col - if (use_sex && sex_col %in% names(ped_for_join2)) { - names(ped_for_join2)[names(ped_for_join2) == sex_col] <- "sex2" + rel_df <- merge(rel_df, ped_for_id1, by = ID1_col, all.x = TRUE, sort = FALSE) + + # Join for ID2 - reuse the subset with renamed columns + ped_for_id2 <- ped_subset + names(ped_for_id2)[1] <- ID2_col + if (use_sex && sex_col %in% names(ped_subset)) { + names(ped_for_id2)[names(ped_for_id2) == sex_col] <- "sex2" } - if (!is.null(gen_col) && gen_col %in% names(ped_for_join2)) { - names(ped_for_join2)[names(ped_for_join2) == gen_col] <- "gen2" + if (!is.null(gen_col) && gen_col %in% names(ped_subset)) { + names(ped_for_id2)[names(ped_for_id2) == gen_col] <- "gen2" } - rel_df <- merge(rel_df, ped_for_join2, by = ID2_col, all.x = TRUE, sort = FALSE) + rel_df <- merge(rel_df, ped_for_id2, by = ID2_col, all.x = TRUE, sort = FALSE) } # Check for generation columns (either from ped join or already in rel_df) @@ -184,15 +178,11 @@ describeRelatedness <- function(rel_df, # Extract generation differences if available if (has_gen) { - gen1 <- rel_df[["gen1"]] - gen2 <- rel_df[["gen2"]] - gen_diff <- gen2 - gen1 # Positive means person 2 is in a younger generation + gen_diff <- rel_df[["gen2"]] - rel_df[["gen1"]] gen_abs_diff <- abs(gen_diff) } else { - gen1 <- NULL - gen2 <- NULL - gen_diff <- NULL - gen_abs_diff <- NULL + gen_diff <- rep(NA_real_, nrow(rel_df)) + gen_abs_diff <- rep(NA_real_, nrow(rel_df)) } # Extract sex if needed @@ -200,28 +190,21 @@ describeRelatedness <- function(rel_df, sex1 <- rel_df[["sex1"]] sex2 <- rel_df[["sex2"]] } else { - sex1 <- NULL - sex2 <- NULL + sex1 <- rep(NA, nrow(rel_df)) + sex2 <- rep(NA, nrow(rel_df)) } - # Initialize relationship vector - n <- nrow(rel_df) - relationships <- character(n) - - # Classify each relationship - for (i in 1:n) { - r <- addRel[i] - relationships[i] <- classify_relationship( - r = r, - gen_diff = if (has_gen) gen_diff[i] else NA, - gen_abs_diff = if (has_gen) gen_abs_diff[i] else NA, - sex1 = if (use_sex) sex1[i] else NA, - sex2 = if (use_sex) sex2[i] else NA, - code_male = code_male, - code_female = code_female, - use_sex = use_sex - ) - } + # Vectorized classification using lookup table + relationships <- classify_relationships_vectorized( + addRel = addRel, + gen_diff = gen_diff, + gen_abs_diff = gen_abs_diff, + sex1 = sex1, + sex2 = sex2, + code_male = code_male, + code_female = code_female, + use_sex = use_sex + ) # Prepare output result_df <- rel_df @@ -238,116 +221,249 @@ describeRelatedness <- function(rel_df, } -#' Classify a single relationship based on relatedness and generation +#' Vectorized relationship classification using lookup tables #' -#' @param r Numeric. Additive relatedness coefficient -#' @param gen_diff Numeric. Generation difference (gen2 - gen1) -#' @param gen_abs_diff Numeric. Absolute generation difference -#' @param sex1 Sex of person 1 -#' @param sex2 Sex of person 2 +#' @param addRel Numeric vector of additive relatedness coefficients +#' @param gen_diff Numeric vector of generation differences +#' @param gen_abs_diff Numeric vector of absolute generation differences +#' @param sex1 Vector of sex values for person 1 +#' @param sex2 Vector of sex values for person 2 #' @param code_male Code for male #' @param code_female Code for female #' @param use_sex Logical. Whether to use sex-specific labels #' -#' @return Character string describing the relationship +#' @return Character vector of relationship descriptions #' @keywords internal -classify_relationship <- function(r, gen_diff, gen_abs_diff, sex1, sex2, - code_male, code_female, use_sex) { - # Tolerance for floating point comparison +classify_relationships_vectorized <- function(addRel, gen_diff, gen_abs_diff, + sex1, sex2, code_male, code_female, use_sex) { + n <- length(addRel) + relationships <- character(n) tol <- 0.001 - - # Handle self (same person or identical twins) - if (abs(r - 1.0) < tol) { - if (!is.na(gen_abs_diff) && gen_abs_diff == 0) { - return("self/identical twin") + + # Create lookup table for basic relationships + lookup_table <- create_relationship_lookup() + + # Vectorized processing + for (i in seq_len(n)) { + r <- addRel[i] + g_abs <- gen_abs_diff[i] + g_diff <- gen_diff[i] + + # Handle special cases first + if (abs(r - 1.0) < tol) { + relationships[i] <- if (!is.na(g_abs) && g_abs == 0) "self/identical twin" else "self" + next + } + + if (abs(r) < tol) { + relationships[i] <- "unrelated" + next + } + + # Use lookup table for known relationships + rel <- lookup_relationship(r, g_abs, tol, lookup_table) + + if (!is.na(rel)) { + # Check if sex-specific label is needed + if (use_sex && !is.na(sex1[i]) && !is.na(sex2[i]) && !is.na(g_diff)) { + sex_specific <- get_sex_specific_label(rel, g_diff, sex1[i], sex2[i], + code_male, code_female) + if (!is.null(sex_specific)) { + relationships[i] <- sex_specific + next + } + } + relationships[i] <- rel + } else { + # Fallback for unknown relationships + relationships[i] <- sprintf("unknown (r=%.4g)", r) } - return("self") } + + relationships +} - # Handle unrelated (r = 0) - if (abs(r) < tol) { - return("unrelated") - } - # For relationships with generation information - if (!is.na(gen_abs_diff) && !is.na(gen_diff)) { - # Same generation relationships - if (gen_abs_diff == 0) { - if (abs(r - 0.5) < tol) { - return("full siblings") - } else if (abs(r - 0.25) < tol) { - return("half siblings") - } else if (abs(r - 0.125) < tol) { - return("first cousins") - } else if (abs(r - 0.0625) < tol) { - return("second cousins") - } else if (abs(r - 0.03125) < tol) { - return("third cousins") - } - } +#' Create relationship lookup table +#' +#' @return List structure mapping (r, gen_abs_diff) to relationship names +#' @keywords internal +create_relationship_lookup <- function() { + list( + # Format: r_value = list of (gen_abs_diff, relationship_name) pairs + "1.0" = list(c(0, "self/identical twin")), + "0.5" = list( + c(0, "full siblings"), + c(1, "parent-child") + ), + "0.25" = list( + c(0, "half siblings"), + c(1, "aunt/uncle-niece/nephew"), + c(2, "grandparent-grandchild") + ), + "0.125" = list( + c(0, "first cousins"), + c(1, "first cousins once removed"), + c(2, "first cousins twice removed"), + c(3, "great-grandparent-great-grandchild") + ), + "0.0625" = list( + c(0, "second cousins"), + c(1, "second cousins once removed"), + c(2, "second cousins twice removed"), + c(4, "great-great-grandparent-great-great-grandchild") + ), + "0.03125" = list( + c(0, "third cousins"), + c(1, "third cousins once removed"), + c(2, "third cousins twice removed") + ) + ) +} - # One generation apart - if (gen_abs_diff == 1) { - if (abs(r - 0.5) < tol) { - # Parent-child relationship - if (use_sex && !is.na(sex1) && !is.na(sex2)) { - return(get_parent_child_label(gen_diff, sex1, sex2, code_male, code_female)) - } - return("parent-child") - } else if (abs(r - 0.25) < tol) { - # Aunt/uncle - niece/nephew - if (use_sex && !is.na(sex1) && !is.na(sex2)) { - return(get_avuncular_label(gen_diff, sex1, sex2, code_male, code_female)) - } - return("aunt/uncle-niece/nephew") - } else if (abs(r - 0.125) < tol) { - return("first cousins once removed") - } else if (abs(r - 0.0625) < tol) { - return("second cousins once removed") + +#' Lookup relationship from table +#' +#' @param r Relatedness coefficient +#' @param gen_abs_diff Absolute generation difference +#' @param tol Tolerance for floating point comparison +#' @param lookup_table Lookup table created by create_relationship_lookup +#' +#' @return Relationship name or NA if not found +#' @keywords internal +lookup_relationship <- function(r, gen_abs_diff, tol, lookup_table) { + if (is.na(gen_abs_diff)) { + # Without generation info, only return coefficient-based label + for (r_key in names(lookup_table)) { + r_val <- as.numeric(r_key) + if (abs(r - r_val) < tol) { + return(sprintf("unknown (r=%.4g)", r)) } } - - # Two generations apart - if (gen_abs_diff == 2) { - if (abs(r - 0.25) < tol) { - if (use_sex && !is.na(sex1) && !is.na(sex2)) { - return(get_grandparent_label(gen_diff, sex1, sex2, code_male, code_female)) + return(NA_character_) + } + + # Search lookup table + for (r_key in names(lookup_table)) { + r_val <- as.numeric(r_key) + if (abs(r - r_val) < tol) { + # Found matching r value, now check generation difference + entries <- lookup_table[[r_key]] + for (entry in entries) { + if (entry[1] == gen_abs_diff) { + return(entry[2]) } - return("grandparent-grandchild") - } else if (abs(r - 0.125) < tol) { - return("first cousins twice removed") - } else if (abs(r - 0.0625) < tol) { - return("second cousins twice removed") } - } - - # Three generations apart - if (gen_abs_diff == 3) { - if (abs(r - 0.125) < tol) { - return("great-grandparent-great-grandchild") + # r matches but not the generation - could be a cousin n-times removed + # Use generalized cousin naming + if (r_val <= 0.125 && r_val >= 0.015625 && gen_abs_diff > 0) { + return(generalize_cousin_relationship(r_val, gen_abs_diff, tol)) } } + } + + NA_character_ +} + - # Four generations apart - if (gen_abs_diff == 4) { - if (abs(r - 0.0625) < tol) { - return("great-great-grandparent-great-great-grandchild") +#' Generalize cousin relationships for arbitrary degrees of removal +#' +#' @param r Relatedness coefficient +#' @param gen_abs_diff Absolute generation difference +#' @param tol Tolerance for comparison +#' +#' @return Generalized relationship name +#' @keywords internal +generalize_cousin_relationship <- function(r, gen_abs_diff, tol) { + # Determine cousin degree from relatedness coefficient + # First cousins: 0.125 = 2^-3 + # Second cousins: 0.0625 = 2^-4 + # Third cousins: 0.03125 = 2^-5 + # Fourth cousins: 0.015625 = 2^-6 + + degree_map <- c(0.125, 0.0625, 0.03125, 0.015625) + degree_names <- c("first", "second", "third", "fourth") + + for (i in seq_along(degree_map)) { + if (abs(r - degree_map[i]) < tol) { + if (gen_abs_diff == 0) { + return(paste(degree_names[i], "cousins")) + } else if (gen_abs_diff == 1) { + return(paste(degree_names[i], "cousins once removed")) + } else if (gen_abs_diff == 2) { + return(paste(degree_names[i], "cousins twice removed")) + } else { + times <- c("once", "twice", "thrice") + if (gen_abs_diff <= 3) { + times_word <- times[gen_abs_diff] + } else { + times_word <- paste(gen_abs_diff, "times") + } + return(paste(degree_names[i], "cousins", times_word, "removed")) } } } + + sprintf("unknown (r=%.4g)", r) +} - # Fallback: describe by coefficient only if we can't determine specific relationship - if (abs(r - 0.5) < tol) { - return("unknown (r=0.5)") - } else if (abs(r - 0.25) < tol) { - return("unknown (r=0.25)") - } else if (abs(r - 0.125) < tol) { - return("unknown (r=0.125)") - } else if (abs(r - 0.0625) < tol) { - return("unknown (r=0.0625)") - } else { - return(paste0("unknown (r=", round(r, 4), ")")) + +#' Get sex-specific relationship label +#' +#' @param base_rel Base relationship name +#' @param gen_diff Generation difference (gen2 - gen1) +#' @param sex1 Sex of person 1 +#' @param sex2 Sex of person 2 +#' @param code_male Code for male +#' @param code_female Code for female +#' +#' @return Sex-specific relationship name or NULL if not applicable +#' @keywords internal +get_sex_specific_label <- function(base_rel, gen_diff, sex1, sex2, code_male, code_female) { + # Only provide sex-specific labels for certain relationship types + if (base_rel == "parent-child") { + return(get_parent_child_label(gen_diff, sex1, sex2, code_male, code_female)) + } else if (base_rel == "aunt/uncle-niece/nephew") { + return(get_avuncular_label(gen_diff, sex1, sex2, code_male, code_female)) + } else if (base_rel == "grandparent-grandchild") { + return(get_grandparent_label(gen_diff, sex1, sex2, code_male, code_female)) } + + NULL +} + + +#' Classify a single relationship based on relatedness and generation (DEPRECATED) +#' +#' This function is kept for backward compatibility but is no longer used internally. +#' Use classify_relationships_vectorized instead. +#' +#' @param r Numeric. Additive relatedness coefficient +#' @param gen_diff Numeric. Generation difference (gen2 - gen1) +#' @param gen_abs_diff Numeric. Absolute generation difference +#' @param sex1 Sex of person 1 +#' @param sex2 Sex of person 2 +#' @param code_male Code for male +#' @param code_female Code for female +#' @param use_sex Logical. Whether to use sex-specific labels +#' +#' @return Character string describing the relationship +#' @keywords internal +classify_relationship <- function(r, gen_diff, gen_abs_diff, sex1, sex2, + code_male, code_female, use_sex) { + # Deprecated: Use classify_relationships_vectorized instead + # This wrapper is kept for backward compatibility + result <- classify_relationships_vectorized( + addRel = r, + gen_diff = gen_diff, + gen_abs_diff = gen_abs_diff, + sex1 = sex1, + sex2 = sex2, + code_male = code_male, + code_female = code_female, + use_sex = use_sex + ) + result[1] } @@ -362,41 +478,48 @@ classify_relationship <- function(r, gen_diff, gen_abs_diff, sex1, sex2, #' @return Character string #' @keywords internal get_parent_child_label <- function(gen_diff, sex1, sex2, code_male, code_female) { - # gen_diff > 0 means person 1 is parent (older generation) - # gen_diff < 0 means person 2 is parent (older generation) - - if (gen_diff > 0) { + # Create lookup key + parent_is_1 <- gen_diff > 0 + + # Lookup table: (parent_sex, child_sex) -> label + labels <- list( + parent_child = list( + list(code_female, code_female, "mother-daughter"), + list(code_female, code_male, "mother-son"), + list(code_male, code_female, "father-daughter"), + list(code_male, code_male, "father-son") + ), + child_parent = list( + list(code_female, code_female, "daughter-mother"), + list(code_male, code_female, "son-mother"), + list(code_female, code_male, "daughter-father"), + list(code_male, code_male, "son-father") + ) + ) + + if (parent_is_1) { # Person 1 is the parent - if (sex1 == code_female && sex2 == code_male) { - return("mother-son") - } else if (sex1 == code_female && sex2 == code_female) { - return("mother-daughter") - } else if (sex1 == code_male && sex2 == code_male) { - return("father-son") - } else if (sex1 == code_male && sex2 == code_female) { - return("father-daughter") - } else if (sex1 == code_female) { - return("mother-child") - } else if (sex1 == code_male) { - return("father-child") + for (combo in labels$parent_child) { + if (sex1 == combo[[1]] && sex2 == combo[[2]]) { + return(combo[[3]]) + } } + # Fallback with partial info + if (sex1 == code_female) return("mother-child") + if (sex1 == code_male) return("father-child") } else { # Person 2 is the parent - if (sex2 == code_female && sex1 == code_male) { - return("son-mother") - } else if (sex2 == code_female && sex1 == code_female) { - return("daughter-mother") - } else if (sex2 == code_male && sex1 == code_male) { - return("son-father") - } else if (sex2 == code_male && sex1 == code_female) { - return("daughter-father") - } else if (sex2 == code_female) { - return("child-mother") - } else if (sex2 == code_male) { - return("child-father") + for (combo in labels$child_parent) { + if (sex1 == combo[[1]] && sex2 == combo[[2]]) { + return(combo[[3]]) + } } + # Fallback with partial info + if (sex2 == code_female) return("child-mother") + if (sex2 == code_male) return("child-father") } - return("parent-child") + + "parent-child" } @@ -411,30 +534,33 @@ get_parent_child_label <- function(gen_diff, sex1, sex2, code_male, code_female) #' @return Character string #' @keywords internal get_avuncular_label <- function(gen_diff, sex1, sex2, code_male, code_female) { - if (gen_diff > 0) { - # Person 1 is the aunt/uncle - if (sex1 == code_female && sex2 == code_male) { - return("aunt-nephew") - } else if (sex1 == code_female && sex2 == code_female) { - return("aunt-niece") - } else if (sex1 == code_male && sex2 == code_male) { - return("uncle-nephew") - } else if (sex1 == code_male && sex2 == code_female) { - return("uncle-niece") - } - } else { - # Person 2 is the aunt/uncle - if (sex2 == code_female && sex1 == code_male) { - return("nephew-aunt") - } else if (sex2 == code_female && sex1 == code_female) { - return("niece-aunt") - } else if (sex2 == code_male && sex1 == code_male) { - return("nephew-uncle") - } else if (sex2 == code_male && sex1 == code_female) { - return("niece-uncle") + # Lookup table approach + elder_is_1 <- gen_diff > 0 + + labels <- list( + elder_younger = list( + list(code_female, code_female, "aunt-niece"), + list(code_female, code_male, "aunt-nephew"), + list(code_male, code_female, "uncle-niece"), + list(code_male, code_male, "uncle-nephew") + ), + younger_elder = list( + list(code_female, code_female, "niece-aunt"), + list(code_male, code_female, "nephew-aunt"), + list(code_female, code_male, "niece-uncle"), + list(code_male, code_male, "nephew-uncle") + ) + ) + + label_list <- if (elder_is_1) labels$elder_younger else labels$younger_elder + + for (combo in label_list) { + if (sex1 == combo[[1]] && sex2 == combo[[2]]) { + return(combo[[3]]) } } - return("aunt/uncle-niece/nephew") + + "aunt/uncle-niece/nephew" } @@ -449,36 +575,40 @@ get_avuncular_label <- function(gen_diff, sex1, sex2, code_male, code_female) { #' @return Character string #' @keywords internal get_grandparent_label <- function(gen_diff, sex1, sex2, code_male, code_female) { - if (gen_diff > 0) { - # Person 1 is the grandparent - if (sex1 == code_female && sex2 == code_male) { - return("grandmother-grandson") - } else if (sex1 == code_female && sex2 == code_female) { - return("grandmother-granddaughter") - } else if (sex1 == code_male && sex2 == code_male) { - return("grandfather-grandson") - } else if (sex1 == code_male && sex2 == code_female) { - return("grandfather-granddaughter") - } else if (sex1 == code_female) { - return("grandmother-grandchild") - } else if (sex1 == code_male) { - return("grandfather-grandchild") + # Lookup table approach + elder_is_1 <- gen_diff > 0 + + labels <- list( + grand_grand = list( + list(code_female, code_female, "grandmother-granddaughter"), + list(code_female, code_male, "grandmother-grandson"), + list(code_male, code_female, "grandfather-granddaughter"), + list(code_male, code_male, "grandfather-grandson") + ), + grand_grand_reverse = list( + list(code_female, code_female, "granddaughter-grandmother"), + list(code_male, code_female, "grandson-grandmother"), + list(code_female, code_male, "granddaughter-grandfather"), + list(code_male, code_male, "grandson-grandfather") + ) + ) + + label_list <- if (elder_is_1) labels$grand_grand else labels$grand_grand_reverse + + for (combo in label_list) { + if (sex1 == combo[[1]] && sex2 == combo[[2]]) { + return(combo[[3]]) } + } + + # Fallback with partial info + if (elder_is_1) { + if (sex1 == code_female) return("grandmother-grandchild") + if (sex1 == code_male) return("grandfather-grandchild") } else { - # Person 2 is the grandparent - if (sex2 == code_female && sex1 == code_male) { - return("grandson-grandmother") - } else if (sex2 == code_female && sex1 == code_female) { - return("granddaughter-grandmother") - } else if (sex2 == code_male && sex1 == code_male) { - return("grandson-grandfather") - } else if (sex2 == code_male && sex1 == code_female) { - return("granddaughter-grandfather") - } else if (sex2 == code_female) { - return("grandchild-grandmother") - } else if (sex2 == code_male) { - return("grandchild-grandfather") - } + if (sex2 == code_female) return("grandchild-grandmother") + if (sex2 == code_male) return("grandchild-grandfather") } - return("grandparent-grandchild") + + "grandparent-grandchild" }