From 976953ab605ec38e1186b43654e3120b97835732 Mon Sep 17 00:00:00 2001
From: Vinicius Junqueira
Date: Fri, 11 Jul 2025 14:18:20 -0300
Subject: [PATCH] adding new fQC function

---
 NAMESPACE                 |  1 +
 R/utils_fQC.R             | 36 ++++++++++++++++++++++++++++++++++++
 man/check.snp.hwe.chi2.Rd | 36 ++++++++++++++++++++++++++++++++++++
 3 files changed, 73 insertions(+)
 create mode 100644 man/check.snp.hwe.chi2.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 5f52552..1418f51 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -13,6 +13,7 @@ export(check.sample.call.rate)
 export(check.sample.heterozygosity)
 export(check.snp.chromo)
 export(check.snp.hwe)
+export(check.snp.hwe.chi2)
 export(check.snp.maf)
 export(check.snp.mgf)
 export(check.snp.monomorf)
diff --git a/R/utils_fQC.R b/R/utils_fQC.R
index 225af25..19f2083 100644
--- a/R/utils_fQC.R
+++ b/R/utils_fQC.R
@@ -631,3 +631,39 @@ get.hwe.chi2 <- function(snp.summary) {
   pvalues <- pchisq(chi2stat, df = 1, lower.tail = FALSE)
   return(pvalues)
 }
+
+#' Check SNPs for Hardy-Weinberg equilibrium deviation using chi-square p-values
+#'
+#' This function identifies SNP markers whose Hardy-Weinberg equilibrium (HWE) chi-square p-values
+#' indicate significant deviation beyond a specified threshold. It uses the p-values computed by
+#' \code{get.hwe.chi2} on the input summary data frame.
+#'
+#' @param snp.summary A data frame or matrix containing summary statistics for SNP markers.
+#'   The row names should correspond to SNP identifiers. It must be compatible with
+#'   the function \code{get.hwe.chi2}.
+#' @param max.dev A numeric value specifying the maximum acceptable p-value threshold.
+#'   SNPs with p-values below this threshold are considered as deviating from HWE.
+#'
+#' @return A character vector with the row names of the SNPs that fail the HWE test
+#'   (p-value < \code{max.dev}), or \code{NULL} if no SNP fails.
+#'
+#' @details SNPs whose p-value is missing (\code{NA}) are never reported as failing.
+#'
+#' @seealso \code{\link{get.hwe.chi2}}
+#'
+#' @examples
+#' # Example usage (assuming snp.summary is precomputed and get.hwe.chi2 is defined)
+#' # snps_failed <- check.snp.hwe.chi2(snp.summary, max.dev = 0.05)
+#'
+#' @export
+check.snp.hwe.chi2 <- function(snp.summary, max.dev) {
+  pvalues <- get.hwe.chi2(snp.summary)
+  # which() skips NA comparisons, so SNPs without a p-value never fail.
+  failed <- which(pvalues < max.dev)
+  if (length(failed) == 0) {
+    return(NULL)
+  }
+  # Look the names up directly; snp.summary[failed, ] would collapse a single
+  # matching row of a matrix to a plain vector, which has no row names.
+  rownames(snp.summary)[failed]
+}
diff --git a/man/check.snp.hwe.chi2.Rd b/man/check.snp.hwe.chi2.Rd
new file mode 100644
index 0000000..53ac0c1
--- /dev/null
+++ b/man/check.snp.hwe.chi2.Rd
@@ -0,0 +1,36 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils_fQC.R
+\name{check.snp.hwe.chi2}
+\alias{check.snp.hwe.chi2}
+\title{Check SNPs for Hardy-Weinberg equilibrium deviation using chi-square p-values}
+\usage{
+check.snp.hwe.chi2(snp.summary, max.dev)
+}
+\arguments{
+\item{snp.summary}{A data frame or matrix containing summary statistics for SNP markers.
+The row names should correspond to SNP identifiers. It must be compatible with
+the function \code{get.hwe.chi2}.}
+
+\item{max.dev}{A numeric value specifying the maximum acceptable p-value threshold.
+SNPs with p-values below this threshold are considered as deviating from HWE.}
+}
+\value{
+A character vector with the row names of the SNPs that fail the HWE test
+  (p-value < \code{max.dev}), or \code{NULL} if no SNP fails.
+}
+\description{
+This function identifies SNP markers whose Hardy-Weinberg equilibrium (HWE) chi-square p-values
+indicate significant deviation beyond a specified threshold. It uses the p-values computed by
+\code{get.hwe.chi2} on the input summary data frame.
+}
+\details{
+SNPs whose p-value is missing (\code{NA}) are never reported as failing.
+}
+\examples{
+# Example usage (assuming snp.summary is precomputed and get.hwe.chi2 is defined)
+# snps_failed <- check.snp.hwe.chi2(snp.summary, max.dev = 0.05)
+
+}
+\seealso{
+\code{\link{get.hwe.chi2}}
+}