diff --git a/resources/missingnessHeatMaps.R b/resources/missingnessHeatMaps.R index d7b5117..388151b 100644 --- a/resources/missingnessHeatMaps.R +++ b/resources/missingnessHeatMaps.R @@ -98,7 +98,7 @@ if (length(vcfFiles) > 1) { matrixForBinding <- makeMatrix(vcfFiles[i], printStats = 0) # Make sure that the full and intermediate matrices line up - if (!all.equal(colnames(matrixForBinding), colnames(superMatrix))) { + if (!identical(colnames(matrixForBinding), colnames(superMatrix))) { stop("Col names don't match for ", args[1], " and ", args[i], ": Exiting now", call.=TRUE) } @@ -137,15 +137,23 @@ for (i in 1:length(vcfFiles)) { snpgdsVCF2GDS(vcfFiles[i], gdsOut) gdsInter <- snpgdsOpen(gdsOut) - # Compute the dendrogram: - ibsInter <- snpgdsHCluster(snpgdsIBS(gdsInter, num.thread=2)) + + # Compute IBS matrix + ibsInter.matOnly <- snpgdsIBS(gdsInter, num.thread=2, autosome.only=FALSE) + + # If some values in IBS matrix are NA, change them to average IBS + ibs.ave <- mean(ibsInter.matOnly$ibs, na.rm=TRUE) + ibsInter.matOnly$ibs[is.na(ibsInter.matOnly$ibs)] <- ibs.ave + + # Compute the dendrogram + ibsInter <- snpgdsHCluster(ibsInter.matOnly) # Make the matrix missingMatrix <- makeMatrix(vcfFiles[i], printStats = 0) heatmapFile = paste(gsub(".vcf","", vcfFiles[i], fixed=T), '.heatmap', sep="") # Print out the correlation coefficient of missingness as a function of relatedness: - missGenCorrelation <- cor(c(missingMatrix[lower.tri(missingMatrix, diag=F)]), c(ibsInter$dist[lower.tri(ibsInter$dist, diag = F)]), method="pearson") + missGenCorrelation <- cor(c(missingMatrix[lower.tri(missingMatrix, diag=F)]), c(ibsInter$dist[lower.tri(ibsInter$dist, diag = F)]), method="pearson", use="pairwise.complete.obs") cat("Correlation (PCC) between missingness and genetic similarity for ", vcfFiles[i], ": ", missGenCorrelation, "\n", sep="") # Plot the heatmaps