diff --git a/DESCRIPTION b/DESCRIPTION index 5bbf21f..52693ae 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: TFBlearner Title: Functionality for training TF-specific classifiers to predict TF bindings based on ATAC-seq data. -Version: 0.1.0 +Version: 0.1.1 Authors@R: person("Emanuel", "Sonder", , "emanuel.sonder@hest.ethz.ch", role = c("aut", "cre"), comment = c(ORCID = "0000-0003-4788-9508")) diff --git a/R/getFeatureMatrix.R b/R/getFeatureMatrix.R index b6f1058..9d277cf 100644 --- a/R/getFeatureMatrix.R +++ b/R/getFeatureMatrix.R @@ -178,8 +178,8 @@ getFeatureMatrix <- function(mae, } # get the cofactors - tfCofactors <- unique(unlist(subset(colData(mae[[TFFEAT]]), - get(TFNAMECOL)==tf)[[TFCOFACTORSCOL]])) + tfCofactors <- unlist(subset(colData(mae[[TFFEAT]]), + get(TFNAMECOL)==tf)[[TFCOFACTORSCOL]]) message("Attaching Site & TF-Features") selMotifs <- subset(colData(mae[[TFFEAT]]), diff --git a/R/listFeatures.R b/R/listFeatures.R index 27462e3..042b674 100644 --- a/R/listFeatures.R +++ b/R/listFeatures.R @@ -22,7 +22,9 @@ #' @export listFeatures <- function(){ motifPrefices <- c(PRIORMOTIFPREFIX, TFMOTIFPREFIX, TFCOFACTORMOTIFPREFIX, - SELMOTIFPREFIX, CTCFMOTIFPREFIX) + paste(SELMOTIFPREFIX, COMOTIFAFFIX, sep="."), + paste(SELMOTIFPREFIX, EXMOTIFAFFIX, sep="."), + CTCFMOTIFPREFIX) featList <- data.table(feature_name=c("Sequence", "Width", "Annot", diff --git a/R/tfFeatures.R b/R/tfFeatures.R index dac85ee..501ddf0 100644 --- a/R/tfFeatures.R +++ b/R/tfFeatures.R @@ -142,7 +142,7 @@ maxScores, labels, nMotifs=10, - subSample=10000) + subSample=2e5) { labels <- .binMat(labels, threshold=0L) labels <- .marginMax(labels, margin="row") @@ -200,9 +200,7 @@ .getCofactorBindings <- function(chIPMat, tfCofactors){ tfCols <- unlist(tstrsplit(colnames(chIPMat), split="_", keep=2)) - namesSub <- names(tfCofactors)[which(tfCofactors %in% tfCols)] - tfCofactors <- intersect(tfCols, tfCofactors) - names(tfCofactors) <- namesSub + tfCofactors <- tfCofactors[tfCofactors %in% tfCols] if(length(tfCofactors)>0){ cofactBindings <- lapply(tfCofactors, function(tfCol){ @@ -210,7 +208,8 @@ colnames(cofactBinding) <- paste(COBINDFEATNAME, tfCol, sep=".") cofactBinding}) names(cofactBindings) <- paste(COBINDFEATNAME, - gsub(MOTIFAFFIX, "", namesSub), sep="_") + gsub(MOTIFAFFIX, "", names(tfCofactors)), + sep="_") return(cofactBindings)} else{ return(NULL) @@ -416,10 +415,12 @@ tfFeatures <- function(mae, if("Cofactor_Binding" %in% features){ message("Cofactor Bindings") if(is.null(tfCofactors)){ - stop("Please provide cofactor names (`tfCofactors`) if Cofactor_Bindings should be computed.")} - cofactBindings <- .getCofactorBindings(chIPMat, tfCofactors) - if(!is.null(cofactBindings)){ - featMats <- append(featMats, cofactBindings) + warning("Please provide cofactor names (`tfCofactors`) if Cofactor_Bindings should be computed.") + }else{ + cofactBindings <- .getCofactorBindings(chIPMat, tfCofactors) + if(!is.null(cofactBindings)){ + featMats <- append(featMats, cofactBindings) + } } } @@ -474,10 +475,7 @@ tfFeatures <- function(mae, names(tfSimMotifCols) <- paste(PRIORMOTIFPREFIX, 1:length(tfSimMotifCols), sep="_")} - tfCofactorCols <- intersect(tfCofactors, motifNames) - if(length(tfCofactorCols)>0){ - names(tfCofactorCols) <- paste(TFCOFACTORMOTIFPREFIX, - 1:length(tfCofactorCols), sep="_")} + tfCofactorCols <- tfCofactors[tfCofactors %in% motifNames] tfMotifCols <- intersect(tfName, motifNames) if(length(tfMotifCols)>0){ names(tfMotifCols) <- paste(TFMOTIFPREFIX, 1:length(tfMotifCols), sep="_")} @@ -529,10 +527,7 @@ tfFeatures <- function(mae, names(tfSimMotifCols) <- paste(PRIORMOTIFPREFIX, 1:length(tfSimMotifCols), sep="_")} - tfCofactorCols <- intersect(tfCofactors, actMotifNames) - if(length(tfCofactorCols)>0){ - names(tfCofactorCols) <- paste(TFCOFACTORMOTIFPREFIX, - 1:length(tfCofactorCols), sep="_")} + tfCofactorCols <- tfCofactors[tfCofactors %in% actMotifNames] tfMotifCols <- intersect(tfName, actMotifNames) if(length(tfMotifCols)>0){ names(tfMotifCols) <- paste(TFMOTIFPREFIX, 1:length(tfMotifCols), sep="_")} @@ -563,7 +558,7 @@ tfFeatures <- function(mae, selActMotifs <- .selectMotifs(actAssoc, rep(1*scaleFactAct, ncol(actAssoc)), labels, nMotifs=nMotifs) if(length(selActMotifs)>0){ - names(selActMotifs) <- paste0(SELMOTIFPREFIX, names(selActMotifs))} + names(selActMotifs) <- paste(SELMOTIFPREFIX, names(selActMotifs), sep=".")} } else{ selActMotifs <- NULL diff --git a/R/trainTfModel.R b/R/trainTfModel.R index b64472e..fe9ee0b 100644 --- a/R/trainTfModel.R +++ b/R/trainTfModel.R @@ -560,6 +560,17 @@ stop(paste("Feature matrix has been computed for", fmTfName, "and not for", tfName)) } + # check for cellular contexts containing no-positive labels + labelCountDt <- as.data.table(table(isPos=assays(fm)$features[,LABELCOLNAME]>0, + context=rowData(fm)[[annoCol]])) + labelCountDt[,isPos:=as.logical(isPos)] + zeroContexts <- subset(labelCountDt, isPos & N==0)$context + if(length(zeroContexts)>0){ + warning(paste("Context(s):", paste(zeroContexts, collapse=","), + "contain(s) no positive labels - removed from training data")) + fm <- fm[!(rowData(fm)[[annoCol]] %in% zeroContexts),] + } + # sample stacked chrs rangesFm <- unique(rowRanges(fm)) mcols(rangesFm) <- NULL diff --git a/tests/testthat/test-getFeatureMatrix.R b/tests/testthat/test-getFeatureMatrix.R index 8c7fd95..768dd8a 100644 --- a/tests/testthat/test-getFeatureMatrix.R +++ b/tests/testthat/test-getFeatureMatrix.R @@ -93,7 +93,9 @@ test_that("Feature Matrix: Correct context selection - only for specified contex "A549") expect_equal(obsRanges, expRanges) expect_equal(metadata(fm)[[annoCol]], "A549") - expect_equal(metadata(fm)[[TFCOFACTORSCOL]], "JUN") + expCofactors <- c("JUN") + names(expCofactors) <- c("tfCofactorMotif_1") + expect_equal(metadata(fm)[[TFCOFACTORSCOL]], expCofactors) }) test_that("Feature Matrix: Correct metadata assignment", { diff --git a/tests/testthat/test-tfFeatures.R b/tests/testthat/test-tfFeatures.R index 9edadb0..c4f960b 100644 --- a/tests/testthat/test-tfFeatures.R +++ b/tests/testthat/test-tfFeatures.R @@ -60,3 +60,9 @@ test_that("Preselected motifs are saved in colData", { expect_true(is.vector(preSelActMotifs)) expect_equal(preSelActMotifs[[paste(TFMOTIFPREFIX, 1, sep="_")]], "JUN") }) + +test_that("No cofactors provided for co-binding features", { + experiments(maeTest2)[[TFFEAT]] <- NULL + expect_warning(tfFeatures(maeTest2, tfName="JUN", tfCofactors=NULL), + regexp="provide cofactor names") +}) diff --git a/tests/testthat/test-trainTfModel.R b/tests/testthat/test-trainTfModel.R index 43e9d53..a510d1c 100644 --- a/tests/testthat/test-trainTfModel.R +++ b/tests/testthat/test-trainTfModel.R @@ -337,3 +337,12 @@ test_that("Sampling of additional points for training unweighted",{ expect_equal(sum(labels[trainSet]==1)/length(trainSet), posFracExp, tolerance=0.01) }) + +test_that("Context containing no positive labels",{ + assays(fmTest)$features[rowData(fmTest)$context=="A549", LABELCOLNAME] <- 0 + expect_no_error(trainTfModel(tfName="CTCF", fmTest, evalRounds=1)) + expect_warning(trainTfModel(tfName="CTCF", fmTest, evalRounds=1), + regexp="A549.*no positive labels") +}) + +# Context(s): A549 contain(s) no positive labels - removed from training data