diff --git a/DESCRIPTION b/DESCRIPTION index 2a5e2b9..83b4455 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: TFBlearner Title: Functionality for training TF-specific classifiers to predict TF bindings based on ATAC-seq data. -Version: 0.0.1.0001 +Version: 0.0.1.1000 Authors@R: person("Emanuel", "Sonder", , "emanuel.sonder@hest.ethz.ch", role = c("aut", "cre"), comment = c(ORCID = "0000-0003-4788-9508")) diff --git a/R/contextFeatures.R b/R/contextFeatures.R index 5065929..ae3cc10 100644 --- a/R/contextFeatures.R +++ b/R/contextFeatures.R @@ -119,7 +119,7 @@ saveHdf5, outDir){ data.table::setDTthreads(threads) - x <- as.integer(round(1000*cor(atacMat, assay(cvSe, NORMDEVASSAY)[motif,]))) + x <- as.integer(round(scaleFactAct*cor(atacMat, assay(cvSe, NORMDEVASSAY)[motif,]))) q <- as.integer(round(quantile(x, prob=c(0,0.1,0.2,0.8,0.9,1), na.rm=TRUE))) x[abs(x)= thr[matchCoScores@j + 1] & - matchCoScores@x>=addThr*scalFactMotif] <- 1 + matchCoScores@x[matchCoScores@x < thr[matchCoScores@j + 1]] <- 0 + matchCoScores@x[matchCoScores@x >= thr[matchCoScores@j + 1]] <- 1 + matchCoScores <- drop0(matchCoScores) # get mutually exclusive motif scores zeroInd <- which(matchSubScores==0, arr.ind = TRUE) @@ -168,28 +166,34 @@ x=rep(1, nrow(zeroInd)), dims=c(nrow(matchSubScores), ncol(matchSubScores))) + colnames(matchExScores) <- colnames(matchSubScores) # jaccard index of mutually exclusive and top co-occuring motifs labels <- matrix(labels, nrow=length(labels), ncol=1) matchCo <- .jaccard(matchCoScores, labels) - matchCo[,motif_id:=1:.N] setorder(matchCo, -cont) - topCoMotif <- matchCo$motif_id[1:nMotifs] + topCoMotif <- matchCo$set1_col[1:nMotifs] matchEx <- .jaccard(matchExScores, labels) - matchEx[,motif_id:=1:.N] setorder(matchEx, -cont) - topExMotif <- matchEx$motif_id[1:nMotifs] + topExMotif <- matchEx$set1_col[1:nMotifs] - topExMotif <- intersect(topExMotif, colnames(matchScores)) - topCoMotif <- intersect(topCoMotif, colnames(matchScores)) selectedMotifs <- c(topCoMotif, topExMotif) if(length(selectedMotifs)>0){ - names(selectedMotifs) <- c(paste(COMOTIFAFFIX, 1:length(topCoMotif), sep="_"), - paste(EXMOTIFAFFIX, 1:length(topExMotif), sep="_"))} - - # can happen if the motif-matches matrix has less columns than motifs to select - selectedMotifs <- unique(selectedMotifs[!is.na(selectedMotifs)]) + if(length(topCoMotif)>0){ + namesCo <- paste(COMOTIFAFFIX, 1:length(topCoMotif), sep="_") + } + else{ + namesCo <- NULL + } + if(length(topExMotif)>0){ + namesEx <- paste(EXMOTIFAFFIX, 1:length(topExMotif), sep="_") + } + else{ + namesEx <- NULL + } + names(selectedMotifs) <- c(namesCo, namesEx) + } return(selectedMotifs) } @@ -470,8 +474,7 @@ tfFeatures <- function(mae, names(tfSimMotifCols) <- paste(PRIORMOTIFPREFIX, 1:length(tfSimMotifCols), sep="_")} - tfCofactorCols <- unique(grep(paste(tfCofactors,collapse="|"), - motifNames, value=TRUE)) + tfCofactorCols <- intersect(tfCofactors, motifNames) if(length(tfCofactorCols)>0){ names(tfCofactorCols) <- paste(TFCOFACTORMOTIFPREFIX, 1:length(tfCofactorCols), sep="_")} @@ -511,7 +514,7 @@ tfFeatures <- function(mae, maxScores <- colDataMotifs[[MAXSCORECOL]] selMotifs <- .selectMotifs(matchScores, maxScores, labels, nMotifs=nMotifs) if(length(selMotifs)>0){ - names(selMotifs) <- paste0(SELMOTIFPREFIX, names(selMotifs))} + names(selMotifs) <- paste(SELMOTIFPREFIX, names(selMotifs), sep=".")} } else{ selMotifs <- NULL @@ -526,8 +529,7 @@ tfFeatures <- function(mae, names(tfSimMotifCols) <- paste(PRIORMOTIFPREFIX, 1:length(tfSimMotifCols), sep="_")} - tfCofactorCols <- unique(grep(paste(tfCofactors,collapse="|"), - actMotifNames, value=TRUE)) + tfCofactorCols <- intersect(tfCofactors, actMotifNames) if(length(tfCofactorCols)>0){ names(tfCofactorCols) <- paste(TFCOFACTORMOTIFPREFIX, 1:length(tfCofactorCols), sep="_")} @@ -558,8 +560,8 @@ tfFeatures <- function(mae, # select motifs co-occuring around ChIP-peaks or motif matches of TF of interest actAssoc <- assays(mae[[ASSOCEXP]])[[ASSOCASSAY]] actAssoc <- actAssoc[,!c(colnames(actAssoc) %in% priorMotifCols), drop=FALSE] - selActMotifs <- .selectMotifs(actAssoc, rep(1, ncol(actAssoc)), labels, - addThr=0, nMotifs=nMotifs) + selActMotifs <- .selectMotifs(actAssoc, rep(1*scaleFactAct, ncol(actAssoc)), + labels, nMotifs=nMotifs) if(length(selActMotifs)>0){ names(selActMotifs) <- paste0(SELMOTIFPREFIX, names(selActMotifs))} }