From b2b353e01a63ba496563a2c7f133fd71e4978953 Mon Sep 17 00:00:00 2001 From: Shraddha Pai Date: Thu, 23 Sep 2021 10:50:31 -0400 Subject: [PATCH 01/56] version bump. --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index b03de76d..4e136881 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: netDx Title: Network-based patient classifier -Version: 1.5.3 +Version: 1.5.4 Authors@R: c(person("Shraddha", "Pai", email = "shraddha.pai@utoronto.ca", role = c("aut", "cre"), From 8485691abf29433bbfbbf0723bdbb5a0de5bfab1 Mon Sep 17 00:00:00 2001 From: Shraddha Pai Date: Thu, 23 Sep 2021 11:45:25 -0400 Subject: [PATCH 02/56] build docker --- .github/workflows/push-docker.yml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 .github/workflows/push-docker.yml diff --git a/.github/workflows/push-docker.yml b/.github/workflows/push-docker.yml new file mode 100644 index 00000000..08e1a385 --- /dev/null +++ b/.github/workflows/push-docker.yml @@ -0,0 +1,21 @@ +name: Docker Build + +on: + push: + branches: [ master ] + +jobs: + build: + + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + name: Check out code + + - uses: mr-smithers-excellent/docker-build-push@v5 + name: Build and push Docker image + with: + image: shraddhapai/netdx_devenv + registry: docker.io + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} \ No newline at end of file From 3452dbdcc855bb1ab01c583cb427adfa9cb7b52a Mon Sep 17 00:00:00 2001 From: Shraddha Pai Date: Thu, 23 Sep 2021 11:45:43 -0400 Subject: [PATCH 03/56] docker build --- .github/workflows/push-docker.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/push-docker.yml b/.github/workflows/push-docker.yml index 08e1a385..c2ff8426 100644 --- a/.github/workflows/push-docker.yml +++ b/.github/workflows/push-docker.yml @@ -8,6 +8,7 @@ jobs: build: runs-on: ubuntu-latest + steps: - uses: actions/checkout@v2 name: Check out code From 406f5cbcd01b2cfee42e05cb7a7f61e295eb8df0 Mon Sep 17 00:00:00 2001 From: Shraddha Pai Date: Thu, 23 Sep 2021 12:03:24 -0400 Subject: [PATCH 04/56] readme update --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index ec10561b..586b32ca 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,5 @@ +### Main repo for netDx dev work as of Sep 2021. + netDx is a general-purpose algorithm for building patient classifiers by using patient similarity networks as features. It excels at interpretability and handling missing data. It also allows custom grouping rules for features, notably grouping genes into pathways. It integrates with RCy3 for network visualization of predictive pathways. As of February 2020, netDx is available via the BioConductor repository. From e155bea048659d2ec9fe1288d86a6809598ef365 Mon Sep 17 00:00:00 2001 From: Shraddha Pai Date: Thu, 23 Sep 2021 13:57:42 -0400 Subject: [PATCH 05/56] separated bioc-check and docker push --- .github/workflows/check-bioc.yml | 12 +----------- .github/workflows/push-docker.yml | 4 +++- DESCRIPTION | 2 +- 3 files changed, 5 insertions(+), 13 deletions(-) diff --git a/.github/workflows/check-bioc.yml b/.github/workflows/check-bioc.yml index a21d18af..462ffbe8 100644 --- a/.github/workflows/check-bioc.yml +++ b/.github/workflows/check-bioc.yml @@ -292,14 +292,4 @@ jobs: uses: actions/upload-artifact@master with: name: ${{ runner.os }}-biocversion-devel-r-4.1-results - path: check - - - name: Build & push Docker image v5 - if: runner.os == 'Linux' - uses: mr-smithers-excellent/docker-build-push@v5 - with: - image: shraddhapai/netdx_devenv - tags: latest - registry: docker.io - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} + path: check \ No newline at end of file diff --git a/.github/workflows/push-docker.yml b/.github/workflows/push-docker.yml index c2ff8426..7a9aed3e 100644 --- a/.github/workflows/push-docker.yml +++ b/.github/workflows/push-docker.yml @@ -16,7 +16,9 @@ jobs: - uses: mr-smithers-excellent/docker-build-push@v5 name: Build and push Docker image with: - image: shraddhapai/netdx_devenv + image: realpailab/netdx registry: docker.io + addLatest: 'true' + addTimestamp: 'true' username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION index 4e136881..267fa1c5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: netDx Title: Network-based patient classifier -Version: 1.5.4 +Version: 1.5.5 Authors@R: c(person("Shraddha", "Pai", email = "shraddha.pai@utoronto.ca", role = c("aut", "cre"), From 30a4db8692c8f918f7352522e086bffdeb0f8f8c Mon Sep 17 00:00:00 2001 From: Shraddha Pai Date: Thu, 23 Sep 2021 17:49:40 -0400 Subject: [PATCH 06/56] easier PSN constructions - draft code. --- R/buildPredictor.R | 1 + R/similarities.R | 124 +++++++++++++++++++++++++ man/allowedSims.Rd | 11 +++ man/buildPredictor.Rd | 1 + man/buildPredictor_sparseGenetic.Rd | 2 +- man/checkSimValid.Rd | 17 ++++ man/psn__builtIn.Rd | 14 +++ man/psn__corr.Rd | 14 +++ man/psn__custom.Rd | 14 +++ tests/testthat/test_buildpredictor.R | 1 + tests/testthat/test_psn_construction.R | 10 ++ 11 files changed, 208 insertions(+), 1 deletion(-) create mode 100644 man/allowedSims.Rd create mode 100644 man/checkSimValid.Rd create mode 100644 man/psn__builtIn.Rd create mode 100644 man/psn__corr.Rd create mode 100644 man/psn__custom.Rd create mode 100644 tests/testthat/test_psn_construction.R diff --git a/R/buildPredictor.R b/R/buildPredictor.R index a84fcebb..f197f7b5 100644 --- a/R/buildPredictor.R +++ b/R/buildPredictor.R @@ -172,6 +172,7 @@ #' # outDir=paste(tempdir(),"pred_output",sep=getFileSep()), ## absolute path #' # numCores=16L,featScoreMax=2L, featSelCutoff=1L,numSplits=2L) buildPredictor <- function(dataList,groupList,outDir=tempdir(),makeNetFunc, + sims, featScoreMax=10L,trainProp=0.8,numSplits=10L,numCores,JavaMemory=4L, featSelCutoff=9L,keepAllData=FALSE,startAt=1L, preFilter=FALSE, impute=FALSE,preFilterGroups=NULL, imputeGroups=NULL,logging="default", diff --git a/R/similarities.R b/R/similarities.R index b4c988e0..5f94e012 100644 --- a/R/similarities.R +++ b/R/similarities.R @@ -206,3 +206,127 @@ avgNormDiff <- function(x) { sim <- sim/nrow(x) sim } + +#' built-in similarity functions +#' +allowedSims <- function(){ + return(c("pearson_corr","normDiff","avgNormDiff", + "sim.pearscale","sim.eucscale")) +} + + + +#' checks if provided similarity functions are valid. Returns error if not +#' +#' @param sims (list) keys are layer names, values are functions or characters (names of built-in similarity functions) +#' @return TRUE if all pass check. Else throws error. +checkSimValid <- function(sims){ + allowed <- allowedSims() + for (k in names(sims)){ + if (class(sims[[k]])!="function"){ + if (class(sims[[k]])!="character"){ + stop(paste("Invalid sims datatype. ", + "sims entries must be functions or keywords (characters) ", + "for built-in similarity functions.",sep="")) + } else { + if (!sims[[k]] %in% allowed){ + stop(paste( + sprintf("sims[[%s]] has invalid similarity type:",k), + sims[[k]],". ", + "Allowed values are: {%s}", + paste(allowed,collapse=","))) + } + } + } + } + return(TRUE) +} + + +makeNetFunc <- function(dataList, groupList, netDir, sims,...){ + settings <- list(dataList=dataList,groupList=groupList, + netDir=netDir,sims=sims) + netList <- c() + for (nm in names(sims)){ + csim <- sims[[nm]] + netList_cur <- NULL + + cur_set <- settings; + cur_set[["name"]] <- nm; cur_set[["similarity"]] <- csim + + if (!is.null(groupList[[nm]])){ + if (class(csim)=="function") {# custom function + netList_cur <- psn__custom(cur_set,csim,...) + } else if (csim == "pearson_corr") { + netList_cur <- psn__corr(cur_set,...) + } else { + netList_cur <- psn__builtIn(cur_set,...) + } + netList <- c(netList,netList_cur) + } + } + unlist(netList) +} + +#' make PSN for built-in similarity functions +#' +#' @param settings (list) from makeNetFunc +psn__builtIn <- function(settings,...){ +funcs <- list( + "normDiff"=normDiff, + "avgNormDiff"=avgNormDiff, + "sim.pearscale"=sim.pearscale, + "sim.eucscale"=sim.eucscale +) + + message(sprintf("Layer %s: Function %s",settings$name,settings$similarity)) + + nm <- settings$name + netList <- makePSN_NamedMatrix( + settings$dataList[[nm]], + rownames(settings$dataList[[nm]]), + settings$groupList[[nm]], + settings$netDir, + simMetric="custom", + customFunc=funcs[[settings$similarity]], # custom function + writeProfiles=FALSE, + sparsify=TRUE,... + ) + netList +} + +#' make PSN for custom similarity functions +#' +#' @param settings (list) from makeNetFunc +psn__custom <- function(settings,fn, ...){ + nm <- settings$name + message(sprintf("Layer %s: CUSTOM FUNCTION",settings$name)) + netList <- makePSN_NamedMatrix( + settings$dataList[[nm]], + rownames(settings$dataList[[nm]]), + settings$groupList[[nm]], + settings$netDir, + simMetric="custom",customFunc=fn, # custom function + writeProfiles=FALSE, + sparsify=TRUE,... + ) + netList +} + +#' wrapper for PSNs using Pearson correlation +#' +#' @param settings (list) from makeNetFunc +psn__corr <- function(settings,...){ + message(sprintf("Layer %s: PEARSON CORR",settings$name)) + nm <- settings$name + netList <- makePSN_NamedMatrix( + settings$dataList, + rownames(settings$dataList[[nm]]), ## names of measures (e.g. genes, CpGs) + settings$groupList[[nm]], ## how to group measures in that layer + settings$netDir, ## leave this as-is, netDx will figure out where this is. + verbose=FALSE, + writeProfiles=TRUE, ## use Pearson correlation-based similarity + ... + ) + return(netList) +} \ No newline at end of file diff --git a/man/allowedSims.Rd b/man/allowedSims.Rd new file mode 100644 index 00000000..06b08570 --- /dev/null +++ b/man/allowedSims.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/similarities.R +\name{allowedSims} +\alias{allowedSims} +\title{built-in similarity functions} +\usage{ +allowedSims() +} +\description{ +built-in similarity functions +} diff --git a/man/buildPredictor.Rd b/man/buildPredictor.Rd index b0480695..800224cf 100644 --- a/man/buildPredictor.Rd +++ b/man/buildPredictor.Rd @@ -9,6 +9,7 @@ buildPredictor( groupList, outDir = tempdir(), makeNetFunc, + sims, featScoreMax = 10L, trainProp = 0.8, numSplits = 10L, diff --git a/man/buildPredictor_sparseGenetic.Rd b/man/buildPredictor_sparseGenetic.Rd index 7dbfce64..e8729bbb 100644 --- a/man/buildPredictor_sparseGenetic.Rd +++ b/man/buildPredictor_sparseGenetic.Rd @@ -147,7 +147,7 @@ cnv_GR <- GRanges(pheno$seqnames,IRanges(pheno$start,pheno$end), ID=pheno$ID,LOCUS_NAMES=pheno$Gene_symbols) # get gene coordinates -geneURL <- paste("https://download.baderlab.org/netDx/", +geneURL <- paste("http://download.baderlab.org/netDx/", "supporting_data/refGene.hg18.bed",sep="") cache <- rappdirs::user_cache_dir(appname = "netDx") bfc <- BiocFileCache::BiocFileCache(cache,ask=FALSE) diff --git a/man/checkSimValid.Rd b/man/checkSimValid.Rd new file mode 100644 index 00000000..ca6626e2 --- /dev/null +++ b/man/checkSimValid.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/similarities.R +\name{checkSimValid} +\alias{checkSimValid} +\title{checks if provided similarity functions are valid. Returns error if not} +\usage{ +checkSimValid(sims) +} +\arguments{ +\item{sims}{(list) keys are layer names, values are functions or characters (names of built-in similarity functions)} +} +\value{ +TRUE if all pass check. Else throws error. +} +\description{ +checks if provided similarity functions are valid. Returns error if not +} diff --git a/man/psn__builtIn.Rd b/man/psn__builtIn.Rd new file mode 100644 index 00000000..25b0bf9e --- /dev/null +++ b/man/psn__builtIn.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/similarities.R +\name{psn__builtIn} +\alias{psn__builtIn} +\title{make PSN for built-in similarity functions} +\usage{ +psn__builtIn(settings, ...) +} +\arguments{ +\item{settings}{(list) from makeNetFunc} +} +\description{ +make PSN for built-in similarity functions +} diff --git a/man/psn__corr.Rd b/man/psn__corr.Rd new file mode 100644 index 00000000..9c3c52ec --- /dev/null +++ b/man/psn__corr.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/similarities.R +\name{psn__corr} +\alias{psn__corr} +\title{wrapper for PSNs using Pearson correlation} +\usage{ +psn__corr(settings, ...) +} +\arguments{ +\item{settings}{(list) from makeNetFunc} +} +\description{ +wrapper for PSNs using Pearson correlation +} diff --git a/man/psn__custom.Rd b/man/psn__custom.Rd new file mode 100644 index 00000000..92b7cb63 --- /dev/null +++ b/man/psn__custom.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/similarities.R +\name{psn__custom} +\alias{psn__custom} +\title{make PSN for custom similarity functions} +\usage{ +psn__custom(settings, fn, ...) +} +\arguments{ +\item{settings}{(list) from makeNetFunc} +} +\description{ +make PSN for custom similarity functions +} diff --git a/tests/testthat/test_buildpredictor.R b/tests/testthat/test_buildpredictor.R index 1678d8a2..b334896f 100644 --- a/tests/testthat/test_buildpredictor.R +++ b/tests/testthat/test_buildpredictor.R @@ -69,6 +69,7 @@ test_that("feature construction and compilation",{ # directory contains GENES.TXT, NETWORKS.TXT INTERACTIONS folder outDir <- tempdir() netDir <- sprintf("%s/tmp",outDir) + if (file.exists(netDir)) unlink(netDir,recursive=TRUE) dir.create(netDir) pheno_id <- setupFeatureDB(pheno,netDir) diff --git a/tests/testthat/test_psn_construction.R b/tests/testthat/test_psn_construction.R new file mode 100644 index 00000000..27a6ec96 --- /dev/null +++ b/tests/testthat/test_psn_construction.R @@ -0,0 +1,10 @@ + + +test_that("invalid sims is flagged",{ + + expect_equal(TRUE, checkSimValid(list(a="pearson_corr"))) + expect_identical(TRUE, checkSimValid(list(a="pearson_corr",b=function(x) 2+4))) + expect_identical(TRUE, checkSimValid(list(a="normDiff"))) + expect_error(checkSimValid(list(a="normDifff"))) + expect_error(checkSimValid(list(a=list(a=2)))) +}) \ No newline at end of file From 9b435f25cbd304dd9635487d5feadeeaece4fb26 Mon Sep 17 00:00:00 2001 From: Indy Ng Date: Mon, 27 Sep 2021 12:33:00 -0400 Subject: [PATCH 07/56] added function to convert list to MAE --- R/convertToMAE.R | 72 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 R/convertToMAE.R diff --git a/R/convertToMAE.R b/R/convertToMAE.R new file mode 100644 index 00000000..884770eb --- /dev/null +++ b/R/convertToMAE.R @@ -0,0 +1,72 @@ +#' Wrapper that converts an input list into a MultiAssayExperiment object +#' +#' @details This function takes in a list of key-value pairs (keys: data types, +#' values: matrices/dataframes) and calls the necessary functions from the +#' MultiAssayExperiment and SummarizedExperiment packages to incorporate the +#' values from the input list into a MultiAssayExperiment object, transforming +#' the values according to the keys. +#' @param dataList (list) input key-value pairs (keys: data types, values: data in +#' the form of matrices/dataframes); must have a key-value pair that +#' corresponds to patient IDs/metadata labelled patientPheno. +#' @return MAE (MultiAssayExperiment) data from input list incorporated into a +#' MultiAssayExperiment object, compatible with further analysis using the +#' netDx algorithm. + + + +convertToMAE <- function(dataList) { + + # Check input data: + if (!class(dataList) == "list") { + stop("dataList must be a list. \n") + } + if (is.null(dataList$patientPheno)) { + stop("dataList must have patientPheno key-value pair.\n") + } + if (length(dataList) == 1) { + stop("dataList must have assay data to incorporate into a + MultiAssayExperiment object") + } + + # Note that a MultiAssayExperiment object requires an ExperimentList and + # colData (sampleMap optional if each assay uses the same colnames) + + # Possible elements for ExperimentList: + # - base::matrix (gene expression, microRNA, metabolomics, microbiome data) + # - SummarizedExperiment::SummarizedExperiment (same as matrix, but capable + # of storing additional assay-level metadata) + # - Biobase::ExpressionSet (legacy representation, use SummarizedExperiment) + # - SummarizedExperiment::RangedSummarizedExperiment (range-based datasets; + # gene expression, methylation, data types that refer to genomic positions) + # - RaggedExperiment::RaggedExperiment (range-based datasets; copy number and + # mutation data, measurements by genomic positions) + + # Assumes that patientPheno is a DataFrame (or coerceable to be a DataFrame) + colData <- dataList$patientPheno + + # Generate ExperimentList from input dataList + tmp <- NULL + track <- c() + datType <- names(dataList) + for (k in 1:length(dataList)) { + # For key-value pairs that aren't labelled patientPheno, transform into + # objects compatible with input into MultiAssayExperiment object + if (!(names(dataList[k]) == "patientPheno")) { + + # Remove duplicated columns (we keep the first column) in the assay data + if (sum(duplicated(colnames(dataList[[k]]))) != 0) { + dataList[[k]] <- dataList[[k]][,!duplicated(colnames(dataList[[k]]))] + } + + # Assumes that data is of matrix class + # *(maybe implement matrix conversion into SummarizedExperiment in future) + track <- c(track, k) + tmp <- c(tmp, list(dataList[[k]])) + } + } + names(tmp) <- datType[track] + + MAE <- MultiAssayExperiment(experiments = tmp, colData = colData) + + return(MAE) +} \ No newline at end of file From 051aa3e05fb1574ebcad906a4dcf0c96da30bf06 Mon Sep 17 00:00:00 2001 From: Indy Ng Date: Mon, 27 Sep 2021 12:35:14 -0400 Subject: [PATCH 08/56] uploading auto-generated Roxygen2 file documenting convertToMAE function in convertToMAE.Rd --- man/convertToMAE.Rd | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 man/convertToMAE.Rd diff --git a/man/convertToMAE.Rd b/man/convertToMAE.Rd new file mode 100644 index 00000000..edc68fda --- /dev/null +++ b/man/convertToMAE.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/convertToMAE.R +\name{convertToMAE} +\alias{convertToMAE} +\title{Wrapper that converts an input list into a MultiAssayExperiment object} +\usage{ +convertToMAE(dataList) +} +\arguments{ +\item{dataList}{(list) input key-value pairs (keys: data types, values: data in +the form of matrices/dataframes); must have a key-value pair that +corresponds to patient IDs/metadata labelled patientPheno.} +} +\value{ +MAE (MultiAssayExperiment) data from input list incorporated into a +MultiAssayExperiment object, compatible with further analysis using the +netDx algorithm. +} +\description{ +Wrapper that converts an input list into a MultiAssayExperiment object +} +\details{ +This function takes in a list of key-value pairs (keys: data types, +values: matrices/dataframes) and calls the necessary functions from the +MultiAssayExperiment and SummarizedExperiment packages to incorporate the +values from the input list into a MultiAssayExperiment object, transforming +the values according to the keys. +} From 90c984a7c0159634955087ca0bcee21c9fa0453e Mon Sep 17 00:00:00 2001 From: Indy Ng Date: Mon, 27 Sep 2021 12:36:02 -0400 Subject: [PATCH 09/56] uploading tests for convertToMAE() function --- tests/testthat/test_convertToMAE.R | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 tests/testthat/test_convertToMAE.R diff --git a/tests/testthat/test_convertToMAE.R b/tests/testthat/test_convertToMAE.R new file mode 100644 index 00000000..4ef9e4f5 --- /dev/null +++ b/tests/testthat/test_convertToMAE.R @@ -0,0 +1,21 @@ +# test convertToMAE.R + +test_that("convertToMAE works", { + # 20 patients, 10 case, 10 control + pheno <- data.frame(ID=sprintf("PAT%i",1:20), + STATUS=rep(c("case","control"),each=10)) + # 100 dummy genes + rna <- matrix(rnorm(100*20),nrow=100); + colnames(rna) <- pheno$ID + rownames(rna) <- sprintf("gene%i",1:100) + # 2 dummy clin variables + clin <- t(data.frame(AGE=runif(20,10,50))) + colnames(clin) <- pheno$ID + clin <- t(clin) + + # netDx files + dataList <- list(rna=rna,patientPheno=clin) + + x <- convertToMAE(dataList) + expect_is(x, "MultiAssayExperiment") +}) \ No newline at end of file From 013fc96c7ff0e46695a49f4405d36ff6d3bf3578 Mon Sep 17 00:00:00 2001 From: Indy Ng Date: Mon, 27 Sep 2021 12:37:28 -0400 Subject: [PATCH 10/56] uploading auto-generated Roxygen2 documentation for buildPredictor_sparseGenetic.Rd --- man/buildPredictor_sparseGenetic.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/buildPredictor_sparseGenetic.Rd b/man/buildPredictor_sparseGenetic.Rd index 7dbfce64..e8729bbb 100644 --- a/man/buildPredictor_sparseGenetic.Rd +++ b/man/buildPredictor_sparseGenetic.Rd @@ -147,7 +147,7 @@ cnv_GR <- GRanges(pheno$seqnames,IRanges(pheno$start,pheno$end), ID=pheno$ID,LOCUS_NAMES=pheno$Gene_symbols) # get gene coordinates -geneURL <- paste("https://download.baderlab.org/netDx/", +geneURL <- paste("http://download.baderlab.org/netDx/", "supporting_data/refGene.hg18.bed",sep="") cache <- rappdirs::user_cache_dir(appname = "netDx") bfc <- BiocFileCache::BiocFileCache(cache,ask=FALSE) From 4b97aec77b8222bb53e70d36e88076d39947465b Mon Sep 17 00:00:00 2001 From: Shraddha Pai Date: Tue, 28 Sep 2021 10:25:34 -0400 Subject: [PATCH 11/56] Easier sim input - vignettes/unit tests work. --- NAMESPACE | 1 + R/buildPredictor.R | 32 +++- R/createPSN_MultiData.R | 26 ++- R/helper.R | 23 ++- R/plotIntegratedPatientNetwork.R | 13 +- R/predict.R | 34 ++-- R/similarities.R | 77 ++++++-- man/buildPredictor.Rd | 8 +- man/checkMakeNetFuncSims.Rd | 25 +++ man/createNetFuncFromSimList.Rd | 18 ++ man/createPSN_MultiData.Rd | 7 +- man/getPSN.Rd | 8 +- man/plotIntegratedPatientNetwork.Rd | 5 +- man/predict.Rd | 11 +- man/psn__builtIn.Rd | 2 +- man/psn__corr.Rd | 2 +- man/psn__custom.Rd | 2 +- tests/testthat/test_buildpredictor.R | 2 +- tests/testthat/{test_suite.R => test_misc.R} | 0 tests/testthat/test_psn_construction.R | 133 +++++++++++++- vignettes/ThreeWayClassifier.Rmd | 169 +++++++++--------- .../{ValidateNew.Rmd => ValidateNew.Rmd.old} | 8 +- vignettes/makeSimFunction.R | 117 ++++++++++++ 23 files changed, 562 insertions(+), 161 deletions(-) create mode 100644 man/checkMakeNetFuncSims.Rd create mode 100644 man/createNetFuncFromSimList.Rd rename tests/testthat/{test_suite.R => test_misc.R} (100%) rename vignettes/{ValidateNew.Rmd => ValidateNew.Rmd.old} (97%) create mode 100644 vignettes/makeSimFunction.R diff --git a/NAMESPACE b/NAMESPACE index 94091be0..7a567b60 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -15,6 +15,7 @@ export(convertProfileToNetworks) export(countIntType) export(countIntType_batch) export(countPatientsInNet) +export(createNetFuncFromSimList) export(createPSN_MultiData) export(dataList2List) export(enrichLabelNets) diff --git a/R/buildPredictor.R b/R/buildPredictor.R index f197f7b5..df05dad3 100644 --- a/R/buildPredictor.R +++ b/R/buildPredictor.R @@ -29,6 +29,9 @@ #' So keys(groupList[["rna"]]) would have pathway names, generating one PSN #' per pathways, and values(groupList[["rna"]]) would be genes that would be #' grouped for the corresponding pathwayList. +#' @param sims (list) rules to create similarity networks from input data. Keys are names of +#' data layers and should be identical to names(groupList). Values is either a character +#' for built-in similarity functions; call allowedSims() to see full list; or a custom function. #' @param makeNetFunc (function) user-defined function for creating the set #' of input PSN provided to netDx. See createPSN_MultiData()::customFunc. #' @param outDir (char) directory where results will be stored. If this @@ -171,8 +174,8 @@ #' # makeNetFunc=makeNets, ### custom network creation function #' # outDir=paste(tempdir(),"pred_output",sep=getFileSep()), ## absolute path #' # numCores=16L,featScoreMax=2L, featSelCutoff=1L,numSplits=2L) -buildPredictor <- function(dataList,groupList,outDir=tempdir(),makeNetFunc, - sims, +buildPredictor <- function(dataList,groupList,outDir=tempdir(), + makeNetFunc=NULL,sims=NULL, featScoreMax=10L,trainProp=0.8,numSplits=10L,numCores,JavaMemory=4L, featSelCutoff=9L,keepAllData=FALSE,startAt=1L, preFilter=FALSE, impute=FALSE,preFilterGroups=NULL, imputeGroups=NULL,logging="default", @@ -197,7 +200,7 @@ if (logging == "all") { verbose_predict <- FALSE } -# Check input +# Check input - error handling if (missing(dataList)) stop("dataList must be supplied.\n") if (missing(groupList)) stop("groupList must be supplied.\n") if (length(groupList)<1) stop("groupList must be of length 1+\n") @@ -214,6 +217,9 @@ if (!is(groupList,"list") || not_list || names_nomatch ) { stop(paste(msg,sep="")) } +# checks either/or provided, sets missing var to NULL +x <- checkMakeNetFuncSims(makeNetFunc=makeNetFunc, sims=sims,groupList=groupList) + if (!is(dataList,"MultiAssayExperiment")) stop("dataList must be a MultiAssayExperiment") @@ -221,6 +227,8 @@ if (trainProp <= 0 | trainProp >= 1) stop("trainProp must be greater than 0 and less than 1") if (startAt > numSplits) stop("startAt should be between 1 and numSplits") +# end check input error handling + megaDir <- outDir if (file.exists(megaDir)) { stop(paste("outDir seems to already exist!", @@ -275,7 +283,6 @@ if (verbose_default){ } } - outList <- list() # create master list of possible networks @@ -290,8 +297,14 @@ colnames(tmp) <- c("NetType","NetName") outList[["inputNets"]] <- tmp if (verbose_default) { - message("\n\nCustom function to generate input nets:") - print(makeNetFunc) + if (!is.null(makeNetFunc)){ + message("\n\nCustom function to generate input nets:") + print(makeNetFunc) + + } else { + message("Similarity metrics provided:") + print(sims) + } message(sprintf("-------------------------------\n")) } @@ -387,8 +400,8 @@ for (rngNum in startAt:numSplits) { if (verbose_default) message("** Creating features") createPSN_MultiData(dataList=dats_train,groupList=groupList, - pheno=pheno_id, - netDir=netDir,customFunc=makeNetFunc,numCores=numCores, + pheno=pheno_id, + netDir=netDir,makeNetFunc=makeNetFunc,sims=sims, numCores=numCores, verbose=verbose_makeFeatures) if (verbose_default) message("** Compiling features") dbDir <- compileFeatures(netDir,outDir, numCores=numCores, @@ -513,7 +526,8 @@ for (rngNum in startAt:numSplits) { pheno_id <- setupFeatureDB(pheno,netDir) createPSN_MultiData(dataList=dats_tmp,groupList=groupList, pheno=pheno_id, - netDir=netDir,customFunc=makeNetFunc,numCores=numCores, + netDir=netDir,makeNetFunc=makeNetFunc,sims=sims, + numCores=numCores, filterSet=pTally,verbose=verbose_default) dbDir <- compileFeatures(netDir,outDir=pDir,numCores=numCores, verbose=verbose_compileNets,debugMode=debugMode) diff --git a/R/createPSN_MultiData.R b/R/createPSN_MultiData.R index ea6e976c..f5e51ac5 100644 --- a/R/createPSN_MultiData.R +++ b/R/createPSN_MultiData.R @@ -12,7 +12,7 @@ #' with internally-generated identifiers. #' @param netDir (char) path to directory where networks will be stored #' @param filterSet (char) vector of networks to include -#' @param customFunc (function) custom user-function to create PSN. +#' @param makeNetFunc (function) custom user-function to create PSN. #' Must take dataList,groupList,netDir as parameters. Must #' check if a given groupList is empty (no networks to create) before #' the makePSN call for it. This is to avoid trying to make nets for datatypes @@ -95,24 +95,24 @@ #' pheno_id <- setupFeatureDB(colData(brca),netDir) #' createPSN_MultiData(dataList=datList2,groupList=groupList, #' pheno=pheno_id, -#' netDir=netDir,customFunc=makeNets,numCores=1) +#' netDir=netDir,makeNetFunc=makeNets,numCores=1) #' @export createPSN_MultiData <- function(dataList, groupList, pheno, netDir=tempdir(), filterSet = NULL, - verbose = TRUE, customFunc, ...) { + verbose = TRUE, makeNetFunc=NULL, sims=NULL, ...) { if (missing(dataList)) stop("dataList must be supplied.\n") if (missing(groupList)) stop("groupList must be supplied.\n") - + # resolve user-provided IDs with internal IDs dataList <- lapply(dataList, function(x) { midx <- match(colnames(x), pheno$ID) colnames(x) <- pheno$INTERNAL_ID[midx] x }) - + if (!is.null(filterSet)) { if (length(filterSet) < 1) { s1 <- "filterSet is empty." @@ -120,8 +120,8 @@ createPSN_MultiData <- function(dataList, groupList, pheno, netDir=tempdir(), stop(paste(s1, s2, sep = " ")) } } - if (missing(customFunc)) - stop("customFunc must be suppled.\n") + + # Filter for nets (potentially feature-selected ones) if (!is.null(filterSet)) { @@ -139,12 +139,22 @@ createPSN_MultiData <- function(dataList, groupList, pheno, netDir=tempdir(), } } groupList <- groupList2 + sims <- sims[which(names(sims) %in% names(groupList))] rm(groupList2) } + if (!is.null(makeNetFunc)){ # call user-defined function for making PSN - netList <- customFunc(dataList = dataList, groupList = groupList, + netList <- makeNetFunc(dataList = dataList, groupList = groupList, netDir = netDir, ...) + } else { + netList <- createNetFuncFromSimList(dataList=dataList, + groupList = groupList, + netDir = netDir, + sims = sims, + ... + ) + } if (length(netList) < 1) stop("\n\nNo features created! Filters may be too stringent.\n") diff --git a/R/helper.R b/R/helper.R index e5e0bdec..6c0a7d64 100755 --- a/R/helper.R +++ b/R/helper.R @@ -214,8 +214,10 @@ return(list(GMTfiles=gmtFiles,NodeStyles=nodeAttrFiles)) #' same class, relative to those of other classes, using Dijkstra distance (calcShortestPath flag). #' @param dat (MultiAssayExperiment) input data #' @param groupList (list) feature groups, identical to groupList provided for buildPredictor() -#' @param makeNets (function) Function used to create patient similarity networks. Identical to +#' @param makeNetFunc (function) Function used to create patient similarity networks. Identical to #' makeNets provided to buildPredictor() +#' @param sims (list) rules for creating PSN. Preferred over makeNetFunc. See buildPredictor() +#' for details. #' @param selectedFeatures (list) selected features for each class (key of list). This object is returned as #' part of a call to getResults(), after running buildPredictor(). #' @param plotCytoscape (logical) If TRUE, plots network in Cytoscape. @@ -245,9 +247,18 @@ return(list(GMTfiles=gmtFiles,NodeStyles=nodeAttrFiles)) #' colours (colour) #' 6) outDir (char) value of outDir parameter #' @export -getPSN <- function(dat, groupList, makeNets, selectedFeatures, plotCytoscape=FALSE, - aggFun="MEAN", prune_pctX=0.30, prune_useTop=TRUE,numCores=1L,calcShortestPath=FALSE +getPSN <- function(dat, groupList, + makeNetFunc=NULL, sims=NULL, + selectedFeatures, plotCytoscape=FALSE, + aggFun="MEAN", prune_pctX=0.30, prune_useTop=TRUE, + numCores=1L,calcShortestPath=FALSE ){ + + +# checks either/or provided, sets missing var to NULL +x <- checkMakeNetFuncSims(makeNetFunc=makeNetFunc, + sims=sims,groupList=groupList) + topPath <- gsub(".profile","", unique(unlist(selectedFeatures))) topPath <- gsub("_cont.txt","",topPath) @@ -262,8 +273,10 @@ for (nm in names(groupList)) { message("* Making integrated PSN") psn <- - plotIntegratedPatientNetwork(dat, - groupList=g2, makeNetFunc=makeNets, + plotIntegratedPatientNetwork( + dataList=dat, + groupList=g2, makeNetFunc=makeNetFunc, + sims=sims, aggFun=aggFun, prune_pctX=prune_pctX, prune_useTop=prune_useTop, diff --git a/R/plotIntegratedPatientNetwork.R b/R/plotIntegratedPatientNetwork.R index cd9ed4c4..ba0313d5 100644 --- a/R/plotIntegratedPatientNetwork.R +++ b/R/plotIntegratedPatientNetwork.R @@ -19,6 +19,7 @@ #' list of lists, where the outer list corresponds to assay (e.g. mRNA, #' clinical) and inner list to features to generate from that datatype. #' @param makeNetFunc (function) function to create features +#' @param sims (list) rules for creating PSN. Preferred over makeNetFunc #' @param setName (char) name to assign the network in Cytoscape #' @param numCores (integer) number of cores for parallel processing #' @param prune_pctX (numeric between 0 and 1) fraction of most/least @@ -59,7 +60,8 @@ #' @importFrom RColorBrewer brewer.pal #' @importFrom stats wilcox.test qexp density #' @export -plotIntegratedPatientNetwork <- function(dataList,groupList,makeNetFunc, +plotIntegratedPatientNetwork <- function(dataList,groupList, + makeNetFunc=NULL,sims=NULL, setName="predictor",prune_pctX=0.05, prune_useTop=TRUE, aggFun="MAX",calcShortestPath=FALSE, showStats=FALSE, @@ -67,6 +69,11 @@ plotIntegratedPatientNetwork <- function(dataList,groupList,makeNetFunc, nodeTransparency=155L,plotCytoscape=FALSE, verbose=FALSE) { + +# checks either/or provided, sets missing var to NULL +checkMakeNetFuncSims(makeNetFunc=makeNetFunc, + sims=sims,groupList=groupList) + if (missing(dataList)) stop("dataList is missing.") dat <- dataList2List(dataList, groupList) @@ -81,7 +88,9 @@ pheno_id <- setupFeatureDB(pheno,outDir) createPSN_MultiData(dataList=dat$assays,groupList=groupList, pheno=pheno_id, - netDir=outDir,customFunc=makeNetFunc,numCores=numCores, + netDir=outDir, + customFunc=makeNetFunc,sims=sims, + numCores=numCores, verbose=FALSE) convertProfileToNetworks( netDir=profDir, diff --git a/R/predict.R b/R/predict.R index 4fd8ee0a..42698da1 100644 --- a/R/predict.R +++ b/R/predict.R @@ -5,11 +5,12 @@ #' @param testMAE (MultiAssayExperiment) new patient dataset for testing model. Assays must be the same as for trainMAE. #' @param groupList (list) list of features used to train the model. Keys are data types, and values are lists for groupings within those datatypes. #' e.g. keys could include {'clinical','rna','methylation'}, and values within 'rna' could include pathway names {'cell cycle', 'DNA repair'}, etc., -#' featSel will be used to subset -#' @param featSel (list) selected features to be used in the predictive model. +#' selectedFeatures will be used to subset +#' @param selectedFeatures (list) selected features to be used in the predictive model. #' keys are patient labels (e.g. "responder/nonresponder"), and values are feature names #' identified by running buildPredictor(). Feature names must correspond to names of groupList, from which they will be subset. #' @param makeNetFunc (function) function to create PSN features from patient data. See makeNetFunc in buildPredictor() for details +#' @param sims (list) rules for creating PSN. Preferred over makeNetFunc. #' @param impute (logical) if TRUE imputes train and test samples separately before creating features. Currently unsupported. #' @param outDir (char) directory for results #' @param verbose (logical) print messages @@ -20,15 +21,19 @@ #' columns are: 1) ID, 2) STATUS (ground truth), 3)