diff --git a/docker/environment.yml b/docker/environment.yml index 92f6303..0e8b702 100755 --- a/docker/environment.yml +++ b/docker/environment.yml @@ -80,6 +80,7 @@ dependencies: - pytz==2023.3 - requests==2.31.0 - requests-oauthlib==1.3.1 + - rectanglepy==1.0.0 - rich==13.3.5 - rsa==4.9 - git+https://github.com/omnideconv/scaden.git diff --git a/docker_rectangle/Dockerfile b/docker_rectangle/Dockerfile new file mode 100755 index 0000000..7e33643 --- /dev/null +++ b/docker_rectangle/Dockerfile @@ -0,0 +1,84 @@ +FROM eddelbuettel/r2u:20.04 + +LABEL description="Image for Omnideconv benchmarking pipeline" +LABEL maintainer="Alexander Dietrich" + +# needed so that R r-base installation does not get stuck +ENV DEBIAN_FRONTEND noninteractive + +# install system dependencies +RUN apt-get update -y && apt-get --no-install-recommends --fix-broken install -y git \ + wget \ + vim \ + software-properties-common \ + dirmngr \ + gdebi \ + curl + +# install miniconda into /root/miniconda3 +RUN wget \ + https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \ + && bash Miniconda3-latest-Linux-x86_64.sh -b -p /root/.local/share/r-miniconda \ + && rm Miniconda3-latest-Linux-x86_64.sh + +ENV PATH="/root/.local/share/r-miniconda/bin:${PATH}" +ARG PATH="/root/.local/share/r-miniconda/bin:${PATH}" + +RUN conda --version + +RUN conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/main +RUN conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/r + +# create r-omnideconv environment based on yml file +COPY environment.yml . +RUN conda env create -f environment.yml +RUN echo "source activate r-omnideconv" > ~/.bashrc + + +ENV PATH="/root/.local/share/r-miniconda/envs/omnideconv/bin:${PATH}" +ARG PATH="/root/.local/share/r-miniconda/envs/omnideconv/bin:${PATH}" + +# install omnideconv with all dependencies +RUN apt-get --no-install-recommends --fix-broken install -y libcurl4-openssl-dev \ + libxml2-dev \ + libfontconfig1-dev \ + libssl-dev \ + libharfbuzz-dev \ + libfribidi-dev \ + libfreetype6-dev \ + libpng-dev \ + libtiff5-dev \ + libjpeg-dev \ + libgdal-dev \ + cmake + + +RUN R -e "install.packages(c('pak','pkgdepends','devtools','remotes','harmony','terra','igraph', 'RSQLite'), repos='https://cloud.r-project.org')" +# Note: Set GITHUB_PAT as a build argument if needed: docker build --build-arg GITHUB_PAT=your_token +RUN R -e "remotes::install_github('omnideconv/omnideconv@benchmark', dependencies = TRUE)" +#RUN R -e "pak::pkg_install('omnideconv/SCDC')" +#RUN R -e "pak::pkg_install('omnideconv/SCDC')" +#RUN R -e "remotes::install_version('NMF', '0.27', repos='https://cloud.r-project.org')" + + +# set systems variable RETICULATE_PYTHON to new conda env, so that omnideconv will use it +RUN echo "RETICULATE_PYTHON = '/root/.local/share/r-miniconda/envs/r-omnideconv/bin/python'" > /root/.Rprofile +RUN R -e "reticulate::use_miniconda(condaenv = 'r-omnideconv', required = TRUE)" + +# install benchmarking related dependencies +RUN R -e "install.packages(c('conflicted','docopt','readxl','tidyverse','profvis'), repos='https://cloud.r-project.org')" +RUN R -e "BiocManager::install('SimBu')" + +# install Docker, so that CibersortX can run +RUN mkdir -p /etc/apt/keyrings +RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /etc/apt/keyrings/docker.gpg +RUN echo \ + "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu \ + $(lsb_release -cs) stable" | tee /etc/apt/sources.list.d/docker.list > /dev/null +RUN apt-get update -y +RUN apt-get install -y --no-install-recommends \ + docker-ce \ + docker-ce-cli \ + containerd.io + + diff --git a/docker_rectangle/environment.yml b/docker_rectangle/environment.yml new file mode 100644 index 0000000..1b2af6e --- /dev/null +++ b/docker_rectangle/environment.yml @@ -0,0 +1,6 @@ +name: r-omnideconv +dependencies: + - python=3.11 + - pip + - pip: + - rectanglepy==1.0.0 diff --git a/pipeline/bin/computeSignaturesNF.R b/pipeline/bin/computeSignaturesNF.R index f1fc0b2..68861fc 100755 --- a/pipeline/bin/computeSignaturesNF.R +++ b/pipeline/bin/computeSignaturesNF.R @@ -76,7 +76,6 @@ bulk_matrix <- as.matrix(bulk_matrix) - #################################### #### Perform signature building #### #################################### diff --git a/pipeline/bin/runDeconvolutionNF.R b/pipeline/bin/runDeconvolutionNF.R index 6ff22c7..bd95cb6 100755 --- a/pipeline/bin/runDeconvolutionNF.R +++ b/pipeline/bin/runDeconvolutionNF.R @@ -106,6 +106,17 @@ if(method=='cibersortx'){ #### Perform devonvolution #### ############################### +if(method == 'rectangle'){ + # do not incluce anndata creation in runtime measurement + AnnData <- reticulate::import("anndata") + counts <- as.data.frame(t(sc_matrix)) + + ad <- AnnData$AnnData(X = counts, + obs = data.frame('cell_type' = sc_celltype_annotations, row.names = colnames(sc_matrix)), + var = data.frame('genes' = rownames(sc_matrix), row.names = rownames(sc_matrix))) + sc_matrix <- ad +} + runtime <- system.time({ deconvolution <- deconvolution_workflow_general( diff --git a/pipeline/bin/utils.R b/pipeline/bin/utils.R index c7daac9..9611a20 100755 --- a/pipeline/bin/utils.R +++ b/pipeline/bin/utils.R @@ -215,6 +215,7 @@ signature_workflow_general <- function(sc_matrix, annotations, annotation_catego # path to temporary directory that is inside results directory of one unique result tmp_dir_path <- paste0(res_path, '/tmp/') + ncores <- as.numeric(ncores) # Signature building part # Dependent on which method we have @@ -282,8 +283,7 @@ signature_workflow_general <- function(sc_matrix, annotations, annotation_catego model_path = paste0(res_path, '/model'), verbose = TRUE ) - - }else if (method %in% c('autogenes', 'bayesprism', 'bisque', 'music')){ + } else if (method %in% c('autogenes', 'bayesprism', 'bisque', 'music', 'rectangle')){ signature <- NULL } else { @@ -312,6 +312,7 @@ deconvolution_workflow_general <- function(sc_matrix, annotations, annotation_ca # path to temporary directory that is inside results directory of one unique result tmp_dir_path <- paste0(res_path, '/tmp/') + ncores <- as.numeric(ncores) if(method=="autogenes"){ @@ -463,6 +464,16 @@ deconvolution_workflow_general <- function(sc_matrix, annotations, annotation_ca ) #unlink(tmp_dir_path, recursive=TRUE) + } else if (method == "rectangle") { + + rp <- reticulate::import("rectanglepy") + + #ad <- anndata::AnnData(X = t(sc_matrix), + # obs = data.frame('cell_type' = annotations, row.names = colnames(sc_matrix)), + # var = data.frame('genes' = rownames(sc_matrix), row.names = rownames(sc_matrix))) + + deconvolution <- rp$rectangle(sc_matrix, bulks = as.data.frame(t(bulk_matrix)), n_cpus=ncores)[[1]] + } else { message('Selected method is not supported in the benchmark. Please check again.') stop() diff --git a/pipeline/cibersortx_credentials.csv b/pipeline/cibersortx_credentials.csv index 2f64c43..6bce71a 100755 --- a/pipeline/cibersortx_credentials.csv +++ b/pipeline/cibersortx_credentials.csv @@ -1,2 +1,2 @@ email, token -alex.dietrich@tum.de, e1750b96fcfef36421bf9927a9f0fc49 +alex.dietrich@tum.de, 8ad207154371cd524393fd78e1bb16f0 diff --git a/pipeline/main.nf b/pipeline/main.nf index 136946f..e23ce9d 100755 --- a/pipeline/main.nf +++ b/pipeline/main.nf @@ -118,6 +118,8 @@ process ANALYSIS_SPILLOVER { } process SIMULATE_BULK_UNKNOWN_CELL_TYPE { + + label 'process_default' publishDir "${params.preProcess_dir}/pseudo_bulk_unknown_content", mode: 'copy' @@ -145,6 +147,8 @@ process SIMULATE_BULK_UNKNOWN_CELL_TYPE { process ANALYSIS_BULK_UNKNOWN_CELL_TYPE { + label 'process_default' + input: tuple val(sc_dataset), val(sim_bulk_name), diff --git a/pipeline/nextflow_alex.config b/pipeline/nextflow_alex.config deleted file mode 100755 index 25d43ff..0000000 --- a/pipeline/nextflow_alex.config +++ /dev/null @@ -1,116 +0,0 @@ -params { - - /*** input directories ***/ - - data_dir_bulk = "/vol/omnideconv_input/omnideconv_data/PBMC" // absolute path to directory that contains RNA-seq datasets - data_dir_sc = "/vol/omnideconv_input/omnideconv_data/singleCell" // absolute path to directory that contains scRNA-seq datasets - - - /*** output directories ***/ - results_dir_general = "/vol/omnideconv_results/results_tmp" // absolute path to directory in which final results of the main and subsampling workflow are stored - preProcess_dir = "/vol/omnideconv_input/preprocess" // absolute path to directory in which temporary files are stored - - - /*** Benchmarking Parameters [general] ***/ - single_cell_list = ["hao-sampled-3"] // list of scRNA-seq dataset names that will be used for deconvolution. - bulk_list = ["hoek-simulation-nobias"] // list of RNA-seq dataset names that will be used for deconvolution. - method_list = ["scaden"] // list of method names that are used for deconvolution. Possible values are: ["autogenes","bayesprism","bisque","cibersortx" ,"dwls","music","scaden","scdc"] - - - /*** general parameters ***/ - ncores = '4' // Number of cores that are available for methods - species_sc = 'hs' // Type of species in scRNA-seq dataset (mm or hs). Currently only supported by BayesPrism. - - - /****** Workflow-specific parameters ******/ - - - /*** subsampling ***/ - - ct_fractions = [5, 50] // cell-type fractions for subsampling - replicates = 5 // number of replicates for subsampling - - - /*** simulation_impact_technology ***/ - results_dir_impact_technology = "/path/to/impact_technology/results/" // results directory for this workflow - replicates_simulation = [5] // number of simulation replicates for this workflow - datasets_impact_technology = ["lambrechts", "maynard"] // names of sc datasets that are used for this workflow - - - /*** simulation_spillover ***/ - results_dir_spillover = "/path/to/simulation_spillover/results/" // results directory for this workflow - spillover_samples_per_cell = 10 // number of simulation replicates for this workflow - spillover_celltypes = ["B cells,Monocytes,NK cells,T cells CD8,T cells CD4 conv"] // names of cell types for which spillover analysis will be done; have to be present in single-cell dataset - - - /*** simulation_unkown_content ***/ - results_dir_unkown_content = "/path/to/simulation_unkown_content/results/" // results directory for this workflow - known_cell_types = ["B cells,Stromal cells,T cells CD4 conv,Macrophages"] // subset of cell types that we will use to build the signature matrix - unkown_cell_types = ["Tumor cells"] // unknown cell type to use - fractions_unkown_cells = [0, 0.2, 0.3, 0.9] // How large the fraction of the unkown cell type is - replicates_unknown_content = [5] // number of technical replicates for pseudo-bulk simulation - - - /*** simulation_resolution_analysis ***/ - results_dir_resolution = "/path/to/simulation_resolution_analysis/results/" // results directory for this workflow - cell_types_finer_res = ["Endothelial ACKR1,Endothelial RGS5,Endothelial CXCL12, ..."] //which cell types will we inspect at various resolutions? // number of simulation replicates for this workflow - - - /*** impact_missing_cell_types ***/ - results_dir_missing_cell_types = "/path/to/impact_missing_cell_types/results/" // results directory for this workflow - cell_types_to_exclude = ["B cells","mDC","Monocytes","NK cells","T cells CD4 conv"] // each of the cell types that will be excluded from the single cell dataset before deconvoltion // in the spillover analysis samples contain only one cell types. How many samples we should generate this way - - -} - - -profiles { - standard { - process.executor = 'local' - process.cpus = 4 - process.memory = '50 GB' - docker.enabled = false - } - docker { - process.executor = 'local' - process.cpus = 4 - process.memory = '10 GB' - docker.enabled = true - process.container = 'alexd13/omnideconv_benchmark:latest' - docker.temp = "auto" - } - apptainer { - process.executor = 'local' - process.cpus = 4 - process.memory = '10 GB' - apptainer.enabled = true - process.container = '/nfs/proj/omnideconv_benchmarking/deconvBench/omnideconv_benchmark.sif' - } - slurm_no_docker { - process.executor = 'slurm' - process.cpus = 4 - process.memory = '50 GB' - docker.enabled = false - } - slurm_docker { - process.executor = 'slurm' - process.cpus = 4 - process.memory = '200 GB' - docker.enabled = true - process.container = 'alexd13/omnideconv_benchmark:1.2' - docker.temp = "auto" - } -} - -// this is necessary to run CIBERSORTx with a docker in docker environment. The local docker socket has to be mapped as a volume into the omnideconv_benchmark container, so that the CIBERSORTx container can be started inside -docker { - runOptions = '-v /var/run/docker.sock:/var/run/docker.sock -v /vol/omnideconv_results/:/vol/omnideconv_results/' - -} - -// use the `-with-trace` command to get tracing information on each job. Useful for the subsampling workflow, to see how resources scale with size of single-cell dataset -trace { - fields = 'task_id,process,name,status,module,container,cpus,time,disk,memory,realtime,%cpu,%mem,rss,vmem,peak_rss,peak_vmem,script' - sep = ',' - overwrite = true -} diff --git a/pipeline/run_cibersortx.sh b/pipeline/run_cibersortx.sh index 3d8d92e..c7a8f8b 100755 --- a/pipeline/run_cibersortx.sh +++ b/pipeline/run_cibersortx.sh @@ -1,3 +1,5 @@ #!/bin/bash -nextflow -C /nfs/proj/omnideconv_benchmarking/omnideconv/benchmark/pipeline/nextflow_norm.config run /nfs/proj/omnideconv_benchmarking/omnideconv/benchmark/pipeline/main.nf -profile cibersortx -work-dir /localscratch/omnideconv_work "$@" \ No newline at end of file +nextflow -C /nfs/proj/omnideconv_benchmarking/omnideconv/benchmark/pipeline/nextflow_norm.config run /nfs/proj/omnideconv_benchmarking/omnideconv/benchmark/pipeline/main.nf \ + -profile cibersortx \ + -work-dir /localscratch/omnideconv_work "$@" \ No newline at end of file diff --git a/pipeline/run_default.sh b/pipeline/run_default.sh index c982487..17ca48b 100755 --- a/pipeline/run_default.sh +++ b/pipeline/run_default.sh @@ -1,3 +1,6 @@ #!/bin/bash -nextflow -C /nfs/proj/omnideconv_benchmarking/omnideconv/benchmark/pipeline/nextflow_norm.config run /nfs/proj/omnideconv_benchmarking/omnideconv/benchmark/pipeline/main.nf -profile apptainer -work-dir /nfs/scratch/nf-core_work/omnideconv.benchmark -with-trace "$@" \ No newline at end of file +nextflow -C /nfs/proj/omnideconv_benchmarking/omnideconv/benchmark/pipeline/nextflow_norm.config run /nfs/proj/omnideconv_benchmarking/omnideconv/benchmark/pipeline/main.nf \ + -profile apptainer \ + -work-dir /nfs/scratch/nf-core_work/omnideconv.benchmark \ + -with-trace "$@" \ No newline at end of file diff --git a/visualisation/fig2/fig2.nb.html b/visualisation/fig2/fig2.nb.html index 8ab963a..beec0ce 100755 --- a/visualisation/fig2/fig2.nb.html +++ b/visualisation/fig2/fig2.nb.html @@ -1754,27 +1754,13 @@

R Notebook

- -
source("../helper_functions.R")
-source('ggradar_custom.R')
+
+
source("../../visualisation/helper_functions.R")
+source('../../revision/ggradar_custom.R')
 library(tidyverse)
 library(data.table)
-library(cowplot)
- - -

-Attaching package: ‘cowplot’
-
-The following object is masked from ‘package:patchwork’:
-
-    align_plots
-
-The following object is masked from ‘package:lubridate’:
-
-    stamp
- - -
library(patchwork)
+library(cowplot)
+library(patchwork)
 
 color_palette <- c('B cells'='#999933',
                    'Macrophages'='#CC6677',
@@ -1830,25 +1816,8 @@ 

R Notebook

df.cor <- data_summary(performance_df, 'cor', c('cell_type','method','bulk_ds','org'))
- -
Loading required package: plyr
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-You have loaded plyr after dplyr - this is likely to cause problems.
-If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
-library(plyr); library(dplyr)
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-
-Attaching package: ‘plyr’
-
-The following objects are masked from ‘package:dplyr’:
-
-    arrange, count, desc, failwith, id, mutate, rename, summarise, summarize
-
-The following object is masked from ‘package:purrr’:
-
-    compact
-
-using median as new default.
+ +
using median as new default.
df.rmse <- data_summary(performance_df, 'RMSE', c('cell_type','method','bulk_ds','org'))
@@ -1951,38 +1920,24 @@

Fig2a

-

+

- -
```r
-ggsave('../../revision/visualization/fig2/fig_2a.pdf', plot=fig_2a, width = 18, height = 9)
-

-<!-- rnb-source-end -->
-
-<!-- rnb-chunk-end -->
-
-
-<!-- rnb-text-begin -->
-
-
-
-# Fig2b
-
-
-<!-- rnb-text-end -->
-
-
-<!-- rnb-chunk-begin -->
-
-
-<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxucHJlcGFyZV9tZXRyaWNfZGYgPC0gZnVuY3Rpb24oZGYsIGRzLCBtZXRyaWMsIHRyYW5zZm9ybV9mbiA9IGlkZW50aXR5KSB7XG4gIGRmIHw+XG4gICAgZmlsdGVyKGJ1bGtfZHMgPT0gZHMsIGNlbGxfdHlwZSAhPSBcImFsbFwiKSB8PlxuICAgIHNlbGVjdChtZXRob2QsIGNlbGxfdHlwZSwgYnVsa19kcywgdmFsdWUgPSBhbGxfb2YobWV0cmljKSkgfD5cbiAgICBtdXRhdGUodmFsdWUgPSB0cmFuc2Zvcm1fZm4odmFsdWUpKSB8PlxuICAgIHBpdm90X3dpZGVyKG5hbWVzX2Zyb20gPSBjZWxsX3R5cGUsIHZhbHVlc19mcm9tID0gdmFsdWUpIHw+XG4gICAgc2VsZWN0KC1idWxrX2RzKSB8PlxuICAgIHJlcGxhY2UoaXMubmEoLiksIDApIHw+IFxuICAgIGFzX3RpYmJsZSgpXG59XG5cbnByZXBhcmVfbWV0cmljX2RmX3Blcl9jZWxsdHlwZSA8LSBmdW5jdGlvbihkZiwgZHMsIG1ldHJpYywgdHJhbnNmb3JtX2ZuID0gaWRlbnRpdHkpIHtcbiAgZGYgfD5cbiAgICBmaWx0ZXIoYnVsa19kcyA9PSBkcywgY2VsbF90eXBlICE9IFwiYWxsXCIpIHw+XG4gICAgc2VsZWN0KG1ldGhvZCwgY2VsbF90eXBlLCBidWxrX2RzLCB2YWx1ZSA9IGFsbF9vZihtZXRyaWMpKSB8PlxuICAgIG11dGF0ZSh2YWx1ZSA9IHRyYW5zZm9ybV9mbih2YWx1ZSkpIHw+XG4gICAgcGl2b3Rfd2lkZXIobmFtZXNfZnJvbSA9IG1ldGhvZCwgdmFsdWVzX2Zyb20gPSB2YWx1ZSkgfD5cbiAgICBzZWxlY3QoLWJ1bGtfZHMpIHw+XG4gICAgcmVwbGFjZShpcy5uYSguKSwgMCkgfD4gXG4gICAgYXNfdGliYmxlKClcbn1cblxuY29tcHV0ZV9leHRyZW1lIDwtIGZ1bmN0aW9uKGRmLCBmdW4sIGFkanVzdCA9IDApIHtcbiAgcm91bmQoZnVuKGFwcGx5KGRmWywtMV0sIDIsIGZ1biksIG5hLnJtID0gVFJVRSksIDEpICsgYWRqdXN0XG59XG5cbiMgSGVscGVyIGZ1bmN0aW9uIHRvIGNyZWF0ZSByYWRhciBwbG90XG5wbG90X3JhZGFyIDwtIGZ1bmN0aW9uKGRmLCB0aXRsZSwgZ3JpZC5taW4sIGdyaWQubWlkLCBncmlkLm1heCwgdmFsdWVzLnJhZGFyLCBsZWdlbmQucG9zaXRpb249J25vbmUnLCBwYWxldHRlKSB7XG4gIGdncmFkYXIoZGYsXG4gICAgICAgICAgZ3JvdXAuY29sb3VycyA9IHBhbGV0dGUsXG4gICAgICAgICAgZ3JvdXAucG9pbnQuc2l6ZSA9IDIsXG4gICAgICAgICAgZ3JvdXAubGluZS53aWR0aCA9IC44LFxuICAgICAgICAgIGdyaWQubWluID0gZ3JpZC5taW4sXG4gICAgICAgICAgZ3JpZC5taWQgPSBncmlkLm1pZCxcbiAgICAgICAgICBncmlkLm1heCA9IGdyaWQubWF4LFxuICAgICAgICAgIHZhbHVlcy5yYWRhciA9IHZhbHVlcy5yYWRhcixcbiAgICAgICAgICBmb250LnJhZGFyID0gXCJzYW5zXCIsXG4gICAgICAgICAgbGVnZW5kLnRleHQuc2l6ZSA9IDEwLFxuICAgICAgICAgIGF4aXMubGFiZWwuc2l6ZSA9IDMuNSxcbiAgICAgICAgICBsZWdlbmQucG9zaXRpb24gPSBsZWdlbmQucG9zaXRpb24sXG4gICAgICAgICAgcGxvdC50aXRsZSA9IHRpdGxlKVxufVxuXG5wbG90X3JhZGFyX2N1c3RvbSA8LSBmdW5jdGlvbihkZiwgdGl0bGUsIGdyaWQubWluLCBncmlkLm1pZCwgZ3JpZC5hZGQsIGdyaWQubWF4LCB2YWx1ZXMucmFkYXIsIGxlZ2VuZC5wb3NpdGlvbj0nbm9uZScsIGdyaWRsaW5lLm1pZC5jb2xvdXI9J2dyZXknLCBwYWxldHRlKSB7XG4gIGdncmFkYXJfY3VzdG9tKGRmLFxuICAgICAgICAgIGdyb3VwLmNvbG91cnMgPSBwYWxldHRlLFxuICAgICAgICAgIGdyb3VwLnBvaW50LnNpemUgPSAyLFxuICAgICAgICAgIGdyb3VwLmxpbmUud2lkdGggPSAuNSxcbiAgICAgICAgICBncmlkLm1pbiA9IGdyaWQubWluLFxuICAgICAgICAgIGdyaWQubWlkID0gZ3JpZC5taWQsXG4gICAgICAgICAgZ3JpZC5hZGQgPSBncmlkLmFkZCxcbiAgICAgICAgICBncmlkLm1heCA9IGdyaWQubWF4LFxuICAgICAgICAgIHZhbHVlcy5yYWRhciA9IHZhbHVlcy5yYWRhciwgXG4gICAgICAgICAgZ3JpZGxpbmUuYWRkLmNvbG91ciA9ICdncmV5JyxcbiAgICAgICAgICBncmlkbGluZS5taWQuY29sb3VyID0gZ3JpZGxpbmUubWlkLmNvbG91cixcbiAgICAgICAgICBmb250LnJhZGFyID0gXCJzYW5zXCIsXG4gICAgICAgICAgbGVnZW5kLnRleHQuc2l6ZSA9IDEwLFxuICAgICAgICAgIGF4aXMubGFiZWwuc2l6ZSA9IDMuNSxcbiAgICAgICAgICBsZWdlbmQucG9zaXRpb24gPSBsZWdlbmQucG9zaXRpb24sXG4gICAgICAgICAgcGxvdC50aXRsZSA9IHRpdGxlKVxufVxuXG5gYGAifQ== -->
-
-```r
-prepare_metric_df <- function(df, ds, metric, transform_fn = identity) {
+
+
ggsave('../../revision2/visualization/fig2/fig_2a.pdf', plot=fig_2a, width = 18, height = 9)
+ + + + +
+

Fig2b

+ + + +
prepare_metric_df <- function(df, ds, metric, transform_fn = identity) {
   df |>
     filter(bulk_ds == ds, cell_type != "all") |>
     select(method, cell_type, bulk_ds, value = all_of(metric)) |>
@@ -2109,36 +2064,20 @@ 

Fig2a

- -
```r
-fig_2b <- (rmse_plots$p_rmse_morandini + rmse_plots$p_rmse_finotello + rmse_plots$p_rmse_hoek) / (cor_plots$p_cor_morandini + cor_plots$p_cor_finotello + cor_plots$p_cor_hoek)
-
-ggsave('/nfs/proj/omnideconv_benchmarking/omnideconv/benchmark/revision/visualization/fig2/fig_2b.pdf', plot = fig_2b, width = 20, height = 10)
-

-<!-- rnb-source-end -->
-
-<!-- rnb-chunk-end -->
-
-
-<!-- rnb-text-begin -->
-
-
-
-
-# Fig S2
+
+
fig_2b <- (rmse_plots$p_rmse_morandini + rmse_plots$p_rmse_finotello + rmse_plots$p_rmse_hoek) / (cor_plots$p_cor_morandini + cor_plots$p_cor_finotello + cor_plots$p_cor_hoek)
 
-
-<!-- rnb-text-end -->
-
-
-<!-- rnb-chunk-begin -->
-
-
-<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuYGBgclxuZmlnX3MyYSA8LSBmcmFjdGlvbnNfZGYgJT4lIHN1YnNldChidWxrX2RzICVpbiUgYygnQ2hlbicsJ1BldGl0cHJleicsJ0FsdG1hbicpKSAlPiVcbiAgZ2dwbG90KC4sIGFlcyh4PWZyYWN0aW9uLnRydWUsIHk9ZnJhY3Rpb24uZXN0aW1hdGUpKStcbiAgZ2VvbV9hYmxpbmUobGluZXR5cGU9J2Rhc2hlZCcpK1xuICBnZW9tX2hsaW5lKHlpbnRlcmNlcHQgPSAxKStcbiAgZ2VvbV9wb2ludChhZXMoY29sb3I9Y2VsbHR5cGUpLCBzaXplPTIsIGFscGhhPS43KStcbiAgZmFjZXRfZ3JpZChidWxrX2RzIH4gbWV0aG9kKStcbiAgeGxhYignR3JvdW5kIHRydXRoIGNlbGwtdHlwZSBmcmFjdGlvbicpK1xuICB5bGFiKCdFc3RpbWF0ZWQgY2VsbC10eXBlIGZyYWN0aW9uJykrXG4gIHNjYWxlX3lfY29udGludW91cyhsaW1pdHMgPSBjKDAsIDEuMiksIGJyZWFrcyA9IHNlcSgwLCAxLCBieSA9IDAuMjUpKStcbiAgdGhlbWVfYncoKStcbiAgdGhlbWUocGFuZWwuZ3JpZCA9IGVsZW1lbnRfYmxhbmsoKSxcbiAgICAgICAgbGVnZW5kLnBvc2l0aW9uID0gJ3RvcCcsXG4gICAgICAgIHN0cmlwLmJhY2tncm91bmQgPSBlbGVtZW50X3JlY3QoZmlsbCA9ICd3aGl0ZScpKStcbiAgc2NhbGVfY29sb3JfbWFudWFsKHZhbHVlcyA9IGNvbG9yX3BhbGV0dGUsIGRyb3AgPSBUKStcbiAgZ2VvbV90ZXh0KGRhdGE9ZGYuYm90aCAlPiUgc3Vic2V0KGJ1bGtfZHMgJWluJSBjKCdDaGVuJywnUGV0aXRwcmV6JywnQWx0bWFuJykgJiBjZWxsX3R5cGUgPT0gJ2FsbCcpLCBcbiAgICAgICAgICAgIGFlcyhsYWJlbD1wYXN0ZTAoJ1I6ICcscm91bmQoY29yLCAzKSwnXFxuUk1TRTogJywgcm91bmQoUk1TRSwgMykpKSwgeD0uMDMsIHk9MS4xMywgc2l6ZT0zLCBoanVzdD0wKStcbiAgbGFicyhjb2xvcj0nQ2VsbHR5cGUnKVxuXG5cbmZpZ19zMmIgPC0gZnJhY3Rpb25zX2RmICU+JSBzdWJzZXQoYnVsa19kcyAlaW4lIGMoJ0NoZW5TaW0nLCdQZXRpdHByZXpTaW0nLCdGaW5vdGVsbG9TaW0nLCdIb2VrU2ltJywnTW9yYW5kaW5pU2ltJywnQWx0bWFuU2ltJykpICU+JVxuICBnZ3Bsb3QoLiwgYWVzKHg9ZnJhY3Rpb24udHJ1ZSwgeT1mcmFjdGlvbi5lc3RpbWF0ZSkpK1xuICBnZW9tX2FibGluZShsaW5ldHlwZT0nZGFzaGVkJykrXG4gIGdlb21faGxpbmUoeWludGVyY2VwdCA9IDEpK1xuICBnZW9tX3BvaW50KGFlcyhjb2xvcj1jZWxsdHlwZSksIHNpemU9MiwgYWxwaGE9LjIpK1xuICBmYWNldF9ncmlkKGJ1bGtfZHMgfiBtZXRob2QpK1xuICB4bGFiKCdHcm91bmQgdHJ1dGggY2VsbC10eXBlIGZyYWN0aW9uJykrXG4gIHlsYWIoJ0VzdGltYXRlZCBjZWxsLXR5cGUgZnJhY3Rpb24nKStcbiAgc2NhbGVfeV9jb250aW51b3VzKGxpbWl0cyA9IGMoMCwgMS4yKSwgYnJlYWtzID0gc2VxKDAsIDEsIGJ5ID0gMC4yNSkpK1xuICB0aGVtZV9idygpK1xuICB0aGVtZShwYW5lbC5ncmlkID0gZWxlbWVudF9ibGFuaygpLFxuICAgICAgICBsZWdlbmQucG9zaXRpb24gPSAnYm90dG9tJyxcbiAgICAgICAgc3RyaXAuYmFja2dyb3VuZCA9IGVsZW1lbnRfcmVjdChmaWxsID0gJ3doaXRlJykpK1xuICBndWlkZXMoY29sb3IgPSBndWlkZV9sZWdlbmQobnJvdyA9IDEsIG92ZXJyaWRlLmFlcyA9IGxpc3Qoc2l6ZT01LCBhbHBoYT0xKSkpK1xuICBzY2FsZV9jb2xvcl9tYW51YWwodmFsdWVzID0gY29sb3JfcGFsZXR0ZSwgZHJvcCA9IFQpK1xuICBnZW9tX3RleHQoZGF0YT1kZi5ib3RoICU+JSBzdWJzZXQoYnVsa19kcyAlaW4lIGMoJ0NoZW5TaW0nLCdQZXRpdHByZXpTaW0nLCdGaW5vdGVsbG9TaW0nLCdIb2VrU2ltJywnTW9yYW5kaW5pU2ltJywnQWx0bWFuU2ltJykgJiBjZWxsX3R5cGUgPT0gJ2FsbCcpLCBcbiAgICAgICAgICAgIGFlcyhsYWJlbD1wYXN0ZTAoJ1I6ICcscm91bmQoY29yLCAzKSwnXFxuUk1TRTogJywgcm91bmQoUk1TRSwgMykpKSwgeD0uMDMsIHk9MS4xMywgc2l6ZT0zLCBoanVzdD0wKStcbiAgbGFicyhjb2xvcj0nQ2VsbHR5cGUnKVxuXG5maWdfczIgPC0gcGxvdF9ncmlkKFxuICBmaWdfczJhICsgdGhlbWUobGVnZW5kLnBvc2l0aW9uPVxcbm9uZVxcKSxcbiAgZmlnX3MyYixcbiAgYWxpZ24gPSAnaCcsXG4gIGxhYmVscyA9IGMoXFxBXFwsIFxcQlxcKSwgXG4gIGxhYmVsX3NpemUgPSAxNSwgXG4gIGhqdXN0ID0gLTEsXG4gIG5yb3cgPSAyLCBuY29sID0gMSxcbiAgcmVsX2hlaWdodHMgPSBjKC40NSwgLjc1KVxuKVxuYGBgXG5gYGAifQ== -->
-
-```r
-```r
-fig_s2a <- fractions_df %>% subset(bulk_ds %in% c('Chen','Petitprez','Altman')) %>%
+ggsave('/nfs/proj/omnideconv_benchmarking/omnideconv/benchmark/revision2/visualization/fig2/fig_2b.pdf', plot = fig_2b, width = 20, height = 10)
+ + + +
+
+

Fig S2

+ + + +
fig_s2a <- fractions_df %>% subset(bulk_ds %in% c('Chen','Petitprez','Altman')) %>%
   ggplot(., aes(x=fraction.true, y=fraction.estimate))+
   geom_abline(linetype='dashed')+
   geom_hline(yintercept = 1)+
@@ -2177,57 +2116,31 @@ 

Fig2a

labs(color='Celltype') fig_s2 <- plot_grid( - fig_s2a + theme(legend.position=\none\), + fig_s2a + theme(legend.position="none"), fig_s2b, align = 'h', - labels = c(\A\, \B\), + labels = c("A", "B"), label_size = 15, hjust = -1, nrow = 2, ncol = 1, rel_heights = c(.45, .75) )
-

-<!-- rnb-source-end -->
-
-<!-- rnb-output-begin eyJkYXRhIjoiV2FybmluZzogUmVtb3ZlZCAxMTYgcm93cyBjb250YWluaW5nIG1pc3NpbmcgdmFsdWVzIG9yIHZhbHVlcyBvdXRzaWRlIHRoZSBzY2FsZSByYW5nZSAoYGdlb21fcG9pbnQoKWApLldhcm5pbmc6IFJlbW92ZWQgMTggcm93cyBjb250YWluaW5nIG1pc3NpbmcgdmFsdWVzIG9yIHZhbHVlcyBvdXRzaWRlIHRoZSBzY2FsZSByYW5nZSAoYGdlb21fcG9pbnQoKWApLldhcm5pbmc6IEdyYXBocyBjYW5ub3QgYmUgaG9yaXpvbnRhbGx5IGFsaWduZWQgdW5sZXNzIHRoZSBheGlzIHBhcmFtZXRlciBpcyBzZXQuIFBsYWNpbmcgZ3JhcGhzIHVuYWxpZ25lZC5cbiJ9 -->
-
-

Warning: Removed 116 rows containing missing values or values outside -the scale range (geom_point()).Warning: Removed 18 rows -containing missing values or values outside the scale range -(geom_point()).Warning: Graphs cannot be horizontally -aligned unless the axis parameter is set. Placing graphs unaligned.

-

-
-
-<!-- rnb-output-end -->
-
-<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuYGBgclxuZ2dzYXZlKHBsb3QgPSBmaWdfczIsIGZpbGVuYW1lID0gJy9uZnMvcHJvai9vbW5pZGVjb252X2JlbmNobWFya2luZy9vbW5pZGVjb252L2JlbmNobWFyay9yZXZpc2lvbi92aXN1YWxpemF0aW9uL2ZpZzIvZmlnX3MyLnBkZicsIHdpZHRoID0gMTIsIGhlaWdodCA9IDE3KVxuYGBgXG5gYGAifQ== -->
-
-```r
-```r
-ggsave(plot = fig_s2, filename = '/nfs/proj/omnideconv_benchmarking/omnideconv/benchmark/revision/visualization/fig2/fig_s2.pdf', width = 12, height = 17)
-

-<!-- rnb-source-end -->
-
-<!-- rnb-chunk-end -->
-
-
-<!-- rnb-text-begin -->
-
-
-# Fig S3
-
-
-<!-- rnb-text-end -->
-
-
-<!-- rnb-chunk-begin -->
-
-
-<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuZGF0YXNldHMgPC0gYyhcIk1vcmFuZGluaVwiLCBcIkZpbm90ZWxsb1wiLCBcIkhvZWtcIiwgXCJBbHRtYW5cIiwnQ2hlbicsJ1BldGl0cHJleicsJ0NoZW5TaW0nLCdQZXRpdHByZXpTaW0nLCdGaW5vdGVsbG9TaW0nLCdIb2VrU2ltJywnTW9yYW5kaW5pU2ltJywnQWx0bWFuU2ltJylcblxuZGYuaGVhdG1hcCA8LSBkZi5ib3RoIHw+IHN1YnNldChidWxrX2RzICVpbiUgZGF0YXNldHMgJiBjZWxsX3R5cGUgIT0gJ2FsbCcpIHw+IHNlbGVjdChjZWxsX3R5cGUsIG1ldGhvZCwgYnVsa19kcywgY29yLCBSTVNFKVxuXG5kZl9sb25nIDwtIGRmLmhlYXRtYXAgJT4lXG4gIHBpdm90X2xvbmdlcihjb2xzID0gYyhjb3IsIFJNU0UpLCBuYW1lc190byA9IFwibWV0cmljXCIsIHZhbHVlc190byA9IFwidmFsdWVcIilcbnhfbGV2ZWxzIDwtIHVuaXF1ZShkZi5oZWF0bWFwJGNlbGxfdHlwZSlcbnlfbGV2ZWxzIDwtIHVuaXF1ZShkZi5oZWF0bWFwJG1ldGhvZClcbmRmX2xvbmcgPC0gZGZfbG9uZyAlPiVcbiAgbXV0YXRlKFxuICAgIHhfbnVtID0gYXMubnVtZXJpYyhmYWN0b3IoY2VsbF90eXBlLCBsZXZlbHMgPSB4X2xldmVscykpLFxuICAgIHlfbnVtID0gYXMubnVtZXJpYyhmYWN0b3IobWV0aG9kLCBsZXZlbHMgPSByZXYoeV9sZXZlbHMpKSkgICMgUmV2ZXJzZSBmb3IgaGVhdG1hcCBsYXlvdXRcbiAgKVxuXG5nZXRfdHJpX2Nvb3JkcyA8LSBmdW5jdGlvbih4LCB5LCBtZXRyaWMpIHtcbiAgaWYgKG1ldHJpYyA9PSBcImNvclwiKSB7XG4gICAgdGliYmxlKFxuICAgICAgeF9jb29yZCA9IGMoeCAtIDAuNSwgeCArIDAuNSwgeCAtIDAuNSksXG4gICAgICB5X2Nvb3JkID0gYyh5ICsgMC41LCB5ICsgMC41LCB5IC0gMC41KVxuICAgIClcbiAgfSBlbHNlIHtcbiAgICB0aWJibGUoXG4gICAgICB4X2Nvb3JkID0gYyh4ICsgMC41LCB4ICsgMC41LCB4IC0gMC41KSxcbiAgICAgIHlfY29vcmQgPSBjKHkgKyAwLjUsIHkgLSAwLjUsIHkgLSAwLjUpXG4gICAgKVxuICB9XG59XG5cbmRmX3BvbHkgPC0gZGZfbG9uZyAlPiVcbiAgZHBseXI6Om11dGF0ZShjb29yZHMgPSBwbWFwKGxpc3QoeF9udW0sIHlfbnVtLCBtZXRyaWMpLCBnZXRfdHJpX2Nvb3JkcykpICU+JVxuICB1bm5lc3QoY29vcmRzKSAlPiVcbiAgZ3JvdXBfYnkoY2VsbF90eXBlLCBtZXRob2QsIGJ1bGtfZHMsIG1ldHJpYykgJT4lXG4gIGRwbHlyOjptdXRhdGUodGlsZV9pZCA9IGN1cl9ncm91cF9pZCgpKSAlPiVcbiAgdW5ncm91cCgpXG5cbmRmX2xhYmVscyA8LSBkZl9sb25nICU+JVxuICBtdXRhdGUoXG4gICAgbGFiZWwgPSBpZmVsc2UobWV0cmljID09IFwiY29yXCIsXG4gICAgICAgICAgICAgICAgICAgc3ByaW50ZihcIiUuMmZcIiwgdmFsdWUpLFxuICAgICAgICAgICAgICAgICAgIHNwcmludGYoXCIlLjJmXCIsIHZhbHVlKSksXG4gICAgeF9sYWJlbCA9IHhfbnVtICsgaWZlbHNlKG1ldHJpYyA9PSBcImNvclwiLCAtMC4yLCAwLjIpLFxuICAgIHlfbGFiZWwgPSB5X251bSArIGlmZWxzZShtZXRyaWMgPT0gXCJjb3JcIiwgMC4yLCAtMC4yKVxuICApXG5cbmRmX3IgPC0gZGZfcG9seSAlPiUgZmlsdGVyKG1ldHJpYyA9PSBcImNvclwiKVxuZGZfcm1zZSA8LSBkZl9wb2x5ICU+JSBmaWx0ZXIobWV0cmljID09IFwiUk1TRVwiKVxuXG50cmlhbmdsZV9oZWF0bWFwIDwtIGdncGxvdCgpICtcbiAgIyBSIHRyaWFuZ2xlc1xuICBnZW9tX3BvbHlnb24oZGF0YSA9IGRmX3IsIGFlcyh4ID0geF9jb29yZCwgeSA9IHlfY29vcmQsIGdyb3VwID0gdGlsZV9pZCwgZmlsbCA9IHZhbHVlKSwgY29sb3IgPSBcImJsYWNrXCIpICtcbiAgc2NhbGVfZmlsbF9ncmFkaWVudDIobWlkID0gJ2RhcmtyZWQnLCBsb3cgPSBcImJsYWNrXCIsIGhpZ2ggPSBcInN0ZWVsYmx1ZVwiLCBuYW1lID0gXCJSXCIpICtcbiAgbmV3X3NjYWxlX2ZpbGwoKSArXG4gICMgUk1TRSB0cmlhbmdsZXNcbiAgZ2VvbV9wb2x5Z29uKGRhdGEgPSBkZl9ybXNlLCBhZXMoeCA9IHhfY29vcmQsIHkgPSB5X2Nvb3JkLCBncm91cCA9IHRpbGVfaWQsIGZpbGwgPSB2YWx1ZSksIGNvbG9yID0gXCJibGFja1wiKSArXG4gIHNjYWxlX2ZpbGxfZ3JhZGllbnQobG93ID0gXCJzdGVlbGJsdWVcIiwgaGlnaCA9IFwiZGFya3JlZFwiLCBuYW1lID0gXCJSTVNFXCIpICtcbiAgZ2VvbV90ZXh0KGRhdGEgPSBkZl9sYWJlbHMsIGFlcyh4ID0geF9sYWJlbCwgeSA9IHlfbGFiZWwsIGxhYmVsID0gbGFiZWwsIGdyb3VwID0gbWV0cmljKSwgc2l6ZSA9IDIsIGNvbG9yPSd3aGl0ZScpICtcbiAgZmFjZXRfd3JhcCh+YnVsa19kcywgc2NhbGVzID0gJ2ZyZWUnLCBuY29sID0gMykgK1xuICB0aGVtZV9idygpICtcbiAgdGhlbWUoXG4gICAgcGFuZWwuZ3JpZCA9IGVsZW1lbnRfYmxhbmsoKSxcbiAgICBheGlzLnRleHQueCA9IGVsZW1lbnRfdGV4dChhbmdsZSA9IDQ1LCBoanVzdCA9IDEpXG4gICkgK1xuICBzY2FsZV94X2Rpc2NyZXRlKGxpbWl0cyA9IHhfbGV2ZWxzKSArXG4gIHNjYWxlX3lfZGlzY3JldGUobGltaXRzID0gcmV2KHlfbGV2ZWxzKSkgK1xuICBsYWJzKHggPSBcIlwiLCB5ID0gXCJcIilcblxuYGBgIn0= -->
-
-```r
-datasets <- c("Morandini", "Finotello", "Hoek", "Altman",'Chen','Petitprez','ChenSim','PetitprezSim','FinotelloSim','HoekSim','MorandiniSim','AltmanSim')
+
+
+
Warning: Removed 116 rows containing missing values or values outside the scale range (`geom_point()`).Warning: Removed 18 rows containing missing values or values outside the scale range (`geom_point()`).Warning: Graphs cannot be horizontally aligned unless the axis parameter is set. Placing graphs unaligned.
+ + +
ggsave(plot = fig_s2, filename = '/nfs/proj/omnideconv_benchmarking/omnideconv/benchmark/revision2/visualization/fig2/fig_s2_v2.pdf', width = 12, height = 17)
+ + + +
+
+

Fig S3

+ + + +
datasets <- c("Morandini", "Finotello", "Hoek", "Altman",'Chen','Petitprez','ChenSim','PetitprezSim','FinotelloSim','HoekSim','MorandiniSim','AltmanSim')
 
 df.heatmap <- df.both |> subset(bulk_ds %in% datasets & cell_type != 'all') |> select(cell_type, method, bulk_ds, cor, RMSE)
 
@@ -2292,17 +2205,15 @@ 

Fig2a

scale_x_discrete(limits = x_levels) + scale_y_discrete(limits = rev(y_levels)) + labs(x = "", y = "") -
+ +ggsave('/nfs/proj/omnideconv_benchmarking/omnideconv/benchmark/revision2/visualization/fig2/fig_s3_v3.pdf', plot = triangle_heatmap, width = 14, height = 16)
- -
Error in new_scale_fill() : could not find function "new_scale_fill"
- -
---
title: "R Notebook"
output: html_notebook
---

```{r}
source("../helper_functions.R")
source('ggradar_custom.R')
library(tidyverse)
library(data.table)
library(cowplot)
library(patchwork)
library(ggnewscale)

color_palette <- c('B cells'='#999933',
                   'Macrophages'='#CC6677',
                   'mDC'='#882255',
                   'Monocytes'='#AA4499',
                   'NK cells'='#DDCC77',
                   'T cell'='#332288',
                   'CD4 T cells'='#117733',
                   'CD8 T cells'='#44AA99',
                   'Tregs'='#88CCEE',
                   'T cells'='#332288',
                   'Lymphocytes' = '#6600CC')

method_palette <- palette.colors(palette = "Okabe-Ito")[1:8]
names(method_palette) <- c('AutoGeneS','BayesPrism','Bisque','CIBERSORTx','DWLS','MuSiC','Scaden','SCDC')
```

```{r}
methods <- c('autogenes','bayesprism','bisque','cibersortx','dwls','music','scaden','scdc')
sc_norm <- c('cpm', rep('counts', 2),'cpm',rep('counts',4))
bulk_norm <- c('tpm', rep('counts', 2), rep('tpm',5))
method_parameter_df <- data.frame(method=methods, sc_norm=sc_norm, bulk_norm=bulk_norm)
method_parameter_df$method_norm_combi <- paste0(method_parameter_df$method, method_parameter_df$sc_norm, method_parameter_df$bulk_norm)

fractions_df_mouse <- get_fractions('/nfs/data/omnideconv_benchmarking_clean/benchmark_results/results_mouse/', '0', 'tabula-muris', method_parameter_df)
fractions_df_mouse$org <- 'mm'
fractions_df_hao <- get_all_fractions('/nfs/data/omnideconv_benchmarking_clean/benchmark_results/results_main3/', '0', 'HaoCleaned-sampled', method_parameter_df)
fractions_df_hao$org <- 'hs'
fractions_df <- rbind(fractions_df_hao, fractions_df_mouse)
fractions_df <- subset(fractions_df, method_norm_combi %in% method_parameter_df$method_norm_combi)

fractions_df$celltype <- recode(fractions_df$celltype,
                                   'T cell' = 'T cells',
                                   'T cells CD4 conv' = 'CD4 T cells',
                                   'T cells CD4' = 'CD4 T cells',
                                   'T cells CD8' = 'CD8 T cells')

performance_df_mouse <- get_performance_metrics('/nfs/data/omnideconv_benchmarking_clean/benchmark_results/results_mouse/', '0', 'tabula-muris', method_parameter_df)
performance_df_mouse$org <- 'mm'
performance_df_hao <- get_all_performance_metrics('/nfs/data/omnideconv_benchmarking_clean/benchmark_results/results_main3/', '0', 'HaoCleaned-sampled', method_parameter_df)
performance_df_hao$org <- 'hs'
performance_df <- rbind(performance_df_hao, performance_df_mouse)
performance_df <- subset(performance_df, method_norm_combi %in% method_parameter_df$method_norm_combi)

performance_df$cell_type <- recode(performance_df$cell_type,
                                   'T cell' = 'T cells',
                                   'T cells CD4 conv' = 'CD4 T cells',
                                   'T cells CD4' = 'CD4 T cells',
                                   'T cells CD8' = 'CD8 T cells')

df.cor <- data_summary(performance_df, 'cor', c('cell_type','method','bulk_ds','org'))
df.rmse <- data_summary(performance_df, 'RMSE', c('cell_type','method','bulk_ds','org'))
df.both <- data.table(merge(df.cor, df.rmse, by = c('cell_type','method','bulk_ds','org'), suffixes = c('.cor','.rmse')))
```

```{r}
df.both$cell_type <- as.factor(df.both$cell_type)
fractions_df$celltype <- as.factor(fractions_df$celltype)

fractions_df <- fractions_df %>% 
  mutate(method = recode(method,
                         'autogenes'='AutoGeneS',
                         'bayesprism'='BayesPrism',
                         'bisque'='Bisque',
                         'cibersortx'='CIBERSORTx',
                         'dwls'='DWLS',
                         'music'='MuSiC',
                         'scaden'='Scaden',
                         'scdc'='SCDC'))
fractions_df <- fractions_df %>%
  mutate(bulk_ds = recode(bulk_ds,
                           "chen" = "Chen",
                           "chen-simulation" = "ChenSim",
                           "finotello" = "Finotello",
                           "finotello-simulation" = "FinotelloSim",
                           "hoek" = "Hoek",
                           "hoek-simulation" = "HoekSim",
                           "petitprez" = "Petitprez",
                           "petitprez-simulation" = "PetitprezSim",
                           "altman"="Altman",
                           "morandini"="Morandini",
                           "altman-simulation"="AltmanSim",
                           "morandini-simulation"="MorandiniSim"))
df.both <- df.both %>%
  mutate(bulk_ds =  recode(bulk_ds,
                           "chen" = "Chen",
                           "chen-simulation" = "ChenSim",
                           "finotello" = "Finotello",
                           "finotello-simulation" = "FinotelloSim",
                           "hoek" = "Hoek",
                           "hoek-simulation" = "HoekSim",
                           "petitprez" = "Petitprez",
                           "petitprez-simulation" = "PetitprezSim",
                           "altman"="Altman",
                           "morandini"="Morandini",
                           "altman-simulation"="AltmanSim",
                           "morandini-simulation"="MorandiniSim"))
df.both <- df.both %>% 
  mutate(method = recode(method,
                         'autogenes'='AutoGeneS',
                         'bayesprism'='BayesPrism',
                         'bisque'='Bisque',
                         'cibersortx'='CIBERSORTx',
                         'dwls'='DWLS',
                         'music'='MuSiC',
                         'scaden'='Scaden',
                         'scdc'='SCDC'))

fractions_df <- fractions_df %>%
  mutate(sc_ds = recode(sc_ds,
                          "hao-sampled-3" = "HaoSub",
                          "Hao-sampled" = "HaoSub",
                          "tabula-muris" = "TM"))
```

# Fig2a

```{r}
fig_2a <- fractions_df %>% subset(bulk_ds %in% c('Hoek','Finotello','Morandini')) %>%
  ggplot(., aes(x=fraction.true, y=fraction.estimate))+
  geom_abline(linetype='dashed')+
  geom_hline(yintercept = 1)+
  geom_point(aes(color=celltype), size=2, alpha=.7)+
  facet_grid(bulk_ds ~ method)+
  xlab('Ground truth cell-type fraction')+
  ylab('Estimated cell-type fraction')+
  scale_y_continuous(limits = c(0, 1.2), breaks = seq(0, 1, by = 0.25))+
  theme_bw()+
  theme(panel.grid = element_blank(),
        legend.position = 'top',
        strip.background = element_rect(fill = 'white'))+
  scale_color_manual(values = color_palette, drop = T)+
  geom_text(data=df.both %>% subset(bulk_ds %in% c('Hoek','Finotello','Morandini') & cell_type == 'all'), 
            aes(label=paste0('R: ',round(cor, 3),'\nRMSE: ', round(RMSE, 3))), x=.03, y=1.13, size=3, hjust=0)+
  labs(color='Celltype')

fig_2a

```

```{r}
ggsave('../../revision/visualization/fig2/fig_2a.pdf', plot=fig_2a, width = 18, height = 9)
```


# Fig2b

```{r}
prepare_metric_df <- function(df, ds, metric, transform_fn = identity) {
  df |>
    filter(bulk_ds == ds, cell_type != "all") |>
    select(method, cell_type, bulk_ds, value = all_of(metric)) |>
    mutate(value = transform_fn(value)) |>
    pivot_wider(names_from = cell_type, values_from = value) |>
    select(-bulk_ds) |>
    replace(is.na(.), 0) |> 
    as_tibble()
}

prepare_metric_df_per_celltype <- function(df, ds, metric, transform_fn = identity) {
  df |>
    filter(bulk_ds == ds, cell_type != "all") |>
    select(method, cell_type, bulk_ds, value = all_of(metric)) |>
    mutate(value = transform_fn(value)) |>
    pivot_wider(names_from = method, values_from = value) |>
    select(-bulk_ds) |>
    replace(is.na(.), 0) |> 
    as_tibble()
}

compute_extreme <- function(df, fun, adjust = 0) {
  round(fun(apply(df[,-1], 2, fun), na.rm = TRUE), 1) + adjust
}

# Helper function to create radar plot
plot_radar <- function(df, title, grid.min, grid.mid, grid.max, values.radar, legend.position='none', palette) {
  ggradar(df,
          group.colours = palette,
          group.point.size = 2,
          group.line.width = .8,
          grid.min = grid.min,
          grid.mid = grid.mid,
          grid.max = grid.max,
          values.radar = values.radar,
          font.radar = "sans",
          legend.text.size = 10,
          axis.label.size = 3.5,
          legend.position = legend.position,
          plot.title = title)
}

plot_radar_custom <- function(df, title, grid.min, grid.mid, grid.add, grid.max, values.radar, legend.position='none', gridline.mid.colour='grey', palette) {
  ggradar_custom(df,
          group.colours = palette,
          group.point.size = 2,
          group.line.width = .5,
          grid.min = grid.min,
          grid.mid = grid.mid,
          grid.add = grid.add,
          grid.max = grid.max,
          values.radar = values.radar, 
          gridline.add.colour = 'grey',
          gridline.mid.colour = gridline.mid.colour,
          font.radar = "sans",
          legend.text.size = 10,
          axis.label.size = 3.5,
          legend.position = legend.position,
          plot.title = title)
}

```

```{r}
# Datasets
datasets <- c("Morandini", "Finotello", "Hoek")

method_order <- df.both |> 
  subset(bulk_ds %in% datasets) |> 
  dplyr::group_by(method) |> 
  dplyr::summarize(sum=sum(cor, na.rm=T)) |> 
  arrange(sum) |> 
  select(method) |> 
  unlist()

rmse_plots <- lapply(datasets, function(ds) {
  df_rmse <- prepare_metric_df_per_celltype(df.both, ds, "RMSE" ,function(x) log(1 / x))[,c('cell_type',method_order)]
  max_val <- compute_extreme(df_rmse, max, adjust = 0.1)
  plot_radar_custom(df_rmse,
             title = paste0("log(1/RMSE) ", ds),
             grid.min = 0,
             grid.mid = round(max_val * .33, 2),
             grid.add = round(max_val * .66, 2),
             grid.max = round(max_val, 2),
             values.radar = c("0", 
                              as.character(round(max_val * .33, 2)), 
                              as.character(round(max_val * .66, 2)),
                              as.character(round(max_val, 2))),
             palette = color_palette[as.character(df_rmse$cell_type)])
})

# Generate correlation plots
cor_plots <- lapply(datasets, function(ds) {
  df_cor <- prepare_metric_df_per_celltype(df.both, ds, "cor")[,c('cell_type',method_order)]
  min_val <- compute_extreme(df_cor, min, adjust = -0.1)
  df_cor[is.na(df_cor)] <- min_val - ((1/9) * (1 - min_val))
  plot_radar_custom(df_cor,
             title = paste0("Correlation ", ds),
             grid.min = ifelse(min_val < 0, min_val, 0),
             grid.mid = ifelse(min_val < 0, 0, min_val),
             grid.add = 0.5,
             grid.max = 1,
             values.radar = c(as.character(min_val), "0", "0.5", "1"),
             gridline.mid.colour='black',
             palette = color_palette[as.character(df_cor$cell_type)])
})

# Assign plots to named variables if needed
names(rmse_plots) <- paste0("p_rmse_", tolower(datasets))
names(cor_plots)  <- paste0("p_cor_", tolower(datasets))
```



```{r}
fig_2b <- (rmse_plots$p_rmse_morandini + rmse_plots$p_rmse_finotello + rmse_plots$p_rmse_hoek) / (cor_plots$p_cor_morandini + cor_plots$p_cor_finotello + cor_plots$p_cor_hoek)

ggsave('/nfs/proj/omnideconv_benchmarking/omnideconv/benchmark/revision/visualization/fig2/fig_2b.pdf', plot = fig_2b, width = 20, height = 10)
```



# Fig S2

```{r}
fig_s2a <- fractions_df %>% subset(bulk_ds %in% c('Chen','Petitprez','Altman')) %>%
  ggplot(., aes(x=fraction.true, y=fraction.estimate))+
  geom_abline(linetype='dashed')+
  geom_hline(yintercept = 1)+
  geom_point(aes(color=celltype), size=2, alpha=.7)+
  facet_grid(bulk_ds ~ method)+
  xlab('Ground truth cell-type fraction')+
  ylab('Estimated cell-type fraction')+
  scale_y_continuous(limits = c(0, 1.2), breaks = seq(0, 1, by = 0.25))+
  theme_bw()+
  theme(panel.grid = element_blank(),
        legend.position = 'top',
        strip.background = element_rect(fill = 'white'))+
  scale_color_manual(values = color_palette, drop = T)+
  geom_text(data=df.both %>% subset(bulk_ds %in% c('Chen','Petitprez','Altman') & cell_type == 'all'), 
            aes(label=paste0('R: ',round(cor, 3),'\nRMSE: ', round(RMSE, 3))), x=.03, y=1.13, size=3, hjust=0)+
  labs(color='Celltype')


fig_s2b <- fractions_df %>% subset(bulk_ds %in% c('ChenSim','PetitprezSim','FinotelloSim','HoekSim','MorandiniSim','AltmanSim')) %>%
  ggplot(., aes(x=fraction.true, y=fraction.estimate))+
  geom_abline(linetype='dashed')+
  geom_hline(yintercept = 1)+
  geom_point(aes(color=celltype), size=2, alpha=.2)+
  facet_grid(bulk_ds ~ method)+
  xlab('Ground truth cell-type fraction')+
  ylab('Estimated cell-type fraction')+
  scale_y_continuous(limits = c(0, 1.2), breaks = seq(0, 1, by = 0.25))+
  theme_bw()+
  theme(panel.grid = element_blank(),
        legend.position = 'bottom',
        strip.background = element_rect(fill = 'white'))+
  guides(color = guide_legend(nrow = 1, override.aes = list(size=5, alpha=1)))+
  scale_color_manual(values = color_palette, drop = T)+
  geom_text(data=df.both %>% subset(bulk_ds %in% c('ChenSim','PetitprezSim','FinotelloSim','HoekSim','MorandiniSim','AltmanSim') & cell_type == 'all'), 
            aes(label=paste0('R: ',round(cor, 3),'\nRMSE: ', round(RMSE, 3))), x=.03, y=1.13, size=3, hjust=0)+
  labs(color='Celltype')

fig_s2 <- plot_grid(
  fig_s2a + theme(legend.position="none"),
  fig_s2b,
  align = 'h',
  labels = c("A", "B"), 
  label_size = 15, 
  hjust = -1,
  nrow = 2, ncol = 1,
  rel_heights = c(.45, .75)
)

ggsave(plot = fig_s2, filename = '/nfs/proj/omnideconv_benchmarking/omnideconv/benchmark/revision/visualization/fig2/fig_s2.pdf', width = 12, height = 17)
```

# Fig S3

```{r}
datasets <- c("Morandini", "Finotello", "Hoek", "Altman",'Chen','Petitprez','ChenSim','PetitprezSim','FinotelloSim','HoekSim','MorandiniSim','AltmanSim')

df.heatmap <- df.both |> subset(bulk_ds %in% datasets & cell_type != 'all') |> select(cell_type, method, bulk_ds, cor, RMSE)

df_long <- df.heatmap %>%
  pivot_longer(cols = c(cor, RMSE), names_to = "metric", values_to = "value")
x_levels <- unique(df.heatmap$cell_type)
y_levels <- unique(df.heatmap$method)
df_long <- df_long %>%
  mutate(
    x_num = as.numeric(factor(cell_type, levels = x_levels)),
    y_num = as.numeric(factor(method, levels = rev(y_levels)))  # Reverse for heatmap layout
  )

get_tri_coords <- function(x, y, metric) {
  if (metric == "cor") {
    tibble(
      x_coord = c(x - 0.5, x + 0.5, x - 0.5),
      y_coord = c(y + 0.5, y + 0.5, y - 0.5)
    )
  } else {
    tibble(
      x_coord = c(x + 0.5, x + 0.5, x - 0.5),
      y_coord = c(y + 0.5, y - 0.5, y - 0.5)
    )
  }
}

df_poly <- df_long %>%
  dplyr::mutate(coords = pmap(list(x_num, y_num, metric), get_tri_coords)) %>%
  unnest(coords) %>%
  group_by(cell_type, method, bulk_ds, metric) %>%
  dplyr::mutate(tile_id = cur_group_id()) %>%
  ungroup()

df_labels <- df_long %>%
  mutate(
    label = ifelse(metric == "cor",
                   sprintf("%.2f", value),
                   sprintf("%.2f", value)),
    x_label = x_num + ifelse(metric == "cor", -0.2, 0.2),
    y_label = y_num + ifelse(metric == "cor", 0.2, -0.2)
  )

df_r <- df_poly %>% filter(metric == "cor")
df_rmse <- df_poly %>% filter(metric == "RMSE")

triangle_heatmap <- ggplot() +
  # R triangles
  geom_polygon(data = df_r, aes(x = x_coord, y = y_coord, group = tile_id, fill = value), color = "black") +
  scale_fill_gradient2(mid = 'darkred', low = "black", high = "steelblue", name = "R") +
  new_scale_fill() +
  # RMSE triangles
  geom_polygon(data = df_rmse, aes(x = x_coord, y = y_coord, group = tile_id, fill = value), color = "black") +
  scale_fill_gradient(low = "steelblue", high = "darkred", name = "RMSE") +
  geom_text(data = df_labels, aes(x = x_label, y = y_label, label = label, group = metric), size = 2, color='white') +
  facet_wrap(~bulk_ds, scales = 'free', ncol = 3) +
  theme_bw() +
  theme(
    panel.grid = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  scale_x_discrete(limits = x_levels) +
  scale_y_discrete(limits = rev(y_levels)) +
  labs(x = "", y = "")

ggsave('/nfs/proj/omnideconv_benchmarking/omnideconv/benchmark/revision/visualization/fig2/fig_s3_v2.pdf', plot = triangle_heatmap, width = 14, height = 16)
```


+
---
title: "R Notebook"
output: html_notebook
---

```{r}
source("../helper_functions.R")
source('ggradar_custom.R')
library(tidyverse)
library(data.table)
library(cowplot)
library(patchwork)
library(ggnewscale)

color_palette <- c('B cells'='#999933',
                   'Macrophages'='#CC6677',
                   'mDC'='#882255',
                   'Monocytes'='#AA4499',
                   'NK cells'='#DDCC77',
                   'T cell'='#332288',
                   'CD4 T cells'='#117733',
                   'CD8 T cells'='#44AA99',
                   'Tregs'='#88CCEE',
                   'T cells'='#332288',
                   'Lymphocytes' = '#6600CC')

method_palette <- palette.colors(palette = "Okabe-Ito")[1:8]
names(method_palette) <- c('AutoGeneS','BayesPrism','Bisque','CIBERSORTx','DWLS','MuSiC','Scaden','SCDC')
```

```{r}
methods <- c('autogenes','bayesprism','bisque','cibersortx','dwls','music','scaden','scdc')
sc_norm <- c('cpm', rep('counts', 2),'cpm',rep('counts',4))
bulk_norm <- c('tpm', rep('counts', 2), rep('tpm',5))
method_parameter_df <- data.frame(method=methods, sc_norm=sc_norm, bulk_norm=bulk_norm)
method_parameter_df$method_norm_combi <- paste0(method_parameter_df$method, method_parameter_df$sc_norm, method_parameter_df$bulk_norm)

fractions_df_mouse <- get_fractions('/nfs/data/omnideconv_benchmarking_clean/benchmark_results/results_mouse/', '0', 'tabula-muris', method_parameter_df)
fractions_df_mouse$org <- 'mm'
fractions_df_hao <- get_all_fractions('/nfs/data/omnideconv_benchmarking_clean/benchmark_results/results_main3/', '0', 'HaoCleaned-sampled', method_parameter_df)
fractions_df_hao$org <- 'hs'
fractions_df <- rbind(fractions_df_hao, fractions_df_mouse)
fractions_df <- subset(fractions_df, method_norm_combi %in% method_parameter_df$method_norm_combi)

fractions_df$celltype <- recode(fractions_df$celltype,
                                   'T cell' = 'T cells',
                                   'T cells CD4 conv' = 'CD4 T cells',
                                   'T cells CD4' = 'CD4 T cells',
                                   'T cells CD8' = 'CD8 T cells')

performance_df_mouse <- get_performance_metrics('/nfs/data/omnideconv_benchmarking_clean/benchmark_results/results_mouse/', '0', 'tabula-muris', method_parameter_df)
performance_df_mouse$org <- 'mm'
performance_df_hao <- get_all_performance_metrics('/nfs/data/omnideconv_benchmarking_clean/benchmark_results/results_main3/', '0', 'HaoCleaned-sampled', method_parameter_df)
performance_df_hao$org <- 'hs'
performance_df <- rbind(performance_df_hao, performance_df_mouse)
performance_df <- subset(performance_df, method_norm_combi %in% method_parameter_df$method_norm_combi)

performance_df$cell_type <- recode(performance_df$cell_type,
                                   'T cell' = 'T cells',
                                   'T cells CD4 conv' = 'CD4 T cells',
                                   'T cells CD4' = 'CD4 T cells',
                                   'T cells CD8' = 'CD8 T cells')

df.cor <- data_summary(performance_df, 'cor', c('cell_type','method','bulk_ds','org'))
df.rmse <- data_summary(performance_df, 'RMSE', c('cell_type','method','bulk_ds','org'))
df.both <- data.table(merge(df.cor, df.rmse, by = c('cell_type','method','bulk_ds','org'), suffixes = c('.cor','.rmse')))
```


```{r}
df.both$cell_type <- as.factor(df.both$cell_type)
fractions_df$celltype <- as.factor(fractions_df$celltype)

fractions_df <- fractions_df %>% 
  mutate(method = recode(method,
                         'autogenes'='AutoGeneS',
                         'bayesprism'='BayesPrism',
                         'bisque'='Bisque',
                         'cibersortx'='CIBERSORTx',
                         'dwls'='DWLS',
                         'music'='MuSiC',
                         'scaden'='Scaden',
                         'scdc'='SCDC'))
fractions_df <- fractions_df %>%
  mutate(bulk_ds = recode(bulk_ds,
                           "chen" = "Chen",
                           "chen-simulation" = "ChenSim",
                           "finotello" = "Finotello",
                           "finotello-simulation" = "FinotelloSim",
                           "hoek" = "Hoek",
                           "hoek-simulation" = "HoekSim",
                           "petitprez" = "Petitprez",
                           "petitprez-simulation" = "PetitprezSim",
                           "altman"="Altman",
                           "morandini"="Morandini",
                           "altman-simulation"="AltmanSim",
                           "morandini-simulation"="MorandiniSim"))
df.both <- df.both %>%
  mutate(bulk_ds =  recode(bulk_ds,
                           "chen" = "Chen",
                           "chen-simulation" = "ChenSim",
                           "finotello" = "Finotello",
                           "finotello-simulation" = "FinotelloSim",
                           "hoek" = "Hoek",
                           "hoek-simulation" = "HoekSim",
                           "petitprez" = "Petitprez",
                           "petitprez-simulation" = "PetitprezSim",
                           "altman"="Altman",
                           "morandini"="Morandini",
                           "altman-simulation"="AltmanSim",
                           "morandini-simulation"="MorandiniSim"))
df.both <- df.both %>% 
  mutate(method = recode(method,
                         'autogenes'='AutoGeneS',
                         'bayesprism'='BayesPrism',
                         'bisque'='Bisque',
                         'cibersortx'='CIBERSORTx',
                         'dwls'='DWLS',
                         'music'='MuSiC',
                         'scaden'='Scaden',
                         'scdc'='SCDC'))

fractions_df <- fractions_df %>%
  mutate(sc_ds = recode(sc_ds,
                          "hao-sampled-3" = "HaoSub",
                          "Hao-sampled" = "HaoSub",
                          "tabula-muris" = "TM"))
```

# Fig2a

```{r}
fig_2a <- fractions_df %>% subset(bulk_ds %in% c('Hoek','Finotello','Morandini')) %>%
  ggplot(., aes(x=fraction.true, y=fraction.estimate))+
  geom_abline(linetype='dashed')+
  geom_hline(yintercept = 1)+
  geom_point(aes(color=celltype), size=2, alpha=.7)+
  facet_grid(bulk_ds ~ method)+
  xlab('Ground truth cell-type fraction')+
  ylab('Estimated cell-type fraction')+
  scale_y_continuous(limits = c(0, 1.2), breaks = seq(0, 1, by = 0.25))+
  theme_bw()+
  theme(panel.grid = element_blank(),
        legend.position = 'top',
        strip.background = element_rect(fill = 'white'))+
  scale_color_manual(values = color_palette, drop = T)+
  geom_text(data=df.both %>% subset(bulk_ds %in% c('Hoek','Finotello','Morandini') & cell_type == 'all'), 
            aes(label=paste0('R: ',round(cor, 3),'\nRMSE: ', round(RMSE, 3))), x=.03, y=1.13, size=3, hjust=0)+
  labs(color='Celltype')

fig_2a

```

```{r}
ggsave('../../revision2/visualization/fig2/fig_2a.pdf', plot=fig_2a, width = 18, height = 9)
```


# Fig2b

```{r}
prepare_metric_df <- function(df, ds, metric, transform_fn = identity) {
  df |>
    filter(bulk_ds == ds, cell_type != "all") |>
    select(method, cell_type, bulk_ds, value = all_of(metric)) |>
    mutate(value = transform_fn(value)) |>
    pivot_wider(names_from = cell_type, values_from = value) |>
    select(-bulk_ds) |>
    replace(is.na(.), 0) |> 
    as_tibble()
}

prepare_metric_df_per_celltype <- function(df, ds, metric, transform_fn = identity) {
  df |>
    filter(bulk_ds == ds, cell_type != "all") |>
    select(method, cell_type, bulk_ds, value = all_of(metric)) |>
    mutate(value = transform_fn(value)) |>
    pivot_wider(names_from = method, values_from = value) |>
    select(-bulk_ds) |>
    replace(is.na(.), 0) |> 
    as_tibble()
}

compute_extreme <- function(df, fun, adjust = 0) {
  round(fun(apply(df[,-1], 2, fun), na.rm = TRUE), 1) + adjust
}

# Helper function to create radar plot
plot_radar <- function(df, title, grid.min, grid.mid, grid.max, values.radar, legend.position='none', palette) {
  ggradar(df,
          group.colours = palette,
          group.point.size = 2,
          group.line.width = .8,
          grid.min = grid.min,
          grid.mid = grid.mid,
          grid.max = grid.max,
          values.radar = values.radar,
          font.radar = "sans",
          legend.text.size = 10,
          axis.label.size = 3.5,
          legend.position = legend.position,
          plot.title = title)
}

plot_radar_custom <- function(df, title, grid.min, grid.mid, grid.add, grid.max, values.radar, legend.position='none', gridline.mid.colour='grey', palette) {
  ggradar_custom(df,
          group.colours = palette,
          group.point.size = 2,
          group.line.width = .5,
          grid.min = grid.min,
          grid.mid = grid.mid,
          grid.add = grid.add,
          grid.max = grid.max,
          values.radar = values.radar, 
          gridline.add.colour = 'grey',
          gridline.mid.colour = gridline.mid.colour,
          font.radar = "sans",
          legend.text.size = 10,
          axis.label.size = 3.5,
          legend.position = legend.position,
          plot.title = title)
}

```

```{r}
# Datasets
datasets <- c("Morandini", "Finotello", "Hoek")

method_order <- df.both |> 
  subset(bulk_ds %in% datasets) |> 
  dplyr::group_by(method) |> 
  dplyr::summarize(sum=sum(cor, na.rm=T)) |> 
  arrange(sum) |> 
  select(method) |> 
  unlist()

rmse_plots <- lapply(datasets, function(ds) {
  df_rmse <- prepare_metric_df_per_celltype(df.both, ds, "RMSE" ,function(x) log(1 / x))[,c('cell_type',method_order)]
  max_val <- compute_extreme(df_rmse, max, adjust = 0.1)
  plot_radar_custom(df_rmse,
             title = paste0("log(1/RMSE) ", ds),
             grid.min = 0,
             grid.mid = round(max_val * .33, 2),
             grid.add = round(max_val * .66, 2),
             grid.max = round(max_val, 2),
             values.radar = c("0", 
                              as.character(round(max_val * .33, 2)), 
                              as.character(round(max_val * .66, 2)),
                              as.character(round(max_val, 2))),
             palette = color_palette[as.character(df_rmse$cell_type)])
})

# Generate correlation plots
cor_plots <- lapply(datasets, function(ds) {
  df_cor <- prepare_metric_df_per_celltype(df.both, ds, "cor")[,c('cell_type',method_order)]
  min_val <- compute_extreme(df_cor, min, adjust = -0.1)
  df_cor[is.na(df_cor)] <- min_val - ((1/9) * (1 - min_val))
  plot_radar_custom(df_cor,
             title = paste0("Correlation ", ds),
             grid.min = ifelse(min_val < 0, min_val, 0),
             grid.mid = ifelse(min_val < 0, 0, min_val),
             grid.add = 0.5,
             grid.max = 1,
             values.radar = c(as.character(min_val), "0", "0.5", "1"),
             gridline.mid.colour='black',
             palette = color_palette[as.character(df_cor$cell_type)])
})

# Assign plots to named variables if needed
names(rmse_plots) <- paste0("p_rmse_", tolower(datasets))
names(cor_plots)  <- paste0("p_cor_", tolower(datasets))
```



```{r}
fig_2b <- (rmse_plots$p_rmse_morandini + rmse_plots$p_rmse_finotello + rmse_plots$p_rmse_hoek) / (cor_plots$p_cor_morandini + cor_plots$p_cor_finotello + cor_plots$p_cor_hoek)

ggsave('/nfs/proj/omnideconv_benchmarking/omnideconv/benchmark/revision2/visualization/fig2/fig_2b.pdf', plot = fig_2b, width = 20, height = 10)
```



# Fig S2

```{r}
fig_s2a <- fractions_df %>% subset(bulk_ds %in% c('Chen','Petitprez','Altman')) %>%
  ggplot(., aes(x=fraction.true, y=fraction.estimate))+
  geom_abline(linetype='dashed')+
  geom_hline(yintercept = 1)+
  geom_point(aes(color=celltype), size=2, alpha=.7)+
  facet_grid(bulk_ds ~ method)+
  xlab('Ground truth cell-type fraction')+
  ylab('Estimated cell-type fraction')+
  scale_y_continuous(limits = c(0, 1.2), breaks = seq(0, 1, by = 0.25))+
  theme_bw()+
  theme(panel.grid = element_blank(),
        legend.position = 'top',
        strip.background = element_rect(fill = 'white'))+
  scale_color_manual(values = color_palette, drop = T)+
  geom_text(data=df.both %>% subset(bulk_ds %in% c('Chen','Petitprez','Altman') & cell_type == 'all'), 
            aes(label=paste0('R: ',round(cor, 3),'\nRMSE: ', round(RMSE, 3))), x=.03, y=1.13, size=3, hjust=0)+
  labs(color='Celltype')


fig_s2b <- fractions_df %>% subset(bulk_ds %in% c('ChenSim','PetitprezSim','FinotelloSim','HoekSim','MorandiniSim','AltmanSim')) %>%
  ggplot(., aes(x=fraction.true, y=fraction.estimate))+
  geom_abline(linetype='dashed')+
  geom_hline(yintercept = 1)+
  geom_point(aes(color=celltype), size=2, alpha=.2)+
  facet_grid(bulk_ds ~ method)+
  xlab('Ground truth cell-type fraction')+
  ylab('Estimated cell-type fraction')+
  scale_y_continuous(limits = c(0, 1.2), breaks = seq(0, 1, by = 0.25))+
  theme_bw()+
  theme(panel.grid = element_blank(),
        legend.position = 'bottom',
        strip.background = element_rect(fill = 'white'))+
  guides(color = guide_legend(nrow = 1, override.aes = list(size=5, alpha=1)))+
  scale_color_manual(values = color_palette, drop = T)+
  geom_text(data=df.both %>% subset(bulk_ds %in% c('ChenSim','PetitprezSim','FinotelloSim','HoekSim','MorandiniSim','AltmanSim') & cell_type == 'all'), 
            aes(label=paste0('R: ',round(cor, 3),'\nRMSE: ', round(RMSE, 3))), x=.03, y=1.13, size=3, hjust=0)+
  labs(color='Celltype')

fig_s2 <- plot_grid(
  fig_s2a + theme(legend.position="none"),
  fig_s2b,
  align = 'h',
  labels = c("A", "B"), 
  label_size = 15, 
  hjust = -1,
  nrow = 2, ncol = 1,
  rel_heights = c(.45, .75)
)

ggsave(plot = fig_s2, filename = '/nfs/proj/omnideconv_benchmarking/omnideconv/benchmark/revision2/visualization/fig2/fig_s2_v2.pdf', width = 12, height = 17)
```

# Fig S3

```{r}
datasets <- c("Morandini", "Finotello", "Hoek", "Altman",'Chen','Petitprez','ChenSim','PetitprezSim','FinotelloSim','HoekSim','MorandiniSim','AltmanSim')

df.heatmap <- df.both |> subset(bulk_ds %in% datasets & cell_type != 'all') |> select(cell_type, method, bulk_ds, cor, RMSE)

df_long <- df.heatmap %>%
  pivot_longer(cols = c(cor, RMSE), names_to = "metric", values_to = "value")
x_levels <- unique(df.heatmap$cell_type)
y_levels <- unique(df.heatmap$method)
df_long <- df_long %>%
  mutate(
    x_num = as.numeric(factor(cell_type, levels = x_levels)),
    y_num = as.numeric(factor(method, levels = rev(y_levels)))  # Reverse for heatmap layout
  )

get_tri_coords <- function(x, y, metric) {
  if (metric == "cor") {
    tibble(
      x_coord = c(x - 0.5, x + 0.5, x - 0.5),
      y_coord = c(y + 0.5, y + 0.5, y - 0.5)
    )
  } else {
    tibble(
      x_coord = c(x + 0.5, x + 0.5, x - 0.5),
      y_coord = c(y + 0.5, y - 0.5, y - 0.5)
    )
  }
}

df_poly <- df_long %>%
  dplyr::mutate(coords = pmap(list(x_num, y_num, metric), get_tri_coords)) %>%
  unnest(coords) %>%
  group_by(cell_type, method, bulk_ds, metric) %>%
  dplyr::mutate(tile_id = cur_group_id()) %>%
  ungroup()

df_labels <- df_long %>%
  mutate(
    label = ifelse(metric == "cor",
                   sprintf("%.2f", value),
                   sprintf("%.2f", value)),
    x_label = x_num + ifelse(metric == "cor", -0.2, 0.2),
    y_label = y_num + ifelse(metric == "cor", 0.2, -0.2)
  )

df_r <- df_poly %>% filter(metric == "cor")
df_rmse <- df_poly %>% filter(metric == "RMSE")

triangle_heatmap <- ggplot() +
  # R triangles
  geom_polygon(data = df_r, aes(x = x_coord, y = y_coord, group = tile_id, fill = value), color = "black") +
  scale_fill_gradient2(mid = 'darkred', low = "black", high = "steelblue", name = "R") +
  new_scale_fill() +
  # RMSE triangles
  geom_polygon(data = df_rmse, aes(x = x_coord, y = y_coord, group = tile_id, fill = value), color = "black") +
  scale_fill_gradient(low = "steelblue", high = "darkred", name = "RMSE") +
  geom_text(data = df_labels, aes(x = x_label, y = y_label, label = label, group = metric), size = 2, color='white') +
  facet_wrap(~bulk_ds, scales = 'free', ncol = 3) +
  theme_bw() +
  theme(
    panel.grid = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  scale_x_discrete(limits = x_levels) +
  scale_y_discrete(limits = rev(y_levels)) +
  labs(x = "", y = "")

ggsave('/nfs/proj/omnideconv_benchmarking/omnideconv/benchmark/revision2/visualization/fig2/fig_s3_v3.pdf', plot = triangle_heatmap, width = 14, height = 16)
```


diff --git a/visualisation/fig5/fig_5D.R b/visualisation/fig5/fig_5D.R index 0775b2f..7c336e2 100755 --- a/visualisation/fig5/fig_5D.R +++ b/visualisation/fig5/fig_5D.R @@ -9,7 +9,7 @@ bulk_norm <- c('tpm', rep('counts', 2), rep('tpm',5)) # 1: List directories, methods, cell types -unknown.content.deconv.results <- list.files('/vol/omnideconv_results/results_unknown_content', full.names=F, recursive=T) +unknown.content.deconv.results <- list.files('/nfs/data/omnideconv_benchmarking_clean/benchmark_results/results_unkown_content/results_unknown_content/', full.names=F, recursive=T) unknown.content.deconv.results <- unknown.content.deconv.results[grep('replicate', unknown.content.deconv.results)] unknown.content.deconv.results <- unknown.content.deconv.results[grep('lambrechts', unknown.content.deconv.results)] @@ -37,10 +37,10 @@ metadata.table <- metadata.table[metadata.table$unknown_fraction %in% vector.fra #2: Combine these in a unique dataframe data <- NULL for(i in 1:nrow(metadata.table)){ - result <- readRDS(paste0('/vol/omnideconv_results/results_unknown_content/', metadata.table$path[i])) %>% + result <- readRDS(paste0('/nfs/data/omnideconv_benchmarking_clean/benchmark_results/results_unkown_content/results_unknown_content/', metadata.table$path[i])) %>% .$deconvolution %>% as.data.frame() - true.fractions <- readRDS(paste0('/vol/omnideconv_results/results_unknown_content/', metadata.table$path[i])) %>% + true.fractions <- readRDS(paste0('/nfs/data/omnideconv_benchmarking_clean/benchmark_results/results_unkown_content/results_unknown_content/', metadata.table$path[i])) %>% .$true_cell_fractions %>% as.data.frame() %>% .[!(row.names(.) == 'Tumor cells'), ]