diff --git a/human_general_analysis/Try_other_enrichment_methods.ipynb b/human_general_analysis/Try_other_enrichment_methods.ipynb new file mode 100644 index 00000000..f7249216 --- /dev/null +++ b/human_general_analysis/Try_other_enrichment_methods.ipynb @@ -0,0 +1,154 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Name\n", + "\n", + "This notebook plugs in other gene set enrichment methods to demonstrate that our method, SOPHIE, can be inserted into different pipelines and work with other methods" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using TensorFlow backend.\n", + "/home/alexandra/anaconda3/envs/generic_expression/lib/python3.7/site-packages/matplotlib/__init__.py:886: MatplotlibDeprecationWarning: \n", + "examples.directory is deprecated; in the future, examples will be found relative to the 'datapath' directory.\n", + " \"found relative to the 'datapath' directory.\".format(key))\n" + ] + } + ], + "source": [ + "%load_ext autoreload\n", + "%load_ext rpy2.ipython\n", + "%autoreload 2\n", + "\n", + "import os\n", + "import sys\n", + "import pandas as pd\n", + "import numpy as np\n", + "import pickle\n", + "\n", + "from rpy2.robjects import pandas2ri\n", + "pandas2ri.activate()\n", + "\n", + "from ponyo import utils\n", + "from generic_expression_patterns_modules import calc, process\n", + "\n", + "np.random.seed(123)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Read in config variables\n", + "base_dir = os.path.abspath(os.path.join(os.getcwd(), \"../\"))\n", + "\n", + "config_filename = os.path.abspath(\n", + " os.path.join(base_dir, \"configs\", \"config_human_general.tsv\")\n", + ")\n", + "\n", + "params = utils.read_config(config_filename)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": 
[], + "source": [ + "# Load params\n", + "local_dir = params[\"local_dir\"]\n", + "project_id = params['project_id']\n", + "hallmark_DB_filename = params[\"pathway_DB_filename\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# Load DE stats directory\n", + "DE_stats_dir = os.path.join(local_dir, \"DE_stats\")\n", + "\n", + "# Template experiment DE stats\n", + "template_DE_stats_filename = os.path.join(\n", + " DE_stats_dir,\n", + " f\"DE_stats_template_data_{project_id}_real.txt\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Enrichment methods\n", + "* [ROAST](https://pubmed.ncbi.nlm.nih.gov/20610611/) is available in limma\n", + "* [CAMERA](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3458527/) is available in limma\n", + "* [GSVA](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3618321/) its own bioconductor package\n", + "* [ORA]() is available in Pathway Studio or DAVID\n", + "\n", + "TO DO: Write about each method" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# Define function\n", + "# ORA works on list of DE\n", + "# Apply voom on gene expression >> ROAST, CAMERA, GSVA\n", + "\n", + "# Process data using voom\n", + "\n", + "\n", + "# Run method on template experiments\n", + "# Run method on simulated experiments\n", + "# Output table sort by ranking" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# Get summary rank of pathways" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [conda env:generic_expression] *", + "language": "python", + "name": "conda-env-generic_expression-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": 
"ipython3", + "version": "3.7.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/human_general_analysis/nbconverted/Try_other_enrichment_methods.py b/human_general_analysis/nbconverted/Try_other_enrichment_methods.py new file mode 100644 index 00000000..a6a59f00 --- /dev/null +++ b/human_general_analysis/nbconverted/Try_other_enrichment_methods.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python +# coding: utf-8 + +# ## Name +# +# This notebook plugs in other gene set enrichment methods to demonstrate that our method, SOPHIE, can be inserted into different pipelines and work with other methods + +# In[1]: + + +get_ipython().run_line_magic('load_ext', 'autoreload') +get_ipython().run_line_magic('load_ext', 'rpy2.ipython') +get_ipython().run_line_magic('autoreload', '2') + +import os +import sys +import pandas as pd +import numpy as np +import pickle + +from rpy2.robjects import pandas2ri +pandas2ri.activate() + +from ponyo import utils +from generic_expression_patterns_modules import calc, process + +np.random.seed(123) + + +# In[2]: + + +# Read in config variables +base_dir = os.path.abspath(os.path.join(os.getcwd(), "../")) + +config_filename = os.path.abspath( + os.path.join(base_dir, "configs", "config_human_general.tsv") +) + +params = utils.read_config(config_filename) + + +# In[3]: + + +# Load params +local_dir = params["local_dir"] +project_id = params['project_id'] +hallmark_DB_filename = params["pathway_DB_filename"] + + +# In[4]: + + +# Load DE stats directory +DE_stats_dir = os.path.join(local_dir, "DE_stats") + +# Template experiment DE stats +template_DE_stats_filename = os.path.join( + DE_stats_dir, + f"DE_stats_template_data_{project_id}_real.txt" +) + + +# ## Enrichment methods +# * [ROAST](https://pubmed.ncbi.nlm.nih.gov/20610611/) is available in limma +# * [CAMERA](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3458527/) is available in limma +# * [GSVA](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3618321/) its own bioconductor package +# * 
[ORA]() is available in Pathway Studio or DAVID +# +# TO DO: Write about each method + +# In[5]: + + +# Define function +# ORA works on list of DE +# Apply voom on gene expression >> ROAST, CAMERA, GSVA + +# Process data using voom + + +# Run method on template experiments +# Run method on simulated experiments +# Output table sort by ranking + + +# In[6]: + + +# Get summary rank of pathways +