Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
270 commits
Select commit Hold shift + click to select a range
dc203cf
add train/infer
Liuy12 Jul 27, 2023
0c43c0d
update params
Liuy12 Jul 27, 2023
8d9ebcf
add .yml
Liuy12 Jul 27, 2023
027117e
update params
Liuy12 Aug 3, 2023
be7baab
update conda path
Liuy12 Aug 11, 2023
629f023
fix conda
Liuy12 Aug 11, 2023
5bb62b3
update preprocess.sh
Liuy12 Aug 11, 2023
d24c498
update preprocess.sh
Liuy12 Aug 11, 2023
326c68d
update preprocess_new.py
Liuy12 Aug 11, 2023
3c6e7bf
update env
Liuy12 Aug 11, 2023
21cfc16
update preproce_new.py
Liuy12 Aug 11, 2023
fcb9d3a
update preproce_new.py
Liuy12 Aug 11, 2023
5ab58dc
update files
Liuy12 Aug 11, 2023
a3a76d7
update params
Liuy12 Aug 11, 2023
212560f
fix params
Liuy12 Aug 11, 2023
2c6e778
update preproce_new.py
Liuy12 Aug 12, 2023
32a50c7
update preprocess_new.py
Liuy12 Aug 12, 2023
30e62fa
update preprocess_new.py
Liuy12 Aug 12, 2023
ff0c86f
update file
Liuy12 Aug 12, 2023
8567eb6
update file
Liuy12 Aug 12, 2023
d50dc04
update file
Liuy12 Aug 12, 2023
fa81f1f
update script
Liuy12 Aug 16, 2023
fc39696
add def
Liuy12 Aug 16, 2023
d6caada
add script
Liuy12 Aug 16, 2023
7a5aabe
update file
Liuy12 Aug 16, 2023
a5e4360
update FNN_new
Liuy12 Aug 16, 2023
09e739f
update FNN
Liuy12 Aug 16, 2023
3d014a5
update params
Liuy12 Aug 16, 2023
8db2812
fix param
Liuy12 Aug 16, 2023
fdf2d8c
fix bug
Liuy12 Aug 16, 2023
9e179ae
add time
Liuy12 Aug 17, 2023
4f4d94f
update def
Liuy12 Aug 17, 2023
9767ff6
update yml
Liuy12 Aug 17, 2023
0588da4
update train.sh
Liuy12 Aug 17, 2023
e6bd115
update train.sh
Liuy12 Aug 17, 2023
c590e88
update train.py
Liuy12 Aug 17, 2023
9c6afe3
update train
Liuy12 Aug 17, 2023
679a2f0
fix bug
Liuy12 Aug 17, 2023
b13eab8
update file
Liuy12 Aug 22, 2023
c7bd594
update file
Liuy12 Aug 22, 2023
0c45b09
use polars
Liuy12 Aug 23, 2023
8419839
update files
Liuy12 Aug 23, 2023
2a767ea
update preprocess
Liuy12 Aug 23, 2023
4564d8e
update infer.sh
Liuy12 Aug 30, 2023
5ed9d0e
PathDSP in candle format (#2)
Liuy12 Aug 30, 2023
1ffad5f
Merge branch 'JDACS4C-IMPROVE:develop' into develop
Liuy12 Oct 26, 2023
6951b09
process author data
Liuy12 Nov 2, 2023
b5d8c57
fix args
Liuy12 Nov 6, 2023
8ece3ff
add infer.sh
Liuy12 Nov 6, 2023
92b7518
update doc
Liuy12 Nov 6, 2023
ba8eb0e
fix path
Liuy12 Nov 6, 2023
91b5ecb
fix conda
Liuy12 Nov 6, 2023
cd7303f
add documentation for container and conda usage (#6)
Liuy12 Nov 6, 2023
17fd698
Merge branch 'JDACS4C-IMPROVE:develop' into develop
Liuy12 Nov 13, 2023
2ea4a3d
use improve repo
Liuy12 Nov 13, 2023
8ee16b9
use improve module
Liuy12 Dec 6, 2023
dc1f3b4
update readme
Liuy12 Dec 6, 2023
a216999
update to use IMPROVE library (#8)
Liuy12 Dec 6, 2023
d8e7222
update csa
Liuy12 Jan 3, 2024
a763877
Merge branch 'develop' into develop
Liuy12 Jan 3, 2024
ea88f9f
update to csa study (#9)
Liuy12 Jan 3, 2024
0544b49
Merge branch 'JDACS4C-IMPROVE:develop' into develop
Liuy12 Jan 3, 2024
e74d203
add dropout
Liuy12 Feb 29, 2024
aabd9aa
update def
Liuy12 Feb 29, 2024
e227345
rebuild image
Liuy12 Mar 4, 2024
d53e43a
update def
Liuy12 Mar 4, 2024
8892530
prepare reformatted model for singularity (#10)
Liuy12 Mar 4, 2024
226a7fa
add cuda device (#15)
Liuy12 Mar 21, 2024
638580c
add cuda device (#16)
Liuy12 Mar 29, 2024
70b27ab
add hpo script for dh
Liuy12 Mar 29, 2024
d5e902d
fix conflicts
Liuy12 Mar 29, 2024
8785d64
Merge pull request #17 from Liuy12/develop
Liuy12 Mar 29, 2024
5c552d5
use multiple gpus
Liuy12 Apr 1, 2024
b5c9f76
Merge pull request #18 from Liuy12/develop
Liuy12 Apr 1, 2024
8e11b6e
update hpo scripts for polaris
Liuy12 Apr 11, 2024
2f45d28
update readme
Liuy12 Apr 11, 2024
5e7e0df
update readme
Liuy12 Apr 12, 2024
9a588d6
update readme
Liuy12 Apr 15, 2024
c55a3d5
add dh singularity scripts
Liuy12 Apr 23, 2024
82773c7
fix gpu device
Liuy12 Apr 23, 2024
86770cd
pass hps to model
Liuy12 Apr 24, 2024
ff1bb80
set priority for cuda_visible_devices
Liuy12 Apr 29, 2024
c4dcb63
set priority for cuda_visible_device
Liuy12 Apr 29, 2024
2c47366
pass hps to singularity
Liuy12 Apr 29, 2024
6477475
update install instruction
Liuy12 May 1, 2024
8ba3a09
update hpo scripts
Liuy12 May 1, 2024
39b9794
remove duplicate cuda_name
Liuy12 May 2, 2024
6d7ed79
fix issues with cuda_visible_devices
Liuy12 May 2, 2024
563ffae
fix issue with cuda_visible_devices
Liuy12 May 2, 2024
333b984
fix cuda_visible_devices issue
Liuy12 May 2, 2024
be69c3b
fix cuda_visible_devices issue
Liuy12 May 2, 2024
b551ea9
update readme
Liuy12 May 7, 2024
e6808fd
update scripts for hpo
Liuy12 May 7, 2024
5403d89
update .gitignore
Liuy12 May 13, 2024
fb95186
update installer script
Liuy12 May 13, 2024
7a2d9fd
update hpo subprocess scripts
Liuy12 May 13, 2024
4373efa
add hpo_scale scripts
Liuy12 May 13, 2024
89119a9
update hpo_scale
Liuy12 May 14, 2024
284c0fb
update readme
Liuy12 May 14, 2024
18a467b
add hpo scale script for singularity
Liuy12 May 14, 2024
5a1f43c
update hpo scripts
Liuy12 May 24, 2024
95e9bc3
update default params
Liuy12 Jun 7, 2024
f3e1631
set epochs to 50
Liuy12 Jun 7, 2024
cb3e34a
update default params values
Liuy12 Jun 7, 2024
e0aedfd
update scaling scripts
Liuy12 Jun 7, 2024
b7e62c8
update readme
Liuy12 Jun 7, 2024
11b93ab
add dropout as hp
Liuy12 Jun 7, 2024
e0d3973
disable random seed
Liuy12 Jun 12, 2024
d389b14
merged deephyper to develop
Liuy12 Jun 12, 2024
32a8a93
add readme for deephyper
Liuy12 Jun 12, 2024
05370f0
Merge pull request #19 from Liuy12/develop
Liuy12 Jun 14, 2024
ae39d4a
preprocess - framework-api changes
nkoussa Aug 19, 2024
c3dd54a
train - framework-api changes
nkoussa Aug 19, 2024
e611190
epochs = 3 for testing
nkoussa Aug 19, 2024
1cbaf32
forgot cfg =
nkoussa Aug 19, 2024
82ce3ac
fix for output_dir
nkoussa Aug 19, 2024
a54a57e
testing bug with splits
nkoussa Aug 19, 2024
3dbe442
testing bug
nkoussa Aug 19, 2024
cbec4aa
extra raw_data
nkoussa Aug 20, 2024
b34b0e6
model_outdir = output_dir
nkoussa Aug 20, 2024
db52a56
ml_data_outdir = input_dir
nkoussa Aug 20, 2024
0ba3d03
train_ml_data_dir to ml_data_dir, etc
nkoussa Aug 20, 2024
278dc14
ml_data_dir to input_dir
nkoussa Aug 20, 2024
7a95323
add data_format to config
nkoussa Aug 20, 2024
6ff3cc6
quotes out of config
nkoussa Aug 20, 2024
c539f14
infer - framework-api changes
nkoussa Aug 21, 2024
69136fd
another input dir
nkoussa Aug 21, 2024
87305c4
update config
nkoussa Aug 21, 2024
2e50fd0
update config
nkoussa Aug 21, 2024
c0bb2a4
updates for I/O and params
nkoussa Aug 29, 2024
ce314e1
readme and setup_improve
nkoussa Aug 30, 2024
3f1b445
move setup_improve
nkoussa Aug 30, 2024
324b9bc
download_csa
nkoussa Aug 30, 2024
7d71aaf
last readme updates
nkoussa Aug 30, 2024
0980b95
imports for model specific stuff
nkoussa Aug 30, 2024
af87906
another import
nkoussa Aug 30, 2024
fbac751
updating params
nkoussa Aug 30, 2024
22e7fee
take out preprocess in train
nkoussa Aug 30, 2024
74fdb9e
imports and take out preprocess
nkoussa Aug 30, 2024
4923cc8
Merge pull request #20 from JDACS4C-IMPROVE/framework-api
nkoussa Sep 5, 2024
da48c2c
Update README.md
nkoussa Sep 5, 2024
b3c7527
updating with latest improvelib changes
nkoussa Sep 5, 2024
f9a84bf
typo in output_dir
nkoussa Sep 6, 2024
e10b460
params in infer
nkoussa Sep 6, 2024
c441551
added csa workflow files
nkoussa Sep 6, 2024
8c4cf48
took out default_model
nkoussa Sep 6, 2024
08fb913
rm " from model_name
nkoussa Sep 6, 2024
05ee3ec
readme, setup, etc
nkoussa Sep 6, 2024
f414554
adjusting csa parameters for test
nkoussa Sep 9, 2024
65b0391
hyperparam defaults added
nkoussa Sep 10, 2024
c363fd1
updated csa_workflow
nkoussa Sep 10, 2024
487312b
fixed readme
nkoussa Sep 10, 2024
6f3e19d
fix author_data in params
nkoussa Sep 10, 2024
280192f
don't return test scores
nkoussa Sep 11, 2024
1266c91
add and change files for csa parsl workflow
Sep 11, 2024
8063bd1
epochs back up
nkoussa Sep 11, 2024
e9c5b7a
update csa_params.ini for full CSA with parsl
Sep 13, 2024
127a9e6
brute force csa
nkoussa Sep 13, 2024
c2db430
bruteforce csa with parameters
nkoussa Sep 15, 2024
96a3e8f
fixed ini
nkoussa Sep 16, 2024
4930b04
split nums
nkoussa Sep 16, 2024
38d5f36
remove build paths
nkoussa Sep 16, 2024
22038ff
print statements
nkoussa Sep 16, 2024
6af6777
bug
nkoussa Sep 16, 2024
391319b
write output
nkoussa Sep 16, 2024
312bf52
print std
nkoussa Sep 16, 2024
336e385
bug
nkoussa Sep 17, 2024
bd6c9f3
outdir
nkoussa Sep 17, 2024
f55dbaf
saves logs
nkoussa Sep 17, 2024
76b797e
all splits
nkoussa Sep 18, 2024
320f6d6
check false
nkoussa Sep 18, 2024
2930191
reserve cuda testing
nkoussa Sep 18, 2024
0aba600
trying to make logs back compatible with python < 3.7
nkoussa Sep 20, 2024
0983abc
updated brute force workflow
nkoussa Sep 20, 2024
c4557e5
Update PathDSP_train_improve.py
nkoussa Sep 25, 2024
0f24f14
Update PathDSP_infer_improve.py
nkoussa Sep 25, 2024
6779091
Update PathDSP_params.txt
nkoussa Sep 25, 2024
4d2dcf5
save stage ydf
nkoussa Sep 26, 2024
8f4a3f2
untransform
nkoussa Sep 30, 2024
e6bc9d6
setup improve
nkoussa Sep 30, 2024
8e0268b
fix cuda name
nkoussa Sep 30, 2024
a58d8a5
to pandas
nkoussa Sep 30, 2024
4cc8d18
remove print
nkoussa Sep 30, 2024
a9662bf
update readme
nkoussa Sep 30, 2024
79e1f14
Merge pull request #21 from JDACS4C-IMPROVE/v010_202409XX
nkoussa Sep 30, 2024
8b64f77
parsl update
nkoussa Oct 15, 2024
e5c9fb7
fixing save ydf
nkoussa Oct 16, 2024
db4da00
fixing save ydf
nkoussa Oct 16, 2024
19236ad
bug
nkoussa Oct 16, 2024
6ff0346
bug
nkoussa Oct 16, 2024
8287965
updated install instructions
nkoussa Oct 29, 2024
75b6dd8
deephyper updates
nkoussa Nov 12, 2024
46584a1
train.sh
nkoussa Nov 14, 2024
d8d477d
updates
nkoussa Nov 14, 2024
9f86340
import time
nkoussa Nov 14, 2024
58a4ead
import Path
nkoussa Nov 14, 2024
5af2c27
rank
nkoussa Nov 14, 2024
e1ce535
prints
nkoussa Nov 14, 2024
81f0219
prints
nkoussa Nov 14, 2024
11f6c7f
moved config
nkoussa Nov 14, 2024
44709dd
fix log dir
nkoussa Nov 15, 2024
4d45ff6
add logger
nkoussa Nov 15, 2024
53a61c3
bug
nkoussa Nov 15, 2024
043a9db
bug
nkoussa Nov 15, 2024
d454fe1
bug fix
nkoussa Nov 15, 2024
efe73fa
log
nkoussa Nov 15, 2024
dfb31a6
path
nkoussa Nov 15, 2024
3d44236
path
nkoussa Nov 15, 2024
a30c6c8
logs
nkoussa Nov 15, 2024
9b00698
env issues
nkoussa Nov 15, 2024
07d8d4b
correct sh script
nkoussa Nov 15, 2024
7e6a9d5
added param for type of val loss since it's no longer recorded as val…
nkoussa Nov 15, 2024
1a1dea2
fix log dir
nkoussa Nov 15, 2024
494946f
bug
nkoussa Nov 15, 2024
8cb0969
results
nkoussa Nov 15, 2024
0c7f5d9
move params
nkoussa Nov 15, 2024
e6efff9
add back sort to results
nkoussa Nov 18, 2024
6370116
got rid of log_dir, write all to output folder
nkoussa Nov 18, 2024
1e092dc
clean up, add max_evals as param
nkoussa Nov 18, 2024
e81f760
bug
nkoussa Nov 18, 2024
3f999df
creates output dir
nkoussa Nov 18, 2024
b60188b
path for output dir
nkoussa Nov 18, 2024
f1d87b3
bugs
nkoussa Nov 18, 2024
13c9416
bug
nkoussa Nov 18, 2024
a34b6ef
interactive session in params
nkoussa Nov 18, 2024
0139542
bug
nkoussa Nov 18, 2024
883158a
inputdir
nkoussa Nov 18, 2024
bc140c9
ml_data_dir, input_dir throws error related to x_data path
nkoussa Nov 18, 2024
d6c7ebd
bug
nkoussa Nov 18, 2024
8dd589e
fix readme, clean up repo
nkoussa Nov 19, 2024
ac1c7e6
Interface scripts for CSA
wilke Nov 19, 2024
d41df9a
Merge branch 'develop' of github.com:JDACS4C-IMPROVE/PathDSP into dev…
wilke Nov 19, 2024
1271f2c
Moved interfaced scripts back
wilke Nov 19, 2024
8804acd
un-hardcoded hyperparams, testing HpProblem
nkoussa Nov 20, 2024
a074fdb
modified shell script to take n hyperparameters
nkoussa Nov 20, 2024
befc4b5
modified run() to take n hyperparameters
nkoussa Nov 20, 2024
e230ca0
bug
nkoussa Nov 20, 2024
e5eb697
testing 3 hps
nkoussa Nov 20, 2024
619c3f1
Added aliases for stage files to work with csa
wilke Nov 25, 2024
d743806
Update PathDSP_preprocess_improve.py
nkoussa Nov 27, 2024
2044d94
Update PathDSP_preprocess_improve.py
nkoussa Dec 2, 2024
aef3a41
Update PathDSP_preprocess_improve.py
nkoussa Dec 2, 2024
3dc67cd
Removing symlinks, creating problems in case insensitive file systems
wilke Dec 3, 2024
dbf1111
Merge pull request #23 from JDACS4C-IMPROVE/wilke/make-happy
wilke Dec 3, 2024
5da39c9
Added backups from original files
wilke Dec 3, 2024
895c137
Merge branch 'develop' of github.com:JDACS4C-IMPROVE/PathDSP into dev…
wilke Dec 3, 2024
c8279d8
Renamed files, back to original name
wilke Dec 3, 2024
5729886
added exp_id
nkoussa Dec 9, 2024
8da37bd
Update README.md
nkoussa Jun 16, 2025
b524636
Update setup_improve.sh
nkoussa Jun 16, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
.ipynb_checkpoints/
PathDSP/__pycache__/
__pycache__/
EDA.ipynb
ml_data/
dh_hpo_improve/
dh_hpo_logs/

## gpu utilization
PathDSP_gpu_util_model.txt
gpu_log_strip.txt
gpu_logs.txt
out_models/
train_gpu_util.sh

## image
PathDSP.sif

## log files
dh_hpo_scale_test.*
212 changes: 212 additions & 0 deletions NetPEA.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
"""
Implementation of NetPEA: pathway enrichment with networks (Liu, 2017)

Ref: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5664096/
zscore >1.65, equivalent to p-value=0.05
"""

import os
import sys
import argparse
import numpy as np
import pandas as pd
import multiprocessing as mp
import scipy.stats as scistat
from datetime import datetime

class NetPEA:
    """
    NetPEA pathway enrichment over a cell-by-gene RWR score table.

    For every cell and every pathway, the mean score of the pathway genes that
    are present in the table is compared against the mean scores of randomly
    drawn gene lists of the same size, and expressed as a z-score.

    Constructing the object runs the whole analysis and writes the merged
    cell-by-pathway z-score table to ``out_path`` (tab-separated).

    :param rwrPath: cell-by-PPI-gene score table, either a pandas DataFrame
        (rows = cells, columns = genes) or a path to a tab-separated file with
        a header row and the cell names in the first column
    :param pathwayGMT: path to the pathway database in gmt format
    :param log_transform: when ``True``, apply ``np.log`` to the score table
    :param permutation: number of random gene lists drawn per pathway
    :param seed: seed passed to ``np.random.seed``
    :param n_cpu: number of worker processes (capped at the number of cells)
    :param out_path: file the merged z-score table is written to
    """

    def __init__(self, rwrPath, pathwayGMT, log_transform=False, permutation=1000, seed=42, n_cpu=5, out_path='./'):
        # load data
        self.rwr_path = rwrPath
        self.pathway_gmt = pathwayGMT
        self.permutation = int(permutation)
        self.seed = int(seed)
        self.out_path = out_path

        # accept an already loaded table or a file name
        if isinstance(rwrPath, (str, os.PathLike)):
            self.rwrDf = pd.read_csv(rwrPath, header=0, index_col=0, sep="\t")
        else:
            self.rwrDf = rwrPath

        # settings
        np.random.seed(self.seed)
        # never use more workers than cells, and never fewer than one
        self.n_cpu = max(1, min(int(n_cpu), len(self.rwrDf)))

        # prepare pathway genes to save time
        print('{:}: collect pathway genes'.format(datetime.now()))
        pathway_geneList_dict = self._get_pathway_genes(pathwayGMT)  # {pathway: geneList}

        # kept as an explicit comparison so that non-boolean truthy values
        # (for example the string 'False') do not switch the transform on
        if log_transform == True:
            print('log transform input data')
            self.rwrDf = np.log(self.rwrDf)

        # obtain shared genes for calculating score of pathway genes
        pathway_shareGeneList_dict = self._find_overlaps(self.rwrDf, pathway_geneList_dict)  # {pathway: shareGeneList}

        # generate random gene lists for the background score of every pathway;
        # iteration order (pathway, then permutation) fixes the random stream
        bg_gene_list = self.rwrDf.columns.tolist()  # ppi genes
        pathway_randomGeneListList_dict = {}
        for pathway, shareGeneList in pathway_shareGeneList_dict.items():
            pathway_randomGeneListList_dict[pathway] = [
                np.random.choice(bg_gene_list, len(shareGeneList)).tolist()
                for _ in range(self.permutation)
            ]
        self.pathwayDictList = [pathway_geneList_dict, pathway_shareGeneList_dict, pathway_randomGeneListList_dict]

        # call function
        self.netpea_parallel(self.rwrDf, self.pathwayDictList, self.n_cpu, self.out_path)

    def netpea_parallel(self, rwrDf, pathwayDictList, n_cpu, out_path):
        """
        Split ``rwrDf`` by rows, score each part in a worker process and write
        the concatenated z-score table to ``out_path``.

        :param rwrDf: cell-by-gene score table
        :param pathwayDictList: [pathway genes, shared genes, random gene lists]
        :param n_cpu: number of partitions and worker processes
        :param out_path: output file (tab-separated)
        """
        # split dataframe by row position (same partition sizes as np.array_split)
        n_partitions = max(1, int(n_cpu))
        row_chunks = np.array_split(np.arange(len(rwrDf)), n_partitions)
        split_list = [rwrDf.iloc[rows] for rows in row_chunks]

        # parallel computing; the pool is released even when a worker fails
        pool = mp.Pool(n_partitions)
        try:
            df_list = pool.starmap(self.netpea, [(df, pathwayDictList) for df in split_list])
        finally:
            pool.close()
            pool.join()
        print('{:}: comple {:} dfs'.format(datetime.now(), len(df_list)))
        print(df_list[0])

        # merge result of all cells and save to file
        print('{:}: merge result of all cells and save to file'.format(datetime.now()))
        all_cell_zscore_df = pd.concat(df_list, axis=0)
        out_dir = os.path.dirname(out_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        all_cell_zscore_df.to_csv(out_path, header=True, index=True, sep="\t")

    def netpea(self, rwrDf, pathwayDictList):
        """
        Return a cell-by-pathway DataFrame of z-scores.

        :param rwrDf: cell-by-gene score table (possibly a row subset)
        :param pathwayDictList: [pathway genes, shared genes, random gene lists]
        :return: DataFrame indexed by cell with one column per pathway; empty
            (zero rows) when ``rwrDf`` has no cells
        """
        pathway_geneList_dict, pathway_shareGeneList_dict, pathway_randomGeneListList_dict = pathwayDictList

        # collect score of random gene list
        print('{:}: collect score of random gene list'.format(datetime.now()))
        cell_pathway_bgScoreList_dict = {}  # dict of dict
        for cell in rwrDf.index:
            cell_scores = rwrDf.loc[cell]  # scores of one cell, indexed by gene
            cell_pathway_bgScoreList_dict[cell] = {
                pathway: [cell_scores.loc[randomGeneList].mean() for randomGeneList in randomGeneListList]
                for pathway, randomGeneListList in pathway_randomGeneListList_dict.items()
            }

        # collect score of share gene list
        print('{:}: collect score of share gene list'.format(datetime.now()))
        cell_pathway_Score_dict = {}  # dict of dict
        for cell in rwrDf.index:
            cell_scores = rwrDf.loc[cell]
            cell_pathway_Score_dict[cell] = {
                pathway: cell_scores.loc[shareGeneList].mean()
                for pathway, shareGeneList in pathway_shareGeneList_dict.items()
            }

        # ztest to determine significance
        print('{:}: ztest to determin significance'.format(datetime.now()))
        zscore_dfs = []
        for cell in rwrDf.index:
            pathway_score_dict = cell_pathway_Score_dict[cell]
            pathway_bgList_dict = cell_pathway_bgScoreList_dict[cell]
            pathway_zscore_dict = {}
            for pathway in pathway_geneList_dict.keys():
                # the p-value is not part of the returned table
                zscore, _pvalue = self._cal_zscore(pathway_score_dict[pathway], pathway_bgList_dict[pathway])
                pathway_zscore_dict[pathway] = zscore
            zscore_dfs.append(pd.DataFrame(pathway_zscore_dict, index=[cell]))

        # pd.concat refuses an empty list, so an empty chunk yields an empty table
        if not zscore_dfs:
            return pd.DataFrame(columns=list(pathway_geneList_dict.keys()))
        return pd.concat(zscore_dfs, axis=0)

    def _merge_pathway_dict(self, pathway_geneList_dict, pathway_shareGeneList_dict):
        """return dataframe with headers = [pathway, #pathway genes, overlap genes]"""
        pathway_lenG_dict = {pathway: len(geneList) for pathway, geneList in pathway_geneList_dict.items()}
        pathway_strG_dict = {pathway: ",".join(geneList) for pathway, geneList in pathway_shareGeneList_dict.items()}
        df1 = pd.DataFrame(pathway_lenG_dict.items(), columns=['pathway', '#pathway genes'])
        df2 = pd.DataFrame(pathway_strG_dict.items(), columns=['pathway', 'overlap genes'])
        return df1.set_index('pathway').join(df2.set_index('pathway'))

    def _find_overlaps(self, rwrDf, pathway_dict):
        """return dictionary {pathway: sorted list of pathway genes that are columns of rwrDf}"""
        ppi_genes = set(rwrDf.columns.tolist())
        return {pathway: sorted(set(geneList) & ppi_genes) for pathway, geneList in pathway_dict.items()}

    def _cal_zscore(self, score, scoreList):
        """
        Return [zscore, pvalue] of ``score`` against the background ``scoreList``.

        The p-value is the one-sided normal survival function of ``abs(zscore)``.
        Both values are NaN when the background has zero standard deviation.
        """
        bg_std = np.std(scoreList)
        if bg_std != 0:
            zscore = (score - np.mean(scoreList)) / bg_std
            pvalue = scistat.norm.sf(abs(zscore))  # not pdf
        else:
            zscore, pvalue = np.nan, np.nan
        return [zscore, pvalue]

    def _cal_similarity_score(self, rwrDf, geneList):
        """return similarity score by taking average of rwr for given geneList"""
        return rwrDf.loc[geneList].mean()

    def _get_pathway_genes(self, gmt):
        """
        Return pathwayStr_geneList_dict

        Each gmt line is tab-separated: pathway name, a second field that is
        ignored here, then the genes. Blank lines are skipped.

        :param gmt: file name to pathway in gmt format
        :return pathway_dict: dictionary of pathway as key, genelist as values
        """
        pathwayStr_geneList_dict = {}
        with open(gmt, 'r') as f:
            for line in f:
                if not line.strip():
                    continue  # a blank line would register a pathway named ''
                # extract fields
                fields = line.strip('\n').split('\t')
                pathwayStr_geneList_dict[fields[0]] = fields[2:]
        return pathwayStr_geneList_dict

    def _df2dict(self, df):
        """return 1 by N dataframe to dictionary of N keys"""
        return df.to_dict('records')[0]  # keys are column names = gene names


if __name__ == "__main__":
    # Smoke run of NetPEA on a small test table; prints the elapsed wall time.
    start_time = datetime.now()
    rwr_path = 'test.txt' #'/repo4/ytang4/PHD/db/GDSC/processed/GDSC.MUTCNV.STRING.RWR.txt'
    pathway_gmt = '/repo4/ytang4/PHD/db/MSigdb/c2.cp.pid.v7.1.symbols.gmt'
    out_path = './test_netpea/GDSC'
    # NetPEA works on a cell-by-gene DataFrame, so load the table before passing it
    rwr_df = pd.read_csv(rwr_path, header=0, index_col=0, sep="\t")
    # the result file is written directly to out_path; make sure its folder exists
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    # initiate (the constructor runs the analysis and writes the result)
    cell_pathway_df = NetPEA(rwr_df, pathway_gmt, permutation=3, seed=42, n_cpu=5, out_path=out_path)
    # subtract datetimes directly: round-tripping through str()/strptime fails
    # whenever the microsecond field is 0, because str() then omits the fraction
    spend = datetime.now() - start_time
    print( '[Finished in {:}]'.format(spend) )

59 changes: 59 additions & 0 deletions PathDSP.def
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
Bootstrap: docker
From: pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime

%labels
MANTAINER Yuanhang Liu

%setup
# copy the GPU fix script from ./src of the build directory into the image
# root, where %post runs it and then deletes it
cp ./src/Singularity_gpu_fix.sh $SINGULARITY_ROOTFS
# add local url of this repository for testing


%environment
# the model code is cloned to /usr/local/PathDSP in %post
export PATH=$PATH:/usr/local/PathDSP
export IMPROVE_MODEL_DIR=/usr/local/PathDSP
# both data variables point at the directory created in %post
export CANDLE_DATA_DIR=/candle_data_dir
export AUTHOR_DATA_DIR=/candle_data_dir
# make the IMPROVE checkout and the PathDSP package directory importable
export PYTHONPATH=$PYTHONPATH:/usr/local/IMPROVE/:/usr/local/PathDSP/PathDSP/

%post
# base tooling
apt-get update -y
apt-get install wget -y
apt-get install -y gnupg
# import two signing keys from the Ubuntu keyserver
# NOTE(review): presumably the NVIDIA repository keys - confirm
apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys F60F4B3D7FA2AF80
apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys A4B469963BF863CC

apt-get install build-essential -y
apt-get install git -y
apt-get install vim -y
apt-get install subversion -y

# install gpu fix and clean up
cd /
chmod +x Singularity_gpu_fix.sh
./Singularity_gpu_fix.sh
rm Singularity_gpu_fix.sh

# these three need to be compiled and linked to the cuda libs.
# at the moment, what works for me is to build these in a
# singularity shell in a sandbox with the --nv flag to singularity set.
# NOTE(review): no build commands follow this comment, so it is unclear which
# three components are meant - confirm or remove


# create default internal candle_data_dir, map external candle_data_dir here
mkdir /candle_data_dir

# install python modules and model prerequisites
cd /usr/local
git clone https://github.com/JDACS4C-IMPROVE/IMPROVE.git
git clone -b develop https://github.com/JDACS4C-IMPROVE/PathDSP.git
cd PathDSP

# create the model's conda environment under /usr/local/conda_envs
# NOTE(review): nothing is downloaded here; the conda binary at /opt/conda is
# presumably provided by the base image - confirm

/opt/conda/bin/conda env create -f environment_082223.yml --prefix /usr/local/conda_envs/PathDSP_env/
#/opt/conda/bin/conda activate PathDSP_env
# candle_lib is installed with the environment's own pip (the env is not activated)
/usr/local/conda_envs/PathDSP_env/bin/pip install git+https://github.com/ECP-CANDLE/candle_lib@develop

# expose the repository's shell scripts on PATH and make them executable
cp *.sh /usr/local/bin
chmod a+x /usr/local/bin/*.sh
chmod a+x /usr/local/PathDSP/*.sh
Loading