From a2ada528788c5dfff8fa7a7f93a8584eef759428 Mon Sep 17 00:00:00 2001 From: Michael Kotliar Date: Fri, 18 Jan 2019 13:48:37 -0500 Subject: [PATCH 01/10] Add CMakeLists.txt file --- CMakeLists.txt | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..71cc5d0 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,13 @@ +cmake_minimum_required(VERSION 3.6) +project(RELI) + +set(CMAKE_CXX_STANDARD 11) +find_package(GSL REQUIRED) + +set(SOURCE_FILES + src/RELI.cpp + src/RELI_impl.cpp + src/RELI_impl.h) + +add_executable(RELI ${SOURCE_FILES}) +target_link_libraries(RELI GSL::gsl) \ No newline at end of file From 84f9a24a3b8b8e9eee8e7840dd48550318bc38cf Mon Sep 17 00:00:00 2001 From: Michael Kotliar Date: Fri, 18 Jan 2019 13:50:01 -0500 Subject: [PATCH 02/10] Minor args refactor --- src/RELI.cpp | 84 ++++++++++++++++++++++++++-------------------------- 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/src/RELI.cpp b/src/RELI.cpp index d0e63c6..55378a9 100644 --- a/src/RELI.cpp +++ b/src/RELI.cpp @@ -59,37 +59,37 @@ void display_help(){ cout << "Usage: ./RELI [options]" << endl; cout << "OPTIONS are:" << endl; cout << endl; - cout << "-snp FILE: Phenotype snp file in 4 column bed format. [required]" << endl; - cout << "-ld FILE: Phenotype linkage disequilibrium structure for snps, default: no ld file. [optional]" << endl; - cout << "-index FILE: ChIP-seq index file. [required]" << endl; - cout << "-data DIR: Specify directory where ChIP-seq data are stored. [required]" << endl; - cout << "-target STRING: Target label of ChIP-seq experiment to be tested from index file. [required]" << endl; - cout << "-build FILE: Genome build file. [required]" << endl; - cout << "-null FILE: Null model file. [required]" << endl; - cout << "-dbsnp FILE: dbSNP table file. [required]" << endl; - cout << "-out DIR: Specify output directory name under current working folder. [required]" << endl; - cout << "-match: Boolean switch to turn on minor allele frequency based matching, default: off. [optional]" << endl; - cout << "-rep NUMBER: Number of permutation/simulation to be performed, default: 2000. [optional]" << endl; - cout << "-corr NUMBER: Bonferroni correction multiplier for multiple test, default: 1 [optional]" << endl; - cout << "-phenotype STRING: User provided phenotype name, default: \".\". [optional]" << endl; - cout << "-ancestry STRING: User provided ancestry name, default: \".\". [optional]" << endl; + cout << "--snp FILE: Phenotype snp file in 4 column bed format. [required]" << endl; + cout << "--ld FILE: Phenotype linkage disequilibrium structure for snps, default: no ld file. [optional]" << endl; + cout << "--index FILE: ChIP-seq index file. [required]" << endl; + cout << "--data DIR: Specify directory where ChIP-seq data are stored. [required]" << endl; + cout << "--target STRING: Target label of ChIP-seq experiment to be tested from index file. [required]" << endl; + cout << "--build FILE: Genome build file. [required]" << endl; + cout << "--null FILE: Null model file. [required]" << endl; + cout << "--dbsnp FILE: dbSNP table file. [required]" << endl; + cout << "--out DIR: Specify output directory name under current working folder. [required]" << endl; + cout << "--match: Boolean switch to turn on minor allele frequency based matching, default: off. [optional]" << endl; + cout << "--rep NUMBER: Number of permutation/simulation to be performed, default: 2000. [optional]" << endl; + cout << "--corr NUMBER: Bonferroni correction multiplier for multiple test, default: 1 [optional]" << endl; + cout << "--phenotype STRING: User provided phenotype name, default: \".\". [optional]" << endl; + cout << "--ancestry STRING: User provided ancestry name, default: \".\". [optional]" << endl; cout << endl; cout << "EXAMPLE:" << endl; cout << "../script/RELI \\" << endl; - cout << "-snp SLE_EU.snp \\" << endl; - cout << "-ld SLE_EU.ld \\" << endl; - cout << "-index ../data/ChIPseq.index \\" << endl; - cout << "-data ../data/ChIP-seq \\" << endl; - cout << "-target hg19_0302 \\" << endl; - cout << "-build ../data/GenomeBuild/hg19.txt \\" << endl; - cout << "-null ../data/Null/CommonSNP_MAFmatch \\" << endl; - cout << "-dbsnp ../data/SNPtable/SNPtable \\" << endl; - cout << "-out Output \\" << endl; - cout << "-match \\" << endl; - cout << "-rep 2000 \\" << endl; - cout << "-corr 1544 \\" << endl; - cout << "-phenotype Systemic_Lupus_Erythematosus \\" << endl; - cout << "-ancestry EU " << endl; + cout << "--snp SLE_EU.snp \\" << endl; + cout << "--ld SLE_EU.ld \\" << endl; + cout << "--index ../data/ChIPseq.index \\" << endl; + cout << "--data ../data/ChIP-seq \\" << endl; + cout << "--target hg19_0302 \\" << endl; + cout << "--build ../data/GenomeBuild/hg19.txt \\" << endl; + cout << "--null ../data/Null/CommonSNP_MAFmatch \\" << endl; + cout << "--dbsnp ../data/SNPtable/SNPtable \\" << endl; + cout << "--out Output \\" << endl; + cout << "--match \\" << endl; + cout << "--rep 2000 \\" << endl; + cout << "--corr 1544 \\" << endl; + cout << "--phenotype Systemic_Lupus_Erythematosus \\" << endl; + cout << "--ancestry EU " << endl; exit(-1); } @@ -101,57 +101,57 @@ int main(int argc, char* argv[]){ */ RELI::RELIobj *RELIinstance = new RELI::RELIobj; for (auto i = 1; i < argc; ++i){ - if (strcmp(argv[i], "-null") == 0){ // null model + if (strcmp(argv[i], "--null") == 0){ // null model RELIinstance->public_ver_null_fname = argv[i + 1]; RELIinstance->flag_null_file = true; } - if (strcmp(argv[i], "-dbsnp") == 0){ // dbSNP table that contains MAF allele frequency info + if (strcmp(argv[i], "--dbsnp") == 0){ // dbSNP table that contains MAF allele frequency info RELIinstance->public_ver_snp_table_fname = argv[i + 1]; RELIinstance->flag_dbsnp_table = true; } - if (strcmp(argv[i], "-match") == 0){ // snp matching indicator + if (strcmp(argv[i], "--match") == 0){ // snp matching indicator RELI::snp_matching = true; } - if (strcmp(argv[i], "-snp") == 0){ // phenotype snp file + if (strcmp(argv[i], "--snp") == 0){ // phenotype snp file RELIinstance->public_ver_snp_fname = argv[i + 1]; RELIinstance->flag_input_snp = true; } - if (strcmp(argv[i], "-ld") == 0){ // phenotype ld file + if (strcmp(argv[i], "--ld") == 0){ // phenotype ld file RELI::ldfile = argv[i + 1]; RELI::ldfile_flag = true; RELIinstance->flag_ld_file = true; } - if (strcmp(argv[i], "-data") == 0){ // directory of target ChIP-seq files + if (strcmp(argv[i], "--data") == 0){ // directory of target ChIP-seq files RELIinstance->public_ver_data_dir= argv[i + 1]; RELIinstance->flag_chipseq_data_dir = true; } - if (strcmp(argv[i], "-out") == 0){ // output directory + if (strcmp(argv[i], "--out") == 0){ // output directory RELIinstance->public_ver_output_dir = argv[i + 1]; RELIinstance->flag_output_dir = true; } - if (strcmp(argv[i], "-rep") == 0){ // replication number + if (strcmp(argv[i], "--rep") == 0){ // replication number RELI::repmax = atoi(argv[i + 1]); } - if (strcmp(argv[i], "-corr") == 0){ // bonferroni correction multiplier + if (strcmp(argv[i], "--corr") == 0){ // bonferroni correction multiplier RELI::corr_muliplier = atof(argv[i + 1]); } - if (strcmp(argv[i], "-target") == 0){ // string corresponding to target ChIP-seq data + if (strcmp(argv[i], "--target") == 0){ // string corresponding to target ChIP-seq data RELIinstance->public_ver_target_label = argv[i + 1]; RELIinstance->flag_target_label = true; } - if (strcmp(argv[i], "-index") == 0){ // ChIP-seq index file + if (strcmp(argv[i], "--index") == 0){ // ChIP-seq index file RELIinstance->public_ver_data_index_fname = argv[i + 1]; RELIinstance->flag_chipseq_data_index = true; } - if (strcmp(argv[i], "-build") == 0){ // genome build file + if (strcmp(argv[i], "--build") == 0){ // genome build file RELI::species_chr_mapping_file = argv[i + 1]; RELI::using_default_species = false; RELIinstance->flag_genome_build = true; } - if (strcmp(argv[i], "-phenotype") == 0){ // phenotype name + if (strcmp(argv[i], "--phenotype") == 0){ // phenotype name RELIinstance->public_ver_phenotype_name = argv[i + 1]; } - if (strcmp(argv[i], "-ancestry") == 0){ // phenotype ancestry + if (strcmp(argv[i], "--ancestry") == 0){ // phenotype ancestry RELIinstance->public_ver_ancestry_name= argv[i + 1]; } } From 7cccc750ce530abccb15b48978d80120b0ad51e8 Mon Sep 17 00:00:00 2001 From: Michael Kotliar Date: Fri, 18 Jan 2019 16:12:14 -0500 Subject: [PATCH 03/10] Allow to set file path in --target If --target points to the file, --index and --data are ignored. Output filename is fixed to RELI.stats and RELI.overlaps --- src/RELI.cpp | 16 ++++++--- src/RELI_impl.cpp | 91 ++++++++++++++++++++++++++--------------------- 2 files changed, 62 insertions(+), 45 deletions(-) diff --git a/src/RELI.cpp b/src/RELI.cpp index 55378a9..03e3c2c 100644 --- a/src/RELI.cpp +++ b/src/RELI.cpp @@ -39,6 +39,7 @@ along with this program. If not, see . #include #include #include +#include #include "RELI_impl.h" using namespace std; @@ -61,9 +62,9 @@ void display_help(){ cout << endl; cout << "--snp FILE: Phenotype snp file in 4 column bed format. [required]" << endl; cout << "--ld FILE: Phenotype linkage disequilibrium structure for snps, default: no ld file. [optional]" << endl; - cout << "--index FILE: ChIP-seq index file. [required]" << endl; - cout << "--data DIR: Specify directory where ChIP-seq data are stored. [required]" << endl; - cout << "--target STRING: Target label of ChIP-seq experiment to be tested from index file. [required]" << endl; + cout << "--index FILE: ChIP-seq index file. [required if --target points to the label]" << endl; + cout << "--data DIR: Specify directory where ChIP-seq data are stored. [required if --target points to the label]" << endl; + cout << "--target STRING: Target label of ChIP-seq experiment to be tested from index file. Or path to the file [required]" << endl; cout << "--build FILE: Genome build file. [required]" << endl; cout << "--null FILE: Null model file. [required]" << endl; cout << "--dbsnp FILE: dbSNP table file. [required]" << endl; @@ -135,8 +136,15 @@ int main(int argc, char* argv[]){ if (strcmp(argv[i], "--corr") == 0){ // bonferroni correction multiplier RELI::corr_muliplier = atof(argv[i + 1]); } - if (strcmp(argv[i], "--target") == 0){ // string corresponding to target ChIP-seq data + if (strcmp(argv[i], "--target") == 0){ // string corresponding to target ChIP-seq data or path to file RELIinstance->public_ver_target_label = argv[i + 1]; + if (access(RELIinstance->public_ver_target_label.c_str(), F_OK ) != -1 ) { + // if --target points to the file, --data and --index are not required anymore and should be set to True + RELIinstance->flag_chipseq_data_dir = true; + RELIinstance->flag_chipseq_data_index = true; + RELIinstance->public_ver_data_dir = "Ignored"; + RELIinstance->public_ver_data_index_fname = "Ignored"; + } RELIinstance->flag_target_label = true; } if (strcmp(argv[i], "--index") == 0){ // ChIP-seq index file diff --git a/src/RELI_impl.cpp b/src/RELI_impl.cpp index 43fe6de..62f4449 100644 --- a/src/RELI_impl.cpp +++ b/src/RELI_impl.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include "RELI_impl.h" #include // for mkdir and mkdirat (requires kernel >= 2.6.16) @@ -960,57 +961,65 @@ double RELI::binomial_pvalue_appr(int _total, int _overlap, double _prob){ return gsl_cdf_ugaussian_Q((_overlap - _total*_prob) / sqrt(_total*_prob*(1 - _prob))); } void RELI::RELIobj::public_ver_read_data_index(){ - ifstream in; - in.open(this->public_ver_data_index_fname.c_str()); - if (!in){ - cerr << "cannot open data index file, please check with option -index" << endl; - exit(-1); - } - in.ignore(bufferSize, '\n'); - while (!in.eof()){ - in.getline(bufferChar, bufferSize); - in.peek(); - buffer = bufferChar; - auto linevec = linehandler(buffer); + if (access(this->public_ver_target_label.c_str(), F_OK ) != -1 ) { + cout << "Skip loading chip-seq index file, because --target points to file" << endl; + } else { + ifstream in; + in.open(this->public_ver_data_index_fname.c_str()); + if (!in){ + cerr << "cannot open data index file, please check with option -index" << endl; + exit(-1); + } + in.ignore(bufferSize, '\n'); + while (!in.eof()){ + in.getline(bufferChar, bufferSize); + in.peek(); + buffer = bufferChar; + auto linevec = linehandler(buffer); - RELI::data_index t; - t.datalabel = linevec.at(0); - t.source = linevec.at(1); - t.cell = linevec.at(2); - t.tf = linevec.at(3); - t.cell_label = linevec.at(4); - t.pmid = linevec.at(5); - t.group= linevec.at(6); - t.ebv_status = linevec.at(7); - t.species = linevec.at(8); - - this->dataindexvec.push_back(t); - } - in.close(); - cout << "chip-seq index file loaded." << endl; + RELI::data_index t; + t.datalabel = linevec.at(0); + t.source = linevec.at(1); + t.cell = linevec.at(2); + t.tf = linevec.at(3); + t.cell_label = linevec.at(4); + t.pmid = linevec.at(5); + t.group= linevec.at(6); + t.ebv_status = linevec.at(7); + t.species = linevec.at(8); + + this->dataindexvec.push_back(t); + } + in.close(); + cout << "chip-seq index file loaded." << endl; + } } void RELI::RELIobj::public_ver_set_target_data(){ - auto k = find(this->dataindexvec.begin(), - this->dataindexvec.end(), - this->public_ver_target_label); - if (k != this->dataindexvec.end()){ - this->public_ver_selected_data_index = *k; + if (access(this->public_ver_target_label.c_str(), F_OK ) != -1 ) { + this->public_ver_target_data_fname = this->public_ver_target_label; } - else{ - cerr << "cannot find corresponding data entry in the index file, exiting." - << endl; - exit(-1); + else { + auto k = find(this->dataindexvec.begin(), + this->dataindexvec.end(), + this->public_ver_target_label); + if (k != this->dataindexvec.end()){ + this->public_ver_selected_data_index = *k; + } + else{ + cerr << "cannot find corresponding data entry in the index file, exiting." + << endl; + exit(-1); + } + this->public_ver_target_data_fname = this->public_ver_data_dir + "/" + this->public_ver_target_label; } - this->public_ver_target_data_fname = this->public_ver_data_dir + "/" - + this->public_ver_target_label; cout << "target ChIP-seq file set." << endl; } void RELI::RELIobj::public_ver_read_null(){ ifstream in; } bool RELI::RELIobj::minimum_check(){ - this->public_ver_output_fname = this->public_ver_output_dir + "/" + this->public_ver_target_label + ".RELI.stats"; - this->public_ver_output_fname_overlaps = this->public_ver_output_dir + "/" + this->public_ver_target_label + ".RELI.overlaps"; + this->public_ver_output_fname = this->public_ver_output_dir + "/RELI.stats"; + this->public_ver_output_fname_overlaps = this->public_ver_output_dir + "/RELI.overlaps"; cout << "Start Regulatory Element Locus Intersection (RELI) analysis." << endl; cout << "Running arguments: " << endl; @@ -1019,7 +1028,7 @@ bool RELI::RELIobj::minimum_check(){ cout << "3) SNP matching mode: " << RELI::snp_matching << endl; cout << "4) null model file: " << this->public_ver_null_fname << endl; cout << "5) dbSNP table file: " << this->public_ver_snp_table_fname << endl; - cout << "6) target chip-seq label: " << this->public_ver_target_label << endl; + cout << "6) target chip-seq label / file: " << this->public_ver_target_label << endl; cout << "7) chip-seq index file: " << this->public_ver_data_index_fname << endl; cout << "8) chip-seq data dir: " << this->public_ver_data_dir << endl; cout << "9) output dir name: " << this->public_ver_output_dir << endl; From 6921ec84117f4c7072737ac9d898eba3fa1b48e5 Mon Sep 17 00:00:00 2001 From: Michael Kotliar Date: Fri, 18 Jan 2019 16:32:30 -0500 Subject: [PATCH 04/10] Updated Dockerfile to point to specific version --- Dockerfile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 6877e26..701a265 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,10 +1,14 @@ # ================================================================ # # Dockerfile for RELI (public release) -# +# Version: 0.0.2 # Author: Kevin Ernst # Date: 25 July 2018 # +# Build: docker build --rm -t weirauchlab/reli:v0.0.2 -f Dockerfile . +# Pull: docker pull weirauchlab/reli:v0.0.2 +# Run: docker run --rm -ti weirauchlab/reli:v0.0.2 +# # ================================================================ # start with https://store.docker.com/images/centos From 2cb7235c10c030f704e176e48171afe7e354ec50 Mon Sep 17 00:00:00 2001 From: Michael Kotliar Date: Fri, 18 Jan 2019 17:00:34 -0500 Subject: [PATCH 05/10] Unimportant changes --- src/RELI_impl.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/RELI_impl.cpp b/src/RELI_impl.cpp index 62f4449..172f4e9 100644 --- a/src/RELI_impl.cpp +++ b/src/RELI_impl.cpp @@ -1065,7 +1065,7 @@ void RELI::MAF_binned_null_model::loading_null_data(string rhs){ RELI::nullmodelinfilename = rhs; in.open(RELI::nullmodelinfilename.c_str()); if (!in){ - cerr << "cannot load selected null model, check with option -null : " + cerr << "cannot load selected null model, check with option --null : " << RELI::nullmodelinfilename << endl; exit(-1); } @@ -1089,7 +1089,7 @@ void RELI::MAF_binned_null_model::loading_null_data(string rhs){ void RELI::loadSnpFile(string rhs){ ifstream in; in.open(rhs); - if (!in){ cout << "cannot load phenotype snp file, check with option -snp_file: " <public_ver_snp_table_fname.c_str()); if (!in){ - cerr << "cannot load snp table, please check with option -index " + cerr << "cannot load snp table, please check with option --index " << this->public_ver_snp_table_fname << endl; exit(-1); } From 1952f7dbb0dd5ead989fe6feabb9a66c953a2c84 Mon Sep 17 00:00:00 2001 From: Michael Kotliar Date: Fri, 18 Jan 2019 17:02:05 -0500 Subject: [PATCH 06/10] Unimportant changes --- src/RELI_impl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/RELI_impl.cpp b/src/RELI_impl.cpp index 172f4e9..3423792 100644 --- a/src/RELI_impl.cpp +++ b/src/RELI_impl.cpp @@ -1132,7 +1132,7 @@ void RELI::RELIobj::load_snp_table(){ ifstream in; in.open(this->public_ver_snp_table_fname.c_str()); if (!in){ - cerr << "cannot load snp table, please check with option --index " + cerr << "cannot load snp table, please check with option --dbsnp " << this->public_ver_snp_table_fname << endl; exit(-1); } From 497def5aa27cb341273267b66fba32c5241eb4f7 Mon Sep 17 00:00:00 2001 From: Michael Kotliar Date: Mon, 4 Feb 2019 15:17:01 -0500 Subject: [PATCH 07/10] Return to one dash arguments --- src/RELI.cpp | 86 +++++++++++++++++++++++------------------------ src/RELI_impl.cpp | 8 ++--- 2 files changed, 47 insertions(+), 47 deletions(-) diff --git a/src/RELI.cpp b/src/RELI.cpp index 03e3c2c..04320d7 100644 --- a/src/RELI.cpp +++ b/src/RELI.cpp @@ -60,37 +60,37 @@ void display_help(){ cout << "Usage: ./RELI [options]" << endl; cout << "OPTIONS are:" << endl; cout << endl; - cout << "--snp FILE: Phenotype snp file in 4 column bed format. [required]" << endl; - cout << "--ld FILE: Phenotype linkage disequilibrium structure for snps, default: no ld file. [optional]" << endl; - cout << "--index FILE: ChIP-seq index file. [required if --target points to the label]" << endl; - cout << "--data DIR: Specify directory where ChIP-seq data are stored. [required if --target points to the label]" << endl; - cout << "--target STRING: Target label of ChIP-seq experiment to be tested from index file. Or path to the file [required]" << endl; - cout << "--build FILE: Genome build file. [required]" << endl; - cout << "--null FILE: Null model file. [required]" << endl; - cout << "--dbsnp FILE: dbSNP table file. [required]" << endl; - cout << "--out DIR: Specify output directory name under current working folder. [required]" << endl; - cout << "--match: Boolean switch to turn on minor allele frequency based matching, default: off. [optional]" << endl; - cout << "--rep NUMBER: Number of permutation/simulation to be performed, default: 2000. [optional]" << endl; - cout << "--corr NUMBER: Bonferroni correction multiplier for multiple test, default: 1 [optional]" << endl; - cout << "--phenotype STRING: User provided phenotype name, default: \".\". [optional]" << endl; - cout << "--ancestry STRING: User provided ancestry name, default: \".\". [optional]" << endl; + cout << "-snp FILE: Phenotype snp file in 4 column bed format. [required]" << endl; + cout << "-ld FILE: Phenotype linkage disequilibrium structure for snps, default: no ld file. [optional]" << endl; + cout << "-index FILE: ChIP-seq index file. [required if -target points to the label]" << endl; + cout << "-data DIR: Specify directory where ChIP-seq data are stored. [required if -target points to the label]" << endl; + cout << "-target STRING: Target label of ChIP-seq experiment to be tested from index file. Or path to the file [required]" << endl; + cout << "-build FILE: Genome build file. [required]" << endl; + cout << "-null FILE: Null model file. [required]" << endl; + cout << "-dbsnp FILE: dbSNP table file. [required]" << endl; + cout << "-out DIR: Specify output directory name under current working folder. [required]" << endl; + cout << "-match: Boolean switch to turn on minor allele frequency based matching, default: off. [optional]" << endl; + cout << "-rep NUMBER: Number of permutation/simulation to be performed, default: 2000. [optional]" << endl; + cout << "-corr NUMBER: Bonferroni correction multiplier for multiple test, default: 1 [optional]" << endl; + cout << "-phenotype STRING: User provided phenotype name, default: \".\". [optional]" << endl; + cout << "-ancestry STRING: User provided ancestry name, default: \".\". [optional]" << endl; cout << endl; cout << "EXAMPLE:" << endl; cout << "../script/RELI \\" << endl; - cout << "--snp SLE_EU.snp \\" << endl; - cout << "--ld SLE_EU.ld \\" << endl; - cout << "--index ../data/ChIPseq.index \\" << endl; - cout << "--data ../data/ChIP-seq \\" << endl; - cout << "--target hg19_0302 \\" << endl; - cout << "--build ../data/GenomeBuild/hg19.txt \\" << endl; - cout << "--null ../data/Null/CommonSNP_MAFmatch \\" << endl; - cout << "--dbsnp ../data/SNPtable/SNPtable \\" << endl; - cout << "--out Output \\" << endl; - cout << "--match \\" << endl; - cout << "--rep 2000 \\" << endl; - cout << "--corr 1544 \\" << endl; - cout << "--phenotype Systemic_Lupus_Erythematosus \\" << endl; - cout << "--ancestry EU " << endl; + cout << "-snp SLE_EU.snp \\" << endl; + cout << "-ld SLE_EU.ld \\" << endl; + cout << "-index ../data/ChIPseq.index \\" << endl; + cout << "-data ../data/ChIP-seq \\" << endl; + cout << "-target hg19_0302 \\" << endl; + cout << "-build ../data/GenomeBuild/hg19.txt \\" << endl; + cout << "-null ../data/Null/CommonSNP_MAFmatch \\" << endl; + cout << "-dbsnp ../data/SNPtable/SNPtable \\" << endl; + cout << "-out Output \\" << endl; + cout << "-match \\" << endl; + cout << "-rep 2000 \\" << endl; + cout << "-corr 1544 \\" << endl; + cout << "-phenotype Systemic_Lupus_Erythematosus \\" << endl; + cout << "-ancestry EU " << endl; exit(-1); } @@ -102,44 +102,44 @@ int main(int argc, char* argv[]){ */ RELI::RELIobj *RELIinstance = new RELI::RELIobj; for (auto i = 1; i < argc; ++i){ - if (strcmp(argv[i], "--null") == 0){ // null model + if (strcmp(argv[i], "-null") == 0){ // null model RELIinstance->public_ver_null_fname = argv[i + 1]; RELIinstance->flag_null_file = true; } - if (strcmp(argv[i], "--dbsnp") == 0){ // dbSNP table that contains MAF allele frequency info + if (strcmp(argv[i], "-dbsnp") == 0){ // dbSNP table that contains MAF allele frequency info RELIinstance->public_ver_snp_table_fname = argv[i + 1]; RELIinstance->flag_dbsnp_table = true; } - if (strcmp(argv[i], "--match") == 0){ // snp matching indicator + if (strcmp(argv[i], "-match") == 0){ // snp matching indicator RELI::snp_matching = true; } - if (strcmp(argv[i], "--snp") == 0){ // phenotype snp file + if (strcmp(argv[i], "-snp") == 0){ // phenotype snp file RELIinstance->public_ver_snp_fname = argv[i + 1]; RELIinstance->flag_input_snp = true; } - if (strcmp(argv[i], "--ld") == 0){ // phenotype ld file + if (strcmp(argv[i], "-ld") == 0){ // phenotype ld file RELI::ldfile = argv[i + 1]; RELI::ldfile_flag = true; RELIinstance->flag_ld_file = true; } - if (strcmp(argv[i], "--data") == 0){ // directory of target ChIP-seq files + if (strcmp(argv[i], "-data") == 0){ // directory of target ChIP-seq files RELIinstance->public_ver_data_dir= argv[i + 1]; RELIinstance->flag_chipseq_data_dir = true; } - if (strcmp(argv[i], "--out") == 0){ // output directory + if (strcmp(argv[i], "-out") == 0){ // output directory RELIinstance->public_ver_output_dir = argv[i + 1]; RELIinstance->flag_output_dir = true; } - if (strcmp(argv[i], "--rep") == 0){ // replication number + if (strcmp(argv[i], "-rep") == 0){ // replication number RELI::repmax = atoi(argv[i + 1]); } - if (strcmp(argv[i], "--corr") == 0){ // bonferroni correction multiplier + if (strcmp(argv[i], "-corr") == 0){ // bonferroni correction multiplier RELI::corr_muliplier = atof(argv[i + 1]); } - if (strcmp(argv[i], "--target") == 0){ // string corresponding to target ChIP-seq data or path to file + if (strcmp(argv[i], "-target") == 0){ // string corresponding to target ChIP-seq data or path to file RELIinstance->public_ver_target_label = argv[i + 1]; if (access(RELIinstance->public_ver_target_label.c_str(), F_OK ) != -1 ) { - // if --target points to the file, --data and --index are not required anymore and should be set to True + // if -target points to the file, -data and -index are not required anymore and should be set to True RELIinstance->flag_chipseq_data_dir = true; RELIinstance->flag_chipseq_data_index = true; RELIinstance->public_ver_data_dir = "Ignored"; @@ -147,19 +147,19 @@ int main(int argc, char* argv[]){ } RELIinstance->flag_target_label = true; } - if (strcmp(argv[i], "--index") == 0){ // ChIP-seq index file + if (strcmp(argv[i], "-index") == 0){ // ChIP-seq index file RELIinstance->public_ver_data_index_fname = argv[i + 1]; RELIinstance->flag_chipseq_data_index = true; } - if (strcmp(argv[i], "--build") == 0){ // genome build file + if (strcmp(argv[i], "-build") == 0){ // genome build file RELI::species_chr_mapping_file = argv[i + 1]; RELI::using_default_species = false; RELIinstance->flag_genome_build = true; } - if (strcmp(argv[i], "--phenotype") == 0){ // phenotype name + if (strcmp(argv[i], "-phenotype") == 0){ // phenotype name RELIinstance->public_ver_phenotype_name = argv[i + 1]; } - if (strcmp(argv[i], "--ancestry") == 0){ // phenotype ancestry + if (strcmp(argv[i], "-ancestry") == 0){ // phenotype ancestry RELIinstance->public_ver_ancestry_name= argv[i + 1]; } } diff --git a/src/RELI_impl.cpp b/src/RELI_impl.cpp index 3423792..ebf2099 100644 --- a/src/RELI_impl.cpp +++ b/src/RELI_impl.cpp @@ -962,7 +962,7 @@ double RELI::binomial_pvalue_appr(int _total, int _overlap, double _prob){ } void RELI::RELIobj::public_ver_read_data_index(){ if (access(this->public_ver_target_label.c_str(), F_OK ) != -1 ) { - cout << "Skip loading chip-seq index file, because --target points to file" << endl; + cout << "Skip loading chip-seq index file, because -target points to file" << endl; } else { ifstream in; in.open(this->public_ver_data_index_fname.c_str()); @@ -1065,7 +1065,7 @@ void RELI::MAF_binned_null_model::loading_null_data(string rhs){ RELI::nullmodelinfilename = rhs; in.open(RELI::nullmodelinfilename.c_str()); if (!in){ - cerr << "cannot load selected null model, check with option --null : " + cerr << "cannot load selected null model, check with option -null : " << RELI::nullmodelinfilename << endl; exit(-1); } @@ -1089,7 +1089,7 @@ void RELI::MAF_binned_null_model::loading_null_data(string rhs){ void RELI::loadSnpFile(string rhs){ ifstream in; in.open(rhs); - if (!in){ cout << "cannot load phenotype snp file, check with option --snp_file: " <public_ver_snp_table_fname.c_str()); if (!in){ - cerr << "cannot load snp table, please check with option --dbsnp " + cerr << "cannot load snp table, please check with option -dbsnp " << this->public_ver_snp_table_fname << endl; exit(-1); } From 1cd21c65a5fa0cfadf4ff8fc74bc953c4b2d19cf Mon Sep 17 00:00:00 2001 From: Michael Kotliar Date: Mon, 4 Feb 2019 15:35:33 -0500 Subject: [PATCH 08/10] Decreased number of layers in Dockerfile --- Dockerfile | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/Dockerfile b/Dockerfile index 701a265..12d7bfc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,20 +14,17 @@ # start with https://store.docker.com/images/centos FROM alpine -# install prerequisite development packages -RUN apk update -RUN apk add g++ make gsl gsl-dev bzip2 bash which - -RUN mkdir -p /reli/src WORKDIR /reli +ENV PATH $PATH:/reli + COPY src src/ COPY Makefile . +# install prerequisite development packages # build RELI from source -RUN make -RUN ln -s RELI reli -ENV PATH $PATH:/reli - # Remove unnecessary packages pulled in as dependencies of g++ -RUN apk del g++ -RUN apk add libstdc++ libgcc +RUN apk update &&\ + apk add g++ make gsl gsl-dev bzip2 bash which libstdc++ libgcc &&\ + make &&\ + ln -s RELI reli &&\ + apk del g++ \ No newline at end of file From 695b68e559a3c6bf11718c6e266d8d2be9e18956 Mon Sep 17 00:00:00 2001 From: Michael Kotliar Date: Mon, 4 Feb 2019 16:10:10 -0500 Subject: [PATCH 09/10] Add -prefix argument If -prefix is not specified, use RELI --- src/RELI.cpp | 5 +++++ src/RELI_impl.cpp | 21 +++++++++++++-------- src/RELI_impl.h | 3 +++ 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/src/RELI.cpp b/src/RELI.cpp index 04320d7..29d71e9 100644 --- a/src/RELI.cpp +++ b/src/RELI.cpp @@ -69,6 +69,7 @@ void display_help(){ cout << "-null FILE: Null model file. [required]" << endl; cout << "-dbsnp FILE: dbSNP table file. [required]" << endl; cout << "-out DIR: Specify output directory name under current working folder. [required]" << endl; + cout << "-prefix STRING: Specify output files prefix. [optional]" << endl; cout << "-match: Boolean switch to turn on minor allele frequency based matching, default: off. [optional]" << endl; cout << "-rep NUMBER: Number of permutation/simulation to be performed, default: 2000. [optional]" << endl; cout << "-corr NUMBER: Bonferroni correction multiplier for multiple test, default: 1 [optional]" << endl; @@ -130,6 +131,10 @@ int main(int argc, char* argv[]){ RELIinstance->public_ver_output_dir = argv[i + 1]; RELIinstance->flag_output_dir = true; } + if (strcmp(argv[i], "-prefix") == 0){ // output file prefix + RELIinstance->public_ver_output_prefix = argv[i + 1]; + RELIinstance->flag_output_prefix = true; + } if (strcmp(argv[i], "-rep") == 0){ // replication number RELI::repmax = atoi(argv[i + 1]); } diff --git a/src/RELI_impl.cpp b/src/RELI_impl.cpp index ebf2099..d6d5a1d 100644 --- a/src/RELI_impl.cpp +++ b/src/RELI_impl.cpp @@ -1018,8 +1018,12 @@ void RELI::RELIobj::public_ver_read_null(){ ifstream in; } bool RELI::RELIobj::minimum_check(){ - this->public_ver_output_fname = this->public_ver_output_dir + "/RELI.stats"; - this->public_ver_output_fname_overlaps = this->public_ver_output_dir + "/RELI.overlaps"; + if (!this->flag_output_prefix){ + this->public_ver_output_prefix = "RELI"; + } + + this->public_ver_output_fname = this->public_ver_output_dir + "/" + this->public_ver_output_prefix + ".stats"; + this->public_ver_output_fname_overlaps = this->public_ver_output_dir + "/" + this->public_ver_output_prefix + ".overlaps"; cout << "Start Regulatory Element Locus Intersection (RELI) analysis." << endl; cout << "Running arguments: " << endl; @@ -1031,12 +1035,13 @@ bool RELI::RELIobj::minimum_check(){ cout << "6) target chip-seq label / file: " << this->public_ver_target_label << endl; cout << "7) chip-seq index file: " << this->public_ver_data_index_fname << endl; cout << "8) chip-seq data dir: " << this->public_ver_data_dir << endl; - cout << "9) output dir name: " << this->public_ver_output_dir << endl; - cout << "10) genome build file: " << RELI::species_chr_mapping_file << endl; - cout << "11) statistic output file name: " << this->public_ver_output_fname << endl; - cout << "12) overlap output file name: " << this->public_ver_output_fname_overlaps << endl; - cout << "13) provided phenotype name: " << this->public_ver_phenotype_name << endl; - cout << "14) provided ancestry name: " << this->public_ver_ancestry_name << endl; + cout << "9) output dir name: " << this->public_ver_output_dir << endl; + cout << "10) output file prefix: " << this->public_ver_output_prefix << endl; + cout << "11) genome build file: " << RELI::species_chr_mapping_file << endl; + cout << "12) statistic output file name: " << this->public_ver_output_fname << endl; + cout << "13) overlap output file name: " << this->public_ver_output_fname_overlaps << endl; + cout << "14) provided phenotype name: " << this->public_ver_phenotype_name << endl; + cout << "15) provided ancestry name: " << this->public_ver_ancestry_name << endl; // if (!RELI::snp_matching){ return (this->flag_input_snp diff --git a/src/RELI_impl.h b/src/RELI_impl.h index ce22c8b..7c2583c 100644 --- a/src/RELI_impl.h +++ b/src/RELI_impl.h @@ -298,6 +298,7 @@ namespace RELI{ data_index public_ver_selected_data_index; // selected index obj string public_ver_snp_table_fname; // snp table string public_ver_output_dir; + string public_ver_output_prefix; // output files prefix string public_ver_output_fname; // initialized as target label + "RELI.stats" string public_ver_output_fname_overlaps; // initialized as target label + "RELI.overlaps" unordered_map snptablemap; @@ -313,6 +314,7 @@ namespace RELI{ bool flag_genome_build; bool flag_chipseq_data_dir; bool flag_output_dir; + bool flag_output_prefix; /* functions */ @@ -349,6 +351,7 @@ namespace RELI{ this->flag_genome_build = false; this->flag_chipseq_data_dir = false; this->flag_output_dir = false; + this->flag_output_prefix = false; } }; //variables From ec00305c9203684358eb233057dc68fd6b6753e8 Mon Sep 17 00:00:00 2001 From: Michael Kotliar Date: Thu, 14 Feb 2019 11:00:25 -0500 Subject: [PATCH 10/10] Move header file to include folder --- CMakeLists.txt | 3 ++- {src => include}/RELI_impl.h | 0 2 files changed, 2 insertions(+), 1 deletion(-) rename {src => include}/RELI_impl.h (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 71cc5d0..233a77a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,11 +3,12 @@ project(RELI) set(CMAKE_CXX_STANDARD 11) find_package(GSL REQUIRED) +include_directories (include) set(SOURCE_FILES src/RELI.cpp src/RELI_impl.cpp - src/RELI_impl.h) + include/RELI_impl.h) add_executable(RELI ${SOURCE_FILES}) target_link_libraries(RELI GSL::gsl) \ No newline at end of file diff --git a/src/RELI_impl.h b/include/RELI_impl.h similarity index 100% rename from src/RELI_impl.h rename to include/RELI_impl.h