diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..233a77a --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,14 @@ +cmake_minimum_required(VERSION 3.6) +project(RELI) + +set(CMAKE_CXX_STANDARD 11) +find_package(GSL REQUIRED) +include_directories (include) + +set(SOURCE_FILES + src/RELI.cpp + src/RELI_impl.cpp + include/RELI_impl.h) + +add_executable(RELI ${SOURCE_FILES}) +target_link_libraries(RELI GSL::gsl) \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 6877e26..12d7bfc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,29 +1,30 @@ # ================================================================ # # Dockerfile for RELI (public release) -# +# Version: 0.0.2 # Author: Kevin Ernst # Date: 25 July 2018 # +# Build: docker build --rm -t weirauchlab/reli:v0.0.2 -f Dockerfile . +# Pull: docker pull weirauchlab/reli:v0.0.2 +# Run: docker run --rm -ti weirauchlab/reli:v0.0.2 +# # ================================================================ # start with https://store.docker.com/images/centos FROM alpine -# install prerequisite development packages -RUN apk update -RUN apk add g++ make gsl gsl-dev bzip2 bash which - -RUN mkdir -p /reli/src WORKDIR /reli +ENV PATH $PATH:/reli + COPY src src/ COPY Makefile . +# install prerequisite development packages # build RELI from source -RUN make -RUN ln -s RELI reli -ENV PATH $PATH:/reli - # Remove unnecessary packages pulled in as dependencies of g++ -RUN apk del g++ -RUN apk add libstdc++ libgcc +RUN apk update &&\ + apk add g++ make gsl gsl-dev bzip2 bash which libstdc++ libgcc &&\ + make &&\ + ln -s RELI reli &&\ + apk del g++ \ No newline at end of file diff --git a/src/RELI_impl.h b/include/RELI_impl.h similarity index 98% rename from src/RELI_impl.h rename to include/RELI_impl.h index ce22c8b..7c2583c 100644 --- a/src/RELI_impl.h +++ b/include/RELI_impl.h @@ -298,6 +298,7 @@ namespace RELI{ data_index public_ver_selected_data_index; // selected index obj string public_ver_snp_table_fname; // snp table string public_ver_output_dir; + string public_ver_output_prefix; // output files prefix string public_ver_output_fname; // initialized as target label + "RELI.stats" string public_ver_output_fname_overlaps; // initialized as target label + "RELI.overlaps" unordered_map snptablemap; @@ -313,6 +314,7 @@ namespace RELI{ bool flag_genome_build; bool flag_chipseq_data_dir; bool flag_output_dir; + bool flag_output_prefix; /* functions */ @@ -349,6 +351,7 @@ namespace RELI{ this->flag_genome_build = false; this->flag_chipseq_data_dir = false; this->flag_output_dir = false; + this->flag_output_prefix = false; } }; //variables diff --git a/src/RELI.cpp b/src/RELI.cpp index d0e63c6..29d71e9 100644 --- a/src/RELI.cpp +++ b/src/RELI.cpp @@ -39,6 +39,7 @@ along with this program. If not, see . #include #include #include +#include #include "RELI_impl.h" using namespace std; @@ -61,13 +62,14 @@ void display_help(){ cout << endl; cout << "-snp FILE: Phenotype snp file in 4 column bed format. [required]" << endl; cout << "-ld FILE: Phenotype linkage disequilibrium structure for snps, default: no ld file. [optional]" << endl; - cout << "-index FILE: ChIP-seq index file. [required]" << endl; - cout << "-data DIR: Specify directory where ChIP-seq data are stored. [required]" << endl; - cout << "-target STRING: Target label of ChIP-seq experiment to be tested from index file. [required]" << endl; + cout << "-index FILE: ChIP-seq index file. [required if -target points to the label]" << endl; + cout << "-data DIR: Specify directory where ChIP-seq data are stored. [required if -target points to the label]" << endl; + cout << "-target STRING: Target label of ChIP-seq experiment to be tested from index file. Or path to the file [required]" << endl; cout << "-build FILE: Genome build file. [required]" << endl; cout << "-null FILE: Null model file. [required]" << endl; cout << "-dbsnp FILE: dbSNP table file. [required]" << endl; cout << "-out DIR: Specify output directory name under current working folder. [required]" << endl; + cout << "-prefix STRING: Specify output files prefix. [optional]" << endl; cout << "-match: Boolean switch to turn on minor allele frequency based matching, default: off. [optional]" << endl; cout << "-rep NUMBER: Number of permutation/simulation to be performed, default: 2000. [optional]" << endl; cout << "-corr NUMBER: Bonferroni correction multiplier for multiple test, default: 1 [optional]" << endl; @@ -129,14 +131,25 @@ int main(int argc, char* argv[]){ RELIinstance->public_ver_output_dir = argv[i + 1]; RELIinstance->flag_output_dir = true; } + if (strcmp(argv[i], "-prefix") == 0){ // output file prefix + RELIinstance->public_ver_output_prefix = argv[i + 1]; + RELIinstance->flag_output_prefix = true; + } if (strcmp(argv[i], "-rep") == 0){ // replication number RELI::repmax = atoi(argv[i + 1]); } if (strcmp(argv[i], "-corr") == 0){ // bonferroni correction multiplier RELI::corr_muliplier = atof(argv[i + 1]); } - if (strcmp(argv[i], "-target") == 0){ // string corresponding to target ChIP-seq data + if (strcmp(argv[i], "-target") == 0){ // string corresponding to target ChIP-seq data or path to file RELIinstance->public_ver_target_label = argv[i + 1]; + if (access(RELIinstance->public_ver_target_label.c_str(), F_OK ) != -1 ) { + // if -target points to the file, -data and -index are not required anymore and should be set to True + RELIinstance->flag_chipseq_data_dir = true; + RELIinstance->flag_chipseq_data_index = true; + RELIinstance->public_ver_data_dir = "Ignored"; + RELIinstance->public_ver_data_index_fname = "Ignored"; + } RELIinstance->flag_target_label = true; } if (strcmp(argv[i], "-index") == 0){ // ChIP-seq index file diff --git a/src/RELI_impl.cpp b/src/RELI_impl.cpp index 43fe6de..d6d5a1d 100644 --- a/src/RELI_impl.cpp +++ b/src/RELI_impl.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include "RELI_impl.h" #include // for mkdir and mkdirat (requires kernel >= 2.6.16) @@ -960,57 +961,69 @@ double RELI::binomial_pvalue_appr(int _total, int _overlap, double _prob){ return gsl_cdf_ugaussian_Q((_overlap - _total*_prob) / sqrt(_total*_prob*(1 - _prob))); } void RELI::RELIobj::public_ver_read_data_index(){ - ifstream in; - in.open(this->public_ver_data_index_fname.c_str()); - if (!in){ - cerr << "cannot open data index file, please check with option -index" << endl; - exit(-1); - } - in.ignore(bufferSize, '\n'); - while (!in.eof()){ - in.getline(bufferChar, bufferSize); - in.peek(); - buffer = bufferChar; - auto linevec = linehandler(buffer); + if (access(this->public_ver_target_label.c_str(), F_OK ) != -1 ) { + cout << "Skip loading chip-seq index file, because -target points to file" << endl; + } else { + ifstream in; + in.open(this->public_ver_data_index_fname.c_str()); + if (!in){ + cerr << "cannot open data index file, please check with option -index" << endl; + exit(-1); + } + in.ignore(bufferSize, '\n'); + while (!in.eof()){ + in.getline(bufferChar, bufferSize); + in.peek(); + buffer = bufferChar; + auto linevec = linehandler(buffer); - RELI::data_index t; - t.datalabel = linevec.at(0); - t.source = linevec.at(1); - t.cell = linevec.at(2); - t.tf = linevec.at(3); - t.cell_label = linevec.at(4); - t.pmid = linevec.at(5); - t.group= linevec.at(6); - t.ebv_status = linevec.at(7); - t.species = linevec.at(8); - - this->dataindexvec.push_back(t); - } - in.close(); - cout << "chip-seq index file loaded." << endl; + RELI::data_index t; + t.datalabel = linevec.at(0); + t.source = linevec.at(1); + t.cell = linevec.at(2); + t.tf = linevec.at(3); + t.cell_label = linevec.at(4); + t.pmid = linevec.at(5); + t.group= linevec.at(6); + t.ebv_status = linevec.at(7); + t.species = linevec.at(8); + + this->dataindexvec.push_back(t); + } + in.close(); + cout << "chip-seq index file loaded." << endl; + } } void RELI::RELIobj::public_ver_set_target_data(){ - auto k = find(this->dataindexvec.begin(), - this->dataindexvec.end(), - this->public_ver_target_label); - if (k != this->dataindexvec.end()){ - this->public_ver_selected_data_index = *k; + if (access(this->public_ver_target_label.c_str(), F_OK ) != -1 ) { + this->public_ver_target_data_fname = this->public_ver_target_label; } - else{ - cerr << "cannot find corresponding data entry in the index file, exiting." - << endl; - exit(-1); + else { + auto k = find(this->dataindexvec.begin(), + this->dataindexvec.end(), + this->public_ver_target_label); + if (k != this->dataindexvec.end()){ + this->public_ver_selected_data_index = *k; + } + else{ + cerr << "cannot find corresponding data entry in the index file, exiting." + << endl; + exit(-1); + } + this->public_ver_target_data_fname = this->public_ver_data_dir + "/" + this->public_ver_target_label; } - this->public_ver_target_data_fname = this->public_ver_data_dir + "/" - + this->public_ver_target_label; cout << "target ChIP-seq file set." << endl; } void RELI::RELIobj::public_ver_read_null(){ ifstream in; } bool RELI::RELIobj::minimum_check(){ - this->public_ver_output_fname = this->public_ver_output_dir + "/" + this->public_ver_target_label + ".RELI.stats"; - this->public_ver_output_fname_overlaps = this->public_ver_output_dir + "/" + this->public_ver_target_label + ".RELI.overlaps"; + if (!this->flag_output_prefix){ + this->public_ver_output_prefix = "RELI"; + } + + this->public_ver_output_fname = this->public_ver_output_dir + "/" + this->public_ver_output_prefix + ".stats"; + this->public_ver_output_fname_overlaps = this->public_ver_output_dir + "/" + this->public_ver_output_prefix + ".overlaps"; cout << "Start Regulatory Element Locus Intersection (RELI) analysis." << endl; cout << "Running arguments: " << endl; @@ -1019,15 +1032,16 @@ bool RELI::RELIobj::minimum_check(){ cout << "3) SNP matching mode: " << RELI::snp_matching << endl; cout << "4) null model file: " << this->public_ver_null_fname << endl; cout << "5) dbSNP table file: " << this->public_ver_snp_table_fname << endl; - cout << "6) target chip-seq label: " << this->public_ver_target_label << endl; + cout << "6) target chip-seq label / file: " << this->public_ver_target_label << endl; cout << "7) chip-seq index file: " << this->public_ver_data_index_fname << endl; cout << "8) chip-seq data dir: " << this->public_ver_data_dir << endl; - cout << "9) output dir name: " << this->public_ver_output_dir << endl; - cout << "10) genome build file: " << RELI::species_chr_mapping_file << endl; - cout << "11) statistic output file name: " << this->public_ver_output_fname << endl; - cout << "12) overlap output file name: " << this->public_ver_output_fname_overlaps << endl; - cout << "13) provided phenotype name: " << this->public_ver_phenotype_name << endl; - cout << "14) provided ancestry name: " << this->public_ver_ancestry_name << endl; + cout << "9) output dir name: " << this->public_ver_output_dir << endl; + cout << "10) output file prefix: " << this->public_ver_output_prefix << endl; + cout << "11) genome build file: " << RELI::species_chr_mapping_file << endl; + cout << "12) statistic output file name: " << this->public_ver_output_fname << endl; + cout << "13) overlap output file name: " << this->public_ver_output_fname_overlaps << endl; + cout << "14) provided phenotype name: " << this->public_ver_phenotype_name << endl; + cout << "15) provided ancestry name: " << this->public_ver_ancestry_name << endl; // if (!RELI::snp_matching){ return (this->flag_input_snp @@ -1123,7 +1137,7 @@ void RELI::RELIobj::load_snp_table(){ ifstream in; in.open(this->public_ver_snp_table_fname.c_str()); if (!in){ - cerr << "cannot load snp table, please check with option -index " + cerr << "cannot load snp table, please check with option -dbsnp " << this->public_ver_snp_table_fname << endl; exit(-1); }