Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
cmake_minimum_required(VERSION 3.6)
project(RELI)

set(CMAKE_CXX_STANDARD 11)
find_package(GSL REQUIRED)
include_directories (include)

set(SOURCE_FILES
src/RELI.cpp
src/RELI_impl.cpp
include/RELI_impl.h)

add_executable(RELI ${SOURCE_FILES})
target_link_libraries(RELI GSL::gsl)
25 changes: 13 additions & 12 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,29 +1,30 @@
# ================================================================
#
# Dockerfile for RELI (public release)
#
Copy link
Copy Markdown
Contributor

@ernstki ernstki Jan 30, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Misha, what's the purpose of this (the Version: 0.0.2)? I like the idea of having examples for "Build," "Pull," and "Run," but I'm trying to understand why the version tag cannot simply be latest.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If our CWL file uses latest then after updating docker container, the inputs in CWL file should be updated too. To better control the correspondence between CWL and Docker I'd rather use more specific versions instead of latest

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Btw, try to rebuild Dockerfile. I guess, it should me much smaller, then the original one.

# Version: 0.0.2
# Author: Kevin Ernst <kevin.ernst -at- cchmc.org>
# Date: 25 July 2018
#
# Build: docker build --rm -t weirauchlab/reli:v0.0.2 -f Dockerfile .
# Pull: docker pull weirauchlab/reli:v0.0.2
# Run: docker run --rm -ti weirauchlab/reli:v0.0.2
#
# ================================================================

# start with https://store.docker.com/images/centos
FROM alpine

# install prerequisite development packages
RUN apk update
RUN apk add g++ make gsl gsl-dev bzip2 bash which

RUN mkdir -p /reli/src
WORKDIR /reli
ENV PATH $PATH:/reli

COPY src src/
COPY Makefile .

# install prerequisite development packages
# build RELI from source
RUN make
RUN ln -s RELI reli
ENV PATH $PATH:/reli

# Remove unnecessary packages pulled in as dependencies of g++
RUN apk del g++
RUN apk add libstdc++ libgcc
RUN apk update &&\
apk add g++ make gsl gsl-dev bzip2 bash which libstdc++ libgcc &&\
make &&\
ln -s RELI reli &&\
apk del g++
3 changes: 3 additions & 0 deletions src/RELI_impl.h → include/RELI_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,7 @@ namespace RELI{
data_index public_ver_selected_data_index; // selected index obj
string public_ver_snp_table_fname; // snp table
string public_ver_output_dir;
string public_ver_output_prefix; // output files prefix
string public_ver_output_fname; // initialized as target label + "RELI.stats"
string public_ver_output_fname_overlaps; // initialized as target label + "RELI.overlaps"
unordered_map<string, RELI::snp_table_data> snptablemap;
Expand All @@ -313,6 +314,7 @@ namespace RELI{
bool flag_genome_build;
bool flag_chipseq_data_dir;
bool flag_output_dir;
bool flag_output_prefix;
/*
functions
*/
Expand Down Expand Up @@ -349,6 +351,7 @@ namespace RELI{
this->flag_genome_build = false;
this->flag_chipseq_data_dir = false;
this->flag_output_dir = false;
this->flag_output_prefix = false;
}
};
//variables
Expand Down
21 changes: 17 additions & 4 deletions src/RELI.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
#include <cstring>
#include <queue>
#include <random>
#include <unistd.h>
#include "RELI_impl.h"

using namespace std;
Expand All @@ -61,13 +62,14 @@ void display_help(){
cout << endl;
cout << "-snp FILE: Phenotype snp file in 4 column bed format. [required]" << endl;
cout << "-ld FILE: Phenotype linkage disequilibrium structure for snps, default: no ld file. [optional]" << endl;
cout << "-index FILE: ChIP-seq index file. [required]" << endl;
cout << "-data DIR: Specify directory where ChIP-seq data are stored. [required]" << endl;
cout << "-target STRING: Target label of ChIP-seq experiment to be tested from index file. [required]" << endl;
cout << "-index FILE: ChIP-seq index file. [required if -target points to the label]" << endl;
cout << "-data DIR: Specify directory where ChIP-seq data are stored. [required if -target points to the label]" << endl;
cout << "-target STRING: Target label of ChIP-seq experiment to be tested from index file. Or path to the file [required]" << endl;
cout << "-build FILE: Genome build file. [required]" << endl;
cout << "-null FILE: Null model file. [required]" << endl;
cout << "-dbsnp FILE: dbSNP table file. [required]" << endl;
cout << "-out DIR: Specify output directory name under current working folder. [required]" << endl;
cout << "-prefix STRING: Specify output files prefix. [optional]" << endl;
cout << "-match: Boolean switch to turn on minor allele frequency based matching, default: off. [optional]" << endl;
cout << "-rep NUMBER: Number of permutation/simulation to be performed, default: 2000. [optional]" << endl;
cout << "-corr NUMBER: Bonferroni correction multiplier for multiple test, default: 1 [optional]" << endl;
Expand Down Expand Up @@ -129,14 +131,25 @@ int main(int argc, char* argv[]){
RELIinstance->public_ver_output_dir = argv[i + 1];
RELIinstance->flag_output_dir = true;
}
if (strcmp(argv[i], "-prefix") == 0){ // output file prefix
RELIinstance->public_ver_output_prefix = argv[i + 1];
RELIinstance->flag_output_prefix = true;
}
if (strcmp(argv[i], "-rep") == 0){ // replication number
RELI::repmax = atoi(argv[i + 1]);
}
if (strcmp(argv[i], "-corr") == 0){ // bonferroni correction multiplier
RELI::corr_muliplier = atof(argv[i + 1]);
}
if (strcmp(argv[i], "-target") == 0){ // string corresponding to target ChIP-seq data
if (strcmp(argv[i], "-target") == 0){ // string corresponding to target ChIP-seq data or path to file
RELIinstance->public_ver_target_label = argv[i + 1];
if (access(RELIinstance->public_ver_target_label.c_str(), F_OK ) != -1 ) {
// if -target points to the file, -data and -index are not required anymore and should be set to True
RELIinstance->flag_chipseq_data_dir = true;
RELIinstance->flag_chipseq_data_index = true;
RELIinstance->public_ver_data_dir = "Ignored";
RELIinstance->public_ver_data_index_fname = "Ignored";
}
RELIinstance->flag_target_label = true;
}
if (strcmp(argv[i], "-index") == 0){ // ChIP-seq index file
Expand Down
110 changes: 62 additions & 48 deletions src/RELI_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <queue>
#include <random>
#include <assert.h>
#include <unistd.h>
#include "RELI_impl.h"

#include <sys/stat.h> // for mkdir and mkdirat (requires kernel >= 2.6.16)
Expand Down Expand Up @@ -960,57 +961,69 @@ double RELI::binomial_pvalue_appr(int _total, int _overlap, double _prob){
return gsl_cdf_ugaussian_Q((_overlap - _total*_prob) / sqrt(_total*_prob*(1 - _prob)));
}
void RELI::RELIobj::public_ver_read_data_index(){
ifstream in;
in.open(this->public_ver_data_index_fname.c_str());
if (!in){
cerr << "cannot open data index file, please check with option -index" << endl;
exit(-1);
}
in.ignore(bufferSize, '\n');
while (!in.eof()){
in.getline(bufferChar, bufferSize);
in.peek();
buffer = bufferChar;
auto linevec = linehandler(buffer);
if (access(this->public_ver_target_label.c_str(), F_OK ) != -1 ) {
cout << "Skip loading chip-seq index file, because -target points to file" << endl;
} else {
ifstream in;
in.open(this->public_ver_data_index_fname.c_str());
if (!in){
cerr << "cannot open data index file, please check with option -index" << endl;
exit(-1);
}
in.ignore(bufferSize, '\n');
while (!in.eof()){
in.getline(bufferChar, bufferSize);
in.peek();
buffer = bufferChar;
auto linevec = linehandler(buffer);

RELI::data_index t;
t.datalabel = linevec.at(0);
t.source = linevec.at(1);
t.cell = linevec.at(2);
t.tf = linevec.at(3);
t.cell_label = linevec.at(4);
t.pmid = linevec.at(5);
t.group= linevec.at(6);
t.ebv_status = linevec.at(7);
t.species = linevec.at(8);

this->dataindexvec.push_back(t);
}
in.close();
cout << "chip-seq index file loaded." << endl;
RELI::data_index t;
t.datalabel = linevec.at(0);
t.source = linevec.at(1);
t.cell = linevec.at(2);
t.tf = linevec.at(3);
t.cell_label = linevec.at(4);
t.pmid = linevec.at(5);
t.group= linevec.at(6);
t.ebv_status = linevec.at(7);
t.species = linevec.at(8);

this->dataindexvec.push_back(t);
}
in.close();
cout << "chip-seq index file loaded." << endl;
}
}
void RELI::RELIobj::public_ver_set_target_data(){
auto k = find(this->dataindexvec.begin(),
this->dataindexvec.end(),
this->public_ver_target_label);
if (k != this->dataindexvec.end()){
this->public_ver_selected_data_index = *k;
if (access(this->public_ver_target_label.c_str(), F_OK ) != -1 ) {
this->public_ver_target_data_fname = this->public_ver_target_label;
}
else{
cerr << "cannot find corresponding data entry in the index file, exiting."
<< endl;
exit(-1);
else {
auto k = find(this->dataindexvec.begin(),
this->dataindexvec.end(),
this->public_ver_target_label);
if (k != this->dataindexvec.end()){
this->public_ver_selected_data_index = *k;
}
else{
cerr << "cannot find corresponding data entry in the index file, exiting."
<< endl;
exit(-1);
}
this->public_ver_target_data_fname = this->public_ver_data_dir + "/" + this->public_ver_target_label;
}
this->public_ver_target_data_fname = this->public_ver_data_dir + "/"
+ this->public_ver_target_label;
cout << "target ChIP-seq file set." << endl;
}
void RELI::RELIobj::public_ver_read_null(){
ifstream in;
}
bool RELI::RELIobj::minimum_check(){
this->public_ver_output_fname = this->public_ver_output_dir + "/" + this->public_ver_target_label + ".RELI.stats";
this->public_ver_output_fname_overlaps = this->public_ver_output_dir + "/" + this->public_ver_target_label + ".RELI.overlaps";
if (!this->flag_output_prefix){
this->public_ver_output_prefix = "RELI";
}

this->public_ver_output_fname = this->public_ver_output_dir + "/" + this->public_ver_output_prefix + ".stats";
this->public_ver_output_fname_overlaps = this->public_ver_output_dir + "/" + this->public_ver_output_prefix + ".overlaps";

cout << "Start Regulatory Element Locus Intersection (RELI) analysis." << endl;
cout << "Running arguments: " << endl;
Expand All @@ -1019,15 +1032,16 @@ bool RELI::RELIobj::minimum_check(){
cout << "3) SNP matching mode: " << RELI::snp_matching << endl;
cout << "4) null model file: " << this->public_ver_null_fname << endl;
cout << "5) dbSNP table file: " << this->public_ver_snp_table_fname << endl;
cout << "6) target chip-seq label: " << this->public_ver_target_label << endl;
cout << "6) target chip-seq label / file: " << this->public_ver_target_label << endl;
cout << "7) chip-seq index file: " << this->public_ver_data_index_fname << endl;
cout << "8) chip-seq data dir: " << this->public_ver_data_dir << endl;
cout << "9) output dir name: " << this->public_ver_output_dir << endl;
cout << "10) genome build file: " << RELI::species_chr_mapping_file << endl;
cout << "11) statistic output file name: " << this->public_ver_output_fname << endl;
cout << "12) overlap output file name: " << this->public_ver_output_fname_overlaps << endl;
cout << "13) provided phenotype name: " << this->public_ver_phenotype_name << endl;
cout << "14) provided ancestry name: " << this->public_ver_ancestry_name << endl;
cout << "9) output dir name: " << this->public_ver_output_dir << endl;
cout << "10) output file prefix: " << this->public_ver_output_prefix << endl;
cout << "11) genome build file: " << RELI::species_chr_mapping_file << endl;
cout << "12) statistic output file name: " << this->public_ver_output_fname << endl;
cout << "13) overlap output file name: " << this->public_ver_output_fname_overlaps << endl;
cout << "14) provided phenotype name: " << this->public_ver_phenotype_name << endl;
cout << "15) provided ancestry name: " << this->public_ver_ancestry_name << endl;
//
if (!RELI::snp_matching){
return (this->flag_input_snp
Expand Down Expand Up @@ -1123,7 +1137,7 @@ void RELI::RELIobj::load_snp_table(){
ifstream in;
in.open(this->public_ver_snp_table_fname.c_str());
if (!in){
cerr << "cannot load snp table, please check with option -index "
cerr << "cannot load snp table, please check with option -dbsnp "
<< this->public_ver_snp_table_fname << endl;
exit(-1);
}
Expand Down