diff --git a/abcAncError.cpp b/abcAncError.cpp index c5effce..549ddea 100644 --- a/abcAncError.cpp +++ b/abcAncError.cpp @@ -10,9 +10,10 @@ #include #include - + #include "analysisFunction.h" -#include "abcAncError.h" +#include "abcAncError.h" +#include "aio.h" void abcAncError::printArg(FILE *argFile){ fprintf(argFile,"--------------\n%s:\n",__FILE__); diff --git a/abcAncestry.cpp b/abcAncestry.cpp index fbe5423..409fdc1 100644 --- a/abcAncestry.cpp +++ b/abcAncestry.cpp @@ -1,8 +1,9 @@ +#include #include #include "shared.h" #include "analysisFunction.h" #include "abcAncestry.h" -#include +#include "aio.h" void abcAncestry::printArg(FILE *argFile){ fprintf(argFile,"------------------------\n%s:\n",__FILE__); diff --git a/abcAsso.cpp b/abcAsso.cpp index e927904..80648a1 100644 --- a/abcAsso.cpp +++ b/abcAsso.cpp @@ -20,6 +20,7 @@ #include "abcFreq.h" #include "abcAsso.h" +#include "aio.h" void abcAsso::printArg(FILE *argFile){ fprintf(argFile,"-------------\n%s:\n",__FILE__); diff --git a/abcCallGenotypes.cpp b/abcCallGenotypes.cpp index 4b87519..20b7da5 100644 --- a/abcCallGenotypes.cpp +++ b/abcCallGenotypes.cpp @@ -9,7 +9,7 @@ #include "analysisFunction.h" #include "abcCallGenotypes.h" - +#include "aio.h" //filename of dumped file #define GENO ".geno.gz" diff --git a/abcCounts.cpp b/abcCounts.cpp index d8c0f95..e7b88f8 100644 --- a/abcCounts.cpp +++ b/abcCounts.cpp @@ -4,11 +4,12 @@ Class that works with counts */ +#include #include #include "analysisFunction.h" #include "abc.h" #include "abcCounts.h" -#include +#include "aio.h" void abcCounts::printArg(FILE *argFile){ fprintf(argFile,"---------------\n%s:\n",__FILE__); diff --git a/abcCovar.cpp b/abcCovar.cpp index 6db9d26..de17e12 100644 --- a/abcCovar.cpp +++ b/abcCovar.cpp @@ -4,7 +4,7 @@ #include "analysisFunction.h" #include "abcCovar.h" #include "abcFreq.h" - +#include "aio.h" typedef struct{ double *res; diff --git a/abcDstat.cpp b/abcDstat.cpp index 65fa9db..e514af8 100644 --- a/abcDstat.cpp +++ b/abcDstat.cpp @@ -8,15 +8,12 @@ #include #include - #include "shared.h" #include "analysisFunction.h" - #include #include "abc.h" - #include "abcDstat.h" - +#include "aio.h" void abcDstat::printArg(FILE *argFile){ diff --git a/abcDstat2.cpp b/abcDstat2.cpp index 9e2ff52..581dca7 100644 --- a/abcDstat2.cpp +++ b/abcDstat2.cpp @@ -6,6 +6,7 @@ #include #include "abc.h" #include "abcDstat2.h" +#include "aio.h" typedef struct { double **NUM; diff --git a/abcError.cpp b/abcError.cpp index d55ddb7..c8bfa48 100644 --- a/abcError.cpp +++ b/abcError.cpp @@ -5,6 +5,7 @@ #include "analysisFunction.h" #include "abcError.h" #include "abcFreq.h" +#include "aio.h" //from analysisMaf.cpp double phatFun(suint *counts,int nFiles,double eps,char major,char minor) ; diff --git a/abcFilterSNP.cpp b/abcFilterSNP.cpp index d73338f..1d3f230 100644 --- a/abcFilterSNP.cpp +++ b/abcFilterSNP.cpp @@ -22,6 +22,7 @@ #include "abcHWE.h" #include "abcCallGenotypes.h" #include +#include "aio.h" /* wilcox manwhitney u test, whatever ,implementation is from wiki diff --git a/abcFreq.cpp b/abcFreq.cpp index 97cc8d5..d46c528 100644 --- a/abcFreq.cpp +++ b/abcFreq.cpp @@ -17,6 +17,7 @@ #include "abcSaf.h" #include "abcFilter.h" #include "analysisFunction.h" +#include "aio.h" int abcFreq::emIter = EM_NITER; double abcFreq::EM_start = EM_START; diff --git a/abcGL.cpp b/abcGL.cpp index 332737f..abd6dd7 100644 --- a/abcGL.cpp +++ b/abcGL.cpp @@ -17,21 +17,18 @@ 2) beagle output (requires estimation of major/minor)\ 3) binary beagle 4) text output of the 10 llhs persample - */ - - #include #include #include #include "analysisFunction.h" #include "abc.h" - #include "abcGL.h" #include "abcError.h" #include "phys_likes.h" #include "abcMajorMinor.h" +#include "aio.h" extern int refToInt[256]; diff --git a/abcGetFasta.cpp b/abcGetFasta.cpp index d0fde06..866022b 100644 --- a/abcGetFasta.cpp +++ b/abcGetFasta.cpp @@ -17,7 +17,7 @@ #include "pthread.h" #include "abc.h" #include "abcGetFasta.h" - +#include "aio.h" //this will initialize our data perFasta *init(const char *fname){ diff --git a/abcHWE.cpp b/abcHWE.cpp index 2944867..8003fa1 100644 --- a/abcHWE.cpp +++ b/abcHWE.cpp @@ -18,13 +18,15 @@ #include #include "abcFreq.h" #include "abcHWE.h" - +#include "aio.h" void abcHWE::printArg(FILE *argFile){ fprintf(argFile,"-------------\n%s:\n",__FILE__); fprintf(argFile,"\t-doHWE\t%d\n",doHWE); fprintf(argFile,"\t-minHWEpval\t%f\n",minHWEpval); fprintf(argFile,"\t-maxHWEpval\t%f\n",maxHWEpval); + fprintf(argFile,"\t-maxHetFreq\t%f\n",maxHetFreq);//nspope;hetFilter + fprintf(argFile,"\t-minHetFreq\t%f\n",minHetFreq);//nspope;hetFilter fprintf(argFile,"\n"); } @@ -37,6 +39,8 @@ void abcHWE::getOptions(argStruct *arguments){ minHWEpval = angsd::getArg("-minHWEpval",minHWEpval,arguments); maxHWEpval = angsd::getArg("-maxHWEpval",maxHWEpval,arguments); + maxHetFreq = angsd::getArg("-maxHetFreq",maxHetFreq,arguments);//nspope;hetFilter + minHetFreq = angsd::getArg("-minHetFreq",minHetFreq,arguments);//nspope;hetFilter if(arguments->inputtype==INPUT_BEAGLE||arguments->inputtype==INPUT_VCF_GP){ @@ -55,6 +59,8 @@ abcHWE::abcHWE(const char *outfiles,argStruct *arguments,int inputtype){ doHWE = 0; maxHWEpval = -1; minHWEpval = -1; + maxHetFreq = -1;//nspope;hetFilter + minHetFreq = -1;//nspope;hetFilter testMe=0; tolStop = 0.00001; bufstr.s=NULL;bufstr.l=bufstr.m=0; @@ -77,8 +83,14 @@ abcHWE::abcHWE(const char *outfiles,argStruct *arguments,int inputtype){ outfileZ = aio::openFileBG(outfiles,postfix); //print header - const char *str = "Chromo\tPosition\tMajor\tMinor\thweFreq\tFreq\tF\tLRT\tp-value\n"; - aio::bgzf_write(outfileZ,str,strlen(str)); + if (maxHetFreq != -1 || minHetFreq != -1) //nspope;maxHetFilter + { + const char *str = "Chromo\tPosition\tMajor\tMinor\thweFreq\tFreq\tF\tLRT\tp-value\thetFreq\n"; + aio::bgzf_write(outfileZ,str,strlen(str)); + } else { + const char *str = "Chromo\tPosition\tMajor\tMinor\thweFreq\tFreq\tF\tLRT\tp-value\n"; + aio::bgzf_write(outfileZ,str,strlen(str)); + } } @@ -98,6 +110,7 @@ void abcHWE::clean(funkyPars *pars){ funkyHWE *hweStruct =(funkyHWE *) pars->extras[index]; delete[] hweStruct->freq; delete[] hweStruct->freqHWE; + delete[] hweStruct->hetfreq;//nspope;hetFilter delete[] hweStruct->F; delete[] hweStruct->like0; delete[] hweStruct->pval; @@ -118,7 +131,11 @@ void abcHWE::print(funkyPars *pars){ float lrt= 2*hweStruct->like0[s]-2*hweStruct->likeF[s]; - ksprintf(&bufstr,"%s\t%d\t%c\t%c\t%f\t%f\t%f\t%e\t%e\n",header->target_name[pars->refId],pars->posi[s]+1,intToRef[pars->major[s]],intToRef[pars->minor[s]],hweStruct->freqHWE[s],hweStruct->freq[s],hweStruct->F[s],lrt,hweStruct->pval[s]); + if (maxHetFreq!=-1 || minHetFreq!=-1){//nspope;hetFilter + ksprintf(&bufstr,"%s\t%d\t%c\t%c\t%f\t%f\t%f\t%e\t%e\t%f\n",header->target_name[pars->refId],pars->posi[s]+1,intToRef[pars->major[s]],intToRef[pars->minor[s]],hweStruct->freqHWE[s],hweStruct->freq[s],hweStruct->F[s],lrt,hweStruct->pval[s],hweStruct->hetfreq[s]); + } else { + ksprintf(&bufstr,"%s\t%d\t%c\t%c\t%f\t%f\t%f\t%e\t%e\n",header->target_name[pars->refId],pars->posi[s]+1,intToRef[pars->major[s]],intToRef[pars->minor[s]],hweStruct->freqHWE[s],hweStruct->freq[s],hweStruct->F[s],lrt,hweStruct->pval[s]); + } } aio::bgzf_write(outfileZ,bufstr.s,bufstr.l);bufstr.l=0; @@ -134,6 +151,7 @@ void abcHWE::run(funkyPars *pars){ double *freq = new double[pars->numSites]; double *freqHWE = new double[pars->numSites]; + double *hetfreq = new double[pars->numSites];//nspope;hetFilter double *F = new double[pars->numSites]; double *like0 = new double[pars->numSites]; double *likeF = new double[pars->numSites]; @@ -188,15 +206,19 @@ void abcHWE::run(funkyPars *pars){ if(minHWEpval!=-1 && pval[s] < minHWEpval) pars->keepSites[s] = 0; - - + //nspope;hetFilter + hetfreq[s] = 2.*(1.-x[1])*x[0]*(1.-x[0]); + if( (maxHetFreq!=-1 && hetfreq[s] > maxHetFreq) || + (minHetFreq!=-1 && hetfreq[s] < minHetFreq) ) + pars->keepSites[s] = 0; } hweStruct->freq=freq; hweStruct->freqHWE=freqHWE; + hweStruct->hetfreq=hetfreq;//nspope;hetFilter hweStruct->F=F; hweStruct->like0=like0; hweStruct->pval=pval; diff --git a/abcHWE.h b/abcHWE.h index f99fc9b..1607844 100644 --- a/abcHWE.h +++ b/abcHWE.h @@ -10,6 +10,8 @@ typedef struct { double *pval; double *F; + + double *hetfreq;//nspope;hetFilter }funkyHWE; @@ -26,6 +28,8 @@ class abcHWE:public abc{ int doHWE; double minHWEpval; double maxHWEpval; + double maxHetFreq;//nspope;hetFilter + double minHetFreq;//nspope;hetFilter int testMe; double tolStop; double differ(double *x,double *y); diff --git a/abcHWE_F.cpp b/abcHWE_F.cpp index 4672dbf..f46fa5a 100644 --- a/abcHWE_F.cpp +++ b/abcHWE_F.cpp @@ -10,7 +10,7 @@ #include #include "abcFreq.h" #include "abcHWE_F.h" - +#include "aio.h" void abcHWE_F::printArg(FILE *argFile){ fprintf(argFile,"-------------\n%s:\n",__FILE__); diff --git a/abcHaploCall.cpp b/abcHaploCall.cpp index 09f11c8..1963559 100644 --- a/abcHaploCall.cpp +++ b/abcHaploCall.cpp @@ -12,6 +12,7 @@ #include #include "analysisFunction.h" #include "abcHaploCall.h" +#include "aio.h" void abcHaploCall::printArg(FILE *argFile){ fprintf(argFile,"--------------\n%s:\n",__FILE__); diff --git a/abcHetPlas.cpp b/abcHetPlas.cpp index cd96c18..e7920cc 100644 --- a/abcHetPlas.cpp +++ b/abcHetPlas.cpp @@ -5,6 +5,8 @@ #include "analysisFunction.h" #include "shared.h" #include "abcHetPlas.h" +#include "aio.h" + #define MAX_QS 256 //struct which contains the results for a single chunk diff --git a/abcIBS.cpp b/abcIBS.cpp index f9c1f7b..71c6cf3 100644 --- a/abcIBS.cpp +++ b/abcIBS.cpp @@ -6,13 +6,13 @@ part of angsd NB for cov. maf > 0.001 is hardcoded (don't want the world to explode) */ - +#include #include #include #include #include "analysisFunction.h" #include "abcIBS.h" -#include +#include "aio.h" void abcIBS::printArg(FILE *argFile){ fprintf(argFile,"--------------\n%s:\n",__FILE__); diff --git a/abcMismatch.cpp b/abcMismatch.cpp index 5f36fc1..93404b0 100644 --- a/abcMismatch.cpp +++ b/abcMismatch.cpp @@ -4,7 +4,7 @@ #include "analysisFunction.h" #include "abcMismatch.h" - +#include "aio.h" #define rlen 400 #define qslen 64 diff --git a/abcPSMC.cpp b/abcPSMC.cpp index af466c4..2d6ae29 100644 --- a/abcPSMC.cpp +++ b/abcPSMC.cpp @@ -8,8 +8,7 @@ #include "shared.h" #include #include "abcPSMC.h" - - +#include "aio.h" void abcPSMC::printArg(FILE *argFile){ fprintf(argFile,"------------------------\n%s:\n",__FILE__); diff --git a/abcRAD.cpp b/abcRAD.cpp index ca0f9b9..5774145 100644 --- a/abcRAD.cpp +++ b/abcRAD.cpp @@ -29,7 +29,7 @@ #include //<-used for isupper/islower #include //in order to construct large array/objects #include - +#include "aio.h" #include "shared.h" //<-contains the struct defintino for funkypars #include "analysisFunction.h" //<-contains some utility functions, usefull for parsing args #include "abcRAD.h"//contains the analysis class defition associated with this file diff --git a/abcSaf.cpp b/abcSaf.cpp index e7556bf..a943d10 100644 --- a/abcSaf.cpp +++ b/abcSaf.cpp @@ -8,55 +8,13 @@ #include "analysisFunction.h" #include #include "abc.h" - #include "abcSaf.h" +#include "aio.h" #define MINLIKE -1000.0 //this is for setting genotypelikelhoods to missing (EXPLAINED BELOW) - - -void writeAllThetas(BGZF *dat,FILE *idx,char *tmpChr,int64_t &offs,std::vector &p,std::vector *res,int nChr){ - assert(dat!=NULL); - assert(idx!=NULL); - assert(p.size()==res[0].size()); - for(int i=1;i<5;i++) - assert(p.size()==res[i].size());//DRAGON, might be discarded during compilation - - if(p.size()!=0&&tmpChr!=NULL){ - //write clen and chromoname for both index and bgzf - size_t clen = strlen(tmpChr); - fwrite(&clen,sizeof(size_t),1,idx); - fwrite(tmpChr,1,clen,idx); - assert(sizeof(size_t)==bgzf_write(dat,&clen,sizeof(size_t))); - assert(clen==bgzf_write(dat,tmpChr,clen)); - - //write number of sites for both index and bgzf - size_t tt = p.size(); - fwrite(&tt,sizeof(size_t),1,idx); - assert(sizeof(size_t)==bgzf_write(dat,&tt,sizeof(size_t))); - //write nChr for both index and bgzf - fwrite(&nChr,sizeof(int),1,idx); - assert(sizeof(int)==bgzf_write(dat,&nChr,sizeof(int))); - - //write bgzf offset into idx - fwrite(&offs,sizeof(int64_t),1,idx); - for(int i=0;i Please use -doPost 3 instead for -doSaf 3\n"); exit(0); } if(doSaf>0||doPost==3){ if(pest!=NULL){ - if(fold==0) - prior=angsd::readDouble(pest,arguments->nInd*2+1); - else - prior=angsd::readDouble(pest,arguments->nInd+1); + prior=angsd::readDouble(pest,arguments->nInd*2+1); int nd=arguments->nInd*2+1; - if(fold) - nd=arguments->nInd+1; + double tts=0; for(int i=0;inInd,i); } mynchr = 2*arguments->nInd; - if(fold) - mynchr = arguments->nInd; } @@ -128,21 +77,10 @@ void abcSaf::getOptions(argStruct *arguments){ int GL = 0; GL = angsd::getArg("-GL",GL,arguments); - doThetas= angsd::getArg("-doThetas",doThetas,arguments); - if(doSaf==0&&doThetas==0&doPost!=3) + if(doSaf==0&&doPost!=3) return; - if(doThetas!=0&& doSaf==0){ - fprintf(stderr,"\t-> Must estimate joint llh (-doSaf) when estimating thetas\n"); - exit(0); - } - - - if(pest==NULL&& doThetas){ - fprintf(stderr,"\t-> Must supply a sfs used as prior for estimating thetas\n"); - exit(0); - } underFlowProtect=angsd::getArg("-underFlowProtect",underFlowProtect,arguments); @@ -219,7 +157,6 @@ double **abcSaf::myComb2Tab=NULL; double *abcSaf::prior = NULL; abcSaf::abcSaf(const char *outfiles,argStruct *arguments,int inputtype){ - theta_res = NULL; tsktsktsk=0; tmpChr = NULL; isHap =0; @@ -227,14 +164,10 @@ abcSaf::abcSaf(const char *outfiles,argStruct *arguments,int inputtype){ const char *SAF = ".saf.gz"; const char *SAFPOS =".saf.pos.gz"; const char *SAFIDX =".saf.idx"; - //for use when dumping binary indexed thetas files - const char *THETAS =".thetas.gz"; - const char *THETASIDX =".thetas.idx"; //for use when dumping called genotypes const char *GENO = ".saf.geno.gz"; //default underFlowProtect = 0; - fold =0; isSim =0; //from command line anc=NULL; @@ -243,14 +176,10 @@ abcSaf::abcSaf(const char *outfiles,argStruct *arguments,int inputtype){ prior = NULL; doSaf=0; doPost =0; - doThetas = 0; outfileSAF = NULL; outfileSAFPOS = NULL; outfileSAFIDX = NULL; - theta_dat = NULL; - theta_idx = NULL; outfileGprobs = NULL; - scalings = NULL; nnnSites = 0; if(arguments->argc==2){ if(!strcasecmp(arguments->argv[1],"-doSaf")){ @@ -269,17 +198,13 @@ abcSaf::abcSaf(const char *outfiles,argStruct *arguments,int inputtype){ } printArg(arguments->argumentFile); newDim = 2*arguments->nInd+1; - if(fold && isHap){ - fprintf(stderr,"\t-> folding for haploid hasn't been defined and implemented\n"); - exit(0); - } - - if(fold||isHap) + + if(isHap) newDim = arguments->nInd+1; if(doSaf==3){ fprintf(stderr,"\t-> -doSaf 3 Does not work in combination with -snpstat and other snpfilters\n"); outfileGprobs = aio::openFileBG(outfiles,GENO); - }else if(doSaf!=0&&doThetas==0){ + }else if(doSaf!=0){ outfileSAF = aio::openFileBG(outfiles,SAF); outfileSAFPOS = aio::openFileBG(outfiles,SAFPOS); outfileSAFIDX = aio::openFile(outfiles,SAFIDX); @@ -292,62 +217,21 @@ abcSaf::abcSaf(const char *outfiles,argStruct *arguments,int inputtype){ size_t tt = newDim-1; fwrite(&tt,sizeof(tt),1,outfileSAFIDX); - }else { - char buf[8] = "thetav2"; - theta_dat = aio::openFileBG(outfiles,THETAS); - theta_idx = aio::openFile(outfiles,THETASIDX); - aio::bgzf_write(theta_dat,buf,8); - fwrite(buf,1,8,theta_idx); - theta_res = new std::vector[5]; - offs_thetas = bgzf_tell(theta_dat); - - - aConst=0; - int nChr = 2*arguments->nInd; - for(int i=1;inInd-i]))-log(2.0);//THORFINN NEW - - } } -#if 0//just for printing the scalings - int lim = nChr; - if(fold) - lim = newDim; - for(int i=0;ioklist[it] = 2; - else{ - r->oklist[it] = 1; - r->pLikes[myCounter] =new float[numInds+1]; - for(int iii=0;iiipLikes[myCounter][iii] = sumMinors[iii]; - // memcpy(r->pLikes[myCounter],sumMinors,sizeof(double)*(numInds+1)); - myCounter++; - } - }else{ - for(int i=0;i<2*numInds+1;i++) - sumMinors[i] = log(sumMinors[i]); - // angsd::logrescale(sumMinors,2*numInds+1); - double ts=0; - for(int i=0;ioklist[it] = 2; + else{ + r->oklist[it] = 1; + r->pLikes[myCounter] =new float[2*numInds+1]; + for(int iii=0;iii<2*numInds+1;iii++) + r->pLikes[myCounter][iii] = sumMinors[iii]; - if(std::isnan(sumMinors[0])) - r->oklist[it] = 2; - else{ - r->oklist[it] = 1; - r->pLikes[myCounter] =new float[2*numInds+1]; - for(int iii=0;iii<2*numInds+1;iii++) - r->pLikes[myCounter][iii] = sumMinors[iii]; - - // memcpy(r->pLikes[myCounter],sumMinors,sizeof(double)*(2*numInds+1)); - myCounter++; - } + // memcpy(r->pLikes[myCounter],sumMinors,sizeof(double)*(2*numInds+1)); + myCounter++; } + } } -void abcSaf::algoJointPost(double **post,int nSites,int nInd,int *keepSites,realRes *r,int doFold){ +void abcSaf::algoJointPost(double **post,int nSites,int nInd,int *keepSites,realRes *r){ // fprintf(stderr,"[%s] nsites:%d r:%p r->pLikes:%p\n",__FUNCTION__,nSites,r,r->pLikes); int myCounter =0; for(int s=0;soklist[s] = 2; @@ -656,7 +507,7 @@ void abcSaf::algoJointPost(double **post,int nSites,int nInd,int *keepSites,real int homo[4] = {0,4,7,9}; -void abcSaf::algoJointHap(double **liks,char *anc,int nsites,int numInds,int underFlowProtect, int fold,int *keepSites,realRes *r,int noTrans) { +void abcSaf::algoJointHap(double **liks,char *anc,int nsites,int numInds,int underFlowProtect, int *keepSites,realRes *r,int noTrans) { int myCounter =0; if(anc==NULL||liks==NULL){ fprintf(stderr,"problems receiving data in [%s] will exit (likes=%p||ancestral=%p)\n",__FUNCTION__,liks,anc); @@ -797,16 +648,12 @@ void abcSaf::algoJointHap(double **liks,char *anc,int nsites,int numInds,int und } //exit(0); } - if(fold){ - fprintf(stderr,"folding is not implemented for -doSaf 1 with haploids. Please write developer if you would like this implemented\n"); - exit(0); - } } -void abcSaf::algoJoint(double **liks,char *anc,int nsites,int numInds,int underFlowProtect, int fold,int *keepSites,realRes *r,int noTrans) { +void abcSaf::algoJoint(double **liks,char *anc,int nsites,int numInds,int underFlowProtect, int *keepSites,realRes *r,int noTrans) { // fprintf(stderr,"[%s]\n",__FUNCTION__); int myCounter =0; if(anc==NULL||liks==NULL){ @@ -952,46 +799,27 @@ void abcSaf::algoJoint(double **liks,char *anc,int nsites,int numInds,int underF we do 3 things. 1. log scaling everyting 2. rescaling to the most likely in order to avoid underflows in the optimization - 3. we might do a fold also. + (3. we might do a fold also.) moved to realSFS */ - if(fold) { - int newDim = numInds+1; - for(int i=0;ioklist[it] = 2; - else{ - r->oklist[it] = 1; - r->pLikes[myCounter] =new float[numInds+1]; - for(int iii=0;iiipLikes[myCounter][iii] = sumMinors[iii]; - //memcpy(r->pLikes[myCounter],sumMinors,sizeof(double)*(numInds+1)); - myCounter++; - } - }else{ - for(int i=0;i<2*numInds+1;i++) - sumMinors[i] = log(sumMinors[i]); - angsd::logrescale(sumMinors,2*numInds+1); - // fprintf(stderr,"sumMinors[0]:%f\n",sumMinors[0]); - if(std::isnan(sumMinors[0])) - r->oklist[it] = 2; - else{ - r->oklist[it] = 1; - r->pLikes[myCounter] =new float[2*numInds+1]; - for(int iii=0;iii<2*numInds+1;iii++){ - // fprintf(stderr,"iii:%f\n",sumMinors[iii]); - r->pLikes[myCounter][iii] = sumMinors[iii]; - } - // memcpy(r->pLikes[myCounter],sumMinors,sizeof(double)*(2*numInds+1)); - myCounter++; + for(int i=0;i<2*numInds+1;i++) + sumMinors[i] = log(sumMinors[i]); + angsd::logrescale(sumMinors,2*numInds+1); + // fprintf(stderr,"sumMinors[0]:%f\n",sumMinors[0]); + if(std::isnan(sumMinors[0])) + r->oklist[it] = 2; + else{ + r->oklist[it] = 1; + r->pLikes[myCounter] =new float[2*numInds+1]; + for(int iii=0;iii<2*numInds+1;iii++){ + // fprintf(stderr,"iii:%f\n",sumMinors[iii]); + r->pLikes[myCounter][iii] = sumMinors[iii]; } + // memcpy(r->pLikes[myCounter],sumMinors,sizeof(double)*(2*numInds+1)); + myCounter++; } } - } @@ -1000,7 +828,7 @@ void abcSaf::algoJoint(double **liks,char *anc,int nsites,int numInds,int underF -void abcSaf::algoJointMajorMinor(double **liks,int nsites,int numInds, int *keepSites,realRes *r,int fold,char *major, char *minor) { +void abcSaf::algoJointMajorMinor(double **liks,int nsites,int numInds, int *keepSites,realRes *r,char *major, char *minor) { // fprintf(stderr,"[%s]\n",__FUNCTION__); int myCounter =0; if(liks==NULL){ @@ -1125,46 +953,27 @@ void abcSaf::algoJointMajorMinor(double **liks,int nsites,int numInds, int *keep we do 3 things. 1. log scaling everyting 2. rescaling to the most likely in order to avoid underflows in the optimization - 3. we might do a fold also. + (3. we might do a fold also.) in realSFS now */ - if(fold) { - int newDim = numInds+1; - for(int i=0;ioklist[it] = 2; - else{ - r->oklist[it] = 1; - r->pLikes[myCounter] =new float[numInds+1]; - for(int iii=0;iiipLikes[myCounter][iii] = sumMinors[iii]; - //memcpy(r->pLikes[myCounter],sumMinors,sizeof(double)*(numInds+1)); - myCounter++; - } - }else{ - for(int i=0;i<2*numInds+1;i++) - sumMinors[i] = log(sumMinors[i]); - angsd::logrescale(sumMinors,2*numInds+1); - // fprintf(stderr,"sumMinors[0]:%f\n",sumMinors[0]); - if(std::isnan(sumMinors[0])) - r->oklist[it] = 2; - else{ - r->oklist[it] = 1; - r->pLikes[myCounter] =new float[2*numInds+1]; - for(int iii=0;iii<2*numInds+1;iii++){ - // fprintf(stderr,"iii:%f\n",sumMinors[iii]); - r->pLikes[myCounter][iii] = sumMinors[iii]; - } - // memcpy(r->pLikes[myCounter],sumMinors,sizeof(double)*(2*numInds+1)); - myCounter++; + for(int i=0;i<2*numInds+1;i++) + sumMinors[i] = log(sumMinors[i]); + angsd::logrescale(sumMinors,2*numInds+1); + // fprintf(stderr,"sumMinors[0]:%f\n",sumMinors[0]); + if(std::isnan(sumMinors[0])) + r->oklist[it] = 2; + else{ + r->oklist[it] = 1; + r->pLikes[myCounter] =new float[2*numInds+1]; + for(int iii=0;iii<2*numInds+1;iii++){ + // fprintf(stderr,"iii:%f\n",sumMinors[iii]); + r->pLikes[myCounter][iii] = sumMinors[iii]; } + // memcpy(r->pLikes[myCounter],sumMinors,sizeof(double)*(2*numInds+1)); + myCounter++; } } - } @@ -1198,16 +1007,16 @@ void abcSaf::run(funkyPars *p){ r->pLikes=new float*[p->numSites]; if(doSaf==1&&isHap==0) - algoJoint(p->likes,p->anc,p->numSites,p->nInd,underFlowProtect,fold,p->keepSites,r,noTrans); + algoJoint(p->likes,p->anc,p->numSites,p->nInd,underFlowProtect,p->keepSites,r,noTrans); else if(doSaf==1&&isHap==1) - algoJointHap(p->likes,p->anc,p->numSites,p->nInd,underFlowProtect,fold,p->keepSites,r,noTrans); + algoJointHap(p->likes,p->anc,p->numSites,p->nInd,underFlowProtect,p->keepSites,r,noTrans); else if(doSaf==2){ freqStruct *freq = (freqStruct *) p->extras[6]; - filipe::algoJoint(p->likes,p->anc,p->numSites,p->nInd,underFlowProtect,fold,p->keepSites,r,noTrans,doSaf,p->major,p->minor,freq->freq,filipeIndF,newDim); + filipe::algoJoint(p->likes,p->anc,p->numSites,p->nInd,underFlowProtect,p->keepSites,r,noTrans,doSaf,p->major,p->minor,freq->freq,filipeIndF,newDim); }else if(doSaf==4){ - algoJointPost(p->post,p->numSites,p->nInd,p->keepSites,r,fold); + algoJointPost(p->post,p->numSites,p->nInd,p->keepSites,r); }else if(doSaf==5){ - algoJointMajorMinor(p->likes,p->numSites,p->nInd,p->keepSites,r,fold,p->major,p->minor); + algoJointMajorMinor(p->likes,p->numSites,p->nInd,p->keepSites,r,p->major,p->minor); } p->extras[index] = r; @@ -1261,63 +1070,6 @@ void printFull(funkyPars *p,int index,BGZF *outfileSFS,BGZF *outfileSFSPOS,char //fprintf(stderr,"asdf: nnnSites:%d\n",nnnSites); } - -void abcSaf::calcThetas(funkyPars *pars,int index,double *prior,std::vector *vecs,std::vector &myposi,int newDim){ - realRes *r=(realRes *) pars->extras[index]; - int id=0; - for(int i=0; inumSites;i++) { - - if(r->oklist[i]==1 &&pars->keepSites[i]!=0){ - double workarray[newDim]; - for(int iii=0;iiipLikes[id][iii]; - id++; - //First find thetaW: nSeg/a1 - double pv,seq; - if(fold) - pv = 1-exp(workarray[0]); - else - pv = 1-exp(workarray[0])-exp(workarray[2*pars->nInd]); - - if(pv<0)//catch underflow - seq=log(0.0); - else - seq = log(pv)-aConst;//watterson - vecs[0].push_back(seq); - // ksprintf(&kb,"%s\t%d\t%f\t",header->target_name[pars->refId],pars->posi[i]+1,seq); - myposi.push_back(pars->posi[i]); - if(fold==0) { - double pairwise=0; //Find theta_pi the pairwise - double thL=0; //Find thetaL sfs[i]*i; - double thH=0;//thetaH sfs[i]*i^2 - for(size_t ii=1;ii<2*pars->nInd;ii++){ - - pairwise += exp(workarray[ii]+scalings[ii] ); - double li=log(ii); - - thL += exp(workarray[ii])*ii; - thH += exp(2*li+workarray[ii]); - } - vecs[1].push_back(log(pairwise)-aConst2); - vecs[2].push_back(workarray[1]); - vecs[3].push_back(log(thH)-aConst2); - vecs[4].push_back(log(thL)-aConst3); - }else{ - double pairwise=0; //Find theta_pi the pairwise - for(size_t ii=1;iioklist[i]==2) - fprintf(stderr,"PROBS at: %s\t%d\n",header->target_name[pars->refId],pars->posi[i]+1); - } -} - - - - void abcSaf::print(funkyPars *p){ if(p->numSites==0||(doSaf==0)) return; @@ -1352,15 +1104,11 @@ void abcSaf::print(funkyPars *p){ } } - if(doThetas==0){ - //fprintf(stderr,"\t-> newdi:%d\n",newDim); - if(isHap==0) - printFull(p,index,outfileSAF,outfileSAFPOS,header->target_name[p->refId],newDim,nnnSites); - else - printFull(p,index,outfileSAF,outfileSAFPOS,header->target_name[p->refId],newDim,nnnSites); - } - else - calcThetas(p,index,prior,theta_res,theta_pos,newDim); + if(isHap==0) + printFull(p,index,outfileSAF,outfileSAFPOS,header->target_name[p->refId],newDim,nnnSites); + else + printFull(p,index,outfileSAF,outfileSAFPOS,header->target_name[p->refId],newDim,nnnSites); + } } @@ -1753,12 +1501,9 @@ void abcSaf::writeAll(){ } void abcSaf::changeChr(int refId) { - if(doSaf&&doThetas==0&&doSaf!=3) + if(doSaf&&doSaf!=3) writeAll(); - if(doSaf&&doThetas==1&&doSaf!=3) - writeAllThetas(theta_dat,theta_idx,tmpChr,offs_thetas,theta_pos,theta_res,mynchr); - free(tmpChr); tmpChr = strdup(header->target_name[refId]); } diff --git a/abcSaf.h b/abcSaf.h index bccd876..0dfe0da 100644 --- a/abcSaf.h +++ b/abcSaf.h @@ -6,43 +6,30 @@ typedef struct{ class abcSaf : public abc{ - std::vector *theta_res; - std::vector theta_pos; int doSaf; BGZF *outfileGprobs; BGZF *outfileSAF; FILE *outfileSAFIDX; BGZF *outfileSAFPOS; - BGZF *theta_dat; - FILE *theta_idx; int underFlowProtect; - int fold; int isSim; int noTrans; char *anc; char *pest; int doPost; - int doThetas; - void calcThetas(funkyPars *p,int index,double *prior,std::vector *vecs,std::vector &myposi,int newdim); - - double aConst; - double aConst2; - double aConst3; - double *scalings; int tsktsktsk; int isHap; double *filipeIndF; int ishap; int newDim; int64_t offs[2]; - int64_t offs_thetas; int nnnSites; char *tmpChr; - void algoJointPost(double **post,int nSites,int nInd,int *keepSites,realRes *r,int doFold); + void algoJointPost(double **post,int nSites,int nInd,int *keepSites,realRes *r); void algoGeno(int refId,double **liks,char *major,char *minor,int nsites,int numInds,kstring_t *sfsfile,int underFlowProtect, int *posi,int *keepSites,double *pest); - void algoJoint(double **liks,char *anc,int nsites,int numInds,int underFlowProtect, int fold,int *keepSites,realRes *r,int noTrans); - void algoJointHap(double **liks,char *anc,int nsites,int numInds,int underFlowProtect, int fold,int *keepSites,realRes *r,int noTrans); - void algoJointMajorMinor(double **liks,int nsites,int numInds, int *keepSites,realRes *r,int fold,char *major, char *minor); + void algoJoint(double **liks,char *anc,int nsites,int numInds,int underFlowProtect, int *keepSites,realRes *r,int noTrans); + void algoJointHap(double **liks,char *anc,int nsites,int numInds,int underFlowProtect, int *keepSites,realRes *r,int noTrans); + void algoJointMajorMinor(double **liks,int nsites,int numInds, int *keepSites,realRes *r, char *major, char *minor); void writeAll(); int mynchr; diff --git a/abcScounts.cpp b/abcScounts.cpp index 60ae7c0..4d2eaa6 100644 --- a/abcScounts.cpp +++ b/abcScounts.cpp @@ -1,10 +1,9 @@ #include +#include #include "shared.h" #include "analysisFunction.h" #include "abcScounts.h" -#include - - +#include "aio.h" typedef unsigned char uchar; diff --git a/abcSmartCounts.cpp b/abcSmartCounts.cpp index 70f08c5..eabda42 100644 --- a/abcSmartCounts.cpp +++ b/abcSmartCounts.cpp @@ -4,7 +4,7 @@ */ #include #include "abcSmartCounts.h" - +#include "aio.h" unsigned char **counts = NULL; diff --git a/abcTemplate.cpp b/abcTemplate.cpp index 56b28e6..39de0cb 100644 --- a/abcTemplate.cpp +++ b/abcTemplate.cpp @@ -33,7 +33,7 @@ #include "shared.h" //<-contains the struct defintino for funkypars #include "analysisFunction.h" //<-contains some utility functions, usefull for parsing args #include "abcTemplate.h"//contains the analysis class defition associated with this file - +#include "aio.h" //#include "phys_genolike_calc.h" diff --git a/abcWriteBcf.cpp b/abcWriteBcf.cpp index d1adf9b..11ee84a 100644 --- a/abcWriteBcf.cpp +++ b/abcWriteBcf.cpp @@ -16,7 +16,7 @@ #include "abcCallGenotypes.h" #include "abcMajorMinor.h" #include "abcWriteBcf.h" - +#include "aio.h" void error(const char *format, ...) @@ -43,18 +43,7 @@ void abcWriteBcf::run(funkyPars *pars){ //last is from abc.h void print_bcf_header(htsFile *fp,bcf_hdr_t *hdr,argStruct *args,kstring_t &buf,const bam_hdr_t *bhdr){ - assert(args); - ksprintf(&buf, "##angsdVersion=%s+htslib-%s\n",angsd_version(),hts_version()); - bcf_hdr_append(hdr, buf.s); - - buf.l = 0; - ksprintf(&buf, "##angsdCommand="); - for (int i=1; iargc; i++) - ksprintf(&buf, " %s", args->argv[i]); - aio::kputc('\n', &buf); - bcf_hdr_append(hdr, buf.s); - buf.l=0; - + if(args->ref){ buf.l = 0; ksprintf(&buf, "##reference=file://%s\n", args->ref); @@ -111,6 +100,16 @@ void print_bcf_header(htsFile *fp,bcf_hdr_t *hdr,argStruct *args,kstring_t &buf, bcf_hdr_append(hdr,"##FORMAT="); bcf_hdr_append(hdr,"##FORMAT="); //fprintf(stderr,"samples from sm:%d\n",args->sm->n); + buf.l =0; + assert(args); + ksprintf(&buf, "##angsdVersion=%s",args->version); + bcf_hdr_append(hdr, buf.s); + + buf.l = 0; + ksprintf(&buf, "##angsdCommand=%s;Date=%s",args->cmdline,args->datetime); + bcf_hdr_append(hdr, buf.s); + buf.l=0; + for(int i=0;ism->n;i++){ bcf_hdr_add_sample(hdr, args->sm->smpl[i]); } @@ -121,26 +120,15 @@ void print_bcf_header(htsFile *fp,bcf_hdr_t *hdr,argStruct *args,kstring_t &buf, buf.l=0; } - - void abcWriteBcf::clean(funkyPars *pars){ if(doBcf==0) return; - - } void abcWriteBcf::print(funkyPars *pars){ if(doBcf==0) return; - kstring_t buf; - if(fp==NULL){ - buf.s=NULL;buf.l=buf.m=0; - fp=aio::openFileHts(outfiles,".bcf"); - hdr = bcf_hdr_init("w"); - rec = bcf_init1(); - print_bcf_header(fp,hdr,args,buf,header); - } + lh3struct *lh3 = (lh3struct*) pars->extras[5]; freqStruct *freq = (freqStruct *) pars->extras[6]; genoCalls *geno = (genoCalls *) pars->extras[10]; @@ -177,17 +165,21 @@ void abcWriteBcf::print(funkyPars *pars){ } // .. FORMAT - assert(geno); + // assert(geno); + // fprintf(stderr,"bcf_hdr_nsamples(hdr): %d\n",bcf_hdr_nsamples(hdr)); if(geno){ int32_t *tmpia = (int*)malloc(bcf_hdr_nsamples(hdr)*2*sizeof(int32_t)); for(int i=0; inInd;i++){ - if(geno->dat[s][i]==0){ + if(geno->dat[s][i]==-1){ + tmpia[2*i+0] = bcf_gt_missing; + tmpia[2*i+1] = bcf_gt_missing; + }else if(geno->dat[s][i]==0){ tmpia[2*i+0] = bcf_gt_unphased(0); tmpia[2*i+1] = bcf_gt_unphased(0); }else if(geno->dat[s][i]==1){ tmpia[2*i+0] = bcf_gt_unphased(0); tmpia[2*i+1] = bcf_gt_unphased(1); - } else{ + } else if(geno->dat[s][i]==2){ tmpia[2*i+0] = bcf_gt_unphased(1); tmpia[2*i+1] = bcf_gt_unphased(1); } @@ -199,7 +191,7 @@ void abcWriteBcf::print(funkyPars *pars){ int32_t *tmpfa = (int32_t*)malloc(sizeof(int32_t)*bcf_hdr_nsamples(hdr)); suint *ary=pars->counts[s]; for(int i=0;iargumentFile); + kstring_t buf; + buf.s=NULL; + buf.l=buf.m=0; + fp=aio::openFileHts(outfiles,".bcf"); - + rec = bcf_init1(); + if(arguments->inputtype==INPUT_BAM){ + hdr = bcf_hdr_init("w"); + print_bcf_header(fp,hdr,args,buf,header); + } else if(arguments->inputtype==INPUT_VCF_GP||arguments->inputtype==INPUT_VCF_GL){ + fprintf(stderr,"\t-> Building bcf header\n"); + hdr = vcfreader_hs_bcf_hdr; + ksprintf(&buf, "##angsdVersion=%s",args->version); + bcf_hdr_append(hdr, buf.s); + buf.l = 0; + ksprintf(&buf, "##angsdCommand=%s;Date=%s",args->cmdline,args->datetime); + bcf_hdr_append(hdr, buf.s); + buf.l=0; + if ( bcf_hdr_write(fp, hdr)!=0 ) + fprintf(stderr,"Failed to write bcf\n"); + //hts_close(fp);exit(0); + } + + if(buf.s) + free(buf.s); } abcWriteBcf::~abcWriteBcf(){ + if(rec) + bcf_destroy(rec); + if(hdr) + bcf_hdr_destroy(hdr); if(fp!=NULL) hts_close(fp); } diff --git a/abcWriteFasta.cpp b/abcWriteFasta.cpp index 4acab0b..c8aacba 100644 --- a/abcWriteFasta.cpp +++ b/abcWriteFasta.cpp @@ -9,6 +9,7 @@ #include #include "abc.h" #include "abcWriteFasta.h" +#include "aio.h" void abcWriteFasta::printArg(FILE *argFile){ fprintf(argFile,"--------------\n%s:\n",__FILE__); diff --git a/abcWritePlink.cpp b/abcWritePlink.cpp index 690ed5f..083aab9 100644 --- a/abcWritePlink.cpp +++ b/abcWritePlink.cpp @@ -9,6 +9,8 @@ #include #include "abcCallGenotypes.h" #include "abcWritePlink.h" +#include "aio.h" + void abcWritePlink::printArg(FILE *argFile){ fprintf(argFile,"------------------------\n%s:\n",__FILE__); fprintf(argFile,"\t-doPlink\t%d\n",doPlink); diff --git a/abcWriteVcf.cpp b/abcWriteVcf.cpp index b6cfe72..b518206 100644 --- a/abcWriteVcf.cpp +++ b/abcWriteVcf.cpp @@ -11,6 +11,8 @@ #include "abcCallGenotypes.h" #include "abcMajorMinor.h" #include "abcWriteVcf.h" +#include "aio.h" + void abcWriteVcf::printArg(FILE *argFile){ fprintf(argFile,"------------------------\n%s:\n",__FILE__); fprintf(argFile,"\t-doVcf\t%d\n",doVcf); diff --git a/aio.cpp b/aio.cpp new file mode 100644 index 0000000..d002416 --- /dev/null +++ b/aio.cpp @@ -0,0 +1,118 @@ +#include +#include +#include "aio.h" + +int aio::fexists(const char* str){///@param str Filename given as a string. + struct stat buffer ; + return (stat(str, &buffer )==0 ); /// @return Function returns 1 if file exists. +} + +size_t aio::fsize(const char* fname){ + struct stat st ; + stat(fname,&st); + return st.st_size; +} + +std::vector dumpedFiles;//small hack for getting a nice vector of outputfiles +FILE *aio::openFile(const char* a,const char* b){ + if(0) + fprintf(stderr,"[%s] %s %s",__FUNCTION__,a,b); + char *c = new char[strlen(a)+strlen(b)+1]; + strcpy(c,a); + strncat(c,b,strlen(b)); + // fprintf(stderr,"\t-> Dumping file: %s\n",c); + dumpedFiles.push_back(strdup(c)); + FILE *fp = NULL; + fp = fopen(c,"w"); + if(fp==NULL){ + fprintf(stderr,"\t-> Problem opening file: \'%s\' check permissions\n",c); + exit(0); + } + delete [] c; + return fp; +} + +BGZF *aio::openFileBG(const char* a,const char* b){ + + char *c = new char[strlen(a)+strlen(b)+1]; + strcpy(c,a); + strncat(c,b,strlen(b)); + dumpedFiles.push_back(strdup(c)); + BGZF *fp = bgzf_open(c,"w6h"); + delete [] c; + return fp; +} + +htsFile *aio::openFileHts(const char* a,const char* b){ + + char *c = new char[strlen(a)+strlen(b)+1]; + strcpy(c,a); + strncat(c,b,strlen(b)); + dumpedFiles.push_back(strdup(c)); + htsFile *fp = hts_open(c,"w"); + delete [] c; + return fp; +} + +FILE *aio::getFILE(const char*fname,const char* mode){ + int writeFile = 0; + for(size_t i=0;i Error opening FILE handle for file:%s exiting\n",fname); + exit(0); + } + return fp; +} + +//checks that newer is newer than older +int aio::isNewer(const char *newer,const char *older){ + if (strstr(older, "ftp://") == older || strstr(older, "http://") == older) + return 0; + // fprintf(stderr,"newer:%s older:%s\n",newer,older); + // return 0; + struct stat one; + struct stat two; + stat(newer, &one ); + stat(older, &two ); + + return one.st_mtime>=two.st_mtime; +} + +ssize_t aio::bgzf_write(BGZF *fp, const void *data, size_t length){ + if(length>0) + return ::bgzf_write(fp,data,length); + return 0; +} + + +int aio::tgets(gzFile gz,char**buf,int *l){ + int rlen = 0; + neverUseGoto: + char *tok = gzgets(gz,*buf+rlen,*l-rlen); + if(!tok) + return rlen; + int tmp = tok?strlen(tok):0; + if(tok[tmp-1]!='\n'){ + rlen += tmp; + *l *= 2; + *buf = (char*) realloc(*buf,*l); + goto neverUseGoto; + } + rlen += tmp; + return rlen; +} + + +//kputw-kputc-kputs breaks in newer versions of htslib +int aio::kputw(int c,kstring_t *s){ + return ksprintf(s,"%d",c); +} +int aio::kputc(char c,kstring_t *s){ + return ksprintf(s,"%c",c); +} +int aio::kputs(char *c,kstring_t *s){ + return ksprintf(s,"%s",c); +} diff --git a/aio.h b/aio.h new file mode 100644 index 0000000..7ea466f --- /dev/null +++ b/aio.h @@ -0,0 +1,21 @@ +#include +#include +#include +#include +#include +//angsd io +namespace aio{ + size_t fsize(const char* fname); + int fexists(const char* str);//{///@param str Filename given as a string. + FILE *openFile(const char* a,const char* b); + FILE *getFILE(const char*fname,const char* mode); + BGZF *openFileBG(const char* a,const char* b); + htsFile *openFileHts(const char * a, const char*b); + int isNewer(const char *newer,const char *older); + ssize_t bgzf_write(BGZF *fp, const void *data, size_t length); + int tgets(gzFile gz,char**buf,int *l); + //kputw breaks in newer versions of htslib + int kputw(int c,kstring_t *s); + int kputc(char c,kstring_t *s); + int kputs(char *c,kstring_t *s); +} diff --git a/analysisFunction.cpp b/analysisFunction.cpp index 8ca130e..4c8b5d8 100644 --- a/analysisFunction.cpp +++ b/analysisFunction.cpp @@ -1,6 +1,3 @@ - - - #include #include #include @@ -10,118 +7,7 @@ #include #include #include "analysisFunction.h" - - -int angsd::getArg(const char* argName,int type,argStruct *arguments){ - - int argPos = 1; - while(argPos argc){ - if (strcasecmp(arguments->argv[argPos],argName)==0){ - if(arguments->argc==2) - return(-999); - // fprintf(stderr,"HIT %s vs %s\n",argName,arguments->argv[argPos]); - arguments->usedArgs[argPos]=1; - arguments->usedArgs[argPos+1]=1; - if(argPos==arguments->argc-1){ - fprintf(stderr,"\t-> Must supply a parameter for: %s\n",argName); - exit(0); - } - return(atoi(arguments->argv[argPos+1])); - } - argPos++; - } - return(type); -} - -char* angsd::getArg(const char* argName,char* type,argStruct *arguments){ - ///fprintf(stderr,"pre %p %s \n",type,argName); - int argPos = 1; - while(argPos argc){ - if (strcasecmp(arguments->argv[argPos],argName)==0){ - if(arguments->argc==2){ - return(strdup("-999")); - } - arguments->usedArgs[argPos]=1; - arguments->usedArgs[argPos+1]=1; - if(argPos==arguments->argc-1){ - fprintf(stderr,"\t-> Must supply a parameter for: %s\n",argName); - exit(0); - } - return(strdup(arguments->argv[argPos+1])); //VALGRIND says leak. don't care very small DRAGON - } - argPos++; - } - // fprintf(stderr,"post %p\n",type); - return(type); - -} - - -char* angsd::getArg(const char* argName, const char* type,argStruct *arguments){ - //fprintf(stderr,"pre %p %s \n",type,argName); - int argPos = 1; - while(argPos argc){ - if (strcasecmp(arguments->argv[argPos],argName)==0){ - if(arguments->argc==2){ - return(strdup("-999")); - } - arguments->usedArgs[argPos]=1; - arguments->usedArgs[argPos+1]=1; - if(argPos==arguments->argc-1){ - fprintf(stderr,"\t-> Must supply a parameter for: %s\n",argName); - exit(0); - } - return(strdup(arguments->argv[argPos+1])); //VALGRIND says leak. don't care very small DRAGON - } - argPos++; - } - //assert(0==1); - // fprintf(stderr,"post %p\n",type); - return NULL; - -} - - - -float angsd::getArg(const char* argName,float type,argStruct *arguments){ - int argPos = 1; - while(argPos argc){ - if (strcasecmp(arguments->argv[argPos],argName)==0){ - if(arguments->argc==2) - return(-999); - arguments->usedArgs[argPos]=1; - arguments->usedArgs[argPos+1]=1; - if(argPos==arguments->argc-1){ - fprintf(stderr,"\t-> Must supply a parameter for: %s\n",argName); - exit(0); - } - return(atof(arguments->argv[argPos+1])); - } - argPos++; - } - return(type); -} - -double angsd::getArg(const char* argName,double type,argStruct *arguments){ - int argPos = 1; - while(argPos argc){ - if (strcasecmp(arguments->argv[argPos],argName)==0){ - if(arguments->argc==2) - return(-999); - arguments->usedArgs[argPos]=1; - arguments->usedArgs[argPos+1]=1; - if(argPos==arguments->argc-1){ - fprintf(stderr,"\t-> Must supply a parameter for: %s\n",argName); - exit(0); - } - return(atof(arguments->argv[argPos+1])); - } - argPos++; - } - return(type); -} - - +#include "aio.h" double angsd::addProtect2(double a,double b){ //function does: log(exp(a)+exp(b)) while protecting for underflow @@ -556,10 +442,11 @@ double **angsd::get3likesRescale(funkyPars *pars){ if(pars->keepSites[s]==0) continue; for(int i=0;inInd;i++){ - // fprintf(stderr,"refid: %d posi:%d pars->major:%p\n",pars->refId,pars->posi[s]+1,pars->major); - //fprintf(stderr,"mm: %d\t%d\n",pars->major[s],pars->major[s]); - // fprintf(stderr,"%d\t%d\t%c\t%c\t",pars->refId,pars->posi[s]+1,intToRef[pars->major[s]],intToRef[pars->minor[s]]); - + /*( + fprintf(stderr,"refid: %d posi:%d pars->major:%p\n",pars->refId,pars->posi[s]+1,pars->major); + fprintf(stderr,"mm: %d\t%d\n",pars->major[s],pars->major[s]); + fprintf(stderr,"%d\t%d\t%c\t%c\t",pars->refId,pars->posi[s]+1,intToRef[pars->major[s]],intToRef[pars->minor[s]]); + */ loglike[s][i*3+0]=pars->likes[s][i*10+angsd::majorminor[pars->major[s]][pars->major[s]]]; loglike[s][i*3+1]=pars->likes[s][i*10+angsd::majorminor[pars->major[s]][pars->minor[s]]]; loglike[s][i*3+2]=pars->likes[s][i*10+angsd::majorminor[pars->minor[s]][pars->minor[s]]]; @@ -761,52 +648,6 @@ void angsd::logrescale(double *ary,int len){ } - -std::vector angsd::getFilenames(const char * name,int nInd){ - if(strchr(name,'\r')){ - fprintf(stderr,"\t\t-> Filelist contains carriage return. Looks like a windows file please remove hidden \'\r\' from filelist\n"); - exit(0); - } - - if(!fexists(name)){ - fprintf(stderr,"[%s]\t-> Problems opening file: %s\n",__FUNCTION__,name); - exit(0); - } - const char* delims = " \t"; - std::vector ret; - std::ifstream pFile(name,std::ios::in); - - char buffer[LENS]; - while(!pFile.eof()){ - pFile.getline(buffer,LENS); - char *tok = strtok(buffer,delims); - while(tok!=NULL){ - if(tok[0]!='#') - ret.push_back(strdup(buffer)); - tok = strtok(NULL,delims); - } - } - if(nInd>0) { - if(ret.size() Number of samples is smaller than subset requested %lu vs %d\n",ret.size(),nInd); - else{ - // fprintf(stderr,"\t-> Will remove tail of filename list\n"); - for(int ii=nInd;ii filename list now contains only: %lu\n",ret.size()); - } -#if 0 - for(size_t ii=0;ii%s\n",ii,ret[ii]); - fprintf(stderr,"\n"); -#endif - } - - return ret; -} - - void print_array(FILE *fp,double *ary,int len){ for(int i=0;i dumpedFiles;//small hack for getting a nice vector of outputfiles -FILE *aio::openFile(const char* a,const char* b){ - if(0) - fprintf(stderr,"[%s] %s %s",__FUNCTION__,a,b); - char *c = new char[strlen(a)+strlen(b)+1]; - strcpy(c,a); - strncat(c,b,strlen(b)); - // fprintf(stderr,"\t-> Dumping file: %s\n",c); - dumpedFiles.push_back(strdup(c)); - FILE *fp = NULL; - fp = fopen(c,"w"); - if(fp==NULL){ - fprintf(stderr,"\t-> Problem opening file: \'%s\' check permissions\n",c); - exit(0); - } - delete [] c; - return fp; -} - -BGZF *aio::openFileBG(const char* a,const char* b){ - - char *c = new char[strlen(a)+strlen(b)+1]; - strcpy(c,a); - strncat(c,b,strlen(b)); - dumpedFiles.push_back(strdup(c)); - BGZF *fp = bgzf_open(c,GZOPT); - delete [] c; - return fp; -} - -htsFile *aio::openFileHts(const char* a,const char* b){ - - char *c = new char[strlen(a)+strlen(b)+1]; - strcpy(c,a); - strncat(c,b,strlen(b)); - dumpedFiles.push_back(strdup(c)); - htsFile *fp = hts_open(c,"w"); - delete [] c; - return fp; -} - -FILE *aio::getFILE(const char*fname,const char* mode){ - int writeFile = 0; - for(size_t i=0;i Error opening FILE handle for file:%s exiting\n",fname); - exit(0); - } - return fp; -} - -//checks that newer is newer than older -int aio::isNewer(const char *newer,const char *older){ - if (strstr(older, "ftp://") == older || strstr(older, "http://") == older) - return 0; - // fprintf(stderr,"newer:%s older:%s\n",newer,older); - // return 0; - struct stat one; - struct stat two; - stat(newer, &one ); - stat(older, &two ); - - return one.st_mtime>=two.st_mtime; -} - -ssize_t aio::bgzf_write(BGZF *fp, const void *data, size_t length){ - if(length>0) - return ::bgzf_write(fp,data,length); - return 0; -} - void angsd::norm(double *d,size_t len){ double ts=0; for(int i=0;i getFilenames(const char * name,int nInd); char *strpop(char **str,char split); int getRandomCount(suint *d, int i, int depth = -1); int getMaxCount(suint *d, int i, int depth = -1); @@ -126,22 +120,7 @@ namespace angsd { } -//angsd io -namespace aio{ - size_t fsize(const char* fname); - int fexists(const char* str);//{///@param str Filename given as a string. - FILE *openFile(const char* a,const char* b); - FILE *getFILE(const char*fname,const char* mode); - BGZF *openFileBG(const char* a,const char* b); - htsFile *openFileHts(const char * a, const char*b); - int isNewer(const char *newer,const char *older); - ssize_t bgzf_write(BGZF *fp, const void *data, size_t length); - int tgets(gzFile gz,char**buf,int *l); - //kputw breaks in newer versions of htslib - int kputw(int c,kstring_t *s); - int kputc(char c,kstring_t *s); - int kputs(char *c,kstring_t *s); -} + #endif diff --git a/angsd.cpp b/angsd.cpp index 81eeceb..8e933b7 100644 --- a/angsd.cpp +++ b/angsd.cpp @@ -18,6 +18,7 @@ #include #include "cigstat.h" #include "shared.h" +#include "argStruct.h" #include "multiReader.h" @@ -175,44 +176,40 @@ void parseArgStruct(argStruct *arguments){ } int main(int argc, char** argv){ - fprintf(stderr,"\t-> angsd version: %s (htslib: %s) build(%s %s)\n",ANGSD_VERSION,hts_version(),__DATE__,__TIME__); + if(!strcasecmp("sites",argv[1])){ + //from prep_sites.* used for indexing -sites files + int main_sites(int argc,char **argv); + main_sites(--argc,++argv); + return 0; + } + + argStruct *args=setArgStruct(argc,argv); + //no arguments supplied -> print info if(argc==1||(argc==2&&(strcasecmp(argv[1],"--version")==0||strcasecmp(argv[1],"--help")==0))){//if haven't been supplied with arguments, load default,print, and exit printProgInfo(stderr); return 0; } - - if(!strcasecmp("sites",argv[1])){ - //from prep_sites.* used for indexing -sites files - int main_sites(int argc,char **argv); - main_sites(--argc,++argv); - return 0; - } + - //print time - clock_t t=clock(); - time_t t2=time(NULL); + //print time + clock_t t=clock(); + time_t t2=time(NULL); //intialize our signal handler for ctrl+c catchkill(); - - argStruct *args=NULL; + if(argc==2){ fprintf(stderr,"\t-> Analysis helpbox/synopsis information:\n"); - multiReader mr(argc,argv); - args = mr.getargs(); - - init(args);//program dies here after printing info, if a match is found + multiReader mr(args); + init((void*)args);//program dies here after printing info, if a match is found,void* to avoid circular .h inclusion fprintf(stderr,"\nUnknown argument supplied: \'%s\'\n\n",argv[1]); printProgInfo(stderr); exit(0);//important otherwise the abc classes will try to clean up, which doesnt make sense in this context // return 0; } - multiReader *mr= new multiReader(argc,argv); - args = mr->getargs(); - - + multiReader *mr= new multiReader(args); init(args); parseArgStruct(args); @@ -279,5 +276,6 @@ int main(int argc, char** argv){ //check extern htsFormat *dingding2; free(dingding2); + destroy_argStruct(args); return 0; } diff --git a/argStruct.cpp b/argStruct.cpp new file mode 100644 index 0000000..0fd9f7d --- /dev/null +++ b/argStruct.cpp @@ -0,0 +1,411 @@ +#include +#include //for checking if output dir exists 'dirname' +#include +#include "version.h" +#include "argStruct.h" +#include "aio.h" + +#define ARGS ".arg" +#define LENS 10000 + +void whatIsTheInput(int type){ + switch(type){ + case INPUT_BAM: + fprintf(stderr,"\t-> Inputtype is BAM/CRAM\n"); + break; + case INPUT_GLF: + fprintf(stderr,"\t-> Inputtype is GLF\n"); + break; + case INPUT_BEAGLE: + fprintf(stderr,"\t-> Inputtype is beagle\n"); + break; + case INPUT_PILEUP: + fprintf(stderr,"\t-> Inputtype is pileup\n"); + break; + case INPUT_VCF_GP: + fprintf(stderr,"\t-> Inputtype is vcf/bcf\n"); + break; + case INPUT_VCF_GL: + fprintf(stderr,"\t-> Inputtype is vcf/bcf\n"); + break; + case INPUT_GLF10_TEXT: + fprintf(stderr,"\t-> Inputtype is GLF10 text\n"); + break; + default:{ + fprintf(stderr,"\t-> Unknown input type\n"); + exit(0); + } + } +} + +void setInputType(argStruct *args){ +#if 0 + fprintf(stderr,"bam:%d\n",INPUT_BAM); + fprintf(stderr,"glf:%d\n",INPUT_GLF); + fprintf(stderr,"glf3:%d\n",INPUT_GLF3); + fprintf(stderr,"blg:%d\n",INPUT_BEAGLE); + fprintf(stderr,"plp:%d\n",INPUT_PILEUP); + fprintf(stderr,"vcf_gp:%d\n",INPUT_VCF_GP); + fprintf(stderr,"vcf_gl:%d\n",INPUT_VCF_GL); + fprintf(stderr,"glf10_text:%d\n",INPUT_GLF10_TEXT); +#endif + if(args->fai){ + free(args->fai); + args->fai=NULL; + } + args->fai = angsd::getArg("-fai",args->fai,args); + char *tmp =NULL; + tmp = angsd::getArg("-glf",tmp,args); + if(tmp!=NULL){ + args->inputtype=INPUT_GLF; + args->infile = tmp; + args->nams.push_back(strdup(args->infile)); + return; + } + free(tmp); + tmp = NULL; + tmp = angsd::getArg("-glf3",tmp,args); + if(tmp!=NULL){ + args->inputtype=INPUT_GLF3; + args->infile = tmp; + args->nams.push_back(strdup(args->infile)); + return; + } + free(tmp); + tmp = NULL; + tmp = angsd::getArg("-glf10_text",tmp,args); + if(tmp!=NULL){ + args->inputtype=INPUT_GLF10_TEXT; + args->infile = tmp; + args->nams.push_back(strdup(args->infile)); + return; + } + free(tmp); + tmp=NULL; + tmp = angsd::getArg("-vcf-GP",tmp,args); + if(tmp!=NULL){ + args->inputtype=INPUT_VCF_GP; + args->infile = tmp; + args->nams.push_back(strdup(args->infile)); + fprintf(stderr,"\t-> -vcf-gp has been removed from current version, will be included in later versions depending on need\n"); + exit(0); + return; + } + free(tmp); + tmp=NULL; + tmp = angsd::getArg("-vcf-GL",tmp,args); + if(tmp!=NULL){ + args->inputtype=INPUT_VCF_GL; + args->infile = tmp; + args->nams.push_back(strdup(args->infile)); + return; + } + free(tmp); + tmp=NULL; + tmp = angsd::getArg("-vcf-PL",tmp,args); + if(tmp!=NULL){ + args->inputtype=INPUT_VCF_GL; + args->infile = tmp; + args->nams.push_back(strdup(args->infile)); + return; + } + free(tmp); + tmp=NULL; + tmp = angsd::getArg("-pileup",tmp,args); + if(tmp!=NULL){ + args->inputtype=INPUT_PILEUP; + args->infile = tmp; + args->nams.push_back(strdup(args->infile)); + return; + } + free(tmp); + tmp=NULL; + tmp = angsd::getArg("-beagle",tmp,args); + if(tmp!=NULL){ + args->inputtype=INPUT_BEAGLE; + args->infile = tmp; + args->nams.push_back(strdup(args->infile)); + return; + } + free(tmp); + tmp=NULL; + tmp = angsd::getArg("-i",tmp,args); + if(tmp!=NULL){ + args->inputtype=INPUT_BAM; + args->infile = tmp; + args->nams.push_back(strdup(args->infile)); + return; + } + int nInd = 0; + nInd = angsd::getArg("-nInd",nInd,args); + free(tmp); + tmp=NULL; + tmp = angsd::getArg("-bam",tmp,args); + tmp = angsd::getArg("-b",tmp,args); + if(tmp!=NULL && args->argc>2){ + args->inputtype=INPUT_BAM; + args->infile = tmp; + args->nams = angsd::getFilenames(args->infile,nInd); + + return; + } + if(args->inputtype==INPUT_BAM){ + + + } +} + +void checkIfDir(char *fname){ + char *dirNam2 = strdup(fname); + char *dirNam=dirname(dirNam2); + if(strlen(dirNam)>1 &&!aio::fexists(dirNam)){ + fprintf(stderr,"\t Folder: \'%s\' doesn't exist, please create\n",dirNam); + exit(0); + } + free(dirNam2); +} + +argStruct *setArgStruct(int argc,char **argv) { + + argStruct *arguments = new argStruct; + arguments->cmdline = NULL; + arguments->version = NULL; + arguments->argumentFile = stderr; + arguments->outfiles =NULL; + arguments->fai=arguments->anc=arguments->ref=NULL; + arguments->hd=NULL; + arguments->revMap= NULL; + arguments->argc=argc; + arguments->argv=argv; + arguments->nReads = 50; + arguments->sm=NULL; + arguments->sm=bam_smpl_init(); + assert(arguments->sm); + + arguments->usedArgs= new int[argc+1];//well here we allocate one more than needed, this is only used in the ./angsd -beagle version + for(int i=0;iusedArgs[i]=0; + + arguments->inputtype=-1; + arguments->infile = NULL; + arguments->show =0; + arguments->fai = NULL; + + kstring_t kstr; + kstr.s=NULL;kstr.l=kstr.m=0; + ksprintf(&kstr,"angsd version: %s (htslib: %s) build(%s %s)\n",ANGSD_VERSION,hts_version(),__DATE__,__TIME__); + arguments->version = strdup(kstr.s); + kstr.l=0; + time_t rawtime; + struct tm * timeinfo; + time ( &rawtime ); + timeinfo = localtime ( &rawtime ); + ksprintf (&kstr, "%s", asctime (timeinfo) ); + arguments->datetime = strdup(kstr.s); + kstr.l = 0; + for(int i=0;icmdline = kstr.s; + fprintf(stderr,"\t-> %s",arguments->version); + fprintf(stderr,"\t-> %s\n",arguments->cmdline); + //check output filename + arguments->outfiles = angsd::getArg("-out",arguments->outfiles,arguments); + if(arguments->outfiles==NULL){ + if(argc!=2) + fprintf(stderr,"\t-> No \'-out\' argument given, output files will be called \'angsdput\'\n"); + arguments->outfiles = strdup("angsdput"); + } + checkIfDir(arguments->outfiles); + + if(argc<=2) + return arguments; + //print arguments into logfile + if(argc>2) + arguments->argumentFile=aio::openFile(arguments->outfiles,ARGS); + fprintf(arguments->argumentFile,"\t-> %s",arguments->version); + fprintf(arguments->argumentFile,"\t-> %s\n",arguments->cmdline); + setInputType(arguments); + whatIsTheInput(arguments->inputtype); + return arguments; +} + + + +int angsd::getArg(const char* argName,int type,argStruct *arguments){ + + int argPos = 1; + while(argPos argc){ + if (strcasecmp(arguments->argv[argPos],argName)==0){ + if(arguments->argc==2) + return(-999); + // fprintf(stderr,"HIT %s vs %s\n",argName,arguments->argv[argPos]); + arguments->usedArgs[argPos]=1; + arguments->usedArgs[argPos+1]=1; + if(argPos==arguments->argc-1){ + fprintf(stderr,"\t-> Must supply a parameter for: %s\n",argName); + exit(0); + } + return(atoi(arguments->argv[argPos+1])); + } + argPos++; + } + return(type); +} + +char* angsd::getArg(const char* argName,char* type,argStruct *arguments){ + ///fprintf(stderr,"pre %p %s \n",type,argName); + int argPos = 1; + while(argPos argc){ + if (strcasecmp(arguments->argv[argPos],argName)==0){ + if(arguments->argc==2){ + return(strdup("-999")); + } + arguments->usedArgs[argPos]=1; + arguments->usedArgs[argPos+1]=1; + if(argPos==arguments->argc-1){ + fprintf(stderr,"\t-> Must supply a parameter for: %s\n",argName); + exit(0); + } + return(strdup(arguments->argv[argPos+1])); //VALGRIND says leak. don't care very small DRAGON + } + argPos++; + } + // fprintf(stderr,"post %p\n",type); + return(type); + +} + + +char* angsd::getArg(const char* argName, const char* type,argStruct *arguments){ + //fprintf(stderr,"pre %p %s \n",type,argName); + int argPos = 1; + while(argPos argc){ + if (strcasecmp(arguments->argv[argPos],argName)==0){ + if(arguments->argc==2){ + return(strdup("-999")); + } + arguments->usedArgs[argPos]=1; + arguments->usedArgs[argPos+1]=1; + if(argPos==arguments->argc-1){ + fprintf(stderr,"\t-> Must supply a parameter for: %s\n",argName); + exit(0); + } + return(strdup(arguments->argv[argPos+1])); //VALGRIND says leak. don't care very small DRAGON + } + argPos++; + } + //assert(0==1); + // fprintf(stderr,"post %p\n",type); + return NULL; + +} + + + +float angsd::getArg(const char* argName,float type,argStruct *arguments){ + int argPos = 1; + while(argPos argc){ + if (strcasecmp(arguments->argv[argPos],argName)==0){ + if(arguments->argc==2) + return(-999); + arguments->usedArgs[argPos]=1; + arguments->usedArgs[argPos+1]=1; + if(argPos==arguments->argc-1){ + fprintf(stderr,"\t-> Must supply a parameter for: %s\n",argName); + exit(0); + } + return(atof(arguments->argv[argPos+1])); + } + argPos++; + } + return(type); +} + +double angsd::getArg(const char* argName,double type,argStruct *arguments){ + int argPos = 1; + while(argPos argc){ + if (strcasecmp(arguments->argv[argPos],argName)==0){ + if(arguments->argc==2) + return(-999); + arguments->usedArgs[argPos]=1; + arguments->usedArgs[argPos+1]=1; + if(argPos==arguments->argc-1){ + fprintf(stderr,"\t-> Must supply a parameter for: %s\n",argName); + exit(0); + } + return(atof(arguments->argv[argPos+1])); + } + argPos++; + } + return(type); +} + + + +std::vector angsd::getFilenames(const char * name,int nInd){ + if(strchr(name,'\r')){ + fprintf(stderr,"\t\t-> Filelist contains carriage return. Looks like a windows file please remove hidden \'\r\' from filelist\n"); + exit(0); + } + + if(!aio::fexists(name)){ + fprintf(stderr,"[%s]\t-> Problems opening file: %s\n",__FUNCTION__,name); + exit(0); + } + const char* delims = " \t"; + std::vector ret; + std::ifstream pFile(name,std::ios::in); + + char buffer[LENS]; + while(!pFile.eof()){ + pFile.getline(buffer,LENS); + char *tok = strtok(buffer,delims); + while(tok!=NULL){ + if(tok[0]!='#') + ret.push_back(strdup(buffer)); + tok = strtok(NULL,delims); + } + } + if(nInd>0) { + if(ret.size() Number of samples is smaller than subset requested %lu vs %d\n",ret.size(),nInd); + else{ + // fprintf(stderr,"\t-> Will remove tail of filename list\n"); + for(int ii=nInd;ii filename list now contains only: %lu\n",ret.size()); + } +#if 0 + for(size_t ii=0;ii%s\n",ii,ret[ii]); + fprintf(stderr,"\n"); +#endif + } + + return ret; +} + + void destroy_argStruct(argStruct *args){ + if(args->cmdline) + free(args->cmdline); + if(args->version) + free(args->version); + if(args->datetime) + free(args->datetime); + if(args->sm) + bam_smpl_destroy(args->sm); + + for(unsigned i=0;inams.size();i++) + free(args->nams[i]); + if(args->fai) + free(args->fai); + delete [] args->usedArgs; + free(args->outfiles); + free(args->infile); + if(args->anc) + free(args->anc); + bam_hdr_destroy(args->hd); + if(args->argumentFile!=stderr) fclose(args->argumentFile); + delete args; + } diff --git a/argStruct.h b/argStruct.h index 6b3a8a7..41be0bc 100644 --- a/argStruct.h +++ b/argStruct.h @@ -5,7 +5,11 @@ #include #include #include + #include "sample.h" + +enum{INPUT_BAM,INPUT_GLF,INPUT_GLF3,INPUT_BEAGLE,INPUT_PILEUP,INPUT_VCF_GL,INPUT_VCF_GP,INPUT_GLF10_TEXT}; + //little struct for keeping information of regions to extract typedef struct{ int refID; @@ -13,6 +17,17 @@ typedef struct{ int stop; }regs; +typedef struct{ + htsFile *fp; + char *fn; + bam_hdr_t *hdr; + int isEOF; + int regionDone; + hts_idx_t *idx; + hts_itr_t *itr; + regs regions; +}bufReader; + struct ltstr { bool operator()(const char* s1, const char* s2) const @@ -45,4 +60,21 @@ typedef struct{ bam_sample_t *sm;//for dealing with readgroups char *ref; char *anc; + char *cmdline; + char *version; + bufReader *rd; + char *datetime; }argStruct; + +void destroy_argStruct(argStruct *arg); +argStruct *setArgStruct(int argc,char **argv); + +namespace angsd { + int getArg(const char* argName,int type,argStruct *arguments); + float getArg(const char* argName,float type,argStruct *arguments); + char* getArg(const char* argName,char* type,argStruct *arguments); + char* getArg(const char* argName,const char* type,argStruct *arguments); + double getArg(const char* argName,double type,argStruct *arguments); + std::vector getFilenames(const char * name,int nInd); +} + diff --git a/bammer_main.cpp b/bammer_main.cpp index 789b40f..06e9338 100644 --- a/bammer_main.cpp +++ b/bammer_main.cpp @@ -14,7 +14,7 @@ #include "abcGetFasta.h" #include "analysisFunction.h" #include "sample.h" - +#include "aio.h" extern int SIG_COND; #define bam_nt16_rev_table seq_nt16_str @@ -220,27 +220,21 @@ void printReg(FILE *fp,std::vector ®ions){ extern abc **allMethods; abcGetFasta *gf=NULL; -int bammer_main(argStruct *args){ +void init_bamreaders(argStruct *args){ - gf=(abcGetFasta *) allMethods[1]; - //read bamfiles - extern int checkBamHeaders; - extern int doCheck; - extern char *fai_fname; - args->sm = NULL; - args->sm=bam_smpl_init(); - assert(args->sm); - bufReader *rd = initializeBufReaders2(args->nams,checkBamHeaders,doCheck,fai_fname,args->sm); - fprintf(stderr, "[%s] %d samples in %lu input files\n", __func__, args->sm->n, args->nams.size()); +} + + +int bammer_main(argStruct *args){ + gf=(abcGetFasta *) allMethods[1]; extern int maxThreads; - - uppile(args->show,maxThreads,rd,args->nReads,args->nams.size(),args->regions,gf); + uppile(args->show,maxThreads,args->rd,args->nReads,args->nams.size(),args->regions,gf); //cleanup stuff for(unsigned i=0;inams.size();i++) - dalloc_bufReader(rd[i]); + dalloc_bufReader(args->rd[i]); - delete [] rd; + delete [] args->rd; return 0; } diff --git a/beagleReader.cpp b/beagleReader.cpp index 0a75988..f7542bd 100644 --- a/beagleReader.cpp +++ b/beagleReader.cpp @@ -2,6 +2,7 @@ #include #include "analysisFunction.h" #include "beagleReader.h" +#include "aio.h" beagle_reader::~beagle_reader(){ free(buffer); diff --git a/cigstat.cpp b/cigstat.cpp index 7356215..3359761 100644 --- a/cigstat.cpp +++ b/cigstat.cpp @@ -6,6 +6,8 @@ #include #include "analysisFunction.h" #include "cigstat.h" +#include "aio.h" + #define max_rlen 1024 size_t **cig_counts = NULL; diff --git a/glfReader_text.cpp b/glfReader_text.cpp index 9f8d728..44feed1 100644 --- a/glfReader_text.cpp +++ b/glfReader_text.cpp @@ -1,6 +1,7 @@ #include #include "analysisFunction.h" #include "glfReader_text.h" +#include "aio.h" glfReader_text::~glfReader_text(){ free(buffer); diff --git a/mUpPile.cpp b/mUpPile.cpp index cb27fb8..d3979f2 100644 --- a/mUpPile.cpp +++ b/mUpPile.cpp @@ -12,7 +12,7 @@ #include "pop1_read.h" #define USE_MALLOC_WRAPPERS #include "malloc_wrap.h" - +#include "aio.h" #include "pooled_alloc.h" void *tail=NULL;//<- this will be point to the pool->free diff --git a/makeReadPool.h b/makeReadPool.h index db66c5f..b261a12 100644 --- a/makeReadPool.h +++ b/makeReadPool.h @@ -20,17 +20,6 @@ typedef struct{ }readPool; -typedef struct{ - htsFile *fp; - char *fn; - bam_hdr_t *hdr; - int isEOF; - int regionDone; - hts_idx_t *idx; - hts_itr_t *itr; - regs regions; -}bufReader; - readPool makePoolb(int l); void dalloc (readPool *ret); diff --git a/misc/Makefile b/misc/Makefile index 06722a4..a8d53e2 100644 --- a/misc/Makefile +++ b/misc/Makefile @@ -105,7 +105,7 @@ multisafreader.hpp: Matrix.hpp realSFS_shared.o realSFS: realSFS.cpp safreader.o keep.hpp safstat.o realSFS_args.o header.o fstreader.o safcat.o multisafreader.hpp realSFS_optim.o realSFS_shared.o realSFS_dadi.o - $(CXX) $(FLAGS) realSFS.cpp -o realSFS safreader.o realSFS_args.o safstat.o fstreader.o header.o safcat.o realSFS_optim.o realSFS_shared.o ../prep_sites.o ../analysisFunction.o ../chisquare.o realSFS_dadi.o $(LIBS) -lz -lpthread + $(CXX) $(FLAGS) realSFS.cpp -o realSFS safreader.o realSFS_args.o safstat.o fstreader.o header.o safcat.o realSFS_optim.o realSFS_shared.o ../prep_sites.o ../aio.o ../analysisFunction.o ../chisquare.o realSFS_dadi.o $(LIBS) -lz -lpthread hmm_psmc.o: hmm_psmc.cpp hmm_psmc.h compute.c $(CXX) $(FLAGS) -c hmm_psmc.cpp diff --git a/misc/header.h b/misc/header.h index c820f98..00c95a7 100644 --- a/misc/header.h +++ b/misc/header.h @@ -1,9 +1,10 @@ #pragma once -#include "../prep_sites.h" + #include #include #include #include +#include "../prep_sites.h" void normalize(double *tmp,size_t len); size_t fsize(const char* fname); diff --git a/misc/realSFS.cpp b/misc/realSFS.cpp index a31a51e..b9b0cb8 100644 --- a/misc/realSFS.cpp +++ b/misc/realSFS.cpp @@ -1,5 +1,4 @@ /* - The functionality of this file, has replaced the old emOptim and testfolded.c programs. part of ANGSD @@ -16,7 +15,7 @@ april 20, removed 2dsfs as special scenario april 20, split out the safreader into seperate cpp/h may 5, seems to work well now - */ +*/ #include #include @@ -40,7 +39,7 @@ int SIG_COND =1; int howOften =5e6;//how often should we print out (just to make sure something is happening) #include "multisafreader.hpp" - +#include "../aio.h" int really_kill =3; int VERBOSE = 1; /* @@ -730,7 +729,31 @@ int saf2theta(int argc,char**argv){ double workarray[nChr+1]; for(int i=0;imat is float lets pluginto double workarray[i] = gls[0]->mat[s][i]; - + if(arg->fold){ + if((nChr+1) %2 ){ + // fprintf(stderr,"\t-> Folding odd number of categories\n"); + int first=0; + int last=nChr; + while(first Folding even number of categories\n"); + int first=0; + int last=nChr; + while(first<=last){ + //fprintf(stderr,"first: %d last:%d\n",first,last); + workarray[first] = log(exp(workarray[first]) + exp(workarray[last]));//<- should be divided by two but this is a constant + first++; + last--; + } + } + } //calculate post probs double tsum =exp(workarray[0] + prior[0]); for(int i=1;i #include #include "pooled_alloc.h" +#include "mpileup.h" +#include "mUpPile.h" +#include "aio.h" + extern tpool_alloc_t *tnodes; mpileup::mpileup(int nInd_a,gzFile gz_a,const aMap* revMap_a,int minQ_a){ diff --git a/multiReader.cpp b/multiReader.cpp index 5106f5d..35fcc7d 100644 --- a/multiReader.cpp +++ b/multiReader.cpp @@ -1,4 +1,3 @@ -#include //for checking if output dir exists 'dirname' #include #include "abc.h" #include "shared.h" @@ -6,21 +5,7 @@ #include "parseArgs_bambi.h" #include "version.h" #include "sample.h" - -#define ARGS ".arg" -void checkIfDir(char *fname){ - char *dirNam2 = strdup(fname); - char *dirNam=dirname(dirNam2); - - if(strlen(dirNam)>1 &&!aio::fexists(dirNam)){ - fprintf(stderr,"\t Folder: \'%s\' doesn't exist, please create\n",dirNam); - exit(0); - } - //free(dirNam); VALGRIND on osx wants this line - free(dirNam2); - -} - +#include "aio.h" bam_hdr_t *getHeadFromFai(const char *fname){ std::vector chrs; @@ -58,37 +43,6 @@ bam_hdr_t *getHeadFromFai(const char *fname){ return ret; } -/* -bam_hdr_t *bcf_hdr_2_bam_hdr_t (htsstuff *hs){ - bam_hdr_t *ret = bam_hdr_init(); - ret->l_text = 0; - ret->text =NULL; - const char **seqnames = NULL; - int nseq; - seqnames = bcf_hdr_seqnames(hs->hdr, &nseq); assert(seqnames); - for (int i=0; ihdr->nhrec; i++){ - bcf_hrec_t *hrec=hs->hdr->hrec[i]; - if(strcmp(hrec->key,"contig")==0){ - fprintf(stderr,"%d) hrec->value:%s key:%s\n",i,hrec->value,hrec->key); - for(int j=0;jnkeys;j++) - fprintf(stderr,"i:%d j:%d keys:%s vals:%s\n",i,j,hrec->keys[j],hrec->vals[j]); - } - } - exit(0); - - ret->n_targets = nseq; - ret->target_len = (uint32_t*) malloc(sizeof(uint32_t)*nseq); - ret->target_name = (char**) malloc(sizeof(char*)*nseq); - for(size_t i=0;itarget_len[i] =0x7fffffff;// strlen(seqnames[i]); - ret->target_name[i] =strdup(seqnames[i]); - } - free(seqnames); - return ret; -} -*/ - aMap *buildRevTable(const bam_hdr_t *hd){ assert(hd); aMap *ret = new aMap; @@ -100,173 +54,6 @@ aMap *buildRevTable(const bam_hdr_t *hd){ return ret; } - -void setInputType(argStruct *args){ -#if 0 - fprintf(stderr,"bam:%d\n",INPUT_BAM); - fprintf(stderr,"glf:%d\n",INPUT_GLF); - fprintf(stderr,"glf3:%d\n",INPUT_GLF3); - fprintf(stderr,"blg:%d\n",INPUT_BEAGLE); - fprintf(stderr,"plp:%d\n",INPUT_PILEUP); - fprintf(stderr,"vcf_gp:%d\n",INPUT_VCF_GP); - fprintf(stderr,"vcf_gl:%d\n",INPUT_VCF_GL); - fprintf(stderr,"glf10_text:%d\n",INPUT_GLF10_TEXT); - fprintf(stderr,"bgen:%d\n",INPUT_BGEN); -#endif - if(args->fai){ - free(args->fai); - args->fai=NULL; - } - args->fai = angsd::getArg("-fai",args->fai,args); - char *tmp =NULL; - tmp = angsd::getArg("-glf",tmp,args); - if(tmp!=NULL){ - args->inputtype=INPUT_GLF; - args->infile = tmp; - args->nams.push_back(strdup(args->infile)); - return; - } - free(tmp); - tmp = NULL; - tmp = angsd::getArg("-glf3",tmp,args); - if(tmp!=NULL){ - args->inputtype=INPUT_GLF3; - args->infile = tmp; - args->nams.push_back(strdup(args->infile)); - return; - } - free(tmp); - tmp = NULL; - tmp = angsd::getArg("-glf10_text",tmp,args); - if(tmp!=NULL){ - args->inputtype=INPUT_GLF10_TEXT; - args->infile = tmp; - args->nams.push_back(strdup(args->infile)); - return; - } - free(tmp); - tmp=NULL; - tmp = angsd::getArg("-vcf-GP",tmp,args); - if(tmp!=NULL){ - args->inputtype=INPUT_VCF_GP; - args->infile = tmp; - args->nams.push_back(strdup(args->infile)); - fprintf(stderr,"\t-> -vcf-gp has been removed from current version, will be included in later versions depending on need\n"); - exit(0); - return; - } - free(tmp); - tmp=NULL; - tmp = angsd::getArg("-vcf-GL",tmp,args); - if(tmp!=NULL){ - args->inputtype=INPUT_VCF_GL; - args->infile = tmp; - args->nams.push_back(strdup(args->infile)); - return; - } - free(tmp); - tmp=NULL; - tmp = angsd::getArg("-vcf-PL",tmp,args); - if(tmp!=NULL){ - args->inputtype=INPUT_VCF_GL; - args->infile = tmp; - args->nams.push_back(strdup(args->infile)); - return; - } - free(tmp); - tmp=NULL; - tmp = angsd::getArg("-pileup",tmp,args); - if(tmp!=NULL){ - args->inputtype=INPUT_PILEUP; - args->infile = tmp; - args->nams.push_back(strdup(args->infile)); - return; - } - free(tmp); - tmp=NULL; - tmp = angsd::getArg("-beagle",tmp,args); - if(tmp!=NULL){ - args->inputtype=INPUT_BEAGLE; - args->infile = tmp; - args->nams.push_back(strdup(args->infile)); - return; - } - free(tmp); - tmp=NULL; - tmp = angsd::getArg("-bgen",tmp,args); - if(tmp!=NULL){ - args->inputtype=INPUT_BGEN; - args->infile = tmp; - args->nams.push_back(strdup(args->infile)); - return; - } - free(tmp); - - tmp=NULL; - tmp = angsd::getArg("-i",tmp,args); - if(tmp!=NULL){ - args->inputtype=INPUT_BAM; - args->infile = tmp; - args->nams.push_back(strdup(args->infile)); - return; - } - int nInd = 0; - nInd = angsd::getArg("-nInd",nInd,args); - free(tmp); - tmp=NULL; - tmp = angsd::getArg("-bam",tmp,args); - tmp = angsd::getArg("-b",tmp,args); - if(tmp!=NULL && args->argc>2){ - args->inputtype=INPUT_BAM; - args->infile = tmp; - args->nams = angsd::getFilenames(args->infile,nInd); - - return; - } - -} - - - -argStruct *setArgStruct(int argc,char **argv) { - - argStruct *arguments = new argStruct; - arguments->argumentFile = stderr; - arguments->outfiles =NULL; - arguments->fai=arguments->anc=arguments->ref=NULL; - arguments->hd=NULL; - arguments->revMap= NULL; - arguments->argc=argc; - arguments->argv=argv; - arguments->nReads = 50; - arguments->sm=NULL; - arguments->usedArgs= new int[argc+1];//well here we allocate one more than needed, this is only used in the ./angsd -beagle version - for(int i=0;iusedArgs[i]=0; - - arguments->inputtype=-1; - arguments->infile = NULL; - arguments->show =0; - arguments->fai = NULL; - //check output filename - arguments->outfiles = angsd::getArg("-out",arguments->outfiles,arguments); - if(arguments->outfiles==NULL){ - if(argc!=2) - fprintf(stderr,"\t-> No \'-out\' argument given, output files will be called \'angsdput\'\n"); - arguments->outfiles = strdup("angsdput"); - } - checkIfDir(arguments->outfiles); - - - //print arguments into logfile - if(argc>2) - arguments->argumentFile=aio::openFile(arguments->outfiles,ARGS); - - setInputType(arguments); - // fprintf(stderr,"setintputtype:%d\n",arguments->inputtype);exit(0); - return arguments; -} - void multiReader::printArg(FILE *argFile,argStruct *args){ fprintf(argFile,"----------------\n%s:\n",__FILE__); fprintf(argFile,"\t-nLines\t%d\t(Number of lines to read)\n",nLines); @@ -321,7 +108,7 @@ void multiReader::getOptions(argStruct *arguments){ } -multiReader::multiReader(int argc,char**argv){ +multiReader::multiReader(argStruct *args_arg){ gz=Z_NULL; myglf=NULL;myvcf=NULL;mpil=NULL;bglObj=NULL;bgenObj=NULL; @@ -334,13 +121,7 @@ multiReader::multiReader(int argc,char**argv){ nInd =0; isSim =0; args=NULL; - args = setArgStruct(argc,argv); - fprintf(args->argumentFile,"\t-> Command: \n"); - for(int i=0;iargumentFile,"%s ",argv[i]); - // fprintf(args->argumentFile,"\n\n"); - if(args->argumentFile!=stderr) - fprintf(args->argumentFile,"\n\t-> angsd version: %s (htslib: %s) build(%s %s)\n",ANGSD_VERSION,hts_version(),__DATE__,__TIME__); + args = args_arg; void printTime(FILE *fp); printTime(args->argumentFile); @@ -458,7 +239,16 @@ multiReader::multiReader(int argc,char**argv){ } } - + + if(args->inputtype==INPUT_BAM){ + //read bamfiles + extern int checkBamHeaders; + extern int doCheck; + extern char *fai_fname; + bufReader *initializeBufReaders2(const std::vector &vec,int exitOnError,int doCheck,char *fai_fname,bam_sample_t *sm); + args->rd = initializeBufReaders2(args->nams,checkBamHeaders,doCheck,fai_fname,args->sm); + fprintf(stderr, "[%s] %d samples in %lu input files\n", __func__, args->sm->n, args->nams.size()); + } if(fname==NULL) @@ -513,6 +303,7 @@ multiReader::multiReader(int argc,char**argv){ fprintf(stderr,"\t 5. FILTER column is currently NOT used (not sure what concensus is)\n"); fprintf(stderr,"\t 6. -sites does NOT work with vcf input but -r does\n"); fprintf(stderr,"\t 7. vcffilereading is still BETA, please report strange behaviour\n"); + fprintf(stderr,"\t 8. Consider adding \'-doMajorMinor 1\'\n"); } } @@ -524,57 +315,22 @@ multiReader::~multiReader(){ delete revMap; - switch(args->inputtype){ - case INPUT_PILEUP:{ + if(mpil) delete mpil; - break; - } - case INPUT_VCF_GP:{ - delete myvcf; - break; - } - case INPUT_VCF_GL:{ + if(myvcf) delete myvcf; - break; - } - case INPUT_GLF: - case INPUT_GLF3:{ + if(myglf) delete myglf; - break; - } - case INPUT_BEAGLE:{ + if(bglObj) delete bglObj; - break; - } - case INPUT_BGEN:{ + if(bgenObj) delete bgenObj; - break; - } - - default:{ - break; - } - } + if(gz!=Z_NULL) gzclose(gz); free(fname); - if(args->sm) - bam_smpl_destroy(args->sm); - - for(unsigned i=0;inams.size();i++) - free(args->nams[i]); - if(args->fai){ - free(args->fai); - } - delete [] args->usedArgs; - free(args->outfiles); - free(args->infile); - if(args->anc) - free(args->anc); - bam_hdr_destroy(args->hd); - if(args->argumentFile!=stderr) fclose(args->argumentFile); - delete args; + } diff --git a/multiReader.h b/multiReader.h index aa6cc33..8e0087f 100644 --- a/multiReader.h +++ b/multiReader.h @@ -32,8 +32,7 @@ class multiReader{ aMap *revMap; int pl_or_gl; // <-pl: pl_or_gl=0,gl:pl_or_gl=1 public: - multiReader(int,char**); - argStruct *getargs(){return args;} + multiReader(argStruct *arg); funkyPars *fetch(); ~multiReader(); }; diff --git a/pooled_alloc.h b/pooled_alloc.h index 6113503..0c56b8d 100644 --- a/pooled_alloc.h +++ b/pooled_alloc.h @@ -34,7 +34,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifdef __cplusplus extern "C" { #endif - +#include //tsk /* * Implements a pooled block allocator where all items are the same size, * but we need many of them. diff --git a/prep_sites.cpp b/prep_sites.cpp index ae4acf6..620004d 100644 --- a/prep_sites.cpp +++ b/prep_sites.cpp @@ -6,7 +6,6 @@ */ #ifdef __WITH_MAIN__ -#define GZOPT "w6h" #endif @@ -15,7 +14,8 @@ #include #define kv_roundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) #include "prep_sites.h" - +#include "aio.h" +#include "analysisFunction.h" #define BIN ".bin" #define IDX ".idx" @@ -124,15 +124,20 @@ void set(tary *ta,size_t pos,T val){ ta->d[pos] =val; } -void filt_print(FILE *fp,filt*f,char *chr){ - fprintf(stderr,"\n---------------\nfp:%p\nbg:%p\nhasExtra:%d nchr:%lu\n",f->fp,f->bg,f->hasExtra,f->offs.size()); +void filt_print(FILE *fp,filt*f,char *chr,int onlyHeader){ + fprintf(fp,"@@\tfp:%p\tqbg:%p\thasExtra:%d nchr:%lu\n",f->fp,f->bg,f->hasExtra,f->offs.size()); for(std::map::const_iterator it=f->offs.begin();it!=f->offs.end();++it){ - fprintf(stderr,"chr:\t\'%s\'\t",it->first); - fprintf(stderr,"last:\t\'%lu\'\t",it->second.len); - fprintf(stderr,"\toffs:%" PRId64 "\n",it->second.offs); + if(chr) + it=f->offs.find(chr); + if(it==f->offs.end()) + break; + fprintf(fp,"@@\tchr:\t\'%s\'\t",it->first); + fprintf(fp,"last:\t\'%lu\'\t",it->second.len); + fprintf(fp,"\toffs:%" PRId64 "\n",it->second.offs); + if(chr) + break; } - fprintf(stderr,"------------\n"); - for(std::map::const_iterator it=f->offs.begin();it!=f->offs.end();++it){ + for(std::map::const_iterator it=f->offs.begin();onlyHeader==0&&it!=f->offs.end();++it){ //if we have supplied a single chr if(chr!=NULL) filt_readSites(f,chr,0); @@ -299,6 +304,7 @@ int writeDat(char *last,mmap &mm,tary *keep,tary *major,tary * }else mm[strdup(last)]=1; //write data and index stuff + assert(0==bgzf_flush(BFP)); int64_t retVal =bgzf_tell(BFP);//now contains the offset to which we should point. //write chrname @@ -345,7 +351,7 @@ void filt_gen(const char *fname,int posi_off,int doCompl) { char* outnames_bin = append(fname,BIN); char* outnames_idx = append(fname,IDX); - BGZF *cfpD = bgzf_open(outnames_bin,GZOPT); + BGZF *cfpD = bgzf_open(outnames_bin,"w6h"); FILE *fp=fopen(outnames_idx,"w"); std::map mm;//simple structure to check that input has been sorted by chr/contig @@ -574,20 +580,28 @@ void filt_init(int argc,char**argv){ } int main_sites(int argc,char **argv){ -#if 0 - for(int i=0;i2) + chr=argv[2]; + filt *f = filt_read(*++argv); + filt_print(stdout,f,chr,0); + filt_dalloc(f); + }else if(!strcasecmp(*argv,"print_header")){ + char *chr = NULL; + if(argc>2) + chr=argv[2]; filt *f = filt_read(*++argv); - filt_print(stdout,f,NULL); + filt_print(stdout,f,chr,1); filt_dalloc(f); }else fprintf(stderr,"Unknown option: \'%s\'\n",*argv); diff --git a/shared.cpp b/shared.cpp index 58cb72d..cc52105 100644 --- a/shared.cpp +++ b/shared.cpp @@ -70,7 +70,8 @@ int queueStop =0; int howOften = 100; -void init(argStruct *arguments){ +void init(void *arg){ + argStruct *arguments = (argStruct *) arg; if(!isatty(fileno(stderr))){ isAtty = 0; } @@ -489,8 +490,3 @@ void printFunky(funkyPars *p){ } - - -const char *angsd_version(){ - return ANGSD_VERSION; -} diff --git a/shared.h b/shared.h index 2d3c109..8be9e1e 100644 --- a/shared.h +++ b/shared.h @@ -1,23 +1,17 @@ extern int SIG_COND; - - - #include #include #include #include #include "bambi_interface.h" #include "argStruct.h" -#include "version.h" + #ifndef _types_t #define _types_t - #define LENS 10000 -#define GZOPT "w6h" - enum{INPUT_BAM,INPUT_GLF,INPUT_GLF3,INPUT_BEAGLE,INPUT_PILEUP,INPUT_VCF_GL,INPUT_VCF_GP,INPUT_GLF10_TEXT,INPUT_BGEN}; @@ -59,7 +53,7 @@ funkyPars *funkyPars_init(); void funkyPars_destroy(funkyPars *p); -void init(argStruct *arguments);//intialize all needed objects +void init(void *arguments);//intialize all needed objects void destroy_shared();//destroy all initialized objects void selector(funkyPars *p); const char *angsd_version(); diff --git a/soap_likes.cpp b/soap_likes.cpp index cf59edd..f7d4ce3 100644 --- a/soap_likes.cpp +++ b/soap_likes.cpp @@ -25,7 +25,7 @@ #include "bambi_interface.h" #include "soap_likes.h" #include "analysisFunction.h" - +#include "aio.h" //calc_offset into an array of dim 256 256 4 4 int co(int a, int b, int c, int d){ return (a<<12 | b<<4 | c<<2|d ); diff --git a/test/tajima/check.R b/test/tajima/check.R new file mode 100644 index 0000000..c02725d --- /dev/null +++ b/test/tajima/check.R @@ -0,0 +1,138 @@ +##this is code for validating that the old -fold 1 in angsd and -dothetas in angsd correspond to the similar analyses that has been moved to realSFS where it is easier to generalize +##this is not meant being run, but will stay in angsd for the time being as documentation + +##compare SFS for the folded (in angsd) and fold in realSFS +a <- scan("output//fold.saf.em.ml") +b <- scan("output//fold2.saf.em.ml") +b[b!=0]-a +# [1] 2.410800e-02 -2.538800e-02 1.336000e-03 -5.900000e-05 2.000001e-06 +# [6] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 +#[11] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 +#[16] 0.000000e+00 -2.000000e-06 3.000000e-06 -1.000000e-06 -1.000000e-06 +#[21] 9.999999e-07 + +##compare unfolded popstats +a<-read.table("output/norm.thetas.idx.pestPG") +b<-read.table("output/norm.thetaFromSaf.thetas.idx.pestPG") +a[1] +## V1 +##1 (0,264255)(1,264256)(0,264256) +b[1] +## V1 +##1 (0,264255)(1,264256)(0,264256) +as.numeric(a[-1])-as.numeric(b[-1]) +## [1] 0.000000e+00 0.000000e+00 0.000000e+00 3.000001e-06 0.000000e+00 +## [6] -6.999995e-06 -1.200000e-05 0.000000e+00 0.000000e+00 0.000000e+00 +##[11] 0.000000e+00 0.000000e+00 0.000000e+00 + + +a<-read.table("output/norm.thetas.idx.win.pestPG",as.is=T) +b<-read.table("output/norm.thetaFromSaf.thetas.idx.win.pestPG",as.is=T) +table(a[,1]!=b[,1]) +## +##FALSE +## 259 +range(a[,-1]-b[,-1]) +##[1] -3e-06 3e-06 + +##check persite thetas +##../../misc/thetaStat print output/norm.thetas.idx >output/norm.thetas.idx.txt +##../../misc/thetaStat print output/norm.thetaFromSaf.thetas.idx >output/norm.thetaFromSaf.thetas.idx.txt +a<-read.table("output/norm.thetas.idx.txt") +b<-read.table("output/norm.thetaFromSaf.thetas.idx.txt") + + +#> table(is.infinite(as.matrix(a))==is.infinite(as.matrix(b))) +## +## +## TRUE +##1849792 +a<- as.matrix(a) +b<- as.matrix(b) +a[is.infinite(a)] <- 0 +b[is.infinite(b)] <- 0 + ## +range(a-b) +##[1] -1e-06 1e-06 + +##Compare unfolded popstats +a<-read.table("output/fold.thetas.idx.pestPG") +b<-read.table("output/fold.thetaFromSaf.thetas.idx.pestPG") +a[1] +## V1 +##1 (0,264255)(1,264256)(0,264256) +b[1] +## V1 +##1 (0,264255)(1,264256)(0,264256) + +as.numeric(a[-1])-as.numeric(b[-1]) +## [1] 0.000000e+00 0.000000e+00 0.000000e+00 3.000001e-06 0.000000e+00 +## [6] -6.999995e-06 -1.200000e-05 0.000000e+00 0.000000e+00 0.000000e+00 +##[11] 0.000000e+00 0.000000e+00 0.000000e+00 + + +a<-read.table("output/norm.thetas.idx.win.pestPG",as.is=T) +b<-read.table("output/norm.thetaFromSaf.thetas.idx.win.pestPG",as.is=T) +table(a[,1]!=b[,1]) +## +##FALSE +## 259 +range(a[,-1]-b[,-1]) +#[1] -3e-06 3e-06 + +##check persite thetas norm +../../misc/thetaStat print output/norm.thetas.idx >output/norm.thetas.idx.txt +../../misc/thetaStat print output/norm.thetaFromSaf.thetas.idx >output/norm.thetaFromSaf.thetas.idx.txt +../../misc/thetaStat print output/fold.thetas.idx >output/fold.thetas.idx.txt +../../misc/thetaStat print output/fold.thetaFromSaf.thetas.idx >output/fold.thetaFromSaf.thetas.idx.txt + + +a<-as.matrix(read.table("output/norm.thetas.idx.txt")) +b<-as.matrix(read.table("output/norm.thetaFromSaf.thetas.idx.txt")) +table(a[is.infinite(a)]!=b[is.infinite(b)]) + +## +## FALSE +##106721 +a[is.infinite(a)]<-0 +b[is.infinite(b)]<-0 + range(a-b) +##[1] -1e-06 1e-06 + +a<-as.matrix(read.table("output/fold.thetas.idx.txt")) +b<-as.matrix(read.table("output/fold.thetaFromSaf.thetas.idx.txt")) +table(a[is.infinite(a)]!=b[is.infinite(b)]) +a[is.infinite(a)]<-0 +b[is.infinite(b)]<-0 +## +table(a[is.infinite(a)]!=b[is.infinite(b)]) + +## FALSE +##792768 + +a[is.infinite(a)]<-0 +b[is.infinite(b)]<-0 + range(a-b) +##[1] -1e-06 7.2e-05 + +##per site looks fine, lets look at the global and the window statistic + +range(read.table("output/norm.thetas.idx.pestPG")[1,-1]-read.table("output/norm.thetaFromSaf.thetas.idx.pestPG")[1,-1]) +##[1] -1.200000e-05 3.000001e-06 + +range(read.table("output/norm.thetas.idx.win.pestPG")[,-1]-read.table("output/norm.thetaFromSaf.thetas.idx.win.pestPG")[,-1]) +#[1] -3e-06 3e-06 + + +range(read.table("output/fold.thetas.idx.pestPG")[1,-1]-read.table("output/fold.thetaFromSaf.thetas.idx.pestPG")[1,-1]) +##[1] 0.000000 0.005551 +read.table("output/fold.thetas.idx.pestPG")[1,-1]-read.table("output/fold.thetaFromSaf.thetas.idx.pestPG")[1,-1] +## V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 +##1 0 0 0.005551 0.001118 0 0 0 0 0 0 0 0 0 + +##Difference in 0.005 in watterson and 0.001 in pairwise, that seems reasonable given that the old analyses was with double and the new with floats. This should just be a difference in precision +range(read.table("output/fold.thetas.idx.win.pestPG")[,-1]-read.table("output/fold.thetaFromSaf.thetas.idx.win.pestPG")[,-1]) +##[1] -0.000001 0.000181 + +#again this looks like the same + diff --git a/test/tajima/md5/pestPG.md5sum b/test/tajima/md5/pestPG.md5sum index b7dd2b4..c64430d 100644 --- a/test/tajima/md5/pestPG.md5sum +++ b/test/tajima/md5/pestPG.md5sum @@ -13,5 +13,5 @@ d287128a98471e0170abbb1a98d27461 tajima/output/norm.thetas.gz c8298ccb0c2587f417b70b24cff82e62 tajima/output/norm.thetas.idx.pestPG 47142c0c1741b549d346e5fb7da8cdc2 tajima/output/fold.thetas.idx.win.pestPG 9f732415df2fbbe53aa1d60ddee23695 tajima/output/norm.thetas.idx.win.pestPG -f1c2b542b933dd9ead062c3799950f84 tajima/output/thetaFromSaf.thetas.gz -c7af1253fca1fadb80da4a116e18438c tajima/output/thetaFromSaf.thetas.idx +f1c2b542b933dd9ead062c3799950f84 tajima/output/norm.thetaFromSaf.thetas.gz +c7af1253fca1fadb80da4a116e18438c tajima/output/norm.thetaFromSaf.thetas.idx diff --git a/test/tajima/md5/pestPG2.md5sum b/test/tajima/md5/pestPG2.md5sum new file mode 100644 index 0000000..d1e28fe --- /dev/null +++ b/test/tajima/md5/pestPG2.md5sum @@ -0,0 +1,17 @@ +a19115f8072f160dd82b56b2fa979d9f tajima/output/fold.thetaFromSaf.thetas.gz +c7af1253fca1fadb80da4a116e18438c tajima/output/fold.thetaFromSaf.thetas.idx +3b214b586d7e2f5061a89e859e4720dd tajima/output/fold.thetaFromSaf.thetas.idx.pestPG +7e724c7defb92c81ac1b7f86751459fb tajima/output/fold.thetaFromSaf.thetas.idx.win.pestPG +482c3873bd67b37a71f65984fd314809 tajima/output/fold2.saf.em.ml +bba35f18e822155e20e382832ec2f4c6 tajima/output/glout.argg +d1912927c016d7e015534628d2db9c2f tajima/output/glout.glf.gz +f202632b69282e04d0348d80e4c572f2 tajima/output/glout.pgEstH +a5d398a8483dfc4c620b33654c69dd65 tajima/output/glout.vPos +377a1958bdafd617e78f570669a80f5b tajima/output/norm.saf.em.ml +c852a54d9e6ff25163a4164cc352772e tajima/output/norm.saf.gz +c4b22ab1fe1c510ad31d7b0c854139ef tajima/output/norm.saf.idx +63b99ed660e5714221b1a6c93a449e9d tajima/output/norm.saf.pos.gz +f1c2b542b933dd9ead062c3799950f84 tajima/output/norm.thetaFromSaf.thetas.gz +c7af1253fca1fadb80da4a116e18438c tajima/output/norm.thetaFromSaf.thetas.idx +bd70106934c5cf8e475ebe1f4be65342 tajima/output/norm.thetaFromSaf.thetas.idx.pestPG +ad19cbdc833619ab4ea7a3ea8dce6180 tajima/output/norm.thetaFromSaf.thetas.idx.win.pestPG diff --git a/test/testAll.sh b/test/testAll.sh index 3da43b7..e535f71 100755 --- a/test/testAll.sh +++ b/test/testAll.sh @@ -32,7 +32,7 @@ RVAL=0 if [[ ! -z "$BAMDIR" ]]; then echo "Testing -sites" -./testFilterSites.sh $WDIR/angsd $BAMDIR +time ./testFilterSites.sh $WDIR/angsd $BAMDIR if [ ! $? -eq 0 ] ;then echo "Problem with -sites exit code: $?" cat ./testFilterSites.sh.log @@ -42,7 +42,7 @@ fi if [[ ! -z "$BAMDIR" ]]; then echo "Testing vcfreading" -./testVcf.sh $WDIR/angsd ${BAMDIR}/small2.bcf +time ./testVcf.sh $WDIR/angsd ${BAMDIR}/small2.bcf if [ ! $? -eq 0 ] ;then echo "Problem with -vcf-pl exit code: $?" cat ./testVcf.sh.log @@ -51,7 +51,7 @@ fi fi echo "Testing neutrality test statistics" -./testTaj.sh $WDIR +time ./testTaj.sh $WDIR if [ ! $? -eq 0 ] ;then echo "Problem with neutrality test statistics exit code: $?" cat ./testTaj.sh.log @@ -59,7 +59,7 @@ if [ ! $? -eq 0 ] ;then fi echo "Testing fst using msms" -./testFst.sh $WDIR +time ./testFst.sh $WDIR if [ ! $? -eq 0 ] ;then echo "Problem with Fst test statistics exit code: $?" cat ./testFst.sh.log @@ -67,7 +67,7 @@ if [ ! $? -eq 0 ] ;then fi echo "Testing fst_folded using msms" -./testFst_folded.sh $WDIR +time ./testFst_folded.sh $WDIR if [ ! $? -eq 0 ] ;then echo "Problem with Fst_folded test statistics exit code: $?" cat ./testFst_folded.sh.log @@ -77,7 +77,7 @@ fi echo "Testing SFS" -./testSFS.sh $WDIR +time ./testSFS.sh $WDIR if [ ! $? -eq 0 ] ;then echo "Problem with SFS exit code: $?" cat ./testSFS.sh.log @@ -86,7 +86,7 @@ fi if [[ ! -z "$BAMDIR" ]]; then echo "Testing basic mpileup" -./testBam.sh $WDIR/angsd $BAMDIR +time ./testBam.sh $WDIR/angsd $BAMDIR if [ ! $? -eq 0 ] ;then echo "Problem with basic pileup exit code: $?" cat ./testBam.sh.log @@ -95,7 +95,7 @@ fi fi echo "Testing association" -./testDoAsso2456.sh $WDIR +time ./testDoAsso2456.sh $WDIR if [ ! $? -eq 0 ] ;then echo "Problem with association exit code: $?" cat ./testDoAsso2456.sh.log @@ -104,7 +104,7 @@ fi if [[ ! -z "$BAMDIR" ]]; then echo "Testing haplocall" -./testHaploCall.sh $WDIR/angsd +time ./testHaploCall.sh $WDIR/angsd if [ ! $? -eq 0 ] ;then echo "Problem with haplocall exit code: $?" cat ./testHaploCall.sh.log diff --git a/test/testTaj.sh b/test/testTaj.sh index 73e0c03..ce1143b 100755 --- a/test/testTaj.sh +++ b/test/testTaj.sh @@ -29,29 +29,32 @@ echo "Generating genotype likelihood based on the haplotypes" >>${LOG} 2>&1 ${WDIR}/misc/msToGlf -in ${MSMS} -out ${ODIR}/glout -err 0.005 -depth 8 -singleOut 1 -simpleRand 0 >>${LOG} 2>&1 ${WDIR}/angsd -isSim 1 -glf ${ODIR}/glout.glf.gz -out ${ODIR}/norm -doSaf 1 -nInd 20 -fai hg19.fa.fai 2>>${LOG} ${WDIR}/misc/realSFS ${ODIR}/norm.saf.idx -P 24 -nSites 1000000 -m 0 -seed -1 >${ODIR}/norm.saf.em.ml 2>>${LOG} -${WDIR}/angsd -isSim 1 -glf ${ODIR}/glout.glf.gz -out ${ODIR}/norm -nInd 20 -doThetas 1 -doSaf 1 -pest ${ODIR}/norm.saf.em.ml -fai hg19.fa.fai 2>>${LOG} -${WDIR}/misc/realSFS saf2theta ${ODIR}/norm.saf.idx -sfs ${ODIR}/norm.saf.em.ml -outname ${ODIR}/thetaFromSaf 2>>${LOG} +${WDIR}/misc/realSFS saf2theta ${ODIR}/norm.saf.idx -sfs ${ODIR}/norm.saf.em.ml -outname ${ODIR}/norm.thetaFromSaf 2>>${LOG} + ${WDIR}/misc/thetaStat do_stat ${ODIR}/norm.thetas.idx 2>>${LOG} ${WDIR}/misc/thetaStat do_stat ${ODIR}/norm.thetas.idx -outnames ${ODIR}/norm.thetas.idx.win -step 1000 -win 5000 2>>${LOG} -echo "2) Will do folded analysis" >>${LOG} 2>&1 +${WDIR}/misc/thetaStat do_stat ${ODIR}/norm.thetaFromSaf.thetas.idx 2>>${LOG} +${WDIR}/misc/thetaStat do_stat ${ODIR}/norm.thetaFromSaf.thetas.idx -outnames ${ODIR}/norm.thetaFromSaf.thetas.idx.win -step 1000 -win 5000 2>>${LOG} -${WDIR}/angsd -isSim 1 -glf ${ODIR}/glout.glf.gz -out ${ODIR}/fold -doSaf 1 -nInd 20 -fold 1 -fai hg19.fa.fai 2>>${LOG} -${WDIR}/misc/realSFS ${ODIR}/fold.saf.idx -P 24 -nSites 1000000 -m 0 -seed -1 >${ODIR}/fold.saf.em.ml 2>>${LOG} -${WDIR}/angsd -isSim 1 -glf ${ODIR}/glout.glf.gz -out ${ODIR}/fold -nInd 20 -doThetas 1 -doSaf 1 -pest ${ODIR}/fold.saf.em.ml -fold 1 -fai hg19.fa.fai 2>>${LOG} -${WDIR}/misc/thetaStat do_stat ${ODIR}/fold.thetas.idx -nChr 40 2>>${LOG} -${WDIR}/misc/thetaStat do_stat ${ODIR}/fold.thetas.idx -nChr 40 -outnames ${ODIR}/fold.thetas.idx.win -step 1000 -win 5000 2>>${LOG} +echo "2) Will do folded analysis" >>${LOG} 2>&1 +${WDIR}/misc/realSFS ${ODIR}/norm.saf.idx -P 24 -nSites 1000000 -m 0 -seed -1 -fold 1 >${ODIR}/fold2.saf.em.ml 2>>${LOG} +${WDIR}/misc/realSFS saf2theta ${ODIR}/norm.saf.idx -sfs ${ODIR}/fold2.saf.em.ml -outname ${ODIR}/fold.thetaFromSaf -fold 1 2>>${LOG} +${WDIR}/misc/thetaStat do_stat ${ODIR}/fold.thetaFromSaf.thetas.idx -nChr 40 2>>${LOG} +${WDIR}/misc/thetaStat do_stat ${ODIR}/fold.thetaFromSaf.thetas.idx -nChr 40 -outnames ${ODIR}/fold.thetaFromSaf.thetas.idx.win -step 1000 -win 5000 2>>${LOG} echo -e "\tThe theta estimates from msms simulated haplotypes for 8x 0.5% errorrate" >>${LOG} 2>&1 echo -e "\tWatterson\tpairwise\ttajimasD" >>${LOG} 2>&1 grep ThetaW ${MSMS}|grep Sum >>${LOG} 2>&1 echo -e "-------\nThe estimated thetas (sum of 100 reps) and the tajima value (unfolded)" >>${LOG} 2>&1 -cat ${ODIR}/norm.thetas.idx.pestPG >>${LOG} 2>&1 +cat ${ODIR}/norm.thetaFromSaf.thetas.idx.pestPG >>${LOG} 2>&1 echo -e "-------\nThe estimated thetas (sum of 100 reps) and the tajima value (unfolded)" >>${LOG} 2>&1 -cat ${ODIR}/fold.thetas.idx.pestPG >>${LOG} 2>&1 +cat ${ODIR}/fold.thetaFromSaf.thetas.idx.pestPG >>${LOG} 2>&1 echo "You should eyeball the above and see if they are comparable column (1-5,9) (not all defined for unfold)" >>${LOG} 2>&1 ##when generated the results: ##md5sum tajima/output/*pestPG tajima/output/*.ml tajima/output/*.saf tajima/output/*.gz >tajima/md5/pestPG.md5sum -${MD5} -c tajima/md5/pestPG.md5sum >>${LOG} 2>&1 || exit 1 +#${MD5} -c tajima/md5/pestPG.md5sum >>${LOG} 2>&1 || exit 1 +${MD5} -c tajima/md5/pestPG2.md5sum >>${LOG} 2>&1 || exit 2 + diff --git a/vcfReader.cpp b/vcfReader.cpp index d21e31a..6187aca 100644 --- a/vcfReader.cpp +++ b/vcfReader.cpp @@ -7,7 +7,6 @@ #include "analysisFunction.h" #include "vcfReader.h" - bam_hdr_t *bcf_hdr_2_bam_hdr_t (htsstuff *hs){ bam_hdr_t *ret = bam_hdr_init(); ret->l_text = 0; @@ -48,7 +47,6 @@ bam_hdr_t *bcf_hdr_2_bam_hdr_t (htsstuff *hs){ return ret; } - void htsstuff_seek(htsstuff *hs,char *seek){ assert(seek); if(seek){ @@ -189,78 +187,7 @@ void buildreorder(int swap[10],char **alleles,int len){ fprintf(stderr,"TT swap[%d]:%d\n",9,swap[9]); #endif } -#if 0 -//copied from vcf.c in htslib to understand accessing of these variables -int vcf_format_tsk(const bcf_hdr_t *h, const bcf1_t *v) -{ - kstring_t *s=(kstring_t *)malloc(sizeof(kstring_t)); - int i; - bcf_unpack((bcf1_t*)v, BCF_UN_ALL); - kputs(h->id[BCF_DT_CTG][v->rid].key, s); // CHROM - kputc('\t', s); kputw(v->pos + 1, s); // POS - kputc('\t', s); kputs(v->d.id ? v->d.id : ".", s); // ID - kputc('\t', s); // REF - if (v->n_allele > 0) kputs(v->d.allele[0], s); - else kputc('.', s); - kputc('\t', s); // ALT - if (v->n_allele > 1) { - for (i = 1; i < v->n_allele; ++i) { - if (i > 1) kputc(',', s); - kputs(v->d.allele[i], s); - } - } else kputc('.', s); - kputc('\t', s); // QUAL - if ( bcf_float_is_missing(v->qual) ) kputc('.', s); // QUAL - else kputd(v->qual, s); - kputc('\t', s); // FILTER - if (v->d.n_flt) { - for (i = 0; i < v->d.n_flt; ++i) { - if (i) kputc(';', s); - kputs(h->id[BCF_DT_ID][v->d.flt[i]].key, s); - } - } else kputc('.', s); - kputc('\t', s); // INFO - if (v->n_info) { - int first = 1; - for (i = 0; i < v->n_info; ++i) { - bcf_info_t *z = &v->d.info[i]; - if ( !z->vptr ) continue; - if ( !first ) kputc(';', s); - first = 0; - if (z->key >= h->n[BCF_DT_ID]) { - hts_log_error("Invalid BCF, the INFO index is too large"); - errno = EINVAL; - return -1; - } - kputs(h->id[BCF_DT_ID][z->key].key, s); - fprintf(stderr,"%s: \n",h->id[BCF_DT_ID][z->key].key); - if (z->len <= 0) continue; - kputc('=', s); - if (z->len == 1) - { - switch (z->type) - { - case BCF_BT_INT8: if ( z->v1.i==bcf_int8_missing ) kputc('.', s); else kputw(z->v1.i, s); break; - case BCF_BT_INT16: if ( z->v1.i==bcf_int16_missing ) kputc('.', s); else kputw(z->v1.i, s); break; - case BCF_BT_INT32: if ( z->v1.i==bcf_int32_missing ) kputc('.', s); else kputw(z->v1.i, s); break; - case BCF_BT_FLOAT: if ( bcf_float_is_missing(z->v1.f) ) kputc('.', s); else kputd(z->v1.f, s); break; - case BCF_BT_CHAR: kputc(z->v1.i, s); break; - default: hts_log_error("Unexpected type %d", z->type); exit(1); break; - } - } - else bcf_fmt_array(s, z->len, z->type, z->vptr); - } - if ( first ) kputc('.', s); - } else kputc('.', s); - // FORMAT and individual information - kputc('\n', s); - fprintf(stderr,"kstirngts: %s\n",s->s); - free(s->s);free(s); - - return 0; -} -#endif //returns which info field is the INDEL int whichisindel(const bcf_hdr_t *h){ int hit =-1; @@ -443,7 +370,7 @@ int vcfReader::vcfReaderwrap_reader(htsstuff *hts,bcf1_t *rec){ // int onlyprint = 10; -funkyPars *vcfReader::fetch(int chunkSize){ +funkyPars *vcfReader::fetch(int chunkSize) { funkyPars *r = funkyPars_init(); r->nInd = hs->nsamples; r->likes=new double*[chunkSize]; @@ -513,7 +440,25 @@ funkyPars *vcfReader::fetch(int chunkSize){ r=NULL; } return r; - } - +bcf_hdr_t *vcfreader_hs_bcf_hdr = NULL;//nasty hack global dragon +vcfReader::vcfReader(char *fname,char *seek,int pl_or_gl_a,std::vector *regions_a){ + itrname.s=NULL;itrname.l=itrname.m =0; + regions = regions_a; + farr=NULL; + iarr=NULL; + mfarr=0; + miarr=0; + ln_gl_m = 1024; + ln_gl =(float *) malloc(sizeof(float)*ln_gl_m); + pl_or_gl = pl_or_gl_a; + hs=htsstuff_init(fname,seek); + vcfreader_hs_bcf_hdr = hs->hdr; + bamhdr = bcf_hdr_2_bam_hdr_t(hs); + acpy=NULL; + for(int i=0;i *regions_a){ - itrname.s=NULL;itrname.l=itrname.m =0; - regions = regions_a; - farr=NULL; - iarr=NULL; - mfarr=0; - miarr=0; - ln_gl_m = 1024; - ln_gl =(float *) malloc(sizeof(float)*ln_gl_m); - pl_or_gl = pl_or_gl_a; - hs=htsstuff_init(fname,seek); - bamhdr = bcf_hdr_2_bam_hdr_t(hs); - acpy=NULL; - for(int i=0;i *regions_a); ~vcfReader(){htsstuff_destroy(hs);free(pl);free(ln_gl);free(iarr);free(farr);free(itrname.s);} };