diff --git a/Main/lib/perl/RetrieveFuncscanResultsFromComputeCluster.pm b/Main/lib/perl/RetrieveFuncscanResultsFromComputeCluster.pm
new file mode 100644
index 0000000..fd0f9eb
--- /dev/null
+++ b/Main/lib/perl/RetrieveFuncscanResultsFromComputeCluster.pm
@@ -0,0 +1,52 @@
+package MicrobiomeWorkflow::Main::RetrieveFuncscanResultsFromComputeCluster;
+
+@ISA = (ReFlow::Controller::WorkflowStepHandle);
+
+use strict;
+use warnings;
+use ReFlow::Controller::WorkflowStepHandle;
+use File::Basename;
+
+# Workflow step that copies nf-core/funcscan results from the cluster
+# workflow data dir into the local workflow data dir.
+#
+# Params (see getParamDeclaration):
+#   clusterDir - results dir, relative to the cluster workflow data dir
+#   targetDir  - destination dir, relative to the local workflow data dir
+sub run {
+  my ($self, $test, $undo) = @_;
+
+  my $clusterDir = join("/", $self->getClusterWorkflowDataDir(), $self->getParamValue("clusterDir"));
+  my $targetDir = join("/", $self->getWorkflowDataDir(), $self->getParamValue("targetDir"));
+
+  if ($undo) {
+    # Undo deletes the files found directly inside targetDir.
+    $self->runCmd(0, "rm -f $targetDir/*");
+  } elsif ($test) {
+    $self->runCmd(0, "echo test > $targetDir/test.txt"); ## TODO a real file name or something here
+  } else {
+    ## TODO decide what all we need from funcscan and collect it up nicely here
+    ## $self->runCmd(0, "fun stuff here");
+    my $from = "TODO";
+    my $to = "TODO";
+
+    $self->copyFromCluster($clusterDir, $from, $to, 0);
+  }
+}
+
+# Names of the step params this step reads.
+sub getParamDeclaration {
+  return (
+    "clusterDir",
+    "targetDir",
+  );
+}
+
+# This step declares no config properties.
+sub getConfigDeclaration {
+  return (
+    # [name, default, description]
+  );
+}
+
+1;
diff --git a/Main/lib/perl/RetrieveMagResultsFromComputeCluster.pm b/Main/lib/perl/RetrieveMagResultsFromComputeCluster.pm
new file mode 100644
index 0000000..a838905
--- /dev/null
+++ b/Main/lib/perl/RetrieveMagResultsFromComputeCluster.pm
@@ -0,0 +1,54 @@
+package MicrobiomeWorkflow::Main::RetrieveMagResultsFromComputeCluster;
+
+@ISA = (ReFlow::Controller::WorkflowStepHandle);
+
+use strict;
+use warnings;
+use ReFlow::Controller::WorkflowStepHandle;
+use File::Basename;
+
+# Workflow step that copies nf-core/mag results from the cluster
+# workflow data dir into the local workflow data dir.
+#
+# Params (see getParamDeclaration):
+#   clusterDir - results dir, relative to the cluster workflow data dir
+#   targetDir  - destination dir, relative to the local workflow data dir
+sub run {
+  my ($self, $test, $undo) = @_;
+
+  my $clusterDir = join("/", $self->getClusterWorkflowDataDir(), $self->getParamValue("clusterDir"));
+  my $targetDir = join("/", $self->getWorkflowDataDir(), $self->getParamValue("targetDir"));
+
+  if ($undo) {
+    # Undo deletes the files found directly inside targetDir.
+    $self->runCmd(0, "rm -f $targetDir/*");
+  } elsif ($test) {
+    $self->runCmd(0, "echo test > $targetDir/test.txt"); ## TODO a real file name or something here
+  } else {
+    ## TODO decide what all we need from mag and collect it up nicely here
+    ## I think we want to return everything except the megahit assemblies ??
+    ## were not planning to load anything for the assemblies, but funcscan will need them as input
+    ## $self->runCmd(0, "fun stuff here");
+    my $from = "TODO";
+    my $to = "TODO";
+
+    $self->copyFromCluster($clusterDir, $from, $to, 0);
+  }
+}
+
+# Names of the step params this step reads.
+sub getParamDeclaration {
+  return (
+    "clusterDir",
+    "targetDir",
+  );
+}
+
+# This step declares no config properties.
+sub getConfigDeclaration {
+  return (
+    # [name, default, description]
+  );
+}
+
+1;
diff --git a/Main/lib/perl/WorkflowSteps/MakeNfCoreFuncscanParamsFile.pm b/Main/lib/perl/WorkflowSteps/MakeNfCoreFuncscanParamsFile.pm
new file mode 100644
index 0000000..9f09fbc
--- /dev/null
+++ b/Main/lib/perl/WorkflowSteps/MakeNfCoreFuncscanParamsFile.pm
@@ -0,0 +1,51 @@
+package MicrobiomeWorkflow::Main::WorkflowSteps::MakeNfCoreFuncscanParamsFile;
+
+@ISA = (ApiCommonWorkflow::Main::WorkflowSteps::WorkflowStep);
+
+use strict;
+use warnings;
+use ApiCommonWorkflow::Main::WorkflowSteps::WorkflowStep;
+
+# Workflow step that writes the nf-core/funcscan params file (nf-params.json)
+# into the analysis dir.  Undo removes that file.
+#
+# Params read:
+#   analysisDir - analysis dir, relative to the workflow data dir
+sub run {
+  my ($self, $test, $undo) = @_;
+
+  my $paramsFilePath = join("/", $self->getWorkflowDataDir(), $self->getParamValue("analysisDir"), "nf-params.json");
+  my $samplesheetPath = join("/", $self->getWorkflowDataDir(), $self->getParamValue("analysisDir"), "samplesheet.csv");
+
+  ## TODO sort all these ref dbs, and double check which ones i need.
+  ## TODO compare these args to what i tested on pmacs and make sure i didnt miss anything
+
+  if ($undo) {
+    # Remove the one file this step writes.
+    $self->runCmd(0, "rm -f $paramsFilePath");
+  } else {
+    open(my $fh, ">", $paramsFilePath) or die "$! :Can't open config file '$paramsFilePath' for writing";
+
+    print $fh <<"END_PARAMS";
+{
+  "input": "$samplesheetPath",
+  "outdir": "out",
+  "run_amp_screening": true,
+  "run_arg_screening": true,
+  "run_bgc_screening": true,
+  "amp_hmmsearch_models": "TODO",
+  "amp_ampcombi_db": "TODO",
+  "arg_amrfinderplus_db": "TODO",
+  "arg_deeparg_data": "TODO",
+  "bgc_antismash_databases": "TODO",
+  "bgc_deepbgc_database": "TODO",
+  "bgc_hmmsearch_models": "TODO"
+}
+END_PARAMS
+
+    # Buffered write errors only surface at close, so check it.
+    close($fh) or die "$! :Can't close config file '$paramsFilePath' after writing";
+  }
+}
+
+1;
+
diff --git a/Main/lib/perl/WorkflowSteps/MakeNfCoreFuncscanSamplesheet.pm b/Main/lib/perl/WorkflowSteps/MakeNfCoreFuncscanSamplesheet.pm
new file mode 100644
index 0000000..8124974
--- /dev/null
+++ b/Main/lib/perl/WorkflowSteps/MakeNfCoreFuncscanSamplesheet.pm
@@ -0,0 +1,40 @@
+package MicrobiomeWorkflow::Main::WorkflowSteps::MakeNfCoreFuncscanSamplesheet;
+
+@ISA = (ApiCommonWorkflow::Main::WorkflowSteps::WorkflowStep);
+
+use strict;
+use warnings;
+use ApiCommonWorkflow::Main::WorkflowSteps::WorkflowStep;
+
+# Workflow step that creates the nf-core/funcscan samplesheet (samplesheet.csv)
+# in the analysis dir, then removes the sampleToFastq file.  Undo removes the
+# samplesheet.  The samplesheet rows are not written yet (see TODO below).
+#
+# Params read:
+#   analysisDir           - analysis dir, relative to the workflow data dir
+#   sampleToFastqFileName - file name inside analysisDir
+#   magAnalysisDir        - dir, relative to the workflow data dir; read for the TODO rows
+sub run {
+  my ($self, $test, $undo) = @_;
+
+  my $sampleToFastqPath = join("/", $self->getWorkflowDataDir(), $self->getParamValue("analysisDir"), $self->getParamValue("sampleToFastqFileName"));
+  my $magAnalysisDir = join("/", $self->getWorkflowDataDir(), $self->getParamValue("magAnalysisDir"));
+  my $samplesheetPath = join("/", $self->getWorkflowDataDir(), $self->getParamValue("analysisDir"), "samplesheet.csv");
+
+  if ($undo) {
+    # Remove the one file this step writes.
+    $self->runCmd(0, "rm -f $samplesheetPath");
+  } else {
+    open(my $fh, ">", $samplesheetPath) or die "$! :Can't open config file '$samplesheetPath' for writing";
+
+    # TODO write the header here, then
+    # loop through the sampleToFastq file and get sample names
+    # for each sample, write a row to the samplesheet that points to the associated mag output file
+    close($fh) or die "$! :Can't close config file '$samplesheetPath' after writing";
+
+    ## remove the sampleToFastq file
+    $self->runCmd(0, "rm -rf $sampleToFastqPath"); ## TODO make sure we want to do this, vs ignore its existence
+  }
+}
+
+1;
\ No newline at end of file
diff --git a/Main/lib/perl/WorkflowSteps/MakeNfCoreMagParamsFile.pm b/Main/lib/perl/WorkflowSteps/MakeNfCoreMagParamsFile.pm
new file mode 100644
index 0000000..5495c06
--- /dev/null
+++ b/Main/lib/perl/WorkflowSteps/MakeNfCoreMagParamsFile.pm
@@ -0,0 +1,64 @@
+package MicrobiomeWorkflow::Main::WorkflowSteps::MakeNfCoreMagParamsFile;
+
+@ISA = (ApiCommonWorkflow::Main::WorkflowSteps::WorkflowStep);
+
+use strict;
+use warnings;
+use ApiCommonWorkflow::Main::WorkflowSteps::WorkflowStep;
+
+# Workflow step that writes the nf-core/mag params file (nf-params.json)
+# into the analysis dir.  Undo removes that file.
+#
+# Params read:
+#   analysisDir  - analysis dir, relative to the workflow data dir
+#   krakenDBPath - written as "kraken2_db"; relative to the workflow data dir
+#   isPaired     - written inverted as "single_end"
+sub run {
+  my ($self, $test, $undo) = @_;
+
+  my $paramsFilePath = join("/", $self->getWorkflowDataDir(), $self->getParamValue("analysisDir"), "nf-params.json");
+  my $samplesheetPath = join("/", $self->getWorkflowDataDir(), $self->getParamValue("analysisDir"), "samplesheet.csv");
+  my $krakenDBPath = join("/", $self->getWorkflowDataDir(), $self->getParamValue("krakenDBPath"));
+
+  # isPaired comes from workflow XML, so it may be the text "false", which
+  # Perl treats as true.  Compare the text instead of relying on truthiness.
+  my $isPaired = $self->getParamValue("isPaired");
+  my $readsArePaired = defined($isPaired) && $isPaired !~ /^\s*(?:false|no|0)?\s*$/i;
+  my $singleEnd = $readsArePaired ? "false" : "true";
+
+  ## TODO compare these args to what i tested on pmacs and make sure i didnt miss anything
+  ## TODO figure out for sure which of these ref dbs we need and update them
+
+  if ($undo) {
+    # Remove the one file this step writes.
+    $self->runCmd(0, "rm -f $paramsFilePath");
+  } else {
+    open(my $fh, ">", $paramsFilePath) or die "$! :Can't open config file '$paramsFilePath' for writing";
+
+    print $fh <<"END_PARAMS";
+{
+  "input": "$samplesheetPath",
+  "outdir": "out",
+  "kraken2_db": "$krakenDBPath",
+  "single_end": $singleEnd,
+  "cat_db": "TODO",
+  "gtdb_db": "TODO",
+  "gtdb_mash": "TODO",
+  "skip_spades": true,
+  "skip_spadeshybrid": true,
+  "run_virus_identification": true,
+  "skip_concoct": true,
+  "metaeuk_db": "TODO",
+  "busco_db": "TODO",
+  "checkm_db": "TODO",
+  "gunc_db": "TODO"
+}
+END_PARAMS
+
+    # Buffered write errors only surface at close, so check it.
+    close($fh) or die "$! :Can't close config file '$paramsFilePath' after writing";
+  }
+}
+
+1;
+
diff --git a/Main/lib/perl/WorkflowSteps/MakeNfCoreMagSamplesheet.pm b/Main/lib/perl/WorkflowSteps/MakeNfCoreMagSamplesheet.pm
new file mode 100644
index 0000000..61ed178
--- /dev/null
+++ b/Main/lib/perl/WorkflowSteps/MakeNfCoreMagSamplesheet.pm
@@ -0,0 +1,41 @@
+package MicrobiomeWorkflow::Main::WorkflowSteps::MakeNfCoreMagSamplesheet;
+
+@ISA = (ApiCommonWorkflow::Main::WorkflowSteps::WorkflowStep);
+
+use strict;
+use warnings;
+use ApiCommonWorkflow::Main::WorkflowSteps::WorkflowStep;
+
+# Workflow step that creates the nf-core/mag samplesheet (samplesheet.csv)
+# in the analysis dir, then removes the sampleToFastq file.  Undo removes the
+# samplesheet.  The samplesheet rows are not written yet (see TODO below).
+#
+# Params read:
+#   analysisDir           - analysis dir, relative to the workflow data dir
+#   sampleToFastqFileName - file name inside analysisDir
+sub run {
+  my ($self, $test, $undo) = @_;
+
+  my $sampleToFastqPath = join("/", $self->getWorkflowDataDir(), $self->getParamValue("analysisDir"), $self->getParamValue("sampleToFastqFileName"));
+  my $samplesheetPath = join("/", $self->getWorkflowDataDir(), $self->getParamValue("analysisDir"), "samplesheet.csv");
+
+  if ($undo) {
+    # Remove the one file this step writes.
+    $self->runCmd(0, "rm -f $samplesheetPath");
+  } else {
+    open(my $fh, ">", $samplesheetPath) or die "$! :Can't open config file '$samplesheetPath' for writing";
+
+    # TODO write the header here, then
+    # loop through the sampleToFastq file and reformat rows
+    # write out the reformatted rows to the samplesheet
+    # i need to go look at these two files yet to see what needs doing here
+
+    close($fh) or die "$! :Can't close config file '$samplesheetPath' after writing";
+
+    ## remove the sampleToFastq file
+    $self->runCmd(0, "rm -rf $sampleToFastqPath");
+  }
+}
+
+1;
+
diff --git a/Main/lib/perl/WorkflowSteps/MakeNfCorePMACSConfig.pm b/Main/lib/perl/WorkflowSteps/MakeNfCorePMACSConfig.pm
new file mode 100644
index 0000000..80a76e5
--- /dev/null
+++ b/Main/lib/perl/WorkflowSteps/MakeNfCorePMACSConfig.pm
@@ -0,0 +1,67 @@
+package MicrobiomeWorkflow::Main::WorkflowSteps::MakeNfCorePMACSConfig;
+
+@ISA = (ApiCommonWorkflow::Main::WorkflowSteps::WorkflowStep);
+
+use strict;
+use warnings;
+use ApiCommonWorkflow::Main::WorkflowSteps::WorkflowStep;
+
+# Workflow step that writes a nextflow config file into the analysis dir.
+# Undo removes that file.
+#
+# Params read:
+#   analysisDir, configFileName - where the config file is written
+#   memoryInGb                  - number written as max_memory with an MB suffix
+#   nextflowTowerAccessToken    - written as tower.accessToken
+#   nextflowTowerWorkspaceId    - written as tower.workspaceId
+sub run {
+  my ($self, $test, $undo) = @_;
+
+  my $configPath = join("/", $self->getWorkflowDataDir(), $self->getParamValue("analysisDir"), $self->getParamValue("configFileName"));
+
+  my $memoryInGb = $self->getParamValue("memoryInGb");
+  my $nextflowTowerAccessToken = $self->getParamValue("nextflowTowerAccessToken");
+  my $nextflowTowerWorkspaceId = $self->getParamValue("nextflowTowerWorkspaceId");
+
+  # not sure we need to grab these, since this is pmacs specific :(
+  # id like to look into a pmacs config file here: https://nf-co.re/configs
+  my $executor = $self->getClusterExecutor();
+  my $queue = $self->getClusterQueue();
+
+  if ($undo) {
+    $self->runCmd(0, "rm -rf $configPath");
+  } else {
+    open(my $fh, ">", $configPath) or die "$! :Can't open config file '$configPath' for writing";
+
+    # NOTE(review): the memory-unit explanation inside the generated config is
+    # carried over unchanged; confirm the kb/mb/gb reasoning against the cluster.
+    print $fh <<"END_CONFIG";
+process {
+  executor = '$executor'
+  queue = '$queue'
+}
+// nf-core requests memory in kb
+// pmacs wants requests for memory in mb
+// therefore, request for 6mb will look like 6gb to pmacs.
+// having to do this trick makes this config specific to pmacs, unfortunately.
+params {
+  max_memory = ${memoryInGb}.MB
+}
+singularity {
+  enabled = true
+}
+tower {
+  accessToken = '$nextflowTowerAccessToken'
+  workspaceId = '$nextflowTowerWorkspaceId'
+  enabled = true
+}
+END_CONFIG
+
+    # Buffered write errors only surface at close, so check it.
+    close($fh) or die "$! :Can't close config file '$configPath' after writing";
+  }
+}
+
+1;
+
diff --git a/Main/lib/xml/workflow/runNfCoreFunscanOnCluster.xml b/Main/lib/xml/workflow/runNfCoreFunscanOnCluster.xml
new file mode 100644
index 0000000..16f11d5
--- /dev/null
+++ b/Main/lib/xml/workflow/runNfCoreFunscanOnCluster.xml
@@ -0,0 +1,53 @@
+
+
+
+
+
+
+
+
+
+
+
+ $$analysisDir$$
+ $$analysisDir$$/results
+ nextflow.config
+ $$memoryInGb$$
+ $$nextflowTowerAccessToken$$
+ $$nextflowTowerWorkspaceId$$
+
+
+
+ $$analysisDir$$
+ $$sampleToFastqFileName$$
+ $$magAnalysisDir$$
+
+
+
+ $$analysisDir$$
+
+
+ $$analysisDir$$
+
+
+
+
+
+
+ $$analysisDir$$
+ $$analysisDir$$/results
+ $$analysisDir$$/nextflow.config
+ $$analysisDir$$/nf-params.json
+ nf-core/funcscan
+ true
+ $$pipelineVersion$$
+
+
+
+
+ $$analysisDir$$/results
+ $$resultDir$$
+
+
+
+
diff --git a/Main/lib/xml/workflow/runNfCoreMagOnCluster.xml b/Main/lib/xml/workflow/runNfCoreMagOnCluster.xml
new file mode 100644
index 0000000..fe157c6
--- /dev/null
+++ b/Main/lib/xml/workflow/runNfCoreMagOnCluster.xml
@@ -0,0 +1,53 @@
+
+
+
+
+
+
+
+
+
+
+
+
+ $$analysisDir$$
+ $$analysisDir$$/results
+ nextflow.config
+ $$memoryInGb$$
+ $$nextflowTowerAccessToken$$
+ $$nextflowTowerWorkspaceId$$
+
+
+
+ $$analysisDir$$
+ $$sampleToFastqFileName$$
+
+
+ $$analysisDir$$
+ $$krakenDB$$
+ $$isPaired$$
+
+
+ $$analysisDir$$
+
+
+
+
+
+
+ $$analysisDir$$
+ $$analysisDir$$/results
+ $$analysisDir$$/nextflow.config
+ $$analysisDir$$/nf-params.json
+ nf-core/mag
+ true
+ $$pipelineVersion$$
+
+
+
+
+ $$analysisDir$$/results
+ $$resultDir$$
+
+
+
\ No newline at end of file
diff --git a/Main/lib/xml/workflowTemplates/microbiomeRoot.xml b/Main/lib/xml/workflowTemplates/microbiomeRoot.xml
index 6c33921..7bc22f1 100644
--- a/Main/lib/xml/workflowTemplates/microbiomeRoot.xml
+++ b/Main/lib/xml/workflowTemplates/microbiomeRoot.xml
@@ -1,7 +1,7 @@ - + webServices/$$projectName$$/release-$$projectVersionForWebsiteFiles$$ downloadSite/$$projectName$$/release-$$projectVersionForWebsiteFiles$$/
@@ -30,6 +30,7 @@ + rRNAReference_RSRC $$projectName$$.xml
@@ -42,6 +43,12 @@ + + + KrakenReferenceDB_RSRC + $$projectName$$.xml + + rRNAReference_RSRC
@@ -54,6 +61,12 @@ + + KrakenReferenceDB_RSRC + + + + all_results
@@ -83,6 +96,8 @@ + +
@@ -144,6 +159,34 @@ + + + + ${sampleToWgsFastqFileName} + otuDADA2_${name}_RSRC/MAG_${name} + all_results + KrakenReferenceDB_RSRC/TODO + 80 + ${nextflowTowerAccessToken} + ${nextflowTowerWorkspaceId} + 2.5.4 + ${isPaired} + + + + + + ${sampleToWgsFastqFileName} + otuDADA2_${name}_RSRC/MAG_${name} + otuDADA2_${name}_RSRC/Funcscan_${name} + all_results + 40 + ${nextflowTowerAccessToken} + ${nextflowTowerWorkspaceId} + 1.1.5 + + + otuDADA2_${name}_RSRC
@@ -159,7 +202,7 @@ - + CrossStudy_RSRC $$projectName$$.xml
@@ -167,15 +210,15 @@ - + LineageTaxonLinkingTable_RSRC $$projectName$$.xml - + - + ISASimple_RSRC $$projectName$$.xml