Skip to content
46 changes: 46 additions & 0 deletions Main/lib/perl/RetrieveFuncscanResultsFromComputeCluster.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# The package name must match the file name
# (RetrieveFuncscanResultsFromComputeCluster.pm) so that loading the module by
# its file-derived name defines the class that is then instantiated.
package MicrobiomeWorkflow::Main::RetrieveFuncscanResultsFromComputeCluster;

@ISA = (ReFlow::Controller::WorkflowStepHandle);

use strict;
use warnings;
use ReFlow::Controller::WorkflowStepHandle;
use File::Basename;

# Step entry point: bring nf-core/funcscan results back from the cluster.
#
# Arguments:
#   $self - step handle providing the directory getters, getParamValue,
#           runCmd and copyFromCluster
#   $test - when true (and not undoing), only drop a placeholder file in the
#           target directory
#   $undo - when true, delete the files in the target directory
sub run {
    my ($self, $test, $undo) = @_;

    # Both directories are resolved relative to the workflow data roots.
    my $clusterDir = join("/", $self->getClusterWorkflowDataDir(), $self->getParamValue("clusterDir"));
    my $targetDir = join("/", $self->getWorkflowDataDir(), $self->getParamValue("targetDir"));

    if ($undo) {
        $self->runCmd(0, "rm -f $targetDir/*");
    } elsif ($test) {
        ## TODO a real file name or something here
        $self->runCmd(0, "echo test > $targetDir/test.txt");
    } else {
        ## TODO decide what all we need from funcscan and collect it up nicely here
        ## $self->runCmd(0, "fun stuff here");
        my $remoteName = "TODO";
        my $localName = "TODO";

        $self->copyFromCluster($clusterDir, $remoteName, $localName, 0);
    }
}

# Names of the step parameters this class reads via getParamValue.
sub getParamDeclaration {
    return qw(
        clusterDir
        targetDir
    );
}

# This step declares no config properties, so the declaration list is empty.
# Entries, if ever added, take the form [name, default, description].
sub getConfigDeclaration {
    return ();
}
1;
48 changes: 48 additions & 0 deletions Main/lib/perl/RetrieveMagResultsFromComputeCluster.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package MicrobiomeWorkflow::Main::RetrieveMagResultsFromComputeCluster;

@ISA = (ReFlow::Controller::WorkflowStepHandle);

use strict;
use warnings;
use ReFlow::Controller::WorkflowStepHandle;
use File::Basename;

# Step entry point: bring nf-core/mag results back from the cluster.
#
# Arguments:
#   $self - step handle providing the directory getters, getParamValue,
#           runCmd and copyFromCluster
#   $test - when true (and not undoing), only drop a placeholder file in the
#           target directory
#   $undo - when true, delete the files in the target directory
sub run {
    my ($self, $test, $undo) = @_;

    # Both directories are resolved relative to the workflow data roots.
    my $clusterDir = join("/", $self->getClusterWorkflowDataDir(), $self->getParamValue("clusterDir"));
    my $targetDir = join("/", $self->getWorkflowDataDir(), $self->getParamValue("targetDir"));

    if ($undo) {
        $self->runCmd(0, "rm -f $targetDir/*");
    } elsif ($test) {
        ## TODO a real file name or something here
        $self->runCmd(0, "echo test > $targetDir/test.txt");
    } else {
        ## TODO decide what all we need from mag and collect it up nicely here
        ## I think we want to return everything except the megahit assemblies ??
        ## we're not planning to load anything for the assemblies, but funcscan will need them as input
        ## $self->runCmd(0, "fun stuff here");
        my $remoteName = "TODO";
        my $localName = "TODO";

        $self->copyFromCluster($clusterDir, $remoteName, $localName, 0);
    }
}

# Names of the step parameters this class reads via getParamValue.
sub getParamDeclaration {
    return qw(
        clusterDir
        targetDir
    );
}

# This step declares no config properties, so the declaration list is empty.
# Entries, if ever added, take the form [name, default, description].
sub getConfigDeclaration {
    return ();
}
1;
46 changes: 46 additions & 0 deletions Main/lib/perl/WorkflowSteps/MakeNfCoreFuncscanParamsFile.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
package MicrobiomeWorkflow::Main::WorkflowSteps::MakeNfCoreFuncscanParamsFile;

@ISA = (ApiCommonWorkflow::Main::WorkflowSteps::WorkflowStep);

use strict;
use warnings;
use ApiCommonWorkflow::Main::WorkflowSteps::WorkflowStep;

# Write the nf-core/funcscan parameters file (nf-params.json) into the
# analysis directory, or delete it again on undo.
#
# Arguments:
#   $self - workflow step; supplies getWorkflowDataDir, getParamValue, runCmd
#   $test - test-mode flag (not consulted here; the file is written either way)
#   $undo - when true, remove the params file instead of writing it
#
# Dies if the params file cannot be opened for writing or closed cleanly.
sub run {
    my ($self, $test, $undo) = @_;

    my $paramsFilePath = join("/", $self->getWorkflowDataDir(), $self->getParamValue("analysisDir"), "nf-params.json");
    my $samplesheetPath = join("/", $self->getWorkflowDataDir(), $self->getParamValue("analysisDir"), "samplesheet.csv");

    ## TODO sort all these ref dbs, and double check which ones i need.
    ## TODO compare these args to what i tested on pmacs and make sure i didnt miss anything

    if ($undo) {
        # Undo removes exactly the file this step creates.
        $self->runCmd(0, "rm -rf $paramsFilePath");
    } else {
        open(my $fh, ">", $paramsFilePath) or die "$! :Can't open config file '$paramsFilePath' for writing";

        # The heredoc deliberately starts with an empty line and is left
        # unindented so the emitted text is unchanged.
        print {$fh} <<"END_PARAMS";

{
"input": "$samplesheetPath",
"outdir": "out",
"run_amp_screening": true,
"run_arg_screening": true,
"run_bgc_screening": true,
"amp_hmmsearch_models": "TODO",
"amp_ampcombi_db": "TODO",
"arg_amrfinderplus_db": "TODO",
"arg_deeparg_data": "TODO",
"bgc_antismash_databases": "TODO",
"bgc_deepbgc_database": "TODO",
"bgc_hmmsearch_models": "TODO"
}
END_PARAMS

        # Buffered write errors only surface at close, so check it.
        close($fh) or die "$! :Can't close config file '$paramsFilePath' after writing";
    }
}

1;

31 changes: 31 additions & 0 deletions Main/lib/perl/WorkflowSteps/MakeNfCoreFuncscanSamplesheet.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package MicrobiomeWorkflow::Main::WorkflowSteps::MakeNfCoreFuncscanSamplesheet;

@ISA = (ApiCommonWorkflow::Main::WorkflowSteps::WorkflowStep);

use strict;
use warnings;
use ApiCommonWorkflow::Main::WorkflowSteps::WorkflowStep;

# Create the nf-core/funcscan samplesheet (samplesheet.csv) in the analysis
# directory, or delete it again on undo. Writing the rows is still a TODO, so
# the forward path currently produces an empty file.
#
# Arguments:
#   $self - workflow step; supplies getWorkflowDataDir, getParamValue, runCmd
#   $test - test-mode flag (not consulted here)
#   $undo - when true, remove the samplesheet instead of writing it
#
# Dies if the samplesheet cannot be opened for writing or closed cleanly.
sub run {
    my ($self, $test, $undo) = @_;

    my $sampleToFastqPath = join("/", $self->getWorkflowDataDir(), $self->getParamValue("analysisDir"), $self->getParamValue("sampleToFastqFileName"));
    # Not used yet; the TODO below needs it to locate the per-sample mag output.
    my $magAnalysisDir = join("/", $self->getWorkflowDataDir(), $self->getParamValue("magAnalysisDir"));
    my $samplesheetPath = join("/", $self->getWorkflowDataDir(), $self->getParamValue("analysisDir"), "samplesheet.csv");

    if ($undo) {
        # Undo removes the file this step creates. The sampleToFastq file
        # deleted on the forward path is not restored here.
        $self->runCmd(0, "rm -rf $samplesheetPath");
    } else {
        open(my $fh, ">", $samplesheetPath) or die "$! :Can't open config file '$samplesheetPath' for writing";

        # TODO write the header here, then
        # loop through the sampleToFastq file and get sample names
        # for each sample, write a row to the samplesheet that points to the associated mag output file

        close($fh) or die "$! :Can't close config file '$samplesheetPath' after writing";

        ## remove the sampleToFastq file
        $self->runCmd(0, "rm -rf $sampleToFastqPath"); ## TODO make sure we want to do this, vs ignore its existence
    }
}

1;
51 changes: 51 additions & 0 deletions Main/lib/perl/WorkflowSteps/MakeNfCoreMagParamsFile.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package MicrobiomeWorkflow::Main::WorkflowSteps::MakeNfCoreMagParamsFile;

@ISA = (ApiCommonWorkflow::Main::WorkflowSteps::WorkflowStep);

use strict;
use warnings;
use ApiCommonWorkflow::Main::WorkflowSteps::WorkflowStep;

# Write the nf-core/mag parameters file (nf-params.json) into the analysis
# directory, or delete it again on undo.
#
# Arguments:
#   $self - workflow step; supplies getWorkflowDataDir, getParamValue, runCmd
#   $test - test-mode flag (not consulted here; the file is written either way)
#   $undo - when true, remove the params file instead of writing it
#
# Dies if the params file cannot be opened for writing or closed cleanly.
sub run {
    my ($self, $test, $undo) = @_;

    my $paramsFilePath = join("/", $self->getWorkflowDataDir(), $self->getParamValue("analysisDir"), "nf-params.json");
    my $samplesheetPath = join("/", $self->getWorkflowDataDir(), $self->getParamValue("analysisDir"), "samplesheet.csv");
    my $krakenDBPath = join("/", $self->getWorkflowDataDir(), $self->getParamValue("krakenDBPath"));

    # Any non-empty string is true in Perl, including a literal "false", so
    # treat that spelling explicitly as "not paired".
    my $isPaired = $self->getParamValue("isPaired");
    my $singleEnd = ($isPaired && lc($isPaired) ne "false") ? "false" : "true";

    ## TODO compare these args to what i tested on pmacs and make sure i didnt miss anything
    ## TODO figure out for sure which of these ref dbs we need and update them

    if ($undo) {
        # Undo removes exactly the file this step creates.
        $self->runCmd(0, "rm -rf $paramsFilePath");
    } else {
        open(my $fh, ">", $paramsFilePath) or die "$! :Can't open config file '$paramsFilePath' for writing";

        # The heredoc deliberately starts with an empty line and is left
        # unindented so the emitted text is unchanged.
        print {$fh} <<"END_PARAMS";

{
"input": "$samplesheetPath",
"outdir": "out",
"kraken2_db": "$krakenDBPath",
"single_end": $singleEnd,
"cat_db": "TODO",
"gtdb_db": "TODO",
"gtdb_mash": "TODO",
"skip_spades": true,
"skip_spadeshybrid": true,
"run_virus_identification": true,
"skip_concoct": true,
"metaeuk_db": "TODO",
"busco_db": "TODO",
"checkm_db": "TODO",
"gunc_db": "TODO"
}
END_PARAMS

        # Buffered write errors only surface at close, so check it.
        close($fh) or die "$! :Can't close config file '$paramsFilePath' after writing";
    }
}

1;

33 changes: 33 additions & 0 deletions Main/lib/perl/WorkflowSteps/MakeNfCoreMagSamplesheet.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package MicrobiomeWorkflow::Main::WorkflowSteps::MakeNfCoreMagSamplesheet;

@ISA = (ApiCommonWorkflow::Main::WorkflowSteps::WorkflowStep);

use strict;
use warnings;
use ApiCommonWorkflow::Main::WorkflowSteps::WorkflowStep;

# Create the nf-core/mag samplesheet (samplesheet.csv) in the analysis
# directory, or delete it again on undo. Writing the rows is still a TODO, so
# the forward path currently produces an empty file.
#
# Arguments:
#   $self - workflow step; supplies getWorkflowDataDir, getParamValue, runCmd
#   $test - test-mode flag (not consulted here)
#   $undo - when true, remove the samplesheet instead of writing it
#
# Dies if the samplesheet cannot be opened for writing or closed cleanly.
sub run {
    my ($self, $test, $undo) = @_;

    my $sampleToFastqPath = join("/", $self->getWorkflowDataDir(), $self->getParamValue("analysisDir"), $self->getParamValue("sampleToFastqFileName"));
    my $samplesheetPath = join("/", $self->getWorkflowDataDir(), $self->getParamValue("analysisDir"), "samplesheet.csv");

    if ($undo) {
        # Undo removes the file this step creates. The sampleToFastq file
        # deleted on the forward path is not restored here.
        $self->runCmd(0, "rm -rf $samplesheetPath");
    } else {
        open(my $fh, ">", $samplesheetPath) or die "$! :Can't open config file '$samplesheetPath' for writing";

        # TODO write the header here, then
        # loop through the sampleToFastq file and reformat rows
        # write out the reformatted rows to the samplesheet
        # i need to go look at these two files yet to see what needs doing here

        close($fh) or die "$! :Can't close config file '$samplesheetPath' after writing";

        ## remove the sampleToFastq file
        $self->runCmd(0, "rm -rf $sampleToFastqPath");
    }
}

1;

53 changes: 53 additions & 0 deletions Main/lib/perl/WorkflowSteps/MakeNfCorePMACSConfig.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
package MicrobiomeWorkflow::Main::WorkflowSteps::MakeNfCorePMACSConfig;

@ISA = (ApiCommonWorkflow::Main::WorkflowSteps::WorkflowStep);

use strict;
use warnings;
use ApiCommonWorkflow::Main::WorkflowSteps::WorkflowStep;

# Write the Nextflow config file used for nf-core runs on PMACS into the
# analysis directory, or delete it again on undo.
#
# Arguments:
#   $self - workflow step; supplies getWorkflowDataDir, getParamValue,
#           getClusterExecutor, getClusterQueue and runCmd
#   $test - test-mode flag (not consulted here; the file is written either way)
#   $undo - when true, remove the config file instead of writing it
#
# Dies if the config file cannot be opened for writing or closed cleanly.
sub run {
    my ($self, $test, $undo) = @_;

    my $configPath = join("/", $self->getWorkflowDataDir(), $self->getParamValue("analysisDir"), $self->getParamValue("configFileName"));

    my $memoryInGb = $self->getParamValue("memoryInGb");
    my $nextflowTowerAccessToken = $self->getParamValue("nextflowTowerAccessToken");
    my $nextflowTowerWorkspaceId = $self->getParamValue("nextflowTowerWorkspaceId");

    # not sure we need to grab these, since this is pmacs specific :(
    # id like to look into a pmacs config file here: https://nf-co.re/configs
    my $executor = $self->getClusterExecutor();
    my $queue = $self->getClusterQueue();

    if ($undo) {
        $self->runCmd(0, "rm -rf $configPath");
    } else {
        open(my $fh, ">", $configPath) or die "$! :Can't open config file '$configPath' for writing";

        # Joined with newlines and no trailing newline, so the emitted text is
        # unchanged. The GB figure is deliberately written with an MB suffix;
        # the comment lines emitted into the config explain the PMACS trick.
        my $configText = join("\n",
            "process {",
            "executor = '$executor'",
            "queue = '$queue'",
            "}",
            "// nf-core requests memory in kb",
            "// pmacs wants requests for memory in mb",
            "// therefore, request for 6mb will look like 6gb to pmacs.",
            "// having to do this trick makes this config specific to pmacs, unfortunately.",
            "params {",
            "max_memory = ${memoryInGb}.MB",
            "}",
            "singularity {",
            "enabled = true",
            "}",
            "tower {",
            "accessToken = '$nextflowTowerAccessToken'",
            "workspaceId = '$nextflowTowerWorkspaceId'",
            "enabled = true",
            "}",
        );
        print {$fh} $configText;

        # Buffered write errors only surface at close, so check it.
        close($fh) or die "$! :Can't close config file '$configPath' after writing";
    }
}

1;

53 changes: 53 additions & 0 deletions Main/lib/xml/workflow/runNfCoreFunscanOnCluster.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
<!--
  Runs nf-core/funcscan on the compute cluster: build the Nextflow config,
  samplesheet and params file locally, mirror the analysis directory to the
  cluster, run the pipeline, then copy the results back.
-->
<workflowGraph name="runNfCoreFuncscanOnCluster">
  <param name="memoryInGb"/>
  <param name="nextflowTowerAccessToken"/>
  <param name="nextflowTowerWorkspaceId"/>
  <param name="sampleToFastqFileName"/>
  <param name="analysisDir"/>
  <param name="resultDir"/>
  <param name="magAnalysisDir"/>
  <param name="pipelineVersion"/>

  <!-- NOTE(review): clusterResultDir is passed here but MakeNfCorePMACSConfig does not appear to read it; confirm whether it is needed -->
  <step name="makeNfCoreFuncscanPMACSConfig" stepClass="MicrobiomeWorkflow::Main::WorkflowSteps::MakeNfCorePMACSConfig">
    <paramValue name="analysisDir">$$analysisDir$$</paramValue>
    <paramValue name="clusterResultDir">$$analysisDir$$/results</paramValue>
    <paramValue name="configFileName">nextflow.config</paramValue>
    <paramValue name="memoryInGb">$$memoryInGb$$</paramValue>
    <paramValue name="nextflowTowerAccessToken">$$nextflowTowerAccessToken$$</paramValue>
    <paramValue name="nextflowTowerWorkspaceId">$$nextflowTowerWorkspaceId$$</paramValue>
  </step>

  <step name="makeNfCoreFuncscanSamplesheet" stepClass="MicrobiomeWorkflow::Main::WorkflowSteps::MakeNfCoreFuncscanSamplesheet">
    <paramValue name="analysisDir">$$analysisDir$$</paramValue>
    <paramValue name="sampleToFastqFileName">$$sampleToFastqFileName$$</paramValue>
    <paramValue name="magAnalysisDir">$$magAnalysisDir$$</paramValue>
  </step>

  <!-- TODO also some ref dbs to manage here -->
  <step name="makeNfCoreFuncscanParamsFile" stepClass="MicrobiomeWorkflow::Main::WorkflowSteps::MakeNfCoreFuncscanParamsFile">
    <paramValue name="analysisDir">$$analysisDir$$</paramValue>
  </step>

  <step name="mirrorToCluster" stepClass="MicrobiomeWorkflow::Main::MirrorToComputeCluster" stepLoadTypes="toCluster">
    <paramValue name="fileOrDirToMirror">$$analysisDir$$</paramValue>
    <depends name="makeNfCoreFuncscanPMACSConfig"/>
    <depends name="makeNfCoreFuncscanSamplesheet"/>
    <depends name="makeNfCoreFuncscanParamsFile"/>
  </step>

  <step name="runClusterTask" stepClass="ReFlow::StepClasses::RunAndMonitorNextflow">
    <paramValue name="workingDir">$$analysisDir$$</paramValue>
    <paramValue name="resultsDir">$$analysisDir$$/results</paramValue>
    <paramValue name="nextflowConfigFile">$$analysisDir$$/nextflow.config</paramValue>
    <paramValue name="nextflowParamsFile">$$analysisDir$$/nf-params.json</paramValue>
    <paramValue name="nextflowWorkflow">nf-core/funcscan</paramValue>
    <paramValue name="isNfCoreWorkflow">true</paramValue>
    <paramValue name="pipelineVersion">$$pipelineVersion$$</paramValue>
    <depends name="mirrorToCluster"/>
  </step>

  <!-- stepClass follows the module file name Main/lib/perl/RetrieveFuncscanResultsFromComputeCluster.pm -->
  <step name="retrieveNfCoreFuncscanResultsFromComputeCluster" stepClass="MicrobiomeWorkflow::Main::RetrieveFuncscanResultsFromComputeCluster" stepLoadTypes="fromCluster">
    <paramValue name="clusterDir">$$analysisDir$$/results</paramValue>
    <paramValue name="targetDir">$$resultDir$$</paramValue>
    <depends name="runClusterTask"/>
  </step>
</workflowGraph>

Loading