-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathsubmitWorkflow.pl
More file actions
executable file
·119 lines (99 loc) · 2.84 KB
/
submitWorkflow.pl
File metadata and controls
executable file
·119 lines (99 loc) · 2.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#!/usr/bin/perl
use strict;
use warnings;

# Submits a multi-sample workflow parameter file one sample at a time.
#
# Usage: submitWorkflow.pl <workflowParamsFile> [extra args for the workflow command]
#
# While running, two files are maintained next to the parameter file:
#   <file>.incomplete - header plus every sample not yet finished (including
#                       the one currently being submitted)
#   <file>.current    - header plus only the sample being submitted
# Both are removed once every sample has been processed.

# Installation root used to build the java classpath below.
our $SOFTWAREROOT = "/primary/vari/software/ECWorkflow/default";
my $workflowCmd = "java -Xms4096m -Xmx4096m -cp $SOFTWAREROOT/ECWorkFlow.v4.jar:$SOFTWAREROOT/libs/pegasus.jar edu.usc.epigenome.workflow.SequencingPipeline -pbs ";

# First argument is the parameter file; the remaining arguments are joined
# with spaces and appended to the workflow command line.
my $workflow = shift @ARGV;
(defined $workflow && -e $workflow) or die "workflow not found";
checkParam($workflow);
my $otherArgs = join(" ", @ARGV);

my $incompleteFile = "$workflow.incomplete";
my $currentFile    = "$workflow.current";

open(my $in, '<', $workflow) or die "cannot read $workflow: $!";
my @lines = <$in>;
close $in;

# Lines mentioning "Sample" describe samples; everything else is shared
# header material. @header is a package variable because writeSample reads it.
our @header = grep { !/Sample/ } @lines;
my @sampleLines = grep { /Sample/ } @lines;

$SIG{'INT'} = sub {die("Ctrl-C detected! BAILING! you can continue where you left off by using $incompleteFile")};

# Group the sample lines into one multi-line entry per sample. A line starting
# with '#' always opens a new entry; a "Sample.<id>." line joins the current
# entry when the id matches (or when no id has been seen since the last '#'
# line) and otherwise opens a new entry.
my @samples;
my $sample_entry   = '';
my $current_sample = -1;
for my $line (@sampleLines)
{
    if ($line =~ /^\#/)
    {
        push @samples, $sample_entry if $sample_entry;
        $sample_entry   = $line;
        $current_sample = -1;
    }
    elsif ($line =~ /ample\.(\d+)\./)
    {
        my $id = $1;
        if ($current_sample == $id || $current_sample == -1)
        {
            $sample_entry .= $line;
        }
        else
        {
            push @samples, $sample_entry if $sample_entry;
            $sample_entry = $line;
        }
        $current_sample = $id;
    }
    else
    {
        die "unexpected line: $line";
    }
}
push @samples, $sample_entry if $sample_entry;

# Submit the samples in order. The bookkeeping files are rewritten before each
# submission so that an interrupted run can be resumed from <file>.incomplete.
while (@samples)
{
    writeSample($incompleteFile, @samples);
    writeSample($currentFile, $samples[0]);

    # The first line of the entry serves as its label in progress messages.
    my ($label) = split(/\n/, $samples[0]);
    print STDERR "Submitting Sample: $label\n";

    # The command's stdout is captured (not echoed), as before; it is only
    # shown when the command reports a failure.
    my $output = qx{$workflowCmd $currentFile $otherArgs};
    if ($? == -1)
    {
        warn "WARNING: could not run workflow command for '$label': $!\n";
    }
    elsif ($? != 0)
    {
        warn sprintf("WARNING: workflow command for '%s' failed (exit code %d, signal %d)\n",
            $label, $? >> 8, $? & 127);
        warn $output if defined $output && length $output;
    }

    shift @samples;
}
unlink($currentFile);
unlink($incompleteFile);
# Write a parameter file made of the shared header lines followed by the
# given sample entries.
#
# Arguments:
#   $fileName - path of the file to (re)create
#   remaining arguments - sample entries to write after the header
#
# Any existing file at $fileName is removed first. When no entries are given
# the sub returns right after that removal, so no file is left behind.
# Reads the package-level @header array.
#
# Dies if the file cannot be opened or closed, so the caller never carries on
# with a file that was not actually written.
sub writeSample
{
    my ($fileName, @entries) = @_;
    unlink($fileName);
    return unless @entries;

    open(my $out, '>', $fileName) or die "Cannot write $fileName: $!\n";
    for my $chunk (@header, @entries)
    {
        # Whitespace-only chunks are dropped; every other chunk is printed
        # followed by a newline.
        next if $chunk =~ /^\s+$/;
        print {$out} "$chunk\n";
    }
    close($out) or die "Cannot finish writing $fileName: $!\n";
}
# Sanity-check a workflow parameter file before anything is submitted.
#
# Arguments:
#   $file - path of the parameter file; its name must contain "aram"
#
# Checks performed on every non-comment line:
#   - ClusterSize, FlowCellName and queue must each be assigned somewhere;
#     the FlowCellName line must not contain an underscore
#   - a parameter assigned more than once is reported on STDOUT
#   - parameters matching /Sample.+input/i hold a comma-separated file list;
#     each missing file produces a warning
#   - parameters matching /Sample.+efer/i must name an existing path, either
#     as given or with ".fa" appended; otherwise the sub dies
#
# Lines whose first non-blank character is '#' are skipped, so commented-out
# settings neither satisfy the required-parameter checks nor trigger the
# file-existence checks.
#
# Dies on an invalid file name, an unreadable file, a missing reference, or a
# missing required parameter. Returns nothing meaningful.
sub checkParam
{
    my ($file) = @_;
    die "Param filename should look someting like \"workFlowParams*\", $file not valid name" unless $file =~ /aram/;

    open(my $in, '<', $file) or die "Cannot read param file $file: $!\n";

    my ($foundCS, $foundFC, $foundQ);
    my %uniqParams;
    while (my $line = <$in>)
    {
        next if $line =~ /^\s*\#/;

        $foundCS = 1 if $line =~ /ClusterSize\s*\=\s*\d+/;
        $foundFC = 1 if $line =~ /FlowCellName\s*\=\s*\S+/ && $line !~ /_/;
        $foundQ  = 1 if $line =~ /queue\s*\=\s*\S+/;

        next unless $line =~ /^\s*(.+?)\s*\=\s*(.+)\s*$/;
        my ($param, $val) = ($1, $2);

        print "$param is defined multiple times in file\n" if $uniqParams{$param};
        $uniqParams{$param}++;

        if ($param =~ /Sample.+input/i)
        {
            for my $input (split(",", $val))
            {
                -e $input || warn "INPUT FILE \t $input \tspecified in params file but not found!\n";
            }
        }
        if ($param =~ /Sample.+efer/i)
        {
            -e $val || -e "$val.fa" || die "REFERENCE GENOME \t $val \t specified in param file but not found!\n";
        }
    }
    close($in);

    die "MISSING ClusterSize! (ex: ClusterSize = 1)\n" if(!$foundCS);
    die "MISSING/INCORRECT FlowCellName! (ex: FlowCellName = ZXC123XX)\n" if(!$foundFC);
    die "MISSING queue! (ex: queue = laird)\n" if(!$foundQ);
}