diff --git a/.tt_skip b/.tt_skip
index d931e7ca..10b65eb8 100644
--- a/.tt_skip
+++ b/.tt_skip
@@ -55,7 +55,6 @@ tools/rmap
tools/weightedaverage
tools/annotation_profiler
tools/megablast_xml_parser
-tools/correlation
tools/merge_cols
tools/microsats_alignment_level
tools/bowtie_color_wrappers
diff --git a/tools/correlation/cor.py b/tools/correlation/cor.py
index 881bd951..da9c117d 100644
--- a/tools/correlation/cor.py
+++ b/tools/correlation/cor.py
@@ -6,7 +6,9 @@
"""
import sys
-from rpy import *
+import rpy2.robjects as robjects
+r = robjects.r
+
def stop_err(msg):
sys.stderr.write(msg)
@@ -17,17 +19,25 @@ def main():
assert method in ( "pearson", "kendall", "spearman" )
try:
- columns = map( int, sys.argv[3].split( ',' ) )
+ column_string = sys.argv[3]
+ columns = list()
+ for col in column_string.split(','):
+ if '-' in col:
+ s, e = col.split('-')
+ col = list(range(int(s), int(e) + 1))
+ columns.extend(col)
+ else:
+ columns.append(int(col))
except:
stop_err( "Problem determining columns, perhaps your query does not contain a column of numerical data." )
-
+
matrix = []
skipped_lines = 0
first_invalid_line = 0
invalid_value = ''
invalid_column = 0
- for i, line in enumerate( file( sys.argv[1] ) ):
+ for i, line in enumerate( open( sys.argv[1] ) ):
valid = True
line = line.rstrip('\n\r')
@@ -60,29 +70,32 @@ def main():
first_invalid_line = i+1
if valid:
- matrix.append( row )
+ matrix += row
if skipped_lines < i:
- try:
- out = open( sys.argv[2], "w" )
- except:
- stop_err( "Unable to open output file" )
-
# Run correlation
try:
- value = r.cor( array( matrix ), use="pairwise.complete.obs", method=method )
- except Exception, exc:
- out.close()
- stop_err("%s" %str( exc ))
- for row in value:
- print >> out, "\t".join( map( str, row ) )
- out.close()
+ fv = robjects.FloatVector(matrix)
+ m = r['matrix'](fv, ncol=len(columns),byrow=True)
+ rslt_mat = r.cor(m, use="pairwise.complete.obs", method=method )
+ value = []
+ for ri in range(1, rslt_mat.nrow + 1):
+ row = []
+ for ci in range(1, rslt_mat.ncol + 1):
+ row.append(rslt_mat.rx(ri,ci)[0])
+ value.append(row)
+ except Exception as exc:
+ stop_err("%s" % str( exc ))
+
+ with open( sys.argv[2], "w" ) as out:
+ for row in value:
+ out.write("%s\n" % "\t".join( map( str, row ) ))
if skipped_lines > 0:
msg = "..Skipped %d lines starting with line #%d. " %( skipped_lines, first_invalid_line )
if invalid_value and invalid_column > 0:
msg += "Value '%s' in column %d is not numeric." % ( invalid_value, invalid_column )
- print msg
+ print(msg)
if __name__ == "__main__":
main()
diff --git a/tools/correlation/cor.xml b/tools/correlation/cor.xml
index 5186dbaa..7f08290c 100644
--- a/tools/correlation/cor.xml
+++ b/tools/correlation/cor.xml
@@ -1,33 +1,48 @@
-
- for numeric columns
-
- rpy
-
- cor.py $input1 $out_file1 $numeric_columns $method
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+ for numeric columns
+
+ rpy2
+
+
+ python '$__tool_directory__/cor.py'
+ '$input1'
+ '$out_file1'
+ $numeric_columns
+ $method
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
.. class:: infomark
@@ -97,5 +112,5 @@ This tool computes the matrix of correlation coefficients between numeric column
0.730635686279 1.0
So the correlation for our twenty cases is .73, which is a fairly strong positive relationship.
-
+
diff --git a/tools/correlation/test-data/cor.tabular b/tools/correlation/test-data/cor.tabular
index 035e6f11..53a91de2 100644
--- a/tools/correlation/test-data/cor.tabular
+++ b/tools/correlation/test-data/cor.tabular
@@ -18,4 +18,4 @@ Person Height Self Esteem
1 65 4.1
1 67 3.8
1 63 3.4
-2 61 3.6
\ No newline at end of file
+2 61 3.6
diff --git a/tools/correlation/test-data/cor_out.txt b/tools/correlation/test-data/cor_out.txt
index 376ab447..535cb1b2 100644
--- a/tools/correlation/test-data/cor_out.txt
+++ b/tools/correlation/test-data/cor_out.txt
@@ -1,2 +1,2 @@
-1.0 0.730635686279
-0.730635686279 1.0
+1.0 0.7306356862792351
+0.7306356862792351 1.0
diff --git a/tools/correlation/tool_dependencies.xml b/tools/correlation/tool_dependencies.xml
deleted file mode 100644
index aad4f58b..00000000
--- a/tools/correlation/tool_dependencies.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-
-
-
-
-
-