diff --git a/.tt_skip b/.tt_skip index d931e7ca..10b65eb8 100644 --- a/.tt_skip +++ b/.tt_skip @@ -55,7 +55,6 @@ tools/rmap tools/weightedaverage tools/annotation_profiler tools/megablast_xml_parser -tools/correlation tools/merge_cols tools/microsats_alignment_level tools/bowtie_color_wrappers diff --git a/tools/correlation/cor.py b/tools/correlation/cor.py index 881bd951..da9c117d 100644 --- a/tools/correlation/cor.py +++ b/tools/correlation/cor.py @@ -6,7 +6,9 @@ """ import sys -from rpy import * +import rpy2.robjects as robjects +r = robjects.r + def stop_err(msg): sys.stderr.write(msg) @@ -17,17 +19,25 @@ def main(): assert method in ( "pearson", "kendall", "spearman" ) try: - columns = map( int, sys.argv[3].split( ',' ) ) + column_string = sys.argv[3] + columns = list() + for col in column_string.split(','): + if '-' in col: + s, e = col.split('-') + col = list(range(int(s), int(e) + 1)) + columns.extend(col) + else: + columns.append(int(col)) except: stop_err( "Problem determining columns, perhaps your query does not contain a column of numerical data." ) - + matrix = [] skipped_lines = 0 first_invalid_line = 0 invalid_value = '' invalid_column = 0 - for i, line in enumerate( file( sys.argv[1] ) ): + for i, line in enumerate( open( sys.argv[1] ) ): valid = True line = line.rstrip('\n\r') @@ -60,29 +70,32 @@ def main(): first_invalid_line = i+1 if valid: - matrix.append( row ) + matrix += row if skipped_lines < i: - try: - out = open( sys.argv[2], "w" ) - except: - stop_err( "Unable to open output file" ) - # Run correlation try: - value = r.cor( array( matrix ), use="pairwise.complete.obs", method=method ) - except Exception, exc: - out.close() - stop_err("%s" %str( exc )) - for row in value: - print >> out, "\t".join( map( str, row ) ) - out.close() + fv = robjects.FloatVector(matrix) + m = r['matrix'](fv, ncol=len(columns),byrow=True) + rslt_mat = r.cor(m, use="pairwise.complete.obs", method=method ) + value = [] + for ri in range(1, rslt_mat.nrow + 1): + row = [] + for ci in range(1, rslt_mat.ncol + 1): + row.append(rslt_mat.rx(ri,ci)[0]) + value.append(row) + except Exception as exc: + stop_err("%s" % str( exc )) + + with open( sys.argv[2], "w" ) as out: + for row in value: + out.write("%s\n" % "\t".join( map( str, row ) )) if skipped_lines > 0: msg = "..Skipped %d lines starting with line #%d. " %( skipped_lines, first_invalid_line ) if invalid_value and invalid_column > 0: msg += "Value '%s' in column %d is not numeric." % ( invalid_value, invalid_column ) - print msg + print(msg) if __name__ == "__main__": main() diff --git a/tools/correlation/cor.xml b/tools/correlation/cor.xml index 5186dbaa..7f08290c 100644 --- a/tools/correlation/cor.xml +++ b/tools/correlation/cor.xml @@ -1,33 +1,48 @@ - - for numeric columns - - rpy - - cor.py $input1 $out_file1 $numeric_columns $method - - - - - - - - - - - - - - - - - - - - - - + + for numeric columns + + rpy2 + + + python '$__tool_directory__/cor.py' + '$input1' + '$out_file1' + $numeric_columns + $method + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + .. class:: infomark @@ -97,5 +112,5 @@ This tool computes the matrix of correlation coefficients between numeric column 0.730635686279 1.0 So the correlation for our twenty cases is .73, which is a fairly strong positive relationship. - + diff --git a/tools/correlation/test-data/cor.tabular b/tools/correlation/test-data/cor.tabular index 035e6f11..53a91de2 100644 --- a/tools/correlation/test-data/cor.tabular +++ b/tools/correlation/test-data/cor.tabular @@ -18,4 +18,4 @@ Person Height Self Esteem 1 65 4.1 1 67 3.8 1 63 3.4 -2 61 3.6 \ No newline at end of file +2 61 3.6 diff --git a/tools/correlation/test-data/cor_out.txt b/tools/correlation/test-data/cor_out.txt index 376ab447..535cb1b2 100644 --- a/tools/correlation/test-data/cor_out.txt +++ b/tools/correlation/test-data/cor_out.txt @@ -1,2 +1,2 @@ -1.0 0.730635686279 -0.730635686279 1.0 +1.0 0.7306356862792351 +0.7306356862792351 1.0 diff --git a/tools/correlation/tool_dependencies.xml b/tools/correlation/tool_dependencies.xml deleted file mode 100644 index aad4f58b..00000000 --- a/tools/correlation/tool_dependencies.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - -