From 4ee18a7b548bcbb951b3fbc09fb51ca62898aea4 Mon Sep 17 00:00:00 2001 From: milkschen Date: Wed, 25 Mar 2020 22:00:15 -0400 Subject: [PATCH 1/2] fixed issues with python3 and latest version of matplotlib; this works for me with Python 3.6.10 and matplotlib 3.0.3 --- varmatch | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/varmatch b/varmatch index de4cd82..2cfbd86 100755 --- a/varmatch +++ b/varmatch @@ -21,7 +21,8 @@ limitations under the License. Chen Sun(chensun@cse.psu.edu) Paul Medvedev(pashadag@cse.psu.edu) """ -from __future__ import print_function +# from __future__ import print_function +exec('from __future__ import print_function') import sys versionError = "You are using an old version of python, please upgrade to python 2.7+\n" if sys.hexversion < 0x02070000: @@ -518,7 +519,7 @@ def create_stat_html(query_list, output_prefix_list): plt.clf() colormap = plt.cm.gist_ncar - plt.gca().set_color_cycle([colormap(k) for k in np.linspace(0, 0.9, len(parameter_table))]) + plt.gca().set_prop_cycle(color = [colormap(k) for k in np.linspace(0, 0.9, len(parameter_table))]) for j in range(len(parameter_table)-1): x = sensitivity_table_list[i][j] From 2daf43d9910c329b779fdea07e7b67ebfb0e2d4e Mon Sep 17 00:00:00 2001 From: milkschen Date: Wed, 25 Mar 2020 22:36:06 -0400 Subject: [PATCH 2/2] uses flexible ylim (lower part); added --label feature so that the labels of figs and html tables can be specified by user --- varmatch | 40 ++++++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/varmatch b/varmatch index 2cfbd86..7b3213d 100755 --- a/varmatch +++ b/varmatch @@ -57,6 +57,7 @@ citation = 'Please cite our paper.' parser = argparse.ArgumentParser(prog="varmatch", epilog = citation, formatter_class=lambda prog: SmartFormatter(prog,max_help_position=8)) parser.add_argument('-b', '--baseline', required=True, metavar='File', help = 'baseline variant VCF filename') parser.add_argument('-q', '--query', nargs='+', metavar='File List', help = 'query variant VCF filename') +parser.add_argument('-l', '--label', nargs='+', metavar='Label List', help = 'query variant VCF label showing in figures', default=None) parser.add_argument('-g', '--genome', required=True, metavar='File', help= 'genome sequence FASTA filename') parser.add_argument('-o', '--output', help='output directory', metavar='DIRECTORY',default='./output') @@ -326,7 +327,7 @@ def parse_stat(output_prefix): return baseline_num, query_num, x, y, z, no_filter_table, sensitivity_table, specificity_table -def create_table_by_matchingid_from_by_query(table_list, matching_list, query_number): +def create_table_by_matchingid_from_by_query(table_list, matching_list, query_number, query_label_list): table_by_matchingid = [] for matching_index in range(len(matching_list)): matching_table = [] @@ -334,7 +335,10 @@ def create_table_by_matchingid_from_by_query(table_list, matching_list, query_nu matching_table.append(title) for table_index in range(len(table_list)): raw_row = table_list[table_index][matching_index] - new_row = ['Query' + str(table_index+1)] + if query_label_list: + new_row = [query_label_list[table_index]] + else: + new_row = ['Query' + str(table_index+1)] new_row += raw_row[4:] matching_table.append(new_row) table_by_matchingid.append(matching_table) @@ -342,7 +346,7 @@ def create_table_by_matchingid_from_by_query(table_list, matching_list, query_nu # all html and picture are created from stat file, not parameters -def create_stat_html(query_list, output_prefix_list): +def create_stat_html(query_list, output_prefix_list, query_label_list): global output_dir html_filename = output_dir + '/stat.html' html_file = open(html_filename, 'w') @@ -385,15 +389,21 @@ def create_stat_html(query_list, output_prefix_list): import matplotlib.pyplot as plt axes = plt.gca() #axes.set_xlim([xmin,xmax]) - axes.set_ylim([0,100]) + # axes.set_ylim([0,100]) + min_sensitivity = 100 for i in range(exp_num): marker_id = i % len(marker_list) marker_sign = marker_list[marker_id] - label_sign = 'Query ' + str(i+1) + if query_label_list: + label_sign = query_label_list[i] + else: + label_sign = 'Query ' + str(i+1) x = np.array(range(len(label_list[0]))) plt.xticks(x, label_list[0]) plt.plot(x, sensitivity_list[i], marker = marker_sign, linestyle = '-', label = label_sign) + min_sensitivity = min(min_sensitivity, min(sensitivity_list[i])) + axes.set_ylim([min_sensitivity * 0.9,100]) plt.xlabel('Matching Id') plt.ylabel('Recall(%)') #plt.title('Sensitivity of Queries under Different Matching Parameters') @@ -404,14 +414,20 @@ def create_stat_html(query_list, output_prefix_list): axes = plt.gca() #axes.set_xlim([xmin,xmax]) axes.set_ylim([0,100]) + min_precision = 100 for i in range(exp_num): marker_id = i % len(marker_list) marker_sign = marker_list[marker_id] - label_sign = 'Query ' + str(i+1) + if query_label_list: + label_sign = query_label_list[i] + else: + label_sign = 'Query ' + str(i+1) x = np.array(range(len(label_list[0]))) plt.xticks(x, label_list[0]) plt.plot(x, specificity_list[i], marker = marker_sign, linestyle = '-', label = label_sign) + min_precision = min(min_precision, min(specificity_list[i])) + axes.set_ylim([min_precision * 0.9,100]) plt.xlabel('Matching Id') plt.ylabel('Precision(%)') #plt.title('Specificity of Queries under Different Matching Parameters') @@ -449,7 +465,7 @@ def create_stat_html(query_list, output_prefix_list): # sensitivity and specificity analysis by matching id html_file.write('

') - table_by_matchingid = create_table_by_matchingid_from_by_query(table_list, label_list[0], exp_num) + table_by_matchingid = create_table_by_matchingid_from_by_query(table_list, label_list[0], exp_num, query_label_list) for i in range(len(label_list[0])): html_file.write('
') html_file.write('

Matching Id: ' + label_list[0][i] + '

'+'\n') @@ -495,7 +511,10 @@ def create_stat_html(query_list, output_prefix_list): x.sort() y.sort(reverse=True) y = y[::-1] - label_sign = 'Query ' + str(j+1) + if query_label_list: + label_sign = query_label_list[j] + else: + label_sign = 'Query ' + str(j+1) plt.plot(x,y, label = label_sign) #x = [0.0, 1.0] @@ -584,8 +603,9 @@ def main(): temp_dir = output_dir + '/temp' query_list = args.query + label_list = args.label - multiple_compare(args.baseline, query_list, args.genome) + # multiple_compare(args.baseline, query_list, args.genome) if args.score_scheme == '3': exit() @@ -594,7 +614,7 @@ def main(): for i in range(len(query_list)): output_prefix_list.append('query'+str(i+1)) - create_stat_html(query_list, output_prefix_list) + create_stat_html(query_list, output_prefix_list, label_list) if __name__ == '__main__': main()