@@ -54,49 +54,57 @@ print(

 df_raw = word_tokenization.benchmark(expected, actual)

-df_res = df_raw.describe()
-df_res = df_res[
-    [
-        "char_level:tp",
-        "char_level:tn",
-        "char_level:fp",
-        "char_level:fn",
-        "char_level:precision",
-        "char_level:recall",
-        "char_level:f1",
-        "word_level:precision",
-        "word_level:recall",
-        "word_level:f1",
-    ]
+
+columns = [
+    "char_level:tp",
+    "char_level:fp",
+    "char_level:tn",
+    "char_level:fn",
+    "word_level:correctly_tokenised_words",
+    "word_level:total_words_in_sample",
+    "word_level:total_words_in_ref_sample",
 ]

-df_res = df_res.T.reset_index(0)
+statistics = dict()
+
+for c in columns:
+    statistics[c] = float(df_raw[c].sum())
+
+statistics["char_level:precision"] = statistics["char_level:tp"] / (
+    statistics["char_level:tp"] + statistics["char_level:fp"]
+)

-df_res["mean±std"] = df_res.apply(
-    lambda r: "%2.2f±%2.2f" % (r["mean"], r["std"]), axis=1
+statistics["char_level:recall"] = statistics["char_level:tp"] / (
+    statistics["char_level:tp"] + statistics["char_level:fn"]
 )

-df_res["metric"] = df_res["index"]
+statistics["word_level:precision"] = statistics["word_level:correctly_tokenised_words"] \
+    / statistics["word_level:total_words_in_sample"]
+
+statistics["word_level:recall"] = statistics["word_level:correctly_tokenised_words"] \
+    / statistics["word_level:total_words_in_ref_sample"]

 print("============== Benchmark Result ==============")
-print(df_res[["metric", "mean±std", "min", "max"]].to_string(index=False))

+for c in ["tp", "fn", "tn", "fp", "precision", "recall"]:
+    c = f"char_level:{c}"
+    v = statistics[c]
+    print(f"{c:>40s} {v:.4f}")

-if args.save_details:
-    data = {}
-    for r in df_res.to_dict("records"):
-        metric = r["index"]
-        del r["index"]
-        data[metric] = r
+for c in ["total_words_in_sample", "total_words_in_ref_sample", "correctly_tokenised_words", "precision", "recall"]:
+    c = f"word_level:{c}"
+    v = statistics[c]
+    print(f"{c:>40s} {v:.4f}")

+if args.save_details:
     dir_name = os.path.dirname(args.input_file)
     file_name = args.input_file.split("/")[-1].split(".")[0]

     res_path = "%s/eval-%s.yml" % (dir_name, file_name)
     print("Evaluation result is saved to %s" % res_path)

     with open(res_path, "w", encoding="utf-8") as outfile:
-        yaml.dump(data, outfile, default_flow_style=False)
+        yaml.dump(statistics, outfile, default_flow_style=False)

     res_path = "%s/eval-details-%s.json" % (dir_name, file_name)
     print("Details of comparisons is saved to %s" % res_path)
@@ -110,6 +118,6 @@ if args.save_details:

             samples.append(dict(metrics=r, expected=expected, actual=actual, id=i))

-        details = dict(metrics=data, samples=samples)
+        details = dict(metrics=statistics, samples=samples)

         json.dump(details, f, ensure_ascii=False)
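
The rewrite above switches the report from per-sample summary statistics (pandas describe(), printed as mean±std per metric) to corpus-level micro-averaging: the raw tp/fp/fn counts are summed over all samples first, and precision and recall are computed once from those totals, so every character contributes equally regardless of sample length. Below is a minimal, self-contained sketch of that aggregation, assuming a pandas DataFrame with the same column names as the script; the counts are invented for illustration.

import pandas as pd

# Hypothetical per-sample counts, mimicking the columns returned by
# word_tokenization.benchmark(); the numbers are made up.
df_raw = pd.DataFrame(
    {
        "char_level:tp": [8, 5],
        "char_level:fp": [1, 2],
        "char_level:fn": [2, 1],
    }
)

# Micro-averaging: sum the counts over the whole corpus, then divide once.
tp = float(df_raw["char_level:tp"].sum())  # 13
fp = float(df_raw["char_level:fp"].sum())  # 3
fn = float(df_raw["char_level:fn"].sum())  # 2

precision = tp / (tp + fp)  # 13 / 16 = 0.8125
recall = tp / (tp + fn)     # 13 / 15 ≈ 0.8667

print(f"{'char_level:precision':>40s} {precision:.4f}")
print(f"{'char_level:recall':>40s} {recall:.4f}")

Macro-averaging the per-sample precision and recall values (the old mean±std report) would instead weight short and long samples equally, which is why the two reports can differ on the same data.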