Commit 4460a35

Author: Charin
Merge pull request #369 from PyThaiNLP/fix-tokenization-benchmark
[WIP] Fix tokenization benchmark statistics
2 parents 37fd7da + bec416f · commit 4460a35

File tree

2 files changed (+38, -37 lines)


bin/word-tokenization-benchmark

Lines changed: 35 additions & 27 deletions

@@ -54,49 +54,57 @@ print(
 
 df_raw = word_tokenization.benchmark(expected, actual)
 
-df_res = df_raw.describe()
-df_res = df_res[
-    [
-        "char_level:tp",
-        "char_level:tn",
-        "char_level:fp",
-        "char_level:fn",
-        "char_level:precision",
-        "char_level:recall",
-        "char_level:f1",
-        "word_level:precision",
-        "word_level:recall",
-        "word_level:f1",
-    ]
+
+columns = [
+    "char_level:tp",
+    "char_level:fp",
+    "char_level:tn",
+    "char_level:fn",
+    "word_level:correctly_tokenised_words",
+    "word_level:total_words_in_sample",
+    "word_level:total_words_in_ref_sample",
 ]
 
-df_res = df_res.T.reset_index(0)
+statistics = dict()
+
+for c in columns:
+    statistics[c] = float(df_raw[c].sum())
+
+statistics["char_level:precision"] = statistics["char_level:tp"] / (
+    statistics["char_level:tp"] + statistics["char_level:fp"]
+)
 
-df_res["mean±std"] = df_res.apply(
-    lambda r: "%2.2f±%2.2f" % (r["mean"], r["std"]), axis=1
+statistics["char_level:recall"] = statistics["char_level:tp"] / (
+    statistics["char_level:tp"] + statistics["char_level:fn"]
 )
 
-df_res["metric"] = df_res["index"]
+statistics["word_level:precision"] = statistics["word_level:correctly_tokenised_words"] \
+    / statistics["word_level:total_words_in_sample"]
+
+statistics["word_level:recall"] = statistics["word_level:correctly_tokenised_words"] \
+    / statistics["word_level:total_words_in_ref_sample"]
 
 print("============== Benchmark Result ==============")
-print(df_res[["metric", "mean±std", "min", "max"]].to_string(index=False))
 
+for c in ["tp", "fn", "tn", "fp", "precision", "recall"]:
+    c = f"char_level:{c}"
+    v = statistics[c]
+    print(f"{c:>40s} {v:.4f}")
 
-if args.save_details:
-    data = {}
-    for r in df_res.to_dict("records"):
-        metric = r["index"]
-        del r["index"]
-        data[metric] = r
+for c in ["total_words_in_sample", "total_words_in_ref_sample", "correctly_tokenised_words", "precision", "recall"]:
+    c = f"word_level:{c}"
+    v = statistics[c]
+    print(f"{c:>40s} {v:.4f}")
 
+if args.save_details:
     dir_name = os.path.dirname(args.input_file)
     file_name = args.input_file.split("/")[-1].split(".")[0]
 
     res_path = "%s/eval-%s.yml" % (dir_name, file_name)
     print("Evaluation result is saved to %s" % res_path)
 
     with open(res_path, "w", encoding="utf-8") as outfile:
-        yaml.dump(data, outfile, default_flow_style=False)
+        yaml.dump(statistics, outfile, default_flow_style=False)
 
     res_path = "%s/eval-details-%s.json" % (dir_name, file_name)
     print("Details of comparisons is saved to %s" % res_path)
@@ -110,6 +118,6 @@ if args.save_details:
 
             samples.append(dict(metrics=r, expected=expected, actual=actual, id=i))
 
-        details = dict(metrics=data, samples=samples)
+        details = dict(metrics=statistics, samples=samples)
 
         json.dump(details, f, ensure_ascii=False)
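
What changed, in short: the old script reported the mean ± std of each sample's own precision/recall (via pandas describe()), whereas the new script sums the raw character- and word-level counts over all samples and computes precision/recall once from the totals (micro-averaging). A minimal sketch of the difference, using a toy DataFrame with the same column names as df_raw; the counts below are hypothetical, not benchmark output:

import pandas as pd

# Toy per-sample counts in the same columns as df_raw (hypothetical values).
df_raw = pd.DataFrame({
    "char_level:tp": [90, 5],
    "char_level:fp": [10, 0],
})

# Old behaviour (roughly): average each sample's own precision (macro average).
per_sample_precision = df_raw["char_level:tp"] / (
    df_raw["char_level:tp"] + df_raw["char_level:fp"]
)
macro_precision = per_sample_precision.mean()  # (0.90 + 1.00) / 2 = 0.95

# New behaviour: sum the counts first, then compute precision once (micro average).
tp = float(df_raw["char_level:tp"].sum())  # 95
fp = float(df_raw["char_level:fp"].sum())  # 10
micro_precision = tp / (tp + fp)           # 95 / 105 ≈ 0.905

print(f"macro={macro_precision:.4f}  micro={micro_precision:.4f}")

The two figures generally differ: the macro average weights every sample equally regardless of its length, while the summed counts weight every character equally.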

pythainlp/benchmarks/word_tokenization.py

Lines changed: 3 additions & 10 deletions

@@ -185,10 +185,6 @@ def compute_stats(ref_sample: str, raw_sample: str) -> dict:
 
     correctly_tokenised_words = np.sum(tokenization_indicators)
 
-    w_precision = correctly_tokenised_words / np.sum(sample)
-    w_recall = correctly_tokenised_words / np.sum(ref_sample)
-    w_f1 = _f1(w_precision, w_recall)
-
     tokenization_indicators = list(
         map(lambda x: str(x), tokenization_indicators)
     )
@@ -199,14 +195,11 @@ def compute_stats(ref_sample: str, raw_sample: str) -> dict:
             "fp": c_fp,
             "tn": c_tn,
             "fn": c_fn,
-            "precision": c_precision,
-            "recall": c_recall,
-            "f1": c_f1,
         },
         "word_level": {
-            "precision": w_precision,
-            "recall": w_recall,
-            "f1": w_f1,
+            "correctly_tokenised_words": correctly_tokenised_words,
+            "total_words_in_sample": np.sum(sample),
+            "total_words_in_ref_sample": np.sum(ref_sample)
         },
         "global": {
             "tokenisation_indicators": "".join(tokenization_indicators)
