#!/usr/bin/env python3
# -*- coding: utf-8 -*-

+import argparse
import json
import os
-import argparse
-import yaml

-from pythainlp.benchmarks import word_tokenisation
+import yaml
+from pythainlp.benchmarks import word_tokenization

parser = argparse.ArgumentParser(
    description="Script for benchmarking tokenization results"
)

parser.add_argument(
-    "--input",
+    "--input-file",
    action="store",
-    help="path to file that you want to compare against the test file"
+    help="Path to input file to compare against the test file",
)

parser.add_argument(
    "--test-file",
    action="store",
-    help="path to test file"
+    help="Path to test file i.e. ground truth",
)

parser.add_argument(
    "--save-details",
    default=False,
-    action='store_true',
-    help="specify whether to save the details of comparisons"
+    action="store_true",
+    help="Save comparison details to files (eval-XXX.yml and eval-details-XXX.json)",
)

args = parser.parse_args()

+
def _read_file(path):
    with open(path, "r", encoding="utf-8") as f:
        lines = map(lambda r: r.strip(), f.readlines())
    return list(lines)


-print(args.input)
-actual = _read_file(args.input)
+print(args.input_file)
+actual = _read_file(args.input_file)
expected = _read_file(args.test_file)

-assert len(actual) == len(expected), \
-    'Input and test files do not have the same number of samples'
-print('Benchmarking %s against %s with %d samples in total' % (
-    args.input, args.test_file, len(actual)
-))
-
-df_raw = word_tokenisation.benchmark(expected, actual)
-
-df_res = df_raw\
-    .describe()
-df_res = df_res[[
-    'char_level:tp',
-    'char_level:tn',
-    'char_level:fp',
-    'char_level:fn',
-    'char_level:precision',
-    'char_level:recall',
-    'char_level:f1',
-    'word_level:precision',
-    'word_level:recall',
-    'word_level:f1',
-]]
+assert len(actual) == len(
+    expected
+), "Input and test files do not have the same number of samples"
+print(
+    "Benchmarking %s against %s with %d samples in total"
+    % (args.input_file, args.test_file, len(actual))
+)
+
+df_raw = word_tokenization.benchmark(expected, actual)
+
+df_res = df_raw.describe()
+df_res = df_res[
+    [
+        "char_level:tp",
+        "char_level:tn",
+        "char_level:fp",
+        "char_level:fn",
+        "char_level:precision",
+        "char_level:recall",
+        "char_level:f1",
+        "word_level:precision",
+        "word_level:recall",
+        "word_level:f1",
+    ]
+]

df_res = df_res.T.reset_index(0)

-df_res['mean±std'] = df_res.apply(
-    lambda r: '%2.2f±%2.2f' % (r['mean'], r['std']),
-    axis=1
+df_res["mean±std"] = df_res.apply(
+    lambda r: "%2.2f±%2.2f" % (r["mean"], r["std"]), axis=1
)

-df_res['metric'] = df_res['index']
+df_res["metric"] = df_res["index"]

print("============== Benchmark Result ==============")
-print(df_res[['metric', 'mean±std', 'min', 'max']].to_string(index=False))
-
+print(df_res[["metric", "mean±std", "min", "max"]].to_string(index=False))


if args.save_details:
    data = {}
-    for r in df_res.to_dict('records'):
-        metric = r['index']
-        del r['index']
+    for r in df_res.to_dict("records"):
+        metric = r["index"]
+        del r["index"]
        data[metric] = r

-    dir_name = os.path.dirname(args.input)
-    file_name = args.input.split("/")[-1].split(".")[0]
+    dir_name = os.path.dirname(args.input_file)
+    file_name = args.input_file.split("/")[-1].split(".")[0]

    res_path = "%s/eval-%s.yml" % (dir_name, file_name)
    print("Evaluation result is saved to %s" % res_path)

-    with open(res_path, 'w') as outfile:
+    with open(res_path, "w", encoding="utf-8") as outfile:
        yaml.dump(data, outfile, default_flow_style=False)

    res_path = "%s/eval-details-%s.json" % (dir_name, file_name)
    print("Details of comparisons are saved to %s" % res_path)

-    with open(res_path, "w") as f:
+    with open(res_path, "w", encoding="utf-8") as f:
        samples = []
        for i, r in enumerate(df_raw.to_dict("records")):
            expected, actual = r["expected"], r["actual"]
            del r["expected"]
            del r["actual"]

-            samples.append(dict(
-                metrics=r,
-                expected=expected,
-                actual=actual,
-                id=i
-            ))
-
-        details = dict(
-            metrics=data,
-            samples=samples
-        )
+            samples.append(dict(metrics=r, expected=expected, actual=actual, id=i))
+
+        details = dict(metrics=data, samples=samples)

        json.dump(details, f, ensure_ascii=False)
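
For a quick sanity check of the renamed module, the benchmark can also be driven directly from Python rather than through this CLI script. A minimal sketch, not part of the commit: the two sample strings are illustrative only and assume pipe-separated ("|") word boundaries, and the metric columns are the ones selected in the script above.

from pythainlp.benchmarks import word_tokenization

# Illustrative samples (not from the commit); words are separated by "|",
# and both strings are assumed to contain the same underlying characters.
expected = ["ผม|ชอบ|กิน|ข้าว"]
actual = ["ผม|ชอบ|กินข้าว"]

# benchmark() returns a DataFrame with per-sample metrics, as used in the script.
df = word_tokenization.benchmark(expected, actual)
print(df[["char_level:f1", "word_level:f1"]].describe())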