This repository was archived by the owner on May 1, 2025. It is now read-only.

Commit 89937dd

Author: Guangsen Wang (committed)
clean streamlit code

1 parent 75ec79e · commit 89937dd

File tree

13 files changed: +312 −285 lines

botsim/modules/remediator/dashboard/dashboard_utils.py

Lines changed: 22 additions & 16 deletions
@@ -5,7 +5,13 @@
 
 import os, json
 import numpy as np
-from botsim.botsim_utils.utils import read_s3_json, dump_s3_file, file_exists, read_s3_data, convert_list_to_dict
+from botsim.botsim_utils.utils import (
+    read_s3_json,
+    dump_s3_file,
+    file_exists,
+    read_s3_data,
+    convert_list_to_dict,
+    S3_BUCKET_NAME)
 from sentence_transformers import SentenceTransformer
 
 
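Note: this commit replaces every hard-coded "botsim" bucket string in the file with the imported S3_BUCKET_NAME constant. Judging by the literals it replaces, the constant presumably resolves to the same bucket name; a hedged sketch of the definition (the real one lives in botsim/botsim_utils/utils.py and may differ):

    # Assumed definition, inferred from the literals replaced in the hunks below:
    S3_BUCKET_NAME = "botsim"

Centralising the bucket name means a future rename touches one line instead of every call site.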
@@ -30,10 +36,11 @@ def get_embedding(intents, database, test_id="169", paraphrase=False, para_setti
     dev_embedding, dev_labels = np.empty((0, 384)), {"label": []}
 
     if "STORAGE" in os.environ and os.environ["STORAGE"] == "S3":
-        if file_exists("botsim", goals_dir + "/dev_embedding.npy") and \
-                file_exists("botsim", goals_dir + "/dev_embedding_label.npy"):
-            dev_embedding = np.frombuffer(read_s3_data("botsim", goals_dir + "/dev_embedding.npy")).reshape(-1, 384)
-            dev_labels = read_s3_json("botsim", goals_dir + "/dev_embedding_label.npy")["label"]
+        if file_exists(S3_BUCKET_NAME, goals_dir + "/dev_embedding.npy") and \
+                file_exists(S3_BUCKET_NAME, goals_dir + "/dev_embedding_label.npy"):
+            dev_embedding = np.frombuffer(read_s3_data(S3_BUCKET_NAME, goals_dir + "/dev_embedding.npy")).reshape(-1,
+                                                                                                                   384)
+            dev_labels = read_s3_json(S3_BUCKET_NAME, goals_dir + "/dev_embedding_label.npy")["label"]
         return dev_embedding, dev_labels
     else:
         if os.path.exists(goals_dir + "/dev_embedding.npy"):
@@ -47,13 +54,13 @@ def get_embedding(intents, database, test_id="169", paraphrase=False, para_setti
     for i, intent in enumerate(intents):
         file_name = goals_dir + "/" + intent + "_" + para_setting + ".paraphrases.json"
         if "STORAGE" in os.environ and os.environ["STORAGE"] == "S3":
-            if not file_exists("botsim", file_name):
+            if not file_exists(S3_BUCKET_NAME, file_name):
                 paraphrase = False
                 file_name = goals_dir + "/" + intent + ".json"
-                utterances = read_s3_json("botsim", file_name)[intent]
+                utterances = read_s3_json(S3_BUCKET_NAME, file_name)[intent]
             else:
                 print("processing", intent)
-                paras = read_s3_json("botsim", file_name)
+                paras = read_s3_json(S3_BUCKET_NAME, file_name)
                 utterances = []
                 for p in paras:
                     utterances.append(p["source"])
@@ -65,7 +72,6 @@ def get_embedding(intents, database, test_id="169", paraphrase=False, para_setti
                 file_name = goals_dir + "/" + intent + ".json"
                 utterances = json.load(open(file_name))[intent]
             else:
-                print("processing", intent)
                 paras = json.load(open(file_name))
                 utterances = []
                 for p in paras:
@@ -81,7 +87,7 @@ def get_embedding(intents, database, test_id="169", paraphrase=False, para_setti
         dump_s3_file(goals_dir + "/dev_embedding.npy", dev_embedding.tobytes())
         dump_s3_file(goals_dir + "/dev_embedding_label.npy", bytes(json.dumps(dev_labels, indent=2).encode("UTF-8")))
     else:
-        with open(goals_dir + "/dev_embedding.npy", "wb") as f:
+        with open(goals_dir + "/dev_embedding.npy", "wb") as f:
             np.save(f, dev_embedding, allow_pickle=False)
         with open(goals_dir + "/dev_embedding_label.npy", "wb") as f:
             np.save(f, dev_labels, allow_pickle=True)
@@ -110,9 +116,9 @@ def get_bot_health_reports(database, test_id):
     report_path = "data/bots/{}/{}/aggregated_report.json".format(config["type"], test_id)
 
     if "STORAGE" in os.environ and os.environ["STORAGE"] == "S3":
-        if not file_exists("botsim", report_path):
+        if not file_exists(S3_BUCKET_NAME, report_path):
             return None, None, None
-        report = read_s3_json("botsim", report_path)
+        report = read_s3_json(S3_BUCKET_NAME, report_path)
     else:
         if not os.path.exists(report_path):
             return None, None, None
@@ -130,8 +136,8 @@ def get_entities(database, test_id):
     entities = None
 
     if "STORAGE" in os.environ and os.environ["STORAGE"] == "S3":
-        if file_exists("botsim", entity_path):
-            entities = read_s3_json("botsim", entity_path)
+        if file_exists(S3_BUCKET_NAME, entity_path):
+            entities = read_s3_json(S3_BUCKET_NAME, entity_path)
     else:
         if os.path.exists(entity_path):
             entities = json.load(open(entity_path, "r"))
@@ -153,8 +159,8 @@ def parse_confusion_matrix(database, test_id, mode):
     config = dict(database.get_one_bot_test_instance(test_id))
     cm_report_path = "data/bots/{}/{}/remediation/cm_{}_report.json".format(config["type"], test_id, mode)
 
-    if file_exists("botsim", cm_report_path):
-        report = read_s3_json("botsim", cm_report_path)
+    if file_exists(S3_BUCKET_NAME, cm_report_path):
+        report = read_s3_json(S3_BUCKET_NAME, cm_report_path)
     else:
         return None, None, None, None, None, None, None
     rows = report["cm_table"]["body_row"]
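The S3 branches above persist the embedding matrix as raw bytes (dev_embedding.tobytes()) and rebuild it with np.frombuffer(...).reshape(-1, 384). A minimal sketch of that round-trip, with an in-memory blob standing in for the dump_s3_file/read_s3_data pair:

    import numpy as np

    embeddings = np.random.rand(10, 384)             # float64, matching np.empty((0, 384)) above
    blob = embeddings.tobytes()                      # what dump_s3_file uploads
    restored = np.frombuffer(blob).reshape(-1, 384)  # what the reader reconstructs
    assert np.array_equal(embeddings, restored)

np.frombuffer assumes float64 unless given an explicit dtype, so the write and read sides must agree on the array's dtype.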

botsim/modules/remediator/dashboard/layout.py

Lines changed: 55 additions & 68 deletions
@@ -8,12 +8,40 @@
 import botsim.modules.remediator.dashboard.plot as dashboard_plot
 from streamlit_chat import message
 
+
+def plot_dialog_performance_banner(overall_performance, F1_scores, selected_intent, mode):
+    row2_spacer1, row2_1, row2_spacer2, row2_2, row2_spacer3, row2_3, \
+        row2_spacer4, row2_4, row2_spacer5, row2_5, row2_spacer6, row2_6 = st.columns(
+            (.8, 2.5, .4, 2.5, .4, 2.5, .4, 2.5, .4, 2.5, .4, 2.5))
+
+    intent_performance = overall_performance[mode.lower()][selected_intent.replace("_eval", "")]
+    row2_1.metric("#Sessions", str(sum(list(intent_performance["overall_performance"].values()))), "")
+    if F1_scores[selected_intent] < 0.9:
+        row2_2.metric("F1 score", str(F1_scores[selected_intent]), "", "inverse")
+    else:
+        row2_2.metric("F1 score", str(F1_scores[selected_intent]), "Good")
+    if intent_performance["success_rate"] < 0.7:
+        row2_3.metric("Goal-completion Rate", str(intent_performance["success_rate"]), "", "inverse")
+    else:
+        row2_3.metric("Goal-completion Rate", str(intent_performance["success_rate"]), "")
+    if intent_performance["intent_error_rate"] > 0.5:
+        row2_4.metric("Intent Error Rate", str(intent_performance["intent_error_rate"]), "", "inverse")
+    else:
+        row2_4.metric("Intent Error Rate", str(intent_performance["intent_error_rate"]), "")
+    if intent_performance["NER_error_rate"] > 0.5:
+        row2_5.metric("NER Error Rate", str(intent_performance["NER_error_rate"]), "", "inverse")
+    else:
+        row2_5.metric("NER Error Rate", str(intent_performance["NER_error_rate"]), "")
+    row2_6.metric("Other Error Rate", str(intent_performance["other_error_rate"]), "")
+
+
 def render_summary_reports(database, mode, test, dataset_info, overall_performance):
     row1_spacer1, row1_1, row1_spacer2 = st.columns((.2, 7.1, .2))
     with row1_1:
         st.header("Bot health reports 📊")
-        st.markdown("The bot health reports consist of a summary report across all intents and "
-                    "per-intent reports to show both the task-completion and NLU performance.")
+        st.markdown("The bot health reports comprise 1) a summary report of a simulation session "
+                    "across all intents and 2) "
+                    "intent/dialog-specific reports to show both the task-completion and NLU performance.")
     row2_spacer1, row2_1, row2_spacer2 = st.columns((.2, 7.1, .4))
     with row2_1:
         st.subheader("Performance summary for selected test (test_id={}):".format(test))
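The new plot_dialog_performance_banner helper leans on st.metric's positional arguments: label, value, delta, delta_color. A stripped-down sketch of the pattern, using the F1 threshold from the code above:

    import streamlit as st

    f1 = 0.82
    if f1 < 0.9:
        # fourth positional argument is delta_color; "inverse" flips the red/green colouring
        st.metric("F1 score", str(f1), "", "inverse")
    else:
        st.metric("F1 score", str(f1), "Good")

Extracting the banner into one helper removes two near-identical 20-line blocks from render_dialog_report and render_remediation (hunks below), which is the bulk of this commit's cleanup.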
@@ -46,36 +74,15 @@ def render_summary_reports(database, mode, test, dataset_info, overall_performan
         st.plotly_chart(dashboard_plot.plot_test_performance(intent_to_errors), use_container_width=True)
 
 
-def render_dialog_report(mode, selected_intent, F1s, overall_performance, detailed_performance):
+def render_dialog_report(mode, selected_intent, F1_scores, overall_performance, detailed_performance):
     row1_spacer1, row1_1, row1_spacer2 = st.columns((.3, 7.1, .4))
-    if not F1s:
-        F1s = {selected_intent: 1.0}
+    if not F1_scores:
+        F1_scores = {selected_intent: 1.0}
     with row1_1:
         st.markdown("---")
         st.subheader("Performance report for selected dialog \"" + selected_intent + "\"")
-    row2_spacer1, row2_1, row2_spacer2, row2_2, row2_spacer3, row2_3, \
-        row2_spacer4, row2_4, row2_spacer5, row2_5, row2_spacer6, row2_6 = st.columns(
-            (.8, 2.5, .4, 2.5, .4, 2.5, .4, 2.5, .4, 2.5, .4, 2.5))
 
-    intent_performance = overall_performance[mode.lower()][selected_intent.replace("_eval", "")]
-    row2_1.metric("#Sessions", str(sum(list(intent_performance["overall_performance"].values()))), "")
-    if F1s[selected_intent] < 0.9:
-        row2_2.metric("F1 score", str(F1s[selected_intent]), "", "inverse")
-    else:
-        row2_2.metric("F1 score", str(F1s[selected_intent]), "Good")
-    if intent_performance["success_rate"] < 0.7:
-        row2_3.metric("Goal-completion Rate", str(intent_performance["success_rate"]), "", "inverse")
-    else:
-        row2_3.metric("Goal-completion Rate", str(intent_performance["success_rate"]), "")
-    if intent_performance["intent_error_rate"] > 0.5:
-        row2_4.metric("Intent Error Rate", str(intent_performance["intent_error_rate"]), "", "inverse")
-    else:
-        row2_4.metric("Intent Error Rate", str(intent_performance["intent_error_rate"]), "")
-    if intent_performance["NER_error_rate"] > 0.5:
-        row2_5.metric("NER Error Rate", str(intent_performance["NER_error_rate"]), "", "inverse")
-    else:
-        row2_5.metric("NER Error Rate", str(intent_performance["NER_error_rate"]), "")
-    row2_6.metric("Other Error Rate", str(intent_performance["other_error_rate"]), "")
+    plot_dialog_performance_banner(overall_performance, F1_scores, selected_intent, mode)
 
     st.plotly_chart(
         dashboard_plot.plot_intent_performance(
@@ -84,10 +91,10 @@ def render_dialog_report(mode, selected_intent, F1s, overall_performance, detail
         use_container_width=True)
 
 
-def render_remediation(mode, selected_intent, F1s, overall_performance, detailed_performance):
+def render_remediation(mode, selected_intent, F1_scores, overall_performance, detailed_performance):
     row1_spacer1, row1_1, row1_spacer2 = st.columns((.2, 7.1, .2))
-    if not F1s:
-        F1s = {selected_intent: 1.0}
+    if not F1_scores:
+        F1_scores = {selected_intent: 1.0}
     with row1_1:
         st.markdown("---")
         st.header("Remediation Suggestions for {} 🛠️".format(selected_intent))
@@ -96,28 +103,7 @@ def render_remediation(mode, selected_intent, F1s, overall_performance, detailed
                     "They can also be extended by BotSIM users to include domain expertise or bot-specific "
                     "information. ")
 
-    row2_spacer1, row2_1, row2_spacer2, row2_2, row2_spacer3, row2_3, \
-        row2_spacer4, row2_4, row2_spacer5, row2_5, row2_spacer6, row2_6 = st.columns(
-            (.8, 2.5, .4, 2.5, .4, 2.5, .4, 2.5, .4, 2.5, .4, 2.5))
-    intent_performance = overall_performance[mode.lower()][selected_intent.replace("_eval", "")]
-    row2_1.metric("#Sessions", str(sum(list(intent_performance["overall_performance"].values()))), "")
-    if F1s[selected_intent] < 0.9:
-        row2_2.metric("F1 score", str(F1s[selected_intent]), "", "inverse")
-    else:
-        row2_2.metric("F1 score", str(F1s[selected_intent]), "Good")
-    if intent_performance["success_rate"] < 0.7:
-        row2_3.metric("Goal-completion Rate", str(intent_performance["success_rate"]), "", "inverse")
-    else:
-        row2_3.metric("Goal-completion Rate", str(intent_performance["success_rate"]), "")
-    if intent_performance["intent_error_rate"] > 0.5:
-        row2_4.metric("Intent Error Rate", str(intent_performance["intent_error_rate"]), "", "inverse")
-    else:
-        row2_4.metric("Intent Error Rate", str(intent_performance["intent_error_rate"]), "")
-    if intent_performance["NER_error_rate"] > 0.5:
-        row2_5.metric("NER Error Rate", str(intent_performance["NER_error_rate"]), "", "inverse")
-    else:
-        row2_5.metric("NER Error Rate", str(intent_performance["NER_error_rate"]), "")
-    row2_6.metric("Other Error Rate", str(intent_performance["other_error_rate"]), "")
+    plot_dialog_performance_banner(overall_performance, F1_scores, selected_intent, mode)
 
     row3_spacer1, row3_1, row3_spacer2 = st.columns((.2, 7.1, .2))
     with row3_1:
@@ -147,13 +133,13 @@ def render_remediation(mode, selected_intent, F1s, overall_performance, detailed
     if len(droplist_labels) > 0:
         row4_spacer1, row4_1, row4_spacer2, row4_2, row4_spacer3 = st.columns((.4, 8.3, .4, .4, .2))
         with row4_1:
-            st.markdown("For intent classification models, we show the wrongly predicted paraphrases intent queries "
+            st.markdown("For intent models, we show the wrongly predicted paraphrase intent queries "
                         "grouped by their corresponding original"
                         " training utterances (**sorted in descending order by number of errors**). "
                         "Detailed analysis can be found on the right hand side expander.")
         row5_spacer1, row5_1, row5_spacer2, row5_2, row5_spacer3 = st.columns((.4, 4.3, .4, 4.3, .2))
         with row5_1:
-            utt_selected = st.selectbox("Which utterance do you want to analyze? "
+            utt_selected = st.selectbox("Which utterance do you want to investigate? "
                                         "(" + str(len(droplist_labels)) + " in total)",
                                         list(droplist_labels), key="utt")
         with row5_2:
@@ -213,15 +199,16 @@ def render_remediation(mode, selected_intent, F1s, overall_performance, detailed
                 st.json(ner_errors)
 
 
-def render_analytics(database, test, cm_plot, recalls, precisions, F1s, intent_to_clusters, intent_to_supports,
+def render_analytics(database, test, cm_plot, recalls, precisions, F1_scores, intent_to_clusters, intent_to_supports,
                      all_intents):
     row1_spacer1, row1_1, row1_spacer2 = st.columns((.2, 7.1, .2))
    with row1_1:
         st.markdown("---")
         st.header("Conversation Analytics ⚙️")
-        st.markdown("BotSIM also offers analytical tools for helping users gain more insights into their systems. "
+        st.markdown("Analytical tools help users gain insights into their bots for "
+                    "troubleshooting and improvement. "
                     "These tools include confusion matrix analysis, intent utterance tSNE clustering and "
-                    "bootstrap-based confidence analysis ")
+                    "more. Additional tools can be added in the layout.")
 
     row2_spacer1, row2_1, row2_spacer2 = st.columns((.4, 7.1, .4))
     with row2_1:
@@ -244,24 +231,24 @@ def render_analytics(database, test, cm_plot, recalls, precisions, F1s, intent_t
 
     sorted_recall = dict(sorted(recalls.items(), key=lambda item: -item[1]))
     sorted_precision = dict(sorted(precisions.items(), key=lambda item: -item[1]))
-    sorted_F1 = dict(sorted(F1s.items(), key=lambda item: -item[1]))
+    sorted_F1 = dict(sorted(F1_scores.items(), key=lambda item: -item[1]))
     table = []
 
     if sorted_by == "Sorted by Recall":
         for intent in sorted_recall:
-            precision, recall, F1 = sorted_precision[intent], recalls[intent], F1s[intent]
+            precision, recall, F1_score = sorted_precision[intent], recalls[intent], F1_scores[intent]
             table.append(
-                [intent, precision, recall, F1, intent_to_supports[intent], intent_to_clusters[intent]])
+                [intent, precision, recall, F1_score, intent_to_supports[intent], intent_to_clusters[intent]])
     elif sorted_by == "Sorted by Precision":
         for intent in sorted_precision:
-            precision, recall, F1 = sorted_precision[intent], recalls[intent], F1s[intent]
+            precision, recall, F1_score = sorted_precision[intent], recalls[intent], F1_scores[intent]
             table.append(
-                [intent, precision, recall, F1, intent_to_supports[intent], intent_to_clusters[intent]])
+                [intent, precision, recall, F1_score, intent_to_supports[intent], intent_to_clusters[intent]])
     else:
         for intent in sorted_F1:
-            precision, recall, F1 = sorted_precision[intent], recalls[intent], F1s[intent]
+            precision, recall, F1_score = sorted_precision[intent], recalls[intent], F1_scores[intent]
             table.append(
-                [intent, precision, recall, F1, intent_to_supports[intent], intent_to_clusters[intent]])
+                [intent, precision, recall, F1_score, intent_to_supports[intent], intent_to_clusters[intent]])
 
     row4_spacer1, row4_1, row4_2, row4_3, row4_4, row4_5, row4_6, row4_spacer2 = st.columns(
         (2.3, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 0.5))
@@ -290,10 +277,10 @@ def render_analytics(database, test, cm_plot, recalls, precisions, F1s, intent_t
     with row5_1:
         st.markdown("---")
         st.subheader("tSNE visualisation of intent training utterances")
-        st.markdown("To gauge the quality of the intent training utterances and identify intent overlaps, "
-                    "tSNE clustering is performed based on the sentence transformer embeddings of the intent training "
+        st.markdown("To gauge the intent training data quality, "
+                    "tSNE clustering is performed on the sentence transformer embeddings of the intent training "
                     "utterances. "
-                    "By examining the clusters, not only can users find intents with significant overlap in training "
+                    "Not only can the clustering identify intents with significant overlap in training "
                     "data semantic space, "
-                    "they can also potentially discover novel intents from production logs to aid dialog re-design.")
+                    "it can also potentially discover novel intents from production logs to aid dialog re-design.")
         st.plotly_chart(dashboard_plot.plot_tSNE(all_intents, database, test), use_container_width=True)
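A small aside on the sorting idiom kept throughout render_analytics: dict(sorted(d.items(), key=lambda item: -item[1])) orders a score dictionary in descending order by value. An equivalent, arguably clearer spelling uses reverse=True (illustrative values, not from the repo):

    scores = {"intent_a": 0.91, "intent_b": 0.78, "intent_c": 0.95}
    by_score = dict(sorted(scores.items(), key=lambda item: item[1], reverse=True))
    # {'intent_c': 0.95, 'intent_a': 0.91, 'intent_b': 0.78}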

botsim/modules/remediator/dashboard/plot.py

Lines changed: 4 additions & 7 deletions
@@ -165,24 +165,21 @@ def plot_intent_performance(intent, mode, overall_performance, detailed_performa
     ner_errors = detailed_performance[mode][intent.replace("_eval", "")]["ner_errors"]
     intent_predictions = overall_performance[mode][intent.replace("_eval", "")]["intent_predictions"]
 
-    import plotly.graph_objects as go
-    from plotly.subplots import make_subplots
-
     prediction_labels, prediction_counts = [], []
     for p in intent_predictions:
         prediction_labels.append(p)
         prediction_counts.append(intent_predictions[p])
 
     entity_labels, entity_counts = [], []
     for ent in ner_errors:
-        type = ner_errors[ent]["extraction_type"]
+        extraction_type = ner_errors[ent]["extraction_type"]
         if "pattern" in ner_errors[ent]:
             pattern = ner_errors[ent]["pattern"]
-        if type == "UNK": continue
-        if type == "regex":
+        if extraction_type == "UNK": continue
+        if extraction_type == "regex":
             entity_labels.append(ner_errors[ent]["entity_name"] + " (" + pattern + ")")
         else:
-            entity_labels.append(ner_errors[ent]["entity_name"] + " extracted via " + type)
+            entity_labels.append(ner_errors[ent]["entity_name"] + " extracted via " + extraction_type)
         count = 0
         if "missed" in ner_errors[ent]:
             count += len(ner_errors[ent]["missed"])
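The rename from type to extraction_type is more than cosmetic: binding type locally shadows Python's builtin for the rest of the scope. A contrived illustration of the failure mode (not from the repo):

    type = "regex"   # shadows the builtin
    type("abc")      # TypeError: 'str' object is not callable

Dropping the function-local plotly imports is presumably safe because plot.py already imports plotly at module level; otherwise the later go/make_subplots references in this function would break.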
