Skip to content

Commit a71adec

Browse files
committed
medcattrainer export workflow
1 parent 3f7d027 commit a71adec

File tree

1 file changed

+240
-0
lines changed

1 file changed

+240
-0
lines changed
Lines changed: 240 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,240 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"# Evaluate a MedCATtrainer project export"
8+
]
9+
},
10+
{
11+
"cell_type": "code",
12+
"execution_count": 1,
13+
"metadata": {},
14+
"outputs": [],
15+
"source": [
16+
"import json\n",
17+
"import pandas as pd\n",
18+
"import plotly.graph_objects as go\n",
19+
"from plotly. subplots import make_subplots\n",
20+
"from IPython.display import Image\n",
21+
"from collections import Counter"
22+
]
23+
},
24+
{
25+
"cell_type": "code",
26+
"execution_count": null,
27+
"metadata": {},
28+
"outputs": [],
29+
"source": [
30+
"mct_export = '../../data/medcattrainer_export/'+''  # append the export's .json file name inside the empty quotes\n",
31+
"with open(mct_export, 'r', encoding='utf-8') as jsonfile:  # explicit encoding: do not rely on the platform default\n",
32+
"    mct = json.load(jsonfile)"
33+
]
34+
},
35+
{
36+
"cell_type": "code",
37+
"execution_count": null,
38+
"metadata": {},
39+
"outputs": [],
40+
"source": [
41+
"# Print the name of every project contained in the export\n",
42+
"for project in mct['projects']:\n",
43+
"    print(project['name'])"
44+
]
45+
},
46+
{
47+
"cell_type": "code",
48+
"execution_count": null,
49+
"metadata": {},
50+
"outputs": [],
51+
"source": [
52+
"# Names of all documents, in export order, across every project\n",
53+
"doc_lst = [\n",
54+
"    doc['name']\n",
55+
"    for p in mct['projects'] for doc in p['documents']\n",
56+
"]\n",
57+
"doc_lst"
58+
]
59+
},
60+
{
61+
"cell_type": "code",
62+
"execution_count": null,
63+
"metadata": {},
66+
"outputs": [],
79+
"source": [
80+
"# Raw annotation dicts of every document in every project (references into mct, not copies)\n",
81+
"ann_lst = [\n",
82+
"    anns\n",
83+
"    for p in mct['projects']\n",
84+
"    for doc in p['documents'] for anns in doc['annotations']\n",
85+
"]\n",
86+
"ann_lst"
87+
]
88+
},
89+
{
90+
"cell_type": "markdown",
91+
"metadata": {},
92+
"source": [
93+
"# Summary format for analysis"
94+
]
95+
},
96+
{
97+
"cell_type": "code",
98+
"execution_count": null,
99+
"metadata": {},
100+
"outputs": [],
101+
"source": [
102+
"# Flatten each annotation into one row: project, document, annotation fields, meta annotation values\n",
103+
"ann_lst = []\n",
104+
"for p in mct['projects']:\n",
105+
"    for doc in p['documents']:\n",
106+
"        for anns in doc['annotations']:\n",
107+
"            output = dict()\n",
108+
"            output['project'] = p['name']\n",
109+
"            output['document_name'] = doc['name']\n",
110+
"            # Copy the fields instead of pop()-ing 'meta_anns': mct stays untouched,\n",
111+
"            # so this cell can be re-run without a KeyError.\n",
112+
"            output.update({k: v for k, v in anns.items() if k != 'meta_anns'})\n",
113+
"            # One column per meta annotation, holding its 'value'; none present -> no extra columns.\n",
114+
"            output.update({name: meta['value']\n",
115+
"                           for name, meta in anns.get('meta_anns', {}).items()})\n",
116+
"            ann_lst.append(output)\n",
117+
"final_output = pd.DataFrame(ann_lst)\n",
118+
"# Parse the timestamps so the .dt accessor can be used on this column\n",
119+
"final_output['last_modified'] = pd.to_datetime(final_output['last_modified'])"
120+
]
121+
},
122+
{
123+
"cell_type": "code",
124+
"execution_count": null,
125+
"metadata": {},
126+
"outputs": [],
127+
"source": [
128+
"final_output"
129+
]
130+
},
131+
{
132+
"cell_type": "code",
133+
"execution_count": null,
134+
"metadata": {},
135+
"outputs": [],
136+
"source": [
137+
"# Number of annotations recorded for each value of the 'cui' column\n",
138+
"cui_counts = Counter(final_output['cui'].tolist())\n",
139+
"cui_counts"
140+
]
141+
},
142+
{
143+
"cell_type": "markdown",
144+
"metadata": {},
145+
"source": [
146+
"# Make User plots"
147+
]
148+
},
149+
{
150+
"cell_type": "code",
151+
"execution_count": null,
152+
"metadata": {},
153+
"outputs": [],
154+
"source": [
155+
"# Count annotations per user per calendar day.\n",
156+
"df = final_output[['user', 'last_modified']]\n",
157+
"data = df.groupby([df['last_modified'].dt.year.rename('year'),\n",
158+
"                   df['last_modified'].dt.month.rename('month'),\n",
159+
"                   df['last_modified'].dt.day.rename('day'),\n",
160+
"                   'user'])['last_modified'].count()\n",
161+
"# Grouped Series -> flat frame with columns year, month, day, user, count.\n",
162+
"data = data.rename('count').reset_index()\n",
163+
"# pd.to_datetime assembles one date from the year/month/day columns.\n",
164+
"data['date'] = pd.to_datetime(data[['year', 'month', 'day']])"
165+
]
166+
},
167+
{
168+
"cell_type": "code",
169+
"execution_count": null,
170+
"metadata": {},
171+
"outputs": [],
172+
"source": [
173+
"data"
174+
]
175+
},
176+
{
177+
"cell_type": "markdown",
178+
"metadata": {},
179+
"source": [
180+
"## Plot"
181+
]
182+
},
183+
{
184+
"cell_type": "code",
185+
"execution_count": null,
186+
"metadata": {},
187+
"outputs": [],
188+
"source": [
189+
"# annotator work: one bar trace per user, annotation count per date\n",
190+
"\n",
191+
"fig = go.Figure()\n",
192+
"for user in data['user'].unique():\n",
193+
"    user_rows = data[data['user'] == user]  # filter once, reuse for x and y\n",
194+
"    fig.add_trace(go.Bar(x=user_rows['date'], y=user_rows['count'], name=user))\n",
195+
"\n",
196+
"# Figure title, legend title and axis labels so the chart stands on its own\n",
197+
"fig.update_layout(title={'text': 'MedCATtrainer Annotator Progress'},\n",
198+
"                  legend_title_text='MedCAT Annotator')\n",
199+
"fig.update_xaxes(title_text='Date')\n",
200+
"fig.update_yaxes(title_text='Annotation Count')\n",
201+
"fig.show()"
202+
]
203+
},
204+
{
205+
"cell_type": "code",
206+
"execution_count": null,
207+
"metadata": {},
208+
"outputs": [],
209+
"source": []
210+
},
211+
{
212+
"cell_type": "code",
213+
"execution_count": null,
214+
"metadata": {},
215+
"outputs": [],
216+
"source": []
217+
}
218+
],
219+
"metadata": {
220+
"kernelspec": {
221+
"display_name": "Python 3 (ipykernel)",
222+
"language": "python",
223+
"name": "python3"
224+
},
225+
"language_info": {
226+
"codemirror_mode": {
227+
"name": "ipython",
228+
"version": 3
229+
},
230+
"file_extension": ".py",
231+
"mimetype": "text/x-python",
232+
"name": "python",
233+
"nbconvert_exporter": "python",
234+
"pygments_lexer": "ipython3",
235+
"version": "3.7.3"
236+
}
237+
},
238+
"nbformat": 4,
239+
"nbformat_minor": 5
240+
}

0 commit comments

Comments
 (0)