Skip to content

Commit bc094cc

Browse files
committed
cogstack search scripts for large data extractions to df
1 parent 8884ca7 commit bc094cc

File tree

1 file changed

+220
-0
lines changed

1 file changed

+220
-0
lines changed

search/search_template.ipynb

Lines changed: 220 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,220 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"# Searching CogStack\n",
8+
"\n",
9+
"This notebook is designed to be a template for CogStack searches."
10+
]
11+
},
12+
{
13+
"cell_type": "code",
14+
"execution_count": 2,
15+
"metadata": {},
16+
"outputs": [],
17+
"source": [
18+
"import sys\n",
19+
"sys.path.append('..')\n",
20+
"from credentials import *\n",
21+
"from cogstack import CogStack"
22+
]
23+
},
24+
{
25+
"cell_type": "markdown",
26+
"metadata": {},
27+
"source": [
28+
"# Login and Initialise"
29+
]
30+
},
31+
{
32+
"cell_type": "code",
33+
"execution_count": null,
34+
"metadata": {},
35+
"outputs": [],
36+
"source": [
37+
"cs = CogStack(hosts, username, password, api=True)"
38+
]
39+
},
40+
{
41+
"cell_type": "markdown",
42+
"metadata": {},
43+
"source": [
44+
"# Check the list of Indices and columns"
45+
]
46+
},
47+
{
48+
"cell_type": "code",
49+
"execution_count": null,
50+
"metadata": {},
51+
"outputs": [],
52+
"source": [
53+
"for i in cs.elastic.indices.get_mapping().keys():\n",
54+
" print(i)"
55+
]
56+
},
57+
{
58+
"cell_type": "code",
59+
"execution_count": null,
60+
"metadata": {},
61+
"outputs": [],
62+
"source": [
63+
"# Check the list of columns in that index\n",
64+
"index = ''\n",
65+
"for col in cs.elastic.indices.get_mapping(index=index)[index]['mappings']['properties'].keys():\n",
66+
" print(col)"
67+
]
68+
},
69+
{
70+
"cell_type": "markdown",
71+
"metadata": {},
72+
"source": [
73+
"# Set parameters"
74+
]
75+
},
76+
{
77+
"cell_type": "code",
78+
"execution_count": null,
79+
"metadata": {},
80+
"outputs": [],
81+
"source": [
82+
"pt_list = [] # example list of patients' patient_TrustNumber here"
83+
]
84+
},
85+
{
86+
"cell_type": "markdown",
87+
"metadata": {},
88+
"source": [
89+
"## Columns of interest\n",
90+
"\n",
91+
"Select your fields and list them in the order the output columns should appear."
92+
]
93+
},
94+
{
95+
"cell_type": "code",
96+
"execution_count": null,
97+
"metadata": {},
98+
"outputs": [],
99+
"source": [
100+
"columns = []"
101+
]
102+
},
103+
{
104+
"cell_type": "markdown",
105+
"metadata": {},
106+
"source": [
107+
"## Build query\n",
108+
"\n",
109+
"Further information on how to build a query [can be found here](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-simple-query-string-query.html).\n"
110+
]
111+
},
112+
{
113+
"cell_type": "code",
114+
"execution_count": null,
115+
"metadata": {},
116+
"outputs": [],
117+
"source": [
118+
"# Example query structure\n",
119+
"query = {\n",
120+
" \"from\": 0,\n",
121+
" \"size\": 10000,\n",
122+
" \"query\": {\n",
123+
" \"bool\": {\n",
124+
" \"filter\": {\n",
125+
" \"terms\": {\"patient_TrustNumber\": pt_list}\n",
126+
" },\n",
127+
" \"must\": [\n",
128+
" {\"query_string\": {\n",
129+
" \"query\": \"***YOUR LUCENE QUERY HERE***\"}\n",
130+
" }\n",
131+
" ]\n",
132+
" }\n",
133+
" },\n",
134+
" \"_source\": columns # This is a search column filter. remove if all columns are to be retrieved\n",
135+
"}"
136+
]
137+
},
138+
{
139+
"cell_type": "markdown",
140+
"metadata": {
141+
"tags": []
142+
},
143+
"source": [
144+
"# Search, Process, and Save"
145+
]
146+
},
147+
{
148+
"cell_type": "code",
149+
"execution_count": null,
150+
"metadata": {},
151+
"outputs": [],
152+
"source": [
153+
"df = cs.cogstack2df(query=query, index=index, column_headers=columns)"
154+
]
155+
},
156+
{
157+
"cell_type": "markdown",
158+
"metadata": {},
159+
"source": [
160+
"## Process"
161+
]
162+
},
163+
{
164+
"cell_type": "code",
165+
"execution_count": null,
166+
"metadata": {},
167+
"outputs": [],
168+
"source": [
169+
"# Whatever you want here\n",
170+
"df.head()"
171+
]
172+
},
173+
{
174+
"cell_type": "markdown",
175+
"metadata": {},
176+
"source": [
177+
"## Save"
178+
]
179+
},
180+
{
181+
"cell_type": "code",
182+
"execution_count": null,
183+
"metadata": {},
184+
"outputs": [],
185+
"source": [
186+
"file_name = \"file_name.csv\""
187+
]
188+
},
189+
{
190+
"cell_type": "code",
191+
"execution_count": null,
192+
"metadata": {},
193+
"outputs": [],
194+
"source": [
195+
"df.to_csv(file_name, index=False)"
196+
]
197+
}
198+
],
199+
"metadata": {
200+
"kernelspec": {
201+
"display_name": "Python 3 (ipykernel)",
202+
"language": "python",
203+
"name": "python3"
204+
},
205+
"language_info": {
206+
"codemirror_mode": {
207+
"name": "ipython",
208+
"version": 3
209+
},
210+
"file_extension": ".py",
211+
"mimetype": "text/x-python",
212+
"name": "python",
213+
"nbconvert_exporter": "python",
214+
"pygments_lexer": "ipython3",
215+
"version": "3.7.3"
216+
}
217+
},
218+
"nbformat": 4,
219+
"nbformat_minor": 4
220+
}

0 commit comments

Comments
 (0)