Skip to content

Commit 6d317cb

Browse files
authored
Merge pull request #1022 from Labelbox/ao_ner_bbox_subclass_ndjson
Added NER with subclass NDJSON (PDF): AL-5147
Added BBOX with subclass NDJSON (PDF): AL-5146
Validated: AL-5296
Validated: https://labelbox.slack.com/archives/C03PTTCEFUJ/p1679072775529929
Added Conversational data class and NER annotation for conversational data
Added classifications with annotation types for conversational data: AL-5258
2 parents 6e94b3c + 8fb3c8f commit 6d317cb

File tree

2 files changed

+170
-8
lines changed

2 files changed

+170
-8
lines changed

examples/annotation_import/conversational.ipynb

Lines changed: 69 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,14 @@
136136
"metadata": {},
137137
"source": [
138138
"# message based classifications\n",
139+
"ner_annotation = lb_types.ObjectAnnotation(\n",
140+
" name=\"ner\",\n",
141+
" value=lb_types.ConversationEntity(\n",
142+
" start=0,\n",
143+
" end=8,\n",
144+
" message_id=\"4\"\n",
145+
" )\n",
146+
")\n",
139147
"\n",
140148
"ner_annotation_ndjson = { \n",
141149
" \"name\": \"ner\",\n",
@@ -155,7 +163,13 @@
155163
"source": [
156164
"##### Classification free text #####\n",
157165
"\n",
158-
"# Only supported with NDJSON\n",
166+
"text_annotation = lb_types.ClassificationAnnotation(\n",
167+
" name=\"text_convo\",\n",
168+
" value=lb_types.Text(answer=\"the answer to the text questions right here\"),\n",
169+
" message_id=\"0\"\n",
170+
")\n",
171+
"\n",
172+
"\n",
159173
"text_annotation_ndjson = {\n",
160174
" 'name': 'text_convo',\n",
161175
" 'answer': 'the answer to the text questions right here',\n",
@@ -171,7 +185,22 @@
171185
"source": [
172186
"##### Checklist Classification ####### \n",
173187
"\n",
174-
"# Only supported with NDJSON\n",
188+
"checklist_annotation= lb_types.ClassificationAnnotation(\n",
189+
" name=\"checklist_convo\", # must match your ontology feature's name\n",
190+
" value=lb_types.Checklist(\n",
191+
" answer = [\n",
192+
" lb_types.ClassificationAnswer(\n",
193+
" name = \"first_checklist_answer\"\n",
194+
" ), \n",
195+
" lb_types.ClassificationAnswer(\n",
196+
" name = \"second_checklist_answer\"\n",
197+
" )\n",
198+
" ]\n",
199+
" ),\n",
200+
" message_id=\"2\"\n",
201+
" )\n",
202+
"\n",
203+
"\n",
175204
"checklist_annotation_ndjson = {\n",
176205
" 'name': 'checklist_convo',\n",
177206
" 'answers': [\n",
@@ -190,7 +219,12 @@
190219
"source": [
191220
"######## Radio Classification ######\n",
192221
"\n",
193-
"# Only supported with NDJSON\n",
222+
"radio_annotation = lb_types.ClassificationAnnotation(\n",
223+
" name='radio_convo', \n",
224+
" value=lb_types.Radio(answer = lb_types.ClassificationAnswer(name = 'first_radio_answer')),\n",
225+
" message_id=\"0\"\n",
226+
")\n",
227+
"\n",
194228
"radio_annotation_ndjson = {\n",
195229
" 'name': 'radio_convo',\n",
196230
" 'answer': {\n",
@@ -348,6 +382,36 @@
348382
],
349383
"cell_type": "markdown"
350384
},
385+
{
386+
"metadata": {},
387+
"source": [
388+
"#### Python annotation\n",
389+
"Here we create the complete labels ndjson payload of annotations only using python annotation format. There is one annotation for each reference to an annotation that we created. "
390+
],
391+
"cell_type": "markdown"
392+
},
393+
{
394+
"metadata": {},
395+
"source": [
396+
"label = []\n",
397+
"label.append(\n",
398+
" lb_types.Label(\n",
399+
" data=lb_types.ConversationData(\n",
400+
" global_key=global_key\n",
401+
" ),\n",
402+
" annotations=[\n",
403+
" ner_annotation,\n",
404+
" text_annotation,\n",
405+
" checklist_annotation,\n",
406+
" radio_annotation\n",
407+
" ]\n",
408+
" )\n",
409+
")"
410+
],
411+
"cell_type": "code",
412+
"outputs": [],
413+
"execution_count": null
414+
},
351415
{
352416
"metadata": {},
353417
"source": [
@@ -398,7 +462,7 @@
398462
" client = client, \n",
399463
" project_id = project.uid, \n",
400464
" name=f\"mal_job-{str(uuid.uuid4())}\", \n",
401-
" predictions=label_ndjson)\n",
465+
" predictions=label)\n",
402466
"\n",
403467
"upload_job.wait_until_done();\n",
404468
"print(\"Errors:\", upload_job.errors)\n",
@@ -423,8 +487,7 @@
423487
" client = client, \n",
424488
" project_id = project.uid, \n",
425489
" name=\"label_import_job\"+str(uuid.uuid4()), \n",
426-
" # user label_ndjson if labels were created using python annotation tools\n",
427-
" labels=label_ndjson)\n",
490+
" labels=label)\n",
428491
"\n",
429492
"upload_job.wait_until_done();\n",
430493
"print(\"Errors:\", upload_job.errors)\n",

examples/annotation_import/pdf.ipynb

Lines changed: 101 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,62 @@
273273
"outputs": [],
274274
"execution_count": null
275275
},
276+
{
277+
"metadata": {},
278+
"source": [
279+
"######### BBOX with nested classifications #########\n",
280+
"\n",
281+
"bbox_with_radio_subclass_annotation_ndjson = {\n",
282+
" 'name': 'bbox_with_radio_subclass',\n",
283+
" 'classifications': [\n",
284+
" {\n",
285+
" 'name': 'sub_radio_question',\n",
286+
" 'answer': {'name': 'first_sub_radio_answer'}\n",
287+
" }\n",
288+
" ],\n",
289+
" 'bbox': {\n",
290+
" \"top\": 214.894,\n",
291+
" \"left\": 189.215,\n",
292+
" \"height\": 264,\n",
293+
" \"width\": 240.573\n",
294+
" },\n",
295+
" 'page': 1,\n",
296+
" 'unit': \"POINTS\"\n",
297+
"}"
298+
],
299+
"cell_type": "code",
300+
"outputs": [],
301+
"execution_count": null
302+
},
303+
{
304+
"metadata": {},
305+
"source": [
306+
"############ NER with nested classifications ######## \n",
307+
"\n",
308+
"ner_with_checklist_subclass_annotation_ndjson = {\n",
309+
" 'name': 'ner_with_checklist_subclass',\n",
310+
" 'classifications':[\n",
311+
" {\n",
312+
" 'name': 'sub_checklist_question',\n",
313+
" 'answer': [{'name': 'first_sub_checklist_answer'}] \n",
314+
" }\n",
315+
" ],\n",
316+
" 'textSelections': [\n",
317+
" {\n",
318+
" \"tokenIds\": [\n",
319+
" \"<UUID>\",\n",
320+
" ],\n",
321+
" \"groupId\": \"<UUID>\",\n",
322+
" \"page\": 1,\n",
323+
" }\n",
324+
" ] \n",
325+
"}\n",
326+
" \n"
327+
],
328+
"cell_type": "code",
329+
"outputs": [],
330+
"execution_count": null
331+
},
276332
{
277333
"metadata": {},
278334
"source": [
@@ -388,7 +444,34 @@
388444
" name=\"bounding_box\"), \n",
389445
" lb.Tool(\n",
390446
" tool=lb.Tool.Type.NER, \n",
391-
" name=\"named_entity\")]\n",
447+
" name=\"named_entity\"),\n",
448+
" lb.Tool(\n",
449+
" tool=lb.Tool.Type.NER, \n",
450+
" name=\"ner_with_checklist_subclass\",\n",
451+
" classifications=[\n",
452+
" lb.Classification(\n",
453+
" class_type=lb.Classification.Type.CHECKLIST,\n",
454+
" name=\"sub_checklist_question\",\n",
455+
" options=[\n",
456+
" lb.Option(value=\"first_sub_checklist_answer\")\n",
457+
" ]\n",
458+
" )\n",
459+
" ]\n",
460+
" ),\n",
461+
" lb.Tool( \n",
462+
" tool=lb.Tool.Type.BBOX,\n",
463+
" name=\"bbox_with_radio_subclass\",\n",
464+
" classifications=[\n",
465+
" lb.Classification(\n",
466+
" class_type=lb.Classification.Type.RADIO,\n",
467+
" name=\"sub_radio_question\",\n",
468+
" options=[\n",
469+
" lb.Option(value=\"first_sub_radio_answer\")\n",
470+
" ]\n",
471+
" )\n",
472+
" ]\n",
473+
" )\n",
474+
" ]\n",
392475
")\n",
393476
"\n",
394477
"ontology = client.create_ontology(\"Document Annotation Import Demo\",\n",
@@ -487,7 +570,6 @@
487570
" # build text selections for Annotation Types\n",
488571
" document_text_selection = lb_types.DocumentTextSelection(groupId=group['id'], tokenIds=list_tokens, page=1)\n",
489572
" text_selections.append(document_text_selection)\n",
490-
" \n",
491573
" # build text selection for the NDJson annotation\n",
492574
" entities_annotations_ndjson.update(\n",
493575
" {\n",
@@ -497,17 +579,32 @@
497579
" \"tokenIds\": list_tokens, #id associated with each word in a sentence group\n",
498580
" \"page\": 1,\n",
499581
" }\n",
582+
" ]}\n",
583+
" )\n",
584+
" if group['content'] == \"T. Sasaki,* N. Yoneyama, and N. Kobayashi\":\n",
585+
" list_tokens_2 = [x['id'] for x in group['tokens']]\n",
586+
" ner_with_checklist_subclass_annotation_ndjson.update(\n",
587+
" {\n",
588+
" \"textSelections\": [\n",
589+
" {\n",
590+
" \"groupId\": group['id'], #id associated with the group of words\n",
591+
" \"tokenIds\": list_tokens_2, #id associated with each word in a sentence group\n",
592+
" \"page\": 1,\n",
593+
" }\n",
500594
" ]\n",
501595
" }\n",
502596
" )\n",
597+
" \n",
503598
"# re-write the entity annotation with text selections (annotation types)\n",
504599
"entities_annotation_document_entity = lb_types.DocumentEntity(name=\"named_entity\", \n",
505600
" textSelections = text_selections)\n",
506601
"entities_annotation = lb_types.ObjectAnnotation(name=\"named_entity\",\n",
507602
" value=entities_annotation_document_entity)\n",
603+
"\n",
508604
" \n",
509605
"print(f\"entities_annotations_ndjson={entities_annotations_ndjson}\")\n",
510606
"print(f\"entities_annotation={entities_annotation}\")\n",
607+
"print(f\"nested_entities_annotation={ner_with_checklist_subclass_annotation_ndjson}\")\n",
511608
" "
512609
],
513610
"cell_type": "code",
@@ -565,6 +662,8 @@
565662
" text_annotation_ndjson,\n",
566663
" checklist_annotation_ndjson,\n",
567664
" nested_checklist_annotation_ndjson,\n",
665+
" bbox_with_radio_subclass_annotation_ndjson,\n",
666+
" ner_with_checklist_subclass_annotation_ndjson,\n",
568667
" nested_radio_annotation_ndjson,\n",
569668
" radio_annotation_ndjson\n",
570669
" ]:\n",

0 commit comments

Comments
 (0)