|
273 | 273 | "outputs": [],
|
274 | 274 | "execution_count": null
|
275 | 275 | },
|
| 276 | + { |
| 277 | + "metadata": {}, |
| 278 | + "source": [ |
| 279 | + "######### BBOX with nested classifications #########\n", |
| 280 | + "\n", |
| 281 | + "bbox_with_radio_subclass_annotation_ndjson = {\n", |
| 282 | + " 'name': 'bbox_with_radio_subclass',\n", |
| 283 | + " 'classifications': [\n", |
| 284 | + " {\n", |
| 285 | + " 'name': 'sub_radio_question',\n", |
| 286 | + " 'answer': {'name': 'first_sub_radio_answer'}\n", |
| 287 | + " }\n", |
| 288 | + " ],\n", |
| 289 | + " 'bbox': {\n", |
| 290 | + " \"top\": 214.894,\n", |
| 291 | + " \"left\": 189.215,\n", |
| 292 | + " \"height\": 264,\n", |
| 293 | + " \"width\": 240.573\n", |
| 294 | + " },\n", |
| 295 | + " 'page': 1,\n", |
| 296 | + " 'unit': \"POINTS\"\n", |
| 297 | + "}" |
| 298 | + ], |
| 299 | + "cell_type": "code", |
| 300 | + "outputs": [], |
| 301 | + "execution_count": null |
| 302 | + }, |
| 303 | + { |
| 304 | + "metadata": {}, |
| 305 | + "source": [ |
| 306 | + "############ NER with nested classifications ######## \n", |
| 307 | + "\n", |
| 308 | + "ner_with_checklist_subclass_annotation_ndjson = {\n", |
| 309 | + " 'name': 'ner_with_checklist_subclass',\n", |
| 310 | + " 'classifications':[\n", |
| 311 | + " {\n", |
| 312 | + " 'name': 'sub_checklist_question',\n", |
| 313 | + " 'answer': [{'name': 'first_sub_checklist_answer'}] \n", |
| 314 | + " }\n", |
| 315 | + " ],\n", |
| 316 | + " 'textSelections': [\n", |
| 317 | + " {\n", |
| 318 | + " \"tokenIds\": [\n", |
| 319 | + " \"<UUID>\",\n", |
| 320 | + " ],\n", |
| 321 | + " \"groupId\": \"<UUID>\",\n", |
| 322 | + " \"page\": 1,\n", |
| 323 | + " }\n", |
| 324 | + " ] \n", |
| 325 | + "}\n", |
| 326 | + " \n" |
| 327 | + ], |
| 328 | + "cell_type": "code", |
| 329 | + "outputs": [], |
| 330 | + "execution_count": null |
| 331 | + }, |
276 | 332 | {
|
277 | 333 | "metadata": {},
|
278 | 334 | "source": [
|
|
388 | 444 | " name=\"bounding_box\"), \n",
|
389 | 445 | " lb.Tool(\n",
|
390 | 446 | " tool=lb.Tool.Type.NER, \n",
|
391 |
| - " name=\"named_entity\")]\n", |
| 447 | + " name=\"named_entity\"),\n", |
| 448 | + " lb.Tool(\n", |
| 449 | + " tool=lb.Tool.Type.NER, \n", |
| 450 | + " name=\"ner_with_checklist_subclass\",\n", |
| 451 | + " classifications=[\n", |
| 452 | + " lb.Classification(\n", |
| 453 | + " class_type=lb.Classification.Type.CHECKLIST,\n", |
| 454 | + " name=\"sub_checklist_question\",\n", |
| 455 | + " options=[\n", |
| 456 | + " lb.Option(value=\"first_sub_checklist_answer\")\n", |
| 457 | + " ]\n", |
| 458 | + " )\n", |
| 459 | + " ]\n", |
| 460 | + " ),\n", |
| 461 | + " lb.Tool( \n", |
| 462 | + " tool=lb.Tool.Type.BBOX,\n", |
| 463 | + " name=\"bbox_with_radio_subclass\",\n", |
| 464 | + " classifications=[\n", |
| 465 | + " lb.Classification(\n", |
| 466 | + " class_type=lb.Classification.Type.RADIO,\n", |
| 467 | + " name=\"sub_radio_question\",\n", |
| 468 | + " options=[\n", |
| 469 | + " lb.Option(value=\"first_sub_radio_answer\")\n", |
| 470 | + " ]\n", |
| 471 | + " )\n", |
| 472 | + " ]\n", |
| 473 | + " )\n", |
| 474 | + " ]\n", |
392 | 475 | ")\n",
|
393 | 476 | "\n",
|
394 | 477 | "ontology = client.create_ontology(\"Document Annotation Import Demo\",\n",
|
|
487 | 570 | " # build text selections for Annotation Types\n",
|
488 | 571 | " document_text_selection = lb_types.DocumentTextSelection(groupId=group['id'], tokenIds=list_tokens, page=1)\n",
|
489 | 572 | " text_selections.append(document_text_selection)\n",
|
490 |
| - " \n", |
491 | 573 | " # build text selection for the NDJson annotation\n",
|
492 | 574 | " entities_annotations_ndjson.update(\n",
|
493 | 575 | " {\n",
|
|
497 | 579 | " \"tokenIds\": list_tokens, #id associated with each word in a sentence group\n",
|
498 | 580 | " \"page\": 1,\n",
|
499 | 581 | " }\n",
|
| 582 | + " ]}\n", |
| 583 | + " )\n", |
| 584 | + " if group['content'] == \"T. Sasaki,* N. Yoneyama, and N. Kobayashi\":\n", |
| 585 | + " list_tokens_2 = [x['id'] for x in group['tokens']]\n", |
| 586 | + " ner_with_checklist_subclass_annotation_ndjson.update(\n", |
| 587 | + " {\n", |
| 588 | + " \"textSelections\": [\n", |
| 589 | + " {\n", |
| 590 | + " \"groupId\": group['id'], #id associated with the group of words\n", |
| 591 | + " \"tokenIds\": list_tokens_2, #id associated with each word in a sentence group\n", |
| 592 | + " \"page\": 1,\n", |
| 593 | + " }\n", |
500 | 594 | " ]\n",
|
501 | 595 | " }\n",
|
502 | 596 | " )\n",
|
| 597 | + " \n", |
503 | 598 | "# re-write the entity annotation with text selections (annotation types)\n",
|
504 | 599 | "entities_annotation_document_entity = lb_types.DocumentEntity(name=\"named_entity\", \n",
|
505 | 600 | " textSelections = text_selections)\n",
|
506 | 601 | "entities_annotation = lb_types.ObjectAnnotation(name=\"named_entity\",\n",
|
507 | 602 | " value=entities_annotation_document_entity)\n",
|
| 603 | + "\n", |
508 | 604 | " \n",
|
509 | 605 | "print(f\"entities_annotations_ndjson={entities_annotations_ndjson}\")\n",
|
510 | 606 | "print(f\"entities_annotation={entities_annotation}\")\n",
|
| 607 | + "print(f\"nested_entities_annotation={ner_with_checklist_subclass_annotation_ndjson}\")\n", |
511 | 608 | " "
|
512 | 609 | ],
|
513 | 610 | "cell_type": "code",
|
|
565 | 662 | " text_annotation_ndjson,\n",
|
566 | 663 | " checklist_annotation_ndjson,\n",
|
567 | 664 | " nested_checklist_annotation_ndjson,\n",
|
| 665 | + " bbox_with_radio_subclass_annotation_ndjson,\n", |
| 666 | + " ner_with_checklist_subclass_annotation_ndjson,\n", |
568 | 667 | " nested_radio_annotation_ndjson,\n",
|
569 | 668 | " radio_annotation_ndjson\n",
|
570 | 669 | " ]:\n",
|
|
0 commit comments