Merge pull request #1022 from Labelbox/ao_ner_bbox_subclass_ndjson

ovalle15 · web-flow · commit 6d317cb66743 · 2023-03-22T12:53:26.000-04:00
Added NER with subclass NDJSON -PDF AL-5147 Added BBOX with subclass NDJSON -PDF AL-5146 Validated: AL-5296 Validated : https://labelbox.slack.com/archives/C03PTTCEFUJ/p1679072775529929 Added Conversational data class and ner annotation for convo Added classifications with annotation types for convo : AL-5258
diff --git a/examples/annotation_import/conversational.ipynb b/examples/annotation_import/conversational.ipynb
@@ -136,6 +136,14 @@
       "metadata": {},
       "source": [
         "# message based classifications\n",
+        "ner_annotation = lb_types.ObjectAnnotation(\n",
+        "    name=\"ner\",\n",
+        "    value=lb_types.ConversationEntity(\n",
+        "        start=0,\n",
+        "        end=8,\n",
+        "        message_id=\"4\"\n",
+        "    )\n",
+        ")\n",
         "\n",
         "ner_annotation_ndjson = { \n",
         "        \"name\": \"ner\",\n",
@@ -155,7 +163,13 @@
       "source": [
         "##### Classification free text #####\n",
         "\n",
-        "# Only supported with NDJSON\n",
+        "text_annotation = lb_types.ClassificationAnnotation(\n",
+        "    name=\"text_convo\",\n",
+        "    value=lb_types.Text(answer=\"the answer to the text questions right here\"),\n",
+        "    message_id=\"0\"\n",
+        ")\n",
+        "\n",
+        "\n",
         "text_annotation_ndjson = {\n",
         "    'name': 'text_convo',\n",
         "    'answer': 'the answer to the text questions right here',\n",
@@ -171,7 +185,22 @@
       "source": [
         "##### Checklist Classification ####### \n",
         "\n",
-        "# Only supported with NDJSON\n",
+        "checklist_annotation = lb_types.ClassificationAnnotation(\n",
+        "    name=\"checklist_convo\",  # must match your ontology feature's name\n",
+        "    value=lb_types.Checklist(\n",
+        "        answer=[\n",
+        "            lb_types.ClassificationAnswer(\n",
+        "                name=\"first_checklist_answer\"\n",
+        "            ),\n",
+        "            lb_types.ClassificationAnswer(\n",
+        "                name=\"second_checklist_answer\"\n",
+        "            ),\n",
+        "        ]\n",
+        "    ),\n",
+        "    message_id=\"2\"\n",
+        ")\n",
+        "\n",
+        "\n",
         "checklist_annotation_ndjson = {\n",
         "    'name': 'checklist_convo',\n",
         "    'answers': [\n",
@@ -190,7 +219,12 @@
       "source": [
         "######## Radio Classification ######\n",
         "\n",
-        "# Only supported with NDJSON\n",
+        "radio_annotation = lb_types.ClassificationAnnotation(\n",
+        "    name='radio_convo', \n",
+        "    value=lb_types.Radio(answer = lb_types.ClassificationAnswer(name = 'first_radio_answer')),\n",
+        "    message_id=\"0\"\n",
+        ")\n",
+        "\n",
         "radio_annotation_ndjson = {\n",
         "    'name': 'radio_convo',\n",
         "    'answer': {\n",
@@ -348,6 +382,36 @@
       ],
       "cell_type": "markdown"
     },
+    {
+      "metadata": {},
+      "source": [
+        "#### Python annotation\n",
+        "Here we create the complete label payload using only the Python annotation types. The label contains one reference to each annotation that we created above. "
+      ],
+      "cell_type": "markdown"
+    },
+    {
+      "metadata": {},
+      "source": [
+        "# Python annotation payload: a single-element list holding one Label.\n",
+        "# The Label pairs the conversation data (referenced by global_key) with the\n",
+        "# NER, text, checklist and radio annotations defined earlier in the notebook.\n",
+        "label = [\n",
+        "  lb_types.Label(\n",
+        "    data=lb_types.ConversationData(global_key=global_key),\n",
+        "    annotations=[\n",
+        "      ner_annotation,\n",
+        "      text_annotation,\n",
+        "      checklist_annotation,\n",
+        "      radio_annotation,\n",
+        "    ],\n",
+        "  )\n",
+        "]"
+      ],
+      "cell_type": "code",
+      "outputs": [],
+      "execution_count": null
+    },
     {
       "metadata": {},
       "source": [
@@ -398,7 +462,7 @@
         "    client = client, \n",
         "    project_id = project.uid, \n",
         "    name=f\"mal_job-{str(uuid.uuid4())}\", \n",
-        "    predictions=label_ndjson)\n",
+        "    predictions=label)\n",
         "\n",
         "upload_job.wait_until_done();\n",
         "print(\"Errors:\", upload_job.errors)\n",
@@ -423,8 +487,7 @@
         "    client = client, \n",
         "    project_id = project.uid, \n",
         "    name=\"label_import_job\"+str(uuid.uuid4()),  \n",
-        "    # user label_ndjson if labels were created using python annotation tools\n",
-        "    labels=label_ndjson)\n",
+        "    labels=label)\n",
         "\n",
         "upload_job.wait_until_done();\n",
         "print(\"Errors:\", upload_job.errors)\n",
diff --git a/examples/annotation_import/pdf.ipynb b/examples/annotation_import/pdf.ipynb
@@ -273,6 +273,62 @@
       "outputs": [],
       "execution_count": null
     },
+    {
+      "metadata": {},
+      "source": [
+        "######### BBOX with nested classifications #########\n",
+        "\n",
+        "bbox_with_radio_subclass_annotation_ndjson = {\n",
+        "    'name': 'bbox_with_radio_subclass',\n",
+        "    'classifications': [\n",
+        "        {\n",
+        "            'name': 'sub_radio_question',\n",
+        "            'answer': {'name': 'first_sub_radio_answer'},\n",
+        "        },\n",
+        "    ],\n",
+        "    'bbox': {\n",
+        "        'top': 214.894,\n",
+        "        'left': 189.215,\n",
+        "        'height': 264,\n",
+        "        'width': 240.573,\n",
+        "    },\n",
+        "    'page': 1,\n",
+        "    'unit': 'POINTS',\n",
+        "}"
+      ],
+      "cell_type": "code",
+      "outputs": [],
+      "execution_count": null
+    },
+    {
+      "metadata": {},
+      "source": [
+        "############ NER with nested classifications ######## \n",
+        "\n",
+        "# The '<UUID>' values are placeholders; a later cell replaces 'textSelections' via .update().\n",
+        "ner_with_checklist_subclass_annotation_ndjson = {\n",
+        "    'name': 'ner_with_checklist_subclass',\n",
+        "    'classifications': [\n",
+        "        {\n",
+        "            'name': 'sub_checklist_question',\n",
+        "            'answer': [{'name': 'first_sub_checklist_answer'}],\n",
+        "        },\n",
+        "    ],\n",
+        "    'textSelections': [\n",
+        "        {\n",
+        "            'tokenIds': [\n",
+        "                '<UUID>',\n",
+        "            ],\n",
+        "            'groupId': '<UUID>',\n",
+        "            'page': 1,\n",
+        "        },\n",
+        "    ],\n",
+        "}"
+      ],
+      "cell_type": "code",
+      "outputs": [],
+      "execution_count": null
+    },
     {
       "metadata": {},
       "source": [
@@ -388,7 +444,34 @@
         "      name=\"bounding_box\"), \n",
         "    lb.Tool(\n",
         "        tool=lb.Tool.Type.NER, \n",
-        "        name=\"named_entity\")]\n",
+        "        name=\"named_entity\"),\n",
+        "    lb.Tool(\n",
+        "        tool=lb.Tool.Type.NER, \n",
+        "        name=\"ner_with_checklist_subclass\",\n",
+        "        classifications=[\n",
+        "          lb.Classification(\n",
+        "            class_type=lb.Classification.Type.CHECKLIST,\n",
+        "            name=\"sub_checklist_question\",\n",
+        "            options=[\n",
+        "              lb.Option(value=\"first_sub_checklist_answer\")\n",
+        "            ]\n",
+        "          )\n",
+        "          ]\n",
+        "    ),\n",
+        "    lb.Tool( \n",
+        "      tool=lb.Tool.Type.BBOX,\n",
+        "      name=\"bbox_with_radio_subclass\",\n",
+        "      classifications=[\n",
+        "            lb.Classification(\n",
+        "                class_type=lb.Classification.Type.RADIO,\n",
+        "                name=\"sub_radio_question\",\n",
+        "                options=[\n",
+        "                  lb.Option(value=\"first_sub_radio_answer\")\n",
+        "                ]\n",
+        "              )\n",
+        "        ]\n",
+        "      )\n",
+        "    ]\n",
         ")\n",
         "\n",
         "ontology = client.create_ontology(\"Document Annotation Import Demo\",\n",
@@ -487,7 +570,6 @@
         "      # build text selections for Annotation Types\n",
         "      document_text_selection = lb_types.DocumentTextSelection(groupId=group['id'], tokenIds=list_tokens, page=1)\n",
         "      text_selections.append(document_text_selection)\n",
-        "      \n",
         "      # build text selection for the NDJson annotation\n",
         "      entities_annotations_ndjson.update(\n",
         "        {\n",
@@ -497,17 +579,32 @@
         "              \"tokenIds\": list_tokens, #id associated with each word in a sentence group\n",
         "              \"page\": 1,\n",
         "            }\n",
+        "          ]}\n",
+        "      )\n",
+        "    if group['content'] == \"T. Sasaki,* N. Yoneyama, and N. Kobayashi\":\n",
+        "      list_tokens_2 = [x['id'] for x in group['tokens']]\n",
+        "      ner_with_checklist_subclass_annotation_ndjson.update(\n",
+        "        {\n",
+        "          \"textSelections\": [\n",
+        "            {\n",
+        "              \"groupId\": group['id'], #id associated with the group of words\n",
+        "              \"tokenIds\": list_tokens_2, #id associated with each word in a sentence group\n",
+        "              \"page\": 1,\n",
+        "            }\n",
         "          ]\n",
         "        }\n",
         "      )\n",
+        "      \n",
         "# re-write the entity annotation with text selections (annotation types)\n",
         "entities_annotation_document_entity = lb_types.DocumentEntity(name=\"named_entity\", \n",
         "                                          textSelections = text_selections)\n",
         "entities_annotation = lb_types.ObjectAnnotation(name=\"named_entity\",\n",
         "                                                value=entities_annotation_document_entity)\n",
+        "\n",
         "        \n",
         "print(f\"entities_annotations_ndjson={entities_annotations_ndjson}\")\n",
         "print(f\"entities_annotation={entities_annotation}\")\n",
+        "print(f\"ner_with_checklist_subclass_annotation_ndjson={ner_with_checklist_subclass_annotation_ndjson}\")\n",
         "  "
       ],
       "cell_type": "code",
@@ -565,6 +662,8 @@
         "    text_annotation_ndjson,\n",
         "    checklist_annotation_ndjson,\n",
         "    nested_checklist_annotation_ndjson,\n",
+        "    bbox_with_radio_subclass_annotation_ndjson,\n",
+        "    ner_with_checklist_subclass_annotation_ndjson,\n",
         "    nested_radio_annotation_ndjson,\n",
         "    radio_annotation_ndjson\n",
         "  ]:\n",