Merge pull request #1012 from Labelbox/imuhammad/AL-5258-message-based-classification-annotation-types

whistler · web-flow · commit 223b790e5150 · 2023-03-22T08:07:59.000-07:00
[AL-5258] Annotation types for message-based conversation classifications
diff --git a/labelbox/data/annotation_types/annotation.py b/labelbox/data/annotation_types/annotation.py
@@ -1,5 +1,5 @@
 import abc
-from typing import Any, Dict, List, Union
+from typing import Any, Dict, List, Optional, Union
 
 from labelbox.data.mixins import ConfidenceMixin
 
@@ -27,10 +27,12 @@ class ClassificationAnnotation(BaseAnnotation, ConfidenceMixin):
         name (Optional[str])
         feature_schema_id (Optional[Cuid])
         value (Union[Text, Checklist, Radio, Dropdown])
+        message_id (Optional[str]) Message id for conversational text
         extra (Dict[str, Any])
      """
 
     value: Union[Text, Checklist, Radio, Dropdown]
+    message_id: Optional[str] = None
 
 
 class ObjectAnnotation(BaseAnnotation, ConfidenceMixin):
diff --git a/labelbox/data/serialization/ndjson/base.py b/labelbox/data/serialization/ndjson/base.py
@@ -38,6 +38,7 @@ def dict(self, *args, **kwargs):
 class NDAnnotation(NDJsonBase):
     name: Optional[str] = None
     schema_id: Optional[Cuid] = None
+    message_id: Optional[str] = None
     page: Optional[int] = None
     unit: Optional[str] = None
 
diff --git a/labelbox/data/serialization/ndjson/classification.py b/labelbox/data/serialization/ndjson/classification.py
@@ -127,13 +127,15 @@ def from_common(cls,
                     feature_schema_id: Cuid,
                     extra: Dict[str, Any],
                     data: Union[TextData, ImageData],
+                    message_id: str,
                     confidence: Optional[float] = None) -> "NDText":
         return cls(
             answer=text.answer,
             data_row=DataRow(id=data.uid, global_key=data.global_key),
             name=name,
             schema_id=feature_schema_id,
             uuid=extra.get('uuid'),
+            message_id=message_id,
             confidence=confidence,
         )
 
@@ -147,6 +149,7 @@ def from_common(cls,
                     feature_schema_id: Cuid,
                     extra: Dict[str, Any],
                     data: Union[VideoData, TextData, ImageData],
+                    message_id: str,
                     confidence: Optional[float] = None) -> "NDChecklist":
         return cls(answer=[
             NDFeature(name=answer.name,
@@ -159,6 +162,7 @@ def from_common(cls,
                    schema_id=feature_schema_id,
                    uuid=extra.get('uuid'),
                    frames=extra.get('frames'),
+                   message_id=message_id,
                    confidence=confidence)
 
 
@@ -171,6 +175,7 @@ def from_common(cls,
                     feature_schema_id: Cuid,
                     extra: Dict[str, Any],
                     data: Union[VideoData, TextData, ImageData],
+                    message_id: str,
                     confidence: Optional[float] = None) -> "NDRadio":
         return cls(answer=NDFeature(name=radio.answer.name,
                                     schema_id=radio.answer.feature_schema_id,
@@ -180,6 +185,7 @@ def from_common(cls,
                    schema_id=feature_schema_id,
                    uuid=extra.get('uuid'),
                    frames=extra.get('frames'),
+                   message_id=message_id,
                    confidence=confidence)
 
 
@@ -228,6 +234,7 @@ def to_common(
             name=annotation.name,
             feature_schema_id=annotation.schema_id,
             extra={'uuid': annotation.uuid},
+            message_id=annotation.message_id,
             confidence=annotation.confidence)
         if getattr(annotation, 'frames', None) is None:
             return [common]
@@ -252,6 +259,7 @@ def from_common(
         return classify_obj.from_common(annotation.value, annotation.name,
                                         annotation.feature_schema_id,
                                         annotation.extra, data,
+                                        annotation.message_id,
                                         annotation.confidence)
 
     @staticmethod
diff --git a/labelbox/data/serialization/ndjson/converter.py b/labelbox/data/serialization/ndjson/converter.py
@@ -6,7 +6,7 @@
 
 logger = logging.getLogger(__name__)
 
-IGNORE_IF_NONE = ["page", "unit"]
+IGNORE_IF_NONE = ["page", "unit", "messageId"]  # NOTE(review): presumably NDJSON keys omitted when None - confirm in NDJsonConverter
 
 
 class NDJsonConverter:
diff --git a/tests/data/annotation_types/classification/test_classification.py b/tests/data/annotation_types/classification/test_classification.py
@@ -51,7 +51,8 @@ def test_subclass():
         'extra': {},
         'value': {
             'answer': answer
-        }
+        },
+        'message_id': None,
     }
     classification = ClassificationAnnotation(
         value=Text(answer=answer),
@@ -64,7 +65,8 @@ def test_subclass():
         'value': {
             'answer': answer
         },
-        'name': name
+        'name': name,
+        'message_id': None,
     }
     classification = ClassificationAnnotation(
         value=Text(answer=answer),
@@ -76,7 +78,8 @@ def test_subclass():
         'extra': {},
         'value': {
             'answer': answer
-        }
+        },
+        'message_id': None,
     }
 
 
@@ -115,7 +118,8 @@ def test_radio():
                 'extra': {},
                 'confidence': 0.81
             }
-        }
+        },
+        'message_id': None,
     }
 
 
@@ -156,6 +160,7 @@ def test_checklist():
                 'confidence': 0.99
             }]
         },
+        'message_id': None,
     }
 
 
@@ -194,5 +199,6 @@ def test_dropdown():
                 'confidence': 1,
                 'extra': {}
             }]
-        }
+        },
+        'message_id': None,
     }
diff --git a/tests/data/serialization/ndjson/test_conversation.py b/tests/data/serialization/ndjson/test_conversation.py
@@ -1,9 +1,95 @@
 import json
 
 import pytest
-
+import labelbox.types as lb_types
 from labelbox.data.serialization.ndjson.converter import NDJsonConverter
 
+radio_ndjson = [{  # NDJSON form of a radio classification carrying messageId '0'
+    'dataRow': {
+        'globalKey': 'my_global_key'
+    },
+    'name': 'radio',
+    'answer': {
+        'name': 'first_radio_answer'
+    },
+    'messageId': '0'
+}]
+
+radio_label = [  # Label form of radio_ndjson: same global key, name, answer and message id
+    lb_types.Label(
+        data=lb_types.ConversationData(global_key='my_global_key'),
+        annotations=[
+            lb_types.ClassificationAnnotation(
+                name='radio',
+                value=lb_types.Radio(answer=lb_types.ClassificationAnswer(
+                    name="first_radio_answer")),
+                message_id="0")
+        ])
+]
+
+checklist_ndjson = [{  # NDJSON form of a two-answer checklist classification carrying messageId '2'
+    'dataRow': {
+        'globalKey': 'my_global_key'
+    },
+    'name': 'checklist',
+    'answer': [
+        {
+            'name': 'first_checklist_answer'
+        },
+        {
+            'name': 'second_checklist_answer'
+        },
+    ],
+    'messageId': '2'
+}]
+
+checklist_label = [  # Label form of checklist_ndjson: same global key, name, answers and message id
+    lb_types.Label(data=lb_types.ConversationData(global_key='my_global_key'),
+                   annotations=[
+                       lb_types.ClassificationAnnotation(
+                           name='checklist',
+                           message_id="2",
+                           value=lb_types.Checklist(answer=[
+                               lb_types.ClassificationAnswer(
+                                   name="first_checklist_answer"),
+                               lb_types.ClassificationAnswer(
+                                   name="second_checklist_answer")
+                           ]))
+                   ])
+]
+
+free_text_ndjson = [{  # NDJSON form of a free-text classification carrying messageId '0'
+    'dataRow': {
+        'globalKey': 'my_global_key'
+    },
+    'name': 'free_text',
+    'answer': 'sample text',
+    'messageId': '0'
+}]
+free_text_label = [  # Label form of free_text_ndjson: same global key, name, answer and message id
+    lb_types.Label(data=lb_types.ConversationData(global_key='my_global_key'),
+                   annotations=[
+                       lb_types.ClassificationAnnotation(
+                           name='free_text',
+                           message_id="0",
+                           value=lb_types.Text(answer="sample text"))
+                   ])
+]
+
+
+@pytest.mark.parametrize(
+    "label, ndjson",
+    [[radio_label, radio_ndjson], [checklist_label, checklist_ndjson],
+     [free_text_label, free_text_ndjson]])
+def test_message_based_radio_classification(label, ndjson):  # NOTE: despite the name, also runs the checklist and free-text cases
+    serialized_label = list(NDJsonConverter().serialize(label))
+    serialized_label[0].pop('uuid')  # the expected ndjson fixtures carry no 'uuid' key
+    assert serialized_label == ndjson  # label -> NDJSON direction
+
+    deserialized_label = list(NDJsonConverter().deserialize(ndjson))
+    deserialized_label[0].annotations[0].extra.pop('uuid')  # the fixture labels set no 'uuid' in extra
+    assert deserialized_label[0].annotations == label[0].annotations  # NDJSON -> label direction
+
 
 @pytest.mark.parametrize("filename", [
     "tests/data/assets/ndjson/conversation_entity_import.json",