chore: stan comments

rishisurana-labelbox · rishisurana-labelbox · commit 68773cfdb0fc · 2025-09-25T15:46:51.000-07:00
diff --git a/libs/labelbox/src/labelbox/data/annotation_types/audio.py b/libs/labelbox/src/labelbox/data/annotation_types/audio.py
@@ -1,4 +1,5 @@
 from typing import Optional
+from pydantic import Field, AliasChoices
 
 from labelbox.data.annotation_types.annotation import (
     ClassificationAnnotation,
@@ -23,7 +24,15 @@ class AudioClassificationAnnotation(ClassificationAnnotation):
         extra (Dict[str, Any]): Additional metadata
     """
 
-    start_frame: int
+    start_frame: int = Field(
+        validation_alias=AliasChoices("start_frame", "frame"),
+        serialization_alias="frame",
+    )
+    end_frame: Optional[int] = Field(
+        default=None,
+        validation_alias=AliasChoices("end_frame", "endFrame"),
+        serialization_alias="end_frame",
+    )
     segment_index: Optional[int] = None
 
 
@@ -45,7 +54,14 @@ class AudioTextClassificationAnnotation(ClassificationAnnotation):
         extra (Dict[str, Any]): Additional metadata
     """
 
-    start_frame: int
-    end_frame: int = None
+    start_frame: int = Field(
+        validation_alias=AliasChoices("start_frame", "frame"),
+        serialization_alias="frame",
+    )
+    end_frame: Optional[int] = Field(
+        default=None,
+        validation_alias=AliasChoices("end_frame", "endFrame"),
+        serialization_alias="end_frame",
+    )
 
 
diff --git a/libs/labelbox/src/labelbox/data/annotation_types/label.py b/libs/labelbox/src/labelbox/data/annotation_types/label.py
@@ -77,36 +77,21 @@ def _get_annotations_by_type(self, annotation_type):
 
     def frame_annotations(
         self,
-    ) -> Dict[str, Union[VideoObjectAnnotation, VideoClassificationAnnotation]]:
-        frame_dict = defaultdict(list)
-        for annotation in self.annotations:
-            if isinstance(
-                annotation,
-                (VideoObjectAnnotation, VideoClassificationAnnotation),
-            ):
-                frame_dict[annotation.frame].append(annotation)
-        return frame_dict
-
-    def audio_annotations_by_frame(
-        self,
-    ) -> Dict[
-        int, List[AudioClassificationAnnotation]
-    ]:
-        """Get audio annotations organized by frame (millisecond)
-
+    ) -> Dict[int, Union[VideoObjectAnnotation, VideoClassificationAnnotation, AudioClassificationAnnotation]]:
+        """Get temporal annotations organized by frame
+        
         Returns:
-            Dict[int, List]: Dictionary mapping frame (milliseconds) to list of audio annotations
-
+            Dict[int, List]: Dictionary mapping frame (milliseconds for audio annotations) to list of temporal annotations
+            
         Example:
-            >>> label.audio_annotations_by_frame()
-            {2500: [AudioClassificationAnnotation(...)]}
+            >>> label.frame_annotations()
+            {2500: [VideoClassificationAnnotation(...), AudioClassificationAnnotation(...)]}
         """
         frame_dict = defaultdict(list)
         for annotation in self.annotations:
-            if isinstance(
-                annotation,
-                AudioClassificationAnnotation,
-            ):
+            if isinstance(annotation, (VideoObjectAnnotation, VideoClassificationAnnotation)):
+                frame_dict[annotation.frame].append(annotation)
+            elif isinstance(annotation, AudioClassificationAnnotation):
                 frame_dict[annotation.start_frame].append(annotation)
         return dict(frame_dict)
 
diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/classification.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/classification.py
@@ -12,7 +12,6 @@
 
 from ...annotation_types.annotation import ClassificationAnnotation
 from ...annotation_types.video import VideoClassificationAnnotation
-from ...annotation_types.audio import AudioClassificationAnnotation
 from ...annotation_types.llm_prompt_response.prompt import (
     PromptClassificationAnnotation,
     PromptText,
@@ -401,11 +400,7 @@ class NDClassification:
     @staticmethod
     def to_common(
         annotation: "NDClassificationType",
-    ) -> Union[
-        ClassificationAnnotation,
-        VideoClassificationAnnotation,
-        AudioClassificationAnnotation,
-    ]:
+    ) -> Union[ClassificationAnnotation, VideoClassificationAnnotation]:
         common = ClassificationAnnotation(
             value=annotation.to_common(),
             name=annotation.name,
@@ -420,26 +415,11 @@ def to_common(
         results = []
         for frame in annotation.frames:
             for idx in range(frame.start, frame.end + 1, 1):
-                # Check if this is an audio annotation by looking at the extra data
-                # Audio annotations will have start_frame/end_frame in extra, video annotations won't
-                if (
-                    hasattr(annotation, "extra")
-                    and annotation.extra
-                    and "frames" in annotation.extra
-                ):
-                    # This is likely an audio temporal annotation
-                    results.append(
-                        AudioClassificationAnnotation(
-                            frame=idx, **common.model_dump(exclude_none=True)
-                        )
-                    )
-                else:
-                    # This is a video temporal annotation
-                    results.append(
-                        VideoClassificationAnnotation(
-                            frame=idx, **common.model_dump(exclude_none=True)
-                        )
+                results.append(
+                    VideoClassificationAnnotation(
+                        frame=idx, **common.model_dump(exclude_none=True)
                     )
+                )
         return results
 
     @classmethod
@@ -448,7 +428,6 @@ def from_common(
         annotation: Union[
             ClassificationAnnotation,
             VideoClassificationAnnotation,
-            AudioClassificationAnnotation,
         ],
         data: GenericDataRowData,
     ) -> Union[NDTextSubclass, NDChecklistSubclass, NDRadioSubclass]:
@@ -473,7 +452,6 @@ def lookup_classification(
         annotation: Union[
             ClassificationAnnotation,
             VideoClassificationAnnotation,
-            AudioClassificationAnnotation,
         ],
     ) -> Union[NDText, NDChecklist, NDRadio]:
         return {Text: NDText, Checklist: NDChecklist, Radio: NDRadio}.get(
diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/label.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/label.py
@@ -85,45 +85,6 @@ def _get_consecutive_frames(
             consecutive.append((group[0], group[-1]))
         return consecutive
 
-    @classmethod
-    def _get_audio_frame_ranges(cls, annotation_group: List[AudioClassificationAnnotation]) -> List[Tuple[int, int]]:
-        """Get frame ranges for audio annotations (simpler than video segments)"""
-        return [(ann.start_frame, getattr(ann, 'end_frame', None) or ann.start_frame) for ann in annotation_group]
-
-    @classmethod
-    def _has_changing_values(cls, annotation_group: List[AudioClassificationAnnotation]) -> bool:
-        """Check if annotations have different values (multi-value per instance)"""
-        if len(annotation_group) <= 1:
-            return False
-        first_value = annotation_group[0].value.answer
-        return any(ann.value.answer != first_value for ann in annotation_group)
-
-    @classmethod
-    def _create_multi_value_annotation(cls, annotation_group: List[AudioClassificationAnnotation], data):
-        """Create annotation with frame-value mapping for changing values"""
-        import json
-        
-        # Build frame data and mapping in one pass
-        frames_data = []
-        frame_mapping = {}
-        
-        for ann in annotation_group:
-            start, end = ann.start_frame, getattr(ann, 'end_frame', None) or ann.start_frame
-            frames_data.append({"start": start, "end": end})
-            frame_mapping[str(start)] = ann.value.answer
-        
-        # Create content structure
-        content = json.dumps({
-            "frame_mapping": frame_mapping,
-        })
-        
-        # Update template annotation
-        template = annotation_group[0]
-        from ...annotation_types.classification.classification import Text
-        template.value = Text(answer=content)
-        template.extra = {"frames": frames_data}
-        
-        yield NDClassification.from_common(template, data)
 
     @classmethod
     def _get_segment_frame_ranges(
@@ -208,28 +169,24 @@ def _create_video_annotations(
     def _create_audio_annotations(
         cls, label: Label
     ) -> Generator[Union[NDChecklistSubclass, NDRadioSubclass], None, None]:
-        """Create audio annotations with multi-value support"""
+        """Create audio annotations serialized in Video NDJSON classification format."""
         audio_annotations = defaultdict(list)
-        
-        # Collect audio annotations
+
+        # Group audio annotations by feature_schema_id, falling back to name
         for annot in label.annotations:
             if isinstance(annot, AudioClassificationAnnotation):
                 audio_annotations[annot.feature_schema_id or annot.name].append(annot)
 
         for annotation_group in audio_annotations.values():
-            frame_ranges = cls._get_audio_frame_ranges(annotation_group)
-            
-            # Process classifications
-            if isinstance(annotation_group[0], AudioClassificationAnnotation):
-                if cls._has_changing_values(annotation_group):
-                    # For audio with changing values, create frame-value mapping
-                    yield from cls._create_multi_value_annotation(annotation_group, label.data)
-                else:
-                    # Standard processing for audio with same values
-                    annotation = annotation_group[0]
-                    frames_data = [{"start": start, "end": end} for start, end in frame_ranges]
-                    annotation.extra.update({"frames": frames_data})
-                    yield NDClassification.from_common(annotation, label.data)
+            # Simple grouping: one NDJSON entry per annotation group (same as video)
+            annotation = annotation_group[0]
+            frames_data = []
+            for ann in annotation_group:
+                start = ann.start_frame
+                end = getattr(ann, "end_frame", None) or ann.start_frame
+                frames_data.append({"start": start, "end": end})
+            annotation.extra.update({"frames": frames_data})
+            yield NDClassification.from_common(annotation, label.data)