
Commit 7861537

chore: remove audio object annotation
1 parent 0ca9cd6 commit 7861537

File tree: 6 files changed (+9, -197 lines)

libs/labelbox/src/labelbox/data/annotation_types/__init__.py
Lines changed: 0 additions & 1 deletion

@@ -20,7 +20,6 @@
 from .video import VideoMaskAnnotation

 from .audio import AudioClassificationAnnotation
-from .audio import AudioObjectAnnotation

 from .ner import ConversationEntity
 from .ner import DocumentEntity

libs/labelbox/src/labelbox/data/annotation_types/audio.py
Lines changed: 0 additions & 33 deletions

@@ -2,11 +2,6 @@

 from labelbox.data.annotation_types.annotation import (
     ClassificationAnnotation,
-    ObjectAnnotation,
-)
-from labelbox.data.mixins import (
-    ConfidenceNotSupportedMixin,
-    CustomMetricsNotSupportedMixin,
 )


@@ -33,31 +28,3 @@ class AudioClassificationAnnotation(ClassificationAnnotation):
     segment_index: Optional[int] = None


-class AudioObjectAnnotation(
-    ObjectAnnotation,
-    ConfidenceNotSupportedMixin,
-    CustomMetricsNotSupportedMixin,
-):
-    """Audio object annotation for specific time range
-
-    Examples:
-        - Transcription: "Hello world" from 2500ms to 4100ms
-        - Sound events: "Dog barking" from 10000ms to 12000ms
-        - Audio segments with metadata
-
-    Args:
-        name (Optional[str]): Name of the annotation
-        feature_schema_id (Optional[Cuid]): Feature schema identifier
-        value (Union[TextEntity, Geometry]): Localization or text content
-        start_frame (int): The frame index in milliseconds (e.g., 10000 = 10.0 seconds)
-        end_frame (Optional[int]): End frame in milliseconds (for time ranges)
-        keyframe (bool): Whether this is a keyframe annotation (default: True)
-        segment_index (Optional[int]): Index of audio segment this annotation belongs to
-        classifications (Optional[List[ClassificationAnnotation]]): Optional sub-classifications
-        extra (Dict[str, Any]): Additional metadata
-    """
-
-    start_frame: int
-    end_frame: Optional[int] = None
-    keyframe: bool = True
-    segment_index: Optional[int] = None
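After this change, AudioClassificationAnnotation is the only audio annotation type left in this module. A minimal construction sketch, mirroring the remaining tests further down in this commit; the millisecond value, names, and segment index are illustrative, not taken from the diff:

from labelbox.data.annotation_types.audio import AudioClassificationAnnotation
from labelbox.data.annotation_types.classification.classification import (
    ClassificationAnswer,
    Radio,
)

# Radio classification anchored at 10.0 seconds; start_frame is in milliseconds.
# The values below are illustrative only.
speaker = AudioClassificationAnnotation(
    start_frame=10000,
    name="speaker",
    value=Radio(answer=ClassificationAnswer(name="john")),
    segment_index=0,
)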

libs/labelbox/src/labelbox/data/annotation_types/label.py
Lines changed: 5 additions & 6 deletions

@@ -13,7 +13,7 @@
 from .metrics import ScalarMetric, ConfusionMatrixMetric
 from .video import VideoClassificationAnnotation
 from .video import VideoObjectAnnotation, VideoMaskAnnotation
-from .audio import AudioClassificationAnnotation, AudioObjectAnnotation
+from .audio import AudioClassificationAnnotation
 from .mmc import MessageEvaluationTaskAnnotation
 from pydantic import BaseModel, field_validator

@@ -46,7 +46,6 @@ class Label(BaseModel):
             ObjectAnnotation,
             VideoMaskAnnotation,
             AudioClassificationAnnotation,
-            AudioObjectAnnotation,
             ScalarMetric,
             ConfusionMatrixMetric,
             RelationshipAnnotation,
@@ -91,7 +90,7 @@ def frame_annotations(
     def audio_annotations_by_frame(
         self,
     ) -> Dict[
-        int, List[Union[AudioObjectAnnotation, AudioClassificationAnnotation]]
+        int, List[AudioClassificationAnnotation]
     ]:
         """Get audio annotations organized by frame (millisecond)

@@ -100,15 +99,15 @@ def audio_annotations_by_frame(

         Example:
             >>> label.audio_annotations_by_frame()
-            {2500: [AudioClassificationAnnotation(...)], 10000: [AudioObjectAnnotation(...)]}
+            {2500: [AudioClassificationAnnotation(...)]}
         """
         frame_dict = defaultdict(list)
         for annotation in self.annotations:
             if isinstance(
                 annotation,
-                (AudioObjectAnnotation, AudioClassificationAnnotation),
+                AudioClassificationAnnotation,
             ):
-                frame_dict[annotation.frame].append(annotation)
+                frame_dict[annotation.start_frame].append(annotation)
         return dict(frame_dict)

     def add_url_to_masks(self, signer) -> "Label":
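With the object variant gone, audio_annotations_by_frame collects only AudioClassificationAnnotation instances and keys them by start_frame (milliseconds) instead of the old frame attribute. A rough usage sketch, reusing the shapes from the tests below; the global key and time values are illustrative:

import labelbox.types as lb_types
from labelbox.data.annotation_types.audio import AudioClassificationAnnotation
from labelbox.data.annotation_types.classification.classification import (
    ClassificationAnswer,
    Radio,
)

label = lb_types.Label(
    data={"global_key": "audio_file.mp3"},  # illustrative global key
    annotations=[
        AudioClassificationAnnotation(
            start_frame=2500,
            name="speaker",
            value=Radio(answer=ClassificationAnswer(name="john")),
        )
    ],
)

# Grouped by start_frame in milliseconds; only classification annotations remain.
by_frame = label.audio_annotations_by_frame()
assert list(by_frame.keys()) == [2500]
assert by_frame[2500][0].name == "speaker"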

libs/labelbox/src/labelbox/data/serialization/ndjson/label.py
Lines changed: 2 additions & 9 deletions

@@ -27,7 +27,6 @@
 from typing import List
 from ...annotation_types.audio import (
     AudioClassificationAnnotation,
-    AudioObjectAnnotation,
 )
 from labelbox.types import DocumentRectangle, DocumentEntity
 from .classification import (
@@ -87,7 +86,7 @@ def _get_consecutive_frames(
         return consecutive

     @classmethod
-    def _get_audio_frame_ranges(cls, annotation_group: List[Union[AudioClassificationAnnotation, AudioObjectAnnotation]]) -> List[Tuple[int, int]]:
+    def _get_audio_frame_ranges(cls, annotation_group: List[AudioClassificationAnnotation]) -> List[Tuple[int, int]]:
         """Get frame ranges for audio annotations (simpler than video segments)"""
         return [(ann.start_frame, getattr(ann, 'end_frame', None) or ann.start_frame) for ann in annotation_group]

@@ -214,7 +213,7 @@ def _create_audio_annotations(

         # Collect audio annotations
         for annot in label.annotations:
-            if isinstance(annot, (AudioClassificationAnnotation, AudioObjectAnnotation)):
+            if isinstance(annot, AudioClassificationAnnotation):
                 audio_annotations[annot.feature_schema_id or annot.name].append(annot)

         for annotation_group in audio_annotations.values():
@@ -232,11 +231,6 @@ def _create_audio_annotations(
                 annotation.extra.update({"frames": frames_data})
                 yield NDClassification.from_common(annotation, label.data)

-            # Process objects
-            elif isinstance(annotation_group[0], AudioObjectAnnotation):
-                # For audio objects, process individually (simpler than video segments)
-                for annotation in annotation_group:
-                    yield NDObject.from_common(annotation, label.data)


     @classmethod
@@ -251,7 +245,6 @@ def _create_non_video_annotations(cls, label: Label):
                     VideoObjectAnnotation,
                     VideoMaskAnnotation,
                     AudioClassificationAnnotation,
-                    AudioObjectAnnotation,
                     RelationshipAnnotation,
                 ),
             )
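_get_audio_frame_ranges now only sees classification annotations; each one collapses to a (start, end) pair, with end falling back to start_frame when no end_frame is set. A small worked sketch of that expression, with made-up annotation values for illustration:

from labelbox.data.annotation_types.audio import AudioClassificationAnnotation
from labelbox.data.annotation_types.classification.classification import (
    ClassificationAnswer,
    Radio,
)

# Illustrative group with a single annotation and no explicit end_frame.
annotation_group = [
    AudioClassificationAnnotation(
        start_frame=2500,
        name="speaker",
        value=Radio(answer=ClassificationAnswer(name="john")),
    ),
]

# Same expression as _get_audio_frame_ranges: (start, end or start) per annotation.
ranges = [
    (ann.start_frame, getattr(ann, "end_frame", None) or ann.start_frame)
    for ann in annotation_group
]
assert ranges == [(2500, 2500)]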

libs/labelbox/src/labelbox/data/serialization/ndjson/objects.py
Lines changed: 0 additions & 44 deletions

@@ -14,9 +14,6 @@
 from labelbox.data.annotation_types.video import (
     VideoObjectAnnotation,
 )
-from labelbox.data.annotation_types.audio import (
-    AudioObjectAnnotation,
-)
 from labelbox.data.mixins import (
     ConfidenceMixin,
     CustomMetric,
@@ -718,7 +715,6 @@ def from_common(
             ObjectAnnotation,
             List[List[VideoObjectAnnotation]],
             VideoMaskAnnotation,
-            AudioObjectAnnotation,
         ],
         data: GenericDataRowData,
     ) -> Union[
@@ -746,9 +742,6 @@ def from_common(
             return obj.from_common(**args)
         elif obj == NDVideoMasks:
             return obj.from_common(annotation, data)
-        elif isinstance(annotation, AudioObjectAnnotation):
-            # Handle audio object annotation like single video frame
-            return cls._serialize_audio_object_annotation(annotation, data)

         subclasses = [
             NDSubclassification.from_common(annot)
@@ -772,43 +765,6 @@ def from_common(
             **optional_kwargs,
         )

-    @classmethod
-    def _serialize_audio_object_annotation(
-        cls, annotation: AudioObjectAnnotation, data: GenericDataRowData
-    ):
-        """Serialize audio object annotation with temporal information
-
-        Args:
-            annotation: Audio object annotation to process
-            data: Data row data
-
-        Returns:
-            NDObject: Serialized audio object annotation
-        """
-        # Get the appropriate NDObject subclass based on the annotation value type
-        obj = cls.lookup_object(annotation)
-
-        # Process sub-classifications if any
-        subclasses = [
-            NDSubclassification.from_common(annot)
-            for annot in annotation.classifications
-        ]
-
-        # Add frame information to extra (milliseconds)
-        extra = annotation.extra.copy() if annotation.extra else {}
-        extra.update({"frame": annotation.frame})
-
-        # Create the NDObject with frame information
-        return obj.from_common(
-            str(annotation._uuid),
-            annotation.value,
-            subclasses,
-            annotation.name,
-            annotation.feature_schema_id,
-            extra,
-            data,
-        )
-
     @staticmethod
     def lookup_object(
         annotation: Union[ObjectAnnotation, List],

libs/labelbox/tests/data/annotation_types/test_audio.py
Lines changed: 2 additions & 104 deletions

@@ -2,7 +2,6 @@
 import labelbox.types as lb_types
 from labelbox.data.annotation_types.audio import (
     AudioClassificationAnnotation,
-    AudioObjectAnnotation,
 )
 from labelbox.data.annotation_types.classification.classification import (
     ClassificationAnswer,
@@ -71,64 +70,6 @@ def test_audio_classification_text_type():
     assert annotation.value.answer == "excellent"


-def test_audio_object_creation():
-    """Test creating audio object annotation"""
-    annotation = AudioObjectAnnotation(
-        start_frame=10000,
-        end_frame=12500,
-        name="transcription",
-        value=lb_types.TextEntity(
-            start=0, end=11
-        ),  # "Hello world" has 11 characters
-    )
-
-    assert annotation.start_frame == 10000
-    assert annotation.end_frame == 12500
-    assert annotation.keyframe is True
-    assert annotation.segment_index is None
-    assert annotation.name == "transcription"
-    assert isinstance(annotation.value, lb_types.TextEntity)
-    assert annotation.value.start == 0
-    assert annotation.value.end == 11
-
-
-def test_audio_object_creation_with_classifications():
-    """Test creating audio object with sub-classifications"""
-    sub_classification = AudioClassificationAnnotation(
-        start_frame=10000,
-        name="confidence",
-        value=Radio(answer=ClassificationAnswer(name="high")),
-    )
-
-    annotation = AudioObjectAnnotation(
-        start_frame=10000,
-        end_frame=12500,
-        name="transcription",
-        value=lb_types.TextEntity(start=0, end=11),
-        classifications=[sub_classification],
-    )
-
-    assert len(annotation.classifications) == 1
-    assert annotation.classifications[0].name == "confidence"
-    assert annotation.classifications[0].start_frame == 10000
-
-
-def test_audio_object_direct_creation():
-    """Test creating audio object directly with various options"""
-    annotation = AudioObjectAnnotation(
-        start_frame=7500,  # 7.5 seconds
-        name="sound_event",
-        value=lb_types.TextEntity(start=0, end=11),
-        keyframe=False,
-        segment_index=2,
-    )
-
-    assert annotation.start_frame == 7500
-    assert annotation.end_frame is None
-    assert annotation.keyframe is False
-    assert annotation.segment_index == 2
-
-
 def test_frame_precision():
     """Test frame values maintain precision"""
     # Test various time values in milliseconds
@@ -155,38 +96,24 @@ def test_audio_label_integration():
         value=Radio(answer=ClassificationAnswer(name="john")),
     )

-    transcription_annotation = AudioObjectAnnotation(
-        start_frame=1000,
-        end_frame=2000,
-        name="transcription",
-        value=lb_types.TextEntity(start=0, end=5),
-    )
-
     # Create label with audio annotations
     label = lb_types.Label(
         data={"global_key": "audio_file.mp3"},
-        annotations=[speaker_annotation, transcription_annotation],
+        annotations=[speaker_annotation],
     )

     # Verify annotations are accessible
-    assert len(label.annotations) == 2
+    assert len(label.annotations) == 1

     # Check annotation types
     audio_classifications = [
         ann
         for ann in label.annotations
         if isinstance(ann, AudioClassificationAnnotation)
     ]
-    audio_objects = [
-        ann
-        for ann in label.annotations
-        if isinstance(ann, AudioObjectAnnotation)
-    ]

     assert len(audio_classifications) == 1
-    assert len(audio_objects) == 1
     assert audio_classifications[0].name == "speaker"
-    assert audio_objects[0].name == "transcription"


 def test_audio_annotation_validation():
@@ -384,32 +311,3 @@ def test_temporal_annotation_grouping():
     assert annotations[1].start_frame == 1000
     assert annotations[0].end_frame == 900
     assert annotations[1].end_frame == 1900
-
-
-def test_audio_object_types():
-    """Test different types of audio object annotations"""
-    # Text entity (transcription)
-    text_obj = AudioObjectAnnotation(
-        start_frame=1000,
-        name="transcription",
-        value=TextEntity(start=0, end=5),  # "hello"
-    )
-
-    assert isinstance(text_obj.value, TextEntity)
-    assert text_obj.value.start == 0
-    assert text_obj.value.end == 5
-
-    # Test with keyframe and segment settings
-    keyframe_obj = AudioObjectAnnotation(
-        start_frame=2000,
-        end_frame=3000,
-        name="segment",
-        value=TextEntity(start=10, end=15),
-        keyframe=True,
-        segment_index=1,
-    )
-
-    assert keyframe_obj.keyframe is True
-    assert keyframe_obj.segment_index == 1
-    assert keyframe_obj.start_frame == 2000
-    assert keyframe_obj.end_frame == 3000
