33 commits
e4fd630
chore: PoC + ipynb
rishisurana-labelbox Sep 3, 2025
dbcc7bf
chore: use ms instead of s in sdk interface
rishisurana-labelbox Sep 8, 2025
dbb592f
:art: Cleaned
github-actions[bot] Sep 8, 2025
ff298d4
:memo: README updated
github-actions[bot] Sep 8, 2025
16896fd
chore: it works for temporal text/radio/checklist classifications
rishisurana-labelbox Sep 11, 2025
7a666cc
chore: clean up and organize code
rishisurana-labelbox Sep 11, 2025
ac58ad0
chore: update tests fail and documentation update
rishisurana-labelbox Sep 11, 2025
67dd14a
:art: Cleaned
github-actions[bot] Sep 11, 2025
a1600e5
:memo: README updated
github-actions[bot] Sep 11, 2025
b4d2f42
chore: improve imports
rishisurana-labelbox Sep 11, 2025
fadb14e
chore: restore py version
rishisurana-labelbox Sep 11, 2025
1e12596
chore: restore py version
rishisurana-labelbox Sep 11, 2025
c2a7b4c
chore: cleanup
rishisurana-labelbox Sep 12, 2025
26a35fd
chore: lint
rishisurana-labelbox Sep 12, 2025
b16f2ea
fix: failing build issue due to lint
rishisurana-labelbox Sep 12, 2025
943cb73
chore: simplify
rishisurana-labelbox Sep 19, 2025
a838513
chore: update examples - all tests passing
rishisurana-labelbox Sep 19, 2025
0ca9cd6
chore: use start frame instead of frame
rishisurana-labelbox Sep 22, 2025
7861537
chore: remove audio object annotation
rishisurana-labelbox Sep 22, 2025
6c3c50a
chore: change class shape for text and radio/checklist
rishisurana-labelbox Sep 22, 2025
68773cf
chore: stan comments
rishisurana-labelbox Sep 25, 2025
58b30f7
chore: top level + nested working
rishisurana-labelbox Sep 26, 2025
0a63def
feat: nested class for temporal annotations support
rishisurana-labelbox Sep 29, 2025
538ba66
chore: revert old change
rishisurana-labelbox Sep 29, 2025
9675c73
chore: update tests
rishisurana-labelbox Sep 29, 2025
327800b
chore: clean up and track test files
rishisurana-labelbox Sep 29, 2025
1174ad8
chore: update audio.ipynb to reflect breadth of use cases
rishisurana-labelbox Sep 29, 2025
2361ca3
chore: cursor reported bug
rishisurana-labelbox Sep 29, 2025
59f0cd8
chore: extract generic temporal nested logic
rishisurana-labelbox Sep 29, 2025
b186359
chore: update temporal logic to be 1:1 with v3 script
rishisurana-labelbox Sep 30, 2025
e63b306
chore: simplifiy drastically
rishisurana-labelbox Sep 30, 2025
a74c6c4
:art: Cleaned
github-actions[bot] Sep 30, 2025
0683dfd
:memo: README updated
github-actions[bot] Sep 30, 2025
168 changes: 84 additions & 84 deletions examples/README.md

Large diffs are not rendered by default.

77 changes: 76 additions & 1 deletion examples/annotation_import/audio.ipynb
@@ -27,6 +27,30 @@
],
"cell_type": "markdown"
},
{
"metadata": {},
"source": [
"<td>\n",
" <a target=\"_blank\" href=\"https://labelbox.com\" ><img src=\"https://labelbox.com/blog/content/images/2021/02/logo-v4.svg\" width=256/></a>\n",
"</td>\n"
],
"cell_type": "markdown"
},
{
"metadata": {},
"source": [
"<td>\n",
"<a href=\"https://colab.research.google.com/github/Labelbox/labelbox-python/blob/develop/examples/annotation_import/audio.ipynb\" target=\"_blank\"><img\n",
"src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"></a>\n",
"</td>\n",
"\n",
"<td>\n",
"<a href=\"https://github.com/Labelbox/labelbox-python/tree/develop/examples/annotation_import/audio.ipynb\" target=\"_blank\"><img\n",
"src=\"https://img.shields.io/badge/GitHub-100000?logo=github&logoColor=white\" alt=\"GitHub\"></a>\n",
"</td>"
],
"cell_type": "markdown"
},
Bug: Duplicate Header Cells in Audio Notebook

The audio.ipynb notebook now includes duplicate header cells at the start. The commit adds new markdown cells (lines 30-53 in the diff) that are identical to the existing Labelbox logo and badge links, resulting in redundant content.


{
"metadata": {},
"source": [
@@ -170,7 +194,7 @@
},
{
"metadata": {},
"source": "ontology_builder = lb.OntologyBuilder(classifications=[\n lb.Classification(class_type=lb.Classification.Type.TEXT,\n name=\"text_audio\"),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_audio\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_audio\",\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n])\n\nontology = client.create_ontology(\n \"Ontology Audio Annotations\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Audio,\n)",
"source": "ontology_builder = lb.OntologyBuilder(classifications=[\n lb.Classification(class_type=lb.Classification.Type.TEXT,\n name=\"text_audio\"),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_audio\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_audio\",\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n # Temporal classification for token-level annotations\n lb.Classification(\n class_type=lb.Classification.Type.TEXT,\n name=\"User Speaker\",\n scope=lb.Classification.Scope.INDEX, # INDEX scope for temporal\n ),\n])\n\nontology = client.create_ontology(\n \"Ontology Audio Annotations\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Audio,\n)",
"cell_type": "code",
"outputs": [],
"execution_count": null
@@ -223,6 +247,27 @@
],
"cell_type": "markdown"
},
{
"metadata": {},
"source": [
"\n"
],
"cell_type": "markdown"
},
{
"metadata": {},
"source": "",
"cell_type": "code",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"source": "",
"cell_type": "code",
"outputs": [],
"execution_count": null
},
Bug: Audio Notebook Contains Unintended Empty Cells

The examples/annotation_import/audio.ipynb notebook includes three empty code cells (around lines 226-246). These cells don't contain any content and appear to be unintended additions.

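Both notebook issues flagged above (the duplicated header cells and these empty cells) can be cleaned up mechanically. A minimal sketch using `nbformat`, assuming the duplicated markdown cells are exact copies of earlier ones and the stray code cells have empty sources:

```python
import nbformat

# Load the notebook touched by this PR
path = "examples/annotation_import/audio.ipynb"
nb = nbformat.read(path, as_version=4)

seen_markdown = set()
cleaned = []
for cell in nb.cells:
    # Drop empty code cells (the three flagged above)
    if cell.cell_type == "code" and not cell.source.strip():
        continue
    # Drop exact duplicates of earlier markdown cells (the repeated logo/badges)
    if cell.cell_type == "markdown":
        key = cell.source.strip()
        if key and key in seen_markdown:
            continue
        seen_markdown.add(key)
    cleaned.append(cell)

nb.cells = cleaned
nbformat.write(nb, path)
```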

{
"metadata": {},
"source": "label = []\nlabel.append(\n lb_types.Label(\n data={\"global_key\": global_key},\n annotations=[text_annotation, checklist_annotation, radio_annotation],\n ))",
@@ -252,6 +297,29 @@
],
"cell_type": "markdown"
},
{
"metadata": {},
"source": [
"## Temporal Audio Annotations\n",
"\n",
"You can create temporal annotations for individual tokens (words) with precise timing:\n"
],
"cell_type": "markdown"
},
{
"metadata": {},
"source": "# Define tokens with precise timing (from demo script)\ntokens_data = [\n (\"Hello\", 586, 770), # Hello: frames 586-770\n (\"AI\", 771, 955), # AI: frames 771-955\n (\"how\", 956, 1140), # how: frames 956-1140\n (\"are\", 1141, 1325), # are: frames 1141-1325\n (\"you\", 1326, 1510), # you: frames 1326-1510\n (\"doing\", 1511, 1695), # doing: frames 1511-1695\n (\"today\", 1696, 1880), # today: frames 1696-1880\n]\n\n# Create temporal annotations for each token\ntemporal_annotations = []\nfor token, start_frame, end_frame in tokens_data:\n token_annotation = lb_types.AudioClassificationAnnotation(\n frame=start_frame,\n end_frame=end_frame,\n name=\"User Speaker\",\n value=lb_types.Text(answer=token),\n )\n temporal_annotations.append(token_annotation)\n\nprint(f\"Created {len(temporal_annotations)} temporal token annotations\")",
"cell_type": "code",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"source": "# Create label with both regular and temporal annotations\nlabel_with_temporal = []\nlabel_with_temporal.append(\n lb_types.Label(\n data={\"global_key\": global_key},\n annotations=[text_annotation, checklist_annotation, radio_annotation] +\n temporal_annotations,\n ))\n\nprint(\n f\"Created label with {len(label_with_temporal[0].annotations)} total annotations\"\n)\nprint(f\" - Regular annotations: 3\")\nprint(f\" - Temporal annotations: {len(temporal_annotations)}\")",
"cell_type": "code",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"source": [
@@ -260,6 +328,13 @@
],
"cell_type": "markdown"
},
{
"metadata": {},
"source": "# Upload temporal annotations via MAL\ntemporal_upload_job = lb.MALPredictionImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=f\"temporal_mal_job-{str(uuid.uuid4())}\",\n predictions=label_with_temporal,\n)\n\ntemporal_upload_job.wait_until_done()\nprint(\"Temporal upload completed!\")\nprint(\"Errors:\", temporal_upload_job.errors)\nprint(\"Status:\", temporal_upload_job.statuses)",
"cell_type": "code",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"source": "# Upload our label using Model-Assisted Labeling\nupload_job = lb.MALPredictionImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=f\"mal_job-{str(uuid.uuid4())}\",\n predictions=label,\n)\n\nupload_job.wait_until_done()\nprint(\"Errors:\", upload_job.errors)\nprint(\"Status of uploads: \", upload_job.statuses)",
libs/labelbox/src/labelbox/data/annotation_types/__init__.py
Expand Up @@ -19,6 +19,8 @@
from .video import MaskInstance
from .video import VideoMaskAnnotation

from .audio import AudioClassificationAnnotation

from .ner import ConversationEntity
from .ner import DocumentEntity
from .ner import DocumentTextSelection
37 changes: 37 additions & 0 deletions libs/labelbox/src/labelbox/data/annotation_types/audio.py
@@ -0,0 +1,37 @@
from typing import Optional
from pydantic import Field, AliasChoices

from labelbox.data.annotation_types.annotation import (
ClassificationAnnotation,
)


class AudioClassificationAnnotation(ClassificationAnnotation):
"""Audio classification for specific time range
Examples:
- Speaker identification from 2500ms to 4100ms
- Audio quality assessment for a segment
- Language detection for audio segments
Args:
name (Optional[str]): Name of the classification
feature_schema_id (Optional[Cuid]): Feature schema identifier
value (Union[Text, Checklist, Radio]): Classification value
start_frame (int): The frame index in milliseconds (e.g., 2500 = 2.5 seconds)
end_frame (Optional[int]): End frame in milliseconds (for time ranges)
segment_index (Optional[int]): Index of audio segment this annotation belongs to
extra (Dict[str, Any]): Additional metadata
"""

start_frame: int = Field(
validation_alias=AliasChoices("start_frame", "frame"),
serialization_alias="start_frame",
)
Bug: Audio Annotation API Inconsistency

The AudioClassificationAnnotation's start_frame field accepts both start_frame and frame as input aliases, but is named start_frame and always serializes as start_frame. This creates an inconsistent API, as users can provide frame=X (matching examples) while the underlying field and output use start_frame, leading to ambiguity.

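A short sketch of the ambiguity, based on the field definitions in this diff (the token values are hypothetical; both spellings validate, and both serialize as `start_frame`):

```python
import labelbox.types as lb_types

# Both constructions are accepted, thanks to AliasChoices("start_frame", "frame")
a = lb_types.AudioClassificationAnnotation(
    frame=586, end_frame=770,
    name="User Speaker", value=lb_types.Text(answer="Hello"),
)
b = lb_types.AudioClassificationAnnotation(
    start_frame=586, end_frame=770,
    name="User Speaker", value=lb_types.Text(answer="Hello"),
)

# ...and both dump under the canonical serialization alias
assert a.model_dump(by_alias=True)["start_frame"] == 586
assert b.model_dump(by_alias=True)["start_frame"] == 586
```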

end_frame: Optional[int] = Field(
default=None,
validation_alias=AliasChoices("end_frame", "endFrame"),
serialization_alias="end_frame",
)
segment_index: Optional[int] = None

libs/labelbox/src/labelbox/data/annotation_types/classification.py
@@ -17,11 +17,17 @@ class ClassificationAnswer(FeatureSchema, ConfidenceMixin, CustomMetricsMixin):
Each answer can have a keyframe independent of the others.
So unlike object annotations, classification annotations
track keyframes at a classification answer level.
- For temporal classifications (audio/video), optional start_frame/end_frame can specify
the time range for this answer. Must be within root annotation's frame range.
Defaults to root frame range if not specified.
"""

extra: Dict[str, Any] = {}
keyframe: Optional[bool] = None
classifications: Optional[List["ClassificationAnnotation"]] = None
start_frame: Optional[int] = None
end_frame: Optional[int] = None


class Radio(ConfidenceMixin, CustomMetricsMixin, BaseModel):
@@ -69,8 +75,12 @@ class ClassificationAnnotation(
classifications (Optional[List[ClassificationAnnotation]]): Optional sub classification of the annotation
feature_schema_id (Optional[Cuid])
value (Union[Text, Checklist, Radio])
start_frame (Optional[int]): Start frame for temporal classifications (audio/video). Must be within root annotation's frame range. Defaults to root start_frame if not specified.
end_frame (Optional[int]): End frame for temporal classifications (audio/video). Must be within root annotation's frame range. Defaults to root end_frame if not specified.
extra (Dict[str, Any])
"""

value: Union[Text, Checklist, Radio]
message_id: Optional[str] = None
start_frame: Optional[int] = None
end_frame: Optional[int] = None
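
For illustration, a sketch of how the new answer-level fields might combine with a root temporal annotation, assuming the containment rule the docstring describes (the classification and answer names here are hypothetical):

```python
import labelbox.types as lb_types

# Root temporal radio spanning 1000-2000 ms; the chosen answer is
# narrowed to 1200-1800 ms via the new answer-level fields.
radio = lb_types.AudioClassificationAnnotation(
    start_frame=1000,
    end_frame=2000,
    name="speaker_emotion",  # hypothetical classification name
    value=lb_types.Radio(
        answer=lb_types.ClassificationAnswer(
            name="happy",
            start_frame=1200,  # must fall within the root 1000-2000 range
            end_frame=1800,
        )
    ),
)
```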
22 changes: 16 additions & 6 deletions libs/labelbox/src/labelbox/data/annotation_types/label.py
@@ -13,6 +13,7 @@
from .metrics import ScalarMetric, ConfusionMatrixMetric
from .video import VideoClassificationAnnotation
from .video import VideoObjectAnnotation, VideoMaskAnnotation
from .audio import AudioClassificationAnnotation
from .mmc import MessageEvaluationTaskAnnotation
from pydantic import BaseModel, field_validator

@@ -44,6 +45,7 @@ class Label(BaseModel):
ClassificationAnnotation,
ObjectAnnotation,
VideoMaskAnnotation,
AudioClassificationAnnotation,
ScalarMetric,
ConfusionMatrixMetric,
RelationshipAnnotation,
@@ -75,15 +77,23 @@ def _get_annotations_by_type(self, annotation_type):

def frame_annotations(
self,
) -> Dict[str, Union[VideoObjectAnnotation, VideoClassificationAnnotation]]:
) -> Dict[int, Union[VideoObjectAnnotation, VideoClassificationAnnotation, AudioClassificationAnnotation]]:
"""Get temporal annotations organized by frame

Returns:
Dict[int, List]: Dictionary mapping frame (milliseconds) to list of temporal annotations

Example:
>>> label.frame_annotations()
{2500: [VideoClassificationAnnotation(...), AudioClassificationAnnotation(...)]}
"""
frame_dict = defaultdict(list)
for annotation in self.annotations:
if isinstance(
annotation,
(VideoObjectAnnotation, VideoClassificationAnnotation),
):
if isinstance(annotation, (VideoObjectAnnotation, VideoClassificationAnnotation)):
frame_dict[annotation.frame].append(annotation)
return frame_dict
elif isinstance(annotation, AudioClassificationAnnotation):
frame_dict[annotation.start_frame].append(annotation)
Bug: Audio Annotations Indexed Inconsistently

The frame_annotations method indexes AudioClassificationAnnotation instances using only their start_frame. As audio annotations represent a time range, this prevents querying for annotations active at intermediate frames within their duration and is inconsistent with how single-frame video annotations are indexed.

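The stored mapping could stay as-is while exposing a range-aware lookup; a sketch of such a helper (not part of this PR; it assumes the annotation classes already imported at the top of label.py):

```python
from typing import List

def annotations_at_frame(label, frame: int) -> List:
    """Return temporal annotations active at `frame` (sketch; not in the SDK).

    Audio annotations match anywhere in [start_frame, end_frame];
    video annotations keep their exact-frame semantics.
    """
    active = []
    for annotation in label.annotations:
        if isinstance(annotation, AudioClassificationAnnotation):
            end = (annotation.end_frame
                   if annotation.end_frame is not None
                   else annotation.start_frame)
            if annotation.start_frame <= frame <= end:
                active.append(annotation)
        elif isinstance(
            annotation, (VideoObjectAnnotation, VideoClassificationAnnotation)
        ):
            if annotation.frame == frame:
                active.append(annotation)
    return active
```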

return dict(frame_dict)

def add_url_to_masks(self, signer) -> "Label":
"""
38 changes: 36 additions & 2 deletions libs/labelbox/src/labelbox/data/serialization/ndjson/label.py
@@ -2,7 +2,7 @@
import copy
from itertools import groupby
from operator import itemgetter
from typing import Generator, List, Tuple, Union
from typing import Any, Dict, Generator, List, Tuple, Union
from uuid import uuid4

from pydantic import BaseModel
@@ -24,6 +24,11 @@
VideoMaskAnnotation,
VideoObjectAnnotation,
)
from typing import List
from ...annotation_types.audio import (
AudioClassificationAnnotation,
)
from .temporal import create_audio_ndjson_annotations
from labelbox.types import DocumentRectangle, DocumentEntity
from .classification import (
NDChecklistSubclass,
@@ -69,6 +74,7 @@ def from_common(
yield from cls._create_relationship_annotations(label)
yield from cls._create_non_video_annotations(label)
yield from cls._create_video_annotations(label)
yield from cls._create_audio_annotations(label)

@staticmethod
def _get_consecutive_frames(
@@ -80,6 +86,7 @@ def _get_segment_frame_ranges(
consecutive.append((group[0], group[-1]))
return consecutive


@classmethod
def _get_segment_frame_ranges(
cls,
@@ -159,6 +166,32 @@ def _create_video_annotations(
segments.append(segment)
yield NDObject.from_common(segments, label.data)

@classmethod
def _create_audio_annotations(
cls, label: Label
) -> Generator[BaseModel, None, None]:
"""Create audio annotations with nested classifications using modular hierarchy builder."""
# Extract audio annotations from the label
audio_annotations = [
annot for annot in label.annotations
if isinstance(annot, AudioClassificationAnnotation)
]

if not audio_annotations:
return

# Use the modular hierarchy builder to create NDJSON annotations
ndjson_annotations = create_audio_ndjson_annotations(
audio_annotations,
label.data.global_key
)

# Yield each NDJSON annotation
for annotation in ndjson_annotations:
yield annotation



@classmethod
def _create_non_video_annotations(cls, label: Label):
non_video_annotations = [
@@ -170,6 +203,7 @@ def _create_non_video_annotations(cls, label):
VideoClassificationAnnotation,
VideoObjectAnnotation,
VideoMaskAnnotation,
AudioClassificationAnnotation,
RelationshipAnnotation,
),
)
@@ -187,7 +221,7 @@
yield NDMessageTask.from_common(annotation, label.data)
else:
raise TypeError(
f"Unable to convert object to MAL format. `{type(getattr(annotation, 'value',annotation))}`"
f"Unable to convert object to MAL format. `{type(getattr(annotation, 'value', annotation))}`"
)

@classmethod