Skip to content

Commit 6b54e26

Browse files
chore: works perfectly
1 parent e63b306 commit 6b54e26

File tree

2 files changed

+135
-51
lines changed

2 files changed

+135
-51
lines changed

libs/labelbox/src/labelbox/data/serialization/ndjson/temporal.py

Lines changed: 124 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -93,15 +93,15 @@ def group_by_value(self, annotations: List[TemporalAnnotation]) -> List[Dict[str
9393

9494
entries = []
9595
for _, anns in value_buckets.items():
96-
first = anns[0]
9796
# Extract frames from each annotation (root frames)
9897
frames = [self.frame_extractor(a) for a in anns]
9998
frame_dicts = [{"start": start, "end": end} for start, end in frames]
10099

101-
# Get root frames for passing to nested classifications
100+
# Get root frames for passing to nested classifications (use first annotation's frames)
102101
root_frames = frames[0] if frames else (None, None)
103102

104-
entry = self._create_answer_entry(first, frame_dicts, root_frames)
103+
# Pass ALL annotations so we can merge their nested classifications
104+
entry = self._create_answer_entry(anns, frame_dicts, root_frames)
105105
entries.append(entry)
106106

107107
return entries
@@ -138,49 +138,80 @@ def _get_nested_frames(self, obj: Any, parent_frames: List[Dict[str, int]], root
138138
# Use explicitly specified frames
139139
return [{"start": obj.start_frame, "end": obj.end_frame}]
140140
else:
141-
# Default to root frames
142-
if root_frames and root_frames[0] is not None and root_frames[1] is not None:
141+
# Default to parent frames first, then root frames
142+
if parent_frames:
143+
return parent_frames
144+
elif root_frames and root_frames[0] is not None and root_frames[1] is not None:
143145
return [{"start": root_frames[0], "end": root_frames[1]}]
144146
else:
145-
# Fall back to parent frames if root not available
146-
return parent_frames
147+
return []
147148

148-
def _create_answer_entry(self, first_ann: TemporalAnnotation, frames: List[Dict[str, int]], root_frames: Tuple[int, int]) -> Dict[str, Any]:
149-
"""Create an answer entry from the first annotation and frames.
149+
def _create_answer_entry(self, anns: List[TemporalAnnotation], frames: List[Dict[str, int]], root_frames: Tuple[int, int]) -> Dict[str, Any]:
150+
"""Create an answer entry from all annotations with the same value, merging their nested classifications.
150151
151152
Args:
152-
first_ann: The first annotation in the value group
153+
anns: All annotations in the value group
153154
frames: List of frame dictionaries for this answer
154155
root_frames: Tuple of (start, end) from the root AudioClassificationAnnotation
155156
"""
157+
first_ann = anns[0]
158+
156159
if hasattr(first_ann.value, "answer") and isinstance(first_ann.value.answer, list):
157-
# Checklist: emit one entry per distinct option present in this bucket
160+
# Checklist: emit one entry per distinct option present across ALL annotations
161+
# First, collect all unique option names across all annotations
162+
all_option_names = set()
163+
for ann in anns:
164+
if hasattr(ann.value, "answer") and isinstance(ann.value.answer, list):
165+
for opt in ann.value.answer:
166+
all_option_names.add(opt.name)
167+
158168
entries = []
159-
for opt in first_ann.value.answer:
160-
# Get frames for this specific checklist option (from opt or parent)
161-
opt_frames = self._get_nested_frames(opt, frames, root_frames)
162-
entry = {"name": opt.name, "frames": opt_frames}
163-
# Handle explicit nesting for this checklist option
164-
if hasattr(opt, 'classifications') and opt.classifications:
165-
entry["classifications"] = self._serialize_explicit_classifications(opt.classifications, root_frames)
169+
for opt_name in sorted(all_option_names): # Sort for consistent ordering
170+
# For each unique option, collect frames and nested classifications from all annotations
171+
opt_frames = []
172+
all_nested = []
173+
for ann in anns:
174+
if hasattr(ann.value, "answer") and isinstance(ann.value.answer, list):
175+
for ann_opt in ann.value.answer:
176+
if ann_opt.name == opt_name:
177+
# Get this annotation's root frame range
178+
ann_start, ann_end = self.frame_extractor(ann)
179+
ann_frame_dict = [{"start": ann_start, "end": ann_end}]
180+
# Collect this option's frame range (from option or parent annotation)
181+
frames_for_this_opt = self._get_nested_frames(ann_opt, ann_frame_dict, root_frames)
182+
opt_frames.extend(frames_for_this_opt)
183+
# Collect nested classifications
184+
if hasattr(ann_opt, 'classifications') and ann_opt.classifications:
185+
all_nested.extend(ann_opt.classifications)
186+
187+
entry = {"name": opt_name, "frames": opt_frames}
188+
if all_nested:
189+
entry["classifications"] = self._serialize_explicit_classifications(all_nested, root_frames)
166190
entries.append(entry)
167191
return entries[0] if len(entries) == 1 else {"options": entries, "frames": frames}
168192
elif hasattr(first_ann.value, "answer") and hasattr(first_ann.value.answer, "name"):
169193
# Radio
170194
opt = first_ann.value.answer
171-
# Get frames for this radio answer (from answer or parent)
172-
opt_frames = self._get_nested_frames(opt, frames, root_frames)
173-
entry = {"name": opt.name, "frames": opt_frames}
174-
# Handle explicit nesting via ClassificationAnswer.classifications
175-
if hasattr(opt, 'classifications') and opt.classifications:
176-
entry["classifications"] = self._serialize_explicit_classifications(opt.classifications, root_frames)
195+
# Use the merged frames from all annotations (already passed in)
196+
entry = {"name": opt.name, "frames": frames}
197+
# Collect nested classifications from all annotations
198+
all_nested = []
199+
for ann in anns:
200+
if hasattr(ann.value, "answer") and hasattr(ann.value.answer, "classifications") and ann.value.answer.classifications:
201+
all_nested.extend(ann.value.answer.classifications)
202+
if all_nested:
203+
entry["classifications"] = self._serialize_explicit_classifications(all_nested, root_frames)
177204
return entry
178205
else:
179206
# Text - nesting is at the annotation level, not answer level
180207
entry = {"value": first_ann.value.answer, "frames": frames}
181-
# Handle explicit nesting via AudioClassificationAnnotation.classifications
182-
if hasattr(first_ann, 'classifications') and first_ann.classifications:
183-
entry["classifications"] = self._serialize_explicit_classifications(first_ann.classifications, root_frames)
208+
# Collect nested classifications from all annotations
209+
all_nested = []
210+
for ann in anns:
211+
if hasattr(ann, 'classifications') and ann.classifications:
212+
all_nested.extend(ann.classifications)
213+
if all_nested:
214+
entry["classifications"] = self._serialize_explicit_classifications(all_nested, root_frames)
184215
return entry
185216

186217
def _serialize_explicit_classifications(self, classifications: List[Any], root_frames: Tuple[int, int]) -> List[Dict[str, Any]]:
@@ -207,10 +238,12 @@ def _serialize_explicit_classifications(self, classifications: List[Any], root_f
207238
display_name = cls_list[0].name if cls_list[0].name else name
208239

209240
# Create answer entries for this nested classification
210-
answers = []
241+
# De-duplicate by answer value
242+
seen_values = {} # value_key -> list of answer dicts (checklist/text) or one merged answer dict (radio, with temporary "_nested"/"_has_explicit" fields)
211243
for cls in cls_list:
212244
# Get frames for this ClassificationAnnotation (from cls or root)
213245
cls_frames = self._get_nested_frames(cls, [], root_frames)
246+
value_key = self._get_value_key(cls)
214247

215248
if hasattr(cls.value, "answer"):
216249
if isinstance(cls.value.answer, list):
@@ -219,27 +252,78 @@ def _serialize_explicit_classifications(self, classifications: List[Any], root_f
219252
# Get frames for this checklist option (from opt or cls or root)
220253
opt_frames = self._get_nested_frames(opt, cls_frames, root_frames)
221254
answer = {"name": opt.name, "frames": opt_frames}
222-
# Recursively handle deeper nesting
255+
# Collect nested for recursion
256+
opt_nested = []
223257
if hasattr(opt, 'classifications') and opt.classifications:
224-
answer["classifications"] = self._serialize_explicit_classifications(opt.classifications, root_frames)
225-
answers.append(answer)
258+
opt_nested = opt.classifications
259+
if opt_nested:
260+
answer["classifications"] = self._serialize_explicit_classifications(opt_nested, root_frames)
261+
# Note: Checklist options don't need de-duplication
262+
# (they're already handled at the parent level)
263+
if value_key not in seen_values:
264+
seen_values[value_key] = []
265+
seen_values[value_key].append(answer)
226266
elif hasattr(cls.value.answer, "name"):
227-
# Radio
267+
# Radio - de-duplicate by name
228268
opt = cls.value.answer
269+
# Check if this answer has explicit frames
270+
has_explicit_frames = (hasattr(opt, 'start_frame') and opt.start_frame is not None and
271+
hasattr(opt, 'end_frame') and opt.end_frame is not None)
229272
# Get frames for this radio answer (from opt or cls or root)
230273
opt_frames = self._get_nested_frames(opt, cls_frames, root_frames)
231-
answer = {"name": opt.name, "frames": opt_frames}
232-
# Recursively handle deeper nesting
233-
if hasattr(opt, 'classifications') and opt.classifications:
234-
answer["classifications"] = self._serialize_explicit_classifications(opt.classifications, root_frames)
235-
answers.append(answer)
274+
275+
# Check if we've already seen this answer name
276+
if value_key in seen_values:
277+
# Only merge frames if both have explicit frames, or neither does
278+
existing_has_explicit = seen_values[value_key].get("_has_explicit", False)
279+
if has_explicit_frames and existing_has_explicit:
280+
# Both explicit - merge
281+
seen_values[value_key]["frames"].extend(opt_frames)
282+
elif has_explicit_frames and not existing_has_explicit:
283+
# Current is explicit, existing is implicit - replace with explicit
284+
seen_values[value_key]["frames"] = opt_frames
285+
seen_values[value_key]["_has_explicit"] = True
286+
elif not has_explicit_frames and existing_has_explicit:
287+
# Current is implicit, existing is explicit - keep existing (don't merge)
288+
pass
289+
else:
290+
# Both implicit - merge
291+
seen_values[value_key]["frames"].extend(opt_frames)
292+
293+
# Always merge nested classifications
294+
if hasattr(opt, 'classifications') and opt.classifications:
295+
seen_values[value_key]["_nested"].extend(opt.classifications)
296+
else:
297+
answer = {"name": opt.name, "frames": opt_frames, "_nested": [], "_has_explicit": has_explicit_frames}
298+
if hasattr(opt, 'classifications') and opt.classifications:
299+
answer["_nested"] = list(opt.classifications)
300+
seen_values[value_key] = answer
236301
else:
237302
# Text - check for annotation-level nesting
238303
answer = {"value": cls.value.answer, "frames": cls_frames}
239-
# Recursively handle deeper nesting at ClassificationAnnotation level
304+
# Collect nested
305+
text_nested = []
240306
if hasattr(cls, 'classifications') and cls.classifications:
241-
answer["classifications"] = self._serialize_explicit_classifications(cls.classifications, root_frames)
242-
answers.append(answer)
307+
text_nested = cls.classifications
308+
if text_nested:
309+
answer["classifications"] = self._serialize_explicit_classifications(text_nested, root_frames)
310+
if value_key not in seen_values:
311+
seen_values[value_key] = []
312+
seen_values[value_key].append(answer)
313+
314+
# Convert seen_values to answers list
315+
answers = []
316+
for value_key, value_data in seen_values.items():
317+
if isinstance(value_data, list):
318+
answers.extend(value_data)
319+
else:
320+
# Radio case - handle nested classifications
321+
if value_data.get("_nested"):
322+
value_data["classifications"] = self._serialize_explicit_classifications(value_data["_nested"], root_frames)
323+
# Clean up internal fields
324+
value_data.pop("_nested", None)
325+
value_data.pop("_has_explicit", None)
326+
answers.append(value_data)
243327

244328
result.append({
245329
"name": display_name,

libs/labelbox/tests/data/serialization/ndjson/test_audio.py

Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -315,15 +315,15 @@ def test_audio_nested_text_radio_checklist_structure():
315315
radio_nd = next(obj for obj in ndjson if obj["name"] == "radio_class")
316316

317317
# Check first_radio_answer
318-
# Note: The two annotation segments have different nested structures, so they create separate answer entries
318+
# Note: Segments with the same answer value are merged (both segments have "first_radio_answer")
319319
first_radios = [
320320
a for a in radio_nd["answer"] if a["name"] == "first_radio_answer"
321321
]
322-
# We get only first segment (200-1500) because second segment has different nested structure
323-
assert len(first_radios) >= 1
322+
# We get one merged answer with both frame ranges
323+
assert len(first_radios) == 1
324324
first_radio = first_radios[0]
325-
# First segment frames
326-
assert first_radio["frames"] == [{"start": 200, "end": 1500}]
325+
# Merged frames from both segments: [200-1500] and [2000-2500]
326+
assert first_radio["frames"] == [{"start": 200, "end": 1500}, {"start": 2000, "end": 2500}]
327327

328328
# Check explicit nested sub_radio_question
329329
assert "classifications" in first_radio
@@ -363,16 +363,16 @@ def test_audio_nested_text_radio_checklist_structure():
363363
)
364364

365365
# Check first_checklist_option
366-
# Note: segments with different nested structures don't merge
366+
# Note: segments with the same answer value are merged
367367
first_opts = [
368368
a
369369
for a in checklist_nd["answer"]
370370
if a["name"] == "first_checklist_option"
371371
]
372-
assert len(first_opts) >= 1
372+
assert len(first_opts) == 1
373373
first_opt = first_opts[0]
374-
# First segment frames
375-
assert first_opt["frames"] == [{"start": 300, "end": 800}]
374+
# Merged frames from both segments: [300-800] and [1200-1800]
375+
assert first_opt["frames"] == [{"start": 300, "end": 800}, {"start": 1200, "end": 1800}]
376376

377377
# Check explicit nested_checklist
378378
assert "classifications" in first_opt
@@ -382,8 +382,8 @@ def test_audio_nested_text_radio_checklist_structure():
382382
if c["name"] == "nested_checklist"
383383
)
384384

385-
# Check nested_checklist has nested_option_1 from first segment
386-
assert len(nested_checklist["answer"]) >= 1
385+
# Check nested_checklist has all 3 options (nested_option_1, 2, 3) from both segments
386+
assert len(nested_checklist["answer"]) == 3
387387

388388
# Check nested_option_1 with specific frame range
389389
opt1 = next(

0 commit comments

Comments
 (0)