@@ -93,15 +93,15 @@ def group_by_value(self, annotations: List[TemporalAnnotation]) -> List[Dict[str
93
93
94
94
entries = []
95
95
for _ , anns in value_buckets .items ():
96
- first = anns [0 ]
97
96
# Extract frames from each annotation (root frames)
98
97
frames = [self .frame_extractor (a ) for a in anns ]
99
98
frame_dicts = [{"start" : start , "end" : end } for start , end in frames ]
100
99
101
- # Get root frames for passing to nested classifications
100
+ # Get root frames for passing to nested classifications (use first annotation's frames)
102
101
root_frames = frames [0 ] if frames else (None , None )
103
102
104
- entry = self ._create_answer_entry (first , frame_dicts , root_frames )
103
+ # Pass ALL annotations so we can merge their nested classifications
104
+ entry = self ._create_answer_entry (anns , frame_dicts , root_frames )
105
105
entries .append (entry )
106
106
107
107
return entries
@@ -138,49 +138,80 @@ def _get_nested_frames(self, obj: Any, parent_frames: List[Dict[str, int]], root
138
138
# Use explicitly specified frames
139
139
return [{"start" : obj .start_frame , "end" : obj .end_frame }]
140
140
else :
141
- # Default to root frames
142
- if root_frames and root_frames [0 ] is not None and root_frames [1 ] is not None :
141
+ # Default to parent frames first, then root frames
142
+ if parent_frames :
143
+ return parent_frames
144
+ elif root_frames and root_frames [0 ] is not None and root_frames [1 ] is not None :
143
145
return [{"start" : root_frames [0 ], "end" : root_frames [1 ]}]
144
146
else :
145
- # Fall back to parent frames if root not available
146
- return parent_frames
147
+ return []
147
148
148
def _create_answer_entry(self, anns: List[TemporalAnnotation], frames: List[Dict[str, int]], root_frames: Tuple[int, int]) -> Dict[str, Any]:
    """Create an answer entry from all annotations that share the same value.

    Nested classifications found on any annotation in the group are merged
    and serialized together via ``_serialize_explicit_classifications``.

    Args:
        anns: All annotations in the value group. Must be non-empty; the
            first annotation decides which answer shape is handled.
        frames: List of frame dictionaries for this answer
        root_frames: Tuple of (start, end) from the root AudioClassificationAnnotation

    Returns:
        - Checklist (``value.answer`` is a list): one entry per distinct
          option name, ordered by name. If exactly one distinct option is
          present that entry is returned directly, otherwise
          ``{"options": [...], "frames": frames}``.
        - Radio (``value.answer`` has a ``name``):
          ``{"name": ..., "frames": frames}``.
        - Anything else (text): ``{"value": ..., "frames": frames}``.

        A ``"classifications"`` key is added only when at least one nested
        classification was collected.
    """
    first_ann = anns[0]
    has_answer = hasattr(first_ann.value, "answer")

    if has_answer and isinstance(first_ann.value.answer, list):
        # Checklist: group frames and nested classifications by option name
        # in a single pass over the annotations instead of rescanning every
        # annotation once per distinct option name.
        frames_by_option: Dict[str, List[Dict[str, int]]] = {}
        nested_by_option: Dict[str, List[Any]] = {}
        for ann in anns:
            if not (hasattr(ann.value, "answer") and isinstance(ann.value.answer, list)):
                continue
            options = ann.value.answer
            if not options:
                continue
            # The annotation's own frame range is the same for all of its
            # options, so extract it once per annotation.
            ann_start, ann_end = self.frame_extractor(ann)
            for ann_opt in options:
                # Fresh parent list/dict per option so that entries never
                # share mutable frame dictionaries with each other.
                parent_frames = [{"start": ann_start, "end": ann_end}]
                frames_by_option.setdefault(ann_opt.name, []).extend(
                    self._get_nested_frames(ann_opt, parent_frames, root_frames)
                )
                opt_nested = getattr(ann_opt, "classifications", None)
                if opt_nested:
                    nested_by_option.setdefault(ann_opt.name, []).extend(opt_nested)

        entries = []
        for opt_name in sorted(frames_by_option):  # Sort for consistent ordering
            entry = {"name": opt_name, "frames": frames_by_option[opt_name]}
            merged_nested = nested_by_option.get(opt_name)
            if merged_nested:
                entry["classifications"] = self._serialize_explicit_classifications(merged_nested, root_frames)
            entries.append(entry)
        return entries[0] if len(entries) == 1 else {"options": entries, "frames": frames}

    if has_answer and hasattr(first_ann.value.answer, "name"):
        # Radio: the frames passed in already cover every annotation in the
        # group, so they are used as-is.
        entry = {"name": first_ann.value.answer.name, "frames": frames}
        # Nesting lives on the answer object for radio values.
        all_nested = [
            nested
            for ann in anns
            if hasattr(ann.value, "answer")
            for nested in (getattr(ann.value.answer, "classifications", None) or [])
        ]
    else:
        # Text - nesting is at the annotation level, not answer level
        entry = {"value": first_ann.value.answer, "frames": frames}
        all_nested = [
            nested
            for ann in anns
            for nested in (getattr(ann, "classifications", None) or [])
        ]

    if all_nested:
        entry["classifications"] = self._serialize_explicit_classifications(all_nested, root_frames)
    return entry
185
216
186
217
def _serialize_explicit_classifications (self , classifications : List [Any ], root_frames : Tuple [int , int ]) -> List [Dict [str , Any ]]:
@@ -207,10 +238,12 @@ def _serialize_explicit_classifications(self, classifications: List[Any], root_f
207
238
display_name = cls_list [0 ].name if cls_list [0 ].name else name
208
239
209
240
# Create answer entries for this nested classification
210
- answers = []
241
+ # De-duplicate by answer value
242
+ seen_values = {} # value_key -> (answer_dict, nested_classifications)
211
243
for cls in cls_list :
212
244
# Get frames for this ClassificationAnnotation (from cls or root)
213
245
cls_frames = self ._get_nested_frames (cls , [], root_frames )
246
+ value_key = self ._get_value_key (cls )
214
247
215
248
if hasattr (cls .value , "answer" ):
216
249
if isinstance (cls .value .answer , list ):
@@ -219,27 +252,78 @@ def _serialize_explicit_classifications(self, classifications: List[Any], root_f
219
252
# Get frames for this checklist option (from opt or cls or root)
220
253
opt_frames = self ._get_nested_frames (opt , cls_frames , root_frames )
221
254
answer = {"name" : opt .name , "frames" : opt_frames }
222
- # Recursively handle deeper nesting
255
+ # Collect nested for recursion
256
+ opt_nested = []
223
257
if hasattr (opt , 'classifications' ) and opt .classifications :
224
- answer ["classifications" ] = self ._serialize_explicit_classifications (opt .classifications , root_frames )
225
- answers .append (answer )
258
+ opt_nested = opt .classifications
259
+ if opt_nested :
260
+ answer ["classifications" ] = self ._serialize_explicit_classifications (opt_nested , root_frames )
261
+ # Note: Checklist options don't need de-duplication
262
+ # (they're already handled at the parent level)
263
+ if value_key not in seen_values :
264
+ seen_values [value_key ] = []
265
+ seen_values [value_key ].append (answer )
226
266
elif hasattr (cls .value .answer , "name" ):
227
- # Radio
267
+ # Radio - de-duplicate by name
228
268
opt = cls .value .answer
269
+ # Check if this answer has explicit frames
270
+ has_explicit_frames = (hasattr (opt , 'start_frame' ) and opt .start_frame is not None and
271
+ hasattr (opt , 'end_frame' ) and opt .end_frame is not None )
229
272
# Get frames for this radio answer (from opt or cls or root)
230
273
opt_frames = self ._get_nested_frames (opt , cls_frames , root_frames )
231
- answer = {"name" : opt .name , "frames" : opt_frames }
232
- # Recursively handle deeper nesting
233
- if hasattr (opt , 'classifications' ) and opt .classifications :
234
- answer ["classifications" ] = self ._serialize_explicit_classifications (opt .classifications , root_frames )
235
- answers .append (answer )
274
+
275
+ # Check if we've already seen this answer name
276
+ if value_key in seen_values :
277
+ # Only merge frames if both have explicit frames, or neither does
278
+ existing_has_explicit = seen_values [value_key ].get ("_has_explicit" , False )
279
+ if has_explicit_frames and existing_has_explicit :
280
+ # Both explicit - merge
281
+ seen_values [value_key ]["frames" ].extend (opt_frames )
282
+ elif has_explicit_frames and not existing_has_explicit :
283
+ # Current is explicit, existing is implicit - replace with explicit
284
+ seen_values [value_key ]["frames" ] = opt_frames
285
+ seen_values [value_key ]["_has_explicit" ] = True
286
+ elif not has_explicit_frames and existing_has_explicit :
287
+ # Current is implicit, existing is explicit - keep existing (don't merge)
288
+ pass
289
+ else :
290
+ # Both implicit - merge
291
+ seen_values [value_key ]["frames" ].extend (opt_frames )
292
+
293
+ # Always merge nested classifications
294
+ if hasattr (opt , 'classifications' ) and opt .classifications :
295
+ seen_values [value_key ]["_nested" ].extend (opt .classifications )
296
+ else :
297
+ answer = {"name" : opt .name , "frames" : opt_frames , "_nested" : [], "_has_explicit" : has_explicit_frames }
298
+ if hasattr (opt , 'classifications' ) and opt .classifications :
299
+ answer ["_nested" ] = list (opt .classifications )
300
+ seen_values [value_key ] = answer
236
301
else :
237
302
# Text - check for annotation-level nesting
238
303
answer = {"value" : cls .value .answer , "frames" : cls_frames }
239
- # Recursively handle deeper nesting at ClassificationAnnotation level
304
+ # Collect nested
305
+ text_nested = []
240
306
if hasattr (cls , 'classifications' ) and cls .classifications :
241
- answer ["classifications" ] = self ._serialize_explicit_classifications (cls .classifications , root_frames )
242
- answers .append (answer )
307
+ text_nested = cls .classifications
308
+ if text_nested :
309
+ answer ["classifications" ] = self ._serialize_explicit_classifications (text_nested , root_frames )
310
+ if value_key not in seen_values :
311
+ seen_values [value_key ] = []
312
+ seen_values [value_key ].append (answer )
313
+
314
+ # Convert seen_values to answers list
315
+ answers = []
316
+ for value_key , value_data in seen_values .items ():
317
+ if isinstance (value_data , list ):
318
+ answers .extend (value_data )
319
+ else :
320
+ # Radio case - handle nested classifications
321
+ if value_data .get ("_nested" ):
322
+ value_data ["classifications" ] = self ._serialize_explicit_classifications (value_data ["_nested" ], root_frames )
323
+ # Clean up internal fields
324
+ value_data .pop ("_nested" , None )
325
+ value_data .pop ("_has_explicit" , None )
326
+ answers .append (value_data )
243
327
244
328
result .append ({
245
329
"name" : display_name ,
0 commit comments