From 64e7abf134bae04da90fd53012d28bf197e7c2af Mon Sep 17 00:00:00 2001 From: Dean Date: Mon, 9 Feb 2026 17:27:08 +0200 Subject: [PATCH 01/21] initial commit --- dagshub/data_engine/annotation/importer.py | 97 +++++++- dagshub/data_engine/annotation/metadata.py | 27 +++ dagshub/data_engine/model/query_result.py | 170 +++++++++++++- .../annotation_import/test_coco.py | 198 +++++++++++++++++ .../annotation_import/test_cvat_video.py | 156 +++++++++++++ .../data_engine/annotation_import/test_mot.py | 210 ++++++++++++++++++ 6 files changed, 845 insertions(+), 13 deletions(-) create mode 100644 tests/data_engine/annotation_import/test_coco.py create mode 100644 tests/data_engine/annotation_import/test_cvat_video.py create mode 100644 tests/data_engine/annotation_import/test_mot.py diff --git a/dagshub/data_engine/annotation/importer.py b/dagshub/data_engine/annotation/importer.py index c19212de..8ddf367e 100644 --- a/dagshub/data_engine/annotation/importer.py +++ b/dagshub/data_engine/annotation/importer.py @@ -1,13 +1,20 @@ from difflib import SequenceMatcher from pathlib import Path, PurePosixPath, PurePath from tempfile import TemporaryDirectory -from typing import TYPE_CHECKING, Literal, Optional, Union, Sequence, Mapping, Callable, List - -from dagshub_annotation_converter.converters.cvat import load_cvat_from_zip +from typing import TYPE_CHECKING, Dict, Literal, Optional, Union, Sequence, Mapping, Callable, List + +from dagshub_annotation_converter.converters.coco import load_coco_from_file +from dagshub_annotation_converter.converters.cvat import ( + load_cvat_from_zip, + load_cvat_from_xml_file, +) +from dagshub_annotation_converter.converters.mot import load_mot_from_dir, load_mot_from_zip from dagshub_annotation_converter.converters.yolo import load_yolo_from_fs +from dagshub_annotation_converter.converters.label_studio_video import video_ir_to_ls_video_tasks from dagshub_annotation_converter.formats.label_studio.task import LabelStudioTask from 
dagshub_annotation_converter.formats.yolo import YoloContext from dagshub_annotation_converter.ir.image.annotations.base import IRAnnotationBase +from dagshub_annotation_converter.ir.video import IRVideoBBoxAnnotation from dagshub.common.api import UserAPI from dagshub.common.api.repo import PathNotFoundError @@ -16,7 +23,7 @@ if TYPE_CHECKING: from dagshub.data_engine.model.datasource import Datasource -AnnotationType = Literal["yolo", "cvat"] +AnnotationType = Literal["yolo", "cvat", "coco", "mot", "cvat_video"] AnnotationLocation = Literal["repo", "disk"] @@ -57,6 +64,10 @@ def __init__( 'Add `yolo_type="bbox"|"segmentation"|pose"` to the arguments.' ) + @property + def is_video_format(self) -> bool: + return self.annotations_type in ("mot", "cvat_video") + def import_annotations(self) -> Mapping[str, Sequence[IRAnnotationBase]]: # Double check that the annotation file exists if self.load_from == "disk": @@ -84,15 +95,66 @@ def import_annotations(self) -> Mapping[str, Sequence[IRAnnotationBase]]: annotation_type=self.additional_args["yolo_type"], meta_file=annotations_file ) elif self.annotations_type == "cvat": - annotation_dict = load_cvat_from_zip(annotations_file) + result = load_cvat_from_zip(annotations_file) + if self._is_video_annotation_dict(result): + annotation_dict = self._flatten_video_annotations(result) + else: + annotation_dict = result + elif self.annotations_type == "coco": + annotation_dict, _ = load_coco_from_file(annotations_file) + elif self.annotations_type == "mot": + mot_kwargs = {} + if "image_width" in self.additional_args: + mot_kwargs["image_width"] = self.additional_args["image_width"] + if "image_height" in self.additional_args: + mot_kwargs["image_height"] = self.additional_args["image_height"] + if annotations_file.suffix == ".zip": + video_anns, _ = load_mot_from_zip(annotations_file, **mot_kwargs) + else: + video_anns, _ = load_mot_from_dir(annotations_file, **mot_kwargs) + annotation_dict = 
self._flatten_video_annotations(video_anns) + elif self.annotations_type == "cvat_video": + cvat_kwargs = {} + if "image_width" in self.additional_args: + cvat_kwargs["image_width"] = self.additional_args["image_width"] + if "image_height" in self.additional_args: + cvat_kwargs["image_height"] = self.additional_args["image_height"] + if annotations_file.suffix == ".zip": + result = load_cvat_from_zip(annotations_file, **cvat_kwargs) + else: + result = load_cvat_from_xml_file(annotations_file, **cvat_kwargs) + if self._is_video_annotation_dict(result): + annotation_dict = self._flatten_video_annotations(result) + else: + annotation_dict = result + else: + raise ValueError(f"Unsupported annotation type: {self.annotations_type}") return annotation_dict + @staticmethod + def _is_video_annotation_dict(result) -> bool: + """Check if the result from a CVAT loader is video annotations (int keys) vs image annotations (str keys).""" + if not isinstance(result, dict) or len(result) == 0: + return False + first_key = next(iter(result.keys())) + return isinstance(first_key, int) + + def _flatten_video_annotations( + self, + frame_annotations: Dict[int, Sequence[IRAnnotationBase]], + ) -> Dict[str, Sequence[IRAnnotationBase]]: + """Flatten frame-indexed video annotations into a single entry keyed by video name.""" + video_name = self.additional_args.get("video_name", self.annotations_file.stem) + all_anns: List[IRAnnotationBase] = [] + for frame_anns in frame_annotations.values(): + all_anns.extend(frame_anns) + return {video_name: all_anns} + def download_annotations(self, dest_dir: Path): log_message("Downloading annotations from repository") repoApi = self.ds.source.repoApi - if self.annotations_type == "cvat": - # Download just the annotation file + if self.annotations_type in ("cvat", "cvat_video"): repoApi.download(self.annotations_file.as_posix(), dest_dir, keep_source_prefix=True) elif self.annotations_type == "yolo": # Download the dataset .yaml file and the images + 
annotations @@ -104,6 +166,8 @@ def download_annotations(self, dest_dir: Path): # Download the annotation data assert context.path is not None repoApi.download(self.annotations_file.parent / context.path, dest_dir, keep_source_prefix=True) + elif self.annotations_type in ("coco", "mot"): + repoApi.download(self.annotations_file.as_posix(), dest_dir, keep_source_prefix=True) @staticmethod def determine_load_location(ds: "Datasource", annotations_path: Union[str, Path]) -> AnnotationLocation: @@ -288,6 +352,8 @@ def convert_to_ls_tasks(self, annotations: Mapping[str, Sequence[IRAnnotationBas """ Converts the annotations to Label Studio tasks. """ + if self.is_video_format: + return self._convert_to_ls_video_tasks(annotations) current_user_id = UserAPI.get_current_user(self.ds.source.repoApi.host).user_id tasks = {} for filename, anns in annotations.items(): @@ -296,3 +362,20 @@ def convert_to_ls_tasks(self, annotations: Mapping[str, Sequence[IRAnnotationBas t.add_ir_annotations(anns) tasks[filename] = t.model_dump_json().encode("utf-8") return tasks + + def _convert_to_ls_video_tasks( + self, annotations: Mapping[str, Sequence[IRAnnotationBase]] + ) -> Mapping[str, bytes]: + """ + Converts video annotations to Label Studio video tasks. 
+ """ + tasks = {} + for filename, anns in annotations.items(): + video_anns = [a for a in anns if isinstance(a, IRVideoBBoxAnnotation)] + if not video_anns: + continue + video_path = self.ds.source.raw_path(filename) + ls_tasks = video_ir_to_ls_video_tasks(video_anns, video_path=video_path) + if ls_tasks: + tasks[filename] = ls_tasks[0].model_dump_json().encode("utf-8") + return tasks diff --git a/dagshub/data_engine/annotation/metadata.py b/dagshub/data_engine/annotation/metadata.py index 8b5d632c..0fb5bc6b 100644 --- a/dagshub/data_engine/annotation/metadata.py +++ b/dagshub/data_engine/annotation/metadata.py @@ -20,6 +20,11 @@ from dagshub.data_engine.model.datapoint import Datapoint import ultralytics.engine.results +from dagshub_annotation_converter.formats.label_studio.videorectangle import VideoRectangleAnnotation +from dagshub_annotation_converter.formats.label_studio.task import task_lookup as _task_lookup + +_task_lookup["videorectangle"] = VideoRectangleAnnotation + class AnnotationMetaDict(dict): def __init__(self, annotation: "MetadataAnnotations", *args, **kwargs): @@ -269,6 +274,28 @@ def add_image_pose( self.annotations.append(ann) self._update_datapoint() + def add_coco_annotation( + self, + coco_json: str, + ): + """ + Add annotations from a COCO-format JSON string. + + Args: + coco_json: A COCO-format JSON string with ``categories``, ``images``, and ``annotations`` keys. 
+ """ + from dagshub_annotation_converter.converters.coco import load_coco_from_json_string + + grouped, _ = load_coco_from_json_string(coco_json) + new_anns: list[IRAnnotationBase] = [] + for anns in grouped.values(): + for ann in anns: + ann.filename = self.datapoint.path + new_anns.append(ann) + self.annotations.extend(new_anns) + log_message(f"Added {len(new_anns)} COCO annotation(s) to datapoint {self.datapoint.path}") + self._update_datapoint() + def add_yolo_annotation( self, annotation_type: Literal["bbox", "segmentation", "pose"], diff --git a/dagshub/data_engine/model/query_result.py b/dagshub/data_engine/model/query_result.py index 6c326eab..7ff08699 100644 --- a/dagshub/data_engine/model/query_result.py +++ b/dagshub/data_engine/model/query_result.py @@ -15,10 +15,16 @@ import dacite import dagshub_annotation_converter.converters.yolo import rich.progress +from dagshub_annotation_converter.converters.coco import export_to_coco_file +from dagshub_annotation_converter.converters.cvat import export_cvat_video_to_file +from dagshub_annotation_converter.converters.mot import export_mot_to_dir +from dagshub_annotation_converter.formats.coco import CocoContext +from dagshub_annotation_converter.formats.mot import MOTContext from dagshub_annotation_converter.formats.yolo import YoloContext from dagshub_annotation_converter.formats.yolo.categories import Categories from dagshub_annotation_converter.formats.yolo.common import ir_mapping from dagshub_annotation_converter.ir.image import IRImageAnnotationBase +from dagshub_annotation_converter.ir.video import IRVideoBBoxAnnotation from pydantic import ValidationError from dagshub.auth import get_token @@ -760,6 +766,20 @@ def _get_all_annotations(self, annotation_field: str) -> List[IRImageAnnotationB annotations.extend(dp.metadata[annotation_field].annotations) return annotations + def _get_all_video_annotations(self, annotation_field: str) -> List[IRVideoBBoxAnnotation]: + all_anns = 
self._get_all_annotations(annotation_field) + return [a for a in all_anns if isinstance(a, IRVideoBBoxAnnotation)] + + def _resolve_annotation_field(self, annotation_field: Optional[str]) -> str: + if annotation_field is not None: + return annotation_field + annotation_fields = sorted([f.name for f in self.fields if f.is_annotation()]) + if len(annotation_fields) == 0: + raise ValueError("No annotation fields found in the datasource") + annotation_field = annotation_fields[0] + log_message(f"Using annotations from field {annotation_field}") + return annotation_field + def export_as_yolo( self, download_dir: Optional[Union[str, Path]] = None, @@ -785,12 +805,7 @@ def export_as_yolo( Returns: The path to the YAML file with the metadata. Pass this path to ``YOLO.train()`` to train a model. """ - if annotation_field is None: - annotation_fields = sorted([f.name for f in self.fields if f.is_annotation()]) - if len(annotation_fields) == 0: - raise ValueError("No annotation fields found in the datasource") - annotation_field = annotation_fields[0] - log_message(f"Using annotations from field {annotation_field}") + annotation_field = self._resolve_annotation_field(annotation_field) if download_dir is None: download_dir = Path("dagshub_export") @@ -843,6 +858,149 @@ def export_as_yolo( log_message(f"Done! Saved YOLO Dataset, YAML file is at {yaml_path.absolute()}") return yaml_path + def export_as_coco( + self, + download_dir: Optional[Union[str, Path]] = None, + annotation_field: Optional[str] = None, + output_filename: str = "annotations.json", + classes: Optional[Dict[int, str]] = None, + ) -> Path: + """ + Downloads the files and exports annotations in COCO format. + + Args: + download_dir: Where to download the files. Defaults to ``./dagshub_export`` + annotation_field: Field with the annotations. If None, uses the first alphabetical annotation field. + output_filename: Name of the output COCO JSON file. Default is ``annotations.json``. 
+ classes: Category mapping for the COCO dataset as ``{id: name}``. + If ``None``, categories will be inferred from the annotations. + + Returns: + Path to the exported COCO JSON file. + """ + annotation_field = self._resolve_annotation_field(annotation_field) + + if download_dir is None: + download_dir = Path("dagshub_export") + download_dir = Path(download_dir) + + annotations = self._get_all_annotations(annotation_field) + if not annotations: + raise RuntimeError("No annotations found to export") + + context = CocoContext() + if classes is not None: + context.categories = dict(classes) + + # Add the source prefix to all annotations + for ann in annotations: + ann.filename = os.path.join(self.datasource.source.source_prefix, ann.filename) + + image_download_path = download_dir / "data" + log_message("Downloading image files...") + self.download_files(image_download_path) + + output_path = download_dir / output_filename + log_message("Exporting COCO annotations...") + result_path = export_to_coco_file(annotations, output_path, context=context) + log_message(f"Done! Saved COCO annotations to {result_path.absolute()}") + return result_path + + def export_as_mot( + self, + download_dir: Optional[Union[str, Path]] = None, + annotation_field: Optional[str] = None, + image_width: Optional[int] = None, + image_height: Optional[int] = None, + ) -> Path: + """ + Exports video annotations in MOT (Multiple Object Tracking) format. + + The output follows the MOT Challenge directory structure:: + + output_dir/ + gt/ + gt.txt + labels.txt + seqinfo.ini + + Args: + download_dir: Where to export. Defaults to ``./dagshub_export`` + annotation_field: Field with the annotations. If None, uses the first alphabetical annotation field. + image_width: Frame width. If None, inferred from annotations. + image_height: Frame height. If None, inferred from annotations. + + Returns: + Path to the exported MOT directory. 
+ """ + annotation_field = self._resolve_annotation_field(annotation_field) + + if download_dir is None: + download_dir = Path("dagshub_export") + download_dir = Path(download_dir) / "mot" + + video_annotations = self._get_all_video_annotations(annotation_field) + if not video_annotations: + raise RuntimeError("No video annotations found to export") + + context = MOTContext() + if image_width is not None: + context.image_width = image_width + elif video_annotations: + context.image_width = video_annotations[0].image_width + if image_height is not None: + context.image_height = image_height + elif video_annotations: + context.image_height = video_annotations[0].image_height + + log_message("Exporting MOT annotations...") + result_path = export_mot_to_dir(video_annotations, context, download_dir) + log_message(f"Done! Saved MOT annotations to {result_path.absolute()}") + return result_path + + def export_as_cvat_video( + self, + download_dir: Optional[Union[str, Path]] = None, + annotation_field: Optional[str] = None, + video_name: str = "video.mp4", + image_width: Optional[int] = None, + image_height: Optional[int] = None, + ) -> Path: + """ + Exports video annotations in CVAT video XML format. + + Args: + download_dir: Where to export. Defaults to ``./dagshub_export`` + annotation_field: Field with the annotations. If None, uses the first alphabetical annotation field. + video_name: Name of the source video to embed in the XML metadata. + image_width: Frame width. If None, inferred from annotations. + image_height: Frame height. If None, inferred from annotations. + + Returns: + Path to the exported CVAT video XML file. 
+ """ + annotation_field = self._resolve_annotation_field(annotation_field) + + if download_dir is None: + download_dir = Path("dagshub_export") + download_dir = Path(download_dir) + + video_annotations = self._get_all_video_annotations(annotation_field) + if not video_annotations: + raise RuntimeError("No video annotations found to export") + + output_path = download_dir / "annotations.xml" + log_message("Exporting CVAT video annotations...") + result_path = export_cvat_video_to_file( + video_annotations, + output_path, + video_name=video_name, + image_width=image_width, + image_height=image_height, + ) + log_message(f"Done! Saved CVAT video annotations to {result_path.absolute()}") + return result_path + def to_voxel51_dataset(self, **kwargs) -> "fo.Dataset": """ Creates a voxel51 dataset that can be used with\ diff --git a/tests/data_engine/annotation_import/test_coco.py b/tests/data_engine/annotation_import/test_coco.py new file mode 100644 index 00000000..38180811 --- /dev/null +++ b/tests/data_engine/annotation_import/test_coco.py @@ -0,0 +1,198 @@ +import datetime +import json +from pathlib import PurePosixPath +from unittest.mock import patch, PropertyMock + +import pytest +from dagshub_annotation_converter.ir.image import ( + IRBBoxImageAnnotation, + IRSegmentationImageAnnotation, + IRSegmentationPoint, + CoordinateStyle, +) + +from dagshub.data_engine.annotation.importer import AnnotationImporter +from dagshub.data_engine.annotation.metadata import MetadataAnnotations +from dagshub.data_engine.client.models import MetadataSelectFieldSchema +from dagshub.data_engine.dtypes import MetadataFieldType, ReservedTags +from dagshub.data_engine.model.datapoint import Datapoint +from dagshub.data_engine.model.query_result import QueryResult + + +@pytest.fixture(autouse=True) +def mock_source_prefix(ds): + with patch.object(type(ds.source), "source_prefix", new_callable=PropertyMock, return_value=PurePosixPath()): + yield + + +# --- COCO import --- + + +def 
test_import_coco_from_file(ds, tmp_path): + coco_file = tmp_path / "annotations.json" + coco_file.write_text(json.dumps(_make_coco_json())) + + importer = AnnotationImporter(ds, "coco", coco_file, load_from="disk") + result = importer.import_annotations() + + assert "image1.jpg" in result + assert len(result["image1.jpg"]) == 1 + assert isinstance(result["image1.jpg"][0], IRBBoxImageAnnotation) + + +def test_convert_image_to_ls_tasks(ds, tmp_path, mock_dagshub_auth): + importer = AnnotationImporter(ds, "coco", tmp_path / "ann.json", load_from="disk") + bbox = IRBBoxImageAnnotation( + filename="test.jpg", + categories={"cat": 1.0}, + top=0.1, left=0.1, width=0.2, height=0.2, + image_width=640, image_height=480, + coordinate_style=CoordinateStyle.NORMALIZED, + ) + + tasks = importer.convert_to_ls_tasks({"test.jpg": [bbox]}) + + assert "test.jpg" in tasks + task_json = json.loads(tasks["test.jpg"]) + assert "annotations" in task_json + + +# --- add_coco_annotation --- + + +def test_add_coco_annotation(ds, mock_dagshub_auth): + dp = Datapoint(datasource=ds, path="test.jpg", datapoint_id=0, metadata={}) + meta_ann = MetadataAnnotations(datapoint=dp, field="ann") + meta_ann.add_coco_annotation(json.dumps(_make_coco_json())) + + assert len(meta_ann.annotations) == 1 + assert isinstance(meta_ann.annotations[0], IRBBoxImageAnnotation) + assert meta_ann.annotations[0].filename == "test.jpg" + + +def test_add_coco_annotation_segmentation(ds, mock_dagshub_auth): + dp = Datapoint(datasource=ds, path="test.jpg", datapoint_id=0, metadata={}) + coco = { + "categories": [{"id": 1, "name": "dog"}], + "images": [{"id": 1, "width": 640, "height": 480, "file_name": "img.jpg"}], + "annotations": [ + {"id": 1, "image_id": 1, "category_id": 1, "segmentation": [[10, 20, 30, 40, 50, 60]]} + ], + } + meta_ann = MetadataAnnotations(datapoint=dp, field="ann") + meta_ann.add_coco_annotation(json.dumps(coco)) + + assert len(meta_ann.annotations) == 1 + + +# --- _resolve_annotation_field --- + + 
+def test_resolve_explicit(ds): + qr = _make_qr(ds, [], ann_field="my_ann") + assert qr._resolve_annotation_field("explicit") == "explicit" + + +def test_resolve_auto(ds): + qr = _make_qr(ds, [], ann_field="my_ann") + assert qr._resolve_annotation_field(None) == "my_ann" + + +def test_resolve_no_fields(ds): + qr = _make_qr(ds, [], ann_field=None) + with pytest.raises(ValueError, match="No annotation fields"): + qr._resolve_annotation_field(None) + + +# --- export_as_coco --- + + +def test_export_as_coco_bbox(ds, tmp_path): + dp = Datapoint(datasource=ds, path="images/test.jpg", datapoint_id=0, metadata={}) + ann = IRBBoxImageAnnotation( + filename="images/test.jpg", categories={"cat": 1.0}, + top=20.0, left=10.0, width=30.0, height=40.0, + image_width=640, image_height=480, + coordinate_style=CoordinateStyle.DENORMALIZED, + ) + dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=[ann]) + + qr = _make_qr(ds, [dp], ann_field="ann") + with patch.object(qr, "download_files"): + result = qr.export_as_coco(download_dir=tmp_path, annotation_field="ann") + + assert result.exists() + coco = json.loads(result.read_text()) + assert len(coco["annotations"]) == 1 + assert len(coco["images"]) == 1 + assert coco["annotations"][0]["bbox"] == [10.0, 20.0, 30.0, 40.0] + + +def test_export_as_coco_segmentation(ds, tmp_path): + dp = Datapoint(datasource=ds, path="images/test.jpg", datapoint_id=0, metadata={}) + ann = IRSegmentationImageAnnotation( + filename="images/test.jpg", categories={"dog": 1.0}, + points=[IRSegmentationPoint(x=10, y=20), IRSegmentationPoint(x=30, y=40), IRSegmentationPoint(x=50, y=60)], + image_width=640, image_height=480, + coordinate_style=CoordinateStyle.DENORMALIZED, + ) + dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=[ann]) + + qr = _make_qr(ds, [dp], ann_field="ann") + with patch.object(qr, "download_files"): + result = qr.export_as_coco(download_dir=tmp_path, annotation_field="ann") + + coco = 
json.loads(result.read_text()) + assert len(coco["annotations"]) == 1 + assert "segmentation" in coco["annotations"][0] + + +def test_export_as_coco_no_annotations(ds, tmp_path): + dp = Datapoint(datasource=ds, path="test.jpg", datapoint_id=0, metadata={}) + dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=[]) + + qr = _make_qr(ds, [dp], ann_field="ann") + with pytest.raises(RuntimeError, match="No annotations found"): + qr.export_as_coco(download_dir=tmp_path, annotation_field="ann") + + +def test_export_as_coco_with_classes(ds, tmp_path): + dp = Datapoint(datasource=ds, path="images/test.jpg", datapoint_id=0, metadata={}) + ann = IRBBoxImageAnnotation( + filename="images/test.jpg", categories={"cat": 1.0}, + top=20.0, left=10.0, width=30.0, height=40.0, + image_width=640, image_height=480, + coordinate_style=CoordinateStyle.DENORMALIZED, + ) + dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=[ann]) + + qr = _make_qr(ds, [dp], ann_field="ann") + with patch.object(qr, "download_files"): + result = qr.export_as_coco(download_dir=tmp_path, annotation_field="ann", classes={1: "cat", 2: "dog"}) + + coco = json.loads(result.read_text()) + cat_names = {c["name"] for c in coco["categories"]} + assert "cat" in cat_names + + +# --- Helpers --- + + +def _make_coco_json(): + return { + "categories": [{"id": 1, "name": "cat"}], + "images": [{"id": 1, "width": 640, "height": 480, "file_name": "image1.jpg"}], + "annotations": [{"id": 1, "image_id": 1, "category_id": 1, "bbox": [10, 20, 30, 40]}], + } + + +def _make_qr(ds, datapoints, ann_field=None): + fields = [] + if ann_field: + fields.append(MetadataSelectFieldSchema( + asOf=int(datetime.datetime.now().timestamp()), + autoGenerated=False, originalName=ann_field, + multiple=False, valueType=MetadataFieldType.BLOB, + name=ann_field, tags={ReservedTags.ANNOTATION.value}, + )) + return QueryResult(datasource=ds, _entries=datapoints, fields=fields) diff --git 
a/tests/data_engine/annotation_import/test_cvat_video.py b/tests/data_engine/annotation_import/test_cvat_video.py new file mode 100644 index 00000000..8a0dac69 --- /dev/null +++ b/tests/data_engine/annotation_import/test_cvat_video.py @@ -0,0 +1,156 @@ +import datetime +import json +from pathlib import PurePosixPath +from unittest.mock import patch, PropertyMock + +import pytest +from dagshub_annotation_converter.converters.cvat import export_cvat_video_to_xml_string +from dagshub_annotation_converter.ir.image import IRBBoxImageAnnotation, CoordinateStyle +from dagshub_annotation_converter.ir.video import IRVideoBBoxAnnotation + +from dagshub.data_engine.annotation.importer import AnnotationImporter +from dagshub.data_engine.annotation.metadata import MetadataAnnotations +from dagshub.data_engine.client.models import MetadataSelectFieldSchema +from dagshub.data_engine.dtypes import MetadataFieldType, ReservedTags +from dagshub.data_engine.model.datapoint import Datapoint +from dagshub.data_engine.model.query_result import QueryResult + + +@pytest.fixture(autouse=True) +def mock_source_prefix(ds): + with patch.object(type(ds.source), "source_prefix", new_callable=PropertyMock, return_value=PurePosixPath()): + yield + + +# --- CVAT video import --- + + +def test_import_cvat_video_from_xml(ds, tmp_path): + xml_file = tmp_path / "annotations.xml" + xml_file.write_bytes(_make_cvat_video_xml()) + + importer = AnnotationImporter(ds, "cvat_video", xml_file, load_from="disk") + result = importer.import_annotations() + + assert len(result) == 1 + anns = list(result.values())[0] + assert len(anns) == 2 + assert all(isinstance(a, IRVideoBBoxAnnotation) for a in anns) + + +# --- _get_all_video_annotations --- + + +def test_get_all_video_annotations_filters(ds): + image_ann = IRBBoxImageAnnotation( + filename="test.jpg", categories={"cat": 1.0}, + top=0.1, left=0.1, width=0.2, height=0.2, + image_width=640, image_height=480, + coordinate_style=CoordinateStyle.NORMALIZED, + ) + 
video_ann = _make_video_bbox() + + dp = Datapoint(datasource=ds, path="dp_0", datapoint_id=0, metadata={}) + dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=[image_ann, video_ann]) + + qr = _make_qr(ds, [dp], ann_field="ann") + result = qr._get_all_video_annotations("ann") + assert len(result) == 1 + assert isinstance(result[0], IRVideoBBoxAnnotation) + + +def test_get_all_video_annotations_empty(ds): + dp = Datapoint(datasource=ds, path="dp_0", datapoint_id=0, metadata={}) + dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=[]) + + qr = _make_qr(ds, [dp], ann_field="ann") + assert qr._get_all_video_annotations("ann") == [] + + +# --- videorectangle LS round-trip --- + + +def test_videorectangle_ls_roundtrip(): + from dagshub_annotation_converter.converters.label_studio_video import ( + video_ir_to_ls_video_tasks, + ls_video_json_to_video_ir, + ) + + anns = [_make_video_bbox(frame=0, track_id=1), _make_video_bbox(frame=5, track_id=1)] + tasks = video_ir_to_ls_video_tasks(anns) + assert len(tasks) == 1 + + recovered = ls_video_json_to_video_ir(tasks[0].model_dump_json()) + assert len(recovered) == 2 + assert recovered[0].frame_number == 0 + assert recovered[1].frame_number == 5 + + +# --- export_as_cvat_video --- + + +def test_export_as_cvat_video(ds, tmp_path): + dp = Datapoint(datasource=ds, path="video.mp4", datapoint_id=0, metadata={}) + anns = [_make_video_bbox(frame=0, track_id=0), _make_video_bbox(frame=5, track_id=0)] + dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=anns) + + qr = _make_qr(ds, [dp], ann_field="ann") + result = qr.export_as_cvat_video(download_dir=tmp_path, annotation_field="ann") + + assert result.exists() + content = result.read_text() + assert " IRVideoBBoxAnnotation: + return IRVideoBBoxAnnotation( + track_id=track_id, frame_number=frame, + left=100.0, top=150.0, width=50.0, height=80.0, + image_width=1920, image_height=1080, + 
categories={"person": 1.0}, + coordinate_style=CoordinateStyle.DENORMALIZED, + ) + + +def _make_cvat_video_xml() -> bytes: + anns = [_make_video_bbox(frame=0, track_id=0), _make_video_bbox(frame=5, track_id=0)] + return export_cvat_video_to_xml_string(anns) + + +def _make_qr(ds, datapoints, ann_field=None): + fields = [] + if ann_field: + fields.append(MetadataSelectFieldSchema( + asOf=int(datetime.datetime.now().timestamp()), + autoGenerated=False, originalName=ann_field, + multiple=False, valueType=MetadataFieldType.BLOB, + name=ann_field, tags={ReservedTags.ANNOTATION.value}, + )) + return QueryResult(datasource=ds, _entries=datapoints, fields=fields) diff --git a/tests/data_engine/annotation_import/test_mot.py b/tests/data_engine/annotation_import/test_mot.py new file mode 100644 index 00000000..93e3132d --- /dev/null +++ b/tests/data_engine/annotation_import/test_mot.py @@ -0,0 +1,210 @@ +import configparser +import datetime +import json +import zipfile +from pathlib import Path, PurePosixPath +from unittest.mock import patch, PropertyMock + +import pytest +from dagshub_annotation_converter.ir.image import CoordinateStyle +from dagshub_annotation_converter.ir.video import IRVideoBBoxAnnotation + +from dagshub.data_engine.annotation.importer import AnnotationImporter +from dagshub.data_engine.annotation.metadata import MetadataAnnotations +from dagshub.data_engine.client.models import MetadataSelectFieldSchema +from dagshub.data_engine.dtypes import MetadataFieldType, ReservedTags +from dagshub.data_engine.model.datapoint import Datapoint +from dagshub.data_engine.model.query_result import QueryResult + + +@pytest.fixture(autouse=True) +def mock_source_prefix(ds): + with patch.object(type(ds.source), "source_prefix", new_callable=PropertyMock, return_value=PurePosixPath()): + yield + + +# --- _is_video_annotation_dict --- + + +def test_is_video_dict_with_int_keys(): + assert AnnotationImporter._is_video_annotation_dict({0: [], 1: []}) is True + + +def 
test_is_video_dict_with_str_keys(): + assert AnnotationImporter._is_video_annotation_dict({"file.jpg": []}) is False + + +def test_is_video_dict_empty(): + assert AnnotationImporter._is_video_annotation_dict({}) is False + + +def test_is_video_dict_non_dict(): + assert AnnotationImporter._is_video_annotation_dict([]) is False + + +# --- is_video_format --- + + +@pytest.mark.parametrize( + "ann_type, expected", + [ + ("yolo", False), + ("cvat", False), + ("coco", False), + ("mot", True), + ("cvat_video", True), + ], +) +def test_is_video_format(ds, ann_type, expected, tmp_path): + kwargs = {} + if ann_type == "yolo": + kwargs["yolo_type"] = "bbox" + importer = AnnotationImporter(ds, ann_type, tmp_path / "dummy", load_from="disk", **kwargs) + assert importer.is_video_format is expected + + +# --- _flatten_video_annotations --- + + +def test_flatten_video_annotations(ds, tmp_path): + importer = AnnotationImporter(ds, "mot", tmp_path / "test_video", load_from="disk") + ann = _make_video_bbox(frame=0) + result = importer._flatten_video_annotations({0: [ann], 5: [ann]}) + assert "test_video" in result + assert len(result["test_video"]) == 2 + + +def test_flatten_video_annotations_custom_name(ds, tmp_path): + importer = AnnotationImporter(ds, "mot", tmp_path / "test_video", load_from="disk", video_name="my_video.mp4") + result = importer._flatten_video_annotations({0: [_make_video_bbox()]}) + assert "my_video.mp4" in result + + +# --- convert_to_ls_tasks for video --- + + +def test_convert_video_to_ls_tasks(ds, tmp_path): + importer = AnnotationImporter(ds, "mot", tmp_path / "video", load_from="disk") + video_anns = {"video.mp4": [_make_video_bbox(frame=0), _make_video_bbox(frame=1)]} + + tasks = importer.convert_to_ls_tasks(video_anns) + + assert "video.mp4" in tasks + task_json = json.loads(tasks["video.mp4"]) + assert "annotations" in task_json + + +# --- MOT import --- + + +def test_import_mot_from_dir(ds, tmp_path): + mot_dir = tmp_path / "mot_seq" + 
_create_mot_dir(mot_dir) + + importer = AnnotationImporter(ds, "mot", mot_dir, load_from="disk") + result = importer.import_annotations() + + assert len(result) == 1 + anns = list(result.values())[0] + assert len(anns) == 2 + assert all(isinstance(a, IRVideoBBoxAnnotation) for a in anns) + + +def test_import_mot_from_zip(ds, tmp_path): + mot_dir = tmp_path / "mot_seq" + _create_mot_dir(mot_dir) + + zip_path = tmp_path / "mot.zip" + with zipfile.ZipFile(zip_path, "w") as z: + z.write(mot_dir / "gt" / "gt.txt", "gt/gt.txt") + z.write(mot_dir / "gt" / "labels.txt", "gt/labels.txt") + z.write(mot_dir / "seqinfo.ini", "seqinfo.ini") + + importer = AnnotationImporter(ds, "mot", zip_path, load_from="disk") + result = importer.import_annotations() + + assert len(result) == 1 + anns = list(result.values())[0] + assert len(anns) == 2 + + +# --- export_as_mot --- + + +def test_export_as_mot(ds, tmp_path): + dp = Datapoint(datasource=ds, path="video.mp4", datapoint_id=0, metadata={}) + anns = [_make_video_bbox(frame=0, track_id=1), _make_video_bbox(frame=1, track_id=1)] + dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=anns) + + qr = _make_qr(ds, [dp], ann_field="ann") + result = qr.export_as_mot(download_dir=tmp_path, annotation_field="ann") + + assert result.exists() + assert (result / "gt" / "gt.txt").exists() + assert (result / "gt" / "labels.txt").exists() + assert (result / "seqinfo.ini").exists() + gt_lines = (result / "gt" / "gt.txt").read_text().strip().splitlines() + assert len(gt_lines) == 2 + + +def test_export_as_mot_no_annotations(ds, tmp_path): + dp = Datapoint(datasource=ds, path="video.mp4", datapoint_id=0, metadata={}) + dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=[]) + + qr = _make_qr(ds, [dp], ann_field="ann") + with pytest.raises(RuntimeError, match="No video annotations"): + qr.export_as_mot(download_dir=tmp_path, annotation_field="ann") + + +def test_export_as_mot_explicit_dimensions(ds, 
tmp_path): + dp = Datapoint(datasource=ds, path="video.mp4", datapoint_id=0, metadata={}) + anns = [_make_video_bbox(frame=0)] + dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=anns) + + qr = _make_qr(ds, [dp], ann_field="ann") + result = qr.export_as_mot( + download_dir=tmp_path, annotation_field="ann", image_width=1280, image_height=720 + ) + + seqinfo = (result / "seqinfo.ini").read_text() + assert "1280" in seqinfo + assert "720" in seqinfo + + +# --- Helpers --- + + +def _make_video_bbox(frame=0, track_id=0) -> IRVideoBBoxAnnotation: + return IRVideoBBoxAnnotation( + track_id=track_id, frame_number=frame, + left=100.0, top=150.0, width=50.0, height=80.0, + image_width=1920, image_height=1080, + categories={"person": 1.0}, + coordinate_style=CoordinateStyle.DENORMALIZED, + ) + + +def _create_mot_dir(mot_dir: Path): + gt_dir = mot_dir / "gt" + gt_dir.mkdir(parents=True) + (gt_dir / "gt.txt").write_text("1,1,100,150,50,80,1,1,1.0\n2,1,110,160,50,80,1,1,0.9\n") + (gt_dir / "labels.txt").write_text("person\n") + config = configparser.ConfigParser() + config["Sequence"] = { + "name": "test", "frameRate": "30", "seqLength": "100", + "imWidth": "1920", "imHeight": "1080", + } + with open(mot_dir / "seqinfo.ini", "w") as f: + config.write(f) + + +def _make_qr(ds, datapoints, ann_field=None): + fields = [] + if ann_field: + fields.append(MetadataSelectFieldSchema( + asOf=int(datetime.datetime.now().timestamp()), + autoGenerated=False, originalName=ann_field, + multiple=False, valueType=MetadataFieldType.BLOB, + name=ann_field, tags={ReservedTags.ANNOTATION.value}, + )) + return QueryResult(datasource=ds, _entries=datapoints, fields=fields) From 7ea6bab57ed6c0a46a53d29f48f86f185476e7a9 Mon Sep 17 00:00:00 2001 From: Dean Date: Mon, 9 Feb 2026 17:49:22 +0200 Subject: [PATCH 02/21] finished tests for new converter functionality. 
Need to do manual QA before opening a PR --- dagshub/data_engine/model/query_result.py | 4 +- .../annotation_import/test_coco.py | 152 ++++++++++-------- .../annotation_import/test_cvat_video.py | 99 +++++++----- .../data_engine/annotation_import/test_mot.py | 134 ++++++++------- 4 files changed, 231 insertions(+), 158 deletions(-) diff --git a/dagshub/data_engine/model/query_result.py b/dagshub/data_engine/model/query_result.py index 7ff08699..52e48603 100644 --- a/dagshub/data_engine/model/query_result.py +++ b/dagshub/data_engine/model/query_result.py @@ -946,11 +946,11 @@ def export_as_mot( context = MOTContext() if image_width is not None: context.image_width = image_width - elif video_annotations: + else: context.image_width = video_annotations[0].image_width if image_height is not None: context.image_height = image_height - elif video_annotations: + else: context.image_height = video_annotations[0].image_height log_message("Exporting MOT annotations...") diff --git a/tests/data_engine/annotation_import/test_coco.py b/tests/data_engine/annotation_import/test_coco.py index 38180811..9b238fd1 100644 --- a/tests/data_engine/annotation_import/test_coco.py +++ b/tests/data_engine/annotation_import/test_coco.py @@ -6,12 +6,10 @@ import pytest from dagshub_annotation_converter.ir.image import ( IRBBoxImageAnnotation, - IRSegmentationImageAnnotation, - IRSegmentationPoint, CoordinateStyle, ) -from dagshub.data_engine.annotation.importer import AnnotationImporter +from dagshub.data_engine.annotation.importer import AnnotationImporter, AnnotationsNotFoundError from dagshub.data_engine.annotation.metadata import MetadataAnnotations from dagshub.data_engine.client.models import MetadataSelectFieldSchema from dagshub.data_engine.dtypes import MetadataFieldType, ReservedTags @@ -25,14 +23,12 @@ def mock_source_prefix(ds): yield -# --- COCO import --- +# --- import --- def test_import_coco_from_file(ds, tmp_path): - coco_file = tmp_path / "annotations.json" - 
coco_file.write_text(json.dumps(_make_coco_json())) - - importer = AnnotationImporter(ds, "coco", coco_file, load_from="disk") + _write_coco(tmp_path, _make_coco_json()) + importer = AnnotationImporter(ds, "coco", tmp_path / "annotations.json", load_from="disk") result = importer.import_annotations() assert "image1.jpg" in result @@ -40,74 +36,77 @@ def test_import_coco_from_file(ds, tmp_path): assert isinstance(result["image1.jpg"][0], IRBBoxImageAnnotation) -def test_convert_image_to_ls_tasks(ds, tmp_path, mock_dagshub_auth): +def test_import_coco_nonexistent_raises(ds, tmp_path): + importer = AnnotationImporter(ds, "coco", tmp_path / "nope.json", load_from="disk") + with pytest.raises(AnnotationsNotFoundError): + importer.import_annotations() + + +def test_coco_convert_to_ls_tasks(ds, tmp_path, mock_dagshub_auth): importer = AnnotationImporter(ds, "coco", tmp_path / "ann.json", load_from="disk") bbox = IRBBoxImageAnnotation( - filename="test.jpg", - categories={"cat": 1.0}, + filename="test.jpg", categories={"cat": 1.0}, top=0.1, left=0.1, width=0.2, height=0.2, image_width=640, image_height=480, coordinate_style=CoordinateStyle.NORMALIZED, ) - tasks = importer.convert_to_ls_tasks({"test.jpg": [bbox]}) assert "test.jpg" in tasks task_json = json.loads(tasks["test.jpg"]) assert "annotations" in task_json + assert len(task_json["annotations"]) > 0 # --- add_coco_annotation --- -def test_add_coco_annotation(ds, mock_dagshub_auth): - dp = Datapoint(datasource=ds, path="test.jpg", datapoint_id=0, metadata={}) +def test_add_coco_annotation_rewrites_filename(ds, mock_dagshub_auth): + dp = Datapoint(datasource=ds, path="my_images/photo.jpg", datapoint_id=0, metadata={}) meta_ann = MetadataAnnotations(datapoint=dp, field="ann") meta_ann.add_coco_annotation(json.dumps(_make_coco_json())) assert len(meta_ann.annotations) == 1 assert isinstance(meta_ann.annotations[0], IRBBoxImageAnnotation) - assert meta_ann.annotations[0].filename == "test.jpg" - - -def 
test_add_coco_annotation_segmentation(ds, mock_dagshub_auth): - dp = Datapoint(datasource=ds, path="test.jpg", datapoint_id=0, metadata={}) - coco = { - "categories": [{"id": 1, "name": "dog"}], - "images": [{"id": 1, "width": 640, "height": 480, "file_name": "img.jpg"}], - "annotations": [ - {"id": 1, "image_id": 1, "category_id": 1, "segmentation": [[10, 20, 30, 40, 50, 60]]} - ], - } - meta_ann = MetadataAnnotations(datapoint=dp, field="ann") - meta_ann.add_coco_annotation(json.dumps(coco)) - - assert len(meta_ann.annotations) == 1 + assert meta_ann.annotations[0].filename == "my_images/photo.jpg" # --- _resolve_annotation_field --- -def test_resolve_explicit(ds): +def test_resolve_explicit_field(ds): qr = _make_qr(ds, [], ann_field="my_ann") assert qr._resolve_annotation_field("explicit") == "explicit" -def test_resolve_auto(ds): +def test_resolve_auto_field(ds): qr = _make_qr(ds, [], ann_field="my_ann") assert qr._resolve_annotation_field(None) == "my_ann" -def test_resolve_no_fields(ds): +def test_resolve_no_fields_raises(ds): qr = _make_qr(ds, [], ann_field=None) with pytest.raises(ValueError, match="No annotation fields"): qr._resolve_annotation_field(None) +def test_resolve_picks_alphabetically_first(ds): + fields = [] + for name in ["zebra_ann", "alpha_ann"]: + fields.append(MetadataSelectFieldSchema( + asOf=int(datetime.datetime.now().timestamp()), + autoGenerated=False, originalName=name, + multiple=False, valueType=MetadataFieldType.BLOB, + name=name, tags={ReservedTags.ANNOTATION.value}, + )) + qr = QueryResult(datasource=ds, _entries=[], fields=fields) + assert qr._resolve_annotation_field(None) == "alpha_ann" + + # --- export_as_coco --- -def test_export_as_coco_bbox(ds, tmp_path): +def test_export_coco_bbox_coordinates(ds, tmp_path): dp = Datapoint(datasource=ds, path="images/test.jpg", datapoint_id=0, metadata={}) ann = IRBBoxImageAnnotation( filename="images/test.jpg", categories={"cat": 1.0}, @@ -121,61 +120,69 @@ def 
test_export_as_coco_bbox(ds, tmp_path): with patch.object(qr, "download_files"): result = qr.export_as_coco(download_dir=tmp_path, annotation_field="ann") - assert result.exists() coco = json.loads(result.read_text()) - assert len(coco["annotations"]) == 1 - assert len(coco["images"]) == 1 assert coco["annotations"][0]["bbox"] == [10.0, 20.0, 30.0, 40.0] -def test_export_as_coco_segmentation(ds, tmp_path): +def test_export_coco_no_annotations_raises(ds, tmp_path): + dp = Datapoint(datasource=ds, path="test.jpg", datapoint_id=0, metadata={}) + dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=[]) + + qr = _make_qr(ds, [dp], ann_field="ann") + with pytest.raises(RuntimeError, match="No annotations found"): + qr.export_as_coco(download_dir=tmp_path, annotation_field="ann") + + +def test_export_coco_explicit_classes(ds, tmp_path): dp = Datapoint(datasource=ds, path="images/test.jpg", datapoint_id=0, metadata={}) - ann = IRSegmentationImageAnnotation( - filename="images/test.jpg", categories={"dog": 1.0}, - points=[IRSegmentationPoint(x=10, y=20), IRSegmentationPoint(x=30, y=40), IRSegmentationPoint(x=50, y=60)], - image_width=640, image_height=480, - coordinate_style=CoordinateStyle.DENORMALIZED, + dp.metadata["ann"] = MetadataAnnotations( + datapoint=dp, field="ann", annotations=[_make_image_bbox("images/test.jpg")] ) - dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=[ann]) qr = _make_qr(ds, [dp], ann_field="ann") with patch.object(qr, "download_files"): - result = qr.export_as_coco(download_dir=tmp_path, annotation_field="ann") + result = qr.export_as_coco( + download_dir=tmp_path, annotation_field="ann", classes={1: "cat", 2: "dog"} + ) coco = json.loads(result.read_text()) - assert len(coco["annotations"]) == 1 - assert "segmentation" in coco["annotations"][0] + assert "cat" in {c["name"] for c in coco["categories"]} -def test_export_as_coco_no_annotations(ds, tmp_path): - dp = Datapoint(datasource=ds, 
path="test.jpg", datapoint_id=0, metadata={}) - dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=[]) +def test_export_coco_custom_filename(ds, tmp_path): + dp = Datapoint(datasource=ds, path="images/test.jpg", datapoint_id=0, metadata={}) + dp.metadata["ann"] = MetadataAnnotations( + datapoint=dp, field="ann", annotations=[_make_image_bbox("images/test.jpg")] + ) qr = _make_qr(ds, [dp], ann_field="ann") - with pytest.raises(RuntimeError, match="No annotations found"): - qr.export_as_coco(download_dir=tmp_path, annotation_field="ann") + with patch.object(qr, "download_files"): + result = qr.export_as_coco( + download_dir=tmp_path, annotation_field="ann", output_filename="custom.json" + ) + assert result.name == "custom.json" -def test_export_as_coco_with_classes(ds, tmp_path): - dp = Datapoint(datasource=ds, path="images/test.jpg", datapoint_id=0, metadata={}) - ann = IRBBoxImageAnnotation( - filename="images/test.jpg", categories={"cat": 1.0}, - top=20.0, left=10.0, width=30.0, height=40.0, - image_width=640, image_height=480, - coordinate_style=CoordinateStyle.DENORMALIZED, - ) - dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=[ann]) - qr = _make_qr(ds, [dp], ann_field="ann") +def test_export_coco_multiple_datapoints(ds, tmp_path): + dps = [] + for i, name in enumerate(["a.jpg", "b.jpg"]): + dp = Datapoint(datasource=ds, path=name, datapoint_id=i, metadata={}) + dp.metadata["ann"] = MetadataAnnotations( + datapoint=dp, field="ann", annotations=[_make_image_bbox(name)] + ) + dps.append(dp) + + qr = _make_qr(ds, dps, ann_field="ann") with patch.object(qr, "download_files"): - result = qr.export_as_coco(download_dir=tmp_path, annotation_field="ann", classes={1: "cat", 2: "dog"}) + result = qr.export_as_coco(download_dir=tmp_path, annotation_field="ann") coco = json.loads(result.read_text()) - cat_names = {c["name"] for c in coco["categories"]} - assert "cat" in cat_names + assert len(coco["annotations"]) 
== 2 + assert len(coco["images"]) == 2 -# --- Helpers --- +# --- helpers --- def _make_coco_json(): @@ -186,6 +193,19 @@ def _make_coco_json(): } +def _write_coco(tmp_path, coco): + (tmp_path / "annotations.json").write_text(json.dumps(coco)) + + +def _make_image_bbox(filename="test.jpg") -> IRBBoxImageAnnotation: + return IRBBoxImageAnnotation( + filename=filename, categories={"cat": 1.0}, + top=20.0, left=10.0, width=30.0, height=40.0, + image_width=640, image_height=480, + coordinate_style=CoordinateStyle.DENORMALIZED, + ) + + def _make_qr(ds, datapoints, ann_field=None): fields = [] if ann_field: diff --git a/tests/data_engine/annotation_import/test_cvat_video.py b/tests/data_engine/annotation_import/test_cvat_video.py index 8a0dac69..0abdc841 100644 --- a/tests/data_engine/annotation_import/test_cvat_video.py +++ b/tests/data_engine/annotation_import/test_cvat_video.py @@ -1,5 +1,4 @@ import datetime -import json from pathlib import PurePosixPath from unittest.mock import patch, PropertyMock @@ -22,10 +21,10 @@ def mock_source_prefix(ds): yield -# --- CVAT video import --- +# --- import --- -def test_import_cvat_video_from_xml(ds, tmp_path): +def test_import_cvat_video(ds, tmp_path): xml_file = tmp_path / "annotations.xml" xml_file.write_bytes(_make_cvat_video_xml()) @@ -41,7 +40,7 @@ def test_import_cvat_video_from_xml(ds, tmp_path): # --- _get_all_video_annotations --- -def test_get_all_video_annotations_filters(ds): +def test_get_all_video_filters(ds): image_ann = IRBBoxImageAnnotation( filename="test.jpg", categories={"cat": 1.0}, top=0.1, left=0.1, width=0.2, height=0.2, @@ -51,7 +50,9 @@ def test_get_all_video_annotations_filters(ds): video_ann = _make_video_bbox() dp = Datapoint(datasource=ds, path="dp_0", datapoint_id=0, metadata={}) - dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=[image_ann, video_ann]) + dp.metadata["ann"] = MetadataAnnotations( + datapoint=dp, field="ann", annotations=[image_ann, video_ann] + ) qr = 
_make_qr(ds, [dp], ann_field="ann") result = qr._get_all_video_annotations("ann") @@ -59,7 +60,7 @@ def test_get_all_video_annotations_filters(ds): assert isinstance(result[0], IRVideoBBoxAnnotation) -def test_get_all_video_annotations_empty(ds): +def test_get_all_video_empty(ds): dp = Datapoint(datasource=ds, path="dp_0", datapoint_id=0, metadata={}) dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=[]) @@ -67,34 +68,24 @@ def test_get_all_video_annotations_empty(ds): assert qr._get_all_video_annotations("ann") == [] -# --- videorectangle LS round-trip --- - - -def test_videorectangle_ls_roundtrip(): - from dagshub_annotation_converter.converters.label_studio_video import ( - video_ir_to_ls_video_tasks, - ls_video_json_to_video_ir, - ) - - anns = [_make_video_bbox(frame=0, track_id=1), _make_video_bbox(frame=5, track_id=1)] - tasks = video_ir_to_ls_video_tasks(anns) - assert len(tasks) == 1 +def test_get_all_video_aggregates_across_datapoints(ds): + dps = [] + for i in range(3): + dp = Datapoint(datasource=ds, path=f"dp_{i}", datapoint_id=i, metadata={}) + dp.metadata["ann"] = MetadataAnnotations( + datapoint=dp, field="ann", annotations=[_make_video_bbox(frame=i)] + ) + dps.append(dp) - recovered = ls_video_json_to_video_ir(tasks[0].model_dump_json()) - assert len(recovered) == 2 - assert recovered[0].frame_number == 0 - assert recovered[1].frame_number == 5 + qr = _make_qr(ds, dps, ann_field="ann") + assert len(qr._get_all_video_annotations("ann")) == 3 # --- export_as_cvat_video --- -def test_export_as_cvat_video(ds, tmp_path): - dp = Datapoint(datasource=ds, path="video.mp4", datapoint_id=0, metadata={}) - anns = [_make_video_bbox(frame=0, track_id=0), _make_video_bbox(frame=5, track_id=0)] - dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=anns) - - qr = _make_qr(ds, [dp], ann_field="ann") +def test_export_cvat_video_xml(ds, tmp_path): + qr, _ = _make_video_qr(ds) result = 
qr.export_as_cvat_video(download_dir=tmp_path, annotation_field="ann") assert result.exists() @@ -103,7 +94,7 @@ def test_export_as_cvat_video(ds, tmp_path): assert "= 2 + + +# --- helpers --- def _make_video_bbox(frame=0, track_id=0) -> IRVideoBBoxAnnotation: @@ -144,6 +163,14 @@ def _make_cvat_video_xml() -> bytes: return export_cvat_video_to_xml_string(anns) +def _make_video_qr(ds): + dp = Datapoint(datasource=ds, path="video.mp4", datapoint_id=0, metadata={}) + anns = [_make_video_bbox(frame=0, track_id=0), _make_video_bbox(frame=5, track_id=0)] + dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=anns) + qr = _make_qr(ds, [dp], ann_field="ann") + return qr, dp + + def _make_qr(ds, datapoints, ann_field=None): fields = [] if ann_field: diff --git a/tests/data_engine/annotation_import/test_mot.py b/tests/data_engine/annotation_import/test_mot.py index 93e3132d..ccefc86f 100644 --- a/tests/data_engine/annotation_import/test_mot.py +++ b/tests/data_engine/annotation_import/test_mot.py @@ -9,7 +9,7 @@ from dagshub_annotation_converter.ir.image import CoordinateStyle from dagshub_annotation_converter.ir.video import IRVideoBBoxAnnotation -from dagshub.data_engine.annotation.importer import AnnotationImporter +from dagshub.data_engine.annotation.importer import AnnotationImporter, AnnotationsNotFoundError from dagshub.data_engine.annotation.metadata import MetadataAnnotations from dagshub.data_engine.client.models import MetadataSelectFieldSchema from dagshub.data_engine.dtypes import MetadataFieldType, ReservedTags @@ -26,11 +26,11 @@ def mock_source_prefix(ds): # --- _is_video_annotation_dict --- -def test_is_video_dict_with_int_keys(): +def test_is_video_dict_int_keys(): assert AnnotationImporter._is_video_annotation_dict({0: [], 1: []}) is True -def test_is_video_dict_with_str_keys(): +def test_is_video_dict_str_keys(): assert AnnotationImporter._is_video_annotation_dict({"file.jpg": []}) is False @@ -42,6 +42,10 @@ def 
test_is_video_dict_non_dict(): assert AnnotationImporter._is_video_annotation_dict([]) is False +def test_is_video_dict_mixed_first_int(): + assert AnnotationImporter._is_video_annotation_dict({0: [], "a": []}) is True + + # --- is_video_format --- @@ -66,35 +70,31 @@ def test_is_video_format(ds, ann_type, expected, tmp_path): # --- _flatten_video_annotations --- -def test_flatten_video_annotations(ds, tmp_path): +def test_flatten_merges_frames(ds, tmp_path): importer = AnnotationImporter(ds, "mot", tmp_path / "test_video", load_from="disk") - ann = _make_video_bbox(frame=0) - result = importer._flatten_video_annotations({0: [ann], 5: [ann]}) + result = importer._flatten_video_annotations({ + 0: [_make_video_bbox(frame=0)], + 5: [_make_video_bbox(frame=5)], + }) assert "test_video" in result assert len(result["test_video"]) == 2 -def test_flatten_video_annotations_custom_name(ds, tmp_path): - importer = AnnotationImporter(ds, "mot", tmp_path / "test_video", load_from="disk", video_name="my_video.mp4") +def test_flatten_defaults_to_file_stem(ds, tmp_path): + importer = AnnotationImporter(ds, "mot", tmp_path / "my_sequence", load_from="disk") result = importer._flatten_video_annotations({0: [_make_video_bbox()]}) - assert "my_video.mp4" in result - - -# --- convert_to_ls_tasks for video --- - + assert "my_sequence" in result -def test_convert_video_to_ls_tasks(ds, tmp_path): - importer = AnnotationImporter(ds, "mot", tmp_path / "video", load_from="disk") - video_anns = {"video.mp4": [_make_video_bbox(frame=0), _make_video_bbox(frame=1)]} - - tasks = importer.convert_to_ls_tasks(video_anns) - assert "video.mp4" in tasks - task_json = json.loads(tasks["video.mp4"]) - assert "annotations" in task_json +def test_flatten_video_name_override(ds, tmp_path): + importer = AnnotationImporter( + ds, "mot", tmp_path / "test_video", load_from="disk", video_name="custom.mp4" + ) + result = importer._flatten_video_annotations({0: [_make_video_bbox()]}) + assert "custom.mp4" in 
result -# --- MOT import --- +# --- import --- def test_import_mot_from_dir(ds, tmp_path): @@ -113,55 +113,55 @@ def test_import_mot_from_dir(ds, tmp_path): def test_import_mot_from_zip(ds, tmp_path): mot_dir = tmp_path / "mot_seq" _create_mot_dir(mot_dir) - - zip_path = tmp_path / "mot.zip" - with zipfile.ZipFile(zip_path, "w") as z: - z.write(mot_dir / "gt" / "gt.txt", "gt/gt.txt") - z.write(mot_dir / "gt" / "labels.txt", "gt/labels.txt") - z.write(mot_dir / "seqinfo.ini", "seqinfo.ini") + zip_path = _zip_mot_dir(tmp_path, mot_dir) importer = AnnotationImporter(ds, "mot", zip_path, load_from="disk") result = importer.import_annotations() assert len(result) == 1 - anns = list(result.values())[0] - assert len(anns) == 2 + assert len(list(result.values())[0]) == 2 -# --- export_as_mot --- +def test_import_mot_nonexistent_raises(ds, tmp_path): + importer = AnnotationImporter(ds, "mot", tmp_path / "missing", load_from="disk") + with pytest.raises(AnnotationsNotFoundError): + importer.import_annotations() -def test_export_as_mot(ds, tmp_path): - dp = Datapoint(datasource=ds, path="video.mp4", datapoint_id=0, metadata={}) - anns = [_make_video_bbox(frame=0, track_id=1), _make_video_bbox(frame=1, track_id=1)] - dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=anns) +# --- convert_to_ls_tasks --- - qr = _make_qr(ds, [dp], ann_field="ann") + +def test_convert_video_to_ls_tasks(ds, tmp_path): + importer = AnnotationImporter(ds, "mot", tmp_path / "video", load_from="disk") + video_anns = {"video.mp4": [_make_video_bbox(frame=0), _make_video_bbox(frame=1)]} + tasks = importer.convert_to_ls_tasks(video_anns) + + assert "video.mp4" in tasks + task_json = json.loads(tasks["video.mp4"]) + assert "annotations" in task_json + + +def test_convert_video_empty_skipped(ds, tmp_path): + importer = AnnotationImporter(ds, "mot", tmp_path / "video", load_from="disk") + tasks = importer.convert_to_ls_tasks({"video.mp4": []}) + assert "video.mp4" not in tasks + 
+ +# --- export_as_mot --- + + +def test_export_mot_directory_structure(ds, tmp_path): + qr, _ = _make_video_qr(ds) result = qr.export_as_mot(download_dir=tmp_path, annotation_field="ann") assert result.exists() assert (result / "gt" / "gt.txt").exists() assert (result / "gt" / "labels.txt").exists() assert (result / "seqinfo.ini").exists() - gt_lines = (result / "gt" / "gt.txt").read_text().strip().splitlines() - assert len(gt_lines) == 2 - - -def test_export_as_mot_no_annotations(ds, tmp_path): - dp = Datapoint(datasource=ds, path="video.mp4", datapoint_id=0, metadata={}) - dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=[]) - qr = _make_qr(ds, [dp], ann_field="ann") - with pytest.raises(RuntimeError, match="No video annotations"): - qr.export_as_mot(download_dir=tmp_path, annotation_field="ann") - - -def test_export_as_mot_explicit_dimensions(ds, tmp_path): - dp = Datapoint(datasource=ds, path="video.mp4", datapoint_id=0, metadata={}) - anns = [_make_video_bbox(frame=0)] - dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=anns) - qr = _make_qr(ds, [dp], ann_field="ann") +def test_export_mot_explicit_dimensions(ds, tmp_path): + qr, _ = _make_video_qr(ds) result = qr.export_as_mot( download_dir=tmp_path, annotation_field="ann", image_width=1280, image_height=720 ) @@ -171,7 +171,16 @@ def test_export_as_mot_explicit_dimensions(ds, tmp_path): assert "720" in seqinfo -# --- Helpers --- +def test_export_mot_no_annotations_raises(ds, tmp_path): + dp = Datapoint(datasource=ds, path="video.mp4", datapoint_id=0, metadata={}) + dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=[]) + + qr = _make_qr(ds, [dp], ann_field="ann") + with pytest.raises(RuntimeError, match="No video annotations"): + qr.export_as_mot(download_dir=tmp_path, annotation_field="ann") + + +# --- helpers --- def _make_video_bbox(frame=0, track_id=0) -> IRVideoBBoxAnnotation: @@ -198,6 +207,23 @@ def 
_create_mot_dir(mot_dir: Path): config.write(f) +def _zip_mot_dir(tmp_path: Path, mot_dir: Path) -> Path: + zip_path = tmp_path / "mot.zip" + with zipfile.ZipFile(zip_path, "w") as z: + z.write(mot_dir / "gt" / "gt.txt", "gt/gt.txt") + z.write(mot_dir / "gt" / "labels.txt", "gt/labels.txt") + z.write(mot_dir / "seqinfo.ini", "seqinfo.ini") + return zip_path + + +def _make_video_qr(ds): + dp = Datapoint(datasource=ds, path="video.mp4", datapoint_id=0, metadata={}) + anns = [_make_video_bbox(frame=0, track_id=1), _make_video_bbox(frame=1, track_id=1)] + dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=anns) + qr = _make_qr(ds, [dp], ann_field="ann") + return qr, dp + + def _make_qr(ds, datapoints, ann_field=None): fields = [] if ann_field: From c1b684aca29f108790a3f1b7a96420b0544705fd Mon Sep 17 00:00:00 2001 From: Dean Date: Tue, 10 Feb 2026 15:04:45 +0200 Subject: [PATCH 03/21] ignored manual testing file --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 14b6b327..33c013cc 100644 --- a/.gitignore +++ b/.gitignore @@ -141,3 +141,4 @@ params.yml !dagshub/data_engine/voxel_plugin_server/plugins/dagshub/dist/ scratchpad.ipynb scratchpad/ +my_test.py From 31360346dcb947aef2d66c743f228a562281c3ff Mon Sep 17 00:00:00 2001 From: Dean Date: Tue, 10 Feb 2026 15:11:44 +0200 Subject: [PATCH 04/21] fix a bug in get blob by moving the try catch from the datapoint.py to query_result.py --- dagshub/data_engine/model/datapoint.py | 19 ++++++++----------- dagshub/data_engine/model/query_result.py | 9 ++++++--- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/dagshub/data_engine/model/datapoint.py b/dagshub/data_engine/model/datapoint.py index b7aa89b5..d1499fc6 100644 --- a/dagshub/data_engine/model/datapoint.py +++ b/dagshub/data_engine/model/datapoint.py @@ -303,17 +303,14 @@ def get(): else: raise Exception(f"Non-retrying status code {resp.status_code} returned") - try: - for attempt in 
Retrying( - retry=retry_if_exception_type(RuntimeError), - stop=stop_after_attempt(5), - wait=wait_exponential(multiplier=1, min=4, max=10), - before_sleep=before_sleep_log(logger, logging.WARNING), - ): - with attempt: - content = get() - except Exception as e: - return f"Error while downloading binary blob: {e}" + for attempt in Retrying( + retry=retry_if_exception_type(RuntimeError), + stop=stop_after_attempt(5), + wait=wait_exponential(multiplier=1, min=4, max=10), + before_sleep=before_sleep_log(logger, logging.WARNING), + ): + with attempt: + content = get() if cache_on_disk: with cache_path.open("wb") as f: diff --git a/dagshub/data_engine/model/query_result.py b/dagshub/data_engine/model/query_result.py index 6c326eab..b039ef9e 100644 --- a/dagshub/data_engine/model/query_result.py +++ b/dagshub/data_engine/model/query_result.py @@ -401,9 +401,12 @@ def get_blob_fields( auth = self.datasource.source.repoApi.auth def _get_blob_fn(dp: Datapoint, field: str, url: str, blob_path: Path): - blob_or_path = _get_blob(url, blob_path, auth, cache_on_disk, load_into_memory, path_format) - if isinstance(blob_or_path, str) and path_format != "str": - logger.warning(f"Error while downloading blob for field {field} in datapoint {dp.path}:{blob_or_path}") + try: + blob_or_path = _get_blob(url, blob_path, auth, cache_on_disk, load_into_memory, path_format) + except Exception as e: + logger.warning(f"Error while downloading blob for field {field} in datapoint {dp.path}: {e}") + dp.metadata.pop(field, None) + return dp.metadata[field] = blob_or_path with progress: From 98844cce770732714b4db9b543628e57f77d20b8 Mon Sep 17 00:00:00 2001 From: Dean Date: Tue, 10 Feb 2026 16:54:20 +0200 Subject: [PATCH 05/21] fix an issue where video dimensions weren't found --- dagshub/data_engine/annotation/importer.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/dagshub/data_engine/annotation/importer.py b/dagshub/data_engine/annotation/importer.py index 
8ddf367e..36921708 100644 --- a/dagshub/data_engine/annotation/importer.py +++ b/dagshub/data_engine/annotation/importer.py @@ -108,6 +108,8 @@ def import_annotations(self) -> Mapping[str, Sequence[IRAnnotationBase]]: mot_kwargs["image_width"] = self.additional_args["image_width"] if "image_height" in self.additional_args: mot_kwargs["image_height"] = self.additional_args["image_height"] + if "video_name" in self.additional_args: + mot_kwargs["video_file"] = self.additional_args["video_name"] if annotations_file.suffix == ".zip": video_anns, _ = load_mot_from_zip(annotations_file, **mot_kwargs) else: @@ -217,8 +219,11 @@ def remap_annotations( ) continue for ann in anns: - assert ann.filename is not None - ann.filename = remap_func(ann.filename) + if ann.filename is not None: + ann.filename = remap_func(ann.filename) + else: + assert self.is_video_format, f"Non-video annotation has no filename: {ann}" + ann.filename = new_filename remapped[new_filename] = anns return remapped From 0af7a9fd787aa3f6ab839856dc5db0bfbee5f52a Mon Sep 17 00:00:00 2001 From: Dean Date: Tue, 10 Feb 2026 17:04:22 +0200 Subject: [PATCH 06/21] fix comments by Qodo --- dagshub/data_engine/model/query_result.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/dagshub/data_engine/model/query_result.py b/dagshub/data_engine/model/query_result.py index 0a08a77f..8ea81d63 100644 --- a/dagshub/data_engine/model/query_result.py +++ b/dagshub/data_engine/model/query_result.py @@ -411,7 +411,6 @@ def _get_blob_fn(dp: Datapoint, field: str, url: str, blob_path: Path): blob_or_path = _get_blob(url, blob_path, auth, cache_on_disk, load_into_memory, path_format) except Exception as e: logger.warning(f"Error while downloading blob for field {field} in datapoint {dp.path}: {e}") - dp.metadata.pop(field, None) return dp.metadata[field] = blob_or_path @@ -434,6 +433,8 @@ def _get_blob_fn(dp: Datapoint, field: str, url: str, blob_path: Path): for dp in self: for fld in document_fields: if 
fld in dp.metadata: + if isinstance(dp.metadata[fld], str): + continue # Override the load_into_memory flag, because we need the contents if not load_into_memory: dp.metadata[fld] = Path(dp.metadata[fld]).read_bytes() @@ -455,6 +456,10 @@ def _convert_annotation_fields(self, *fields, load_into_memory): # Already loaded - skip if isinstance(dp.metadata[fld], MetadataAnnotations): continue + # Still a str means blob download failed - skip + if isinstance(dp.metadata[fld], str): + bad_annotations[fld].append(dp.path) + continue # Override the load_into_memory flag, because we need the contents if not load_into_memory: dp.metadata[fld] = Path(dp.metadata[fld]).read_bytes() From 31d6fc3ea634d9a6adc303b71c2b35ea422344bc Mon Sep 17 00:00:00 2001 From: Dean Date: Tue, 10 Feb 2026 17:04:22 +0200 Subject: [PATCH 07/21] fix comments by Qodo --- dagshub/data_engine/model/query_result.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/dagshub/data_engine/model/query_result.py b/dagshub/data_engine/model/query_result.py index b039ef9e..b3a2c986 100644 --- a/dagshub/data_engine/model/query_result.py +++ b/dagshub/data_engine/model/query_result.py @@ -405,7 +405,6 @@ def _get_blob_fn(dp: Datapoint, field: str, url: str, blob_path: Path): blob_or_path = _get_blob(url, blob_path, auth, cache_on_disk, load_into_memory, path_format) except Exception as e: logger.warning(f"Error while downloading blob for field {field} in datapoint {dp.path}: {e}") - dp.metadata.pop(field, None) return dp.metadata[field] = blob_or_path @@ -428,6 +427,8 @@ def _get_blob_fn(dp: Datapoint, field: str, url: str, blob_path: Path): for dp in self: for fld in document_fields: if fld in dp.metadata: + if isinstance(dp.metadata[fld], str): + continue # Override the load_into_memory flag, because we need the contents if not load_into_memory: dp.metadata[fld] = Path(dp.metadata[fld]).read_bytes() @@ -449,6 +450,10 @@ def _convert_annotation_fields(self, *fields, load_into_memory): # 
Already loaded - skip if isinstance(dp.metadata[fld], MetadataAnnotations): continue + # Still a str means blob download failed - skip + if isinstance(dp.metadata[fld], str): + bad_annotations[fld].append(dp.path) + continue # Override the load_into_memory flag, because we need the contents if not load_into_memory: dp.metadata[fld] = Path(dp.metadata[fld]).read_bytes() From e972946c9b939aa660f6e4200826fa8f23a124d7 Mon Sep 17 00:00:00 2001 From: Dean Date: Tue, 10 Feb 2026 17:27:09 +0200 Subject: [PATCH 08/21] remove assert --- dagshub/data_engine/annotation/importer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dagshub/data_engine/annotation/importer.py b/dagshub/data_engine/annotation/importer.py index 36921708..73ec09a6 100644 --- a/dagshub/data_engine/annotation/importer.py +++ b/dagshub/data_engine/annotation/importer.py @@ -222,7 +222,8 @@ def remap_annotations( if ann.filename is not None: ann.filename = remap_func(ann.filename) else: - assert self.is_video_format, f"Non-video annotation has no filename: {ann}" + if not self.is_video_format: + raise ValueError(f"Non-video annotation has no filename: {ann}") ann.filename = new_filename remapped[new_filename] = anns From a478d8a36d2fc6c951c6fa735f0bea9deee94683 Mon Sep 17 00:00:00 2001 From: Dean Date: Tue, 10 Feb 2026 17:28:06 +0200 Subject: [PATCH 09/21] remove logic expecting string error in get_blob --- dagshub/data_engine/model/datapoint.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/dagshub/data_engine/model/datapoint.py b/dagshub/data_engine/model/datapoint.py index d1499fc6..7b0e1b9a 100644 --- a/dagshub/data_engine/model/datapoint.py +++ b/dagshub/data_engine/model/datapoint.py @@ -183,8 +183,6 @@ def get_blob(self, column: str, cache_on_disk=True, store_value=False) -> bytes: self.blob_cache_location.mkdir(parents=True, exist_ok=True) content = _get_blob(blob_url, blob_location, self.datasource.source.repoApi.auth, cache_on_disk, True) - if type(content) is str: - raise 
RuntimeError(f"Error while downloading blob: {content}") if store_value: self.metadata[column] = content From 550a45330c2de52d6bd46b45481440862a1f33e4 Mon Sep 17 00:00:00 2001 From: Dean Date: Mon, 9 Feb 2026 17:27:08 +0200 Subject: [PATCH 10/21] initial commite --- dagshub/data_engine/annotation/importer.py | 97 +++++++- dagshub/data_engine/annotation/metadata.py | 27 +++ dagshub/data_engine/model/query_result.py | 170 +++++++++++++- .../annotation_import/test_coco.py | 198 +++++++++++++++++ .../annotation_import/test_cvat_video.py | 156 +++++++++++++ .../data_engine/annotation_import/test_mot.py | 210 ++++++++++++++++++ 6 files changed, 845 insertions(+), 13 deletions(-) create mode 100644 tests/data_engine/annotation_import/test_coco.py create mode 100644 tests/data_engine/annotation_import/test_cvat_video.py create mode 100644 tests/data_engine/annotation_import/test_mot.py diff --git a/dagshub/data_engine/annotation/importer.py b/dagshub/data_engine/annotation/importer.py index c19212de..8ddf367e 100644 --- a/dagshub/data_engine/annotation/importer.py +++ b/dagshub/data_engine/annotation/importer.py @@ -1,13 +1,20 @@ from difflib import SequenceMatcher from pathlib import Path, PurePosixPath, PurePath from tempfile import TemporaryDirectory -from typing import TYPE_CHECKING, Literal, Optional, Union, Sequence, Mapping, Callable, List - -from dagshub_annotation_converter.converters.cvat import load_cvat_from_zip +from typing import TYPE_CHECKING, Dict, Literal, Optional, Union, Sequence, Mapping, Callable, List + +from dagshub_annotation_converter.converters.coco import load_coco_from_file +from dagshub_annotation_converter.converters.cvat import ( + load_cvat_from_zip, + load_cvat_from_xml_file, +) +from dagshub_annotation_converter.converters.mot import load_mot_from_dir, load_mot_from_zip from dagshub_annotation_converter.converters.yolo import load_yolo_from_fs +from dagshub_annotation_converter.converters.label_studio_video import 
video_ir_to_ls_video_tasks from dagshub_annotation_converter.formats.label_studio.task import LabelStudioTask from dagshub_annotation_converter.formats.yolo import YoloContext from dagshub_annotation_converter.ir.image.annotations.base import IRAnnotationBase +from dagshub_annotation_converter.ir.video import IRVideoBBoxAnnotation from dagshub.common.api import UserAPI from dagshub.common.api.repo import PathNotFoundError @@ -16,7 +23,7 @@ if TYPE_CHECKING: from dagshub.data_engine.model.datasource import Datasource -AnnotationType = Literal["yolo", "cvat"] +AnnotationType = Literal["yolo", "cvat", "coco", "mot", "cvat_video"] AnnotationLocation = Literal["repo", "disk"] @@ -57,6 +64,10 @@ def __init__( 'Add `yolo_type="bbox"|"segmentation"|pose"` to the arguments.' ) + @property + def is_video_format(self) -> bool: + return self.annotations_type in ("mot", "cvat_video") + def import_annotations(self) -> Mapping[str, Sequence[IRAnnotationBase]]: # Double check that the annotation file exists if self.load_from == "disk": @@ -84,15 +95,66 @@ def import_annotations(self) -> Mapping[str, Sequence[IRAnnotationBase]]: annotation_type=self.additional_args["yolo_type"], meta_file=annotations_file ) elif self.annotations_type == "cvat": - annotation_dict = load_cvat_from_zip(annotations_file) + result = load_cvat_from_zip(annotations_file) + if self._is_video_annotation_dict(result): + annotation_dict = self._flatten_video_annotations(result) + else: + annotation_dict = result + elif self.annotations_type == "coco": + annotation_dict, _ = load_coco_from_file(annotations_file) + elif self.annotations_type == "mot": + mot_kwargs = {} + if "image_width" in self.additional_args: + mot_kwargs["image_width"] = self.additional_args["image_width"] + if "image_height" in self.additional_args: + mot_kwargs["image_height"] = self.additional_args["image_height"] + if annotations_file.suffix == ".zip": + video_anns, _ = load_mot_from_zip(annotations_file, **mot_kwargs) + else: + 
video_anns, _ = load_mot_from_dir(annotations_file, **mot_kwargs) + annotation_dict = self._flatten_video_annotations(video_anns) + elif self.annotations_type == "cvat_video": + cvat_kwargs = {} + if "image_width" in self.additional_args: + cvat_kwargs["image_width"] = self.additional_args["image_width"] + if "image_height" in self.additional_args: + cvat_kwargs["image_height"] = self.additional_args["image_height"] + if annotations_file.suffix == ".zip": + result = load_cvat_from_zip(annotations_file, **cvat_kwargs) + else: + result = load_cvat_from_xml_file(annotations_file, **cvat_kwargs) + if self._is_video_annotation_dict(result): + annotation_dict = self._flatten_video_annotations(result) + else: + annotation_dict = result + else: + raise ValueError(f"Unsupported annotation type: {self.annotations_type}") return annotation_dict + @staticmethod + def _is_video_annotation_dict(result) -> bool: + """Check if the result from a CVAT loader is video annotations (int keys) vs image annotations (str keys).""" + if not isinstance(result, dict) or len(result) == 0: + return False + first_key = next(iter(result.keys())) + return isinstance(first_key, int) + + def _flatten_video_annotations( + self, + frame_annotations: Dict[int, Sequence[IRAnnotationBase]], + ) -> Dict[str, Sequence[IRAnnotationBase]]: + """Flatten frame-indexed video annotations into a single entry keyed by video name.""" + video_name = self.additional_args.get("video_name", self.annotations_file.stem) + all_anns: List[IRAnnotationBase] = [] + for frame_anns in frame_annotations.values(): + all_anns.extend(frame_anns) + return {video_name: all_anns} + def download_annotations(self, dest_dir: Path): log_message("Downloading annotations from repository") repoApi = self.ds.source.repoApi - if self.annotations_type == "cvat": - # Download just the annotation file + if self.annotations_type in ("cvat", "cvat_video"): repoApi.download(self.annotations_file.as_posix(), dest_dir, keep_source_prefix=True) elif 
self.annotations_type == "yolo": # Download the dataset .yaml file and the images + annotations @@ -104,6 +166,8 @@ def download_annotations(self, dest_dir: Path): # Download the annotation data assert context.path is not None repoApi.download(self.annotations_file.parent / context.path, dest_dir, keep_source_prefix=True) + elif self.annotations_type in ("coco", "mot"): + repoApi.download(self.annotations_file.as_posix(), dest_dir, keep_source_prefix=True) @staticmethod def determine_load_location(ds: "Datasource", annotations_path: Union[str, Path]) -> AnnotationLocation: @@ -288,6 +352,8 @@ def convert_to_ls_tasks(self, annotations: Mapping[str, Sequence[IRAnnotationBas """ Converts the annotations to Label Studio tasks. """ + if self.is_video_format: + return self._convert_to_ls_video_tasks(annotations) current_user_id = UserAPI.get_current_user(self.ds.source.repoApi.host).user_id tasks = {} for filename, anns in annotations.items(): @@ -296,3 +362,20 @@ def convert_to_ls_tasks(self, annotations: Mapping[str, Sequence[IRAnnotationBas t.add_ir_annotations(anns) tasks[filename] = t.model_dump_json().encode("utf-8") return tasks + + def _convert_to_ls_video_tasks( + self, annotations: Mapping[str, Sequence[IRAnnotationBase]] + ) -> Mapping[str, bytes]: + """ + Converts video annotations to Label Studio video tasks. 
+ """ + tasks = {} + for filename, anns in annotations.items(): + video_anns = [a for a in anns if isinstance(a, IRVideoBBoxAnnotation)] + if not video_anns: + continue + video_path = self.ds.source.raw_path(filename) + ls_tasks = video_ir_to_ls_video_tasks(video_anns, video_path=video_path) + if ls_tasks: + tasks[filename] = ls_tasks[0].model_dump_json().encode("utf-8") + return tasks diff --git a/dagshub/data_engine/annotation/metadata.py b/dagshub/data_engine/annotation/metadata.py index 06f7bc28..0b080e0f 100644 --- a/dagshub/data_engine/annotation/metadata.py +++ b/dagshub/data_engine/annotation/metadata.py @@ -22,6 +22,11 @@ from dagshub.data_engine.model.datapoint import Datapoint +from dagshub_annotation_converter.formats.label_studio.videorectangle import VideoRectangleAnnotation +from dagshub_annotation_converter.formats.label_studio.task import task_lookup as _task_lookup + +_task_lookup["videorectangle"] = VideoRectangleAnnotation + class AnnotationMetaDict(dict): def __init__(self, annotation: "MetadataAnnotations", *args, **kwargs): @@ -271,6 +276,28 @@ def add_image_pose( self.annotations.append(ann) self._update_datapoint() + def add_coco_annotation( + self, + coco_json: str, + ): + """ + Add annotations from a COCO-format JSON string. + + Args: + coco_json: A COCO-format JSON string with ``categories``, ``images``, and ``annotations`` keys. 
+ """ + from dagshub_annotation_converter.converters.coco import load_coco_from_json_string + + grouped, _ = load_coco_from_json_string(coco_json) + new_anns: list[IRAnnotationBase] = [] + for anns in grouped.values(): + for ann in anns: + ann.filename = self.datapoint.path + new_anns.append(ann) + self.annotations.extend(new_anns) + log_message(f"Added {len(new_anns)} COCO annotation(s) to datapoint {self.datapoint.path}") + self._update_datapoint() + def add_yolo_annotation( self, annotation_type: Literal["bbox", "segmentation", "pose"], diff --git a/dagshub/data_engine/model/query_result.py b/dagshub/data_engine/model/query_result.py index b986b5c3..37569953 100644 --- a/dagshub/data_engine/model/query_result.py +++ b/dagshub/data_engine/model/query_result.py @@ -15,10 +15,16 @@ import dacite import dagshub_annotation_converter.converters.yolo import rich.progress +from dagshub_annotation_converter.converters.coco import export_to_coco_file +from dagshub_annotation_converter.converters.cvat import export_cvat_video_to_file +from dagshub_annotation_converter.converters.mot import export_mot_to_dir +from dagshub_annotation_converter.formats.coco import CocoContext +from dagshub_annotation_converter.formats.mot import MOTContext from dagshub_annotation_converter.formats.yolo import YoloContext from dagshub_annotation_converter.formats.yolo.categories import Categories from dagshub_annotation_converter.formats.yolo.common import ir_mapping from dagshub_annotation_converter.ir.image import IRImageAnnotationBase +from dagshub_annotation_converter.ir.video import IRVideoBBoxAnnotation from pydantic import ValidationError from dagshub.auth import get_token @@ -778,6 +784,20 @@ def _get_all_annotations(self, annotation_field: str) -> List[IRImageAnnotationB annotations.extend(dp.metadata[annotation_field].annotations) return annotations + def _get_all_video_annotations(self, annotation_field: str) -> List[IRVideoBBoxAnnotation]: + all_anns = 
self._get_all_annotations(annotation_field) + return [a for a in all_anns if isinstance(a, IRVideoBBoxAnnotation)] + + def _resolve_annotation_field(self, annotation_field: Optional[str]) -> str: + if annotation_field is not None: + return annotation_field + annotation_fields = sorted([f.name for f in self.fields if f.is_annotation()]) + if len(annotation_fields) == 0: + raise ValueError("No annotation fields found in the datasource") + annotation_field = annotation_fields[0] + log_message(f"Using annotations from field {annotation_field}") + return annotation_field + def export_as_yolo( self, download_dir: Optional[Union[str, Path]] = None, @@ -803,12 +823,7 @@ def export_as_yolo( Returns: The path to the YAML file with the metadata. Pass this path to ``YOLO.train()`` to train a model. """ - if annotation_field is None: - annotation_fields = sorted([f.name for f in self.fields if f.is_annotation()]) - if len(annotation_fields) == 0: - raise ValueError("No annotation fields found in the datasource") - annotation_field = annotation_fields[0] - log_message(f"Using annotations from field {annotation_field}") + annotation_field = self._resolve_annotation_field(annotation_field) if download_dir is None: download_dir = Path("dagshub_export") @@ -861,6 +876,149 @@ def export_as_yolo( log_message(f"Done! Saved YOLO Dataset, YAML file is at {yaml_path.absolute()}") return yaml_path + def export_as_coco( + self, + download_dir: Optional[Union[str, Path]] = None, + annotation_field: Optional[str] = None, + output_filename: str = "annotations.json", + classes: Optional[Dict[int, str]] = None, + ) -> Path: + """ + Downloads the files and exports annotations in COCO format. + + Args: + download_dir: Where to download the files. Defaults to ``./dagshub_export`` + annotation_field: Field with the annotations. If None, uses the first alphabetical annotation field. + output_filename: Name of the output COCO JSON file. Default is ``annotations.json``. 
+ classes: Category mapping for the COCO dataset as ``{id: name}``. + If ``None``, categories will be inferred from the annotations. + + Returns: + Path to the exported COCO JSON file. + """ + annotation_field = self._resolve_annotation_field(annotation_field) + + if download_dir is None: + download_dir = Path("dagshub_export") + download_dir = Path(download_dir) + + annotations = self._get_all_annotations(annotation_field) + if not annotations: + raise RuntimeError("No annotations found to export") + + context = CocoContext() + if classes is not None: + context.categories = dict(classes) + + # Add the source prefix to all annotations + for ann in annotations: + ann.filename = os.path.join(self.datasource.source.source_prefix, ann.filename) + + image_download_path = download_dir / "data" + log_message("Downloading image files...") + self.download_files(image_download_path) + + output_path = download_dir / output_filename + log_message("Exporting COCO annotations...") + result_path = export_to_coco_file(annotations, output_path, context=context) + log_message(f"Done! Saved COCO annotations to {result_path.absolute()}") + return result_path + + def export_as_mot( + self, + download_dir: Optional[Union[str, Path]] = None, + annotation_field: Optional[str] = None, + image_width: Optional[int] = None, + image_height: Optional[int] = None, + ) -> Path: + """ + Exports video annotations in MOT (Multiple Object Tracking) format. + + The output follows the MOT Challenge directory structure:: + + output_dir/ + gt/ + gt.txt + labels.txt + seqinfo.ini + + Args: + download_dir: Where to export. Defaults to ``./dagshub_export`` + annotation_field: Field with the annotations. If None, uses the first alphabetical annotation field. + image_width: Frame width. If None, inferred from annotations. + image_height: Frame height. If None, inferred from annotations. + + Returns: + Path to the exported MOT directory. 
+ """ + annotation_field = self._resolve_annotation_field(annotation_field) + + if download_dir is None: + download_dir = Path("dagshub_export") + download_dir = Path(download_dir) / "mot" + + video_annotations = self._get_all_video_annotations(annotation_field) + if not video_annotations: + raise RuntimeError("No video annotations found to export") + + context = MOTContext() + if image_width is not None: + context.image_width = image_width + elif video_annotations: + context.image_width = video_annotations[0].image_width + if image_height is not None: + context.image_height = image_height + elif video_annotations: + context.image_height = video_annotations[0].image_height + + log_message("Exporting MOT annotations...") + result_path = export_mot_to_dir(video_annotations, context, download_dir) + log_message(f"Done! Saved MOT annotations to {result_path.absolute()}") + return result_path + + def export_as_cvat_video( + self, + download_dir: Optional[Union[str, Path]] = None, + annotation_field: Optional[str] = None, + video_name: str = "video.mp4", + image_width: Optional[int] = None, + image_height: Optional[int] = None, + ) -> Path: + """ + Exports video annotations in CVAT video XML format. + + Args: + download_dir: Where to export. Defaults to ``./dagshub_export`` + annotation_field: Field with the annotations. If None, uses the first alphabetical annotation field. + video_name: Name of the source video to embed in the XML metadata. + image_width: Frame width. If None, inferred from annotations. + image_height: Frame height. If None, inferred from annotations. + + Returns: + Path to the exported CVAT video XML file. 
+ """ + annotation_field = self._resolve_annotation_field(annotation_field) + + if download_dir is None: + download_dir = Path("dagshub_export") + download_dir = Path(download_dir) + + video_annotations = self._get_all_video_annotations(annotation_field) + if not video_annotations: + raise RuntimeError("No video annotations found to export") + + output_path = download_dir / "annotations.xml" + log_message("Exporting CVAT video annotations...") + result_path = export_cvat_video_to_file( + video_annotations, + output_path, + video_name=video_name, + image_width=image_width, + image_height=image_height, + ) + log_message(f"Done! Saved CVAT video annotations to {result_path.absolute()}") + return result_path + def to_voxel51_dataset(self, **kwargs) -> "fo.Dataset": """ Creates a voxel51 dataset that can be used with\ diff --git a/tests/data_engine/annotation_import/test_coco.py b/tests/data_engine/annotation_import/test_coco.py new file mode 100644 index 00000000..38180811 --- /dev/null +++ b/tests/data_engine/annotation_import/test_coco.py @@ -0,0 +1,198 @@ +import datetime +import json +from pathlib import PurePosixPath +from unittest.mock import patch, PropertyMock + +import pytest +from dagshub_annotation_converter.ir.image import ( + IRBBoxImageAnnotation, + IRSegmentationImageAnnotation, + IRSegmentationPoint, + CoordinateStyle, +) + +from dagshub.data_engine.annotation.importer import AnnotationImporter +from dagshub.data_engine.annotation.metadata import MetadataAnnotations +from dagshub.data_engine.client.models import MetadataSelectFieldSchema +from dagshub.data_engine.dtypes import MetadataFieldType, ReservedTags +from dagshub.data_engine.model.datapoint import Datapoint +from dagshub.data_engine.model.query_result import QueryResult + + +@pytest.fixture(autouse=True) +def mock_source_prefix(ds): + with patch.object(type(ds.source), "source_prefix", new_callable=PropertyMock, return_value=PurePosixPath()): + yield + + +# --- COCO import --- + + +def 
test_import_coco_from_file(ds, tmp_path): + coco_file = tmp_path / "annotations.json" + coco_file.write_text(json.dumps(_make_coco_json())) + + importer = AnnotationImporter(ds, "coco", coco_file, load_from="disk") + result = importer.import_annotations() + + assert "image1.jpg" in result + assert len(result["image1.jpg"]) == 1 + assert isinstance(result["image1.jpg"][0], IRBBoxImageAnnotation) + + +def test_convert_image_to_ls_tasks(ds, tmp_path, mock_dagshub_auth): + importer = AnnotationImporter(ds, "coco", tmp_path / "ann.json", load_from="disk") + bbox = IRBBoxImageAnnotation( + filename="test.jpg", + categories={"cat": 1.0}, + top=0.1, left=0.1, width=0.2, height=0.2, + image_width=640, image_height=480, + coordinate_style=CoordinateStyle.NORMALIZED, + ) + + tasks = importer.convert_to_ls_tasks({"test.jpg": [bbox]}) + + assert "test.jpg" in tasks + task_json = json.loads(tasks["test.jpg"]) + assert "annotations" in task_json + + +# --- add_coco_annotation --- + + +def test_add_coco_annotation(ds, mock_dagshub_auth): + dp = Datapoint(datasource=ds, path="test.jpg", datapoint_id=0, metadata={}) + meta_ann = MetadataAnnotations(datapoint=dp, field="ann") + meta_ann.add_coco_annotation(json.dumps(_make_coco_json())) + + assert len(meta_ann.annotations) == 1 + assert isinstance(meta_ann.annotations[0], IRBBoxImageAnnotation) + assert meta_ann.annotations[0].filename == "test.jpg" + + +def test_add_coco_annotation_segmentation(ds, mock_dagshub_auth): + dp = Datapoint(datasource=ds, path="test.jpg", datapoint_id=0, metadata={}) + coco = { + "categories": [{"id": 1, "name": "dog"}], + "images": [{"id": 1, "width": 640, "height": 480, "file_name": "img.jpg"}], + "annotations": [ + {"id": 1, "image_id": 1, "category_id": 1, "segmentation": [[10, 20, 30, 40, 50, 60]]} + ], + } + meta_ann = MetadataAnnotations(datapoint=dp, field="ann") + meta_ann.add_coco_annotation(json.dumps(coco)) + + assert len(meta_ann.annotations) == 1 + + +# --- _resolve_annotation_field --- + + 
+def test_resolve_explicit(ds): + qr = _make_qr(ds, [], ann_field="my_ann") + assert qr._resolve_annotation_field("explicit") == "explicit" + + +def test_resolve_auto(ds): + qr = _make_qr(ds, [], ann_field="my_ann") + assert qr._resolve_annotation_field(None) == "my_ann" + + +def test_resolve_no_fields(ds): + qr = _make_qr(ds, [], ann_field=None) + with pytest.raises(ValueError, match="No annotation fields"): + qr._resolve_annotation_field(None) + + +# --- export_as_coco --- + + +def test_export_as_coco_bbox(ds, tmp_path): + dp = Datapoint(datasource=ds, path="images/test.jpg", datapoint_id=0, metadata={}) + ann = IRBBoxImageAnnotation( + filename="images/test.jpg", categories={"cat": 1.0}, + top=20.0, left=10.0, width=30.0, height=40.0, + image_width=640, image_height=480, + coordinate_style=CoordinateStyle.DENORMALIZED, + ) + dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=[ann]) + + qr = _make_qr(ds, [dp], ann_field="ann") + with patch.object(qr, "download_files"): + result = qr.export_as_coco(download_dir=tmp_path, annotation_field="ann") + + assert result.exists() + coco = json.loads(result.read_text()) + assert len(coco["annotations"]) == 1 + assert len(coco["images"]) == 1 + assert coco["annotations"][0]["bbox"] == [10.0, 20.0, 30.0, 40.0] + + +def test_export_as_coco_segmentation(ds, tmp_path): + dp = Datapoint(datasource=ds, path="images/test.jpg", datapoint_id=0, metadata={}) + ann = IRSegmentationImageAnnotation( + filename="images/test.jpg", categories={"dog": 1.0}, + points=[IRSegmentationPoint(x=10, y=20), IRSegmentationPoint(x=30, y=40), IRSegmentationPoint(x=50, y=60)], + image_width=640, image_height=480, + coordinate_style=CoordinateStyle.DENORMALIZED, + ) + dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=[ann]) + + qr = _make_qr(ds, [dp], ann_field="ann") + with patch.object(qr, "download_files"): + result = qr.export_as_coco(download_dir=tmp_path, annotation_field="ann") + + coco = 
json.loads(result.read_text()) + assert len(coco["annotations"]) == 1 + assert "segmentation" in coco["annotations"][0] + + +def test_export_as_coco_no_annotations(ds, tmp_path): + dp = Datapoint(datasource=ds, path="test.jpg", datapoint_id=0, metadata={}) + dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=[]) + + qr = _make_qr(ds, [dp], ann_field="ann") + with pytest.raises(RuntimeError, match="No annotations found"): + qr.export_as_coco(download_dir=tmp_path, annotation_field="ann") + + +def test_export_as_coco_with_classes(ds, tmp_path): + dp = Datapoint(datasource=ds, path="images/test.jpg", datapoint_id=0, metadata={}) + ann = IRBBoxImageAnnotation( + filename="images/test.jpg", categories={"cat": 1.0}, + top=20.0, left=10.0, width=30.0, height=40.0, + image_width=640, image_height=480, + coordinate_style=CoordinateStyle.DENORMALIZED, + ) + dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=[ann]) + + qr = _make_qr(ds, [dp], ann_field="ann") + with patch.object(qr, "download_files"): + result = qr.export_as_coco(download_dir=tmp_path, annotation_field="ann", classes={1: "cat", 2: "dog"}) + + coco = json.loads(result.read_text()) + cat_names = {c["name"] for c in coco["categories"]} + assert "cat" in cat_names + + +# --- Helpers --- + + +def _make_coco_json(): + return { + "categories": [{"id": 1, "name": "cat"}], + "images": [{"id": 1, "width": 640, "height": 480, "file_name": "image1.jpg"}], + "annotations": [{"id": 1, "image_id": 1, "category_id": 1, "bbox": [10, 20, 30, 40]}], + } + + +def _make_qr(ds, datapoints, ann_field=None): + fields = [] + if ann_field: + fields.append(MetadataSelectFieldSchema( + asOf=int(datetime.datetime.now().timestamp()), + autoGenerated=False, originalName=ann_field, + multiple=False, valueType=MetadataFieldType.BLOB, + name=ann_field, tags={ReservedTags.ANNOTATION.value}, + )) + return QueryResult(datasource=ds, _entries=datapoints, fields=fields) diff --git 
a/tests/data_engine/annotation_import/test_cvat_video.py b/tests/data_engine/annotation_import/test_cvat_video.py new file mode 100644 index 00000000..8a0dac69 --- /dev/null +++ b/tests/data_engine/annotation_import/test_cvat_video.py @@ -0,0 +1,156 @@ +import datetime +import json +from pathlib import PurePosixPath +from unittest.mock import patch, PropertyMock + +import pytest +from dagshub_annotation_converter.converters.cvat import export_cvat_video_to_xml_string +from dagshub_annotation_converter.ir.image import IRBBoxImageAnnotation, CoordinateStyle +from dagshub_annotation_converter.ir.video import IRVideoBBoxAnnotation + +from dagshub.data_engine.annotation.importer import AnnotationImporter +from dagshub.data_engine.annotation.metadata import MetadataAnnotations +from dagshub.data_engine.client.models import MetadataSelectFieldSchema +from dagshub.data_engine.dtypes import MetadataFieldType, ReservedTags +from dagshub.data_engine.model.datapoint import Datapoint +from dagshub.data_engine.model.query_result import QueryResult + + +@pytest.fixture(autouse=True) +def mock_source_prefix(ds): + with patch.object(type(ds.source), "source_prefix", new_callable=PropertyMock, return_value=PurePosixPath()): + yield + + +# --- CVAT video import --- + + +def test_import_cvat_video_from_xml(ds, tmp_path): + xml_file = tmp_path / "annotations.xml" + xml_file.write_bytes(_make_cvat_video_xml()) + + importer = AnnotationImporter(ds, "cvat_video", xml_file, load_from="disk") + result = importer.import_annotations() + + assert len(result) == 1 + anns = list(result.values())[0] + assert len(anns) == 2 + assert all(isinstance(a, IRVideoBBoxAnnotation) for a in anns) + + +# --- _get_all_video_annotations --- + + +def test_get_all_video_annotations_filters(ds): + image_ann = IRBBoxImageAnnotation( + filename="test.jpg", categories={"cat": 1.0}, + top=0.1, left=0.1, width=0.2, height=0.2, + image_width=640, image_height=480, + coordinate_style=CoordinateStyle.NORMALIZED, + ) + 
video_ann = _make_video_bbox() + + dp = Datapoint(datasource=ds, path="dp_0", datapoint_id=0, metadata={}) + dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=[image_ann, video_ann]) + + qr = _make_qr(ds, [dp], ann_field="ann") + result = qr._get_all_video_annotations("ann") + assert len(result) == 1 + assert isinstance(result[0], IRVideoBBoxAnnotation) + + +def test_get_all_video_annotations_empty(ds): + dp = Datapoint(datasource=ds, path="dp_0", datapoint_id=0, metadata={}) + dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=[]) + + qr = _make_qr(ds, [dp], ann_field="ann") + assert qr._get_all_video_annotations("ann") == [] + + +# --- videorectangle LS round-trip --- + + +def test_videorectangle_ls_roundtrip(): + from dagshub_annotation_converter.converters.label_studio_video import ( + video_ir_to_ls_video_tasks, + ls_video_json_to_video_ir, + ) + + anns = [_make_video_bbox(frame=0, track_id=1), _make_video_bbox(frame=5, track_id=1)] + tasks = video_ir_to_ls_video_tasks(anns) + assert len(tasks) == 1 + + recovered = ls_video_json_to_video_ir(tasks[0].model_dump_json()) + assert len(recovered) == 2 + assert recovered[0].frame_number == 0 + assert recovered[1].frame_number == 5 + + +# --- export_as_cvat_video --- + + +def test_export_as_cvat_video(ds, tmp_path): + dp = Datapoint(datasource=ds, path="video.mp4", datapoint_id=0, metadata={}) + anns = [_make_video_bbox(frame=0, track_id=0), _make_video_bbox(frame=5, track_id=0)] + dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=anns) + + qr = _make_qr(ds, [dp], ann_field="ann") + result = qr.export_as_cvat_video(download_dir=tmp_path, annotation_field="ann") + + assert result.exists() + content = result.read_text() + assert " IRVideoBBoxAnnotation: + return IRVideoBBoxAnnotation( + track_id=track_id, frame_number=frame, + left=100.0, top=150.0, width=50.0, height=80.0, + image_width=1920, image_height=1080, + 
categories={"person": 1.0}, + coordinate_style=CoordinateStyle.DENORMALIZED, + ) + + +def _make_cvat_video_xml() -> bytes: + anns = [_make_video_bbox(frame=0, track_id=0), _make_video_bbox(frame=5, track_id=0)] + return export_cvat_video_to_xml_string(anns) + + +def _make_qr(ds, datapoints, ann_field=None): + fields = [] + if ann_field: + fields.append(MetadataSelectFieldSchema( + asOf=int(datetime.datetime.now().timestamp()), + autoGenerated=False, originalName=ann_field, + multiple=False, valueType=MetadataFieldType.BLOB, + name=ann_field, tags={ReservedTags.ANNOTATION.value}, + )) + return QueryResult(datasource=ds, _entries=datapoints, fields=fields) diff --git a/tests/data_engine/annotation_import/test_mot.py b/tests/data_engine/annotation_import/test_mot.py new file mode 100644 index 00000000..93e3132d --- /dev/null +++ b/tests/data_engine/annotation_import/test_mot.py @@ -0,0 +1,210 @@ +import configparser +import datetime +import json +import zipfile +from pathlib import Path, PurePosixPath +from unittest.mock import patch, PropertyMock + +import pytest +from dagshub_annotation_converter.ir.image import CoordinateStyle +from dagshub_annotation_converter.ir.video import IRVideoBBoxAnnotation + +from dagshub.data_engine.annotation.importer import AnnotationImporter +from dagshub.data_engine.annotation.metadata import MetadataAnnotations +from dagshub.data_engine.client.models import MetadataSelectFieldSchema +from dagshub.data_engine.dtypes import MetadataFieldType, ReservedTags +from dagshub.data_engine.model.datapoint import Datapoint +from dagshub.data_engine.model.query_result import QueryResult + + +@pytest.fixture(autouse=True) +def mock_source_prefix(ds): + with patch.object(type(ds.source), "source_prefix", new_callable=PropertyMock, return_value=PurePosixPath()): + yield + + +# --- _is_video_annotation_dict --- + + +def test_is_video_dict_with_int_keys(): + assert AnnotationImporter._is_video_annotation_dict({0: [], 1: []}) is True + + +def 
test_is_video_dict_with_str_keys(): + assert AnnotationImporter._is_video_annotation_dict({"file.jpg": []}) is False + + +def test_is_video_dict_empty(): + assert AnnotationImporter._is_video_annotation_dict({}) is False + + +def test_is_video_dict_non_dict(): + assert AnnotationImporter._is_video_annotation_dict([]) is False + + +# --- is_video_format --- + + +@pytest.mark.parametrize( + "ann_type, expected", + [ + ("yolo", False), + ("cvat", False), + ("coco", False), + ("mot", True), + ("cvat_video", True), + ], +) +def test_is_video_format(ds, ann_type, expected, tmp_path): + kwargs = {} + if ann_type == "yolo": + kwargs["yolo_type"] = "bbox" + importer = AnnotationImporter(ds, ann_type, tmp_path / "dummy", load_from="disk", **kwargs) + assert importer.is_video_format is expected + + +# --- _flatten_video_annotations --- + + +def test_flatten_video_annotations(ds, tmp_path): + importer = AnnotationImporter(ds, "mot", tmp_path / "test_video", load_from="disk") + ann = _make_video_bbox(frame=0) + result = importer._flatten_video_annotations({0: [ann], 5: [ann]}) + assert "test_video" in result + assert len(result["test_video"]) == 2 + + +def test_flatten_video_annotations_custom_name(ds, tmp_path): + importer = AnnotationImporter(ds, "mot", tmp_path / "test_video", load_from="disk", video_name="my_video.mp4") + result = importer._flatten_video_annotations({0: [_make_video_bbox()]}) + assert "my_video.mp4" in result + + +# --- convert_to_ls_tasks for video --- + + +def test_convert_video_to_ls_tasks(ds, tmp_path): + importer = AnnotationImporter(ds, "mot", tmp_path / "video", load_from="disk") + video_anns = {"video.mp4": [_make_video_bbox(frame=0), _make_video_bbox(frame=1)]} + + tasks = importer.convert_to_ls_tasks(video_anns) + + assert "video.mp4" in tasks + task_json = json.loads(tasks["video.mp4"]) + assert "annotations" in task_json + + +# --- MOT import --- + + +def test_import_mot_from_dir(ds, tmp_path): + mot_dir = tmp_path / "mot_seq" + 
_create_mot_dir(mot_dir) + + importer = AnnotationImporter(ds, "mot", mot_dir, load_from="disk") + result = importer.import_annotations() + + assert len(result) == 1 + anns = list(result.values())[0] + assert len(anns) == 2 + assert all(isinstance(a, IRVideoBBoxAnnotation) for a in anns) + + +def test_import_mot_from_zip(ds, tmp_path): + mot_dir = tmp_path / "mot_seq" + _create_mot_dir(mot_dir) + + zip_path = tmp_path / "mot.zip" + with zipfile.ZipFile(zip_path, "w") as z: + z.write(mot_dir / "gt" / "gt.txt", "gt/gt.txt") + z.write(mot_dir / "gt" / "labels.txt", "gt/labels.txt") + z.write(mot_dir / "seqinfo.ini", "seqinfo.ini") + + importer = AnnotationImporter(ds, "mot", zip_path, load_from="disk") + result = importer.import_annotations() + + assert len(result) == 1 + anns = list(result.values())[0] + assert len(anns) == 2 + + +# --- export_as_mot --- + + +def test_export_as_mot(ds, tmp_path): + dp = Datapoint(datasource=ds, path="video.mp4", datapoint_id=0, metadata={}) + anns = [_make_video_bbox(frame=0, track_id=1), _make_video_bbox(frame=1, track_id=1)] + dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=anns) + + qr = _make_qr(ds, [dp], ann_field="ann") + result = qr.export_as_mot(download_dir=tmp_path, annotation_field="ann") + + assert result.exists() + assert (result / "gt" / "gt.txt").exists() + assert (result / "gt" / "labels.txt").exists() + assert (result / "seqinfo.ini").exists() + gt_lines = (result / "gt" / "gt.txt").read_text().strip().splitlines() + assert len(gt_lines) == 2 + + +def test_export_as_mot_no_annotations(ds, tmp_path): + dp = Datapoint(datasource=ds, path="video.mp4", datapoint_id=0, metadata={}) + dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=[]) + + qr = _make_qr(ds, [dp], ann_field="ann") + with pytest.raises(RuntimeError, match="No video annotations"): + qr.export_as_mot(download_dir=tmp_path, annotation_field="ann") + + +def test_export_as_mot_explicit_dimensions(ds, 
tmp_path): + dp = Datapoint(datasource=ds, path="video.mp4", datapoint_id=0, metadata={}) + anns = [_make_video_bbox(frame=0)] + dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=anns) + + qr = _make_qr(ds, [dp], ann_field="ann") + result = qr.export_as_mot( + download_dir=tmp_path, annotation_field="ann", image_width=1280, image_height=720 + ) + + seqinfo = (result / "seqinfo.ini").read_text() + assert "1280" in seqinfo + assert "720" in seqinfo + + +# --- Helpers --- + + +def _make_video_bbox(frame=0, track_id=0) -> IRVideoBBoxAnnotation: + return IRVideoBBoxAnnotation( + track_id=track_id, frame_number=frame, + left=100.0, top=150.0, width=50.0, height=80.0, + image_width=1920, image_height=1080, + categories={"person": 1.0}, + coordinate_style=CoordinateStyle.DENORMALIZED, + ) + + +def _create_mot_dir(mot_dir: Path): + gt_dir = mot_dir / "gt" + gt_dir.mkdir(parents=True) + (gt_dir / "gt.txt").write_text("1,1,100,150,50,80,1,1,1.0\n2,1,110,160,50,80,1,1,0.9\n") + (gt_dir / "labels.txt").write_text("person\n") + config = configparser.ConfigParser() + config["Sequence"] = { + "name": "test", "frameRate": "30", "seqLength": "100", + "imWidth": "1920", "imHeight": "1080", + } + with open(mot_dir / "seqinfo.ini", "w") as f: + config.write(f) + + +def _make_qr(ds, datapoints, ann_field=None): + fields = [] + if ann_field: + fields.append(MetadataSelectFieldSchema( + asOf=int(datetime.datetime.now().timestamp()), + autoGenerated=False, originalName=ann_field, + multiple=False, valueType=MetadataFieldType.BLOB, + name=ann_field, tags={ReservedTags.ANNOTATION.value}, + )) + return QueryResult(datasource=ds, _entries=datapoints, fields=fields) From 01fc310ccf7e933ee632e73de0d8bf357589862c Mon Sep 17 00:00:00 2001 From: Dean Date: Mon, 9 Feb 2026 17:49:22 +0200 Subject: [PATCH 11/21] finished tests for new converter functionality. 
Need to do manual QA before opening a PR --- dagshub/data_engine/model/query_result.py | 4 +- .../annotation_import/test_coco.py | 152 ++++++++++-------- .../annotation_import/test_cvat_video.py | 99 +++++++----- .../data_engine/annotation_import/test_mot.py | 134 ++++++++------- 4 files changed, 231 insertions(+), 158 deletions(-) diff --git a/dagshub/data_engine/model/query_result.py b/dagshub/data_engine/model/query_result.py index 37569953..2a2a0d4b 100644 --- a/dagshub/data_engine/model/query_result.py +++ b/dagshub/data_engine/model/query_result.py @@ -964,11 +964,11 @@ def export_as_mot( context = MOTContext() if image_width is not None: context.image_width = image_width - elif video_annotations: + else: context.image_width = video_annotations[0].image_width if image_height is not None: context.image_height = image_height - elif video_annotations: + else: context.image_height = video_annotations[0].image_height log_message("Exporting MOT annotations...") diff --git a/tests/data_engine/annotation_import/test_coco.py b/tests/data_engine/annotation_import/test_coco.py index 38180811..9b238fd1 100644 --- a/tests/data_engine/annotation_import/test_coco.py +++ b/tests/data_engine/annotation_import/test_coco.py @@ -6,12 +6,10 @@ import pytest from dagshub_annotation_converter.ir.image import ( IRBBoxImageAnnotation, - IRSegmentationImageAnnotation, - IRSegmentationPoint, CoordinateStyle, ) -from dagshub.data_engine.annotation.importer import AnnotationImporter +from dagshub.data_engine.annotation.importer import AnnotationImporter, AnnotationsNotFoundError from dagshub.data_engine.annotation.metadata import MetadataAnnotations from dagshub.data_engine.client.models import MetadataSelectFieldSchema from dagshub.data_engine.dtypes import MetadataFieldType, ReservedTags @@ -25,14 +23,12 @@ def mock_source_prefix(ds): yield -# --- COCO import --- +# --- import --- def test_import_coco_from_file(ds, tmp_path): - coco_file = tmp_path / "annotations.json" - 
coco_file.write_text(json.dumps(_make_coco_json())) - - importer = AnnotationImporter(ds, "coco", coco_file, load_from="disk") + _write_coco(tmp_path, _make_coco_json()) + importer = AnnotationImporter(ds, "coco", tmp_path / "annotations.json", load_from="disk") result = importer.import_annotations() assert "image1.jpg" in result @@ -40,74 +36,77 @@ def test_import_coco_from_file(ds, tmp_path): assert isinstance(result["image1.jpg"][0], IRBBoxImageAnnotation) -def test_convert_image_to_ls_tasks(ds, tmp_path, mock_dagshub_auth): +def test_import_coco_nonexistent_raises(ds, tmp_path): + importer = AnnotationImporter(ds, "coco", tmp_path / "nope.json", load_from="disk") + with pytest.raises(AnnotationsNotFoundError): + importer.import_annotations() + + +def test_coco_convert_to_ls_tasks(ds, tmp_path, mock_dagshub_auth): importer = AnnotationImporter(ds, "coco", tmp_path / "ann.json", load_from="disk") bbox = IRBBoxImageAnnotation( - filename="test.jpg", - categories={"cat": 1.0}, + filename="test.jpg", categories={"cat": 1.0}, top=0.1, left=0.1, width=0.2, height=0.2, image_width=640, image_height=480, coordinate_style=CoordinateStyle.NORMALIZED, ) - tasks = importer.convert_to_ls_tasks({"test.jpg": [bbox]}) assert "test.jpg" in tasks task_json = json.loads(tasks["test.jpg"]) assert "annotations" in task_json + assert len(task_json["annotations"]) > 0 # --- add_coco_annotation --- -def test_add_coco_annotation(ds, mock_dagshub_auth): - dp = Datapoint(datasource=ds, path="test.jpg", datapoint_id=0, metadata={}) +def test_add_coco_annotation_rewrites_filename(ds, mock_dagshub_auth): + dp = Datapoint(datasource=ds, path="my_images/photo.jpg", datapoint_id=0, metadata={}) meta_ann = MetadataAnnotations(datapoint=dp, field="ann") meta_ann.add_coco_annotation(json.dumps(_make_coco_json())) assert len(meta_ann.annotations) == 1 assert isinstance(meta_ann.annotations[0], IRBBoxImageAnnotation) - assert meta_ann.annotations[0].filename == "test.jpg" - - -def 
test_add_coco_annotation_segmentation(ds, mock_dagshub_auth): - dp = Datapoint(datasource=ds, path="test.jpg", datapoint_id=0, metadata={}) - coco = { - "categories": [{"id": 1, "name": "dog"}], - "images": [{"id": 1, "width": 640, "height": 480, "file_name": "img.jpg"}], - "annotations": [ - {"id": 1, "image_id": 1, "category_id": 1, "segmentation": [[10, 20, 30, 40, 50, 60]]} - ], - } - meta_ann = MetadataAnnotations(datapoint=dp, field="ann") - meta_ann.add_coco_annotation(json.dumps(coco)) - - assert len(meta_ann.annotations) == 1 + assert meta_ann.annotations[0].filename == "my_images/photo.jpg" # --- _resolve_annotation_field --- -def test_resolve_explicit(ds): +def test_resolve_explicit_field(ds): qr = _make_qr(ds, [], ann_field="my_ann") assert qr._resolve_annotation_field("explicit") == "explicit" -def test_resolve_auto(ds): +def test_resolve_auto_field(ds): qr = _make_qr(ds, [], ann_field="my_ann") assert qr._resolve_annotation_field(None) == "my_ann" -def test_resolve_no_fields(ds): +def test_resolve_no_fields_raises(ds): qr = _make_qr(ds, [], ann_field=None) with pytest.raises(ValueError, match="No annotation fields"): qr._resolve_annotation_field(None) +def test_resolve_picks_alphabetically_first(ds): + fields = [] + for name in ["zebra_ann", "alpha_ann"]: + fields.append(MetadataSelectFieldSchema( + asOf=int(datetime.datetime.now().timestamp()), + autoGenerated=False, originalName=name, + multiple=False, valueType=MetadataFieldType.BLOB, + name=name, tags={ReservedTags.ANNOTATION.value}, + )) + qr = QueryResult(datasource=ds, _entries=[], fields=fields) + assert qr._resolve_annotation_field(None) == "alpha_ann" + + # --- export_as_coco --- -def test_export_as_coco_bbox(ds, tmp_path): +def test_export_coco_bbox_coordinates(ds, tmp_path): dp = Datapoint(datasource=ds, path="images/test.jpg", datapoint_id=0, metadata={}) ann = IRBBoxImageAnnotation( filename="images/test.jpg", categories={"cat": 1.0}, @@ -121,61 +120,69 @@ def 
test_export_as_coco_bbox(ds, tmp_path): with patch.object(qr, "download_files"): result = qr.export_as_coco(download_dir=tmp_path, annotation_field="ann") - assert result.exists() coco = json.loads(result.read_text()) - assert len(coco["annotations"]) == 1 - assert len(coco["images"]) == 1 assert coco["annotations"][0]["bbox"] == [10.0, 20.0, 30.0, 40.0] -def test_export_as_coco_segmentation(ds, tmp_path): +def test_export_coco_no_annotations_raises(ds, tmp_path): + dp = Datapoint(datasource=ds, path="test.jpg", datapoint_id=0, metadata={}) + dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=[]) + + qr = _make_qr(ds, [dp], ann_field="ann") + with pytest.raises(RuntimeError, match="No annotations found"): + qr.export_as_coco(download_dir=tmp_path, annotation_field="ann") + + +def test_export_coco_explicit_classes(ds, tmp_path): dp = Datapoint(datasource=ds, path="images/test.jpg", datapoint_id=0, metadata={}) - ann = IRSegmentationImageAnnotation( - filename="images/test.jpg", categories={"dog": 1.0}, - points=[IRSegmentationPoint(x=10, y=20), IRSegmentationPoint(x=30, y=40), IRSegmentationPoint(x=50, y=60)], - image_width=640, image_height=480, - coordinate_style=CoordinateStyle.DENORMALIZED, + dp.metadata["ann"] = MetadataAnnotations( + datapoint=dp, field="ann", annotations=[_make_image_bbox("images/test.jpg")] ) - dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=[ann]) qr = _make_qr(ds, [dp], ann_field="ann") with patch.object(qr, "download_files"): - result = qr.export_as_coco(download_dir=tmp_path, annotation_field="ann") + result = qr.export_as_coco( + download_dir=tmp_path, annotation_field="ann", classes={1: "cat", 2: "dog"} + ) coco = json.loads(result.read_text()) - assert len(coco["annotations"]) == 1 - assert "segmentation" in coco["annotations"][0] + assert "cat" in {c["name"] for c in coco["categories"]} -def test_export_as_coco_no_annotations(ds, tmp_path): - dp = Datapoint(datasource=ds, 
path="test.jpg", datapoint_id=0, metadata={}) - dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=[]) +def test_export_coco_custom_filename(ds, tmp_path): + dp = Datapoint(datasource=ds, path="images/test.jpg", datapoint_id=0, metadata={}) + dp.metadata["ann"] = MetadataAnnotations( + datapoint=dp, field="ann", annotations=[_make_image_bbox("images/test.jpg")] + ) qr = _make_qr(ds, [dp], ann_field="ann") - with pytest.raises(RuntimeError, match="No annotations found"): - qr.export_as_coco(download_dir=tmp_path, annotation_field="ann") + with patch.object(qr, "download_files"): + result = qr.export_as_coco( + download_dir=tmp_path, annotation_field="ann", output_filename="custom.json" + ) + assert result.name == "custom.json" -def test_export_as_coco_with_classes(ds, tmp_path): - dp = Datapoint(datasource=ds, path="images/test.jpg", datapoint_id=0, metadata={}) - ann = IRBBoxImageAnnotation( - filename="images/test.jpg", categories={"cat": 1.0}, - top=20.0, left=10.0, width=30.0, height=40.0, - image_width=640, image_height=480, - coordinate_style=CoordinateStyle.DENORMALIZED, - ) - dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=[ann]) - qr = _make_qr(ds, [dp], ann_field="ann") +def test_export_coco_multiple_datapoints(ds, tmp_path): + dps = [] + for i, name in enumerate(["a.jpg", "b.jpg"]): + dp = Datapoint(datasource=ds, path=name, datapoint_id=i, metadata={}) + dp.metadata["ann"] = MetadataAnnotations( + datapoint=dp, field="ann", annotations=[_make_image_bbox(name)] + ) + dps.append(dp) + + qr = _make_qr(ds, dps, ann_field="ann") with patch.object(qr, "download_files"): - result = qr.export_as_coco(download_dir=tmp_path, annotation_field="ann", classes={1: "cat", 2: "dog"}) + result = qr.export_as_coco(download_dir=tmp_path, annotation_field="ann") coco = json.loads(result.read_text()) - cat_names = {c["name"] for c in coco["categories"]} - assert "cat" in cat_names + assert len(coco["annotations"]) 
== 2 + assert len(coco["images"]) == 2 -# --- Helpers --- +# --- helpers --- def _make_coco_json(): @@ -186,6 +193,19 @@ def _make_coco_json(): } +def _write_coco(tmp_path, coco): + (tmp_path / "annotations.json").write_text(json.dumps(coco)) + + +def _make_image_bbox(filename="test.jpg") -> IRBBoxImageAnnotation: + return IRBBoxImageAnnotation( + filename=filename, categories={"cat": 1.0}, + top=20.0, left=10.0, width=30.0, height=40.0, + image_width=640, image_height=480, + coordinate_style=CoordinateStyle.DENORMALIZED, + ) + + def _make_qr(ds, datapoints, ann_field=None): fields = [] if ann_field: diff --git a/tests/data_engine/annotation_import/test_cvat_video.py b/tests/data_engine/annotation_import/test_cvat_video.py index 8a0dac69..0abdc841 100644 --- a/tests/data_engine/annotation_import/test_cvat_video.py +++ b/tests/data_engine/annotation_import/test_cvat_video.py @@ -1,5 +1,4 @@ import datetime -import json from pathlib import PurePosixPath from unittest.mock import patch, PropertyMock @@ -22,10 +21,10 @@ def mock_source_prefix(ds): yield -# --- CVAT video import --- +# --- import --- -def test_import_cvat_video_from_xml(ds, tmp_path): +def test_import_cvat_video(ds, tmp_path): xml_file = tmp_path / "annotations.xml" xml_file.write_bytes(_make_cvat_video_xml()) @@ -41,7 +40,7 @@ def test_import_cvat_video_from_xml(ds, tmp_path): # --- _get_all_video_annotations --- -def test_get_all_video_annotations_filters(ds): +def test_get_all_video_filters(ds): image_ann = IRBBoxImageAnnotation( filename="test.jpg", categories={"cat": 1.0}, top=0.1, left=0.1, width=0.2, height=0.2, @@ -51,7 +50,9 @@ def test_get_all_video_annotations_filters(ds): video_ann = _make_video_bbox() dp = Datapoint(datasource=ds, path="dp_0", datapoint_id=0, metadata={}) - dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=[image_ann, video_ann]) + dp.metadata["ann"] = MetadataAnnotations( + datapoint=dp, field="ann", annotations=[image_ann, video_ann] + ) qr = 
_make_qr(ds, [dp], ann_field="ann") result = qr._get_all_video_annotations("ann") @@ -59,7 +60,7 @@ def test_get_all_video_annotations_filters(ds): assert isinstance(result[0], IRVideoBBoxAnnotation) -def test_get_all_video_annotations_empty(ds): +def test_get_all_video_empty(ds): dp = Datapoint(datasource=ds, path="dp_0", datapoint_id=0, metadata={}) dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=[]) @@ -67,34 +68,24 @@ def test_get_all_video_annotations_empty(ds): assert qr._get_all_video_annotations("ann") == [] -# --- videorectangle LS round-trip --- - - -def test_videorectangle_ls_roundtrip(): - from dagshub_annotation_converter.converters.label_studio_video import ( - video_ir_to_ls_video_tasks, - ls_video_json_to_video_ir, - ) - - anns = [_make_video_bbox(frame=0, track_id=1), _make_video_bbox(frame=5, track_id=1)] - tasks = video_ir_to_ls_video_tasks(anns) - assert len(tasks) == 1 +def test_get_all_video_aggregates_across_datapoints(ds): + dps = [] + for i in range(3): + dp = Datapoint(datasource=ds, path=f"dp_{i}", datapoint_id=i, metadata={}) + dp.metadata["ann"] = MetadataAnnotations( + datapoint=dp, field="ann", annotations=[_make_video_bbox(frame=i)] + ) + dps.append(dp) - recovered = ls_video_json_to_video_ir(tasks[0].model_dump_json()) - assert len(recovered) == 2 - assert recovered[0].frame_number == 0 - assert recovered[1].frame_number == 5 + qr = _make_qr(ds, dps, ann_field="ann") + assert len(qr._get_all_video_annotations("ann")) == 3 # --- export_as_cvat_video --- -def test_export_as_cvat_video(ds, tmp_path): - dp = Datapoint(datasource=ds, path="video.mp4", datapoint_id=0, metadata={}) - anns = [_make_video_bbox(frame=0, track_id=0), _make_video_bbox(frame=5, track_id=0)] - dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=anns) - - qr = _make_qr(ds, [dp], ann_field="ann") +def test_export_cvat_video_xml(ds, tmp_path): + qr, _ = _make_video_qr(ds) result = 
qr.export_as_cvat_video(download_dir=tmp_path, annotation_field="ann") assert result.exists() @@ -103,7 +94,7 @@ def test_export_as_cvat_video(ds, tmp_path): assert "= 2 + + +# --- helpers --- def _make_video_bbox(frame=0, track_id=0) -> IRVideoBBoxAnnotation: @@ -144,6 +163,14 @@ def _make_cvat_video_xml() -> bytes: return export_cvat_video_to_xml_string(anns) +def _make_video_qr(ds): + dp = Datapoint(datasource=ds, path="video.mp4", datapoint_id=0, metadata={}) + anns = [_make_video_bbox(frame=0, track_id=0), _make_video_bbox(frame=5, track_id=0)] + dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=anns) + qr = _make_qr(ds, [dp], ann_field="ann") + return qr, dp + + def _make_qr(ds, datapoints, ann_field=None): fields = [] if ann_field: diff --git a/tests/data_engine/annotation_import/test_mot.py b/tests/data_engine/annotation_import/test_mot.py index 93e3132d..ccefc86f 100644 --- a/tests/data_engine/annotation_import/test_mot.py +++ b/tests/data_engine/annotation_import/test_mot.py @@ -9,7 +9,7 @@ from dagshub_annotation_converter.ir.image import CoordinateStyle from dagshub_annotation_converter.ir.video import IRVideoBBoxAnnotation -from dagshub.data_engine.annotation.importer import AnnotationImporter +from dagshub.data_engine.annotation.importer import AnnotationImporter, AnnotationsNotFoundError from dagshub.data_engine.annotation.metadata import MetadataAnnotations from dagshub.data_engine.client.models import MetadataSelectFieldSchema from dagshub.data_engine.dtypes import MetadataFieldType, ReservedTags @@ -26,11 +26,11 @@ def mock_source_prefix(ds): # --- _is_video_annotation_dict --- -def test_is_video_dict_with_int_keys(): +def test_is_video_dict_int_keys(): assert AnnotationImporter._is_video_annotation_dict({0: [], 1: []}) is True -def test_is_video_dict_with_str_keys(): +def test_is_video_dict_str_keys(): assert AnnotationImporter._is_video_annotation_dict({"file.jpg": []}) is False @@ -42,6 +42,10 @@ def 
test_is_video_dict_non_dict(): assert AnnotationImporter._is_video_annotation_dict([]) is False +def test_is_video_dict_mixed_first_int(): + assert AnnotationImporter._is_video_annotation_dict({0: [], "a": []}) is True + + # --- is_video_format --- @@ -66,35 +70,31 @@ def test_is_video_format(ds, ann_type, expected, tmp_path): # --- _flatten_video_annotations --- -def test_flatten_video_annotations(ds, tmp_path): +def test_flatten_merges_frames(ds, tmp_path): importer = AnnotationImporter(ds, "mot", tmp_path / "test_video", load_from="disk") - ann = _make_video_bbox(frame=0) - result = importer._flatten_video_annotations({0: [ann], 5: [ann]}) + result = importer._flatten_video_annotations({ + 0: [_make_video_bbox(frame=0)], + 5: [_make_video_bbox(frame=5)], + }) assert "test_video" in result assert len(result["test_video"]) == 2 -def test_flatten_video_annotations_custom_name(ds, tmp_path): - importer = AnnotationImporter(ds, "mot", tmp_path / "test_video", load_from="disk", video_name="my_video.mp4") +def test_flatten_defaults_to_file_stem(ds, tmp_path): + importer = AnnotationImporter(ds, "mot", tmp_path / "my_sequence", load_from="disk") result = importer._flatten_video_annotations({0: [_make_video_bbox()]}) - assert "my_video.mp4" in result - - -# --- convert_to_ls_tasks for video --- - + assert "my_sequence" in result -def test_convert_video_to_ls_tasks(ds, tmp_path): - importer = AnnotationImporter(ds, "mot", tmp_path / "video", load_from="disk") - video_anns = {"video.mp4": [_make_video_bbox(frame=0), _make_video_bbox(frame=1)]} - - tasks = importer.convert_to_ls_tasks(video_anns) - assert "video.mp4" in tasks - task_json = json.loads(tasks["video.mp4"]) - assert "annotations" in task_json +def test_flatten_video_name_override(ds, tmp_path): + importer = AnnotationImporter( + ds, "mot", tmp_path / "test_video", load_from="disk", video_name="custom.mp4" + ) + result = importer._flatten_video_annotations({0: [_make_video_bbox()]}) + assert "custom.mp4" in 
result -# --- MOT import --- +# --- import --- def test_import_mot_from_dir(ds, tmp_path): @@ -113,55 +113,55 @@ def test_import_mot_from_dir(ds, tmp_path): def test_import_mot_from_zip(ds, tmp_path): mot_dir = tmp_path / "mot_seq" _create_mot_dir(mot_dir) - - zip_path = tmp_path / "mot.zip" - with zipfile.ZipFile(zip_path, "w") as z: - z.write(mot_dir / "gt" / "gt.txt", "gt/gt.txt") - z.write(mot_dir / "gt" / "labels.txt", "gt/labels.txt") - z.write(mot_dir / "seqinfo.ini", "seqinfo.ini") + zip_path = _zip_mot_dir(tmp_path, mot_dir) importer = AnnotationImporter(ds, "mot", zip_path, load_from="disk") result = importer.import_annotations() assert len(result) == 1 - anns = list(result.values())[0] - assert len(anns) == 2 + assert len(list(result.values())[0]) == 2 -# --- export_as_mot --- +def test_import_mot_nonexistent_raises(ds, tmp_path): + importer = AnnotationImporter(ds, "mot", tmp_path / "missing", load_from="disk") + with pytest.raises(AnnotationsNotFoundError): + importer.import_annotations() -def test_export_as_mot(ds, tmp_path): - dp = Datapoint(datasource=ds, path="video.mp4", datapoint_id=0, metadata={}) - anns = [_make_video_bbox(frame=0, track_id=1), _make_video_bbox(frame=1, track_id=1)] - dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=anns) +# --- convert_to_ls_tasks --- - qr = _make_qr(ds, [dp], ann_field="ann") + +def test_convert_video_to_ls_tasks(ds, tmp_path): + importer = AnnotationImporter(ds, "mot", tmp_path / "video", load_from="disk") + video_anns = {"video.mp4": [_make_video_bbox(frame=0), _make_video_bbox(frame=1)]} + tasks = importer.convert_to_ls_tasks(video_anns) + + assert "video.mp4" in tasks + task_json = json.loads(tasks["video.mp4"]) + assert "annotations" in task_json + + +def test_convert_video_empty_skipped(ds, tmp_path): + importer = AnnotationImporter(ds, "mot", tmp_path / "video", load_from="disk") + tasks = importer.convert_to_ls_tasks({"video.mp4": []}) + assert "video.mp4" not in tasks + 
+ +# --- export_as_mot --- + + +def test_export_mot_directory_structure(ds, tmp_path): + qr, _ = _make_video_qr(ds) result = qr.export_as_mot(download_dir=tmp_path, annotation_field="ann") assert result.exists() assert (result / "gt" / "gt.txt").exists() assert (result / "gt" / "labels.txt").exists() assert (result / "seqinfo.ini").exists() - gt_lines = (result / "gt" / "gt.txt").read_text().strip().splitlines() - assert len(gt_lines) == 2 - - -def test_export_as_mot_no_annotations(ds, tmp_path): - dp = Datapoint(datasource=ds, path="video.mp4", datapoint_id=0, metadata={}) - dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=[]) - qr = _make_qr(ds, [dp], ann_field="ann") - with pytest.raises(RuntimeError, match="No video annotations"): - qr.export_as_mot(download_dir=tmp_path, annotation_field="ann") - - -def test_export_as_mot_explicit_dimensions(ds, tmp_path): - dp = Datapoint(datasource=ds, path="video.mp4", datapoint_id=0, metadata={}) - anns = [_make_video_bbox(frame=0)] - dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=anns) - qr = _make_qr(ds, [dp], ann_field="ann") +def test_export_mot_explicit_dimensions(ds, tmp_path): + qr, _ = _make_video_qr(ds) result = qr.export_as_mot( download_dir=tmp_path, annotation_field="ann", image_width=1280, image_height=720 ) @@ -171,7 +171,16 @@ def test_export_as_mot_explicit_dimensions(ds, tmp_path): assert "720" in seqinfo -# --- Helpers --- +def test_export_mot_no_annotations_raises(ds, tmp_path): + dp = Datapoint(datasource=ds, path="video.mp4", datapoint_id=0, metadata={}) + dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=[]) + + qr = _make_qr(ds, [dp], ann_field="ann") + with pytest.raises(RuntimeError, match="No video annotations"): + qr.export_as_mot(download_dir=tmp_path, annotation_field="ann") + + +# --- helpers --- def _make_video_bbox(frame=0, track_id=0) -> IRVideoBBoxAnnotation: @@ -198,6 +207,23 @@ def 
_create_mot_dir(mot_dir: Path): config.write(f) +def _zip_mot_dir(tmp_path: Path, mot_dir: Path) -> Path: + zip_path = tmp_path / "mot.zip" + with zipfile.ZipFile(zip_path, "w") as z: + z.write(mot_dir / "gt" / "gt.txt", "gt/gt.txt") + z.write(mot_dir / "gt" / "labels.txt", "gt/labels.txt") + z.write(mot_dir / "seqinfo.ini", "seqinfo.ini") + return zip_path + + +def _make_video_qr(ds): + dp = Datapoint(datasource=ds, path="video.mp4", datapoint_id=0, metadata={}) + anns = [_make_video_bbox(frame=0, track_id=1), _make_video_bbox(frame=1, track_id=1)] + dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=anns) + qr = _make_qr(ds, [dp], ann_field="ann") + return qr, dp + + def _make_qr(ds, datapoints, ann_field=None): fields = [] if ann_field: From 31c16a4c7e97c2386440f23a016e49185031f1c8 Mon Sep 17 00:00:00 2001 From: Dean Date: Tue, 10 Feb 2026 15:04:45 +0200 Subject: [PATCH 12/21] ignored manual testing file --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 14b6b327..33c013cc 100644 --- a/.gitignore +++ b/.gitignore @@ -141,3 +141,4 @@ params.yml !dagshub/data_engine/voxel_plugin_server/plugins/dagshub/dist/ scratchpad.ipynb scratchpad/ +my_test.py From 59f419b0a7c925c8923003e254d0887e444dfd57 Mon Sep 17 00:00:00 2001 From: Dean Date: Tue, 10 Feb 2026 16:54:20 +0200 Subject: [PATCH 13/21] fix an issue where video dimensions weren't found --- dagshub/data_engine/annotation/importer.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/dagshub/data_engine/annotation/importer.py b/dagshub/data_engine/annotation/importer.py index 8ddf367e..36921708 100644 --- a/dagshub/data_engine/annotation/importer.py +++ b/dagshub/data_engine/annotation/importer.py @@ -108,6 +108,8 @@ def import_annotations(self) -> Mapping[str, Sequence[IRAnnotationBase]]: mot_kwargs["image_width"] = self.additional_args["image_width"] if "image_height" in self.additional_args: 
mot_kwargs["image_height"] = self.additional_args["image_height"] + if "video_name" in self.additional_args: + mot_kwargs["video_file"] = self.additional_args["video_name"] if annotations_file.suffix == ".zip": video_anns, _ = load_mot_from_zip(annotations_file, **mot_kwargs) else: @@ -217,8 +219,11 @@ def remap_annotations( ) continue for ann in anns: - assert ann.filename is not None - ann.filename = remap_func(ann.filename) + if ann.filename is not None: + ann.filename = remap_func(ann.filename) + else: + assert self.is_video_format, f"Non-video annotation has no filename: {ann}" + ann.filename = new_filename remapped[new_filename] = anns return remapped From 09e20666521a4c06387bd55164a856cb2e91309d Mon Sep 17 00:00:00 2001 From: Dean Date: Tue, 10 Feb 2026 17:27:09 +0200 Subject: [PATCH 14/21] remove assert --- dagshub/data_engine/annotation/importer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dagshub/data_engine/annotation/importer.py b/dagshub/data_engine/annotation/importer.py index 36921708..73ec09a6 100644 --- a/dagshub/data_engine/annotation/importer.py +++ b/dagshub/data_engine/annotation/importer.py @@ -222,7 +222,8 @@ def remap_annotations( if ann.filename is not None: ann.filename = remap_func(ann.filename) else: - assert self.is_video_format, f"Non-video annotation has no filename: {ann}" + if not self.is_video_format: + raise ValueError(f"Non-video annotation has no filename: {ann}") ann.filename = new_filename remapped[new_filename] = anns From 82636d9737ce9c73c9e5bddcb8dd34cfb8589e93 Mon Sep 17 00:00:00 2001 From: Dean Date: Thu, 19 Feb 2026 17:00:52 +0200 Subject: [PATCH 15/21] fix missing video dimensions failing and support for multiple video export --- dagshub/data_engine/model/query_result.py | 124 +++++++++++++++--- .../annotation_import/test_cvat_video.py | 77 ++++++++++- .../data_engine/annotation_import/test_mot.py | 32 +++++ 3 files changed, 208 insertions(+), 25 deletions(-) diff --git 
a/dagshub/data_engine/model/query_result.py b/dagshub/data_engine/model/query_result.py index 2a2a0d4b..944e1c96 100644 --- a/dagshub/data_engine/model/query_result.py +++ b/dagshub/data_engine/model/query_result.py @@ -16,7 +16,7 @@ import dagshub_annotation_converter.converters.yolo import rich.progress from dagshub_annotation_converter.converters.coco import export_to_coco_file -from dagshub_annotation_converter.converters.cvat import export_cvat_video_to_file +from dagshub_annotation_converter.converters.cvat import export_cvat_video_to_zip from dagshub_annotation_converter.converters.mot import export_mot_to_dir from dagshub_annotation_converter.formats.coco import CocoContext from dagshub_annotation_converter.formats.mot import MOTContext @@ -788,6 +788,21 @@ def _get_all_video_annotations(self, annotation_field: str) -> List[IRVideoBBoxA all_anns = self._get_all_annotations(annotation_field) return [a for a in all_anns if isinstance(a, IRVideoBBoxAnnotation)] + def _prepare_video_file_for_export( + self, + local_root: Path, + repo_relative_filename: str, + ) -> Optional[Path]: + ann_path = Path(repo_relative_filename) + primary = local_root / ann_path + if primary.exists(): + return primary + source_prefix = Path(self.datasource.source.source_prefix) + with_prefix = local_root / source_prefix / ann_path + if with_prefix.exists(): + return with_prefix + return None + def _resolve_annotation_field(self, annotation_field: Optional[str]) -> str: if annotation_field is not None: return annotation_field @@ -901,6 +916,7 @@ def export_as_coco( if download_dir is None: download_dir = Path("dagshub_export") download_dir = Path(download_dir) + data_dir = download_dir / "data" annotations = self._get_all_annotations(annotation_field) if not annotations: @@ -961,18 +977,17 @@ def export_as_mot( if not video_annotations: raise RuntimeError("No video annotations found to export") + video_file: Optional[Path] = None + if image_width is None or image_height is None: + 
log_message("Missing video dimensions in annotations, downloading videos for converter-side probing...") + video_file = self._prepare_video_file_for_export(download_dir.parent / "data", video_annotations) + context = MOTContext() - if image_width is not None: - context.image_width = image_width - else: - context.image_width = video_annotations[0].image_width - if image_height is not None: - context.image_height = image_height - else: - context.image_height = video_annotations[0].image_height + context.image_width = image_width + context.image_height = image_height log_message("Exporting MOT annotations...") - result_path = export_mot_to_dir(video_annotations, context, download_dir) + result_path = export_mot_to_dir(video_annotations, context, download_dir, video_file=video_file) log_message(f"Done! Saved MOT annotations to {result_path.absolute()}") return result_path @@ -985,7 +1000,7 @@ def export_as_cvat_video( image_height: Optional[int] = None, ) -> Path: """ - Exports video annotations in CVAT video XML format. + Exports video annotations in CVAT video ZIP format. Args: download_dir: Where to export. Defaults to ``./dagshub_export`` @@ -995,7 +1010,8 @@ def export_as_cvat_video( image_height: Frame height. If None, inferred from annotations. Returns: - Path to the exported CVAT video XML file. + Path to the exported CVAT video ZIP file for single-video exports, + or output directory for multi-video exports. 
""" annotation_field = self._resolve_annotation_field(annotation_field) @@ -1007,15 +1023,83 @@ def export_as_cvat_video( if not video_annotations: raise RuntimeError("No video annotations found to export") - output_path = download_dir / "annotations.xml" - log_message("Exporting CVAT video annotations...") - result_path = export_cvat_video_to_file( - video_annotations, - output_path, - video_name=video_name, - image_width=image_width, - image_height=image_height, + source_names = sorted( + { + Path(ann.filename).name + for ann in video_annotations + if ann.filename is not None and ann.filename != "" + } ) + has_multiple_sources = len(source_names) > 1 + + log_message("Exporting CVAT video annotations...") + local_download_root: Optional[Path] = None + if image_width is None or image_height is None: + log_message("Missing video dimensions in annotations, downloading videos for converter-side probing...") + local_download_root = self.download_files(download_dir, keep_source_prefix=True) + + if has_multiple_sources: + grouped: Dict[str, List[IRVideoBBoxAnnotation]] = {} + for ann in video_annotations: + group_key = Path(ann.filename).name if ann.filename else video_name + grouped.setdefault(group_key, []).append(ann) + + output_dir = download_dir / "labels" + output_dir.mkdir(parents=True, exist_ok=True) + + for group_video_name, group_annotations in sorted(grouped.items()): + group_video_file: Optional[Path] = None + if local_download_root is not None: + ref_filename = next((a.filename for a in group_annotations if a.filename), None) + if ref_filename is None: + raise FileNotFoundError( + f"Missing annotation filename for video group '{group_video_name}'." + ) + group_video_file = self._prepare_video_file_for_export(local_download_root, ref_filename) + if group_video_file is None: + raise FileNotFoundError( + f"Could not find local downloaded video file for '{group_video_name}' " + f"under '{local_download_root}'." 
+ ) + + output_path = output_dir / f"{Path(group_video_name).stem}.zip" + export_cvat_video_to_zip( + group_annotations, + output_path, + video_name=group_video_name, + image_width=image_width, + image_height=image_height, + video_file=group_video_file, + ) + result_path = output_dir + else: + single_video_file: Optional[Path] = None + if local_download_root is not None: + ref_filename = next((a.filename for a in video_annotations if a.filename), None) + if ref_filename is None: + raise FileNotFoundError("Missing annotation filename for single-video CVAT export.") + single_video_file = self._prepare_video_file_for_export(local_download_root, ref_filename) + if single_video_file is None: + raise FileNotFoundError( + f"Could not find local downloaded video file for '{ref_filename}' " + f"under '{local_download_root}'." + ) + + labels_dir = download_dir / "labels" + labels_dir.mkdir(parents=True, exist_ok=True) + if source_names: + output_name = f"{Path(source_names[0]).stem}.zip" + else: + output_name = "annotations.zip" + output_path = labels_dir / output_name + result_path = export_cvat_video_to_zip( + video_annotations, + output_path, + video_name=video_name, + image_width=image_width, + image_height=image_height, + video_file=single_video_file, + ) log_message(f"Done! 
Saved CVAT video annotations to {result_path.absolute()}") return result_path diff --git a/tests/data_engine/annotation_import/test_cvat_video.py b/tests/data_engine/annotation_import/test_cvat_video.py index 0abdc841..cd3af914 100644 --- a/tests/data_engine/annotation_import/test_cvat_video.py +++ b/tests/data_engine/annotation_import/test_cvat_video.py @@ -1,4 +1,5 @@ import datetime +import zipfile from pathlib import PurePosixPath from unittest.mock import patch, PropertyMock @@ -89,7 +90,9 @@ def test_export_cvat_video_xml(ds, tmp_path): result = qr.export_as_cvat_video(download_dir=tmp_path, annotation_field="ann") assert result.exists() - content = result.read_text() + assert result == tmp_path / "labels" / "video.zip" + with zipfile.ZipFile(result, "r") as z: + content = z.read("annotations.xml").decode("utf-8") assert "= 2 + assert result.is_dir() + assert result == tmp_path / "labels" + assert (result / "video_0.zip").exists() + assert (result / "video_1.zip").exists() + + +def test_export_cvat_video_passes_video_file_when_dimensions_missing(ds, tmp_path, monkeypatch): + dp = Datapoint(datasource=ds, path="video.mp4", datapoint_id=0, metadata={}) + anns = [_make_video_bbox(frame=0, track_id=0), _make_video_bbox(frame=5, track_id=0)] + for ann in anns: + ann.image_width = 0 + ann.image_height = 0 + ann.filename = "video.mp4" + dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=anns) + qr = _make_qr(ds, [dp], ann_field="ann") + + captured = {} + + def _mock_download_files(self, target_dir, *args, **kwargs): + video_path = target_dir / "video.mp4" + video_path.parent.mkdir(parents=True, exist_ok=True) + video_path.write_bytes(b"video") + return target_dir + + def _mock_export_cvat_video_to_zip( + video_annotations, + output_path, + video_name, + image_width, + image_height, + video_file=None, + ): + captured["video_file"] = str(video_file) if video_file is not None else None + output_path.parent.mkdir(parents=True, exist_ok=True) 
+ output_path.write_text("") + return output_path + + monkeypatch.setattr(QueryResult, "download_files", _mock_download_files) + monkeypatch.setattr("dagshub.data_engine.model.query_result.export_cvat_video_to_zip", _mock_export_cvat_video_to_zip) + + qr.export_as_cvat_video(download_dir=tmp_path, annotation_field="ann") + + assert captured["video_file"] is not None + assert captured["video_file"].endswith("video.mp4") + + +def test_export_cvat_video_missing_local_file_raises(ds, tmp_path, monkeypatch): + dp = Datapoint(datasource=ds, path="video.mp4", datapoint_id=0, metadata={}) + ann = _make_video_bbox(frame=0, track_id=0) + ann.image_width = 0 + ann.image_height = 0 + ann.filename = "missing.mp4" + dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=[ann]) + qr = _make_qr(ds, [dp], ann_field="ann") + + def _mock_download_files(self, target_dir, *args, **kwargs): + target_dir.mkdir(parents=True, exist_ok=True) + return target_dir + + monkeypatch.setattr(QueryResult, "download_files", _mock_download_files) + + with pytest.raises(FileNotFoundError, match="missing.mp4"): + qr.export_as_cvat_video(download_dir=tmp_path, annotation_field="ann") # --- helpers --- diff --git a/tests/data_engine/annotation_import/test_mot.py b/tests/data_engine/annotation_import/test_mot.py index ccefc86f..577bd999 100644 --- a/tests/data_engine/annotation_import/test_mot.py +++ b/tests/data_engine/annotation_import/test_mot.py @@ -180,6 +180,38 @@ def test_export_mot_no_annotations_raises(ds, tmp_path): qr.export_as_mot(download_dir=tmp_path, annotation_field="ann") +def test_export_mot_passes_video_file_when_dimensions_missing(ds, tmp_path, monkeypatch): + dp = Datapoint(datasource=ds, path="video.mp4", datapoint_id=0, metadata={}) + anns = [_make_video_bbox(frame=0, track_id=1), _make_video_bbox(frame=1, track_id=1)] + for ann in anns: + ann.image_width = 0 + ann.image_height = 0 + ann.filename = "video.mp4" + dp.metadata["ann"] = 
MetadataAnnotations(datapoint=dp, field="ann", annotations=anns) + qr = _make_qr(ds, [dp], ann_field="ann") + + captured = {} + + def _mock_download_files(self, target_dir, *args, **kwargs): + video_path = target_dir / "video.mp4" + video_path.parent.mkdir(parents=True, exist_ok=True) + video_path.write_bytes(b"video") + return target_dir + + def _mock_export_mot_to_dir(video_annotations, context, output_dir, video_file=None): + captured["video_file"] = str(video_file) if video_file is not None else None + output_dir.mkdir(parents=True, exist_ok=True) + return output_dir + + monkeypatch.setattr(QueryResult, "download_files", _mock_download_files) + monkeypatch.setattr("dagshub.data_engine.model.query_result.export_mot_to_dir", _mock_export_mot_to_dir) + + qr.export_as_mot(download_dir=tmp_path, annotation_field="ann") + + assert captured["video_file"] is not None + assert captured["video_file"].endswith("video.mp4") + + # --- helpers --- From 813390d9666b279d01a28fa8bc26ecd3911d295a Mon Sep 17 00:00:00 2001 From: Dean Date: Thu, 19 Feb 2026 17:08:17 +0200 Subject: [PATCH 16/21] fix export of multiple MOT files --- dagshub/data_engine/model/query_result.py | 85 +++++++++++++++++-- .../data_engine/annotation_import/test_mot.py | 28 +++++- 2 files changed, 101 insertions(+), 12 deletions(-) diff --git a/dagshub/data_engine/model/query_result.py b/dagshub/data_engine/model/query_result.py index 944e1c96..c3ec4b0b 100644 --- a/dagshub/data_engine/model/query_result.py +++ b/dagshub/data_engine/model/query_result.py @@ -5,6 +5,8 @@ import logging import os import os.path +from tempfile import TemporaryDirectory +from zipfile import ZipFile from collections import Counter, defaultdict from concurrent.futures import ThreadPoolExecutor, as_completed from dataclasses import dataclass, field @@ -803,6 +805,15 @@ def _prepare_video_file_for_export( return with_prefix return None + @staticmethod + def _zip_directory(src_dir: Path, output_zip: Path) -> Path: + 
output_zip.parent.mkdir(parents=True, exist_ok=True) + with ZipFile(output_zip, "w") as z: + for file_path in src_dir.rglob("*"): + if file_path.is_file(): + z.write(file_path, file_path.relative_to(src_dir)) + return output_zip + def _resolve_annotation_field(self, annotation_field: Optional[str]) -> str: if annotation_field is not None: return annotation_field @@ -971,23 +982,81 @@ def export_as_mot( if download_dir is None: download_dir = Path("dagshub_export") - download_dir = Path(download_dir) / "mot" + download_dir = Path(download_dir) + labels_dir = download_dir / "labels" + labels_dir.mkdir(parents=True, exist_ok=True) video_annotations = self._get_all_video_annotations(annotation_field) if not video_annotations: raise RuntimeError("No video annotations found to export") - video_file: Optional[Path] = None + source_names = sorted( + { + Path(ann.filename).name + for ann in video_annotations + if ann.filename is not None and ann.filename != "" + } + ) + has_multiple_sources = len(source_names) > 1 + + local_download_root: Optional[Path] = None if image_width is None or image_height is None: log_message("Missing video dimensions in annotations, downloading videos for converter-side probing...") - video_file = self._prepare_video_file_for_export(download_dir.parent / "data", video_annotations) - - context = MOTContext() - context.image_width = image_width - context.image_height = image_height + local_download_root = self.download_files(download_dir / "data", keep_source_prefix=True) log_message("Exporting MOT annotations...") - result_path = export_mot_to_dir(video_annotations, context, download_dir, video_file=video_file) + if has_multiple_sources: + grouped: Dict[str, List[IRVideoBBoxAnnotation]] = {} + for ann in video_annotations: + group_key = Path(ann.filename).name if ann.filename else "video.mp4" + grouped.setdefault(group_key, []).append(ann) + + for group_video_name, group_annotations in sorted(grouped.items()): + group_video_file: Optional[Path] = 
None + if local_download_root is not None: + ref_filename = next((a.filename for a in group_annotations if a.filename), None) + if ref_filename is None: + raise FileNotFoundError( + f"Missing annotation filename for MOT group '{group_video_name}'." + ) + group_video_file = self._prepare_video_file_for_export(local_download_root, ref_filename) + if group_video_file is None: + raise FileNotFoundError( + f"Could not find local downloaded video file for '{group_video_name}' " + f"under '{local_download_root}'." + ) + + context = MOTContext() + context.image_width = image_width + context.image_height = image_height + zip_path = labels_dir / f"{Path(group_video_name).stem}.zip" + with TemporaryDirectory() as tmp_dir: + output_dir = Path(tmp_dir) / Path(group_video_name).stem + export_mot_to_dir(group_annotations, context, output_dir, video_file=group_video_file) + self._zip_directory(output_dir, zip_path) + result_path = labels_dir + else: + video_file: Optional[Path] = None + if local_download_root is not None: + ref_filename = next((a.filename for a in video_annotations if a.filename), None) + if ref_filename is None: + raise FileNotFoundError("Missing annotation filename for MOT export.") + video_file = self._prepare_video_file_for_export(local_download_root, ref_filename) + if video_file is None: + raise FileNotFoundError( + f"Could not find local downloaded video file for '{ref_filename}' under '{local_download_root}'." + ) + + context = MOTContext() + context.image_width = image_width + context.image_height = image_height + single_name = Path(source_names[0]).stem if source_names else "annotations" + zip_path = labels_dir / f"{single_name}.zip" + with TemporaryDirectory() as tmp_dir: + output_dir = Path(tmp_dir) / single_name + export_mot_to_dir(video_annotations, context, output_dir, video_file=video_file) + result_path = self._zip_directory(output_dir, zip_path) + log_message(f"Done! 
Saved MOT annotations to {result_path.absolute()}") return result_path diff --git a/tests/data_engine/annotation_import/test_mot.py b/tests/data_engine/annotation_import/test_mot.py index 577bd999..cd66580d 100644 --- a/tests/data_engine/annotation_import/test_mot.py +++ b/tests/data_engine/annotation_import/test_mot.py @@ -155,9 +155,11 @@ def test_export_mot_directory_structure(ds, tmp_path): result = qr.export_as_mot(download_dir=tmp_path, annotation_field="ann") assert result.exists() - assert (result / "gt" / "gt.txt").exists() - assert (result / "gt" / "labels.txt").exists() - assert (result / "seqinfo.ini").exists() + assert result == tmp_path / "labels" / "video.zip" + with zipfile.ZipFile(result, "r") as z: + assert "gt/gt.txt" in z.namelist() + assert "gt/labels.txt" in z.namelist() + assert "seqinfo.ini" in z.namelist() def test_export_mot_explicit_dimensions(ds, tmp_path): @@ -166,7 +168,8 @@ def test_export_mot_explicit_dimensions(ds, tmp_path): download_dir=tmp_path, annotation_field="ann", image_width=1280, image_height=720 ) - seqinfo = (result / "seqinfo.ini").read_text() + with zipfile.ZipFile(result, "r") as z: + seqinfo = z.read("seqinfo.ini").decode("utf-8") assert "1280" in seqinfo assert "720" in seqinfo @@ -180,6 +183,23 @@ def test_export_mot_no_annotations_raises(ds, tmp_path): qr.export_as_mot(download_dir=tmp_path, annotation_field="ann") +def test_export_mot_multiple_videos(ds, tmp_path): + dps = [] + for i in range(2): + dp = Datapoint(datasource=ds, path=f"video_{i}.mp4", datapoint_id=i, metadata={}) + ann = _make_video_bbox(frame=i, track_id=i) + ann.filename = dp.path + dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=[ann]) + dps.append(dp) + + qr = _make_qr(ds, dps, ann_field="ann") + result = qr.export_as_mot(download_dir=tmp_path, annotation_field="ann") + + assert result == tmp_path / "labels" + assert (result / "video_0.zip").exists() + assert (result / "video_1.zip").exists() + + def 
test_export_mot_passes_video_file_when_dimensions_missing(ds, tmp_path, monkeypatch): dp = Datapoint(datasource=ds, path="video.mp4", datapoint_id=0, metadata={}) anns = [_make_video_bbox(frame=0, track_id=1), _make_video_bbox(frame=1, track_id=1)] From 3f29cc28d3d54112c595552bc5f94cdfcc664a83 Mon Sep 17 00:00:00 2001 From: Dean Date: Thu, 19 Feb 2026 18:12:35 +0200 Subject: [PATCH 17/21] fix import and export support for CVAT and MOT multi-file --- dagshub/data_engine/annotation/importer.py | 77 +++++++-- dagshub/data_engine/model/query_result.py | 159 ++++++++---------- .../annotation_import/test_cvat_video.py | 16 ++ .../data_engine/annotation_import/test_mot.py | 31 +++- 4 files changed, 177 insertions(+), 106 deletions(-) diff --git a/dagshub/data_engine/annotation/importer.py b/dagshub/data_engine/annotation/importer.py index 73ec09a6..afd4f7ed 100644 --- a/dagshub/data_engine/annotation/importer.py +++ b/dagshub/data_engine/annotation/importer.py @@ -5,10 +5,11 @@ from dagshub_annotation_converter.converters.coco import load_coco_from_file from dagshub_annotation_converter.converters.cvat import ( + load_cvat_from_fs, load_cvat_from_zip, load_cvat_from_xml_file, ) -from dagshub_annotation_converter.converters.mot import load_mot_from_dir, load_mot_from_zip +from dagshub_annotation_converter.converters.mot import load_mot_from_dir, load_mot_from_fs, load_mot_from_zip from dagshub_annotation_converter.converters.yolo import load_yolo_from_fs from dagshub_annotation_converter.converters.label_studio_video import video_ir_to_ls_video_tasks from dagshub_annotation_converter.formats.label_studio.task import LabelStudioTask @@ -95,11 +96,14 @@ def import_annotations(self) -> Mapping[str, Sequence[IRAnnotationBase]]: annotation_type=self.additional_args["yolo_type"], meta_file=annotations_file ) elif self.annotations_type == "cvat": - result = load_cvat_from_zip(annotations_file) - if self._is_video_annotation_dict(result): - annotation_dict = 
self._flatten_video_annotations(result) + if annotations_file.is_dir(): + annotation_dict = self._flatten_cvat_fs_annotations(load_cvat_from_fs(annotations_file)) else: - annotation_dict = result + result = load_cvat_from_zip(annotations_file) + if self._is_video_annotation_dict(result): + annotation_dict = self._flatten_video_annotations(result) + else: + annotation_dict = result elif self.annotations_type == "coco": annotation_dict, _ = load_coco_from_file(annotations_file) elif self.annotations_type == "mot": @@ -110,25 +114,41 @@ def import_annotations(self) -> Mapping[str, Sequence[IRAnnotationBase]]: mot_kwargs["image_height"] = self.additional_args["image_height"] if "video_name" in self.additional_args: mot_kwargs["video_file"] = self.additional_args["video_name"] - if annotations_file.suffix == ".zip": + if annotations_file.is_dir(): + video_files = self.additional_args.get("video_files") + mot_results = load_mot_from_fs( + annotations_file, + image_width=mot_kwargs.get("image_width"), + image_height=mot_kwargs.get("image_height"), + video_files=video_files, + ) + annotation_dict = self._flatten_mot_fs_annotations(mot_results) + elif annotations_file.suffix == ".zip": video_anns, _ = load_mot_from_zip(annotations_file, **mot_kwargs) + annotation_dict = self._flatten_video_annotations(video_anns) else: video_anns, _ = load_mot_from_dir(annotations_file, **mot_kwargs) - annotation_dict = self._flatten_video_annotations(video_anns) + annotation_dict = self._flatten_video_annotations(video_anns) elif self.annotations_type == "cvat_video": cvat_kwargs = {} if "image_width" in self.additional_args: cvat_kwargs["image_width"] = self.additional_args["image_width"] if "image_height" in self.additional_args: cvat_kwargs["image_height"] = self.additional_args["image_height"] - if annotations_file.suffix == ".zip": + if annotations_file.is_dir(): + annotation_dict = self._flatten_cvat_fs_annotations(load_cvat_from_fs(annotations_file, **cvat_kwargs)) + elif 
annotations_file.suffix == ".zip": result = load_cvat_from_zip(annotations_file, **cvat_kwargs) + if self._is_video_annotation_dict(result): + annotation_dict = self._flatten_video_annotations(result) + else: + annotation_dict = result else: result = load_cvat_from_xml_file(annotations_file, **cvat_kwargs) - if self._is_video_annotation_dict(result): - annotation_dict = self._flatten_video_annotations(result) - else: - annotation_dict = result + if self._is_video_annotation_dict(result): + annotation_dict = self._flatten_video_annotations(result) + else: + annotation_dict = result else: raise ValueError(f"Unsupported annotation type: {self.annotations_type}") @@ -153,6 +173,39 @@ def _flatten_video_annotations( all_anns.extend(frame_anns) return {video_name: all_anns} + def _flatten_cvat_fs_annotations(self, fs_annotations: Mapping[str, object]) -> Dict[str, Sequence[IRAnnotationBase]]: + flattened: Dict[str, List[IRAnnotationBase]] = {} + for rel_path, result in fs_annotations.items(): + if not isinstance(result, dict): + continue + if self._is_video_annotation_dict(result): + video_key = Path(rel_path).stem + flattened.setdefault(video_key, []) + for frame_anns in result.values(): + flattened[video_key].extend(frame_anns) + else: + for filename, anns in result.items(): + flattened.setdefault(filename, []) + flattened[filename].extend(anns) + return flattened + + def _flatten_mot_fs_annotations( + self, + fs_annotations: Mapping[str, object], + ) -> Dict[str, Sequence[IRAnnotationBase]]: + flattened: Dict[str, List[IRAnnotationBase]] = {} + for rel_path, result in fs_annotations.items(): + if not isinstance(result, tuple) or len(result) != 2: + continue + frame_annotations = result[0] + if not isinstance(frame_annotations, dict): + continue + sequence_name = Path(rel_path).stem if rel_path not in (".", "") else self.annotations_file.stem + flattened.setdefault(sequence_name, []) + for frame_anns in frame_annotations.values(): + 
flattened[sequence_name].extend(frame_anns) + return flattened + def download_annotations(self, dest_dir: Path): log_message("Downloading annotations from repository") repoApi = self.ds.source.repoApi diff --git a/dagshub/data_engine/model/query_result.py b/dagshub/data_engine/model/query_result.py index c3ec4b0b..d8456127 100644 --- a/dagshub/data_engine/model/query_result.py +++ b/dagshub/data_engine/model/query_result.py @@ -6,7 +6,6 @@ import os import os.path from tempfile import TemporaryDirectory -from zipfile import ZipFile from collections import Counter, defaultdict from concurrent.futures import ThreadPoolExecutor, as_completed from dataclasses import dataclass, field @@ -18,8 +17,8 @@ import dagshub_annotation_converter.converters.yolo import rich.progress from dagshub_annotation_converter.converters.coco import export_to_coco_file -from dagshub_annotation_converter.converters.cvat import export_cvat_video_to_zip -from dagshub_annotation_converter.converters.mot import export_mot_to_dir +from dagshub_annotation_converter.converters.cvat import export_cvat_video_to_zip, export_cvat_videos_to_zips +from dagshub_annotation_converter.converters.mot import export_mot_sequences_to_dirs, export_mot_to_dir from dagshub_annotation_converter.formats.coco import CocoContext from dagshub_annotation_converter.formats.mot import MOTContext from dagshub_annotation_converter.formats.yolo import YoloContext @@ -790,11 +789,7 @@ def _get_all_video_annotations(self, annotation_field: str) -> List[IRVideoBBoxA all_anns = self._get_all_annotations(annotation_field) return [a for a in all_anns if isinstance(a, IRVideoBBoxAnnotation)] - def _prepare_video_file_for_export( - self, - local_root: Path, - repo_relative_filename: str, - ) -> Optional[Path]: + def _prepare_video_file_for_export(self, local_root: Path, repo_relative_filename: str) -> Optional[Path]: ann_path = Path(repo_relative_filename) primary = local_root / ann_path if primary.exists(): @@ -806,13 +801,17 @@ def 
_prepare_video_file_for_export( return None @staticmethod - def _zip_directory(src_dir: Path, output_zip: Path) -> Path: - output_zip.parent.mkdir(parents=True, exist_ok=True) - with ZipFile(output_zip, "w") as z: - for file_path in src_dir.rglob("*"): - if file_path.is_file(): - z.write(file_path, file_path.relative_to(src_dir)) - return output_zip + def _get_annotation_filename(ann: IRVideoBBoxAnnotation) -> Optional[str]: + filename = ann.filename + if filename is None: + return None + if isinstance(filename, (list, tuple)): + if len(filename) == 0: + return None + if len(filename) > 1: + raise ValueError(f"Annotation has multiple filenames: {filename}") + filename = filename[0] + return str(filename) def _resolve_annotation_field(self, annotation_field: Optional[str]) -> str: if annotation_field is not None: @@ -992,9 +991,9 @@ def export_as_mot( source_names = sorted( { - Path(ann.filename).name - for ann in video_annotations - if ann.filename is not None and ann.filename != "" + Path(ann_filename).name + for ann_filename in (self._get_annotation_filename(ann) for ann in video_annotations) + if ann_filename } ) has_multiple_sources = len(source_names) > 1 @@ -1006,39 +1005,33 @@ def export_as_mot( log_message("Exporting MOT annotations...") if has_multiple_sources: - grouped: Dict[str, List[IRVideoBBoxAnnotation]] = {} - for ann in video_annotations: - group_key = Path(ann.filename).name if ann.filename else "video.mp4" - grouped.setdefault(group_key, []).append(ann) - - for group_video_name, group_annotations in sorted(grouped.items()): - group_video_file: Optional[Path] = None - if local_download_root is not None: - ref_filename = next((a.filename for a in group_annotations if a.filename), None) - if ref_filename is None: - raise FileNotFoundError( - f"Missing annotation filename for MOT group '{group_video_name}'." 
- ) - group_video_file = self._prepare_video_file_for_export(local_download_root, ref_filename) - if group_video_file is None: + video_files: Optional[Dict[str, Union[str, Path]]] = None + if local_download_root is not None: + video_files = {} + for ann_filename in { + self._get_annotation_filename(ann) + for ann in video_annotations + if self._get_annotation_filename(ann) + }: + assert ann_filename is not None + sequence_name = Path(ann_filename).stem + local_video = self._prepare_video_file_for_export(local_download_root, ann_filename) + if local_video is None: raise FileNotFoundError( - f"Could not find local downloaded video file for '{group_video_name}' " - f"under '{local_download_root}'." + f"Could not find local downloaded video file for '{ann_filename}' under " + f"'{local_download_root}'." ) + video_files[sequence_name] = local_video - context = MOTContext() - context.image_width = image_width - context.image_height = image_height - zip_path = labels_dir / f"{Path(group_video_name).stem}.zip" - with TemporaryDirectory() as tmp_dir: - output_dir = Path(tmp_dir) / Path(group_video_name).stem - export_mot_to_dir(group_annotations, context, output_dir, video_file=group_video_file) - self._zip_directory(output_dir, zip_path) + context = MOTContext() + context.image_width = image_width + context.image_height = image_height + export_mot_sequences_to_dirs(video_annotations, context, labels_dir, video_files=video_files) result_path = labels_dir else: video_file: Optional[Path] = None if local_download_root is not None: - ref_filename = next((a.filename for a in video_annotations if a.filename), None) + ref_filename = next((self._get_annotation_filename(a) for a in video_annotations), None) if ref_filename is None: raise FileNotFoundError("Missing annotation filename for MOT export.") video_file = self._prepare_video_file_for_export(local_download_root, ref_filename) @@ -1050,12 +1043,9 @@ def export_as_mot( context = MOTContext() context.image_width = image_width 
context.image_height = image_height - single_name = Path(source_names[0]).stem if source_names else "annotations" - zip_path = labels_dir / f"{single_name}.zip" - with TemporaryDirectory() as tmp_dir: - output_dir = Path(tmp_dir) / single_name - export_mot_to_dir(video_annotations, context, output_dir, video_file=video_file) - result_path = self._zip_directory(output_dir, zip_path) + single_name = Path(source_names[0]).stem if source_names else "sequence" + output_dir = labels_dir / single_name + result_path = export_mot_to_dir(video_annotations, context, output_dir, video_file=video_file) log_message(f"Done! Saved MOT annotations to {result_path.absolute()}") return result_path @@ -1094,57 +1084,56 @@ def export_as_cvat_video( source_names = sorted( { - Path(ann.filename).name - for ann in video_annotations - if ann.filename is not None and ann.filename != "" + Path(ann_filename).name + for ann_filename in (self._get_annotation_filename(ann) for ann in video_annotations) + if ann_filename } ) has_multiple_sources = len(source_names) > 1 log_message("Exporting CVAT video annotations...") local_download_root: Optional[Path] = None - if image_width is None or image_height is None: + if not has_multiple_sources and (image_width is None or image_height is None): log_message("Missing video dimensions in annotations, downloading videos for converter-side probing...") - local_download_root = self.download_files(download_dir, keep_source_prefix=True) + local_download_root = self.download_files(download_dir / "data", keep_source_prefix=True) if has_multiple_sources: - grouped: Dict[str, List[IRVideoBBoxAnnotation]] = {} - for ann in video_annotations: - group_key = Path(ann.filename).name if ann.filename else video_name - grouped.setdefault(group_key, []).append(ann) - - output_dir = download_dir / "labels" - output_dir.mkdir(parents=True, exist_ok=True) - - for group_video_name, group_annotations in sorted(grouped.items()): - group_video_file: Optional[Path] = None - if 
local_download_root is not None: - ref_filename = next((a.filename for a in group_annotations if a.filename), None) - if ref_filename is None: + video_files: Optional[Dict[str, Union[str, Path]]] = None + if image_width is None or image_height is None: + log_message("Missing video dimensions in annotations, downloading videos for converter-side probing...") + local_download_root = self.download_files(download_dir / "data", keep_source_prefix=True) + video_files = {} + for ann_filename in { + self._get_annotation_filename(ann) + for ann in video_annotations + if self._get_annotation_filename(ann) + }: + assert ann_filename is not None + local_video = self._prepare_video_file_for_export(local_download_root, ann_filename) + if local_video is None: raise FileNotFoundError( - f"Missing annotation filename for video group '{group_video_name}'." - ) - group_video_file = self._prepare_video_file_for_export(local_download_root, ref_filename) - if group_video_file is None: - raise FileNotFoundError( - f"Could not find local downloaded video file for '{group_video_name}' " + f"Could not find local downloaded video file for '{ann_filename}' " f"under '{local_download_root}'." 
) + ann_path = Path(ann_filename) + video_files[ann_filename] = local_video + video_files[ann_path.name] = local_video + video_files[ann_path.stem] = local_video - output_path = output_dir / f"{Path(group_video_name).stem}.zip" - export_cvat_video_to_zip( - group_annotations, - output_path, - video_name=group_video_name, - image_width=image_width, - image_height=image_height, - video_file=group_video_file, - ) + output_dir = download_dir / "labels" + output_dir.mkdir(parents=True, exist_ok=True) + export_cvat_videos_to_zips( + video_annotations, + output_dir, + image_width=image_width, + image_height=image_height, + video_files=video_files if video_files else None, + ) result_path = output_dir else: single_video_file: Optional[Path] = None if local_download_root is not None: - ref_filename = next((a.filename for a in video_annotations if a.filename), None) + ref_filename = next((self._get_annotation_filename(a) for a in video_annotations), None) if ref_filename is None: raise FileNotFoundError("Missing annotation filename for single-video CVAT export.") single_video_file = self._prepare_video_file_for_export(local_download_root, ref_filename) @@ -1157,7 +1146,7 @@ def export_as_cvat_video( labels_dir = download_dir / "labels" labels_dir.mkdir(parents=True, exist_ok=True) if source_names: - output_name = f"{Path(source_names[0]).stem}.zip" + output_name = f"{Path(source_names[0]).name}.zip" else: output_name = "annotations.zip" output_path = labels_dir / output_name diff --git a/tests/data_engine/annotation_import/test_cvat_video.py b/tests/data_engine/annotation_import/test_cvat_video.py index cd3af914..d91643bf 100644 --- a/tests/data_engine/annotation_import/test_cvat_video.py +++ b/tests/data_engine/annotation_import/test_cvat_video.py @@ -38,6 +38,22 @@ def test_import_cvat_video(ds, tmp_path): assert all(isinstance(a, IRVideoBBoxAnnotation) for a in anns) +def test_import_cvat_video_from_fs_directory(ds, tmp_path): + first = tmp_path / "video_a.xml" + second = 
tmp_path / "nested" / "video_b.xml" + second.parent.mkdir(parents=True) + first.write_bytes(_make_cvat_video_xml()) + second.write_bytes(_make_cvat_video_xml()) + + importer = AnnotationImporter(ds, "cvat_video", tmp_path, load_from="disk") + result = importer.import_annotations() + + assert "video_a" in result + assert "video_b" in result + assert len(result["video_a"]) == 2 + assert len(result["video_b"]) == 2 + + # --- _get_all_video_annotations --- diff --git a/tests/data_engine/annotation_import/test_mot.py b/tests/data_engine/annotation_import/test_mot.py index cd66580d..858f9eb6 100644 --- a/tests/data_engine/annotation_import/test_mot.py +++ b/tests/data_engine/annotation_import/test_mot.py @@ -122,6 +122,21 @@ def test_import_mot_from_zip(ds, tmp_path): assert len(list(result.values())[0]) == 2 +def test_import_mot_from_fs_directory(ds, tmp_path): + seq_a = tmp_path / "seq_a" + seq_b = tmp_path / "nested" / "seq_b" + _create_mot_dir(seq_a) + _create_mot_dir(seq_b) + + importer = AnnotationImporter(ds, "mot", tmp_path, load_from="disk") + result = importer.import_annotations() + + assert "seq_a" in result + assert "seq_b" in result + assert len(result["seq_a"]) == 2 + assert len(result["seq_b"]) == 2 + + def test_import_mot_nonexistent_raises(ds, tmp_path): importer = AnnotationImporter(ds, "mot", tmp_path / "missing", load_from="disk") with pytest.raises(AnnotationsNotFoundError): @@ -155,11 +170,10 @@ def test_export_mot_directory_structure(ds, tmp_path): result = qr.export_as_mot(download_dir=tmp_path, annotation_field="ann") assert result.exists() - assert result == tmp_path / "labels" / "video.zip" - with zipfile.ZipFile(result, "r") as z: - assert "gt/gt.txt" in z.namelist() - assert "gt/labels.txt" in z.namelist() - assert "seqinfo.ini" in z.namelist() + assert result == tmp_path / "labels" / "video" + assert (result / "gt" / "gt.txt").exists() + assert (result / "gt" / "labels.txt").exists() + assert (result / "seqinfo.ini").exists() def 
test_export_mot_explicit_dimensions(ds, tmp_path): @@ -168,8 +182,7 @@ def test_export_mot_explicit_dimensions(ds, tmp_path): download_dir=tmp_path, annotation_field="ann", image_width=1280, image_height=720 ) - with zipfile.ZipFile(result, "r") as z: - seqinfo = z.read("seqinfo.ini").decode("utf-8") + seqinfo = (result / "seqinfo.ini").read_text() assert "1280" in seqinfo assert "720" in seqinfo @@ -196,8 +209,8 @@ def test_export_mot_multiple_videos(ds, tmp_path): result = qr.export_as_mot(download_dir=tmp_path, annotation_field="ann") assert result == tmp_path / "labels" - assert (result / "video_0.zip").exists() - assert (result / "video_1.zip").exists() + assert (result / "video_0" / "gt" / "gt.txt").exists() + assert (result / "video_1" / "gt" / "gt.txt").exists() def test_export_mot_passes_video_file_when_dimensions_missing(ds, tmp_path, monkeypatch): From 277f1ad9efb9ab6b449561d99c0acb04e03dc999 Mon Sep 17 00:00:00 2001 From: Dean Date: Sun, 22 Feb 2026 16:21:42 +0200 Subject: [PATCH 18/21] fix keyframe issue as well as visibility misconversion --- dagshub/data_engine/annotation/importer.py | 7 ++++ .../data_engine/annotation_import/test_mot.py | 32 +++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/dagshub/data_engine/annotation/importer.py b/dagshub/data_engine/annotation/importer.py index afd4f7ed..e65a764e 100644 --- a/dagshub/data_engine/annotation/importer.py +++ b/dagshub/data_engine/annotation/importer.py @@ -116,11 +116,18 @@ def import_annotations(self) -> Mapping[str, Sequence[IRAnnotationBase]]: mot_kwargs["video_file"] = self.additional_args["video_name"] if annotations_file.is_dir(): video_files = self.additional_args.get("video_files") + raw_datasource_path = self.additional_args.get("datasource_path") + if raw_datasource_path is None: + raw_datasource_path = self.ds.source.source_prefix + datasource_path = PurePosixPath(raw_datasource_path).as_posix().lstrip("/") + if datasource_path == ".": + datasource_path = "" mot_results 
= load_mot_from_fs( annotations_file, image_width=mot_kwargs.get("image_width"), image_height=mot_kwargs.get("image_height"), video_files=video_files, + datasource_path=datasource_path, ) annotation_dict = self._flatten_mot_fs_annotations(mot_results) elif annotations_file.suffix == ".zip": diff --git a/tests/data_engine/annotation_import/test_mot.py b/tests/data_engine/annotation_import/test_mot.py index 858f9eb6..ea19890a 100644 --- a/tests/data_engine/annotation_import/test_mot.py +++ b/tests/data_engine/annotation_import/test_mot.py @@ -137,6 +137,38 @@ def test_import_mot_from_fs_directory(ds, tmp_path): assert len(result["seq_b"]) == 2 +def test_import_mot_from_fs_passes_datasource_path_from_source_prefix(ds, tmp_path, monkeypatch): + captured = {} + + def _mock_load_mot_from_fs(import_dir, image_width=None, image_height=None, video_files=None, datasource_path=""): + captured["import_dir"] = import_dir + captured["image_width"] = image_width + captured["image_height"] = image_height + captured["video_files"] = video_files + captured["datasource_path"] = datasource_path + return {"seq_a": ({0: [_make_video_bbox(frame=0)]}, object())} + + monkeypatch.setattr("dagshub.data_engine.annotation.importer.load_mot_from_fs", _mock_load_mot_from_fs) + + with patch.object(type(ds.source), "source_prefix", new_callable=PropertyMock, return_value=PurePosixPath("data/videos")): + importer = AnnotationImporter( + ds, + "mot", + tmp_path, + load_from="disk", + image_width=1280, + image_height=720, + video_files={"seq_a": "dummy.mp4"}, + ) + result = importer.import_annotations() + + assert captured["datasource_path"] == "data/videos" + assert captured["video_files"] == {"seq_a": "dummy.mp4"} + assert captured["image_width"] == 1280 + assert captured["image_height"] == 720 + assert "seq_a" in result + + def test_import_mot_nonexistent_raises(ds, tmp_path): importer = AnnotationImporter(ds, "mot", tmp_path / "missing", load_from="disk") with 
pytest.raises(AnnotationsNotFoundError): From 63b860195513103974c3682ff8bea61e18e7b8ea Mon Sep 17 00:00:00 2001 From: Dean Date: Sun, 22 Feb 2026 17:30:19 +0200 Subject: [PATCH 19/21] Fix linter errors --- dagshub/auth/token_auth.py | 2 +- dagshub/data_engine/annotation/importer.py | 7 +++++-- dagshub/data_engine/model/query_result.py | 5 ++--- tests/data_engine/annotation_import/test_cvat_video.py | 5 ++++- tests/data_engine/annotation_import/test_mot.py | 4 +++- 5 files changed, 15 insertions(+), 8 deletions(-) diff --git a/dagshub/auth/token_auth.py b/dagshub/auth/token_auth.py index 31ec32ac..7ba3a70a 100644 --- a/dagshub/auth/token_auth.py +++ b/dagshub/auth/token_auth.py @@ -37,7 +37,7 @@ def auth_flow(self, request: Request) -> Generator[Request, Response, None]: def can_renegotiate(self): # Env var tokens cannot renegotiate, every other token type can - return not type(self._token) is EnvVarDagshubToken + return type(self._token) is not EnvVarDagshubToken def renegotiate_token(self): if not self._token_storage.is_valid_token(self._token, self._host): diff --git a/dagshub/data_engine/annotation/importer.py b/dagshub/data_engine/annotation/importer.py index e65a764e..c4c86592 100644 --- a/dagshub/data_engine/annotation/importer.py +++ b/dagshub/data_engine/annotation/importer.py @@ -143,7 +143,8 @@ def import_annotations(self) -> Mapping[str, Sequence[IRAnnotationBase]]: if "image_height" in self.additional_args: cvat_kwargs["image_height"] = self.additional_args["image_height"] if annotations_file.is_dir(): - annotation_dict = self._flatten_cvat_fs_annotations(load_cvat_from_fs(annotations_file, **cvat_kwargs)) + raw = load_cvat_from_fs(annotations_file, **cvat_kwargs) + annotation_dict = self._flatten_cvat_fs_annotations(raw) elif annotations_file.suffix == ".zip": result = load_cvat_from_zip(annotations_file, **cvat_kwargs) if self._is_video_annotation_dict(result): @@ -180,7 +181,9 @@ def _flatten_video_annotations( all_anns.extend(frame_anns) return 
{video_name: all_anns} - def _flatten_cvat_fs_annotations(self, fs_annotations: Mapping[str, object]) -> Dict[str, Sequence[IRAnnotationBase]]: + def _flatten_cvat_fs_annotations( + self, fs_annotations: Mapping[str, object] + ) -> Dict[str, Sequence[IRAnnotationBase]]: flattened: Dict[str, List[IRAnnotationBase]] = {} for rel_path, result in fs_annotations.items(): if not isinstance(result, dict): diff --git a/dagshub/data_engine/model/query_result.py b/dagshub/data_engine/model/query_result.py index d8456127..3c283194 100644 --- a/dagshub/data_engine/model/query_result.py +++ b/dagshub/data_engine/model/query_result.py @@ -5,7 +5,6 @@ import logging import os import os.path -from tempfile import TemporaryDirectory from collections import Counter, defaultdict from concurrent.futures import ThreadPoolExecutor, as_completed from dataclasses import dataclass, field @@ -926,7 +925,6 @@ def export_as_coco( if download_dir is None: download_dir = Path("dagshub_export") download_dir = Path(download_dir) - data_dir = download_dir / "data" annotations = self._get_all_annotations(annotation_field) if not annotations: @@ -1037,7 +1035,8 @@ def export_as_mot( video_file = self._prepare_video_file_for_export(local_download_root, ref_filename) if video_file is None: raise FileNotFoundError( - f"Could not find local downloaded video file for '{ref_filename}' under '{local_download_root}'." + f"Could not find local downloaded video file for '{ref_filename}' " + f"under '{local_download_root}'." 
) context = MOTContext() diff --git a/tests/data_engine/annotation_import/test_cvat_video.py b/tests/data_engine/annotation_import/test_cvat_video.py index cd3af914..328ea2c4 100644 --- a/tests/data_engine/annotation_import/test_cvat_video.py +++ b/tests/data_engine/annotation_import/test_cvat_video.py @@ -185,7 +185,10 @@ def _mock_export_cvat_video_to_zip( return output_path monkeypatch.setattr(QueryResult, "download_files", _mock_download_files) - monkeypatch.setattr("dagshub.data_engine.model.query_result.export_cvat_video_to_zip", _mock_export_cvat_video_to_zip) + monkeypatch.setattr( + "dagshub.data_engine.model.query_result.export_cvat_video_to_zip", + _mock_export_cvat_video_to_zip, + ) qr.export_as_cvat_video(download_dir=tmp_path, annotation_field="ann") diff --git a/tests/data_engine/annotation_import/test_mot.py b/tests/data_engine/annotation_import/test_mot.py index f4a8aa57..c4053c34 100644 --- a/tests/data_engine/annotation_import/test_mot.py +++ b/tests/data_engine/annotation_import/test_mot.py @@ -135,7 +135,9 @@ def _mock_load_mot_from_fs(import_dir, image_width=None, image_height=None, vide monkeypatch.setattr("dagshub.data_engine.annotation.importer.load_mot_from_fs", _mock_load_mot_from_fs) - with patch.object(type(ds.source), "source_prefix", new_callable=PropertyMock, return_value=PurePosixPath("data/videos")): + with patch.object( + type(ds.source), "source_prefix", new_callable=PropertyMock, return_value=PurePosixPath("data/videos") + ): importer = AnnotationImporter( ds, "mot", From fb1436f77adba3fcb3bb8408340107ae6957d0c2 Mon Sep 17 00:00:00 2001 From: Dean Date: Sun, 22 Feb 2026 17:54:52 +0200 Subject: [PATCH 20/21] fix broken tests --- .../annotation_import/test_cvat_video.py | 35 ++++++++-- .../data_engine/annotation_import/test_mot.py | 69 ++++++++++++++++++- tests/data_engine/conftest.py | 3 +- tests/mocks/repo_api.py | 4 ++ 4 files changed, 101 insertions(+), 10 deletions(-) diff --git 
a/tests/data_engine/annotation_import/test_cvat_video.py b/tests/data_engine/annotation_import/test_cvat_video.py index 328ea2c4..3676b82d 100644 --- a/tests/data_engine/annotation_import/test_cvat_video.py +++ b/tests/data_engine/annotation_import/test_cvat_video.py @@ -85,12 +85,19 @@ def test_get_all_video_aggregates_across_datapoints(ds): # --- export_as_cvat_video --- -def test_export_cvat_video_xml(ds, tmp_path): +def test_export_cvat_video_xml(ds, tmp_path, monkeypatch): qr, _ = _make_video_qr(ds) + + def _mock_download_files(self, target_dir, *args, **kwargs): + (target_dir / "video.mp4").parent.mkdir(parents=True, exist_ok=True) + (target_dir / "video.mp4").write_bytes(b"fake") + return target_dir + + monkeypatch.setattr(QueryResult, "download_files", _mock_download_files) result = qr.export_as_cvat_video(download_dir=tmp_path, annotation_field="ann") assert result.exists() - assert result == tmp_path / "labels" / "video.zip" + assert result == tmp_path / "labels" / "video.mp4.zip" with zipfile.ZipFile(result, "r") as z: content = z.read("annotations.xml").decode("utf-8") assert " bytes: def _make_video_qr(ds): dp = Datapoint(datasource=ds, path="video.mp4", datapoint_id=0, metadata={}) anns = [_make_video_bbox(frame=0, track_id=0), _make_video_bbox(frame=5, track_id=0)] + for ann in anns: + ann.filename = "video.mp4" dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=anns) qr = _make_qr(ds, [dp], ann_field="ann") return qr, dp diff --git a/tests/data_engine/annotation_import/test_mot.py b/tests/data_engine/annotation_import/test_mot.py index c4053c34..9070e676 100644 --- a/tests/data_engine/annotation_import/test_mot.py +++ b/tests/data_engine/annotation_import/test_mot.py @@ -184,8 +184,30 @@ def test_convert_video_empty_skipped(ds, tmp_path): # --- export_as_mot --- -def test_export_mot_directory_structure(ds, tmp_path): +def test_export_mot_directory_structure(ds, tmp_path, monkeypatch): qr, _ = _make_video_qr(ds) + + def 
_mock_download_files(self, target_dir, *args, **kwargs): + (target_dir / "video.mp4").parent.mkdir(parents=True, exist_ok=True) + (target_dir / "video.mp4").write_bytes(b"fake") + return target_dir + + def _mock_export_mot_to_dir(video_annotations, context, output_dir, video_file=None): + output_dir.mkdir(parents=True, exist_ok=True) + (output_dir / "gt").mkdir(parents=True, exist_ok=True) + (output_dir / "gt" / "gt.txt").write_text("") + (output_dir / "gt" / "labels.txt").write_text("person\n") + config = configparser.ConfigParser() + config["Sequence"] = {"imWidth": "1920", "imHeight": "1080"} + with open(output_dir / "seqinfo.ini", "w") as f: + config.write(f) + return output_dir + + monkeypatch.setattr(QueryResult, "download_files", _mock_download_files) + monkeypatch.setattr( + "dagshub.data_engine.model.query_result.export_mot_to_dir", + _mock_export_mot_to_dir, + ) result = qr.export_as_mot(download_dir=tmp_path, annotation_field="ann") assert result.exists() @@ -195,8 +217,27 @@ def test_export_mot_directory_structure(ds, tmp_path): assert (result / "seqinfo.ini").exists() -def test_export_mot_explicit_dimensions(ds, tmp_path): +def test_export_mot_explicit_dimensions(ds, tmp_path, monkeypatch): qr, _ = _make_video_qr(ds) + + def _mock_export_mot_to_dir(video_annotations, context, output_dir, video_file=None): + output_dir.mkdir(parents=True, exist_ok=True) + config = configparser.ConfigParser() + config["Sequence"] = { + "imWidth": str(context.image_width), + "imHeight": str(context.image_height), + } + with open(output_dir / "seqinfo.ini", "w") as f: + config.write(f) + (output_dir / "gt").mkdir(parents=True, exist_ok=True) + (output_dir / "gt" / "gt.txt").write_text("") + (output_dir / "gt" / "labels.txt").write_text("person\n") + return output_dir + + monkeypatch.setattr( + "dagshub.data_engine.model.query_result.export_mot_to_dir", + _mock_export_mot_to_dir, + ) result = qr.export_as_mot( download_dir=tmp_path, annotation_field="ann", image_width=1280, 
image_height=720 ) @@ -215,7 +256,7 @@ def test_export_mot_no_annotations_raises(ds, tmp_path): qr.export_as_mot(download_dir=tmp_path, annotation_field="ann") -def test_export_mot_multiple_videos(ds, tmp_path): +def test_export_mot_multiple_videos(ds, tmp_path, monkeypatch): dps = [] for i in range(2): dp = Datapoint(datasource=ds, path=f"video_{i}.mp4", datapoint_id=i, metadata={}) @@ -224,6 +265,26 @@ def test_export_mot_multiple_videos(ds, tmp_path): dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=[ann]) dps.append(dp) + def _mock_download_files(self, target_dir, *args, **kwargs): + target_dir.mkdir(parents=True, exist_ok=True) + for i in range(2): + (target_dir / f"video_{i}.mp4").write_bytes(b"fake") + return target_dir + + def _mock_export_mot_sequences_to_dirs(video_annotations, context, labels_dir, video_files=None): + for i in range(2): + seq_dir = labels_dir / f"video_{i}" + seq_dir.mkdir(parents=True, exist_ok=True) + (seq_dir / "gt").mkdir(parents=True, exist_ok=True) + (seq_dir / "gt" / "gt.txt").write_text("") + (seq_dir / "gt" / "labels.txt").write_text("person\n") + return labels_dir + + monkeypatch.setattr(QueryResult, "download_files", _mock_download_files) + monkeypatch.setattr( + "dagshub.data_engine.model.query_result.export_mot_sequences_to_dirs", + _mock_export_mot_sequences_to_dirs, + ) qr = _make_qr(ds, dps, ann_field="ann") result = qr.export_as_mot(download_dir=tmp_path, annotation_field="ann") @@ -303,6 +364,8 @@ def _zip_mot_dir(tmp_path: Path, mot_dir: Path) -> Path: def _make_video_qr(ds): dp = Datapoint(datasource=ds, path="video.mp4", datapoint_id=0, metadata={}) anns = [_make_video_bbox(frame=0, track_id=1), _make_video_bbox(frame=1, track_id=1)] + for ann in anns: + ann.filename = "video.mp4" dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=anns) qr = _make_qr(ds, [dp], ann_field="ann") return qr, dp diff --git a/tests/data_engine/conftest.py 
b/tests/data_engine/conftest.py index e8f0c70a..e57d1e83 100644 --- a/tests/data_engine/conftest.py +++ b/tests/data_engine/conftest.py @@ -5,7 +5,7 @@ from dagshub.common.api import UserAPI from dagshub.common.api.responses import UserAPIResponse from dagshub.data_engine import datasources -from dagshub.data_engine.client.models import MetadataSelectFieldSchema, PreprocessingStatus +from dagshub.data_engine.client.models import DatasourceType, MetadataSelectFieldSchema, PreprocessingStatus from dagshub.data_engine.model.datapoint import Datapoint from dagshub.data_engine.model.datasource import DatasetState, Datasource from dagshub.data_engine.model.query_result import QueryResult @@ -26,6 +26,7 @@ def other_ds(mocker, mock_dagshub_auth) -> Datasource: def _create_mock_datasource(mocker, id, name) -> Datasource: ds_state = datasources.DatasourceState(id=id, name=name, repo="kirill/repo") + ds_state.source_type = DatasourceType.REPOSITORY ds_state.path = "repo://kirill/repo/data/" ds_state.preprocessing_status = PreprocessingStatus.READY mocker.patch.object(ds_state, "client") diff --git a/tests/mocks/repo_api.py b/tests/mocks/repo_api.py index d457d161..22b6c94c 100644 --- a/tests/mocks/repo_api.py +++ b/tests/mocks/repo_api.py @@ -113,6 +113,10 @@ def generate_content_api_entry(path, is_dir=False, versioning="dvc") -> ContentA def default_branch(self) -> str: return self._default_branch + @property + def id(self) -> int: + return 1 + def get_connected_storages(self) -> List[StorageAPIEntry]: return self.storages From 9716c48ec1b2fb2edbfbd957c8d10b6f620cdac3 Mon Sep 17 00:00:00 2001 From: Dean Date: Sun, 22 Feb 2026 18:02:02 +0200 Subject: [PATCH 21/21] remove personal files from gitignore --- .gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitignore b/.gitignore index 33c013cc..14b6b327 100644 --- a/.gitignore +++ b/.gitignore @@ -141,4 +141,3 @@ params.yml !dagshub/data_engine/voxel_plugin_server/plugins/dagshub/dist/ scratchpad.ipynb scratchpad/ 
-my_test.py