Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 24 additions & 13 deletions src/deepforest/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -416,7 +416,7 @@ def format_boxes(prediction, scores=True):
return df


def read_coco(json_file):
def read_coco(json_file, mode="box"):
"""Read a COCO format JSON file and return a pandas dataframe.

Args:
Expand All @@ -439,18 +439,29 @@ def read_coco(json_file):
filenames = []
labels = []

# Format reference: https://cocodataset.org/#format-data
for annotation in coco_data["annotations"]:
segmentation = annotation.get("segmentation")
if not segmentation:
continue
# COCO polygons are usually a list of lists; take the first (assume "single part")
segmentation_mask = segmentation[0]
# Convert flat list to coordinate pairs
pairs = [
(segmentation_mask[i], segmentation_mask[i + 1])
for i in range(0, len(segmentation_mask), 2)
]
polygon = shapely.geometry.Polygon(pairs)
if mode == "box":
# COCO bbox format is [x, y, width, height]
x, y, width, height = annotation["bbox"]
# Shapely box format is [minx, miny, maxx, maxy]
polygon = shapely.box(x, y, x + width, y + height)
elif mode == "segm":
# COCO polygons are usually a list of lists; take the first (assume "single part")
segmentation = annotation.get("segmentation")
if not segmentation or len(segmentation) == 0:
continue

segmentation_mask = segmentation[0]
# Convert flat list to coordinate pairs
pairs = [
(segmentation_mask[i], segmentation_mask[i + 1])
for i in range(0, len(segmentation_mask), 2)
]
polygon = shapely.geometry.Polygon(pairs)
elif mode == "keypoint":
raise NotImplementedError

filenames.append(image_ids[annotation["image_id"]])
polygons.append(polygon.wkt)
cat_id = annotation.get("category_id")
Expand Down Expand Up @@ -578,7 +589,7 @@ def read_file(
df = pd.read_csv(input)
gdf = _pandas_to_deepforest_format__(input, df, image_path, root_dir, label)
elif input.endswith(".json"):
df = read_coco(input)
df = read_coco(input, mode="box")
gdf = _pandas_to_deepforest_format__(input, df, image_path, root_dir, label)
elif input.endswith(".xml"):
df = read_pascal_voc(input)
Expand Down
43 changes: 42 additions & 1 deletion tests/test_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -558,7 +558,7 @@ def test_image_to_geo_coordinates_polygons():



def test_read_coco_json(tmp_path):
def test_read_coco_json_polygon(tmp_path):
"""Test reading a COCO format JSON file"""
# Create a sample COCO JSON structure
coco_data = {
Expand Down Expand Up @@ -608,6 +608,47 @@ def test_read_coco_json(tmp_path):
assert geom.is_valid
assert isinstance(geom, shapely.geometry.Polygon)

def test_read_coco_json_bbox(tmp_path):
    """Check that read_file converts COCO ``bbox`` annotations to box geometries."""
    # Minimal COCO payload: two images, two categories, one bbox per image.
    images = [
        {"id": 1, "file_name": "OSBS_029.png"},
        {"id": 2, "file_name": "OSBS_029.tif"},
    ]
    categories = [
        {"id": 0, "name": "Tree"},
        {"id": 1, "name": "Bird"},
    ]
    annotations = [
        # COCO bbox layout is [x, y, width, height]
        {"image_id": 1, "bbox": [0, 0, 10, 10], "category_id": 0},
        {"image_id": 2, "bbox": [5, 5, 10, 10], "category_id": 1},
    ]
    coco_data = {
        "images": images,
        "categories": categories,
        "annotations": annotations,
    }

    # Persist the payload where read_file can pick it up by its .json suffix.
    json_path = tmp_path / "annotations.json"
    json_path.write_text(json.dumps(coco_data))

    # Resolve images against the directory that holds the packaged sample data.
    image_dir = os.path.dirname(get_data("OSBS_029.png"))
    df = utilities.read_file(str(json_path), root_dir=image_dir)

    # One output row per input annotation.
    assert df.shape[0] == 2

    # Each [x, y, w, h] bbox should come back as box(x, y, x + w, y + h).
    expected_boxes = [geometry.box(0, 0, 10, 10), geometry.box(5, 5, 15, 15)]
    for actual_geom, expected_geom in zip(df.geometry, expected_boxes):
        assert actual_geom.equals(expected_geom)

def test_format_geometry_box():
"""Test formatting box geometry from model predictions"""
Expand Down
Loading