Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@
venv/
.pytest_cache/
.ruff_cache/
.DS_Store
.DS_Store
data/*
3 changes: 3 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
ruff
pytest
geopandas
shapely
dotenv
File renamed without changes.
58 changes: 58 additions & 0 deletions src/ingestion/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
from pathlib import Path
from typing import Any, Dict

from posm.src.ingestion.spatial_joiner import spatial_join_data

from .api_client import fetch_mapillary_metadata, fetch_osm_buildings


def run_ingestion(
lat: float, lon: float, buffer: float = 0.001, output_dir: str = "data/mapillary"
) -> Dict[str, Any]:
"""
Runs the end-to-end ingestion pipeline for a given coordinate.

Args:
lat: Latitude of the center point.
lon: Longitude of the center point.
buffer: Bounding box buffer in degrees.
output_dir: Directory to save downloaded thumbnails.

Returns:
A dictionary containing joined buildings and mapped images ready for 3D processing.
"""
print(f"--- Starting Ingestion Pipeline for ({lat}, {lon}) ---")

# 1. Fetch OSM Data
print("Fetching OSM building data...")
osm_raw_json = fetch_osm_buildings(lat, lon, buffer)
if not osm_raw_json.get("elements"):
print("[WARNING] No OSM buildings found in this area.")

# 2. Fetch Mapillary Metadata
print("Fetching Mapillary image metadata...")
mapillary_raw_list = fetch_mapillary_metadata(lat, lon, buffer)
if not mapillary_raw_list:
print("[WARNING] No Mapillary images found in this area.")

if not osm_raw_json.get("elements") and not mapillary_raw_list:
print("--- Ingestion Aborted: No data to process ---")
return {"buildings_joined": [], "mapillary_kept": [], "discarded_urls": []}

# 3. Perform Spatial Join & Scoring
print("Processing spatial join, scoring, and downloading images...")
out_path = Path(output_dir)
out_path.mkdir(parents=True, exist_ok=True)

results = spatial_join_data(
osm_raw_json=osm_raw_json, mapillary_raw_list=mapillary_raw_list, output_dir=out_path
)

print("--- Ingestion Complete ---")
print(f"Buildings processed: {len(results.get('buildings_joined', []))}")
print(f"Images assigned: {len(results.get('mapillary_kept', []))}")

return results


__all__ = ["run_ingestion"]
87 changes: 87 additions & 0 deletions src/ingestion/api_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
import os
import time
from pathlib import Path
from typing import Any, Dict, List, Optional

import requests

OVERPASS_URL = "https://overpass-api.de/api/interpreter"
MAPILLARY_URL = "https://graph.mapillary.com/images"


def fetch_osm_buildings(
lat: float, lon: float, buffer: float = 0.001, retries: int = 3
) -> Dict[str, Any]:
"""Queries Overpass API for buildings with retry logic."""
s, w, n, e = (lat - buffer, lon - buffer, lat + buffer, lon + buffer)
query = f"""
[out:json][timeout:25];
(
way["building"]({s},{w},{n},{e});
relation["building"]({s},{w},{n},{e});
);
out body;
>;
out skel qt;
"""
for attempt in range(retries):
try:
print(f"Fetching OSM buildings (Attempt {attempt + 1})...")
response = requests.post(OVERPASS_URL, data={"data": query}, timeout=60)
if response.status_code == 200:
return response.json()
time.sleep((attempt + 1) * 2)
except Exception as e:
print(f"OSM attempt failed: {e}")
return {}


def fetch_mapillary_metadata(
lat: float, lon: float, buffer: float = 0.001, token: str = None
) -> List[Dict[str, Any]]:
"""Fetches Mapillary metadata including camera parameters and poses."""
token = token or os.getenv("MAPILLARY_ACCESS_TOKEN")
if not token:
raise ValueError("MAPILLARY_ACCESS_TOKEN missing from environment variables.")

headers = {"Authorization": f"OAuth {token}"}
s, w, n, e = (lat - buffer, lon - buffer, lat + buffer, lon + buffer)
bbox = f"{w},{s},{e},{n}"
fields = (
"id,thumb_original_url,computed_geometry,computed_compass_angle,"
"camera_parameters,captured_at,sequence"
)
url = f"{MAPILLARY_URL}?bbox={bbox}&fields={fields}"

try:
response = requests.get(url, headers=headers)
if response.status_code == 200:
return response.json().get("data", [])
else:
print(f"[ERROR] Mapillary API {response.status_code}: {response.text}")
except Exception as e:
print(f"[CRITICAL] Mapillary fetch failed: {e}")
return []


def download_thumbnail(url: str, image_id: str, output_dir: Path) -> Optional[str]:
"""Downloads the thumbnail and returns the local path string."""
if not url:
return None
output_dir.mkdir(parents=True, exist_ok=True)
filename = output_dir / f"{image_id}.jpg"

if filename.exists():
return str(filename)

try:
r = requests.get(url, timeout=15)
if r.status_code == 200:
filename.write_bytes(r.content)
time.sleep(0.1)
return str(filename)
else:
print(f"Error {r.status_code} downloading {image_id}")
except Exception as e:
print(f"Failed to download {image_id}: {e}")
return None
169 changes: 169 additions & 0 deletions src/ingestion/osm_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
from typing import Any, Dict, List, Optional, Tuple

from src.utils.spatial_math import clean_polygon_latlon, point_in_polygon, polygon_area_m2


def parse_height_m(tags: Dict[str, Any]) -> Optional[float]:
h = tags.get("height")
if isinstance(h, str):
s = h.strip().lower().replace("meters", "m").replace(" ", "")
try:
if s.endswith("m"):
s = s[:-1]
return float(s)
except Exception:
pass
lv = tags.get("building:levels")
if isinstance(lv, str):
try:
return max(1.0, float(lv.strip())) * 3.0
except Exception:
pass
return None


def _way_nodes_latlon(
way: Dict[str, Any], nodes_by_id: Dict[int, Tuple[float, float]]
) -> List[Tuple[float, float]]:
return [
p
for p in (nodes_by_id.get(nid) for nid in (way.get("nodes") or []))
if p and p[0] is not None and p[1] is not None
]


def _stitch_rings_from_way_node_lists(
way_node_lists: List[List[Tuple[float, float]]],
) -> List[List[Tuple[float, float]]]:
segments = [seg for seg in way_node_lists if seg and len(seg) >= 2]
rings = []
while segments:
ring = segments.pop(0)[:]
changed = True
while changed and segments:
changed = False
end, start = ring[-1], ring[0]
if len(ring) >= 3 and ring[0] == ring[-1]:
break
for i, seg in enumerate(segments):
s0, s1 = seg[0], seg[-1]
if end == s0:
ring.extend(seg[1:])
segments.pop(i)
changed = True
break
if end == s1:
ring.extend(list(reversed(seg[:-1])))
segments.pop(i)
changed = True
break
if start == s1:
ring = seg[:-1] + ring
segments.pop(i)
changed = True
break
if start == s0:
ring = list(reversed(seg[1:])) + ring
segments.pop(i)
changed = True
break
ring = clean_polygon_latlon(ring)
if len(ring) >= 4 and ring[0] == ring[-1]:
rings.append(ring)
return rings


def _extract_relation_multipolygon_rings(
rel: Dict[str, Any],
ways_by_id: Dict[int, Dict[str, Any]],
nodes_by_id: Dict[int, Tuple[float, float]],
) -> Tuple[List[List[Tuple[float, float]]], List[List[Tuple[float, float]]]]:
outer_way_lists, inner_way_lists = [], []
for m in rel.get("members", []):
if m.get("type") == "way" and (way := ways_by_id.get(m.get("ref"))):
pts = _way_nodes_latlon(way, nodes_by_id)
if len(pts) >= 2:
(
inner_way_lists
if (m.get("role") or "").strip().lower() == "inner"
else outer_way_lists
).append(pts)
return (
_stitch_rings_from_way_node_lists(outer_way_lists) if outer_way_lists else [],
_stitch_rings_from_way_node_lists(inner_way_lists) if inner_way_lists else [],
)


def extract_osm_buildings(overpass_json: Dict[str, Any]) -> List[Dict[str, Any]]:
elements = overpass_json.get("elements", [])
nodes_by_id = {
e["id"]: (e["lat"], e["lon"])
for e in elements
if e.get("type") == "node" and e.get("id") is not None
}
ways_by_id = {
e["id"]: e for e in elements if e.get("type") == "way" and e.get("id") is not None
}
buildings = []

for e in elements:
if e.get("type") != "way" or "building" not in (tags := e.get("tags", {}) or {}):
continue
footprint = clean_polygon_latlon(_way_nodes_latlon(e, nodes_by_id))
if len(footprint) < 4 or polygon_area_m2(footprint) < 5.0:
continue
verts = footprint[:-1]
buildings.append(
{
"osm_id": f"way/{e.get('id')}",
"osm_type": "way",
"tags": tags,
"height_m": parse_height_m(tags),
"footprint_latlon": footprint,
"holes_latlon": [],
"centroid_latlon": (
sum(p[0] for p in verts) / len(verts),
sum(p[1] for p in verts) / len(verts),
),
}
)

for e in elements:
if e.get("type") != "relation" or "building" not in (tags := e.get("tags", {}) or {}):
continue
outer_rings, inner_rings = _extract_relation_multipolygon_rings(e, ways_by_id, nodes_by_id)
if not outer_rings:
continue
outer_rings_sorted = sorted(outer_rings, key=polygon_area_m2, reverse=True)
primary_outer = outer_rings_sorted[0]
if len(primary_outer) < 4 or polygon_area_m2(primary_outer) < 5.0:
continue

valid_holes = [
hole
for hole in inner_rings
if len(hole) >= 4
and point_in_polygon(
sum(p[0] for p in hole[:-1]) / len(hole[:-1]),
sum(p[1] for p in hole[:-1]) / len(hole[:-1]),
primary_outer,
)
]
verts = primary_outer[:-1]
buildings.append(
{
"osm_id": f"relation/{e.get('id')}",
"osm_type": "relation",
"tags": tags,
"height_m": parse_height_m(tags),
"footprint_latlon": primary_outer,
"holes_latlon": valid_holes,
"centroid_latlon": (
sum(p[0] for p in verts) / len(verts),
sum(p[1] for p in verts) / len(verts),
),
"outer_rings_latlon": outer_rings_sorted,
}
)

return buildings
Loading