From edf8092c58d98c557de8f3446c3a65b092e35c23 Mon Sep 17 00:00:00 2001 From: anthonychen000 Date: Tue, 3 Mar 2026 18:13:54 -0500 Subject: [PATCH 1/4] feat(ingestion): refactor mapillary and osm pipeline into unified module --- .gitignore | 3 +- requirements.txt | 3 + .../run_pipeline.py => src/cli/__init__.py | 0 src/ingestion/__init__.py | 55 +++++++++ src/ingestion/api_client.py | 74 ++++++++++++ src/ingestion/osm_parser.py | 85 ++++++++++++++ src/ingestion/spatial_joiner.py.py | 108 ++++++++++++++++++ src/utils/__init__.py | 0 src/utils/spatial_math.py | 100 ++++++++++++++++ 9 files changed, 427 insertions(+), 1 deletion(-) rename scripts/run_pipeline.py => src/cli/__init__.py (100%) create mode 100644 src/ingestion/api_client.py create mode 100644 src/ingestion/osm_parser.py create mode 100644 src/ingestion/spatial_joiner.py.py create mode 100644 src/utils/__init__.py create mode 100644 src/utils/spatial_math.py diff --git a/.gitignore b/.gitignore index 504b7a0..3fb8e8b 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,5 @@ venv/ .pytest_cache/ .ruff_cache/ -.DS_Store \ No newline at end of file +.DS_Store +data/* \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 68fc11a..b4dbee4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,5 @@ ruff pytest +geopandas +shapely +dotenv \ No newline at end of file diff --git a/scripts/run_pipeline.py b/src/cli/__init__.py similarity index 100% rename from scripts/run_pipeline.py rename to src/cli/__init__.py diff --git a/src/ingestion/__init__.py b/src/ingestion/__init__.py index e69de29..e65f458 100644 --- a/src/ingestion/__init__.py +++ b/src/ingestion/__init__.py @@ -0,0 +1,55 @@ +from pathlib import Path +from typing import Dict, Any + +from .api_client import fetch_osm_buildings, fetch_mapillary_metadata +from core.matcher import spatial_join_data + +def run_ingestion(lat: float, lon: float, buffer: float = 0.001, output_dir: str = "data/mapillary") -> Dict[str, Any]: + """ + Runs the end-to-end ingestion pipeline for a given coordinate. + + Args: + lat: Latitude of the center point. + lon: Longitude of the center point. + buffer: Bounding box buffer in degrees. + output_dir: Directory to save downloaded thumbnails. + + Returns: + A dictionary containing joined buildings and mapped images ready for 3D processing. + """ + print(f"--- Starting Ingestion Pipeline for ({lat}, {lon}) ---") + + # 1. Fetch OSM Data + print("Fetching OSM building data...") + osm_raw_json = fetch_osm_buildings(lat, lon, buffer) + if not osm_raw_json.get("elements"): + print("[WARNING] No OSM buildings found in this area.") + + # 2. Fetch Mapillary Metadata + print("Fetching Mapillary image metadata...") + mapillary_raw_list = fetch_mapillary_metadata(lat, lon, buffer) + if not mapillary_raw_list: + print("[WARNING] No Mapillary images found in this area.") + + if not osm_raw_json.get("elements") and not mapillary_raw_list: + print("--- Ingestion Aborted: No data to process ---") + return {"buildings_joined": [], "mapillary_kept": [], "discarded_urls": []} + + # 3. Perform Spatial Join & Scoring + print("Processing spatial join, scoring, and downloading images...") + out_path = Path(output_dir) + out_path.mkdir(parents=True, exist_ok=True) + + results = spatial_join_data( + osm_raw_json=osm_raw_json, + mapillary_raw_list=mapillary_raw_list, + output_dir=out_path + ) + + print(f"--- Ingestion Complete ---") + print(f"Buildings processed: {len(results.get('buildings_joined', []))}") + print(f"Images assigned: {len(results.get('mapillary_kept', []))}") + + return results + +__all__ = ["run_ingestion"] \ No newline at end of file diff --git a/src/ingestion/api_client.py b/src/ingestion/api_client.py new file mode 100644 index 0000000..c3c6dc8 --- /dev/null +++ b/src/ingestion/api_client.py @@ -0,0 +1,74 @@ +import os +import requests +import time +from pathlib import Path +from typing import Dict, Any, List, Optional + +OVERPASS_URL = "https://overpass-api.de/api/interpreter" +MAPILLARY_URL = "https://graph.mapillary.com/images" + +def fetch_osm_buildings(lat: float, lon: float, buffer: float = 0.001, retries: int = 3) -> Dict[str, Any]: + """Queries Overpass API for buildings with retry logic.""" + s, w, n, e = (lat - buffer, lon - buffer, lat + buffer, lon + buffer) + query = f""" + [out:json][timeout:25]; + ( + way["building"]({s},{w},{n},{e}); + relation["building"]({s},{w},{n},{e}); + ); + out body; + >; + out skel qt; + """ + for attempt in range(retries): + try: + print(f"Fetching OSM buildings (Attempt {attempt + 1})...") + response = requests.post(OVERPASS_URL, data={"data": query}, timeout=60) + if response.status_code == 200: + return response.json() + time.sleep((attempt + 1) * 2) + except Exception as e: + print(f"OSM attempt failed: {e}") + return {} + +def fetch_mapillary_metadata(lat: float, lon: float, buffer: float = 0.001, token: str = None) -> List[Dict[str, Any]]: + """Fetches Mapillary metadata including camera parameters and poses.""" + token = token or os.getenv("MAPILLARY_ACCESS_TOKEN") + if not token: + raise ValueError("MAPILLARY_ACCESS_TOKEN missing from environment variables.") + + headers = {"Authorization": f"OAuth {token}"} + s, w, n, e = (lat - buffer, lon - buffer, lat + buffer, lon + buffer) + bbox = f"{w},{s},{e},{n}" + fields = "id,thumb_original_url,computed_geometry,computed_compass_angle,camera_parameters,captured_at,sequence" + url = f"{MAPILLARY_URL}?bbox={bbox}&fields={fields}" + + try: + response = requests.get(url, headers=headers) + if response.status_code == 200: + return response.json().get("data", []) + else: + print(f"[ERROR] Mapillary API {response.status_code}: {response.text}") + except Exception as e: + print(f"[CRITICAL] Mapillary fetch failed: {e}") + return [] + +def download_thumbnail(url: str, image_id: str, output_dir: Path) -> Optional[str]: + """Downloads the thumbnail and returns the local path string.""" + if not url: return None + output_dir.mkdir(parents=True, exist_ok=True) + filename = output_dir / f"{image_id}.jpg" + + if filename.exists(): return str(filename) + + try: + r = requests.get(url, timeout=15) + if r.status_code == 200: + filename.write_bytes(r.content) + time.sleep(0.1) + return str(filename) + else: + print(f"Error {r.status_code} downloading {image_id}") + except Exception as e: + print(f"Failed to download {image_id}: {e}") + return None \ No newline at end of file diff --git a/src/ingestion/osm_parser.py b/src/ingestion/osm_parser.py new file mode 100644 index 0000000..349d078 --- /dev/null +++ b/src/ingestion/osm_parser.py @@ -0,0 +1,85 @@ +from typing import Any, Dict, List, Tuple, Optional +from src.utils.spatial_math import clean_polygon_latlon, polygon_area_m2, point_in_polygon + +def parse_height_m(tags: Dict[str, Any]) -> Optional[float]: + h = tags.get("height") + if isinstance(h, str): + s = h.strip().lower().replace("meters", "m").replace(" ", "") + try: + if s.endswith("m"): s = s[:-1] + return float(s) + except Exception: pass + lv = tags.get("building:levels") + if isinstance(lv, str): + try: return max(1.0, float(lv.strip())) * 3.0 + except Exception: pass + return None + +def _way_nodes_latlon(way: Dict[str, Any], nodes_by_id: Dict[int, Tuple[float, float]]) -> List[Tuple[float, float]]: + return [p for p in (nodes_by_id.get(nid) for nid in (way.get("nodes") or [])) if p and p[0] is not None and p[1] is not None] + +def _stitch_rings_from_way_node_lists(way_node_lists: List[List[Tuple[float, float]]]) -> List[List[Tuple[float, float]]]: + segments = [seg for seg in way_node_lists if seg and len(seg) >= 2] + rings = [] + while segments: + ring = segments.pop(0)[:] + changed = True + while changed and segments: + changed = False + end, start = ring[-1], ring[0] + if len(ring) >= 3 and ring[0] == ring[-1]: break + for i, seg in enumerate(segments): + s0, s1 = seg[0], seg[-1] + if end == s0: ring.extend(seg[1:]); segments.pop(i); changed = True; break + if end == s1: ring.extend(list(reversed(seg[:-1]))); segments.pop(i); changed = True; break + if start == s1: ring = seg[:-1] + ring; segments.pop(i); changed = True; break + if start == s0: ring = list(reversed(seg[1:])) + ring; segments.pop(i); changed = True; break + ring = clean_polygon_latlon(ring) + if len(ring) >= 4 and ring[0] == ring[-1]: rings.append(ring) + return rings + +def _extract_relation_multipolygon_rings(rel: Dict[str, Any], ways_by_id: Dict[int, Dict[str, Any]], nodes_by_id: Dict[int, Tuple[float, float]]) -> Tuple[List[List[Tuple[float, float]]], List[List[Tuple[float, float]]]]: + outer_way_lists, inner_way_lists = [], [] + for m in rel.get("members", []): + if m.get("type") == "way" and (way := ways_by_id.get(m.get("ref"))): + pts = _way_nodes_latlon(way, nodes_by_id) + if len(pts) >= 2: + (inner_way_lists if (m.get("role") or "").strip().lower() == "inner" else outer_way_lists).append(pts) + return (_stitch_rings_from_way_node_lists(outer_way_lists) if outer_way_lists else [], + _stitch_rings_from_way_node_lists(inner_way_lists) if inner_way_lists else []) + +def extract_osm_buildings(overpass_json: Dict[str, Any]) -> List[Dict[str, Any]]: + elements = overpass_json.get("elements", []) + nodes_by_id = {e["id"]: (e["lat"], e["lon"]) for e in elements if e.get("type") == "node" and e.get("id") is not None} + ways_by_id = {e["id"]: e for e in elements if e.get("type") == "way" and e.get("id") is not None} + buildings = [] + + for e in elements: + if e.get("type") != "way" or "building" not in (tags := e.get("tags", {}) or {}): continue + footprint = clean_polygon_latlon(_way_nodes_latlon(e, nodes_by_id)) + if len(footprint) < 4 or polygon_area_m2(footprint) < 5.0: continue + verts = footprint[:-1] + buildings.append({ + "osm_id": f"way/{e.get('id')}", "osm_type": "way", "tags": tags, + "height_m": parse_height_m(tags), "footprint_latlon": footprint, + "holes_latlon": [], "centroid_latlon": (sum(p[0] for p in verts)/len(verts), sum(p[1] for p in verts)/len(verts)) + }) + + for e in elements: + if e.get("type") != "relation" or "building" not in (tags := e.get("tags", {}) or {}): continue + outer_rings, inner_rings = _extract_relation_multipolygon_rings(e, ways_by_id, nodes_by_id) + if not outer_rings: continue + outer_rings_sorted = sorted(outer_rings, key=polygon_area_m2, reverse=True) + primary_outer = outer_rings_sorted[0] + if len(primary_outer) < 4 or polygon_area_m2(primary_outer) < 5.0: continue + + valid_holes = [hole for hole in inner_rings if len(hole) >= 4 and point_in_polygon(sum(p[0] for p in hole[:-1])/len(hole[:-1]), sum(p[1] for p in hole[:-1])/len(hole[:-1]), primary_outer)] + verts = primary_outer[:-1] + buildings.append({ + "osm_id": f"relation/{e.get('id')}", "osm_type": "relation", "tags": tags, + "height_m": parse_height_m(tags), "footprint_latlon": primary_outer, + "holes_latlon": valid_holes, "centroid_latlon": (sum(p[0] for p in verts)/len(verts), sum(p[1] for p in verts)/len(verts)), + "outer_rings_latlon": outer_rings_sorted + }) + + return buildings \ No newline at end of file diff --git a/src/ingestion/spatial_joiner.py.py b/src/ingestion/spatial_joiner.py.py new file mode 100644 index 0000000..28526be --- /dev/null +++ b/src/ingestion/spatial_joiner.py.py @@ -0,0 +1,108 @@ +from dataclasses import dataclass +from typing import Any, Dict, List, Optional, Tuple +from datetime import datetime, timezone +from pathlib import Path + +from src.utils.spatial_math import ( + point_to_segment_distance_m, bearing_deg, wrap_angle_deg, angle_diff_deg, + point_in_polygon_with_holes, haversine_m +) +from src.ingestion.osm_parser import extract_osm_buildings +from src.ingestion.api_client import download_thumbnail + +@dataclass +class EdgeInfo: + distance_m: float + edge_index: int + closest_point_lat: float + closest_point_lon: float + edge_bearing_deg: float + facade_normal_deg: float + +def nearest_edge_info(cam_lat: float, cam_lon: float, polygon_latlon_closed: List[Tuple[float, float]]) -> Optional[EdgeInfo]: + if len(polygon_latlon_closed) < 4: return None + best_d, best_i, best_t = float("inf"), -1, 0.0 + + for i in range(len(polygon_latlon_closed) - 1): + a_lat, a_lon = polygon_latlon_closed[i] + b_lat, b_lon = polygon_latlon_closed[i + 1] + d, t = point_to_segment_distance_m(cam_lat, cam_lon, a_lat, a_lon, b_lat, b_lon) + if d < best_d: best_d, best_i, best_t = d, i, t + + a_lat, a_lon = polygon_latlon_closed[best_i] + b_lat, b_lon = polygon_latlon_closed[best_i + 1] + cp_lat = a_lat + best_t * (b_lat - a_lat) + cp_lon = a_lon + best_t * (b_lon - a_lon) + e_bearing = bearing_deg(a_lat, a_lon, b_lat, b_lon) + + n1, n2 = wrap_angle_deg(e_bearing + 90), wrap_angle_deg(e_bearing - 90) + b_cp_to_cam = bearing_deg(cp_lat, cp_lon, cam_lat, cam_lon) + n = n1 if angle_diff_deg(n1, b_cp_to_cam) <= angle_diff_deg(n2, b_cp_to_cam) else n2 + + return EdgeInfo(best_d, best_i, cp_lat, cp_lon, e_bearing, n) + +def mapillary_pose(img: Dict[str, Any]) -> Optional[Dict[str, Any]]: + coords = img.get("computed_geometry", {}).get("coordinates") + heading = img.get("computed_compass_angle") + if not isinstance(coords, list) or len(coords) < 2 or coords[1] is None or coords[0] is None or heading is None: return None + return {"lat": float(coords[1]), "lon": float(coords[0]), "heading": float(heading), "captured_at": img.get("captured_at")} + +def is_approx_daylight(lat: float, lon: float, captured_at_ms: Optional[int]) -> bool: + if captured_at_ms is None: return True + try: + dt_utc = datetime.fromtimestamp(captured_at_ms / 1000.0, tz=timezone.utc) + return 7.0 <= (dt_utc.hour + (dt_utc.minute / 60.0) + (lon / 15.0)) % 24 <= 19.0 + except Exception: return True + +def score_candidate(distance_m: float, heading_delta_deg: float) -> float: + score = sum([2.0 if distance_m <= 15 else 1.5 if distance_m <= 30 else 1.0 if distance_m <= 50 else 0.5 if distance_m <= 80 else 0.0, + 2.0 if heading_delta_deg <= 15 else 1.5 if heading_delta_deg <= 30 else 1.0 if heading_delta_deg <= 45 else 0.5 if heading_delta_deg <= 60 else 0.0]) + return round(score - min(distance_m / 200.0, 0.6), 4) + +def dynamic_heading_threshold_deg(distance_m: float) -> float: + return 60.0 if distance_m <= 15 else 45.0 if distance_m <= 30 else 30.0 if distance_m <= 50 else 25.0 + +def thin_sequence(images: List[Dict[str, Any]], min_move_m: float = 2.0, min_heading_change_deg: float = 5.0) -> List[Dict[str, Any]]: + kept, last = [], None + for img in images: + if last is None or haversine_m(img["pose"]["lat"], img["pose"]["lon"], last["pose"]["lat"], last["pose"]["lon"]) >= min_move_m or angle_diff_deg(img["pose"].get("heading", 0.0), last["pose"].get("heading", 0.0)) >= min_heading_change_deg: + kept.append(img); last = img + return kept + +def spatial_join_data(osm_raw_json: Dict[str, Any], mapillary_raw_list: List[Dict[str, Any]], output_dir: Path, max_distance_m: float = 50.0) -> Dict[str, Any]: + """Matches raw mapillary data to parsed OSM buildings in memory.""" + buildings = extract_osm_buildings(osm_raw_json) + b_by_id = {b["osm_id"]: {**b, "assigned_images": []} for b in buildings} + kept_images, discarded_urls = [], [] + + for img in mapillary_raw_list: + pose = mapillary_pose(img) + url = img.get("thumb_original_url") + if not url or pose is None or not buildings or not is_approx_daylight(pose["lat"], pose["lon"], pose["captured_at"]): + if url: discarded_urls.append(url) + continue + + candidates = [] + for b in buildings: + if point_in_polygon_with_holes(pose["lat"], pose["lon"], b["footprint_latlon"], b.get("holes_latlon")): continue + if (ei := nearest_edge_info(pose["lat"], pose["lon"], b["footprint_latlon"])) is None: continue + + heading_delta = angle_diff_deg(pose["heading"], bearing_deg(pose["lat"], pose["lon"], ei.closest_point_lat, ei.closest_point_lon)) + candidates.append({ + "osm_id": b["osm_id"], "distance_to_edge_m": round(ei.distance_m, 3), "closest_edge_index": ei.edge_index, + "heading_delta_deg": round(heading_delta, 2), "visibility_score": score_candidate(ei.distance_m, heading_delta) + }) + + candidates.sort(key=lambda c: (-c["visibility_score"], c["distance_to_edge_m"])) + best = candidates[0] if candidates else None + + if best is None or best["distance_to_edge_m"] > max_distance_m or best["heading_delta_deg"] > dynamic_heading_threshold_deg(best["distance_to_edge_m"]): + discarded_urls.append(url) + continue + + local_thumb_path = download_thumbnail(url, img["id"], output_dir) + join_info = {"image_id": img["id"], "url": url, "pose": pose, "best_candidate": best, "assigned_building": best["osm_id"], "local_thumb_path": local_thumb_path} + b_by_id[best["osm_id"]]["assigned_images"].append(join_info) + kept_images.append(join_info) + + return {"buildings_joined": list(b_by_id.values()), "mapillary_kept": kept_images, "discarded_urls": discarded_urls} \ No newline at end of file diff --git a/src/utils/__init__.py b/src/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/utils/spatial_math.py b/src/utils/spatial_math.py new file mode 100644 index 0000000..30e1928 --- /dev/null +++ b/src/utils/spatial_math.py @@ -0,0 +1,100 @@ +import math +from typing import List, Tuple, Optional + +def haversine_m(lat1: float, lon1: float, lat2: float, lon2: float) -> float: + R = 6371000.0 + p1, p2 = math.radians(lat1), math.radians(lat2) + dphi = math.radians(lat2 - lat1) + dl = math.radians(lon2 - lon1) + a = math.sin(dphi / 2) ** 2 + math.cos(p1) * math.cos(p2) * math.sin(dl / 2) ** 2 + return 2 * R * math.asin(math.sqrt(a)) + +def bearing_deg(lat1: float, lon1: float, lat2: float, lon2: float) -> float: + phi1, phi2 = math.radians(lat1), math.radians(lat2) + dlon = math.radians(lon2 - lon1) + x = math.sin(dlon) * math.cos(phi2) + y = math.cos(phi1) * math.sin(phi2) - math.sin(phi1) * math.cos(phi2) * math.cos(dlon) + return (math.degrees(math.atan2(x, y)) + 360) % 360 + +def wrap_angle_deg(a: float) -> float: + return (a + 360) % 360 + +def angle_diff_deg(a: float, b: float) -> float: + d = (a - b + 180) % 360 - 180 + return abs(d) + +def meters_per_degree(lat: float) -> Tuple[float, float]: + lat_rad = math.radians(lat) + m_per_deg_lat = 111132.92 + m_per_deg_lon = 111412.84 * math.cos(lat_rad) + return m_per_deg_lat, m_per_deg_lon + +def latlon_to_local_xy(lat: float, lon: float, lat0: float, lon0: float) -> Tuple[float, float]: + mlat, mlon = meters_per_degree(lat0) + x = (lon - lon0) * mlon + y = (lat - lat0) * mlat + return x, y + +def polygon_area_m2(polygon_latlon: List[Tuple[float, float]]) -> float: + if len(polygon_latlon) < 3: return 0.0 + poly = polygon_latlon + if len(poly) >= 4 and poly[0] == poly[-1]: poly = poly[:-1] + if len(poly) < 3: return 0.0 + lat0 = sum(p[0] for p in poly) / len(poly) + lon0 = sum(p[1] for p in poly) / len(poly) + pts = [latlon_to_local_xy(lat, lon, lat0, lon0) for lat, lon in poly] + area2 = 0.0 + for i in range(len(pts)): + x1, y1 = pts[i] + x2, y2 = pts[(i + 1) % len(pts)] + area2 += x1 * y2 - x2 * y1 + return abs(area2) * 0.5 + +def clean_polygon_latlon(poly: List[Tuple[float, float]]) -> List[Tuple[float, float]]: + if not poly: return poly + cleaned = [poly[0]] + for p in poly[1:]: + if p != cleaned[-1]: cleaned.append(p) + if len(cleaned) >= 2 and cleaned[0] == cleaned[-1]: cleaned = cleaned[:-1] + if len(cleaned) >= 3 and cleaned[0] != cleaned[-1]: cleaned.append(cleaned[0]) + return cleaned + +def point_in_polygon(lat: float, lon: float, polygon_latlon: List[Tuple[float, float]]) -> bool: + x, y = lon, lat + inside = False + n = len(polygon_latlon) + if n < 3: return False + for i in range(n - 1): + y1, x1 = polygon_latlon[i] + y2, x2 = polygon_latlon[i + 1] + if (y1 > y) != (y2 > y): + xinters = (x2 - x1) * (y - y1) / ((y2 - y1) + 1e-12) + x1 + if x < xinters: inside = not inside + return inside + +def point_in_polygon_with_holes( + lat: float, lon: float, outer_ring_closed: List[Tuple[float, float]], inner_rings_closed: Optional[List[List[Tuple[float, float]]]] = None +) -> bool: + if not outer_ring_closed or len(outer_ring_closed) < 4: return False + if not point_in_polygon(lat, lon, outer_ring_closed): return False + if inner_rings_closed: + for hole in inner_rings_closed: + if hole and len(hole) >= 4 and point_in_polygon(lat, lon, hole): return False + return True + +def point_to_segment_distance_m( + p_lat: float, p_lon: float, a_lat: float, a_lon: float, b_lat: float, b_lon: float +) -> Tuple[float, float]: + px, py = latlon_to_local_xy(p_lat, p_lon, p_lat, p_lon) + ax, ay = latlon_to_local_xy(a_lat, a_lon, p_lat, p_lon) + bx, by = latlon_to_local_xy(b_lat, b_lon, p_lat, p_lon) + + abx, aby = bx - ax, by - ay + apx, apy = px - ax, py - ay + ab2 = abx * abx + aby * aby + + if ab2 <= 1e-12: return math.hypot(px - ax, py - ay), 0.0 + + t = max(0.0, min(1.0, (apx * abx + apy * aby) / ab2)) + cx, cy = ax + t * abx, ay + t * aby + return math.hypot(px - cx, py - cy), t \ No newline at end of file From 903e316789c8062a920bde0a8dc23824398b97e8 Mon Sep 17 00:00:00 2001 From: anthonychen000 Date: Tue, 3 Mar 2026 18:19:37 -0500 Subject: [PATCH 2/4] Ruff reformat --- src/ingestion/__init__.py | 30 ++--- src/ingestion/api_client.py | 27 +++-- src/ingestion/osm_parser.py | 159 ++++++++++++++++++++------- src/ingestion/spatial_joiner.py.py | 169 +++++++++++++++++++++++------ src/utils/spatial_math.py | 57 +++++++--- 5 files changed, 335 insertions(+), 107 deletions(-) diff --git a/src/ingestion/__init__.py b/src/ingestion/__init__.py index e65f458..dde7eb1 100644 --- a/src/ingestion/__init__.py +++ b/src/ingestion/__init__.py @@ -4,52 +4,54 @@ from .api_client import fetch_osm_buildings, fetch_mapillary_metadata from core.matcher import spatial_join_data -def run_ingestion(lat: float, lon: float, buffer: float = 0.001, output_dir: str = "data/mapillary") -> Dict[str, Any]: + +def run_ingestion( + lat: float, lon: float, buffer: float = 0.001, output_dir: str = "data/mapillary" +) -> Dict[str, Any]: """ Runs the end-to-end ingestion pipeline for a given coordinate. - + Args: lat: Latitude of the center point. lon: Longitude of the center point. buffer: Bounding box buffer in degrees. output_dir: Directory to save downloaded thumbnails. - + Returns: A dictionary containing joined buildings and mapped images ready for 3D processing. """ print(f"--- Starting Ingestion Pipeline for ({lat}, {lon}) ---") - + # 1. Fetch OSM Data print("Fetching OSM building data...") osm_raw_json = fetch_osm_buildings(lat, lon, buffer) if not osm_raw_json.get("elements"): print("[WARNING] No OSM buildings found in this area.") - + # 2. Fetch Mapillary Metadata print("Fetching Mapillary image metadata...") mapillary_raw_list = fetch_mapillary_metadata(lat, lon, buffer) if not mapillary_raw_list: print("[WARNING] No Mapillary images found in this area.") - + if not osm_raw_json.get("elements") and not mapillary_raw_list: print("--- Ingestion Aborted: No data to process ---") return {"buildings_joined": [], "mapillary_kept": [], "discarded_urls": []} - + # 3. Perform Spatial Join & Scoring print("Processing spatial join, scoring, and downloading images...") out_path = Path(output_dir) out_path.mkdir(parents=True, exist_ok=True) - + results = spatial_join_data( - osm_raw_json=osm_raw_json, - mapillary_raw_list=mapillary_raw_list, - output_dir=out_path + osm_raw_json=osm_raw_json, mapillary_raw_list=mapillary_raw_list, output_dir=out_path ) - + print(f"--- Ingestion Complete ---") print(f"Buildings processed: {len(results.get('buildings_joined', []))}") print(f"Images assigned: {len(results.get('mapillary_kept', []))}") - + return results -__all__ = ["run_ingestion"] \ No newline at end of file + +__all__ = ["run_ingestion"] diff --git a/src/ingestion/api_client.py b/src/ingestion/api_client.py index c3c6dc8..2645e2f 100644 --- a/src/ingestion/api_client.py +++ b/src/ingestion/api_client.py @@ -7,7 +7,10 @@ OVERPASS_URL = "https://overpass-api.de/api/interpreter" MAPILLARY_URL = "https://graph.mapillary.com/images" -def fetch_osm_buildings(lat: float, lon: float, buffer: float = 0.001, retries: int = 3) -> Dict[str, Any]: + +def fetch_osm_buildings( + lat: float, lon: float, buffer: float = 0.001, retries: int = 3 +) -> Dict[str, Any]: """Queries Overpass API for buildings with retry logic.""" s, w, n, e = (lat - buffer, lon - buffer, lat + buffer, lon + buffer) query = f""" @@ -31,18 +34,21 @@ def fetch_osm_buildings(lat: float, lon: float, buffer: float = 0.001, retries: print(f"OSM attempt failed: {e}") return {} -def fetch_mapillary_metadata(lat: float, lon: float, buffer: float = 0.001, token: str = None) -> List[Dict[str, Any]]: + +def fetch_mapillary_metadata( + lat: float, lon: float, buffer: float = 0.001, token: str = None +) -> List[Dict[str, Any]]: """Fetches Mapillary metadata including camera parameters and poses.""" token = token or os.getenv("MAPILLARY_ACCESS_TOKEN") if not token: raise ValueError("MAPILLARY_ACCESS_TOKEN missing from environment variables.") - + headers = {"Authorization": f"OAuth {token}"} s, w, n, e = (lat - buffer, lon - buffer, lat + buffer, lon + buffer) bbox = f"{w},{s},{e},{n}" fields = "id,thumb_original_url,computed_geometry,computed_compass_angle,camera_parameters,captured_at,sequence" url = f"{MAPILLARY_URL}?bbox={bbox}&fields={fields}" - + try: response = requests.get(url, headers=headers) if response.status_code == 200: @@ -53,14 +59,17 @@ def fetch_mapillary_metadata(lat: float, lon: float, buffer: float = 0.001, toke print(f"[CRITICAL] Mapillary fetch failed: {e}") return [] + def download_thumbnail(url: str, image_id: str, output_dir: Path) -> Optional[str]: """Downloads the thumbnail and returns the local path string.""" - if not url: return None + if not url: + return None output_dir.mkdir(parents=True, exist_ok=True) filename = output_dir / f"{image_id}.jpg" - - if filename.exists(): return str(filename) - + + if filename.exists(): + return str(filename) + try: r = requests.get(url, timeout=15) if r.status_code == 200: @@ -71,4 +80,4 @@ def download_thumbnail(url: str, image_id: str, output_dir: Path) -> Optional[st print(f"Error {r.status_code} downloading {image_id}") except Exception as e: print(f"Failed to download {image_id}: {e}") - return None \ No newline at end of file + return None diff --git a/src/ingestion/osm_parser.py b/src/ingestion/osm_parser.py index 349d078..ee390aa 100644 --- a/src/ingestion/osm_parser.py +++ b/src/ingestion/osm_parser.py @@ -1,24 +1,39 @@ from typing import Any, Dict, List, Tuple, Optional from src.utils.spatial_math import clean_polygon_latlon, polygon_area_m2, point_in_polygon + def parse_height_m(tags: Dict[str, Any]) -> Optional[float]: h = tags.get("height") if isinstance(h, str): s = h.strip().lower().replace("meters", "m").replace(" ", "") try: - if s.endswith("m"): s = s[:-1] + if s.endswith("m"): + s = s[:-1] return float(s) - except Exception: pass + except Exception: + pass lv = tags.get("building:levels") if isinstance(lv, str): - try: return max(1.0, float(lv.strip())) * 3.0 - except Exception: pass + try: + return max(1.0, float(lv.strip())) * 3.0 + except Exception: + pass return None -def _way_nodes_latlon(way: Dict[str, Any], nodes_by_id: Dict[int, Tuple[float, float]]) -> List[Tuple[float, float]]: - return [p for p in (nodes_by_id.get(nid) for nid in (way.get("nodes") or [])) if p and p[0] is not None and p[1] is not None] -def _stitch_rings_from_way_node_lists(way_node_lists: List[List[Tuple[float, float]]]) -> List[List[Tuple[float, float]]]: +def _way_nodes_latlon( + way: Dict[str, Any], nodes_by_id: Dict[int, Tuple[float, float]] +) -> List[Tuple[float, float]]: + return [ + p + for p in (nodes_by_id.get(nid) for nid in (way.get("nodes") or [])) + if p and p[0] is not None and p[1] is not None + ] + + +def _stitch_rings_from_way_node_lists( + way_node_lists: List[List[Tuple[float, float]]], +) -> List[List[Tuple[float, float]]]: segments = [seg for seg in way_node_lists if seg and len(seg) >= 2] rings = [] while segments: @@ -27,59 +42,127 @@ def _stitch_rings_from_way_node_lists(way_node_lists: List[List[Tuple[float, flo while changed and segments: changed = False end, start = ring[-1], ring[0] - if len(ring) >= 3 and ring[0] == ring[-1]: break + if len(ring) >= 3 and ring[0] == ring[-1]: + break for i, seg in enumerate(segments): s0, s1 = seg[0], seg[-1] - if end == s0: ring.extend(seg[1:]); segments.pop(i); changed = True; break - if end == s1: ring.extend(list(reversed(seg[:-1]))); segments.pop(i); changed = True; break - if start == s1: ring = seg[:-1] + ring; segments.pop(i); changed = True; break - if start == s0: ring = list(reversed(seg[1:])) + ring; segments.pop(i); changed = True; break + if end == s0: + ring.extend(seg[1:]) + segments.pop(i) + changed = True + break + if end == s1: + ring.extend(list(reversed(seg[:-1]))) + segments.pop(i) + changed = True + break + if start == s1: + ring = seg[:-1] + ring + segments.pop(i) + changed = True + break + if start == s0: + ring = list(reversed(seg[1:])) + ring + segments.pop(i) + changed = True + break ring = clean_polygon_latlon(ring) - if len(ring) >= 4 and ring[0] == ring[-1]: rings.append(ring) + if len(ring) >= 4 and ring[0] == ring[-1]: + rings.append(ring) return rings -def _extract_relation_multipolygon_rings(rel: Dict[str, Any], ways_by_id: Dict[int, Dict[str, Any]], nodes_by_id: Dict[int, Tuple[float, float]]) -> Tuple[List[List[Tuple[float, float]]], List[List[Tuple[float, float]]]]: + +def _extract_relation_multipolygon_rings( + rel: Dict[str, Any], + ways_by_id: Dict[int, Dict[str, Any]], + nodes_by_id: Dict[int, Tuple[float, float]], +) -> Tuple[List[List[Tuple[float, float]]], List[List[Tuple[float, float]]]]: outer_way_lists, inner_way_lists = [], [] for m in rel.get("members", []): if m.get("type") == "way" and (way := ways_by_id.get(m.get("ref"))): pts = _way_nodes_latlon(way, nodes_by_id) if len(pts) >= 2: - (inner_way_lists if (m.get("role") or "").strip().lower() == "inner" else outer_way_lists).append(pts) - return (_stitch_rings_from_way_node_lists(outer_way_lists) if outer_way_lists else [], - _stitch_rings_from_way_node_lists(inner_way_lists) if inner_way_lists else []) + ( + inner_way_lists + if (m.get("role") or "").strip().lower() == "inner" + else outer_way_lists + ).append(pts) + return ( + _stitch_rings_from_way_node_lists(outer_way_lists) if outer_way_lists else [], + _stitch_rings_from_way_node_lists(inner_way_lists) if inner_way_lists else [], + ) + def extract_osm_buildings(overpass_json: Dict[str, Any]) -> List[Dict[str, Any]]: elements = overpass_json.get("elements", []) - nodes_by_id = {e["id"]: (e["lat"], e["lon"]) for e in elements if e.get("type") == "node" and e.get("id") is not None} - ways_by_id = {e["id"]: e for e in elements if e.get("type") == "way" and e.get("id") is not None} + nodes_by_id = { + e["id"]: (e["lat"], e["lon"]) + for e in elements + if e.get("type") == "node" and e.get("id") is not None + } + ways_by_id = { + e["id"]: e for e in elements if e.get("type") == "way" and e.get("id") is not None + } buildings = [] for e in elements: - if e.get("type") != "way" or "building" not in (tags := e.get("tags", {}) or {}): continue + if e.get("type") != "way" or "building" not in (tags := e.get("tags", {}) or {}): + continue footprint = clean_polygon_latlon(_way_nodes_latlon(e, nodes_by_id)) - if len(footprint) < 4 or polygon_area_m2(footprint) < 5.0: continue + if len(footprint) < 4 or polygon_area_m2(footprint) < 5.0: + continue verts = footprint[:-1] - buildings.append({ - "osm_id": f"way/{e.get('id')}", "osm_type": "way", "tags": tags, - "height_m": parse_height_m(tags), "footprint_latlon": footprint, - "holes_latlon": [], "centroid_latlon": (sum(p[0] for p in verts)/len(verts), sum(p[1] for p in verts)/len(verts)) - }) + buildings.append( + { + "osm_id": f"way/{e.get('id')}", + "osm_type": "way", + "tags": tags, + "height_m": parse_height_m(tags), + "footprint_latlon": footprint, + "holes_latlon": [], + "centroid_latlon": ( + sum(p[0] for p in verts) / len(verts), + sum(p[1] for p in verts) / len(verts), + ), + } + ) for e in elements: - if e.get("type") != "relation" or "building" not in (tags := e.get("tags", {}) or {}): continue + if e.get("type") != "relation" or "building" not in (tags := e.get("tags", {}) or {}): + continue outer_rings, inner_rings = _extract_relation_multipolygon_rings(e, ways_by_id, nodes_by_id) - if not outer_rings: continue + if not outer_rings: + continue outer_rings_sorted = sorted(outer_rings, key=polygon_area_m2, reverse=True) primary_outer = outer_rings_sorted[0] - if len(primary_outer) < 4 or polygon_area_m2(primary_outer) < 5.0: continue - - valid_holes = [hole for hole in inner_rings if len(hole) >= 4 and point_in_polygon(sum(p[0] for p in hole[:-1])/len(hole[:-1]), sum(p[1] for p in hole[:-1])/len(hole[:-1]), primary_outer)] + if len(primary_outer) < 4 or polygon_area_m2(primary_outer) < 5.0: + continue + + valid_holes = [ + hole + for hole in inner_rings + if len(hole) >= 4 + and point_in_polygon( + sum(p[0] for p in hole[:-1]) / len(hole[:-1]), + sum(p[1] for p in hole[:-1]) / len(hole[:-1]), + primary_outer, + ) + ] verts = primary_outer[:-1] - buildings.append({ - "osm_id": f"relation/{e.get('id')}", "osm_type": "relation", "tags": tags, - "height_m": parse_height_m(tags), "footprint_latlon": primary_outer, - "holes_latlon": valid_holes, "centroid_latlon": (sum(p[0] for p in verts)/len(verts), sum(p[1] for p in verts)/len(verts)), - "outer_rings_latlon": outer_rings_sorted - }) + buildings.append( + { + "osm_id": f"relation/{e.get('id')}", + "osm_type": "relation", + "tags": tags, + "height_m": parse_height_m(tags), + "footprint_latlon": primary_outer, + "holes_latlon": valid_holes, + "centroid_latlon": ( + sum(p[0] for p in verts) / len(verts), + sum(p[1] for p in verts) / len(verts), + ), + "outer_rings_latlon": outer_rings_sorted, + } + ) - return buildings \ No newline at end of file + return buildings diff --git a/src/ingestion/spatial_joiner.py.py b/src/ingestion/spatial_joiner.py.py index 28526be..ec031f1 100644 --- a/src/ingestion/spatial_joiner.py.py +++ b/src/ingestion/spatial_joiner.py.py @@ -4,12 +4,17 @@ from pathlib import Path from src.utils.spatial_math import ( - point_to_segment_distance_m, bearing_deg, wrap_angle_deg, angle_diff_deg, - point_in_polygon_with_holes, haversine_m + point_to_segment_distance_m, + bearing_deg, + wrap_angle_deg, + angle_diff_deg, + point_in_polygon_with_holes, + haversine_m, ) from src.ingestion.osm_parser import extract_osm_buildings from src.ingestion.api_client import download_thumbnail + @dataclass class EdgeInfo: distance_m: float @@ -19,57 +24,126 @@ class EdgeInfo: edge_bearing_deg: float facade_normal_deg: float -def nearest_edge_info(cam_lat: float, cam_lon: float, polygon_latlon_closed: List[Tuple[float, float]]) -> Optional[EdgeInfo]: - if len(polygon_latlon_closed) < 4: return None + +def nearest_edge_info( + cam_lat: float, cam_lon: float, polygon_latlon_closed: List[Tuple[float, float]] +) -> Optional[EdgeInfo]: + if len(polygon_latlon_closed) < 4: + return None best_d, best_i, best_t = float("inf"), -1, 0.0 for i in range(len(polygon_latlon_closed) - 1): a_lat, a_lon = polygon_latlon_closed[i] b_lat, b_lon = polygon_latlon_closed[i + 1] d, t = point_to_segment_distance_m(cam_lat, cam_lon, a_lat, a_lon, b_lat, b_lon) - if d < best_d: best_d, best_i, best_t = d, i, t + if d < best_d: + best_d, best_i, best_t = d, i, t a_lat, a_lon = polygon_latlon_closed[best_i] b_lat, b_lon = polygon_latlon_closed[best_i + 1] cp_lat = a_lat + best_t * (b_lat - a_lat) cp_lon = a_lon + best_t * (b_lon - a_lon) e_bearing = bearing_deg(a_lat, a_lon, b_lat, b_lon) - + n1, n2 = wrap_angle_deg(e_bearing + 90), wrap_angle_deg(e_bearing - 90) b_cp_to_cam = bearing_deg(cp_lat, cp_lon, cam_lat, cam_lon) n = n1 if angle_diff_deg(n1, b_cp_to_cam) <= angle_diff_deg(n2, b_cp_to_cam) else n2 return EdgeInfo(best_d, best_i, cp_lat, cp_lon, e_bearing, n) + def mapillary_pose(img: Dict[str, Any]) -> Optional[Dict[str, Any]]: coords = img.get("computed_geometry", {}).get("coordinates") heading = img.get("computed_compass_angle") - if not isinstance(coords, list) or len(coords) < 2 or coords[1] is None or coords[0] is None or heading is None: return None - return {"lat": float(coords[1]), "lon": float(coords[0]), "heading": float(heading), "captured_at": img.get("captured_at")} + if ( + not isinstance(coords, list) + or len(coords) < 2 + or coords[1] is None + or coords[0] is None + or heading is None + ): + return None + return { + "lat": float(coords[1]), + "lon": float(coords[0]), + "heading": float(heading), + "captured_at": img.get("captured_at"), + } + def is_approx_daylight(lat: float, lon: float, captured_at_ms: Optional[int]) -> bool: - if captured_at_ms is None: return True + if captured_at_ms is None: + return True try: dt_utc = datetime.fromtimestamp(captured_at_ms / 1000.0, tz=timezone.utc) return 7.0 <= (dt_utc.hour + (dt_utc.minute / 60.0) + (lon / 15.0)) % 24 <= 19.0 - except Exception: return True + except Exception: + return True + def score_candidate(distance_m: float, heading_delta_deg: float) -> float: - score = sum([2.0 if distance_m <= 15 else 1.5 if distance_m <= 30 else 1.0 if distance_m <= 50 else 0.5 if distance_m <= 80 else 0.0, - 2.0 if heading_delta_deg <= 15 else 1.5 if heading_delta_deg <= 30 else 1.0 if heading_delta_deg <= 45 else 0.5 if heading_delta_deg <= 60 else 0.0]) + score = sum( + [ + 2.0 + if distance_m <= 15 + else 1.5 + if distance_m <= 30 + else 1.0 + if distance_m <= 50 + else 0.5 + if distance_m <= 80 + else 0.0, + 2.0 + if heading_delta_deg <= 15 + else 1.5 + if heading_delta_deg <= 30 + else 1.0 + if heading_delta_deg <= 45 + else 0.5 + if heading_delta_deg <= 60 + else 0.0, + ] + ) return round(score - min(distance_m / 200.0, 0.6), 4) -def dynamic_heading_threshold_deg(distance_m: float) -> float: - return 60.0 if distance_m <= 15 else 45.0 if distance_m <= 30 else 30.0 if distance_m <= 50 else 25.0 -def thin_sequence(images: List[Dict[str, Any]], min_move_m: float = 2.0, min_heading_change_deg: float = 5.0) -> List[Dict[str, Any]]: +def dynamic_heading_threshold_deg(distance_m: float) -> float: + return ( + 60.0 + if distance_m <= 15 + else 45.0 + if distance_m <= 30 + else 30.0 + if distance_m <= 50 + else 25.0 + ) + + +def thin_sequence( + images: List[Dict[str, Any]], min_move_m: float = 2.0, min_heading_change_deg: float = 5.0 +) -> List[Dict[str, Any]]: kept, last = [], None for img in images: - if last is None or haversine_m(img["pose"]["lat"], img["pose"]["lon"], last["pose"]["lat"], last["pose"]["lon"]) >= min_move_m or angle_diff_deg(img["pose"].get("heading", 0.0), last["pose"].get("heading", 0.0)) >= min_heading_change_deg: - kept.append(img); last = img + if ( + last is None + or haversine_m( + img["pose"]["lat"], img["pose"]["lon"], last["pose"]["lat"], last["pose"]["lon"] + ) + >= min_move_m + or angle_diff_deg(img["pose"].get("heading", 0.0), last["pose"].get("heading", 0.0)) + >= min_heading_change_deg + ): + kept.append(img) + last = img return kept -def spatial_join_data(osm_raw_json: Dict[str, Any], mapillary_raw_list: List[Dict[str, Any]], output_dir: Path, max_distance_m: float = 50.0) -> Dict[str, Any]: + +def spatial_join_data( + osm_raw_json: Dict[str, Any], + mapillary_raw_list: List[Dict[str, Any]], + output_dir: Path, + max_distance_m: float = 50.0, +) -> Dict[str, Any]: """Matches raw mapillary data to parsed OSM buildings in memory.""" buildings = extract_osm_buildings(osm_raw_json) b_by_id = {b["osm_id"]: {**b, "assigned_images": []} for b in buildings} @@ -78,31 +152,64 @@ def spatial_join_data(osm_raw_json: Dict[str, Any], mapillary_raw_list: List[Dic for img in mapillary_raw_list: pose = mapillary_pose(img) url = img.get("thumb_original_url") - if not url or pose is None or not buildings or not is_approx_daylight(pose["lat"], pose["lon"], pose["captured_at"]): - if url: discarded_urls.append(url) + if ( + not url + or pose is None + or not buildings + or not is_approx_daylight(pose["lat"], pose["lon"], pose["captured_at"]) + ): + if url: + discarded_urls.append(url) continue candidates = [] for b in buildings: - if point_in_polygon_with_holes(pose["lat"], pose["lon"], b["footprint_latlon"], b.get("holes_latlon")): continue - if (ei := nearest_edge_info(pose["lat"], pose["lon"], b["footprint_latlon"])) is None: continue - - heading_delta = angle_diff_deg(pose["heading"], bearing_deg(pose["lat"], pose["lon"], ei.closest_point_lat, ei.closest_point_lon)) - candidates.append({ - "osm_id": b["osm_id"], "distance_to_edge_m": round(ei.distance_m, 3), "closest_edge_index": ei.edge_index, - "heading_delta_deg": round(heading_delta, 2), "visibility_score": score_candidate(ei.distance_m, heading_delta) - }) + if point_in_polygon_with_holes( + pose["lat"], pose["lon"], b["footprint_latlon"], b.get("holes_latlon") + ): + continue + if (ei := nearest_edge_info(pose["lat"], pose["lon"], b["footprint_latlon"])) is None: + continue + + heading_delta = angle_diff_deg( + pose["heading"], + bearing_deg(pose["lat"], pose["lon"], ei.closest_point_lat, ei.closest_point_lon), + ) + candidates.append( + { + "osm_id": b["osm_id"], + "distance_to_edge_m": round(ei.distance_m, 3), + "closest_edge_index": ei.edge_index, + "heading_delta_deg": round(heading_delta, 2), + "visibility_score": score_candidate(ei.distance_m, heading_delta), + } + ) candidates.sort(key=lambda c: (-c["visibility_score"], c["distance_to_edge_m"])) best = candidates[0] if candidates else None - if best is None or best["distance_to_edge_m"] > max_distance_m or best["heading_delta_deg"] > dynamic_heading_threshold_deg(best["distance_to_edge_m"]): + if ( + best is None + or best["distance_to_edge_m"] > max_distance_m + or best["heading_delta_deg"] > dynamic_heading_threshold_deg(best["distance_to_edge_m"]) + ): discarded_urls.append(url) continue local_thumb_path = download_thumbnail(url, img["id"], output_dir) - join_info = {"image_id": img["id"], "url": url, "pose": pose, "best_candidate": best, "assigned_building": best["osm_id"], "local_thumb_path": local_thumb_path} + join_info = { + "image_id": img["id"], + "url": url, + "pose": pose, + "best_candidate": best, + "assigned_building": best["osm_id"], + "local_thumb_path": local_thumb_path, + } b_by_id[best["osm_id"]]["assigned_images"].append(join_info) kept_images.append(join_info) - return {"buildings_joined": list(b_by_id.values()), "mapillary_kept": kept_images, "discarded_urls": discarded_urls} \ No newline at end of file + return { + "buildings_joined": list(b_by_id.values()), + "mapillary_kept": kept_images, + "discarded_urls": discarded_urls, + } diff --git a/src/utils/spatial_math.py b/src/utils/spatial_math.py index 30e1928..993d578 100644 --- a/src/utils/spatial_math.py +++ b/src/utils/spatial_math.py @@ -1,6 +1,7 @@ import math from typing import List, Tuple, Optional + def haversine_m(lat1: float, lon1: float, lat2: float, lon2: float) -> float: R = 6371000.0 p1, p2 = math.radians(lat1), math.radians(lat2) @@ -9,6 +10,7 @@ def haversine_m(lat1: float, lon1: float, lat2: float, lon2: float) -> float: a = math.sin(dphi / 2) ** 2 + math.cos(p1) * math.cos(p2) * math.sin(dl / 2) ** 2 return 2 * R * math.asin(math.sqrt(a)) + def bearing_deg(lat1: float, lon1: float, lat2: float, lon2: float) -> float: phi1, phi2 = math.radians(lat1), math.radians(lat2) dlon = math.radians(lon2 - lon1) @@ -16,30 +18,38 @@ def bearing_deg(lat1: float, lon1: float, lat2: float, lon2: float) -> float: y = math.cos(phi1) * math.sin(phi2) - math.sin(phi1) * math.cos(phi2) * math.cos(dlon) return (math.degrees(math.atan2(x, y)) + 360) % 360 + def wrap_angle_deg(a: float) -> float: return (a + 360) % 360 + def angle_diff_deg(a: float, b: float) -> float: d = (a - b + 180) % 360 - 180 return abs(d) + def meters_per_degree(lat: float) -> Tuple[float, float]: lat_rad = math.radians(lat) m_per_deg_lat = 111132.92 m_per_deg_lon = 111412.84 * math.cos(lat_rad) return m_per_deg_lat, m_per_deg_lon + def latlon_to_local_xy(lat: float, lon: float, lat0: float, lon0: float) -> Tuple[float, float]: mlat, mlon = meters_per_degree(lat0) x = (lon - lon0) * mlon y = (lat - lat0) * mlat return x, y + def polygon_area_m2(polygon_latlon: List[Tuple[float, float]]) -> float: - if len(polygon_latlon) < 3: return 0.0 + if len(polygon_latlon) < 3: + return 0.0 poly = polygon_latlon - if len(poly) >= 4 and poly[0] == poly[-1]: poly = poly[:-1] - if len(poly) < 3: return 0.0 + if len(poly) >= 4 and poly[0] == poly[-1]: + poly = poly[:-1] + if len(poly) < 3: + return 0.0 lat0 = sum(p[0] for p in poly) / len(poly) lon0 = sum(p[1] for p in poly) / len(poly) pts = [latlon_to_local_xy(lat, lon, lat0, lon0) for lat, lon in poly] @@ -50,38 +60,54 @@ def polygon_area_m2(polygon_latlon: List[Tuple[float, float]]) -> float: area2 += x1 * y2 - x2 * y1 return abs(area2) * 0.5 + def clean_polygon_latlon(poly: List[Tuple[float, float]]) -> List[Tuple[float, float]]: - if not poly: return poly + if not poly: + return poly cleaned = [poly[0]] for p in poly[1:]: - if p != cleaned[-1]: cleaned.append(p) - if len(cleaned) >= 2 and cleaned[0] == cleaned[-1]: cleaned = cleaned[:-1] - if len(cleaned) >= 3 and cleaned[0] != cleaned[-1]: cleaned.append(cleaned[0]) + if p != cleaned[-1]: + cleaned.append(p) + if len(cleaned) >= 2 and cleaned[0] == cleaned[-1]: + cleaned = cleaned[:-1] + if len(cleaned) >= 3 and cleaned[0] != cleaned[-1]: + cleaned.append(cleaned[0]) return cleaned + def point_in_polygon(lat: float, lon: float, polygon_latlon: List[Tuple[float, float]]) -> bool: x, y = lon, lat inside = False n = len(polygon_latlon) - if n < 3: return False + if n < 3: + return False for i in range(n - 1): y1, x1 = polygon_latlon[i] y2, x2 = polygon_latlon[i + 1] if (y1 > y) != (y2 > y): xinters = (x2 - x1) * (y - y1) / ((y2 - y1) + 1e-12) + x1 - if x < xinters: inside = not inside + if x < xinters: + inside = not inside return inside + def point_in_polygon_with_holes( - lat: float, lon: float, outer_ring_closed: List[Tuple[float, float]], inner_rings_closed: Optional[List[List[Tuple[float, float]]]] = None + lat: float, + lon: float, + outer_ring_closed: List[Tuple[float, float]], + inner_rings_closed: Optional[List[List[Tuple[float, float]]]] = None, ) -> bool: - if not outer_ring_closed or len(outer_ring_closed) < 4: return False - if not point_in_polygon(lat, lon, outer_ring_closed): return False + if not outer_ring_closed or len(outer_ring_closed) < 4: + return False + if not point_in_polygon(lat, lon, outer_ring_closed): + return False if inner_rings_closed: for hole in inner_rings_closed: - if hole and len(hole) >= 4 and point_in_polygon(lat, lon, hole): return False + if hole and len(hole) >= 4 and point_in_polygon(lat, lon, hole): + return False return True + def point_to_segment_distance_m( p_lat: float, p_lon: float, a_lat: float, a_lon: float, b_lat: float, b_lon: float ) -> Tuple[float, float]: @@ -93,8 +119,9 @@ def point_to_segment_distance_m( apx, apy = px - ax, py - ay ab2 = abx * abx + aby * aby - if ab2 <= 1e-12: return math.hypot(px - ax, py - ay), 0.0 + if ab2 <= 1e-12: + return math.hypot(px - ax, py - ay), 0.0 t = max(0.0, min(1.0, (apx * abx + apy * aby) / ab2)) cx, cy = ax + t * abx, ay + t * aby - return math.hypot(px - cx, py - cy), t \ No newline at end of file + return math.hypot(px - cx, py - cy), t From 89de85f56b27de6d7e849e768ebe1e12451aa642 Mon Sep 17 00:00:00 2001 From: anthonychen000 Date: Tue, 3 Mar 2026 18:25:49 -0500 Subject: [PATCH 3/4] style: fix remaining line length limits --- src/ingestion/__init__.py | 39 +++++++++++-------- src/ingestion/api_client.py | 10 +++-- src/ingestion/osm_parser.py | 5 ++- ...spatial_joiner.py.py => spatial_joiner.py} | 14 +++---- src/utils/spatial_math.py | 2 +- 5 files changed, 40 insertions(+), 30 deletions(-) rename src/ingestion/{spatial_joiner.py.py => spatial_joiner.py} (100%) diff --git a/src/ingestion/__init__.py b/src/ingestion/__init__.py index dde7eb1..052405f 100644 --- a/src/ingestion/__init__.py +++ b/src/ingestion/__init__.py @@ -1,57 +1,62 @@ from pathlib import Path -from typing import Dict, Any +from typing import Any, Dict -from .api_client import fetch_osm_buildings, fetch_mapillary_metadata -from core.matcher import spatial_join_data +from posm.src.ingestion.spatial_joiner import spatial_join_data + +from .api_client import fetch_mapillary_metadata, fetch_osm_buildings def run_ingestion( - lat: float, lon: float, buffer: float = 0.001, output_dir: str = "data/mapillary" + lat: float, + lon: float, + buffer: float = 0.001, + output_dir: str = "data/mapillary" ) -> Dict[str, Any]: """ Runs the end-to-end ingestion pipeline for a given coordinate. - + Args: lat: Latitude of the center point. lon: Longitude of the center point. buffer: Bounding box buffer in degrees. output_dir: Directory to save downloaded thumbnails. - + Returns: A dictionary containing joined buildings and mapped images ready for 3D processing. """ print(f"--- Starting Ingestion Pipeline for ({lat}, {lon}) ---") - + # 1. Fetch OSM Data print("Fetching OSM building data...") osm_raw_json = fetch_osm_buildings(lat, lon, buffer) if not osm_raw_json.get("elements"): print("[WARNING] No OSM buildings found in this area.") - + # 2. Fetch Mapillary Metadata print("Fetching Mapillary image metadata...") mapillary_raw_list = fetch_mapillary_metadata(lat, lon, buffer) if not mapillary_raw_list: print("[WARNING] No Mapillary images found in this area.") - + if not osm_raw_json.get("elements") and not mapillary_raw_list: print("--- Ingestion Aborted: No data to process ---") return {"buildings_joined": [], "mapillary_kept": [], "discarded_urls": []} - + # 3. Perform Spatial Join & Scoring print("Processing spatial join, scoring, and downloading images...") out_path = Path(output_dir) out_path.mkdir(parents=True, exist_ok=True) - + results = spatial_join_data( - osm_raw_json=osm_raw_json, mapillary_raw_list=mapillary_raw_list, output_dir=out_path + osm_raw_json=osm_raw_json, + mapillary_raw_list=mapillary_raw_list, + output_dir=out_path ) - - print(f"--- Ingestion Complete ---") + + print("--- Ingestion Complete ---") print(f"Buildings processed: {len(results.get('buildings_joined', []))}") print(f"Images assigned: {len(results.get('mapillary_kept', []))}") - + return results - -__all__ = ["run_ingestion"] +__all__ = ["run_ingestion"] \ No newline at end of file diff --git a/src/ingestion/api_client.py b/src/ingestion/api_client.py index 2645e2f..be1bf53 100644 --- a/src/ingestion/api_client.py +++ b/src/ingestion/api_client.py @@ -1,8 +1,9 @@ import os -import requests import time from pathlib import Path -from typing import Dict, Any, List, Optional +from typing import Any, Dict, List, Optional + +import requests OVERPASS_URL = "https://overpass-api.de/api/interpreter" MAPILLARY_URL = "https://graph.mapillary.com/images" @@ -46,7 +47,10 @@ def fetch_mapillary_metadata( headers = {"Authorization": f"OAuth {token}"} s, w, n, e = (lat - buffer, lon - buffer, lat + buffer, lon + buffer) bbox = f"{w},{s},{e},{n}" - fields = "id,thumb_original_url,computed_geometry,computed_compass_angle,camera_parameters,captured_at,sequence" + fields = ( + "id,thumb_original_url,computed_geometry,computed_compass_angle," + "camera_parameters,captured_at,sequence" + ) url = f"{MAPILLARY_URL}?bbox={bbox}&fields={fields}" try: diff --git a/src/ingestion/osm_parser.py b/src/ingestion/osm_parser.py index ee390aa..aabd1b6 100644 --- a/src/ingestion/osm_parser.py +++ b/src/ingestion/osm_parser.py @@ -1,5 +1,6 @@ -from typing import Any, Dict, List, Tuple, Optional -from src.utils.spatial_math import clean_polygon_latlon, polygon_area_m2, point_in_polygon +from typing import Any, Dict, List, Optional, Tuple + +from src.utils.spatial_math import clean_polygon_latlon, point_in_polygon, polygon_area_m2 def parse_height_m(tags: Dict[str, Any]) -> Optional[float]: diff --git a/src/ingestion/spatial_joiner.py.py b/src/ingestion/spatial_joiner.py similarity index 100% rename from src/ingestion/spatial_joiner.py.py rename to src/ingestion/spatial_joiner.py index ec031f1..84dbd7c 100644 --- a/src/ingestion/spatial_joiner.py.py +++ b/src/ingestion/spatial_joiner.py @@ -1,18 +1,18 @@ from dataclasses import dataclass -from typing import Any, Dict, List, Optional, Tuple from datetime import datetime, timezone from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple +from src.ingestion.api_client import download_thumbnail +from src.ingestion.osm_parser import extract_osm_buildings from src.utils.spatial_math import ( - point_to_segment_distance_m, - bearing_deg, - wrap_angle_deg, angle_diff_deg, - point_in_polygon_with_holes, + bearing_deg, haversine_m, + point_in_polygon_with_holes, + point_to_segment_distance_m, + wrap_angle_deg, ) -from src.ingestion.osm_parser import extract_osm_buildings -from src.ingestion.api_client import download_thumbnail @dataclass diff --git a/src/utils/spatial_math.py b/src/utils/spatial_math.py index 993d578..15adbe9 100644 --- a/src/utils/spatial_math.py +++ b/src/utils/spatial_math.py @@ -1,5 +1,5 @@ import math -from typing import List, Tuple, Optional +from typing import List, Optional, Tuple def haversine_m(lat1: float, lon1: float, lat2: float, lon2: float) -> float: From 5b32c1f63cd0cef10bff40b7270bbbb70e5669fa Mon Sep 17 00:00:00 2001 From: anthonychen000 Date: Tue, 3 Mar 2026 18:27:01 -0500 Subject: [PATCH 4/4] style: apply ruff formatting --- src/ingestion/__init__.py | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/src/ingestion/__init__.py b/src/ingestion/__init__.py index 052405f..83fa292 100644 --- a/src/ingestion/__init__.py +++ b/src/ingestion/__init__.py @@ -7,56 +7,52 @@ def run_ingestion( - lat: float, - lon: float, - buffer: float = 0.001, - output_dir: str = "data/mapillary" + lat: float, lon: float, buffer: float = 0.001, output_dir: str = "data/mapillary" ) -> Dict[str, Any]: """ Runs the end-to-end ingestion pipeline for a given coordinate. - + Args: lat: Latitude of the center point. lon: Longitude of the center point. buffer: Bounding box buffer in degrees. output_dir: Directory to save downloaded thumbnails. - + Returns: A dictionary containing joined buildings and mapped images ready for 3D processing. """ print(f"--- Starting Ingestion Pipeline for ({lat}, {lon}) ---") - + # 1. Fetch OSM Data print("Fetching OSM building data...") osm_raw_json = fetch_osm_buildings(lat, lon, buffer) if not osm_raw_json.get("elements"): print("[WARNING] No OSM buildings found in this area.") - + # 2. Fetch Mapillary Metadata print("Fetching Mapillary image metadata...") mapillary_raw_list = fetch_mapillary_metadata(lat, lon, buffer) if not mapillary_raw_list: print("[WARNING] No Mapillary images found in this area.") - + if not osm_raw_json.get("elements") and not mapillary_raw_list: print("--- Ingestion Aborted: No data to process ---") return {"buildings_joined": [], "mapillary_kept": [], "discarded_urls": []} - + # 3. Perform Spatial Join & Scoring print("Processing spatial join, scoring, and downloading images...") out_path = Path(output_dir) out_path.mkdir(parents=True, exist_ok=True) - + results = spatial_join_data( - osm_raw_json=osm_raw_json, - mapillary_raw_list=mapillary_raw_list, - output_dir=out_path + osm_raw_json=osm_raw_json, mapillary_raw_list=mapillary_raw_list, output_dir=out_path ) - + print("--- Ingestion Complete ---") print(f"Buildings processed: {len(results.get('buildings_joined', []))}") print(f"Images assigned: {len(results.get('mapillary_kept', []))}") - + return results -__all__ = ["run_ingestion"] \ No newline at end of file + +__all__ = ["run_ingestion"]