Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
285 changes: 163 additions & 122 deletions examples/research/virtual_philadelphia/rasterization_report.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -50,20 +50,18 @@
"import geopandas as gpd\n",
"import pandas as pd\n",
"import numpy as np\n",
"from shapely.geometry import box\n",
"\n",
"from nomad.city_gen import RasterCity\n",
"\n",
"SANDBOX_PATH = Path(\"sandbox/sandbox_data.gpkg\")\n",
"USE_SUBSET = False # Set to True for faster testing\n",
"SUBSET_SIZE = 5000"
"import nomad.map_utils as nm"
]
},
{
"cell_type": "markdown",
"id": "1df10645",
"metadata": {},
"source": [
"## Load Data"
"## Configuration"
]
},
{
Expand All @@ -73,25 +71,38 @@
"metadata": {},
"outputs": [],
"source": [
"buildings = gpd.read_file(SANDBOX_PATH, layer=\"buildings\")\n",
"streets = gpd.read_file(SANDBOX_PATH, layer=\"streets\")\n",
"boundary = gpd.read_file(SANDBOX_PATH, layer=\"boundary\")\n",
"SMALL_BOX = box(-75.1545, 39.946, -75.1425, 39.9535)\n",
"MEDIUM_BOX = box(-75.1665, 39.9385, -75.1425, 39.9535)\n",
"LARGE_BOX = box(-75.1905, 39.9235, -75.1425, 39.9535)\n",
"\n",
"# PHILLY_BOX = SMALL_BOX\n",
"# PHILLY_BOX = MEDIUM_BOX\n",
"PHILLY_BOX = LARGE_BOX\n",
"\n",
"BLOCK_SIDE_LENGTH = 10.0 # meters\n",
"\n",
"if USE_SUBSET:\n",
" buildings = buildings.head(SUBSET_SIZE)\n",
" print(f\"Using subset: {len(buildings):,} buildings\")\n",
"OUTPUT_DIR = Path(\"sandbox\")\n",
"OUTPUT_DIR.mkdir(parents=True, exist_ok=True)\n",
"\n",
"# Determine which box we're using and set appropriate filename\n",
"if PHILLY_BOX == SMALL_BOX:\n",
" BOX_NAME = \"small\"\n",
"elif PHILLY_BOX == MEDIUM_BOX:\n",
" BOX_NAME = \"medium\"\n",
"else:\n",
" print(f\"Full dataset: {len(buildings):,} buildings, {len(streets):,} streets\")"
" BOX_NAME = \"large\"\n",
"\n",
"SANDBOX_GPKG = OUTPUT_DIR / f\"sandbox_data_{BOX_NAME}.gpkg\"\n",
"\n",
"REGENERATE_DATA = False # Set to True to re-download OSM data"
]
},
{
"cell_type": "markdown",
"id": "dafce2a2",
"metadata": {},
"source": [
"## Generate City\n",
"\n",
"Converts vector geometries into discrete grid (block_size = 15 meters)"
"## Benchmark: Data Generation (OSM Download + Rotation)"
]
},
{
Expand All @@ -101,148 +112,178 @@
"metadata": {},
"outputs": [],
"source": [
"t0 = time.time()\n",
"city = RasterCity(boundary.geometry.iloc[0], streets, buildings, block_side_length=15.0)\n",
"gen_time = time.time() - t0\n",
"if REGENERATE_DATA or not SANDBOX_GPKG.exists():\n",
" print(\"=\"*50)\n",
" print(\"DATA GENERATION\")\n",
" print(\"=\"*50)\n",
" \n",
" t0 = time.time()\n",
" buildings = nm.download_osm_buildings(\n",
" PHILLY_BOX,\n",
" crs=\"EPSG:3857\",\n",
" schema=\"garden_city\",\n",
" clip=True,\n",
" infer_building_types=True,\n",
" explode=True,\n",
" )\n",
" download_buildings_time = time.time() - t0\n",
" print(f\"Buildings download: {download_buildings_time:>6.2f}s ({len(buildings):,} buildings)\")\n",
" \n",
" boundary_polygon = gpd.GeoDataFrame(geometry=[PHILLY_BOX], crs=\"EPSG:4326\").to_crs(\"EPSG:3857\").geometry.iloc[0]\n",
" outside_mask = ~buildings.geometry.within(boundary_polygon)\n",
" if outside_mask.any():\n",
" buildings = gpd.clip(buildings, gpd.GeoDataFrame(geometry=[boundary_polygon], crs=\"EPSG:3857\"))\n",
" buildings = nm.remove_overlaps(buildings).reset_index(drop=True)\n",
" \n",
" t1 = time.time()\n",
" streets = nm.download_osm_streets(\n",
" PHILLY_BOX,\n",
" crs=\"EPSG:3857\",\n",
" clip=True,\n",
" explode=True,\n",
" graphml_path=OUTPUT_DIR / \"streets_consolidated.graphml\",\n",
" )\n",
" download_streets_time = time.time() - t1\n",
" print(f\"Streets download: {download_streets_time:>6.2f}s ({len(streets):,} streets)\")\n",
" \n",
" streets = streets.reset_index(drop=True)\n",
" \n",
" t2 = time.time()\n",
" rotated_streets, rotation_deg = nm.rotate_streets_to_align(streets, k=200)\n",
" rotation_time = time.time() - t2\n",
" print(f\"Grid rotation: {rotation_time:>6.2f}s ({rotation_deg:.2f}°)\")\n",
" \n",
" rotated_buildings = nm.rotate(buildings, rotation_deg=rotation_deg)\n",
" rotated_boundary = nm.rotate(\n",
" gpd.GeoDataFrame(geometry=[boundary_polygon], crs=\"EPSG:3857\"),\n",
" rotation_deg=rotation_deg\n",
" )\n",
" \n",
" if SANDBOX_GPKG.exists():\n",
" SANDBOX_GPKG.unlink()\n",
" \n",
" rotated_buildings.to_file(SANDBOX_GPKG, layer=\"buildings\", driver=\"GPKG\")\n",
" rotated_streets.to_file(SANDBOX_GPKG, layer=\"streets\", driver=\"GPKG\", mode=\"a\")\n",
" rotated_boundary.to_file(SANDBOX_GPKG, layer=\"boundary\", driver=\"GPKG\", mode=\"a\")\n",
" \n",
" data_gen_time = download_buildings_time + download_streets_time + rotation_time\n",
" print(\"-\"*50)\n",
" print(f\"Data generation: {data_gen_time:>6.2f}s\")\n",
" print(\"=\"*50 + \"\\n\")\n",
"else:\n",
" print(f\"Loading existing data from {SANDBOX_GPKG}\")\n",
" data_gen_time = 0.0\n",
"\n",
"print(f\"\\nCity generation: {gen_time:.2f}s\")\n",
"print(f\" Blocks: {len(city.blocks_gdf):,}\")\n",
"print(f\" Streets: {len(city.streets_gdf):,}\")\n",
"print(f\" Buildings: {len(city.buildings_gdf):,}\")"
"buildings = gpd.read_file(SANDBOX_GPKG, layer=\"buildings\")\n",
"streets = gpd.read_file(SANDBOX_GPKG, layer=\"streets\")\n",
"boundary = gpd.read_file(SANDBOX_GPKG, layer=\"boundary\")"
]
},
{
"cell_type": "markdown",
"id": "addde79f",
"metadata": {},
"source": [
"## Street Graph\n",
"\n",
"NetworkX graph for pathfinding"
"## Benchmark: Rasterization Pipeline"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9938250f",
"metadata": {},
"metadata": {
"lines_to_next_cell": 1
},
"outputs": [],
"source": [
"t0 = time.time()\n",
"G = city.get_street_graph()\n",
"graph_time = time.time() - t0\n",
"\n",
"print(f\"\\nStreet graph: {graph_time:.2f}s\")\n",
"print(f\" Nodes: {len(G.nodes):,}\")\n",
"print(f\" Edges: {len(G.edges):,}\")"
]
},
{
"cell_type": "markdown",
"id": "ae69f7de",
"metadata": {},
"source": [
"## Hub Network\n",
"print(\"=\"*50)\n",
"print(\"RASTERIZATION PIPELINE\")\n",
"print(\"=\"*50)\n",
"\n",
"Sparse hub-to-hub distance matrix for efficient routing shortcuts"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3371ff37-6804-4acb-aad8-86b898467b6c",
"metadata": {},
"outputs": [],
"source": [
"hub_size = 100\n",
"t0 = time.time()\n",
"city._build_hub_network(hub_size=hub_size)\n",
"hub_time = time.time() - t0\n",
"\n",
"print(f\"\\nHub network ({hub_size} hubs): {hub_time:.2f}s\")\n",
"print(f\" Matrix shape: {city.hub_df.shape}\")\n",
"print(f\"\\nSample (first 5x5):\")\n",
"print(city.hub_df.iloc[:5, :5])"
]
},
{
"cell_type": "markdown",
"id": "811bd2d7",
"metadata": {},
"source": [
"## Gravity Matrix\n",
"resolve_overlaps = True\n",
"\n",
"Building-to-building gravity using vectorized Manhattan distances + hub shortcuts"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4f984958-4cae-4bd0-9ac4-789455c7ed4f",
"metadata": {},
"outputs": [],
"source": [
"t0 = time.time()\n",
"city.compute_gravity(exponent=2.0)\n",
"grav_time = time.time() - t0\n",
"city = RasterCity(boundary.geometry.iloc[0], streets, buildings, block_side_length=BLOCK_SIDE_LENGTH, resolve_overlaps=resolve_overlaps)\n",
"gen_time = time.time() - t0\n",
"print(f\"City generation: {gen_time:>6.2f}s\")\n",
"\n",
"print(f\"\\nGravity matrix: {grav_time:.2f}s\")\n",
"print(f\" Shape: {city.grav.shape}\")\n",
"t1 = time.time()\n",
"G = city.get_street_graph()\n",
"graph_time = time.time() - t1\n",
"print(f\"Street graph: {graph_time:>6.2f}s\")\n",
"\n",
"# Detailed diagnostics\n",
"diag = np.diag(city.grav.values)\n",
"diag_zeros = (diag == 0).all()\n",
"print(f\" Diagonal all zeros: {diag_zeros}\")\n",
"if not diag_zeros:\n",
" nonzero_diag = np.where(diag != 0)[0]\n",
" print(f\" Non-zero diagonal entries: {len(nonzero_diag)}\")\n",
" print(f\" Example diagonal values: {diag[nonzero_diag[:5]]}\")\n",
" print(f\" Corresponding building IDs: {[city.grav.index[i] for i in nonzero_diag[:5]]}\")\n",
"t2 = time.time()\n",
"city._build_hub_network(hub_size=hub_size)\n",
"hub_time = time.time() - t2\n",
"print(f\"Hub network: {hub_time:>6.2f}s\")\n",
"\n",
"t3 = time.time()\n",
"city.compute_gravity(exponent=2.0, callable_only=True)\n",
"grav_time = time.time() - t3\n",
"print(f\"Gravity computation: {grav_time:>6.2f}s\")\n",
"\n",
"mask = ~np.eye(len(city.grav), dtype=bool)\n",
"offdiag = city.grav.values[mask]\n",
"offdiag_positive = (offdiag > 0).all()\n",
"print(f\" Off-diagonal all positive: {offdiag_positive}\")\n",
"if not offdiag_positive:\n",
" nonpositive = np.where(offdiag <= 0)[0]\n",
" print(f\" Non-positive off-diagonal entries: {len(nonpositive)} / {len(offdiag)}\")\n",
" print(f\" Min off-diagonal value: {offdiag.min()}\")\n",
" print(f\" Max off-diagonal value: {offdiag.max()}\")\n",
" # Find a specific example\n",
" rows, cols = np.where((city.grav.values <= 0) & ~np.eye(len(city.grav), dtype=bool))\n",
" if len(rows) > 0:\n",
" print(f\" Example: grav[{city.grav.index[rows[0]]}][{city.grav.columns[cols[0]]}] = {city.grav.iloc[rows[0], cols[0]]}\")\n",
"raster_time = gen_time + graph_time + hub_time + grav_time\n",
"print(\"-\"*50)\n",
"print(f\"Rasterization: {raster_time:>6.2f}s\")\n",
"print(\"=\"*50)\n",
"\n",
"print(f\"\\nSample (first building to 5 others):\")\n",
"print(city.grav.iloc[0, :5])"
"if data_gen_time > 0:\n",
" total_time = data_gen_time + raster_time\n",
" print(f\"\\nTotal (with data): {total_time:>6.2f}s\")"
]
},
{
"cell_type": "markdown",
"id": "81e349ec",
"id": "ae69f7de",
"metadata": {},
"source": [
"## Summary"
"## Summary: City Structure"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c299b01f-2d47-46bd-bbc4-f48e63f9a83f",
"id": "3371ff37-6804-4acb-aad8-86b898467b6c",
"metadata": {},
"outputs": [],
"source": [
"total_time = gen_time + graph_time + hub_time + grav_time\n",
"print(\"\\n\" + \"=\"*50)\n",
"print(\"TIMING SUMMARY\")\n",
"print(\"=\"*50)\n",
"print(f\"City generation: {gen_time:>6.2f}s\")\n",
"print(f\"Street graph: {graph_time:>6.2f}s\")\n",
"print(f\"Hub network: {hub_time:>6.2f}s\")\n",
"print(f\"Gravity matrix: {grav_time:>6.2f}s\")\n",
"print(\"-\"*50)\n",
"print(f\"Total: {total_time:>6.2f}s\")\n",
"print(\"=\"*50)"
"def get_size_mb(obj):\n",
" \"\"\"Estimate memory size in MB for common objects.\"\"\"\n",
" if isinstance(obj, (pd.DataFrame, gpd.GeoDataFrame)):\n",
" return obj.memory_usage(deep=True).sum() / 1024**2\n",
" elif hasattr(obj, 'nodes') and hasattr(obj, 'edges'): # NetworkX graph\n",
" # Approximate: 64 bytes per node + 96 bytes per edge\n",
" return (len(obj.nodes) * 64 + len(obj.edges) * 96) / 1024**2\n",
" else:\n",
" return 0.0\n",
"\n",
"summary_df = pd.DataFrame({\n",
" 'Component': ['Blocks', 'Streets', 'Buildings', 'Graph Nodes', 'Graph Edges', 'Hub Network', 'Hub Info', 'Nearby Doors', 'Gravity (callable)'],\n",
" 'Count/Shape': [\n",
" f\"{len(city.blocks_gdf):,}\",\n",
" f\"{len(city.streets_gdf):,}\",\n",
" f\"{len(city.buildings_gdf):,}\",\n",
" f\"{len(G.nodes):,}\",\n",
" f\"{len(G.edges):,}\",\n",
" f\"{city.hub_df.shape[0]}×{city.hub_df.shape[1]}\",\n",
" f\"{city.grav_hub_info.shape[0]}×{city.grav_hub_info.shape[1]}\",\n",
" f\"{len(city.mh_dist_nearby_doors):,} pairs\",\n",
" \"function\"\n",
" ],\n",
" 'Memory (MB)': [\n",
" f\"{get_size_mb(city.blocks_gdf):.1f}\",\n",
" f\"{get_size_mb(city.streets_gdf):.1f}\",\n",
" f\"{get_size_mb(city.buildings_gdf):.1f}\",\n",
" f\"{get_size_mb(G):.1f}\",\n",
" \"-\",\n",
" f\"{get_size_mb(city.hub_df):.1f}\",\n",
" f\"{get_size_mb(city.grav_hub_info):.1f}\",\n",
" f\"{get_size_mb(city.mh_dist_nearby_doors):.1f}\",\n",
" \"<0.1\"\n",
" ]\n",
"})\n",
"print(\"\\n\" + summary_df.to_string(index=False))"
]
}
],
Expand All @@ -251,9 +292,9 @@
"formats": "ipynb,py:percent"
},
"kernelspec": {
"display_name": "Python (nomad repo venv)",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "nomad-repo-venv"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
Expand All @@ -265,7 +306,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
"version": "3.10.0"
}
},
"nbformat": 4,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
# extension: .py
# format_name: percent
# format_version: '1.3'
# jupytext_version: 1.17.3
# jupytext_version: 1.17.1
# kernelspec:
# display_name: Python (nomad repo venv)
# display_name: Python 3 (ipykernel)
# language: python
# name: nomad-repo-venv
# name: python3
# ---

# %% [markdown]
Expand Down
Loading