diff --git a/examples/research/virtual_philadelphia/rasterization_report.ipynb b/examples/research/virtual_philadelphia/rasterization_report.ipynb
index fcd166e..d1cd5a1 100644
--- a/examples/research/virtual_philadelphia/rasterization_report.ipynb
+++ b/examples/research/virtual_philadelphia/rasterization_report.ipynb
@@ -50,12 +50,10 @@
     "import geopandas as gpd\n",
     "import pandas as pd\n",
     "import numpy as np\n",
+    "from shapely.geometry import box\n",
     "\n",
     "from nomad.city_gen import RasterCity\n",
-    "\n",
-    "SANDBOX_PATH = Path(\"sandbox/sandbox_data.gpkg\")\n",
-    "USE_SUBSET = False  # Set to True for faster testing\n",
-    "SUBSET_SIZE = 5000"
+    "import nomad.map_utils as nm"
    ]
   },
   {
@@ -63,7 +61,7 @@
    "id": "1df10645",
    "metadata": {},
    "source": [
-    "## Load Data"
+    "## Configuration"
    ]
   },
   {
@@ -73,15 +71,30 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "buildings = gpd.read_file(SANDBOX_PATH, layer=\"buildings\")\n",
-    "streets = gpd.read_file(SANDBOX_PATH, layer=\"streets\")\n",
-    "boundary = gpd.read_file(SANDBOX_PATH, layer=\"boundary\")\n",
+    "SMALL_BOX = box(-75.1545, 39.946, -75.1425, 39.9535)\n",
+    "MEDIUM_BOX = box(-75.1665, 39.9385, -75.1425, 39.9535)\n",
+    "LARGE_BOX  = box(-75.1905, 39.9235, -75.1425, 39.9535)\n",
+    "\n",
+    "# PHILLY_BOX = SMALL_BOX\n",
+    "# PHILLY_BOX = MEDIUM_BOX\n",
+    "PHILLY_BOX = LARGE_BOX\n",
+    "\n",
+    "BLOCK_SIDE_LENGTH = 10.0  # meters\n",
     "\n",
-    "if USE_SUBSET:\n",
-    "    buildings = buildings.head(SUBSET_SIZE)\n",
-    "    print(f\"Using subset: {len(buildings):,} buildings\")\n",
+    "OUTPUT_DIR = Path(\"sandbox\")\n",
+    "OUTPUT_DIR.mkdir(parents=True, exist_ok=True)\n",
+    "\n",
+    "# Determine which box we're using and set appropriate filename\n",
+    "if PHILLY_BOX == SMALL_BOX:\n",
+    "    BOX_NAME = \"small\"\n",
+    "elif PHILLY_BOX == MEDIUM_BOX:\n",
+    "    BOX_NAME = \"medium\"\n",
     "else:\n",
-    "    print(f\"Full dataset: {len(buildings):,} buildings, {len(streets):,} streets\")"
+    "    BOX_NAME = \"large\"\n",
+    "\n",
+    "SANDBOX_GPKG = OUTPUT_DIR / f\"sandbox_data_{BOX_NAME}.gpkg\"\n",
+    "\n",
+    "REGENERATE_DATA = False  # Set to True to re-download OSM data"
    ]
   },
   {
@@ -89,9 +102,7 @@
    "id": "dafce2a2",
    "metadata": {},
    "source": [
-    "## Generate City\n",
-    "\n",
-    "Converts vector geometries into discrete grid (block_size = 15 meters)"
+    "## Benchmark: Data Generation (OSM Download + Rotation)"
    ]
   },
   {
@@ -101,14 +112,71 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "t0 = time.time()\n",
-    "city = RasterCity(boundary.geometry.iloc[0], streets, buildings, block_side_length=15.0)\n",
-    "gen_time = time.time() - t0\n",
+    "if REGENERATE_DATA or not SANDBOX_GPKG.exists():\n",
+    "    print(\"=\"*50)\n",
+    "    print(\"DATA GENERATION\")\n",
+    "    print(\"=\"*50)\n",
+    "    \n",
+    "    t0 = time.time()\n",
+    "    buildings = nm.download_osm_buildings(\n",
+    "        PHILLY_BOX,\n",
+    "        crs=\"EPSG:3857\",\n",
+    "        schema=\"garden_city\",\n",
+    "        clip=True,\n",
+    "        infer_building_types=True,\n",
+    "        explode=True,\n",
+    "    )\n",
+    "    download_buildings_time = time.time() - t0\n",
+    "    print(f\"Buildings download: {download_buildings_time:>6.2f}s ({len(buildings):,} buildings)\")\n",
+    "    \n",
+    "    boundary_polygon = gpd.GeoDataFrame(geometry=[PHILLY_BOX], crs=\"EPSG:4326\").to_crs(\"EPSG:3857\").geometry.iloc[0]\n",
+    "    outside_mask = ~buildings.geometry.within(boundary_polygon)\n",
+    "    if outside_mask.any():\n",
+    "        buildings = gpd.clip(buildings, gpd.GeoDataFrame(geometry=[boundary_polygon], crs=\"EPSG:3857\"))\n",
+    "    buildings = nm.remove_overlaps(buildings).reset_index(drop=True)\n",
+    "    \n",
+    "    t1 = time.time()\n",
+    "    streets = nm.download_osm_streets(\n",
+    "        PHILLY_BOX,\n",
+    "        crs=\"EPSG:3857\",\n",
+    "        clip=True,\n",
+    "        explode=True,\n",
+    "        graphml_path=OUTPUT_DIR / \"streets_consolidated.graphml\",\n",
+    "    )\n",
+    "    download_streets_time = time.time() - t1\n",
+    "    print(f\"Streets download:   {download_streets_time:>6.2f}s ({len(streets):,} streets)\")\n",
+    "    \n",
+    "    streets = streets.reset_index(drop=True)\n",
+    "    \n",
+    "    t2 = time.time()\n",
+    "    rotated_streets, rotation_deg = nm.rotate_streets_to_align(streets, k=200)\n",
+    "    rotation_time = time.time() - t2\n",
+    "    print(f\"Grid rotation:      {rotation_time:>6.2f}s ({rotation_deg:.2f}°)\")\n",
+    "    \n",
+    "    rotated_buildings = nm.rotate(buildings, rotation_deg=rotation_deg)\n",
+    "    rotated_boundary = nm.rotate(\n",
+    "        gpd.GeoDataFrame(geometry=[boundary_polygon], crs=\"EPSG:3857\"),\n",
+    "        rotation_deg=rotation_deg\n",
+    "    )\n",
+    "    \n",
+    "    if SANDBOX_GPKG.exists():\n",
+    "        SANDBOX_GPKG.unlink()\n",
+    "    \n",
+    "    rotated_buildings.to_file(SANDBOX_GPKG, layer=\"buildings\", driver=\"GPKG\")\n",
+    "    rotated_streets.to_file(SANDBOX_GPKG, layer=\"streets\", driver=\"GPKG\", mode=\"a\")\n",
+    "    rotated_boundary.to_file(SANDBOX_GPKG, layer=\"boundary\", driver=\"GPKG\", mode=\"a\")\n",
+    "    \n",
+    "    data_gen_time = download_buildings_time + download_streets_time + rotation_time\n",
+    "    print(\"-\"*50)\n",
+    "    print(f\"Data generation:    {data_gen_time:>6.2f}s\")\n",
+    "    print(\"=\"*50 + \"\\n\")\n",
+    "else:\n",
+    "    print(f\"Loading existing data from {SANDBOX_GPKG}\")\n",
+    "    data_gen_time = 0.0\n",
     "\n",
-    "print(f\"\\nCity generation: {gen_time:.2f}s\")\n",
-    "print(f\"  Blocks: {len(city.blocks_gdf):,}\")\n",
-    "print(f\"  Streets: {len(city.streets_gdf):,}\")\n",
-    "print(f\"  Buildings: {len(city.buildings_gdf):,}\")"
+    "buildings = gpd.read_file(SANDBOX_GPKG, layer=\"buildings\")\n",
+    "streets = gpd.read_file(SANDBOX_GPKG, layer=\"streets\")\n",
+    "boundary = gpd.read_file(SANDBOX_GPKG, layer=\"boundary\")"
    ]
   },
   {
@@ -116,133 +184,106 @@
    "id": "addde79f",
    "metadata": {},
    "source": [
-    "## Street Graph\n",
-    "\n",
-    "NetworkX graph for pathfinding"
+    "## Benchmark: Rasterization Pipeline"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "id": "9938250f",
-   "metadata": {},
+   "metadata": {
+    "lines_to_next_cell": 1
+   },
    "outputs": [],
    "source": [
-    "t0 = time.time()\n",
-    "G = city.get_street_graph()\n",
-    "graph_time = time.time() - t0\n",
-    "\n",
-    "print(f\"\\nStreet graph: {graph_time:.2f}s\")\n",
-    "print(f\"  Nodes: {len(G.nodes):,}\")\n",
-    "print(f\"  Edges: {len(G.edges):,}\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "ae69f7de",
-   "metadata": {},
-   "source": [
-    "## Hub Network\n",
+    "print(\"=\"*50)\n",
+    "print(\"RASTERIZATION PIPELINE\")\n",
+    "print(\"=\"*50)\n",
     "\n",
-    "Sparse hub-to-hub distance matrix for efficient routing shortcuts"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "3371ff37-6804-4acb-aad8-86b898467b6c",
-   "metadata": {},
-   "outputs": [],
-   "source": [
     "hub_size = 100\n",
-    "t0 = time.time()\n",
-    "city._build_hub_network(hub_size=hub_size)\n",
-    "hub_time = time.time() - t0\n",
-    "\n",
-    "print(f\"\\nHub network ({hub_size} hubs): {hub_time:.2f}s\")\n",
-    "print(f\"  Matrix shape: {city.hub_df.shape}\")\n",
-    "print(f\"\\nSample (first 5x5):\")\n",
-    "print(city.hub_df.iloc[:5, :5])"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "811bd2d7",
-   "metadata": {},
-   "source": [
-    "## Gravity Matrix\n",
+    "resolve_overlaps = True\n",
     "\n",
-    "Building-to-building gravity using vectorized Manhattan distances + hub shortcuts"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "4f984958-4cae-4bd0-9ac4-789455c7ed4f",
-   "metadata": {},
-   "outputs": [],
-   "source": [
     "t0 = time.time()\n",
-    "city.compute_gravity(exponent=2.0)\n",
-    "grav_time = time.time() - t0\n",
+    "city = RasterCity(boundary.geometry.iloc[0], streets, buildings, block_side_length=BLOCK_SIDE_LENGTH, resolve_overlaps=resolve_overlaps)\n",
+    "gen_time = time.time() - t0\n",
+    "print(f\"City generation:    {gen_time:>6.2f}s\")\n",
     "\n",
-    "print(f\"\\nGravity matrix: {grav_time:.2f}s\")\n",
-    "print(f\"  Shape: {city.grav.shape}\")\n",
+    "t1 = time.time()\n",
+    "G = city.get_street_graph()\n",
+    "graph_time = time.time() - t1\n",
+    "print(f\"Street graph:       {graph_time:>6.2f}s\")\n",
     "\n",
-    "# Detailed diagnostics\n",
-    "diag = np.diag(city.grav.values)\n",
-    "diag_zeros = (diag == 0).all()\n",
-    "print(f\"  Diagonal all zeros: {diag_zeros}\")\n",
-    "if not diag_zeros:\n",
-    "    nonzero_diag = np.where(diag != 0)[0]\n",
-    "    print(f\"    Non-zero diagonal entries: {len(nonzero_diag)}\")\n",
-    "    print(f\"    Example diagonal values: {diag[nonzero_diag[:5]]}\")\n",
-    "    print(f\"    Corresponding building IDs: {[city.grav.index[i] for i in nonzero_diag[:5]]}\")\n",
+    "t2 = time.time()\n",
+    "city._build_hub_network(hub_size=hub_size)\n",
+    "hub_time = time.time() - t2\n",
+    "print(f\"Hub network:        {hub_time:>6.2f}s\")\n",
+    "\n",
+    "t3 = time.time()\n",
+    "city.compute_gravity(exponent=2.0, callable_only=True)\n",
+    "grav_time = time.time() - t3\n",
+    "print(f\"Gravity computation: {grav_time:>6.2f}s\")\n",
     "\n",
-    "mask = ~np.eye(len(city.grav), dtype=bool)\n",
-    "offdiag = city.grav.values[mask]\n",
-    "offdiag_positive = (offdiag > 0).all()\n",
-    "print(f\"  Off-diagonal all positive: {offdiag_positive}\")\n",
-    "if not offdiag_positive:\n",
-    "    nonpositive = np.where(offdiag <= 0)[0]\n",
-    "    print(f\"    Non-positive off-diagonal entries: {len(nonpositive)} / {len(offdiag)}\")\n",
-    "    print(f\"    Min off-diagonal value: {offdiag.min()}\")\n",
-    "    print(f\"    Max off-diagonal value: {offdiag.max()}\")\n",
-    "    # Find a specific example\n",
-    "    rows, cols = np.where((city.grav.values <= 0) & ~np.eye(len(city.grav), dtype=bool))\n",
-    "    if len(rows) > 0:\n",
-    "        print(f\"    Example: grav[{city.grav.index[rows[0]]}][{city.grav.columns[cols[0]]}] = {city.grav.iloc[rows[0], cols[0]]}\")\n",
+    "raster_time = gen_time + graph_time + hub_time + grav_time\n",
+    "print(\"-\"*50)\n",
+    "print(f\"Rasterization:      {raster_time:>6.2f}s\")\n",
+    "print(\"=\"*50)\n",
     "\n",
-    "print(f\"\\nSample (first building to 5 others):\")\n",
-    "print(city.grav.iloc[0, :5])"
+    "if data_gen_time > 0:\n",
+    "    total_time = data_gen_time + raster_time\n",
+    "    print(f\"\\nTotal (with data):  {total_time:>6.2f}s\")"
    ]
   },
   {
    "cell_type": "markdown",
-   "id": "81e349ec",
+   "id": "ae69f7de",
    "metadata": {},
    "source": [
-    "## Summary"
+    "## Summary: City Structure"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "c299b01f-2d47-46bd-bbc4-f48e63f9a83f",
+   "id": "3371ff37-6804-4acb-aad8-86b898467b6c",
    "metadata": {},
    "outputs": [],
    "source": [
-    "total_time = gen_time + graph_time + hub_time + grav_time\n",
-    "print(\"\\n\" + \"=\"*50)\n",
-    "print(\"TIMING SUMMARY\")\n",
-    "print(\"=\"*50)\n",
-    "print(f\"City generation:    {gen_time:>6.2f}s\")\n",
-    "print(f\"Street graph:       {graph_time:>6.2f}s\")\n",
-    "print(f\"Hub network:        {hub_time:>6.2f}s\")\n",
-    "print(f\"Gravity matrix:     {grav_time:>6.2f}s\")\n",
-    "print(\"-\"*50)\n",
-    "print(f\"Total:              {total_time:>6.2f}s\")\n",
-    "print(\"=\"*50)"
+    "def get_size_mb(obj):\n",
+    "    \"\"\"Estimate memory size in MB for common objects.\"\"\"\n",
+    "    if isinstance(obj, (pd.DataFrame, gpd.GeoDataFrame)):\n",
+    "        return obj.memory_usage(deep=True).sum() / 1024**2\n",
+    "    elif hasattr(obj, 'nodes') and hasattr(obj, 'edges'):  # NetworkX graph\n",
+    "        # Approximate: 64 bytes per node + 96 bytes per edge\n",
+    "        return (len(obj.nodes) * 64 + len(obj.edges) * 96) / 1024**2\n",
+    "    else:\n",
+    "        return 0.0\n",
+    "\n",
+    "summary_df = pd.DataFrame({\n",
+    "    'Component': ['Blocks', 'Streets', 'Buildings', 'Graph Nodes', 'Graph Edges', 'Hub Network', 'Hub Info', 'Nearby Doors', 'Gravity (callable)'],\n",
+    "    'Count/Shape': [\n",
+    "        f\"{len(city.blocks_gdf):,}\",\n",
+    "        f\"{len(city.streets_gdf):,}\",\n",
+    "        f\"{len(city.buildings_gdf):,}\",\n",
+    "        f\"{len(G.nodes):,}\",\n",
+    "        f\"{len(G.edges):,}\",\n",
+    "        f\"{city.hub_df.shape[0]}×{city.hub_df.shape[1]}\",\n",
+    "        f\"{city.grav_hub_info.shape[0]}×{city.grav_hub_info.shape[1]}\",\n",
+    "        f\"{len(city.mh_dist_nearby_doors):,} pairs\",\n",
+    "        \"function\"\n",
+    "    ],\n",
+    "    'Memory (MB)': [\n",
+    "        f\"{get_size_mb(city.blocks_gdf):.1f}\",\n",
+    "        f\"{get_size_mb(city.streets_gdf):.1f}\",\n",
+    "        f\"{get_size_mb(city.buildings_gdf):.1f}\",\n",
+    "        f\"{get_size_mb(G):.1f}\",\n",
+    "        \"-\",\n",
+    "        f\"{get_size_mb(city.hub_df):.1f}\",\n",
+    "        f\"{get_size_mb(city.grav_hub_info):.1f}\",\n",
+    "        f\"{get_size_mb(city.mh_dist_nearby_doors):.1f}\",\n",
+    "        \"<0.1\"\n",
+    "    ]\n",
+    "})\n",
+    "print(\"\\n\" + summary_df.to_string(index=False))"
    ]
   }
  ],
@@ -251,9 +292,9 @@
    "formats": "ipynb,py:percent"
   },
   "kernelspec": {
-   "display_name": "Python (nomad repo venv)",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
-   "name": "nomad-repo-venv"
+   "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
@@ -265,7 +306,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.12.3"
+   "version": "3.10.0"
   }
  },
  "nbformat": 4,
diff --git a/examples/research/virtual_philadelphia/rasterization_report.py b/examples/research/virtual_philadelphia/rasterization_report.py
index 410e4a0..ced7ca0 100644
--- a/examples/research/virtual_philadelphia/rasterization_report.py
+++ b/examples/research/virtual_philadelphia/rasterization_report.py
@@ -6,11 +6,11 @@
 #       extension: .py
 #       format_name: percent
 #       format_version: '1.3'
-#       jupytext_version: 1.17.3
+#       jupytext_version: 1.17.1
 #   kernelspec:
-#     display_name: Python (nomad repo venv)
+#     display_name: Python 3 (ipykernel)
 #     language: python
-#     name: nomad-repo-venv
+#     name: python3
 # ---
 
 # %% [markdown]
diff --git a/examples/research/virtual_philadelphia/sample_step_benchmark.ipynb b/examples/research/virtual_philadelphia/sample_step_benchmark.ipynb
new file mode 100644
index 0000000..36781d9
--- /dev/null
+++ b/examples/research/virtual_philadelphia/sample_step_benchmark.ipynb
@@ -0,0 +1,326 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "bb9c09b0",
+   "metadata": {},
+   "source": [
+    "# Benchmark: `_sample_step` Performance Analysis\n",
+    "\n",
+    "## Goals\n",
+    "1. Benchmark repeated `get_shortest_path()` calls (callable mode)\n",
+    "2. Profile trajectory generation to identify bottlenecks\n",
+    "3. Test optimization strategies"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "abe477f0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import time\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import geopandas as gpd\n",
+    "from pathlib import Path\n",
+    "\n",
+    "from nomad.city_gen import RasterCity\n",
+    "from nomad.traj_gen import Population"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "40f4fcb9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Configuration\n",
+    "BOX_SIZE = 'small'  # 'small', 'medium', or 'large'\n",
+    "BLOCK_SIDE_LENGTH = 10.0\n",
+    "HUB_SIZE = 100\n",
+    "MAX_MANHATTAN_DIST = 30\n",
+    "NUM_PATH_QUERIES = 100\n",
+    "SIMULATION_HOURS = 24\n",
+    "DT = 0.5  # minutes\n",
+    "EPR_TIME_RES = 15\n",
+    "RHO = 0.4\n",
+    "GAMMA = 0.3\n",
+    "SEED = 42"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d4c9cd03",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"=\"*60)\n",
+    "print(f\"SETUP: {BOX_SIZE.upper()} BOX\")\n",
+    "print(\"=\"*60)\n",
+    "\n",
+    "# Load OSM data and rasterize\n",
+    "data_dir = Path(\"sandbox\")\n",
+    "osm_path = data_dir / f\"sandbox_data_{BOX_SIZE}.gpkg\"\n",
+    "\n",
+    "if not osm_path.exists():\n",
+    "    raise FileNotFoundError(f\"OSM data not found at {osm_path}. Run rasterization_report.py first.\")\n",
+    "\n",
+    "print(f\"Loading OSM data...\")\n",
+    "buildings = gpd.read_file(osm_path, layer=\"buildings\")\n",
+    "streets = gpd.read_file(osm_path, layer=\"streets\")\n",
+    "boundary = gpd.read_file(osm_path, layer=\"boundary\")\n",
+    "print(f\"  Buildings: {len(buildings):,}\")\n",
+    "print(f\"  Streets: {len(streets):,}\")\n",
+    "\n",
+    "print(f\"\\nRasterizing city...\")\n",
+    "t0 = time.time()\n",
+    "city = RasterCity(\n",
+    "    boundary.geometry.iloc[0],\n",
+    "    streets,\n",
+    "    buildings,\n",
+    "    block_side_length=BLOCK_SIDE_LENGTH,\n",
+    "    resolve_overlaps=True,\n",
+    "    verbose=False\n",
+    ")\n",
+    "print(f\"  Rasterization: {time.time()-t0:.2f}s\")\n",
+    "print(f\"  Buildings added: {len(city.buildings_gdf):,}\")\n",
+    "print(f\"  Street blocks: {len(city.streets_gdf):,}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "23d7576b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"\\nBuilding street graph...\")\n",
+    "t0 = time.time()\n",
+    "G = city.get_street_graph()\n",
+    "print(f\"  Street graph: {time.time()-t0:.2f}s\")\n",
+    "print(f\"  Nodes: {G.number_of_nodes():,}\")\n",
+    "print(f\"  Edges: {G.number_of_edges():,}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "87083b3e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"\\nBuilding hub network...\")\n",
+    "t0 = time.time()\n",
+    "city._build_hub_network(hub_size=HUB_SIZE)\n",
+    "print(f\"  Hub network: {time.time()-t0:.2f}s\")\n",
+    "print(f\"  Hubs: {len(city.hubs):,}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "44059656",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"\\nComputing gravity (callable mode)...\")\n",
+    "t0 = time.time()\n",
+    "city.compute_gravity(exponent=2.0, callable_only=True)\n",
+    "print(f\"  Gravity computation: {time.time()-t0:.2f}s\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d0936f95",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"\\nComputing shortest paths (callable mode)...\")\n",
+    "t0 = time.time()\n",
+    "city.compute_shortest_paths(callable_only=True)\n",
+    "print(f\"  Shortest paths: {time.time()-t0:.2f}s\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3fa29dff",
+   "metadata": {},
+   "source": [
+    "## Benchmark 1: Repeated `get_shortest_path()` Calls"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "53adef37",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"\\n\" + \"=\"*60)\n",
+    "print(\"BENCHMARK 1: get_shortest_path() Performance\")\n",
+    "print(\"=\"*60)\n",
+    "\n",
+    "# Find valid street block pairs with manhattan distance < MAX_MANHATTAN_DIST\n",
+    "streets_list = list(city.streets_gdf.index)\n",
+    "valid_pairs = []\n",
+    "\n",
+    "print(f\"Finding {NUM_PATH_QUERIES} pairs with Manhattan distance < {MAX_MANHATTAN_DIST}...\")\n",
+    "rng = np.random.default_rng(42)\n",
+    "\n",
+    "while len(valid_pairs) < NUM_PATH_QUERIES:\n",
+    "    i = rng.integers(0, len(streets_list))\n",
+    "    j = rng.integers(0, len(streets_list))\n",
+    "    if i == j:\n",
+    "        continue\n",
+    "    \n",
+    "    start = streets_list[i]\n",
+    "    end = streets_list[j]\n",
+    "    manhattan_dist = abs(start[0] - end[0]) + abs(start[1] - end[1])\n",
+    "    \n",
+    "    if manhattan_dist < MAX_MANHATTAN_DIST:\n",
+    "        valid_pairs.append((start, end))\n",
+    "\n",
+    "print(f\"Found {len(valid_pairs)} valid pairs\")\n",
+    "\n",
+    "# Benchmark\n",
+    "times = []\n",
+    "path_lengths = []\n",
+    "\n",
+    "print(f\"\\nBenchmarking {NUM_PATH_QUERIES} get_shortest_path() calls...\")\n",
+    "for start, end in valid_pairs:\n",
+    "    t0 = time.time()\n",
+    "    path = city.get_shortest_path(start, end)\n",
+    "    elapsed = time.time() - t0\n",
+    "    times.append(elapsed)\n",
+    "    path_lengths.append(len(path) if path else 0)\n",
+    "\n",
+    "times = np.array(times) * 1000  # Convert to milliseconds\n",
+    "path_lengths = np.array(path_lengths)\n",
+    "\n",
+    "print(\"\\nResults:\")\n",
+    "print(f\"  Total queries: {len(times)}\")\n",
+    "print(f\"  Mean time: {times.mean():.2f} ms\")\n",
+    "print(f\"  Median time: {np.median(times):.2f} ms\")\n",
+    "print(f\"  Std time: {times.std():.2f} ms\")\n",
+    "print(f\"  Min time: {times.min():.2f} ms\")\n",
+    "print(f\"  Max time: {times.max():.2f} ms\")\n",
+    "print(f\"  Mean path length: {path_lengths.mean():.1f} blocks\")\n",
+    "print(f\"  Median path length: {np.median(path_lengths):.1f} blocks\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4cd8b5e6",
+   "metadata": {},
+   "source": [
+    "## Benchmark 2: Destination Diary Generation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c0ae1bfa",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"\\n\" + \"=\"*60)\n",
+    "print(\"BENCHMARK 2: Destination Diary Generation\")\n",
+    "print(\"=\"*60)\n",
+    "\n",
+    "population = Population(city)\n",
+    "population.generate_agents(\n",
+    "    N=1,\n",
+    "    seed=SEED,\n",
+    "    name_count=1,\n",
+    "    datetimes=\"2024-01-01 00:00-05:00\"\n",
+    ")\n",
+    "\n",
+    "agent = list(population.roster.values())[0]\n",
+    "end_time = pd.Timestamp(\"2024-01-01 00:00-05:00\") + pd.Timedelta(hours=SIMULATION_HOURS)\n",
+    "\n",
+    "print(f\"\\nGenerating {SIMULATION_HOURS}-hour destination diary...\")\n",
+    "print(f\"  Start: {agent.last_ping['datetime']}\")\n",
+    "print(f\"  End: {end_time}\")\n",
+    "\n",
+    "t0 = time.time()\n",
+    "agent.generate_dest_diary(\n",
+    "    end_time=end_time,\n",
+    "    epr_time_res=EPR_TIME_RES,\n",
+    "    rho=RHO,\n",
+    "    gamma=GAMMA,\n",
+    "    seed=SEED\n",
+    ")\n",
+    "elapsed = time.time() - t0\n",
+    "\n",
+    "print(f\"\\nResults:\")\n",
+    "print(f\"  Time: {elapsed:.2f}s\")\n",
+    "print(f\"  Diary entries: {len(agent.destination_diary):,}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7957e9d4",
+   "metadata": {},
+   "source": [
+    "## Benchmark 3: Trajectory Generation from Diary"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "63a87214",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"\\n\" + \"=\"*60)\n",
+    "print(\"BENCHMARK 3: Trajectory Generation from Diary\")\n",
+    "print(\"=\"*60)\n",
+    "\n",
+    "print(f\"\\nGenerating trajectory from destination diary...\")\n",
+    "print(f\"  Diary entries: {len(agent.destination_diary):,}\")\n",
+    "print(f\"  Time step (dt): {DT} minutes\")\n",
+    "\n",
+    "t0 = time.time()\n",
+    "agent.generate_trajectory(\n",
+    "    destination_diary=agent.destination_diary,\n",
+    "    dt=DT,\n",
+    "    seed=SEED\n",
+    ")\n",
+    "elapsed = time.time() - t0\n",
+    "\n",
+    "trajectory = agent.trajectory\n",
+    "\n",
+    "print(f\"\\nResults:\")\n",
+    "print(f\"  Time: {elapsed:.2f}s\")\n",
+    "print(f\"  Trajectory points: {len(trajectory):,}\")\n",
+    "print(f\"  Points per second: {len(trajectory)/elapsed:.1f}\")\n",
+    "print(f\"  Time per point: {1000*elapsed/len(trajectory):.2f} ms\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bd497726",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "jupytext": {
+   "formats": "ipynb,py:percent"
+  },
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/examples/research/virtual_philadelphia/sample_step_benchmark.py b/examples/research/virtual_philadelphia/sample_step_benchmark.py
new file mode 100644
index 0000000..a72b684
--- /dev/null
+++ b/examples/research/virtual_philadelphia/sample_step_benchmark.py
@@ -0,0 +1,228 @@
+# ---
+# jupyter:
+#   jupytext:
+#     formats: ipynb,py:percent
+#     text_representation:
+#       extension: .py
+#       format_name: percent
+#       format_version: '1.3'
+#       jupytext_version: 1.17.1
+#   kernelspec:
+#     display_name: Python 3 (ipykernel)
+#     language: python
+#     name: python3
+# ---
+
+# %% [markdown]
+# # Benchmark: `_sample_step` Performance Analysis
+#
+# ## Goals
+# 1. Benchmark repeated `get_shortest_path()` calls (callable mode)
+# 2. Profile trajectory generation to identify bottlenecks
+# 3. Test optimization strategies
+
+# %%
+import time
+import numpy as np
+import pandas as pd
+import geopandas as gpd
+from pathlib import Path
+
+from nomad.city_gen import RasterCity
+from nomad.traj_gen import Population
+
+# %%
+# Configuration
+BOX_SIZE = 'small'  # 'small', 'medium', or 'large'
+BLOCK_SIDE_LENGTH = 10.0
+HUB_SIZE = 100
+MAX_MANHATTAN_DIST = 30
+NUM_PATH_QUERIES = 100
+SIMULATION_HOURS = 24
+DT = 0.5  # minutes
+EPR_TIME_RES = 15
+RHO = 0.4
+GAMMA = 0.3
+SEED = 42
+
+# %%
+print("="*60)
+print(f"SETUP: {BOX_SIZE.upper()} BOX")
+print("="*60)
+
+# Load OSM data and rasterize
+data_dir = Path("sandbox")
+osm_path = data_dir / f"sandbox_data_{BOX_SIZE}.gpkg"
+
+if not osm_path.exists():
+    raise FileNotFoundError(f"OSM data not found at {osm_path}. Run rasterization_report.py first.")
+
+print(f"Loading OSM data...")
+buildings = gpd.read_file(osm_path, layer="buildings")
+streets = gpd.read_file(osm_path, layer="streets")
+boundary = gpd.read_file(osm_path, layer="boundary")
+print(f"  Buildings: {len(buildings):,}")
+print(f"  Streets: {len(streets):,}")
+
+print(f"\nRasterizing city...")
+t0 = time.time()
+city = RasterCity(
+    boundary.geometry.iloc[0],
+    streets,
+    buildings,
+    block_side_length=BLOCK_SIDE_LENGTH,
+    resolve_overlaps=True,
+    verbose=False
+)
+print(f"  Rasterization: {time.time()-t0:.2f}s")
+print(f"  Buildings added: {len(city.buildings_gdf):,}")
+print(f"  Street blocks: {len(city.streets_gdf):,}")
+
+# %%
+print("\nBuilding street graph...")
+t0 = time.time()
+G = city.get_street_graph()
+print(f"  Street graph: {time.time()-t0:.2f}s")
+print(f"  Nodes: {G.number_of_nodes():,}")
+print(f"  Edges: {G.number_of_edges():,}")
+
+# %%
+print("\nBuilding hub network...")
+t0 = time.time()
+city._build_hub_network(hub_size=HUB_SIZE)
+print(f"  Hub network: {time.time()-t0:.2f}s")
+print(f"  Hubs: {len(city.hubs):,}")
+
+# %%
+print("\nComputing gravity (callable mode)...")
+t0 = time.time()
+city.compute_gravity(exponent=2.0, callable_only=True)
+print(f"  Gravity computation: {time.time()-t0:.2f}s")
+
+# %%
+print("\nComputing shortest paths (callable mode)...")
+t0 = time.time()
+city.compute_shortest_paths(callable_only=True)
+print(f"  Shortest paths: {time.time()-t0:.2f}s")
+
+# %% [markdown]
+# ## Benchmark 1: Repeated `get_shortest_path()` Calls
+
+# %%
+print("\n" + "="*60)
+print("BENCHMARK 1: get_shortest_path() Performance")
+print("="*60)
+
+# Find valid street block pairs with manhattan distance < MAX_MANHATTAN_DIST
+streets_list = list(city.streets_gdf.index)
+valid_pairs = []
+
+print(f"Finding {NUM_PATH_QUERIES} pairs with Manhattan distance < {MAX_MANHATTAN_DIST}...")
+rng = np.random.default_rng(42)
+
+while len(valid_pairs) < NUM_PATH_QUERIES:
+    i = rng.integers(0, len(streets_list))
+    j = rng.integers(0, len(streets_list))
+    if i == j:
+        continue
+    
+    start = streets_list[i]
+    end = streets_list[j]
+    manhattan_dist = abs(start[0] - end[0]) + abs(start[1] - end[1])
+    
+    if manhattan_dist < MAX_MANHATTAN_DIST:
+        valid_pairs.append((start, end))
+
+print(f"Found {len(valid_pairs)} valid pairs")
+
+# Benchmark
+times = []
+path_lengths = []
+
+print(f"\nBenchmarking {NUM_PATH_QUERIES} get_shortest_path() calls...")
+for start, end in valid_pairs:
+    t0 = time.time()
+    path = city.get_shortest_path(start, end)
+    elapsed = time.time() - t0
+    times.append(elapsed)
+    path_lengths.append(len(path) if path else 0)
+
+times = np.array(times) * 1000  # Convert to milliseconds
+path_lengths = np.array(path_lengths)
+
+print("\nResults:")
+print(f"  Total queries: {len(times)}")
+print(f"  Mean time: {times.mean():.2f} ms")
+print(f"  Median time: {np.median(times):.2f} ms")
+print(f"  Std time: {times.std():.2f} ms")
+print(f"  Min time: {times.min():.2f} ms")
+print(f"  Max time: {times.max():.2f} ms")
+print(f"  Mean path length: {path_lengths.mean():.1f} blocks")
+print(f"  Median path length: {np.median(path_lengths):.1f} blocks")
+
+# %% [markdown]
+# ## Benchmark 2: Destination Diary Generation
+
+# %%
+print("\n" + "="*60)
+print("BENCHMARK 2: Destination Diary Generation")
+print("="*60)
+
+population = Population(city)
+population.generate_agents(
+    N=1,
+    seed=SEED,
+    name_count=1,
+    datetimes="2024-01-01 00:00-05:00"
+)
+
+agent = list(population.roster.values())[0]
+end_time = pd.Timestamp("2024-01-01 00:00-05:00") + pd.Timedelta(hours=SIMULATION_HOURS)
+
+print(f"\nGenerating {SIMULATION_HOURS}-hour destination diary...")
+print(f"  Start: {agent.last_ping['datetime']}")
+print(f"  End: {end_time}")
+
+t0 = time.time()
+agent.generate_dest_diary(
+    end_time=end_time,
+    epr_time_res=EPR_TIME_RES,
+    rho=RHO,
+    gamma=GAMMA,
+    seed=SEED
+)
+elapsed = time.time() - t0
+
+print(f"\nResults:")
+print(f"  Time: {elapsed:.2f}s")
+print(f"  Diary entries: {len(agent.destination_diary):,}")
+
+# %% [markdown]
+# ## Benchmark 3: Trajectory Generation from Diary
+
+# %%
+print("\n" + "="*60)
+print("BENCHMARK 3: Trajectory Generation from Diary")
+print("="*60)
+
+print(f"\nGenerating trajectory from destination diary...")
+print(f"  Diary entries: {len(agent.destination_diary):,}")
+print(f"  Time step (dt): {DT} minutes")
+
+t0 = time.time()
+agent.generate_trajectory(
+    destination_diary=agent.destination_diary,
+    dt=DT,
+    seed=SEED
+)
+elapsed = time.time() - t0
+
+trajectory = agent.trajectory
+
+print(f"\nResults:")
+print(f"  Time: {elapsed:.2f}s")
+print(f"  Trajectory points: {len(trajectory):,}")
+print(f"  Points per second: {len(trajectory)/elapsed:.1f}")
+print(f"  Time per point: {1000*elapsed/len(trajectory):.2f} ms")
+
+# %%
\ No newline at end of file
diff --git a/examples/research/virtual_philadelphia/sandbox/buildings_SMALL_BOX.parquet b/examples/research/virtual_philadelphia/sandbox/buildings_SMALL_BOX.parquet
new file mode 100644
index 0000000..baeb2f7
Binary files /dev/null and b/examples/research/virtual_philadelphia/sandbox/buildings_SMALL_BOX.parquet differ
diff --git a/examples/research/virtual_philadelphia/sandbox/buildings_SMALL_BOX.pkl b/examples/research/virtual_philadelphia/sandbox/buildings_SMALL_BOX.pkl
new file mode 100644
index 0000000..bfb1bb4
Binary files /dev/null and b/examples/research/virtual_philadelphia/sandbox/buildings_SMALL_BOX.pkl differ
diff --git a/examples/research/virtual_philadelphia/sandbox/config_medium.json b/examples/research/virtual_philadelphia/sandbox/config_medium.json
new file mode 100644
index 0000000..689f585
--- /dev/null
+++ b/examples/research/virtual_philadelphia/sandbox/config_medium.json
@@ -0,0 +1,27 @@
+{
+  "box_name": "medium",
+  "block_side_length": 10.0,
+  "hub_size": 100,
+  "N": 10,
+  "name_seed": 42,
+  "name_count": 2,
+  "epr_params": {
+    "datetime": "2024-01-01 00:00-05:00",
+    "end_time": "2024-01-08 00:00-05:00",
+    "epr_time_res": 15,
+    "rho": 0.4,
+    "gamma": 0.3,
+    "seed_base": 100
+  },
+  "traj_params": {
+    "dt": 0.5,
+    "seed_base": 200
+  },
+  "sampling_params": {
+    "beta_ping": 5,
+    "beta_durations": null,
+    "beta_start": null,
+    "ha": 0.7666666666666667,
+    "seed_base": 300
+  }
+}
\ No newline at end of file
diff --git a/examples/research/virtual_philadelphia/sandbox/dest_diaries_medium/date=2024-01-01/part-0.parquet b/examples/research/virtual_philadelphia/sandbox/dest_diaries_medium/date=2024-01-01/part-0.parquet
new file mode 100644
index 0000000..6f9525e
Binary files /dev/null and b/examples/research/virtual_philadelphia/sandbox/dest_diaries_medium/date=2024-01-01/part-0.parquet differ
diff --git a/examples/research/virtual_philadelphia/sandbox/dest_diaries_medium/date=2024-01-02/part-0.parquet b/examples/research/virtual_philadelphia/sandbox/dest_diaries_medium/date=2024-01-02/part-0.parquet
new file mode 100644
index 0000000..98c2e74
Binary files /dev/null and b/examples/research/virtual_philadelphia/sandbox/dest_diaries_medium/date=2024-01-02/part-0.parquet differ
diff --git a/examples/research/virtual_philadelphia/sandbox/dest_diaries_medium/date=2024-01-03/part-0.parquet b/examples/research/virtual_philadelphia/sandbox/dest_diaries_medium/date=2024-01-03/part-0.parquet
new file mode 100644
index 0000000..a7b15ea
Binary files /dev/null and b/examples/research/virtual_philadelphia/sandbox/dest_diaries_medium/date=2024-01-03/part-0.parquet differ
diff --git a/examples/research/virtual_philadelphia/sandbox/dest_diaries_medium/date=2024-01-04/part-0.parquet b/examples/research/virtual_philadelphia/sandbox/dest_diaries_medium/date=2024-01-04/part-0.parquet
new file mode 100644
index 0000000..3965a04
Binary files /dev/null and b/examples/research/virtual_philadelphia/sandbox/dest_diaries_medium/date=2024-01-04/part-0.parquet differ
diff --git a/examples/research/virtual_philadelphia/sandbox/dest_diaries_medium/date=2024-01-05/part-0.parquet b/examples/research/virtual_philadelphia/sandbox/dest_diaries_medium/date=2024-01-05/part-0.parquet
new file mode 100644
index 0000000..4064c0e
Binary files /dev/null and b/examples/research/virtual_philadelphia/sandbox/dest_diaries_medium/date=2024-01-05/part-0.parquet differ
diff --git a/examples/research/virtual_philadelphia/sandbox/dest_diaries_medium/date=2024-01-06/part-0.parquet b/examples/research/virtual_philadelphia/sandbox/dest_diaries_medium/date=2024-01-06/part-0.parquet
new file mode 100644
index 0000000..b7b8a78
Binary files /dev/null and b/examples/research/virtual_philadelphia/sandbox/dest_diaries_medium/date=2024-01-06/part-0.parquet differ
diff --git a/examples/research/virtual_philadelphia/sandbox/dest_diaries_medium/date=2024-01-07/part-0.parquet b/examples/research/virtual_philadelphia/sandbox/dest_diaries_medium/date=2024-01-07/part-0.parquet
new file mode 100644
index 0000000..e40f704
Binary files /dev/null and b/examples/research/virtual_philadelphia/sandbox/dest_diaries_medium/date=2024-01-07/part-0.parquet differ
diff --git a/examples/research/virtual_philadelphia/sandbox/streets_SMALL_BOX.pkl b/examples/research/virtual_philadelphia/sandbox/streets_SMALL_BOX.pkl
new file mode 100644
index 0000000..e34e53c
Binary files /dev/null and b/examples/research/virtual_philadelphia/sandbox/streets_SMALL_BOX.pkl differ
diff --git a/examples/research/virtual_philadelphia/synthetic_philadelphia.py b/examples/research/virtual_philadelphia/synthetic_philadelphia.py
index 609dd54..cfd8e20 100644
--- a/examples/research/virtual_philadelphia/synthetic_philadelphia.py
+++ b/examples/research/virtual_philadelphia/synthetic_philadelphia.py
@@ -25,10 +25,13 @@
 import geopandas as gpd
 import pandas as pd
 from shapely.geometry import box
+import matplotlib.pyplot as plt
+import contextily as cx
 
 import nomad.map_utils as nm
 from nomad.city_gen import RasterCity
 from nomad.traj_gen import Population
+from nomad.io.base import from_file
 from tqdm import tqdm
 
 # %% [markdown]
@@ -36,6 +39,7 @@
 
 # %%
 LARGE_BOX = box(-75.1905, 39.9235, -75.1425, 39.9535)
+MEDIUM_BOX = box(-75.1665, 39.9385, -75.1425, 39.9535)
 
 USE_FULL_CITY = False
 OUTPUT_DIR = Path("sandbox")
@@ -45,26 +49,37 @@
     BOX_NAME = "full"
     POLY = "Philadelphia, Pennsylvania, USA"
 else:
-    BOX_NAME = "large"
-    POLY = LARGE_BOX
+    BOX_NAME = "medium"
+    POLY = MEDIUM_BOX
 
 SANDBOX_GPKG = OUTPUT_DIR / f"sandbox_data_{BOX_NAME}.gpkg"
 REGENERATE_DATA = False
 
 config = {
     "box_name": BOX_NAME,
-    "block_side_length": 15.0,
-    "hub_size": 600,
-    "N": 100,
+    "block_side_length": 10.0,
+    "hub_size": 100,
+    "N": 10,
     "name_seed": 42,
     "name_count": 2,
     "epr_params": {
         "datetime": "2024-01-01 00:00-05:00",
-        "end_time": "2024-04-01 00:00-05:00",
+        "end_time": "2024-01-08 00:00-05:00",  # 7 days
         "epr_time_res": 15,
         "rho": 0.4,
         "gamma": 0.3,
         "seed_base": 100
+    },
+    "traj_params": {
+        "dt": 0.5,
+        "seed_base": 200
+    },
+    "sampling_params": {
+        "beta_ping": 5,
+        "beta_durations": None,
+        "beta_start": None,
+        "ha": 11.5/15,
+        "seed_base": 300
     }
 }
 
@@ -179,7 +194,12 @@
 grav_time = time.time() - t3
 print(f"Gravity computation: {grav_time:>6.2f}s")
 
-raster_time = gen_time + graph_time + hub_time + grav_time
+t4 = time.time()
+city.compute_shortest_paths(callable_only=True)
+paths_time = time.time() - t4
+print(f"Shortest paths:     {paths_time:>6.2f}s")
+
+raster_time = gen_time + graph_time + hub_time + grav_time + paths_time
 print("-"*50)
 print(f"Rasterization:      {raster_time:>6.2f}s")
 print("="*50)
@@ -283,15 +303,128 @@ def get_size_mb(obj):
 print(f"\nConfig saved to {config_path}")
 print(f"Destination diaries saved to {dest_diaries_path}")
 
+# %% [markdown]
+# ## Generate Full Trajectories from Destination Diaries
+
 # %%
-import geopandas as gpd
-import contextily as cx
-import matplotlib.pyplot as plt
+print("\n" + "="*50)
+print("TRAJECTORY GENERATION")
+print("="*50)
+
+t1 = time.time()
+failed_agents = []
+for i, agent in tqdm(enumerate(population.roster.values()), total=config["N"], desc="Generating trajectories"):
+    try:
+        agent.generate_trajectory(
+            dt=config["traj_params"]["dt"],
+            seed=config["traj_params"]["seed_base"] + i
+        )
+    except ValueError as e:
+        failed_agents.append((agent.identifier, str(e)))
+        continue
+
+traj_gen_time = time.time() - t1
+print(f"Trajectory generation: {traj_gen_time:>6.2f}s")
+if failed_agents:
+    print(f"Warning: {len(failed_agents)} agents failed trajectory generation")
+
+total_points = sum(len(agent.trajectory) for agent in population.roster.values() if agent.trajectory is not None)
+print(f"Total trajectory points: {total_points:,}")
+print(f"Points per second: {total_points/traj_gen_time:.1f}")
+print("-"*50)
+print(f"Total trajectory:   {traj_gen_time:>6.2f}s")
+print("="*50)
+
+# %% [markdown]
+# ## Sample Sparse Trajectories
+
+# %%
+print("\n" + "="*50)
+print("SPARSE TRAJECTORY SAMPLING")
+print("="*50)
+
+t1 = time.time()
+for i, agent in tqdm(enumerate(population.roster.values()), total=config["N"], desc="Sampling trajectories"):
+    if agent.trajectory is None:
+        continue
+    agent.sample_trajectory(
+        beta_ping=config["sampling_params"]["beta_ping"],
+        beta_durations=config["sampling_params"]["beta_durations"],
+        beta_start=config["sampling_params"]["beta_start"],
+        ha=config["sampling_params"]["ha"],
+        seed=config["sampling_params"]["seed_base"] + i,
+        replace_sparse_traj=True
+    )
+
+sampling_time = time.time() - t1
+print(f"Sparse sampling:    {sampling_time:>6.2f}s")
+
+total_sparse_points = sum(len(agent.sparse_traj) for agent in population.roster.values() if agent.sparse_traj is not None)
+print(f"Total sparse points: {total_sparse_points:,}")
+print(f"Sparsity ratio: {total_sparse_points/total_points:.2%}")
+print("-"*50)
+print(f"Total sampling:     {sampling_time:>6.2f}s")
+print("="*50)
+
+# %% [markdown]
+# ## Reproject to Mercator and Persist
+
+# %%
+print("\n" + "="*50)
+print("REPROJECTION AND PERSISTENCE")
+print("="*50)
+
+# Build POI data for diary reprojection
+cent = city.buildings_gdf['door_point'] if 'door_point' in city.buildings_gdf.columns else city.buildings_gdf.geometry.centroid
+poi_data = pd.DataFrame({
+    'building_id': city.buildings_gdf['id'].values,
+    'x': (city.buildings_gdf['door_cell_x'].astype(float) + 0.5).values if 'door_cell_x' in city.buildings_gdf.columns else cent.x.values,
+    'y': (city.buildings_gdf['door_cell_y'].astype(float) + 0.5).values if 'door_cell_y' in city.buildings_gdf.columns else cent.y.values
+})
+
+print("Reprojecting sparse trajectories to Web Mercator...")
+population.reproject_to_mercator(sparse_traj=True, full_traj=False, diaries=True, poi_data=poi_data)
+
+print("Saving sparse trajectories and diaries...")
+population.save_pop(
+    sparse_path=OUTPUT_DIR / f"sparse_traj_{BOX_NAME}",
+    diaries_path=OUTPUT_DIR / f"diaries_{BOX_NAME}",
+    partition_cols=["date"],
+    fmt='parquet'
+)
+print("-"*50)
+print("="*50)
+
+# %% [markdown]
+# ## Visualize Sparse Trajectories
+
+# %%
+print("\n" + "="*50)
+print("VISUALIZATION")
+print("="*50)
+
+# Read sparse trajectories
+sparse_traj_df = from_file(OUTPUT_DIR / f"sparse_traj_{BOX_NAME}", format="parquet")
+print(f"Loaded {len(sparse_traj_df):,} sparse trajectory points for {config['N']} agents")
+
+# Plot with contextily basemap
+fig, ax = plt.subplots(figsize=(12, 10))
+
+# Plot each agent with different color
+for agent_id in sparse_traj_df['user_id'].unique():
+    agent_traj = sparse_traj_df[sparse_traj_df['user_id'] == agent_id]
+    ax.scatter(agent_traj['x'], agent_traj['y'], s=1, alpha=0.5, label=agent_id)
+
+# Add basemap
+cx.add_basemap(ax, crs="EPSG:3857", source=cx.providers.CartoDB.Positron)
 
-box = gpd.GeoSeries(city.boundary_polygon)
-fig, ax = plt.subplots()
-box.plot(ax=ax, facecolor="None")
-cx.add_basemap(ax, crs=box.crs)
+ax.set_xlabel('X (Web Mercator)')
+ax.set_ylabel('Y (Web Mercator)')
+ax.set_title(f'Sparse Trajectories - {config["N"]} Agents, 7 Days')
+ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', markerscale=10)
+plt.tight_layout()
+plt.savefig(OUTPUT_DIR / f"sparse_trajectories_{BOX_NAME}.png", dpi=150, bbox_inches='tight')
+print(f"Saved plot to {OUTPUT_DIR / f'sparse_trajectories_{BOX_NAME}.png'}")
 plt.show()
 
 # %%
diff --git a/nomad/city_gen.py b/nomad/city_gen.py
index 52f926a..03646ad 100644
--- a/nomad/city_gen.py
+++ b/nomad/city_gen.py
@@ -188,6 +188,8 @@ def __init__(self,
         # Precomputed building-to-building gravity (optional, built on demand)
         self.grav = None
         self.door_dist = None
+        # Precomputed shortest paths (optional, built on demand via compute_shortest_paths)
+        self.shortest_paths = None
 
     def _derive_streets_from_blocks(self):
         """
@@ -509,48 +511,69 @@ def check_adjacent(self, geom1, geom2, graph=None):
 
         raise TypeError("Unsupported types: geom1 must be block tuple/list or shapely geometry; geom2 must be block tuple or shapely geometry")
 
-    def get_street_graph(self):
-        """Build the street graph needed for routing.
-
-        Constructs a grid-adjacency graph where each street block is a node
-        and edges connect cardinally adjacent blocks.
-        """
-        if hasattr(self, 'street_graph') and self.street_graph is not None:
-            return self.street_graph
-
-        # ---------------------------------------------------------------------
-        # Build street block adjacency graph
-        # ---------------------------------------------------------------------
-        # Nodes
-        nodes_df = self.streets_gdf[['coord_x', 'coord_y']].copy()
-
-        # Build edge list via vectorized neighbor matches
+    def _build_adjacency_graph(self, gdf):
+        """Build grid-adjacency graph from GeoDataFrame with coord_x, coord_y."""
+        nodes_df = gdf[['coord_x', 'coord_y']].copy()
         edge_list = []
-        for dx, dy in [(1, 0), (0, 1)]:  # undirected; only positive directions
+        for dx, dy in [(1, 0), (0, 1)]:
             shifted = nodes_df.copy()
             shifted['nx'] = nodes_df['coord_x'] + dx
             shifted['ny'] = nodes_df['coord_y'] + dy
-            # Merge to find neighbor pairs
             merged = shifted.merge(
                 nodes_df.rename(columns={'coord_x': 'nx', 'coord_y': 'ny'}),
                 on=['nx', 'ny'], how='inner'
             )
-
             edge_list.append(
                 merged[['coord_x', 'coord_y', 'nx', 'ny']].rename(columns={'coord_x': 'x', 'coord_y': 'y'})
             )
-
         edges_df = pd.concat(edge_list, ignore_index=True)
-
-        # Create graph
         G = nx.Graph()
         G.add_nodes_from([(int(x), int(y)) for x, y in nodes_df.values])
         edge_list = [((int(r.x), int(r.y)), (int(r.nx), int(r.ny))) for r in edges_df.itertuples(index=False)]
         G.add_edges_from(edge_list, weight=1)
-        self.street_graph = G
-        
+        return G
+
+    def get_street_graph(self):
+        """Build the street graph needed for routing."""
+        if hasattr(self, 'street_graph') and self.street_graph is not None:
+            return self.street_graph
+        self.street_graph = self._build_adjacency_graph(self.streets_gdf)
         return self.street_graph
 
+    def compute_shortest_paths(self, callable_only=False):
+        """
+        Compute shortest paths between street blocks.
+        
+        Parameters
+        ----------
+        callable_only : bool, default False
+            If False, precompute all-pairs shortest paths (only for small cities)
+            If True, store on-demand callable (placeholder for hub-based routing)
+        
+        Notes
+        -----
+        Callable mode currently uses nx.shortest_path() on demand—this is a 
+        PLACEHOLDER. Production use with _sample_step requires hub-based routing 
+        for scalability (billions of calls). Current implementation too slow for 
+        large-scale trajectory generation.
+        
+        Stores result in self.shortest_paths as dict or callable
+        """
+        G = self.get_street_graph()
+        
+        if callable_only:
+            # TODO: Replace with hub-based routing for production use
+            # Current on-demand nx.shortest_path is too slow for billion+ calls in _sample_step
+            def compute_path(start_coord, end_coord):
+                try:
+                    return nx.shortest_path(G, start_coord, end_coord)
+                except nx.NetworkXNoPath:
+                    return []
+            self.shortest_paths = compute_path
+        else:
+            # Dense storage for small cities only
+            self.shortest_paths = dict(nx.all_pairs_shortest_path(G))
+
     # ---------------------------------------------------------------------
     # Shortcut ("highway") network for fast on-demand routing
     # ---------------------------------------------------------------------
@@ -1502,13 +1525,13 @@ def get_shortest_path(self, start_coord: tuple, end_coord: tuple, plot: bool = F
         Raises
         ------
         ValueError
-            If either coordinate is not a valid street block.
+            If either coordinate is not a valid street block or if shortest_paths
+            has not been computed.
 
         Notes
         -----
-        - Uses the hub-based shortcut index for speed; hub-to-hub segments may
-          be obtained via a local NetworkX shortest path when a direct next-hop
-          entry is unavailable.
+        Requires compute_shortest_paths() to be called first. Uses precomputed
+        paths (dict) or on-demand callable depending on mode.
         """
         if not (isinstance(start_coord, tuple) and isinstance(end_coord, tuple)):
             raise ValueError("Coordinates must be tuples of (x, y).")
@@ -1519,78 +1542,18 @@ def get_shortest_path(self, start_coord: tuple, end_coord: tuple, plot: bool = F
         if not ((self.streets_gdf['coord_x'] == end_coord[0]) & (self.streets_gdf['coord_y'] == end_coord[1])).any():
             raise ValueError(f"End coordinate {end_coord} must be a street block.")
 
-        # Ensure graph and shortcuts are built
-        if not hasattr(self, 'street_graph') or self.street_graph is None:
-            self.get_street_graph(lazy=True)
-        if not hasattr(self, '_shortcut_hubs') or self._shortcut_hubs is None:
-            try:
-                self._build_hub_network()
-            except Exception:
-                pass
-
-        # Prefer shortcut network if available; fall back to NetworkX if not
-        if hasattr(self, '_shortcut_hubs') and self._shortcut_hubs:
-            # Map arbitrary node to its hub via precomputed next_to_hub
-            def path_to_hub(node):
-                route = [node]
-                # Prevent infinite loops; cap at city size
-                for _ in range(max(1, len(self.streets_gdf))):
-                    if node == self._nearest_hub[node]:
-                        break
-                    node = self._next_to_hub[node]
-                    route.append(node)
-                    if node == self._nearest_hub[node]:
-                        break
-                return route
-
-            u = start_coord
-            v = end_coord
-            hu = self._nearest_hub[u]
-            hv = self._nearest_hub[v]
-
-            # u -> hu
-            seg_u_hu = path_to_hub(u)
-            # hubs path hu -> hv using next_hop table
-            seg_hubs = [hu]
-            cur = hu
-            safe_cap = max(2, len(self._shortcut_hubs) * 4)
-            for _ in range(safe_cap):
-                if cur == hv:
-                    break
-                nh = self._hub_next_hop.get(cur, {}).get(hv)
-                if nh is None:
-                    # fallback to direct nx shortest path if hub-hub mapping missing
-                    try:
-                        nx_path = nx.shortest_path(self.get_street_graph(), cur, hv)
-                    except nx.NetworkXNoPath:
-                        return []
-                    seg_hubs = nx_path
-                    cur = hv
-                    break
-                seg_hubs.append(nh)
-                cur = nh
-
-            # hv -> v: walk from v to hv using next_to_hub, then reverse to get hv->v
-            seg_v_to_hv = path_to_hub(v)
-            # ensure last element is hv
-            if seg_v_to_hv[-1] != hv:
-                # fallback to nx shortest if something is off
-                try:
-                    nx_path = nx.shortest_path(self.get_street_graph(), hv, v)
-                except nx.NetworkXNoPath:
-                    return []
-                seg_hv_to_v = nx_path
-            else:
-                seg_hv_to_v = list(reversed(seg_v_to_hv))
-
-            # Concatenate segments, avoiding duplicate junctions
-            path = seg_u_hu[:-1] + seg_hubs + seg_hv_to_v[1:]
+        # Check if shortest_paths has been computed
+        if not hasattr(self, 'shortest_paths') or self.shortest_paths is None:
+            raise ValueError("Must call compute_shortest_paths() before using get_shortest_path().")
+        
+        # Get path from either dict or callable
+        if callable(self.shortest_paths):
+            path = self.shortest_paths(start_coord, end_coord)
         else:
-            # Fallback: compute on-demand with NetworkX
-            try:
-                path = nx.shortest_path(self.get_street_graph(), start_coord, end_coord)
-            except nx.NetworkXNoPath:
+            # Dict mode: lookup precomputed path
+            if start_coord not in self.shortest_paths:
                 return []
+            path = self.shortest_paths[start_coord].get(end_coord, [])
 
         if plot:
             if ax is None:
@@ -1620,49 +1583,107 @@ def path_to_hub(node):
         return path
 
     # ---------------------------------------------------------------------
-    # Fast distance with small LRU-style cache (simple and effective)
+    # DEPRECATED: Cached path retrieval (kept for potential future use)
     # ---------------------------------------------------------------------
-    def get_distance_fast(self, start_coord: tuple, end_coord: tuple) -> float:
+    def get_paths_fast(self, start_coord: tuple, end_coord: tuple):
         """
-        Return an approximate shortest-path distance in number of steps between two
-        street blocks using the shortcut network for speed. Results are cached.
+        DEPRECATED: Return shortest path with LRU-style caching.
+        
+        This method wraps get_shortest_path() with a simple cache. May be useful
+        for future optimizations but currently not sufficient for large-scale
+        trajectory generation needs.
 
-        Falls back to on-demand NetworkX shortest_path if shortcuts are unavailable.
-        Returns np.inf if no path exists.
+        Parameters
+        ----------
+        start_coord : tuple[int, int]
+            Starting street block (x, y).
+        end_coord : tuple[int, int]
+            Ending street block (x, y).
+
+        Returns
+        -------
+        list[tuple[int, int]]
+            Sequence of street blocks from start to end, or empty list if no path.
         """
         if not (isinstance(start_coord, tuple) and isinstance(end_coord, tuple)):
             raise ValueError("Coordinates must be tuples of (x, y).")
 
         # Initialize cache
-        if not hasattr(self, '_distance_cache'):
-            self._distance_cache = {}
+        if not hasattr(self, '_paths_cache'):
+            self._paths_cache = {}
         MAX_CACHE = 50000
 
         key = (start_coord, end_coord)
-        if key in self._distance_cache:
-            return self._distance_cache[key]
+        if key in self._paths_cache:
+            return self._paths_cache[key]
 
-        # Compute via path length
+        # Compute path
         try:
             path = self.get_shortest_path(start_coord, end_coord)
-            if not path:
-                d = float('inf')
-            else:
-                d = float(max(0, len(path) - 1))
         except Exception:
-            d = float('inf')
+            path = []
 
         # Cache with simple capacity control (drop half when full)
-        try:
-            self._distance_cache[key] = d
-            if len(self._distance_cache) > MAX_CACHE:
-                # drop roughly half oldest by arbitrary order
-                for k in list(self._distance_cache.keys())[:MAX_CACHE // 2]:
-                    self._distance_cache.pop(k, None)
-        except Exception:
-            pass
+        self._paths_cache[key] = path
+        if len(self._paths_cache) > MAX_CACHE:
+            # drop roughly half oldest by arbitrary order
+            for k in list(self._paths_cache.keys())[:MAX_CACHE // 2]:
+                self._paths_cache.pop(k, None)
 
-        return d
+        return path
+
+    def _add_to_adjacency_graph(self, G, coord):
+        """Add node and cardinal edges to existing graph."""
+        G.add_node(coord)
+        x, y = coord
+        for dx, dy in [(1, 0), (-1, 0), (0, 1), (0, -1)]:
+            neighbor = (x + dx, y + dy)
+            if neighbor in G:
+                G.add_edge(coord, neighbor, weight=1)
+
+    def _connect_building_door_to_streets(self, building_id, max_depth=5):
+        """Connect building door to street network by converting blocks along shortest path."""
+        building_row = self.buildings_gdf.loc[building_id]
+        door_x = int(building_row['door_cell_x'])
+        door_y = int(building_row['door_cell_y'])
+        door_coord = (door_x, door_y)
+        
+        # Get blocks within Manhattan distance
+        candidates = [(x, y) for x in range(door_x - max_depth, door_x + max_depth + 1)
+                      for y in range(door_y - max_depth, door_y + max_depth + 1)
+                      if abs(x - door_x) + abs(y - door_y) <= max_depth]
+        
+        non_building = self.blocks_gdf[self.blocks_gdf['kind'] != 'building']
+        nearby = non_building[non_building.index.isin(candidates)]
+        block_graph = self._build_adjacency_graph(nearby)
+        
+        paths = nx.single_source_shortest_path(block_graph, source=door_coord, cutoff=max_depth)
+        
+        for target, path in paths.items():
+            if target != door_coord and target in self.streets_gdf.index:
+                new_coords = [c for c in path if c not in self.streets_gdf.index]
+                
+                for coord in path:
+                    self.blocks_gdf.loc[coord, 'kind'] = 'street'
+                    self.blocks_gdf.loc[coord, 'building_type'] = 'street'
+                
+                if new_coords:
+                    new_gdf = gpd.GeoDataFrame([
+                        {'coord_x': x, 'coord_y': y, 'geometry': box(x, y, x+1, y+1),
+                         'building_type': 'street', 'building_id': None}
+                        for x, y in new_coords
+                    ], geometry='geometry', crs=self.streets_gdf.crs)
+                    new_gdf.set_index(['coord_x', 'coord_y'], inplace=True, drop=False)
+                    new_gdf.index.names = [None, None]
+                    self.streets_gdf = pd.concat([self.streets_gdf, new_gdf])
+                    
+                    if hasattr(self, 'street_graph') and self.street_graph is not None:
+                        for coord in new_coords:
+                            self._add_to_adjacency_graph(self.street_graph, coord)
+                
+                return True
+        
+        return False
 
 
 # =============================================================================
@@ -2027,6 +2048,20 @@ def __init__(self, boundary_polygon, streets_gdf, buildings_gdf, block_side_leng
         
         self.resolve_overlaps = resolve_overlaps
         self._rasterize(verbose=verbose)
+        
+        # Connect buildings with non-street doors
+        non_street_doors = []
+        for building_id in self.buildings_gdf['id']:
+            building_row = self.buildings_gdf.loc[building_id]
+            door_coord = (int(building_row['door_cell_x']), int(building_row['door_cell_y']))
+            if door_coord not in self.streets_gdf.index:
+                non_street_doors.append(building_id)
+        
+        if non_street_doors:
+            if verbose:
+                print(f"  Connecting {len(non_street_doors)} buildings with non-street doors...")
+            for building_id in non_street_doors:
+                self._connect_building_door_to_streets(building_id, max_depth=5)
 
     def _rasterize(self, verbose=True):
         # Assigning block types could arguably be its own method, but keeping it here for now
diff --git a/nomad/tests/test_city_gen.py b/nomad/tests/test_city_gen.py
index 620ddfd..f5250b7 100644
--- a/nomad/tests/test_city_gen.py
+++ b/nomad/tests/test_city_gen.py
@@ -81,6 +81,7 @@ def test_from_geodataframes_roundtrip_nonsquare(tmp_path):
 def test_shortest_path():
     rcg = RandomCityGenerator(width=10, height=10, street_spacing=5, seed=1)
     city = rcg.generate_city()
+    city.compute_shortest_paths(callable_only=False)
     
     # Get two street coordinates
     streets_temp = city.streets_gdf.reset_index(drop=True)
@@ -581,3 +582,115 @@ def test_to_file_reverse_affine_transformation(tmp_path):
         f"Area scaling incorrect: {wm_area} vs expected {expected_area}"
 
 
+def test_connect_building_door_success():
+    """Test successful connection of building door to streets within max_depth"""
+    city = City(dimensions=(10, 10))
+    
+    # Add buildings to create an isolated area
+    # Building 1: door at (5,5), footprint at (5,6)
+    city.add_building('home', door=(5, 5), blocks=[(5, 6)])
+    building_id = city.buildings_gdf.iloc[0]['id']
+    
+    # Add park buildings to isolate the corridor
+    city.add_building('park', door=(3, 4), blocks=[(4, 4), (4, 5), (4, 6)])
+    city.add_building('park', door=(7, 4), blocks=[(6, 4), (6, 5), (6, 6)])
+    
+    # Manually create corridor by setting blocks to None (non-street, non-building)
+    # Corridor: (5,3), (5,4), (5,5) leading to street at (5,2)
+    corridor_blocks = [(5, 3), (5, 4), (5, 5)]
+    for coord in corridor_blocks:
+        city.blocks_gdf.loc[coord, 'kind'] = None
+        city.blocks_gdf.loc[coord, 'building_type'] = None
+        city.streets_gdf = city.streets_gdf.drop(coord, errors='ignore')
+    
+    # Call connection function
+    result = city._connect_building_door_to_streets(building_id, max_depth=5)
+    
+    # Assert connection successful
+    assert result == True
+    
+    # Assert corridor blocks are now streets
+    assert city.blocks_gdf.loc[(5, 3), 'kind'] == 'street'
+    assert city.blocks_gdf.loc[(5, 4), 'kind'] == 'street'
+    assert city.blocks_gdf.loc[(5, 5), 'kind'] == 'street'
+    
+    # Assert blocks exist in streets_gdf
+    assert (5, 3) in city.streets_gdf.index
+    assert (5, 4) in city.streets_gdf.index
+    assert (5, 5) in city.streets_gdf.index
+
+
+def test_connect_building_door_path_too_long():
+    """Test failure when path exceeds max_depth"""
+    city = City(dimensions=(15, 15))
+    
+    # Add a building with door at (7, 7)
+    city.add_building('home', door=(7, 7), blocks=[(7, 8)])
+    building_id = city.buildings_gdf.iloc[0]['id']
+    
+    # Create a long corridor by adding buildings on both sides
+    # Force a path length of 7: (7,7) -> (7,6) -> (7,5) -> (7,4) -> (7,3) -> (7,2) -> (7,1) -> (7,0) street
+    for x in [6, 8]:
+        for y in range(1, 8):
+            city.add_building('park', door=(x-1 if x==6 else x+1, y), blocks=[(x, y)])
+    
+    # Set corridor blocks to None  
+    for y in range(1, 8):
+        city.blocks_gdf.loc[(7, y), 'kind'] = None
+        city.blocks_gdf.loc[(7, y), 'building_type'] = None
+        city.streets_gdf = city.streets_gdf.drop((7, y), errors='ignore')
+    
+    # Path length is 7, cutoff is 5 - should fail
+    result = city._connect_building_door_to_streets(building_id, max_depth=5)
+    
+    # Assert connection failed
+    assert result == False
+
+
+def test_connect_building_door_isolated():
+    """Test failure when building door is completely isolated"""
+    city = City(dimensions=(20, 20))
+    city.add_building('home', door=(10, 10), blocks=[(10, 11)])
+    building_id = city.buildings_gdf.iloc[0]['id']
+    
+    for x in range(3, 18):
+        for y in range(3, 18):
+            if not (x == 10 and y == 10):
+                city.blocks_gdf.loc[(x, y), 'kind'] = 'building'
+                city.blocks_gdf.loc[(x, y), 'building_type'] = 'park'
+    
+    city.streets_gdf = city._derive_streets_from_blocks()
+    
+    result = city._connect_building_door_to_streets(building_id, max_depth=5)
+    assert result == False
+
+
+def test_connect_building_door_already_connected():
+    """Test that function returns True if door is already a street"""
+    city = City(dimensions=(10, 10))
+    
+    # Add a building normally (door should be on street by default)
+    city.add_building('home', door=(5, 5), blocks=[(5, 6)])
+    building_id = city.buildings_gdf.iloc[0]['id']
+    
+    # Call connection function - should succeed immediately
+    result = city._connect_building_door_to_streets(building_id, max_depth=5)
+    
+    # Assert connection successful (door already connected)
+    assert result == True
+
+
+def test_rastercity_connects_isolated_doors():
+    """Test that RasterCity automatically connects buildings with non-street doors"""
+    buildings, streets, boundary = _load_fixture()
+    
+    # Create city with rasterization that will produce some non-street doors
+    city = RasterCity(boundary, streets, buildings, block_side_length=15.0, verbose=False)
+    
+    # All building doors must be streets after initialization
+    for building_id in city.buildings_gdf['id']:
+        building_row = city.buildings_gdf.loc[building_id]
+        door_coord = (int(building_row['door_cell_x']), int(building_row['door_cell_y']))
+        assert door_coord in city.streets_gdf.index, f"Building {building_id} door {door_coord} not in streets"
+
+
diff --git a/nomad/tests/test_traj_gen.py b/nomad/tests/test_traj_gen.py
index ce4715e..efd166e 100644
--- a/nomad/tests/test_traj_gen.py
+++ b/nomad/tests/test_traj_gen.py
@@ -31,6 +31,7 @@ def garden_city():
     city_path = data_dir / "garden-city.gpkg"
     # Garden-city fixture (legacy) uses 'type' instead of 'building_type'
     city = cg.City.from_geopackage(city_path, poi_cols={'building_type':'type'})
+    city.compute_shortest_paths(callable_only=False)
     return city
 
 
diff --git a/nomad/traj_gen.py b/nomad/traj_gen.py
index c7e88f0..a6f3848 100644
--- a/nomad/traj_gen.py
+++ b/nomad/traj_gen.py
@@ -288,6 +288,10 @@ def __init__(self,
         self.diary = diary if diary is not None else pd.DataFrame(
             columns=['datetime', 'timestamp', 'duration', 'location', 'identifier'])
         self.sparse_traj = None
+        
+        # Geometry cache for _sample_step optimization
+        self._cached_geom = None
+        self._cached_geom_id = None
 
 
     def reset_trajectory(self, trajectory = True, sparse = True, last_ping = True, diary = True):
@@ -363,6 +367,12 @@ def _sample_step(self, start_point, dest_building_id, dt, rng):
             The building ID if the step is a stay, or `None` if the step is a move.
         """
         city = self.city
+        
+        # Check if start_point is in cached geometry (diagnostic only, no effect yet)
+        if self._cached_geom is not None:
+            in_cache = self._cached_geom.contains(Point(start_point))
+            if in_cache:
+                pass  # Could reuse cached geometry here
 
         # Resolve destination building geometry and attributes (strict schema)
         brow = city.buildings_gdf[city.buildings_gdf['id'] == dest_building_id]
@@ -401,6 +411,10 @@ def _sample_step(self, start_point, dest_building_id, dt, rng):
                     coord = rng.normal(loc=start_point_arr, scale=sigma*np.sqrt(dt), size=2)
                     if dest_geom.contains(Point(coord)):
                         break
+            
+            # Cache building geometry for potential reuse
+            self._cached_geom = dest_geom
+            self._cached_geom_id = dest_building_id
             return coord, location
 
         # Otherwise, move along streets toward destination door cell
@@ -451,7 +465,14 @@ def _sample_step(self, start_point, dest_building_id, dt, rng):
 
             if bound_poly.contains(Point(coord)):
                 break
-
+        
+        # Cache path geometry for potential reuse
+        # For path movements, create ID from start and destination
+        if start_info['building_id'] is not None:
+            self._cached_geom_id = (start_info['building_id'], dest_building_id)
+        else:
+            self._cached_geom_id = (start_node, dest_building_id)
+        self._cached_geom = bound_poly
         return coord, location
 
 
@@ -472,6 +493,9 @@ def _traj_from_dest_diary(self, dt, seed=0):
             self.diary = self.diary.iloc[:-1]
 
         tick_secs = int(60*dt)
+        
+        # Track previous building for cache optimization
+        previous_building_id = None
 
         entry_update = []
         for i in range(destination_diary.shape[0]):
@@ -508,6 +532,9 @@ def _traj_from_dest_diary(self, dt, seed=0):
                                      'user_id': self.identifier}
                 else:
                     current_entry['duration'] += 1*dt  # add one tick to the duration
+            
+            # Update previous_building_id after completing this destination segment
+            previous_building_id = building_id
 
         if self.trajectory is None:
             self.trajectory = pd.DataFrame(trajectory_update)
@@ -1275,6 +1302,8 @@ def save_pop(self,
         """
         if full_path:
             full_df = pd.concat([agent.trajectory for agent in self.roster.values()], ignore_index=True)
+            if partition_cols and 'date' in partition_cols and 'date' not in full_df.columns:
+                full_df['date'] = pd.to_datetime(full_df['timestamp']).dt.date.astype(str)
             full_df = from_df(full_df, traj_cols=traj_cols, mixed_timezone_behavior=mixed_timezone_behavior)
             to_file(full_df,
                     path=full_path,
@@ -1285,6 +1314,8 @@ def save_pop(self,
     
         if sparse_path:
             sparse_df = pd.concat([agent.sparse_traj for agent in self.roster.values()], ignore_index=True)
+            if partition_cols and 'date' in partition_cols and 'date' not in sparse_df.columns:
+                sparse_df['date'] = pd.to_datetime(sparse_df['timestamp']).dt.date.astype(str)
             sparse_df = from_df(sparse_df, traj_cols=traj_cols, mixed_timezone_behavior=mixed_timezone_behavior)
             to_file(sparse_df,
                     path=sparse_path,
@@ -1296,6 +1327,8 @@ def save_pop(self,
     
         if diaries_path:
             diaries_df = pd.concat([agent.diary for agent in self.roster.values()], ignore_index=True)
+            if partition_cols and 'date' in partition_cols and 'date' not in diaries_df.columns:
+                diaries_df['date'] = pd.to_datetime(diaries_df['timestamp']).dt.date.astype(str)
             diaries_df = from_df(diaries_df, traj_cols=traj_cols, mixed_timezone_behavior=mixed_timezone_behavior)
             to_file(diaries_df,
                     path=diaries_path,