From 7e34bb5631802c3a8b382f1d32d202df85a290cf Mon Sep 17 00:00:00 2001 From: Stef Kariotidis Date: Fri, 27 Mar 2026 18:21:37 +0200 Subject: [PATCH 1/2] Tune cold-cache tile bootstrap for faster initial map loads (#214) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Raise OutboundBudget.BurstCapacity from 10 to 12 - Raise OutboundBudget.AcquireTimeout from 3.0s to 3.5s - Raise client concurrency pool multiplier from 0.6 to 0.75 (6 → 9 slots) - Align BudgetRetryAfterSeconds from 5 to 6 (12 tokens / 2 per sec) - Update client fallback values and comments - Add TODO #214-D for deferred pre-warming investigation --- Areas/Public/Controllers/TilesController.cs | 6 +++--- CHANGELOG.md | 9 +++++++++ Services/TileCacheService.cs | 15 +++++++++++---- .../Controllers/TilesControllerTests.cs | 2 +- .../Services/TileCacheServiceTests.cs | 2 +- wwwroot/js/retryTileLayer.js | 10 +++++----- 6 files changed, 30 insertions(+), 14 deletions(-) diff --git a/Areas/Public/Controllers/TilesController.cs b/Areas/Public/Controllers/TilesController.cs index 9332273..123f588 100644 --- a/Areas/Public/Controllers/TilesController.cs +++ b/Areas/Public/Controllers/TilesController.cs @@ -29,12 +29,12 @@ public class TilesController : Controller /// /// Retry-After header value (in seconds) sent with HTTP 503 when the outbound budget is exhausted. - /// Set to 5s to align with : at 2 tokens/sec - /// (ReplenishIntervalMs=500) with BurstCapacity=10, a full burst refills in ~5 seconds. + /// Set to 6s to align with : at 2 tokens/sec + /// (ReplenishIntervalMs=500) with BurstCapacity=12, a full burst refills in ~6 seconds. /// Also exposed to the client via wayfarerTileConfig.retryAfterSeconds so the /// tile layer can derive its slow-retry interval without hardcoding the value. 
/// - internal const int BudgetRetryAfterSeconds = 5; + internal const int BudgetRetryAfterSeconds = 6; /// /// Thread-safe dictionary for rate limiting anonymous tile requests by IP address. diff --git a/CHANGELOG.md b/CHANGELOG.md index 809a507..ab61752 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,14 @@ # CHANGELOG +## [1.2.26] - 2026-03-27 + +### Changed +- Outbound budget burst capacity raised from 10 to 12 — allows 2 more tiles through on initial burst before settling into sustained 2/sec rate, reducing 503s on cold-cache loads (#214) +- Outbound budget acquire timeout raised from 3.0s to 3.5s — extra 0.5s yields 1 more token from replenishment per wave, reducing false timeouts (#214) +- Client concurrency pool multiplier raised from 60% to 75% of burst capacity (pool size 6 → 9) — more tiles queue server-side instead of waiting client-side (#214) +- Budget retry-after interval updated from 5s to 6s to align with new burst refill time (12 tokens / 2 per sec = 6s) (#214) +- Client slow-retry interval auto-derives to 18s (was 15s) from updated retry-after × 3 (#214) + ## [1.2.25] - 2026-03-26 ### Fixed diff --git a/Services/TileCacheService.cs b/Services/TileCacheService.cs index e24244c..94eab31 100644 --- a/Services/TileCacheService.cs +++ b/Services/TileCacheService.cs @@ -129,11 +129,11 @@ internal static class OutboundBudget { /// /// Maximum burst capacity — how many outbound requests can proceed without waiting - /// for replenishment. Set to 10 to allow initial map loads to proceed quickly on + /// for replenishment. Set to 12 to allow initial map loads to proceed quickly on /// a cold cache. OSM's 2-connection limit is enforced at the transport layer via /// SocketsHttpHandler.MaxConnectionsPerServer = 2 in Program.cs. /// - internal const int BurstCapacity = 10; + internal const int BurstCapacity = 12; /// /// Replenishment interval — one token is released every this many milliseconds. 
@@ -145,9 +145,10 @@ internal static class OutboundBudget /// Maximum time to wait for a token before giving up. Callers that time out /// serve stale cache or return 503 (graceful degradation). /// Reduced from 10s to 3s to prevent thread pool starvation under sustained - /// cold-cache load (multiple users loading maps with many uncached tiles). + /// cold-cache load, then raised to 3.5s to allow one extra token per wave + /// during cold-cache bootstrap without materially increasing thread pool pressure. /// - internal static readonly TimeSpan AcquireTimeout = TimeSpan.FromSeconds(3); + internal static readonly TimeSpan AcquireTimeout = TimeSpan.FromSeconds(3.5); /// /// Semaphore representing available outbound tokens. Initialized to . @@ -742,6 +743,12 @@ private void WriteSidecarMetadata(string tileFilePath, TileSidecarMetadata metad // ── Tile caching and retrieval ────────────────────────────────────── + // TODO #214-D: Investigate pre-warming adjacent zoom levels (z-1, z+1) after a successful + // fetch to avoid cold-cache penalties when users zoom in/out. Two approaches to evaluate: + // 1. Fire-and-forget from CacheTileAsync — simplest but competes equally for OutboundBudget tokens. + // 2. Background Channel queue at lower priority — safer but requires a priority-aware token bucket. + // Deferred: OutboundBudget currently has no priority mechanism (SemaphoreSlim-based). + /// /// Downloads a tile from the given URL and caches it on the file system. /// Stores ETag, Last-Modified, and computed expiry from upstream response headers. 
diff --git a/tests/Wayfarer.Tests/Controllers/TilesControllerTests.cs b/tests/Wayfarer.Tests/Controllers/TilesControllerTests.cs index 3b730a8..76f53a5 100644 --- a/tests/Wayfarer.Tests/Controllers/TilesControllerTests.cs +++ b/tests/Wayfarer.Tests/Controllers/TilesControllerTests.cs @@ -414,7 +414,7 @@ public async Task GetTile_Returns503WithRetryAfter_WhenBudgetExhausted() var statusResult = Assert.IsType(result); Assert.Equal(503, statusResult.StatusCode); Assert.Equal("Tile server busy. Please retry shortly.", statusResult.Value); - Assert.Equal("5", controller.Response.Headers["Retry-After"].ToString()); + Assert.Equal("6", controller.Response.Headers["Retry-After"].ToString()); } finally { diff --git a/tests/Wayfarer.Tests/Services/TileCacheServiceTests.cs b/tests/Wayfarer.Tests/Services/TileCacheServiceTests.cs index 0899056..4d46fcc 100644 --- a/tests/Wayfarer.Tests/Services/TileCacheServiceTests.cs +++ b/tests/Wayfarer.Tests/Services/TileCacheServiceTests.cs @@ -766,7 +766,7 @@ public async Task OutboundBudget_AcquireAsync_GrantsTokensUpToBurstCapacity() { TileCacheService.OutboundBudget.ResetForTesting(); - // Burst capacity is 10 — first 10 should succeed immediately. + // Burst capacity is 12 — first 12 should succeed immediately. for (int i = 0; i < TileCacheService.OutboundBudget.BurstCapacity; i++) { var acquired = await TileCacheService.OutboundBudget.AcquireAsync(); diff --git a/wwwroot/js/retryTileLayer.js b/wwwroot/js/retryTileLayer.js index 51ed5d1..127fffa 100644 --- a/wwwroot/js/retryTileLayer.js +++ b/wwwroot/js/retryTileLayer.js @@ -27,10 +27,10 @@ const _config = window.wayfarerTileConfig || {}; // ---------- Global concurrency pool ---------- -// Pool size derived from server's outbound burst capacity: 60% of burst leaves headroom +// Pool size derived from server's outbound burst capacity: 75% of burst leaves headroom // for other concurrent users while still allowing a cold-cache load to progress quickly. 
-// Falls back to 6 if config is unavailable (e.g., inline scripts outside _Layout). -const _poolSize = Math.ceil((_config.burstCapacity || 10) * 0.6); +// Falls back to 9 if config is unavailable (e.g., inline scripts outside _Layout). +const _poolSize = Math.ceil((_config.burstCapacity || 12) * 0.75); let _inFlight = 0; const _waiting = []; @@ -80,7 +80,7 @@ const _releaseSlot = () => { // retryAfterSeconds is the Retry-After value the server sends on 503 (matches the budget // replenishment cycle). Slow retry uses 3x that interval to give the per-IP sliding window // time to decay between attempts. Falls back to 5s if config unavailable. -const _retryAfterSeconds = _config.retryAfterSeconds || 5; +const _retryAfterSeconds = _config.retryAfterSeconds || 6; const _defaultSlowRetryDelayMs = _retryAfterSeconds * 3 * 1000; const RetryTileLayer = L.TileLayer.extend({ @@ -225,7 +225,7 @@ const RetryTileLayer = L.TileLayer.extend({ * Two retry phases: * - Fast: attempts 0..maxRetries with exponential backoff (seconds) * - Slow: after fast retries exhaust on 503/network error, single-shot polls - * every ~15s (derived from retryAfterSeconds * 3) until the tile loads or + * every ~18s (derived from retryAfterSeconds * 3) until the tile loads or * is removed — each poll is one fetch, not a full fast-retry cycle * Acquires a concurrency slot before each fetch attempt to prevent overwhelming * the server's budget. Respects AbortSignal so removed tiles stop immediately. 
From dbcdce41ff66534274fbf75757489c706e93ba88 Mon Sep 17 00:00:00 2001 From: Stef Kariotidis Date: Fri, 27 Mar 2026 18:30:54 +0200 Subject: [PATCH 2/2] Fix stale comment: fallback retry-after is 6s not 5s --- wwwroot/js/retryTileLayer.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wwwroot/js/retryTileLayer.js b/wwwroot/js/retryTileLayer.js index 127fffa..4461304 100644 --- a/wwwroot/js/retryTileLayer.js +++ b/wwwroot/js/retryTileLayer.js @@ -79,7 +79,7 @@ const _releaseSlot = () => { // ---------- Retry timing derived from server config ---------- // retryAfterSeconds is the Retry-After value the server sends on 503 (matches the budget // replenishment cycle). Slow retry uses 3x that interval to give the per-IP sliding window -// time to decay between attempts. Falls back to 5s if config unavailable. +// time to decay between attempts. Falls back to 6s if config unavailable. const _retryAfterSeconds = _config.retryAfterSeconds || 6; const _defaultSlowRetryDelayMs = _retryAfterSeconds * 3 * 1000;