diff --git a/Areas/Admin/Controllers/SettingsController.cs b/Areas/Admin/Controllers/SettingsController.cs index c310f0d1..dd65c0da 100644 --- a/Areas/Admin/Controllers/SettingsController.cs +++ b/Areas/Admin/Controllers/SettingsController.cs @@ -13,11 +13,17 @@ namespace Wayfarer.Areas.Admin.Controllers [Area("Admin")] public class SettingsController : BaseController { + /// + /// SSE channel name for broadcasting tile cache purge progress to admin clients. + /// + public const string TileCachePurgeChannel = "admin-tile-cache-purge"; + private readonly IApplicationSettingsService _settingsService; private readonly TileCacheService _tileCacheService; private readonly IProxiedImageCacheService _imageCacheService; private readonly IWebHostEnvironment _env; private readonly IServiceScopeFactory _scopeFactory; + private readonly SseService _sseService; public SettingsController( ILogger logger, @@ -26,7 +32,8 @@ public SettingsController( TileCacheService tileCacheService, IProxiedImageCacheService imageCacheService, IWebHostEnvironment env, - IServiceScopeFactory scopeFactory) + IServiceScopeFactory scopeFactory, + SseService sseService) : base(logger, dbContext) { _settingsService = settingsService ?? throw new ArgumentNullException(nameof(settingsService)); @@ -34,6 +41,7 @@ public SettingsController( _imageCacheService = imageCacheService ?? throw new ArgumentNullException(nameof(imageCacheService)); _env = env ?? throw new ArgumentNullException(nameof(env)); _scopeFactory = scopeFactory ?? throw new ArgumentNullException(nameof(scopeFactory)); + _sseService = sseService ?? throw new ArgumentNullException(nameof(sseService)); } [HttpGet] @@ -273,50 +281,134 @@ void Track(string name, T oldVal, T newVal) } } + /// + /// Queues a full tile cache purge as a background operation. + /// Returns 202 Accepted immediately; progress is reported via SSE on + /// <see cref="TileCachePurgeChannel"/>. + /// Returns 409 Conflict if a purge is already running. 
+ /// [HttpPost] [ValidateAntiForgeryToken] - public async Task DeleteAllMapTileCache() + public IActionResult DeleteAllMapTileCache() { - try - { - await _tileCacheService.PurgeAllCacheAsync(); - - var cacheStatus = await GetCacheStatus(); + if (TileCacheService.IsPurgeInProgress) + return Conflict(new { success = false, message = "A cache purge is already in progress." }); - return Ok(new - { - success = true, - message = "The map tile cache has been deleted successfully.", - cacheStatus - }); - } - catch (Exception e) - { - return Ok(new { success = false, message = e.Message }); - } + QueuePurgeOperation("all"); + return Accepted(new { success = true, message = "Full cache purge started." }); } + /// + /// Queues an LRU tile cache purge (zoom >= 9) as a background operation. + /// Returns 202 Accepted immediately; progress is reported via SSE on + /// <see cref="TileCachePurgeChannel"/>. + /// Returns 409 Conflict if a purge is already running. + /// [HttpPost] [ValidateAntiForgeryToken] - public async Task DeleteLruCache() + public IActionResult DeleteLruCache() { - try - { - await _tileCacheService.PurgeLRUCacheAsync(); + if (TileCacheService.IsPurgeInProgress) + return Conflict(new { success = false, message = "A cache purge is already in progress." }); - var cacheStatus = await GetCacheStatus(); + QueuePurgeOperation("lru"); + return Accepted(new { success = true, message = "LRU cache purge started." }); + } - return Ok(new - { - success = true, - message = "The map tile cache for zoom levels equal or greater of 9, has been deleted successfully.", - cacheStatus - }); - } - catch (Exception e) + /// + /// SSE endpoint for receiving real-time tile cache purge progress events. + /// Admin clients connect here after initiating a purge or on page load + /// when a purge is already in progress. 
+ /// + [HttpGet] + public async Task TileCachePurgeSse(CancellationToken cancellationToken) + { + await _sseService.SubscribeAsync( + TileCachePurgeChannel, + Response, + cancellationToken, + enableHeartbeat: true, + heartbeatInterval: TimeSpan.FromSeconds(30)); + return new EmptyResult(); + } + + /// + /// Returns whether a tile cache purge is currently in progress. + /// Used by the admin UI on page load to detect and reconnect to an ongoing purge. + /// + [HttpGet] + public IActionResult TileCachePurgeStatus() + { + return Ok(new { inProgress = TileCacheService.IsPurgeInProgress }); + } + + /// + /// Fires a cache purge in the background with SSE progress reporting. + /// The purge methods broadcast "started" after acquiring the guard, and this + /// method broadcasts "completed" or "failed" based on the outcome. + /// Uses the captured singleton directly instead of + /// re-resolving from a new DI scope. + /// + private void QueuePurgeOperation(string purgeType) + { + // Capture the singleton reference for the background task — avoids + // re-resolving the same singleton from a new DI scope. + var sseService = _sseService; + + _ = Task.Run(async () => { - return Ok(new { success = false, message = e.Message }); - } + try + { + using var scope = _scopeFactory.CreateScope(); + var tileCacheService = scope.ServiceProvider.GetRequiredService(); + + // "started" is broadcast inside the purge methods after the + // CompareExchange guard succeeds — no dangling "started" on TOCTOU race. + if (purgeType == "lru") + await tileCacheService.PurgeLRUCacheAsync(sseService, TileCachePurgeChannel); + else + await tileCacheService.PurgeAllCacheAsync(sseService, TileCachePurgeChannel); + + // Broadcast final cache status so the UI can update counters. 
+ var cacheStatus = await BuildCacheStatusAsync(tileCacheService); + await sseService.BroadcastAsync(TileCachePurgeChannel, + System.Text.Json.JsonSerializer.Serialize(new + { + eventType = "completed", + purgeType, + message = purgeType == "lru" + ? "LRU cache purge completed successfully." + : "Full cache purge completed successfully.", + cacheStatus + })); + } + catch (InvalidOperationException) + { + // Another purge won the CompareExchange race between the controller's + // IsPurgeInProgress check and the service's atomic guard. Safe to ignore — + // the winning request is already broadcasting progress. No "started" event + // was sent for the losing request (it's broadcast after the guard). NOTE(review): this catch also swallows unrelated InvalidOperationExceptions raised inside the try (e.g. by GetRequiredService) without broadcasting "failed" — confirm this is acceptable. + _logger.LogInformation("Background {PurgeType} purge skipped: concurrent purge is running.", purgeType); + } + catch (Exception ex) + { + _logger.LogError(ex, "Background {PurgeType} cache purge failed.", purgeType); + try + { + await sseService.BroadcastAsync(TileCachePurgeChannel, + System.Text.Json.JsonSerializer.Serialize(new + { + eventType = "failed", + purgeType, + errorMessage = ex.Message + })); + } + catch (Exception broadcastEx) + { + _logger.LogDebug(broadcastEx, "Failed to broadcast purge failure event."); + } + } + }); } private class CacheStatus @@ -329,14 +421,18 @@ private class CacheStatus public double TotalLruGB { get; set; } } - private async Task GetCacheStatus() + /// + /// Builds cache status from a <see cref="TileCacheService"/> instance. + /// Used by both the request-scoped path and the background purge task. 
+ /// + private static async Task BuildCacheStatusAsync(TileCacheService tileCacheService) { var cacheStatus = new CacheStatus(); - double total = await _tileCacheService.GetCacheFileSizeInMbAsync(); - double lru = await _tileCacheService.GetLruCachedInMbFilesAsync(); + double total = await tileCacheService.GetCacheFileSizeInMbAsync(); + double lru = await tileCacheService.GetLruCachedInMbFilesAsync(); - cacheStatus.TotalCacheFiles = await _tileCacheService.GetTotalCachedFilesAsync(); - cacheStatus.LruTotalFiles = await _tileCacheService.GetLruTotalFilesInDbAsync(); + cacheStatus.TotalCacheFiles = await tileCacheService.GetTotalCachedFilesAsync(); + cacheStatus.LruTotalFiles = await tileCacheService.GetLruTotalFilesInDbAsync(); cacheStatus.TotalCacheSize = Math.Round(total, 2); cacheStatus.TotalCacheSizeGB = Math.Round(total / 1024, 3); cacheStatus.TotalLru = Math.Round(lru, 2); @@ -345,6 +441,11 @@ private async Task GetCacheStatus() return cacheStatus; } + /// + /// Retrieves cache status using the request-scoped tile cache service. + /// + private Task GetCacheStatus() => BuildCacheStatusAsync(_tileCacheService); + /// /// Normalizes and validates tile provider settings, applying presets when selected. /// @@ -423,9 +524,16 @@ private void SetTileProviderViewData() /// /// Purges the tile cache in the background to avoid blocking the settings update. + /// Skips if another purge is already in progress to prevent conflicts. /// private void QueueTileCachePurge() { + if (TileCacheService.IsPurgeInProgress) + { + _logger.LogWarning("Skipping tile-provider-change purge: another purge is already in progress."); + return; + } + _ = Task.Run(async () => { try @@ -434,6 +542,11 @@ private void QueueTileCachePurge() var tileCacheService = scope.ServiceProvider.GetRequiredService(); await tileCacheService.PurgeAllCacheAsync(); } + catch (InvalidOperationException) + { + // Another purge started between our check and execution — safe to ignore. 
+ _logger.LogInformation("Tile-provider-change purge skipped: concurrent purge is running."); + } catch (Exception ex) { _logger.LogError(ex, "Failed to purge tile cache in background."); diff --git a/Areas/Admin/Views/Settings/Index.cshtml b/Areas/Admin/Views/Settings/Index.cshtml index 2f98d92a..7beaac42 100644 --- a/Areas/Admin/Views/Settings/Index.cshtml +++ b/Areas/Admin/Views/Settings/Index.cshtml @@ -734,6 +734,19 @@ + @* ── Purge progress bar (hidden until a purge starts) ── *@ +
+ +
+
diff --git a/CHANGELOG.md b/CHANGELOG.md index ab61752d..8d319e32 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,22 @@ # CHANGELOG +## [1.2.27] - 2026-03-27 + +### Fixed +- **HIGH:** LRU/full cache purge timed out on large caches (~500MB), showing error page despite successful deletion. Purge now runs in background with immediate HTTP 202 response (#207) +- Cache lock contention during purge blocked concurrent tile writes. Reduced file-delete chunk size from 100 to 10 with `Task.Yield()` between chunks to prevent writer starvation (#207) + +### Added +- SSE-based real-time progress reporting for cache purge operations — admin UI shows animated progress bar with file count and percentage (#207) +- Atomic purge-in-progress guard (`Interlocked.CompareExchange`) prevents concurrent purge operations; second request returns 409 Conflict (#207) +- `TileCachePurgeSse` endpoint for SSE subscription and `TileCachePurgeStatus` endpoint for on-load reconnect (#207) +- On page load, admin settings UI checks purge status and reconnects SSE if a purge is mid-flight (#207) +- Tile-provider-change purge now respects the concurrency guard — skips gracefully if manual purge is running (#207) + +### Changed +- `DeleteAllMapTileCache` and `DeleteLruCache` endpoints return HTTP 202 Accepted (was 200 with awaited result) (#207) +- `PurgeAllCacheAsync` and `PurgeLRUCacheAsync` accept optional `SseService`/channel params for progress broadcasting (#207) + ## [1.2.26] - 2026-03-27 ### Changed diff --git a/Services/TileCacheService.cs b/Services/TileCacheService.cs index 94eab318..47068797 100644 --- a/Services/TileCacheService.cs +++ b/Services/TileCacheService.cs @@ -81,6 +81,20 @@ public class TileCacheService /// private static int _evictionInProgress = 0; + /// + /// Guards against concurrent purge operations (manual or provider-change triggered). + /// Only one purge can proceed at a time; concurrent callers receive an + /// <see cref="InvalidOperationException"/>. 
HTTP callers surface this as 409 Conflict; + /// internal callers (e.g. tile-provider-change) skip silently. + /// Uses <see cref="Interlocked.CompareExchange(ref int, int, int)"/> for lock-free rejection. + /// + private static int _purgeInProgress = 0; + + /// + /// Indicates whether a cache purge operation is currently running. + /// + public static bool IsPurgeInProgress => Volatile.Read(ref _purgeInProgress) == 1; + /// /// Indicates whether _currentCacheSize has been initialized from the database. /// @@ -340,6 +354,7 @@ internal static void ResetStaticStateForTesting() _sidecarCache.Clear(); Interlocked.Exchange(ref _currentCacheSize, 0); Interlocked.Exchange(ref _evictionInProgress, 0); + Interlocked.Exchange(ref _purgeInProgress, 0); _cacheSizeInitialized = false; OutboundBudget.ResetForTesting(); } @@ -1583,100 +1598,127 @@ public async Task GetLruTotalFilesInDbAsync() /// Purges all tile cache both static (zoom levels <= 8) and LRU cache (zoom levels >= 9). /// Also cleans up sidecar metadata files (.meta) and temporary files (.meta.tmp). /// - public async Task PurgeAllCacheAsync() + public async Task PurgeAllCacheAsync(SseService? sseService = null, string? sseChannel = null) { - if (!Directory.Exists(_cacheDirectory)) return; + if (Interlocked.CompareExchange(ref _purgeInProgress, 1, 0) != 0) + throw new InvalidOperationException("A cache purge is already in progress."); - using var scope = _serviceScopeFactory.CreateScope(); - var dbContext = scope.ServiceProvider.GetRequiredService(); + // Broadcast "started" only after the guard is acquired — ensures no dangling + // "started" event if a concurrent request loses the CompareExchange race. + await BroadcastPurgeProgressAsync(sseService, sseChannel, "started", "all", 0, 0); - const int batchSize = 300; // Adjustable batch size for optimal performance - const int maxRetries = 3; // Max number of retries - const int delayBetweenRetries = 1000; // Delay between retries in milliseconds - - // Bulk-load all DB metadata into a dictionary keyed by file path. 
- // This replaces O(N) individual DB queries (one per file) with a single query, - // preventing connection pool exhaustion on large caches (100K+ tiles). - // Uses foreach instead of ToDictionary to handle anomalous duplicate TileFilePath - // values gracefully (last-wins) instead of throwing ArgumentException. - var allMetadataList = await dbContext.TileCacheMetadata - .AsNoTracking() - .Select(t => new { t.Id, t.TileFilePath }) - .ToListAsync(); - var allMetadata = new Dictionary(allMetadataList.Count); - foreach (var t in allMetadataList) + try { - allMetadata[t.TileFilePath ?? string.Empty] = t.Id; - } + if (!Directory.Exists(_cacheDirectory)) return; + + using var scope = _serviceScopeFactory.CreateScope(); + var dbContext = scope.ServiceProvider.GetRequiredService(); + + const int batchSize = 300; // Adjustable batch size for optimal performance + const int maxRetries = 3; // Max number of retries + const int delayBetweenRetries = 1000; // Delay between retries in milliseconds + + // Bulk-load all DB metadata into a dictionary keyed by file path. + // This replaces O(N) individual DB queries (one per file) with a single query, + // preventing connection pool exhaustion on large caches (100K+ tiles). + // Uses foreach instead of ToDictionary to handle anomalous duplicate TileFilePath + // values gracefully (last-wins) instead of throwing ArgumentException. + var allMetadataList = await dbContext.TileCacheMetadata + .AsNoTracking() + .Select(t => new { t.Id, t.TileFilePath }) + .ToListAsync(); + var allMetadata = new Dictionary(allMetadataList.Count); + foreach (var t in allMetadataList) + { + allMetadata[t.TileFilePath ?? string.Empty] = t.Id; + } - // Collect files and their DB metadata into batches. - // DB deletions are committed first (consistent with EvictDbTilesAsync ordering). - // If DB commit fails, no files are deleted — cache stays consistent. - var batch = new List<(int? 
MetaId, string FilePath, long FileSize)>(); + // Count total files for progress reporting. + var allFiles = Directory.EnumerateFiles(_cacheDirectory, "*.png").ToList(); + var totalFiles = allFiles.Count; + var deletedFiles = 0; - foreach (var file in Directory.EnumerateFiles(_cacheDirectory, "*.png")) - { - try + await BroadcastPurgeProgressAsync(sseService, sseChannel, "progress", "all", 0, totalFiles); + + // Collect files and their DB metadata into batches. + // DB deletions are committed first (consistent with EvictDbTilesAsync ordering). + // If DB commit fails, no files are deleted — cache stays consistent. + var batch = new List<(int? MetaId, string FilePath, long FileSize)>(); + + foreach (var file in allFiles) { - int? metaId = allMetadata.TryGetValue(file, out var id) ? id : null; + try + { + int? metaId = allMetadata.TryGetValue(file, out var id) ? id : null; - long fileSize = File.Exists(file) ? new FileInfo(file).Length : 0; - batch.Add((metaId, file, fileSize)); + long fileSize = File.Exists(file) ? new FileInfo(file).Length : 0; + batch.Add((metaId, file, fileSize)); - // Commit and delete in batches. - if (batch.Count >= batchSize) + // Commit and delete in batches. + if (batch.Count >= batchSize) + { + await PurgeBatchAsync(dbContext, batch, maxRetries, delayBetweenRetries); + deletedFiles += batch.Count; + batch.Clear(); + await BroadcastPurgeProgressAsync(sseService, sseChannel, "progress", "all", + deletedFiles, totalFiles); + } + } + catch (Exception e) { - await PurgeBatchAsync(dbContext, batch, maxRetries, delayBetweenRetries); - batch.Clear(); + _logger.LogError(e, "Error purging file {File}", file); } } - catch (Exception e) + + // Commit any remaining entries if the batch size was not reached. 
+ if (batch.Any()) { - _logger.LogError(e, "Error purging file {File}", file); + await PurgeBatchAsync(dbContext, batch, maxRetries, delayBetweenRetries); + deletedFiles += batch.Count; + batch.Clear(); + await BroadcastPurgeProgressAsync(sseService, sseChannel, "progress", "all", + deletedFiles, totalFiles); } - } - - // Commit any remaining entries if the batch size was not reached. - if (batch.Any()) - { - await PurgeBatchAsync(dbContext, batch, maxRetries, delayBetweenRetries); - batch.Clear(); - } - // Clean up orphan DB records (records without corresponding files on disk). - // File.Exists cannot be translated to SQL, so project only Id + TileFilePath - // with AsNoTracking to minimize memory, then filter client-side with a HashSet. - var existingFiles = new HashSet( - Directory.EnumerateFiles(_cacheDirectory, "*.png")); - var allPaths = await dbContext.TileCacheMetadata - .AsNoTracking() - .Select(t => new { t.Id, t.TileFilePath }) - .ToListAsync(); - var orphanIds = allPaths - .Where(t => !existingFiles.Contains(t.TileFilePath)) - .Select(t => t.Id) - .ToList(); + // Clean up orphan DB records (records without corresponding files on disk). + // File.Exists cannot be translated to SQL, so project only Id + TileFilePath + // with AsNoTracking to minimize memory, then filter client-side with a HashSet. 
+ var existingFiles = new HashSet( + Directory.EnumerateFiles(_cacheDirectory, "*.png")); + var allPaths = await dbContext.TileCacheMetadata + .AsNoTracking() + .Select(t => new { t.Id, t.TileFilePath }) + .ToListAsync(); + var orphanIds = allPaths + .Where(t => !existingFiles.Contains(t.TileFilePath)) + .Select(t => t.Id) + .ToList(); - if (orphanIds.Any()) - { - _logger.LogInformation("Found {Count} orphan DB records without files on disk.", orphanIds.Count); - await RetryOperationAsync(async () => + if (orphanIds.Any()) { - dbContext.ChangeTracker.Clear(); - var toDelete = await dbContext.TileCacheMetadata - .Where(t => orphanIds.Contains(t.Id)) - .ToListAsync(); - if (toDelete.Any()) + _logger.LogInformation("Found {Count} orphan DB records without files on disk.", orphanIds.Count); + await RetryOperationAsync(async () => { - dbContext.TileCacheMetadata.RemoveRange(toDelete); - var affectedRows = await dbContext.SaveChangesAsync(); - _logger.LogInformation("Orphan records cleanup completed. Rows affected: {Rows}", affectedRows); - } - }, maxRetries, delayBetweenRetries); - } + dbContext.ChangeTracker.Clear(); + var toDelete = await dbContext.TileCacheMetadata + .Where(t => orphanIds.Contains(t.Id)) + .ToListAsync(); + if (toDelete.Any()) + { + dbContext.TileCacheMetadata.RemoveRange(toDelete); + var affectedRows = await dbContext.SaveChangesAsync(); + _logger.LogInformation("Orphan records cleanup completed. Rows affected: {Rows}", affectedRows); + } + }, maxRetries, delayBetweenRetries); + } - // Clean up sidecar metadata files and temp files as a final sweep. - CleanupSidecarFiles(); + // Clean up sidecar metadata files and temp files as a final sweep. + CleanupSidecarFiles(); + } + finally + { + Interlocked.Exchange(ref _purgeInProgress, 0); + } } /// @@ -1747,13 +1789,13 @@ await RetryOperationAsync(async () => } // Phase 2: Delete files from disk (best-effort, after DB commit succeeded). 
- // Chunked lock acquisition (100 files per lock) to avoid blocking CacheTileAsync writes - // for the entire purge duration when deleting thousands of files. + // Chunked lock acquisition (10 files per lock) to minimize contention with + // CacheTileAsync writes during large purge operations. // Only decrement _currentCacheSize for DB-tracked tiles (zoom >= 9, Meta != null). // Zoom 0-8 tiles are not tracked in _currentCacheSize, so decrementing them // would drive the counter negative and permanently disable eviction. // Uses actualSizes from re-fetched entities to minimize drift. - const int deleteChunkSize = 100; + const int deleteChunkSize = 10; foreach (var chunk in batch.Chunk(deleteChunkSize)) { await _cacheLock.WaitAsync(); @@ -1782,6 +1824,10 @@ await RetryOperationAsync(async () => { _cacheLock.Release(); } + + // Yield after each chunk to give CacheTileAsync callers a chance to acquire + // the lock, preventing writer starvation during large purge operations. + await Task.Yield(); } } @@ -1818,89 +1864,146 @@ private async Task RetryOperationAsync(Func operation, int maxRetries, int /// Deletes are chunked (1000 IDs per batch) to avoid PostgreSQL query plan explosion /// from large IN clauses. /// - public async Task PurgeLRUCacheAsync() + public async Task PurgeLRUCacheAsync(SseService? sseService = null, string? sseChannel = null) { - using var scope = _serviceScopeFactory.CreateScope(); - var dbContext = scope.ServiceProvider.GetRequiredService(); + if (Interlocked.CompareExchange(ref _purgeInProgress, 1, 0) != 0) + throw new InvalidOperationException("A cache purge is already in progress."); - // Project only the fields needed — AsNoTracking avoids change tracker overhead. 
- var lruCache = await dbContext.TileCacheMetadata - .AsNoTracking() - .Where(file => file.Zoom >= DbMetadataZoomThreshold) - .Select(t => new { t.Id, t.TileFilePath, t.Size }) - .ToListAsync(); - - if (!lruCache.Any()) return; + // Broadcast "started" only after the guard is acquired — ensures no dangling + // "started" event if a concurrent request loses the CompareExchange race. + await BroadcastPurgeProgressAsync(sseService, sseChannel, "started", "lru", 0, 0); - // Collect file paths with IDs for Phase 2 size lookup. - var fileInfo = lruCache - .Select(t => (Id: t.Id, FilePath: t.TileFilePath, Size: (long)t.Size)) - .ToList(); - - // Phase 1: Commit DB deletions first in chunks of 1000 IDs. - // Chunking prevents PostgreSQL query plan explosion from large IN clauses. - // Re-fetches entities by ID inside the retry lambda so each attempt starts - // with a clean change tracker — prevents entity tracking conflicts on retry. - // Captures actual sizes from re-fetched entities (not stale projected sizes) - // so Phase 2's _currentCacheSize decrement is accurate. - var lruIds = lruCache.Select(t => t.Id).ToList(); - var actualSizes = new Dictionary(); - const int chunkSize = 1000; - foreach (var chunk in lruIds.Chunk(chunkSize)) + try { - var chunkList = chunk.ToList(); - await RetryOperationAsync(async () => + using var scope = _serviceScopeFactory.CreateScope(); + var dbContext = scope.ServiceProvider.GetRequiredService(); + + // Project only the fields needed — AsNoTracking avoids change tracker overhead. + var lruCache = await dbContext.TileCacheMetadata + .AsNoTracking() + .Where(file => file.Zoom >= DbMetadataZoomThreshold) + .Select(t => new { t.Id, t.TileFilePath, t.Size }) + .ToListAsync(); + + if (!lruCache.Any()) return; + + // Collect file paths with IDs for Phase 2 size lookup. 
+ var fileInfo = lruCache + .Select(t => (Id: t.Id, FilePath: t.TileFilePath, Size: (long)t.Size)) + .ToList(); + + var totalFiles = fileInfo.Count; + await BroadcastPurgeProgressAsync(sseService, sseChannel, "progress", "lru", 0, totalFiles); + + // Phase 1: Commit DB deletions first in chunks of 1000 IDs. + // Chunking prevents PostgreSQL query plan explosion from large IN clauses. + // Re-fetches entities by ID inside the retry lambda so each attempt starts + // with a clean change tracker — prevents entity tracking conflicts on retry. + // Captures actual sizes from re-fetched entities (not stale projected sizes) + // so Phase 2's _currentCacheSize decrement is accurate. + var lruIds = lruCache.Select(t => t.Id).ToList(); + var actualSizes = new Dictionary(); + const int chunkSize = 1000; + foreach (var chunk in lruIds.Chunk(chunkSize)) { - dbContext.ChangeTracker.Clear(); - var toDelete = await dbContext.TileCacheMetadata - .Where(t => chunkList.Contains(t.Id)) - .ToListAsync(); - if (toDelete.Any()) + var chunkList = chunk.ToList(); + await RetryOperationAsync(async () => { - // Capture sizes before deletion — these reflect the current DB state. - foreach (var t in toDelete) - actualSizes[t.Id] = (long)t.Size; - dbContext.TileCacheMetadata.RemoveRange(toDelete); - await dbContext.SaveChangesAsync(); - } - }, 3, 1000); - } + dbContext.ChangeTracker.Clear(); + var toDelete = await dbContext.TileCacheMetadata + .Where(t => chunkList.Contains(t.Id)) + .ToListAsync(); + if (toDelete.Any()) + { + // Capture sizes before deletion — these reflect the current DB state. 
+ foreach (var t in toDelete) + actualSizes[t.Id] = (long)t.Size; + dbContext.TileCacheMetadata.RemoveRange(toDelete); + await dbContext.SaveChangesAsync(); + } + }, 3, 1000); + } - _logger.LogInformation("LRU purge: {Count} DB records deleted.", lruCache.Count); + _logger.LogInformation("LRU purge: {Count} DB records deleted.", lruCache.Count); - // Phase 2: Delete files from disk (best-effort, after DB commit succeeded). - // Chunked lock acquisition (100 files per lock) to avoid blocking CacheTileAsync writes - // for the entire purge duration when deleting thousands of files. - // Uses actualSizes from re-fetched entities to minimize _currentCacheSize drift. - const int deleteChunkSize = 100; - foreach (var chunk in fileInfo.Chunk(deleteChunkSize)) - { - await _cacheLock.WaitAsync(); - try + // Phase 2: Delete files from disk (best-effort, after DB commit succeeded). + // Chunked lock acquisition (10 files per lock) to minimize contention with + // CacheTileAsync writes during large purge operations. + // Uses actualSizes from re-fetched entities to minimize _currentCacheSize drift. 
+ const int deleteChunkSize = 10; + var deletedFiles = 0; + foreach (var chunk in fileInfo.Chunk(deleteChunkSize)) { - foreach (var (id, filePath, _) in chunk) + await _cacheLock.WaitAsync(); + try { - try + foreach (var (id, filePath, _) in chunk) { - if (File.Exists(filePath)) + try { - File.Delete(filePath); - if (actualSizes.TryGetValue(id, out var actualSize)) + if (File.Exists(filePath)) { - Interlocked.Add(ref _currentCacheSize, -actualSize); + File.Delete(filePath); + if (actualSizes.TryGetValue(id, out var actualSize)) + { + Interlocked.Add(ref _currentCacheSize, -actualSize); + } } } - } - catch (Exception e) - { - _logger.LogError(e, "Error deleting LRU cache file {File}", filePath); + catch (Exception e) + { + _logger.LogError(e, "Error deleting LRU cache file {File}", filePath); + } } } + finally + { + _cacheLock.Release(); + } + + // Yield after each chunk to give CacheTileAsync callers a chance to acquire + // the lock, preventing writer starvation during large purge operations. + await Task.Yield(); + + deletedFiles += chunk.Length; + await BroadcastPurgeProgressAsync(sseService, sseChannel, "progress", "lru", + deletedFiles, totalFiles); } - finally - { - _cacheLock.Release(); - } + } + finally + { + Interlocked.Exchange(ref _purgeInProgress, 0); + } + } + + /// + /// Broadcasts a purge progress event via SSE if a service and channel are provided. + /// Safe to call with null parameters (no-op). + /// + private async Task BroadcastPurgeProgressAsync(SseService? sseService, string? sseChannel, + string eventType, string purgeType, int deletedFiles, int totalFiles, + string? errorMessage = null) + { + if (sseService == null || sseChannel == null) return; + + var percent = totalFiles > 0 ? 
(int)((double)deletedFiles / totalFiles * 100) : 0; + var payload = JsonSerializer.Serialize(new + { + eventType, + purgeType, + deletedFiles, + totalFiles, + percentComplete = percent, + errorMessage + }); + + try + { + await sseService.BroadcastAsync(sseChannel, payload); + } + catch (Exception ex) + { + _logger.LogDebug(ex, "Failed to broadcast purge progress via SSE"); } } diff --git a/tests/Wayfarer.Tests/Controllers/AdminSettingsControllerTests.cs b/tests/Wayfarer.Tests/Controllers/AdminSettingsControllerTests.cs index 3699121f..b1f6ff00 100644 --- a/tests/Wayfarer.Tests/Controllers/AdminSettingsControllerTests.cs +++ b/tests/Wayfarer.Tests/Controllers/AdminSettingsControllerTests.cs @@ -56,7 +56,7 @@ public async Task Index_ReturnsView_WithSettings() new HttpContextAccessor()); var scopeFactory = BuildScopeFactory(tileCache); - var controller = new SettingsController(NullLogger.Instance, db, settingsMock.Object, tileCache, Mock.Of(), env.Object, scopeFactory); + var controller = new SettingsController(NullLogger.Instance, db, settingsMock.Object, tileCache, Mock.Of(), env.Object, scopeFactory, new SseService()); controller.ControllerContext = new ControllerContext { HttpContext = BuildHttpContextWithUser("admin", "Admin") }; var result = await controller.Index(); @@ -230,7 +230,8 @@ public void ClearMbtilesCache_RedirectsToIndex() tileCache, Mock.Of(), env.Object, - scopeFactory); + scopeFactory, + new SseService()); var httpContext = BuildHttpContextWithUser("admin", "Admin"); controller.ControllerContext = new ControllerContext { HttpContext = httpContext }; diff --git a/tests/Wayfarer.Tests/Services/TileCacheServiceTests.cs b/tests/Wayfarer.Tests/Services/TileCacheServiceTests.cs index 4d46fcc4..12f8276f 100644 --- a/tests/Wayfarer.Tests/Services/TileCacheServiceTests.cs +++ b/tests/Wayfarer.Tests/Services/TileCacheServiceTests.cs @@ -851,6 +851,203 @@ public void Dispose() { } } } + // ── Purge concurrency guard and SSE progress tests ─────────────────── 
+ + [Fact] + public async Task PurgeAllCacheAsync_RejectsSecondConcurrentPurge() + { + using var dir = new TempDir(); + var (db, dbName) = CreateNamedDbContext(); + // Use a slow handler to keep the first purge running long enough for the race. + var handler = new SlowTileHandler(delayMs: 50); + var service1 = CreateService(db, dir.Path, handler, dbName: dbName); + + // Seed some tiles so the purge has work to do. + await service1.CacheTileAsync("http://tiles/9/1/1.png", "9", "1", "1"); + await service1.CacheTileAsync("http://tiles/9/1/2.png", "9", "1", "2"); + + // Create a second service instance sharing the same static state. + var db2 = new ApplicationDbContext( + new DbContextOptionsBuilder() + .UseInMemoryDatabase(dbName) + .ConfigureWarnings(w => w.Ignore(InMemoryEventId.TransactionIgnoredWarning)) + .Options, + new ServiceCollection().BuildServiceProvider()); + var service2 = new TileCacheService( + NullLogger.Instance, + new ConfigurationBuilder() + .AddInMemoryCollection(new Dictionary + { + ["CacheSettings:TileCacheDirectory"] = dir.Path, + ["Application:ContactEmail"] = "test@example.com" + }).Build(), + new HttpClient(new StubTileHandler()), + db2, + new StubSettingsService(), + new SingleScopeFactory(() => + new ApplicationDbContext( + new DbContextOptionsBuilder() + .UseInMemoryDatabase(dbName) + .ConfigureWarnings(w => w.Ignore(InMemoryEventId.TransactionIgnoredWarning)) + .Options, + new ServiceCollection().BuildServiceProvider())), + new HttpContextAccessor()); + + // Start the first purge. + var firstPurge = service1.PurgeAllCacheAsync(); + + // Second purge should be rejected immediately. + var ex = await Assert.ThrowsAsync( + () => service2.PurgeAllCacheAsync()); + + Assert.Contains("already in progress", ex.Message); + + // Wait for the first purge to complete. + await firstPurge; + + // After completion, the guard should be released — a new purge should succeed. + // Re-seed some data. 
+ await service1.CacheTileAsync("http://tiles/9/2/1.png", "9", "2", "1"); + await service1.PurgeAllCacheAsync(); + } + + [Fact] + public async Task PurgeLRUCacheAsync_RejectsSecondConcurrentPurge() + { + using var dir = new TempDir(); + var (db, dbName) = CreateNamedDbContext(); + var service1 = CreateService(db, dir.Path, dbName: dbName); + + // Seed a tile so the purge has work to do. + await service1.CacheTileAsync("http://tiles/9/3/1.png", "9", "3", "1"); + + // Use a DelayingSseService to keep the first purge in flight — the "started" + // broadcast happens after the CompareExchange guard, so the delay keeps the + // guard held while the second call fires. + var delaySse = new DelayingSseService(delayMs: 200); + + // Start first purge with the delaying SSE (do not await — keep it in flight). + var firstPurge = service1.PurgeLRUCacheAsync(delaySse, "test-channel"); + + // Give the first purge a moment to hit the CompareExchange guard. + await Task.Delay(10); + + // Second purge should be rejected immediately. + var ex = await Assert.ThrowsAsync( + () => service1.PurgeLRUCacheAsync()); + Assert.Contains("already in progress", ex.Message); + + await firstPurge; + } + + [Fact] + public async Task PurgeAllCacheAsync_BroadcastsProgressViaSse() + { + using var dir = new TempDir(); + var db = CreateDbContext(); + var service = CreateService(db, dir.Path); + + // Seed tiles. + await service.CacheTileAsync("http://tiles/9/5/5.png", "9", "5", "5"); + await service.CacheTileAsync("http://tiles/9/5/6.png", "9", "5", "6"); + + var spy = new SpySseService(); + await service.PurgeAllCacheAsync(spy, "test-channel"); + + // Should have at least one progress broadcast. + Assert.NotEmpty(spy.Messages); + Assert.All(spy.Messages, m => Assert.Equal("test-channel", m.Channel)); + + // Verify the last progress event has the expected structure. 
+ var lastPayload = spy.Messages.Last().Data; + Assert.Contains("percentComplete", lastPayload); + } + + [Fact] + public async Task PurgeLRUCacheAsync_BroadcastsProgressViaSse() + { + using var dir = new TempDir(); + var (db, dbName) = CreateNamedDbContext(); + var service = CreateService(db, dir.Path, dbName: dbName); + + // Seed a tile. + await service.CacheTileAsync("http://tiles/9/6/6.png", "9", "6", "6"); + + var spy = new SpySseService(); + await service.PurgeLRUCacheAsync(spy, "test-channel"); + + Assert.NotEmpty(spy.Messages); + Assert.All(spy.Messages, m => Assert.Equal("test-channel", m.Channel)); + } + + [Fact] + public async Task PurgeAllCacheAsync_GuardIsReleasedAfterFailure() + { + using var dir = new TempDir(); + var db = CreateDbContext(); + var service = CreateService(db, dir.Path); + + // Seed a tile so the purge has work to do. NOTE: no failure is actually forced here; this only verifies the guard is released once a purge completes. + await service.CacheTileAsync("http://tiles/9/7/7.png", "9", "7", "7"); + + // First purge succeeds (or at least completes and releases the guard). + await service.PurgeAllCacheAsync(); + + // Should be able to purge again (guard released). + Assert.False(TileCacheService.IsPurgeInProgress); + } + + /// + /// Spy implementation of SseService that captures broadcast calls. + /// + private sealed class SpySseService : SseService + { + public List<(string Channel, string Data)> Messages { get; } = new(); + + public override Task BroadcastAsync(string channel, string data) + { + Messages.Add((channel, data)); + return Task.CompletedTask; + } + } + + /// + /// SSE implementation that introduces a delay on each broadcast, keeping the purge + /// in-flight long enough for concurrent guard tests. The "started" broadcast happens + /// after the CompareExchange guard, so the delay holds the guard while the second + /// purge call fires. 
+ /// + private sealed class DelayingSseService : SseService + { + private readonly int _delayMs; + public DelayingSseService(int delayMs = 200) => _delayMs = delayMs; + + public override async Task BroadcastAsync(string channel, string data) + { + await Task.Delay(_delayMs); + } + } + + /// + /// Handler that introduces a small delay to simulate slow tile fetching, + /// useful for testing concurrent purge guard timing. + /// + private sealed class SlowTileHandler : HttpMessageHandler + { + private readonly int _delayMs; + public SlowTileHandler(int delayMs = 100) => _delayMs = delayMs; + + protected override async Task SendAsync( + HttpRequestMessage request, CancellationToken cancellationToken) + { + await Task.Delay(_delayMs, cancellationToken); + return new HttpResponseMessage(HttpStatusCode.OK) + { + Content = new ByteArrayContent(new byte[] { 1, 2, 3, 4 }) + }; + } + } + private sealed class TempDir : IDisposable { public string Path { get; } = System.IO.Path.Combine(System.IO.Path.GetTempPath(), $"tiles-{Guid.NewGuid():N}"); diff --git a/wwwroot/js/Areas/Admin/Settings/Index.js b/wwwroot/js/Areas/Admin/Settings/Index.js index d805faa1..e08fb350 100644 --- a/wwwroot/js/Areas/Admin/Settings/Index.js +++ b/wwwroot/js/Areas/Admin/Settings/Index.js @@ -1,11 +1,13 @@ document.addEventListener('DOMContentLoaded', () => { document.getElementById('clearAllCache')?.addEventListener('click', (e) => { e.preventDefault(); + if (e.currentTarget.classList.contains('disabled')) return; deleteAllMapTileCache(); }); document.getElementById('clearLruCache')?.addEventListener('click', (e) => { e.preventDefault(); + if (e.currentTarget.classList.contains('disabled')) return; deleteLruCache(); }); @@ -95,6 +97,9 @@ document.addEventListener('DOMContentLoaded', () => { applyTileProviderSelection(); } + + // On page load, check if a cache purge is already in progress and reconnect SSE. 
+ checkPurgeStatusOnLoad(); }); /** @@ -105,6 +110,205 @@ const getAntiForgeryToken = () => { return document.querySelector('input[name="__RequestVerificationToken"]')?.value || ''; }; +// ── Tile cache purge (background with SSE progress) ──────────────────── + +/** @type {EventSource|null} Active SSE connection for purge progress. */ +let purgeEventSource = null; + +/** + * Checks if a cache purge is in progress on page load and reconnects the SSE + * progress stream if so. This handles the case where the admin refreshes the + * page mid-purge. + */ +const checkPurgeStatusOnLoad = () => { + fetch('/Admin/Settings/TileCachePurgeStatus') + .then(r => r.json()) + .then(data => { + if (data.inProgress) { + showPurgeProgress(); + connectPurgeSse(); + } + }) + .catch(() => { /* status endpoint unavailable — ignore */ }); +}; + +/** + * Opens an SSE connection to receive purge progress events and updates the UI. + */ +const connectPurgeSse = () => { + if (purgeEventSource) return; // already connected + + purgeEventSource = new EventSource('/Admin/Settings/TileCachePurgeSse'); + + purgeEventSource.onmessage = (event) => { + let data; + try { + data = JSON.parse(event.data); + } catch { + return; // ignore malformed SSE payloads + } + + switch (data.eventType) { + case 'started': + showPurgeProgress(); + break; + + case 'progress': + updatePurgeProgress(data.percentComplete, data.deletedFiles, data.totalFiles); + break; + + case 'completed': + if (data.cacheStatus) updateCacheStatusDom(data.cacheStatus); + hidePurgeProgress(); + wayfarer.showAlert('success', data.message || 'Cache purge completed.'); + closePurgeSse(); + break; + + case 'failed': + hidePurgeProgress(); + wayfarer.showAlert('danger', `Cache purge failed: ${data.errorMessage || 'Unknown error'}`); + closePurgeSse(); + break; + } + }; + + purgeEventSource.onerror = () => { + // Connection lost — close and retry after a short delay. 
+ // If the purge is still running, reconnect; otherwise hide the progress bar. + closePurgeSse(); + setTimeout(() => { + fetch('/Admin/Settings/TileCachePurgeStatus') + .then(r => r.json()) + .then(data => { + if (data.inProgress) { + connectPurgeSse(); + } else { + hidePurgeProgress(); + } + }) + .catch(() => hidePurgeProgress()); + }, 2000); + }; +}; + +/** + * Closes the active SSE connection for purge progress. + */ +const closePurgeSse = () => { + if (purgeEventSource) { + purgeEventSource.close(); + purgeEventSource = null; + } +}; + +/** + * Shows the purge progress bar and disables both cache-clear buttons. + */ +const showPurgeProgress = () => { + const progressContainer = document.getElementById('cachePurgeProgress'); + if (progressContainer) progressContainer.style.display = 'block'; + setPurgeButtonsDisabled(true); + updatePurgeProgress(0, 0, 0); +}; + +/** + * Hides the purge progress bar and re-enables the cache-clear buttons. + */ +const hidePurgeProgress = () => { + const progressContainer = document.getElementById('cachePurgeProgress'); + if (progressContainer) progressContainer.style.display = 'none'; + setPurgeButtonsDisabled(false); +}; + +/** + * Updates the progress bar width, text, and aria attributes. + * @param {number} percent - Progress percentage (0-100). + * @param {number} deleted - Number of files deleted so far. + * @param {number} total - Total files to delete. + */ +const updatePurgeProgress = (percent, deleted, total) => { + const bar = document.getElementById('cachePurgeBar'); + const text = document.getElementById('cachePurgeText'); + if (bar) { + bar.style.width = `${percent}%`; + bar.textContent = `${percent}%`; + bar.setAttribute('aria-valuenow', percent); + } + if (text) { + text.textContent = total > 0 + ? `Deleting... ${deleted} / ${total} files (${percent}%)` + : 'Starting purge...'; + } +}; + +/** + * Enables or disables both cache-clear buttons. + * @param {boolean} disabled - Whether to disable the buttons. 
+ */ +const setPurgeButtonsDisabled = (disabled) => { + const lruBtn = document.getElementById('clearLruCache'); + const allBtn = document.getElementById('clearAllCache'); + if (lruBtn) { + lruBtn.classList.toggle('disabled', disabled); + lruBtn.setAttribute('aria-disabled', disabled); + } + if (allBtn) { + allBtn.classList.toggle('disabled', disabled); + allBtn.setAttribute('aria-disabled', disabled); + } +}; + +/** + * Updates the cache status DOM elements after a completed purge. + * @param {object} status - Cache status object from the server. + */ +const updateCacheStatusDom = (status) => { + if (!status) return; + const set = (id, val) => { const el = document.getElementById(id); if (el) el.textContent = val; }; + set('TotalCacheFiles', status.totalCacheFiles); + set('LruTotalFiles', status.lruTotalFiles); + set('TotalCacheSize', status.totalCacheSize + ' MB'); + set('TotalCacheSizeGB', status.totalCacheSizeGB + ' GB'); + set('TotalLru', status.totalLru + ' MB'); + set('TotalLruGB', status.totalLruGB + ' GB'); +}; + +/** + * Initiates a cache purge via POST. On 202 Accepted, connects SSE for progress. + * On 409 Conflict, shows a warning that a purge is already running. + * @param {string} url - The purge endpoint URL. + * @param {string} errorLabel - Human-readable label for error messages. 
+ */ +const startCachePurge = (url, errorLabel) => { + fetch(url, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'RequestVerificationToken': getAntiForgeryToken() + } + }) + .then(response => { + if (response.status === 202) { + showPurgeProgress(); + connectPurgeSse(); + } else if (response.status === 409) { + response.json().then(data => { + wayfarer.showAlert('warning', data.message || 'A cache purge is already in progress.'); + }); + } else { + response.json().then(data => { + wayfarer.showAlert('danger', data?.message || `Failed to start ${errorLabel}.`); + }).catch(() => { + wayfarer.showAlert('danger', `Failed to start ${errorLabel}.`); + }); + } + }) + .catch(error => { + console.error('error:', error); + wayfarer.showAlert('danger', `Failed to start ${errorLabel}. ${error}`); + }); +}; + /** * Deletes all map tile cache from zoom level 1 to max from file system and database. */ @@ -113,33 +317,7 @@ const deleteAllMapTileCache = () => { title: "Confirm Deletion", message: "Are you sure you want to delete all map tile cache? 
This action cannot be undone.", confirmText: "Delete", - onConfirm: () => { - fetch("/Admin/Settings/DeleteAllMapTileCache", { - method: "POST", - headers: { - "Content-Type": "application/json", - "RequestVerificationToken": getAntiForgeryToken() - } - }) - .then(response => response.json()) - .then(data => { - if (data.success) { - document.getElementById('TotalCacheFiles').textContent = data.cacheStatus.totalCacheFiles; - document.getElementById('LruTotalFiles').textContent = data.cacheStatus.lruTotalFiles; - document.getElementById('TotalCacheSize').textContent = data.cacheStatus.totalCacheSize + ' MB'; - document.getElementById('TotalCacheSizeGB').textContent = data.cacheStatus.totalCacheSizeGB + ' GB'; - document.getElementById('TotalLru').textContent = data.cacheStatus.totalLru + ' MB'; - document.getElementById('TotalLruGB').textContent = data.cacheStatus.totalLruGB + ' GB'; - wayfarer.showAlert("success", data.message); - } else { - wayfarer.showAlert("danger", "Failed to delete map tile cache."); - } - }) - .catch(error => { - console.error("danger:", error); - wayfarer.showAlert("danger", `Failed to delete map tile cache. ${error}`); - }); - } + onConfirm: () => startCachePurge('/Admin/Settings/DeleteAllMapTileCache', 'full cache purge') }); }; @@ -151,33 +329,7 @@ const deleteLruCache = () => { title: "Confirm Deletion", message: "Are you sure you want to delete the Least Recently Used map tile cache (zoom levels >= 9)? 
This action cannot be undone.", confirmText: "Delete", - onConfirm: () => { - fetch("/Admin/Settings/DeleteLruCache", { - method: "POST", - headers: { - "Content-Type": "application/json", - "RequestVerificationToken": getAntiForgeryToken() - } - }) - .then(response => response.json()) - .then(data => { - if (data.success) { - document.getElementById('TotalCacheFiles').textContent = data.cacheStatus.totalCacheFiles; - document.getElementById('LruTotalFiles').textContent = data.cacheStatus.lruTotalFiles; - document.getElementById('TotalCacheSize').textContent = data.cacheStatus.totalCacheSize + ' MB'; - document.getElementById('TotalCacheSizeGB').textContent = data.cacheStatus.totalCacheSizeGB + ' GB'; - document.getElementById('TotalLru').textContent = data.cacheStatus.totalLru + ' MB'; - document.getElementById('TotalLruGB').textContent = data.cacheStatus.totalLruGB + ' GB'; - wayfarer.showAlert("success", data.message); - } else { - wayfarer.showAlert("danger", "Failed to delete Least Recently Used map tile cache."); - } - }) - .catch(error => { - console.error("danger:", error); - wayfarer.showAlert("danger", `Failed to delete Least Recently Used map tile cache. ${error}`); - }); - } + onConfirm: () => startCachePurge('/Admin/Settings/DeleteLruCache', 'LRU cache purge') }); };