diff --git a/orchestrator/CleanupStorage/CleanupStorage.cs b/orchestrator/CleanupStorage/CleanupStorage.cs new file mode 100644 index 0000000..004aaf1 --- /dev/null +++ b/orchestrator/CleanupStorage/CleanupStorage.cs @@ -0,0 +1,101 @@ +using Microsoft.EntityFrameworkCore; +using Microsoft.Extensions.Options; +using ModelScanner.Database; +using System.Diagnostics.CodeAnalysis; +using System.Text.RegularExpressions; + +namespace ModelScanner.CleanupStorage +{ + public partial class CleanupStorageJob + { + readonly CloudStorageService _cloudStorageService; + readonly ILogger _logger; + readonly CleanupStorageOptions _options; + readonly CivitaiDbContext _dbContext; + + public CleanupStorageJob(CloudStorageService cloudStorageService, CivitaiDbContext dbContext, ILogger logger, IOptions options) + { + _cloudStorageService = cloudStorageService; + _dbContext = dbContext; + _logger = logger; + _options = options.Value; + } + + public async Task PerformCleanup(CancellationToken cancellationToken) + { + var indexedDatabase = await IndexDatabase(cancellationToken); + _logger.LogInformation("Found {count} relevant files in the database", indexedDatabase.Count); + + var cutoffDate = DateTime.UtcNow.Add(-_options.CutoffInterval); + var objects = _cloudStorageService.ListObjects(cancellationToken); + + await foreach (var (path, lastModified, eTag) in objects) + { + if (lastModified >= cutoffDate) + { + _logger.LogInformation("Skipping {path} as it's not yet of age to be considered", path); + continue; + } + + if (!TryParseCloudObjectPath(path, out var userId, out var fileName)) + { + _logger.LogInformation("Skipping {path} as it was not in the expected format", path); + continue; + } + + if(indexedDatabase.Contains((userId, fileName))) + { + _logger.LogInformation("Skipping {path} as it is referred to in the database", path); + continue; + } + + _logger.LogInformation("Cleaning up {path}", path); + var staleUrl = await _cloudStorageService.SoftDeleteObject(path, eTag, cancellationToken); + + _logger.LogInformation("Moved {path} to {staleUrl}", path, staleUrl); + } + } + + [GeneratedRegex(@"^\/?(\d+)\/model\/(.+)$")] + private static partial Regex CloudPathRegex(); + + bool TryParseCloudObjectPath(string path, out int userId, [NotNullWhen(true)]out string? fileName) + { + var match = CloudPathRegex().Match(path); + + if (match.Success) + { + if (int.TryParse(match.Groups[1].ValueSpan, out userId)) + { + fileName = match.Groups[2].Value; + return true; + } + } + + userId = default; + fileName = default; + + return false; + } + + async Task> IndexDatabase(CancellationToken cancellationToken) + { + var result = new HashSet<(int userId, string fileName)>(); + + var query = _dbContext.ModelFiles + .Select(x => x.Url) + .AsAsyncEnumerable(); + + await foreach (var fileUrl in query) + { + var fileUri = new Uri(fileUrl, UriKind.Absolute); + if (TryParseCloudObjectPath(fileUri.AbsolutePath, out var userId, out var fileName)) + { + result.Add((userId, fileName)); + } + } + + return result; + } + } +} diff --git a/orchestrator/CleanupStorage/CleanupStorageOptions.cs b/orchestrator/CleanupStorage/CleanupStorageOptions.cs new file mode 100644 index 0000000..74b650a --- /dev/null +++ b/orchestrator/CleanupStorage/CleanupStorageOptions.cs @@ -0,0 +1,12 @@ +using System.ComponentModel.DataAnnotations; + +namespace ModelScanner.CleanupStorage; + +public class CleanupStorageOptions +{ + /// + /// Get or set the minimum age since last modification to be considered for cleanup + /// + [Required] + public TimeSpan CutoffInterval { get; set; } = TimeSpan.FromHours(24); +} diff --git a/orchestrator/CloudStorageService.cs b/orchestrator/CloudStorageService.cs index dd16844..cd36e28 100644 --- a/orchestrator/CloudStorageService.cs +++ b/orchestrator/CloudStorageService.cs @@ -3,6 +3,7 @@ using Amazon.S3.Model; using Microsoft.Extensions.Options; using System.ComponentModel.DataAnnotations; +using System.Runtime.CompilerServices; public class CloudStorageOptions { @@ -10,12 +11,14 @@ public class CloudStorageOptions [Required] public string SecretKey { get; set; } = default!; [Required] public string ServiceUrl { get; set; } = default!; [Required] public string UploadBucket { get; set; } = default!; + [Required] public string StaleBucket { get; set; } = default!; } public class CloudStorageService { readonly AmazonS3Client _amazonS3Client; readonly string _uploadBucket; + readonly string _staleBucket; readonly string _baseUrl; public CloudStorageService(IOptions options) @@ -25,6 +28,7 @@ public CloudStorageService(IOptions options) ServiceURL = options.Value.ServiceUrl }); _uploadBucket = options.Value.UploadBucket; + _staleBucket = options.Value.StaleBucket; _baseUrl = $"{options.Value.ServiceUrl}/{_uploadBucket}/"; } @@ -64,4 +68,43 @@ public async Task UploadFile(string filePath, string suggestedKey, Cance // Generate a url. This URL is not pre-signed. Alternative, use: return _baseUrl + key; } + + public async Task SoftDeleteObject(string path, string eTag, CancellationToken cancellationToken) + { + // First make a copy of the object in the stale bucket + var copyResponse = await _amazonS3Client.CopyObjectAsync(new CopyObjectRequest + { + SourceBucket = _uploadBucket, + DestinationBucket = _staleBucket, + SourceKey = path, + DestinationKey = path, + ETagToMatch = eTag + }, cancellationToken); + + throw new NotImplementedException(); + } + + public async IAsyncEnumerable<(string path, DateTime lastModified, string ETag)> ListObjects([EnumeratorCancellation]CancellationToken cancellationToken) + { + ListObjectsV2Response? response = null; + + do + { + cancellationToken.ThrowIfCancellationRequested(); + + response = await _amazonS3Client.ListObjectsV2Async(new ListObjectsV2Request + { + BucketName = _uploadBucket, + ContinuationToken = response?.NextContinuationToken, + }, cancellationToken); + + foreach (var s3Object in response.S3Objects) + { + cancellationToken.ThrowIfCancellationRequested(); + + yield return (s3Object.Key, s3Object.LastModified, s3Object.ETag); + } + } + while (response.IsTruncated); + } } \ No newline at end of file diff --git a/orchestrator/Database/CivitaiDbContext.cs b/orchestrator/Database/CivitaiDbContext.cs new file mode 100644 index 0000000..2a703cc --- /dev/null +++ b/orchestrator/Database/CivitaiDbContext.cs @@ -0,0 +1,23 @@ +using Microsoft.EntityFrameworkCore; +using Microsoft.EntityFrameworkCore.Metadata; +using ModelScanner.Database.Entities; + +namespace ModelScanner.Database; + +public class CivitaiDbContext : DbContext +{ + public CivitaiDbContext(DbContextOptions options) : base(options) + { + } + + public DbSet ModelFiles => Set(); + + protected override void OnModelCreating(ModelBuilder modelBuilder) + { + modelBuilder.Entity() + .HasNoKey() + .ToTable("ModelFile") + .Property(x => x.Url) + .HasColumnName("url"); + } +} diff --git a/orchestrator/Database/Entities/ModelFile.cs b/orchestrator/Database/Entities/ModelFile.cs new file mode 100644 index 0000000..cb789a0 --- /dev/null +++ b/orchestrator/Database/Entities/ModelFile.cs @@ -0,0 +1,6 @@ +namespace ModelScanner.Database.Entities; + +public class ModelFile +{ + public string Url { get; set; } = default!; +} diff --git a/orchestrator/FileProcessor.cs b/orchestrator/FileProcessor/FileProcessor.cs similarity index 97% rename from orchestrator/FileProcessor.cs rename to orchestrator/FileProcessor/FileProcessor.cs index 47d7989..f0353a2 100644 --- a/orchestrator/FileProcessor.cs +++ b/orchestrator/FileProcessor/FileProcessor.cs @@ -10,13 +10,15 @@ using System.Text.Json; using System.Text.RegularExpressions; -class FileProcessor +namespace ModelScanner.FileProcessor; + +class FileProcessorJob { - readonly ILogger _logger; + readonly ILogger _logger; readonly CloudStorageService _cloudStorageService; readonly LocalStorageOptions _localStorageOptions; - public FileProcessor(ILogger logger, CloudStorageService cloudStorageService, IOptions localStorageOptions) + public FileProcessorJob(ILogger logger, CloudStorageService cloudStorageService, IOptions localStorageOptions) { _logger = logger; _cloudStorageService = cloudStorageService; diff --git a/orchestrator/LocalStorageOptions.cs b/orchestrator/FileProcessor/LocalStorageOptions.cs similarity index 78% rename from orchestrator/LocalStorageOptions.cs rename to orchestrator/FileProcessor/LocalStorageOptions.cs index be875e6..20ebe71 100644 --- a/orchestrator/LocalStorageOptions.cs +++ b/orchestrator/FileProcessor/LocalStorageOptions.cs @@ -1,5 +1,7 @@ using System.ComponentModel.DataAnnotations; +namespace ModelScanner.FileProcessor; + class LocalStorageOptions { [Required] diff --git a/orchestrator/ScanResult.cs b/orchestrator/FileProcessor/ScanResult.cs similarity index 89% rename from orchestrator/ScanResult.cs rename to orchestrator/FileProcessor/ScanResult.cs index 86266c8..2c797d8 100644 --- a/orchestrator/ScanResult.cs +++ b/orchestrator/FileProcessor/ScanResult.cs @@ -1,4 +1,6 @@ -record ScanResult { +namespace ModelScanner.FileProcessor; + +record ScanResult { public string? Url { get; set; } public int FileExists { get; set; } public int PicklescanExitCode { get; set; } diff --git a/orchestrator/ModelScanner.csproj b/orchestrator/ModelScanner.csproj index 058b5ed..b296cf9 100644 --- a/orchestrator/ModelScanner.csproj +++ b/orchestrator/ModelScanner.csproj @@ -14,6 +14,7 @@ + diff --git a/orchestrator/Program.cs b/orchestrator/Program.cs index dc1fb87..3dd019a 100644 --- a/orchestrator/Program.cs +++ b/orchestrator/Program.cs @@ -3,7 +3,11 @@ using Hangfire.Storage.SQLite; using Microsoft.AspNetCore.Authentication; using Microsoft.AspNetCore.Authentication.Cookies; +using Microsoft.EntityFrameworkCore; using ModelScanner; +using ModelScanner.CleanupStorage; +using ModelScanner.Database; +using ModelScanner.FileProcessor; using System.Security.Claims; var builder = WebApplication.CreateBuilder(args); @@ -33,6 +37,12 @@ options.RequireAuthenticatedSignIn = false; }).AddCookie(); +builder.Services + .AddDbContext(options => + { + options.UseNpgsql(builder.Configuration.GetConnectionString("CivitaiDbContext")); + }); + var app = builder.Build(); app.UseAuthentication(); @@ -78,7 +88,7 @@ app.MapPost("/enqueue", (string fileUrl, string callbackUrl, IBackgroundJobClient backgroundJobClient) => { - backgroundJobClient.Enqueue(x => x.ProcessFile(fileUrl, callbackUrl, CancellationToken.None)); + backgroundJobClient.Enqueue(x => x.ProcessFile(fileUrl, callbackUrl, CancellationToken.None)); }); #pragma warning disable ASP0014 // Hangfire dashboard is not compatible with top level routing @@ -92,6 +102,7 @@ }); #pragma warning restore ASP0014 // Suggest using top level route registrations +RecurringJob.AddOrUpdate(x => x.PerformCleanup(CancellationToken.None), "* * * * *", queue: "cleanup-storage"); await app.RunAsync(); await app.WaitForShutdownAsync(); diff --git a/orchestrator/appsettings.json b/orchestrator/appsettings.json index 43f6712..b2a391b 100644 --- a/orchestrator/appsettings.json +++ b/orchestrator/appsettings.json @@ -17,5 +17,8 @@ }, "ValidTokens": [ "wBwmoQFpp592A0pSoYkb" - ] + ], + "ConnectionStrings": { + "CivitaiDbContext": null + } }