From 85f4f6868913dd5a6193c1c64bbf62684e759d23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Przemys=C5=82aw=20K=C5=82ys?= Date: Sat, 18 Apr 2026 20:14:56 +0200 Subject: [PATCH 01/15] Add reusable PowerForge link service --- Docs/PowerForge.Web.Pipeline.md | 4 + Docs/PowerForge.Web.WebsiteStarter.md | 8 + PowerForge.Tests/WebCliLinksTests.cs | 427 ++++++++++ PowerForge.Tests/WebLinkServiceTests.cs | 620 ++++++++++++++ .../WebPipelineRunnerLinksTests.cs | 595 +++++++++++++ .../PowerForgeWebCliJsonContext.cs | 12 + .../WebCliCommandHandlers.Dispatch.cs | 2 + .../WebCliCommandHandlers.Links.Support.cs | 220 +++++ .../WebCliCommandHandlers.Links.cs | 644 ++++++++++++++ PowerForge.Web.Cli/WebLinkCommandSupport.cs | 363 ++++++++ .../WebPipelineRunner.Tasks.Links.cs | 504 +++++++++++ PowerForge.Web.Cli/WebPipelineRunner.Tasks.cs | 31 + PowerForge.Web/Models/LinksSpec.cs | 229 +++++ PowerForge.Web/Models/SiteSpec.cs | 2 + .../Services/WebLinkService.ApplyReview.cs | 152 ++++ .../Services/WebLinkService.ExportApache.cs | 244 ++++++ .../Services/WebLinkService.Ignore404.cs | 184 ++++ .../Services/WebLinkService.Import.cs | 364 ++++++++ .../Services/WebLinkService.Promote404.cs | 237 ++++++ .../Services/WebLinkService.Report404.cs | 467 +++++++++++ .../Services/WebLinkService.Review404.cs | 159 ++++ .../Services/WebLinkService.Types.cs | 58 ++ PowerForge.Web/Services/WebLinkService.cs | 793 ++++++++++++++++++ .../powerforge.web.pipelinespec.schema.json | 343 ++++++++ Schemas/powerforge.web.sitespec.schema.json | 61 +- 25 files changed, 6721 insertions(+), 2 deletions(-) create mode 100644 PowerForge.Tests/WebCliLinksTests.cs create mode 100644 PowerForge.Tests/WebLinkServiceTests.cs create mode 100644 PowerForge.Tests/WebPipelineRunnerLinksTests.cs create mode 100644 PowerForge.Web.Cli/WebCliCommandHandlers.Links.Support.cs create mode 100644 PowerForge.Web.Cli/WebCliCommandHandlers.Links.cs create mode 100644 PowerForge.Web.Cli/WebLinkCommandSupport.cs create mode 100644 
PowerForge.Web.Cli/WebPipelineRunner.Tasks.Links.cs create mode 100644 PowerForge.Web/Models/LinksSpec.cs create mode 100644 PowerForge.Web/Services/WebLinkService.ApplyReview.cs create mode 100644 PowerForge.Web/Services/WebLinkService.ExportApache.cs create mode 100644 PowerForge.Web/Services/WebLinkService.Ignore404.cs create mode 100644 PowerForge.Web/Services/WebLinkService.Import.cs create mode 100644 PowerForge.Web/Services/WebLinkService.Promote404.cs create mode 100644 PowerForge.Web/Services/WebLinkService.Report404.cs create mode 100644 PowerForge.Web/Services/WebLinkService.Review404.cs create mode 100644 PowerForge.Web/Services/WebLinkService.Types.cs create mode 100644 PowerForge.Web/Services/WebLinkService.cs diff --git a/Docs/PowerForge.Web.Pipeline.md b/Docs/PowerForge.Web.Pipeline.md index 02d9c795..2cfd3267 100644 --- a/Docs/PowerForge.Web.Pipeline.md +++ b/Docs/PowerForge.Web.Pipeline.md @@ -113,6 +113,9 @@ Builds markdown + theme into static HTML. ``` Notes: - `clean: true` clears the output directory before building (avoids stale files). +- `language: "pl"` builds a single language slice. +- `languages: ["en","fr","de","es"]` builds a selected multi-language subset into one artifact. +- `languageAsRoot: true` only applies to single-language domain-style builds (for example `evotec.pl` as Polish at `/`). #### nav-export Exports a deterministic `site-nav.json` payload (including `surfaces` + `profiles`) from `site.json` + discovered content, without building HTML output. @@ -736,6 +739,7 @@ Applies critical CSS, minifies HTML/CSS/JS, optimizes images, and can hash asset } ``` Notes: +- `minifyHtml`, `minifyCss`, and `minifyJs` default to `false`; the `optimize` task does not minify by name alone, so CI/deploy pipelines should set them explicitly. - `config` loads `AssetPolicy` from `site.json` (rewrites, hashing defaults, cache headers). - `hashAssets` fingerprints files and rewrites references (HTML + CSS). 
- `cacheHeaders` writes `_headers` with cache-control rules (Netlify/Cloudflare Pages compatible). diff --git a/Docs/PowerForge.Web.WebsiteStarter.md b/Docs/PowerForge.Web.WebsiteStarter.md index 2fa1ab6f..9388cf5f 100644 --- a/Docs/PowerForge.Web.WebsiteStarter.md +++ b/Docs/PowerForge.Web.WebsiteStarter.md @@ -18,6 +18,14 @@ This is compatible with both "standalone themes" and "themes that extend a vendo - Always run with two modes: - dev: warn, summarize, stay fast - ci/release: fail on new issues, enforce budgets +- Always make the performance path explicit: + - add `optimize` to CI/release pipelines with explicit `minifyHtml`, `minifyCss`, and `minifyJs` + - choose `assetRegistry.cssStrategy` intentionally: + - `blocking` when the shell must never flash unstyled during hard navigations or reloads + - `preload` when critical CSS is solid and you want a softer non-blocking path + - `async` only when the theme's critical CSS truly covers the first paint + - prefer `Head.Links` for fonts/preconnects instead of hiding `@import` font loads inside inline theme CSS + - when a site needs first-party copies of remote fonts/CSS, prefer `AssetPolicy.Rewrites` with `SourceUrl` + `DownloadDependencies:true` - Always keep escape hatches scoped: - baselines for legacy noise - do not blanket-ignore whole categories without a written reason diff --git a/PowerForge.Tests/WebCliLinksTests.cs b/PowerForge.Tests/WebCliLinksTests.cs new file mode 100644 index 00000000..43b0ee98 --- /dev/null +++ b/PowerForge.Tests/WebCliLinksTests.cs @@ -0,0 +1,427 @@ +using System; +using System.IO; +using PowerForge.Web.Cli; +using Xunit; + +namespace PowerForge.Tests; + +public sealed class WebCliLinksTests +{ + private const int CliEnvelopeSchemaVersion = 1; + + [Fact] + public void HandleSubCommand_LinksValidate_UsesBaselineAndWritesDuplicateReport() + { + var root = Path.Combine(Path.GetTempPath(), "pf-web-cli-links-validate-" + Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(root); 
+ + try + { + var configPath = WriteSiteFixture(root, duplicateRedirects: true); + var baselinePath = Path.Combine(root, ".powerforge", "link-baseline.json"); + var duplicateReportPath = Path.Combine(root, "Build", "duplicates.csv"); + + var generateExitCode = WebCliCommandHandlers.HandleSubCommand( + "links", + new[] { "validate", "--config", configPath, "--baseline", baselinePath, "--baseline-generate" }, + outputJson: true, + logger: new WebConsoleLogger(), + outputSchemaVersion: CliEnvelopeSchemaVersion); + + Assert.Equal(0, generateExitCode); + Assert.True(File.Exists(baselinePath)); + + var validateExitCode = WebCliCommandHandlers.HandleSubCommand( + "links", + new[] { "validate", "--config", configPath, "--baseline", baselinePath, "--fail-on-new-warnings", "--duplicate-report-path", duplicateReportPath }, + outputJson: true, + logger: new WebConsoleLogger(), + outputSchemaVersion: CliEnvelopeSchemaVersion); + + Assert.Equal(0, validateExitCode); + Assert.True(File.Exists(duplicateReportPath)); + var report = File.ReadAllText(duplicateReportPath); + Assert.Contains("dedupe_generated_or_imported_row", report, StringComparison.Ordinal); + Assert.Contains("canonical", report, StringComparison.Ordinal); + Assert.Contains("duplicate", report, StringComparison.Ordinal); + } + finally + { + TryDeleteDirectory(root); + } + } + + [Fact] + public void HandleSubCommand_LinksExportApache_WritesConfiguredOutput() + { + var root = Path.Combine(Path.GetTempPath(), "pf-web-cli-links-export-" + Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(root); + + try + { + var configPath = WriteSiteFixture(root, duplicateRedirects: false); + + var exitCode = WebCliCommandHandlers.HandleSubCommand( + "links", + new[] { "export-apache", "--config", configPath, "--include-404" }, + outputJson: true, + logger: new WebConsoleLogger(), + outputSchemaVersion: CliEnvelopeSchemaVersion); + + Assert.Equal(0, exitCode); + + var outputPath = Path.Combine(root, "deploy", "apache", 
"link-service-redirects.conf"); + Assert.True(File.Exists(outputPath)); + var apache = File.ReadAllText(outputPath); + Assert.Contains("ErrorDocument 404 /404.html", apache, StringComparison.Ordinal); + Assert.Contains("RewriteRule ^old/?$ /new/ [R=301,L,QSD]", apache, StringComparison.Ordinal); + Assert.Contains("RewriteRule ^discord/?$ https://discord.gg/example [R=302,L,QSD]", apache, StringComparison.Ordinal); + } + finally + { + TryDeleteDirectory(root); + } + } + + [Fact] + public void HandleSubCommand_LinksImportWordPress_ImportsPrettyLinksCsv() + { + var root = Path.Combine(Path.GetTempPath(), "pf-web-cli-links-import-" + Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(root); + + try + { + var configPath = WriteSiteFixture(root, duplicateRedirects: false); + var importPath = Path.Combine(root, "pretty-links.csv"); + File.WriteAllText(importPath, + """ + id,name,slug,url,clicks,created_at + 10,Teams,teams,https://teams.example.test,42,2024-01-02T03:04:05Z + """); + + var exitCode = WebCliCommandHandlers.HandleSubCommand( + "links", + new[] { "import-wordpress", "--config", configPath, "--source", importPath, "--owner", "evotec", "--tag", "imported" }, + outputJson: true, + logger: new WebConsoleLogger(), + outputSchemaVersion: CliEnvelopeSchemaVersion); + + Assert.Equal(0, exitCode); + + var shortlinksPath = Path.Combine(root, "data", "links", "shortlinks.json"); + var json = File.ReadAllText(shortlinksPath); + Assert.Contains("\"slug\": \"teams\"", json, StringComparison.Ordinal); + Assert.Contains("\"host\": \"evo.yt\"", json, StringComparison.Ordinal); + Assert.Contains("\"targetUrl\": \"https://teams.example.test\"", json, StringComparison.Ordinal); + Assert.Contains("\"importedHits\": 42", json, StringComparison.Ordinal); + Assert.Contains("\"source\": \"imported-pretty-links\"", json, StringComparison.Ordinal); + } + finally + { + TryDeleteDirectory(root); + } + } + + [Fact] + public void 
HandleSubCommand_LinksReport404_WritesSuggestionReport() + { + var root = Path.Combine(Path.GetTempPath(), "pf-web-cli-links-404-" + Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(root); + + try + { + Directory.CreateDirectory(Path.Combine(root, "_site", "docs", "install")); + File.WriteAllText(Path.Combine(root, "_site", "docs", "install", "index.html"), "install"); + var logPath = Path.Combine(root, "access.log"); + File.WriteAllText(logPath, "127.0.0.1 - - [01/Jan/2026:00:00:00 +0000] \"GET /docs/instal HTTP/1.1\" 404 123 \"-\" \"test\""); + var reportPath = Path.Combine(root, "Build", "404-suggestions.json"); + var reviewCsvPath = Path.Combine(root, "Build", "404-suggestions.csv"); + + var exitCode = WebCliCommandHandlers.HandleSubCommand( + "links", + new[] { "report-404", "--site-root", Path.Combine(root, "_site"), "--source", logPath, "--out", reportPath, "--review-csv", reviewCsvPath }, + outputJson: true, + logger: new WebConsoleLogger(), + outputSchemaVersion: CliEnvelopeSchemaVersion); + + Assert.Equal(0, exitCode); + Assert.True(File.Exists(reportPath)); + var json = File.ReadAllText(reportPath); + Assert.Contains("\"path\":\"/docs/instal\"", json, StringComparison.Ordinal); + Assert.Contains("\"targetPath\":\"/docs/install/\"", json, StringComparison.Ordinal); + var csv = File.ReadAllText(reviewCsvPath); + Assert.Contains("review_redirect_candidate", csv, StringComparison.Ordinal); + Assert.Contains("/docs/install/", csv, StringComparison.Ordinal); + } + finally + { + TryDeleteDirectory(root); + } + } + + [Fact] + public void HandleSubCommand_LinksReview404_WritesReviewArtifacts() + { + var root = Path.Combine(Path.GetTempPath(), "pf-web-cli-links-review-404-" + Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(root); + + try + { + var configPath = WriteSiteFixture(root, duplicateRedirects: false); + Directory.CreateDirectory(Path.Combine(root, "_site", "docs", "install")); + File.WriteAllText(Path.Combine(root, "_site", "docs", 
"install", "index.html"), "install"); + var logPath = Path.Combine(root, "access.log"); + File.WriteAllText(logPath, "127.0.0.1 - - [01/Jan/2026:00:00:00 +0000] \"GET /docs/instal HTTP/1.1\" 404 123 \"-\" \"test\""); + + var exitCode = WebCliCommandHandlers.HandleSubCommand( + "links", + new[] { "review-404", "--config", configPath, "--site-root", Path.Combine(root, "_site"), "--source", logPath }, + outputJson: true, + logger: new WebConsoleLogger(), + outputSchemaVersion: CliEnvelopeSchemaVersion); + + Assert.Equal(0, exitCode); + var reportsPath = Path.Combine(root, "Build", "link-reports"); + Assert.True(File.Exists(Path.Combine(reportsPath, "404-suggestions.json"))); + Assert.True(File.Exists(Path.Combine(reportsPath, "404-suggestions.csv"))); + Assert.True(File.Exists(Path.Combine(reportsPath, "404-promoted-candidates.json"))); + Assert.True(File.Exists(Path.Combine(reportsPath, "404-promoted-candidates.csv"))); + Assert.True(File.Exists(Path.Combine(reportsPath, "ignored-404-candidates.json"))); + Assert.True(File.Exists(Path.Combine(reportsPath, "ignored-404-candidates.csv"))); + Assert.Contains("\"sourcePath\": \"/docs/instal\"", File.ReadAllText(Path.Combine(reportsPath, "404-promoted-candidates.json")), StringComparison.Ordinal); + } + finally + { + TryDeleteDirectory(root); + } + } + + [Fact] + public void HandleSubCommand_LinksPromote404_WritesRedirectCandidates() + { + var root = Path.Combine(Path.GetTempPath(), "pf-web-cli-links-promote-404-" + Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(root); + + try + { + var reportPath = Path.Combine(root, "404-suggestions.json"); + File.WriteAllText(reportPath, + """ + { + "suggestions": [ + { + "path": "/docs/instal", + "host": "evotec.xyz", + "count": 2, + "suggestions": [ + { "targetPath": "/docs/install/", "score": 0.91 } + ] + } + ] + } + """); + var redirectsPath = Path.Combine(root, "data", "links", "redirects.json"); + var reviewCsvPath = Path.Combine(root, "Build", 
"promoted-redirects.csv"); + + var exitCode = WebCliCommandHandlers.HandleSubCommand( + "links", + new[] { "promote-404", "--source", reportPath, "--out", redirectsPath, "--review-csv", reviewCsvPath }, + outputJson: true, + logger: new WebConsoleLogger(), + outputSchemaVersion: CliEnvelopeSchemaVersion); + + Assert.Equal(0, exitCode); + var json = File.ReadAllText(redirectsPath); + Assert.Contains("\"sourcePath\": \"/docs/instal\"", json, StringComparison.Ordinal); + Assert.Contains("\"targetUrl\": \"/docs/install/\"", json, StringComparison.Ordinal); + Assert.Contains("\"enabled\": false", json, StringComparison.Ordinal); + var csv = File.ReadAllText(reviewCsvPath); + Assert.Contains("/docs/instal", csv, StringComparison.Ordinal); + Assert.Contains("/docs/install/", csv, StringComparison.Ordinal); + } + finally + { + TryDeleteDirectory(root); + } + } + + [Fact] + public void HandleSubCommand_LinksIgnore404_WritesIgnoredRules() + { + var root = Path.Combine(Path.GetTempPath(), "pf-web-cli-links-ignore-404-" + Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(root); + + try + { + var reportPath = Path.Combine(root, "404-suggestions.json"); + File.WriteAllText(reportPath, + """ + { + "suggestions": [ + { + "path": "/wp-login.php", + "count": 4, + "suggestions": [] + } + ] + } + """); + var ignoredPath = Path.Combine(root, "data", "links", "ignored-404.json"); + var reviewCsvPath = Path.Combine(root, "Build", "ignored-404.csv"); + + var exitCode = WebCliCommandHandlers.HandleSubCommand( + "links", + new[] { "ignore-404", "--source", reportPath, "--out", ignoredPath, "--path", "/wp-login.php", "--reason", "scanner noise", "--review-csv", reviewCsvPath }, + outputJson: true, + logger: new WebConsoleLogger(), + outputSchemaVersion: CliEnvelopeSchemaVersion); + + Assert.Equal(0, exitCode); + var json = File.ReadAllText(ignoredPath); + Assert.Contains("\"path\": \"/wp-login.php\"", json, StringComparison.Ordinal); + Assert.Contains("\"reason\": \"scanner 
noise\"", json, StringComparison.Ordinal); + var csv = File.ReadAllText(reviewCsvPath); + Assert.Contains("/wp-login.php", csv, StringComparison.Ordinal); + Assert.Contains("scanner noise", csv, StringComparison.Ordinal); + } + finally + { + TryDeleteDirectory(root); + } + } + + [Fact] + public void HandleSubCommand_LinksApplyReview_AppliesCandidateFiles() + { + var root = Path.Combine(Path.GetTempPath(), "pf-web-cli-links-apply-review-" + Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(root); + + try + { + var configPath = WriteSiteFixture(root, duplicateRedirects: false); + var reportsPath = Path.Combine(root, "Build", "link-reports"); + Directory.CreateDirectory(reportsPath); + File.WriteAllText(Path.Combine(reportsPath, "404-promoted-candidates.json"), + """ + { + "redirects": [ + { + "id": "reviewed", + "sourcePath": "/docs/instal", + "targetUrl": "/docs/install/", + "enabled": false, + "source": "404-promoted" + } + ] + } + """); + File.WriteAllText(Path.Combine(reportsPath, "ignored-404-candidates.json"), + """ + { + "ignored404": [ + { + "path": "/wp-login.php", + "reason": "scanner noise" + } + ] + } + """); + File.WriteAllText(Path.Combine(root, "data", "links", "ignored-404.json"), "{ \"ignored404\": [] }"); + var summaryPath = Path.Combine(reportsPath, "apply-summary.json"); + + var exitCode = WebCliCommandHandlers.HandleSubCommand( + "links", + new[] { "apply-review", "--config", configPath, "--all", "--ignored-404", Path.Combine(root, "data", "links", "ignored-404.json"), "--summary-path", summaryPath }, + outputJson: true, + logger: new WebConsoleLogger(), + outputSchemaVersion: CliEnvelopeSchemaVersion); + + Assert.Equal(0, exitCode); + Assert.True(File.Exists(summaryPath)); + Assert.Contains("\"sourcePath\": \"/docs/instal\"", File.ReadAllText(Path.Combine(root, "data", "links", "redirects.json")), StringComparison.Ordinal); + Assert.Contains("\"path\": \"/wp-login.php\"", File.ReadAllText(Path.Combine(root, "data", "links", 
"ignored-404.json")), StringComparison.Ordinal); + } + finally + { + TryDeleteDirectory(root); + } + } + + private static string WriteSiteFixture(string root, bool duplicateRedirects) + { + Directory.CreateDirectory(Path.Combine(root, "data", "links")); + File.WriteAllText(Path.Combine(root, "site.json"), + """ + { + "name": "Links CLI Test", + "baseUrl": "https://evotec.xyz", + "collections": [], + "links": { + "redirects": "./data/links/redirects.json", + "shortlinks": "./data/links/shortlinks.json", + "hosts": { + "short": "evo.yt" + }, + "apacheOut": "./deploy/apache/link-service-redirects.conf" + } + } + """); + + var redirectsJson = duplicateRedirects + ? """ + [ + { + "id": "canonical", + "sourcePath": "/old/", + "targetUrl": "/new/", + "status": 301 + }, + { + "id": "duplicate", + "sourcePath": "/old", + "targetUrl": "/new", + "status": 301 + } + ] + """ + : """ + [ + { + "id": "canonical", + "sourcePath": "/old/", + "targetUrl": "/new/", + "status": 301 + } + ] + """; + File.WriteAllText(Path.Combine(root, "data", "links", "redirects.json"), redirectsJson); + File.WriteAllText(Path.Combine(root, "data", "links", "shortlinks.json"), + """ + [ + { + "slug": "discord", + "host": "evo.yt", + "targetUrl": "https://discord.gg/example", + "status": 302, + "owner": "evotec", + "allowExternal": true + } + ] + """); + + return Path.Combine(root, "site.json"); + } + + private static void TryDeleteDirectory(string path) + { + try + { + if (Directory.Exists(path)) + Directory.Delete(path, true); + } + catch + { + // ignore cleanup failures in tests + } + } +} diff --git a/PowerForge.Tests/WebLinkServiceTests.cs b/PowerForge.Tests/WebLinkServiceTests.cs new file mode 100644 index 00000000..01ed1679 --- /dev/null +++ b/PowerForge.Tests/WebLinkServiceTests.cs @@ -0,0 +1,620 @@ +using System; +using System.IO; +using System.Linq; +using System.Text.Json; +using PowerForge.Web; +using Xunit; + +namespace PowerForge.Tests; + +public sealed class WebLinkServiceTests +{ + 
[Fact] + public void Validate_DetectsDuplicateRedirectsAndExternalTargets() + { + var dataSet = new WebLinkDataSet + { + Redirects = new[] + { + new LinkRedirectRule + { + Id = "first", + SourcePath = "/old/", + TargetUrl = "/new/", + Status = 301 + }, + new LinkRedirectRule + { + Id = "second", + SourcePath = "/old", + TargetUrl = "https://example.com/new", + Status = 301 + } + } + }; + + var result = WebLinkService.Validate(dataSet); + + Assert.False(result.Success); + Assert.Contains(result.Issues, issue => issue.Code == "PFLINK.REDIRECT.DUPLICATE"); + Assert.Contains(result.Issues, issue => issue.Code == "PFLINK.REDIRECT.TARGET_EXTERNAL"); + } + + [Fact] + public void ExportApache_EmitsHostScopedRedirectsAndShortlinks() + { + var root = Path.Combine(Path.GetTempPath(), "pf-web-links-export-" + Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(root); + + try + { + var outPath = Path.Combine(root, "links.conf"); + var dataSet = new WebLinkDataSet + { + Redirects = new[] + { + new LinkRedirectRule + { + Id = "legacy", + SourceHost = "evotec.pl", + SourcePath = "/stary/", + TargetUrl = "/nowy/", + Status = 301 + }, + new LinkRedirectRule + { + Id = "old-post-id", + SourcePath = "/", + SourceQuery = "p=123", + MatchType = LinkRedirectMatchType.Query, + TargetUrl = "/blog/current/", + Status = 301 + } + }, + Shortlinks = new[] + { + new LinkShortlinkRule + { + Slug = "discord", + Host = "evo.yt", + TargetUrl = "https://discord.gg/example", + Status = 302, + Owner = "evotec", + AllowExternal = true + } + } + }; + + var result = WebLinkService.ExportApache(dataSet, new WebLinkApacheExportOptions + { + OutputPath = outPath, + IncludeErrorDocument404 = true, + Hosts = new System.Collections.Generic.Dictionary(StringComparer.OrdinalIgnoreCase) + { + ["short"] = "evo.yt" + } + }); + + Assert.Equal(3, result.RuleCount); + var apache = File.ReadAllText(outPath); + Assert.Contains("ErrorDocument 404 /404.html", apache, StringComparison.Ordinal); + 
Assert.Contains("RewriteCond %{HTTP_HOST} ^(.+\\.)?evotec\\.pl$ [NC]", apache, StringComparison.Ordinal); + Assert.Contains("RewriteRule ^stary/?$ /nowy/ [R=301,L,QSD]", apache, StringComparison.Ordinal); + Assert.Contains("RewriteCond %{QUERY_STRING} (^|&)p=123(&|$)", apache, StringComparison.Ordinal); + Assert.Contains("RewriteCond %{HTTP_HOST} ^(.+\\.)?evo\\.yt$ [NC]", apache, StringComparison.Ordinal); + Assert.Contains("RewriteRule ^discord/?$ https://discord.gg/example [R=302,L,QSD]", apache, StringComparison.Ordinal); + } + finally + { + TryDeleteDirectory(root); + } + } + + [Fact] + public void ImportPrettyLinks_MergesExistingShortlinksAndPreservesImportedHits() + { + var root = Path.Combine(Path.GetTempPath(), "pf-web-links-import-" + Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(root); + + try + { + var csvPath = Path.Combine(root, "pretty-links.csv"); + var outPath = Path.Combine(root, "shortlinks.json"); + File.WriteAllText(csvPath, + """ + id,name,slug,url,clicks + 7,Discord,discord,https://discord.gg/example,42 + 8,Docs,/go/docs,https://docs.example.test,12 + """); + File.WriteAllText(outPath, + """ + { + "shortlinks": [ + { + "slug": "manual", + "targetUrl": "https://example.test", + "owner": "evotec", + "allowExternal": true + } + ] + } + """); + + var result = WebLinkService.ImportPrettyLinks(new WebLinkShortlinkImportOptions + { + SourcePath = csvPath, + OutputPath = outPath, + Host = "evo.yt", + PathPrefix = "/go", + Owner = "evotec", + Tags = new[] { "imported" } + }); + + Assert.Equal(1, result.ExistingCount); + Assert.Equal(2, result.ImportedCount); + Assert.Equal(3, result.WrittenCount); + + var loaded = WebLinkService.Load(new WebLinkLoadOptions + { + ShortlinksPath = outPath + }); + var discord = Assert.Single(loaded.Shortlinks, item => item.Slug == "discord"); + Assert.Equal("evo.yt", discord.Host); + Assert.Equal("/go", discord.PathPrefix); + Assert.Equal(42, discord.ImportedHits); + Assert.Equal("imported-pretty-links", 
discord.Source); + Assert.Contains("imported", discord.Tags); + } + finally + { + TryDeleteDirectory(root); + } + } + + [Fact] + public void Generate404Report_SuggestsGeneratedRoutesFromApacheLog() + { + var root = Path.Combine(Path.GetTempPath(), "pf-web-links-404-" + Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(root); + + try + { + Directory.CreateDirectory(Path.Combine(root, "_site", "docs", "install")); + File.WriteAllText(Path.Combine(root, "_site", "docs", "install", "index.html"), "install"); + File.WriteAllText(Path.Combine(root, "_site", "404.html"), "404"); + var logPath = Path.Combine(root, "access.log"); + File.WriteAllText(logPath, + """ + 127.0.0.1 - - [01/Jan/2026:00:00:00 +0000] "GET /docs/instal HTTP/1.1" 404 123 "-" "test" + 127.0.0.1 - - [01/Jan/2026:00:00:01 +0000] "GET /assets/missing.png HTTP/1.1" 404 123 "-" "test" + """); + + var result = WebLinkService.Generate404Report(new WebLink404ReportOptions + { + SiteRoot = Path.Combine(root, "_site"), + SourcePath = logPath + }); + + Assert.Equal(1, result.ObservationCount); + var suggestion = Assert.Single(result.Suggestions); + Assert.Equal("/docs/instal", suggestion.Path); + Assert.Contains(suggestion.Suggestions, item => item.TargetPath == "/docs/install/"); + } + finally + { + TryDeleteDirectory(root); + } + } + + [Fact] + public void Generate404Report_HonorsIgnored404Rules() + { + var root = Path.Combine(Path.GetTempPath(), "pf-web-links-404-ignore-filter-" + Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(root); + + try + { + Directory.CreateDirectory(Path.Combine(root, "_site", "docs", "install")); + File.WriteAllText(Path.Combine(root, "_site", "docs", "install", "index.html"), "install"); + var logPath = Path.Combine(root, "access.log"); + File.WriteAllText(logPath, + """ + 127.0.0.1 - - [01/Jan/2026:00:00:00 +0000] "GET /docs/instal HTTP/1.1" 404 123 "-" "test" + 127.0.0.1 - - [01/Jan/2026:00:00:01 +0000] "GET /scanner/probe HTTP/1.1" 404 123 "-" "test" + """); + 
var ignoredPath = Path.Combine(root, "ignored-404.json"); + File.WriteAllText(ignoredPath, + """ + { + "ignored404": [ + { "path": "/scanner/*", "reason": "scanner noise" } + ] + } + """); + + var result = WebLinkService.Generate404Report(new WebLink404ReportOptions + { + SiteRoot = Path.Combine(root, "_site"), + SourcePath = logPath, + Ignored404Path = ignoredPath + }); + + Assert.Equal(1, result.ObservationCount); + Assert.Equal(1, result.IgnoredObservationCount); + Assert.DoesNotContain(result.Suggestions, item => item.Path == "/scanner/probe"); + } + finally + { + TryDeleteDirectory(root); + } + } + + [Fact] + public void Generate404Report_AllowsMissingSourceWhenConfigured() + { + var root = Path.Combine(Path.GetTempPath(), "pf-web-links-404-missing-source-" + Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(root); + + try + { + Directory.CreateDirectory(Path.Combine(root, "_site")); + File.WriteAllText(Path.Combine(root, "_site", "index.html"), "home"); + + var result = WebLinkService.Generate404Report(new WebLink404ReportOptions + { + SiteRoot = Path.Combine(root, "_site"), + SourcePath = Path.Combine(root, "missing.log"), + AllowMissingSource = true + }); + + Assert.Equal(0, result.ObservationCount); + Assert.Equal(0, result.SuggestedObservationCount); + } + finally + { + TryDeleteDirectory(root); + } + } + + [Fact] + public void Promote404Suggestions_WritesDisabledRedirectCandidates() + { + var root = Path.Combine(Path.GetTempPath(), "pf-web-links-promote-404-" + Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(root); + + try + { + var reportPath = Path.Combine(root, "404-suggestions.json"); + var redirectsPath = Path.Combine(root, "data", "links", "redirects.json"); + var report = new WebLink404ReportResult + { + SourcePath = Path.Combine(root, "access.log"), + Suggestions = new[] + { + new WebLink404Suggestion + { + Path = "/docs/instal", + Host = "evotec.xyz", + Count = 4, + Suggestions = new[] + { + new WebLink404RouteSuggestion + { 
+ TargetPath = "/docs/install/", + Score = 0.9d + } + } + } + } + }; + File.WriteAllText(reportPath, JsonSerializer.Serialize(report)); + + var result = WebLinkService.Promote404Suggestions(new WebLink404PromoteOptions + { + SourcePath = reportPath, + OutputPath = redirectsPath + }); + + Assert.Equal(1, result.CandidateCount); + Assert.Equal(1, result.WrittenCount); + + var loaded = WebLinkService.Load(new WebLinkLoadOptions + { + RedirectsPath = redirectsPath + }); + var redirect = Assert.Single(loaded.Redirects); + Assert.False(redirect.Enabled); + Assert.Equal("evotec.xyz", redirect.SourceHost); + Assert.Equal("/docs/instal", redirect.SourcePath); + Assert.Equal("/docs/install/", redirect.TargetUrl); + Assert.Equal("404-promoted", redirect.Source); + } + finally + { + TryDeleteDirectory(root); + } + } + + [Fact] + public void Ignore404Suggestions_WritesIgnoredRulesForSelectedPaths() + { + var root = Path.Combine(Path.GetTempPath(), "pf-web-links-ignore-404-" + Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(root); + + try + { + var reportPath = Path.Combine(root, "404-suggestions.json"); + var ignoredPath = Path.Combine(root, "data", "links", "ignored-404.json"); + var report = new WebLink404ReportResult + { + Suggestions = new[] + { + new WebLink404Suggestion + { + Path = "/wp-login.php", + Count = 5 + }, + new WebLink404Suggestion + { + Path = "/docs/instal", + Count = 2, + Suggestions = new[] + { + new WebLink404RouteSuggestion { TargetPath = "/docs/install/", Score = 0.9d } + } + } + } + }; + File.WriteAllText(reportPath, JsonSerializer.Serialize(report)); + + var result = WebLinkService.Ignore404Suggestions(new WebLink404IgnoreOptions + { + SourcePath = reportPath, + OutputPath = ignoredPath, + Paths = new[] { "/wp-login.php" }, + Reason = "scanner noise", + CreatedBy = "tests" + }); + + Assert.Equal(1, result.CandidateCount); + Assert.Equal(1, result.WrittenCount); + + var json = File.ReadAllText(ignoredPath); + Assert.Contains("\"path\": 
\"/wp-login.php\"", json, StringComparison.Ordinal); + Assert.Contains("\"reason\": \"scanner noise\"", json, StringComparison.Ordinal); + Assert.DoesNotContain("/docs/instal", json, StringComparison.Ordinal); + } + finally + { + TryDeleteDirectory(root); + } + } + + [Fact] + public void ApplyReviewCandidates_MergesReviewedCandidatesWithoutPowerShell() + { + var root = Path.Combine(Path.GetTempPath(), "pf-web-links-apply-review-" + Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(root); + + try + { + var redirectCandidatesPath = Path.Combine(root, "Build", "link-reports", "404-promoted-candidates.json"); + var ignoredCandidatesPath = Path.Combine(root, "Build", "link-reports", "ignored-404-candidates.json"); + var redirectsPath = Path.Combine(root, "data", "links", "redirects.json"); + var ignoredPath = Path.Combine(root, "data", "links", "ignored-404.json"); + Directory.CreateDirectory(Path.GetDirectoryName(redirectCandidatesPath)!); + Directory.CreateDirectory(Path.GetDirectoryName(redirectsPath)!); + + File.WriteAllText(redirectCandidatesPath, + """ + { + "redirects": [ + { + "id": "reviewed", + "sourcePath": "/docs/instal", + "targetUrl": "/docs/install/", + "enabled": false, + "source": "404-promoted" + } + ] + } + """); + File.WriteAllText(ignoredCandidatesPath, + """ + { + "ignored404": [ + { + "path": "/wp-login.php", + "reason": "scanner noise" + } + ] + } + """); + File.WriteAllText(redirectsPath, "{ \"redirects\": [] }"); + File.WriteAllText(ignoredPath, "{ \"ignored404\": [] }"); + + var result = WebLinkService.ApplyReviewCandidates(new WebLinkReviewApplyOptions + { + ApplyRedirects = true, + ApplyIgnored404 = true, + RedirectCandidatesPath = redirectCandidatesPath, + RedirectsPath = redirectsPath, + Ignored404CandidatesPath = ignoredCandidatesPath, + Ignored404Path = ignoredPath + }); + + Assert.False(result.DryRun); + Assert.NotNull(result.Redirects); + Assert.NotNull(result.Ignored404); + Assert.Equal(1, result.Redirects.CandidateCount); + 
Assert.Equal(1, result.Redirects.WrittenCount); + Assert.Equal(1, result.Ignored404.CandidateCount); + Assert.Equal(1, result.Ignored404.WrittenCount); + + var redirectJson = File.ReadAllText(redirectsPath); + var ignoredJson = File.ReadAllText(ignoredPath); + Assert.Contains("\"sourcePath\": \"/docs/instal\"", redirectJson, StringComparison.Ordinal); + Assert.Contains("\"enabled\": false", redirectJson, StringComparison.Ordinal); + Assert.Contains("\"path\": \"/wp-login.php\"", ignoredJson, StringComparison.Ordinal); + + var duplicateResult = WebLinkService.ApplyReviewCandidates(new WebLinkReviewApplyOptions + { + ApplyRedirects = true, + ApplyIgnored404 = true, + RedirectCandidatesPath = redirectCandidatesPath, + RedirectsPath = redirectsPath, + Ignored404CandidatesPath = ignoredCandidatesPath, + Ignored404Path = ignoredPath + }); + + Assert.Equal(1, duplicateResult.Redirects!.SkippedDuplicateCount); + Assert.Equal(1, duplicateResult.Ignored404!.SkippedDuplicateCount); + } + finally + { + TryDeleteDirectory(root); + } + } + + [Fact] + public void Validate_LanguageRootHostTreatsPrefixedAndRootTargetsAsSame() + { + var dataSet = new WebLinkDataSet + { + LanguageRootHosts = new System.Collections.Generic.Dictionary<string, string>(StringComparer.OrdinalIgnoreCase) + { + ["evotec.pl"] = "pl" + }, + Redirects = new[] + { + new LinkRedirectRule + { + Id = "localized", + SourceHost = "evotec.pl", + SourcePath = "/stary/", + TargetUrl = "/pl/blog/current/", + Status = 301 + }, + new LinkRedirectRule + { + Id = "rooted", + SourceHost = "evotec.pl", + SourcePath = "/stary", + TargetUrl = "https://evotec.pl/blog/current/", + Status = 301, + AllowExternal = true + } + } + }; + + var result = WebLinkService.Validate(dataSet); + + Assert.True(result.Success); + Assert.Contains(result.Issues, issue => issue.Code == "PFLINK.REDIRECT.DUPLICATE_SAME_TARGET"); + Assert.DoesNotContain(result.Issues, issue => issue.Code == "PFLINK.REDIRECT.DUPLICATE"); + } + + [Fact] + public void
ExportApache_StripsLanguagePrefixForLanguageRootHosts() + { + var root = Path.Combine(Path.GetTempPath(), "pf-web-links-export-language-root-" + Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(root); + + try + { + var outPath = Path.Combine(root, "links.conf"); + var dataSet = new WebLinkDataSet + { + Redirects = new[] + { + new LinkRedirectRule + { + Id = "legacy-pl", + SourceHost = "evotec.pl", + SourcePath = "/stary/", + TargetUrl = "/pl/blog/current/", + Status = 301 + } + } + }; + + WebLinkService.ExportApache(dataSet, new WebLinkApacheExportOptions + { + OutputPath = outPath, + LanguageRootHosts = new System.Collections.Generic.Dictionary<string, string>(StringComparer.OrdinalIgnoreCase) + { + ["evotec.pl"] = "pl" + } + }); + + var apache = File.ReadAllText(outPath); + Assert.Contains("RewriteRule ^stary/?$ /blog/current/ [R=301,L,QSD]", apache, StringComparison.Ordinal); + Assert.DoesNotContain("/pl/blog/current/", apache, StringComparison.Ordinal); + } + finally + { + TryDeleteDirectory(root); + } + } + + [Fact] + public void Load_ReadsJsonAndCompatibilityCsv() + { + var root = Path.Combine(Path.GetTempPath(), "pf-web-links-load-" + Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(root); + + try + { + var redirectsPath = Path.Combine(root, "redirects.json"); + var csvPath = Path.Combine(root, "legacy.csv"); + File.WriteAllText(redirectsPath, + """ + { + "redirects": [ + { + "id": "manual", + "sourcePath": "/manual/", + "targetUrl": "/target/", + "status": 301 + } + ] + } + """); + File.WriteAllText(csvPath, + """ + legacy_url,target_url,status,language + /?page_id=40,/contact/,301,pl + """); + + var dataSet = WebLinkService.Load(new WebLinkLoadOptions + { + RedirectsPath = redirectsPath, + RedirectCsvPaths = new[] { csvPath }, + Hosts = new System.Collections.Generic.Dictionary<string, string>(StringComparer.OrdinalIgnoreCase) + { + ["pl"] = "evotec.pl" + } + }); + + Assert.Equal(2, dataSet.Redirects.Length); + Assert.Equal("evotec.pl", dataSet.Redirects.Single(rule
=> rule.SourceQuery == "page_id=40").SourceHost); + } + finally + { + TryDeleteDirectory(root); + } + } + + private static void TryDeleteDirectory(string path) + { + try + { + if (Directory.Exists(path)) + Directory.Delete(path, true); + } + catch + { + // best-effort cleanup + } + } +} diff --git a/PowerForge.Tests/WebPipelineRunnerLinksTests.cs b/PowerForge.Tests/WebPipelineRunnerLinksTests.cs new file mode 100644 index 00000000..71d618f8 --- /dev/null +++ b/PowerForge.Tests/WebPipelineRunnerLinksTests.cs @@ -0,0 +1,595 @@ +using System; +using System.IO; +using System.Text.Json; +using PowerForge.Web.Cli; +using Xunit; + +namespace PowerForge.Tests; + +public sealed class WebPipelineRunnerLinksTests +{ + [Fact] + public void RunPipeline_LinksExportApache_UsesSiteLinksConfig() + { + var root = Path.Combine(Path.GetTempPath(), "pf-web-pipeline-links-" + Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(root); + + try + { + Directory.CreateDirectory(Path.Combine(root, "data", "links")); + File.WriteAllText(Path.Combine(root, "site.json"), + """ + { + "name": "Links Test", + "baseUrl": "https://evotec.xyz", + "collections": [], + "links": { + "redirects": "./data/links/redirects.json", + "shortlinks": "./data/links/shortlinks.json", + "hosts": { + "short": "evo.yt" + }, + "apacheOut": "./deploy/apache/link-service-redirects.conf" + } + } + """); + File.WriteAllText(Path.Combine(root, "data", "links", "redirects.json"), + """ + [ + { + "id": "legacy", + "sourcePath": "/old/", + "targetUrl": "/new/", + "status": 301 + } + ] + """); + File.WriteAllText(Path.Combine(root, "data", "links", "shortlinks.json"), + """ + [ + { + "slug": "discord", + "host": "evo.yt", + "targetUrl": "https://discord.gg/example", + "status": 302, + "owner": "evotec", + "allowExternal": true + } + ] + """); + + var pipelinePath = Path.Combine(root, "pipeline.json"); + File.WriteAllText(pipelinePath, + """ + { + "steps": [ + { + "task": "links-export-apache", + "config": "./site.json", 
+ "includeErrorDocument404": true, + "summaryPath": "./Build/links-summary.json" + } + ] + } + """); + + var result = WebPipelineRunner.RunPipeline(pipelinePath, logger: null); + + Assert.True(result.Success); + Assert.Single(result.Steps); + Assert.True(result.Steps[0].Success); + Assert.Contains("links-export-apache ok", result.Steps[0].Message, StringComparison.OrdinalIgnoreCase); + + var outputPath = Path.Combine(root, "deploy", "apache", "link-service-redirects.conf"); + Assert.True(File.Exists(outputPath)); + var apache = File.ReadAllText(outputPath); + Assert.Contains("RewriteRule ^old/?$ /new/ [R=301,L,QSD]", apache, StringComparison.Ordinal); + Assert.Contains("RewriteRule ^discord/?$ https://discord.gg/example [R=302,L,QSD]", apache, StringComparison.Ordinal); + + using var summary = JsonDocument.Parse(File.ReadAllText(Path.Combine(root, "Build", "links-summary.json"))); + Assert.Equal(1, summary.RootElement.GetProperty("redirects").GetInt32()); + Assert.Equal(1, summary.RootElement.GetProperty("shortlinks").GetInt32()); + } + finally + { + TryDeleteDirectory(root); + } + } + + [Fact] + public void RunPipeline_LinksValidate_FailsOnUnsafeExternalTarget() + { + var root = Path.Combine(Path.GetTempPath(), "pf-web-pipeline-links-fail-" + Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(root); + + try + { + Directory.CreateDirectory(Path.Combine(root, "data", "links")); + File.WriteAllText(Path.Combine(root, "data", "links", "redirects.json"), + """ + [ + { + "id": "unsafe", + "sourcePath": "/old/", + "targetUrl": "https://example.com/new", + "status": 301 + } + ] + """); + + var pipelinePath = Path.Combine(root, "pipeline.json"); + File.WriteAllText(pipelinePath, + """ + { + "steps": [ + { + "task": "links-validate", + "redirects": "./data/links/redirects.json", + "summaryPath": "./Build/links-summary.json" + } + ] + } + """); + + var result = WebPipelineRunner.RunPipeline(pipelinePath, logger: null); + + Assert.False(result.Success); + 
Assert.Single(result.Steps); + Assert.False(result.Steps[0].Success); + Assert.Contains("failed", result.Steps[0].Message, StringComparison.OrdinalIgnoreCase); + } + finally + { + TryDeleteDirectory(root); + } + } + + [Fact] + public void RunPipeline_LinksValidate_WritesDuplicateReviewReport() + { + var root = Path.Combine(Path.GetTempPath(), "pf-web-pipeline-links-duplicates-" + Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(root); + + try + { + Directory.CreateDirectory(Path.Combine(root, "data", "links")); + File.WriteAllText(Path.Combine(root, "data", "links", "redirects.json"), + """ + [ + { + "id": "canonical", + "sourcePath": "/old/", + "targetUrl": "/new/", + "status": 301 + }, + { + "id": "duplicate", + "sourcePath": "/old", + "targetUrl": "/new", + "status": 301 + } + ] + """); + + var pipelinePath = Path.Combine(root, "pipeline.json"); + File.WriteAllText(pipelinePath, + """ + { + "steps": [ + { + "task": "links-validate", + "redirects": "./data/links/redirects.json", + "summaryPath": "./Build/links-summary.json", + "duplicateReportPath": "./Build/duplicates.csv" + } + ] + } + """); + + var result = WebPipelineRunner.RunPipeline(pipelinePath, logger: null); + + Assert.True(result.Success); + var reportPath = Path.Combine(root, "Build", "duplicates.csv"); + Assert.True(File.Exists(reportPath)); + var report = File.ReadAllText(reportPath); + Assert.Contains("suggested_action", report, StringComparison.Ordinal); + Assert.Contains("dedupe_generated_or_imported_row", report, StringComparison.Ordinal); + Assert.Contains("canonical", report, StringComparison.Ordinal); + Assert.Contains("duplicate", report, StringComparison.Ordinal); + } + finally + { + TryDeleteDirectory(root); + } + } + + [Fact] + public void RunPipeline_LinksValidate_FailOnNewWarningsUsesBaseline() + { + var root = Path.Combine(Path.GetTempPath(), "pf-web-pipeline-links-baseline-" + Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(root); + + try + { + 
Directory.CreateDirectory(Path.Combine(root, "data", "links")); + var redirectsPath = Path.Combine(root, "data", "links", "redirects.json"); + File.WriteAllText(redirectsPath, + """ + [ + { + "id": "canonical", + "sourcePath": "/old/", + "targetUrl": "/new/", + "status": 301 + }, + { + "id": "duplicate", + "sourcePath": "/old", + "targetUrl": "/new", + "status": 301 + } + ] + """); + + var baselinePipelinePath = Path.Combine(root, "pipeline-baseline.json"); + File.WriteAllText(baselinePipelinePath, + """ + { + "steps": [ + { + "task": "links-validate", + "redirects": "./data/links/redirects.json", + "baseline": "./.powerforge/link-baseline.json", + "baselineGenerate": true, + "summaryPath": "./Build/links-summary.json" + } + ] + } + """); + + var baselineResult = WebPipelineRunner.RunPipeline(baselinePipelinePath, logger: null); + Assert.True(baselineResult.Success); + Assert.True(File.Exists(Path.Combine(root, ".powerforge", "link-baseline.json"))); + + var failOnNewPipelinePath = Path.Combine(root, "pipeline-fail-on-new.json"); + File.WriteAllText(failOnNewPipelinePath, + """ + { + "steps": [ + { + "task": "links-validate", + "redirects": "./data/links/redirects.json", + "baseline": "./.powerforge/link-baseline.json", + "failOnNewWarnings": true, + "summaryPath": "./Build/links-summary.json" + } + ] + } + """); + + var existingResult = WebPipelineRunner.RunPipeline(failOnNewPipelinePath, logger: null); + Assert.True(existingResult.Success); + Assert.Contains("newWarnings=0", existingResult.Steps[0].Message, StringComparison.OrdinalIgnoreCase); + + File.WriteAllText(redirectsPath, + """ + [ + { + "id": "canonical", + "sourcePath": "/old/", + "targetUrl": "/new/", + "status": 301 + }, + { + "id": "duplicate", + "sourcePath": "/old", + "targetUrl": "/new", + "status": 301 + }, + { + "id": "second-canonical", + "sourcePath": "/legacy/", + "targetUrl": "/modern/", + "status": 301 + }, + { + "id": "second-duplicate", + "sourcePath": "/legacy", + "targetUrl": "/modern", 
+ "status": 301 + } + ] + """); + + var newWarningResult = WebPipelineRunner.RunPipeline(failOnNewPipelinePath, logger: null); + Assert.False(newWarningResult.Success); + Assert.Contains("newWarnings=1", newWarningResult.Steps[0].Message, StringComparison.OrdinalIgnoreCase); + } + finally + { + TryDeleteDirectory(root); + } + } + + [Fact] + public void RunPipeline_LinksImportWordPress_WritesShortlinksAndSummary() + { + var root = Path.Combine(Path.GetTempPath(), "pf-web-pipeline-links-import-" + Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(root); + + try + { + Directory.CreateDirectory(Path.Combine(root, "data", "links", "imports")); + File.WriteAllText(Path.Combine(root, "site.json"), + """ + { + "name": "Links Import Test", + "baseUrl": "https://evotec.xyz", + "collections": [], + "links": { + "shortlinks": "./data/links/shortlinks.json", + "hosts": { + "short": "evo.yt" + } + } + } + """); + File.WriteAllText(Path.Combine(root, "data", "links", "imports", "pretty-links.csv"), + """ + id,name,slug,url,clicks + 10,Teams,teams,https://teams.example.test,42 + """); + + var pipelinePath = Path.Combine(root, "pipeline.json"); + File.WriteAllText(pipelinePath, + """ + { + "steps": [ + { + "task": "links-import-wordpress", + "config": "./site.json", + "source": "./data/links/imports/pretty-links.csv", + "owner": "evotec", + "tags": [ "imported" ], + "summaryPath": "./Build/import-links-summary.json" + } + ] + } + """); + + var result = WebPipelineRunner.RunPipeline(pipelinePath, logger: null); + + Assert.True(result.Success); + Assert.Contains("links-import-wordpress ok", result.Steps[0].Message, StringComparison.OrdinalIgnoreCase); + + var shortlinksPath = Path.Combine(root, "data", "links", "shortlinks.json"); + Assert.True(File.Exists(shortlinksPath)); + var json = File.ReadAllText(shortlinksPath); + Assert.Contains("\"slug\": \"teams\"", json, StringComparison.Ordinal); + Assert.Contains("\"host\": \"evo.yt\"", json, StringComparison.Ordinal); + 
Assert.Contains("\"importedHits\": 42", json, StringComparison.Ordinal); + + using var summary = JsonDocument.Parse(File.ReadAllText(Path.Combine(root, "Build", "import-links-summary.json"))); + Assert.Equal(1, summary.RootElement.GetProperty("importedCount").GetInt32()); + Assert.Equal(1, summary.RootElement.GetProperty("writtenCount").GetInt32()); + } + finally + { + TryDeleteDirectory(root); + } + } + + [Fact] + public void RunPipeline_LinksReport404_WritesSuggestionReport() + { + var root = Path.Combine(Path.GetTempPath(), "pf-web-pipeline-links-404-" + Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(root); + + try + { + Directory.CreateDirectory(Path.Combine(root, "_site", "docs", "install")); + File.WriteAllText(Path.Combine(root, "_site", "docs", "install", "index.html"), "install"); + File.WriteAllText(Path.Combine(root, "access.log"), "127.0.0.1 - - [01/Jan/2026:00:00:00 +0000] \"GET /docs/instal HTTP/1.1\" 404 123 \"-\" \"test\""); + + var pipelinePath = Path.Combine(root, "pipeline.json"); + File.WriteAllText(pipelinePath, + """ + { + "steps": [ + { + "task": "links-report-404", + "siteRoot": "./_site", + "source": "./access.log", + "out": "./Build/404-suggestions.json", + "reviewCsv": "./Build/404-suggestions.csv" + } + ] + } + """); + + var result = WebPipelineRunner.RunPipeline(pipelinePath, logger: null); + + Assert.True(result.Success); + Assert.Contains("links-report-404 ok", result.Steps[0].Message, StringComparison.OrdinalIgnoreCase); + var json = File.ReadAllText(Path.Combine(root, "Build", "404-suggestions.json")); + Assert.Contains("\"targetPath\": \"/docs/install/\"", json, StringComparison.Ordinal); + var csv = File.ReadAllText(Path.Combine(root, "Build", "404-suggestions.csv")); + Assert.Contains("review_redirect_candidate", csv, StringComparison.Ordinal); + } + finally + { + TryDeleteDirectory(root); + } + } + + [Fact] + public void RunPipeline_LinksReport404_AllowsMissingSource() + { + var root = 
Path.Combine(Path.GetTempPath(), "pf-web-pipeline-links-404-missing-source-" + Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(root); + + try + { + Directory.CreateDirectory(Path.Combine(root, "_site")); + File.WriteAllText(Path.Combine(root, "_site", "index.html"), "home"); + + var pipelinePath = Path.Combine(root, "pipeline.json"); + File.WriteAllText(pipelinePath, + """ + { + "steps": [ + { + "task": "links-report-404", + "siteRoot": "./_site", + "source": "./missing.log", + "out": "./Build/404-suggestions.json", + "reviewCsv": "./Build/404-suggestions.csv", + "allowMissingSource": true + } + ] + } + """); + + var result = WebPipelineRunner.RunPipeline(pipelinePath, logger: null); + + Assert.True(result.Success); + Assert.Contains("observations=0", result.Steps[0].Message, StringComparison.OrdinalIgnoreCase); + Assert.True(File.Exists(Path.Combine(root, "Build", "404-suggestions.json"))); + Assert.True(File.Exists(Path.Combine(root, "Build", "404-suggestions.csv"))); + } + finally + { + TryDeleteDirectory(root); + } + } + + [Fact] + public void RunPipeline_LinksPromote404_WritesRedirectCandidates() + { + var root = Path.Combine(Path.GetTempPath(), "pf-web-pipeline-links-promote-404-" + Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(root); + + try + { + File.WriteAllText(Path.Combine(root, "404-suggestions.json"), + """ + { + "suggestions": [ + { + "path": "/docs/instal", + "host": "evotec.xyz", + "count": 2, + "suggestions": [ + { "targetPath": "/docs/install/", "score": 0.91 } + ] + } + ] + } + """); + + var pipelinePath = Path.Combine(root, "pipeline.json"); + File.WriteAllText(pipelinePath, + """ + { + "steps": [ + { + "task": "links-promote-404", + "source": "./404-suggestions.json", + "out": "./data/links/redirects.json", + "reviewCsv": "./Build/promoted-redirects.csv", + "summaryPath": "./Build/promote-404-summary.json" + } + ] + } + """); + + var result = WebPipelineRunner.RunPipeline(pipelinePath, logger: null); + + 
Assert.True(result.Success); + Assert.Contains("links-promote-404 ok", result.Steps[0].Message, StringComparison.OrdinalIgnoreCase); + var json = File.ReadAllText(Path.Combine(root, "data", "links", "redirects.json")); + Assert.Contains("\"sourcePath\": \"/docs/instal\"", json, StringComparison.Ordinal); + var csv = File.ReadAllText(Path.Combine(root, "Build", "promoted-redirects.csv")); + Assert.Contains("/docs/install/", csv, StringComparison.Ordinal); + + using var summary = JsonDocument.Parse(File.ReadAllText(Path.Combine(root, "Build", "promote-404-summary.json"))); + Assert.Equal(1, summary.RootElement.GetProperty("candidateCount").GetInt32()); + Assert.Equal(1, summary.RootElement.GetProperty("writtenCount").GetInt32()); + } + finally + { + TryDeleteDirectory(root); + } + } + + [Fact] + public void RunPipeline_LinksIgnore404_WritesIgnoredRules() + { + var root = Path.Combine(Path.GetTempPath(), "pf-web-pipeline-links-ignore-404-" + Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(root); + + try + { + File.WriteAllText(Path.Combine(root, "404-suggestions.json"), + """ + { + "suggestions": [ + { + "path": "/wp-login.php", + "count": 4, + "suggestions": [] + } + ] + } + """); + + var pipelinePath = Path.Combine(root, "pipeline.json"); + File.WriteAllText(pipelinePath, + """ + { + "steps": [ + { + "task": "links-ignore-404", + "source": "./404-suggestions.json", + "out": "./data/links/ignored-404.json", + "paths": ["/wp-login.php"], + "reason": "scanner noise", + "reviewCsv": "./Build/ignored-404.csv", + "summaryPath": "./Build/ignore-404-summary.json" + } + ] + } + """); + + var result = WebPipelineRunner.RunPipeline(pipelinePath, logger: null); + + Assert.True(result.Success); + Assert.Contains("links-ignore-404 ok", result.Steps[0].Message, StringComparison.OrdinalIgnoreCase); + var json = File.ReadAllText(Path.Combine(root, "data", "links", "ignored-404.json")); + Assert.Contains("\"path\": \"/wp-login.php\"", json, StringComparison.Ordinal); + var 
csv = File.ReadAllText(Path.Combine(root, "Build", "ignored-404.csv")); + Assert.Contains("scanner noise", csv, StringComparison.Ordinal); + + using var summary = JsonDocument.Parse(File.ReadAllText(Path.Combine(root, "Build", "ignore-404-summary.json"))); + Assert.Equal(1, summary.RootElement.GetProperty("candidateCount").GetInt32()); + Assert.Equal(1, summary.RootElement.GetProperty("writtenCount").GetInt32()); + } + finally + { + TryDeleteDirectory(root); + } + } + + private static void TryDeleteDirectory(string path) + { + try + { + if (Directory.Exists(path)) + Directory.Delete(path, true); + } + catch + { + // best-effort cleanup + } + } +} diff --git a/PowerForge.Web.Cli/PowerForgeWebCliJsonContext.cs b/PowerForge.Web.Cli/PowerForgeWebCliJsonContext.cs index 8907e9f5..c7fd48f7 100644 --- a/PowerForge.Web.Cli/PowerForgeWebCliJsonContext.cs +++ b/PowerForge.Web.Cli/PowerForgeWebCliJsonContext.cs @@ -46,6 +46,18 @@ namespace PowerForge.Web.Cli; [JsonSerializable(typeof(WebDotNetBuildResult))] [JsonSerializable(typeof(WebDotNetPublishResult))] [JsonSerializable(typeof(WebStaticOverlayResult))] +[JsonSerializable(typeof(LinkServiceSpec))] +[JsonSerializable(typeof(LinkRedirectRule[]))] +[JsonSerializable(typeof(LinkShortlinkRule[]))] +[JsonSerializable(typeof(LinkValidationIssue[]))] +[JsonSerializable(typeof(LinkValidationResult))] +[JsonSerializable(typeof(WebLinkApacheExportResult))] +[JsonSerializable(typeof(WebLinkShortlinkImportResult))] +[JsonSerializable(typeof(WebLink404ReportResult))] +[JsonSerializable(typeof(WebLink404PromoteResult))] +[JsonSerializable(typeof(WebLink404IgnoreResult))] +[JsonSerializable(typeof(WebLink404ReviewResult))] +[JsonSerializable(typeof(WebLinkReviewApplyResult))] internal partial class PowerForgeWebCliJsonContext : JsonSerializerContext { } diff --git a/PowerForge.Web.Cli/WebCliCommandHandlers.Dispatch.cs b/PowerForge.Web.Cli/WebCliCommandHandlers.Dispatch.cs index d5a32c3a..ff7abb37 100644 --- 
a/PowerForge.Web.Cli/WebCliCommandHandlers.Dispatch.cs +++ b/PowerForge.Web.Cli/WebCliCommandHandlers.Dispatch.cs @@ -38,6 +38,8 @@ internal static int HandleSubCommand(string subCommand, string[] subArgs, bool o "ecosystem-stats" => HandleEcosystemStats(subArgs, outputJson, logger, outputSchemaVersion), "optimize" => HandleOptimize(subArgs, outputJson, logger, outputSchemaVersion), "pipeline" => HandlePipeline(subArgs, outputJson, logger, outputSchemaVersion), + "links" => HandleLinks(subArgs, outputJson, logger, outputSchemaVersion), + "link" => HandleLinks(subArgs, outputJson, logger, outputSchemaVersion), "website-runner" => HandleWebsiteRunner(subArgs, outputJson, logger, outputSchemaVersion), "website_runner" => HandleWebsiteRunner(subArgs, outputJson, logger, outputSchemaVersion), "websiterunner" => HandleWebsiteRunner(subArgs, outputJson, logger, outputSchemaVersion), diff --git a/PowerForge.Web.Cli/WebCliCommandHandlers.Links.Support.cs b/PowerForge.Web.Cli/WebCliCommandHandlers.Links.Support.cs new file mode 100644 index 00000000..12d0a4ca --- /dev/null +++ b/PowerForge.Web.Cli/WebCliCommandHandlers.Links.Support.cs @@ -0,0 +1,220 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Text.Json; +using PowerForge.Web; +using static PowerForge.Web.Cli.WebCliHelpers; + +namespace PowerForge.Web.Cli; + +internal static partial class WebCliCommandHandlers +{ + private static void WriteLinksApplyReviewSummary(string? summaryPath, WebLinkReviewApplyResult result) + { + if (string.IsNullOrWhiteSpace(summaryPath)) + return; + + var summaryDirectory = Path.GetDirectoryName(summaryPath); + if (!string.IsNullOrWhiteSpace(summaryDirectory)) + Directory.CreateDirectory(summaryDirectory); + + File.WriteAllText(summaryPath, JsonSerializer.Serialize(result, WebCliJson.Context.WebLinkReviewApplyResult)); + } + + private static void WriteLinksReview404Summary(string? 
summaryPath, WebLink404ReviewResult result) + { + if (string.IsNullOrWhiteSpace(summaryPath)) + return; + + var summaryDirectory = Path.GetDirectoryName(summaryPath); + if (!string.IsNullOrWhiteSpace(summaryDirectory)) + Directory.CreateDirectory(summaryDirectory); + + File.WriteAllText(summaryPath, JsonSerializer.Serialize(result, WebCliJson.Context.WebLink404ReviewResult)); + } + + private static int CompleteLinksValidation( + string command, + bool outputJson, + WebConsoleLogger logger, + int outputSchemaVersion, + string? configPath, + LinkValidationResult validation, + bool success, + string message, + string? reportPath, + string? duplicateReportPath) + { + var exitCode = success ? 0 : 1; + if (outputJson) + { + WebCliJsonWriter.Write(new WebCliJsonEnvelope + { + SchemaVersion = outputSchemaVersion, + Command = command, + Success = success, + ExitCode = exitCode, + Error = success ? null : message, + ConfigPath = configPath, + Result = WebCliJson.SerializeToElement(validation, WebCliJson.Context.LinkValidationResult) + }); + return exitCode; + } + + if (success) + logger.Success(message); + else + logger.Error(message); + if (!string.IsNullOrWhiteSpace(reportPath)) + logger.Info($"Report: {reportPath}"); + if (!string.IsNullOrWhiteSpace(duplicateReportPath)) + logger.Info($"Duplicate report: {duplicateReportPath}"); + return exitCode; + } + + private static WebLinksCommandConfig LoadLinksSpecForCommand(string[] args, string command, bool outputJson, WebConsoleLogger logger) + { + var configPath = TryGetOptionValue(args, "--config"); + if (string.IsNullOrWhiteSpace(configPath)) + { + return new WebLinksCommandConfig + { + BaseDir = Directory.GetCurrentDirectory() + }; + } + + var fullConfigPath = ResolveExistingFilePath(configPath); + var (siteSpec, siteSpecPath) = WebSiteSpecLoader.LoadWithPath(fullConfigPath, WebCliJson.Options); + return new WebLinksCommandConfig + { + Spec = siteSpec.Links, + ConfigPath = siteSpecPath, + BaseDir = 
Path.GetDirectoryName(siteSpecPath) ?? Directory.GetCurrentDirectory(), + HasConfig = true + }; + } + + private static WebLinkLoadOptions BuildLinkLoadOptions(string[] args, LinkServiceSpec? links, string baseDir) + { + var redirectsPath = ResolvePathForLinks(baseDir, + TryGetOptionValue(args, "--redirects") ?? + TryGetOptionValue(args, "--redirects-path") ?? + TryGetOptionValue(args, "--redirectsPath"), + links?.Redirects); + + var shortlinksPath = ResolvePathForLinks(baseDir, + TryGetOptionValue(args, "--shortlinks") ?? + TryGetOptionValue(args, "--shortlinks-path") ?? + TryGetOptionValue(args, "--shortlinksPath"), + links?.Shortlinks); + + var csvSources = ReadOptionList(args, + "--source", + "--sources", + "--redirect-csv", + "--redirect-csv-path", + "--redirect-csv-paths", + "--csv-source", + "--csv-sources"); + var csvPaths = csvSources.Count > 0 + ? csvSources.Select(value => ResolvePathRelative(baseDir, value)) + : (links?.RedirectCsvPaths ?? Array.Empty<string>()).Select(value => ResolvePathRelative(baseDir, value)); + + return new WebLinkLoadOptions + { + RedirectsPath = redirectsPath, + ShortlinksPath = shortlinksPath, + RedirectCsvPaths = csvPaths + .Where(static value => !string.IsNullOrWhiteSpace(value)) + .Distinct(StringComparer.OrdinalIgnoreCase) + .ToArray(), + Hosts = BuildLinkHostMap(args, links), + LanguageRootHosts = BuildLinkLanguageRootHostMap(args, links) + }; + } + + private static IReadOnlyDictionary<string, string> BuildLinkHostMap(string[] args, LinkServiceSpec?
links) + { + var hosts = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase); + if (links?.Hosts is not null) + { + foreach (var pair in links.Hosts) + { + if (!string.IsNullOrWhiteSpace(pair.Key) && !string.IsNullOrWhiteSpace(pair.Value)) + hosts[pair.Key.Trim()] = pair.Value.Trim(); + } + } + + AddMapEntries(hosts, ReadOptionList(args, "--host", "--hosts", "--host-map", "--hostMap"), trimValueSlashes: false); + return hosts; + } + + private static IReadOnlyDictionary<string, string> BuildLinkLanguageRootHostMap(string[] args, LinkServiceSpec? links) + { + var hosts = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase); + if (links?.LanguageRootHosts is not null) + { + foreach (var pair in links.LanguageRootHosts) + { + if (!string.IsNullOrWhiteSpace(pair.Key) && !string.IsNullOrWhiteSpace(pair.Value)) + hosts[pair.Key.Trim()] = pair.Value.Trim().Trim('/'); + } + } + + AddMapEntries(hosts, ReadOptionList(args, "--language-root-host", "--language-root-hosts", "--languageRootHost", "--languageRootHosts"), trimValueSlashes: true); + return hosts; + } + + private static void AddMapEntries(Dictionary<string, string> target, IEnumerable<string> entries, bool trimValueSlashes) + { + foreach (var entry in entries) + { + var separator = entry.IndexOf('='); + if (separator < 0) + separator = entry.IndexOf(':'); + if (separator <= 0 || separator >= entry.Length - 1) + continue; + + var key = entry[..separator].Trim(); + var value = entry[(separator + 1)..].Trim(); + if (trimValueSlashes) + value = value.Trim('/'); + if (!string.IsNullOrWhiteSpace(key) && !string.IsNullOrWhiteSpace(value)) + target[key] = value; + } + } + + private static bool HasDirectLinkSources(string[] args) // Must recognise every option spelling that BuildLinkLoadOptions reads. + => !string.IsNullOrWhiteSpace(TryGetOptionValue(args, "--redirects")) || + !string.IsNullOrWhiteSpace(TryGetOptionValue(args, "--redirects-path")) || !string.IsNullOrWhiteSpace(TryGetOptionValue(args, "--redirectsPath")) || + !string.IsNullOrWhiteSpace(TryGetOptionValue(args, "--shortlinks")) || + !string.IsNullOrWhiteSpace(TryGetOptionValue(args, "--shortlinks-path")) || !string.IsNullOrWhiteSpace(TryGetOptionValue(args, "--shortlinksPath")) || + ReadOptionList(args, "--source",
"--sources", "--redirect-csv", "--redirect-csv-path", "--redirect-csv-paths", "--csv-source", "--csv-sources").Count > 0; + + private static string? ResolvePathForLinks(string baseDir, string? directValue, string? configValue) + { + if (!string.IsNullOrWhiteSpace(directValue)) + return ResolvePathRelative(baseDir, directValue); + return string.IsNullOrWhiteSpace(configValue) ? null : ResolvePathRelative(baseDir, configValue); + } + + private static string? ResolveOptionalPath(string baseDir, string? value) + => string.IsNullOrWhiteSpace(value) ? null : ResolvePathRelative(baseDir, value); + + private static void WriteLinks404Report(string reportPath, WebLink404ReportResult result) + { + var directory = Path.GetDirectoryName(reportPath); + if (!string.IsNullOrWhiteSpace(directory)) + Directory.CreateDirectory(directory); + File.WriteAllText(reportPath, JsonSerializer.Serialize(result, WebCliJson.Context.WebLink404ReportResult)); + } + + private sealed class WebLinksCommandConfig + { + public LinkServiceSpec? Spec { get; init; } + public string? ConfigPath { get; init; } + public string BaseDir { get; init; } = Directory.GetCurrentDirectory(); + public bool HasConfig { get; init; } + } +} diff --git a/PowerForge.Web.Cli/WebCliCommandHandlers.Links.cs b/PowerForge.Web.Cli/WebCliCommandHandlers.Links.cs new file mode 100644 index 00000000..fa2cb1b0 --- /dev/null +++ b/PowerForge.Web.Cli/WebCliCommandHandlers.Links.cs @@ -0,0 +1,644 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using PowerForge.Web; +using static PowerForge.Web.Cli.WebCliHelpers; + +namespace PowerForge.Web.Cli; + +internal static partial class WebCliCommandHandlers +{ + private static int HandleLinks(string[] subArgs, bool outputJson, WebConsoleLogger logger, int outputSchemaVersion) + { + if (subArgs.Length == 0) + return Fail("Missing links subcommand. 
Use validate, export-apache, import-wordpress, review-404, report-404, promote-404, ignore-404, or apply-review.", outputJson, logger, "web.links"); // Keep this list in sync with the switch arms below. + + var action = subArgs[0].Trim().ToLowerInvariant(); + var args = subArgs.Skip(1).ToArray(); + + return action switch + { + "validate" or "check" => HandleLinksValidate(args, outputJson, logger, outputSchemaVersion), + "export-apache" or "export" or "apache" => HandleLinksExportApache(args, outputJson, logger, outputSchemaVersion), + "import-wordpress" or "import-pretty-links" or "import-prettylinks" or "import" => HandleLinksImportWordPress(args, outputJson, logger, outputSchemaVersion), + "review-404" or "404-review" or "review404" => HandleLinksReview404(args, outputJson, logger, outputSchemaVersion), + "report-404" or "404-report" or "report404" => HandleLinksReport404(args, outputJson, logger, outputSchemaVersion), + "promote-404" or "404-promote" or "promote404" => HandleLinksPromote404(args, outputJson, logger, outputSchemaVersion), + "ignore-404" or "404-ignore" or "ignore404" => HandleLinksIgnore404(args, outputJson, logger, outputSchemaVersion), + "apply-review" or "apply-candidates" or "review-apply" => HandleLinksApplyReview(args, outputJson, logger, outputSchemaVersion), + _ => Fail($"Unsupported links subcommand '{subArgs[0]}'.
Use validate, export-apache, import-wordpress, review-404, report-404, promote-404, ignore-404, or apply-review.", outputJson, logger, "web.links")
        };
    }

    /// <summary>
    /// Handles <c>web.links.validate</c>: loads the link sources, validates them, evaluates the
    /// optional warning baseline, writes the requested summary/CSV reports and completes through
    /// <c>CompleteLinksValidation</c>.
    /// </summary>
    /// <param name="args">Raw command-line arguments of the subcommand.</param>
    /// <param name="outputJson">Forwarded to <c>Fail</c> and <c>CompleteLinksValidation</c>.</param>
    /// <param name="logger">Logger forwarded to the helpers and to the baseline store.</param>
    /// <param name="outputSchemaVersion">Schema version forwarded to <c>CompleteLinksValidation</c>.</param>
    /// <returns>The exit code returned by <c>Fail</c> or <c>CompleteLinksValidation</c>.</returns>
    private static int HandleLinksValidate(string[] args, bool outputJson, WebConsoleLogger logger, int outputSchemaVersion)
    {
        const string command = "web.links.validate";

        // True when any accepted spelling of a switch is present (checked left to right).
        bool Flag(params string[] names) => Array.Exists(names, name => HasOption(args, name));

        // First non-null value among the accepted spellings of an option (checked left to right).
        string? Value(params string[] names)
        {
            foreach (var name in names)
            {
                if (TryGetOptionValue(args, name) is { } found)
                    return found;
            }

            return null;
        }

        var loaded = LoadLinksSpecForCommand(args, command, outputJson, logger);
        if (!(loaded.HasConfig || HasDirectLinkSources(args)))
            return Fail("Specify --config or at least one link source (--redirects, --shortlinks, or --source).", outputJson, logger, command);

        var root = loaded.BaseDir;
        var loadOptions = BuildLinkLoadOptions(args, loaded.Spec, root);
        var links = WebLinkService.Load(loadOptions);
        if (HasOption(args, "--strict") && links.UsedSources.Length == 0)
            return Fail("links validate strict mode failed: no link source files were found.", outputJson, logger, command);

        var validation = WebLinkService.Validate(links);
        var warningsAreFatal = Flag("--fail-on-warnings", "--failOnWarnings");
        var newWarningsAreFatal = Flag("--fail-on-new-warnings", "--failOnNewWarnings", "--fail-on-new");
        var generateBaseline = Flag("--baseline-generate", "--baselineGenerate");
        var updateBaseline = Flag("--baseline-update", "--baselineUpdate");
        var baselineFile = Value("--baseline", "--baseline-path", "--baselinePath");

        var baseline = WebLinkCommandSupport.EvaluateBaseline(root, baselineFile, validation, generateBaseline, updateBaseline, newWarningsAreFatal);

        // The new-warning gate is only enforced when the baseline is not being generated or updated.
        var newWarningGate = newWarningsAreFatal && !generateBaseline && !updateBaseline;

        bool success;
        if (validation.ErrorCount != 0)
            success = false;
        else if (warningsAreFatal && validation.WarningCount != 0)
            success = false;
        else if (newWarningGate)
            success = baseline.Loaded && baseline.NewWarnings.Length == 0;
        else
            success = true;

        if (baseline.ShouldWrite)
            baseline.WrittenPath = WebVerifyBaselineStore.Write(root, baseline.Path, baseline.CurrentWarningKeys, baseline.Merge, logger);

        var summaryFile = ResolveOptionalPath(root, Value("--summary-path", "--summaryPath"));
        var issueReportFile = ResolveOptionalPath(root, Value("--report-path", "--reportPath"));
        var duplicateReportFile = ResolveOptionalPath(root, Value("--duplicate-report-path", "--duplicateReportPath"));
        WebLinkCommandSupport.WriteSummary(summaryFile, "validate", links, validation, success, export: null, baseline);
        WebLinkCommandSupport.WriteIssueReport(issueReportFile, validation);
        WebLinkCommandSupport.WriteDuplicateReport(duplicateReportFile, validation);

        var message = success
            ? WebLinkCommandSupport.BuildValidateSuccessMessage(validation, baseline)
            : WebLinkCommandSupport.BuildValidateFailureMessage(validation, baseline, newWarningGate);

        return CompleteLinksValidation(command, outputJson, logger, outputSchemaVersion, loaded.ConfigPath, validation, success, message, issueReportFile, duplicateReportFile);
    }

    private static int HandleLinksExportApache(string[] args, bool outputJson, WebConsoleLogger logger, int outputSchemaVersion)
    {
        var command = "web.links.export-apache";
        var loaded = LoadLinksSpecForCommand(args, command, outputJson, logger);
        if (!loaded.HasConfig && !HasDirectLinkSources(args))
            return Fail("Specify --config or at least one link source (--redirects, --shortlinks, or --source).", outputJson, logger, command);

        var baseDir = loaded.BaseDir;
        var linkOptions = BuildLinkLoadOptions(args, loaded.Spec, baseDir);
        var dataSet = WebLinkService.Load(linkOptions);
        var strict = HasOption(args, "--strict");
        if (strict && dataSet.UsedSources.Length == 0)
            return Fail("links export-apache strict mode failed: no link source files were found.", outputJson, logger, command);

        var validation = WebLinkService.Validate(dataSet);
        var summaryPath = ResolveOptionalPath(baseDir, TryGetOptionValue(args, "--summary-path") ?? TryGetOptionValue(args, "--summaryPath"));
        var reportPath = ResolveOptionalPath(baseDir, TryGetOptionValue(args, "--report-path") ?? TryGetOptionValue(args, "--reportPath"));
        var duplicateReportPath = ResolveOptionalPath(baseDir, TryGetOptionValue(args, "--duplicate-report-path") ??
TryGetOptionValue(args, "--duplicateReportPath")); + var skipValidation = HasOption(args, "--skip-validation") || HasOption(args, "--skipValidation"); + if (!skipValidation && validation.ErrorCount > 0) + { + WebLinkCommandSupport.WriteSummary(summaryPath, "export-apache", dataSet, validation, taskSuccess: false, export: null, baseline: null); + WebLinkCommandSupport.WriteIssueReport(reportPath, validation); + WebLinkCommandSupport.WriteDuplicateReport(duplicateReportPath, validation); + var message = $"links-export-apache failed validation: errors={validation.ErrorCount}; warnings={validation.WarningCount}"; + return CompleteLinksValidation(command, outputJson, logger, outputSchemaVersion, loaded.ConfigPath, validation, success: false, message, reportPath, duplicateReportPath); + } + + var outputOption = TryGetOptionValue(args, "--output"); + if (string.Equals(outputOption, "json", StringComparison.OrdinalIgnoreCase)) + outputOption = null; + + var outputPath = ResolveOptionalPath(baseDir, + TryGetOptionValue(args, "--out") ?? + outputOption ?? + TryGetOptionValue(args, "--output-path") ?? + TryGetOptionValue(args, "--outputPath") ?? + TryGetOptionValue(args, "--apache-out") ?? + TryGetOptionValue(args, "--apacheOut")) ?? + ResolveOptionalPath(baseDir, loaded.Spec?.ApacheOut) ?? 
Path.GetFullPath(Path.Combine(baseDir, "deploy", "apache", "link-service-redirects.conf"));
        var includeHeader = !HasOption(args, "--no-header");
        var include404 = HasOption(args, "--include-404") ||
                         HasOption(args, "--include-error-document-404") ||
                         HasOption(args, "--includeErrorDocument404");

        var export = WebLinkService.ExportApache(dataSet, new WebLinkApacheExportOptions
        {
            OutputPath = outputPath,
            IncludeHeader = includeHeader,
            IncludeErrorDocument404 = include404,
            Hosts = linkOptions.Hosts,
            LanguageRootHosts = linkOptions.LanguageRootHosts
        });

        WebLinkCommandSupport.WriteSummary(summaryPath, "export-apache", dataSet, validation, taskSuccess: true, export, baseline: null);
        WebLinkCommandSupport.WriteIssueReport(reportPath, validation);
        WebLinkCommandSupport.WriteDuplicateReport(duplicateReportPath, validation);

        if (outputJson)
        {
            WebCliJsonWriter.Write(new WebCliJsonEnvelope
            {
                SchemaVersion = outputSchemaVersion,
                Command = command,
                Success = true,
                ExitCode = 0,
                ConfigPath = loaded.ConfigPath,
                Result = WebCliJson.SerializeToElement(export, WebCliJson.Context.WebLinkApacheExportResult)
            });
            return 0;
        }

        logger.Success($"links-export-apache ok: rules={export.RuleCount}; redirects={validation.RedirectCount}; shortlinks={validation.ShortlinkCount}; warnings={validation.WarningCount}");
        logger.Info($"Output: {export.OutputPath}");
        if (!string.IsNullOrWhiteSpace(reportPath))
            logger.Info($"Report: {reportPath}");
        if (!string.IsNullOrWhiteSpace(duplicateReportPath))
            logger.Info($"Duplicate report: {duplicateReportPath}");
        return 0;
    }

    /// <summary>
    /// Handles <c>web.links.report-404</c>: resolves the site root, 404 source, report and
    /// ignored-404 paths from the arguments (falling back to defaults under the base directory),
    /// generates the 404 suggestion report and writes it together with the optional review CSV.
    /// </summary>
    /// <param name="args">Raw command-line arguments of the subcommand.</param>
    /// <param name="outputJson">When true the result is emitted as a JSON envelope instead of log lines.</param>
    /// <param name="logger">Logger used for the human-readable output.</param>
    /// <param name="outputSchemaVersion">Schema version written into the JSON envelope.</param>
    /// <returns>Always <c>0</c> once the report has been generated.</returns>
    private static int HandleLinksReport404(string[] args, bool outputJson, WebConsoleLogger logger, int outputSchemaVersion)
    {
        const string command = "web.links.report-404";

        // True when any accepted spelling of a switch is present (checked left to right).
        bool Flag(params string[] names) => Array.Exists(names, name => HasOption(args, name));

        // First non-null value among the accepted spellings of an option (checked left to right).
        string? Value(params string[] names)
        {
            foreach (var name in names)
            {
                if (TryGetOptionValue(args, name) is { } found)
                    return found;
            }

            return null;
        }

        var loaded = LoadLinksSpecForCommand(args, command, outputJson, logger);
        var root = loaded.BaseDir;

        var siteRoot = ResolveOptionalPath(root, Value("--site-root", "--siteRoot", "--out-root", "--outRoot"))
            ?? Path.GetFullPath(Path.Combine(root, "_site"));
        var observedSource = ResolveOptionalPath(root, Value("--source", "--log", "--input", "--in"));
        var reportPath = ResolveOptionalPath(root, Value("--out", "--output", "--report-path", "--reportPath"))
            ?? Path.GetFullPath(Path.Combine(root, "Build", "link-reports", "404-suggestions.json"));
        var reviewCsvPath = ResolveOptionalPath(root, Value("--review-csv", "--reviewCsv", "--review-csv-path", "--reviewCsvPath", "--csv-report", "--csvReport"));
        var ignoredFile = ResolveOptionalPath(root, Value("--ignored-404", "--ignored404", "--ignored-404-path", "--ignored404Path"))
            ?? ResolveOptionalPath(root, loaded.Spec?.Ignored404);

        var reportOptions = new WebLink404ReportOptions
        {
            SiteRoot = siteRoot,
            SourcePath = observedSource,
            Ignored404Path = ignoredFile,
            AllowMissingSource = Flag("--allow-missing-source", "--allowMissingSource"),
            MaxSuggestions = ParseIntOption(Value("--max-suggestions", "--maxSuggestions"), 3),
            MinimumScore = ParseDoubleOption(Value("--min-score", "--minimum-score", "--minimumScore"), 0.35d),
            IncludeAsset404s = Flag("--include-assets", "--include-asset-404s")
        };
        var result = WebLinkService.Generate404Report(reportOptions);

        WriteLinks404Report(reportPath, result);
        WebLinkCommandSupport.Write404SuggestionReviewCsv(reviewCsvPath, result);

        if (!outputJson)
        {
            logger.Success($"links report-404 ok: observations={result.ObservationCount}; ignored={result.IgnoredObservationCount}; suggested={result.SuggestedObservationCount}; routes={result.RouteCount}");
            logger.Info($"Report: {reportPath}");
            if (!string.IsNullOrWhiteSpace(reviewCsvPath))
                logger.Info($"Review CSV: {reviewCsvPath}");
            return 0;
        }

        WebCliJsonWriter.Write(new WebCliJsonEnvelope
        {
            SchemaVersion = outputSchemaVersion,
            Command = command,
            Success = true,
            ExitCode = 0,
            ConfigPath = loaded.ConfigPath,
            Result = WebCliJson.SerializeToElement(result, WebCliJson.Context.WebLink404ReportResult)
        });
        return 0;
    }

    private static int HandleLinksReview404(string[] args, bool outputJson, WebConsoleLogger logger, int outputSchemaVersion)
    {
        var command = "web.links.review-404";
        var loaded = LoadLinksSpecForCommand(args, command, outputJson, logger);
        var baseDir = loaded.BaseDir;
        var siteRoot = ResolveOptionalPath(baseDir,
            TryGetOptionValue(args, "--site-root") ??
            TryGetOptionValue(args, "--siteRoot") ??
            TryGetOptionValue(args, "--out-root") ??
            TryGetOptionValue(args, "--outRoot")) ??
            Path.GetFullPath(Path.Combine(baseDir, "_site"));
        var sourcePath = ResolveOptionalPath(baseDir,
            TryGetOptionValue(args, "--source") ??
            TryGetOptionValue(args, "--log") ??
            TryGetOptionValue(args, "--input") ??
            TryGetOptionValue(args, "--in")) ??
            Path.GetFullPath(Path.Combine(baseDir, "Build", "link-reports", "apache-404.log"));
        var ignored404Path = ResolveOptionalPath(baseDir,
            TryGetOptionValue(args, "--ignored-404") ??
            TryGetOptionValue(args, "--ignored404") ??
            TryGetOptionValue(args, "--ignored-404-path") ??
+ TryGetOptionValue(args, "--ignored404Path")) ?? + ResolveOptionalPath(baseDir, loaded.Spec?.Ignored404); + + var reportPath = ResolveOptionalPath(baseDir, + TryGetOptionValue(args, "--out") ?? + TryGetOptionValue(args, "--output") ?? + TryGetOptionValue(args, "--report-path") ?? + TryGetOptionValue(args, "--reportPath")) ?? + Path.GetFullPath(Path.Combine(baseDir, "Build", "link-reports", "404-suggestions.json")); + var reportCsvPath = ResolveOptionalPath(baseDir, + TryGetOptionValue(args, "--review-csv") ?? + TryGetOptionValue(args, "--reviewCsv") ?? + TryGetOptionValue(args, "--report-csv") ?? + TryGetOptionValue(args, "--reportCsv")) ?? + Path.GetFullPath(Path.Combine(baseDir, "Build", "link-reports", "404-suggestions.csv")); + var redirectCandidatesPath = ResolveOptionalPath(baseDir, + TryGetOptionValue(args, "--redirect-candidates") ?? + TryGetOptionValue(args, "--redirectCandidates") ?? + TryGetOptionValue(args, "--redirect-candidates-path") ?? + TryGetOptionValue(args, "--redirectCandidatesPath")) ?? + Path.GetFullPath(Path.Combine(baseDir, "Build", "link-reports", "404-promoted-candidates.json")); + var redirectCandidatesCsvPath = ResolveOptionalPath(baseDir, + TryGetOptionValue(args, "--redirect-candidates-csv") ?? + TryGetOptionValue(args, "--redirectCandidatesCsv") ?? + TryGetOptionValue(args, "--promoted-csv") ?? + TryGetOptionValue(args, "--promotedCsv")) ?? + Path.GetFullPath(Path.Combine(baseDir, "Build", "link-reports", "404-promoted-candidates.csv")); + var ignored404CandidatesPath = ResolveOptionalPath(baseDir, + TryGetOptionValue(args, "--ignored-404-candidates") ?? + TryGetOptionValue(args, "--ignored404Candidates") ?? + TryGetOptionValue(args, "--ignored-404-candidates-path") ?? + TryGetOptionValue(args, "--ignored404CandidatesPath")) ?? 
+ Path.GetFullPath(Path.Combine(baseDir, "Build", "link-reports", "ignored-404-candidates.json")); + var ignored404CandidatesCsvPath = ResolveOptionalPath(baseDir, + TryGetOptionValue(args, "--ignored-404-candidates-csv") ?? + TryGetOptionValue(args, "--ignored404CandidatesCsv") ?? + TryGetOptionValue(args, "--ignored-csv") ?? + TryGetOptionValue(args, "--ignoredCsv")) ?? + Path.GetFullPath(Path.Combine(baseDir, "Build", "link-reports", "ignored-404-candidates.csv")); + var promoteSummaryPath = ResolveOptionalPath(baseDir, + TryGetOptionValue(args, "--promote-summary-path") ?? + TryGetOptionValue(args, "--promoteSummaryPath")) ?? + Path.GetFullPath(Path.Combine(baseDir, "Build", "link-reports", "404-promoted-candidates-summary.json")); + var ignoreSummaryPath = ResolveOptionalPath(baseDir, + TryGetOptionValue(args, "--ignore-summary-path") ?? + TryGetOptionValue(args, "--ignoreSummaryPath")) ?? + Path.GetFullPath(Path.Combine(baseDir, "Build", "link-reports", "ignored-404-candidates-summary.json")); + var summaryPath = ResolveOptionalPath(baseDir, + TryGetOptionValue(args, "--summary-path") ?? + TryGetOptionValue(args, "--summaryPath")); + + var result = WebLinkService.Review404(new WebLink404ReviewOptions + { + SiteRoot = siteRoot, + SourcePath = sourcePath, + Ignored404Path = ignored404Path, + AllowMissingSource = HasOption(args, "--allow-missing-source") || HasOption(args, "--allowMissingSource"), + MaxSuggestions = ParseIntOption(TryGetOptionValue(args, "--max-suggestions") ?? TryGetOptionValue(args, "--maxSuggestions"), 5), + MinimumScore = ParseDoubleOption(TryGetOptionValue(args, "--min-score") ?? TryGetOptionValue(args, "--minimum-score") ?? 
TryGetOptionValue(args, "--minimumScore"), 0.35d), + IncludeAsset404s = HasOption(args, "--include-assets") || HasOption(args, "--include-asset-404s"), + ReportPath = reportPath, + RedirectCandidatesPath = redirectCandidatesPath, + Ignored404CandidatesPath = ignored404CandidatesPath, + PromoteSummaryPath = promoteSummaryPath, + IgnoreSummaryPath = ignoreSummaryPath, + EnableRedirectCandidates = HasOption(args, "--enable") || HasOption(args, "--enabled") || HasOption(args, "--enable-redirects"), + PromoteMinimumScore = ParseDoubleOption(TryGetOptionValue(args, "--promote-min-score") ?? TryGetOptionValue(args, "--promoteMinimumScore"), 0.65d), + PromoteMinimumCount = ParseIntOption(TryGetOptionValue(args, "--promote-min-count") ?? TryGetOptionValue(args, "--promoteMinimumCount"), 1), + PromoteStatus = ParseIntOption(TryGetOptionValue(args, "--status"), 301), + PromoteGroup = TryGetOptionValue(args, "--group"), + IgnoreReason = TryGetOptionValue(args, "--reason"), + CreatedBy = TryGetOptionValue(args, "--created-by") ?? 
TryGetOptionValue(args, "--createdBy")
        });

        WebLinkCommandSupport.Write404SuggestionReviewCsv(reportCsvPath, result.Report);
        WebLinkCommandSupport.WriteRedirectReviewCsv(redirectCandidatesCsvPath, result.RedirectCandidatesPath);
        WebLinkCommandSupport.WriteIgnored404ReviewCsv(ignored404CandidatesCsvPath, result.Ignored404CandidatesPath);
        WriteLinksReview404Summary(summaryPath, result);

        if (outputJson)
        {
            WebCliJsonWriter.Write(new WebCliJsonEnvelope
            {
                SchemaVersion = outputSchemaVersion,
                Command = command,
                Success = true,
                ExitCode = 0,
                ConfigPath = loaded.ConfigPath,
                Result = WebCliJson.SerializeToElement(result, WebCliJson.Context.WebLink404ReviewResult)
            });
            return 0;
        }

        logger.Success($"links review-404 ok: observations={result.Report.ObservationCount}; ignored={result.Report.IgnoredObservationCount}; suggested={result.Report.SuggestedObservationCount}; redirectCandidates={result.Promote.CandidateCount}; ignoredCandidates={result.Ignore.CandidateCount}");
        logger.Info($"Report: {result.ReportPath}");
        logger.Info($"Redirect candidates: {result.RedirectCandidatesPath}");
        logger.Info($"Ignored-404 candidates: {result.Ignored404CandidatesPath}");
        return 0;
    }

    /// <summary>
    /// Handles <c>web.links.promote-404</c>: reads a 404 suggestion report and promotes its
    /// suggestions into the redirects file, optionally writing a review CSV of the result.
    /// </summary>
    /// <param name="args">Raw command-line arguments of the subcommand.</param>
    /// <param name="outputJson">When true the result is emitted as a JSON envelope instead of log lines.</param>
    /// <param name="logger">Logger used for failures and the human-readable output.</param>
    /// <param name="outputSchemaVersion">Schema version written into the JSON envelope.</param>
    /// <returns>The exit code from <c>Fail</c> when the source or output path is missing; otherwise <c>0</c>.</returns>
    private static int HandleLinksPromote404(string[] args, bool outputJson, WebConsoleLogger logger, int outputSchemaVersion)
    {
        const string command = "web.links.promote-404";

        // True when any accepted spelling of a switch is present (checked left to right).
        bool Flag(params string[] names) => Array.Exists(names, name => HasOption(args, name));

        // First non-null value among the accepted spellings of an option (checked left to right).
        string? Value(params string[] names)
        {
            foreach (var name in names)
            {
                if (TryGetOptionValue(args, name) is { } found)
                    return found;
            }

            return null;
        }

        var loaded = LoadLinksSpecForCommand(args, command, outputJson, logger);
        var root = loaded.BaseDir;

        var suggestionsFile = ResolveOptionalPath(root, Value("--source", "--report", "--input", "--in"));
        if (string.IsNullOrWhiteSpace(suggestionsFile))
            return Fail("Missing required --source.", outputJson, logger, command);

        // An explicit option wins; otherwise fall back to the redirects path from the loaded spec.
        var redirectsFile = ResolveOptionalPath(root, Value("--out", "--redirects", "--redirects-path", "--redirectsPath"))
            ?? ResolveOptionalPath(root, loaded.Spec?.Redirects);
        if (string.IsNullOrWhiteSpace(redirectsFile))
            return Fail("Missing required --out or links.redirects config path.", outputJson, logger, command);

        var reviewCsvPath = ResolveOptionalPath(root, Value("--review-csv", "--reviewCsv", "--review-csv-path", "--reviewCsvPath", "--csv-report", "--csvReport"));

        var promoteOptions = new WebLink404PromoteOptions
        {
            SourcePath = suggestionsFile,
            OutputPath = redirectsFile,
            Enabled = Flag("--enable", "--enabled"),
            MinimumScore = ParseDoubleOption(Value("--min-score", "--minimum-score", "--minimumScore"), 0.35d),
            MinimumCount = ParseIntOption(Value("--min-count", "--minimum-count", "--minimumCount"), 1),
            Status = ParseIntOption(TryGetOptionValue(args, "--status"), 301),
            Group = TryGetOptionValue(args, "--group"),
            MergeWithExisting = !HasOption(args, "--no-merge"),
            ReplaceExisting = Flag("--replace-existing", "--replaceExisting")
        };
        var result = WebLinkService.Promote404Suggestions(promoteOptions);
        WebLinkCommandSupport.WriteRedirectReviewCsv(reviewCsvPath, result.OutputPath);

        if (!outputJson)
        {
            logger.Success($"links promote-404 ok: candidates={result.CandidateCount}; written={result.WrittenCount}; skippedDuplicates={result.SkippedDuplicateCount}");
            logger.Info($"Output: {result.OutputPath}");
            if (!string.IsNullOrWhiteSpace(reviewCsvPath))
                logger.Info($"Review CSV: {reviewCsvPath}");
            return 0;
        }

        WebCliJsonWriter.Write(new WebCliJsonEnvelope
        {
            SchemaVersion = outputSchemaVersion,
            Command = command,
            Success = true,
            ExitCode = 0,
            ConfigPath = loaded.ConfigPath,
            Result = WebCliJson.SerializeToElement(result, WebCliJson.Context.WebLink404PromoteResult)
        });
        return 0;
    }

    private static int HandleLinksIgnore404(string[] args, bool outputJson, WebConsoleLogger logger, int outputSchemaVersion)
    {
        var command = "web.links.ignore-404";
        var loaded = LoadLinksSpecForCommand(args, command, outputJson, logger);
        var baseDir = loaded.BaseDir;
        var sourcePath = ResolveOptionalPath(baseDir,
            TryGetOptionValue(args, "--source") ??
            TryGetOptionValue(args, "--report") ??
            TryGetOptionValue(args, "--input") ??
            TryGetOptionValue(args, "--in"));
        if (string.IsNullOrWhiteSpace(sourcePath))
            return Fail("Missing required --source.", outputJson, logger, command);

        var outputPath = ResolveOptionalPath(baseDir,
            TryGetOptionValue(args, "--out") ??
            TryGetOptionValue(args, "--ignored-404") ??
            TryGetOptionValue(args, "--ignored404") ??
            TryGetOptionValue(args, "--ignored-404-path") ??
            TryGetOptionValue(args, "--ignored404Path")) ??
+ ResolveOptionalPath(baseDir, loaded.Spec?.Ignored404); + if (string.IsNullOrWhiteSpace(outputPath)) + return Fail("Missing required --out or links.ignored404 config path.", outputJson, logger, command); + var reviewCsvPath = ResolveOptionalPath(baseDir, + TryGetOptionValue(args, "--review-csv") ?? + TryGetOptionValue(args, "--reviewCsv") ?? + TryGetOptionValue(args, "--review-csv-path") ?? + TryGetOptionValue(args, "--reviewCsvPath") ?? + TryGetOptionValue(args, "--csv-report") ?? + TryGetOptionValue(args, "--csvReport")); + + var paths = ReadOptionList(args, "--path", "--paths").ToArray(); + var includeAll = HasOption(args, "--all"); + var onlyWithoutSuggestions = HasOption(args, "--without-suggestions") || HasOption(args, "--withoutSuggestions"); + if (paths.Length == 0 && !includeAll && !onlyWithoutSuggestions) + return Fail("Specify at least one --path, --all, or --without-suggestions.", outputJson, logger, command); + + var result = WebLinkService.Ignore404Suggestions(new WebLink404IgnoreOptions + { + SourcePath = sourcePath, + OutputPath = outputPath, + Paths = paths, + IncludeAll = includeAll, + OnlyWithoutSuggestions = onlyWithoutSuggestions, + Reason = TryGetOptionValue(args, "--reason"), + CreatedBy = TryGetOptionValue(args, "--created-by") ?? 
TryGetOptionValue(args, "--createdBy"),
            MergeWithExisting = !HasOption(args, "--no-merge"),
            ReplaceExisting = HasOption(args, "--replace-existing") || HasOption(args, "--replaceExisting")
        });
        WebLinkCommandSupport.WriteIgnored404ReviewCsv(reviewCsvPath, result.OutputPath);

        if (outputJson)
        {
            WebCliJsonWriter.Write(new WebCliJsonEnvelope
            {
                SchemaVersion = outputSchemaVersion,
                Command = command,
                Success = true,
                ExitCode = 0,
                ConfigPath = loaded.ConfigPath,
                Result = WebCliJson.SerializeToElement(result, WebCliJson.Context.WebLink404IgnoreResult)
            });
            return 0;
        }

        logger.Success($"links ignore-404 ok: candidates={result.CandidateCount}; written={result.WrittenCount}; skippedDuplicates={result.SkippedDuplicateCount}");
        logger.Info($"Output: {result.OutputPath}");
        if (!string.IsNullOrWhiteSpace(reviewCsvPath))
            logger.Info($"Review CSV: {reviewCsvPath}");
        return 0;
    }

    /// <summary>
    /// Handles <c>web.links.apply-review</c>: applies reviewed redirect candidates and/or
    /// ignored-404 candidates to their target files and writes an optional summary.
    /// </summary>
    /// <param name="args">Raw command-line arguments of the subcommand.</param>
    /// <param name="outputJson">When true the result is emitted as a JSON envelope instead of log lines.</param>
    /// <param name="logger">Logger used for failures and the human-readable output.</param>
    /// <param name="outputSchemaVersion">Schema version written into the JSON envelope.</param>
    /// <returns>
    /// The exit code from <c>Fail</c> when no target is selected or a required target path is
    /// missing; otherwise <c>0</c>.
    /// </returns>
    private static int HandleLinksApplyReview(string[] args, bool outputJson, WebConsoleLogger logger, int outputSchemaVersion)
    {
        var command = "web.links.apply-review";
        var loaded = LoadLinksSpecForCommand(args, command, outputJson, logger);
        var baseDir = loaded.BaseDir;

        // --all selects both targets; each target also has its own switches.
        var applyAll = HasOption(args, "--all");
        var applyRedirects = applyAll ||
                             HasOption(args, "--apply-redirects") ||
                             HasOption(args, "--applyRedirects") ||
                             HasOption(args, "--redirect-candidates-only");
        var applyIgnored404 = applyAll ||
                              HasOption(args, "--apply-ignored-404") ||
                              HasOption(args, "--applyIgnored404") ||
                              HasOption(args, "--ignored-404-candidates-only");
        if (!applyRedirects && !applyIgnored404)
            return Fail("Choose at least one target: --apply-redirects, --apply-ignored-404, or --all.", outputJson, logger, command);

        var redirectCandidatesPath = ResolveOptionalPath(baseDir,
            TryGetOptionValue(args, "--redirect-candidates") ??
            TryGetOptionValue(args, "--redirectCandidates") ??
            TryGetOptionValue(args, "--redirect-candidates-path") ??
            TryGetOptionValue(args, "--redirectCandidatesPath")) ??
            Path.GetFullPath(Path.Combine(baseDir, "Build", "link-reports", "404-promoted-candidates.json"));
        var redirectsPath = ResolveOptionalPath(baseDir,
            TryGetOptionValue(args, "--redirects") ??
            TryGetOptionValue(args, "--redirects-path") ??
            TryGetOptionValue(args, "--redirectsPath")) ??
            ResolveOptionalPath(baseDir, loaded.Spec?.Redirects);

        // The redirects file is only mandatory when redirects are actually being applied.
        if (applyRedirects && string.IsNullOrWhiteSpace(redirectsPath))
            return Fail("Missing --redirects or links.redirects config path.", outputJson, logger, command);

        var ignored404CandidatesPath = ResolveOptionalPath(baseDir,
            TryGetOptionValue(args, "--ignored-404-candidates") ??
            TryGetOptionValue(args, "--ignored404Candidates") ??
            TryGetOptionValue(args, "--ignored-404-candidates-path") ??
            TryGetOptionValue(args, "--ignored404CandidatesPath")) ??
            Path.GetFullPath(Path.Combine(baseDir, "Build", "link-reports", "ignored-404-candidates.json"));
        var ignored404Path = ResolveOptionalPath(baseDir,
            TryGetOptionValue(args, "--ignored-404") ??
            TryGetOptionValue(args, "--ignored404") ??
            TryGetOptionValue(args, "--ignored-404-path") ??
            TryGetOptionValue(args, "--ignored404Path")) ??
            ResolveOptionalPath(baseDir, loaded.Spec?.Ignored404);

        // Likewise, the ignored-404 file is only mandatory when that target is selected.
        if (applyIgnored404 && string.IsNullOrWhiteSpace(ignored404Path))
            return Fail("Missing --ignored-404 or links.ignored404 config path.", outputJson, logger, command);

        var result = WebLinkService.ApplyReviewCandidates(new WebLinkReviewApplyOptions
        {
            ApplyRedirects = applyRedirects,
            ApplyIgnored404 = applyIgnored404,
            RedirectCandidatesPath = redirectCandidatesPath,
            RedirectsPath = redirectsPath,
            Ignored404CandidatesPath = ignored404CandidatesPath,
            Ignored404Path = ignored404Path,
            ReplaceExisting = HasOption(args, "--replace-existing") || HasOption(args, "--replaceExisting"),
            EnableRedirects = HasOption(args, "--enable-redirects") || HasOption(args, "--enableRedirects"),
            DryRun = HasOption(args, "--dry-run") || HasOption(args, "--dryRun") || HasOption(args, "--what-if") || HasOption(args, "--whatIf")
        });

        var summaryPath = ResolveOptionalPath(baseDir,
            TryGetOptionValue(args, "--summary-path") ??
            TryGetOptionValue(args, "--summaryPath"));
        WriteLinksApplyReviewSummary(summaryPath, result);

        if (outputJson)
        {
            WebCliJsonWriter.Write(new WebCliJsonEnvelope
            {
                SchemaVersion = outputSchemaVersion,
                Command = command,
                Success = true,
                ExitCode = 0,
                ConfigPath = loaded.ConfigPath,
                Result = WebCliJson.SerializeToElement(result, WebCliJson.Context.WebLinkReviewApplyResult)
            });
            return 0;
        }

        // One status fragment per target that produced a result; List needs its element type to compile.
        var parts = new List<string>();
        if (result.Redirects is not null)
            parts.Add($"redirects={result.Redirects.CandidateCount}; redirectWritten={result.Redirects.WrittenCount}; redirectSkipped={result.Redirects.SkippedDuplicateCount}");
        if (result.Ignored404 is not null)
            parts.Add($"ignored404={result.Ignored404.CandidateCount}; ignoredWritten={result.Ignored404.WrittenCount}; ignoredSkipped={result.Ignored404.SkippedDuplicateCount}");

        var label = result.DryRun ? "links apply-review dry-run ok" : "links apply-review ok";
        logger.Success(parts.Count == 0 ? label : $"{label}: {string.Join("; ", parts)}");
        if (!string.IsNullOrWhiteSpace(summaryPath))
            logger.Info($"Summary: {summaryPath}");
        return 0;
    }

    private static int HandleLinksImportWordPress(string[] args, bool outputJson, WebConsoleLogger logger, int outputSchemaVersion)
    {
        var command = "web.links.import-wordpress";
        var loaded = LoadLinksSpecForCommand(args, command, outputJson, logger);
        var sourcePath = TryGetOptionValue(args, "--source") ??
                         TryGetOptionValue(args, "--csv") ??
                         TryGetOptionValue(args, "--input") ??
                         TryGetOptionValue(args, "--in");
        if (string.IsNullOrWhiteSpace(sourcePath))
            return Fail("Missing required --source.", outputJson, logger, command);

        var baseDir = loaded.BaseDir;
        var outPath = ResolveOptionalPath(baseDir,
            TryGetOptionValue(args, "--out") ??
            TryGetOptionValue(args, "--output-path") ??
            TryGetOptionValue(args, "--outputPath") ??
            TryGetOptionValue(args, "--shortlinks") ??
            TryGetOptionValue(args, "--shortlinks-path") ??
            TryGetOptionValue(args, "--shortlinksPath")) ??
            ResolveOptionalPath(baseDir, loaded.Spec?.Shortlinks);
        if (string.IsNullOrWhiteSpace(outPath))
            return Fail("Missing required --out or links.shortlinks config path.", outputJson, logger, command);

        var hosts = BuildLinkHostMap(args, loaded.Spec);
        var host = TryGetOptionValue(args, "--host");
        if (string.IsNullOrWhiteSpace(host) && hosts.TryGetValue("short", out var configuredShortHost))
            host = configuredShortHost;

        var status = ParseIntOption(TryGetOptionValue(args, "--status"), 302);
        var result = WebLinkService.ImportPrettyLinks(new WebLinkShortlinkImportOptions
        {
            SourcePath = ResolvePathRelative(baseDir, sourcePath),
            SourceOriginPath = sourcePath,
            OutputPath = outPath,
            Host = host,
            PathPrefix = TryGetOptionValue(args, "--path-prefix") ??
TryGetOptionValue(args, "--pathPrefix"), + Owner = TryGetOptionValue(args, "--owner"), + Tags = ReadOptionList(args, "--tag", "--tags").ToArray(), + Status = status <= 0 ? 302 : status, + AllowExternal = !HasOption(args, "--no-external"), + MergeWithExisting = !HasOption(args, "--no-merge"), + ReplaceExisting = HasOption(args, "--replace-existing") || HasOption(args, "--replaceExisting") + }); + + if (outputJson) + { + WebCliJsonWriter.Write(new WebCliJsonEnvelope + { + SchemaVersion = outputSchemaVersion, + Command = command, + Success = true, + ExitCode = 0, + ConfigPath = loaded.ConfigPath, + Result = WebCliJson.SerializeToElement(result, WebCliJson.Context.WebLinkShortlinkImportResult) + }); + return 0; + } + + logger.Success($"links import-wordpress ok: imported={result.ImportedCount}; written={result.WrittenCount}; skippedDuplicates={result.SkippedDuplicateCount}"); + logger.Info($"Output: {result.OutputPath}"); + foreach (var warning in result.Warnings) + logger.Warn(warning); + return 0; + } + +} diff --git a/PowerForge.Web.Cli/WebLinkCommandSupport.cs b/PowerForge.Web.Cli/WebLinkCommandSupport.cs new file mode 100644 index 00000000..026a2e1c --- /dev/null +++ b/PowerForge.Web.Cli/WebLinkCommandSupport.cs @@ -0,0 +1,363 @@ +using System; +using System.Collections.Generic; +using System.Globalization; +using System.IO; +using System.Linq; +using System.Text.Json; +using System.Text.Json.Serialization; +using PowerForge.Web; + +namespace PowerForge.Web.Cli; + +internal static class WebLinkCommandSupport +{ + internal static void WriteSummary( + string? summaryPath, + string action, + WebLinkDataSet dataSet, + LinkValidationResult validation, + bool taskSuccess, + WebLinkApacheExportResult? export, + WebLinkBaselineState? 
baseline)
    {
        // No summary path requested: nothing to write.
        if (string.IsNullOrWhiteSpace(summaryPath))
            return;

        // Create the parent folder only when the path actually has one.
        if (Path.GetDirectoryName(summaryPath) is var parentDirectory && !string.IsNullOrWhiteSpace(parentDirectory))
            Directory.CreateDirectory(parentDirectory);

        // Tally the two duplicate-redirect issue codes in a single pass over the issues.
        var sameTargetDuplicates = 0;
        var conflictingDuplicates = 0;
        foreach (var issue in validation.Issues)
        {
            switch (issue.Code)
            {
                case "PFLINK.REDIRECT.DUPLICATE_SAME_TARGET":
                    sameTargetDuplicates++;
                    break;
                case "PFLINK.REDIRECT.DUPLICATE":
                    conflictingDuplicates++;
                    break;
            }
        }

        // Property names and their order define the JSON document that is written.
        var payload = new
        {
            generatedOn = DateTimeOffset.UtcNow.ToString("O"),
            action,
            redirects = validation.RedirectCount,
            shortlinks = validation.ShortlinkCount,
            errors = validation.ErrorCount,
            warnings = validation.WarningCount,
            duplicateWarnings = sameTargetDuplicates,
            duplicateErrors = conflictingDuplicates,
            success = taskSuccess,
            validationSuccess = validation.Success,
            usedSourceCount = dataSet.UsedSources.Length,
            usedSources = dataSet.UsedSources,
            missingSourceCount = dataSet.MissingSources.Length,
            missingSources = dataSet.MissingSources,
            baselinePath = baseline?.Path,
            baselineLoaded = baseline?.Loaded,
            baselineWarningCount = baseline?.KeyCount,
            baselineGenerated = baseline?.Generated,
            baselineUpdated = baseline?.Updated,
            baselineWrittenPath = baseline?.WrittenPath,
            newWarningCount = baseline?.NewWarnings.Length,
            newWarnings = baseline?.NewWarnings,
            issues = validation.Issues,
            export
        };

        var json = JsonSerializer.Serialize(payload, LinksSummaryJsonOptions);
        File.WriteAllText(summaryPath, json);
    }

    internal static void WriteIssueReport(string?
reportPath, LinkValidationResult validation)
    {
        // No report path requested: nothing to write.
        if (string.IsNullOrWhiteSpace(reportPath))
            return;

        var reportDirectory = Path.GetDirectoryName(reportPath);
        if (!string.IsNullOrWhiteSpace(reportDirectory))
            Directory.CreateDirectory(reportDirectory);

        // CSV rows are plain strings; the header row goes first. List needs its element type to compile.
        var lines = new List<string>
        {
            "severity,code,id,related_id,source_host,source_path,source_query,target_url,related_target_url,normalized_target_url,related_normalized_target_url,status,related_status,origin_path,origin_line,related_origin_path,related_origin_line,message"
        };

        foreach (var issue in validation.Issues)
        {
            // Numeric columns are left empty when the value is zero or negative.
            lines.Add(string.Join(",",
                EscapeCsv(issue.Severity.ToString().ToLowerInvariant()),
                EscapeCsv(issue.Code),
                EscapeCsv(issue.Id),
                EscapeCsv(issue.RelatedId),
                EscapeCsv(issue.SourceHost),
                EscapeCsv(issue.SourcePath),
                EscapeCsv(issue.SourceQuery),
                EscapeCsv(issue.TargetUrl),
                EscapeCsv(issue.RelatedTargetUrl),
                EscapeCsv(issue.NormalizedTargetUrl),
                EscapeCsv(issue.RelatedNormalizedTargetUrl),
                EscapeCsv(issue.Status <= 0 ? string.Empty : issue.Status.ToString(CultureInfo.InvariantCulture)),
                EscapeCsv(issue.RelatedStatus <= 0 ? string.Empty : issue.RelatedStatus.ToString(CultureInfo.InvariantCulture)),
                EscapeCsv(issue.OriginPath),
                EscapeCsv(issue.OriginLine <= 0 ? string.Empty : issue.OriginLine.ToString(CultureInfo.InvariantCulture)),
                EscapeCsv(issue.RelatedOriginPath),
                EscapeCsv(issue.RelatedOriginLine <= 0 ? string.Empty : issue.RelatedOriginLine.ToString(CultureInfo.InvariantCulture)),
                EscapeCsv(issue.Message)));
        }

        File.WriteAllLines(reportPath, lines);
    }

    internal static void WriteDuplicateReport(string?
reportPath, LinkValidationResult validation)
    {
        // No report path requested: nothing to write.
        if (string.IsNullOrWhiteSpace(reportPath))
            return;

        var reportDirectory = Path.GetDirectoryName(reportPath);
        if (!string.IsNullOrWhiteSpace(reportDirectory))
            Directory.CreateDirectory(reportDirectory);

        // CSV rows are plain strings; the header row goes first. List needs its element type to compile.
        var lines = new List<string>
        {
            "severity,code,suggested_action,source_host,source_path,source_query,canonical_id,canonical_status,canonical_target_url,canonical_normalized_target_url,canonical_origin_path,canonical_origin_line,duplicate_id,duplicate_status,duplicate_target_url,duplicate_normalized_target_url,duplicate_origin_path,duplicate_origin_line,message"
        };

        // Only the two duplicate-redirect issue codes are reported here.
        foreach (var issue in validation.Issues.Where(static issue =>
                     issue.Code is "PFLINK.REDIRECT.DUPLICATE" or "PFLINK.REDIRECT.DUPLICATE_SAME_TARGET"))
        {
            // The issue's "related" fields fill the canonical_* columns and its own fields
            // fill the duplicate_* columns.
            lines.Add(string.Join(",",
                EscapeCsv(issue.Severity.ToString().ToLowerInvariant()),
                EscapeCsv(issue.Code),
                EscapeCsv(issue.Code == "PFLINK.REDIRECT.DUPLICATE_SAME_TARGET" ? "dedupe_generated_or_imported_row" : "review_canonical_target"),
                EscapeCsv(issue.SourceHost),
                EscapeCsv(issue.SourcePath),
                EscapeCsv(issue.SourceQuery),
                EscapeCsv(issue.RelatedId),
                EscapeCsv(issue.RelatedStatus <= 0 ? string.Empty : issue.RelatedStatus.ToString(CultureInfo.InvariantCulture)),
                EscapeCsv(issue.RelatedTargetUrl),
                EscapeCsv(issue.RelatedNormalizedTargetUrl),
                EscapeCsv(issue.RelatedOriginPath),
                EscapeCsv(issue.RelatedOriginLine <= 0 ? string.Empty : issue.RelatedOriginLine.ToString(CultureInfo.InvariantCulture)),
                EscapeCsv(issue.Id),
                EscapeCsv(issue.Status <= 0 ? string.Empty : issue.Status.ToString(CultureInfo.InvariantCulture)),
                EscapeCsv(issue.TargetUrl),
                EscapeCsv(issue.NormalizedTargetUrl),
                EscapeCsv(issue.OriginPath),
                EscapeCsv(issue.OriginLine <= 0 ? string.Empty : issue.OriginLine.ToString(CultureInfo.InvariantCulture)),
                EscapeCsv(issue.Message)));
        }

        File.WriteAllLines(reportPath, lines);
    }

    internal static void Write404SuggestionReviewCsv(string?
reportPath, WebLink404ReportResult result) + { + if (string.IsNullOrWhiteSpace(reportPath)) + return; + + EnsureDirectory(reportPath); + var lines = new List + { + "suggested_action,host,path,count,best_target,best_score,all_targets,referrer,last_seen_at" + }; + + foreach (var suggestion in result.Suggestions.OrderByDescending(static item => item.Count).ThenBy(static item => item.Path, StringComparer.OrdinalIgnoreCase)) + { + var targets = suggestion.Suggestions ?? Array.Empty(); + var best = targets.OrderByDescending(static item => item.Score).FirstOrDefault(); + var action = best is null ? "ignore_or_investigate" : "review_redirect_candidate"; + lines.Add(string.Join(",", + EscapeCsv(action), + EscapeCsv(suggestion.Host), + EscapeCsv(suggestion.Path), + EscapeCsv(suggestion.Count.ToString(CultureInfo.InvariantCulture)), + EscapeCsv(best?.TargetPath), + EscapeCsv(best is null ? string.Empty : best.Score.ToString("0.###", CultureInfo.InvariantCulture)), + EscapeCsv(string.Join(" | ", targets.Select(static item => $"{item.TargetPath} ({item.Score:0.###})"))), + EscapeCsv(suggestion.Referrer), + EscapeCsv(suggestion.LastSeenAt))); + } + + File.WriteAllLines(reportPath, lines); + } + + internal static void WriteRedirectReviewCsv(string? reportPath, string redirectJsonPath) + { + if (string.IsNullOrWhiteSpace(reportPath) || string.IsNullOrWhiteSpace(redirectJsonPath) || !File.Exists(redirectJsonPath)) + return; + + EnsureDirectory(reportPath); + var dataSet = WebLinkService.Load(new WebLinkLoadOptions { RedirectsPath = redirectJsonPath }); + var lines = new List + { + "enabled,id,source_host,source_path,source_query,target_url,status,match_type,group,source,notes" + }; + + foreach (var redirect in dataSet.Redirects.OrderBy(static item => item.SourceHost ?? string.Empty, StringComparer.OrdinalIgnoreCase).ThenBy(static item => item.SourcePath, StringComparer.OrdinalIgnoreCase)) + { + lines.Add(string.Join(",", + EscapeCsv(redirect.Enabled ? 
"true" : "false"), + EscapeCsv(redirect.Id), + EscapeCsv(redirect.SourceHost), + EscapeCsv(redirect.SourcePath), + EscapeCsv(redirect.SourceQuery), + EscapeCsv(redirect.TargetUrl), + EscapeCsv(redirect.Status.ToString(CultureInfo.InvariantCulture)), + EscapeCsv(redirect.MatchType.ToString().ToLowerInvariant()), + EscapeCsv(redirect.Group), + EscapeCsv(redirect.Source), + EscapeCsv(redirect.Notes))); + } + + File.WriteAllLines(reportPath, lines); + } + + internal static void WriteIgnored404ReviewCsv(string? reportPath, string ignored404JsonPath) + { + if (string.IsNullOrWhiteSpace(reportPath) || string.IsNullOrWhiteSpace(ignored404JsonPath) || !File.Exists(ignored404JsonPath)) + return; + + EnsureDirectory(reportPath); + var rules = ReadIgnored404Rules(ignored404JsonPath); + var lines = new List + { + "host,path,reason,created_at,created_by" + }; + + foreach (var rule in rules.OrderBy(static item => item.Host ?? string.Empty, StringComparer.OrdinalIgnoreCase).ThenBy(static item => item.Path, StringComparer.OrdinalIgnoreCase)) + { + lines.Add(string.Join(",", + EscapeCsv(rule.Host), + EscapeCsv(rule.Path), + EscapeCsv(rule.Reason), + EscapeCsv(rule.CreatedAt), + EscapeCsv(rule.CreatedBy))); + } + + File.WriteAllLines(reportPath, lines); + } + + internal static WebLinkBaselineState EvaluateBaseline( + string baseDir, + string? 
baselinePath, + LinkValidationResult validation, + bool baselineGenerate, + bool baselineUpdate, + bool failOnNewWarnings) + { + if ((baselineGenerate || baselineUpdate || failOnNewWarnings) && string.IsNullOrWhiteSpace(baselinePath)) + baselinePath = ".powerforge/link-baseline.json"; + + var warningKeys = validation.Issues + .Where(static issue => issue.Severity == LinkValidationSeverity.Warning) + .Select(BuildIssueKey) + .Where(static key => !string.IsNullOrWhiteSpace(key)) + .Distinct(StringComparer.OrdinalIgnoreCase) + .ToArray(); + + var state = new WebLinkBaselineState + { + Path = baselinePath, + CurrentWarningKeys = warningKeys, + Generated = baselineGenerate, + Updated = baselineUpdate, + Merge = baselineUpdate, + ShouldWrite = baselineGenerate || baselineUpdate + }; + + if (string.IsNullOrWhiteSpace(baselinePath)) + return state; + + state.Loaded = WebVerifyBaselineStore.TryLoadWarningKeys(baseDir, baselinePath, out var resolvedPath, out var baselineKeys); + state.Path = string.IsNullOrWhiteSpace(resolvedPath) ? baselinePath : resolvedPath; + state.KeyCount = baselineKeys.Length; + + var baselineSet = state.Loaded + ? new HashSet(baselineKeys, StringComparer.OrdinalIgnoreCase) + : null; + state.NewWarnings = baselineSet is null + ? 
Array.Empty() + : validation.Issues + .Where(static issue => issue.Severity == LinkValidationSeverity.Warning) + .Where(issue => !baselineSet.Contains(BuildIssueKey(issue))) + .ToArray(); + + return state; + } + + internal static string BuildValidateSuccessMessage(LinkValidationResult validation, WebLinkBaselineState baseline) + { + var message = $"links-validate ok: redirects={validation.RedirectCount}; shortlinks={validation.ShortlinkCount}; warnings={validation.WarningCount}"; + if (!string.IsNullOrWhiteSpace(baseline.Path) && baseline.Loaded) + message += $"; newWarnings={baseline.NewWarnings.Length}"; + if (!string.IsNullOrWhiteSpace(baseline.WrittenPath)) + message += "; baseline written"; + return message; + } + + internal static string BuildValidateFailureMessage(LinkValidationResult validation, WebLinkBaselineState baseline, bool failOnNewWarnings) + { + if (failOnNewWarnings && !baseline.Loaded) + return $"links-validate failed: baseline could not be loaded; errors={validation.ErrorCount}; warnings={validation.WarningCount}"; + if (failOnNewWarnings && baseline.NewWarnings.Length > 0) + return $"links-validate failed: newWarnings={baseline.NewWarnings.Length}; errors={validation.ErrorCount}; warnings={validation.WarningCount}"; + return $"links-validate failed: errors={validation.ErrorCount}; warnings={validation.WarningCount}"; + } + + internal static string BuildIssueKey(LinkValidationIssue issue) + => string.Join("|", + issue.Code ?? string.Empty, + issue.Source ?? string.Empty, + issue.SourceHost ?? string.Empty, + issue.SourcePath ?? string.Empty, + issue.SourceQuery ?? string.Empty, + issue.Status.ToString(CultureInfo.InvariantCulture), + issue.NormalizedTargetUrl ?? issue.TargetUrl ?? string.Empty, + issue.RelatedStatus.ToString(CultureInfo.InvariantCulture), + issue.RelatedNormalizedTargetUrl ?? issue.RelatedTargetUrl ?? 
string.Empty);
+
+    /// <summary>
+    /// Reads ignored-404 rules from a JSON file. The payload may be a bare array or an object
+    /// that wraps the array in an <c>ignored404</c> property; trailing commas and comments are tolerated.
+    /// </summary>
+    /// <param name="path">Path of the JSON file to read.</param>
+    /// <returns>The deserialized rules, or an empty array when the selected element is not a JSON array.</returns>
+    private static Ignored404Rule[] ReadIgnored404Rules(string path)
+    {
+        using var document = JsonDocument.Parse(File.ReadAllText(path), new JsonDocumentOptions
+        {
+            AllowTrailingCommas = true,
+            CommentHandling = JsonCommentHandling.Skip
+        });
+
+        var source = document.RootElement;
+        // Unwrap { "ignored404": [...] } so both file shapes share one code path.
+        if (source.ValueKind == JsonValueKind.Object && source.TryGetProperty("ignored404", out var nested))
+            source = nested;
+        if (source.ValueKind != JsonValueKind.Array)
+            return Array.Empty<Ignored404Rule>();
+
+        return source.Deserialize<Ignored404Rule[]>(LinksSummaryJsonOptions) ?? Array.Empty<Ignored404Rule>();
+    }
+
+    /// <summary>Creates the parent directory of <paramref name="path"/> when the path has one.</summary>
+    /// <param name="path">File path whose containing directory must exist before writing.</param>
+    private static void EnsureDirectory(string path)
+    {
+        var directory = Path.GetDirectoryName(path);
+        if (!string.IsNullOrWhiteSpace(directory))
+            Directory.CreateDirectory(directory);
+    }
+
+    // Characters that force a CSV field to be wrapped in double quotes.
+    private static readonly char[] CsvQuoteTriggers = { ',', '"', '\r', '\n' };
+
+    /// <summary>
+    /// Formats one CSV cell: <c>null</c> becomes an empty field, and a value containing a comma,
+    /// double quote, CR or LF is wrapped in double quotes with embedded quotes doubled.
+    /// </summary>
+    /// <param name="value">Raw cell value, possibly <c>null</c>.</param>
+    /// <returns>The cell text, quoted only when required.</returns>
+    private static string EscapeCsv(string? value)
+    {
+        var text = value ?? string.Empty;
+        if (text.IndexOfAny(CsvQuoteTriggers) < 0)
+            return text;
+
+        // A quote is itself a trigger, so doubling quotes only matters on this quoted path.
+        return "\"" + text.Replace("\"", "\"\"", StringComparison.Ordinal) + "\"";
+    }
+
+    private static readonly JsonSerializerOptions LinksSummaryJsonOptions = new()
+    {
+        PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
+        PropertyNameCaseInsensitive = true,
+        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
+        WriteIndented = true,
+        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.CamelCase) }
+    };
+}
+
+internal sealed class WebLinkBaselineState
+{
+    public string? Path { get; set; }
+    public bool Loaded { get; set; }
+    public int KeyCount { get; set; }
+    public bool Generated { get; set; }
+    public bool Updated { get; set; }
+    public bool Merge { get; set; }
+    public bool ShouldWrite { get; set; }
+    public string? 
WrittenPath { get; set; } + public string[] CurrentWarningKeys { get; set; } = Array.Empty(); + public LinkValidationIssue[] NewWarnings { get; set; } = Array.Empty(); +} diff --git a/PowerForge.Web.Cli/WebPipelineRunner.Tasks.Links.cs b/PowerForge.Web.Cli/WebPipelineRunner.Tasks.Links.cs new file mode 100644 index 00000000..b1b7d318 --- /dev/null +++ b/PowerForge.Web.Cli/WebPipelineRunner.Tasks.Links.cs @@ -0,0 +1,504 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Text.Json.Serialization; +using System.Text.Json; +using PowerForge.Web; + +namespace PowerForge.Web.Cli; + +internal static partial class WebPipelineRunner +{ + private static void ExecuteLinksValidate(JsonElement step, string baseDir, WebPipelineStepResult stepResult) + { + var summaryPath = ResolvePath(baseDir, GetString(step, "summaryPath") ?? GetString(step, "summary-path")); + var reportPath = ResolvePath(baseDir, GetString(step, "reportPath") ?? GetString(step, "report-path")); + var duplicateReportPath = ResolvePath(baseDir, GetString(step, "duplicateReportPath") ?? GetString(step, "duplicate-report-path")); + var strict = GetBool(step, "strict") ?? false; + var failOnWarnings = GetBool(step, "failOnWarnings") ?? GetBool(step, "fail-on-warnings") ?? false; + var failOnNewWarnings = GetBool(step, "failOnNewWarnings") ?? GetBool(step, "fail-on-new-warnings") ?? GetBool(step, "failOnNew") ?? false; + var baselineGenerate = GetBool(step, "baselineGenerate") ?? GetBool(step, "baseline-generate") ?? false; + var baselineUpdate = GetBool(step, "baselineUpdate") ?? GetBool(step, "baseline-update") ?? false; + var baselinePath = GetString(step, "baselinePath") ?? GetString(step, "baseline-path") ?? 
GetString(step, "baseline"); + + var linkOptions = BuildLinkLoadOptions(step, baseDir); + var dataSet = WebLinkService.Load(linkOptions); + if (strict && dataSet.UsedSources.Length == 0) + throw new InvalidOperationException("links-validate strict mode failed: no link source files were found."); + + var validation = WebLinkService.Validate(dataSet); + var baseline = WebLinkCommandSupport.EvaluateBaseline(baseDir, baselinePath, validation, baselineGenerate, baselineUpdate, failOnNewWarnings); + var failOnNewWarningsActive = failOnNewWarnings && !baselineGenerate && !baselineUpdate; + var success = validation.ErrorCount == 0 && + (!failOnWarnings || validation.WarningCount == 0) && + (!failOnNewWarningsActive || (baseline.Loaded && baseline.NewWarnings.Length == 0)); + if (baseline.ShouldWrite) + baseline.WrittenPath = WebVerifyBaselineStore.Write(baseDir, baseline.Path, baseline.CurrentWarningKeys, baseline.Merge, logger: null); + + WebLinkCommandSupport.WriteSummary(summaryPath, "validate", dataSet, validation, taskSuccess: success, export: null, baseline); + WebLinkCommandSupport.WriteIssueReport(reportPath, validation); + WebLinkCommandSupport.WriteDuplicateReport(duplicateReportPath, validation); + + stepResult.Success = success; + stepResult.Message = success + ? WebLinkCommandSupport.BuildValidateSuccessMessage(validation, baseline) + : WebLinkCommandSupport.BuildValidateFailureMessage(validation, baseline, failOnNewWarningsActive); + } + + private static void ExecuteLinksExportApache(JsonElement step, string baseDir, WebPipelineStepResult stepResult) + { + var summaryPath = ResolvePath(baseDir, GetString(step, "summaryPath") ?? GetString(step, "summary-path")); + var reportPath = ResolvePath(baseDir, GetString(step, "reportPath") ?? GetString(step, "report-path")); + var duplicateReportPath = ResolvePath(baseDir, GetString(step, "duplicateReportPath") ?? GetString(step, "duplicate-report-path")); + var strict = GetBool(step, "strict") ?? 
false;
+        var skipValidation = GetBool(step, "skipValidation") ?? GetBool(step, "skip-validation") ?? false;
+        var includeHeader = GetBool(step, "includeHeader") ?? GetBool(step, "include-header") ?? true;
+        var include404 = GetBool(step, "includeErrorDocument404") ?? GetBool(step, "include-error-document-404") ?? false;
+
+        var linkOptions = BuildLinkLoadOptions(step, baseDir);
+        var dataSet = WebLinkService.Load(linkOptions);
+        if (strict && dataSet.UsedSources.Length == 0)
+            throw new InvalidOperationException("links-export-apache strict mode failed: no link source files were found.");
+
+        var validation = WebLinkService.Validate(dataSet);
+        if (!skipValidation && validation.ErrorCount > 0)
+        {
+            // Validation errors block the export; the reports are still written so the failure can be diagnosed.
+            WebLinkCommandSupport.WriteSummary(summaryPath, "export-apache", dataSet, validation, taskSuccess: false, export: null, baseline: null);
+            WebLinkCommandSupport.WriteIssueReport(reportPath, validation);
+            WebLinkCommandSupport.WriteDuplicateReport(duplicateReportPath, validation);
+            stepResult.Success = false;
+            stepResult.Message = $"links-export-apache failed validation: errors={validation.ErrorCount}; warnings={validation.WarningCount}";
+            return;
+        }
+
+        // Step-supplied output paths are relative to the pipeline base directory.
+        var stepOutputPath = ResolvePath(baseDir,
+            GetString(step, "out") ??
+            GetString(step, "output") ??
+            GetString(step, "outputPath") ??
+            GetString(step, "output-path") ??
+            GetString(step, "apacheOut") ??
+            GetString(step, "apache-out"));
+
+        // A config-supplied apacheOut is relative to the site config's own directory, matching how the
+        // other links tasks resolve config-supplied paths. The config is only consulted when the step
+        // gives no output path.
+        string? configOutputPath = null;
+        if (stepOutputPath is null)
+        {
+            var loadedLinks = LoadLinksSpec(step, baseDir);
+            configOutputPath = ResolvePath(loadedLinks.BaseDir ?? baseDir, loadedLinks.Spec?.ApacheOut);
+        }
+
+        var outputPath = stepOutputPath ??
+            configOutputPath ?? 
+ Path.GetFullPath(Path.Combine(baseDir, "deploy", "apache", "link-service-redirects.conf")); + + var export = WebLinkService.ExportApache(dataSet, new WebLinkApacheExportOptions + { + OutputPath = outputPath, + IncludeHeader = includeHeader, + IncludeErrorDocument404 = include404, + Hosts = linkOptions.Hosts, + LanguageRootHosts = linkOptions.LanguageRootHosts + }); + + WebLinkCommandSupport.WriteSummary(summaryPath, "export-apache", dataSet, validation, taskSuccess: true, export, baseline: null); + WebLinkCommandSupport.WriteIssueReport(reportPath, validation); + WebLinkCommandSupport.WriteDuplicateReport(duplicateReportPath, validation); + + stepResult.Success = true; + stepResult.Message = $"links-export-apache ok: rules={export.RuleCount}; redirects={validation.RedirectCount}; shortlinks={validation.ShortlinkCount}; warnings={validation.WarningCount}"; + } + + private static void ExecuteLinksImportWordPress(JsonElement step, string baseDir, WebPipelineStepResult stepResult) + { + var loaded = LoadLinksSpec(step, baseDir); + var links = loaded.Spec; + var linkBaseDir = loaded.BaseDir ?? baseDir; + var sourcePathValue = GetString(step, "source") ?? + GetString(step, "csv") ?? + GetString(step, "input") ?? + GetString(step, "in"); + var sourcePath = ResolvePath(baseDir, sourcePathValue); + if (string.IsNullOrWhiteSpace(sourcePath)) + throw new InvalidOperationException("links-import-wordpress requires source."); + + var outputPath = ResolvePath(baseDir, + GetString(step, "out") ?? + GetString(step, "output") ?? + GetString(step, "outputPath") ?? + GetString(step, "output-path") ?? + GetString(step, "shortlinks") ?? + GetString(step, "shortlinksPath") ?? + GetString(step, "shortlinks-path")) ?? 
+ ResolvePath(linkBaseDir, links?.Shortlinks); + if (string.IsNullOrWhiteSpace(outputPath)) + throw new InvalidOperationException("links-import-wordpress requires out or links.shortlinks config."); + + var hosts = BuildLinksHostMap(step, links); + var host = GetString(step, "host"); + if (string.IsNullOrWhiteSpace(host) && hosts.TryGetValue("short", out var shortHost)) + host = shortHost; + + var result = WebLinkService.ImportPrettyLinks(new WebLinkShortlinkImportOptions + { + SourcePath = sourcePath, + SourceOriginPath = sourcePathValue, + OutputPath = outputPath, + Host = host, + PathPrefix = GetString(step, "pathPrefix") ?? GetString(step, "path-prefix"), + Owner = GetString(step, "owner"), + Tags = GetArrayOfStrings(step, "tags") ?? GetArrayOfStrings(step, "tag") ?? Array.Empty(), + Status = GetInt(step, "status") ?? 302, + AllowExternal = !(GetBool(step, "allowExternal") == false || GetBool(step, "allow-external") == false), + MergeWithExisting = !(GetBool(step, "merge") == false || GetBool(step, "mergeWithExisting") == false || GetBool(step, "merge-with-existing") == false), + ReplaceExisting = GetBool(step, "replaceExisting") ?? GetBool(step, "replace-existing") ?? false + }); + + var summaryPath = ResolvePath(baseDir, GetString(step, "summaryPath") ?? GetString(step, "summary-path")); + WriteLinksImportSummary(summaryPath, result); + + stepResult.Success = true; + stepResult.Message = $"links-import-wordpress ok: imported={result.ImportedCount}; written={result.WrittenCount}; skippedDuplicates={result.SkippedDuplicateCount}"; + } + + private static void ExecuteLinksReport404(JsonElement step, string baseDir, WebPipelineStepResult stepResult) + { + var loaded = LoadLinksSpec(step, baseDir); + var links = loaded.Spec; + var linkBaseDir = loaded.BaseDir ?? baseDir; + var siteRoot = ResolvePath(baseDir, + GetString(step, "siteRoot") ?? + GetString(step, "site-root") ?? + GetString(step, "outRoot") ?? + GetString(step, "out-root")) ?? 
+ Path.GetFullPath(Path.Combine(baseDir, "_site")); + var sourcePath = ResolvePath(baseDir, + GetString(step, "source") ?? + GetString(step, "log") ?? + GetString(step, "input") ?? + GetString(step, "in")); + var reportPath = ResolvePath(baseDir, + GetString(step, "out") ?? + GetString(step, "output") ?? + GetString(step, "reportPath") ?? + GetString(step, "report-path")) ?? + Path.GetFullPath(Path.Combine(baseDir, "Build", "link-reports", "404-suggestions.json")); + var reviewCsvPath = ResolvePath(baseDir, + GetString(step, "reviewCsv") ?? + GetString(step, "review-csv") ?? + GetString(step, "reviewCsvPath") ?? + GetString(step, "review-csv-path") ?? + GetString(step, "csvReport") ?? + GetString(step, "csv-report")); + var ignored404Path = ResolvePath(baseDir, + GetString(step, "ignored404") ?? + GetString(step, "ignored-404") ?? + GetString(step, "ignored404Path") ?? + GetString(step, "ignored-404-path")) ?? + ResolvePath(linkBaseDir, links?.Ignored404); + + var result = WebLinkService.Generate404Report(new WebLink404ReportOptions + { + SiteRoot = siteRoot, + SourcePath = sourcePath, + Ignored404Path = ignored404Path, + AllowMissingSource = GetBool(step, "allowMissingSource") ?? GetBool(step, "allow-missing-source") ?? false, + MaxSuggestions = GetInt(step, "maxSuggestions") ?? GetInt(step, "max-suggestions") ?? 3, + MinimumScore = GetDouble(step, "minimumScore") ?? GetDouble(step, "minimum-score") ?? GetDouble(step, "minScore") ?? GetDouble(step, "min-score") ?? 0.35d, + IncludeAsset404s = GetBool(step, "includeAsset404s") ?? GetBool(step, "include-asset-404s") ?? GetBool(step, "includeAssets") ?? GetBool(step, "include-assets") ?? 
false + }); + + WriteLinks404Report(reportPath, result); + WebLinkCommandSupport.Write404SuggestionReviewCsv(reviewCsvPath, result); + + stepResult.Success = true; + stepResult.Message = $"links-report-404 ok: observations={result.ObservationCount}; ignored={result.IgnoredObservationCount}; suggested={result.SuggestedObservationCount}; routes={result.RouteCount}"; + } + + private static void ExecuteLinksPromote404(JsonElement step, string baseDir, WebPipelineStepResult stepResult) + { + var loaded = LoadLinksSpec(step, baseDir); + var links = loaded.Spec; + var linkBaseDir = loaded.BaseDir ?? baseDir; + var sourcePath = ResolvePath(baseDir, + GetString(step, "source") ?? + GetString(step, "report") ?? + GetString(step, "input") ?? + GetString(step, "in")); + if (string.IsNullOrWhiteSpace(sourcePath)) + throw new InvalidOperationException("links-promote-404 requires source."); + + var outputPath = ResolvePath(baseDir, + GetString(step, "out") ?? + GetString(step, "output") ?? + GetString(step, "outputPath") ?? + GetString(step, "output-path") ?? + GetString(step, "redirects") ?? + GetString(step, "redirectsPath") ?? + GetString(step, "redirects-path")) ?? + ResolvePath(linkBaseDir, links?.Redirects); + if (string.IsNullOrWhiteSpace(outputPath)) + throw new InvalidOperationException("links-promote-404 requires out or links.redirects config."); + var reviewCsvPath = ResolvePath(baseDir, + GetString(step, "reviewCsv") ?? + GetString(step, "review-csv") ?? + GetString(step, "reviewCsvPath") ?? + GetString(step, "review-csv-path") ?? + GetString(step, "csvReport") ?? + GetString(step, "csv-report")); + + var result = WebLinkService.Promote404Suggestions(new WebLink404PromoteOptions + { + SourcePath = sourcePath, + OutputPath = outputPath, + Enabled = GetBool(step, "enabled") ?? GetBool(step, "enable") ?? false, + MinimumScore = GetDouble(step, "minimumScore") ?? GetDouble(step, "minimum-score") ?? GetDouble(step, "minScore") ?? GetDouble(step, "min-score") ?? 
0.35d, + MinimumCount = GetInt(step, "minimumCount") ?? GetInt(step, "minimum-count") ?? GetInt(step, "minCount") ?? GetInt(step, "min-count") ?? 1, + Status = GetInt(step, "status") ?? 301, + Group = GetString(step, "group"), + MergeWithExisting = !(GetBool(step, "merge") == false || GetBool(step, "mergeWithExisting") == false || GetBool(step, "merge-with-existing") == false), + ReplaceExisting = GetBool(step, "replaceExisting") ?? GetBool(step, "replace-existing") ?? false + }); + + var summaryPath = ResolvePath(baseDir, GetString(step, "summaryPath") ?? GetString(step, "summary-path")); + WriteLinksPromoteSummary(summaryPath, result); + WebLinkCommandSupport.WriteRedirectReviewCsv(reviewCsvPath, result.OutputPath); + + stepResult.Success = true; + stepResult.Message = $"links-promote-404 ok: candidates={result.CandidateCount}; written={result.WrittenCount}; skippedDuplicates={result.SkippedDuplicateCount}"; + } + + private static void ExecuteLinksIgnore404(JsonElement step, string baseDir, WebPipelineStepResult stepResult) + { + var loaded = LoadLinksSpec(step, baseDir); + var links = loaded.Spec; + var linkBaseDir = loaded.BaseDir ?? baseDir; + var sourcePath = ResolvePath(baseDir, + GetString(step, "source") ?? + GetString(step, "report") ?? + GetString(step, "input") ?? + GetString(step, "in")); + if (string.IsNullOrWhiteSpace(sourcePath)) + throw new InvalidOperationException("links-ignore-404 requires source."); + + var outputPath = ResolvePath(baseDir, + GetString(step, "out") ?? + GetString(step, "output") ?? + GetString(step, "outputPath") ?? + GetString(step, "output-path") ?? + GetString(step, "ignored404") ?? + GetString(step, "ignored-404") ?? + GetString(step, "ignored404Path") ?? + GetString(step, "ignored-404-path")) ?? 
+ ResolvePath(linkBaseDir, links?.Ignored404); + if (string.IsNullOrWhiteSpace(outputPath)) + throw new InvalidOperationException("links-ignore-404 requires out or links.ignored404 config."); + var reviewCsvPath = ResolvePath(baseDir, + GetString(step, "reviewCsv") ?? + GetString(step, "review-csv") ?? + GetString(step, "reviewCsvPath") ?? + GetString(step, "review-csv-path") ?? + GetString(step, "csvReport") ?? + GetString(step, "csv-report")); + + var paths = GetArrayOfStrings(step, "paths") ?? GetArrayOfStrings(step, "path") ?? Array.Empty(); + var includeAll = GetBool(step, "all") ?? false; + var onlyWithoutSuggestions = GetBool(step, "withoutSuggestions") ?? GetBool(step, "without-suggestions") ?? false; + if (paths.Length == 0 && !includeAll && !onlyWithoutSuggestions) + throw new InvalidOperationException("links-ignore-404 requires paths, all:true, or withoutSuggestions:true."); + + var result = WebLinkService.Ignore404Suggestions(new WebLink404IgnoreOptions + { + SourcePath = sourcePath, + OutputPath = outputPath, + Paths = paths, + IncludeAll = includeAll, + OnlyWithoutSuggestions = onlyWithoutSuggestions, + Reason = GetString(step, "reason"), + CreatedBy = GetString(step, "createdBy") ?? GetString(step, "created-by"), + MergeWithExisting = !(GetBool(step, "merge") == false || GetBool(step, "mergeWithExisting") == false || GetBool(step, "merge-with-existing") == false), + ReplaceExisting = GetBool(step, "replaceExisting") ?? GetBool(step, "replace-existing") ?? false + }); + + var summaryPath = ResolvePath(baseDir, GetString(step, "summaryPath") ?? 
GetString(step, "summary-path")); + WriteLinksIgnoreSummary(summaryPath, result); + WebLinkCommandSupport.WriteIgnored404ReviewCsv(reviewCsvPath, result.OutputPath); + + stepResult.Success = true; + stepResult.Message = $"links-ignore-404 ok: candidates={result.CandidateCount}; written={result.WrittenCount}; skippedDuplicates={result.SkippedDuplicateCount}"; + } + + private static WebLinkLoadOptions BuildLinkLoadOptions(JsonElement step, string baseDir) + { + var loaded = LoadLinksSpec(step, baseDir); + var links = loaded.Spec; + var linkBaseDir = loaded.BaseDir ?? baseDir; + + var redirectsPath = ResolvePathForLinks(baseDir, linkBaseDir, + GetString(step, "redirects") ?? + GetString(step, "redirectsPath") ?? + GetString(step, "redirects-path"), + links?.Redirects); + + var shortlinksPath = ResolvePathForLinks(baseDir, linkBaseDir, + GetString(step, "shortlinks") ?? + GetString(step, "shortlinksPath") ?? + GetString(step, "shortlinks-path"), + links?.Shortlinks); + + var csvSources = GetArrayOfStrings(step, "sources") ?? + GetArrayOfStrings(step, "redirectCsvPaths") ?? + GetArrayOfStrings(step, "redirect-csv-paths") ?? + GetArrayOfStrings(step, "csvSources") ?? 
+ GetArrayOfStrings(step, "csv-sources"); + + var csvPaths = new List(); + if (csvSources is { Length: > 0 }) + { + foreach (var value in csvSources.Where(static value => !string.IsNullOrWhiteSpace(value))) + { + var resolved = ResolvePath(baseDir, value); + if (!string.IsNullOrWhiteSpace(resolved)) + csvPaths.Add(resolved); + } + } + else if (links?.RedirectCsvPaths is { Length: > 0 }) + { + foreach (var value in links.RedirectCsvPaths.Where(static value => !string.IsNullOrWhiteSpace(value))) + { + var resolved = ResolvePath(linkBaseDir, value); + if (!string.IsNullOrWhiteSpace(resolved)) + csvPaths.Add(resolved); + } + } + + return new WebLinkLoadOptions + { + RedirectsPath = redirectsPath, + ShortlinksPath = shortlinksPath, + RedirectCsvPaths = csvPaths.Distinct(StringComparer.OrdinalIgnoreCase).ToArray(), + Hosts = BuildLinksHostMap(step, links), + LanguageRootHosts = BuildLinksLanguageRootHostMap(step, links) + }; + } + + private static (LinkServiceSpec? Spec, string? BaseDir) LoadLinksSpec(JsonElement step, string baseDir) + { + var configPath = ResolvePath(baseDir, GetString(step, "config")); + if (string.IsNullOrWhiteSpace(configPath) || !File.Exists(configPath)) + return (null, null); + + var (siteSpec, siteSpecPath) = WebSiteSpecLoader.LoadWithPath(configPath, WebCliJson.Options); + return (siteSpec.Links, Path.GetDirectoryName(siteSpecPath) ?? baseDir); + } + + private static string? ResolvePathForLinks(string stepBaseDir, string configBaseDir, string? stepValue, string? configValue) + { + if (!string.IsNullOrWhiteSpace(stepValue)) + return ResolvePath(stepBaseDir, stepValue); + return string.IsNullOrWhiteSpace(configValue) ? null : ResolvePath(configBaseDir, configValue); + } + + private static IReadOnlyDictionary BuildLinksHostMap(JsonElement step, LinkServiceSpec? 
links)
+    {
+        // Config-supplied aliases are applied first; step-supplied aliases override them below.
+        var hosts = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
+        if (links?.Hosts is not null)
+        {
+            foreach (var pair in links.Hosts)
+            {
+                // Trim keys and values the same way as the step branch, so a host behaves
+                // identically regardless of where it was configured.
+                if (!string.IsNullOrWhiteSpace(pair.Key) && !string.IsNullOrWhiteSpace(pair.Value))
+                    hosts[pair.Key.Trim()] = pair.Value.Trim();
+            }
+        }
+
+        if ((step.TryGetProperty("hosts", out var hostsElement) ||
+             step.TryGetProperty("hostMap", out hostsElement) ||
+             step.TryGetProperty("host-map", out hostsElement)) &&
+            hostsElement.ValueKind == JsonValueKind.Object)
+        {
+            foreach (var property in hostsElement.EnumerateObject())
+            {
+                // Non-string JSON values fall back to their raw JSON text.
+                var value = property.Value.ValueKind == JsonValueKind.String
+                    ? property.Value.GetString()
+                    : property.Value.ToString();
+                if (!string.IsNullOrWhiteSpace(property.Name) && !string.IsNullOrWhiteSpace(value))
+                    hosts[property.Name.Trim()] = value.Trim();
+            }
+        }
+
+        return hosts;
+    }
+
+    /// <summary>
+    /// Builds the host-to-language-prefix map from the site config and the step's
+    /// <c>languageRootHosts</c> / <c>language-root-hosts</c> object; step entries override config entries.
+    /// </summary>
+    /// <param name="step">Pipeline step JSON that may carry an override object.</param>
+    /// <param name="links">Link service config loaded from the site spec, or <c>null</c>.</param>
+    /// <returns>A case-insensitive map whose values are trimmed of whitespace and surrounding slashes.</returns>
+    private static IReadOnlyDictionary<string, string> BuildLinksLanguageRootHostMap(JsonElement step, LinkServiceSpec? links)
+    {
+        var hosts = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
+        if (links?.LanguageRootHosts is not null)
+        {
+            foreach (var pair in links.LanguageRootHosts)
+            {
+                if (!string.IsNullOrWhiteSpace(pair.Key) && !string.IsNullOrWhiteSpace(pair.Value))
+                    hosts[pair.Key.Trim()] = pair.Value.Trim().Trim('/');
+            }
+        }
+
+        if ((step.TryGetProperty("languageRootHosts", out var hostsElement) ||
+             step.TryGetProperty("language-root-hosts", out hostsElement)) &&
+            hostsElement.ValueKind == JsonValueKind.Object)
+        {
+            foreach (var property in hostsElement.EnumerateObject())
+            {
+                var value = property.Value.ValueKind == JsonValueKind.String
+                    ? property.Value.GetString()
+                    : property.Value.ToString();
+                if (!string.IsNullOrWhiteSpace(property.Name) && !string.IsNullOrWhiteSpace(value))
+                    hosts[property.Name.Trim()] = value.Trim().Trim('/');
+            }
+        }
+
+        return hosts;
+    }
+
+    private static void WriteLinksImportSummary(string? 
summaryPath, WebLinkShortlinkImportResult result) + { + if (string.IsNullOrWhiteSpace(summaryPath)) + return; + + var summaryDirectory = Path.GetDirectoryName(summaryPath); + if (!string.IsNullOrWhiteSpace(summaryDirectory)) + Directory.CreateDirectory(summaryDirectory); + + File.WriteAllText(summaryPath, JsonSerializer.Serialize(result, LinksImportJsonOptions)); + } + + private static void WriteLinks404Report(string reportPath, WebLink404ReportResult result) + { + var directory = Path.GetDirectoryName(reportPath); + if (!string.IsNullOrWhiteSpace(directory)) + Directory.CreateDirectory(directory); + + File.WriteAllText(reportPath, JsonSerializer.Serialize(result, LinksImportJsonOptions)); + } + + private static void WriteLinksPromoteSummary(string? summaryPath, WebLink404PromoteResult result) + { + if (string.IsNullOrWhiteSpace(summaryPath)) + return; + + var summaryDirectory = Path.GetDirectoryName(summaryPath); + if (!string.IsNullOrWhiteSpace(summaryDirectory)) + Directory.CreateDirectory(summaryDirectory); + + File.WriteAllText(summaryPath, JsonSerializer.Serialize(result, LinksImportJsonOptions)); + } + + private static void WriteLinksIgnoreSummary(string? 
summaryPath, WebLink404IgnoreResult result) + { + if (string.IsNullOrWhiteSpace(summaryPath)) + return; + + var summaryDirectory = Path.GetDirectoryName(summaryPath); + if (!string.IsNullOrWhiteSpace(summaryDirectory)) + Directory.CreateDirectory(summaryDirectory); + + File.WriteAllText(summaryPath, JsonSerializer.Serialize(result, LinksImportJsonOptions)); + } + + private static readonly JsonSerializerOptions LinksImportJsonOptions = new() + { + PropertyNamingPolicy = JsonNamingPolicy.CamelCase, + PropertyNameCaseInsensitive = true, + DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull, + WriteIndented = true, + Converters = { new JsonStringEnumConverter(JsonNamingPolicy.CamelCase) } + }; +} diff --git a/PowerForge.Web.Cli/WebPipelineRunner.Tasks.cs b/PowerForge.Web.Cli/WebPipelineRunner.Tasks.cs index 4094558d..378d8146 100644 --- a/PowerForge.Web.Cli/WebPipelineRunner.Tasks.cs +++ b/PowerForge.Web.Cli/WebPipelineRunner.Tasks.cs @@ -153,6 +153,37 @@ private static void ExecuteTask( case "apache-redirect": ExecuteApacheRedirects(step, baseDir, stepResult); break; + case "links-validate": + case "link-validate": + case "links": + ExecuteLinksValidate(step, baseDir, stepResult); + break; + case "links-export-apache": + case "link-export-apache": + case "links-export": + ExecuteLinksExportApache(step, baseDir, stepResult); + break; + case "links-import-wordpress": + case "link-import-wordpress": + case "links-import-pretty-links": + case "links-import": + ExecuteLinksImportWordPress(step, baseDir, stepResult); + break; + case "links-report-404": + case "link-report-404": + case "links-report": + ExecuteLinksReport404(step, baseDir, stepResult); + break; + case "links-promote-404": + case "link-promote-404": + case "links-promote": + ExecuteLinksPromote404(step, baseDir, stepResult); + break; + case "links-ignore-404": + case "link-ignore-404": + case "links-ignore": + ExecuteLinksIgnore404(step, baseDir, stepResult); + break; case "wordpress-normalize": case 
"wordpress-normalize-content": case "normalize-wordpress-content": diff --git a/PowerForge.Web/Models/LinksSpec.cs b/PowerForge.Web/Models/LinksSpec.cs new file mode 100644 index 00000000..ec8d7a1c --- /dev/null +++ b/PowerForge.Web/Models/LinksSpec.cs @@ -0,0 +1,229 @@ +using System; +using System.Collections.Generic; + +namespace PowerForge.Web; + +/// Configures reusable redirect, shortlink, and 404 workflow data for a site. +public sealed class LinkServiceSpec +{ + /// Path to committed redirect rules JSON. + public string? Redirects { get; set; } + /// Path to committed shortlink rules JSON. + public string? Shortlinks { get; set; } + /// Path to committed ignored 404 JSON. + public string? Ignored404 { get; set; } + /// Path to committed link group metadata JSON. + public string? Groups { get; set; } + /// Compatibility CSV inputs such as legacy WordPress redirect maps. + public string[] RedirectCsvPaths { get; set; } = Array.Empty(); + /// Apache include output path. + public string? ApacheOut { get; set; } + /// Named host aliases, for example en, pl, or short. + public Dictionary Hosts { get; set; } = new(StringComparer.OrdinalIgnoreCase); + /// Host-to-language-prefix map for domains where a language is deployed at the web root, for example evotec.pl => pl. + public Dictionary LanguageRootHosts { get; set; } = new(StringComparer.OrdinalIgnoreCase); +} + +/// Match strategy for link-service redirects. +public enum LinkRedirectMatchType +{ + /// Match one normalized path, allowing an optional trailing slash. + Exact, + /// Match a path prefix and optionally carry the suffix into the target. + Prefix, + /// Match using a host runtime regular expression. + Regex, + /// Match a path plus query string condition. + Query +} + +/// Defines an engine-owned redirect rule for static export or dynamic serving. +public sealed class LinkRedirectRule +{ + /// Stable identifier used in reports and admin workflows. 
+ public string Id { get; set; } = string.Empty; + /// When false, the rule is ignored by validation/export. + public bool Enabled { get; set; } = true; + /// Optional host scope such as evotec.xyz, evotec.pl, evo.yt, or *. + public string? SourceHost { get; set; } + /// Source path or regex pattern, normalized as a root-relative path for non-regex rules. + public string SourcePath { get; set; } = string.Empty; + /// Optional exact query string match without a leading question mark. + public string? SourceQuery { get; set; } + /// Source matching strategy. + public LinkRedirectMatchType MatchType { get; set; } = LinkRedirectMatchType.Exact; + /// Absolute URL or root-relative path target; optional only for 410 Gone rules. + public string TargetUrl { get; set; } = string.Empty; + /// HTTP status code to emit. + public int Status { get; set; } = 301; + /// When true, preserve the incoming query string if the rule does not already match a source query. + public bool PreserveQuery { get; set; } + /// Ordering hint; higher priority rules are emitted earlier. + public int Priority { get; set; } + /// Logical grouping, for example legacy-wordpress, amp, manual, or campaign. + public string? Group { get; set; } + /// Origin of the rule, for example manual, generated, imported-wordpress, or 404-promoted. + public string? Source { get; set; } + /// Human-readable review note. + public string? Notes { get; set; } + /// Allows redirects to absolute external HTTP/HTTPS targets. + public bool AllowExternal { get; set; } + /// Creation timestamp, preferably ISO 8601. + public string? CreatedAt { get; set; } + /// Update timestamp, preferably ISO 8601. + public string? UpdatedAt { get; set; } + /// Creator identifier for audit/review workflows. + public string? CreatedBy { get; set; } + /// Updater identifier for audit/review workflows. + public string? UpdatedBy { get; set; } + /// Optional expiry timestamp for temporary redirects. + public string? 
ExpiresAt { get; set; } + /// Resolved source file for imported/generated diagnostics. + public string? OriginPath { get; set; } + /// One-based source line for imported/generated diagnostics. + public int OriginLine { get; set; } +} + +/// Defines a reusable branded shortlink. +public sealed class LinkShortlinkRule +{ + /// URL-safe short slug. + public string Slug { get; set; } = string.Empty; + /// Optional host scope, such as evo.yt. + public string? Host { get; set; } + /// Optional path prefix; defaults to /go unless the host is configured as the short host. + public string? PathPrefix { get; set; } + /// Explicit shortlink destination. + public string TargetUrl { get; set; } = string.Empty; + /// HTTP status code; campaign/share links usually use 302. + public int Status { get; set; } = 302; + /// Friendly label for admin/search reports. + public string? Title { get; set; } + /// Optional context for maintainers. + public string? Description { get; set; } + /// Optional grouping tags. + public string[] Tags { get; set; } = Array.Empty(); + /// Owning person, team, or project. + public string? Owner { get; set; } + /// Optional UTM query template appended during export. + public string? Utm { get; set; } + /// Origin of the rule, for example manual, imported-pretty-links, or campaign. + public string? Source { get; set; } + /// Human-readable review note. + public string? Notes { get; set; } + /// Imported historical hit/click count from a previous shortlink system. + public int ImportedHits { get; set; } + /// When false, the shortlink is ignored by validation/export. + public bool Enabled { get; set; } = true; + /// Allows absolute external HTTP/HTTPS targets. + public bool AllowExternal { get; set; } + /// Creation timestamp, preferably ISO 8601. + public string? CreatedAt { get; set; } + /// Update timestamp, preferably ISO 8601. + public string? UpdatedAt { get; set; } + /// Last target health-check timestamp, preferably ISO 8601. + public string? 
LastCheckedAt { get; set; } + /// Resolved source file for imported/generated diagnostics. + public string? OriginPath { get; set; } + /// One-based source line for imported/generated diagnostics. + public int OriginLine { get; set; } +} + +/// Defines a 404 observation that should be ignored by review workflows. +public sealed class Ignored404Rule +{ + /// Ignored path or pattern. + public string Path { get; set; } = string.Empty; + /// Optional host scope. + public string? Host { get; set; } + /// Reason this 404 should not create review noise. + public string? Reason { get; set; } + /// Creation timestamp, preferably ISO 8601. + public string? CreatedAt { get; set; } + /// Creator identifier for review workflows. + public string? CreatedBy { get; set; } +} + +/// Metadata for grouping redirect and shortlink records. +public sealed class LinkGroupSpec +{ + /// Stable group identifier. + public string Id { get; set; } = string.Empty; + /// Human-readable group title. + public string? Title { get; set; } + /// Optional group description. + public string? Description { get; set; } + /// Owning person, team, or project. + public string? Owner { get; set; } +} + +/// Severity for link-service validation issues. +public enum LinkValidationSeverity +{ + /// Informational issue. + Info, + /// Reviewable warning. + Warning, + /// Blocking validation error. + Error +} + +/// One validation issue produced by the link service. +public sealed class LinkValidationIssue +{ + /// Issue severity. + public LinkValidationSeverity Severity { get; set; } + /// Stable diagnostic code. + public string Code { get; set; } = string.Empty; + /// Human-readable issue message. + public string Message { get; set; } = string.Empty; + /// Rule source, such as redirect or shortlink. + public string? Source { get; set; } + /// Rule identifier or slug. + public string? Id { get; set; } + /// Related rule identifier, used for conflicts. + public string? 
RelatedId { get; set; } + /// Source host involved in the issue. + public string? SourceHost { get; set; } + /// Source path involved in the issue. + public string? SourcePath { get; set; } + /// Source query involved in the issue. + public string? SourceQuery { get; set; } + /// Target URL involved in the issue. + public string? TargetUrl { get; set; } + /// Related target URL, used for conflicts. + public string? RelatedTargetUrl { get; set; } + /// Normalized target URL used for duplicate comparison. + public string? NormalizedTargetUrl { get; set; } + /// Related normalized target URL used for duplicate comparison. + public string? RelatedNormalizedTargetUrl { get; set; } + /// HTTP status involved in the issue. + public int Status { get; set; } + /// Related HTTP status involved in the issue. + public int RelatedStatus { get; set; } + /// Resolved source file for diagnostics. + public string? OriginPath { get; set; } + /// One-based source line for diagnostics. + public int OriginLine { get; set; } + /// Related source file for diagnostics. + public string? RelatedOriginPath { get; set; } + /// Related one-based source line for diagnostics. + public int RelatedOriginLine { get; set; } +} + +/// Validation result for a link-service data set. +public sealed class LinkValidationResult +{ + /// Validation issues. + public LinkValidationIssue[] Issues { get; set; } = Array.Empty(); + /// Enabled redirect count. + public int RedirectCount { get; set; } + /// Enabled shortlink count. + public int ShortlinkCount { get; set; } + /// Error count. + public int ErrorCount { get; set; } + /// Warning count. + public int WarningCount { get; set; } + /// True when no errors were found. 
+ public bool Success => ErrorCount == 0; +} diff --git a/PowerForge.Web/Models/SiteSpec.cs b/PowerForge.Web/Models/SiteSpec.cs index 466e6e47..682c70a4 100644 --- a/PowerForge.Web/Models/SiteSpec.cs +++ b/PowerForge.Web/Models/SiteSpec.cs @@ -68,6 +68,8 @@ public sealed class SiteSpec public RedirectSpec[] RouteOverrides { get; set; } = Array.Empty(); /// Redirect rules for legacy URLs. public RedirectSpec[] Redirects { get; set; } = Array.Empty(); + /// Reusable redirect, shortlink, and 404 workflow configuration. + public LinkServiceSpec? Links { get; set; } /// When enabled, emit legacy /amp redirects to canonical routes for generated pages. public bool EnableLegacyAmpRedirects { get; set; } diff --git a/PowerForge.Web/Services/WebLinkService.ApplyReview.cs b/PowerForge.Web/Services/WebLinkService.ApplyReview.cs new file mode 100644 index 00000000..0c7b04b1 --- /dev/null +++ b/PowerForge.Web/Services/WebLinkService.ApplyReview.cs @@ -0,0 +1,152 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; + +namespace PowerForge.Web; + +public static partial class WebLinkService +{ + /// Applies reviewed redirect and ignored-404 candidate files into committed link data. 
+    public static WebLinkReviewApplyResult ApplyReviewCandidates(WebLinkReviewApplyOptions options)
+    {
+        ArgumentNullException.ThrowIfNull(options);
+        if (!options.ApplyRedirects && !options.ApplyIgnored404) // neither section selected: reject instead of returning an empty result
+            throw new ArgumentException("Choose at least one target: redirects or ignored404.", nameof(options));
+
+        var result = new WebLinkReviewApplyResult
+        {
+            DryRun = options.DryRun
+        };
+
+        if (options.ApplyRedirects) // each selected section is merged on its own; an unselected section's summary is never assigned
+            result.Redirects = ApplyRedirectCandidates(options);
+        if (options.ApplyIgnored404)
+            result.Ignored404 = ApplyIgnored404Candidates(options);
+
+        return result;
+    }
+
+    private static WebLinkReviewApplySection ApplyRedirectCandidates(WebLinkReviewApplyOptions options) // merges the redirect candidate file into the committed redirects file and reports the counts
+    {
+        if (string.IsNullOrWhiteSpace(options.RedirectCandidatesPath))
+            throw new ArgumentException("RedirectCandidatesPath is required when applying redirects.", nameof(options));
+        if (string.IsNullOrWhiteSpace(options.RedirectsPath))
+            throw new ArgumentException("RedirectsPath is required when applying redirects.", nameof(options));
+
+        var candidatePath = Path.GetFullPath(options.RedirectCandidatesPath);
+        var targetPath = Path.GetFullPath(options.RedirectsPath);
+        var existing = File.Exists(targetPath) // a missing target file is treated as an empty rule set
+            ? ReadExistingRedirects(targetPath)
+            : new List();
+        var candidates = File.Exists(candidatePath) // a missing candidate file yields zero candidates rather than an error
+            ? ReadExistingRedirects(candidatePath)
+            : new List();
+
+        if (options.EnableRedirects) // force Enabled on every candidate before merging
+        {
+            foreach (var candidate in candidates)
+                candidate.Enabled = true;
+        }
+
+        var merged = MergeRedirectCandidates(existing, candidates, options.ReplaceExisting, out var skipped, out var replaced);
+        if (!options.DryRun) // dry run: counts are still computed, the target file is left untouched
+            WriteRedirectJson(targetPath, merged);
+
+        return new WebLinkReviewApplySection
+        {
+            CandidatePath = candidatePath,
+            TargetPath = targetPath,
+            ExistingCount = existing.Count,
+            CandidateCount = candidates.Count,
+            WrittenCount = merged.Count, // size of the merged set; reported for dry runs too, when nothing is written
+            SkippedDuplicateCount = skipped,
+            ReplacedCount = replaced
+        };
+    }
+
+    private static WebLinkReviewApplySection ApplyIgnored404Candidates(WebLinkReviewApplyOptions options) // same flow as ApplyRedirectCandidates, for ignored-404 rules
+    {
+        if (string.IsNullOrWhiteSpace(options.Ignored404CandidatesPath))
+            throw new ArgumentException("Ignored404CandidatesPath is required when applying ignored404.", nameof(options));
+        if (string.IsNullOrWhiteSpace(options.Ignored404Path))
+            throw new ArgumentException("Ignored404Path is required when applying ignored404.", nameof(options));
+
+        var candidatePath = Path.GetFullPath(options.Ignored404CandidatesPath);
+        var targetPath = Path.GetFullPath(options.Ignored404Path);
+        var existing = File.Exists(targetPath) // a missing target file is treated as an empty rule set
+            ? LoadIgnored404Rules(targetPath).ToList()
+            : new List();
+        var candidates = File.Exists(candidatePath) // a missing candidate file yields zero candidates rather than an error
+            ? LoadIgnored404Rules(candidatePath).ToList()
+            : new List();
+
+        var merged = MergeIgnored404Rules(existing, candidates, options.ReplaceExisting, out var skipped, out var replaced);
+        if (!options.DryRun) // dry run: counts are still computed, the target file is left untouched
+            WriteIgnored404Json(targetPath, merged);
+
+        return new WebLinkReviewApplySection
+        {
+            CandidatePath = candidatePath,
+            TargetPath = targetPath,
+            ExistingCount = existing.Count,
+            CandidateCount = candidates.Count,
+            WrittenCount = merged.Count, // size of the merged set; reported for dry runs too, when nothing is written
+            SkippedDuplicateCount = skipped,
+            ReplacedCount = replaced
+        };
+    }
+}
+
+/// Options for applying reviewed link-service candidate files.
+public sealed class WebLinkReviewApplyOptions
+{
+    /// Apply redirect candidates.
+    public bool ApplyRedirects { get; set; }
+    /// Apply ignored-404 candidates.
+    public bool ApplyIgnored404 { get; set; }
+    /// Candidate redirect JSON path.
+    public string? RedirectCandidatesPath { get; set; }
+    /// Committed redirect JSON path.
+    public string? RedirectsPath { get; set; }
+    /// Candidate ignored-404 JSON path.
+    public string? Ignored404CandidatesPath { get; set; }
+    /// Committed ignored-404 JSON path.
+    public string? Ignored404Path { get; set; }
+    /// Replace existing rows that have the same merge key.
+    public bool ReplaceExisting { get; set; }
+    /// Enable redirect candidates before writing them.
+    public bool EnableRedirects { get; set; }
+    /// Compute the merge result without writing target files.
+    public bool DryRun { get; set; }
+}
+
+/// Result from applying reviewed link-service candidate files.
+public sealed class WebLinkReviewApplyResult
+{
+    /// True when target files were not written.
+    public bool DryRun { get; set; }
+    /// Redirect merge summary.
+    public WebLinkReviewApplySection? Redirects { get; set; }
+    /// Ignored-404 merge summary.
+    public WebLinkReviewApplySection? Ignored404 { get; set; }
+}
+
+/// Per-file merge summary for link review candidate application.
+public sealed class WebLinkReviewApplySection
+{
+    /// Resolved candidate file path.
+    public string CandidatePath { get; set; } = string.Empty;
+    /// Resolved target file path.
+    public string TargetPath { get; set; } = string.Empty;
+    /// Rows loaded from the target before merge.
+    public int ExistingCount { get; set; }
+    /// Rows loaded from the candidate file.
+    public int CandidateCount { get; set; }
+    /// Rows in the merged output.
+    public int WrittenCount { get; set; }
+    /// Candidate rows skipped because an existing row used the same key.
+    public int SkippedDuplicateCount { get; set; }
+    /// Existing rows replaced because replacement was requested.
+    public int ReplacedCount { get; set; }
+}
diff --git a/PowerForge.Web/Services/WebLinkService.ExportApache.cs b/PowerForge.Web/Services/WebLinkService.ExportApache.cs
new file mode 100644
index 00000000..dff50637
--- /dev/null
+++ b/PowerForge.Web/Services/WebLinkService.ExportApache.cs
@@ -0,0 +1,244 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text.RegularExpressions;
+
+namespace PowerForge.Web;
+
+/// Apache export operations for the PowerForge.Web link service.
+public static partial class WebLinkService
+{
+    /// Exports enabled redirect and shortlink rules as an Apache rewrite include.
+    public static WebLinkApacheExportResult ExportApache(WebLinkDataSet dataSet, WebLinkApacheExportOptions options)
+    {
+        ArgumentNullException.ThrowIfNull(dataSet);
+        ArgumentNullException.ThrowIfNull(options);
+
+        if (string.IsNullOrWhiteSpace(options.OutputPath))
+            throw new InvalidOperationException("Apache export requires an output path.");
+
+        var lines = new List();
+        if (options.IncludeHeader)
+        {
+            lines.Add("# Auto-generated by PowerForge.Web link service");
+            lines.Add("# Include this file from your Apache vhost before static file fallback.");
+        }
+
+        if (options.IncludeErrorDocument404)
+            lines.Add("ErrorDocument 404 /404.html");
+
+        lines.Add("RewriteEngine On");
+        lines.Add(string.Empty);
+
+        var rules = BuildExportRedirects(dataSet, options)
+            .OrderByDescending(static rule => rule.Priority) // higher priority is emitted first, so it wins under the [L] flag
+            .ThenBy(static rule => MatchTypeOrder(rule.MatchType))
+            .ThenBy(static rule => rule.SourceHost ?? string.Empty, StringComparer.OrdinalIgnoreCase)
+            .ThenBy(static rule => rule.SourcePath, StringComparer.OrdinalIgnoreCase)
+            .ThenBy(static rule => rule.SourceQuery ?? string.Empty, StringComparer.OrdinalIgnoreCase)
+            .ToArray();
+
+        var ruleCount = 0;
+        foreach (var rule in rules)
+        {
+            if (!TryAppendApacheRule(lines, rule, options.LanguageRootHosts)) // rules that cannot be expressed are skipped and not counted
+                continue;
+            ruleCount++;
+        }
+
+        var outputDirectory = Path.GetDirectoryName(options.OutputPath);
+        if (!string.IsNullOrWhiteSpace(outputDirectory))
+            Directory.CreateDirectory(outputDirectory);
+
+        File.WriteAllText(options.OutputPath, string.Join(Environment.NewLine, lines), Utf8NoBom);
+
+        return new WebLinkApacheExportResult
+        {
+            OutputPath = Path.GetFullPath(options.OutputPath),
+            RuleCount = ruleCount
+        };
+    }
+
+    private static LinkRedirectRule[] BuildExportRedirects(WebLinkDataSet dataSet, WebLinkApacheExportOptions options) // enabled redirects plus enabled shortlinks converted to exact-match rules, one per redirect key
+    {
+        var redirects = new List();
+        redirects.AddRange(dataSet.Redirects.Where(static redirect => redirect is not null && redirect.Enabled));
+
+        foreach (var shortlink in dataSet.Shortlinks.Where(static item => item is not null && item.Enabled))
+        {
+            redirects.Add(new LinkRedirectRule
+            {
+                Id = "shortlink:" + shortlink.Slug,
+                Enabled = true,
+                SourceHost = shortlink.Host,
+                SourcePath = NormalizeShortlinkPath(shortlink, options.Hosts),
+                MatchType = LinkRedirectMatchType.Exact,
+                TargetUrl = AppendUtm(shortlink.TargetUrl, shortlink.Utm),
+                Status = shortlink.Status <= 0 ? 302 : shortlink.Status, // an unset or invalid status falls back to 302
+                PreserveQuery = false,
+                Priority = 1000, // fixed priority given to every shortlink-derived rule
+                Group = "shortlinks",
+                Source = "shortlink",
+                Notes = shortlink.Title,
+                AllowExternal = shortlink.AllowExternal
+            });
+        }
+
+        return redirects
+            .GroupBy(BuildRedirectKey, StringComparer.OrdinalIgnoreCase) // rules sharing a redirect key collapse to a single exported rule
+            .Select(group => group
+                .OrderByDescending(static rule => rule.Priority)
+                .ThenBy(static rule => SourceRank(rule.Source))
+                .ThenBy(static rule => string.IsNullOrWhiteSpace(rule.OriginPath) ? string.Empty : rule.OriginPath, StringComparer.OrdinalIgnoreCase)
+                .ThenBy(static rule => rule.OriginLine)
+                .ThenBy(rule => NormalizeTargetForCompare(rule.TargetUrl, rule.SourceHost, options.LanguageRootHosts), StringComparer.OrdinalIgnoreCase)
+                .First()) // winner after the tie-break ordering above
+            .ToArray();
+    }
+
+    private static bool TryAppendApacheRule( // appends the RewriteCond/RewriteRule lines for one rule; false when the rule cannot be expressed
+        List lines,
+        LinkRedirectRule rule,
+        IReadOnlyDictionary? languageRootHosts)
+    {
+        if (rule.Status == 410) // Gone: emitted as "- [G,L]", the target is not used
+        {
+            if (!TryBuildApachePattern(rule, out var gonePattern, out _))
+                return false;
+
+            AppendHostCondition(lines, rule.SourceHost);
+            AppendQueryCondition(lines, rule.SourceQuery);
+            lines.Add($"RewriteRule {gonePattern} - [G,L]");
+            lines.Add(string.Empty);
+            return true;
+        }
+
+        if (!TryBuildApachePattern(rule, out var pattern, out var destination))
+            return false;
+
+        destination = NormalizeApacheDestination(destination, rule.SourceHost, languageRootHosts);
+
+        AppendHostCondition(lines, rule.SourceHost);
+        AppendQueryCondition(lines, rule.SourceQuery);
+
+        var hasSourceQuery = !string.IsNullOrWhiteSpace(rule.SourceQuery);
+        var flags = new List
+        {
+            $"R={ResolveStatus(rule.Status, defaultStatus: 301)}",
+            "L",
+            rule.PreserveQuery && !hasSourceQuery ? "QSA" : "QSD" // keep the incoming query only when requested and no source query was matched
+        };
+        lines.Add($"RewriteRule {pattern} {destination} [{string.Join(",", flags)}]");
+        lines.Add(string.Empty);
+        return true;
+    }
+
+    private static string NormalizeApacheDestination( // strips the language prefix from targets on hosts listed in languageRootHosts
+        string destination,
+        string? sourceHost,
+        IReadOnlyDictionary? languageRootHosts)
+    {
+        if (string.IsNullOrWhiteSpace(destination) ||
+            string.IsNullOrWhiteSpace(sourceHost) ||
+            languageRootHosts is null ||
+            !languageRootHosts.TryGetValue(sourceHost.Trim(), out var language) ||
+            string.IsNullOrWhiteSpace(language))
+        {
+            return destination; // nothing to rewrite unless the source host maps to a language
+        }
+
+        if (!IsHttpUrl(destination)) // targets IsHttpUrl rejects (presumably root-relative paths) are stripped directly
+            return StripLanguageRootPrefix(destination, language);
+
+        if (!Uri.TryCreate(destination, UriKind.Absolute, out var uri) ||
+            !uri.Host.Equals(sourceHost.Trim(), StringComparison.OrdinalIgnoreCase)) // only absolute targets that point back at the same host are rewritten
+        {
+            return destination;
+        }
+
+        var strippedPath = StripLanguageRootPrefix(uri.AbsolutePath, language);
+        if (strippedPath.Equals(uri.AbsolutePath, StringComparison.Ordinal)) // no prefix present: return the original text untouched
+            return destination;
+
+        var builder = new UriBuilder(uri)
+        {
+            Path = strippedPath
+        };
+        return builder.Uri.ToString();
+    }
+
+    private static bool TryBuildApachePattern(LinkRedirectRule rule, out string pattern, out string destination) // builds the RewriteRule pattern and substitution; false when the rule has no source path or no usable target
+    {
+        pattern = string.Empty;
+        destination = string.Empty;
+        if (rule is null || string.IsNullOrWhiteSpace(rule.SourcePath))
+            return false;
+
+        switch (rule.MatchType)
+        {
+            case LinkRedirectMatchType.Prefix:
+            {
+                var prefix = NormalizeSourcePath(rule.SourcePath);
+                var starIndex = prefix.IndexOf('*');
+                if (starIndex >= 0) // a trailing wildcard is implied by the prefix match: cut at the first '*'
+                    prefix = prefix.Substring(0, starIndex);
+                prefix = prefix.Trim('/');
+                pattern = string.IsNullOrWhiteSpace(prefix)
+                    ? "^(.*)$"
+                    : $"^{Regex.Escape(prefix)}(?:/(.*))?$";
+                destination = NormalizeDestination(rule.TargetUrl).Replace("{path}", "$1", StringComparison.OrdinalIgnoreCase);
+                return rule.Status == 410 || !string.IsNullOrWhiteSpace(destination); // 410 Gone needs no target; previously a prefix Gone rule without one was dropped from the export
+            }
+            case LinkRedirectMatchType.Regex:
+            {
+                var regex = rule.SourcePath.Trim();
+                if (regex.StartsWith("/", StringComparison.Ordinal))
+                    regex = regex.TrimStart('/');
+                pattern = regex.StartsWith("^", StringComparison.Ordinal) ? regex : "^" + regex;
+                destination = NormalizeDestination(rule.TargetUrl).Replace("{path}", "$1", StringComparison.OrdinalIgnoreCase);
+                return rule.Status == 410 || !string.IsNullOrWhiteSpace(destination); // 410 Gone needs no target; previously a regex Gone rule without one was dropped from the export
+            }
+            case LinkRedirectMatchType.Query:
+            case LinkRedirectMatchType.Exact:
+            default:
+            {
+                var exact = NormalizeSourcePath(rule.SourcePath).Trim('/');
+                pattern = string.IsNullOrWhiteSpace(exact) ? "^$" : $"^{Regex.Escape(exact)}/?$"; // optional trailing slash
+                destination = NormalizeDestination(rule.TargetUrl);
+                return rule.Status == 410 || !string.IsNullOrWhiteSpace(destination);
+            }
+        }
+    }
+
+    private static void AppendHostCondition(List lines, string? host) // adds a RewriteCond on HTTP_HOST when the rule is host-scoped
+    {
+        if (string.IsNullOrWhiteSpace(host) || host.Trim().Equals("*", StringComparison.Ordinal)) // no host or "*": the rule is not host-scoped
+            return;
+
+        lines.Add($"RewriteCond %{{HTTP_HOST}} ^(.+\\.)?{Regex.Escape(host.Trim())}$ [NC]"); // the optional "(.+\.)" group also matches any subdomain of the host
+    }
+
+    private static void AppendQueryCondition(List lines, string? query) // adds a RewriteCond on QUERY_STRING when the rule has a source query
+    {
+        if (string.IsNullOrWhiteSpace(query))
+            return;
+
+        var trimmed = query.Trim().TrimStart('?');
+        var postMatch = LegacyPostIdRegex.Match("/?" + trimmed);
+        if (postMatch.Success) // p=<captured value> may appear anywhere in the query string
+        {
+            lines.Add($"RewriteCond %{{QUERY_STRING}} (^|&)p={postMatch.Groups[1].Value}(&|$)");
+            return;
+        }
+
+        var pageMatch = LegacyPageIdRegex.Match("/?" + trimmed);
+        if (pageMatch.Success) // page_id=<captured value> may appear anywhere in the query string
+        {
+            lines.Add($"RewriteCond %{{QUERY_STRING}} (^|&)page_id={pageMatch.Groups[1].Value}(&|$)");
+            return;
+        }
+
+        lines.Add($"RewriteCond %{{QUERY_STRING}} ^{Regex.Escape(trimmed)}$"); // anything else must match the whole query string exactly
+    }
+}
diff --git a/PowerForge.Web/Services/WebLinkService.Ignore404.cs b/PowerForge.Web/Services/WebLinkService.Ignore404.cs
new file mode 100644
index 00000000..119d3777
--- /dev/null
+++ b/PowerForge.Web/Services/WebLinkService.Ignore404.cs
@@ -0,0 +1,184 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text.Json;
+
+namespace PowerForge.Web;
+
+public static partial class WebLinkService
+{
+    /// Adds reviewed 404 observations to an ignored-404 rules file.
+    public static WebLink404IgnoreResult Ignore404Suggestions(WebLink404IgnoreOptions options)
+    {
+        ArgumentNullException.ThrowIfNull(options);
+        if (string.IsNullOrWhiteSpace(options.SourcePath))
+            throw new ArgumentException("SourcePath is required.", nameof(options));
+        if (string.IsNullOrWhiteSpace(options.OutputPath))
+            throw new ArgumentException("OutputPath is required.", nameof(options));
+
+        var sourcePath = Path.GetFullPath(options.SourcePath);
+        var outputPath = Path.GetFullPath(options.OutputPath);
+        if (!File.Exists(sourcePath))
+            throw new FileNotFoundException($"404 suggestion report not found: {sourcePath}", sourcePath);
+
+        var report = JsonSerializer.Deserialize(File.ReadAllText(sourcePath), WebJson.Options)
+            ?? new WebLink404ReportResult(); // a null deserialization result falls back to an empty report
+        var imported = BuildIgnored404Candidates(report, options).ToList();
+        var existing = options.MergeWithExisting && File.Exists(outputPath) // without MergeWithExisting the output is rebuilt from the candidates alone
+            ? LoadIgnored404Rules(outputPath).ToList()
+            : new List();
+
+        var existingCount = existing.Count;
+        var merged = MergeIgnored404Rules(existing, imported, options.ReplaceExisting, out var skippedDuplicates, out var replaced);
+        WriteIgnored404Json(outputPath, merged); // always written, even when no candidate was selected
+
+        return new WebLink404IgnoreResult
+        {
+            SourcePath = sourcePath,
+            OutputPath = outputPath,
+            ExistingCount = existingCount,
+            CandidateCount = imported.Count,
+            WrittenCount = merged.Count,
+            SkippedDuplicateCount = skippedDuplicates,
+            ReplacedCount = replaced
+        };
+    }
+
+    private static IEnumerable BuildIgnored404Candidates(WebLink404ReportResult report, WebLink404IgnoreOptions options) // lazily yields one ignored-404 rule per selected report observation
+    {
+        var requestedPaths = (options.Paths ?? Array.Empty())
+            .Select(NormalizeObservationPath) // requested paths are normalized the same way as report paths before comparing
+            .Where(static item => !string.IsNullOrWhiteSpace(item))
+            .Distinct(StringComparer.OrdinalIgnoreCase)
+            .ToArray();
+
+        foreach (var suggestion in report.Suggestions ?? Array.Empty())
+        {
+            var path = NormalizeObservationPath(suggestion.Path);
+            if (string.IsNullOrWhiteSpace(path)) // skip observations whose path normalizes to nothing
+                continue;
+
+            var suggestions = suggestion.Suggestions ?? Array.Empty();
+            var selected = options.IncludeAll || // the three selectors are OR-ed: any one of them picks the observation
+                (options.OnlyWithoutSuggestions && suggestions.Length == 0) ||
+                requestedPaths.Contains(path, StringComparer.OrdinalIgnoreCase);
+            if (!selected)
+                continue;
+
+            yield return new Ignored404Rule
+            {
+                Path = path,
+                Host = NullIfWhiteSpace(suggestion.Host),
+                Reason = string.IsNullOrWhiteSpace(options.Reason) // an explicit reason wins; otherwise one is derived from the observation
+                    ? BuildDefaultIgnored404Reason(suggestion)
+                    : options.Reason.Trim(),
+                CreatedAt = DateTimeOffset.UtcNow.ToString("O"), // round-trip ("O") timestamp taken in UTC
+                CreatedBy = NullIfWhiteSpace(options.CreatedBy)
+            };
+        }
+    }
+
+    private static List MergeIgnored404Rules( // merges imported rules into existing ones by host/path key and returns the sorted result
+        List existing,
+        List imported,
+        bool replaceExisting,
+        out int skippedCount,
+        out int replacedCount)
+    {
+        skippedCount = 0;
+        replacedCount = 0;
+        var merged = new List();
+        var index = new Dictionary(StringComparer.OrdinalIgnoreCase); // key -> position in merged
+
+        foreach (var rule in existing.Where(static item => item is not null))
+        {
+            index[BuildIgnored404Key(rule)] = merged.Count; // rows duplicated inside the existing file are all kept; the index points at the last one
+            merged.Add(rule);
+        }
+
+        foreach (var rule in imported.Where(static item => item is not null))
+        {
+            var key = BuildIgnored404Key(rule);
+            if (index.TryGetValue(key, out var existingIndex))
+            {
+                if (replaceExisting)
+                {
+                    merged[existingIndex] = rule; // replace in place
+                    replacedCount++;
+                }
+                else
+                    skippedCount++;
+                continue;
+            }
+
+            index[key] = merged.Count;
+            merged.Add(rule);
+        }
+
+        return merged // output is re-sorted by host, then path, both case-insensitively
+            .OrderBy(static item => item.Host ?? string.Empty, StringComparer.OrdinalIgnoreCase)
+            .ThenBy(static item => item.Path, StringComparer.OrdinalIgnoreCase)
+            .ToList();
+    }
+
+    private static void WriteIgnored404Json(string outputPath, IReadOnlyList ignored404) // writes the rules to outputPath, creating the directory when needed
+    {
+        var directory = Path.GetDirectoryName(outputPath);
+        if (!string.IsNullOrWhiteSpace(directory))
+            Directory.CreateDirectory(directory);
+
+        var payload = new { ignored404 }; // anonymous wrapper: the rules are serialized under a single ignored404 member
+        File.WriteAllText(outputPath, JsonSerializer.Serialize(payload, ShortlinkImportJsonOptions), Utf8NoBom);
+    }
+
+    private static string BuildIgnored404Key(Ignored404Rule rule) // merge key: host (used as-is, not trimmed) + normalized path
+        => string.Join("|", rule.Host ?? string.Empty, NormalizeObservationPath(rule.Path));
+
+    private static string BuildDefaultIgnored404Reason(WebLink404Suggestion suggestion) // default reason text, chosen by whether the observation carries suggestions
+        => (suggestion.Suggestions ?? Array.Empty()).Length == 0
+            ? "No generated route suggestion in 404 report."
+            : "Reviewed noisy 404 observation.";
+}
+
+/// Options for adding 404 report observations to ignored-404 rules.
+public sealed class WebLink404IgnoreOptions
+{
+    /// Source 404 suggestion report path.
+    public string SourcePath { get; set; } = string.Empty;
+    /// Output ignored-404 JSON path.
+    public string OutputPath { get; set; } = string.Empty;
+    /// Specific missing paths to ignore.
+    public string[] Paths { get; set; } = Array.Empty();
+    /// When true, ignore every observation in the report.
+    public bool IncludeAll { get; set; }
+    /// When true, select observations that have no suggestions; combined with IncludeAll and Paths by OR, not as a restriction.
+    public bool OnlyWithoutSuggestions { get; set; }
+    /// Reason stored on each ignored 404 rule.
+    public string? Reason { get; set; }
+    /// Creator identifier for review workflows.
+    public string? CreatedBy { get; set; }
+    /// Merge with existing ignored-404 JSON instead of replacing the file.
+    public bool MergeWithExisting { get; set; } = true;
+    /// Replace existing ignored rules with the same host/path key.
+    public bool ReplaceExisting { get; set; }
+}
+
+/// Result from adding 404 observations to ignored-404 rules.
+public sealed class WebLink404IgnoreResult
+{
+    /// Resolved source report path.
+    public string SourcePath { get; set; } = string.Empty;
+    /// Resolved output ignored-404 path.
+    public string OutputPath { get; set; } = string.Empty;
+    /// Existing ignored rules loaded from the output file.
+    public int ExistingCount { get; set; }
+    /// Ignored rules selected from the report.
+    public int CandidateCount { get; set; }
+    /// Total ignored rules written to the output file.
+    public int WrittenCount { get; set; }
+    /// Candidates skipped because an existing ignored rule had the same key.
+    public int SkippedDuplicateCount { get; set; }
+    /// Existing ignored rules replaced because replacement was requested.
+    public int ReplacedCount { get; set; }
+}
diff --git a/PowerForge.Web/Services/WebLinkService.Import.cs b/PowerForge.Web/Services/WebLinkService.Import.cs
new file mode 100644
index 00000000..50e337bd
--- /dev/null
+++ b/PowerForge.Web/Services/WebLinkService.Import.cs
@@ -0,0 +1,364 @@
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.IO;
+using System.Linq;
+using System.Text.Json;
+using System.Text.Json.Serialization;
+
+namespace PowerForge.Web;
+
+public static partial class WebLinkService
+{
+    /// Imports Pretty Links-style CSV exports into PowerForge shortlink JSON.
+ public static WebLinkShortlinkImportResult ImportPrettyLinks(WebLinkShortlinkImportOptions options) + { + ArgumentNullException.ThrowIfNull(options); + if (string.IsNullOrWhiteSpace(options.SourcePath)) + throw new ArgumentException("SourcePath is required.", nameof(options)); + if (string.IsNullOrWhiteSpace(options.OutputPath)) + throw new ArgumentException("OutputPath is required.", nameof(options)); + + var sourcePath = Path.GetFullPath(options.SourcePath); + var outputPath = Path.GetFullPath(options.OutputPath); + if (!File.Exists(sourcePath)) + throw new FileNotFoundException($"Pretty Links CSV not found: {sourcePath}", sourcePath); + + var warnings = new List(); + var sourceOriginPath = string.IsNullOrWhiteSpace(options.SourceOriginPath) + ? options.SourcePath + : options.SourceOriginPath; + var imported = ReadPrettyLinksCsv(sourcePath, sourceOriginPath, options, warnings).ToList(); + var existing = options.MergeWithExisting && File.Exists(outputPath) + ? ReadExistingShortlinks(outputPath) + : new List(); + + var existingCount = existing.Count; + var merged = MergeShortlinks(existing, imported, options.ReplaceExisting, out var skippedCount); + WriteShortlinkJson(outputPath, merged); + + return new WebLinkShortlinkImportResult + { + SourcePath = sourcePath, + OutputPath = outputPath, + ExistingCount = existingCount, + ImportedCount = imported.Count, + WrittenCount = merged.Count, + SkippedDuplicateCount = skippedCount, + WarningCount = warnings.Count, + Warnings = warnings.ToArray() + }; + } + + private static IEnumerable ReadPrettyLinksCsv( + string sourcePath, + string sourceOriginPath, + WebLinkShortlinkImportOptions options, + List warnings) + { + var lines = File.ReadAllLines(sourcePath); + if (lines.Length <= 1) + yield break; + + var header = SplitCsvLine(lines[0]); + var slugIndex = FindHeader(header, "slug", "link_slug", "link slug", "pretty_slug", "pretty slug", "short_slug", "short slug"); + var prettyUrlIndex = FindHeader(header, "pretty_link", 
"pretty link", "pretty_url", "pretty url", "short_url", "short url", "shortlink", "short_link", "short link", "path"); + var targetIndex = FindHeader(header, "target_url", "target url", "target", "destination", "destination_url", "destination url", "redirect_url", "redirect url", "url"); + var titleIndex = FindHeader(header, "title", "name", "link_name", "link name"); + var descriptionIndex = FindHeader(header, "description", "desc"); + var clicksIndex = FindHeader(header, "clicks", "click_count", "click count", "hits", "visits"); + var statusIndex = FindHeader(header, "redirect_type", "redirect type", "status", "status_code", "status code"); + var createdIndex = FindHeader(header, "created_at", "created at", "created", "createdAt"); + var updatedIndex = FindHeader(header, "updated_at", "updated at", "updated", "updatedAt", "last_updated_at", "last updated at", "lastUpdatedAt"); + var groupIndex = FindHeader(header, "group", "group_name", "group name", "category", "categories", "link_categories", "link categories"); + var tagsIndex = FindHeader(header, "tags", "tag", "link_tags", "link tags", "keywords"); + var idIndex = FindHeader(header, "id", "link_id", "link id", "link_cpt_id", "link cpt id"); + + if (targetIndex < 0) + { + warnings.Add("Pretty Links import skipped: CSV does not contain a target URL column."); + yield break; + } + + for (var i = 1; i < lines.Length; i++) + { + if (string.IsNullOrWhiteSpace(lines[i])) + continue; + + var parts = SplitCsvLine(lines[i]); + var target = ReadPart(parts, targetIndex); + if (string.IsNullOrWhiteSpace(target)) + { + warnings.Add($"Row {i + 1}: skipped because target URL is empty."); + continue; + } + + var rawSlug = ReadPart(parts, slugIndex); + if (string.IsNullOrWhiteSpace(rawSlug)) + rawSlug = ReadPart(parts, prettyUrlIndex); + if (string.IsNullOrWhiteSpace(rawSlug)) + rawSlug = SlugifyShortlink(ReadPart(parts, titleIndex)); + + var parsed = ParseImportedShortlinkPath(rawSlug, options.PathPrefix); + if 
    /// <summary>
    /// Loads the shortlinks already stored in <paramref name="path"/> via <c>LoadShortlinkJson</c>.
    /// </summary>
    /// <param name="path">Path of the shortlink JSON file to read.</param>
    /// <returns>The list that <c>LoadShortlinkJson</c> was asked to populate.</returns>
    // NOTE(review): the List declarations below carry no generic type arguments as shown here;
    // confirm the element types against the LoadShortlinkJson signature.
    private static List ReadExistingShortlinks(string path)
    {
        var shortlinks = new List();
        // The two source-tracking lists are passed to the loader but never read afterwards.
        var usedSources = new List();
        var missingSources = new List();
        LoadShortlinkJson(path, shortlinks, usedSources, missingSources);
        return shortlinks;
    }
else + skippedCount++; + continue; + } + + index[key] = merged.Count; + merged.Add(shortlink); + } + + return merged + .OrderBy(static item => item.Host ?? string.Empty, StringComparer.OrdinalIgnoreCase) + .ThenBy(static item => item.PathPrefix ?? string.Empty, StringComparer.OrdinalIgnoreCase) + .ThenBy(static item => item.Slug, StringComparer.OrdinalIgnoreCase) + .ToList(); + } + + private static void WriteShortlinkJson(string outputPath, IReadOnlyList shortlinks) + { + var directory = Path.GetDirectoryName(outputPath); + if (!string.IsNullOrWhiteSpace(directory)) + Directory.CreateDirectory(directory); + + var payload = new { shortlinks }; + File.WriteAllText(outputPath, JsonSerializer.Serialize(payload, ShortlinkImportJsonOptions), Utf8NoBom); + } + + private static (string Slug, string? PathPrefix) ParseImportedShortlinkPath(string value, string? configuredPathPrefix) + { + var path = value.Trim(); + if (Uri.TryCreate(path, UriKind.Absolute, out var uri) && + (uri.Scheme.Equals(Uri.UriSchemeHttp, StringComparison.OrdinalIgnoreCase) || + uri.Scheme.Equals(Uri.UriSchemeHttps, StringComparison.OrdinalIgnoreCase))) + { + path = uri.AbsolutePath; + } + + var queryIndex = path.IndexOf('?'); + if (queryIndex >= 0) + path = path[..queryIndex]; + var hashIndex = path.IndexOf('#'); + if (hashIndex >= 0) + path = path[..hashIndex]; + + path = path.Trim().Trim('/'); + var configuredPrefix = string.IsNullOrWhiteSpace(configuredPathPrefix) + ? 
null + : "/" + configuredPathPrefix.Trim().Trim('/'); + if (!string.IsNullOrWhiteSpace(configuredPrefix) && + path.StartsWith(configuredPrefix.Trim('/'), StringComparison.OrdinalIgnoreCase)) + { + path = path[configuredPrefix.Trim('/').Length..].Trim('/'); + } + + var parts = path.Split(new[] { '/' }, StringSplitOptions.RemoveEmptyEntries); + if (parts.Length == 0) + return (string.Empty, configuredPrefix); + + var slug = SlugifyShortlink(parts[^1]); + if (!string.IsNullOrWhiteSpace(configuredPrefix)) + return (slug, configuredPrefix); + + var prefix = parts.Length > 1 + ? "/" + string.Join("/", parts.Take(parts.Length - 1).Select(SlugifyShortlink).Where(static part => !string.IsNullOrWhiteSpace(part))) + : null; + return (slug, string.IsNullOrWhiteSpace(prefix) ? null : prefix); + } + + private static string[] BuildImportedTags(string rowGroups, string rowTags, string[] optionTags) + { + var tags = new List(); + tags.AddRange(optionTags ?? Array.Empty()); + if (!string.IsNullOrWhiteSpace(rowGroups)) + { + tags.AddRange(rowGroups.Split(new[] { ',', ';', '|' }, StringSplitOptions.RemoveEmptyEntries) + .Select(CleanImportedTag) + .Where(static tag => !string.IsNullOrWhiteSpace(tag))); + } + if (!string.IsNullOrWhiteSpace(rowTags)) + { + tags.AddRange(rowTags.Split(new[] { ',', ';', '|' }, StringSplitOptions.RemoveEmptyEntries) + .Select(CleanImportedTag) + .Where(static tag => !string.IsNullOrWhiteSpace(tag))); + } + + return tags + .Distinct(StringComparer.OrdinalIgnoreCase) + .OrderBy(static tag => tag, StringComparer.OrdinalIgnoreCase) + .ToArray(); + } + + private static string CleanImportedTag(string value) + => value.Trim().Trim('"', '\'').Trim(); + + private static string? BuildImportNote(string id) + => string.IsNullOrWhiteSpace(id) ? null : "Pretty Links id: " + id.Trim(); + + private static string? NullIfWhiteSpace(string? value) + => string.IsNullOrWhiteSpace(value) ? 
null : value.Trim(); + + private static int ParsePositiveInt(string value) + => int.TryParse(value, NumberStyles.Integer, CultureInfo.InvariantCulture, out var parsed) && parsed > 0 + ? parsed + : 0; + + private static int ParseRedirectStatus(string value, int defaultStatus) + => int.TryParse(value, NumberStyles.Integer, CultureInfo.InvariantCulture, out var parsed) && parsed is 301 or 302 or 307 or 308 + ? parsed + : defaultStatus; + + private static string BuildShortlinkImportKey(LinkShortlinkRule shortlink) + => string.Join("|", + shortlink.Host ?? string.Empty, + shortlink.PathPrefix ?? string.Empty, + shortlink.Slug ?? string.Empty); + + private static string SlugifyShortlink(string value) + { + if (string.IsNullOrWhiteSpace(value)) + return string.Empty; + + var result = new List(); + foreach (var ch in value.Trim().ToLowerInvariant()) + { + if (char.IsLetterOrDigit(ch)) + { + result.Add(ch); + } + else if (ch is '-' or '_' or '.') + { + result.Add(ch); + } + else if (char.IsWhiteSpace(ch)) + { + result.Add('-'); + } + } + + var slug = new string(result.ToArray()).Trim('-', '.', '_'); + while (slug.Contains("--", StringComparison.Ordinal)) + slug = slug.Replace("--", "-", StringComparison.Ordinal); + return slug; + } + + private static readonly JsonSerializerOptions ShortlinkImportJsonOptions = new() + { + PropertyNamingPolicy = JsonNamingPolicy.CamelCase, + PropertyNameCaseInsensitive = true, + DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull, + WriteIndented = true, + Converters = { new JsonStringEnumConverter(JsonNamingPolicy.CamelCase) } + }; +} + +/// Options for importing Pretty Links CSV exports into PowerForge shortlink JSON. +public sealed class WebLinkShortlinkImportOptions +{ + /// Source Pretty Links CSV path. + public string SourcePath { get; set; } = string.Empty; + /// Optional source path label persisted in imported shortlink metadata. + public string? SourceOriginPath { get; set; } + /// Output shortlinks JSON path. 
+ public string OutputPath { get; set; } = string.Empty; + /// Optional host for imported shortlinks. + public string? Host { get; set; } + /// Optional path prefix for imported shortlinks. + public string? PathPrefix { get; set; } + /// Default owner assigned to imported shortlinks. + public string? Owner { get; set; } + /// Tags assigned to every imported shortlink. + public string[] Tags { get; set; } = Array.Empty(); + /// HTTP status for imported shortlinks. + public int Status { get; set; } = 302; + /// Allow absolute external targets. + public bool AllowExternal { get; set; } = true; + /// Merge with existing output shortlinks instead of replacing the file. + public bool MergeWithExisting { get; set; } = true; + /// Replace existing shortlinks with the same host/prefix/slug key. + public bool ReplaceExisting { get; set; } +} + +/// Result from a Pretty Links shortlink import. +public sealed class WebLinkShortlinkImportResult +{ + /// Resolved source CSV path. + public string SourcePath { get; set; } = string.Empty; + /// Resolved output JSON path. + public string OutputPath { get; set; } = string.Empty; + /// Existing shortlinks loaded from the output file. + public int ExistingCount { get; set; } + /// Shortlinks imported from the source CSV. + public int ImportedCount { get; set; } + /// Total shortlinks written to the output file. + public int WrittenCount { get; set; } + /// Imported rows skipped because an existing shortlink had the same key. + public int SkippedDuplicateCount { get; set; } + /// Warning count. + public int WarningCount { get; set; } + /// Import warnings. 
+ public string[] Warnings { get; set; } = Array.Empty(); +} diff --git a/PowerForge.Web/Services/WebLinkService.Promote404.cs b/PowerForge.Web/Services/WebLinkService.Promote404.cs new file mode 100644 index 00000000..512d36a7 --- /dev/null +++ b/PowerForge.Web/Services/WebLinkService.Promote404.cs @@ -0,0 +1,237 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Security.Cryptography; +using System.Text; +using System.Text.Json; + +namespace PowerForge.Web; + +public static partial class WebLinkService +{ + /// Promotes reviewed 404 report suggestions into redirect candidates. + public static WebLink404PromoteResult Promote404Suggestions(WebLink404PromoteOptions options) + { + ArgumentNullException.ThrowIfNull(options); + if (string.IsNullOrWhiteSpace(options.SourcePath)) + throw new ArgumentException("SourcePath is required.", nameof(options)); + if (string.IsNullOrWhiteSpace(options.OutputPath)) + throw new ArgumentException("OutputPath is required.", nameof(options)); + + var sourcePath = Path.GetFullPath(options.SourcePath); + var outputPath = Path.GetFullPath(options.OutputPath); + if (!File.Exists(sourcePath)) + throw new FileNotFoundException($"404 suggestion report not found: {sourcePath}", sourcePath); + + var report = JsonSerializer.Deserialize(File.ReadAllText(sourcePath), WebJson.Options) + ?? new WebLink404ReportResult(); + var imported = Build404RedirectCandidates(report, sourcePath, options).ToList(); + var existing = options.MergeWithExisting && File.Exists(outputPath) + ? 
ReadExistingRedirects(outputPath) + : new List(); + + var existingCount = existing.Count; + var merged = MergeRedirectCandidates(existing, imported, options.ReplaceExisting, out var skippedDuplicates, out var replaced); + WriteRedirectJson(outputPath, merged); + + return new WebLink404PromoteResult + { + SourcePath = sourcePath, + OutputPath = outputPath, + ExistingCount = existingCount, + CandidateCount = imported.Count, + WrittenCount = merged.Count, + SkippedDuplicateCount = skippedDuplicates, + ReplacedCount = replaced, + SkippedNoSuggestionCount = report.Suggestions.Count(static item => item.Suggestions.Length == 0), + SkippedLowCount = report.Suggestions.Count(item => item.Count < Math.Max(1, options.MinimumCount)) + }; + } + + private static IEnumerable Build404RedirectCandidates( + WebLink404ReportResult report, + string sourcePath, + WebLink404PromoteOptions options) + { + var minimumScore = Math.Clamp(options.MinimumScore, 0d, 1d); + var minimumCount = Math.Max(1, options.MinimumCount); + + foreach (var suggestion in report.Suggestions ?? Array.Empty()) + { + if (suggestion.Count < minimumCount) + continue; + + var target = (suggestion.Suggestions ?? Array.Empty()) + .Where(item => !string.IsNullOrWhiteSpace(item.TargetPath)) + .Where(item => item.Score >= minimumScore) + .OrderByDescending(static item => item.Score) + .ThenBy(static item => item.TargetPath, StringComparer.OrdinalIgnoreCase) + .FirstOrDefault(); + if (target is null) + continue; + + var source = NormalizeObservationPath(suggestion.Path); + if (string.IsNullOrWhiteSpace(source)) + continue; + + yield return new LinkRedirectRule + { + Id = Build404PromotedId(suggestion.Host, source, target.TargetPath), + Enabled = options.Enabled, + SourceHost = NullIfWhiteSpace(suggestion.Host), + SourcePath = source, + TargetUrl = NormalizePromoteTarget(target.TargetPath), + MatchType = LinkRedirectMatchType.Exact, + Status = options.Status is 301 or 302 or 307 or 308 ? 
    /// <summary>
    /// Loads the redirects already stored in <paramref name="path"/> via <c>LoadRedirectJson</c>.
    /// </summary>
    /// <param name="path">Path of the redirect JSON file to read.</param>
    /// <returns>The list that <c>LoadRedirectJson</c> was asked to populate.</returns>
    // NOTE(review): the List declarations below carry no generic type arguments as shown here;
    // confirm the element types against the LoadRedirectJson signature.
    private static List ReadExistingRedirects(string path)
    {
        var redirects = new List();
        // The two source-tracking lists are passed to the loader but never read afterwards.
        var usedSources = new List();
        var missingSources = new List();
        LoadRedirectJson(path, redirects, usedSources, missingSources);
        return redirects;
    }
string.Empty, StringComparer.OrdinalIgnoreCase) + .ToList(); + } + + private static void WriteRedirectJson(string outputPath, IReadOnlyList redirects) + { + var directory = Path.GetDirectoryName(outputPath); + if (!string.IsNullOrWhiteSpace(directory)) + Directory.CreateDirectory(directory); + + var payload = new { redirects }; + File.WriteAllText(outputPath, JsonSerializer.Serialize(payload, ShortlinkImportJsonOptions), Utf8NoBom); + } + + private static string Build404PromotedId(string? host, string sourcePath, string targetPath) + { + var raw = $"{host}|{sourcePath}|{targetPath}"; + var hash = SHA256.HashData(Encoding.UTF8.GetBytes(raw.ToLowerInvariant())); + return "404-" + Convert.ToHexString(hash, 0, 6).ToLowerInvariant(); + } + + private static string NormalizePromoteTarget(string targetPath) + { + var target = targetPath.Trim(); + if (!target.StartsWith("/", StringComparison.Ordinal) && + !target.StartsWith("http://", StringComparison.OrdinalIgnoreCase) && + !target.StartsWith("https://", StringComparison.OrdinalIgnoreCase)) + { + target = "/" + target.TrimStart('/'); + } + + return target; + } + + private static string Build404PromoteNote(WebLink404Suggestion suggestion, WebLink404RouteSuggestion target, bool enabled) + { + var review = enabled ? "enabled during promotion" : "review before enabling"; + var parts = new List + { + $"Promoted from 404 report; count={suggestion.Count}; score={target.Score:0.###}; {review}." + }; + if (!string.IsNullOrWhiteSpace(suggestion.Referrer)) + parts.Add("Referrer: " + suggestion.Referrer.Trim()); + if (!string.IsNullOrWhiteSpace(suggestion.LastSeenAt)) + parts.Add("Last seen: " + suggestion.LastSeenAt.Trim()); + return string.Join(" ", parts); + } +} + +/// Options for promoting 404 report suggestions into redirect candidates. +public sealed class WebLink404PromoteOptions +{ + /// Source 404 suggestion report path. + public string SourcePath { get; set; } = string.Empty; + /// Output redirects JSON path. 
+ public string OutputPath { get; set; } = string.Empty; + /// When true, promoted redirects are immediately enabled. + public bool Enabled { get; set; } + /// Minimum suggestion score to promote. + public double MinimumScore { get; set; } = 0.35d; + /// Minimum observed 404 count to promote. + public int MinimumCount { get; set; } = 1; + /// HTTP redirect status for promoted candidates. + public int Status { get; set; } = 301; + /// Group assigned to promoted candidates. + public string? Group { get; set; } = "404-suggestions"; + /// Merge with existing redirect JSON instead of replacing the file. + public bool MergeWithExisting { get; set; } = true; + /// Replace existing redirects with the same host/path/query key. + public bool ReplaceExisting { get; set; } +} + +/// Result from promoting 404 suggestions into redirect candidates. +public sealed class WebLink404PromoteResult +{ + /// Resolved source report path. + public string SourcePath { get; set; } = string.Empty; + /// Resolved output redirects path. + public string OutputPath { get; set; } = string.Empty; + /// Existing redirects loaded from the output file. + public int ExistingCount { get; set; } + /// Redirect candidates selected from the report. + public int CandidateCount { get; set; } + /// Total redirects written to the output file. + public int WrittenCount { get; set; } + /// Candidates skipped because an existing redirect had the same key. + public int SkippedDuplicateCount { get; set; } + /// Existing redirects replaced because replacement was requested. + public int ReplacedCount { get; set; } + /// Observations skipped because they had no route suggestion. + public int SkippedNoSuggestionCount { get; set; } + /// Observations skipped because their count was below the configured threshold. 
+ public int SkippedLowCount { get; set; } +} diff --git a/PowerForge.Web/Services/WebLinkService.Report404.cs b/PowerForge.Web/Services/WebLinkService.Report404.cs new file mode 100644 index 00000000..722d8056 --- /dev/null +++ b/PowerForge.Web/Services/WebLinkService.Report404.cs @@ -0,0 +1,467 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Text.Json; +using System.Text.RegularExpressions; + +namespace PowerForge.Web; + +public static partial class WebLinkService +{ + private static readonly Regex ApacheRequestRegex = new("\"[A-Z]+\\s+([^\\s\\\"]+)\\s+HTTP/[^\\\"]+\"\\s+(\\d{3})", RegexOptions.Compiled | RegexOptions.CultureInvariant); + + /// Creates a reviewable 404 suggestion report from logs or observation CSVs. + public static WebLink404ReportResult Generate404Report(WebLink404ReportOptions options) + { + ArgumentNullException.ThrowIfNull(options); + if (string.IsNullOrWhiteSpace(options.SiteRoot)) + throw new ArgumentException("SiteRoot is required.", nameof(options)); + + var siteRoot = Path.GetFullPath(options.SiteRoot); + if (!Directory.Exists(siteRoot)) + throw new DirectoryNotFoundException($"Site root not found: {siteRoot}"); + + var routes = DiscoverHtmlRoutes(siteRoot); + var ignored = LoadIgnored404Rules(options.Ignored404Path, options.Ignored404Rules); + var filteredObservations = Load404Observations(options, routes) + .Where(item => !RouteExists(routes, item.Path)) + .Where(item => options.IncludeAsset404s || !LooksLikeAssetPath(item.Path)) + .ToArray(); + var observations = filteredObservations + .Where(item => !IsIgnored404(item, ignored)) + .OrderByDescending(static item => item.Count) + .ThenBy(static item => item.Path, StringComparer.OrdinalIgnoreCase) + .ToArray(); + + var maxSuggestions = options.MaxSuggestions <= 0 ? 3 : Math.Min(options.MaxSuggestions, 10); + var minimumScore = options.MinimumScore <= 0 ? 
0.35d : Math.Clamp(options.MinimumScore, 0.05d, 1d); + var suggestions = new List(); + foreach (var observation in observations) + { + var matches = routes + .Select(route => new { Route = route, Score = ScoreRoute(observation.Path, route) }) + .Where(item => item.Score >= minimumScore) + .OrderByDescending(static item => item.Score) + .ThenBy(static item => item.Route, StringComparer.OrdinalIgnoreCase) + .Take(maxSuggestions) + .Select(item => new WebLink404RouteSuggestion + { + TargetPath = item.Route, + Score = Math.Round(item.Score, 3) + }) + .ToArray(); + + suggestions.Add(new WebLink404Suggestion + { + Path = observation.Path, + Host = observation.Host, + Count = observation.Count, + Referrer = observation.Referrer, + LastSeenAt = observation.LastSeenAt, + Suggestions = matches + }); + } + + return new WebLink404ReportResult + { + SiteRoot = siteRoot, + SourcePath = string.IsNullOrWhiteSpace(options.SourcePath) ? null : Path.GetFullPath(options.SourcePath), + RouteCount = routes.Length, + ObservationCount = observations.Length, + IgnoredObservationCount = filteredObservations.Length - observations.Length, + SuggestedObservationCount = suggestions.Count(static item => item.Suggestions.Length > 0), + Suggestions = suggestions.ToArray() + }; + } + + private static string[] DiscoverHtmlRoutes(string siteRoot) + { + return Directory.EnumerateFiles(siteRoot, "*.html", SearchOption.AllDirectories) + .Select(path => ToRoute(siteRoot, path)) + .Where(route => !string.IsNullOrWhiteSpace(route)) + .Where(route => !route.Equals("/404.html", StringComparison.OrdinalIgnoreCase)) + .Where(route => !route.Equals("/404/", StringComparison.OrdinalIgnoreCase)) + .Distinct(StringComparer.OrdinalIgnoreCase) + .OrderBy(static route => route, StringComparer.OrdinalIgnoreCase) + .ToArray(); + } + + private static string ToRoute(string siteRoot, string filePath) + { + var relative = Path.GetRelativePath(siteRoot, filePath).Replace('\\', '/'); + if (relative.Equals("index.html", 
StringComparison.OrdinalIgnoreCase)) + return "/"; + if (relative.EndsWith("/index.html", StringComparison.OrdinalIgnoreCase)) + return "/" + relative[..^"index.html".Length]; + return "/" + relative.TrimStart('/'); + } + + private static IEnumerable Load404Observations(WebLink404ReportOptions options, IReadOnlyCollection routes) + { + if (string.IsNullOrWhiteSpace(options.SourcePath)) + return Array.Empty(); + + var sourcePath = Path.GetFullPath(options.SourcePath); + if (!File.Exists(sourcePath)) + { + if (options.AllowMissingSource) + return Array.Empty(); + throw new FileNotFoundException($"404 source file not found: {sourcePath}", sourcePath); + } + + var lines = File.ReadAllLines(sourcePath); + if (lines.Length == 0) + return Array.Empty(); + + var header = SplitCsvLine(lines[0]); + var pathIndex = FindHeader(header, "path", "url", "request", "request_uri", "request uri", "uri"); + var countIndex = FindHeader(header, "count", "hits", "visits"); + var statusIndex = FindHeader(header, "status", "status_code", "status code"); + if (pathIndex >= 0) + return Read404Csv(lines, pathIndex, countIndex, statusIndex); + + return ReadApache404Log(lines, routes); + } + + private static IEnumerable Read404Csv(string[] lines, int pathIndex, int countIndex, int statusIndex) + { + var hostIndex = FindHeader(SplitCsvLine(lines[0]), "host", "domain"); + var referrerIndex = FindHeader(SplitCsvLine(lines[0]), "referrer", "referer"); + var lastSeenIndex = FindHeader(SplitCsvLine(lines[0]), "last_seen", "lastSeen", "last seen", "last_seen_at", "lastSeenAt"); + var aggregated = new Dictionary(StringComparer.OrdinalIgnoreCase); + + for (var i = 1; i < lines.Length; i++) + { + if (string.IsNullOrWhiteSpace(lines[i])) + continue; + + var parts = SplitCsvLine(lines[i]); + if (statusIndex >= 0 && ReadPart(parts, statusIndex) is { Length: > 0 } statusText && statusText != "404") + continue; + + var path = NormalizeObservationPath(ReadPart(parts, pathIndex)); + if 
(string.IsNullOrWhiteSpace(path)) + continue; + + var count = ParsePositiveInt(ReadPart(parts, countIndex)); + if (count <= 0) + count = 1; + AddObservation(aggregated, path, ReadPart(parts, hostIndex), count, ReadPart(parts, referrerIndex), ReadPart(parts, lastSeenIndex)); + } + + return aggregated.Values; + } + + private static IEnumerable ReadApache404Log(string[] lines, IReadOnlyCollection routes) + { + var aggregated = new Dictionary(StringComparer.OrdinalIgnoreCase); + foreach (var line in lines) + { + var match = ApacheRequestRegex.Match(line); + if (!match.Success || !match.Groups[2].Value.Equals("404", StringComparison.Ordinal)) + continue; + + var path = NormalizeObservationPath(match.Groups[1].Value); + if (string.IsNullOrWhiteSpace(path) || RouteExists(routes, path)) + continue; + + AddObservation(aggregated, path, host: null, count: 1, referrer: null, lastSeenAt: null); + } + + return aggregated.Values; + } + + private static void AddObservation(Dictionary target, string path, string? host, int count, string? referrer, string? lastSeenAt) + { + var key = (host ?? 
string.Empty) + "|" + path; + if (target.TryGetValue(key, out var existing)) + { + existing.Count += count; + if (string.IsNullOrWhiteSpace(existing.Referrer)) + existing.Referrer = NullIfWhiteSpace(referrer); + if (string.IsNullOrWhiteSpace(existing.LastSeenAt)) + existing.LastSeenAt = NullIfWhiteSpace(lastSeenAt); + return; + } + + target[key] = new WebLink404Observation + { + Path = path, + Host = NullIfWhiteSpace(host), + Count = count, + Referrer = NullIfWhiteSpace(referrer), + LastSeenAt = NullIfWhiteSpace(lastSeenAt) + }; + } + + private static bool RouteExists(IReadOnlyCollection routes, string path) + { + var normalized = NormalizeObservationPath(path); + if (routes.Contains(normalized, StringComparer.OrdinalIgnoreCase)) + return true; + if (!normalized.EndsWith("/", StringComparison.Ordinal) && + routes.Contains(normalized + "/", StringComparer.OrdinalIgnoreCase)) + return true; + return normalized.EndsWith("/", StringComparison.Ordinal) && + routes.Contains(normalized.TrimEnd('/') + ".html", StringComparer.OrdinalIgnoreCase); + } + + private static string NormalizeObservationPath(string? value) + { + if (string.IsNullOrWhiteSpace(value)) + return string.Empty; + + var trimmed = value.Trim(); + if (Uri.TryCreate(trimmed, UriKind.Absolute, out var uri) && + (uri.Scheme.Equals(Uri.UriSchemeHttp, StringComparison.OrdinalIgnoreCase) || + uri.Scheme.Equals(Uri.UriSchemeHttps, StringComparison.OrdinalIgnoreCase))) + { + trimmed = uri.PathAndQuery; + } + + var queryIndex = trimmed.IndexOf('?'); + if (queryIndex >= 0) + trimmed = trimmed[..queryIndex]; + var hashIndex = trimmed.IndexOf('#'); + if (hashIndex >= 0) + trimmed = trimmed[..hashIndex]; + if (!trimmed.StartsWith("/", StringComparison.Ordinal)) + trimmed = "/" + trimmed.TrimStart('/'); + return trimmed.Length > 1 ? 
trimmed.TrimEnd('/') : trimmed; + } + + private static bool LooksLikeAssetPath(string path) + { + var extension = Path.GetExtension(path); + return extension.Equals(".css", StringComparison.OrdinalIgnoreCase) || + extension.Equals(".js", StringComparison.OrdinalIgnoreCase) || + extension.Equals(".map", StringComparison.OrdinalIgnoreCase) || + extension.Equals(".png", StringComparison.OrdinalIgnoreCase) || + extension.Equals(".jpg", StringComparison.OrdinalIgnoreCase) || + extension.Equals(".jpeg", StringComparison.OrdinalIgnoreCase) || + extension.Equals(".gif", StringComparison.OrdinalIgnoreCase) || + extension.Equals(".svg", StringComparison.OrdinalIgnoreCase) || + extension.Equals(".webp", StringComparison.OrdinalIgnoreCase) || + extension.Equals(".ico", StringComparison.OrdinalIgnoreCase); + } + + private static bool IsIgnored404(WebLink404Observation observation, IReadOnlyList ignored) + => ignored.Any(rule => MatchesIgnored404Rule(observation, rule)); + + private static bool MatchesIgnored404Rule(WebLink404Observation observation, Ignored404Rule rule) + { + if (string.IsNullOrWhiteSpace(rule.Path)) + return false; + if (!string.IsNullOrWhiteSpace(rule.Host) && + !string.Equals(rule.Host.Trim(), observation.Host, StringComparison.OrdinalIgnoreCase)) + { + return false; + } + + var path = NormalizeObservationPath(observation.Path); + var pattern = NormalizeObservationPath(rule.Path); + if (!pattern.Contains('*', StringComparison.Ordinal)) + return path.Equals(pattern, StringComparison.OrdinalIgnoreCase); + + var parts = pattern.Split('*', StringSplitOptions.None); + var position = 0; + for (var i = 0; i < parts.Length; i++) + { + var part = parts[i]; + if (part.Length == 0) + continue; + + var index = path.IndexOf(part, position, StringComparison.OrdinalIgnoreCase); + if (index < 0 || (i == 0 && !pattern.StartsWith('*') && index != 0)) + return false; + position = index + part.Length; + } + + return pattern.EndsWith('*') || position == path.Length; + } + + 
private static Ignored404Rule[] LoadIgnored404Rules(string? path, IReadOnlyList<Ignored404Rule>? inlineRules = null)
    {
        // Combines inline rules with rules read from the optional JSON file; a missing file is
        // silently skipped. Rules without a path are dropped.
        var rules = new List<Ignored404Rule>();
        if (inlineRules is { Count: > 0 })
            rules.AddRange(inlineRules.Where(static item => item is not null));

        if (!string.IsNullOrWhiteSpace(path))
        {
            var resolved = Path.GetFullPath(path);
            if (File.Exists(resolved))
                rules.AddRange(ReadIgnored404Json(resolved));
        }

        // File-sourced arrays may contain JSON null elements, so guard before reading Path.
        return rules
            .Where(static item => item is not null && !string.IsNullOrWhiteSpace(item.Path))
            .ToArray();
    }

    /// <summary>
    /// Reads ignore rules from a JSON file. Accepts either a top-level array or an object with an
    /// "ignored404" array property; any other shape yields an empty result. Comments and trailing
    /// commas are tolerated.
    /// </summary>
    private static Ignored404Rule[] ReadIgnored404Json(string path)
    {
        using var document = JsonDocument.Parse(File.ReadAllText(path), new JsonDocumentOptions
        {
            AllowTrailingCommas = true,
            CommentHandling = JsonCommentHandling.Skip
        });

        var source = document.RootElement;
        if (source.ValueKind == JsonValueKind.Object && source.TryGetProperty("ignored404", out var nested))
            source = nested;
        if (source.ValueKind != JsonValueKind.Array)
            return Array.Empty<Ignored404Rule>();

        return source.Deserialize<Ignored404Rule[]>(WebJson.Options) ?? Array.Empty<Ignored404Rule>();
    }

    /// <summary>
    /// Scores how well <paramref name="route"/> matches <paramref name="missingPath"/>:
    /// 1 for equal normalized paths, 0.82 when one contains the other, otherwise the larger of
    /// the token Jaccard score and the last-segment edit-distance similarity.
    /// </summary>
    private static double ScoreRoute(string missingPath, string route)
    {
        var missing = NormalizeForScore(missingPath);
        var candidate = NormalizeForScore(route);
        if (missing.Equals(candidate, StringComparison.OrdinalIgnoreCase))
            return 1d;

        // string.Contains("") is always true, so an empty side (for example the root route "/")
        // must not take part in the containment shortcut or it would score 0.82 against everything.
        if (missing.Length > 0 && candidate.Length > 0 &&
            (missing.Contains(candidate, StringComparison.OrdinalIgnoreCase) ||
             candidate.Contains(missing, StringComparison.OrdinalIgnoreCase)))
            return 0.82d;

        var missingTokens = TokenizePath(missing);
        var candidateTokens = TokenizePath(candidate);
        var tokenScore = Jaccard(missingTokens, candidateTokens);
        var tailScore = SegmentSimilarity(missingTokens.LastOrDefault() ?? missing, candidateTokens.LastOrDefault() ?? candidate);
        return Math.Max(tokenScore, tailScore);
    }

    /// <summary>
    /// Normalizes a path for scoring: strips surrounding slashes and ".html", turns '-' and '_'
    /// into spaces, and lowercases the result.
    /// </summary>
    private static string NormalizeForScore(string path)
        => NormalizeObservationPath(path)
            .Trim('/')
            .Replace(".html", string.Empty, StringComparison.OrdinalIgnoreCase)
            .Replace('-', ' ')
            .Replace('_', ' ')
            .ToLowerInvariant();

    /// <summary>Splits a normalized path into distinct tokens longer than one character.</summary>
    private static string[] TokenizePath(string value)
        => value.Split(new[] { '/', ' ', '.', '+', ',' }, StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)
            .Where(static token => token.Length > 1)
            .Distinct(StringComparer.OrdinalIgnoreCase)
            .ToArray();

    /// <summary>Jaccard index of two token sets (case-insensitive); 0 when either set is empty.</summary>
    private static double Jaccard(string[] left, string[] right)
    {
        if (left.Length == 0 || right.Length == 0)
            return 0d;

        var intersection = left.Intersect(right, StringComparer.OrdinalIgnoreCase).Count();
        var union = left.Union(right, StringComparer.OrdinalIgnoreCase).Count();
        return union == 0 ? 0d : (double)intersection / union;
    }

    /// <summary>Edit-distance similarity in [0, 1]; 0 when either segment is blank.</summary>
    private static double SegmentSimilarity(string left, string right)
    {
        if (string.IsNullOrWhiteSpace(left) || string.IsNullOrWhiteSpace(right))
            return 0d;

        var distance = Levenshtein(left, right);
        var max = Math.Max(left.Length, right.Length);
        return max == 0 ? 0d : 1d - ((double)distance / max);
    }

    /// <summary>Levenshtein edit distance computed with a single rolling row.</summary>
    private static int Levenshtein(string left, string right)
    {
        var costs = new int[right.Length + 1];
        for (var j = 0; j < costs.Length; j++)
            costs[j] = j;

        for (var i = 1; i <= left.Length; i++)
        {
            // "previous" carries the diagonal cell (row i-1, column j-1) through the inner loop.
            var previous = costs[0];
            costs[0] = i;
            for (var j = 1; j <= right.Length; j++)
            {
                var current = costs[j];
                costs[j] = left[i - 1] == right[j - 1]
                    ? previous
                    : Math.Min(Math.Min(costs[j - 1], costs[j]), previous) + 1;
                previous = current;
            }
        }

        return costs[right.Length];
    }
}

/// <summary>Options for generating reviewable 404 redirect suggestions.</summary>
public sealed class WebLink404ReportOptions
{
    /// <summary>Generated site root used for known route discovery.</summary>
+ public string SiteRoot { get; set; } = string.Empty; + /// Apache access log or CSV file with 404 observations. + public string? SourcePath { get; set; } + /// Path to ignored 404 JSON rules. + public string? Ignored404Path { get; set; } + /// Inline ignored 404 rules. + public Ignored404Rule[] Ignored404Rules { get; set; } = Array.Empty(); + /// When true, a missing source log produces an empty report instead of failing. + public bool AllowMissingSource { get; set; } + /// Maximum suggestions per missing path. + public int MaxSuggestions { get; set; } = 3; + /// Minimum match score from 0 to 1. + public double MinimumScore { get; set; } = 0.35d; + /// When true, include CSS/JS/image 404s in the report. + public bool IncludeAsset404s { get; set; } +} + +/// Generated 404 report with candidate redirect targets. +public sealed class WebLink404ReportResult +{ + /// Resolved generated site root used for route discovery. + public string SiteRoot { get; set; } = string.Empty; + /// Resolved source log or observation CSV path, when provided. + public string? SourcePath { get; set; } + /// Number of generated HTML routes discovered. + public int RouteCount { get; set; } + /// Number of missing URL observations included in the report. + public int ObservationCount { get; set; } + /// Number of observations suppressed by ignored 404 rules. + public int IgnoredObservationCount { get; set; } + /// Number of observations with at least one suggested target. + public int SuggestedObservationCount { get; set; } + /// Observed missing URLs and their candidate targets. + public WebLink404Suggestion[] Suggestions { get; set; } = Array.Empty(); +} + +/// One observed missing URL and its suggested targets. +public sealed class WebLink404Suggestion +{ + /// Observed missing path. + public string Path { get; set; } = string.Empty; + /// Optional observed host. + public string? Host { get; set; } + /// Observation count for this missing path. 
+ public int Count { get; set; } + /// Optional referrer recorded with the observation. + public string? Referrer { get; set; } + /// Optional last-seen timestamp from the observation source. + public string? LastSeenAt { get; set; } + /// Candidate generated routes that may be suitable redirect targets. + public WebLink404RouteSuggestion[] Suggestions { get; set; } = Array.Empty(); +} + +/// One candidate route for a missing URL. +public sealed class WebLink404RouteSuggestion +{ + /// Candidate generated route path. + public string TargetPath { get; set; } = string.Empty; + /// Similarity score from 0 to 1. + public double Score { get; set; } +} + +internal sealed class WebLink404Observation +{ + public string Path { get; set; } = string.Empty; + public string? Host { get; set; } + public int Count { get; set; } + public string? Referrer { get; set; } + public string? LastSeenAt { get; set; } +} diff --git a/PowerForge.Web/Services/WebLinkService.Review404.cs b/PowerForge.Web/Services/WebLinkService.Review404.cs new file mode 100644 index 00000000..a5aef618 --- /dev/null +++ b/PowerForge.Web/Services/WebLinkService.Review404.cs @@ -0,0 +1,159 @@ +using System; +using System.IO; +using System.Text.Json; + +namespace PowerForge.Web; + +public static partial class WebLinkService +{ + /// Runs the static 404 review workflow and writes review artifacts. 
+ public static WebLink404ReviewResult Review404(WebLink404ReviewOptions options) + { + ArgumentNullException.ThrowIfNull(options); + if (string.IsNullOrWhiteSpace(options.SiteRoot)) + throw new ArgumentException("SiteRoot is required.", nameof(options)); + if (string.IsNullOrWhiteSpace(options.ReportPath)) + throw new ArgumentException("ReportPath is required.", nameof(options)); + if (string.IsNullOrWhiteSpace(options.RedirectCandidatesPath)) + throw new ArgumentException("RedirectCandidatesPath is required.", nameof(options)); + if (string.IsNullOrWhiteSpace(options.Ignored404CandidatesPath)) + throw new ArgumentException("Ignored404CandidatesPath is required.", nameof(options)); + + var reportPath = Path.GetFullPath(options.ReportPath); + var redirectCandidatesPath = Path.GetFullPath(options.RedirectCandidatesPath); + var ignored404CandidatesPath = Path.GetFullPath(options.Ignored404CandidatesPath); + + var report = Generate404Report(new WebLink404ReportOptions + { + SiteRoot = options.SiteRoot, + SourcePath = options.SourcePath, + Ignored404Path = options.Ignored404Path, + AllowMissingSource = options.AllowMissingSource, + MaxSuggestions = options.MaxSuggestions, + MinimumScore = options.MinimumScore, + IncludeAsset404s = options.IncludeAsset404s + }); + Write404ReportJson(reportPath, report); + + var promote = Promote404Suggestions(new WebLink404PromoteOptions + { + SourcePath = reportPath, + OutputPath = redirectCandidatesPath, + Enabled = options.EnableRedirectCandidates, + MinimumScore = options.PromoteMinimumScore, + MinimumCount = options.PromoteMinimumCount, + Status = options.PromoteStatus, + Group = options.PromoteGroup, + MergeWithExisting = false, + ReplaceExisting = false + }); + + var ignored = Ignore404Suggestions(new WebLink404IgnoreOptions + { + SourcePath = reportPath, + OutputPath = ignored404CandidatesPath, + OnlyWithoutSuggestions = true, + Reason = string.IsNullOrWhiteSpace(options.IgnoreReason) + ? 
"No generated route suggestion in 404 report." + : options.IgnoreReason, + CreatedBy = options.CreatedBy, + MergeWithExisting = false, + ReplaceExisting = false + }); + + WriteJsonIfRequested(options.PromoteSummaryPath, promote); + WriteJsonIfRequested(options.IgnoreSummaryPath, ignored); + + return new WebLink404ReviewResult + { + ReportPath = reportPath, + RedirectCandidatesPath = redirectCandidatesPath, + Ignored404CandidatesPath = ignored404CandidatesPath, + Report = report, + Promote = promote, + Ignore = ignored + }; + } + + private static void Write404ReportJson(string outputPath, WebLink404ReportResult report) + { + var directory = Path.GetDirectoryName(outputPath); + if (!string.IsNullOrWhiteSpace(directory)) + Directory.CreateDirectory(directory); + + File.WriteAllText(outputPath, JsonSerializer.Serialize(report, ShortlinkImportJsonOptions), Utf8NoBom); + } + + private static void WriteJsonIfRequested(string? outputPath, T payload) + { + if (string.IsNullOrWhiteSpace(outputPath)) + return; + + var resolved = Path.GetFullPath(outputPath); + var directory = Path.GetDirectoryName(resolved); + if (!string.IsNullOrWhiteSpace(directory)) + Directory.CreateDirectory(directory); + + File.WriteAllText(resolved, JsonSerializer.Serialize(payload, ShortlinkImportJsonOptions), Utf8NoBom); + } +} + +/// Options for the static 404 review workflow. +public sealed class WebLink404ReviewOptions +{ + /// Generated site root used for route discovery. + public string SiteRoot { get; set; } = string.Empty; + /// Apache log or CSV observation source. + public string? SourcePath { get; set; } + /// Committed ignored-404 rules used to filter observations. + public string? Ignored404Path { get; set; } + /// Allow missing log source and write empty artifacts. + public bool AllowMissingSource { get; set; } + /// Include asset 404s in the report. + public bool IncludeAsset404s { get; set; } + /// Maximum route suggestions per missing path. 
+ public int MaxSuggestions { get; set; } = 3; + /// Minimum route suggestion score to include in the report. + public double MinimumScore { get; set; } = 0.35d; + /// Output 404 suggestion report JSON path. + public string ReportPath { get; set; } = string.Empty; + /// Output redirect candidate JSON path. + public string RedirectCandidatesPath { get; set; } = string.Empty; + /// Output ignored-404 candidate JSON path. + public string Ignored404CandidatesPath { get; set; } = string.Empty; + /// Optional redirect candidate summary path. + public string? PromoteSummaryPath { get; set; } + /// Optional ignored-404 candidate summary path. + public string? IgnoreSummaryPath { get; set; } + /// Enable promoted redirect candidates immediately. + public bool EnableRedirectCandidates { get; set; } + /// Minimum suggestion score for redirect candidates. + public double PromoteMinimumScore { get; set; } = 0.65d; + /// Minimum 404 count for redirect candidates. + public int PromoteMinimumCount { get; set; } = 1; + /// HTTP status for promoted redirect candidates. + public int PromoteStatus { get; set; } = 301; + /// Group assigned to promoted redirect candidates. + public string? PromoteGroup { get; set; } = "404-suggestions"; + /// Reason assigned to ignored-404 candidates. + public string? IgnoreReason { get; set; } = "No generated route suggestion in 404 report."; + /// Creator identifier for ignored-404 candidates. + public string? CreatedBy { get; set; } +} + +/// Result from the static 404 review workflow. +public sealed class WebLink404ReviewResult +{ + /// Resolved 404 suggestion report JSON path. + public string ReportPath { get; set; } = string.Empty; + /// Resolved redirect candidate JSON path. + public string RedirectCandidatesPath { get; set; } = string.Empty; + /// Resolved ignored-404 candidate JSON path. + public string Ignored404CandidatesPath { get; set; } = string.Empty; + /// Generated 404 suggestion report. 
+ public WebLink404ReportResult Report { get; set; } = new(); + /// Redirect candidate promotion summary. + public WebLink404PromoteResult Promote { get; set; } = new(); + /// Ignored-404 candidate summary. + public WebLink404IgnoreResult Ignore { get; set; } = new(); +} diff --git a/PowerForge.Web/Services/WebLinkService.Types.cs b/PowerForge.Web/Services/WebLinkService.Types.cs new file mode 100644 index 00000000..7259032c --- /dev/null +++ b/PowerForge.Web/Services/WebLinkService.Types.cs @@ -0,0 +1,58 @@ +using System; +using System.Collections.Generic; + +namespace PowerForge.Web; + +/// Input paths and host aliases used when loading link-service data. +public sealed class WebLinkLoadOptions +{ + /// Path to redirect JSON. + public string? RedirectsPath { get; set; } + /// Path to shortlink JSON. + public string? ShortlinksPath { get; set; } + /// Compatibility CSV redirect inputs. + public string[] RedirectCsvPaths { get; set; } = Array.Empty(); + /// Named host aliases, for example en, pl, or short. + public IReadOnlyDictionary Hosts { get; set; } = new Dictionary(StringComparer.OrdinalIgnoreCase); + /// Host-to-language-prefix map for domains where a language is deployed at the web root. + public IReadOnlyDictionary LanguageRootHosts { get; set; } = new Dictionary(StringComparer.OrdinalIgnoreCase); +} + +/// Loaded link-service rules and source tracking metadata. +public sealed class WebLinkDataSet +{ + /// Loaded redirect rules. + public LinkRedirectRule[] Redirects { get; set; } = Array.Empty(); + /// Loaded shortlink rules. + public LinkShortlinkRule[] Shortlinks { get; set; } = Array.Empty(); + /// Source files that were found and read. + public string[] UsedSources { get; set; } = Array.Empty(); + /// Configured source files that were missing. + public string[] MissingSources { get; set; } = Array.Empty(); + /// Host-to-language-prefix map for domains where a language is deployed at the web root. 
+ public IReadOnlyDictionary LanguageRootHosts { get; set; } = new Dictionary(StringComparer.OrdinalIgnoreCase); +} + +/// Options for exporting link-service rules to Apache rewrite syntax. +public sealed class WebLinkApacheExportOptions +{ + /// Output file path. + public string OutputPath { get; set; } = string.Empty; + /// When true, emit explanatory comments at the top of the output. + public bool IncludeHeader { get; set; } = true; + /// When true, emit ErrorDocument 404 /404.html. + public bool IncludeErrorDocument404 { get; set; } + /// Named host aliases used for short-host path inference. + public IReadOnlyDictionary Hosts { get; set; } = new Dictionary(StringComparer.OrdinalIgnoreCase); + /// Host-to-language-prefix map for domains where a language is deployed at the web root. + public IReadOnlyDictionary LanguageRootHosts { get; set; } = new Dictionary(StringComparer.OrdinalIgnoreCase); +} + +/// Result from Apache link-service export. +public sealed class WebLinkApacheExportResult +{ + /// Resolved output path. + public string OutputPath { get; set; } = string.Empty; + /// Number of emitted rewrite rules. + public int RuleCount { get; set; } +} diff --git a/PowerForge.Web/Services/WebLinkService.cs b/PowerForge.Web/Services/WebLinkService.cs new file mode 100644 index 00000000..c3f98b96 --- /dev/null +++ b/PowerForge.Web/Services/WebLinkService.cs @@ -0,0 +1,793 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Security.Cryptography; +using System.Text; +using System.Text.Json; +using System.Text.RegularExpressions; + +namespace PowerForge.Web; + +/// Loads, validates, and exports PowerForge.Web redirect and shortlink data. 
+public static partial class WebLinkService +{ + private static readonly UTF8Encoding Utf8NoBom = new(encoderShouldEmitUTF8Identifier: false); + private static readonly Regex SafeSlugRegex = new("^[a-z0-9][a-z0-9._-]*$", RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.CultureInvariant); + private static readonly Regex LegacyPostIdRegex = new(@"^\s*/\?p=(\d+)\s*$", RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.CultureInvariant); + private static readonly Regex LegacyPageIdRegex = new(@"^\s*/\?page_id=(\d+)\s*$", RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.CultureInvariant); + + /// Loads redirect and shortlink data from configured JSON and compatibility CSV files. + public static WebLinkDataSet Load(WebLinkLoadOptions options) + { + ArgumentNullException.ThrowIfNull(options); + + var redirects = new List(); + var shortlinks = new List(); + var usedSources = new List(); + var missingSources = new List(); + + LoadRedirectJson(options.RedirectsPath, redirects, usedSources, missingSources); + LoadShortlinkJson(options.ShortlinksPath, shortlinks, usedSources, missingSources); + + foreach (var csvPath in options.RedirectCsvPaths ?? Array.Empty()) + { + if (string.IsNullOrWhiteSpace(csvPath)) + continue; + + var resolved = Path.GetFullPath(csvPath); + if (!File.Exists(resolved)) + { + missingSources.Add(resolved); + continue; + } + + usedSources.Add(resolved); + redirects.AddRange(ReadRedirectCsv(resolved, options.Hosts)); + } + + return new WebLinkDataSet + { + Redirects = redirects.ToArray(), + Shortlinks = shortlinks.ToArray(), + UsedSources = usedSources.Distinct(StringComparer.OrdinalIgnoreCase).ToArray(), + MissingSources = missingSources.Distinct(StringComparer.OrdinalIgnoreCase).ToArray(), + LanguageRootHosts = NormalizeLanguageRootHosts(options.LanguageRootHosts) + }; + } + + /// Validates redirect and shortlink rules for duplicates, unsafe targets, loops, and hygiene issues. 
+ public static LinkValidationResult Validate(WebLinkDataSet dataSet) + { + ArgumentNullException.ThrowIfNull(dataSet); + + var issues = new List(); + var enabledRedirects = dataSet.Redirects + .Where(static redirect => redirect is not null && redirect.Enabled) + .ToArray(); + var enabledShortlinks = dataSet.Shortlinks + .Where(static shortlink => shortlink is not null && shortlink.Enabled) + .ToArray(); + + ValidateRedirects(enabledRedirects, issues, dataSet.LanguageRootHosts); + ValidateShortlinks(enabledShortlinks, issues); + ValidateRedirectGraph(enabledRedirects, issues); + + var errorCount = issues.Count(static issue => issue.Severity == LinkValidationSeverity.Error); + var warningCount = issues.Count(static issue => issue.Severity == LinkValidationSeverity.Warning); + return new LinkValidationResult + { + Issues = issues.ToArray(), + RedirectCount = enabledRedirects.Length, + ShortlinkCount = enabledShortlinks.Length, + ErrorCount = errorCount, + WarningCount = warningCount + }; + } + + private static void LoadRedirectJson(string? path, List redirects, List usedSources, List missingSources) + { + if (string.IsNullOrWhiteSpace(path)) + return; + + var resolved = Path.GetFullPath(path); + if (!File.Exists(resolved)) + { + missingSources.Add(resolved); + return; + } + + using var document = JsonDocument.Parse(File.ReadAllText(resolved), new JsonDocumentOptions + { + AllowTrailingCommas = true, + CommentHandling = JsonCommentHandling.Skip + }); + + usedSources.Add(resolved); + JsonElement source = document.RootElement; + if (source.ValueKind == JsonValueKind.Object && source.TryGetProperty("redirects", out var nested)) + source = nested; + if (source.ValueKind != JsonValueKind.Array) + return; + + var parsed = source.Deserialize(WebJson.Options) ?? 
Array.Empty(); + foreach (var redirect in parsed) + { + if (string.IsNullOrWhiteSpace(redirect.OriginPath)) + redirect.OriginPath = resolved; + } + redirects.AddRange(parsed); + } + + private static void LoadShortlinkJson(string? path, List shortlinks, List usedSources, List missingSources) + { + if (string.IsNullOrWhiteSpace(path)) + return; + + var resolved = Path.GetFullPath(path); + if (!File.Exists(resolved)) + { + missingSources.Add(resolved); + return; + } + + using var document = JsonDocument.Parse(File.ReadAllText(resolved), new JsonDocumentOptions + { + AllowTrailingCommas = true, + CommentHandling = JsonCommentHandling.Skip + }); + + usedSources.Add(resolved); + JsonElement source = document.RootElement; + if (source.ValueKind == JsonValueKind.Object && source.TryGetProperty("shortlinks", out var nested)) + source = nested; + if (source.ValueKind != JsonValueKind.Array) + return; + + var parsed = source.Deserialize(WebJson.Options) ?? Array.Empty(); + foreach (var shortlink in parsed) + { + if (string.IsNullOrWhiteSpace(shortlink.OriginPath)) + shortlink.OriginPath = resolved; + } + shortlinks.AddRange(parsed); + } + + private static IEnumerable ReadRedirectCsv(string csvPath, IReadOnlyDictionary? 
hosts) + { + var rows = new List(); + var lines = File.ReadAllLines(csvPath); + if (lines.Length <= 1) + return rows; + + var header = SplitCsvLine(lines[0]); + var legacyIndex = FindHeader(header, "legacy_url", "source", "from", "redirect_from", "redirect from"); + var targetIndex = FindHeader(header, "target_url", "target", "to", "redirect_to", "redirect to"); + var statusIndex = FindHeader(header, "status", "redirect_type", "redirect type", "status_code", "status code"); + var languageIndex = FindHeader(header, "language", "lang"); + var sourceTypeIndex = FindHeader(header, "source_type", "sourceType", "group", "redirect_from_type", "redirect from type"); + var sourceIdIndex = FindHeader(header, "source_id", "sourceId", "id"); + var notesIndex = FindHeader(header, "notes", "note"); + var regexIndex = FindHeader(header, "regex", "pattern"); + + if (legacyIndex < 0 || targetIndex < 0) + return rows; + + for (var i = 1; i < lines.Length; i++) + { + if (string.IsNullOrWhiteSpace(lines[i])) + continue; + + var parts = SplitCsvLine(lines[i]); + if (parts.Length <= legacyIndex || parts.Length <= targetIndex) + continue; + + var legacy = parts[legacyIndex].Trim(); + var target = parts[targetIndex].Trim(); + if (string.IsNullOrWhiteSpace(legacy) || string.IsNullOrWhiteSpace(target)) + continue; + + var language = ReadPart(parts, languageIndex); + var source = ParseLegacySource(legacy, language, hosts); + var status = 301; + if (statusIndex >= 0 && statusIndex < parts.Length && int.TryParse(parts[statusIndex], out var parsedStatus)) + status = parsedStatus; + var sourceType = ReadPart(parts, sourceTypeIndex); + var regex = ReadPart(parts, regexIndex); + var matchType = ParseRedirectMatchType(sourceType, regex, source.Path); + var sourcePath = matchType == LinkRedirectMatchType.Regex + ? 
NormalizeRedirectRegexSource(regex, legacy) + : source.Path; + + rows.Add(new LinkRedirectRule + { + Id = BuildImportedId(source, target, i), + Enabled = true, + SourceHost = source.Host, + SourcePath = sourcePath, + SourceQuery = matchType == LinkRedirectMatchType.Regex ? null : source.Query, + MatchType = matchType == LinkRedirectMatchType.Exact && !string.IsNullOrWhiteSpace(source.Query) + ? LinkRedirectMatchType.Query + : matchType, + TargetUrl = target, + Status = status, + PreserveQuery = false, + Group = sourceType, + Source = "imported-csv", + Notes = ReadPart(parts, notesIndex), + AllowExternal = IsHttpUrl(target) || target.StartsWith("/", StringComparison.Ordinal), + CreatedBy = ReadPart(parts, sourceIdIndex), + OriginPath = Path.GetFullPath(csvPath), + OriginLine = i + 1 + }); + } + + return rows; + } + + private static LinkRedirectMatchType ParseRedirectMatchType(string sourceType, string regex, string sourcePath) + { + if (!string.IsNullOrWhiteSpace(regex) || + sourceType.Equals("regex", StringComparison.OrdinalIgnoreCase)) + { + if (LooksLikeSimpleWildcard(sourcePath)) + return LinkRedirectMatchType.Prefix; + return LinkRedirectMatchType.Regex; + } + + return LooksLikeSimpleWildcard(sourcePath) + ? LinkRedirectMatchType.Prefix + : LinkRedirectMatchType.Exact; + } + + private static bool LooksLikeSimpleWildcard(string value) + => !string.IsNullOrWhiteSpace(value) && + value.EndsWith('*') && + !value.Contains(".*", StringComparison.Ordinal) && + !value.StartsWith("?", StringComparison.Ordinal); + + private static string NormalizeRedirectRegexSource(string regex, string fallback) + { + var value = string.IsNullOrWhiteSpace(regex) ? fallback : regex; + value = value.Trim(); + if (value.StartsWith("/", StringComparison.Ordinal) && value.Length > 1) + value = value.TrimStart('/'); + return value; + } + + private static LinkLegacySource ParseLegacySource(string value, string? language, IReadOnlyDictionary? hosts) + { + var trimmed = value.Trim(); + string? 
host = null; + string path = trimmed; + string? query = null; + + if (Uri.TryCreate(trimmed, UriKind.Absolute, out var uri) && + (uri.Scheme.Equals(Uri.UriSchemeHttp, StringComparison.OrdinalIgnoreCase) || + uri.Scheme.Equals(Uri.UriSchemeHttps, StringComparison.OrdinalIgnoreCase))) + { + host = uri.Host; + path = string.IsNullOrWhiteSpace(uri.AbsolutePath) ? "/" : uri.AbsolutePath; + query = uri.Query.TrimStart('?'); + } + else if (!string.IsNullOrWhiteSpace(language) && + hosts is not null && + hosts.TryGetValue(language, out var mappedHost) && + !string.IsNullOrWhiteSpace(mappedHost)) + { + host = mappedHost; + } + + var hashIndex = path.IndexOf('#'); + if (hashIndex >= 0) + path = path.Substring(0, hashIndex); + + var queryIndex = path.IndexOf('?'); + if (queryIndex >= 0) + { + query = path[(queryIndex + 1)..]; + path = path.Substring(0, queryIndex); + } + + if (string.IsNullOrWhiteSpace(path)) + path = "/"; + if (!path.StartsWith("/", StringComparison.Ordinal)) + path = "/" + path.TrimStart('/'); + + return new LinkLegacySource(host, NormalizeSourcePath(path), string.IsNullOrWhiteSpace(query) ? null : query); + } + + private static void ValidateRedirects( + LinkRedirectRule[] redirects, + List issues, + IReadOnlyDictionary? languageRootHosts) + { + var seen = new Dictionary(StringComparer.OrdinalIgnoreCase); + foreach (var redirect in redirects) + { + var label = string.IsNullOrWhiteSpace(redirect.Id) ? 
redirect.SourcePath : redirect.Id; + if (string.IsNullOrWhiteSpace(redirect.SourcePath)) + { + AddIssue(issues, LinkValidationSeverity.Error, "PFLINK.REDIRECT.SOURCE_MISSING", "Redirect source path is required.", "redirect", label); + continue; + } + + if (!IsAllowedStatus(redirect.Status)) + AddIssue(issues, LinkValidationSeverity.Error, "PFLINK.REDIRECT.STATUS", $"Redirect status {redirect.Status} is not supported.", "redirect", label); + + if (redirect.Status != 410 && string.IsNullOrWhiteSpace(redirect.TargetUrl)) + AddIssue(issues, LinkValidationSeverity.Error, "PFLINK.REDIRECT.TARGET_MISSING", "Redirect target URL is required unless status is 410.", "redirect", label); + + if (!string.IsNullOrWhiteSpace(redirect.TargetUrl)) + ValidateTarget(redirect.TargetUrl, redirect.AllowExternal, issues, "redirect", label, "PFLINK.REDIRECT"); + + if (redirect.MatchType == LinkRedirectMatchType.Regex && IsBroadRegex(redirect.SourcePath)) + AddIssue(issues, LinkValidationSeverity.Warning, "PFLINK.REDIRECT.REGEX_BROAD", "Regex redirect looks very broad and should be reviewed.", "redirect", label); + + var key = BuildRedirectKey(redirect); + if (seen.TryGetValue(key, out var existing)) + { + var existingLabel = string.IsNullOrWhiteSpace(existing.Id) ? 
existing.SourcePath : existing.Id; + var existingNormalizedTarget = NormalizeTargetForCompare(existing.TargetUrl, existing.SourceHost, languageRootHosts); + var normalizedTarget = NormalizeTargetForCompare(redirect.TargetUrl, redirect.SourceHost, languageRootHosts); + if (string.Equals( + existingNormalizedTarget, + normalizedTarget, + StringComparison.OrdinalIgnoreCase) && + ResolveStatus(existing.Status, 301) == ResolveStatus(redirect.Status, 301)) + { + AddRedirectIssue( + issues, + LinkValidationSeverity.Warning, + "PFLINK.REDIRECT.DUPLICATE_SAME_TARGET", + $"Duplicate redirect source repeats '{existingLabel}' for {BuildDisplaySource(redirect)} -> {redirect.TargetUrl}.", + redirect, + existing, + normalizedTarget, + existingNormalizedTarget); + } + else + { + AddRedirectIssue( + issues, + LinkValidationSeverity.Error, + "PFLINK.REDIRECT.DUPLICATE", + $"Duplicate redirect source conflicts with '{existingLabel}' for {BuildDisplaySource(redirect)}: '{existing.TargetUrl}' vs '{redirect.TargetUrl}'.", + redirect, + existing, + normalizedTarget, + existingNormalizedTarget); + } + } + else + { + seen[key] = redirect; + } + } + } + + private static void ValidateShortlinks(LinkShortlinkRule[] shortlinks, List issues) + { + var seen = new Dictionary(StringComparer.OrdinalIgnoreCase); + foreach (var shortlink in shortlinks) + { + var label = shortlink.Slug; + if (string.IsNullOrWhiteSpace(shortlink.Slug) || !SafeSlugRegex.IsMatch(shortlink.Slug.Trim())) + { + AddIssue(issues, LinkValidationSeverity.Error, "PFLINK.SHORTLINK.SLUG", "Shortlink slug must be URL-safe.", "shortlink", label); + continue; + } + + if (!IsAllowedStatus(shortlink.Status)) + AddIssue(issues, LinkValidationSeverity.Error, "PFLINK.SHORTLINK.STATUS", $"Shortlink status {shortlink.Status} is not supported.", "shortlink", label); + + if (string.IsNullOrWhiteSpace(shortlink.TargetUrl)) + AddIssue(issues, LinkValidationSeverity.Error, "PFLINK.SHORTLINK.TARGET_MISSING", "Shortlink target URL is required.", 
"shortlink", label); + else + ValidateTarget(AppendUtm(shortlink.TargetUrl, shortlink.Utm), shortlink.AllowExternal, issues, "shortlink", label, "PFLINK.SHORTLINK"); + + var key = $"{shortlink.Host ?? string.Empty}|{NormalizeShortlinkPath(shortlink, null)}"; + if (seen.TryGetValue(key, out var existing)) + { + AddIssue(issues, LinkValidationSeverity.Error, "PFLINK.SHORTLINK.DUPLICATE", $"Duplicate shortlink conflicts with '{existing.Slug}'.", "shortlink", label); + } + else + { + seen[key] = shortlink; + } + + if (string.IsNullOrWhiteSpace(shortlink.Owner)) + AddIssue(issues, LinkValidationSeverity.Warning, "PFLINK.SHORTLINK.OWNER", "Shortlink is missing an owner.", "shortlink", label); + } + } + + private static void ValidateRedirectGraph(LinkRedirectRule[] redirects, List issues) + { + var map = new Dictionary(StringComparer.OrdinalIgnoreCase); + foreach (var redirect in redirects) + { + if (redirect.MatchType != LinkRedirectMatchType.Exact && redirect.MatchType != LinkRedirectMatchType.Query) + continue; + if (!IsLocalPath(redirect.TargetUrl)) + continue; + + var source = NormalizeSourcePath(redirect.SourcePath); + var target = NormalizeSourcePath(redirect.TargetUrl); + if (!string.IsNullOrWhiteSpace(source) && !string.IsNullOrWhiteSpace(target)) + map[source] = target; + } + + foreach (var redirect in redirects) + { + if (redirect.MatchType != LinkRedirectMatchType.Exact && redirect.MatchType != LinkRedirectMatchType.Query) + continue; + + var current = NormalizeSourcePath(redirect.SourcePath); + var visited = new HashSet(StringComparer.OrdinalIgnoreCase); + var depth = 0; + while (map.TryGetValue(current, out var next)) + { + if (!visited.Add(current)) + { + AddRedirectIssue( + issues, + LinkValidationSeverity.Error, + "PFLINK.REDIRECT.LOOP", + $"Redirect loop detected starting at {BuildDisplaySource(redirect)}.", + redirect, + normalizedTarget: NormalizeSourcePath(redirect.TargetUrl)); + break; + } + + current = next; + depth++; + if (depth > 5) + { + 
AddRedirectIssue(
                        issues,
                        LinkValidationSeverity.Error,
                        "PFLINK.REDIRECT.CHAIN",
                        $"Redirect chain is longer than 5 hops starting at {BuildDisplaySource(redirect)}.",
                        redirect,
                        normalizedTarget: NormalizeSourcePath(redirect.TargetUrl));
                    break;
                }
            }
        }
    }

    /// <summary>
    /// Validates a redirect or shortlink target. Root-relative paths are accepted; absolute URLs
    /// must use http or https and require <paramref name="allowExternal"/>.
    /// </summary>
    /// <param name="targetUrl">Target to validate.</param>
    /// <param name="allowExternal">Whether the rule opted in to external targets.</param>
    /// <param name="issues">Collector that receives any validation issue.</param>
    /// <param name="source">Issue source label, for example "redirect" or "shortlink".</param>
    /// <param name="id">Identifier of the rule being validated.</param>
    /// <param name="codePrefix">Prefix for the emitted issue codes.</param>
    private static void ValidateTarget(string targetUrl, bool allowExternal, List<LinkValidationIssue> issues, string source, string? id, string codePrefix)
    {
        var trimmed = targetUrl.Trim();
        if (trimmed.StartsWith("/", StringComparison.Ordinal))
        {
            // "//host/path" is a network-path reference (RFC 3986 section 4.2): it keeps the scheme
            // but leaves the current host, so it must pass the same allowExternal gate as an
            // absolute URL. "/\" is included because browsers commonly treat '\' like '/'.
            var isNetworkPath = trimmed.StartsWith("//", StringComparison.Ordinal) ||
                                trimmed.StartsWith("/\\", StringComparison.Ordinal);
            if (isNetworkPath && !allowExternal)
                AddIssue(issues, LinkValidationSeverity.Error, codePrefix + ".TARGET_EXTERNAL", "External target requires allowExternal: true.", source, id);
            return;
        }

        if (!Uri.TryCreate(trimmed, UriKind.Absolute, out var uri))
        {
            AddIssue(issues, LinkValidationSeverity.Error, codePrefix + ".TARGET_INVALID", "Target URL must be absolute or a local root-relative path.", source, id);
            return;
        }

        if (!uri.Scheme.Equals(Uri.UriSchemeHttp, StringComparison.OrdinalIgnoreCase) &&
            !uri.Scheme.Equals(Uri.UriSchemeHttps, StringComparison.OrdinalIgnoreCase))
        {
            AddIssue(issues, LinkValidationSeverity.Error, codePrefix + ".TARGET_PROTOCOL", "Target URL must use http or https.", source, id);
            return;
        }

        if (!allowExternal)
            AddIssue(issues, LinkValidationSeverity.Error, codePrefix + ".TARGET_EXTERNAL", "External target requires allowExternal: true.", source, id);
    }

    /// <summary>Appends a validation issue with the given severity, code, message, source, and id.</summary>
    private static void AddIssue(List<LinkValidationIssue> issues, LinkValidationSeverity severity, string code, string message, string source, string? id)
        => issues.Add(new LinkValidationIssue
        {
            Severity = severity,
            Code = code,
            Message = message,
            Source = source,
            Id = id
        });

    private static void AddRedirectIssue(
        List<LinkValidationIssue> issues,
        LinkValidationSeverity severity,
        string code,
        string message,
        LinkRedirectRule redirect,
        LinkRedirectRule? related = null,
        string? normalizedTarget = null,
        string? relatedNormalizedTarget = null)
        => issues.Add(new LinkValidationIssue
        {
            Severity = severity,
            Code = code,
            Message = message,
            Source = "redirect",
            Id = string.IsNullOrWhiteSpace(redirect.Id) ?
redirect.SourcePath : redirect.Id, + RelatedId = related is null ? null : (string.IsNullOrWhiteSpace(related.Id) ? related.SourcePath : related.Id), + SourceHost = redirect.SourceHost, + SourcePath = redirect.SourcePath, + SourceQuery = redirect.SourceQuery, + TargetUrl = redirect.TargetUrl, + RelatedTargetUrl = related?.TargetUrl, + NormalizedTargetUrl = normalizedTarget, + RelatedNormalizedTargetUrl = relatedNormalizedTarget, + Status = ResolveStatus(redirect.Status, 301), + RelatedStatus = related is null ? 0 : ResolveStatus(related.Status, 301), + OriginPath = redirect.OriginPath, + OriginLine = redirect.OriginLine, + RelatedOriginPath = related?.OriginPath, + RelatedOriginLine = related?.OriginLine ?? 0 + }); + + private static string BuildRedirectKey(LinkRedirectRule redirect) + => string.Join("|", + redirect.SourceHost ?? string.Empty, + redirect.MatchType.ToString(), + NormalizeSourcePath(redirect.SourcePath), + redirect.SourceQuery ?? string.Empty); + + private static bool IsAllowedStatus(int status) + => status is 301 or 302 or 307 or 308 or 410; + + private static bool IsBroadRegex(string pattern) + { + var trimmed = pattern.Trim(); + return trimmed is ".*" or "^.*" or "^.*$" or "(.*)" or "^(.*)$" || trimmed.Length < 3; + } + + private static int ResolveStatus(int status, int defaultStatus) + => status <= 0 ? defaultStatus : status; + + private static int MatchTypeOrder(LinkRedirectMatchType matchType) + => matchType switch + { + LinkRedirectMatchType.Exact => 0, + LinkRedirectMatchType.Query => 1, + LinkRedirectMatchType.Prefix => 2, + LinkRedirectMatchType.Regex => 3, + _ => 10 + }; + + private static int SourceRank(string? source) + { + var normalized = source?.Trim() ?? 
string.Empty; + if (normalized.Equals("manual", StringComparison.OrdinalIgnoreCase)) + return 0; + if (normalized.Equals("shortlink", StringComparison.OrdinalIgnoreCase)) + return 1; + if (normalized.StartsWith("generated", StringComparison.OrdinalIgnoreCase)) + return 2; + if (normalized.StartsWith("imported", StringComparison.OrdinalIgnoreCase)) + return 3; + return 5; + } + + private static string NormalizeSourcePath(string? source) + { + if (string.IsNullOrWhiteSpace(source)) + return string.Empty; + + var trimmed = source.Trim(); + if (Uri.TryCreate(trimmed, UriKind.Absolute, out var uri) && + (uri.Scheme.Equals(Uri.UriSchemeHttp, StringComparison.OrdinalIgnoreCase) || + uri.Scheme.Equals(Uri.UriSchemeHttps, StringComparison.OrdinalIgnoreCase))) + { + trimmed = uri.AbsolutePath; + } + + var queryIndex = trimmed.IndexOf('?'); + if (queryIndex >= 0) + trimmed = trimmed.Substring(0, queryIndex); + var hashIndex = trimmed.IndexOf('#'); + if (hashIndex >= 0) + trimmed = trimmed.Substring(0, hashIndex); + + if (!trimmed.StartsWith("/", StringComparison.Ordinal)) + trimmed = "/" + trimmed.TrimStart('/'); + return trimmed.Length > 1 ? trimmed.TrimEnd('/') : trimmed; + } + + private static string NormalizeDestination(string? destination) + { + if (string.IsNullOrWhiteSpace(destination)) + return string.Empty; + + var trimmed = destination.Trim(); + if (IsHttpUrl(trimmed)) + return trimmed; + return trimmed.StartsWith("/", StringComparison.Ordinal) ? trimmed : "/" + trimmed.TrimStart('/'); + } + + private static string NormalizeTargetForCompare( + string? targetUrl, + string? sourceHost, + IReadOnlyDictionary? 
languageRootHosts = null) + { + if (string.IsNullOrWhiteSpace(targetUrl)) + return string.Empty; + + var normalized = NormalizeDestination(targetUrl); + if (!IsHttpUrl(normalized)) + return NormalizeLanguageRootPath(NormalizeSourcePath(normalized), sourceHost, languageRootHosts); + + if (!Uri.TryCreate(normalized, UriKind.Absolute, out var uri)) + return normalized; + + if (!string.IsNullOrWhiteSpace(sourceHost) && + uri.Host.Equals(sourceHost.Trim(), StringComparison.OrdinalIgnoreCase)) + { + return NormalizeLanguageRootPath(NormalizeSourcePath(uri.AbsolutePath), sourceHost, languageRootHosts); + } + + return normalized.TrimEnd('/'); + } + + private static string NormalizeLanguageRootPath( + string path, + string? sourceHost, + IReadOnlyDictionary? languageRootHosts) + { + if (string.IsNullOrWhiteSpace(path) || + string.IsNullOrWhiteSpace(sourceHost) || + languageRootHosts is null || + !languageRootHosts.TryGetValue(sourceHost.Trim(), out var language) || + string.IsNullOrWhiteSpace(language)) + { + return path; + } + + return NormalizeSourcePath(StripLanguageRootPrefix(path, language)); + } + + private static string StripLanguageRootPrefix(string path, string language) + { + if (string.IsNullOrWhiteSpace(path) || string.IsNullOrWhiteSpace(language)) + return path; + + var prefix = "/" + language.Trim().Trim('/') + "/"; + if (!path.StartsWith(prefix, StringComparison.OrdinalIgnoreCase)) + return path.Equals(prefix.TrimEnd('/'), StringComparison.OrdinalIgnoreCase) ? "/" : path; + + var rebased = "/" + path[prefix.Length..].TrimStart('/'); + return string.IsNullOrWhiteSpace(rebased) ? "/" : rebased; + } + + private static IReadOnlyDictionary NormalizeLanguageRootHosts(IReadOnlyDictionary? 
hosts) + { + var normalized = new Dictionary(StringComparer.OrdinalIgnoreCase); + if (hosts is null) + return normalized; + + foreach (var pair in hosts) + { + if (string.IsNullOrWhiteSpace(pair.Key) || string.IsNullOrWhiteSpace(pair.Value)) + continue; + + normalized[pair.Key.Trim()] = pair.Value.Trim().Trim('/'); + } + + return normalized; + } + + private static string BuildDisplaySource(LinkRedirectRule redirect) + { + var host = string.IsNullOrWhiteSpace(redirect.SourceHost) ? "*" : redirect.SourceHost.Trim(); + var path = string.IsNullOrWhiteSpace(redirect.SourcePath) ? "/" : redirect.SourcePath.Trim(); + var query = string.IsNullOrWhiteSpace(redirect.SourceQuery) ? string.Empty : "?" + redirect.SourceQuery.Trim().TrimStart('?'); + return $"{host}{path}{query}"; + } + + private static bool IsLocalPath(string? value) + => !string.IsNullOrWhiteSpace(value) && value.Trim().StartsWith("/", StringComparison.Ordinal); + + private static bool IsHttpUrl(string? value) + => !string.IsNullOrWhiteSpace(value) && + (value.StartsWith("http://", StringComparison.OrdinalIgnoreCase) || + value.StartsWith("https://", StringComparison.OrdinalIgnoreCase)); + + private static string NormalizeShortlinkPath(LinkShortlinkRule shortlink, IReadOnlyDictionary? hosts) + { + var slug = shortlink.Slug.Trim().Trim('/'); + var prefix = shortlink.PathPrefix; + if (string.IsNullOrWhiteSpace(prefix)) + { + var shortHost = ResolveShortHost(hosts); + prefix = !string.IsNullOrWhiteSpace(shortlink.Host) && + !string.IsNullOrWhiteSpace(shortHost) && + shortlink.Host.Equals(shortHost, StringComparison.OrdinalIgnoreCase) + ? "/" + : "/go"; + } + + prefix = prefix.Trim(); + if (!prefix.StartsWith("/", StringComparison.Ordinal)) + prefix = "/" + prefix.TrimStart('/'); + prefix = prefix.TrimEnd('/'); + return string.IsNullOrWhiteSpace(prefix) ? "/" + slug : prefix + "/" + slug; + } + + private static string? ResolveShortHost(IReadOnlyDictionary? 
hosts) + { + if (hosts is null || hosts.Count == 0) + return null; + + return hosts.TryGetValue("short", out var shortHost) && !string.IsNullOrWhiteSpace(shortHost) + ? shortHost + : null; + } + + private static string AppendUtm(string targetUrl, string? utm) + { + if (string.IsNullOrWhiteSpace(targetUrl) || string.IsNullOrWhiteSpace(utm)) + return targetUrl; + + var separator = targetUrl.Contains('?', StringComparison.Ordinal) ? "&" : "?"; + return targetUrl + separator + utm.Trim().TrimStart('?').TrimStart('&'); + } + + private static string BuildImportedId(LinkLegacySource source, string target, int row) + { + var raw = $"{source.Host}|{source.Path}|{source.Query}|{target}|{row}"; + var hash = SHA256.HashData(Encoding.UTF8.GetBytes(raw.ToLowerInvariant())); + return "imported-" + Convert.ToHexString(hash, 0, 6).ToLowerInvariant(); + } + + private static int FindHeader(string[] header, params string[] names) + { + for (var i = 0; i < header.Length; i++) + { + foreach (var name in names) + { + if (header[i].Equals(name, StringComparison.OrdinalIgnoreCase)) + return i; + } + } + + return -1; + } + + private static string ReadPart(string[] parts, int index) + => index >= 0 && index < parts.Length ? parts[index].Trim() : string.Empty; + + private static string[] SplitCsvLine(string line) + { + if (string.IsNullOrWhiteSpace(line)) + return Array.Empty(); + + var values = new List(); + var sb = new StringBuilder(); + var inQuotes = false; + for (var i = 0; i < line.Length; i++) + { + var c = line[i]; + if (c == '"' && (i + 1 >= line.Length || line[i + 1] != '"')) + { + inQuotes = !inQuotes; + continue; + } + + if (c == '"' && i + 1 < line.Length && line[i + 1] == '"') + { + sb.Append('"'); + i++; + continue; + } + + if (c == ',' && !inQuotes) + { + values.Add(sb.ToString().Trim()); + sb.Clear(); + continue; + } + + sb.Append(c); + } + + values.Add(sb.ToString().Trim()); + return values.ToArray(); + } +} + +internal readonly record struct LinkLegacySource(string? 
Host, string Path, string? Query); diff --git a/Schemas/powerforge.web.pipelinespec.schema.json b/Schemas/powerforge.web.pipelinespec.schema.json index 0184f00b..47431b24 100644 --- a/Schemas/powerforge.web.pipelinespec.schema.json +++ b/Schemas/powerforge.web.pipelinespec.schema.json @@ -80,6 +80,12 @@ { "$ref": "#/$defs/ProjectCatalogStep" }, { "$ref": "#/$defs/ProjectApiDocsStep" }, { "$ref": "#/$defs/ApacheRedirectsStep" }, + { "$ref": "#/$defs/LinksValidateStep" }, + { "$ref": "#/$defs/LinksExportApacheStep" }, + { "$ref": "#/$defs/LinksImportWordPressStep" }, + { "$ref": "#/$defs/LinksReport404Step" }, + { "$ref": "#/$defs/LinksPromote404Step" }, + { "$ref": "#/$defs/LinksIgnore404Step" }, { "$ref": "#/$defs/WordPressNormalizeStep" }, { "$ref": "#/$defs/WordPressMediaSyncStep" }, { "$ref": "#/$defs/WordPressExportSnapshotStep" }, @@ -104,6 +110,20 @@ "output": { "type": "string" }, "language": { "type": "string", "description": "Optional language code to build (for example: en, pl)." }, "lang": { "type": "string", "description": "Alias for language." }, + "languages": { + "oneOf": [ + { "type": "string", "description": "Comma-separated language codes to render into one output (for example: en,fr,de,es)." }, + { "type": "array", "items": { "type": "string" } } + ], + "description": "Optional language allow-list for multi-language domain builds." + }, + "langs": { + "oneOf": [ + { "type": "string", "description": "Alias for languages." }, + { "type": "array", "items": { "type": "string" } } + ], + "description": "Alias for languages." + }, "languageAsRoot": { "type": "boolean", "description": "When true and language is set, render the selected language without language prefix (domain-style language deployment)." 
@@ -1734,12 +1754,335 @@ "dest": { "type": "string" }, "summaryPath": { "type": "string" }, "summary-path": { "type": "string" }, + "reportPath": { "type": "string" }, + "report-path": { "type": "string" }, + "duplicateReportPath": { "type": "string" }, + "duplicate-report-path": { "type": "string" }, "strict": { "type": "boolean" }, "includeHeader": { "type": "boolean" }, "include-header": { "type": "boolean" } }, "required": ["task"] }, + "LinksValidateStep": { + "type": "object", + "additionalProperties": false, + "properties": { + "task": { "enum": ["links-validate", "link-validate", "links"] }, + "id": { "type": "string" }, + "dependsOn": { "oneOf": [{ "type": "string" }, { "type": "array", "items": { "type": "string" } }] }, + "mode": { "type": "string" }, + "modes": { "type": "array", "items": { "type": "string" } }, + "onlyModes": { "type": "array", "items": { "type": "string" } }, + "only-modes": { "type": "array", "items": { "type": "string" } }, + "skipModes": { "type": "array", "items": { "type": "string" } }, + "skip-modes": { "type": "array", "items": { "type": "string" } }, + "config": { "type": "string" }, + "redirects": { "type": "string" }, + "redirectsPath": { "type": "string" }, + "redirects-path": { "type": "string" }, + "shortlinks": { "type": "string" }, + "shortlinksPath": { "type": "string" }, + "shortlinks-path": { "type": "string" }, + "sources": { "type": "array", "items": { "type": "string" } }, + "redirectCsvPaths": { "type": "array", "items": { "type": "string" } }, + "redirect-csv-paths": { "type": "array", "items": { "type": "string" } }, + "csvSources": { "type": "array", "items": { "type": "string" } }, + "csv-sources": { "type": "array", "items": { "type": "string" } }, + "hosts": { "type": "object", "additionalProperties": { "type": "string" } }, + "hostMap": { "type": "object", "additionalProperties": { "type": "string" } }, + "host-map": { "type": "object", "additionalProperties": { "type": "string" } }, + "languageRootHosts": 
{ "type": "object", "additionalProperties": { "type": "string" } }, + "language-root-hosts": { "type": "object", "additionalProperties": { "type": "string" } }, + "summaryPath": { "type": "string" }, + "summary-path": { "type": "string" }, + "reportPath": { "type": "string" }, + "report-path": { "type": "string" }, + "duplicateReportPath": { "type": "string" }, + "duplicate-report-path": { "type": "string" }, + "baseline": { "type": "string" }, + "baselinePath": { "type": "string" }, + "baseline-path": { "type": "string" }, + "baselineGenerate": { "type": "boolean" }, + "baseline-generate": { "type": "boolean" }, + "baselineUpdate": { "type": "boolean" }, + "baseline-update": { "type": "boolean" }, + "strict": { "type": "boolean" }, + "failOnWarnings": { "type": "boolean" }, + "fail-on-warnings": { "type": "boolean" }, + "failOnNewWarnings": { "type": "boolean" }, + "fail-on-new-warnings": { "type": "boolean" }, + "failOnNew": { "type": "boolean" } + }, + "required": ["task"] + }, + "LinksExportApacheStep": { + "type": "object", + "additionalProperties": false, + "properties": { + "task": { "enum": ["links-export-apache", "link-export-apache", "links-export"] }, + "id": { "type": "string" }, + "dependsOn": { "oneOf": [{ "type": "string" }, { "type": "array", "items": { "type": "string" } }] }, + "mode": { "type": "string" }, + "modes": { "type": "array", "items": { "type": "string" } }, + "onlyModes": { "type": "array", "items": { "type": "string" } }, + "only-modes": { "type": "array", "items": { "type": "string" } }, + "skipModes": { "type": "array", "items": { "type": "string" } }, + "skip-modes": { "type": "array", "items": { "type": "string" } }, + "config": { "type": "string" }, + "redirects": { "type": "string" }, + "redirectsPath": { "type": "string" }, + "redirects-path": { "type": "string" }, + "shortlinks": { "type": "string" }, + "shortlinksPath": { "type": "string" }, + "shortlinks-path": { "type": "string" }, + "sources": { "type": "array", "items": { 
"type": "string" } }, + "redirectCsvPaths": { "type": "array", "items": { "type": "string" } }, + "redirect-csv-paths": { "type": "array", "items": { "type": "string" } }, + "csvSources": { "type": "array", "items": { "type": "string" } }, + "csv-sources": { "type": "array", "items": { "type": "string" } }, + "hosts": { "type": "object", "additionalProperties": { "type": "string" } }, + "hostMap": { "type": "object", "additionalProperties": { "type": "string" } }, + "host-map": { "type": "object", "additionalProperties": { "type": "string" } }, + "languageRootHosts": { "type": "object", "additionalProperties": { "type": "string" } }, + "language-root-hosts": { "type": "object", "additionalProperties": { "type": "string" } }, + "out": { "type": "string" }, + "output": { "type": "string" }, + "outputPath": { "type": "string" }, + "output-path": { "type": "string" }, + "apacheOut": { "type": "string" }, + "apache-out": { "type": "string" }, + "summaryPath": { "type": "string" }, + "summary-path": { "type": "string" }, + "reportPath": { "type": "string" }, + "report-path": { "type": "string" }, + "strict": { "type": "boolean" }, + "skipValidation": { "type": "boolean" }, + "skip-validation": { "type": "boolean" }, + "includeHeader": { "type": "boolean" }, + "include-header": { "type": "boolean" }, + "includeErrorDocument404": { "type": "boolean" }, + "include-error-document-404": { "type": "boolean" } + }, + "required": ["task"] + }, + "LinksImportWordPressStep": { + "type": "object", + "additionalProperties": false, + "properties": { + "task": { "enum": ["links-import-wordpress", "link-import-wordpress", "links-import-pretty-links", "links-import"] }, + "id": { "type": "string" }, + "dependsOn": { "oneOf": [{ "type": "string" }, { "type": "array", "items": { "type": "string" } }] }, + "mode": { "type": "string" }, + "modes": { "type": "array", "items": { "type": "string" } }, + "onlyModes": { "type": "array", "items": { "type": "string" } }, + "only-modes": { "type": 
"array", "items": { "type": "string" } }, + "skipModes": { "type": "array", "items": { "type": "string" } }, + "skip-modes": { "type": "array", "items": { "type": "string" } }, + "config": { "type": "string" }, + "source": { "type": "string" }, + "csv": { "type": "string" }, + "input": { "type": "string" }, + "in": { "type": "string" }, + "out": { "type": "string" }, + "output": { "type": "string" }, + "outputPath": { "type": "string" }, + "output-path": { "type": "string" }, + "shortlinks": { "type": "string" }, + "shortlinksPath": { "type": "string" }, + "shortlinks-path": { "type": "string" }, + "host": { "type": "string" }, + "pathPrefix": { "type": "string" }, + "path-prefix": { "type": "string" }, + "owner": { "type": "string" }, + "tags": { + "oneOf": [ + { "type": "string" }, + { "type": "array", "items": { "type": "string" } } + ] + }, + "tag": { + "oneOf": [ + { "type": "string" }, + { "type": "array", "items": { "type": "string" } } + ] + }, + "status": { "type": "integer", "enum": [301, 302, 307, 308] }, + "allowExternal": { "type": "boolean" }, + "allow-external": { "type": "boolean" }, + "merge": { "type": "boolean" }, + "mergeWithExisting": { "type": "boolean" }, + "merge-with-existing": { "type": "boolean" }, + "replaceExisting": { "type": "boolean" }, + "replace-existing": { "type": "boolean" }, + "summaryPath": { "type": "string" }, + "summary-path": { "type": "string" } + }, + "required": ["task"] + }, + "LinksReport404Step": { + "type": "object", + "additionalProperties": false, + "properties": { + "task": { "enum": ["links-report-404", "link-report-404", "links-report"] }, + "id": { "type": "string" }, + "dependsOn": { "oneOf": [{ "type": "string" }, { "type": "array", "items": { "type": "string" } }] }, + "mode": { "type": "string" }, + "modes": { "type": "array", "items": { "type": "string" } }, + "onlyModes": { "type": "array", "items": { "type": "string" } }, + "only-modes": { "type": "array", "items": { "type": "string" } }, + "skipModes": 
{ "type": "array", "items": { "type": "string" } }, + "skip-modes": { "type": "array", "items": { "type": "string" } }, + "siteRoot": { "type": "string" }, + "site-root": { "type": "string" }, + "outRoot": { "type": "string" }, + "out-root": { "type": "string" }, + "source": { "type": "string" }, + "log": { "type": "string" }, + "input": { "type": "string" }, + "in": { "type": "string" }, + "out": { "type": "string" }, + "output": { "type": "string" }, + "reportPath": { "type": "string" }, + "report-path": { "type": "string" }, + "reviewCsv": { "type": "string" }, + "review-csv": { "type": "string" }, + "reviewCsvPath": { "type": "string" }, + "review-csv-path": { "type": "string" }, + "csvReport": { "type": "string" }, + "csv-report": { "type": "string" }, + "ignored404": { "type": "string" }, + "ignored-404": { "type": "string" }, + "ignored404Path": { "type": "string" }, + "ignored-404-path": { "type": "string" }, + "maxSuggestions": { "type": "integer", "minimum": 1 }, + "max-suggestions": { "type": "integer", "minimum": 1 }, + "minimumScore": { "type": "number", "minimum": 0, "maximum": 1 }, + "minimum-score": { "type": "number", "minimum": 0, "maximum": 1 }, + "minScore": { "type": "number", "minimum": 0, "maximum": 1 }, + "min-score": { "type": "number", "minimum": 0, "maximum": 1 }, + "includeAsset404s": { "type": "boolean" }, + "include-asset-404s": { "type": "boolean" }, + "includeAssets": { "type": "boolean" }, + "include-assets": { "type": "boolean" }, + "allowMissingSource": { "type": "boolean" }, + "allow-missing-source": { "type": "boolean" } + }, + "required": ["task"] + }, + "LinksPromote404Step": { + "type": "object", + "additionalProperties": false, + "properties": { + "task": { "enum": ["links-promote-404", "link-promote-404", "links-promote"] }, + "id": { "type": "string" }, + "dependsOn": { "oneOf": [{ "type": "string" }, { "type": "array", "items": { "type": "string" } }] }, + "mode": { "type": "string" }, + "modes": { "type": "array", 
"items": { "type": "string" } }, + "onlyModes": { "type": "array", "items": { "type": "string" } }, + "only-modes": { "type": "array", "items": { "type": "string" } }, + "skipModes": { "type": "array", "items": { "type": "string" } }, + "skip-modes": { "type": "array", "items": { "type": "string" } }, + "config": { "type": "string" }, + "source": { "type": "string" }, + "report": { "type": "string" }, + "input": { "type": "string" }, + "in": { "type": "string" }, + "out": { "type": "string" }, + "output": { "type": "string" }, + "outputPath": { "type": "string" }, + "output-path": { "type": "string" }, + "redirects": { "type": "string" }, + "redirectsPath": { "type": "string" }, + "redirects-path": { "type": "string" }, + "enabled": { "type": "boolean" }, + "enable": { "type": "boolean" }, + "minimumScore": { "type": "number", "minimum": 0, "maximum": 1 }, + "minimum-score": { "type": "number", "minimum": 0, "maximum": 1 }, + "minScore": { "type": "number", "minimum": 0, "maximum": 1 }, + "min-score": { "type": "number", "minimum": 0, "maximum": 1 }, + "minimumCount": { "type": "integer", "minimum": 1 }, + "minimum-count": { "type": "integer", "minimum": 1 }, + "minCount": { "type": "integer", "minimum": 1 }, + "min-count": { "type": "integer", "minimum": 1 }, + "status": { "type": "integer", "enum": [301, 302, 307, 308] }, + "group": { "type": "string" }, + "merge": { "type": "boolean" }, + "mergeWithExisting": { "type": "boolean" }, + "merge-with-existing": { "type": "boolean" }, + "replaceExisting": { "type": "boolean" }, + "replace-existing": { "type": "boolean" }, + "reviewCsv": { "type": "string" }, + "review-csv": { "type": "string" }, + "reviewCsvPath": { "type": "string" }, + "review-csv-path": { "type": "string" }, + "csvReport": { "type": "string" }, + "csv-report": { "type": "string" }, + "summaryPath": { "type": "string" }, + "summary-path": { "type": "string" } + }, + "required": ["task"] + }, + "LinksIgnore404Step": { + "type": "object", + 
"additionalProperties": false, + "properties": { + "task": { "enum": ["links-ignore-404", "link-ignore-404", "links-ignore"] }, + "id": { "type": "string" }, + "dependsOn": { "oneOf": [{ "type": "string" }, { "type": "array", "items": { "type": "string" } }] }, + "mode": { "type": "string" }, + "modes": { "type": "array", "items": { "type": "string" } }, + "onlyModes": { "type": "array", "items": { "type": "string" } }, + "only-modes": { "type": "array", "items": { "type": "string" } }, + "skipModes": { "type": "array", "items": { "type": "string" } }, + "skip-modes": { "type": "array", "items": { "type": "string" } }, + "config": { "type": "string" }, + "source": { "type": "string" }, + "report": { "type": "string" }, + "input": { "type": "string" }, + "in": { "type": "string" }, + "out": { "type": "string" }, + "output": { "type": "string" }, + "outputPath": { "type": "string" }, + "output-path": { "type": "string" }, + "ignored404": { "type": "string" }, + "ignored-404": { "type": "string" }, + "ignored404Path": { "type": "string" }, + "ignored-404-path": { "type": "string" }, + "paths": { + "oneOf": [ + { "type": "string" }, + { "type": "array", "items": { "type": "string" } } + ] + }, + "path": { + "oneOf": [ + { "type": "string" }, + { "type": "array", "items": { "type": "string" } } + ] + }, + "all": { "type": "boolean" }, + "withoutSuggestions": { "type": "boolean" }, + "without-suggestions": { "type": "boolean" }, + "reason": { "type": "string" }, + "createdBy": { "type": "string" }, + "created-by": { "type": "string" }, + "merge": { "type": "boolean" }, + "mergeWithExisting": { "type": "boolean" }, + "merge-with-existing": { "type": "boolean" }, + "replaceExisting": { "type": "boolean" }, + "replace-existing": { "type": "boolean" }, + "reviewCsv": { "type": "string" }, + "review-csv": { "type": "string" }, + "reviewCsvPath": { "type": "string" }, + "review-csv-path": { "type": "string" }, + "csvReport": { "type": "string" }, + "csv-report": { "type": 
"string" }, + "summaryPath": { "type": "string" }, + "summary-path": { "type": "string" } + }, + "required": ["task"] + }, "ProjectApiDocsStep": { "type": "object", "additionalProperties": false, diff --git a/Schemas/powerforge.web.sitespec.schema.json b/Schemas/powerforge.web.sitespec.schema.json index 7f34196b..781c58fe 100644 --- a/Schemas/powerforge.web.sitespec.schema.json +++ b/Schemas/powerforge.web.sitespec.schema.json @@ -69,6 +69,8 @@ "type": "array", "items": { "$ref": "#/$defs/RedirectSpec" } }, + "Links": { "$ref": "#/$defs/LinkServiceSpec" }, + "links": { "$ref": "#/$defs/LinkServiceSpec" }, "EnableLegacyAmpRedirects": { "type": "boolean" }, "enableLegacyAmpRedirects": { "type": "boolean" }, "AssetRegistry": { "$ref": "#/$defs/AssetRegistrySpec" }, @@ -221,6 +223,49 @@ }, "required": ["From", "To"] }, + "LinkServiceSpec": { + "type": "object", + "additionalProperties": false, + "properties": { + "Redirects": { "type": "string" }, + "redirects": { "type": "string" }, + "Shortlinks": { "type": "string" }, + "shortlinks": { "type": "string" }, + "Ignored404": { "type": "string" }, + "ignored404": { "type": "string" }, + "Groups": { "type": "string" }, + "groups": { "type": "string" }, + "RedirectCsvPaths": { "type": "array", "items": { "type": "string" } }, + "redirectCsvPaths": { "type": "array", "items": { "type": "string" } }, + "redirect-csv-paths": { "type": "array", "items": { "type": "string" } }, + "ApacheOut": { "type": "string" }, + "apacheOut": { "type": "string" }, + "apache-out": { "type": "string" }, + "Hosts": { + "type": "object", + "additionalProperties": { "type": "string" } + }, + "hosts": { + "type": "object", + "additionalProperties": { "type": "string" } + }, + "LanguageRootHosts": { + "type": "object", + "additionalProperties": { "type": "string" }, + "description": "Host-to-language-prefix map for domains where a language is deployed at the web root." 
+ }, + "languageRootHosts": { + "type": "object", + "additionalProperties": { "type": "string" }, + "description": "Alias for LanguageRootHosts." + }, + "language-root-hosts": { + "type": "object", + "additionalProperties": { "type": "string" }, + "description": "Alias for LanguageRootHosts." + } + } + }, "SourceRepoSpec": { "type": "object", "additionalProperties": false, @@ -336,7 +381,10 @@ "Replace": { "type": "string" }, "MatchType": { "type": "string", "enum": ["contains", "prefix", "exact", "regex"] }, "Source": { "type": "string" }, - "Destination": { "type": "string" } + "SourceUrl": { "type": "string" }, + "Destination": { "type": "string" }, + "DownloadDependencies": { "type": "boolean" }, + "UserAgent": { "type": "string" } }, "required": ["Match", "Replace"] }, @@ -494,8 +542,17 @@ "Rel": { "type": "string" }, "Href": { "type": "string" }, "Type": { "type": "string" }, + "As": { "type": "string" }, "Sizes": { "type": "string" }, - "Crossorigin": { "type": "string" } + "Crossorigin": { "type": "string" }, + "Attributes": { + "type": "object", + "additionalProperties": { "type": "string" } + }, + "BooleanAttributes": { + "type": "array", + "items": { "type": "string" } + } }, "required": ["Rel", "Href"] }, From 2d8e33e940323b43e76efc34ded7676682fd43cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Przemys=C5=82aw=20K=C5=82ys?= Date: Sat, 18 Apr 2026 21:05:46 +0200 Subject: [PATCH 02/15] Address link service review feedback --- PowerForge.Tests/WebCliLinksTests.cs | 27 +++++++ PowerForge.Tests/WebLinkServiceTests.cs | 71 ++++++++++++++++++- .../WebCliCommandHandlers.Links.Support.cs | 2 + PowerForge.Web.Cli/WebLinkCommandSupport.cs | 5 ++ .../Services/WebLinkService.Import.cs | 8 ++- .../Services/WebLinkService.Report404.cs | 3 +- PowerForge.Web/Services/WebLinkService.cs | 33 ++++++++- .../powerforge.web.pipelinespec.schema.json | 2 + 8 files changed, 142 insertions(+), 9 deletions(-) diff --git a/PowerForge.Tests/WebCliLinksTests.cs 
b/PowerForge.Tests/WebCliLinksTests.cs index 43b0ee98..c484eea6 100644 --- a/PowerForge.Tests/WebCliLinksTests.cs +++ b/PowerForge.Tests/WebCliLinksTests.cs @@ -83,6 +83,33 @@ public void HandleSubCommand_LinksExportApache_WritesConfiguredOutput() } } + [Fact] + public void HandleSubCommand_LinksValidate_AcceptsCamelCaseDirectSourceFlags() + { + var root = Path.Combine(Path.GetTempPath(), "pf-web-cli-links-direct-" + Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(root); + + try + { + _ = WriteSiteFixture(root, duplicateRedirects: false); + var redirectsPath = Path.Combine(root, "data", "links", "redirects.json"); + var shortlinksPath = Path.Combine(root, "data", "links", "shortlinks.json"); + + var exitCode = WebCliCommandHandlers.HandleSubCommand( + "links", + new[] { "validate", "--redirectsPath", redirectsPath, "--shortlinksPath", shortlinksPath }, + outputJson: true, + logger: new WebConsoleLogger(), + outputSchemaVersion: CliEnvelopeSchemaVersion); + + Assert.Equal(0, exitCode); + } + finally + { + TryDeleteDirectory(root); + } + } + [Fact] public void HandleSubCommand_LinksImportWordPress_ImportsPrettyLinksCsv() { diff --git a/PowerForge.Tests/WebLinkServiceTests.cs b/PowerForge.Tests/WebLinkServiceTests.cs index 01ed1679..0cbda3b1 100644 --- a/PowerForge.Tests/WebLinkServiceTests.cs +++ b/PowerForge.Tests/WebLinkServiceTests.cs @@ -40,6 +40,70 @@ public void Validate_DetectsDuplicateRedirectsAndExternalTargets() Assert.Contains(result.Issues, issue => issue.Code == "PFLINK.REDIRECT.TARGET_EXTERNAL"); } + [Fact] + public void ValidateRedirectGraph_KeepsHostScopedChainsSeparate() + { + var dataSet = new WebLinkDataSet + { + Redirects = new[] + { + new LinkRedirectRule + { + Id = "en-old", + SourceHost = "evotec.xyz", + SourcePath = "/old", + TargetUrl = "/new", + Status = 301 + }, + new LinkRedirectRule + { + Id = "pl-new", + SourceHost = "evotec.pl", + SourcePath = "/new", + TargetUrl = "/old", + Status = 301 + } + } + }; + + var result = 
WebLinkService.Validate(dataSet); + + Assert.DoesNotContain(result.Issues, issue => issue.Code == "PFLINK.REDIRECT.LOOP"); + Assert.DoesNotContain(result.Issues, issue => issue.Code == "PFLINK.REDIRECT.CHAIN"); + } + + [Fact] + public void ValidateRedirectGraph_DoesNotTreatQueryOrSlashCanonicalRulesAsLoops() + { + var dataSet = new WebLinkDataSet + { + Redirects = new[] + { + new LinkRedirectRule + { + Id = "post-id", + SourcePath = "/", + SourceQuery = "p=123", + MatchType = LinkRedirectMatchType.Query, + TargetUrl = "/blog/current/", + Status = 301 + }, + new LinkRedirectRule + { + Id = "root-slash", + SourcePath = "/blog/current", + TargetUrl = "/blog/current/", + Status = 301 + } + } + }; + + var result = WebLinkService.Validate(dataSet); + + Assert.DoesNotContain(result.Issues, issue => issue.Code == "PFLINK.REDIRECT.LOOP"); + Assert.DoesNotContain(result.Issues, issue => issue.Code == "PFLINK.REDIRECT.CHAIN"); + } + [Fact] public void ExportApache_EmitsHostScopedRedirectsAndShortlinks() { @@ -125,6 +189,7 @@ public void ImportPrettyLinks_MergesExistingShortlinksAndPreservesImportedHits() id,name,slug,url,clicks 7,Discord,discord,https://discord.gg/example,42 8,Docs,/go/docs,https://docs.example.test,12 + 9,Google,google,https://google.example.test,3 """); File.WriteAllText(outPath, """ @@ -151,8 +216,8 @@ public void ImportPrettyLinks_MergesExistingShortlinksAndPreservesImportedHits() }); Assert.Equal(1, result.ExistingCount); - Assert.Equal(2, result.ImportedCount); - Assert.Equal(3, result.WrittenCount); + Assert.Equal(3, result.ImportedCount); + Assert.Equal(4, result.WrittenCount); var loaded = WebLinkService.Load(new WebLinkLoadOptions { @@ -164,6 +229,8 @@ public void ImportPrettyLinks_MergesExistingShortlinksAndPreservesImportedHits() Assert.Equal(42, discord.ImportedHits); Assert.Equal("imported-pretty-links", discord.Source); Assert.Contains("imported", discord.Tags); + Assert.Contains(loaded.Shortlinks, item => item.Slug == "google" && 
item.PathPrefix == "/go"); + Assert.DoesNotContain(loaded.Shortlinks, item => item.Slug == "ogle"); } finally { diff --git a/PowerForge.Web.Cli/WebCliCommandHandlers.Links.Support.cs b/PowerForge.Web.Cli/WebCliCommandHandlers.Links.Support.cs index 12d0a4ca..5b9b1bb8 100644 --- a/PowerForge.Web.Cli/WebCliCommandHandlers.Links.Support.cs +++ b/PowerForge.Web.Cli/WebCliCommandHandlers.Links.Support.cs @@ -188,8 +188,10 @@ private static void AddMapEntries(Dictionary target, IEnumerable private static bool HasDirectLinkSources(string[] args) => !string.IsNullOrWhiteSpace(TryGetOptionValue(args, "--redirects")) || !string.IsNullOrWhiteSpace(TryGetOptionValue(args, "--redirects-path")) || + !string.IsNullOrWhiteSpace(TryGetOptionValue(args, "--redirectsPath")) || !string.IsNullOrWhiteSpace(TryGetOptionValue(args, "--shortlinks")) || !string.IsNullOrWhiteSpace(TryGetOptionValue(args, "--shortlinks-path")) || + !string.IsNullOrWhiteSpace(TryGetOptionValue(args, "--shortlinksPath")) || ReadOptionList(args, "--source", "--sources", "--redirect-csv", "--redirect-csv-path", "--redirect-csv-paths", "--csv-source", "--csv-sources").Count > 0; private static string? ResolvePathForLinks(string baseDir, string? directValue, string? configValue) diff --git a/PowerForge.Web.Cli/WebLinkCommandSupport.cs b/PowerForge.Web.Cli/WebLinkCommandSupport.cs index 026a2e1c..3341f32c 100644 --- a/PowerForge.Web.Cli/WebLinkCommandSupport.cs +++ b/PowerForge.Web.Cli/WebLinkCommandSupport.cs @@ -333,11 +333,16 @@ private static void EnsureDirectory(string path) private static string EscapeCsv(string? value) { var text = value ?? string.Empty; + if (text.Length > 0 && IsCsvFormulaPrefix(text[0])) + text = "'" + text; if (text.Contains('"', StringComparison.Ordinal)) text = text.Replace("\"", "\"\"", StringComparison.Ordinal); return text.IndexOfAny(new[] { ',', '"', '\r', '\n' }) >= 0 ? 
"\"" + text + "\"" : text; } + private static bool IsCsvFormulaPrefix(char value) + => value is '=' or '+' or '-' or '@'; + private static readonly JsonSerializerOptions LinksSummaryJsonOptions = new() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase, diff --git a/PowerForge.Web/Services/WebLinkService.Import.cs b/PowerForge.Web/Services/WebLinkService.Import.cs index 50e337bd..b7f926a0 100644 --- a/PowerForge.Web/Services/WebLinkService.Import.cs +++ b/PowerForge.Web/Services/WebLinkService.Import.cs @@ -209,10 +209,12 @@ private static (string Slug, string? PathPrefix) ParseImportedShortlinkPath(stri var configuredPrefix = string.IsNullOrWhiteSpace(configuredPathPrefix) ? null : "/" + configuredPathPrefix.Trim().Trim('/'); - if (!string.IsNullOrWhiteSpace(configuredPrefix) && - path.StartsWith(configuredPrefix.Trim('/'), StringComparison.OrdinalIgnoreCase)) + var configuredPrefixSegment = configuredPrefix?.Trim('/'); + if (!string.IsNullOrWhiteSpace(configuredPrefixSegment) && + (string.Equals(path, configuredPrefixSegment, StringComparison.OrdinalIgnoreCase) || + path.StartsWith(configuredPrefixSegment + "/", StringComparison.OrdinalIgnoreCase))) { - path = path[configuredPrefix.Trim('/').Length..].Trim('/'); + path = path[configuredPrefixSegment.Length..].Trim('/'); } var parts = path.Split(new[] { '/' }, StringSplitOptions.RemoveEmptyEntries); diff --git a/PowerForge.Web/Services/WebLinkService.Report404.cs b/PowerForge.Web/Services/WebLinkService.Report404.cs index 722d8056..09650512 100644 --- a/PowerForge.Web/Services/WebLinkService.Report404.cs +++ b/PowerForge.Web/Services/WebLinkService.Report404.cs @@ -9,7 +9,8 @@ namespace PowerForge.Web; public static partial class WebLinkService { - private static readonly Regex ApacheRequestRegex = new("\"[A-Z]+\\s+([^\\s\\\"]+)\\s+HTTP/[^\\\"]+\"\\s+(\\d{3})", RegexOptions.Compiled | RegexOptions.CultureInvariant); + private static readonly TimeSpan LinkRegexTimeout = TimeSpan.FromSeconds(2); + private static 
readonly Regex ApacheRequestRegex = new("\"[A-Z]+\\s+([^\\s\\\"]+)\\s+HTTP/[^\\\"]+\"\\s+(\\d{3})", RegexOptions.Compiled | RegexOptions.CultureInvariant, LinkRegexTimeout); /// Creates a reviewable 404 suggestion report from logs or observation CSVs. public static WebLink404ReportResult Generate404Report(WebLink404ReportOptions options) diff --git a/PowerForge.Web/Services/WebLinkService.cs b/PowerForge.Web/Services/WebLinkService.cs index c3f98b96..600b924b 100644 --- a/PowerForge.Web/Services/WebLinkService.cs +++ b/PowerForge.Web/Services/WebLinkService.cs @@ -413,8 +413,10 @@ private static void ValidateRedirectGraph(LinkRedirectRule[] redirects, List(StringComparer.OrdinalIgnoreCase); var depth = 0; - while (map.TryGetValue(current, out var next)) + while (TryGetRedirectGraphTarget(map, host, current, currentQuery, out var next)) { - if (!visited.Add(current)) + if (!visited.Add(BuildRedirectGraphKey(host, current, currentQuery))) { AddRedirectIssue( issues, @@ -440,6 +444,7 @@ private static void ValidateRedirectGraph(LinkRedirectRule[] redirects, List 5) { @@ -456,6 +461,22 @@ private static void ValidateRedirectGraph(LinkRedirectRule[] redirects, List map, + string host, + string path, + string? query, + out string target) + { + if (!string.IsNullOrWhiteSpace(host) && + map.TryGetValue(BuildRedirectGraphKey(host, path, query), out target!)) + { + return true; + } + + return map.TryGetValue(BuildRedirectGraphKey(null, path, query), out target!); + } + private static void ValidateTarget(string targetUrl, bool allowExternal, List issues, string source, string? id, string codePrefix) { var trimmed = targetUrl.Trim(); @@ -528,6 +549,12 @@ private static string BuildRedirectKey(LinkRedirectRule redirect) NormalizeSourcePath(redirect.SourcePath), redirect.SourceQuery ?? string.Empty); + private static string BuildRedirectGraphKey(string? host, string path, string? 
query) + => string.Join("|", NormalizeRedirectGraphHost(host), NormalizeSourcePath(path), query?.Trim().ToLowerInvariant() ?? string.Empty); + + private static string NormalizeRedirectGraphHost(string? host) + => string.IsNullOrWhiteSpace(host) ? string.Empty : host.Trim().ToLowerInvariant(); + private static bool IsAllowedStatus(int status) => status is 301 or 302 or 307 or 308 or 410; diff --git a/Schemas/powerforge.web.pipelinespec.schema.json b/Schemas/powerforge.web.pipelinespec.schema.json index 47431b24..f5284e9f 100644 --- a/Schemas/powerforge.web.pipelinespec.schema.json +++ b/Schemas/powerforge.web.pipelinespec.schema.json @@ -1856,6 +1856,8 @@ "summary-path": { "type": "string" }, "reportPath": { "type": "string" }, "report-path": { "type": "string" }, + "duplicateReportPath": { "type": "string" }, + "duplicate-report-path": { "type": "string" }, "strict": { "type": "boolean" }, "skipValidation": { "type": "boolean" }, "skip-validation": { "type": "boolean" }, From 6b1449f3b1b3f874ab1d725e92bf28dc5246ece2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Przemys=C5=82aw=20K=C5=82ys?= Date: Sat, 18 Apr 2026 21:25:13 +0200 Subject: [PATCH 03/15] Address follow-up link service review feedback --- PowerForge.Tests/WebCliLinksTests.cs | 60 +++++++++++++++++++ PowerForge.Tests/WebLinkServiceTests.cs | 58 ++++++++++++++++++ .../WebPipelineRunnerLinksTests.cs | 2 +- PowerForge.Web.Cli/WebLinkCommandSupport.cs | 2 + .../WebPipelineRunner.Tasks.Links.cs | 37 ++++++++---- .../Services/WebLinkService.Import.cs | 11 +++- 6 files changed, 155 insertions(+), 15 deletions(-) diff --git a/PowerForge.Tests/WebCliLinksTests.cs b/PowerForge.Tests/WebCliLinksTests.cs index c484eea6..12bf6243 100644 --- a/PowerForge.Tests/WebCliLinksTests.cs +++ b/PowerForge.Tests/WebCliLinksTests.cs @@ -1,5 +1,6 @@ using System; using System.IO; +using PowerForge.Web; using PowerForge.Web.Cli; using Xunit; @@ -83,6 +84,49 @@ public void HandleSubCommand_LinksExportApache_WritesConfiguredOutput() } } 
+ [Fact] + public void EvaluateBaseline_TreatsWarningIdsAsDistinctKeys() + { + var root = Path.Combine(Path.GetTempPath(), "pf-web-cli-links-baseline-" + Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(root); + + try + { + var baselinePath = Path.Combine(root, ".powerforge", "link-baseline.json"); + Directory.CreateDirectory(Path.GetDirectoryName(baselinePath)!); + var existingIssue = BuildMissingOwnerIssue("existing"); + var newIssue = BuildMissingOwnerIssue("new"); + File.WriteAllText(baselinePath, + $$""" + { + "version": 1, + "warningKeys": [ + {{JsonString(WebLinkCommandSupport.BuildIssueKey(existingIssue))}} + ] + } + """); + + var state = WebLinkCommandSupport.EvaluateBaseline( + root, + baselinePath, + new LinkValidationResult + { + Issues = new[] { existingIssue, newIssue }, + WarningCount = 2 + }, + baselineGenerate: false, + baselineUpdate: false, + failOnNewWarnings: true); + + var warning = Assert.Single(state.NewWarnings); + Assert.Equal("new", warning.Id); + } + finally + { + TryDeleteDirectory(root); + } + } + [Fact] public void HandleSubCommand_LinksValidate_AcceptsCamelCaseDirectSourceFlags() { @@ -110,6 +154,22 @@ public void HandleSubCommand_LinksValidate_AcceptsCamelCaseDirectSourceFlags() } } + private static LinkValidationIssue BuildMissingOwnerIssue(string id) + => new() + { + Severity = LinkValidationSeverity.Warning, + Code = "PFLINK.SHORTLINK.OWNER", + Source = "shortlink", + Id = id, + SourceHost = "evo.yt", + SourcePath = "/go", + Status = 302, + TargetUrl = "https://example.test" + }; + + private static string JsonString(string value) + => System.Text.Json.JsonSerializer.Serialize(value); + [Fact] public void HandleSubCommand_LinksImportWordPress_ImportsPrettyLinksCsv() { diff --git a/PowerForge.Tests/WebLinkServiceTests.cs b/PowerForge.Tests/WebLinkServiceTests.cs index 0cbda3b1..f9fb458e 100644 --- a/PowerForge.Tests/WebLinkServiceTests.cs +++ b/PowerForge.Tests/WebLinkServiceTests.cs @@ -238,6 +238,64 @@ public void 
ImportPrettyLinks_MergesExistingShortlinksAndPreservesImportedHits() } } + [Fact] + public void ImportPrettyLinks_NormalizesPrefixWhenMergingExistingShortlinks() + { + var root = Path.Combine(Path.GetTempPath(), "pf-web-links-import-prefix-" + Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(root); + + try + { + var csvPath = Path.Combine(root, "pretty-links.csv"); + var outPath = Path.Combine(root, "shortlinks.json"); + File.WriteAllText(csvPath, + """ + id,name,slug,url,clicks + 8,Docs,/go/docs,https://docs-new.example.test,12 + """); + File.WriteAllText(outPath, + """ + { + "shortlinks": [ + { + "host": "evo.yt", + "pathPrefix": "/go/", + "slug": "docs", + "targetUrl": "https://docs.example.test", + "owner": "evotec", + "allowExternal": true + } + ] + } + """); + + var result = WebLinkService.ImportPrettyLinks(new WebLinkShortlinkImportOptions + { + SourcePath = csvPath, + OutputPath = outPath, + Host = "evo.yt", + PathPrefix = "go", + Owner = "evotec" + }); + + Assert.Equal(1, result.ExistingCount); + Assert.Equal(1, result.ImportedCount); + Assert.Equal(1, result.WrittenCount); + Assert.Equal(1, result.SkippedDuplicateCount); + + var loaded = WebLinkService.Load(new WebLinkLoadOptions + { + ShortlinksPath = outPath + }); + var docs = Assert.Single(loaded.Shortlinks, item => item.Slug == "docs"); + Assert.Equal("https://docs.example.test", docs.TargetUrl); + } + finally + { + TryDeleteDirectory(root); + } + } + [Fact] public void Generate404Report_SuggestsGeneratedRoutesFromApacheLog() { diff --git a/PowerForge.Tests/WebPipelineRunnerLinksTests.cs b/PowerForge.Tests/WebPipelineRunnerLinksTests.cs index 71d618f8..97e0ca1b 100644 --- a/PowerForge.Tests/WebPipelineRunnerLinksTests.cs +++ b/PowerForge.Tests/WebPipelineRunnerLinksTests.cs @@ -552,7 +552,7 @@ public void RunPipeline_LinksIgnore404_WritesIgnoredRules() "task": "links-ignore-404", "source": "./404-suggestions.json", "out": "./data/links/ignored-404.json", - "paths": ["/wp-login.php"], + 
"path": "/wp-login.php", "reason": "scanner noise", "reviewCsv": "./Build/ignored-404.csv", "summaryPath": "./Build/ignore-404-summary.json" diff --git a/PowerForge.Web.Cli/WebLinkCommandSupport.cs b/PowerForge.Web.Cli/WebLinkCommandSupport.cs index 3341f32c..97840951 100644 --- a/PowerForge.Web.Cli/WebLinkCommandSupport.cs +++ b/PowerForge.Web.Cli/WebLinkCommandSupport.cs @@ -298,11 +298,13 @@ internal static string BuildIssueKey(LinkValidationIssue issue) => string.Join("|", issue.Code ?? string.Empty, issue.Source ?? string.Empty, + issue.Id ?? string.Empty, issue.SourceHost ?? string.Empty, issue.SourcePath ?? string.Empty, issue.SourceQuery ?? string.Empty, issue.Status.ToString(CultureInfo.InvariantCulture), issue.NormalizedTargetUrl ?? issue.TargetUrl ?? string.Empty, + issue.RelatedId ?? string.Empty, issue.RelatedStatus.ToString(CultureInfo.InvariantCulture), issue.RelatedNormalizedTargetUrl ?? issue.RelatedTargetUrl ?? string.Empty); diff --git a/PowerForge.Web.Cli/WebPipelineRunner.Tasks.Links.cs b/PowerForge.Web.Cli/WebPipelineRunner.Tasks.Links.cs index b1b7d318..83cf1690 100644 --- a/PowerForge.Web.Cli/WebPipelineRunner.Tasks.Links.cs +++ b/PowerForge.Web.Cli/WebPipelineRunner.Tasks.Links.cs @@ -2,7 +2,6 @@ using System.Collections.Generic; using System.IO; using System.Linq; -using System.Text.Json.Serialization; using System.Text.Json; using PowerForge.Web; @@ -291,7 +290,7 @@ private static void ExecuteLinksIgnore404(JsonElement step, string baseDir, WebP GetString(step, "csvReport") ?? GetString(step, "csv-report")); - var paths = GetArrayOfStrings(step, "paths") ?? GetArrayOfStrings(step, "path") ?? Array.Empty(); + var paths = GetStringOrArrayOfStrings(step, "paths", "path"); var includeAll = GetBool(step, "all") ?? false; var onlyWithoutSuggestions = GetBool(step, "withoutSuggestions") ?? GetBool(step, "without-suggestions") ?? 
false; if (paths.Length == 0 && !includeAll && !onlyWithoutSuggestions) @@ -457,7 +456,7 @@ private static void WriteLinksImportSummary(string? summaryPath, WebLinkShortlin if (!string.IsNullOrWhiteSpace(summaryDirectory)) Directory.CreateDirectory(summaryDirectory); - File.WriteAllText(summaryPath, JsonSerializer.Serialize(result, LinksImportJsonOptions)); + File.WriteAllText(summaryPath, JsonSerializer.Serialize(result, LinksSummaryJsonContext.WebLinkShortlinkImportResult)); } private static void WriteLinks404Report(string reportPath, WebLink404ReportResult result) @@ -466,7 +465,7 @@ private static void WriteLinks404Report(string reportPath, WebLink404ReportResul if (!string.IsNullOrWhiteSpace(directory)) Directory.CreateDirectory(directory); - File.WriteAllText(reportPath, JsonSerializer.Serialize(result, LinksImportJsonOptions)); + File.WriteAllText(reportPath, JsonSerializer.Serialize(result, LinksSummaryJsonContext.WebLink404ReportResult)); } private static void WriteLinksPromoteSummary(string? summaryPath, WebLink404PromoteResult result) @@ -478,7 +477,7 @@ private static void WriteLinksPromoteSummary(string? summaryPath, WebLink404Prom if (!string.IsNullOrWhiteSpace(summaryDirectory)) Directory.CreateDirectory(summaryDirectory); - File.WriteAllText(summaryPath, JsonSerializer.Serialize(result, LinksImportJsonOptions)); + File.WriteAllText(summaryPath, JsonSerializer.Serialize(result, LinksSummaryJsonContext.WebLink404PromoteResult)); } private static void WriteLinksIgnoreSummary(string? summaryPath, WebLink404IgnoreResult result) @@ -490,15 +489,27 @@ private static void WriteLinksIgnoreSummary(string? 
summaryPath, WebLink404Ignor if (!string.IsNullOrWhiteSpace(summaryDirectory)) Directory.CreateDirectory(summaryDirectory); - File.WriteAllText(summaryPath, JsonSerializer.Serialize(result, LinksImportJsonOptions)); + File.WriteAllText(summaryPath, JsonSerializer.Serialize(result, LinksSummaryJsonContext.WebLink404IgnoreResult)); } - private static readonly JsonSerializerOptions LinksImportJsonOptions = new() + private static string[] GetStringOrArrayOfStrings(JsonElement step, params string[] names) { - PropertyNamingPolicy = JsonNamingPolicy.CamelCase, - PropertyNameCaseInsensitive = true, - DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull, - WriteIndented = true, - Converters = { new JsonStringEnumConverter(JsonNamingPolicy.CamelCase) } - }; + foreach (var name in names) + { + var array = GetArrayOfStrings(step, name); + if (array is { Length: > 0 }) + return array; + + var value = GetString(step, name); + if (!string.IsNullOrWhiteSpace(value)) + return new[] { value }; + } + + return Array.Empty(); + } + + private static readonly PowerForgeWebCliJsonContext LinksSummaryJsonContext = new(new JsonSerializerOptions(WebCliJson.Options) + { + WriteIndented = true + }); } diff --git a/PowerForge.Web/Services/WebLinkService.Import.cs b/PowerForge.Web/Services/WebLinkService.Import.cs index b7f926a0..701363ce 100644 --- a/PowerForge.Web/Services/WebLinkService.Import.cs +++ b/PowerForge.Web/Services/WebLinkService.Import.cs @@ -276,9 +276,18 @@ private static int ParseRedirectStatus(string value, int defaultStatus) private static string BuildShortlinkImportKey(LinkShortlinkRule shortlink) => string.Join("|", shortlink.Host ?? string.Empty, - shortlink.PathPrefix ?? string.Empty, + NormalizeShortlinkImportPrefix(shortlink.PathPrefix), shortlink.Slug ?? string.Empty); + private static string NormalizeShortlinkImportPrefix(string? 
pathPrefix) + { + if (string.IsNullOrWhiteSpace(pathPrefix)) + return string.Empty; + + var trimmed = pathPrefix.Trim().Trim('/'); + return string.IsNullOrWhiteSpace(trimmed) ? string.Empty : "/" + trimmed; + } + private static string SlugifyShortlink(string value) { if (string.IsNullOrWhiteSpace(value)) From 2f785b1c5e3bc3e981d24c2682a53344463acf7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Przemys=C5=82aw=20K=C5=82ys?= Date: Sat, 18 Apr 2026 21:35:47 +0200 Subject: [PATCH 04/15] Handle link pipeline review edge cases --- PowerForge.Tests/WebLinkServiceTests.cs | 75 +++++++++++++++++++ .../WebPipelineRunnerLinksTests.cs | 34 +++++++++ .../WebPipelineRunner.Tasks.Links.cs | 23 ++++-- .../Services/WebLinkService.Types.cs | 2 + PowerForge.Web/Services/WebLinkService.cs | 27 ++++++- 5 files changed, 151 insertions(+), 10 deletions(-) diff --git a/PowerForge.Tests/WebLinkServiceTests.cs b/PowerForge.Tests/WebLinkServiceTests.cs index f9fb458e..59c6a567 100644 --- a/PowerForge.Tests/WebLinkServiceTests.cs +++ b/PowerForge.Tests/WebLinkServiceTests.cs @@ -1,4 +1,5 @@ using System; +using System.Collections.Generic; using System.IO; using System.Linq; using System.Text.Json; @@ -40,6 +41,43 @@ public void Validate_DetectsDuplicateRedirectsAndExternalTargets() Assert.Contains(result.Issues, issue => issue.Code == "PFLINK.REDIRECT.TARGET_EXTERNAL"); } + [Fact] + public void Validate_DetectsShortHostDuplicateRoutes() + { + var dataSet = new WebLinkDataSet + { + Hosts = new Dictionary(StringComparer.OrdinalIgnoreCase) + { + ["short"] = "evo.yt" + }, + Shortlinks = new[] + { + new LinkShortlinkRule + { + Slug = "docs", + Host = "evo.yt", + TargetUrl = "https://docs.example.test", + Owner = "evotec", + AllowExternal = true + }, + new LinkShortlinkRule + { + Slug = "docs", + Host = "evo.yt", + PathPrefix = "/", + TargetUrl = "https://docs-alt.example.test", + Owner = "evotec", + AllowExternal = true + } + } + }; + + var result = WebLinkService.Validate(dataSet); + + 
Assert.False(result.Success); + Assert.Contains(result.Issues, issue => issue.Code == "PFLINK.SHORTLINK.DUPLICATE"); + } + [Fact] public void ValidateRedirectGraph_KeepsHostScopedChainsSeparate() { @@ -174,6 +212,43 @@ public void ExportApache_EmitsHostScopedRedirectsAndShortlinks() } } + [Fact] + public void ExportApache_EscapesExactSourceRegexCharacters() + { + var root = Path.Combine(Path.GetTempPath(), "pf-web-links-export-escape-" + Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(root); + + try + { + var outPath = Path.Combine(root, "links.conf"); + var dataSet = new WebLinkDataSet + { + Redirects = new[] + { + new LinkRedirectRule + { + Id = "dotted", + SourcePath = "/foo.bar/", + TargetUrl = "/new/", + Status = 301 + } + } + }; + + WebLinkService.ExportApache(dataSet, new WebLinkApacheExportOptions + { + OutputPath = outPath + }); + + var apache = File.ReadAllText(outPath); + Assert.Contains(@"RewriteRule ^foo\.bar/?$ /new/ [R=301,L,QSD]", apache, StringComparison.Ordinal); + } + finally + { + TryDeleteDirectory(root); + } + } + [Fact] public void ImportPrettyLinks_MergesExistingShortlinksAndPreservesImportedHits() { diff --git a/PowerForge.Tests/WebPipelineRunnerLinksTests.cs b/PowerForge.Tests/WebPipelineRunnerLinksTests.cs index 97e0ca1b..7d9cda09 100644 --- a/PowerForge.Tests/WebPipelineRunnerLinksTests.cs +++ b/PowerForge.Tests/WebPipelineRunnerLinksTests.cs @@ -96,6 +96,40 @@ public void RunPipeline_LinksExportApache_UsesSiteLinksConfig() } } + [Fact] + public void RunPipeline_LinksValidate_FailsWhenConfigPathIsMissing() + { + var root = Path.Combine(Path.GetTempPath(), "pf-web-pipeline-links-missing-config-" + Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(root); + + try + { + var pipelinePath = Path.Combine(root, "pipeline.json"); + File.WriteAllText(pipelinePath, + """ + { + "steps": [ + { + "task": "links-validate", + "config": "./missing-site.json" + } + ] + } + """); + + var result = 
WebPipelineRunner.RunPipeline(pipelinePath, logger: null); + + Assert.False(result.Success); + Assert.Single(result.Steps); + Assert.False(result.Steps[0].Success); + Assert.Contains("links config file not found", result.Steps[0].Message, StringComparison.OrdinalIgnoreCase); + } + finally + { + TryDeleteDirectory(root); + } + } + [Fact] public void RunPipeline_LinksValidate_FailsOnUnsafeExternalTarget() { diff --git a/PowerForge.Web.Cli/WebPipelineRunner.Tasks.Links.cs b/PowerForge.Web.Cli/WebPipelineRunner.Tasks.Links.cs index 83cf1690..c91d73df 100644 --- a/PowerForge.Web.Cli/WebPipelineRunner.Tasks.Links.cs +++ b/PowerForge.Web.Cli/WebPipelineRunner.Tasks.Links.cs @@ -21,7 +21,8 @@ private static void ExecuteLinksValidate(JsonElement step, string baseDir, WebPi var baselineUpdate = GetBool(step, "baselineUpdate") ?? GetBool(step, "baseline-update") ?? false; var baselinePath = GetString(step, "baselinePath") ?? GetString(step, "baseline-path") ?? GetString(step, "baseline"); - var linkOptions = BuildLinkLoadOptions(step, baseDir); + var loaded = LoadLinksSpec(step, baseDir); + var linkOptions = BuildLinkLoadOptions(step, baseDir, loaded); var dataSet = WebLinkService.Load(linkOptions); if (strict && dataSet.UsedSources.Length == 0) throw new InvalidOperationException("links-validate strict mode failed: no link source files were found."); @@ -55,7 +56,8 @@ private static void ExecuteLinksExportApache(JsonElement step, string baseDir, W var includeHeader = GetBool(step, "includeHeader") ?? GetBool(step, "include-header") ?? true; var include404 = GetBool(step, "includeErrorDocument404") ?? GetBool(step, "include-error-document-404") ?? 
false; - var linkOptions = BuildLinkLoadOptions(step, baseDir); + var loaded = LoadLinksSpec(step, baseDir); + var linkOptions = BuildLinkLoadOptions(step, baseDir, loaded); var dataSet = WebLinkService.Load(linkOptions); if (strict && dataSet.UsedSources.Length == 0) throw new InvalidOperationException("links-export-apache strict mode failed: no link source files were found."); @@ -78,7 +80,7 @@ private static void ExecuteLinksExportApache(JsonElement step, string baseDir, W GetString(step, "output-path") ?? GetString(step, "apacheOut") ?? GetString(step, "apache-out")) ?? - ResolvePath(baseDir, LoadLinksSpec(step, baseDir).Spec?.ApacheOut) ?? + ResolvePath(loaded.BaseDir ?? baseDir, loaded.Spec?.ApacheOut) ?? Path.GetFullPath(Path.Combine(baseDir, "deploy", "apache", "link-service-redirects.conf")); var export = WebLinkService.ExportApache(dataSet, new WebLinkApacheExportOptions @@ -317,9 +319,12 @@ private static void ExecuteLinksIgnore404(JsonElement step, string baseDir, WebP stepResult.Message = $"links-ignore-404 ok: candidates={result.CandidateCount}; written={result.WrittenCount}; skippedDuplicates={result.SkippedDuplicateCount}"; } - private static WebLinkLoadOptions BuildLinkLoadOptions(JsonElement step, string baseDir) + private static WebLinkLoadOptions BuildLinkLoadOptions( + JsonElement step, + string baseDir, + (LinkServiceSpec? Spec, string? BaseDir)? loadedSpec = null) { - var loaded = LoadLinksSpec(step, baseDir); + var loaded = loadedSpec ?? LoadLinksSpec(step, baseDir); var links = loaded.Spec; var linkBaseDir = loaded.BaseDir ?? baseDir; @@ -373,10 +378,14 @@ private static WebLinkLoadOptions BuildLinkLoadOptions(JsonElement step, string private static (LinkServiceSpec? Spec, string? 
BaseDir) LoadLinksSpec(JsonElement step, string baseDir) { - var configPath = ResolvePath(baseDir, GetString(step, "config")); - if (string.IsNullOrWhiteSpace(configPath) || !File.Exists(configPath)) + var configValue = GetString(step, "config"); + var configPath = ResolvePath(baseDir, configValue); + if (string.IsNullOrWhiteSpace(configPath)) return (null, null); + if (!File.Exists(configPath)) + throw new InvalidOperationException($"links config file not found: {configPath}"); + var (siteSpec, siteSpecPath) = WebSiteSpecLoader.LoadWithPath(configPath, WebCliJson.Options); return (siteSpec.Links, Path.GetDirectoryName(siteSpecPath) ?? baseDir); } diff --git a/PowerForge.Web/Services/WebLinkService.Types.cs b/PowerForge.Web/Services/WebLinkService.Types.cs index 7259032c..6c4ba60b 100644 --- a/PowerForge.Web/Services/WebLinkService.Types.cs +++ b/PowerForge.Web/Services/WebLinkService.Types.cs @@ -29,6 +29,8 @@ public sealed class WebLinkDataSet public string[] UsedSources { get; set; } = Array.Empty(); /// Configured source files that were missing. public string[] MissingSources { get; set; } = Array.Empty(); + /// Named host aliases, for example en, pl, or short. + public IReadOnlyDictionary Hosts { get; set; } = new Dictionary(StringComparer.OrdinalIgnoreCase); /// Host-to-language-prefix map for domains where a language is deployed at the web root. 
public IReadOnlyDictionary LanguageRootHosts { get; set; } = new Dictionary(StringComparer.OrdinalIgnoreCase); } diff --git a/PowerForge.Web/Services/WebLinkService.cs b/PowerForge.Web/Services/WebLinkService.cs index 600b924b..f1296978 100644 --- a/PowerForge.Web/Services/WebLinkService.cs +++ b/PowerForge.Web/Services/WebLinkService.cs @@ -52,6 +52,7 @@ public static WebLinkDataSet Load(WebLinkLoadOptions options) Shortlinks = shortlinks.ToArray(), UsedSources = usedSources.Distinct(StringComparer.OrdinalIgnoreCase).ToArray(), MissingSources = missingSources.Distinct(StringComparer.OrdinalIgnoreCase).ToArray(), + Hosts = NormalizeHostMap(options.Hosts), LanguageRootHosts = NormalizeLanguageRootHosts(options.LanguageRootHosts) }; } @@ -70,7 +71,7 @@ public static LinkValidationResult Validate(WebLinkDataSet dataSet) .ToArray(); ValidateRedirects(enabledRedirects, issues, dataSet.LanguageRootHosts); - ValidateShortlinks(enabledShortlinks, issues); + ValidateShortlinks(enabledShortlinks, issues, dataSet.Hosts); ValidateRedirectGraph(enabledRedirects, issues); var errorCount = issues.Count(static issue => issue.Severity == LinkValidationSeverity.Error); @@ -366,7 +367,10 @@ private static void ValidateRedirects( } } - private static void ValidateShortlinks(LinkShortlinkRule[] shortlinks, List issues) + private static void ValidateShortlinks( + LinkShortlinkRule[] shortlinks, + List issues, + IReadOnlyDictionary? hosts) { var seen = new Dictionary(StringComparer.OrdinalIgnoreCase); foreach (var shortlink in shortlinks) @@ -386,7 +390,7 @@ private static void ValidateShortlinks(LinkShortlinkRule[] shortlinks, List NormalizeHostMap(IReadOnlyDictionary? 
hosts) + { + var normalized = new Dictionary(StringComparer.OrdinalIgnoreCase); + if (hosts is null) + return normalized; + + foreach (var pair in hosts) + { + if (string.IsNullOrWhiteSpace(pair.Key) || string.IsNullOrWhiteSpace(pair.Value)) + continue; + + normalized[pair.Key.Trim()] = pair.Value.Trim(); + } + + return normalized; + } + private static IReadOnlyDictionary NormalizeLanguageRootHosts(IReadOnlyDictionary? hosts) { var normalized = new Dictionary(StringComparer.OrdinalIgnoreCase); From 10ae5381383110a9df51132db6c878040bea2423 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Przemys=C5=82aw=20K=C5=82ys?= Date: Sat, 18 Apr 2026 21:46:22 +0200 Subject: [PATCH 05/15] Add link apply-review pipeline support --- .../WebPipelineRunnerLinksTests.cs | 75 ++++++++++++++ .../PowerForgeWebCliJsonContext.cs | 1 + .../WebCliCommandHandlers.Links.Support.cs | 12 --- .../WebCliCommandHandlers.Links.cs | 2 +- PowerForge.Web.Cli/WebLinkCommandSupport.cs | 97 ++++++++++++++----- .../WebPipelineRunner.Tasks.Links.cs | 76 +++++++++++++++ PowerForge.Web.Cli/WebPipelineRunner.Tasks.cs | 5 + .../powerforge.web.pipelinespec.schema.json | 52 ++++++++++ 8 files changed, 281 insertions(+), 39 deletions(-) diff --git a/PowerForge.Tests/WebPipelineRunnerLinksTests.cs b/PowerForge.Tests/WebPipelineRunnerLinksTests.cs index 7d9cda09..b4d185d3 100644 --- a/PowerForge.Tests/WebPipelineRunnerLinksTests.cs +++ b/PowerForge.Tests/WebPipelineRunnerLinksTests.cs @@ -614,6 +614,81 @@ public void RunPipeline_LinksIgnore404_WritesIgnoredRules() } } + [Fact] + public void RunPipeline_LinksApplyReview_AppliesCandidateFiles() + { + var root = Path.Combine(Path.GetTempPath(), "pf-web-pipeline-links-apply-review-" + Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(root); + + try + { + var reportsPath = Path.Combine(root, "Build", "link-reports"); + Directory.CreateDirectory(reportsPath); + Directory.CreateDirectory(Path.Combine(root, "data", "links")); + File.WriteAllText(Path.Combine(reportsPath, 
"404-promoted-candidates.json"), + """ + { + "redirects": [ + { + "id": "reviewed", + "sourcePath": "/docs/instal", + "targetUrl": "/docs/install/", + "enabled": false, + "source": "404-promoted" + } + ] + } + """); + File.WriteAllText(Path.Combine(reportsPath, "ignored-404-candidates.json"), + """ + { + "ignored404": [ + { + "path": "/wp-login.php", + "reason": "scanner noise" + } + ] + } + """); + File.WriteAllText(Path.Combine(root, "data", "links", "ignored-404.json"), "{ \"ignored404\": [] }"); + + var pipelinePath = Path.Combine(root, "pipeline.json"); + File.WriteAllText(pipelinePath, + """ + { + "steps": [ + { + "task": "links-apply-review", + "all": true, + "enableRedirects": true, + "redirects": "./data/links/redirects.json", + "ignored404": "./data/links/ignored-404.json", + "summaryPath": "./Build/link-reports/apply-summary.json" + } + ] + } + """); + + var result = WebPipelineRunner.RunPipeline(pipelinePath, logger: null); + + Assert.True(result.Success); + Assert.Contains("links-apply-review ok", result.Steps[0].Message, StringComparison.OrdinalIgnoreCase); + var redirectsJson = File.ReadAllText(Path.Combine(root, "data", "links", "redirects.json")); + Assert.Contains("\"sourcePath\": \"/docs/instal\"", redirectsJson, StringComparison.Ordinal); + Assert.Contains("\"enabled\": true", redirectsJson, StringComparison.Ordinal); + var ignoredJson = File.ReadAllText(Path.Combine(root, "data", "links", "ignored-404.json")); + Assert.Contains("\"path\": \"/wp-login.php\"", ignoredJson, StringComparison.Ordinal); + + using var summary = JsonDocument.Parse(File.ReadAllText(Path.Combine(reportsPath, "apply-summary.json"))); + Assert.Equal(1, summary.RootElement.GetProperty("redirects").GetProperty("candidateCount").GetInt32()); + Assert.Equal(1, summary.RootElement.GetProperty("ignored404").GetProperty("candidateCount").GetInt32()); + } + finally + { + TryDeleteDirectory(root); + } + } + private static void TryDeleteDirectory(string path) { try diff --git 
a/PowerForge.Web.Cli/PowerForgeWebCliJsonContext.cs b/PowerForge.Web.Cli/PowerForgeWebCliJsonContext.cs index c7fd48f7..1b1a708d 100644 --- a/PowerForge.Web.Cli/PowerForgeWebCliJsonContext.cs +++ b/PowerForge.Web.Cli/PowerForgeWebCliJsonContext.cs @@ -58,6 +58,7 @@ namespace PowerForge.Web.Cli; [JsonSerializable(typeof(WebLink404IgnoreResult))] [JsonSerializable(typeof(WebLink404ReviewResult))] [JsonSerializable(typeof(WebLinkReviewApplyResult))] +[JsonSerializable(typeof(WebLinkCommandSummary))] internal partial class PowerForgeWebCliJsonContext : JsonSerializerContext { } diff --git a/PowerForge.Web.Cli/WebCliCommandHandlers.Links.Support.cs b/PowerForge.Web.Cli/WebCliCommandHandlers.Links.Support.cs index 5b9b1bb8..a867ea8c 100644 --- a/PowerForge.Web.Cli/WebCliCommandHandlers.Links.Support.cs +++ b/PowerForge.Web.Cli/WebCliCommandHandlers.Links.Support.cs @@ -10,18 +10,6 @@ namespace PowerForge.Web.Cli; internal static partial class WebCliCommandHandlers { - private static void WriteLinksApplyReviewSummary(string? summaryPath, WebLinkReviewApplyResult result) - { - if (string.IsNullOrWhiteSpace(summaryPath)) - return; - - var summaryDirectory = Path.GetDirectoryName(summaryPath); - if (!string.IsNullOrWhiteSpace(summaryDirectory)) - Directory.CreateDirectory(summaryDirectory); - - File.WriteAllText(summaryPath, JsonSerializer.Serialize(result, WebCliJson.Context.WebLinkReviewApplyResult)); - } - private static void WriteLinksReview404Summary(string? 
summaryPath, WebLink404ReviewResult result) { if (string.IsNullOrWhiteSpace(summaryPath)) diff --git a/PowerForge.Web.Cli/WebCliCommandHandlers.Links.cs b/PowerForge.Web.Cli/WebCliCommandHandlers.Links.cs index fa2cb1b0..7c2c0dbd 100644 --- a/PowerForge.Web.Cli/WebCliCommandHandlers.Links.cs +++ b/PowerForge.Web.Cli/WebCliCommandHandlers.Links.cs @@ -547,7 +547,7 @@ private static int HandleLinksApplyReview(string[] args, bool outputJson, WebCon var summaryPath = ResolveOptionalPath(baseDir, TryGetOptionValue(args, "--summary-path") ?? TryGetOptionValue(args, "--summaryPath")); - WriteLinksApplyReviewSummary(summaryPath, result); + WebLinkCommandSupport.WriteLinksApplyReviewSummary(summaryPath, result); if (outputJson) { diff --git a/PowerForge.Web.Cli/WebLinkCommandSupport.cs b/PowerForge.Web.Cli/WebLinkCommandSupport.cs index 97840951..f9ba95e9 100644 --- a/PowerForge.Web.Cli/WebLinkCommandSupport.cs +++ b/PowerForge.Web.Cli/WebLinkCommandSupport.cs @@ -27,35 +27,47 @@ internal static void WriteSummary( if (!string.IsNullOrWhiteSpace(summaryDirectory)) Directory.CreateDirectory(summaryDirectory); - var summary = new + var summary = new WebLinkCommandSummary { - generatedOn = DateTimeOffset.UtcNow.ToString("O"), - action, - redirects = validation.RedirectCount, - shortlinks = validation.ShortlinkCount, - errors = validation.ErrorCount, - warnings = validation.WarningCount, - duplicateWarnings = validation.Issues.Count(static issue => issue.Code == "PFLINK.REDIRECT.DUPLICATE_SAME_TARGET"), - duplicateErrors = validation.Issues.Count(static issue => issue.Code == "PFLINK.REDIRECT.DUPLICATE"), - success = taskSuccess, - validationSuccess = validation.Success, - usedSourceCount = dataSet.UsedSources.Length, - usedSources = dataSet.UsedSources, - missingSourceCount = dataSet.MissingSources.Length, - missingSources = dataSet.MissingSources, - baselinePath = baseline?.Path, - baselineLoaded = baseline?.Loaded, - baselineWarningCount = baseline?.KeyCount, - 
baselineGenerated = baseline?.Generated, - baselineUpdated = baseline?.Updated, - baselineWrittenPath = baseline?.WrittenPath, - newWarningCount = baseline?.NewWarnings.Length, - newWarnings = baseline?.NewWarnings, - issues = validation.Issues, - export + GeneratedOn = DateTimeOffset.UtcNow.ToString("O"), + Action = action, + Redirects = validation.RedirectCount, + Shortlinks = validation.ShortlinkCount, + Errors = validation.ErrorCount, + Warnings = validation.WarningCount, + DuplicateWarnings = validation.Issues.Count(static issue => issue.Code == "PFLINK.REDIRECT.DUPLICATE_SAME_TARGET"), + DuplicateErrors = validation.Issues.Count(static issue => issue.Code == "PFLINK.REDIRECT.DUPLICATE"), + Success = taskSuccess, + ValidationSuccess = validation.Success, + UsedSourceCount = dataSet.UsedSources.Length, + UsedSources = dataSet.UsedSources, + MissingSourceCount = dataSet.MissingSources.Length, + MissingSources = dataSet.MissingSources, + BaselinePath = baseline?.Path, + BaselineLoaded = baseline?.Loaded, + BaselineWarningCount = baseline?.KeyCount, + BaselineGenerated = baseline?.Generated, + BaselineUpdated = baseline?.Updated, + BaselineWrittenPath = baseline?.WrittenPath, + NewWarningCount = baseline?.NewWarnings.Length, + NewWarnings = baseline?.NewWarnings, + Issues = validation.Issues, + Export = export }; - File.WriteAllText(summaryPath, JsonSerializer.Serialize(summary, LinksSummaryJsonOptions)); + File.WriteAllText(summaryPath, JsonSerializer.Serialize(summary, LinksSummaryJsonContext.WebLinkCommandSummary)); + } + + internal static void WriteLinksApplyReviewSummary(string? 
summaryPath, WebLinkReviewApplyResult result) + { + if (string.IsNullOrWhiteSpace(summaryPath)) + return; + + var summaryDirectory = Path.GetDirectoryName(summaryPath); + if (!string.IsNullOrWhiteSpace(summaryDirectory)) + Directory.CreateDirectory(summaryDirectory); + + File.WriteAllText(summaryPath, JsonSerializer.Serialize(result, WebCliJson.Context.WebLinkReviewApplyResult)); } internal static void WriteIssueReport(string? reportPath, LinkValidationResult validation) @@ -353,6 +365,39 @@ private static bool IsCsvFormulaPrefix(char value) WriteIndented = true, Converters = { new JsonStringEnumConverter(JsonNamingPolicy.CamelCase) } }; + + private static readonly PowerForgeWebCliJsonContext LinksSummaryJsonContext = new(new JsonSerializerOptions(WebCliJson.Options) + { + WriteIndented = true + }); +} + +internal sealed class WebLinkCommandSummary +{ + public string GeneratedOn { get; set; } = string.Empty; + public string Action { get; set; } = string.Empty; + public int Redirects { get; set; } + public int Shortlinks { get; set; } + public int Errors { get; set; } + public int Warnings { get; set; } + public int DuplicateWarnings { get; set; } + public int DuplicateErrors { get; set; } + public bool Success { get; set; } + public bool ValidationSuccess { get; set; } + public int UsedSourceCount { get; set; } + public string[] UsedSources { get; set; } = Array.Empty(); + public int MissingSourceCount { get; set; } + public string[] MissingSources { get; set; } = Array.Empty(); + public string? BaselinePath { get; set; } + public bool? BaselineLoaded { get; set; } + public int? BaselineWarningCount { get; set; } + public bool? BaselineGenerated { get; set; } + public bool? BaselineUpdated { get; set; } + public string? BaselineWrittenPath { get; set; } + public int? NewWarningCount { get; set; } + public LinkValidationIssue[]? NewWarnings { get; set; } + public LinkValidationIssue[] Issues { get; set; } = Array.Empty(); + public WebLinkApacheExportResult? 
Export { get; set; } } internal sealed class WebLinkBaselineState diff --git a/PowerForge.Web.Cli/WebPipelineRunner.Tasks.Links.cs b/PowerForge.Web.Cli/WebPipelineRunner.Tasks.Links.cs index c91d73df..66f76640 100644 --- a/PowerForge.Web.Cli/WebPipelineRunner.Tasks.Links.cs +++ b/PowerForge.Web.Cli/WebPipelineRunner.Tasks.Links.cs @@ -319,6 +319,82 @@ private static void ExecuteLinksIgnore404(JsonElement step, string baseDir, WebP stepResult.Message = $"links-ignore-404 ok: candidates={result.CandidateCount}; written={result.WrittenCount}; skippedDuplicates={result.SkippedDuplicateCount}"; } + private static void ExecuteLinksApplyReview(JsonElement step, string baseDir, WebPipelineStepResult stepResult) + { + var loaded = LoadLinksSpec(step, baseDir); + var links = loaded.Spec; + var linkBaseDir = loaded.BaseDir ?? baseDir; + var applyAll = GetBool(step, "all") ?? false; + var applyRedirects = applyAll || + GetBool(step, "applyRedirects") == true || + GetBool(step, "apply-redirects") == true || + GetBool(step, "redirectCandidatesOnly") == true || + GetBool(step, "redirect-candidates-only") == true; + var applyIgnored404 = applyAll || + GetBool(step, "applyIgnored404") == true || + GetBool(step, "apply-ignored-404") == true || + GetBool(step, "ignored404CandidatesOnly") == true || + GetBool(step, "ignored-404-candidates-only") == true; + if (!applyRedirects && !applyIgnored404) + throw new InvalidOperationException("links-apply-review requires applyRedirects, applyIgnored404, or all:true."); + + var redirectCandidatesPath = ResolvePath(baseDir, + GetString(step, "redirectCandidates") ?? + GetString(step, "redirect-candidates") ?? + GetString(step, "redirectCandidatesPath") ?? + GetString(step, "redirect-candidates-path")) ?? + Path.GetFullPath(Path.Combine(baseDir, "Build", "link-reports", "404-promoted-candidates.json")); + var redirectsPath = ResolvePathForLinks(baseDir, linkBaseDir, + GetString(step, "redirects") ?? + GetString(step, "redirectsPath") ?? 
+ GetString(step, "redirects-path"), + links?.Redirects); + if (applyRedirects && string.IsNullOrWhiteSpace(redirectsPath)) + throw new InvalidOperationException("links-apply-review requires redirects or links.redirects config."); + + var ignored404CandidatesPath = ResolvePath(baseDir, + GetString(step, "ignored404Candidates") ?? + GetString(step, "ignored-404-candidates") ?? + GetString(step, "ignored404CandidatesPath") ?? + GetString(step, "ignored-404-candidates-path")) ?? + Path.GetFullPath(Path.Combine(baseDir, "Build", "link-reports", "ignored-404-candidates.json")); + var ignored404Path = ResolvePathForLinks(baseDir, linkBaseDir, + GetString(step, "ignored404") ?? + GetString(step, "ignored-404") ?? + GetString(step, "ignored404Path") ?? + GetString(step, "ignored-404-path"), + links?.Ignored404); + if (applyIgnored404 && string.IsNullOrWhiteSpace(ignored404Path)) + throw new InvalidOperationException("links-apply-review requires ignored404 or links.ignored404 config."); + + var result = WebLinkService.ApplyReviewCandidates(new WebLinkReviewApplyOptions + { + ApplyRedirects = applyRedirects, + ApplyIgnored404 = applyIgnored404, + RedirectCandidatesPath = redirectCandidatesPath, + RedirectsPath = redirectsPath, + Ignored404CandidatesPath = ignored404CandidatesPath, + Ignored404Path = ignored404Path, + ReplaceExisting = GetBool(step, "replaceExisting") ?? GetBool(step, "replace-existing") ?? false, + EnableRedirects = GetBool(step, "enableRedirects") ?? GetBool(step, "enable-redirects") ?? false, + DryRun = GetBool(step, "dryRun") ?? GetBool(step, "dry-run") ?? GetBool(step, "whatIf") ?? GetBool(step, "what-if") ?? false + }); + + var summaryPath = ResolvePath(baseDir, GetString(step, "summaryPath") ?? 
GetString(step, "summary-path")); + WebLinkCommandSupport.WriteLinksApplyReviewSummary(summaryPath, result); + + var parts = new List(); + if (result.Redirects is not null) + parts.Add($"redirects={result.Redirects.CandidateCount}; redirectWritten={result.Redirects.WrittenCount}; redirectSkipped={result.Redirects.SkippedDuplicateCount}"); + if (result.Ignored404 is not null) + parts.Add($"ignored404={result.Ignored404.CandidateCount}; ignoredWritten={result.Ignored404.WrittenCount}; ignoredSkipped={result.Ignored404.SkippedDuplicateCount}"); + + stepResult.Success = true; + stepResult.Message = parts.Count == 0 + ? "links-apply-review ok" + : $"links-apply-review ok: {string.Join("; ", parts)}"; + } + private static WebLinkLoadOptions BuildLinkLoadOptions( JsonElement step, string baseDir, diff --git a/PowerForge.Web.Cli/WebPipelineRunner.Tasks.cs b/PowerForge.Web.Cli/WebPipelineRunner.Tasks.cs index 378d8146..c552b887 100644 --- a/PowerForge.Web.Cli/WebPipelineRunner.Tasks.cs +++ b/PowerForge.Web.Cli/WebPipelineRunner.Tasks.cs @@ -184,6 +184,11 @@ private static void ExecuteTask( case "links-ignore": ExecuteLinksIgnore404(step, baseDir, stepResult); break; + case "links-apply-review": + case "link-apply-review": + case "links-apply": + ExecuteLinksApplyReview(step, baseDir, stepResult); + break; case "wordpress-normalize": case "wordpress-normalize-content": case "normalize-wordpress-content": diff --git a/Schemas/powerforge.web.pipelinespec.schema.json b/Schemas/powerforge.web.pipelinespec.schema.json index f5284e9f..f51ced5a 100644 --- a/Schemas/powerforge.web.pipelinespec.schema.json +++ b/Schemas/powerforge.web.pipelinespec.schema.json @@ -86,6 +86,7 @@ { "$ref": "#/$defs/LinksReport404Step" }, { "$ref": "#/$defs/LinksPromote404Step" }, { "$ref": "#/$defs/LinksIgnore404Step" }, + { "$ref": "#/$defs/LinksApplyReviewStep" }, { "$ref": "#/$defs/WordPressNormalizeStep" }, { "$ref": "#/$defs/WordPressMediaSyncStep" }, { "$ref": "#/$defs/WordPressExportSnapshotStep" 
}, @@ -2085,6 +2086,57 @@ }, "required": ["task"] }, + "LinksApplyReviewStep": { + "type": "object", + "additionalProperties": false, + "properties": { + "task": { "enum": ["links-apply-review", "link-apply-review", "links-apply"] }, + "id": { "type": "string" }, + "dependsOn": { "oneOf": [{ "type": "string" }, { "type": "array", "items": { "type": "string" } }] }, + "mode": { "type": "string" }, + "modes": { "type": "array", "items": { "type": "string" } }, + "onlyModes": { "type": "array", "items": { "type": "string" } }, + "only-modes": { "type": "array", "items": { "type": "string" } }, + "skipModes": { "type": "array", "items": { "type": "string" } }, + "skip-modes": { "type": "array", "items": { "type": "string" } }, + "config": { "type": "string" }, + "all": { "type": "boolean" }, + "applyRedirects": { "type": "boolean" }, + "apply-redirects": { "type": "boolean" }, + "redirectCandidatesOnly": { "type": "boolean" }, + "redirect-candidates-only": { "type": "boolean" }, + "applyIgnored404": { "type": "boolean" }, + "apply-ignored-404": { "type": "boolean" }, + "ignored404CandidatesOnly": { "type": "boolean" }, + "ignored-404-candidates-only": { "type": "boolean" }, + "redirectCandidates": { "type": "string" }, + "redirect-candidates": { "type": "string" }, + "redirectCandidatesPath": { "type": "string" }, + "redirect-candidates-path": { "type": "string" }, + "redirects": { "type": "string" }, + "redirectsPath": { "type": "string" }, + "redirects-path": { "type": "string" }, + "ignored404Candidates": { "type": "string" }, + "ignored-404-candidates": { "type": "string" }, + "ignored404CandidatesPath": { "type": "string" }, + "ignored-404-candidates-path": { "type": "string" }, + "ignored404": { "type": "string" }, + "ignored-404": { "type": "string" }, + "ignored404Path": { "type": "string" }, + "ignored-404-path": { "type": "string" }, + "replaceExisting": { "type": "boolean" }, + "replace-existing": { "type": "boolean" }, + "enableRedirects": { "type": 
"boolean" }, + "enable-redirects": { "type": "boolean" }, + "dryRun": { "type": "boolean" }, + "dry-run": { "type": "boolean" }, + "whatIf": { "type": "boolean" }, + "what-if": { "type": "boolean" }, + "summaryPath": { "type": "string" }, + "summary-path": { "type": "string" } + }, + "required": ["task"] + }, "ProjectApiDocsStep": { "type": "object", "additionalProperties": false, From 417c2ae077245f4ecbd743e8d8d4eb537fc8a89d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Przemys=C5=82aw=20K=C5=82ys?= Date: Sat, 18 Apr 2026 21:57:34 +0200 Subject: [PATCH 06/15] Address final link service review findings --- PowerForge.Tests/WebLinkServiceTests.cs | 81 +++++++++++++++++++ .../WebPipelineRunnerLinksTests.cs | 3 +- .../WebCliCommandHandlers.Links.cs | 4 +- .../WebPipelineRunner.Tasks.Links.cs | 6 +- .../Services/WebLinkService.Import.cs | 30 ++++--- PowerForge.Web/Services/WebLinkService.cs | 44 ++++++++-- 6 files changed, 147 insertions(+), 21 deletions(-) diff --git a/PowerForge.Tests/WebLinkServiceTests.cs b/PowerForge.Tests/WebLinkServiceTests.cs index 59c6a567..3b3707ef 100644 --- a/PowerForge.Tests/WebLinkServiceTests.cs +++ b/PowerForge.Tests/WebLinkServiceTests.cs @@ -142,6 +142,37 @@ public void ValidateRedirectGraph_DoesNotTreatQueryOrSlashCanonicalRulesAsLoops( Assert.DoesNotContain(result.Issues, issue => issue.Code == "PFLINK.REDIRECT.CHAIN"); } + [Fact] + public void ValidateRedirectGraph_DetectsLoopsThroughTargetQuery() + { + var dataSet = new WebLinkDataSet + { + Redirects = new[] + { + new LinkRedirectRule + { + Id = "a", + SourcePath = "/a", + TargetUrl = "/b?x=1", + Status = 301 + }, + new LinkRedirectRule + { + Id = "b-query", + SourcePath = "/b", + SourceQuery = "x=1", + MatchType = LinkRedirectMatchType.Query, + TargetUrl = "/a", + Status = 301 + } + } + }; + + var result = WebLinkService.Validate(dataSet); + + Assert.Contains(result.Issues, issue => issue.Code == "PFLINK.REDIRECT.LOOP"); + } + [Fact] public void 
ExportApache_EmitsHostScopedRedirectsAndShortlinks() { @@ -371,6 +402,56 @@ public void ImportPrettyLinks_NormalizesPrefixWhenMergingExistingShortlinks() } } + [Fact] + public void ImportPrettyLinks_TreatsImplicitPrefixAsGoForNonShortHosts() + { + var root = Path.Combine(Path.GetTempPath(), "pf-web-links-import-implicit-prefix-" + Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(root); + + try + { + var csvPath = Path.Combine(root, "pretty-links.csv"); + var outPath = Path.Combine(root, "shortlinks.json"); + File.WriteAllText(csvPath, + """ + id,name,slug,url,clicks + 8,Docs,/go/docs,https://docs-new.example.test,12 + """); + File.WriteAllText(outPath, + """ + { + "shortlinks": [ + { + "host": "evotec.xyz", + "slug": "docs", + "targetUrl": "https://docs.example.test", + "owner": "evotec", + "allowExternal": true + } + ] + } + """); + + var result = WebLinkService.ImportPrettyLinks(new WebLinkShortlinkImportOptions + { + SourcePath = csvPath, + OutputPath = outPath, + Host = "evotec.xyz", + ShortHost = "evo.yt", + Owner = "evotec" + }); + + Assert.Equal(1, result.ExistingCount); + Assert.Equal(1, result.ImportedCount); + Assert.Equal(1, result.WrittenCount); + Assert.Equal(1, result.SkippedDuplicateCount); + } + finally + { + TryDeleteDirectory(root); + } + } + [Fact] public void Generate404Report_SuggestsGeneratedRoutesFromApacheLog() { diff --git a/PowerForge.Tests/WebPipelineRunnerLinksTests.cs b/PowerForge.Tests/WebPipelineRunnerLinksTests.cs index b4d185d3..7b7ab4be 100644 --- a/PowerForge.Tests/WebPipelineRunnerLinksTests.cs +++ b/PowerForge.Tests/WebPipelineRunnerLinksTests.cs @@ -384,7 +384,7 @@ public void RunPipeline_LinksImportWordPress_WritesShortlinksAndSummary() "config": "./site.json", "source": "./data/links/imports/pretty-links.csv", "owner": "evotec", - "tags": [ "imported" ], + "tags": "imported", "summaryPath": "./Build/import-links-summary.json" } ] @@ -401,6 +401,7 @@ public void 
RunPipeline_LinksImportWordPress_WritesShortlinksAndSummary() var json = File.ReadAllText(shortlinksPath); Assert.Contains("\"slug\": \"teams\"", json, StringComparison.Ordinal); Assert.Contains("\"host\": \"evo.yt\"", json, StringComparison.Ordinal); + Assert.Contains("\"imported\"", json, StringComparison.Ordinal); Assert.Contains("\"importedHits\": 42", json, StringComparison.Ordinal); using var summary = JsonDocument.Parse(File.ReadAllText(Path.Combine(root, "Build", "import-links-summary.json"))); diff --git a/PowerForge.Web.Cli/WebCliCommandHandlers.Links.cs b/PowerForge.Web.Cli/WebCliCommandHandlers.Links.cs index 7c2c0dbd..818cdedd 100644 --- a/PowerForge.Web.Cli/WebCliCommandHandlers.Links.cs +++ b/PowerForge.Web.Cli/WebCliCommandHandlers.Links.cs @@ -601,7 +601,8 @@ private static int HandleLinksImportWordPress(string[] args, bool outputJson, We var hosts = BuildLinkHostMap(args, loaded.Spec); var host = TryGetOptionValue(args, "--host"); - if (string.IsNullOrWhiteSpace(host) && hosts.TryGetValue("short", out var configuredShortHost)) + hosts.TryGetValue("short", out var configuredShortHost); + if (string.IsNullOrWhiteSpace(host) && !string.IsNullOrWhiteSpace(configuredShortHost)) host = configuredShortHost; var status = ParseIntOption(TryGetOptionValue(args, "--status"), 302); @@ -611,6 +612,7 @@ private static int HandleLinksImportWordPress(string[] args, bool outputJson, We SourceOriginPath = sourcePath, OutputPath = outPath, Host = host, + ShortHost = configuredShortHost, PathPrefix = TryGetOptionValue(args, "--path-prefix") ?? 
TryGetOptionValue(args, "--pathPrefix"), Owner = TryGetOptionValue(args, "--owner"), Tags = ReadOptionList(args, "--tag", "--tags").ToArray(), diff --git a/PowerForge.Web.Cli/WebPipelineRunner.Tasks.Links.cs b/PowerForge.Web.Cli/WebPipelineRunner.Tasks.Links.cs index 66f76640..39d1e76c 100644 --- a/PowerForge.Web.Cli/WebPipelineRunner.Tasks.Links.cs +++ b/PowerForge.Web.Cli/WebPipelineRunner.Tasks.Links.cs @@ -127,7 +127,8 @@ private static void ExecuteLinksImportWordPress(JsonElement step, string baseDir var hosts = BuildLinksHostMap(step, links); var host = GetString(step, "host"); - if (string.IsNullOrWhiteSpace(host) && hosts.TryGetValue("short", out var shortHost)) + hosts.TryGetValue("short", out var shortHost); + if (string.IsNullOrWhiteSpace(host) && !string.IsNullOrWhiteSpace(shortHost)) host = shortHost; var result = WebLinkService.ImportPrettyLinks(new WebLinkShortlinkImportOptions @@ -136,9 +137,10 @@ private static void ExecuteLinksImportWordPress(JsonElement step, string baseDir SourceOriginPath = sourcePathValue, OutputPath = outputPath, Host = host, + ShortHost = shortHost, PathPrefix = GetString(step, "pathPrefix") ?? GetString(step, "path-prefix"), Owner = GetString(step, "owner"), - Tags = GetArrayOfStrings(step, "tags") ?? GetArrayOfStrings(step, "tag") ?? Array.Empty(), + Tags = GetStringOrArrayOfStrings(step, "tags", "tag"), Status = GetInt(step, "status") ?? 
302, AllowExternal = !(GetBool(step, "allowExternal") == false || GetBool(step, "allow-external") == false), MergeWithExisting = !(GetBool(step, "merge") == false || GetBool(step, "mergeWithExisting") == false || GetBool(step, "merge-with-existing") == false), diff --git a/PowerForge.Web/Services/WebLinkService.Import.cs b/PowerForge.Web/Services/WebLinkService.Import.cs index 701363ce..3d28ea22 100644 --- a/PowerForge.Web/Services/WebLinkService.Import.cs +++ b/PowerForge.Web/Services/WebLinkService.Import.cs @@ -34,7 +34,7 @@ public static WebLinkShortlinkImportResult ImportPrettyLinks(WebLinkShortlinkImp : new List(); var existingCount = existing.Count; - var merged = MergeShortlinks(existing, imported, options.ReplaceExisting, out var skippedCount); + var merged = MergeShortlinks(existing, imported, options.ReplaceExisting, options.ShortHost, out var skippedCount); WriteShortlinkJson(outputPath, merged); return new WebLinkShortlinkImportResult @@ -143,6 +143,7 @@ private static List MergeShortlinks( List existing, List imported, bool replaceExisting, + string? shortHost, out int skippedCount) { skippedCount = 0; @@ -151,13 +152,13 @@ private static List MergeShortlinks( foreach (var shortlink in existing.Where(static item => item is not null)) { - index[BuildShortlinkImportKey(shortlink)] = merged.Count; + index[BuildShortlinkImportKey(shortlink, shortHost)] = merged.Count; merged.Add(shortlink); } foreach (var shortlink in imported.Where(static item => item is not null)) { - var key = BuildShortlinkImportKey(shortlink); + var key = BuildShortlinkImportKey(shortlink, shortHost); if (index.TryGetValue(key, out var existingIndex)) { if (replaceExisting) @@ -273,19 +274,28 @@ private static int ParseRedirectStatus(string value, int defaultStatus) ? parsed : defaultStatus; - private static string BuildShortlinkImportKey(LinkShortlinkRule shortlink) + private static string BuildShortlinkImportKey(LinkShortlinkRule shortlink, string? 
shortHost) => string.Join("|", shortlink.Host ?? string.Empty, - NormalizeShortlinkImportPrefix(shortlink.PathPrefix), + NormalizeShortlinkImportPrefix(shortlink.PathPrefix, shortlink.Host, shortHost), shortlink.Slug ?? string.Empty); - private static string NormalizeShortlinkImportPrefix(string? pathPrefix) + private static string NormalizeShortlinkImportPrefix(string? pathPrefix, string? host, string? shortHost) { if (string.IsNullOrWhiteSpace(pathPrefix)) - return string.Empty; + { + return !string.IsNullOrWhiteSpace(host) && + !string.IsNullOrWhiteSpace(shortHost) && + host.Trim().Equals(shortHost.Trim(), StringComparison.OrdinalIgnoreCase) + ? "/" + : "/go"; + } - var trimmed = pathPrefix.Trim().Trim('/'); - return string.IsNullOrWhiteSpace(trimmed) ? string.Empty : "/" + trimmed; + var trimmed = pathPrefix.Trim(); + if (!trimmed.StartsWith("/", StringComparison.Ordinal)) + trimmed = "/" + trimmed.TrimStart('/'); + trimmed = trimmed.TrimEnd('/'); + return string.IsNullOrWhiteSpace(trimmed) ? "/" : trimmed; } private static string SlugifyShortlink(string value) @@ -337,6 +347,8 @@ public sealed class WebLinkShortlinkImportOptions public string OutputPath { get; set; } = string.Empty; /// Optional host for imported shortlinks. public string? Host { get; set; } + /// Configured shortlink host used to resolve implicit root prefixes. + public string? ShortHost { get; set; } /// Optional path prefix for imported shortlinks. public string? PathPrefix { get; set; } /// Default owner assigned to imported shortlinks. 
diff --git a/PowerForge.Web/Services/WebLinkService.cs b/PowerForge.Web/Services/WebLinkService.cs index f1296978..1b579f3e 100644 --- a/PowerForge.Web/Services/WebLinkService.cs +++ b/PowerForge.Web/Services/WebLinkService.cs @@ -407,7 +407,7 @@ private static void ValidateShortlinks( private static void ValidateRedirectGraph(LinkRedirectRule[] redirects, List issues) { - var map = new Dictionary(StringComparer.OrdinalIgnoreCase); + var map = new Dictionary(StringComparer.OrdinalIgnoreCase); foreach (var redirect in redirects) { if (redirect.MatchType != LinkRedirectMatchType.Exact && redirect.MatchType != LinkRedirectMatchType.Query) @@ -416,10 +416,14 @@ private static void ValidateRedirectGraph(LinkRedirectRule[] redirects, List 5) { @@ -466,11 +470,11 @@ private static void ValidateRedirectGraph(LinkRedirectRule[] redirects, List map, + IReadOnlyDictionary map, string host, string path, string? query, - out string target) + out RedirectGraphTarget target) { if (!string.IsNullOrWhiteSpace(host) && map.TryGetValue(BuildRedirectGraphKey(host, path, query), out target!)) @@ -481,6 +485,30 @@ private static bool TryGetRedirectGraphTarget( return map.TryGetValue(BuildRedirectGraphKey(null, path, query), out target!); } + private readonly record struct RedirectGraphTarget(string Path, string Query); + + private static RedirectGraphTarget BuildRedirectGraphTarget(string targetUrl) + => new(NormalizeSourcePath(targetUrl), NormalizeRedirectGraphQuery(ExtractLocalQuery(targetUrl))); + + private static string NormalizeRedirectGraphQuery(string? query) + => string.IsNullOrWhiteSpace(query) ? string.Empty : query.Trim().TrimStart('?').ToLowerInvariant(); + + private static string? ExtractLocalQuery(string? 
value) + { + if (string.IsNullOrWhiteSpace(value)) + return null; + + var trimmed = value.Trim(); + var hashIndex = trimmed.IndexOf('#'); + if (hashIndex >= 0) + trimmed = trimmed[..hashIndex]; + + var queryIndex = trimmed.IndexOf('?'); + return queryIndex >= 0 && queryIndex < trimmed.Length - 1 + ? trimmed[(queryIndex + 1)..] + : null; + } + private static void ValidateTarget(string targetUrl, bool allowExternal, List issues, string source, string? id, string codePrefix) { var trimmed = targetUrl.Trim(); From 5ae5cc8938ec92409e7f9b0f9acd5cc8b6a06f2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Przemys=C5=82aw=20K=C5=82ys?= Date: Sat, 18 Apr 2026 22:07:54 +0200 Subject: [PATCH 07/15] Harden link validation review cases --- PowerForge.Tests/WebLinkServiceTests.cs | 119 ++++++++++++++++++ .../Services/WebLinkService.Report404.cs | 2 + PowerForge.Web/Services/WebLinkService.cs | 39 ++++-- 3 files changed, 153 insertions(+), 7 deletions(-) diff --git a/PowerForge.Tests/WebLinkServiceTests.cs b/PowerForge.Tests/WebLinkServiceTests.cs index 3b3707ef..a2a423a4 100644 --- a/PowerForge.Tests/WebLinkServiceTests.cs +++ b/PowerForge.Tests/WebLinkServiceTests.cs @@ -110,6 +110,91 @@ public void ValidateRedirectGraph_KeepsHostScopedChainsSeparate() Assert.DoesNotContain(result.Issues, issue => issue.Code == "PFLINK.REDIRECT.CHAIN"); } + [Fact] + public void ValidateRedirects_TreatsWildcardAndEmptyHostsAsSameScope() + { + var dataSet = new WebLinkDataSet + { + Redirects = new[] + { + new LinkRedirectRule + { + Id = "global-empty", + SourcePath = "/old", + TargetUrl = "/new", + Status = 301 + }, + new LinkRedirectRule + { + Id = "global-wildcard", + SourceHost = "*", + SourcePath = "/old", + TargetUrl = "/other", + Status = 301 + } + } + }; + + var result = WebLinkService.Validate(dataSet); + + Assert.Contains(result.Issues, issue => issue.Code == "PFLINK.REDIRECT.DUPLICATE"); + } + + [Fact] + public void ValidateRedirects_TreatsExactSourceQueryAsQueryRuleForDuplicates() + { + var 
dataSet = new WebLinkDataSet + { + Redirects = new[] + { + new LinkRedirectRule + { + Id = "exact-query", + SourcePath = "/", + SourceQuery = "p=123", + MatchType = LinkRedirectMatchType.Exact, + TargetUrl = "/post-a", + Status = 301 + }, + new LinkRedirectRule + { + Id = "query", + SourcePath = "/", + SourceQuery = "p=123", + MatchType = LinkRedirectMatchType.Query, + TargetUrl = "/post-b", + Status = 301 + } + } + }; + + var result = WebLinkService.Validate(dataSet); + + Assert.Contains(result.Issues, issue => issue.Code == "PFLINK.REDIRECT.DUPLICATE"); + } + + [Fact] + public void ValidateRedirects_RejectsProtocolRelativeTargets() + { + var dataSet = new WebLinkDataSet + { + Redirects = new[] + { + new LinkRedirectRule + { + Id = "protocol-relative", + SourcePath = "/old", + TargetUrl = "//attacker.example/path", + Status = 301 + } + } + }; + + var result = WebLinkService.Validate(dataSet); + + Assert.Contains(result.Issues, issue => issue.Code == "PFLINK.REDIRECT.TARGET_INVALID"); + } + [Fact] public void ValidateRedirectGraph_DoesNotTreatQueryOrSlashCanonicalRulesAsLoops() { @@ -487,6 +572,40 @@ public void Generate404Report_SuggestsGeneratedRoutesFromApacheLog() } } + [Fact] + public void Generate404Report_DoesNotSuggestRootForUnrelatedMissingPath() + { + var root = Path.Combine(Path.GetTempPath(), "pf-web-links-404-root-score-" + Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(root); + + try + { + Directory.CreateDirectory(Path.Combine(root, "_site")); + File.WriteAllText(Path.Combine(root, "_site", "index.html"), "home"); + var logPath = Path.Combine(root, "404.csv"); + File.WriteAllText(logPath, + """ + path,count,status + /wp-login.php,4,404 + """); + + var result = WebLinkService.Generate404Report(new WebLink404ReportOptions + { + SiteRoot = Path.Combine(root, "_site"), + SourcePath = logPath, + MinimumScore = 0.8 + }); + + var suggestion = Assert.Single(result.Suggestions); + Assert.Empty(suggestion.Suggestions); + Assert.Equal(0, 
result.SuggestedObservationCount); + } + finally + { + TryDeleteDirectory(root); + } + } + [Fact] public void Generate404Report_HonorsIgnored404Rules() { diff --git a/PowerForge.Web/Services/WebLinkService.Report404.cs b/PowerForge.Web/Services/WebLinkService.Report404.cs index 09650512..a7f6e838 100644 --- a/PowerForge.Web/Services/WebLinkService.Report404.cs +++ b/PowerForge.Web/Services/WebLinkService.Report404.cs @@ -323,6 +323,8 @@ private static double ScoreRoute(string missingPath, string route) var candidate = NormalizeForScore(route); if (missing.Equals(candidate, StringComparison.OrdinalIgnoreCase)) return 1d; + if (missing.Length == 0 || candidate.Length == 0) + return 0d; if (missing.Contains(candidate, StringComparison.OrdinalIgnoreCase) || candidate.Contains(missing, StringComparison.OrdinalIgnoreCase)) return 0.82d; diff --git a/PowerForge.Web/Services/WebLinkService.cs b/PowerForge.Web/Services/WebLinkService.cs index 1b579f3e..aa013263 100644 --- a/PowerForge.Web/Services/WebLinkService.cs +++ b/PowerForge.Web/Services/WebLinkService.cs @@ -1,5 +1,6 @@ using System; using System.Collections.Generic; +using System.Globalization; using System.IO; using System.Linq; using System.Security.Cryptography; @@ -512,7 +513,13 @@ private static string NormalizeRedirectGraphQuery(string? query) private static void ValidateTarget(string targetUrl, bool allowExternal, List issues, string source, string? 
id, string codePrefix) { var trimmed = targetUrl.Trim(); - if (trimmed.StartsWith("/", StringComparison.Ordinal)) + if (trimmed.StartsWith("//", StringComparison.Ordinal)) + { + AddIssue(issues, LinkValidationSeverity.Error, codePrefix + ".TARGET_INVALID", "Protocol-relative target URLs are not supported.", source, id); + return; + } + + if (IsLocalPath(trimmed)) return; if (!Uri.TryCreate(trimmed, UriKind.Absolute, out var uri)) @@ -576,16 +583,27 @@ private static void AddRedirectIssue( private static string BuildRedirectKey(LinkRedirectRule redirect) => string.Join("|", - redirect.SourceHost ?? string.Empty, - redirect.MatchType.ToString(), + NormalizeRedirectGraphHost(redirect.SourceHost), + ((int)NormalizeRedirectKeyMatchType(redirect)).ToString(CultureInfo.InvariantCulture), NormalizeSourcePath(redirect.SourcePath), - redirect.SourceQuery ?? string.Empty); + NormalizeRedirectGraphQuery(redirect.SourceQuery)); + + private static LinkRedirectMatchType NormalizeRedirectKeyMatchType(LinkRedirectRule redirect) + => redirect.MatchType == LinkRedirectMatchType.Exact && !string.IsNullOrWhiteSpace(redirect.SourceQuery) + ? LinkRedirectMatchType.Query + : redirect.MatchType; private static string BuildRedirectGraphKey(string? host, string path, string? query) - => string.Join("|", NormalizeRedirectGraphHost(host), NormalizeSourcePath(path), query?.Trim().ToLowerInvariant() ?? string.Empty); + => string.Join("|", NormalizeRedirectGraphHost(host), NormalizeSourcePath(path), NormalizeRedirectGraphQuery(query)); private static string NormalizeRedirectGraphHost(string? host) - => string.IsNullOrWhiteSpace(host) ? string.Empty : host.Trim().ToLowerInvariant(); + { + if (string.IsNullOrWhiteSpace(host)) + return string.Empty; + + var trimmed = host.Trim(); + return trimmed.Equals("*", StringComparison.Ordinal) ? 
string.Empty : trimmed.ToLowerInvariant(); + } private static bool IsAllowedStatus(int status) => status is 301 or 302 or 307 or 308 or 410; @@ -756,7 +774,14 @@ private static string BuildDisplaySource(LinkRedirectRule redirect) } private static bool IsLocalPath(string? value) - => !string.IsNullOrWhiteSpace(value) && value.Trim().StartsWith("/", StringComparison.Ordinal); + { + if (string.IsNullOrWhiteSpace(value)) + return false; + + var trimmed = value.Trim(); + return trimmed.StartsWith("/", StringComparison.Ordinal) && + !trimmed.StartsWith("//", StringComparison.Ordinal); + } private static bool IsHttpUrl(string? value) => !string.IsNullOrWhiteSpace(value) && From efb08ffe5a9f2eee2e01aa0ad99f1eefb8ba538b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Przemys=C5=82aw=20K=C5=82ys?= Date: Sat, 18 Apr 2026 22:16:42 +0200 Subject: [PATCH 08/15] Address link comparison review gaps --- PowerForge.Tests/WebLinkServiceTests.cs | 74 +++++++++++++++++++ .../Services/WebLinkService.Import.cs | 2 +- PowerForge.Web/Services/WebLinkService.cs | 14 +++- 3 files changed, 87 insertions(+), 3 deletions(-) diff --git a/PowerForge.Tests/WebLinkServiceTests.cs b/PowerForge.Tests/WebLinkServiceTests.cs index a2a423a4..32973e8a 100644 --- a/PowerForge.Tests/WebLinkServiceTests.cs +++ b/PowerForge.Tests/WebLinkServiceTests.cs @@ -173,6 +173,36 @@ public void ValidateRedirects_TreatsExactSourceQueryAsQueryRuleForDuplicates() Assert.Contains(result.Issues, issue => issue.Code == "PFLINK.REDIRECT.DUPLICATE"); } + [Fact] + public void ValidateRedirects_PreservesTargetQueryWhenComparingDuplicates() + { + var dataSet = new WebLinkDataSet + { + Redirects = new[] + { + new LinkRedirectRule + { + Id = "promo-a", + SourcePath = "/promo", + TargetUrl = "/landing?src=a", + Status = 301 + }, + new LinkRedirectRule + { + Id = "promo-b", + SourcePath = "/promo", + TargetUrl = "/landing?src=b", + Status = 301 + } + } + }; + + var result = WebLinkService.Validate(dataSet); + + 
Assert.Contains(result.Issues, issue => issue.Code == "PFLINK.REDIRECT.DUPLICATE"); + Assert.DoesNotContain(result.Issues, issue => issue.Code == "PFLINK.REDIRECT.DUPLICATE_SAME_TARGET"); + } + [Fact] public void ValidateRedirects_RejectsProtocolRelativeTargets() { @@ -537,6 +567,50 @@ public void ImportPrettyLinks_TreatsImplicitPrefixAsGoForNonShortHosts() } } + [Fact] + public void ImportPrettyLinks_SlugifiesToValidatorSafeAscii() + { + var root = Path.Combine(Path.GetTempPath(), "pf-web-links-import-ascii-slug-" + Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(root); + + try + { + var csvPath = Path.Combine(root, "pretty-links.csv"); + var outPath = Path.Combine(root, "shortlinks.json"); + File.WriteAllText(csvPath, + """ + id,name,slug,url,clicks + 9,Cafe,café-2026,https://cafe.example.test,5 + """); + + WebLinkService.ImportPrettyLinks(new WebLinkShortlinkImportOptions + { + SourcePath = csvPath, + OutputPath = outPath, + Host = "evo.yt", + ShortHost = "evo.yt", + Owner = "evotec" + }); + + var loaded = WebLinkService.Load(new WebLinkLoadOptions + { + ShortlinksPath = outPath, + Hosts = new Dictionary(StringComparer.OrdinalIgnoreCase) + { + ["short"] = "evo.yt" + } + }); + + var shortlink = Assert.Single(loaded.Shortlinks); + Assert.Equal("caf-2026", shortlink.Slug); + Assert.True(WebLinkService.Validate(loaded).Success); + } + finally + { + TryDeleteDirectory(root); + } + } + [Fact] public void Generate404Report_SuggestsGeneratedRoutesFromApacheLog() { diff --git a/PowerForge.Web/Services/WebLinkService.Import.cs b/PowerForge.Web/Services/WebLinkService.Import.cs index 3d28ea22..93e20323 100644 --- a/PowerForge.Web/Services/WebLinkService.Import.cs +++ b/PowerForge.Web/Services/WebLinkService.Import.cs @@ -306,7 +306,7 @@ private static string SlugifyShortlink(string value) var result = new List(); foreach (var ch in value.Trim().ToLowerInvariant()) { - if (char.IsLetterOrDigit(ch)) + if ((ch >= 'a' && ch <= 'z') || (ch >= '0' && ch <= '9')) { 
result.Add(ch); } diff --git a/PowerForge.Web/Services/WebLinkService.cs b/PowerForge.Web/Services/WebLinkService.cs index aa013263..d945a997 100644 --- a/PowerForge.Web/Services/WebLinkService.cs +++ b/PowerForge.Web/Services/WebLinkService.cs @@ -687,7 +687,10 @@ private static string NormalizeTargetForCompare( var normalized = NormalizeDestination(targetUrl); if (!IsHttpUrl(normalized)) - return NormalizeLanguageRootPath(NormalizeSourcePath(normalized), sourceHost, languageRootHosts); + { + var path = NormalizeLanguageRootPath(NormalizeSourcePath(normalized), sourceHost, languageRootHosts); + return AppendNormalizedQuery(path, ExtractLocalQuery(normalized)); + } if (!Uri.TryCreate(normalized, UriKind.Absolute, out var uri)) return normalized; @@ -695,12 +698,19 @@ private static string NormalizeTargetForCompare( if (!string.IsNullOrWhiteSpace(sourceHost) && uri.Host.Equals(sourceHost.Trim(), StringComparison.OrdinalIgnoreCase)) { - return NormalizeLanguageRootPath(NormalizeSourcePath(uri.AbsolutePath), sourceHost, languageRootHosts); + var path = NormalizeLanguageRootPath(NormalizeSourcePath(uri.AbsolutePath), sourceHost, languageRootHosts); + return AppendNormalizedQuery(path, uri.Query); } return normalized.TrimEnd('/'); } + private static string AppendNormalizedQuery(string path, string? query) + { + var normalizedQuery = NormalizeRedirectGraphQuery(query); + return string.IsNullOrWhiteSpace(normalizedQuery) ? path : path + "?" + normalizedQuery; + } + private static string NormalizeLanguageRootPath( string path, string? 
sourceHost, From 4cf4916bf7c9f9319c78d3a0724a46bc4fabd199 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Przemys=C5=82aw=20K=C5=82ys?= Date: Sat, 18 Apr 2026 22:25:24 +0200 Subject: [PATCH 09/15] Fix final redirect export review cases --- PowerForge.Tests/WebLinkServiceTests.cs | 90 +++++++++++++++++++ .../Services/WebLinkService.ExportApache.cs | 4 +- PowerForge.Web/Services/WebLinkService.cs | 28 +++++- 3 files changed, 118 insertions(+), 4 deletions(-) diff --git a/PowerForge.Tests/WebLinkServiceTests.cs b/PowerForge.Tests/WebLinkServiceTests.cs index 32973e8a..3fdc374d 100644 --- a/PowerForge.Tests/WebLinkServiceTests.cs +++ b/PowerForge.Tests/WebLinkServiceTests.cs @@ -288,6 +288,37 @@ public void ValidateRedirectGraph_DetectsLoopsThroughTargetQuery() Assert.Contains(result.Issues, issue => issue.Code == "PFLINK.REDIRECT.LOOP"); } + [Fact] + public void ValidateRedirectGraph_DetectsLoopsThroughSameHostAbsoluteTargets() + { + var dataSet = new WebLinkDataSet + { + Redirects = new[] + { + new LinkRedirectRule + { + Id = "a", + SourceHost = "example.com", + SourcePath = "/a", + TargetUrl = "https://example.com/b", + Status = 301 + }, + new LinkRedirectRule + { + Id = "b", + SourceHost = "example.com", + SourcePath = "/b", + TargetUrl = "/a", + Status = 301 + } + } + }; + + var result = WebLinkService.Validate(dataSet); + + Assert.Contains(result.Issues, issue => issue.Code == "PFLINK.REDIRECT.LOOP"); + } + [Fact] public void ExportApache_EmitsHostScopedRedirectsAndShortlinks() { @@ -395,6 +426,52 @@ public void ExportApache_EscapesExactSourceRegexCharacters() } } + [Fact] + public void ExportApache_EmitsGoneRulesForPrefixAndRegexWithoutTarget() + { + var root = Path.Combine(Path.GetTempPath(), "pf-web-links-export-gone-" + Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(root); + + try + { + var outPath = Path.Combine(root, "links.conf"); + var dataSet = new WebLinkDataSet + { + Redirects = new[] + { + new LinkRedirectRule + { + Id = "gone-prefix", + 
SourcePath = "/gone/*", + MatchType = LinkRedirectMatchType.Prefix, + Status = 410 + }, + new LinkRedirectRule + { + Id = "gone-regex", + SourcePath = "/legacy/.*", + MatchType = LinkRedirectMatchType.Regex, + Status = 410 + } + } + }; + + WebLinkService.ExportApache(dataSet, new WebLinkApacheExportOptions + { + OutputPath = outPath + }); + + var apache = File.ReadAllText(outPath); + Assert.Contains("RewriteRule ^gone", apache, StringComparison.Ordinal); + Assert.Contains("RewriteRule ^legacy/.* - [G,L]", apache, StringComparison.Ordinal); + Assert.Equal(2, CountOccurrences(apache, "[G,L]")); + } + finally + { + TryDeleteDirectory(root); + } + } + [Fact] public void ImportPrettyLinks_MergesExistingShortlinksAndPreservesImportedHits() { @@ -1091,4 +1168,17 @@ private static void TryDeleteDirectory(string path) // best-effort cleanup } } + + private static int CountOccurrences(string text, string value) + { + var count = 0; + var index = 0; + while ((index = text.IndexOf(value, index, StringComparison.Ordinal)) >= 0) + { + count++; + index += value.Length; + } + + return count; + } } diff --git a/PowerForge.Web/Services/WebLinkService.ExportApache.cs b/PowerForge.Web/Services/WebLinkService.ExportApache.cs index dff50637..ee6ea5ac 100644 --- a/PowerForge.Web/Services/WebLinkService.ExportApache.cs +++ b/PowerForge.Web/Services/WebLinkService.ExportApache.cs @@ -188,7 +188,7 @@ private static bool TryBuildApachePattern(LinkRedirectRule rule, out string patt ? "^(.*)$" : $"^{Regex.Escape(prefix)}(?:/(.*))?$"; destination = NormalizeDestination(rule.TargetUrl).Replace("{path}", "$1", StringComparison.OrdinalIgnoreCase); - return !string.IsNullOrWhiteSpace(destination); + return rule.Status == 410 || !string.IsNullOrWhiteSpace(destination); } case LinkRedirectMatchType.Regex: { @@ -197,7 +197,7 @@ private static bool TryBuildApachePattern(LinkRedirectRule rule, out string patt regex = regex.TrimStart('/'); pattern = regex.StartsWith("^", StringComparison.Ordinal) ? 
regex : "^" + regex; destination = NormalizeDestination(rule.TargetUrl).Replace("{path}", "$1", StringComparison.OrdinalIgnoreCase); - return !string.IsNullOrWhiteSpace(destination); + return rule.Status == 410 || !string.IsNullOrWhiteSpace(destination); } case LinkRedirectMatchType.Query: case LinkRedirectMatchType.Exact: diff --git a/PowerForge.Web/Services/WebLinkService.cs b/PowerForge.Web/Services/WebLinkService.cs index d945a997..8d89432b 100644 --- a/PowerForge.Web/Services/WebLinkService.cs +++ b/PowerForge.Web/Services/WebLinkService.cs @@ -413,11 +413,10 @@ private static void ValidateRedirectGraph(LinkRedirectRule[] redirects, List new(NormalizeSourcePath(targetUrl), NormalizeRedirectGraphQuery(ExtractLocalQuery(targetUrl))); From 22ddc954dad6b5ac3821be951c6b374654441166 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Przemys=C5=82aw=20K=C5=82ys?= Date: Sat, 18 Apr 2026 22:34:07 +0200 Subject: [PATCH 10/15] Tighten redirect key normalization --- PowerForge.Tests/WebLinkServiceTests.cs | 101 ++++++++++++++++++++++ PowerForge.Web/Services/WebLinkService.cs | 41 +++++++-- 2 files changed, 135 insertions(+), 7 deletions(-) diff --git a/PowerForge.Tests/WebLinkServiceTests.cs b/PowerForge.Tests/WebLinkServiceTests.cs index 3fdc374d..da9b4e81 100644 --- a/PowerForge.Tests/WebLinkServiceTests.cs +++ b/PowerForge.Tests/WebLinkServiceTests.cs @@ -203,6 +203,70 @@ public void ValidateRedirects_PreservesTargetQueryWhenComparingDuplicates() Assert.DoesNotContain(result.Issues, issue => issue.Code == "PFLINK.REDIRECT.DUPLICATE_SAME_TARGET"); } + [Fact] + public void ValidateRedirects_PreservesSourceQueryCaseInKeys() + { + var dataSet = new WebLinkDataSet + { + Redirects = new[] + { + new LinkRedirectRule + { + Id = "upper", + SourcePath = "/promo", + SourceQuery = "Token=A", + MatchType = LinkRedirectMatchType.Query, + TargetUrl = "/landing-a", + Status = 301 + }, + new LinkRedirectRule + { + Id = "lower", + SourcePath = "/promo", + SourceQuery = "Token=a", + MatchType = 
LinkRedirectMatchType.Query, + TargetUrl = "/landing-b", + Status = 301 + } + } + }; + + var result = WebLinkService.Validate(dataSet); + + Assert.DoesNotContain(result.Issues, issue => issue.Code == "PFLINK.REDIRECT.DUPLICATE"); + } + + [Fact] + public void ValidateRedirects_KeepsRegexSourcePatternsIntactInKeys() + { + var dataSet = new WebLinkDataSet + { + Redirects = new[] + { + new LinkRedirectRule + { + Id = "regex-query-char", + SourcePath = "^foo?bar$", + MatchType = LinkRedirectMatchType.Regex, + TargetUrl = "/first", + Status = 301 + }, + new LinkRedirectRule + { + Id = "regex-fragment-char", + SourcePath = "^foo#bar$", + MatchType = LinkRedirectMatchType.Regex, + TargetUrl = "/second", + Status = 301 + } + } + }; + + var result = WebLinkService.Validate(dataSet); + + Assert.DoesNotContain(result.Issues, issue => issue.Code == "PFLINK.REDIRECT.DUPLICATE"); + } + [Fact] public void ValidateRedirects_RejectsProtocolRelativeTargets() { @@ -1156,6 +1220,43 @@ public void Load_ReadsJsonAndCompatibilityCsv() } } + [Fact] + public void Load_ReadsWrappedRedirectsCaseInsensitively() + { + var root = Path.Combine(Path.GetTempPath(), "pf-web-links-load-wrapper-case-" + Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(root); + + try + { + var redirectsPath = Path.Combine(root, "redirects.json"); + File.WriteAllText(redirectsPath, + """ + { + "Redirects": [ + { + "id": "manual", + "sourcePath": "/manual/", + "targetUrl": "/target/", + "status": 301 + } + ] + } + """); + + var dataSet = WebLinkService.Load(new WebLinkLoadOptions + { + RedirectsPath = redirectsPath + }); + + var redirect = Assert.Single(dataSet.Redirects); + Assert.Equal("manual", redirect.Id); + } + finally + { + TryDeleteDirectory(root); + } + } + private static void TryDeleteDirectory(string path) { try diff --git a/PowerForge.Web/Services/WebLinkService.cs b/PowerForge.Web/Services/WebLinkService.cs index 8d89432b..32fd34a1 100644 --- a/PowerForge.Web/Services/WebLinkService.cs +++ 
b/PowerForge.Web/Services/WebLinkService.cs @@ -107,7 +107,7 @@ private static void LoadRedirectJson(string? path, List redire usedSources.Add(resolved); JsonElement source = document.RootElement; - if (source.ValueKind == JsonValueKind.Object && source.TryGetProperty("redirects", out var nested)) + if (source.ValueKind == JsonValueKind.Object && TryGetPropertyIgnoreCase(source, "redirects", out var nested)) source = nested; if (source.ValueKind != JsonValueKind.Array) return; @@ -141,7 +141,7 @@ private static void LoadShortlinkJson(string? path, List shor usedSources.Add(resolved); JsonElement source = document.RootElement; - if (source.ValueKind == JsonValueKind.Object && source.TryGetProperty("shortlinks", out var nested)) + if (source.ValueKind == JsonValueKind.Object && TryGetPropertyIgnoreCase(source, "shortlinks", out var nested)) source = nested; if (source.ValueKind != JsonValueKind.Array) return; @@ -418,7 +418,7 @@ private static void ValidateRedirectGraph(LinkRedirectRule[] redirects, List new(NormalizeSourcePath(targetUrl), NormalizeRedirectGraphQuery(ExtractLocalQuery(targetUrl))); private static string NormalizeRedirectGraphQuery(string? query) - => string.IsNullOrWhiteSpace(query) ? string.Empty : query.Trim().TrimStart('?').ToLowerInvariant(); + => string.IsNullOrWhiteSpace(query) ? string.Empty : query.Trim().TrimStart('?'); private static string? ExtractLocalQuery(string? 
value) { @@ -609,8 +609,13 @@ private static string BuildRedirectKey(LinkRedirectRule redirect) => string.Join("|", NormalizeRedirectGraphHost(redirect.SourceHost), ((int)NormalizeRedirectKeyMatchType(redirect)).ToString(CultureInfo.InvariantCulture), - NormalizeSourcePath(redirect.SourcePath), - NormalizeRedirectGraphQuery(redirect.SourceQuery)); + BuildRedirectSourceKey(redirect), + BuildOrdinalKey(NormalizeRedirectGraphQuery(redirect.SourceQuery))); + + private static string BuildRedirectSourceKey(LinkRedirectRule redirect) + => redirect.MatchType == LinkRedirectMatchType.Regex + ? BuildOrdinalKey(redirect.SourcePath?.Trim() ?? string.Empty) + : NormalizeSourcePath(redirect.SourcePath); private static LinkRedirectMatchType NormalizeRedirectKeyMatchType(LinkRedirectRule redirect) => redirect.MatchType == LinkRedirectMatchType.Exact && !string.IsNullOrWhiteSpace(redirect.SourceQuery) @@ -618,7 +623,10 @@ private static LinkRedirectMatchType NormalizeRedirectKeyMatchType(LinkRedirectR : redirect.MatchType; private static string BuildRedirectGraphKey(string? host, string path, string? query) - => string.Join("|", NormalizeRedirectGraphHost(host), NormalizeSourcePath(path), NormalizeRedirectGraphQuery(query)); + => string.Join("|", NormalizeRedirectGraphHost(host), NormalizeSourcePath(path), BuildOrdinalKey(NormalizeRedirectGraphQuery(query))); + + private static string BuildOrdinalKey(string value) + => Convert.ToHexString(Encoding.UTF8.GetBytes(value)); private static string NormalizeRedirectGraphHost(string? host) { @@ -641,6 +649,25 @@ private static bool IsBroadRegex(string pattern) private static int ResolveStatus(int status, int defaultStatus) => status <= 0 ? 
defaultStatus : status; + private static bool TryGetPropertyIgnoreCase(JsonElement element, string name, out JsonElement value) + { + if (element.TryGetProperty(name, out value)) + return true; + + foreach (var property in element.EnumerateObject()) + { + if (property.NameEquals(name) || + property.Name.Equals(name, StringComparison.OrdinalIgnoreCase)) + { + value = property.Value; + return true; + } + } + + value = default; + return false; + } + private static int MatchTypeOrder(LinkRedirectMatchType matchType) => matchType switch { From 90b60bcce6cdf862dd3557d98efba66c1d72ea76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Przemys=C5=82aw=20K=C5=82ys?= Date: Sat, 18 Apr 2026 23:19:02 +0200 Subject: [PATCH 11/15] Align link host validation semantics --- PowerForge.Tests/WebLinkServiceTests.cs | 52 +++++++++++++++++++++++ PowerForge.Web/Services/WebLinkService.cs | 15 ++++--- 2 files changed, 62 insertions(+), 5 deletions(-) diff --git a/PowerForge.Tests/WebLinkServiceTests.cs b/PowerForge.Tests/WebLinkServiceTests.cs index da9b4e81..bffae597 100644 --- a/PowerForge.Tests/WebLinkServiceTests.cs +++ b/PowerForge.Tests/WebLinkServiceTests.cs @@ -78,6 +78,35 @@ public void Validate_DetectsShortHostDuplicateRoutes() Assert.Contains(result.Issues, issue => issue.Code == "PFLINK.SHORTLINK.DUPLICATE"); } + [Fact] + public void ValidateShortlinks_TreatsWildcardAndEmptyHostsAsSameScope() + { + var dataSet = new WebLinkDataSet + { + Shortlinks = new[] + { + new LinkShortlinkRule + { + Slug = "docs", + TargetUrl = "/docs/", + Owner = "evotec" + }, + new LinkShortlinkRule + { + Host = "*", + Slug = "docs", + TargetUrl = "/docs-v2/", + Owner = "evotec" + } + } + }; + + var result = WebLinkService.Validate(dataSet); + + Assert.False(result.Success); + Assert.Contains(result.Issues, issue => issue.Code == "PFLINK.SHORTLINK.DUPLICATE"); + } + [Fact] public void ValidateRedirectGraph_KeepsHostScopedChainsSeparate() { @@ -289,6 +318,29 @@ public void 
ValidateRedirects_RejectsProtocolRelativeTargets() Assert.Contains(result.Issues, issue => issue.Code == "PFLINK.REDIRECT.TARGET_INVALID"); } + [Fact] + public void ValidateRedirects_TreatsSameHostAbsoluteTargetAsInternal() + { + var dataSet = new WebLinkDataSet + { + Redirects = new[] + { + new LinkRedirectRule + { + Id = "same-host", + SourceHost = "example.com", + SourcePath = "/old", + TargetUrl = "https://example.com/new", + Status = 301 + } + } + }; + + var result = WebLinkService.Validate(dataSet); + + Assert.DoesNotContain(result.Issues, issue => issue.Code == "PFLINK.REDIRECT.TARGET_EXTERNAL"); + } + [Fact] public void ValidateRedirectGraph_DoesNotTreatQueryOrSlashCanonicalRulesAsLoops() { diff --git a/PowerForge.Web/Services/WebLinkService.cs b/PowerForge.Web/Services/WebLinkService.cs index 32fd34a1..7a38e520 100644 --- a/PowerForge.Web/Services/WebLinkService.cs +++ b/PowerForge.Web/Services/WebLinkService.cs @@ -321,7 +321,7 @@ private static void ValidateRedirects( AddIssue(issues, LinkValidationSeverity.Error, "PFLINK.REDIRECT.TARGET_MISSING", "Redirect target URL is required unless status is 410.", "redirect", label); if (!string.IsNullOrWhiteSpace(redirect.TargetUrl)) - ValidateTarget(redirect.TargetUrl, redirect.AllowExternal, issues, "redirect", label, "PFLINK.REDIRECT"); + ValidateTarget(redirect.TargetUrl, redirect.AllowExternal, issues, "redirect", label, "PFLINK.REDIRECT", redirect.SourceHost); if (redirect.MatchType == LinkRedirectMatchType.Regex && IsBroadRegex(redirect.SourcePath)) AddIssue(issues, LinkValidationSeverity.Warning, "PFLINK.REDIRECT.REGEX_BROAD", "Regex redirect looks very broad and should be reviewed.", "redirect", label); @@ -389,9 +389,9 @@ private static void ValidateShortlinks( if (string.IsNullOrWhiteSpace(shortlink.TargetUrl)) AddIssue(issues, LinkValidationSeverity.Error, "PFLINK.SHORTLINK.TARGET_MISSING", "Shortlink target URL is required.", "shortlink", label); else - ValidateTarget(AppendUtm(shortlink.TargetUrl, 
shortlink.Utm), shortlink.AllowExternal, issues, "shortlink", label, "PFLINK.SHORTLINK"); + ValidateTarget(AppendUtm(shortlink.TargetUrl, shortlink.Utm), shortlink.AllowExternal, issues, "shortlink", label, "PFLINK.SHORTLINK", shortlink.Host); - var key = $"{shortlink.Host ?? string.Empty}|{NormalizeShortlinkPath(shortlink, hosts)}"; + var key = $"{NormalizeRedirectGraphHost(shortlink.Host)}|{NormalizeShortlinkPath(shortlink, hosts)}"; if (seen.TryGetValue(key, out var existing)) { AddIssue(issues, LinkValidationSeverity.Error, "PFLINK.SHORTLINK.DUPLICATE", $"Duplicate shortlink conflicts with '{existing.Slug}'.", "shortlink", label); @@ -534,7 +534,7 @@ private static string NormalizeRedirectGraphQuery(string? query) : null; } - private static void ValidateTarget(string targetUrl, bool allowExternal, List issues, string source, string? id, string codePrefix) + private static void ValidateTarget(string targetUrl, bool allowExternal, List issues, string source, string? id, string codePrefix, string? sourceHost = null) { var trimmed = targetUrl.Trim(); if (trimmed.StartsWith("//", StringComparison.Ordinal)) @@ -559,10 +559,15 @@ private static void ValidateTarget(string targetUrl, bool allowExternal, List
  • !string.IsNullOrWhiteSpace(sourceHost) && + !sourceHost.Trim().Equals("*", StringComparison.Ordinal) && + target.Host.Equals(sourceHost.Trim(), StringComparison.OrdinalIgnoreCase); + private static void AddIssue(List issues, LinkValidationSeverity severity, string code, string message, string source, string? id) => issues.Add(new LinkValidationIssue { From ec06b939b1ae9a5fdd0ad762ca3c9df7e2f37c9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Przemys=C5=82aw=20K=C5=82ys?= Date: Sat, 18 Apr 2026 23:28:11 +0200 Subject: [PATCH 12/15] Close final link review edge cases --- PowerForge.Tests/WebLinkServiceTests.cs | 62 +++++++++++++++++++++++ PowerForge.Web/Services/WebLinkService.cs | 50 +++++++++++++++++- 2 files changed, 110 insertions(+), 2 deletions(-) diff --git a/PowerForge.Tests/WebLinkServiceTests.cs b/PowerForge.Tests/WebLinkServiceTests.cs index bffae597..c1368c64 100644 --- a/PowerForge.Tests/WebLinkServiceTests.cs +++ b/PowerForge.Tests/WebLinkServiceTests.cs @@ -404,6 +404,28 @@ public void ValidateRedirectGraph_DetectsLoopsThroughTargetQuery() Assert.Contains(result.Issues, issue => issue.Code == "PFLINK.REDIRECT.LOOP"); } + [Fact] + public void ValidateRedirectGraph_DetectsDirectSelfRedirects() + { + var dataSet = new WebLinkDataSet + { + Redirects = new[] + { + new LinkRedirectRule + { + Id = "self", + SourcePath = "/a", + TargetUrl = "/a", + Status = 301 + } + } + }; + + var result = WebLinkService.Validate(dataSet); + + Assert.Contains(result.Issues, issue => issue.Code == "PFLINK.REDIRECT.LOOP"); + } + [Fact] public void ValidateRedirectGraph_DetectsLoopsThroughSameHostAbsoluteTargets() { @@ -588,6 +610,46 @@ public void ExportApache_EmitsGoneRulesForPrefixAndRegexWithoutTarget() } } + [Fact] + public void ExportApache_InsertsShortlinkUtmBeforeFragment() + { + var root = Path.Combine(Path.GetTempPath(), "pf-web-links-export-utm-fragment-" + Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(root); + + try + { + var outPath = Path.Combine(root, 
"links.conf"); + var dataSet = new WebLinkDataSet + { + Shortlinks = new[] + { + new LinkShortlinkRule + { + Slug = "promo", + TargetUrl = "https://example.test/landing#cta", + Utm = "utm_source=short", + Status = 302, + Owner = "evotec", + AllowExternal = true + } + } + }; + + WebLinkService.ExportApache(dataSet, new WebLinkApacheExportOptions + { + OutputPath = outPath + }); + + var apache = File.ReadAllText(outPath); + Assert.Contains("https://example.test/landing?utm_source=short#cta", apache, StringComparison.Ordinal); + Assert.DoesNotContain("https://example.test/landing#cta?utm_source=short", apache, StringComparison.Ordinal); + } + finally + { + TryDeleteDirectory(root); + } + } + [Fact] public void ImportPrettyLinks_MergesExistingShortlinksAndPreservesImportedHits() { diff --git a/PowerForge.Web/Services/WebLinkService.cs b/PowerForge.Web/Services/WebLinkService.cs index 7a38e520..60ebfa34 100644 --- a/PowerForge.Web/Services/WebLinkService.cs +++ b/PowerForge.Web/Services/WebLinkService.cs @@ -420,6 +420,17 @@ private static void ValidateRedirectGraph(LinkRedirectRule[] redirects, List new(NormalizeSourcePath(targetUrl), NormalizeRedirectGraphQuery(ExtractLocalQuery(targetUrl))); + private static bool IsDirectSelfRedirect(LinkRedirectRule redirect) + { + var sourcePath = NormalizePathPreservingTrailingSlash(redirect.SourcePath); + var targetPath = NormalizePathPreservingTrailingSlash(redirect.TargetUrl); + return !string.IsNullOrWhiteSpace(sourcePath) && + sourcePath.Equals(targetPath, StringComparison.OrdinalIgnoreCase); + } + + private static string NormalizePathPreservingTrailingSlash(string? 
value) + { + if (string.IsNullOrWhiteSpace(value)) + return string.Empty; + + var trimmed = value.Trim(); + if (Uri.TryCreate(trimmed, UriKind.Absolute, out var uri) && + (uri.Scheme.Equals(Uri.UriSchemeHttp, StringComparison.OrdinalIgnoreCase) || + uri.Scheme.Equals(Uri.UriSchemeHttps, StringComparison.OrdinalIgnoreCase))) + { + trimmed = uri.AbsolutePath; + } + + var queryIndex = trimmed.IndexOf('?'); + if (queryIndex >= 0) + trimmed = trimmed[..queryIndex]; + var hashIndex = trimmed.IndexOf('#'); + if (hashIndex >= 0) + trimmed = trimmed[..hashIndex]; + if (!trimmed.StartsWith("/", StringComparison.Ordinal)) + trimmed = "/" + trimmed.TrimStart('/'); + return trimmed; + } + private static string NormalizeRedirectGraphQuery(string? query) => string.IsNullOrWhiteSpace(query) ? string.Empty : query.Trim().TrimStart('?'); @@ -890,8 +933,11 @@ private static string AppendUtm(string targetUrl, string? utm) if (string.IsNullOrWhiteSpace(targetUrl) || string.IsNullOrWhiteSpace(utm)) return targetUrl; - var separator = targetUrl.Contains('?', StringComparison.Ordinal) ? "&" : "?"; - return targetUrl + separator + utm.Trim().TrimStart('?').TrimStart('&'); + var fragmentIndex = targetUrl.IndexOf('#'); + var beforeFragment = fragmentIndex >= 0 ? targetUrl[..fragmentIndex] : targetUrl; + var fragment = fragmentIndex >= 0 ? targetUrl[fragmentIndex..] : string.Empty; + var separator = beforeFragment.Contains('?', StringComparison.Ordinal) ? 
"&" : "?"; + return beforeFragment + separator + utm.Trim().TrimStart('?').TrimStart('&') + fragment; } private static string BuildImportedId(LinkLegacySource source, string target, int row) From e6b6fff7fca252359cb96c4399f8486d7c6ffdec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Przemys=C5=82aw=20K=C5=82ys?= Date: Sat, 18 Apr 2026 23:38:04 +0200 Subject: [PATCH 13/15] Handle final link graph review cases --- PowerForge.Tests/WebLinkServiceTests.cs | 68 +++++++++++++++++++ .../Services/WebLinkService.ExportApache.cs | 4 +- PowerForge.Web/Services/WebLinkService.cs | 33 +++++++-- 3 files changed, 99 insertions(+), 6 deletions(-) diff --git a/PowerForge.Tests/WebLinkServiceTests.cs b/PowerForge.Tests/WebLinkServiceTests.cs index c1368c64..dfff2348 100644 --- a/PowerForge.Tests/WebLinkServiceTests.cs +++ b/PowerForge.Tests/WebLinkServiceTests.cs @@ -404,6 +404,35 @@ public void ValidateRedirectGraph_DetectsLoopsThroughTargetQuery() Assert.Contains(result.Issues, issue => issue.Code == "PFLINK.REDIRECT.LOOP"); } + [Fact] + public void ValidateRedirectGraph_DetectsLoopsThroughQuerylessExactNextHop() + { + var dataSet = new WebLinkDataSet + { + Redirects = new[] + { + new LinkRedirectRule + { + Id = "a", + SourcePath = "/a", + TargetUrl = "/b?x=1", + Status = 301 + }, + new LinkRedirectRule + { + Id = "b", + SourcePath = "/b", + TargetUrl = "/a", + Status = 301 + } + } + }; + + var result = WebLinkService.Validate(dataSet); + + Assert.Contains(result.Issues, issue => issue.Code == "PFLINK.REDIRECT.LOOP"); + } + [Fact] public void ValidateRedirectGraph_DetectsDirectSelfRedirects() { @@ -610,6 +639,45 @@ public void ExportApache_EmitsGoneRulesForPrefixAndRegexWithoutTarget() } } + [Fact] + public void ExportApache_StripsLeadingSlashAfterRegexAnchor() + { + var root = Path.Combine(Path.GetTempPath(), "pf-web-links-export-regex-anchor-" + Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(root); + + try + { + var outPath = Path.Combine(root, "links.conf"); + var dataSet 
= new WebLinkDataSet + { + Redirects = new[] + { + new LinkRedirectRule + { + Id = "anchored-regex", + SourcePath = "^/legacy/(.*)$", + MatchType = LinkRedirectMatchType.Regex, + TargetUrl = "/archive/{path}", + Status = 301 + } + } + }; + + WebLinkService.ExportApache(dataSet, new WebLinkApacheExportOptions + { + OutputPath = outPath + }); + + var apache = File.ReadAllText(outPath); + Assert.Contains("RewriteRule ^legacy/(.*)$ /archive/$1 [R=301,L,QSD]", apache, StringComparison.Ordinal); + Assert.DoesNotContain("RewriteRule ^/legacy", apache, StringComparison.Ordinal); + } + finally + { + TryDeleteDirectory(root); + } + } + [Fact] public void ExportApache_InsertsShortlinkUtmBeforeFragment() { diff --git a/PowerForge.Web/Services/WebLinkService.ExportApache.cs b/PowerForge.Web/Services/WebLinkService.ExportApache.cs index ee6ea5ac..ae884ff6 100644 --- a/PowerForge.Web/Services/WebLinkService.ExportApache.cs +++ b/PowerForge.Web/Services/WebLinkService.ExportApache.cs @@ -193,7 +193,9 @@ private static bool TryBuildApachePattern(LinkRedirectRule rule, out string patt case LinkRedirectMatchType.Regex: { var regex = rule.SourcePath.Trim(); - if (regex.StartsWith("/", StringComparison.Ordinal)) + if (regex.StartsWith("^/", StringComparison.Ordinal)) + regex = "^" + regex[2..].TrimStart('/'); + else if (regex.StartsWith("/", StringComparison.Ordinal)) regex = regex.TrimStart('/'); pattern = regex.StartsWith("^", StringComparison.Ordinal) ? 
regex : "^" + regex; destination = NormalizeDestination(rule.TargetUrl).Replace("{path}", "$1", StringComparison.OrdinalIgnoreCase); diff --git a/PowerForge.Web/Services/WebLinkService.cs b/PowerForge.Web/Services/WebLinkService.cs index 60ebfa34..bede992d 100644 --- a/PowerForge.Web/Services/WebLinkService.cs +++ b/PowerForge.Web/Services/WebLinkService.cs @@ -435,7 +435,10 @@ private static void ValidateRedirectGraph(LinkRedirectRule[] redirects, List new(NormalizeSourcePath(targetUrl), NormalizeRedirectGraphQuery(ExtractLocalQuery(targetUrl))); + => new(NormalizeSourcePath(targetUrl), NormalizeRedirectGraphQuery(ExtractLocalQuery(targetUrl)), false); private static bool IsDirectSelfRedirect(LinkRedirectRule redirect) { From 1e8c021292c3dd77173a350de074c68c4f411bd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Przemys=C5=82aw=20K=C5=82ys?= Date: Sat, 18 Apr 2026 23:48:14 +0200 Subject: [PATCH 14/15] Close wildcard link review cases --- PowerForge.Tests/WebLinkServiceTests.cs | 88 +++++++++++++++++++ .../Services/WebLinkService.Import.cs | 2 +- PowerForge.Web/Services/WebLinkService.cs | 82 +++++++++++------ 3 files changed, 144 insertions(+), 28 deletions(-) diff --git a/PowerForge.Tests/WebLinkServiceTests.cs b/PowerForge.Tests/WebLinkServiceTests.cs index dfff2348..9d09cdfa 100644 --- a/PowerForge.Tests/WebLinkServiceTests.cs +++ b/PowerForge.Tests/WebLinkServiceTests.cs @@ -486,6 +486,36 @@ public void ValidateRedirectGraph_DetectsLoopsThroughSameHostAbsoluteTargets() Assert.Contains(result.Issues, issue => issue.Code == "PFLINK.REDIRECT.LOOP"); } + [Fact] + public void ValidateRedirectGraph_DetectsWildcardLoopsThroughHostScopedEdges() + { + var dataSet = new WebLinkDataSet + { + Redirects = new[] + { + new LinkRedirectRule + { + Id = "wildcard-a", + SourcePath = "/a", + TargetUrl = "/b", + Status = 301 + }, + new LinkRedirectRule + { + Id = "host-b", + SourceHost = "example.com", + SourcePath = "/b", + TargetUrl = "/a", + Status = 301 + } + } + }; + + var result = 
WebLinkService.Validate(dataSet); + + Assert.Contains(result.Issues, issue => issue.Code == "PFLINK.REDIRECT.LOOP"); + } + [Fact] public void ExportApache_EmitsHostScopedRedirectsAndShortlinks() { @@ -840,6 +870,64 @@ public void ImportPrettyLinks_NormalizesPrefixWhenMergingExistingShortlinks() } } + [Fact] + public void ImportPrettyLinks_TreatsWildcardAndHostlessShortlinkMergeKeysAsSameScope() + { + var root = Path.Combine(Path.GetTempPath(), "pf-web-links-import-wildcard-host-" + Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(root); + + try + { + var csvPath = Path.Combine(root, "pretty-links.csv"); + var outPath = Path.Combine(root, "shortlinks.json"); + File.WriteAllText(csvPath, + """ + id,name,slug,url,clicks + 8,Docs,/go/docs,https://docs-new.example.test,12 + """); + File.WriteAllText(outPath, + """ + { + "shortlinks": [ + { + "host": "*", + "pathPrefix": "/go", + "slug": "docs", + "targetUrl": "https://docs.example.test", + "owner": "evotec", + "allowExternal": true + } + ] + } + """); + + var result = WebLinkService.ImportPrettyLinks(new WebLinkShortlinkImportOptions + { + SourcePath = csvPath, + OutputPath = outPath, + PathPrefix = "/go", + Owner = "evotec" + }); + + Assert.Equal(1, result.ExistingCount); + Assert.Equal(1, result.ImportedCount); + Assert.Equal(1, result.WrittenCount); + Assert.Equal(1, result.SkippedDuplicateCount); + + var loaded = WebLinkService.Load(new WebLinkLoadOptions + { + ShortlinksPath = outPath + }); + var docs = Assert.Single(loaded.Shortlinks, item => item.Slug == "docs"); + Assert.Equal("*", docs.Host); + Assert.Equal("https://docs.example.test", docs.TargetUrl); + } + finally + { + TryDeleteDirectory(root); + } + } + [Fact] public void ImportPrettyLinks_TreatsImplicitPrefixAsGoForNonShortHosts() { diff --git a/PowerForge.Web/Services/WebLinkService.Import.cs b/PowerForge.Web/Services/WebLinkService.Import.cs index 93e20323..cdde35e2 100644 --- a/PowerForge.Web/Services/WebLinkService.Import.cs +++ 
b/PowerForge.Web/Services/WebLinkService.Import.cs @@ -276,7 +276,7 @@ private static int ParseRedirectStatus(string value, int defaultStatus) private static string BuildShortlinkImportKey(LinkShortlinkRule shortlink, string? shortHost) => string.Join("|", - shortlink.Host ?? string.Empty, + NormalizeRedirectGraphHost(shortlink.Host), NormalizeShortlinkImportPrefix(shortlink.PathPrefix, shortlink.Host, shortHost), shortlink.Slug ?? string.Empty); diff --git a/PowerForge.Web/Services/WebLinkService.cs b/PowerForge.Web/Services/WebLinkService.cs index bede992d..741bf3e9 100644 --- a/PowerForge.Web/Services/WebLinkService.cs +++ b/PowerForge.Web/Services/WebLinkService.cs @@ -441,48 +441,76 @@ private static void ValidateRedirectGraph(LinkRedirectRule[] redirects, List NormalizeRedirectGraphHost(redirect.SourceHost)) + .Where(host => !string.IsNullOrWhiteSpace(host)) + .Distinct(StringComparer.OrdinalIgnoreCase) + .ToArray(); + foreach (var redirect in redirects) { if (redirect.MatchType != LinkRedirectMatchType.Exact && redirect.MatchType != LinkRedirectMatchType.Query) continue; var host = NormalizeRedirectGraphHost(redirect.SourceHost); - var current = NormalizeSourcePath(redirect.SourcePath); - var currentQuery = redirect.SourceQuery; - var visited = new HashSet(StringComparer.OrdinalIgnoreCase); - var depth = 0; - while (TryGetRedirectGraphTarget(map, host, current, currentQuery, out var next)) + foreach (var traversalHost in BuildRedirectGraphTraversalHosts(host, graphHosts)) { - if (!visited.Add(BuildRedirectGraphKey(host, current, currentQuery))) + var current = NormalizeSourcePath(redirect.SourcePath); + var currentQuery = redirect.SourceQuery; + var visited = new HashSet(StringComparer.OrdinalIgnoreCase); + var depth = 0; + var reported = false; + while (TryGetRedirectGraphTarget(map, traversalHost, current, currentQuery, out var next)) { - AddRedirectIssue( - issues, - LinkValidationSeverity.Error, - "PFLINK.REDIRECT.LOOP", - $"Redirect loop detected 
starting at {BuildDisplaySource(redirect)}.", - redirect, - normalizedTarget: NormalizeSourcePath(redirect.TargetUrl)); - break; + if (!visited.Add(BuildRedirectGraphKey(traversalHost, current, currentQuery))) + { + AddRedirectIssue( + issues, + LinkValidationSeverity.Error, + "PFLINK.REDIRECT.LOOP", + $"Redirect loop detected starting at {BuildDisplaySource(redirect)}.", + redirect, + normalizedTarget: NormalizeSourcePath(redirect.TargetUrl)); + reported = true; + break; + } + + current = next.Path; + currentQuery = next.Query; + depth++; + if (depth > 5) + { + AddRedirectIssue( + issues, + LinkValidationSeverity.Error, + "PFLINK.REDIRECT.CHAIN", + $"Redirect chain is longer than 5 hops starting at {BuildDisplaySource(redirect)}.", + redirect, + normalizedTarget: NormalizeSourcePath(redirect.TargetUrl)); + reported = true; + break; + } } - current = next.Path; - currentQuery = next.Query; - depth++; - if (depth > 5) - { - AddRedirectIssue( - issues, - LinkValidationSeverity.Error, - "PFLINK.REDIRECT.CHAIN", - $"Redirect chain is longer than 5 hops starting at {BuildDisplaySource(redirect)}.", - redirect, - normalizedTarget: NormalizeSourcePath(redirect.TargetUrl)); + if (reported) break; - } } } } + private static IEnumerable BuildRedirectGraphTraversalHosts(string host, IReadOnlyList graphHosts) + { + if (!string.IsNullOrWhiteSpace(host)) + { + yield return host; + yield break; + } + + yield return string.Empty; + foreach (var graphHost in graphHosts) + yield return graphHost; + } + private static bool TryGetRedirectGraphTarget( IReadOnlyDictionary map, string host, From 411fa7279217ae0635aad5dad1f9c3e664f90972 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Przemys=C5=82aw=20K=C5=82ys?= Date: Sat, 18 Apr 2026 23:58:07 +0200 Subject: [PATCH 15/15] Harden link review edge cases --- PowerForge.Tests/WebCliLinksTests.cs | 3 +- PowerForge.Tests/WebLinkServiceTests.cs | 28 +++++++++++++++++++ .../WebCliCommandHandlers.Links.cs | 12 +++++++- 
.../Services/WebLinkService.ApplyReview.cs | 14 ++++++---- 4 files changed, 49 insertions(+), 8 deletions(-) diff --git a/PowerForge.Tests/WebCliLinksTests.cs b/PowerForge.Tests/WebCliLinksTests.cs index 12bf6243..cf315cb4 100644 --- a/PowerForge.Tests/WebCliLinksTests.cs +++ b/PowerForge.Tests/WebCliLinksTests.cs @@ -188,7 +188,7 @@ public void HandleSubCommand_LinksImportWordPress_ImportsPrettyLinksCsv() var exitCode = WebCliCommandHandlers.HandleSubCommand( "links", - new[] { "import-wordpress", "--config", configPath, "--source", importPath, "--owner", "evotec", "--tag", "imported" }, + new[] { "import-wordpress", "--config", configPath, "--source", importPath, "--host", "short=evo.yt", "--owner", "evotec", "--tag", "imported" }, outputJson: true, logger: new WebConsoleLogger(), outputSchemaVersion: CliEnvelopeSchemaVersion); @@ -199,6 +199,7 @@ public void HandleSubCommand_LinksImportWordPress_ImportsPrettyLinksCsv() var json = File.ReadAllText(shortlinksPath); Assert.Contains("\"slug\": \"teams\"", json, StringComparison.Ordinal); Assert.Contains("\"host\": \"evo.yt\"", json, StringComparison.Ordinal); + Assert.DoesNotContain("\"host\": \"short=evo.yt\"", json, StringComparison.Ordinal); Assert.Contains("\"targetUrl\": \"https://teams.example.test\"", json, StringComparison.Ordinal); Assert.Contains("\"importedHits\": 42", json, StringComparison.Ordinal); Assert.Contains("\"source\": \"imported-pretty-links\"", json, StringComparison.Ordinal); diff --git a/PowerForge.Tests/WebLinkServiceTests.cs b/PowerForge.Tests/WebLinkServiceTests.cs index 9d09cdfa..9fa4f2ba 100644 --- a/PowerForge.Tests/WebLinkServiceTests.cs +++ b/PowerForge.Tests/WebLinkServiceTests.cs @@ -1361,6 +1361,34 @@ public void ApplyReviewCandidates_MergesReviewedCandidatesWithoutPowerShell() } } + [Fact] + public void ApplyReviewCandidates_FailsWhenRequestedCandidateFileIsMissing() + { + var root = Path.Combine(Path.GetTempPath(), "pf-web-links-apply-review-missing-" + 
Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(root); + + try + { + var redirectsPath = Path.Combine(root, "data", "links", "redirects.json"); + Directory.CreateDirectory(Path.GetDirectoryName(redirectsPath)!); + File.WriteAllText(redirectsPath, "{ \"redirects\": [] }"); + var missingCandidatesPath = Path.Combine(root, "Build", "link-reports", "missing-candidates.json"); + + var exception = Assert.Throws(() => WebLinkService.ApplyReviewCandidates(new WebLinkReviewApplyOptions + { + ApplyRedirects = true, + RedirectCandidatesPath = missingCandidatesPath, + RedirectsPath = redirectsPath + })); + + Assert.Equal(missingCandidatesPath, exception.FileName); + } + finally + { + TryDeleteDirectory(root); + } + } + [Fact] public void Validate_LanguageRootHostTreatsPrefixedAndRootTargetsAsSame() { diff --git a/PowerForge.Web.Cli/WebCliCommandHandlers.Links.cs b/PowerForge.Web.Cli/WebCliCommandHandlers.Links.cs index 818cdedd..0c3c2237 100644 --- a/PowerForge.Web.Cli/WebCliCommandHandlers.Links.cs +++ b/PowerForge.Web.Cli/WebCliCommandHandlers.Links.cs @@ -600,7 +600,7 @@ private static int HandleLinksImportWordPress(string[] args, bool outputJson, We return Fail("Missing required --out or links.shortlinks config path.", outputJson, logger, command); var hosts = BuildLinkHostMap(args, loaded.Spec); - var host = TryGetOptionValue(args, "--host"); + var host = TryGetDirectImportHost(args); hosts.TryGetValue("short", out var configuredShortHost); if (string.IsNullOrWhiteSpace(host) && !string.IsNullOrWhiteSpace(configuredShortHost)) host = configuredShortHost; @@ -643,4 +643,14 @@ private static int HandleLinksImportWordPress(string[] args, bool outputJson, We return 0; } + private static string? TryGetDirectImportHost(string[] args) + { + var host = TryGetOptionValue(args, "--host"); + if (string.IsNullOrWhiteSpace(host)) + return null; + + var trimmed = host.Trim(); + return trimmed.Contains('=') ? 
null : trimmed; + } + } diff --git a/PowerForge.Web/Services/WebLinkService.ApplyReview.cs b/PowerForge.Web/Services/WebLinkService.ApplyReview.cs index 0c7b04b1..ea990f6f 100644 --- a/PowerForge.Web/Services/WebLinkService.ApplyReview.cs +++ b/PowerForge.Web/Services/WebLinkService.ApplyReview.cs @@ -36,12 +36,13 @@ private static WebLinkReviewApplySection ApplyRedirectCandidates(WebLinkReviewAp var candidatePath = Path.GetFullPath(options.RedirectCandidatesPath); var targetPath = Path.GetFullPath(options.RedirectsPath); + if (!File.Exists(candidatePath)) + throw new FileNotFoundException("Redirect candidate file was not found.", candidatePath); + var existing = File.Exists(targetPath) ? ReadExistingRedirects(targetPath) : new List(); - var candidates = File.Exists(candidatePath) - ? ReadExistingRedirects(candidatePath) - : new List(); + var candidates = ReadExistingRedirects(candidatePath); if (options.EnableRedirects) { @@ -74,12 +75,13 @@ private static WebLinkReviewApplySection ApplyIgnored404Candidates(WebLinkReview var candidatePath = Path.GetFullPath(options.Ignored404CandidatesPath); var targetPath = Path.GetFullPath(options.Ignored404Path); + if (!File.Exists(candidatePath)) + throw new FileNotFoundException("Ignored-404 candidate file was not found.", candidatePath); + var existing = File.Exists(targetPath) ? LoadIgnored404Rules(targetPath).ToList() : new List(); - var candidates = File.Exists(candidatePath) - ? LoadIgnored404Rules(candidatePath).ToList() - : new List(); + var candidates = LoadIgnored404Rules(candidatePath).ToList(); var merged = MergeIgnored404Rules(existing, candidates, options.ReplaceExisting, out var skipped, out var replaced); if (!options.DryRun)