From e4f7914e6fe8bccbad1d6613b655b2633b0959a2 Mon Sep 17 00:00:00 2001
From: Pokechu22
Date: Sat, 16 Aug 2025 19:23:12 -0700
Subject: [PATCH] Add Wix ignore set

This has been in use for a while, and generally works well. Wix likes to
give 200s for URLs that should be 404s (and gives a 404 message on those
pages too).

Wix sometimes manages to find other ways of looping (sometimes repeating
_404_ in the URL, potentially without a / between them), but that is
comparatively rare and I'm not aware of a nice ignore for it (I
generally ignore _404_.*_404 in that case, but that only makes sense
after having seen it already starting to loop).
---
 db/ignore_patterns/wix.json | 7 +++++++
 1 file changed, 7 insertions(+)
 create mode 100644 db/ignore_patterns/wix.json

diff --git a/db/ignore_patterns/wix.json b/db/ignore_patterns/wix.json
new file mode 100644
index 00000000..84e41eb2
--- /dev/null
+++ b/db/ignore_patterns/wix.json
@@ -0,0 +1,7 @@
+{
+    "name": "wix",
+    "patterns": [
+        "^https?://{primary_netloc}/((.*/)?productPage_USD_productPage_USD|(.*/)?h_\\d+/(.*/)?h_\\d+(/|$)|.*/.*\\.(jpg|jpeg|svg|png|json|txt|xml|text|gif|pdf|mp4|webp)$|.*\\.(css|js|json)$|.*/wix-thunderbolt/)"
+    ],
+    "type": "ignore_patterns"
+}