From 9d90dce4ddaef5fa83a246b00dae45185f6fbba5 Mon Sep 17 00:00:00 2001 From: Vedmaka Date: Thu, 23 Oct 2025 13:57:36 +0200 Subject: [PATCH 1/9] Add configuration options for crawler protected special pages and improves fast deny logic --- extension.json | 11 +++++++++++ includes/Hooks.php | 24 +++++++++++++++++++++++- 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/extension.json b/extension.json index eff0a0c..43c6eb1 100644 --- a/extension.json +++ b/extension.json @@ -20,6 +20,17 @@ "MediaWikiPerformAction": "main", "SpecialPageBeforeExecute": "main" }, + "config": { + "CrawlerProtectedSpecialPages": { + "value": [ + "recentchangeslinked", + "whatlinkshere" + ] + }, + "CrawlerProtectionDenyFast": { + "value": false + } + }, "license-name": "MIT", "Tests": { "phpunit": "tests/phpunit" diff --git a/includes/Hooks.php b/includes/Hooks.php index 436d7e0..e9a47b8 100644 --- a/includes/Hooks.php +++ b/includes/Hooks.php @@ -26,6 +26,7 @@ class_alias( '\Article', '\MediaWiki\Page\Article' ); use MediaWiki\Actions\ActionEntryPoint; use MediaWiki\Hook\MediaWikiPerformActionHook; +use MediaWiki\MediaWikiServices; use MediaWiki\Output\OutputPage; use MediaWiki\Page\Article; use MediaWiki\Request\WebRequest; @@ -96,9 +97,25 @@ public function onSpecialPageBeforeExecute( $special, $subPage ) { return true; } + $protectedSpecialPages = MediaWikiServices::getInstance() + ->getMainConfig() + ->get('CrawlerProtectedSpecialPages'); + + $denyFast = MediaWikiServices::getInstance() + ->getMainConfig() + ->get('CrawlerProtectedSpecialPages'); + $name = strtolower( $special->getName() ); - if ( in_array( $name, [ 'recentchangeslinked', 'whatlinkshere' ], true ) ) { + if ( + // allow forgiving entries in the setting array for Special pages names + in_array( $special->getName(), $protectedSpecialPages, true ) + || in_array( $name, $protectedSpecialPages, true ) + || in_array( 'Special:' . 
$name, $protectedSpecialPages, true ) + ) { $out = $special->getContext()->getOutput(); + if ( $denyFast ) { + $this->denyAccessFast(); + } $this->denyAccess( $out ); return false; } @@ -106,6 +123,11 @@ public function onSpecialPageBeforeExecute( $special, $subPage ) { return true; } + protected function denyAccessFast(): void { + header( 'HTTP/1.0 418 Forbidden'); + die( 'I am a teapot' ); + } + /** * Helper: output 403 Access Denied page using i18n messages. * From 8c3c67081163ed8d2abc787516cdb0544928db64 Mon Sep 17 00:00:00 2001 From: Vedmaka Date: Thu, 23 Oct 2025 18:38:40 +0200 Subject: [PATCH 2/9] Updates README.md with details on Configuration variables --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index 8d8ed3f..b9d9cac 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,7 @@ # CrawlerProtection Protect wikis against crawler bots + +# Configuration + +* `$wgCrawlerProtectedSpecialPages` - array of special pages to protect (default: `[ 'recentchangeslinked', 'whatlinkshere' ]`) +* `$wgCrawlerProtectionDenyFast` - drop denied requests in a quick way via `die();` with [418 I'm a teapot](https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Status/418) code (default: `false`) From c79c0d52e0095a70b8cfc239b704e5572675fc8b Mon Sep 17 00:00:00 2001 From: Vedmaka Date: Thu, 23 Oct 2025 18:40:29 +0200 Subject: [PATCH 3/9] Code style --- includes/Hooks.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/includes/Hooks.php b/includes/Hooks.php index e9a47b8..2491319 100644 --- a/includes/Hooks.php +++ b/includes/Hooks.php @@ -99,11 +99,11 @@ public function onSpecialPageBeforeExecute( $special, $subPage ) { $protectedSpecialPages = MediaWikiServices::getInstance() ->getMainConfig() - ->get('CrawlerProtectedSpecialPages'); + ->get( 'CrawlerProtectedSpecialPages' ); $denyFast = MediaWikiServices::getInstance() ->getMainConfig() - ->get('CrawlerProtectedSpecialPages'); + ->get( 
'CrawlerProtectedSpecialPages' ); $name = strtolower( $special->getName() ); if ( @@ -124,7 +124,7 @@ public function onSpecialPageBeforeExecute( $special, $subPage ) { } protected function denyAccessFast(): void { - header( 'HTTP/1.0 418 Forbidden'); + header( 'HTTP/1.0 418 Forbidden' ); die( 'I am a teapot' ); } From 393e473bcef2c355ffdeab2403cc3d59ede4e38f Mon Sep 17 00:00:00 2001 From: Vedmaka Date: Thu, 23 Oct 2025 18:41:38 +0200 Subject: [PATCH 4/9] Phan --- includes/Hooks.php | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/includes/Hooks.php b/includes/Hooks.php index 2491319..3801f24 100644 --- a/includes/Hooks.php +++ b/includes/Hooks.php @@ -123,6 +123,11 @@ public function onSpecialPageBeforeExecute( $special, $subPage ) { return true; } + /** + * Helper: output 418 Access Denied page using i18n messages and die + * + * @return never + */ protected function denyAccessFast(): void { header( 'HTTP/1.0 418 Forbidden' ); die( 'I am a teapot' ); From 5362e2b149960c22d748eade58a919f1df4cdc95 Mon Sep 17 00:00:00 2001 From: Vedmaka Date: Thu, 23 Oct 2025 18:44:41 +0200 Subject: [PATCH 5/9] Allows for prefixed page names match, updates README.md --- README.md | 2 +- includes/Hooks.php | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index b9d9cac..454edad 100644 --- a/README.md +++ b/README.md @@ -3,5 +3,5 @@ Protect wikis against crawler bots # Configuration -* `$wgCrawlerProtectedSpecialPages` - array of special pages to protect (default: `[ 'recentchangeslinked', 'whatlinkshere' ]`) +* `$wgCrawlerProtectedSpecialPages` - array of special pages to protect (default: `[ 'recentchangeslinked', 'whatlinkshere' ]`). Supported values are lowercase special page names, titled special page names and prefixed special page names. 
* `$wgCrawlerProtectionDenyFast` - drop denied requests in a quick way via `die();` with [418 I'm a teapot](https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Status/418) code (default: `false`) diff --git a/includes/Hooks.php b/includes/Hooks.php index 3801f24..312f688 100644 --- a/includes/Hooks.php +++ b/includes/Hooks.php @@ -110,7 +110,7 @@ public function onSpecialPageBeforeExecute( $special, $subPage ) { // allow forgiving entries in the setting array for Special pages names in_array( $special->getName(), $protectedSpecialPages, true ) || in_array( $name, $protectedSpecialPages, true ) - || in_array( 'Special:' . $name, $protectedSpecialPages, true ) + || in_array( 'Special:' . $special->getName(), $protectedSpecialPages, true ) ) { $out = $special->getContext()->getOutput(); if ( $denyFast ) { @@ -124,7 +124,7 @@ public function onSpecialPageBeforeExecute( $special, $subPage ) { } /** - * Helper: output 418 Access Denied page using i18n messages and die + * Helper: output 418 Teapot and halt the processing immediately * * @return never */ From 4dac2fa0a70c3ae3349de5fb5549d4a07af3b3d4 Mon Sep 17 00:00:00 2001 From: Vedmaka Date: Thu, 23 Oct 2025 18:45:17 +0200 Subject: [PATCH 6/9] Optimises config values retrival --- includes/Hooks.php | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/includes/Hooks.php b/includes/Hooks.php index 312f688..7ce8359 100644 --- a/includes/Hooks.php +++ b/includes/Hooks.php @@ -97,13 +97,9 @@ public function onSpecialPageBeforeExecute( $special, $subPage ) { return true; } - $protectedSpecialPages = MediaWikiServices::getInstance() - ->getMainConfig() - ->get( 'CrawlerProtectedSpecialPages' ); - - $denyFast = MediaWikiServices::getInstance() - ->getMainConfig() - ->get( 'CrawlerProtectedSpecialPages' ); + $config = MediaWikiServices::getInstance()->getMainConfig(); + $protectedSpecialPages = $config->get( 'CrawlerProtectedSpecialPages' ); + $denyFast = $config->get( 
'CrawlerProtectionDenyFast' ); $name = strtolower( $special->getName() ); if ( From 7a525591e9ba7d1ebbec13a0ede2df3e6c67d22d Mon Sep 17 00:00:00 2001 From: Vedmaka Date: Thu, 23 Oct 2025 18:45:55 +0200 Subject: [PATCH 7/9] Phan --- includes/Hooks.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/includes/Hooks.php b/includes/Hooks.php index 7ce8359..7c93575 100644 --- a/includes/Hooks.php +++ b/includes/Hooks.php @@ -122,7 +122,7 @@ public function onSpecialPageBeforeExecute( $special, $subPage ) { /** * Helper: output 418 Teapot and halt the processing immediately * - * @return never + * @return void */ protected function denyAccessFast(): void { header( 'HTTP/1.0 418 Forbidden' ); From 21f3e732bb0c0cb04a5d61f1e24b978e97758438 Mon Sep 17 00:00:00 2001 From: Vedmaka Date: Thu, 23 Oct 2025 18:48:59 +0200 Subject: [PATCH 8/9] Phan --- includes/Hooks.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/includes/Hooks.php b/includes/Hooks.php index 7c93575..8045589 100644 --- a/includes/Hooks.php +++ b/includes/Hooks.php @@ -124,7 +124,7 @@ public function onSpecialPageBeforeExecute( $special, $subPage ) { * * @return void */ - protected function denyAccessFast(): void { + protected function denyAccessFast() { header( 'HTTP/1.0 418 Forbidden' ); die( 'I am a teapot' ); } From d701cde8e6cbb5e1994e38b847432242dfa5a15a Mon Sep 17 00:00:00 2001 From: Vedmaka Date: Thu, 23 Oct 2025 18:50:37 +0200 Subject: [PATCH 9/9] Phan! --- includes/Hooks.php | 1 + 1 file changed, 1 insertion(+) diff --git a/includes/Hooks.php b/includes/Hooks.php index 8045589..afb2066 100644 --- a/includes/Hooks.php +++ b/includes/Hooks.php @@ -123,6 +123,7 @@ public function onSpecialPageBeforeExecute( $special, $subPage ) { * Helper: output 418 Teapot and halt the processing immediately * * @return void + * @suppress PhanPluginNeverReturnMethod */ protected function denyAccessFast() { header( 'HTTP/1.0 418 Forbidden' );