-
Notifications
You must be signed in to change notification settings - Fork 7
Configurable protected special pages list and optional quick request halting #10
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
9d90dce
8c3c670
c79c0d5
393e473
5362e2b
4dac2fa
7a52559
21f3e73
d701cde
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,2 +1,7 @@ | ||
| # CrawlerProtection | ||
| Protect wikis against crawler bots | ||
|
|
||
| # Configuration | ||
|
|
||
| * `$wgCrawlerProtectedSpecialPages` - array of special pages to protect (default: `[ 'recentchangeslinked', 'whatlinkshere' ]`). Supported values are lowercase special page names, titled special page names and prefixed special page names. | ||
| * `$wgCrawlerProtectionDenyFast` - drop denied requests in a quick way via `die();` with [418 I'm a teapot](https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Status/418) code (default: `false`) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -26,6 +26,7 @@ class_alias( '\Article', '\MediaWiki\Page\Article' ); | |
|
|
||
| use MediaWiki\Actions\ActionEntryPoint; | ||
| use MediaWiki\Hook\MediaWikiPerformActionHook; | ||
| use MediaWiki\MediaWikiServices; | ||
| use MediaWiki\Output\OutputPage; | ||
| use MediaWiki\Page\Article; | ||
| use MediaWiki\Request\WebRequest; | ||
|
|
@@ -96,16 +97,39 @@ public function onSpecialPageBeforeExecute( $special, $subPage ) { | |
| return true; | ||
| } | ||
|
|
||
| $config = MediaWikiServices::getInstance()->getMainConfig(); | ||
| $protectedSpecialPages = $config->get( 'CrawlerProtectedSpecialPages' ); | ||
| // The fast-deny switch is its own boolean setting; reading the page list here would make it truthy for any non-empty list. | ||
| $denyFast = $config->get( 'CrawlerProtectionDenyFast' ); | ||
|
|
||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Rather than having multiple checks, please add a line to get a version of For example: $result = array_map(
fn($p) => ($p = strtolower($p)) && strpos($p, NS_SPECIAL_NAME) === 0
? substr($p, 8)
: $p,
$protectedSpecialPages
); |
||
| $name = strtolower( $special->getName() ); | ||
| if ( in_array( $name, [ 'recentchangeslinked', 'whatlinkshere' ], true ) ) { | ||
| if ( | ||
| // allow forgiving entries in the setting array for Special pages names | ||
| in_array( $special->getName(), $protectedSpecialPages, true ) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These 3 lines will be redundant once the transformation is applied. Please remove the two extra lines |
||
| || in_array( $name, $protectedSpecialPages, true ) | ||
| || in_array( 'Special:' . $special->getName(), $protectedSpecialPages, true ) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please refactor magic word |
||
| ) { | ||
| $out = $special->getContext()->getOutput(); | ||
| if ( $denyFast ) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please add unit tests to test this branch |
||
| $this->denyAccessFast(); | ||
| } | ||
| $this->denyAccess( $out ); | ||
| return false; | ||
| } | ||
|
|
||
| return true; | ||
| } | ||
|
|
||
| /** | ||
| * Helper: send a 418 "I'm a teapot" status line and halt processing immediately. | ||
| * | ||
| * The header is emitted directly and the script is terminated with die(), so this method never returns to its caller. | ||
| * | ||
| * @return void | ||
| * @suppress PhanPluginNeverReturnMethod | ||
| */ | ||
| protected function denyAccessFast() { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. "Deny access fast" is a subjective name which I don't think properly addresses why one might choose to use this. Naming is a hard problem to solve, so I do empathize. How about we change the 403 vs. 418 preference variable to |
||
| // The reason phrase has to match the 418 status code; "Forbidden" is the phrase for 403. | ||
| header( "HTTP/1.0 418 I'm a teapot" ); | ||
| die( 'I am a teapot' ); | ||
| } | ||
|
|
||
| /** | ||
| * Helper: output 403 Access Denied page using i18n messages. | ||
| * | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please add
"mobilediff" too