diff --git a/.github/CI-SETUP.md b/.github/CI-SETUP.md new file mode 100644 index 0000000..2e47025 --- /dev/null +++ b/.github/CI-SETUP.md @@ -0,0 +1,144 @@ +# CI Setup - Docker-Based Local Testing + +## ✅ Prerequisites + +- Docker +- Docker Compose +- Make +- Git + +## 🚀 Quick Start (Recommended) + +### 1. Add docker-compose-ci as a submodule +```bash +cd /path/to/CrawlerProtection # root of your local checkout + +# Remove build/ from .gitignore if present (it's a submodule now) +sed -i '/^build\/$/d' .gitignore + +# Add the submodule +git submodule add https://github.com/gesinn-it-pub/docker-compose-ci.git build +git add .gitignore .gitmodules build Makefile +git commit -m "Add docker-compose-ci for local testing" +``` + +### 2. Initialize submodule (for fresh clones) +```bash +# When cloning the repo in the future, use: +git clone --recursive https://github.com/freephile/CrawlerProtection.git + +# Or if already cloned without --recursive: +git submodule update --init --recursive +``` + +### 3. Run CI Tests +The `Makefile` is already configured. Just run: +```bash +make ci # Run all CI checks +make ci-coverage # Run with coverage +make bash # Enter container to run commands manually +make down # Stop containers +``` + +## 🔧 What Gets Tested + +The `make ci` command runs: +- **Lint** - PHP syntax checking (parallel-lint) +- **PHPCS** - Code style validation (MediaWiki standards) +- **PHPUnit** - Unit tests + +All checks run in a container with the correct PHP version, extensions, and MediaWiki setup!
+ +## 📋 Common Commands + +```bash +# Run all tests +make ci + +# Run specific tests inside container +make bash +> composer phpcs # Code style check +> composer phpcbf # Auto-fix code style +> composer phpunit # Run PHPUnit tests +> composer test # Run phpcs + phpunit + +# Test with different MediaWiki versions +MW_VERSION=1.39 make ci +MW_VERSION=1.43 PHP_VERSION=8.3 make ci + +# Clean up +make down +make clean +``` + +## 🌐 Access Wiki in Browser + +Create `build/docker-compose.override.yml`: +```yaml +services: + wiki: + ports: + - 8080:8080 +``` + +Then start: `make up` and visit http://localhost:8080 + +## 🔄 Update Docker CI + +```bash +git submodule update --init --remote +``` + +## 📝 Environment Variables + +Create `.env` file to customize: +```bash +MW_VERSION=1.43 +PHP_VERSION=8.2 +DB_TYPE=sqlite +EXTENSION=CrawlerProtection +``` + +## ⚡ Quick Fixes Before Commit + +```bash +# Auto-fix code style issues +make bash +> composer phpcbf + +# Check what will fail in CI +make ci +``` + +## 🐛 Troubleshooting + +**"build directory not found"** +```bash +git submodule update --init --remote +``` + +**"Container keeps restarting"** +```bash +make down +make clean +make ci +``` + +**"Permission denied"** +```bash +sudo chmod -R 777 cache/ +``` + +## 🎯 GitHub Actions Setup + +Your `.github/workflows/ci.yml` already exists and will run automatically on: +- Pushes to `main` or `specialPageList` branches +- All pull requests + +Check results at: https://github.com/freephile/CrawlerProtection/actions + +## 🔗 Resources + +- [docker-compose-ci documentation](https://github.com/gesinn-it-pub/docker-compose-ci) +- [MediaWiki coding conventions](https://www.mediawiki.org/wiki/Manual:Coding_conventions) +- Your GitHub Actions: https://github.com/freephile/CrawlerProtection/actions diff --git a/.github/DOCKER-CI-QUICKREF.md b/.github/DOCKER-CI-QUICKREF.md new file mode 100644 index 0000000..54b4d79 --- /dev/null +++ b/.github/DOCKER-CI-QUICKREF.md @@ -0,0 +1,56 @@ +# Docker CI 
Quick Reference + +## Setup (One Time) +```bash +git submodule add https://github.com/gesinn-it-pub/docker-compose-ci.git build +git add .gitignore .gitmodules build Makefile TESTING.md +git commit -m "Add docker-compose-ci for local testing" +``` + +## Daily Use + +```bash +make ci # Run all checks (before commit) +make bash # Fix issues manually + > composer phpcbf # Auto-fix code style +make down # Clean up +``` + +## All Commands + +| Command | Purpose | +|---------|---------| +| `make ci` | Run all CI checks (lint, phpcs, phpunit) | +| `make bash` | Enter container shell | +| `make up` | Start wiki (http://localhost:8080) | +| `make down` | Stop all containers | +| `make clean` | Remove all containers and volumes | + +## Inside Container (`make bash`) + +| Command | Purpose | +|---------|---------| +| `composer test` | Run phpcs + phpunit | +| `composer phpcs` | Check code style | +| `composer phpcbf` | Fix code style automatically | +| `composer phpunit` | Run unit tests | + +## Test Different Versions + +```bash +MW_VERSION=1.39 PHP_VERSION=8.1 make ci # Test MW 1.39 + PHP 8.1 +MW_VERSION=1.43 PHP_VERSION=8.3 make ci # Test MW 1.43 + PHP 8.3 +``` + +## Troubleshooting + +```bash +make down && make clean # Nuclear option: clean everything +git submodule update --init --remote # Update docker-compose-ci +``` + +## See Also + +- Full docs: `.github/CI-SETUP.md` +- Testing guide: `TESTING.md` +- Your CI runs: https://github.com/freephile/CrawlerProtection/actions diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 28ed119..d1505a5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,6 +7,7 @@ on: push: branches: - main + - specialPageList # Add your development branch pull_request: env: @@ -58,7 +59,7 @@ jobs: - name: Lint run: ./vendor/bin/parallel-lint --exclude node_modules --exclude vendor extensions/${{ env.EXTNAME }} - name: PHP Code Sniffer - run: ./vendor/bin/phpcs -sp 
--standard=vendor/mediawiki/mediawiki-codesniffer/MediaWiki extensions/${{ env.EXTNAME }} + run: cd extensions/${{ env.EXTNAME }} && ../../vendor/bin/phpcs -sp security: name: Static Analysis diff --git a/.gitignore b/.gitignore index d974a53..f6a1b97 100644 --- a/.gitignore +++ b/.gitignore @@ -8,7 +8,8 @@ __pycache__/ # Distribution / packaging .Python -build/ +# do not ignore build because it is a submodule +# build/ develop-eggs/ dist/ downloads/ diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..2d846d0 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "build"] + path = build + url = https://github.com/gesinn-it-pub/docker-compose-ci.git diff --git a/.phpcs.xml b/.phpcs.xml new file mode 100644 index 0000000..62de045 --- /dev/null +++ b/.phpcs.xml @@ -0,0 +1,27 @@ + + + MediaWiki coding standards for CrawlerProtection extension + + + + + + + + . + + + */build/* + */vendor/* + */node_modules/* + *.phan/* + + + + + + + + + + diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..315dde4 --- /dev/null +++ b/Makefile @@ -0,0 +1,27 @@ +-include .env +export + +# setup for docker-compose-ci build directory +ifeq (,$(wildcard ./build/)) + $(shell git submodule update --init --remote) +endif + +EXTENSION=CrawlerProtection + +# docker images +MW_VERSION?=1.43 +PHP_VERSION?=8.2 +DB_TYPE?=mysql +DB_IMAGE?="mariadb:11.2" + +# composer +# Enables "composer update" inside of extension +# Leave empty/unset to disable, set to "true" to enable +COMPOSER_EXT?= + +# nodejs +# Enables node.js related tests and "npm install" +# Leave empty/unset to disable, set to "true" to enable +NODE_JS?= + +include build/Makefile diff --git a/README.md b/README.md index 8d8ed3f..27dbe91 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,22 @@ # CrawlerProtection -Protect wikis against crawler bots + +Protect wikis against crawler bots. 
CrawlerProtection denies **anonymous** user +access to certain MediaWiki action URLs and SpecialPages which are resource +intensive. + +## Configuration + +* `$wgCrawlerProtectedSpecialPages` - array of special pages to protect + (default: `[ 'mobilediff', 'recentchangeslinked', 'whatlinkshere' ]`). + Values are matched case-insensitively against each special page's canonical name (localized aliases are not resolved). + You do not need to use the 'Special:' prefix. Note that you can fetch a full + list of canonical special page names defined by your wiki using the API and jq with a simple + bash one-liner like + `curl -s "[YOURWIKI]api.php?action=query&meta=siteinfo&siprop=specialpagealiases&format=json" | jq -r '.query.specialpagealiases[].realname' | sort` + Of course, certain special pages such as Special:UserLogin MUST remain accessible, so do not block + everything. +* `$wgCrawlerProtectionUse418` - reject denied requests immediately via + `die();` with a + [418 I'm a teapot](https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Status/418) + status code (default: `false`) + diff --git a/TESTING.md b/TESTING.md new file mode 100644 index 0000000..9818459 --- /dev/null +++ b/TESTING.md @@ -0,0 +1,67 @@ +# Local CI Testing with Docker + +This extension uses [docker-compose-ci](https://github.com/gesinn-it-pub/docker-compose-ci) for local testing. + +## Quick Start + +```bash +# One-time setup (if not already done) +git submodule update --init --recursive + +# Run all CI checks (lint, phpcs, phpunit) +make ci + +# Auto-fix code style issues +make bash +> composer phpcbf + +# Stop containers +make down +``` + +## Why Docker?
+ +- ✅ Same environment as GitHub Actions CI +- ✅ Correct PHP version, extensions, and MediaWiki automatically +- ✅ No need to install MediaWiki locally +- ✅ Test against multiple MW/PHP versions easily +- ✅ Isolated from your local system + +## Common Commands + +```bash +make ci # Run all CI checks +make bash # Enter container shell +make up # Start wiki (http://localhost:8080) +make down # Stop containers +make clean # Remove containers and volumes +``` + +## Test Different Versions + +```bash +# Test with MediaWiki 1.39 and PHP 8.1 +MW_VERSION=1.39 PHP_VERSION=8.1 make ci + +# Test with MediaWiki 1.43 and PHP 8.3 +MW_VERSION=1.43 PHP_VERSION=8.3 make ci +``` + +## Available Composer Scripts + +Inside the container (`make bash`): + +```bash +composer test # Run phpcs + phpunit +composer phpcs # Check code style +composer phpcbf # Fix code style +composer phpunit # Run unit tests +``` + +## Update Docker CI + +```bash +git submodule update --init --remote +``` + +See `.github/CI-SETUP.md` and `.github/DOCKER-CI-QUICKREF.md` for more details. 
diff --git a/composer.json b/composer.json index e757cbf..5501a9f 100644 --- a/composer.json +++ b/composer.json @@ -1,6 +1,10 @@ { "require-dev": { - "phpunit/phpunit": "^9.0" + "phpunit/phpunit": "^9.0", + "mediawiki/mediawiki-codesniffer": "43.0.0", + "mediawiki/minus-x": "1.1.3", + "php-parallel-lint/php-console-highlighter": "1.0.0", + "php-parallel-lint/php-parallel-lint": "1.4.0" }, "autoload": { "psr-4": { @@ -15,5 +19,19 @@ "tests/phpunit/stubs.php", "tests/phpunit/namespaced-stubs.php" ] + }, + "scripts": { + "test": [ + "@phpcs", + "@phpunit" + ], + "phpcs": "vendor/bin/phpcs -sp --standard=.phpcs.xml", + "phpcbf": "vendor/bin/phpcbf --standard=.phpcs.xml", + "phpunit": "php ../../tests/phpunit/phpunit.php tests/phpunit/" + }, + "config": { + "allow-plugins": { + "dealerdirect/phpcodesniffer-composer-installer": true + } } } diff --git a/extension.json b/extension.json index eff0a0c..b09e568 100644 --- a/extension.json +++ b/extension.json @@ -10,16 +10,28 @@ "AutoloadNamespaces": { "MediaWiki\\Extension\\CrawlerProtection\\": "includes/" }, - "HookHandlers": { - "main": { - "class": "MediaWiki\\Extension\\CrawlerProtection\\Hooks", - "services": [] - } - }, + "HookHandlers": { + "main": { + "class": "MediaWiki\\Extension\\CrawlerProtection\\Hooks", + "services": [] + } + }, "Hooks": { "MediaWikiPerformAction": "main", "SpecialPageBeforeExecute": "main" }, + "config": { + "CrawlerProtectedSpecialPages": { + "value": [ + "mobilediff", + "recentchangeslinked", + "whatlinkshere" + ] + }, + "CrawlerProtectionUse418": { + "value": false + } + }, "license-name": "MIT", "Tests": { "phpunit": "tests/phpunit" diff --git a/includes/Hooks.php b/includes/Hooks.php index 15971e2..a2d9bba 100644 --- a/includes/Hooks.php +++ b/includes/Hooks.php @@ -26,6 +26,7 @@ class_alias( '\Article', '\MediaWiki\Page\Article' ); use MediaWiki\Actions\ActionEntryPoint; use MediaWiki\Hook\MediaWikiPerformActionHook; +use MediaWiki\MediaWikiServices; use MediaWiki\Output\OutputPage; 
use MediaWiki\Page\Article; use MediaWiki\Request\WebRequest; @@ -35,6 +36,9 @@ class_alias( '\Article', '\MediaWiki\Page\Article' ); use MediaWiki\User\User; class Hooks implements MediaWikiPerformActionHook, SpecialPageBeforeExecuteHook { + /** @var string Prefix for special page names */ + private const SPECIAL_PAGE_PREFIX = 'Special:'; + /** * Block sensitive page views for anonymous users via MediaWikiPerformAction. * Handles: @@ -96,9 +100,24 @@ public function onSpecialPageBeforeExecute( $special, $subPage ) { return true; } + $config = MediaWikiServices::getInstance()->getMainConfig(); + $protectedSpecialPages = $config->get( 'CrawlerProtectedSpecialPages' ); + $denyFast = $config->get( 'CrawlerProtectionUse418' ); + + // Normalize protected special pages: lowercase and strip 'Special:' prefix + $normalizedProtectedPages = array_map( + fn ( $p ) => ( $p = strtolower( $p ) ) && strpos( $p, strtolower( self::SPECIAL_PAGE_PREFIX ) ) === 0 + ? substr( $p, 8 ) + : $p, + $protectedSpecialPages + ); + $name = strtolower( $special->getName() ); - if ( in_array( $name, [ 'recentchangeslinked', 'whatlinkshere', 'mobilediff' ], true ) ) { + if ( in_array( $name, $normalizedProtectedPages, true ) ) { $out = $special->getContext()->getOutput(); + if ( $denyFast ) { + $this->denyAccessWith418(); + } $this->denyAccess( $out ); return false; } @@ -106,6 +125,17 @@ public function onSpecialPageBeforeExecute( $special, $subPage ) { return true; } + /** + * Helper: send an HTTP 418 (I'm a teapot) status line with an explicit status code, then halt processing via die() + * + * @return void + * @suppress PhanPluginNeverReturnMethod + */ + protected function denyAccessWith418() { + header( 'HTTP/1.0 418 I\'m a teapot', true, 418 ); + die( 'I\'m a teapot' ); + } + /** * Helper: output 403 Access Denied page using i18n messages. 
* diff --git a/tests/phpunit/namespaced-stubs.php b/tests/phpunit/namespaced-stubs.php index 4f288a4..1dfdf57 100644 --- a/tests/phpunit/namespaced-stubs.php +++ b/tests/phpunit/namespaced-stubs.php @@ -1,4 +1,7 @@ markTestSkipped( + 'Test requires stub MediaWikiServices. Skipped in MediaWiki unit test environment.' + ); + } + $output = $this->createMock( self::$outputPageClassName ); $user = $this->createMock( self::$userClassName ); @@ -159,7 +185,7 @@ public function testSpecialPageBlocksAnonymous( $specialPageName ) { $special->method( 'getContext' )->willReturn( $context ); $runner = $this->getMockBuilder( Hooks::class ) - ->onlyMethods( [ 'denyAccess' ] ) + ->onlyMethods( [ 'denyAccess', 'denyAccessWith418' ] ) ->getMock(); $runner->expects( $this->once() )->method( 'denyAccess' )->with( $output ); @@ -173,6 +199,13 @@ public function testSpecialPageBlocksAnonymous( $specialPageName ) { * @param string $specialPageName */ public function testSpecialPageAllowsLoggedIn( $specialPageName ) { + // Skip this test in MediaWiki environment - it requires service container + if ( !property_exists( '\MediaWiki\MediaWikiServices', 'testUse418' ) ) { + $this->markTestSkipped( + 'Test requires stub MediaWikiServices. Skipped in MediaWiki unit test environment.' + ); + } + $output = $this->createMock( self::$outputPageClassName ); $user = $this->createMock( self::$userClassName ); @@ -197,6 +230,13 @@ public function testSpecialPageAllowsLoggedIn( $specialPageName ) { * @covers ::onSpecialPageBeforeExecute */ public function testUnblockedSpecialPageAllowsAnonymous() { + // Skip this test in MediaWiki environment - it requires service container + if ( !property_exists( '\MediaWiki\MediaWikiServices', 'testUse418' ) ) { + $this->markTestSkipped( + 'Test requires stub MediaWikiServices. Skipped in MediaWiki unit test environment.' 
+ ); + } + $output = $this->createMock( self::$outputPageClassName ); $user = $this->createMock( self::$userClassName ); @@ -220,20 +260,20 @@ public function testUnblockedSpecialPageAllowsAnonymous() { /** * Create a mock context object. * - * @param object $user Mock user object - * @param object $output Mock output object - * @return object Mock context + * @param \PHPUnit\Framework\MockObject\MockObject $user Mock user object + * @param \PHPUnit\Framework\MockObject\MockObject $output Mock output object + * @return object Mock context (anonymous class instance exposing getUser() and getOutput()) */ private function createMockContext( $user, $output ) { $context = new class( $user, $output ) { - /** @var object */ + /** @var \PHPUnit\Framework\MockObject\MockObject */ private $user; - /** @var object */ + /** @var \PHPUnit\Framework\MockObject\MockObject */ private $output; /** - * @param object $user - * @param object $output + * @param \PHPUnit\Framework\MockObject\MockObject $user + * @param \PHPUnit\Framework\MockObject\MockObject $output */ public function __construct( $user, $output ) { $this->user = $user; @@ -241,14 +281,14 @@ public function __construct( $user, $output ) { } /** - * @return object + * @return \PHPUnit\Framework\MockObject\MockObject */ public function getUser() { return $this->user; } /** - * @return object + * @return \PHPUnit\Framework\MockObject\MockObject */ public function getOutput() { return $this->output; @@ -257,6 +297,42 @@ public function getOutput() { return $context; } + /** + * @covers ::onSpecialPageBeforeExecute + * @covers ::denyAccessWith418 + */ + public function testSpecialPageCallsDenyAccessWith418WhenConfigured() { + // This test only works with our test stubs, not in MediaWiki's PHPUnit environment + if ( !property_exists( '\MediaWiki\MediaWikiServices', 'testUse418' ) ) { + $this->markTestSkipped( + 'Test requires stub MediaWikiServices. Skipped in MediaWiki integration tests.' 
+ ); + } + + // Enable 418 response in the test stub config. NOTE(review): this static flag is not reset in this method - confirm setUp()/tearDown() restores it so later tests are unaffected + \MediaWiki\MediaWikiServices::$testUse418 = true; + + $output = $this->createMock( self::$outputPageClassName ); + + $user = $this->createMock( self::$userClassName ); + $user->method( 'isRegistered' )->willReturn( false ); + + $context = $this->createMockContext( $user, $output ); + + $special = $this->createMock( self::$specialPageClassName ); + $special->method( 'getName' )->willReturn( 'WhatLinksHere' ); + $special->method( 'getContext' )->willReturn( $context ); + + $runner = $this->getMockBuilder( Hooks::class ) + ->onlyMethods( [ 'denyAccessWith418' ] ) + ->getMock(); + // In production denyAccessWith418() halts the request via die(); here it is mocked, so the hook continues into the real denyAccess() and still returns false + $runner->expects( $this->once() )->method( 'denyAccessWith418' ); + + $result = $runner->onSpecialPageBeforeExecute( $special, null ); + $this->assertFalse( $result ); + } + /** * Data provider for blocked special pages. *