From 88b3d7a256381043ec5aa800f8f2a19b04a5c31c Mon Sep 17 00:00:00 2001
From: Grant Slater
Date: Fri, 6 Jun 2025 02:32:51 +0100
Subject: [PATCH] mediawiki: add protection against UA faking abusive scraper

---
Reviewer notes (text between the "---" marker and the diff is ignored by
git-am and does not become part of the commit):

The added rules answer with HTTP 429 only when ALL of the following hold:
  * the request method is GET or HEAD;
  * the path is exactly /w/index.php or /w/api.php, or starts with
    /wiki/Special: (the three REQUEST_URI conditions are chained with [OR],
    which mod_rewrite evaluates as one OR-group inside the implicit AND);
  * the Referer header is empty or a single "-";
  * the User-Agent matches one of the listed browser / OS version patterns
    (case-insensitive because of [NC]).
NOTE(review): leading whitespace in the hunk below was reconstructed as a
two-space indent; confirm it matches the target file before applying.

 cookbooks/mediawiki/templates/default/apache.erb | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/cookbooks/mediawiki/templates/default/apache.erb b/cookbooks/mediawiki/templates/default/apache.erb
index d3d25a3a1..562c8db2d 100644
--- a/cookbooks/mediawiki/templates/default/apache.erb
+++ b/cookbooks/mediawiki/templates/default/apache.erb
@@ -39,6 +39,15 @@
   RewriteCond %{SERVER_NAME} !=<%= @name %>
   RewriteRule ^/(.*)$ https://<%= @name %>/$1 [R=permanent]
 
+  # Prevent abuse by an anonymous AI bot
+  RewriteCond %{REQUEST_METHOD} ^(GET|HEAD)$
+  RewriteCond %{REQUEST_URI} ^/w/index\.php$ [OR]
+  RewriteCond %{REQUEST_URI} ^/wiki/Special: [OR]
+  RewriteCond %{REQUEST_URI} ^/w/api\.php$
+  RewriteCond %{HTTP_REFERER} ^-?$
+  RewriteCond %{HTTP_USER_AGENT} ((CriOS|Chrome)/[1-9][0-9]?\.0\.|Chrome/100\.0\.|Chrome/122\.0\.0\.0|(Firefox|FxiOS)/[1-6]?[0-9]\.|MSIE\ [5-9]\.0|Opera/[8-9]\.|Windows\ NT\ [3-5]\.|Version/[3-5]\.[0-1]) [NC]
+  RewriteRule ^ - [R=429,L]
+
   # Historical Compatibility Links
   RedirectMatch 301 ^/index\.php$ /w/index.php
   RedirectMatch 301 ^/index\.php/(.*)$ /wiki/$1
-- 
2.39.5