]> git.openstreetmap.org Git - nominatim.git/blobdiff - utils/cron_ipanalyse.py
Merge remote-tracking branch 'upstream/master'
[nominatim.git] / utils / cron_ipanalyse.py
index 4dddb1cf35447d8e20612574293843650cf14709..be5e6956915b502e19df315056f6a869d94110db 100755 (executable)
@@ -9,6 +9,7 @@
 import re
 import os
 import sys
+import subprocess
 from datetime import datetime, timedelta
 from collections import defaultdict
 
@@ -84,12 +85,16 @@ class LogEntry:
             if qp[0] == 'OPTIONS':
                 self.request = None
             else:
-                if '/search' in qp[1]:
+                if '/?' in qp[1]:
+                    self.request = 'S'
+                elif '/search' in qp[1]:
                     self.request = 'S'
                 elif '/reverse' in qp[1]:
                     self.request = 'R'
                 elif '/details' in qp[1]:
                     self.request = 'D'
+                elif '/lookup' in qp[1]:
+                    self.request = 'L'
                 else:
                     self.request = None
         self.query = e['query']
@@ -240,12 +245,12 @@ class IPstats:
         if was_blocked:
             # deblock only if the IP has been really quiet
             # (properly catches the ones that simply ignore the HTTP error)
-            return None if self.long_total < 5 else 'block'
+            return None if self.long_total < 20 else 'block'
         if self.long_api > BLOCK_UPPER or self.short_api > BLOCK_UPPER / 3:
                 # client totally overdoing it
                 return 'block'
         if was_bulked:
-            if self.short_total < 5:
+            if self.short_total < 20:
                 # client has stopped, debulk
                 return None
             if self.long_api > BLOCK_LIMIT or self.short_api > BLOCK_LIMIT / 3:
@@ -254,8 +259,8 @@ class IPstats:
             return 'bulk'
 
         if self.long_api > BULKLONG_LIMIT or self.short_api > BULKSHORT_LIMIT:
-            if self.bad_ua:
-                return 'uablock' # bad useragent
+            #if self.bad_ua:
+            #    return 'uablock' # bad useragent
             return 'bulk'
 
         return None
@@ -303,17 +308,21 @@ if __name__ == '__main__':
     fd = open("/proc/loadavg")
     cpuload = int(float(fd.readline().split()[2]))
     fd.close()
-    dbload = total200 / BULKCOOLOFF_DELTA.total_seconds()
+    # check the number of excess connections to apache
+    dbcons = int(subprocess.check_output("netstat -s | grep 'connections established' | sed 's:^\s*::;s: .*::'", shell=True))
+    fpms = int(subprocess.check_output('ps -Af | grep php-fpm | wc -l', shell=True))
+    dbload = max(0, dbcons - fpms)
 
     numbulks = len(bl.prevbulks)
-    BLOCK_LIMIT = max(BLOCK_LIMIT, BLOCK_UPPER - BLOCK_LOADFAC * (dbload - 75))
-    BULKLONG_LIMIT = max(BULK_LOWER, BULKLONG_LIMIT - BULK_LOADFAC * (cpuload - 14))
+    BLOCK_LIMIT = max(BLOCK_LIMIT, BLOCK_UPPER - BLOCK_LOADFAC * dbload)
+    BULKLONG_LIMIT = max(BULK_LOWER, BULKLONG_LIMIT - BULK_LOADFAC * cpuload)
     if numbulks > MAX_BULK_IPS:
         BLOCK_LIMIT = max(3600, BLOCK_LOWER - (numbulks - MAX_BULK_IPS)*10)
     # if the bulk pool is still empty, clients will be faster, avoid having
     # them blocked in this case
     if numbulks < 10:
-        BLOCK_LIMIT = 2*BLOCK_UPPER
+        BLOCK_UPPER *= 2
+        BLOCK_LIMIT = BLOCK_UPPER
 
 
     # collecting statistics
@@ -349,7 +358,7 @@ if __name__ == '__main__':
             elif wasbulked:
                 debulked.append(k)
     for i in bl.blacklist:
-        fd.write("%s ban\n" % k)
+        fd.write("%s ban\n" % i)
     fd.close()
 
     # TODO write logs (need to collect some statistics)