git.openstreetmap.org Git - nominatim.git/commitdiff
Merge branch 'master' of github.com:twain47/Nominatim
author Sarah Hoffmann <lonvia@denofr.de>
Sat, 13 Oct 2012 07:19:42 +0000 (09:19 +0200)
committer Sarah Hoffmann <lonvia@denofr.de>
Sat, 13 Oct 2012 07:19:42 +0000 (09:19 +0200)
Conflicts:
munin/nominatim_importlag
utils/setup.php

14 files changed:
lib/init-website.php
lib/log.php
module/nominatim.c
settings/settings.php
sql/indices.src.sql
sql/partitions.src.sql
sql/tables.sql
utils/cron_banip.sh [new file with mode: 0755]
utils/cron_logrotate.sh [new file with mode: 0755]
utils/cron_vacuum.sh [new file with mode: 0755]
utils/setup.php
utils/specialphrases.php
website/reverse.php
website/search.php

index d21ca7af94bd14c125b135e7a1a436d5fec03578..6db2ac374d995837b2aac2dcb9861ec6922945b6 100644 (file)
@@ -1,19 +1,34 @@
 <?php
+    require_once('init.php');
 
-       require_once('init.php');
+    header('Content-type: text/html; charset=utf-8');
 
-       if (CONST_ClosedForIndexing && strpos(CONST_ClosedForIndexingExceptionIPs, ','.$_SERVER["REMOTE_ADDR"].',') === false)
-       {
-               echo "Closed for re-indexing...";
-               exit;
-       }
+    // check blocks in place for external servers
+    if (strpos($_SERVER["REMOTE_ADDR"],'193.63.75.') !== 0 &&
+        strpos(CONST_WhitelistedIPs, ','.$_SERVER["REMOTE_ADDR"].',') === false)
+    {
 
-       if (strpos(CONST_BlockedIPs, ','.$_SERVER["REMOTE_ADDR"].',') !== false)
-       {
-               echo "Your IP has been blocked. \n";
-               echo "Please create a nominatim trac ticket (http://trac.openstreetmap.org/newticket?component=nominatim) to request this to be removed. \n";
-               echo "Information on the Nominatim usage policy can be found here: http://wiki.openstreetmap.org/wiki/Nominatim#Usage_Policy \n";
-               exit;
-       }
+        if (strpos(CONST_BlockedIPs, ','.$_SERVER["REMOTE_ADDR"].',') !== false)
+        {
+            header('HTTP/1.0 403 Forbidden');
+            header('Content-type: text/html; charset=utf-8');
+            echo "<html><body><h1>Access blocked</h1>";
+            echo "Your IP has been blocked for overusing OpenStreetMap's volunteer-run servers.<br> \n";
+            echo 'Please consult the <a href="http://wiki.openstreetmap.org/wiki/Nominatim_usage_policy">Nominatim usage policy</a> for more information.';
+            echo "\n</body></html>\n";
+            exit;
+        }
 
-       header('Content-type: text/html; charset=utf-8');
+        // Check the list of temporary bans kept in CONST_IPBanFile.
+        // The file may not exist (yet); treat that as "nobody is banned"
+        // instead of emitting a warning before the headers are sent.
+        $sTempBlockedIP = is_readable(CONST_IPBanFile) ? file_get_contents(CONST_IPBanFile) : false;
+        // Quote the address before embedding it in the pattern so that its
+        // dots only match literal dots and not arbitrary characters.
+        if ($sTempBlockedIP !== false &&
+            preg_match('/\b'.preg_quote($_SERVER["REMOTE_ADDR"], '/').'\b/', $sTempBlockedIP))
+        {
+            header('HTTP/1.0 503 Service Temporarily Unavailable');
+            header('Content-type: text/html; charset=utf-8');
+            echo "<html><body><h1>Access blocked</h1>";
+            echo "Your IP has been blocked temporarily for overusing OpenStreetMap's volunteer-run servers. This ban will be lifted automatically in a while. To avoid further blocks, please read the<br> \n";
+            echo '<a href="http://wiki.openstreetmap.org/wiki/Nominatim_usage_policy">Nominatim usage policy</a> carefully before you continue to use this service.';
+            echo "\n</body></html>\n";
+            exit;
+        }
+
+    }
index 5b847a41172d8c84626fcb3dfea9d0df1cbaa080..d81ef46db55393f5d563ca5183b6575ea2f7e6f9 100644 (file)
 
                if (CONST_Log_DB)
                {
-                       // Log
-                       if ($sType == 'search')
-                       {
-                               $oDB->query('insert into query_log values ('.getDBQuoted($hLog[0]).','.getDBQuoted($hLog[3]).','.getDBQuoted($hLog[1]).')');
-                       }
-
-                       $sSQL = 'insert into new_query_log (type,starttime,query,ipaddress,useragent,language,format)';
+                       $sSQL = 'insert into new_query_log (type,starttime,query,ipaddress,useragent,language,format,searchterm)';
                        $sSQL .= ' values ('.getDBQuoted($sType).','.getDBQuoted($hLog[0]).','.getDBQuoted($hLog[2]);
-                       $sSQL .= ','.getDBQuoted($hLog[1]).','.getDBQuoted($_SERVER['HTTP_USER_AGENT']).','.getDBQuoted(join(',',$aLanguageList)).','.getDBQuoted($sOutputFormat).')';
+                       $sSQL .= ','.getDBQuoted($hLog[1]).','.getDBQuoted($_SERVER['HTTP_USER_AGENT']).','.getDBQuoted(join(',',$aLanguageList)).','.getDBQuoted($_GET['format']).','.getDBQuoted($hLog[3]).')';
                        $oDB->query($sSQL);
                }
 
 
                if (CONST_Log_DB)
                {
-                       $sSQL = 'update query_log set endtime = '.getDBQuoted($sEndTime).', results = '.$iNumResults;
-                       $sSQL .= ' where starttime = '.getDBQuoted($hLog[0]);
-                       $sSQL .= ' and ipaddress = '.getDBQuoted($hLog[1]);
-                       $sSQL .= ' and query = '.getDBQuoted($hLog[3]);
-                       $oDB->query($sSQL);
-
                        $sSQL = 'update new_query_log set endtime = '.getDBQuoted($sEndTime).', results = '.$iNumResults;
                        $sSQL .= ' where starttime = '.getDBQuoted($hLog[0]);
                        $sSQL .= ' and ipaddress = '.getDBQuoted($hLog[1]);
index 18ad99b3159c14d8aa8a41e1cea37cd526608a25..75238e2207ff172aad46771569b091d7e73980d5 100644 (file)
@@ -143,12 +143,12 @@ transliteration( PG_FUNCTION_ARGS )
                                resultdata++;
                        }
                }
-               else
+               /*else
                {
                        ereport( WARNING, ( errcode( ERRCODE_SUCCESSFUL_COMPLETION ),
                              errmsg( "missing char: %i\n", *wchardata )));
                        
-               }
+               }*/
                wchardata++;
        }
 
index f441521fd8ed7c7f667e8bd522aa7747d45b63b6..ce4bec5a0aa52549c2449bc6217e5cc59c54db8b 100644 (file)
        @define('CONST_Osmosis_Binary', '/usr/bin/osmosis');
 
        // Website settings
-       @define('CONST_ClosedForIndexing', false);
-       @define('CONST_ClosedForIndexingExceptionIPs', '');
        @define('CONST_BlockedIPs', '');
+       @define('CONST_IPBanFile', CONST_BasePath.'/settings/ip_blocks');
+       @define('CONST_WhitelistedIPs', '');
+       @define('CONST_BlockedUserAgents', '');
+       @define('CONST_BlockReverseMaxLoad', 15);
        @define('CONST_BulkUserIPs', '');
 
-       @define('CONST_Website_BaseURL', 'http://'.php_uname('n').'/');
+       @define('CONST_Website_BaseURL', 'http://nominatim.openstreetmap.org/');
        @define('CONST_Tile_Default', 'Mapnik');
 
-       @define('CONST_Default_Language', 'xx');
+       @define('CONST_Default_Language', 'en');
        @define('CONST_Default_Lat', 20.0);
        @define('CONST_Default_Lon', 0.0);
        @define('CONST_Default_Zoom', 2);
index 5e6ccd12e3b5431e5b9ac2664f7e6d1fd1073144..2e5dde71e66ecb25b0cd6b3be3a4ef51247464f0 100644 (file)
@@ -1,30 +1,30 @@
 -- Indices used only during search and update.
 -- These indices are created only after the indexing process is done.
 
-CREATE INDEX idx_word_word_id on word USING BTREE (word_id);
+CREATE INDEX idx_word_word_id on word USING BTREE (word_id) TABLESPACE ssd;
 
-CREATE INDEX idx_search_name_nameaddress_vector ON search_name USING GIN (nameaddress_vector) WITH (fastupdate = off);
-CREATE INDEX idx_search_name_name_vector ON search_name USING GIN (name_vector) WITH (fastupdate = off);
-CREATE INDEX idx_search_name_centroid ON search_name USING GIST (centroid);
+CREATE INDEX idx_search_name_nameaddress_vector ON search_name USING GIN (nameaddress_vector) WITH (fastupdate = off) TABLESPACE ssd;
+CREATE INDEX idx_search_name_name_vector ON search_name USING GIN (name_vector) WITH (fastupdate = off) TABLESPACE ssd;
+CREATE INDEX idx_search_name_centroid ON search_name USING GIST (centroid) TABLESPACE ssd;
 
-CREATE INDEX idx_place_addressline_address_place_id on place_addressline USING BTREE (address_place_id);
+CREATE INDEX idx_place_addressline_address_place_id on place_addressline USING BTREE (address_place_id) TABLESPACE ssd;
 
-CREATE INDEX idx_place_boundingbox_place_id on place_boundingbox USING BTREE (place_id);
-CREATE INDEX idx_place_boundingbox_outline ON place_boundingbox USING GIST (outline);
+CREATE INDEX idx_place_boundingbox_place_id on place_boundingbox USING BTREE (place_id) TABLESPACE ssd;
+CREATE INDEX idx_place_boundingbox_outline ON place_boundingbox USING GIST (outline) TABLESPACE ssd;
 
 DROP INDEX IF EXISTS idx_placex_rank_search;
-CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search);
-CREATE INDEX idx_placex_rank_address ON placex USING BTREE (rank_address);
-CREATE INDEX idx_placex_pendingsector ON placex USING BTREE (rank_search,geometry_sector) where indexed_status > 0;
-CREATE INDEX idx_placex_parent_place_id ON placex USING BTREE (parent_place_id) where parent_place_id IS NOT NULL;
-CREATE INDEX idx_placex_interpolation ON placex USING BTREE (geometry_sector) where indexed_status > 0 and class='place' and type='houses';
-CREATE INDEX idx_location_area_country_place_id ON location_area_country USING BTREE (place_id);
+CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search) TABLESPACE ssd;
+CREATE INDEX idx_placex_rank_address ON placex USING BTREE (rank_address) TABLESPACE ssd;
+CREATE INDEX idx_placex_pendingsector ON placex USING BTREE (rank_search,geometry_sector) TABLESPACE ssd where indexed_status > 0;
+CREATE INDEX idx_placex_parent_place_id ON placex USING BTREE (parent_place_id) TABLESPACE ssd where parent_place_id IS NOT NULL;
+CREATE INDEX idx_placex_interpolation ON placex USING BTREE (geometry_sector) TABLESPACE ssd where indexed_status > 0 and class='place' and type='houses';
+CREATE INDEX idx_location_area_country_place_id ON location_area_country USING BTREE (place_id) TABLESPACE ssd;
 
-CREATE INDEX idx_search_name_country_centroid ON search_name_country USING GIST (centroid);
-CREATE INDEX idx_search_name_country_nameaddress_vector ON search_name_country USING GIN (nameaddress_vector) WITH (fastupdate = off);
+CREATE INDEX idx_search_name_country_centroid ON search_name_country USING GIST (centroid) TABLESPACE ssd;
+CREATE INDEX idx_search_name_country_nameaddress_vector ON search_name_country USING GIN (nameaddress_vector) WITH (fastupdate = off) TABLESPACE ssd;
 
 -- start
-CREATE INDEX idx_location_property_-partition-_centroid ON location_property_-partition- USING GIST (centroid);
+CREATE INDEX idx_location_property_-partition-_centroid ON location_property_-partition- USING GIST (centroid) TABLESPACE ssd;
 -- end
 
 CREATE UNIQUE INDEX idx_place_osm_unique on place using btree(osm_id,osm_type,class,type);
index 324f35bbd22ce8777a735d9d51e93e0a8571d560..3fb7962c4e5d3a62bb0d6ca8e6b641ed10aeb2a0 100644 (file)
@@ -25,21 +25,21 @@ create type nearfeaturecentr as (
 );
 
 CREATE TABLE location_area_country () INHERITS (location_area_large);
-CREATE INDEX idx_location_area_country_geometry ON location_area_country USING GIST (geometry);
+CREATE INDEX idx_location_area_country_geometry ON location_area_country USING GIST (geometry) TABLESPACE ssd;
 
 CREATE TABLE search_name_country () INHERITS (search_name_blank);
-CREATE INDEX idx_search_name_country_place_id ON search_name_country USING BTREE (place_id);
-CREATE INDEX idx_search_name_country_name_vector ON search_name_country USING GIN (name_vector) WITH (fastupdate = off);
+CREATE INDEX idx_search_name_country_place_id ON search_name_country USING BTREE (place_id) TABLESPACE ssd;
+CREATE INDEX idx_search_name_country_name_vector ON search_name_country USING GIN (name_vector) WITH (fastupdate = off) TABLESPACE ssd;
 
 -- start
 CREATE TABLE location_area_large_-partition- () INHERITS (location_area_large);
-CREATE INDEX idx_location_area_large_-partition-_place_id ON location_area_large_-partition- USING BTREE (place_id);
-CREATE INDEX idx_location_area_large_-partition-_geometry ON location_area_large_-partition- USING GIST (geometry);
+CREATE INDEX idx_location_area_large_-partition-_place_id ON location_area_large_-partition- USING BTREE (place_id) TABLESPACE ssd;
+CREATE INDEX idx_location_area_large_-partition-_geometry ON location_area_large_-partition- USING GIST (geometry) TABLESPACE ssd;
 
 CREATE TABLE search_name_-partition- () INHERITS (search_name_blank);
-CREATE INDEX idx_search_name_-partition-_place_id ON search_name_-partition- USING BTREE (place_id);
-CREATE INDEX idx_search_name_-partition-_centroid ON search_name_-partition- USING GIST (centroid);
-CREATE INDEX idx_search_name_-partition-_name_vector ON search_name_-partition- USING GIN (name_vector) WITH (fastupdate = off);
+CREATE INDEX idx_search_name_-partition-_place_id ON search_name_-partition- USING BTREE (place_id) TABLESPACE ssd;
+CREATE INDEX idx_search_name_-partition-_centroid ON search_name_-partition- USING GIST (centroid) TABLESPACE ssd;
+CREATE INDEX idx_search_name_-partition-_name_vector ON search_name_-partition- USING GIN (name_vector) WITH (fastupdate = off) TABLESPACE ssd;
 
 CREATE TABLE location_property_-partition- () INHERITS (location_property);
 CREATE INDEX idx_location_property_-partition-_place_id ON location_property_-partition- USING BTREE (place_id);
@@ -52,8 +52,8 @@ CREATE TABLE location_road_-partition- (
   country_code VARCHAR(2)
   );
 SELECT AddGeometryColumn('location_road_-partition-', 'geometry', 4326, 'GEOMETRY', 2);
-CREATE INDEX idx_location_road_-partition-_geometry ON location_road_-partition- USING GIST (geometry);
-CREATE INDEX idx_location_road_-partition-_place_id ON location_road_-partition- USING BTREE (place_id);
+CREATE INDEX idx_location_road_-partition-_geometry ON location_road_-partition- USING GIST (geometry) TABLESPACE ssd;
+CREATE INDEX idx_location_road_-partition-_place_id ON location_road_-partition- USING BTREE (place_id) TABLESPACE ssd;
 
 -- end
 
index d5cf7bcd47eb0755ecbd2dcfdae9f2fac6092fe0..a0ad66a8353d814754d5cd6e576c976865f8b7c1 100644 (file)
@@ -23,18 +23,6 @@ CREATE TABLE import_npi_log (
   event text
   );
 
---drop table IF EXISTS query_log;
-CREATE TABLE query_log (
-  starttime timestamp,
-  query text,
-  ipaddress text,
-  endtime timestamp,
-  results integer
-  );
-CREATE INDEX idx_query_log ON query_log USING BTREE (starttime);
-GRANT INSERT ON query_log TO "www-data" ;
-GRANT UPDATE ON query_log TO "www-data" ;
-
 CREATE TABLE new_query_log (
   type text,
   starttime timestamp,
@@ -42,6 +30,7 @@ CREATE TABLE new_query_log (
   useragent text,
   language text,
   query text,
+  searchterm text,
   endtime timestamp,
   results integer,
   format text,
@@ -52,9 +41,6 @@ GRANT INSERT ON new_query_log TO "www-data" ;
 GRANT UPDATE ON new_query_log TO "www-data" ;
 GRANT SELECT ON new_query_log TO "www-data" ;
 
-create view vw_search_query_log as SELECT substr(query, 1, 50) AS query, starttime, endtime - starttime AS duration, substr(useragent, 1, 20) as 
-useragent, language, results, ipaddress FROM new_query_log WHERE type = 'search' ORDER BY starttime DESC;
-
 --drop table IF EXISTS report_log;
 CREATE TABLE report_log (
   starttime timestamp,
@@ -76,9 +62,9 @@ CREATE TABLE word (
   country_code varchar(2),
   search_name_count INTEGER,
   operator TEXT
-  );
+  ) TABLESPACE ssd;
 SELECT AddGeometryColumn('word', 'location', 4326, 'GEOMETRY', 2);
-CREATE INDEX idx_word_word_token on word USING BTREE (word_token);
+CREATE INDEX idx_word_word_token on word USING BTREE (word_token) TABLESPACE ssd;
 --CREATE INDEX idx_word_trigram ON word USING gin(word_trigram gin_trgm_ops) WITH (fastupdate = off);
 GRANT SELECT ON word TO "www-data" ;
 DROP SEQUENCE seq_word;
@@ -136,8 +122,8 @@ CREATE TABLE search_name_blank (
 SELECT AddGeometryColumn('search_name_blank', 'centroid', 4326, 'GEOMETRY', 2);
 
 drop table IF EXISTS search_name;
-CREATE TABLE search_name () INHERITS (search_name_blank);
-CREATE INDEX idx_search_name_place_id ON search_name USING BTREE (place_id);
+CREATE TABLE search_name () INHERITS (search_name_blank) TABLESPACE ssd;
+CREATE INDEX idx_search_name_place_id ON search_name USING BTREE (place_id) TABLESPACE ssd;
 
 drop table IF EXISTS place_addressline;
 CREATE TABLE place_addressline (
@@ -147,8 +133,8 @@ CREATE TABLE place_addressline (
   isaddress boolean,
   distance float,
   cached_rank_address integer
-  );
-CREATE INDEX idx_place_addressline_place_id on place_addressline USING BTREE (place_id);
+  ) TABLESPACE data;
+CREATE INDEX idx_place_addressline_place_id on place_addressline USING BTREE (place_id) TABLESPACE ssd;
 
 drop table IF EXISTS place_boundingbox CASCADE;
 CREATE TABLE place_boundingbox (
@@ -201,14 +187,14 @@ CREATE TABLE placex (
   wikipedia TEXT, -- calculated wikipedia article name (language:title)
   geometry_sector INTEGER,
   calculated_country_code varchar(2)
-  );
+  ) TABLESPACE ssd;
 SELECT AddGeometryColumn('placex', 'centroid', 4326, 'GEOMETRY', 2);
-CREATE UNIQUE INDEX idx_place_id ON placex USING BTREE (place_id);
-CREATE INDEX idx_placex_osmid ON placex USING BTREE (osm_type, osm_id);
-CREATE INDEX idx_placex_linked_place_id ON placex USING BTREE (linked_place_id);
-CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search, geometry_sector);
-CREATE INDEX idx_placex_geometry ON placex USING GIST (geometry);
-CREATE INDEX idx_placex_adminname on placex USING BTREE (make_standard_name(name->'name'),rank_search) WHERE osm_type='N' and rank_search < 26;
+CREATE UNIQUE INDEX idx_place_id ON placex USING BTREE (place_id) TABLESPACE ssd;
+CREATE INDEX idx_placex_osmid ON placex USING BTREE (osm_type, osm_id) TABLESPACE ssd;
+CREATE INDEX idx_placex_linked_place_id ON placex USING BTREE (linked_place_id) TABLESPACE ssd;
+CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search, geometry_sector) TABLESPACE ssd;
+CREATE INDEX idx_placex_geometry ON placex USING GIST (geometry) TABLESPACE ssd;
+CREATE INDEX idx_placex_adminname on placex USING BTREE (make_standard_name(name->'name'),rank_search) TABLESPACE ssd WHERE osm_type='N' and rank_search < 26;
 
 --CREATE INDEX idx_placex_indexed ON placex USING BTREE (indexed);
 
@@ -256,10 +242,10 @@ update placex set geometry_sector = geometry_sector(geometry);
 drop index idx_placex_pendingbylatlon;
 drop index idx_placex_interpolation;
 drop index idx_placex_sector;
-CREATE INDEX idx_placex_pendingbylatlon ON placex USING BTREE (geometry_index(geometry_sector,indexed,name),rank_search) 
+CREATE INDEX idx_placex_pendingbylatlon ON placex USING BTREE (geometry_index(geometry_sector,indexed,name),rank_search) TABLESPACE ssd
   where geometry_index(geometry_sector,indexed,name) IS NOT NULL;
-CREATE INDEX idx_placex_interpolation ON placex USING BTREE (geometry_sector) where indexed = false and class='place' and type='houses';
-CREATE INDEX idx_placex_sector ON placex USING BTREE (geometry_sector,rank_address,osm_type,osm_id);
+CREATE INDEX idx_placex_interpolation ON placex USING BTREE (geometry_sector) TABLESPACE ssd where indexed = false and class='place' and type='houses';
+CREATE INDEX idx_placex_sector ON placex USING BTREE (geometry_sector,rank_address,osm_type,osm_id) TABLESPACE ssd;
 
 DROP SEQUENCE seq_postcodes;
 CREATE SEQUENCE seq_postcodes start 1;
@@ -277,7 +263,7 @@ CREATE TABLE import_polygon_error (
   );
 SELECT AddGeometryColumn('import_polygon_error', 'prevgeometry', 4326, 'GEOMETRY', 2);
 SELECT AddGeometryColumn('import_polygon_error', 'newgeometry', 4326, 'GEOMETRY', 2);
-CREATE INDEX idx_import_polygon_error_osmid ON import_polygon_error USING BTREE (osm_type, osm_id);
+CREATE INDEX idx_import_polygon_error_osmid ON import_polygon_error USING BTREE (osm_type, osm_id) TABLESPACE ssd;
 
 drop table import_polygon_delete;
 CREATE TABLE import_polygon_delete (
@@ -286,7 +272,7 @@ CREATE TABLE import_polygon_delete (
   class TEXT NOT NULL,
   type TEXT NOT NULL
   );
-CREATE INDEX idx_import_polygon_delete_osmid ON import_polygon_delete USING BTREE (osm_type, osm_id);
+CREATE INDEX idx_import_polygon_delete_osmid ON import_polygon_delete USING BTREE (osm_type, osm_id) TABLESPACE ssd;
 
 drop sequence file;
 CREATE SEQUENCE file start 1;
diff --git a/utils/cron_banip.sh b/utils/cron_banip.sh
new file mode 100755 (executable)
index 0000000..2e0c8df
--- /dev/null
@@ -0,0 +1,97 @@
+#!/bin/bash
+#
+# Create or update the list of temporarily banned IPs.
+#
+
+BASEDIR="$( cd "$( dirname "$0" )" && cd .. && pwd )"
+if [ "x$BASEDIR" == "x" ]; then
+    echo "Could not determine base dir."
+    exit -1
+fi
+
+BLOCKEDFILE=$BASEDIR/settings/ip_blocks
+LOGFILE=$BASEDIR/log/ip_blocks.log
+
+LONG_PERIOD='1 hour'
+SHORT_PERIOD='10 min'
+COOLOFF_PERIOD='1 hour'
+
+REVLONG_LIMIT=20000
+REVSHORT_LIMIT=6000
+SRCHLONG_LIMIT=4000
+SRCHSHORT_LIMIT='10 min'
+
+PSQLCMD='psql -qtA -d nominatim'
+
+curload=`cat /proc/loadavg | sed 's:[. ].*::'`
+if [ "$curload" -gt "15" ]; then
+  REVSHORT_LIMIT=$((REVSHORT_LIMIT/2))
+fi
+
+# Blocking candidates
+$PSQLCMD > $BLOCKEDFILE.newblocks << ENDOFQUERY
+SELECT ipaddress FROM
+((SELECT ipaddress FROM
+  (SELECT ipaddress, count(*) FROM new_query_log
+   WHERE type = 'reverse' AND starttime > now() - interval '$LONG_PERIOD'
+   GROUP BY ipaddress)
+  as v
+  WHERE count > $REVLONG_LIMIT) 
+UNION
+(SELECT ipaddress FROM
+  (SELECT ipaddress, count(*) FROM new_query_log
+   WHERE type = 'reverse' AND starttime > now() - interval '$SHORT_PERIOD'
+   GROUP BY ipaddress)
+  as v
+  WHERE count > $REVSHORT_LIMIT) 
+UNION
+(SELECT ipaddress FROM
+  (SELECT ipaddress, count(*) FROM new_query_log
+   WHERE type = 'search' AND starttime > now() - interval '$LONG_PERIOD'
+   GROUP BY ipaddress)
+  as v
+  WHERE count > $SRCHLONG_LIMIT) 
+UNION
+(SELECT ipaddress FROM
+  (SELECT ipaddress, sum(endtime-starttime) as dur FROM new_query_log
+   WHERE type = 'search' AND starttime > now() - interval '$SHORT_PERIOD'
+   GROUP BY ipaddress)
+  as v
+  WHERE dur > '$SRCHSHORT_LIMIT')
+) as q ORDER BY ipaddress;
+ENDOFQUERY
+
+no_newblocks=`comm $BLOCKEDFILE.newblocks $BLOCKEDFILE -23 | wc -l`
+
+if [ "x$no_newblocks" != "x0" ]; then
+    date +"%x %X Newly blocked IPs: `comm $BLOCKEDFILE.newblocks $BLOCKEDFILE -23 | tr '\n' ' '`" >> $LOGFILE
+fi
+
+
+# Deblockable candidates
+blocked=`tr '\n' ',' < $BLOCKEDFILE | sed "s:[[:space:]]::g;s:,$::;s:,:'),(':g"`
+
+if [ "x$blocked" == "x" ]; then
+  mv $BLOCKEDFILE.newblocks $BLOCKEDFILE 
+else
+    $PSQLCMD > $BLOCKEDFILE.newlifted << ENDOFQUERY
+    SELECT column1 FROM (
+    VALUES ('$blocked')
+    EXCEPT
+    (SELECT DISTINCT ipaddress FROM new_query_log
+     WHERE starttime > now() - interval '$COOLOFF_PERIOD')
+    ) as q ORDER BY column1;
+ENDOFQUERY
+
+    no_lifted=`cat $BLOCKEDFILE.newlifted | wc -w`
+
+    if [ "x$no_lifted" != "x0" ]; then
+        date +"%x %X Bans lifted: `tr '\n' ' ' < $BLOCKEDFILE.newlifted`" >> $LOGFILE
+    fi
+
+    # Write out new blocks
+    cat $BLOCKEDFILE.newblocks $BLOCKEDFILE | sort -u | comm - $BLOCKEDFILE.newlifted -23 > $BLOCKEDFILE.new
+    mv $BLOCKEDFILE.new $BLOCKEDFILE
+
+    rm $BLOCKEDFILE.newblocks $BLOCKEDFILE.newlifted
+fi
diff --git a/utils/cron_logrotate.sh b/utils/cron_logrotate.sh
new file mode 100755 (executable)
index 0000000..b9291d9
--- /dev/null
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Rotate query logs.
+#
+# Dumps backup_query_log into a dated, gzipped file in the log directory,
+# drops it and then turns the current new_query_log into the next
+# backup_query_log.
+
+# Abort on the first error. pipefail makes a failing pg_dump abort the
+# script as well; otherwise only the exit status of gzip would count and
+# the old log table would be dropped without having been saved.
+set -eo pipefail
+
+dbname=nominatim
+
+basedir=$(dirname "$0")
+logfile="$basedir/../log/query-$(date +%F).log.gz"
+
+# dump the old logfile
+pg_dump -a -F p -t backup_query_log "$dbname" | gzip -9 > "$logfile"
+
+# remove the old logs
+psql -q -d "$dbname" -c 'DROP TABLE backup_query_log'
+
+# rotate
+psql -q -1 -d "$dbname" -c 'ALTER TABLE new_query_log RENAME TO backup_query_log;CREATE TABLE new_query_log as (select * from backup_query_log limit 0);GRANT SELECT, INSERT, UPDATE ON new_query_log TO "www-data"'
+psql -q -d "$dbname" -c 'ALTER INDEX idx_new_query_log_starttime RENAME TO idx_backup_query_log_starttime'
+psql -q -d "$dbname" -c 'CREATE INDEX idx_new_query_log_starttime ON new_query_log USING BTREE (starttime)'
+
diff --git a/utils/cron_vacuum.sh b/utils/cron_vacuum.sh
new file mode 100755 (executable)
index 0000000..4c16fc6
--- /dev/null
@@ -0,0 +1,14 @@
+#!/bin/bash
+#
+# Vaccum all tables with indices on integer arrays.
+# Agressive vacuuming seems to help against index bloat.
+#
+
+psql -q -d nominatim -c 'VACUUM ANALYSE search_name'
+psql -q -d nominatim -c 'VACUUM ANALYSE search_name_country'
+#psql -q -d nominatim -c 'VACUUM ANALYSE planet_osm_ways'
+
+for i in `seq 0 246`; do
+  psql -q -d nominatim -c "VACUUM ANALYSE search_name_${i}"
+done
+
index bc426fd39f777c28ebf95cf01b9eb67f2076a0d9..8d5c6e39cdfd66101fcc9a4bf8b45cc705c02f53 100755 (executable)
                        echo "Please download and build osm2pgsql.\nIf it is already installed, check the path in your local settings (settings/local.php) file.\n";
                        fail("osm2pgsql not found in '$osm2pgsql'");
                }
+               $osm2pgsql .= ' --tablespace-slim-index ssd --tablespace-main-index ssd --tablespace-main-data ssd --tablespace-slim-data data';
                $osm2pgsql .= ' -lsc -O gazetteer --hstore';
-               $osm2pgsql .= ' -C '.$iCacheMemory;
+               $osm2pgsql .= ' -C 16000';
                $osm2pgsql .= ' -d '.$aDSNInfo['database'].' '.$aCMDResult['osm-file'];
                passthruCheckReturn($osm2pgsql);
 
                $sSQL .= "select 'P',nextval('seq_postcodes'),'place','postcode',postcode,calculated_country_code,";
                $sSQL .= "ST_SetSRID(ST_Point(x,y),4326) as geometry from (select calculated_country_code,postcode,";
                $sSQL .= "avg(st_x(st_centroid(geometry))) as x,avg(st_y(st_centroid(geometry))) as y ";
-               $sSQL .= "from placex where postcode is not null group by calculated_country_code,postcode) as x";
+               $sSQL .= "from placex where postcode is not null and calculated_country_code not in ('ie') group by calculated_country_code,postcode) as x";
                if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection));
 
                $sSQL = "insert into placex (osm_type,osm_id,class,type,postcode,calculated_country_code,geometry) ";
index f1a0d0d078035c75d4168922f2e9612cc7516592..02aaaa0fb140804a411bd840abf0c6f7aff84488 100755 (executable)
                        echo "class = '".pg_escape_string($aPair[0])."' and type = '".pg_escape_string($aPair[1])."';\n";
 
                        echo "CREATE INDEX idx_place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])."_centroid ";
-                       echo "ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." USING GIST (centroid);\n";
+                       echo "ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." USING GIST (centroid) tablespace ssd;\n";
 
                        echo "CREATE INDEX idx_place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])."_place_id ";
-                       echo "ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." USING btree(place_id);\n";
+                       echo "ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." USING btree(place_id) tablespace ssd;\n";
 
-            echo "GRANT SELECT ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." TO \"www-data\";";
+            echo "GRANT SELECT ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." TO \"www-data\";\n";
 
                }
 
index c155fa573030fa54cb5a71629727b808dc2be972..810be3c491fd2c96a8de4415a0b64c1c93b94a90 100755 (executable)
@@ -2,6 +2,22 @@
        require_once(dirname(dirname(__FILE__)).'/lib/init-website.php');
        require_once(CONST_BasePath.'/lib/log.php');
 
+    // Turn away selected applications (matched by their user agent) while
+    // the server load is at or above CONST_BlockReverseMaxLoad.
+    // An empty CONST_BlockedUserAgents disables the check: handing an empty
+    // pattern to preg_match() would raise a warning on every request.
+    // Requests without a User-Agent header are matched as an empty string.
+    if (CONST_BlockedUserAgents != '' &&
+        preg_match(CONST_BlockedUserAgents, isset($_SERVER["HTTP_USER_AGENT"]) ? $_SERVER["HTTP_USER_AGENT"] : '') > 0)
+    {
+        $fLoadAvg = getLoadAverage();
+        if ($fLoadAvg >= CONST_BlockReverseMaxLoad) {
+            header('HTTP/1.0 403 Forbidden');
+            header('Content-type: text/html; charset=utf-8');
+            echo "<html><body><h1>App temporarily blocked</h1>";
+            echo "Your application has been temporarily blocked from the OpenStreetMap Nominatim ";
+            echo "geolocation service due to high server load.";
+            echo "\n</body></html>\n";
+            exit;
+        }
+
+    }
+
+
         if (strpos(CONST_BulkUserIPs, ','.$_SERVER["REMOTE_ADDR"].',') !== false)
         {
                 $fLoadAvg = getLoadAverage();
index 30f7abde0e47ea2e49ef6199e039ec3e85c9f612..59d57d58a2620f4ed87622c33179d86dcfa92387 100755 (executable)
@@ -41,6 +41,7 @@
        if (isset($aLangPrefOrder['name:de'])) $bReverseInPlan = true;
        if (isset($aLangPrefOrder['name:ru'])) $bReverseInPlan = true;
        if (isset($aLangPrefOrder['name:ja'])) $bReverseInPlan = true;
+       if (isset($aLangPrefOrder['name:pl'])) $bReverseInPlan = true;
 
        $sLanguagePrefArraySQL = "ARRAY[".join(',',array_map("getDBQuoted",$aLangPrefOrder))."]";
 
                                                                        $sSQL .= " limit ".$iLimit;
 
                                                                if (CONST_Debug) var_dump($sSQL);
+                                                               $iStartTime = time();
                                                                $aViewBoxPlaceIDs = $oDB->getAll($sSQL);
                                                                if (PEAR::IsError($aViewBoxPlaceIDs))
                                                                {
                                                                        failInternalError("Could not get places for search terms.", $sSQL, $aViewBoxPlaceIDs);
                                                                }
+                                                               if (time() - $iStartTime > 60) {
+                                                                       file_put_contents(CONST_BasePath.'/log/long_queries.log', date('Y-m-d H:i:s', $iStartTime).' '.$sSQL."\n", FILE_APPEND);
+                                                               }
+
 //var_dump($aViewBoxPlaceIDs);
                                                                // Did we have any viewbox matches?
                                                                $aPlaceIDs = array();