]> git.openstreetmap.org Git - nominatim.git/commitdiff
Merge remote-tracking branch 'upstream/master'
authorSarah Hoffmann <lonvia@denofr.de>
Tue, 10 Sep 2013 17:09:17 +0000 (19:09 +0200)
committerSarah Hoffmann <lonvia@denofr.de>
Tue, 10 Sep 2013 17:09:17 +0000 (19:09 +0200)
Conflicts:
website/search.php

22 files changed:
lib/init-website.php
lib/lib.php
lib/log.php
munin/nominatim_throttled_ips [new file with mode: 0755]
settings/settings.php
sql/functions.sql
sql/indices.src.sql
sql/partition-tables.src.sql
sql/tables.sql
sql/tiger_import_finish.sql
utils/cron_banip.py [new file with mode: 0755]
utils/cron_logrotate.sh [new file with mode: 0755]
utils/cron_vacuum.sh [new file with mode: 0755]
utils/setup.php
utils/specialphrases.php
utils/update.php
website/403.html [new file with mode: 0644]
website/509.html [new file with mode: 0644]
website/favicon.ico [new file with mode: 0644]
website/nominatim.xml [new file with mode: 0644]
website/reverse.php
website/robots.txt [new file with mode: 0644]

index bcf3ebf7a0aeb6674e874ebbf3ed7413572373dc..4d1c2c7029a1cdc00b999b5c0c748eb27a517cd7 100644 (file)
@@ -5,6 +5,7 @@
        {
                header("Access-Control-Allow-Origin: *");
                header("Access-Control-Allow-Methods: OPTIONS,GET");
+               header("Access-Control-Max-Age: 8640000");
                if (!empty($_SERVER['HTTP_ACCESS_CONTROL_REQUEST_HEADERS']))
                {
                        header("Access-Control-Allow-Headers: ".$_SERVER['HTTP_ACCESS_CONTROL_REQUEST_HEADERS']);
        }
        if ($_SERVER['REQUEST_METHOD'] == 'OPTIONS') exit;
 
-       if (CONST_ClosedForIndexing && strpos(CONST_ClosedForIndexingExceptionIPs, ','.$_SERVER["REMOTE_ADDR"].',') === false)
-       {
-               echo "Closed for re-indexing...";
-               exit;
-       }
-
-       $aBucketKeys = array();
-
-       if (isset($_SERVER["HTTP_REFERER"])) $aBucketKeys[] = str_replace('www.','',strtolower(parse_url($_SERVER["HTTP_REFERER"], PHP_URL_HOST)));
-       if (isset($_SERVER["REMOTE_ADDR"])) $aBucketKeys[] = $_SERVER["REMOTE_ADDR"];
-       if (isset($_GET["email"])) $aBucketKeys[] = $_GET["email"];
-
-       $fBucketVal = doBucket($aBucketKeys, 
-                       (defined('CONST_ConnectionBucket_PageType')?constant('CONST_ConnectionBucket_Cost_'.CONST_ConnectionBucket_PageType):1) + user_busy_cost(),
-                       CONST_ConnectionBucket_LeakRate, CONST_ConnectionBucket_BlockLimit);
-
-       if ($fBucketVal > CONST_ConnectionBucket_WaitLimit && $fBucketVal < CONST_ConnectionBucket_BlockLimit)
-       {
-               $m = getBucketMemcache();
-               $iCurrentSleeping = $m->increment('sleepCounter');
-               if (false === $iCurrentSleeping)
-               {
-                       $m->add('sleepCounter', 0);
-                       $iCurrentSleeping = $m->increment('sleepCounter');
-               }
-               if ($iCurrentSleeping >= CONST_ConnectionBucket_MaxSleeping || isBucketSleeping($aBucketKeys))
-               {
-                       // Too many threads sleeping already.  This becomes a hard block.
-                       $fBucketVal = doBucket($aBucketKeys, CONST_ConnectionBucket_BlockLimit, CONST_ConnectionBucket_LeakRate, CONST_ConnectionBucket_BlockLimit);
-               }
-               else
-               {
-                       setBucketSleeping($aBucketKeys, true);
-                       sleep(($fBucketVal - CONST_ConnectionBucket_WaitLimit)/CONST_ConnectionBucket_LeakRate);
-                       $fBucketVal = doBucket($aBucketKeys, CONST_ConnectionBucket_LeakRate, CONST_ConnectionBucket_LeakRate, CONST_ConnectionBucket_BlockLimit);
-                       setBucketSleeping($aBucketKeys, false);
-               }
-               $m->decrement('sleepCounter');
-       }
-
-       if (strpos(CONST_BlockedIPs, ','.$_SERVER["REMOTE_ADDR"].',') !== false || $fBucketVal >= CONST_ConnectionBucket_BlockLimit)
-       {
-               echo "Your IP has been blocked. \n";
-               echo "Please create a nominatim trac ticket (http://trac.openstreetmap.org/newticket?component=nominatim) to request this to be removed. \n";
-               echo "Information on the Nominatim usage policy can be found here: http://wiki.openstreetmap.org/wiki/Nominatim#Usage_Policy \n";
-               exit;
-       }
-
-       header('Content-type: text/html; charset=utf-8');
 
+    header('Content-type: text/html; charset=utf-8');
index 3417c95b0e253968136ddeb996a3ca4236636a4f..3ed66237e955cdff43da2122cbcfe6bad6f52d19 100644 (file)
        {
                $aResult = array(array(join(' ',$aWords)));
                $sFirstToken = '';
-               if ($iDepth < 8) {
+               if ($iDepth < 7) {
                        while(sizeof($aWords) > 1)
                        {
                                $sWord = array_shift($aWords);
index b08f5383df26491f50492cd2ff1ae7104db94b41..501259d0ed2d48dfc8be60fe97ef5f40a07c268d 100644 (file)
 
                if (CONST_Log_DB)
                {
-                       // Log
-                       if ($sType == 'search')
-                       {
-                               $oDB->query('insert into query_log values ('.getDBQuoted($hLog[0]).','.getDBQuoted($hLog[3]).','.getDBQuoted($hLog[1]).')');
-                       }
-
-                       $sSQL = 'insert into new_query_log (type,starttime,query,ipaddress,useragent,language,format)';
+                       if (isset($_GET['email']))
+                               $sUserAgent = $_GET['email'];
+                       elseif (isset($_SERVER['HTTP_REFERER']))
+                               $sUserAgent = $_SERVER['HTTP_REFERER'];
+                       else
+                               $sUserAgent = $_SERVER['HTTP_USER_AGENT'];
+                       $sSQL = 'insert into new_query_log (type,starttime,query,ipaddress,useragent,language,format,searchterm)';
                        $sSQL .= ' values ('.getDBQuoted($sType).','.getDBQuoted($hLog[0]).','.getDBQuoted($hLog[2]);
-                       $sSQL .= ','.getDBQuoted($hLog[1]).','.getDBQuoted($_SERVER['HTTP_USER_AGENT']).','.getDBQuoted(join(',',$aLanguageList)).','.getDBQuoted($sOutputFormat).')';
+                       $sSQL .= ','.getDBQuoted($hLog[1]).','.getDBQuoted($sUserAgent).','.getDBQuoted(join(',',$aLanguageList)).','.getDBQuoted($sOutputFormat).','.getDBQuoted($hLog[3]).')';
                        $oDB->query($sSQL);
                }
 
 
                if (CONST_Log_DB)
                {
-                       $sSQL = 'update query_log set endtime = '.getDBQuoted($sEndTime).', results = '.$iNumResults;
-                       $sSQL .= ' where starttime = '.getDBQuoted($hLog[0]);
-                       $sSQL .= ' and ipaddress = '.getDBQuoted($hLog[1]);
-                       $sSQL .= ' and query = '.getDBQuoted($hLog[3]);
-                       $oDB->query($sSQL);
-
                        $sSQL = 'update new_query_log set endtime = '.getDBQuoted($sEndTime).', results = '.$iNumResults;
                        $sSQL .= ' where starttime = '.getDBQuoted($hLog[0]);
                        $sSQL .= ' and ipaddress = '.getDBQuoted($hLog[1]);
diff --git a/munin/nominatim_throttled_ips b/munin/nominatim_throttled_ips
new file mode 100755 (executable)
index 0000000..2fa1d80
--- /dev/null
@@ -0,0 +1,28 @@
+#!/bin/sh
+#
+# Plugin to monitor the number of IPs in special pools
+#
+# Parameters: 
+#
+#       config   (required)
+#       autoconf (optional - used by munin-config)
+#
+if [ "$1" = "config" ]; then
+        echo 'graph_title Restricted IPs' 
+        echo 'graph_args -l 0'
+        echo 'graph_vlabel number of IPs'
+        echo 'graph_category nominatim'
+        echo 'bulk.label bulk'
+        echo 'bulk.draw AREA'
+        echo 'bulk.type GAUGE'
+        echo 'block.label blocked'
+        echo 'block.draw STACK'
+        echo 'block.type GAUGE'
+        exit 0
+fi
+BASEDIR="$(dirname "$(readlink -f "$0")")"
+
+cut -f 2 -d ' ' $BASEDIR/../settings/ip_blocks.map | uniq -c | sed 's:[[:space:]]*\([0-9]\+\) \(.*\):\2.value \1:'
index fc6ee8dedfca2f4fa7f46ad7246121e5e8c4518e..4e9d71f031742d17bd530d8282c6313524a7d5fc 100644 (file)
 
        // Website settings
        @define('CONST_NoAccessControl', true);
-       @define('CONST_ClosedForIndexing', false);
-       @define('CONST_ClosedForIndexingExceptionIPs', '');
        @define('CONST_BlockedIPs', '');
+       @define('CONST_IPBanFile', CONST_BasePath.'/settings/ip_blocks');
+       @define('CONST_WhitelistedIPs', '');
+       @define('CONST_BlockedUserAgents', '');
+       @define('CONST_BlockReverseMaxLoad', 15);
        @define('CONST_BulkUserIPs', '');
 
-       @define('CONST_Website_BaseURL', 'http://'.php_uname('n').'/');
+       @define('CONST_Website_BaseURL', 'http://nominatim.openstreetmap.org/');
        @define('CONST_Tile_Default', 'Mapnik');
 
        @define('CONST_Default_Language', false);
index 6d2ae5f8cfd6b99011a56369b5f73a17e6e8b852..ac64defc34e2a3393937eb0b61b272ba86790c3d 100644 (file)
@@ -1357,13 +1357,26 @@ BEGIN
     NEW.centroid := null;
 
     -- reclaculate country and partition
-    IF NEW.rank_search >= 4 THEN
-      --NEW.calculated_country_code := lower(get_country_code(NEW.geometry, NEW.country_code));
-      NEW.calculated_country_code := lower(get_country_code(place_centroid));
+    IF NEW.rank_search = 4 THEN
+      -- for countries, believe the mapped country code,
+      -- so that we remain in the right partition if the boundaries
+      -- suddenly expand.
+      NEW.partition := get_partition(place_centroid, lower(NEW.country_code));
+      IF NEW.partition = 0 THEN
+        NEW.calculated_country_code := lower(get_country_code(place_centroid));
+        NEW.partition := get_partition(place_centroid, NEW.calculated_country_code);
+      ELSE
+        NEW.calculated_country_code := lower(NEW.country_code);
+      END IF;
     ELSE
-      NEW.calculated_country_code := NULL;
+      IF NEW.rank_search > 4 THEN
+        --NEW.calculated_country_code := lower(get_country_code(NEW.geometry, NEW.country_code));
+        NEW.calculated_country_code := lower(get_country_code(place_centroid));
+      ELSE
+        NEW.calculated_country_code := NULL;
+      END IF;
+      NEW.partition := get_partition(place_centroid, NEW.calculated_country_code);
     END IF;
-    NEW.partition := get_partition(place_centroid, NEW.calculated_country_code);
     NEW.geometry_sector := geometry_sector(NEW.partition, place_centroid);
 
     -- Adding ourselves to the list simplifies address calculations later
index ea57e74be1f4174f5e0ac41c881ea60bab866209..4dc23bd250474b76909103926ec338966d9a17af 100644 (file)
@@ -1,29 +1,29 @@
 -- Indices used only during search and update.
 -- These indices are created only after the indexing process is done.
 
-CREATE INDEX idx_word_word_id on word USING BTREE (word_id);
+CREATE INDEX idx_word_word_id on word USING BTREE (word_id) TABLESPACE ssd;
 
-CREATE INDEX idx_search_name_nameaddress_vector ON search_name USING GIN (nameaddress_vector) WITH (fastupdate = off);
-CREATE INDEX idx_search_name_name_vector ON search_name USING GIN (name_vector) WITH (fastupdate = off);
-CREATE INDEX idx_search_name_centroid ON search_name USING GIST (centroid);
+CREATE INDEX idx_search_name_nameaddress_vector ON search_name USING GIN (nameaddress_vector) WITH (fastupdate = off) TABLESPACE ssd;
+CREATE INDEX idx_search_name_name_vector ON search_name USING GIN (name_vector) WITH (fastupdate = off) TABLESPACE ssd;
+CREATE INDEX idx_search_name_centroid ON search_name USING GIST (centroid) TABLESPACE ssd;
 
-CREATE INDEX idx_place_addressline_address_place_id on place_addressline USING BTREE (address_place_id);
+CREATE INDEX idx_place_addressline_address_place_id on place_addressline USING BTREE (address_place_id) TABLESPACE ssd;
 
-CREATE INDEX idx_place_boundingbox_place_id on place_boundingbox USING BTREE (place_id);
-CREATE INDEX idx_place_boundingbox_outline ON place_boundingbox USING GIST (outline);
+CREATE INDEX idx_place_boundingbox_place_id on place_boundingbox USING BTREE (place_id) TABLESPACE ssd;
+CREATE INDEX idx_place_boundingbox_outline ON place_boundingbox USING GIST (outline) TABLESPACE ssd;
 
 DROP INDEX IF EXISTS idx_placex_rank_search;
-CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search);
-CREATE INDEX idx_placex_rank_address ON placex USING BTREE (rank_address);
-CREATE INDEX idx_placex_pendingsector ON placex USING BTREE (rank_search,geometry_sector) where indexed_status > 0;
-CREATE INDEX idx_placex_parent_place_id ON placex USING BTREE (parent_place_id) where parent_place_id IS NOT NULL;
-CREATE INDEX idx_placex_interpolation ON placex USING BTREE (geometry_sector) where indexed_status > 0 and class='place' and type='houses';
-CREATE INDEX idx_location_area_country_place_id ON location_area_country USING BTREE (place_id);
+CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search) TABLESPACE ssd;
+CREATE INDEX idx_placex_rank_address ON placex USING BTREE (rank_address) TABLESPACE ssd;
+CREATE INDEX idx_placex_pendingsector ON placex USING BTREE (rank_search,geometry_sector) TABLESPACE ssd where indexed_status > 0;
+CREATE INDEX idx_placex_parent_place_id ON placex USING BTREE (parent_place_id) TABLESPACE ssd where parent_place_id IS NOT NULL;
+CREATE INDEX idx_placex_interpolation ON placex USING BTREE (geometry_sector) TABLESPACE ssd where indexed_status > 0 and class='place' and type='houses';
+CREATE INDEX idx_location_area_country_place_id ON location_area_country USING BTREE (place_id) TABLESPACE ssd;
 
-CREATE INDEX idx_search_name_country_centroid ON search_name_country USING GIST (centroid);
+CREATE INDEX idx_search_name_country_centroid ON search_name_country USING GIST (centroid) TABLESPACE ssd;
 
 -- start
-CREATE INDEX idx_location_property_-partition-_centroid ON location_property_-partition- USING GIST (centroid);
+CREATE INDEX idx_location_property_-partition-_centroid ON location_property_-partition- USING GIST (centroid) TABLESPACE ssd;
 -- end
 
 CREATE UNIQUE INDEX idx_place_osm_unique on place using btree(osm_id,osm_type,class,type);
index 29e3d28235d1be5b42af21db4d3b1585e3907458..e0b1fae0eb47cfff13c0041c9e7b8070cb32d610 100644 (file)
@@ -35,21 +35,21 @@ SELECT AddGeometryColumn('search_name_blank', 'centroid', 4326, 'GEOMETRY', 2);
 
 
 CREATE TABLE location_area_country () INHERITS (location_area_large);
-CREATE INDEX idx_location_area_country_geometry ON location_area_country USING GIST (geometry);
+CREATE INDEX idx_location_area_country_geometry ON location_area_country USING GIST (geometry) TABLESPACE ssd;
 
 CREATE TABLE search_name_country () INHERITS (search_name_blank);
-CREATE INDEX idx_search_name_country_place_id ON search_name_country USING BTREE (place_id);
-CREATE INDEX idx_search_name_country_name_vector ON search_name_country USING GIN (name_vector) WITH (fastupdate = off);
+CREATE INDEX idx_search_name_country_place_id ON search_name_country USING BTREE (place_id) TABLESPACE ssd;
+CREATE INDEX idx_search_name_country_name_vector ON search_name_country USING GIN (name_vector) WITH (fastupdate = off) TABLESPACE ssd;
 
 -- start
 CREATE TABLE location_area_large_-partition- () INHERITS (location_area_large);
-CREATE INDEX idx_location_area_large_-partition-_place_id ON location_area_large_-partition- USING BTREE (place_id);
-CREATE INDEX idx_location_area_large_-partition-_geometry ON location_area_large_-partition- USING GIST (geometry);
+CREATE INDEX idx_location_area_large_-partition-_place_id ON location_area_large_-partition- USING BTREE (place_id) TABLESPACE ssd;
+CREATE INDEX idx_location_area_large_-partition-_geometry ON location_area_large_-partition- USING GIST (geometry) TABLESPACE ssd;
 
 CREATE TABLE search_name_-partition- () INHERITS (search_name_blank);
-CREATE INDEX idx_search_name_-partition-_place_id ON search_name_-partition- USING BTREE (place_id);
-CREATE INDEX idx_search_name_-partition-_centroid ON search_name_-partition- USING GIST (centroid);
-CREATE INDEX idx_search_name_-partition-_name_vector ON search_name_-partition- USING GIN (name_vector) WITH (fastupdate = off);
+CREATE INDEX idx_search_name_-partition-_place_id ON search_name_-partition- USING BTREE (place_id) TABLESPACE ssd;
+CREATE INDEX idx_search_name_-partition-_centroid ON search_name_-partition- USING GIST (centroid) TABLESPACE ssd;
+CREATE INDEX idx_search_name_-partition-_name_vector ON search_name_-partition- USING GIN (name_vector) WITH (fastupdate = off) TABLESPACE ssd;
 
 CREATE TABLE location_property_-partition- () INHERITS (location_property);
 CREATE INDEX idx_location_property_-partition-_place_id ON location_property_-partition- USING BTREE (place_id);
@@ -62,7 +62,7 @@ CREATE TABLE location_road_-partition- (
   country_code VARCHAR(2)
   );
 SELECT AddGeometryColumn('location_road_-partition-', 'geometry', 4326, 'GEOMETRY', 2);
-CREATE INDEX idx_location_road_-partition-_geometry ON location_road_-partition- USING GIST (geometry);
-CREATE INDEX idx_location_road_-partition-_place_id ON location_road_-partition- USING BTREE (place_id);
+CREATE INDEX idx_location_road_-partition-_geometry ON location_road_-partition- USING GIST (geometry) TABLESPACE ssd;
+CREATE INDEX idx_location_road_-partition-_place_id ON location_road_-partition- USING BTREE (place_id) TABLESPACE ssd;
 
 -- end
index 3bb41e28f7cf1415be242f223e4d475a60f0a799..244f2036ea3f355894bf07967351a462624031aa 100644 (file)
@@ -23,19 +23,6 @@ CREATE TABLE import_npi_log (
   event text
   );
 
---drop table IF EXISTS query_log;
-CREATE TABLE query_log (
-  starttime timestamp,
-  query text,
-  ipaddress text,
-  endtime timestamp,
-  results integer
-  );
-CREATE INDEX idx_query_log ON query_log USING BTREE (starttime);
-GRANT SELECT ON query_log TO "www-data" ;
-GRANT INSERT ON query_log TO "www-data" ;
-GRANT UPDATE ON query_log TO "www-data" ;
-
 CREATE TABLE new_query_log (
   type text,
   starttime timestamp,
@@ -43,6 +30,7 @@ CREATE TABLE new_query_log (
   useragent text,
   language text,
   query text,
+  searchterm text,
   endtime timestamp,
   results integer,
   format text,
@@ -53,9 +41,6 @@ GRANT INSERT ON new_query_log TO "www-data" ;
 GRANT UPDATE ON new_query_log TO "www-data" ;
 GRANT SELECT ON new_query_log TO "www-data" ;
 
-create view vw_search_query_log as SELECT substr(query, 1, 50) AS query, starttime, endtime - starttime AS duration, substr(useragent, 1, 20) as 
-useragent, language, results, ipaddress FROM new_query_log WHERE type = 'search' ORDER BY starttime DESC;
-
 --drop table IF EXISTS report_log;
 CREATE TABLE report_log (
   starttime timestamp,
@@ -76,8 +61,8 @@ CREATE TABLE word (
   country_code varchar(2),
   search_name_count INTEGER,
   operator TEXT
-  );
-CREATE INDEX idx_word_word_token on word USING BTREE (word_token);
+  ) TABLESPACE ssd;
+CREATE INDEX idx_word_word_token on word USING BTREE (word_token) TABLESPACE ssd;
 GRANT SELECT ON word TO "www-data" ;
 DROP SEQUENCE seq_word;
 CREATE SEQUENCE seq_word start 1;
@@ -132,7 +117,7 @@ CREATE TABLE search_name (
   nameaddress_vector integer[]
   );
 SELECT AddGeometryColumn('search_name', 'centroid', 4326, 'GEOMETRY', 2);
-CREATE INDEX idx_search_name_place_id ON search_name USING BTREE (place_id);
+CREATE INDEX idx_search_name_place_id ON search_name USING BTREE (place_id) TABLESPACE ssd;
 
 drop table IF EXISTS place_addressline;
 CREATE TABLE place_addressline (
@@ -142,8 +127,8 @@ CREATE TABLE place_addressline (
   isaddress boolean,
   distance float,
   cached_rank_address integer
-  );
-CREATE INDEX idx_place_addressline_place_id on place_addressline USING BTREE (place_id);
+  ) TABLESPACE data;
+CREATE INDEX idx_place_addressline_place_id on place_addressline USING BTREE (place_id) TABLESPACE ssd;
 
 drop table IF EXISTS place_boundingbox CASCADE;
 CREATE TABLE place_boundingbox (
@@ -196,14 +181,14 @@ CREATE TABLE placex (
   wikipedia TEXT, -- calculated wikipedia article name (language:title)
   geometry_sector INTEGER,
   calculated_country_code varchar(2)
-  );
+  ) TABLESPACE ssd;
 SELECT AddGeometryColumn('placex', 'centroid', 4326, 'GEOMETRY', 2);
-CREATE UNIQUE INDEX idx_place_id ON placex USING BTREE (place_id);
-CREATE INDEX idx_placex_osmid ON placex USING BTREE (osm_type, osm_id);
-CREATE INDEX idx_placex_linked_place_id ON placex USING BTREE (linked_place_id);
-CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search, geometry_sector);
-CREATE INDEX idx_placex_geometry ON placex USING GIST (geometry);
-CREATE INDEX idx_placex_adminname on placex USING BTREE (make_standard_name(name->'name'),rank_search) WHERE osm_type='N' and rank_search < 26;
+CREATE UNIQUE INDEX idx_place_id ON placex USING BTREE (place_id) TABLESPACE ssd;
+CREATE INDEX idx_placex_osmid ON placex USING BTREE (osm_type, osm_id) TABLESPACE ssd;
+CREATE INDEX idx_placex_linked_place_id ON placex USING BTREE (linked_place_id) TABLESPACE ssd;
+CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search, geometry_sector) TABLESPACE ssd;
+CREATE INDEX idx_placex_geometry ON placex USING GIST (geometry) TABLESPACE ssd;
+CREATE INDEX idx_placex_adminname on placex USING BTREE (make_standard_name(name->'name'),rank_search) TABLESPACE ssd WHERE osm_type='N' and rank_search < 26;
 
 --CREATE INDEX idx_placex_indexed ON placex USING BTREE (indexed);
 
@@ -244,7 +229,7 @@ CREATE TRIGGER place_before_insert BEFORE INSERT ON place
     FOR EACH ROW EXECUTE PROCEDURE place_insert();
 
 drop index idx_placex_sector;
-CREATE INDEX idx_placex_sector ON placex USING BTREE (geometry_sector,rank_address,osm_type,osm_id);
+CREATE INDEX idx_placex_sector ON placex USING BTREE (geometry_sector,rank_address,osm_type,osm_id) TABLESPACE ssd;
 
 DROP SEQUENCE seq_postcodes;
 CREATE SEQUENCE seq_postcodes start 1;
@@ -262,7 +247,7 @@ CREATE TABLE import_polygon_error (
   );
 SELECT AddGeometryColumn('import_polygon_error', 'prevgeometry', 4326, 'GEOMETRY', 2);
 SELECT AddGeometryColumn('import_polygon_error', 'newgeometry', 4326, 'GEOMETRY', 2);
-CREATE INDEX idx_import_polygon_error_osmid ON import_polygon_error USING BTREE (osm_type, osm_id);
+CREATE INDEX idx_import_polygon_error_osmid ON import_polygon_error USING BTREE (osm_type, osm_id) TABLESPACE ssd;
 GRANT SELECT ON import_polygon_error TO "www-data";
 
 drop table import_polygon_delete;
@@ -272,7 +257,7 @@ CREATE TABLE import_polygon_delete (
   class TEXT NOT NULL,
   type TEXT NOT NULL
   );
-CREATE INDEX idx_import_polygon_delete_osmid ON import_polygon_delete USING BTREE (osm_type, osm_id);
+CREATE INDEX idx_import_polygon_delete_osmid ON import_polygon_delete USING BTREE (osm_type, osm_id) TABLESPACE ssd;
 GRANT SELECT ON import_polygon_delete TO "www-data";
 
 drop sequence file;
index a0f4efc799e797998908e51149e1e803088d4c2b..d6ec1833a9fbc359ef5c66f54b73267ec037d013 100644 (file)
@@ -3,10 +3,10 @@ CREATE UNIQUE INDEX idx_location_property_tiger_place_id_imp ON location_propert
 
 GRANT SELECT ON location_property_tiger_import TO "www-data";
 
-DROP TABLE location_property_tiger;
-ALTER TABLE location_property_tiger_import RENAME TO location_property_tiger;
+--DROP TABLE location_property_tiger;
+--ALTER TABLE location_property_tiger_import RENAME TO location_property_tiger;
 
-ALTER INDEX idx_location_property_tiger_housenumber_parent_place_id_imp RENAME TO idx_location_property_tiger_housenumber_parent_place_id;
-ALTER INDEX idx_location_property_tiger_place_id_imp RENAME TO idx_location_property_tiger_place_id;
+--ALTER INDEX idx_location_property_tiger_housenumber_parent_place_id_imp RENAME TO idx_location_property_tiger_housenumber_parent_place_id;
+--ALTER INDEX idx_location_property_tiger_place_id_imp RENAME TO idx_location_property_tiger_place_id;
 
 DROP FUNCTION tigger_create_interpolation (linegeo geometry, in_startnumber integer, in_endnumber integer, interpolationtype text, in_street text, in_isin text, in_postcode text);
diff --git a/utils/cron_banip.py b/utils/cron_banip.py
new file mode 100755 (executable)
index 0000000..c8fdd78
--- /dev/null
@@ -0,0 +1,238 @@
+#!/usr/bin/python
+#
+# Search logs for high-bandwith users and create a list of suspicious IPs.
+# There are three states: bulk, block, ban. The first are bulk requesters
+# that need throtteling, the second bulk requesters that have overdone it
+# and the last manually banned IPs.
+#
+# The list can then be used in apache using rewrite rules to
+# direct bulk users to smaller thread pools or block them. A
+# typical apache config that uses php-fpm pools would look
+# like this:
+#
+#    Alias /nominatim-www/ "/var/www/nominatim/"
+#    Alias /nominatim-bulk/ "/var/www/nominatim/"
+#    <Directory "/var/www/nominatim/">
+#        Options MultiViews FollowSymLinks
+#        AddType text/html   .php
+#    </Directory>
+#
+#    <Location /nominatim-www>
+#        AddHandler fcgi:/var/run/php5-fpm-www.sock .php
+#    </Location>
+#    <Location /nominatim-bulk>
+#        AddHandler fcgi:/var/run/php5-fpm-bulk.sock .php
+#    </Location>
+#
+#    Redirect 509 /nominatim-block/
+#    ErrorDocument 509 "Bandwidth limit exceeded."
+#    Redirect 403 /nominatim-ban/
+#    ErrorDocument 403 "Access blocked."
+#
+#    RewriteEngine On
+#    RewriteMap bulklist txt:/home/wherever/ip-block.map
+#    RewriteRule ^/(.*) /nominatim-${bulklist:%{REMOTE_ADDR}|www}/$1 [PT]
+#
+
+import os
+import psycopg2
+import datetime
+
+BASEDIR = os.path.normpath(os.path.join(os.path.realpath(__file__), '../..'))
+
+#
+# DEFAULT SETTINGS
+#
+# Copy into settings/ip_blcoks.conf and adapt as required.
+#
+BLOCKEDFILE= BASEDIR + '/settings/ip_blocks.map'
+LOGFILE= BASEDIR + '/log/restricted_ip.log'
+
+# space-separated list of IPs that are never banned
+WHITELIST = ''
+# space-separated list of IPs manually blocked
+BLACKLIST = ''
+# user-agents that should be blocked from bulk mode
+# (matched with startswith)
+UA_BLOCKLIST = ()
+
+# time before a automatically blocked IP is allowed back
+BLOCKCOOLOFF_PERIOD='1 hour'
+# quiet time before an IP is released from the bulk pool
+BULKCOOLOFF_PERIOD='15 min'
+
+BULKLONG_LIMIT=8000
+BULKSHORT_LIMIT=2000
+BLOCK_UPPER=19000
+BLOCK_LOWER=4000
+BLOCK_LOADFAC=380
+BULK_LOADFAC=160
+BULK_LOWER=1500
+MAX_BULK_IPS=85
+
+#
+# END OF DEFAULT SETTINGS
+#
+
+try:
+    execfile(os.path.expanduser(BASEDIR + "/settings/ip_blocks.conf"))
+except IOError:
+    pass
+
+# read the previous blocklist
+WHITELIST = set(WHITELIST.split()) if WHITELIST else set()
+prevblocks = []
+prevbulks = []
+BLACKLIST = set(BLACKLIST.split()) if BLACKLIST else set()
+newblocks = set()
+newbulks = set()
+
+try:
+    fd = open(BLOCKEDFILE)
+    for line in fd:
+        ip, typ = line.strip().split(' ')
+        if ip not in BLACKLIST:
+            if typ == 'block':
+                prevblocks.append(ip)
+            elif typ == 'bulk':
+                prevbulks.append(ip)
+    fd.close()
+except IOError:
+    pass #ignore non-existing file
+
+# determine current load
+fd = open("/proc/loadavg")
+avgload = int(float(fd.readline().split()[2]))
+fd.close()
+# DB load
+conn = psycopg2.connect('dbname=nominatim')
+cur = conn.cursor()
+cur.execute("select count(*)/60 from new_query_log where starttime > now() - interval '1min'")
+dbload = int(cur.fetchone()[0])
+
+BLOCK_LIMIT = max(BLOCK_LOWER, BLOCK_UPPER - BLOCK_LOADFAC * (dbload - 75))
+BULKLONG_LIMIT = max(BULK_LOWER, BULKLONG_LIMIT - BULK_LOADFAC * (avgload - 14))
+if len(prevbulks) > MAX_BULK_IPS:
+    BLOCK_LIMIT = max(3600, BLOCK_LOWER - (len(prevbulks) - MAX_BULK_IPS)*10)
+
+# get the new block candidates
+cur.execute("""
+  SELECT ipaddress, max(count), max(ua) FROM
+   ((SELECT * FROM
+     (SELECT ipaddress, sum(case when endtime is null then 1 else 1+1.5*date_part('epoch',endtime-starttime) end) as count, substring(max(useragent) from 1 for 30) as ua FROM new_query_log
+      WHERE starttime > now() - interval '1 hour' GROUP BY ipaddress) as i
+   WHERE count > %s)
+   UNION
+   (SELECT ipaddress, count * 3, ua FROM
+     (SELECT ipaddress, sum(case when endtime is null then 1 else 1+1.5*date_part('epoch',endtime-starttime) end) as count, substring(max(useragent) from 1 for 30) as ua FROM new_query_log 
+      WHERE starttime > now() - interval '10 min' GROUP BY ipaddress) as i
+   WHERE count > %s)) as o
+  GROUP BY ipaddress
+""", (BULKLONG_LIMIT, BULKSHORT_LIMIT))
+
+bulkips = {}
+emergencyblocks = []
+useragentblocks = []
+
+for c in cur:
+    if c[0] not in WHITELIST and c[0] not in BLACKLIST:
+        # check for user agents that receive an immediate block
+        missing_agent = not c[2]
+        if not missing_agent:
+            for ua in UA_BLOCKLIST:
+                if c[2].startswith(ua):
+                    missing_agent = True
+                    break
+        if (missing_agent or c[1] > BLOCK_UPPER) and c[0] not in prevblocks:
+            newblocks.add(c[0])
+            if missing_agent:
+                useragentblocks.append(c[0])
+            else:
+                emergencyblocks.append(c[0])
+        else:
+            bulkips[c[0]] = c[1]
+
+# IPs from the block list that are no longer in the bulk list
+deblockcandidates = set()
+# IPs from the bulk list that are no longer in the bulk list
+debulkcandidates = set()
+# new IPs to go into the block list
+newlyblocked = []
+
+
+for ip in prevblocks:
+    if ip in bulkips:
+        newblocks.add(ip)
+        del bulkips[ip]
+    else:
+        deblockcandidates.add(ip)    
+        
+for ip in prevbulks:
+    if ip not in newblocks:
+        if ip in bulkips:
+            if bulkips[ip] > BLOCK_LIMIT:
+                newblocks.add(ip)
+                newlyblocked.append(ip)
+            else:
+                newbulks.add(ip)
+            del bulkips[ip]
+        else:
+            debulkcandidates.add(ip)
+
+# cross-check deblock candidates
+if deblockcandidates:
+    cur.execute("""
+        SELECT DISTINCT ipaddress FROM new_query_log
+        WHERE ipaddress IN ('%s') AND starttime > now() - interval '%s'
+        """ % ("','".join(deblockcandidates), BLOCKCOOLOFF_PERIOD))
+
+    for c in cur:
+        newblocks.add(c[0])
+        deblockcandidates.remove(c[0])
+# deblocked IPs go back to the bulk pool to catch the ones that simply
+# ignored the HTTP error and just continue to hammer the API.
+# Those that behave and stopped will be debulked a minute later.
+for ip in deblockcandidates:
+    newbulks.add(ip)
+
+# cross-check debulk candidates
+if debulkcandidates:
+    cur.execute("""
+        SELECT DISTINCT ipaddress FROM new_query_log
+        WHERE ipaddress IN ('%s') AND starttime > now() - interval '%s'
+        AND starttime > date_trunc('day', now())
+        """ % ("','".join(debulkcandidates), BULKCOOLOFF_PERIOD))
+
+    for c in cur:
+        newbulks.add(c[0])
+        debulkcandidates.remove(c[0])
+
+for ip in bulkips.iterkeys():
+    newbulks.add(ip)
+
+# write out the new list
+fd = open(BLOCKEDFILE, 'w')
+for ip in newblocks:
+    fd.write(ip + " block\n")
+for ip in newbulks:
+    fd.write(ip + " bulk\n")
+for ip in BLACKLIST:
+    fd.write(ip + " ban\n")
+fd.close()
+
+# write out the log
+logstr = datetime.datetime.now().strftime('%Y-%m-%d %H:%M') + ' %s %s\n'
+fd = open(LOGFILE, 'a')
+if deblockcandidates:
+    fd.write(logstr % ('unblocked:', ', '.join(deblockcandidates)))
+if debulkcandidates:
+    fd.write(logstr % (' debulked:', ', '.join(debulkcandidates)))
+if bulkips:
+    fd.write(logstr % ('new bulks:', ', '.join(bulkips.keys())))
+if emergencyblocks:
+    fd.write(logstr % ('dir.block:', ', '.join(emergencyblocks)))
+if useragentblocks:
+    fd.write(logstr % (' ua block:', ', '.join(useragentblocks)))
+if newlyblocked:
+    fd.write(logstr % ('new block:', ', '.join(newlyblocked)))
+fd.close()
diff --git a/utils/cron_logrotate.sh b/utils/cron_logrotate.sh
new file mode 100755 (executable)
index 0000000..b9291d9
--- /dev/null
@@ -0,0 +1,20 @@
+#!/bin/bash -e
+#
+# Rotate query logs.
+
+dbname=nominatim
+
+basedir=`dirname $0`
+logfile=`date "+$basedir/../log/query-%F.log.gz"`
+
+# dump the old logfile
+pg_dump -a -F p -t backup_query_log $dbname | gzip -9 > $logfile
+
+# remove the old logs
+psql -q -d $dbname -c 'DROP TABLE backup_query_log'
+
+# rotate
+psql -q -1 -d $dbname -c 'ALTER TABLE new_query_log RENAME TO backup_query_log;CREATE TABLE new_query_log as (select * from backup_query_log limit 0);GRANT SELECT, INSERT, UPDATE ON new_query_log TO "www-data"'
+psql -q -d $dbname -c 'ALTER INDEX idx_new_query_log_starttime RENAME TO idx_backup_query_log_starttime'
+psql -q -d $dbname -c 'CREATE INDEX idx_new_query_log_starttime ON new_query_log USING BTREE (starttime)'
+
diff --git a/utils/cron_vacuum.sh b/utils/cron_vacuum.sh
new file mode 100755 (executable)
index 0000000..4c16fc6
--- /dev/null
@@ -0,0 +1,14 @@
+#!/bin/bash
+#
+# Vaccum all tables with indices on integer arrays.
+# Agressive vacuuming seems to help against index bloat.
+#
+
+psql -q -d nominatim -c 'VACUUM ANALYSE search_name'
+psql -q -d nominatim -c 'VACUUM ANALYSE search_name_country'
+#psql -q -d nominatim -c 'VACUUM ANALYSE planet_osm_ways'
+
+for i in `seq 0 246`; do
+  psql -q -d nominatim -c "VACUUM ANALYSE search_name_${i}"
+done
+
index 22c5a533ef76845915be248f59feb226c7e06d51..90474fcf2767f224b0c98dcc1d628b857dd48211 100755 (executable)
                {
                        $osm2pgsql .= ' --flat-nodes '.CONST_Osm2pgsql_Flatnode_File;
                }
+               $osm2pgsql .= ' --tablespace-slim-index ssd --tablespace-main-index ssd --tablespace-main-data ssd --tablespace-slim-data data';
                $osm2pgsql .= ' -lsc -O gazetteer --hstore';
-               $osm2pgsql .= ' -C '.$iCacheMemory;
+               $osm2pgsql .= ' -C 16000';
                $osm2pgsql .= ' -P '.$aDSNInfo['port'];
                $osm2pgsql .= ' -d '.$aDSNInfo['database'].' '.$aCMDResult['osm-file'];
                passthruCheckReturn($osm2pgsql);
                $sSQL .= "select 'P',nextval('seq_postcodes'),'place','postcode',postcode,calculated_country_code,";
                $sSQL .= "ST_SetSRID(ST_Point(x,y),4326) as geometry from (select calculated_country_code,postcode,";
                $sSQL .= "avg(st_x(st_centroid(geometry))) as x,avg(st_y(st_centroid(geometry))) as y ";
-               $sSQL .= "from placex where postcode is not null group by calculated_country_code,postcode) as x";
+               $sSQL .= "from placex where postcode is not null and calculated_country_code not in ('ie') group by calculated_country_code,postcode) as x";
                if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection));
 
                $sSQL = "insert into placex (osm_type,osm_id,class,type,postcode,calculated_country_code,geometry) ";
index f1a0d0d078035c75d4168922f2e9612cc7516592..02aaaa0fb140804a411bd840abf0c6f7aff84488 100755 (executable)
                        echo "class = '".pg_escape_string($aPair[0])."' and type = '".pg_escape_string($aPair[1])."';\n";
 
                        echo "CREATE INDEX idx_place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])."_centroid ";
-                       echo "ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." USING GIST (centroid);\n";
+                       echo "ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." USING GIST (centroid) tablespace ssd;\n";
 
                        echo "CREATE INDEX idx_place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])."_place_id ";
-                       echo "ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." USING btree(place_id);\n";
+                       echo "ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." USING btree(place_id) tablespace ssd;\n";
 
-            echo "GRANT SELECT ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." TO \"www-data\";";
+            echo "GRANT SELECT ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." TO \"www-data\";\n";
 
                }
 
index 0e26b31152d1d3445ae233defb094b3d25bd524f..febe06820bceb39cd2846d3df8ae6939b6622a79 100755 (executable)
@@ -46,7 +46,6 @@
                showUsage($aCMDOptions, true, 'Select either import of hourly or daily');
        }
 
-       if (!isset($aResult['index-instances'])) $aResult['index-instances'] = 1;
        if (!isset($aResult['index-rank'])) $aResult['index-rank'] = 0;
 /*
        // Lock to prevent multiple copies running
 
        if ($aResult['index'])
        {
+               if (!isset($aResult['index-instances'])) $aResult['index-instances'] = 1;
                passthru(CONST_BasePath.'/nominatim/nominatim -i -d '.$aDSNInfo['database'].' -t '.$aResult['index-instances'].' -r '.$aResult['index-rank']);
        }
 
                {
                        $sCMDImport .= ' --flat-nodes '.CONST_Osm2pgsql_Flatnode_File;
                }
-               $sCMDIndex = $sBasePath.'/nominatim/nominatim -i -d '.$aDSNInfo['database'].' -t '.$aResult['index-instances'];
+               $sCMDIndex = $sBasePath.'/nominatim/nominatim -i -d '.$aDSNInfo['database'];
                if (!$aResult['no-npi']) {
                        $sCMDIndex .= '-F ';
                }
                        $sBatchEnd = getosmosistimestamp($sOsmosisConfigDirectory);
 
                        // Index file
-                       $sThisIndexCmd = $sCMDIndex;
+                       if (!isset($aResult['index-instances']))
+                       {
+                               if (getLoadAverage() < 15)
+                                       $iIndexInstances = 2;
+                               else
+                                       $iIndexInstances = 1;
+                       } else
+                               $iIndexInstances = $aResult['index-instances'];
+
+                       $sThisIndexCmd = $sCMDIndex.' -t '.$iIndexInstances;
 
                        if (!$aResult['no-npi'])
                        {
diff --git a/website/403.html b/website/403.html
new file mode 100644 (file)
index 0000000..44038a4
--- /dev/null
@@ -0,0 +1,15 @@
+<html>
+<head>
+<title>Access blocked</title>
+</head>
+<body>
+<h1>Access blocked</h1>
+
+<p>You have been blocked because you have been overusing OSM's geocoding service.
+Please be aware that OSM's resources are limited and shared between many users.
+To have this block lifted, contact the Nominatim system administrator at
+nominatim@openstreetmap.org.</p>
+
+<p>For more information, consult the <a href="http://wiki.openstreetmap.org/wiki/Nominatim_usage_policy">usage policy</a> for the OSM Nominatim server.
+</body>
+</head>
diff --git a/website/509.html b/website/509.html
new file mode 100644 (file)
index 0000000..1e67a5a
--- /dev/null
@@ -0,0 +1,12 @@
+<html>
+<head>
+<title>Bandwidth limit exceeded</title>
+</head>
+<body>
+<h1>Bandwidth limit exceeded</h1>
+
+<p>You have been temporarily blocked because you have been overusing OSM's geocoding service or because you have not provided sufficient identification of your application. This block will be automatically lifted after a while. Please take the time and adapt your scripts to reduce the number of requests and make sure that you send a valid UserAgent or Referer.</p>
+
+<p>For more information, consult the <a href="http://wiki.openstreetmap.org/wiki/Nominatim_usage_policy">usage policy</a> for the OSM Nominatim server.
+</body>
+</head>
diff --git a/website/favicon.ico b/website/favicon.ico
new file mode 100644 (file)
index 0000000..0157ea0
Binary files /dev/null and b/website/favicon.ico differ
diff --git a/website/nominatim.xml b/website/nominatim.xml
new file mode 100644 (file)
index 0000000..28684b1
--- /dev/null
@@ -0,0 +1,15 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/"
+                       xmlns:moz="http://www.mozilla.org/2006/browser/search/">
+       <ShortName>Nominatim</ShortName>
+       <LongName>Nominatim OSM Search</LongName>
+       <Description>Search for a place in OpenStreetMap Nominatim</Description>
+       <InputEncoding>UTF-8</InputEncoding>
+       <OutputEncoding>UTF-8</OutputEncoding>
+       <Url type="text/html" method="get" template="http://nominatim.openstreetmap.org/search/?q={searchTerms}" />
+       <Query role="example" searchTerms="Reigate" />
+       <Developer>Brian Quinion</Developer>
+       <AdultContent>false</AdultContent>
+       <Attribution>Data &amp;copy; OpenStreetMap contributors, Some Rights Reserved. ODbL, http://www.osm.org/copyright.</Attribution>
+</OpenSearchDescription>
+
index ee6a6100c8a4da0e4c6addd6058b0c951f66ecf3..ed877899faa672aa0160539840bf3b9fc6afbda3 100755 (executable)
@@ -6,18 +6,6 @@
        require_once(CONST_BasePath.'/lib/PlaceLookup.php');
        require_once(CONST_BasePath.'/lib/ReverseGeocode.php');
 
-       if (strpos(CONST_BulkUserIPs, ','.$_SERVER["REMOTE_ADDR"].',') !== false)
-       {
-               $fLoadAvg = getLoadAverage();
-               if ($fLoadAvg > 2) sleep(60);
-               if ($fLoadAvg > 4) sleep(120);
-               if ($fLoadAvg > 6)
-               {
-                       echo "Bulk User: Temporary block due to high server load\n";
-                       exit;
-               }
-       }
-
        $oDB =& getDB();
        ini_set('memory_limit', '200M');
 
diff --git a/website/robots.txt b/website/robots.txt
new file mode 100644 (file)
index 0000000..e4d3d3f
--- /dev/null
@@ -0,0 +1,10 @@
+User-agent: ia_archiver
+Allow: /
+
+User-agent: *
+Disallow: /search.php
+Disallow: /search
+Disallow: /details.php
+Disallow: /details
+Disallow: /reverse.php
+Disallow: /reverse