git.openstreetmap.org Git - nominatim.git/commitdiff
Merge remote-tracking branch 'upstream/master'
author Sarah Hoffmann <lonvia@denofr.de>
Sun, 6 Jul 2014 21:00:19 +0000 (23:00 +0200)
committer Sarah Hoffmann <lonvia@denofr.de>
Sun, 6 Jul 2014 21:00:19 +0000 (23:00 +0200)
25 files changed:
lib/Geocode.php
lib/init-website.php
lib/lib.php
lib/log.php
munin/nominatim_throttled_ips [new file with mode: 0755]
settings/settings.php
sql/functions.sql
sql/indices.src.sql
sql/partition-tables.src.sql
sql/tables.sql
sql/tiger_import_finish.sql
utils/cron_banip.py [new file with mode: 0755]
utils/cron_logrotate.sh [new file with mode: 0755]
utils/cron_vacuum.sh [new file with mode: 0755]
utils/setup.php
utils/specialphrases.php
utils/update.php
website/403.html [new file with mode: 0644]
website/509.html [new file with mode: 0644]
website/crossdomain.xml [new file with mode: 0644]
website/favicon.ico [new file with mode: 0644]
website/nominatim.xml [new file with mode: 0644]
website/reverse.php
website/robots.txt [new file with mode: 0644]
website/search.php

index 7563a26d54e326691ccb982b39124832abcf23dd..5451c3820dee27bafbccba637151aa8e8598689d 100644 (file)
@@ -15,7 +15,7 @@
 
                protected $aExcludePlaceIDs = array();
                protected $bDeDupe = true;
-               protected $bReverseInPlan = false;
+               protected $bReverseInPlan = true;
 
                protected $iLimit = 20;
                protected $iFinalLimit = 10;
                                $sSQL .= "and 30 between $this->iMinAddressRank and $this->iMaxAddressRank ";
                                $sSQL .= "group by place_id";
                                if (!$this->bDeDupe) $sSQL .= ",place_id ";
+                               /*
                                $sSQL .= " union ";
                                $sSQL .= "select 'L' as osm_type,place_id as osm_id,'place' as class,'house' as type,null as admin_level,30 as rank_search,30 as rank_address,min(place_id) as place_id, min(parent_place_id) as parent_place_id,'us' as country_code,";
                                $sSQL .= "get_address_by_language(place_id, $sLanguagePrefArraySQL) as langaddress,";
                                $sSQL .= "group by place_id";
                                if (!$this->bDeDupe) $sSQL .= ",place_id";
                                $sSQL .= ",get_address_by_language(place_id, $sLanguagePrefArraySQL) ";
+                               */
                        }
 
                        $sSQL .= " order by importance desc";
                                                                                {
                                                                                        if (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id'])
                                                                                        {
-                                                                                               if ((!$bStructuredPhrases || $iPhrase > 0) && sizeof($aCurrentSearch['aName']) && strlen($sToken) >= 4)
+                                                                                               if ((!$bStructuredPhrases || $iPhrase > 0) && sizeof($aCurrentSearch['aName']) && strpos($sToken, ' ') === false)
                                                                                                {
                                                                                                        $aSearch = $aCurrentSearch;
                                                                                                        $aSearch['iSearchRank'] += 1;
                                                                                                                $aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
                                                                                                                if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
                                                                                                        }
-                                                                                                       elseif (isset($aValidTokens[' '.$sToken])) // revert to the token version?
+                                                                                                       elseif (isset($aValidTokens[' '.$sToken]) && strlen($sToken) >= 4) // revert to the token version?
                                                                                                        {
+                                                                                                               $aSearch['aAddressNonSearch'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
+                                                                                                               $aSearch['iSearchRank'] += 1;
+                                                                                                               if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
                                                                                                                foreach($aValidTokens[' '.$sToken] as $aSearchTermToken)
                                                                                                                {
                                                                                                                        if (empty($aSearchTermToken['country_code'])
                                                                                                        else
                                                                                                        {
                                                                                                                $aSearch['aAddressNonSearch'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
+                                                                                                               if (preg_match('#^[0-9]+$#', $sToken)) $aSearch['iSearchRank'] += 2;
                                                                                                                if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
                                                                                                        }
                                                                                                }
                                                                                                if (!sizeof($aCurrentSearch['aName']) || $aCurrentSearch['iNamePhrase'] == $iPhrase)
                                                                                                {
                                                                                                        $aSearch = $aCurrentSearch;
-                                                                                                       $aSearch['iSearchRank'] += 2;
+                                                                                                       $aSearch['iSearchRank'] += 1;
+                                                                                                       if (!sizeof($aCurrentSearch['aName'])) $aSearch['iSearchRank'] += 1;
                                                                                                        if (preg_match('#^[0-9]+$#', $sToken)) $aSearch['iSearchRank'] += 2;
                                                                                                        if ($aWordFrequencyScores[$aSearchTerm['word_id']] < CONST_Max_Word_Frequency)
                                                                                                                $aSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
                                                        // TODO: filter out the pointless search terms (2 letter name tokens and less)
                                                        // they might be right - but they are just too darned expensive to run
                                                        if (sizeof($aSearch['aName'])) $aTerms[] = "name_vector @> ARRAY[".join($aSearch['aName'],",")."]";
-                                                       if (sizeof($aSearch['aNameNonSearch'])) $aTerms[] = "array_cat(name_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aNameNonSearch'],",")."]";
+                                                       //if (sizeof($aSearch['aNameNonSearch'])) $aTerms[] = "array_cat(name_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aNameNonSearch'],",")."]";
                                                        if (sizeof($aSearch['aAddress']) && $aSearch['aName'] != $aSearch['aAddress'])
                                                        {
                                                                // For infrequent name terms disable index usage for address
                                                                                sizeof($aSearch['aName']) == 1 &&
                                                                                $aWordFrequencyScores[$aSearch['aName'][reset($aSearch['aName'])]] < CONST_Search_NameOnlySearchFrequencyThreshold)
                                                                {
-                                                                       $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join(array_merge($aSearch['aAddress'],$aSearch['aAddressNonSearch']),",")."]";
+                                                                       //$aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join(array_merge($aSearch['aAddress'],$aSearch['aAddressNonSearch']),",")."]";
+                                                                       $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddress'],",")."]";
                                                                }
                                                                else
                                                                {
                                                                        $aTerms[] = "nameaddress_vector @> ARRAY[".join($aSearch['aAddress'],",")."]";
-                                                                       if (sizeof($aSearch['aAddressNonSearch'])) $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddressNonSearch'],",")."]";
+                                                                       //if (sizeof($aSearch['aAddressNonSearch'])) $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddressNonSearch'],",")."]";
                                                                }
                                                        }
                                                        if ($aSearch['sCountryCode']) $aTerms[] = "country_code = '".pg_escape_string($aSearch['sCountryCode'])."'";
                                                                $aPlaceIDs = $this->oDB->getCol($sSQL);
 
                                                                // If not try the aux fallback table
+                                                               /*
                                                                if (!sizeof($aPlaceIDs))
                                                                {
                                                                        $sSQL = "select place_id from location_property_aux where parent_place_id in (".$sPlaceIDs.") and housenumber = '".pg_escape_string($aSearch['sHouseNumber'])."'";
                                                                        if (CONST_Debug) var_dump($sSQL);
                                                                        $aPlaceIDs = $this->oDB->getCol($sSQL);
                                                                }
+                                                               */
 
                                                                if (!sizeof($aPlaceIDs))
                                                                {
                                                        {
                                                                preg_match_all('/(-?[0-9.]+) (-?[0-9.]+)/',$aMatch[1],$aPolyPoints,PREG_SET_ORDER);
                                                        }
+                            /*
                                                        elseif (preg_match('#MULTIPOLYGON\\(\\(\\(([- 0-9.,]+)#',$aPointPolygon['astext'],$aMatch))
                                                        {
                                                                preg_match_all('/(-?[0-9.]+) (-?[0-9.]+)/',$aMatch[1],$aPolyPoints,PREG_SET_ORDER);
                                                        }
+                            */
                                                        elseif (preg_match('#POINT\\((-?[0-9.]+) (-?[0-9.]+)\\)#',$aPointPolygon['astext'],$aMatch))
                                                        {
                                                                $fRadius = 0.01;
index 6db83988551f4d2fd6fba8ac745befa3f9043388..013aee4b4a1c07daf07dde2389baff9f85069a11 100644 (file)
@@ -5,6 +5,7 @@
        {
                header("Access-Control-Allow-Origin: *");
                header("Access-Control-Allow-Methods: OPTIONS,GET");
+               header("Access-Control-Max-Age: 8640000");
                if (!empty($_SERVER['HTTP_ACCESS_CONTROL_REQUEST_HEADERS']))
                {
                        header("Access-Control-Allow-Headers: ".$_SERVER['HTTP_ACCESS_CONTROL_REQUEST_HEADERS']);
        }
        if ($_SERVER['REQUEST_METHOD'] == 'OPTIONS') exit;
 
-       if (CONST_ClosedForIndexing && strpos(CONST_ClosedForIndexingExceptionIPs, ','.$_SERVER["REMOTE_ADDR"].',') === false)
-       {
-               echo "Closed for re-indexing...";
-               exit;
-       }
-
-       $aBucketKeys = array();
-
-       if (isset($_SERVER["HTTP_REFERER"])) $aBucketKeys[] = str_replace('www.','',strtolower(parse_url($_SERVER["HTTP_REFERER"], PHP_URL_HOST)));
-       if (isset($_SERVER["REMOTE_ADDR"])) $aBucketKeys[] = $_SERVER["REMOTE_ADDR"];
-       if (isset($_GET["email"])) $aBucketKeys[] = $_GET["email"];
-
-       $fBucketVal = doBucket($aBucketKeys, 
-                       (defined('CONST_ConnectionBucket_PageType')?constant('CONST_ConnectionBucket_Cost_'.CONST_ConnectionBucket_PageType):1) + user_busy_cost(),
-                       CONST_ConnectionBucket_LeakRate, CONST_ConnectionBucket_BlockLimit);
-
-       if ($fBucketVal > CONST_ConnectionBucket_WaitLimit && $fBucketVal < CONST_ConnectionBucket_BlockLimit)
-       {
-               $m = getBucketMemcache();
-               $iCurrentSleeping = $m->increment('sleepCounter');
-               if (false === $iCurrentSleeping)
-               {
-                       $m->add('sleepCounter', 0);
-                       $iCurrentSleeping = $m->increment('sleepCounter');
-               }
-               if ($iCurrentSleeping >= CONST_ConnectionBucket_MaxSleeping || isBucketSleeping($aBucketKeys))
-               {
-                       // Too many threads sleeping already.  This becomes a hard block.
-                       $fBucketVal = doBucket($aBucketKeys, CONST_ConnectionBucket_BlockLimit, CONST_ConnectionBucket_LeakRate, CONST_ConnectionBucket_BlockLimit);
-               }
-               else
-               {
-                       setBucketSleeping($aBucketKeys, true);
-                       sleep(($fBucketVal - CONST_ConnectionBucket_WaitLimit)/CONST_ConnectionBucket_LeakRate);
-                       $fBucketVal = doBucket($aBucketKeys, CONST_ConnectionBucket_LeakRate, CONST_ConnectionBucket_LeakRate, CONST_ConnectionBucket_BlockLimit);
-                       setBucketSleeping($aBucketKeys, false);
-               }
-               $m->decrement('sleepCounter');
-       }
-
-       if (strpos(CONST_BlockedIPs, ','.$_SERVER["REMOTE_ADDR"].',') !== false || $fBucketVal >= CONST_ConnectionBucket_BlockLimit)
-       {
-               header("HTTP/1.0 429 Too Many Requests");
-               echo "Your IP has been blocked. \n";
-               echo CONST_BlockMessage;
-               exit;
-       }
-
-       header('Content-type: text/html; charset=utf-8');
-
+    header('Content-type: text/html; charset=utf-8');
index 1e05def37c3ce833e39190b252ff3278a0030c55..b54023a4becbc1bde9ee7322130c26381024204b 100644 (file)
@@ -94,7 +94,8 @@
 
        function bySearchRank($a, $b)
        {
-               if ($a['iSearchRank'] == $b['iSearchRank']) return 0;
+               if ($a['iSearchRank'] == $b['iSearchRank'])
+            return strlen($a['sOperator']) + strlen($a['sHouseNumber']) - strlen($b['sOperator']) - strlen($b['sHouseNumber']);
                return ($a['iSearchRank'] < $b['iSearchRank']?-1:1);
        }
 
        {
                $aResult = array(array(join(' ',$aWords)));
                $sFirstToken = '';
-               if ($iDepth < 8) {
+               if ($iDepth < 7) {
                        while(sizeof($aWords) > 1)
                        {
                                $sWord = array_shift($aWords);
index 37d83c4771fdb410a46290de8cb60cc8eedb1346..59e04d1745baf59f5e96232b59984187188f69df 100644 (file)
 
                if (CONST_Log_DB)
                {
-                       // Log
-                       if ($sType == 'search')
-                       {
-                               $oDB->query('insert into query_log values ('.getDBQuoted($hLog[0]).','.getDBQuoted($hLog[3]).','.getDBQuoted($hLog[1]).')');
-                       }
-
-                       $sSQL = 'insert into new_query_log (type,starttime,query,ipaddress,useragent,language,format)';
+                       if (isset($_GET['email']))
+                               $sUserAgent = $_GET['email'];
+                       elseif (isset($_SERVER['HTTP_REFERER']))
+                               $sUserAgent = $_SERVER['HTTP_REFERER'];
+                       else
+                               $sUserAgent = $_SERVER['HTTP_USER_AGENT'];
+                       $sSQL = 'insert into new_query_log (type,starttime,query,ipaddress,useragent,language,format,searchterm)';
                        $sSQL .= ' values ('.getDBQuoted($sType).','.getDBQuoted($hLog[0]).','.getDBQuoted($hLog[2]);
-                       $sSQL .= ','.getDBQuoted($hLog[1]).','.getDBQuoted($_SERVER['HTTP_USER_AGENT']).','.getDBQuoted(join(',',$aLanguageList)).','.getDBQuoted($sOutputFormat).')';
+                       $sSQL .= ','.getDBQuoted($hLog[1]).','.getDBQuoted($sUserAgent).','.getDBQuoted(join(',',$aLanguageList)).','.getDBQuoted($sOutputFormat).','.getDBQuoted($hLog[3]).')';
                        $oDB->query($sSQL);
                }
 
 
                if (CONST_Log_DB)
                {
-                       $sSQL = 'update query_log set endtime = '.getDBQuoted($sEndTime).', results = '.$iNumResults;
-                       $sSQL .= ' where starttime = '.getDBQuoted($hLog[0]);
-                       $sSQL .= ' and ipaddress = '.getDBQuoted($hLog[1]);
-                       $sSQL .= ' and query = '.getDBQuoted($hLog[3]);
-                       $oDB->query($sSQL);
-
                        $sSQL = 'update new_query_log set endtime = '.getDBQuoted($sEndTime).', results = '.$iNumResults;
                        $sSQL .= ' where starttime = '.getDBQuoted($hLog[0]);
                        $sSQL .= ' and ipaddress = '.getDBQuoted($hLog[1]);
diff --git a/munin/nominatim_throttled_ips b/munin/nominatim_throttled_ips
new file mode 100755 (executable)
index 0000000..2fa1d80
--- /dev/null
@@ -0,0 +1,28 @@
+#!/bin/sh
+#
+# Plugin to monitor the number of IPs in special pools
+#
+# Parameters: 
+#
+#       config   (required)
+#       autoconf (optional - used by munin-config)
+#
+if [ "$1" = "config" ]; then
+        echo 'graph_title Restricted IPs' 
+        echo 'graph_args -l 0'
+        echo 'graph_vlabel number of IPs'
+        echo 'graph_category nominatim'
+        echo 'bulk.label bulk'
+        echo 'bulk.draw AREA'
+        echo 'bulk.type GAUGE'
+        echo 'block.label blocked'
+        echo 'block.draw STACK'
+        echo 'block.type GAUGE'
+        exit 0
+fi
+BASEDIR="$(dirname "$(readlink -f "$0")")"
+
+cut -f 2 -d ' ' $BASEDIR/../settings/ip_blocks.map | uniq -c | sed 's:[[:space:]]*\([0-9]\+\) \(.*\):\2.value \1:'
index fd50a6f0a7c96487468cdfe6cac04b92095600cc..11e450e9e9d74b47eb0b0233fe8e1f21b60dd18d 100644 (file)
 
        // Website settings
        @define('CONST_NoAccessControl', true);
-       @define('CONST_ClosedForIndexing', false);
-       @define('CONST_ClosedForIndexingExceptionIPs', '');
        @define('CONST_BlockedIPs', '');
+       @define('CONST_IPBanFile', CONST_BasePath.'/settings/ip_blocks');
+       @define('CONST_WhitelistedIPs', '');
+       @define('CONST_BlockedUserAgents', '');
+       @define('CONST_BlockReverseMaxLoad', 15);
        @define('CONST_BulkUserIPs', '');
        @define('CONST_BlockMessage', ''); // additional info to show for blocked IPs
 
-       @define('CONST_Website_BaseURL', 'http://'.php_uname('n').'/');
+       @define('CONST_Website_BaseURL', 'http://nominatim.openstreetmap.org/');
        @define('CONST_Tile_Default', 'Mapnik');
 
        @define('CONST_Default_Language', false);
index 5c4cea053a6f05787e0f3d8da86fbf88dbabc354..ac726cb331a66366a090073bcd87fa260cb5c8e5 100644 (file)
@@ -936,6 +936,11 @@ DECLARE
 BEGIN
   --DEBUG: RAISE WARNING '% %',NEW.osm_type,NEW.osm_id;
 
+  -- remove operator tag for most places, messes too much with search_name indexes
+  IF NEW.class not in ('amenity', 'shop') THEN
+    NEW.name := delete(NEW.name, 'operator');
+  END IF;
+
   -- just block these
   IF NEW.class in ('landuse','natural') and NEW.name is null THEN
 --    RAISE WARNING 'empty landuse %',NEW.osm_id;
@@ -2059,6 +2064,11 @@ BEGIN
     --DEBUG: RAISE WARNING '%', existingplacex;
   END IF;
 
+  -- remove operator tag for most places, messes too much with search_name indexes
+  IF NEW.class not in ('amenity', 'shop') THEN
+    NEW.name := delete(NEW.name, 'operator');
+  END IF;
+
   -- Just block these - lots and pointless
   IF NEW.class in ('landuse','natural') and NEW.name is null THEN
     -- if the name tag was removed, older versions might still be lurking in the place table
@@ -2249,6 +2259,12 @@ BEGIN
 
   END IF;
 
+  -- refuse to update multipolygons with too many objects, too much of a performance hit
+  IF ST_NumGeometries(NEW.geometry) > 2000 THEN
+    RAISE WARNING 'Dropping update of % % because of geometry complexity.', NEW.osm_type, NEW.osm_id;
+    RETURN NULL;
+  END IF;
+
   IF coalesce(existing.name::text, '') != coalesce(NEW.name::text, '')
      OR coalesce(existing.extratags::text, '') != coalesce(NEW.extratags::text, '')
      OR coalesce(existing.housenumber, '') != coalesce(NEW.housenumber, '')
index ea57e74be1f4174f5e0ac41c881ea60bab866209..4dc23bd250474b76909103926ec338966d9a17af 100644 (file)
@@ -1,29 +1,29 @@
 -- Indices used only during search and update.
 -- These indices are created only after the indexing process is done.
 
-CREATE INDEX idx_word_word_id on word USING BTREE (word_id);
+CREATE INDEX idx_word_word_id on word USING BTREE (word_id) TABLESPACE ssd;
 
-CREATE INDEX idx_search_name_nameaddress_vector ON search_name USING GIN (nameaddress_vector) WITH (fastupdate = off);
-CREATE INDEX idx_search_name_name_vector ON search_name USING GIN (name_vector) WITH (fastupdate = off);
-CREATE INDEX idx_search_name_centroid ON search_name USING GIST (centroid);
+CREATE INDEX idx_search_name_nameaddress_vector ON search_name USING GIN (nameaddress_vector) WITH (fastupdate = off) TABLESPACE ssd;
+CREATE INDEX idx_search_name_name_vector ON search_name USING GIN (name_vector) WITH (fastupdate = off) TABLESPACE ssd;
+CREATE INDEX idx_search_name_centroid ON search_name USING GIST (centroid) TABLESPACE ssd;
 
-CREATE INDEX idx_place_addressline_address_place_id on place_addressline USING BTREE (address_place_id);
+CREATE INDEX idx_place_addressline_address_place_id on place_addressline USING BTREE (address_place_id) TABLESPACE ssd;
 
-CREATE INDEX idx_place_boundingbox_place_id on place_boundingbox USING BTREE (place_id);
-CREATE INDEX idx_place_boundingbox_outline ON place_boundingbox USING GIST (outline);
+CREATE INDEX idx_place_boundingbox_place_id on place_boundingbox USING BTREE (place_id) TABLESPACE ssd;
+CREATE INDEX idx_place_boundingbox_outline ON place_boundingbox USING GIST (outline) TABLESPACE ssd;
 
 DROP INDEX IF EXISTS idx_placex_rank_search;
-CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search);
-CREATE INDEX idx_placex_rank_address ON placex USING BTREE (rank_address);
-CREATE INDEX idx_placex_pendingsector ON placex USING BTREE (rank_search,geometry_sector) where indexed_status > 0;
-CREATE INDEX idx_placex_parent_place_id ON placex USING BTREE (parent_place_id) where parent_place_id IS NOT NULL;
-CREATE INDEX idx_placex_interpolation ON placex USING BTREE (geometry_sector) where indexed_status > 0 and class='place' and type='houses';
-CREATE INDEX idx_location_area_country_place_id ON location_area_country USING BTREE (place_id);
+CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search) TABLESPACE ssd;
+CREATE INDEX idx_placex_rank_address ON placex USING BTREE (rank_address) TABLESPACE ssd;
+CREATE INDEX idx_placex_pendingsector ON placex USING BTREE (rank_search,geometry_sector) TABLESPACE ssd where indexed_status > 0;
+CREATE INDEX idx_placex_parent_place_id ON placex USING BTREE (parent_place_id) TABLESPACE ssd where parent_place_id IS NOT NULL;
+CREATE INDEX idx_placex_interpolation ON placex USING BTREE (geometry_sector) TABLESPACE ssd where indexed_status > 0 and class='place' and type='houses';
+CREATE INDEX idx_location_area_country_place_id ON location_area_country USING BTREE (place_id) TABLESPACE ssd;
 
-CREATE INDEX idx_search_name_country_centroid ON search_name_country USING GIST (centroid);
+CREATE INDEX idx_search_name_country_centroid ON search_name_country USING GIST (centroid) TABLESPACE ssd;
 
 -- start
-CREATE INDEX idx_location_property_-partition-_centroid ON location_property_-partition- USING GIST (centroid);
+CREATE INDEX idx_location_property_-partition-_centroid ON location_property_-partition- USING GIST (centroid) TABLESPACE ssd;
 -- end
 
 CREATE UNIQUE INDEX idx_place_osm_unique on place using btree(osm_id,osm_type,class,type);
index 29e3d28235d1be5b42af21db4d3b1585e3907458..e0b1fae0eb47cfff13c0041c9e7b8070cb32d610 100644 (file)
@@ -35,21 +35,21 @@ SELECT AddGeometryColumn('search_name_blank', 'centroid', 4326, 'GEOMETRY', 2);
 
 
 CREATE TABLE location_area_country () INHERITS (location_area_large);
-CREATE INDEX idx_location_area_country_geometry ON location_area_country USING GIST (geometry);
+CREATE INDEX idx_location_area_country_geometry ON location_area_country USING GIST (geometry) TABLESPACE ssd;
 
 CREATE TABLE search_name_country () INHERITS (search_name_blank);
-CREATE INDEX idx_search_name_country_place_id ON search_name_country USING BTREE (place_id);
-CREATE INDEX idx_search_name_country_name_vector ON search_name_country USING GIN (name_vector) WITH (fastupdate = off);
+CREATE INDEX idx_search_name_country_place_id ON search_name_country USING BTREE (place_id) TABLESPACE ssd;
+CREATE INDEX idx_search_name_country_name_vector ON search_name_country USING GIN (name_vector) WITH (fastupdate = off) TABLESPACE ssd;
 
 -- start
 CREATE TABLE location_area_large_-partition- () INHERITS (location_area_large);
-CREATE INDEX idx_location_area_large_-partition-_place_id ON location_area_large_-partition- USING BTREE (place_id);
-CREATE INDEX idx_location_area_large_-partition-_geometry ON location_area_large_-partition- USING GIST (geometry);
+CREATE INDEX idx_location_area_large_-partition-_place_id ON location_area_large_-partition- USING BTREE (place_id) TABLESPACE ssd;
+CREATE INDEX idx_location_area_large_-partition-_geometry ON location_area_large_-partition- USING GIST (geometry) TABLESPACE ssd;
 
 CREATE TABLE search_name_-partition- () INHERITS (search_name_blank);
-CREATE INDEX idx_search_name_-partition-_place_id ON search_name_-partition- USING BTREE (place_id);
-CREATE INDEX idx_search_name_-partition-_centroid ON search_name_-partition- USING GIST (centroid);
-CREATE INDEX idx_search_name_-partition-_name_vector ON search_name_-partition- USING GIN (name_vector) WITH (fastupdate = off);
+CREATE INDEX idx_search_name_-partition-_place_id ON search_name_-partition- USING BTREE (place_id) TABLESPACE ssd;
+CREATE INDEX idx_search_name_-partition-_centroid ON search_name_-partition- USING GIST (centroid) TABLESPACE ssd;
+CREATE INDEX idx_search_name_-partition-_name_vector ON search_name_-partition- USING GIN (name_vector) WITH (fastupdate = off) TABLESPACE ssd;
 
 CREATE TABLE location_property_-partition- () INHERITS (location_property);
 CREATE INDEX idx_location_property_-partition-_place_id ON location_property_-partition- USING BTREE (place_id);
@@ -62,7 +62,7 @@ CREATE TABLE location_road_-partition- (
   country_code VARCHAR(2)
   );
 SELECT AddGeometryColumn('location_road_-partition-', 'geometry', 4326, 'GEOMETRY', 2);
-CREATE INDEX idx_location_road_-partition-_geometry ON location_road_-partition- USING GIST (geometry);
-CREATE INDEX idx_location_road_-partition-_place_id ON location_road_-partition- USING BTREE (place_id);
+CREATE INDEX idx_location_road_-partition-_geometry ON location_road_-partition- USING GIST (geometry) TABLESPACE ssd;
+CREATE INDEX idx_location_road_-partition-_place_id ON location_road_-partition- USING BTREE (place_id) TABLESPACE ssd;
 
 -- end
index 038a373e4d99bb2f20b8e2fd7f66f1879f6e94af..32ff5e879e401ffe54b10c54a2e69353159a5a59 100644 (file)
@@ -23,19 +23,6 @@ CREATE TABLE import_npi_log (
   event text
   );
 
---drop table IF EXISTS query_log;
-CREATE TABLE query_log (
-  starttime timestamp,
-  query text,
-  ipaddress text,
-  endtime timestamp,
-  results integer
-  );
-CREATE INDEX idx_query_log ON query_log USING BTREE (starttime);
-GRANT SELECT ON query_log TO "www-data" ;
-GRANT INSERT ON query_log TO "www-data" ;
-GRANT UPDATE ON query_log TO "www-data" ;
-
 CREATE TABLE new_query_log (
   type text,
   starttime timestamp,
@@ -43,6 +30,7 @@ CREATE TABLE new_query_log (
   useragent text,
   language text,
   query text,
+  searchterm text,
   endtime timestamp,
   results integer,
   format text,
@@ -53,9 +41,6 @@ GRANT INSERT ON new_query_log TO "www-data" ;
 GRANT UPDATE ON new_query_log TO "www-data" ;
 GRANT SELECT ON new_query_log TO "www-data" ;
 
-create view vw_search_query_log as SELECT substr(query, 1, 50) AS query, starttime, endtime - starttime AS duration, substr(useragent, 1, 20) as 
-useragent, language, results, ipaddress FROM new_query_log WHERE type = 'search' ORDER BY starttime DESC;
-
 --drop table IF EXISTS report_log;
 CREATE TABLE report_log (
   starttime timestamp,
@@ -76,8 +61,8 @@ CREATE TABLE word (
   country_code varchar(2),
   search_name_count INTEGER,
   operator TEXT
-  );
-CREATE INDEX idx_word_word_token on word USING BTREE (word_token);
+  ) TABLESPACE ssd;
+CREATE INDEX idx_word_word_token on word USING BTREE (word_token) TABLESPACE ssd;
 GRANT SELECT ON word TO "www-data" ;
 DROP SEQUENCE seq_word;
 CREATE SEQUENCE seq_word start 1;
@@ -132,7 +117,7 @@ CREATE TABLE search_name (
   nameaddress_vector integer[]
   );
 SELECT AddGeometryColumn('search_name', 'centroid', 4326, 'GEOMETRY', 2);
-CREATE INDEX idx_search_name_place_id ON search_name USING BTREE (place_id);
+CREATE INDEX idx_search_name_place_id ON search_name USING BTREE (place_id) TABLESPACE ssd;
 
 drop table IF EXISTS place_addressline;
 CREATE TABLE place_addressline (
@@ -142,8 +127,8 @@ CREATE TABLE place_addressline (
   isaddress boolean,
   distance float,
   cached_rank_address integer
-  );
-CREATE INDEX idx_place_addressline_place_id on place_addressline USING BTREE (place_id);
+  ) TABLESPACE data;
+CREATE INDEX idx_place_addressline_place_id on place_addressline USING BTREE (place_id) TABLESPACE ssd;
 
 drop table IF EXISTS place_boundingbox CASCADE;
 CREATE TABLE place_boundingbox (
@@ -196,14 +181,14 @@ CREATE TABLE placex (
   wikipedia TEXT, -- calculated wikipedia article name (language:title)
   geometry_sector INTEGER,
   calculated_country_code varchar(2)
-  );
+  ) TABLESPACE ssd;
 SELECT AddGeometryColumn('placex', 'centroid', 4326, 'GEOMETRY', 2);
-CREATE UNIQUE INDEX idx_place_id ON placex USING BTREE (place_id);
-CREATE INDEX idx_placex_osmid ON placex USING BTREE (osm_type, osm_id);
-CREATE INDEX idx_placex_linked_place_id ON placex USING BTREE (linked_place_id);
-CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search, geometry_sector);
-CREATE INDEX idx_placex_geometry ON placex USING GIST (geometry);
-CREATE INDEX idx_placex_adminname on placex USING BTREE (make_standard_name(name->'name'),rank_search) WHERE osm_type='N' and rank_search < 26;
+CREATE UNIQUE INDEX idx_place_id ON placex USING BTREE (place_id) TABLESPACE ssd;
+CREATE INDEX idx_placex_osmid ON placex USING BTREE (osm_type, osm_id) TABLESPACE ssd;
+CREATE INDEX idx_placex_linked_place_id ON placex USING BTREE (linked_place_id) TABLESPACE ssd;
+CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search, geometry_sector) TABLESPACE ssd;
+CREATE INDEX idx_placex_geometry ON placex USING GIST (geometry) TABLESPACE ssd;
+CREATE INDEX idx_placex_adminname on placex USING BTREE (make_standard_name(name->'name'),rank_search) TABLESPACE ssd WHERE osm_type='N' and rank_search < 26;
 
 --CREATE INDEX idx_placex_indexed ON placex USING BTREE (indexed);
 
@@ -237,7 +222,7 @@ CREATE TRIGGER place_before_insert BEFORE INSERT ON place
     FOR EACH ROW EXECUTE PROCEDURE place_insert();
 
 drop index idx_placex_sector;
-CREATE INDEX idx_placex_sector ON placex USING BTREE (geometry_sector,rank_address,osm_type,osm_id);
+CREATE INDEX idx_placex_sector ON placex USING BTREE (geometry_sector,rank_address,osm_type,osm_id) TABLESPACE ssd;
 
 DROP SEQUENCE seq_postcodes;
 CREATE SEQUENCE seq_postcodes start 1;
@@ -255,7 +240,7 @@ CREATE TABLE import_polygon_error (
   );
 SELECT AddGeometryColumn('import_polygon_error', 'prevgeometry', 4326, 'GEOMETRY', 2);
 SELECT AddGeometryColumn('import_polygon_error', 'newgeometry', 4326, 'GEOMETRY', 2);
-CREATE INDEX idx_import_polygon_error_osmid ON import_polygon_error USING BTREE (osm_type, osm_id);
+CREATE INDEX idx_import_polygon_error_osmid ON import_polygon_error USING BTREE (osm_type, osm_id) TABLESPACE ssd;
 GRANT SELECT ON import_polygon_error TO "www-data";
 
 drop table import_polygon_delete;
@@ -265,7 +250,7 @@ CREATE TABLE import_polygon_delete (
   class TEXT NOT NULL,
   type TEXT NOT NULL
   );
-CREATE INDEX idx_import_polygon_delete_osmid ON import_polygon_delete USING BTREE (osm_type, osm_id);
+CREATE INDEX idx_import_polygon_delete_osmid ON import_polygon_delete USING BTREE (osm_type, osm_id) TABLESPACE ssd;
 GRANT SELECT ON import_polygon_delete TO "www-data";
 
 drop sequence file;
index a0f4efc799e797998908e51149e1e803088d4c2b..d6ec1833a9fbc359ef5c66f54b73267ec037d013 100644 (file)
@@ -3,10 +3,10 @@ CREATE UNIQUE INDEX idx_location_property_tiger_place_id_imp ON location_propert
 
 GRANT SELECT ON location_property_tiger_import TO "www-data";
 
-DROP TABLE location_property_tiger;
-ALTER TABLE location_property_tiger_import RENAME TO location_property_tiger;
+--DROP TABLE location_property_tiger;
+--ALTER TABLE location_property_tiger_import RENAME TO location_property_tiger;
 
-ALTER INDEX idx_location_property_tiger_housenumber_parent_place_id_imp RENAME TO idx_location_property_tiger_housenumber_parent_place_id;
-ALTER INDEX idx_location_property_tiger_place_id_imp RENAME TO idx_location_property_tiger_place_id;
+--ALTER INDEX idx_location_property_tiger_housenumber_parent_place_id_imp RENAME TO idx_location_property_tiger_housenumber_parent_place_id;
+--ALTER INDEX idx_location_property_tiger_place_id_imp RENAME TO idx_location_property_tiger_place_id;
 
 DROP FUNCTION tigger_create_interpolation (linegeo geometry, in_startnumber integer, in_endnumber integer, interpolationtype text, in_street text, in_isin text, in_postcode text);
diff --git a/utils/cron_banip.py b/utils/cron_banip.py
new file mode 100755 (executable)
index 0000000..53f5e5f
--- /dev/null
@@ -0,0 +1,243 @@
+#!/usr/bin/python
+#
+# Search logs for high-bandwidth users and create a list of suspicious IPs.
+# There are three states: bulk, block, ban. The first are bulk requesters
+# that need throttling, the second bulk requesters that have overdone it
+# and the last manually banned IPs.
+#
+# The list can then be used in apache using rewrite rules to
+# direct bulk users to smaller thread pools or block them. A
+# typical apache config that uses php-fpm pools would look
+# like this:
+#
+#    Alias /nominatim-www/ "/var/www/nominatim/"
+#    Alias /nominatim-bulk/ "/var/www/nominatim/"
+#    <Directory "/var/www/nominatim/">
+#        Options MultiViews FollowSymLinks
+#        AddType text/html   .php
+#    </Directory>
+#
+#    <Location /nominatim-www>
+#        AddHandler fcgi:/var/run/php5-fpm-www.sock .php
+#    </Location>
+#    <Location /nominatim-bulk>
+#        AddHandler fcgi:/var/run/php5-fpm-bulk.sock .php
+#    </Location>
+#
+#    Redirect 509 /nominatim-block/
+#    ErrorDocument 509 "Bandwidth limit exceeded."
+#    Redirect 403 /nominatim-ban/
+#    ErrorDocument 403 "Access blocked."
+#
+#    RewriteEngine On
+#    RewriteMap bulklist txt:/home/wherever/ip-block.map
+#    RewriteRule ^/(.*) /nominatim-${bulklist:%{REMOTE_ADDR}|www}/$1 [PT]
+#
+
+import os
+import psycopg2
+import datetime
+
+BASEDIR = os.path.normpath(os.path.join(os.path.realpath(__file__), '../..'))
+
+#
+# DEFAULT SETTINGS
+#
+# Copy into settings/ip_blocks.conf and adapt as required.
+#
+BLOCKEDFILE= BASEDIR + '/settings/ip_blocks.map'
+LOGFILE= BASEDIR + '/log/restricted_ip.log'
+
+# space-separated list of IPs that are never banned
+WHITELIST = ''
+# space-separated list of IPs manually blocked
+BLACKLIST = ''
+# user-agents that should be blocked from bulk mode
+# (matched with startswith)
+UA_BLOCKLIST = ()
+
+# time before an automatically blocked IP is allowed back
+BLOCKCOOLOFF_PERIOD='1 hour'
+# quiet time before an IP is released from the bulk pool
+BULKCOOLOFF_PERIOD='15 min'
+
+BULKLONG_LIMIT=8000
+BULKSHORT_LIMIT=2000
+BLOCK_UPPER=19000
+BLOCK_LOWER=4000
+BLOCK_LOADFAC=380
+BULK_LOADFAC=160
+BULK_LOWER=1500
+MAX_BULK_IPS=85
+
+#
+# END OF DEFAULT SETTINGS
+#
+
+try:
+    execfile(os.path.expanduser(BASEDIR + "/settings/ip_blocks.conf"))
+except IOError:
+    pass
+
+# read the previous blocklist
+WHITELIST = set(WHITELIST.split()) if WHITELIST else set()
+prevblocks = []
+prevbulks = []
+BLACKLIST = set(BLACKLIST.split()) if BLACKLIST else set()
+newblocks = set()
+newbulks = set()
+
+try:
+    fd = open(BLOCKEDFILE)
+    for line in fd:
+        ip, typ = line.strip().split(' ')
+        if ip not in BLACKLIST:
+            if typ == 'block':
+                prevblocks.append(ip)
+            elif typ == 'bulk':
+                prevbulks.append(ip)
+    fd.close()
+except IOError:
+    pass #ignore non-existing file
+
+# determine current load
+fd = open("/proc/loadavg")
+avgload = int(float(fd.readline().split()[2]))
+fd.close()
+# DB load
+conn = psycopg2.connect('dbname=nominatim')
+cur = conn.cursor()
+cur.execute("select count(*)/60 from new_query_log where starttime > now() - interval '1min'")
+dbload = int(cur.fetchone()[0])
+
+BLOCK_LIMIT = max(BLOCK_LOWER, BLOCK_UPPER - BLOCK_LOADFAC * (dbload - 75))
+BULKLONG_LIMIT = max(BULK_LOWER, BULKLONG_LIMIT - BULK_LOADFAC * (avgload - 14))
+if len(prevbulks) > MAX_BULK_IPS:
+    BLOCK_LIMIT = max(3600, BLOCK_LOWER - (len(prevbulks) - MAX_BULK_IPS)*10)
+# if the bulk pool is still empty, clients will be faster, avoid having
+# them blocked in this case
+if len(prevbulks) < 10:
+    BLOCK_LIMIT = 2*BLOCK_UPPER
+
+
+# get the new block candidates
+cur.execute("""
+  SELECT ipaddress, max(count), max(ua) FROM
+   ((SELECT * FROM
+     (SELECT ipaddress, sum(case when endtime is null then 1 else 1+1.5*date_part('epoch',endtime-starttime) end) as count, substring(max(useragent) from 1 for 30) as ua FROM new_query_log
+      WHERE starttime > now() - interval '1 hour' GROUP BY ipaddress) as i
+   WHERE count > %s)
+   UNION
+   (SELECT ipaddress, count * 3, ua FROM
+     (SELECT ipaddress, sum(case when endtime is null then 1 else 1+1.5*date_part('epoch',endtime-starttime) end) as count, substring(max(useragent) from 1 for 30) as ua FROM new_query_log 
+      WHERE starttime > now() - interval '10 min' GROUP BY ipaddress) as i
+   WHERE count > %s)) as o
+  GROUP BY ipaddress
+""", (BULKLONG_LIMIT, BULKSHORT_LIMIT))
+
+bulkips = {}
+emergencyblocks = []
+useragentblocks = []
+
+for c in cur:
+    if c[0] not in WHITELIST and c[0] not in BLACKLIST:
+        # check for user agents that receive an immediate block
+        missing_agent = not c[2]
+        if not missing_agent:
+            for ua in UA_BLOCKLIST:
+                if c[2].startswith(ua):
+                    missing_agent = True
+                    break
+        if (missing_agent or c[1] > BLOCK_UPPER) and c[0] not in prevblocks:
+            newblocks.add(c[0])
+            if missing_agent:
+                useragentblocks.append(c[0])
+            else:
+                emergencyblocks.append(c[0])
+        else:
+            bulkips[c[0]] = c[1]
+
+# IPs from the previous block list that are no longer among the bulk candidates
+deblockcandidates = set()
+# IPs from the previous bulk list that are no longer among the bulk candidates
+debulkcandidates = set()
+# new IPs to go into the block list
+newlyblocked = []
+
+
+for ip in prevblocks:
+    if ip in bulkips:
+        newblocks.add(ip)
+        del bulkips[ip]
+    else:
+        deblockcandidates.add(ip)    
+        
+for ip in prevbulks:
+    if ip not in newblocks:
+        if ip in bulkips:
+            if bulkips[ip] > BLOCK_LIMIT:
+                newblocks.add(ip)
+                newlyblocked.append(ip)
+            else:
+                newbulks.add(ip)
+            del bulkips[ip]
+        else:
+            debulkcandidates.add(ip)
+
+# cross-check deblock candidates
+if deblockcandidates:
+    cur.execute("""
+        SELECT DISTINCT ipaddress FROM new_query_log
+        WHERE ipaddress IN ('%s') AND starttime > now() - interval '%s'
+        """ % ("','".join(deblockcandidates), BLOCKCOOLOFF_PERIOD))
+
+    for c in cur:
+        newblocks.add(c[0])
+        deblockcandidates.remove(c[0])
+# deblocked IPs go back to the bulk pool to catch the ones that simply
+# ignored the HTTP error and just continue to hammer the API.
+# Those that behave and stopped will be debulked a minute later.
+for ip in deblockcandidates:
+    newbulks.add(ip)
+
+# cross-check debulk candidates
+if debulkcandidates:
+    cur.execute("""
+        SELECT DISTINCT ipaddress FROM new_query_log
+        WHERE ipaddress IN ('%s') AND starttime > now() - interval '%s'
+        AND starttime > date_trunc('day', now())
+        """ % ("','".join(debulkcandidates), BULKCOOLOFF_PERIOD))
+
+    for c in cur:
+        newbulks.add(c[0])
+        debulkcandidates.remove(c[0])
+
+for ip in bulkips.iterkeys():
+    newbulks.add(ip)
+
+# write out the new list
+fd = open(BLOCKEDFILE, 'w')
+for ip in newblocks:
+    fd.write(ip + " block\n")
+for ip in newbulks:
+    fd.write(ip + " bulk\n")
+for ip in BLACKLIST:
+    fd.write(ip + " ban\n")
+fd.close()
+
+# write out the log
+logstr = datetime.datetime.now().strftime('%Y-%m-%d %H:%M') + ' %s %s\n'
+fd = open(LOGFILE, 'a')
+if deblockcandidates:
+    fd.write(logstr % ('unblocked:', ', '.join(deblockcandidates)))
+if debulkcandidates:
+    fd.write(logstr % (' debulked:', ', '.join(debulkcandidates)))
+if bulkips:
+    fd.write(logstr % ('new bulks:', ', '.join(bulkips.keys())))
+if emergencyblocks:
+    fd.write(logstr % ('dir.block:', ', '.join(emergencyblocks)))
+if useragentblocks:
+    fd.write(logstr % (' ua block:', ', '.join(useragentblocks)))
+if newlyblocked:
+    fd.write(logstr % ('new block:', ', '.join(newlyblocked)))
+fd.close()
diff --git a/utils/cron_logrotate.sh b/utils/cron_logrotate.sh
new file mode 100755 (executable)
index 0000000..7d3ca4a
--- /dev/null
@@ -0,0 +1,20 @@
+#!/bin/bash -e
+#
+# Rotate query logs.
+
+dbname=nominatim
+
+basedir=`dirname $0`
+logfile=`date "+$basedir/../log/query-%F.log.gz"`
+
+# dump the old logfile
+pg_dump -a -F p -t backup_query_log $dbname | gzip -9 > $logfile
+
+# remove the old logs
+psql -q -d $dbname -c 'DROP TABLE backup_query_log'
+
+# rotate
+psql -q -1 -d $dbname -c 'ALTER TABLE new_query_log RENAME TO backup_query_log;CREATE TABLE new_query_log TABLESPACE ssd2 as (select * from backup_query_log limit 0);GRANT SELECT, INSERT, UPDATE ON new_query_log TO "www-data"'
+psql -q -d $dbname -c 'ALTER INDEX idx_new_query_log_starttime RENAME TO idx_backup_query_log_starttime'
+psql -q -d $dbname -c 'CREATE INDEX idx_new_query_log_starttime ON new_query_log USING BTREE (starttime) TABLESPACE ssd2'
+
diff --git a/utils/cron_vacuum.sh b/utils/cron_vacuum.sh
new file mode 100755 (executable)
index 0000000..4c16fc6
--- /dev/null
@@ -0,0 +1,14 @@
+#!/bin/bash
+#
+# Vacuum all tables with indices on integer arrays.
+# Aggressive vacuuming seems to help against index bloat.
+#
+
+psql -q -d nominatim -c 'VACUUM ANALYSE search_name'
+psql -q -d nominatim -c 'VACUUM ANALYSE search_name_country'
+#psql -q -d nominatim -c 'VACUUM ANALYSE planet_osm_ways'
+
+for i in `seq 0 246`; do
+  psql -q -d nominatim -c "VACUUM ANALYSE search_name_${i}"
+done
+
index 71e7dab59810573a7699cff5fd2eb980b8703e79..870d37eca3a5de45c7d275375bab04583bc28f99 100755 (executable)
                {
                        $osm2pgsql .= ' --flat-nodes '.CONST_Osm2pgsql_Flatnode_File;
                }
+               $osm2pgsql .= ' --tablespace-slim-index ssd --tablespace-main-index ssd --tablespace-main-data ssd --tablespace-slim-data data';
                $osm2pgsql .= ' -lsc -O gazetteer --hstore';
-               $osm2pgsql .= ' -C '.$iCacheMemory;
+               $osm2pgsql .= ' -C 18000';
                $osm2pgsql .= ' -P '.$aDSNInfo['port'];
                $osm2pgsql .= ' -d '.$aDSNInfo['database'].' '.$aCMDResult['osm-file'];
                passthruCheckReturn($osm2pgsql);
                $sSQL .= "select 'P',nextval('seq_postcodes'),'place','postcode',postcode,calculated_country_code,";
                $sSQL .= "ST_SetSRID(ST_Point(x,y),4326) as geometry from (select calculated_country_code,postcode,";
                $sSQL .= "avg(st_x(st_centroid(geometry))) as x,avg(st_y(st_centroid(geometry))) as y ";
-               $sSQL .= "from placex where postcode is not null group by calculated_country_code,postcode) as x";
+               $sSQL .= "from placex where postcode is not null and calculated_country_code not in ('ie') group by calculated_country_code,postcode) as x";
                if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection));
 
                $sSQL = "insert into placex (osm_type,osm_id,class,type,postcode,calculated_country_code,geometry) ";
index f1a0d0d078035c75d4168922f2e9612cc7516592..02aaaa0fb140804a411bd840abf0c6f7aff84488 100755 (executable)
                        echo "class = '".pg_escape_string($aPair[0])."' and type = '".pg_escape_string($aPair[1])."';\n";
 
                        echo "CREATE INDEX idx_place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])."_centroid ";
-                       echo "ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." USING GIST (centroid);\n";
+                       echo "ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." USING GIST (centroid) tablespace ssd;\n";
 
                        echo "CREATE INDEX idx_place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])."_place_id ";
-                       echo "ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." USING btree(place_id);\n";
+                       echo "ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." USING btree(place_id) tablespace ssd;\n";
 
-            echo "GRANT SELECT ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." TO \"www-data\";";
+            echo "GRANT SELECT ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." TO \"www-data\";\n";
 
                }
 
index c5fc14a672fa1dbc6f7e8f87066b023a70ce6947..a561aaa3656cc88ebf9099027bda7c63412425c4 100755 (executable)
@@ -47,7 +47,6 @@
                showUsage($aCMDOptions, true, 'Select either import of hourly or daily');
        }
 
-       if (!isset($aResult['index-instances'])) $aResult['index-instances'] = 1;
        if (!isset($aResult['index-rank'])) $aResult['index-rank'] = 0;
 
 /*
 
        if ($aResult['index'])
        {
+               if (!isset($aResult['index-instances'])) $aResult['index-instances'] = 1;
                passthru(CONST_BasePath.'/nominatim/nominatim -i -d '.$aDSNInfo['database'].' -P '.$aDSNInfo['port'].' -t '.$aResult['index-instances'].' -r '.$aResult['index-rank']);
        }
 
                $sCMDDownload = $sOsmosisCMD.' --read-replication-interval workingDirectory='.$sOsmosisConfigDirectory.' --simplify-change --write-xml-change '.$sImportFile;
                $sCMDCheckReplicationLag = $sOsmosisCMD.' -q --read-replication-lag workingDirectory='.$sOsmosisConfigDirectory;
                $sCMDImport = $sOsm2pgsqlCmd.' '.$sImportFile;
-               $sCMDIndex = $sBasePath.'/nominatim/nominatim -i -d '.$aDSNInfo['database'].' -P '.$aDSNInfo['port'].' -t '.$aResult['index-instances'];
+               $sCMDIndex = $sBasePath.'/nominatim/nominatim -i -d '.$aDSNInfo['database'].' -P '.$aDSNInfo['port'];
                if (!$aResult['no-npi']) {
                        $sCMDIndex .= '-F ';
                }
                        $sBatchEnd = getosmosistimestamp($sOsmosisConfigDirectory);
 
                        // Index file
-                       $sThisIndexCmd = $sCMDIndex;
+                       if (!isset($aResult['index-instances']))
+                       {
+                               if (getLoadAverage() < 24)
+                                       $iIndexInstances = 2;
+                               else
+                                       $iIndexInstances = 1;
+                       } else
+                               $iIndexInstances = $aResult['index-instances'];
+
+                       $sThisIndexCmd = $sCMDIndex.' -t '.$iIndexInstances;
                        $fCMDStartTime = time();
 
                        if (!$aResult['no-npi'])
diff --git a/website/403.html b/website/403.html
new file mode 100644 (file)
index 0000000..44038a4
--- /dev/null
@@ -0,0 +1,15 @@
+<html>
+<head>
+<title>Access blocked</title>
+</head>
+<body>
+<h1>Access blocked</h1>
+
+<p>You have been blocked because you have been overusing OSM's geocoding service.
+Please be aware that OSM's resources are limited and shared between many users.
+To have this block lifted, contact the Nominatim system administrator at
+nominatim@openstreetmap.org.</p>
+
+<p>For more information, consult the <a href="http://wiki.openstreetmap.org/wiki/Nominatim_usage_policy">usage policy</a> for the OSM Nominatim server.</p>
+</body>
+</html>
diff --git a/website/509.html b/website/509.html
new file mode 100644 (file)
index 0000000..1e67a5a
--- /dev/null
@@ -0,0 +1,12 @@
+<html>
+<head>
+<title>Bandwidth limit exceeded</title>
+</head>
+<body>
+<h1>Bandwidth limit exceeded</h1>
+
+<p>You have been temporarily blocked because you have been overusing OSM's geocoding service or because you have not provided sufficient identification of your application. This block will be automatically lifted after a while. Please take the time to adapt your scripts to reduce the number of requests and make sure that you send a valid User-Agent or Referer.</p>
+
+<p>For more information, consult the <a href="http://wiki.openstreetmap.org/wiki/Nominatim_usage_policy">usage policy</a> for the OSM Nominatim server.</p>
+</body>
+</html>
diff --git a/website/crossdomain.xml b/website/crossdomain.xml
new file mode 100644 (file)
index 0000000..963a682
--- /dev/null
@@ -0,0 +1,5 @@
+<?xml version="1.0"?>
+           <!DOCTYPE cross-domain-policy SYSTEM "http://www.macromedia.com/xml/dtds/cross-domain-policy.dtd">
+           <cross-domain-policy>
+           <allow-access-from domain="*" />
+           </cross-domain-policy> 
diff --git a/website/favicon.ico b/website/favicon.ico
new file mode 100644 (file)
index 0000000..0157ea0
Binary files /dev/null and b/website/favicon.ico differ
diff --git a/website/nominatim.xml b/website/nominatim.xml
new file mode 100644 (file)
index 0000000..28684b1
--- /dev/null
@@ -0,0 +1,15 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/"
+                       xmlns:moz="http://www.mozilla.org/2006/browser/search/">
+       <ShortName>Nominatim</ShortName>
+       <LongName>Nominatim OSM Search</LongName>
+       <Description>Search for a place in OpenStreetMap Nominatim</Description>
+       <InputEncoding>UTF-8</InputEncoding>
+       <OutputEncoding>UTF-8</OutputEncoding>
+       <Url type="text/html" method="get" template="http://nominatim.openstreetmap.org/search/?q={searchTerms}" />
+       <Query role="example" searchTerms="Reigate" />
+       <Developer>Brian Quinion</Developer>
+       <AdultContent>false</AdultContent>
+       <Attribution>Data &amp;copy; OpenStreetMap contributors, Some Rights Reserved. ODbL, http://www.osm.org/copyright.</Attribution>
+</OpenSearchDescription>
+
index ee6a6100c8a4da0e4c6addd6058b0c951f66ecf3..ed877899faa672aa0160539840bf3b9fc6afbda3 100755 (executable)
@@ -6,18 +6,6 @@
        require_once(CONST_BasePath.'/lib/PlaceLookup.php');
        require_once(CONST_BasePath.'/lib/ReverseGeocode.php');
 
-       if (strpos(CONST_BulkUserIPs, ','.$_SERVER["REMOTE_ADDR"].',') !== false)
-       {
-               $fLoadAvg = getLoadAverage();
-               if ($fLoadAvg > 2) sleep(60);
-               if ($fLoadAvg > 4) sleep(120);
-               if ($fLoadAvg > 6)
-               {
-                       echo "Bulk User: Temporary block due to high server load\n";
-                       exit;
-               }
-       }
-
        $oDB =& getDB();
        ini_set('memory_limit', '200M');
 
diff --git a/website/robots.txt b/website/robots.txt
new file mode 100644 (file)
index 0000000..e4d3d3f
--- /dev/null
@@ -0,0 +1,10 @@
+User-agent: ia_archiver
+Allow: /
+
+User-agent: *
+Disallow: /search.php
+Disallow: /search
+Disallow: /details.php
+Disallow: /details
+Disallow: /reverse.php
+Disallow: /reverse
index 872d9801761a58664fa25b7221a47ceb9aa2f677..a3a926fd951f440c343857884bed77937b0bb6e5 100755 (executable)
        $aLangPrefOrder = getPreferredLanguages();
        $oGeocode->setLanguagePreference($aLangPrefOrder);
 
+    /*
        if (isset($aLangPrefOrder['name:de'])) $oGeocode->setReverseInPlan(true);
        if (isset($aLangPrefOrder['name:ru'])) $oGeocode->setReverseInPlan(true);
        if (isset($aLangPrefOrder['name:ja'])) $oGeocode->setReverseInPlan(true);
        if (isset($aLangPrefOrder['name:pl'])) $oGeocode->setReverseInPlan(true);
+    */
 
        // Format for output
        $sOutputFormat = 'html';