From: Sarah Hoffmann Date: Fri, 7 Feb 2014 22:42:53 +0000 (+0100) Subject: Merge remote-tracking branch 'upstream/master' X-Git-Tag: deploy~552 X-Git-Url: https://git.openstreetmap.org/nominatim.git/commitdiff_plain/616d96bf9cf48b1cc61c902d79fa85fdc977970a?hp=98b93df83d35f9062c85a5c93692206dfcbfa92d Merge remote-tracking branch 'upstream/master' --- diff --git a/lib/Geocode.php b/lib/Geocode.php index 132be9a5..532a7720 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -15,7 +15,7 @@ protected $aExcludePlaceIDs = array(); protected $bDeDupe = true; - protected $bReverseInPlan = false; + protected $bReverseInPlan = true; protected $iLimit = 20; protected $iFinalLimit = 10; @@ -27,6 +27,8 @@ protected $bBoundedSearch = false; protected $aViewBox = false; + protected $sViewboxSmallSQL = false; + protected $sViewboxLargeSQL = false; protected $aRoutePoints = false; protected $iMaxRank = 20; @@ -278,12 +280,16 @@ // Get the details for display (is this a redundant extra step?) $sPlaceIDs = join(',',$aPlaceIDs); + $sImportanceSQL = ''; + if ($this->sViewboxSmallSQL) $sImportanceSQL .= " case when ST_Contains($this->sViewboxSmallSQL, ST_Collect(centroid)) THEN 1 ELSE 0.75 END * "; + if ($this->sViewboxLargeSQL) $sImportanceSQL .= " case when ST_Contains($this->sViewboxLargeSQL, ST_Collect(centroid)) THEN 1 ELSE 0.75 END * "; + $sSQL = "select osm_type,osm_id,class,type,admin_level,rank_search,rank_address,min(place_id) as place_id, min(parent_place_id) as parent_place_id, calculated_country_code as country_code,"; $sSQL .= "get_address_by_language(place_id, $sLanguagePrefArraySQL) as langaddress,"; $sSQL .= "get_name_by_language(name, $sLanguagePrefArraySQL) as placename,"; $sSQL .= "get_name_by_language(name, ARRAY['ref']) as ref,"; $sSQL .= "avg(ST_X(centroid)) as lon,avg(ST_Y(centroid)) as lat, "; - $sSQL .= "coalesce(importance,0.75-(rank_search::float/40)) as importance, "; + $sSQL .= $sImportanceSQL."coalesce(importance,0.75-(rank_search::float/40)) as importance, "; $sSQL .= "(select max(p.importance*(p.rank_address+2)) from place_addressline s, placex p where s.place_id = min(CASE WHEN placex.rank_search < 28 THEN placex.place_id ELSE placex.parent_place_id END) and p.place_id = s.address_place_id and s.isaddress and p.importance is not null) as addressimportance, "; $sSQL .= "(extratags->'place') as extra_place "; $sSQL .= "from placex where place_id in ($sPlaceIDs) "; @@ -308,20 +314,21 @@ $sSQL .= "null as placename,"; $sSQL .= "null as ref,"; $sSQL .= "avg(ST_X(centroid)) as lon,avg(ST_Y(centroid)) as lat, "; - $sSQL .= "-0.15 as importance, "; + $sSQL .= $sImportanceSQL."-1.15 as importance, "; $sSQL .= "(select max(p.importance*(p.rank_address+2)) from place_addressline s, placex p where s.place_id = min(location_property_tiger.parent_place_id) and p.place_id = s.address_place_id and s.isaddress and p.importance is not null) as addressimportance, "; $sSQL .= "null as extra_place "; $sSQL .= "from location_property_tiger where place_id in ($sPlaceIDs) "; $sSQL .= "and 30 between $this->iMinAddressRank and $this->iMaxAddressRank "; $sSQL .= "group by place_id"; - if (!$this->bDeDupe) $sSQL .= ",place_id"; + if (!$this->bDeDupe) $sSQL .= ",place_id "; + /* $sSQL .= " union "; $sSQL .= "select 'L' as osm_type,place_id as osm_id,'place' as class,'house' as type,null as admin_level,30 as rank_search,30 as rank_address,min(place_id) as place_id, min(parent_place_id) as parent_place_id,'us' as country_code,"; $sSQL .= "get_address_by_language(place_id, $sLanguagePrefArraySQL) as langaddress,"; $sSQL .= "null as placename,"; $sSQL .= "null as ref,"; $sSQL .= "avg(ST_X(centroid)) as lon,avg(ST_Y(centroid)) as lat, "; - $sSQL .= "-0.10 as importance, "; + $sSQL .= $sImportanceSQL."-1.10 as importance, "; $sSQL .= "(select max(p.importance*(p.rank_address+2)) from place_addressline s, placex p where s.place_id = min(location_property_aux.parent_place_id) and p.place_id = s.address_place_id and s.isaddress and p.importance is not null) as addressimportance, "; $sSQL .= "null as extra_place "; $sSQL .= "from location_property_aux where place_id in ($sPlaceIDs) "; @@ -329,9 +336,10 @@ $sSQL .= "group by place_id"; if (!$this->bDeDupe) $sSQL .= ",place_id"; $sSQL .= ",get_address_by_language(place_id, $sLanguagePrefArraySQL) "; + */ } - $sSQL .= "order by importance desc"; + $sSQL .= " order by importance desc"; if (CONST_Debug) { echo "
"; var_dump($sSQL); } $aSearchResults = $this->oDB->getAll($sSQL); @@ -386,7 +394,8 @@ } // Hack to make it handle "new york, ny" (and variants) correctly - $sQuery = str_ireplace(array('New York, ny','new york, new york', 'New York ny','new york new york'), 'new york city, ny', $this->sQuery); + //$sQuery = str_ireplace(array('New York, ny','new york, new york', 'New York ny','new york new york'), 'new york city, ny', $this->sQuery); + $sQuery = $this->sQuery; // Conflicts between US state abreviations and various words for 'the' in different languages if (isset($this->aLangPrefOrder['name:en'])) @@ -397,7 +406,7 @@ } // View Box SQL - $sViewboxCentreSQL = $sViewboxSmallSQL = $sViewboxLargeSQL = false; + $sViewboxCentreSQL; $bBoundingBoxSearch = false; if ($this->aViewBox) { @@ -408,8 +417,8 @@ $aBigViewBox[1] = $this->aViewBox[1] + $fWidth; $aBigViewBox[3] = $this->aViewBox[3] - $fWidth; - $sViewboxSmallSQL = "ST_SetSRID(ST_MakeBox2D(ST_Point(".(float)$this->aViewBox[0].",".(float)$this->aViewBox[1]."),ST_Point(".(float)$this->aViewBox[2].",".(float)$this->aViewBox[3].")),4326)"; - $sViewboxLargeSQL = "ST_SetSRID(ST_MakeBox2D(ST_Point(".(float)$aBigViewBox[0].",".(float)$aBigViewBox[1]."),ST_Point(".(float)$aBigViewBox[2].",".(float)$aBigViewBox[3].")),4326)"; + $this->sViewboxSmallSQL = "ST_SetSRID(ST_MakeBox2D(ST_Point(".(float)$this->aViewBox[0].",".(float)$this->aViewBox[1]."),ST_Point(".(float)$this->aViewBox[2].",".(float)$this->aViewBox[3].")),4326)"; + $this->sViewboxLargeSQL = "ST_SetSRID(ST_MakeBox2D(ST_Point(".(float)$aBigViewBox[0].",".(float)$aBigViewBox[1]."),ST_Point(".(float)$aBigViewBox[2].",".(float)$aBigViewBox[3].")),4326)"; $bBoundingBoxSearch = $this->bBoundedSearch; } @@ -427,20 +436,20 @@ $sViewboxCentreSQL .= ")'::geometry,4326)"; $sSQL = "select st_buffer(".$sViewboxCentreSQL.",".(float)($_GET['routewidth']/69).")"; - $sViewboxSmallSQL = $this->oDB->getOne($sSQL); - if (PEAR::isError($sViewboxSmallSQL)) + $this->sViewboxSmallSQL = $this->oDB->getOne($sSQL); + if (PEAR::isError($this->sViewboxSmallSQL)) { - failInternalError("Could not get small viewbox.", $sSQL, $sViewboxSmallSQL); + failInternalError("Could not get small viewbox.", $sSQL, $this->sViewboxSmallSQL); } - $sViewboxSmallSQL = "'".$sViewboxSmallSQL."'::geometry"; + $this->sViewboxSmallSQL = "'".$this->sViewboxSmallSQL."'::geometry"; $sSQL = "select st_buffer(".$sViewboxCentreSQL.",".(float)($_GET['routewidth']/30).")"; - $sViewboxLargeSQL = $this->oDB->getOne($sSQL); - if (PEAR::isError($sViewboxLargeSQL)) + $this->sViewboxLargeSQL = $this->oDB->getOne($sSQL); + if (PEAR::isError($this->sViewboxLargeSQL)) { - failInternalError("Could not get large viewbox.", $sSQL, $sViewboxLargeSQL); + failInternalError("Could not get large viewbox.", $sSQL, $this->sViewboxLargeSQL); } - $sViewboxLargeSQL = "'".$sViewboxLargeSQL."'::geometry"; + $this->sViewboxLargeSQL = "'".$this->sViewboxLargeSQL."'::geometry"; $bBoundingBoxSearch = $this->bBoundedSearch; } @@ -811,6 +820,7 @@ $aSearch['sType'] = $aSearchTerm['type']; if (sizeof($aSearch['aName'])) $aSearch['sOperator'] = 'name'; else $aSearch['sOperator'] = 'near'; // near = in for the moment + if (strlen($aSearchTerm['operator']) == 0) $aSearch['iSearchRank'] += 1; // Do we have a shortcut id? if ($aSearch['sOperator'] == 'name') @@ -1090,7 +1100,7 @@ { $sSQL = "select place_id from place_classtype_".$aSearch['sClass']."_".$aSearch['sType']." ct"; if ($sCountryCodesSQL) $sSQL .= " join placex using (place_id)"; - $sSQL .= " where st_contains($sViewboxSmallSQL, ct.centroid)"; + $sSQL .= " where st_contains($this->sViewboxSmallSQL, ct.centroid)"; if ($sCountryCodesSQL) $sSQL .= " and calculated_country_code in ($sCountryCodesSQL)"; if (sizeof($this->aExcludePlaceIDs)) { @@ -1108,7 +1118,7 @@ { $sSQL = "select place_id from place_classtype_".$aSearch['sClass']."_".$aSearch['sType']." ct"; if ($sCountryCodesSQL) $sSQL .= " join placex using (place_id)"; - $sSQL .= " where st_contains($sViewboxLargeSQL, ct.centroid)"; + $sSQL .= " where st_contains($this->sViewboxLargeSQL, ct.centroid)"; if ($sCountryCodesSQL) $sSQL .= " and calculated_country_code in ($sCountryCodesSQL)"; if ($sViewboxCentreSQL) $sSQL .= " order by st_distance($sViewboxCentreSQL, ct.centroid) asc"; $sSQL .= " limit $this->iLimit"; @@ -1119,7 +1129,7 @@ else { $sSQL = "select place_id from placex where class='".$aSearch['sClass']."' and type='".$aSearch['sType']."'"; - $sSQL .= " and st_contains($sViewboxSmallSQL, geometry) and linked_place_id is null"; + $sSQL .= " and st_contains($this->sViewboxSmallSQL, geometry) and linked_place_id is null"; if ($sCountryCodesSQL) $sSQL .= " and calculated_country_code in ($sCountryCodesSQL)"; if ($sViewboxCentreSQL) $sSQL .= " order by st_distance($sViewboxCentreSQL, centroid) asc"; $sSQL .= " limit $this->iLimit"; @@ -1139,7 +1149,7 @@ // TODO: filter out the pointless search terms (2 letter name tokens and less) // they might be right - but they are just too darned expensive to run if (sizeof($aSearch['aName'])) $aTerms[] = "name_vector @> ARRAY[".join($aSearch['aName'],",")."]"; - if (sizeof($aSearch['aNameNonSearch'])) $aTerms[] = "array_cat(name_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aNameNonSearch'],",")."]"; + //if (sizeof($aSearch['aNameNonSearch'])) $aTerms[] = "array_cat(name_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aNameNonSearch'],",")."]"; if (sizeof($aSearch['aAddress']) && $aSearch['aName'] != $aSearch['aAddress']) { // For infrequent name terms disable index usage for address @@ -1147,12 +1157,13 @@ sizeof($aSearch['aName']) == 1 && $aWordFrequencyScores[$aSearch['aName'][reset($aSearch['aName'])]] < CONST_Search_NameOnlySearchFrequencyThreshold) { - $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join(array_merge($aSearch['aAddress'],$aSearch['aAddressNonSearch']),",")."]"; + //$aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join(array_merge($aSearch['aAddress'],$aSearch['aAddressNonSearch']),",")."]"; + $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddress'],",")."]"; } else { $aTerms[] = "nameaddress_vector @> ARRAY[".join($aSearch['aAddress'],",")."]"; - if (sizeof($aSearch['aAddressNonSearch'])) $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddressNonSearch'],",")."]"; + //if (sizeof($aSearch['aAddressNonSearch'])) $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddressNonSearch'],",")."]"; } } if ($aSearch['sCountryCode']) $aTerms[] = "country_code = '".pg_escape_string($aSearch['sCountryCode'])."'"; @@ -1171,12 +1182,12 @@ $aTerms[] = "country_code in ($sCountryCodesSQL)"; } - if ($bBoundingBoxSearch) $aTerms[] = "centroid && $sViewboxSmallSQL"; + if ($bBoundingBoxSearch) $aTerms[] = "centroid && $this->sViewboxSmallSQL"; if ($sNearPointSQL) $aOrder[] = "ST_Distance($sNearPointSQL, centroid) asc"; $sImportanceSQL = '(case when importance = 0 OR importance IS NULL then 0.75-(search_rank::float/40) else importance end)'; - if ($sViewboxSmallSQL) $sImportanceSQL .= " * case when ST_Contains($sViewboxSmallSQL, centroid) THEN 1 ELSE 0.5 END"; - if ($sViewboxLargeSQL) $sImportanceSQL .= " * case when ST_Contains($sViewboxLargeSQL, centroid) THEN 1 ELSE 0.5 END"; + if ($this->sViewboxSmallSQL) $sImportanceSQL .= " * case when ST_Contains($this->sViewboxSmallSQL, centroid) THEN 1 ELSE 0.5 END"; + if ($this->sViewboxLargeSQL) $sImportanceSQL .= " * case when ST_Contains($this->sViewboxLargeSQL, centroid) THEN 1 ELSE 0.5 END"; $aOrder[] = "$sImportanceSQL DESC"; if (sizeof($aSearch['aFullNameAddress'])) { @@ -1240,6 +1251,7 @@ $aPlaceIDs = $this->oDB->getCol($sSQL); // If not try the aux fallback table + /* if (!sizeof($aPlaceIDs)) { $sSQL = "select place_id from location_property_aux where parent_place_id in (".$sPlaceIDs.") and housenumber = '".pg_escape_string($aSearch['sHouseNumber'])."'"; @@ -1251,6 +1263,7 @@ if (CONST_Debug) var_dump($sSQL); $aPlaceIDs = $this->oDB->getCol($sSQL); } + */ if (!sizeof($aPlaceIDs)) { @@ -1502,10 +1515,12 @@ { preg_match_all('/(-?[0-9.]+) (-?[0-9.]+)/',$aMatch[1],$aPolyPoints,PREG_SET_ORDER); } + /* elseif (preg_match('#MULTIPOLYGON\\(\\(\\(([- 0-9.,]+)#',$aPointPolygon['astext'],$aMatch)) { preg_match_all('/(-?[0-9.]+) (-?[0-9.]+)/',$aMatch[1],$aPolyPoints,PREG_SET_ORDER); } + */ elseif (preg_match('#POINT\\((-?[0-9.]+) (-?[0-9.]+)\\)#',$aPointPolygon['astext'],$aMatch)) { $fRadius = 0.01; diff --git a/lib/init-website.php b/lib/init-website.php index 6db83988..013aee4b 100644 --- a/lib/init-website.php +++ b/lib/init-website.php @@ -5,6 +5,7 @@ { header("Access-Control-Allow-Origin: *"); header("Access-Control-Allow-Methods: OPTIONS,GET"); + header("Access-Control-Max-Age: 8640000"); if (!empty($_SERVER['HTTP_ACCESS_CONTROL_REQUEST_HEADERS'])) { header("Access-Control-Allow-Headers: ".$_SERVER['HTTP_ACCESS_CONTROL_REQUEST_HEADERS']); @@ -12,53 +13,4 @@ } if ($_SERVER['REQUEST_METHOD'] == 'OPTIONS') exit; - if (CONST_ClosedForIndexing && strpos(CONST_ClosedForIndexingExceptionIPs, ','.$_SERVER["REMOTE_ADDR"].',') === false) - { - echo "Closed for re-indexing..."; - exit; - } - - $aBucketKeys = array(); - - if (isset($_SERVER["HTTP_REFERER"])) $aBucketKeys[] = str_replace('www.','',strtolower(parse_url($_SERVER["HTTP_REFERER"], PHP_URL_HOST))); - if (isset($_SERVER["REMOTE_ADDR"])) $aBucketKeys[] = $_SERVER["REMOTE_ADDR"]; - if (isset($_GET["email"])) $aBucketKeys[] = $_GET["email"]; - - $fBucketVal = doBucket($aBucketKeys, - (defined('CONST_ConnectionBucket_PageType')?constant('CONST_ConnectionBucket_Cost_'.CONST_ConnectionBucket_PageType):1) + user_busy_cost(), - CONST_ConnectionBucket_LeakRate, CONST_ConnectionBucket_BlockLimit); - - if ($fBucketVal > CONST_ConnectionBucket_WaitLimit && $fBucketVal < CONST_ConnectionBucket_BlockLimit) - { - $m = getBucketMemcache(); - $iCurrentSleeping = $m->increment('sleepCounter'); - if (false === $iCurrentSleeping) - { - $m->add('sleepCounter', 0); - $iCurrentSleeping = $m->increment('sleepCounter'); - } - if ($iCurrentSleeping >= CONST_ConnectionBucket_MaxSleeping || isBucketSleeping($aBucketKeys)) - { - // Too many threads sleeping already. This becomes a hard block. - $fBucketVal = doBucket($aBucketKeys, CONST_ConnectionBucket_BlockLimit, CONST_ConnectionBucket_LeakRate, CONST_ConnectionBucket_BlockLimit); - } - else - { - setBucketSleeping($aBucketKeys, true); - sleep(($fBucketVal - CONST_ConnectionBucket_WaitLimit)/CONST_ConnectionBucket_LeakRate); - $fBucketVal = doBucket($aBucketKeys, CONST_ConnectionBucket_LeakRate, CONST_ConnectionBucket_LeakRate, CONST_ConnectionBucket_BlockLimit); - setBucketSleeping($aBucketKeys, false); - } - $m->decrement('sleepCounter'); - } - - if (strpos(CONST_BlockedIPs, ','.$_SERVER["REMOTE_ADDR"].',') !== false || $fBucketVal >= CONST_ConnectionBucket_BlockLimit) - { - header("HTTP/1.0 429 Too Many Requests"); - echo "Your IP has been blocked. \n"; - echo CONST_BlockMessage; - exit; - } - - header('Content-type: text/html; charset=utf-8'); - + header('Content-type: text/html; charset=utf-8'); diff --git a/lib/lib.php b/lib/lib.php index b2f85ca2..482e9598 100644 --- a/lib/lib.php +++ b/lib/lib.php @@ -169,7 +169,7 @@ { $aResult = array(array(join(' ',$aWords))); $sFirstToken = ''; - if ($iDepth < 8) { + if ($iDepth < 7) { while(sizeof($aWords) > 1) { $sWord = array_shift($aWords); @@ -246,7 +246,13 @@ if (sizeof($aNearPostcodes)) { - return array(array('lat' => $aNearPostcodes[0]['lat'], 'lon' => $aNearPostcodes[0]['lon'], 'radius' => 0.005)); + $aPostcodes = array(); + foreach($aNearPostcodes as $aPostcode) + { + $aPostcodes[] = array('lat' => $aPostcode['lat'], 'lon' => $aPostcode['lon'], 'radius' => 0.005); + } + + return $aPostcodes; } return false; diff --git a/lib/log.php b/lib/log.php index b08f5383..501259d0 100644 --- a/lib/log.php +++ b/lib/log.php @@ -17,15 +17,15 @@ if (CONST_Log_DB) { - // Log - if ($sType == 'search') - { - $oDB->query('insert into query_log values ('.getDBQuoted($hLog[0]).','.getDBQuoted($hLog[3]).','.getDBQuoted($hLog[1]).')'); - } - - $sSQL = 'insert into new_query_log (type,starttime,query,ipaddress,useragent,language,format)'; + if (isset($_GET['email'])) + $sUserAgent = $_GET['email']; + elseif (isset($_SERVER['HTTP_REFERER'])) + $sUserAgent = $_SERVER['HTTP_REFERER']; + else + $sUserAgent = $_SERVER['HTTP_USER_AGENT']; + $sSQL = 'insert into new_query_log (type,starttime,query,ipaddress,useragent,language,format,searchterm)'; $sSQL .= ' values ('.getDBQuoted($sType).','.getDBQuoted($hLog[0]).','.getDBQuoted($hLog[2]); - $sSQL .= ','.getDBQuoted($hLog[1]).','.getDBQuoted($_SERVER['HTTP_USER_AGENT']).','.getDBQuoted(join(',',$aLanguageList)).','.getDBQuoted($sOutputFormat).')'; + $sSQL .= ','.getDBQuoted($hLog[1]).','.getDBQuoted($sUserAgent).','.getDBQuoted(join(',',$aLanguageList)).','.getDBQuoted($sOutputFormat).','.getDBQuoted($hLog[3]).')'; $oDB->query($sSQL); } @@ -59,12 +59,6 @@ if (CONST_Log_DB) { - $sSQL = 'update query_log set endtime = '.getDBQuoted($sEndTime).', results = '.$iNumResults; - $sSQL .= ' where starttime = '.getDBQuoted($hLog[0]); - $sSQL .= ' and ipaddress = '.getDBQuoted($hLog[1]); - $sSQL .= ' and query = '.getDBQuoted($hLog[3]); - $oDB->query($sSQL); - $sSQL = 'update new_query_log set endtime = '.getDBQuoted($sEndTime).', results = '.$iNumResults; $sSQL .= ' where starttime = '.getDBQuoted($hLog[0]); $sSQL .= ' and ipaddress = '.getDBQuoted($hLog[1]); diff --git a/munin/nominatim_throttled_ips b/munin/nominatim_throttled_ips new file mode 100755 index 00000000..2fa1d80e --- /dev/null +++ b/munin/nominatim_throttled_ips @@ -0,0 +1,28 @@ +#!/bin/sh +# +# Plugin to monitor the number of IPs in special pools +# +# Parameters: +# +# config (required) +# autoconf (optional - used by munin-config) +# + +if [ "$1" = "config" ]; then + + echo 'graph_title Restricted IPs' + echo 'graph_args -l 0' + echo 'graph_vlabel number of IPs' + echo 'graph_category nominatim' + echo 'bulk.label bulk' + echo 'bulk.draw AREA' + echo 'bulk.type GAUGE' + echo 'block.label blocked' + echo 'block.draw STACK' + echo 'block.type GAUGE' + exit 0 +fi + +BASEDIR="$(dirname "$(readlink -f "$0")")" + +cut -f 2 -d ' ' $BASEDIR/../settings/ip_blocks.map | uniq -c | sed 's:[[:space:]]*\([0-9]\+\) \(.*\):\2.value \1:' diff --git a/settings/settings.php b/settings/settings.php index fd50a6f0..11e450e9 100644 --- a/settings/settings.php +++ b/settings/settings.php @@ -52,13 +52,15 @@ // Website settings @define('CONST_NoAccessControl', true); - @define('CONST_ClosedForIndexing', false); - @define('CONST_ClosedForIndexingExceptionIPs', ''); @define('CONST_BlockedIPs', ''); + @define('CONST_IPBanFile', CONST_BasePath.'/settings/ip_blocks'); + @define('CONST_WhitelistedIPs', ''); + @define('CONST_BlockedUserAgents', ''); + @define('CONST_BlockReverseMaxLoad', 15); @define('CONST_BulkUserIPs', ''); @define('CONST_BlockMessage', ''); // additional info to show for blocked IPs - @define('CONST_Website_BaseURL', 'http://'.php_uname('n').'/'); + @define('CONST_Website_BaseURL', 'http://nominatim.openstreetmap.org/'); @define('CONST_Tile_Default', 'Mapnik'); @define('CONST_Default_Language', false); diff --git a/sql/functions.sql b/sql/functions.sql index 49a6e972..028a72b4 100644 --- a/sql/functions.sql +++ b/sql/functions.sql @@ -937,6 +937,11 @@ DECLARE BEGIN --DEBUG: RAISE WARNING '% %',NEW.osm_type,NEW.osm_id; + -- remove operator tag for most places, messes too much with search_name indexes + IF NEW.class not in ('amenity', 'shop') THEN + NEW.name := delete(NEW.name, 'operator'); + END IF; + -- just block these IF NEW.class in ('landuse','natural') and NEW.name is null THEN -- RAISE WARNING 'empty landuse %',NEW.osm_id; @@ -1357,13 +1362,26 @@ BEGIN NEW.centroid := null; -- reclaculate country and partition - IF NEW.rank_search >= 4 THEN - --NEW.calculated_country_code := lower(get_country_code(NEW.geometry, NEW.country_code)); - NEW.calculated_country_code := lower(get_country_code(place_centroid)); + IF NEW.rank_search = 4 THEN + -- for countries, believe the mapped country code, + -- so that we remain in the right partition if the boundaries + -- suddenly expand. + NEW.partition := get_partition(place_centroid, lower(NEW.country_code)); + IF NEW.partition = 0 THEN + NEW.calculated_country_code := lower(get_country_code(place_centroid)); + NEW.partition := get_partition(place_centroid, NEW.calculated_country_code); + ELSE + NEW.calculated_country_code := lower(NEW.country_code); + END IF; ELSE - NEW.calculated_country_code := NULL; + IF NEW.rank_search > 4 THEN + --NEW.calculated_country_code := lower(get_country_code(NEW.geometry, NEW.country_code)); + NEW.calculated_country_code := lower(get_country_code(place_centroid)); + ELSE + NEW.calculated_country_code := NULL; + END IF; + NEW.partition := get_partition(place_centroid, NEW.calculated_country_code); END IF; - NEW.partition := get_partition(place_centroid, NEW.calculated_country_code); NEW.geometry_sector := geometry_sector(NEW.partition, place_centroid); -- Adding ourselves to the list simplifies address calculations later @@ -2043,6 +2061,11 @@ BEGIN --DEBUG: RAISE WARNING '%', existingplacex; END IF; + -- remove operator tag for most places, messes too much with search_name indexes + IF NEW.class not in ('amenity', 'shop') THEN + NEW.name := delete(NEW.name, 'operator'); + END IF; + -- Just block these - lots and pointless IF NEW.class in ('landuse','natural') and NEW.name is null THEN RETURN null; @@ -2231,6 +2254,12 @@ BEGIN END IF; + -- refuse to update multiplpoygons with too many objects, too much of a performance hit + IF ST_NumGeometries(NEW.geometry) > 2000 THEN + RAISE WARNING 'Dropping update of % % because of geometry complexity.', NEW.osm_type, NEW.osm_id; + RETURN NULL; + END IF; + IF coalesce(existing.name::text, '') != coalesce(NEW.name::text, '') OR coalesce(existing.extratags::text, '') != coalesce(NEW.extratags::text, '') OR coalesce(existing.housenumber, '') != coalesce(NEW.housenumber, '') diff --git a/sql/indices.src.sql b/sql/indices.src.sql index ea57e74b..4dc23bd2 100644 --- a/sql/indices.src.sql +++ b/sql/indices.src.sql @@ -1,29 +1,29 @@ -- Indices used only during search and update. -- These indices are created only after the indexing process is done. -CREATE INDEX idx_word_word_id on word USING BTREE (word_id); +CREATE INDEX idx_word_word_id on word USING BTREE (word_id) TABLESPACE ssd; -CREATE INDEX idx_search_name_nameaddress_vector ON search_name USING GIN (nameaddress_vector) WITH (fastupdate = off); -CREATE INDEX idx_search_name_name_vector ON search_name USING GIN (name_vector) WITH (fastupdate = off); -CREATE INDEX idx_search_name_centroid ON search_name USING GIST (centroid); +CREATE INDEX idx_search_name_nameaddress_vector ON search_name USING GIN (nameaddress_vector) WITH (fastupdate = off) TABLESPACE ssd; +CREATE INDEX idx_search_name_name_vector ON search_name USING GIN (name_vector) WITH (fastupdate = off) TABLESPACE ssd; +CREATE INDEX idx_search_name_centroid ON search_name USING GIST (centroid) TABLESPACE ssd; -CREATE INDEX idx_place_addressline_address_place_id on place_addressline USING BTREE (address_place_id); +CREATE INDEX idx_place_addressline_address_place_id on place_addressline USING BTREE (address_place_id) TABLESPACE ssd; -CREATE INDEX idx_place_boundingbox_place_id on place_boundingbox USING BTREE (place_id); -CREATE INDEX idx_place_boundingbox_outline ON place_boundingbox USING GIST (outline); +CREATE INDEX idx_place_boundingbox_place_id on place_boundingbox USING BTREE (place_id) TABLESPACE ssd; +CREATE INDEX idx_place_boundingbox_outline ON place_boundingbox USING GIST (outline) TABLESPACE ssd; DROP INDEX IF EXISTS idx_placex_rank_search; -CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search); -CREATE INDEX idx_placex_rank_address ON placex USING BTREE (rank_address); -CREATE INDEX idx_placex_pendingsector ON placex USING BTREE (rank_search,geometry_sector) where indexed_status > 0; -CREATE INDEX idx_placex_parent_place_id ON placex USING BTREE (parent_place_id) where parent_place_id IS NOT NULL; -CREATE INDEX idx_placex_interpolation ON placex USING BTREE (geometry_sector) where indexed_status > 0 and class='place' and type='houses'; -CREATE INDEX idx_location_area_country_place_id ON location_area_country USING BTREE (place_id); +CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search) TABLESPACE ssd; +CREATE INDEX idx_placex_rank_address ON placex USING BTREE (rank_address) TABLESPACE ssd; +CREATE INDEX idx_placex_pendingsector ON placex USING BTREE (rank_search,geometry_sector) TABLESPACE ssd where indexed_status > 0; +CREATE INDEX idx_placex_parent_place_id ON placex USING BTREE (parent_place_id) TABLESPACE ssd where parent_place_id IS NOT NULL; +CREATE INDEX idx_placex_interpolation ON placex USING BTREE (geometry_sector) TABLESPACE ssd where indexed_status > 0 and class='place' and type='houses'; +CREATE INDEX idx_location_area_country_place_id ON location_area_country USING BTREE (place_id) TABLESPACE ssd; -CREATE INDEX idx_search_name_country_centroid ON search_name_country USING GIST (centroid); +CREATE INDEX idx_search_name_country_centroid ON search_name_country USING GIST (centroid) TABLESPACE ssd; -- start -CREATE INDEX idx_location_property_-partition-_centroid ON location_property_-partition- USING GIST (centroid); +CREATE INDEX idx_location_property_-partition-_centroid ON location_property_-partition- USING GIST (centroid) TABLESPACE ssd; -- end CREATE UNIQUE INDEX idx_place_osm_unique on place using btree(osm_id,osm_type,class,type); diff --git a/sql/partition-tables.src.sql b/sql/partition-tables.src.sql index 29e3d282..e0b1fae0 100644 --- a/sql/partition-tables.src.sql +++ b/sql/partition-tables.src.sql @@ -35,21 +35,21 @@ SELECT AddGeometryColumn('search_name_blank', 'centroid', 4326, 'GEOMETRY', 2); CREATE TABLE location_area_country () INHERITS (location_area_large); -CREATE INDEX idx_location_area_country_geometry ON location_area_country USING GIST (geometry); +CREATE INDEX idx_location_area_country_geometry ON location_area_country USING GIST (geometry) TABLESPACE ssd; CREATE TABLE search_name_country () INHERITS (search_name_blank); -CREATE INDEX idx_search_name_country_place_id ON search_name_country USING BTREE (place_id); -CREATE INDEX idx_search_name_country_name_vector ON search_name_country USING GIN (name_vector) WITH (fastupdate = off); +CREATE INDEX idx_search_name_country_place_id ON search_name_country USING BTREE (place_id) TABLESPACE ssd; +CREATE INDEX idx_search_name_country_name_vector ON search_name_country USING GIN (name_vector) WITH (fastupdate = off) TABLESPACE ssd; -- start CREATE TABLE location_area_large_-partition- () INHERITS (location_area_large); -CREATE INDEX idx_location_area_large_-partition-_place_id ON location_area_large_-partition- USING BTREE (place_id); -CREATE INDEX idx_location_area_large_-partition-_geometry ON location_area_large_-partition- USING GIST (geometry); +CREATE INDEX idx_location_area_large_-partition-_place_id ON location_area_large_-partition- USING BTREE (place_id) TABLESPACE ssd; +CREATE INDEX idx_location_area_large_-partition-_geometry ON location_area_large_-partition- USING GIST (geometry) TABLESPACE ssd; CREATE TABLE search_name_-partition- () INHERITS (search_name_blank); -CREATE INDEX idx_search_name_-partition-_place_id ON search_name_-partition- USING BTREE (place_id); -CREATE INDEX idx_search_name_-partition-_centroid ON search_name_-partition- USING GIST (centroid); -CREATE INDEX idx_search_name_-partition-_name_vector ON search_name_-partition- USING GIN (name_vector) WITH (fastupdate = off); +CREATE INDEX idx_search_name_-partition-_place_id ON search_name_-partition- USING BTREE (place_id) TABLESPACE ssd; +CREATE INDEX idx_search_name_-partition-_centroid ON search_name_-partition- USING GIST (centroid) TABLESPACE ssd; +CREATE INDEX idx_search_name_-partition-_name_vector ON search_name_-partition- USING GIN (name_vector) WITH (fastupdate = off) TABLESPACE ssd; CREATE TABLE location_property_-partition- () INHERITS (location_property); CREATE INDEX idx_location_property_-partition-_place_id ON location_property_-partition- USING BTREE (place_id); @@ -62,7 +62,7 @@ CREATE TABLE location_road_-partition- ( country_code VARCHAR(2) ); SELECT AddGeometryColumn('location_road_-partition-', 'geometry', 4326, 'GEOMETRY', 2); -CREATE INDEX idx_location_road_-partition-_geometry ON location_road_-partition- USING GIST (geometry); -CREATE INDEX idx_location_road_-partition-_place_id ON location_road_-partition- USING BTREE (place_id); +CREATE INDEX idx_location_road_-partition-_geometry ON location_road_-partition- USING GIST (geometry) TABLESPACE ssd; +CREATE INDEX idx_location_road_-partition-_place_id ON location_road_-partition- USING BTREE (place_id) TABLESPACE ssd; -- end diff --git a/sql/tables.sql b/sql/tables.sql index 038a373e..32ff5e87 100644 --- a/sql/tables.sql +++ b/sql/tables.sql @@ -23,19 +23,6 @@ CREATE TABLE import_npi_log ( event text ); ---drop table IF EXISTS query_log; -CREATE TABLE query_log ( - starttime timestamp, - query text, - ipaddress text, - endtime timestamp, - results integer - ); -CREATE INDEX idx_query_log ON query_log USING BTREE (starttime); -GRANT SELECT ON query_log TO "www-data" ; -GRANT INSERT ON query_log TO "www-data" ; -GRANT UPDATE ON query_log TO "www-data" ; - CREATE TABLE new_query_log ( type text, starttime timestamp, @@ -43,6 +30,7 @@ CREATE TABLE new_query_log ( useragent text, language text, query text, + searchterm text, endtime timestamp, results integer, format text, @@ -53,9 +41,6 @@ GRANT INSERT ON new_query_log TO "www-data" ; GRANT UPDATE ON new_query_log TO "www-data" ; GRANT SELECT ON new_query_log TO "www-data" ; -create view vw_search_query_log as SELECT substr(query, 1, 50) AS query, starttime, endtime - starttime AS duration, substr(useragent, 1, 20) as -useragent, language, results, ipaddress FROM new_query_log WHERE type = 'search' ORDER BY starttime DESC; - --drop table IF EXISTS report_log; CREATE TABLE report_log ( starttime timestamp, @@ -76,8 +61,8 @@ CREATE TABLE word ( country_code varchar(2), search_name_count INTEGER, operator TEXT - ); -CREATE INDEX idx_word_word_token on word USING BTREE (word_token); + ) TABLESPACE ssd; +CREATE INDEX idx_word_word_token on word USING BTREE (word_token) TABLESPACE ssd; GRANT SELECT ON word TO "www-data" ; DROP SEQUENCE seq_word; CREATE SEQUENCE seq_word start 1; @@ -132,7 +117,7 @@ CREATE TABLE search_name ( nameaddress_vector integer[] ); SELECT AddGeometryColumn('search_name', 'centroid', 4326, 'GEOMETRY', 2); -CREATE INDEX idx_search_name_place_id ON search_name USING BTREE (place_id); +CREATE INDEX idx_search_name_place_id ON search_name USING BTREE (place_id) TABLESPACE ssd; drop table IF EXISTS place_addressline; CREATE TABLE place_addressline ( @@ -142,8 +127,8 @@ CREATE TABLE place_addressline ( isaddress boolean, distance float, cached_rank_address integer - ); -CREATE INDEX idx_place_addressline_place_id on place_addressline USING BTREE (place_id); + ) TABLESPACE data; +CREATE INDEX idx_place_addressline_place_id on place_addressline USING BTREE (place_id) TABLESPACE ssd; drop table IF EXISTS place_boundingbox CASCADE; CREATE TABLE place_boundingbox ( @@ -196,14 +181,14 @@ CREATE TABLE placex ( wikipedia TEXT, -- calculated wikipedia article name (language:title) geometry_sector INTEGER, calculated_country_code varchar(2) - ); + ) TABLESPACE ssd; SELECT AddGeometryColumn('placex', 'centroid', 4326, 'GEOMETRY', 2); -CREATE UNIQUE INDEX idx_place_id ON placex USING BTREE (place_id); -CREATE INDEX idx_placex_osmid ON placex USING BTREE (osm_type, osm_id); -CREATE INDEX idx_placex_linked_place_id ON placex USING BTREE (linked_place_id); -CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search, geometry_sector); -CREATE INDEX idx_placex_geometry ON placex USING GIST (geometry); -CREATE INDEX idx_placex_adminname on placex USING BTREE (make_standard_name(name->'name'),rank_search) WHERE osm_type='N' and rank_search < 26; +CREATE UNIQUE INDEX idx_place_id ON placex USING BTREE (place_id) TABLESPACE ssd; +CREATE INDEX idx_placex_osmid ON placex USING BTREE (osm_type, osm_id) TABLESPACE ssd; +CREATE INDEX idx_placex_linked_place_id ON placex USING BTREE (linked_place_id) TABLESPACE ssd; +CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search, geometry_sector) TABLESPACE ssd; +CREATE INDEX idx_placex_geometry ON placex USING GIST (geometry) TABLESPACE ssd; +CREATE INDEX idx_placex_adminname on placex USING BTREE (make_standard_name(name->'name'),rank_search) TABLESPACE ssd WHERE osm_type='N' and rank_search < 26; --CREATE INDEX idx_placex_indexed ON placex USING BTREE (indexed); @@ -237,7 +222,7 @@ CREATE TRIGGER place_before_insert BEFORE INSERT ON place FOR EACH ROW EXECUTE PROCEDURE place_insert(); drop index idx_placex_sector; -CREATE INDEX idx_placex_sector ON placex USING BTREE (geometry_sector,rank_address,osm_type,osm_id); +CREATE INDEX idx_placex_sector ON placex USING BTREE (geometry_sector,rank_address,osm_type,osm_id) TABLESPACE ssd; DROP SEQUENCE seq_postcodes; CREATE SEQUENCE seq_postcodes start 1; @@ -255,7 +240,7 @@ CREATE TABLE import_polygon_error ( ); SELECT AddGeometryColumn('import_polygon_error', 'prevgeometry', 4326, 'GEOMETRY', 2); SELECT AddGeometryColumn('import_polygon_error', 'newgeometry', 4326, 'GEOMETRY', 2); -CREATE INDEX idx_import_polygon_error_osmid ON import_polygon_error USING BTREE (osm_type, osm_id); +CREATE INDEX idx_import_polygon_error_osmid ON import_polygon_error USING BTREE (osm_type, osm_id) TABLESPACE ssd; GRANT SELECT ON import_polygon_error TO "www-data"; drop table import_polygon_delete; @@ -265,7 +250,7 @@ CREATE TABLE import_polygon_delete ( class TEXT NOT NULL, type TEXT NOT NULL ); -CREATE INDEX idx_import_polygon_delete_osmid ON import_polygon_delete USING BTREE (osm_type, osm_id); +CREATE INDEX idx_import_polygon_delete_osmid ON import_polygon_delete USING BTREE (osm_type, osm_id) TABLESPACE ssd; GRANT SELECT ON import_polygon_delete TO "www-data"; drop sequence file; diff --git a/sql/tiger_import_finish.sql b/sql/tiger_import_finish.sql index a0f4efc7..d6ec1833 100644 --- a/sql/tiger_import_finish.sql +++ b/sql/tiger_import_finish.sql @@ -3,10 +3,10 @@ CREATE UNIQUE INDEX idx_location_property_tiger_place_id_imp ON location_propert GRANT SELECT ON location_property_tiger_import TO "www-data"; -DROP TABLE location_property_tiger; -ALTER TABLE location_property_tiger_import RENAME TO location_property_tiger; +--DROP TABLE location_property_tiger; +--ALTER TABLE location_property_tiger_import RENAME TO location_property_tiger; -ALTER INDEX idx_location_property_tiger_housenumber_parent_place_id_imp RENAME TO idx_location_property_tiger_housenumber_parent_place_id; -ALTER INDEX idx_location_property_tiger_place_id_imp RENAME TO idx_location_property_tiger_place_id; +--ALTER INDEX idx_location_property_tiger_housenumber_parent_place_id_imp RENAME TO idx_location_property_tiger_housenumber_parent_place_id; +--ALTER INDEX idx_location_property_tiger_place_id_imp RENAME TO idx_location_property_tiger_place_id; DROP FUNCTION tigger_create_interpolation (linegeo geometry, in_startnumber integer, in_endnumber integer, interpolationtype text, in_street text, in_isin text, in_postcode text); diff --git a/utils/cron_banip.py b/utils/cron_banip.py new file mode 100755 index 00000000..c8fdd78c --- /dev/null +++ b/utils/cron_banip.py @@ -0,0 +1,238 @@ +#!/usr/bin/python +# +# Search logs for high-bandwith users and create a list of suspicious IPs. +# There are three states: bulk, block, ban. The first are bulk requesters +# that need throtteling, the second bulk requesters that have overdone it +# and the last manually banned IPs. +# +# The list can then be used in apache using rewrite rules to +# direct bulk users to smaller thread pools or block them. A +# typical apache config that uses php-fpm pools would look +# like this: +# +# Alias /nominatim-www/ "/var/www/nominatim/" +# Alias /nominatim-bulk/ "/var/www/nominatim/" +# +# Options MultiViews FollowSymLinks +# AddType text/html .php +# +# +# +# AddHandler fcgi:/var/run/php5-fpm-www.sock .php +# +# +# AddHandler fcgi:/var/run/php5-fpm-bulk.sock .php +# +# +# Redirect 509 /nominatim-block/ +# ErrorDocument 509 "Bandwidth limit exceeded." +# Redirect 403 /nominatim-ban/ +# ErrorDocument 403 "Access blocked." +# +# RewriteEngine On +# RewriteMap bulklist txt:/home/wherever/ip-block.map +# RewriteRule ^/(.*) /nominatim-${bulklist:%{REMOTE_ADDR}|www}/$1 [PT] +# + +import os +import psycopg2 +import datetime + +BASEDIR = os.path.normpath(os.path.join(os.path.realpath(__file__), '../..')) + +# +# DEFAULT SETTINGS +# +# Copy into settings/ip_blcoks.conf and adapt as required. +# +BLOCKEDFILE= BASEDIR + '/settings/ip_blocks.map' +LOGFILE= BASEDIR + '/log/restricted_ip.log' + +# space-separated list of IPs that are never banned +WHITELIST = '' +# space-separated list of IPs manually blocked +BLACKLIST = '' +# user-agents that should be blocked from bulk mode +# (matched with startswith) +UA_BLOCKLIST = () + +# time before a automatically blocked IP is allowed back +BLOCKCOOLOFF_PERIOD='1 hour' +# quiet time before an IP is released from the bulk pool +BULKCOOLOFF_PERIOD='15 min' + +BULKLONG_LIMIT=8000 +BULKSHORT_LIMIT=2000 +BLOCK_UPPER=19000 +BLOCK_LOWER=4000 +BLOCK_LOADFAC=380 +BULK_LOADFAC=160 +BULK_LOWER=1500 +MAX_BULK_IPS=85 + +# +# END OF DEFAULT SETTINGS +# + +try: + execfile(os.path.expanduser(BASEDIR + "/settings/ip_blocks.conf")) +except IOError: + pass + +# read the previous blocklist +WHITELIST = set(WHITELIST.split()) if WHITELIST else set() +prevblocks = [] +prevbulks = [] +BLACKLIST = set(BLACKLIST.split()) if BLACKLIST else set() +newblocks = set() +newbulks = set() + +try: + fd = open(BLOCKEDFILE) + for line in fd: + ip, typ = line.strip().split(' ') + if ip not in BLACKLIST: + if typ == 'block': + prevblocks.append(ip) + elif typ == 'bulk': + prevbulks.append(ip) + fd.close() +except IOError: + pass #ignore non-existing file + +# determine current load +fd = open("/proc/loadavg") +avgload = int(float(fd.readline().split()[2])) +fd.close() +# DB load +conn = psycopg2.connect('dbname=nominatim') +cur = conn.cursor() +cur.execute("select count(*)/60 from new_query_log where starttime > now() - interval '1min'") +dbload = int(cur.fetchone()[0]) + +BLOCK_LIMIT = max(BLOCK_LOWER, BLOCK_UPPER - BLOCK_LOADFAC * (dbload - 75)) +BULKLONG_LIMIT = max(BULK_LOWER, BULKLONG_LIMIT - BULK_LOADFAC * (avgload - 14)) +if len(prevbulks) > MAX_BULK_IPS: + BLOCK_LIMIT = max(3600, BLOCK_LOWER - (len(prevbulks) - MAX_BULK_IPS)*10) + +# get the new block candidates +cur.execute(""" + SELECT ipaddress, max(count), max(ua) FROM + ((SELECT * FROM + (SELECT ipaddress, sum(case when endtime is null then 1 else 1+1.5*date_part('epoch',endtime-starttime) end) as count, substring(max(useragent) from 1 for 30) as ua FROM new_query_log + WHERE starttime > now() - interval '1 hour' GROUP BY ipaddress) as i + WHERE count > %s) + UNION + (SELECT ipaddress, count * 3, ua FROM + (SELECT ipaddress, sum(case when endtime is null then 1 else 1+1.5*date_part('epoch',endtime-starttime) end) as count, substring(max(useragent) from 1 for 30) as ua FROM new_query_log + WHERE starttime > now() - interval '10 min' GROUP BY ipaddress) as i + WHERE count > %s)) as o + GROUP BY ipaddress +""", (BULKLONG_LIMIT, BULKSHORT_LIMIT)) + +bulkips = {} +emergencyblocks = [] +useragentblocks = [] + +for c in cur: + if c[0] not in WHITELIST and c[0] not in BLACKLIST: + # check for user agents that receive an immediate block + missing_agent = not c[2] + if not missing_agent: + for ua in UA_BLOCKLIST: + if c[2].startswith(ua): + missing_agent = True + break + if (missing_agent or c[1] > BLOCK_UPPER) and c[0] not in prevblocks: + newblocks.add(c[0]) + if missing_agent: + useragentblocks.append(c[0]) + else: + emergencyblocks.append(c[0]) + else: + bulkips[c[0]] = c[1] + +# IPs from the block list that are no longer in the bulk list +deblockcandidates = set() +# IPs from the bulk list that are no longer in the bulk list +debulkcandidates = set() +# new IPs to go into the block list +newlyblocked = [] + + +for ip in prevblocks: + if ip in bulkips: + newblocks.add(ip) + del bulkips[ip] + else: + deblockcandidates.add(ip) + +for ip in prevbulks: + if ip not in newblocks: + if ip in bulkips: + if bulkips[ip] > BLOCK_LIMIT: + newblocks.add(ip) + newlyblocked.append(ip) + else: + newbulks.add(ip) + del bulkips[ip] + else: + debulkcandidates.add(ip) + +# cross-check deblock candidates +if deblockcandidates: + cur.execute(""" + SELECT DISTINCT ipaddress FROM new_query_log + WHERE ipaddress IN ('%s') AND starttime > now() - interval '%s' + """ % ("','".join(deblockcandidates), BLOCKCOOLOFF_PERIOD)) + + for c in cur: + newblocks.add(c[0]) + deblockcandidates.remove(c[0]) +# deblocked IPs go back to the bulk pool to catch the ones that simply +# ignored the HTTP error and just continue to hammer the API. +# Those that behave and stopped will be debulked a minute later. +for ip in deblockcandidates: + newbulks.add(ip) + +# cross-check debulk candidates +if debulkcandidates: + cur.execute(""" + SELECT DISTINCT ipaddress FROM new_query_log + WHERE ipaddress IN ('%s') AND starttime > now() - interval '%s' + AND starttime > date_trunc('day', now()) + """ % ("','".join(debulkcandidates), BULKCOOLOFF_PERIOD)) + + for c in cur: + newbulks.add(c[0]) + debulkcandidates.remove(c[0]) + +for ip in bulkips.iterkeys(): + newbulks.add(ip) + +# write out the new list +fd = open(BLOCKEDFILE, 'w') +for ip in newblocks: + fd.write(ip + " block\n") +for ip in newbulks: + fd.write(ip + " bulk\n") +for ip in BLACKLIST: + fd.write(ip + " ban\n") +fd.close() + +# write out the log +logstr = datetime.datetime.now().strftime('%Y-%m-%d %H:%M') + ' %s %s\n' +fd = open(LOGFILE, 'a') +if deblockcandidates: + fd.write(logstr % ('unblocked:', ', '.join(deblockcandidates))) +if debulkcandidates: + fd.write(logstr % (' debulked:', ', '.join(debulkcandidates))) +if bulkips: + fd.write(logstr % ('new bulks:', ', '.join(bulkips.keys()))) +if emergencyblocks: + fd.write(logstr % ('dir.block:', ', '.join(emergencyblocks))) +if useragentblocks: + fd.write(logstr % (' ua block:', ', '.join(useragentblocks))) +if newlyblocked: + fd.write(logstr % ('new block:', ', '.join(newlyblocked))) +fd.close() diff --git a/utils/cron_logrotate.sh b/utils/cron_logrotate.sh new file mode 100755 index 00000000..b9291d95 --- /dev/null +++ b/utils/cron_logrotate.sh @@ -0,0 +1,20 @@ +#!/bin/bash -e +# +# Rotate query logs. + +dbname=nominatim + +basedir=`dirname $0` +logfile=`date "+$basedir/../log/query-%F.log.gz"` + +# dump the old logfile +pg_dump -a -F p -t backup_query_log $dbname | gzip -9 > $logfile + +# remove the old logs +psql -q -d $dbname -c 'DROP TABLE backup_query_log' + +# rotate +psql -q -1 -d $dbname -c 'ALTER TABLE new_query_log RENAME TO backup_query_log;CREATE TABLE new_query_log as (select * from backup_query_log limit 0);GRANT SELECT, INSERT, UPDATE ON new_query_log TO "www-data"' +psql -q -d $dbname -c 'ALTER INDEX idx_new_query_log_starttime RENAME TO idx_backup_query_log_starttime' +psql -q -d $dbname -c 'CREATE INDEX idx_new_query_log_starttime ON new_query_log USING BTREE (starttime)' + diff --git a/utils/cron_vacuum.sh b/utils/cron_vacuum.sh new file mode 100755 index 00000000..4c16fc65 --- /dev/null +++ b/utils/cron_vacuum.sh @@ -0,0 +1,14 @@ +#!/bin/bash +# +# Vaccum all tables with indices on integer arrays. +# Agressive vacuuming seems to help against index bloat. +# + +psql -q -d nominatim -c 'VACUUM ANALYSE search_name' +psql -q -d nominatim -c 'VACUUM ANALYSE search_name_country' +#psql -q -d nominatim -c 'VACUUM ANALYSE planet_osm_ways' + +for i in `seq 0 246`; do + psql -q -d nominatim -c "VACUUM ANALYSE search_name_${i}" +done + diff --git a/utils/setup.php b/utils/setup.php index d9c761b2..e23a2211 100755 --- a/utils/setup.php +++ b/utils/setup.php @@ -174,8 +174,9 @@ { $osm2pgsql .= ' --flat-nodes '.CONST_Osm2pgsql_Flatnode_File; } + $osm2pgsql .= ' --tablespace-slim-index ssd --tablespace-main-index ssd --tablespace-main-data ssd --tablespace-slim-data data'; $osm2pgsql .= ' -lsc -O gazetteer --hstore'; - $osm2pgsql .= ' -C '.$iCacheMemory; + $osm2pgsql .= ' -C 18000'; $osm2pgsql .= ' -P '.$aDSNInfo['port']; $osm2pgsql .= ' -d '.$aDSNInfo['database'].' '.$aCMDResult['osm-file']; passthruCheckReturn($osm2pgsql); @@ -501,7 +502,7 @@ $sSQL .= "select 'P',nextval('seq_postcodes'),'place','postcode',postcode,calculated_country_code,"; $sSQL .= "ST_SetSRID(ST_Point(x,y),4326) as geometry from (select calculated_country_code,postcode,"; $sSQL .= "avg(st_x(st_centroid(geometry))) as x,avg(st_y(st_centroid(geometry))) as y "; - $sSQL .= "from placex where postcode is not null group by calculated_country_code,postcode) as x"; + $sSQL .= "from placex where postcode is not null and calculated_country_code not in ('ie') group by calculated_country_code,postcode) as x"; if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection)); $sSQL = "insert into placex (osm_type,osm_id,class,type,postcode,calculated_country_code,geometry) "; diff --git a/utils/specialphrases.php b/utils/specialphrases.php index f1a0d0d0..02aaaa0f 100755 --- a/utils/specialphrases.php +++ b/utils/specialphrases.php @@ -91,12 +91,12 @@ echo "class = '".pg_escape_string($aPair[0])."' and type = '".pg_escape_string($aPair[1])."';\n"; echo "CREATE INDEX idx_place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])."_centroid "; - echo "ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." USING GIST (centroid);\n"; + echo "ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." USING GIST (centroid) tablespace ssd;\n"; echo "CREATE INDEX idx_place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])."_place_id "; - echo "ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." USING btree(place_id);\n"; + echo "ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." USING btree(place_id) tablespace ssd;\n"; - echo "GRANT SELECT ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." TO \"www-data\";"; + echo "GRANT SELECT ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." TO \"www-data\";\n"; } diff --git a/utils/update.php b/utils/update.php index 8dd1dfc3..256d5a15 100755 --- a/utils/update.php +++ b/utils/update.php @@ -47,7 +47,6 @@ showUsage($aCMDOptions, true, 'Select either import of hourly or daily'); } - if (!isset($aResult['index-instances'])) $aResult['index-instances'] = 1; if (!isset($aResult['index-rank'])) $aResult['index-rank'] = 0; /* @@ -361,6 +360,7 @@ if ($aResult['index']) { + if (!isset($aResult['index-instances'])) $aResult['index-instances'] = 1; passthru(CONST_BasePath.'/nominatim/nominatim -i -d '.$aDSNInfo['database'].' -P '.$aDSNInfo['port'].' -t '.$aResult['index-instances'].' -r '.$aResult['index-rank']); } @@ -378,7 +378,7 @@ $sCMDDownload = $sOsmosisCMD.' --read-replication-interval workingDirectory='.$sOsmosisConfigDirectory.' --simplify-change --write-xml-change '.$sImportFile; $sCMDCheckReplicationLag = $sOsmosisCMD.' -q --read-replication-lag workingDirectory='.$sOsmosisConfigDirectory; $sCMDImport = $sOsm2pgsqlCmd.' '.$sImportFile; - $sCMDIndex = $sBasePath.'/nominatim/nominatim -i -d '.$aDSNInfo['database'].' -P '.$aDSNInfo['port'].' -t '.$aResult['index-instances']; + $sCMDIndex = $sBasePath.'/nominatim/nominatim -i -d '.$aDSNInfo['database'].' -P '.$aDSNInfo['port']; if (!$aResult['no-npi']) { $sCMDIndex .= '-F '; } @@ -463,7 +463,16 @@ $sBatchEnd = getosmosistimestamp($sOsmosisConfigDirectory); // Index file - $sThisIndexCmd = $sCMDIndex; + if (!isset($aResult['index-instances'])) + { + if (getLoadAverage() < 15) + $iIndexInstances = 2; + else + $iIndexInstances = 1; + } else + $iIndexInstances = $aResult['index-instances']; + + $sThisIndexCmd = $sCMDIndex.' -t '.$iIndexInstances; $fCMDStartTime = time(); if (!$aResult['no-npi']) diff --git a/website/403.html b/website/403.html new file mode 100644 index 00000000..44038a49 --- /dev/null +++ b/website/403.html @@ -0,0 +1,15 @@ + + +Access blocked + + +

Access blocked

+ +

You have been blocked because you have been overusing OSM's geocoding service. +Please be aware that OSM's resources are limited and shared between many users. +To have this block lifted, contact the Nominatim system administrator at +nominatim@openstreetmap.org.

+ +

For more information, consult the usage policy for the OSM Nominatim server. + + diff --git a/website/509.html b/website/509.html new file mode 100644 index 00000000..1e67a5a4 --- /dev/null +++ b/website/509.html @@ -0,0 +1,12 @@ + + +Bandwidth limit exceeded + + +

Bandwidth limit exceeded

+ +

You have been temporarily blocked because you have been overusing OSM's geocoding service or because you have not provided sufficient identification of your application. This block will be automatically lifted after a while. Please take the time and adapt your scripts to reduce the number of requests and make sure that you send a valid UserAgent or Referer.

+ +

For more information, consult the usage policy for the OSM Nominatim server. + + diff --git a/website/crossdomain.xml b/website/crossdomain.xml new file mode 100644 index 00000000..963a682b --- /dev/null +++ b/website/crossdomain.xml @@ -0,0 +1,5 @@ + + + + + diff --git a/website/favicon.ico b/website/favicon.ico new file mode 100644 index 00000000..0157ea00 Binary files /dev/null and b/website/favicon.ico differ diff --git a/website/nominatim.xml b/website/nominatim.xml new file mode 100644 index 00000000..28684b16 --- /dev/null +++ b/website/nominatim.xml @@ -0,0 +1,15 @@ + + + Nominatim + Nominatim OSM Search + Search for a place in OpenStreetMap Nominatim + UTF-8 + UTF-8 + + + Brian Quinion + false + Data &copy; OpenStreetMap contributors, Some Rights Reserved. ODbL, http://www.osm.org/copyright. + + diff --git a/website/reverse.php b/website/reverse.php index ee6a6100..ed877899 100755 --- a/website/reverse.php +++ b/website/reverse.php @@ -6,18 +6,6 @@ require_once(CONST_BasePath.'/lib/PlaceLookup.php'); require_once(CONST_BasePath.'/lib/ReverseGeocode.php'); - if (strpos(CONST_BulkUserIPs, ','.$_SERVER["REMOTE_ADDR"].',') !== false) - { - $fLoadAvg = getLoadAverage(); - if ($fLoadAvg > 2) sleep(60); - if ($fLoadAvg > 4) sleep(120); - if ($fLoadAvg > 6) - { - echo "Bulk User: Temporary block due to high server load\n"; - exit; - } - } - $oDB =& getDB(); ini_set('memory_limit', '200M'); diff --git a/website/robots.txt b/website/robots.txt new file mode 100644 index 00000000..e4d3d3fb --- /dev/null +++ b/website/robots.txt @@ -0,0 +1,10 @@ +User-agent: ia_archiver +Allow: / + +User-agent: * +Disallow: /search.php +Disallow: /search +Disallow: /details.php +Disallow: /details +Disallow: /reverse.php +Disallow: /reverse diff --git a/website/search.php b/website/search.php index d4004ddb..e85c3369 100755 --- a/website/search.php +++ b/website/search.php @@ -20,10 +20,12 @@ $aLangPrefOrder = getPreferredLanguages(); $oGeocode->setLanguagePreference($aLangPrefOrder); + /* if (isset($aLangPrefOrder['name:de'])) $oGeocode->setReverseInPlan(true); if (isset($aLangPrefOrder['name:ru'])) $oGeocode->setReverseInPlan(true); if (isset($aLangPrefOrder['name:ja'])) $oGeocode->setReverseInPlan(true); if (isset($aLangPrefOrder['name:pl'])) $oGeocode->setReverseInPlan(true); + */ function loadParamsToGeocode($oGeocode, $aParams, $bBatch = false) {