From: Sarah Hoffmann Date: Wed, 8 Aug 2012 20:34:06 +0000 (+0200) Subject: Merge branch 'master' of http://github.com/twain47/Nominatim X-Git-Tag: deploy~687 X-Git-Url: https://git.openstreetmap.org/nominatim.git/commitdiff_plain/932d945b7f897fd18be7d4c2fac3eda3915aa296?hp=8ebc673ebf9ed3ceb2f278d2c0e42b836a34a37c Merge branch 'master' of http://github.com/twain47/Nominatim Conflicts: lib/lib.php lib/log.php lib/template/address-jsonv2.php settings/settings.php sql/functions.sql sql/indices.src.sql sql/tables.sql utils/setup.php utils/specialphrases.php website/reverse.php --- diff --git a/lib/init-website.php b/lib/init-website.php index d21ca7af..6db2ac37 100644 --- a/lib/init-website.php +++ b/lib/init-website.php @@ -1,19 +1,34 @@

Access blocked

"; + echo "Your IP has been blocked for overusing OpenStreetMap's volunteer-run servers.
\n"; + echo 'Please consult the Nominatim usage policy for more information.'; + echo "\n\n"; + exit; + } - header('Content-type: text/html; charset=utf-8'); + $sTempBlockedIP = file_get_contents(CONST_IPBanFile); + if (preg_match('/\b'.$_SERVER["REMOTE_ADDR"].'\b/', $sTempBlockedIP)) + { + header('HTTP/1.0 503 Service Temporarily Unavailable'); + header('Content-type: text/html; charset=utf-8'); + echo "

Access blocked

"; + echo "Your IP has been blocked temporarily for overusing OpenStreetMap's volunteer-run servers. This ban will be lifted automatically in a while. To avoid further blocks, please read the
\n"; + echo 'Nominatim usage policy carefully before you continue to use this service.'; + echo "\n\n"; + exit; + } + + } diff --git a/lib/log.php b/lib/log.php index 5b847a41..d81ef46d 100644 --- a/lib/log.php +++ b/lib/log.php @@ -17,15 +17,9 @@ if (CONST_Log_DB) { - // Log - if ($sType == 'search') - { - $oDB->query('insert into query_log values ('.getDBQuoted($hLog[0]).','.getDBQuoted($hLog[3]).','.getDBQuoted($hLog[1]).')'); - } - - $sSQL = 'insert into new_query_log (type,starttime,query,ipaddress,useragent,language,format)'; + $sSQL = 'insert into new_query_log (type,starttime,query,ipaddress,useragent,language,format,searchterm)'; $sSQL .= ' values ('.getDBQuoted($sType).','.getDBQuoted($hLog[0]).','.getDBQuoted($hLog[2]); - $sSQL .= ','.getDBQuoted($hLog[1]).','.getDBQuoted($_SERVER['HTTP_USER_AGENT']).','.getDBQuoted(join(',',$aLanguageList)).','.getDBQuoted($sOutputFormat).')'; + $sSQL .= ','.getDBQuoted($hLog[1]).','.getDBQuoted($_SERVER['HTTP_USER_AGENT']).','.getDBQuoted(join(',',$aLanguageList)).','.getDBQuoted($_GET['format']).','.getDBQuoted($hLog[3]).')'; $oDB->query($sSQL); } @@ -59,12 +53,6 @@ if (CONST_Log_DB) { - $sSQL = 'update query_log set endtime = '.getDBQuoted($sEndTime).', results = '.$iNumResults; - $sSQL .= ' where starttime = '.getDBQuoted($hLog[0]); - $sSQL .= ' and ipaddress = '.getDBQuoted($hLog[1]); - $sSQL .= ' and query = '.getDBQuoted($hLog[3]); - $oDB->query($sSQL); - $sSQL = 'update new_query_log set endtime = '.getDBQuoted($sEndTime).', results = '.$iNumResults; $sSQL .= ' where starttime = '.getDBQuoted($hLog[0]); $sSQL .= ' and ipaddress = '.getDBQuoted($hLog[1]); diff --git a/module/nominatim.c b/module/nominatim.c index 18ad99b3..75238e22 100644 --- a/module/nominatim.c +++ b/module/nominatim.c @@ -143,12 +143,12 @@ transliteration( PG_FUNCTION_ARGS ) resultdata++; } } - else + /*else { ereport( WARNING, ( errcode( ERRCODE_SUCCESSFUL_COMPLETION ), errmsg( "missing char: %i\n", *wchardata ))); - } + }*/ wchardata++; } diff --git a/munin/nominatim_importlag b/munin/nominatim_importlag index 8d69f3ce..657938db 100755 --- a/munin/nominatim_importlag +++ b/munin/nominatim_importlag @@ -21,7 +21,7 @@ if [ "$1" = "config" ]; then fi -delay=`psql -d nominatim -c 'copy (select extract(epoch from now()-lastimportdate)::int from import_status) to stdout'` +delay=`psql -d nominatim -c 'copy (select extract(epoch from timezone('utc', now())-lastimportdate)::int from import_status) to stdout'` echo "age.value $delay" diff --git a/settings/settings.php b/settings/settings.php index 4ebc199d..df965211 100644 --- a/settings/settings.php +++ b/settings/settings.php @@ -7,22 +7,24 @@ @define('CONST_Database_DSN', 'pgsql://@/nominatim'); // Paths - @define('CONST_Postgresql_Version', '9.1'); + @define('CONST_Postgresql_Version', '9.0'); @define('CONST_Path_Postgresql_Contrib', '/usr/share/postgresql/'.CONST_Postgresql_Version.'/contrib'); @define('CONST_Path_Postgresql_Postgis', CONST_Path_Postgresql_Contrib.'/postgis-1.5'); @define('CONST_Osm2pgsql_Binary', CONST_BasePath.'/osm2pgsql/osm2pgsql'); - @define('CONST_Osmosis_Binary', CONST_BasePath.'/osmosis-0.40.1/bin/osmosis'); + @define('CONST_Osmosis_Binary', CONST_BasePath.'/../osmosis-0.40.1/bin/osmosis'); // Website settings - @define('CONST_ClosedForIndexing', false); - @define('CONST_ClosedForIndexingExceptionIPs', ''); @define('CONST_BlockedIPs', ''); + @define('CONST_IPBanFile', CONST_BasePath.'/settings/ip_blocks'); + @define('CONST_WhitelistedIPs', ''); + @define('CONST_BlockedUserAgents', ''); + @define('CONST_BlockReverseMaxLoad', 15); @define('CONST_BulkUserIPs', ''); - @define('CONST_Website_BaseURL', 'http://'.php_uname('n').'/'); + @define('CONST_Website_BaseURL', 'http://nominatim.openstreetmap.org/'); @define('CONST_Tile_Default', 'Mapnik'); - @define('CONST_Default_Language', 'xx'); + @define('CONST_Default_Language', 'en'); @define('CONST_Default_Lat', 20.0); @define('CONST_Default_Lon', 0.0); @define('CONST_Default_Zoom', 2); diff --git a/sql/indices.src.sql b/sql/indices.src.sql index 5e6ccd12..2e5dde71 100644 --- a/sql/indices.src.sql +++ b/sql/indices.src.sql @@ -1,30 +1,30 @@ -- Indices used only during search and update. -- These indices are created only after the indexing process is done. -CREATE INDEX idx_word_word_id on word USING BTREE (word_id); +CREATE INDEX idx_word_word_id on word USING BTREE (word_id) TABLESPACE ssd; -CREATE INDEX idx_search_name_nameaddress_vector ON search_name USING GIN (nameaddress_vector) WITH (fastupdate = off); -CREATE INDEX idx_search_name_name_vector ON search_name USING GIN (name_vector) WITH (fastupdate = off); -CREATE INDEX idx_search_name_centroid ON search_name USING GIST (centroid); +CREATE INDEX idx_search_name_nameaddress_vector ON search_name USING GIN (nameaddress_vector) WITH (fastupdate = off) TABLESPACE ssd; +CREATE INDEX idx_search_name_name_vector ON search_name USING GIN (name_vector) WITH (fastupdate = off) TABLESPACE ssd; +CREATE INDEX idx_search_name_centroid ON search_name USING GIST (centroid) TABLESPACE ssd; -CREATE INDEX idx_place_addressline_address_place_id on place_addressline USING BTREE (address_place_id); +CREATE INDEX idx_place_addressline_address_place_id on place_addressline USING BTREE (address_place_id) TABLESPACE ssd; -CREATE INDEX idx_place_boundingbox_place_id on place_boundingbox USING BTREE (place_id); -CREATE INDEX idx_place_boundingbox_outline ON place_boundingbox USING GIST (outline); +CREATE INDEX idx_place_boundingbox_place_id on place_boundingbox USING BTREE (place_id) TABLESPACE ssd; +CREATE INDEX idx_place_boundingbox_outline ON place_boundingbox USING GIST (outline) TABLESPACE ssd; DROP INDEX IF EXISTS idx_placex_rank_search; -CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search); -CREATE INDEX idx_placex_rank_address ON placex USING BTREE (rank_address); -CREATE INDEX idx_placex_pendingsector ON placex USING BTREE (rank_search,geometry_sector) where indexed_status > 0; -CREATE INDEX idx_placex_parent_place_id ON placex USING BTREE (parent_place_id) where parent_place_id IS NOT NULL; -CREATE INDEX idx_placex_interpolation ON placex USING BTREE (geometry_sector) where indexed_status > 0 and class='place' and type='houses'; -CREATE INDEX idx_location_area_country_place_id ON location_area_country USING BTREE (place_id); +CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search) TABLESPACE ssd; +CREATE INDEX idx_placex_rank_address ON placex USING BTREE (rank_address) TABLESPACE ssd; +CREATE INDEX idx_placex_pendingsector ON placex USING BTREE (rank_search,geometry_sector) TABLESPACE ssd where indexed_status > 0; +CREATE INDEX idx_placex_parent_place_id ON placex USING BTREE (parent_place_id) TABLESPACE ssd where parent_place_id IS NOT NULL; +CREATE INDEX idx_placex_interpolation ON placex USING BTREE (geometry_sector) TABLESPACE ssd where indexed_status > 0 and class='place' and type='houses'; +CREATE INDEX idx_location_area_country_place_id ON location_area_country USING BTREE (place_id) TABLESPACE ssd; -CREATE INDEX idx_search_name_country_centroid ON search_name_country USING GIST (centroid); -CREATE INDEX idx_search_name_country_nameaddress_vector ON search_name_country USING GIN (nameaddress_vector) WITH (fastupdate = off); +CREATE INDEX idx_search_name_country_centroid ON search_name_country USING GIST (centroid) TABLESPACE ssd; +CREATE INDEX idx_search_name_country_nameaddress_vector ON search_name_country USING GIN (nameaddress_vector) WITH (fastupdate = off) TABLESPACE ssd; -- start -CREATE INDEX idx_location_property_-partition-_centroid ON location_property_-partition- USING GIST (centroid); +CREATE INDEX idx_location_property_-partition-_centroid ON location_property_-partition- USING GIST (centroid) TABLESPACE ssd; -- end CREATE UNIQUE INDEX idx_place_osm_unique on place using btree(osm_id,osm_type,class,type); diff --git a/sql/partitions.src.sql b/sql/partitions.src.sql index 324f35bb..3fb7962c 100644 --- a/sql/partitions.src.sql +++ b/sql/partitions.src.sql @@ -25,21 +25,21 @@ create type nearfeaturecentr as ( ); CREATE TABLE location_area_country () INHERITS (location_area_large); -CREATE INDEX idx_location_area_country_geometry ON location_area_country USING GIST (geometry); +CREATE INDEX idx_location_area_country_geometry ON location_area_country USING GIST (geometry) TABLESPACE ssd; CREATE TABLE search_name_country () INHERITS (search_name_blank); -CREATE INDEX idx_search_name_country_place_id ON search_name_country USING BTREE (place_id); -CREATE INDEX idx_search_name_country_name_vector ON search_name_country USING GIN (name_vector) WITH (fastupdate = off); +CREATE INDEX idx_search_name_country_place_id ON search_name_country USING BTREE (place_id) TABLESPACE ssd; +CREATE INDEX idx_search_name_country_name_vector ON search_name_country USING GIN (name_vector) WITH (fastupdate = off) TABLESPACE ssd; -- start CREATE TABLE location_area_large_-partition- () INHERITS (location_area_large); -CREATE INDEX idx_location_area_large_-partition-_place_id ON location_area_large_-partition- USING BTREE (place_id); -CREATE INDEX idx_location_area_large_-partition-_geometry ON location_area_large_-partition- USING GIST (geometry); +CREATE INDEX idx_location_area_large_-partition-_place_id ON location_area_large_-partition- USING BTREE (place_id) TABLESPACE ssd; +CREATE INDEX idx_location_area_large_-partition-_geometry ON location_area_large_-partition- USING GIST (geometry) TABLESPACE ssd; CREATE TABLE search_name_-partition- () INHERITS (search_name_blank); -CREATE INDEX idx_search_name_-partition-_place_id ON search_name_-partition- USING BTREE (place_id); -CREATE INDEX idx_search_name_-partition-_centroid ON search_name_-partition- USING GIST (centroid); -CREATE INDEX idx_search_name_-partition-_name_vector ON search_name_-partition- USING GIN (name_vector) WITH (fastupdate = off); +CREATE INDEX idx_search_name_-partition-_place_id ON search_name_-partition- USING BTREE (place_id) TABLESPACE ssd; +CREATE INDEX idx_search_name_-partition-_centroid ON search_name_-partition- USING GIST (centroid) TABLESPACE ssd; +CREATE INDEX idx_search_name_-partition-_name_vector ON search_name_-partition- USING GIN (name_vector) WITH (fastupdate = off) TABLESPACE ssd; CREATE TABLE location_property_-partition- () INHERITS (location_property); CREATE INDEX idx_location_property_-partition-_place_id ON location_property_-partition- USING BTREE (place_id); @@ -52,8 +52,8 @@ CREATE TABLE location_road_-partition- ( country_code VARCHAR(2) ); SELECT AddGeometryColumn('location_road_-partition-', 'geometry', 4326, 'GEOMETRY', 2); -CREATE INDEX idx_location_road_-partition-_geometry ON location_road_-partition- USING GIST (geometry); -CREATE INDEX idx_location_road_-partition-_place_id ON location_road_-partition- USING BTREE (place_id); +CREATE INDEX idx_location_road_-partition-_geometry ON location_road_-partition- USING GIST (geometry) TABLESPACE ssd; +CREATE INDEX idx_location_road_-partition-_place_id ON location_road_-partition- USING BTREE (place_id) TABLESPACE ssd; -- end diff --git a/sql/tables.sql b/sql/tables.sql index d5cf7bcd..a0ad66a8 100644 --- a/sql/tables.sql +++ b/sql/tables.sql @@ -23,18 +23,6 @@ CREATE TABLE import_npi_log ( event text ); ---drop table IF EXISTS query_log; -CREATE TABLE query_log ( - starttime timestamp, - query text, - ipaddress text, - endtime timestamp, - results integer - ); -CREATE INDEX idx_query_log ON query_log USING BTREE (starttime); -GRANT INSERT ON query_log TO "www-data" ; -GRANT UPDATE ON query_log TO "www-data" ; - CREATE TABLE new_query_log ( type text, starttime timestamp, @@ -42,6 +30,7 @@ CREATE TABLE new_query_log ( useragent text, language text, query text, + searchterm text, endtime timestamp, results integer, format text, @@ -52,9 +41,6 @@ GRANT INSERT ON new_query_log TO "www-data" ; GRANT UPDATE ON new_query_log TO "www-data" ; GRANT SELECT ON new_query_log TO "www-data" ; -create view vw_search_query_log as SELECT substr(query, 1, 50) AS query, starttime, endtime - starttime AS duration, substr(useragent, 1, 20) as -useragent, language, results, ipaddress FROM new_query_log WHERE type = 'search' ORDER BY starttime DESC; - --drop table IF EXISTS report_log; CREATE TABLE report_log ( starttime timestamp, @@ -76,9 +62,9 @@ CREATE TABLE word ( country_code varchar(2), search_name_count INTEGER, operator TEXT - ); + ) TABLESPACE ssd; SELECT AddGeometryColumn('word', 'location', 4326, 'GEOMETRY', 2); -CREATE INDEX idx_word_word_token on word USING BTREE (word_token); +CREATE INDEX idx_word_word_token on word USING BTREE (word_token) TABLESPACE ssd; --CREATE INDEX idx_word_trigram ON word USING gin(word_trigram gin_trgm_ops) WITH (fastupdate = off); GRANT SELECT ON word TO "www-data" ; DROP SEQUENCE seq_word; @@ -136,8 +122,8 @@ CREATE TABLE search_name_blank ( SELECT AddGeometryColumn('search_name_blank', 'centroid', 4326, 'GEOMETRY', 2); drop table IF EXISTS search_name; -CREATE TABLE search_name () INHERITS (search_name_blank); -CREATE INDEX idx_search_name_place_id ON search_name USING BTREE (place_id); +CREATE TABLE search_name () INHERITS (search_name_blank) TABLESPACE ssd; +CREATE INDEX idx_search_name_place_id ON search_name USING BTREE (place_id) TABLESPACE ssd; drop table IF EXISTS place_addressline; CREATE TABLE place_addressline ( @@ -147,8 +133,8 @@ CREATE TABLE place_addressline ( isaddress boolean, distance float, cached_rank_address integer - ); -CREATE INDEX idx_place_addressline_place_id on place_addressline USING BTREE (place_id); + ) TABLESPACE data; +CREATE INDEX idx_place_addressline_place_id on place_addressline USING BTREE (place_id) TABLESPACE ssd; drop table IF EXISTS place_boundingbox CASCADE; CREATE TABLE place_boundingbox ( @@ -201,14 +187,14 @@ CREATE TABLE placex ( wikipedia TEXT, -- calculated wikipedia article name (language:title) geometry_sector INTEGER, calculated_country_code varchar(2) - ); + ) TABLESPACE ssd; SELECT AddGeometryColumn('placex', 'centroid', 4326, 'GEOMETRY', 2); -CREATE UNIQUE INDEX idx_place_id ON placex USING BTREE (place_id); -CREATE INDEX idx_placex_osmid ON placex USING BTREE (osm_type, osm_id); -CREATE INDEX idx_placex_linked_place_id ON placex USING BTREE (linked_place_id); -CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search, geometry_sector); -CREATE INDEX idx_placex_geometry ON placex USING GIST (geometry); -CREATE INDEX idx_placex_adminname on placex USING BTREE (make_standard_name(name->'name'),rank_search) WHERE osm_type='N' and rank_search < 26; +CREATE UNIQUE INDEX idx_place_id ON placex USING BTREE (place_id) TABLESPACE ssd; +CREATE INDEX idx_placex_osmid ON placex USING BTREE (osm_type, osm_id) TABLESPACE ssd; +CREATE INDEX idx_placex_linked_place_id ON placex USING BTREE (linked_place_id) TABLESPACE ssd; +CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search, geometry_sector) TABLESPACE ssd; +CREATE INDEX idx_placex_geometry ON placex USING GIST (geometry) TABLESPACE ssd; +CREATE INDEX idx_placex_adminname on placex USING BTREE (make_standard_name(name->'name'),rank_search) TABLESPACE ssd WHERE osm_type='N' and rank_search < 26; --CREATE INDEX idx_placex_indexed ON placex USING BTREE (indexed); @@ -256,10 +242,10 @@ update placex set geometry_sector = geometry_sector(geometry); drop index idx_placex_pendingbylatlon; drop index idx_placex_interpolation; drop index idx_placex_sector; -CREATE INDEX idx_placex_pendingbylatlon ON placex USING BTREE (geometry_index(geometry_sector,indexed,name),rank_search) +CREATE INDEX idx_placex_pendingbylatlon ON placex USING BTREE (geometry_index(geometry_sector,indexed,name),rank_search) TABLESPACE ssd where geometry_index(geometry_sector,indexed,name) IS NOT NULL; -CREATE INDEX idx_placex_interpolation ON placex USING BTREE (geometry_sector) where indexed = false and class='place' and type='houses'; -CREATE INDEX idx_placex_sector ON placex USING BTREE (geometry_sector,rank_address,osm_type,osm_id); +CREATE INDEX idx_placex_interpolation ON placex USING BTREE (geometry_sector) TABLESPACE ssd where indexed = false and class='place' and type='houses'; +CREATE INDEX idx_placex_sector ON placex USING BTREE (geometry_sector,rank_address,osm_type,osm_id) TABLESPACE ssd; DROP SEQUENCE seq_postcodes; CREATE SEQUENCE seq_postcodes start 1; @@ -277,7 +263,7 @@ CREATE TABLE import_polygon_error ( ); SELECT AddGeometryColumn('import_polygon_error', 'prevgeometry', 4326, 'GEOMETRY', 2); SELECT AddGeometryColumn('import_polygon_error', 'newgeometry', 4326, 'GEOMETRY', 2); -CREATE INDEX idx_import_polygon_error_osmid ON import_polygon_error USING BTREE (osm_type, osm_id); +CREATE INDEX idx_import_polygon_error_osmid ON import_polygon_error USING BTREE (osm_type, osm_id) TABLESPACE ssd; drop table import_polygon_delete; CREATE TABLE import_polygon_delete ( @@ -286,7 +272,7 @@ CREATE TABLE import_polygon_delete ( class TEXT NOT NULL, type TEXT NOT NULL ); -CREATE INDEX idx_import_polygon_delete_osmid ON import_polygon_delete USING BTREE (osm_type, osm_id); +CREATE INDEX idx_import_polygon_delete_osmid ON import_polygon_delete USING BTREE (osm_type, osm_id) TABLESPACE ssd; drop sequence file; CREATE SEQUENCE file start 1; diff --git a/utils/cron_banip.sh b/utils/cron_banip.sh new file mode 100755 index 00000000..1f0aade6 --- /dev/null +++ b/utils/cron_banip.sh @@ -0,0 +1,84 @@ +#!/bin/bash +# +# Create or update the list of temporarily banned IPs. +# + +BLOCKEDFILE=/home/lonvia/nominatim/settings/ip_blocks +LOGFILE=/home/lonvia/nominatim/log/ip_blocks.log + +LONG_PERIOD='1 hour' +SHORT_PERIOD='10 min' +COOLOFF_PERIOD='1 hour' + +REVLONG_LIMIT=20000 +REVSHORT_LIMIT=6000 +SRCHLONG_LIMIT=4000 +SRCHSHORT_LIMIT='10 min' + +PSQLCMD='psql -qtA -d nominatim' + +# Blocking candidates +$PSQLCMD > $BLOCKEDFILE.newblocks << ENDOFQUERY +SELECT ipaddress FROM +((SELECT ipaddress FROM + (SELECT ipaddress, count(*) FROM new_query_log + WHERE type = 'reverse' AND starttime > now() - interval '$LONG_PERIOD' + GROUP BY ipaddress) + as v + WHERE count > $REVLONG_LIMIT) +UNION +(SELECT ipaddress FROM + (SELECT ipaddress, count(*) FROM new_query_log + WHERE type = 'reverse' AND starttime > now() - interval '$SHORT_PERIOD' + GROUP BY ipaddress) + as v + WHERE count > $REVSHORT_LIMIT) +UNION +(SELECT ipaddress FROM + (SELECT ipaddress, count(*) FROM new_query_log + WHERE type = 'search' AND starttime > now() - interval '$LONG_PERIOD' + GROUP BY ipaddress) + as v + WHERE count > $SRCHLONG_LIMIT) +UNION +(SELECT ipaddress FROM + (SELECT ipaddress, sum(endtime-starttime) as dur FROM new_query_log + WHERE type = 'search' AND starttime > now() - interval '$SHORT_PERIOD' + GROUP BY ipaddress) + as v + WHERE dur > '$SRCHSHORT_LIMIT') +) as q ORDER BY ipaddress; +ENDOFQUERY + +no_newblocks=`comm $BLOCKEDFILE.newblocks $BLOCKEDFILE -23 | wc -l` + +if [ "x$no_newblocks" != "x0" ]; then + date +"%x %X Newly blocked IPs: `comm $BLOCKEDFILE.newblocks $BLOCKEDFILE -23 | tr '\n' ' '`" >> $LOGFILE +fi + + +# Deblockable candidates +blocked=`tr '\n' ',' < $BLOCKEDFILE | sed "s:[[:space:]]::g;s:,$::;s:,:'),(':g"` + +if [ "x$blocked" == "x" ]; then + mv $BLOCKEDFILE.newblocks $BLOCKEDFILE +else + $PSQLCMD > $BLOCKEDFILE.newlifted << ENDOFQUERY + VALUES ('$blocked') + EXCEPT + (SELECT DISTINCT ipaddress FROM new_query_log + WHERE starttime > now() - interval '$COOLOFF_PERIOD') +ENDOFQUERY + + no_lifted=`cat $BLOCKEDFILE.newlifted | wc -w` + + if [ "x$no_lifted" != "x0" ]; then + date +"%x %X Bans lifted: `tr '\n' ' ' < $BLOCKEDFILE.newlifted`" >> $LOGFILE + fi + + # Write out new blocks + cat $BLOCKEDFILE.newblocks $BLOCKEDFILE | sort -u | comm - $BLOCKEDFILE.newlifted -23 > $BLOCKEDFILE.new + mv $BLOCKEDFILE.new $BLOCKEDFILE + + rm $BLOCKEDFILE.newblocks $BLOCKEDFILE.newlifted +fi diff --git a/utils/cron_logrotate.sh b/utils/cron_logrotate.sh new file mode 100755 index 00000000..b9291d95 --- /dev/null +++ b/utils/cron_logrotate.sh @@ -0,0 +1,20 @@ +#!/bin/bash -e +# +# Rotate query logs. + +dbname=nominatim + +basedir=`dirname $0` +logfile=`date "+$basedir/../log/query-%F.log.gz"` + +# dump the old logfile +pg_dump -a -F p -t backup_query_log $dbname | gzip -9 > $logfile + +# remove the old logs +psql -q -d $dbname -c 'DROP TABLE backup_query_log' + +# rotate +psql -q -1 -d $dbname -c 'ALTER TABLE new_query_log RENAME TO backup_query_log;CREATE TABLE new_query_log as (select * from backup_query_log limit 0);GRANT SELECT, INSERT, UPDATE ON new_query_log TO "www-data"' +psql -q -d $dbname -c 'ALTER INDEX idx_new_query_log_starttime RENAME TO idx_backup_query_log_starttime' +psql -q -d $dbname -c 'CREATE INDEX idx_new_query_log_starttime ON new_query_log USING BTREE (starttime)' + diff --git a/utils/cron_vacuum.sh b/utils/cron_vacuum.sh new file mode 100755 index 00000000..4c16fc65 --- /dev/null +++ b/utils/cron_vacuum.sh @@ -0,0 +1,14 @@ +#!/bin/bash +# +# Vaccum all tables with indices on integer arrays. +# Agressive vacuuming seems to help against index bloat. +# + +psql -q -d nominatim -c 'VACUUM ANALYSE search_name' +psql -q -d nominatim -c 'VACUUM ANALYSE search_name_country' +#psql -q -d nominatim -c 'VACUUM ANALYSE planet_osm_ways' + +for i in `seq 0 246`; do + psql -q -d nominatim -c "VACUUM ANALYSE search_name_${i}" +done + diff --git a/utils/setup.php b/utils/setup.php index c842ba33..663a9694 100755 --- a/utils/setup.php +++ b/utils/setup.php @@ -126,8 +126,9 @@ echo "Please download and build osm2pgsql.\nIf it is already installed, check the path in your local settings (settings/local.php) file.\n"; fail("osm2pgsql not found in '$osm2pgsql'"); } + $osm2pgsql .= ' --tablespace-slim-index ssd --tablespace-main-index ssd --tablespace-main-data ssd --tablespace-slim-data ssd'; $osm2pgsql .= ' -lsc -O gazetteer --hstore'; - $osm2pgsql .= ' -C '.$iCacheMemory; + $osm2pgsql .= ' -C 16000'; $osm2pgsql .= ' -d '.$aDSNInfo['database'].' '.$aCMDResult['osm-file']; passthruCheckReturn($osm2pgsql); diff --git a/utils/specialphrases.php b/utils/specialphrases.php index 4381bdfa..94aa74d8 100755 --- a/utils/specialphrases.php +++ b/utils/specialphrases.php @@ -91,17 +91,18 @@ if ($aPair[0] == 'highway') continue; if ($aPair[1] == 'highway') continue; + echo "drop table if exists place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1]).";\n"; echo "create table place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." as "; echo "select place_id as place_id,st_centroid(geometry) as centroid from placex where "; echo "class = '".pg_escape_string($aPair[0])."' and type = '".pg_escape_string($aPair[1])."';\n"; echo "CREATE INDEX idx_place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])."_centroid "; - echo "ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." USING GIST (centroid);\n"; + echo "ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." USING GIST (centroid) tablespace ssd;\n"; echo "CREATE INDEX idx_place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])."_place_id "; - echo "ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." USING btree(place_id);\n"; + echo "ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." USING btree(place_id) tablespace ssd;\n"; - echo "GRANT SELECT ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." TO \"www-data\";"; + echo "GRANT SELECT ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." TO \"www-data\";\n"; } diff --git a/website/reverse.php b/website/reverse.php index c155fa57..810be3c4 100755 --- a/website/reverse.php +++ b/website/reverse.php @@ -2,6 +2,22 @@ require_once(dirname(dirname(__FILE__)).'/lib/init-website.php'); require_once(CONST_BasePath.'/lib/log.php'); + if (preg_match(CONST_BlockedUserAgents, $_SERVER["HTTP_USER_AGENT"]) > 0) + { + $fLoadAvg = getLoadAverage(); + if ($fLoadAvg >= CONST_BlockReverseMaxLoad) { + header('HTTP/1.0 403 Forbidden'); + header('Content-type: text/html; charset=utf-8'); + echo "

App temporarily blocked

"; + echo "Your application has been temporarily blocked from the OpenStreetMap Nominatim "; + echo "geolocation service due to high server load."; + echo "\n\n"; + exit; + } + + } + + if (strpos(CONST_BulkUserIPs, ','.$_SERVER["REMOTE_ADDR"].',') !== false) { $fLoadAvg = getLoadAverage(); diff --git a/website/search.php b/website/search.php index 5e2ff371..cc73513e 100755 --- a/website/search.php +++ b/website/search.php @@ -310,7 +310,12 @@ // Check which tokens we have, get the ID numbers $sSQL = 'select word_id,word_token, word, class, type, location, country_code, operator'; $sSQL .= ' from word where word_token in ('.join(',',array_map("getDBQuoted",$aTokens)).')'; - $sSQL .= ' and (class is null or class not in (\'highway\'))'; + // HACK WARNING + // (mis)using search_name_count to exclude words that return too many + // search results. saerch_name_count is currently set to 1 by hand + // because there is no fast way to extract this count from a live database. + $sSQL .= ' and search_name_count = 0'; +// $sSQL .= ' and (class is null or class not in (\'highway\'))'; // $sSQL .= ' group by word_token, word, class, type, location, country_code'; if (CONST_Debug) var_Dump($sSQL); @@ -789,11 +794,16 @@ $sSQL .= " limit ".$iLimit; if (CONST_Debug) var_dump($sSQL); + $iStartTime = time(); $aViewBoxPlaceIDs = $oDB->getAll($sSQL); if (PEAR::IsError($aViewBoxPlaceIDs)) { failInternalError("Could not get places for search terms.", $sSQL, $aViewBoxPlaceIDs); } + if (time() - $iStartTime > 60) { + file_put_contents(CONST_BasePath.'/log/long_queries.log', date('Y-m-d H:i:s', $iStartTime).' '.$sSQL."\n", FILE_APPEND); + } + //var_dump($aViewBoxPlaceIDs); // Did we have an viewbox matches? $aPlaceIDs = array();