]> git.openstreetmap.org Git - nominatim.git/commitdiff
Use GB postcode table as definitive source. Resort by presence of search word in...
authorBrian Quinion <openstreetmap@brian.quinion.co.uk>
Thu, 22 Mar 2012 00:33:28 +0000 (00:33 +0000)
committerBrian Quinion <brian.quinion@mapquest.com>
Thu, 22 Mar 2012 00:34:56 +0000 (00:34 +0000)
lib/lib.php
lib/log.php
lib/template/details-html.php
nominatim/import.c
nominatim/index.c
settings/settings.php
sql/functions.sql
sql/tables.sql
utils/setup.php
utils/specialphrases.php
website/search.php

index a7c805f7ddc274c55bef435423a8bf949421d480..069f3e2535da491f7959c92606f41ab29a307a8c 100644 (file)
@@ -40,9 +40,9 @@
 
        function byImportance($a, $b)
        {
-/*
                if ($a['importance'] != $b['importance'])
                        return ($a['importance'] > $b['importance']?-1:1);
+/*
                if ($a['aPointPolygon']['numfeatures'] != $b['aPointPolygon']['numfeatures'])
                        return ($a['aPointPolygon']['numfeatures'] > $b['aPointPolygon']['numfeatures']?-1:1);
                if ($a['aPointPolygon']['area'] != $b['aPointPolygon']['area'])
                        exit;
                }
                
-
                if (sizeof($aNearPostcodes))
                {
                        return array(array('lat' => $aNearPostcodes[0]['lat'], 'lon' => $aNearPostcodes[0]['lon'], 'radius' => 0.005));
                }
 
                return false;
-
-               /* partial search disabled because it sequentially scans placex
-               
-               $sSQL = 'select substring(upper(postcode) from \'^[A-Z][A-Z]?[0-9][0-9A-Z]? [0-9]([A-Z][A-Z])$\'),ST_X(ST_Centroid(geometry)) as lon,ST_Y(ST_Centroid(geometry)) as lat from placex where country_code::text = \'gb\'::text AND substring(postcode from \'^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9])[A-Z][A-Z]$\') = \''.$sPostcodeSector.'\' and class=\'place\' and type=\'postcode\' ';
-               $sSQL .= ' union ';
-               $sSQL .= 'select substring(upper(postcode) from \'^[A-Z][A-Z]?[0-9][0-9A-Z]? [0-9]([A-Z][A-Z])$\'),ST_X(ST_Centroid(geometry)) as lon,ST_Y(ST_Centroid(geometry)) as lat from gb_postcode where substring(postcode from \'^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9])[A-Z][A-Z]$\') = \''.$sPostcodeSector.'\'';
-               $aNearPostcodes = $oDB->getAll($sSQL);
-               if (PEAR::IsError($aNearPostcodes))
-               {
-                       var_dump($sSQL, $aNearPostcodes);
-                       exit;
-               }
-
-               if (!sizeof($aNearPostcodes))
-               {
-                       return false;
-               }
-
-               $fTotalLat = 0;
-               $fTotalLon = 0;
-               $fTotalFac = 0;
-               foreach($aNearPostcodes as $aPostcode)
-               {
-                       $iDiff = gbPostcodeAlphaDifference($sPostcodeEnd, $aPostcode['substring'])*2 + 1;
-                       if ($iDiff == 0)
-                               $fFac = 1;
-                       else
-                               $fFac = 1/($iDiff*$iDiff);
-                       
-                       $fTotalFac += $fFac;
-                       $fTotalLat += $aPostcode['lat'] * $fFac;
-                       $fTotalLon += $aPostcode['lon'] * $fFac;
-               }
-               if ($fTotalFac)
-               {
-                       $fLat = $fTotalLat / $fTotalFac;
-                       $fLon = $fTotalLon / $fTotalFac;
-                       $fRadius = min(0.1 / $fTotalFac, 0.02);
-                       return array(array('lat' => $fLat, 'lon' => $fLon, 'radius' => $fRadius));
-               }
-               return false;
-               */
-               /*
-                       $fTotalFac is a suprisingly good indicator of accuracy
-                       $iZoom = 18 + round(log($fTotalFac,32));
-                       $iZoom = max(13,min(18,$iZoom));
-               */
        }
 
        function usPostcodeCalculate($sPostcode, &$oDB)
index 93ad8f95f658e0b63af6b05f50322d38a8dcd6cb..e3126c0ebdf3a3532179f46191035e78297b19e6 100644 (file)
@@ -26,7 +26,8 @@
                        $oDB->query($sSQL);
                }
 
-               if (CONST_Log_File && CONST_Log_File_ReverseLog != '') {
+               if (CONST_Log_File && CONST_Log_File_ReverseLog != '')
+               {
                 if ($sType == 'reverse')
                 {
                         $aStartTime = explode('.',$hLog[0]);
@@ -68,7 +69,8 @@
                        $oDB->query($sSQL);
                }
 
-               if (CONST_Log_File && CONST_Log_File_SearchLog != '') {
+               if (CONST_Log_File && CONST_Log_File_SearchLog != '')
+               {
                 $aStartTime = explode('.',$hLog[0]);
                 file_put_contents(CONST_Log_File_SearchLog,
                                 $aStartTime[0].','.$aStartTime[1].','.
index 9124d594c4c56a82a476189147512ba6e43cfac0..aa27664b307eed944e45637673a589c788d63b34 100644 (file)
@@ -74,11 +74,12 @@ body {
                        var proj_map = map.getProjectionObject();
                        var latlon;
 <?php
-if (isset($aPolyPoints)) {
-foreach($aPolyPoints as $aPolyPoint)
+if (isset($aPolyPoints))
 {
-       echo "                        pointList.push(new OpenLayers.Geometry.Point(".$aPolyPoint[1].",".$aPolyPoint[2]."));\n";
-}
+       foreach($aPolyPoints as $aPolyPoint)
+       {
+               echo "                        pointList.push(new OpenLayers.Geometry.Point(".$aPolyPoint[1].",".$aPolyPoint[2]."));\n";
+       }
 }
 ?>
                        var linearRing = new OpenLayers.Geometry.LinearRing(pointList).transform(proj_EPSG4326, proj_map);;
index 95c742ff7ce77e7110ab30bf9d77a2fad9409c63..b9341608a42346884ccb89aeb179805dc4a89c45 100644 (file)
@@ -515,15 +515,18 @@ void EndElement(xmlTextReaderPtr reader, const xmlChar *name)
                 // insert into place_address
                 paramValues[0] = (const char *)place_id;
                 paramValues[1] = (const char *)featureAddress[i].distance;
+                if (paramValues[1] == NULL || strlen(paramValues[1]) == 0) paramValues[1] = "0";
                 paramValues[2] = (const char *)featureAddress[i].type;
                 paramValues[3] = (const char *)featureAddress[i].id;
                 paramValues[4] = (const char *)featureAddress[i].key;
                 paramValues[5] = (const char *)featureAddress[i].value;
                 paramValues[6] = (const char *)featureAddress[i].isAddress;
+                if (verbose) fprintf(stderr, "placex_insert: %s %s\n", paramValues[2], paramValues[3]);
                 res = PQexecPrepared(conn, "place_addressline_insert", 7, paramValues, NULL, NULL, 0);
                 if (PQresultStatus(res) != PGRES_COMMAND_OK)
                 {
                     fprintf(stderr, "place_addressline_insert: INSERT failed: %s", PQerrorMessage(conn));
+                    fprintf(stderr, "(%s,%s,%s,%s,%s,%s,%s)",paramValues[0],paramValues[1],paramValues[2],paramValues[3],paramValues[4],paramValues[5],paramValues[6]);
                     PQclear(res);
                     exit(EXIT_FAILURE);
                 }
index 3f161cc3ad3a90718d1280210ad5308633d073d6..368fd8a5458c11bbcd592b8e81fc6cb4d47042d4 100644 (file)
@@ -270,7 +270,7 @@ void nominatim_index(int rank_min, int rank_max, int num_threads, const char *co
                         usleep(1000);
 
                         // Aim for one update per second
-                        if (sleepcount++ > 2000)
+                        if (sleepcount++ > 500)
                         {
                             rankPerSecond = ((float)rankCountTuples + (float)count) / MAX(difftime(time(0), rankStartTime),1);
                             fprintf(stderr, "  Done %i in %i @ %f per second - Rank %i ETA (seconds): %f\n", (rankCountTuples + count), (int)(difftime(time(0), rankStartTime)), rankPerSecond, rank, ((float)(rankTotalTuples - (rankCountTuples + count)))/rankPerSecond);
index 591af1c697891f37887e4f5e430bc314bcb39089..66d7bed625e8b8fc61913d9ccc96c4bb0d88448d 100644 (file)
@@ -10,8 +10,8 @@
        @define('CONST_Postgresql_Version', '9.1');
        @define('CONST_Path_Postgresql_Contrib', '/usr/share/postgresql/'.CONST_Postgresql_Version.'/contrib');
        @define('CONST_Path_Postgresql_Postgis', CONST_Path_Postgresql_Contrib.'/postgis-1.5');
-    @define('CONST_Osm2pgsql_Binary', CONST_BasePath.'/osm2pgsql/osm2pgsql');
-    @define('CONST_Osmosis_Binary', CONST_BasePath.'/osmosis-0.38/bin/osmosis');
+       @define('CONST_Osm2pgsql_Binary', CONST_BasePath.'/osm2pgsql/osm2pgsql');
+       @define('CONST_Osmosis_Binary', CONST_BasePath.'/osmosis-0.38/bin/osmosis');
 
        // Website settings
        @define('CONST_ClosedForIndexing', false);
index 6ad13892c6341646606a9166e60fe301424b05d4..9a6131524f44ff8b69fd753de3b74c6f769d07c7 100644 (file)
@@ -940,7 +940,50 @@ BEGIN
     NEW.rank_address := NEW.rank_search;
 
     -- By doing in postgres we have the country available to us - currently only used for postcode
-    IF NEW.class = 'place' THEN
+    IF NEW.class in ('place','boundary') AND NEW.type in ('postcode','postal_code') THEN
+
+        NEW.name := 'ref'=>NEW.postcode;
+
+        IF NEW.country_code = 'gb' THEN
+
+          IF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9][A-Z][A-Z])$' THEN
+            NEW.rank_search := 25;
+            NEW.rank_address := 5;
+          ELSEIF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9])$' THEN
+            NEW.rank_search := 23;
+            NEW.rank_address := 5;
+          ELSEIF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z])$' THEN
+            NEW.rank_search := 21;
+            NEW.rank_address := 5;
+          END IF;
+
+        ELSEIF NEW.country_code = 'de' THEN
+
+          IF NEW.postcode ~ '^([0-9]{5})$' THEN
+            NEW.rank_search := 21;
+            NEW.rank_address := 11;
+          END IF;
+
+        ELSE
+          -- Guess at the postcode format and coverage (!)
+          IF upper(NEW.postcode) ~ '^[A-Z0-9]{1,5}$' THEN -- Probably too short to be very local
+            NEW.rank_search := 21;
+            NEW.rank_address := 11;
+          ELSE
+            -- Does it look splittable into an area and local code?
+            postcode := substring(upper(NEW.postcode) from '^([- :A-Z0-9]+)([- :][A-Z0-9]+)$');
+
+            IF postcode IS NOT NULL THEN
+              NEW.rank_search := 25;
+              NEW.rank_address := 11;
+            ELSEIF NEW.postcode ~ '^[- :A-Z0-9]{6,}$' THEN
+              NEW.rank_search := 21;
+              NEW.rank_address := 11;
+            END IF;
+          END IF;
+        END IF;
+
+    ELSEIF NEW.class = 'place' THEN
       IF NEW.type in ('continent') THEN
         NEW.rank_search := 2;
         NEW.rank_address := NEW.rank_search;
@@ -992,49 +1035,6 @@ BEGIN
       ELSEIF NEW.type in ('hall_of_residence','neighbourhood','housing_estate','nature_reserve') THEN
         NEW.rank_search := 22;
         NEW.rank_address := 22;
-      ELSEIF NEW.type in ('postcode') THEN
-
-        NEW.name := 'ref'=>NEW.postcode;
-
-        IF NEW.country_code = 'gb' THEN
-
-          IF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9][A-Z][A-Z])$' THEN
-            NEW.rank_search := 25;
-            NEW.rank_address := 5;
-          ELSEIF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9])$' THEN
-            NEW.rank_search := 23;
-            NEW.rank_address := 5;
-          ELSEIF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z])$' THEN
-            NEW.rank_search := 21;
-            NEW.rank_address := 5;
-          END IF;
-
-        ELSEIF NEW.country_code = 'de' THEN
-
-          IF NEW.postcode ~ '^([0-9]{5})$' THEN
-            NEW.rank_search := 21;
-            NEW.rank_address := 11;
-          END IF;
-
-        ELSE
-          -- Guess at the postcode format and coverage (!)
-          IF upper(NEW.postcode) ~ '^[A-Z0-9]{1,5}$' THEN -- Probably too short to be very local
-            NEW.rank_search := 21;
-            NEW.rank_address := 11;
-          ELSE
-            -- Does it look splitable into and area and local code?
-            postcode := substring(upper(NEW.postcode) from '^([- :A-Z0-9]+)([- :][A-Z0-9]+)$');
-
-            IF postcode IS NOT NULL THEN
-              NEW.rank_search := 25;
-              NEW.rank_address := 11;
-            ELSEIF NEW.postcode ~ '^[- :A-Z0-9]{6,}$' THEN
-              NEW.rank_search := 21;
-              NEW.rank_address := 11;
-            END IF;
-          END IF;
-        END IF;
-
       ELSEIF NEW.type in ('airport','street') THEN
         NEW.rank_search := 26;
         NEW.rank_address := NEW.rank_search;
@@ -1115,7 +1115,8 @@ BEGIN
     IF st_area(NEW.geometry) < 1 THEN
       -- mark items within the geometry for re-indexing
 --    RAISE WARNING 'placex poly insert: % % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type;
--- work around bug in postgis
+
+      -- work around bug in postgis, this may have been fixed in 2.0.0 (see http://trac.osgeo.org/postgis/ticket/547)
       update placex set indexed_status = 2 where (ST_Contains(NEW.geometry, placex.geometry) OR ST_Intersects(NEW.geometry, placex.geometry)) 
        AND rank_search > NEW.rank_search and indexed_status = 0 and ST_geometrytype(placex.geometry) = 'ST_Point';
       update placex set indexed_status = 2 where (ST_Contains(NEW.geometry, placex.geometry) OR ST_Intersects(NEW.geometry, placex.geometry)) 
@@ -1203,6 +1204,7 @@ DECLARE
 
   tagpairid INTEGER;
 
+  default_language TEXT;
   name_vector INTEGER[];
   nameaddress_vector INTEGER[];
 
@@ -1256,6 +1258,19 @@ BEGIN
     -- cheaper but less acurate
     place_centroid := ST_Centroid(NEW.geometry);
 
+    -- Thought this wasn't needed but when we add new languages to the country_name table
+    -- we need to update the existing names
+    IF NEW.name is not null AND array_upper(%#NEW.name,1) > 1 THEN
+      default_language := get_country_language_code(NEW.country_code);
+      IF default_language IS NOT NULL THEN
+        IF NEW.name ? 'name' AND NOT NEW.name ? ('name:'||default_language) THEN
+          NEW.name := NEW.name || (('name:'||default_language) => (NEW.name -> 'name'));
+        ELSEIF NEW.name ? ('name:'||default_language) AND NOT NEW.name ? 'name' THEN
+          NEW.name := NEW.name || ('name' => (NEW.name -> 'name:'||default_language));
+        END IF;
+      END IF;
+    END IF;
+
     -- Initialise the name vector using our name
     name_vector := make_keywords(NEW.name);
     nameaddress_vector := '{}'::int[];
index 19d8c084d530bfd6dae224a97a980d1e1ab9845d..795b927747a99b9ef04418dd48bdd33c0321f2a5 100644 (file)
@@ -212,6 +212,7 @@ CREATE TABLE placex (
   geometry_sector INTEGER
   );
 SELECT AddGeometryColumn('placex', 'geometry', 4326, 'GEOMETRY', 2);
+SELECT AddGeometryColumn('placex', 'centroid', 4326, 'GEOMETRY', 2);
 CREATE UNIQUE INDEX idx_place_id ON placex USING BTREE (place_id);
 CREATE INDEX idx_placex_osmid ON placex USING BTREE (osm_type, osm_id);
 CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search, geometry_sector);
index cf465b15b02a58c1cd7541c646163a7dd853cd5f..94888e4430fa28c98dfc1cd4db5c989c7e1d52e0 100755 (executable)
                if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection));
        }
 
-       if ($aCMDResult['osmosis-init'] && isset($aCMDResult['osmosis-init-date']))
+       if (($aCMDResult['osmosis-init'] || $aCMDResult['all']) && isset($aCMDResult['osmosis-init-date']))
        {
                $bDidSomething = true;
 
index 1c3eff5b497be3dd02c10478259829fe2d6885b0..81d240a612c1dc77062023967fff7cfb88ea734a 100755 (executable)
 
                foreach($aPairs as $aPair)
                {
+                       if ($aPair[0] == 'yes') continue;
+                       if ($aPair[1] == 'yes') continue;
+                       if ($aPair[0] == 'highway') continue;
                        if ($aPair[1] == 'highway') continue;
 
                        echo "create table place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." as ";
index d9adf6b849088b4a0eef95245087495cf2396eab..64cec021ee472cb4f484a8f2201dfaa799c78506 100755 (executable)
                                $sToken = $oDB->getOne("select make_standard_name('".$aSpecialTerm[1]."') as string");
                                $sSQL = 'select * from (select word_id,word_token, word, class, type, location, country_code, operator';
                                $sSQL .= ' from word where word_token in (\' '.$sToken.'\')) as x where (class is not null and class not in (\'place\',\'highway\')) or country_code is not null';
+                               if (CONST_Debug) var_Dump($sSQL);
                                $aSearchWords = $oDB->getAll($sSQL);
                                $aNewSearches = array();
                                foreach($aSearches as $aSearch)
                        // Try and calculate GB postcodes we might be missing
                        foreach($aTokens as $sToken)
                        {
-                               if (!isset($aValidTokens[$sToken]) && !isset($aValidTokens[' '.$sToken]) && preg_match('/^([A-Z][A-Z]?[0-9][0-9A-Z]? ?[0-9])([A-Z][A-Z])$/', strtoupper(trim($sToken)), $aData))
+                               // Source of gb postcodes is now definitive - always use
+                               if (preg_match('/^([A-Z][A-Z]?[0-9][0-9A-Z]? ?[0-9])([A-Z][A-Z])$/', strtoupper(trim($sToken)), $aData))
                                {
                                        if (substr($aData[1],-2,1) != ' ')
                                        {
                                
                                Score how good the search is so they can be ordered
                        */
-
                                foreach($aPhrases as $iPhrase => $sPhrase)
                                {
                                        $aNewPhraseSearches = array();
                                                                                                if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch;
                                                                                        }
                                                                                }
-                                                                               else
+                                                                               elseif (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id'])
                                                                                {
                                                                                        if (sizeof($aSearch['aName']))
                                                                                        {
                                                                        // Allow searching for a word - but at extra cost
                                                                        foreach($aValidTokens[$sToken] as $aSearchTerm)
                                                                        {
+                                                                               if (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id'])
+                                                                               {
 //var_Dump('<hr>',$aSearch['aName']);
 
                                                                                if (sizeof($aCurrentSearch['aName'])  && strlen($sToken) >= 4)
                                                                                        $aSearch['iNamePhrase'] = $iPhrase;
                                                                                if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch;
                                                                                }
+                                                                               }
                                                                        }
                                                                }
                                                                else
                                                        // First we need a position, either aName or fLat or both
                                                        $aTerms = array();
                                                        $aOrder = array();
+
+                                                       // TODO: filter out the pointless search terms (2 letter name tokens and less)
+                                                       // they might be right - but they are just too darned expensive to run
                                                        if (sizeof($aSearch['aName'])) $aTerms[] = "name_vector @> ARRAY[".join($aSearch['aName'],",")."]";
                                                        if (sizeof($aSearch['aAddress']) && $aSearch['aName'] != $aSearch['aAddress']) $aTerms[] = "nameaddress_vector @> ARRAY[".join($aSearch['aAddress'],",")."]";
                                                        if ($aSearch['sCountryCode']) $aTerms[] = "country_code = '".pg_escape_string($aSearch['sCountryCode'])."'";
 //var_Dump($aSearchResults);
 //exit;
        $aClassType = getClassTypesWithImportance();
+       $aRecheckWords = preg_split('/\b/',$sQuery);
+       foreach($aRecheckWords as $i => $sWord)
+       {
+               if (!$sWord) unset($aRecheckWords[$i]);
+       }
        foreach($aSearchResults as $iResNum => $aResult)
        {
                if (CONST_Search_AreaPolygons || true)
 //exit;
                }
 
+               // Adjust importance for the number of exact string matches in the result
+               $aResult['importance'] = max(0.001,$aResult['importance']);
+               $iCountWords = 0;
+               $sAddress = $aResult['langaddress'];
+               foreach($aRecheckWords as $i => $sWord)
+               {
+                       if (stripos($sAddress, $sWord)!==false) $iCountWords++;
+               }
+               $aResult['importance'] = $aResult['importance'] + $iCountWords;
+
 //if (CONST_Debug) var_dump($aResult['class'].':'.$aResult['type'].':'.$aResult['admin_level']);
 /*
                if (isset($aClassType[$aResult['class'].':'.$aResult['type'].':'.$aResult['admin_level']]['importance']) 
                $aResult['foundorder'] = $iResNum;
                $aSearchResults[$iResNum] = $aResult;
        }
-       
        uasort($aSearchResults, 'byImportance');
 
 //var_dump($aSearchResults);exit;