git.openstreetmap.org Git - nominatim.git/blobdiff - lib/Geocode.php
drop searches with excluded country codes earlier
[nominatim.git] / lib / Geocode.php
index aef2d3841cc5f4eec9d4e060baf002700e463f1f..a403fa1080c8755e75c9691a6e6ad41dd47e803e 100644 (file)
@@ -709,6 +709,8 @@ class Geocode
 
              Score how good the search is so they can be ordered
          */
+        $iGlobalRank = 0;
+
         foreach ($aPhrases as $iPhrase => $aPhrase) {
             $aNewPhraseSearches = array();
             if ($bStructuredPhrases) $sPhraseType = $aPhraseTypes[$iPhrase];
@@ -732,6 +734,8 @@ class Geocode
 
                         // If the token is valid
                         if (isset($aValidTokens[' '.$sToken])) {
+                            // TODO variable should go into aCurrentSearch
+                            $bHavePostcode = false;
                             foreach ($aValidTokens[' '.$sToken] as $aSearchTerm) {
                                 $aSearch = $aCurrentSearch;
                                 $aSearch['iSearchRank']++;
@@ -743,19 +747,25 @@ class Geocode
                                             $aSearch['iSearchRank'] += 5;
                                         }
                                         if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
+                                        // If it is at the beginning, we can be almost sure that this is the wrong order
+                                        // Increase score for all searches.
+                                        if ($iToken == 0 && $iPhrase == 0) {
+                                            $iGlobalRank++;
+                                        }
                                     }
-                                } elseif ($sPhraseType == 'postalcode' || ($aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'postcode')) {
+                                } elseif (($sPhraseType == '' || $sPhraseType == 'postalcode') && $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'postcode') {
                                     // We need to try the case where the postal code is the primary element (i.e. no way to tell if it is (postalcode, city) OR (city, postalcode) so try both
-                                    if ($aSearch['sPostcode'] === '' && $aSearch['sHouseNumber'] === '' &&
+                                    if ($aSearch['sPostcode'] === '' &&
                                         isset($aSearchTerm['word']) && $aSearchTerm['word'] && strpos($sNormQuery, $this->normTerm($aSearchTerm['word'])) !== false) {
                                         // If we have structured search or this is the first term,
                                         // make the postcode the primary search element.
-                                        if ($aSearch['sOperator'] === '' && ($sPhraseType == 'postalcode' || ($iToken == 0 && $iPhrase == 0))) {
+                                        if (!$bHavePostcode && $aSearch['sOperator'] === '' && ($sPhraseType == 'postalcode' || ($iToken == 0 && $iPhrase == 0))) {
                                             $aNewSearch = $aSearch;
                                             $aNewSearch['sOperator'] = 'postcode';
                                             $aNewSearch['aAddress'] = array_merge($aNewSearch['aAddress'], $aNewSearch['aName']);
-                                            $aNewSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word'];
+                                            $aNewSearch['aName'] = array($aSearchTerm['word_id'] => $aSearchTerm['word']);
                                             if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aNewSearch;
+                                            $bHavePostcode = true;
                                         }
 
                                         // If we have a structured search or this is not the first term,
@@ -771,8 +781,8 @@ class Geocode
                                         // sanity check: if the housenumber is not mainly made
                                         // up of numbers, add a penalty
                                         if (preg_match_all("/[^0-9]/", $sToken, $aMatches) > 2) $aSearch['iSearchRank']++;
-                                        // also housenumbers should appear in the first or second phrase
-                                        if ($iPhrase > 1) $aSearch['iSearchRank'] += 1;
+                                        // also must not appear in the middle of the address
+                                        if ($aSearch['aAddress'] || $aSearch['aAddressNonSearch']) $aSearch['iSearchRank'] += 1;
                                         if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
                                         /*
                                         // Fall back to not searching for this item (better than nothing)
@@ -785,7 +795,7 @@ class Geocode
                                     // require a normalized exact match of the term
                                     // if we have the normalizer version of the query
                                     // available
-                                    if ($aSearch['sClass'] === ''
+                                    if ($aSearch['sOperator'] === ''
                                         && ($sNormQuery === null || !($aSearchTerm['word'] && strpos($sNormQuery, $aSearchTerm['word']) === false))) {
                                         $aSearch['sClass'] = $aSearchTerm['class'];
                                         $aSearch['sType'] = $aSearchTerm['type'];
@@ -849,7 +859,8 @@ class Geocode
                                         }
                                     }
 
-                                    if (!sizeof($aCurrentSearch['aName']) || $aCurrentSearch['iNamePhrase'] == $iPhrase) {
+                                    if ((!$aCurrentSearch['sPostcode'] && !$aCurrentSearch['aAddress'] && !$aCurrentSearch['aAddressNonSearch'])
+                                        && (!sizeof($aCurrentSearch['aName']) || $aCurrentSearch['iNamePhrase'] == $iPhrase)) {
                                         $aSearch = $aCurrentSearch;
                                         $aSearch['iSearchRank'] += 1;
                                         if (!sizeof($aCurrentSearch['aName'])) $aSearch['iSearchRank'] += 1;
@@ -911,7 +922,7 @@ class Geocode
             //if (CONST_Debug) _debugDumpGroupedSearches($aGroupedSearches, $aValidTokens);
         }
 
-        // Revisit searches, giving penalty to unlikely combinations
+        // Revisit searches, drop bad searches and give penalty to unlikely combinations.
         $aGroupedSearches = array();
         foreach ($aSearches as $aSearch) {
             if (!$aSearch['aName']) {
@@ -919,6 +930,12 @@ class Geocode
                     continue;
                 }
             }
+            if ($this->aCountryCodes && $aSearch['sCountryCode']
+                && !in_array($aSearch['sCountryCode'], $this->aCountryCodes)) {
+                continue;
+            }
+
+            $aSearch['iSearchRank'] += $iGlobalRank;
             $aGroupedSearches[$aSearch['iSearchRank']][] = $aSearch;
         }
         ksort($aGroupedSearches);
@@ -1058,7 +1075,7 @@ class Geocode
                     continue;
                 }
 
-                $sToken = chksql($this->oDB->getOne("SELECT make_standard_name('".$aSpecialTerm[1]."') AS string"));
+                $sToken = chksql($this->oDB->getOne("SELECT make_standard_name('".pg_escape_string($aSpecialTerm[1])."') AS string"));
                 $sSQL = 'SELECT * ';
                 $sSQL .= 'FROM ( ';
                 $sSQL .= '   SELECT word_id, word_token, word, class, type, country_code, operator';
@@ -1217,35 +1234,6 @@ class Geocode
                 ksort($aGroupedSearches);
             }
 
-            if (CONST_Search_TryDroppedAddressTerms && sizeof($this->aStructuredQuery) > 0) {
-                $aCopyGroupedSearches = $aGroupedSearches;
-                foreach ($aCopyGroupedSearches as $iGroup => $aSearches) {
-                    foreach ($aSearches as $iSearch => $aSearch) {
-                        $aReductionsList = array($aSearch['aAddress']);
-                        $iSearchRank = $aSearch['iSearchRank'];
-                        while (sizeof($aReductionsList) > 0) {
-                            $iSearchRank += 5;
-                            if ($iSearchRank > iMaxRank) break 3;
-                            $aNewReductionsList = array();
-                            foreach ($aReductionsList as $aReductionsWordList) {
-                                for ($iReductionWord = 0; $iReductionWord < sizeof($aReductionsWordList); $iReductionWord++) {
-                                    $aReductionsWordListResult = array_merge(array_slice($aReductionsWordList, 0, $iReductionWord), array_slice($aReductionsWordList, $iReductionWord+1));
-                                    $aReverseSearch = $aSearch;
-                                    $aSearch['aAddress'] = $aReductionsWordListResult;
-                                    $aSearch['iSearchRank'] = $iSearchRank;
-                                    $aGroupedSearches[$iSearchRank][] = $aReverseSearch;
-                                    if (sizeof($aReductionsWordListResult) > 0) {
-                                        $aNewReductionsList[] = $aReductionsWordListResult;
-                                    }
-                                }
-                            }
-                            $aReductionsList = $aNewReductionsList;
-                        }
-                    }
-                }
-                ksort($aGroupedSearches);
-            }
-
             // Filter out duplicate searches
             $aSearchHash = array();
             foreach ($aGroupedSearches as $iGroup => $aSearches) {
@@ -1273,10 +1261,6 @@ class Geocode
                     if (CONST_Debug) echo "<hr><b>Search Loop, group $iGroupLoop, loop $iQueryLoop</b>";
                     if (CONST_Debug) _debugDumpGroupedSearches(array($iGroupedRank => array($aSearch)), $aValidTokens);
 
-                    if ($sCountryCodesSQL && $aSearch['sCountryCode'] && !in_array($aSearch['sCountryCode'], $this->aCountryCodes)) {
-                        continue;
-                    }
-
                     // No location term?
                     if (!sizeof($aSearch['aName']) && !sizeof($aSearch['aAddress'])) {
                         if ($aSearch['sCountryCode'] && !$aSearch['sClass'] && !$aSearch['sHouseNumber'] && !$aSearch['oNear']) {