git.openstreetmap.org Git - nominatim.git/blobdiff - lib/Geocode.php
move initial search setup to new class type
[nominatim.git] / lib / Geocode.php
index a7558bea6ff2bac687b6cb9813db26aa3e95a0fd..88a969a54b93a6e324703e01fc4e033af5abb403 100644 (file)
@@ -709,6 +709,8 @@ class Geocode
 
              Score how good the search is so they can be ordered
          */
+        $iGlobalRank = 0;
+
         foreach ($aPhrases as $iPhrase => $aPhrase) {
             $aNewPhraseSearches = array();
             if ($bStructuredPhrases) $sPhraseType = $aPhraseTypes[$iPhrase];
@@ -732,6 +734,8 @@ class Geocode
 
                         // If the token is valid
                         if (isset($aValidTokens[' '.$sToken])) {
+                            // TODO variable should go into aCurrentSearch
+                            $bHavePostcode = false;
                             foreach ($aValidTokens[' '.$sToken] as $aSearchTerm) {
                                 $aSearch = $aCurrentSearch;
                                 $aSearch['iSearchRank']++;
@@ -743,19 +747,25 @@ class Geocode
                                             $aSearch['iSearchRank'] += 5;
                                         }
                                         if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
+                                        // If it is at the beginning, we can be almost sure that this is the wrong order
+                                        // Increase score for all searches.
+                                        if ($iToken == 0 && $iPhrase == 0) {
+                                            $iGlobalRank++;
+                                        }
                                     }
-                                } elseif ($sPhraseType == 'postalcode' || ($aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'postcode')) {
+                                } elseif (($sPhraseType == '' || $sPhraseType == 'postalcode') && $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'postcode') {
                                     // We need to try the case where the postal code is the primary element (i.e. no way to tell if it is (postalcode, city) OR (city, postalcode) so try both
-                                    if ($aSearch['sPostcode'] === '' && $aSearch['sHouseNumber'] === '' &&
+                                    if ($aSearch['sPostcode'] === '' &&
                                         isset($aSearchTerm['word']) && $aSearchTerm['word'] && strpos($sNormQuery, $this->normTerm($aSearchTerm['word'])) !== false) {
                                         // If we have structured search or this is the first term,
                                         // make the postcode the primary search element.
-                                        if ($aSearch['sOperator'] === '' && ($sPhraseType == 'postalcode' || ($iToken == 0 && $iPhrase == 0))) {
+                                        if (!$bHavePostcode && $aSearch['sOperator'] === '' && ($sPhraseType == 'postalcode' || ($iToken == 0 && $iPhrase == 0))) {
                                             $aNewSearch = $aSearch;
                                             $aNewSearch['sOperator'] = 'postcode';
                                             $aNewSearch['aAddress'] = array_merge($aNewSearch['aAddress'], $aNewSearch['aName']);
-                                            $aNewSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word'];
+                                            $aNewSearch['aName'] = array($aSearchTerm['word_id'] => $aSearchTerm['word']);
                                             if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aNewSearch;
+                                            $bHavePostcode = true;
                                         }
 
                                         // If we have a structured search or this is not the first term,
@@ -771,8 +781,8 @@ class Geocode
                                         // sanity check: if the housenumber is not mainly made
                                         // up of numbers, add a penalty
                                         if (preg_match_all("/[^0-9]/", $sToken, $aMatches) > 2) $aSearch['iSearchRank']++;
-                                        // also housenumbers should appear in the first or second phrase
-                                        if ($iPhrase > 1) $aSearch['iSearchRank'] += 1;
+                                        // also must not appear in the middle of the address
+                                        if ($aSearch['aAddress'] || $aSearch['aAddressNonSearch']) $aSearch['iSearchRank'] += 1;
                                         if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
                                         /*
                                         // Fall back to not searching for this item (better than nothing)
@@ -849,7 +859,8 @@ class Geocode
                                         }
                                     }
 
-                                    if (!sizeof($aCurrentSearch['aName']) || $aCurrentSearch['iNamePhrase'] == $iPhrase) {
+                                    if ((!$aCurrentSearch['sPostcode'] && !$aCurrentSearch['aAddress'] && !$aCurrentSearch['aAddressNonSearch'])
+                                        && (!sizeof($aCurrentSearch['aName']) || $aCurrentSearch['iNamePhrase'] == $iPhrase)) {
                                         $aSearch = $aCurrentSearch;
                                         $aSearch['iSearchRank'] += 1;
                                         if (!sizeof($aCurrentSearch['aName'])) $aSearch['iSearchRank'] += 1;
@@ -911,7 +922,7 @@ class Geocode
             //if (CONST_Debug) _debugDumpGroupedSearches($aGroupedSearches, $aValidTokens);
         }
 
-        // Revisit searches, giving penalty to unlikely combinations
+        // Revisit searches, drop bad searches and give penalty to unlikely combinations.
         $aGroupedSearches = array();
         foreach ($aSearches as $aSearch) {
             if (!$aSearch['aName']) {
@@ -919,6 +930,12 @@ class Geocode
                     continue;
                 }
             }
+            if ($this->aCountryCodes && $aSearch['sCountryCode']
+                && !in_array($aSearch['sCountryCode'], $this->aCountryCodes)) {
+                continue;
+            }
+
+            $aSearch['iSearchRank'] += $iGlobalRank;
             $aGroupedSearches[$aSearch['iSearchRank']][] = $aSearch;
         }
         ksort($aGroupedSearches);
@@ -1006,76 +1023,59 @@ class Geocode
 
         $aSearchResults = array();
         if ($sQuery || $this->aStructuredQuery) {
-            // Start with a blank search
-            $aSearches = array(
-                          array(
-                           'iSearchRank' => 0,
-                           'iNamePhrase' => -1,
-                           'sCountryCode' => false,
-                           'aName' => array(),
-                           'aAddress' => array(),
-                           'aFullNameAddress' => array(),
-                           'aNameNonSearch' => array(),
-                           'aAddressNonSearch' => array(),
-                           'sOperator' => '',
-                           'aFeatureName' => array(),
-                           'sClass' => '',
-                           'sType' => '',
-                           'sHouseNumber' => '',
-                           'sPostcode' => '',
-                           'oNear' => $oNearPoint
-                          )
-                         );
-
-            // Any 'special' terms in the search?
-            $bSpecialTerms = false;
-            preg_match_all('/\\[([\\w_]*)=([\\w_]*)\\]/', $sQuery, $aSpecialTermsRaw, PREG_SET_ORDER);
-            foreach ($aSpecialTermsRaw as $aSpecialTerm) {
-                $sQuery = str_replace($aSpecialTerm[0], ' ', $sQuery);
-                if (!$bSpecialTerms) {
-                    $aNewSearches = array();
-                    foreach ($aSearches as $aSearch) {
-                        $aNewSearch = $aSearch;
-                        $aNewSearch['sClass'] = $aSpecialTerm[1];
-                        $aNewSearch['sType'] = $aSpecialTerm[2];
-                        $aNewSearches[] = $aNewSearch;
-                    }
+            // Start with a single blank search
+            $aSearches = array(new SearchDescription());
 
-                    $aSearches = $aNewSearches;
-                    $bSpecialTerms = true;
-                }
+            if ($oNearPoint) {
+                $aSearches[0]->setNear($oNearPoint);
             }
 
-            preg_match_all('/\\[([\\w ]*)\\]/u', $sQuery, $aSpecialTermsRaw, PREG_SET_ORDER);
-            if (isset($this->aStructuredQuery['amenity']) && $this->aStructuredQuery['amenity']) {
-                $aSpecialTermsRaw[] = array('['.$this->aStructuredQuery['amenity'].']', $this->aStructuredQuery['amenity']);
-                unset($this->aStructuredQuery['amenity']);
+            if ($sQuery) {
+                $sQuery = $aSearches[0]->extractKeyValuePairs($sQuery);
             }
 
-            foreach ($aSpecialTermsRaw as $aSpecialTerm) {
-                $sQuery = str_replace($aSpecialTerm[0], ' ', $sQuery);
-                if ($bSpecialTerms) {
-                    continue;
+            $sSpecialTerm = '';
+            if ($sQuery) {
+                preg_match_all(
+                    '/\\[([\\w ]*)\\]/u',
+                    $sQuery,
+                    $aSpecialTermsRaw,
+                    PREG_SET_ORDER
+                );
+                foreach ($aSpecialTermsRaw as $aSpecialTerm) {
+                    $sQuery = str_replace($aSpecialTerm[0], ' ', $sQuery);
+                    if (!$sSpecialTerm) {
+                        $sSpecialTerm = $aSpecialTerm[1];
+                    }
                 }
+            }
+            if (!$sSpecialTerm && $this->aStructuredQuery
+                && isset($this->aStructuredQuery['amenity'])) {
+                $sSpecialTerm = $this->aStructuredQuery['amenity'];
+                unset($this->aStructuredQuery['amenity']);
+            }
 
-                $sToken = chksql($this->oDB->getOne("SELECT make_standard_name('".$aSpecialTerm[1]."') AS string"));
-                $sSQL = 'SELECT * ';
-                $sSQL .= 'FROM ( ';
-                $sSQL .= '   SELECT word_id, word_token, word, class, type, country_code, operator';
-                $sSQL .= '   FROM word ';
+            if ($sSpecialTerm && !$aSearches[0]->hasOperator()) {
+                $sSpecialTerm = pg_escape_string($sSpecialTerm);
+                $sToken = chksql(
+                    $this->oDB->getOne("SELECT make_standard_name('$sSpecialTerm')"),
+                    "Cannot decode query. Wrong encoding?"
+                );
+                $sSQL = 'SELECT class, type FROM word ';
                 $sSQL .= '   WHERE word_token in (\' '.$sToken.'\')';
-                $sSQL .= ') AS x ';
-                $sSQL .= ' WHERE (class is not null AND class not in (\'place\'))';
+                $sSQL .= '   AND class is not null AND class not in (\'place\')';
                 if (CONST_Debug) var_Dump($sSQL);
                 $aSearchWords = chksql($this->oDB->getAll($sSQL));
                 $aNewSearches = array();
-                foreach ($aSearches as $aSearch) {
+                foreach ($aSearches as $oSearch) {
                     foreach ($aSearchWords as $aSearchTerm) {
-                        $aNewSearch = $aSearch;
-                        $aNewSearch['sClass'] = $aSearchTerm['class'];
-                        $aNewSearch['sType'] = $aSearchTerm['type'];
-                        $aNewSearches[] = $aNewSearch;
-                        $bSpecialTerms = true;
+                        $oNewSearch = clone $oSearch;
+                        $oNewSearch->setPoiSearch(
+                            Operator::TYPE,
+                            $aSearchTerm['class'],
+                            $aSearchTerm['type'],
+                        );
+                        $aNewSearches[] = $oNewSearch;
                     }
                 }
                 $aSearches = $aNewSearches;
@@ -1195,10 +1195,10 @@ class Geocode
 
                     foreach ($aGroupedSearches as $aSearches) {
                         foreach ($aSearches as $aSearch) {
-                            if ($aSearch['iSearchRank'] < $this->iMaxRank) {
-                                if (!isset($aReverseGroupedSearches[$aSearch['iSearchRank']])) $aReverseGroupedSearches[$aSearch['iSearchRank']] = array();
-                                $aReverseGroupedSearches[$aSearch['iSearchRank']][] = $aSearch;
+                            if (!isset($aReverseGroupedSearches[$aSearch->getRank()])) {
+                                $aReverseGroupedSearches[$aSearch->getRank()] = array();
                             }
+                            $aReverseGroupedSearches[$aSearch->getRank()][] = $aSearch;
                         }
                     }
 
@@ -1209,38 +1209,9 @@ class Geocode
                 // Re-group the searches by their score, junk anything over 20 as just not worth trying
                 $aGroupedSearches = array();
                 foreach ($aSearches as $aSearch) {
-                    if ($aSearch['iSearchRank'] < $this->iMaxRank) {
-                        if (!isset($aGroupedSearches[$aSearch['iSearchRank']])) $aGroupedSearches[$aSearch['iSearchRank']] = array();
-                        $aGroupedSearches[$aSearch['iSearchRank']][] = $aSearch;
-                    }
-                }
-                ksort($aGroupedSearches);
-            }
-
-            if (CONST_Search_TryDroppedAddressTerms && sizeof($this->aStructuredQuery) > 0) {
-                $aCopyGroupedSearches = $aGroupedSearches;
-                foreach ($aCopyGroupedSearches as $iGroup => $aSearches) {
-                    foreach ($aSearches as $iSearch => $aSearch) {
-                        $aReductionsList = array($aSearch['aAddress']);
-                        $iSearchRank = $aSearch['iSearchRank'];
-                        while (sizeof($aReductionsList) > 0) {
-                            $iSearchRank += 5;
-                            if ($iSearchRank > iMaxRank) break 3;
-                            $aNewReductionsList = array();
-                            foreach ($aReductionsList as $aReductionsWordList) {
-                                for ($iReductionWord = 0; $iReductionWord < sizeof($aReductionsWordList); $iReductionWord++) {
-                                    $aReductionsWordListResult = array_merge(array_slice($aReductionsWordList, 0, $iReductionWord), array_slice($aReductionsWordList, $iReductionWord+1));
-                                    $aReverseSearch = $aSearch;
-                                    $aSearch['aAddress'] = $aReductionsWordListResult;
-                                    $aSearch['iSearchRank'] = $iSearchRank;
-                                    $aGroupedSearches[$iSearchRank][] = $aReverseSearch;
-                                    if (sizeof($aReductionsWordListResult) > 0) {
-                                        $aNewReductionsList[] = $aReductionsWordListResult;
-                                    }
-                                }
-                            }
-                            $aReductionsList = $aNewReductionsList;
-                        }
+                    if ($aSearch->getRank() < $this->iMaxRank) {
+                        if (!isset($aGroupedSearches[$aSearch->getRank()])) $aGroupedSearches[$aSearch->getRank()] = array();
+                        $aGroupedSearches[$aSearch->getRank()][] = $aSearch;
                     }
                 }
                 ksort($aGroupedSearches);
@@ -1273,10 +1244,6 @@ class Geocode
                     if (CONST_Debug) echo "<hr><b>Search Loop, group $iGroupLoop, loop $iQueryLoop</b>";
                     if (CONST_Debug) _debugDumpGroupedSearches(array($iGroupedRank => array($aSearch)), $aValidTokens);
 
-                    if ($sCountryCodesSQL && $aSearch['sCountryCode'] && !in_array($aSearch['sCountryCode'], $this->aCountryCodes)) {
-                        continue;
-                    }
-
                     // No location term?
                     if (!sizeof($aSearch['aName']) && !sizeof($aSearch['aAddress'])) {
                         if ($aSearch['sCountryCode'] && !$aSearch['sClass'] && !$aSearch['sHouseNumber'] && !$aSearch['oNear']) {