X-Git-Url: https://git.openstreetmap.org/nominatim.git/blobdiff_plain/f8d55b5448b6be721a6ea05567930fe244ead7d1..0067555c38cece9cf11a0074ab59456c22c8c97d:/lib/Geocode.php diff --git a/lib/Geocode.php b/lib/Geocode.php index d96aaac2..88a969a5 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -709,6 +709,8 @@ class Geocode Score how good the search is so they can be ordered */ + $iGlobalRank = 0; + foreach ($aPhrases as $iPhrase => $aPhrase) { $aNewPhraseSearches = array(); if ($bStructuredPhrases) $sPhraseType = $aPhraseTypes[$iPhrase]; @@ -745,14 +747,19 @@ class Geocode $aSearch['iSearchRank'] += 5; } if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; + // If it is at the beginning, we can be almost sure that this is the wrong order + // Increase score for all searches. + if ($iToken == 0 && $iPhrase == 0) { + $iGlobalRank++; + } } } elseif (($sPhraseType == '' || $sPhraseType == 'postalcode') && $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'postcode') { // We need to try the case where the postal code is the primary element (i.e. no way to tell if it is (postalcode, city) OR (city, postalcode) so try both - if (!$bHavePostcode && $aSearch['sPostcode'] === '' && $aSearch['sHouseNumber'] === '' && + if ($aSearch['sPostcode'] === '' && isset($aSearchTerm['word']) && $aSearchTerm['word'] && strpos($sNormQuery, $this->normTerm($aSearchTerm['word'])) !== false) { // If we have structured search or this is the first term, // make the postcode the primary search element. - if ($aSearch['sOperator'] === '' && ($sPhraseType == 'postalcode' || ($iToken == 0 && $iPhrase == 0))) { + if (!$bHavePostcode && $aSearch['sOperator'] === '' && ($sPhraseType == 'postalcode' || ($iToken == 0 && $iPhrase == 0))) { $aNewSearch = $aSearch; $aNewSearch['sOperator'] = 'postcode'; $aNewSearch['aAddress'] = array_merge($aNewSearch['aAddress'], $aNewSearch['aName']); @@ -774,8 +781,8 @@ class Geocode // sanity check: if the housenumber is not mainly made // up of numbers, add a penalty if (preg_match_all("/[^0-9]/", $sToken, $aMatches) > 2) $aSearch['iSearchRank']++; - // also housenumbers should appear in the first or second phrase - if ($iPhrase > 1) $aSearch['iSearchRank'] += 1; + // also must not appear in the middle of the address + if ($aSearch['aAddress'] || $aSearch['aAddressNonSearch']) $aSearch['iSearchRank'] += 1; if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; /* // Fall back to not searching for this item (better than nothing) @@ -852,7 +859,8 @@ class Geocode } } - if (!sizeof($aCurrentSearch['aName']) || $aCurrentSearch['iNamePhrase'] == $iPhrase) { + if ((!$aCurrentSearch['sPostcode'] && !$aCurrentSearch['aAddress'] && !$aCurrentSearch['aAddressNonSearch']) + && (!sizeof($aCurrentSearch['aName']) || $aCurrentSearch['iNamePhrase'] == $iPhrase)) { $aSearch = $aCurrentSearch; $aSearch['iSearchRank'] += 1; if (!sizeof($aCurrentSearch['aName'])) $aSearch['iSearchRank'] += 1; @@ -914,7 +922,7 @@ class Geocode //if (CONST_Debug) _debugDumpGroupedSearches($aGroupedSearches, $aValidTokens); } - // Revisit searches, giving penalty to unlikely combinations + // Revisit searches, drop bad searches and give penalty to unlikely combinations. $aGroupedSearches = array(); foreach ($aSearches as $aSearch) { if (!$aSearch['aName']) { @@ -922,6 +930,12 @@ class Geocode continue; } } + if ($this->aCountryCodes && $aSearch['sCountryCode'] + && !in_array($aSearch['sCountryCode'], $this->aCountryCodes)) { + continue; + } + + $aSearch['iSearchRank'] += $iGlobalRank; $aGroupedSearches[$aSearch['iSearchRank']][] = $aSearch; } ksort($aGroupedSearches); @@ -1009,76 +1023,59 @@ class Geocode $aSearchResults = array(); if ($sQuery || $this->aStructuredQuery) { - // Start with a blank search - $aSearches = array( - array( - 'iSearchRank' => 0, - 'iNamePhrase' => -1, - 'sCountryCode' => false, - 'aName' => array(), - 'aAddress' => array(), - 'aFullNameAddress' => array(), - 'aNameNonSearch' => array(), - 'aAddressNonSearch' => array(), - 'sOperator' => '', - 'aFeatureName' => array(), - 'sClass' => '', - 'sType' => '', - 'sHouseNumber' => '', - 'sPostcode' => '', - 'oNear' => $oNearPoint - ) - ); - - // Any 'special' terms in the search? - $bSpecialTerms = false; - preg_match_all('/\\[([\\w_]*)=([\\w_]*)\\]/', $sQuery, $aSpecialTermsRaw, PREG_SET_ORDER); - foreach ($aSpecialTermsRaw as $aSpecialTerm) { - $sQuery = str_replace($aSpecialTerm[0], ' ', $sQuery); - if (!$bSpecialTerms) { - $aNewSearches = array(); - foreach ($aSearches as $aSearch) { - $aNewSearch = $aSearch; - $aNewSearch['sClass'] = $aSpecialTerm[1]; - $aNewSearch['sType'] = $aSpecialTerm[2]; - $aNewSearches[] = $aNewSearch; - } + // Start with a single blank search + $aSearches = array(new SearchDescription()); - $aSearches = $aNewSearches; - $bSpecialTerms = true; - } + if ($oNearPoint) { + $aSearches[0]->setNear($oNearPoint); } - preg_match_all('/\\[([\\w ]*)\\]/u', $sQuery, $aSpecialTermsRaw, PREG_SET_ORDER); - if (isset($this->aStructuredQuery['amenity']) && $this->aStructuredQuery['amenity']) { - $aSpecialTermsRaw[] = array('['.$this->aStructuredQuery['amenity'].']', $this->aStructuredQuery['amenity']); - unset($this->aStructuredQuery['amenity']); + if ($sQuery) { + $sQuery = $aSearches[0]->extractKeyValuePairs($sQuery); } - foreach ($aSpecialTermsRaw as $aSpecialTerm) { - $sQuery = str_replace($aSpecialTerm[0], ' ', $sQuery); - if ($bSpecialTerms) { - continue; + $sSpecialTerm = ''; + if ($sQuery) { + preg_match_all( + '/\\[([\\w ]*)\\]/u', + $sQuery, + $aSpecialTermsRaw, + PREG_SET_ORDER + ); + foreach ($aSpecialTermsRaw as $aSpecialTerm) { + $sQuery = str_replace($aSpecialTerm[0], ' ', $sQuery); + if (!$sSpecialTerm) { + $sSpecialTerm = $aSpecialTerm[1]; + } } + } + if (!$sSpecialTerm && $this->aStructuredQuery + && isset($this->aStructuredQuery['amenity'])) { + $sSpecialTerm = $this->aStructuredQuery['amenity']; + unset($this->aStructuredQuery['amenity']); + } - $sToken = chksql($this->oDB->getOne("SELECT make_standard_name('".pg_escape_string($aSpecialTerm[1])."') AS string")); - $sSQL = 'SELECT * '; - $sSQL .= 'FROM ( '; - $sSQL .= ' SELECT word_id, word_token, word, class, type, country_code, operator'; - $sSQL .= ' FROM word '; + if ($sSpecialTerm && !$aSearches[0]->hasOperator()) { + $sSpecialTerm = pg_escape_string($sSpecialTerm); + $sToken = chksql( + $this->oDB->getOne("SELECT make_standard_name('$sSpecialTerm')"), + "Cannot decode query. Wrong encoding?" + ); + $sSQL = 'SELECT class, type FROM word '; $sSQL .= ' WHERE word_token in (\' '.$sToken.'\')'; - $sSQL .= ') AS x '; - $sSQL .= ' WHERE (class is not null AND class not in (\'place\'))'; + $sSQL .= ' AND class is not null AND class not in (\'place\')'; if (CONST_Debug) var_Dump($sSQL); $aSearchWords = chksql($this->oDB->getAll($sSQL)); $aNewSearches = array(); - foreach ($aSearches as $aSearch) { + foreach ($aSearches as $oSearch) { foreach ($aSearchWords as $aSearchTerm) { - $aNewSearch = $aSearch; - $aNewSearch['sClass'] = $aSearchTerm['class']; - $aNewSearch['sType'] = $aSearchTerm['type']; - $aNewSearches[] = $aNewSearch; - $bSpecialTerms = true; + $oNewSearch = clone $oSearch; + $oNewSearch->setPoiSearch( + Operator::TYPE, + $aSearchTerm['class'], + $aSearchTerm['type'], + ); + $aNewSearches[] = $oNewSearch; } } $aSearches = $aNewSearches; @@ -1198,10 +1195,10 @@ class Geocode foreach ($aGroupedSearches as $aSearches) { foreach ($aSearches as $aSearch) { - if ($aSearch['iSearchRank'] < $this->iMaxRank) { - if (!isset($aReverseGroupedSearches[$aSearch['iSearchRank']])) $aReverseGroupedSearches[$aSearch['iSearchRank']] = array(); - $aReverseGroupedSearches[$aSearch['iSearchRank']][] = $aSearch; + if (!isset($aReverseGroupedSearches[$aSearch->getRank()])) { + $aReverseGroupedSearches[$aSearch->getRank()] = array(); } + $aReverseGroupedSearches[$aSearch->getRank()][] = $aSearch; } } @@ -1212,38 +1209,9 @@ class Geocode // Re-group the searches by their score, junk anything over 20 as just not worth trying $aGroupedSearches = array(); foreach ($aSearches as $aSearch) { - if ($aSearch['iSearchRank'] < $this->iMaxRank) { - if (!isset($aGroupedSearches[$aSearch['iSearchRank']])) $aGroupedSearches[$aSearch['iSearchRank']] = array(); - $aGroupedSearches[$aSearch['iSearchRank']][] = $aSearch; - } - } - ksort($aGroupedSearches); - } - - if (CONST_Search_TryDroppedAddressTerms && sizeof($this->aStructuredQuery) > 0) { - $aCopyGroupedSearches = $aGroupedSearches; - foreach ($aCopyGroupedSearches as $iGroup => $aSearches) { - foreach ($aSearches as $iSearch => $aSearch) { - $aReductionsList = array($aSearch['aAddress']); - $iSearchRank = $aSearch['iSearchRank']; - while (sizeof($aReductionsList) > 0) { - $iSearchRank += 5; - if ($iSearchRank > iMaxRank) break 3; - $aNewReductionsList = array(); - foreach ($aReductionsList as $aReductionsWordList) { - for ($iReductionWord = 0; $iReductionWord < sizeof($aReductionsWordList); $iReductionWord++) { - $aReductionsWordListResult = array_merge(array_slice($aReductionsWordList, 0, $iReductionWord), array_slice($aReductionsWordList, $iReductionWord+1)); - $aReverseSearch = $aSearch; - $aSearch['aAddress'] = $aReductionsWordListResult; - $aSearch['iSearchRank'] = $iSearchRank; - $aGroupedSearches[$iSearchRank][] = $aReverseSearch; - if (sizeof($aReductionsWordListResult) > 0) { - $aNewReductionsList[] = $aReductionsWordListResult; - } - } - } - $aReductionsList = $aNewReductionsList; - } + if ($aSearch->getRank() < $this->iMaxRank) { + if (!isset($aGroupedSearches[$aSearch->getRank()])) $aGroupedSearches[$aSearch->getRank()] = array(); + $aGroupedSearches[$aSearch->getRank()][] = $aSearch; } } ksort($aGroupedSearches); @@ -1276,10 +1244,6 @@ class Geocode if (CONST_Debug) echo "
Search Loop, group $iGroupLoop, loop $iQueryLoop"; if (CONST_Debug) _debugDumpGroupedSearches(array($iGroupedRank => array($aSearch)), $aValidTokens); - if ($sCountryCodesSQL && $aSearch['sCountryCode'] && !in_array($aSearch['sCountryCode'], $this->aCountryCodes)) { - continue; - } - // No location term? if (!sizeof($aSearch['aName']) && !sizeof($aSearch['aAddress'])) { if ($aSearch['sCountryCode'] && !$aSearch['sClass'] && !$aSearch['sHouseNumber'] && !$aSearch['oNear']) {