X-Git-Url: https://git.openstreetmap.org/nominatim.git/blobdiff_plain/044bb6afa53f2c799490d8a95ca050c8b755ca4c..201b4689afaee3acfddd1fc7fe829fafc70667d6:/lib-php/Geocode.php diff --git a/lib-php/Geocode.php b/lib-php/Geocode.php index 53ee49c0..3529d835 100644 --- a/lib-php/Geocode.php +++ b/lib-php/Geocode.php @@ -1,4 +1,12 @@ aExcludePlaceIDs); } - if ($this->bBoundedSearch) $aParams['bounded'] = '1'; + if ($this->bBoundedSearch) { + $aParams['bounded'] = '1'; + } if ($this->aCountryCodes) { $aParams['countrycodes'] = implode(',', $this->aCountryCodes); @@ -85,11 +96,14 @@ class Geocode public function setLimit($iLimit = 10) { - if ($iLimit > 50) $iLimit = 50; - if ($iLimit < 1) $iLimit = 1; + if ($iLimit > 50) { + $iLimit = 50; + } elseif ($iLimit < 1) { + $iLimit = 1; + } $this->iFinalLimit = $iLimit; - $this->iLimit = $iLimit + min($iLimit, 10); + $this->iLimit = $iLimit + max($iLimit, 10); } public function setFeatureType($sFeatureType) @@ -176,23 +190,29 @@ class Geocode $this->bFallback = $oParams->getBool('fallback', $this->bFallback); - // List of excluded Place IDs - used for more acurate pageing + // List of excluded Place IDs - used for more accurate pageing $sExcluded = $oParams->getStringList('exclude_place_ids'); if ($sExcluded) { foreach ($sExcluded as $iExcludedPlaceID) { $iExcludedPlaceID = (int)$iExcludedPlaceID; - if ($iExcludedPlaceID) + if ($iExcludedPlaceID) { $aExcludePlaceIDs[$iExcludedPlaceID] = $iExcludedPlaceID; + } } - if (isset($aExcludePlaceIDs)) + if (isset($aExcludePlaceIDs)) { $this->aExcludePlaceIDs = $aExcludePlaceIDs; + } } // Only certain ranks of feature $sFeatureType = $oParams->getString('featureType'); - if (!$sFeatureType) $sFeatureType = $oParams->getString('featuretype'); - if ($sFeatureType) $this->setFeatureType($sFeatureType); + if (!$sFeatureType) { + $sFeatureType = $oParams->getString('featuretype'); + } + if ($sFeatureType) { + $this->setFeatureType($sFeatureType); + } // Country code list $sCountries = $oParams->getStringList('countrycodes'); @@ -202,8 +222,9 @@ class Geocode $aCountries[] = strtolower($sCountryCode); } } - if (isset($aCountries)) + if (isset($aCountries)) { $this->aCountryCodes = $aCountries; + } } $aViewbox = $oParams->getStringList('viewboxlbrt'); @@ -236,32 +257,38 @@ class Geocode public function setQueryFromParams($oParams) { // Search query - $sQuery = $oParams->getString('q'); - if (!$sQuery) { - $this->setStructuredQuery( - $oParams->getString('amenity'), - $oParams->getString('street'), - $oParams->getString('city'), - $oParams->getString('county'), - $oParams->getString('state'), - $oParams->getString('country'), - $oParams->getString('postalcode') - ); - } else { - $this->setQuery($sQuery); + $this->setStructuredQuery( + $oParams->getString('amenity'), + $oParams->getString('street'), + $oParams->getString('city'), + $oParams->getString('county'), + $oParams->getString('state'), + $oParams->getString('country'), + $oParams->getString('postalcode') + ); + if (!$this->sQuery) { + $sQuery = $oParams->getString('q'); + + if ($sQuery) { + $this->setQuery($sQuery); + } } } public function loadStructuredAddressElement($sValue, $sKey, $iNewMinAddressRank, $iNewMaxAddressRank, $aItemListValues) { $sValue = trim($sValue); - if (!$sValue) return false; + if (!$sValue) { + return false; + } $this->aStructuredQuery[$sKey] = $sValue; if ($this->iMinAddressRank == 0 && $this->iMaxAddressRank == 30) { $this->iMinAddressRank = $iNewMinAddressRank; $this->iMaxAddressRank = $iNewMaxAddressRank; } - if ($aItemListValues) $this->aAddressRankList = array_merge($this->aAddressRankList, $aItemListValues); + if ($aItemListValues) { + $this->aAddressRankList = array_merge($this->aAddressRankList, $aItemListValues); + } return true; } @@ -269,37 +296,39 @@ class Geocode { $this->sQuery = false; - // Reset - $this->iMinAddressRank = 0; - $this->iMaxAddressRank = 30; - $this->aAddressRankList = array(); - - $this->aStructuredQuery = array(); - $this->sAllowedTypesSQLList = false; - - $this->loadStructuredAddressElement($sAmenity, 'amenity', 26, 30, false); - $this->loadStructuredAddressElement($sStreet, 'street', 26, 30, false); - $this->loadStructuredAddressElement($sCity, 'city', 14, 24, false); - $this->loadStructuredAddressElement($sCounty, 'county', 9, 13, false); - $this->loadStructuredAddressElement($sState, 'state', 8, 8, false); - $this->loadStructuredAddressElement($sPostalCode, 'postalcode', 5, 11, array(5, 11)); - $this->loadStructuredAddressElement($sCountry, 'country', 4, 4, false); - - if (!empty($this->aStructuredQuery)) { - $this->sQuery = join(', ', $this->aStructuredQuery); - if ($this->iMaxAddressRank < 30) { - $this->sAllowedTypesSQLList = '(\'place\',\'boundary\')'; + if ($sAmenity || $sStreet || $sCity || $sCounty || $sState || $sCountry || $sPostalCode) { + // Reset + $this->iMinAddressRank = 0; + $this->iMaxAddressRank = 30; + $this->aAddressRankList = array(); + + $this->aStructuredQuery = array(); + $this->sAllowedTypesSQLList = false; + + $this->loadStructuredAddressElement($sAmenity, 'amenity', 26, 30, false); + $this->loadStructuredAddressElement($sStreet, 'street', 26, 30, false); + $this->loadStructuredAddressElement($sCity, 'city', 14, 24, false); + $this->loadStructuredAddressElement($sCounty, 'county', 9, 13, false); + $this->loadStructuredAddressElement($sState, 'state', 8, 8, false); + $this->loadStructuredAddressElement($sPostalCode, 'postalcode', 5, 11, array(5, 11)); + $this->loadStructuredAddressElement($sCountry, 'country', 4, 4, false); + + if (!empty($this->aStructuredQuery)) { + $this->sQuery = join(', ', $this->aStructuredQuery); + if ($this->iMaxAddressRank < 30) { + $this->sAllowedTypesSQLList = '(\'place\',\'boundary\')'; + } } } } public function fallbackStructuredQuery() { - if (!$this->aStructuredQuery) return false; - $aParams = $this->aStructuredQuery; - if (count($aParams) == 1) return false; + if (!$aParams || count($aParams) == 1) { + return false; + } $aOrderToFallback = array('postalcode', 'street', 'city', 'county', 'state'); @@ -329,52 +358,26 @@ class Geocode */ foreach ($aPhrases as $iPhrase => $oPhrase) { $aNewPhraseSearches = array(); - $sPhraseType = $oPhrase->getPhraseType(); + $oPosition = new SearchPosition( + $oPhrase->getPhraseType(), + $iPhrase, + count($aPhrases) + ); foreach ($oPhrase->getWordSets() as $aWordset) { $aWordsetSearches = $aSearches; // Add all words from this wordset foreach ($aWordset as $iToken => $sToken) { - //echo "
$sToken"; $aNewWordsetSearches = array(); + $oPosition->setTokenPosition($iToken, count($aWordset)); foreach ($aWordsetSearches as $oCurrentSearch) { - //echo ""; - //var_dump($oCurrentSearch); - //echo ""; - - // Tokens with full name matches. - foreach ($oValidTokens->get(' '.$sToken) as $oSearchTerm) { - $aNewSearches = $oCurrentSearch->extendWithFullTerm( - $oSearchTerm, - $oValidTokens->contains($sToken) - && strpos($sToken, ' ') === false, - $sPhraseType, - $iToken == 0 && $iPhrase == 0, - $iPhrase == 0, - $iToken + 1 == count($aWordset) - && $iPhrase + 1 == count($aPhrases) - ); - - foreach ($aNewSearches as $oSearch) { - if ($oSearch->getRank() < $this->iMaxRank) { - $aNewWordsetSearches[] = $oSearch; - } - } - } - // Look for partial matches. - // Note that there is no point in adding country terms here - // because country is omitted in the address. - if ($sPhraseType != 'country') { - // Allow searching for a word - but at extra cost - foreach ($oValidTokens->get($sToken) as $oSearchTerm) { - $aNewSearches = $oCurrentSearch->extendWithPartialTerm( - $sToken, - $oSearchTerm, - (bool) $sPhraseType, - $iPhrase, - $oValidTokens->get(' '.$sToken) + foreach ($oValidTokens->get($sToken) as $oSearchTerm) { + if ($oSearchTerm->isExtendable($oCurrentSearch, $oPosition)) { + $aNewSearches = $oSearchTerm->extendSearch( + $oCurrentSearch, + $oPosition ); foreach ($aNewSearches as $oSearch) { @@ -389,7 +392,6 @@ class Geocode usort($aNewWordsetSearches, array('Nominatim\SearchDescription', 'bySearchRank')); $aWordsetSearches = array_slice($aNewWordsetSearches, 0, 50); } - //var_Dump('
',count($aWordsetSearches)); exit; $aNewPhraseSearches = array_merge($aNewPhraseSearches, $aNewWordsetSearches); usort($aNewPhraseSearches, array('Nominatim\SearchDescription', 'bySearchRank')); @@ -397,8 +399,11 @@ class Geocode $aSearchHash = array(); foreach ($aNewPhraseSearches as $iSearch => $aSearch) { $sHash = serialize($aSearch); - if (isset($aSearchHash[$sHash])) unset($aNewPhraseSearches[$iSearch]); - else $aSearchHash[$sHash] = 1; + if (isset($aSearchHash[$sHash])) { + unset($aNewPhraseSearches[$iSearch]); + } else { + $aSearchHash[$sHash] = 1; + } } $aNewPhraseSearches = array_slice($aNewPhraseSearches, 0, 50); @@ -419,10 +424,12 @@ class Geocode $iSearchCount = 0; $aSearches = array(); - foreach ($aGroupedSearches as $iScore => $aNewSearches) { + foreach ($aGroupedSearches as $aNewSearches) { $iSearchCount += count($aNewSearches); $aSearches = array_merge($aSearches, $aNewSearches); - if ($iSearchCount > 50) break; + if ($iSearchCount > 50) { + break; + } } } @@ -479,7 +486,9 @@ class Geocode public function lookup() { Debug::newFunction('Geocode::lookup'); - if (!$this->sQuery && !$this->aStructuredQuery) return array(); + if (!$this->sQuery && !$this->aStructuredQuery) { + return array(); + } Debug::printDebugArray('Geocode', $this); @@ -501,26 +510,14 @@ class Geocode if ($this->aCountryCodes) { $oCtx->setCountryList($this->aCountryCodes); } - $this->oTokenizer->setCountryRestriction($this->aCountryCodes); Debug::newSection('Query Preprocessing'); - $sLanguagePrefArraySQL = $this->oDB->getArraySQL( - $this->oDB->getDBQuotedList($this->aLangPrefOrder) - ); - $sQuery = $this->sQuery; if (!preg_match('//u', $sQuery)) { userError('Query string is not UTF-8 encoded.'); } - // Conflicts between US state abreviations and various words for 'the' in different languages - if (isset($this->aLangPrefOrder['name:en'])) { - $sQuery = preg_replace('/(^|,)\s*il\s*(,|$)/i', '\1illinois\2', $sQuery); - $sQuery = preg_replace('/(^|,)\s*al\s*(,|$)/i', '\1alabama\2', $sQuery); - $sQuery = preg_replace('/(^|,)\s*la\s*(,|$)/i', '\1louisiana\2', $sQuery); - } - // Do we have anything that looks like a lat/lon pair? $sQuery = $oCtx->setNearPointFromQuery($sQuery); @@ -562,15 +559,15 @@ class Geocode if (!empty($aTokens)) { $aNewSearches = array(); + $oPosition = new SearchPosition('', 0, 1); + $oPosition->setTokenPosition(0, 1); + foreach ($aSearches as $oSearch) { foreach ($aTokens as $oToken) { - $oNewSearch = clone $oSearch; - $oNewSearch->setPoiSearch( - $oToken->iOperator, - $oToken->sClass, - $oToken->sType + $aNewSearches = array_merge( + $aNewSearches, + $oToken->extendSearch($oSearch, $oPosition) ); - $aNewSearches[] = $oNewSearch; } } $aSearches = $aNewSearches; @@ -624,16 +621,15 @@ class Geocode } $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $oValidTokens); - foreach ($aGroupedSearches as $aSearches) { + foreach ($aReverseGroupedSearches as $aSearches) { foreach ($aSearches as $aSearch) { - if (!isset($aReverseGroupedSearches[$aSearch->getRank()])) { - $aReverseGroupedSearches[$aSearch->getRank()] = array(); + if (!isset($aGroupedSearches[$aSearch->getRank()])) { + $aGroupedSearches[$aSearch->getRank()] = array(); } - $aReverseGroupedSearches[$aSearch->getRank()][] = $aSearch; + $aGroupedSearches[$aSearch->getRank()][] = $aSearch; } } - $aGroupedSearches = $aReverseGroupedSearches; ksort($aGroupedSearches); } } else { @@ -641,7 +637,9 @@ class Geocode $aGroupedSearches = array(); foreach ($aSearches as $aSearch) { if ($aSearch->getRank() < $this->iMaxRank) { - if (!isset($aGroupedSearches[$aSearch->getRank()])) $aGroupedSearches[$aSearch->getRank()] = array(); + if (!isset($aGroupedSearches[$aSearch->getRank()])) { + $aGroupedSearches[$aSearch->getRank()] = array(); + } $aGroupedSearches[$aSearch->getRank()][] = $aSearch; } } @@ -655,7 +653,9 @@ class Geocode $sHash = serialize($aSearch); if (isset($aSearchHash[$sHash])) { unset($aGroupedSearches[$iGroup][$iSearch]); - if (empty($aGroupedSearches[$iGroup])) unset($aGroupedSearches[$iGroup]); + if (empty($aGroupedSearches[$iGroup])) { + unset($aGroupedSearches[$iGroup]); + } } else { $aSearchHash[$sHash] = 1; } @@ -699,7 +699,9 @@ class Geocode } } - if ($iQueryLoop > 20) break; + if ($iQueryLoop > 30) { + break; + } } if (!empty($aResults)) { @@ -774,9 +776,9 @@ class Geocode $aResults = $tempIDs; } - if (!empty($aResults)) break; - if ($iGroupLoop > 4) break; - if ($iQueryLoop > 30) break; + if (!empty($aResults) || $iGroupLoop > 6 || $iQueryLoop > 40) { + break; + } } } else { // Just interpret as a reverse geocode @@ -794,10 +796,8 @@ class Geocode // No results? Done if (empty($aResults)) { - if ($this->bFallback) { - if ($this->fallbackStructuredQuery()) { - return $this->lookup(); - } + if ($this->bFallback && $this->fallbackStructuredQuery()) { + return $this->lookup(); } return array(); @@ -816,7 +816,9 @@ class Geocode $aRecheckWords = preg_split('/\b[\s,\\-]*/u', $sQuery); foreach ($aRecheckWords as $i => $sWord) { - if (!preg_match('/[\pL\pN]/', $sWord)) unset($aRecheckWords[$i]); + if (!preg_match('/[\pL\pN]/', $sWord)) { + unset($aRecheckWords[$i]); + } } Debug::printVar('Recheck words', $aRecheckWords); @@ -845,7 +847,9 @@ class Geocode $aResult['importance'] = 0.001; $aResult['foundorder'] = $aResult['addressimportance']; } else { - $aResult['importance'] = max(0.001, $aResult['importance']); + if ($aResult['importance'] == 0) { + $aResult['importance'] = 0.0001; + } $aResult['importance'] *= $this->viewboxImportanceFactor( $aResult['lon'], $aResult['lat'] @@ -874,9 +878,11 @@ class Geocode $iCountWords = 0; $sAddress = $aResult['langaddress']; foreach ($aRecheckWords as $i => $sWord) { - if (stripos($sAddress, $sWord)!==false) { + if (grapheme_stripos($sAddress, $sWord)!==false) { $iCountWords++; - if (preg_match('/(^|,)\s*'.preg_quote($sWord, '/').'\s*(,|$)/', $sAddress)) $iCountWords += 0.1; + if (preg_match('/(^|,)\s*'.preg_quote($sWord, '/').'\s*(,|$)/', $sAddress)) { + $iCountWords += 0.1; + } } } @@ -893,15 +899,8 @@ class Geocode $aToFilter = $aSearchResults; $aSearchResults = array(); - $bFirst = true; foreach ($aToFilter as $aResult) { $this->aExcludePlaceIDs[$aResult['place_id']] = $aResult['place_id']; - if ($bFirst) { - $fLat = $aResult['lat']; - $fLon = $aResult['lon']; - if (isset($aResult['zoom'])) $iZoom = $aResult['zoom']; - $bFirst = false; - } if (!$this->oPlaceLookup->doDeDupe() || (!isset($aOSMIDDone[$aResult['osm_type'].$aResult['osm_id']]) && !isset($aClassTypeNameDone[$aResult['osm_type'].$aResult['class'].$aResult['type'].$aResult['name'].$aResult['admin_level']])) ) { @@ -911,7 +910,9 @@ class Geocode } // Absolute limit on number of results - if (count($aSearchResults) >= $this->iFinalLimit) break; + if (count($aSearchResults) >= $this->iFinalLimit) { + break; + } } Debug::printVar('Post-filter results', $aSearchResults);