X-Git-Url: https://git.openstreetmap.org/nominatim.git/blobdiff_plain/47fdb33adf065b46df7104675e0251dd2ffb9bf4..4b46c730884f09720f5c0c6ebdff44da8bfc0e1d:/lib/Geocode.php diff --git a/lib/Geocode.php b/lib/Geocode.php index d5612240..19ed354f 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -1,4 +1,6 @@ bIncludeAddressDetails; } + function getIncludeExtraTags() + { + return $this->bIncludeExtraTags; + } + + function getIncludeNameDetails() + { + return $this->bIncludeNameDetails; + } + function setIncludePolygonAsPoints($b = true) { $this->bIncludePolygonAsPoints = $b; @@ -99,6 +117,11 @@ $this->bIncludePolygonAsSVG = $b; } + function setPolygonSimplificationThreshold($f) + { + $this->fPolygonSimplificationThreshold = $f; + } + function setDeDupe($bDeDupe = true) { $this->bDeDupe = (bool)$bDeDupe; @@ -201,6 +224,106 @@ return $this->sQuery; } + + function loadParamArray($aParams) + { + if (isset($aParams['addressdetails'])) $this->bIncludeAddressDetails = (bool)$aParams['addressdetails']; + if ((float) CONST_Postgresql_Version > 9.2) + { + if (isset($aParams['extratags'])) $this->bIncludeExtraTags = (bool)$aParams['extratags']; + if (isset($aParams['namedetails'])) $this->bIncludeNameDetails = (bool)$aParams['namedetails']; + } + if (isset($aParams['bounded'])) $this->bBoundedSearch = (bool)$aParams['bounded']; + if (isset($aParams['dedupe'])) $this->bDeDupe = (bool)$aParams['dedupe']; + + if (isset($aParams['limit'])) $this->setLimit((int)$aParams['limit']); + if (isset($aParams['offset'])) $this->iOffset = (int)$aParams['offset']; + + if (isset($aParams['fallback'])) $this->bFallback = (bool)$aParams['fallback']; + + // List of excluded Place IDs - used for more acurate pageing + if (isset($aParams['exclude_place_ids']) && $aParams['exclude_place_ids']) + { + foreach(explode(',',$aParams['exclude_place_ids']) as $iExcludedPlaceID) + { + $iExcludedPlaceID = (int)$iExcludedPlaceID; + if ($iExcludedPlaceID) + $aExcludePlaceIDs[$iExcludedPlaceID] = $iExcludedPlaceID; + } + + if (isset($aExcludePlaceIDs)) + $this->aExcludePlaceIDs = $aExcludePlaceIDs; + } + + // Only certain ranks of feature + if (isset($aParams['featureType'])) $this->setFeatureType($aParams['featureType']); + if (isset($aParams['featuretype'])) $this->setFeatureType($aParams['featuretype']); + + // Country code list + if (isset($aParams['countrycodes'])) + { + $aCountryCodes = array(); + foreach(explode(',',$aParams['countrycodes']) as $sCountryCode) + { + if (preg_match('/^[a-zA-Z][a-zA-Z]$/', $sCountryCode)) + { + $aCountryCodes[] = strtolower($sCountryCode); + } + } + $this->aCountryCodes = $aCountryCodes; + } + + if (isset($aParams['viewboxlbrt']) && $aParams['viewboxlbrt']) + { + $aCoOrdinatesLBRT = explode(',',$aParams['viewboxlbrt']); + $this->setViewBox($aCoOrdinatesLBRT[0], $aCoOrdinatesLBRT[1], $aCoOrdinatesLBRT[2], $aCoOrdinatesLBRT[3]); + } + else if (isset($aParams['viewbox']) && $aParams['viewbox']) + { + $aCoOrdinatesLTRB = explode(',',$aParams['viewbox']); + $this->setViewBox($aCoOrdinatesLTRB[0], $aCoOrdinatesLTRB[3], $aCoOrdinatesLTRB[2], $aCoOrdinatesLTRB[1]); + } + + if (isset($aParams['route']) && $aParams['route'] && isset($aParams['routewidth']) && $aParams['routewidth']) + { + $aPoints = explode(',',$aParams['route']); + if (sizeof($aPoints) % 2 != 0) + { + userError("Uneven number of points"); + exit; + } + $fPrevCoord = false; + $aRoute = array(); + foreach($aPoints as $i => $fPoint) + { + if ($i%2) + { + $aRoute[] = array((float)$fPoint, $fPrevCoord); + } + else + { + $fPrevCoord = (float)$fPoint; + } + } + $this->aRoutePoints = $aRoute; + } + } + + function setQueryFromParams($aParams) + { + // Search query + $sQuery = (isset($aParams['q'])?trim($aParams['q']):''); + if (!$sQuery) + { + $this->setStructuredQuery(@$aParams['amenity'], @$aParams['street'], @$aParams['city'], @$aParams['county'], @$aParams['state'], @$aParams['country'], @$aParams['postalcode']); + $this->setReverseInPlan(false); + } + else + { + $this->setQuery($sQuery); + } + } + function loadStructuredAddressElement($sValue, $sKey, $iNewMinAddressRank, $iNewMaxAddressRank, $aItemListValues) { $sValue = trim($sValue); @@ -277,13 +400,19 @@ // Get the details for display (is this a redundant extra step?) $sPlaceIDs = join(',',$aPlaceIDs); - $sSQL = "select osm_type,osm_id,class,type,admin_level,rank_search,rank_address,min(place_id) as place_id,calculated_country_code as country_code,"; + $sImportanceSQL = ''; + if ($this->sViewboxSmallSQL) $sImportanceSQL .= " case when ST_Contains($this->sViewboxSmallSQL, ST_Collect(centroid)) THEN 1 ELSE 0.75 END * "; + if ($this->sViewboxLargeSQL) $sImportanceSQL .= " case when ST_Contains($this->sViewboxLargeSQL, ST_Collect(centroid)) THEN 1 ELSE 0.75 END * "; + + $sSQL = "select osm_type,osm_id,class,type,admin_level,rank_search,rank_address,min(place_id) as place_id, min(parent_place_id) as parent_place_id, calculated_country_code as country_code,"; $sSQL .= "get_address_by_language(place_id, $sLanguagePrefArraySQL) as langaddress,"; $sSQL .= "get_name_by_language(name, $sLanguagePrefArraySQL) as placename,"; $sSQL .= "get_name_by_language(name, ARRAY['ref']) as ref,"; + if ($this->bIncludeExtraTags) $sSQL .= "hstore_to_json(extratags)::text as extra,"; + if ($this->bIncludeNameDetails) $sSQL .= "hstore_to_json(name)::text as names,"; $sSQL .= "avg(ST_X(centroid)) as lon,avg(ST_Y(centroid)) as lat, "; - $sSQL .= "coalesce(importance,0.75-(rank_search::float/40)) as importance, "; - $sSQL .= "(select max(p.importance*(p.rank_address+2)) from place_addressline s, placex p where s.place_id = min(placex.place_id) and p.place_id = s.address_place_id and s.isaddress and p.importance is not null) as addressimportance, "; + $sSQL .= $sImportanceSQL."coalesce(importance,0.75-(rank_search::float/40)) as importance, "; + $sSQL .= "(select max(p.importance*(p.rank_address+2)) from place_addressline s, placex p where s.place_id = min(CASE WHEN placex.rank_search < 28 THEN placex.place_id ELSE placex.parent_place_id END) and p.place_id = s.address_place_id and s.isaddress and p.importance is not null) as addressimportance, "; $sSQL .= "(extratags->'place') as extra_place "; $sSQL .= "from placex where place_id in ($sPlaceIDs) "; $sSQL .= "and (placex.rank_address between $this->iMinAddressRank and $this->iMaxAddressRank "; @@ -297,31 +426,37 @@ $sSQL .= ",langaddress "; $sSQL .= ",placename "; $sSQL .= ",ref "; + if ($this->bIncludeExtraTags) $sSQL .= ",extratags"; + if ($this->bIncludeNameDetails) $sSQL .= ",name"; $sSQL .= ",extratags->'place' "; if (30 >= $this->iMinAddressRank && 30 <= $this->iMaxAddressRank) { $sSQL .= " union "; - $sSQL .= "select 'T' as osm_type,place_id as osm_id,'place' as class,'house' as type,null as admin_level,30 as rank_search,30 as rank_address,min(place_id) as place_id,'us' as country_code,"; + $sSQL .= "select 'T' as osm_type,place_id as osm_id,'place' as class,'house' as type,null as admin_level,30 as rank_search,30 as rank_address,min(place_id) as place_id, min(parent_place_id) as parent_place_id,'us' as country_code,"; $sSQL .= "get_address_by_language(place_id, $sLanguagePrefArraySQL) as langaddress,"; $sSQL .= "null as placename,"; $sSQL .= "null as ref,"; + if ($this->bIncludeExtraTags) $sSQL .= "null as extra,"; + if ($this->bIncludeNameDetails) $sSQL .= "null as names,"; $sSQL .= "avg(ST_X(centroid)) as lon,avg(ST_Y(centroid)) as lat, "; - $sSQL .= "-0.15 as importance, "; - $sSQL .= "(select max(p.importance*(p.rank_address+2)) from place_addressline s, placex p where s.place_id = min(location_property_tiger.place_id) and p.place_id = s.address_place_id and s.isaddress and p.importance is not null) as addressimportance, "; + $sSQL .= $sImportanceSQL."-1.15 as importance, "; + $sSQL .= "(select max(p.importance*(p.rank_address+2)) from place_addressline s, placex p where s.place_id = min(location_property_tiger.parent_place_id) and p.place_id = s.address_place_id and s.isaddress and p.importance is not null) as addressimportance, "; $sSQL .= "null as extra_place "; $sSQL .= "from location_property_tiger where place_id in ($sPlaceIDs) "; $sSQL .= "and 30 between $this->iMinAddressRank and $this->iMaxAddressRank "; $sSQL .= "group by place_id"; - if (!$this->bDeDupe) $sSQL .= ",place_id"; + if (!$this->bDeDupe) $sSQL .= ",place_id "; $sSQL .= " union "; - $sSQL .= "select 'L' as osm_type,place_id as osm_id,'place' as class,'house' as type,null as admin_level,30 as rank_search,30 as rank_address,min(place_id) as place_id,'us' as country_code,"; + $sSQL .= "select 'L' as osm_type,place_id as osm_id,'place' as class,'house' as type,null as admin_level,30 as rank_search,30 as rank_address,min(place_id) as place_id, min(parent_place_id) as parent_place_id,'us' as country_code,"; $sSQL .= "get_address_by_language(place_id, $sLanguagePrefArraySQL) as langaddress,"; $sSQL .= "null as placename,"; $sSQL .= "null as ref,"; + if ($this->bIncludeExtraTags) $sSQL .= "null as extra,"; + if ($this->bIncludeNameDetails) $sSQL .= "null as names,"; $sSQL .= "avg(ST_X(centroid)) as lon,avg(ST_Y(centroid)) as lat, "; - $sSQL .= "-0.10 as importance, "; - $sSQL .= "(select max(p.importance*(p.rank_address+2)) from place_addressline s, placex p where s.place_id = min(location_property_aux.place_id) and p.place_id = s.address_place_id and s.isaddress and p.importance is not null) as addressimportance, "; + $sSQL .= $sImportanceSQL."-1.10 as importance, "; + $sSQL .= "(select max(p.importance*(p.rank_address+2)) from place_addressline s, placex p where s.place_id = min(location_property_aux.parent_place_id) and p.place_id = s.address_place_id and s.isaddress and p.importance is not null) as addressimportance, "; $sSQL .= "null as extra_place "; $sSQL .= "from location_property_aux where place_id in ($sPlaceIDs) "; $sSQL .= "and 30 between $this->iMinAddressRank and $this->iMaxAddressRank "; @@ -330,7 +465,7 @@ $sSQL .= ",get_address_by_language(place_id, $sLanguagePrefArraySQL) "; } - $sSQL .= "order by importance desc"; + $sSQL .= " order by importance desc"; if (CONST_Debug) { echo "
"; var_dump($sSQL); } $aSearchResults = $this->oDB->getAll($sSQL); @@ -342,31 +477,336 @@ return $aSearchResults; } + function getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases) + { + /* + Calculate all searches using aValidTokens i.e. + 'Wodsworth Road, Sheffield' => + + Phrase Wordset + 0 0 (wodsworth road) + 0 1 (wodsworth)(road) + 1 0 (sheffield) + + Score how good the search is so they can be ordered + */ + foreach($aPhrases as $iPhrase => $sPhrase) + { + $aNewPhraseSearches = array(); + if ($bStructuredPhrases) $sPhraseType = $aPhraseTypes[$iPhrase]; + else $sPhraseType = ''; + + foreach($aPhrases[$iPhrase]['wordsets'] as $iWordSet => $aWordset) + { + // Too many permutations - too expensive + if ($iWordSet > 120) break; + + $aWordsetSearches = $aSearches; + + // Add all words from this wordset + foreach($aWordset as $iToken => $sToken) + { + //echo "
$sToken"; + $aNewWordsetSearches = array(); + + foreach($aWordsetSearches as $aCurrentSearch) + { + //echo ""; + //var_dump($aCurrentSearch); + //echo ""; + + // If the token is valid + if (isset($aValidTokens[' '.$sToken])) + { + foreach($aValidTokens[' '.$sToken] as $aSearchTerm) + { + $aSearch = $aCurrentSearch; + $aSearch['iSearchRank']++; + if (($sPhraseType == '' || $sPhraseType == 'country') && !empty($aSearchTerm['country_code']) && $aSearchTerm['country_code'] != '0') + { + if ($aSearch['sCountryCode'] === false) + { + $aSearch['sCountryCode'] = strtolower($aSearchTerm['country_code']); + // Country is almost always at the end of the string - increase score for finding it anywhere else (optimisation) + if (($iToken+1 != sizeof($aWordset) || $iPhrase+1 != sizeof($aPhrases))) + { + $aSearch['iSearchRank'] += 5; + } + if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; + } + } + elseif (isset($aSearchTerm['lat']) && $aSearchTerm['lat'] !== '' && $aSearchTerm['lat'] !== null) + { + if ($aSearch['fLat'] === '') + { + $aSearch['fLat'] = $aSearchTerm['lat']; + $aSearch['fLon'] = $aSearchTerm['lon']; + $aSearch['fRadius'] = $aSearchTerm['radius']; + if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; + } + } + elseif ($sPhraseType == 'postalcode') + { + // We need to try the case where the postal code is the primary element (i.e. no way to tell if it is (postalcode, city) OR (city, postalcode) so try both + if (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id']) + { + // If we already have a name try putting the postcode first + if (sizeof($aSearch['aName'])) + { + $aNewSearch = $aSearch; + $aNewSearch['aAddress'] = array_merge($aNewSearch['aAddress'], $aNewSearch['aName']); + $aNewSearch['aName'] = array(); + $aNewSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; + if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aNewSearch; + } + + if (sizeof($aSearch['aName'])) + { + if ((!$bStructuredPhrases || $iPhrase > 0) && $sPhraseType != 'country' && (!isset($aValidTokens[$sToken]) || strpos($sToken, ' ') !== false)) + { + $aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; + } + else + { + $aCurrentSearch['aFullNameAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; + $aSearch['iSearchRank'] += 1000; // skip; + } + } + else + { + $aSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; + //$aSearch['iNamePhrase'] = $iPhrase; + } + if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; + } + + } + elseif (($sPhraseType == '' || $sPhraseType == 'street') && $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'house') + { + if ($aSearch['sHouseNumber'] === '') + { + $aSearch['sHouseNumber'] = $sToken; + // sanity check: if the housenumber is not mainly made + // up of numbers, add a penalty + if (preg_match_all("/[^0-9]/", $sToken, $aMatches) > 2) $aSearch['iSearchRank']++; + // also housenumbers should appear in the first or second phrase + if ($iPhrase > 1) $aSearch['iSearchRank'] += 1; + if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; + /* + // Fall back to not searching for this item (better than nothing) + $aSearch = $aCurrentSearch; + $aSearch['iSearchRank'] += 1; + if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; + */ + } + } + elseif ($sPhraseType == '' && $aSearchTerm['class'] !== '' && $aSearchTerm['class'] !== null) + { + if ($aSearch['sClass'] === '') + { + $aSearch['sOperator'] = $aSearchTerm['operator']; + $aSearch['sClass'] = $aSearchTerm['class']; + $aSearch['sType'] = $aSearchTerm['type']; + if (sizeof($aSearch['aName'])) $aSearch['sOperator'] = 'name'; + else $aSearch['sOperator'] = 'near'; // near = in for the moment + if (strlen($aSearchTerm['operator']) == 0) $aSearch['iSearchRank'] += 1; + + if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; + } + } + elseif (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id']) + { + if (sizeof($aSearch['aName'])) + { + if ((!$bStructuredPhrases || $iPhrase > 0) && $sPhraseType != 'country' && (!isset($aValidTokens[$sToken]) || strpos($sToken, ' ') !== false)) + { + $aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; + } + else + { + $aCurrentSearch['aFullNameAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; + $aSearch['iSearchRank'] += 1000; // skip; + } + } + else + { + $aSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; + //$aSearch['iNamePhrase'] = $iPhrase; + } + if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; + } + } + } + // Look for partial matches. + // Note that there is no point in adding country terms here + // because country are omitted in the address. + if (isset($aValidTokens[$sToken]) && $sPhraseType != 'country') + { + // Allow searching for a word - but at extra cost + foreach($aValidTokens[$sToken] as $aSearchTerm) + { + if (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id']) + { + if ((!$bStructuredPhrases || $iPhrase > 0) && sizeof($aCurrentSearch['aName']) && strpos($sToken, ' ') === false) + { + $aSearch = $aCurrentSearch; + $aSearch['iSearchRank'] += 1; + if ($aWordFrequencyScores[$aSearchTerm['word_id']] < CONST_Max_Word_Frequency) + { + $aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; + if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; + } + elseif (isset($aValidTokens[' '.$sToken])) // revert to the token version? + { + $aSearch['aAddressNonSearch'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; + $aSearch['iSearchRank'] += 1; + if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; + foreach($aValidTokens[' '.$sToken] as $aSearchTermToken) + { + if (empty($aSearchTermToken['country_code']) + && empty($aSearchTermToken['lat']) + && empty($aSearchTermToken['class'])) + { + $aSearch = $aCurrentSearch; + $aSearch['iSearchRank'] += 1; + $aSearch['aAddress'][$aSearchTermToken['word_id']] = $aSearchTermToken['word_id']; + if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; + } + } + } + else + { + $aSearch['aAddressNonSearch'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; + if (preg_match('#^[0-9]+$#', $sToken)) $aSearch['iSearchRank'] += 2; + if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; + } + } + + if (!sizeof($aCurrentSearch['aName']) || $aCurrentSearch['iNamePhrase'] == $iPhrase) + { + $aSearch = $aCurrentSearch; + $aSearch['iSearchRank'] += 1; + if (!sizeof($aCurrentSearch['aName'])) $aSearch['iSearchRank'] += 1; + if (preg_match('#^[0-9]+$#', $sToken)) $aSearch['iSearchRank'] += 2; + if ($aWordFrequencyScores[$aSearchTerm['word_id']] < CONST_Max_Word_Frequency) + $aSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; + else + $aSearch['aNameNonSearch'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; + $aSearch['iNamePhrase'] = $iPhrase; + if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; + } + } + } + } + else + { + // Allow skipping a word - but at EXTREAM cost + //$aSearch = $aCurrentSearch; + //$aSearch['iSearchRank']+=100; + //$aNewWordsetSearches[] = $aSearch; + } + } + // Sort and cut + usort($aNewWordsetSearches, 'bySearchRank'); + $aWordsetSearches = array_slice($aNewWordsetSearches, 0, 50); + } + //var_Dump('
',sizeof($aWordsetSearches)); exit; + + $aNewPhraseSearches = array_merge($aNewPhraseSearches, $aNewWordsetSearches); + usort($aNewPhraseSearches, 'bySearchRank'); + + $aSearchHash = array(); + foreach($aNewPhraseSearches as $iSearch => $aSearch) + { + $sHash = serialize($aSearch); + if (isset($aSearchHash[$sHash])) unset($aNewPhraseSearches[$iSearch]); + else $aSearchHash[$sHash] = 1; + } + + $aNewPhraseSearches = array_slice($aNewPhraseSearches, 0, 50); + } + + // Re-group the searches by their score, junk anything over 20 as just not worth trying + $aGroupedSearches = array(); + foreach($aNewPhraseSearches as $aSearch) + { + if ($aSearch['iSearchRank'] < $this->iMaxRank) + { + if (!isset($aGroupedSearches[$aSearch['iSearchRank']])) $aGroupedSearches[$aSearch['iSearchRank']] = array(); + $aGroupedSearches[$aSearch['iSearchRank']][] = $aSearch; + } + } + ksort($aGroupedSearches); + + $iSearchCount = 0; + $aSearches = array(); + foreach($aGroupedSearches as $iScore => $aNewSearches) + { + $iSearchCount += sizeof($aNewSearches); + $aSearches = array_merge($aSearches, $aNewSearches); + if ($iSearchCount > 50) break; + } + + //if (CONST_Debug) _debugDumpGroupedSearches($aGroupedSearches, $aValidTokens); + + } + return $aGroupedSearches; + + } + + /* Perform the actual query lookup. + + Returns an ordered list of results, each with the following fields: + osm_type: type of corresponding OSM object + N - node + W - way + R - relation + P - postcode (internally computed) + osm_id: id of corresponding OSM object + class: general object class (corresponds to tag key of primary OSM tag) + type: subclass of object (corresponds to tag value of primary OSM tag) + admin_level: see http://wiki.openstreetmap.org/wiki/Admin_level + rank_search: rank in search hierarchy + (see also http://wiki.openstreetmap.org/wiki/Nominatim/Development_overview#Country_to_street_level) + rank_address: rank in address hierarchy (determines orer in address) + place_id: internal key (may differ between different instances) + country_code: ISO country code + langaddress: localized full address + placename: localized name of object + ref: content of ref tag (if available) + lon: longitude + lat: latitude + importance: importance of place based on Wikipedia link count + addressimportance: cumulated importance of address elements + extra_place: type of place (for admin boundaries, if there is a place tag) + aBoundingBox: bounding Box + label: short description of the object class/type (English only) + name: full name (currently the same as langaddress) + foundorder: secondary ordering for places with same importance + */ function lookup() { if (!$this->sQuery && !$this->aStructuredQuery) return false; $sLanguagePrefArraySQL = "ARRAY[".join(',',array_map("getDBQuoted",$this->aLangPrefOrder))."]"; - $sCountryCodesSQL = false; if ($this->aCountryCodes && sizeof($this->aCountryCodes)) { $sCountryCodesSQL = join(',', array_map('addQuotes', $this->aCountryCodes)); } - // Hack to make it handle "new york, ny" (and variants) correctly - $sQuery = str_ireplace(array('New York, ny','new york, new york', 'New York ny','new york new york'), 'new york city, ny', $this->sQuery); + $sQuery = $this->sQuery; // Conflicts between US state abreviations and various words for 'the' in different languages if (isset($this->aLangPrefOrder['name:en'])) { - $sQuery = preg_replace('/,\s*il\s*(,|$)/',', illinois\1', $sQuery); - $sQuery = preg_replace('/,\s*al\s*(,|$)/',', alabama\1', $sQuery); - $sQuery = preg_replace('/,\s*la\s*(,|$)/',', louisiana\1', $sQuery); + $sQuery = preg_replace('/(^|,)\s*il\s*(,|$)/','\1illinois\2', $sQuery); + $sQuery = preg_replace('/(^|,)\s*al\s*(,|$)/','\1alabama\2', $sQuery); + $sQuery = preg_replace('/(^|,)\s*la\s*(,|$)/','\1louisiana\2', $sQuery); } // View Box SQL - $sViewboxCentreSQL = $sViewboxSmallSQL = $sViewboxLargeSQL = false; + $sViewboxCentreSQL = false; $bBoundingBoxSearch = false; if ($this->aViewBox) { @@ -377,8 +817,8 @@ $aBigViewBox[1] = $this->aViewBox[1] + $fWidth; $aBigViewBox[3] = $this->aViewBox[3] - $fWidth; - $sViewboxSmallSQL = "ST_SetSRID(ST_MakeBox2D(ST_Point(".(float)$this->aViewBox[0].",".(float)$this->aViewBox[1]."),ST_Point(".(float)$this->aViewBox[2].",".(float)$this->aViewBox[3].")),4326)"; - $sViewboxLargeSQL = "ST_SetSRID(ST_MakeBox2D(ST_Point(".(float)$aBigViewBox[0].",".(float)$aBigViewBox[1]."),ST_Point(".(float)$aBigViewBox[2].",".(float)$aBigViewBox[3].")),4326)"; + $this->sViewboxSmallSQL = "ST_SetSRID(ST_MakeBox2D(ST_Point(".(float)$this->aViewBox[0].",".(float)$this->aViewBox[1]."),ST_Point(".(float)$this->aViewBox[2].",".(float)$this->aViewBox[3].")),4326)"; + $this->sViewboxLargeSQL = "ST_SetSRID(ST_MakeBox2D(ST_Point(".(float)$aBigViewBox[0].",".(float)$aBigViewBox[1]."),ST_Point(".(float)$aBigViewBox[2].",".(float)$aBigViewBox[3].")),4326)"; $bBoundingBoxSearch = $this->bBoundedSearch; } @@ -386,62 +826,37 @@ if ($this->aRoutePoints) { $sViewboxCentreSQL = "ST_SetSRID('LINESTRING("; - $bFirst = false; - foreach($this->aRouteaPoints as $aPoint) + $bFirst = true; + foreach($this->aRoutePoints as $aPoint) { if (!$bFirst) $sViewboxCentreSQL .= ","; - $sViewboxCentreSQL .= $aPoint[1].' '.$aPoint[0]; + $sViewboxCentreSQL .= $aPoint[0].' '.$aPoint[1]; + $bFirst = false; } $sViewboxCentreSQL .= ")'::geometry,4326)"; $sSQL = "select st_buffer(".$sViewboxCentreSQL.",".(float)($_GET['routewidth']/69).")"; - $sViewboxSmallSQL = $this->oDB->getOne($sSQL); - if (PEAR::isError($sViewboxSmallSQL)) + $this->sViewboxSmallSQL = $this->oDB->getOne($sSQL); + if (PEAR::isError($this->sViewboxSmallSQL)) { - failInternalError("Could not get small viewbox.", $sSQL, $sViewboxSmallSQL); + failInternalError("Could not get small viewbox.", $sSQL, $this->sViewboxSmallSQL); } - $sViewboxSmallSQL = "'".$sViewboxSmallSQL."'::geometry"; + $this->sViewboxSmallSQL = "'".$this->sViewboxSmallSQL."'::geometry"; $sSQL = "select st_buffer(".$sViewboxCentreSQL.",".(float)($_GET['routewidth']/30).")"; - $sViewboxLargeSQL = $this->oDB->getOne($sSQL); - if (PEAR::isError($sViewboxLargeSQL)) + $this->sViewboxLargeSQL = $this->oDB->getOne($sSQL); + if (PEAR::isError($this->sViewboxLargeSQL)) { - failInternalError("Could not get large viewbox.", $sSQL, $sViewboxLargeSQL); + failInternalError("Could not get large viewbox.", $sSQL, $this->sViewboxLargeSQL); } - $sViewboxLargeSQL = "'".$sViewboxLargeSQL."'::geometry"; + $this->sViewboxLargeSQL = "'".$this->sViewboxLargeSQL."'::geometry"; $bBoundingBoxSearch = $this->bBoundedSearch; } // Do we have anything that looks like a lat/lon pair? - if (preg_match('/\\b([NS])[ ]+([0-9]+[0-9.]*)[ ]+([0-9.]+)?[, ]+([EW])[ ]+([0-9]+)[ ]+([0-9]+[0-9.]*)?\\b/', $sQuery, $aData)) - { - $fQueryLat = ($aData[1]=='N'?1:-1) * ($aData[2] + $aData[3]/60); - $fQueryLon = ($aData[4]=='E'?1:-1) * ($aData[5] + $aData[6]/60); - if ($fQueryLat <= 90.1 && $fQueryLat >= -90.1 && $fQueryLon <= 180.1 && $fQueryLon >= -180.1) - { - $this->setNearPoint(array($fQueryLat, $fQueryLon)); - $sQuery = trim(str_replace($aData[0], ' ', $sQuery)); - } - } - elseif (preg_match('/\\b([0-9]+)[ ]+([0-9]+[0-9.]*)?[ ]+([NS])[, ]+([0-9]+)[ ]+([0-9]+[0-9.]*)?[ ]+([EW])\\b/', $sQuery, $aData)) - { - $fQueryLat = ($aData[3]=='N'?1:-1) * ($aData[1] + $aData[2]/60); - $fQueryLon = ($aData[6]=='E'?1:-1) * ($aData[4] + $aData[5]/60); - if ($fQueryLat <= 90.1 && $fQueryLat >= -90.1 && $fQueryLon <= 180.1 && $fQueryLon >= -180.1) - { - $this->setNearPoint(array($fQueryLat, $fQueryLon)); - $sQuery = trim(str_replace($aData[0], ' ', $sQuery)); - } - } - elseif (preg_match('/(\\[|^|\\b)(-?[0-9]+[0-9]*\\.[0-9]+)[, ]+(-?[0-9]+[0-9]*\\.[0-9]+)(\\]|$|\\b)/', $sQuery, $aData)) - { - $fQueryLat = $aData[2]; - $fQueryLon = $aData[3]; - if ($fQueryLat <= 90.1 && $fQueryLat >= -90.1 && $fQueryLon <= 180.1 && $fQueryLon >= -180.1) - { - $this->setNearPoint(array($fQueryLat, $fQueryLon)); - $sQuery = trim(str_replace($aData[0], ' ', $sQuery)); - } + if ( $aLooksLike = looksLikeLatLonPair($sQuery) ){ + $this->setNearPoint(array($aLooksLike['lat'], $aLooksLike['lon'])); + $sQuery = $aLooksLike['query']; } $aSearchResults = array(); @@ -449,9 +864,23 @@ { // Start with a blank search $aSearches = array( - array('iSearchRank' => 0, 'iNamePhrase' => -1, 'sCountryCode' => false, 'aName'=>array(), 'aAddress'=>array(), 'aFullNameAddress'=>array(), - 'aNameNonSearch'=>array(), 'aAddressNonSearch'=>array(), - 'sOperator'=>'', 'aFeatureName' => array(), 'sClass'=>'', 'sType'=>'', 'sHouseNumber'=>'', 'fLat'=>'', 'fLon'=>'', 'fRadius'=>'') + array('iSearchRank' => 0, + 'iNamePhrase' => -1, + 'sCountryCode' => false, + 'aName' => array(), + 'aAddress' => array(), + 'aFullNameAddress' => array(), + 'aNameNonSearch' => array(), + 'aAddressNonSearch' => array(), + 'sOperator' => '', + 'aFeatureName' => array(), + 'sClass' => '', + 'sType' => '', + 'sHouseNumber' => '', + 'fLat' => '', + 'fLon' => '', + 'fRadius' => '' + ) ); // Do we have a radius search? @@ -476,10 +905,10 @@ preg_match_all('/\\[([\\w ]*)\\]/u', $sQuery, $aSpecialTermsRaw, PREG_SET_ORDER); $aSpecialTerms = array(); - if (isset($aStructuredQuery['amenity']) && $aStructuredQuery['amenity']) + if (isset($this->aStructuredQuery['amenity']) && $this->aStructuredQuery['amenity']) { - $aSpecialTermsRaw[] = array('['.$aStructuredQuery['amenity'].']', $aStructuredQuery['amenity']); - unset($aStructuredQuery['amenity']); + $aSpecialTermsRaw[] = array('['.$this->aStructuredQuery['amenity'].']', $this->aStructuredQuery['amenity']); + unset($this->aStructuredQuery['amenity']); } foreach($aSpecialTermsRaw as $aSpecialTerm) { @@ -577,8 +1006,8 @@ foreach($aDatabaseWords as $aToken) { // Very special case - require 2 letter country param to match the country code found - if ($bStructuredPhrases && $aToken['country_code'] && !empty($aStructuredQuery['country']) - && strlen($aStructuredQuery['country']) == 2 && strtolower($aStructuredQuery['country']) != $aToken['country_code']) + if ($bStructuredPhrases && $aToken['country_code'] && !empty($this->aStructuredQuery['country']) + && strlen($this->aStructuredQuery['country']) == 2 && strtolower($this->aStructuredQuery['country']) != $aToken['country_code']) { continue; } @@ -604,7 +1033,7 @@ { if (substr($aData[1],-2,1) != ' ') { - $aData[0] = substr($aData[0],0,strlen($aData[1]-1)).' '.substr($aData[0],strlen($aData[1]-1)); + $aData[0] = substr($aData[0],0,strlen($aData[1])-1).' '.substr($aData[0],strlen($aData[1])-1); $aData[1] = substr($aData[1],0,-1).' '.substr($aData[1],-1,1); } $aGBPostcodeLocation = gbPostcodeCalculate($aData[0], $aData[1], $aData[2], $this->oDB); @@ -652,278 +1081,38 @@ // Start the search process $aResultPlaceIDs = array(); - /* - Calculate all searches using aValidTokens i.e. - 'Wodsworth Road, Sheffield' => + $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases); - Phrase Wordset - 0 0 (wodsworth road) - 0 1 (wodsworth)(road) - 1 0 (sheffield) - - Score how good the search is so they can be ordered - */ - foreach($aPhrases as $iPhrase => $sPhrase) + if ($this->bReverseInPlan) { - $aNewPhraseSearches = array(); - if ($bStructuredPhrases) $sPhraseType = $aPhraseTypes[$iPhrase]; - else $sPhraseType = ''; - - foreach($aPhrases[$iPhrase]['wordsets'] as $iWordSet => $aWordset) + // Reverse phrase array and also reverse the order of the wordsets in + // the first and final phrase. Don't bother about phrases in the middle + // because order in the address doesn't matter. + $aPhrases = array_reverse($aPhrases); + $aPhrases[0]['wordsets'] = getInverseWordSets($aPhrases[0]['words'], 0); + if (sizeof($aPhrases) > 1) { - // Too many permutations - too expensive - if ($iWordSet > 120) break; - - $aWordsetSearches = $aSearches; + $aFinalPhrase = end($aPhrases); + $aPhrases[sizeof($aPhrases)-1]['wordsets'] = getInverseWordSets($aFinalPhrase['words'], 0); + } + $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, null, $aPhrases, $aValidTokens, $aWordFrequencyScores, false); - // Add all words from this wordset - foreach($aWordset as $iToken => $sToken) + foreach($aGroupedSearches as $aSearches) + { + foreach($aSearches as $aSearch) { - //echo "
$sToken"; - $aNewWordsetSearches = array(); - - foreach($aWordsetSearches as $aCurrentSearch) + if ($aSearch['iSearchRank'] < $this->iMaxRank) { - //echo ""; - //var_dump($aCurrentSearch); - //echo ""; - - // If the token is valid - if (isset($aValidTokens[' '.$sToken])) - { - foreach($aValidTokens[' '.$sToken] as $aSearchTerm) - { - $aSearch = $aCurrentSearch; - $aSearch['iSearchRank']++; - if (($sPhraseType == '' || $sPhraseType == 'country') && !empty($aSearchTerm['country_code']) && $aSearchTerm['country_code'] != '0') - { - if ($aSearch['sCountryCode'] === false) - { - $aSearch['sCountryCode'] = strtolower($aSearchTerm['country_code']); - // Country is almost always at the end of the string - increase score for finding it anywhere else (optimisation) - // If reverse order is enabled, it may appear at the beginning as well. - if (($iToken+1 != sizeof($aWordset) || $iPhrase+1 != sizeof($aPhrases)) && - (!$this->bReverseInPlan || $iToken > 0 || $iPhrase > 0)) - { - $aSearch['iSearchRank'] += 5; - } - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; - } - } - elseif (isset($aSearchTerm['lat']) && $aSearchTerm['lat'] !== '' && $aSearchTerm['lat'] !== null) - { - if ($aSearch['fLat'] === '') - { - $aSearch['fLat'] = $aSearchTerm['lat']; - $aSearch['fLon'] = $aSearchTerm['lon']; - $aSearch['fRadius'] = $aSearchTerm['radius']; - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; - } - } - elseif ($sPhraseType == 'postalcode') - { - // We need to try the case where the postal code is the primary element (i.e. no way to tell if it is (postalcode, city) OR (city, postalcode) so try both - if (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id']) - { - // If we already have a name try putting the postcode first - if (sizeof($aSearch['aName'])) - { - $aNewSearch = $aSearch; - $aNewSearch['aAddress'] = array_merge($aNewSearch['aAddress'], $aNewSearch['aName']); - $aNewSearch['aName'] = array(); - $aNewSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aNewSearch; - } - - if (sizeof($aSearch['aName'])) - { - if ((!$bStructuredPhrases || $iPhrase > 0) && $sPhraseType != 'country' && (!isset($aValidTokens[$sToken]) || strlen($sToken) < 4 || strpos($sToken, ' ') !== false)) - { - $aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - } - else - { - $aCurrentSearch['aFullNameAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - $aSearch['iSearchRank'] += 1000; // skip; - } - } - else - { - $aSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - //$aSearch['iNamePhrase'] = $iPhrase; - } - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; - } - - } - elseif (($sPhraseType == '' || $sPhraseType == 'street') && $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'house') - { - if ($aSearch['sHouseNumber'] === '') - { - $aSearch['sHouseNumber'] = $sToken; - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; - /* - // Fall back to not searching for this item (better than nothing) - $aSearch = $aCurrentSearch; - $aSearch['iSearchRank'] += 1; - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; - */ - } - } - elseif ($sPhraseType == '' && $aSearchTerm['class'] !== '' && $aSearchTerm['class'] !== null) - { - if ($aSearch['sClass'] === '') - { - $aSearch['sOperator'] = $aSearchTerm['operator']; - $aSearch['sClass'] = $aSearchTerm['class']; - $aSearch['sType'] = $aSearchTerm['type']; - if (sizeof($aSearch['aName'])) $aSearch['sOperator'] = 'name'; - else $aSearch['sOperator'] = 'near'; // near = in for the moment - - // Do we have a shortcut id? - if ($aSearch['sOperator'] == 'name') - { - $sSQL = "select get_tagpair('".$aSearch['sClass']."', '".$aSearch['sType']."')"; - if ($iAmenityID = $this->oDB->getOne($sSQL)) - { - $aValidTokens[$aSearch['sClass'].':'.$aSearch['sType']] = array('word_id' => $iAmenityID); - $aSearch['aName'][$iAmenityID] = $iAmenityID; - $aSearch['sClass'] = ''; - $aSearch['sType'] = ''; - } - } - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; - } - } - elseif (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id']) - { - if (sizeof($aSearch['aName'])) - { - if ((!$bStructuredPhrases || $iPhrase > 0) && $sPhraseType != 'country' && (!isset($aValidTokens[$sToken]) || strlen($sToken) < 4 || strpos($sToken, ' ') !== false)) - { - $aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - } - else - { - $aCurrentSearch['aFullNameAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - $aSearch['iSearchRank'] += 1000; // skip; - } - } - else - { - $aSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - //$aSearch['iNamePhrase'] = $iPhrase; - } - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; - } - } - } - if (isset($aValidTokens[$sToken])) - { - // Allow searching for a word - but at extra cost - foreach($aValidTokens[$sToken] as $aSearchTerm) - { - if (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id']) - { - if ((!$bStructuredPhrases || $iPhrase > 0) && sizeof($aCurrentSearch['aName']) && strlen($sToken) >= 4) - { - $aSearch = $aCurrentSearch; - $aSearch['iSearchRank'] += 1; - if ($aWordFrequencyScores[$aSearchTerm['word_id']] < CONST_Max_Word_Frequency) - { - $aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; - } - elseif (isset($aValidTokens[' '.$sToken])) // revert to the token version? - { - foreach($aValidTokens[' '.$sToken] as $aSearchTermToken) - { - if (empty($aSearchTermToken['country_code']) - && empty($aSearchTermToken['lat']) - && empty($aSearchTermToken['class'])) - { - $aSearch = $aCurrentSearch; - $aSearch['iSearchRank'] += 1; - $aSearch['aAddress'][$aSearchTermToken['word_id']] = $aSearchTermToken['word_id']; - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; - } - } - } - else - { - $aSearch['aAddressNonSearch'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; - } - } - - if (!sizeof($aCurrentSearch['aName']) || $aCurrentSearch['iNamePhrase'] == $iPhrase) - { - $aSearch = $aCurrentSearch; - $aSearch['iSearchRank'] += 2; - if (preg_match('#^[0-9]+$#', $sToken)) $aSearch['iSearchRank'] += 2; - if ($aWordFrequencyScores[$aSearchTerm['word_id']] < CONST_Max_Word_Frequency) - $aSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - else - $aSearch['aNameNonSearch'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - $aSearch['iNamePhrase'] = $iPhrase; - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; - } - } - } - } - else - { - // Allow skipping a word - but at EXTREAM cost - //$aSearch = $aCurrentSearch; - //$aSearch['iSearchRank']+=100; - //$aNewWordsetSearches[] = $aSearch; - } + if (!isset($aReverseGroupedSearches[$aSearch['iSearchRank']])) $aReverseGroupedSearches[$aSearch['iSearchRank']] = array(); + $aReverseGroupedSearches[$aSearch['iSearchRank']][] = $aSearch; } - // Sort and cut - usort($aNewWordsetSearches, 'bySearchRank'); - $aWordsetSearches = array_slice($aNewWordsetSearches, 0, 50); - } - //var_Dump('
',sizeof($aWordsetSearches)); exit; - - $aNewPhraseSearches = array_merge($aNewPhraseSearches, $aNewWordsetSearches); - usort($aNewPhraseSearches, 'bySearchRank'); - $aSearchHash = array(); - foreach($aNewPhraseSearches as $iSearch => $aSearch) - { - $sHash = serialize($aSearch); - if (isset($aSearchHash[$sHash])) unset($aNewPhraseSearches[$iSearch]); - else $aSearchHash[$sHash] = 1; } - - $aNewPhraseSearches = array_slice($aNewPhraseSearches, 0, 50); } - // Re-group the searches by their score, junk anything over 20 as just not worth trying - $aGroupedSearches = array(); - foreach($aNewPhraseSearches as $aSearch) - { - if ($aSearch['iSearchRank'] < $this->iMaxRank) - { - if (!isset($aGroupedSearches[$aSearch['iSearchRank']])) $aGroupedSearches[$aSearch['iSearchRank']] = array(); - $aGroupedSearches[$aSearch['iSearchRank']][] = $aSearch; - } - } + $aGroupedSearches = $aReverseGroupedSearches; ksort($aGroupedSearches); - - $iSearchCount = 0; - $aSearches = array(); - foreach($aGroupedSearches as $iScore => $aNewSearches) - { - $iSearchCount += sizeof($aNewSearches); - $aSearches = array_merge($aSearches, $aNewSearches); - if ($iSearchCount > 50) break; - } - - //if (CONST_Debug) _debugDumpGroupedSearches($aGroupedSearches, $aValidTokens); - } - } else { @@ -942,30 +1131,7 @@ if (CONST_Debug) var_Dump($aGroupedSearches); - if ($this->bReverseInPlan) - { - $aCopyGroupedSearches = $aGroupedSearches; - foreach($aCopyGroupedSearches as $iGroup => $aSearches) - { - foreach($aSearches as $iSearch => $aSearch) - { - if (sizeof($aSearch['aAddress'])) - { - $iReverseItem = array_pop($aSearch['aAddress']); - if (isset($aPossibleMainWordIDs[$iReverseItem])) - { - $aSearch['aAddress'] = array_merge($aSearch['aAddress'], $aSearch['aName']); - $aSearch['aName'] = array($iReverseItem); - $aGroupedSearches[$iGroup][] = $aSearch; - } - //$aReverseSearch['aName'][$iReverseItem] = $iReverseItem; - //$aGroupedSearches[$iGroup][] = $aReverseSearch; - } - } - } - } - - if (CONST_Search_TryDroppedAddressTerms && sizeof($aStructuredQuery) > 0) + if (CONST_Search_TryDroppedAddressTerms && sizeof($this->aStructuredQuery) > 0) { $aCopyGroupedSearches = $aGroupedSearches; foreach($aCopyGroupedSearches as $iGroup => $aSearches) @@ -1044,10 +1210,16 @@ { $sSQL = "select place_id from placex where calculated_country_code='".$aSearch['sCountryCode']."' and rank_search = 4"; if ($sCountryCodesSQL) $sSQL .= " and calculated_country_code in ($sCountryCodesSQL)"; + if ($bBoundingBoxSearch) + $sSQL .= " and _st_intersects($this->sViewboxSmallSQL, geometry)"; $sSQL .= " order by st_area(geometry) desc limit 1"; if (CONST_Debug) var_dump($sSQL); $aPlaceIDs = $this->oDB->getCol($sSQL); } + else + { + $aPlaceIDs = array(); + } } else { @@ -1058,7 +1230,7 @@ { $sSQL = "select place_id from place_classtype_".$aSearch['sClass']."_".$aSearch['sType']." ct"; if ($sCountryCodesSQL) $sSQL .= " join placex using (place_id)"; - $sSQL .= " where st_contains($sViewboxSmallSQL, ct.centroid)"; + $sSQL .= " where st_contains($this->sViewboxSmallSQL, ct.centroid)"; if ($sCountryCodesSQL) $sSQL .= " and calculated_country_code in ($sCountryCodesSQL)"; if (sizeof($this->aExcludePlaceIDs)) { @@ -1072,11 +1244,12 @@ // If excluded place IDs are given, it is fair to assume that // there have been results in the small box, so no further // expansion in that case. - if (!sizeof($aPlaceIDs) && !sizeof($this->aExcludePlaceIDs)) + // Also don't expand if bounded results were requested. + if (!sizeof($aPlaceIDs) && !sizeof($this->aExcludePlaceIDs) && !$this->bBoundedSearch) { $sSQL = "select place_id from place_classtype_".$aSearch['sClass']."_".$aSearch['sType']." ct"; if ($sCountryCodesSQL) $sSQL .= " join placex using (place_id)"; - $sSQL .= " where st_contains($sViewboxLargeSQL, ct.centroid)"; + $sSQL .= " where st_contains($this->sViewboxLargeSQL, ct.centroid)"; if ($sCountryCodesSQL) $sSQL .= " and calculated_country_code in ($sCountryCodesSQL)"; if ($sViewboxCentreSQL) $sSQL .= " order by st_distance($sViewboxCentreSQL, ct.centroid) asc"; $sSQL .= " limit $this->iLimit"; @@ -1087,7 +1260,7 @@ else { $sSQL = "select place_id from placex where class='".$aSearch['sClass']."' and type='".$aSearch['sType']."'"; - $sSQL .= " and st_contains($sViewboxSmallSQL, geometry) and linked_place_id is null"; + $sSQL .= " and st_contains($this->sViewboxSmallSQL, geometry) and linked_place_id is null"; if ($sCountryCodesSQL) $sSQL .= " and calculated_country_code in ($sCountryCodesSQL)"; if ($sViewboxCentreSQL) $sSQL .= " order by st_distance($sViewboxCentreSQL, centroid) asc"; $sSQL .= " limit $this->iLimit"; @@ -1104,6 +1277,12 @@ $aTerms = array(); $aOrder = array(); + if ($aSearch['sHouseNumber'] && sizeof($aSearch['aAddress'])) + { + $sHouseNumberRegex = '\\\\m'.$aSearch['sHouseNumber'].'\\\\M'; + $aOrder[] = "exists(select place_id from placex where parent_place_id = search_name.place_id and transliteration(housenumber) ~* E'".$sHouseNumberRegex."' limit 1) desc"; + } + // TODO: filter out the pointless search terms (2 letter name tokens and less) // they might be right - but they are just too darned expensive to run if (sizeof($aSearch['aName'])) $aTerms[] = "name_vector @> ARRAY[".join($aSearch['aName'],",")."]"; @@ -1124,7 +1303,21 @@ } } if ($aSearch['sCountryCode']) $aTerms[] = "country_code = '".pg_escape_string($aSearch['sCountryCode'])."'"; - if ($aSearch['sHouseNumber']) $aTerms[] = "address_rank between 16 and 27"; + if ($aSearch['sHouseNumber']) + { + $aTerms[] = "address_rank between 16 and 27"; + } + else + { + if ($this->iMinAddressRank > 0) + { + $aTerms[] = "address_rank >= ".$this->iMinAddressRank; + } + if ($this->iMaxAddressRank < 30) + { + $aTerms[] = "address_rank <= ".$this->iMaxAddressRank; + } + } if ($aSearch['fLon'] && $aSearch['fLat']) { $aTerms[] = "ST_DWithin(centroid, ST_SetSRID(ST_Point(".$aSearch['fLon'].",".$aSearch['fLat']."),4326), ".$aSearch['fRadius'].")"; @@ -1139,26 +1332,38 @@ $aTerms[] = "country_code in ($sCountryCodesSQL)"; } - if ($bBoundingBoxSearch) $aTerms[] = "centroid && $sViewboxSmallSQL"; + if ($bBoundingBoxSearch) $aTerms[] = "centroid && $this->sViewboxSmallSQL"; if ($sNearPointSQL) $aOrder[] = "ST_Distance($sNearPointSQL, centroid) asc"; - $sImportanceSQL = '(case when importance = 0 OR importance IS NULL then 0.75-(search_rank::float/40) else importance end)'; - if ($sViewboxSmallSQL) $sImportanceSQL .= " * case when ST_Contains($sViewboxSmallSQL, centroid) THEN 1 ELSE 0.5 END"; - if ($sViewboxLargeSQL) $sImportanceSQL .= " * case when ST_Contains($sViewboxLargeSQL, centroid) THEN 1 ELSE 0.5 END"; + if ($aSearch['sHouseNumber']) + { + $sImportanceSQL = '- abs(26 - address_rank) + 3'; + } + else + { + $sImportanceSQL = '(case when importance = 0 OR importance IS NULL then 0.75-(search_rank::float/40) else importance end)'; + } + if ($this->sViewboxSmallSQL) $sImportanceSQL .= " * case when ST_Contains($this->sViewboxSmallSQL, centroid) THEN 1 ELSE 0.5 END"; + if ($this->sViewboxLargeSQL) $sImportanceSQL .= " * case when ST_Contains($this->sViewboxLargeSQL, centroid) THEN 1 ELSE 0.5 END"; + $aOrder[] = "$sImportanceSQL DESC"; if (sizeof($aSearch['aFullNameAddress'])) { - $aOrder[] = '(select count(*) from (select unnest(ARRAY['.join($aSearch['aFullNameAddress'],",").']) INTERSECT select unnest(nameaddress_vector))s) DESC'; + $sExactMatchSQL = '(select count(*) from (select unnest(ARRAY['.join($aSearch['aFullNameAddress'],",").']) INTERSECT select unnest(nameaddress_vector))s) as exactmatch'; + $aOrder[] = 'exactmatch DESC'; + } else { + $sExactMatchSQL = '0::int as exactmatch'; } if (sizeof($aTerms)) { - $sSQL = "select place_id"; + $sSQL = "select place_id, "; + $sSQL .= $sExactMatchSQL; $sSQL .= " from search_name"; $sSQL .= " where ".join(' and ',$aTerms); $sSQL .= " order by ".join(', ',$aOrder); if ($aSearch['sHouseNumber'] || $aSearch['sClass']) - $sSQL .= " limit 50"; + $sSQL .= " limit 20"; elseif (!sizeof($aSearch['aName']) && !sizeof($aSearch['aAddress']) && $aSearch['sClass']) $sSQL .= " limit 1"; else @@ -1181,6 +1386,7 @@ //if ($aViewBoxRow['in_small'] == 't') $bViewBoxMatch = 1; //else if ($aViewBoxRow['in_large'] == 't') $bViewBoxMatch = 2; $aPlaceIDs[] = $aViewBoxRow['place_id']; + $this->exactMatchCache[$aViewBoxRow['place_id']] = $aViewBoxRow['exactmatch']; } } //var_Dump($aPlaceIDs); @@ -1192,8 +1398,8 @@ $sPlaceIDs = join(',',$aPlaceIDs); // Now they are indexed look for a house attached to a street we found - $sHouseNumberRegex = '\\\\m'.str_replace(' ','[-,/ ]',$aSearch['sHouseNumber']).'\\\\M'; - $sSQL = "select place_id from placex where parent_place_id in (".$sPlaceIDs.") and housenumber ~* E'".$sHouseNumberRegex."'"; + $sHouseNumberRegex = '\\\\m'.$aSearch['sHouseNumber'].'\\\\M'; + $sSQL = "select place_id from placex where parent_place_id in (".$sPlaceIDs.") and transliteration(housenumber) ~* E'".$sHouseNumberRegex."'"; if (sizeof($this->aExcludePlaceIDs)) { $sSQL .= " and place_id not in (".join(',',$this->aExcludePlaceIDs).")"; @@ -1316,7 +1522,7 @@ } if ($sCountryCodesSQL) $sSQL .= " and lp.calculated_country_code in ($sCountryCodesSQL)"; if ($sOrderBySQL) $sSQL .= "order by ".$sOrderBySQL." asc"; - if ($iOffset) $sSQL .= " offset $iOffset"; + if ($this->iOffset) $sSQL .= " offset $this->iOffset"; $sSQL .= " limit $this->iLimit"; if (CONST_Debug) var_dump($sSQL); $aClassPlaceIDs = array_merge($aClassPlaceIDs, $this->oDB->getCol($sSQL)); @@ -1338,7 +1544,7 @@ } if ($sCountryCodesSQL) $sSQL .= " and l.calculated_country_code in ($sCountryCodesSQL)"; if ($sOrderBy) $sSQL .= "order by ".$OrderBysSQL." asc"; - if ($iOffset) $sSQL .= " offset $iOffset"; + if ($this->iOffset) $sSQL .= " offset $this->iOffset"; $sSQL .= " limit $this->iLimit"; if (CONST_Debug) var_dump($sSQL); $aClassPlaceIDs = array_merge($aClassPlaceIDs, $this->oDB->getCol($sSQL)); @@ -1419,86 +1625,33 @@ } $aClassType = getClassTypesWithImportance(); - $aRecheckWords = preg_split('/\b/u',$sQuery); + $aRecheckWords = preg_split('/\b[\s,\\-]*/u',$sQuery); foreach($aRecheckWords as $i => $sWord) { - if (!$sWord) unset($aRecheckWords[$i]); + if (!preg_match('/\pL/', $sWord)) unset($aRecheckWords[$i]); } + if (CONST_Debug) { echo 'Recheck words:<\i>'; var_dump($aRecheckWords); } + foreach($aSearchResults as $iResNum => $aResult) { - if (CONST_Search_AreaPolygons) + // Default + $fDiameter = getResultDiameter($aResult); + + $oPlaceLookup = new PlaceLookup($this->oDB); + $oPlaceLookup->setIncludePolygonAsPoints($this->bIncludePolygonAsPoints); + $oPlaceLookup->setIncludePolygonAsText($this->bIncludePolygonAsText); + $oPlaceLookup->setIncludePolygonAsGeoJSON($this->bIncludePolygonAsGeoJSON); + $oPlaceLookup->setIncludePolygonAsKML($this->bIncludePolygonAsKML); + $oPlaceLookup->setIncludePolygonAsSVG($this->bIncludePolygonAsSVG); + $oPlaceLookup->setPolygonSimplificationThreshold($this->fPolygonSimplificationThreshold); + + $aOutlineResult = $oPlaceLookup->getOutlines($aResult['place_id'], $aResult['lon'], $aResult['lat'], $fDiameter/2); + if ($aOutlineResult) { - // Get the bounding box and outline polygon - $sSQL = "select place_id,0 as numfeatures,st_area(geometry) as area,"; - $sSQL .= "ST_Y(centroid) as centrelat,ST_X(centroid) as centrelon,"; - $sSQL .= "ST_Y(ST_PointN(ST_ExteriorRing(Box2D(geometry)),4)) as minlat,ST_Y(ST_PointN(ST_ExteriorRing(Box2D(geometry)),2)) as maxlat,"; - $sSQL .= "ST_X(ST_PointN(ST_ExteriorRing(Box2D(geometry)),1)) as minlon,ST_X(ST_PointN(ST_ExteriorRing(Box2D(geometry)),3)) as maxlon"; - if ($this->bIncludePolygonAsGeoJSON) $sSQL .= ",ST_AsGeoJSON(geometry) as asgeojson"; - if ($this->bIncludePolygonAsKML) $sSQL .= ",ST_AsKML(geometry) as askml"; - if ($this->bIncludePolygonAsSVG) $sSQL .= ",ST_AsSVG(geometry) as assvg"; - if ($this->bIncludePolygonAsText || $this->bIncludePolygonAsPoints) $sSQL .= ",ST_AsText(geometry) as astext"; - $sSQL .= " from placex where place_id = ".$aResult['place_id'].' and st_geometrytype(Box2D(geometry)) = \'ST_Polygon\''; - $aPointPolygon = $this->oDB->getRow($sSQL); - if (PEAR::IsError($aPointPolygon)) - { - failInternalError("Could not get outline.", $sSQL, $aPointPolygon); - } - - if ($aPointPolygon['place_id']) - { - if ($this->bIncludePolygonAsGeoJSON) $aResult['asgeojson'] = $aPointPolygon['asgeojson']; - if ($this->bIncludePolygonAsKML) $aResult['askml'] = $aPointPolygon['askml']; - if ($this->bIncludePolygonAsSVG) $aResult['assvg'] = $aPointPolygon['assvg']; - if ($this->bIncludePolygonAsText) $aResult['astext'] = $aPointPolygon['astext']; - - if ($aPointPolygon['centrelon'] !== null && $aPointPolygon['centrelat'] !== null ) - { - $aResult['lat'] = $aPointPolygon['centrelat']; - $aResult['lon'] = $aPointPolygon['centrelon']; - } - - if ($this->bIncludePolygonAsPoints) - { - // Translate geometary string to point array - if (preg_match('#POLYGON\\(\\(([- 0-9.,]+)#',$aPointPolygon['astext'],$aMatch)) - { - preg_match_all('/(-?[0-9.]+) (-?[0-9.]+)/',$aMatch[1],$aPolyPoints,PREG_SET_ORDER); - } - elseif (preg_match('#MULTIPOLYGON\\(\\(\\(([- 0-9.,]+)#',$aPointPolygon['astext'],$aMatch)) - { - preg_match_all('/(-?[0-9.]+) (-?[0-9.]+)/',$aMatch[1],$aPolyPoints,PREG_SET_ORDER); - } - elseif (preg_match('#POINT\\((-?[0-9.]+) (-?[0-9.]+)\\)#',$aPointPolygon['astext'],$aMatch)) - { - $fRadius = 0.01; - $iSteps = ($fRadius * 40000)^2; - $fStepSize = (2*pi())/$iSteps; - $aPolyPoints = array(); - for($f = 0; $f < 2*pi(); $f += $fStepSize) - { - $aPolyPoints[] = array('',$aMatch[1]+($fRadius*sin($f)),$aMatch[2]+($fRadius*cos($f))); - } - $aPointPolygon['minlat'] = $aPointPolygon['minlat'] - $fRadius; - $aPointPolygon['maxlat'] = $aPointPolygon['maxlat'] + $fRadius; - $aPointPolygon['minlon'] = $aPointPolygon['minlon'] - $fRadius; - $aPointPolygon['maxlon'] = $aPointPolygon['maxlon'] + $fRadius; - } - } - - // Output data suitable for display (points and a bounding box) - if ($this->bIncludePolygonAsPoints && isset($aPolyPoints)) - { - $aResult['aPolyPoints'] = array(); - foreach($aPolyPoints as $aPoint) - { - $aResult['aPolyPoints'][] = array($aPoint[1], $aPoint[2]); - } - } - $aResult['aBoundingBox'] = array($aPointPolygon['minlat'],$aPointPolygon['maxlat'],$aPointPolygon['minlon'],$aPointPolygon['maxlon']); - } + $aResult = array_merge($aResult, $aOutlineResult); } - + if ($aResult['extra_place'] == 'city') { $aResult['class'] = 'place'; @@ -1506,47 +1659,6 @@ $aResult['rank_search'] = 16; } - if (!isset($aResult['aBoundingBox'])) - { - // Default - $fDiameter = 0.0001; - - if (isset($aClassType[$aResult['class'].':'.$aResult['type'].':'.$aResult['admin_level']]['defdiameter']) - && $aClassType[$aResult['class'].':'.$aResult['type'].':'.$aResult['admin_level']]['defdiameter']) - { - $fDiameter = $aClassType[$aResult['class'].':'.$aResult['type'].':'.$aResult['admin_level']]['defzoom']; - } - elseif (isset($aClassType[$aResult['class'].':'.$aResult['type']]['defdiameter']) - && $aClassType[$aResult['class'].':'.$aResult['type']]['defdiameter']) - { - $fDiameter = $aClassType[$aResult['class'].':'.$aResult['type']]['defdiameter']; - } - $fRadius = $fDiameter / 2; - - $iSteps = max(8,min(100,$fRadius * 3.14 * 100000)); - $fStepSize = (2*pi())/$iSteps; - $aPolyPoints = array(); - for($f = 0; $f < 2*pi(); $f += $fStepSize) - { - $aPolyPoints[] = array('',$aResult['lon']+($fRadius*sin($f)),$aResult['lat']+($fRadius*cos($f))); - } - $aPointPolygon['minlat'] = $aResult['lat'] - $fRadius; - $aPointPolygon['maxlat'] = $aResult['lat'] + $fRadius; - $aPointPolygon['minlon'] = $aResult['lon'] - $fRadius; - $aPointPolygon['maxlon'] = $aResult['lon'] + $fRadius; - - // Output data suitable for display (points and a bounding box) - if ($this->bIncludePolygonAsPoints) - { - $aResult['aPolyPoints'] = array(); - foreach($aPolyPoints as $aPoint) - { - $aResult['aPolyPoints'][] = array($aPoint[1], $aPoint[2]); - } - } - $aResult['aBoundingBox'] = array($aPointPolygon['minlat'],$aPointPolygon['maxlat'],$aPointPolygon['minlon'],$aPointPolygon['maxlon']); - } - // Is there an icon set for this type of result? if (isset($aClassType[$aResult['class'].':'.$aResult['type']]['icon']) && $aClassType[$aResult['class'].':'.$aResult['type']]['icon']) @@ -1574,19 +1686,65 @@ } } + if ($this->bIncludeExtraTags) + { + if ($aResult['extra']) + { + $aResult['sExtraTags'] = json_decode($aResult['extra']); + } + else + { + $aResult['sExtraTags'] = (object) array(); + } + } + + if ($this->bIncludeNameDetails) + { + if ($aResult['names']) + { + $aResult['sNameDetails'] = json_decode($aResult['names']); + } + else + { + $aResult['sNameDetails'] = (object) array(); + } + } + // Adjust importance for the number of exact string matches in the result $aResult['importance'] = max(0.001,$aResult['importance']); $iCountWords = 0; $sAddress = $aResult['langaddress']; foreach($aRecheckWords as $i => $sWord) { - if (stripos($sAddress, $sWord)!==false) $iCountWords++; + if (stripos($sAddress, $sWord)!==false) + { + $iCountWords++; + if (preg_match("/(^|,)\s*".preg_quote($sWord, '/')."\s*(,|$)/", $sAddress)) $iCountWords += 0.1; + } } $aResult['importance'] = $aResult['importance'] + ($iCountWords*0.1); // 0.1 is a completely arbitrary number but something in the range 0.1 to 0.5 would seem right $aResult['name'] = $aResult['langaddress']; - $aResult['foundorder'] = -$aResult['addressimportance']; + // secondary ordering (for results with same importance (the smaller the better): + // - approximate importance of address parts + $aResult['foundorder'] = -$aResult['addressimportance']/10; + // - number of exact matches from the query + if (isset($this->exactMatchCache[$aResult['place_id']])) + $aResult['foundorder'] -= $this->exactMatchCache[$aResult['place_id']]; + else if (isset($this->exactMatchCache[$aResult['parent_place_id']])) + $aResult['foundorder'] -= $this->exactMatchCache[$aResult['parent_place_id']]; + // - importance of the class/type + if (isset($aClassType[$aResult['class'].':'.$aResult['type']]['importance']) + && $aClassType[$aResult['class'].':'.$aResult['type']]['importance']) + { + $aResult['foundorder'] += 0.0001 * $aClassType[$aResult['class'].':'.$aResult['type']]['importance']; + } + else + { + $aResult['foundorder'] += 0.01; + } + if (CONST_Debug) { var_dump($aResult); } $aSearchResults[$iResNum] = $aResult; } uasort($aSearchResults, 'byImportance'); @@ -1599,7 +1757,6 @@ $bFirst = true; foreach($aToFilter as $iResNum => $aResult) { - if ($aResult['type'] == 'adminitrative') $aResult['type'] = 'administrative'; $this->aExcludePlaceIDs[$aResult['place_id']] = $aResult['place_id']; if ($bFirst) { @@ -1627,17 +1784,3 @@ } // end class - -/* - if (isset($_GET['route']) && $_GET['route'] && isset($_GET['routewidth']) && $_GET['routewidth']) - { - $aPoints = explode(',',$_GET['route']); - if (sizeof($aPoints) % 2 != 0) - { - userError("Uneven number of points"); - exit; - } - $sViewboxCentreSQL = "ST_SetSRID('LINESTRING("; - $fPrevCoord = false; - } -*/