From: Sarah Hoffmann Date: Mon, 5 Jun 2017 18:27:32 +0000 (+0200) Subject: Merge remote-tracking branch 'upstream/master' X-Git-Tag: deploy~385 X-Git-Url: https://git.openstreetmap.org/nominatim.git/commitdiff_plain/5141ac14f6c8502aee8a88074159a68222054527?ds=inline;hp=-c Merge remote-tracking branch 'upstream/master' --- 5141ac14f6c8502aee8a88074159a68222054527 diff --combined lib/Geocode.php index ecd5be1c,17aaf826..1c3c7a4e --- a/lib/Geocode.php +++ b/lib/Geocode.php @@@ -25,7 -25,7 +25,7 @@@ class Geocod protected $aExcludePlaceIDs = array(); protected $bDeDupe = true; - protected $bReverseInPlan = false; + protected $bReverseInPlan = true; protected $iLimit = 20; protected $iFinalLimit = 10; @@@ -653,7 -653,7 +653,7 @@@ return $aSearchResults; } - public function getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases) + public function getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases, $sNormQuery) { /* Calculate all searches using aValidTokens i.e. @@@ -752,13 -752,19 +752,19 @@@ */ } } elseif ($sPhraseType == '' && $aSearchTerm['class'] !== '' && $aSearchTerm['class'] !== null) { - if ($aSearch['sClass'] === '') { - $aSearch['sOperator'] = $aSearchTerm['operator']; + // require a normalized exact match of the term + // if we have the normalizer version of the query + // available + if ($aSearch['sClass'] === '' + && ($sNormQuery === null || !($aSearchTerm['word'] && strpos($sNormQuery, $aSearchTerm['word']) === false))) { $aSearch['sClass'] = $aSearchTerm['class']; $aSearch['sType'] = $aSearchTerm['type']; - if (sizeof($aSearch['aName'])) $aSearch['sOperator'] = 'name'; - else $aSearch['sOperator'] = 'near'; // near = in for the moment - if (strlen($aSearchTerm['operator']) == 0) $aSearch['iSearchRank'] += 1; + if ($aSearchTerm['operator'] == '') { + $aSearch['sOperator'] = sizeof($aSearch['aName']) ? 'name' : 'near'; + $aSearch['iSearchRank'] += 2; + } else { + $aSearch['sOperator'] = 'near'; // near = in for the moment + } if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; } @@@ -913,6 -919,13 +919,13 @@@ { if (!$this->sQuery && !$this->aStructuredQuery) return array(); + $oNormalizer = \Transliterator::createFromRules(CONST_Term_Normalization_Rules); + if ($oNormalizer !== null) { + $sNormQuery = $oNormalizer->transliterate($this->sQuery); + } else { + $sNormQuery = null; + } + $sLanguagePrefArraySQL = "ARRAY[".join(',', array_map("getDBQuoted", $this->aLangPrefOrder))."]"; $sCountryCodesSQL = false; if ($this->aCountryCodes) { @@@ -1139,7 -1152,7 +1152,7 @@@ // array with: placeid => -1 | tiger-housenumber $aResultPlaceIDs = array(); - $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases); + $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases, $sNormQuery); if ($this->bReverseInPlan) { // Reverse phrase array and also reverse the order of the wordsets in @@@ -1151,7 -1164,7 +1164,7 @@@ $aFinalPhrase = end($aPhrases); $aPhrases[sizeof($aPhrases)-1]['wordsets'] = getInverseWordSets($aFinalPhrase['words'], 0); } - $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, null, $aPhrases, $aValidTokens, $aWordFrequencyScores, false); + $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, null, $aPhrases, $aValidTokens, $aWordFrequencyScores, false, $sNormQuery); foreach ($aGroupedSearches as $aSearches) { foreach ($aSearches as $aSearch) { @@@ -1334,20 -1347,19 +1347,20 @@@ // TODO: filter out the pointless search terms (2 letter name tokens and less) // they might be right - but they are just too darned expensive to run if (sizeof($aSearch['aName'])) $aTerms[] = "name_vector @> ARRAY[".join($aSearch['aName'], ",")."]"; - if (sizeof($aSearch['aNameNonSearch'])) $aTerms[] = "array_cat(name_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aNameNonSearch'], ",")."]"; + //if (sizeof($aSearch['aNameNonSearch'])) $aTerms[] = "array_cat(name_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aNameNonSearch'], ",")."]"; if (sizeof($aSearch['aAddress']) && $aSearch['aName'] != $aSearch['aAddress']) { // For infrequent name terms disable index usage for address if (CONST_Search_NameOnlySearchFrequencyThreshold && sizeof($aSearch['aName']) == 1 && $aWordFrequencyScores[$aSearch['aName'][reset($aSearch['aName'])]] < CONST_Search_NameOnlySearchFrequencyThreshold ) { - $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join(array_merge($aSearch['aAddress'], $aSearch['aAddressNonSearch']), ",")."]"; + //$aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join(array_merge($aSearch['aAddress'], $aSearch['aAddressNonSearch']), ",")."]"; + $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddress'],",")."]"; } else { $aTerms[] = "nameaddress_vector @> ARRAY[".join($aSearch['aAddress'], ",")."]"; - if (sizeof($aSearch['aAddressNonSearch'])) { + /*if (sizeof($aSearch['aAddressNonSearch'])) { $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddressNonSearch'], ",")."]"; - } + }*/ } } if ($aSearch['sCountryCode']) $aTerms[] = "country_code = '".pg_escape_string($aSearch['sCountryCode'])."'"; diff --combined sql/functions.sql index 8b1372d4,d863a9bf..2606cdca --- a/sql/functions.sql +++ b/sql/functions.sql @@@ -101,7 -101,7 +101,7 @@@ END $$ LANGUAGE plpgsql; - CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT, lookup_class text, lookup_type text) + CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT, normalized_word TEXT, lookup_class text, lookup_type text) RETURNS INTEGER AS $$ DECLARE @@@ -109,17 -109,17 +109,17 @@@ return_word_id INTEGER; BEGIN lookup_token := ' '||trim(lookup_word); - SELECT min(word_id) FROM word WHERE word_token = lookup_token and class=lookup_class and type = lookup_type into return_word_id; + SELECT min(word_id) FROM word WHERE word_token = lookup_token and word=normalized_word and class=lookup_class and type = lookup_type into return_word_id; IF return_word_id IS NULL THEN return_word_id := nextval('seq_word'); - INSERT INTO word VALUES (return_word_id, lookup_token, null, lookup_class, lookup_type, null, 0); + INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word, lookup_class, lookup_type, null, 0); END IF; RETURN return_word_id; END; $$ LANGUAGE plpgsql; - CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT, lookup_class text, lookup_type text, op text) + CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT, normalized_word TEXT, lookup_class text, lookup_type text, op text) RETURNS INTEGER AS $$ DECLARE @@@ -127,10 -127,10 +127,10 @@@ return_word_id INTEGER; BEGIN lookup_token := ' '||trim(lookup_word); - SELECT min(word_id) FROM word WHERE word_token = lookup_token and class=lookup_class and type = lookup_type and operator = op into return_word_id; + SELECT min(word_id) FROM word WHERE word_token = lookup_token and word=normalized_word and class=lookup_class and type = lookup_type and operator = op into return_word_id; IF return_word_id IS NULL THEN return_word_id := nextval('seq_word'); - INSERT INTO word VALUES (return_word_id, lookup_token, null, lookup_class, lookup_type, null, 0, op); + INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word, lookup_class, lookup_type, null, 0, op); END IF; RETURN return_word_id; END; @@@ -1491,7 -1491,7 +1491,7 @@@ BEGI IF relation_members IS NOT NULL THEN FOR relMember IN select get_osm_rel_members(relation_members,ARRAY['label']) as member LOOP - FOR linkedPlacex IN select * from placex where osm_type = upper(substring(relMember.member,1,1))::"char" + FOR linkedPlacex IN select * from placex where osm_type = upper(substring(relMember.member,1,1))::char(1) and osm_id = substring(relMember.member,2,10000)::bigint order by rank_search desc limit 1 LOOP -- If we don't already have one use this as the centre point of the geometry @@@ -1522,7 -1522,7 +1522,7 @@@ FOR relMember IN select get_osm_rel_members(relation_members,ARRAY['admin_center','admin_centre']) as member LOOP - FOR linkedPlacex IN select * from placex where osm_type = upper(substring(relMember.member,1,1))::"char" + FOR linkedPlacex IN select * from placex where osm_type = upper(substring(relMember.member,1,1))::char(1) and osm_id = substring(relMember.member,2,10000)::bigint order by rank_search desc limit 1 LOOP -- For an admin centre we also want a name match - still not perfect, for example 'new york, new york' @@@ -1571,7 -1571,7 +1571,7 @@@ make_standard_name(name->'name') = make_standard_name(NEW.name->'name') AND placex.rank_address = NEW.rank_address AND placex.place_id != NEW.place_id - AND placex.osm_type = 'N' AND placex.rank_search < 26 + AND placex.osm_type = 'N'::char(1) AND placex.rank_search < 26 AND st_covers(NEW.geometry, placex.geometry) LOOP @@@ -1621,7 -1621,7 +1621,7 @@@ -- Still null? how about looking it up by the node id IF NEW.importance IS NULL THEN - select language||':'||title,importance from wikipedia_article where osm_type = 'N' and osm_id = linked_node_id order by importance desc limit 1 INTO NEW.wikipedia,NEW.importance; + select language||':'||title,importance from wikipedia_article where osm_type = 'N'::char(1) and osm_id = linked_node_id order by importance desc limit 1 INTO NEW.wikipedia,NEW.importance; END IF; END IF; @@@ -2381,7 -2381,7 +2381,7 @@@ BEGI CASE WHEN class = 'place' and type = 'postcode' THEN hstore('name', postcode) ELSE name END as name, CASE WHEN extratags ? 'place' THEN 'place' ELSE class END as class, CASE WHEN extratags ? 'place' THEN extratags->'place' ELSE type END as type, - admin_level, fromarea, isaddress, + admin_level, fromarea, isaddress and linked_place_id is NULL as isaddress, CASE WHEN address_place_id = for_place_id AND rank_address = 0 THEN 100 WHEN rank_address = 11 THEN 5 ELSE rank_address END as rank_address, distance,country_code,postcode from place_addressline join placex on (address_place_id = placex.place_id)