protected $aExcludePlaceIDs = array();
protected $bDeDupe = true;
- protected $bReverseInPlan = false;
+ protected $bReverseInPlan = true;
protected $iLimit = 20;
protected $iFinalLimit = 10;
return $aSearchResults;
}
- public function getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases)
+ public function getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases, $sNormQuery)
{
/*
Calculate all searches using aValidTokens i.e.
*/
}
} elseif ($sPhraseType == '' && $aSearchTerm['class'] !== '' && $aSearchTerm['class'] !== null) {
- if ($aSearch['sClass'] === '') {
- $aSearch['sOperator'] = $aSearchTerm['operator'];
+ // require a normalized exact match of the term
+ // if we have the normalizer version of the query
+ // available
+ if ($aSearch['sClass'] === ''
+ && ($sNormQuery === null || !($aSearchTerm['word'] && strpos($sNormQuery, $aSearchTerm['word']) === false))) {
$aSearch['sClass'] = $aSearchTerm['class'];
$aSearch['sType'] = $aSearchTerm['type'];
- if (sizeof($aSearch['aName'])) $aSearch['sOperator'] = 'name';
- else $aSearch['sOperator'] = 'near'; // near = in for the moment
- if (strlen($aSearchTerm['operator']) == 0) $aSearch['iSearchRank'] += 1;
+ if ($aSearchTerm['operator'] == '') {
+ $aSearch['sOperator'] = sizeof($aSearch['aName']) ? 'name' : 'near';
+ $aSearch['iSearchRank'] += 2;
+ } else {
+ $aSearch['sOperator'] = 'near'; // near = in for the moment
+ }
if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
}
{
if (!$this->sQuery && !$this->aStructuredQuery) return array();
+ $oNormalizer = \Transliterator::createFromRules(CONST_Term_Normalization_Rules);
+ if ($oNormalizer !== null) {
+ $sNormQuery = $oNormalizer->transliterate($this->sQuery);
+ } else {
+ $sNormQuery = null;
+ }
+
$sLanguagePrefArraySQL = "ARRAY[".join(',', array_map("getDBQuoted", $this->aLangPrefOrder))."]";
$sCountryCodesSQL = false;
if ($this->aCountryCodes) {
// array with: placeid => -1 | tiger-housenumber
$aResultPlaceIDs = array();
- $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases);
+ $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases, $sNormQuery);
if ($this->bReverseInPlan) {
// Reverse phrase array and also reverse the order of the wordsets in
$aFinalPhrase = end($aPhrases);
$aPhrases[sizeof($aPhrases)-1]['wordsets'] = getInverseWordSets($aFinalPhrase['words'], 0);
}
- $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, null, $aPhrases, $aValidTokens, $aWordFrequencyScores, false);
+ $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, null, $aPhrases, $aValidTokens, $aWordFrequencyScores, false, $sNormQuery);
foreach ($aGroupedSearches as $aSearches) {
foreach ($aSearches as $aSearch) {
// TODO: filter out the pointless search terms (2 letter name tokens and less)
// they might be right - but they are just too darned expensive to run
if (sizeof($aSearch['aName'])) $aTerms[] = "name_vector @> ARRAY[".join($aSearch['aName'], ",")."]";
- if (sizeof($aSearch['aNameNonSearch'])) $aTerms[] = "array_cat(name_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aNameNonSearch'], ",")."]";
+ //if (sizeof($aSearch['aNameNonSearch'])) $aTerms[] = "array_cat(name_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aNameNonSearch'], ",")."]";
if (sizeof($aSearch['aAddress']) && $aSearch['aName'] != $aSearch['aAddress']) {
// For infrequent name terms disable index usage for address
if (CONST_Search_NameOnlySearchFrequencyThreshold
&& sizeof($aSearch['aName']) == 1
&& $aWordFrequencyScores[$aSearch['aName'][reset($aSearch['aName'])]] < CONST_Search_NameOnlySearchFrequencyThreshold
) {
- $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join(array_merge($aSearch['aAddress'], $aSearch['aAddressNonSearch']), ",")."]";
+ //$aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join(array_merge($aSearch['aAddress'], $aSearch['aAddressNonSearch']), ",")."]";
+ $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddress'],",")."]";
} else {
$aTerms[] = "nameaddress_vector @> ARRAY[".join($aSearch['aAddress'], ",")."]";
- if (sizeof($aSearch['aAddressNonSearch'])) {
+ /*if (sizeof($aSearch['aAddressNonSearch'])) {
$aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddressNonSearch'], ",")."]";
- }
+ }*/
}
}
if ($aSearch['sCountryCode']) $aTerms[] = "country_code = '".pg_escape_string($aSearch['sCountryCode'])."'";
$$
LANGUAGE plpgsql;
- CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT, lookup_class text, lookup_type text)
+ CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT, normalized_word TEXT, lookup_class text, lookup_type text)
RETURNS INTEGER
AS $$
DECLARE
return_word_id INTEGER;
BEGIN
lookup_token := ' '||trim(lookup_word);
- SELECT min(word_id) FROM word WHERE word_token = lookup_token and class=lookup_class and type = lookup_type into return_word_id;
+ SELECT min(word_id) FROM word WHERE word_token = lookup_token and word=normalized_word and class=lookup_class and type = lookup_type into return_word_id;
IF return_word_id IS NULL THEN
return_word_id := nextval('seq_word');
- INSERT INTO word VALUES (return_word_id, lookup_token, null, lookup_class, lookup_type, null, 0);
+ INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word, lookup_class, lookup_type, null, 0);
END IF;
RETURN return_word_id;
END;
$$
LANGUAGE plpgsql;
- CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT, lookup_class text, lookup_type text, op text)
+ CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT, normalized_word TEXT, lookup_class text, lookup_type text, op text)
RETURNS INTEGER
AS $$
DECLARE
return_word_id INTEGER;
BEGIN
lookup_token := ' '||trim(lookup_word);
- SELECT min(word_id) FROM word WHERE word_token = lookup_token and class=lookup_class and type = lookup_type and operator = op into return_word_id;
+ SELECT min(word_id) FROM word WHERE word_token = lookup_token and word=normalized_word and class=lookup_class and type = lookup_type and operator = op into return_word_id;
IF return_word_id IS NULL THEN
return_word_id := nextval('seq_word');
- INSERT INTO word VALUES (return_word_id, lookup_token, null, lookup_class, lookup_type, null, 0, op);
+ INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word, lookup_class, lookup_type, null, 0, op);
END IF;
RETURN return_word_id;
END;
IF relation_members IS NOT NULL THEN
FOR relMember IN select get_osm_rel_members(relation_members,ARRAY['label']) as member LOOP
- FOR linkedPlacex IN select * from placex where osm_type = upper(substring(relMember.member,1,1))::"char"
+ FOR linkedPlacex IN select * from placex where osm_type = upper(substring(relMember.member,1,1))::char(1)
and osm_id = substring(relMember.member,2,10000)::bigint order by rank_search desc limit 1 LOOP
-- If we don't already have one use this as the centre point of the geometry
FOR relMember IN select get_osm_rel_members(relation_members,ARRAY['admin_center','admin_centre']) as member LOOP
- FOR linkedPlacex IN select * from placex where osm_type = upper(substring(relMember.member,1,1))::"char"
+ FOR linkedPlacex IN select * from placex where osm_type = upper(substring(relMember.member,1,1))::char(1)
and osm_id = substring(relMember.member,2,10000)::bigint order by rank_search desc limit 1 LOOP
-- For an admin centre we also want a name match - still not perfect, for example 'new york, new york'
make_standard_name(name->'name') = make_standard_name(NEW.name->'name')
AND placex.rank_address = NEW.rank_address
AND placex.place_id != NEW.place_id
- AND placex.osm_type = 'N' AND placex.rank_search < 26
+ AND placex.osm_type = 'N'::char(1) AND placex.rank_search < 26
AND st_covers(NEW.geometry, placex.geometry)
LOOP
-- Still null? how about looking it up by the node id
IF NEW.importance IS NULL THEN
- select language||':'||title,importance from wikipedia_article where osm_type = 'N' and osm_id = linked_node_id order by importance desc limit 1 INTO NEW.wikipedia,NEW.importance;
+ select language||':'||title,importance from wikipedia_article where osm_type = 'N'::char(1) and osm_id = linked_node_id order by importance desc limit 1 INTO NEW.wikipedia,NEW.importance;
END IF;
END IF;
CASE WHEN class = 'place' and type = 'postcode' THEN hstore('name', postcode) ELSE name END as name,
CASE WHEN extratags ? 'place' THEN 'place' ELSE class END as class,
CASE WHEN extratags ? 'place' THEN extratags->'place' ELSE type END as type,
- admin_level, fromarea, isaddress,
+ admin_level, fromarea, isaddress and linked_place_id is NULL as isaddress,
CASE WHEN address_place_id = for_place_id AND rank_address = 0 THEN 100 WHEN rank_address = 11 THEN 5 ELSE rank_address END as rank_address,
distance,country_code,postcode
from place_addressline join placex on (address_place_id = placex.place_id)