Merge remote-tracking branch 'upstream/master'

author Sarah Hoffmann <lonvia@denofr.de>
Mon, 5 Jun 2017 18:27:32 +0000 (20:27 +0200)
committer Sarah Hoffmann <lonvia@denofr.de>
Mon, 5 Jun 2017 18:27:32 +0000 (20:27 +0200)
diff --combined lib/Geocode.php

index ecd5be1c7c434a6ed2056b2994942838a5c56f40,17aaf826e2963e2f9405561bafd04604a2fad651..1c3c7a4e05542e52e0755b1bb629873911d8b4f1
--- 1/lib/Geocode.php
--- 2/lib/Geocode.php
+++ b/lib/Geocode.php
@@@ -25,7 -25,7 +25,7 @@@ class Geocod
   
       protected $aExcludePlaceIDs = array();
       protected $bDeDupe = true;
- -    protected $bReverseInPlan = false;
+ +    protected $bReverseInPlan = true;
   
       protected $iLimit = 20;
       protected $iFinalLimit = 10;
@@@ -653,7 -653,7 +653,7 @@@
           return $aSearchResults;
       }
   
-     public function getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases)
+     public function getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases, $sNormQuery)
       {
           /*
                Calculate all searches using aValidTokens i.e.
@@@ -752,13 -752,19 +752,19 @@@
                                            */
                                       }
                                   } elseif ($sPhraseType == '' && $aSearchTerm['class'] !== '' && $aSearchTerm['class'] !== null) {
-                                     if ($aSearch['sClass'] === '') {
-                                         $aSearch['sOperator'] = $aSearchTerm['operator'];
+                                     // require a normalized exact match of the term
+                                     // if we have the normalizer version of the query
+                                     // available
+                                     if ($aSearch['sClass'] === ''
+                                         && ($sNormQuery === null || !($aSearchTerm['word'] && strpos($sNormQuery, $aSearchTerm['word']) === false))) {
                                           $aSearch['sClass'] = $aSearchTerm['class'];
                                           $aSearch['sType'] = $aSearchTerm['type'];
-                                         if (sizeof($aSearch['aName'])) $aSearch['sOperator'] = 'name';
-                                         else $aSearch['sOperator'] = 'near'; // near = in for the moment
-                                         if (strlen($aSearchTerm['operator']) == 0) $aSearch['iSearchRank'] += 1;
+                                         if ($aSearchTerm['operator'] == '') {
+                                             $aSearch['sOperator'] = sizeof($aSearch['aName']) ? 'name' :  'near';
+                                             $aSearch['iSearchRank'] += 2;
+                                         } else {
+                                             $aSearch['sOperator'] = 'near'; // near = in for the moment
+                                         }
   
                                           if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
                                       }
@@@ -913,6 -919,13 +919,13 @@@
       {
           if (!$this->sQuery && !$this->aStructuredQuery) return array();
   
+         $oNormalizer = \Transliterator::createFromRules(CONST_Term_Normalization_Rules);
+         if ($oNormalizer !== null) {
+             $sNormQuery = $oNormalizer->transliterate($this->sQuery);
+         } else {
+             $sNormQuery = null;
+         }
+ 
           $sLanguagePrefArraySQL = "ARRAY[".join(',', array_map("getDBQuoted", $this->aLangPrefOrder))."]";
           $sCountryCodesSQL = false;
           if ($this->aCountryCodes) {
@@@ -1139,7 -1152,7 +1152,7 @@@
                   // array with: placeid => -1 | tiger-housenumber
                   $aResultPlaceIDs = array();
   
-                 $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases);
+                 $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases, $sNormQuery);
   
                   if ($this->bReverseInPlan) {
                       // Reverse phrase array and also reverse the order of the wordsets in
@@@ -1151,7 -1164,7 +1164,7 @@@
                           $aFinalPhrase = end($aPhrases);
                           $aPhrases[sizeof($aPhrases)-1]['wordsets'] = getInverseWordSets($aFinalPhrase['words'], 0);
                       }
-                     $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, null, $aPhrases, $aValidTokens, $aWordFrequencyScores, false);
+                     $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, null, $aPhrases, $aValidTokens, $aWordFrequencyScores, false, $sNormQuery);
   
                       foreach ($aGroupedSearches as $aSearches) {
                           foreach ($aSearches as $aSearch) {
@@@ -1334,20 -1347,19 +1347,20 @@@
                           // TODO: filter out the pointless search terms (2 letter name tokens and less)
                           // they might be right - but they are just too darned expensive to run
                           if (sizeof($aSearch['aName'])) $aTerms[] = "name_vector @> ARRAY[".join($aSearch['aName'], ",")."]";
- -                        if (sizeof($aSearch['aNameNonSearch'])) $aTerms[] = "array_cat(name_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aNameNonSearch'], ",")."]";
+ +                        //if (sizeof($aSearch['aNameNonSearch'])) $aTerms[] = "array_cat(name_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aNameNonSearch'], ",")."]";
                           if (sizeof($aSearch['aAddress']) && $aSearch['aName'] != $aSearch['aAddress']) {
                               // For infrequent name terms disable index usage for address
                               if (CONST_Search_NameOnlySearchFrequencyThreshold
                                   && sizeof($aSearch['aName']) == 1
                                   && $aWordFrequencyScores[$aSearch['aName'][reset($aSearch['aName'])]] < CONST_Search_NameOnlySearchFrequencyThreshold
                               ) {
- -                                $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join(array_merge($aSearch['aAddress'], $aSearch['aAddressNonSearch']), ",")."]";
+ +                                //$aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join(array_merge($aSearch['aAddress'], $aSearch['aAddressNonSearch']), ",")."]";
+ +                                $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddress'],",")."]";
                               } else {
                                   $aTerms[] = "nameaddress_vector @> ARRAY[".join($aSearch['aAddress'], ",")."]";
- -                                if (sizeof($aSearch['aAddressNonSearch'])) {
+ +                                /*if (sizeof($aSearch['aAddressNonSearch'])) {
                                       $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddressNonSearch'], ",")."]";
- -                                }
+ +                                }*/
                               }
                           }
                           if ($aSearch['sCountryCode']) $aTerms[] = "country_code = '".pg_escape_string($aSearch['sCountryCode'])."'";
diff --combined sql/functions.sql

index 8b1372d49ac00fd815cb500e870f6ce3a0d9158f,d863a9bf5e91639577633c320ecc609302f6346e..2606cdcabc2dcb45ffe189ed9fcff7f1b990fe28
--- 1/sql/functions.sql
--- 2/sql/functions.sql
+++ b/sql/functions.sql
@@@ -101,7 -101,7 +101,7 @@@ END
   $$
   LANGUAGE plpgsql;
   
- CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT, lookup_class text, lookup_type text)
+ CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT, normalized_word TEXT, lookup_class text, lookup_type text)
     RETURNS INTEGER
     AS $$
   DECLARE
@@@ -109,17 -109,17 +109,17 @@@
     return_word_id INTEGER;
   BEGIN
     lookup_token := ' '||trim(lookup_word);
-   SELECT min(word_id) FROM word WHERE word_token = lookup_token and class=lookup_class and type = lookup_type into return_word_id;
+   SELECT min(word_id) FROM word WHERE word_token = lookup_token and word=normalized_word and class=lookup_class and type = lookup_type into return_word_id;
     IF return_word_id IS NULL THEN
       return_word_id := nextval('seq_word');
-     INSERT INTO word VALUES (return_word_id, lookup_token, null, lookup_class, lookup_type, null, 0);
+     INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word, lookup_class, lookup_type, null, 0);
     END IF;
     RETURN return_word_id;
   END;
   $$
   LANGUAGE plpgsql;
   
- CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT, lookup_class text, lookup_type text, op text)
+ CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT, normalized_word TEXT, lookup_class text, lookup_type text, op text)
     RETURNS INTEGER
     AS $$
   DECLARE
@@@ -127,10 -127,10 +127,10 @@@
     return_word_id INTEGER;
   BEGIN
     lookup_token := ' '||trim(lookup_word);
-   SELECT min(word_id) FROM word WHERE word_token = lookup_token and class=lookup_class and type = lookup_type and operator = op into return_word_id;
+   SELECT min(word_id) FROM word WHERE word_token = lookup_token and word=normalized_word and class=lookup_class and type = lookup_type and operator = op into return_word_id;
     IF return_word_id IS NULL THEN
       return_word_id := nextval('seq_word');
-     INSERT INTO word VALUES (return_word_id, lookup_token, null, lookup_class, lookup_type, null, 0, op);
+     INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word, lookup_class, lookup_type, null, 0, op);
     END IF;
     RETURN return_word_id;
   END;
@@@ -1491,7 -1491,7 +1491,7 @@@ BEGI
       IF relation_members IS NOT NULL THEN
         FOR relMember IN select get_osm_rel_members(relation_members,ARRAY['label']) as member LOOP
   
-         FOR linkedPlacex IN select * from placex where osm_type = upper(substring(relMember.member,1,1))::"char"
+         FOR linkedPlacex IN select * from placex where osm_type = upper(substring(relMember.member,1,1))::char(1) 
             and osm_id = substring(relMember.member,2,10000)::bigint order by rank_search desc limit 1 LOOP
   
             -- If we don't already have one use this as the centre point of the geometry
@@@ -1522,7 -1522,7 +1522,7 @@@
   
           FOR relMember IN select get_osm_rel_members(relation_members,ARRAY['admin_center','admin_centre']) as member LOOP
   
-           FOR linkedPlacex IN select * from placex where osm_type = upper(substring(relMember.member,1,1))::"char"
+           FOR linkedPlacex IN select * from placex where osm_type = upper(substring(relMember.member,1,1))::char(1) 
               and osm_id = substring(relMember.member,2,10000)::bigint order by rank_search desc limit 1 LOOP
   
               -- For an admin centre we also want a name match - still not perfect, for example 'new york, new york'
@@@ -1571,7 -1571,7 +1571,7 @@@
           make_standard_name(name->'name') = make_standard_name(NEW.name->'name')
           AND placex.rank_address = NEW.rank_address
           AND placex.place_id != NEW.place_id
-         AND placex.osm_type = 'N' AND placex.rank_search < 26
+         AND placex.osm_type = 'N'::char(1) AND placex.rank_search < 26
           AND st_covers(NEW.geometry, placex.geometry)
         LOOP
   
@@@ -1621,7 -1621,7 +1621,7 @@@
   
       -- Still null? how about looking it up by the node id
       IF NEW.importance IS NULL THEN
-       select language||':'||title,importance from wikipedia_article where osm_type = 'N' and osm_id = linked_node_id order by importance desc limit 1 INTO NEW.wikipedia,NEW.importance;
+       select language||':'||title,importance from wikipedia_article where osm_type = 'N'::char(1) and osm_id = linked_node_id order by importance desc limit 1 INTO NEW.wikipedia,NEW.importance;
       END IF;
   
     END IF;
@@@ -2381,7 -2381,7 +2381,7 @@@ BEGI
         CASE WHEN class = 'place' and type = 'postcode' THEN hstore('name', postcode) ELSE name END as name,
         CASE WHEN extratags ? 'place' THEN 'place' ELSE class END as class,
         CASE WHEN extratags ? 'place' THEN extratags->'place' ELSE type END as type,
- -      admin_level, fromarea, isaddress,
+ +      admin_level, fromarea, isaddress and linked_place_id is NULL as isaddress,
         CASE WHEN address_place_id = for_place_id AND rank_address = 0 THEN 100 WHEN rank_address = 11 THEN 5 ELSE rank_address END as rank_address,
         distance,country_code,postcode
         from place_addressline join placex on (address_place_id = placex.place_id)
author	Sarah Hoffmann <lonvia@denofr.de>
	Mon, 5 Jun 2017 18:27:32 +0000 (20:27 +0200)
committer	Sarah Hoffmann <lonvia@denofr.de>
	Mon, 5 Jun 2017 18:27:32 +0000 (20:27 +0200)
		1	2
lib/Geocode.php	patch |	diff1 |	diff2 |	blob | history
sql/functions.sql	patch |	diff1 |	diff2 |	blob | history