git.openstreetmap.org Git - nominatim.git/commitdiff
replace word frequency hash
author: Sarah Hoffmann <lonvia@denofr.de>
Sun, 6 May 2018 20:10:38 +0000 (22:10 +0200)
committer: Sarah Hoffmann <lonvia@denofr.de>
Sun, 6 May 2018 20:35:31 +0000 (22:35 +0200)
The word frequency hash was only used to determine if the
name of a SearchDescription is rare. Do this already when
building the SearchDescription (when the word frequency
is still available) and get rid of the extra hash.

lib/Geocode.php
lib/SearchDescription.php

index 612c1a0f19cc234cc0c64621ed3828220cd0b410..68a9a7cb1aced88a11662f4dee533e43661b054e 100644 (file)
@@ -658,7 +658,6 @@ class Geocode
                     $this->oDB->getAll($sSQL),
                     'Could not get word tokens.'
                 );
-                $aWordFrequencyScores = array();
                 foreach ($aDatabaseWords as $aToken) {
                     // Filter country tokens that do not match restricted countries.
                     if ($this->aCountryCodes
@@ -681,7 +680,6 @@ class Geocode
                     } else {
                         $aValidTokens[$aToken['word_token']] = array($aToken);
                     }
-                    $aWordFrequencyScores[$aToken['word_id']] = $aToken['search_name_count'] + 1;
                 }
 
                 // US ZIP+4 codes - if there is no token, merge in the 5-digit ZIP code
@@ -781,7 +779,6 @@ class Geocode
 
                     $aResults += $oSearch->query(
                         $this->oDB,
-                        $aWordFrequencyScores,
                         $this->iMinAddressRank,
                         $this->iMaxAddressRank,
                         $this->iLimit
index 6345f50fc5aa570c06ccb1099e1544ad11bb88e2..07eccec488175c04c1b96a2d948c0857275d25e9 100644 (file)
@@ -17,6 +17,8 @@ class SearchDescription
     private $sCountryCode = '';
     /// List of word ids making up the name of the object.
     private $aName = array();
+    /// True if the name is rare enough to force index use on name.
+    private $bRareName = false;
     /// List of word ids making up the address of the object.
     private $aAddress = array();
     /// Subset of word ids of full words making up the address.
@@ -292,6 +294,11 @@ class SearchDescription
                 $oSearch = clone $this;
                 $oSearch->iSearchRank++;
                 $oSearch->aName = array($iWordID => $iWordID);
+                if (CONST_Search_NameOnlySearchFrequencyThreshold) {
+                    $oSearch->bRareName =
+                        $aSearchTerm['search_name_count'] + 1
+                          < CONST_Search_NameOnlySearchFrequencyThreshold;
+                }
                 $aNewSearches[] = $oSearch;
             }
         }
@@ -368,6 +375,13 @@ class SearchDescription
                 $oSearch->iSearchRank += 2;
             }
             if ($aSearchTerm['search_name_count'] + 1 < CONST_Max_Word_Frequency) {
+                if (empty($this->aName) && CONST_Search_NameOnlySearchFrequencyThreshold) {
+                    $oSearch->bRareName =
+                        $aSearchTerm['search_name_count'] + 1
+                          < CONST_Search_NameOnlySearchFrequencyThreshold;
+                } else {
+                    $oSearch->bRareName = false;
+                }
                 $oSearch->aName[$iWordID] = $iWordID;
             } else {
                 $oSearch->aNameNonSearch[$iWordID] = $iWordID;
@@ -385,20 +399,16 @@ class SearchDescription
     /**
      * Query database for places that match this search.
      *
-     * @param object  $oDB                  Database connection to use.
-     * @param mixed[] $aWordFrequencyScores Number of times tokens appears
-     *                                      overall in a planet database.
-     * @param integer $iMinRank             Minimum address rank to restrict
-     *                                      search to.
-     * @param integer $iMaxRank             Maximum address rank to restrict
-     *                                      search to.
-     * @param integer $iLimit               Maximum number of results.
+     * @param object  $oDB      Database connection to use.
+     * @param integer $iMinRank Minimum address rank to restrict search to.
+     * @param integer $iMaxRank Maximum address rank to restrict search to.
+     * @param integer $iLimit   Maximum number of results.
      *
      * @return mixed[] An array with two fields: IDs contains the list of
      *                 matching place IDs and houseNumber the houseNumber
      *                 if appicable or -1 if not.
      */
-    public function query(&$oDB, &$aWordFrequencyScores, $iMinRank, $iMaxRank, $iLimit)
+    public function query(&$oDB, $iMinRank, $iMaxRank, $iLimit)
     {
         $aResults = array();
         $iHousenumber = -1;
@@ -427,7 +437,6 @@ class SearchDescription
             // First search for places according to name and address.
             $aResults = $this->queryNamedPlace(
                 $oDB,
-                $aWordFrequencyScores,
                 $iMinRank,
                 $iMaxRank,
                 $iLimit
@@ -579,7 +588,7 @@ class SearchDescription
         return $aResults;
     }
 
-    private function queryNamedPlace(&$oDB, $aWordFrequencyScores, $iMinAddressRank, $iMaxAddressRank, $iLimit)
+    private function queryNamedPlace(&$oDB, $iMinAddressRank, $iMaxAddressRank, $iLimit)
     {
         $aTerms = array();
         $aOrder = array();
@@ -615,11 +624,7 @@ class SearchDescription
         }
         if (!empty($this->aAddress)) {
             // For infrequent name terms disable index usage for address
-            if (CONST_Search_NameOnlySearchFrequencyThreshold
-                && count($this->aName) == 1
-                && $aWordFrequencyScores[$this->aName[reset($this->aName)]]
-                     < CONST_Search_NameOnlySearchFrequencyThreshold
-            ) {
+            if ($this->bRareName) {
                 $aTerms[] = 'array_cat(nameaddress_vector,ARRAY[]::integer[]) @> '.getArraySQL($this->aAddress);
             } else {
                 $aTerms[] = 'nameaddress_vector @> '.getArraySQL($this->aAddress);