git.openstreetmap.org Git - nominatim.git/commitdiff
remove Token from explicit input for SearchDescription extension
author: Sarah Hoffmann <lonvia@denofr.de>
Thu, 15 Jul 2021 12:48:20 +0000 (14:48 +0200)
committer: Sarah Hoffmann <lonvia@denofr.de>
Sat, 17 Jul 2021 16:18:31 +0000 (18:18 +0200)
The token string is only required by the PartialToken type, so
it can simply save the token string internally. No need to pass
it to every type.

Also moves the check for multi-word partials to the token loader
code in the tokenizer. Multi-word partials can only happen with
the legacy tokenizer and when the database was loaded with an
older version of Nominatim. No need to keep the check for
everybody.

lib-php/Geocode.php
lib-php/SearchDescription.php
lib-php/TokenPartial.php
lib-php/tokenizer/legacy_icu_tokenizer.php
lib-php/tokenizer/legacy_tokenizer.php

index c2b4f4e4c0704e035f37518b784c251bc62a8c02..001c1e1e10e41e3fc1c7abd22852f1787afe9516 100644 (file)
@@ -363,7 +363,6 @@ class Geocode
                     foreach ($aWordsetSearches as $oCurrentSearch) {
                         foreach ($oValidTokens->get($sToken) as $oSearchTerm) {
                             $aNewSearches = $oCurrentSearch->extendWithSearchTerm(
-                                $sToken,
                                 $oSearchTerm,
                                 $oPosition
                             );
index 8924287aa06483cff4b70cbd7c7fc17420bacb97..b4a78eb8e70a9e7274460340e4c76e23db53f91c 100644 (file)
@@ -152,14 +152,13 @@ class SearchDescription
     /**
      * Derive new searches by adding a full term to the existing search.
      *
-     * @param string  $sToken       Term for the token.
      * @param object  $oSearchTerm  Description of the token.
      * @param object  $oPosition    Description of the token position within
                                     the query.
      *
      * @return SearchDescription[] List of derived search descriptions.
      */
-    public function extendWithSearchTerm($sToken, $oSearchTerm, $oPosition)
+    public function extendWithSearchTerm($oSearchTerm, $oPosition)
     {
         $aNewSearches = array();
 
@@ -315,10 +314,8 @@ class SearchDescription
             }
         } elseif (!$oPosition->isPhrase('country')
                   && is_a($oSearchTerm, '\Nominatim\Token\Partial')
-                  && strpos($sToken, ' ') === false
         ) {
             $aNewSearches = $this->extendWithPartialTerm(
-                $sToken,
                 $oSearchTerm,
                 $oPosition
             );
@@ -330,14 +327,13 @@ class SearchDescription
     /**
      * Derive new searches by adding a partial term to the existing search.
      *
-     * @param string  $sToken       Term for the token.
      * @param object  $oSearchTerm  Description of the token.
      * @param object  $oPosition    Description of the token position within
                                     the query.
      *
      * @return SearchDescription[] List of derived search descriptions.
      */
-    private function extendWithPartialTerm($sToken, $oSearchTerm, $oPosition)
+    private function extendWithPartialTerm($oSearchTerm, $oPosition)
     {
         $aNewSearches = array();
         $iWordID = $oSearchTerm->iId;
@@ -347,7 +343,7 @@ class SearchDescription
         ) {
             $oSearch = clone $this;
             $oSearch->iSearchRank++;
-            if (preg_match('#^[0-9 ]+$#', $sToken)) {
+            if (preg_match('#^[0-9 ]+$#', $oSearchTerm->sToken)) {
                 $oSearch->iSearchRank++;
             }
             if ($oSearchTerm->iSearchNameCount < CONST_Max_Word_Frequency) {
@@ -367,7 +363,7 @@ class SearchDescription
             if (empty($this->aName) && empty($this->aNameNonSearch)) {
                 $oSearch->iSearchRank++;
             }
-            if (preg_match('#^[0-9 ]+$#', $sToken)) {
+            if (preg_match('#^[0-9 ]+$#', $oSearchTerm->sToken)) {
                 $oSearch->iSearchRank++;
             }
             if ($oSearchTerm->iSearchNameCount < CONST_Max_Word_Frequency) {
index 477ef9c5a1547c149366844353b0f197edf1316b..99a759474edde918b757531ea880f3d783ac77b5 100644 (file)
@@ -11,10 +11,13 @@ class Partial
     public $iId;
     /// Number of appearances in the database.
     public $iSearchNameCount;
+    /// Normalised version of the partial word.
+    public $sToken;
 
-    public function __construct($iId, $iSearchNameCount)
+    public function __construct($iId, $sToken, $iSearchNameCount)
     {
         $this->iId = $iId;
+        $this->sToken = $sToken;
         $this->iSearchNameCount = $iSearchNameCount;
     }
 
index 96a1d8a659fda9ca45d0c9753b2439e66ec00ab4..2c0884c8170b46df51f64d90e67def88ac2d3b55 100644 (file)
@@ -205,6 +205,7 @@ class Tokenizer
             } else {
                 $oToken = new Token\Partial(
                     $iId,
+                    $aWord['word_token'],
                     (int) $aWord['count']
                 );
             }
index 238fbcf45e48a2120d4b0f2b566032999bc963be..064b41667a9322bb6cb164dd6f7bb041490d1257 100644 (file)
@@ -218,9 +218,12 @@ class Tokenizer
                     (int) $aWord['count'],
                     substr_count($aWord['word_token'], ' ')
                 );
-            } else {
+            // For backward compatibility: ignore all partial tokens with more
+            // than one word.
+            } elseif (strpos($aWord['word_token'], ' ') === false) {
                 $oToken = new Token\Partial(
                     $iId,
+                    $aWord['word_token'],
                     (int) $aWord['count']
                 );
             }