git.openstreetmap.org Git - nominatim.git/blobdiff - lib-php/tokenizer/legacy_icu_tokenizer.php
introduce a separate token type for partials
[nominatim.git] / lib-php / tokenizer / legacy_icu_tokenizer.php
index 92dd727283019ea3454b20ee7232f0234f583b0c..8cff6f322410366d2e0ca2ceaf143d2b2035ce64 100644 (file)
@@ -195,17 +195,27 @@ class Tokenizer
                 ) {
                     $oToken = new Token\Country($iId, $aWord['country_code']);
                 }
+            } elseif ($aWord['word_token'][0] == ' ') {
+                 $oToken = new Token\Word(
+                     $iId,
+                     $aWord['word_token'][0] != ' ',
+                     (int) $aWord['count'],
+                     substr_count($aWord['word_token'], ' ')
+                 );
             } else {
-                $oToken = new Token\Word(
+                $oToken = new Token\Partial(
                     $iId,
-                    $aWord['word_token'][0] != ' ',
-                    (int) $aWord['count'],
-                    substr_count($aWord['word_token'], ' ')
+                    (int) $aWord['count']
                 );
             }
 
             if ($oToken) {
-                $oValidTokens->addToken($aWord['word_token'], $oToken);
+                // remove any leading spaces
+                if ($aWord['word_token'][0] == ' ') {
+                    $oValidTokens->addToken(substr($aWord['word_token'], 1), $oToken);
+                } else {
+                    $oValidTokens->addToken($aWord['word_token'], $oToken);
+                }
             }
         }
     }