git.openstreetmap.org Git - nominatim.git/commitdiff
move SearchDescription building into tokens
author Sarah Hoffmann <lonvia@denofr.de>
Sat, 17 Jul 2021 18:24:33 +0000 (20:24 +0200)
committer Sarah Hoffmann <lonvia@denofr.de>
Sat, 17 Jul 2021 18:24:33 +0000 (20:24 +0200)
Moving the logic for extending the SearchDescription into the
token classes splits up the code and makes it more readable.
More importantly: it allows tokenizers to define custom token
classes in the future.

lib-php/Geocode.php
lib-php/SearchDescription.php
lib-php/TokenCountry.php
lib-php/TokenHousenumber.php
lib-php/TokenList.php
lib-php/TokenPartial.php
lib-php/TokenPostcode.php
lib-php/TokenSpecialTerm.php
lib-php/TokenWord.php

index 001c1e1e10e41e3fc1c7abd22852f1787afe9516..734f40693a188036e35e3e99a8ee9b2788501ce9 100644 (file)
@@ -362,8 +362,8 @@ class Geocode
 
                     foreach ($aWordsetSearches as $oCurrentSearch) {
                         foreach ($oValidTokens->get($sToken) as $oSearchTerm) {
-                            $aNewSearches = $oCurrentSearch->extendWithSearchTerm(
-                                $oSearchTerm,
+                            $aNewSearches = $oSearchTerm->extendSearch(
+                                $oCurrentSearch,
                                 $oPosition
                             );
 
index b4a78eb8e70a9e7274460340e4c76e23db53f91c..4886462aa6bcdc6c9ac1e9725d584699fcf0b9a5 100644 (file)
@@ -67,35 +67,6 @@ class SearchDescription
         return $this->iSearchRank;
     }
 
-    /**
-     * Make this search a POI search.
-     *
-     * In a POI search, objects are not (only) searched by their name
-     * but also by the primary OSM key/value pair (class and type in Nominatim).
-     *
-     * @param integer $iOperator Type of POI search
-     * @param string  $sClass    Class (or OSM tag key) of POI.
-     * @param string  $sType     Type (or OSM tag value) of POI.
-     *
-     * @return void
-     */
-    public function setPoiSearch($iOperator, $sClass, $sType)
-    {
-        $this->iOperator = $iOperator;
-        $this->sClass = $sClass;
-        $this->sType = $sType;
-    }
-
-    /**
-     * Check if any operator is set.
-     *
-     * @return bool True, if this is a special search operation.
-     */
-    public function hasOperator()
-    {
-        return $this->iOperator != Operator::NONE;
-    }
-
     /**
      * Extract key/value pairs from a query.
      *
@@ -147,244 +118,137 @@ class SearchDescription
     }
 
     /////////// Search building functions
+    public function clone($iTermCost)
+    {
+        $oSearch = clone $this;
+        $oSearch->iSearchRank += $iTermCost;
 
+        return $oSearch;
+    }
 
-    /**
-     * Derive new searches by adding a full term to the existing search.
-     *
-     * @param object  $oSearchTerm  Description of the token.
-     * @param object  $oPosition    Description of the token position within
-                                    the query.
-     *
-     * @return SearchDescription[] List of derived search descriptions.
-     */
-    public function extendWithSearchTerm($oSearchTerm, $oPosition)
+    public function hasName($bIncludeNonNames = false)
     {
-        $aNewSearches = array();
+        return !empty($this->aName)
+               || (!empty($this->aNameNonSearch) && $bIncludeNonNames);
+    }
 
-        if ($oPosition->maybePhrase('country')
-            && is_a($oSearchTerm, '\Nominatim\Token\Country')
-        ) {
-            if (!$this->sCountryCode) {
-                $oSearch = clone $this;
-                $oSearch->iSearchRank++;
-                $oSearch->sCountryCode = $oSearchTerm->sCountryCode;
-                // Country is almost always at the end of the string
-                // - increase score for finding it anywhere else (optimisation)
-                if (!$oPosition->isLastToken()) {
-                    $oSearch->iSearchRank += 5;
-                    $oSearch->iNamePhrase = -1;
-                }
-                $aNewSearches[] = $oSearch;
-            }
-        } elseif ($oPosition->maybePhrase('postalcode')
-                  && is_a($oSearchTerm, '\Nominatim\Token\Postcode')
-        ) {
-            if (!$this->sPostcode) {
-                // If we have structured search or this is the first term,
-                // make the postcode the primary search element.
-                if ($this->iOperator == Operator::NONE && $oPosition->isFirstToken()) {
-                    $oSearch = clone $this;
-                    $oSearch->iSearchRank++;
-                    $oSearch->iOperator = Operator::POSTCODE;
-                    $oSearch->aAddress = array_merge($this->aAddress, $this->aName);
-                    $oSearch->aName =
-                        array($oSearchTerm->iId => $oSearchTerm->sPostcode);
-                    $aNewSearches[] = $oSearch;
-                }
+    public function hasAddress()
+    {
+        return !empty($this->aAddress) || !empty($this->aAddressNonSearch);
+    }
 
-                // If we have a structured search or this is not the first term,
-                // add the postcode as an addendum.
-                if ($this->iOperator != Operator::POSTCODE
-                    && ($oPosition->isPhrase('postalcode') || !empty($this->aName))
-                ) {
-                    $oSearch = clone $this;
-                    $oSearch->iSearchRank++;
-                    $oSearch->iNamePhrase = -1;
-                    if (strlen($oSearchTerm->sPostcode) < 4) {
-                        $oSearch->iSearchRank += 4 - strlen($oSearchTerm->sPostcode);
-                    }
-                    $oSearch->sPostcode = $oSearchTerm->sPostcode;
-                    $aNewSearches[] = $oSearch;
-                }
-            }
-        } elseif ($oPosition->maybePhrase('street')
-                 && is_a($oSearchTerm, '\Nominatim\Token\HouseNumber')
-        ) {
-            if (!$this->sHouseNumber && $this->iOperator != Operator::POSTCODE) {
-                // sanity check: if the housenumber is not mainly made
-                // up of numbers, add a penalty
-                $iSearchCost = 1;
-                if (preg_match('/\\d/', $oSearchTerm->sToken) === 0
-                    || preg_match_all('/[^0-9]/', $oSearchTerm->sToken, $aMatches) > 2) {
-                    $iSearchCost++;
-                }
-                if ($this->iOperator != Operator::NONE) {
-                    $iSearchCost++;
-                }
-                if (empty($oSearchTerm->iId)) {
-                    $iSearchCost++;
-                }
-                // also must not appear in the middle of the address
-                if (!empty($this->aAddress)
-                    || (!empty($this->aAddressNonSearch))
-                    || $this->sPostcode
-                ) {
-                    $iSearchCost++;
-                }
+    public function hasCountry()
+    {
+        return $this->sCountryCode !== '';
+    }
 
-                $oSearch = clone $this;
-                $oSearch->iSearchRank += $iSearchCost;
-                $oSearch->iNamePhrase = -1;
-                $oSearch->sHouseNumber = $oSearchTerm->sToken;
-                $aNewSearches[] = $oSearch;
-
-                // Housenumbers may appear in the name when the place has its own
-                // address terms.
-                if ($oSearchTerm->iId !== null
-                    && ($this->iNamePhrase >= 0 || empty($this->aName))
-                    && empty($this->aAddress)
-                   ) {
-                    $oSearch = clone $this;
-                    $oSearch->iSearchRank += $iSearchCost;
-                    $oSearch->aAddress = $this->aName;
-                    $oSearch->bRareName = false;
-                    $oSearch->aName = array($oSearchTerm->iId => $oSearchTerm->iId);
-                    $aNewSearches[] = $oSearch;
-                }
-            }
-        } elseif ($oPosition->isPhrase('')
-                  && is_a($oSearchTerm, '\Nominatim\Token\SpecialTerm')
-        ) {
-            if ($this->iOperator == Operator::NONE) {
-                $oSearch = clone $this;
-                $oSearch->iSearchRank += 2;
-                $oSearch->iNamePhrase = -1;
-
-                $iOp = $oSearchTerm->iOperator;
-                if ($iOp == Operator::NONE) {
-                    if (!empty($this->aName) || $this->oContext->isBoundedSearch()) {
-                        $iOp = Operator::NAME;
-                    } else {
-                        $iOp = Operator::NEAR;
-                    }
-                    $oSearch->iSearchRank += 2;
-                } elseif (!$oPosition->isFirstToken() && !$oPosition->isLastToken()) {
-                    $oSearch->iSearchRank += 2;
-                }
-                if ($this->sHouseNumber) {
-                    $oSearch->iSearchRank++;
-                }
+    public function hasPostcode()
+    {
+        return $this->sPostcode !== '';
+    }
 
-                $oSearch->setPoiSearch(
-                    $iOp,
-                    $oSearchTerm->sClass,
-                    $oSearchTerm->sType
-                );
-                $aNewSearches[] = $oSearch;
-            }
-        } elseif (!$oPosition->isPhrase('country')
-                  && is_a($oSearchTerm, '\Nominatim\Token\Word')
-        ) {
-            $iWordID = $oSearchTerm->iId;
-            // Full words can only be a name if they appear at the beginning
-            // of the phrase. In structured search the name must forcably in
-            // the first phrase. In unstructured search it may be in a later
-            // phrase when the first phrase is a house number.
-            if (!empty($this->aName) || !($oPosition->isFirstPhrase() || $oPosition->isPhrase(''))) {
-                if (($oPosition->isPhrase('') || !$oPosition->isFirstPhrase())
-                    && $oSearchTerm->iTermCount > 1
-                ) {
-                    $oSearch = clone $this;
-                    $oSearch->iNamePhrase = -1;
-                    $oSearch->iSearchRank += 1;
-                    $oSearch->aAddress[$iWordID] = $iWordID;
-                    $aNewSearches[] = $oSearch;
-                }
-            } elseif (empty($this->aNameNonSearch)) {
-                $oSearch = clone $this;
-                $oSearch->iSearchRank++;
-                $oSearch->aName = array($iWordID => $iWordID);
-                if (CONST_Search_NameOnlySearchFrequencyThreshold) {
-                    $oSearch->bRareName =
-                        $oSearchTerm->iSearchNameCount
-                          < CONST_Search_NameOnlySearchFrequencyThreshold;
-                }
-                $aNewSearches[] = $oSearch;
-            }
-        } elseif (!$oPosition->isPhrase('country')
-                  && is_a($oSearchTerm, '\Nominatim\Token\Partial')
-        ) {
-            $aNewSearches = $this->extendWithPartialTerm(
-                $oSearchTerm,
-                $oPosition
-            );
+    public function hasHousenumber()
+    {
+        return $this->sHouseNumber !== '';
+    }
+
+    public function hasOperator($iOperator = null)
+    {
+        return $iOperator === null ? $this->iOperator != Operator::NONE : $this->iOperator == $iOperator;
+    }
+
+    public function addAddressToken($iId, $bSearchable = true)
+    {
+        if ($bSearchable) {
+            $this->aAddress[$iId] = $iId;
+        } else {
+            $this->aAddressNonSearch[$iId] = $iId;
+        }
+    }
+
+    public function addNameToken($iId)
+    {
+        $this->aName[$iId] = $iId;
+    }
+
+    public function addPartialNameToken($iId, $bSearchable, $iPhraseNumber)
+    {
+        if ($bSearchable) {
+            $this->aName[$iId] = $iId;
+        } else {
+            $this->aNameNonSearch[$iId] = $iId;
         }
+        $this->iNamePhrase = $iPhraseNumber;
+    }
+
+    public function markRareName()
+    {
+        $this->bRareName = true;
+    }
+
+    public function setCountry($sCountryCode)
+    {
+        $this->sCountryCode = $sCountryCode;
+        $this->iNamePhrase = -1;
+    }
+
+    public function setPostcode($sPostcode)
+    {
+        $this->sPostcode = $sPostcode;
+        $this->iNamePhrase = -1;
+    }
+
+    public function setPostcodeAsName($iId, $sPostcode)
+    {
+        $this->iOperator = Operator::POSTCODE;
+        $this->aAddress = array_merge($this->aAddress, $this->aName);
+        $this->aName = array($iId => $sPostcode);
+        $this->bRareName = true;
+        $this->iNamePhrase = -1;
+    }
+
+    public function setHousenumber($sNumber)
+    {
+        $this->sHouseNumber = $sNumber;
+        $this->iNamePhrase = -1;
+    }
 
-        return $aNewSearches;
+    public function setHousenumberAsName($iId)
+    {
+        $this->aAddress = array_merge($this->aAddress, $this->aName);
+        $this->bRareName = false;
+        $this->aName = array($iId => $iId);
+        $this->iNamePhrase = -1;
     }
 
     /**
-     * Derive new searches by adding a partial term to the existing search.
+     * Make this search a POI search.
+     *
+     * In a POI search, objects are not (only) searched by their name
+     * but also by the primary OSM key/value pair (class and type in Nominatim).
      *
-     * @param object  $oSearchTerm  Description of the token.
-     * @param object  $oPosition    Description of the token position within
-                                    the query.
+     * @param integer $iOperator Type of POI search
+     * @param string  $sClass    Class (or OSM tag key) of POI.
+     * @param string  $sType     Type (or OSM tag value) of POI.
      *
-     * @return SearchDescription[] List of derived search descriptions.
+     * @return void
      */
-    private function extendWithPartialTerm($oSearchTerm, $oPosition)
+    public function setPoiSearch($iOperator, $sClass, $sType)
     {
-        $aNewSearches = array();
-        $iWordID = $oSearchTerm->iId;
-
-        if (($oPosition->isPhrase('') || !$oPosition->isFirstPhrase())
-            && (!empty($this->aName))
-        ) {
-            $oSearch = clone $this;
-            $oSearch->iSearchRank++;
-            if (preg_match('#^[0-9 ]+$#', $oSearchTerm->sToken)) {
-                $oSearch->iSearchRank++;
-            }
-            if ($oSearchTerm->iSearchNameCount < CONST_Max_Word_Frequency) {
-                $oSearch->aAddress[$iWordID] = $iWordID;
-            } else {
-                $oSearch->aAddressNonSearch[$iWordID] = $iWordID;
-            }
-            $aNewSearches[] = $oSearch;
-        }
+        $this->iOperator = $iOperator;
+        $this->sClass = $sClass;
+        $this->sType = $sType;
+        $this->iNamePhrase = -1;
+    }
 
-        if ((!$this->sPostcode && !$this->aAddress && !$this->aAddressNonSearch)
-            && ((empty($this->aName) && empty($this->aNameNonSearch))
-                || $this->iNamePhrase == $oPosition->getPhrase())
-        ) {
-            $oSearch = clone $this;
-            $oSearch->iSearchRank++;
-            if (empty($this->aName) && empty($this->aNameNonSearch)) {
-                $oSearch->iSearchRank++;
-            }
-            if (preg_match('#^[0-9 ]+$#', $oSearchTerm->sToken)) {
-                $oSearch->iSearchRank++;
-            }
-            if ($oSearchTerm->iSearchNameCount < CONST_Max_Word_Frequency) {
-                if (empty($this->aName)
-                    && CONST_Search_NameOnlySearchFrequencyThreshold
-                ) {
-                    $oSearch->bRareName =
-                        $oSearchTerm->iSearchNameCount
-                          < CONST_Search_NameOnlySearchFrequencyThreshold;
-                } else {
-                    $oSearch->bRareName = false;
-                }
-                $oSearch->aName[$iWordID] = $iWordID;
-            } else {
-                $oSearch->aNameNonSearch[$iWordID] = $iWordID;
-            }
-            $oSearch->iNamePhrase = $oPosition->getPhrase();
-            $aNewSearches[] = $oSearch;
-        }
+    public function getNamePhrase()
+    {
+        return $this->iNamePhrase;
+    }
 
-        return $aNewSearches;
+    public function getContext()
+    {
+        return $this->oContext;
     }
 
     /////////// Query functions
index 518c0a31e3df225c37a19073946dd6c0dfd11035..917ed9d25867d55834003aaf3e53720155ec0ed7 100644 (file)
@@ -8,9 +8,9 @@ namespace Nominatim\Token;
 class Country
 {
     /// Database word id, if available.
-    public $iId;
+    private $iId;
     /// Two-letter country code (lower-cased).
-    public $sCountryCode;
+    private $sCountryCode;
 
     public function __construct($iId, $sCountryCode)
     {
@@ -18,6 +18,32 @@ class Country
         $this->sCountryCode = $sCountryCode;
     }
 
+    public function getId()
+    {
+        return $this->iId;
+    }
+
+    /**
+     * Derive new searches by adding this token to an existing search.
+     *
+     * @param object  $oSearch      Partial search description derived so far.
+     * @param object  $oPosition    Description of the token position within
+                                    the query.
+     *
+     * @return SearchDescription[] List of derived search descriptions.
+     */
+    public function extendSearch($oSearch, $oPosition)
+    {
+        if ($oSearch->hasCountry() || !$oPosition->maybePhrase('country')) {
+            return array();
+        }
+
+        $oNewSearch = $oSearch->clone($oPosition->isLastToken() ? 1 : 6);
+        $oNewSearch->setCountry($this->sCountryCode);
+
+        return array($oNewSearch);
+    }
+
     public function debugInfo()
     {
         return array(
@@ -26,4 +52,9 @@ class Country
                 'Info' => $this->sCountryCode
                );
     }
+
+    public function debugCode()
+    {
+        return 'C';
+    }
 }
index 5c7c6e9b633a4af458acbc637249ffad2453f7ac..0cc67a1212d17fc8bbd5a6251f5b86827fc05efe 100644 (file)
@@ -8,9 +8,9 @@ namespace Nominatim\Token;
 class HouseNumber
 {
     /// Database word id, if available.
-    public $iId;
+    private $iId;
     /// Normalized house number.
-    public $sToken;
+    private $sToken;
 
     public function __construct($iId, $sToken)
     {
@@ -18,6 +18,69 @@ class HouseNumber
         $this->sToken = $sToken;
     }
 
+    public function getId()
+    {
+        return $this->iId;
+    }
+
+    /**
+     * Derive new searches by adding this token to an existing search.
+     *
+     * @param object  $oSearch      Partial search description derived so far.
+     * @param object  $oPosition    Description of the token position within
+                                    the query.
+     *
+     * @return SearchDescription[] List of derived search descriptions.
+     */
+    public function extendSearch($oSearch, $oPosition)
+    {
+        $aNewSearches = array();
+
+        if ($oSearch->hasHousenumber()
+            || $oSearch->hasOperator(\Nominatim\Operator::POSTCODE)
+            || !$oPosition->maybePhrase('street')
+        ) {
+            return $aNewSearches;
+        }
+
+        // sanity check: if the housenumber is not mainly made
+        // up of numbers, add a penalty
+        $iSearchCost = 1;
+        if (preg_match('/\\d/', $this->sToken) === 0
+            || preg_match_all('/[^0-9]/', $this->sToken, $aMatches) > 2) {
+            $iSearchCost++;
+        }
+        if (!$oSearch->hasOperator(\Nominatim\Operator::NONE)) {
+            $iSearchCost++;
+        }
+        if (empty($this->iId)) {
+            $iSearchCost++;
+        }
+        // also must not appear in the middle of the address
+        if ($oSearch->hasAddress() || $oSearch->hasPostcode()) {
+            $iSearchCost++;
+        }
+
+        $oNewSearch = $oSearch->clone($iSearchCost);
+        $oNewSearch->setHousenumber($this->sToken);
+        $aNewSearches[] = $oNewSearch;
+
+        // Housenumbers may appear in the name when the place has its own
+        // address terms.
+        if ($this->iId !== null
+            && ($oSearch->getNamePhrase() >= 0 || !$oSearch->hasName())
+            && !$oSearch->hasAddress()
+        ) {
+            $oNewSearch = $oSearch->clone($iSearchCost);
+            $oNewSearch->setHousenumberAsName($this->iId);
+
+            $aNewSearches[] = $oNewSearch;
+        }
+
+        return $aNewSearches;
+    }
+
+
     public function debugInfo()
     {
         return array(
@@ -26,4 +89,9 @@ class HouseNumber
                 'Info' => array('nr' => $this->sToken)
                );
     }
+
+    public function debugCode()
+    {
+        return 'H';
+    }
 }
index bc8f9c3f1b06d9f24038342d7c8215f1b2dc3dea..a599648c21acdb48191c684f4f94c41950e2ae8c 100644 (file)
@@ -79,7 +79,7 @@ class TokenList
         foreach ($this->aTokens as $aTokenList) {
             foreach ($aTokenList as $oToken) {
                 if (is_a($oToken, '\Nominatim\Token\Word')) {
-                    $ids[$oToken->iId] = $oToken->iId;
+                    $ids[$oToken->getId()] = $oToken->getId();
                 }
             }
         }
@@ -109,9 +109,9 @@ class TokenList
         $aWordsIDs = array();
         foreach ($this->aTokens as $sToken => $aWords) {
             foreach ($aWords as $aToken) {
-                if ($aToken->iId !== null) {
-                    $aWordsIDs[$aToken->iId] =
-                        '#'.$sToken.'('.$aToken->iId.')#';
+                $iId = $aToken->getId();
+                if ($iId !== null) {
+                    $aWordsIDs[$iId] = '#'.$sToken.'('.$aToken->debugCode().' '.$iId.')#';
                 }
             }
         }
index 99a759474edde918b757531ea880f3d783ac77b5..e52161cc0168953f30fed95296cef5c196c64bcb 100644 (file)
@@ -8,19 +8,86 @@ namespace Nominatim\Token;
 class Partial
 {
     /// Database word id, if applicable.
-    public $iId;
+    private $iId;
     /// Number of appearances in the database.
-    public $iSearchNameCount;
-    /// Normalised version of the partial word.
-    public $sToken;
+    private $iSearchNameCount;
+    /// True, if the token consists exclusively of digits and spaces.
+    private $bNumberToken;
 
     public function __construct($iId, $sToken, $iSearchNameCount)
     {
         $this->iId = $iId;
-        $this->sToken = $sToken;
+        $this->bNumberToken = (bool) preg_match('#^[0-9 ]+$#', $sToken);
         $this->iSearchNameCount = $iSearchNameCount;
     }
 
+    public function getId()
+    {
+        return $this->iId;
+    }
+
+    /**
+     * Derive new searches by adding this token to an existing search.
+     *
+     * @param object  $oSearch      Partial search description derived so far.
+     * @param object  $oPosition    Description of the token position within
+                                    the query.
+     *
+     * @return SearchDescription[] List of derived search descriptions.
+     */
+    public function extendSearch($oSearch, $oPosition)
+    {
+        if ($oPosition->isPhrase('country')) {
+            return array();
+        }
+
+        $aNewSearches = array();
+
+        // Partial token in Address.
+        if (($oPosition->isPhrase('') || !$oPosition->isFirstPhrase())
+            && $oSearch->hasName()
+        ) {
+            $iSearchCost = $this->bNumberToken ? 2 : 1;
+            if ($this->iSearchNameCount >= CONST_Max_Word_Frequency) {
+                $iSearchCost += 1;
+            }
+
+            $oNewSearch = $oSearch->clone($iSearchCost);
+            $oNewSearch->addAddressToken(
+                $this->iId,
+                $this->iSearchNameCount < CONST_Max_Word_Frequency
+            );
+
+            $aNewSearches[] = $oNewSearch;
+        }
+
+        // Partial token in Name.
+        if ((!$oSearch->hasPostcode() && !$oSearch->hasAddress())
+            && (!$oSearch->hasName(true)
+                || $oSearch->getNamePhrase() == $oPosition->getPhrase())
+        ) {
+            $iSearchCost = 1;
+            if (!$oSearch->hasName(true)) {
+                $iSearchCost += 1;
+            }
+            if ($this->bNumberToken) {
+                $iSearchCost += 1;
+            }
+
+            $oNewSearch = $oSearch->clone($iSearchCost);
+            $oNewSearch->addPartialNameToken(
+                $this->iId,
+                $this->iSearchNameCount < CONST_Max_Word_Frequency,
+                $oPosition->getPhrase()
+            );
+
+            $aNewSearches[] = $oNewSearch;
+        }
+
+        return $aNewSearches;
+    }
+
+
     public function debugInfo()
     {
         return array(
@@ -31,4 +98,9 @@ class Partial
                           )
                );
     }
+
+    public function debugCode()
+    {
+        return 'w';
+    }
 }
index 8fa2ae8021d1bfbed459fb0c546d379271f1188c..563fe7faa36b9f94ad3d83b81db5a973d26cae6d 100644 (file)
@@ -8,11 +8,11 @@ namespace Nominatim\Token;
 class Postcode
 {
     /// Database word id, if available.
-    public $iId;
+    private $iId;
     /// Full normalized postcode (upper cased).
-    public $sPostcode;
+    private $sPostcode;
     // Optional country code the postcode belongs to (currently unused).
-    public $sCountryCode;
+    private $sCountryCode;
 
     public function __construct($iId, $sPostcode, $sCountryCode = '')
     {
@@ -21,6 +21,55 @@ class Postcode
         $this->sCountryCode = empty($sCountryCode) ? '' : $sCountryCode;
     }
 
+    public function getId()
+    {
+        return $this->iId;
+    }
+
+    /**
+     * Derive new searches by adding this token to an existing search.
+     *
+     * @param object  $oSearch      Partial search description derived so far.
+     * @param object  $oPosition    Description of the token position within
+                                    the query.
+     *
+     * @return SearchDescription[] List of derived search descriptions.
+     */
+    public function extendSearch($oSearch, $oPosition)
+    {
+        $aNewSearches = array();
+
+        if ($oSearch->hasPostcode() || !$oPosition->maybePhrase('postalcode')) {
+            return $aNewSearches;
+        }
+
+        // If we have structured search or this is the first term,
+        // make the postcode the primary search element.
+        if ($oSearch->hasOperator(\Nominatim\Operator::NONE) && $oPosition->isFirstToken()) {
+            $oNewSearch = $oSearch->clone(1);
+            $oNewSearch->setPostcodeAsName($this->iId, $this->sPostcode);
+
+            $aNewSearches[] = $oNewSearch;
+        }
+
+        // If we have a structured search or this is not the first term,
+        // add the postcode as an addendum.
+        if (!$oSearch->hasOperator(\Nominatim\Operator::POSTCODE)
+            && ($oPosition->isPhrase('postalcode') || $oSearch->hasName())
+        ) {
+            $iPenalty = 1;
+            if (strlen($this->sPostcode) < 4) {
+                $iPenalty += 4 - strlen($this->sPostcode);
+            }
+            $oNewSearch = $oSearch->clone($iPenalty);
+            $oNewSearch->setPostcode($this->sPostcode);
+
+            $aNewSearches[] = $oNewSearch;
+        }
+
+        return $aNewSearches;
+    }
+
     public function debugInfo()
     {
         return array(
@@ -29,4 +78,9 @@ class Postcode
                 'Info' => $this->sPostcode.'('.$this->sCountryCode.')'
                );
     }
+
+    public function debugCode()
+    {
+        return 'P';
+    }
 }
index b2c312ec90e53d8a52b022aeb01ab057059fbd3f..89dfa02619447f78106eb16eb2139e8557c8ab4b 100644 (file)
@@ -26,6 +26,50 @@ class SpecialTerm
         $this->iOperator = $iOperator;
     }
 
+    public function getId()
+    {
+        return $this->iId;
+    }
+
+    /**
+     * Derive new searches by adding this token to an existing search.
+     *
+     * @param object  $oSearch      Partial search description derived so far.
+     * @param object  $oPosition    Description of the token position within
+                                    the query.
+     *
+     * @return SearchDescription[] List of derived search descriptions.
+     */
+    public function extendSearch($oSearch, $oPosition)
+    {
+        if ($oSearch->hasOperator() || !$oPosition->isPhrase('')) {
+            return array();
+        }
+
+        $iSearchCost = 2;
+
+        $iOp = $this->iOperator;
+        if ($iOp == \Nominatim\Operator::NONE) {
+            if ($oSearch->hasName() || $oSearch->getContext()->isBoundedSearch()) {
+                $iOp = \Nominatim\Operator::NAME;
+            } else {
+                $iOp = \Nominatim\Operator::NEAR;
+            }
+            $iSearchCost += 2;
+        } elseif (!$oPosition->isFirstToken() && !$oPosition->isLastToken()) {
+            $iSearchCost += 2;
+        }
+        if ($oSearch->hasHousenumber()) {
+            $iSearchCost ++;
+        }
+
+        $oNewSearch = $oSearch->clone($iSearchCost);
+        $oNewSearch->setPoiSearch($iOp, $this->sClass, $this->sType);
+
+        return array($oNewSearch);
+    }
+
+
     public function debugInfo()
     {
         return array(
@@ -38,4 +82,9 @@ class SpecialTerm
                           )
                );
     }
+
+    public function debugCode()
+    {
+        return 'S';
+    }
 }
index 6de584229b47f2e0b2af2cf0cceb96924c8be483..7c653f8fc7c2cee2db1e3accc0b0930866f4dca7 100644 (file)
@@ -8,11 +8,11 @@ namespace Nominatim\Token;
 class Word
 {
     /// Database word id, if applicable.
-    public $iId;
+    private $iId;
     /// Number of appearances in the database.
-    public $iSearchNameCount;
+    private $iSearchNameCount;
     /// Number of terms in the word.
-    public $iTermCount;
+    private $iTermCount;
 
     public function __construct($iId, $iSearchNameCount, $iTermCount)
     {
@@ -21,6 +21,57 @@ class Word
         $this->iTermCount = $iTermCount;
     }
 
+    public function getId()
+    {
+        return $this->iId;
+    }
+
+    /**
+     * Derive new searches by adding this token to an existing search.
+     *
+     * @param object  $oSearch      Partial search description derived so far.
+     * @param object  $oPosition    Description of the token position within
+                                    the query.
+     *
+     * @return SearchDescription[] List of derived search descriptions.
+     */
+    public function extendSearch($oSearch, $oPosition)
+    {
+        if ($oPosition->isPhrase('country')) {
+            return array();
+        }
+
+        // Full words can only be a name if they appear at the beginning
+        // of the phrase. In structured search the name must forcibly be in
+        // the first phrase. In unstructured search it may be in a later
+        // phrase when the first phrase is a house number.
+        if ($oSearch->hasName()
+            || !($oPosition->isFirstPhrase() || $oPosition->isPhrase(''))
+        ) {
+            if ($this->iTermCount > 1
+                && ($oPosition->isPhrase('') || !$oPosition->isFirstPhrase())
+            ) {
+                $oNewSearch = $oSearch->clone(1);
+                $oNewSearch->addAddressToken($this->iId);
+
+                return array($oNewSearch);
+            }
+        } elseif (!$oSearch->hasName(true)) {
+            $oNewSearch = $oSearch->clone(1);
+            $oNewSearch->addNameToken($this->iId);
+            if (CONST_Search_NameOnlySearchFrequencyThreshold
+                && $this->iSearchNameCount
+                          < CONST_Search_NameOnlySearchFrequencyThreshold
+            ) {
+                $oNewSearch->markRareName();
+            }
+
+            return array($oNewSearch);
+        }
+
+        return array();
+    }
+
     public function debugInfo()
     {
         return array(
@@ -32,4 +83,9 @@ class Word
                           )
                );
     }
+
+    public function debugCode()
+    {
+        return 'W';
+    }
 }