From ec3f6c9c42dd89e71d4edd3cfb2a911453aba58d Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Thu, 15 Jul 2021 14:12:59 +0200 Subject: [PATCH] factor out query position Moves token and phrase position and phrase type into a separate class that is handed in when assembling the search description. This drastically reduces the number of parameters for the function to extend the search descriptions and gives us more flexibility in the future for more complex positional analysis. --- lib-php/Geocode.php | 14 +++--- lib-php/SearchDescription.php | 55 +++++++++++----------- lib-php/SearchPosition.php | 87 +++++++++++++++++++++++++++++++++++ 3 files changed, 122 insertions(+), 34 deletions(-) create mode 100644 lib-php/SearchPosition.php diff --git a/lib-php/Geocode.php b/lib-php/Geocode.php index eda6df54..c2b4f4e4 100644 --- a/lib-php/Geocode.php +++ b/lib-php/Geocode.php @@ -7,6 +7,7 @@ require_once(CONST_LibDir.'/Phrase.php'); require_once(CONST_LibDir.'/ReverseGeocode.php'); require_once(CONST_LibDir.'/SearchDescription.php'); require_once(CONST_LibDir.'/SearchContext.php'); +require_once(CONST_LibDir.'/SearchPosition.php'); require_once(CONST_LibDir.'/TokenList.php'); require_once(CONST_TokenizerDir.'/tokenizer.php'); @@ -345,7 +346,11 @@ class Geocode */ foreach ($aPhrases as $iPhrase => $oPhrase) { $aNewPhraseSearches = array(); - $sPhraseType = $oPhrase->getPhraseType(); + $oPosition = new SearchPosition( + $oPhrase->getPhraseType(), + $iPhrase, + count($aPhrases) + ); foreach ($oPhrase->getWordSets() as $aWordset) { $aWordsetSearches = $aSearches; @@ -353,17 +358,14 @@ class Geocode // Add all words from this wordset foreach ($aWordset as $iToken => $sToken) { $aNewWordsetSearches = array(); + $oPosition->setTokenPosition($iToken, count($aWordset)); foreach ($aWordsetSearches as $oCurrentSearch) { foreach ($oValidTokens->get($sToken) as $oSearchTerm) { $aNewSearches = $oCurrentSearch->extendWithSearchTerm( $sToken, $oSearchTerm, - $sPhraseType, - $iToken == 0 
&& $iPhrase == 0, - $iToken + 1 == count($aWordset) - && $iPhrase + 1 == count($aPhrases), - $iPhrase + $oPosition ); foreach ($aNewSearches as $oSearch) { diff --git a/lib-php/SearchDescription.php b/lib-php/SearchDescription.php index 938beb61..8924287a 100644 --- a/lib-php/SearchDescription.php +++ b/lib-php/SearchDescription.php @@ -154,19 +154,16 @@ class SearchDescription * * @param string $sToken Term for the token. * @param object $oSearchTerm Description of the token. - * @param string $sPhraseType Type of phrase the token is contained in. - * @param bool $bFirstToken True if the token is at the beginning of the - * query. - * @param bool $bLastToken True if the token is at the end of the query. - * @param integer $iPhrase Number of the phrase the token is in. + * @param object $oPosition Description of the token position within + * the query. * * @return SearchDescription[] List of derived search descriptions. */ - public function extendWithSearchTerm($sToken, $oSearchTerm, $sPhraseType, $bFirstToken, $bLastToken, $iPhrase) + public function extendWithSearchTerm($sToken, $oSearchTerm, $oPosition) { $aNewSearches = array(); - if (($sPhraseType == '' || $sPhraseType == 'country') + if ($oPosition->maybePhrase('country') && is_a($oSearchTerm, '\Nominatim\Token\Country') ) { if (!$this->sCountryCode) { @@ -175,19 +172,19 @@ class SearchDescription $oSearch->sCountryCode = $oSearchTerm->sCountryCode; // Country is almost always at the end of the string // - increase score for finding it anywhere else (optimisation) - if (!$bLastToken) { + if (!$oPosition->isLastToken()) { $oSearch->iSearchRank += 5; $oSearch->iNamePhrase = -1; } $aNewSearches[] = $oSearch; } - } elseif (($sPhraseType == '' || $sPhraseType == 'postalcode') + } elseif ($oPosition->maybePhrase('postalcode') && is_a($oSearchTerm, '\Nominatim\Token\Postcode') ) { if (!$this->sPostcode) { // If we have structured search or this is the first term, // make the postcode the primary search element. 
- if ($this->iOperator == Operator::NONE && $bFirstToken) { + if ($this->iOperator == Operator::NONE && $oPosition->isFirstToken()) { $oSearch = clone $this; $oSearch->iSearchRank++; $oSearch->iOperator = Operator::POSTCODE; @@ -200,7 +197,7 @@ class SearchDescription // If we have a structured search or this is not the first term, // add the postcode as an addendum. if ($this->iOperator != Operator::POSTCODE - && ($sPhraseType == 'postalcode' || !empty($this->aName)) + && ($oPosition->isPhrase('postalcode') || !empty($this->aName)) ) { $oSearch = clone $this; $oSearch->iSearchRank++; @@ -212,7 +209,7 @@ class SearchDescription $aNewSearches[] = $oSearch; } } - } elseif (($sPhraseType == '' || $sPhraseType == 'street') + } elseif ($oPosition->maybePhrase('street') && is_a($oSearchTerm, '\Nominatim\Token\HouseNumber') ) { if (!$this->sHouseNumber && $this->iOperator != Operator::POSTCODE) { @@ -257,7 +254,7 @@ class SearchDescription $aNewSearches[] = $oSearch; } } - } elseif ($sPhraseType == '' + } elseif ($oPosition->isPhrase('') && is_a($oSearchTerm, '\Nominatim\Token\SpecialTerm') ) { if ($this->iOperator == Operator::NONE) { @@ -273,7 +270,7 @@ class SearchDescription $iOp = Operator::NEAR; } $oSearch->iSearchRank += 2; - } elseif (!$bFirstToken && !$bLastToken) { + } elseif (!$oPosition->isFirstToken() && !$oPosition->isLastToken()) { $oSearch->iSearchRank += 2; } if ($this->sHouseNumber) { @@ -287,7 +284,7 @@ class SearchDescription ); $aNewSearches[] = $oSearch; } - } elseif ($sPhraseType != 'country' + } elseif (!$oPosition->isPhrase('country') && is_a($oSearchTerm, '\Nominatim\Token\Word') ) { $iWordID = $oSearchTerm->iId; @@ -295,8 +292,10 @@ class SearchDescription // of the phrase. In structured search the name must forcably in // the first phrase. In unstructured search it may be in a later // phrase when the first phrase is a house number. 
- if (!empty($this->aName) || !($iPhrase == 0 || $sPhraseType == '')) { - if (($sPhraseType == '' || $iPhrase > 0) && $oSearchTerm->iTermCount > 1) { + if (!empty($this->aName) || !($oPosition->isFirstPhrase() || $oPosition->isPhrase(''))) { + if (($oPosition->isPhrase('') || !$oPosition->isFirstPhrase()) + && $oSearchTerm->iTermCount > 1 + ) { $oSearch = clone $this; $oSearch->iNamePhrase = -1; $oSearch->iSearchRank += 1; @@ -314,15 +313,14 @@ class SearchDescription } $aNewSearches[] = $oSearch; } - } elseif ($sPhraseType != 'country' + } elseif (!$oPosition->isPhrase('country') && is_a($oSearchTerm, '\Nominatim\Token\Partial') && strpos($sToken, ' ') === false ) { $aNewSearches = $this->extendWithPartialTerm( $sToken, $oSearchTerm, - (bool) $sPhraseType, - $iPhrase + $oPosition ); } @@ -332,19 +330,19 @@ class SearchDescription /** * Derive new searches by adding a partial term to the existing search. * - * @param string $sToken Term for the token. - * @param object $oSearchTerm Description of the token. - * @param bool $bStructuredPhrases True if the search is structured. - * @param integer $iPhrase Number of the phrase the token is in. + * @param string $sToken Term for the token. + * @param object $oSearchTerm Description of the token. + * @param object $oPosition Description of the token position within + * the query. * * @return SearchDescription[] List of derived search descriptions. 
*/ - private function extendWithPartialTerm($sToken, $oSearchTerm, $bStructuredPhrases, $iPhrase) + private function extendWithPartialTerm($sToken, $oSearchTerm, $oPosition) { $aNewSearches = array(); $iWordID = $oSearchTerm->iId; - if ((!$bStructuredPhrases || $iPhrase > 0) + if (($oPosition->isPhrase('') || !$oPosition->isFirstPhrase()) && (!empty($this->aName)) ) { $oSearch = clone $this; @@ -361,7 +359,8 @@ class SearchDescription } if ((!$this->sPostcode && !$this->aAddress && !$this->aAddressNonSearch) - && ((empty($this->aName) && empty($this->aNameNonSearch)) || $this->iNamePhrase == $iPhrase) + && ((empty($this->aName) && empty($this->aNameNonSearch)) + || $this->iNamePhrase == $oPosition->getPhrase()) ) { $oSearch = clone $this; $oSearch->iSearchRank++; @@ -385,7 +384,7 @@ class SearchDescription } else { $oSearch->aNameNonSearch[$iWordID] = $iWordID; } - $oSearch->iNamePhrase = $iPhrase; + $oSearch->iNamePhrase = $oPosition->getPhrase(); $aNewSearches[] = $oSearch; } diff --git a/lib-php/SearchPosition.php b/lib-php/SearchPosition.php new file mode 100644 index 00000000..e4260bf2 --- /dev/null +++ b/lib-php/SearchPosition.php @@ -0,0 +1,87 @@ +sPhraseType = $sPhraseType; + $this->iPhrase = $iPhrase; + $this->iNumPhrases = $iNumPhrases; + } + + public function setTokenPosition($iToken, $iNumTokens) + { + $this->iToken = $iToken; + $this->iNumTokens = $iNumTokens; + } + + /** + * Check if the phrase can be of the given type. + * + * @param string $sType Type of phrase requested. + * + * @return True if the phrase is untyped or of the given type. + */ + public function maybePhrase($sType) + { + return $this->sPhraseType == '' || $this->sPhraseType == $sType; + } + + /** + * Check if the phrase is exactly of the given type. + * + * @param string $sType Type of phrase requested. + * + * @return True if the phrase is of the given type. 
+ */ + public function isPhrase($sType) + { + return $this->sPhraseType == $sType; + } + + /** + * Return true if the token is the very first in the query. + */ + public function isFirstToken() + { + return $this->iPhrase == 0 && $this->iToken == 0; + } + + /** + * Check if the token is the final one in the query. + */ + public function isLastToken() + { + return $this->iToken + 1 == $this->iNumTokens && $this->iPhrase + 1 == $this->iNumPhrases; + } + + /** + * Check if the current token is part of the first phrase in the query. + */ + public function isFirstPhrase() + { + return $this->iPhrase == 0; + } + + /** + * Get the phrase position in the query. + */ + public function getPhrase() + { + return $this->iPhrase; + } +} -- 2.43.2