$aNewWordsetSearches = array();
foreach ($aWordsetSearches as $oCurrentSearch) {
- // Tokens with full name matches.
- foreach ($oValidTokens->get(' '.$sToken) as $oSearchTerm) {
- $aNewSearches = $oCurrentSearch->extendWithFullTerm(
+ foreach ($oValidTokens->get($sToken) as $oSearchTerm) {
+ $aNewSearches = $oCurrentSearch->extendWithSearchTerm(
+ $sToken,
$oSearchTerm,
$sPhraseType,
$iToken == 0 && $iPhrase == 0,
- $iPhrase == 0,
$iToken + 1 == count($aWordset)
- && $iPhrase + 1 == count($aPhrases)
+ && $iPhrase + 1 == count($aPhrases),
+ $iPhrase
);
foreach ($aNewSearches as $oSearch) {
}
}
}
- // Look for partial matches.
- // Note that there is no point in adding country terms here
- // because country is omitted in the address.
- if ($sPhraseType != 'country') {
- // Allow searching for a word - but at extra cost
- foreach ($oValidTokens->get($sToken) as $oSearchTerm) {
- $aNewSearches = $oCurrentSearch->extendWithPartialTerm(
- $sToken,
- $oSearchTerm,
- (bool) $sPhraseType,
- $iPhrase,
- $oValidTokens->get(' '.$sToken)
- );
-
- foreach ($aNewSearches as $oSearch) {
- if ($oSearch->getRank() < $this->iMaxRank) {
- $aNewWordsetSearches[] = $oSearch;
- }
- }
- }
- }
}
// Sort and cut
usort($aNewWordsetSearches, array('Nominatim\SearchDescription', 'bySearchRank'));
/**
* Derive new searches by adding a search term (full or partial) to the
* existing search.
*
- * @param object $oSearchTerm Description of the token.
- * @param string $sPhraseType Type of phrase the token is contained in.
- * @param bool $bFirstToken True if the token is at the beginning of the
- * query.
- * @param bool $bFirstPhrase True if the token is in the first phrase of
- * the query.
- * @param bool $bLastToken True if the token is at the end of the query.
+ * @param string $sToken Term for the token.
+ * @param object $oSearchTerm Description of the token.
+ * @param string $sPhraseType Type of phrase the token is contained in.
+ * @param bool $bFirstToken True if the token is at the beginning of the
+ * query.
+ * @param bool $bLastToken True if the token is at the end of the query.
+ * @param integer $iPhrase Number of the phrase the token is in.
*
* @return SearchDescription[] List of derived search descriptions.
*/
- public function extendWithFullTerm($oSearchTerm, $sPhraseType, $bFirstToken, $bFirstPhrase, $bLastToken)
+ public function extendWithSearchTerm($sToken, $oSearchTerm, $sPhraseType, $bFirstToken, $bLastToken, $iPhrase)
{
$aNewSearches = array();
// of the phrase. In structured search the name must necessarily be in
// the first phrase. In unstructured search it may be in a later
// phrase when the first phrase is a house number.
- if (!empty($this->aName) || !($bFirstPhrase || $sPhraseType == '')) {
- if (($sPhraseType == '' || !$bFirstPhrase) && $oSearchTerm->iTermCount > 1) {
+ if (!empty($this->aName) || !($iPhrase == 0 || $sPhraseType == '')) {
+ if (($sPhraseType == '' || $iPhrase > 0) && $oSearchTerm->iTermCount > 1) {
$oSearch = clone $this;
$oSearch->iNamePhrase = -1;
$oSearch->iSearchRank += 1;
}
$aNewSearches[] = $oSearch;
}
+ } elseif ($sPhraseType != 'country'
+ && is_a($oSearchTerm, '\Nominatim\Token\Partial')
+ && strpos($sToken, ' ') === false
+ ) {
+ $aNewSearches = $this->extendWithPartialTerm(
+ $sToken,
+ $oSearchTerm,
+ (bool) $sPhraseType,
+ $iPhrase
+ );
}
return $aNewSearches;
* @param object $oSearchTerm Description of the token.
* @param bool $bStructuredPhrases True if the search is structured.
* @param integer $iPhrase Number of the phrase the token is in.
- * @param array[] $aFullTokens List of full term tokens with the
- * same name.
*
* @return SearchDescription[] List of derived search descriptions.
*/
- public function extendWithPartialTerm($sToken, $oSearchTerm, $bStructuredPhrases, $iPhrase, $aFullTokens)
+ private function extendWithPartialTerm($sToken, $oSearchTerm, $bStructuredPhrases, $iPhrase)
{
- // Only allow name terms.
- if (!(is_a($oSearchTerm, '\Nominatim\Token\Word'))
- || strpos($sToken, ' ') !== false
- ) {
- return array();
- }
-
$aNewSearches = array();
$iWordID = $oSearchTerm->iId;
$oSearch->aAddress[$iWordID] = $iWordID;
} else {
$oSearch->aAddressNonSearch[$iWordID] = $iWordID;
- if (!empty($aFullTokens)) {
- $oSearch->iSearchRank++;
- }
}
$aNewSearches[] = $oSearch;
}
}
$oSearch->aName[$iWordID] = $iWordID;
} else {
- if (!empty($aFullTokens)) {
- $oSearch->iSearchRank++;
- }
$oSearch->aNameNonSearch[$iWordID] = $iWordID;
}
$oSearch->iNamePhrase = $iPhrase;
* tokens do not have a common base class. All tokens need to have a field
* with the word id that points to an entry in the `word` database table
* but otherwise the information saved about a token can be very different.
- *
- * There are two different kinds of token words: full words and partial terms.
- *
- * Full words start with a space. They represent a complete name of a place.
- * All special tokens are normally full words.
- *
- * Partial terms have no space at the beginning. They may represent a part of
- * a name of a place (e.g. in the name 'World Trade Center' a partial term
- * would be 'Trade' or 'Trade Center'). They are only used in TokenWord.
*/
class TokenList
{
*/
public function containsAny($sWord)
{
- return isset($this->aTokens[$sWord]) || isset($this->aTokens[' '.$sWord]);
+ return isset($this->aTokens[$sWord]);
}
/**
foreach ($this->aTokens as $aTokenList) {
foreach ($aTokenList as $oToken) {
- if (is_a($oToken, '\Nominatim\Token\Word') && !$oToken->bPartial) {
+ if (is_a($oToken, '\Nominatim\Token\Word')) {
$ids[$oToken->iId] = $oToken->iId;
}
}
// Try more interpretations for Tokens that could not be matched.
foreach ($aTokens as $sToken) {
- if ($sToken[0] == ' ' && !$oValidTokens->contains($sToken)) {
- if (preg_match('/^ ([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) {
+ if ($sToken[0] != ' ' && !$oValidTokens->contains($sToken)) {
+ if (preg_match('/^([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) {
// US ZIP+4 codes - merge in the 5-digit ZIP code
$oValidTokens->addToken(
$sToken,
new Token\Postcode(null, $aData[1], 'us')
);
- } elseif (preg_match('/^ [0-9]+$/', $sToken)) {
+ } elseif (preg_match('/^[0-9]+$/', $sToken)) {
// Unknown single word token with a number.
// Assume it is a house number.
$oValidTokens->addToken(
// Try more interpretations for Tokens that could not be matched.
foreach ($aTokens as $sToken) {
- if ($sToken[0] == ' ' && !$oValidTokens->contains($sToken)) {
- if (preg_match('/^ ([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) {
+ if ($sToken[0] != ' ' && !$oValidTokens->contains($sToken)) {
+ if (preg_match('/^([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) {
// US ZIP+4 codes - merge in the 5-digit ZIP code
$oValidTokens->addToken(
$sToken,
new Token\Postcode(null, $aData[1], 'us')
);
- } elseif (preg_match('/^ [0-9]+$/', $sToken)) {
+ } elseif (preg_match('/^[0-9]+$/', $sToken)) {
// Unknown single word token with a number.
// Assume it is a house number.
$oValidTokens->addToken(