0) { $this->aWords = explode(' ', $sPhrase); } else { $this->aWords = array(); } } /** * Get all possible tokens that are present in this word list. * * @return array The list of string tokens in the word list. */ public function getTokens() { $aTokens = array(); $iNumWords = count($this->aWords); for ($i = 0; $i < $iNumWords; $i++) { $sPhrase = $this->aWords[$i]; $aTokens[$sPhrase] = $sPhrase; for ($j = $i + 1; $j < $iNumWords; $j++) { $sPhrase .= ' '.$this->aWords[$j]; $aTokens[$sPhrase] = $sPhrase; } } return $aTokens; } /** * Compute all possible permutations of phrase splits that result in * words which are in the token list. */ public function getWordSets($oTokens) { $iNumWords = count($this->aWords); if ($iNumWords == 0) { return null; } // Caches the word set for the partial phrase up to word i. $aSetCache = array_fill(0, $iNumWords, array()); // Initialise first element of cache. There can only be the word. if ($oTokens->containsAny($this->aWords[0])) { $aSetCache[0][] = array($this->aWords[0]); } // Now do the next elements using what we already have. for ($i = 1; $i < $iNumWords; $i++) { for ($j = $i; $j > 0; $j--) { $sPartial = $j == $i ? $this->aWords[$j] : $this->aWords[$j].' '.$sPartial; if (!empty($aSetCache[$j - 1]) && $oTokens->containsAny($sPartial)) { $aPartial = array($sPartial); foreach ($aSetCache[$j - 1] as $aSet) { if (count($aSet) < SimpleWordList::MAX_WORDSET_LEN) { $aSetCache[$i][] = array_merge($aSet, $aPartial); } } if (count($aSetCache[$i]) > 2 * SimpleWordList::MAX_WORDSETS) { usort( $aSetCache[$i], array('\Nominatim\SimpleWordList', 'cmpByArraylen') ); $aSetCache[$i] = array_slice( $aSetCache[$i], 0, SimpleWordList::MAX_WORDSETS ); } } } // finally the current full phrase $sPartial = $this->aWords[0].' '.$sPartial; if ($oTokens->containsAny($sPartial)) { $aSetCache[$i][] = array($sPartial); } } $aWordSets = $aSetCache[$iNumWords - 1]; usort($aWordSets, array('\Nominatim\SimpleWordList', 'cmpByArraylen')); return array_slice($aWordSets, 0, SimpleWordList::MAX_WORDSETS); } /** * Custom search routine which takes two arrays. The array with the fewest * items wins. If same number of items then the one with the longest first * element wins. */ public static function cmpByArraylen($aA, $aB) { $iALen = count($aA); $iBLen = count($aB); if ($iALen == $iBLen) { return strlen($aB[0]) <=> strlen($aA[0]); } return ($iALen < $iBLen) ? -1 : 1; } public function debugInfo() { return $this->aWords; } }