/**
 * Expand a set of seed searches with every token interpretation of the
 * given phrases and group the surviving searches by their rank.
 *
 * For each phrase, every wordset (one way of splitting the phrase into
 * tokens) is walked token by token. Each token is looked up in
 * $aValidTokens twice: with a leading space (' '.$sToken) and without.
 * Every interpretation produces a copy of the current search with a higher
 * iSearchRank; copies whose rank reaches $this->iMaxRank are discarded.
 * After each token and each wordset the candidate list is sorted with
 * bySearchRank and cut to 50 entries. The best searches of one phrase are
 * the seed searches for the next phrase.
 *
 * Example of phrases and wordsets for 'Wodsworth Road, Sheffield':
 *
 *   Phrase Wordset
 *   0      0       (wodsworth road)
 *   0      1       (wodsworth)(road)
 *   1      0       (sheffield)
 *
 * May issue a get_tagpair() query through $this->oDB for class/type terms.
 *
 * @param array $aSearches            Seed searches; each is an array with (at least) the keys
 *                                    iSearchRank, sCountryCode, fLat, sHouseNumber, sClass,
 *                                    aName, aAddress and iNamePhrase.
 * @param array $aPhrases             Phrases; each entry must have a 'wordsets' list of token lists.
 * @param array $aValidTokens         Map of token string => list of search-term rows.
 * @param array $aWordFrequencyScores Map of word_id => frequency, compared to CONST_Max_Word_Frequency.
 * @param bool  $bStructuredPhrases   True when phrases come from a structured query.
 * @param array $aPhraseTypes         Optional map of phrase index => phrase type ('country',
 *                                    'postalcode', 'street', ...). Only consulted for structured
 *                                    queries; a missing entry is treated as the untyped phrase ''.
 *
 * @return array Searches of the last phrase grouped as iSearchRank => list of searches,
 *               sorted by rank. Empty array when $aPhrases is empty.
 */
function getGroupedSearches($aSearches, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases, $aPhraseTypes = array())
{
    // Defined up front so that an empty phrase list yields an empty result
    // instead of reading an undefined variable at the return statement.
    $aGroupedSearches = array();

    foreach($aPhrases as $iPhrase => $sPhrase)
    {
        $aNewPhraseSearches = array();
        // The phrase type must come from the caller; fall back to the
        // untyped phrase when it is not available.
        if ($bStructuredPhrases && isset($aPhraseTypes[$iPhrase])) $sPhraseType = $aPhraseTypes[$iPhrase];
        else $sPhraseType = '';

        foreach($aPhrases[$iPhrase]['wordsets'] as $iWordSet => $aWordset)
        {
            // Too many permutations - too expensive
            if ($iWordSet > 120) break;

            $aWordsetSearches = $aSearches;

            // Add all words from this wordset
            foreach($aWordset as $iToken => $sToken)
            {
                $aNewWordsetSearches = array();

                foreach($aWordsetSearches as $aCurrentSearch)
                {
                    // If the token is valid
                    if (isset($aValidTokens[' '.$sToken]))
                    {
                        foreach($aValidTokens[' '.$sToken] as $aSearchTerm)
                        {
                            $aSearch = $aCurrentSearch;
                            $aSearch['iSearchRank']++;
                            if (($sPhraseType == '' || $sPhraseType == 'country') && !empty($aSearchTerm['country_code']) && $aSearchTerm['country_code'] != '0')
                            {
                                if ($aSearch['sCountryCode'] === false)
                                {
                                    $aSearch['sCountryCode'] = strtolower($aSearchTerm['country_code']);
                                    // Country is almost always at the end of the string - increase score for finding it anywhere else (optimisation)
                                    if (($iToken+1 != sizeof($aWordset) || $iPhrase+1 != sizeof($aPhrases)))
                                    {
                                        $aSearch['iSearchRank'] += 5;
                                    }
                                    if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
                                }
                            }
                            elseif (isset($aSearchTerm['lat']) && $aSearchTerm['lat'] !== '' && $aSearchTerm['lat'] !== null)
                            {
                                if ($aSearch['fLat'] === '')
                                {
                                    $aSearch['fLat'] = $aSearchTerm['lat'];
                                    $aSearch['fLon'] = $aSearchTerm['lon'];
                                    $aSearch['fRadius'] = $aSearchTerm['radius'];
                                    if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
                                }
                            }
                            elseif ($sPhraseType == 'postalcode')
                            {
                                // We need to try the case where the postal code is the primary element
                                // (i.e. no way to tell if it is (postalcode, city) OR (city, postalcode)) so try both
                                if (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id'])
                                {
                                    // If we already have a name try putting the postcode first
                                    if (sizeof($aSearch['aName']))
                                    {
                                        $aNewSearch = $aSearch;
                                        $aNewSearch['aAddress'] = array_merge($aNewSearch['aAddress'], $aNewSearch['aName']);
                                        $aNewSearch['aName'] = array();
                                        $aNewSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
                                        if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aNewSearch;
                                    }

                                    if (sizeof($aSearch['aName']))
                                    {
                                        if ((!$bStructuredPhrases || $iPhrase > 0) && $sPhraseType != 'country' && (!isset($aValidTokens[$sToken]) || strlen($sToken) < 4 || strpos($sToken, ' ') !== false))
                                        {
                                            $aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
                                        }
                                        else
                                        {
                                            $aCurrentSearch['aFullNameAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
                                            $aSearch['iSearchRank'] += 1000; // skip;
                                        }
                                    }
                                    else
                                    {
                                        $aSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
                                    }
                                    if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
                                }
                            }
                            elseif (($sPhraseType == '' || $sPhraseType == 'street') && $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'house')
                            {
                                if ($aSearch['sHouseNumber'] === '')
                                {
                                    $aSearch['sHouseNumber'] = $sToken;
                                    // sanity check: if the housenumber is not mainly made
                                    // up of numbers, add a penalty
                                    if (preg_match_all("/[^0-9]/", $sToken, $aMatches) > 2) $aSearch['iSearchRank']++;
                                    if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
                                }
                            }
                            elseif ($sPhraseType == '' && $aSearchTerm['class'] !== '' && $aSearchTerm['class'] !== null)
                            {
                                if ($aSearch['sClass'] === '')
                                {
                                    $aSearch['sOperator'] = $aSearchTerm['operator'];
                                    $aSearch['sClass'] = $aSearchTerm['class'];
                                    $aSearch['sType'] = $aSearchTerm['type'];
                                    if (sizeof($aSearch['aName'])) $aSearch['sOperator'] = 'name';
                                    else $aSearch['sOperator'] = 'near'; // near = in for the moment
                                    if (strlen($aSearchTerm['operator']) == 0) $aSearch['iSearchRank'] += 1;

                                    // Do we have a shortcut id?
                                    if ($aSearch['sOperator'] == 'name')
                                    {
                                        // NOTE(review): class/type are concatenated into the SQL unescaped;
                                        // presumably they come from trusted word-table rows - confirm.
                                        $sSQL = "select get_tagpair('".$aSearch['sClass']."', '".$aSearch['sType']."')";
                                        if ($iAmenityID = $this->oDB->getOne($sSQL))
                                        {
                                            $aValidTokens[$aSearch['sClass'].':'.$aSearch['sType']] = array('word_id' => $iAmenityID);
                                            $aSearch['aName'][$iAmenityID] = $iAmenityID;
                                            $aSearch['sClass'] = '';
                                            $aSearch['sType'] = '';
                                        }
                                    }
                                    if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
                                }
                            }
                            elseif (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id'])
                            {
                                if (sizeof($aSearch['aName']))
                                {
                                    if ((!$bStructuredPhrases || $iPhrase > 0) && $sPhraseType != 'country' && (!isset($aValidTokens[$sToken]) || strlen($sToken) < 4 || strpos($sToken, ' ') !== false))
                                    {
                                        $aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
                                    }
                                    else
                                    {
                                        $aCurrentSearch['aFullNameAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
                                        $aSearch['iSearchRank'] += 1000; // skip;
                                    }
                                }
                                else
                                {
                                    $aSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
                                }
                                if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
                            }
                        }
                    }
                    // A token without an un-prefixed entry is never skipped:
                    // the search is simply not extended through that path.
                    if (isset($aValidTokens[$sToken]))
                    {
                        // Allow searching for a word - but at extra cost
                        foreach($aValidTokens[$sToken] as $aSearchTerm)
                        {
                            if (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id'])
                            {
                                if ((!$bStructuredPhrases || $iPhrase > 0) && sizeof($aCurrentSearch['aName']) && strpos($sToken, ' ') === false)
                                {
                                    $aSearch = $aCurrentSearch;
                                    $aSearch['iSearchRank'] += 1;
                                    if ($aWordFrequencyScores[$aSearchTerm['word_id']] < CONST_Max_Word_Frequency)
                                    {
                                        $aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
                                        if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
                                    }
                                    elseif (isset($aValidTokens[' '.$sToken]) && strlen($sToken) >= 4) // revert to the token version?
                                    {
                                        $aSearch['aAddressNonSearch'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
                                        $aSearch['iSearchRank'] += 1;
                                        if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
                                        foreach($aValidTokens[' '.$sToken] as $aSearchTermToken)
                                        {
                                            if (empty($aSearchTermToken['country_code'])
                                                && empty($aSearchTermToken['lat'])
                                                && empty($aSearchTermToken['class']))
                                            {
                                                $aSearch = $aCurrentSearch;
                                                $aSearch['iSearchRank'] += 1;
                                                $aSearch['aAddress'][$aSearchTermToken['word_id']] = $aSearchTermToken['word_id'];
                                                if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
                                            }
                                        }
                                    }
                                    else
                                    {
                                        $aSearch['aAddressNonSearch'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
                                        if (preg_match('#^[0-9]+$#', $sToken)) $aSearch['iSearchRank'] += 2;
                                        if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
                                    }
                                }

                                if (!sizeof($aCurrentSearch['aName']) || $aCurrentSearch['iNamePhrase'] == $iPhrase)
                                {
                                    $aSearch = $aCurrentSearch;
                                    $aSearch['iSearchRank'] += 1;
                                    if (!sizeof($aCurrentSearch['aName'])) $aSearch['iSearchRank'] += 1;
                                    if (preg_match('#^[0-9]+$#', $sToken)) $aSearch['iSearchRank'] += 2;
                                    if ($aWordFrequencyScores[$aSearchTerm['word_id']] < CONST_Max_Word_Frequency)
                                        $aSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
                                    else
                                        $aSearch['aNameNonSearch'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
                                    $aSearch['iNamePhrase'] = $iPhrase;
                                    if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
                                }
                            }
                        }
                    }
                }
                // Sort and cut
                usort($aNewWordsetSearches, 'bySearchRank');
                $aWordsetSearches = array_slice($aNewWordsetSearches, 0, 50);
            }

            $aNewPhraseSearches = array_merge($aNewPhraseSearches, $aNewWordsetSearches);
            usort($aNewPhraseSearches, 'bySearchRank');

            // Drop searches that are exact duplicates of an earlier one.
            $aSearchHash = array();
            foreach($aNewPhraseSearches as $iSearch => $aSearch)
            {
                $sHash = serialize($aSearch);
                if (isset($aSearchHash[$sHash])) unset($aNewPhraseSearches[$iSearch]);
                else $aSearchHash[$sHash] = 1;
            }

            $aNewPhraseSearches = array_slice($aNewPhraseSearches, 0, 50);
        }

        // Re-group the searches by their score, junk anything that reaches
        // the maximum rank as just not worth trying
        $aGroupedSearches = array();
        foreach($aNewPhraseSearches as $aSearch)
        {
            if ($aSearch['iSearchRank'] < $this->iMaxRank)
            {
                if (!isset($aGroupedSearches[$aSearch['iSearchRank']])) $aGroupedSearches[$aSearch['iSearchRank']] = array();
                $aGroupedSearches[$aSearch['iSearchRank']][] = $aSearch;
            }
        }
        ksort($aGroupedSearches);

        // The best-ranked groups become the seed searches for the next
        // phrase; stop adding groups once more than 50 searches are collected.
        $iSearchCount = 0;
        $aSearches = array();
        foreach($aGroupedSearches as $iScore => $aNewSearches)
        {
            $iSearchCount += sizeof($aNewSearches);
            $aSearches = array_merge($aSearches, $aNewSearches);
            if ($iSearchCount > 50) break;
        }
    }

    return $aGroupedSearches;
}
- 'Wodsworth Road, Sheffield' => - - Phrase Wordset - 0 0 (wodsworth road) - 0 1 (wodsworth)(road) - 1 0 (sheffield) + $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases); - Score how good the search is so they can be ordered - */ - foreach($aPhrases as $iPhrase => $sPhrase) + if ($this->bReverseInPlan) { - $aNewPhraseSearches = array(); - if ($bStructuredPhrases) $sPhraseType = $aPhraseTypes[$iPhrase]; - else $sPhraseType = ''; - - foreach($aPhrases[$iPhrase]['wordsets'] as $iWordSet => $aWordset) + // Reverse phrase array and also reverse the order of the wordsets in + // the first and final phrase. Don't bother about phrases in the middle + // because order in the address doesn't matter. + $aPhrases = array_reverse($aPhrases); + $aPhrases[0]['wordsets'] = getInverseWordSets($aPhrases[0]['words'], 0); + if (sizeof($aPhrases) > 1) { - // Too many permutations - too expensive - if ($iWordSet > 120) break; - - $aWordsetSearches = $aSearches; + $aFinalPhrase = end($aPhrases); + $aFinalPhrase['wordsets'] = getInverseWordSets($aFinalPhrase['words'], 0); + } + $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $aValidTokens, $aWordFrequencyScores, false); - // Add all words from this wordset - foreach($aWordset as $iToken => $sToken) + foreach($aGroupedSearches as $aSearches) + { + foreach($aSearches as $aSearch) { - //echo "
$sToken"; - $aNewWordsetSearches = array(); - - foreach($aWordsetSearches as $aCurrentSearch) + if ($aSearch['iSearchRank'] < $this->iMaxRank) { - //echo ""; - //var_dump($aCurrentSearch); - //echo ""; - - // If the token is valid - if (isset($aValidTokens[' '.$sToken])) - { - foreach($aValidTokens[' '.$sToken] as $aSearchTerm) - { - $aSearch = $aCurrentSearch; - $aSearch['iSearchRank']++; - if (($sPhraseType == '' || $sPhraseType == 'country') && !empty($aSearchTerm['country_code']) && $aSearchTerm['country_code'] != '0') - { - if ($aSearch['sCountryCode'] === false) - { - $aSearch['sCountryCode'] = strtolower($aSearchTerm['country_code']); - // Country is almost always at the end of the string - increase score for finding it anywhere else (optimisation) - // If reverse order is enabled, it may appear at the beginning as well. - if (($iToken+1 != sizeof($aWordset) || $iPhrase+1 != sizeof($aPhrases)) && - (!$this->bReverseInPlan || $iToken > 0 || $iPhrase > 0)) - { - $aSearch['iSearchRank'] += 5; - } - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; - } - } - elseif (isset($aSearchTerm['lat']) && $aSearchTerm['lat'] !== '' && $aSearchTerm['lat'] !== null) - { - if ($aSearch['fLat'] === '') - { - $aSearch['fLat'] = $aSearchTerm['lat']; - $aSearch['fLon'] = $aSearchTerm['lon']; - $aSearch['fRadius'] = $aSearchTerm['radius']; - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; - } - } - elseif ($sPhraseType == 'postalcode') - { - // We need to try the case where the postal code is the primary element (i.e. 
no way to tell if it is (postalcode, city) OR (city, postalcode) so try both - if (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id']) - { - // If we already have a name try putting the postcode first - if (sizeof($aSearch['aName'])) - { - $aNewSearch = $aSearch; - $aNewSearch['aAddress'] = array_merge($aNewSearch['aAddress'], $aNewSearch['aName']); - $aNewSearch['aName'] = array(); - $aNewSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aNewSearch; - } - - if (sizeof($aSearch['aName'])) - { - if ((!$bStructuredPhrases || $iPhrase > 0) && $sPhraseType != 'country' && (!isset($aValidTokens[$sToken]) || strlen($sToken) < 4 || strpos($sToken, ' ') !== false)) - { - $aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - } - else - { - $aCurrentSearch['aFullNameAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - $aSearch['iSearchRank'] += 1000; // skip; - } - } - else - { - $aSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - //$aSearch['iNamePhrase'] = $iPhrase; - } - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; - } - - } - elseif (($sPhraseType == '' || $sPhraseType == 'street') && $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'house') - { - if ($aSearch['sHouseNumber'] === '') - { - $aSearch['sHouseNumber'] = $sToken; - // sanity check: if the housenumber is not mainly made - // up of numbers, add a penalty - if (preg_match_all("/[^0-9]/", $sToken, $aMatches) > 2) $aSearch['iSearchRank']++; - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; - /* - // Fall back to not searching for this item (better than nothing) - $aSearch = $aCurrentSearch; - $aSearch['iSearchRank'] += 1; - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; - */ - } - } - elseif ($sPhraseType == '' && $aSearchTerm['class'] !== '' && 
$aSearchTerm['class'] !== null) - { - if ($aSearch['sClass'] === '') - { - $aSearch['sOperator'] = $aSearchTerm['operator']; - $aSearch['sClass'] = $aSearchTerm['class']; - $aSearch['sType'] = $aSearchTerm['type']; - if (sizeof($aSearch['aName'])) $aSearch['sOperator'] = 'name'; - else $aSearch['sOperator'] = 'near'; // near = in for the moment - if (strlen($aSearchTerm['operator']) == 0) $aSearch['iSearchRank'] += 1; - - // Do we have a shortcut id? - if ($aSearch['sOperator'] == 'name') - { - $sSQL = "select get_tagpair('".$aSearch['sClass']."', '".$aSearch['sType']."')"; - if ($iAmenityID = $this->oDB->getOne($sSQL)) - { - $aValidTokens[$aSearch['sClass'].':'.$aSearch['sType']] = array('word_id' => $iAmenityID); - $aSearch['aName'][$iAmenityID] = $iAmenityID; - $aSearch['sClass'] = ''; - $aSearch['sType'] = ''; - } - } - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; - } - } - elseif (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id']) - { - if (sizeof($aSearch['aName'])) - { - if ((!$bStructuredPhrases || $iPhrase > 0) && $sPhraseType != 'country' && (!isset($aValidTokens[$sToken]) || strlen($sToken) < 4 || strpos($sToken, ' ') !== false)) - { - $aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - } - else - { - $aCurrentSearch['aFullNameAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - $aSearch['iSearchRank'] += 1000; // skip; - } - } - else - { - $aSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - //$aSearch['iNamePhrase'] = $iPhrase; - } - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; - } - } - } - if (isset($aValidTokens[$sToken])) - { - // Allow searching for a word - but at extra cost - foreach($aValidTokens[$sToken] as $aSearchTerm) - { - if (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id']) - { - if ((!$bStructuredPhrases || $iPhrase > 0) && sizeof($aCurrentSearch['aName']) && strpos($sToken, ' ') === false) - { 
- $aSearch = $aCurrentSearch; - $aSearch['iSearchRank'] += 1; - if ($aWordFrequencyScores[$aSearchTerm['word_id']] < CONST_Max_Word_Frequency) - { - $aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; - } - elseif (isset($aValidTokens[' '.$sToken]) && strlen($sToken) >= 4) // revert to the token version? - { - $aSearch['aAddressNonSearch'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - $aSearch['iSearchRank'] += 1; - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; - foreach($aValidTokens[' '.$sToken] as $aSearchTermToken) - { - if (empty($aSearchTermToken['country_code']) - && empty($aSearchTermToken['lat']) - && empty($aSearchTermToken['class'])) - { - $aSearch = $aCurrentSearch; - $aSearch['iSearchRank'] += 1; - $aSearch['aAddress'][$aSearchTermToken['word_id']] = $aSearchTermToken['word_id']; - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; - } - } - } - else - { - $aSearch['aAddressNonSearch'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - if (preg_match('#^[0-9]+$#', $sToken)) $aSearch['iSearchRank'] += 2; - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; - } - } - - if (!sizeof($aCurrentSearch['aName']) || $aCurrentSearch['iNamePhrase'] == $iPhrase) - { - $aSearch = $aCurrentSearch; - $aSearch['iSearchRank'] += 1; - if (!sizeof($aCurrentSearch['aName'])) $aSearch['iSearchRank'] += 1; - if (preg_match('#^[0-9]+$#', $sToken)) $aSearch['iSearchRank'] += 2; - if ($aWordFrequencyScores[$aSearchTerm['word_id']] < CONST_Max_Word_Frequency) - $aSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - else - $aSearch['aNameNonSearch'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - $aSearch['iNamePhrase'] = $iPhrase; - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; - } - } - } - } - else - { - // 
Allow skipping a word - but at EXTREAM cost - //$aSearch = $aCurrentSearch; - //$aSearch['iSearchRank']+=100; - //$aNewWordsetSearches[] = $aSearch; - } + if (!isset($aReverseGroupedSearches[$aSearch['iSearchRank']])) $aReverseGroupedSearches[$aSearch['iSearchRank']] = array(); + $aReverseGroupedSearches[$aSearch['iSearchRank']][] = $aSearch; } - // Sort and cut - usort($aNewWordsetSearches, 'bySearchRank'); - $aWordsetSearches = array_slice($aNewWordsetSearches, 0, 50); - } - //var_Dump('
',sizeof($aWordsetSearches)); exit; - - $aNewPhraseSearches = array_merge($aNewPhraseSearches, $aNewWordsetSearches); - usort($aNewPhraseSearches, 'bySearchRank'); - $aSearchHash = array(); - foreach($aNewPhraseSearches as $iSearch => $aSearch) - { - $sHash = serialize($aSearch); - if (isset($aSearchHash[$sHash])) unset($aNewPhraseSearches[$iSearch]); - else $aSearchHash[$sHash] = 1; } - - $aNewPhraseSearches = array_slice($aNewPhraseSearches, 0, 50); } - // Re-group the searches by their score, junk anything over 20 as just not worth trying - $aGroupedSearches = array(); - foreach($aNewPhraseSearches as $aSearch) - { - if ($aSearch['iSearchRank'] < $this->iMaxRank) - { - if (!isset($aGroupedSearches[$aSearch['iSearchRank']])) $aGroupedSearches[$aSearch['iSearchRank']] = array(); - $aGroupedSearches[$aSearch['iSearchRank']][] = $aSearch; - } - } + $aGroupedSearches = $aReverseGroupedSearches; ksort($aGroupedSearches); - - $iSearchCount = 0; - $aSearches = array(); - foreach($aGroupedSearches as $iScore => $aNewSearches) - { - $iSearchCount += sizeof($aNewSearches); - $aSearches = array_merge($aSearches, $aNewSearches); - if ($iSearchCount > 50) break; - } - - //if (CONST_Debug) _debugDumpGroupedSearches($aGroupedSearches, $aValidTokens); - } - } else { @@ -1054,29 +1089,6 @@ if (CONST_Debug) var_Dump($aGroupedSearches); - if ($this->bReverseInPlan) - { - $aCopyGroupedSearches = $aGroupedSearches; - foreach($aCopyGroupedSearches as $iGroup => $aSearches) - { - foreach($aSearches as $iSearch => $aSearch) - { - if (sizeof($aSearch['aAddress'])) - { - $iReverseItem = array_pop($aSearch['aAddress']); - if (isset($aPossibleMainWordIDs[$iReverseItem])) - { - $aSearch['aAddress'] = array_merge($aSearch['aAddress'], $aSearch['aName']); - $aSearch['aName'] = array($iReverseItem); - $aGroupedSearches[$iGroup][] = $aSearch; - } - //$aReverseSearch['aName'][$iReverseItem] = $iReverseItem; - //$aGroupedSearches[$iGroup][] = $aReverseSearch; - } - } - } - } - if 
/**
 * Build all wordsets of a phrase with the token order reversed.
 *
 * The first wordset is always the whole phrase joined with single spaces.
 * Further wordsets are produced by repeatedly splitting words off the END
 * of the list: the split-off words (kept in their original order) form the
 * first token of the set, followed by every inverse wordset of the
 * remaining words.
 *
 * Example: array('a', 'b') yields array(array('a b'), array('b', 'a')).
 *
 * @param array $aWords List of words of the phrase, in query order.
 * @param int   $iDepth Current recursion depth; no further splitting is done at depth 8 or deeper.
 *
 * @return array List of wordsets, each a list of token strings.
 */
function getInverseWordSets($aWords, $iDepth)
{
    $aResult = array(array(join(' ', $aWords)));
    $sFirstToken = '';
    if ($iDepth < 8)
    {
        while (sizeof($aWords) > 1)
        {
            $sWord = array_pop($aWords);
            // Compare against '' explicitly: a token of "0" is falsy in PHP
            // and would otherwise be glued to the next word without a space.
            $sFirstToken = $sWord.($sFirstToken !== '' ? ' ' : '').$sFirstToken;
            $aRest = getInverseWordSets($aWords, $iDepth+1);
            foreach ($aRest as $aSet)
            {
                $aResult[] = array_merge(array($sFirstToken), $aSet);
            }
        }
    }
    return $aResult;
}