git.openstreetmap.org Git - nominatim.git/commitdiff
Merge remote-tracking branch 'upstream/master'
author Sarah Hoffmann <lonvia@denofr.de>
Thu, 31 Jan 2013 19:57:31 +0000 (20:57 +0100)
committer Sarah Hoffmann <lonvia@denofr.de>
Thu, 31 Jan 2013 19:57:31 +0000 (20:57 +0100)
lib/lib.php
lib/template/search-json.php
lib/template/search-jsonv2.php
lib/template/search-xml.php
website/search.php

index 2c335c49eaf6a83fdb3feea45d6775d2e67e6d0e..9d1120cbed9639c2aede011adbdef8a8e382b61a 100644 (file)
                        }
                }
                echo "<table border=\"1\">";
-               echo "<tr><th>rank</th><th>Name Tokens</th><th>Address Tokens</th><th>country</th><th>operator</th><th>class</th><th>type</th><th>house#</th><th>Lat</th><th>Lon</th><th>Radius</th></tr>";
+               echo "<tr><th>rank</th><th>Name Tokens</th><th>Name Not</th><th>Address Tokens</th><th>Address Not</th><th>country</th><th>operator</th><th>class</th><th>type</th><th>house#</th><th>Lat</th><th>Lon</th><th>Radius</th></tr>";
                foreach($aData as $iRank => $aRankedSet)
                {
                        foreach($aRankedSet as $aRow)
                                }
                                echo "</td>";
 
+                               echo "<td>";
+                               $sSep = '';
+                               foreach($aRow['aNameNonSearch'] as $iWordID)
+                               {
+                                       echo $sSep.'#'.$aWordsIDs[$iWordID].'#';
+                                       $sSep = ', ';
+                               }
+                               echo "</td>";
+
                                echo "<td>";
                                $sSep = '';
                                foreach($aRow['aAddress'] as $iWordID)
                                }
                                echo "</td>";
 
+                               echo "<td>";
+                               $sSep = '';
+                               foreach($aRow['aAddressNonSearch'] as $iWordID)
+                               {
+                                       echo $sSep.'#'.$aWordsIDs[$iWordID].'#';
+                                       $sSep = ', ';
+                               }
+                               echo "</td>";
+
                                echo "<td>".$aRow['sCountryCode']."</td>";
 
                                echo "<td>".$aRow['sOperator']."</td>";
index 542e37afdc39b6d9a54b32239e0064b47b9c6d03..57586fb9eee3c3adeb24003e225d64fb11480aaf 100644 (file)
@@ -41,6 +41,9 @@
 
                $aPlace['class'] = $aPointDetails['class'];
                $aPlace['type'] = $aPointDetails['type'];
+
+               $aPlace['importance'] = $aPointDetails['importance'];
+
                if (isset($aPointDetails['icon']) && $aPointDetails['icon'])
                {
                        $aPlace['icon'] = $aPointDetails['icon'];
index 1c2a04dd84028df86bfa3126ef3b2d647a7863a8..126f78662df64970a3acc6c120599637fdaf975d 100644 (file)
@@ -40,6 +40,9 @@
 
                $aPlace['category'] = $aPointDetails['class'];
                $aPlace['type'] = $aPointDetails['type'];
+
+               $aPlace['importance'] = $aPointDetails['importance'];
+
                if (isset($aPointDetails['icon']))
                {
                        $aPlace['icon'] = $aPointDetails['icon'];
index aa029d2bb142bcd9fc4b16660e512c4f63063c85..be05cb40c85b336b44bb241f9b80fac39a0c0c09 100644 (file)
@@ -81,6 +81,7 @@
 
                echo " class='".htmlspecialchars($aResult['class'])."'";
                echo " type='".htmlspecialchars($aResult['type'])."'";
+               echo " importance='".htmlspecialchars($aResult['importance'])."'";
                if (isset($aResult['icon']) && $aResult['icon'])
                {
                        echo " icon='".htmlspecialchars($aResult['icon'], ENT_QUOTES)."'";
index 77a3d135cba4cc6fafa05f807855397520e4372a..c3edbcd677554ef5e760bc221e1903607425850b 100755 (executable)
                        // Start with a blank search
                        $aSearches = array(
                                array('iSearchRank' => 0, 'iNamePhrase' => -1, 'sCountryCode' => false, 'aName'=>array(), 'aAddress'=>array(), 
+                                       'aNameNonSearch'=>array(), 'aAddressNonSearch'=>array(),
                                        'sOperator'=>'', 'aFeatureName' => array(), 'sClass'=>'', 'sType'=>'', 'sHouseNumber'=>'', 'fLat'=>'', 'fLon'=>'', 'fRadius'=>'')
                        );
 
                        // Check which tokens we have, get the ID numbers                       
                        $sSQL = 'select word_id,word_token, word, class, type, location, country_code, operator, search_name_count';
                        $sSQL .= ' from word where word_token in ('.join(',',array_map("getDBQuoted",$aTokens)).')';
-                       $sSQL .= ' and search_name_count < '.CONST_Max_Word_Frequency;
+//                     $sSQL .= ' and search_name_count < '.CONST_Max_Word_Frequency;
 //                     $sSQL .= ' group by word_token, word, class, type, location, country_code';
 
                        if (CONST_Debug) var_Dump($sSQL);
                                failInternalError("Could not get word tokens.", $sSQL, $aDatabaseWords);
                        }
                        $aPossibleMainWordIDs = array();
+                       $aWordFrequencyScores = array();
                        foreach($aDatabaseWords as $aToken)
                        {
+                               // Very special case - require 2 letter country param to match the country code found
+                               if ($bStructuredPhrases && $aToken['country_code'] && !empty($aStructuredQuery['country']) 
+                                  && strlen($aStructuredQuery['country']) == 2 && strtolower($aStructuredQuery['country']) != $aToken['country_code'])
+                               {
+                                       continue;
+                               }
+
                                if (isset($aValidTokens[$aToken['word_token']]))
                                {
                                        $aValidTokens[$aToken['word_token']][] = $aToken;
                                {
                                        $aValidTokens[$aToken['word_token']] = array($aToken);
                                }
-                               if ($aToken['word_token'][0]==' ' && !$aToken['class'] && !$aToken['country_code']) $aPossibleMainWordIDs[$aToken['word_id']] = 1 + $aToken['search_name_count'];
+                               if ($aToken['word_token'][0]==' ' && !$aToken['class'] && !$aToken['country_code']) $aPossibleMainWordIDs[$aToken['word_id']] = 1;
+                               $aWordFrequencyScores[$aToken['word_id']] = $aToken['search_name_count'] + 1;
                        }
                        if (CONST_Debug) var_Dump($aPhrases, $aValidTokens);
 
                                                                                {
                                                                                        if (sizeof($aSearch['aName']))
                                                                                        {
-                                                                                               if (($sPhraseType != 'street' && $sPhraseType != 'country') && (!isset($aValidTokens[$sToken]) || strlen($sToken) < 4 || strpos($sToken, ' ') !== false))
+                                                                                               if ((!$bStructuredPhrases || $iPhrase > 0) && $sPhraseType != 'country' && (!isset($aValidTokens[$sToken]) || strlen($sToken) < 4 || strpos($sToken, ' ') !== false))
                                                                                                {
                                                                                                        $aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
                                                                                                }
                                                                        {
                                                                                if (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id'])
                                                                                {
-                                                                                       if (($sPhraseType != 'street') && sizeof($aCurrentSearch['aName']) && strlen($sToken) >= 4)
+                                                                                       if ((!$bStructuredPhrases || $iPhrase > 0) && sizeof($aCurrentSearch['aName']) && strlen($sToken) >= 4)
                                                                                        {
                                                                                                $aSearch = $aCurrentSearch;
                                                                                                $aSearch['iSearchRank'] += 1;
-                                                                                               $aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
-                                                                                               if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch;
+                                                                                               if ($aWordFrequencyScores[$aSearchTerm['word_id']] < CONST_Max_Word_Frequency)
+                                                                                               {
+                                                                                                       $aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
+                                                                                                       if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch;
+                                                                                               }
+                                                                                               elseif (isset($aValidTokens[' '.$sToken])) // revert to the token version?
+                                                                                               {
+                                                                                                       foreach($aValidTokens[' '.$sToken] as $aSearchTermToken)
+                                                                                                       {
+                                                                                                               if (empty($aSearchTermToken['country_code']) 
+                                                                                                                       && empty($aSearchTermToken['lat'])
+                                                                                                                       && empty($aSearchTermToken['class']))
+                                                                                                               {
+                                                                                                                       $aSearch = $aCurrentSearch;
+                                                                                                                       $aSearch['iSearchRank'] += 1;
+                                                                                                                       $aSearch['aAddress'][$aSearchTermToken['word_id']] = $aSearchTermToken['word_id'];
+                                                                                                                       if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch;
+                                                                                                               }
+                                                                                                       }
+                                                                                               }
+                                                                                               else
+                                                                                               {
+                                                                                                       $aSearch['aAddressNonSearch'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
+                                                                                                       if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch;
+                                                                                               }
                                                                                        }
 
                                                                                        if (!sizeof($aCurrentSearch['aName']) || $aCurrentSearch['iNamePhrase'] == $iPhrase)
                                                                                                $aSearch = $aCurrentSearch;
                                                                                                $aSearch['iSearchRank'] += 2;
                                                                                                if (preg_match('#^[0-9]+$#', $sToken)) $aSearch['iSearchRank'] += 2;
-                                                                                               $aSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
+                                                                                               if ($aWordFrequencyScores[$aSearchTerm['word_id']] < CONST_Max_Word_Frequency)
+                                                                                                       $aSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
+                                                                                               else
+                                                                                                       $aSearch['aNameNonSearch'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
                                                                                                $aSearch['iNamePhrase'] = $iPhrase;
                                                                                                if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch;
                                                                                        }
                                                        // TODO: filter out the pointless search terms (2 letter name tokens and less)
                                                        // they might be right - but they are just too darned expensive to run
                                                        if (sizeof($aSearch['aName'])) $aTerms[] = "name_vector @> ARRAY[".join($aSearch['aName'],",")."]";
+                                                       if (sizeof($aSearch['aNameNonSearch'])) $aTerms[] = "array_cat(name_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aNameNonSearch'],",")."]";
                                                        if (sizeof($aSearch['aAddress']) && $aSearch['aName'] != $aSearch['aAddress']) 
                                                        {
                                                                // For infrequent name terms disable index usage for address
                                                                if (CONST_Search_NameOnlySearchFrequencyThreshold && 
                                                                        sizeof($aSearch['aName']) == 1 && 
-                                                                       $aPossibleMainWordIDs[$aSearch['aName'][reset($aSearch['aName'])]] < CONST_Search_NameOnlySearchFrequencyThreshold)
+                                                                       $aWordFrequencyScores[$aSearch['aName'][reset($aSearch['aName'])]] < CONST_Search_NameOnlySearchFrequencyThreshold)
                                                                {
-                                                                       $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddress'],",")."]";
+                                                                       $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join(array_merge($aSearch['aAddress'],$aSearch['aAddressNonSearch']),",")."]";
                                                                }
                                                                else
                                                                {
                                                                        $aTerms[] = "nameaddress_vector @> ARRAY[".join($aSearch['aAddress'],",")."]";
+                                                                       if (sizeof($aSearch['aAddressNonSearch'])) $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddressNonSearch'],",")."]";
                                                                }
                                                        }
                                                        if ($aSearch['sCountryCode']) $aTerms[] = "country_code = '".pg_escape_string($aSearch['sCountryCode'])."'";
 //var_Dump($aSearchResults);
 //exit;
        $aClassType = getClassTypesWithImportance();
-       $aRecheckWords = preg_split('/\b/',$sQuery);
+       $aRecheckWords = preg_split('/\b/u',$sQuery);
        foreach($aRecheckWords as $i => $sWord)
        {
                if (!$sWord) unset($aRecheckWords[$i]);
                if (sizeof($aSearchResults) >= $iFinalLimit) break;
        }
 
-       $sDataDate = $oDB->getOne("select TO_CHAR(lastimportdate - '1 day'::interval,'YYYY/MM/DD') from import_status limit 1");
+       $sDataDate = $oDB->getOne("select TO_CHAR(lastimportdate - '2 minutes'::interval,'YYYY/MM/DD HH24:MI')||' GMT' from import_status limit 1");
 
        if (isset($_GET['nearlat']) && isset($_GET['nearlon']))
        {