fix use of term count in partial terms

[nominatim.git] / lib / Geocode.php
diff --git a/lib/Geocode.php b/lib/Geocode.php

index 0c93e0a96e429945d4a795fe32179fcc573f0544..ed02848eac4c2c846ea1a655089caa75aca879cd 100644 (file)
--- a/lib/Geocode.php
+++ b/lib/Geocode.php
@@ -642,12 +642,6 @@ class Geocode
              $oValidTokens = new TokenList();
  
              if (!empty($aTokens)) {
-                $sSQL = 'SELECT word_id, word_token, word, class, type, country_code, operator, search_name_count';
-                $sSQL .= ' FROM word ';
-                $sSQL .= ' WHERE word_token in ('.join(',', $this->oDB->getDBQuotedList($aTokens)).')';
-
-                Debug::printSQL($sSQL);
-
                  $oValidTokens->addTokensFromDB(
                      $this->oDB,
                      $aTokens,
@@ -656,6 +650,8 @@ class Geocode
                      $this->oNormalizer
                  );
  
+                $oCtx->setFullNameWords($oValidTokens->getFullWordIDs());
+
                  // Try more interpretations for Tokens that could not be matched.
                  foreach ($aTokens as $sToken) {
                      if ($sToken[0] == ' ' && !$oValidTokens->contains($sToken)) {
@@ -923,21 +919,14 @@ class Geocode
                      $aResult['lon'],
                      $aResult['lat']
                  );
-                // Adjust importance for the number of exact string matches in the result
-                $iCountWords = 0;
-                $sAddress = $aResult['langaddress'];
-                foreach ($aRecheckWords as $i => $sWord) {
-                    if (stripos($sAddress, $sWord)!==false) {
-                        $iCountWords++;
-                        if (preg_match('/(^|,)\s*'.preg_quote($sWord, '/').'\s*(,|$)/', $sAddress)) $iCountWords += 0.1;
-                    }
-                }
-
-                $aResult['importance'] = $aResult['importance'] + ($iCountWords*0.1); // 0.1 is a completely arbitrary number but something in the range 0.1 to 0.5 would seem right
  
                  // secondary ordering (for results with same importance (the smaller the better):
                  // - approximate importance of address parts
-                $aResult['foundorder'] = -$aResult['addressimportance']/10;
+                if (isset($aResult['addressimportance']) && $aResult['addressimportance']) {
+                    $aResult['foundorder'] = -$aResult['addressimportance']/10;
+                } else {
+                    $aResult['foundorder'] = -$aResult['importance'];
+                }
                  // - number of exact matches from the query
                  $aResult['foundorder'] -= $aResults[$aResult['place_id']]->iExactMatches;
                  // - importance of the class/type
@@ -947,6 +936,21 @@ class Geocode
                  } else {
                      $aResult['foundorder'] += 0.01;
                  }
+                // - rank
+                $aResult['foundorder'] -= 0.00001 * (30 - $aResult['rank_search']);
+
+                // Adjust importance for the number of exact string matches in the result
+                $iCountWords = 0;
+                $sAddress = $aResult['langaddress'];
+                foreach ($aRecheckWords as $i => $sWord) {
+                    if (stripos($sAddress, $sWord)!==false) {
+                        $iCountWords++;
+                        if (preg_match('/(^|,)\s*'.preg_quote($sWord, '/').'\s*(,|$)/', $sAddress)) $iCountWords += 0.1;
+                    }
+                }
+
+                // 0.1 is a completely arbitrary number but something in the range 0.1 to 0.5 would seem right
+                $aResult['importance'] = $aResult['importance'] + ($iCountWords*0.1);
              }
              $aSearchResults[$iIdx] = $aResult;
          }