]> git.openstreetmap.org Git - nominatim.git/blobdiff - lib/SearchDescription.php
don't trust words from word table to be sanitized
[nominatim.git] / lib / SearchDescription.php
index 1a994acd38207a08d1fab8e3a904f1ca081442b2..42e5af309854b76967cc3079e35f32c0ceedae76 100644 (file)
@@ -2,24 +2,7 @@
 
 namespace Nominatim;
 
-/**
- * Operators describing special searches.
- */
-abstract final class Operator
-{
-    /// No operator selected.
-    const NONE = 0;
-    /// Search for POI of the given type.
-    const TYPE = 1;
-    /// Search for POIs near the given place.
-    const NEAR = 2;
-    /// Search for POIS in the given place.
-    const IN = 3;
-    /// Search for POIS named as given.
-    const NAME = 4;
-    /// Search for postcodes.
-    const POSTCODE = 5;
-}
+require_once(CONST_BasePath.'/lib/SpecialSearchOperator.php');
 
 /**
  * Description of a single interpretation of a search query.
@@ -58,19 +41,23 @@ class SearchDescription
     /// Index of phrase currently processed
     private $iNamePhrase = -1;
 
+
     public function getRank()
     {
         return $this->iSearchRank;
     }
 
+    /**
+     * Increase the rank of this search by the given amount.
+     *
+     * @param $iAddRank Value added to the current search rank
+     *                  (presumably an integer, going by the naming).
+     *
+     * @return The search rank after the addition.
+     */
+    public function addToRank($iAddRank)
+    {
+        $this->iSearchRank += $iAddRank;
+        return $this->iSearchRank;
+    }
+
     public function getPostCode()
     {
         return $this->sPostcode;
     }
 
-    /**
-     * Set the geographic search radius.
-     */
     public function setNear(&$oNearPoint)
     {
         $this->oNearPoint = $oNearPoint;
@@ -83,29 +70,20 @@ class SearchDescription
         $this->sType = $sType;
     }
 
-    /**
-     * Check if name or address for the search are specified.
-     */
     public function isNamedSearch()
     {
         return sizeof($this->aName) > 0 || sizeof($this->aAddress) > 0;
     }
 
-    /**
-     * Check if only a country is requested.
-     */
     public function isCountrySearch()
     {
         return $this->sCountryCode && sizeof($this->aName) == 0
-               && !$this->iOperator && !$this->oNear;
+               && !$this->iOperator && !$this->oNearPoint;
     }
 
-    /**
-     * Check if a search near a geographic location is requested.
-     */
     public function isNearSearch()
     {
-        return (bool) $this->oNear;
+        return (bool) $this->oNearPoint;
     }
 
     public function isPoiSearch()
@@ -141,7 +119,7 @@ class SearchDescription
             return $sVar.' = \''.$this->sCountryCode."'";
         }
         if ($sCountryList) {
-            return $sVar.' in ('.$this->sCountryCode.')';
+            return $sVar.' in ('.$sCountryList.')';
         }
 
         return '';
@@ -152,13 +130,6 @@ class SearchDescription
         return $this->iOperator != Operator::NONE;
     }
 
-    /**
-     * Extract special terms from the query, amend the search
-     * and return the shortended query.
-     *
-     * Only the first special term found will be used but all will
-     * be removed from the query.
-     */
     public function extractKeyValuePairs($sQuery)
     {
         // Search for terms of kind [<key>=<value>].
@@ -179,12 +150,233 @@ class SearchDescription
         return $sQuery;
     }
 
+    /**
+     * Check if this search description can be used for a query.
+     *
+     * A search is rejected when it has a house number but no name terms,
+     * or when it has a country code that is not in the given list of
+     * country codes.
+     *
+     * @param $aCountryCodes List of country codes to check against.
+     *                       An empty list disables the country check.
+     *                       Passed by reference but only read here.
+     *
+     * @return bool False if the search should be dropped, true otherwise.
+     */
+    public function isValidSearch(&$aCountryCodes)
+    {
+        // A house number on its own, without any name term, is rejected.
+        if (!sizeof($this->aName)) {
+            if ($this->sHouseNumber) {
+                return false;
+            }
+        }
+        // The country check only applies when both a country list and
+        // a country code for this search are set.
+        if ($aCountryCodes
+            && $this->sCountryCode
+            && !in_array($this->sCountryCode, $aCountryCodes)
+        ) {
+            return false;
+        }
+
+        return true;
+    }
+
+    /////////// Search building functions
+
+
+    /**
+     * Derive new searches from this one by adding a full search term.
+     *
+     * Depending on the kind of term (country, postcode, house number,
+     * special class/type term or plain word) zero or more clones of this
+     * search are created, extended with the term and given a rank penalty.
+     * The branches are tested in that order and only the first matching
+     * kind is applied.
+     *
+     * Apart from returning the new searches, the function may add the word
+     * id to aFullNameAddress of this search (when a plain word is not
+     * usable as an address term) and may increase $iGlobalRank.
+     *
+     * @param $aSearchTerm  Array describing the term. The keys read here are
+     *                      'country_code', 'class', 'type', 'word',
+     *                      'word_id', 'word_token' and 'operator'.
+     * @param $bWordInQuery Must be true for postcode and special terms to
+     *                      be accepted (going by the inline comment below,
+     *                      this signals that the normalized word appears
+     *                      in the query - confirm with the caller).
+     * @param $bHasPartial  If true, a plain word is never added as an
+     *                      address term of a search that already has a name.
+     * @param $sPhraseType  Type of the phrase the term belongs to. Compared
+     *                      against '', 'country', 'postalcode' and 'street'.
+     * @param $bFirstToken  True if the term is the first token.
+     * @param $bFirstPhrase True if the term is part of the first phrase.
+     * @param $bLastToken   True if the term is the last token.
+     * @param $iGlobalRank  Passed by reference. Incremented when a country
+     *                      term is accepted as the first token.
+     *
+     * @return Array of newly created searches, may be empty.
+     */
+    public function extendWithFullTerm($aSearchTerm, $bWordInQuery, $bHasPartial, $sPhraseType, $bFirstToken, $bFirstPhrase, $bLastToken, &$iGlobalRank)
+    {
+        $aNewSearches = array();
+
+        if (($sPhraseType == '' || $sPhraseType == 'country')
+            && !empty($aSearchTerm['country_code'])
+            && $aSearchTerm['country_code'] != '0'
+        ) {
+            // Country term: only used if no country has been set yet.
+            if (!$this->sCountryCode) {
+                $oSearch = clone $this;
+                $oSearch->iSearchRank++;
+                $oSearch->sCountryCode = $aSearchTerm['country_code'];
+                // Country is almost always at the end of the string
+                // - increase score for finding it anywhere else (optimisation)
+                if (!$bLastToken) {
+                    $oSearch->iSearchRank += 5;
+                }
+                $aNewSearches[] = $oSearch;
+
+                // If it is at the beginning, we can be almost sure that
+                // the terms are in the wrong order. Increase score for all searches.
+                if ($bFirstToken) {
+                    $iGlobalRank++;
+                }
+            }
+        } elseif (($sPhraseType == '' || $sPhraseType == 'postalcode')
+                  && $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'postcode'
+        ) {
+            // We need to try the case where the postal code is the primary
+            // element, i.e. there is no way to tell if it is
+            // (postalcode, city) or (city, postalcode), so try both.
+            //
+            // The word is only accepted when escaping it for SQL leaves it
+            // unchanged, i.e. it contains nothing that needs escaping.
+            if (!$this->sPostcode && $bWordInQuery
+                && pg_escape_string($aSearchTerm['word']) == $aSearchTerm['word']
+            ) {
+                // If we have structured search or this is the first term,
+                // make the postcode the primary search element.
+                if ($this->iOperator == Operator::NONE
+                    && ($sPhraseType == 'postalcode' || $bFirstToken)
+                ) {
+                    $oSearch = clone $this;
+                    $oSearch->iSearchRank++;
+                    $oSearch->iOperator = Operator::POSTCODE;
+                    // Existing name terms are moved to the address, the
+                    // postcode word becomes the only name term.
+                    $oSearch->aAddress = array_merge($this->aAddress, $this->aName);
+                    $oSearch->aName =
+                        array($aSearchTerm['word_id'] => $aSearchTerm['word']);
+                    $aNewSearches[] = $oSearch;
+                }
+
+                // If we have a structured search or this is not the first term,
+                // add the postcode as an addendum.
+                if ($this->iOperator != Operator::POSTCODE
+                    && ($sPhraseType == 'postalcode' || sizeof($this->aName))
+                ) {
+                    $oSearch = clone $this;
+                    $oSearch->iSearchRank++;
+                    $oSearch->sPostcode = $aSearchTerm['word'];
+                    $aNewSearches[] = $oSearch;
+                }
+            }
+        } elseif (($sPhraseType == '' || $sPhraseType == 'street')
+                 && $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'house'
+        ) {
+            // House number term: only one house number per search and
+            // never in combination with a postcode search.
+            if (!$this->sHouseNumber && $this->iOperator != Operator::POSTCODE) {
+                $oSearch = clone $this;
+                $oSearch->iSearchRank++;
+                $oSearch->sHouseNumber = trim($aSearchTerm['word_token']);
+                // sanity check: if the housenumber is not mainly made
+                // up of numbers, add a penalty
+                if (preg_match_all("/[^0-9]/", $oSearch->sHouseNumber, $aMatches) > 2) {
+                    $oSearch->iSearchRank++;
+                }
+                // penalty for terms that come without a word id
+                if (!isset($aSearchTerm['word_id'])) {
+                    $oSearch->iSearchRank++;
+                }
+                // also must not appear in the middle of the address
+                if (sizeof($this->aAddress) || sizeof($this->aAddressNonSearch)) {
+                    $oSearch->iSearchRank++;
+                }
+                $aNewSearches[] = $oSearch;
+            }
+        } elseif ($sPhraseType == ''
+                  && $aSearchTerm['class'] !== '' && $aSearchTerm['class'] !== null
+        ) {
+            // Special term with class/type, only in unstructured phrases.
+            //
+            // require a normalized exact match of the term
+            // if we have the normalizer version of the query
+            // available
+            if ($this->iOperator == Operator::NONE
+                && (isset($aSearchTerm['word']) && $aSearchTerm['word'])
+                && $bWordInQuery
+            ) {
+                $oSearch = clone $this;
+                $oSearch->iSearchRank++;
+
+                $iOp = Operator::NEAR; // near == in for the moment
+                // Terms without an explicit operator get an extra penalty
+                // and become a name search if a name is already present.
+                if ($aSearchTerm['operator'] == '') {
+                    if (sizeof($this->aName)) {
+                        $iOp = Operator::NAME;
+                    }
+                    $oSearch->iSearchRank += 2;
+                }
+
+                $oSearch->setPoiSearch($iOp, $aSearchTerm['class'], $aSearchTerm['type']);
+                $aNewSearches[] = $oSearch;
+            }
+        } elseif (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id']) {
+            // Plain word: becomes the name if there is none yet,
+            // otherwise it is a candidate for the address.
+            $iWordID = $aSearchTerm['word_id'];
+            if (sizeof($this->aName)) {
+                if (($sPhraseType == '' || !$bFirstPhrase)
+                    && $sPhraseType != 'country'
+                    && !$bHasPartial
+                ) {
+                    $oSearch = clone $this;
+                    $oSearch->iSearchRank++;
+                    $oSearch->aAddress[$iWordID] = $iWordID;
+                    $aNewSearches[] = $oSearch;
+                } else {
+                    // No new search is created here. The word is
+                    // recorded on this search object itself.
+                    $this->aFullNameAddress[$iWordID] = $iWordID;
+                }
+            } else {
+                $oSearch = clone $this;
+                $oSearch->iSearchRank++;
+                $oSearch->aName = array($iWordID => $iWordID);
+                $aNewSearches[] = $oSearch;
+            }
+        }
+
+        return $aNewSearches;
+    }
+
+    /**
+     * Derive new searches from this one by adding a partial search term.
+     *
+     * The term may be added as an address term (if the search already has
+     * a name) and/or as a name term (if the search has no postcode or
+     * address terms yet). Terms whose frequency score reaches
+     * CONST_Max_Word_Frequency go into the corresponding 'NonSearch' list
+     * instead of the normal term list.
+     *
+     * @param $aSearchTerm          Array describing the term. The keys read
+     *                              here are 'word_id' and 'word_token'.
+     * @param $bStructuredPhrases   True if the query consists of structured
+     *                              phrases. In that case terms of the phrase
+     *                              with index 0 are never used as address.
+     * @param $iPhrase              Index of the phrase the term belongs to.
+     * @param $aWordFrequencyScores Frequency score per word id. Passed by
+     *                              reference but only read here.
+     * @param $aFullTokens          List of full terms that may be used in
+     *                              place of a too frequent partial term.
+     *
+     * @return Array of newly created searches, may be empty.
+     */
+    public function extendWithPartialTerm($aSearchTerm, $bStructuredPhrases, $iPhrase, &$aWordFrequencyScores, $aFullTokens)
+    {
+        // Only allow name terms.
+        if (!(isset($aSearchTerm['word_id']) && $aSearchTerm['word_id'])) {
+            return array();
+        }
+
+        $aNewSearches = array();
+        $iWordID = $aSearchTerm['word_id'];
+
+        // Use as address term: requires an existing name and a token
+        // that does not contain a space.
+        if ((!$bStructuredPhrases || $iPhrase > 0)
+            && sizeof($this->aName)
+            && strpos($aSearchTerm['word_token'], ' ') === false
+        ) {
+            if ($aWordFrequencyScores[$iWordID] < CONST_Max_Word_Frequency) {
+                $oSearch = clone $this;
+                $oSearch->iSearchRank++;
+                $oSearch->aAddress[$iWordID] = $iWordID;
+                $aNewSearches[] = $oSearch;
+            } else {
+                // Too frequent: keep the term in the non-search list.
+                $oSearch = clone $this;
+                $oSearch->iSearchRank++;
+                $oSearch->aAddressNonSearch[$iWordID] = $iWordID;
+                // extra penalty for tokens made up of digits only
+                if (preg_match('#^[0-9]+$#', $aSearchTerm['word_token'])) {
+                    $oSearch->iSearchRank += 2;
+                }
+                // extra penalty when full terms are available as alternative
+                if (sizeof($aFullTokens)) {
+                    $oSearch->iSearchRank++;
+                }
+                $aNewSearches[] = $oSearch;
+
+                // revert to the token version?
+                // Only full terms without country code, 'lat' and class
+                // are used, each in its own new search.
+                foreach ($aFullTokens as $aSearchTermToken) {
+                    if (empty($aSearchTermToken['country_code'])
+                        && empty($aSearchTermToken['lat'])
+                        && empty($aSearchTermToken['class'])
+                    ) {
+                        $oSearch = clone $this;
+                        $oSearch->iSearchRank++;
+                        $oSearch->aAddress[$aSearchTermToken['word_id']] = $aSearchTermToken['word_id'];
+                        $aNewSearches[] = $oSearch;
+                    }
+                }
+            }
+        }
+
+        // Use as name term: only while no postcode or address terms are
+        // set and the name is either empty or comes from the same phrase.
+        if ((!$this->sPostcode && !$this->aAddress && !$this->aAddressNonSearch)
+            && (!sizeof($this->aName) || $this->iNamePhrase == $iPhrase)
+        ) {
+            $oSearch = clone $this;
+            $oSearch->iSearchRank++;
+            // extra penalty if this is the first name term
+            if (!sizeof($this->aName)) {
+                $oSearch->iSearchRank += 1;
+            }
+            // extra penalty for tokens made up of digits only
+            if (preg_match('#^[0-9]+$#', $aSearchTerm['word_token'])) {
+                $oSearch->iSearchRank += 2;
+            }
+            if ($aWordFrequencyScores[$iWordID] < CONST_Max_Word_Frequency) {
+                $oSearch->aName[$iWordID] = $iWordID;
+            } else {
+                $oSearch->aNameNonSearch[$iWordID] = $iWordID;
+            }
+            // remember which phrase the name terms were taken from
+            $oSearch->iNamePhrase = $iPhrase;
+            $aNewSearches[] = $oSearch;
+        }
+
+        return $aNewSearches;
+    }
+
+    /////////// Query functions
+
+
     public function queryCountry(&$oDB, $sViewboxSQL)
     {
         $sSQL = 'SELECT place_id FROM placex ';
         $sSQL .= "WHERE country_code='".$this->sCountryCode."'";
         $sSQL .= ' AND rank_search = 4';
-        if ($ViewboxSQL) {
+        if ($sViewboxSQL) {
             $sSQL .= " AND ST_Intersects($sViewboxSQL, geometry)";
         }
         $sSQL .= " ORDER BY st_area(geometry) DESC LIMIT 1";
@@ -226,7 +418,7 @@ class SearchDescription
             }
             $sSQL .= " limit $iLimit";
             if (CONST_Debug) var_dump($sSQL);
-            return chksql($this->oDB->getCol($sSQL));
+            return chksql($oDB->getCol($sSQL));
         }
 
         if ($this->oNearPoint) {
@@ -240,7 +432,7 @@ class SearchDescription
             $sSQL .= ' ORDER BY '.$this->oNearPoint->distanceSQL('centroid')." ASC";
             $sSQL .= " LIMIT $iLimit";
             if (CONST_Debug) var_dump($sSQL);
-            return chksql($this->oDB->getCol($sSQL));
+            return chksql($oDB->getCol($sSQL));
         }
 
         return array();
@@ -248,7 +440,7 @@ class SearchDescription
 
     public function queryPostcode(&$oDB, $sCountryList, $iLimit)
     {
-        $sSQL  = 'SELECT p.place_id FROM location_postcode p ';
+        $sSQL = 'SELECT p.place_id FROM location_postcode p ';
 
         if (sizeof($this->aAddress)) {
             $sSQL .= ', search_name s ';
@@ -259,16 +451,16 @@ class SearchDescription
             $sSQL .= 'WHERE ';
         }
 
-        $sSQL .= "p.postcode = '".pg_escape_string(reset($this->$aName))."'";
+        $sSQL .= "p.postcode = '".reset($this->aName)."'";
         $sCountryTerm = $this->countryCodeSQL('p.country_code', $sCountryList);
         if ($sCountryTerm) {
-            $sSQL .= ' AND '.$sCountyTerm;
+            $sSQL .= ' AND '.$sCountryTerm;
         }
         $sSQL .= " LIMIT $iLimit";
 
         if (CONST_Debug) var_dump($sSQL);
 
-        return chksql($this->oDB->getCol($sSQL));
+        return chksql($oDB->getCol($sSQL));
     }
 
     public function queryNamedPlace(&$oDB, $aWordFrequencyScores, $sCountryList, $iMinAddressRank, $iMaxAddressRank, $sExcludeSQL, $sViewboxSmall, $sViewboxLarge, $iLimit)
@@ -318,7 +510,7 @@ class SearchDescription
             }
         }
 
-        $sCountryTerm = $this->countryCodeSQL('p.country_code', $sCountryList);
+        $sCountryTerm = $this->countryCodeSQL('country_code', $sCountryList);
         if ($sCountryTerm) {
             $aTerms[] = $sCountryTerm;
         }
@@ -346,11 +538,11 @@ class SearchDescription
         }
 
         if ($sExcludeSQL) {
-            $aTerms = 'place_id not in ('.$sExcludeSQL.')';
+            $aTerms[] = 'place_id not in ('.$sExcludeSQL.')';
         }
 
         if ($sViewboxSmall) {
-           $aTerms[] = 'centroid && '.$sViewboxSmall;
+            $aTerms[] = 'centroid && '.$sViewboxSmall;
         }
 
         if ($this->oNearPoint) {
@@ -397,7 +589,7 @@ class SearchDescription
             if (CONST_Debug) var_dump($sSQL);
 
             return chksql(
-                $this->oDB->getAll($sSQL),
+                $oDB->getAll($sSQL),
                 "Could not get places for search terms."
             );
         }
@@ -421,7 +613,7 @@ class SearchDescription
 
         if (CONST_Debug) var_dump($sSQL);
 
-        $aPlaceIDs = chksql($this->oDB->getCol($sSQL));
+        $aPlaceIDs = chksql($oDB->getCol($sSQL));
 
         if (sizeof($aPlaceIDs)) {
             return array('aPlaceIDs' => $aPlaceIDs, 'iHouseNumber' => -1);
@@ -446,14 +638,14 @@ class SearchDescription
             $sSQL .= $iHousenumber.">=startnumber and ";
             $sSQL .= $iHousenumber."<=endnumber";
 
-            if ($sExcludeSQL)) {
+            if ($sExcludeSQL) {
                 $sSQL .= ' AND place_id not in ('.$sExcludeSQL.')';
             }
             $sSQL .= " limit $iLimit";
 
             if (CONST_Debug) var_dump($sSQL);
 
-            $aPlaceIDs = chksql($this->oDB->getCol($sSQL, 0));
+            $aPlaceIDs = chksql($oDB->getCol($sSQL, 0));
 
             if (sizeof($aPlaceIDs)) {
                 return array('aPlaceIDs' => $aPlaceIDs, 'iHouseNumber' => $iHousenumber);
@@ -472,7 +664,7 @@ class SearchDescription
 
             if (CONST_Debug) var_dump($sSQL);
 
-            $aPlaceIDs = chksql($this->oDB->getCol($sSQL));
+            $aPlaceIDs = chksql($oDB->getCol($sSQL));
 
             if (sizeof($aPlaceIDs)) {
                 return array('aPlaceIDs' => $aPlaceIDs, 'iHouseNumber' => -1);
@@ -499,7 +691,7 @@ class SearchDescription
 
             if (CONST_Debug) var_dump($sSQL);
 
-            $aPlaceIDs = chksql($this->oDB->getCol($sSQL, 0));
+            $aPlaceIDs = chksql($oDB->getCol($sSQL, 0));
 
             if (sizeof($aPlaceIDs)) {
                 return array('aPlaceIDs' => $aPlaceIDs, 'iHouseNumber' => $iHousenumber);
@@ -528,18 +720,18 @@ class SearchDescription
 
             if (CONST_Debug) var_dump($sSQL);
 
-            $aClassPlaceIDs = chksql($this->oDB->getCol($sSQL));
+            $aClassPlaceIDs = chksql($oDB->getCol($sSQL));
         }
 
         // NEAR and IN are handled the same
         if ($this->iOperator == Operator::TYPE || $this->iOperator == Operator::NEAR) {
             $sClassTable = $this->poiTable();
             $sSQL = "SELECT count(*) FROM pg_tables WHERE tablename = '$sClassTable'";
-            $bCacheTable = (bool) chksql($this->oDB->getOne($sSQL));
+            $bCacheTable = (bool) chksql($oDB->getOne($sSQL));
 
             $sSQL = "SELECT min(rank_search) FROM placex WHERE place_id in ($sPlaceIDs)";
             if (CONST_Debug) var_dump($sSQL);
-            $iMaxRank = (int)chksql($this->oDB->getOne($sSQL));
+            $iMaxRank = (int)chksql($oDB->getOne($sSQL));
 
             // For state / country level searches the normal radius search doesn't work very well
             $sPlaceGeom = false;
@@ -552,7 +744,7 @@ class SearchDescription
                 $sSQL .= " ORDER BY rank_search ASC ";
                 $sSQL .= " LIMIT 1";
                 if (CONST_Debug) var_dump($sSQL);
-                $sPlaceGeom = chksql($this->oDB->getOne($sSQL));
+                $sPlaceGeom = chksql($oDB->getOne($sSQL));
             }
 
             if ($sPlaceGeom) {
@@ -562,7 +754,7 @@ class SearchDescription
                 $sSQL = 'SELECT place_id FROM placex';
                 $sSQL .= " WHERE place_id in ($sPlaceIDs) and rank_search < $iMaxRank";
                 if (CONST_Debug) var_dump($sSQL);
-                $aPlaceIDs = chksql($this->oDB->getCol($sSQL));
+                $aPlaceIDs = chksql($oDB->getCol($sSQL));
                 $sPlaceIDs = join(',', $aPlaceIDs);
             }
 
@@ -610,7 +802,7 @@ class SearchDescription
 
                     if (CONST_Debug) var_dump($sSQL);
 
-                    $aClassPlaceIDs = array_merge($aClassPlaceIDs, chksql($this->oDB->getCol($sSQL)));
+                    $aClassPlaceIDs = array_merge($aClassPlaceIDs, chksql($oDB->getCol($sSQL)));
                 } else {
                     if ($this->oNearPoint) {
                         $fRange = $this->oNearPoint->radius();
@@ -642,11 +834,58 @@ class SearchDescription
 
                     if (CONST_Debug) var_dump($sSQL);
 
-                    $aClassPlaceIDs = array_merge($aClassPlaceIDs, chksql($this->oDB->getCol($sSQL)));
+                    $aClassPlaceIDs = array_merge($aClassPlaceIDs, chksql($oDB->getCol($sSQL)));
                 }
             }
         }
 
         return $aClassPlaceIDs;
     }
-};
+
+
+    /////////// Sort functions
+
+
+    /**
+     * Comparison function for sorting searches by their search rank.
+     *
+     * Searches with equal rank are ordered by the sum of their operator
+     * value and the length of their house number.
+     *
+     * @param $a First search to compare.
+     * @param $b Second search to compare.
+     *
+     * @return integer Negative, zero or positive if $a sorts before,
+     *                 equal to or after $b.
+     */
+    public static function bySearchRank($a, $b)
+    {
+        if ($a->iSearchRank == $b->iSearchRank) {
+            // tie-break on operator value plus house number length
+            return $a->iOperator + strlen($a->sHouseNumber)
+                     - $b->iOperator - strlen($b->sHouseNumber);
+        }
+
+        return $a->iSearchRank < $b->iSearchRank ? -1 : 1;
+    }
+
+    //////////// Debugging functions
+
+
+    /**
+     * Print the content of this search as a row of an HTML table.
+     *
+     * The row is written directly to the output with echo. Values are
+     * printed as they are, without HTML escaping.
+     *
+     * @param $aWordIDs Lookup table used to translate the entries of the
+     *                  name and address term lists into the text to print
+     *                  (presumably word id to word token - confirm with
+     *                  the caller). Passed by reference but only read here.
+     *
+     * @return void
+     */
+    public function dumpAsHtmlTableRow(&$aWordIDs)
+    {
+        // Translates a single term list entry via the lookup table.
+        $kf = function ($k) use (&$aWordIDs) {
+            return $aWordIDs[$k];
+        };
+
+        echo "<tr>";
+        echo "<td>$this->iSearchRank</td>";
+        echo "<td>".join(', ', array_map($kf, $this->aName))."</td>";
+        echo "<td>".join(', ', array_map($kf, $this->aNameNonSearch))."</td>";
+        echo "<td>".join(', ', array_map($kf, $this->aAddress))."</td>";
+        echo "<td>".join(', ', array_map($kf, $this->aAddressNonSearch))."</td>";
+        echo "<td>".$this->sCountryCode."</td>";
+        echo "<td>".Operator::toString($this->iOperator)."</td>";
+        echo "<td>".$this->sClass."</td>";
+        echo "<td>".$this->sType."</td>";
+        echo "<td>".$this->sPostcode."</td>";
+        echo "<td>".$this->sHouseNumber."</td>";
+
+        // The near point takes three cells (lat, lon, radius). Empty
+        // cells are printed instead when no near point is set.
+        if ($this->oNearPoint) {
+            echo "<td>".$this->oNearPoint->lat()."</td>";
+            echo "<td>".$this->oNearPoint->lon()."</td>";
+            echo "<td>".$this->oNearPoint->radius()."</td>";
+        } else {
+            echo "<td></td><td></td><td></td>";
+        }
+
+        echo "</tr>";
+    }
+}