git.openstreetmap.org Git - nominatim.git/commitdiff
Merge remote-tracking branch 'upstream/master'
author Sarah Hoffmann <lonvia@denofr.de>
Mon, 19 Jul 2021 07:43:04 +0000 (09:43 +0200)
committer Sarah Hoffmann <lonvia@denofr.de>
Mon, 19 Jul 2021 07:43:04 +0000 (09:43 +0200)
79 files changed:
.github/actions/build-nominatim/action.yml
.github/workflows/ci-tests.yml
SECURITY.md [new file with mode: 0644]
docs/admin/Installation.md
docs/develop/Development-Environment.md
lib-php/AddressDetails.php
lib-php/DB.php
lib-php/DatabaseError.php
lib-php/DebugHtml.php
lib-php/Geocode.php
lib-php/ParameterParser.php
lib-php/PlaceLookup.php
lib-php/ReverseGeocode.php
lib-php/SearchDescription.php
lib-php/SearchPosition.php [new file with mode: 0644]
lib-php/Shell.php
lib-php/TokenCountry.php
lib-php/TokenHousenumber.php
lib-php/TokenList.php
lib-php/TokenPartial.php [new file with mode: 0644]
lib-php/TokenPostcode.php
lib-php/TokenSpecialTerm.php
lib-php/TokenWord.php
lib-php/admin/export.php
lib-php/admin/update.php
lib-php/admin/warm.php
lib-php/cmd.php
lib-php/init-website.php
lib-php/lib.php
lib-php/log.php
lib-php/migration/PhraseSettingsToJson.php
lib-php/output.php
lib-php/template/address-geocodejson.php
lib-php/template/address-geojson.php
lib-php/template/address-json.php
lib-php/template/address-xml.php
lib-php/template/details-json.php
lib-php/template/error-json.php
lib-php/template/search-batch-json.php
lib-php/template/search-geocodejson.php
lib-php/template/search-geojson.php
lib-php/template/search-json.php
lib-php/template/search-xml.php
lib-php/tokenizer/legacy_icu_tokenizer.php
lib-php/tokenizer/legacy_tokenizer.php
lib-php/website/details.php
lib-php/website/lookup.php
lib-php/website/polygons.php
lib-php/website/search.php
nominatim/cli.py
nominatim/clicmd/api.py
nominatim/clicmd/args.py
nominatim/clicmd/refresh.py
nominatim/clicmd/replication.py
nominatim/config.py
nominatim/db/async_connection.py
nominatim/db/connection.py
nominatim/db/sql_preprocessor.py
nominatim/db/utils.py
nominatim/indexer/indexer.py
nominatim/indexer/progress.py
nominatim/indexer/runners.py
nominatim/tokenizer/icu_name_processor.py
nominatim/tokenizer/icu_variants.py
nominatim/tokenizer/legacy_icu_tokenizer.py
nominatim/tokenizer/legacy_tokenizer.py
nominatim/tools/check_database.py
nominatim/tools/database_import.py
nominatim/tools/exec_utils.py
nominatim/tools/freeze.py
nominatim/tools/migration.py
nominatim/tools/postcodes.py
nominatim/tools/refresh.py
nominatim/tools/special_phrases/sp_importer.py
nominatim/tools/special_phrases/sp_wiki_loader.py
nominatim/tools/special_phrases/special_phrase.py
vagrant/Install-on-Centos-7.sh
vagrant/Install-on-Centos-8.sh
vagrant/Install-on-Ubuntu-18.sh

index a391561636e1478536e5dd080baad0e7faa5ddcd..757decd4f63c7379033ac956acb38cf6c12b0cad 100644 (file)
@@ -14,9 +14,9 @@ runs:
           run: |
             sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev
             if [ "x$UBUNTUVER" == "x18" ]; then
-                pip3 install python-dotenv psycopg2==2.7.7 jinja2==2.8 psutil==5.4.2 pyicu osmium
+                pip3 install python-dotenv psycopg2==2.7.7 jinja2==2.8 psutil==5.4.2 pyicu osmium PyYAML==5.1 datrie
             else
-                sudo apt-get install -y -qq python3-icu python3-datrie python3-pyosmium python3-jinja2 python3-psutil python3-psycopg2 python3-dotenv
+                sudo apt-get install -y -qq python3-icu python3-datrie python3-pyosmium python3-jinja2 python3-psutil python3-psycopg2 python3-dotenv python3-yaml
             fi
           shell: bash
           env:
index 8f0ea80db89b214ba2bf989a9854534d86123eae..cea270914bf6a8760f1e4a3962a839a5028ea035 100644 (file)
@@ -4,16 +4,22 @@ on: [ push, pull_request ]
 
 jobs:
     tests:
-        runs-on: ubuntu-20.04
-
         strategy:
             matrix:
-                postgresql: [9.5, 13]
+                ubuntu: [18, 20]
                 include:
-                    - postgresql: 9.5
+                    - ubuntu: 18
+                      postgresql: 9.5
                       postgis: 2.5
-                    - postgresql: 13
+                      pytest: pytest
+                      php: 7.2
+                    - ubuntu: 20
+                      postgresql: 13
                       postgis: 3
+                      pytest: py.test-3
+                      php: 7.4
+
+        runs-on: ubuntu-${{ matrix.ubuntu }}.04
 
         steps:
             - uses: actions/checkout@v2
@@ -24,9 +30,15 @@ jobs:
             - name: Setup PHP
               uses: shivammathur/setup-php@v2
               with:
-                  php-version: '7.4'
+                  php-version: ${{ matrix.php }}
+                  coverage: xdebug
                   tools: phpunit, phpcs, composer
 
+            - uses: actions/setup-python@v2
+              with:
+                python-version: 3.6
+              if: matrix.ubuntu == 18
+
             - name: Get Date
               id: get-date
               run: |
@@ -43,33 +55,53 @@ jobs:
               with:
                   postgresql-version: ${{ matrix.postgresql }}
                   postgis-version: ${{ matrix.postgis }}
+
             - uses: ./Nominatim/.github/actions/build-nominatim
+              with:
+                  ubuntu: ${{ matrix.ubuntu }}
 
             - name: Install test prerequsites
-              run: sudo apt-get install -y -qq php-codesniffer pylint python3-pytest python3-behave python3-pytest-cov php-codecoverage php-xdebug
+              run: sudo apt-get install -y -qq pylint python3-pytest python3-behave python3-pytest-cov php-codecoverage
+              if: matrix.ubuntu == 20
+
+            - name: Install test prerequsites
+              run: |
+                   pip3 install pylint==2.6.0 pytest pytest-cov behave==1.2.6
+              if: matrix.ubuntu == 18
 
             - name: PHP linting
               run: phpcs --report-width=120 .
               working-directory: Nominatim
 
             - name: Python linting
-              run: pylint --extension-pkg-whitelist=osmium nominatim
+              run: pylint nominatim
               working-directory: Nominatim
 
             - name: PHP unit tests
               run: phpunit --coverage-clover ../../coverage-php.xml ./
               working-directory: Nominatim/test/php
+              if: matrix.ubuntu == 20
 
             - name: Python unit tests
-              run: py.test-3 --cov=nominatim --cov-report=xml test/python
+              run: $PYTEST --cov=nominatim --cov-report=xml test/python
               working-directory: Nominatim
+              env:
+                PYTEST: ${{ matrix.pytest }}
 
             - name: BDD tests
               run: |
+                  mkdir cov
                   behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build --format=progress3 -DPHPCOV=./cov
                   composer require phpunit/phpcov:7.0.2
                   vendor/bin/phpcov merge --clover ../../coverage-bdd.xml ./cov
               working-directory: Nominatim/test/bdd
+              if: matrix.ubuntu == 20
+
+            - name: BDD tests
+              run: |
+                  behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build --format=progress3
+              working-directory: Nominatim/test/bdd
+              if: matrix.ubuntu == 18
 
             - name: BDD tests (legacy_icu tokenizer)
               run: |
@@ -85,6 +117,7 @@ jobs:
                 fail_ci_if_error: false
                 path_to_write_report: ./coverage/codecov_report.txt
                 verbose: true
+              if: matrix.ubuntu == 20
 
     import:
         strategy:
diff --git a/SECURITY.md b/SECURITY.md
new file mode 100644 (file)
index 0000000..41a6f2e
--- /dev/null
@@ -0,0 +1,39 @@
+# Security Policy
+
+## Supported Versions
+
+All Nominatim releases receive security updates for two years.
+
+The following table lists the end of support for all currently supported
+versions.
+
+| Version | End of support for security updates |
+| ------- | ----------------------------------- |
+| 3.7.x   | 2023-04-05                          |
+| 3.6.x   | 2022-12-12                          |
+| 3.5.x   | 2022-06-05                          |
+| 3.4.x   | 2021-10-24                          |
+
+## Reporting a Vulnerability
+
+If you believe you have found an issue in Nominatim that has implications for
+security, please send a description of the issue to **security@nominatim.org**.
+You will receive an acknowledgement of your mail within 3 working days, in
+which we will also notify you of the next steps.
+
+## How we Disclose Security Issues
+
+**The following section only applies to security issues found in released
+versions. Issues that concern the master development branch only will be
+fixed immediately on the branch with the corresponding PR containing the
+description of the nature and severity of the issue.**
+
+Patches for identified security issues are applied to all affected versions and
+new minor versions are released. At the same time we release a statement on
+the [Nominatim blog](https://nominatim.org/blog/) describing the nature of the
+incident. Announcements will also be published on the
+[geocoding mailing list](https://lists.openstreetmap.org/listinfo/geocoding).
+
+## List of Previous Incidents
+
+* 2020-05-04 - [SQL injection issue on /details endpoint](https://lists.openstreetmap.org/pipermail/geocoding/2020-May/002012.html)
index cc1edf590abdfa041ceffedd9bce05c7c5d87809..76af39c6e0a0b102547dedeb23343e57aa7c42ee 100644 (file)
@@ -45,6 +45,7 @@ For running Nominatim:
   * [psutil](https://github.com/giampaolo/psutil)
   * [Jinja2](https://palletsprojects.com/p/jinja/)
   * [PyICU](https://pypi.org/project/PyICU/)
+  * [PyYAML](https://pyyaml.org/) (5.1+)
   * [datrie](https://github.com/pytries/datrie)
   * [PHP](https://php.net) (7.0 or later)
   * PHP-pgsql
index 43598b9a7c638ba2177783f2b962ec45fc4c8627..eea69c706e95aa3d6195bbe171409b70d2c153ac 100644 (file)
@@ -29,7 +29,7 @@ The Nominatim test suite consists of behavioural tests (using behave) and
 unit tests (using PHPUnit for PHP code and pytest for Python code).
 It has the following additional requirements:
 
-* [behave test framework](https://behave.readthedocs.io) >= 1.2.5
+* [behave test framework](https://behave.readthedocs.io) >= 1.2.6
 * [phpunit](https://phpunit.de) >= 7.3
 * [PHP CodeSniffer](https://github.com/squizlabs/PHP_CodeSniffer)
 * [Pylint](https://pylint.org/) (2.6.0 is used for the CI)
index bf8defc230a9c66b0a1f52ec2049e7522a7b7491..91e3d89fcfac21acf1120aea9aa58d2cf3d1d4a7 100644 (file)
@@ -61,7 +61,7 @@ class AddressDetails
         return join(', ', $aParts);
     }
 
-    public function getAddressNames($sCountry = null)
+    public function getAddressNames()
     {
         $aAddress = array();
 
@@ -79,12 +79,11 @@ class AddressDetails
                 $sName = $aLine['housenumber'];
             }
 
-            if (isset($sName)) {
-                if (!isset($aAddress[$sTypeLabel])
-                    || $aLine['class'] == 'place'
-                ) {
-                    $aAddress[$sTypeLabel] = $sName;
-                }
+            if (isset($sName)
+                && (!isset($aAddress[$sTypeLabel])
+                    || $aLine['class'] == 'place')
+            ) {
+                $aAddress[$sTypeLabel] = $sName;
             }
         }
 
index abd23179526ef230b429698764fd29abc4e7762f..03ee6f1b8b6c6fc056b3e1a94604578b0f6fc2ed 100644 (file)
@@ -39,7 +39,9 @@ class DB
         $conn->exec("SET DateStyle TO 'sql,european'");
         $conn->exec("SET client_encoding TO 'utf-8'");
         $iMaxExecution = ini_get('max_execution_time');
-        if ($iMaxExecution > 0) $conn->setAttribute(\PDO::ATTR_TIMEOUT, $iMaxExecution); // seconds
+        if ($iMaxExecution > 0) {
+            $conn->setAttribute(\PDO::ATTR_TIMEOUT, $iMaxExecution); // seconds
+        }
 
         $this->connection = $conn;
         return true;
@@ -95,7 +97,9 @@ class DB
         try {
             $stmt = $this->getQueryStatement($sSQL, $aInputVars, $sErrMessage);
             $row = $stmt->fetch(\PDO::FETCH_NUM);
-            if ($row === false) return false;
+            if ($row === false) {
+                return false;
+            }
         } catch (\PDOException $e) {
             throw new \Nominatim\DatabaseError($sErrMessage, 500, null, $e, $sSQL);
         }
@@ -306,9 +310,13 @@ class DB
         if (preg_match('/^pgsql:(.+)$/', $sDSN, $aMatches)) {
             foreach (explode(';', $aMatches[1]) as $sKeyVal) {
                 list($sKey, $sVal) = explode('=', $sKeyVal, 2);
-                if ($sKey == 'host') $sKey = 'hostspec';
-                if ($sKey == 'dbname') $sKey = 'database';
-                if ($sKey == 'user') $sKey = 'username';
+                if ($sKey == 'host') {
+                    $sKey = 'hostspec';
+                } elseif ($sKey == 'dbname') {
+                    $sKey = 'database';
+                } elseif ($sKey == 'user') {
+                    $sKey = 'username';
+                }
                 $aInfo[$sKey] = $sVal;
             }
         }
index 3a53bc8ffd6f3fc4eed698d343da731a484e21d3..ec428de1951f8be8bb8976c5fae578892b6cddb4 100644 (file)
@@ -5,7 +5,7 @@ namespace Nominatim;
 class DatabaseError extends \Exception
 {
 
-    public function __construct($message, $code = 500, Exception $previous = null, $oPDOErr, $sSql = null)
+    public function __construct($message, $code, $previous, $oPDOErr, $sSql = null)
     {
         parent::__construct($message, $code, $previous);
         // https://secure.php.net/manual/en/class.pdoexception.php
index 98da8794055c644e23fbb90ffdbbc6d3d5b45f8e..19221b435b2f14cdaca2459eabe8ea483f804f8e 100644 (file)
@@ -78,7 +78,7 @@ class Debug
         echo '<th>Address Tokens</th><th>Address Not</th>';
         echo '<th>country</th><th>operator</th>';
         echo '<th>class</th><th>type</th><th>postcode</th><th>housenumber</th></tr>';
-        foreach ($aSearches as $iRank => $aRankedSet) {
+        foreach ($aSearches as $aRankedSet) {
             foreach ($aRankedSet as $aRow) {
                 $aRow->dumpAsHtmlTableRow($aWordsIDs);
             }
index a3883b2509fdc3477881f1f06b87ff9dfb9509bd..52b92c9928770f3baac9d4b162d90f9d1c904748 100644 (file)
@@ -7,6 +7,7 @@ require_once(CONST_LibDir.'/Phrase.php');
 require_once(CONST_LibDir.'/ReverseGeocode.php');
 require_once(CONST_LibDir.'/SearchDescription.php');
 require_once(CONST_LibDir.'/SearchContext.php');
+require_once(CONST_LibDir.'/SearchPosition.php');
 require_once(CONST_LibDir.'/TokenList.php');
 require_once(CONST_TokenizerDir.'/tokenizer.php');
 
@@ -70,7 +71,9 @@ class Geocode
             $aParams['exclude_place_ids'] = implode(',', $this->aExcludePlaceIDs);
         }
 
-        if ($this->bBoundedSearch) $aParams['bounded'] = '1';
+        if ($this->bBoundedSearch) {
+            $aParams['bounded'] = '1';
+        }
 
         if ($this->aCountryCodes) {
             $aParams['countrycodes'] = implode(',', $this->aCountryCodes);
@@ -85,8 +88,11 @@ class Geocode
 
     public function setLimit($iLimit = 10)
     {
-        if ($iLimit > 50) $iLimit = 50;
-        if ($iLimit < 1) $iLimit = 1;
+        if ($iLimit > 50) {
+            $iLimit = 50;
+        } elseif ($iLimit < 1) {
+            $iLimit = 1;
+        }
 
         $this->iFinalLimit = $iLimit;
         $this->iLimit = $iLimit + min($iLimit, 10);
@@ -181,18 +187,24 @@ class Geocode
         if ($sExcluded) {
             foreach ($sExcluded as $iExcludedPlaceID) {
                 $iExcludedPlaceID = (int)$iExcludedPlaceID;
-                if ($iExcludedPlaceID)
+                if ($iExcludedPlaceID) {
                     $aExcludePlaceIDs[$iExcludedPlaceID] = $iExcludedPlaceID;
+                }
             }
 
-            if (isset($aExcludePlaceIDs))
+            if (isset($aExcludePlaceIDs)) {
                 $this->aExcludePlaceIDs = $aExcludePlaceIDs;
+            }
         }
 
         // Only certain ranks of feature
         $sFeatureType = $oParams->getString('featureType');
-        if (!$sFeatureType) $sFeatureType = $oParams->getString('featuretype');
-        if ($sFeatureType) $this->setFeatureType($sFeatureType);
+        if (!$sFeatureType) {
+            $sFeatureType = $oParams->getString('featuretype');
+        }
+        if ($sFeatureType) {
+            $this->setFeatureType($sFeatureType);
+        }
 
         // Country code list
         $sCountries = $oParams->getStringList('countrycodes');
@@ -202,8 +214,9 @@ class Geocode
                     $aCountries[] = strtolower($sCountryCode);
                 }
             }
-            if (isset($aCountries))
+            if (isset($aCountries)) {
                 $this->aCountryCodes = $aCountries;
+            }
         }
 
         $aViewbox = $oParams->getStringList('viewboxlbrt');
@@ -255,13 +268,17 @@ class Geocode
     public function loadStructuredAddressElement($sValue, $sKey, $iNewMinAddressRank, $iNewMaxAddressRank, $aItemListValues)
     {
         $sValue = trim($sValue);
-        if (!$sValue) return false;
+        if (!$sValue) {
+            return false;
+        }
         $this->aStructuredQuery[$sKey] = $sValue;
         if ($this->iMinAddressRank == 0 && $this->iMaxAddressRank == 30) {
             $this->iMinAddressRank = $iNewMinAddressRank;
             $this->iMaxAddressRank = $iNewMaxAddressRank;
         }
-        if ($aItemListValues) $this->aAddressRankList = array_merge($this->aAddressRankList, $aItemListValues);
+        if ($aItemListValues) {
+            $this->aAddressRankList = array_merge($this->aAddressRankList, $aItemListValues);
+        }
         return true;
     }
 
@@ -295,11 +312,11 @@ class Geocode
 
     public function fallbackStructuredQuery()
     {
-        if (!$this->aStructuredQuery) return false;
-
         $aParams = $this->aStructuredQuery;
 
-        if (count($aParams) == 1) return false;
+        if (!$aParams || count($aParams) == 1) {
+            return false;
+        }
 
         $aOrderToFallback = array('postalcode', 'street', 'city', 'county', 'state');
 
@@ -329,50 +346,26 @@ class Geocode
          */
         foreach ($aPhrases as $iPhrase => $oPhrase) {
             $aNewPhraseSearches = array();
-            $sPhraseType = $oPhrase->getPhraseType();
+            $oPosition = new SearchPosition(
+                $oPhrase->getPhraseType(),
+                $iPhrase,
+                count($aPhrases)
+            );
 
             foreach ($oPhrase->getWordSets() as $aWordset) {
                 $aWordsetSearches = $aSearches;
 
                 // Add all words from this wordset
                 foreach ($aWordset as $iToken => $sToken) {
-                    //echo "<br><b>$sToken</b>";
                     $aNewWordsetSearches = array();
+                    $oPosition->setTokenPosition($iToken, count($aWordset));
 
                     foreach ($aWordsetSearches as $oCurrentSearch) {
-                        //echo "<i>";
-                        //var_dump($oCurrentSearch);
-                        //echo "</i>";
-
-                        // Tokens with full name matches.
-                        foreach ($oValidTokens->get(' '.$sToken) as $oSearchTerm) {
-                            $aNewSearches = $oCurrentSearch->extendWithFullTerm(
-                                $oSearchTerm,
-                                $sPhraseType,
-                                $iToken == 0 && $iPhrase == 0,
-                                $iPhrase == 0,
-                                $iToken + 1 == count($aWordset)
-                                  && $iPhrase + 1 == count($aPhrases)
-                            );
-
-                            foreach ($aNewSearches as $oSearch) {
-                                if ($oSearch->getRank() < $this->iMaxRank) {
-                                    $aNewWordsetSearches[] = $oSearch;
-                                }
-                            }
-                        }
-                        // Look for partial matches.
-                        // Note that there is no point in adding country terms here
-                        // because country is omitted in the address.
-                        if ($sPhraseType != 'country') {
-                            // Allow searching for a word - but at extra cost
-                            foreach ($oValidTokens->get($sToken) as $oSearchTerm) {
-                                $aNewSearches = $oCurrentSearch->extendWithPartialTerm(
-                                    $sToken,
-                                    $oSearchTerm,
-                                    (bool) $sPhraseType,
-                                    $iPhrase,
-                                    $oValidTokens->get(' '.$sToken)
+                        foreach ($oValidTokens->get($sToken) as $oSearchTerm) {
+                            if ($oSearchTerm->isExtendable($oCurrentSearch, $oPosition)) {
+                                $aNewSearches = $oSearchTerm->extendSearch(
+                                    $oCurrentSearch,
+                                    $oPosition
                                 );
 
                                 foreach ($aNewSearches as $oSearch) {
@@ -387,7 +380,6 @@ class Geocode
                     usort($aNewWordsetSearches, array('Nominatim\SearchDescription', 'bySearchRank'));
                     $aWordsetSearches = array_slice($aNewWordsetSearches, 0, 50);
                 }
-                //var_Dump('<hr>',count($aWordsetSearches)); exit;
 
                 $aNewPhraseSearches = array_merge($aNewPhraseSearches, $aNewWordsetSearches);
                 usort($aNewPhraseSearches, array('Nominatim\SearchDescription', 'bySearchRank'));
@@ -395,8 +387,11 @@ class Geocode
                 $aSearchHash = array();
                 foreach ($aNewPhraseSearches as $iSearch => $aSearch) {
                     $sHash = serialize($aSearch);
-                    if (isset($aSearchHash[$sHash])) unset($aNewPhraseSearches[$iSearch]);
-                    else $aSearchHash[$sHash] = 1;
+                    if (isset($aSearchHash[$sHash])) {
+                        unset($aNewPhraseSearches[$iSearch]);
+                    } else {
+                        $aSearchHash[$sHash] = 1;
+                    }
                 }
 
                 $aNewPhraseSearches = array_slice($aNewPhraseSearches, 0, 50);
@@ -417,10 +412,12 @@ class Geocode
 
             $iSearchCount = 0;
             $aSearches = array();
-            foreach ($aGroupedSearches as $iScore => $aNewSearches) {
+            foreach ($aGroupedSearches as $aNewSearches) {
                 $iSearchCount += count($aNewSearches);
                 $aSearches = array_merge($aSearches, $aNewSearches);
-                if ($iSearchCount > 50) break;
+                if ($iSearchCount > 50) {
+                    break;
+                }
             }
         }
 
@@ -477,7 +474,9 @@ class Geocode
     public function lookup()
     {
         Debug::newFunction('Geocode::lookup');
-        if (!$this->sQuery && !$this->aStructuredQuery) return array();
+        if (!$this->sQuery && !$this->aStructuredQuery) {
+            return array();
+        }
 
         Debug::printDebugArray('Geocode', $this);
 
@@ -503,10 +502,6 @@ class Geocode
 
         Debug::newSection('Query Preprocessing');
 
-        $sLanguagePrefArraySQL = $this->oDB->getArraySQL(
-            $this->oDB->getDBQuotedList($this->aLangPrefOrder)
-        );
-
         $sQuery = $this->sQuery;
         if (!preg_match('//u', $sQuery)) {
             userError('Query string is not UTF-8 encoded.');
@@ -560,15 +555,15 @@ class Geocode
 
                 if (!empty($aTokens)) {
                     $aNewSearches = array();
+                    $oPosition = new SearchPosition('', 0, 1);
+                    $oPosition->setTokenPosition(0, 1);
+
                     foreach ($aSearches as $oSearch) {
                         foreach ($aTokens as $oToken) {
-                            $oNewSearch = clone $oSearch;
-                            $oNewSearch->setPoiSearch(
-                                $oToken->iOperator,
-                                $oToken->sClass,
-                                $oToken->sType
+                            $aNewSearches = array_merge(
+                                $aNewSearches,
+                                $oToken->extendSearch($oSearch, $oPosition)
                             );
-                            $aNewSearches[] = $oNewSearch;
                         }
                     }
                     $aSearches = $aNewSearches;
@@ -639,7 +634,9 @@ class Geocode
                 $aGroupedSearches = array();
                 foreach ($aSearches as $aSearch) {
                     if ($aSearch->getRank() < $this->iMaxRank) {
-                        if (!isset($aGroupedSearches[$aSearch->getRank()])) $aGroupedSearches[$aSearch->getRank()] = array();
+                        if (!isset($aGroupedSearches[$aSearch->getRank()])) {
+                            $aGroupedSearches[$aSearch->getRank()] = array();
+                        }
                         $aGroupedSearches[$aSearch->getRank()][] = $aSearch;
                     }
                 }
@@ -653,7 +650,9 @@ class Geocode
                     $sHash = serialize($aSearch);
                     if (isset($aSearchHash[$sHash])) {
                         unset($aGroupedSearches[$iGroup][$iSearch]);
-                        if (empty($aGroupedSearches[$iGroup])) unset($aGroupedSearches[$iGroup]);
+                        if (empty($aGroupedSearches[$iGroup])) {
+                            unset($aGroupedSearches[$iGroup]);
+                        }
                     } else {
                         $aSearchHash[$sHash] = 1;
                     }
@@ -697,7 +696,9 @@ class Geocode
                         }
                     }
 
-                    if ($iQueryLoop > 20) break;
+                    if ($iQueryLoop > 20) {
+                        break;
+                    }
                 }
 
                 if (!empty($aResults)) {
@@ -772,9 +773,9 @@ class Geocode
                     $aResults = $tempIDs;
                 }
 
-                if (!empty($aResults)) break;
-                if ($iGroupLoop > 4) break;
-                if ($iQueryLoop > 30) break;
+                if (!empty($aResults) || $iGroupLoop > 4 || $iQueryLoop > 30) {
+                    break;
+                }
             }
         } else {
             // Just interpret as a reverse geocode
@@ -792,10 +793,8 @@ class Geocode
 
         // No results? Done
         if (empty($aResults)) {
-            if ($this->bFallback) {
-                if ($this->fallbackStructuredQuery()) {
-                    return $this->lookup();
-                }
+            if ($this->bFallback && $this->fallbackStructuredQuery()) {
+                return $this->lookup();
             }
 
             return array();
@@ -814,7 +813,9 @@ class Geocode
 
         $aRecheckWords = preg_split('/\b[\s,\\-]*/u', $sQuery);
         foreach ($aRecheckWords as $i => $sWord) {
-            if (!preg_match('/[\pL\pN]/', $sWord)) unset($aRecheckWords[$i]);
+            if (!preg_match('/[\pL\pN]/', $sWord)) {
+                unset($aRecheckWords[$i]);
+            }
         }
 
         Debug::printVar('Recheck words', $aRecheckWords);
@@ -874,7 +875,9 @@ class Geocode
                 foreach ($aRecheckWords as $i => $sWord) {
                     if (stripos($sAddress, $sWord)!==false) {
                         $iCountWords++;
-                        if (preg_match('/(^|,)\s*'.preg_quote($sWord, '/').'\s*(,|$)/', $sAddress)) $iCountWords += 0.1;
+                        if (preg_match('/(^|,)\s*'.preg_quote($sWord, '/').'\s*(,|$)/', $sAddress)) {
+                            $iCountWords += 0.1;
+                        }
                     }
                 }
 
@@ -891,15 +894,8 @@ class Geocode
         $aToFilter = $aSearchResults;
         $aSearchResults = array();
 
-        $bFirst = true;
         foreach ($aToFilter as $aResult) {
             $this->aExcludePlaceIDs[$aResult['place_id']] = $aResult['place_id'];
-            if ($bFirst) {
-                $fLat = $aResult['lat'];
-                $fLon = $aResult['lon'];
-                if (isset($aResult['zoom'])) $iZoom = $aResult['zoom'];
-                $bFirst = false;
-            }
             if (!$this->oPlaceLookup->doDeDupe() || (!isset($aOSMIDDone[$aResult['osm_type'].$aResult['osm_id']])
                 && !isset($aClassTypeNameDone[$aResult['osm_type'].$aResult['class'].$aResult['type'].$aResult['name'].$aResult['admin_level']]))
             ) {
@@ -909,7 +905,9 @@ class Geocode
             }
 
             // Absolute limit on number of results
-            if (count($aSearchResults) >= $this->iFinalLimit) break;
+            if (count($aSearchResults) >= $this->iFinalLimit) {
+                break;
+            }
         }
 
         Debug::printVar('Post-filter results', $aSearchResults);
index 32a848b93011fce3edaa1975ad6450369b121725..d4068aa37c2c938458c5062585d23393b050658a 100644 (file)
@@ -90,14 +90,16 @@ class ParameterParser
         $aLanguages = array();
         $sLangString = $this->getString('accept-language', $sFallback);
 
-        if ($sLangString) {
-            if (preg_match_all('/(([a-z]{1,8})([-_][a-z]{1,8})?)\s*(;\s*q\s*=\s*(1|0\.[0-9]+))?/i', $sLangString, $aLanguagesParse, PREG_SET_ORDER)) {
-                foreach ($aLanguagesParse as $iLang => $aLanguage) {
-                    $aLanguages[$aLanguage[1]] = isset($aLanguage[5])?(float)$aLanguage[5]:1 - ($iLang/100);
-                    if (!isset($aLanguages[$aLanguage[2]])) $aLanguages[$aLanguage[2]] = $aLanguages[$aLanguage[1]]/10;
+        if ($sLangString
+            && preg_match_all('/(([a-z]{1,8})([-_][a-z]{1,8})?)\s*(;\s*q\s*=\s*(1|0\.[0-9]+))?/i', $sLangString, $aLanguagesParse, PREG_SET_ORDER)
+        ) {
+            foreach ($aLanguagesParse as $iLang => $aLanguage) {
+                $aLanguages[$aLanguage[1]] = isset($aLanguage[5])?(float)$aLanguage[5]:1 - ($iLang/100);
+                if (!isset($aLanguages[$aLanguage[2]])) {
+                    $aLanguages[$aLanguage[2]] = $aLanguages[$aLanguage[1]]/10;
                 }
-                arsort($aLanguages);
             }
+            arsort($aLanguages);
         }
         if (empty($aLanguages) && CONST_Default_Language) {
             $aLanguages[CONST_Default_Language] = 1;
index b9fa3b1c08c72b1ef200a426eb75f178dc709523..7e78d536eab8cc53b3788ff5264b2c9f812d663f 100644 (file)
@@ -89,20 +89,36 @@ class PlaceLookup
     {
         $aParams = array();
 
-        if ($this->bAddressDetails) $aParams['addressdetails'] = '1';
-        if ($this->bExtraTags) $aParams['extratags'] = '1';
-        if ($this->bNameDetails) $aParams['namedetails'] = '1';
+        if ($this->bAddressDetails) {
+            $aParams['addressdetails'] = '1';
+        }
+        if ($this->bExtraTags) {
+            $aParams['extratags'] = '1';
+        }
+        if ($this->bNameDetails) {
+            $aParams['namedetails'] = '1';
+        }
 
-        if ($this->bIncludePolygonAsText) $aParams['polygon_text'] = '1';
-        if ($this->bIncludePolygonAsGeoJSON) $aParams['polygon_geojson'] = '1';
-        if ($this->bIncludePolygonAsKML) $aParams['polygon_kml'] = '1';
-        if ($this->bIncludePolygonAsSVG) $aParams['polygon_svg'] = '1';
+        if ($this->bIncludePolygonAsText) {
+            $aParams['polygon_text'] = '1';
+        }
+        if ($this->bIncludePolygonAsGeoJSON) {
+            $aParams['polygon_geojson'] = '1';
+        }
+        if ($this->bIncludePolygonAsKML) {
+            $aParams['polygon_kml'] = '1';
+        }
+        if ($this->bIncludePolygonAsSVG) {
+            $aParams['polygon_svg'] = '1';
+        }
 
         if ($this->fPolygonSimplificationThreshold > 0.0) {
             $aParams['polygon_threshold'] = $this->fPolygonSimplificationThreshold;
         }
 
-        if (!$this->bDeDupe) $aParams['dedupe'] = '0';
+        if (!$this->bDeDupe) {
+            $aParams['dedupe'] = '0';
+        }
 
         return $aParams;
     }
@@ -147,8 +163,9 @@ class PlaceLookup
 
     private function langAddressSql($sHousenumber)
     {
-        if ($this->bAddressDetails)
+        if ($this->bAddressDetails) {
             return ''; // langaddress will be computed from address details
+        }
 
         return 'get_address_by_language(place_id,'.$sHousenumber.','.$this->aLangPrefOrderSql.') AS langaddress,';
     }
@@ -234,12 +251,20 @@ class PlaceLookup
             $sSQL .= '     housenumber,';
             $sSQL .= '     country_code, ';
             $sSQL .= '     importance, ';
-            if (!$this->bDeDupe) $sSQL .= 'place_id,';
-            if (!$this->bAddressDetails) $sSQL .= 'langaddress, ';
+            if (!$this->bDeDupe) {
+                $sSQL .= 'place_id,';
+            }
+            if (!$this->bAddressDetails) {
+                $sSQL .= 'langaddress, ';
+            }
             $sSQL .= '     placename, ';
             $sSQL .= '     ref, ';
-            if ($this->bExtraTags) $sSQL .= 'extratags, ';
-            if ($this->bNameDetails) $sSQL .= 'name, ';
+            if ($this->bExtraTags) {
+                $sSQL .= 'extratags, ';
+            }
+            if ($this->bNameDetails) {
+                $sSQL .= 'name, ';
+            }
             $sSQL .= '     extra_place ';
 
             $aSubSelects[] = $sSQL;
@@ -260,8 +285,12 @@ class PlaceLookup
             $sSQL .= $this->langAddressSql('-1');
             $sSQL .= '  postcode as placename,';
             $sSQL .= '  postcode as ref,';
-            if ($this->bExtraTags) $sSQL .= 'null::text AS extra,';
-            if ($this->bNameDetails) $sSQL .= 'null::text AS names,';
+            if ($this->bExtraTags) {
+                $sSQL .= 'null::text AS extra,';
+            }
+            if ($this->bNameDetails) {
+                $sSQL .= 'null::text AS names,';
+            }
             $sSQL .= '  ST_x(geometry) AS lon, ST_y(geometry) AS lat,';
             $sSQL .= '  (0.75-(rank_search::float/40)) AS importance, ';
             $sSQL .= $this->addressImportanceSql('geometry', 'lp.parent_place_id');
@@ -298,8 +327,12 @@ class PlaceLookup
                     $sSQL .= $this->langAddressSql('housenumber_for_place');
                     $sSQL .= '     null::text AS placename, ';
                     $sSQL .= '     null::text AS ref, ';
-                    if ($this->bExtraTags) $sSQL .= 'null::text AS extra,';
-                    if ($this->bNameDetails) $sSQL .= 'null::text AS names,';
+                    if ($this->bExtraTags) {
+                        $sSQL .= 'null::text AS extra,';
+                    }
+                    if ($this->bNameDetails) {
+                        $sSQL .= 'null::text AS names,';
+                    }
                     $sSQL .= '     st_x(centroid) AS lon, ';
                     $sSQL .= '     st_y(centroid) AS lat,';
                     $sSQL .= '     -1.15 AS importance, ';
@@ -344,8 +377,12 @@ class PlaceLookup
                 $sSQL .= $this->langAddressSql('housenumber_for_place');
                 $sSQL .= '  null::text AS placename, ';
                 $sSQL .= '  null::text AS ref, ';
-                if ($this->bExtraTags) $sSQL .= 'null::text AS extra, ';
-                if ($this->bNameDetails) $sSQL .= 'null::text AS names, ';
+                if ($this->bExtraTags) {
+                    $sSQL .= 'null::text AS extra, ';
+                }
+                if ($this->bNameDetails) {
+                    $sSQL .= 'null::text AS names, ';
+                }
                 $sSQL .= '  st_x(centroid) AS lon, ';
                 $sSQL .= '  st_y(centroid) AS lat, ';
                 // slightly smaller than the importance for normal houses
@@ -448,7 +485,9 @@ class PlaceLookup
     {
 
         $aOutlineResult = array();
-        if (!$iPlaceID) return $aOutlineResult;
+        if (!$iPlaceID) {
+            return $aOutlineResult;
+        }
 
         // Get the bounding box and outline polygon
         $sSQL = 'select place_id,0 as numfeatures,st_area(geometry) as area,';
@@ -460,10 +499,18 @@ class PlaceLookup
         }
         $sSQL .= ' ST_YMin(geometry) as minlat,ST_YMax(geometry) as maxlat,';
         $sSQL .= ' ST_XMin(geometry) as minlon,ST_XMax(geometry) as maxlon';
-        if ($this->bIncludePolygonAsGeoJSON) $sSQL .= ',ST_AsGeoJSON(geometry) as asgeojson';
-        if ($this->bIncludePolygonAsKML) $sSQL .= ',ST_AsKML(geometry) as askml';
-        if ($this->bIncludePolygonAsSVG) $sSQL .= ',ST_AsSVG(geometry) as assvg';
-        if ($this->bIncludePolygonAsText) $sSQL .= ',ST_AsText(geometry) as astext';
+        if ($this->bIncludePolygonAsGeoJSON) {
+            $sSQL .= ',ST_AsGeoJSON(geometry) as asgeojson';
+        }
+        if ($this->bIncludePolygonAsKML) {
+            $sSQL .= ',ST_AsKML(geometry) as askml';
+        }
+        if ($this->bIncludePolygonAsSVG) {
+            $sSQL .= ',ST_AsSVG(geometry) as assvg';
+        }
+        if ($this->bIncludePolygonAsText) {
+            $sSQL .= ',ST_AsText(geometry) as astext';
+        }
         if ($fLonReverse != null && $fLatReverse != null) {
             $sFrom = ' from (SELECT * , CASE WHEN (class = \'highway\') AND (ST_GeometryType(geometry) = \'ST_LineString\') THEN ';
             $sFrom .=' ST_ClosestPoint(geometry, ST_SetSRID(ST_Point('.$fLatReverse.','.$fLonReverse.'),4326))';
@@ -486,10 +533,18 @@ class PlaceLookup
                 $aOutlineResult['lon'] = $aPointPolygon['centrelon'];
             }
 
-            if ($this->bIncludePolygonAsGeoJSON) $aOutlineResult['asgeojson'] = $aPointPolygon['asgeojson'];
-            if ($this->bIncludePolygonAsKML) $aOutlineResult['askml'] = $aPointPolygon['askml'];
-            if ($this->bIncludePolygonAsSVG) $aOutlineResult['assvg'] = $aPointPolygon['assvg'];
-            if ($this->bIncludePolygonAsText) $aOutlineResult['astext'] = $aPointPolygon['astext'];
+            if ($this->bIncludePolygonAsGeoJSON) {
+                $aOutlineResult['asgeojson'] = $aPointPolygon['asgeojson'];
+            }
+            if ($this->bIncludePolygonAsKML) {
+                $aOutlineResult['askml'] = $aPointPolygon['askml'];
+            }
+            if ($this->bIncludePolygonAsSVG) {
+                $aOutlineResult['assvg'] = $aPointPolygon['assvg'];
+            }
+            if ($this->bIncludePolygonAsText) {
+                $aOutlineResult['astext'] = $aPointPolygon['astext'];
+            }
 
             if (abs($aPointPolygon['minlat'] - $aPointPolygon['maxlat']) < 0.0000001) {
                 $aPointPolygon['minlat'] = $aPointPolygon['minlat'] - $fRadius;
index cf396b7ab8fa0352f8554111dac87dd942950f52..47e931ef2837bf725f39846fe7c43b5a6b33a1ba 100644 (file)
@@ -74,8 +74,6 @@ class ReverseGeocode
 
     protected function lookupLargeArea($sPointSQL, $iMaxRank)
     {
-        $oResult = null;
-
         if ($iMaxRank > 4) {
             $aPlace = $this->lookupPolygon($sPointSQL, $iMaxRank);
             if ($aPlace) {
@@ -167,9 +165,13 @@ class ReverseGeocode
     {
         Debug::newFunction('lookupPolygon');
         // polygon search begins at suburb-level
-        if ($iMaxRank > 25) $iMaxRank = 25;
+        if ($iMaxRank > 25) {
+            $iMaxRank = 25;
+        }
         // no polygon search over country-level
-        if ($iMaxRank < 5) $iMaxRank = 5;
+        if ($iMaxRank < 5) {
+            $iMaxRank = 5;
+        }
         // search for polygon
         $sSQL = 'SELECT place_id, parent_place_id, rank_address, rank_search FROM';
         $sSQL .= '(select place_id, parent_place_id, rank_address, rank_search, country_code, geometry';
@@ -190,7 +192,6 @@ class ReverseGeocode
 
         if ($aPoly) {
         // if a polygon is found, search for placenodes begins ...
-            $iParentPlaceID = $aPoly['parent_place_id'];
             $iRankAddress = $aPoly['rank_address'];
             $iRankSearch = $aPoly['rank_search'];
             $iPlaceID = $aPoly['place_id'];
@@ -242,26 +243,24 @@ class ReverseGeocode
     public function lookupPoint($sPointSQL, $bDoInterpolation = true)
     {
         Debug::newFunction('lookupPoint');
-        // starts if the search is on POI or street level,
-        // searches for the nearest POI or street,
-        // if a street is found and a POI is searched for,
-        // the nearest POI which the found street is a parent of is choosen.
-        $iMaxRank = $this->iMaxRank;
-
         // Find the nearest point
         $fSearchDiam = 0.006;
         $oResult = null;
         $aPlace = null;
 
         // for POI or street level
-        if ($iMaxRank >= 26) {
+        if ($this->iMaxRank >= 26) {
+            // starts if the search is on POI or street level,
+            // searches for the nearest POI or street,
+            // if a street is found and a POI is searched for,
+            // the nearest POI which the found street is a parent of is chosen.
             $sSQL = 'select place_id,parent_place_id,rank_address,country_code,';
             $sSQL .= ' ST_distance('.$sPointSQL.', geometry) as distance';
             $sSQL .= ' FROM ';
             $sSQL .= ' placex';
             $sSQL .= '   WHERE ST_DWithin('.$sPointSQL.', geometry, '.$fSearchDiam.')';
             $sSQL .= '   AND';
-            $sSQL .= ' rank_address between 26 and '.$iMaxRank;
+            $sSQL .= ' rank_address between 26 and '.$this->iMaxRank;
             $sSQL .= ' and (name is not null or housenumber is not null';
             $sSQL .= ' or rank_address between 26 and 27)';
             $sSQL .= ' and (rank_address between 26 and 27';
@@ -284,7 +283,7 @@ class ReverseGeocode
 
             if ($aPlace) {
                 // if street and maxrank > streetlevel
-                if ($iRankAddress <= 27 && $iMaxRank > 27) {
+                if ($iRankAddress <= 27 && $this->iMaxRank > 27) {
                     // find the closest object (up to a certain radius) of which the street is a parent of
                     $sSQL = ' select place_id,';
                     $sSQL .= ' ST_distance('.$sPointSQL.', geometry) as distance';
@@ -338,7 +337,7 @@ class ReverseGeocode
                 }
             }
 
-            if ($bDoInterpolation && $iMaxRank >= 30) {
+            if ($bDoInterpolation && $this->iMaxRank >= 30) {
                 $fDistance = $fSearchDiam;
                 if ($aPlace) {
                     // We can't reliably go from the closest street to an
@@ -356,7 +355,6 @@ class ReverseGeocode
                     $oResult = new Result($aHouse['place_id'], Result::TABLE_OSMLINE);
                     $oResult->iHouseNumber = closestHouseNumber($aHouse);
                     $aPlace = $aHouse;
-                    $iRankAddress = 30;
                 }
             }
 
@@ -366,7 +364,7 @@ class ReverseGeocode
             }
         } else {
             // lower than street level ($iMaxRank < 26 )
-            $oResult = $this->lookupLargeArea($sPointSQL, $iMaxRank);
+            $oResult = $this->lookupLargeArea($sPointSQL, $this->iMaxRank);
         }
 
         Debug::printVar('Final result', $oResult);
index 3c572f2fc872e1e41a47580b80d156834870b964..4d944bfb1ee498835c2d3178b2ea04d2439ab75d 100644 (file)
@@ -67,35 +67,6 @@ class SearchDescription
         return $this->iSearchRank;
     }
 
-    /**
-     * Make this search a POI search.
-     *
-     * In a POI search, objects are not (only) searched by their name
-     * but also by the primary OSM key/value pair (class and type in Nominatim).
-     *
-     * @param integer $iOperator Type of POI search
-     * @param string  $sClass    Class (or OSM tag key) of POI.
-     * @param string  $sType     Type (or OSM tag value) of POI.
-     *
-     * @return void
-     */
-    public function setPoiSearch($iOperator, $sClass, $sType)
-    {
-        $this->iOperator = $iOperator;
-        $this->sClass = $sClass;
-        $this->sType = $sType;
-    }
-
-    /**
-     * Check if any operator is set.
-     *
-     * @return bool True, if this is a special search operation.
-     */
-    public function hasOperator()
-    {
-        return $this->iOperator != Operator::NONE;
-    }
-
     /**
      * Extract key/value pairs from a query.
      *
@@ -148,253 +119,234 @@ class SearchDescription
 
     /////////// Search building functions
 
-
     /**
-     * Derive new searches by adding a full term to the existing search.
+     * Create a copy of this search description adding to search rank.
      *
-     * @param object $oSearchTerm  Description of the token.
-     * @param string $sPhraseType  Type of phrase the token is contained in.
-     * @param bool   $bFirstToken  True if the token is at the beginning of the
-     *                             query.
-     * @param bool   $bFirstPhrase True if the token is in the first phrase of
-     *                             the query.
-     * @param bool   $bLastToken   True if the token is at the end of the query.
+     * @param integer $iTermCost  Cost to add to the current search rank.
      *
-     * @return SearchDescription[] List of derived search descriptions.
+     * @return object Cloned search description.
      */
-    public function extendWithFullTerm($oSearchTerm, $sPhraseType, $bFirstToken, $bFirstPhrase, $bLastToken)
+    public function clone($iTermCost)
     {
-        $aNewSearches = array();
+        $oSearch = clone $this;
+        $oSearch->iSearchRank += $iTermCost;
 
-        if (($sPhraseType == '' || $sPhraseType == 'country')
-            && is_a($oSearchTerm, '\Nominatim\Token\Country')
-        ) {
-            if (!$this->sCountryCode) {
-                $oSearch = clone $this;
-                $oSearch->iSearchRank++;
-                $oSearch->sCountryCode = $oSearchTerm->sCountryCode;
-                // Country is almost always at the end of the string
-                // - increase score for finding it anywhere else (optimisation)
-                if (!$bLastToken) {
-                    $oSearch->iSearchRank += 5;
-                    $oSearch->iNamePhrase = -1;
-                }
-                $aNewSearches[] = $oSearch;
-            }
-        } elseif (($sPhraseType == '' || $sPhraseType == 'postalcode')
-                  && is_a($oSearchTerm, '\Nominatim\Token\Postcode')
-        ) {
-            if (!$this->sPostcode) {
-                // If we have structured search or this is the first term,
-                // make the postcode the primary search element.
-                if ($this->iOperator == Operator::NONE && $bFirstToken) {
-                    $oSearch = clone $this;
-                    $oSearch->iSearchRank++;
-                    $oSearch->iOperator = Operator::POSTCODE;
-                    $oSearch->aAddress = array_merge($this->aAddress, $this->aName);
-                    $oSearch->aName =
-                        array($oSearchTerm->iId => $oSearchTerm->sPostcode);
-                    $aNewSearches[] = $oSearch;
-                }
+        return $oSearch;
+    }
 
-                // If we have a structured search or this is not the first term,
-                // add the postcode as an addendum.
-                if ($this->iOperator != Operator::POSTCODE
-                    && ($sPhraseType == 'postalcode' || !empty($this->aName))
-                ) {
-                    $oSearch = clone $this;
-                    $oSearch->iSearchRank++;
-                    $oSearch->iNamePhrase = -1;
-                    if (strlen($oSearchTerm->sPostcode) < 4) {
-                        $oSearch->iSearchRank += 4 - strlen($oSearchTerm->sPostcode);
-                    }
-                    $oSearch->sPostcode = $oSearchTerm->sPostcode;
-                    $aNewSearches[] = $oSearch;
-                }
-            }
-        } elseif (($sPhraseType == '' || $sPhraseType == 'street')
-                 && is_a($oSearchTerm, '\Nominatim\Token\HouseNumber')
-        ) {
-            if (!$this->sHouseNumber && $this->iOperator != Operator::POSTCODE) {
-                // sanity check: if the housenumber is not mainly made
-                // up of numbers, add a penalty
-                $iSearchCost = 1;
-                if (preg_match('/\\d/', $oSearchTerm->sToken) === 0
-                    || preg_match_all('/[^0-9]/', $oSearchTerm->sToken, $aMatches) > 2) {
-                    $iSearchCost++;
-                }
-                if ($this->iOperator != Operator::NONE) {
-                    $iSearchCost++;
-                }
-                if (empty($oSearchTerm->iId)) {
-                    $iSearchCost++;
-                }
-                // also must not appear in the middle of the address
-                if (!empty($this->aAddress)
-                    || (!empty($this->aAddressNonSearch))
-                    || $this->sPostcode
-                ) {
-                    $iSearchCost++;
-                }
+    /**
+     * Check if the search currently includes a name.
+     *
+     * @param bool $bIncludeNonNames  If true, stop-word tokens are taken into
+     *                                account, too.
+     *
+     * @return bool True, if search has a name.
+     */
+    public function hasName($bIncludeNonNames = false)
+    {
+        return !empty($this->aName)
+               || (!empty($this->aNameNonSearch) && $bIncludeNonNames);
+    }
 
-                $oSearch = clone $this;
-                $oSearch->iSearchRank += $iSearchCost;
-                $oSearch->iNamePhrase = -1;
-                $oSearch->sHouseNumber = $oSearchTerm->sToken;
-                $aNewSearches[] = $oSearch;
-
-                // Housenumbers may appear in the name when the place has its own
-                // address terms.
-                if ($oSearchTerm->iId !== null
-                    && ($this->iNamePhrase >= 0 || empty($this->aName))
-                    && empty($this->aAddress)
-                   ) {
-                    $oSearch = clone $this;
-                    $oSearch->iSearchRank += $iSearchCost;
-                    $oSearch->aAddress = $this->aName;
-                    $oSearch->bRareName = false;
-                    $oSearch->aName = array($oSearchTerm->iId => $oSearchTerm->iId);
-                    $aNewSearches[] = $oSearch;
-                }
-            }
-        } elseif ($sPhraseType == ''
-                  && is_a($oSearchTerm, '\Nominatim\Token\SpecialTerm')
-        ) {
-            if ($this->iOperator == Operator::NONE) {
-                $oSearch = clone $this;
-                $oSearch->iSearchRank += 2;
-                $oSearch->iNamePhrase = -1;
-
-                $iOp = $oSearchTerm->iOperator;
-                if ($iOp == Operator::NONE) {
-                    if (!empty($this->aName) || $this->oContext->isBoundedSearch()) {
-                        $iOp = Operator::NAME;
-                    } else {
-                        $iOp = Operator::NEAR;
-                    }
-                    $oSearch->iSearchRank += 2;
-                } elseif (!$bFirstToken && !$bLastToken) {
-                    $oSearch->iSearchRank += 2;
-                }
-                if ($this->sHouseNumber) {
-                    $oSearch->iSearchRank++;
-                }
+    /**
+     * Check if the search currently includes an address term.
+     *
+     * @return bool True, if any address term is included, including stop-word
+     *              terms.
+     */
+    public function hasAddress()
+    {
+        return !empty($this->aAddress) || !empty($this->aAddressNonSearch);
+    }
 
-                $oSearch->setPoiSearch(
-                    $iOp,
-                    $oSearchTerm->sClass,
-                    $oSearchTerm->sType
-                );
-                $aNewSearches[] = $oSearch;
-            }
-        } elseif ($sPhraseType != 'country'
-                  && is_a($oSearchTerm, '\Nominatim\Token\Word')
-        ) {
-            $iWordID = $oSearchTerm->iId;
-            // Full words can only be a name if they appear at the beginning
-            // of the phrase. In structured search the name must forcably in
-            // the first phrase. In unstructured search it may be in a later
-            // phrase when the first phrase is a house number.
-            if (!empty($this->aName) || !($bFirstPhrase || $sPhraseType == '')) {
-                if (($sPhraseType == '' || !$bFirstPhrase) && $oSearchTerm->iTermCount > 1) {
-                    $oSearch = clone $this;
-                    $oSearch->iNamePhrase = -1;
-                    $oSearch->iSearchRank += 1;
-                    $oSearch->aAddress[$iWordID] = $iWordID;
-                    $aNewSearches[] = $oSearch;
-                }
-            } elseif (empty($this->aNameNonSearch)) {
-                $oSearch = clone $this;
-                $oSearch->iSearchRank++;
-                $oSearch->aName = array($iWordID => $iWordID);
-                if (CONST_Search_NameOnlySearchFrequencyThreshold) {
-                    $oSearch->bRareName =
-                        $oSearchTerm->iSearchNameCount
-                          < CONST_Search_NameOnlySearchFrequencyThreshold;
-                }
-                $aNewSearches[] = $oSearch;
-            }
-        }
+    /**
+     * Check if a country restriction is currently included in the search.
+     *
+     * @return bool True, if a country restriction is set.
+     */
+    public function hasCountry()
+    {
+        return $this->sCountryCode !== '';
+    }
 
-        return $aNewSearches;
+    /**
+     * Check if a postcode is currently included in the search.
+     *
+     * @return bool True, if a postcode is set.
+     */
+    public function hasPostcode()
+    {
+        return $this->sPostcode !== '';
     }
 
     /**
-     * Derive new searches by adding a partial term to the existing search.
+     * Check if a house number is set for the search.
      *
-     * @param string  $sToken             Term for the token.
-     * @param object  $oSearchTerm        Description of the token.
-     * @param bool    $bStructuredPhrases True if the search is structured.
-     * @param integer $iPhrase            Number of the phrase the token is in.
-     * @param array[] $aFullTokens        List of full term tokens with the
-     *                                    same name.
+     * @return bool True, if a house number is set.
+     */
+    public function hasHousenumber()
+    {
+        return $this->sHouseNumber !== '';
+    }
+
+    /**
+     * Check if a special type of place is requested.
      *
-     * @return SearchDescription[] List of derived search descriptions.
+     * @param integer $iOperator  When set, check for the particular
+     *                            operator used for the special type.
+     *
+     * @return bool True, if a special type is requested or, when an operator
+     *              is given, a special type with that operator is requested.
      */
-    public function extendWithPartialTerm($sToken, $oSearchTerm, $bStructuredPhrases, $iPhrase, $aFullTokens)
+    public function hasOperator($iOperator = null)
     {
-        // Only allow name terms.
-        if (!(is_a($oSearchTerm, '\Nominatim\Token\Word'))
-            || strpos($sToken, ' ') !== false
-        ) {
-            return array();
+        return $iOperator === null ? $this->iOperator != Operator::NONE : $this->iOperator == $iOperator;
+    }
+
+    /**
+     * Add the given token to the list of terms to search for in the address.
+     *
+     * @param integer $iId       ID of term to add.
+     * @param bool $bSearchable  Term should be used to search for result
+     *                           (i.e. term is not a stop word).
+     */
+    public function addAddressToken($iId, $bSearchable = true)
+    {
+        if ($bSearchable) {
+            $this->aAddress[$iId] = $iId;
+        } else {
+            $this->aAddressNonSearch[$iId] = $iId;
         }
+    }
 
-        $aNewSearches = array();
-        $iWordID = $oSearchTerm->iId;
+    /**
+     * Add the given full-word token to the list of terms to search for in the
+     * name.
+     *
+     * @param integer $iId    ID of term to add.
+     * @param bool $bRareName  True if the term is infrequent enough to not
+     *                         require other constraints for efficient search.
+     */
+    public function addNameToken($iId, $bRareName)
+    {
+        $this->aName[$iId] = $iId;
+        $this->bRareName = $bRareName;
+    }
 
-        if ((!$bStructuredPhrases || $iPhrase > 0)
-            && (!empty($this->aName))
-        ) {
-            $oSearch = clone $this;
-            $oSearch->iSearchRank++;
-            if (preg_match('#^[0-9 ]+$#', $sToken)) {
-                $oSearch->iSearchRank++;
-            }
-            if ($oSearchTerm->iSearchNameCount < CONST_Max_Word_Frequency) {
-                $oSearch->aAddress[$iWordID] = $iWordID;
-            } else {
-                $oSearch->aAddressNonSearch[$iWordID] = $iWordID;
-                if (!empty($aFullTokens)) {
-                    $oSearch->iSearchRank++;
-                }
-            }
-            $aNewSearches[] = $oSearch;
+    /**
+     * Add the given partial token to the list of terms to search for in
+     * the name.
+     *
+     * @param integer $iId            ID of term to add.
+     * @param bool $bSearchable       Term should be used to search for result
+     *                                (i.e. term is not a stop word).
+     * @param integer $iPhraseNumber  Index of phrase, where the partial term
+     *                                appears.
+     */
+    public function addPartialNameToken($iId, $bSearchable, $iPhraseNumber)
+    {
+        if ($bSearchable) {
+            $this->aName[$iId] = $iId;
+        } else {
+            $this->aNameNonSearch[$iId] = $iId;
         }
+        $this->iNamePhrase = $iPhraseNumber;
+    }
 
-        if ((!$this->sPostcode && !$this->aAddress && !$this->aAddressNonSearch)
-            && ((empty($this->aName) && empty($this->aNameNonSearch)) || $this->iNamePhrase == $iPhrase)
-        ) {
-            $oSearch = clone $this;
-            $oSearch->iSearchRank++;
-            if (empty($this->aName) && empty($this->aNameNonSearch)) {
-                $oSearch->iSearchRank++;
-            }
-            if (preg_match('#^[0-9 ]+$#', $sToken)) {
-                $oSearch->iSearchRank++;
-            }
-            if ($oSearchTerm->iSearchNameCount < CONST_Max_Word_Frequency) {
-                if (empty($this->aName)
-                    && CONST_Search_NameOnlySearchFrequencyThreshold
-                ) {
-                    $oSearch->bRareName =
-                        $oSearchTerm->iSearchNameCount
-                          < CONST_Search_NameOnlySearchFrequencyThreshold;
-                } else {
-                    $oSearch->bRareName = false;
-                }
-                $oSearch->aName[$iWordID] = $iWordID;
-            } else {
-                if (!empty($aFullTokens)) {
-                    $oSearch->iSearchRank++;
-                }
-                $oSearch->aNameNonSearch[$iWordID] = $iWordID;
-            }
-            $oSearch->iNamePhrase = $iPhrase;
-            $aNewSearches[] = $oSearch;
-        }
+    /**
+     * Set country restriction for the search.
+     *
+     * @param string $sCountryCode  Country code of country to restrict search to.
+     */
+    public function setCountry($sCountryCode)
+    {
+        $this->sCountryCode = $sCountryCode;
+        $this->iNamePhrase = -1;
+    }
+
+    /**
+     * Set postcode search constraint.
+     *
+     * @param string $sPostcode  Postcode the result should have.
+     */
+    public function setPostcode($sPostcode)
+    {
+        $this->sPostcode = $sPostcode;
+        $this->iNamePhrase = -1;
+    }
+
+    /**
+     * Make this search a search for a postcode object.
+     *
+     * @param integer $iId       Token Id for the postcode.
+     * @param string $sPostcode  Postcode to look for.
+     */
+    public function setPostcodeAsName($iId, $sPostcode)
+    {
+        $this->iOperator = Operator::POSTCODE;
+        $this->aAddress = array_merge($this->aAddress, $this->aName);
+        $this->aName = array($iId => $sPostcode);
+        $this->bRareName = true;
+        $this->iNamePhrase = -1;
+    }
+
+    /**
+     * Set house number search constraint.
+     *
+     * @param string $sNumber  House number the result should have.
+     */
+    public function setHousenumber($sNumber)
+    {
+        $this->sHouseNumber = $sNumber;
+        $this->iNamePhrase = -1;
+    }
+
+    /**
+     * Make this search a search for a house number.
+     *
+     * @param integer $iId  Token Id for the house number.
+     */
+    public function setHousenumberAsName($iId)
+    {
+        $this->aAddress = array_merge($this->aAddress, $this->aName);
+        $this->bRareName = false;
+        $this->aName = array($iId => $iId);
+        $this->iNamePhrase = -1;
+    }
+
+    /**
+     * Make this search a POI search.
+     *
+     * In a POI search, objects are not (only) searched by their name
+     * but also by the primary OSM key/value pair (class and type in Nominatim).
+     *
+     * @param integer $iOperator Type of POI search
+     * @param string  $sClass    Class (or OSM tag key) of POI.
+     * @param string  $sType     Type (or OSM tag value) of POI.
+     *
+     * @return void
+     */
+    public function setPoiSearch($iOperator, $sClass, $sType)
+    {
+        $this->iOperator = $iOperator;
+        $this->sClass = $sClass;
+        $this->sType = $sType;
+        $this->iNamePhrase = -1;
+    }
+
+    public function getNamePhrase()
+    {
+        return $this->iNamePhrase;
+    }
 
-        return $aNewSearches;
+    /**
+     * Get the global search context.
+     *
+     * @return object  Objects of global search constraints.
+     */
+    public function getContext()
+    {
+        return $this->oContext;
     }
 
     /////////// Query functions
@@ -415,7 +367,6 @@ class SearchDescription
     public function query(&$oDB, $iMinRank, $iMaxRank, $iLimit)
     {
         $aResults = array();
-        $iHousenumber = -1;
 
         if ($this->sCountryCode
             && empty($this->aName)
diff --git a/lib-php/SearchPosition.php b/lib-php/SearchPosition.php
new file mode 100644 (file)
index 0000000..e4260bf
--- /dev/null
@@ -0,0 +1,87 @@
+<?php
+
+namespace Nominatim;
+
+/**
+ * Description of the position of a token within a query.
+ */
+class SearchPosition
+{
+    private $sPhraseType;
+
+    private $iPhrase;
+    private $iNumPhrases;
+
+    private $iToken;
+    private $iNumTokens;
+
+
+    public function __construct($sPhraseType, $iPhrase, $iNumPhrases)
+    {
+        $this->sPhraseType = $sPhraseType;
+        $this->iPhrase = $iPhrase;
+        $this->iNumPhrases = $iNumPhrases;
+    }
+
+    public function setTokenPosition($iToken, $iNumTokens)
+    {
+        $this->iToken = $iToken;
+        $this->iNumTokens = $iNumTokens;
+    }
+
+    /**
+     * Check if the phrase can be of the given type.
+     *
+     * @param string  $sType  Type of phrase requested.
+     *
+     * @return bool True if the phrase is untyped or of the given type.
+     */
+    public function maybePhrase($sType)
+    {
+        return $this->sPhraseType == '' || $this->sPhraseType == $sType;
+    }
+
+    /**
+     * Check if the phrase is exactly of the given type.
+     *
+     * @param string  $sType  Type of phrase requested.
+     *
+     * @return bool True if the phrase is of the given type.
+     */
+    public function isPhrase($sType)
+    {
+        return $this->sPhraseType == $sType;
+    }
+
+    /**
+     * Return true if the token is the very first in the query.
+     */
+    public function isFirstToken()
+    {
+        return $this->iPhrase == 0 && $this->iToken == 0;
+    }
+
+    /**
+     * Check if the token is the final one in the query.
+     */
+    public function isLastToken()
+    {
+        return $this->iToken + 1 == $this->iNumTokens && $this->iPhrase + 1 == $this->iNumPhrases;
+    }
+
+    /**
+     * Check if the current token is part of the first phrase in the query.
+     */
+    public function isFirstPhrase()
+    {
+        return $this->iPhrase == 0;
+    }
+
+    /**
+     * Get the phrase position in the query.
+     */
+    public function getPhrase()
+    {
+        return $this->iPhrase;
+    }
+}
index b43db135c95ada03aa61b4a677cc0d560fb080e1..4bec20e9c35af04d6e6bfe17615e4eaf2a786cea 100644 (file)
@@ -33,7 +33,9 @@ class Shell
     public function addEnvPair($sKey, $sVal)
     {
         if (isset($sKey) && $sKey && isset($sVal)) {
-            if (!isset($this->aEnv)) $this->aEnv = $_ENV;
+            if (!isset($this->aEnv)) {
+                $this->aEnv = $_ENV;
+            }
             $this->aEnv = array_merge($this->aEnv, array($sKey => $sVal), $_ENV);
         }
         return $this;
@@ -75,11 +77,8 @@ class Shell
         return $iStat;
     }
 
-
-
     private function escapeParam($sParam)
     {
-        if (preg_match('/^-*\w+$/', $sParam)) return $sParam;
-        return escapeshellarg($sParam);
+        return (preg_match('/^-*\w+$/', $sParam)) ? $sParam : escapeshellarg($sParam); // NOTE(review): '$' also matches before a trailing "\n", so "arg\n" is returned unescaped; consider '\z' or the D modifier.
     }
 }
index 518c0a31e3df225c37a19073946dd6c0dfd11035..c9b7b6af1a93b1e778f9721397dec8760809c005 100644 (file)
@@ -8,9 +8,9 @@ namespace Nominatim\Token;
 class Country
 {
     /// Database word id, if available.
-    public $iId;
+    private $iId;
     /// Two-letter country code (lower-cased).
-    public $sCountryCode;
+    private $sCountryCode;
 
     public function __construct($iId, $sCountryCode)
     {
@@ -18,6 +18,44 @@ class Country
         $this->sCountryCode = $sCountryCode;
     }
 
+    public function getId()
+    {
+        return $this->iId; // database word id, if available
+    }
+
+    /**
+     * Check if the token can be added to the given search.
+     * Needs a search that has no country yet and an untyped or 'country' phrase.
+     *
+     * @param object  $oSearch      Partial search description derived so far.
+     * @param object  $oPosition    Description of the token position within
+     *                              the query.
+     *
+     * @return True if the token is compatible with the search configuration
+     *         given the position.
+     */
+    public function isExtendable($oSearch, $oPosition)
+    {
+        return !$oSearch->hasCountry() && $oPosition->maybePhrase('country');
+    }
+
+    /**
+     * Derive new searches by adding this token to an existing search.
+     *
+     * @param object  $oSearch      Partial search description derived so far.
+     * @param object  $oPosition    Description of the token position within
+     *                              the query.
+     *
+     * @return SearchDescription[] List with the single derived search description.
+     */
+    public function extendSearch($oSearch, $oPosition)
+    {
+        $oNewSearch = $oSearch->clone($oPosition->isLastToken() ? 1 : 6); // cost 1 as last token of the query, 6 anywhere else
+        $oNewSearch->setCountry($this->sCountryCode);
+
+        return array($oNewSearch);
+    }
+
     public function debugInfo()
     {
         return array(
@@ -26,4 +64,9 @@ class Country
                 'Info' => $this->sCountryCode
                );
     }
+
+    public function debugCode()
+    {
+        return 'C'; // type letter for country tokens, shown next to the word id in debug output
+    }
 }
index 5c7c6e9b633a4af458acbc637249ffad2453f7ac..cd60d3ca5620b7851a36736971adf753c6db49f9 100644 (file)
@@ -8,9 +8,9 @@ namespace Nominatim\Token;
 class HouseNumber
 {
     /// Database word id, if available.
-    public $iId;
+    private $iId;
     /// Normalized house number.
-    public $sToken;
+    private $sToken;
 
     public function __construct($iId, $sToken)
     {
@@ -18,6 +18,80 @@ class HouseNumber
         $this->sToken = $sToken;
     }
 
+    public function getId()
+    {
+        return $this->iId; // database word id, if available
+    }
+
+    /**
+     * Check if the token can be added to the given search.
+     * Needs no house number or POSTCODE operator yet and an untyped or 'street' phrase.
+     *
+     * @param object  $oSearch      Partial search description derived so far.
+     * @param object  $oPosition    Description of the token position within
+     *                              the query.
+     *
+     * @return True if the token is compatible with the search configuration
+     *         given the position.
+     */
+    public function isExtendable($oSearch, $oPosition)
+    {
+        return !$oSearch->hasHousenumber()
+               && !$oSearch->hasOperator(\Nominatim\Operator::POSTCODE)
+               && $oPosition->maybePhrase('street');
+    }
+
+    /**
+     * Derive new searches by adding this token to an existing search.
+     *
+     * @param object  $oSearch      Partial search description derived so far.
+     * @param object  $oPosition    Description of the token position within
+     *                              the query.
+     *
+     * @return SearchDescription[] One or two derived search descriptions.
+     */
+    public function extendSearch($oSearch, $oPosition)
+    {
+        $aNewSearches = array();
+
+        // Sanity check: add a penalty when the token contains no digit
+        // at all or more than two non-digit characters.
+        $iSearchCost = 1;
+        if (preg_match('/\\d/', $this->sToken) === 0
+            || preg_match_all('/[^0-9]/', $this->sToken, $aMatches) > 2) {
+            $iSearchCost++;
+        }
+        if (!$oSearch->hasOperator(\Nominatim\Operator::NONE)) {
+            $iSearchCost++;
+        }
+        if (empty($this->iId)) { // no usable database word id for this token
+            $iSearchCost++;
+        }
+        // Penalize when the search already has address terms or a postcode.
+        if ($oSearch->hasAddress() || $oSearch->hasPostcode()) {
+            $iSearchCost++;
+        }
+
+        $oNewSearch = $oSearch->clone($iSearchCost);
+        $oNewSearch->setHousenumber($this->sToken);
+        $aNewSearches[] = $oNewSearch;
+
+        // Housenumbers may appear in the name when the place has its own
+        // address terms. This variant needs a word id and no address yet.
+        if ($this->iId !== null
+            && ($oSearch->getNamePhrase() >= 0 || !$oSearch->hasName())
+            && !$oSearch->hasAddress()
+        ) {
+            $oNewSearch = $oSearch->clone($iSearchCost);
+            $oNewSearch->setHousenumberAsName($this->iId);
+
+            $aNewSearches[] = $oNewSearch;
+        }
+
+        return $aNewSearches;
+    }
+
+
     public function debugInfo()
     {
         return array(
@@ -26,4 +100,9 @@ class HouseNumber
                 'Info' => array('nr' => $this->sToken)
                );
     }
+
+    public function debugCode()
+    {
+        return 'H'; // type letter for house number tokens, shown next to the word id in debug output
+    }
 }
index 2df9fe0586710f120c821b09f809f286cd616f44..a599648c21acdb48191c684f4f94c41950e2ae8c 100644 (file)
@@ -7,6 +7,7 @@ require_once(CONST_LibDir.'/TokenHousenumber.php');
 require_once(CONST_LibDir.'/TokenPostcode.php');
 require_once(CONST_LibDir.'/TokenSpecialTerm.php');
 require_once(CONST_LibDir.'/TokenWord.php');
+require_once(CONST_LibDir.'/TokenPartial.php');
 require_once(CONST_LibDir.'/SpecialSearchOperator.php');
 
 /**
@@ -17,15 +18,6 @@ require_once(CONST_LibDir.'/SpecialSearchOperator.php');
  * tokens do not have a common base class. All tokens need to have a field
  * with the word id that points to an entry in the `word` database table
  * but otherwise the information saved about a token can be very different.
- *
- * There are two different kinds of token words: full words and partial terms.
- *
- * Full words start with a space. They represent a complete name of a place.
- * All special tokens are normally full words.
- *
- * Partial terms have no space at the beginning. They may represent a part of
- * a name of a place (e.g. in the name 'World Trade Center' a partial term
- * would be 'Trade' or 'Trade Center'). They are only used in TokenWord.
  */
 class TokenList
 {
@@ -64,7 +56,7 @@ class TokenList
      */
     public function containsAny($sWord)
     {
-        return isset($this->aTokens[$sWord]) || isset($this->aTokens[' '.$sWord]);
+        return isset($this->aTokens[$sWord]);
     }
 
     /**
@@ -86,8 +78,8 @@ class TokenList
 
         foreach ($this->aTokens as $aTokenList) {
             foreach ($aTokenList as $oToken) {
-                if (is_a($oToken, '\Nominatim\Token\Word') && !$oToken->bPartial) {
-                    $ids[$oToken->iId] = $oToken->iId;
+                if (is_a($oToken, '\Nominatim\Token\Word')) {
+                    $ids[$oToken->getId()] = $oToken->getId();
                 }
             }
         }
@@ -117,9 +109,9 @@ class TokenList
         $aWordsIDs = array();
         foreach ($this->aTokens as $sToken => $aWords) {
             foreach ($aWords as $aToken) {
-                if ($aToken->iId !== null) {
-                    $aWordsIDs[$aToken->iId] =
-                        '#'.$sToken.'('.$aToken->iId.')#';
+                $iId = $aToken->getId();
+                if ($iId !== null) {
+                    $aWordsIDs[$iId] = '#'.$sToken.'('.$aToken->debugCode().' '.$iId.')#';
                 }
             }
         }
diff --git a/lib-php/TokenPartial.php b/lib-php/TokenPartial.php
new file mode 100644 (file)
index 0000000..131bb2a
--- /dev/null
@@ -0,0 +1,118 @@
+<?php
+
+namespace Nominatim\Token;
+
+/**
+ * A partial word token: a term that may represent only part of a place name.
+ */
+class Partial
+{
+    /// Database word id, if applicable.
+    private $iId;
+    /// Number of appearances in the database.
+    private $iSearchNameCount;
+    /// True, if the token consists exclusively of digits and spaces.
+    private $bNumberToken;
+
+    public function __construct($iId, $sToken, $iSearchNameCount)
+    {
+        $this->iId = $iId;
+        $this->bNumberToken = (bool) preg_match('#^[0-9 ]+$#', $sToken); // $sToken itself is not stored
+        $this->iSearchNameCount = $iSearchNameCount;
+    }
+
+    public function getId()
+    {
+        return $this->iId;
+    }
+
+    /**
+     * Check if the token can be added to the given search.
+     * Partial tokens fit any phrase except one typed as 'country'.
+     *
+     * @param object  $oSearch      Partial search description derived so far.
+     * @param object  $oPosition    Description of the token position within
+     *                              the query.
+     *
+     * @return True if the token is compatible with the search configuration
+     *         given the position.
+     */
+    public function isExtendable($oSearch, $oPosition)
+    {
+        return !$oPosition->isPhrase('country');
+    }
+
+    /**
+     * Derive new searches by adding this token to an existing search.
+     *
+     * @param object  $oSearch      Partial search description derived so far.
+     * @param object  $oPosition    Description of the token position within
+     *                              the query.
+     *
+     * @return SearchDescription[] List of derived search descriptions.
+     */
+    public function extendSearch($oSearch, $oPosition)
+    {
+        $aNewSearches = array();
+
+        // Partial token in Address: needs a name and an untyped or non-first phrase.
+        if (($oPosition->isPhrase('') || !$oPosition->isFirstPhrase())
+            && $oSearch->hasName()
+        ) {
+            $iSearchCost = $this->bNumberToken ? 2 : 1; // digit-only tokens cost one more
+            if ($this->iSearchNameCount >= CONST_Max_Word_Frequency) { // very frequent token
+                $iSearchCost += 1;
+            }
+
+            $oNewSearch = $oSearch->clone($iSearchCost);
+            $oNewSearch->addAddressToken(
+                $this->iId,
+                $this->iSearchNameCount < CONST_Max_Word_Frequency
+            );
+
+            $aNewSearches[] = $oNewSearch;
+        }
+
+        // Partial token in Name: no postcode/address yet and same phrase as the name so far.
+        if ((!$oSearch->hasPostcode() && !$oSearch->hasAddress())
+            && (!$oSearch->hasName(true)
+                || $oSearch->getNamePhrase() == $oPosition->getPhrase())
+        ) {
+            $iSearchCost = 1;
+            if (!$oSearch->hasName(true)) { // extra cost when hasName(true) is still false
+                $iSearchCost += 1;
+            }
+            if ($this->bNumberToken) { // digit-only tokens cost one more
+                $iSearchCost += 1;
+            }
+
+            $oNewSearch = $oSearch->clone($iSearchCost);
+            $oNewSearch->addPartialNameToken(
+                $this->iId,
+                $this->iSearchNameCount < CONST_Max_Word_Frequency,
+                $oPosition->getPhrase()
+            );
+
+            $aNewSearches[] = $oNewSearch;
+        }
+
+        return $aNewSearches;
+    }
+
+
+    public function debugInfo()
+    {
+        return array(
+                'ID' => $this->iId,
+                'Type' => 'partial',
+                'Info' => array(
+                           'count' => $this->iSearchNameCount
+                          )
+               );
+    }
+
+    public function debugCode()
+    {
+        return 'w'; // type letter for partial tokens, shown next to the word id in debug output
+    }
+}
index 8fa2ae8021d1bfbed459fb0c546d379271f1188c..c0b42fad5ae3fab4b4360f153806e4365f20b357 100644 (file)
@@ -8,11 +8,11 @@ namespace Nominatim\Token;
 class Postcode
 {
     /// Database word id, if available.
-    public $iId;
+    private $iId;
     /// Full nomralized postcode (upper cased).
-    public $sPostcode;
+    private $sPostcode;
     // Optional country code the postcode belongs to (currently unused).
-    public $sCountryCode;
+    private $sCountryCode;
 
     public function __construct($iId, $sPostcode, $sCountryCode = '')
     {
@@ -21,6 +21,67 @@ class Postcode
         $this->sCountryCode = empty($sCountryCode) ? '' : $sCountryCode;
     }
 
+    public function getId()
+    {
+        return $this->iId; // database word id, if available
+    }
+
+    /**
+     * Check if the token can be added to the given search.
+     * Needs a search without a postcode and an untyped or 'postalcode' phrase.
+     *
+     * @param object  $oSearch      Partial search description derived so far.
+     * @param object  $oPosition    Description of the token position within
+     *                              the query.
+     *
+     * @return True if the token is compatible with the search configuration
+     *         given the position.
+     */
+    public function isExtendable($oSearch, $oPosition)
+    {
+        return !$oSearch->hasPostcode() && $oPosition->maybePhrase('postalcode');
+    }
+
+    /**
+     * Derive new searches by adding this token to an existing search.
+     *
+     * @param object  $oSearch      Partial search description derived so far.
+     * @param object  $oPosition    Description of the token position within
+     *                              the query.
+     *
+     * @return SearchDescription[] List of derived search descriptions (may be empty).
+     */
+    public function extendSearch($oSearch, $oPosition)
+    {
+        $aNewSearches = array();
+
+        // When hasOperator(NONE) holds and this is the very first token of
+        // the query, make the postcode the primary search element.
+        if ($oSearch->hasOperator(\Nominatim\Operator::NONE) && $oPosition->isFirstToken()) {
+            $oNewSearch = $oSearch->clone(1);
+            $oNewSearch->setPostcodeAsName($this->iId, $this->sPostcode);
+
+            $aNewSearches[] = $oNewSearch;
+        }
+
+        // In a phrase typed 'postalcode', or once the search has a name, add the
+        // postcode as an addendum (unless hasOperator(POSTCODE) already holds).
+        if (!$oSearch->hasOperator(\Nominatim\Operator::POSTCODE)
+            && ($oPosition->isPhrase('postalcode') || $oSearch->hasName())
+        ) {
+            $iPenalty = 1;
+            if (strlen($this->sPostcode) < 4) { // short postcode: +1 per character below four
+                $iPenalty += 4 - strlen($this->sPostcode);
+            }
+            $oNewSearch = $oSearch->clone($iPenalty);
+            $oNewSearch->setPostcode($this->sPostcode);
+
+            $aNewSearches[] = $oNewSearch;
+        }
+
+        return $aNewSearches;
+    }
+
     public function debugInfo()
     {
         return array(
@@ -29,4 +90,9 @@ class Postcode
                 'Info' => $this->sPostcode.'('.$this->sCountryCode.')'
                );
     }
+
+    public function debugCode()
+    {
+        return 'P'; // type letter for postcode tokens, shown next to the word id in debug output
+    }
 }
index b2c312ec90e53d8a52b022aeb01ab057059fbd3f..5b2d4c70a64f75de8971da78500bc2b8ad65e331 100644 (file)
@@ -10,13 +10,13 @@ require_once(CONST_LibDir.'/SpecialSearchOperator.php');
 class SpecialTerm
 {
     /// Database word id, if applicable.
-    public $iId;
+    private $iId;
     /// Class (or OSM tag key) of the place to look for.
-    public $sClass;
+    private $sClass;
     /// Type (or OSM tag value) of the place to look for.
-    public $sType;
+    private $sType;
     /// Relationship of the operator to the object (see Operator class).
-    public $iOperator;
+    private $iOperator;
 
     public function __construct($iID, $sClass, $sType, $iOperator)
     {
@@ -26,6 +26,62 @@ class SpecialTerm
         $this->iOperator = $iOperator;
     }
 
+    public function getId()
+    {
+        return $this->iId; // database word id, if applicable
+    }
+
+    /**
+     * Check if the token can be added to the given search.
+     * Needs an untyped phrase and a search for which hasOperator() is false.
+     *
+     * @param object  $oSearch      Partial search description derived so far.
+     * @param object  $oPosition    Description of the token position within
+     *                              the query.
+     *
+     * @return True if the token is compatible with the search configuration
+     *         given the position.
+     */
+    public function isExtendable($oSearch, $oPosition)
+    {
+        return !$oSearch->hasOperator() && $oPosition->isPhrase('');
+    }
+
+    /**
+     * Derive new searches by adding this token to an existing search.
+     *
+     * @param object  $oSearch      Partial search description derived so far.
+     * @param object  $oPosition    Description of the token position within
+     *                              the query.
+     *
+     * @return SearchDescription[] List with the single derived search description.
+     */
+    public function extendSearch($oSearch, $oPosition)
+    {
+        $iSearchCost = 2; // base cost of a special term
+
+        $iOp = $this->iOperator;
+        if ($iOp == \Nominatim\Operator::NONE) { // term has no operator of its own: choose one
+            if ($oSearch->hasName() || $oSearch->getContext()->isBoundedSearch()) {
+                $iOp = \Nominatim\Operator::NAME;
+            } else {
+                $iOp = \Nominatim\Operator::NEAR;
+            }
+            $iSearchCost += 2;
+        } elseif (!$oPosition->isFirstToken() && !$oPosition->isLastToken()) { // explicit operator, but mid-query
+            $iSearchCost += 2;
+        }
+        if ($oSearch->hasHousenumber()) {
+            $iSearchCost ++;
+        }
+
+        $oNewSearch = $oSearch->clone($iSearchCost);
+        $oNewSearch->setPoiSearch($iOp, $this->sClass, $this->sType);
+
+        return array($oNewSearch);
+    }
+
+
     public function debugInfo()
     {
         return array(
@@ -38,4 +94,9 @@ class SpecialTerm
                           )
                );
     }
+
+    public function debugCode()
+    {
+        return 'S'; // type letter for special-term tokens, shown next to the word id in debug output
+    }
 }
index fc28535d4582e459f5d88c72b8977efaf1930fa9..59456e35aaef3d309b8d2da23aca21df2d546b9f 100644 (file)
@@ -8,31 +8,95 @@ namespace Nominatim\Token;
 class Word
 {
     /// Database word id, if applicable.
-    public $iId;
-    /// If true, the word may represent only part of a place name.
-    public $bPartial;
+    private $iId;
     /// Number of appearances in the database.
-    public $iSearchNameCount;
+    private $iSearchNameCount;
     /// Number of terms in the word.
-    public $iTermCount;
+    private $iTermCount;
 
-    public function __construct($iId, $bPartial, $iSearchNameCount, $iTermCount)
+    public function __construct($iId, $iSearchNameCount, $iTermCount)
     {
         $this->iId = $iId;
-        $this->bPartial = $bPartial;
         $this->iSearchNameCount = $iSearchNameCount;
         $this->iTermCount = $iTermCount;
     }
 
+    public function getId()
+    {
+        return $this->iId;
+    }
+
+    /**
+     * Check if the token can be added to the given search.
+     * Full words fit any phrase except one typed as 'country'.
+     *
+     * @param object  $oSearch      Partial search description derived so far.
+     * @param object  $oPosition    Description of the token position within
+     *                              the query.
+     *
+     * @return True if the token is compatible with the search configuration
+     *         given the position.
+     */
+    public function isExtendable($oSearch, $oPosition)
+    {
+        return !$oPosition->isPhrase('country');
+    }
+
+    /**
+     * Derive new searches by adding this token to an existing search.
+     *
+     * @param object  $oSearch      Partial search description derived so far.
+     * @param object  $oPosition    Description of the token position within
+     *                              the query.
+     *
+     * @return SearchDescription[] List of derived search descriptions (may be empty).
+     */
+    public function extendSearch($oSearch, $oPosition)
+    {
+        // Full words can only be a name if they appear at the beginning
+        // of the phrase. In structured search the name must necessarily be in
+        // the first phrase. In unstructured search it may be in a later
+        // phrase when the first phrase is a house number.
+        if ($oSearch->hasName()
+            || !($oPosition->isFirstPhrase() || $oPosition->isPhrase(''))
+        ) {
+            if ($this->iTermCount > 1 // only multi-term words are used as address tokens here
+                && ($oPosition->isPhrase('') || !$oPosition->isFirstPhrase())
+            ) {
+                $oNewSearch = $oSearch->clone(1);
+                $oNewSearch->addAddressToken($this->iId);
+
+                return array($oNewSearch);
+            }
+        } elseif (!$oSearch->hasName(true)) { // otherwise use the word as the name
+            $oNewSearch = $oSearch->clone(1);
+            $oNewSearch->addNameToken(
+                $this->iId,
+                CONST_Search_NameOnlySearchFrequencyThreshold
+                && $this->iSearchNameCount
+                          < CONST_Search_NameOnlySearchFrequencyThreshold
+            );
+
+            return array($oNewSearch);
+        }
+
+        return array(); // the word cannot be used at this position
+    }
+
     public function debugInfo()
     {
         return array(
                 'ID' => $this->iId,
                 'Type' => 'word',
                 'Info' => array(
-                           'partial' => $this->bPartial,
-                           'count' => $this->iSearchNameCount
+                           'count' => $this->iSearchNameCount,
+                           'terms' => $this->iTermCount
                           )
                );
     }
+
+    public function debugCode()
+    {
+        return 'W'; // type letter for full-word tokens, shown next to the word id in debug output
+    }
 }
index b038cf2a7752832449df94669f84b77566cd3039..9f205de7a8fe064f1dc9d40be4ee762548bf921f 100644 (file)
@@ -49,7 +49,9 @@
     $oDB->connect();
 
     if (isset($aCMDResult['output-type'])) {
-        if (!isset($aRankmap[$aCMDResult['output-type']])) fail('unknown output-type: '.$aCMDResult['output-type']);
+        if (!isset($aRankmap[$aCMDResult['output-type']])) {
+            fail('unknown output-type: '.$aCMDResult['output-type']);
+        }
         $iOutputRank = $aRankmap[$aCMDResult['output-type']];
     } else {
         $iOutputRank = $aRankmap['street'];
 
     // Preferred language
     $oParams = new Nominatim\ParameterParser();
-    if (!isset($aCMDResult['language'])) $aCMDResult['language'] = 'xx';
+    if (!isset($aCMDResult['language'])) {
+        $aCMDResult['language'] = 'xx';
+    }
     $aLangPrefOrder = $oParams->getPreferredLanguages($aCMDResult['language']);
     $sLanguagePrefArraySQL = $oDB->getArraySQL($oDB->getDBQuotedList($aLangPrefOrder));
 
     // output formatting: build up a lookup table that maps address ranks to columns
     $aColumnMapping = array();
     $iNumCol = 0;
-    if (!isset($aCMDResult['output-format'])) $aCMDResult['output-format'] = 'street;suburb;city;county;state;country';
+    if (!isset($aCMDResult['output-format'])) {
+        $aCMDResult['output-format'] = 'street;suburb;city;county;state;country';
+    }
     foreach (preg_split('/\s*;\s*/', $aCMDResult['output-format']) as $sColumn) {
         $bHasData = false;
         foreach (preg_split('/\s*,\s*/', $sColumn) as $sRank) {
@@ -80,7 +86,9 @@
                 }
             }
         }
-        if ($bHasData) $iNumCol++;
+        if ($bHasData) {
+            $iNumCol++;
+        }
     }
 
     // build the query for objects
     if ($sOsmType) {
         $sSQL = 'select place_id from placex where osm_type = :osm_type and osm_id = :osm_id';
         $sParentId = $oDB->getOne($sSQL, array('osm_type' => $sOsmType, 'osm_id' => $sOsmId));
-        if (!$sParentId) fail('Could not find place '.$sOsmType.' '.$sOsmId);
+        if (!$sParentId) {
+            fail('Could not find place '.$sOsmType.' '.$sOsmId);
+        }
     }
     if ($sParentId) {
         $sPlacexSQL .= ' and place_id in (select place_id from place_addressline where address_place_id = '.$sParentId.' and isaddress)';
     $oResults = $oDB->getQueryStatement($sPlacexSQL);
     $fOutstream = fopen('php://output', 'w');
     while ($aRow = $oResults->fetch()) {
-        //var_dump($aRow);
         $iPlaceID = $aRow['place_id'];
         $sSQL = "select rank_address,get_name_by_language(name,$sLanguagePrefArraySQL) as localname from get_addressdata(:place_id, -1)";
         $sSQL .= ' WHERE isaddress';
index ea58f37c44c433dbb9867827086c9f39af131592..3075070a3f404d64f6408e43111023f5e744eec5 100644 (file)
@@ -40,7 +40,9 @@ $oDB->connect();
 $fPostgresVersion = $oDB->getPostgresVersion();
 
 $aDSNInfo = Nominatim\DB::parseDSN(getSetting('DATABASE_DSN'));
-if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;
+if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) {
+    $aDSNInfo['port'] = 5432;
+}
 
 // cache memory to be used by osm2pgsql, should not be more than the available memory
 $iCacheMemory = (isset($aResult['osm2pgsql-cache'])?$aResult['osm2pgsql-cache']:2000);
index d6aa3d9b0d5f0978045b037427520a059a936c3b..39a375062fe914ce3a178a467554d2dbc718e092 100644 (file)
@@ -62,11 +62,15 @@ if (!$aResult['search-only']) {
     $oPlaceLookup->setLanguagePreference(array('en'));
 
     echo 'Warm reverse: ';
-    if ($bVerbose) echo "\n";
+    if ($bVerbose) {
+        echo "\n";
+    }
     for ($i = 0; $i < 1000; $i++) {
         $fLat = rand(-9000, 9000) / 100;
         $fLon = rand(-18000, 18000) / 100;
-        if ($bVerbose) echo "$fLat, $fLon = ";
+        if ($bVerbose) {
+            echo "$fLat, $fLon = ";
+        }
 
         $oLookup = $oReverseGeocode->lookup($fLat, $fLon);
         $aSearchResults = $oLookup ? $oPlaceLookup->lookup(array($oLookup->iId => $oLookup)) : null;
@@ -79,10 +83,14 @@ if (!$aResult['reverse-only']) {
     $oGeocode = new Nominatim\Geocode($oDB);
 
     echo 'Warm search: ';
-    if ($bVerbose) echo "\n";
+    if ($bVerbose) {
+        echo "\n";
+    }
     $sSQL = 'SELECT word FROM word WHERE word is not null ORDER BY search_name_count DESC LIMIT 1000';
     foreach ($oDB->getCol($sSQL) as $sWord) {
-        if ($bVerbose) echo "$sWord = ";
+        if ($bVerbose) {
+            echo "$sWord = ";
+        }
 
         $oGeocode->setLanguagePreference(array('en'));
         $oGeocode->setQuery($sWord);
index 9c971e5f28eea2b1c7a5151bf491a332384b65a4..a52e8fcee0c6563c971733dd002139ed92958070 100644 (file)
@@ -9,8 +9,12 @@ function getCmdOpt($aArg, $aSpec, &$aResult, $bExitOnError = false, $bExitOnUnkn
 
     foreach ($aSpec as $aLine) {
         if (is_array($aLine)) {
-            if ($aLine[0]) $aQuick['--'.$aLine[0]] = $aLine;
-            if ($aLine[1]) $aQuick['-'.$aLine[1]] = $aLine;
+            if ($aLine[0]) {
+                $aQuick['--'.$aLine[0]] = $aLine;
+            }
+            if ($aLine[1]) {
+                $aQuick['-'.$aLine[1]] = $aLine;
+            }
             $aCounts[$aLine[0]] = 0;
         }
     }
@@ -28,7 +32,9 @@ function getCmdOpt($aArg, $aSpec, &$aResult, $bExitOnError = false, $bExitOnUnkn
                     $xVal = array();
                     for ($n = $aLine[4]; $i < $iSize && $n; $n--) {
                         $i++;
-                        if ($i >= $iSize || $aArg[$i][0] == '-') showUsage($aSpec, $bExitOnError, 'Parameter of  \''.$aLine[0].'\' is missing');
+                        if ($i >= $iSize || $aArg[$i][0] == '-') {
+                            showUsage($aSpec, $bExitOnError, 'Parameter of  \''.$aLine[0].'\' is missing');
+                        }
 
                         switch ($aLine[6]) {
                             case 'realpath':
@@ -56,7 +62,9 @@ function getCmdOpt($aArg, $aSpec, &$aResult, $bExitOnError = false, $bExitOnUnkn
                                 break;
                         }
                     }
-                    if ($aLine[4] == 1) $xVal = $xVal[0];
+                    if ($aLine[4] == 1) {
+                        $xVal = $xVal[0];
+                    }
                 } else {
                     $xVal = true;
                 }
@@ -65,7 +73,9 @@ function getCmdOpt($aArg, $aSpec, &$aResult, $bExitOnError = false, $bExitOnUnkn
             }
 
             if ($aLine[3] > 1) {
-                if (!array_key_exists($aLine[0], $aResult)) $aResult[$aLine[0]] = array();
+                if (!array_key_exists($aLine[0], $aResult)) {
+                    $aResult[$aLine[0]] = array();
+                }
                 $aResult[$aLine[0]][] = $xVal;
             } else {
                 $aResult[$aLine[0]] = $xVal;
@@ -75,18 +85,23 @@ function getCmdOpt($aArg, $aSpec, &$aResult, $bExitOnError = false, $bExitOnUnkn
         }
     }
 
-    if (array_key_exists('help', $aResult)) showUsage($aSpec);
-    if ($bUnknown && $bExitOnUnknown) showUsage($aSpec, $bExitOnError, 'Unknown option \''.$bUnknown.'\'');
+    if (array_key_exists('help', $aResult)) {
+        showUsage($aSpec);
+    }
+    if ($bUnknown && $bExitOnUnknown) {
+        showUsage($aSpec, $bExitOnError, 'Unknown option \''.$bUnknown.'\'');
+    }
 
     foreach ($aSpec as $aLine) {
         if (is_array($aLine)) {
-            if ($aCounts[$aLine[0]] < $aLine[2]) showUsage($aSpec, $bExitOnError, 'Option \''.$aLine[0].'\' is missing');
-            if ($aCounts[$aLine[0]] > $aLine[3]) showUsage($aSpec, $bExitOnError, 'Option \''.$aLine[0].'\' is pressent too many times');
-            switch ($aLine[6]) {
-                case 'bool':
-                    if (!array_key_exists($aLine[0], $aResult))
-                        $aResult[$aLine[0]] = false;
-                    break;
+            if ($aCounts[$aLine[0]] < $aLine[2]) {
+                showUsage($aSpec, $bExitOnError, 'Option \''.$aLine[0].'\' is missing');
+            }
+            if ($aCounts[$aLine[0]] > $aLine[3]) {
+                showUsage($aSpec, $bExitOnError, 'Option \''.$aLine[0].'\' is pressent too many times');
+            }
+            if ($aLine[6] == 'bool' && !array_key_exists($aLine[0], $aResult)) {
+                $aResult[$aLine[0]] = false;
             }
         }
     }
@@ -109,8 +124,12 @@ function showUsage($aSpec, $bExit = false, $sError = false)
                 echo "\n";
             }
             $aNames = array();
-            if ($aLine[1]) $aNames[] = '-'.$aLine[1];
-            if ($aLine[0]) $aNames[] = '--'.$aLine[0];
+            if ($aLine[1]) {
+                $aNames[] = '-'.$aLine[1];
+            }
+            if ($aLine[0]) {
+                $aNames[] = '--'.$aLine[0];
+            }
             $sName = join(', ', $aNames);
             echo '  '.$sName.str_repeat(' ', 30-strlen($sName)).$aLine[7]."\n";
         } else {
index f2d529800f474bc49853b211731f26de91dc0e6e..d6cc8a245c3b377a9954d7509c5e409c114affe7 100644 (file)
@@ -81,6 +81,10 @@ if (CONST_NoAccessControl) {
         header('Access-Control-Allow-Headers: '.$_SERVER['HTTP_ACCESS_CONTROL_REQUEST_HEADERS']);
     }
 }
-if (isset($_SERVER['REQUEST_METHOD']) && $_SERVER['REQUEST_METHOD'] == 'OPTIONS') exit;
+if (isset($_SERVER['REQUEST_METHOD']) && $_SERVER['REQUEST_METHOD'] == 'OPTIONS') {
+    exit;
+}
 
-if (CONST_Debug) header('Content-type: text/html; charset=utf-8');
+if (CONST_Debug) {
+    header('Content-type: text/html; charset=utf-8');
+}
index 8d82c5b32def09bc7a48bc45e0030898454855f6..d95ad4eef7b274bdb420378f840991f5857752c9 100644 (file)
@@ -6,10 +6,7 @@ function loadSettings($sProjectDir)
     // Temporary hack to set the direcory via environment instead of
     // the installed scripts. Neither setting is part of the official
     // set of settings.
-    defined('CONST_DataDir') or define('CONST_DataDir', $_SERVER['NOMINATIM_DATADIR']);
-    defined('CONST_SqlDir') or define('CONST_SqlDir', $_SERVER['NOMINATIM_SQLDIR']);
     defined('CONST_ConfigDir') or define('CONST_ConfigDir', $_SERVER['NOMINATIM_CONFIGDIR']);
-    defined('CONST_Default_ModulePath') or define('CONST_Default_ModulePath', $_SERVER['NOMINATIM_DATABASE_MODULE_SRC_PATH']);
 }
 
 function getSetting($sConfName, $sDefault = null)
@@ -32,22 +29,14 @@ function getSettingBool($sConfName)
            || strcmp($sVal, '1') == 0;
 }
 
-function getSettingConfig($sConfName, $sSystemConfig)
-{
-    $sValue = $_SERVER['NOMINATIM_'.$sConfName];
-
-    if (!$sValue) {
-        return CONST_ConfigDir.'/'.$sSystemConfig;
-    }
-
-    return $sValue;
-}
-
 function fail($sError, $sUserError = false)
 {
-    if (!$sUserError) $sUserError = $sError;
+    if (!$sUserError) {
+        $sUserError = $sError;
+    }
     error_log('ERROR: '.$sError);
-    var_dump($sUserError)."\n";
+    var_dump($sUserError);
+    echo "\n";
     exit(-1);
 }
 
@@ -95,8 +84,9 @@ function getDatabaseDate(&$oDB)
 
 function byImportance($a, $b)
 {
-    if ($a['importance'] != $b['importance'])
+    if ($a['importance'] != $b['importance']) {
         return ($a['importance'] > $b['importance']?-1:1);
+    }
 
     return $a['foundorder'] <=> $b['foundorder'];
 }
@@ -231,6 +221,8 @@ function closestHouseNumber($aRow)
 if (!function_exists('array_key_last')) {
     function array_key_last(array $array)
     {
-        if (!empty($array)) return key(array_slice($array, -1, 1, true));
+        if (!empty($array)) {
+            return key(array_slice($array, -1, 1, true));
+        }
     }
 }
index 25ed75cb2c7117411d013a5b78931129496cfec3..d7e1493263a4ec9f475b81125156a74540cf28f9 100644 (file)
@@ -5,15 +5,23 @@ function logStart(&$oDB, $sType = '', $sQuery = '', $aLanguageList = array())
 {
     $fStartTime = microtime(true);
     $aStartTime = explode('.', $fStartTime);
-    if (!isset($aStartTime[1])) $aStartTime[1] = '0';
+    if (!isset($aStartTime[1])) {
+        $aStartTime[1] = '0';
+    }
 
     $sOutputFormat = '';
-    if (isset($_GET['format'])) $sOutputFormat = $_GET['format'];
+    if (isset($_GET['format'])) {
+        $sOutputFormat = $_GET['format'];
+    }
 
     if ($sType == 'reverse') {
         $sOutQuery = (isset($_GET['lat'])?$_GET['lat']:'').'/';
-        if (isset($_GET['lon'])) $sOutQuery .= $_GET['lon'];
-        if (isset($_GET['zoom'])) $sOutQuery .= '/'.$_GET['zoom'];
+        if (isset($_GET['lon'])) {
+            $sOutQuery .= $_GET['lon'];
+        }
+        if (isset($_GET['zoom'])) {
+            $sOutQuery .= '/'.$_GET['zoom'];
+        }
     } else {
         $sOutQuery = $sQuery;
     }
@@ -28,13 +36,15 @@ function logStart(&$oDB, $sType = '', $sQuery = '', $aLanguageList = array())
             );
 
     if (CONST_Log_DB) {
-        if (isset($_GET['email']))
+        if (isset($_GET['email'])) {
             $sUserAgent = $_GET['email'];
-        elseif (isset($_SERVER['HTTP_REFERER']))
+        } elseif (isset($_SERVER['HTTP_REFERER'])) {
             $sUserAgent = $_SERVER['HTTP_REFERER'];
-        elseif (isset($_SERVER['HTTP_USER_AGENT']))
+        } elseif (isset($_SERVER['HTTP_USER_AGENT'])) {
             $sUserAgent = $_SERVER['HTTP_USER_AGENT'];
-        else $sUserAgent = '';
+        } else {
+            $sUserAgent = '';
+        }
         $sSQL = 'insert into new_query_log (type,starttime,query,ipaddress,useragent,language,format,searchterm)';
         $sSQL .= ' values (';
         $sSQL .= join(',', $oDB->getDBQuotedList(array(
@@ -60,7 +70,9 @@ function logEnd(&$oDB, $hLog, $iNumResults)
 
     if (CONST_Log_DB) {
         $aEndTime = explode('.', $fEndTime);
-        if (!$aEndTime[1]) $aEndTime[1] = '0';
+        if (!$aEndTime[1]) {
+            $aEndTime[1] = '0';
+        }
         $sEndTime = date('Y-m-d H:i:s', $aEndTime[0]).'.'.$aEndTime[1];
 
         $sSQL = 'update new_query_log set endtime = '.$oDB->getDBQuoted($sEndTime).', results = '.$iNumResults;
index 15c49f0aa0094fa87bf452eed9eb5b9463e78714..ac6e62136a3733f94303a7324a4a1971a49fc19a 100644 (file)
@@ -8,10 +8,12 @@ if (file_exists($phpPhraseSettingsFile) && !file_exists($jsonPhraseSettingsFile)
 
     $data = array();
 
-    if (isset($aTagsBlacklist))
+    if (isset($aTagsBlacklist)) {
         $data['blackList'] = $aTagsBlacklist;
-    if (isset($aTagsWhitelist))
+    }
+    if (isset($aTagsWhitelist)) {
         $data['whiteList'] = $aTagsWhitelist;
+    }
 
     $jsonFile = fopen($jsonPhraseSettingsFile, 'w');
     fwrite($jsonFile, json_encode($data));
index 8de8157623516db94c4ca3c8ca8df303326961a0..ee1db44ccfbe33d71d83d2af0cb8b47a26477efa 100644 (file)
@@ -3,14 +3,26 @@
 
 function formatOSMType($sType, $bIncludeExternal = true)
 {
-    if ($sType == 'N') return 'node';
-    if ($sType == 'W') return 'way';
-    if ($sType == 'R') return 'relation';
+    if ($sType == 'N') {
+        return 'node';
+    }
+    if ($sType == 'W') {
+        return 'way';
+    }
+    if ($sType == 'R') {
+        return 'relation';
+    }
 
-    if (!$bIncludeExternal) return '';
+    if (!$bIncludeExternal) {
+        return '';
+    }
 
-    if ($sType == 'T') return 'way';
-    if ($sType == 'I') return 'way';
+    if ($sType == 'T') {
+        return 'way';
+    }
+    if ($sType == 'I') {
+        return 'way';
+    }
 
     // not handled: P, L
 
index 0066e80e29adb13cce8320360c57eafddf7ba87e..927f3861778b40c56947927d705a82f9f5527210 100644 (file)
@@ -5,9 +5,11 @@
 $aFilteredPlaces = array();
 
 if (empty($aPlace)) {
-    if (isset($sError))
+    if (isset($sError)) {
         $aFilteredPlaces['error'] = $sError;
-    else $aFilteredPlaces['error'] = 'Unable to geocode';
+    } else {
+        $aFilteredPlaces['error'] = 'Unable to geocode';
+    }
     javascript_renderData($aFilteredPlaces);
 } else {
     $aFilteredPlaces = array(
@@ -17,7 +19,9 @@ if (empty($aPlace)) {
                                         )
                        );
 
-    if (isset($aPlace['place_id'])) $aFilteredPlaces['properties']['geocoding']['place_id'] = $aPlace['place_id'];
+    if (isset($aPlace['place_id'])) {
+        $aFilteredPlaces['properties']['geocoding']['place_id'] = $aPlace['place_id'];
+    }
     $sOSMType = formatOSMType($aPlace['osm_type']);
     if ($sOSMType) {
         $aFilteredPlaces['properties']['geocoding']['osm_type'] = $sOSMType;
index 089a86b683273bb9c147af66d6f34bee0ce872dd..0dd96f212727963598f3a7d36e1bf567b194bac2 100644 (file)
@@ -3,9 +3,11 @@
 $aFilteredPlaces = array();
 
 if (empty($aPlace)) {
-    if (isset($sError))
+    if (isset($sError)) {
         $aFilteredPlaces['error'] = $sError;
-    else $aFilteredPlaces['error'] = 'Unable to geocode';
+    } else {
+        $aFilteredPlaces['error'] = 'Unable to geocode';
+    }
     javascript_renderData($aFilteredPlaces);
 } else {
     $aFilteredPlaces = array(
@@ -13,7 +15,9 @@ if (empty($aPlace)) {
                         'properties' => array()
                        );
 
-    if (isset($aPlace['place_id'])) $aFilteredPlaces['properties']['place_id'] = $aPlace['place_id'];
+    if (isset($aPlace['place_id'])) {
+        $aFilteredPlaces['properties']['place_id'] = $aPlace['place_id'];
+    }
     $sOSMType = formatOSMType($aPlace['osm_type']);
     if ($sOSMType) {
         $aFilteredPlaces['properties']['osm_type'] = $sOSMType;
@@ -36,8 +40,12 @@ if (empty($aPlace)) {
     if (isset($aPlace['address'])) {
         $aFilteredPlaces['properties']['address'] = $aPlace['address']->getAddressNames();
     }
-    if (isset($aPlace['sExtraTags'])) $aFilteredPlaces['properties']['extratags'] = $aPlace['sExtraTags'];
-    if (isset($aPlace['sNameDetails'])) $aFilteredPlaces['properties']['namedetails'] = $aPlace['sNameDetails'];
+    if (isset($aPlace['sExtraTags'])) {
+        $aFilteredPlaces['properties']['extratags'] = $aPlace['sExtraTags'];
+    }
+    if (isset($aPlace['sNameDetails'])) {
+        $aFilteredPlaces['properties']['namedetails'] = $aPlace['sNameDetails'];
+    }
 
     if (isset($aPlace['aBoundingBox'])) {
         $aFilteredPlaces['bbox'] = array(
index 691d6a749f8e32f09932b3c75be72b99f2172a07..513d312e36435fe9d95ed46c5b7ab36af12db247 100644 (file)
@@ -3,19 +3,27 @@
 $aFilteredPlaces = array();
 
 if (empty($aPlace)) {
-    if (isset($sError))
+    if (isset($sError)) {
         $aFilteredPlaces['error'] = $sError;
-    else $aFilteredPlaces['error'] = 'Unable to geocode';
+    } else {
+        $aFilteredPlaces['error'] = 'Unable to geocode';
+    }
 } else {
-    if (isset($aPlace['place_id'])) $aFilteredPlaces['place_id'] = $aPlace['place_id'];
+    if (isset($aPlace['place_id'])) {
+        $aFilteredPlaces['place_id'] = $aPlace['place_id'];
+    }
     $aFilteredPlaces['licence'] = 'Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright';
     $sOSMType = formatOSMType($aPlace['osm_type']);
     if ($sOSMType) {
         $aFilteredPlaces['osm_type'] = $sOSMType;
         $aFilteredPlaces['osm_id'] = $aPlace['osm_id'];
     }
-    if (isset($aPlace['lat'])) $aFilteredPlaces['lat'] = $aPlace['lat'];
-    if (isset($aPlace['lon'])) $aFilteredPlaces['lon'] = $aPlace['lon'];
+    if (isset($aPlace['lat'])) {
+        $aFilteredPlaces['lat'] = $aPlace['lat'];
+    }
+    if (isset($aPlace['lon'])) {
+        $aFilteredPlaces['lon'] = $aPlace['lon'];
+    }
 
     if ($sOutputFormat == 'jsonv2' || $sOutputFormat == 'geojson') {
         $aFilteredPlaces['place_rank'] = $aPlace['rank_search'];
@@ -35,8 +43,12 @@ if (empty($aPlace)) {
     if (isset($aPlace['address'])) {
         $aFilteredPlaces['address'] = $aPlace['address']->getAddressNames();
     }
-    if (isset($aPlace['sExtraTags'])) $aFilteredPlaces['extratags'] = $aPlace['sExtraTags'];
-    if (isset($aPlace['sNameDetails'])) $aFilteredPlaces['namedetails'] = $aPlace['sNameDetails'];
+    if (isset($aPlace['sExtraTags'])) {
+        $aFilteredPlaces['extratags'] = $aPlace['sExtraTags'];
+    }
+    if (isset($aPlace['sNameDetails'])) {
+        $aFilteredPlaces['namedetails'] = $aPlace['sNameDetails'];
+    }
 
     if (isset($aPlace['aBoundingBox'])) {
         $aFilteredPlaces['boundingbox'] = $aPlace['aBoundingBox'];
index ab0bc72cbab623cbe62ff13d217ba6325b0deaf3..cf045ab9b825c5a5bff35eaeb2ce597e46ca4b4e 100644 (file)
@@ -12,17 +12,29 @@ echo " querystring='".htmlspecialchars($_SERVER['QUERY_STRING'], ENT_QUOTES)."'"
 echo ">\n";
 
 if (empty($aPlace)) {
-    if (isset($sError))
+    if (isset($sError)) {
         echo "<error>$sError</error>";
-    else echo '<error>Unable to geocode</error>';
+    } else {
+        echo '<error>Unable to geocode</error>';
+    }
 } else {
     echo '<result';
-    if ($aPlace['place_id']) echo ' place_id="'.$aPlace['place_id'].'"';
+    if ($aPlace['place_id']) {
+        echo ' place_id="'.$aPlace['place_id'].'"';
+    }
     $sOSMType = formatOSMType($aPlace['osm_type']);
-    if ($sOSMType) echo ' osm_type="'.$sOSMType.'"'.' osm_id="'.$aPlace['osm_id'].'"';
-    if ($aPlace['ref']) echo ' ref="'.htmlspecialchars($aPlace['ref']).'"';
-    if (isset($aPlace['lat'])) echo ' lat="'.htmlspecialchars($aPlace['lat']).'"';
-    if (isset($aPlace['lon'])) echo ' lon="'.htmlspecialchars($aPlace['lon']).'"';
+    if ($sOSMType) {
+        echo ' osm_type="'.$sOSMType.'"'.' osm_id="'.$aPlace['osm_id'].'"';
+    }
+    if ($aPlace['ref']) {
+        echo ' ref="'.htmlspecialchars($aPlace['ref']).'"';
+    }
+    if (isset($aPlace['lat'])) {
+        echo ' lat="'.htmlspecialchars($aPlace['lat']).'"';
+    }
+    if (isset($aPlace['lon'])) {
+        echo ' lon="'.htmlspecialchars($aPlace['lon']).'"';
+    }
     if (isset($aPlace['aBoundingBox'])) {
         echo ' boundingbox="';
         echo join(',', $aPlace['aBoundingBox']);
index a813b9a6524efc4e912c71ecc07a7f2884c925f5..4edddca30f7f5c3897c3fbb17eaf1b14cecb08bd 100644 (file)
@@ -43,29 +43,26 @@ $aPlaceDetails['centroid'] = array(
 $aPlaceDetails['geometry'] = json_decode($aPointDetails['asgeojson']);
 
 $funcMapAddressLine = function ($aFull) {
-    $aMapped = array(
-                'localname' => $aFull['localname'],
-                'place_id' => isset($aFull['place_id']) ? (int) $aFull['place_id'] : null,
-                'osm_id' => isset($aFull['osm_id']) ? (int) $aFull['osm_id'] : null,
-                'osm_type' => isset($aFull['osm_type']) ? $aFull['osm_type'] : null,
-                'place_type' => isset($aFull['place_type']) ? $aFull['place_type'] : null,
-                'class' => $aFull['class'],
-                'type' => $aFull['type'],
-                'admin_level' => isset($aFull['admin_level']) ? (int) $aFull['admin_level'] : null,
-                'rank_address' => $aFull['rank_address'] ? (int) $aFull['rank_address'] : null,
-                'distance' => (float) $aFull['distance'],
-                'isaddress' => isset($aFull['isaddress']) ? (bool) $aFull['isaddress'] : null
-               );
-
-    return $aMapped;
+    return array(
+            'localname' => $aFull['localname'],
+            'place_id' => isset($aFull['place_id']) ? (int) $aFull['place_id'] : null,
+            'osm_id' => isset($aFull['osm_id']) ? (int) $aFull['osm_id'] : null,
+            'osm_type' => isset($aFull['osm_type']) ? $aFull['osm_type'] : null,
+            'place_type' => isset($aFull['place_type']) ? $aFull['place_type'] : null,
+            'class' => $aFull['class'],
+            'type' => $aFull['type'],
+            'admin_level' => isset($aFull['admin_level']) ? (int) $aFull['admin_level'] : null,
+            'rank_address' => $aFull['rank_address'] ? (int) $aFull['rank_address'] : null,
+            'distance' => (float) $aFull['distance'],
+            'isaddress' => isset($aFull['isaddress']) ? (bool) $aFull['isaddress'] : null
+           );
 };
 
 $funcMapKeyword = function ($aFull) {
-    $aMapped = array(
-                'id' => (int) $aFull['word_id'],
-                'token' => $aFull['word_token']
-               );
-    return $aMapped;
+    return array(
+            'id' => (int) $aFull['word_id'],
+            'token' => $aFull['word_token']
+           );
 };
 
 if ($aAddressLines) {
@@ -96,11 +93,15 @@ if ($bIncludeHierarchy) {
     if ($bGroupHierarchy) {
         $aPlaceDetails['hierarchy'] = array();
         foreach ($aHierarchyLines as $aAddressLine) {
-            if ($aAddressLine['type'] == 'yes') $sType = $aAddressLine['class'];
-            else $sType = $aAddressLine['type'];
+            if ($aAddressLine['type'] == 'yes') {
+                $sType = $aAddressLine['class'];
+            } else {
+                $sType = $aAddressLine['type'];
+            }
 
-            if (!isset($aPlaceDetails['hierarchy'][$sType]))
+            if (!isset($aPlaceDetails['hierarchy'][$sType])) {
                 $aPlaceDetails['hierarchy'][$sType] = array();
+            }
             $aPlaceDetails['hierarchy'][$sType][] = $funcMapAddressLine($aAddressLine);
         }
     } else {
index 67297dd1dcf3a6f992a50a099d7f89b25a06a906..73717ff35ac4a8e94c5616f2f7ee80e0e89409a0 100644 (file)
@@ -8,4 +8,4 @@
         $error['details'] = $exception->getFile() . '('. $exception->getLine() . ')';
     }
 
-    echo javascript_renderData(array('error' => $error));
+    javascript_renderData(array('error' => $error));
index c4bc29cb1df4c0d8160003a698c46340cc2020a2..cc017564718e3efafd9c0d212fc6f340f2dc7052 100644 (file)
@@ -5,7 +5,9 @@ $aOutput['licence'] = 'Data © OpenStreetMap contributors, ODbL 1.0. https://osm
 $aOutput['batch'] = array();
 
 foreach ($aBatchResults as $aSearchResults) {
-    if (!$aSearchResults) $aSearchResults = array();
+    if (!$aSearchResults) {
+        $aSearchResults = array();
+    }
     $aFilteredPlaces = array();
     foreach ($aSearchResults as $iResNum => $aPointDetails) {
         $aPlace = array(
index 3e3a31c443c7948b58e6f8c5500fc366b7f34ad0..336731a2de6b1ff836ae44222a428c1980aea7c5 100644 (file)
@@ -9,7 +9,9 @@ foreach ($aSearchResults as $iResNum => $aPointDetails) {
                                )
               );
 
-    if (isset($aPointDetails['place_id'])) $aPlace['properties']['geocoding']['place_id'] = $aPointDetails['place_id'];
+    if (isset($aPointDetails['place_id'])) {
+        $aPlace['properties']['geocoding']['place_id'] = $aPointDetails['place_id'];
+    }
     $sOSMType = formatOSMType($aPointDetails['osm_type']);
     if ($sOSMType) {
         $aPlace['properties']['geocoding']['osm_type'] = $sOSMType;
index 0847fba411b6713d2ea33fae56a42a2a7a8f4f82..c0ca625cfe3a5b4afb79874c437eb55ee33a9cc5 100644 (file)
@@ -8,7 +8,7 @@ foreach ($aSearchResults as $iResNum => $aPointDetails) {
                                 'place_id'=>$aPointDetails['place_id'],
                                )
               );
-    
+
     $sOSMType = formatOSMType($aPointDetails['osm_type']);
     if ($sOSMType) {
         $aPlace['properties']['osm_type'] = $sOSMType;
@@ -58,8 +58,12 @@ foreach ($aSearchResults as $iResNum => $aPointDetails) {
     }
 
 
-    if (isset($aPointDetails['sExtraTags'])) $aPlace['properties']['extratags'] = $aPointDetails['sExtraTags'];
-    if (isset($aPointDetails['sNameDetails'])) $aPlace['properties']['namedetails'] = $aPointDetails['sNameDetails'];
+    if (isset($aPointDetails['sExtraTags'])) {
+        $aPlace['properties']['extratags'] = $aPointDetails['sExtraTags'];
+    }
+    if (isset($aPointDetails['sNameDetails'])) {
+        $aPlace['properties']['namedetails'] = $aPointDetails['sNameDetails'];
+    }
 
     $aFilteredPlaces[] = $aPlace;
 }
index 4b896d08a6de136207a8e878ff8c2ac73f7e711d..6f729c2982119bb8e468b2377f025cd720a890d5 100644 (file)
@@ -6,7 +6,7 @@ foreach ($aSearchResults as $iResNum => $aPointDetails) {
                'place_id'=>$aPointDetails['place_id'],
                'licence'=>'Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright',
               );
-    
+
     $sOSMType = formatOSMType($aPointDetails['osm_type']);
     if ($sOSMType) {
         $aPlace['osm_type'] = $sOSMType;
@@ -60,8 +60,12 @@ foreach ($aSearchResults as $iResNum => $aPointDetails) {
         $aPlace['geokml'] = $aPointDetails['askml'];
     }
 
-    if (isset($aPointDetails['sExtraTags'])) $aPlace['extratags'] = $aPointDetails['sExtraTags'];
-    if (isset($aPointDetails['sNameDetails'])) $aPlace['namedetails'] = $aPointDetails['sNameDetails'];
+    if (isset($aPointDetails['sExtraTags'])) {
+        $aPlace['extratags'] = $aPointDetails['sExtraTags'];
+    }
+    if (isset($aPointDetails['sNameDetails'])) {
+        $aPlace['namedetails'] = $aPointDetails['sNameDetails'];
+    }
 
     $aFilteredPlaces[] = $aPlace;
 }
index 5ac8e9dd37425d440ab6362dff35ab07a2e98d62..59b352ca2ea0bbd5f5ef75bee014504cc7e6a824 100644 (file)
@@ -10,7 +10,9 @@ echo (isset($sXmlRootTag)?$sXmlRootTag:'searchresults');
 echo " timestamp='".date(DATE_RFC822)."'";
 echo " attribution='Data © OpenStreetMap contributors, ODbL 1.0. http://www.openstreetmap.org/copyright'";
 echo " querystring='".htmlspecialchars($sQuery, ENT_QUOTES)."'";
-if (isset($aMoreParams['viewbox'])) echo " viewbox='".htmlspecialchars($aMoreParams['viewbox'], ENT_QUOTES)."'";
+if (isset($aMoreParams['viewbox'])) {
+    echo " viewbox='".htmlspecialchars($aMoreParams['viewbox'], ENT_QUOTES)."'";
+}
 if (isset($aMoreParams['exclude_place_ids'])) {
     echo " exclude_place_ids='".htmlspecialchars($aMoreParams['exclude_place_ids'])."'";
 }
index 92dd727283019ea3454b20ee7232f0234f583b0c..2c0884c8170b46df51f64d90e67def88ac2d3b55 100644 (file)
@@ -120,14 +120,14 @@ class Tokenizer
 
             // Try more interpretations for Tokens that could not be matched.
             foreach ($aTokens as $sToken) {
-                if ($sToken[0] == ' ' && !$oValidTokens->contains($sToken)) {
-                    if (preg_match('/^ ([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) {
+                if ($sToken[0] != ' ' && !$oValidTokens->contains($sToken)) {
+                    if (preg_match('/^([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) {
                         // US ZIP+4 codes - merge in the 5-digit ZIP code
                         $oValidTokens->addToken(
                             $sToken,
                             new Token\Postcode(null, $aData[1], 'us')
                         );
-                    } elseif (preg_match('/^ [0-9]+$/', $sToken)) {
+                    } elseif (preg_match('/^[0-9]+$/', $sToken)) {
                         // Unknown single word token with a number.
                         // Assume it is a house number.
                         $oValidTokens->addToken(
@@ -195,17 +195,28 @@ class Tokenizer
                 ) {
                     $oToken = new Token\Country($iId, $aWord['country_code']);
                 }
+            } elseif ($aWord['word_token'][0] == ' ') {
+                 $oToken = new Token\Word(
+                     $iId,
+                     $aWord['word_token'][0] != ' ',
+                     (int) $aWord['count'],
+                     substr_count($aWord['word_token'], ' ')
+                 );
             } else {
-                $oToken = new Token\Word(
+                $oToken = new Token\Partial(
                     $iId,
-                    $aWord['word_token'][0] != ' ',
-                    (int) $aWord['count'],
-                    substr_count($aWord['word_token'], ' ')
+                    $aWord['word_token'],
+                    (int) $aWord['count']
                 );
             }
 
             if ($oToken) {
-                $oValidTokens->addToken($aWord['word_token'], $oToken);
+                // remove any leading spaces
+                if ($aWord['word_token'][0] == ' ') {
+                    $oValidTokens->addToken(substr($aWord['word_token'], 1), $oToken);
+                } else {
+                    $oValidTokens->addToken($aWord['word_token'], $oToken);
+                }
             }
         }
     }
index 0fb37fd09ba7a34ffec578e00529121fd754c7c8..064b41667a9322bb6cb164dd6f7bb041490d1257 100644 (file)
@@ -105,7 +105,7 @@ class Tokenizer
         // now compute all possible tokens
         $aWordLists = array();
         $aTokens = array();
-        foreach ($aNormPhrases as $sTitle => $sPhrase) {
+        foreach ($aNormPhrases as $sPhrase) {
             if (strlen($sPhrase) > 0) {
                 $aWords = explode(' ', $sPhrase);
                 Tokenizer::addTokens($aTokens, $aWords);
@@ -137,14 +137,14 @@ class Tokenizer
 
             // Try more interpretations for Tokens that could not be matched.
             foreach ($aTokens as $sToken) {
-                if ($sToken[0] == ' ' && !$oValidTokens->contains($sToken)) {
-                    if (preg_match('/^ ([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) {
+                if ($sToken[0] != ' ' && !$oValidTokens->contains($sToken)) {
+                    if (preg_match('/^([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) {
                         // US ZIP+4 codes - merge in the 5-digit ZIP code
                         $oValidTokens->addToken(
                             $sToken,
                             new Token\Postcode(null, $aData[1], 'us')
                         );
-                    } elseif (preg_match('/^ [0-9]+$/', $sToken)) {
+                    } elseif (preg_match('/^[0-9]+$/', $sToken)) {
                         // Unknown single word token with a number.
                         // Assume it is a house number.
                         $oValidTokens->addToken(
@@ -212,17 +212,29 @@ class Tokenizer
                 ) {
                     $oToken = new Token\Country($iId, $aWord['country_code']);
                 }
-            } else {
+            } elseif ($aWord['word_token'][0] == ' ') {
                 $oToken = new Token\Word(
                     $iId,
-                    $aWord['word_token'][0] != ' ',
                     (int) $aWord['count'],
                     substr_count($aWord['word_token'], ' ')
                 );
+            // For backward compatibility: ignore all partial tokens with more
+            // than one word.
+            } elseif (strpos($aWord['word_token'], ' ') === false) {
+                $oToken = new Token\Partial(
+                    $iId,
+                    $aWord['word_token'],
+                    (int) $aWord['count']
+                );
             }
 
             if ($oToken) {
-                $oValidTokens->addToken($aWord['word_token'], $oToken);
+                // remove any leading spaces
+                if ($aWord['word_token'][0] == ' ') {
+                    $oValidTokens->addToken(substr($aWord['word_token'], 1), $oToken);
+                } else {
+                    $oValidTokens->addToken($aWord['word_token'], $oToken);
+                }
             }
         }
     }
index 55a088d1994ab8ee9555211c4d9927d78c910d6b..c16725e2ca2dae23b4b6e7d68195dafd4297bb10 100644 (file)
@@ -95,16 +95,22 @@ $iPlaceID = (int)$sPlaceId;
 
 if (CONST_Use_US_Tiger_Data) {
     $iParentPlaceID = $oDB->getOne('SELECT parent_place_id FROM location_property_tiger WHERE place_id = '.$iPlaceID);
-    if ($iParentPlaceID) $iPlaceID = $iParentPlaceID;
+    if ($iParentPlaceID) {
+        $iPlaceID = $iParentPlaceID;
+    }
 }
 
 // interpolated house numbers
 $iParentPlaceID = $oDB->getOne('SELECT parent_place_id FROM location_property_osmline WHERE place_id = '.$iPlaceID);
-if ($iParentPlaceID) $iPlaceID = $iParentPlaceID;
+if ($iParentPlaceID) {
+    $iPlaceID = $iParentPlaceID;
+}
 
 // artificial postcodes
 $iParentPlaceID = $oDB->getOne('SELECT parent_place_id FROM location_postcode WHERE place_id = '.$iPlaceID);
-if ($iParentPlaceID) $iPlaceID = $iParentPlaceID;
+if ($iParentPlaceID) {
+    $iPlaceID = $iParentPlaceID;
+}
 
 $hLog = logStart($oDB, 'details', $_SERVER['QUERY_STRING'], $aLangPrefOrder);
 
index 737edc62985d6c795a755f40c0b7b6501efd7306..eb3705fc9401e81e5c43ded80c67da0298c49e4a 100644 (file)
@@ -35,8 +35,10 @@ if (count($aOsmIds) > CONST_Places_Max_ID_count) {
 
 foreach ($aOsmIds as $sItem) {
     // Skip empty sItem
-    if (empty($sItem)) continue;
-    
+    if (empty($sItem)) {
+        continue;
+    }
+
     $sType = $sItem[0];
     $iId = (int) substr($sItem, 1);
     if ($iId > 0 && ($sType == 'N' || $sType == 'W' || $sType == 'R')) {
@@ -48,7 +50,9 @@ foreach ($aOsmIds as $sItem) {
             // key names
             $oResult = $oPlace;
             unset($oResult['aAddress']);
-            if (isset($oPlace['aAddress'])) $oResult['address'] = $oPlace['aAddress'];
+            if (isset($oPlace['aAddress'])) {
+                $oResult['address'] = $oPlace['aAddress'];
+            }
             if ($sOutputFormat != 'geocodejson') {
                 unset($oResult['langaddress']);
                 $oResult['name'] = $oPlace['langaddress'];
@@ -71,7 +75,9 @@ foreach ($aOsmIds as $sItem) {
 }
 
 
-if (CONST_Debug) exit;
+if (CONST_Debug) {
+    exit;
+}
 
 $sXmlRootTag = 'lookupresults';
 $sQuery = join(',', $aCleanedQueryParts);
index 88b48ae8e4c22c6d915a76df3299f9fbd3c162e6..c1382dbff49b5c687375848690bf053ec761ffcb 100644 (file)
@@ -30,8 +30,12 @@ while ($iTotalBroken && empty($aPolygons)) {
         $iDays++;
     }
 
-    if ($bReduced) $aWhere[] = "errormessage like 'Area reduced%'";
-    if ($sClass) $sWhere[] = "class = '".pg_escape_string($sClass)."'";
+    if ($bReduced) {
+        $aWhere[] = "errormessage like 'Area reduced%'";
+    }
+    if ($sClass) {
+        $sWhere[] = "class = '".pg_escape_string($sClass)."'";
+    }
 
     if (!empty($aWhere)) {
         $sSQL .= ' WHERE '.join(' and ', $aWhere);
index 56064e7a9efdbdb63e607316155d640852cb1834..e553ef39a3a5616419169e89e5a9085d8c947252 100644 (file)
@@ -82,7 +82,9 @@ if (isset($_SERVER['REQUEST_SCHEME'])
     $sMoreURL = '/search.php?'.http_build_query($aMoreParams);
 }
 
-if (CONST_Debug) exit;
+if (CONST_Debug) {
+    exit;
+}
 
 $sOutputTemplate = ($sOutputFormat == 'jsonv2') ? 'json' : $sOutputFormat;
 include(CONST_LibDir.'/template/search-'.$sOutputTemplate.'.php');
index 533a920e07e5937d689be322e056167ca3135884..5626deb4b5aa6d503e9efd345086f85617ffd487 100644 (file)
@@ -103,7 +103,7 @@ class CommandlineParser:
         return 1
 
 
-##### Subcommand classes
+# Subcommand classes
 #
 # Each class needs to implement two functions: add_args() adds the CLI parameters
 # for the subfunction, run() executes the subcommand.
index a555695224403fd3b647f9601f5888f78b506bfb..b99d37b87430631fe95fcdca4d59325dabc94c71 100644 (file)
@@ -90,7 +90,7 @@ class APISearch:
         if args.query:
             params = dict(q=args.query)
         else:
-            params = {k : getattr(args, k) for k, _ in STRUCTURED_QUERY if getattr(args, k)}
+            params = {k: getattr(args, k) for k, _ in STRUCTURED_QUERY if getattr(args, k)}
 
         for param, _ in EXTRADATA_PARAMS:
             if getattr(args, param):
index ee1941875d56b8c1007cf6bea1d222672080fd22..996f48f26dbffeae3306d6b4afd13853657ea02b 100644 (file)
@@ -24,4 +24,4 @@ class NominatimArgs:
                                      main_data=self.config.TABLESPACE_PLACE_DATA,
                                      main_index=self.config.TABLESPACE_PLACE_INDEX
                                     )
-                    )
+                   )
index fbc23350c7212478f8c8b0c9fa484ec154f9f070..969998ad5063d3233882c2db8084fc70f4b13194 100644 (file)
@@ -61,7 +61,7 @@ class UpdateRefresh:
                                   args.threads or 1)
                 indexer.index_postcodes()
             else:
-                LOG.error("The place table doesn\'t exist. " \
+                LOG.error("The place table doesn't exist. "
                           "Postcode updates on a frozen database is not possible.")
 
         if args.word_counts:
index 242b0f6a0b00c80bacc05ef01b97823e6bed2dfd..4c8cd44e2a77d351015600b3112bc4dca8d9f8ca 100644 (file)
@@ -93,7 +93,7 @@ class UpdateReplication:
                       indexed_only=not args.once)
 
         # Sanity check to not overwhelm the Geofabrik servers.
-        if 'download.geofabrik.de'in params['base_url']\
+        if 'download.geofabrik.de' in params['base_url']\
            and params['update_interval'] < 86400:
             LOG.fatal("Update interval too low for download.geofabrik.de.\n"
                       "Please check install documentation "
index 72aaf0bd6b436ed78322f39ccec34feb560d542d..a8436440b9f5ca78670ba9fe9e1cc8e3979ece96 100644 (file)
@@ -68,9 +68,9 @@ class Configuration:
         """
         try:
             return int(self.__getattr__(name))
-        except ValueError:
+        except ValueError as exp:
             LOG.fatal("Invalid setting NOMINATIM_%s. Needs to be a number.", name)
-            raise UsageError("Configuration error.")
+            raise UsageError("Configuration error.") from exp
 
 
     def get_libpq_dsn(self):
index db4b89ce2fa74b6a03763dccac1f00df5f8ef22e..a86c5bdcee13eb37c4874f29be286d2b06beecc0 100644 (file)
@@ -33,18 +33,17 @@ class DeadlockHandler:
         self.ignore_sql_errors = ignore_sql_errors
 
     def __enter__(self):
-        pass
+        return self
 
     def __exit__(self, exc_type, exc_value, traceback):
         if __has_psycopg2_errors__:
             if exc_type == psycopg2.errors.DeadlockDetected: # pylint: disable=E1101
                 self.handler()
                 return True
-        else:
-            if exc_type == psycopg2.extensions.TransactionRollbackError:
-                if exc_value.pgcode == '40P01':
-                    self.handler()
-                    return True
+        elif exc_type == psycopg2.extensions.TransactionRollbackError \
+             and exc_value.pgcode == '40P01':
+            self.handler()
+            return True
 
         if self.ignore_sql_errors and isinstance(exc_value, psycopg2.Error):
             LOG.info("SQL error ignored: %s", exc_value)
@@ -86,7 +85,7 @@ class DBConnection:
 
         # Use a dict to hand in the parameters because async is a reserved
         # word in Python3.
-        self.conn = psycopg2.connect(**{'dsn' : self.dsn, 'async' : True})
+        self.conn = psycopg2.connect(**{'dsn': self.dsn, 'async': True})
         self.wait()
 
         self.cursor = self.conn.cursor(cursor_factory=cursor_factory)
@@ -191,10 +190,7 @@ class WorkerPool:
                     yield thread
 
             if command_stat > self.REOPEN_CONNECTIONS_AFTER:
-                for thread in self.threads:
-                    while not thread.is_done():
-                        thread.wait()
-                    thread.connect()
+                self._reconnect_threads()
                 ready = self.threads
                 command_stat = 0
             else:
@@ -203,6 +199,13 @@ class WorkerPool:
                 self.wait_time += time.time() - tstart
 
 
+    def _reconnect_threads(self):
+        for thread in self.threads:
+            while not thread.is_done():
+                thread.wait()
+            thread.connect()
+
+
     def __enter__(self):
         return self
 
index ac8d7c858090a725142fc9a3bf8546baf6caac8b..1319ac16ea21c6e3d9424ea3bd205dd369768b4d 100644 (file)
@@ -8,6 +8,7 @@ import os
 import psycopg2
 import psycopg2.extensions
 import psycopg2.extras
+from psycopg2 import sql as pysql
 
 from nominatim.errors import UsageError
 
@@ -25,6 +26,16 @@ class _Cursor(psycopg2.extras.DictCursor):
 
         super().execute(query, args)
 
+
+    def execute_values(self, sql, argslist, template=None):
+        """ Wrapper for the psycopg2 convenience function to execute
+            SQL for a list of values.
+        """
+        LOG.debug("SQL execute_values(%s, %s)", sql, argslist)
+
+        psycopg2.extras.execute_values(self, sql, argslist, template=template)
+
+
     def scalar(self, sql, args=None):
         """ Execute query that returns a single value. The value is returned.
             If the query yields more than one row, a ValueError is raised.
@@ -37,6 +48,22 @@ class _Cursor(psycopg2.extras.DictCursor):
         return self.fetchone()[0]
 
 
+    def drop_table(self, name, if_exists=True, cascade=False):
+        """ Drop the table with the given name.
+            Set `if_exists` to False if a non-existant table should raise
+            an exception instead of just being ignored. If 'cascade' is set
+            to True then all dependent tables are deleted as well.
+        """
+        sql = 'DROP TABLE '
+        if if_exists:
+            sql += 'IF EXISTS '
+        sql += '{}'
+        if cascade:
+            sql += ' CASCADE'
+
+        self.execute(pysql.SQL(sql).format(pysql.Identifier(name)))
+
+
 class _Connection(psycopg2.extensions.connection):
     """ A connection that provides the specialised cursor by default and
         adds convenience functions for administrating the database.
@@ -75,14 +102,13 @@ class _Connection(psycopg2.extensions.connection):
         return True
 
 
-    def drop_table(self, name, if_exists=True):
+    def drop_table(self, name, if_exists=True, cascade=False):
         """ Drop the table with the given name.
             Set `if_exists` to False if a non-existant table should raise
             an exception instead of just being ignored.
         """
         with self.cursor() as cur:
-            cur.execute("""DROP TABLE {} "{}"
-                        """.format('IF EXISTS' if if_exists else '', name))
+            cur.drop_table(name, if_exists, cascade)
         self.commit()
 
 
index dafc5de434bb3bf69a69014d7d7e20a2059f9313..d756a215618d316499af1261c4f48a35c801b20c 100644 (file)
@@ -61,7 +61,7 @@ def _setup_postgresql_features(conn):
     """
     pg_version = conn.server_version_tuple()
     return {
-        'has_index_non_key_column' : pg_version >= (11, 0, 0)
+        'has_index_non_key_column': pg_version >= (11, 0, 0)
     }
 
 class SQLPreprocessor:
index 4d4305e7d67ff74c93119bbc67ef4acfa7036e2c..9a4a41a581661ced3048797b7ae1ff98d613dad0 100644 (file)
@@ -61,9 +61,9 @@ def execute_file(dsn, fname, ignore_errors=False, pre_code=None, post_code=None)
 
 
 # List of characters that need to be quoted for the copy command.
-_SQL_TRANSLATION = {ord(u'\\') : u'\\\\',
-                    ord(u'\t') : u'\\t',
-                    ord(u'\n') : u'\\n'}
+_SQL_TRANSLATION = {ord(u'\\'): u'\\\\',
+                    ord(u'\t'): u'\\t',
+                    ord(u'\n'): u'\\n'}
 
 class CopyBuffer:
     """ Data collector for the copy_from command.
index 5ab0eac3dca5701562085ffecdce652e110f9cfd..d0cfb391c4dbdf7a63c875af6ec1b2d98ca88d0c 100644 (file)
@@ -92,7 +92,7 @@ class Indexer:
 
 
     def index_full(self, analyse=True):
-        """ Index the complete database. This will first index boudnaries
+        """ Index the complete database. This will first index boundaries
             followed by all other objects. When `analyse` is True, then the
             database will be analysed at the appropriate places to
             ensure that database statistics are updated.
@@ -100,13 +100,10 @@ class Indexer:
         with connect(self.dsn) as conn:
             conn.autocommit = True
 
-            if analyse:
-                def _analyze():
+            def _analyze():
+                if analyse:
                     with conn.cursor() as cur:
                         cur.execute('ANALYZE')
-            else:
-                def _analyze():
-                    pass
 
             self.index_by_rank(0, 4)
             _analyze()
@@ -206,7 +203,7 @@ class Indexer:
 
                                 # And insert the curent batch
                                 for idx in range(0, len(places), batch):
-                                    part = places[idx:idx+batch]
+                                    part = places[idx:idx + batch]
                                     LOG.debug("Processing places: %s", str(part))
                                     runner.index_places(pool.next_free_worker(), part)
                                     progress.add(len(part))
index 177e67b812aef0ea05116928c214ed5434f5a622..634b1fae703670d838bf1633a2d222b427dd43fa 100644 (file)
@@ -63,7 +63,7 @@ class ProgressLogger:
             places_per_sec = self.done_places
         else:
             diff_seconds = (rank_end_time - self.rank_start_time).total_seconds()
-            places_per_sec = self.done_places/diff_seconds
+            places_per_sec = self.done_places / diff_seconds
 
         LOG.warning("Done %d/%d in %d @ %.3f per second - FINISHED %s\n",
                     self.done_places, self.total_places, int(diff_seconds),
index aa607faae3f3d48988ebddd738a49c90ba4bb607..068d7d0fa63edba9a45fe7aef9195d06d164996a 100644 (file)
@@ -5,13 +5,17 @@ tasks.
 import functools
 
 import psycopg2.extras
+from psycopg2 import sql as pysql
 
 # pylint: disable=C0111
 
+def _mk_valuelist(template, num):
+    return pysql.SQL(',').join([pysql.SQL(template)] * num)
+
 class AbstractPlacexRunner:
     """ Returns SQL commands for indexing of the placex table.
     """
-    SELECT_SQL = 'SELECT place_id FROM placex'
+    SELECT_SQL = pysql.SQL('SELECT place_id FROM placex ')
 
     def __init__(self, rank, analyzer):
         self.rank = rank
@@ -21,11 +25,12 @@ class AbstractPlacexRunner:
     @staticmethod
     @functools.lru_cache(maxsize=1)
     def _index_sql(num_places):
-        return """ UPDATE placex
-                   SET indexed_status = 0, address = v.addr, token_info = v.ti
-                   FROM (VALUES {}) as v(id, addr, ti)
-                   WHERE place_id = v.id
-               """.format(','.join(["(%s, %s::hstore, %s::jsonb)"]  * num_places))
+        return pysql.SQL(
+            """ UPDATE placex
+                SET indexed_status = 0, address = v.addr, token_info = v.ti
+                FROM (VALUES {}) as v(id, addr, ti)
+                WHERE place_id = v.id
+            """).format(_mk_valuelist("(%s, %s::hstore, %s::jsonb)", num_places))
 
 
     @staticmethod
@@ -52,14 +57,15 @@ class RankRunner(AbstractPlacexRunner):
         return "rank {}".format(self.rank)
 
     def sql_count_objects(self):
-        return """SELECT count(*) FROM placex
-                  WHERE rank_address = {} and indexed_status > 0
-               """.format(self.rank)
+        return pysql.SQL("""SELECT count(*) FROM placex
+                            WHERE rank_address = {} and indexed_status > 0
+                         """).format(pysql.Literal(self.rank))
 
     def sql_get_objects(self):
-        return """{} WHERE indexed_status > 0 and rank_address = {}
-                     ORDER BY geometry_sector
-               """.format(self.SELECT_SQL, self.rank)
+        return self.SELECT_SQL + pysql.SQL(
+            """WHERE indexed_status > 0 and rank_address = {}
+               ORDER BY geometry_sector
+            """).format(pysql.Literal(self.rank))
 
 
 class BoundaryRunner(AbstractPlacexRunner):
@@ -71,17 +77,18 @@ class BoundaryRunner(AbstractPlacexRunner):
         return "boundaries rank {}".format(self.rank)
 
     def sql_count_objects(self):
-        return """SELECT count(*) FROM placex
-                  WHERE indexed_status > 0
-                    AND rank_search = {}
-                    AND class = 'boundary' and type = 'administrative'
-               """.format(self.rank)
+        return pysql.SQL("""SELECT count(*) FROM placex
+                            WHERE indexed_status > 0
+                              AND rank_search = {}
+                              AND class = 'boundary' and type = 'administrative'
+                         """).format(pysql.Literal(self.rank))
 
     def sql_get_objects(self):
-        return """{} WHERE indexed_status > 0 and rank_search = {}
-                           and class = 'boundary' and type = 'administrative'
-                     ORDER BY partition, admin_level
-               """.format(self.SELECT_SQL, self.rank)
+        return self.SELECT_SQL + pysql.SQL(
+            """WHERE indexed_status > 0 and rank_search = {}
+                     and class = 'boundary' and type = 'administrative'
+               ORDER BY partition, admin_level
+            """).format(pysql.Literal(self.rank))
 
 
 class InterpolationRunner:
@@ -120,11 +127,11 @@ class InterpolationRunner:
     @staticmethod
     @functools.lru_cache(maxsize=1)
     def _index_sql(num_places):
-        return """ UPDATE location_property_osmline
-                   SET indexed_status = 0, address = v.addr, token_info = v.ti
-                   FROM (VALUES {}) as v(id, addr, ti)
-                   WHERE place_id = v.id
-               """.format(','.join(["(%s, %s::hstore, %s::jsonb)"]  * num_places))
+        return pysql.SQL("""UPDATE location_property_osmline
+                            SET indexed_status = 0, address = v.addr, token_info = v.ti
+                            FROM (VALUES {}) as v(id, addr, ti)
+                            WHERE place_id = v.id
+                         """).format(_mk_valuelist("(%s, %s::hstore, %s::jsonb)", num_places))
 
 
     def index_places(self, worker, places):
@@ -157,6 +164,6 @@ class PostcodeRunner:
 
     @staticmethod
     def index_places(worker, ids):
-        worker.perform(""" UPDATE location_postcode SET indexed_status = 0
-                           WHERE place_id IN ({})
-                       """.format(','.join((str(i[0]) for i in ids))))
+        worker.perform(pysql.SQL("""UPDATE location_postcode SET indexed_status = 0
+                                    WHERE place_id IN ({})""")
+                       .format(pysql.SQL(',').join((pysql.Literal(i[0]) for i in ids))))
index 28719df1ee7146b01591b5a1b4cf384275b7ee1f..93d2b0ffa26b9151ccba1928c0e7d0745ce4380a 100644 (file)
@@ -119,18 +119,22 @@ class ICUNameProcessor:
                 pos += 1
                 force_space = False
 
-        results = set()
-
+        # No variants detected? Fast return.
         if startpos == 0:
             trans_name = self.to_ascii.transliterate(norm_name).strip()
+            return [trans_name] if trans_name else []
+
+        return self._compute_result_set(partials, baseform[startpos:])
+
+
+    def _compute_result_set(self, partials, prefix):
+        results = set()
+
+        for variant in partials:
+            vname = variant + prefix
+            trans_name = self.to_ascii.transliterate(vname[1:-1]).strip()
             if trans_name:
                 results.add(trans_name)
-        else:
-            for variant in partials:
-                name = variant + baseform[startpos:]
-                trans_name = self.to_ascii.transliterate(name[1:-1]).strip()
-                if trans_name:
-                    results.add(trans_name)
 
         return list(results)
 
index 5148f3e2decff2b7027665a06019969e1409fb94..9ebe36849e50efbcc8896794bc3c983e65320da5 100644 (file)
@@ -7,12 +7,11 @@ import json
 _ICU_VARIANT_PORPERTY_FIELDS = ['lang']
 
 
-class ICUVariantProperties(namedtuple('_ICUVariantProperties', _ICU_VARIANT_PORPERTY_FIELDS,
-                                      defaults=(None, )*len(_ICU_VARIANT_PORPERTY_FIELDS))):
+class ICUVariantProperties(namedtuple('_ICUVariantProperties', _ICU_VARIANT_PORPERTY_FIELDS)):
     """ Data container for saving properties that describe when a variant
         should be applied.
 
-        Porperty instances are hashable.
+        Property instances are hashable.
     """
     @classmethod
     def from_rules(cls, _):
@@ -52,7 +51,7 @@ def unpickle_variant_set(variant_string):
     """
     data = json.loads(variant_string)
 
-    properties = {int(k): ICUVariantProperties(**v) for k, v in data['properties'].items()}
-    print(properties)
+    properties = {int(k): ICUVariantProperties.from_rules(v)
+                  for k, v in data['properties'].items()}
 
     return set((ICUVariant(src, repl, properties[pid]) for src, repl, pid in data['variants']))
index c585c5afe0bf28bfa24590ed05cb165f6fd2dd01..6d3d11c163eed81995b8c2c7c71f7870de5395ec 100644 (file)
@@ -9,8 +9,6 @@ import re
 from textwrap import dedent
 from pathlib import Path
 
-import psycopg2.extras
-
 from nominatim.db.connection import connect
 from nominatim.db.properties import set_property, get_property
 from nominatim.db.utils import CopyBuffer
@@ -341,7 +339,7 @@ class LegacyICUNameAnalyzer:
                 term = self.name_processor.get_search_normalized(word)
                 if term:
                     copystr.add(word, ' ' + term, cls, typ,
-                                oper if oper in ('in', 'near')  else None, 0)
+                                oper if oper in ('in', 'near') else None, 0)
                     added += 1
 
             copystr.copy_out(cursor, 'word',
@@ -359,8 +357,7 @@ class LegacyICUNameAnalyzer:
         to_delete = existing_phrases - new_phrases
 
         if to_delete:
-            psycopg2.extras.execute_values(
-                cursor,
+            cursor.execute_values(
                 """ DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op)
                     WHERE word = name and class = in_class and type = in_type
                           and ((op = '-' and operator is null) or op = operator)""",
@@ -386,9 +383,9 @@ class LegacyICUNameAnalyzer:
             if word_tokens:
                 cur.execute("""INSERT INTO word (word_id, word_token, country_code,
                                                  search_name_count)
-                               (SELECT nextval('seq_word'), token, '{}', 0
+                               (SELECT nextval('seq_word'), token, %s, 0
                                 FROM unnest(%s) as token)
-                            """.format(country_code), (list(word_tokens),))
+                            """, (country_code, list(word_tokens)))
 
 
     def process_place(self, place):
@@ -411,33 +408,36 @@ class LegacyICUNameAnalyzer:
                 self.add_country_names(country_feature.lower(), names)
 
         address = place.get('address')
-
         if address:
-            hnrs = []
-            addr_terms = []
-            for key, value in address.items():
-                if key == 'postcode':
-                    self._add_postcode(value)
-                elif key in ('housenumber', 'streetnumber', 'conscriptionnumber'):
-                    hnrs.append(value)
-                elif key == 'street':
-                    token_info.add_street(*self._compute_name_tokens({'name': value}))
-                elif key == 'place':
-                    token_info.add_place(*self._compute_name_tokens({'name': value}))
-                elif not key.startswith('_') and \
-                     key not in ('country', 'full'):
-                    addr_terms.append((key, *self._compute_name_tokens({'name': value})))
-
-            if hnrs:
-                hnrs = self._split_housenumbers(hnrs)
-                token_info.add_housenumbers(self.conn, [self._make_standard_hnr(n) for n in hnrs])
-
-            if addr_terms:
-                token_info.add_address_terms(addr_terms)
+            self._process_place_address(token_info, address)
 
         return token_info.data
 
 
+    def _process_place_address(self, token_info, address):
+        hnrs = []
+        addr_terms = []
+        for key, value in address.items():
+            if key == 'postcode':
+                self._add_postcode(value)
+            elif key in ('housenumber', 'streetnumber', 'conscriptionnumber'):
+                hnrs.append(value)
+            elif key == 'street':
+                token_info.add_street(*self._compute_name_tokens({'name': value}))
+            elif key == 'place':
+                token_info.add_place(*self._compute_name_tokens({'name': value}))
+            elif not key.startswith('_') and \
+                 key not in ('country', 'full'):
+                addr_terms.append((key, *self._compute_name_tokens({'name': value})))
+
+        if hnrs:
+            hnrs = self._split_housenumbers(hnrs)
+            token_info.add_housenumbers(self.conn, [self._make_standard_hnr(n) for n in hnrs])
+
+        if addr_terms:
+            token_info.add_address_terms(addr_terms)
+
+
     def _compute_name_tokens(self, names):
         """ Computes the full name and partial name tokens for the given
             dictionary of names.
index 6040f88f154bad6eb25e81c9498017d914db3e88..c19dce2f5a2a3c0d903cd13bca7bd6e3738a8008 100644 (file)
@@ -370,8 +370,7 @@ class LegacyNameAnalyzer:
             to_delete = existing_phrases - norm_phrases
 
             if to_add:
-                psycopg2.extras.execute_values(
-                    cur,
+                cur.execute_values(
                     """ INSERT INTO word (word_id, word_token, word, class, type,
                                           search_name_count, operator)
                         (SELECT nextval('seq_word'), ' ' || make_standard_name(name), name,
@@ -381,8 +380,7 @@ class LegacyNameAnalyzer:
                     to_add)
 
             if to_delete and should_replace:
-                psycopg2.extras.execute_values(
-                    cur,
+                cur.execute_values(
                     """ DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op)
                         WHERE word = name and class = in_class and type = in_type
                               and ((op = '-' and operator is null) or op = operator)""",
@@ -424,37 +422,37 @@ class LegacyNameAnalyzer:
                 self.add_country_names(country_feature.lower(), names)
 
         address = place.get('address')
-
         if address:
-            hnrs = []
-            addr_terms = []
-            for key, value in address.items():
-                if key == 'postcode':
-                    self._add_postcode(value)
-                elif key in ('housenumber', 'streetnumber', 'conscriptionnumber'):
-                    hnrs.append(value)
-                elif key == 'street':
-                    token_info.add_street(self.conn, value)
-                elif key == 'place':
-                    token_info.add_place(self.conn, value)
-                elif not key.startswith('_') and \
-                     key not in ('country', 'full'):
-                    addr_terms.append((key, value))
-
-            if hnrs:
-                token_info.add_housenumbers(self.conn, hnrs)
-
-            if addr_terms:
-                token_info.add_address_terms(self.conn, addr_terms)
+            self._process_place_address(token_info, address)
 
         return token_info.data
 
 
-    def _add_postcode(self, postcode):
-        """ Make sure the normalized postcode is present in the word table.
-        """
-        if re.search(r'[:,;]', postcode) is None:
-            self._cache.add_postcode(self.conn, self.normalize_postcode(postcode))
+    def _process_place_address(self, token_info, address):
+        hnrs = []
+        addr_terms = []
+
+        for key, value in address.items():
+            if key == 'postcode':
+                # Make sure the normalized postcode is present in the word table.
+                if re.search(r'[:,;]', value) is None:
+                    self._cache.add_postcode(self.conn,
+                                             self.normalize_postcode(value))
+            elif key in ('housenumber', 'streetnumber', 'conscriptionnumber'):
+                hnrs.append(value)
+            elif key == 'street':
+                token_info.add_street(self.conn, value)
+            elif key == 'place':
+                token_info.add_place(self.conn, value)
+            elif not key.startswith('_') and key not in ('country', 'full'):
+                addr_terms.append((key, value))
+
+        if hnrs:
+            token_info.add_housenumbers(self.conn, hnrs)
+
+        if addr_terms:
+            token_info.add_address_terms(self.conn, addr_terms)
+
 
 
 class _TokenInfo:
@@ -582,7 +580,7 @@ class _TokenCache:
         with conn.cursor() as cur:
             cur.execute("""SELECT i, ARRAY[getorcreate_housenumber_id(i::text)]::text
                            FROM generate_series(1, 100) as i""")
-            self._cached_housenumbers = {str(r[0]) : r[1] for r in cur}
+            self._cached_housenumbers = {str(r[0]): r[1] for r in cur}
 
         # For postcodes remember the ones that have already been added
         self.postcodes = set()
index d4f793b46334c27f304fbfe898268a4f23f564a9..d116554fea20f6e9b5e261adc2a48b0434fa5531 100644 (file)
@@ -24,6 +24,7 @@ def _check(hint=None):
     """
     def decorator(func):
         title = func.__doc__.split('\n', 1)[0].strip()
+
         def run_check(conn, config):
             print(title, end=' ... ')
             ret = func(conn, config)
@@ -98,13 +99,12 @@ def _get_indexes(conn):
     if conn.table_exists('place'):
         indexes.extend(('idx_placex_pendingsector',
                         'idx_location_area_country_place_id',
-                        'idx_place_osm_unique'
-                       ))
+                        'idx_place_osm_unique'))
 
     return indexes
 
 
-### CHECK FUNCTIONS
+# CHECK FUNCTIONS
 #
 # Functions are exectured in the order they appear here.
 
index efbf2ec80c0c771dbea2e71390c5ba184b782280..a4d7220fb8c73bcf7f38f1370c028122fb020b39 100644 (file)
@@ -9,6 +9,7 @@ from pathlib import Path
 
 import psutil
 import psycopg2.extras
+from psycopg2 import sql as pysql
 
 from nominatim.db.connection import connect, get_pg_env
 from nominatim.db import utils as db_utils
@@ -130,9 +131,8 @@ def import_osm_data(osm_file, options, drop=False, ignore_errors=False):
         if drop:
             conn.drop_table('planet_osm_nodes')
 
-    if drop:
-        if options['flatnode_file']:
-            Path(options['flatnode_file']).unlink()
+    if drop and options['flatnode_file']:
+        Path(options['flatnode_file']).unlink()
 
 
 def create_tables(conn, config, reverse_only=False):
@@ -185,7 +185,12 @@ def truncate_data_tables(conn):
 
     conn.commit()
 
-_COPY_COLUMNS = 'osm_type, osm_id, class, type, name, admin_level, address, extratags, geometry'
+
+_COPY_COLUMNS = pysql.SQL(',').join(map(pysql.Identifier,
+                                        ('osm_type', 'osm_id', 'class', 'type',
+                                         'name', 'admin_level', 'address',
+                                         'extratags', 'geometry')))
+
 
 def load_data(dsn, threads):
     """ Copy data into the word and placex table.
@@ -196,12 +201,15 @@ def load_data(dsn, threads):
     for imod in range(place_threads):
         conn = DBConnection(dsn)
         conn.connect()
-        conn.perform("""INSERT INTO placex ({0})
-                         SELECT {0} FROM place
-                         WHERE osm_id % {1} = {2}
-                           AND NOT (class='place' and (type='houses' or type='postcode'))
-                           AND ST_IsValid(geometry)
-                     """.format(_COPY_COLUMNS, place_threads, imod))
+        conn.perform(
+            pysql.SQL("""INSERT INTO placex ({columns})
+                           SELECT {columns} FROM place
+                           WHERE osm_id % {total} = {mod}
+                             AND NOT (class='place' and (type='houses' or type='postcode'))
+                             AND ST_IsValid(geometry)
+                      """).format(columns=_COPY_COLUMNS,
+                                  total=pysql.Literal(place_threads),
+                                  mod=pysql.Literal(imod)))
         sel.register(conn, selectors.EVENT_READ, conn)
 
     # Address interpolations go into another table.
@@ -251,6 +259,7 @@ def create_search_indices(conn, config, drop=False):
 
     sql.run_sql_file(conn, 'indices.sql', drop=drop)
 
+
 def create_country_names(conn, tokenizer, languages=None):
     """ Add default country names to search index. `languages` is a comma-
         separated list of language codes as used in OSM. If `languages` is not
@@ -262,8 +271,7 @@ def create_country_names(conn, tokenizer, languages=None):
 
     def _include_key(key):
         return key == 'name' or \
-               (key.startswith('name:') \
-                and (not languages or key[5:] in languages))
+               (key.startswith('name:') and (not languages or key[5:] in languages))
 
     with conn.cursor() as cur:
         psycopg2.extras.register_hstore(cur)
@@ -272,7 +280,7 @@ def create_country_names(conn, tokenizer, languages=None):
 
         with tokenizer.name_analyzer() as analyzer:
             for code, name in cur:
-                names = {'countrycode' : code}
+                names = {'countrycode': code}
                 if code == 'gb':
                     names['short_name'] = 'UK'
                 if code == 'us':
index 9888d96a73e83ca35abe90a01ebc13bc1eec0df9..72d252b7a83abdeb434912729bd595cfb10b3f7a 100644 (file)
@@ -70,7 +70,9 @@ def run_api_script(endpoint, project_dir, extra_env=None, phpcgi_bin=None,
     else:
         cmd = [str(phpcgi_bin)]
 
-    proc = subprocess.run(cmd, cwd=str(project_dir), env=env, capture_output=True,
+    proc = subprocess.run(cmd, cwd=str(project_dir), env=env,
+                          stdout=subprocess.PIPE,
+                          stderr=subprocess.PIPE,
                           check=False)
 
     if proc.returncode != 0 or proc.stderr:
@@ -134,11 +136,11 @@ def run_osm2pgsql(options):
 def get_url(url):
     """ Get the contents from the given URL and return it as a UTF-8 string.
     """
-    headers = {"User-Agent" : "Nominatim/{0[0]}.{0[1]}.{0[2]}-{0[3]}".format(NOMINATIM_VERSION)}
+    headers = {"User-Agent": "Nominatim/{0[0]}.{0[1]}.{0[2]}-{0[3]}".format(NOMINATIM_VERSION)}
 
     try:
         with urlrequest.urlopen(urlrequest.Request(url, headers=headers)) as response:
             return response.read().decode('utf-8')
-    except:
+    except Exception:
         LOG.fatal('Failed to load URL: %s', url)
         raise
index cc1bf97e09a92556cc1cc79cd47cfe03e605181c..a182fc8b32aa7c8ec05427d2d95587a4b567bb78 100644 (file)
@@ -3,6 +3,8 @@ Functions for removing unnecessary data from the database.
 """
 from pathlib import Path
 
+from psycopg2 import sql as pysql
+
 UPDATE_TABLES = [
     'address_levels',
     'gb_postcode',
@@ -21,15 +23,15 @@ def drop_update_tables(conn):
     """ Drop all tables only necessary for updating the database from
         OSM replication data.
     """
-
-    where = ' or '.join(["(tablename LIKE '{}')".format(t) for t in UPDATE_TABLES])
+    parts = (pysql.SQL("(tablename LIKE {})").format(pysql.Literal(t)) for t in UPDATE_TABLES)
 
     with conn.cursor() as cur:
-        cur.execute("SELECT tablename FROM pg_tables WHERE " + where)
+        cur.execute(pysql.SQL("SELECT tablename FROM pg_tables WHERE ")
+                    + pysql.SQL(' or ').join(parts))
         tables = [r[0] for r in cur]
 
         for table in tables:
-            cur.execute('DROP TABLE IF EXISTS "{}" CASCADE'.format(table))
+            cur.drop_table(table, cascade=True)
 
     conn.commit()
 
index de1e51013ffa00e4528a12318327343e38ccd11b..d7faca31f1f431b61383825ad105bee4163a6a07 100644 (file)
@@ -142,7 +142,8 @@ def change_housenumber_transliteration(conn, **_):
                        BEGIN
                          SELECT array_to_string(array_agg(trans), ';')
                            INTO normtext
-                           FROM (SELECT lookup_word as trans, getorcreate_housenumber_id(lookup_word)
+                           FROM (SELECT lookup_word as trans,
+                                        getorcreate_housenumber_id(lookup_word)
                                  FROM (SELECT make_standard_name(h) as lookup_word
                                        FROM regexp_split_to_table(housenumber, '[,;]') h) x) y;
                          return normtext;
index cfd242e24d8d358cf48f77f2930e8a1a7171ef03..d00fc97a8dbb39180b2ca5dae92784a7291bd40e 100644 (file)
@@ -7,7 +7,7 @@ import gzip
 import logging
 from math import isfinite
 
-from psycopg2.extras import execute_values
+from psycopg2 import sql as pysql
 
 from nominatim.db.connection import connect
 
@@ -52,27 +52,26 @@ class _CountryPostcodesCollector:
 
         with conn.cursor() as cur:
             if to_add:
-                execute_values(cur,
-                               """INSERT INTO location_postcode
-                                      (place_id, indexed_status, country_code,
-                                       postcode, geometry) VALUES %s""",
-                               to_add,
-                               template="""(nextval('seq_place'), 1, '{}',
-                                           %s, 'SRID=4326;POINT(%s %s)')
-                                        """.format(self.country))
+                cur.execute_values(
+                    """INSERT INTO location_postcode
+                         (place_id, indexed_status, country_code,
+                          postcode, geometry) VALUES %s""",
+                    to_add,
+                    template=pysql.SQL("""(nextval('seq_place'), 1, {},
+                                          %s, 'SRID=4326;POINT(%s %s)')
+                                       """).format(pysql.Literal(self.country)))
             if to_delete:
                 cur.execute("""DELETE FROM location_postcode
                                WHERE country_code = %s and postcode = any(%s)
                             """, (self.country, to_delete))
             if to_update:
-                execute_values(cur,
-                               """UPDATE location_postcode
-                                  SET indexed_status = 2,
-                                      geometry = ST_SetSRID(ST_Point(v.x, v.y), 4326)
-                                  FROM (VALUES %s) AS v (pc, x, y)
-                                  WHERE country_code = '{}' and postcode = pc
-                               """.format(self.country),
-                               to_update)
+                cur.execute_values(
+                    pysql.SQL("""UPDATE location_postcode
+                                 SET indexed_status = 2,
+                                     geometry = ST_SetSRID(ST_Point(v.x, v.y), 4326)
+                                 FROM (VALUES %s) AS v (pc, x, y)
+                                 WHERE country_code = {} and postcode = pc
+                              """).format(pysql.Literal(self.country)), to_update)
 
 
     def _compute_changes(self, conn):
@@ -165,11 +164,14 @@ def update_postcodes(dsn, project_dir, tokenizer):
             with conn.cursor(name="placex_postcodes") as cur:
                 cur.execute("""
                 SELECT cc as country_code, pc, ST_X(centroid), ST_Y(centroid)
-                FROM (SELECT 
-                        COALESCE(plx.country_code, get_country_code(ST_Centroid(pl.geometry))) as cc,
+                FROM (SELECT
+                        COALESCE(plx.country_code,
+                                 get_country_code(ST_Centroid(pl.geometry))) as cc,
                         token_normalized_postcode(pl.address->'postcode') as pc,
-                        ST_Centroid(ST_Collect(COALESCE(plx.centroid, ST_Centroid(pl.geometry)))) as centroid 
-                        FROM place AS pl LEFT OUTER JOIN placex AS plx ON pl.osm_id = plx.osm_id AND pl.osm_type = plx.osm_type
+                        ST_Centroid(ST_Collect(COALESCE(plx.centroid,
+                                                        ST_Centroid(pl.geometry)))) as centroid
+                      FROM place AS pl LEFT OUTER JOIN placex AS plx
+                             ON pl.osm_id = plx.osm_id AND pl.osm_type = plx.osm_type
                     WHERE pl.address ? 'postcode' AND pl.geometry IS NOT null
                     GROUP BY cc, pc) xx
                 WHERE pc IS NOT null AND cc IS NOT null
index 25a97127896076c08242cafac7a5365a7b5c520e..5aaee0c8d1d8417a5a88c4b7a317a2d2f37c4467 100644 (file)
@@ -5,7 +5,7 @@ import json
 import logging
 from textwrap import dedent
 
-from psycopg2.extras import execute_values
+from psycopg2 import sql as pysql
 
 from nominatim.db.utils import execute_file
 from nominatim.db.sql_preprocessor import SQLPreprocessor
@@ -49,7 +49,7 @@ def load_address_levels(conn, table, levels):
         _add_address_level_rows_from_entry(rows, entry)
 
     with conn.cursor() as cur:
-        cur.execute('DROP TABLE IF EXISTS {}'.format(table))
+        cur.drop_table(table)
 
         cur.execute("""CREATE TABLE {} (country_code varchar(2),
                                         class TEXT,
@@ -57,7 +57,8 @@ def load_address_levels(conn, table, levels):
                                         rank_search SMALLINT,
                                         rank_address SMALLINT)""".format(table))
 
-        execute_values(cur, "INSERT INTO {} VALUES %s".format(table), rows)
+        cur.execute_values(pysql.SQL("INSERT INTO {} VALUES %s")
+                           .format(pysql.Identifier(table)), rows)
 
         cur.execute('CREATE UNIQUE INDEX ON {} (country_code, class, type)'.format(table))
 
@@ -155,6 +156,20 @@ def recompute_importance(conn):
     conn.commit()
 
 
+def _quote_php_variable(var_type, config, conf_name):
+    if var_type == bool:
+        return 'true' if config.get_bool(conf_name) else 'false'
+
+    if var_type == int:
+        return getattr(config, conf_name)
+
+    if not getattr(config, conf_name):
+        return 'false'
+
+    quoted = getattr(config, conf_name).replace("'", "\\'")
+    return f"'{quoted}'"
+
+
 def setup_website(basedir, config, conn):
     """ Create the website script stubs.
     """
@@ -174,18 +189,11 @@ def setup_website(basedir, config, conn):
                                  config.project_dir / 'tokenizer'))
 
     for php_name, conf_name, var_type in PHP_CONST_DEFS:
-        if var_type == bool:
-            varout = 'true' if config.get_bool(conf_name) else 'false'
-        elif var_type == int:
-            varout = getattr(config, conf_name)
-        elif not getattr(config, conf_name):
-            varout = 'false'
-        else:
-            varout = "'{}'".format(getattr(config, conf_name).replace("'", "\\'"))
+        varout = _quote_php_variable(var_type, config, conf_name)
 
-        template += "@define('CONST_{}', {});\n".format(php_name, varout)
+        template += f"@define('CONST_{php_name}', {varout});\n"
 
-    template += "\nrequire_once('{}/website/{{}}');\n".format(config.lib_dir.php)
+    template += f"\nrequire_once('{config.lib_dir.php}/website/{{}}');\n"
 
     search_name_table_exists = bool(conn and conn.table_exists('search_name'))
 
index 48764518e6e267088834ff46f68b1222cfbc739d..791f4dc323eb61f4659b34c43af624ed37c4c1e9 100644 (file)
@@ -20,6 +20,12 @@ from nominatim.errors import UsageError
 from nominatim.tools.special_phrases.importer_statistics import SpecialPhrasesImporterStatistics
 
 LOG = logging.getLogger()
+
+def _classtype_table(phrase_class, phrase_type):
+    """ Return the name of the table for the given class and type.
+    """
+    return f'place_classtype_{phrase_class}_{phrase_type}'
+
 class SPImporter():
     # pylint: disable-msg=too-many-instance-attributes
     """
@@ -38,8 +44,8 @@ class SPImporter():
         # This set will contain all existing phrases to be added.
         # It contains tuples with the following format: (lable, class, type, operator)
         self.word_phrases = set()
-        #This set will contain all existing place_classtype tables which doesn't match any
-        #special phrases class/type on the wiki.
+        # This set will contain all existing place_classtype tables which don't match any
+        # special phrases class/type on the wiki.
         self.table_phrases_to_delete = set()
 
     def import_phrases(self, tokenizer, should_replace):
@@ -54,7 +60,7 @@ class SPImporter():
         LOG.warning('Special phrases importation starting')
         self._fetch_existing_place_classtype_tables()
 
-        #Store pairs of class/type for further processing
+        # Store pairs of class/type for further processing
         class_type_pairs = set()
 
         for loaded_phrases in self.sp_loader:
@@ -125,17 +131,17 @@ class SPImporter():
             Return the class/type pair corresponding to the phrase.
         """
 
-        #blacklisting: disallow certain class/type combinations
+        # blacklisting: disallow certain class/type combinations
         if phrase.p_class in self.black_list.keys() \
            and phrase.p_type in self.black_list[phrase.p_class]:
             return None
 
-        #whitelisting: if class is in whitelist, allow only tags in the list
+        # whitelisting: if class is in whitelist, allow only tags in the list
         if phrase.p_class in self.white_list.keys() \
            and phrase.p_type not in self.white_list[phrase.p_class]:
             return None
 
-        #sanity check, in case somebody added garbage in the wiki
+        # sanity check, in case somebody added garbage in the wiki
         if not self._check_sanity(phrase):
             self.statistics_handler.notify_one_phrase_invalid()
             return None
@@ -155,7 +161,7 @@ class SPImporter():
 
         sql_tablespace = self.config.TABLESPACE_AUX_DATA
         if sql_tablespace:
-            sql_tablespace = ' TABLESPACE '+sql_tablespace
+            sql_tablespace = ' TABLESPACE ' + sql_tablespace
 
         with self.db_connection.cursor() as db_cursor:
             db_cursor.execute("CREATE INDEX idx_placex_classtype ON placex (class, type)")
@@ -164,23 +170,23 @@ class SPImporter():
             phrase_class = pair[0]
             phrase_type = pair[1]
 
-            table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
+            table_name = _classtype_table(phrase_class, phrase_type)
 
             if table_name in self.table_phrases_to_delete:
                 self.statistics_handler.notify_one_table_ignored()
-                #Remove this table from the ones to delete as it match a class/type
-                #still existing on the special phrases of the wiki.
+                # Remove this table from the ones to delete as it matches a
+                # class/type still existing on the special phrases of the wiki.
                 self.table_phrases_to_delete.remove(table_name)
-                #So dont need to create the table and indexes.
+                # So there is no need to create the table and indexes.
                 continue
 
-            #Table creation
+            # Table creation
             self._create_place_classtype_table(sql_tablespace, phrase_class, phrase_type)
 
-            #Indexes creation
+            # Indexes creation
             self._create_place_classtype_indexes(sql_tablespace, phrase_class, phrase_type)
 
-            #Grant access on read to the web user.
+            # Grant access on read to the web user.
             self._grant_access_to_webuser(phrase_class, phrase_type)
 
             self.statistics_handler.notify_one_table_created()
@@ -193,11 +199,11 @@ class SPImporter():
         """
             Create table place_classtype of the given phrase_class/phrase_type if doesn't exit.
         """
-        table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
+        table_name = _classtype_table(phrase_class, phrase_type)
         with self.db_connection.cursor() as db_cursor:
             db_cursor.execute(SQL("""
-                    CREATE TABLE IF NOT EXISTS {{}} {} 
-                    AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex 
+                    CREATE TABLE IF NOT EXISTS {{}} {}
+                    AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex
                     WHERE class = {{}} AND type = {{}}""".format(sql_tablespace))
                               .format(Identifier(table_name), Literal(phrase_class),
                                       Literal(phrase_type)))
@@ -208,8 +214,8 @@ class SPImporter():
             Create indexes on centroid and place_id for the place_classtype table.
         """
         index_prefix = 'idx_place_classtype_{}_{}_'.format(phrase_class, phrase_type)
-        base_table = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
-        #Index on centroid
+        base_table = _classtype_table(phrase_class, phrase_type)
+        # Index on centroid
         if not self.db_connection.index_exists(index_prefix + 'centroid'):
             with self.db_connection.cursor() as db_cursor:
                 db_cursor.execute(SQL("""
@@ -217,7 +223,7 @@ class SPImporter():
                                   .format(Identifier(index_prefix + 'centroid'),
                                           Identifier(base_table)), sql_tablespace)
 
-        #Index on place_id
+        # Index on place_id
         if not self.db_connection.index_exists(index_prefix + 'place_id'):
             with self.db_connection.cursor() as db_cursor:
                 db_cursor.execute(SQL(
@@ -230,7 +236,7 @@ class SPImporter():
         """
             Grant access on read to the table place_classtype for the webuser.
         """
-        table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
+        table_name = _classtype_table(phrase_class, phrase_type)
         with self.db_connection.cursor() as db_cursor:
             db_cursor.execute(SQL("""GRANT SELECT ON {} TO {}""")
                               .format(Identifier(table_name),
@@ -242,18 +248,14 @@ class SPImporter():
             Delete the place_classtype tables.
         """
         LOG.warning('Cleaning database...')
-        #Array containing all queries to execute. Contain tuples of format (query, parameters)
-        queries_parameters = []
-
-        #Delete place_classtype tables corresponding to class/type which are not on the wiki anymore
-        for table in self.table_phrases_to_delete:
-            self.statistics_handler.notify_one_table_deleted()
-            query = SQL('DROP TABLE IF EXISTS {}').format(Identifier(table))
-            queries_parameters.append((query, ()))
 
+        # Delete place_classtype tables corresponding to class/type which
+        # are not on the wiki anymore.
         with self.db_connection.cursor() as db_cursor:
-            for query, parameters in queries_parameters:
-                db_cursor.execute(query, parameters)
+            for table in self.table_phrases_to_delete:
+                self.statistics_handler.notify_one_table_deleted()
+                db_cursor.drop_table(table)
+
 
     def _convert_php_settings_if_needed(self, file_path):
         """
@@ -265,7 +267,7 @@ class SPImporter():
         file, extension = os.path.splitext(file_path)
         json_file_path = Path(file + '.json').resolve()
 
-        if extension not in('.php', '.json'):
+        if extension not in ('.php', '.json'):
             raise UsageError('The custom NOMINATIM_PHRASE_CONFIG file has not a valid extension.')
 
         if extension == '.php' and not isfile(json_file_path):
@@ -274,9 +276,8 @@ class SPImporter():
                                 (self.phplib_dir / 'migration/PhraseSettingsToJson.php').resolve(),
                                 file_path], check=True)
                 LOG.warning('special_phrase configuration file has been converted to json.')
-                return json_file_path
             except subprocess.CalledProcessError:
                 LOG.error('Error while converting %s to json.', file_path)
                 raise
-        else:
-            return json_file_path
+
+        return json_file_path
index 914e15391123cf2571c99924a17d446c7bb8415c..1ad9de7e2cfa299dc18b68e873bf6f00c9592432 100644 (file)
@@ -15,7 +15,7 @@ class SPWikiLoader(Iterator):
     def __init__(self, config, languages=None):
         super().__init__()
         self.config = config
-        #Compile the regex here to increase performances.
+        # Compile the regex here to improve performance.
         self.occurence_pattern = re.compile(
             r'\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([\-YN])'
         )
@@ -35,7 +35,7 @@ class SPWikiLoader(Iterator):
             Parses XML content and extracts special phrases from it.
             Return a list of SpecialPhrase.
         """
-        #One match will be of format [label, class, type, operator, plural]
+        # One match will be of format [label, class, type, operator, plural]
         matches = self.occurence_pattern.findall(xml)
         returned_phrases = set()
         for match in matches:
@@ -65,5 +65,6 @@ class SPWikiLoader(Iterator):
             Requested URL Example :
                 https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/EN
         """
-        url = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/' + lang.upper() # pylint: disable=line-too-long
+        url = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/' \
+              + lang.upper()
         return get_url(url)
index 448fbee47b283717e80ba01d28897ff8a08635e4..da7968cac9c0845917f4593da1c41c3b1b9ac18d 100644 (file)
@@ -13,7 +13,7 @@ class SpecialPhrase():
     def __init__(self, p_label, p_class, p_type, p_operator):
         self.p_label = p_label.strip()
         self.p_class = p_class.strip()
-        #Hack around a bug where building=yes was imported with quotes into the wiki
+        # Hack around a bug where building=yes was imported with quotes into the wiki
         self.p_type = re.sub(r'\"|&quot;', '', p_type.strip())
-        #Needed if some operator in the wiki are not written in english
+        # Needed if some operators in the wiki are not written in English
         self.p_operator = '-' if p_operator not in ('near', 'in') else p_operator
index 9fb90150bacbf6bde12ee1db936ed1bb74a1563b..d3fd9ef058b363aa160b36c846b3a47284fd71ec 100755 (executable)
@@ -42,7 +42,7 @@
                         python3-pip python3-setuptools python3-devel \
                         expat-devel zlib-devel libicu-dev
 
-    pip3 install --user psycopg2 python-dotenv psutil Jinja2 PyICU datrie
+    pip3 install --user psycopg2 python-dotenv psutil Jinja2 PyICU datrie pyyaml
 
 
 #
index 2330fc3b8721888861de13403f514599b5c9cdb8..a41e846cfb615993f30cf438aac829c68de2ae3d 100755 (executable)
@@ -35,7 +35,7 @@
                         python3-pip python3-setuptools python3-devel \
                         expat-devel zlib-devel libicu-dev
 
-    pip3 install --user psycopg2 python-dotenv psutil Jinja2 PyICU datrie
+    pip3 install --user psycopg2 python-dotenv psutil Jinja2 PyICU datrie pyyaml
 
 
 #
index 63c07becc5d5088e5f2ce9a954c5dc160332cc9b..dadce0865847dfd0540cd5f5a958b1c0822d1cda 100755 (executable)
@@ -32,10 +32,10 @@ export DEBIAN_FRONTEND=noninteractive #DOCS:
                         php php-pgsql php-intl libicu-dev python3-pip \
                         python3-psycopg2 python3-psutil python3-jinja2 python3-icu git
 
-# The python-dotenv adn datrie package that comes with Ubuntu 18.04 is too old, so
+# Some of the Python packages that come with Ubuntu 18.04 are too old, so
 # install the latest version from pip:
 
-    pip3 install python-dotenv datrie
+    pip3 install python-dotenv datrie pyyaml
 
 #
 # System Configuration