]> git.openstreetmap.org Git - nominatim.git/commitdiff
Merge remote-tracking branch 'upstream/master'
authorSarah Hoffmann <lonvia@denofr.de>
Wed, 3 Feb 2021 09:40:13 +0000 (10:40 +0100)
committerSarah Hoffmann <lonvia@denofr.de>
Wed, 3 Feb 2021 09:40:13 +0000 (10:40 +0100)
62 files changed:
.github/workflows/ci-tests.yml
CMakeLists.txt
CONTRIBUTING.md
cmake/script.tmpl
cmake/tool.tmpl
docs/admin/Installation.md
docs/admin/Update.md
docs/api/Reverse.md
docs/develop/Development-Environment.md
docs/develop/Testing.md
lib/SearchDescription.php
lib/Shell.php
lib/admin/update.php
lib/output.php
lib/setup/AddressLevelParser.php [deleted file]
lib/setup/SetupClass.php
nominatim/admin/exec_utils.py [deleted file]
nominatim/cli.py
nominatim/config.py
nominatim/db/__init__.py [new file with mode: 0644]
nominatim/db/async_connection.py [moved from nominatim/indexer/db.py with 73% similarity]
nominatim/db/connection.py [new file with mode: 0644]
nominatim/db/status.py [new file with mode: 0644]
nominatim/db/utils.py [new file with mode: 0644]
nominatim/errors.py [new file with mode: 0644]
nominatim/indexer/indexer.py [new file with mode: 0644]
nominatim/indexer/progress.py
nominatim/nominatim.py [deleted file]
nominatim/tools/__init__.py [moved from nominatim/admin/__init__.py with 100% similarity]
nominatim/tools/exec_utils.py [new file with mode: 0644]
nominatim/tools/refresh.py [new file with mode: 0644]
nominatim/tools/replication.py [new file with mode: 0644]
nominatim/version.py [new file with mode: 0644]
settings/env.defaults
sql/functions/utils.sql
sql/tables.sql
test/bdd/api/details/language.feature [new file with mode: 0644]
test/bdd/api/details/params.feature
test/bdd/api/details/simple.feature
test/bdd/api/reverse/params.feature
test/bdd/api/reverse/simple.feature
test/bdd/api/search/queries.feature
test/bdd/api/search/simple.feature
test/bdd/steps/cgi-with-coverage.php
test/bdd/steps/http_responses.py
test/bdd/steps/nominatim_environment.py
test/bdd/steps/steps_api_queries.py
test/php/Nominatim/OutputTest.php [deleted file]
test/python/conftest.py [new file with mode: 0644]
test/python/test_cli.py [new file with mode: 0644]
test/python/test_config.py [new file with mode: 0644]
test/python/test_db_connection.py [new file with mode: 0644]
test/python/test_db_status.py [new file with mode: 0644]
test/python/test_db_utils.py [new file with mode: 0644]
test/python/test_indexing.py [new file with mode: 0644]
test/python/test_tools_exec_utils.py [new file with mode: 0644]
test/python/test_tools_refresh_address_levels.py [new file with mode: 0644]
test/python/test_tools_refresh_create_functions.py [new file with mode: 0644]
test/python/test_tools_replication.py [new file with mode: 0644]
utils/check_server_for_updates.py [deleted file]
utils/osm_file_date.py [deleted file]
utils/server_compare.php [deleted file]

index e57431c012db86203e28158af873c95890025552..9bdf1790dcccc7e91969a7c4f7797172b4822309 100644 (file)
@@ -35,8 +35,8 @@ jobs:
             - uses: actions/cache@v2
               with:
                   path: |
-                     {{ github.workspace }}/data/country_osm_grid.sql.gz
-                     {{ github.workspace }}/monaco-latest.osm.pbf
+                     data/country_osm_grid.sql.gz
+                     monaco-latest.osm.pbf
                   key: nominatim-data-${{ steps.get-date.outputs.date }}
 
             - uses: ./.github/actions/setup-postgresql
@@ -46,17 +46,21 @@ jobs:
             - uses: ./.github/actions/build-nominatim
 
             - name: Install test prerequsites
-              run: |
-                   sudo apt-get install -y -qq php-codesniffer
-                   sudo pip3 install behave
+              run: sudo apt-get install -y -qq php-codesniffer pylint python3-pytest python3-behave
 
             - name: PHP linting
               run: phpcs --report-width=120 .
 
+            - name: Python linting
+              run: pylint --extension-pkg-whitelist=osmium nominatim
+
             - name: PHP unit tests
               run: phpunit ./
               working-directory: test/php
 
+            - name: Python unit tests
+              run: py.test-3 test/python
+
             - name: BDD tests
               run: behave -DREMOVE_TEMPLATE=1 --format=progress3
               working-directory: test/bdd
@@ -78,8 +82,8 @@ jobs:
             - uses: actions/cache@v2
               with:
                   path: |
-                     {{ github.workspace }}/data/country_osm_grid.sql.gz
-                     {{ github.workspace }}/monaco-latest.osm.pbf
+                     data/country_osm_grid.sql.gz
+                     monaco-latest.osm.pbf
                   key: nominatim-data-${{ steps.get-date.outputs.date }}
 
             - uses: ./.github/actions/setup-postgresql
@@ -88,11 +92,6 @@ jobs:
                   postgis-version: 3
             - uses: ./.github/actions/build-nominatim
 
-            - name: Create configuration
-              run: |
-                   echo "NOMINATIM_PYOSMIUM_BINARY=/usr/lib/python3-pyosmium/pyosmium-get-changes" >> .env
-              working-directory: build
-
             - name: Download import data
               run: |
                   if [ ! -f monaco-latest.osm.pbf ]; then
index 4915d7649d4f7b50e23d9c5bb536999b2777bbcf..037ae2be5f0e8868d74bab69e23a56c561413d0a 100644 (file)
@@ -57,20 +57,11 @@ endif()
 
 
 #-----------------------------------------------------------------------------
-#  python and pyosmium (imports/updates only)
+#  python (imports/updates only)
 #-----------------------------------------------------------------------------
 
 if (BUILD_IMPORTER)
-    find_package(PythonInterp 3)
-
-    find_program(PYOSMIUM pyosmium-get-changes)
-    if (NOT EXISTS "${PYOSMIUM}")
-        set(PYOSMIUM_PATH "")
-            message(WARNING "pyosmium-get-changes not found (required for updates)")
-    else()
-        set(PYOSMIUM_PATH "${PYOSMIUM}")
-        message(STATUS "Using pyosmium-get-changes at ${PYOSMIUM_PATH}")
-    endif()
+    find_package(PythonInterp 3.5 REQUIRED)
 endif()
 
 #-----------------------------------------------------------------------------
@@ -86,8 +77,19 @@ if (BUILD_API OR BUILD_IMPORTER)
     # sanity check if PHP binary exists
     if (NOT EXISTS ${PHP_BIN})
         message(FATAL_ERROR "PHP binary not found. Install php or provide location with -DPHP_BIN=/path/php ")
+    else()
+        message (STATUS "Using PHP binary " ${PHP_BIN})
+    endif()
+    if (NOT PHPCGI_BIN)
+        find_program (PHPCGI_BIN php-cgi)
+    endif()
+    # sanity check if PHP binary exists
+    if (NOT EXISTS ${PHPCGI_BIN})
+        message(WARNING "php-cgi binary not found. nominatim tool will not provide query functions.")
+        set (PHPCGI_BIN "")
+    else()
+        message (STATUS "Using php-cgi binary " ${PHPCGI_BIN})
     endif()
-    message (STATUS "Using PHP binary " ${PHP_BIN})
 endif()
 
 #-----------------------------------------------------------------------------
@@ -155,21 +157,60 @@ if (BUILD_TESTS)
 
     set(TEST_BDD db osm2pgsql api)
 
-    foreach (test ${TEST_BDD})
-        add_test(NAME bdd_${test}
-                 COMMAND behave ${test}
-                 WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/test/bdd)
-        set_tests_properties(bdd_${test}
-            PROPERTIES ENVIRONMENT "NOMINATIM_DIR=${PROJECT_BINARY_DIR}")
-    endforeach()
-
-    add_test(NAME php
-             COMMAND phpunit ./
-             WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/test/php)
-
-    add_test(NAME phpcs
-             COMMAND phpcs --report-width=120 --colors lib website utils
-             WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
+    find_program(PYTHON_BEHAVE behave)
+    find_program(PYLINT NAMES pylint3 pylint)
+    find_program(PYTEST NAMES pytest py.test-3 py.test)
+    find_program(PHPCS phpcs)
+    find_program(PHPUNIT phpunit)
+
+    if (PYTHON_BEHAVE)
+        message(STATUS "Using Python behave binary ${PYTHON_BEHAVE}")
+        foreach (test ${TEST_BDD})
+            add_test(NAME bdd_${test}
+                     COMMAND ${PYTHON_BEHAVE} ${test}
+                     WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/test/bdd)
+            set_tests_properties(bdd_${test}
+                PROPERTIES ENVIRONMENT "NOMINATIM_DIR=${PROJECT_BINARY_DIR}")
+        endforeach()
+    else()
+        message(WARNING "behave not found. BDD tests disabled." )
+    endif()
+
+    if (PHPUNIT)
+        message(STATUS "Using phpunit binary ${PHPUNIT}")
+        add_test(NAME php
+                 COMMAND ${PHPUNIT} ./
+                 WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/test/php)
+    else()
+        message(WARNING "phpunit not found. PHP unit tests disabled." )
+    endif()
+
+    if (PHPCS)
+        message(STATUS "Using phpcs binary ${PHPCS}")
+        add_test(NAME phpcs
+                 COMMAND ${PHPCS} --report-width=120 --colors lib website utils
+                 WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
+    else()
+        message(WARNING "phpcs not found. PHP linting tests disabled." )
+    endif()
+
+    if (PYLINT)
+        message(STATUS "Using pylint binary ${PYLINT}")
+        add_test(NAME pylint
+                 COMMAND ${PYLINT} --extension-pkg-whitelist=osmium nominatim
+                 WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
+    else()
+        message(WARNING "pylint not found. Python linting tests disabled.")
+    endif()
+
+    if (PYTEST)
+        message(STATUS "Using pytest binary ${PYTEST}")
+        add_test(NAME pytest
+                 COMMAND ${PYTEST} test/python
+                 WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
+    else()
+        message(WARNING "pytest not found. Python tests disabled." )
+    endif()
 endif()
 
 #-----------------------------------------------------------------------------
index a61456671986c138d722cbbc17ef67915af4d7a5..6798c39dc1fcde20525204cd2af4abbc2e392e2a 100644 (file)
@@ -49,10 +49,11 @@ are in process of consolidating the style. The following rules apply:
  * for PHP variables use CamelCase with a prefixing letter indicating the type
    (i - integer, f - float, a - array, s - string, o - object)
 
-The coding style is enforced with PHPCS and can be tested with:
+The coding style is enforced with PHPCS and pylint. It can be tested with:
 
 ```
-  phpcs --report-width=120 --colors .
+phpcs --report-width=120 --colors .
+pylint3 --extension-pkg-whitelist=osmium nominatim
 ```
 
 ## Testing
index 30b8717bac8af7d67918d157f7d7f4ff59b2bf47..aa25a1248418d064916454880eff55f94a71adf5 100755 (executable)
@@ -8,5 +8,6 @@ require('@CMAKE_SOURCE_DIR@/lib/dotenv_loader.php');
 @define('CONST_DataDir', '@CMAKE_SOURCE_DIR@');
 
 loadDotEnv();
+$_SERVER['NOMINATIM_NOMINATIM_TOOL'] = '@CMAKE_BINARY_DIR@/nominatim';
 
 require_once('@CMAKE_SOURCE_DIR@/lib/admin/@script_source@');
index 40f2b8ea7920e910641577002a2aac1d051fadfd..3bba72c3a585e81d2b44539e64f4a6ff47bf3eab 100755 (executable)
@@ -1,11 +1,15 @@
 #!/usr/bin/env python3
 import sys
+import os
 
 sys.path.insert(1, '@CMAKE_SOURCE_DIR@')
 
+os.environ['NOMINATIM_NOMINATIM_TOOL'] = __file__
+
 from nominatim import cli
 
 exit(cli.nominatim(module_dir='@CMAKE_BINARY_DIR@/module',
                    osm2pgsql_path='@CMAKE_BINARY_DIR@/osm2pgsql/osm2pgsql',
                    phplib_dir='@CMAKE_SOURCE_DIR@/lib',
-                   data_dir='@CMAKE_SOURCE_DIR@'))
+                   data_dir='@CMAKE_SOURCE_DIR@',
+                   phpcgi_path='@PHPCGI_BIN@'))
index c9d000b22ae8b85c05f22c5cb9a7c3f8507f5249..d8c98ef5056dfecb1a6b2b4761214eee8e8f22a9 100644 (file)
@@ -38,7 +38,7 @@ For running Nominatim:
 
   * [PostgreSQL](https://www.postgresql.org) (9.3+)
   * [PostGIS](https://postgis.net) (2.2+)
-  * [Python 3](https://www.python.org/) (3.4+)
+  * [Python 3](https://www.python.org/) (3.5+)
   * [Psycopg2](https://www.psycopg.org)
   * [PHP](https://php.net) (7.0 or later)
   * PHP-pgsql
index 4b1a2be7c3815f9e4f5127731c4a563534afed91..01980bd8362d8806b5d4c2156c70cd98ec2a8266 100644 (file)
@@ -19,15 +19,6 @@ Run (as the same user who will later run the updates):
 pip3 install --user osmium
 ```
 
-Nominatim needs a tool called `pyosmium-get-changes` which comes with
-Pyosmium. You need to tell Nominatim where to find it. Add the
-following line to your `.env`:
-
-    NOMINATIM_PYOSMIUM_BINARY=/home/user/.local/bin/pyosmium-get-changes
-
-The path above is fine if you used the `--user` parameter with pip.
-Replace `user` with your user name.
-
 #### Setting up the update process
 
 Next the update needs to be initialised. By default Nominatim is configured
index a1b46b0fc2c56505cdf9142021eee5bb3b34fd44..06d7cdea6cf138428b13e395c7ff79483ec9738d 100644 (file)
@@ -162,7 +162,7 @@ This overrides the specified machine readable format. (Default: 0)
   "licence":"Data Â© OpenStreetMap contributors, ODbL 1.0. https:\/\/www.openstreetmap.org\/copyright",
   "osm_type":"way",
   "osm_id":"280940520",
-"lat":"-34.4391708",
+  "lat":"-34.4391708",
   "lon":"-58.7064573",
   "place_rank":"26",
   "category":"highway",
index 86df1fb9661f61545d7d2142eeb78d6783eb4e3c..43598b9a7c638ba2177783f2b962ec45fc4c8627 100644 (file)
@@ -26,11 +26,14 @@ following packages should get you started:
 ## Prerequisites for testing and documentation
 
 The Nominatim test suite consists of behavioural tests (using behave) and
-unit tests (using PHPUnit). It has the following additional requirements:
+unit tests (using PHPUnit for PHP code and pytest for Python code).
+It has the following additional requirements:
 
 * [behave test framework](https://behave.readthedocs.io) >= 1.2.5
 * [phpunit](https://phpunit.de) >= 7.3
 * [PHP CodeSniffer](https://github.com/squizlabs/PHP_CodeSniffer)
+* [Pylint](https://pylint.org/) (2.6.0 is used for the CI)
+* [pytest](https://pytest.org)
 
 The documentation is built with mkdocs:
 
@@ -46,9 +49,9 @@ To install all necessary packages run:
 
 ```sh
 sudo apt install php-cgi phpunit php-codesniffer \
-                 python3-pip python3-setuptools python3-dev
+                 python3-pip python3-setuptools python3-dev pylint
 
-pip3 install --user behave mkdocs
+pip3 install --user behave mkdocs pytest
 ```
 
 The `mkdocs` executable will be located in `.local/bin`. You may have to add
index 7990bf9e9232f31b876a887e57bf82448515c97a..e2b01b8d6c177a0182f83b6ab0cc56b84c888225 100644 (file)
@@ -21,14 +21,15 @@ This test directory is sturctured as follows:
   | +-  api        Tests for API endpoints (search, reverse, etc.)
   |
   +-   php         PHP unit tests
+  +-   python      Python unit tests
   +-   scenes      Geometry test data
   +-   testdb      Base data for generating API test database
 ```
 
 ## PHP Unit Tests (`test/php`)
 
-Unit tests can be found in the php/ directory. They test selected php functions.
-Very low coverage.
+Unit tests for PHP code can be found in the `php/` directory. They test selected
+PHP functions. Very low coverage.
 
 To execute the test suite run
 
@@ -36,11 +37,26 @@ To execute the test suite run
     UNIT_TEST_DSN='pgsql:dbname=nominatim_unit_tests' phpunit ../
 
 It will read phpunit.xml which points to the library, test path, bootstrap
-strip and set other parameters.
+strip and sets other parameters.
 
 It will use (and destroy) a local database 'nominatim_unit_tests'. You can set
 a different connection string with e.g. UNIT_TEST_DSN='pgsql:dbname=foo_unit_tests'.
 
+## Python Unit Tests (`test/python`)
+
+Unit tests for Python code can be found in the `python/` directory. The goal is
+to have complete coverage of the Python library in `nominatim`.
+
+To execute the tests run
+
+    py.test-3 test/python
+
+or
+
+    pytest test/python
+
+The name of the pytest binary depends on your installation.
+
 ## BDD Functional Tests (`test/bdd`)
 
 Functional tests are written as BDD instructions. For more information on
index f010606355144c418de9162747c8fbf104671c56..2053082f6bbff4a13bd6a5ba6c08bc89366e0c38 100644 (file)
@@ -86,18 +86,6 @@ class SearchDescription
         $this->sType = $sType;
     }
 
-    /**
-     * Check if this might be a full address search.
-     *
-     * @return bool True if the search contains name, address and housenumber.
-     */
-    public function looksLikeFullAddress()
-    {
-        return (!empty($this->aName))
-               && (!empty($this->aAddress) || $this->sCountryCode)
-               && preg_match('/[0-9]+/', $this->sHouseNumber);
-    }
-
     /**
      * Check if any operator is set.
      *
@@ -1027,7 +1015,7 @@ class SearchDescription
                 'Name terms (stop words)' => $this->aNameNonSearch,
                 'Address terms' => $this->aAddress,
                 'Address terms (stop words)' => $this->aAddressNonSearch,
-                'Address terms (full words)' => $this->aFullNameAddress,
+                'Address terms (full words)' => $this->aFullNameAddress ?? '',
                 'Special search' => $this->iOperator,
                 'Class' => $this->sClass,
                 'Type' => $this->sType,
@@ -1039,7 +1027,7 @@ class SearchDescription
     public function dumpAsHtmlTableRow(&$aWordIDs)
     {
         $kf = function ($k) use (&$aWordIDs) {
-            return $aWordIDs[$k];
+            return $aWordIDs[$k] ?? '['.$k.']';
         };
 
         echo '<tr>';
index 59c4473bde94d43b2569331fac3814211d0b10ed..72f90735e9763e798cb354155e8b077b37666f7e 100644 (file)
@@ -7,7 +7,7 @@ class Shell
     public function __construct($sBaseCmd, ...$aParams)
     {
         if (!$sBaseCmd) {
-            throw new Exception('Command missing in new() call');
+            throw new \Exception('Command missing in new() call');
         }
         $this->baseCmd = $sBaseCmd;
         $this->aParams = array();
index 03fcdcafb0c93e623e80f5a0bdc8da491ff48187..e8d873cdb6bfeddd5159951b2b7774ecee99b166 100644 (file)
@@ -4,7 +4,6 @@
 require_once(CONST_LibDir.'/init-cmd.php');
 require_once(CONST_LibDir.'/setup_functions.php');
 require_once(CONST_LibDir.'/setup/SetupClass.php');
-require_once(CONST_LibDir.'/setup/AddressLevelParser.php');
 
 ini_set('memory_limit', '800M');
 
@@ -105,110 +104,27 @@ if ($fPostgresVersion >= 11.0) {
     );
 }
 
-
-$oIndexCmd = (new \Nominatim\Shell(CONST_DataDir.'/nominatim/nominatim.py'))
-             ->addParams('--database', $aDSNInfo['database'])
-             ->addParams('--port', $aDSNInfo['port'])
-             ->addParams('--threads', $aResult['index-instances']);
-if (!$aResult['quiet']) {
-    $oIndexCmd->addParams('--verbose');
+$oNominatimCmd = new \Nominatim\Shell(getSetting('NOMINATIM_TOOL'));
+if ($aResult['quiet']) {
+    $oNominatimCmd->addParams('--quiet');
 }
 if ($aResult['verbose']) {
-    $oIndexCmd->addParams('--verbose');
-}
-if (isset($aDSNInfo['hostspec']) && $aDSNInfo['hostspec']) {
-    $oIndexCmd->addParams('--host', $aDSNInfo['hostspec']);
-}
-if (isset($aDSNInfo['username']) && $aDSNInfo['username']) {
-    $oIndexCmd->addParams('--username', $aDSNInfo['username']);
-}
-if (isset($aDSNInfo['password']) && $aDSNInfo['password']) {
-    $oIndexCmd->addEnvPair('PGPASSWORD', $aDSNInfo['password']);
+    $oNominatimCmd->addParams('--verbose');
 }
 
-$sPyosmiumBin = getSetting('PYOSMIUM_BINARY');
-$sBaseURL = getSetting('REPLICATION_URL');
-
 
 if ($aResult['init-updates']) {
-    // sanity check that the replication URL is correct
-    $sBaseState = file_get_contents($sBaseURL.'/state.txt');
-    if ($sBaseState === false) {
-        echo "\nCannot find state.txt file at the configured replication URL.\n";
-        echo "Does the URL point to a directory containing OSM update data?\n\n";
-        fail('replication URL not reachable.');
-    }
-    // sanity check for pyosmium-get-changes
-    if (!$sPyosmiumBin) {
-        echo "\nNOMINATIM_PYOSMIUM_BINARY not configured.\n";
-        echo "You need to install pyosmium and set up the path to pyosmium-get-changes\n";
-        echo "in your local .env file.\n\n";
-        fail('NOMINATIM_PYOSMIUM_BINARY not configured');
-    }
-
-    $aOutput = 0;
-    $oCMD = new \Nominatim\Shell($sPyosmiumBin, '--help');
-    exec($oCMD->escapedCmd(), $aOutput, $iRet);
-
-    if ($iRet != 0) {
-        echo "Cannot execute pyosmium-get-changes.\n";
-        echo "Make sure you have pyosmium installed correctly\n";
-        echo "and have set up NOMINATIM_PYOSMIUM_BINARY to point to pyosmium-get-changes.\n";
-        fail('pyosmium-get-changes not found or not usable');
-    }
-
-    if (!$aResult['no-update-functions']) {
-        // instantiate setupClass to use the function therein
-        $cSetup = new SetupFunctions(array(
-                                      'enable-diff-updates' => true,
-                                      'verbose' => $aResult['verbose']
-                                     ));
-        $cSetup->createFunctions();
-    }
-
-    $sDatabaseDate = getDatabaseDate($oDB);
-    if (!$sDatabaseDate) {
-        fail('Cannot determine date of database.');
-    }
-    $sWindBack = strftime('%Y-%m-%dT%H:%M:%SZ', strtotime($sDatabaseDate) - (3*60*60));
-
-    // get the appropriate state id
-    $aOutput = 0;
-    $oCMD = (new \Nominatim\Shell($sPyosmiumBin))
-            ->addParams('--start-date', $sWindBack)
-            ->addParams('--server', $sBaseURL);
-
-    exec($oCMD->escapedCmd(), $aOutput, $iRet);
-    if ($iRet != 0 || $aOutput[0] == 'None') {
-        fail('Error running pyosmium tools');
-    }
+    $oCmd = (clone($oNominatimCmd))->addParams('replication', '--init');
 
-    $oDB->exec('TRUNCATE import_status');
-    $sSQL = "INSERT INTO import_status (lastimportdate, sequence_id, indexed) VALUES('";
-    $sSQL .= $sDatabaseDate."',".$aOutput[0].', true)';
-
-    try {
-        $oDB->exec($sSQL);
-    } catch (\Nominatim\DatabaseError $e) {
-        fail('Could not enter sequence into database.');
+    if ($aResult['no-update-functions']) {
+        $oCmd->addParams('--no-update-functions');
     }
 
-    echo "Done. Database updates will start at sequence $aOutput[0] ($sWindBack)\n";
+    $oCmd->run();
 }
 
 if ($aResult['check-for-updates']) {
-    $aLastState = $oDB->getRow('SELECT sequence_id FROM import_status');
-
-    if (!$aLastState['sequence_id']) {
-        fail('Updates not set up. Please run ./utils/update.php --init-updates.');
-    }
-
-    $oCmd = (new \Nominatim\Shell(CONST_BinDir.'/check_server_for_updates.py'))
-            ->addParams($sBaseURL)
-            ->addParams($aLastState['sequence_id']);
-    $iRet = $oCmd->run();
-
-    exit($iRet);
+    exit((clone($oNominatimCmd))->addParams('replication', '--check-for-updates')->run());
 }
 
 if (isset($aResult['import-diff']) || isset($aResult['import-file'])) {
@@ -232,9 +148,7 @@ if (isset($aResult['import-diff']) || isset($aResult['import-file'])) {
 }
 
 if ($aResult['calculate-postcodes']) {
-    info('Update postcodes centroids');
-    $sTemplate = file_get_contents(CONST_DataDir.'/sql/update-postcodes.sql');
-    runSQLScript($sTemplate, true, true);
+    (clone($oNominatimCmd))->addParams('refresh', '--postcodes')->run();
 }
 
 $sTemporaryFile = CONST_InstallDir.'/osmosischange.osc';
@@ -283,28 +197,18 @@ if ($bHaveDiff) {
 }
 
 if ($aResult['recompute-word-counts']) {
-    info('Recompute frequency of full-word search terms');
-    $sTemplate = file_get_contents(CONST_DataDir.'/sql/words_from_search_name.sql');
-    runSQLScript($sTemplate, true, true);
+    (clone($oNominatimCmd))->addParams('refresh', '--word-counts')->run();
 }
 
 if ($aResult['index']) {
-    $oCmd = (clone $oIndexCmd)
-            ->addParams('--minrank', $aResult['index-rank'], '-b');
-    $oCmd->run();
-
-    $oCmd = (clone $oIndexCmd)
-            ->addParams('--minrank', $aResult['index-rank']);
-    $oCmd->run();
-
-    $oDB->exec('update import_status set indexed = true');
+    (clone $oNominatimCmd)
+        ->addParams('index', '--minrank', $aResult['index-rank'])
+        ->addParams('--threads', $aResult['index-instances'])
+        ->run();
 }
 
 if ($aResult['update-address-levels']) {
-    $sAddressLevelConfig = getSettingConfig('ADDRESS_LEVEL_CONFIG', 'address-levels.json');
-    echo 'Updating address levels from '.$sAddressLevelConfig.".\n";
-    $oAlParser = new \Nominatim\Setup\AddressLevelParser($sAddressLevelConfig);
-    $oAlParser->createTable($oDB, 'address_levels');
+    (clone($oNominatimCmd))->addParams('refresh', '--address-levels')->run();
 }
 
 if ($aResult['recompute-importance']) {
@@ -325,157 +229,17 @@ if ($aResult['recompute-importance']) {
 }
 
 if ($aResult['import-osmosis'] || $aResult['import-osmosis-all']) {
-    //
-    if (strpos($sBaseURL, 'download.geofabrik.de') !== false && getSetting('REPLICATION_UPDATE_INTERVAL') < 86400) {
-        fail('Error: Update interval too low for download.geofabrik.de. ' .
-             "Please check install documentation (https://nominatim.org/release-docs/latest/admin/Import-and-Update#setting-up-the-update-process)\n");
+    $oCmd = (clone($oNominatimCmd))
+              ->addParams('replication')
+              ->addParams('--threads', $aResult['index-instances']);
+
+    if (!$aResult['import-osmosis-all']) {
+        $oCmd->addParams('--once');
     }
 
-    $sImportFile = CONST_InstallDir.'/osmosischange.osc';
-
-    $oCMDDownload = (new \Nominatim\Shell($sPyosmiumBin))
-                    ->addParams('--server', $sBaseURL)
-                    ->addParams('--outfile', $sImportFile)
-                    ->addParams('--size', getSetting('REPLICATION_MAX_DIFF'));
-
-    $oCMDImport = (clone $oOsm2pgsqlCmd)->addParams($sImportFile);
-
-    while (true) {
-        $fStartTime = time();
-        $aLastState = $oDB->getRow('SELECT *, EXTRACT (EPOCH FROM lastimportdate) as unix_ts FROM import_status');
-
-        if (!$aLastState['sequence_id']) {
-            echo "Updates not set up. Please run ./utils/update.php --init-updates.\n";
-            exit(1);
-        }
-
-        echo 'Currently at sequence '.$aLastState['sequence_id'].' ('.$aLastState['lastimportdate'].') - '.$aLastState['indexed']." indexed\n";
-
-        $sBatchEnd = $aLastState['lastimportdate'];
-        $iEndSequence = $aLastState['sequence_id'];
-
-        if ($aLastState['indexed']) {
-            // Sleep if the update interval has not yet been reached.
-            $fNextUpdate = $aLastState['unix_ts'] + getSetting('REPLICATION_UPDATE_INTERVAL');
-            if ($fNextUpdate > $fStartTime) {
-                $iSleepTime = $fNextUpdate - $fStartTime;
-                echo "Waiting for next update for $iSleepTime sec.";
-                sleep($iSleepTime);
-            }
-
-            // Download the next batch of changes.
-            do {
-                $fCMDStartTime = time();
-                $iNextSeq = (int) $aLastState['sequence_id'];
-                unset($aOutput);
-
-                $oCMD = (clone $oCMDDownload)->addParams('--start-id', $iNextSeq);
-                echo $oCMD->escapedCmd()."\n";
-                if (file_exists($sImportFile)) {
-                    unlink($sImportFile);
-                }
-                exec($oCMD->escapedCmd(), $aOutput, $iResult);
-
-                if ($iResult == 3) {
-                    $sSleep = getSetting('REPLICATION_RECHECK_INTERVAL');
-                    echo 'No new updates. Sleeping for '.$sSleep." sec.\n";
-                    sleep($sSleep);
-                } elseif ($iResult != 0) {
-                    echo 'ERROR: updates failed.';
-                    exit($iResult);
-                } else {
-                    $iEndSequence = (int)$aOutput[0];
-                }
-            } while ($iResult);
-
-            // get the newest object from the diff file
-            $sBatchEnd = 0;
-            $iRet = 0;
-            $oCMD = new \Nominatim\Shell(CONST_BinDir.'/osm_file_date.py', $sImportFile);
-            exec($oCMD->escapedCmd(), $sBatchEnd, $iRet);
-            if ($iRet == 5) {
-                echo "Diff file is empty. skipping import.\n";
-                if (!$aResult['import-osmosis-all']) {
-                    exit(0);
-                } else {
-                    continue;
-                }
-            }
-            if ($iRet != 0) {
-                fail('Error getting date from diff file.');
-            }
-            $sBatchEnd = $sBatchEnd[0];
-
-            // Import the file
-            $fCMDStartTime = time();
-
-
-            echo $oCMDImport->escapedCmd()."\n";
-            unset($sJunk);
-            $iErrorLevel = $oCMDImport->run();
-            if ($iErrorLevel) {
-                echo "Error executing osm2pgsql: $iErrorLevel\n";
-                exit($iErrorLevel);
-            }
-
-            // write the update logs
-            $iFileSize = filesize($sImportFile);
-            $sSQL = 'INSERT INTO import_osmosis_log';
-            $sSQL .= '(batchend, batchseq, batchsize, starttime, endtime, event)';
-            $sSQL .= " values ('$sBatchEnd',$iEndSequence,$iFileSize,'";
-            $sSQL .= date('Y-m-d H:i:s', $fCMDStartTime)."','";
-            $sSQL .= date('Y-m-d H:i:s')."','import')";
-            var_Dump($sSQL);
-            $oDB->exec($sSQL);
-
-            // update the status
-            $sSQL = "UPDATE import_status SET lastimportdate = '$sBatchEnd', indexed=false, sequence_id = $iEndSequence";
-            var_Dump($sSQL);
-            $oDB->exec($sSQL);
-            echo date('Y-m-d H:i:s')." Completed download step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60, 2)." minutes\n";
-        }
-
-        // Index file
-        if (!$aResult['no-index']) {
-            $fCMDStartTime = time();
-
-            $oThisIndexCmd = clone($oIndexCmd);
-            $oThisIndexCmd->addParams('-b');
-            echo $oThisIndexCmd->escapedCmd()."\n";
-            $iErrorLevel = $oThisIndexCmd->run();
-            if ($iErrorLevel) {
-                echo "Error: $iErrorLevel\n";
-                exit($iErrorLevel);
-            }
-
-            $oThisIndexCmd = clone($oIndexCmd);
-            echo $oThisIndexCmd->escapedCmd()."\n";
-            $iErrorLevel = $oThisIndexCmd->run();
-            if ($iErrorLevel) {
-                echo "Error: $iErrorLevel\n";
-                exit($iErrorLevel);
-            }
-
-            $sSQL = 'INSERT INTO import_osmosis_log';
-            $sSQL .= '(batchend, batchseq, batchsize, starttime, endtime, event)';
-            $sSQL .= " values ('$sBatchEnd',$iEndSequence,NULL,'";
-            $sSQL .= date('Y-m-d H:i:s', $fCMDStartTime)."','";
-            $sSQL .= date('Y-m-d H:i:s')."','index')";
-            var_Dump($sSQL);
-            $oDB->exec($sSQL);
-            echo date('Y-m-d H:i:s')." Completed index step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60, 2)." minutes\n";
-
-            $sSQL = 'update import_status set indexed = true';
-            $oDB->exec($sSQL);
-        } else {
-            if ($aResult['import-osmosis-all']) {
-                echo "Error: --no-index cannot be used with continuous imports (--import-osmosis-all).\n";
-                exit(1);
-            }
-        }
-
-        $fDuration = time() - $fStartTime;
-        echo date('Y-m-d H:i:s')." Completed all for $sBatchEnd in ".round($fDuration/60, 2)." minutes\n";
-        if (!$aResult['import-osmosis-all']) exit(0);
+    if ($aResult['no-index']) {
+        $oCmd->addParams('--no-index');
     }
+
+    exit($oCmd->run());
 }
index 823a6631e08cfeab2b134d803784cf77beb985d8..8de8157623516db94c4ca3c8ca8df303326961a0 100644 (file)
@@ -16,58 +16,3 @@ function formatOSMType($sType, $bIncludeExternal = true)
 
     return '';
 }
-
-function osmLink($aFeature, $sRefText = false)
-{
-    $sOSMType = formatOSMType($aFeature['osm_type'], false);
-    if ($sOSMType) {
-        return '<a href="//www.openstreetmap.org/'.$sOSMType.'/'.$aFeature['osm_id'].'">'.$sOSMType.' '.($sRefText?$sRefText:$aFeature['osm_id']).'</a>';
-    }
-    return '';
-}
-
-function wikipediaLink($aFeature)
-{
-    if ($aFeature['wikipedia']) {
-        list($sLanguage, $sArticle) = explode(':', $aFeature['wikipedia']);
-        return '<a href="https://'.$sLanguage.'.wikipedia.org/wiki/'.urlencode($sArticle).'" target="_blank">'.$aFeature['wikipedia'].'</a>';
-    }
-    return '';
-}
-
-function detailsLink($aFeature, $sTitle = false, $sExtraProperties = false)
-{
-    if (!$aFeature['place_id']) return '';
-
-    $sHtml = '<a ';
-    if ($sExtraProperties) {
-        $sHtml .= $sExtraProperties.' ';
-    }
-
-    $sHtml .= 'href="details.php?place_id='.$aFeature['place_id'].'">'.($sTitle?$sTitle:$aFeature['place_id']).'</a>';
-
-    return $sHtml;
-}
-
-function detailsPermaLink($aFeature, $sRefText = false, $sExtraProperties = false)
-{
-    $sOSMType = formatOSMType($aFeature['osm_type'], false);
-
-    if ($sOSMType) {
-        $sHtml = '<a ';
-        if ($sExtraProperties) {
-            $sHtml .= $sExtraProperties.' ';
-        }
-        $sHtml .= 'href="details.php?osmtype='.$aFeature['osm_type']
-                  .'&osmid='.$aFeature['osm_id'].'&class='.$aFeature['class'].'">';
-
-        if ($sRefText) {
-            $sHtml .= $sRefText.'</a>';
-        } else {
-            $sHtml .= $sOSMType.' '.$aFeature['osm_id'].'</a>';
-        }
-
-        return $sHtml;
-    }
-    return detailsLink($aFeature, $sRefText, $sExtraProperties);
-}
diff --git a/lib/setup/AddressLevelParser.php b/lib/setup/AddressLevelParser.php
deleted file mode 100644 (file)
index a399c95..0000000
+++ /dev/null
@@ -1,98 +0,0 @@
-<?php
-
-namespace Nominatim\Setup;
-
-/**
- * Parses an address level description.
- */
-class AddressLevelParser
-{
-    private $aLevels;
-
-    public function __construct($sDescriptionFile)
-    {
-        $sJson = file_get_contents($sDescriptionFile);
-        $this->aLevels = json_decode($sJson, true);
-        if (!$this->aLevels) {
-            switch (json_last_error()) {
-                case JSON_ERROR_NONE:
-                    break;
-                case JSON_ERROR_DEPTH:
-                    fail('JSON error - Maximum stack depth exceeded');
-                    break;
-                case JSON_ERROR_STATE_MISMATCH:
-                    fail('JSON error - Underflow or the modes mismatch');
-                    break;
-                case JSON_ERROR_CTRL_CHAR:
-                    fail('JSON error - Unexpected control character found');
-                    break;
-                case JSON_ERROR_SYNTAX:
-                    fail('JSON error - Syntax error, malformed JSON');
-                    break;
-                case JSON_ERROR_UTF8:
-                    fail('JSON error - Malformed UTF-8 characters, possibly incorrectly encoded');
-                    break;
-                default:
-                    fail('JSON error - Unknown error');
-                    break;
-            }
-        }
-    }
-
-    /**
-     * Dump the description into a database table.
-     *
-     * @param object $oDB    Database conneciton to use.
-     * @param string $sTable Name of table to create.
-     *
-     * @return null
-     *
-     * A new table is created. Any previously existing table is dropped.
-     * The table has the following columns:
-     * country, class, type, rank_search, rank_address.
-     */
-    public function createTable($oDB, $sTable)
-    {
-        $oDB->exec('DROP TABLE IF EXISTS '.$sTable);
-        $sSql = 'CREATE TABLE '.$sTable;
-        $sSql .= '(country_code varchar(2), class TEXT, type TEXT,';
-        $sSql .= ' rank_search SMALLINT, rank_address SMALLINT)';
-        $oDB->exec($sSql);
-
-        $sSql = 'CREATE UNIQUE INDEX ON '.$sTable.' (country_code, class, type)';
-        $oDB->exec($sSql);
-
-        $sSql = 'INSERT INTO '.$sTable.' VALUES ';
-        foreach ($this->aLevels as $aLevel) {
-            $aCountries = array();
-            if (isset($aLevel['countries'])) {
-                foreach ($aLevel['countries'] as $sCountry) {
-                    $aCountries[$sCountry] = $oDB->getDBQuoted($sCountry);
-                }
-            } else {
-                $aCountries['NULL'] = 'NULL';
-            }
-            foreach ($aLevel['tags'] as $sKey => $aValues) {
-                foreach ($aValues as $sValue => $mRanks) {
-                    $aFields = array(
-                        $oDB->getDBQuoted($sKey),
-                        $sValue ? $oDB->getDBQuoted($sValue) : 'NULL'
-                    );
-                    if (is_array($mRanks)) {
-                        $aFields[] = (string) $mRanks[0];
-                        $aFields[] = (string) $mRanks[1];
-                    } else {
-                        $aFields[] = (string) $mRanks;
-                        $aFields[] = (string) $mRanks;
-                    }
-                    $sLine = ','.join(',', $aFields).'),';
-
-                    foreach ($aCountries as $sCountries) {
-                        $sSql .= '('.$sCountries.$sLine;
-                    }
-                }
-            }
-        }
-        $oDB->exec(rtrim($sSql, ','));
-    }
-}
index 77b14a8a4c27260a3a5bde6c303a43f86b16af83..a865b8f0cb2edf4780a1b54ed8de6b5f45e450ac 100755 (executable)
@@ -2,7 +2,6 @@
 
 namespace Nominatim\Setup;
 
-require_once(CONST_LibDir.'/setup/AddressLevelParser.php');
 require_once(CONST_LibDir.'/Shell.php');
 
 class SetupFunctions
@@ -19,6 +18,7 @@ class SetupFunctions
     protected $bNoPartitions;
     protected $bDrop;
     protected $oDB = null;
+    protected $oNominatimCmd;
 
     public function __construct(array $aCMDResult)
     {
@@ -81,6 +81,14 @@ class SetupFunctions
         }
 
         $this->bDrop = isset($aCMDResult['drop']) && $aCMDResult['drop'];
+
+        $this->oNominatimCmd = new \Nominatim\Shell(getSetting('NOMINATIM_TOOL'));
+        if ($this->bQuiet) {
+            $this->oNominatimCmd->addParams('--quiet');
+        }
+        if ($this->bVerbose) {
+            $this->oNominatimCmd->addParams('--verbose');
+        }
     }
 
     public function createDB()
@@ -256,8 +264,7 @@ class SetupFunctions
             $this->dropTable('search_name');
         }
 
-        $oAlParser = new AddressLevelParser(getSettingConfig('ADDRESS_LEVEL_CONFIG', 'address-levels.json'));
-        $oAlParser->createTable($this->db(), 'address_levels');
+        (clone($this->oNominatimCmd))->addParams('refresh', '--address-levels')->run();
     }
 
     public function createTableTriggers()
@@ -283,9 +290,7 @@ class SetupFunctions
     public function createPartitionFunctions()
     {
         info('Create Partition Functions');
-
-        $sTemplate = file_get_contents(CONST_DataDir.'/sql/partition-functions.src.sql');
-        $this->pgsqlRunPartitionScript($sTemplate);
+        $this->createSqlFunctions(); // also create partition functions
     }
 
     public function importWikipediaArticles()
@@ -549,30 +554,10 @@ class SetupFunctions
     {
         $this->checkModulePresence(); // raises exception on failure
 
-        $oBaseCmd = (new \Nominatim\Shell(CONST_DataDir.'/nominatim/nominatim.py'))
-                    ->addParams('--database', $this->aDSNInfo['database'])
-                    ->addParams('--port', $this->aDSNInfo['port'])
-                    ->addParams('--threads', $this->iInstances);
-
-        if (!$this->bQuiet) {
-            $oBaseCmd->addParams('-v');
-        }
-        if ($this->bVerbose) {
-            $oBaseCmd->addParams('-v');
-        }
-        if (isset($this->aDSNInfo['hostspec'])) {
-            $oBaseCmd->addParams('--host', $this->aDSNInfo['hostspec']);
-        }
-        if (isset($this->aDSNInfo['username'])) {
-            $oBaseCmd->addParams('--user', $this->aDSNInfo['username']);
-        }
-        if (isset($this->aDSNInfo['password'])) {
-            $oBaseCmd->addEnvPair('PGPASSWORD', $this->aDSNInfo['password']);
-        }
+        $oBaseCmd = (clone $this->oNominatimCmd)->addParams('index');
 
         info('Index ranks 0 - 4');
         $oCmd = (clone $oBaseCmd)->addParams('--maxrank', 4);
-        echo $oCmd->escapedCmd();
 
         $iStatus = $oCmd->run();
         if ($iStatus != 0) {
@@ -581,14 +566,14 @@ class SetupFunctions
         if (!$bIndexNoanalyse) $this->pgsqlRunScript('ANALYSE');
 
         info('Index administrative boundaries');
-        $oCmd = (clone $oBaseCmd)->addParams('-b');
+        $oCmd = (clone $oBaseCmd)->addParams('--boundaries-only');
         $iStatus = $oCmd->run();
         if ($iStatus != 0) {
             fail('error status ' . $iStatus . ' running nominatim!');
         }
 
         info('Index ranks 5 - 25');
-        $oCmd = (clone $oBaseCmd)->addParams('--minrank', 5, '--maxrank', 25);
+        $oCmd = (clone $oBaseCmd)->addParams('--no-boundaries', '--minrank', 5, '--maxrank', 25);
         $iStatus = $oCmd->run();
         if ($iStatus != 0) {
             fail('error status ' . $iStatus . ' running nominatim!');
@@ -597,7 +582,7 @@ class SetupFunctions
         if (!$bIndexNoanalyse) $this->pgsqlRunScript('ANALYSE');
 
         info('Index ranks 26 - 30');
-        $oCmd = (clone $oBaseCmd)->addParams('--minrank', 26);
+        $oCmd = (clone $oBaseCmd)->addParams('--no-boundaries', '--minrank', 26);
         $iStatus = $oCmd->run();
         if ($iStatus != 0) {
             fail('error status ' . $iStatus . ' running nominatim!');
@@ -801,43 +786,18 @@ class SetupFunctions
 
     private function createSqlFunctions()
     {
-        $sBasePath = CONST_DataDir.'/sql/functions/';
-        $sTemplate = file_get_contents($sBasePath.'utils.sql');
-        $sTemplate .= file_get_contents($sBasePath.'normalization.sql');
-        $sTemplate .= file_get_contents($sBasePath.'ranking.sql');
-        $sTemplate .= file_get_contents($sBasePath.'importance.sql');
-        $sTemplate .= file_get_contents($sBasePath.'address_lookup.sql');
-        $sTemplate .= file_get_contents($sBasePath.'interpolation.sql');
-        if ($this->db()->tableExists('place')) {
-            $sTemplate .= file_get_contents($sBasePath.'place_triggers.sql');
-        }
-        if ($this->db()->tableExists('placex')) {
-            $sTemplate .= file_get_contents($sBasePath.'placex_triggers.sql');
-        }
-        if ($this->db()->tableExists('location_postcode')) {
-            $sTemplate .= file_get_contents($sBasePath.'postcode_triggers.sql');
-        }
-        $sTemplate = str_replace('{modulepath}', $this->sModulePath, $sTemplate);
-        if ($this->bEnableDiffUpdates) {
-            $sTemplate = str_replace('RETURN NEW; -- %DIFFUPDATES%', '--', $sTemplate);
+        $oCmd = (clone($this->oNominatimCmd))
+                ->addParams('refresh', '--functions');
+
+        if (!$this->bEnableDiffUpdates) {
+            $oCmd->addParams('--no-diff-updates');
         }
+
         if ($this->bEnableDebugStatements) {
-            $sTemplate = str_replace('--DEBUG:', '', $sTemplate);
-        }
-        if (getSettingBool('LIMIT_REINDEXING')) {
-            $sTemplate = str_replace('--LIMIT INDEXING:', '', $sTemplate);
-        }
-        if (!getSettingBool('USE_US_TIGER_DATA')) {
-            $sTemplate = str_replace('-- %NOTIGERDATA% ', '', $sTemplate);
-        }
-        if (!getSettingBool('USE_AUX_LOCATION_DATA')) {
-            $sTemplate = str_replace('-- %NOAUXDATA% ', '', $sTemplate);
+            $oCmd->addParams('--enable-debug-statements');
         }
 
-        $sReverseOnly = $this->dbReverseOnly() ? 'true' : 'false';
-        $sTemplate = str_replace('%REVERSE-ONLY%', $sReverseOnly, $sTemplate);
-
-        $this->pgsqlRunScript($sTemplate);
+        $oCmd->run();
     }
 
     private function pgsqlRunPartitionScript($sTemplate)
diff --git a/nominatim/admin/exec_utils.py b/nominatim/admin/exec_utils.py
deleted file mode 100644 (file)
index f3f59de..0000000
+++ /dev/null
@@ -1,29 +0,0 @@
-"""
-Helper functions for executing external programs.
-"""
-import subprocess
-
-def run_legacy_script(script, *args, nominatim_env=None, throw_on_fail=False):
-    """ Run a Nominatim PHP script with the given arguments.
-
-        Returns the exit code of the script. If `throw_on_fail` is True
-        then throw a `CalledProcessError` on a non-zero exit.
-    """
-    cmd = ['/usr/bin/env', 'php', '-Cq',
-           nominatim_env.phplib_dir / 'admin' / script]
-    cmd.extend([str(a) for a in args])
-
-    env = nominatim_env.config.get_os_env()
-    env['NOMINATIM_DATADIR'] = str(nominatim_env.data_dir)
-    env['NOMINATIM_BINDIR'] = str(nominatim_env.data_dir / 'utils')
-    if not env['NOMINATIM_DATABASE_MODULE_PATH']:
-        env['NOMINATIM_DATABASE_MODULE_PATH'] = nominatim_env.module_dir
-    if not env['NOMINATIM_OSM2PGSQL_BINARY']:
-        env['NOMINATIM_OSM2PGSQL_BINARY'] = nominatim_env.osm2pgsql_path
-
-    proc = subprocess.run(cmd, cwd=str(nominatim_env.project_dir), env=env)
-
-    if throw_on_fail:
-        proc.check_returncode()
-
-    return proc.returncode
index 8d4071db973a117f5ba5c501b70dafed6bdd033a..4873308deb19cac7da2144fc231b9c39e43561c4 100644 (file)
@@ -2,14 +2,30 @@
 Command-line interface to the Nominatim functions for import, update,
 database administration and querying.
 """
-import sys
+import datetime as dt
 import os
+import sys
+import time
 import argparse
 import logging
 from pathlib import Path
 
 from .config import Configuration
-from .admin.exec_utils import run_legacy_script
+from .tools.exec_utils import run_legacy_script, run_api_script
+from .db.connection import connect
+from .db import status
+from .errors import UsageError
+
+LOG = logging.getLogger()
+
+def _num_system_cpus():
+    try:
+        cpus = len(os.sched_getaffinity(0))
+    except NotImplementedError:
+        cpus = None
+
+    return cpus or os.cpu_count()
+
 
 class CommandlineParser:
     """ Wraps some of the common functions for parsing the command line
@@ -57,23 +73,44 @@ class CommandlineParser:
         """ Parse the command line arguments of the program and execute the
             appropriate subcommand.
         """
-        args = self.parser.parse_args()
+        args = self.parser.parse_args(args=kwargs.get('cli_args'))
 
         if args.subcommand is None:
-            return self.parser.print_help()
+            self.parser.print_help()
+            return 1
 
-        for arg in ('module_dir', 'osm2pgsql_path', 'phplib_dir', 'data_dir'):
+        for arg in ('module_dir', 'osm2pgsql_path', 'phplib_dir', 'data_dir', 'phpcgi_path'):
             setattr(args, arg, Path(kwargs[arg]))
         args.project_dir = Path(args.project_dir)
 
         logging.basicConfig(stream=sys.stderr,
-                            format='%(asctime)s %(levelname)s: %(message)s',
+                            format='%(asctime)s: %(message)s',
                             datefmt='%Y-%m-%d %H:%M:%S',
                             level=max(4 - args.verbose, 1) * 10)
 
         args.config = Configuration(args.project_dir, args.data_dir / 'settings')
 
-        return args.command.run(args)
+        try:
+            return args.command.run(args)
+        except UsageError as exception:
+            log = logging.getLogger()
+            if log.isEnabledFor(logging.DEBUG):
+                raise # use Python's exception printing
+            log.fatal('FATAL: %s', exception)
+
+        # If we get here, then execution has failed in some way.
+        return 1
+
+
+def _osm2pgsql_options_from_args(args, default_cache, default_threads):
+    """ Set up the stanadrd osm2pgsql from the command line arguments.
+    """
+    return dict(osm2pgsql=args.osm2pgsql_path,
+                osm2pgsql_cache=args.osm2pgsql_cache or default_cache,
+                osm2pgsql_style=args.config.get_import_style_file(),
+                threads=args.threads or default_threads,
+                dsn=args.config.get_libpq_dsn(),
+                flatnode_file=args.config.FLATNODE_FILE)
 
 ##### Subcommand classes
 #
@@ -86,6 +123,8 @@ class CommandlineParser:
 #
 # No need to document the functions each time.
 # pylint: disable=C0111
+# Using non-top-level imports to make pyosmium optional for replication only.
+# pylint: disable=E0012,C0415
 
 
 class SetupAll:
@@ -185,13 +224,12 @@ class SetupSpecialPhrases:
                            help='Pull special phrases from the OSM wiki.')
         group = parser.add_argument_group('Output arguments')
         group.add_argument('-o', '--output', default='-',
-                           type=argparse.FileType('w', encoding='UTF-8'),
                            help="""File to write the preprocessed phrases to.
                                    If omitted, it will be written to stdout.""")
 
     @staticmethod
     def run(args):
-        if args.output.name != '<stdout>':
+        if args.output != '-':
             raise NotImplementedError('Only output to stdout is currently implemented.')
         return run_legacy_script('specialphrases.php', '--wiki-import', nominatim_env=args)
 
@@ -219,26 +257,123 @@ class UpdateReplication:
         group.add_argument('--no-index', action='store_false', dest='do_index',
                            help="""Do not index the new data. Only applicable
                                    together with --once""")
+        group.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
+                           help='Size of cache to be used by osm2pgsql (in MB)')
+
+    @staticmethod
+    def _init_replication(args):
+        from .tools import replication, refresh
+
+        LOG.warning("Initialising replication updates")
+        conn = connect(args.config.get_libpq_dsn())
+        replication.init_replication(conn, base_url=args.config.REPLICATION_URL)
+        if args.update_functions:
+            LOG.warning("Create functions")
+            refresh.create_functions(conn, args.config, args.data_dir,
+                                     True, False)
+        conn.close()
+        return 0
+
+
+    @staticmethod
+    def _check_for_updates(args):
+        from .tools import replication
+
+        conn = connect(args.config.get_libpq_dsn())
+        ret = replication.check_for_updates(conn, base_url=args.config.REPLICATION_URL)
+        conn.close()
+        return ret
+
+    @staticmethod
+    def _report_update(batchdate, start_import, start_index):
+        def round_time(delta):
+            return dt.timedelta(seconds=int(delta.total_seconds()))
+
+        end = dt.datetime.now(dt.timezone.utc)
+        LOG.warning("Update completed. Import: %s. %sTotal: %s. Remaining backlog: %s.",
+                    round_time((start_index or end) - start_import),
+                    "Indexing: {} ".format(round_time(end - start_index))
+                    if start_index else '',
+                    round_time(end - start_import),
+                    round_time(end - batchdate))
+
+    @staticmethod
+    def _update(args):
+        from .tools import replication
+        from .indexer.indexer import Indexer
+
+        params = _osm2pgsql_options_from_args(args, 2000, 1)
+        params.update(base_url=args.config.REPLICATION_URL,
+                      update_interval=args.config.get_int('REPLICATION_UPDATE_INTERVAL'),
+                      import_file=args.project_dir / 'osmosischange.osc',
+                      max_diff_size=args.config.get_int('REPLICATION_MAX_DIFF'),
+                      indexed_only=not args.once)
+
+        # Sanity check to not overwhelm the Geofabrik servers.
+        if 'download.geofabrik.de'in params['base_url']\
+           and params['update_interval'] < 86400:
+            LOG.fatal("Update interval too low for download.geofabrik.de.\n"
+                      "Please check install documentation "
+                      "(https://nominatim.org/release-docs/latest/admin/Import-and-Update#"
+                      "setting-up-the-update-process).")
+            raise UsageError("Invalid replication update interval setting.")
+
+        if not args.once:
+            if not args.do_index:
+                LOG.fatal("Indexing cannot be disabled when running updates continuously.")
+                raise UsageError("Bad argument '--no-index'.")
+            recheck_interval = args.config.get_int('REPLICATION_RECHECK_INTERVAL')
+
+        while True:
+            conn = connect(args.config.get_libpq_dsn())
+            start = dt.datetime.now(dt.timezone.utc)
+            state = replication.update(conn, params)
+            status.log_status(conn, start, 'import')
+            batchdate, _, _ = status.get_status(conn)
+            conn.close()
+
+            if state is not replication.UpdateState.NO_CHANGES and args.do_index:
+                index_start = dt.datetime.now(dt.timezone.utc)
+                indexer = Indexer(args.config.get_libpq_dsn(),
+                                  args.threads or 1)
+                indexer.index_boundaries(0, 30)
+                indexer.index_by_rank(0, 30)
+
+                conn = connect(args.config.get_libpq_dsn())
+                status.set_indexed(conn, True)
+                status.log_status(conn, index_start, 'index')
+                conn.close()
+            else:
+                index_start = None
+
+            if LOG.isEnabledFor(logging.WARNING):
+                UpdateReplication._report_update(batchdate, start, index_start)
+
+            if args.once:
+                break
+
+            if state is replication.UpdateState.NO_CHANGES:
+                LOG.warning("No new changes. Sleeping for %d sec.", recheck_interval)
+                time.sleep(recheck_interval)
+
+        return state.value
 
     @staticmethod
     def run(args):
-        params = ['update.php']
+        try:
+            import osmium # pylint: disable=W0611
+        except ModuleNotFoundError:
+            LOG.fatal("pyosmium not installed. Replication functions not available.\n"
+                      "To install pyosmium via pip: pip3 install osmium")
+            return 1
+
         if args.init:
-            params.append('--init-updates')
-            if not args.update_functions:
-                params.append('--no-update-functions')
-        elif args.check_for_updates:
-            params.append('--check-for-updates')
-        else:
-            if args.once:
-                params.append('--import-osmosis')
-            else:
-                params.append('--import-osmosis-all')
-            if not args.do_index:
-                params.append('--no-index')
+            return UpdateReplication._init_replication(args)
 
-        return run_legacy_script(*params, nominatim_env=args)
+        if args.check_for_updates:
+            return UpdateReplication._check_for_updates(args)
 
+        return UpdateReplication._update(args)
 
 class UpdateAddData:
     """\
@@ -297,11 +432,35 @@ class UpdateIndex:
 
     @staticmethod
     def add_args(parser):
-        pass
+        group = parser.add_argument_group('Filter arguments')
+        group.add_argument('--boundaries-only', action='store_true',
+                           help="""Index only administrative boundaries.""")
+        group.add_argument('--no-boundaries', action='store_true',
+                           help="""Index everything except administrative boundaries.""")
+        group.add_argument('--minrank', '-r', type=int, metavar='RANK', default=0,
+                           help='Minimum/starting rank')
+        group.add_argument('--maxrank', '-R', type=int, metavar='RANK', default=30,
+                           help='Maximum/finishing rank')
 
     @staticmethod
     def run(args):
-        return run_legacy_script('update.php', '--index', nominatim_env=args)
+        from .indexer.indexer import Indexer
+
+        indexer = Indexer(args.config.get_libpq_dsn(),
+                          args.threads or _num_system_cpus() or 1)
+
+        if not args.no_boundaries:
+            indexer.index_boundaries(args.minrank, args.maxrank)
+        if not args.boundaries_only:
+            indexer.index_by_rank(args.minrank, args.maxrank)
+
+        if not args.no_boundaries and not args.boundaries_only \
+           and args.minrank == 0 and args.maxrank == 30:
+            conn = connect(args.config.get_libpq_dsn())
+            status.set_indexed(conn, True)
+            conn.close()
+
+        return 0
 
 
 class UpdateRefresh:
@@ -336,22 +495,34 @@ class UpdateRefresh:
 
     @staticmethod
     def run(args):
+        from .tools import refresh
+
         if args.postcodes:
-            run_legacy_script('update.php', '--calculate-postcodes',
-                              nominatim_env=args, throw_on_fail=True)
+            LOG.warning("Update postcodes centroid")
+            conn = connect(args.config.get_libpq_dsn())
+            refresh.update_postcodes(conn, args.data_dir)
+            conn.close()
+
         if args.word_counts:
-            run_legacy_script('update.php', '--recompute-word-counts',
-                              nominatim_env=args, throw_on_fail=True)
+            LOG.warning('Recompute frequency of full-word search terms')
+            conn = connect(args.config.get_libpq_dsn())
+            refresh.recompute_word_counts(conn, args.data_dir)
+            conn.close()
+
         if args.address_levels:
-            run_legacy_script('update.php', '--update-address-levels',
-                              nominatim_env=args, throw_on_fail=True)
+            cfg = Path(args.config.ADDRESS_LEVEL_CONFIG)
+            LOG.warning('Updating address levels from %s', cfg)
+            conn = connect(args.config.get_libpq_dsn())
+            refresh.load_address_levels_from_file(conn, cfg)
+            conn.close()
+
         if args.functions:
-            params = ['setup.php', '--create-functions', '--create-partition-functions']
-            if args.diffs:
-                params.append('--enable-diff-updates')
-            if args.enable_debug_statements:
-                params.append('--enable-debug-statements')
-            run_legacy_script(*params, nominatim_env=args, throw_on_fail=True)
+            LOG.warning('Create functions')
+            conn = connect(args.config.get_libpq_dsn())
+            refresh.create_functions(conn, args.config, args.data_dir,
+                                     args.diffs, args.enable_debug_statements)
+            conn.close()
+
         if args.wiki_data:
             run_legacy_script('setup.php', '--import-wikipedia-articles',
                               nominatim_env=args, throw_on_fail=True)
@@ -363,6 +534,8 @@ class UpdateRefresh:
             run_legacy_script('setup.php', '--setup-website',
                               nominatim_env=args, throw_on_fail=True)
 
+        return 0
+
 
 class AdminCheckDatabase:
     """\
@@ -405,7 +578,7 @@ class AdminWarm:
 
 class QueryExport:
     """\
-    Export addresses as CSV file from a Nominatim database.
+    Export addresses as CSV file from the database.
     """
 
     @staticmethod
@@ -458,17 +631,245 @@ class QueryExport:
 
         return run_legacy_script(*params, nominatim_env=args)
 
-class QueryTodo:
+STRUCTURED_QUERY = (
+    ('street', 'housenumber and street'),
+    ('city', 'city, town or village'),
+    ('county', 'county'),
+    ('state', 'state'),
+    ('country', 'country'),
+    ('postalcode', 'postcode')
+)
+
+EXTRADATA_PARAMS = (
+    ('addressdetails', 'Include a breakdown of the address into elements.'),
+    ('extratags', """Include additional information if available
+                     (e.g. wikipedia link, opening hours)."""),
+    ('namedetails', 'Include a list of alternative names.')
+)
+
+DETAILS_SWITCHES = (
+    ('addressdetails', 'Include a breakdown of the address into elements.'),
+    ('keywords', 'Include a list of name keywords and address keywords.'),
+    ('linkedplaces', 'Include details of places that are linked with this one.'),
+    ('hierarchy', 'Include details of places lower in the address hierarchy.'),
+    ('group_hierarchy', 'Group the places by type.'),
+    ('polygon_geojson', 'Include geometry of result.')
+)
+
+def _add_api_output_arguments(parser):
+    group = parser.add_argument_group('Output arguments')
+    group.add_argument('--format', default='jsonv2',
+                       choices=['xml', 'json', 'jsonv2', 'geojson', 'geocodejson'],
+                       help='Format of result')
+    for name, desc in EXTRADATA_PARAMS:
+        group.add_argument('--' + name, action='store_true', help=desc)
+
+    group.add_argument('--lang', '--accept-language', metavar='LANGS',
+                       help='Preferred language order for presenting search results')
+    group.add_argument('--polygon-output',
+                       choices=['geojson', 'kml', 'svg', 'text'],
+                       help='Output geometry of results as a GeoJSON, KML, SVG or WKT.')
+    group.add_argument('--polygon-threshold', type=float, metavar='TOLERANCE',
+                       help="""Simplify output geometry.
+                               Parameter is difference tolerance in degrees.""")
+
+
+class APISearch:
+    """\
+    Execute API search query.
+    """
+
+    @staticmethod
+    def add_args(parser):
+        group = parser.add_argument_group('Query arguments')
+        group.add_argument('--query',
+                           help='Free-form query string')
+        for name, desc in STRUCTURED_QUERY:
+            group.add_argument('--' + name, help='Structured query: ' + desc)
+
+        _add_api_output_arguments(parser)
+
+        group = parser.add_argument_group('Result limitation')
+        group.add_argument('--countrycodes', metavar='CC,..',
+                           help='Limit search results to one or more countries.')
+        group.add_argument('--exclude_place_ids', metavar='ID,..',
+                           help='List of search objects to be excluded')
+        group.add_argument('--limit', type=int,
+                           help='Limit the number of returned results')
+        group.add_argument('--viewbox', metavar='X1,Y1,X2,Y2',
+                           help='Preferred area to find search results')
+        group.add_argument('--bounded', action='store_true',
+                           help='Strictly restrict results to viewbox area')
+
+        group = parser.add_argument_group('Other arguments')
+        group.add_argument('--no-dedupe', action='store_false', dest='dedupe',
+                           help='Do not remove duplicates from the result list')
+
+
+    @staticmethod
+    def run(args):
+        if args.query:
+            params = dict(q=args.query)
+        else:
+            params = {k : getattr(args, k) for k, _ in STRUCTURED_QUERY if getattr(args, k)}
+
+        for param, _ in EXTRADATA_PARAMS:
+            if getattr(args, param):
+                params[param] = '1'
+        for param in ('format', 'countrycodes', 'exclude_place_ids', 'limit', 'viewbox'):
+            if getattr(args, param):
+                params[param] = getattr(args, param)
+        if args.lang:
+            params['accept-language'] = args.lang
+        if args.polygon_output:
+            params['polygon_' + args.polygon_output] = '1'
+        if args.polygon_threshold:
+            params['polygon_threshold'] = args.polygon_threshold
+        if args.bounded:
+            params['bounded'] = '1'
+        if not args.dedupe:
+            params['dedupe'] = '0'
+
+        return run_api_script('search', args.project_dir,
+                              phpcgi_bin=args.phpcgi_path, params=params)
+
+class APIReverse:
+    """\
+    Execute API reverse query.
+    """
+
+    @staticmethod
+    def add_args(parser):
+        group = parser.add_argument_group('Query arguments')
+        group.add_argument('--lat', type=float, required=True,
+                           help='Latitude of coordinate to look up (in WGS84)')
+        group.add_argument('--lon', type=float, required=True,
+                           help='Longitude of coordinate to look up (in WGS84)')
+        group.add_argument('--zoom', type=int,
+                           help='Level of detail required for the address')
+
+        _add_api_output_arguments(parser)
+
+
+    @staticmethod
+    def run(args):
+        params = dict(lat=args.lat, lon=args.lon)
+        if args.zoom is not None:
+            params['zoom'] = args.zoom
+
+        for param, _ in EXTRADATA_PARAMS:
+            if getattr(args, param):
+                params[param] = '1'
+        if args.format:
+            params['format'] = args.format
+        if args.lang:
+            params['accept-language'] = args.lang
+        if args.polygon_output:
+            params['polygon_' + args.polygon_output] = '1'
+        if args.polygon_threshold:
+            params['polygon_threshold'] = args.polygon_threshold
+
+        return run_api_script('reverse', args.project_dir,
+                              phpcgi_bin=args.phpcgi_path, params=params)
+
+
+class APILookup:
+    """\
+    Execute API lookup query.
+    """
+
+    @staticmethod
+    def add_args(parser):
+        group = parser.add_argument_group('Query arguments')
+        group.add_argument('--id', metavar='OSMID',
+                           action='append', required=True, dest='ids',
+                           help='OSM id to lookup in format <NRW><id> (may be repeated)')
+
+        _add_api_output_arguments(parser)
+
+
+    @staticmethod
+    def run(args):
+        params = dict(osm_ids=','.join(args.ids))
+
+        for param, _ in EXTRADATA_PARAMS:
+            if getattr(args, param):
+                params[param] = '1'
+        if args.format:
+            params['format'] = args.format
+        if args.lang:
+            params['accept-language'] = args.lang
+        if args.polygon_output:
+            params['polygon_' + args.polygon_output] = '1'
+        if args.polygon_threshold:
+            params['polygon_threshold'] = args.polygon_threshold
+
+        return run_api_script('lookup', args.project_dir,
+                              phpcgi_bin=args.phpcgi_path, params=params)
+
+
+class APIDetails:
+    """\
+    Execute API details query.
+    """
+
+    @staticmethod
+    def add_args(parser):
+        group = parser.add_argument_group('Query arguments')
+        objs = group.add_mutually_exclusive_group(required=True)
+        objs.add_argument('--node', '-n', type=int,
+                          help="Look up the OSM node with the given ID.")
+        objs.add_argument('--way', '-w', type=int,
+                          help="Look up the OSM way with the given ID.")
+        objs.add_argument('--relation', '-r', type=int,
+                          help="Look up the OSM relation with the given ID.")
+        objs.add_argument('--place_id', '-p', type=int,
+                          help='Database internal identifier of the OSM object to look up.')
+        group.add_argument('--class', dest='object_class',
+                           help="""Class type to disambiguate multiple entries
+                                   of the same object.""")
+
+        group = parser.add_argument_group('Output arguments')
+        for name, desc in DETAILS_SWITCHES:
+            group.add_argument('--' + name, action='store_true', help=desc)
+        group.add_argument('--lang', '--accept-language', metavar='LANGS',
+                           help='Preferred language order for presenting search results')
+
+    @staticmethod
+    def run(args):
+        if args.node:
+            params = dict(osmtype='N', osmid=args.node)
+        elif args.way:
+            params = dict(osmtype='W', osmid=args.way)
+        elif args.relation:
+            params = dict(osmtype='R', osmid=args.relation)
+        else:
+            params = dict(place_id=args.place_id)
+        if args.object_class:
+            params['class'] = args.object_class
+        for name, _ in DETAILS_SWITCHES:
+            params[name] = '1' if getattr(args, name) else '0'
+
+        return run_api_script('details', args.project_dir,
+                              phpcgi_bin=args.phpcgi_path, params=params)
+
+
+class APIStatus:
     """\
-    Todo
+    Execute API status query.
     """
+
     @staticmethod
     def add_args(parser):
-        pass
+        group = parser.add_argument_group('API parameters')
+        group.add_argument('--format', default='text', choices=['text', 'json'],
+                           help='Format of result')
 
     @staticmethod
-    def run(args): # pylint: disable=W0613
-        print("TODO: searching")
+    def run(args):
+        return run_api_script('status', args.project_dir,
+                              phpcgi_bin=args.phpcgi_path,
+                              params=dict(format=args.format))
 
 
 def nominatim(**kwargs):
@@ -492,10 +893,14 @@ def nominatim(**kwargs):
     parser.add_subcommand('refresh', UpdateRefresh)
 
     parser.add_subcommand('export', QueryExport)
-    parser.add_subcommand('search', QueryTodo)
-    parser.add_subcommand('reverse', QueryTodo)
-    parser.add_subcommand('lookup', QueryTodo)
-    parser.add_subcommand('details', QueryTodo)
-    parser.add_subcommand('status', QueryTodo)
+
+    if kwargs.get('phpcgi_path'):
+        parser.add_subcommand('search', APISearch)
+        parser.add_subcommand('reverse', APIReverse)
+        parser.add_subcommand('lookup', APILookup)
+        parser.add_subcommand('details', APIDetails)
+        parser.add_subcommand('status', APIStatus)
+    else:
+        parser.parser.epilog = 'php-cgi not found. Query commands not available.'
 
     return parser.run(**kwargs)
index 911c7ddf127f68f438ab582c75fa38d5ed469dd8..4de2052ee4987ff4892ef4a5a92b6fa3e53d21dd 100644 (file)
@@ -1,10 +1,16 @@
 """
 Nominatim configuration accessor.
 """
+import logging
 import os
+from pathlib import Path
 
 from dotenv import dotenv_values
 
+from .errors import UsageError
+
+LOG = logging.getLogger()
+
 class Configuration:
     """ Load and manage the project configuration.
 
@@ -20,15 +26,75 @@ class Configuration:
     """
 
     def __init__(self, project_dir, config_dir):
+        self.project_dir = project_dir
+        self.config_dir = config_dir
         self._config = dotenv_values(str((config_dir / 'env.defaults').resolve()))
         if project_dir is not None:
             self._config.update(dotenv_values(str((project_dir / '.env').resolve())))
 
+        # Add defaults for variables that are left empty to set the default.
+        # They may still be overwritten by environment variables.
+        if not self._config['NOMINATIM_ADDRESS_LEVEL_CONFIG']:
+            self._config['NOMINATIM_ADDRESS_LEVEL_CONFIG'] = \
+                str(config_dir / 'address-levels.json')
+
+
     def __getattr__(self, name):
         name = 'NOMINATIM_' + name
 
         return os.environ.get(name) or self._config[name]
 
+    def get_bool(self, name):
+        """ Return the given configuration parameter as a boolean.
+            Values of '1', 'yes' and 'true' are accepted as truthy values,
+            everything else is interpreted as false.
+        """
+        return self.__getattr__(name).lower() in ('1', 'yes', 'true')
+
+
+    def get_int(self, name):
+        """ Return the given configuration parameter as an int.
+        """
+        try:
+            return int(self.__getattr__(name))
+        except ValueError:
+            LOG.fatal("Invalid setting NOMINATIM_%s. Needs to be a number.", name)
+            raise UsageError("Configuration error.")
+
+
+    def get_libpq_dsn(self):
+        """ Get configured database DSN converted into the key/value format
+            understood by libpq and psycopg.
+        """
+        dsn = self.DATABASE_DSN
+
+        def quote_param(param):
+            key, val = param.split('=')
+            val = val.replace('\\', '\\\\').replace("'", "\\'")
+            if ' ' in val:
+                val = "'" + val + "'"
+            return key + '=' + val
+
+        if dsn.startswith('pgsql:'):
+            # Old PHP DSN format. Convert before returning.
+            return ' '.join([quote_param(p) for p in dsn[6:].split(';')])
+
+        return dsn
+
+
+    def get_import_style_file(self):
+        """ Return the import style file as a path object. Translates the
+            name of the standard styles automatically into a file in the
+            config style.
+        """
+        style = self.__getattr__('IMPORT_STYLE')
+
+        if style in ('admin', 'street', 'address', 'full', 'extratags'):
+            return self.config_dir / 'import-{}.style'.format(style)
+
+        return Path(style)
+
+
     def get_os_env(self):
         """ Return a copy of the OS environment with the Nominatim configuration
             merged in.
diff --git a/nominatim/db/__init__.py b/nominatim/db/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
similarity index 73%
rename from nominatim/indexer/db.py
rename to nominatim/db/async_connection.py
index 037c3fb2203c7d3a27d112e1eef3d9f6e69fd957..45e83664663ba835419db49304209b9da6491d35 100644 (file)
@@ -1,34 +1,27 @@
 # SPDX-License-Identifier: GPL-2.0-only
 #
 # This file is part of Nominatim.
-# Copyright (C) 2020 Sarah Hoffmann
-
+# Copyright (C) 2021 by the Nominatim developer community.
+# For a full list of authors see the git log.
+""" Database helper functions for the indexer.
+"""
 import logging
 import psycopg2
 from psycopg2.extras import wait_select
 
-log = logging.getLogger()
-
-def make_connection(options, asynchronous=False):
-    params = {'dbname' : options.dbname,
-              'user' : options.user,
-              'password' : options.password,
-              'host' : options.host,
-              'port' : options.port,
-              'async' : asynchronous}
+LOG = logging.getLogger()
 
-    return psycopg2.connect(**params)
-
-class DBConnection(object):
+class DBConnection:
     """ A single non-blocking database connection.
     """
 
-    def __init__(self, options):
+    def __init__(self, dsn):
         self.current_query = None
         self.current_params = None
-        self.options = options
+        self.dsn = dsn
 
         self.conn = None
+        self.cursor = None
         self.connect()
 
     def connect(self):
@@ -41,7 +34,9 @@ class DBConnection(object):
             self.cursor.close()
             self.conn.close()
 
-        self.conn = make_connection(self.options, asynchronous=True)
+        # Use a dict to hand in the parameters because async is a reserved
+        # word in Python3.
+        self.conn = psycopg2.connect(**{'dsn' : self.dsn, 'async' : True})
         self.wait()
 
         self.cursor = self.conn.cursor()
@@ -51,7 +46,7 @@ class DBConnection(object):
         # implemented.
         self.perform(
             """ UPDATE pg_settings SET setting = -1 WHERE name = 'jit_above_cost';
-                UPDATE pg_settings SET setting = 0 
+                UPDATE pg_settings SET setting = 0
                    WHERE name = 'max_parallel_workers_per_gather';""")
         self.wait()
 
@@ -63,14 +58,14 @@ class DBConnection(object):
                 wait_select(self.conn)
                 self.current_query = None
                 return
-            except psycopg2.extensions.TransactionRollbackError as e:
-                if e.pgcode == '40P01':
-                    log.info("Deadlock detected (params = {}), retry."
-                              .format(self.current_params))
+            except psycopg2.extensions.TransactionRollbackError as error:
+                if error.pgcode == '40P01':
+                    LOG.info("Deadlock detected (params = %s), retry.",
+                             str(self.current_params))
                     self.cursor.execute(self.current_query, self.current_params)
                 else:
                     raise
-            except psycopg2.errors.DeadlockDetected:
+            except psycopg2.errors.DeadlockDetected: # pylint: disable=E1101
                 self.cursor.execute(self.current_query, self.current_params)
 
     def perform(self, sql, args=None):
@@ -99,14 +94,13 @@ class DBConnection(object):
             if self.conn.poll() == psycopg2.extensions.POLL_OK:
                 self.current_query = None
                 return True
-        except psycopg2.extensions.TransactionRollbackError as e:
-            if e.pgcode == '40P01':
-                log.info("Deadlock detected (params = {}), retry.".format(self.current_params))
+        except psycopg2.extensions.TransactionRollbackError as error:
+            if error.pgcode == '40P01':
+                LOG.info("Deadlock detected (params = %s), retry.", str(self.current_params))
                 self.cursor.execute(self.current_query, self.current_params)
             else:
                 raise
-        except psycopg2.errors.DeadlockDetected:
+        except psycopg2.errors.DeadlockDetected: # pylint: disable=E1101
             self.cursor.execute(self.current_query, self.current_params)
 
         return False
-
diff --git a/nominatim/db/connection.py b/nominatim/db/connection.py
new file mode 100644 (file)
index 0000000..4d30151
--- /dev/null
@@ -0,0 +1,58 @@
+"""
+Specialised connection and cursor functions.
+"""
+import logging
+
+import psycopg2
+import psycopg2.extensions
+import psycopg2.extras
+
+class _Cursor(psycopg2.extras.DictCursor):
+    """ A cursor returning dict-like objects and providing specialised
+        execution functions.
+    """
+
+    def execute(self, query, args=None): # pylint: disable=W0221
+        """ Query execution that logs the SQL query when debugging is enabled.
+        """
+        logger = logging.getLogger()
+        logger.debug(self.mogrify(query, args).decode('utf-8'))
+
+        super().execute(query, args)
+
+    def scalar(self, sql, args=None):
+        """ Execute query that returns a single value. The value is returned.
+            If the query does not yield exactly one row, a RuntimeError is raised.
+        """
+        self.execute(sql, args)
+
+        if self.rowcount != 1:
+            raise RuntimeError("Query did not return a single row.")
+
+        return self.fetchone()[0]
+
+
+class _Connection(psycopg2.extensions.connection):
+    """ A connection that provides the specialised cursor by default and
+        adds convenience functions for administrating the database.
+    """
+
+    def cursor(self, cursor_factory=_Cursor, **kwargs):
+        """ Return a new cursor. By default the specialised cursor is returned.
+        """
+        return super().cursor(cursor_factory=cursor_factory, **kwargs)
+
+    def table_exists(self, table):
+        """ Check that a table with the given name exists in the database.
+        """
+        with self.cursor() as cur:
+            num = cur.scalar("""SELECT count(*) FROM pg_tables
+                                WHERE tablename = %s""", (table, ))
+            return num == 1
+
+
+def connect(dsn):
+    """ Open a connection to the database using the specialised connection
+        factory.
+    """
+    return psycopg2.connect(dsn, connection_factory=_Connection)
diff --git a/nominatim/db/status.py b/nominatim/db/status.py
new file mode 100644 (file)
index 0000000..75da3c1
--- /dev/null
@@ -0,0 +1,82 @@
+"""
+Access and helper functions for the status and status log table.
+"""
+import datetime as dt
+import logging
+import re
+
+from ..tools.exec_utils import get_url
+from ..errors import UsageError
+
+LOG = logging.getLogger()
+
+def compute_database_date(conn):
+    """ Determine the date of the database from the newest object in the
+        data base.
+    """
+    # First, find the node with the highest ID in the database
+    with conn.cursor() as cur:
+        osmid = cur.scalar("SELECT max(osm_id) FROM place WHERE osm_type='N'")
+
+        if osmid is None:
+            LOG.fatal("No data found in the database.")
+            raise UsageError("No data found in the database.")
+
+    LOG.info("Using node id %d for timestamp lookup", osmid)
+    # Get the node from the API to find the timestamp when it was created.
+    node_url = 'https://www.openstreetmap.org/api/0.6/node/{}/1'.format(osmid)
+    data = get_url(node_url)
+
+    match = re.search(r'timestamp="((\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2}))Z"', data)
+
+    if match is None:
+        LOG.fatal("The node data downloaded from the API does not contain valid data.\n"
+                  "URL used: %s", node_url)
+        raise UsageError("Bad API data.")
+
+    LOG.debug("Found timestamp %s", match[1])
+
+    return dt.datetime.fromisoformat(match[1]).replace(tzinfo=dt.timezone.utc)
+
+
+def set_status(conn, date, seq=None, indexed=True):
+    """ Replace the current status with the given status.
+    """
+    assert date.tzinfo == dt.timezone.utc
+    with conn.cursor() as cur:
+        cur.execute("TRUNCATE TABLE import_status")
+        cur.execute("""INSERT INTO import_status (lastimportdate, sequence_id, indexed)
+                       VALUES (%s, %s, %s)""", (date, seq, indexed))
+
+    conn.commit()
+
+
+def get_status(conn):
+    """ Return the current status as a triple of (date, sequence, indexed).
+        If status has not been set up yet, a triple of None is returned.
+    """
+    with conn.cursor() as cur:
+        cur.execute("SELECT * FROM import_status LIMIT 1")
+        if cur.rowcount < 1:
+            return None, None, None
+
+        row = cur.fetchone()
+        return row['lastimportdate'], row['sequence_id'], row['indexed']
+
+
+def set_indexed(conn, state):
+    """ Set the indexed flag in the status table to the given state.
+    """
+    with conn.cursor() as cur:
+        cur.execute("UPDATE import_status SET indexed = %s", (state, ))
+    conn.commit()
+
+
+def log_status(conn, start, event, batchsize=None):
+    """ Write a new status line to the `import_osmosis_log` table.
+    """
+    with conn.cursor() as cur:
+        cur.execute("""INSERT INTO import_osmosis_log
+                       (batchend, batchseq, batchsize, starttime, endtime, event)
+                       SELECT lastimportdate, sequence_id, %s, %s, now(), %s FROM import_status""",
+                    (batchsize, start, event))
diff --git a/nominatim/db/utils.py b/nominatim/db/utils.py
new file mode 100644 (file)
index 0000000..abd7251
--- /dev/null
@@ -0,0 +1,12 @@
+"""
+Helper functions for handling DB accesses.
+"""
+
+def execute_file(conn, fname):
+    """ Read an SQL file and run its contents against the given connection.
+    """
+    with fname.open('r') as fdesc:
+        sql = fdesc.read()
+    with conn.cursor() as cur:
+        cur.execute(sql)
+    conn.commit()
diff --git a/nominatim/errors.py b/nominatim/errors.py
new file mode 100644 (file)
index 0000000..e77f956
--- /dev/null
@@ -0,0 +1,8 @@
+"""
+Custom exception and error classes for Nominatim.
+"""
+
+class UsageError(Exception):
+    """ An error raised because of bad user input. This error will usually
+        not cause a stack trace to be printed unless debugging is enabled.
+    """
diff --git a/nominatim/indexer/indexer.py b/nominatim/indexer/indexer.py
new file mode 100644 (file)
index 0000000..6e0ed60
--- /dev/null
@@ -0,0 +1,199 @@
+"""
+Main work horse for indexing (computing addresses) the database.
+"""
+# pylint: disable=C0111
+import logging
+import select
+
+import psycopg2
+
+from .progress import ProgressLogger
+from ..db.async_connection import DBConnection
+
+LOG = logging.getLogger()
+
+class RankRunner:
+    """ Returns SQL commands for indexing one rank within the placex table.
+    """
+
+    def __init__(self, rank):
+        self.rank = rank
+
+    def name(self):
+        return "rank {}".format(self.rank)
+
+    def sql_count_objects(self):
+        return """SELECT count(*) FROM placex
+                  WHERE rank_address = {} and indexed_status > 0
+               """.format(self.rank)
+
+    def sql_get_objects(self):
+        return """SELECT place_id FROM placex
+                  WHERE indexed_status > 0 and rank_address = {}
+                  ORDER BY geometry_sector""".format(self.rank)
+
+    @staticmethod
+    def sql_index_place(ids):
+        return "UPDATE placex SET indexed_status = 0 WHERE place_id IN ({})"\
+               .format(','.join((str(i) for i in ids)))
+
+
+class InterpolationRunner:
+    """ Returns SQL commands for indexing the address interpolation table
+        location_property_osmline.
+    """
+
+    @staticmethod
+    def name():
+        return "interpolation lines (location_property_osmline)"
+
+    @staticmethod
+    def sql_count_objects():
+        return """SELECT count(*) FROM location_property_osmline
+                  WHERE indexed_status > 0"""
+
+    @staticmethod
+    def sql_get_objects():
+        return """SELECT place_id FROM location_property_osmline
+                  WHERE indexed_status > 0
+                  ORDER BY geometry_sector"""
+
+    @staticmethod
+    def sql_index_place(ids):
+        return """UPDATE location_property_osmline
+                  SET indexed_status = 0 WHERE place_id IN ({})"""\
+               .format(','.join((str(i) for i in ids)))
+
+class BoundaryRunner:
+    """ Returns SQL commands for indexing the administrative boundaries
+        of a certain rank.
+    """
+
+    def __init__(self, rank):
+        self.rank = rank
+
+    def name(self):
+        return "boundaries rank {}".format(self.rank)
+
+    def sql_count_objects(self):
+        return """SELECT count(*) FROM placex
+                  WHERE indexed_status > 0
+                    AND rank_search = {}
+                    AND class = 'boundary' and type = 'administrative'""".format(self.rank)
+
+    def sql_get_objects(self):
+        return """SELECT place_id FROM placex
+                  WHERE indexed_status > 0 and rank_search = {}
+                        and class = 'boundary' and type = 'administrative'
+                  ORDER BY partition, admin_level""".format(self.rank)
+
+    @staticmethod
+    def sql_index_place(ids):
+        return "UPDATE placex SET indexed_status = 0 WHERE place_id IN ({})"\
+               .format(','.join((str(i) for i in ids)))
+
+class Indexer:
+    """ Main indexing routine.
+    """
+
+    def __init__(self, dsn, num_threads):
+        self.conn = psycopg2.connect(dsn)
+        self.threads = [DBConnection(dsn) for _ in range(num_threads)]
+
+    def index_boundaries(self, minrank, maxrank):
+        LOG.warning("Starting indexing boundaries using %s threads",
+                    len(self.threads))
+
+        for rank in range(max(minrank, 4), min(maxrank, 26)):
+            self.index(BoundaryRunner(rank))
+
+    def index_by_rank(self, minrank, maxrank):
+        """ Run classic indexing by rank.
+        """
+        maxrank = min(maxrank, 30)
+        LOG.warning("Starting indexing rank (%i to %i) using %i threads",
+                    minrank, maxrank, len(self.threads))
+
+        for rank in range(max(1, minrank), maxrank):
+            self.index(RankRunner(rank))
+
+        if maxrank == 30:
+            self.index(RankRunner(0))
+            self.index(InterpolationRunner(), 20)
+            self.index(RankRunner(30), 20)
+        else:
+            self.index(RankRunner(maxrank))
+
+    def update_status_table(self):
+        """ Update the status in the status table to 'indexed'.
+        """
+        with self.conn.cursor() as cur:
+            cur.execute('UPDATE import_status SET indexed = true')
+        self.conn.commit()
+
+    def index(self, obj, batch=1):
+        """ Index a single rank or table. `obj` describes the SQL to use
+            for indexing. `batch` describes the number of objects that
+            should be processed with a single SQL statement
+        """
+        LOG.warning("Starting %s (using batch size %s)", obj.name(), batch)
+
+        cur = self.conn.cursor()
+        cur.execute(obj.sql_count_objects())
+
+        total_tuples = cur.fetchone()[0]
+        LOG.debug("Total number of rows: %i", total_tuples)
+
+        cur.close()
+
+        progress = ProgressLogger(obj.name(), total_tuples)
+
+        if total_tuples > 0:
+            cur = self.conn.cursor(name='places')
+            cur.execute(obj.sql_get_objects())
+
+            next_thread = self.find_free_thread()
+            while True:
+                places = [p[0] for p in cur.fetchmany(batch)]
+                if not places:
+                    break
+
+                LOG.debug("Processing places: %s", str(places))
+                thread = next(next_thread)
+
+                thread.perform(obj.sql_index_place(places))
+                progress.add(len(places))
+
+            cur.close()
+
+            for thread in self.threads:
+                thread.wait()
+
+        progress.done()
+
+    def find_free_thread(self):
+        """ Generator that returns the next connection that is free for
+            sending a query.
+        """
+        ready = self.threads
+        command_stat = 0
+
+        while True:
+            for thread in ready:
+                if thread.is_done():
+                    command_stat += 1
+                    yield thread
+
+            # refresh the connections occasionally to avoid potential
+            # memory leaks in Postgresql.
+            if command_stat > 100000:
+                for thread in self.threads:
+                    while not thread.is_done():
+                        thread.wait()
+                    thread.connect()
+                command_stat = 0
+                ready = self.threads
+            else:
+                ready, _, _ = select.select(self.threads, [], [])
+
+        assert False, "Unreachable code"
index 99120673faa67680216ac5fc48d6c8f93da62d03..c9d8816be989fb99675341a512c6806efcf06465 100644 (file)
@@ -26,7 +26,7 @@ class ProgressLogger:
         self.done_places = 0
         self.rank_start_time = datetime.now()
         self.log_interval = log_interval
-        self.next_info = INITIAL_PROGRESS if LOG.isEnabledFor(logging.INFO) else total + 1
+        self.next_info = INITIAL_PROGRESS if LOG.isEnabledFor(logging.WARNING) else total + 1
 
     def add(self, num=1):
         """ Mark `num` places as processed. Print a log message if the
@@ -47,9 +47,9 @@ class ProgressLogger:
         places_per_sec = self.done_places / done_time
         eta = (self.total_places - self.done_places) / places_per_sec
 
-        LOG.info("Done %d in %d @ %.3f per second - %s ETA (seconds): %.2f",
-                 self.done_places, int(done_time),
-                 places_per_sec, self.name, eta)
+        LOG.warning("Done %d in %d @ %.3f per second - %s ETA (seconds): %.2f",
+                    self.done_places, int(done_time),
+                    places_per_sec, self.name, eta)
 
         self.next_info += int(places_per_sec) * self.log_interval
 
diff --git a/nominatim/nominatim.py b/nominatim/nominatim.py
deleted file mode 100755 (executable)
index b20673d..0000000
+++ /dev/null
@@ -1,270 +0,0 @@
-#! /usr/bin/env python3
-#-----------------------------------------------------------------------------
-# nominatim - [description]
-#-----------------------------------------------------------------------------
-#
-# Indexing tool for the Nominatim database.
-#
-# Based on C version by Brian Quinion
-#
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of the GNU General Public License
-# as published by the Free Software Foundation; either version 2
-# of the License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
-#-----------------------------------------------------------------------------
-
-from argparse import ArgumentParser, RawDescriptionHelpFormatter, ArgumentTypeError
-import logging
-import sys
-import re
-import getpass
-from datetime import datetime
-import select
-
-from indexer.progress import ProgressLogger
-from indexer.db import DBConnection, make_connection
-
-log = logging.getLogger()
-
-class RankRunner(object):
-    """ Returns SQL commands for indexing one rank within the placex table.
-    """
-
-    def __init__(self, rank):
-        self.rank = rank
-
-    def name(self):
-        return "rank {}".format(self.rank)
-
-    def sql_count_objects(self):
-        return """SELECT count(*) FROM placex
-                  WHERE rank_address = {} and indexed_status > 0
-               """.format(self.rank)
-
-    def sql_get_objects(self):
-        return """SELECT place_id FROM placex
-                  WHERE indexed_status > 0 and rank_address = {}
-                  ORDER BY geometry_sector""".format(self.rank)
-
-    def sql_index_place(self, ids):
-        return "UPDATE placex SET indexed_status = 0 WHERE place_id IN ({})"\
-               .format(','.join((str(i) for i in ids)))
-
-
-class InterpolationRunner(object):
-    """ Returns SQL commands for indexing the address interpolation table
-        location_property_osmline.
-    """
-
-    def name(self):
-        return "interpolation lines (location_property_osmline)"
-
-    def sql_count_objects(self):
-        return """SELECT count(*) FROM location_property_osmline
-                  WHERE indexed_status > 0"""
-
-    def sql_get_objects(self):
-        return """SELECT place_id FROM location_property_osmline
-                  WHERE indexed_status > 0
-                  ORDER BY geometry_sector"""
-
-    def sql_index_place(self, ids):
-        return """UPDATE location_property_osmline
-                  SET indexed_status = 0 WHERE place_id IN ({})"""\
-               .format(','.join((str(i) for i in ids)))
-
-class BoundaryRunner(object):
-    """ Returns SQL commands for indexing the administrative boundaries
-        of a certain rank.
-    """
-
-    def __init__(self, rank):
-        self.rank = rank
-
-    def name(self):
-        return "boundaries rank {}".format(self.rank)
-
-    def sql_count_objects(self):
-        return """SELECT count(*) FROM placex
-                  WHERE indexed_status > 0
-                    AND rank_search = {}
-                    AND class = 'boundary' and type = 'administrative'""".format(self.rank)
-
-    def sql_get_objects(self):
-        return """SELECT place_id FROM placex
-                  WHERE indexed_status > 0 and rank_search = {}
-                        and class = 'boundary' and type = 'administrative'
-                  ORDER BY partition, admin_level""".format(self.rank)
-
-    def sql_index_place(self, ids):
-        return "UPDATE placex SET indexed_status = 0 WHERE place_id IN ({})"\
-               .format(','.join((str(i) for i in ids)))
-
-class Indexer(object):
-    """ Main indexing routine.
-    """
-
-    def __init__(self, options):
-        self.minrank = max(1, options.minrank)
-        self.maxrank = min(30, options.maxrank)
-        self.conn = make_connection(options)
-        self.threads = [DBConnection(options) for i in range(options.threads)]
-
-    def index_boundaries(self):
-        log.warning("Starting indexing boundaries using {} threads".format(
-                      len(self.threads)))
-
-        for rank in range(max(self.minrank, 5), min(self.maxrank, 26)):
-            self.index(BoundaryRunner(rank))
-
-    def index_by_rank(self):
-        """ Run classic indexing by rank.
-        """
-        log.warning("Starting indexing rank ({} to {}) using {} threads".format(
-                 self.minrank, self.maxrank, len(self.threads)))
-
-        for rank in range(max(1, self.minrank), self.maxrank):
-            self.index(RankRunner(rank))
-
-        if self.maxrank == 30:
-            self.index(RankRunner(0))
-            self.index(InterpolationRunner(), 20)
-            self.index(RankRunner(self.maxrank), 20)
-        else:
-            self.index(RankRunner(self.maxrank))
-
-    def index(self, obj, batch=1):
-        """ Index a single rank or table. `obj` describes the SQL to use
-            for indexing. `batch` describes the number of objects that
-            should be processed with a single SQL statement
-        """
-        log.warning("Starting %s (using batch size %s)", obj.name(), batch)
-
-        cur = self.conn.cursor()
-        cur.execute(obj.sql_count_objects())
-
-        total_tuples = cur.fetchone()[0]
-        log.debug("Total number of rows: {}".format(total_tuples))
-
-        cur.close()
-
-        progress = ProgressLogger(obj.name(), total_tuples)
-
-        if total_tuples > 0:
-            cur = self.conn.cursor(name='places')
-            cur.execute(obj.sql_get_objects())
-
-            next_thread = self.find_free_thread()
-            while True:
-                places = [p[0] for p in cur.fetchmany(batch)]
-                if len(places) == 0:
-                    break
-
-                log.debug("Processing places: {}".format(places))
-                thread = next(next_thread)
-
-                thread.perform(obj.sql_index_place(places))
-                progress.add(len(places))
-
-            cur.close()
-
-            for t in self.threads:
-                t.wait()
-
-        progress.done()
-
-    def find_free_thread(self):
-        """ Generator that returns the next connection that is free for
-            sending a query.
-        """
-        ready = self.threads
-        command_stat = 0
-
-        while True:
-            for thread in ready:
-                if thread.is_done():
-                    command_stat += 1
-                    yield thread
-
-            # refresh the connections occasionaly to avoid potential
-            # memory leaks in Postgresql.
-            if command_stat > 100000:
-                for t in self.threads:
-                    while not t.is_done():
-                        t.wait()
-                    t.connect()
-                command_stat = 0
-                ready = self.threads
-            else:
-                ready, _, _ = select.select(self.threads, [], [])
-
-        assert False, "Unreachable code"
-
-
-def nominatim_arg_parser():
-    """ Setup the command-line parser for the tool.
-    """
-    def h(s):
-        return re.sub("\s\s+" , " ", s)
-
-    p = ArgumentParser(description="Indexing tool for Nominatim.",
-                       formatter_class=RawDescriptionHelpFormatter)
-
-    p.add_argument('-d', '--database',
-                   dest='dbname', action='store', default='nominatim',
-                   help='Name of the PostgreSQL database to connect to.')
-    p.add_argument('-U', '--username',
-                   dest='user', action='store',
-                   help='PostgreSQL user name.')
-    p.add_argument('-W', '--password',
-                   dest='password_prompt', action='store_true',
-                   help='Force password prompt.')
-    p.add_argument('-H', '--host',
-                   dest='host', action='store',
-                   help='PostgreSQL server hostname or socket location.')
-    p.add_argument('-P', '--port',
-                   dest='port', action='store',
-                   help='PostgreSQL server port')
-    p.add_argument('-b', '--boundary-only',
-                   dest='boundary_only', action='store_true',
-                   help='Only index administrative boundaries (ignores min/maxrank).')
-    p.add_argument('-r', '--minrank',
-                   dest='minrank', type=int, metavar='RANK', default=0,
-                   help='Minimum/starting rank.')
-    p.add_argument('-R', '--maxrank',
-                   dest='maxrank', type=int, metavar='RANK', default=30,
-                   help='Maximum/finishing rank.')
-    p.add_argument('-t', '--threads',
-                   dest='threads', type=int, metavar='NUM', default=1,
-                   help='Number of threads to create for indexing.')
-    p.add_argument('-v', '--verbose',
-                   dest='loglevel', action='count', default=0,
-                   help='Increase verbosity')
-
-    return p
-
-if __name__ == '__main__':
-    logging.basicConfig(stream=sys.stderr, format='%(levelname)s: %(message)s')
-
-    options = nominatim_arg_parser().parse_args(sys.argv[1:])
-
-    log.setLevel(max(3 - options.loglevel, 0) * 10)
-
-    options.password = None
-    if options.password_prompt:
-        password = getpass.getpass("Database password: ")
-        options.password = password
-
-    if options.boundary_only:
-        Indexer(options).index_boundaries()
-    else:
-        Indexer(options).index_by_rank()
diff --git a/nominatim/tools/exec_utils.py b/nominatim/tools/exec_utils.py
new file mode 100644 (file)
index 0000000..03bed98
--- /dev/null
@@ -0,0 +1,138 @@
+"""
+Helper functions for executing external programs.
+"""
+import logging
+import os
+import subprocess
+import urllib.request as urlrequest
+from urllib.parse import urlencode
+
+from psycopg2.extensions import parse_dsn
+
+from ..version import NOMINATIM_VERSION
+
+LOG = logging.getLogger()
+
+def run_legacy_script(script, *args, nominatim_env=None, throw_on_fail=False):
+    """ Run a Nominatim PHP script with the given arguments.
+
+        Returns the exit code of the script. If `throw_on_fail` is True
+        then throw a `CalledProcessError` on a non-zero exit.
+    """
+    cmd = ['/usr/bin/env', 'php', '-Cq',
+           nominatim_env.phplib_dir / 'admin' / script]
+    cmd.extend([str(a) for a in args])
+
+    env = nominatim_env.config.get_os_env()
+    env['NOMINATIM_DATADIR'] = str(nominatim_env.data_dir)
+    env['NOMINATIM_BINDIR'] = str(nominatim_env.data_dir / 'utils')
+    if not env['NOMINATIM_DATABASE_MODULE_PATH']:
+        env['NOMINATIM_DATABASE_MODULE_PATH'] = nominatim_env.module_dir
+    if not env['NOMINATIM_OSM2PGSQL_BINARY']:
+        env['NOMINATIM_OSM2PGSQL_BINARY'] = nominatim_env.osm2pgsql_path
+
+    proc = subprocess.run(cmd, cwd=str(nominatim_env.project_dir), env=env,
+                          check=throw_on_fail)
+
+    return proc.returncode
+
+def run_api_script(endpoint, project_dir, extra_env=None, phpcgi_bin=None,
+                   params=None):
+    """ Execute a Nominatim API function.
+
+        The function needs a project directory that contains the website
+        directory with the scripts to be executed. The scripts will be run
+        using php_cgi. Query parameters can be added as named arguments.
+
+        Returns the exit code of the script.
+    """
+    log = logging.getLogger()
+    webdir = str(project_dir / 'website')
+    query_string = urlencode(params or {})
+
+    env = dict(QUERY_STRING=query_string,
+               SCRIPT_NAME='/{}.php'.format(endpoint),
+               REQUEST_URI='/{}.php?{}'.format(endpoint, query_string),
+               CONTEXT_DOCUMENT_ROOT=webdir,
+               SCRIPT_FILENAME='{}/{}.php'.format(webdir, endpoint),
+               HTTP_HOST='localhost',
+               HTTP_USER_AGENT='nominatim-tool',
+               REMOTE_ADDR='0.0.0.0',
+               DOCUMENT_ROOT=webdir,
+               REQUEST_METHOD='GET',
+               SERVER_PROTOCOL='HTTP/1.1',
+               GATEWAY_INTERFACE='CGI/1.1',
+               REDIRECT_STATUS='CGI')
+
+    if extra_env:
+        env.update(extra_env)
+
+    if phpcgi_bin is None:
+        cmd = ['/usr/bin/env', 'php-cgi']
+    else:
+        cmd = [str(phpcgi_bin)]
+
+    proc = subprocess.run(cmd, cwd=str(project_dir), env=env, capture_output=True,
+                          check=False)
+
+    if proc.returncode != 0 or proc.stderr:
+        if proc.stderr:
+            log.error(proc.stderr.decode('utf-8').replace('\\n', '\n'))
+        else:
+            log.error(proc.stdout.decode('utf-8').replace('\\n', '\n'))
+        return proc.returncode or 1
+
+    result = proc.stdout.decode('utf-8')
+    content_start = result.find('\r\n\r\n')
+
+    print(result[content_start + 4:].replace('\\n', '\n'))
+
+    return 0
+
+
+def run_osm2pgsql(options):
+    """ Run osm2pgsql with the given options.
+    """
+    env = os.environ
+    cmd = [options['osm2pgsql'],
+           '--hstore', '--latlon', '--slim',
+           '--with-forward-dependencies', 'false',
+           '--log-progress', 'true',
+           '--number-processes', str(options['threads']),
+           '--cache', str(options['osm2pgsql_cache']),
+           '--output', 'gazetteer',
+           '--style', str(options['osm2pgsql_style'])
+          ]
+    if options['append']:
+        cmd.append('--append')
+
+    if options['flatnode_file']:
+        cmd.extend(('--flat-nodes', options['flatnode_file']))
+
+    dsn = parse_dsn(options['dsn'])
+    if 'password' in dsn:
+        env['PGPASSWORD'] = dsn['password']
+    if 'dbname' in dsn:
+        cmd.extend(('-d', dsn['dbname']))
+    if 'user' in dsn:
+        cmd.extend(('--username', dsn['user']))
+    for param in ('host', 'port'):
+        if param in dsn:
+            cmd.extend(('--' + param, dsn[param]))
+
+    cmd.append(str(options['import_file']))
+
+    subprocess.run(cmd, cwd=options.get('cwd', '.'), env=env, check=True)
+
+
+def get_url(url):
+    """ Get the contents from the given URL and return it as a UTF-8 string.
+    """
+    headers = {"User-Agent" : "Nominatim/" + NOMINATIM_VERSION}
+
+    try:
+        with urlrequest.urlopen(urlrequest.Request(url, headers=headers)) as response:
+            return response.read().decode('utf-8')
+    except:
+        LOG.fatal('Failed to load URL: %s', url)
+        raise
diff --git a/nominatim/tools/refresh.py b/nominatim/tools/refresh.py
new file mode 100644 (file)
index 0000000..5fbb07f
--- /dev/null
@@ -0,0 +1,169 @@
+"""
+Functions for bringing auxiliary data in the database up-to-date.
+"""
+import json
+import re
+
+from psycopg2.extras import execute_values
+
+from ..db.utils import execute_file
+
+def update_postcodes(conn, datadir):
+    """ Recalculate postcode centroids and add, remove and update entries in the
+        location_postcode table. `conn` is an open connection to the database.
+    """
+    execute_file(conn, datadir / 'sql' / 'update-postcodes.sql')
+
+
+def recompute_word_counts(conn, datadir):
+    """ Compute the frequency of full-word search terms.
+    """
+    execute_file(conn, datadir / 'sql' / 'words_from_search_name.sql')
+
+
+def _add_address_level_rows_from_entry(rows, entry):
+    """ Converts a single entry from the JSON format for address rank
+        descriptions into a flat format suitable for inserting into a
+        PostgreSQL table and adds these lines to `rows`.
+    """
+    countries = entry.get('countries') or (None, )
+    for key, values in entry['tags'].items():
+        for value, ranks in values.items():
+            if isinstance(ranks, list):
+                rank_search, rank_address = ranks
+            else:
+                rank_search = rank_address = ranks
+            if not value:
+                value = None
+            for country in countries:
+                rows.append((country, key, value, rank_search, rank_address))
+
+def load_address_levels(conn, table, levels):
+    """ Replace the `address_levels` table with the contents of `levels`.
+
+        A new table is created; any previously existing table is dropped.
+        The table has the following columns:
+            country, class, type, rank_search, rank_address
+    """
+    rows = []
+    for entry in levels:
+        _add_address_level_rows_from_entry(rows, entry)
+
+    with conn.cursor() as cur:
+        cur.execute('DROP TABLE IF EXISTS {}'.format(table))
+
+        cur.execute("""CREATE TABLE {} (country_code varchar(2),
+                                        class TEXT,
+                                        type TEXT,
+                                        rank_search SMALLINT,
+                                        rank_address SMALLINT)""".format(table))
+
+        execute_values(cur, "INSERT INTO {} VALUES %s".format(table), rows)
+
+        cur.execute('CREATE UNIQUE INDEX ON {} (country_code, class, type)'.format(table))
+
+    conn.commit()
+
+def load_address_levels_from_file(conn, config_file):
+    """ Replace the `address_levels` table with the contents of the config
+        file.
+    """
+    with config_file.open('r') as fdesc:
+        load_address_levels(conn, 'address_levels', json.load(fdesc))
+
+PLPGSQL_BASE_MODULES = (
+    'utils.sql',
+    'normalization.sql',
+    'ranking.sql',
+    'importance.sql',
+    'address_lookup.sql',
+    'interpolation.sql'
+)
+
+PLPGSQL_TABLE_MODULES = (
+    ('place', 'place_triggers.sql'),
+    ('placex', 'placex_triggers.sql'),
+    ('location_postcode', 'postcode_triggers.sql')
+)
+
+def _get_standard_function_sql(conn, config, sql_dir, enable_diff_updates, enable_debug):
+    """ Read all applicable SQLs containing PL/pgSQL functions, replace
+        placeholders and execute them.
+    """
+    sql_func_dir = sql_dir / 'functions'
+    sql = ''
+
+    # Get the basic set of functions that is always imported.
+    for sql_file in PLPGSQL_BASE_MODULES:
+        with (sql_func_dir / sql_file).open('r') as fdesc:
+            sql += fdesc.read()
+
+    # Some files require the presence of a certain table
+    for table, fname in PLPGSQL_TABLE_MODULES:
+        if conn.table_exists(table):
+            with (sql_func_dir / fname).open('r') as fdesc:
+                sql += fdesc.read()
+
+    # Replace placeholders.
+    sql = sql.replace('{modulepath}',
+                      config.DATABASE_MODULE_PATH or str((config.project_dir / 'module').resolve()))
+
+    if enable_diff_updates:
+        sql = sql.replace('RETURN NEW; -- %DIFFUPDATES%', '--')
+
+    if enable_debug:
+        sql = sql.replace('--DEBUG:', '')
+
+    if config.get_bool('LIMIT_REINDEXING'):
+        sql = sql.replace('--LIMIT INDEXING:', '')
+
+    if not config.get_bool('USE_US_TIGER_DATA'):
+        sql = sql.replace('-- %NOTIGERDATA% ', '')
+
+    if not config.get_bool('USE_AUX_LOCATION_DATA'):
+        sql = sql.replace('-- %NOAUXDATA% ', '')
+
+    reverse_only = 'false' if conn.table_exists('search_name') else 'true'
+
+    return sql.replace('%REVERSE-ONLY%', reverse_only)
+
+
+def replace_partition_string(sql, partitions):
+    """ Replace a partition template with the actual partition code.
+    """
+    for match in re.findall('^-- start(.*?)^-- end', sql, re.M | re.S):
+        repl = ''
+        for part in partitions:
+            repl += match.replace('-partition-', str(part))
+        sql = sql.replace(match, repl)
+
+    return sql
+
+def _get_partition_function_sql(conn, sql_dir):
+    """ Create functions that work on partition tables.
+    """
+    with conn.cursor() as cur:
+        cur.execute('SELECT distinct partition FROM country_name')
+        partitions = set([0])
+        for row in cur:
+            partitions.add(row[0])
+
+    with (sql_dir / 'partition-functions.src.sql').open('r') as fdesc:
+        sql = fdesc.read()
+
+    return replace_partition_string(sql, sorted(partitions))
+
+def create_functions(conn, config, data_dir,
+                     enable_diff_updates=True, enable_debug=False):
+    """ (Re)create the PL/pgSQL functions.
+    """
+    sql_dir = data_dir / 'sql'
+
+    sql = _get_standard_function_sql(conn, config, sql_dir,
+                                     enable_diff_updates, enable_debug)
+    sql += _get_partition_function_sql(conn, sql_dir)
+
+    with conn.cursor() as cur:
+        cur.execute(sql)
+
+    conn.commit()
diff --git a/nominatim/tools/replication.py b/nominatim/tools/replication.py
new file mode 100644 (file)
index 0000000..c7d0d3e
--- /dev/null
@@ -0,0 +1,119 @@
+"""
+Functions for updating a database from a replication source.
+"""
+import datetime as dt
+from enum import Enum
+import logging
+import time
+
+from osmium.replication.server import ReplicationServer
+from osmium import WriteHandler
+
+from ..db import status
+from .exec_utils import run_osm2pgsql
+from ..errors import UsageError
+
+LOG = logging.getLogger()
+
+def init_replication(conn, base_url):
+    """ Set up replication for the server at the given base URL.
+    """
+    LOG.info("Using replication source: %s", base_url)
+    date = status.compute_database_date(conn)
+
+    # margin of error to make sure we get all data
+    date -= dt.timedelta(hours=3)
+
+    repl = ReplicationServer(base_url)
+
+    seq = repl.timestamp_to_sequence(date)
+
+    if seq is None:
+        LOG.fatal("Cannot reach the configured replication service '%s'.\n"
+                  "Does the URL point to a directory containing OSM update data?",
+                  base_url)
+        raise UsageError("Failed to reach replication service")
+
+    status.set_status(conn, date=date, seq=seq)
+
+    LOG.warning("Updates intialised at sequence %s (%s)", seq, date)
+
+
+def check_for_updates(conn, base_url):
+    """ Check if new data is available from the replication service at the
+        given base URL.
+    """
+    _, seq, _ = status.get_status(conn)
+
+    if seq is None:
+        LOG.error("Replication not set up. "
+                  "Please run 'nominatim replication --init' first.")
+        return 254
+
+    state = ReplicationServer(base_url).get_state_info()
+
+    if state is None:
+        LOG.error("Cannot get state for URL %s.", base_url)
+        return 253
+
+    if state.sequence <= seq:
+        LOG.warning("Database is up to date.")
+        return 2
+
+    LOG.warning("New data available (%i => %i).", seq, state.sequence)
+    return 0
+
+class UpdateState(Enum):
+    """ Possible states after an update has run.
+    """
+
+    UP_TO_DATE = 0
+    MORE_PENDING = 2
+    NO_CHANGES = 3
+
+
+def update(conn, options):
+    """ Update database from the next batch of data. Returns the state of
+        updates according to `UpdateState`.
+    """
+    startdate, startseq, indexed = status.get_status(conn)
+
+    if startseq is None:
+        LOG.error("Replication not set up. "
+                  "Please run 'nominatim replication --init' first.")
+        raise UsageError("Replication not set up.")
+
+    if not indexed and options['indexed_only']:
+        LOG.info("Skipping update. There is data that needs indexing.")
+        return UpdateState.MORE_PENDING
+
+    last_since_update = dt.datetime.now(dt.timezone.utc) - startdate
+    update_interval = dt.timedelta(seconds=options['update_interval'])
+    if last_since_update < update_interval:
+        duration = (update_interval - last_since_update).seconds
+        LOG.warning("Sleeping for %s sec before next update.", duration)
+        time.sleep(duration)
+
+    if options['import_file'].exists():
+        options['import_file'].unlink()
+
+    # Read updates into file.
+    repl = ReplicationServer(options['base_url'])
+
+    outhandler = WriteHandler(str(options['import_file']))
+    endseq = repl.apply_diffs(outhandler, startseq,
+                              max_size=options['max_diff_size'] * 1024)
+    outhandler.close()
+
+    if endseq is None:
+        return UpdateState.NO_CHANGES
+
+    # Consume updates with osm2pgsql.
+    options['append'] = True
+    run_osm2pgsql(options)
+
+    # Write the current status to the file
+    endstate = repl.get_state_info(endseq)
+    status.set_status(conn, endstate.timestamp, seq=endseq, indexed=False)
+
+    return UpdateState.UP_TO_DATE
diff --git a/nominatim/version.py b/nominatim/version.py
new file mode 100644 (file)
index 0000000..a2ddc9f
--- /dev/null
@@ -0,0 +1,5 @@
+"""
+Version information for Nominatim.
+"""
+
+NOMINATIM_VERSION = "3.6.0"
index fbad3e33f355a51145862dc2b6681e30b05ee733..e2eda3409d9e8931d49b1c193d26fe69e5c9be29 100644 (file)
@@ -57,6 +57,9 @@ NOMINATIM_HTTP_PROXY_HOST=proxy.mydomain.com
 NOMINATIM_HTTP_PROXY_PORT=3128
 NOMINATIM_HTTP_PROXY_LOGIN=
 NOMINATIM_HTTP_PROXY_PASSWORD=
+# Also set these standard environment variables.
+# HTTP_PROXY="http://user:pass@10.10.1.10:1080"
+# HTTPS_PROXY="http://user:pass@10.10.1.10:1080"
 
 # Location of the osm2pgsql binary.
 # When empty, osm2pgsql is expected to reside in the osm2pgsql directory in
@@ -64,10 +67,6 @@ NOMINATIM_HTTP_PROXY_PASSWORD=
 # EXPERT ONLY. You should usually use the supplied osm2pgsql.
 NOMINATIM_OSM2PGSQL_BINARY=
 
-# Location of pyosmium-get-changes.
-# Only needed when running updates.
-NOMINATIM_PYOSMIUM_BINARY=
-
 # Directory where to find US Tiger data files to import.
 # Used with setup.php --import-tiger-data. When unset, the data is expected
 # to be located under 'data/tiger' in the source tree.
index 0a49eef52fc16ee1068642e73ac337880e2d1ed1..18d4211bb711399edde86b86a8900f2711788e2a 100644 (file)
@@ -426,7 +426,7 @@ DECLARE
   geo RECORD;
 BEGIN
   -- 10000000000 is ~~ 1x1 degree
-  FOR geo IN select quad_split_geometry(geometry, 0.01, 20) as geom LOOP
+  FOR geo IN select quad_split_geometry(geometry, 0.25, 20) as geom LOOP
     RETURN NEXT geo.geom;
   END LOOP;
   RETURN;
index 5686bcd22c002fea652599cbe1417aaaa2d93073..8647e304331279bae635ea3f3f4189af06efcdd3 100644 (file)
@@ -1,6 +1,6 @@
 drop table if exists import_status;
 CREATE TABLE import_status (
-  lastimportdate timestamp NOT NULL,
+  lastimportdate timestamp with time zone NOT NULL,
   sequence_id integer,
   indexed boolean
   );
diff --git a/test/bdd/api/details/language.feature b/test/bdd/api/details/language.feature
new file mode 100644 (file)
index 0000000..6611c81
--- /dev/null
@@ -0,0 +1,62 @@
+@APIDB
+Feature: Localization of search results
+
+    Scenario: default language
+        When sending details query for R1155955
+        Then results contain
+          | ID | localname |
+          | 0  | Liechtenstein |
+
+    Scenario: accept-language first
+        When sending details query for R1155955
+          | accept-language |
+          | zh,de |
+        Then results contain
+          | ID | localname |
+          | 0  | åˆ—支敦士登 |
+
+    Scenario: accept-language missing
+        When sending details query for R1155955
+          | accept-language |
+          | xx,fr,en,de |
+        Then results contain
+          | ID | localname |
+          | 0  | Liechtenstein |
+
+    Scenario: http accept language header first
+        Given the HTTP header
+          | accept-language |
+          | fo;q=0.8,en-ca;q=0.5,en;q=0.3 |
+        When sending details query for R1155955
+        Then results contain
+          | ID | localname |
+          | 0  | Liktinstein |
+
+    Scenario: http accept language header and accept-language
+        Given the HTTP header
+          | accept-language |
+          | fr-ca,fr;q=0.8,en-ca;q=0.5,en;q=0.3 |
+        When sending details query for R1155955
+          | accept-language |
+          | fo,en |
+        Then results contain
+          | ID | localname |
+          | 0  | Liktinstein |
+
+    Scenario: http accept language header fallback
+        Given the HTTP header
+          | accept-language |
+          | fo-ca,en-ca;q=0.5 |
+        When sending details query for R1155955
+        Then results contain
+          | ID | localname |
+          | 0  | Liktinstein |
+
+    Scenario: http accept language header fallback (upper case)
+        Given the HTTP header
+          | accept-language |
+          | fo-FR;q=0.8,en-ca;q=0.5 |
+        When sending details query for R1155955
+        Then results contain
+          | ID | localname |
+          | 0  | Liktinstein |
index 03b91d291d2b1ebc45a728e550342fc1161512f6..87c3356c8a34d01b43c2d7f25c24647721b84b72 100644 (file)
@@ -8,14 +8,15 @@ Feature: Object details
         And result has attributes geometry
         And result has not attributes keywords,address,linked_places,parentof
 
-    Scenario: JSON Details with keywords
+    Scenario: JSON Details with pretty printing
         When sending json details query for W297699560
-            | keywords |
-            | 1 |
+            | pretty |
+            | 1      |
         Then the result is valid json
-        And result has attributes keywords
+        And result has attributes geometry
+        And result has not attributes keywords,address,linked_places,parentof
 
-    Scenario: JSON Details with addressdetails
+     Scenario: JSON Details with addressdetails
         When sending json details query for W297699560
             | addressdetails |
             | 1 |
@@ -36,22 +37,46 @@ Feature: Object details
         Then the result is valid json
         And result has attributes hierarchy
 
-    Scenario: JSON Details with linkedplaces
-        When sending json details query for R123924
-            | linkedplaces |
-            | 1 |
+    Scenario: JSON Details with grouped hierarchy
+        When sending json details query for W297699560
+            | hierarchy | group_hierarchy |
+            | 1         | 1 |
         Then the result is valid json
+        And result has attributes hierarchy
 
-    Scenario Outline: HTML Details with keywords
+     Scenario Outline: JSON Details with keywords
         When sending json details query for <osmid>
             | keywords |
             | 1 |
         Then the result is valid json
+        And result has attributes keywords
+
+    Examples:
+            | osmid |
+            | W297699560 |
+            | W243055645 |
+            | W243055716 |
+            | W43327921  |
+
+    # ticket #1343
+    Scenario: Details of a country with keywords
+        When sending details query for R1155955
+            | keywords |
+            | 1 |
+        Then the result is valid json
+
+    Scenario Outline: JSON details with full geometry
+        When sending json details query for <osmid>
+            | polygon_geojson |
+            | 1 |
+        Then the result is valid json
+        And result has attributes geometry
 
     Examples:
             | osmid |
             | W297699560 |
             | W243055645 |
             | W243055716 |
+            | W43327921  |
 
 
index 906c4ce5e9f0e6e1b69be6c365205e57ea7f6a21..a9b6d6a70553d933e5c3acd0ee01e1a2145e3b10 100644 (file)
@@ -2,36 +2,54 @@
 Feature: Object details
     Check details page for correctness
 
+    Scenario: Details by place ID
+        When sending details query for 107077
+        Then the result is valid json
+        And results contain
+            | place_id |
+            | 107077   |
+
     Scenario Outline: Details via OSM id
-        When sending <format> details query for <object>
-        Then the result is valid <format>
+        When sending details query for <type><id>
+        Then the result is valid json
+        And results contain
+            | osm_type | osm_id |
+            | <type>   | <id> |
 
     Examples:
-     | format | object |
-     | json | 107077 |
-     | json | N5484325405 |
-     | json | W43327921 |
-     | json | R123924 |
+     | type | id |
+     | N    | 5484325405 |
+     | W    | 43327921 |
+     | R    | 123924 |
+
+     Scenario: Details for interpolation way just returns the dependent street
+        When sending details query for W1
+        Then the result is valid json
+        And results contain
+            | category |
+            | highway |
+
+    Scenario Outline: Details for different class types for the same OSM id
+        When sending details query for N300209696:<class>
+        Then the result is valid json
+        And results contain
+          | osm_type | osm_id    | category |
+          | N        | 300209696 | <class> |
+
+    Examples:
+     | class |
+     | tourism |
+     | natural |
+     | mountain_pass |
 
     Scenario Outline: Details via unknown OSM id
-        When sending <format> details query for <object>
+        When sending details query for <object>
         Then a HTTP 400 is returned
 
     Examples:
-      | format | object |
-      | json | 1 |
-      | json | R1 |
-
-    Scenario: Details with keywords
-        When sending details query for W43327921
-            | keywords |
-            | 1 |
-        Then the result is valid json
+      | object |
+      | 1 |
+      | R1 |
+      | N300209696:highway |
 
-    # ticket #1343
-    Scenario: Details of a country with keywords
-        When sending details query for R1155955
-            | keywords |
-            | 1 |
-        Then the result is valid json
 
index 374272d0594f37526f85ca17a1e9dabff0ba5186..d6ef3794bfd514d6ec3861a5ed0dd0d0a39a7b11 100644 (file)
@@ -25,6 +25,39 @@ Feature: Parameters for Reverse API
       | -45.3,;   |
       | gkjd,50   |
 
+    Scenario Outline: Zoom levels between 4 and 18 are allowed
+        When sending reverse coordinates 47.14122383,9.52169581334
+          | zoom |
+          | <zoom> |
+        Then exactly 1 result is returned
+        And result addresses contain
+          | country_code |
+          | li |
+
+    Examples:
+      | zoom |
+      | 4 |
+      | 5 |
+      | 6 |
+      | 7 |
+      | 8 |
+      | 9 |
+      | 10 |
+      | 11 |
+      | 12 |
+      | 13 |
+      | 14 |
+      | 15 |
+      | 16 |
+      | 17 |
+      | 18 |
+
+    Scenario: Non-numerical zoom levels return an error
+        When sending reverse coordinates 47.14122383,9.52169581334
+          | zoom |
+          | adfe |
+        Then a HTTP 400 is returned
+
     Scenario Outline: Reverse Geocoding with extratags
         When sending <format> reverse coordinates 47.1395013150811,9.522098077031046
           | extratags |
index 78bd47a7e6db6858aa9686ed41ab7806626589c7..4da311e78a9270b9250ffabe5400b1d68fa2e6df 100644 (file)
@@ -131,3 +131,7 @@ Feature: Simple Reverse Tests
      | 48.966   | 8.448.2 |
      | Nan      | 8.448 |
      | 48.966   | Nan |
+
+     Scenario: Reverse Debug output returns no errors
+        When sending debug reverse coordinates 47.11,9.57
+        Then a HTTP 200 is returned
index 3a62c5811831b88930a023e6e1b8b0c2ab7fae4e..ea353f4568ad46f83f8d62d7511ef400e7234e67 100644 (file)
@@ -80,6 +80,15 @@ Feature: Search queries
           | class   | type |
           | amenity | restaurant |
 
+    Scenario: Search with specific amenity also works in country
+        When sending json search query "restaurants in liechtenstein" with address
+        Then result addresses contain
+          | country |
+          | Liechtenstein |
+        And  results contain
+          | class   | type |
+          | amenity | restaurant |
+
     Scenario: Search with key-value amenity
         When sending json search query "[club=scout] Vaduz"
         Then results contain
@@ -114,6 +123,19 @@ Feature: Search queries
           | class    | type |
           | leisure | firepit |
 
+    Scenario Outline: Key/value search near given coordinate can be restricted to country
+        When sending json search query "[natural=peak] 47.06512,9.53965" with address
+          | countrycodes |
+          | <cc> |
+        Then result addresses contain
+          | country_code |
+          | <cc> |
+
+    Examples:
+        | cc |
+        | li |
+        | ch |
+
     Scenario: Name search near given coordinate
         When sending json search query "sporry" with address
         Then result addresses contain
@@ -146,6 +168,14 @@ Feature: Search queries
             | li  |
         Then exactly 0 results are returned
 
+    Scenario: Country searches only return results for the given country
+        When sending search query "Ans Trail" with address
+            | countrycodes |
+            | li |
+        Then result addresses contain
+            | country_code |
+            | li |
+
     # https://trac.openstreetmap.org/ticket/5094
     Scenario: housenumbers are ordered by complete match first
         When sending json search query "Austrasse 11, Vaduz" with address
@@ -182,3 +212,7 @@ Feature: Search queries
        Then result addresses contain
          | ID | town |
          | 0  | Vaduz |
+
+    Scenario: Search can handle complex query word sets
+       When sending search query "aussenstelle universitat lichtenstein wachterhaus aussenstelle universitat lichtenstein wachterhaus aussenstelle universitat lichtenstein wachterhaus aussenstelle universitat lichtenstein wachterhaus"
+       Then a HTTP 200 is returned
index 603627b1e828db076462da83bd3d2cdd66734f82..b9323c5a62d695734cf354176a661bd7b4b2f801 100644 (file)
@@ -30,15 +30,10 @@ Feature: Simple Tests
 
     Examples:
      | parameter        | value |
-     | addressdetails   | 1 |
      | addressdetails   | 0 |
-     | polygon_text     | 1 |
      | polygon_text     | 0 |
-     | polygon_kml      | 1 |
      | polygon_kml      | 0 |
-     | polygon_geojson  | 1 |
      | polygon_geojson  | 0 |
-     | polygon_svg      | 1 |
      | polygon_svg      | 0 |
      | accept-language  | de,en |
      | countrycodes     | li |
@@ -48,9 +43,7 @@ Feature: Simple Tests
      | limit            | 1000 |
      | dedupe           | 1 |
      | dedupe           | 0 |
-     | extratags        | 1 |
      | extratags        | 0 |
-     | namedetails      | 1 |
      | namedetails      | 0 |
 
     Scenario: Search with invalid output format
@@ -180,13 +173,16 @@ Feature: Simple Tests
           | 234 |
         Then the result is valid xml
 
-    Scenario: Empty JSON search
-        When sending json search query "YHlERzzx"
+    Scenario Outline: Empty search
+        When sending <format> search query "YHlERzzx"
         Then exactly 0 results are returned
 
-    Scenario: Empty JSONv2 search
-        When sending jsonv2 search query "Flubb XdfESSaZx"
-        Then exactly 0 results are returned
+    Examples:
+        | format |
+        | json |
+        | jsonv2 |
+        | geojson |
+        | geocodejson |
 
     Scenario: Search for non-existing coordinates
         When sending json search query "-21.0,-33.0"
@@ -199,3 +195,16 @@ Feature: Simple Tests
         Then result header contains
           | attr     | value |
           | more_url | .*&countrycodes=pl%2Cbo&.* |
+
+    Scenario Outline: Search debug output does not return errors
+        When sending debug search query "<query>"
+        Then a HTTP 200 is returned
+
+    Examples:
+        | query |
+        | Liechtenstein |
+        | Triesen |
+        | Pfarrkirche |
+        | Landstr 27 Steinort, Triesenberg, 9495 |
+        | 9497 |
+        | restaurant in triesen |
index 47d3cc9ac6760c248cda6a0f56a33f726b302a90..80f898a3f400430dc4d5bdf4792a1ee356d52bae 100644 (file)
@@ -10,7 +10,8 @@ function coverage_shutdown($oCoverage)
 }
 
 $covfilter = new SebastianBergmann\CodeCoverage\Filter();
-$covfilter->addDirectoryToWhitelist($_SERVER['COV_PHP_DIR']);
+$covfilter->addDirectoryToWhitelist($_SERVER['COV_PHP_DIR'].'/lib');
+$covfilter->addDirectoryToWhitelist($_SERVER['COV_PHP_DIR'].'/website');
 $coverage = new SebastianBergmann\CodeCoverage\CodeCoverage(null, $covfilter);
 $coverage->start($_SERVER['COV_TEST_NAME']);
 
index 1e7da93d69bfd7dfc58060ae139065c802543063..beafcd9e1ee16773294f8279d8dd26ce080a3d6e 100644 (file)
@@ -50,7 +50,7 @@ class GenericResponse:
         self.result = []
         self.header = dict()
 
-        if errorcode == 200:
+        if errorcode == 200 and fmt != 'debug':
             getattr(self, '_parse_' + fmt)()
 
     def _parse_json(self):
index 68d7b2f43544ef1626d29e7ca66962b1bea682ff..0ee921375e8544594e65ddb2b213ceffe8cb1a59 100644 (file)
@@ -91,6 +91,7 @@ class NominatimEnvironment:
         self.test_env['NOMINATIM_BINDIR'] = self.src_dir / 'utils'
         self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = self.build_dir / 'module'
         self.test_env['NOMINATIM_OSM2PGSQL_BINARY'] = self.build_dir / 'osm2pgsql' / 'osm2pgsql'
+        self.test_env['NOMINATIM_NOMINATIM_TOOL'] = self.build_dir / 'nominatim'
 
         if self.server_module_path:
             self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = self.server_module_path
index a56081c03109765272bc234fc1364a622f0d0c65..ad4a8515af55d39ea62a9570d76d91084f69fa22 100644 (file)
@@ -12,6 +12,7 @@ from urllib.parse import urlencode
 from utils import run_script
 from http_responses import GenericResponse, SearchResponse, ReverseResponse, StatusResponse
 from check_functions import Bbox
+from table_compare import NominatimID
 
 LOG = logging.getLogger(__name__)
 
@@ -78,7 +79,7 @@ def query_cmd(context, query, dups):
     context.response = SearchResponse(outp, 'json')
 
 def send_api_query(endpoint, params, fmt, context):
-    if fmt is not None:
+    if fmt is not None and fmt.strip() != 'debug':
         params['format'] = fmt.strip()
     if context.table:
         if context.table.headings[0] == 'param':
@@ -105,7 +106,7 @@ def send_api_query(endpoint, params, fmt, context):
     cmd = ['/usr/bin/env', 'php-cgi', '-f']
     if context.nominatim.code_coverage_path:
         env['COV_SCRIPT_FILENAME'] = env['SCRIPT_FILENAME']
-        env['COV_PHP_DIR'] = os.path.join(context.nominatim.src_dir, "lib")
+        env['COV_PHP_DIR'] = context.nominatim.src_dir
         env['COV_TEST_NAME'] = '%s:%s' % (context.scenario.filename, context.scenario.line)
         env['SCRIPT_FILENAME'] = \
                 os.path.join(os.path.split(__file__)[0], 'cgi-with-coverage.php')
@@ -147,6 +148,8 @@ def website_search_request(context, fmt, query, addr):
         params['q'] = query
     if addr is not None:
         params['addressdetails'] = '1'
+    if fmt and fmt.strip() == 'debug':
+        params['debug'] = '1'
 
     outp, status = send_api_query('search', params, fmt, context)
 
@@ -159,6 +162,8 @@ def website_reverse_request(context, fmt, lat, lon):
         params['lat'] = lat
     if lon is not None:
         params['lon'] = lon
+    if fmt and fmt.strip() == 'debug':
+        params['debug'] = '1'
 
     outp, status = send_api_query('reverse', params, fmt, context)
 
@@ -168,8 +173,11 @@ def website_reverse_request(context, fmt, lat, lon):
 def website_details_request(context, fmt, query):
     params = {}
     if query[0] in 'NWR':
-        params['osmtype'] = query[0]
-        params['osmid'] = query[1:]
+        nid = NominatimID(query)
+        params['osmtype'] = nid.typ
+        params['osmid'] = nid.oid
+        if nid.cls:
+            params['class'] = nid.cls
     else:
         params['place_id'] = query
     outp, status = send_api_query('details', params, fmt, context)
@@ -199,7 +207,8 @@ def validate_result_number(context, operator, number):
 
 @then(u'a HTTP (?P<status>\d+) is returned')
 def check_http_return_status(context, status):
-    assert context.response.errorcode == int(status)
+    assert context.response.errorcode == int(status), \
+           "Return HTTP status is {}.".format(context.response.errorcode)
 
 @then(u'the page contents equals "(?P<text>.+)"')
 def check_page_content_equals(context, text):
@@ -232,9 +241,13 @@ def check_header_attr(context):
 def check_header_no_attr(context, neg, attrs):
     for attr in attrs.split(','):
         if neg:
-            assert attr not in context.response.header
+            assert attr not in context.response.header, \
+                   "Unexpected attribute {}. Full response:\n{}".format(
+                       attr, json.dumps(context.response.header, sort_keys=True, indent=2))
         else:
-            assert attr in context.response.header
+            assert attr in context.response.header, \
+                   "No attribute {}. Full response:\n{}".format(
+                       attr, json.dumps(context.response.header, sort_keys=True, indent=2))
 
 @then(u'results contain')
 def step_impl(context):
@@ -255,9 +268,13 @@ def validate_attributes(context, lid, neg, attrs):
     for i in idx:
         for attr in attrs.split(','):
             if neg:
-                assert attr not in context.response.result[i]
+                assert attr not in context.response.result[i],\
+                       "Unexpected attribute {}. Full response:\n{}".format(
+                           attr, json.dumps(context.response.result[i], sort_keys=True, indent=2))
             else:
-                assert attr in context.response.result[i]
+                assert attr in context.response.result[i], \
+                       "No attribute {}. Full response:\n{}".format(
+                           attr, json.dumps(context.response.result[i], sort_keys=True, indent=2))
 
 @then(u'result addresses contain')
 def step_impl(context):
diff --git a/test/php/Nominatim/OutputTest.php b/test/php/Nominatim/OutputTest.php
deleted file mode 100644 (file)
index cbfebb7..0000000
+++ /dev/null
@@ -1,71 +0,0 @@
-<?php
-
-namespace Nominatim;
-
-require_once(CONST_LibDir.'/output.php');
-
-class OutputTest extends \PHPUnit\Framework\TestCase
-{
-    public function testDetailsPermaLinkNode()
-    {
-        $aFeature = array('osm_type' => 'N', 'osm_id'=> 38274, 'class' => 'place');
-        $this->assertSame(
-            detailsPermaLink($aFeature),
-            '<a href="details.php?osmtype=N&osmid=38274&class=place">node 38274</a>'
-        );
-    }
-
-    public function testDetailsPermaLinkWay()
-    {
-        $aFeature = array('osm_type' => 'W', 'osm_id'=> 65, 'class' => 'highway');
-        $this->assertSame(
-            detailsPermaLink($aFeature),
-            '<a href="details.php?osmtype=W&osmid=65&class=highway">way 65</a>'
-        );
-    }
-
-    public function testDetailsPermaLinkRelation()
-    {
-        $aFeature = array('osm_type' => 'R', 'osm_id'=> 9908, 'class' => 'waterway');
-        $this->assertSame(
-            detailsPermaLink($aFeature),
-            '<a href="details.php?osmtype=R&osmid=9908&class=waterway">relation 9908</a>'
-        );
-    }
-
-    public function testDetailsPermaLinkTiger()
-    {
-        $aFeature = array('osm_type' => 'T', 'osm_id'=> 2, 'place_id' => 334);
-        $this->assertSame(
-            detailsPermaLink($aFeature, 'foo'),
-            '<a href="details.php?place_id=334">foo</a>'
-        );
-    }
-
-    public function testDetailsPermaLinkInterpolation()
-    {
-        $aFeature = array('osm_type' => 'I', 'osm_id'=> 400, 'place_id' => 3);
-        $this->assertSame(
-            detailsPermaLink($aFeature, 'foo'),
-            '<a href="details.php?place_id=3">foo</a>'
-        );
-    }
-
-    public function testDetailsPermaLinkWithExtraPropertiesNode()
-    {
-        $aFeature = array('osm_type' => 'N', 'osm_id'=> 2, 'class' => 'amenity');
-        $this->assertSame(
-            detailsPermaLink($aFeature, 'something', 'class="xtype"'),
-            '<a class="xtype" href="details.php?osmtype=N&osmid=2&class=amenity">something</a>'
-        );
-    }
-
-    public function testDetailsPermaLinkWithExtraPropertiesTiger()
-    {
-        $aFeature = array('osm_type' => 'T', 'osm_id'=> 5, 'place_id' => 46);
-        $this->assertSame(
-            detailsPermaLink($aFeature, 'something', 'class="xtype"'),
-            '<a class="xtype" href="details.php?place_id=46">something</a>'
-        );
-    }
-}
diff --git a/test/python/conftest.py b/test/python/conftest.py
new file mode 100644 (file)
index 0000000..8b0ba14
--- /dev/null
@@ -0,0 +1,155 @@
+import itertools
+import sys
+from pathlib import Path
+
+import psycopg2
+import psycopg2.extras
+import pytest
+
+SRC_DIR = Path(__file__) / '..' / '..' / '..'
+
+# always test against the source
+sys.path.insert(0, str(SRC_DIR.resolve()))
+
+from nominatim.config import Configuration
+from nominatim.db import connection
+
+class _TestingCursor(psycopg2.extras.DictCursor):
+    """ Extension to the DictCursor class that provides execution
+        short-cuts that simplify writing assertions.
+    """
+
+    def scalar(self, sql, params=None):
+        """ Execute a query with a single return value and return this value.
+            Raises an assertion when not exactly one row is returned.
+        """
+        self.execute(sql, params)
+        assert self.rowcount == 1
+        return self.fetchone()[0]
+
+    def row_set(self, sql, params=None):
+        """ Execute a query and return the result as a set of tuples.
+        """
+        self.execute(sql, params)
+        if self.rowcount == 1:
+            return set(tuple(self.fetchone()))
+
+        return set((tuple(row) for row in self))
+
+@pytest.fixture
+def temp_db(monkeypatch):
+    """ Create an empty database for the test. The database name is also
+        exported into NOMINATIM_DATABASE_DSN.
+    """
+    name = 'test_nominatim_python_unittest'
+    conn = psycopg2.connect(database='postgres')
+
+    conn.set_isolation_level(0)
+    with conn.cursor() as cur:
+        cur.execute('DROP DATABASE IF EXISTS {}'.format(name))
+        cur.execute('CREATE DATABASE {}'.format(name))
+
+    conn.close()
+
+    monkeypatch.setenv('NOMINATIM_DATABASE_DSN' , 'dbname=' + name)
+
+    yield name
+
+    conn = psycopg2.connect(database='postgres')
+
+    conn.set_isolation_level(0)
+    with conn.cursor() as cur:
+        cur.execute('DROP DATABASE IF EXISTS {}'.format(name))
+
+    conn.close()
+
+@pytest.fixture
+def temp_db_with_extensions(temp_db):
+    conn = psycopg2.connect(database=temp_db)
+    with conn.cursor() as cur:
+        cur.execute('CREATE EXTENSION hstore; CREATE EXTENSION postgis;')
+    conn.commit()
+    conn.close()
+
+    return temp_db
+
+@pytest.fixture
+def temp_db_conn(temp_db):
+    """ Connection to the test database.
+    """
+    conn = connection.connect('dbname=' + temp_db)
+    yield conn
+    conn.close()
+
+
+@pytest.fixture
+def temp_db_cursor(temp_db):
+    """ Connection and cursor towards the test database. The connection will
+        be in auto-commit mode.
+    """
+    conn = psycopg2.connect('dbname=' + temp_db)
+    conn.set_isolation_level(0)
+    with conn.cursor(cursor_factory=_TestingCursor) as cur:
+        yield cur
+    conn.close()
+
+
+@pytest.fixture
+def def_config():
+    return Configuration(None, SRC_DIR.resolve() / 'settings')
+
+
+@pytest.fixture
+def status_table(temp_db_conn):
+    """ Create an empty version of the status table and
+        the status logging table.
+    """
+    with temp_db_conn.cursor() as cur:
+        cur.execute("""CREATE TABLE import_status (
+                           lastimportdate timestamp with time zone NOT NULL,
+                           sequence_id integer,
+                           indexed boolean
+                       )""")
+        cur.execute("""CREATE TABLE import_osmosis_log (
+                           batchend timestamp,
+                           batchseq integer,
+                           batchsize bigint,
+                           starttime timestamp,
+                           endtime timestamp,
+                           event text
+                           )""")
+    temp_db_conn.commit()
+
+
+@pytest.fixture
+def place_table(temp_db_with_extensions, temp_db_conn):
+    """ Create an empty version of the place table.
+    """
+    with temp_db_conn.cursor() as cur:
+        cur.execute("""CREATE TABLE place (
+                           osm_id int8 NOT NULL,
+                           osm_type char(1) NOT NULL,
+                           class text NOT NULL,
+                           type text NOT NULL,
+                           name hstore,
+                           admin_level smallint,
+                           address hstore,
+                           extratags hstore,
+                           geometry Geometry(Geometry,4326) NOT NULL)""")
+    temp_db_conn.commit()
+
+
+@pytest.fixture
+def place_row(place_table, temp_db_cursor):
+    """ A factory for rows in the place table. The table is created as a
+        prerequisite to the fixture.
+    """
+    idseq = itertools.count(1001)
+    def _insert(osm_type='N', osm_id=None, cls='amenity', typ='cafe', names=None,
+                admin_level=None, address=None, extratags=None, geom=None):
+        temp_db_cursor.execute("INSERT INTO place VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)",
+                               (osm_id or next(idseq), osm_type, cls, typ, names,
+                                admin_level, address, extratags,
+                                geom or 'SRID=4326;POINT(0 0 )'))
+
+    return _insert
diff --git a/test/python/test_cli.py b/test/python/test_cli.py
new file mode 100644 (file)
index 0000000..cde8475
--- /dev/null
@@ -0,0 +1,232 @@
+"""
+Tests for command line interface wrapper.
+
+These tests just check that the various command line parameters route to the
+correct functionality. They use a lot of monkeypatching to avoid executing
+the actual functions.
+"""
+import datetime as dt
+import psycopg2
+import pytest
+import time
+
+import nominatim.cli
+import nominatim.indexer.indexer
+import nominatim.tools.refresh
+import nominatim.tools.replication
+from nominatim.errors import UsageError
+from nominatim.db import status
+
+def call_nominatim(*args):
+    return nominatim.cli.nominatim(module_dir='build/module',
+                                   osm2pgsql_path='build/osm2pgsql/osm2pgsql',
+                                   phplib_dir='lib',
+                                   data_dir='.',
+                                   phpcgi_path='/usr/bin/php-cgi',
+                                   cli_args=args)
+
+class MockParamCapture:
+    """ Mock that records the parameters with which a function was called
+        as well as the number of calls.
+    """
+    def __init__(self, retval=0):
+        self.called = 0
+        self.return_value = retval
+
+    def __call__(self, *args, **kwargs):
+        self.called += 1
+        self.last_args = args
+        self.last_kwargs = kwargs
+        return self.return_value
+
+@pytest.fixture
+def mock_run_legacy(monkeypatch):
+    mock = MockParamCapture()
+    monkeypatch.setattr(nominatim.cli, 'run_legacy_script', mock)
+    return mock
+
+@pytest.fixture
+def mock_run_api(monkeypatch):
+    mock = MockParamCapture()
+    monkeypatch.setattr(nominatim.cli, 'run_api_script', mock)
+    return mock
+
+
+def test_cli_help(capsys):
+    """ Running nominatim tool without arguments prints help.
+    """
+    assert 1 == call_nominatim()
+
+    captured = capsys.readouterr()
+    assert captured.out.startswith('usage:')
+
+
+@pytest.mark.parametrize("command,script", [
+                         (('import', '--continue', 'load-data'), 'setup'),
+                         (('freeze',), 'setup'),
+                         (('special-phrases',), 'specialphrases'),
+                         (('add-data', '--tiger-data', 'tiger'), 'setup'),
+                         (('add-data', '--file', 'foo.osm'), 'update'),
+                         (('check-database',), 'check_import_finished'),
+                         (('warm',), 'warm'),
+                         (('export',), 'export')
+                         ])
+def test_legacy_commands_simple(mock_run_legacy, command, script):
+    assert 0 == call_nominatim(*command)
+
+    assert mock_run_legacy.called == 1
+    assert mock_run_legacy.last_args[0] == script + '.php'
+
+
+@pytest.mark.parametrize("name,oid", [('file', 'foo.osm'), ('diff', 'foo.osc'),
+                                      ('node', 12), ('way', 8), ('relation', 32)])
+def test_add_data_command(mock_run_legacy, name, oid):
+    assert 0 == call_nominatim('add-data', '--' + name, str(oid))
+
+    assert mock_run_legacy.called == 1
+    assert mock_run_legacy.last_args == ('update.php', '--import-' + name, oid)
+
+
+@pytest.mark.parametrize("params,do_bnds,do_ranks", [
+                          ([], 1, 1),
+                          (['--boundaries-only'], 1, 0),
+                          (['--no-boundaries'], 0, 1),
+                          (['--boundaries-only', '--no-boundaries'], 0, 0)])
+def test_index_command(monkeypatch, temp_db_cursor, params, do_bnds, do_ranks):
+    temp_db_cursor.execute("CREATE TABLE import_status (indexed bool)")
+    bnd_mock = MockParamCapture()
+    monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_boundaries', bnd_mock)
+    rank_mock = MockParamCapture()
+    monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_by_rank', rank_mock)
+
+    assert 0 == call_nominatim('index', *params)
+
+    assert bnd_mock.called == do_bnds
+    assert rank_mock.called == do_ranks
+
+
+@pytest.mark.parametrize("command,params", [
+                         ('wiki-data', ('setup.php', '--import-wikipedia-articles')),
+                         ('importance', ('update.php', '--recompute-importance')),
+                         ('website', ('setup.php', '--setup-website')),
+                         ])
+def test_refresh_legacy_command(mock_run_legacy, temp_db, command, params):
+    assert 0 == call_nominatim('refresh', '--' + command)
+
+    assert mock_run_legacy.called == 1
+    assert len(mock_run_legacy.last_args) >= len(params)
+    assert mock_run_legacy.last_args[:len(params)] == params
+
+@pytest.mark.parametrize("command,func", [
+                         ('postcodes', 'update_postcodes'),
+                         ('word-counts', 'recompute_word_counts'),
+                         ('address-levels', 'load_address_levels_from_file'),
+                         ('functions', 'create_functions'),
+                         ])
+def test_refresh_command(monkeypatch, temp_db, command, func):
+    func_mock = MockParamCapture()
+    monkeypatch.setattr(nominatim.tools.refresh, func, func_mock)
+
+    assert 0 == call_nominatim('refresh', '--' + command)
+    assert func_mock.called == 1
+
+
+def test_refresh_importance_computed_after_wiki_import(mock_run_legacy, temp_db):
+    assert 0 == call_nominatim('refresh', '--importance', '--wiki-data')
+
+    assert mock_run_legacy.called == 2
+    assert mock_run_legacy.last_args == ('update.php', '--recompute-importance')
+
+
+@pytest.mark.parametrize("params,func", [
+                         (('--init', '--no-update-functions'), 'init_replication'),
+                         (('--check-for-updates',), 'check_for_updates')
+                         ])
+def test_replication_command(monkeypatch, temp_db, params, func):
+    func_mock = MockParamCapture()
+    monkeypatch.setattr(nominatim.tools.replication, func, func_mock)
+
+    assert 0 == call_nominatim('replication', *params)
+    assert func_mock.called == 1
+
+
+def test_replication_update_bad_interval(monkeypatch, temp_db):
+    monkeypatch.setenv('NOMINATIM_REPLICATION_UPDATE_INTERVAL', 'xx')
+
+    assert call_nominatim('replication') == 1
+
+
+def test_replication_update_bad_interval_for_geofabrik(monkeypatch, temp_db):
+    monkeypatch.setenv('NOMINATIM_REPLICATION_URL',
+                       'https://download.geofabrik.de/europe/ireland-and-northern-ireland-updates')
+
+    assert call_nominatim('replication') == 1
+
+
+@pytest.mark.parametrize("state, retval", [
+                         (nominatim.tools.replication.UpdateState.UP_TO_DATE, 0),
+                         (nominatim.tools.replication.UpdateState.NO_CHANGES, 3)
+                         ])
+def test_replication_update_once_no_index(monkeypatch, temp_db, temp_db_conn,
+                                          status_table, state, retval):
+    status.set_status(temp_db_conn, date=dt.datetime.now(dt.timezone.utc), seq=1)
+    func_mock = MockParamCapture(retval=state)
+    monkeypatch.setattr(nominatim.tools.replication, 'update', func_mock)
+
+    assert retval == call_nominatim('replication', '--once', '--no-index')
+
+
+def test_replication_update_continuous(monkeypatch, temp_db_conn, status_table):
+    status.set_status(temp_db_conn, date=dt.datetime.now(dt.timezone.utc), seq=1)
+    states = [nominatim.tools.replication.UpdateState.UP_TO_DATE,
+              nominatim.tools.replication.UpdateState.UP_TO_DATE]
+    monkeypatch.setattr(nominatim.tools.replication, 'update',
+                        lambda *args, **kwargs: states.pop())
+
+    index_mock = MockParamCapture()
+    monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_boundaries', index_mock)
+    monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_by_rank', index_mock)
+
+    with pytest.raises(IndexError):
+        call_nominatim('replication')
+
+    assert index_mock.called == 4
+
+
+def test_replication_update_continuous_no_change(monkeypatch, temp_db_conn, status_table):
+    status.set_status(temp_db_conn, date=dt.datetime.now(dt.timezone.utc), seq=1)
+    states = [nominatim.tools.replication.UpdateState.NO_CHANGES,
+              nominatim.tools.replication.UpdateState.UP_TO_DATE]
+    monkeypatch.setattr(nominatim.tools.replication, 'update',
+                        lambda *args, **kwargs: states.pop())
+
+    index_mock = MockParamCapture()
+    monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_boundaries', index_mock)
+    monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_by_rank', index_mock)
+
+    sleep_mock = MockParamCapture()
+    monkeypatch.setattr(time, 'sleep', sleep_mock)
+
+    with pytest.raises(IndexError):
+        call_nominatim('replication')
+
+    assert index_mock.called == 2
+    assert sleep_mock.called == 1
+    assert sleep_mock.last_args[0] == 60
+
+
+@pytest.mark.parametrize("params", [
+                         ('search', '--query', 'new'),
+                         ('reverse', '--lat', '0', '--lon', '0'),
+                         ('lookup', '--id', 'N1'),
+                         ('details', '--node', '1'),
+                         ('details', '--way', '1'),
+                         ('details', '--relation', '1'),
+                         ('details', '--place_id', '10001'),
+                         ('status',)
+                         ])
+def test_api_commands_simple(mock_run_api, params):
+    assert 0 == call_nominatim(*params)
+
+    assert mock_run_api.called == 1
+    assert mock_run_api.last_args[0] == params[0]
diff --git a/test/python/test_config.py b/test/python/test_config.py
new file mode 100644 (file)
index 0000000..4578be1
--- /dev/null
@@ -0,0 +1,156 @@
+"""
+Test for loading dotenv configuration.
+"""
+from pathlib import Path
+import tempfile
+
+import pytest
+
+from nominatim.config import Configuration
+from nominatim.errors import UsageError
+
+DEFCFG_DIR = Path(__file__) / '..' / '..' / '..' / 'settings'
+
+def test_no_project_dir():
+    config = Configuration(None, DEFCFG_DIR)
+
+    assert config.DATABASE_WEBUSER == 'www-data'
+
+
+def test_prefer_project_setting_over_default():
+    with tempfile.TemporaryDirectory() as project_dir:
+        with open(project_dir + '/.env', 'w') as envfile:
+            envfile.write('NOMINATIM_DATABASE_WEBUSER=apache\n')
+
+        config = Configuration(Path(project_dir), DEFCFG_DIR)
+
+        assert config.DATABASE_WEBUSER == 'apache'
+
+
+def test_prefer_os_environ_over_project_setting(monkeypatch):
+    with tempfile.TemporaryDirectory() as project_dir:
+        with open(project_dir + '/.env', 'w') as envfile:
+            envfile.write('NOMINATIM_DATABASE_WEBUSER=apache\n')
+
+        monkeypatch.setenv('NOMINATIM_DATABASE_WEBUSER', 'nobody')
+
+        config = Configuration(Path(project_dir), DEFCFG_DIR)
+
+        assert config.DATABASE_WEBUSER == 'nobody'
+
+
+def test_get_os_env_add_defaults(monkeypatch):
+    config = Configuration(None, DEFCFG_DIR)
+
+    monkeypatch.delenv('NOMINATIM_DATABASE_WEBUSER', raising=False)
+
+    assert config.get_os_env()['NOMINATIM_DATABASE_WEBUSER'] == 'www-data'
+
+
+def test_get_os_env_prefer_os_environ(monkeypatch):
+    config = Configuration(None, DEFCFG_DIR)
+
+    monkeypatch.setenv('NOMINATIM_DATABASE_WEBUSER', 'nobody')
+
+    assert config.get_os_env()['NOMINATIM_DATABASE_WEBUSER'] == 'nobody'
+
+
+def test_get_libpq_dsn_convert_default():
+    config = Configuration(None, DEFCFG_DIR)
+
+    assert config.get_libpq_dsn() == 'dbname=nominatim'
+
+
+def test_get_libpq_dsn_convert_php(monkeypatch):
+    config = Configuration(None, DEFCFG_DIR)
+
+    monkeypatch.setenv('NOMINATIM_DATABASE_DSN',
+                       'pgsql:dbname=gis;password=foo;host=localhost')
+
+    assert config.get_libpq_dsn() == 'dbname=gis password=foo host=localhost'
+
+
+@pytest.mark.parametrize("val,expect", [('foo bar', "'foo bar'"),
+                                        ("xy'z", "xy\\'z"),
+                                       ])
+def test_get_libpq_dsn_convert_php_special_chars(monkeypatch, val, expect):
+    config = Configuration(None, DEFCFG_DIR)
+
+    monkeypatch.setenv('NOMINATIM_DATABASE_DSN',
+                       'pgsql:dbname=gis;password={}'.format(val))
+
+    assert config.get_libpq_dsn() == "dbname=gis password={}".format(expect)
+
+
+def test_get_libpq_dsn_convert_libpq(monkeypatch):
+    config = Configuration(None, DEFCFG_DIR)
+
+    monkeypatch.setenv('NOMINATIM_DATABASE_DSN', 
+                       'host=localhost dbname=gis password=foo')
+
+    assert config.get_libpq_dsn() == 'host=localhost dbname=gis password=foo'
+
+
+@pytest.mark.parametrize("value,result",
+                         [(x, True) for x in ('1', 'true', 'True', 'yes', 'YES')] +
+                         [(x, False) for x in ('0', 'false', 'no', 'NO', 'x')])
+def test_get_bool(monkeypatch, value, result):
+    config = Configuration(None, DEFCFG_DIR)
+
+    monkeypatch.setenv('NOMINATIM_FOOBAR', value)
+
+    assert config.get_bool('FOOBAR') == result
+
+def test_get_bool_empty():
+    config = Configuration(None, DEFCFG_DIR)
+
+    assert config.DATABASE_MODULE_PATH == ''
+    assert config.get_bool('DATABASE_MODULE_PATH') == False
+
+
+@pytest.mark.parametrize("value,result", [('0', 0), ('1', 1),
+                                          ('85762513444', 85762513444)])
+def test_get_int_success(monkeypatch, value, result):
+    config = Configuration(None, DEFCFG_DIR)
+
+    monkeypatch.setenv('NOMINATIM_FOOBAR', value)
+
+    assert config.get_int('FOOBAR') == result
+
+
+@pytest.mark.parametrize("value", ['1b', 'fg', '0x23'])
+def test_get_int_bad_values(monkeypatch, value):
+    config = Configuration(None, DEFCFG_DIR)
+
+    monkeypatch.setenv('NOMINATIM_FOOBAR', value)
+
+    with pytest.raises(UsageError):
+        config.get_int('FOOBAR')
+
+
+def test_get_int_empty():
+    config = Configuration(None, DEFCFG_DIR)
+
+    assert config.DATABASE_MODULE_PATH == ''
+
+    with pytest.raises(UsageError):
+        config.get_int('DATABASE_MODULE_PATH')
+
+
+def test_get_import_style_intern(monkeypatch):
+    config = Configuration(None, DEFCFG_DIR)
+
+    monkeypatch.setenv('NOMINATIM_IMPORT_STYLE', 'street')
+
+    expected = DEFCFG_DIR / 'import-street.style'
+
+    assert config.get_import_style_file() == expected
+
+
+@pytest.mark.parametrize("value", ['custom', '/foo/bar.stye'])
+def test_get_import_style_intern(monkeypatch, value):
+    config = Configuration(None, DEFCFG_DIR)
+
+    monkeypatch.setenv('NOMINATIM_IMPORT_STYLE', value)
+
+    assert str(config.get_import_style_file()) == value
diff --git a/test/python/test_db_connection.py b/test/python/test_db_connection.py
new file mode 100644 (file)
index 0000000..ef1ae74
--- /dev/null
@@ -0,0 +1,32 @@
+"""
+Tests for specialised connection and cursor classes.
+"""
+import pytest
+
+from nominatim.db.connection import connect
+
+@pytest.fixture
+def db(temp_db):
+    conn = connect('dbname=' + temp_db)
+    yield conn
+    conn.close()
+
+
+def test_connection_table_exists(db, temp_db_cursor):
+    assert db.table_exists('foobar') == False
+
+    temp_db_cursor.execute('CREATE TABLE foobar (id INT)')
+
+    assert db.table_exists('foobar') == True
+
+
+def test_cursor_scalar(db, temp_db_cursor):
+    temp_db_cursor.execute('CREATE TABLE dummy (id INT)')
+
+    with db.cursor() as cur:
+        assert cur.scalar('SELECT count(*) FROM dummy') == 0
+
+def test_cursor_scalar_many_rows(db):
+    with db.cursor() as cur:
+        with pytest.raises(RuntimeError):
+            cur.scalar('SELECT * FROM pg_tables')
diff --git a/test/python/test_db_status.py b/test/python/test_db_status.py
new file mode 100644 (file)
index 0000000..399a003
--- /dev/null
@@ -0,0 +1,114 @@
+"""
+Tests for status table manipulation.
+"""
+import datetime as dt
+
+import pytest
+
+import nominatim.db.status
+from nominatim.errors import UsageError
+
+def test_compute_database_date_place_empty(status_table, place_table, temp_db_conn):
+    with pytest.raises(UsageError):
+        nominatim.db.status.compute_database_date(temp_db_conn)
+
+OSM_NODE_DATA = """\
+<osm version="0.6" generator="OpenStreetMap server" copyright="OpenStreetMap and contributors" attribution="http://www.openstreetmap.org/copyright" license="http://opendatacommons.org/licenses/odbl/1-0/">
+<node id="45673" visible="true" version="1" changeset="2047" timestamp="2006-01-27T22:09:10Z" user="Foo" uid="111" lat="48.7586670" lon="8.1343060">
+</node>
+</osm>
+"""
+
+def test_compute_database_date_valid(monkeypatch, status_table, place_row, temp_db_conn):
+    place_row(osm_type='N', osm_id=45673)
+
+    requested_url = []
+    def mock_url(url):
+        requested_url.append(url)
+        return OSM_NODE_DATA
+
+    monkeypatch.setattr(nominatim.db.status, "get_url", mock_url)
+
+    date = nominatim.db.status.compute_database_date(temp_db_conn)
+
+    assert requested_url == ['https://www.openstreetmap.org/api/0.6/node/45673/1']
+    assert date == dt.datetime.fromisoformat('2006-01-27T22:09:10').replace(tzinfo=dt.timezone.utc)
+
+
+def test_compute_database_broken_api(monkeypatch, status_table, place_row, temp_db_conn):
+    place_row(osm_type='N', osm_id=45673)
+
+    requested_url = []
+    def mock_url(url):
+        requested_url.append(url)
+        return '<osm version="0.6" generator="OpenStre'
+
+    monkeypatch.setattr(nominatim.db.status, "get_url", mock_url)
+
+    with pytest.raises(UsageError):
+        date = nominatim.db.status.compute_database_date(temp_db_conn)
+
+
+def test_set_status_empty_table(status_table, temp_db_conn, temp_db_cursor):
+    date = dt.datetime.fromordinal(1000000).replace(tzinfo=dt.timezone.utc)
+    nominatim.db.status.set_status(temp_db_conn, date=date)
+
+    temp_db_cursor.execute("SELECT * FROM import_status")
+
+    assert temp_db_cursor.rowcount == 1
+    assert temp_db_cursor.fetchone() == [date, None, True]
+
+
+def test_set_status_filled_table(status_table, temp_db_conn, temp_db_cursor):
+    date = dt.datetime.fromordinal(1000000).replace(tzinfo=dt.timezone.utc)
+    nominatim.db.status.set_status(temp_db_conn, date=date)
+
+    assert 1 == temp_db_cursor.scalar("SELECT count(*) FROM import_status")
+
+
+    date = dt.datetime.fromordinal(1000100).replace(tzinfo=dt.timezone.utc)
+    nominatim.db.status.set_status(temp_db_conn, date=date, seq=456, indexed=False)
+
+    temp_db_cursor.execute("SELECT * FROM import_status")
+
+    assert temp_db_cursor.rowcount == 1
+    assert temp_db_cursor.fetchone() == [date, 456, False]
+
+
+def test_get_status_empty_table(status_table, temp_db_conn):
+    assert nominatim.db.status.get_status(temp_db_conn) == (None, None, None)
+
+
+def test_get_status_success(status_table, temp_db_conn):
+    date = dt.datetime.fromordinal(1000000).replace(tzinfo=dt.timezone.utc)
+    nominatim.db.status.set_status(temp_db_conn, date=date, seq=667, indexed=False)
+
+    assert nominatim.db.status.get_status(temp_db_conn) == \
+             (date, 667, False)
+
+
+@pytest.mark.parametrize("old_state", [True, False])
+@pytest.mark.parametrize("new_state", [True, False])
+def test_set_indexed(status_table, temp_db_conn, temp_db_cursor, old_state, new_state):
+    date = dt.datetime.fromordinal(1000000).replace(tzinfo=dt.timezone.utc)
+    nominatim.db.status.set_status(temp_db_conn, date=date, indexed=old_state)
+    nominatim.db.status.set_indexed(temp_db_conn, new_state)
+
+    assert temp_db_cursor.scalar("SELECT indexed FROM import_status") == new_state
+
+
+def test_set_indexed_empty_status(status_table, temp_db_conn, temp_db_cursor):
+    nominatim.db.status.set_indexed(temp_db_conn, True)
+
+    assert temp_db_cursor.scalar("SELECT count(*) FROM import_status") == 0
+
+
+def text_log_status(status_table, temp_db_conn):
+    date = dt.datetime.fromordinal(1000000).replace(tzinfo=dt.timezone.utc)
+    start = dt.datetime.now() - dt.timedelta(hours=1)
+    nominatim.db.status.set_status(temp_db_conn, date=date, seq=56)
+    nominatim.db.status.log_status(temp_db_conn, start, 'index')
+
+    assert temp_db_cursor.scalar("SELECT count(*) FROM import_osmosis_log") == 1
+    assert temp_db_cursor.scalar("SELECT seq FROM import_osmosis_log") == 56
+    assert temp_db_cursor.scalar("SELECT date FROM import_osmosis_log") == date
diff --git a/test/python/test_db_utils.py b/test/python/test_db_utils.py
new file mode 100644 (file)
index 0000000..e756f2c
--- /dev/null
@@ -0,0 +1,30 @@
+"""
+Tests for DB utility functions in db.utils
+"""
+import psycopg2
+import pytest
+
+import nominatim.db.utils as db_utils
+
+def test_execute_file_success(temp_db_conn, tmp_path):
+    tmpfile = tmp_path / 'test.sql'
+    tmpfile.write_text('CREATE TABLE test (id INT);\nINSERT INTO test VALUES(56);')
+
+    db_utils.execute_file(temp_db_conn, tmpfile)
+
+    with temp_db_conn.cursor() as cur:
+        cur.execute('SELECT * FROM test')
+
+        assert cur.rowcount == 1
+        assert cur.fetchone()[0] == 56
+
+def test_execute_file_bad_file(temp_db_conn, tmp_path):
+    with pytest.raises(FileNotFoundError):
+        db_utils.execute_file(temp_db_conn, tmp_path / 'test2.sql')
+
+def test_execute_file_bad_sql(temp_db_conn, tmp_path):
+    tmpfile = tmp_path / 'test.sql'
+    tmpfile.write_text('CREATE STABLE test (id INT)')
+
+    with pytest.raises(psycopg2.ProgrammingError):
+        db_utils.execute_file(temp_db_conn, tmpfile)
diff --git a/test/python/test_indexing.py b/test/python/test_indexing.py
new file mode 100644 (file)
index 0000000..6b52a65
--- /dev/null
@@ -0,0 +1,186 @@
+"""
+Tests for running the indexing.
+"""
+import itertools
+import psycopg2
+import pytest
+
+from nominatim.indexer.indexer import Indexer
+
+class IndexerTestDB:
+    """Test harness that builds a minimal schema for indexer tests.
+
+    Creates stripped-down 'placex' and 'location_property_osmline' tables
+    plus an update trigger that stamps 'indexed_date' when a row's
+    indexed_status drops to 0, so tests can verify indexing order.
+    """
+
+    def __init__(self, conn):
+        # Disjoint id counters so placex and osmline place_ids never collide.
+        self.placex_id = itertools.count(100000)
+        self.osmline_id = itertools.count(500000)
+
+        self.conn = conn
+        # Isolation level 0 = autocommit, so rows inserted here are visible
+        # to the indexer's own database connections immediately.
+        self.conn.set_isolation_level(0)
+        with self.conn.cursor() as cur:
+            cur.execute("""CREATE TABLE placex (place_id BIGINT,
+                                                class TEXT,
+                                                type TEXT,
+                                                rank_address SMALLINT,
+                                                rank_search SMALLINT,
+                                                indexed_status SMALLINT,
+                                                indexed_date TIMESTAMP,
+                                                partition SMALLINT,
+                                                admin_level SMALLINT,
+                                                geometry_sector INTEGER)""")
+            cur.execute("""CREATE TABLE location_property_osmline (
+                               place_id BIGINT,
+                               indexed_status SMALLINT,
+                               indexed_date TIMESTAMP,
+                               geometry_sector INTEGER)""")
+            # Trigger function: record the time a row became indexed
+            # (indexed_status transitions from non-zero to 0).
+            cur.execute("""CREATE OR REPLACE FUNCTION date_update() RETURNS TRIGGER
+                           AS $$
+                           BEGIN
+                             IF NEW.indexed_status = 0 and OLD.indexed_status != 0 THEN
+                               NEW.indexed_date = now();
+                             END IF;
+                             RETURN NEW;
+                           END; $$ LANGUAGE plpgsql;""")
+            cur.execute("""CREATE TRIGGER placex_update BEFORE UPDATE ON placex
+                           FOR EACH ROW EXECUTE PROCEDURE date_update()""")
+            cur.execute("""CREATE TRIGGER osmline_update BEFORE UPDATE ON location_property_osmline
+                           FOR EACH ROW EXECUTE PROCEDURE date_update()""")
+
+    def scalar(self, query):
+        # Run a query expected to return a single value and return it.
+        with self.conn.cursor() as cur:
+            cur.execute(query)
+            return cur.fetchone()[0]
+
+    def add_place(self, cls='place', typ='locality',
+                  rank_search=30, rank_address=30, sector=20):
+        # Insert one unindexed placex row (indexed_status=1); returns its id.
+        next_id = next(self.placex_id)
+        with self.conn.cursor() as cur:
+            cur.execute("""INSERT INTO placex
+                              (place_id, class, type, rank_search, rank_address,
+                               indexed_status, geometry_sector)
+                              VALUES (%s, %s, %s, %s, %s, 1, %s)""",
+                        (next_id, cls, typ, rank_search, rank_address, sector))
+        return next_id
+
+    def add_admin(self, **kwargs):
+        # Convenience wrapper: add_place() forced to an administrative boundary.
+        kwargs['cls'] = 'boundary'
+        kwargs['typ'] = 'administrative'
+        return self.add_place(**kwargs)
+
+    def add_osmline(self, sector=20):
+        # Insert one unindexed interpolation line row; returns its id.
+        next_id = next(self.osmline_id)
+        with self.conn.cursor() as cur:
+            cur.execute("""INSERT INTO location_property_osmline
+                              (place_id, indexed_status, geometry_sector)
+                              VALUES (%s, 1, %s)""",
+                        (next_id, sector))
+        return next_id
+
+    def placex_unindexed(self):
+        # Number of placex rows still waiting to be indexed.
+        return self.scalar('SELECT count(*) from placex where indexed_status > 0')
+
+    def osmline_unindexed(self):
+        # Number of interpolation rows still waiting to be indexed.
+        return self.scalar('SELECT count(*) from location_property_osmline where indexed_status > 0')
+
+
+@pytest.fixture
+def test_db(temp_db_conn):
+    yield IndexerTestDB(temp_db_conn)
+
+
+@pytest.mark.parametrize("threads", [1, 15])
+def test_index_full(test_db, threads):
+    for rank in range(31):
+        test_db.add_place(rank_address=rank, rank_search=rank)
+    test_db.add_osmline()
+
+    assert 31 == test_db.placex_unindexed()
+    assert 1 == test_db.osmline_unindexed()
+
+    idx = Indexer('dbname=test_nominatim_python_unittest', threads)
+    idx.index_by_rank(0, 30)
+
+    assert 0 == test_db.placex_unindexed()
+    assert 0 == test_db.osmline_unindexed()
+
+    assert 0 == test_db.scalar("""SELECT count(*) from placex
+                               WHERE indexed_status = 0 and indexed_date is null""")
+    # ranks come in order of rank address
+    assert 0 == test_db.scalar("""
+        SELECT count(*) FROM placex p WHERE rank_address > 0
+          AND indexed_date >= (SELECT min(indexed_date) FROM placex o
+                               WHERE p.rank_address < o.rank_address)""")
+    # placex rank < 30 objects come before interpolations
+    assert 0 == test_db.scalar(
+        """SELECT count(*) FROM placex WHERE rank_address < 30
+             AND indexed_date > (SELECT min(indexed_date) FROM location_property_osmline)""")
+    # placex rank = 30 objects come after interpolations
+    assert 0 == test_db.scalar(
+        """SELECT count(*) FROM placex WHERE rank_address = 30
+             AND indexed_date < (SELECT max(indexed_date) FROM location_property_osmline)""")
+    # rank 0 comes after rank 29 and before rank 30
+    assert 0 == test_db.scalar(
+        """SELECT count(*) FROM placex WHERE rank_address < 30
+             AND indexed_date > (SELECT min(indexed_date) FROM placex WHERE rank_address = 0)""")
+    assert 0 == test_db.scalar(
+        """SELECT count(*) FROM placex WHERE rank_address = 30
+             AND indexed_date < (SELECT max(indexed_date) FROM placex WHERE rank_address = 0)""")
+
+
+@pytest.mark.parametrize("threads", [1, 15])
+def test_index_partial_without_30(test_db, threads):
+    for rank in range(31):
+        test_db.add_place(rank_address=rank, rank_search=rank)
+    test_db.add_osmline()
+
+    assert 31 == test_db.placex_unindexed()
+    assert 1 == test_db.osmline_unindexed()
+
+    idx = Indexer('dbname=test_nominatim_python_unittest', threads)
+    idx.index_by_rank(4, 15)
+
+    assert 19 == test_db.placex_unindexed()
+    assert 1 == test_db.osmline_unindexed()
+
+    assert 0 == test_db.scalar("""
+                    SELECT count(*) FROM placex
+                      WHERE indexed_status = 0 AND not rank_address between 4 and 15""")
+
+
+@pytest.mark.parametrize("threads", [1, 15])
+def test_index_partial_with_30(test_db, threads):
+    for rank in range(31):
+        test_db.add_place(rank_address=rank, rank_search=rank)
+    test_db.add_osmline()
+
+    assert 31 == test_db.placex_unindexed()
+    assert 1 == test_db.osmline_unindexed()
+
+    idx = Indexer('dbname=test_nominatim_python_unittest', threads)
+    idx.index_by_rank(28, 30)
+
+    assert 27 == test_db.placex_unindexed()
+    assert 0 == test_db.osmline_unindexed()
+
+    assert 0 == test_db.scalar("""
+                    SELECT count(*) FROM placex
+                      WHERE indexed_status = 0 AND rank_address between 1 and 27""")
+
+@pytest.mark.parametrize("threads", [1, 15])
+def test_index_boundaries(test_db, threads):
+    for rank in range(4, 10):
+        test_db.add_admin(rank_address=rank, rank_search=rank)
+    for rank in range(31):
+        test_db.add_place(rank_address=rank, rank_search=rank)
+    test_db.add_osmline()
+
+    assert 37 == test_db.placex_unindexed()
+    assert 1 == test_db.osmline_unindexed()
+
+    idx = Indexer('dbname=test_nominatim_python_unittest', threads)
+    idx.index_boundaries(0, 30)
+
+    assert 31 == test_db.placex_unindexed()
+    assert 1 == test_db.osmline_unindexed()
+
+    assert 0 == test_db.scalar("""
+                    SELECT count(*) FROM placex
+                      WHERE indexed_status = 0 AND class != 'boundary'""")
diff --git a/test/python/test_tools_exec_utils.py b/test/python/test_tools_exec_utils.py
new file mode 100644 (file)
index 0000000..26a714f
--- /dev/null
@@ -0,0 +1,110 @@
+"""
+Tests for tools.exec_utils module.
+"""
+from pathlib import Path
+import subprocess
+import tempfile
+
+import pytest
+
+import nominatim.tools.exec_utils as exec_utils
+
+@pytest.fixture
+def tmp_phplib_dir():
+    with tempfile.TemporaryDirectory() as phpdir:
+        (Path(phpdir) / 'admin').mkdir()
+
+        yield Path(phpdir)
+
+@pytest.fixture
+def nominatim_env(tmp_phplib_dir, def_config):
+    class _NominatimEnv:
+        config = def_config
+        phplib_dir = tmp_phplib_dir
+        data_dir = Path('data')
+        project_dir = Path('.')
+        module_dir = 'module'
+        osm2pgsql_path = 'osm2pgsql'
+
+    return _NominatimEnv
+
+@pytest.fixture
+def test_script(nominatim_env):
+    def _create_file(code):
+        with (nominatim_env.phplib_dir / 'admin' / 't.php').open(mode='w') as fd:
+            fd.write('<?php\n')
+            fd.write(code + '\n')
+
+        return 't.php'
+
+    return _create_file
+
+@pytest.fixture(params=[0, 1, 15, 255])
+def return_code(request):
+    return request.param
+
+### run_legacy_script
+
+def test_run_legacy_return_exit_code(nominatim_env, test_script, return_code):
+    fname = test_script('exit({});'.format(return_code))
+    assert return_code == exec_utils.run_legacy_script(fname,
+                                                       nominatim_env=nominatim_env)
+
+
+def test_run_legacy_return_throw_on_fail(nominatim_env, test_script):
+    fname = test_script('exit(11);')
+    with pytest.raises(subprocess.CalledProcessError):
+        exec_utils.run_legacy_script(fname, nominatim_env=nominatim_env,
+                                     throw_on_fail=True)
+
+
+def test_run_legacy_return_dont_throw_on_success(nominatim_env, test_script):
+    fname = test_script('exit(0);')
+    assert 0 == exec_utils.run_legacy_script(fname, nominatim_env=nominatim_env,
+                                             throw_on_fail=True)
+
+def test_run_legacy_use_given__module_path(nominatim_env, test_script):
+    fname = test_script("exit($_SERVER['NOMINATIM_DATABASE_MODULE_PATH'] == 'module' ? 0 : 23);")
+
+    assert 0 == exec_utils.run_legacy_script(fname, nominatim_env=nominatim_env)
+
+
+def test_run_legacy_do_not_overwrite_module_path(nominatim_env, test_script, monkeypatch):
+    monkeypatch.setenv('NOMINATIM_DATABASE_MODULE_PATH', 'other')
+    fname = test_script("exit($_SERVER['NOMINATIM_DATABASE_MODULE_PATH'] == 'other' ? 0 : 1);")
+
+    assert 0 == exec_utils.run_legacy_script(fname, nominatim_env=nominatim_env)
+
+### run_api_script
+
+@pytest.fixture
+def tmp_project_dir():
+    with tempfile.TemporaryDirectory() as tempd:
+        project_dir = Path(tempd)
+        webdir = project_dir / 'website'
+        webdir.mkdir()
+
+        with (webdir / 'test.php').open(mode='w') as fd:
+            fd.write("<?php\necho 'OK\n';")
+
+        yield project_dir
+
+def test_run_api(tmp_project_dir):
+    assert 0 == exec_utils.run_api_script('test', tmp_project_dir)
+
+def test_run_api_execution_error(tmp_project_dir):
+    assert 0 != exec_utils.run_api_script('badname', tmp_project_dir)
+
+def test_run_api_with_extra_env(tmp_project_dir):
+    extra_env = dict(SCRIPT_FILENAME=str(tmp_project_dir / 'website' / 'test.php'))
+    assert 0 == exec_utils.run_api_script('badname', tmp_project_dir,
+                                          extra_env=extra_env)
+
+
+### run_osm2pgsql
+
+def test_run_osm2pgsql():
+    exec_utils.run_osm2pgsql(dict(osm2pgsql='echo', append=False, flatnode_file=None,
+                                  dsn='dbname=foobar', threads=1, osm2pgsql_cache=500,
+                                  osm2pgsql_style='./my.style',
+                                  import_file='foo.bar'))
diff --git a/test/python/test_tools_refresh_address_levels.py b/test/python/test_tools_refresh_address_levels.py
new file mode 100644 (file)
index 0000000..87e34c6
--- /dev/null
@@ -0,0 +1,85 @@
+"""
+Tests for function for importing address ranks.
+"""
+import json
+import pytest
+from pathlib import Path
+
+from nominatim.tools.refresh import load_address_levels, load_address_levels_from_file
+
+def test_load_ranks_def_config(temp_db_conn, temp_db_cursor, def_config):
+    load_address_levels_from_file(temp_db_conn, Path(def_config.ADDRESS_LEVEL_CONFIG))
+
+    assert temp_db_cursor.scalar('SELECT count(*) FROM address_levels') > 0
+
+def test_load_ranks_from_file(temp_db_conn, temp_db_cursor, tmp_path):
+    test_file = tmp_path / 'test_levels.json'
+    test_file.write_text('[{"tags":{"place":{"sea":2}}}]')
+
+    load_address_levels_from_file(temp_db_conn, test_file)
+
+    assert temp_db_cursor.scalar('SELECT count(*) FROM address_levels') > 0
+
+
+def test_load_ranks_from_broken_file(temp_db_conn, tmp_path):
+    test_file = tmp_path / 'test_levels.json'
+    test_file.write_text('[{"tags":"place":{"sea":2}}}]')
+
+    with pytest.raises(json.decoder.JSONDecodeError):
+        load_address_levels_from_file(temp_db_conn, test_file)
+
+
+def test_load_ranks_country(temp_db_conn, temp_db_cursor):
+    load_address_levels(temp_db_conn, 'levels',
+                        [{"tags": {"place": {"village": 14}}},
+                         {"countries": ['de'],
+                          "tags": {"place": {"village": 15}}},
+                         {"countries": ['uk', 'us' ],
+                          "tags": {"place": {"village": 16}}}
+                        ])
+
+    assert temp_db_cursor.row_set('SELECT * FROM levels') == \
+           set([(None, 'place', 'village', 14, 14),
+                ('de', 'place', 'village', 15, 15),
+                ('uk', 'place', 'village', 16, 16),
+                ('us', 'place', 'village', 16, 16),
+               ])
+
+
+def test_load_ranks_default_value(temp_db_conn, temp_db_cursor):
+    load_address_levels(temp_db_conn, 'levels',
+                        [{"tags": {"boundary": {"": 28}}},
+                         {"countries": ['hu'],
+                          "tags": {"boundary": {"": 29}}}
+                        ])
+
+    assert temp_db_cursor.row_set('SELECT * FROM levels') == \
+           set([(None, 'boundary', None, 28, 28),
+                ('hu', 'boundary', None, 29, 29),
+               ])
+
+
+def test_load_ranks_multiple_keys(temp_db_conn, temp_db_cursor):
+    load_address_levels(temp_db_conn, 'levels',
+                        [{"tags":
+                            {"place": {"city": 14},
+                             "boundary": {"administrative2" : 4}}
+                         }])
+
+    assert temp_db_cursor.row_set('SELECT * FROM levels') == \
+           set([(None, 'place', 'city', 14, 14),
+                (None, 'boundary', 'administrative2', 4, 4),
+               ])
+
+
+def test_load_ranks_address(temp_db_conn, temp_db_cursor):
+    load_address_levels(temp_db_conn, 'levels',
+                        [{"tags":
+                            {"place": {"city": 14,
+                                       "town" : [14, 13]}}
+                         }])
+
+    assert temp_db_cursor.row_set('SELECT * FROM levels') == \
+           set([(None, 'place', 'city', 14, 14),
+                (None, 'place', 'town', 14, 13),
+               ])
diff --git a/test/python/test_tools_refresh_create_functions.py b/test/python/test_tools_refresh_create_functions.py
new file mode 100644 (file)
index 0000000..4807e64
--- /dev/null
@@ -0,0 +1,99 @@
+"""
+Tests for creating PL/pgSQL functions for Nominatim.
+"""
+from pathlib import Path
+import pytest
+
+from nominatim.db.connection import connect
+from nominatim.tools.refresh import _get_standard_function_sql, _get_partition_function_sql
+
+SQL_DIR = (Path(__file__) / '..' / '..' / '..' / 'sql').resolve()
+
+@pytest.fixture
+def db(temp_db):
+    conn = connect('dbname=' + temp_db)
+    yield conn
+    conn.close()
+
+@pytest.fixture
+def db_with_tables(db):
+    with db.cursor() as cur:
+        for table in ('place', 'placex', 'location_postcode'):
+            cur.execute('CREATE TABLE {} (place_id BIGINT)'.format(table))
+
+    return db
+
+
+def test_standard_functions_replace_module_default(db, def_config):
+    def_config.project_dir = Path('.')
+    sql = _get_standard_function_sql(db, def_config, SQL_DIR, False, False)
+
+    assert sql
+    assert sql.find('{modulepath}') < 0
+    assert sql.find("'{}'".format(Path('module/nominatim.so').resolve())) >= 0
+
+
+def test_standard_functions_replace_module_custom(monkeypatch, db, def_config):
+    monkeypatch.setenv('NOMINATIM_DATABASE_MODULE_PATH', 'custom')
+    sql = _get_standard_function_sql(db, def_config, SQL_DIR, False, False)
+
+    assert sql
+    assert sql.find('{modulepath}') < 0
+    assert sql.find("'custom/nominatim.so'") >= 0
+
+
+@pytest.mark.parametrize("enabled", (True, False))
+def test_standard_functions_enable_diff(db_with_tables, def_config, enabled):
+    def_config.project_dir = Path('.')
+    sql = _get_standard_function_sql(db_with_tables, def_config, SQL_DIR, enabled, False)
+
+    assert sql
+    assert (sql.find('%DIFFUPDATES%') < 0) == enabled
+
+
+@pytest.mark.parametrize("enabled", (True, False))
+def test_standard_functions_enable_debug(db_with_tables, def_config, enabled):
+    def_config.project_dir = Path('.')
+    sql = _get_standard_function_sql(db_with_tables, def_config, SQL_DIR, False, enabled)
+
+    assert sql
+    assert (sql.find('--DEBUG') < 0) == enabled
+
+
+@pytest.mark.parametrize("enabled", (True, False))
+def test_standard_functions_enable_limit_reindexing(monkeypatch, db_with_tables, def_config, enabled):
+    def_config.project_dir = Path('.')
+    monkeypatch.setenv('NOMINATIM_LIMIT_REINDEXING', 'yes' if enabled else 'no')
+    sql = _get_standard_function_sql(db_with_tables, def_config, SQL_DIR, False, False)
+
+    assert sql
+    assert (sql.find('--LIMIT INDEXING') < 0) == enabled
+
+
+@pytest.mark.parametrize("enabled", (True, False))
+def test_standard_functions_enable_tiger(monkeypatch, db_with_tables, def_config, enabled):
+    def_config.project_dir = Path('.')
+    monkeypatch.setenv('NOMINATIM_USE_US_TIGER_DATA', 'yes' if enabled else 'no')
+    sql = _get_standard_function_sql(db_with_tables, def_config, SQL_DIR, False, False)
+
+    assert sql
+    assert (sql.find('%NOTIGERDATA%') >= 0) == enabled
+
+
+@pytest.mark.parametrize("enabled", (True, False))
+def test_standard_functions_enable_aux(monkeypatch, db_with_tables, def_config, enabled):
+    def_config.project_dir = Path('.')
+    monkeypatch.setenv('NOMINATIM_USE_AUX_LOCATION_DATA', 'yes' if enabled else 'no')
+    sql = _get_standard_function_sql(db_with_tables, def_config, SQL_DIR, False, False)
+
+    assert sql
+    assert (sql.find('%NOAUXDATA%') >= 0) == enabled
+
+
+def test_partition_function(temp_db_cursor, db, def_config):
+    temp_db_cursor.execute("CREATE TABLE country_name (partition SMALLINT)")
+
+    sql = _get_partition_function_sql(db, SQL_DIR)
+
+    assert sql
+    assert sql.find('-partition-') < 0
diff --git a/test/python/test_tools_replication.py b/test/python/test_tools_replication.py
new file mode 100644 (file)
index 0000000..156385a
--- /dev/null
@@ -0,0 +1,138 @@
+"""
+Tests for replication functionality.
+"""
+import datetime as dt
+import time
+
+import pytest
+from osmium.replication.server import OsmosisState
+
+import nominatim.tools.replication
+import nominatim.db.status as status
+from nominatim.errors import UsageError
+
+OSM_NODE_DATA = """\
+<osm version="0.6" generator="OpenStreetMap server" copyright="OpenStreetMap and contributors" attribution="http://www.openstreetmap.org/copyright" license="http://opendatacommons.org/licenses/odbl/1-0/">
+<node id="100" visible="true" version="1" changeset="2047" timestamp="2006-01-27T22:09:10Z" user="Foo" uid="111" lat="48.7586670" lon="8.1343060">
+</node>
+</osm>
+"""
+
+### init replication
+
+def test_init_replication_bad_base_url(monkeypatch, status_table, place_row, temp_db_conn, temp_db_cursor):
+    place_row(osm_type='N', osm_id=100)
+
+    monkeypatch.setattr(nominatim.db.status, "get_url", lambda u : OSM_NODE_DATA)
+
+    with pytest.raises(UsageError, match="Failed to reach replication service"):
+        nominatim.tools.replication.init_replication(temp_db_conn, 'https://test.io')
+
+
+def test_init_replication_success(monkeypatch, status_table, place_row, temp_db_conn, temp_db_cursor):
+    place_row(osm_type='N', osm_id=100)
+
+    monkeypatch.setattr(nominatim.db.status, "get_url", lambda u : OSM_NODE_DATA)
+    monkeypatch.setattr(nominatim.tools.replication.ReplicationServer,
+                        "timestamp_to_sequence",
+                        lambda self, date: 234)
+
+    nominatim.tools.replication.init_replication(temp_db_conn, 'https://test.io')
+
+    temp_db_cursor.execute("SELECT * FROM import_status")
+
+    expected_date = dt.datetime.fromisoformat('2006-01-27T19:09:10').replace(tzinfo=dt.timezone.utc)
+    assert temp_db_cursor.rowcount == 1
+    assert temp_db_cursor.fetchone() == [expected_date, 234, True]
+
+
+### checking for updates
+
+def test_check_for_updates_empty_status_table(status_table, temp_db_conn):
+    assert nominatim.tools.replication.check_for_updates(temp_db_conn, 'https://test.io') == 254
+
+
+def test_check_for_updates_seq_not_set(status_table, temp_db_conn):
+    status.set_status(temp_db_conn, dt.datetime.now(dt.timezone.utc))
+
+    assert nominatim.tools.replication.check_for_updates(temp_db_conn, 'https://test.io') == 254
+
+
+def test_check_for_updates_no_state(monkeypatch, status_table, temp_db_conn):
+    status.set_status(temp_db_conn, dt.datetime.now(dt.timezone.utc), seq=345)
+
+    monkeypatch.setattr(nominatim.tools.replication.ReplicationServer,
+                        "get_state_info", lambda self: None)
+
+    assert nominatim.tools.replication.check_for_updates(temp_db_conn, 'https://test.io') == 253
+
+
+@pytest.mark.parametrize("server_sequence,result", [(344, 2), (345, 2), (346, 0)])
+def test_check_for_updates_no_new_data(monkeypatch, status_table, temp_db_conn,
+                                       server_sequence, result):
+    date = dt.datetime.now(dt.timezone.utc)
+    status.set_status(temp_db_conn, date, seq=345)
+
+    monkeypatch.setattr(nominatim.tools.replication.ReplicationServer,
+                        "get_state_info",
+                        lambda self: OsmosisState(server_sequence, date))
+
+    assert nominatim.tools.replication.check_for_updates(temp_db_conn, 'https://test.io') == result
+
+
+### updating
+
+@pytest.fixture
+def update_options(tmpdir):
+    return dict(base_url='https://test.io',
+                   indexed_only=False,
+                   update_interval=3600,
+                   import_file=tmpdir / 'foo.osm',
+                   max_diff_size=1)
+
+def test_update_empty_status_table(status_table, temp_db_conn):
+    with pytest.raises(UsageError):
+        nominatim.tools.replication.update(temp_db_conn, {})
+
+
+def test_update_already_indexed(status_table, temp_db_conn):
+    status.set_status(temp_db_conn, dt.datetime.now(dt.timezone.utc), seq=34, indexed=False)
+
+    assert nominatim.tools.replication.update(temp_db_conn, dict(indexed_only=True)) \
+             == nominatim.tools.replication.UpdateState.MORE_PENDING
+
+
+def test_update_no_data_no_sleep(monkeypatch, status_table, temp_db_conn, update_options):
+    date = dt.datetime.now(dt.timezone.utc) - dt.timedelta(days=1)
+    status.set_status(temp_db_conn, date, seq=34)
+
+    monkeypatch.setattr(nominatim.tools.replication.ReplicationServer,
+                        "apply_diffs",
+                        lambda *args, **kwargs: None)
+
+    sleeptime = []
+    monkeypatch.setattr(time, 'sleep', lambda s: sleeptime.append(s))
+
+    assert nominatim.tools.replication.update(temp_db_conn, update_options) \
+             == nominatim.tools.replication.UpdateState.NO_CHANGES
+
+    assert not sleeptime
+
+
+def test_update_no_data_sleep(monkeypatch, status_table, temp_db_conn, update_options):
+    date = dt.datetime.now(dt.timezone.utc) - dt.timedelta(minutes=30)
+    status.set_status(temp_db_conn, date, seq=34)
+
+    monkeypatch.setattr(nominatim.tools.replication.ReplicationServer,
+                        "apply_diffs",
+                        lambda *args, **kwargs: None)
+
+    sleeptime = []
+    monkeypatch.setattr(time, 'sleep', lambda s: sleeptime.append(s))
+
+    assert nominatim.tools.replication.update(temp_db_conn, update_options) \
+             == nominatim.tools.replication.UpdateState.NO_CHANGES
+
+    assert len(sleeptime) == 1
+    assert sleeptime[0] < 3600
+    assert sleeptime[0] > 0
diff --git a/utils/check_server_for_updates.py b/utils/check_server_for_updates.py
deleted file mode 100755 (executable)
index bcc9d0b..0000000
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/usr/bin/env python3
-
-import sys
-from osmium.replication import server
-
-if __name__ == '__main__':
-    if len(sys.argv) != 3:
-        print("Usage: python check_server_for_updates.py <server url> <sequence id>")
-        sys.exit(254)
-
-    seqid = int(sys.argv[2])
-
-    state = server.ReplicationServer(sys.argv[1]).get_state_info()
-
-    if state is None:
-        print("ERROR: Cannot get state from URL %s." % (sys.argv[1], ))
-        sys.exit(253)
-
-    if state.sequence <= seqid:
-        print("Database up to date.")
-        sys.exit(1)
-
-    print("New data available (%i => %i)." % (seqid, state.sequence))
-    sys.exit(0)
diff --git a/utils/osm_file_date.py b/utils/osm_file_date.py
deleted file mode 100755 (executable)
index 0443e6a..0000000
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/usr/bin/env python3
-
-import osmium
-import sys
-import datetime
-
-
-class Datecounter(osmium.SimpleHandler):
-
-    filedate = None
-
-    def date(self, o):
-        ts = o.timestamp
-        if self.filedate is None or ts > self.filedate:
-            self.filedate = ts
-
-    node = date
-    way = date
-    relation = date
-
-
-if __name__ == '__main__':
-    if len(sys.argv) != 2:
-        print("Usage: python osm_file_date.py <osmfile>")
-        sys.exit(-1)
-
-    h = Datecounter()
-
-    h.apply_file(sys.argv[1])
-
-    if h.filedate is None:
-        exit(5)
-
-    print(h.filedate)
diff --git a/utils/server_compare.php b/utils/server_compare.php
deleted file mode 100755 (executable)
index 39016d0..0000000
+++ /dev/null
@@ -1,68 +0,0 @@
-#!/usr/bin/php -Cq
-<?php
-
-$sFile = 'sample.log.txt'; // Apache log file
-$sHost1 = 'http://mq-open-search-lm02.ihost.aol.com:8000/nominatim/v1';
-$sHost2 = 'http://mq-open-search-lm03.ihost.aol.com:8000/nominatim/v1';
-
-
-$sHost1Escaped = str_replace('/', '\\/', $sHost1);
-$sHost2Escaped = str_replace('/', '\\/', $sHost2);
-
-$aToDo = array(251, 293, 328, 399.1, 455.1, 479, 496, 499, 574, 609, 702, 790, 846, 865, 878, 894, 902, 961, 980);
-
-$hFile = @fopen($sFile, 'r');
-if (!$hFile) {
-    echo "Unable to open file: $sFile\n";
-    exit;
-}
-
-$i = 0;
-while (($sLine = fgets($hFile, 10000)) !== false) {
-    $i++;
-    if (!in_array($i, $aToDo)) continue;
-
-    if (preg_match('#"GET (.*) HTTP/1.[01]"#', $sLine, $aResult)) {
-        $sURL1 = $sHost1.$aResult[1];
-        $sURL2 = $sHost2.$aResult[1];
-
-        $sRes1 = '';
-        $k = 0;
-        while (!$sRes1 && $k < 10) {
-            $sRes1 = file_get_contents($sURL1);
-            $k++;
-            if (!$sRes1) sleep(10);
-        }
-        $sRes2 = file_get_contents($sURL2);
-
-        // Strip out the things that will always change
-        $sRes1 =  preg_replace('# timestamp=\'[^\']*\'#', '', $sRes1);
-        $sRes1 =  str_replace($sHost1, '', $sRes1);
-        $sRes1 =  str_replace($sHost1Escaped, '', $sRes1);
-        $sRes2 =  preg_replace('# timestamp=\'[^\']*\'#', '', $sRes2);
-        $sRes2 =  str_replace($sHost2, '', $sRes2);
-        $sRes2 =  str_replace($sHost2Escaped, '', $sRes2);
-
-        if ($sRes1 != $sRes2) {
-            echo "$i:\n";
-            var_dump($sURL1, $sURL2);
-
-            $sRes = $sURL1.":\n";
-            for ($j = 0; $j < strlen($sRes1); $j+=40) {
-                $sRes .= substr($sRes1, $j, 40)."\n";
-            }
-            file_put_contents('log/'.$i.'.1', $sRes);
-
-            $sRes = $sURL2.":\n";
-            for ($j = 0; $j < strlen($sRes2); $j+=40) {
-                $sRes .= substr($sRes2, $j, 40)."\n";
-            }
-            file_put_contents('log/'.$i.'.2', $sRes);
-        }
-        echo ".\n";
-    } else {
-        var_dump($sLine);
-    }
-}
-
-fclose($hFile);