]> git.openstreetmap.org Git - nominatim.git/commitdiff
port check-database function to python
authorSarah Hoffmann <lonvia@denofr.de>
Thu, 18 Feb 2021 16:32:30 +0000 (17:32 +0100)
committerSarah Hoffmann <lonvia@denofr.de>
Thu, 18 Feb 2021 16:32:30 +0000 (17:32 +0100)
This change also adapts the hints to use the nominatim tool.
Slightly changed checks, so that they are just as effective on
a frozen database.

lib-php/DB.php
lib-php/admin/check_import_finished.php
nominatim/clicmd/admin.py
nominatim/db/connection.py
nominatim/tools/check_database.py [new file with mode: 0644]
test/php/Nominatim/DBTest.php
test/python/test_cli.py

index c25c5ca921ae22152a28140ec1cb1540ce8d7e79..abd23179526ef230b429698764fd29abc4e7762f 100644 (file)
@@ -252,76 +252,6 @@ class DB
         return $this->exec('DROP TABLE IF EXISTS '.$sTableName.' CASCADE') == 0;
     }
 
-    /**
-    * Check if an index exists in the database. Optional filtered by tablename
-    *
-    * @param string  $sTableName
-    *
-    * @return boolean
-    */
-    public function indexExists($sIndexName, $sTableName = null)
-    {
-        return in_array($sIndexName, $this->getListOfIndices($sTableName));
-    }
-
-    /**
-    * Returns a list of index names in the database, optional filtered by tablename
-    *
-    * @param string  $sTableName
-    *
-    * @return array
-    */
-    public function getListOfIndices($sTableName = null)
-    {
-        //  table_name            | index_name                      | column_name
-        // -----------------------+---------------------------------+--------------
-        //  country_name          | idx_country_name_country_code   | country_code
-        //  country_osm_grid      | idx_country_osm_grid_geometry   | geometry
-        //  import_polygon_delete | idx_import_polygon_delete_osmid | osm_id
-        //  import_polygon_delete | idx_import_polygon_delete_osmid | osm_type
-        //  import_polygon_error  | idx_import_polygon_error_osmid  | osm_id
-        //  import_polygon_error  | idx_import_polygon_error_osmid  | osm_type
-        $sSql = <<< END
-SELECT
-    t.relname as table_name,
-    i.relname as index_name,
-    a.attname as column_name
-FROM
-    pg_class t,
-    pg_class i,
-    pg_index ix,
-    pg_attribute a
-WHERE
-    t.oid = ix.indrelid
-    and i.oid = ix.indexrelid
-    and a.attrelid = t.oid
-    and a.attnum = ANY(ix.indkey)
-    and t.relkind = 'r'
-    and i.relname NOT LIKE 'pg_%'
-    FILTERS
- ORDER BY
-    t.relname,
-    i.relname,
-    a.attname
-END;
-
-        $aRows = null;
-        if ($sTableName) {
-            $sSql = str_replace('FILTERS', 'and t.relname = :tablename', $sSql);
-            $aRows = $this->getAll($sSql, array(':tablename' => $sTableName));
-        } else {
-            $sSql = str_replace('FILTERS', '', $sSql);
-            $aRows = $this->getAll($sSql);
-        }
-
-        $aIndexNames = array_unique(array_map(function ($aRow) {
-            return $aRow['index_name'];
-        }, $aRows));
-        sort($aIndexNames);
-
-        return $aIndexNames;
-    }
-
     /**
      * Tries to connect to the database but on failure doesn't throw an exception.
      *
index f189fc9af0a49588447bef74f2521390e09f9adb..a1811af39bad2c9b0e670df7ceb3b8613a40fde7 100644 (file)
@@ -5,197 +5,7 @@ require_once(CONST_LibDir.'/init-cmd.php');
 
 loadSettings(getcwd());
 
-$term_colors = array(
-                'green' => "\033[92m",
-                'red' => "\x1B[31m",
-                'normal' => "\033[0m"
-);
+(new \Nominatim\Shell(getSetting('NOMINATIM_TOOL')))
+    ->addParams('admin', '--check-database')
+    ->run();
 
-$print_success = function ($message = 'OK') use ($term_colors) {
-    echo $term_colors['green'].$message.$term_colors['normal']."\n";
-};
-
-$print_fail = function ($message = 'Failed') use ($term_colors) {
-    echo $term_colors['red'].$message.$term_colors['normal']."\n";
-};
-
-$oDB = new Nominatim\DB;
-
-
-function isReverseOnlyInstallation()
-{
-    global $oDB;
-    return !$oDB->tableExists('search_name');
-}
-
-// Check (guess) if the setup.php included --drop
-function isNoUpdateInstallation()
-{
-    global $oDB;
-    return $oDB->tableExists('placex') && !$oDB->tableExists('planet_osm_rels') ;
-}
-
-
-echo 'Checking database got created ... ';
-if ($oDB->checkConnection()) {
-    $print_success();
-} else {
-    $print_fail();
-    echo <<< END
-    Hints:
-    * Is the database server started?
-    * Check the NOMINATIM_DATABASE_DSN variable in your local .env
-    * Try connecting to the database with the same settings
-
-END;
-    exit(1);
-}
-
-
-echo 'Checking nominatim.so module installed ... ';
-$sStandardWord = $oDB->getOne("SELECT make_standard_name('a')");
-if ($sStandardWord === 'a') {
-    $print_success();
-} else {
-    $print_fail();
-    echo <<< END
-    The Postgresql extension nominatim.so was not found in the database.
-    Hints:
-    * Check the output of the CMmake/make installation step
-    * Does nominatim.so exist?
-    * Does nominatim.so exist on the database server?
-    * Can nominatim.so be accessed by the database user?
-
-END;
-    exit(1);
-}
-
-if (!isNoUpdateInstallation()) {
-    echo 'Checking place table ... ';
-    if ($oDB->tableExists('place')) {
-        $print_success();
-    } else {
-        $print_fail();
-        echo <<< END
-        * The import didn't finish.
-        Hints:
-        * Check the output of the utils/setup.php you ran.
-        Usually the osm2pgsql step failed. Check for errors related to
-        * the file you imported not containing any places
-        * harddrive full
-        * out of memory (RAM)
-        * osm2pgsql killed by other scripts, for consuming to much memory
-
-    END;
-        exit(1);
-    }
-}
-
-
-echo 'Checking indexing status ... ';
-$iUnindexed = $oDB->getOne('SELECT count(*) FROM placex WHERE indexed_status > 0');
-if ($iUnindexed == 0) {
-    $print_success();
-} else {
-    $print_fail();
-    echo <<< END
-    The indexing didn't finish. There is still $iUnindexed places. See the
-    question 'Can a stopped/killed import process be resumed?' in the
-    troubleshooting guide.
-
-END;
-    exit(1);
-}
-
-echo "Search index creation\n";
-$aExpectedIndices = array(
-    // sql/indices.src.sql
-    'idx_word_word_id',
-    'idx_place_addressline_address_place_id',
-    'idx_placex_rank_search',
-    'idx_placex_rank_address',
-    'idx_placex_parent_place_id',
-    'idx_placex_geometry_reverse_lookuppolygon',
-    'idx_placex_geometry_reverse_placenode',
-    'idx_osmline_parent_place_id',
-    'idx_osmline_parent_osm_id',
-    'idx_postcode_id',
-    'idx_postcode_postcode'
-);
-if (!isReverseOnlyInstallation()) {
-    $aExpectedIndices = array_merge($aExpectedIndices, array(
-        // sql/indices_search.src.sql
-        'idx_search_name_nameaddress_vector',
-        'idx_search_name_name_vector',
-        'idx_search_name_centroid'
-    ));
-}
-if (!isNoUpdateInstallation()) {
-    $aExpectedIndices = array_merge($aExpectedIndices, array(
-        'idx_placex_pendingsector',
-        'idx_location_area_country_place_id',
-        'idx_place_osm_unique',
-    ));
-}
-
-foreach ($aExpectedIndices as $sExpectedIndex) {
-    echo "Checking index $sExpectedIndex ... ";
-    if ($oDB->indexExists($sExpectedIndex)) {
-        $print_success();
-    } else {
-        $print_fail();
-        echo <<< END
-        Hints:
-        * Run './utils/setup.php --create-search-indices --ignore-errors' to
-          create missing indices.
-
-END;
-        exit(1);
-    }
-}
-
-echo 'Checking search indices are valid ... ';
-$sSQL = <<< END
-    SELECT relname
-    FROM pg_class, pg_index
-    WHERE pg_index.indisvalid = false
-      AND pg_index.indexrelid = pg_class.oid;
-END;
-$aInvalid = $oDB->getCol($sSQL);
-if (empty($aInvalid)) {
-    $print_success();
-} else {
-    $print_fail();
-    echo <<< END
-    At least one index is invalid. That can happen, e.g. when index creation was
-    disrupted and later restarted. You should delete the affected indices and
-    run the index stage of setup again.
-    See the question 'Can a stopped/killed import process be resumed?' in the
-    troubleshooting guide.
-    Affected indices: 
-END;
-    echo join(', ', $aInvalid) . "\n";
-    exit(1);
-}
-
-
-
-if (getSettingBool('USE_US_TIGER_DATA')) {
-    echo 'Checking TIGER table exists ... ';
-    if ($oDB->tableExists('location_property_tiger')) {
-        $print_success();
-    } else {
-        $print_fail();
-        echo <<< END
-        Table 'location_property_tiger' does not exist. Run the TIGER data
-        import again.
-
-END;
-        exit(1);
-    }
-}
-
-
-
-
-exit(0);
index 8d34f3869be62e6e0b29f08141a40553d77269a9..e58635756b2de0e2649b84877e69f4d558a37b68 100644 (file)
@@ -1,6 +1,8 @@
 """
 Implementation of the 'admin' subcommand.
 """
+import logging
+
 from ..tools.exec_utils import run_legacy_script
 from ..db.connection import connect
 
@@ -9,6 +11,8 @@ from ..db.connection import connect
 # Using non-top-level imports to avoid eventually unused imports.
 # pylint: disable=E0012,C0415
 
+LOG = logging.getLogger()
+
 class AdminFuncs:
     """\
     Analyse and maintain the database.
@@ -39,14 +43,17 @@ class AdminFuncs:
 
     @staticmethod
     def run(args):
-        from ..tools import admin
         if args.warm:
             AdminFuncs._warm(args)
 
         if args.check_database:
-            run_legacy_script('check_import_finished.php', nominatim_env=args)
+            LOG.warning('Checking database')
+            from ..tools import check_database
+            return check_database.check_database(args.config)
 
         if args.analyse_indexing:
+            LOG.warning('Analysing performance of indexing function')
+            from ..tools import admin
             conn = connect(args.config.get_libpq_dsn())
             admin.analyse_indexing(conn, osm_id=args.osm_id, place_id=args.place_id)
             conn.close()
@@ -56,6 +63,7 @@ class AdminFuncs:
 
     @staticmethod
     def _warm(args):
+        LOG.warning('Warming database caches')
         params = ['warm.php']
         if args.target == 'reverse':
             params.append('--reverse-only')
index c7e22c98e500c7d53936fad88fa77a8bdd3fad7f..b941f46f56c63c74444506dec457bf09a8999c07 100644 (file)
@@ -7,6 +7,8 @@ import psycopg2
 import psycopg2.extensions
 import psycopg2.extras
 
+from ..errors import UsageError
+
 class _Cursor(psycopg2.extras.DictCursor):
     """ A cursor returning dict-like objects and providing specialised
         execution functions.
@@ -42,14 +44,34 @@ class _Connection(psycopg2.extensions.connection):
         """
         return super().cursor(cursor_factory=cursor_factory, **kwargs)
 
+
     def table_exists(self, table):
         """ Check that a table with the given name exists in the database.
         """
         with self.cursor() as cur:
             num = cur.scalar("""SELECT count(*) FROM pg_tables
-                                WHERE tablename = %s""", (table, ))
+                                WHERE tablename = %s and schemaname = 'public'""", (table, ))
             return num == 1
 
+
+    def index_exists(self, index, table=None):
+        """ Check that an index with the given name exists in the database.
+            If table is not None then the index must relate to the given
+            table.
+        """
+        with self.cursor() as cur:
+            cur.execute("""SELECT tablename FROM pg_indexes
+                           WHERE indexname = %s and schemaname = 'public'""", (index, ))
+            if cur.rowcount == 0:
+                return False
+
+            if table is not None:
+                row = cur.fetchone()
+                return row[0] == table
+
+        return True
+
+
     def server_version_tuple(self):
         """ Return the server version as a tuple of (major, minor).
             Converts correctly for pre-10 and post-10 PostgreSQL versions.
@@ -64,4 +86,7 @@ def connect(dsn):
     """ Open a connection to the database using the specialised connection
         factory.
     """
-    return psycopg2.connect(dsn, connection_factory=_Connection)
+    try:
+        return psycopg2.connect(dsn, connection_factory=_Connection)
+    except psycopg2.OperationalError as err:
+        raise UsageError("Cannot connect to database: {}".format(err)) from err
diff --git a/nominatim/tools/check_database.py b/nominatim/tools/check_database.py
new file mode 100644 (file)
index 0000000..e99a357
--- /dev/null
@@ -0,0 +1,269 @@
+"""
+Collection of functions that check if the database is complete and functional.
+"""
+from enum import Enum
+from textwrap import dedent
+
+import psycopg2
+
+from ..db.connection import connect
+from ..errors import UsageError
+
+CHECKLIST = []
+
+class CheckState(Enum):
+    """ Possible states of a check. FATAL stops check execution entirely.
+    """
+    OK = 0
+    FAIL = 1
+    FATAL = 2
+    NOT_APPLICABLE = 3
+
+def _check(hint=None):
+    """ Decorator for checks. It adds the function to the list of
+        checks to execute and adds the code for printing progress messages.
+    """
+    def decorator(func):
+        title = func.__doc__.split('\n', 1)[0].strip()
+        def run_check(conn, config):
+            print(title, end=' ... ')
+            ret = func(conn, config)
+            if isinstance(ret, tuple):
+                ret, params = ret
+            else:
+                params = {}
+            if ret == CheckState.OK:
+                print('\033[92mOK\033[0m')
+            elif ret == CheckState.NOT_APPLICABLE:
+                print('not applicable')
+            else:
+                print('\x1B[31mFailed\033[0m')
+                if hint:
+                    print(dedent(hint.format(**params)))
+            return ret
+
+        CHECKLIST.append(run_check)
+        return run_check
+
+    return decorator
+
+class _BadConnection: # pylint: disable=R0903
+
+    def __init__(self, msg):
+        self.msg = msg
+
+    def close(self):
+        """ Dummy function to provide the implementation.
+        """
+
+def check_database(config):
+    """ Run a number of checks on the database and return the status.
+    """
+    try:
+        conn = connect(config.get_libpq_dsn())
+    except UsageError as err:
+        conn = _BadConnection(str(err))
+
+    overall_result = 0
+    for check in CHECKLIST:
+        ret = check(conn, config)
+        if ret == CheckState.FATAL:
+            conn.close()
+            return 1
+        if ret != CheckState.OK:
+            overall_result = 1
+
+    conn.close()
+    return overall_result
+
+
+def _get_indexes(conn):
+    indexes = ['idx_word_word_id',
+               'idx_place_addressline_address_place_id',
+               'idx_placex_rank_search',
+               'idx_placex_rank_address',
+               'idx_placex_parent_place_id',
+               'idx_placex_geometry_reverse_lookuppolygon',
+               'idx_placex_geometry_reverse_placenode',
+               'idx_osmline_parent_place_id',
+               'idx_osmline_parent_osm_id',
+               'idx_postcode_id',
+               'idx_postcode_postcode'
+              ]
+    if conn.table_exists('search_name'):
+        indexes.extend(('idx_search_name_nameaddress_vector',
+                        'idx_search_name_name_vector',
+                        'idx_search_name_centroid'))
+    if conn.table_exists('place'):
+        indexes.extend(('idx_placex_pendingsector',
+                        'idx_location_area_country_place_id',
+                        'idx_place_osm_unique'
+                       ))
+
+    return indexes
+
+
+### CHECK FUNCTIONS
+#
+# Functions are exectured in the order they appear here.
+
+@_check(hint="""\
+             {error}
+
+             Hints:
+             * Is the database server started?
+             * Check the NOMINATIM_DATABASE_DSN variable in your local .env
+             * Try connecting to the database with the same settings
+
+             Project directory: {config.project_dir}
+             Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
+             """)
+def check_connection(conn, config):
+    """ Checking database connection
+    """
+    if isinstance(conn, _BadConnection):
+        return CheckState.FATAL, dict(error=conn.msg, config=config)
+
+    return CheckState.OK
+
+@_check(hint="""\
+             placex table not found
+
+             Hints:
+             * Are you connecting to the right database?
+             * Did the import process finish without errors?
+
+             Project directory: {config.project_dir}
+             Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
+             """)
+def check_placex_table(conn, config):
+    """ Checking for placex table
+    """
+    if conn.table_exists('placex'):
+        return CheckState.OK
+
+    return CheckState.FATAL, dict(config=config)
+
+
+@_check(hint="""placex table has no data. Did the import finish sucessfully?""")
+def check_placex_size(conn, config): # pylint: disable=W0613
+    """ Checking for placex content
+    """
+    with conn.cursor() as cur:
+        cnt = cur.scalar('SELECT count(*) FROM (SELECT * FROM placex LIMIT 100) x')
+
+    return CheckState.OK if cnt > 0 else CheckState.FATAL
+
+@_check(hint="""\
+             The Postgresql extension nominatim.so was not correctly loaded.
+
+             Error: {error}
+
+             Hints:
+             * Check the output of the CMmake/make installation step
+             * Does nominatim.so exist?
+             * Does nominatim.so exist on the database server?
+             * Can nominatim.so be accessed by the database user?
+             """)
+def check_module(conn, config): # pylint: disable=W0613
+    """ Checking that nominatim.so module is installed
+    """
+    with conn.cursor() as cur:
+        try:
+            out = cur.scalar("SELECT make_standard_name('a')")
+        except psycopg2.ProgrammingError as err:
+            return CheckState.FAIL, dict(error=str(err))
+
+        if out != 'a':
+            return CheckState.FAIL, dict(error='Unexpected result for make_standard_name()')
+
+        return CheckState.OK
+
+
+@_check(hint="""\
+             The indexing didn't finish. {count} entries are not yet indexed.
+
+             To index the remaining entries, run:   {index_cmd}
+             """)
+def check_indexing(conn, config): # pylint: disable=W0613
+    """ Checking indexing status
+    """
+    with conn.cursor() as cur:
+        cnt = cur.scalar('SELECT count(*) FROM placex WHERE indexed_status > 0')
+
+    if cnt == 0:
+        return CheckState.OK
+
+    if conn.index_exists('idx_word_word_id'):
+        # Likely just an interrupted update.
+        index_cmd = 'nominatim index'
+    else:
+        # Looks like the import process got interupted.
+        index_cmd = 'nominatim import --continue indexing'
+
+    return CheckState.FAIL, dict(count=cnt, index_cmd=index_cmd)
+
+
+
+@_check(hint="""\
+             The following indexes are missing:
+               {indexes}
+
+             Rerun the index creation with:   nominatim import --continue db-postprocess
+             """)
+def check_database_indexes(conn, config): # pylint: disable=W0613
+    """ Checking that database indexes are complete
+    """
+    missing = []
+    for index in _get_indexes(conn):
+        if not conn.index_exists(index):
+            missing.append(index)
+
+    if missing:
+        return CheckState.FAIL, dict(indexes='\n  '.join(missing))
+
+    return CheckState.OK
+
+
+@_check(hint="""\
+             At least one index is invalid. That can happen, e.g. when index creation was
+             disrupted and later restarted. You should delete the affected indices
+             and recreate them.
+
+             Invalid indexes:
+               {indexes}
+             """)
+def check_database_index_valid(conn, config): # pylint: disable=W0613
+    """ Checking that all database indexes are valid
+    """
+    with conn.cursor() as cur:
+        cur.execute(""" SELECT relname FROM pg_class, pg_index
+                        WHERE pg_index.indisvalid = false
+                        AND pg_index.indexrelid = pg_class.oid""")
+
+        broken = list(cur)
+
+    if broken:
+        return CheckState.FAIL, dict(indexes='\n  '.join(broken))
+
+    return CheckState.OK
+
+
+@_check(hint="""\
+             {error}
+             Run TIGER import again:   nominatim add-data --tiger-data <DIR>
+             """)
+def check_tiger_table(conn, config):
+    """ Checking TIGER external data table.
+    """
+    if not config.get_bool('USE_US_TIGER_DATA'):
+        return CheckState.NOT_APPLICABLE
+
+    if not conn.table_exists('location_property_tiger'):
+        return CheckState.FAIL, dict(error='TIGER data table not found.')
+
+    with conn.cursor() as cur:
+        if cur.scalar('SELECT count(*) FROM location_property_tiger') == 0:
+            return CheckState.FAIL, dict(error='TIGER data table is empty.')
+
+    return CheckState.OK
index c5f4f3954d39df65d4cd37ffc29fb66f808db672..1a2ecc861fe0ea68d15cb33a176692b5e1a37838 100644 (file)
@@ -164,14 +164,6 @@ class DBTest extends \PHPUnit\Framework\TestCase
             $this->assertTrue($oDB->tableExists('table1'));
             $this->assertFalse($oDB->tableExists('table99'));
             $this->assertFalse($oDB->tableExists(null));
-
-            $this->assertEmpty($oDB->getListOfIndices());
-            $oDB->exec('CREATE UNIQUE INDEX table1_index ON table1 (id)');
-            $this->assertEquals(
-                array('table1_index'),
-                $oDB->getListOfIndices()
-            );
-            $this->assertEmpty($oDB->getListOfIndices('table2'));
         }
 
         # select queries
index e1df94785f907186c43d509e652dc57a98273ced..10e57f8b7fbbaf83f3b07b9fb472331fb7d95401 100644 (file)
@@ -15,9 +15,11 @@ import nominatim.clicmd.api
 import nominatim.clicmd.refresh
 import nominatim.clicmd.admin
 import nominatim.indexer.indexer
+import nominatim.tools.admin
+import nominatim.tools.check_database
+import nominatim.tools.freeze
 import nominatim.tools.refresh
 import nominatim.tools.replication
-import nominatim.tools.freeze
 from nominatim.errors import UsageError
 from nominatim.db import status
 
@@ -95,8 +97,7 @@ def test_freeze_command(mock_func_factory, temp_db):
 
 @pytest.mark.parametrize("params", [('--warm', ),
                                     ('--warm', '--reverse-only'),
-                                    ('--warm', '--search-only'),
-                                    ('--check-database', )])
+                                    ('--warm', '--search-only')])
 def test_admin_command_legacy(mock_func_factory, params):
     mock_run_legacy = mock_func_factory(nominatim.clicmd.admin, 'run_legacy_script')
 
@@ -104,6 +105,7 @@ def test_admin_command_legacy(mock_func_factory, params):
 
     assert mock_run_legacy.called == 1
 
+
 @pytest.mark.parametrize("func, params", [('analyse_indexing', ('--analyse-indexing', ))])
 def test_admin_command_tool(temp_db, mock_func_factory, func, params):
     mock = mock_func_factory(nominatim.tools.admin, func)
@@ -111,6 +113,14 @@ def test_admin_command_tool(temp_db, mock_func_factory, func, params):
     assert 0 == call_nominatim('admin', *params)
     assert mock.called == 1
 
+
+def test_admin_command_check_database(mock_func_factory):
+    mock = mock_func_factory(nominatim.tools.check_database, 'check_database')
+
+    assert 0 == call_nominatim('admin', '--check-database')
+    assert mock.called == 1
+
+
 @pytest.mark.parametrize("name,oid", [('file', 'foo.osm'), ('diff', 'foo.osc'),
                                       ('node', 12), ('way', 8), ('relation', 32)])
 def test_add_data_command(mock_run_legacy, name, oid):