]> git.openstreetmap.org Git - nominatim.git/commitdiff
Ported tiger-data-import to python and Added Tarball Support
authorDarkshredder <srivastavayash58@gmail.com>
Mon, 8 Mar 2021 16:27:56 +0000 (21:57 +0530)
committerDarkshredder <srivastavayash58@gmail.com>
Mon, 8 Mar 2021 16:27:56 +0000 (21:57 +0530)
lib-sql/tiger_import_finish.sql
nominatim/cli.py
nominatim/clicmd/transition.py
nominatim/tools/tiger_data.py [new file with mode: 0644]

index 374c00b39bcaa5bfa37aa1a66b703c89e61583f1..39ab1ae36f6394e7fdc5ee8d93a39ad172bcf6d3 100644 (file)
@@ -1,13 +1,15 @@
 --index only on parent_place_id
-CREATE INDEX idx_location_property_tiger_parent_place_id_imp ON location_property_tiger_import (parent_place_id) {ts:aux-index};
-CREATE UNIQUE INDEX idx_location_property_tiger_place_id_imp ON location_property_tiger_import (place_id) {ts:aux-index};
+-- Must not share a name with the unique place_id index below; renamed after the table swap.
+CREATE INDEX {{sql.if_index_not_exists}} idx_location_property_tiger_parent_place_id_imp
+  ON location_property_tiger_import (parent_place_id) {{db.tablespace.aux_index}};
+CREATE UNIQUE INDEX {{sql.if_index_not_exists}} idx_location_property_tiger_place_id_imp
+  ON location_property_tiger_import (place_id) {{db.tablespace.aux_index}};
 
-GRANT SELECT ON location_property_tiger_import TO "{www-user}";
+GRANT SELECT ON location_property_tiger_import TO "{{config.DATABASE_WEBUSER}}";
 
 DROP TABLE IF EXISTS location_property_tiger;
 ALTER TABLE location_property_tiger_import RENAME TO location_property_tiger;
 
-ALTER INDEX idx_location_property_tiger_parent_place_id_imp RENAME TO idx_location_property_tiger_housenumber_parent_place_id;
-ALTER INDEX idx_location_property_tiger_place_id_imp RENAME TO idx_location_property_tiger_place_id;
+ALTER INDEX IF EXISTS idx_location_property_tiger_parent_place_id_imp RENAME TO idx_location_property_tiger_housenumber_parent_place_id;
+ALTER INDEX IF EXISTS idx_location_property_tiger_place_id_imp RENAME TO idx_location_property_tiger_place_id;
 
 DROP FUNCTION tiger_line_import (linegeo geometry, in_startnumber integer, in_endnumber integer, interpolationtype text, in_street text, in_isin text, in_postcode text);
index 7459711f9b43dd1c8e370c209d14e6c38989fa6d..e584e9d9889f1e8c3fa86f5594f71d000f35ef1c 100644 (file)
@@ -13,6 +13,7 @@ from .tools.exec_utils import run_legacy_script, run_php_server
 from .errors import UsageError
 from . import clicmd
 from .clicmd.args import NominatimArgs
+from .tools import tiger_data
 
 LOG = logging.getLogger()
 
@@ -166,8 +167,11 @@ class UpdateAddData:
     @staticmethod
     def run(args):
         if args.tiger_data:
-            os.environ['NOMINATIM_TIGER_DATA_PATH'] = args.tiger_data
-            return run_legacy_script('setup.php', '--import-tiger-data', nominatim_env=args)
+            return tiger_data.add_tiger_data(args.config.get_libpq_dsn(),
+                                      args.tiger_data,
+                                      args.threads or 1,
+                                      args.config,
+                                      args.sqllib_dir)
 
         params = ['update.php']
         if args.file:
index b8db1a38ee745fd2062f2dd63aa49d21707e3710..efce1face0ebadef877d38827855de3995560ca2 100644 (file)
@@ -58,10 +58,12 @@ class AdminTransition:
                            help="Ignore certain erros on import.")
         group.add_argument('--reverse-only', action='store_true',
                            help='Do not create search tables and indexes')
+        group.add_argument('--tiger-data', metavar='FILE',
+                           help='File to import')
 
     @staticmethod
     def run(args):
-        from ..tools import database_import
+        from ..tools import database_import, tiger_data
         from ..tools import refresh
 
         if args.create_db:
@@ -127,3 +129,11 @@ class AdminTransition:
             LOG.warning('Create Search indices')
             with connect(args.config.get_libpq_dsn()) as conn:
                 database_import.create_search_indices(conn, args.config, args.sqllib_dir, args.drop)
+        
+        if args.tiger_data:
+            LOG.warning('Tiger data')
+            tiger_data.add_tiger_data(args.config.get_libpq_dsn(),
+                                      args.tiger_data,
+                                      args.threads or 1,
+                                      args.config,
+                                      args.sqllib_dir)
diff --git a/nominatim/tools/tiger_data.py b/nominatim/tools/tiger_data.py
new file mode 100644 (file)
index 0000000..521d11c
--- /dev/null
@@ -0,0 +1,96 @@
+"""
+Functions for importing tiger data from a directory or a tarball.
+"""
+import logging
+import os
+import time
+import tarfile
+import selectors
+
+from ..db.connection import connect
+from ..db.async_connection import DBConnection
+from ..db.sql_preprocessor import SQLPreprocessor
+
+LOG = logging.getLogger()
+
+
+def add_tiger_data(dsn, data_dir, threads, config, sqllib_dir):
+    """ Import tiger data from directory or tar file
+    """
+    # Handling directory or tarball file.
+    is_tarfile = False
+    if(data_dir.endswith('.tar.gz')):
+        is_tarfile = True
+        tar = tarfile.open(data_dir)
+        sql_files = [i for i in tar.getmembers() if i.name.endswith('.sql')]
+        LOG.warning(f'Found {len(sql_files)} SQL files in tarfile with path {data_dir}')
+        if(not len(sql_files)):
+            LOG.warning(f'Tiger data import selected but no files found in tarfile with path {data_dir}')
+            return
+    else:
+        files = os.listdir(data_dir)
+        sql_files = [i for i in files if i.endswith('.sql')]
+        LOG.warning(f'Found {len(sql_files)} SQL files in path {data_dir}')
+        if(not len(sql_files)):
+            LOG.warning(f'Tiger data import selected but no files found in path {data_dir}')
+            return
+    
+    with connect(dsn) as conn:
+        sql = SQLPreprocessor(conn, config, sqllib_dir)
+        sql.run_sql_file(conn, 'tiger_import_start.sql')
+
+    # Reading sql_files and then for each file line handling
+    # sql_query in <threads - 1> chunks.
+    sel = selectors.DefaultSelector()
+    place_threads = max(1, threads - 1)
+    for sql_file in sql_files:
+        if(not is_tarfile):
+            file_path = os.path.join(data_dir, sql_file)
+            file = open(file_path)
+        else:
+            file = tar.extractfile(sql_file)
+        lines = 0
+        end_of_file = False
+        total_used_threads = place_threads
+        while(True):
+            if(end_of_file):
+                break
+            for imod in range(place_threads):
+                conn = DBConnection(dsn)
+                conn.connect()
+
+                sql_query = file.readline()
+                lines+=1
+
+                if(not sql_query):
+                    end_of_file = True
+                    total_used_threads = imod
+                    break
+
+                conn.perform(sql_query)
+                sel.register(conn, selectors.EVENT_READ, conn)
+
+                if(lines==1000):
+                    print('. ', end='', flush=True)
+                    lines=0
+
+            todo = min(place_threads,total_used_threads)
+            while todo > 0:
+                for key, _ in sel.select(1):
+                    try:
+                        conn = key.data
+                        sel.unregister(conn)
+                        conn.wait()
+                        conn.close()
+                        todo -= 1
+                    except:
+                        todo -=1
+
+    if(is_tarfile):
+        tar.close()
+    print('\n')
+    LOG.warning("Creating indexes on Tiger data")
+    with connect(dsn) as conn:
+        sql = SQLPreprocessor(conn, config, sqllib_dir)
+        sql.run_sql_file(conn, 'tiger_import_finish.sql')
+    
\ No newline at end of file