git.openstreetmap.org Git - nominatim.git/commitdiff
move postcode normalization into tokenizer
author: Sarah Hoffmann <lonvia@denofr.de>
Sun, 25 Apr 2021 16:26:36 +0000 (18:26 +0200)
committer: Sarah Hoffmann <lonvia@denofr.de>
Fri, 30 Apr 2021 09:30:51 +0000 (11:30 +0200)
12 files changed:
lib-sql/functions/interpolation.sql
lib-sql/functions/placex_triggers.sql
lib-sql/tokenizer/legacy_tokenizer.sql
nominatim/clicmd/setup.py
nominatim/indexer/indexer.py
nominatim/indexer/runners.py
nominatim/tokenizer/legacy_tokenizer.py
nominatim/tools/postcodes.py
test/python/conftest.py
test/python/dummy_tokenizer.py
test/python/test_indexing.py
test/python/test_tools_postcodes.py

index a797cad3ac1de74b3500eb53d90bbd2111b826f1..8bfc307b7d9f1dd2147f190cb3a168695118fecd 100644 (file)
@@ -12,39 +12,47 @@ $$
 LANGUAGE plpgsql IMMUTABLE;
 
 
+CREATE OR REPLACE FUNCTION get_interpolation_address(in_address HSTORE, wayid BIGINT)
+RETURNS HSTORE
+  AS $$
+DECLARE
+  location RECORD;
+  waynodes BIGINT[];
+BEGIN
+  IF akeys(in_address) != ARRAY['interpolation'] THEN
+    RETURN in_address;
+  END IF;
+
+  SELECT nodes INTO waynodes FROM planet_osm_ways WHERE id = wayid;
+  FOR location IN
+    SELECT placex.address, placex.osm_id FROM placex
+     WHERE osm_type = 'N' and osm_id = ANY(waynodes)
+           and placex.address is not null
+           and (placex.address ? 'street' or placex.address ? 'place')
+           and indexed_status < 100
+  LOOP
+    -- mark it as a derived address
+    RETURN location.address || in_address || hstore('_inherited', '');
+  END LOOP;
+
+  RETURN in_address;
+END;
+$$
+LANGUAGE plpgsql STABLE;
+
+
+
 -- find the parent road of the cut road parts
-CREATE OR REPLACE FUNCTION get_interpolation_parent(wayid BIGINT, street TEXT,
+CREATE OR REPLACE FUNCTION get_interpolation_parent(street TEXT,
                                                     place TEXT, partition SMALLINT,
                                                     centroid GEOMETRY, geom GEOMETRY)
   RETURNS BIGINT
   AS $$
 DECLARE
-  addr_street TEXT;
-  addr_place TEXT;
   parent_place_id BIGINT;
-
-  waynodes BIGINT[];
-
   location RECORD;
 BEGIN
-  addr_street = street;
-  addr_place = place;
-
-  IF addr_street is null and addr_place is null THEN
-    select nodes from planet_osm_ways where id = wayid INTO waynodes;
-    FOR location IN SELECT placex.address from placex
-                    where osm_type = 'N' and osm_id = ANY(waynodes)
-                          and placex.address is not null
-                          and (placex.address ? 'street' or placex.address ? 'place')
-                          and indexed_status < 100
-                    limit 1 LOOP
-      addr_street = location.address->'street';
-      addr_place = location.address->'place';
-    END LOOP;
-  END IF;
-
-  parent_place_id := find_parent_for_address(addr_street, addr_place,
-                                             partition, centroid);
+  parent_place_id := find_parent_for_address(street, place, partition, centroid);
 
   IF parent_place_id is null THEN
     FOR location IN SELECT place_id FROM placex
@@ -147,17 +155,20 @@ BEGIN
   NEW.interpolationtype = NEW.address->'interpolation';
 
   place_centroid := ST_PointOnSurface(NEW.linegeo);
-  NEW.parent_place_id = get_interpolation_parent(NEW.osm_id, NEW.address->'street',
+  NEW.parent_place_id = get_interpolation_parent(NEW.address->'street',
                                                  NEW.address->'place',
                                                  NEW.partition, place_centroid, NEW.linegeo);
 
   IF NEW.address is not NULL AND NEW.address ? 'postcode' AND NEW.address->'postcode' not similar to '%(,|;)%' THEN
     interpol_postcode := NEW.address->'postcode';
-    housenum := getorcreate_postcode_id(NEW.address->'postcode');
   ELSE
     interpol_postcode := NULL;
   END IF;
 
+  IF NEW.address ? '_inherited' THEN
+    NEW.address := hstore('interpolation', NEW.interpolationtype);
+  END IF;
+
   -- if the line was newly inserted, split the line as necessary
   IF OLD.indexed_status = 1 THEN
       select nodes from planet_osm_ways where id = NEW.osm_id INTO waynodes;
index ca9ab5cc550f2889cc3b1653ed75a8aa2067e6ab..922d79bd51b3b0da6d9de7d7e7ee6eb147edcd3b 100644 (file)
@@ -817,10 +817,6 @@ BEGIN
   IF NEW.address is not NULL THEN
       addr_street := NEW.address->'street';
       addr_place := NEW.address->'place';
-
-      IF NEW.address ? 'postcode' and NEW.address->'postcode' not similar to '%(:|,|;)%' THEN
-        i := getorcreate_postcode_id(NEW.address->'postcode');
-      END IF;
   END IF;
 
   NEW.postcode := null;
index 916ba9aedc1e215906037be2bcf01ec7af583c28..4c22424ec74afa2504f51bbebdf9498fba2e740a 100644 (file)
@@ -34,6 +34,13 @@ AS $$
 $$ LANGUAGE SQL IMMUTABLE STRICT;
 
 
+CREATE OR REPLACE FUNCTION token_normalized_postcode(postcode TEXT)
+  RETURNS TEXT
+AS $$
+  SELECT CASE WHEN postcode SIMILAR TO '%(,|;)%' THEN NULL ELSE upper(trim(postcode)) END;
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+
 -- Return token info that should be saved permanently in the database.
 CREATE OR REPLACE FUNCTION token_strip_info(info JSONB)
   RETURNS JSONB
@@ -133,26 +140,26 @@ $$
 LANGUAGE plpgsql;
 
 
-CREATE OR REPLACE FUNCTION getorcreate_postcode_id(postcode TEXT)
-  RETURNS INTEGER
+CREATE OR REPLACE FUNCTION create_postcode_id(postcode TEXT)
+  RETURNS BOOLEAN
   AS $$
 DECLARE
+  r RECORD;
   lookup_token TEXT;
-  lookup_word TEXT;
   return_word_id INTEGER;
 BEGIN
-  lookup_word := upper(trim(postcode));
-  lookup_token := ' ' || make_standard_name(lookup_word);
-  SELECT min(word_id) FROM word
-    WHERE word_token = lookup_token and word = lookup_word
+  lookup_token := ' ' || make_standard_name(postcode);
+  FOR r IN
+    SELECT word_id FROM word
+    WHERE word_token = lookup_token and word = postcode
           and class='place' and type='postcode'
-    INTO return_word_id;
-  IF return_word_id IS NULL THEN
-    return_word_id := nextval('seq_word');
-    INSERT INTO word VALUES (return_word_id, lookup_token, lookup_word,
-                             'place', 'postcode', null, 0);
-  END IF;
-  RETURN return_word_id;
+  LOOP
+    RETURN false;
+  END LOOP;
+
+  INSERT INTO word VALUES (nextval('seq_word'), lookup_token, postcode,
+                           'place', 'postcode', null, 0);
+  RETURN true;
 END;
 $$
 LANGUAGE plpgsql;
index 499eff7673dc8f1d393eeb889c731eb980dc216f..1ce9cf3e0039642641c0c8ead82f52ca9b2b7d2c 100644 (file)
@@ -116,7 +116,8 @@ class SetupAll:
 
         if args.continue_at is None or args.continue_at == 'load-data':
             LOG.warning('Calculate postcodes')
-            postcodes.import_postcodes(args.config.get_libpq_dsn(), args.project_dir)
+            postcodes.import_postcodes(args.config.get_libpq_dsn(), args.project_dir,
+                                       tokenizer)
 
         if args.continue_at is None or args.continue_at in ('load-data', 'indexing'):
             if args.continue_at is not None and args.continue_at != 'load-data':
index ea7b0e5986e22c0fad8c9b826bb03b2af3d1a29c..d0c6ea0ca66c8c0927cbfb718e6cd7518bbe8238 100644 (file)
@@ -147,7 +147,7 @@ class Indexer:
 
             if maxrank == 30:
                 self._index(runners.RankRunner(0, analyzer))
-                self._index(runners.InterpolationRunner(), 20)
+                self._index(runners.InterpolationRunner(analyzer), 20)
                 self._index(runners.RankRunner(30, analyzer), 20)
             else:
                 self._index(runners.RankRunner(maxrank, analyzer))
index 2bf9e51632a3ba84940755d005481aab9a9eed0b..75429fe427428a9bc79b7c9ec9a4f491a873028c 100644 (file)
@@ -25,7 +25,7 @@ class AbstractPlacexRunner:
                    SET indexed_status = 0, address = v.addr, token_info = v.ti
                    FROM (VALUES {}) as v(id, addr, ti)
                    WHERE place_id = v.id
-               """.format(','.join(["(%s, %s::hstore, %s::json)"]  * num_places))
+               """.format(','.join(["(%s, %s::hstore, %s::jsonb)"]  * num_places))
 
 
     def index_places(self, worker, places):
@@ -82,6 +82,10 @@ class InterpolationRunner:
         location_property_osmline.
     """
 
+    def __init__(self, analyzer):
+        self.analyzer = analyzer
+
+
     @staticmethod
     def name():
         return "interpolation lines (location_property_osmline)"
@@ -93,15 +97,30 @@ class InterpolationRunner:
 
     @staticmethod
     def sql_get_objects():
-        return """SELECT place_id FROM location_property_osmline
+        return """SELECT place_id, get_interpolation_address(address, osm_id) as address
+                  FROM location_property_osmline
                   WHERE indexed_status > 0
                   ORDER BY geometry_sector"""
 
+
     @staticmethod
-    def index_places(worker, ids):
-        worker.perform(""" UPDATE location_property_osmline
-                           SET indexed_status = 0 WHERE place_id IN ({})
-                       """.format(','.join((str(i[0]) for i in ids))))
+    @functools.lru_cache(maxsize=1)
+    def _index_sql(num_places):
+        return """ UPDATE location_property_osmline
+                   SET indexed_status = 0, address = v.addr, token_info = v.ti
+                   FROM (VALUES {}) as v(id, addr, ti)
+                   WHERE place_id = v.id
+               """.format(','.join(["(%s, %s::hstore, %s::jsonb)"]  * num_places))
+
+
+    def index_places(self, worker, places):
+        values = []
+        for place in places:
+            values.extend((place[x] for x in ('place_id', 'address')))
+            values.append(psycopg2.extras.Json(self.analyzer.process_place(place)))
+
+        worker.perform(self._index_sql(len(places)), values)
+
 
 
 class PostcodeRunner:
index 6ffdc4ef46cc0269348428d8ba7abb76d872f3b0..b95a6208f3fac47e978669f38e476ef5c16fc49f 100644 (file)
@@ -1,6 +1,7 @@
 """
 Tokenizer implementing normalisation as used before Nominatim 4.
 """
+from collections import OrderedDict
 import logging
 import re
 import shutil
@@ -213,6 +214,15 @@ class LegacyNameAnalyzer:
             self.conn.close()
             self.conn = None
 
+
+    def add_postcodes_from_db(self):
+        """ Add postcodes from the location_postcode table to the word table.
+        """
+        with self.conn.cursor() as cur:
+            cur.execute("""SELECT count(create_postcode_id(pc))
+                           FROM (SELECT distinct(postcode) as pc
+                                 FROM location_postcode) x""")
+
     def process_place(self, place):
         """ Determine tokenizer information about the given place.
 
@@ -226,11 +236,25 @@ class LegacyNameAnalyzer:
         address = place.get('address')
 
         if address:
+            self._add_postcode(address.get('postcode'))
             token_info.add_housenumbers(self.conn, address)
 
         return token_info.data
 
 
+    def _add_postcode(self, postcode):
+        """ Make sure the normalized postcode is present in the word table.
+        """
+        if not postcode or re.search(r'[:,;]', postcode) is not None:
+            return
+
+        def _create_postcode_from_db(pcode):
+            with self.conn.cursor() as cur:
+                cur.execute('SELECT create_postcode_id(%s)', (pcode, ))
+
+        self._cache.postcodes.get(postcode.strip().upper(), _create_postcode_from_db)
+
+
 class _TokenInfo:
     """ Collect token information to be sent back to the database.
     """
@@ -285,6 +309,32 @@ class _TokenInfo:
             self.data['hnr_tokens'], self.data['hnr'] = cur.fetchone()
 
 
+class _LRU:
+    """ Least recently used cache that accepts a generator function to
+        produce the item when there is a cache miss.
+    """
+
+    def __init__(self, maxsize=128):
+        self.data = OrderedDict()
+        self.maxsize = maxsize
+
+    def get(self, key, generator):
+        """ Get the item with the given key from the cache. If nothing
+            is found in the cache, generate the value through the
+            generator function and store it in the cache.
+        """
+        value = self.data.get(key)
+        if value is not None:
+            self.data.move_to_end(key)
+        else:
+            value = generator(key)
+            if len(self.data) >= self.maxsize:
+                self.data.popitem(last=False)
+            self.data[key] = value
+
+        return value
+
+
 class _TokenCache:
     """ Cache for token information to avoid repeated database queries.
 
@@ -292,6 +342,9 @@ class _TokenCache:
         analyzer.
     """
     def __init__(self, conn):
+        # various LRU caches
+        self.postcodes = _LRU(maxsize=32)
+
         # Lookup housenumbers up to 100 and cache them
         with conn.cursor() as cur:
             cur.execute("""SELECT i, ARRAY[getorcreate_housenumber_id(i::text)]::text
index 0a568cbafc73c125c4f9d34b7141b42b05eb6b11..78bd8cb9490c5646754cef2ef2bbf2348d5e2a74 100644 (file)
@@ -6,7 +6,7 @@ of artificial postcode centroids.
 from nominatim.db.utils import execute_file
 from nominatim.db.connection import connect
 
-def import_postcodes(dsn, project_dir):
+def import_postcodes(dsn, project_dir, tokenizer):
     """ Set up the initial list of postcodes.
     """
 
@@ -41,10 +41,11 @@ def import_postcodes(dsn, project_dir):
                 INSERT INTO location_postcode
                  (place_id, indexed_status, country_code, postcode, geometry)
                 SELECT nextval('seq_place'), 1, country_code,
-                       upper(trim (both ' ' from address->'postcode')) as pc,
+                       token_normalized_postcode(address->'postcode') as pc,
                        ST_Centroid(ST_Collect(ST_Centroid(geometry)))
                   FROM placex
-                 WHERE address ? 'postcode' AND address->'postcode' NOT SIMILAR TO '%(,|;)%'
+                 WHERE address ? 'postcode'
+                       and token_normalized_postcode(address->'postcode') is not null
                        AND geometry IS NOT null
                  GROUP BY country_code, pc
             """)
@@ -52,9 +53,10 @@ def import_postcodes(dsn, project_dir):
             cur.execute("""
                 INSERT INTO location_postcode
                  (place_id, indexed_status, country_code, postcode, geometry)
-                SELECT nextval('seq_place'), 1, 'us', postcode,
+                SELECT nextval('seq_place'), 1, 'us',
+                       token_normalized_postcode(postcode),
                        ST_SetSRID(ST_Point(x,y),4326)
-                  FROM us_postcode WHERE postcode NOT IN
+                  FROM us_postcode WHERE token_normalized_postcode(postcode) NOT IN
                         (SELECT postcode FROM location_postcode
                           WHERE country_code = 'us')
             """)
@@ -62,8 +64,9 @@ def import_postcodes(dsn, project_dir):
             cur.execute("""
                 INSERT INTO location_postcode
                  (place_id, indexed_status, country_code, postcode, geometry)
-                SELECT nextval('seq_place'), 1, 'gb', postcode, geometry
-                  FROM gb_postcode WHERE postcode NOT IN
+                SELECT nextval('seq_place'), 1, 'gb',
+                       token_normalized_postcode(postcode), geometry
+                  FROM gb_postcode WHERE token_normalized_postcode(postcode) NOT IN
                            (SELECT postcode FROM location_postcode
                              WHERE country_code = 'gb')
             """)
@@ -72,9 +75,7 @@ def import_postcodes(dsn, project_dir):
                     DELETE FROM word WHERE class='place' and type='postcode'
                     and word NOT IN (SELECT postcode FROM location_postcode)
             """)
-
-            cur.execute("""
-                SELECT count(getorcreate_postcode_id(v)) FROM
-                (SELECT distinct(postcode) as v FROM location_postcode) p
-            """)
         conn.commit()
+
+        with tokenizer.name_analyzer() as analyzer:
+            analyzer.add_postcodes_from_db()
index d4649c24929b91835b6aa3dd7d34f985904ceed6..f43f09d074c7ab56ce3837872ff2539ba3f18f86 100644 (file)
@@ -299,7 +299,7 @@ def sql_preprocessor(temp_db_conn, tmp_path, monkeypatch, table_factory):
 
 
 @pytest.fixture
-def tokenizer_mock(monkeypatch, property_table, temp_db_conn):
+def tokenizer_mock(monkeypatch, property_table, temp_db_conn, dsn):
     """ Sets up the configuration so that the test dummy tokenizer will be
         loaded.
     """
index 013016c8d0ba7c2e84bddab61bfb3428944a205a..ceea4a7ededdaa1e829e6ebf902ef42455e33e47 100644 (file)
@@ -43,6 +43,9 @@ class DummyNameAnalyzer:
         """
         pass
 
+    def add_postcodes_from_db(self):
+        pass
+
     def process_place(self, place):
         """ Determine tokenizer information about the given place.
 
index d68769064af9d6ff626ac1e78b9bcd684b9c6c51..ff84e37964b660a33be2faa0908ba2ad0c1519a7 100644 (file)
@@ -33,6 +33,9 @@ class IndexerTestDB:
                                                 geometry_sector INTEGER)""")
             cur.execute("""CREATE TABLE location_property_osmline (
                                place_id BIGINT,
+                               osm_id BIGINT,
+                               address HSTORE,
+                               token_info JSONB,
                                indexed_status SMALLINT,
                                indexed_date TIMESTAMP,
                                geometry_sector INTEGER)""")
@@ -61,6 +64,14 @@ class IndexerTestDB:
                            END;
                            $$ LANGUAGE plpgsql STABLE;
                         """)
+            cur.execute("""CREATE OR REPLACE FUNCTION get_interpolation_address(in_address HSTORE, wayid BIGINT)
+                           RETURNS HSTORE AS $$
+                           BEGIN
+                             RETURN in_address;
+                           END;
+                           $$ LANGUAGE plpgsql STABLE;
+                        """)
+
             for table in ('placex', 'location_property_osmline', 'location_postcode'):
                 cur.execute("""CREATE TRIGGER {0}_update BEFORE UPDATE ON {0}
                                FOR EACH ROW EXECUTE PROCEDURE date_update()
@@ -91,9 +102,9 @@ class IndexerTestDB:
         next_id = next(self.osmline_id)
         with self.conn.cursor() as cur:
             cur.execute("""INSERT INTO location_property_osmline
-                              (place_id, indexed_status, geometry_sector)
-                              VALUES (%s, 1, %s)""",
-                        (next_id, sector))
+                              (place_id, osm_id, indexed_status, geometry_sector)
+                              VALUES (%s, %s, 1, %s)""",
+                        (next_id, next_id, sector))
         return next_id
 
     def add_postcode(self, country, postcode):
index 1fc060b0c6439677e592aa90d4d48d3db51a8ea6..37b47dfa680258e5587fa40e5057d7fd85904a96 100644 (file)
@@ -5,6 +5,11 @@ Tests for functions to maintain the artificial postcode table.
 import pytest
 
 from nominatim.tools import postcodes
+import dummy_tokenizer
+
+@pytest.fixture
+def tokenizer():
+    return dummy_tokenizer.DummyTokenizer(None, None)
 
 @pytest.fixture
 def postcode_table(temp_db_with_extensions, temp_db_cursor, table_factory,
@@ -20,26 +25,26 @@ def postcode_table(temp_db_with_extensions, temp_db_cursor, table_factory,
                       postcode TEXT,
                       geometry GEOMETRY(Geometry, 4326)""")
     temp_db_cursor.execute('CREATE SEQUENCE seq_place')
-    temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION getorcreate_postcode_id(postcode TEXT)
-                              RETURNS INTEGER AS $$ BEGIN RETURN 1; END; $$ LANGUAGE plpgsql;
+    temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION token_normalized_postcode(postcode TEXT)
+                              RETURNS TEXT AS $$ BEGIN RETURN postcode; END; $$ LANGUAGE plpgsql;
                            """)
 
 
-def test_import_postcodes_empty(dsn, temp_db_cursor, postcode_table, tmp_path):
-    postcodes.import_postcodes(dsn, tmp_path)
+def test_import_postcodes_empty(dsn, temp_db_cursor, postcode_table, tmp_path, tokenizer):
+    postcodes.import_postcodes(dsn, tmp_path, tokenizer)
 
     assert temp_db_cursor.table_exists('gb_postcode')
     assert temp_db_cursor.table_exists('us_postcode')
     assert temp_db_cursor.table_rows('location_postcode') == 0
 
 
-def test_import_postcodes_from_placex(dsn, temp_db_cursor, postcode_table, tmp_path):
+def test_import_postcodes_from_placex(dsn, temp_db_cursor, postcode_table, tmp_path, tokenizer):
     temp_db_cursor.execute("""
         INSERT INTO placex (place_id, country_code, address, geometry)
           VALUES (1, 'xx', '"postcode"=>"9486"', 'SRID=4326;POINT(10 12)')
     """)
 
-    postcodes.import_postcodes(dsn, tmp_path)
+    postcodes.import_postcodes(dsn, tmp_path, tokenizer)
 
     rows = temp_db_cursor.row_set(""" SELECT postcode, country_code,
                                       ST_X(geometry), ST_Y(geometry)