]> git.openstreetmap.org Git - nominatim.git/blob - src/nominatim_db/indexer/runners.py
Merge pull request #3762 from lonvia/remove-gazetteer-output-support
[nominatim.git] / src / nominatim_db / indexer / runners.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2024 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Mix-ins that provide the actual commands for the indexer for various indexing
9 tasks.
10 """
11 from typing import Any, Sequence
12
13 from psycopg import sql as pysql
14 from psycopg.abc import Query
15 from psycopg.rows import DictRow
16 from psycopg.types.json import Json
17
18 from ..typing import Protocol
19 from ..data.place_info import PlaceInfo
20 from ..tokenizer.base import AbstractAnalyzer
21
22
def _mk_valuelist(template: str, num: int) -> pysql.Composed:
    """ Build a comma-separated SQL list that repeats the fragment
        `template` exactly `num` times.
    """
    fragment = pysql.SQL(template)
    separator = pysql.SQL(',')
    return separator.join([fragment] * num)
25
26
def _analyze_place(place: DictRow, analyzer: AbstractAnalyzer) -> Json:
    """ Wrap the row in a PlaceInfo, hand it to the analyzer's
        process_place() and return the result wrapped in a Json object.
    """
    info = PlaceInfo(place)
    token_info = analyzer.process_place(info)
    return Json(token_info)
29
30
class Runner(Protocol):
    """ Structural interface shared by the runner classes of this module.
    """

    def name(self) -> str:
        """ Return a human-readable label for the indexing task.
        """

    def sql_count_objects(self) -> Query:
        """ Return the query that counts the rows to be processed.
        """

    def sql_get_objects(self) -> Query:
        """ Return the query that fetches the rows to be processed.
        """

    def index_places_query(self, batch_size: int) -> Query:
        """ Return the UPDATE statement that writes back `batch_size`
            rows at once.
        """

    def index_places_params(self, place: DictRow) -> Sequence[Any]:
        """ Return the statement parameters contributed by a single row.
        """
37
38
# Skeleton of the query that fetches placex rows for indexing. The `{}`
# slot takes the WHERE/ORDER BY clause selecting the rows; every selected
# row is passed through the SQL function placex_indexing_prepare().
SELECT_SQL = pysql.SQL("""SELECT place_id, extra.*
                          FROM (SELECT * FROM placex {}) as px,
                          LATERAL placex_indexing_prepare(px) as extra """)
# Placeholders for one row in the VALUES list of the placex UPDATE,
# in the order (id, name, addr, linked_place_id, ti).
UPDATE_LINE = "(%s, %s::hstore, %s::hstore, %s::int, %s::jsonb)"
43
44
class AbstractPlacexRunner:
    """ Base for runners that index rows of the placex table.

        Supplies the batched UPDATE statement and its per-row parameters.
        Naming the task and selecting the rows is left to subclasses.
    """

    def __init__(self, rank: int, analyzer: AbstractAnalyzer) -> None:
        """ Store the rank to work on and the analyzer that is used to
            compute the token information for each place.
        """
        self.rank = rank
        self.analyzer = analyzer

    def index_places_query(self, batch_size: int) -> Query:
        """ Return an UPDATE statement for placex that writes back
            `batch_size` rows at once and sets their indexed_status to 0.
        """
        stmt = pysql.SQL(
            """ UPDATE placex
                SET indexed_status = 0, address = v.addr, token_info = v.ti,
                    name = v.name, linked_place_id = v.linked_place_id
                FROM (VALUES {}) as v(id, name, addr, linked_place_id, ti)
                WHERE place_id = v.id
            """)
        # One UPDATE_LINE group of placeholders per row in the batch.
        values = _mk_valuelist(UPDATE_LINE, batch_size)
        return stmt.format(values)

    def index_places_params(self, place: DictRow) -> Sequence[Any]:
        """ Return the parameters for one row of the VALUES list.

            The order follows UPDATE_LINE: place id, name, address,
            linked place id and finally the analyzer's token information.
        """
        columns = ('place_id', 'name', 'address', 'linked_place_id')
        row_values = tuple(place[col] for col in columns)
        # The row values are read before the analyzer runs, as before.
        return (*row_values, _analyze_place(place, self.analyzer))
68
69
class RankRunner(AbstractPlacexRunner):
    """ Runner for the placex rows of a single address rank.
    """

    def name(self) -> str:
        """ Return a label that contains the rank being processed.
        """
        return f"rank {self.rank}"

    def sql_count_objects(self) -> pysql.Composed:
        """ Return a query counting the placex rows whose rank_address
            equals the runner's rank and whose indexed_status is above 0.
        """
        stmt = pysql.SQL("""SELECT count(*) FROM placex
                            WHERE rank_address = {} and indexed_status > 0
                         """)
        return stmt.format(pysql.Literal(self.rank))

    def sql_get_objects(self) -> pysql.Composed:
        """ Return the SELECT_SQL query restricted to the rows of the
            runner's rank_address that have indexed_status above 0,
            ordered by geometry_sector.
        """
        row_filter = pysql.SQL(
                """WHERE placex.indexed_status > 0 and placex.rank_address = {}
                   ORDER BY placex.geometry_sector
                """).format(pysql.Literal(self.rank))
        return SELECT_SQL.format(row_filter)
87
88
class BoundaryRunner(AbstractPlacexRunner):
    """ Runner for the administrative boundaries (class 'boundary',
        type 'administrative') of a single search rank in placex.
    """

    def name(self) -> str:
        """ Return a label that contains the rank being processed.
        """
        return f"boundaries rank {self.rank}"

    def sql_count_objects(self) -> Query:
        """ Return a query counting the administrative boundaries whose
            rank_search equals the runner's rank and whose indexed_status
            is above 0.
        """
        stmt = pysql.SQL("""SELECT count(*) FROM placex
                            WHERE indexed_status > 0
                              AND rank_search = {}
                              AND class = 'boundary' and type = 'administrative'
                         """)
        return stmt.format(pysql.Literal(self.rank))

    def sql_get_objects(self) -> Query:
        """ Return the SELECT_SQL query restricted to the administrative
            boundaries of the runner's rank_search that have indexed_status
            above 0, ordered by partition and admin_level.
        """
        row_filter = pysql.SQL(
                """WHERE placex.indexed_status > 0 and placex.rank_search = {}
                         and placex.class = 'boundary' and placex.type = 'administrative'
                   ORDER BY placex.partition, placex.admin_level
                """).format(pysql.Literal(self.rank))
        return SELECT_SQL.format(row_filter)
110
111
class InterpolationRunner:
    """ Runner for the address interpolation table
        location_property_osmline.
    """

    def __init__(self, analyzer: AbstractAnalyzer) -> None:
        """ Store the analyzer that is used to compute the token
            information for each interpolation line.
        """
        self.analyzer = analyzer

    def name(self) -> str:
        """ Return the fixed label of this indexing task.
        """
        return "interpolation lines (location_property_osmline)"

    def sql_count_objects(self) -> Query:
        """ Return a query counting the rows with indexed_status above 0.
        """
        return """SELECT count(*) FROM location_property_osmline
                  WHERE indexed_status > 0"""

    def sql_get_objects(self) -> Query:
        """ Return a query fetching place_id and the address computed by
            get_interpolation_address() for all rows with indexed_status
            above 0, ordered by geometry_sector.
        """
        return """SELECT place_id, get_interpolation_address(address, osm_id) as address
                  FROM location_property_osmline
                  WHERE indexed_status > 0
                  ORDER BY geometry_sector"""

    def index_places_query(self, batch_size: int) -> Query:
        """ Return an UPDATE statement that writes address and token_info
            for `batch_size` rows at once and sets indexed_status to 0.
        """
        stmt = pysql.SQL("""UPDATE location_property_osmline
                            SET indexed_status = 0, address = v.addr, token_info = v.ti
                            FROM (VALUES {}) as v(id, addr, ti)
                            WHERE place_id = v.id
                         """)
        # One (id, addr, ti) group of placeholders per row in the batch.
        values = _mk_valuelist("(%s, %s::hstore, %s::jsonb)", batch_size)
        return stmt.format(values)

    def index_places_params(self, place: DictRow) -> Sequence[Any]:
        """ Return place id, address and the analyzer's token information
            for one row, in the order the UPDATE statement expects.
        """
        place_id, address = place['place_id'], place['address']
        return (place_id, address, _analyze_place(place, self.analyzer))
143
144
class PostcodeRunner(Runner):
    """ Runner for the location_postcode table.

        Rows are only marked as indexed; no other column is written.
    """

    def name(self) -> str:
        """ Return the fixed label of this indexing task.
        """
        return "postcodes (location_postcode)"

    def sql_count_objects(self) -> Query:
        """ Return a query counting the rows with indexed_status above 0.
        """
        return 'SELECT count(*) FROM location_postcode WHERE indexed_status > 0'

    def sql_get_objects(self) -> Query:
        """ Return a query fetching the place_id of all rows with
            indexed_status above 0, ordered by country_code and postcode.
        """
        return """SELECT place_id FROM location_postcode
                  WHERE indexed_status > 0
                  ORDER BY country_code, postcode"""

    def index_places_query(self, batch_size: int) -> Query:
        """ Return an UPDATE statement that sets indexed_status to 0 for
            `batch_size` rows selected by place_id.
        """
        stmt = pysql.SQL("""UPDATE location_postcode SET indexed_status = 0
                                    WHERE place_id IN ({})""")
        # One placeholder per place_id in the batch.
        placeholders = pysql.SQL(',').join(pysql.Placeholder() for _ in range(batch_size))
        return stmt.format(placeholders)

    def index_places_params(self, place: DictRow) -> Sequence[Any]:
        """ Return the place id of the row as a one-element tuple.
        """
        place_id = place['place_id']
        return (place_id, )
164
165     def index_places_params(self, place: DictRow) -> Sequence[Any]:
166         return (place['place_id'], )