]> git.openstreetmap.org Git - nominatim.git/blob - nominatim/indexer/runners.py
Merge pull request #2305 from lonvia/tokenizer
[nominatim.git] / nominatim / indexer / runners.py
1 """
2 Mix-ins that provide the actual commands for the indexer for various indexing
3 tasks.
4 """
5 import functools
6
7 import psycopg2.extras
8
9 # pylint: disable=C0111
10
class AbstractPlacexRunner:
    """ Common base for runners that index rows of the placex table.

        Holds the shared SELECT prefix, the query that fetches the details
        of a batch of places and the batched UPDATE that writes the
        results back.
    """
    SELECT_SQL = 'SELECT place_id FROM placex'

    def __init__(self, rank, analyzer):
        # analyzer must offer process_place(place); its result is stored
        # as token_info. rank is kept for use by subclasses.
        self.analyzer = analyzer
        self.rank = rank


    @staticmethod
    @functools.lru_cache(maxsize=1)
    def _index_sql(num_places):
        """ Return the UPDATE statement for a batch of `num_places` rows.

            Every row contributes one (id, address, token_info) placeholder
            triple. Only the statement for the most recently requested
            batch size is kept in the cache.
        """
        row_placeholders = ["(%s, %s::hstore, %s::jsonb)"] * num_places
        value_list = ','.join(row_placeholders)
        return """ UPDATE placex
                   SET indexed_status = 0, address = v.addr, token_info = v.ti
                   FROM (VALUES {}) as v(id, addr, ti)
                   WHERE place_id = v.id
               """.format(value_list)


    @staticmethod
    def get_place_details(worker, ids):
        """ Send the query that prepares the given places for the update.

            `ids` is a sequence of rows whose first column is the place id.
        """
        place_ids = tuple(row[0] for row in ids)
        worker.perform("""SELECT place_id, (placex_prepare_update(placex)).*
                          FROM placex WHERE place_id IN %s""",
                       (place_ids, ))


    def index_places(self, worker, places):
        """ Write address and token info of all `places` back to placex.

            The parameters are flattened in the order expected by the
            statement from `_index_sql`: id, address, token info.
        """
        params = []
        for place in places:
            params += [
                place['place_id'],
                place['address'],
                psycopg2.extras.Json(self.analyzer.process_place(place)),
            ]

        worker.perform(self._index_sql(len(places)), params)
45
46
class RankRunner(AbstractPlacexRunner):
    """ Runner for all placex rows with a given address rank that still
        need indexing.
    """

    def name(self):
        """ Return a short label naming the rank being processed.
        """
        label = "rank {}"
        return label.format(self.rank)

    def sql_count_objects(self):
        """ Return the SQL counting the rows of this rank with a positive
            indexed_status.
        """
        query = """SELECT count(*) FROM placex
                  WHERE rank_address = {} and indexed_status > 0
               """
        return query.format(self.rank)

    def sql_get_objects(self):
        """ Return the SQL selecting the place ids of this rank with a
            positive indexed_status, ordered by geometry sector.
        """
        query = """{} WHERE indexed_status > 0 and rank_address = {}
                     ORDER BY geometry_sector
               """
        return query.format(self.SELECT_SQL, self.rank)
63
64
class BoundaryRunner(AbstractPlacexRunner):
    """ Runner for the administrative boundaries in placex that have a
        given search rank and still need indexing.
    """

    def name(self):
        """ Return a short label naming the boundary rank being processed.
        """
        label = "boundaries rank {}"
        return label.format(self.rank)

    def sql_count_objects(self):
        """ Return the SQL counting the administrative boundaries of this
            rank with a positive indexed_status.
        """
        query = """SELECT count(*) FROM placex
                  WHERE indexed_status > 0
                    AND rank_search = {}
                    AND class = 'boundary' and type = 'administrative'
               """
        return query.format(self.rank)

    def sql_get_objects(self):
        """ Return the SQL selecting the place ids of the administrative
            boundaries of this rank, ordered by partition and admin level.
        """
        query = """{} WHERE indexed_status > 0 and rank_search = {}
                           and class = 'boundary' and type = 'administrative'
                     ORDER BY partition, admin_level
               """
        return query.format(self.SELECT_SQL, self.rank)
85
86
class InterpolationRunner:
    """ Runner for the rows of the address interpolation table
        location_property_osmline.
    """

    def __init__(self, analyzer):
        # analyzer must offer process_place(place); its result is stored
        # as token_info.
        self.analyzer = analyzer


    @staticmethod
    def name():
        """ Return a short label for this indexing task.
        """
        return "interpolation lines (location_property_osmline)"

    @staticmethod
    def sql_count_objects():
        """ Return the SQL counting the rows with a positive indexed_status.
        """
        query = """SELECT count(*) FROM location_property_osmline
                  WHERE indexed_status > 0"""
        return query

    @staticmethod
    def sql_get_objects():
        """ Return the SQL selecting the place ids of the rows with a
            positive indexed_status, ordered by geometry sector.
        """
        query = """SELECT place_id
                  FROM location_property_osmline
                  WHERE indexed_status > 0
                  ORDER BY geometry_sector"""
        return query


    @staticmethod
    def get_place_details(worker, ids):
        """ Send the query that fetches the address of the given lines.

            `ids` is a sequence of rows whose first column is the place id.
        """
        place_ids = tuple(row[0] for row in ids)
        worker.perform("""SELECT place_id, get_interpolation_address(address, osm_id) as address
                          FROM location_property_osmline WHERE place_id IN %s""",
                       (place_ids, ))


    @staticmethod
    @functools.lru_cache(maxsize=1)
    def _index_sql(num_places):
        """ Return the UPDATE statement for a batch of `num_places` rows.

            Every row contributes one (id, address, token_info) placeholder
            triple. Only the statement for the most recently requested
            batch size is kept in the cache.
        """
        row_placeholders = ["(%s, %s::hstore, %s::jsonb)"] * num_places
        value_list = ','.join(row_placeholders)
        return """ UPDATE location_property_osmline
                   SET indexed_status = 0, address = v.addr, token_info = v.ti
                   FROM (VALUES {}) as v(id, addr, ti)
                   WHERE place_id = v.id
               """.format(value_list)


    def index_places(self, worker, places):
        """ Write address and token info of all `places` back to the table.

            The parameters are flattened in the order expected by the
            statement from `_index_sql`: id, address, token info.
        """
        params = []
        for place in places:
            params += [
                place['place_id'],
                place['address'],
                psycopg2.extras.Json(self.analyzer.process_place(place)),
            ]

        worker.perform(self._index_sql(len(places)), params)
137
138
139
class PostcodeRunner:
    """ Provides the SQL commands for indexing the location_postcode table.
    """

    @staticmethod
    def name():
        """ Return a short label for this indexing task.
        """
        return "postcodes (location_postcode)"

    @staticmethod
    def sql_count_objects():
        """ Return the SQL counting the rows with a positive indexed_status.
        """
        return 'SELECT count(*) FROM location_postcode WHERE indexed_status > 0'

    @staticmethod
    def sql_get_objects():
        """ Return the SQL selecting the place ids of the rows with a
            positive indexed_status, ordered by country code and postcode.
        """
        return """SELECT place_id FROM location_postcode
                  WHERE indexed_status > 0
                  ORDER BY country_code, postcode"""

    @staticmethod
    def index_places(worker, ids):
        """ Reset indexed_status to 0 for all postcodes listed in `ids`.

            `ids` is a sequence of rows whose first column is the place id.
            The ids are handed over as a single bound tuple parameter, the
            same way the other runners pass ids to `IN %s`, instead of
            being pasted into the SQL text.

            `ids` must not be empty: an empty IN list is not valid SQL.
        """
        place_ids = tuple(row[0] for row in ids)
        worker.perform(""" UPDATE location_postcode SET indexed_status = 0
                           WHERE place_id IN %s
                       """,
                       (place_ids, ))