]> git.openstreetmap.org Git - nominatim.git/blob - nominatim/tools/postcodes.py
index postcodes after refreshing
[nominatim.git] / nominatim / tools / postcodes.py
1 """
2 Functions for importing, updating and otherwise maintaining the table
3 of artificial postcode centroids.
4 """
5 import csv
6 import gzip
7 import logging
8
9 from psycopg2.extras import execute_values
10
11 from nominatim.db.connection import connect
12
13 LOG = logging.getLogger()
14
15 class _CountryPostcodesCollector:
16     """ Collector for postcodes of a single country.
17     """
18
19     def __init__(self, country):
20         self.country = country
21         self.collected = dict()
22
23
24     def add(self, postcode, x, y):
25         """ Add the given postcode to the collection cache. If the postcode
26             already existed, it is overwritten with the new centroid.
27         """
28         self.collected[postcode] = (x, y)
29
30
31     def commit(self, conn, analyzer, project_dir):
32         """ Update postcodes for the country from the postcodes selected so far
33             as well as any externally supplied postcodes.
34         """
35         self._update_from_external(analyzer, project_dir)
36         to_add, to_delete, to_update = self._compute_changes(conn)
37
38         LOG.info("Processing country '%s' (%s added, %s deleted, %s updated).",
39                  self.country, len(to_add), len(to_delete), len(to_update))
40
41         with conn.cursor() as cur:
42             if to_add:
43                 execute_values(cur,
44                                """INSERT INTO location_postcode
45                                       (place_id, indexed_status, country_code,
46                                        postcode, geometry) VALUES %s""",
47                                to_add,
48                                template="""(nextval('seq_place'), 1, '{}',
49                                            %s, 'SRID=4326;POINT(%s %s)')
50                                         """.format(self.country))
51             if to_delete:
52                 cur.execute("""DELETE FROM location_postcode
53                                WHERE country_code = %s and postcode = any(%s)
54                             """, (self.country, to_delete))
55             if to_update:
56                 execute_values(cur,
57                                """UPDATE location_postcode
58                                   SET indexed_status = 2,
59                                       geometry = ST_SetSRID(ST_Point(v.x, v.y), 4326)
60                                   FROM (VALUES %s) AS v (pc, x, y)
61                                   WHERE country_code = '{}' and postcode = pc
62                                """.format(self.country),
63                                to_update)
64
65
66     def _compute_changes(self, conn):
67         """ Compute which postcodes from the collected postcodes have to be
68             added or modified and which from the location_postcode table
69             have to be deleted.
70         """
71         to_update = []
72         to_delete = []
73         with conn.cursor() as cur:
74             cur.execute("""SELECT postcode, ST_X(geometry), ST_Y(geometry)
75                            FROM location_postcode
76                            WHERE country_code = %s""",
77                         (self.country, ))
78             for postcode, x, y in cur:
79                 newx, newy = self.collected.pop(postcode, (None, None))
80                 if newx is not None:
81                     dist = (x - newx)**2 + (y - newy)**2
82                     if dist > 0.0000001:
83                         to_update.append((postcode, newx, newy))
84                 else:
85                     to_delete.append(postcode)
86
87         to_add = [(k, v[0], v[1]) for k, v in self.collected.items()]
88         self.collected = []
89
90         return to_add, to_delete, to_update
91
92
93     def _update_from_external(self, analyzer, project_dir):
94         """ Look for an external postcode file for the active country in
95             the project directory and add missing postcodes when found.
96         """
97         csvfile = self._open_external(project_dir)
98         if csvfile is None:
99             return
100
101         try:
102             reader = csv.DictReader(csvfile)
103             for row in reader:
104                 if 'postcode' not in row or 'lat' not in row or 'lon' not in row:
105                     LOG.warning("Bad format for external postcode file for country '%s'."
106                                 " Ignored.", self.country)
107                     return
108                 postcode = analyzer.normalize_postcode(row['postcode'])
109                 if postcode not in self.collected:
110                     try:
111                         self.collected[postcode] = (float(row['lon']), float(row['lat']))
112                     except ValueError:
113                         LOG.warning("Bad coordinates %s, %s in %s country postcode file.",
114                                     row['lat'], row['lon'], self.country)
115
116         finally:
117             csvfile.close()
118
119
120     def _open_external(self, project_dir):
121         fname = project_dir / '{}_postcodes.csv'.format(self.country)
122
123         if fname.is_file():
124             LOG.info("Using external postcode file '%s'.", fname)
125             return open(fname, 'r')
126
127         fname = project_dir / '{}_postcodes.csv.gz'.format(self.country)
128
129         if fname.is_file():
130             LOG.info("Using external postcode file '%s'.", fname)
131             return gzip.open(fname, 'rt')
132
133         return None
134
135
136 def update_postcodes(dsn, project_dir, tokenizer):
137     """ Update the table of artificial postcodes.
138
139         Computes artificial postcode centroids from the placex table,
140         potentially enhances it with external data and then updates the
141         postcodes in the table 'location_postcode'.
142     """
143     with tokenizer.name_analyzer() as analyzer:
144         with connect(dsn) as conn:
145             with conn.cursor(name="placex_postcodes") as cur:
146                 cur.execute("""SELECT country_code, pc, ST_X(centroid), ST_Y(centroid)
147                                FROM (
148                                  SELECT country_code,
149                                         token_normalized_postcode(address->'postcode') as pc,
150                                         ST_Centroid(ST_Collect(ST_Centroid(geometry))) as centroid
151                                  FROM placex
152                                  WHERE address ? 'postcode' and geometry IS NOT null
153                                  GROUP BY country_code, pc) xx
154                                WHERE pc is not null
155                                ORDER BY country_code, pc""")
156
157                 collector = None
158
159                 for country, postcode, x, y in cur:
160                     if collector is None or country != collector.country:
161                         if collector is not None:
162                             collector.commit(conn, analyzer, project_dir)
163                         collector = _CountryPostcodesCollector(country)
164                     collector.add(postcode, x, y)
165
166                 if collector is not None:
167                     collector.commit(conn, analyzer, project_dir)
168
169             conn.commit()
170
171         analyzer.update_postcodes_from_db()