]> git.openstreetmap.org Git - nominatim.git/blob - nominatim/tools/postcodes.py
move filling of postcode table to python
[nominatim.git] / nominatim / tools / postcodes.py
1 """
2 Functions for importing, updating and otherwise maintaining the table
3 of artificial postcode centroids.
4 """
5 import csv
6 import gzip
7 import logging
8
9 from psycopg2.extras import execute_values
10
11 from nominatim.db.connection import connect
12
13 LOG = logging.getLogger()
14
15 class _CountryPostcodesCollector:
16     """ Collector for postcodes of a single country.
17     """
18
19     def __init__(self, country):
20         self.country = country
21         self.collected = dict()
22
23
24     def add(self, postcode, x, y):
25         """ Add the given postcode to the collection cache. If the postcode
26             already existed, it is overwritten with the new centroid.
27         """
28         self.collected[postcode] = (x, y)
29
30
31     def commit(self, conn, analyzer, project_dir):
32         """ Update postcodes for the country from the postcodes selected so far
33             as well as any externally supplied postcodes.
34         """
35         self._update_from_external(analyzer, project_dir)
36         to_add, to_delete, to_update = self._compute_changes(conn)
37
38         with conn.cursor() as cur:
39             if to_add:
40                 execute_values(cur,
41                                """INSERT INTO location_postcodes
42                                       (place_id, indexed_status, countrycode,
43                                        postcode, geometry) VALUES %s""",
44                                to_add,
45                                template="""(nextval('seq_place'), 1, '{}',
46                                            %s, 'SRID=4326;POINT(%s %s)')
47                                         """.format(self.country))
48             if to_delete:
49                 cur.execute("""DELETE FROM location_postcodes
50                                WHERE country_code = %s and postcode = any(%s)
51                             """, (self.country, to_delete))
52             if to_update:
53                 execute_values(cur,
54                                """UPDATE location_postcodes
55                                   SET indexed_status = 2,
56                                       geometry = ST_SetSRID(ST_Point(v.x, v.y), 4326)
57                                   FROM (VALUES %s) AS v (pc, x, y)
58                                   WHERE country_code = '{}' and postcode = pc
59                                """.format(self.country),
60                                to_update)
61
62
63     def _compute_changes(self, conn):
64         """ Compute which postcodes from the collected postcodes have to be
65             added or modified and which from the location_postcodes table
66             have to be deleted.
67         """
68         to_update = []
69         to_delete = []
70         with conn.cursor() as cur:
71             cur.execute("""SELECT postcode, ST_X(geometry), ST_Y(geometry)
72                            FROM location_postcodes
73                            WHERE country_code = %s""",
74                         (self.country, ))
75             for postcode, x, y in cur:
76                 oldx, oldy = self.collected.pop(postcode, (None, None))
77                 if oldx is not None:
78                     dist = (x - oldx)**2 + (y - oldy)**2
79                     if dist > 0.000001:
80                         to_update.append(postcode, x, y)
81                 else:
82                     to_delete.append(postcode)
83
84         to_add = [(k, v[0], v[1]) for k, v in self.collected.items()]
85         self.collected = []
86
87         return to_add, to_delete, to_update
88
89
90     def _update_from_external(self, analyzer, project_dir):
91         """ Look for an external postcode file for the active country in
92             the project directory and add missing postcodes when found.
93         """
94         csvfile = self._open_external(project_dir)
95         if csvfile is None:
96             return
97
98         try:
99             reader = csv.DictReader(csvfile)
100             for row in reader:
101                 if 'postcode' not in row or 'lat' not in row or 'lon' not in row:
102                     LOG.warning("Bad format for external postcode file for country '%s'."
103                                 " Ignored.", self.country)
104                     return
105                 postcode = analyzer.normalize_postcode(row['postcode'])
106                 if postcode not in self.collected:
107                     try:
108                         self.collected[postcode] = (float(row['lon'], float(row['lat'])))
109                     except ValueError:
110                         LOG.warning("Bad coordinates %s, %s in %s country postcode file.",
111                                     row['lat'], row['lon'], self.country)
112
113         finally:
114             csvfile.close()
115
116
117     def _open_external(self, project_dir):
118         fname = project_dir / '{}_postcodes.csv'.format(self.country)
119
120         if fname.is_file():
121             LOG.info("Using external postcode file '%s'.", fname)
122             return open(fname, 'r')
123
124         fname = project_dir / '{}_postcodes.csv.gz'.format(self.country)
125
126         if fname.is_file():
127             LOG.info("Using external postcode file '%s'.", fname)
128             return gzip.open(fname, 'rt')
129
130         return None
131
132
133 def update_postcodes(dsn, project_dir, tokenizer):
134     """ Update the table of artificial postcodes.
135
136         Computes artificial postcode centroids from the placex table,
137         potentially enhances it with external data and then updates the
138         postcodes in the table 'location_postcode'.
139     """
140     with tokenizer.name_analyzer() as analyzer:
141         with connect(dsn) as conn:
142             with conn.cursor("placex_postcodes") as cur:
143                 cur.execute("""SELECT country_code, pc, ST_X(centroid), ST_Y(centroid)
144                                FROM (
145                                  SELECT country_code,
146                                         token_normalized_postcode(address->'postcode') as pc,
147                                         ST_Centroid(ST_Collect(ST_Centroid(geometry))) as centroid
148                                  FROM placex
149                                  WHERE address ? 'postcode' and geometry IS NOT null
150                                  GROUP BY country_code, pc) xx
151                                WHERE pc is not null
152                                ORDER BY country_code, pc""")
153
154                 collector = None
155
156                 for country, postcode, x, y in cur:
157                     if collector is None or country != collector.country:
158                         if collector is not None:
159                             collector.commit(conn, analyzer, project_dir)
160                         collector = _CountryPostcodesCollector(country)
161                     collector.add(postcode, x, y)
162
163                 if collector is not None:
164                     collector.commit(conn, analyzer, project_dir)
165
166             conn.commit()
167
168         analyzer.add_postcodes_from_db()