# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Tokenizer implementing normalisation as used before Nominatim 4 but using
libICU instead of the PostgreSQL module.
"""
from typing import Optional, Sequence, List, Tuple, Mapping, Any, cast, \
                   Dict, Set, Iterable
import itertools
import logging

from psycopg.types.json import Jsonb
from psycopg import sql as pysql

from ..db.connection import connect, Connection, Cursor, \
                            drop_tables, table_exists, execute_scalar
from ..config import Configuration
from ..db.sql_preprocessor import SQLPreprocessor
from ..data.place_info import PlaceInfo
from ..data.place_name import PlaceName
from .icu_rule_loader import ICURuleLoader
from .place_sanitizer import PlaceSanitizer
from .icu_token_analysis import ICUTokenAnalysis
from .base import AbstractAnalyzer, AbstractTokenizer

DBCFG_TERM_NORMALIZATION = "tokenizer_term_normalization"

LOG = logging.getLogger()

WORD_TYPES = (('country_names', 'C'),
              ('postcodes', 'P'),
              ('full_word', 'W'),
              ('housenumbers', 'H'))
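
# Single-letter codes in WORD_TYPES are the values of the word table's "type"
# column that get a dedicated index of their own. The queries further down
# additionally use 'w' for partial words and 'S' for special phrases.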


def create(dsn: str) -> 'ICUTokenizer':
    """ Create a new instance of the tokenizer provided by this module.
    """
    return ICUTokenizer(dsn)
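
# Minimal usage sketch (illustrative only, not part of the module API): the DSN
# is a placeholder, `config` must be a Configuration pointing to an initialised
# project directory and `place` a PlaceInfo.
#
#     tokenizer = create('dbname=nominatim')
#     tokenizer.init_from_project(config)
#     with tokenizer.name_analyzer() as analyzer:
#         token_info = analyzer.process_place(place)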


class ICUTokenizer(AbstractTokenizer):
    """ This tokenizer uses libICU to convert names and queries to ASCII.
        Otherwise it uses the same algorithms and data structures as the
        normalization routines in Nominatim 3.
    """

    def __init__(self, dsn: str) -> None:
        self.dsn = dsn
        self.loader: Optional[ICURuleLoader] = None

    def init_new_db(self, config: Configuration, init_db: bool = True) -> None:
        """ Set up a new tokenizer for the database.

            This copies all necessary data in the project directory to make
            sure the tokenizer remains stable even over updates.
        """
        self.loader = ICURuleLoader(config)

        self._save_config()

        if init_db:
            self.update_sql_functions(config)
            self._setup_db_tables(config)
            self._create_base_indices(config, 'word')

    def init_from_project(self, config: Configuration) -> None:
        """ Initialise the tokenizer from the project directory.
        """
        self.loader = ICURuleLoader(config)

        with connect(self.dsn) as conn:
            self.loader.load_config_from_db(conn)

    def finalize_import(self, config: Configuration) -> None:
        """ Do any required postprocessing to make the tokenizer data ready
            for use.
        """
        self._create_lookup_indices(config, 'word')

    def update_sql_functions(self, config: Configuration) -> None:
        """ Reimport the SQL functions for this tokenizer.
        """
        with connect(self.dsn) as conn:
            sqlp = SQLPreprocessor(conn, config)
            sqlp.run_sql_file(conn, 'tokenizer/icu_tokenizer.sql')

    def check_database(self, config: Configuration) -> None:
        """ Check that the tokenizer is set up correctly.
        """
        # Will throw an error if there is an issue.
        self.init_from_project(config)

    def update_statistics(self, config: Configuration, threads: int = 2) -> None:
        """ Recompute frequencies for all name words.
        """
        with connect(self.dsn) as conn:
            if not table_exists(conn, 'search_name'):
                return

            with conn.cursor() as cur:
                cur.execute('ANALYSE search_name')
                if threads > 1:
                    cur.execute(pysql.SQL('SET max_parallel_workers_per_gather TO {}')
                                     .format(pysql.Literal(min(threads, 6),)))

                LOG.info('Computing word frequencies')
                drop_tables(conn, 'word_frequencies')
                cur.execute("""
                  CREATE TEMP TABLE word_frequencies AS
                  WITH word_freq AS MATERIALIZED (
                           SELECT unnest(name_vector) as id, count(*)
                                 FROM search_name GROUP BY id),
                       addr_freq AS MATERIALIZED (
                           SELECT unnest(nameaddress_vector) as id, count(*)
                                 FROM search_name GROUP BY id)
                  SELECT coalesce(a.id, w.id) as id,
                         (CASE WHEN w.count is null or w.count <= 1 THEN '{}'::JSONB
                               ELSE jsonb_build_object('count', w.count) END
                          ||
                          CASE WHEN a.count is null or a.count <= 1 THEN '{}'::JSONB
                               ELSE jsonb_build_object('addr_count', a.count) END) as info
                  FROM word_freq w FULL JOIN addr_freq a ON a.id = w.id;
                  """)
                cur.execute('CREATE UNIQUE INDEX ON word_frequencies(id) INCLUDE(info)')
                cur.execute('ANALYSE word_frequencies')
                LOG.info('Update word table with recomputed frequencies')
                drop_tables(conn, 'tmp_word')
                cur.execute("""CREATE TABLE tmp_word AS
                                SELECT word_id, word_token, type, word,
                                       coalesce(word.info, '{}'::jsonb)
                                       - 'count' - 'addr_count' ||
                                       coalesce(wf.info, '{}'::jsonb)
                                       as info
                                FROM word LEFT JOIN word_frequencies wf
                                     ON word.word_id = wf.id
                            """)
                drop_tables(conn, 'word_frequencies')

            with conn.cursor() as cur:
                cur.execute('SET max_parallel_workers_per_gather TO 0')

            sqlp = SQLPreprocessor(conn, config)
            sqlp.run_string(conn,
                            'GRANT SELECT ON tmp_word TO "{{config.DATABASE_WEBUSER}}"')
            conn.commit()
        self._create_base_indices(config, 'tmp_word')
        self._create_lookup_indices(config, 'tmp_word')
        self._move_temporary_word_table('tmp_word')
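
    # Note on update_statistics(): frequencies are first aggregated into a
    # temporary word_frequencies table, merged into a fresh tmp_word copy of the
    # word table, indexed, and only then swapped in via
    # _move_temporary_word_table(), so the live word table stays usable while
    # the statistics are being rebuilt.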

    def _cleanup_housenumbers(self) -> None:
        """ Remove unused house numbers.
        """
        with connect(self.dsn) as conn:
            if not table_exists(conn, 'search_name'):
                return
            with conn.cursor(name="hnr_counter") as cur:
                cur.execute("""SELECT DISTINCT word_id, coalesce(info->>'lookup', word_token)
                               FROM word
                               WHERE type = 'H'
                                 AND NOT EXISTS(SELECT * FROM search_name
                                                WHERE ARRAY[word.word_id] && name_vector)
                                 AND (char_length(coalesce(word, word_token)) > 6
                                      OR coalesce(word, word_token) not similar to '\\d+')
                            """)
                candidates = {token: wid for wid, token in cur}
            with conn.cursor(name="hnr_counter") as cur:
                cur.execute("""SELECT housenumber FROM placex
                               WHERE housenumber is not null
                                     AND (char_length(housenumber) > 6
                                          OR housenumber not similar to '\\d+')
                            """)
                for row in cur:
                    for hnr in row[0].split(';'):
                        candidates.pop(hnr, None)
            LOG.info("There are %s outdated housenumbers.", len(candidates))
            LOG.debug("Outdated housenumbers: %s", candidates.keys())
            if candidates:
                with conn.cursor() as cur:
                    cur.execute("""DELETE FROM word WHERE word_id = any(%s)""",
                                (list(candidates.values()), ))
                conn.commit()

    def update_word_tokens(self) -> None:
        """ Remove unused tokens.
        """
        LOG.warning("Cleaning up housenumber tokens.")
        self._cleanup_housenumbers()
        LOG.warning("Tokenizer house-keeping done.")

    def name_analyzer(self) -> 'ICUNameAnalyzer':
        """ Create a new analyzer for tokenizing names and queries
            using this tokenizer. Analyzers are context managers and should
            be used accordingly:

                with tokenizer.name_analyzer() as analyzer:
                    analyzer.process_place(place)

            When used outside the with construct, the caller must ensure to
            call the close() function before destructing the analyzer.

            Analyzers are not thread-safe. You need to instantiate one per thread.
        """
        assert self.loader is not None
        return ICUNameAnalyzer(self.dsn, self.loader.make_sanitizer(),
                               self.loader.make_token_analysis())
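
    # Each ICUNameAnalyzer opens its own autocommit database connection and
    # keeps a private _TokenCache, hence the one-analyzer-per-thread rule above.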

    def most_frequent_words(self, conn: Connection, num: int) -> List[str]:
        """ Return a list of the `num` most frequent full words
            in the database.
        """
        with conn.cursor() as cur:
            cur.execute("""SELECT word, sum((info->>'count')::int) as count
                             FROM word WHERE type = 'W'
                             GROUP BY word
                             ORDER BY count DESC LIMIT %s""", (num,))
            return list(s[0].split('@')[0] for s in cur)
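
    # Full words of type 'W' may be stored as '<word>@<analyzer>' (see
    # ICUNameAnalyzer._compute_name_tokens()), so the '@' suffix is stripped
    # before the word is returned.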

    def _save_config(self) -> None:
        """ Save the configuration that needs to remain stable for the given
            database as database properties.
        """
        assert self.loader is not None
        with connect(self.dsn) as conn:
            self.loader.save_config_to_db(conn)

    def _setup_db_tables(self, config: Configuration) -> None:
        """ Create the word table and the word-id sequence used by the
            tokenizer.
        """
        with connect(self.dsn) as conn:
            drop_tables(conn, 'word')
            sqlp = SQLPreprocessor(conn, config)
            sqlp.run_string(conn, """
                CREATE TABLE word (
                      word_id INTEGER,
                      word_token text NOT NULL,
                      type text NOT NULL,
                      word text,
                      info jsonb
                    ) {{db.tablespace.search_data}};
                GRANT SELECT ON word TO "{{config.DATABASE_WEBUSER}}";

                DROP SEQUENCE IF EXISTS seq_word;
                CREATE SEQUENCE seq_word start 1;
                GRANT SELECT ON seq_word to "{{config.DATABASE_WEBUSER}}";
            """)
            conn.commit()

    def _create_base_indices(self, config: Configuration, table_name: str) -> None:
        """ Set up the basic indices needed on the given word table.
        """
        with connect(self.dsn) as conn:
            sqlp = SQLPreprocessor(conn, config)
            sqlp.run_string(conn,
                            """CREATE INDEX idx_{{table_name}}_word_token ON {{table_name}}
                               USING BTREE (word_token) {{db.tablespace.search_index}}""",
                            table_name=table_name)
            for name, ctype in WORD_TYPES:
                sqlp.run_string(conn,
                                """CREATE INDEX idx_{{table_name}}_{{idx_name}} ON {{table_name}}
                                   USING BTREE (word) {{db.tablespace.address_index}}
                                   WHERE type = '{{column_type}}'
                                """,
                                table_name=table_name, idx_name=name,
                                column_type=ctype)
            conn.commit()

    def _create_lookup_indices(self, config: Configuration, table_name: str) -> None:
        """ Create additional indexes used when running the API.
        """
        with connect(self.dsn) as conn:
            sqlp = SQLPreprocessor(conn, config)
            # Index required for details lookup.
            sqlp.run_string(conn,
                            """
                            CREATE INDEX IF NOT EXISTS idx_{{table_name}}_word_id
                              ON {{table_name}} USING BTREE (word_id) {{db.tablespace.search_index}}
                            """,
                            table_name=table_name)
            conn.commit()

    def _move_temporary_word_table(self, old: str) -> None:
        """ Rename all tables and indexes used by the tokenizer.
        """
        with connect(self.dsn) as conn:
            drop_tables(conn, 'word')
            with conn.cursor() as cur:
                cur.execute(f"ALTER TABLE {old} RENAME TO word")
                for idx in ('word_token', 'word_id'):
                    cur.execute(f"""ALTER INDEX idx_{old}_{idx}
                                      RENAME TO idx_word_{idx}""")
                for name, _ in WORD_TYPES:
                    cur.execute(f"""ALTER INDEX idx_{old}_{name}
                                    RENAME TO idx_word_{name}""")
            conn.commit()


class ICUNameAnalyzer(AbstractAnalyzer):
    """ The ICU analyzer uses the ICU library for splitting names.

        Each instance opens a connection to the database to request the
        normalization.
    """

    def __init__(self, dsn: str, sanitizer: PlaceSanitizer,
                 token_analysis: ICUTokenAnalysis) -> None:
        self.conn: Optional[Connection] = connect(dsn)
        self.conn.autocommit = True
        self.sanitizer = sanitizer
        self.token_analysis = token_analysis

        self._cache = _TokenCache()

    def close(self) -> None:
        """ Free all resources used by the analyzer.
        """
        if self.conn:
            self.conn.close()
            self.conn = None

    def _search_normalized(self, name: str) -> str:
        """ Return the search token transliteration of the given name.
        """
        return cast(str, self.token_analysis.search.transliterate(name)).strip()

    def _normalized(self, name: str) -> str:
        """ Return the normalized version of the given name with all
            non-relevant information removed.
        """
        return cast(str, self.token_analysis.normalizer.transliterate(name)).strip()

    def get_word_token_info(self, words: Sequence[str]) -> List[Tuple[str, str, Optional[int]]]:
        """ Return token information for the given list of words.
            If a word starts with # it is assumed to be a full name,
            otherwise it is a partial name.

            The function returns a list of tuples with
            (original word, word token, word id).

            The function is used for testing and debugging only
            and not necessarily efficient.
        """
        assert self.conn is not None
        full_tokens = {}
        partial_tokens = {}
        for word in words:
            if word.startswith('#'):
                full_tokens[word] = self._search_normalized(word[1:])
            else:
                partial_tokens[word] = self._search_normalized(word)

        with self.conn.cursor() as cur:
            cur.execute("""SELECT word_token, word_id
                            FROM word WHERE word_token = ANY(%s) and type = 'W'
                        """, (list(full_tokens.values()),))
            full_ids = {r[0]: cast(int, r[1]) for r in cur}
            cur.execute("""SELECT word_token, word_id
                            FROM word WHERE word_token = ANY(%s) and type = 'w'""",
                        (list(partial_tokens.values()),))
            part_ids = {r[0]: cast(int, r[1]) for r in cur}

        return [(k, v, full_ids.get(v, None)) for k, v in full_tokens.items()] \
            + [(k, v, part_ids.get(v, None)) for k, v in partial_tokens.items()]
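
    # Sketch of the return value (ids are made up): a leading '#' marks a
    # full word, so
    #     analyzer.get_word_token_info(['#Main Street', 'main'])
    # might yield
    #     [('#Main Street', 'main street', 123), ('main', 'main', 456)]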

    def normalize_postcode(self, postcode: str) -> str:
        """ Convert the postcode to a standardized form.

            This function must yield exactly the same result as the SQL function
            'token_normalized_postcode()'.
        """
        return postcode.strip().upper()

    def update_postcodes_from_db(self) -> None:
        """ Postcode update.

            Removes all postcodes from the word table because they are not
            needed. Postcodes are recognised by pattern.
        """
        assert self.conn is not None

        with self.conn.cursor() as cur:
            cur.execute("DELETE FROM word WHERE type = 'P'")

    def update_special_phrases(self, phrases: Iterable[Tuple[str, str, str, str]],
                               should_replace: bool) -> None:
        """ Replace the search index for special phrases with the new phrases.
            If `should_replace` is True, then the previous set of phrases
            is completely replaced. Otherwise the phrases are added to the
            already existing ones.
        """
        assert self.conn is not None
        norm_phrases = set(((self._normalized(p[0]), p[1], p[2], p[3])
                            for p in phrases))

        with self.conn.cursor() as cur:
            # Get the old phrases.
            existing_phrases = set()
            cur.execute("SELECT word, info FROM word WHERE type = 'S'")
            for word, info in cur:
                existing_phrases.add((word, info['class'], info['type'],
                                      info.get('op') or '-'))

            added = self._add_special_phrases(cur, norm_phrases, existing_phrases)
            if should_replace:
                deleted = self._remove_special_phrases(cur, norm_phrases,
                                                       existing_phrases)
            else:
                deleted = 0

        LOG.info("Total phrases: %s. Added: %s. Deleted: %s",
                 len(norm_phrases), added, deleted)
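
    # Each phrase is a (name, class, type, operator) tuple; only the operators
    # 'in' and 'near' are preserved when the word entry is written, any other
    # value is stored as NULL (see _add_special_phrases() below).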

    def _add_special_phrases(self, cursor: Cursor,
                             new_phrases: Set[Tuple[str, str, str, str]],
                             existing_phrases: Set[Tuple[str, str, str, str]]) -> int:
        """ Add all phrases to the database that are not yet there.
        """
        to_add = new_phrases - existing_phrases

        added = 0
        with cursor.copy('COPY word(word_token, type, word, info) FROM STDIN') as copy:
            for word, cls, typ, oper in to_add:
                term = self._search_normalized(word)
                if term:
                    copy.write_row((term, 'S', word,
                                    Jsonb({'class': cls, 'type': typ,
                                           'op': oper if oper in ('in', 'near') else None})))
                    added += 1

        return added

    def _remove_special_phrases(self, cursor: Cursor,
                                new_phrases: Set[Tuple[str, str, str, str]],
                                existing_phrases: Set[Tuple[str, str, str, str]]) -> int:
        """ Remove all phrases from the database that are no longer in the
            new phrase list.
        """
        to_delete = existing_phrases - new_phrases

        if to_delete:
            cursor.executemany(
                """ DELETE FROM word
                      WHERE type = 'S' and word = %s
                            and info->>'class' = %s and info->>'type' = %s
                            and %s = coalesce(info->>'op', '-')
                """, to_delete)

        return len(to_delete)

    def add_country_names(self, country_code: str, names: Mapping[str, str]) -> None:
        """ Add default names for the given country to the search index.
        """
        # Make sure any name preprocessing for country names applies.
        info = PlaceInfo({'name': names, 'country_code': country_code,
                          'rank_address': 4, 'class': 'boundary',
                          'type': 'administrative'})
        self._add_country_full_names(country_code,
                                     self.sanitizer.process_names(info)[0],
                                     internal=True)

    def _add_country_full_names(self, country_code: str, names: Sequence[PlaceName],
                                internal: bool = False) -> None:
        """ Add names for the given country from an already sanitized
            name list.
        """
        assert self.conn is not None
        word_tokens: Set[Tuple[str, str]] = set()
        for name in names:
            norm_name = self._normalized(name.name)
            token_name = self._search_normalized(name.name)
            if norm_name and token_name:
                word_tokens.add((token_name, norm_name))

        with self.conn.cursor() as cur:
            cur.execute("""SELECT word_token,
                                  word,
                                  coalesce(info ? 'internal', false) as is_internal
                             FROM word
                             WHERE type = 'C' and info->>'cc' = %s""",
                        (country_code, ))
            # internal/external names
            existing_tokens: Dict[bool, Set[Tuple[str, str]]] = {True: set(), False: set()}
            for word in cur:
                existing_tokens[word[2]].add((word[0], word[1]))

            # Delete names that no longer exist.
            gone_tokens = existing_tokens[internal] - word_tokens
            if internal:
                gone_tokens.update(existing_tokens[False] & word_tokens)
            if gone_tokens:
                cur.execute("""DELETE FROM word
                               USING jsonb_array_elements(%s) as data
                               WHERE type = 'C' and info->>'cc' = %s
                                     and word_token = data->>0 and word = data->>1""",
                            (Jsonb(list(gone_tokens)), country_code))

            # Only add those names that are not yet in the list.
            new_tokens = word_tokens - existing_tokens[True]
            if not internal:
                new_tokens -= existing_tokens[False]
            if new_tokens:
                if internal:
                    sql = """INSERT INTO word (word_token, type, word, info)
                               (SELECT data->>0, 'C', data->>1,
                                       jsonb_build_object('internal', 'yes', 'cc', %s::text)
                                  FROM jsonb_array_elements(%s) as data)
                           """
                else:
                    sql = """INSERT INTO word (word_token, type, word, info)
                               (SELECT data->>0, 'C', data->>1,
                                       jsonb_build_object('cc', %s::text)
                                  FROM jsonb_array_elements(%s) as data)
                           """
                cur.execute(sql, (country_code, Jsonb(list(new_tokens))))
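
    # Country names added via add_country_names() are flagged info.internal =
    # 'yes'; names taken from country places only carry the country code. When
    # an internal name arrives for an existing external entry, the external row
    # is removed and re-inserted as internal.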

    def process_place(self, place: PlaceInfo) -> Mapping[str, Any]:
        """ Determine tokenizer information about the given place.

            Returns a JSON-serializable structure that will be handed into
            the database via the token_info field.
        """
        token_info = _TokenInfo()

        names, address = self.sanitizer.process_names(place)

        if names:
            token_info.set_names(*self._compute_name_tokens(names))

            if place.is_country():
                assert place.country_code is not None
                self._add_country_full_names(place.country_code, names)

        if address:
            self._process_place_address(token_info, address)

        return token_info.to_dict()
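
    # The resulting mapping mirrors _TokenInfo.to_dict(); token ids are encoded
    # as PostgreSQL array literals. Illustrative shape (values made up):
    #
    #     {'names': '{615,1024}', 'hnr': '12;12a', 'hnr_tokens': '{77}',
    #      'street': '{88}', 'addr': {'city': '{99}'}, 'postcode': '12345'}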

    def _process_place_address(self, token_info: '_TokenInfo',
                               address: Sequence[PlaceName]) -> None:
        for item in address:
            if item.kind == 'postcode':
                token_info.set_postcode(self._add_postcode(item))
            elif item.kind == 'housenumber':
                token_info.add_housenumber(*self._compute_housenumber_token(item))
            elif item.kind == 'street':
                token_info.add_street(self._retrieve_full_tokens(item.name))
            elif item.kind == 'place':
                if not item.suffix:
                    token_info.add_place(itertools.chain(*self._compute_name_tokens([item])))
            elif (not item.kind.startswith('_') and not item.suffix and
                  item.kind not in ('country', 'full', 'inclusion')):
                token_info.add_address_term(item.kind,
                                            itertools.chain(*self._compute_name_tokens([item])))

    def _compute_housenumber_token(self, hnr: PlaceName) -> Tuple[Optional[int], Optional[str]]:
        """ Normalize the housenumber and return the word token and the
            canonical form.
        """
        assert self.conn is not None
        analyzer = self.token_analysis.analysis.get('@housenumber')
        result: Tuple[Optional[int], Optional[str]] = (None, None)

        if analyzer is None:
            # When no custom analyzer is set, simply normalize and transliterate
            norm_name = self._search_normalized(hnr.name)
            if norm_name:
                result = self._cache.housenumbers.get(norm_name, result)
                if result[0] is None:
                    hid = execute_scalar(self.conn, "SELECT getorcreate_hnr_id(%s)", (norm_name, ))

                    result = hid, norm_name
                    self._cache.housenumbers[norm_name] = result
        else:
            # Otherwise use the analyzer to determine the canonical name.
            # Per convention we use the first variant as the 'lookup name', the
            # name that gets saved in the housenumber field of the place.
            word_id = analyzer.get_canonical_id(hnr)
            if word_id:
                result = self._cache.housenumbers.get(word_id, result)
                if result[0] is None:
                    varout = analyzer.compute_variants(word_id)
                    if isinstance(varout, tuple):
                        variants = varout[0]
                    else:
                        variants = varout
                    if variants:
                        hid = execute_scalar(self.conn, "SELECT create_analyzed_hnr_id(%s, %s)",
                                             (word_id, variants))
                        result = hid, variants[0]
                        self._cache.housenumbers[word_id] = result

        return result

    def _retrieve_full_tokens(self, name: str) -> List[int]:
        """ Get the full name token for the given name, if it exists.
            The name is only retrieved for the standard analyser.
        """
        assert self.conn is not None
        norm_name = self._search_normalized(name)

        # return cached if possible
        if norm_name in self._cache.fulls:
            return self._cache.fulls[norm_name]

        with self.conn.cursor() as cur:
            cur.execute("SELECT word_id FROM word WHERE word_token = %s and type = 'W'",
                        (norm_name, ))
            full = [row[0] for row in cur]

        self._cache.fulls[norm_name] = full

        return full

    def _compute_name_tokens(self, names: Sequence[PlaceName]) -> Tuple[Set[int], Set[int]]:
        """ Computes the full name and partial name tokens for the given
            list of names.
        """
        assert self.conn is not None
        full_tokens: Set[int] = set()
        partial_tokens: Set[int] = set()

        for name in names:
            analyzer_id = name.get_attr('analyzer')
            analyzer = self.token_analysis.get_analyzer(analyzer_id)
            word_id = analyzer.get_canonical_id(name)
            if analyzer_id is None:
                token_id = word_id
            else:
                token_id = f'{word_id}@{analyzer_id}'

            full, part = self._cache.names.get(token_id, (None, None))
            if full is None:
                varset = analyzer.compute_variants(word_id)
                if isinstance(varset, tuple):
                    variants, lookups = varset
                else:
                    variants, lookups = varset, None
                if not variants:
                    continue

                with self.conn.cursor() as cur:
                    cur.execute("SELECT * FROM getorcreate_full_word(%s, %s, %s)",
                                (token_id, variants, lookups))
                    full, part = cast(Tuple[int, List[int]], cur.fetchone())

                self._cache.names[token_id] = (full, part)

            assert part is not None

            full_tokens.add(full)
            partial_tokens.update(part)

        return full_tokens, partial_tokens
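
    # Token lookups are cached per canonical id ('<id>@<analyzer>' when a custom
    # analyzer is configured); getorcreate_full_word() returns the full-word
    # token together with the partial-word tokens for all variants.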

    def _add_postcode(self, item: PlaceName) -> Optional[str]:
        """ Make sure the normalized postcode is present in the word table.
        """
        assert self.conn is not None
        analyzer = self.token_analysis.analysis.get('@postcode')

        if analyzer is None:
            return item.name.strip().upper()

        return analyzer.get_canonical_id(item)
679 """ Collect token information to be sent back to the database.
681 def __init__(self) -> None:
682 self.names: Optional[str] = None
683 self.housenumbers: Set[str] = set()
684 self.housenumber_tokens: Set[int] = set()
685 self.street_tokens: Optional[Set[int]] = None
686 self.place_tokens: Set[int] = set()
687 self.address_tokens: Dict[str, str] = {}
688 self.postcode: Optional[str] = None

    def _mk_array(self, tokens: Iterable[Any]) -> str:
        return f"{{{','.join((str(s) for s in tokens))}}}"

    def to_dict(self) -> Dict[str, Any]:
        """ Return the token information in database importable format.
        """
        out: Dict[str, Any] = {}

        if self.names:
            out['names'] = self.names

        if self.housenumbers:
            out['hnr'] = ';'.join(self.housenumbers)
            out['hnr_tokens'] = self._mk_array(self.housenumber_tokens)

        if self.street_tokens is not None:
            out['street'] = self._mk_array(self.street_tokens)

        if self.place_tokens:
            out['place'] = self._mk_array(self.place_tokens)

        if self.address_tokens:
            out['addr'] = self.address_tokens

        if self.postcode:
            out['postcode'] = self.postcode

        return out

    def set_names(self, fulls: Iterable[int], partials: Iterable[int]) -> None:
        """ Adds token information for the normalised names.
        """
        self.names = self._mk_array(itertools.chain(fulls, partials))

    def add_housenumber(self, token: Optional[int], hnr: Optional[str]) -> None:
        """ Extract housenumber information from a list of normalised
            housenumbers.
        """
        if token:
            assert hnr is not None
            self.housenumbers.add(hnr)
            self.housenumber_tokens.add(token)

    def add_street(self, tokens: Iterable[int]) -> None:
        """ Add addr:street match terms.
        """
        if self.street_tokens is None:
            self.street_tokens = set()
        self.street_tokens.update(tokens)

    def add_place(self, tokens: Iterable[int]) -> None:
        """ Add addr:place search and match terms.
        """
        self.place_tokens.update(tokens)

    def add_address_term(self, key: str, partials: Iterable[int]) -> None:
        """ Add additional address terms.
        """
        array = self._mk_array(partials)
        if len(array) > 2:
            self.address_tokens[key] = array

    def set_postcode(self, postcode: Optional[str]) -> None:
        """ Set the postcode to the given one.
        """
        self.postcode = postcode
759 """ Cache for token information to avoid repeated database queries.
761 This cache is not thread-safe and needs to be instantiated per
764 def __init__(self) -> None:
765 self.names: Dict[str, Tuple[int, List[int]]] = {}
766 self.partials: Dict[str, int] = {}
767 self.fulls: Dict[str, List[int]] = {}
768 self.housenumbers: Dict[str, Tuple[Optional[int], Optional[str]]] = {}