8 #include <libxml/xmlstring.h>
9 #include <libxml/xmlreader.h>
10 #include <libxml/hash.h>
12 #include "nominatim.h"
// Input document formats this importer recognises; StartElement stores one of
// these values in the file-scope `fileType`.
16 typedef enum { FILETYPE_NONE, FILETYPE_STRUCTUREDV0P1 } filetypes_t;
// Operation applied to each feature; StartElement selects it from the add /
// update / delete elements and stores it in the file-scope `fileMode`.
17 typedef enum { FILEMODE_NONE, FILEMODE_ADD, FILEMODE_UPDATE, FILEMODE_DELETE } filemodes_t;
// Capacities of the statically sized per-feature arrays (address lines, names,
// extra tags) and of the two string buffers assembled in EndElement.
19 #define MAX_FEATUREADDRESS 5000
20 #define MAX_FEATURENAMES 10000
21 #define MAX_FEATUREEXTRATAGS 10000
22 #define MAX_FEATURENAMESTRING 1000000
23 #define MAX_FEATUREEXTRATAGSTRING 500000
// Record type used for the elements of `featureAddress`; only the tag line of
// the definition is visible here.
25 struct feature_address
// NOTE(review): the three members below carry the names of fields that
// StartElement assigns on the global `feature` (rankAddress, countryCode,
// houseNumber) - presumably they belong to `struct feature`; confirm.
50 xmlChar * rankAddress;
52 xmlChar * countryCode;
54 xmlChar * houseNumber;
// Parser state: detected document format and the operation mode currently in
// force for incoming features.
58 int fileType = FILETYPE_NONE;
59 int fileMode = FILEMODE_ADD;
// Data collected for the feature being parsed. StartElement fills these and
// EndElement consumes them when the feature element closes.
61 struct feature_address featureAddress[MAX_FEATUREADDRESS];
62 struct feature_tag featureName[MAX_FEATURENAMES];
63 struct feature_tag featureExtraTag[MAX_FEATUREEXTRATAGS];
64 struct feature feature;
// Number of used slots in the three arrays above.
65 int featureAddressLines = 0;
66 int featureNameLines = 0;
67 int featureExtraTagLines = 0;
// Looked up with (feature.key, feature.value); the payload is the name of the
// prepared partition-insert statement registered by nominatim_import.
69 xmlHashTablePtr partionTableTagsHash;
// Scratch buffers in which EndElement assembles the name and extra-tag
// parameter strings for the placex_insert statement.
70 char featureNameString[MAX_FEATURENAMESTRING];
71 char featureExtraTagString[MAX_FEATUREEXTRATAGSTRING];
/*
 * Handle an opening XML element reported by processNode.
 *
 * Depending on `name` this recognises the document type, switches the import
 * mode, starts a new feature, or stores a name, extra tag, scalar value or
 * address line for the feature being collected in the file-scope buffers.
 * Strings obtained from the reader are kept in those buffers; EndElement
 * releases them with xmlFree when the feature closes.
 *
 * reader - text reader positioned on the element; attributes and text content
 *          are read from it
 * name   - name of the element being opened
 */
73 void StartElement(xmlTextReaderPtr reader, const xmlChar *name)
// Until a document type has been recognised, the root element is inspected:
// only osmStructured with version 0.1 sets a file type.
79 if (fileType == FILETYPE_NONE)
81 // Potential to handle other file types in the future / versions
82 if (xmlStrEqual(name, BAD_CAST "osmStructured"))
84 value = (char*)xmlTextReaderGetAttribute(reader, BAD_CAST "version");
// NOTE(review): no NULL check on `value` is visible before strtof; a missing
// version attribute would make this a NULL dereference - confirm.
85 version = strtof(value, NULL);
88 if (version == (float)0.1)
90 fileType = FILETYPE_STRUCTUREDV0P1;
91 fileMode = FILEMODE_ADD;
95 fprintf( stderr, "Unknown osmStructured version %f (%s)\n", version, value );
101 fprintf( stderr, "Unknown XML document type: %s\n", name );
// Mode elements select what EndElement does with the features that follow.
107 if (xmlStrEqual(name, BAD_CAST "add"))
109 fileMode = FILEMODE_ADD;
112 if (xmlStrEqual(name, BAD_CAST "update"))
114 fileMode = FILEMODE_UPDATE;
117 if (xmlStrEqual(name, BAD_CAST "delete"))
119 fileMode = FILEMODE_DELETE;
122 if (fileMode == FILEMODE_NONE)
124 fprintf( stderr, "Unknown import mode in: %s\n", name );
// A new feature: take the identifying attributes from the element and clear
// every field and counter that child elements fill in later.
128 if (xmlStrEqual(name, BAD_CAST "feature"))
130 feature.placeID = xmlTextReaderGetAttribute(reader, BAD_CAST "place_id");
131 feature.type = xmlTextReaderGetAttribute(reader, BAD_CAST "type");
132 feature.id = xmlTextReaderGetAttribute(reader, BAD_CAST "id");
133 feature.key = xmlTextReaderGetAttribute(reader, BAD_CAST "key");
134 feature.value = xmlTextReaderGetAttribute(reader, BAD_CAST "value");
135 feature.rankAddress = xmlTextReaderGetAttribute(reader, BAD_CAST "rank");
136 feature.rankSearch = xmlTextReaderGetAttribute(reader, BAD_CAST "importance");
138 feature.countryCode = NULL;
139 feature.adminLevel = NULL;
140 feature.houseNumber = NULL;
141 feature.geometry = NULL;
142 featureAddressLines = 0;
143 featureNameLines = 0;
144 featureExtraTagLines = 0;
// The names container itself carries no data; each name child contributes a
// (type attribute, text content) pair while capacity remains.
148 if (xmlStrEqual(name, BAD_CAST "names")) return;
149 if (xmlStrEqual(name, BAD_CAST "name"))
151 if (featureNameLines < MAX_FEATURENAMES)
153 featureName[featureNameLines].type = xmlTextReaderGetAttribute(reader, BAD_CAST "type");
154 featureName[featureNameLines].value = xmlTextReaderReadString(reader);
159 fprintf( stderr, "Too many name elements (%s%s)\n", feature.type, feature.id);
// Extra tags are collected the same way as names.
164 if (xmlStrEqual(name, BAD_CAST "tags")) return;
165 if (xmlStrEqual(name, BAD_CAST "tag"))
167 if (featureExtraTagLines < MAX_FEATUREEXTRATAGS)
169 featureExtraTag[featureExtraTagLines].type = xmlTextReaderGetAttribute(reader, BAD_CAST "type");
170 featureExtraTag[featureExtraTagLines].value = xmlTextReaderReadString(reader);
171 featureExtraTagLines++;
175 fprintf( stderr, "Too many extra tag elements (%s%s)\n", feature.type, feature.id);
// Scalar children: the element's text content becomes the matching field.
180 if (xmlStrEqual(name, BAD_CAST "osmGeometry"))
182 feature.geometry = xmlTextReaderReadString(reader);
185 if (xmlStrEqual(name, BAD_CAST "adminLevel"))
187 feature.adminLevel = xmlTextReaderReadString(reader);
190 if (xmlStrEqual(name, BAD_CAST "countryCode"))
192 feature.countryCode = xmlTextReaderReadString(reader);
195 if (xmlStrEqual(name, BAD_CAST "houseNumber"))
197 feature.houseNumber = xmlTextReaderReadString(reader);
// The address container restarts the address-line list.
200 if (xmlStrEqual(name, BAD_CAST "address"))
202 featureAddressLines = 0;
// Element names that are accepted as address lines.
206 if (xmlStrEqual(name, BAD_CAST "continent"))
210 else if (xmlStrEqual(name, BAD_CAST "sea"))
214 else if (xmlStrEqual(name, BAD_CAST "country"))
218 else if (xmlStrEqual(name, BAD_CAST "state"))
222 else if (xmlStrEqual(name, BAD_CAST "county"))
226 else if (xmlStrEqual(name, BAD_CAST "city"))
230 else if (xmlStrEqual(name, BAD_CAST "town"))
234 else if (xmlStrEqual(name, BAD_CAST "village"))
238 else if (xmlStrEqual(name, BAD_CAST "unknown"))
242 else if (xmlStrEqual(name, BAD_CAST "suburb"))
246 else if (xmlStrEqual(name, BAD_CAST "postcode"))
250 else if (xmlStrEqual(name, BAD_CAST "neighborhood"))
254 else if (xmlStrEqual(name, BAD_CAST "street"))
258 else if (xmlStrEqual(name, BAD_CAST "access"))
262 else if (xmlStrEqual(name, BAD_CAST "building"))
266 else if (xmlStrEqual(name, BAD_CAST "other"))
// Store one address line: rank and isaddress are converted immediately, the
// remaining attributes are kept as libxml2 strings for EndElement.
272 if (featureAddressLines < MAX_FEATUREADDRESS)
274 value = (char*)xmlTextReaderGetAttribute(reader, BAD_CAST "rank");
277 fprintf( stderr, "Address element missing rank\n");
280 featureAddress[featureAddressLines].rankAddress = atoi(value);
283 value = (char*)xmlTextReaderGetAttribute(reader, BAD_CAST "isaddress");
// This diagnostic belongs to the isaddress attribute; it used to repeat the
// "missing rank" text of the check above.
286 fprintf( stderr, "Address element missing isaddress\n");
// Only the first character is examined: 't' is stored as "t", anything else as "f".
289 if (*value == 't') strcpy(featureAddress[featureAddressLines].isAddress, "t");
290 else strcpy(featureAddress[featureAddressLines].isAddress, "f");
293 featureAddress[featureAddressLines].type = xmlTextReaderGetAttribute(reader, BAD_CAST "type");
294 featureAddress[featureAddressLines].id = xmlTextReaderGetAttribute(reader, BAD_CAST "id");
295 featureAddress[featureAddressLines].key = xmlTextReaderGetAttribute(reader, BAD_CAST "key");
296 featureAddress[featureAddressLines].value = xmlTextReaderGetAttribute(reader, BAD_CAST "value");
297 featureAddress[featureAddressLines].distance = xmlTextReaderGetAttribute(reader, BAD_CAST "distance");
299 featureAddressLines++;
303 fprintf( stderr, "Too many address elements (%s%s)\n", feature.type, feature.id);
// Reported for element names that none of the checks above handle.
309 fprintf(stderr, "%s: Unknown element name: %s\n", __FUNCTION__, name);
/*
 * Handle a closing XML element. The visible code acts on "feature" only: the
 * data gathered by StartElement is written to the database through prepared
 * statements on `conn` according to `fileMode`, after which the libxml2
 * strings held for the feature are released with xmlFree.
 *
 * reader - not used by the visible code
 * name   - name of the element being closed
 */
312 void EndElement(xmlTextReaderPtr reader, const xmlChar *name)
315 PGresult * resPlaceID;
// Sized for the largest statement executed below: placex_insert binds 12
// parameters (indices 0..11). The array previously had only 11 slots, so the
// write to index 11 ran past its end.
316 const char * paramValues[12];
318 char * partionQueryName;
319 int i, namePos, lineTypeLen, lineValueLen;
321 if (xmlStrEqual(name, BAD_CAST "feature"))
324 if (featureCount % 1000 == 0) printf("feature %i(k)\n", featureCount/1000);
// Place id lookup: a fresh sequence value when adding, otherwise the existing
// row identified by (type, id, key, value).
// NOTE(review): the result in resPlaceID is not read anywhere in the visible
// code and its PQclear further down is commented out; place_id is taken from
// the place_id attribute of the feature instead - confirm this is intended.
326 if (fileMode == FILEMODE_ADD)
328 resPlaceID = PQexecPrepared(conn, "get_new_place_id", 0, NULL, NULL, NULL, 0);
329 if (PQresultStatus(resPlaceID) != PGRES_TUPLES_OK)
331 fprintf(stderr, "get_new_place_id: SELECT failed: %s", PQerrorMessage(conn));
338 paramValues[0] = (const char *)feature.type;
339 paramValues[1] = (const char *)feature.id;
340 paramValues[2] = (const char *)feature.key;
341 paramValues[3] = (const char *)feature.value;
// get_place_id is the statement prepared with four placeholders;
// get_new_place_id takes none and cannot be executed with these parameters.
342 resPlaceID = PQexecPrepared(conn, "get_place_id", 4, paramValues, NULL, NULL, 0);
343 if (PQresultStatus(resPlaceID) != PGRES_TUPLES_OK)
345 fprintf(stderr, "get_place_id: SELECT failed: %s", PQerrorMessage(conn));
351 place_id = feature.placeID;
// Update and delete both start by removing the existing rows for this place.
353 if (fileMode == FILEMODE_UPDATE || fileMode == FILEMODE_DELETE)
355 paramValues[0] = (const char *)place_id;
356 res = PQexecPrepared(conn, "placex_delete", 1, paramValues, NULL, NULL, 0);
357 if (PQresultStatus(res) != PGRES_COMMAND_OK)
359 fprintf(stderr, "placex_delete: DELETE failed: %s", PQerrorMessage(conn));
365 res = PQexecPrepared(conn, "search_name_delete", 1, paramValues, NULL, NULL, 0);
366 if (PQresultStatus(res) != PGRES_COMMAND_OK)
368 fprintf(stderr, "search_name_delete: DELETE failed: %s", PQerrorMessage(conn));
374 res = PQexecPrepared(conn, "place_addressline_delete", 1, paramValues, NULL, NULL, 0);
375 if (PQresultStatus(res) != PGRES_COMMAND_OK)
377 fprintf(stderr, "place_addressline_delete: DELETE failed: %s", PQerrorMessage(conn));
// Update and add both (re)insert the feature.
384 if (fileMode == FILEMODE_UPDATE || fileMode == FILEMODE_ADD)
386 // Insert into placex
387 paramValues[0] = (const char *)place_id;
388 paramValues[1] = (const char *)feature.type;
389 paramValues[2] = (const char *)feature.id;
390 paramValues[3] = (const char *)feature.key;
391 paramValues[4] = (const char *)feature.value;
// Assemble the names as a comma-separated list of "type"=>"value" pairs
// (presumably PostgreSQL hstore text syntax - confirm against the column type).
// NOTE(review): no escaping of '"' or '\' inside type/value is visible, and a
// name element without a type attribute would hand NULL to strlen - confirm.
393 featureNameString[0] = 0;
394 if (featureNameLines)
399 for (i = 0; i < featureNameLines; i++)
401 lineTypeLen = strlen((const char *)featureName[i].type);
402 lineValueLen = strlen((const char *)featureName[i].value);
// One entry writes at most 7 punctuation characters plus both strings, and the
// terminating NUL lands at namePos+lineTypeLen+lineValueLen+7; that index must
// stay below the buffer size, hence >= rather than >.
403 if (namePos+lineTypeLen+lineValueLen+7 >= MAX_FEATURENAMESTRING)
405 fprintf(stderr, "feature name too long: %s", (const char *)featureName[i].value);
408 if (namePos) strcpy(featureNameString+(namePos++), ",");
409 strcpy(featureNameString+(namePos++), "\"");
410 strcpy(featureNameString+namePos, (const char *)featureName[i].type);
411 namePos += lineTypeLen;
412 strcpy(featureNameString+namePos, "\"=>\"");
414 strcpy(featureNameString+namePos, (const char *)featureName[i].value);
415 namePos += lineValueLen;
416 strcpy(featureNameString+(namePos++), "\"");
419 paramValues[5] = (const char *)featureNameString;
// Extra tags are assembled the same way as the names above, into their own buffer.
421 featureExtraTagString[0] = 0;
422 if (featureExtraTagLines)
427 for (i = 0; i < featureExtraTagLines; i++)
429 lineTypeLen = strlen((const char *)featureExtraTag[i].type);
430 lineValueLen = strlen((const char *)featureExtraTag[i].value);
// Same bound as for the names: the terminating NUL must fit inside the buffer.
431 if (namePos+lineTypeLen+lineValueLen+7 >= MAX_FEATUREEXTRATAGSTRING)
433 fprintf(stderr, "feature extra tag too long: %s", (const char *)featureExtraTag[i].value);
436 if (namePos) strcpy(featureExtraTagString+(namePos++),",");
437 strcpy(featureExtraTagString+(namePos++), "\"");
438 strcpy(featureExtraTagString+namePos, (const char *)featureExtraTag[i].type);
439 namePos += lineTypeLen;
440 strcpy(featureExtraTagString+namePos, "\"=>\"");
442 strcpy(featureExtraTagString+namePos, (const char *)featureExtraTag[i].value);
443 namePos += lineValueLen;
444 strcpy(featureExtraTagString+(namePos++), "\"");
447 paramValues[6] = (const char *)featureExtraTagString;
449 paramValues[7] = (const char *)feature.adminLevel;
450 paramValues[8] = (const char *)feature.houseNumber;
451 paramValues[9] = (const char *)feature.rankAddress;
452 paramValues[10] = (const char *)feature.rankSearch;
453 paramValues[11] = (const char *)feature.geometry;
454 res = PQexecPrepared(conn, "placex_insert", 12, paramValues, NULL, NULL, 0);
455 if (PQresultStatus(res) != PGRES_COMMAND_OK)
457 fprintf(stderr, "placex_insert: INSERT failed: %s", PQerrorMessage(conn));
// One place_addressline row per collected address line; the strings of each
// line are released as soon as its row has been written.
463 for (i = 0; i < featureAddressLines; i++)
465 // insert into place_address
466 paramValues[0] = (const char *)place_id;
467 paramValues[1] = (const char *)featureAddress[i].distance;
468 paramValues[2] = (const char *)featureAddress[i].type;
469 paramValues[3] = (const char *)featureAddress[i].id;
470 paramValues[4] = (const char *)featureAddress[i].key;
471 paramValues[5] = (const char *)featureAddress[i].value;
472 paramValues[6] = (const char *)featureAddress[i].isAddress;
473 res = PQexecPrepared(conn, "place_addressline_insert", 7, paramValues, NULL, NULL, 0);
474 if (PQresultStatus(res) != PGRES_COMMAND_OK)
476 fprintf(stderr, "place_addressline_insert: INSERT failed: %s", PQerrorMessage(conn));
482 xmlFree(featureAddress[i].type);
483 xmlFree(featureAddress[i].id);
484 xmlFree(featureAddress[i].key);
485 xmlFree(featureAddress[i].value);
486 xmlFree(featureAddress[i].distance);
// search_name rows are derived from the placex row, so this runs after it.
489 if (featureNameLines)
491 paramValues[0] = (const char *)place_id;
492 res = PQexecPrepared(conn, "search_name_insert", 1, paramValues, NULL, NULL, 0);
493 if (PQresultStatus(res) != PGRES_COMMAND_OK)
495 fprintf(stderr, "search_name_insert: INSERT failed: %s", PQerrorMessage(conn));
// Features whose (key, value) pair was registered by nominatim_import also get
// a row in the matching partition table.
502 partionQueryName = xmlHashLookup2(partionTableTagsHash, feature.key, feature.value);
503 if (partionQueryName)
505 // insert into partition table
506 paramValues[0] = (const char *)place_id;
507 paramValues[1] = (const char *)feature.geometry;
508 res = PQexecPrepared(conn, partionQueryName, 2, paramValues, NULL, NULL, 0);
509 if (PQresultStatus(res) != PGRES_COMMAND_OK)
511 fprintf(stderr, "%s: INSERT failed: %s", partionQueryName, PQerrorMessage(conn));
// NOTE(review): second release loop over the address lines - presumably the
// branch taken when nothing was inserted (delete mode), so that the strings
// are freed exactly once; confirm against the surrounding control flow.
522 for (i = 0; i < featureAddressLines; i++)
524 xmlFree(featureAddress[i].type);
525 xmlFree(featureAddress[i].id);
526 xmlFree(featureAddress[i].key);
527 xmlFree(featureAddress[i].value);
528 xmlFree(featureAddress[i].distance);
// Release the strings StartElement obtained for the feature itself.
532 xmlFree(feature.placeID);
533 xmlFree(feature.type);
535 xmlFree(feature.key);
536 xmlFree(feature.value);
537 xmlFree(feature.rankAddress);
538 xmlFree(feature.rankSearch);
539 // if (feature.name) xmlFree(feature.name);
540 if (feature.countryCode) xmlFree(feature.countryCode);
541 if (feature.adminLevel) xmlFree(feature.adminLevel);
542 if (feature.houseNumber) xmlFree(feature.houseNumber);
543 if (feature.geometry) xmlFree(feature.geometry);
545 // PQclear(resPlaceID);
/*
 * Dispatch the node the reader is currently positioned on to StartElement
 * and/or EndElement according to its node type.
 *
 * reader - text reader positioned on the node to process
 */
549 static void processNode(xmlTextReaderPtr reader)
552 name = xmlTextReaderName(reader);
// Placeholder name; presumably used when the reader returned no name - confirm.
555 name = xmlStrdup(BAD_CAST "--");
558 switch (xmlTextReaderNodeType(reader))
560 case XML_READER_TYPE_ELEMENT:
561 StartElement(reader, name);
// An empty (self-closing) element produces no separate end-element node, so
// its close is reported here right after its open.
562 if (xmlTextReaderIsEmptyElement(reader))
563 EndElement(reader, name); /* No end_element for self closing tags! */
565 case XML_READER_TYPE_END_ELEMENT:
566 EndElement(reader, name);
// Text, CDATA and significant-whitespace nodes have their own case labels; no
// handler call for them is visible here.
568 case XML_READER_TYPE_TEXT:
569 case XML_READER_TYPE_CDATA:
570 case XML_READER_TYPE_SIGNIFICANT_WHITESPACE:
// Diagnostic for node types without a case of their own.
574 fprintf(stderr, "Unknown node type %d\n", xmlTextReaderNodeType(reader));
/*
 * Import a structured XML file into the database.
 *
 * Connects with `conninfo`, registers one prepared partition-insert statement
 * per key/value pair listed in the partition tags file, prepares the fixed
 * statements used by EndElement, then reads `filename` node by node through
 * the libxml2 text reader and finally frees the reader and the hash table.
 *
 * conninfo            - libpq connection string
 * partionTagsFilename - text file with one "key value" pair per line
 * filename            - XML document to import (opened through inputUTF8)
 *
 * NOTE(review): the return statements are not visible here; confirm the
 * meaning of the returned int against the callers.
 */
581 int nominatim_import(const char *conninfo, const char *partionTagsFilename, const char *filename)
583 xmlTextReaderPtr reader;
586 FILE * partionTagsFile;
587 char * partionQueryName;
588 char partionQuerySQL[1024];
590 conn = PQconnectdb(conninfo);
591 if (PQstatus(conn) != CONNECTION_OK)
593 fprintf(stderr, "Connection to database failed: %s\n", PQerrorMessage(conn));
597 partionTableTagsHash = xmlHashCreate(200);
599 partionTagsFile = fopen(partionTagsFilename, "rt");
600 if (!partionTagsFile)
602 fprintf(stderr, "Unable to read partition tags file: %s\n", partionTagsFilename);
606 char buffer[1024], osmkey[256], osmvalue[256];
// One key/value pair per line. The %23s / %63s widths bound both tokens, which
// keeps the strcat chains below far inside partionQuerySQL.
608 while (fgets(buffer, sizeof(buffer), partionTagsFile) != NULL)
610 fields = sscanf( buffer, "%23s %63s", osmkey, osmvalue );
612 if ( fields <= 0 ) continue;
616 fprintf( stderr, "Error partition file\n");
// Statement name "partition_insert_<key>_<value>": +2 covers the '_' separator
// and the terminating NUL.
// NOTE(review): the malloc result is used without a visible NULL check.
619 partionQueryName = malloc(strlen("partition_insert_")+strlen(osmkey)+strlen(osmvalue)+2);
620 strcpy(partionQueryName, "partition_insert_");
621 strcat(partionQueryName, osmkey);
622 strcat(partionQueryName, "_");
623 strcat(partionQueryName, osmvalue);
// Target table "place_classtype_<key>_<value>".
625 strcpy(partionQuerySQL, "insert into place_classtype_");
626 strcat(partionQuerySQL, osmkey);
627 strcat(partionQuerySQL, "_");
628 strcat(partionQuerySQL, osmvalue);
629 strcat(partionQuerySQL, " (place_id, centroid) values ($1, ST_Centroid(st_setsrid($2, 4326)))");
631 res = PQprepare(conn, partionQueryName, partionQuerySQL, 2, NULL);
632 if (PQresultStatus(res) != PGRES_COMMAND_OK)
634 fprintf(stderr, "Failed to prepare %s: %s\n", partionQueryName, PQerrorMessage(conn));
// The hash stores the statement name as payload; EndElement looks it up with
// the feature's key and value.
638 xmlHashAddEntry2(partionTableTagsHash, BAD_CAST osmkey, BAD_CAST osmvalue, BAD_CAST partionQueryName);
// Fixed statements executed by EndElement.
641 res = PQprepare(conn, "get_new_place_id",
642 "select nextval('seq_place')",
644 if (PQresultStatus(res) != PGRES_COMMAND_OK)
646 fprintf(stderr, "Failed to prepare get_new_place_id: %s\n", PQerrorMessage(conn));
650 res = PQprepare(conn, "get_place_id",
651 "select place_id from placex where osm_type = $1 and osm_id = $2 and class = $3 and type = $4",
653 if (PQresultStatus(res) != PGRES_COMMAND_OK)
655 fprintf(stderr, "Failed to prepare get_place_id: %s\n", PQerrorMessage(conn));
659 res = PQprepare(conn, "placex_insert",
660 "insert into placex (place_id,osm_type,osm_id,class,type,name,extratags,admin_level,housenumber,rank_address,rank_search,geometry) "
661 "values ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, st_setsrid($12, 4326))",
663 if (PQresultStatus(res) != PGRES_COMMAND_OK)
665 fprintf(stderr, "Failed to prepare placex_insert: %s\n", PQerrorMessage(conn));
// The select list follows the order of the target columns: search_rank takes
// rank_search and address_rank takes rank_address. The two were previously
// selected the other way round, which stored the ranks swapped.
669 res = PQprepare(conn, "search_name_insert",
670 "insert into search_name (place_id, search_rank, address_rank, country_code, name_vector, nameaddress_vector, centroid) "
671 "select place_id, rank_search, rank_address, country_code, make_keywords(name), "
672 "(select uniq(sort(array_agg(name_vector))) from place_addressline join search_name on "
673 "(address_place_id = search_name.place_id) where place_addressline.place_id = $1 ), st_centroid(geometry) from placex "
674 "where place_id = $1",
676 if (PQresultStatus(res) != PGRES_COMMAND_OK)
678 fprintf(stderr, "Failed to prepare search_name_insert: %s\n", PQerrorMessage(conn));
682 res = PQprepare(conn, "place_addressline_insert",
683 "insert into place_addressline (place_id, address_place_id, fromarea, isaddress, distance, cached_rank_address) "
684 "select $1, place_id, false, $7, $2, rank_address from placex where osm_type = $3 and osm_id = $4 and class = $5 and type = $6",
686 if (PQresultStatus(res) != PGRES_COMMAND_OK)
688 fprintf(stderr, "Failed to prepare place_addressline_insert: %s\n", PQerrorMessage(conn));
692 res = PQprepare(conn, "placex_delete",
693 "delete from placex where place_id = $1",
695 if (PQresultStatus(res) != PGRES_COMMAND_OK)
697 fprintf(stderr, "Failed to prepare placex_delete: %s\n", PQerrorMessage(conn));
701 res = PQprepare(conn, "search_name_delete",
702 "delete from search_name where place_id = $1",
704 if (PQresultStatus(res) != PGRES_COMMAND_OK)
706 fprintf(stderr, "Failed to prepare search_name_delete: %s\n", PQerrorMessage(conn));
710 res = PQprepare(conn, "place_addressline_delete",
711 "delete from place_addressline where place_id = $1",
713 if (PQresultStatus(res) != PGRES_COMMAND_OK)
715 fprintf(stderr, "Failed to prepare place_addressline_delete: %s\n", PQerrorMessage(conn));
// Open the input and walk it with the streaming reader.
721 reader = inputUTF8(filename);
725 fprintf(stderr, "Unable to open %s\n", filename);
729 ret = xmlTextReaderRead(reader);
733 ret = xmlTextReaderRead(reader);
737 fprintf(stderr, "%s : failed to parse\n", filename);
741 xmlFreeTextReader(reader);
// NOTE(review): with a NULL deallocator the malloc'd statement names stored as
// payloads are not released here - confirm this is acceptable at shutdown.
742 xmlHashFree(partionTableTagsHash, NULL);