]> git.openstreetmap.org Git - nominatim.git/commitdiff
import TIGER data into temporary table
authorSarah Hoffmann <lonvia@denofr.de>
Sat, 18 Aug 2012 08:22:35 +0000 (10:22 +0200)
committerSarah Hoffmann <lonvia@denofr.de>
Sat, 18 Aug 2012 08:30:42 +0000 (10:30 +0200)
Import TIGER data into a temporary table first that later replaces
the current location_property_tiger table. This way index creation
on the table can be delayed until after the import which should
speed up the import and result in significantly smaller indexes.

Also removed index on parent_place_id as it is covered by
idx_location_property_tiger_housenumber_parent_place_id.

sql/functions.sql
sql/tiger_import_finish.sql [new file with mode: 0644]
sql/tiger_import_start.sql [new file with mode: 0644]
utils/setup.php

index 2c4429bbb97165abb968daaf95dd75213a59b7c7..548362ad21230775dc9d0bd889cfa416d39649ec 100644 (file)
@@ -2582,93 +2582,6 @@ END;
 $$
 LANGUAGE plpgsql;
 
-CREATE OR REPLACE FUNCTION tigger_create_interpolation(linegeo GEOMETRY, in_startnumber INTEGER, 
-  in_endnumber INTEGER, interpolationtype TEXT, 
-  in_street TEXT, in_isin TEXT, in_postcode TEXT) RETURNS INTEGER
-  AS $$
-DECLARE
-  
-  startnumber INTEGER;
-  endnumber INTEGER;
-  stepsize INTEGER;
-  housenum INTEGER;
-  newpoints INTEGER;
-  numberrange INTEGER;
-  rangestartnumber INTEGER;
-  place_centroid GEOMETRY;
-  out_partition INTEGER;
-  out_parent_place_id BIGINT;
-  location RECORD;
-  address_street_word_id INTEGER;  
-
-BEGIN
-
-  IF in_endnumber > in_startnumber THEN
-    startnumber = in_startnumber;
-    endnumber = in_endnumber;
-  ELSE
-    startnumber = in_endnumber;
-    endnumber = in_startnumber;
-  END IF;
-
-  numberrange := endnumber - startnumber;
-  rangestartnumber := startnumber;
-
-  IF (interpolationtype = 'odd' AND startnumber%2 = 0) OR (interpolationtype = 'even' AND startnumber%2 = 1) THEN
-    startnumber := startnumber + 1;
-    stepsize := 2;
-  ELSE
-    IF (interpolationtype = 'odd' OR interpolationtype = 'even') THEN
-      stepsize := 2;
-    ELSE -- everything else assumed to be 'all'
-      stepsize := 1;
-    END IF;
-  END IF;
-
-  -- Filter out really broken tiger data
-  IF numberrange > 0 AND (numberrange::float/stepsize::float > 500) 
-                    AND ST_length(linegeo)/(numberrange::float/stepsize::float) < 0.000001 THEN
-    RAISE WARNING 'Road too short for number range % to % on %, % (%)',startnumber,endnumber,in_street,in_isin,
-                  ST_length(linegeo)/(numberrange::float/stepsize::float);    
-    RETURN 0;
-  END IF;
-
-  place_centroid := ST_Centroid(linegeo);
-  out_partition := get_partition(place_centroid, 'us');
-  out_parent_place_id := null;
-
-  address_street_word_id := get_name_id(make_standard_name(in_street));
-  IF address_street_word_id IS NOT NULL THEN
-    FOR location IN SELECT * from getNearestNamedRoadFeature(out_partition, place_centroid, address_street_word_id) LOOP
-      out_parent_place_id := location.place_id;
-    END LOOP;
-  END IF;
-
-  IF out_parent_place_id IS NULL THEN
-    FOR location IN SELECT place_id FROM getNearestParellelRoadFeature(out_partition, linegeo) LOOP
-      out_parent_place_id := location.place_id;
-    END LOOP;    
-  END IF;
-
-  IF out_parent_place_id IS NULL THEN
-    FOR location IN SELECT place_id FROM getNearestRoadFeature(out_partition, place_centroid) LOOP
-      out_parent_place_id := location.place_id;
-    END LOOP;    
-  END IF;
-
-  newpoints := 0;
-  FOR housenum IN startnumber..endnumber BY stepsize LOOP
-    insert into location_property_tiger (place_id, partition, parent_place_id, housenumber, postcode, centroid)
-    values (nextval('seq_place'), out_partition, out_parent_place_id, housenum, in_postcode,
-      ST_Line_Interpolate_Point(linegeo, (housenum::float-rangestartnumber::float)/numberrange::float));
-    newpoints := newpoints + 1;
-  END LOOP;
-
-  RETURN newpoints;
-END;
-$$
-LANGUAGE plpgsql;
-
 CREATE OR REPLACE FUNCTION aux_create_property(pointgeo GEOMETRY, in_housenumber TEXT, 
   in_street TEXT, in_isin TEXT, in_postcode TEXT, in_countrycode char(2)) RETURNS INTEGER
   AS $$
diff --git a/sql/tiger_import_finish.sql b/sql/tiger_import_finish.sql
new file mode 100644 (file)
index 0000000..a0f4efc
--- /dev/null
@@ -0,0 +1,12 @@
+CREATE INDEX idx_location_property_tiger_housenumber_parent_place_id_imp ON location_property_tiger_import (parent_place_id, housenumber);
+CREATE UNIQUE INDEX idx_location_property_tiger_place_id_imp ON location_property_tiger_import (place_id);
+
+GRANT SELECT ON location_property_tiger_import TO "www-data";
+
+DROP TABLE location_property_tiger;
+ALTER TABLE location_property_tiger_import RENAME TO location_property_tiger;
+
+ALTER INDEX idx_location_property_tiger_housenumber_parent_place_id_imp RENAME TO idx_location_property_tiger_housenumber_parent_place_id;
+ALTER INDEX idx_location_property_tiger_place_id_imp RENAME TO idx_location_property_tiger_place_id;
+
+DROP FUNCTION tigger_create_interpolation (linegeo geometry, in_startnumber integer, in_endnumber integer, interpolationtype text, in_street text, in_isin text, in_postcode text);
diff --git a/sql/tiger_import_start.sql b/sql/tiger_import_start.sql
new file mode 100644 (file)
index 0000000..f40f544
--- /dev/null
@@ -0,0 +1,89 @@
+DROP TABLE IF EXISTS location_property_tiger_import;
+CREATE TABLE location_property_tiger_import () INHERITS (location_property);
+
+CREATE OR REPLACE FUNCTION tigger_create_interpolation(linegeo GEOMETRY, in_startnumber INTEGER, 
+  in_endnumber INTEGER, interpolationtype TEXT, 
+  in_street TEXT, in_isin TEXT, in_postcode TEXT) RETURNS INTEGER
+  AS $$
+DECLARE
+  
+  startnumber INTEGER;
+  endnumber INTEGER;
+  stepsize INTEGER;
+  housenum INTEGER;
+  newpoints INTEGER;
+  numberrange INTEGER;
+  rangestartnumber INTEGER;
+  place_centroid GEOMETRY;
+  out_partition INTEGER;
+  out_parent_place_id BIGINT;
+  location RECORD;
+  address_street_word_id INTEGER;  
+
+BEGIN
+
+  IF in_endnumber > in_startnumber THEN
+    startnumber = in_startnumber;
+    endnumber = in_endnumber;
+  ELSE
+    startnumber = in_endnumber;
+    endnumber = in_startnumber;
+  END IF;
+
+  numberrange := endnumber - startnumber;
+  rangestartnumber := startnumber;
+
+  IF (interpolationtype = 'odd' AND startnumber%2 = 0) OR (interpolationtype = 'even' AND startnumber%2 = 1) THEN
+    startnumber := startnumber + 1;
+    stepsize := 2;
+  ELSE
+    IF (interpolationtype = 'odd' OR interpolationtype = 'even') THEN
+      stepsize := 2;
+    ELSE -- everything else assumed to be 'all'
+      stepsize := 1;
+    END IF;
+  END IF;
+
+  -- Filter out really broken tiger data
+  IF numberrange > 0 AND (numberrange::float/stepsize::float > 500) 
+                    AND ST_length(linegeo)/(numberrange::float/stepsize::float) < 0.000001 THEN
+    RAISE WARNING 'Road too short for number range % to % on %, % (%)',startnumber,endnumber,in_street,in_isin,
+                  ST_length(linegeo)/(numberrange::float/stepsize::float);    
+    RETURN 0;
+  END IF;
+
+  place_centroid := ST_Centroid(linegeo);
+  out_partition := get_partition(place_centroid, 'us');
+  out_parent_place_id := null;
+
+  address_street_word_id := get_name_id(make_standard_name(in_street));
+  IF address_street_word_id IS NOT NULL THEN
+    FOR location IN SELECT * from getNearestNamedRoadFeature(out_partition, place_centroid, address_street_word_id) LOOP
+      out_parent_place_id := location.place_id;
+    END LOOP;
+  END IF;
+
+  IF out_parent_place_id IS NULL THEN
+    FOR location IN SELECT place_id FROM getNearestParellelRoadFeature(out_partition, linegeo) LOOP
+      out_parent_place_id := location.place_id;
+    END LOOP;    
+  END IF;
+
+  IF out_parent_place_id IS NULL THEN
+    FOR location IN SELECT place_id FROM getNearestRoadFeature(out_partition, place_centroid) LOOP
+      out_parent_place_id := location.place_id;
+    END LOOP;    
+  END IF;
+
+  newpoints := 0;
+  FOR housenum IN startnumber..endnumber BY stepsize LOOP
+    insert into location_property_tiger_import (place_id, partition, parent_place_id, housenumber, postcode, centroid)
+    values (nextval('seq_place'), out_partition, out_parent_place_id, housenum, in_postcode,
+      ST_Line_Interpolate_Point(linegeo, (housenum::float-rangestartnumber::float)/numberrange::float));
+    newpoints := newpoints + 1;
+  END LOOP;
+
+  RETURN newpoints;
+END;
+$$
+LANGUAGE plpgsql;
index c842ba33e40b927b2ff1ab23fc0770dcb608b236..049ef860d2a9b933a9facee787e3ec92dc6be3a2 100755 (executable)
        {
                $bDidSomething = true;
 
+               pgsqlRunScriptFile(CONST_BasePath.'/sql/tiger_import_start.sql');
+
                $aDBInstances = array();
                for($i = 0; $i < $iInstances; $i++)
                {
                        }
                        echo "\n";
                }
+
+               echo "Creating indexes\n";
+               pgsqlRunScriptFile(CONST_BasePath.'/sql/tiger_import_finish.sql');
        }
 
        if ($aCMDResult['calculate-postcodes'] || $aCMDResult['all'])