git.openstreetmap.org Git - chef.git/commitdiff
Add redirects to planet S3
author Grant Slater <github@firefishy.com>
Mon, 25 Sep 2023 17:16:28 +0000 (18:16 +0100)
committer Grant <github@firefishy.com>
Tue, 26 Sep 2023 00:17:30 +0000 (01:17 +0100)
cookbooks/planet/recipes/default.rb
cookbooks/planet/templates/default/apache-s3-ip2region.erb [new file with mode: 0644]
cookbooks/planet/templates/default/apache.erb

index fdeb9b8f55a61c8ad4896ede88b2ace549004ee9..db55c246dbd4335ba52efbdbc7433655473028ce 100644 (file)
@@ -24,8 +24,9 @@ include_recipe "planet::aws"
 include_recipe "munin"
 
 package %w[
-  perl
   php-cli
+  python3
+  python3-geoip2
 ]
 
 remote_directory "/store/planet#html" do
@@ -109,6 +110,14 @@ template "/usr/local/bin/apache-latest-planet-filename" do
   notifies :restart, "service[apache2]"
 end
 
+# Render apache-s3-ip2region.erb to /usr/local/bin as a root-owned
+# executable (mode 755) and notify Apache to restart.
+template "/usr/local/bin/apache-s3-ip2region" do
+  source "apache-s3-ip2region.erb"
+  owner "root"
+  group "root"
+  mode "755"
+  notifies :restart, "service[apache2]"
+end
+
 apache_module "cgid"
 apache_module "rewrite"
 apache_module "proxy_http"
diff --git a/cookbooks/planet/templates/default/apache-s3-ip2region.erb b/cookbooks/planet/templates/default/apache-s3-ip2region.erb
new file mode 100644 (file)
index 0000000..92f438b
--- /dev/null
@@ -0,0 +1,81 @@
+#!/usr/bin/env python3
+
+import sys
+import os
+import geoip2.database
+import ipaddress
+
+# Constants
+# Path to the GeoLite2 country database; the directory part is filled in
+# when this template is rendered
+DB_PATH = "<%= node[:geoipupdate][:directory] %>/GeoLite2-Country.mmdb"
+
+# Default region when continent doesn't match any in the dictionary
+# (also used when no continent could be determined at all)
+DEFAULT_REGION = "eu-central-1"
+
+# Mapping of continents to AWS regions; continent codes not listed here
+# fall back to DEFAULT_REGION
+CONTINENT_TO_AWS_REGION = {
+    "NA": "us-west-2", # North America
+    "OC": "us-west-2", # Oceania
+    "SA": "us-west-2", # South America
+}
+
+# Globals caching the open database reader and the modification time of
+# the DB file it was opened from, so the DB is only reopened on change
+last_mod_time = None
+reader = None
+
+def is_valid_ip(ip_str):
+    """Report whether ip_str parses as an IPv4 or IPv6 address.
+
+    Returns True when ipaddress.ip_address() accepts the string and
+    False when it rejects it with ValueError.
+    """
+    try:
+        ipaddress.ip_address(ip_str)
+    except ValueError:
+        return False
+    else:
+        return True
+
+def get_reader():
+    """Return the geoip2 database reader, reloading it if the DB file changed.
+
+    The reader and the modification time it was opened at are cached in the
+    module-level globals `reader` and `last_mod_time`.
+
+    Returns:
+        The cached or freshly opened geoip2.database.Reader, or None when
+        the database file is missing or cannot be stat'ed.
+
+    Raises:
+        Whatever geoip2.database.Reader raises when the file exists but
+        cannot be opened; the previously cached reader is left untouched
+        in that case.
+    """
+    global last_mod_time
+    global reader
+
+    # Stat the file directly rather than exists() followed by getmtime():
+    # the file can disappear between the two calls.
+    try:
+        current_mod_time = os.path.getmtime(DB_PATH)
+    except OSError:
+        return None  # Database file missing or unreadable
+
+    # If file has changed or reader isn't initialized, reload it
+    if reader is None or current_mod_time != last_mod_time:
+        # Open the replacement first so a failed open does not leave a
+        # closed reader behind in the global.
+        new_reader = geoip2.database.Reader(DB_PATH)
+        if reader is not None:
+            reader.close()  # Close the existing reader once it is replaced
+        reader = new_reader
+        last_mod_time = current_mod_time
+
+    return reader
+
+def get_continent_from_ip(ip_address):
+    """Return the continent code for an IP address, or None if unknown.
+
+    Args:
+        ip_address: Candidate IPv4 or IPv6 address as a string.
+
+    Returns:
+        The continent code from the GeoIP country lookup, or None when the
+        string is not a valid IP address, the database is unavailable, or
+        the lookup fails for any other reason.
+    """
+    if not is_valid_ip(ip_address):
+        return None
+    try:
+        # Fetching the reader happens inside the try so that a failure to
+        # (re)open the database is reported as "unknown" as well.
+        db_reader = get_reader()
+        if db_reader is None:
+            return None  # No continent as DB is missing
+        return db_reader.country(ip_address).continent.code
+    except Exception:
+        # Deliberately not a bare except: KeyboardInterrupt and SystemExit
+        # must still be able to stop the process.
+        return None  # Address not in the database or other lookup issues
+
+def determine_aws_region(continent_code):
+    """Map a continent code to an AWS region.
+
+    Codes that are not keys of CONTINENT_TO_AWS_REGION (None included)
+    yield DEFAULT_REGION.
+    """
+    if continent_code in CONTINENT_TO_AWS_REGION:
+        return CONTINENT_TO_AWS_REGION[continent_code]
+    return DEFAULT_REGION
+
+def main():
+    """Read one IP address per line from stdin and answer with an AWS region.
+
+    Every answer is written to stdout on a line of its own and flushed
+    immediately.
+    """
+    for raw_line in sys.stdin:
+        region = determine_aws_region(get_continent_from_ip(raw_line.strip()))
+        print(region, flush=True)
+
+if __name__ == "__main__":
+    main()
index faddc0589ea63b1352a0356f26c98c09d0a70d1e..6d76783af12dffff011784d2108b1be4e3201eaa 100644 (file)
 
         RewriteEngine on
         RewriteMap latestplanet prg:/usr/local/bin/apache-latest-planet-filename
+        RewriteMap ip2region prg:/usr/local/bin/apache-s3-ip2region
 
-        #Direct, no redirect for the following
+        # Direct, no redirect for the following
         RewriteCond %{REMOTE_ADDR}  ^127\.                          [OR]
         RewriteCond %{REMOTE_ADDR}  ^10\.                             [OR]
-        RewriteCond %{REMOTE_ADDR}  ^193\.60\.       [OR]
-        RewriteCond %{REMOTE_ADDR}  ^193\.61\.       [OR]
-        RewriteCond %{REMOTE_ADDR}  ^193\.62\.       [OR]
-        RewriteCond %{REMOTE_ADDR}  ^193\.63\.       [OR]
         RewriteCond %{QUERY_STRING} nomirror
-        RewriteRule    .*      - [L]
+        RewriteRule    .* - [L]
 
+        # Use RewriteMap latestplanet to redirect -latest symlink to resolved file eg: planet-latest.osm.bz2 -> 2023/planet-230918.osm.bz2
         RewriteRule ^(/planet/planet\-latest\.osm\.bz2(\.torrent)?)$                ${latestplanet:$1} [R,L]
         RewriteRule ^(/planet/full\-history/history\-latest\.osm\.bz2(\.torrent)?)$ ${latestplanet:$1} [R,L]
         RewriteRule ^(/planet/changesets\-latest\.osm\.bz2(\.torrent)?)$            ${latestplanet:$1} [R,L]
         RewriteRule ^(/cc\-by\-sa/changesets\-latest\.osm\.bz2)$        ${latestplanet:$1} [R,L]
         RewriteRule ^(/cc\-by\-sa/relations\-latest\.osm\.bz2)$         ${latestplanet:$1} [R,L]
 
+        RewriteRule ^(/notes/planet\-notes\-latest\.osn\.bz2)$         ${latestplanet:$1} [R,L]
+
         # Block an abusive fake user agent
         RewriteCond %{HTTP_USER_AGENT} "=Mozilla/5.0 (Linux; Android) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36"
         RewriteRule /.*$ - [F,L]
 
+        <%# One set of S3 redirects is generated per year, from 2008 up to the year in which this template is rendered. %>
+        <%# The current year always redirects to the eu-central-1 bucket; earlier years take the bucket region %>
+        <%# from the ip2region RewriteMap keyed on the client address, with eu-central-1 as the map default. %>
+        <% start_year = 2008 %>
+        <% current_year = Time.now.year %>
+        <% (start_year..current_year).each do |year| %>
+          <% year_two = sprintf('%02d', year % 100) %>
+          <% region = year == current_year ? 'eu-central-1' : '${ip2region:%{REMOTE_ADDR}|eu-central-1}' %>
+        RewriteRule ^/pbf/(planet\-<%= year_two %>[0-1][0-9][0-3][0-9]\.osm\.pbf(\.torrent|\.md5)?)$ https://osm-planet-<%= region %>.s3.amazonaws.com/planet/pbf/<%= year %>/$1 [R,L]
+        RewriteRule ^/pbf/full\-history/(history\-<%= year_two %>[0-1][0-9][0-3][0-9]\.osm\.pbf(\.torrent|\.md5)?)$ https://osm-planet-<%= region %>.s3.amazonaws.com/planet-full-history/pbf/<%= year %>/$1 [R,L]
+        RewriteRule ^/planet/<%= year %>/(planet\-<%= year_two %>[0-1][0-9][0-3][0-9]\.osm\.bz2(\.torrent|\.md5)?)$ https://osm-planet-<%= region %>.s3.amazonaws.com/planet/osm/<%= year %>/$1 [R,L]
+        RewriteRule ^/planet/<%= year %>/(changesets\-<%= year_two %>[0-1][0-9][0-3][0-9]\.osm\.bz2(\.torrent|\.md5)?)$ https://osm-planet-<%= region %>.s3.amazonaws.com/changesets/osm/<%= year %>/$1 [R,L]
+        RewriteRule ^/planet/<%= year %>/(discussions\-<%= year_two %>[0-1][0-9][0-3][0-9]\.osm\.bz2(\.torrent|\.md5)?)$ https://osm-planet-<%= region %>.s3.amazonaws.com/discussions/osm/<%= year %>/$1 [R,L]
+        RewriteRule ^/planet/<%= year %>/full\-history/(history\-<%= year_two %>[0-1][0-9][0-3][0-9]\.osm\.bz2(\.torrent|\.md5)?)$ https://osm-planet-<%= region %>.s3.amazonaws.com/planet-full-history/osm/<%= year %>/$1 [R,L]
+        <% end %>
+
         # Temporary download rate limit due to upstream fibre connection issues. - 13 Jan 2020
         <FilesMatch ".+\.(7z|bz2|gz|pbf|tgz|xz)$">
           SetOutputFilter RATE_LIMIT