1 #!<%= node[:ruby][:interpreter] %>
 
   6 # always keep the last 4 weeks
 
   7 ALWAYS_KEEP_DAYS = 4 * 7
 
   9 # otherwise, bucket by month and keep the earliest in the bucket
 
  11   Date.new(date.year, date.month, 1)
 
  14 Candidate = Struct.new(:filename, :date)
 
  16 def list_files(glob, date_pattern)
 
  17   # find all candidates for deletion
 
  18   real_files = Dir.glob(glob).select do |file|
 
  19     File.file?(file) && !File.symlink?(file)
 
  22   real_files.map do |file|
 
  24     m = date_pattern.match(file)
 
  25     raise "Unable to extract date string from #{file.inspect}" if m.nil?
 
  26     d = Date.strptime(m[1], "%y%m%d")
 
  27     Candidate.new(file, d)
 
  31 def deletion_candidates(today, candidates)
 
  32   candidate_buckets = Hash.new
 
  34   candidates.each do |c|
 
  35     next if today - c.date < ALWAYS_KEEP_DAYS
 
  38     candidate_buckets[b] = Array.new unless candidate_buckets.has_key?(b)
 
  39     candidate_buckets[b] << c
 
  42   # delete all but the earliest in each bucket
 
  43   candidate_buckets.collect_concat do |bucket, contents|
 
  44     contents.sort_by {|c| c.date}[1..-1]
 
  48 def deletions(glob, date_pattern, today, expansions)
 
  49   candidates = list_files(glob, date_pattern)
 
  50   to_delete = deletion_candidates(today, candidates)
 
  52   expanded = to_delete.collect_concat do |candidate|
 
  53     dir = File.dirname(candidate.filename)
 
  55       exp = candidate.date.strftime(e)
 
  60   expanded.select {|e| File.exist?(e)}
 
  66 OptionParser.new do |opt|
 
  67   opt.on('--dry-run') { dry_run = true }
 
  68   opt.on('--debug') { debug = true }
 
  71 xml_directory = "<%= node[:planet][:dump][:xml_directory] %>"
 
  72 xml_history_directory = "<%= node[:planet][:dump][:xml_history_directory] %>"
 
  73 pbf_directory = "<%= node[:planet][:dump][:pbf_directory] %>"
 
  74 pbf_history_directory = "<%= node[:planet][:dump][:pbf_history_directory] %>"
 
  79 to_delete += deletions(
 
  80   "#{xml_directory}/20??/planet-??????.osm.bz2",
 
  81   /planet-([0-9]{6}).osm.bz2/,
 
  83   ["changesets-%y%m%d.osm.bz2",
 
  84    "changesets-%y%m%d.osm.bz2.md5",
 
  85    "discussions-%y%m%d.osm.bz2",
 
  86    "discussions-%y%m%d.osm.bz2.md5",
 
  87    "planet-%y%m%d.osm.bz2",
 
  88    "planet-%y%m%d.osm.bz2.md5"])
 
  90 to_delete += deletions(
 
  91   "#{xml_history_directory}/20??/history-??????.osm.bz2",
 
  92   /history-([0-9]{6}).osm.bz2/,
 
  94   ["history-%y%m%d.osm.bz2",
 
  95    "history-%y%m%d.osm.bz2.md5"])
 
  97 to_delete += deletions(
 
  98   "#{pbf_directory}/planet-??????.osm.pbf",
 
  99   /planet-([0-9]{6}).osm.pbf/,
 
 101   ["planet-%y%m%d.osm.pbf",
 
 102    "planet-%y%m%d.osm.pbf.md5"])
 
 104 to_delete += deletions(
 
 105   "#{pbf_history_directory}/history-??????.osm.pbf",
 
 106   /history-([0-9]{6}).osm.pbf/,
 
 108   ["history-%y%m%d.osm.pbf",
 
 109    "history-%y%m%d.osm.pbf.md5"])
 
 113 cmd = dry_run ? "Would delete" : "Deleted"
 
 114 to_delete.each do |file|
 
 116   File.delete(file) unless dry_run
 
 117   puts "#{cmd} #{file.inspect}, #{s.size / 1000000} MB" if debug
 
 122   puts "#{cmd} files of total size #{total_size / 1000000000.0} GB"
 
 123   puts "#{cmd} #{num_deleted} files"