#!/usr/bin/env perl
# Recovered from the patch published at:
# https://git.openstreetmap.org/rails.git/blobdiff_plain/16030e03817e2df25e087c966da33edc78d7d23e..8eead8a8707b478b342a6e4c13c4e8b4ef143b98:/script/misc/update-key-and-tag-description-pages-from-wiki
use strict;
use warnings;

# say() is used when writing the output file; it is not available unless
# the feature is enabled explicitly.
use feature 'say';

use Pod::Usage ();
use Getopt::Long ();

# Check for the non-core modules up front so that a missing dependency
# produces an installation hint instead of a bare compile error from the
# "use" lines below.
BEGIN {
    eval { require MediaWiki::API; require YAML::XS; 1 } or do {
        print "You have to install some modules via CPAN to run this:\n";
        print " sudo cpanp MediaWiki::API YAML::XS\n";
        exit 1;
    };
}

use MediaWiki::API;
use YAML::XS qw(Dump);
# Test::More is used here only for its ok() progress output.
use Test::More 'no_plan';

=head1 NAME

update-key-and-tag-description-pages-from-wiki - Screen-scrape the wiki for key/value wiki description pages

=head1 SYNOPSIS

    perl script/misc/update-key-and-tag-description-pages-from-wiki config/wiki-tag-and-key-description.yml

=head1 BUGS

This will break if there are more than 500 key or value pages. Paging
needs to be implemented.

That or using a proper API or something (if it's there) or making a
direct query to the wiki database.

=cut

# Get the command-line options
Getopt::Long::Parser->new(
    config => [ qw< bundling no_ignore_case no_require_order pass_through > ],
)->getoptions(
    'h|help' => \my $help,
) or help();

# On --help
help() if $help;

# The output file name is a mandatory positional argument
help() unless $ARGV[0];

# Get a API interface
my $mw = MediaWiki::API->new();
ok($mw, "Got a MediaWiki API");
$mw->{config}->{api_url} = 'http://wiki.openstreetmap.org/w/api.php';

# All our goodies:
#   %feature  language => { key|value => { name_with_underscores => page title } }
#   %count    key|value => number of pages seen across all languages
my (%feature, %count);

# This is what you get on:
## http://wiki.openstreetmap.org/w/index.php?search=Template:KeyDescription&fulltext=Search&fulltext=Search
for my $lang ('', map { "${_}:" } qw[ Pt Fi De It HU Cz Fr RU Pl ]) {
    ok(1, " Templates for language '$lang'");

    # Key/value pages
    for my $thing (qw(key value)) {
        my $Thing = ucfirst $thing;
        ok(1, " Getting $thing pages");
        my $cnt = stick_content_in_hash($thing, "Template:${lang}${Thing}Description", \%feature);
        ok(1, " Got $cnt $thing pages");
        $count{$thing} += $cnt;
    }
}

ok(1, "Got a total of $count{$_} ${_}s") for qw[ key value ];

# Dump to .yml file
open my $out, ">", $ARGV[0]
    or die "Can't open file '$ARGV[0]' supplied on the command line: $!";
say $out "# THIS FILE IS AUTOGENERATED WITH THE script/misc/update-key-and-tag-description-pages-from-wiki";
say $out "# PROGRAM DO NOT MANUALLY EDIT IT";
say $out "";
say $out Dump(\%feature);
# Buffered write errors only surface at close time, so check it.
close $out or die "Can't close file '$ARGV[0]': $!";

exit 0;

# stick_content_in_hash($key, $title, $hash)
#
# Records every page that embeds the template $title into $hash.
#
#   $key    'key' or 'value'; its ucfirst form is the page-title prefix
#           that is matched ("Key:" / "Value:").
#   $title  title of the template whose embedding pages are listed.
#   $hash   hash reference that is filled in place as
#           $hash->{language}->{$key}->{name} = page title, where spaces
#           in the name are replaced with underscores.
#
# Titles of the form "Key:name" are filed under language "en"; titles of
# the form "Lang:Key:name" are filed under the lower-cased "Lang". Titles
# matching neither form are counted but not stored.
#
# Returns the number of links reported for the template.
sub stick_content_in_hash
{
    my ($key, $title, $hash) = @_;
    my $ukey = ucfirst $key;

    my $space_to_underscore = sub {
        my $txt = shift;
        $txt =~ s/ /_/g;
        $txt;
    };

    my $count = 0;
    get_embeddedin(
        $title,
        sub {
            # Called with an array reference holding one batch of links.
            my ($links) = @_;
            my (@links) = @$links;
            ok(1, " ... got " . scalar(@links) . " more links");
            for my $link (@links) {
                $count++;
                my $page_title = $link->{title};

                if ($page_title =~ /^$ukey:(?<key_name>.*?)$/) {
                    # English by default
                    $hash->{en}->{$key}->{ $space_to_underscore->($+{key_name}) } = $page_title;
                } elsif ($page_title =~ /^(?<lang>[^:]+):$ukey:(?<key_name>.*?)$/) {
                    $hash->{lc $+{lang}}->{$key}->{ $space_to_underscore->($+{key_name}) } = $page_title;
                }
            }
        }
    );

    return $count;
}

# get_embeddedin($title, $callback)
#
# Runs an "embeddedin" list query against the wiki for the template
# $title, skipping redirects, and passes $callback to MediaWiki::API as
# the "hook" option so that it receives the results.
#
# Dies with the API error code and details if the query fails.
# Returns whatever MediaWiki::API's list() returned.
sub get_embeddedin
{
    my ($title, $callback) = @_;
    my $result = $mw->list(
        {
            action        => 'query',
            list          => 'embeddedin',
            eititle       => $title,
            eifilterredir => 'nonredirects',
            # Doesn't work for De:* and anything non-en. Odd.
            # einamespace => '0|8',
            eilimit       => '200',
        },
        {
            max           => '0',
            hook          => $callback,
            skip_encoding => 1,
        }
    ) || die $mw->{error}->{code} . ': ' . $mw->{error}->{details};

    return $result;
}

# help(%arg)
#
# Prints usage information taken from this file's POD via pod2usage.
# Optional named arguments: "verbose" (passed through as -verbose) and
# "exitval" (passed through as -exitval, defaulting to 0).
sub help
{
    my %arg = @_;

    Pod::Usage::pod2usage(
        -verbose => $arg{ verbose },
        -exitval => $arg{ exitval } || 0,
    );
}