+++ /dev/null
-#!/usr/bin/env perl
-use feature ':5.10';
-use strict;
-use warnings;
-use YAML::Syck qw(Dump LoadFile);
-use Test::Differences;
-use Pod::Usage ();
-use Getopt::Long ();
-
-=head1 NAME
-
-locale-diff - Compare two YAML files and print how their datastructures differ
-
-=head1 SYNOPSIS
-
- # --keys is the default
- diff en.yml is.yml
- diff --keys en.yml is.yml
-
- # --untranslated-values compares prints keys whose values don't differ
- diff --untranslated-values en.yml is.yml
-
- # --untranslated-values-all compares prints keys whose values
- # don't differ. Ignoring the blacklist which prunes things
- # unlikley to be translated
- diff --untranslated-values-all en.yml is.yml
-
- # Check that interpolated variables ({{var}} and [[var]]) are the same
- diff --validate-variables en.yml is.yml
-
-=head1 DESCRIPTION
-
-This utility prints the differences between two YAML files using
-L<Test::Differences>. The purpose of it is to diff the files is
-F<config/locales> to find out what keys need to be added to the
-translated files when F<en.yml> changes.
-
-=head1 OPTIONS
-
-=over
-
-=item -h, --help
-
-Print this help message.
-
-=item --keys
-
-Show the hash keys that differ between the two files, useful merging
-new entries from F<en.yml> to a local file.
-
-=item --untranslated-values
-
-Show keys that B<exist in both the compared files> and whose values
-are exactly the same. Use C<--keys> to a list of values that hasn't
-been merged.
-
-The values are pruned according to global and language specific
-blacklists found in the C<__DATA__> section of this script.
-
-This helps to find untranslated values.
-
-=item --untranslated-values-all
-
-Like C<--untranslated-values> but ignores blacklists.
-
-=item --validate-variables
-
-Check that interpolated Ruby i18n variables (C<{{foo}}> and
-C<[[foo]]>) are equivalent in the two provided files.
-
-=item --dump-flat
-
-Dump a flat version of the translation hash in YAML format,
-i.e. "foo.bar" instead of "{foo}->{bar}".
-
-=back
-
-=head1 AUTHOR
-
-E<AElig>var ArnfjE<ouml>rE<eth> Bjarmason <avarab@gmail.com>
-
-=cut
-
-# Get the command-line options
-Getopt::Long::Parser->new(
- config => [ qw< bundling no_ignore_case no_require_order pass_through > ],
-)->getoptions(
- 'h|help' => \my $help,
- 'keys' => \my $keys,
- 'dump-flat' => \my $dump_flat,
- 'untranslated-values' => \my $untranslated_values,
- 'untranslated-values-all' => \my $untranslated_values_all,
- 'validate-variables' => \my $validate_variables,
- 'reconstruct' => \my $reconstruct,
-) or help();
-
-# --keys is the default
-$keys = 1 if not $untranslated_values_all and not $untranslated_values and not $validate_variables and not $dump_flat;
-
-# On --help
-help() if $help;
-
-# If we're not given two .yml files
-help() if (@ARGV != 2 or (!-f $ARGV[0] or !-f $ARGV[1])) and not $dump_flat || $reconstruct;
-
-my ($from, $to) = @ARGV;
-
-my $from_data = LoadFile($from);
-my $from_parsed = { iterate($from_data->{basename($from)}) };
-
-if ($dump_flat)
-{
- mark_utf8($from_parsed);
-
- print Dump $from_parsed;
-
- exit 0;
-}
-
-if ($reconstruct) {
- mark_utf8($from_parsed);
-
- my %out;
- while (my ($k, $v) = each %$from_parsed) {
- insert_string_deep(\%out, $k, $v);
- }
-
- print Dump { basename($from) => \%out };
-
- exit 0;
-}
-
-my $to_data = LoadFile($to);
-my $to_parsed = { iterate($to_data->{basename($to)}) };
-
-if ($keys)
-{
- print_key_differences($from_parsed, $to_parsed);
-}
-elsif ($untranslated_values or $untranslated_values_all)
-{
- my @untranslated = untranslated_keys($from_parsed, $to_parsed);
-
- # Prune according to blacklist
- if ($untranslated_values) {
- @untranslated = prune_untranslated_with_blacklist(basename($to), @untranslated);
- }
-
- say for @untranslated;
-} elsif ($validate_variables)
-{
- print_validate_variables($from_parsed, $to_parsed);
-}
-
-exit 0;
-
-sub print_key_differences
-{
- my ($f, $t) = @_;
-
- # Hack around Test::Differences wanting a Test::* module loaded
- $INC{"Test.pm"} = 1;
- sub Test::ok { print shift }
-
- # Diff the tree
- eq_or_diff([ sort keys %$f ], [ sort keys %$t ]);
-}
-
-sub untranslated_keys
-{
- my ($from_parsed, $to_parsed) = @_;
- sort grep { exists $to_parsed->{$_} and $from_parsed->{$_} eq $to_parsed->{$_} } keys %$from_parsed;
-}
-
-sub prune_untranslated_with_blacklist
-{
- my ($language, @keys) = @_;
- my %keys;
- @keys{@keys} = ();
-
- my $end_yaml = LoadFile(*DATA);
- my $untranslated_values = $end_yaml->{untranslated_values};
- my $default = $untranslated_values->{default};
- my $this_language = $untranslated_values->{$language} || {};
-
- my %bw_list = (%$default, %$this_language);
-
- while (my ($key, $blacklisted) = each %bw_list)
- {
- # FIXME: Does syck actually support true/false booleans in yaml?
- delete $keys{$key} if $blacklisted eq 'true'
- }
-
- sort keys %keys;
-}
-
-sub print_validate_variables
-{
- my ($f, $t) = @_;
-
- while (my ($key, $val) = each %$f)
- {
- next if exists $f->{$key} and not exists $t->{$key};
-
- my @from_var = parse_variables_from_string($f->{$key});
- my @to_var = parse_variables_from_string($t->{$key});
-
- unless (@from_var ~~ @to_var) {
- say "$key in $from has (@from_var) and $to has (@to_var)";
- }
-
- }
-}
-
-sub parse_variables_from_string
-{
- my ($string) = @_;
-
- # This probably matches most of the variables
- my $var = qr/ [a-z0-9_]+? /xs;
-
- if (my @var = $string =~ m/ \{\{ ($var) \}\} | \[\[ ($var) \]\] /gsx) {
- return sort grep { defined } @var;
- } else {
- return;
- }
-}
-
-sub iterate
-{
- my ($hash, @path) = @_;
- my @ret;
-
- while (my ($k, $v) = each %$hash)
- {
- if (ref $v eq 'HASH')
- {
- push @ret => iterate($v, @path, $k);
- }
- else
- {
- push @ret => join(".",@path, $k), $v;
- }
- }
-
- return @ret;
-}
-
-# $s = 'foo.bar.baz.spam.eggs.ham'; $h = \%h; $h = $h->{$_} = {} for split /\./, $s; \%h
-# ==> {foo => {bar => {baz => {spam => {eggs => {ham => {}}}}}}}
-sub insert_string_deep {
- my ($h, $ks, $v) = @_;
- my $p = \$h; $p = \$$p->{$_} for split /\./, $ks;
- $$p = $v;
-}
-
-# sub insert_string_deep
-# {
-# my ($hash, $key, $value) = @_;
-#
-# my @key = split /\./, $key;
-# my $h = $hash;
-#
-# my $i = 0;
-# for my $k (@key) {
-# $i ++;
-# if ($i == @key) {
-# $h->{$k} = $value;
-# } else {
-# if (ref $h->{$k}) {
-# $h = $h->{$k};
-# } else {
-# $h = $h->{$k} = {};
-# }
-# }
-# }
-# }
-
-sub basename
-{
- my $name = shift;
- $name =~ s[\..*?$][];
- $name =~ s[.*/][];
- $name;
-}
-
-sub mark_utf8
-{
- my ($hash) = @_;
-
- # Mark as UTF-8
- map { if (ref $_ eq 'ARRAY') { map { utf8::decode($_) } @$_ } else { utf8::decode($_) } } values %$hash;
-}
-
-sub help
-{
- my %arg = @_;
-
- Pod::Usage::pod2usage(
- -verbose => $arg{ verbose },
- -exitval => $arg{ exitval } || 0,
- );
-}
-
-__DATA__
-untranslated_values:
-
- # Default/Per language blacklist/whitelist for the
- # --untranslated-values switch. "true" as a value indicates that the
- # key is to be blacklisted, and "false" that it's to be
- # whitelisted. "false" is only required to whitelist a key
- # blacklisted by default on a per-language basis.
-
- default:
- html.dir: true
- layouts.intro_3_bytemark: true
- layouts.intro_3_ucl: true
- layouts.project_name.h1: true
- layouts.project_name.title: true
- site.index.license.project_url: true
- browse.relation_member.entry: true
-
- # #{{id}}
- changeset.changeset.id: true
-
- de:
- activerecord.attributes.message.sender: true
- activerecord.attributes.trace.name: true
- activerecord.models.changeset: true
- activerecord.models.relation: true
- browse.changeset.changeset: true
- browse.changeset.changesetxml: true
- browse.changeset.osmchangexml: true
- browse.changeset.title: true
- browse.common_details.version: true
- browse.containing_relation.relation: true
- browse.relation.relation: true
- browse.relation.relation_title: true
- browse.start_rjs.details: true
- browse.start_rjs.object_list.details: true
- browse.tag_details.tags: true
- changeset.changesets.id: true
- export.start.export_button: true
- export.start.format: true
- export.start.output: true
- export.start.zoom: true
- export.start_rjs.export: true
- layouts.export: true
- layouts.shop: true
- site.edit.anon_edits: true
- site.index.license.license_name: true
- site.index.permalink: true
- site.key.table.entry.park: true
- site.search.submit_text: true
- trace.edit.tags: true
- trace.trace.in: true
- trace.trace_form.tags: true
- trace.trace_optionals.tags: true
- trace.view.tags: true
- user.account.public editing.enabled link: true
-
- is:
- # ({{link}})
- site.edit.anon_edits: true
-
- # Creative Commons Attribution-Share Alike 2.0
- site.index.license.license_name: true
-
- # http://creativecommons.org/licenses/by-sa/2.0/
- site.index.license.license_url: true
-
- # {{id}}
- printable_name.with_id: true
-
- # {{name}} ({{id}})
- printable_name.with_name: true
-
- # {{type}}
- geocoder.search_osm_namefinder.prefix: true
-
- # {{suffix}}, {{parentname}}
- geocoder.search_osm_namefinder.suffix_suburb: true