#!/usr/bin/env perl
use feature ':5.10';
use strict;
use warnings;

use YAML::Syck qw(Dump LoadFile);
use Test::Differences;
use Pod::Usage ();
use Getopt::Long ();

=encoding utf8

=head1 NAME

locale-diff - Compare two YAML files and print how their datastructures differ

=head1 SYNOPSIS

    # --keys is the default
    locale-diff en.yml is.yml
    locale-diff --keys en.yml is.yml

    # --untranslated-values compares the files and prints keys whose
    # values don't differ
    locale-diff --untranslated-values en.yml is.yml

    # --untranslated-values-all compares the files and prints keys whose
    # values don't differ, ignoring the blacklist which prunes things
    # unlikely to be translated
    locale-diff --untranslated-values-all en.yml is.yml

    # Check that interpolated variables ({{var}} and [[var]]) are the same
    locale-diff --validate-variables en.yml is.yml

=head1 DESCRIPTION

This utility prints the differences between two YAML files using
L<Test::Differences>. Its purpose is to diff locale files to find out
which keys need to be added to the translated files when the source
file (e.g. F<en.yml>) changes.

=head1 OPTIONS

=over

=item -h, --help

Print this help message.

=item --keys

Show the hash keys that differ between the two files, useful when
merging new entries from F<en.yml> into a local file.

=item --untranslated-values

Show keys that B<exist in both files> and whose values are exactly the
same. Use C<--keys> to get a list of keys that haven't been merged.

The values are pruned according to global and language specific
blacklists found in the C<__DATA__> section of this script.

This helps to find untranslated values.

=item --untranslated-values-all

Like C<--untranslated-values> but ignores blacklists.

=item --validate-variables

Check that interpolated Ruby i18n variables (C<{{foo}}> and
C<[[foo]]>) are equivalent in the two provided files.

=item --dump-flat

Dump a flat version of the translation hash in YAML format,
i.e. "foo.bar" instead of "{foo}->{bar}". Only the first file is read.

=item --reconstruct

Flatten the translation hash of the first file, rebuild the nested
hash from the flattened keys and dump the result in YAML format. Only
the first file is read.

=back

=head1 AUTHOR

Ævar Arnfjörð Bjarmason

=cut

# Get the command-line options
Getopt::Long::Parser->new(
    config => [ qw< bundling no_ignore_case no_require_order pass_through > ],
)->getoptions(
    'h|help'                  => \my $help,
    'keys'                    => \my $keys,
    'dump-flat'               => \my $dump_flat,
    'untranslated-values'     => \my $untranslated_values,
    'untranslated-values-all' => \my $untranslated_values_all,
    'validate-variables'      => \my $validate_variables,
    'reconstruct'             => \my $reconstruct,
) or help();

# --keys is the default when no other mode was requested
$keys = 1
    if  not $untranslated_values_all
    and not $untranslated_values
    and not $validate_variables
    and not $dump_flat
    and not $reconstruct;

# On --help
help() if $help;

# --dump-flat and --reconstruct only read the first file; every other
# mode needs exactly two existing files.
if ($dump_flat or $reconstruct) {
    help() if @ARGV < 1 or !-f $ARGV[0];
}
else {
    help() if @ARGV != 2 or !-f $ARGV[0] or !-f $ARGV[1];
}

my ($from, $to) = @ARGV;

# The translations live under a top-level key named after the file
# (en.yml => "en"); flatten them into a { "a.b.c" => value } hash.
my $from_data   = LoadFile($from);
my $from_parsed = { iterate($from_data->{basename($from)}) };

if ($dump_flat) {
    mark_utf8($from_parsed);

    print Dump $from_parsed;

    exit 0;
}

if ($reconstruct) {
    mark_utf8($from_parsed);

    # Rebuild the nested structure from the flattened keys
    my %out;
    for my $flat_key (keys %$from_parsed) {
        insert_string_deep(\%out, $flat_key, $from_parsed->{$flat_key});
    }

    print Dump { basename($from) => \%out };

    exit 0;
}

my $to_data   = LoadFile($to);
my $to_parsed = { iterate($to_data->{basename($to)}) };

if ($keys) {
    print_key_differences($from_parsed, $to_parsed);
}
elsif ($untranslated_values or $untranslated_values_all) {
    my @untranslated = untranslated_keys($from_parsed, $to_parsed);

    # Prune according to blacklist
    if ($untranslated_values) {
        @untranslated = prune_untranslated_with_blacklist(basename($to), @untranslated);
    }

    say for @untranslated;
}
elsif ($validate_variables) {
    print_validate_variables($from_parsed, $to_parsed);
}

exit 0;

# Output hook used by the Test::Differences hack in
# print_key_differences(): prints its first argument.
sub Test::ok { print shift }

# print_key_differences($f, $t)
#
# Print a diff of the sorted flattened keys of the two hashes $f and $t
# using Test::Differences' eq_or_diff().
sub print_key_differences {
    my ($f, $t) = @_;

    # Hack around Test::Differences wanting a Test::* module loaded:
    # pretend Test.pm is loaded so that (presumably) Test::ok above is
    # what receives the output.
    $INC{"Test.pm"} = 1;

    # Diff the tree
    eq_or_diff([ sort keys %$f ], [ sort keys %$t ]);
}

# untranslated_keys($from_parsed, $to_parsed)
#
# Return the sorted list of keys that exist in both flattened hashes and
# whose values are string-equal. Undefined values are compared as the
# empty string so that they don't trigger warnings.
sub untranslated_keys {
    my ($from_parsed, $to_parsed) = @_;

    my @same = grep {
        exists $to_parsed->{$_}
            and ($from_parsed->{$_} // '') eq ($to_parsed->{$_} // '')
    } keys %$from_parsed;

    my @sorted = sort @same;
    return @sorted;
}

# prune_untranslated_with_blacklist($language, @keys)
#
# Remove from @keys every key blacklisted in the __DATA__ section of
# this script, either in the "default" list or in the list for
# $language. A per-language entry overrides the default entry for the
# same key. Returns the remaining keys, sorted.
#
# This reads the DATA handle, so it can only be called once per run.
sub prune_untranslated_with_blacklist {
    my ($language, @keys) = @_;

    my %keys;
    @keys{@keys} = ();

    my $end_yaml      = LoadFile(*DATA);
    my $lists         = $end_yaml->{untranslated_values};
    my $default       = $lists->{default}   || {};
    my $this_language = $lists->{$language} || {};

    # Later entries win, so the language list overrides the default one
    my %bw_list = (%$default, %$this_language);

    for my $key (keys %bw_list) {
        # The values are compared as the literal string 'true':
        # YAML::Syck only turns true/false into booleans when
        # $YAML::Syck::ImplicitTyping is set, which this script
        # doesn't do.
        delete $keys{$key} if $bw_list{$key} eq 'true';
    }

    my @remaining = sort keys %keys;
    return @remaining;
}

# print_validate_variables($f, $t)
#
# For every key present in both flattened hashes, compare the sorted
# interpolation variables found in the two values and print a line for
# each key where they differ. Keys are visited in sorted order so the
# output is stable between runs. Uses the file-level $from and $to file
# names in the message.
sub print_validate_variables {
    my ($f, $t) = @_;

    for my $key (sort keys %$f) {
        # Keys missing from the target are reported by --keys instead
        next unless exists $t->{$key};

        my @from_var = parse_variables_from_string($f->{$key});
        my @to_var   = parse_variables_from_string($t->{$key});

        next if _same_list(\@from_var, \@to_var);

        say "$key in $from has (@from_var) and $to has (@to_var)";
    }
}

# _same_list(\@x, \@y)
#
# True if both arrays have the same length and are pairwise string-equal.
sub _same_list {
    my ($x, $y) = @_;

    return 0 if @$x != @$y;

    for my $i (0 .. $#$x) {
        return 0 if $x->[$i] ne $y->[$i];
    }

    return 1;
}

# parse_variables_from_string($string)
#
# Return the sorted names of all {{var}} and [[var]] interpolation
# variables in $string, or the empty list if there are none or if
# $string isn't a plain defined string.
sub parse_variables_from_string {
    my ($string) = @_;

    return if not defined $string or ref $string;

    # This probably matches most of the variables
    my $var = qr/ [a-z0-9_]+? /xs;

    # Each match yields two captures, one of which is undef depending on
    # which bracket style matched, hence the grep below.
    my @captured = $string =~ m/ \{\{ ($var) \}\} | \[\[ ($var) \]\] /gsx;

    my @names = sort grep { defined } @captured;
    return @names;
}

# iterate($hash, @path)
#
# Recursively flatten the nested hash $hash into a list of
# ("dotted.key.path" => $value) pairs. @path holds the keys leading to
# $hash. Anything that isn't a hash reference is treated as a value.
sub iterate {
    my ($hash, @path) = @_;
    my @ret;

    for my $k (keys %$hash) {
        my $v = $hash->{$k};

        if (ref $v eq 'HASH') {
            push @ret => iterate($v, @path, $k);
        }
        else {
            push @ret => join(".", @path, $k), $v;
        }
    }

    return @ret;
}

# $s = 'foo.bar.baz.spam.eggs.ham'; $h = \%h; $h = $h->{$_} = {} for split /\./, $s; \%h
# ==> {foo => {bar => {baz => {spam => {eggs => {ham => {}}}}}}}

# insert_string_deep($h, $ks, $v)
#
# Store $v in the nested hash $h under the dotted key path $ks, creating
# intermediate hashes as needed ("a.b.c" => $h->{a}{b}{c} = $v).
sub insert_string_deep {
    my ($h, $ks, $v) = @_;

    # Walk a reference-to-slot down the path; taking a reference to
    # $$p->{$_} autovivifies each intermediate hash.
    my $p = \$h;
    $p = \$$p->{$_} for split /\./, $ks;
    $$p = $v;
}

# sub insert_string_deep
# {
#     my ($hash, $key, $value) = @_;
#
#     my @key = split /\./, $key;
#     my $h = $hash;
#
#     my $i = 0;
#     for my $k (@key) {
#         $i ++;
#         if ($i == @key) {
#             $h->{$k} = $value;
#         } else {
#             if (ref $h->{$k}) {
#                 $h = $h->{$k};
#             } else {
#                 $h = $h->{$k} = {};
#             }
#         }
#     }
# }

# basename($name)
#
# Return the file name in $name without its directory and without
# everything from the first dot of the file name onwards
# ("path/to/en.yml" => "en").
sub basename {
    my $name = shift;

    # Strip the directory first: otherwise a dot in the directory part
    # ("./en.yml", "../locales/en.yml") would eat the whole file name.
    $name =~ s[.*/][];
    $name =~ s[\..*?$][];

    $name;
}

# mark_utf8($hash)
#
# Decode, in place, every value of $hash (and every element of values
# that are array references) from UTF-8 octets to characters.
sub mark_utf8 {
    my ($hash) = @_;

    # values() yields aliases in a foreach loop, so this modifies $hash
    for my $value (values %$hash) {
        if (ref $value eq 'ARRAY') {
            utf8::decode($_) for @$value;
        }
        else {
            utf8::decode($value);
        }
    }

    return;
}

# help(%arg)
#
# Print usage information generated from this script's POD and exit.
# Accepts "verbose" and "exitval" (default 0), passed on to pod2usage().
sub help {
    my %arg = @_;

    Pod::Usage::pod2usage(
        -verbose => $arg{ verbose },
        -exitval => $arg{ exitval } || 0,
    );
}

__DATA__

untranslated_values:
  # Default/Per language blacklist/whitelist for the
  # --untranslated-values switch. "true" as a value indicates that the
  # key is to be blacklisted, and "false" that it's to be
  # whitelisted. "false" is only required to whitelist a key
  # blacklisted by default on a per-language basis.

  default:
    html.dir: true
    layouts.intro_3_bytemark: true
    layouts.intro_3_ucl: true
    layouts.project_name.h1: true
    layouts.project_name.title: true
    site.index.license.project_url: true
    browse.relation_member.entry: true

    # #{{id}}
    changeset.changeset.id: true

  de:
    activerecord.attributes.message.sender: true
    activerecord.attributes.trace.name: true
    activerecord.models.changeset: true
    activerecord.models.relation: true
    browse.changeset.changeset: true
    browse.changeset.changesetxml: true
    browse.changeset.osmchangexml: true
    browse.changeset.title: true
    browse.common_details.version: true
    browse.containing_relation.relation: true
    browse.relation.relation: true
    browse.relation.relation_title: true
    browse.start_rjs.details: true
    browse.start_rjs.object_list.details: true
    browse.tag_details.tags: true
    changeset.changesets.id: true
    export.start.export_button: true
    export.start.format: true
    export.start.output: true
    export.start.zoom: true
    export.start_rjs.export: true
    layouts.export: true
    layouts.shop: true
    site.edit.anon_edits: true
    site.index.license.license_name: true
    site.index.permalink: true
    site.key.table.entry.park: true
    site.search.submit_text: true
    trace.edit.tags: true
    trace.trace.in: true
    trace.trace_form.tags: true
    trace.trace_optionals.tags: true
    trace.view.tags: true
    user.account.public editing.enabled link: true

  is:
    # ({{link}})
    site.edit.anon_edits: true

    # Creative Commons Attribution-Share Alike 2.0
    site.index.license.license_name: true

    # http://creativecommons.org/licenses/by-sa/2.0/
    site.index.license.license_url: true

    # {{id}}
    printable_name.with_id: true

    # {{name}} ({{id}})
    printable_name.with_name: true

    # {{type}}
    geocoder.search_osm_namefinder.prefix: true

    # {{suffix}}, {{parentname}}
    geocoder.search_osm_namefinder.suffix_suburb: true