#!/usr/bin/env perl
use strict;
use warnings;
use YAML::Syck qw(Load LoadFile);
use Test::Differences;
use Pod::Usage ();
use Getopt::Long ();

=head1 NAME

locale-diff - Compare two YAML files and print how their datastructures differ

=head1 SYNOPSIS

    # --keys is the default
    diff en.yml is.yml
    diff --keys en.yml is.yml

    # --untranslated-values prints keys whose values don't differ
    diff --untranslated-values en.yml is.yml

    # --untranslated-values-all prints keys whose values don't
    # differ, ignoring the blacklist which prunes things unlikely
    # to be translated
    diff --untranslated-values-all en.yml is.yml

=head1 DESCRIPTION

This utility prints the differences between two YAML files using
L<Test::Differences>.

Its purpose is to diff the locale files to find out what keys need to
be added to the translated files when the source file (F<en.yml> in
the examples above) changes.

=head1 OPTIONS

=over

=item -h, --help

Print this help message.

=item --keys

Show the hash keys that differ between the two files, useful for
merging new entries from the source file (e.g. F<en.yml>) into a local
file.

=item --untranslated-values

Show keys whose values are either exactly the same between the two
files, or don't exist in the target file (the latter file specified).

The values are pruned according to global and language specific
blacklists found in the C<__DATA__> section of this script.

This helps to find untranslated values.

=item --untranslated-values-all

Like C<--untranslated-values> but ignores blacklists.

=back

=head1 AUTHOR

E<AElig>var ArnfjE<ouml>rE<eth> Bjarmason

=cut

# Get the command-line options. With pass_through, unrecognised options
# are left in @ARGV and are caught by the argument-count check below.
Getopt::Long::Parser->new(
    config => [ qw< bundling no_ignore_case no_require_order pass_through > ],
)->getoptions(
    'h|help'                  => \my $help,
    'keys'                    => \my $keys,
    'untranslated-values'     => \my $untranslated_values,
    'untranslated-values-all' => \my $untranslated_values_all,
) or help( exitval => 1 );

# --keys is the default
$keys = 1 if not $untranslated_values_all and not $untranslated_values;

# On --help
help() if $help;

# If we're not given two .yml files: a usage error, so exit non-zero
help( exitval => 1 ) if @ARGV != 2 or (!-f $ARGV[0] or !-f $ARGV[1]);

my ($from, $to) = @ARGV;

# Flatten both files into { "dotted.key.path" => value } hashes
my $from_parsed = { iterate(locale_tree($from)) };
my $to_parsed   = { iterate(locale_tree($to)) };

# Since this used to be the default, support that...
if ($keys) {
    print_key_differences();
}
elsif ($untranslated_values or $untranslated_values_all) {
    my @untranslated = untranslated_keys($from_parsed, $to_parsed);

    # Prune according to blacklist
    if ($untranslated_values) {
        @untranslated = prune_untranslated_with_blacklist(basename($to), @untranslated);
    }

    print $_, "\n" for @untranslated;
}

exit 0;

# Load a locale file and return the hash stored under its language key.
#
# The language key is derived from the file name via basename(), e.g.
# "en.yml" is expected to contain a top-level "en:" mapping. When that
# mapping is missing a warning is printed and an empty hash is returned,
# so the comparison still runs instead of failing on an undef reference.
sub locale_tree {
    my ($file) = @_;

    my $language = basename($file);
    my $data     = LoadFile($file);

    if (ref $data eq 'HASH' and ref $data->{$language} eq 'HASH') {
        return $data->{$language};
    }

    warn "$file: no top-level '$language' mapping found; treating it as empty\n";
    return {};
}

# Print a diff of the sorted key lists of the two parsed files.
#
# Takes no arguments; reads the file-level $from_parsed and $to_parsed.
sub print_key_differences {
    # Hack around Test::Differences wanting a Test::* module loaded.
    # The named sub below is compiled once at compile time even though
    # it is written inside this sub.
    $INC{"Test.pm"} = 1;
    sub Test::ok { print shift }

    # Diff the tree
    eq_or_diff([ sort keys %$from_parsed ], [ sort keys %$to_parsed ]);
}

# Return the sorted list of keys from $from_parsed that look untranslated:
# either absent from $to_parsed or carrying the same value in both.
#
# Undefined values are compared as empty strings so that empty YAML
# entries do not trigger "uninitialized" warnings.
sub untranslated_keys {
    my ($from_parsed, $to_parsed) = @_;

    my @untranslated = grep {
        not exists $to_parsed->{$_}
            or ($from_parsed->{$_} // '') eq ($to_parsed->{$_} // '')
    } keys %$from_parsed;

    # Sorted so the output is stable with and without blacklist pruning
    return sort @untranslated;
}

# Remove blacklisted keys from @keys and return the remainder, sorted.
#
# $language selects the per-language section of the blacklist stored in
# the __DATA__ section; its entries override the "default" section, so a
# language can whitelist a default-blacklisted key with "false".
#
# Note: the DATA handle is consumed by the first call, so a second call
# in the same run would find an empty blacklist and prune nothing.
sub prune_untranslated_with_blacklist {
    my ($language, @keys) = @_;

    my %keys;
    @keys{@keys} = ();

    my $end_yaml            = Load(join '', <DATA>);
    my $untranslated_values = $end_yaml->{untranslated_values} || {};
    my $default             = $untranslated_values->{default}   || {};
    my $this_language       = $untranslated_values->{$language} || {};

    # Later pairs win, so per-language settings override the defaults
    my %bw_list = (%$default, %$this_language);

    for my $key (keys %bw_list) {
        my $blacklisted = $bw_list{$key};

        # The values are compared as the literal string "true".
        # NOTE(review): per the YAML::Syck documentation, unquoted
        # true/false are only converted to booleans when
        # $YAML::Syck::ImplicitTyping is set, which this script never
        # does - confirm if that setting is ever enabled.
        delete $keys{$key} if defined $blacklisted and $blacklisted eq 'true';
    }

    return sort keys %keys;
}

# Flatten a nested hash into a list of ("dotted.path" => value) pairs.
#
# $hash is the (sub)tree to walk and @path the keys leading to it.
# Nested hashes are recursed into; every other value is emitted as a leaf.
sub iterate {
    my ($hash, @path) = @_;
    my @ret;

    for my $k (sort keys %$hash) {
        my $v = $hash->{$k};

        if (ref $v eq 'HASH') {
            push @ret, iterate($v, @path, $k);
        }
        else {
            push @ret, join(".", @path, $k), $v;
        }
    }

    return @ret;
}

# Return the language name for a locale file: the file name without its
# directory and without everything from the first dot onwards, e.g.
# "config/locales/en.yml" and "./en.yml" both give "en".
sub basename {
    my $name = shift;

    # Strip the directory first; otherwise a dot in the path (as in
    # "./en.yml") would be mistaken for the start of the extension.
    require File::Basename;
    my ($file) = File::Basename::fileparse($name);

    $file =~ s{\..*\z}{}s;
    return $file;
}

# Print the usage message taken from this script's POD and exit.
#
# Accepts optional named arguments "verbose" and "exitval", which are
# passed to pod2usage; exitval defaults to 0.
sub help {
    my %arg = @_;

    Pod::Usage::pod2usage(
        -verbose => $arg{ verbose },
        -exitval => $arg{ exitval } || 0,
    );
}

__DATA__
untranslated_values:
  # Default/Per language blacklist/whitelist for the
  # --untranslated-values switch. "true" as a value indicates that the
  # key is to be blacklisted, and "false" that it's to be
  # whitelisted. "false" is only required to whitelist a key
  # blacklisted by default on a per-language basis.

  default:
    html.dir: true
    layouts.intro_3_bytemark: true
    layouts.intro_3_ucl: true

  de:
    layouts.export: true