summaryrefslogtreecommitdiff
path: root/i18n-diff-auditor
diff options
context:
space:
mode:
Diffstat (limited to 'i18n-diff-auditor')
-rwxr-xr-xi18n-diff-auditor532
1 files changed, 532 insertions, 0 deletions
diff --git a/i18n-diff-auditor b/i18n-diff-auditor
new file mode 100755
index 0000000..6868ca7
--- /dev/null
+++ b/i18n-diff-auditor
@@ -0,0 +1,532 @@
+#!/usr/bin/perl -w
+#
+# i18n-diff-auditor
+# Copyright (C)2018 Ian Jackson
+# GPLv3+, NO WARRANTY, see below.
+#
+#
+# Usage:
+# something like this
+# git-log -n1 -p | ./i18n-diff-auditor -D 2>&1 |less -j10 +/'^!.*'
+#
+# -D is for debug. Currently only one level.
+#
+# Output is the relevant diff hunks, with each line prepended with
+# space for ok lines and ! for questionable ones, and with relevant
+# diff lines prepended with lines starting !! (and lines starting #
+# for debug output), so ovrall:
+#
+# !! <message> reasoning for subsequent questionable diff line(s)
+# !+ diff line found to be questionable
+# !- diff line found to be questionable
+# @@@ etc. diff furniture
+# + diff line checked and ok
+# - diff line checked and ok
+# # <stuff> debug output (normally precedes relevant output)
+#
+# Changes are generally marked as ok if they correspond to a known
+# intended code change pattern. (That includes changing error calls
+# to different error calls.) If they don't correspond to any known
+# pattern, they are "questionable" and the first thing that doesn't
+# match the most common pattern is reported.
+#
+# Might be useful for projects other than dgit, provided it uses
+# the same gettext aliases (__ f_ i_) and similar error calls
+# (die, confess, fail).
+#
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+use strict;
+use Carp;
+use Data::Dumper;
+use Getopt::Long;
+
+our $debug = 0;
+GetOptions("debug|D+" => \$debug
+ );
+
+our @debug;
+sub debug ($$) {
+ my ($i,$s) = @_;
+ push @{ $debug[$i] }, $s if $debug;
+}
+
+my @d = <>;
+unshift @d, "# dummy line to make line 1 index 1 in \@d\n";
+
+our $i_last_l_ok = -1;
+our $count_i_last_l_ok;
+
+sub l_ok ($) {
+ my ($i) = @_;
+
+ if ($i == $i_last_l_ok) {
+ confess $i if $count_i_last_l_ok++ > 50;
+ } else {
+ $count_i_last_l_ok = 0;
+ $i_last_l_ok = $i;
+ }
+
+ return unless $i < @d;
+ $_ = $d[$i];
+ #print STDERR "L $i\n";
+ 1;
+}
+
+sub l ($) {
+ my ($i) = @_;
+ confess $i unless l_ok $i;
+};
+
+our $perlop_text = <<'END'; # c&p from man perlop
+ left terms and list operators (leftward)
+ left ->
+ nonassoc ++ --
+ right **
+ right ! ~ \ and unary + and -
+ left =~ !~
+ left * / % x
+ left + - .
+ left << >>
+ nonassoc named unary operators
+ nonassoc < > <= >= lt gt le ge
+ nonassoc == != <=> eq ne cmp ~~
+ left &
+ left | ^
+ left &&
+ left || //
+ nonassoc .. ...
+ right ?:
+ right = += -= *= etc. goto last next redo dump
+ left , =>
+ nonassoc list operators (rightward)
+ right not
+ left and
+ left or xor
+
+ **= += *= &= &.= <<= &&=
+ -= /= |= |.= >>= ||=
+ .= %= ^= ^.= //=
+ x=
+END
+
+our $perlop_re;
+
+sub prep_perlop () {
+ my @ops;
+ foreach (split /\n/, $perlop_text) {
+ next unless m{\S};
+ s{\s+$}{};
+ s{^\s+}{};
+ s{^(?: left | right | nonassoc ) \s+}{}x;
+ next if m{^terms and list operators};
+ next if m{^named unary};
+ next if m{^list operators};
+ s{ and unary.*}{};
+ s{ etc\. }{ };
+ s{\?\:}{ ? : };
+ foreach my $op (split /\s+/) {
+ next unless length $op;
+ next if $op =~ m{^\w+$};
+ $op =~ s/\W/\\$&/g;
+ push @ops, $op;
+ }
+ }
+ $perlop_re = '(?: '.(join ' | ', @ops).' )';
+ $perlop_re = qr{$perlop_re}x;
+ #print STDERR "$perlop_re\n";
+}
+
+prep_perlop();
+
+our ($ifilehead, $ifirsthunkhead);
+our ($ihunkhead, $ihunkend);
+our ($ichunkstart, $ichunkend);
+our ($before, $after);
+
+sub is_string ($) { $_[0]{T} =~ m/heredoc|string/; }
+sub is_trans ($) { grep { $_[0]{E} eq $_ } qw(__ f_ i_); }
+
+sub qp ($) {
+ my ($p) = @_;
+ $p =~ s{\\}{\\\\}g;
+ $p =~ s{\'}{\\'}g;
+ $p =~ s{\n}{\\n}g;
+ $p =~ s{\t}{\\t}g;
+ return "'$p'";
+};
+
+sub semiparse ($) {
+ ($_) = @_;
+ my @o;
+ #my $in = $_;
+ # entries contain
+ # T type
+ # E exact input text (does not contain here doc contents)
+ # P something to print in messages
+ # V value, only for: heredoc string
+ # Q quote characcter, only for: heredoc string
+ for (;;) {
+ s{^\s+}{};
+ if (s{^[\$\@\%]?[_0-9a-zA-Z]+}{}) {
+ push @o, { T => 'ident', E => $&, P => $& };
+ } elsif (s{^\<\<(['"]?)([A-Z_]+)\1}{}) {
+ my ($q,$d) = ($1,$2);
+ $q ||= '"';
+ push @o, { T => 'heredoc', Q => $q, Delim => $d,
+ E => $&, P => "<<$q$d$q" };
+ if (s{^
+ ( .* \n )
+ ( (?: (?! $d \n ) .* \n )*? )
+ $d \n
+ }{ $1 }xe) {
+ $o[$#o]{V} = $2;
+ } else {
+ m{^.*\n} or confess;
+ $_ = $&;
+ $o[$#o]{V} = $';
+ $o[$#o]{Invented} = 1;
+ }
+ } elsif (s{^ (["'])( (?: [^\\'"]
+ | \\ [^"']
+ | (?! \1 ) [^"]
+ )*
+ ) \1 }{}x) {
+ my ($q,$v) = ($1,$2);
+ push @o, { T => 'string', E => $&, P => "$q$q",
+ Q => $q, V => $v};
+ } elsif (s{^$perlop_re|^\;}{}) {
+ push @o, { T => 'op', E => $&, P => $& };
+ } elsif (s/^[[{(]//) {
+ push @o, { T => 'bra', E => $&, P => $& };
+ } elsif (s/^[]})]//) {
+ push @o, { T => 'ket', E => $&, P => $& };
+ } elsif (s/^( [\$\@\%] )( \{ )//x) {
+ push @o, { T => 'deref', E => $1, P => $1 },
+ { T => 'bra', E => $2, P => $2 };
+ } elsif (s/^ [\$\@\%] [^[^{] //x) {
+ push @o, { T => 'specvar', E => $&, P => $& };
+ } elsif (!length) {
+ last;
+ } elsif (s{^\#.*\n}{}) {
+ } else {
+ m{^.{0,10}};
+ die "cannot tokenise \`$&'";
+ }
+ }
+ for (my $i=0; $i+2 < @o; $i++) {
+ next unless $o[$i+1]{E} eq '.';
+ my @inputs = @o[$i, $i+2];
+ #print STDERR Dumper(\@inputs);
+ next if grep { !is_string($_) } @inputs;
+ my $q = $inputs[0]{Q};
+ next if grep { $_->{Q} ne $q } @inputs;
+ next if grep { $_->{Invented} } @inputs;
+ my $new = { T => 'joinedstrings',
+ E => (join '.', map { $_->{E} } @inputs),
+ P => (join '.', map { $_->{P} } @inputs),
+ V => (join '', map { $_->{V} } @inputs),
+ Q => $q,
+ };
+ @o = (@o[0..$i-1], $new, @o[$i+3..$#o]);
+ $i--; # counteracts $i++
+ }
+ debug $ichunkstart, "semiparsed: ".join ' ', map { $_->{P} } @o;
+ # debug $ichunkstart, "semiparsed V: ".join ' ', map { defined $_->{V} ? ">$_->{V}<" : '-' } @o;
+ return @o;
+}
+
+our @analysed_x;
+our @analysed_y;
+
+sub analyse_chunk_core () {
+ $before //= '';
+ die "plain deletion\n" unless defined $after;
+ my @xs = semiparse $before;
+ my @ys = semiparse $after;
+ @analysed_x = @analysed_y = ();
+ my $next_something = sub {
+ my ($ary,$anal,$var,$what) = @_;
+ die "ran out of $what\n" unless @$ary;
+ my $r = shift @$ary;
+ push @$anal, $r->{P};
+ $$var = $r;
+ };
+ my ($x,$y);
+ my $next_x = sub { $next_something->(\@xs, \@analysed_x, \$x, 'before'); };
+ my $next_y = sub { $next_something->(\@ys, \@analysed_y, \$y, 'after' ); };
+ our @y_expect_suffix = ();
+ ANALYSE:
+ for (;;) {
+ while (my $e = shift @y_expect_suffix) {
+ $next_y->();
+ $y->{E} eq $e
+ or die "suffix mismatch, expected $e got $y->{E}\n";
+ }
+ last unless @xs or @ys;
+ $next_x->();
+ $next_y->();
+ next if $x->{E} eq $y->{E};
+ next if $x->{E} eq 'sprintf' and $y->{E} eq 'f_';
+ next if $x->{E} eq 'die' and $y->{E} eq 'confess';
+ next if $x->{E} eq 'die' and $y->{E} eq 'fail';
+ foreach my $with_fh (qw(0 1)) {
+ next unless $x->{E} eq 'printf';
+ next unless $y->{E} eq 'print';
+ next unless @xs >= $with_fh;
+ next unless @ys > $with_fh;
+ if ($with_fh) {
+ next unless $xs[0]{E} eq $ys[0]{E};
+ next unless
+ $xs[0]{E} =~ m{^[A-Z]+$} or
+ $xs[0]{T} eq 'ident' && $xs[0]{E} =~ m{^\$};
+ }
+ next unless $ys[$with_fh]{E} eq 'f_';
+ # yay!
+ $next_x->() if $with_fh;
+ $next_y->() if $with_fh;
+ $next_y->(); # f_
+ next ANALYSE;
+ }
+ if ($y->{E} eq '+'
+ and @ys >= 3
+ and $ys[0]{E} eq '('
+ and is_trans($ys[1])) {
+ $next_y->(); # (
+ $next_y->(); # __ f_ i_
+ @y_expect_suffix = ')';
+ } elsif ($y->{E} eq '('
+ and @ys > 2
+ and is_trans($ys[0])
+ and @analysed_y
+ and (grep { $_ eq $analysed_y[-1] } (qw( => [ { ? : . ),
+ '(', ',') )) {
+ $next_y->(); # __ f_ i_
+ @y_expect_suffix = ')';
+ }
+ my $string_changed;
+ my $ye = $y->{E};
+ if (is_trans($y)) {
+ $next_y->();
+ die "__ on non-string $y->{P}\n" unless is_string($y);
+ die "__ on was non-string $x->{P}\n" unless is_string($x);
+ if ($y->{Q} ne "'") {
+ die "var subst in new string\n"
+ if $y->{V} =~ m{(?<!\\) [\$\@]};
+ }
+ eval {
+ die "__ string changed\n" unless $y->{V} eq $x->{V};
+ die "__ string quote changed\n" unless $y->{Q} eq $x->{Q};
+ };
+ $string_changed = $@;
+ }
+ if ($ye eq '__') {
+ $_ = $y->{V};
+ die "percent $& in __ ' string\n" if m{\%};
+ die $string_changed if length $string_changed;
+ next;
+ }
+ if ($ye eq 'i_') {
+ die $string_changed if length $string_changed;
+ next;
+ }
+ if ($ye eq 'f_') {
+ my $fmt = $y->{V};
+ die "no percent in f_ string\n" unless $fmt =~ m{\%};
+ next unless $string_changed;
+ die "f_ old string '-quoted\n" if $x->{Q} ne '"';
+ my $xs = $x->{V};
+ my $exactly = sub {
+ my ($lit, $what) = @_;
+ my $xl = substr($xs, 0, length($lit));
+ if ($xl ne $lit) {
+ debug $ichunkstart, "not exactly x: ..".qp($xs);
+ debug $ichunkstart, "not exactly y: ".qp($lit);
+ my $next = @ys ? $ys[0]{P} : '(end)';
+ die "string contents mismatch near $what before $next\n";
+ }
+ $xs = substr($xs, length($lit));
+ };
+ for (;;) {
+ #print STDERR Dumper($fmt, $xs, \@xs, @ys);
+ if ($fmt !~ m{\%[^\%]}) {
+ $exactly->($fmt, '(tail)');
+ $fmt = '';
+ die "text deleted from end of string: ".qp($xs)."\n"
+ if length $xs;
+ last;
+ }
+ $exactly->($`, '(literal)');
+ $fmt = $';
+ if ($& eq '%%') { $exactly->('%', '%%'); next; }
+ elsif ($& ne '%s') { die "unhandled %-subst $&\n"; }
+ $next_y->();
+ die "expected comma, got $y->{P}\n" unless $y->{E} eq ',';
+ if (!length $fmt and
+ !length $xs and
+ @xs and
+ $xs[0]{E} eq '.') {
+ # X has "<earlier>" . <something>
+ # Y has "<earlier>%s" [other args] , <something>
+ $next_x->(); # eat the '.'
+ next;
+ }
+ if ($xs =~ m{^\@}) {
+ $next_y->();
+ die "\@... => not string" unless is_string($y);
+ die "\@... => $y->{P}" if $y->{Q} ne '"';
+ $exactly->($y->{V}, $y->{P});
+ next;
+ }
+ my $bras = 0;
+ for (;;) {
+ if (!$bras and !@ys) {
+ last;
+ }
+ $next_y->();
+ if (!$bras and
+ (grep { $y->{E} eq $_ } qw( or xor and not ; :
+ if unless while when )
+ or $y->{E} eq ','
+ or $y->{T} eq 'ket'
+ )) {
+ # lookahead shows close of containing scope
+ # or lower precedence operator
+ unshift @ys, $y;
+ pop @analysed_y;
+ last;
+ }
+ $xs =~ s{^\s+}{} if $bras;
+ if (is_string($y) and $y->{Q} eq '"') {
+ $exactly->($y->{V}, $y->{P});
+ next;
+ }
+ $exactly->($y->{E}, $y->{P});
+ if ($y->{T} eq 'bra' or $y->{E} eq '?') {
+ $bras++;
+ } elsif ($y->{T} eq 'ket' or $y->{E} eq ':') {
+ die "too many kets at $y->{E}\n" unless $bras;
+ $bras--;
+ }
+ }
+ }
+ next;
+ }
+ die "mismatch $x->{P} => $y->{P}\n";
+ }
+}
+
+sub analyse_chunk () {
+ for (;;) {
+ eval { analyse_chunk_core(); };
+ return unless length $@;
+ if ($@ =~ m{^missing end of here doc (\S+)\n}) {
+ # fudge this
+ # (this never happens now, but in the future we might
+ # want this code again eg to try adding to the chunk)
+ $before .= "\n$1\n";
+ $after .= "\n$1\n";
+ next;
+ } else {
+ die $@;
+ }
+ }
+}
+
+our @report;
+our $last_filehead = -1;
+
+sub report_on_hunk () {
+ return unless @report;
+ if ($last_filehead != $ifilehead) {
+ foreach (my $i=$ifilehead; $i<$ifirsthunkhead; $i++) {
+ print $d[$i];
+ }
+ $last_filehead = $ifilehead;
+ }
+ my $dummy_r = { S => (scalar @d)+1, E => (scalar @d)+1 };
+ my $r;
+ for (my $i=$ihunkhead; ; $i++) {
+ for (;;) {
+ $r //= shift @report;
+ $r //= $dummy_r;
+ last if $i < $r->{E};
+ confess unless $r->{Done} == 03;
+ $r = undef;
+ }
+
+ last unless $i<$ihunkend;
+
+ foreach my $ds (@{ $debug[$i] }) {
+ print "# $ds\n";
+ }
+
+ if ($i == $r->{S}) {
+ print "!! $r->{M}";
+ $r->{Done} |= 01;
+ }
+ if ($i >= $r->{S}) {
+ print "!";
+ $r->{Done} |= 02;
+ } else {
+ print " ";
+ }
+ print $d[$i];
+ }
+ confess unless $r = $dummy_r;
+}
+
+for ($ifilehead = 0; l_ok $ifilehead; $ifilehead++) {
+ m{^diff} or next;
+ $ifirsthunkhead = $ifilehead;
+ while (l_ok $ifirsthunkhead and
+ m{^diff|^index|^---|^\Q+++\E}) {
+ $ifirsthunkhead++
+ }
+ $ihunkhead = $ifirsthunkhead;
+ while (l_ok $ihunkhead) {
+ m{^\@\@} or confess "$ihunkhead $_ ?";
+ my $i = $ihunkhead + 1;
+ for (; ; $i++) {
+ if (!l_ok $i or m{^ } or m{^\@\@}) {
+ if (defined $ichunkstart) {
+ $ichunkend = $i;
+ eval { analyse_chunk(); 1; };
+ if (length $@) {
+ debug $ichunkstart, "done x: @analysed_x";
+ debug $ichunkstart, "done y: @analysed_y";
+ push @report, { M => $@,
+ S => $ichunkstart,
+ E => $ichunkend };
+ }
+ $ichunkstart = $ichunkend = $before = $after = undef;
+ }
+ l_ok $i or last;
+ m{^\@\@} and last;
+ } elsif (m{^[-+]}) {
+ my $which = $& eq '-' ? \$before : \$after;
+ $ichunkstart //= $i;
+ $$which //= '';
+ $$which .= $';
+ } else {
+ confess "$i $_ ?";
+ }
+ }
+ $ihunkend = $i;
+ report_on_hunk();
+ $ichunkend = $i;
+ $ihunkhead = $i;
+ }
+}