From 8f47fccc111486af01b6f115c8a770eb54217956 Mon Sep 17 00:00:00 2001 From: Ian Jackson Date: Sun, 30 Sep 2018 20:54:27 +0100 Subject: i18n: i18n-diff-auditor: New script for checking i18n work (copied from "play" repo, 67fc89a905d3f922ffb3e33172cd8b8343945b1c) Signed-off-by: Ian Jackson --- i18n-diff-auditor | 385 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 385 insertions(+) create mode 100755 i18n-diff-auditor diff --git a/i18n-diff-auditor b/i18n-diff-auditor new file mode 100755 index 0000000..8fd623a --- /dev/null +++ b/i18n-diff-auditor @@ -0,0 +1,385 @@ +#!/usr/bin/perl -w +use strict; +use Carp; +use Data::Dumper; +use Getopt::Long; + +open DEBUG, ">/dev/null" or die $!; + +GetOptions("debug|D" => sub { open DEBUG, ">&2" or die $!; } + ); + +our @debug; +sub debug ($$) { + my ($i,$s) = @_; + push @{ $debug[$i] }, $s; +} + +my @d = <>; +unshift @d, "# dummy line to make line 1 index 1 in \@d\n"; + +our $i_last_l_ok = -1; +our $count_i_last_l_ok; + +sub l_ok ($) { + my ($i) = @_; + + if ($i == $i_last_l_ok) { + confess $i if $count_i_last_l_ok++ > 50; + } else { + $count_i_last_l_ok = 0; + $i_last_l_ok = $i; + } + + return unless $i < @d; + $_ = $d[$i]; + #print STDERR "L $i\n"; + 1; +} + +sub l ($) { + my ($i) = @_; + confess $i unless l_ok $i; +}; + +our $perlop_text = <<'END'; # c&p from man perlop + left terms and list operators (leftward) + left -> + nonassoc ++ -- + right ** + right ! ~ \ and unary + and - + left =~ !~ + left * / % x + left + - . + left << >> + nonassoc named unary operators + nonassoc < > <= >= lt gt le ge + nonassoc == != <=> eq ne cmp ~~ + left & + left | ^ + left && + left || // + nonassoc .. ... + right ?: + right = += -= *= etc. goto last next redo dump + left , => + nonassoc list operators (rightward) + right not + left and + left or xor + + **= += *= &= &.= <<= &&= + -= /= |= |.= >>= ||= + .= %= ^= ^.= //= + x= +END + +our $perlop_re; + +sub prep_perlop () { + my @ops; + foreach (split /\n/, $perlop_text) { + next unless m{\S}; + s{\s+$}{}; + s{^\s+}{}; + s{^(?: left | right | nonassoc ) \s+}{}x; + next if m{^terms and list operators}; + next if m{^named unary}; + next if m{^list operators}; + s{ and unary.*}{}; + s{ etc\. }{ }; + s{\?\:}{ ? : }; + foreach my $op (split /\s+/) { + next unless length $op; + next if $op =~ m{^\w+$}; + $op =~ s/\W/\\$&/g; + push @ops, $op; + } + } + $perlop_re = '(?: '.(join ' | ', @ops).' )'; + $perlop_re = qr{$perlop_re}x; + #print STDERR "$perlop_re\n"; +} + +prep_perlop(); + +our ($ifilehead, $ifirsthunkhead); +our ($ihunkhead, $ihunkend); +our ($ichunkstart, $ichunkend); +our ($before, $after); + +sub is_string ($) { $_[0]{T} =~ m/heredoc|string/; }; + +sub semiparse ($) { + ($_) = @_; + my @o; + # entries contain + # T type + # E exact input text (does not contain here doc contents) + # P something to print in messages + # V value, only for: heredoc string + # Q quote characcter, only for: heredoc string + for (;;) { + s{^\s+}{}; + if (s{^[\$\@\%]?[_0-9a-zA-Z]+}{}) { + push @o, { T => 'ident', E => $&, P => $& }; + } elsif (s{^\<\<(['"]?)([A-Z_]+)\1}{}) { + my ($q,$d) = ($1,$2); + $q ||= '"'; + push @o, { T => 'heredoc', Q => $q, Delim => $d, + E => $&, P => "<<$q$d$q" }; + s{^ + ( .* \n ) + ( (?: (?! $d) .* \n )* ) + $d \n + }{ $1 }x or die "missing end of here doc $d\n"; + $o[$#o]{V} = $2; + } elsif (s{^ (["'])( (?: [^\\'"] + | \\ [^"'] + | (?! \1 ) [^"] + )* + ) \1 }{}x) { + my ($q,$v) = ($1,$2); + push @o, { T => 'string', E => $&, P => "$q-string", + Q => $q, V => $v}; + } elsif (s{^$perlop_re|^\;}{}) { + push @o, { T => 'op', E => $&, P => $& }; + } elsif (s/[[{(]//) { + push @o, { T => 'bra', E => $&, P => $& }; + } elsif (s/[]})]//) { + push @o, { T => 'ket', E => $&, P => $& }; + } elsif (s/^( [\$\@\%] )( \{ )//x) { + push @o, { T => 'deref', E => $1, P => $1 }, + { T => 'bra', E => $2, P => $2 }; + } elsif (s/^ [\$\@\%] [^[^{] //x) { + push @o, { T => 'specvar', E => $&, P => $& }; + } elsif (!length) { + last; + } else { + m{^.{0,10}}; + die "cannot tokenise \`$&'"; + } + } + for (my $i=@o-2; $i>0; --$i) { + next unless $o[$i+1]{E} eq '.'; + my @inputs = @o[$i, $i+2]; + next if grep { !is_string($_) } @inputs; + my $q = $inputs[0]{Q}; + next if grep { $_->{Q} ne $q } @inputs; + my $new = { T => 'joinedstrings', + E => (join '.', map { $_->{E} } @inputs), + P => (join '.', map { $_->{P} } @inputs), + V => (join '', map { $_->{V} } @inputs), + Q => $q, + }; + @o = (@o[0..$i-1], $new, @o[$i+3..$#o]); + print STDERR Dumper(\@o); + } + debug $ichunkstart, "semiparsed: ".join ' ', map { $_->{P} } @o; + return @o; +} + +sub analyse_chunk_core () { + die "plain deletion\n" unless defined $after; + die "plain insertion\n" unless defined $before; + my @xs = semiparse $before; + my @ys = semiparse $after; + my $next_something = sub { + my ($ary,$var,$what) = @_; + die "ran out of $what\n" unless @$ary; + $$var = shift @$ary; + }; + my ($x,$y); + my $next_x = sub { $next_something->(\@xs, \$x, 'before'); }; + my $next_y = sub { $next_something->(\@ys, \$y, 'after' ); }; + for (;;) { + last unless @xs or @ys; + $next_x->(); + $next_y->(); + next if $x->{E} eq $y->{E}; + my $string_changed; + my $ye = $y->{E}; + if ($ye eq '__' or $ye eq 'f_') { + $next_y->(); + die "__ on non-string $y->{P}\n" unless is_string($y); + die "__ on was non-string $y->{P}\n" unless is_string($x); + if ($y->{Q} ne "'") { + die "var subst in new string\n" + if $y->{V} =~ m{(?{V} eq $x->{V}; + die "__ string quote changed\n" unless $y->{Q} eq $x->{Q}; + }; + $string_changed = $@; + } + if ($ye eq '__') { + $_ = $y->{V}; + die "percent $& in __ ' string\n" if m{\%}; + die $string_changed if length $string_changed; + next; + } + if ($ye eq 'f_') { + my $fmt = $y->{V}; + die "no percent in _f string\n" unless $fmt =~ m{\%}; + next unless $string_changed; + die "f_ old string '-quoted\n" if $x->{Q} ne '"'; + my $xs = $x->{V}; + my $exactly = sub { + my ($lit, $what) = @_; + my $xl = substr($xs, 0, length($lit)); + if ($xl ne $lit) { + debug $ichunkstart, "not exactly x: $xl"; + debug $ichunkstart, "not exactly y: $lit"; + my $next = @ys ? $ys[0]{P} : '(end)'; + die "string contents mismatch near $what before $next\n"; + } + $xs = substr($xs, length($lit)); + }; + for (;;) { + if ($fmt !~ m{\%[^\%]}) { + $exactly->($fmt, '(tail)'); + $fmt = ''; + last; + } + $exactly->($`, '(literal)'); + $fmt = $'; + if ($& eq '%%') { $exactly->('%', '%%'); next; } + elsif ($& ne '%s') { die "unhandled %-subst $&\n"; } + $next_y->(); + die "expected comma, got $y->{P}\n" unless $y->{E} eq ','; + if ($xs =~ m{^\@}) { + $next_y->(); + die "\@... => not string" unless is_string($y); + die "\@... => $y->{P}" if $y->{Q} ne '"'; + $exactly->($y->{V}, $y->{P}); + next; + } + my $bras = 0; + for (;;) { + if (!$bras and !@ys) { + last; + } + $next_y->(); + if (!$bras and + (grep { $y->{E} eq $_ } qw( or xor and not ; : ) + or $y->{T} eq 'ket' + )) { + unshift @ys, $y; + last; + } + $xs =~ s{^\s+}{}; + #debug $ichunkstart, "TOKEN $y->{P}\n"; + $exactly->($y->{E}, $y->{P}); + if ($y->{T} eq 'bra' or $y->{E} eq '?') { + $bras++; + } elsif ($y->{T} eq 'ket' or $y->{E} eq ':') { + die "too many kets at $y->{E}\n" unless $bras; + $bras--; + } + } + } + next; + } + die "mismatch $x->{P} => $y->{P}\n"; + } +} + +sub analyse_chunk () { + for (;;) { + eval { analyse_chunk_core(); }; + return unless length $@; + if ($@ =~ m{^missing end of here doc (\S+)\n}) { + # fudge this + $before .= "\n$1\n"; + $after .= "\n$1\n"; + next; + } else { + die $@; + } + } +} + +our @report; +our $last_filehead = -1; + +sub report_on_hunk () { + return unless @report; + if ($last_filehead != $ifilehead) { + foreach (my $i=$ifilehead; $i<$ifirsthunkhead; $i++) { + print $d[$i]; + } + $last_filehead = $ifilehead; + } + my $dummy_r = { S => (scalar @d)+1, E => (scalar @d)+1 }; + my $r; + for (my $i=$ihunkhead; ; $i++) { + for (;;) { + $r //= shift @report; + $r //= $dummy_r; + last if $i < $r->{E}; + confess unless $r->{Done} == 03; + $r = undef; + } + + last unless $i<$ihunkend; + + foreach my $ds (@{ $debug[$i] }) { + print "# $ds\n"; + } + + if ($i == $r->{S}) { + print "!! $r->{M}"; + $r->{Done} |= 01; + } + if ($i >= $r->{S}) { + print "!"; + $r->{Done} |= 02; + } else { + print " "; + } + print $d[$i]; + } + confess unless $r = $dummy_r; +} + +for ($ifilehead = 0; l_ok $ifilehead; $ifilehead++) { + m{^diff} or next; + $ifirsthunkhead = $ifilehead; + while (l_ok $ifirsthunkhead and + m{^diff|^index|^---|^\Q+++\E}) { + $ifirsthunkhead++ + } + $ihunkhead = $ifirsthunkhead; + while (l_ok $ihunkhead) { + m{^\@\@} or confess "$ihunkhead $_ ?"; + my $i = $ihunkhead + 1; + for (; ; $i++) { + if (!l_ok $i or m{^ } or m{^\@\@}) { + if (defined $ichunkstart) { + $ichunkend = $i; + eval { analyse_chunk(); 1; }; + if (length $@) { + push @report, { M => $@, + S => $ichunkstart, + E => $ichunkend }; + } + $ichunkstart = $ichunkend = $before = $after = undef; + } + l_ok $i or last; + m{^\@\@} and last; + } elsif (m{^[-+]}) { + my $which = $& eq '-' ? \$before : \$after; + $ichunkstart //= $i; + $$which //= ''; + $$which .= $'; + } else { + confess "$i $_ ?"; + } + } + $ihunkend = $i; + report_on_hunk(); + $ichunkend = $i; + $ihunkhead = $i; + } +} -- cgit v1.2.3