From 8f47fccc111486af01b6f115c8a770eb54217956 Mon Sep 17 00:00:00 2001
From: Ian Jackson <ijackson@chiark.greenend.org.uk>
Date: Sun, 30 Sep 2018 20:54:27 +0100
Subject: i18n: i18n-diff-auditor: New script for checking i18n work

(copied from "play" repo, 67fc89a905d3f922ffb3e33172cd8b8343945b1c)

Signed-off-by: Ian Jackson <ijackson@chiark.greenend.org.uk>
---
 i18n-diff-auditor | 385 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 385 insertions(+)
 create mode 100755 i18n-diff-auditor

diff --git a/i18n-diff-auditor b/i18n-diff-auditor
new file mode 100755
index 0000000..8fd623a
--- /dev/null
+++ b/i18n-diff-auditor
@@ -0,0 +1,385 @@
+#!/usr/bin/perl -w
+use strict;
+use Carp;
+use Data::Dumper;
+use Getopt::Long;
+
+open DEBUG, ">/dev/null" or die $!;
+
+GetOptions("debug|D" => sub { open DEBUG, ">&2" or die $!; }
+	   );
+
+our @debug;
+sub debug ($$) {
+    my ($i,$s) = @_;
+    push @{ $debug[$i] }, $s;
+}
+
+my @d = <>;
+unshift @d, "# dummy line to make line 1 index 1 in \@d\n";
+
+our $i_last_l_ok = -1;
+our $count_i_last_l_ok;
+
+sub l_ok ($) {
+    my ($i) = @_;
+
+    if ($i == $i_last_l_ok) {
+	confess $i if $count_i_last_l_ok++ > 50;
+    } else {
+	$count_i_last_l_ok = 0;
+	$i_last_l_ok = $i;
+    }
+
+    return unless $i < @d;
+    $_ = $d[$i];
+    #print STDERR "L $i\n";
+    1;
+}
+
+sub l ($) {
+    my ($i) = @_;
+    confess $i unless l_ok $i;
+};
+
+our $perlop_text = <<'END'; # c&p from man perlop
+           left        terms and list operators (leftward)
+           left        ->
+           nonassoc    ++ --
+           right       **
+           right       ! ~ \ and unary + and -
+           left        =~ !~
+           left        * / % x
+           left        + - .
+           left        << >>
+           nonassoc    named unary operators
+           nonassoc    < > <= >= lt gt le ge
+           nonassoc    == != <=> eq ne cmp ~~
+           left        &
+           left        | ^
+           left        &&
+           left        || //
+           nonassoc    ..  ...
+           right       ?:
+           right       = += -= *= etc. goto last next redo dump
+           left        , =>
+           nonassoc    list operators (rightward)
+           right       not
+           left        and
+           left        or xor
+
+           **=    +=    *=    &=    &.=    <<=    &&=
+                  -=    /=    |=    |.=    >>=    ||=
+                  .=    %=    ^=    ^.=           //=
+                        x=
+END
+
+our $perlop_re;
+
+sub prep_perlop () {
+    my @ops;
+    foreach (split /\n/, $perlop_text) {
+	next unless m{\S};
+	s{\s+$}{};
+	s{^\s+}{};
+	s{^(?: left | right | nonassoc ) \s+}{}x;
+	next if m{^terms and list operators};
+	next if m{^named unary};
+	next if m{^list operators};
+	s{ and unary.*}{};
+	s{ etc\. }{ };
+	s{\?\:}{ ? : };
+	foreach my $op (split /\s+/) {
+	    next unless length $op;
+	    next if $op =~ m{^\w+$};
+	    $op =~ s/\W/\\$&/g;
+	    push @ops, $op;
+	}
+    }
+    $perlop_re = '(?: '.(join ' | ', @ops).' )';
+    $perlop_re = qr{$perlop_re}x;
+    #print STDERR "$perlop_re\n";
+}
+
+prep_perlop();
+
+our ($ifilehead, $ifirsthunkhead);
+our ($ihunkhead, $ihunkend);
+our ($ichunkstart, $ichunkend);
+our ($before, $after);
+
+sub is_string ($) { $_[0]{T} =~ m/heredoc|string/; };
+
+sub semiparse ($) {
+    ($_) = @_;
+    my @o;
+    # entries contain
+    #   T     type
+    #   E     exact input text (does not contain here doc contents)
+    #   P     something to print in messages
+    #   V     value, only for: heredoc string
+    #   Q     quote characcter, only for: heredoc string
+    for (;;) {
+	s{^\s+}{};
+	if (s{^[\$\@\%]?[_0-9a-zA-Z]+}{}) {
+	    push @o, { T => 'ident', E => $&, P => $& };
+	} elsif (s{^\<\<(['"]?)([A-Z_]+)\1}{}) {
+	    my ($q,$d) = ($1,$2);
+	    $q ||= '"';
+	    push @o, { T => 'heredoc', Q => $q, Delim => $d,
+		       E => $&, P => "<<$q$d$q" };
+	    s{^
+                (             .* \n    )
+                ( (?: (?! $d) .* \n )* )
+                          $d     \n
+              }{ $1 }x or die "missing end of here doc $d\n";
+	    $o[$#o]{V} = $2;
+	} elsif (s{^ (["'])( (?: [^\\'"]
+                               | \\ [^"']
+                               | (?! \1 ) [^"]
+                              )*
+                       ) \1 }{}x) {
+	    my ($q,$v) = ($1,$2);
+	    push @o, { T => 'string', E => $&, P => "$q-string",
+		       Q => $q, V => $v};
+	} elsif (s{^$perlop_re|^\;}{}) {
+	    push @o, { T => 'op', E => $&, P => $& };
+	} elsif (s/[[{(]//) {
+	    push @o, { T => 'bra', E => $&, P => $& };
+	} elsif (s/[]})]//) {
+	    push @o, { T => 'ket', E => $&, P => $& };
+	} elsif (s/^( [\$\@\%] )( \{ )//x) {
+	    push @o, { T => 'deref', E => $1, P => $1 },
+		     { T => 'bra',   E => $2, P => $2 };
+	} elsif (s/^ [\$\@\%] [^[^{] //x) {
+	    push @o, { T => 'specvar', E => $&, P => $& };
+	} elsif (!length) {
+	    last;
+	} else {
+	    m{^.{0,10}};
+	    die "cannot tokenise \`$&'";
+	}
+    }
+    for (my $i=@o-2; $i>0; --$i) {
+	next unless $o[$i+1]{E} eq '.';
+	my @inputs = @o[$i, $i+2];
+	next if grep { !is_string($_) } @inputs;
+	my $q = $inputs[0]{Q};
+	next if grep { $_->{Q} ne $q } @inputs;
+	my $new = { T => 'joinedstrings',
+		    E => (join '.', map { $_->{E} } @inputs),
+		    P => (join '.', map { $_->{P} } @inputs),
+		    V => (join '',  map { $_->{V} } @inputs),
+		    Q => $q,
+		  };
+	@o = (@o[0..$i-1], $new, @o[$i+3..$#o]);
+	print STDERR Dumper(\@o);
+    }
+    debug $ichunkstart, "semiparsed: ".join ' ', map { $_->{P} } @o;
+    return @o;
+}	    
+
+sub analyse_chunk_core () {
+    die "plain deletion\n" unless defined $after;
+    die "plain insertion\n" unless defined $before;
+    my @xs = semiparse $before;
+    my @ys = semiparse $after;
+    my $next_something = sub {
+	my ($ary,$var,$what) = @_;
+	die "ran out of $what\n" unless @$ary;
+	$$var = shift @$ary;
+    };
+    my ($x,$y);
+    my $next_x = sub { $next_something->(\@xs, \$x, 'before'); };
+    my $next_y = sub { $next_something->(\@ys, \$y, 'after' ); };
+    for (;;) {
+	last unless @xs or @ys;
+	$next_x->();
+	$next_y->();
+	next if $x->{E} eq $y->{E};
+	my $string_changed;
+	my $ye = $y->{E};
+	if ($ye eq '__' or $ye eq 'f_') {
+	    $next_y->();
+	    die "__ on non-string $y->{P}\n"     unless is_string($y);
+	    die "__ on was non-string $y->{P}\n" unless is_string($x);
+	    if ($y->{Q} ne "'") {
+		die "var subst in new string\n"
+		    if $y->{V} =~ m{(?<!\\) [\$\@]};
+	    }
+	    eval {
+		die "__ string changed\n"       unless $y->{V} eq $x->{V};
+		die "__ string quote changed\n" unless $y->{Q} eq $x->{Q};
+	    };
+	    $string_changed = $@;
+	}
+	if ($ye eq '__') {
+	    $_ = $y->{V};
+	    die "percent $& in __ ' string\n" if m{\%};
+	    die $string_changed if length $string_changed;
+	    next;
+	}
+	if ($ye eq 'f_') {
+	    my $fmt = $y->{V};
+	    die "no percent in _f string\n" unless $fmt =~ m{\%};
+	    next unless $string_changed;
+	    die "f_ old string '-quoted\n" if $x->{Q} ne '"';
+	    my $xs = $x->{V};
+	    my $exactly = sub {
+		my ($lit, $what) = @_;
+		my $xl = substr($xs, 0, length($lit));
+		if ($xl ne $lit) {
+		    debug $ichunkstart, "not exactly x: $xl";
+		    debug $ichunkstart, "not exactly y: $lit";
+		    my $next = @ys ? $ys[0]{P} : '(end)';
+		    die "string contents mismatch near $what before $next\n";
+		}
+		$xs = substr($xs, length($lit));
+	    };
+	    for (;;) {
+		if ($fmt !~ m{\%[^\%]}) {
+		    $exactly->($fmt, '(tail)');
+		    $fmt = '';
+		    last;
+		}
+		$exactly->($`, '(literal)');
+		$fmt = $';
+		if ($& eq '%%') { $exactly->('%', '%%'); next; }
+		elsif ($& ne '%s') { die "unhandled %-subst $&\n"; }
+		$next_y->();
+		die "expected comma, got $y->{P}\n" unless $y->{E} eq ',';
+		if ($xs =~ m{^\@}) {
+		    $next_y->();
+		    die "\@... => not string" unless is_string($y);
+		    die "\@... => $y->{P}" if $y->{Q} ne '"';
+		    $exactly->($y->{V}, $y->{P});
+		    next;
+		}
+		my $bras = 0;
+		for (;;) {
+		    if (!$bras and !@ys) {
+			last;
+		    }
+		    $next_y->();
+		    if (!$bras and
+			(grep { $y->{E} eq $_ } qw( or xor and not ; : )
+			 or $y->{T} eq 'ket'
+			)) {
+			unshift @ys, $y;
+			last;
+		    }
+		    $xs =~ s{^\s+}{};
+		    #debug $ichunkstart, "TOKEN $y->{P}\n";
+		    $exactly->($y->{E}, $y->{P});
+		    if ($y->{T} eq 'bra' or $y->{E} eq '?') {
+			$bras++;
+		    } elsif ($y->{T} eq 'ket' or $y->{E} eq ':') {
+			die "too many kets at $y->{E}\n" unless $bras;
+			$bras--;
+		    }
+		}
+	    }
+	    next;
+	}
+	die "mismatch $x->{P} => $y->{P}\n";
+    }
+}
+
+sub analyse_chunk () {
+    for (;;) {
+	eval { analyse_chunk_core(); };
+	return unless length $@;
+	if ($@ =~ m{^missing end of here doc (\S+)\n}) {
+	    # fudge this
+	    $before .= "\n$1\n";
+	    $after .= "\n$1\n";
+	    next;
+	} else {
+	    die $@;
+	}
+    }
+}
+
+our @report;
+our $last_filehead = -1;
+
+sub report_on_hunk () {
+    return unless @report;
+    if ($last_filehead != $ifilehead) {
+	foreach (my $i=$ifilehead; $i<$ifirsthunkhead; $i++) {
+	    print $d[$i];
+	}
+	$last_filehead = $ifilehead;
+    }
+    my $dummy_r = { S => (scalar @d)+1, E => (scalar @d)+1 };
+    my $r;
+    for (my $i=$ihunkhead; ; $i++) {
+	for (;;) {
+	    $r //= shift @report;
+	    $r //= $dummy_r;
+	    last if $i < $r->{E};
+	    confess unless $r->{Done} == 03;
+	    $r = undef;
+	}
+
+	last unless $i<$ihunkend;
+
+	foreach my $ds (@{ $debug[$i] }) {
+	    print "# $ds\n";
+	}
+
+	if ($i == $r->{S}) {
+	    print "!! $r->{M}";
+	    $r->{Done} |= 01;
+	}
+	if ($i >= $r->{S}) {
+	    print "!";
+	    $r->{Done} |= 02;
+	} else {
+	    print " ";
+	}
+	print $d[$i];
+    }
+    confess unless $r = $dummy_r;
+}
+
+for ($ifilehead = 0; l_ok $ifilehead; $ifilehead++) {
+    m{^diff} or next;
+    $ifirsthunkhead = $ifilehead;
+    while (l_ok $ifirsthunkhead and
+	   m{^diff|^index|^---|^\Q+++\E}) {
+	$ifirsthunkhead++
+    }
+    $ihunkhead = $ifirsthunkhead;
+    while (l_ok $ihunkhead) {
+	m{^\@\@} or confess "$ihunkhead $_ ?";
+	my $i = $ihunkhead + 1;
+	for (; ; $i++) {
+	    if (!l_ok $i or m{^ } or m{^\@\@}) {
+		if (defined $ichunkstart) {
+		    $ichunkend = $i;
+		    eval { analyse_chunk(); 1; };
+		    if (length $@) {
+			push @report, { M => $@,
+					S => $ichunkstart,
+					E => $ichunkend };
+		    }
+		    $ichunkstart = $ichunkend = $before = $after = undef;
+		}
+		l_ok $i or last;
+		m{^\@\@} and last;
+	    } elsif (m{^[-+]}) {
+		my $which = $& eq '-' ? \$before : \$after;
+		$ichunkstart //= $i;
+		$$which //= '';
+		$$which .= $';
+	    } else {
+		confess "$i $_ ?";
+	    }
+	}
+	$ihunkend = $i;
+	report_on_hunk();
+	$ichunkend = $i;
+	$ihunkhead = $i;
+    }
+}
-- 
cgit v1.2.3