#!/usr/bin/perl -w use strict; use Carp; use Data::Dumper; use Getopt::Long; our $debug = 0; GetOptions("debug|D+" => \$debug ); our @debug; sub debug ($$) { my ($i,$s) = @_; push @{ $debug[$i] }, $s if $debug; } my @d = <>; unshift @d, "# dummy line to make line 1 index 1 in \@d\n"; our $i_last_l_ok = -1; our $count_i_last_l_ok; sub l_ok ($) { my ($i) = @_; if ($i == $i_last_l_ok) { confess $i if $count_i_last_l_ok++ > 50; } else { $count_i_last_l_ok = 0; $i_last_l_ok = $i; } return unless $i < @d; $_ = $d[$i]; #print STDERR "L $i\n"; 1; } sub l ($) { my ($i) = @_; confess $i unless l_ok $i; }; our $perlop_text = <<'END'; # c&p from man perlop left terms and list operators (leftward) left -> nonassoc ++ -- right ** right ! ~ \ and unary + and - left =~ !~ left * / % x left + - . left << >> nonassoc named unary operators nonassoc < > <= >= lt gt le ge nonassoc == != <=> eq ne cmp ~~ left & left | ^ left && left || // nonassoc .. ... right ?: right = += -= *= etc. goto last next redo dump left , => nonassoc list operators (rightward) right not left and left or xor **= += *= &= &.= <<= &&= -= /= |= |.= >>= ||= .= %= ^= ^.= //= x= END our $perlop_re; sub prep_perlop () { my @ops; foreach (split /\n/, $perlop_text) { next unless m{\S}; s{\s+$}{}; s{^\s+}{}; s{^(?: left | right | nonassoc ) \s+}{}x; next if m{^terms and list operators}; next if m{^named unary}; next if m{^list operators}; s{ and unary.*}{}; s{ etc\. }{ }; s{\?\:}{ ? : }; foreach my $op (split /\s+/) { next unless length $op; next if $op =~ m{^\w+$}; $op =~ s/\W/\\$&/g; push @ops, $op; } } $perlop_re = '(?: '.(join ' | ', @ops).' )'; $perlop_re = qr{$perlop_re}x; #print STDERR "$perlop_re\n"; } prep_perlop(); our ($ifilehead, $ifirsthunkhead); our ($ihunkhead, $ihunkend); our ($ichunkstart, $ichunkend); our ($before, $after); sub is_string ($) { $_[0]{T} =~ m/heredoc|string/; }; sub semiparse ($) { ($_) = @_; my @o; # entries contain # T type # E exact input text (does not contain here doc contents) # P something to print in messages # V value, only for: heredoc string # Q quote characcter, only for: heredoc string for (;;) { s{^\s+}{}; if (s{^[\$\@\%]?[_0-9a-zA-Z]+}{}) { push @o, { T => 'ident', E => $&, P => $& }; } elsif (s{^\<\<(['"]?)([A-Z_]+)\1}{}) { my ($q,$d) = ($1,$2); $q ||= '"'; push @o, { T => 'heredoc', Q => $q, Delim => $d, E => $&, P => "<<$q$d$q" }; s{^ ( .* \n ) ( (?: (?! $d) .* \n )* ) $d \n }{ $1 }x or die "missing end of here doc $d\n"; $o[$#o]{V} = $2; } elsif (s{^ (["'])( (?: [^\\'"] | \\ [^"'] | (?! \1 ) [^"] )* ) \1 }{}x) { my ($q,$v) = ($1,$2); push @o, { T => 'string', E => $&, P => "$q-string", Q => $q, V => $v}; } elsif (s{^$perlop_re|^\;}{}) { push @o, { T => 'op', E => $&, P => $& }; } elsif (s/[[{(]//) { push @o, { T => 'bra', E => $&, P => $& }; } elsif (s/[]})]//) { push @o, { T => 'ket', E => $&, P => $& }; } elsif (s/^( [\$\@\%] )( \{ )//x) { push @o, { T => 'deref', E => $1, P => $1 }, { T => 'bra', E => $2, P => $2 }; } elsif (s/^ [\$\@\%] [^[^{] //x) { push @o, { T => 'specvar', E => $&, P => $& }; } elsif (!length) { last; } else { m{^.{0,10}}; die "cannot tokenise \`$&'"; } } for (my $i=@o-2; $i>0; --$i) { next unless $o[$i+1]{E} eq '.'; my @inputs = @o[$i, $i+2]; next if grep { !is_string($_) } @inputs; my $q = $inputs[0]{Q}; next if grep { $_->{Q} ne $q } @inputs; my $new = { T => 'joinedstrings', E => (join '.', map { $_->{E} } @inputs), P => (join '.', map { $_->{P} } @inputs), V => (join '', map { $_->{V} } @inputs), Q => $q, }; @o = (@o[0..$i-1], $new, @o[$i+3..$#o]); print STDERR Dumper(\@o); } debug $ichunkstart, "semiparsed: ".join ' ', map { $_->{P} } @o; return @o; } our @analysed_x; our @analysed_y; sub analyse_chunk_core () { die "plain deletion\n" unless defined $after; die "plain insertion\n" unless defined $before; my @xs = semiparse $before; my @ys = semiparse $after; @analysed_x = @analysed_y = (); my $next_something = sub { my ($ary,$anal,$var,$what) = @_; die "ran out of $what\n" unless @$ary; my $r = shift @$ary; push @$anal, $r->{P}; $$var = $r; }; my ($x,$y); my $next_x = sub { $next_something->(\@xs, \@analysed_x, \$x, 'before'); }; my $next_y = sub { $next_something->(\@ys, \@analysed_y, \$y, 'after' ); }; for (;;) { last unless @xs or @ys; $next_x->(); $next_y->(); next if $x->{E} eq $y->{E}; my $string_changed; my $ye = $y->{E}; if ($ye eq '__' or $ye eq 'f_') { $next_y->(); die "__ on non-string $y->{P}\n" unless is_string($y); die "__ on was non-string $y->{P}\n" unless is_string($x); if ($y->{Q} ne "'") { die "var subst in new string\n" if $y->{V} =~ m{(?{V} eq $x->{V}; die "__ string quote changed\n" unless $y->{Q} eq $x->{Q}; }; $string_changed = $@; } if ($ye eq '__') { $_ = $y->{V}; die "percent $& in __ ' string\n" if m{\%}; die $string_changed if length $string_changed; next; } if ($ye eq 'f_') { my $fmt = $y->{V}; die "no percent in _f string\n" unless $fmt =~ m{\%}; next unless $string_changed; die "f_ old string '-quoted\n" if $x->{Q} ne '"'; my $xs = $x->{V}; my $exactly = sub { my ($lit, $what) = @_; my $xl = substr($xs, 0, length($lit)); if ($xl ne $lit) { debug $ichunkstart, "not exactly x: $xs"; debug $ichunkstart, "not exactly y: $lit"; my $next = @ys ? $ys[0]{P} : '(end)'; die "string contents mismatch near $what before $next\n"; } $xs = substr($xs, length($lit)); }; for (;;) { if ($fmt !~ m{\%[^\%]}) { $exactly->($fmt, '(tail)'); $fmt = ''; last; } $exactly->($`, '(literal)'); $fmt = $'; if ($& eq '%%') { $exactly->('%', '%%'); next; } elsif ($& ne '%s') { die "unhandled %-subst $&\n"; } $next_y->(); die "expected comma, got $y->{P}\n" unless $y->{E} eq ','; if ($xs =~ m{^\@}) { $next_y->(); die "\@... => not string" unless is_string($y); die "\@... => $y->{P}" if $y->{Q} ne '"'; $exactly->($y->{V}, $y->{P}); next; } my $bras = 0; for (;;) { if (!$bras and !@ys) { last; } $next_y->(); if (!$bras and (grep { $y->{E} eq $_ } qw( or xor and not ; : ) or $y->{T} eq 'ket' )) { unshift @ys, $y; last; } $xs =~ s{^\s+}{}; #debug $ichunkstart, "TOKEN $y->{P}\n"; $exactly->($y->{E}, $y->{P}); if ($y->{T} eq 'bra' or $y->{E} eq '?') { $bras++; } elsif ($y->{T} eq 'ket' or $y->{E} eq ':') { die "too many kets at $y->{E}\n" unless $bras; $bras--; } } } next; } die "mismatch $x->{P} => $y->{P}\n"; } } sub analyse_chunk () { for (;;) { eval { analyse_chunk_core(); }; return unless length $@; if ($@ =~ m{^missing end of here doc (\S+)\n}) { # fudge this $before .= "\n$1\n"; $after .= "\n$1\n"; next; } else { die $@; } } } our @report; our $last_filehead = -1; sub report_on_hunk () { return unless @report; if ($last_filehead != $ifilehead) { foreach (my $i=$ifilehead; $i<$ifirsthunkhead; $i++) { print $d[$i]; } $last_filehead = $ifilehead; } my $dummy_r = { S => (scalar @d)+1, E => (scalar @d)+1 }; my $r; for (my $i=$ihunkhead; ; $i++) { for (;;) { $r //= shift @report; $r //= $dummy_r; last if $i < $r->{E}; confess unless $r->{Done} == 03; $r = undef; } last unless $i<$ihunkend; foreach my $ds (@{ $debug[$i] }) { print "# $ds\n"; } if ($i == $r->{S}) { print "!! $r->{M}"; $r->{Done} |= 01; } if ($i >= $r->{S}) { print "!"; $r->{Done} |= 02; } else { print " "; } print $d[$i]; } confess unless $r = $dummy_r; } for ($ifilehead = 0; l_ok $ifilehead; $ifilehead++) { m{^diff} or next; $ifirsthunkhead = $ifilehead; while (l_ok $ifirsthunkhead and m{^diff|^index|^---|^\Q+++\E}) { $ifirsthunkhead++ } $ihunkhead = $ifirsthunkhead; while (l_ok $ihunkhead) { m{^\@\@} or confess "$ihunkhead $_ ?"; my $i = $ihunkhead + 1; for (; ; $i++) { if (!l_ok $i or m{^ } or m{^\@\@}) { if (defined $ichunkstart) { $ichunkend = $i; eval { analyse_chunk(); 1; }; if (length $@) { debug $ichunkstart, "done x: @analysed_x"; debug $ichunkstart, "done y: @analysed_y"; push @report, { M => $@, S => $ichunkstart, E => $ichunkend }; } $ichunkstart = $ichunkend = $before = $after = undef; } l_ok $i or last; m{^\@\@} and last; } elsif (m{^[-+]}) { my $which = $& eq '-' ? \$before : \$after; $ichunkstart //= $i; $$which //= ''; $$which .= $'; } else { confess "$i $_ ?"; } } $ihunkend = $i; report_on_hunk(); $ichunkend = $i; $ihunkhead = $i; } }