diff options
Diffstat (limited to 'perltest.sh')
-rwxr-xr-x | perltest.sh | 75 |
1 files changed, 64 insertions, 11 deletions
diff --git a/perltest.sh b/perltest.sh index 1a7679a..5e6c466 100755 --- a/perltest.sh +++ b/perltest.sh @@ -43,15 +43,25 @@ fi # afteralltext ignored # dupnames ignored (Perl always allows) # jitstack ignored -# mark ignored +# mark show mark information # no_auto_possess ignored -# no_start_optimize ignored +# no_start_optimize insert (??{""}) at pattern start (disables optimizing) +# -no_start_optimize ignored # subject_literal does not process subjects for escapes # ucp sets Perl's /u modifier # utf invoke UTF-8 functionality # +# Comment lines are ignored. The #pattern command can be used to set modifiers +# that will be added to each subsequent pattern, after any modifiers it may +# already have. NOTE: this is different to pcre2test where #pattern sets +# defaults which can be overridden on individual patterns. The #subject command +# may be used to set or unset a default "mark" modifier for data lines. This is +# the only use of #subject that is supported. The #perltest, #forbid_utf, and +# #newline_default commands, which are needed in the relevant pcre2test files, +# are ignored. Any other #-command is ignored, with a warning message. +# # The data lines must not have any pcre2test modifiers. Unless -# "subject_litersl" is on the pattern, data lines are processed as +# "subject_literal" is on the pattern, data lines are processed as # Perl double-quoted strings, so if they contain " $ or @ characters, these # have to be escaped. For this reason, all such characters in the # Perl-compatible testinput1 and testinput4 files are escaped so that they can @@ -127,7 +137,42 @@ for (;;) printf " re> " if $interact; last if ! ($_ = <$infile>); printf $outfile "$_" if ! $interact; - next if ($_ =~ /^\s*$/ || $_ =~ /^#/); + next if ($_ =~ /^\s*$/ || $_ =~ /^#[\s!]/); + + # A few of pcre2test's #-commands are supported, or just ignored. Any others + # cause an error. + + if ($_ =~ /^#pattern(.*)/) + { + $extra_modifiers = $1; + chomp($extra_modifiers); + $extra_modifiers =~ s/\s+$//; + next; + } + elsif ($_ =~ /^#subject(.*)/) + { + $mod = $1; + chomp($mod); + $mod =~ s/\s+$//; + if ($mod =~ s/(-?)mark,?//) + { + $minus = $1; + $default_show_mark = ($minus =~ /^$/); + } + if ($mod !~ /^\s*$/) + { + printf $outfile "** Warning: \"$mod\" in #subject ignored\n"; + } + next; + } + elsif ($_ =~ /^#/) + { + if ($_ !~ /^#newline_default|^#perltest|^#forbid_utf/) + { + printf $outfile "** Warning: #-command ignored: %s", $_; + } + next; + } $pattern = $_; @@ -146,7 +191,9 @@ for (;;) $pattern =~ /^\s*((.).*\2)(.*)$/s; $pat = $1; - $mod = $3; + $del = $2; + $mod = "$3,$extra_modifiers"; + $mod =~ s/^,\s*//; # The private "aftertext" modifier means "print $' afterwards". @@ -172,18 +219,24 @@ for (;;) $mod =~ s/jitstack=\d+,?//; - # Remove "mark" (asks pcre2test to check MARK data) */ + # The "mark" modifier requests checking of MARK data */ - $mod =~ s/mark,?//; + $show_mark = $default_show_mark | ($mod =~ s/mark,?//); # "ucp" asks pcre2test to set PCRE2_UCP; change this to /u for Perl $mod =~ s/ucp,?/u/; - # Remove "no_auto_possess" and "no_start_optimize" (disable PCRE2 optimizations) + # Remove "no_auto_possess". $mod =~ s/no_auto_possess,?//; - $mod =~ s/no_start_optimize,?//; + + # Use no_start_optimize (disable PCRE2 start-up optimization) to disable Perl + # optimization by inserting (??{""}) at the start of the pattern. We may + # also encounter -no_start_optimize from a #pattern setting. + + $mod =~ s/-no_start_optimize,?//; + if ($mod =~ s/no_start_optimize,?//) { $pat =~ s/$del/$del(??{""})/; } # Add back retained modifiers and check that the pattern is valid. @@ -279,7 +332,7 @@ for (;;) elsif (scalar(@subs) == 0) { printf $outfile "No match"; - if (defined $REGERROR && $REGERROR != 1) + if ($show_mark && defined $REGERROR && $REGERROR != 1) { printf $outfile (", mark = %s", &pchars($REGERROR)); } printf $outfile "\n"; } @@ -307,7 +360,7 @@ for (;;) # set and the input pattern was a UTF-8 string. We can, however, force # it to be so marked. - if (defined $REGMARK && $REGMARK != 1) + if ($show_mark && defined $REGMARK && $REGMARK != 1) { $xx = $REGMARK; $xx = Encode::decode_utf8($xx) if $utf8; |