summaryrefslogtreecommitdiff
path: root/perltest.sh
diff options
context:
space:
mode:
Diffstat (limited to 'perltest.sh')
-rwxr-xr-xperltest.sh75
1 files changed, 64 insertions, 11 deletions
diff --git a/perltest.sh b/perltest.sh
index 1a7679a..5e6c466 100755
--- a/perltest.sh
+++ b/perltest.sh
@@ -43,15 +43,25 @@ fi
# afteralltext ignored
# dupnames ignored (Perl always allows)
# jitstack ignored
-# mark ignored
+# mark show mark information
# no_auto_possess ignored
-# no_start_optimize ignored
+# no_start_optimize insert (??{""}) at pattern start (disables optimizing)
+# -no_start_optimize ignored
# subject_literal does not process subjects for escapes
# ucp sets Perl's /u modifier
# utf invoke UTF-8 functionality
#
+# Comment lines are ignored. The #pattern command can be used to set modifiers
+# that will be added to each subsequent pattern, after any modifiers it may
+# already have. NOTE: this is different to pcre2test where #pattern sets
+# defaults which can be overridden on individual patterns. The #subject command
+# may be used to set or unset a default "mark" modifier for data lines. This is
+# the only use of #subject that is supported. The #perltest, #forbid_utf, and
+# #newline_default commands, which are needed in the relevant pcre2test files,
+# are ignored. Any other #-command is ignored, with a warning message.
+#
# The data lines must not have any pcre2test modifiers. Unless
-# "subject_litersl" is on the pattern, data lines are processed as
+# "subject_literal" is on the pattern, data lines are processed as
# Perl double-quoted strings, so if they contain " $ or @ characters, these
# have to be escaped. For this reason, all such characters in the
# Perl-compatible testinput1 and testinput4 files are escaped so that they can
@@ -127,7 +137,42 @@ for (;;)
printf " re> " if $interact;
last if ! ($_ = <$infile>);
printf $outfile "$_" if ! $interact;
- next if ($_ =~ /^\s*$/ || $_ =~ /^#/);
+ next if ($_ =~ /^\s*$/ || $_ =~ /^#[\s!]/);
+
+ # A few of pcre2test's #-commands are supported, or just ignored. Any others
+ # cause an error.
+
+ if ($_ =~ /^#pattern(.*)/)
+ {
+ $extra_modifiers = $1;
+ chomp($extra_modifiers);
+ $extra_modifiers =~ s/\s+$//;
+ next;
+ }
+ elsif ($_ =~ /^#subject(.*)/)
+ {
+ $mod = $1;
+ chomp($mod);
+ $mod =~ s/\s+$//;
+ if ($mod =~ s/(-?)mark,?//)
+ {
+ $minus = $1;
+ $default_show_mark = ($minus =~ /^$/);
+ }
+ if ($mod !~ /^\s*$/)
+ {
+ printf $outfile "** Warning: \"$mod\" in #subject ignored\n";
+ }
+ next;
+ }
+ elsif ($_ =~ /^#/)
+ {
+ if ($_ !~ /^#newline_default|^#perltest|^#forbid_utf/)
+ {
+ printf $outfile "** Warning: #-command ignored: %s", $_;
+ }
+ next;
+ }
$pattern = $_;
@@ -146,7 +191,9 @@ for (;;)
$pattern =~ /^\s*((.).*\2)(.*)$/s;
$pat = $1;
- $mod = $3;
+ $del = $2;
+ $mod = "$3,$extra_modifiers";
+ $mod =~ s/^,\s*//;
# The private "aftertext" modifier means "print $' afterwards".
@@ -172,18 +219,24 @@ for (;;)
$mod =~ s/jitstack=\d+,?//;
- # Remove "mark" (asks pcre2test to check MARK data) */
+ # The "mark" modifier requests checking of MARK data */
- $mod =~ s/mark,?//;
+ $show_mark = $default_show_mark | ($mod =~ s/mark,?//);
# "ucp" asks pcre2test to set PCRE2_UCP; change this to /u for Perl
$mod =~ s/ucp,?/u/;
- # Remove "no_auto_possess" and "no_start_optimize" (disable PCRE2 optimizations)
+ # Remove "no_auto_possess".
$mod =~ s/no_auto_possess,?//;
- $mod =~ s/no_start_optimize,?//;
+
+ # Use no_start_optimize (disable PCRE2 start-up optimization) to disable Perl
+ # optimization by inserting (??{""}) at the start of the pattern. We may
+ # also encounter -no_start_optimize from a #pattern setting.
+
+ $mod =~ s/-no_start_optimize,?//;
+ if ($mod =~ s/no_start_optimize,?//) { $pat =~ s/$del/$del(??{""})/; }
# Add back retained modifiers and check that the pattern is valid.
@@ -279,7 +332,7 @@ for (;;)
elsif (scalar(@subs) == 0)
{
printf $outfile "No match";
- if (defined $REGERROR && $REGERROR != 1)
+ if ($show_mark && defined $REGERROR && $REGERROR != 1)
{ printf $outfile (", mark = %s", &pchars($REGERROR)); }
printf $outfile "\n";
}
@@ -307,7 +360,7 @@ for (;;)
# set and the input pattern was a UTF-8 string. We can, however, force
# it to be so marked.
- if (defined $REGMARK && $REGMARK != 1)
+ if ($show_mark && defined $REGMARK && $REGMARK != 1)
{
$xx = $REGMARK;
$xx = Encode::decode_utf8($xx) if $utf8;