diff options
Diffstat (limited to 'testdata')
54 files changed, 7776 insertions, 1944 deletions
diff --git a/testdata/grepinputM b/testdata/grepinputM new file mode 100644 index 0000000..9119e3d --- /dev/null +++ b/testdata/grepinputM @@ -0,0 +1,17 @@ +Data file for multiline tests of multiple matches. + +start end in between start +end and following +Other stuff + +start end in between start +end and following start +end other stuff + +start end in between start + +end + +** These two lines must be last. +start end in between start +end diff --git a/testdata/grepinputv b/testdata/grepinputv index d33d326..366d4fb 100644 --- a/testdata/grepinputv +++ b/testdata/grepinputv @@ -2,3 +2,8 @@ The quick brown fox jumps over the lazy dog. This time it jumps and jumps and jumps. +This line contains \E and (regex) *meta* [characters]. +The word is cat in this line +The caterpillar sat on the mat +The snowcat is not an animal +A buried feline in the syndicate diff --git a/testdata/grepoutput b/testdata/grepoutput index 9d41817..e49c2b2 100644 --- a/testdata/grepoutput +++ b/testdata/grepoutput @@ -392,7 +392,7 @@ pcre2grep: pcre2_match() gave error -47 while matching this text: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa -pcre2grep: Error -46, -47 or -53 means that a resource limit was exceeded. +pcre2grep: Error -46, -47, -53 or -63 means that a resource limit was exceeded. pcre2grep: Check your regex for nested unlimited loops. ---------------------------- Test 38 ------------------------------ This line contains a binary zero here > @@ -454,10 +454,15 @@ RC=1 ---------------------------- Test 51 ------------------------------ over the lazy dog. This time it jumps and jumps and jumps. +This line contains \E and (regex) *meta* [characters]. +The word is cat in this line +The caterpillar sat on the mat +The snowcat is not an animal +A buried feline in the syndicate RC=0 ---------------------------- Test 52 ------------------------------ -fox [1;31mjumps[00m -This time it [1;31mjumps[00m and [1;31mjumps[00m and [1;31mjumps[00m. +fox [1;31mjumps[0m +This time it [1;31mjumps[0m and [1;31mjumps[0m and [1;31mjumps[0m. RC=0 ---------------------------- Test 53 ------------------------------ 36972,6 @@ -474,14 +479,15 @@ RC=0 597:32,4 RC=0 ---------------------------- Test 55 ----------------------------- -Here is the [1;31mpattern[00m again. -That time it was on a [1;31mline by itself[00m. -This line contains [1;31mpattern[00m not on a [1;31mline by itself[00m. +Here is the [1;31mpattern[0m again. +That time it was on a [1;31mline by itself[0m. +This line contains [1;31mpattern[0m not on a [1;31mline by itself[0m. RC=0 ---------------------------- Test 56 ----------------------------- ./testdata/grepinput:456 ./testdata/grepinput3:0 ./testdata/grepinput8:0 +./testdata/grepinputM:0 ./testdata/grepinputv:1 ./testdata/grepinputx:0 RC=0 @@ -516,7 +522,7 @@ This is a file of miscellaneous text that is used as test data for checking that the pcregrep command is working correctly. The file must be more than 24K long so that it needs more than a single read -pcre2grep: Error -46, -47 or -53 means that a resource limit was exceeded. +pcre2grep: Error -46, -47, -53 or -63 means that a resource limit was exceeded. pcre2grep: Check your regex for nested unlimited loops. RC=1 ---------------------------- Test 63 ----------------------------- @@ -526,7 +532,7 @@ This is a file of miscellaneous text that is used as test data for checking that the pcregrep command is working correctly. The file must be more than 24K long so that it needs more than a single read -pcre2grep: Error -46, -47 or -53 means that a resource limit was exceeded. +pcre2grep: Error -46, -47, -53 or -63 means that a resource limit was exceeded. pcre2grep: Check your regex for nested unlimited loops. RC=1 ---------------------------- Test 64 ------------------------------ @@ -588,57 +594,84 @@ RC=0 ---------------------------- Test 70 ----------------------------- [1;31mtriple: t1_txt s1_tag s_txt p_tag p_txt o_tag o_txt -[00m[1;31mtriple: t3_txt s2_tag s_txt p_tag p_txt o_tag o_txt +[0m[1;31mtriple: t3_txt s2_tag s_txt p_tag p_txt o_tag o_txt -[00m[1;31mtriple: t4_txt s1_tag s_txt p_tag p_txt o_tag o_txt +[0m[1;31mtriple: t4_txt s1_tag s_txt p_tag p_txt o_tag o_txt -[00m[1;31mtriple: t6_txt s2_tag s_txt p_tag p_txt o_tag o_txt +[0m[1;31mtriple: t6_txt s2_tag s_txt p_tag p_txt o_tag o_txt -[00mRC=0 +[0mRC=0 +1:[1;31mtriple: t1_txt s1_tag s_txt p_tag p_txt o_tag o_txt + +[0m6:[1;31mtriple: t3_txt s2_tag s_txt p_tag p_txt o_tag o_txt + +[0m8:[1;31mtriple: t4_txt s1_tag s_txt p_tag p_txt o_tag o_txt + +[0m13:[1;31mtriple: t6_txt s2_tag s_txt p_tag p_txt o_tag o_txt + +[0mRC=0 +triple: t1_txt s1_tag s_txt p_tag p_txt o_tag o_txt + +triple: t3_txt s2_tag s_txt p_tag p_txt o_tag o_txt + +triple: t4_txt s1_tag s_txt p_tag p_txt o_tag o_txt + +triple: t6_txt s2_tag s_txt p_tag p_txt o_tag o_txt + +RC=0 +1:triple: t1_txt s1_tag s_txt p_tag p_txt o_tag o_txt + +6:triple: t3_txt s2_tag s_txt p_tag p_txt o_tag o_txt + +8:triple: t4_txt s1_tag s_txt p_tag p_txt o_tag o_txt + +13:triple: t6_txt s2_tag s_txt p_tag p_txt o_tag o_txt + +RC=0 ---------------------------- Test 71 ----------------------------- 01 RC=0 ---------------------------- Test 72 ----------------------------- -[1;31m01[00m0203040506 +[1;31m01[0m0203040506 RC=0 ---------------------------- Test 73 ----------------------------- -[1;31m01[00m +[1;31m01[0m RC=0 ---------------------------- Test 74 ----------------------------- 01 02 RC=0 ---------------------------- Test 75 ----------------------------- -[1;31m01[00m[1;31m02[00m03040506 +[1;31m01[0m[1;31m02[0m03040506 RC=0 ---------------------------- Test 76 ----------------------------- -[1;31m01[00m -[1;31m02[00m +[1;31m01[0m +[1;31m02[0m RC=0 ---------------------------- Test 77 ----------------------------- 01 03 RC=0 ---------------------------- Test 78 ----------------------------- -[1;31m01[00m02[1;31m03[00m040506 +[1;31m01[0m02[1;31m03[0m040506 RC=0 ---------------------------- Test 79 ----------------------------- -[1;31m01[00m -[1;31m03[00m +[1;31m01[0m +[1;31m03[0m RC=0 ---------------------------- Test 80 ----------------------------- 01 RC=0 ---------------------------- Test 81 ----------------------------- -[1;31m01[00m0203040506 +[1;31m01[0m0203040506 RC=0 ---------------------------- Test 82 ----------------------------- -[1;31m01[00m +[1;31m01[0m RC=0 ---------------------------- Test 83 ----------------------------- pcre2grep: line 4 of file ./testdata/grepinput3 is too long for the internal buffer -pcre2grep: the buffer size is 100 -pcre2grep: use the --buffer-size option to change it +pcre2grep: the maximum buffer size is 100 +pcre2grep: use the --max-buffer-size option to change it RC=2 ---------------------------- Test 84 ----------------------------- testdata/grepinputv:fox jumps @@ -702,9 +735,9 @@ RC=0 ./testdata/grepinput:zerothe. RC=0 ---------------------------- Test 101 ------------------------------ -./testdata/grepinput:[1;31m.[00m|[1;31mzero[00m|[1;31mthe[00m|[1;31m.[00m -./testdata/grepinput:[1;31mzero[00m|[1;31ma[00m -./testdata/grepinput:[1;31m.[00m|[1;31mzero[00m|[1;31mthe[00m|[1;31m.[00m +./testdata/grepinput:[1;31m.[0m|[1;31mzero[0m|[1;31mthe[0m|[1;31m.[0m +./testdata/grepinput:[1;31mzero[0m|[1;31ma[0m +./testdata/grepinput:[1;31m.[0m|[1;31mzero[0m|[1;31mthe[0m|[1;31m.[0m RC=0 ---------------------------- Test 102 ----------------------------- 2: @@ -725,21 +758,21 @@ RC=0 14: RC=0 ---------------------------- Test 105 ----------------------------- -[1;31m[00mtriple: t1_txt s1_tag s_txt p_tag p_txt o_tag o_txt -[1;31m[00m -[1;31m[00mtriple: t2_txt s1_tag s_txt p_tag p_txt o_tag -[1;31m[00mLorem [1;31mipsum[00m dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. -[1;31m[00m -[1;31m[00mtriple: t3_txt s2_tag s_txt p_tag p_txt o_tag o_txt -[1;31m[00m -[1;31m[00mtriple: t4_txt s1_tag s_txt p_tag p_txt o_tag o_txt -[1;31m[00m -[1;31m[00mtriple: t5_txt s1_tag s_txt p_tag p_txt o_tag -[1;31m[00mo_txt -[1;31m[00m -[1;31m[00mtriple: t6_txt s2_tag s_txt p_tag p_txt o_tag o_txt -[1;31m[00m -[1;31m[00mtriple: t7_txt s1_tag s_txt p_tag p_txt o_tag o_txt +triple: t1_txt s1_tag s_txt p_tag p_txt o_tag o_txt + +triple: t2_txt s1_tag s_txt p_tag p_txt o_tag +Lorem [1;31mipsum[0m dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. + +triple: t3_txt s2_tag s_txt p_tag p_txt o_tag o_txt + +triple: t4_txt s1_tag s_txt p_tag p_txt o_tag o_txt + +triple: t5_txt s1_tag s_txt p_tag p_txt o_tag +o_txt + +triple: t6_txt s2_tag s_txt p_tag p_txt o_tag o_txt + +triple: t7_txt s1_tag s_txt p_tag p_txt o_tag o_txt RC=0 ---------------------------- Test 106 ----------------------------- a @@ -787,3 +820,128 @@ RC=0 37204,12 37216,12 RC=0 +---------------------------- Test 113 ----------------------------- +480 +RC=0 +---------------------------- Test 114 ----------------------------- +testdata/grepinput:469 +testdata/grepinput3:0 +testdata/grepinput8:0 +testdata/grepinputM:2 +testdata/grepinputv:3 +testdata/grepinputx:6 +TOTAL:480 +RC=0 +---------------------------- Test 115 ----------------------------- +testdata/grepinput:469 +testdata/grepinputM:2 +testdata/grepinputv:3 +testdata/grepinputx:6 +TOTAL:480 +RC=0 +---------------------------- Test 116 ----------------------------- +478 +RC=0 +---------------------------- Test 117 ----------------------------- +469 +0 +0 +2 +3 +6 +480 +RC=0 +---------------------------- Test 118 ----------------------------- +testdata/grepinput3 +testdata/grepinput8 +RC=0 +---------------------------- Test 119 ----------------------------- +123 +456 +789 +--- +abc +def +xyz +--- +RC=0 +---------------------------- Test 120 ------------------------------ +./testdata/grepinput:the binary zero.:zerothe. +./testdata/grepinput:a binary zero:zeroa +./testdata/grepinput:the binary zero.:zerothe. +RC=0 +---------------------------- Test 121 ----------------------------- +This line contains \E and (regex) *meta* [characters]. +RC=0 +---------------------------- Test 122 ----------------------------- +over the lazy dog. +The word is cat in this line +RC=0 +---------------------------- Test 123 ----------------------------- +over the lazy dog. +The word is cat in this line +RC=0 +---------------------------- Test 124 ----------------------------- +3:[1;31mstart end[0m in between [1;31mstart +end[0m and following +7:[1;31mstart end[0m in between [1;31mstart +end[0m and following [1;31mstart +end[0m other stuff +11:[1;31mstart end[0m in between [1;31mstart + +end[0m +16:[1;31mstart end[0m in between [1;31mstart +end[0m +RC=0 +3:[1;31mstart end[0m in between [1;31mstart +end[0m and following +5-Other stuff +6- +7:[1;31mstart end[0m in between [1;31mstart +end[0m and following [1;31mstart +end[0m other stuff +10- +11:[1;31mstart end[0m in between [1;31mstart + +end[0m +14- +15-** These two lines must be last. +16:[1;31mstart end[0m in between [1;31mstart +end[0m +RC=0 +3:start end in between start +end and following +7:start end in between start +end and following start +end other stuff +11:start end in between start + +end +16:start end in between start +end +RC=0 +3:start end in between start +end and following +5-Other stuff +6- +7:start end in between start +end and following start +end other stuff +10- +11:start end in between start + +end +14- +15-** These two lines must be last. +16:start end in between start +end +RC=0 +---------------------------- Test 125 ----------------------------- +[1;31ma[0m[1;31mb[0m[1;31mc[0m[1;31md[0m +RC=0 +[1;31ma[0m[1;31mb[0m[1;31mc[0m[1;31md[0m +RC=0 +[1;31ma[0mb[1;31mc[0md +RC=0 +[1;31ma[0mb[1;31mc[0md +RC=0 diff --git a/testdata/grepoutputC b/testdata/grepoutputC index 0116645..60f249f 100644 --- a/testdata/grepoutputC +++ b/testdata/grepoutputC @@ -1,8 +1,42 @@ Arg1: [T] [he ] [ ] Arg2: |T| () () (0) Arg1: [T] [his] [s] Arg2: |T| () () (0) +Arg1: [T] [his] [s] Arg2: |T| () () (0) +Arg1: [T] [he ] [ ] Arg2: |T| () () (0) +Arg1: [T] [he ] [ ] Arg2: |T| () () (0) +Arg1: [T] [he ] [ ] Arg2: |T| () () (0) The quick brown This time it jumps and jumps and jumps. +This line contains \E and (regex) *meta* [characters]. +The word is cat in this line +The caterpillar sat on the mat +The snowcat is not an animal Arg1: [qu] [qu] Arg1: [ t] [ t] +Arg1: [ l] [ l] +Arg1: [wo] [wo] +Arg1: [ca] [ca] +Arg1: [sn] [sn] +The quick brown +This time it jumps and jumps and jumps. +This line contains \E and (regex) *meta* [characters]. +The word is cat in this line +The caterpillar sat on the mat +The snowcat is not an animal +0:T The quick brown +0:T This time it jumps and jumps and jumps. +0:T +This line contains \E and (regex) *meta* [characters]. +0:T +The word is cat in this line +0:T +The caterpillar sat on the mat +0:T +The snowcat is not an animal +T +T +T +T +T +T diff --git a/testdata/grepoutputN b/testdata/grepoutputN index 1f9f880..ba97e90 100644 --- a/testdata/grepoutputN +++ b/testdata/grepoutputN @@ -13,4 +13,5 @@ jkl---------------------------- Test N5 ------------------------------ 4:jkl---------------------------- Test N6 ------------------------------
1:abc
2:def
3:ghi -4:jkl
\ No newline at end of file +4:jkl---------------------------- Test N7 ------------------------------
+1:abcZERO2:def diff --git a/testdata/testinput1 b/testdata/testinput1 index 6d7bc80..9a9c5fd 100644 --- a/testdata/testinput1 +++ b/testdata/testinput1 @@ -95,17 +95,6 @@ aaac abbbbbbbbbbbac -/^(b+|a){1,2}?bc/ - bbc - -/^(b*|ba){1,2}?bc/ - babc - bbabc - bababc -\= Expect no match - bababbc - babababc - /^(ba|b*){1,2}?bc/ babc bbabc @@ -1434,11 +1423,6 @@ \= Expect no match aaa -/[\d-z]+/ - 12-34z -\= Expect no match - aaa - /\x5c/ \\ @@ -3631,13 +3615,6 @@ /a*/g abbab -/^[\d-a]/ - abcde - -things - 0digit -\= Expect no match - bcdef - /[[:space:]]+/ > \x09\x0a\x0c\x0d\x0b< @@ -5792,4 +5769,424 @@ name)/mark aaaccccaaa bccccb +# /x does not apply to MARK labels + +/x (*MARK:ab cd # comment +ef) x/x,mark + axxz + +/(?<=a(B){0}c)X/ + acX + +/(?<DEFINE>b)(?(DEFINE)(a+))(?&DEFINE)/ + bbbb +\= Expect no match + baaab + +/(?=.*[A-Z])(?=.*[a-z])(?=.*[0-9])(?=.*[,;:])(?=.{8,16})(?!.*[\s])/ + \ Fred:099 + +/(?=.*X)X$/ + \ X + +/(?s)(?=.*?)b/ + aabc + +/(Z)(a)\2{1,2}?(?-i)\1X/i + ZaAAZX + +/(?'c')XX(?'YYYYYYYYYYYYYYYYYYYYYYYCl')/ + +/[s[:digit:]\E-H]+/ + s09-H + +/[s[:digit:]\Q\E-H]+/ + s09-H + +/a+(?:|b)a/ + aaaa + +/X?(R||){3335}/ + +/(?1)(A(*COMMIT)|B)D/ + ABD + XABD + BAD + ABXABD +\= Expect no match + ABX + +/(?(DEFINE)(?<m> 1? (?=(?<cond>2)?) 1 2 (?('cond')|3))) + \A + () + (?&m) + \Z/x + 123 + +/^(?: +(?: A| (1? (?=(?<cond>2)?) (1) 2 (?('cond')|3)) ) +(Z) +)+$/x + AZ123Z +\= Expect no match + AZ12Z + +/^ (?(DEFINE) ( (?!(a)\2b)..) ) ()(?1) /x + acb +\= Expect no match + aab + +'(?>ab|abab){1,5}?M' + abababababababababababM + +'(?>ab|abab){2}?M' + abababM + +'((?(?=(a))a)+k)' + bbak + +'((?(?=(a))a|)+k)' + bbak + +'(?(?!(b))a|b)+k' + ababbalbbadabak + +/(?!(b))c|b/ + Ab + Ac + +/(?=(b))b|c/ + Ab + Ac + +/^(.|(.)(?1)\2)$/ + a + aba + abcba + ababa + abcdcba + +/^((.)(?1)\2|.?)$/ + a + aba + abba + abcba + ababa + abccba + abcdcba + abcddcba + +/^(.)(\1|a(?2))/ + bab + +/^(.|(.)(?1)?\2)$/ + abcba + +/^(?(?=(a))abc|def)/ + abc + +/^(?(?!(a))def|abc)/ + abc + +/^(?(?=(a)(*ACCEPT))abc|def)/ + abc + +/^(?(?!(a)(*ACCEPT))def|abc)/ + abc + +/^(?1)\d{3}(a)/ + a123a + +# This pattern uses a lot of named subpatterns in order to match email +# addresses in various formats. It's a heavy test for named subpatterns. In the +# <atext> group, slash is coded as \x{2f} so that this pattern can also be +# processed by perltest.sh, which does not cater for an escaped delimiter +# within the pattern. $ within the pattern must also be escaped. All $ and @ +# characters in subject strings are escaped so that Perl doesn't interpret them +# as variable insertions and " characters must also be escaped for Perl. + +# This set of subpatterns is more or less a direct transliteration of the BNF +# definitions in RFC2822, without any of the obsolete features. The addition of +# a possessive + to the definition of <phrase> reduced the match limit in PCRE2 +# from over 5 million to just under 400, and eliminated a very noticeable delay +# when this file was passed to perltest.sh. + +/(?ix)(?(DEFINE) +(?<addr_spec> (?&local_part) \@ (?&domain) ) +(?<angle_addr> (?&CFWS)?+ < (?&addr_spec) > (?&CFWS)?+ ) +(?<atext> [a-z\d!#\$%&'*+-\x{2f}=?^_`{|}~] ) +(?<atom> (?&CFWS)?+ (?&atext)+ (?&CFWS)?+ ) +(?<ccontent> (?&ctext) | (?"ed_pair) | (?&comment) ) +(?<ctext> [^\x{9}\x{10}\x{13}\x{7f}-\x{ff}\ ()\\] ) +(?<comment> \( (?: (?&FWS)?+ (?&ccontent) )*+ (?&FWS)?+ \) ) +(?<CFWS> (?: (?&FWS)?+ (?&comment) )* (?# NOT possessive) + (?: (?&FWS)?+ (?&comment) | (?&FWS) ) ) +(?<dcontent> (?&dtext) | (?"ed_pair) ) +(?<display_name> (?&phrase) ) +(?<domain> (?&dot_atom) | (?&domain_literal) ) +(?<domain_literal> (?&CFWS)?+ \[ (?: (?&FWS)?+ (?&dcontent) )* (?&FWS)?+ \] + (?&CFWS)?+ ) +(?<dot_atom> (?&CFWS)?+ (?&dot_atom_text) (?&CFWS)?+ ) +(?<dot_atom_text> (?&atext)++ (?: \. (?&atext)++)*+ ) +(?<dtext> [^\x{9}\x{10}\x{13}\x{7f}-\x{ff}\ \[\]\\] ) +(?<FWS> (?: [\t\ ]*+ \n)?+ [\t\ ]++ ) +(?<local_part> (?&dot_atom) | (?"ed_string) ) +(?<mailbox> (?&name_addr) | (?&addr_spec) ) +(?<name_addr> (?&display_name)? (?&angle_addr) ) +(?<phrase> (?&word)++ ) +(?<qcontent> (?&qtext) | (?"ed_pair) ) +(?<quoted_pair> " (?&text) ) +(?<quoted_string> (?&CFWS)?+ " (?: (?&FWS)?+ (?&qcontent))* (?&FWS)?+ " + (?&CFWS)?+ ) +(?<qtext> [^\x{9}\x{10}\x{13}\x{7f}-\x{ff}\ "\\] ) +(?<text> [^\r\n] ) +(?<word> (?&atom) | (?"ed_string) ) +) # End DEFINE +^(?&mailbox)$/ + Alan Other <user\@dom.ain> + <user\@dom.ain> + user\@dom.ain + user\@[] + user\@[domain literal] + user\@[domain literal with \"[square brackets\"] inside] + \"A. Other\" <user.1234\@dom.ain> (a comment) + A. Other <user.1234\@dom.ain> (a comment) + \"/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/\"\@x400-re.lay +\= Expect no match + A missing angle <user\@some.where + The quick brown fox + +# -------------------------------------------------------------------------- + +# This pattern uses named groups to match default PCRE2 patterns. It's another +# heavy test for named subpatterns. Once again, code slash as \x{2f} and escape +# $ even in classes so that this works with pcre2test. + +/(?sx)(?(DEFINE) + +(?<assertion> (?&simple_assertion) | (?&lookaround) ) + +(?<atomic_group> \( \? > (?®ex) \) ) + +(?<back_reference> \\ \d+ | + \\g (?: [+-]?\d+ | \{ (?: [+-]?\d+ | (?&groupname) ) \} ) | + \\k <(?&groupname)> | + \\k '(?&groupname)' | + \\k \{ (?&groupname) \} | + \( \? P= (?&groupname) \) ) + +(?<branch> (?:(?&assertion) | + (?&callout) | + (?&comment) | + (?&option_setting) | + (?&qualified_item) | + (?"ed_string) | + (?"ed_string_empty) | + (?&special_escape) | + (?&verb) + )* ) + +(?<callout> \(\?C (?: \d+ | + (?: (?<D>["'`^%\#\$]) + (?: \k'D'\k'D' | (?!\k'D') . )* \k'D' | + \{ (?: \}\} | [^}]*+ )* \} ) + )? \) ) + +(?<capturing_group> \( (?: \? P? < (?&groupname) > | \? ' (?&groupname) ' )? + (?®ex) \) ) + +(?<character_class> \[ \^?+ (?: \] (?&class_item)* | (?&class_item)+ ) \] ) + +(?<character_type> (?! \\N\{\w+\} ) \\ [dDsSwWhHvVRN] ) + +(?<class_item> (?: \[ : (?: + alnum|alpha|ascii|blank|cntrl|digit|graph|lower|print| + punct|space|upper|word|xdigit + ) : \] | + (?"ed_string) | + (?"ed_string_empty) | + (?&escaped_character) | + (?&character_type) | + [^]] ) ) + +(?<comment> \(\?\# [^)]* \) | (?"ed_string_empty) | \\E ) + +(?<condition> (?: \( [+-]? \d+ \) | + \( < (?&groupname) > \) | + \( ' (?&groupname) ' \) | + \( R \d* \) | + \( R & (?&groupname) \) | + \( (?&groupname) \) | + \( DEFINE \) | + \( VERSION >?=\d+(?:\.\d\d?)? \) | + (?&callout)?+ (?&comment)* (?&lookaround) ) ) + +(?<conditional_group> \(\? (?&condition) (?&branch) (?: \| (?&branch) )? \) ) + +(?<delimited_regex> (?<delimiter> [-\x{2f}!"'`=_:;,%&@~]) (?®ex) + \k'delimiter' .* ) + +(?<escaped_character> \\ (?: 0[0-7]{1,2} | [0-7]{1,3} | o\{ [0-7]+ \} | + x \{ (*COMMIT) [[:xdigit:]]* \} | x [[:xdigit:]]{0,2} | + [aefnrt] | c[[:print:]] | + [^[:alnum:]] ) ) + +(?<group> (?&capturing_group) | (?&non_capturing_group) | + (?&resetting_group) | (?&atomic_group) | + (?&conditional_group) ) + +(?<groupname> [a-zA-Z_]\w* ) + +(?<literal_character> (?! (?&range_qualifier) ) [^[()|*+?.\$\\] ) + +(?<lookaround> \(\? (?: = | ! | <= | <! ) (?®ex) \) ) + +(?<non_capturing_group> \(\? [iJmnsUx-]* : (?®ex) \) ) + +(?<option_setting> \(\? [iJmnsUx-]* \) ) + +(?<qualified_item> (?:\. | + (?&lookaround) | + (?&back_reference) | + (?&character_class) | + (?&character_type) | + (?&escaped_character) | + (?&group) | + (?&subroutine_call) | + (?&literal_character) | + (?"ed_string) + ) (?&comment)? (?&qualifier)? ) + +(?<qualifier> (?: [?*+] | (?&range_qualifier) ) [+?]? ) + +(?<quoted_string> (?: \\Q (?: (?!\\E | \k'delimiter') . )++ (?: \\E | ) ) ) + +(?<quoted_string_empty> \\Q\\E ) + +(?<range_qualifier> \{ (?: \d+ (?: , \d* )? | , \d+ ) \} ) + +(?<regex> (?&start_item)* (?&branch) (?: \| (?&branch) )* ) + +(?<resetting_group> \( \? \| (?®ex) \) ) + +(?<simple_assertion> \^ | \$ | \\A | \\b | \\B | \\G | \\z | \\Z ) + +(?<special_escape> \\K ) + +(?<start_item> \( \* (?: + ANY | + ANYCRLF | + BSR_ANYCRLF | + BSR_UNICODE | + CR | + CRLF | + LF | + LIMIT_MATCH=\d+ | + LIMIT_DEPTH=\d+ | + LIMIT_HEAP=\d+ | + NOTEMPTY | + NOTEMPTY_ATSTART | + NO_AUTO_POSSESS | + NO_DOTSTAR_ANCHOR | + NO_JIT | + NO_START_OPT | + NUL | + UTF | + UCP ) \) ) + +(?<subroutine_call> (?: \(\?R\) | \(\?[+-]?\d+\) | + \(\? (?: & | P> ) (?&groupname) \) | + \\g < (?&groupname) > | + \\g ' (?&groupname) ' | + \\g < [+-]? \d+ > | + \\g ' [+-]? \d+ ) ) + +(?<verb> \(\* (?: ACCEPT | FAIL | F | COMMIT | + (?:MARK)?:(?&verbname) | + (?:PRUNE|SKIP|THEN) (?: : (?&verbname)? )? ) \) ) + +(?<verbname> [^)]+ ) + +) # End DEFINE +# Kick it all off... +^(?&delimited_regex)$/subject_literal,jitstack=256 + /^(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\11*(\3\4)\1(?#)2$/ + /(cat(a(ract|tonic)|erpillar)) \1()2(3)/ + /^From +([^ ]+) +[a-zA-Z][a-zA-Z][a-zA-Z] +[a-zA-Z][a-zA-Z][a-zA-Z] +[0-9]?[0-9] +[0-9][0-9]:[0-9][0-9]/ + /^From\s+\S+\s+([a-zA-Z]{3}\s+){2}\d{1,2}\s+\d\d:\d\d/ + /<tr([\w\W\s\d][^<>]{0,})><TD([\w\W\s\d][^<>]{0,})>([\d]{0,}\.)(.*)((<BR>([\w\W\s\d][^<>]{0,})|[\s]{0,}))<\/a><\/TD><TD([\w\W\s\d][^<>]{0,})>([\w\W\s\d][^<>]{0,})<\/TD><TD([\w\W\s\d][^<>]{0,})>([\w\W\s\d][^<>]{0,})<\/TD><\/TR>/is + /^(?(DEFINE) (?<A> a) (?<B> b) ) (?&A) (?&B) / + /(?(DEFINE)(?<byte>2[0-4]\d|25[0-5]|1\d\d|[1-9]?\d))\b(?&byte)(\.(?&byte)){3}/ + /\b(?&byte)(\.(?&byte)){3}(?(DEFINE)(?<byte>2[0-4]\d|25[0-5]|1\d\d|[1-9]?\d))/ + /^(\w++|\s++)*$/ + /a+b?(*THEN)c+(*FAIL)/ + /(A (A|B(*ACCEPT)|C) D)(E)/x + /^\W*+(?:((.)\W*+(?1)\W*+\2|)|((.)\W*+(?3)\W*+\4|\W*+.\W*+))\W*+$/i + /A(*PRUNE)B(*SKIP)C(*THEN)D(*COMMIT)E(*F)F(*FAIL)G(?!)H(*ACCEPT)I/B + /(?C`a``b`)(?C'a''b')(?C"a""b")(?C^a^^b^)(?C%a%%b%)(?C#a##b#)(?C$a$$b$)(?C{a}}b})/B,callout_info + /(?sx)(?(DEFINE)(?<assertion> (?&simple_assertion) | (?&lookaround) )(?<atomic_group> \( \? > (?®ex) \) )(?<back_reference> \\ \d+ | \\g (?: [+-]?\d+ | \{ (?: [+-]?\d+ | (?&groupname) ) \} ) | \\k <(?&groupname)> | \\k '(?&groupname)' | \\k \{ (?&groupname) \} | \( \? P= (?&groupname) \) )(?<branch> (?:(?&assertion) | (?&callout) | (?&comment) | (?&option_setting) | (?&qualified_item) | (?"ed_string) | (?"ed_string_empty) | (?&special_escape) | (?&verb) )* )(?<callout> \(\?C (?: \d+ | (?: (?<D>["'`^%\#\$]) (?: \k'D'\k'D' | (?!\k'D') . )* \k'D' | \{ (?: \}\} | [^}]*+ )* \} ) )? \) )(?<capturing_group> \( (?: \? P? < (?&groupname) > | \? ' (?&groupname) ' )? (?®ex) \) )(?<character_class> \[ \^?+ (?: \] (?&class_item)* | (?&class_item)+ ) \] )(?<character_type> (?! \\N\{\w+\} ) \\ [dDsSwWhHvVRN] )(?<class_item> (?: \[ : (?: alnum|alpha|ascii|blank|cntrl|digit|graph|lower|print| punct|space|upper|word|xdigit ) : \] | (?"ed_string) | (?"ed_string_empty) | (?&escaped_character) | (?&character_type) | [^]] ) )(?<comment> \(\?\# [^)]* \) | (?"ed_string_empty) | \\E )(?<condition> (?: \( [+-]? \d+ \) | \( < (?&groupname) > \) | \( ' (?&groupname) ' \) | \( R \d* \) | \( R & (?&groupname) \) | \( (?&groupname) \) | \( DEFINE \) | \( VERSION >?=\d+(?:\.\d\d?)? \) | (?&callout)?+ (?&comment)* (?&lookaround) ) )(?<conditional_group> \(\? (?&condition) (?&branch) (?: \| (?&branch) )? \) )(?<delimited_regex> (?<delimiter> [-\x{2f}!"'`=_:;,%&@~]) (?®ex) \k'delimiter' .* )(?<escaped_character> \\ (?: 0[0-7]{1,2} | [0-7]{1,3} | o\{ [0-7]+ \} | x \{ (*COMMIT) [[:xdigit:]]* \} | x [[:xdigit:]]{0,2} | [aefnrt] | c[[:print:]] | [^[:alnum:]] ) )(?<group> (?&capturing_group) | (?&non_capturing_group) | (?&resetting_group) | (?&atomic_group) | (?&conditional_group) )(?<groupname> [a-zA-Z_]\w* )(?<literal_character> (?! (?&range_qualifier) ) [^[()|*+?.\$\\] )(?<lookaround> \(\? (?: = | ! | <= | <! ) (?®ex) \) )(?<non_capturing_group> \(\? [iJmnsUx-]* : (?®ex) \) )(?<option_setting> \(\? [iJmnsUx-]* \) )(?<qualified_item> (?:\. | (?&lookaround) | (?&back_reference) | (?&character_class) | (?&character_type) | (?&escaped_character) | (?&group) | (?&subroutine_call) | (?&literal_character) | (?"ed_string) ) (?&comment)? (?&qualifier)? )(?<qualifier> (?: [?*+] | (?&range_qualifier) ) [+?]? )(?<quoted_string> (?: \\Q (?: (?!\\E | \k'delimiter') . )++ (?: \\E | ) ) ) (?<quoted_string_empty> \\Q\\E ) (?<range_qualifier> \{ (?: \d+ (?: , \d* )? | , \d+ ) \} )(?<regex> (?&start_item)* (?&branch) (?: \| (?&branch) )* )(?<resetting_group> \( \? \| (?®ex) \) )(?<simple_assertion> \^ | \$ | \\A | \\b | \\B | \\G | \\z | \\Z )(?<special_escape> \\K )(?<start_item> \( \* (?: ANY | ANYCRLF | BSR_ANYCRLF | BSR_UNICODE | CR | CRLF | LF | LIMIT_MATCH=\d+ | LIMIT_DEPTH=\d+ | LIMIT_HEAP=\d+ | NOTEMPTY | NOTEMPTY_ATSTART | NO_AUTO_POSSESS | NO_DOTSTAR_ANCHOR | NO_JIT | NO_START_OPT | NUL | UTF | UCP ) \) )(?<subroutine_call> (?: \(\?R\) | \(\?[+-]?\d+\) | \(\? (?: & | P> ) (?&groupname) \) | \\g < (?&groupname) > | \\g ' (?&groupname) ' | \\g < [+-]? \d+ > | \\g ' [+-]? \d+ ) )(?<verb> \(\* (?: ACCEPT | FAIL | F | COMMIT | (?:MARK)?:(?&verbname) | (?:PRUNE|SKIP|THEN) (?: : (?&verbname)? )? ) \) )(?<verbname> [^)]+ ))^(?&delimited_regex)$/ +\= Expect no match + /((?(?C'')\QX\E(?!((?(?C'')(?!X=X));=)r*X=X));=)/ + /(?:(?(2y)a|b)(X))+/ + /a(*MARK)b/ + /a(*CR)b/ + /(?P<abn>(?P=abn)(?<badstufxxx)/ + +# -------------------------------------------------------------------------- + +/<(?x:[a b])>/xx + < > + +/<(?:[a b])>/xx + < > + +/<(?xxx:[a b])>/ + < > + +/<(?-x:[a b])>/xx + < > + +/[[:digit:]-]+/ + 12-24 + +/((?<=((*ACCEPT)) )\1?\b) / +\= Expect no match + ((?<=((*ACCEPT)) )\\1?\\b)\x20 + +/((?<=((*ACCEPT))X)\1?Y)\1/ + XYYZ + +/((?<=((*ACCEPT))X)\1?Y(*ACCEPT))\1/ + XYYZ + +/(?(DEFINE)(?<optional_a>a?)X)^(?&optional_a)a$/ + aa + a + +/^(a?)b(?1)a/ + abaa + aba + baa + ba + +/^(a?)+b(?1)a/ + abaa + aba + baa + ba + +/^(a?)++b(?1)a/ + abaa + aba + baa + ba + +/^(a?)+b/ + b + ab + aaab + +/(?=a+)a(a+)++b/ + aab + # End of testinput1 diff --git a/testdata/testinput10 b/testdata/testinput10 index 550e1c9..93d2560 100644 --- a/testdata/testinput10 +++ b/testdata/testinput10 @@ -445,4 +445,35 @@ /(?<=(a)(?-1))x/I,utf a\x80zx\=offset=3 +/[\W\p{Any}]/B + abc + 123 + +/[\W\pL]/B + abc +\= Expect no match + 123 + +/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':ƿ)/utf + +/[\s[:^ascii:]]/B,ucp + +# A special extra option allows excaped surrogate code points in 8-bit mode, +# but subjects containing them must not be UTF-checked. + +/\x{d800}/I,utf,allow_surrogate_escapes + \x{d800}\=no_utf_check + +/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes + \x{dfff}\x{df01}\=no_utf_check + +# This has different starting code units in 8-bit mode. + +/^[^ab]/IB,utf + c + \x{ff} + \x{100} +\= Expect no match + aaa + # End of testinput10 diff --git a/testdata/testinput11 b/testdata/testinput11 index 2bdef9b..2d267d6 100644 --- a/testdata/testinput11 +++ b/testdata/testinput11 @@ -353,4 +353,19 @@ /(*THEN:\[A]{65501})/expand +# We can use pcre2test's utf8_input modifier to create wide pattern characters, +# even though this test is run when UTF is not supported. + +/abz/utf8_input + abz + ab\x{7fffffff}z + +/abz/utf8_input + abz + ab\x{ffffffff}z + +/abAz/utf8_input + abAz + ab\x{80000041}z + # End of testinput11 diff --git a/testdata/testinput12 b/testdata/testinput12 index 14a7715..b0ab909 100644 --- a/testdata/testinput12 +++ b/testdata/testinput12 @@ -343,4 +343,43 @@ /./utf \x{110000} +/(*UTF)abz/B + +/abz/utf + +/[\W\p{Any}]/B + abc + 123 + +/[\W\pL]/B + abc + \x{100} + \x{308} +\= Expect no match + 123 + +/[\s[:^ascii:]]/B,ucp + +/\pP/ucp + \x{7fffffff} + +# A special extra option allows excaped surrogate code points in 32-bit mode, +# but subjects containing them must not be UTF-checked. These patterns give +# errors in 16-bit mode. + +/\x{d800}/I,utf,allow_surrogate_escapes + \x{d800}\=no_utf_check + +/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes + \x{dfff}\x{df01}\=no_utf_check + +# This has different starting code units in 8-bit mode. + +/^[^ab]/IB,utf + c + \x{ff} + \x{100} +\= Expect no match + aaa + # End of testinput12 diff --git a/testdata/testinput15 b/testdata/testinput15 index 4ea9ffd..cd12ad1 100644 --- a/testdata/testinput15 +++ b/testdata/testinput15 @@ -43,14 +43,14 @@ /(*LIMIT_MATCH=4294967290)abc/ -/(*LIMIT_RECURSION=4294967280)abc/I +/(*LIMIT_DEPTH=4294967280)abc/I /(a+)*zz/ aaaaaaaaaaaaaz aaaaaaaaaaaaaz\=match_limit=3000 /(a+)*zz/ - aaaaaaaaaaaaaz\=recursion_limit=10 + aaaaaaaaaaaaaz\=depth_limit=10 /(*LIMIT_MATCH=3000)(a+)*zz/I aaaaaaaaaaaaaz @@ -63,23 +63,23 @@ aaaaaaaaaaaaaz aaaaaaaaaaaaaz\=match_limit=3000 -/(*LIMIT_RECURSION=10)(a+)*zz/I +/(*LIMIT_DEPTH=10)(a+)*zz/I aaaaaaaaaaaaaz - aaaaaaaaaaaaaz\=recursion_limit=1000 + aaaaaaaaaaaaaz\=depth_limit=1000 -/(*LIMIT_RECURSION=10)(*LIMIT_RECURSION=1000)(a+)*zz/I +/(*LIMIT_DEPTH=10)(*LIMIT_DEPTH=1000)(a+)*zz/I aaaaaaaaaaaaaz -/(*LIMIT_RECURSION=1000)(a+)*zz/I +/(*LIMIT_DEPTH=1000)(a+)*zz/I aaaaaaaaaaaaaz - aaaaaaaaaaaaaz\=recursion_limit=10 + aaaaaaaaaaaaaz\=depth_limit=10 # These three have infinitely nested recursions. /((?2))((?1))/ abc -/((?(R2)a+|(?1)b))/ +/((?(R2)a+|(?1)b))()/ aaaabcde /(?(R)a*(?1)|((?R))b)/ @@ -160,4 +160,12 @@ /(*NO_AUTO_POSSESS)\w+(?C1)/BI abc\=callout_fail=1 +# This test breaks the JIT stack limit + +/(|]+){2,2452}/ + (|]+){2,2452} + +/(*LIMIT_HEAP=21)\[(a)]{60}/expand + \[a]{60} + # End of testinput15 diff --git a/testdata/testinput17 b/testdata/testinput17 index 76925fe..9a73ef1 100644 --- a/testdata/testinput17 +++ b/testdata/testinput17 @@ -177,7 +177,7 @@ /((?2))((?1))/ abc -/((?(R2)a+|(?1)b))/ +/((?(R2)a+|(?1)b))()/ aaaabcde /(?(R)a*(?1)|((?R))b)/ diff --git a/testdata/testinput18 b/testdata/testinput18 index e31b96e..755a0c9 100644 --- a/testdata/testinput18 +++ b/testdata/testinput18 @@ -5,7 +5,7 @@ #forbid_utf #pattern posix -# Test invalid options +# Test some invalid options /abc/auto_callout @@ -14,6 +14,10 @@ /abc/ abc\=partial_hard + +/a(())bc/parens_nest_limit=1 + +/abc/allow_surrogate_escapes,max_pattern_length=2 # Real tests @@ -103,4 +107,31 @@ /\[A]{1000000}**/expand,regerror_buffsize=32 +//posix_nosub + \=offset=70000 + +/(?=(a\K))/ + a + +/^d(e)$/posix + acdef\=posix_startend=2:4 + acde\=posix_startend=2 +\= Expect no match + acdef + acdef\=posix_startend=2 + +/^a\x{00}b$/posix + a\x{00}b\=posix_startend=0:3 + +/"A" 00 "B"/hex + A\x{00}B\=posix_startend=0:3 + +/ABC/use_length + ABC + +/a\b(c/literal,posix + a\\b(c + +/a\b(c/literal,posix,dotall + # End of testdata/testinput18 diff --git a/testdata/testinput19 b/testdata/testinput19 index 7a90f1a..3bf1720 100644 --- a/testdata/testinput19 +++ b/testdata/testinput19 @@ -15,4 +15,7 @@ /\w/ucp +++\x{c2} +/"^AB" 00 "\x{1234}$"/hex,utf + AB\x{00}\x{1234}\=posix_startend=0:6 + # End of testdata/testinput19 diff --git a/testdata/testinput2 b/testdata/testinput2 index 9d0759f..5d3a80e 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -1,12 +1,12 @@ # This set of tests is not Perl-compatible. It checks on special features # of PCRE2's API, error diagnostics, and the compiled code of some patterns. -# It also checks the non-Perl syntax that PCRE2 supports (Python, .NET, -# Oniguruma). There are also some tests where PCRE2 and Perl differ, -# either because PCRE2 can't be compatible, or there is a possible Perl +# It also checks the non-Perl syntax that PCRE2 supports (Python, .NET, +# Oniguruma). There are also some tests where PCRE2 and Perl differ, +# either because PCRE2 can't be compatible, or there is a possible Perl # bug. # NOTE: This is a non-UTF set of tests. When UTF support is needed, use -# test 5. +# test 5. #forbid_utf #newline_default lf any anycrlf @@ -189,9 +189,9 @@ the barfoo and cattlefoo -/(?<=a+)b/ +/abc(?<=a+)b/ -/(?<=aaa|b{0,3})b/ +/12345(?<=aaa|b{0,3})b/ /(?<!(foo)a\1)bar/ @@ -330,7 +330,7 @@ \= Expect no match aaaa aaaaaa - + # Perl does not fail these two for the final subjects. Neither did PCRE until # release 8.01. The problem is in backtracking into a subpattern that contains # a recursive reference to itself. PCRE has now made these into atomic patterns. @@ -1062,8 +1062,8 @@ /(?C0)(abc(?C1))*/I abcabcabc - abcabc\=callout_fail=1:3 - abcabcabc\=callout_fail=1:3 + abcabc\=callout_fail=1:4 + abcabcabc\=callout_fail=1:4 /(\d{3}(?C))*/I 123\=callout_capture @@ -1099,6 +1099,9 @@ /(?C)a|b/I +/a|(b)(?C)/I + b + /x(ab|(bc|(de|(?R))))/I xab xbc @@ -1219,7 +1222,7 @@ aaabbb aaabbb\=callout_data=0 aaabbb\=callout_data=1 -\= Expect no match +\= Expect no match aaabbb\=callout_data=-1 /ab(?P<one>cd)ef(?P<two>gh)/I @@ -1291,19 +1294,19 @@ /a+b/IB,auto_callout ab aaaab -\= Expect no match +\= Expect no match aaaacb /(abc|def)x/IB,auto_callout abcx defx -\= Expect no match +\= Expect no match abcdefzx /(abc|def)x/IB,auto_callout abcx defx -\= Expect no match +\= Expect no match abcdefzx /(ab|cd){3,4}/I,auto_callout @@ -1312,11 +1315,11 @@ abcdcdcdcdcd /([ab]{,4}c|xy)/IB,auto_callout -\= Expect no match +\= Expect no match Note: that { does NOT introduce a quantifier /([ab]{,4}c|xy)/IB,auto_callout -\= Expect no match +\= Expect no match Note: that { does NOT introduce a quantifier /([ab]{1,4}c|xy){4,5}?123/IB,auto_callout @@ -1465,7 +1468,7 @@ a1b\=copy=A a2b\=copy=A a1b\=copy=Z,copy=A - + /(?|(?<a>)(?<b>)(?<a>)|(?<a>)(?<b>)(?<a>))/I,dupnames /^(?P<A>a)(?P<A>b)/I,dupnames @@ -1553,7 +1556,7 @@ \= Expect no match xyz\nabclf xyz\rabclf - + /^abc/Im,newline=cr xyz\rabc \= Expect no match @@ -2029,11 +2032,11 @@ afoo \r\nfoo \nfoo - + /^$/gm,newline=any abc\r\rxyz - abc\n\rxyz -\= Expect no match + abc\n\rxyz +\= Expect no match abc\r\nxyz /(?m)^$/g,newline=any,aftertext @@ -2041,7 +2044,7 @@ /(?m)^$|^\r\n/g,newline=any,aftertext abc\r\n\r\n - + /(?m)$/g,newline=any,aftertext abc\r\n\r\n @@ -2050,7 +2053,7 @@ /^X/m XABC -\= Expect no match +\= Expect no match XABC\=notbol /(ab|c)(?-1)/B @@ -2059,8 +2062,8 @@ /xy(?+1)(abc)/B xyabcabc \= Expect no match - xyabc - + xyabc + /x(?-0)y/ /x(?-1)y/ @@ -2073,13 +2076,13 @@ abcX Y \= Expect no match - abcY - + abcY + /^((?(+1)X|Y)(abc))+/B YabcXabc YabcXabcXabc \= Expect no match - XabcXabc + XabcXabc /(?(-1)a)/B @@ -2092,30 +2095,30 @@ /(?<A>tom|bon)-\k{A}/ tom-tom - bon-bon + bon-bon \= Expect no match - tom-bon + tom-bon /\g{A/ /(?|(abc)|(xyz))/B >abc< - >xyz< + >xyz< /(x)(?|(abc)|(xyz))(x)/B xabcx - xxyzx + xxyzx /(x)(?|(abc)(pqr)|(xyz))(x)/B xabcpqrx - xxyzx + xxyzx /\H++X/B \= Expect no match XXXX - + /\H+\hY/B - XXXX Y + XXXX Y /\H+ Y/B @@ -2169,7 +2172,7 @@ /^a+(*FAIL)/auto_callout \= Expect no match aaaaaa - + /a+b?c+(*FAIL)/auto_callout \= Expect no match aaabccc @@ -2181,7 +2184,7 @@ /a+b?(*COMMIT)c+(*FAIL)/auto_callout \= Expect no match aaabccc - + /a+b?(*SKIP)c+(*FAIL)/auto_callout \= Expect no match aaabcccaaabccc @@ -2189,7 +2192,7 @@ /a+b?(*THEN)c+(*FAIL)/auto_callout \= Expect no match aaabccc - + /a(*MARK)b/ /(?i:A{1,}\6666666666)/ @@ -2203,52 +2206,52 @@ /.+A/newline=crlf \= Expect no match \r\nA - + /\nA/newline=crlf - \r\nA + \r\nA /[\r\n]A/newline=crlf - \r\nA + \r\nA /(\r|\n)A/newline=crlf - \r\nA + \r\nA /a(*CR)b/ /(*CR)a.b/ a\nb \= Expect no match - a\rb + a\rb /(*CR)a.b/newline=lf a\nb \= Expect no match - a\rb + a\rb /(*LF)a.b/newline=CRLF a\rb \= Expect no match - a\nb + a\nb /(*CRLF)a.b/ a\rb - a\nb + a\nb \= Expect no match - a\r\nb + a\r\nb /(*ANYCRLF)a.b/newline=CR \= Expect no match a\rb - a\nb - a\r\nb + a\nb + a\r\nb /(*ANY)a.b/newline=cr \= Expect no match a\rb - a\nb - a\r\nb - a\x85b - + a\nb + a\r\nb + a\x85b + /(*ANY).*/g abc\r\ndef @@ -2258,63 +2261,81 @@ /(*CRLF).*/g abc\r\ndef +/(*NUL)^.*/ + a\nb\x00ccc + +/(*NUL)^.*/s + a\nb\x00ccc + +/^x/m,newline=NUL + ab\x00xy + +/'#comment' 0d 0a 00 '^x\' 0a 'y'/x,newline=nul,hex + x\nyz + +/(*NUL)^X\NY/ + X\nY + X\rY +\= Expect no match + X\x00Y + /a\Rb/I,bsr=anycrlf a\rb a\nb a\r\nb \= Expect no match a\x85b - a\x0bb + a\x0bb /a\Rb/I,bsr=unicode a\rb a\nb a\r\nb a\x85b - a\x0bb - + a\x0bb + /a\R?b/I,bsr=anycrlf a\rb a\nb a\r\nb \= Expect no match a\x85b - a\x0bb + a\x0bb /a\R?b/I,bsr=unicode a\rb a\nb a\r\nb a\x85b - a\x0bb - + a\x0bb + /a\R{2,4}b/I,bsr=anycrlf a\r\n\nb a\n\r\rb a\r\n\r\n\r\n\r\nb \= Expect no match a\x85\x85b - a\x0b\x0bb + a\x0b\x0bb /a\R{2,4}b/I,bsr=unicode a\r\rb a\n\n\nb a\r\n\n\r\rb a\x85\x85b - a\x0b\x0bb -\= Expect no match - a\r\r\r\r\rb - + a\x0b\x0bb +\= Expect no match + a\r\r\r\r\rb + /(*BSR_ANYCRLF)a\Rb/I a\nb - a\rb + a\rb /(*BSR_UNICODE)a\Rb/I a\x85b /(*BSR_ANYCRLF)(*CRLF)a\Rb/I a\nb - a\rb + a\rb /(*CRLF)(*BSR_UNICODE)a\Rb/I a\x85b @@ -2377,9 +2398,9 @@ /^(?+1)(?<a>x|y){0}z/ xzxx - yzyy + yzyy \= Expect no match - xxz + xxz /(\3)(\1)(a)/ \= Expect no match @@ -2387,12 +2408,12 @@ /(\3)(\1)(a)/alt_bsux,allow_empty_class,match_unset_backref,dupnames cat - + /TA]/ - The ACTA] comes + The ACTA] comes /TA]/alt_bsux,allow_empty_class,match_unset_backref,dupnames - The ACTA] comes + The ACTA] comes /(?2)[]a()b](abc)/ abcbabc @@ -2402,12 +2423,12 @@ /(?1)[]a()b](abc)/ abcbabc -\= Expect no match +\= Expect no match abcXabc /(?1)[^]a()b](abc)/ abcXabc -\= Expect no match +\= Expect no match abcbabc /(?2)[]a()b](abc)(xyz)/ @@ -2429,23 +2450,23 @@ /a[]+b/alt_bsux,allow_empty_class,match_unset_backref,dupnames \= Expect no match - ab + ab /a[]*+b/alt_bsux,allow_empty_class,match_unset_backref,dupnames \= Expect no match - ab + ab /a[^]b/alt_bsux,allow_empty_class,match_unset_backref,dupnames aXb - a\nb + a\nb \= Expect no match - ab - + ab + /a[^]+b/alt_bsux,allow_empty_class,match_unset_backref,dupnames aXb - a\nX\nXb + a\nX\nXb \= Expect no match - ab + ab /a(?!)b/B @@ -2458,32 +2479,32 @@ /(?(?=.*b)b|^)/I,auto_callout adc - abc - + abc + /(?(?=b).*b|^d)/I /(?(?=.*b).*b|^d)/I /xyz/auto_callout - xyz - abcxyz -\= Expect no match + xyz + abcxyz +\= Expect no match abc - abcxypqr - + abcxypqr + /xyz/auto_callout,no_start_optimize - abcxyz -\= Expect no match + abcxyz +\= Expect no match abc - abcxypqr - + abcxypqr + /(*NO_START_OPT)xyz/auto_callout abcxyz - + /(*NO_AUTO_POSSESS)a+b/B /xyz/auto_callout,no_start_optimize - abcxyz + abcxyz /^"((?(?=[a])[^"])|b)*"$/auto_callout "ab" @@ -2496,133 +2517,133 @@ /^X(?&N)(a)(?|(b)|(q))(c)(d)(?<N>Y)/ XYabcdY - + /Xa{2,4}b/ X\=ps Xa\=ps Xaa\=ps Xaaa\=ps Xaaaa\=ps - + /Xa{2,4}?b/ X\=ps Xa\=ps Xaa\=ps Xaaa\=ps Xaaaa\=ps - + /Xa{2,4}+b/ X\=ps Xa\=ps Xaa\=ps Xaaa\=ps Xaaaa\=ps - + /X\d{2,4}b/ X\=ps X3\=ps X33\=ps X333\=ps X3333\=ps - + /X\d{2,4}?b/ X\=ps X3\=ps X33\=ps X333\=ps X3333\=ps - + /X\d{2,4}+b/ X\=ps X3\=ps X33\=ps X333\=ps X3333\=ps - + /X\D{2,4}b/ X\=ps Xa\=ps Xaa\=ps Xaaa\=ps Xaaaa\=ps - + /X\D{2,4}?b/ X\=ps Xa\=ps Xaa\=ps Xaaa\=ps Xaaaa\=ps - + /X\D{2,4}+b/ X\=ps Xa\=ps Xaa\=ps Xaaa\=ps Xaaaa\=ps - + /X[abc]{2,4}b/ X\=ps Xa\=ps Xaa\=ps Xaaa\=ps Xaaaa\=ps - + /X[abc]{2,4}?b/ X\=ps Xa\=ps Xaa\=ps Xaaa\=ps Xaaaa\=ps - + /X[abc]{2,4}+b/ X\=ps Xa\=ps Xaa\=ps Xaaa\=ps Xaaaa\=ps - + /X[^a]{2,4}b/ X\=ps Xz\=ps Xzz\=ps Xzzz\=ps Xzzzz\=ps - + /X[^a]{2,4}?b/ X\=ps Xz\=ps Xzz\=ps Xzzz\=ps Xzzzz\=ps - + /X[^a]{2,4}+b/ X\=ps Xz\=ps Xzz\=ps Xzzz\=ps Xzzzz\=ps - + /(Y)X\1{2,4}b/ YX\=ps YXY\=ps YXYY\=ps YXYYY\=ps YXYYYY\=ps - + /(Y)X\1{2,4}?b/ YX\=ps YXY\=ps YXYY\=ps YXYYY\=ps YXYYYY\=ps - + /(Y)X\1{2,4}+b/ YX\=ps YXY\=ps YXYY\=ps YXYYY\=ps YXYYYY\=ps - + /\++\KZ|\d+X|9+Y/startchar ++++123999\=ps ++++123999Y\=ps @@ -2632,7 +2653,7 @@ \= Expect no match Z\=ps ZA\=ps - + /Z(?!)/ \= Expect no match Z\=ps @@ -2641,7 +2662,7 @@ /dog(sbody)?/ dogs\=ps dogs\=ph - + /dog(sbody)??/ dogs\=ps dogs\=ph @@ -2649,7 +2670,7 @@ /dog|dogsbody/ dogs\=ps dogs\=ph - + /dogsbody|dog/ dogs\=ps dogs\=ph @@ -2661,14 +2682,14 @@ /abc/ abc\=ps abc\=ph - + /abc\K123/startchar xyzabc123pqr xyzabc12\=ps xyzabc12\=ph - + /(?<=abc)123/ - xyzabc123pqr + xyzabc123pqr xyzabc12\=ps xyzabc12\=ph @@ -2690,11 +2711,11 @@ \= Expect no match abcdef\=notempty xyzabcdef\=notempty - + /^(?:(?=abc)|abc\K)/aftertext,startchar abcdef abcdef\=notempty_atstart -\= Expect no match +\= Expect no match abcdef\=notempty /a?b?/aftertext @@ -2703,79 +2724,79 @@ xyzabc\=notempty xyzabc\=notempty_atstart xyz\=notempty_atstart -\= Expect no match +\= Expect no match xyz\=notempty /^a?b?/aftertext xyz xyzabc -\= Expect no match +\= Expect no match xyzabc\=notempty xyzabc\=notempty_atstart xyz\=notempty_atstart xyz\=notempty - + /^(?<name>a|b\g<name>c)/ aaaa bacxxx - bbaccxxx + bbaccxxx bbbacccxx /^(?<name>a|b\g'name'c)/ aaaa bacxxx - bbaccxxx + bbaccxxx bbbacccxx /^(a|b\g<1>c)/ aaaa bacxxx - bbaccxxx + bbaccxxx bbbacccxx /^(a|b\g'1'c)/ aaaa bacxxx - bbaccxxx + bbaccxxx bbbacccxx /^(a|b\g'-1'c)/ aaaa bacxxx - bbaccxxx + bbaccxxx bbbacccxx /(^(a|b\g<-1>c))/ aaaa bacxxx - bbaccxxx + bbaccxxx bbbacccxx /(?-i:\g<name>)(?i:(?<name>a))/ XaaX - XAAX + XAAX /(?i:\g<name>)(?-i:(?<name>a))/ XaaX -\= Expect no match - XAAX +\= Expect no match + XAAX /(?-i:\g<+1>)(?i:(a))/ XaaX - XAAX + XAAX /(?=(?<regex>(?#simplesyntax)\$(?<name>[a-zA-Z_\x{7f}-\x{ff}][a-zA-Z0-9_\x{7f}-\x{ff}]*)(?:\[(?<index>[a-zA-Z0-9_\x{7f}-\x{ff}]+|\$\g<name>)\]|->\g<name>(\(.*?\))?)?|(?#simple syntax withbraces)\$\{(?:\g<name>(?<indices>\[(?:\g<index>|'(?:\\.|[^'\\])*'|"(?:\g<regex>|\\.|[^"\\])*")\])?|\g<complex>|\$\{\g<complex>\})\}|(?#complexsyntax)\{(?<complex>\$(?<segment>\g<name>(\g<indices>*|\(.*?\))?)(?:->\g<segment>)*|\$\g<complex>|\$\{\g<complex>\})\}))\{/ /(?<n>a|b|c)\g<n>*/ abc - accccbbb + accccbbb /^X(?7)(a)(?|(b)|(q)(r)(s))(c)(d)(Y)/ XYabcdY /(?<=b(?1)|zzz)(a)/ xbaax - xzzzax + xzzzax /(a)(?<=b\1)/ @@ -2826,7 +2847,7 @@ (?: [0-9a-f]{1,4} | # 1-4 hex digits or (?(1)0 | () ) ) # if null previously matched, fail; else null : # followed by colon - ){1,7} # end item; 1-7 of them required + ){1,7} # end item; 1-7 of them required [0-9a-f]{1,4} $ # final hex number at end of string (?(1)|.) # check that there was an empty component /Iix @@ -2838,25 +2859,25 @@ /(?|(?<a>A)|(?<b>B))/ /(?:a(?<quote> (?<apostrophe>')|(?<realquote>")) | - b(?<quote> (?<apostrophe>')|(?<realquote>")) ) + b(?<quote> (?<apostrophe>')|(?<realquote>")) ) (?('quote')[a-z]+|[0-9]+)/Ix,dupnames a"aaaaa - b"aaaaa -\= Expect no match + b"aaaaa +\= Expect no match b"11111 - a"11111 - + a"11111 + /^(?|(a)(b)(c)(?<D>d)|(?<D>e)) (?('D')X|Y)/IBx,dupnames abcdX eX \= Expect no match abcdY - ey - + ey + /(?<A>a) (b)(c) (?<A>d (?(R&A)$ | (?4)) )/IBx,dupnames abcdd \= Expect no match - abcdde + abcdde /abcd*/ xxxxabcd\=ps @@ -2880,19 +2901,6 @@ xxxxabcde\=ps xxxxabcde\=ph -# This is not in the Perl-compatible test because Perl seems currently to be -# broken and not behaving as specified in that it *does* bumpalong after -# hitting (*COMMIT). - -/(?1)(A(*COMMIT)|B)D/ - ABD - XABD - BAD - ABXABD -\= Expect no match - ABX - BAXBAD - /(\3)(\1)(a)/alt_bsux,allow_empty_class,match_unset_backref,dupnames cat @@ -2905,16 +2913,16 @@ /i(?(DEFINE)(?<s>a))/I i - + /()i(?(1)a)/I ia /(?i)a(?-i)b|c/B XabX XAbX - CcC + CcC \= Expect no match - XABX + XABX /(?i)a(?s)b|c/B @@ -2922,20 +2930,20 @@ /^(ab(c\1)d|x){2}$/B xabcxd - + /^(?&t)*+(?(DEFINE)(?<t>.))$/B /^(?&t)*(?(DEFINE)(?<t>.))$/B # This one is here because Perl gives the match as "b" rather than "ab". I # believe this to be a Perl bug. - + /(?>a\Kb)z|(ab)/ - ab\=startchar + ab\=startchar /(?P<L1>(?P<L2>0|)|(?P>L2)(?P>L1))/ abcd - 0abc + 0abc /abc(*MARK:)pqr/ @@ -2943,7 +2951,7 @@ /abc(*FAIL:123)xyz/ -# This should, and does, fail. In Perl, it does not, which I think is a +# This should, and does, fail. In Perl, it does not, which I think is a # bug because replacing the B in the pattern by (B|D) does make it fail. /A(*COMMIT)B/aftertext,mark @@ -2959,37 +2967,37 @@ /A(*PRUNE)B|A(*PRUNE)C/mark \= Expect no match AC - + # Mark names can be duplicated. Perl doesn't give a mark for this one, # though PCRE2 does. /^A(*:A)B|^X(*:A)Y/mark \= Expect no match XAQQ - -# COMMIT at the start of a pattern should be the same as an anchor. Perl + +# COMMIT at the start of a pattern should be the same as an anchor. Perl # optimizations defeat this. So does the PCRE2 optimization unless we disable # it. /(*COMMIT)ABC/ ABCDEFG - + /(*COMMIT)ABC/no_start_optimize \= Expect no match DEFGABC - + /^(ab (c+(*THEN)cd) | xyz)/x \= Expect no match - abcccd + abcccd /^(ab (c+(*PRUNE)cd) | xyz)/x \= Expect no match - abcccd + abcccd /^(ab (c+(*FAIL)cd) | xyz)/x \= Expect no match - abcccd - + abcccd + # Perl gets some of these wrong /(?>.(*ACCEPT))*?5/ @@ -3008,19 +3016,19 @@ ACBD \= Expect no match A\nB - ACB\n + ACB\n /A\NB./Bs ACBD - ACB\n + ACB\n \= Expect no match - A\nB - + A\nB + /A\NB/newline=crlf A\nB A\rB \= Expect no match - A\r\nB + A\r\nB /\R+b/B @@ -3091,7 +3099,7 @@ /.+/ \= Bad offsets abc\=offset=4 - abc\=offset=-4 + abc\=offset=-4 \= Valid data abc\=offset=0 abc\=offset=1 @@ -3111,24 +3119,24 @@ /(?P<abn>(?P=axn)xxx)(?<axn>yy)/B -# These tests are here because Perl gets the first one wrong. +# These tests are here because Perl gets the first one wrong. /(\R*)(.)/s \r\n - \r\r\n\n\r - \r\r\n\n\r\n + \r\r\n\n\r + \r\r\n\n\r\n /(\R)*(.)/s \r\n - \r\r\n\n\r - \r\r\n\n\r\n + \r\r\n\n\r + \r\r\n\n\r\n /((?>\r\n|\n|\x0b|\f|\r|\x85)*)(.)/s \r\n - \r\r\n\n\r - \r\r\n\n\r\n + \r\r\n\n\r + \r\r\n\n\r\n -# ------------- +# ------------- /^abc$/B @@ -3136,12 +3144,12 @@ /^(a)*+(\w)/ aaaaX -\= Expect no match +\= Expect no match aaaa /^(?:a)*+(\w)/ aaaaX -\= Expect no match +\= Expect no match aaaa /(a)++1234/IB @@ -3200,39 +3208,39 @@ /(abc)\1+/ -# Perl doesn't get these right IMO (the 3rd is PCRE2-specific) +# Perl doesn't get these right IMO (the 3rd is PCRE2-specific) /(?1)(?:(b(*ACCEPT))){0}/ b /(?1)(?:(b(*ACCEPT))){0}c/ bc -\= Expect no match - b +\= Expect no match + b /(?1)(?:((*ACCEPT))){0}c/ c c\=notempty /^.*?(?(?=a)a|b(*THEN)c)/ -\= Expect no match +\= Expect no match ba /^.*?(?(?=a)a|bc)/ ba /^.*?(?(?=a)a(*THEN)b|c)/ -\= Expect no match +\= Expect no match ac /^.*?(?(?=a)a(*THEN)b)c/ -\= Expect no match +\= Expect no match ac /^.*?(a(*THEN)b)c/ -\= Expect no match +\= Expect no match aabc - + /^.*? (?1) c (?(DEFINE)(a(*THEN)b))/x aabc @@ -3247,11 +3255,11 @@ /(*MARK:A)(*SKIP:B)(C|X)/mark C -\= Expect no match +\= Expect no match D - + /(*:A)A+(*SKIP:A)(B|Z)/mark -\= Expect no match +\= Expect no match AAAC # ---------------------------- @@ -3259,14 +3267,14 @@ "(?=a*(*ACCEPT)b)c" c c\=notempty - + /(?1)c(?(DEFINE)((*ACCEPT)b))/ c c\=notempty - + /(?>(*ACCEPT)b)c/ c -\= Expect no match +\= Expect no match c\=notempty /(?:(?>(a)))+a%/allaftertext @@ -3274,7 +3282,7 @@ /(a)b|ac/allaftertext ac\=ovector=1 - + /(a)(b)x|abc/allaftertext abc\=ovector=2 @@ -3299,7 +3307,7 @@ foobazbarX barfooX bazX - foobarbazX + foobarbazX bazfooX\=ovector=0 bazfooX\=ovector=1 bazfooX\=ovector=2 @@ -3363,17 +3371,17 @@ /^(?>a+)(?>(z+))\w/B aaaazzzzb \= Expect no match - aazz + aazz /(.)(\1|a(?2))/ bab - + /\1|(.)(?R)\1/ cbbbc - + /(.)((?(1)c|a)|a(?2))/ \= Expect no match - baa + baa /(?P<abn>(?P=abn)xxx)/B @@ -3414,7 +3422,7 @@ /a[\NB]c/ aNc - + /a[B-\Nc]/ /a[B\Nc]/ @@ -3426,34 +3434,34 @@ # This test, with something more complicated than individual letters, causes # different behaviour in Perl. Perhaps it disables some optimization; no tag is # passed back for the failures, whereas in PCRE2 there is a tag. - + /(A|P)(*:A)(B|P) | (X|P)(X|P)(*:B)(Y|P)/x,mark AABC - XXYZ + XXYZ \= Expect no match - XAQQ - XAQQXZZ - AXQQQ - AXXQQQ + XAQQ + XAQQXZZ + AXQQQ + AXXQQQ # Perl doesn't give marks for these, though it does if the alternatives are -# replaced by single letters. - +# replaced by single letters. + /(b|q)(*:m)f|a(*:n)w/mark - aw -\= Expect no match + aw +\= Expect no match abc /(q|b)(*:m)f|a(*:n)w/mark - aw -\= Expect no match + aw +\= Expect no match abc -# After a partial match, the behaviour is as for a failure. +# After a partial match, the behaviour is as for a failure. /^a(*:X)bcde/mark abc\=ps - + # These are here because Perl doesn't return a mark, except for the first. /(?=(*:x))(q|)/aftertext,mark @@ -3521,22 +3529,22 @@ ababa\=ps ababa\=ph abababx - ababababx + ababababx /^(..)\1{2,3}?x/ aba\=ps ababa\=ps ababa\=ph abababx - ababababx - + ababababx + /^(..)(\1{2,3})ab/ abababab /^\R/ \r\=ps \r\=ph - + /^\R{2,3}x/ \r\=ps \r\=ph @@ -3545,7 +3553,7 @@ \r\r\r\=ps \r\r\r\=ph \r\rx - \r\r\rx + \r\r\rx /^\R{2,3}?x/ \r\=ps @@ -3555,20 +3563,20 @@ \r\r\r\=ps \r\r\r\=ph \r\rx - \r\r\rx - + \r\r\rx + /^\R?x/ \r\=ps \r\=ph x - \rx + \rx /^\R+x/ \r\=ps \r\=ph \r\n\=ps \r\n\=ph - \rx + \rx /^a$/newline=crlf a\r\=ps @@ -3589,7 +3597,7 @@ /./newline=crlf \r\=ps \r\=ph - + /.{2,3}/newline=crlf \r\=ps \r\=ph @@ -3608,9 +3616,9 @@ "AB(C(D))(E(F))?(?(?=\2)(?=\4))" ABCDGHI\=ovector=01 - + # These are all run as real matches in test 1; here we are just checking the -# settings of the anchored and startline bits. +# settings of the anchored and startline bits. /(?>.*?a)(?<=ba)/I @@ -3646,10 +3654,10 @@ /(?:(a)+(?C1)bb|aa(?C2)b)/ aab\=callout_capture - + /(?:(a)++(?C1)bb|aa(?C2)b)/ aab\=callout_capture - + /(?:(?>(a))(?C1)bb|aa(?C2)b)/ aab\=callout_capture @@ -3666,11 +3674,11 @@ /(ab)x|ab/ ab\=ovector=0 ab\=ovector=1 - + /(?<=123)(*MARK:xx)abc/mark xxxx123a\=ph xxxx123a\=ps - + /123\Kabc/startchar xxxx123a\=ph xxxx123a\=ps @@ -3685,22 +3693,22 @@ /aaaaa(*COMMIT)(*PRUNE)b|a+c/ aaaaaac - + # Here are some that Perl treats differently because of the way it handles -# backtracking verbs. +# backtracking verbs. /(?!a(*COMMIT)b)ac|ad/ ac - ad + ad /^(?!a(*THEN)b|ac)../ - ad + ad \= Expect no match ac /^(?=a(*THEN)b|ac)/ ac - + /\A.*?(?:a|b(*THEN)c)/ ba @@ -3711,25 +3719,24 @@ ba /(?:(a(*MARK:X)a+(*SKIP:X)b)){0}(?:(?1)|aac)/ - aac + aac /\A.*?(a|b(*THEN)c)/ ba /^(A(*THEN)B|A(*THEN)D)/ - AD - + AD + /(?!b(*THEN)a)bn|bnn/ bnn /(?(?=b(*SKIP)a)bn|bnn)/ -\= Expect no match bnn /(?=b(*THEN)a|)bn|bnn/ bnn -# This test causes a segfault with Perl 5.18.0 +# This test causes a segfault with Perl 5.18.0 /^(?=(a)){0}b(?1)/ backgammon @@ -3837,13 +3844,13 @@ /[a-c]{0,6}d/IB -# End of special auto-possessive tests +# End of special auto-possessive tests /^A\o{1239}B/ A\123B /^A\oB/ - + /^A\x{zz}B/ /^A\x{12Z/ @@ -3915,13 +3922,13 @@ /[[:<:]]red[[:>:]]/B little red riding hood - a /red/ thing + a /red/ thing red is a colour - put it all on red + put it all on red \= Expect no match no reduction Alfred Winifred - + /[a[:<:]] should give error/ /(?=ab\K)/aftertext @@ -3930,7 +3937,7 @@ /abcd/newline=lf,firstline \= Expect no match xx\nxabcd - + # Test stack guard external calls. /(((a)))/stackguard=1 @@ -3961,25 +3968,25 @@ /A\9B/ -# This one is here because Perl fails to match "12" for this pattern when the $ +# This one is here because Perl fails to match "12" for this pattern when the $ # is present. - + /^(?(?=abc)\w{3}:|\d\d)$/ abc: 12 \= Expect no match 123 - xyz + xyz -# Perl gets this one wrong, giving "a" as the after text for ca and failing to +# Perl gets this one wrong, giving "a" as the after text for ca and failing to # match for cd. /(?(?=ab)ab)/aftertext abxxx ca - cd - -# This should test both paths for processing OP_RECURSE. + cd + +# This should test both paths for processing OP_RECURSE. /(?(R)a+|(?R)b)/ aaaabcde @@ -3991,29 +3998,29 @@ /(*NOTEMPTY)a*?b*?/ ab ba - cb + cb /(*NOTEMPTY_ATSTART)a*?b*?/aftertext ab - cdab + cdab /(?(VERSION>=10.0)yes|no)/I yesno - + /(?(VERSION=8)yes){3}/BI,aftertext yesno /(?(VERSION=8)yes|no){3}/I yesnononoyes \= Expect no match - yesno + yesno /(?:(?<VERSION>abc)|xyz)(?(VERSION)yes|no)/I abcyes xyzno \= Expect no match abcno - xyzyes + xyzyes /(?(VERSION<10)yes|no)/ @@ -4029,11 +4036,11 @@ /(|ab)*?d/I abd - xyd + xyd /(|ab)*?d/I,no_start_optimize abd - xyd + xyd /\k<A>*(?<A>aa)(?<A>bb)/match_unset_backref,dupnames aabb @@ -4093,7 +4100,7 @@ /abc/replace=[9]XYZ 123abc123 - + /abc/replace=xyz 1abc2\=partial_hard @@ -4105,23 +4112,23 @@ /(?<=abc)(|def)/g,replace=<$0> 123abcxyzabcdef789abcpqr - + /./replace=$0 a - + /(.)(.)/replace=$2+$1 abc - + /(?<A>.)(?<B>.)/replace=$B+$A abc - + /(.)(.)/g,replace=$2$1 - abcdefgh - + abcdefgh + /(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=${*MARK} apple lemon blackberry apple strudel - fruitless + fruitless /(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/replace=${*MARK} sauce, apple lemon blackberry @@ -4129,15 +4136,15 @@ /(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=<$*MARK> apple lemon blackberry apple strudel - fruitless - -/(*:pear)apple/g,replace=${*MARKING} + fruitless + +/(*:pear)apple/g,replace=${*MARKING} apple lemon blackberry /(*:pear)apple/g,replace=${*MARK-time apple lemon blackberry -/(*:pear)apple/g,replace=${*mark} +/(*:pear)apple/g,replace=${*mark} apple lemon blackberry /(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=<$*MARKET> @@ -4173,10 +4180,10 @@ /(a)(b)|(c)/ XcX\=ovector=2,get=1,get=2,get=3,get=4,getall - + /x(?=ab\K)/ - xab\=get=0 - xab\=copy=0 + xab\=get=0 + xab\=copy=0 xab\=getall /(?<A>a)|(?<A>b)/dupnames @@ -4239,16 +4246,16 @@ 00765 456 \= Expect no match - 356 + 356 '^(a)*+(\w)' g - g\=ovector=1 + g\=ovector=1 '^(?:a)*+(\w)' g - g\=ovector=1 - + g\=ovector=1 + # These two pattern showeds up compile-time bugs "((?2){0,1999}())?" @@ -4289,11 +4296,11 @@ /^(?(?C25)(?=abc)abcd|xyz)/B,callout_info abcdefg - xyz123 + xyz123 /^(?(?C$abc$)(?=abc)abcd|xyz)/B abcdefg - xyz123 + xyz123 /^ab(?C'first')cd(?C"second")ef/ abcdefg @@ -4310,8 +4317,8 @@ /(?(?!)a|b)/ bbb -\= Expect no match - aaa +\= Expect no match + aaa # JIT gives a different error message for the infinite recursion @@ -4345,9 +4352,9 @@ /abc/ \= Expect no match \[9x!xxx(]{9999} - + /(abc)*/ - \[abc]{5} + \[abc]{5} /^/gm \n\n\n @@ -4365,17 +4372,17 @@ /A\8B\9C/ A8B9C - + /(?x:((?'a')) # comment (with parentheses) and | vertical (?-x:#not a comment (?'b')) # this is a comment () (?'c')) # not a comment (?'d')/info /(?|(?'a')(2)(?'b')|(?'a')(?'a')(3))/I,dupnames A23B - B32A + B32A # These are some patterns that used to cause buffer overflows or other errors -# while compiling. +# while compiling. /.((?2)(?R)|\1|$)()/B @@ -4459,7 +4466,7 @@ {4,5a}bc /\x0{ab}/ - \0{ab} + \0{ab} /^(a(b))\1\g1\g{1}\g-1\g{-1}\g{-02}Z/ ababababbbabZXXXX @@ -4501,8 +4508,8 @@ \= Expect no match aacb -/(*MARK:a\zb)z/alt_verbnames - +/(*MARK:a\zb)z/alt_verbnames + /(*:ab\t(d\)c)xxx/ /(*:ab\t(d\)c)xxx/alt_verbnames,mark @@ -4510,16 +4517,28 @@ /(*:A\Qxx)x\EB)x/alt_verbnames,mark x - + /(*:A\ExxxB)x/alt_verbnames,mark - x - + x + /(*: A \ and #comment \ B)x/x,alt_verbnames,mark - x - + x + +/(*: A \ and #comment + \ B)x/alt_verbnames,mark + x + +/(*: A \ and #comment + \ B)x/x,mark + x + +/(*: A \ and #comment + \ B)x/mark + x + /(*:A -B)x/alt_verbnames,mark +B)x/alt_verbnames,mark x /(*:abc\Qpqr)/alt_verbnames @@ -4537,7 +4556,7 @@ B)x/alt_verbnames,mark 1234abc\=offset_limit=7 \= Expect no match 1234abc\=offset_limit=6 - + /A/g,replace=-,use_offset_limit XAXAXAXAXA\=offset_limit=4 @@ -4551,16 +4570,16 @@ B)x/alt_verbnames,mark /abcd/null_context abcd\=null_context -\= Expect error +\= Expect error abcd\=null_context,find_limits - abcd\=allusedtext,startchar + abcd\=allusedtext,startchar /abcd/replace=w\rx\x82y\o{333}z(\Q12\$34$$\x34\E5$$),substitute_extended abcd - + /a(bc)(DE)/replace=a\u$1\U$1\E$1\l$2\L$2\Eab\Uab\LYZ\EDone,substitute_extended abcDE - + /abcd/replace=xy\kz,substitute_extended abcd @@ -4598,8 +4617,8 @@ B)x/alt_verbnames,mark /(?J)(?:(?<A>a)|(?<A>b))/replace=<$A> [a] - [b] -\= Expect error + [b] +\= Expect error (a)\=ovector=1 /(a)|(b)/replace=<$1> @@ -4624,10 +4643,10 @@ B)x/alt_verbnames,mark /(?=a\K)/replace=z BaCaD - + /(?'abcdefghijklmnopqrstuvwxyzABCDEFG'toolong)/ - -/(?'abcdefghijklmnopqrstuvwxyzABCDEF'justright)/ + +/(?'abcdefghijklmnopqrstuvwxyzABCDEF'justright)/ # These two use zero-termination /abcd/max_pattern_length=3 @@ -4639,7 +4658,7 @@ B)x/alt_verbnames,mark /abcdef/hex,max_pattern_length=3 -# These two patterns used to take a long time to compile +# These patterns used to take a long time to compile "(.*) ((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))((?-2)(?-2)) @@ -4652,9 +4671,6 @@ B)x/alt_verbnames,mark ((?-2)(?-2))((?-2)(?-2))((?-2)(?-2)) a)"xI -# When (?| is used and groups of the same number may be different, -# we have to rely on a count to catch overly complicated patterns. - "(?|()|())(.*) ((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))((?-2)(?-2)) ((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))((?-2)(?-2)) @@ -4753,7 +4769,7 @@ a)"xI /a|(b)c/replace=>$1<,substitute_unset_empty cat - xbcom + xbcom /a|(b)c/ cat\=replace=>$1< @@ -4767,26 +4783,26 @@ a)"xI /a|(?'X'b)c/replace=>$X<,substitute_unset_empty cat - xbcom + xbcom /a|(?'X'b)c/replace=>$Y<,substitute_unset_empty cat - cat\=substitute_unknown_unset - cat\=substitute_unknown_unset,-substitute_unset_empty + cat\=substitute_unknown_unset + cat\=substitute_unknown_unset,-substitute_unset_empty /a|(b)c/replace=>$2<,substitute_unset_empty cat - cat\=substitute_unknown_unset - cat\=substitute_unknown_unset,-substitute_unset_empty + cat\=substitute_unknown_unset + cat\=substitute_unknown_unset,-substitute_unset_empty /()()()/use_offset_limit \=ovector=11000000000 \=callout_fail=11000000000 \=callout_fail=1:11000000000 - \=callout_data=11000000000 - \=callout_data=-11000000000 - \=offset_limit=1100000000000000000000 - \=copy=11000000000 + \=callout_data=11000000000 + \=callout_data=-11000000000 + \=offset_limit=1100000000000000000000 + \=copy=11000000000 /(*MARK:A\x00b)/mark abc @@ -4819,4 +4835,598 @@ a)"xI /\[AB]{6000000000000000000000}/expand -# End of testinput2 +# Hex uses pattern length, not zero-terminated. This tests for overrunning +# the given length of a pattern. + +/'(*U'/hex + +/'(*'/hex + +/'('/hex + +//hex + +# These tests are here because Perl never allows a back reference in a +# lookbehind. PCRE2 supports some limited cases. + +/([ab])...(?<=\1)z/ + a11az + b11bz +\= Expect no match + b11az + +/(?|([ab]))...(?<=\1)z/ + +/([ab])(\1)...(?<=\2)z/ + aa11az + +/(a\2)(b\1)(?<=\2)/ + +/(?<A>[ab])...(?<=\k'A')z/ + a11az + b11bz +\= Expect no match + b11az + +/(?<A>[ab])...(?<=\k'A')(?<A>)z/dupnames + +# Perl does not support \g+n + +/((\g+1X)?([ab]))+/ + aaXbbXa + +/ab(?C1)c/auto_callout + abc + +/'ab(?C1)c'/hex,auto_callout + abc + +# Perl accepts these, but gives a warning. We can't warn, so give an error. + +/[a-[:digit:]]+/ + a-a9-a + +/[A-[:digit:]]+/ + A-A9-A + +/[a-\d]+/ + a-a9-a + +/(?<RA>abc)(?(R)xyz)/B + +/(?<R>abc)(?(R)xyz)/B + +/(?=.*[A-Z])/I + +/()(?<=(?0))/ + +/(?<!|!(?<!))/ + +/(?<!|!|!||||||(?<!)||(?<!)!|!||(?<!)!|!(?<!)!|!|!|!||||!!|<!)!|!||||!|/ + +/{2,2{2,2/use_length + +/.>*?\g'0/use_length + +/.>*?\g'0/ + +/{̈́̈́{'{22{2{{2{'{22{{22{2{'{22{2{{2{{222{{2{'{22{2{22{2{'{22{2{{2{'{22{2{22{2{'{'{22{2{22{2{'{22{2{{2{'{22{2{22{2{'{222{2Ą̈́̈́{'{22{2{{2{'{22{{11{2{'{22{2{{2{{'{22{2{{2{'{22{{22{1{'{22{2{{2{{222{{2{'{22{2{22{2{'{/auto_callout + +// +\=get=i00000000000000000000000000000000 +\=get=i2345678901234567890123456789012,get=i1245678901234567890123456789012 + +"(?(?C))" + +/(?(?(?(?(?(?))))))/ + +/(?<=(?1))((?s))/anchored + +/(*:ab)*/ + +%(*:(:(svvvvvvvvvv:]*[ Z!*;[]*[^[]*!^[+.+{{2,7}' _\\\\\\\\\\\\\)?.:.. *w////\\\Q\\\\\\\\\\\\\\\T\\\\\+/?/////'+\\\EEE?/////'+/*+/[^K]?]//(w)%never_backslash_c,alt_verbnames,auto_callout + +/./newline=crlf + \=ph + +/(\x0e00\000000\xc)/replace=\P,substitute_extended + \x0e00\000000\xc + +//replace=0 + \=offset=7 + +".+\QX\E+"B,no_auto_possess + +".+\QX\E+"B,auto_callout,no_auto_possess + +# This one is here because Perl gives an 'unmatched )' error which goes away +# if one of the \) sequences is removed - which is weird. PCRE finds it too +# complicated to find a minimum matching length. + +"()X|((((((((()))))))((((())))))\2())((((((\2\2)))\2)(\22((((\2\2)2))\2)))(2\ZZZ)+:)Z^|91ZiZZnter(ZZ |91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z+:)Z|91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z((Z*(\2(Z\':))\0)i|||||||||||||||loZ\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0nte!rnal errpr\2\\21r(2\ZZZ)+:)Z!|91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0(2\ZZZ)+:)Z^|91ZiZZnter(ZZ |91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0(2\ZZZ)+:)Z^)))int \)\0(2\ZZZ)+:)Z^|91ZiZZnter(ZZernZal ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \))\ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)))\2))))((((((\2\2))))))"I + +# This checks that new code for handling groups that may match an empty string +# works on a very large number of alternatives. This pattern used to provoke a +# complaint that it was too complicated. + +/(?:\[A|B|C|D|E|F|G|H|I|J|]{200}Z)/expand + +# This one used to compile rubbish instead of a compile error, and then +# behave unpredictably at match time. + +/.+(?(?C'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'))?!XXXX.=X/ + .+(?(?C'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'))?!XXXX.=X + +/[:[:alnum:]-[[a:lnum:]+/ + +/((?(?C'')\QX\E(?!((?(?C'')(?!X=X));=)r*X=X));=)/ + +/((?(?C'')\Q\E(?!((?(?C'')(?!X=X));=)r*X=X));=)/ + +/abcd/auto_callout + abcd\=callout_error=255:2 + +/()(\g+65534)/ + +/()(\g+65533)/ + +/\x00\x00\x00(\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\x00k\d+\x00\x00\x00\x00\x00\x00\2*\x00\x00\1*.){36}int^\x00\x00\x00(\1{50779}?)J\w2/I + +/(a)(b)\2\1\1\1\1/I + +/(?<a>a)(?<b>b)\g{b}\g{a}\g{a}\g{a}\g{a}(?<a>xx)(?<b>zz)/I,dupnames + +// + \=ovector=7777777777 + +# This is here because Perl matches, even though a COMMIT is encountered +# outside of the recursion. + +/(?1)(A(*COMMIT)|B)D/ + BAXBAD + +"(?1){2}(a)"B + +"(?1){2,4}(a)"B + +# This test differs from Perl for the first subject. Perl ends up with +# $1 set to 'B'; PCRE2 has it unset (which I think is right). + +/^(?: +(?:A| (?:B|B(*ACCEPT)) (?<=(.)) D) +(Z) +)+$/x + AZB + AZBDZ + +# The first of these, when run by Perl, gives the mark 'aa', which is wrong. + +'(?>a(*:aa))b|ac' mark + ac + +'(?:a(*:aa))b|ac' mark + ac + +/(R?){65}/ + (R?){65} + +/\[(a)]{60}/expand + aaaa + +/(?<!\1((?U)1((?U))))(*F)/never_backslash_c,alt_bsux,anchored,extended + +/\g{3/ + +/(a(?C1)(b)(c)d)+/ + abcdabcd\=callout_capture + +# Perl matches this one, but PCRE does not because (*ACCEPT) clears out any +# pending backtracks in the recursion. + +/^ (?(DEFINE) (..(*ACCEPT)|...) ) (?1)$/x +\= Expect no match + abc + +# Perl gives no match for this one + +/(a(*MARK:m)(*ACCEPT)){0}(?1)/mark + abc + +/abc/endanchored + xyzabc +\= Expect no match + xyzabcdef +\= Expect error + xyzabc\=ph + +/abc/ + xyzabc\=endanchored +\= Expect no match + xyzabcdef\=endanchored +\= Expect error + xyzabc\=ps,endanchored + +/abc(*ACCEPT)d/endanchored + xyzabc +\= Expect no match + xyzabcdef + +/abc|bcd/endanchored + xyzabcd +\= Expect no match + xyzabcdef + +/a(*ACCEPT)x|aa/endanchored + aaa + +# Check auto-anchoring when there is a group that is never obeyed at +# the start of a branch. + +/(?(DEFINE)(a))^bc/I + +/(a){0}.*bc/sI + +# This should be anchored, as the condition is always false and there is +# no alternative branch. + +/(?(VERSION>=999)yes)^bc/I + +# This should not be anchored. + +/(?(VERSION>=999)yes|no)^bc/I + +/(*LIMIT_HEAP=0)xxx/I + +/\d{0,3}(*:abc)(?C1)xxx/callout_info + +# ---------------------------------------------------------------------- + +# These are a whole pile of tests that touch lines of code that are not +# used by any other tests (at least when these were created). + +/^a+?x/i,no_start_optimize,no_auto_possess +\= Expect no match + aaa + +/^[^a]{3,}?x/i,no_start_optimize,no_auto_possess +\= Expect no match + bbb + cc + +/^X\S/no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\W/no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\H/no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\h/no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\V/no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\v/no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\h/no_start_optimize,no_auto_possess +\= Expect no match + XY + +/^X\V/no_start_optimize,no_auto_possess +\= Expect no match + X\n + +/^X\v/no_start_optimize,no_auto_possess +\= Expect no match + XX + +/^X.+?/s,no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\R+?/no_start_optimize,no_auto_possess +\= Expect no match + XX + +/^X\H+?/no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\h+?/no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\V+?/no_start_optimize,no_auto_possess +\= Expect no match + X + X\n + +/^X\D+?/no_start_optimize,no_auto_possess +\= Expect no match + X + X9 + +/^X\S+?/no_start_optimize,no_auto_possess +\= Expect no match + X + X\n + +/^X\W+?/no_start_optimize,no_auto_possess +\= Expect no match + X + XX + +/^X.+?Z/no_start_optimize,no_auto_possess +\= Expect no match + XY\n + +/(*CRLF)^X.+?Z/no_start_optimize,no_auto_possess +\= Expect no match + XY\r\=ps + +/^X\R+?Z/no_start_optimize,no_auto_possess +\= Expect no match + X\nX + X\n\r\n + X\n\rY + X\n\nY + X\n\x{0c}Y + +/(*BSR_ANYCRLF)^X\R+?Z/no_start_optimize,no_auto_possess +\= Expect no match + X\nX + X\n\r\n + X\n\rY + X\n\nY + X\n\x{0c}Y + +/^X\H+?Z/no_start_optimize,no_auto_possess +\= Expect no match + XY\t + XYY + +/^X\h+?Z/no_start_optimize,no_auto_possess +\= Expect no match + X\t\t + X\tY + +/^X\V+?Z/no_start_optimize,no_auto_possess +\= Expect no match + XY\n + XYY + +/^X\v+?Z/no_start_optimize,no_auto_possess +\= Expect no match + X\n\n + X\nY + +/^X\D+?Z/no_start_optimize,no_auto_possess +\= Expect no match + XY9 + XYY + +/^X\d+?Z/no_start_optimize,no_auto_possess +\= Expect no match + X99 + X9Y + +/^X\S+?Z/no_start_optimize,no_auto_possess +\= Expect no match + XY\n + XYY + +/^X\s+?Z/no_start_optimize,no_auto_possess +\= Expect no match + X\n\n + X\nY + +/^X\W+?Z/no_start_optimize,no_auto_possess +\= Expect no match + X.A + X++ + +/^X\w+?Z/no_start_optimize,no_auto_possess +\= Expect no match + Xa. + Xaa + +/^X.{1,3}Z/s,no_start_optimize,no_auto_possess +\= Expect no match + Xa.bd + +/^X\h+Z/no_start_optimize,no_auto_possess +\= Expect no match + X\t\t + X\tY + +/^X\V+Z/no_start_optimize,no_auto_possess +\= Expect no match + XY\n + XYY + +/^(X(*THEN)Y|AB){0}(?1)/ + ABX +\= Expect no match + XAB + +/^(?!A(?C1)B)C/ + ABC\=callout_error=1,no_jit + +/^(?!A(?C1)B)C/no_start_optimize + ABC\=callout_error=1 + +/^(?(?!A(?C1)B)C)/ + ABC\=callout_error=1 + +# ---------------------------------------------------------------------- + +/[a b c]/BxxI + +/[a b c]/BxxxI + +/[a b c]/B,extended_more + +/[ a b c ]/B,extended_more + +/[a b](?xx: [ 12 ] (?-xx:[ 34 ]) )y z/B + +# Unsetting /x also unsets /xx + +/[a b](?xx: [ 12 ] (?-x:[ 34 ]) )y z/B + +/(a)(?-n:(b))(c)/nB + +# ---------------------------------------------------------------------- +# These test the dangerous PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL option. + +/\j\x{z}\o{82}\L\uabcd\u\U\g{\g/B,\bad_escape_is_literal + +/\N{\c/IB,bad_escape_is_literal + +/[\j\x{z}\o\gA-\Nb-\g]/B,bad_escape_is_literal + +/[Q-\N]/B,bad_escape_is_literal + +# ---------------------------------------------------------------------- + +/a\b(c/literal + a\\b(c + +/a\b(c/literal,caseless + a\\b(c + a\\B(c + +/a\b(c/literal,firstline + XYYa\\b(c +\= Expect no match + X\na\\b(c + +/a\b?c/literal,use_offset_limit + XXXXa\\b?c\=offset_limit=4 +\= Expect no match + XXXXa\\b?c\=offset_limit=3 + +/a\b(c/literal,anchored,endanchored + a\\b(c +\= Expect no match + Xa\\b(c + a\\b(cX + Xa\\b(cX + +//literal,extended + +/a\b(c/literal,auto_callout,no_start_optimize + XXXXa\\b(c + +/a\b(c/literal,auto_callout + XXXXa\\b(c + +/(*CR)abc/literal + (*CR)abc + +/cat|dog/I,match_word + the cat sat +\= Expect no match + caterpillar + snowcat + syndicate + +/(cat)|dog/I,match_line,literal + (cat)|dog +\= Expect no match + the cat sat + caterpillar + snowcat + syndicate + +/a whole line/match_line,multiline + Rhubarb \na whole line\n custard +\= Expect no match + Not a whole line + +# Perl gets this wrong, failing to capture 'b' in group 1. + +/^(b+|a){1,2}?bc/ + bbc + +# And again here, for the "babc" subject string. + +/^(b*|ba){1,2}?bc/ + babc + bbabc + bababc +\= Expect no match + bababbc + babababc + +/[[:digit:]-a]/ + +/[[:digit:]-[:print:]]/ + +/[\d-a]/ + +/[\H-z]/ + +/[\d-[:print:]]/ + +# Perl gets the second of these wrong, giving no match. + +"(?<=(a))\1?b"I + ab + aaab + +"(?=(a))\1?b"I + ab + aaab + +# JIT does not support callout_extra + +/(*NO_JIT)(a+)b/auto_callout,no_start_optimize,no_auto_possess +\= Expect no match + aac\=callout_extra + +/(*NO_JIT)a+(?C'XXX')b/no_start_optimize,no_auto_possess +\= Expect no match + aac\=callout_extra + +/\n/firstline + xyz\nabc + +/\nabc/firstline + xyz\nabc + +/\x{0a}abc/firstline,newline=crlf +\= Expect no match + xyz\r\nabc + +/[abc]/firstline +\= Expect no match + \na + +# These tests are matched in test 1 as they are Perl compatible. Here we are +# looking at what does and does not get auto-possessified. + +/(?(DEFINE)(?<optional_a>a?))^(?&optional_a)a$/B + +/(?(DEFINE)(?<optional_a>a?)X)^(?&optional_a)a$/B + +/^(a?)b(?1)a/B + +/^(a?)+b(?1)a/B + +/^(a?)++b(?1)a/B + +/^(a?)+b/B + +/(?=a+)a(a+)++b/B + +# End of testinput2 diff --git a/testdata/testinput20 b/testdata/testinput20 index c920e2a..71f39ae 100644 --- a/testdata/testinput20 +++ b/testdata/testinput20 @@ -31,20 +31,20 @@ #load testsaved2 #pop info - foofoo + foofoo barbar - + #pop mark C -\= Expect no match - D - +\= Expect no match + D + #pop - AmanaplanacanalPanama + AmanaplanacanalPanama #pop info metcalfe 33 - + # Check for an error when different tables are used. /abc/push,tables=1 @@ -59,33 +59,50 @@ #pop should give an error pqr - + /abcd/pushcopy abcd - + #pop - abcd + abcd #pop should give an error /abcd/push #popcopy abcd - + #pop - abcd - + abcd + /abcd/push #save testsaved1 #pop should give an error #load testsaved1 -#popcopy +#popcopy abcd - + #pop abcd #pop should give an error +/abcd/pushtablescopy + abcd + +#popcopy + abcd + +#pop + abcd + +# Must only specify one of these + +//push,pushcopy + +//push,pushtablescopy + +//pushcopy,pushtablescopy + # End of testinput20 diff --git a/testdata/testinput22 b/testdata/testinput22 index 7ada9aa..e6d4053 100644 --- a/testdata/testinput22 +++ b/testdata/testinput22 @@ -94,4 +94,8 @@ \= Expect no match in 8-bit mode a\x{100}b +/^ab\C/utf,no_start_optimize +\= Expect no match - tests \C at end of subject + ab + # End of testinput22 diff --git a/testdata/testinput24 b/testdata/testinput24 new file mode 100644 index 0000000..380e23c --- /dev/null +++ b/testdata/testinput24 @@ -0,0 +1,396 @@ +# This file tests the auxiliary pattern conversion features of the PCRE2 +# library, in non-UTF mode. + +#forbid_utf +#newline_default lf any anycrlf + +# -------- Tests of glob conversion -------- + +# Set the glob separator explicitly so that different OS defaults are not a +# problem. Then test various errors. + +#pattern convert=glob,convert_glob_escape=\,convert_glob_separator=/ + +/abc/posix + +# Separator must be / \ or . + +/a*b/convert_glob_separator=% + +# Can't have separator in a class + +"[ab/cd]" + +"[,-/]" + +/[ab/ + +# Length check + +/abc/convert_length=11 + +/abc/convert_length=12 + +# Now some actual tests + +/a?b[]xy]*c/ + azb]1234c + +# Tests from the gitwildmatch list, with some additions + +/foo/ + foo +/= Expect no match + bar + +// + \ + +/???/ + foo +\= Expect no match + foobar + +/*/ + foo + \ + +/f*/ + foo + f + +/*f/ + oof +\= Expect no match + foo + +/*foo*/ + foo + food + aprilfool + +/*ob*a*r*/ + foobar + +/*ab/ + aaaaaaabababab + +/foo\*/ + foo* + +/foo\*bar/ +\= Expect no match + foobar + +/f\\oo/ + f\\oo + +/*[al]?/ + ball + +/[ten]/ +\= Expect no match + ten + +/t[a-g]n/ + ten + +/a[]]b/ + a]b + +/a[]a-]b/ + +/a[]-]b/ + a-b + a]b +\= Expect no match + aab + +/a[]a-z]b/ + aab + +/]/ + ] + +/t[!a-g]n/ + ton +\= Expect no match + ten + +'[[:alpha:]][[:digit:]][[:upper:]]' + a1B + +'[[:digit:][:upper:][:space:]]' + A + 1 + \ \= +\= Expect no match + a + . + +'[a-c[:digit:]x-z]' + 5 + b + y +\= Expect no match + q + +# End of gitwildmatch tests + +/*.j?g/ + pic01.jpg + .jpg + pic02.jxg +\= Expect no match + pic03.j/g + +/A[+-0]B/ + A+B + A.B + A0B +\= Expect no match + A/B + +/*x?z/ + abc.xyz +\= Expect no match + .xyz + +/?x?z/ + axyz +\= Expect no match + .xyz + +"[,-0]x?z" + ,xyz +\= Expect no match + /xyz + .xyz + +".x*" + .xabc + +/a[--0]z/ + a-z + a.z + a0z +\= Expect no match + a/z + a1z + +/<[a-c-d]>/ + <a> + <b> + <c> + <d> + <-> + +/a[[:digit:].]z/ + a1z + a.z +\= Expect no match + a:z + +/a[[:digit].]z/ + a[.]z + a:.]z + ad.]z + +/<[[:a[:digit:]b]>/ + <[> + <:> + <a> + <9> + <b> +\= Expect no match + <d> + +/a*b/convert_glob_separator=\ + +/a*b/convert_glob_separator=. + +/a*b/convert_glob_separator=/ + +# Non control character checking + +/A\B\\C\D/ + +/\\{}\?\*+\[\]()|.^$/ + +/*a*\/*b*/ + +/?a?\/?b?/ + +/[a\\b\c][]][-][\]\-]/ + +/[^a\\b\c][!]][!-][^\]\-]/ + +/[[:alnum:][:alpha:][:blank:][:cntrl:][:digit:][:graph:][:lower:][:print:][:punct:][:space:][:upper:][:word:][:xdigit:]]/ + +"[/-/]" + +/[-----]/ + +/[------]/ + +/[!------]/ + +/[[:alpha:]-a]/ + +/[[:alpha:]][[:punct:]][[:ascii:]]/ + +/[a-[:alpha:]]/ + +/[[:alpha:/ + +/[[:alpha:]/ + +/[[:alphaa:]]/ + +/[[:xdigi:]]/ + +/[[:xdigit::]]/ + +/****/ + +/**\/abc/ + abc + x/abc + xabc + +/abc\/**/ + +/abc\/**\/abc/ + +/**\/*a*b*g*n*t/ + abcd/abcdefg/abcdefghijk/abcdefghijklmnop.txt + +/**\/*a*\/**/ + xx/xx/xx/xax/xx/xb + +/**\/*a*/ + xx/xx/xx/xax + xx/xx/xx/xax/xx + +/**\/*a*\/**\/*b*/ + xx/xx/xx/xax/xx/xb + xx/xx/xx/xax/xx/x + +"**a"convert=glob + a + c/b/a + c/b/aaa + +"a**/b"convert=glob + a/b + ab + +"a/**b"convert=glob + a/b + ab + +#pattern convert=glob:glob_no_starstar + +/***/ + +/**a**/ + +#pattern convert=unset +#pattern convert=glob:glob_no_wild_separator + +/*/ + +/*a*/ + +/**a**/ + +/a*b/ + +/*a*b*/ + +/??a??/ + +#pattern convert=unset +#pattern convert=glob,convert_glob_escape=0 + +/a\b\cd/ + +/**\/a/ + +/a`*b/convert_glob_escape=` + +/a`*b/convert_glob_escape=0 + +/a`*b/convert_glob_escape=x + +# -------- Tests of extended POSIX conversion -------- + +#pattern convert=unset:posix_extended + +/<[[:a[:digit:]b]>/ + <[> + <:> + <a> + <9> + <b> +\= Expect no match + <d> + +/a+\1b\\c|d[ab\c]/ + +/<[]bc]>/ + <]> + <b> + <c> + +/<[^]bc]>/ + <.> +\= Expect no match + <]> + <b> + +/(a)\1b/ + a1b +\= Expect no match + aab + +/(ab)c)d]/ + Xabc)d]Y + +/a***b/ + +# -------- Tests of basic POSIX conversion -------- + +#pattern convert=unset:posix_basic + +/a*b+c\+[def](ab)\(cd\)/ + +/\(a\)\1b/ + aab +\= Expect no match + a1b + +/how.to how\.to/ + how\nto how.to +\= Expect no match + how\x{0}to how.to + +/^how to \^how to/ + +/^*abc/ + +/*abc/ + X*abcY + +/**abc/ + XabcY + X*abcY + X**abcY + +/*ab\(*cd\)/ + +/^b\(c^d\)\(^e^f\)/ + +/a***b/ + +# End of testinput24 diff --git a/testdata/testinput25 b/testdata/testinput25 new file mode 100644 index 0000000..f21d9ad --- /dev/null +++ b/testdata/testinput25 @@ -0,0 +1,18 @@ +# This file tests the auxiliary pattern conversion features of the PCRE2 +# library, in UTF mode. + +#newline_default lf any anycrlf + +# -------- Tests of glob conversion -------- + +# Set the glob separator explicitly so that different OS defaults are not a +# problem. Then test various errors. + +#pattern convert=glob,convert_glob_escape=\,convert_glob_separator=/ + +# The fact that this one works in 13 bytes in the 8-bit library shows that the +# output is in UTF-8, though pcre2test shows the character as an escape. + +/'>' c4 a3 '<'/hex,utf,convert_length=13 + +# End of testinput25 diff --git a/testdata/testinput4 b/testdata/testinput4 index ce9145d..0ef7b8e 100644 --- a/testdata/testinput4 +++ b/testdata/testinput4 @@ -567,7 +567,7 @@ /[[:^xdigit:]]/g,utf M\x{442} -/[^ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞĀĂĄĆĈĊČĎĐĒĔĖĘĚĜĞĠĢĤĦĨĪĬĮİIJĴĶĹĻĽĿŁŃŅŇŊŌŎŐŒŔŖŘŚŜŞŠŢŤŦŨŪŬŮŰŲŴŶŸŹŻŽƁƂƄƆƇƉƊƋƎƏƐƑƓƔƖƗƘƜƝƟƠƢƤƦƧƩƬƮƯƱƲƳƵƷƸƼDŽLJNJǍǏǑǓǕǗǙǛǞǠǢǤǦǨǪǬǮDZǴǶǷǸǺǼǾȀȂȄȆȈȊȌȎȐȒȔȖȘȚȜȞȠȢȤȦȨȪȬȮȰȲȺȻȽȾɁΆΈΉΊΌΎΏΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩΪΫϒϓϔϘϚϜϞϠϢϤϦϨϪϬϮϴϷϹϺϽϾϿЀЁЂЃЄЅІЇЈЉЊЋЌЍЎЏАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯѠѢѤѦѨѪѬѮѰѲѴѶѸѺѼѾҀҊҌҎҐҒҔҖҘҚҜҞҠҢҤҦҨҪҬҮҰҲҴҶҸҺҼҾӀӁӃӅӇӉӋӍӐӒӔӖӘӚӜӞӠӢӤӦӨӪӬӮӰӲӴӶӸԀԂԄԆԈԊԌԎԱԲԳԴԵԶԷԸԹԺԻԼԽԾԿՀՁՂՃՄՅՆՇՈՉՊՋՌՍՎՏՐՑՒՓՔՕՖႠႡႢႣႤႥႦႧႨႩႪႫႬႭႮႯႰႱႲႳႴႵႶႷႸႹႺႻႼႽႾႿჀჁჂჃჄჅḀḂḄḆḈḊḌḎḐḒḔḖḘḚḜḞḠḢḤḦḨḪḬḮḰḲḴḶḸḺḼḾṀṂṄṆṈṊṌṎṐṒṔṖṘṚṜṞṠṢṤṦṨṪṬṮṰṲṴṶṸṺṼṾẀẂẄẆẈẊẌẎẐẒẔẠẢẤẦẨẪẬẮẰẲẴẶẸẺẼẾỀỂỄỆỈỊỌỎỐỒỔỖỘỚỜỞỠỢỤỦỨỪỬỮỰỲỴỶỸἈἉἊἋἌἍἎἏἘἙἚἛἜἝἨἩἪἫἬἭἮἯἸἹἺἻἼἽἾἿὈὉὊὋὌὍὙὛὝὟὨὩὪὫὬὭὮὯᾸᾹᾺΆῈΈῊΉῘῙῚΊῨῩῪΎῬῸΌῺΏabcdefghijklmnopqrstuvwxyzªµºßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿāăąćĉċčďđēĕėęěĝğġģĥħĩīĭįıijĵķĸĺļľŀłńņňʼnŋōŏőœŕŗřśŝşšţťŧũūŭůűųŵŷźżžſƀƃƅƈƌƍƒƕƙƚƛƞơƣƥƨƪƫƭưƴƶƹƺƽƾƿdžljnjǎǐǒǔǖǘǚǜǝǟǡǣǥǧǩǫǭǯǰdzǵǹǻǽǿȁȃȅȇȉȋȍȏȑȓȕȗșțȝȟȡȣȥȧȩȫȭȯȱȳȴȵȶȷȸȹȼȿɀɐɑɒɓɔɕɖɗɘəɚɛɜɝɞɟɠɡɢɣɤɥɦɧɨɩɪɫɬɭɮɯɰɱɲɳɴɵɶɷɸɹɺɻɼɽɾɿʀʁʂʃʄʅʆʇʈʉʊʋʌʍʎʏʐʑʒʓʔʕʖʗʘʙʚʛʜʝʞʟʠʡʢʣʤʥʦʧʨʩʪʫʬʭʮʯΐάέήίΰαβγδεζηθικλμνξοπρςστυφχψωϊϋόύώϐϑϕϖϗϙϛϝϟϡϣϥϧϩϫϭϯϰϱϲϳϵϸϻϼабвгдежзийклмнопрстуфхцчшщъыьэюяѐёђѓєѕіїјљњћќѝўџѡѣѥѧѩѫѭѯѱѳѵѷѹѻѽѿҁҋҍҏґғҕҗҙқҝҟҡңҥҧҩҫҭүұҳҵҷҹһҽҿӂӄӆӈӊӌӎӑӓӕӗәӛӝӟӡӣӥӧөӫӭӯӱӳӵӷӹԁԃԅԇԉԋԍԏաբգդեզէըթժիլխծկհձղճմյնշոչպջռսվտրցւփքօֆևᴀᴁᴂᴃᴄᴅᴆᴇᴈᴉᴊᴋᴌᴍᴎᴏᴐᴑᴒᴓᴔᴕᴖᴗᴘᴙᴚᴛᴜᴝᴞᴟᴠᴡᴢᴣᴤᴥᴦᴧᴨᴩᴪᴫᵢᵣᵤᵥᵦᵧᵨᵩᵪᵫᵬᵭᵮᵯᵰᵱᵲᵳᵴᵵᵶᵷᵹᵺᵻᵼᵽᵾᵿᶀᶁᶂᶃᶄᶅᶆᶇᶈᶉᶊᶋᶌᶍᶎᶏᶐᶑᶒᶓᶔᶕᶖᶗᶘᶙᶚḁḃḅḇḉḋḍḏḑḓḕḗḙḛḝḟḡḣḥḧḩḫḭḯḱḳḵḷḹḻḽḿṁṃṅṇṉṋṍṏṑṓṕṗṙṛṝṟṡṣṥṧṩṫṭṯṱṳṵṷṹṻṽṿẁẃẅẇẉẋẍẏẑẓẕẖẗẘẙẚẛạảấầẩẫậắằẳẵặẹẻẽếềểễệỉịọỏốồổỗộớờởỡợụủứừửữựỳỵỷỹἀἁἂἃἄἅἆἇἐἑἒἓἔἕἠἡἢἣἤἥἦἧἰἱἲἳἴἵἶἷὀὁὂὃὄὅὐὑὒὓὔὕὖὗὠὡὢὣὤὥὦὧὰάὲέὴήὶίὸόὺύὼώᾀᾁᾂᾃᾄᾅᾆᾇᾐᾑᾒᾓᾔᾕᾖᾗᾠᾡᾢᾣᾤᾥᾦᾧᾰᾱᾲᾳᾴᾶᾷιῂῃῄῆῇῐῑῒΐῖῗῠῡῢΰῤῥῦῧῲῳῴῶῷⲁⲃⲅⲇⲉⲋⲍⲏⲑⲓⲕⲗⲙⲛⲝⲟⲡⲣⲥⲧⲩⲫⲭⲯⲱⲳⲵⲷⲹⲻⲽⲿⳁⳃⳅⳇⳉⳋⳍⳏⳑⳓⳕⳗⳙⳛⳝⳟⳡⳣⳤⴀⴁⴂⴃⴄⴅⴆⴇⴈⴉⴊⴋⴌⴍⴎⴏⴐⴑⴒⴓⴔⴕⴖⴗⴘⴙⴚⴛⴜⴝⴞⴟⴠⴡⴢⴣⴤⴥfffiflffifflſtstﬓﬔﬕﬖﬗ\d-_^]/utf +/[^ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞĀĂĄĆĈĊČĎĐĒĔĖĘĚĜĞĠĢĤĦĨĪĬĮİIJĴĶĹĻĽĿŁŃŅŇŊŌŎŐŒŔŖŘŚŜŞŠŢŤŦŨŪŬŮŰŲŴŶŸŹŻŽƁƂƄƆƇƉƊƋƎƏƐƑƓƔƖƗƘƜƝƟƠƢƤƦƧƩƬƮƯƱƲƳƵƷƸƼDŽLJNJǍǏǑǓǕǗǙǛǞǠǢǤǦǨǪǬǮDZǴǶǷǸǺǼǾȀȂȄȆȈȊȌȎȐȒȔȖȘȚȜȞȠȢȤȦȨȪȬȮȰȲȺȻȽȾɁΆΈΉΊΌΎΏΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩΪΫϒϓϔϘϚϜϞϠϢϤϦϨϪϬϮϴϷϹϺϽϾϿЀЁЂЃЄЅІЇЈЉЊЋЌЍЎЏАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯѠѢѤѦѨѪѬѮѰѲѴѶѸѺѼѾҀҊҌҎҐҒҔҖҘҚҜҞҠҢҤҦҨҪҬҮҰҲҴҶҸҺҼҾӀӁӃӅӇӉӋӍӐӒӔӖӘӚӜӞӠӢӤӦӨӪӬӮӰӲӴӶӸԀԂԄԆԈԊԌԎԱԲԳԴԵԶԷԸԹԺԻԼԽԾԿՀՁՂՃՄՅՆՇՈՉՊՋՌՍՎՏՐՑՒՓՔՕՖႠႡႢႣႤႥႦႧႨႩႪႫႬႭႮႯႰႱႲႳႴႵႶႷႸႹႺႻႼႽႾႿჀჁჂჃჄჅḀḂḄḆḈḊḌḎḐḒḔḖḘḚḜḞḠḢḤḦḨḪḬḮḰḲḴḶḸḺḼḾṀṂṄṆṈṊṌṎṐṒṔṖṘṚṜṞṠṢṤṦṨṪṬṮṰṲṴṶṸṺṼṾẀẂẄẆẈẊẌẎẐẒẔẠẢẤẦẨẪẬẮẰẲẴẶẸẺẼẾỀỂỄỆỈỊỌỎỐỒỔỖỘỚỜỞỠỢỤỦỨỪỬỮỰỲỴỶỸἈἉἊἋἌἍἎἏἘἙἚἛἜἝἨἩἪἫἬἭἮἯἸἹἺἻἼἽἾἿὈὉὊὋὌὍὙὛὝὟὨὩὪὫὬὭὮὯᾸᾹᾺΆῈΈῊΉῘῙῚΊῨῩῪΎῬῸΌῺΏabcdefghijklmnopqrstuvwxyzªµºßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿāăąćĉċčďđēĕėęěĝğġģĥħĩīĭįıijĵķĸĺļľŀłńņňʼnŋōŏőœŕŗřśŝşšţťŧũūŭůűųŵŷźżžſƀƃƅƈƌƍƒƕƙƚƛƞơƣƥƨƪƫƭưƴƶƹƺƽƾƿdžljnjǎǐǒǔǖǘǚǜǝǟǡǣǥǧǩǫǭǯǰdzǵǹǻǽǿȁȃȅȇȉȋȍȏȑȓȕȗșțȝȟȡȣȥȧȩȫȭȯȱȳȴȵȶȷȸȹȼȿɀɐɑɒɓɔɕɖɗɘəɚɛɜɝɞɟɠɡɢɣɤɥɦɧɨɩɪɫɬɭɮɯɰɱɲɳɴɵɶɷɸɹɺɻɼɽɾɿʀʁʂʃʄʅʆʇʈʉʊʋʌʍʎʏʐʑʒʓʔʕʖʗʘʙʚʛʜʝʞʟʠʡʢʣʤʥʦʧʨʩʪʫʬʭʮʯΐάέήίΰαβγδεζηθικλμνξοπρςστυφχψωϊϋόύώϐϑϕϖϗϙϛϝϟϡϣϥϧϩϫϭϯϰϱϲϳϵϸϻϼабвгдежзийклмнопрстуфхцчшщъыьэюяѐёђѓєѕіїјљњћќѝўџѡѣѥѧѩѫѭѯѱѳѵѷѹѻѽѿҁҋҍҏґғҕҗҙқҝҟҡңҥҧҩҫҭүұҳҵҷҹһҽҿӂӄӆӈӊӌӎӑӓӕӗәӛӝӟӡӣӥӧөӫӭӯӱӳӵӷӹԁԃԅԇԉԋԍԏաբգդեզէըթժիլխծկհձղճմյնշոչպջռսվտրցւփքօֆևᴀᴁᴂᴃᴄᴅᴆᴇᴈᴉᴊᴋᴌᴍᴎᴏᴐᴑᴒᴓᴔᴕᴖᴗᴘᴙᴚᴛᴜᴝᴞᴟᴠᴡᴢᴣᴤᴥᴦᴧᴨᴩᴪᴫᵢᵣᵤᵥᵦᵧᵨᵩᵪᵫᵬᵭᵮᵯᵰᵱᵲᵳᵴᵵᵶᵷᵹᵺᵻᵼᵽᵾᵿᶀᶁᶂᶃᶄᶅᶆᶇᶈᶉᶊᶋᶌᶍᶎᶏᶐᶑᶒᶓᶔᶕᶖᶗᶘᶙᶚḁḃḅḇḉḋḍḏḑḓḕḗḙḛḝḟḡḣḥḧḩḫḭḯḱḳḵḷḹḻḽḿṁṃṅṇṉṋṍṏṑṓṕṗṙṛṝṟṡṣṥṧṩṫṭṯṱṳṵṷṹṻṽṿẁẃẅẇẉẋẍẏẑẓẕẖẗẘẙẚẛạảấầẩẫậắằẳẵặẹẻẽếềểễệỉịọỏốồổỗộớờởỡợụủứừửữựỳỵỷỹἀἁἂἃἄἅἆἇἐἑἒἓἔἕἠἡἢἣἤἥἦἧἰἱἲἳἴἵἶἷὀὁὂὃὄὅὐὑὒὓὔὕὖὗὠὡὢὣὤὥὦὧὰάὲέὴήὶίὸόὺύὼώᾀᾁᾂᾃᾄᾅᾆᾇᾐᾑᾒᾓᾔᾕᾖᾗᾠᾡᾢᾣᾤᾥᾦᾧᾰᾱᾲᾳᾴᾶᾷιῂῃῄῆῇῐῑῒΐῖῗῠῡῢΰῤῥῦῧῲῳῴῶῷⲁⲃⲅⲇⲉⲋⲍⲏⲑⲓⲕⲗⲙⲛⲝⲟⲡⲣⲥⲧⲩⲫⲭⲯⲱⲳⲵⲷⲹⲻⲽⲿⳁⳃⳅⳇⳉⳋⳍⳏⳑⳓⳕⳗⳙⳛⳝⳟⳡⳣⳤⴀⴁⴂⴃⴄⴅⴆⴇⴈⴉⴊⴋⴌⴍⴎⴏⴐⴑⴒⴓⴔⴕⴖⴗⴘⴙⴚⴛⴜⴝⴞⴟⴠⴡⴢⴣⴤⴥfffiflffifflſtstﬓﬔﬕﬖﬗ\d_^]/utf /^[^d]*?$/ abc @@ -1627,6 +1627,11 @@ /[z\x{1f88}]+/i,utf \x{1f88}\x{1f80} +# Check a reference with more than one other case + +/^(\x{00b5})\1{2}$/i,utf + \x{00b5}\x{039c}\x{03bc} + # Characters with more than one other case; test in classes /[z\x{00b5}]+/i,utf @@ -2282,4 +2287,18 @@ \x{389} \x{20ac} +/(?=.*b)\pL/ + 11bb + +/(?(?=.*b)(?=.*b)\pL|.*c)/ + 11bb + +/^\x{123}+?$/utf,no_auto_possess + \x{123}\x{123}\x{123} + +/^\x{123}+?$/i,utf,no_auto_possess + \x{123}\x{122}\x{123} +\= Expect no match + \x{123}\x{124}\x{123} + # End of testinput4 diff --git a/testdata/testinput5 b/testdata/testinput5 index 2e13a7c..0366136 100644 --- a/testdata/testinput5 +++ b/testdata/testinput5 @@ -3,17 +3,21 @@ # results in 8-bit, 16-bit, and 32-bit modes are excluded (see tests 10 and # 12). +#newline_default lf any anycrlf + # PCRE2 and Perl disagree about the characteristics of certain Unicode -# characters. For example, 061C is considered by Perl to be Arabic, though -# is it not listed as such in the Unicode Scripts.txt file, and 2066-2069 are -# graphic and printable according to Perl, though they are actually "isolate" -# control characters. That is why the following tests are here rather than in -# test 4. +# characters. For example, 061C was considered by Perl to be Arabic, though +# it was not listed as such in the Unicode Scripts.txt file for Unicode 8. +# However, it *is* in that file for Unicode 10, but when I came to re-check, +# Perl had changed in the meantime, with 5.026 not recognizing it as Arabic. + +# 2066-2069 are graphic and printable according to Perl, though they are +# actually "isolate" control characters. That is why the following tests are +# here rather than in test 4. /^[\p{Arabic}]/utf -\= Expect no match \x{061c} - + /^[[:graph:]]+$/utf,ucp \= Expect no match \x{61c} @@ -37,14 +41,14 @@ /^[[:^print:]]+$/utf,ucp \x{09}\x{1D}\x{85}\x{61c}\x{2028}\x{2029}\x{2065}\x{2066}\x{2067} \x{2068}\x{2069} - + # Perl does not consider U+180e to be a space character. It is true that it # does not appear in the Unicode PropList.txt file as such, but in many other # sources it is listed as a space, and has been treated as such in PCRE for -# a long time. +# a long time. /^>[[:blank:]]*/utf,ucp - >\x{20}\x{a0}\x{1680}\x{180e}\x{2000}\x{202f}\x{9}\x{b}\x{2028} + >\x{20}\x{a0}\x{1680}\x{180e}\x{2000}\x{202f}\x{9}\x{b}\x{2028} /^A\s+Z/utf,ucp A\x{85}\x{180e}\x{2005}Z @@ -52,7 +56,7 @@ /^A[\s]+Z/utf,ucp A\x{2005}Z A\x{85}\x{2005}Z - + /^[[:graph:]]+$/utf,ucp \= Expect no match \x{180e} @@ -104,7 +108,7 @@ /\x{0041}\x{2262}\x{0391}\x{002e}/IB,utf \x{0041}\x{2262}\x{0391}\x{002e} - + /.{3,5}X/IB,utf \x{212ab}\x{212ab}\x{212ab}\x{861}X @@ -116,23 +120,16 @@ \= Expect no match c \x{ff} - \x{100} + \x{100} -/^[^ab]/IB,utf - c - \x{ff} - \x{100} -\= Expect no match - aaa - /\x{100}*(\d+|"(?1)")/utf 1234 - "1234" + "1234" \x{100}1234 - "\x{100}1234" - \x{100}\x{100}12ab - \x{100}\x{100}"12" -\= Expect no match + "\x{100}1234" + \x{100}\x{100}12ab + \x{100}\x{100}"12" +\= Expect no match \x{100}\x{100}abcd /\x{100}*/IB,utf @@ -148,7 +145,7 @@ \x{104} \= Expect no match \x{105} - \x{ff} + \x{ff} /[\xFF]/IB >\xff< @@ -158,18 +155,18 @@ /[Ä-Ü]/utf Ö # Matches without Study \x{d6} - + /[Ä-Ü]/utf Ö <-- Same with Study \x{d6} - + /[\x{c4}-\x{dc}]/utf Ö # Matches without Study - \x{d6} + \x{d6} /[\x{c4}-\x{dc}]/utf Ö <-- Same with Study - \x{d6} + \x{d6} /[^\x{100}]abc(xyz(?1))/IB,utf @@ -183,19 +180,22 @@ /\W/utf A.B - A\x{100}B - + A\x{100}B + /\w/utf - \x{100}X + \x{100}X -/^\ሴ/IB,utf +# Use no_start_optimize because the first code unit is different in 8-bit from +# the wider modes. + +/^\ሴ/IB,utf,no_start_optimize /()()()()()()()()()() ()()()()()()()()()() ()()()()()()()()()() ()()()()()()()()()() A (x) (?41) B/x,utf - AxxB + AxxB /^[\x{100}\E-\Q\E\x{150}]/B,utf @@ -213,11 +213,11 @@ a\r\nb a\x0bb a\x0cb - a\x{85}b - a\x{2028}b - a\x{2029}b + a\x{85}b + a\x{2028}b + a\x{2029}b \= Expect no match - a\n\rb + a\n\rb /^a\R*b/bsr=unicode,utf ab @@ -226,9 +226,9 @@ a\r\nb a\x0bb a\x0c\x{2028}\x{2029}b - a\x{85}b - a\n\rb - a\n\r\x{85}\x0cb + a\x{85}b + a\n\rb + a\n\r\x{85}\x0cb /^a\R+b/bsr=unicode,utf a\nb @@ -236,20 +236,20 @@ a\r\nb a\x0bb a\x0c\x{2028}\x{2029}b - a\x{85}b - a\n\rb - a\n\r\x{85}\x0cb + a\x{85}b + a\n\rb + a\n\r\x{85}\x0cb \= Expect no match - ab + ab /^a\R{1,3}b/bsr=unicode,utf a\nb a\n\rb a\n\r\x{85}b - a\r\n\r\nb - a\r\n\r\n\r\nb + a\r\n\r\nb + a\r\n\r\n\r\nb a\n\r\n\rb - a\n\n\r\nb + a\n\n\r\nb \= Expect no match a\n\n\n\rb a\r @@ -258,28 +258,28 @@ X X\x0a X\x09X\x0b \= Expect no match - \x{a0} X\x0a - + \x{a0} X\x0a + /\H*\h+\V?\v{3,4}/utf \x09\x20\x{a0}X\x0a\x0b\x0c\x0d\x0a \x09\x20\x{a0}\x0a\x0b\x0c\x0d\x0a \x09\x20\x{a0}\x0a\x0b\x0c -\= Expect no match +\= Expect no match \x09\x20\x{a0}\x0a\x0b - + /\H\h\V\v/utf \x{3001}\x{3000}\x{2030}\x{2028} X\x{180e}X\x{85} \= Expect no match - \x{2009} X\x0a - + \x{2009} X\x0a + /\H*\h+\V?\v{3,4}/utf \x{1680}\x{180e}\x{2007}X\x{2028}\x{2029}\x0c\x0d\x0a \x09\x{205f}\x{a0}\x0a\x{2029}\x0c\x{2028}\x0a \x09\x20\x{202f}\x0a\x0b\x0c -\= Expect no match +\= Expect no match \x09\x{200a}\x{a0}\x{2028}\x0b - + /[\h]/B,utf >\x{1680} @@ -293,57 +293,57 @@ /[\V]/B,utf /.*$/newline=any,utf - \x{1ec5} - + \x{1ec5} + /a\Rb/I,bsr=anycrlf,utf a\rb a\nb a\r\nb \= Expect no match a\x{85}b - a\x0bb + a\x0bb /a\Rb/I,bsr=unicode,utf a\rb a\nb a\r\nb a\x{85}b - a\x0bb - + a\x0bb + /a\R?b/I,bsr=anycrlf,utf a\rb a\nb a\r\nb \= Expect no match a\x{85}b - a\x0bb + a\x0bb /a\R?b/I,bsr=unicode,utf a\rb a\nb a\r\nb a\x{85}b - a\x0bb - + a\x0bb + /.*a.*=.b.*/utf,newline=any QQQ\x{2029}ABCaXYZ=!bPQR \= Expect no match a\x{2029}b - \x61\xe2\x80\xa9\x62 + \x61\xe2\x80\xa9\x62 /[[:a\x{100}b:]]/utf /a[^]b/utf,alt_bsux,allow_empty_class,match_unset_backref a\x{1234}b - a\nb + a\nb \= Expect no match - ab - + ab + /a[^]+b/utf,alt_bsux,allow_empty_class,match_unset_backref aXb - a\nX\nX\x{1234}b + a\nX\nX\x{1234}b \= Expect no match - ab + ab /(\x{de})\1/ \x{de}\x{de} @@ -357,42 +357,42 @@ Xaa\=ps Xaaa\=ps Xaaaa\=ps - + /Xa{2,4}?b/utf X\=ps Xa\=ps Xaa\=ps Xaaa\=ps Xaaaa\=ps - + /Xa{2,4}+b/utf X\=ps Xa\=ps Xaa\=ps Xaaa\=ps Xaaaa\=ps - + /X\x{123}{2,4}b/utf X\=ps X\x{123}\=ps X\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\x{123}\=ps - + /X\x{123}{2,4}?b/utf X\=ps X\x{123}\=ps X\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\x{123}\=ps - + /X\x{123}{2,4}+b/utf X\=ps X\x{123}\=ps X\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\x{123}\=ps - + /X\x{123}{2,4}b/utf \= Expect no match Xx\=ps @@ -400,7 +400,7 @@ X\x{123}\x{123}x\=ps X\x{123}\x{123}\x{123}x\=ps X\x{123}\x{123}\x{123}\x{123}x\=ps - + /X\x{123}{2,4}?b/utf \= Expect no match Xx\=ps @@ -408,7 +408,7 @@ X\x{123}\x{123}x\=ps X\x{123}\x{123}\x{123}x\=ps X\x{123}\x{123}\x{123}\x{123}x\=ps - + /X\x{123}{2,4}+b/utf \= Expect no match Xx\=ps @@ -416,21 +416,21 @@ X\x{123}\x{123}x\=ps X\x{123}\x{123}\x{123}x\=ps X\x{123}\x{123}\x{123}\x{123}x\=ps - + /X\d{2,4}b/utf X\=ps X3\=ps X33\=ps X333\=ps X3333\=ps - + /X\d{2,4}?b/utf X\=ps X3\=ps X33\=ps X333\=ps X3333\=ps - + /X\d{2,4}+b/utf X\=ps X3\=ps @@ -444,14 +444,14 @@ Xaa\=ps Xaaa\=ps Xaaaa\=ps - + /X\D{2,4}?b/utf X\=ps Xa\=ps Xaa\=ps Xaaa\=ps Xaaaa\=ps - + /X\D{2,4}+b/utf X\=ps Xa\=ps @@ -465,14 +465,14 @@ X\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\x{123}\=ps - + /X\D{2,4}?b/utf X\=ps X\x{123}\=ps X\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\x{123}\=ps - + /X\D{2,4}+b/utf X\=ps X\x{123}\=ps @@ -486,14 +486,14 @@ Xaa\=ps Xaaa\=ps Xaaaa\=ps - + /X[abc]{2,4}?b/utf X\=ps Xa\=ps Xaa\=ps Xaaa\=ps Xaaaa\=ps - + /X[abc]{2,4}+b/utf X\=ps Xa\=ps @@ -507,14 +507,14 @@ X\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\x{123}\=ps - + /X[abc\x{123}]{2,4}?b/utf X\=ps X\x{123}\=ps X\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\x{123}\=ps - + /X[abc\x{123}]{2,4}+b/utf X\=ps X\x{123}\=ps @@ -528,14 +528,14 @@ Xzz\=ps Xzzz\=ps Xzzzz\=ps - + /X[^a]{2,4}?b/utf X\=ps Xz\=ps Xzz\=ps Xzzz\=ps Xzzzz\=ps - + /X[^a]{2,4}+b/utf X\=ps Xz\=ps @@ -549,14 +549,14 @@ X\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\x{123}\=ps - + /X[^a]{2,4}?b/utf X\=ps X\x{123}\=ps X\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\x{123}\=ps - + /X[^a]{2,4}+b/utf X\=ps X\x{123}\=ps @@ -570,14 +570,14 @@ YXYY\=ps YXYYY\=ps YXYYYY\=ps - + /(Y)X\1{2,4}?b/utf YX\=ps YXY\=ps YXYY\=ps YXYYY\=ps YXYYYY\=ps - + /(Y)X\1{2,4}+b/utf YX\=ps YXY\=ps @@ -591,14 +591,14 @@ \x{123}X\x{123}\x{123}\=ps \x{123}X\x{123}\x{123}\x{123}\=ps \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps - + /(\x{123})X\1{2,4}?b/utf \x{123}X\=ps \x{123}X\x{123}\=ps \x{123}X\x{123}\x{123}\=ps \x{123}X\x{123}\x{123}\x{123}\=ps \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps - + /(\x{123})X\1{2,4}+b/utf \x{123}X\=ps \x{123}X\x{123}\=ps @@ -640,13 +640,13 @@ AB\x{a0}xxx\x{85}XYZ /\S \S/utf,tables=2 - \x{a2} \x{84} + \x{a2} \x{84} 'A#хц'Bx,newline=any,utf 'A#хц PQ'Bx,newline=any,utf - + /a+#хaa z#XX?/Bx,newline=any,utf @@ -661,13 +661,13 @@ /(\R*)(.)/s,utf \r\n - \r\r\n\n\r - \r\r\n\n\r\n + \r\r\n\n\r + \r\r\n\n\r\n /(\R)*(.)/s,utf \r\n - \r\r\n\n\r - \r\r\n\n\r\n + \r\r\n\n\r + \r\r\n\n\r\n /[^\x{1234}]+/Ii,utf @@ -688,7 +688,7 @@ /f.*/s,utf for\=ph - + /\x{d7ff}\x{e000}/utf /\x{d800}/utf @@ -777,7 +777,7 @@ /./utf,newline=crlf \r\=ps \r\=ph - + /.{2,3}/utf,newline=crlf \r\=ps \r\=ph @@ -835,9 +835,9 @@ /[\p{Nd}+-]+/IB,utf 1234 12-34 - 12+\x{661}-34 + 12+\x{661}-34 \= Expect no match - abcd + abcd /(?:[\PPa*]*){8,}/ @@ -884,7 +884,7 @@ /\p{Zl}{2,3}+/B,utf \x{2028}\x{2028}\x{2028} - + /\p{Zl}/B,utf /\p{Lu}{3}+/B,utf @@ -904,8 +904,8 @@ /^\p{Cs}/utf \x{dfff}\=no_utf_check \= Expect no match - \x{09f} - + \x{09f} + /^\p{Mn}/utf \x{1a1b} @@ -923,60 +923,60 @@ \= Expect no match X \x{2c2} - + /^\p{Zs}/utf \ \ \x{a0} \x{1680} \x{2000} - \x{2001} + \x{2001} \= Expect no match \x{2028} - \x{200d} - + \x{200d} + # These are here because Perl has problems with the negative versions of the # properties and has changed how it behaves for caseless matching. - + /\p{^Lu}/i,utf 1234 \= Expect no match - ABC + ABC /\P{Lu}/i,utf 1234 \= Expect no match - ABC + ABC /\p{Ll}/i,utf a Az \= Expect no match - ABC + ABC /\p{Lu}/i,utf A - a\x{10a0}B -\= Expect no match + a\x{10a0}B +\= Expect no match a - \x{1d00} + \x{1d00} /\p{Lu}/i,utf A aZ \= Expect no match - abc + abc /[\x{c0}\x{391}]/i,utf \x{c0} - \x{e0} + \x{e0} # The next two are special cases where the lengths of the different cases of # the same character differ. The first went wrong with heap frame storage; the -# second was broken in all cases. +# second was broken in all cases. /^\x{023a}+?(\x{0130}+)/i,utf \x{023a}\x{2c65}\x{0130} - + /^\x{023a}+([^X])/i,utf \x{023a}\x{2c65}X @@ -994,71 +994,71 @@ /^\x{c0}$/i,utf \x{c0} - \x{e0} + \x{e0} /^\x{e0}$/i,utf \x{c0} - \x{e0} + \x{e0} # The next two should be Perl-compatible, but it fails to match \x{e0}. PCRE # will match it only with UCP support, because without that it has no notion -# of case for anything other than the ASCII letters. +# of case for anything other than the ASCII letters. /((?i)[\x{c0}])/utf \x{c0} - \x{e0} + \x{e0} /(?i:[\x{c0}])/utf \x{c0} - \x{e0} + \x{e0} -# These are PCRE's extra properties to help with Unicodizing \d etc. +# These are PCRE's extra properties to help with Unicodizing \d etc. /^\p{Xan}/utf ABCD 1234 \x{6ca} \x{a6c} - \x{10a7} + \x{10a7} \= Expect no match - _ABC + _ABC /^\p{Xan}+/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_ \= Expect no match - _ABC + _ABC /^\p{Xan}+?/utf \x{6ca}\x{a6c}\x{10a7}_ /^\p{Xan}*/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_ - + /^\p{Xan}{2,9}/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_ - + /^\p{Xan}{2,9}?/utf \x{6ca}\x{a6c}\x{10a7}_ - + /^[\p{Xan}]/utf ABCD1234_ 1234abcd_ \x{6ca} \x{a6c} - \x{10a7} + \x{10a7} \= Expect no match - _ABC - + _ABC + /^[\p{Xan}]+/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_ \= Expect no match - _ABC + _ABC /^>\p{Xsp}/utf >\x{1680}\x{2028}\x{0b} - >\x{a0} + >\x{a0} \= Expect no match - \x{0b} + \x{0b} /^>\p{Xsp}+/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} @@ -1068,24 +1068,24 @@ /^>\p{Xsp}*/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} - + /^>\p{Xsp}{2,9}/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} - + /^>\p{Xsp}{2,9}?/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} - + /^>[\p{Xsp}]/utf >\x{2028}\x{0b} - + /^>[\p{Xsp}]+/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} /^>\p{Xps}/utf >\x{1680}\x{2028}\x{0b} - >\x{a0} + >\x{a0} \= Expect no match - \x{0b} + \x{0b} /^>\p{Xps}+/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} @@ -1095,16 +1095,16 @@ /^>\p{Xps}*/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} - + /^>\p{Xps}{2,9}/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} - + /^>\p{Xps}{2,9}?/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} - + /^>[\p{Xps}]/utf >\x{2028}\x{0b} - + /^>[\p{Xps}]+/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} @@ -1114,9 +1114,9 @@ \x{6ca} \x{a6c} \x{10a7} - _ABC + _ABC \= Expect no match - [] + [] /^\p{Xwd}+/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_ @@ -1126,32 +1126,32 @@ /^\p{Xwd}*/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_ - + /^\p{Xwd}{2,9}/utf A_B12\x{6ca}\x{a6c}\x{10a7} - + /^\p{Xwd}{2,9}?/utf \x{6ca}\x{a6c}\x{10a7}_ - + /^[\p{Xwd}]/utf ABCD1234_ 1234abcd_ \x{6ca} \x{a6c} - \x{10a7} - _ABC + \x{10a7} + _ABC \= Expect no match - [] - + [] + /^[\p{Xwd}]+/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_ -# A check not in UTF-8 mode +# A check not in UTF-8 mode /^[\p{Xwd}]+/ ABCD1234_ - -# Some negative checks + +# Some negative checks /^[\P{Xwd}]+/utf !.+\x{019}\x{35a}AB @@ -1221,53 +1221,53 @@ /[[:xdigit:]]/B,ucp -# Unicode properties for \b abd \B +# Unicode properties for \b abd \B /\b...\B/utf,ucp abc_ - \x{37e}abc\x{376} - \x{37e}\x{376}\x{371}\x{393}\x{394} - !\x{c0}++\x{c1}\x{c2} - !\x{c0}+++++ + \x{37e}abc\x{376} + \x{37e}\x{376}\x{371}\x{393}\x{394} + !\x{c0}++\x{c1}\x{c2} + !\x{c0}+++++ -# Without PCRE_UCP, non-ASCII always fail, even if < 256 +# Without PCRE_UCP, non-ASCII always fail, even if < 256 /\b...\B/utf abc_ -\= Expect no match - \x{37e}abc\x{376} - \x{37e}\x{376}\x{371}\x{393}\x{394} - !\x{c0}++\x{c1}\x{c2} - !\x{c0}+++++ +\= Expect no match + \x{37e}abc\x{376} + \x{37e}\x{376}\x{371}\x{393}\x{394} + !\x{c0}++\x{c1}\x{c2} + !\x{c0}+++++ -# With PCRE_UCP, non-UTF8 chars that are < 256 still check properties +# With PCRE_UCP, non-UTF8 chars that are < 256 still check properties /\b...\B/ucp abc_ - !\x{c0}++\x{c1}\x{c2} - !\x{c0}+++++ + !\x{c0}++\x{c1}\x{c2} + !\x{c0}+++++ -# Some of these are silly, but they check various combinations +# Some of these are silly, but they check various combinations /[[:^alpha:][:^cntrl:]]+/B,utf,ucp 123 - abc + abc /[[:^cntrl:][:^alpha:]]+/B,utf,ucp 123 - abc + abc /[[:alpha:]]+/B,utf,ucp abc /[[:^alpha:]\S]+/B,utf,ucp 123 - abc + abc /[^\d]+/B,utf,ucp abc123 abc\x{123} - \x{660}abc + \x{660}abc /\p{Lu}+9\p{Lu}+B\p{Lu}+b/B @@ -1287,7 +1287,7 @@ /A+\p{N}A+\dB+\p{N}*B+\d*/B,ucp -# These behaved oddly in Perl, so they are kept in this test +# These behaved oddly in Perl, so they are kept in this test /(\x{23a}\x{23a}\x{23a})?\1/i,utf \= Expect no match @@ -1319,43 +1319,43 @@ /(\x{2c65}\x{2c65})\1/i,utf \x{2c65}\x{2c65}\x{23a}\x{23a} - + /(ⱥⱥ)\1/i,utf - ⱥⱥȺȺ - + ⱥⱥȺȺ + /(\x{23a}\x{23a}\x{23a})\1Y/i,utf X\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}YZ /(\x{2c65}\x{2c65})\1Y/i,utf X\x{2c65}\x{2c65}\x{23a}\x{23a}YZ -# These scripts weren't yet in Perl when I added Unicode 6.0.0 to PCRE +# These scripts weren't yet in Perl when I added Unicode 6.0.0 to PCRE /^[\p{Batak}]/utf \x{1bc0} \x{1bff} \= Expect no match \x{1bf4} - + /^[\p{Brahmi}]/utf \x{11000} \x{1106f} \= Expect no match \x{1104e} - + /^[\p{Mandaic}]/utf \x{840} \x{85e} \= Expect no match \x{85c} - \x{85d} + \x{85d} /(\X*)(.)/s,utf A\x{300} /^S(\X*)e(\X*)$/utf Stéréo - + /^\X/utf ́réo @@ -1387,8 +1387,8 @@ aa\=ps aa\=ph aba\=ps - -# These Unicode 6.1.0 scripts are not known to Perl. + +# These Unicode 6.1.0 scripts are not known to Perl. /\p{Chakma}\d/utf,ucp \x{11100}\x{1113c} @@ -1403,7 +1403,7 @@ A\x{300}\x{301}\=ph A\x{301}\=ps A\x{301}\=ph - + /^\X{2,3}/utf A\=ps A\=ph @@ -1419,7 +1419,7 @@ AA\=ph A\x{300}\x{301}A\x{300}\x{301}\=ps A\x{300}\x{301}A\x{300}\x{301}\=ph - + /^\X+/utf AA\=ps AA\=ph @@ -1486,8 +1486,8 @@ /is{2}t/i,utf \= Expect no match iskt - -# This property is a PCRE special + +# This property is a PCRE special /^\p{Xuc}/utf $abc @@ -1495,7 +1495,7 @@ `abc \x{1234}abc \= Expect no match - abc + abc /^\p{Xuc}+/utf $@`\x{a0}\x{1234}\x{e000}** @@ -1552,8 +1552,8 @@ @abc `abc \x{1234}abc - -# Some auto-possessification tests + +# Some auto-possessification tests /\pN+\z/B @@ -1603,7 +1603,7 @@ /\p{Xan}+\p{Nd} \P{Xan}+\p{Nd} \p{Xan}+\P{Nd} \P{Xan}+\P{Nd}/Bx,ucp -# End auto-possessification tests +# End auto-possessification tests /\w+/B,utf,ucp,auto_callout abcd @@ -1623,7 +1623,7 @@ /\d+\s{0,5}=\s*\S?=\w{0,4}\W*/B,utf,ucp /[RST]+/Bi,utf,ucp - + /[R-T]+/Bi,utf,ucp /[Q-U]+/Bi,utf,ucp @@ -1636,7 +1636,7 @@ /\x{100}\x{200}\K\x{300}/utf,startchar \x{100}\x{200}\x{300} - + # Test UTF characters in a substitution /ábc/utf,replace=XሴZ @@ -1675,15 +1675,6 @@ /((?<digit>\d)|(?<letter>\p{L}))/g,substitute_extended,replace=<${digit:+digit; :not digit; }${letter:+letter:not a letter}> ab12cde -/[\W\p{Any}]/B - abc - 123 - -/[\W\pL]/B - abc -\= Expect no match - 123 - /(*UCP)(*UTF)[[:>:]]X/B /abc/utf,replace=xyz @@ -1704,7 +1695,7 @@ /[^[:ascii:]\W]/utf,ucp,bincode \x{de} \x{200} -\= Expect no match +\= Expect no match \x{300} \x{37e} @@ -1716,4 +1707,357 @@ /(*UTF)C\x09((?<!'(?x)!*H? #\xcc\x9a[^$]/ -# End of testinput5 +/[\D]/utf + \x{1d7cf} + +/[\D\P{Nd}]/utf + \x{1d7cf} + +/[^\D]/utf + a9b +\= Expect no match + \x{1d7cf} + +/[^\D\P{Nd}]/utf + a9b + \x{1d7cf} +\= Expect no match + \x{10000} + +# Hex uses pattern length, not zero-terminated. This tests for overrunning +# the given length of a pattern. + +/'(*UTF)'/hex + +/'#('/hex,extended,utf + +/a(?<=A\XB)/utf + +/ab(?<=A\RB)/utf + +/../utf,auto_callout + \n\x{123}\x{123}\x{123}\x{123} + +# This tests processing wide characters in extended mode. + +/XȀ/x,utf + +# These three test a bug fix that was not clearing up after a locale setting +# when the test or a subsequent one matched a wide character. + +//locale=C + +/[\P{Yi}]/utf +\x{2f000} + +/[\P{Yi}]/utf,locale=C +\x{2f000} + +/^(?<!(?=))/B,utf + +# Horizontal and vertical space lists ignore caseless + +/[\HH]/Bi,utf + +/[^\HH]/Bi,utf + +//g,utf + \=zero_terminate + +/^(?1)\p{Nd}{3}(a)/ + a123a + +/\p{Nd}{0,3}[\pL](*:abc)(?C1)xxx/callout_info + +# --------------------------------------------------------------------------- + +# A bunch of tests that hit lines of code that others do not (at least when +# these were created). + +/^[^a]{3,}?x/i,utf,no_start_optimize,no_auto_possess +\= Expect no match + bbb + cc + +/^[ac]{3,}?x/i,utf,no_start_optimize,no_auto_possess +\= Expect no match + aaa\x{100} + +/^X\X/no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\p{L&}+?/no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\p{L}+?/no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\p{Lu}+?/no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\p{Arabic}+?/no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\p{Xan}+?/ucp,no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\s+?/ucp,no_start_optimize,no_auto_possess +\= Expect no match + X + XX + +/^X\S+?/ucp,no_start_optimize,no_auto_possess + XX +\= Expect no match + X + +/^X\w+?/ucp,no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X[^\x{b5}]+?/i,utf,no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X[\x{b5}]+?/i,utf,no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\p{Xuc}+?/utf,no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X.+?Z/s,utf,no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\R+?/utf,no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\H+?/utf,no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\V+?/utf,no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\s+?/utf,no_start_optimize,no_auto_possess +\= Expect no match + X + XX + +/^X\S+?/utf,no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\p{Any}{1,3}?Z/s,no_start_optimize,no_auto_possess + XYYYZ +\= Expect no match + XY + XYY + XYYY + XYYYYZ + +/^X\p{L&}{1,3}?Z/s,no_start_optimize,no_auto_possess +\= Expect no match + XY + XY! + +/^X\p{L}{1,3}?Z/s,no_start_optimize,no_auto_possess +\= Expect no match + XY + XY! + +/^X\p{Lu}{1,3}?Z/s,no_start_optimize,no_auto_possess +\= Expect no match + XY + XY! + +/^X\P{Han}{1,3}?Z/s,utf,no_start_optimize,no_auto_possess +\= Expect no match + XY + XY! + XY\x{2f00}! + +/^X\p{Xan}{1,3}?Z/s,no_start_optimize,no_auto_possess +\= Expect no match + XY + XY! + +/^X\p{Xsp}{1,3}?Z/s,no_start_optimize,no_auto_possess +\= Expect no match + X\n + X\n! + X\n\n! + +/^X\P{Xsp}{1,3}?Z/s,no_start_optimize,no_auto_possess +\= Expect no match + XYY\n + +/^X\p{Xwd}{1,3}?Z/s,no_start_optimize,no_auto_possess +\= Expect no match + XY + XY! + XYY! + +/^X\x{b5}+?Z/i,utf,no_start_optimize,no_auto_possess +\= Expect no match + X + X\x{b5} + X\x{b5}\x{b5}Y + +/^X\p{Xuc}+?Z/utf,no_start_optimize,no_auto_possess +\= Expect no match + X + X$ + X@@Y + +/(*CRLF)^X.+?Z/utf,no_start_optimize,no_auto_possess +\= Expect partial match + XYY\r\=ph +\= Expect no match + X + +/^X.+?Z/s,utf,no_start_optimize,no_auto_possess +\= Expect no match + X + XYY + +/^X\R+?Z/utf,no_start_optimize,no_auto_possess +\= Expect no match + X\nX + X\n\rX + X\n\r\nX + X\n\n + X\n\x{0c} + +/(*BSR_ANYCRLF)^X\R+?Z/utf,no_start_optimize,no_auto_possess +\= Expect no match + X\nX + X\n\rX + X\n\r\nX + X\n\n + X\n\x{0c} + +/^X\H+?Z/utf,no_start_optimize,no_auto_possess +\= Expect no match + XY\t + XYY + +/^X\h+?Z/utf,no_start_optimize,no_auto_possess +\= Expect no match + X\t\t + X\tY + +/^X\V+?Z/utf,no_start_optimize,no_auto_possess +\= Expect no match + XY\n + XYY + +/^X\v+?Z/utf,no_start_optimize,no_auto_possess +\= Expect no match + X\n\n + X\nY + +/^X\D+?Z/utf,no_start_optimize,no_auto_possess +\= Expect no match + XY9 + XYY + +/^X\d+?Z/utf,no_start_optimize,no_auto_possess +\= Expect no match + X99 + X9Y + +/^X\S+?Z/utf,no_start_optimize,no_auto_possess +\= Expect no match + XY\n + XYY + +/^X\s+?Z/utf,no_start_optimize,no_auto_possess +\= Expect no match + X\n\n + X\nY + +/^X\W+?Z/utf,no_start_optimize,no_auto_possess +\= Expect no match + X.A + X++ + +/^X\p{L&}{1,3}Z/no_start_optimize,no_auto_possess +\= Expect no match + XY + XY! + +/^X\p{L}{1,3}Z/no_start_optimize,no_auto_possess +\= Expect no match + XY + +/^X\p{Xan}{1,3}Z/no_start_optimize,no_auto_possess +\= Expect no match + XY + +/^X\P{Xsp}{1,3}Z/no_start_optimize,no_auto_possess +\= Expect no match + XYY + +/^X\p{Xuc}+Z/utf,no_start_optimize,no_auto_possess +\= Expect no match + X$ + +# ---------------------------------------------------------------------- +# These test the dangerous PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL option. + +/\x{d800}/B,utf,bad_escape_is_literal + +/\ud800/B,utf,alt_bsux,bad_escape_is_literal + +# ---------------------------------------------------------------------- + +/Aሴ+B/literal,utf,no_utf_check + Aሴ+B + +# These are here because I upgraded to Unicode 10.0.0 before Perl did, so it +# doesn't recognize all these scripts. In time these three tests can be moved +# to test 4. + +/^(\p{Adlam}+)(\p{Bhaiksuki}+)(\p{Marchen}+)(\p{Newa}+)(\p{Osage}+) + (\p{Tangut}+)(\p{Masaram_Gondi}+)(\p{Nushu}+)(\p{Soyombo}+) + (\p{Zanabazar_Square}+)/x,utf + \x{1E900}\x{1E924}\x{1E953}\x{11C00}\x{11C2D}\x{11C3E}\x{11C70}\x{11C77}\x{11CAB}\x{11400}\x{1142F}\x{11455}\x{104B0}\x{104D8}\x{104FB}\x{16FE0}\x{18800}\x{18AF2}\x{11D00}\x{11D3A}\x{11D59}\x{16FE1}\x{1B170}\x{1B2FB}\x{11A50}\x{11A58}\x{11AA2}\x{11A00}\x{11A07}\x{11A47} + +/^\x{1E900}\x{104B0}/i,utf + \x{1E900}\x{104B0} + \x{1E922}\x{104D8} + +/^(?:(\X)(?C))+$/utf + \x{1E900}\x{1E924}\x{1E953}\x{11C00}\x{11C2D}\x{11C3E}\x{11C70}\x{11C77}\x{11CAB}\x{11400}\x{1142F}\x{11455}\x{104B0}\x{104D8}\x{104FB}\x{16FE0}\x{18800}\x{18AF2}\x{11D00}\x{11D3A}\x{11D59}\x{16FE1}\x{1B170}\x{1B2FB}\x{11A50}\x{11A58}\x{11AA2}\x{11A00}\x{11A07}\x{11A47}\=callout_capture,callout_no_where + +# These two are here because JIT is not yet updated. Also, the very first data +# line is handled differently by Perl. + +/^\X/utf + A\x{200d}B A ZWJ + \x{261D}\x{1F3FB}B E_Base E_Modifier + \x{1F466}\x{1F3FF}B E_Base_GAZ E_Modifier + \x{200d}\x{1F3A4}B ZWJ Glue_After_ZWJ + \x{200d}\x{1F469}B ZWJ E_Base_GAZ + \x{1F1E6}\x{1F1E7}B RegionalIndicator RegionalIndicator + \x{261D}\x{E0100}\x{1F3FB}B E_Base Extend E_Modifier + +# Regional indicators + +/^(\X)(\X)/utf,aftertext + \x{1F1E6}\x{1F1E7}\x{1F1E7}B + \x{1F1E6}\x{1F1E7}\x{1F1E7}\x{1F1E6}B + + +# End of testinput5 diff --git a/testdata/testinput6 b/testdata/testinput6 index de9227e..e2f00c0 100644 --- a/testdata/testinput6 +++ b/testdata/testinput6 @@ -1861,11 +1861,6 @@ \= Expect no match aaa -/[\d-z]+/ - 12-34z -\= Expect no match - aaa - /\x5c/ \\ @@ -3813,13 +3808,6 @@ /a*/g abbab -/^[\d-a]/ - abcde - -things - 0digit -\= Expect no match - bcdef - /[[:space:]]+/ > \x09\x0a\x0c\x0d\x0b< @@ -4635,7 +4623,7 @@ /((?(R)a+|(?1)b))/ aaaabcde -/((?(R2)a+|(?1)b))/ +/((?(R2)a+|(?1)b))()/ aaaabcde /(?(R)a*(?1)|((?R))b)/ @@ -4879,7 +4867,83 @@ abcd\=null_context /()()a+/no_auto_possess - aaa\=dfa,allcaptures - a\=dfa,allcaptures + aaa\=allcaptures + a\=allcaptures + +/(*LIMIT_DEPTH=100)^((.)(?1)|.)$/ +\= Expect depth limit exceeded + a[00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00] + +/(02-)?[0-9]{3}-[0-9]{3}/ + 02-123-123 +/^(a(?2))(b)(?1)/ + abbab\=find_limits + +/abc/endanchored + xyzabc +\= Expect no match + xyzabcdef +\= Expect error + xyzabc\=ph + +/abc/ + xyzabc\=endanchored +\= Expect no match + xyzabcdef\=endanchored +\= Expect error + xyzabc\=ps,endanchored + +/abc|bcd/endanchored + xyzabcd +\= Expect no match + xyzabcdef + +/(*NUL)^.*/ + a\nb\x00ccc + +/(*NUL)^.*/s + a\nb\x00ccc + +/^x/m,newline=nul + ab\x00xy + +/'#comment' 0d 0a 00 '^x\' 0a 'y'/x,newline=nul,hex + x\nyz + +/(*NUL)^X\NY/ + X\nY + X\rY +\= Expect no match + X\x00Y + +/(?<=abc|)/ + abcde\=aftertext + +/(?<=|abc)/ + abcde\=aftertext + +/(?<=abc|)/endanchored + abcde\=aftertext + +/(?<=|abc)/endanchored + abcde\=aftertext + +/(*LIMIT_MATCH=100).*(?![|H]?.*(?![|H]?););.*(?![|H]?.*(?![|H]?););\x00\x00\x00\x00\x00\x00\x00(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?![|);)?.*(![|H]?);)?.*(?![|H]?);)?.*(?![|H]?);)?.*(?![|H]););![|H]?););[|H]?);|H]?);)\x00\x00\x00\x00\x00\x00H]?););?![|H]?);)?.*(?![|H]?););[||H]?);)?.*(?![|H]?););[|H]?);(?![|H]?););![|H]?););[|H]?);|H]?);)?.*(?![|H]?););;[\x00\x00\x00\x00\x00\x00\x00![|H]?););![|H]?););[|H]?);|H]?);)?.*(?![|H]?););/no_dotstar_anchor +.*(?![|H]?.*(?![|H]?););.*(?![|H]?.*(?![|H]?););\x00\x00\x00\x00\x00\x00\x00(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?![|);)?.*(![|H]?);)?.*(?![|H]?);)?.*(?![|H]?);)?.*(?![|H]););![|H]?););[|H]?);|H]?);)\x00\x00\x00\x00\x00\x00H]?););?![|H]?);)?.*(?![|H]?););[||H]?);)?.*(?![|H]?););[|H]?);(?![|H]?););![|H]?););[|H]?);|H]?);)?.*(?![|H]?););;[\x00\x00\x00\x00\x00\x00\x00![|H]?););![|H]?););[|H]?);|H]?);)?.*(?![|H]?);); + +/\n/firstline + xyz\nabc + +/\nabc/firstline + xyz\nabc + +/\x{0a}abc/firstline,newline=crlf +\= Expect no match + xyz\r\nabc + +/[abc]/firstline +\= Expect no match + \na + # End of testinput6 diff --git a/testdata/testinput8 b/testdata/testinput8 index f4bb709..2627454 100644 --- a/testdata/testinput8 +++ b/testdata/testinput8 @@ -161,18 +161,14 @@ # Use "expand" to create some very long patterns with nested parentheses, in # order to test workspace overflow. Again, this varies with code unit width, -# and even with it fails in two modes, the error offset differs. It also varies +# and even when it fails in two modes, the error offset differs. It also varies # with link size - hence multiple tests with different values. -/(?'ABC'\[[bar](]{105}*THEN:\[A]{255}\[)]{106}/expand,-fullbincode +/(?'ABC'\[[bar](]{792}*THEN:\[A]{255}\[)]{793}/expand,-fullbincode,parens_nest_limit=1000 -/(?'ABC'\[[bar](]{106}*THEN:\[A]{255}\[)]{107}/expand,-fullbincode +/(?'ABC'\[[bar](]{793}*THEN:\[A]{255}\[)]{794}/expand,-fullbincode,parens_nest_limit=1000 -/(?'ABC'\[[bar](]{159}*THEN:\[A]{255}\[)]{160}/expand,-fullbincode - -/(?'ABC'\[[bar](]{199}*THEN:\[A]{255}\[)]{200}/expand,-fullbincode - -/(?'ABC'\[[bar](]{299}*THEN:\[A]{255}\[)]{300}/expand,-fullbincode +/(?'ABC'\[[bar](]{1793}*THEN:\[A]{255}\[)]{1794}/expand,-fullbincode,parens_nest_limit=2000 /(?(1)(?1)){8,}+()/debug abcd diff --git a/testdata/testinput9 b/testdata/testinput9 index 9a26f5f..7be4b15 100644 --- a/testdata/testinput9 +++ b/testdata/testinput9 @@ -258,4 +258,6 @@ /(*MARK:a\x{100}b)z/alt_verbnames +/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':ƿ)/ + # End of testinput9 diff --git a/testdata/testoutput1 b/testdata/testoutput1 index d28bf91..9c55be9 100644 --- a/testdata/testoutput1 +++ b/testdata/testoutput1 @@ -183,27 +183,6 @@ No match abbbbbbbbbbbac No match -/^(b+|a){1,2}?bc/ - bbc - 0: bbc - 1: b - -/^(b*|ba){1,2}?bc/ - babc - 0: babc - 1: ba - bbabc - 0: bbabc - 1: ba - bababc - 0: bababc - 1: ba -\= Expect no match - bababbc -No match - babababc -No match - /^(ba|b*){1,2}?bc/ babc 0: babc @@ -2006,13 +1985,6 @@ No match aaa No match -/[\d-z]+/ - 12-34z - 0: 12-34z -\= Expect no match - aaa -No match - /\x5c/ \\ 0: \ @@ -5764,17 +5736,6 @@ No match 0: 0: -/^[\d-a]/ - abcde - 0: a - -things - 0: - - 0digit - 0: 0 -\= Expect no match - bcdef -No match - /[[:space:]]+/ > \x09\x0a\x0c\x0d\x0b< 0: \x09\x0a\x0c\x0d\x0b @@ -9257,4 +9218,608 @@ No match 1: b 2: cccc +# /x does not apply to MARK labels + +/x (*MARK:ab cd # comment +ef) x/x,mark + axxz + 0: xx +MK: ab cd # comment\x0aef + +/(?<=a(B){0}c)X/ + acX + 0: X + +/(?<DEFINE>b)(?(DEFINE)(a+))(?&DEFINE)/ + bbbb + 0: bb + 1: b +\= Expect no match + baaab +No match + +/(?=.*[A-Z])(?=.*[a-z])(?=.*[0-9])(?=.*[,;:])(?=.{8,16})(?!.*[\s])/ + \ Fred:099 + 0: + +/(?=.*X)X$/ + \ X + 0: X + +/(?s)(?=.*?)b/ + aabc + 0: b + +/(Z)(a)\2{1,2}?(?-i)\1X/i + ZaAAZX + 0: ZaAAZX + 1: Z + 2: a + +/(?'c')XX(?'YYYYYYYYYYYYYYYYYYYYYYYCl')/ + +/[s[:digit:]\E-H]+/ + s09-H + 0: s09-H + +/[s[:digit:]\Q\E-H]+/ + s09-H + 0: s09-H + +/a+(?:|b)a/ + aaaa + 0: aaaa + +/X?(R||){3335}/ + +/(?1)(A(*COMMIT)|B)D/ + ABD + 0: ABD + 1: B + XABD + 0: ABD + 1: B + BAD + 0: BAD + 1: A + ABXABD + 0: ABD + 1: B +\= Expect no match + ABX +No match + +/(?(DEFINE)(?<m> 1? (?=(?<cond>2)?) 1 2 (?('cond')|3))) + \A + () + (?&m) + \Z/x + 123 + 0: 123 + 1: <unset> + 2: <unset> + 3: + +/^(?: +(?: A| (1? (?=(?<cond>2)?) (1) 2 (?('cond')|3)) ) +(Z) +)+$/x + AZ123Z + 0: AZ123Z + 1: 123 + 2: <unset> + 3: 1 + 4: Z +\= Expect no match + AZ12Z +No match + +/^ (?(DEFINE) ( (?!(a)\2b)..) ) ()(?1) /x + acb + 0: ac + 1: <unset> + 2: <unset> + 3: +\= Expect no match + aab +No match + +'(?>ab|abab){1,5}?M' + abababababababababababM + 0: abababababM + +'(?>ab|abab){2}?M' + abababM + 0: ababM + +'((?(?=(a))a)+k)' + bbak + 0: ak + 1: ak + 2: a + +'((?(?=(a))a|)+k)' + bbak + 0: ak + 1: ak + 2: a + +'(?(?!(b))a|b)+k' + ababbalbbadabak + 0: abak + 1: b + +/(?!(b))c|b/ + Ab + 0: b + Ac + 0: c + +/(?=(b))b|c/ + Ab + 0: b + 1: b + Ac + 0: c + +/^(.|(.)(?1)\2)$/ + a + 0: a + 1: a + aba + 0: aba + 1: aba + 2: a + abcba + 0: abcba + 1: abcba + 2: a + ababa + 0: ababa + 1: ababa + 2: a + abcdcba + 0: abcdcba + 1: abcdcba + 2: a + +/^((.)(?1)\2|.?)$/ + a + 0: a + 1: a + aba + 0: aba + 1: aba + 2: a + abba + 0: abba + 1: abba + 2: a + abcba + 0: abcba + 1: abcba + 2: a + ababa + 0: ababa + 1: ababa + 2: a + abccba + 0: abccba + 1: abccba + 2: a + abcdcba + 0: abcdcba + 1: abcdcba + 2: a + abcddcba + 0: abcddcba + 1: abcddcba + 2: a + +/^(.)(\1|a(?2))/ + bab + 0: bab + 1: b + 2: ab + +/^(.|(.)(?1)?\2)$/ + abcba + 0: abcba + 1: abcba + 2: a + +/^(?(?=(a))abc|def)/ + abc + 0: abc + 1: a + +/^(?(?!(a))def|abc)/ + abc + 0: abc + 1: a + +/^(?(?=(a)(*ACCEPT))abc|def)/ + abc + 0: abc + 1: a + +/^(?(?!(a)(*ACCEPT))def|abc)/ + abc + 0: abc + 1: a + +/^(?1)\d{3}(a)/ + a123a + 0: a123a + 1: a + +# This pattern uses a lot of named subpatterns in order to match email +# addresses in various formats. It's a heavy test for named subpatterns. In the +# <atext> group, slash is coded as \x{2f} so that this pattern can also be +# processed by perltest.sh, which does not cater for an escaped delimiter +# within the pattern. $ within the pattern must also be escaped. All $ and @ +# characters in subject strings are escaped so that Perl doesn't interpret them +# as variable insertions and " characters must also be escaped for Perl. + +# This set of subpatterns is more or less a direct transliteration of the BNF +# definitions in RFC2822, without any of the obsolete features. The addition of +# a possessive + to the definition of <phrase> reduced the match limit in PCRE2 +# from over 5 million to just under 400, and eliminated a very noticeable delay +# when this file was passed to perltest.sh. + +/(?ix)(?(DEFINE) +(?<addr_spec> (?&local_part) \@ (?&domain) ) +(?<angle_addr> (?&CFWS)?+ < (?&addr_spec) > (?&CFWS)?+ ) +(?<atext> [a-z\d!#\$%&'*+-\x{2f}=?^_`{|}~] ) +(?<atom> (?&CFWS)?+ (?&atext)+ (?&CFWS)?+ ) +(?<ccontent> (?&ctext) | (?"ed_pair) | (?&comment) ) +(?<ctext> [^\x{9}\x{10}\x{13}\x{7f}-\x{ff}\ ()\\] ) +(?<comment> \( (?: (?&FWS)?+ (?&ccontent) )*+ (?&FWS)?+ \) ) +(?<CFWS> (?: (?&FWS)?+ (?&comment) )* (?# NOT possessive) + (?: (?&FWS)?+ (?&comment) | (?&FWS) ) ) +(?<dcontent> (?&dtext) | (?"ed_pair) ) +(?<display_name> (?&phrase) ) +(?<domain> (?&dot_atom) | (?&domain_literal) ) +(?<domain_literal> (?&CFWS)?+ \[ (?: (?&FWS)?+ (?&dcontent) )* (?&FWS)?+ \] + (?&CFWS)?+ ) +(?<dot_atom> (?&CFWS)?+ (?&dot_atom_text) (?&CFWS)?+ ) +(?<dot_atom_text> (?&atext)++ (?: \. (?&atext)++)*+ ) +(?<dtext> [^\x{9}\x{10}\x{13}\x{7f}-\x{ff}\ \[\]\\] ) +(?<FWS> (?: [\t\ ]*+ \n)?+ [\t\ ]++ ) +(?<local_part> (?&dot_atom) | (?"ed_string) ) +(?<mailbox> (?&name_addr) | (?&addr_spec) ) +(?<name_addr> (?&display_name)? (?&angle_addr) ) +(?<phrase> (?&word)++ ) +(?<qcontent> (?&qtext) | (?"ed_pair) ) +(?<quoted_pair> " (?&text) ) +(?<quoted_string> (?&CFWS)?+ " (?: (?&FWS)?+ (?&qcontent))* (?&FWS)?+ " + (?&CFWS)?+ ) +(?<qtext> [^\x{9}\x{10}\x{13}\x{7f}-\x{ff}\ "\\] ) +(?<text> [^\r\n] ) +(?<word> (?&atom) | (?"ed_string) ) +) # End DEFINE +^(?&mailbox)$/ + Alan Other <user\@dom.ain> + 0: Alan Other <user@dom.ain> + <user\@dom.ain> + 0: <user@dom.ain> + user\@dom.ain + 0: user@dom.ain + user\@[] + 0: user@[] + user\@[domain literal] + 0: user@[domain literal] + user\@[domain literal with \"[square brackets\"] inside] + 0: user@[domain literal with "[square brackets"] inside] + \"A. Other\" <user.1234\@dom.ain> (a comment) + 0: "A. Other" <user.1234@dom.ain> (a comment) + A. Other <user.1234\@dom.ain> (a comment) + 0: A. Other <user.1234@dom.ain> (a comment) + \"/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/\"\@x400-re.lay + 0: "/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/"@x400-re.lay +\= Expect no match + A missing angle <user\@some.where +No match + The quick brown fox +No match + +# -------------------------------------------------------------------------- + +# This pattern uses named groups to match default PCRE2 patterns. It's another +# heavy test for named subpatterns. Once again, code slash as \x{2f} and escape +# $ even in classes so that this works with pcre2test. + +/(?sx)(?(DEFINE) + +(?<assertion> (?&simple_assertion) | (?&lookaround) ) + +(?<atomic_group> \( \? > (?®ex) \) ) + +(?<back_reference> \\ \d+ | + \\g (?: [+-]?\d+ | \{ (?: [+-]?\d+ | (?&groupname) ) \} ) | + \\k <(?&groupname)> | + \\k '(?&groupname)' | + \\k \{ (?&groupname) \} | + \( \? P= (?&groupname) \) ) + +(?<branch> (?:(?&assertion) | + (?&callout) | + (?&comment) | + (?&option_setting) | + (?&qualified_item) | + (?"ed_string) | + (?"ed_string_empty) | + (?&special_escape) | + (?&verb) + )* ) + +(?<callout> \(\?C (?: \d+ | + (?: (?<D>["'`^%\#\$]) + (?: \k'D'\k'D' | (?!\k'D') . )* \k'D' | + \{ (?: \}\} | [^}]*+ )* \} ) + )? \) ) + +(?<capturing_group> \( (?: \? P? < (?&groupname) > | \? ' (?&groupname) ' )? + (?®ex) \) ) + +(?<character_class> \[ \^?+ (?: \] (?&class_item)* | (?&class_item)+ ) \] ) + +(?<character_type> (?! \\N\{\w+\} ) \\ [dDsSwWhHvVRN] ) + +(?<class_item> (?: \[ : (?: + alnum|alpha|ascii|blank|cntrl|digit|graph|lower|print| + punct|space|upper|word|xdigit + ) : \] | + (?"ed_string) | + (?"ed_string_empty) | + (?&escaped_character) | + (?&character_type) | + [^]] ) ) + +(?<comment> \(\?\# [^)]* \) | (?"ed_string_empty) | \\E ) + +(?<condition> (?: \( [+-]? \d+ \) | + \( < (?&groupname) > \) | + \( ' (?&groupname) ' \) | + \( R \d* \) | + \( R & (?&groupname) \) | + \( (?&groupname) \) | + \( DEFINE \) | + \( VERSION >?=\d+(?:\.\d\d?)? \) | + (?&callout)?+ (?&comment)* (?&lookaround) ) ) + +(?<conditional_group> \(\? (?&condition) (?&branch) (?: \| (?&branch) )? \) ) + +(?<delimited_regex> (?<delimiter> [-\x{2f}!"'`=_:;,%&@~]) (?®ex) + \k'delimiter' .* ) + +(?<escaped_character> \\ (?: 0[0-7]{1,2} | [0-7]{1,3} | o\{ [0-7]+ \} | + x \{ (*COMMIT) [[:xdigit:]]* \} | x [[:xdigit:]]{0,2} | + [aefnrt] | c[[:print:]] | + [^[:alnum:]] ) ) + +(?<group> (?&capturing_group) | (?&non_capturing_group) | + (?&resetting_group) | (?&atomic_group) | + (?&conditional_group) ) + +(?<groupname> [a-zA-Z_]\w* ) + +(?<literal_character> (?! (?&range_qualifier) ) [^[()|*+?.\$\\] ) + +(?<lookaround> \(\? (?: = | ! | <= | <! ) (?®ex) \) ) + +(?<non_capturing_group> \(\? [iJmnsUx-]* : (?®ex) \) ) + +(?<option_setting> \(\? [iJmnsUx-]* \) ) + +(?<qualified_item> (?:\. | + (?&lookaround) | + (?&back_reference) | + (?&character_class) | + (?&character_type) | + (?&escaped_character) | + (?&group) | + (?&subroutine_call) | + (?&literal_character) | + (?"ed_string) + ) (?&comment)? (?&qualifier)? ) + +(?<qualifier> (?: [?*+] | (?&range_qualifier) ) [+?]? ) + +(?<quoted_string> (?: \\Q (?: (?!\\E | \k'delimiter') . )++ (?: \\E | ) ) ) + +(?<quoted_string_empty> \\Q\\E ) + +(?<range_qualifier> \{ (?: \d+ (?: , \d* )? | , \d+ ) \} ) + +(?<regex> (?&start_item)* (?&branch) (?: \| (?&branch) )* ) + +(?<resetting_group> \( \? \| (?®ex) \) ) + +(?<simple_assertion> \^ | \$ | \\A | \\b | \\B | \\G | \\z | \\Z ) + +(?<special_escape> \\K ) + +(?<start_item> \( \* (?: + ANY | + ANYCRLF | + BSR_ANYCRLF | + BSR_UNICODE | + CR | + CRLF | + LF | + LIMIT_MATCH=\d+ | + LIMIT_DEPTH=\d+ | + LIMIT_HEAP=\d+ | + NOTEMPTY | + NOTEMPTY_ATSTART | + NO_AUTO_POSSESS | + NO_DOTSTAR_ANCHOR | + NO_JIT | + NO_START_OPT | + NUL | + UTF | + UCP ) \) ) + +(?<subroutine_call> (?: \(\?R\) | \(\?[+-]?\d+\) | + \(\? (?: & | P> ) (?&groupname) \) | + \\g < (?&groupname) > | + \\g ' (?&groupname) ' | + \\g < [+-]? \d+ > | + \\g ' [+-]? \d+ ) ) + +(?<verb> \(\* (?: ACCEPT | FAIL | F | COMMIT | + (?:MARK)?:(?&verbname) | + (?:PRUNE|SKIP|THEN) (?: : (?&verbname)? )? ) \) ) + +(?<verbname> [^)]+ ) + +) # End DEFINE +# Kick it all off... +^(?&delimited_regex)$/subject_literal,jitstack=256 + /^(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\11*(\3\4)\1(?#)2$/ + 0: /^(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\11*(\3\4)\1(?#)2$/ + /(cat(a(ract|tonic)|erpillar)) \1()2(3)/ + 0: /(cat(a(ract|tonic)|erpillar)) \1()2(3)/ + /^From +([^ ]+) +[a-zA-Z][a-zA-Z][a-zA-Z] +[a-zA-Z][a-zA-Z][a-zA-Z] +[0-9]?[0-9] +[0-9][0-9]:[0-9][0-9]/ + 0: /^From +([^ ]+) +[a-zA-Z][a-zA-Z][a-zA-Z] +[a-zA-Z][a-zA-Z][a-zA-Z] +[0-9]?[0-9] +[0-9][0-9]:[0-9][0-9]/ + /^From\s+\S+\s+([a-zA-Z]{3}\s+){2}\d{1,2}\s+\d\d:\d\d/ + 0: /^From\s+\S+\s+([a-zA-Z]{3}\s+){2}\d{1,2}\s+\d\d:\d\d/ + /<tr([\w\W\s\d][^<>]{0,})><TD([\w\W\s\d][^<>]{0,})>([\d]{0,}\.)(.*)((<BR>([\w\W\s\d][^<>]{0,})|[\s]{0,}))<\/a><\/TD><TD([\w\W\s\d][^<>]{0,})>([\w\W\s\d][^<>]{0,})<\/TD><TD([\w\W\s\d][^<>]{0,})>([\w\W\s\d][^<>]{0,})<\/TD><\/TR>/is + 0: /<tr([\w\W\s\d][^<>]{0,})><TD([\w\W\s\d][^<>]{0,})>([\d]{0,}\.)(.*)((<BR>([\w\W\s\d][^<>]{0,})|[\s]{0,}))<\/a><\/TD><TD([\w\W\s\d][^<>]{0,})>([\w\W\s\d][^<>]{0,})<\/TD><TD([\w\W\s\d][^<>]{0,})>([\w\W\s\d][^<>]{0,})<\/TD><\/TR>/is + /^(?(DEFINE) (?<A> a) (?<B> b) ) (?&A) (?&B) / + 0: /^(?(DEFINE) (?<A> a) (?<B> b) ) (?&A) (?&B) / + /(?(DEFINE)(?<byte>2[0-4]\d|25[0-5]|1\d\d|[1-9]?\d))\b(?&byte)(\.(?&byte)){3}/ + 0: /(?(DEFINE)(?<byte>2[0-4]\d|25[0-5]|1\d\d|[1-9]?\d))\b(?&byte)(\.(?&byte)){3}/ + /\b(?&byte)(\.(?&byte)){3}(?(DEFINE)(?<byte>2[0-4]\d|25[0-5]|1\d\d|[1-9]?\d))/ + 0: /\b(?&byte)(\.(?&byte)){3}(?(DEFINE)(?<byte>2[0-4]\d|25[0-5]|1\d\d|[1-9]?\d))/ + /^(\w++|\s++)*$/ + 0: /^(\w++|\s++)*$/ + /a+b?(*THEN)c+(*FAIL)/ + 0: /a+b?(*THEN)c+(*FAIL)/ + /(A (A|B(*ACCEPT)|C) D)(E)/x + 0: /(A (A|B(*ACCEPT)|C) D)(E)/x + /^\W*+(?:((.)\W*+(?1)\W*+\2|)|((.)\W*+(?3)\W*+\4|\W*+.\W*+))\W*+$/i + 0: /^\W*+(?:((.)\W*+(?1)\W*+\2|)|((.)\W*+(?3)\W*+\4|\W*+.\W*+))\W*+$/i + /A(*PRUNE)B(*SKIP)C(*THEN)D(*COMMIT)E(*F)F(*FAIL)G(?!)H(*ACCEPT)I/B + 0: /A(*PRUNE)B(*SKIP)C(*THEN)D(*COMMIT)E(*F)F(*FAIL)G(?!)H(*ACCEPT)I/B + /(?C`a``b`)(?C'a''b')(?C"a""b")(?C^a^^b^)(?C%a%%b%)(?C#a##b#)(?C$a$$b$)(?C{a}}b})/B,callout_info + 0: /(?C`a``b`)(?C'a''b')(?C"a""b")(?C^a^^b^)(?C%a%%b%)(?C#a##b#)(?C$a$$b$)(?C{a}}b})/B,callout_info + /(?sx)(?(DEFINE)(?<assertion> (?&simple_assertion) | (?&lookaround) )(?<atomic_group> \( \? > (?®ex) \) )(?<back_reference> \\ \d+ | \\g (?: [+-]?\d+ | \{ (?: [+-]?\d+ | (?&groupname) ) \} ) | \\k <(?&groupname)> | \\k '(?&groupname)' | \\k \{ (?&groupname) \} | \( \? P= (?&groupname) \) )(?<branch> (?:(?&assertion) | (?&callout) | (?&comment) | (?&option_setting) | (?&qualified_item) | (?"ed_string) | (?"ed_string_empty) | (?&special_escape) | (?&verb) )* )(?<callout> \(\?C (?: \d+ | (?: (?<D>["'`^%\#\$]) (?: \k'D'\k'D' | (?!\k'D') . )* \k'D' | \{ (?: \}\} | [^}]*+ )* \} ) )? \) )(?<capturing_group> \( (?: \? P? < (?&groupname) > | \? ' (?&groupname) ' )? (?®ex) \) )(?<character_class> \[ \^?+ (?: \] (?&class_item)* | (?&class_item)+ ) \] )(?<character_type> (?! \\N\{\w+\} ) \\ [dDsSwWhHvVRN] )(?<class_item> (?: \[ : (?: alnum|alpha|ascii|blank|cntrl|digit|graph|lower|print| punct|space|upper|word|xdigit ) : \] | (?"ed_string) | (?"ed_string_empty) | (?&escaped_character) | (?&character_type) | [^]] ) )(?<comment> \(\?\# [^)]* \) | (?"ed_string_empty) | \\E )(?<condition> (?: \( [+-]? \d+ \) | \( < (?&groupname) > \) | \( ' (?&groupname) ' \) | \( R \d* \) | \( R & (?&groupname) \) | \( (?&groupname) \) | \( DEFINE \) | \( VERSION >?=\d+(?:\.\d\d?)? \) | (?&callout)?+ (?&comment)* (?&lookaround) ) )(?<conditional_group> \(\? (?&condition) (?&branch) (?: \| (?&branch) )? \) )(?<delimited_regex> (?<delimiter> [-\x{2f}!"'`=_:;,%&@~]) (?®ex) \k'delimiter' .* )(?<escaped_character> \\ (?: 0[0-7]{1,2} | [0-7]{1,3} | o\{ [0-7]+ \} | x \{ (*COMMIT) [[:xdigit:]]* \} | x [[:xdigit:]]{0,2} | [aefnrt] | c[[:print:]] | [^[:alnum:]] ) )(?<group> (?&capturing_group) | (?&non_capturing_group) | (?&resetting_group) | (?&atomic_group) | (?&conditional_group) )(?<groupname> [a-zA-Z_]\w* )(?<literal_character> (?! (?&range_qualifier) ) [^[()|*+?.\$\\] )(?<lookaround> \(\? (?: = | ! | <= | <! ) (?®ex) \) )(?<non_capturing_group> \(\? [iJmnsUx-]* : (?®ex) \) )(?<option_setting> \(\? [iJmnsUx-]* \) )(?<qualified_item> (?:\. | (?&lookaround) | (?&back_reference) | (?&character_class) | (?&character_type) | (?&escaped_character) | (?&group) | (?&subroutine_call) | (?&literal_character) | (?"ed_string) ) (?&comment)? (?&qualifier)? )(?<qualifier> (?: [?*+] | (?&range_qualifier) ) [+?]? )(?<quoted_string> (?: \\Q (?: (?!\\E | \k'delimiter') . )++ (?: \\E | ) ) ) (?<quoted_string_empty> \\Q\\E ) (?<range_qualifier> \{ (?: \d+ (?: , \d* )? | , \d+ ) \} )(?<regex> (?&start_item)* (?&branch) (?: \| (?&branch) )* )(?<resetting_group> \( \? \| (?®ex) \) )(?<simple_assertion> \^ | \$ | \\A | \\b | \\B | \\G | \\z | \\Z )(?<special_escape> \\K )(?<start_item> \( \* (?: ANY | ANYCRLF | BSR_ANYCRLF | BSR_UNICODE | CR | CRLF | LF | LIMIT_MATCH=\d+ | LIMIT_DEPTH=\d+ | LIMIT_HEAP=\d+ | NOTEMPTY | NOTEMPTY_ATSTART | NO_AUTO_POSSESS | NO_DOTSTAR_ANCHOR | NO_JIT | NO_START_OPT | NUL | UTF | UCP ) \) )(?<subroutine_call> (?: \(\?R\) | \(\?[+-]?\d+\) | \(\? (?: & | P> ) (?&groupname) \) | \\g < (?&groupname) > | \\g ' (?&groupname) ' | \\g < [+-]? \d+ > | \\g ' [+-]? \d+ ) )(?<verb> \(\* (?: ACCEPT | FAIL | F | COMMIT | (?:MARK)?:(?&verbname) | (?:PRUNE|SKIP|THEN) (?: : (?&verbname)? )? ) \) )(?<verbname> [^)]+ ))^(?&delimited_regex)$/ + 0: /(?sx)(?(DEFINE)(?<assertion> (?&simple_assertion) | (?&lookaround) )(?<atomic_group> \( \? > (?®ex) \) )(?<back_reference> \\ \d+ | \\g (?: [+-]?\d+ | \{ (?: [+-]?\d+ | (?&groupname) ) \} ) | \\k <(?&groupname)> | \\k '(?&groupname)' | \\k \{ (?&groupname) \} | \( \? P= (?&groupname) \) )(?<branch> (?:(?&assertion) | (?&callout) | (?&comment) | (?&option_setting) | (?&qualified_item) | (?"ed_string) | (?"ed_string_empty) | (?&special_escape) | (?&verb) )* )(?<callout> \(\?C (?: \d+ | (?: (?<D>["'`^%\#\$]) (?: \k'D'\k'D' | (?!\k'D') . )* \k'D' | \{ (?: \}\} | [^}]*+ )* \} ) )? \) )(?<capturing_group> \( (?: \? P? < (?&groupname) > | \? ' (?&groupname) ' )? (?®ex) \) )(?<character_class> \[ \^?+ (?: \] (?&class_item)* | (?&class_item)+ ) \] )(?<character_type> (?! \\N\{\w+\} ) \\ [dDsSwWhHvVRN] )(?<class_item> (?: \[ : (?: alnum|alpha|ascii|blank|cntrl|digit|graph|lower|print| punct|space|upper|word|xdigit ) : \] | (?"ed_string) | (?"ed_string_empty) | (?&escaped_character) | (?&character_type) | [^]] ) )(?<comment> \(\?\# [^)]* \) | (?"ed_string_empty) | \\E )(?<condition> (?: \( [+-]? \d+ \) | \( < (?&groupname) > \) | \( ' (?&groupname) ' \) | \( R \d* \) | \( R & (?&groupname) \) | \( (?&groupname) \) | \( DEFINE \) | \( VERSION >?=\d+(?:\.\d\d?)? \) | (?&callout)?+ (?&comment)* (?&lookaround) ) )(?<conditional_group> \(\? (?&condition) (?&branch) (?: \| (?&branch) )? \) )(?<delimited_regex> (?<delimiter> [-\x{2f}!"'`=_:;,%&@~]) (?®ex) \k'delimiter' .* )(?<escaped_character> \\ (?: 0[0-7]{1,2} | [0-7]{1,3} | o\{ [0-7]+ \} | x \{ (*COMMIT) [[:xdigit:]]* \} | x [[:xdigit:]]{0,2} | [aefnrt] | c[[:print:]] | [^[:alnum:]] ) )(?<group> (?&capturing_group) | (?&non_capturing_group) | (?&resetting_group) | (?&atomic_group) | (?&conditional_group) )(?<groupname> [a-zA-Z_]\w* )(?<literal_character> (?! (?&range_qualifier) ) [^[()|*+?.\$\\] )(?<lookaround> \(\? (?: = | ! | <= | <! ) (?®ex) \) )(?<non_capturing_group> \(\? [iJmnsUx-]* : (?®ex) \) )(?<option_setting> \(\? [iJmnsUx-]* \) )(?<qualified_item> (?:\. | (?&lookaround) | (?&back_reference) | (?&character_class) | (?&character_type) | (?&escaped_character) | (?&group) | (?&subroutine_call) | (?&literal_character) | (?"ed_string) ) (?&comment)? (?&qualifier)? )(?<qualifier> (?: [?*+] | (?&range_qualifier) ) [+?]? )(?<quoted_string> (?: \\Q (?: (?!\\E | \k'delimiter') . )++ (?: \\E | ) ) ) (?<quoted_string_empty> \\Q\\E ) (?<range_qualifier> \{ (?: \d+ (?: , \d* )? | , \d+ ) \} )(?<regex> (?&start_item)* (?&branch) (?: \| (?&branch) )* )(?<resetting_group> \( \? \| (?®ex) \) )(?<simple_assertion> \^ | \$ | \\A | \\b | \\B | \\G | \\z | \\Z )(?<special_escape> \\K )(?<start_item> \( \* (?: ANY | ANYCRLF | BSR_ANYCRLF | BSR_UNICODE | CR | CRLF | LF | LIMIT_MATCH=\d+ | LIMIT_DEPTH=\d+ | LIMIT_HEAP=\d+ | NOTEMPTY | NOTEMPTY_ATSTART | NO_AUTO_POSSESS | NO_DOTSTAR_ANCHOR | NO_JIT | NO_START_OPT | NUL | UTF | UCP ) \) )(?<subroutine_call> (?: \(\?R\) | \(\?[+-]?\d+\) | \(\? (?: & | P> ) (?&groupname) \) | \\g < (?&groupname) > | \\g ' (?&groupname) ' | \\g < [+-]? \d+ > | \\g ' [+-]? \d+ ) )(?<verb> \(\* (?: ACCEPT | FAIL | F | COMMIT | (?:MARK)?:(?&verbname) | (?:PRUNE|SKIP|THEN) (?: : (?&verbname)? )? ) \) )(?<verbname> [^)]+ ))^(?&delimited_regex)$/ +\= Expect no match + /((?(?C'')\QX\E(?!((?(?C'')(?!X=X));=)r*X=X));=)/ +No match + /(?:(?(2y)a|b)(X))+/ +No match + /a(*MARK)b/ +No match + /a(*CR)b/ +No match + /(?P<abn>(?P=abn)(?<badstufxxx)/ +No match + +# -------------------------------------------------------------------------- + +/<(?x:[a b])>/xx + < > + 0: < > + +/<(?:[a b])>/xx + < > +No match + +/<(?xxx:[a b])>/ + < > +No match + +/<(?-x:[a b])>/xx + < > + 0: < > + +/[[:digit:]-]+/ + 12-24 + 0: 12-24 + +/((?<=((*ACCEPT)) )\1?\b) / +\= Expect no match + ((?<=((*ACCEPT)) )\\1?\\b)\x20 +No match + +/((?<=((*ACCEPT))X)\1?Y)\1/ + XYYZ + 0: YY + 1: Y + 2: + +/((?<=((*ACCEPT))X)\1?Y(*ACCEPT))\1/ + XYYZ + 0: Y + 1: Y + 2: + +/(?(DEFINE)(?<optional_a>a?)X)^(?&optional_a)a$/ + aa + 0: aa + a + 0: a + +/^(a?)b(?1)a/ + abaa + 0: abaa + 1: a + aba + 0: aba + 1: a + baa + 0: baa + 1: + ba + 0: ba + 1: + +/^(a?)+b(?1)a/ + abaa + 0: abaa + 1: + aba + 0: aba + 1: + baa + 0: baa + 1: + ba + 0: ba + 1: + +/^(a?)++b(?1)a/ + abaa + 0: abaa + 1: + aba + 0: aba + 1: + baa + 0: baa + 1: + ba + 0: ba + 1: + +/^(a?)+b/ + b + 0: b + 1: + ab + 0: ab + 1: + aaab + 0: aaab + 1: + +/(?=a+)a(a+)++b/ + aab + 0: aab + 1: a + # End of testinput1 diff --git a/testdata/testoutput10 b/testdata/testoutput10 index 9761f0f..9660fc5 100644 --- a/testdata/testoutput10 +++ b/testdata/testoutput10 @@ -1539,4 +1539,91 @@ Subject length lower bound = 1 a\x80zx\=offset=3 Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 1 +/[\W\p{Any}]/B +------------------------------------------------------------------ + Bra + [\x00-/:-@[-^`{-\xff\p{Any}] + Ket + End +------------------------------------------------------------------ + abc + 0: a + 123 + 0: 1 + +/[\W\pL]/B +------------------------------------------------------------------ + Bra + [\x00-/:-@[-^`{-\xff\p{L}] + Ket + End +------------------------------------------------------------------ + abc + 0: a +\= Expect no match + 123 +No match + +/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':ƿ)/utf +Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN) + +/[\s[:^ascii:]]/B,ucp +------------------------------------------------------------------ + Bra + [\x80-\xff\p{Xsp}] + Ket + End +------------------------------------------------------------------ + +# A special extra option allows excaped surrogate code points in 8-bit mode, +# but subjects containing them must not be UTF-checked. + +/\x{d800}/I,utf,allow_surrogate_escapes +Capturing subpattern count = 0 +Options: utf +Extra options: allow_surrogate_escapes +First code unit = \xed +Last code unit = \x80 +Subject length lower bound = 1 + \x{d800}\=no_utf_check + 0: \x{d800} + +/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes + \x{dfff}\x{df01}\=no_utf_check + 0: \x{dfff}\x{df01} + +# This has different starting code units in 8-bit mode. + +/^[^ab]/IB,utf +------------------------------------------------------------------ + Bra + ^ + [\x00-`c-\xff] (neg) + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Compile options: utf +Overall options: anchored utf +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 + 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y + Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f + \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 + \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf + \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee + \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd + \xfe \xff +Subject length lower bound = 1 + c + 0: c + \x{ff} + 0: \x{ff} + \x{100} + 0: \x{100} +\= Expect no match + aaa +No match + # End of testinput10 diff --git a/testdata/testoutput11-16 b/testdata/testoutput11-16 index 03e04bc..e22581d 100644 --- a/testdata/testoutput11-16 +++ b/testdata/testoutput11-16 @@ -643,4 +643,22 @@ Subject length lower bound = 1 /(*THEN:\[A]{65501})/expand +# We can use pcre2test's utf8_input modifier to create wide pattern characters, +# even though this test is run when UTF is not supported. + +/abz/utf8_input +** Failed: character value greater than 0xffff cannot be converted to 16-bit in non-UTF mode + abz + ab\x{7fffffff}z + +/abz/utf8_input +** Failed: invalid UTF-8 string cannot be converted to 16-bit string + abz + ab\x{ffffffff}z + +/abAz/utf8_input +** Failed: invalid UTF-8 string cannot be converted to 16-bit string + abAz + ab\x{80000041}z + # End of testinput11 diff --git a/testdata/testoutput11-32 b/testdata/testoutput11-32 index 390ebe0..d8a909f 100644 --- a/testdata/testoutput11-32 +++ b/testdata/testoutput11-32 @@ -646,4 +646,25 @@ Subject length lower bound = 1 /(*THEN:\[A]{65501})/expand +# We can use pcre2test's utf8_input modifier to create wide pattern characters, +# even though this test is run when UTF is not supported. + +/abz/utf8_input + abz + 0: ab\x{7fffffff}z + ab\x{7fffffff}z + 0: ab\x{7fffffff}z + +/abz/utf8_input + abz + 0: ab\x{ffffffff}z + ab\x{ffffffff}z + 0: ab\x{ffffffff}z + +/abAz/utf8_input + abAz + 0: ab\x{80000041}z + ab\x{80000041}z + 0: ab\x{80000041}z + # End of testinput11 diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16 index 383a032..52dbe74 100644 --- a/testdata/testoutput12-16 +++ b/testdata/testoutput12-16 @@ -557,7 +557,7 @@ Subject length lower bound = 1 0: \x{11234} /(*UTF-32)\x{11234}/ -Failed: error 134 at offset 17: character code point value in \x{} or \o{} is too large +Failed: error 160 at offset 5: (*VERB) not recognized or malformed abcd\x{11234}pqr /(*UTF-32)\x{112}/ @@ -1367,4 +1367,108 @@ Subject length lower bound = 2 \x{110000} ** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16 +/(*UTF)abz/B +------------------------------------------------------------------ + Bra + ab\x{fd}\x{bf}\x{bf}\x{bf}\x{bf}\x{bf}z + Ket + End +------------------------------------------------------------------ + +/abz/utf +** Failed: character value greater than 0x10ffff cannot be converted to UTF + +/[\W\p{Any}]/B +------------------------------------------------------------------ + Bra + [\x00-/:-@[-^`{-\xff\p{Any}\x{100}-\x{ffff}] + Ket + End +------------------------------------------------------------------ + abc + 0: a + 123 + 0: 1 + +/[\W\pL]/B +------------------------------------------------------------------ + Bra + [\x00-/:-@[-^`{-\xff\p{L}\x{100}-\x{ffff}] + Ket + End +------------------------------------------------------------------ + abc + 0: a + \x{100} + 0: \x{100} + \x{308} + 0: \x{308} +\= Expect no match + 123 +No match + +/[\s[:^ascii:]]/B,ucp +------------------------------------------------------------------ + Bra + [\x80-\xff\p{Xsp}\x{100}-\x{ffff}] + Ket + End +------------------------------------------------------------------ + +/\pP/ucp + \x{7fffffff} +** Character \x{7fffffff} is greater than 0xffff and UTF-16 mode is not enabled. +** Truncation will probably give the wrong result. +No match + +# A special extra option allows excaped surrogate code points in 32-bit mode, +# but subjects containing them must not be UTF-checked. These patterns give +# errors in 16-bit mode. + +/\x{d800}/I,utf,allow_surrogate_escapes +Failed: error 191 at offset 0: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode + \x{d800}\=no_utf_check + +/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes +Failed: error 191 at offset 0: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode + \x{dfff}\x{df01}\=no_utf_check + +# This has different starting code units in 8-bit mode. + +/^[^ab]/IB,utf +------------------------------------------------------------------ + Bra + ^ + [\x00-`c-\xff] (neg) + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Compile options: utf +Overall options: anchored utf +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 + 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y + Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f + \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e + \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d + \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac + \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb + \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca + \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 + \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 + \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 + \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff +Subject length lower bound = 1 + c + 0: c + \x{ff} + 0: \x{ff} + \x{100} + 0: \x{100} +\= Expect no match + aaa +No match + # End of testinput12 diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32 index 95f1834..38ff92d 100644 --- a/testdata/testoutput12-32 +++ b/testdata/testoutput12-32 @@ -1361,4 +1361,111 @@ Subject length lower bound = 2 \x{110000} Failed: error -28: UTF-32 error: code points greater than 0x10ffff are not defined at offset 0 +/(*UTF)abz/B +------------------------------------------------------------------ + Bra + ab\x{fd}\x{bf}\x{bf}\x{bf}\x{bf}\x{bf}z + Ket + End +------------------------------------------------------------------ + +/abz/utf +** Failed: character value greater than 0x10ffff cannot be converted to UTF + +/[\W\p{Any}]/B +------------------------------------------------------------------ + Bra + [\x00-/:-@[-^`{-\xff\p{Any}\x{100}-\x{ffffffff}] + Ket + End +------------------------------------------------------------------ + abc + 0: a + 123 + 0: 1 + +/[\W\pL]/B +------------------------------------------------------------------ + Bra + [\x00-/:-@[-^`{-\xff\p{L}\x{100}-\x{ffffffff}] + Ket + End +------------------------------------------------------------------ + abc + 0: a + \x{100} + 0: \x{100} + \x{308} + 0: \x{308} +\= Expect no match + 123 +No match + +/[\s[:^ascii:]]/B,ucp +------------------------------------------------------------------ + Bra + [\x80-\xff\p{Xsp}\x{100}-\x{ffffffff}] + Ket + End +------------------------------------------------------------------ + +/\pP/ucp + \x{7fffffff} +No match + +# A special extra option allows excaped surrogate code points in 32-bit mode, +# but subjects containing them must not be UTF-checked. These patterns give +# errors in 16-bit mode. + +/\x{d800}/I,utf,allow_surrogate_escapes +Capturing subpattern count = 0 +Options: utf +Extra options: allow_surrogate_escapes +First code unit = \x{d800} +Subject length lower bound = 1 + \x{d800}\=no_utf_check + 0: \x{d800} + +/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes + \x{dfff}\x{df01}\=no_utf_check + 0: \x{dfff}\x{df01} + +# This has different starting code units in 8-bit mode. + +/^[^ab]/IB,utf +------------------------------------------------------------------ + Bra + ^ + [\x00-`c-\xff] (neg) + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Compile options: utf +Overall options: anchored utf +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 + 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y + Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f + \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e + \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d + \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac + \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb + \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca + \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 + \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 + \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 + \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff +Subject length lower bound = 1 + c + 0: c + \x{ff} + 0: \x{ff} + \x{100} + 0: \x{100} +\= Expect no match + aaa +No match + # End of testinput12 diff --git a/testdata/testoutput15 b/testdata/testoutput15 index bb29a49..b2068d0 100644 --- a/testdata/testoutput15 +++ b/testdata/testoutput15 @@ -12,13 +12,15 @@ Starting code units: a z Last code unit = 'z' Subject length lower bound = 2 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazzbbbbbb\=find_limits -Minimum match limit = 8 -Minimum recursion limit = 6 +Minimum heap limit = 0 +Minimum match limit = 7 +Minimum depth limit = 7 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazz 1: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa aaaaaaaaaaaaaz\=find_limits -Minimum match limit = 32768 -Minimum recursion limit = 29 +Minimum heap limit = 0 +Minimum match limit = 20481 +Minimum depth limit = 30 No match !((?:\s|//.*\\n|/[*](?:\\n|.)*?[*]/)*)!I @@ -26,61 +28,71 @@ Capturing subpattern count = 1 May match empty string Subject length lower bound = 0 /* this is a C style comment */\=find_limits -Minimum match limit = 120 -Minimum recursion limit = 6 +Minimum heap limit = 0 +Minimum match limit = 64 +Minimum depth limit = 7 0: /* this is a C style comment */ 1: /* this is a C style comment */ /^(?>a)++/ aa\=find_limits +Minimum heap limit = 0 Minimum match limit = 5 -Minimum recursion limit = 2 +Minimum depth limit = 3 0: aa aaaaaaaaa\=find_limits +Minimum heap limit = 0 Minimum match limit = 12 -Minimum recursion limit = 2 +Minimum depth limit = 3 0: aaaaaaaaa /(a)(?1)++/ aa\=find_limits +Minimum heap limit = 0 Minimum match limit = 7 -Minimum recursion limit = 4 +Minimum depth limit = 5 0: aa 1: a aaaaaaaaa\=find_limits +Minimum heap limit = 0 Minimum match limit = 21 -Minimum recursion limit = 4 +Minimum depth limit = 5 0: aaaaaaaaa 1: a /a(?:.)*?a/ims abbbbbbbbbbbbbbbbbbbbba\=find_limits -Minimum match limit = 65 -Minimum recursion limit = 2 +Minimum heap limit = 0 +Minimum match limit = 24 +Minimum depth limit = 3 0: abbbbbbbbbbbbbbbbbbbbba /a(?:.(*THEN))*?a/ims abbbbbbbbbbbbbbbbbbbbba\=find_limits -Minimum match limit = 86 -Minimum recursion limit = 45 +Minimum heap limit = 0 +Minimum match limit = 66 +Minimum depth limit = 45 0: abbbbbbbbbbbbbbbbbbbbba /a(?:.(*THEN:ABC))*?a/ims abbbbbbbbbbbbbbbbbbbbba\=find_limits -Minimum match limit = 86 -Minimum recursion limit = 45 +Minimum heap limit = 0 +Minimum match limit = 66 +Minimum depth limit = 45 0: abbbbbbbbbbbbbbbbbbbbba /^(?>a+)(?>b+)(?>c+)(?>d+)(?>e+)/ aabbccddee\=find_limits +Minimum heap limit = 0 Minimum match limit = 7 -Minimum recursion limit = 2 +Minimum depth limit = 7 0: aabbccddee /^(?>(a+))(?>(b+))(?>(c+))(?>(d+))(?>(e+))/ aabbccddee\=find_limits -Minimum match limit = 17 -Minimum recursion limit = 16 +Minimum heap limit = 0 +Minimum match limit = 12 +Minimum depth limit = 12 0: aabbccddee 1: aa 2: bb @@ -90,8 +102,9 @@ Minimum recursion limit = 16 /^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/ aabbccddee\=find_limits -Minimum match limit = 13 -Minimum recursion limit = 10 +Minimum heap limit = 0 +Minimum match limit = 10 +Minimum depth limit = 10 0: aabbccddee 1: aa 2: cc @@ -103,9 +116,9 @@ Failed: error 160 at offset 17: (*VERB) not recognized or malformed /(*LIMIT_MATCH=4294967290)abc/ Failed: error 160 at offset 24: (*VERB) not recognized or malformed -/(*LIMIT_RECURSION=4294967280)abc/I +/(*LIMIT_DEPTH=4294967280)abc/I Capturing subpattern count = 0 -Recursion limit = 4294967280 +Depth limit = 4294967280 First code unit = 'a' Last code unit = 'c' Subject length lower bound = 3 @@ -117,8 +130,8 @@ No match Failed: error -47: match limit exceeded /(a+)*zz/ - aaaaaaaaaaaaaz\=recursion_limit=10 -Failed: error -53: recursion limit exceeded + aaaaaaaaaaaaaz\=depth_limit=10 +Failed: error -53: matching depth limit exceeded /(*LIMIT_MATCH=3000)(a+)*zz/I Capturing subpattern count = 1 @@ -151,36 +164,36 @@ No match aaaaaaaaaaaaaz\=match_limit=3000 Failed: error -47: match limit exceeded -/(*LIMIT_RECURSION=10)(a+)*zz/I +/(*LIMIT_DEPTH=10)(a+)*zz/I Capturing subpattern count = 1 -Recursion limit = 10 +Depth limit = 10 Starting code units: a z Last code unit = 'z' Subject length lower bound = 2 aaaaaaaaaaaaaz -Failed: error -53: recursion limit exceeded - aaaaaaaaaaaaaz\=recursion_limit=1000 -Failed: error -53: recursion limit exceeded +Failed: error -53: matching depth limit exceeded + aaaaaaaaaaaaaz\=depth_limit=1000 +Failed: error -53: matching depth limit exceeded -/(*LIMIT_RECURSION=10)(*LIMIT_RECURSION=1000)(a+)*zz/I +/(*LIMIT_DEPTH=10)(*LIMIT_DEPTH=1000)(a+)*zz/I Capturing subpattern count = 1 -Recursion limit = 1000 +Depth limit = 1000 Starting code units: a z Last code unit = 'z' Subject length lower bound = 2 aaaaaaaaaaaaaz No match -/(*LIMIT_RECURSION=1000)(a+)*zz/I +/(*LIMIT_DEPTH=1000)(a+)*zz/I Capturing subpattern count = 1 -Recursion limit = 1000 +Depth limit = 1000 Starting code units: a z Last code unit = 'z' Subject length lower bound = 2 aaaaaaaaaaaaaz No match - aaaaaaaaaaaaaz\=recursion_limit=10 -Failed: error -53: recursion limit exceeded + aaaaaaaaaaaaaz\=depth_limit=10 +Failed: error -53: matching depth limit exceeded # These three have infinitely nested recursions. @@ -188,7 +201,7 @@ Failed: error -53: recursion limit exceeded abc Failed: error -52: nested recursion at the same subject position -/((?(R2)a+|(?1)b))/ +/((?(R2)a+|(?1)b))()/ aaaabcde Failed: error -52: nested recursion at the same subject position @@ -348,12 +361,12 @@ Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P Subject length lower bound = 1 abc\=callout_fail=1 --->abc - 1 ^ ^ - 1 ^ ^ - 1 ^^ - 1 ^ ^ - 1 ^^ - 1 ^^ + 1 ^ ^ End of pattern + 1 ^ ^ End of pattern + 1 ^^ End of pattern + 1 ^ ^ End of pattern + 1 ^^ End of pattern + 1 ^^ End of pattern No match /(*NO_AUTO_POSSESS)\w+(?C1)/BI @@ -372,12 +385,23 @@ Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P Subject length lower bound = 1 abc\=callout_fail=1 --->abc - 1 ^ ^ - 1 ^ ^ - 1 ^^ - 1 ^ ^ - 1 ^^ - 1 ^^ + 1 ^ ^ End of pattern + 1 ^ ^ End of pattern + 1 ^^ End of pattern + 1 ^ ^ End of pattern + 1 ^^ End of pattern + 1 ^^ End of pattern No match +# This test breaks the JIT stack limit + +/(|]+){2,2452}/ + (|]+){2,2452} + 0: + 1: + +/(*LIMIT_HEAP=21)\[(a)]{60}/expand + \[a]{60} +Failed: error -63: heap limit exceeded + # End of testinput15 diff --git a/testdata/testoutput17 b/testdata/testoutput17 index f560f1b..a0606a7 100644 --- a/testdata/testoutput17 +++ b/testdata/testoutput17 @@ -335,7 +335,7 @@ Failed: error -47: match limit exceeded abc Failed: error -46: JIT stack limit reached -/((?(R2)a+|(?1)b))/ +/((?(R2)a+|(?1)b))()/ aaaabcde Failed: error -46: JIT stack limit reached @@ -368,6 +368,7 @@ No match Capturing subpattern count = 0 Compile options: <none> Overall options: anchored +First code unit = 'a' Subject length lower bound = 6 JIT compilation was successful #pop jitverify @@ -379,6 +380,7 @@ JIT compilation was successful Capturing subpattern count = 0 Compile options: <none> Overall options: anchored +First code unit = 'a' Subject length lower bound = 6 JIT compilation was successful #save testsaved1 diff --git a/testdata/testoutput18 b/testdata/testoutput18 index 294b121..d51423d 100644 --- a/testdata/testoutput18 +++ b/testdata/testoutput18 @@ -5,7 +5,7 @@ #forbid_utf #pattern posix -# Test invalid options +# Test some invalid options /abc/auto_callout ** Ignored with POSIX interface: auto_callout @@ -19,6 +19,12 @@ abc\=partial_hard ** Ignored with POSIX interface: partial_hard 0: abc + +/a(())bc/parens_nest_limit=1 +** Ignored with POSIX interface: parens_nest_limit + +/abc/allow_surrogate_escapes,max_pattern_length=2 +** Ignored with POSIX interface: allow_surrogate_escapes max_pattern_length # Real tests @@ -139,13 +145,13 @@ No match: POSIX code 17: match failed 0+ issippi /abc/\ -Failed: POSIX code 9: bad escape sequence at offset 3 +Failed: POSIX code 9: bad escape sequence at offset 4 "(?(?C)" Failed: POSIX code 11: unbalanced () at offset 6 "(?(?C))" -Failed: POSIX code 3: pattern error at offset 2 +Failed: POSIX code 3: pattern error at offset 6 /abcd/substitute_extended ** Ignored with POSIX interface: substitute_extended @@ -157,4 +163,47 @@ Failed: POSIX code 4: ? * + invalid at offset 100000 /\[A]{1000000}**/expand,regerror_buffsize=32 Failed: POSIX code 4: ? * + invalid at offset 1000001 +//posix_nosub + \=offset=70000 +** Ignored with POSIX interface: offset +Matched with REG_NOSUB + +/(?=(a\K))/ + a +Start of matched string is beyond its end - displaying from end to start. + 0: a + 1: a + +/^d(e)$/posix + acdef\=posix_startend=2:4 + 0: de + 1: e + acde\=posix_startend=2 + 0: de + 1: e +\= Expect no match + acdef +No match: POSIX code 17: match failed + acdef\=posix_startend=2 +No match: POSIX code 17: match failed + +/^a\x{00}b$/posix + a\x{00}b\=posix_startend=0:3 + 0: a\x00b + +/"A" 00 "B"/hex + A\x{00}B\=posix_startend=0:3 + 0: A\x00B + +/ABC/use_length + ABC + 0: ABC + +/a\b(c/literal,posix + a\\b(c + 0: a\b(c + +/a\b(c/literal,posix,dotall +Failed: POSIX code 16: bad argument at offset 0 + # End of testdata/testinput18 diff --git a/testdata/testoutput19 b/testdata/testoutput19 index c4169ca..a4a8b1a 100644 --- a/testdata/testoutput19 +++ b/testdata/testoutput19 @@ -18,4 +18,8 @@ No match: POSIX code 17: match failed +++\x{c2} 0: \xc2 +/"^AB" 00 "\x{1234}$"/hex,utf + AB\x{00}\x{1234}\=posix_startend=0:6 + 0: AB\x{00}\x{1234} + # End of testdata/testinput19 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 01cb193..fcaac8f 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -1,12 +1,12 @@ # This set of tests is not Perl-compatible. It checks on special features # of PCRE2's API, error diagnostics, and the compiled code of some patterns. -# It also checks the non-Perl syntax that PCRE2 supports (Python, .NET, -# Oniguruma). There are also some tests where PCRE2 and Perl differ, -# either because PCRE2 can't be compatible, or there is a possible Perl +# It also checks the non-Perl syntax that PCRE2 supports (Python, .NET, +# Oniguruma). There are also some tests where PCRE2 and Perl differ, +# either because PCRE2 can't be compatible, or there is a possible Perl # bug. # NOTE: This is a non-UTF set of tests. When UTF support is needed, use -# test 5. +# test 5. #forbid_utf #newline_default lf any anycrlf @@ -72,6 +72,7 @@ No match Capturing subpattern count = 0 Compile options: <none> Overall options: anchored +First code unit = 'a' Subject length lower bound = 3 abc 0: abc @@ -110,6 +111,7 @@ Subject length lower bound = 2 Capturing subpattern count = 0 Compile options: <none> Overall options: anchored +First code unit = 'a' Subject length lower bound = 3 abc 0: abc @@ -339,6 +341,7 @@ Subject length lower bound = 19 /the quick brown fox/I,anchored Capturing subpattern count = 0 Options: anchored +First code unit = 't' Subject length lower bound = 19 the quick brown fox 0: the quick brown fox @@ -351,6 +354,7 @@ Failed: error 111 at offset 4: unrecognized character after (? or (?- /^abc|def/I Capturing subpattern count = 0 +Starting code units: a d Subject length lower bound = 3 abcdef 0: abc @@ -472,14 +476,13 @@ No match and cattlefoo No match -/(?<=a+)b/ -Failed: error 125 at offset 6: lookbehind assertion is not fixed length +/abc(?<=a+)b/ +Failed: error 125 at offset 3: lookbehind assertion is not fixed length -/(?<=aaa|b{0,3})b/ -Failed: error 125 at offset 14: lookbehind assertion is not fixed length +/12345(?<=aaa|b{0,3})b/ +Failed: error 125 at offset 5: lookbehind assertion is not fixed length /(?<!(foo)a\1)bar/ -Failed: error 125 at offset 12: lookbehind assertion is not fixed length /(?i)abc/I Capturing subpattern count = 0 @@ -496,12 +499,14 @@ Subject length lower bound = 1 Capturing subpattern count = 0 Compile options: <none> Overall options: anchored +First code unit = '1' Subject length lower bound = 4 /(^b|(?i)^d)/I Capturing subpattern count = 1 Compile options: <none> Overall options: anchored +Starting code units: D b d Subject length lower bound = 1 /(?s).*/I @@ -539,25 +544,25 @@ First code unit at start or follows newline Subject length lower bound = 1 /(a)(?(1)a|b|c)/ -Failed: error 127 at offset 13: conditional group contains more than two branches +Failed: error 127 at offset 3: conditional group contains more than two branches /(?(?=a)a|b|c)/ -Failed: error 127 at offset 12: conditional group contains more than two branches +Failed: error 127 at offset 0: conditional group contains more than two branches /(?(1a)/ -Failed: error 114 at offset 6: missing closing parenthesis +Failed: error 124 at offset 4: missing closing parenthesis for condition /(?(1a))/ -Failed: error 126 at offset 4: malformed number or name after (?( +Failed: error 124 at offset 4: missing closing parenthesis for condition /(?(?i))/ -Failed: error 128 at offset 3: assertion expected after (?( or (?(?C) +Failed: error 128 at offset 2: assertion expected after (?( or (?(?C) /(?(abc))/ -Failed: error 115 at offset 7: reference to non-existent subpattern +Failed: error 115 at offset 3: reference to non-existent subpattern /(?(?<ab))/ -Failed: error 128 at offset 3: assertion expected after (?( or (?(?C) +Failed: error 128 at offset 2: assertion expected after (?( or (?(?C) /((?s)blah)\s+\1/I Capturing subpattern count = 1 @@ -625,6 +630,7 @@ Capturing subpattern count = 0 Max lookbehind = 1 Compile options: multiline Overall options: anchored multiline +First code unit = 'a' Subject length lower bound = 3 /^abc/Im @@ -638,6 +644,7 @@ Subject length lower bound = 3 Capturing subpattern count = 5 Compile options: <none> Overall options: anchored +First code unit = 'a' Subject length lower bound = 3 aaaaabbbbbcccccdef 0: aaaaabbbbbcccccdef @@ -715,13 +722,13 @@ Subject length lower bound = 3 No match /(?<=ab(c+)d)ef/ -Failed: error 125 at offset 11: lookbehind assertion is not fixed length +Failed: error 125 at offset 0: lookbehind assertion is not fixed length /(?<=ab(?<=c+)d)ef/ -Failed: error 125 at offset 12: lookbehind assertion is not fixed length +Failed: error 125 at offset 6: lookbehind assertion is not fixed length /(?<=ab(c|de)f)g/ -Failed: error 125 at offset 13: lookbehind assertion is not fixed length +Failed: error 125 at offset 0: lookbehind assertion is not fixed length /The next three are in testinput2 because they have variable length branches/ @@ -809,13 +816,14 @@ Capturing subpattern count = 1 Max back reference = 1 Compile options: <none> Overall options: anchored +Starting code units: a Subject length lower bound = 4 \= Expect no match aaaa No match aaaaaa No match - + # Perl does not fail these two for the final subjects. Neither did PCRE until # release 8.01. The problem is in backtracking into a subpattern that contains # a recursive reference to itself. PCRE has now made these into atomic patterns. @@ -905,7 +913,7 @@ Failed: error 122 at offset 0: unmatched closing parenthesis Failed: error 114 at offset 4: missing closing parenthesis /(?<%)b/ -Failed: error 124 at offset 3: letter or underscore expected after (?< or (?' +Failed: error 162 at offset 3: subpattern name expected /a(?{)b/ Failed: error 111 at offset 3: unrecognized character after (? or (?- @@ -923,22 +931,22 @@ Failed: error 111 at offset 3: unrecognized character after (? or (?- Failed: error 111 at offset 3: unrecognized character after (? or (?- /(?(1?)a|b)/ -Failed: error 126 at offset 4: malformed number or name after (?( +Failed: error 124 at offset 4: missing closing parenthesis for condition /[a[:xyz:/ Failed: error 106 at offset 8: missing terminating ] for character class /(?<=x+)y/ -Failed: error 125 at offset 6: lookbehind assertion is not fixed length +Failed: error 125 at offset 0: lookbehind assertion is not fixed length /a{37,17}/ Failed: error 104 at offset 7: numbers out of order in {} quantifier /abc/\ -Failed: error 101 at offset 3: \ at end of pattern +Failed: error 101 at offset 4: \ at end of pattern /abc/\i -Failed: error 101 at offset 3: \ at end of pattern +Failed: error 101 at offset 4: \ at end of pattern /(a)bc(d)/I Capturing subpattern count = 2 @@ -1005,6 +1013,7 @@ Subject length lower bound = 16 Capturing subpattern count = 3 Compile options: <none> Overall options: anchored +Starting code units: a b Subject length lower bound = 4 adef\=get=1,get=2,get=3,get=4,getall 0: adef @@ -1043,6 +1052,7 @@ Get substring 4 failed (-49): unknown substring Capturing subpattern count = 0 Compile options: <none> Overall options: anchored +First code unit = 'a' Subject length lower bound = 7 abc\00def\=copy=0,getall 0: abc\x00def @@ -1228,6 +1238,7 @@ Subject length lower bound = 3 Capturing subpattern count = 0 Compile options: <none> Overall options: anchored +First code unit = 'i' Subject length lower bound = 3 ississippi 0: iss @@ -1287,6 +1298,7 @@ Capturing subpattern count = 0 Contains explicit CR or LF match Compile options: <none> Overall options: anchored +First code unit = 'a' Subject length lower bound = 3 ab\nab\ncd 0: ab\x0a @@ -1777,6 +1789,8 @@ Subject length lower bound = 2 Capturing subpattern count = 0 Compile options: <none> Overall options: anchored +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z a b c d e f g h i j k l m n o p q r s t u v w x y z Subject length lower bound = 1 /^[[:^alnum:]]/IB @@ -1790,6 +1804,18 @@ Subject length lower bound = 1 Capturing subpattern count = 0 Compile options: <none> Overall options: anchored +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > + ? @ [ \ ] ^ _ ` { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 + \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 + \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 + \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 + \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 + \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 + \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 + \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 + \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff Subject length lower bound = 1 /^[[:alpha:]]/IB @@ -1803,6 +1829,8 @@ Subject length lower bound = 1 Capturing subpattern count = 0 Compile options: <none> Overall options: anchored +Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z + a b c d e f g h i j k l m n o p q r s t u v w x y z Subject length lower bound = 1 /^[[:^alpha:]]/IB @@ -1816,6 +1844,19 @@ Subject length lower bound = 1 Capturing subpattern count = 0 Compile options: <none> Overall options: anchored +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 + 5 6 7 8 9 : ; < = > ? @ [ \ ] ^ _ ` { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 + \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 + \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 + \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 + \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 + \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf + \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde + \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed + \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc + \xfd \xfe \xff Subject length lower bound = 1 /[_[:alpha:]]/I @@ -1835,6 +1876,12 @@ Subject length lower bound = 1 Capturing subpattern count = 0 Compile options: <none> Overall options: anchored +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 + 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y + Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ + \x7f Subject length lower bound = 1 /^[[:^ascii:]]/IB @@ -1848,6 +1895,15 @@ Subject length lower bound = 1 Capturing subpattern count = 0 Compile options: <none> Overall options: anchored +Starting code units: \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a + \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 + \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 + \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 + \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 + \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 + \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 + \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 + \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff Subject length lower bound = 1 /^[[:blank:]]/IB @@ -1861,6 +1917,7 @@ Subject length lower bound = 1 Capturing subpattern count = 0 Compile options: <none> Overall options: anchored +Starting code units: \x09 \x20 Subject length lower bound = 1 /^[[:^blank:]]/IB @@ -1874,6 +1931,20 @@ Subject length lower bound = 1 Capturing subpattern count = 0 Compile options: <none> Overall options: anchored +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0a \x0b + \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a + \x1b \x1c \x1d \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 + : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ + _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 + \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f + \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e + \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad + \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc + \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb + \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda + \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 + \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 + \xf9 \xfa \xfb \xfc \xfd \xfe \xff Subject length lower bound = 1 /[\n\x0b\x0c\x0d[:blank:]]/I @@ -1893,6 +1964,9 @@ Subject length lower bound = 1 Capturing subpattern count = 0 Compile options: <none> Overall options: anchored +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x7f Subject length lower bound = 1 /^[[:digit:]]/IB @@ -1906,6 +1980,7 @@ Subject length lower bound = 1 Capturing subpattern count = 0 Compile options: <none> Overall options: anchored +Starting code units: 0 1 2 3 4 5 6 7 8 9 Subject length lower bound = 1 /^[[:graph:]]/IB @@ -1919,6 +1994,9 @@ Subject length lower bound = 1 Capturing subpattern count = 0 Compile options: <none> Overall options: anchored +Starting code units: ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : + ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ + ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ Subject length lower bound = 1 /^[[:lower:]]/IB @@ -1932,6 +2010,7 @@ Subject length lower bound = 1 Capturing subpattern count = 0 Compile options: <none> Overall options: anchored +Starting code units: a b c d e f g h i j k l m n o p q r s t u v w x y z Subject length lower bound = 1 /^[[:print:]]/IB @@ -1945,6 +2024,9 @@ Subject length lower bound = 1 Capturing subpattern count = 0 Compile options: <none> Overall options: anchored +Starting code units: \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 + 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] + ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ Subject length lower bound = 1 /^[[:punct:]]/IB @@ -1958,6 +2040,8 @@ Subject length lower bound = 1 Capturing subpattern count = 0 Compile options: <none> Overall options: anchored +Starting code units: ! " # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \ ] ^ + _ ` { | } ~ Subject length lower bound = 1 /^[[:space:]]/IB @@ -1971,6 +2055,7 @@ Subject length lower bound = 1 Capturing subpattern count = 0 Compile options: <none> Overall options: anchored +Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 Subject length lower bound = 1 /^[[:upper:]]/IB @@ -1984,6 +2069,7 @@ Subject length lower bound = 1 Capturing subpattern count = 0 Compile options: <none> Overall options: anchored +Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z Subject length lower bound = 1 /^[[:xdigit:]]/IB @@ -1997,6 +2083,7 @@ Subject length lower bound = 1 Capturing subpattern count = 0 Compile options: <none> Overall options: anchored +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F a b c d e f Subject length lower bound = 1 /^[[:word:]]/IB @@ -2010,6 +2097,8 @@ Subject length lower bound = 1 Capturing subpattern count = 0 Compile options: <none> Overall options: anchored +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z Subject length lower bound = 1 /^[[:^cntrl:]]/IB @@ -2023,6 +2112,18 @@ Subject length lower bound = 1 Capturing subpattern count = 0 Compile options: <none> Overall options: anchored +Starting code units: \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 + 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] + ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x80 \x81 + \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 + \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f + \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae + \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd + \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc + \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb + \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea + \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 + \xfa \xfb \xfc \xfd \xfe \xff Subject length lower bound = 1 /^[12[:^digit:]]/IB @@ -2036,6 +2137,20 @@ Subject length lower bound = 1 Capturing subpattern count = 0 Compile options: <none> Overall options: anchored +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 1 2 : ; < + = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a + b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 + \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 + \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 + \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf + \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe + \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd + \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc + \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb + \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa + \xfb \xfc \xfd \xfe \xff Subject length lower bound = 1 /^[[:^blank:]]/IB @@ -2049,6 +2164,20 @@ Subject length lower bound = 1 Capturing subpattern count = 0 Compile options: <none> Overall options: anchored +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0a \x0b + \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a + \x1b \x1c \x1d \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 + : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ + _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 + \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f + \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e + \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad + \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc + \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb + \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda + \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 + \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 + \xf9 \xfa \xfb \xfc \xfd \xfe \xff Subject length lower bound = 1 /[01[:alpha:]%]/IB @@ -2115,7 +2244,7 @@ No match Failed: error 108 at offset 9: range out of order in character class /^(?(0)f|b)oo/I -Failed: error 135 at offset 6: invalid condition (?(0) +Failed: error 115 at offset 5: reference to non-existent subpattern # This one's here because of the large output vector needed @@ -2123,7 +2252,7 @@ Failed: error 135 at offset 6: invalid condition (?(0) Capturing subpattern count = 271 Max back reference = 270 Starting code units: 0 1 2 3 4 5 6 7 8 9 -Subject length lower bound = 272 +Subject length lower bound = 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 ABC ABC\=ovector=300 0: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 ABC ABC 1: 1 @@ -2419,6 +2548,7 @@ Subject length lower bound = 4 Capturing subpattern count = 2 Compile options: <none> Overall options: anchored +First code unit = 'a' Subject length lower bound = 1 aba 0: aba @@ -2429,6 +2559,7 @@ Subject length lower bound = 1 Capturing subpattern count = 2 Compile options: <none> Overall options: anchored +First code unit = 'a' Subject length lower bound = 2 aabbaa 0: aabbaa @@ -2439,6 +2570,7 @@ Subject length lower bound = 2 Capturing subpattern count = 2 Compile options: <none> Overall options: anchored +First code unit = 'a' Subject length lower bound = 2 aabbaa 0: aabbaa @@ -2449,6 +2581,7 @@ Subject length lower bound = 2 Capturing subpattern count = 2 Compile options: <none> Overall options: anchored +First code unit = 'a' Subject length lower bound = 2 aabbaa 0: aabbaa @@ -2459,6 +2592,7 @@ Subject length lower bound = 2 Capturing subpattern count = 1 Compile options: <none> Overall options: anchored +First code unit = 'a' Subject length lower bound = 2 aabbaa 0: aabbaa @@ -2468,6 +2602,7 @@ Subject length lower bound = 2 Capturing subpattern count = 3 Compile options: <none> Overall options: anchored +First code unit = 'a' Subject length lower bound = 2 aabbaa 0: aabbaa @@ -2479,6 +2614,7 @@ Subject length lower bound = 2 Capturing subpattern count = 2 Compile options: <none> Overall options: anchored +First code unit = 'a' Subject length lower bound = 2 aabbaa 0: aabbaa @@ -2489,6 +2625,7 @@ Subject length lower bound = 2 Capturing subpattern count = 1 Compile options: <none> Overall options: anchored +First code unit = 'a' Subject length lower bound = 2 aabbaa 0: aabbaa @@ -2498,6 +2635,7 @@ Subject length lower bound = 2 Capturing subpattern count = 1 Compile options: <none> Overall options: anchored +First code unit = 'a' Subject length lower bound = 2 aabbbaa 0: aabbbaa @@ -2507,6 +2645,7 @@ Subject length lower bound = 2 Capturing subpattern count = 1 Compile options: <none> Overall options: anchored +First code unit = 'a' Subject length lower bound = 2 aabbbaa 0: aabbbaa @@ -2516,6 +2655,7 @@ Subject length lower bound = 2 Capturing subpattern count = 1 Compile options: <none> Overall options: anchored +First code unit = 'a' Subject length lower bound = 2 aabbaa 0: aabbaa @@ -2525,6 +2665,7 @@ Subject length lower bound = 2 Capturing subpattern count = 1 Compile options: <none> Overall options: anchored +First code unit = 'a' Subject length lower bound = 2 aabbbaa 0: aabbbaa @@ -2534,6 +2675,7 @@ Subject length lower bound = 2 Capturing subpattern count = 3 Compile options: <none> Overall options: anchored +First code unit = 'a' Subject length lower bound = 2 aabbbaa 0: aabbbaa @@ -2545,6 +2687,7 @@ Subject length lower bound = 2 Capturing subpattern count = 3 Compile options: <none> Overall options: anchored +First code unit = 'a' Subject length lower bound = 2 aabbbbaa 0: aabbbbaa @@ -3053,6 +3196,7 @@ Subject length lower bound = 3 Capturing subpattern count = 5 Compile options: <none> Overall options: anchored +First code unit = 'a' Subject length lower bound = 3 /^x(?U)a+b/IB @@ -3068,6 +3212,7 @@ Subject length lower bound = 3 Capturing subpattern count = 0 Compile options: <none> Overall options: anchored +First code unit = 'x' Last code unit = 'b' Subject length lower bound = 3 @@ -3086,6 +3231,7 @@ Subject length lower bound = 3 Capturing subpattern count = 1 Compile options: <none> Overall options: anchored +First code unit = 'x' Last code unit = 'b' Subject length lower bound = 3 @@ -3099,19 +3245,19 @@ Failed: error 113 at offset 0: POSIX collating elements are not supported Failed: error 112 at offset 0: POSIX named classes are supported only within a class /\l/I -Failed: error 137 at offset 1: PCRE does not support \L, \l, \N{name}, \U, or \u +Failed: error 137 at offset 2: PCRE does not support \L, \l, \N{name}, \U, or \u /\L/I -Failed: error 137 at offset 1: PCRE does not support \L, \l, \N{name}, \U, or \u +Failed: error 137 at offset 2: PCRE does not support \L, \l, \N{name}, \U, or \u /\N{name}/I -Failed: error 137 at offset 1: PCRE does not support \L, \l, \N{name}, \U, or \u +Failed: error 137 at offset 2: PCRE does not support \L, \l, \N{name}, \U, or \u /\u/I -Failed: error 137 at offset 1: PCRE does not support \L, \l, \N{name}, \U, or \u +Failed: error 137 at offset 2: PCRE does not support \L, \l, \N{name}, \U, or \u /\U/I -Failed: error 137 at offset 1: PCRE does not support \L, \l, \N{name}, \U, or \u +Failed: error 137 at offset 2: PCRE does not support \L, \l, \N{name}, \U, or \u /a{1,3}b/ungreedy ab @@ -3518,12 +3664,10 @@ Subject length lower bound = 6 1: abc 123abcdef\=callout_capture Callout 0: last capture = 1 - 0: <unset> 1: abc --->123abcdef ^ ^ d Callout 1: last capture = 1 - 0: <unset> 1: abc --->123abcdef ^ ^ f @@ -3545,25 +3689,25 @@ May match empty string Subject length lower bound = 0 abcabcabc --->abcabcabc - 0 ^ (abc(?C1))* - 1 ^ ^ ) - 1 ^ ^ ) - 1 ^ ^ ) + 0 ^ ( + 1 ^ ^ )* + 1 ^ ^ )* + 1 ^ ^ )* 0: abcabcabc 1: abc - abcabc\=callout_fail=1:3 + abcabc\=callout_fail=1:4 --->abcabc - 0 ^ (abc(?C1))* - 1 ^ ^ ) - 1 ^ ^ ) + 0 ^ ( + 1 ^ ^ )* + 1 ^ ^ )* 0: abcabc 1: abc - abcabcabc\=callout_fail=1:3 + abcabcabc\=callout_fail=1:4 --->abcabcabc - 0 ^ (abc(?C1))* - 1 ^ ^ ) - 1 ^ ^ ) - 1 ^ ^ ) + 0 ^ ( + 1 ^ ^ )* + 1 ^ ^ )* + 1 ^ ^ )* 0: abcabc 1: abc @@ -3573,38 +3717,32 @@ May match empty string Subject length lower bound = 0 123\=callout_capture Callout 0: last capture = 0 - 0: <unset> --->123 - ^ ^ ) + ^ ^ )* 0: 123 1: 123 123456\=callout_capture Callout 0: last capture = 0 - 0: <unset> --->123456 - ^ ^ ) + ^ ^ )* Callout 0: last capture = 1 - 0: <unset> 1: 123 --->123456 - ^ ^ ) + ^ ^ )* 0: 123456 1: 456 123456789\=callout_capture Callout 0: last capture = 0 - 0: <unset> --->123456789 - ^ ^ ) + ^ ^ )* Callout 0: last capture = 1 - 0: <unset> 1: 123 --->123456789 - ^ ^ ) + ^ ^ )* Callout 0: last capture = 1 - 0: <unset> 1: 456 --->123456789 - ^ ^ ) + ^ ^ )* 0: 123456789 1: 789 @@ -3614,13 +3752,11 @@ First code unit = 'x' Subject length lower bound = 4 xyzabc\=callout_capture Callout 0: last capture = 2 - 0: <unset> 1: <unset> 2: xyz --->xyzabc ^ ^ p Callout 1: last capture = 0 - 0: <unset> --->xyzabc ^ x 0: xyzabc @@ -3633,14 +3769,12 @@ Last code unit = 'x' Subject length lower bound = 5 Xxyzabc\=callout_capture Callout 0: last capture = 3 - 0: <unset> 1: X 2: <unset> 3: xyz --->Xxyzabc ^ ^ p Callout 1: last capture = 1 - 0: <unset> 1: X --->Xxyzabc ^^ x @@ -3655,7 +3789,6 @@ Last code unit = 'f' Subject length lower bound = 6 abcdef\=callout_capture Callout 0: last capture = 1 - 0: <unset> 1: abc --->abcdef ^ a @@ -3669,12 +3802,10 @@ Last code unit = 'z' Subject length lower bound = 6 abcxyz\=callout_capture Callout 1: last capture = 1 - 0: <unset> 1: abc --->abcxyz ^ ^ d Callout 2: last capture = 0 - 0: <unset> --->abcxyz ^ a 0: abcxyz @@ -3687,7 +3818,6 @@ Last code unit = 'z' Subject length lower bound = 3 abcxyz\=callout_capture Callout 0: last capture = 1 - 0: <unset> 1: abc --->abcxyz ^ ) @@ -3702,7 +3832,7 @@ Subject length lower bound = 2 \= Expect no match abbbbbccc\=callout_data=1 --->abbbbbccc - 1 ^ ^ + 1 ^ ^ End of pattern Callout data = 1 No match @@ -3714,21 +3844,21 @@ Subject length lower bound = 2 \= Expect no match abbbbbccc\=callout_data=1 --->abbbbbccc - 1 ^ ^ + 1 ^ ^ End of pattern Callout data = 1 - 1 ^ ^ + 1 ^ ^ End of pattern Callout data = 1 - 1 ^ ^ + 1 ^ ^ End of pattern Callout data = 1 - 1 ^ ^ + 1 ^ ^ End of pattern Callout data = 1 - 1 ^ ^ + 1 ^ ^ End of pattern Callout data = 1 - 1 ^ ^ + 1 ^ ^ End of pattern Callout data = 1 - 1 ^ ^ + 1 ^ ^ End of pattern Callout data = 1 - 1 ^ ^ + 1 ^ ^ End of pattern Callout data = 1 No match @@ -3742,6 +3872,7 @@ Subject length lower bound = 3 Capturing subpattern count = 0 Compile options: <none> Overall options: anchored +First code unit = 'a' Subject length lower bound = 3 /(?C)a|b/I @@ -3749,6 +3880,16 @@ Capturing subpattern count = 0 Starting code units: a b Subject length lower bound = 1 +/a|(b)(?C)/I +Capturing subpattern count = 1 +Starting code units: a b +Subject length lower bound = 1 + b +--->b + 0 ^^ End of pattern + 0: b + 1: b + /x(ab|(bc|(de|(?R))))/I Capturing subpattern count = 3 First code unit = 'x' @@ -3802,6 +3943,7 @@ No match Capturing subpattern count = 1 Compile options: <none> Overall options: anchored +First code unit = '>' Last code unit = '<' Subject length lower bound = 10 >abc>123<xyz< @@ -3835,7 +3977,7 @@ Subject length lower bound = 2 Bra CBra 1 a - Once + SBra Recurse KetRmax b @@ -3852,6 +3994,7 @@ Subject length lower bound = 2 Capturing subpattern count = 2 Compile options: <none> Overall options: anchored +Starting code units: ( - 0 1 2 3 4 5 6 7 8 9 Subject length lower bound = 1 12 0: 12 @@ -3871,6 +4014,7 @@ No match Capturing subpattern count = 2 Compile options: <none> Overall options: anchored +First code unit = 'x' Subject length lower bound = 3 xyz 0: xyz @@ -3930,6 +4074,7 @@ Failed: error 114 at offset 10: missing closing parenthesis Capturing subpattern count = 1 Compile options: <none> Overall options: anchored +First code unit = 'a' Subject length lower bound = 9 abcdefabc 0: abcdefabc @@ -3939,7 +4084,8 @@ Subject length lower bound = 9 Capturing subpattern count = 1 Compile options: <none> Overall options: anchored -Subject length lower bound = 3 +Starting code units: a b c +Subject length lower bound = 2 a=a 0: a=a 1: a @@ -3954,6 +4100,7 @@ Subject length lower bound = 3 Capturing subpattern count = 2 Compile options: <none> Overall options: anchored +Starting code units: a b c Subject length lower bound = 2 a=a 0: a=a @@ -4263,7 +4410,7 @@ Subject length lower bound = 2 Callout data = 1 0: ab 1: ab -\= Expect no match +\= Expect no match aaabbb\=callout_data=-1 --->aaabbb 1 ^ ^ b @@ -4355,10 +4502,10 @@ Subject length lower bound = 4 C aa (2) A (group 2) /(?P<x>eks)(?P<x>eccs)/I -Failed: error 143 at offset 15: two named subpatterns have the same name (PCRE2_DUPNAMES not set) +Failed: error 143 at offset 16: two named subpatterns have the same name (PCRE2_DUPNAMES not set) /(?P<abc>abc(?P<def>def)(?P<abc>xyz))/I -Failed: error 143 at offset 30: two named subpatterns have the same name (PCRE2_DUPNAMES not set) +Failed: error 143 at offset 31: two named subpatterns have the same name (PCRE2_DUPNAMES not set) "\[((?P<elem>\d+)(,(?P>elem))*)\]"I Capturing subpattern count = 3 @@ -4581,7 +4728,7 @@ Subject length lower bound = 5 +2 ^ ^ c +3 ^ ^ d +4 ^ ^ e - +5 ^ ^ + +5 ^ ^ End of pattern 0: abcde \= Expect no match abcdfe @@ -4613,13 +4760,13 @@ Subject length lower bound = 1 --->ab +0 ^ a* +2 ^^ b - +3 ^ ^ + +3 ^ ^ End of pattern 0: ab aaaab --->aaaab +0 ^ a* +2 ^ ^ b - +3 ^ ^ + +3 ^ ^ End of pattern 0: aaaab aaaacb --->aaaacb @@ -4633,7 +4780,7 @@ Subject length lower bound = 1 +2 ^^ b +0 ^ a* +2 ^ b - +3 ^^ + +3 ^^ End of pattern 0: b /a*b/IB,auto_callout @@ -4656,13 +4803,13 @@ Subject length lower bound = 1 --->ab +0 ^ a* +2 ^^ b - +3 ^ ^ + +3 ^ ^ End of pattern 0: ab aaaab --->aaaab +0 ^ a* +2 ^ ^ b - +3 ^ ^ + +3 ^ ^ End of pattern 0: aaaab aaaacb --->aaaacb @@ -4676,7 +4823,7 @@ Subject length lower bound = 1 +2 ^^ b +0 ^ a* +2 ^ b - +3 ^^ + +3 ^^ End of pattern 0: b /a+b/IB,auto_callout @@ -4699,15 +4846,15 @@ Subject length lower bound = 2 --->ab +0 ^ a+ +2 ^^ b - +3 ^ ^ + +3 ^ ^ End of pattern 0: ab aaaab --->aaaab +0 ^ a+ +2 ^ ^ b - +3 ^ ^ + +3 ^ ^ End of pattern 0: aaaab -\= Expect no match +\= Expect no match aaaacb --->aaaacb +0 ^ a+ @@ -4723,7 +4870,7 @@ No match /(abc|def)x/IB,auto_callout ------------------------------------------------------------------ Bra - Callout 255 0 9 + Callout 255 0 1 CBra 1 Callout 255 1 1 a @@ -4731,7 +4878,7 @@ No match b Callout 255 3 1 c - Callout 255 4 0 + Callout 255 4 1 Alt Callout 255 5 1 d @@ -4739,7 +4886,7 @@ No match e Callout 255 7 1 f - Callout 255 8 0 + Callout 255 8 1 Ket Callout 255 9 1 x @@ -4754,38 +4901,38 @@ Last code unit = 'x' Subject length lower bound = 4 abcx --->abcx - +0 ^ (abc|def) + +0 ^ ( +1 ^ a +2 ^^ b +3 ^ ^ c +4 ^ ^ | +9 ^ ^ x -+10 ^ ^ ++10 ^ ^ End of pattern 0: abcx 1: abc defx --->defx - +0 ^ (abc|def) + +0 ^ ( +1 ^ a +5 ^ d +6 ^^ e +7 ^ ^ f +8 ^ ^ ) +9 ^ ^ x -+10 ^ ^ ++10 ^ ^ End of pattern 0: defx 1: def -\= Expect no match +\= Expect no match abcdefzx --->abcdefzx - +0 ^ (abc|def) + +0 ^ ( +1 ^ a +2 ^^ b +3 ^ ^ c +4 ^ ^ | +9 ^ ^ x +5 ^ d - +0 ^ (abc|def) + +0 ^ ( +1 ^ a +5 ^ d +6 ^^ e @@ -4797,7 +4944,7 @@ No match /(abc|def)x/IB,auto_callout ------------------------------------------------------------------ Bra - Callout 255 0 9 + Callout 255 0 1 CBra 1 Callout 255 1 1 a @@ -4805,7 +4952,7 @@ No match b Callout 255 3 1 c - Callout 255 4 0 + Callout 255 4 1 Alt Callout 255 5 1 d @@ -4813,7 +4960,7 @@ No match e Callout 255 7 1 f - Callout 255 8 0 + Callout 255 8 1 Ket Callout 255 9 1 x @@ -4828,38 +4975,38 @@ Last code unit = 'x' Subject length lower bound = 4 abcx --->abcx - +0 ^ (abc|def) + +0 ^ ( +1 ^ a +2 ^^ b +3 ^ ^ c +4 ^ ^ | +9 ^ ^ x -+10 ^ ^ ++10 ^ ^ End of pattern 0: abcx 1: abc defx --->defx - +0 ^ (abc|def) + +0 ^ ( +1 ^ a +5 ^ d +6 ^^ e +7 ^ ^ f +8 ^ ^ ) +9 ^ ^ x -+10 ^ ^ ++10 ^ ^ End of pattern 0: defx 1: def -\= Expect no match +\= Expect no match abcdefzx --->abcdefzx - +0 ^ (abc|def) + +0 ^ ( +1 ^ a +2 ^^ b +3 ^ ^ c +4 ^ ^ | +9 ^ ^ x +5 ^ d - +0 ^ (abc|def) + +0 ^ ( +1 ^ a +5 ^ d +6 ^^ e @@ -4875,7 +5022,7 @@ Starting code units: a c Subject length lower bound = 6 ababab --->ababab - +0 ^ (ab|cd){3,4} + +0 ^ ( +1 ^ a +2 ^^ b +3 ^ ^ | @@ -4887,55 +5034,55 @@ Subject length lower bound = 6 +3 ^ ^ | +1 ^ ^ a +4 ^ ^ c -+12 ^ ^ ++12 ^ ^ End of pattern 0: ababab 1: ab abcdabcd --->abcdabcd - +0 ^ (ab|cd){3,4} + +0 ^ ( +1 ^ a +2 ^^ b +3 ^ ^ | +1 ^ ^ a +4 ^ ^ c +5 ^ ^ d - +6 ^ ^ ) + +6 ^ ^ ){3,4} +1 ^ ^ a +2 ^ ^ b +3 ^ ^ | +1 ^ ^ a +4 ^ ^ c +5 ^ ^ d - +6 ^ ^ ) -+12 ^ ^ + +6 ^ ^ ){3,4} ++12 ^ ^ End of pattern 0: abcdabcd 1: cd abcdcdcdcdcd --->abcdcdcdcdcd - +0 ^ (ab|cd){3,4} + +0 ^ ( +1 ^ a +2 ^^ b +3 ^ ^ | +1 ^ ^ a +4 ^ ^ c +5 ^ ^ d - +6 ^ ^ ) + +6 ^ ^ ){3,4} +1 ^ ^ a +4 ^ ^ c +5 ^ ^ d - +6 ^ ^ ) + +6 ^ ^ ){3,4} +1 ^ ^ a +4 ^ ^ c +5 ^ ^ d - +6 ^ ^ ) -+12 ^ ^ + +6 ^ ^ ){3,4} ++12 ^ ^ End of pattern 0: abcdcdcd 1: cd /([ab]{,4}c|xy)/IB,auto_callout ------------------------------------------------------------------ Bra - Callout 255 0 14 + Callout 255 0 1 CBra 1 Callout 255 1 4 [ab] @@ -4949,13 +5096,13 @@ Subject length lower bound = 6 } Callout 255 9 1 c - Callout 255 10 0 + Callout 255 10 1 Alt Callout 255 11 1 x Callout 255 12 1 y - Callout 255 13 0 + Callout 255 13 1 Ket Callout 255 14 0 Ket @@ -4965,18 +5112,18 @@ Capturing subpattern count = 1 Options: auto_callout Starting code units: a b x Subject length lower bound = 2 -\= Expect no match +\= Expect no match Note: that { does NOT introduce a quantifier --->Note: that { does NOT introduce a quantifier - +0 ^ ([ab]{,4}c|xy) + +0 ^ ( +1 ^ [ab] +5 ^^ { +11 ^ x - +0 ^ ([ab]{,4}c|xy) + +0 ^ ( +1 ^ [ab] +5 ^^ { +11 ^ x - +0 ^ ([ab]{,4}c|xy) + +0 ^ ( +1 ^ [ab] +5 ^^ { +11 ^ x @@ -4985,7 +5132,7 @@ No match /([ab]{,4}c|xy)/IB,auto_callout ------------------------------------------------------------------ Bra - Callout 255 0 14 + Callout 255 0 1 CBra 1 Callout 255 1 4 [ab] @@ -4999,13 +5146,13 @@ No match } Callout 255 9 1 c - Callout 255 10 0 + Callout 255 10 1 Alt Callout 255 11 1 x Callout 255 12 1 y - Callout 255 13 0 + Callout 255 13 1 Ket Callout 255 14 0 Ket @@ -5015,18 +5162,18 @@ Capturing subpattern count = 1 Options: auto_callout Starting code units: a b x Subject length lower bound = 2 -\= Expect no match +\= Expect no match Note: that { does NOT introduce a quantifier --->Note: that { does NOT introduce a quantifier - +0 ^ ([ab]{,4}c|xy) + +0 ^ ( +1 ^ [ab] +5 ^^ { +11 ^ x - +0 ^ ([ab]{,4}c|xy) + +0 ^ ( +1 ^ [ab] +5 ^^ { +11 ^ x - +0 ^ ([ab]{,4}c|xy) + +0 ^ ( +1 ^ [ab] +5 ^^ { +11 ^ x @@ -5035,58 +5182,58 @@ No match /([ab]{1,4}c|xy){4,5}?123/IB,auto_callout ------------------------------------------------------------------ Bra - Callout 255 0 21 + Callout 255 0 1 CBra 1 Callout 255 1 9 [ab]{1,4}+ Callout 255 10 1 c - Callout 255 11 0 + Callout 255 11 1 Alt Callout 255 12 1 x Callout 255 13 1 y - Callout 255 14 0 + Callout 255 14 7 Ket CBra 1 Callout 255 1 9 [ab]{1,4}+ Callout 255 10 1 c - Callout 255 11 0 + Callout 255 11 1 Alt Callout 255 12 1 x Callout 255 13 1 y - Callout 255 14 0 + Callout 255 14 7 Ket CBra 1 Callout 255 1 9 [ab]{1,4}+ Callout 255 10 1 c - Callout 255 11 0 + Callout 255 11 1 Alt Callout 255 12 1 x Callout 255 13 1 y - Callout 255 14 0 + Callout 255 14 7 Ket CBra 1 Callout 255 1 9 [ab]{1,4}+ Callout 255 10 1 c - Callout 255 11 0 + Callout 255 11 1 Alt Callout 255 12 1 x Callout 255 13 1 y - Callout 255 14 0 + Callout 255 14 7 Ket Braminzero CBra 1 @@ -5094,13 +5241,13 @@ No match [ab]{1,4}+ Callout 255 10 1 c - Callout 255 11 0 + Callout 255 11 1 Alt Callout 255 12 1 x Callout 255 13 1 y - Callout 255 14 0 + Callout 255 14 7 Ket Callout 255 21 1 1 @@ -5119,7 +5266,7 @@ Last code unit = '3' Subject length lower bound = 11 aacaacaacaacaac123 --->aacaacaacaacaac123 - +0 ^ ([ab]{1,4}c|xy){4,5}? + +0 ^ ( +1 ^ [ab]{1,4} +10 ^ ^ c +11 ^ ^ | @@ -5139,7 +5286,7 @@ Subject length lower bound = 11 +21 ^ ^ 1 +22 ^ ^ 2 +23 ^ ^ 3 -+24 ^ ^ ++24 ^ ^ End of pattern 0: aacaacaacaacaac123 1: aac @@ -5190,6 +5337,7 @@ No match Capturing subpattern count = 3 Compile options: <none> Overall options: anchored +Starting code units: 0 1 2 3 4 5 6 7 8 9 Last code unit = '/' Subject length lower bound = 6 13/05/04\=ps @@ -5287,6 +5435,7 @@ Partial match: c12 Capturing subpattern count = 0 Compile options: <none> Overall options: anchored +Starting code units: 0 1 2 3 4 5 6 7 8 9 Last code unit = 'X' Subject length lower bound = 4 1\=ps @@ -5651,7 +5800,7 @@ First code unit = \xff Subject length lower bound = 1 /^((?P<A>a1)|(?P<A>a2)b)/I -Failed: error 143 at offset 17: two named subpatterns have the same name (PCRE2_DUPNAMES not set) +Failed: error 143 at offset 18: two named subpatterns have the same name (PCRE2_DUPNAMES not set) /^((?P<A>a1)|(?P<A>a2)b)/I,dupnames Capturing subpattern count = 3 @@ -5660,6 +5809,7 @@ Named capturing subpatterns: A 3 Compile options: dupnames Overall options: anchored dupnames +First code unit = 'a' Subject length lower bound = 2 a1b\=copy=A 0: a1 @@ -5679,7 +5829,7 @@ Subject length lower bound = 2 Number not found for group 'Z' Copy substring 'Z' failed (-49): unknown substring C a1 (2) A (non-unique) - + /(?|(?<a>)(?<b>)(?<a>)|(?<a>)(?<b>)(?<a>))/I,dupnames Capturing subpattern count = 3 Named capturing subpatterns: @@ -5697,6 +5847,7 @@ Named capturing subpatterns: A 2 Compile options: dupnames Overall options: anchored dupnames +First code unit = 'a' Subject length lower bound = 2 ab\=copy=A 0: ab @@ -5710,6 +5861,7 @@ Named capturing subpatterns: A 1 A 2 Options: dupnames +Starting code units: a c Subject length lower bound = 2 ab\=copy=A 0: ab @@ -5728,6 +5880,7 @@ Named capturing subpatterns: A 3 A 4 Options: dupnames +Starting code units: a c Subject length lower bound = 2 cdefgh\=copy=A 0: cdefgh @@ -5744,6 +5897,7 @@ Named capturing subpatterns: A 3 Compile options: dupnames Overall options: anchored dupnames +First code unit = 'a' Subject length lower bound = 2 a1b\=get=A 0: a1 @@ -5771,6 +5925,7 @@ Named capturing subpatterns: A 2 Compile options: dupnames Overall options: anchored dupnames +First code unit = 'a' Subject length lower bound = 2 ab\=get=A 0: ab @@ -5784,6 +5939,7 @@ Named capturing subpatterns: A 1 A 2 Options: dupnames +Starting code units: a c Subject length lower bound = 2 ab\=get=A 0: ab @@ -5802,6 +5958,7 @@ Named capturing subpatterns: A 3 A 4 Options: dupnames +Starting code units: a c Subject length lower bound = 2 cdefgh\=get=A 0: cdefgh @@ -5819,6 +5976,7 @@ Named capturing subpatterns: Compile options: <none> Overall options: anchored Duplicate name status changes +First code unit = 'a' Subject length lower bound = 2 a1b\=copy=A 0: a1 @@ -5833,7 +5991,7 @@ Subject length lower bound = 2 C a2 (2) A (non-unique) /^(?P<A>a) (?J:(?P<B>b)(?P<B>c)) (?P<A>d)/I -Failed: error 143 at offset 37: two named subpatterns have the same name (PCRE2_DUPNAMES not set) +Failed: error 143 at offset 38: two named subpatterns have the same name (PCRE2_DUPNAMES not set) # In this next test, J is not set at the outer level; consequently it isn't set # in the pattern's options; consequently pcre2_substring_get_byname() produces @@ -5849,6 +6007,7 @@ Named capturing subpatterns: Compile options: <none> Overall options: anchored Duplicate name status changes +First code unit = 'a' Subject length lower bound = 6 a bc d\=copy=A,copy=B,copy=C 0: a bc d @@ -5889,10 +6048,10 @@ Subject length lower bound = 2 1: X /(?:(?(2y)a|b)(X))+/I -Failed: error 126 at offset 7: malformed number or name after (?( +Failed: error 124 at offset 7: missing closing parenthesis for condition /(?:(?(ZA)a|b)(?P<ZZ>X))+/I -Failed: error 115 at offset 9: reference to non-existent subpattern +Failed: error 115 at offset 6: reference to non-existent subpattern /(?:(?(ZZ)a|b)(?(ZZ)a|b)(?P<ZZ>X))+/I Capturing subpattern count = 1 @@ -5998,7 +6157,7 @@ Subject length lower bound = 3 No match xyz\rabclf No match - + /^abc/Im,newline=cr Capturing subpattern count = 0 Options: multiline @@ -6250,6 +6409,7 @@ Subject length lower bound = 4 Capturing subpattern count = 0 Compile options: <none> Overall options: anchored +Starting code units: a b Last code unit = 'b' Subject length lower bound = 2 @@ -6266,6 +6426,7 @@ Subject length lower bound = 2 Capturing subpattern count = 0 Compile options: <none> Overall options: anchored +Starting code units: a b Last code unit = 'b' Subject length lower bound = 2 @@ -6282,6 +6443,7 @@ Subject length lower bound = 2 Capturing subpattern count = 0 Compile options: <none> Overall options: anchored +Starting code units: a b Last code unit = 'b' Subject length lower bound = 2 @@ -6298,6 +6460,7 @@ Subject length lower bound = 2 Capturing subpattern count = 0 Compile options: <none> Overall options: anchored +First code unit = 'a' Last code unit = 'A' Subject length lower bound = 3 aaaA5 @@ -6319,6 +6482,7 @@ No match Capturing subpattern count = 0 Compile options: caseless Overall options: anchored caseless +Starting code units: A a Last code unit = 'A' (caseless) Subject length lower bound = 2 aaaA5 @@ -7023,7 +7187,7 @@ No match No match /^(?P>abc)(?<abcd>xxx)/ -Failed: error 115 at offset 8: reference to non-existent subpattern +Failed: error 115 at offset 5: reference to non-existent subpattern /^(?P>abc)(?<abc>x|y)/ xx @@ -7283,13 +7447,13 @@ No match No match /(?(<bc))/ -Failed: error 126 at offset 6: malformed number or name after (?( +Failed: error 142 at offset 6: syntax error in subpattern name (missing terminator) /(?(''))/ -Failed: error 128 at offset 4: assertion expected after (?( or (?(?C) +Failed: error 162 at offset 4: subpattern name expected /(?('R')stuff)/ -Failed: error 115 at offset 7: reference to non-existent subpattern +Failed: error 115 at offset 4: reference to non-existent subpattern /((abc (?(R) (?(R1)1) (?(R2)2) X | (?1) (?2) (?R) ))) /x abcabc1Xabc2XabcXabcabc @@ -7304,10 +7468,10 @@ Failed: error 115 at offset 7: reference to non-existent subpattern 2: abcabc1Xabc2XabcX /(?<A> (?'B' abc (?(R) (?(R&C)1) (?(R&B)2) X | (?1) (?2) (?R) ))) /x -Failed: error 115 at offset 29: reference to non-existent subpattern +Failed: error 115 at offset 27: reference to non-existent subpattern /^(?(DEFINE) abc | xyz ) /x -Failed: error 154 at offset 22: DEFINE group contains more than one branch +Failed: error 154 at offset 4: DEFINE group contains more than one branch /(?(DEFINE) abc) xyz/Ix Capturing subpattern count = 0 @@ -7513,19 +7677,19 @@ No match No match /^(a)\g-2/ -Failed: error 115 at offset 7: reference to non-existent subpattern +Failed: error 115 at offset 8: reference to non-existent subpattern /^(a)\g/ -Failed: error 158 at offset 5: a numbered reference must not be zero +Failed: error 157 at offset 6: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number /^(a)\g{0}/ -Failed: error 158 at offset 8: a numbered reference must not be zero +Failed: error 115 at offset 9: reference to non-existent subpattern /^(a)\g{3/ -Failed: error 157 at offset 8: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number +Failed: error 157 at offset 6: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number /^(a)\g{aa}/ -Failed: error 115 at offset 9: reference to non-existent subpattern +Failed: error 115 at offset 7: reference to non-existent subpattern /^a.b/newline=lf a\rb @@ -7568,13 +7732,13 @@ No match 0: \x0d\x0afoo \nfoo 0: \x0afoo - + /^$/gm,newline=any abc\r\rxyz 0: - abc\n\rxyz + abc\n\rxyz 0: -\= Expect no match +\= Expect no match abc\r\nxyz No match @@ -7589,7 +7753,7 @@ No match 0+ \x0d\x0a 0: \x0d\x0a 0+ - + /(?m)$/g,newline=any,aftertext abc\r\n\r\n 0: @@ -7609,7 +7773,7 @@ No match /^X/m XABC 0: X -\= Expect no match +\= Expect no match XABC\=notbol No match @@ -7644,17 +7808,17 @@ No match 0: xyabcabc 1: abc \= Expect no match - xyabc + xyabc No match - + /x(?-0)y/ -Failed: error 158 at offset 5: a numbered reference must not be zero +Failed: error 126 at offset 5: a relative value of zero is not allowed /x(?-1)y/ Failed: error 115 at offset 5: reference to non-existent subpattern /x(?+0)y/ -Failed: error 158 at offset 5: a numbered reference must not be zero +Failed: error 126 at offset 5: a relative value of zero is not allowed /x(?+1)y/ Failed: error 115 at offset 5: reference to non-existent subpattern @@ -7682,9 +7846,9 @@ Failed: error 115 at offset 5: reference to non-existent subpattern Y 0: Y \= Expect no match - abcY + abcY No match - + /^((?(+1)X|Y)(abc))+/B ------------------------------------------------------------------ Bra @@ -7712,11 +7876,11 @@ No match 1: Xabc 2: abc \= Expect no match - XabcXabc + XabcXabc No match /(?(-1)a)/B -Failed: error 115 at offset 6: reference to non-existent subpattern +Failed: error 115 at offset 5: reference to non-existent subpattern /((?(-1)a))/B ------------------------------------------------------------------ @@ -7732,7 +7896,7 @@ Failed: error 115 at offset 6: reference to non-existent subpattern ------------------------------------------------------------------ /((?(-2)a))/B -Failed: error 115 at offset 7: reference to non-existent subpattern +Failed: error 115 at offset 6: reference to non-existent subpattern /^(?(+1)X|Y)(.)/B ------------------------------------------------------------------ @@ -7758,11 +7922,11 @@ Failed: error 115 at offset 7: reference to non-existent subpattern tom-tom 0: tom-tom 1: tom - bon-bon + bon-bon 0: bon-bon 1: bon \= Expect no match - tom-bon + tom-bon No match /\g{A/ @@ -7786,7 +7950,7 @@ Failed: error 142 at offset 4: syntax error in subpattern name (missing terminat >abc< 0: abc 1: abc - >xyz< + >xyz< 0: xyz 1: xyz @@ -7816,7 +7980,7 @@ Failed: error 142 at offset 4: syntax error in subpattern name (missing terminat 1: x 2: abc 3: x - xxyzx + xxyzx 0: xxyzx 1: x 2: xyz @@ -7852,7 +8016,7 @@ Failed: error 142 at offset 4: syntax error in subpattern name (missing terminat 2: abc 3: pqr 4: x - xxyzx + xxyzx 0: xxyzx 1: x 2: xyz @@ -7870,7 +8034,7 @@ Failed: error 142 at offset 4: syntax error in subpattern name (missing terminat \= Expect no match XXXX No match - + /\H+\hY/B ------------------------------------------------------------------ Bra @@ -7880,7 +8044,7 @@ No match Ket End ------------------------------------------------------------------ - XXXX Y + XXXX Y 0: XXXX Y /\H+ Y/B @@ -8107,7 +8271,7 @@ Failed: error 106 at offset 10: missing terminating ] for character class +3 ^ ^ (*FAIL) +3 ^^ (*FAIL) No match - + /a+b?c+(*FAIL)/auto_callout \= Expect no match aaabccc @@ -8171,7 +8335,7 @@ No match +15 ^ ^ (*FAIL) +15 ^ ^ (*FAIL) No match - + /a+b?(*SKIP)c+(*FAIL)/auto_callout \= Expect no match aaabcccaaabccc @@ -8218,15 +8382,15 @@ No match +13 ^ ^ (*FAIL) +13 ^ ^ (*FAIL) No match - + /a(*MARK)b/ Failed: error 166 at offset 7: (*MARK) must have an argument /(?i:A{1,}\6666666666)/ -Failed: error 161 at offset 19: number is too big +Failed: error 161 at offset 19: group number is too big /\g6666666666/ -Failed: error 161 at offset 11: number is too big +Failed: error 161 at offset 7: group number is too big /[\g6666666666]/B ------------------------------------------------------------------ @@ -8243,17 +8407,17 @@ Failed: error 115 at offset 3: reference to non-existent subpattern \= Expect no match \r\nA No match - + /\nA/newline=crlf - \r\nA + \r\nA 0: \x0aA /[\r\n]A/newline=crlf - \r\nA + \r\nA 0: \x0aA /(\r|\n)A/newline=crlf - \r\nA + \r\nA 0: \x0aA 1: \x0a @@ -8264,52 +8428,52 @@ Failed: error 160 at offset 5: (*VERB) not recognized or malformed a\nb 0: a\x0ab \= Expect no match - a\rb + a\rb No match /(*CR)a.b/newline=lf a\nb 0: a\x0ab \= Expect no match - a\rb + a\rb No match /(*LF)a.b/newline=CRLF a\rb 0: a\x0db \= Expect no match - a\nb + a\nb No match /(*CRLF)a.b/ a\rb 0: a\x0db - a\nb + a\nb 0: a\x0ab \= Expect no match - a\r\nb + a\r\nb No match /(*ANYCRLF)a.b/newline=CR \= Expect no match a\rb No match - a\nb + a\nb No match - a\r\nb + a\r\nb No match /(*ANY)a.b/newline=cr \= Expect no match a\rb No match - a\nb + a\nb No match - a\r\nb + a\r\nb No match - a\x85b + a\x85b No match - + /(*ANY).*/g abc\r\ndef 0: abc @@ -8331,6 +8495,31 @@ No match 0: def 0: +/(*NUL)^.*/ + a\nb\x00ccc + 0: a\x0ab + +/(*NUL)^.*/s + a\nb\x00ccc + 0: a\x0ab\x00ccc + +/^x/m,newline=NUL + ab\x00xy + 0: x + +/'#comment' 0d 0a 00 '^x\' 0a 'y'/x,newline=nul,hex + x\nyz + 0: x\x0ay + +/(*NUL)^X\NY/ + X\nY + 0: X\x0aY + X\rY + 0: X\x0dY +\= Expect no match + X\x00Y +No match + /a\Rb/I,bsr=anycrlf Capturing subpattern count = 0 \R matches CR, LF, or CRLF @@ -8346,7 +8535,7 @@ Subject length lower bound = 3 \= Expect no match a\x85b No match - a\x0bb + a\x0bb No match /a\Rb/I,bsr=unicode @@ -8363,9 +8552,9 @@ Subject length lower bound = 3 0: a\x0d\x0ab a\x85b 0: a\x85b - a\x0bb + a\x0bb 0: a\x0bb - + /a\R?b/I,bsr=anycrlf Capturing subpattern count = 0 \R matches CR, LF, or CRLF @@ -8381,7 +8570,7 @@ Subject length lower bound = 2 \= Expect no match a\x85b No match - a\x0bb + a\x0bb No match /a\R?b/I,bsr=unicode @@ -8398,9 +8587,9 @@ Subject length lower bound = 2 0: a\x0d\x0ab a\x85b 0: a\x85b - a\x0bb + a\x0bb 0: a\x0bb - + /a\R{2,4}b/I,bsr=anycrlf Capturing subpattern count = 0 \R matches CR, LF, or CRLF @@ -8416,7 +8605,7 @@ Subject length lower bound = 4 \= Expect no match a\x85\x85b No match - a\x0b\x0bb + a\x0b\x0bb No match /a\R{2,4}b/I,bsr=unicode @@ -8433,12 +8622,12 @@ Subject length lower bound = 4 0: a\x0d\x0a\x0a\x0d\x0db a\x85\x85b 0: a\x85\x85b - a\x0b\x0bb + a\x0b\x0bb 0: a\x0b\x0bb -\= Expect no match - a\r\r\r\r\rb +\= Expect no match + a\r\r\r\r\rb No match - + /(*BSR_ANYCRLF)a\Rb/I Capturing subpattern count = 0 \R matches CR, LF, or CRLF @@ -8447,7 +8636,7 @@ Last code unit = 'b' Subject length lower bound = 3 a\nb 0: a\x0ab - a\rb + a\rb 0: a\x0db /(*BSR_UNICODE)a\Rb/I @@ -8468,7 +8657,7 @@ Last code unit = 'b' Subject length lower bound = 3 a\nb 0: a\x0ab - a\rb + a\rb 0: a\x0db /(*CRLF)(*BSR_UNICODE)a\Rb/I @@ -8493,25 +8682,25 @@ Subject length lower bound = 2 Failed: error 162 at offset 9: subpattern name expected /(?<abc>)(?&a)/ -Failed: error 115 at offset 12: reference to non-existent subpattern +Failed: error 115 at offset 11: reference to non-existent subpattern /(?<a>)(?&aaaaaaaaaaaaaaaaaaaaaaa)/ -Failed: error 115 at offset 32: reference to non-existent subpattern +Failed: error 115 at offset 9: reference to non-existent subpattern /(?+-a)/ -Failed: error 163 at offset 3: digit expected after (?+ +Failed: error 129 at offset 2: digit expected after (?+ or (?- /(?-+a)/ Failed: error 111 at offset 3: unrecognized character after (? or (?- /(?(-1))/ -Failed: error 115 at offset 6: reference to non-existent subpattern +Failed: error 115 at offset 5: reference to non-existent subpattern /(?(+10))/ -Failed: error 115 at offset 7: reference to non-existent subpattern +Failed: error 115 at offset 4: reference to non-existent subpattern /(?(10))/ -Failed: error 115 at offset 6: reference to non-existent subpattern +Failed: error 115 at offset 3: reference to non-existent subpattern /(?(+2))()()/ @@ -8527,10 +8716,10 @@ Failed: error 162 at offset 3: subpattern name expected Failed: error 162 at offset 3: subpattern name expected /\k/ -Failed: error 169 at offset 1: \k is not followed by a braced, angle-bracketed, or quoted name +Failed: error 169 at offset 2: \k is not followed by a braced, angle-bracketed, or quoted name /\kabc/ -Failed: error 169 at offset 1: \k is not followed by a braced, angle-bracketed, or quoted name +Failed: error 169 at offset 2: \k is not followed by a braced, angle-bracketed, or quoted name /(?P=)/ Failed: error 162 at offset 4: subpattern name expected @@ -8571,10 +8760,10 @@ Failed: error 157 at offset 8: \g is not followed by a braced, angle-bracketed, /^(?+1)(?<a>x|y){0}z/ xzxx 0: xz - yzyy + yzyy 0: yz \= Expect no match - xxz + xxz No match /(\3)(\1)(a)/ @@ -8588,13 +8777,13 @@ No match 1: 2: 3: a - + /TA]/ - The ACTA] comes + The ACTA] comes 0: TA] /TA]/alt_bsux,allow_empty_class,match_unset_backref,dupnames - The ACTA] comes + The ACTA] comes 0: TA] /(?2)[]a()b](abc)/ @@ -8609,7 +8798,7 @@ Failed: error 115 at offset 3: reference to non-existent subpattern abcbabc 0: abcbabc 1: abc -\= Expect no match +\= Expect no match abcXabc No match @@ -8617,7 +8806,7 @@ No match abcXabc 0: abcXabc 1: abc -\= Expect no match +\= Expect no match abcbabc No match @@ -8628,11 +8817,11 @@ No match 2: xyz /(?&N)[]a(?<N>)](?<M>abc)/ -Failed: error 115 at offset 4: reference to non-existent subpattern +Failed: error 115 at offset 3: reference to non-existent subpattern abc<abc /(?&N)[]a(?<N>)](abc)/ -Failed: error 115 at offset 4: reference to non-existent subpattern +Failed: error 115 at offset 3: reference to non-existent subpattern abc<abc /a[]b/ @@ -8648,30 +8837,30 @@ No match /a[]+b/alt_bsux,allow_empty_class,match_unset_backref,dupnames \= Expect no match - ab + ab No match /a[]*+b/alt_bsux,allow_empty_class,match_unset_backref,dupnames \= Expect no match - ab + ab No match /a[^]b/alt_bsux,allow_empty_class,match_unset_backref,dupnames aXb 0: aXb - a\nb + a\nb 0: a\x0ab \= Expect no match - ab + ab No match - + /a[^]+b/alt_bsux,allow_empty_class,match_unset_backref,dupnames aXb 0: aXb - a\nX\nXb + a\nX\nXb 0: a\x0aX\x0aXb \= Expect no match - ab + ab No match /a(?!)b/B @@ -8712,8 +8901,8 @@ Options: auto_callout Subject length lower bound = 0 adc --->adc - +0 ^ (?(?=.*b)b|^) - +2 ^ (?=.*b) + +0 ^ (? + +2 ^ (?= +5 ^ .* +7 ^ ^ b +7 ^ ^ b @@ -8721,20 +8910,20 @@ Subject length lower bound = 0 +7 ^ b +11 ^ ^ +12 ^ ) -+13 ^ ++13 ^ End of pattern 0: - abc + abc --->abc - +0 ^ (?(?=.*b)b|^) - +2 ^ (?=.*b) + +0 ^ (? + +2 ^ (?= +5 ^ .* +7 ^ ^ b +7 ^ ^ b +7 ^^ b +8 ^ ^ ) +9 ^ b - +0 ^ (?(?=.*b)b|^) - +2 ^ (?=.*b) + +0 ^ (? + +2 ^ (?= +5 ^ .* +7 ^ ^ b +7 ^^ b @@ -8742,41 +8931,40 @@ Subject length lower bound = 0 +8 ^^ ) +9 ^ b +10 ^^ | -+13 ^^ ++13 ^^ End of pattern 0: b - + /(?(?=b).*b|^d)/I Capturing subpattern count = 0 Subject length lower bound = 1 /(?(?=.*b).*b|^d)/I Capturing subpattern count = 0 -First code unit at start or follows newline Subject length lower bound = 1 /xyz/auto_callout - xyz + xyz --->xyz +0 ^ x +1 ^^ y +2 ^ ^ z - +3 ^ ^ + +3 ^ ^ End of pattern 0: xyz - abcxyz + abcxyz --->abcxyz +0 ^ x +1 ^^ y +2 ^ ^ z - +3 ^ ^ + +3 ^ ^ End of pattern 0: xyz -\= Expect no match +\= Expect no match abc No match - abcxypqr + abcxypqr No match - + /xyz/auto_callout,no_start_optimize - abcxyz + abcxyz --->abcxyz +0 ^ x +0 ^ x @@ -8784,9 +8972,9 @@ No match +0 ^ x +1 ^^ y +2 ^ ^ z - +3 ^ ^ + +3 ^ ^ End of pattern 0: xyz -\= Expect no match +\= Expect no match abc --->abc +0 ^ x @@ -8794,7 +8982,7 @@ No match +0 ^ x +0 ^ x No match - abcxypqr + abcxypqr --->abcxypqr +0 ^ x +0 ^ x @@ -8808,7 +8996,7 @@ No match +0 ^ x +0 ^ x No match - + /(*NO_START_OPT)xyz/auto_callout abcxyz --->abcxyz @@ -8818,9 +9006,9 @@ No match +15 ^ x +16 ^^ y +17 ^ ^ z -+18 ^ ^ ++18 ^ ^ End of pattern 0: xyz - + /(*NO_AUTO_POSSESS)a+b/B ------------------------------------------------------------------ Bra @@ -8831,7 +9019,7 @@ No match ------------------------------------------------------------------ /xyz/auto_callout,no_start_optimize - abcxyz + abcxyz --->abcxyz +0 ^ x +0 ^ x @@ -8839,7 +9027,7 @@ No match +0 ^ x +1 ^^ y +2 ^ ^ z - +3 ^ ^ + +3 ^ ^ End of pattern 0: xyz /^"((?(?=[a])[^"])|b)*"$/auto_callout @@ -8847,28 +9035,28 @@ No match --->"ab" +0 ^ ^ +1 ^ " - +2 ^^ ((?(?=[a])[^"])|b)* - +3 ^^ (?(?=[a])[^"]) - +5 ^^ (?=[a]) + +2 ^^ ( + +3 ^^ (? + +5 ^^ (?= +8 ^^ [a] +11 ^ ^ ) +12 ^^ [^"] +16 ^ ^ ) +17 ^ ^ | - +3 ^ ^ (?(?=[a])[^"]) - +5 ^ ^ (?=[a]) + +3 ^ ^ (? + +5 ^ ^ (?= +8 ^ ^ [a] +17 ^ ^ | +21 ^ ^ " +18 ^ ^ b -+19 ^ ^ ) - +3 ^ ^ (?(?=[a])[^"]) - +5 ^ ^ (?=[a]) ++19 ^ ^ )* + +3 ^ ^ (? + +5 ^ ^ (?= +8 ^ ^ [a] +17 ^ ^ | +21 ^ ^ " +22 ^ ^ $ -+23 ^ ^ ++23 ^ ^ End of pattern 0: "ab" 1: @@ -8889,7 +9077,7 @@ Failed: error 115 at offset 5: reference to non-existent subpattern 3: c 4: d 5: Y - + /Xa{2,4}b/ X\=ps Partial match: X @@ -8901,7 +9089,7 @@ Partial match: Xaa Partial match: Xaaa Xaaaa\=ps Partial match: Xaaaa - + /Xa{2,4}?b/ X\=ps Partial match: X @@ -8913,7 +9101,7 @@ Partial match: Xaa Partial match: Xaaa Xaaaa\=ps Partial match: Xaaaa - + /Xa{2,4}+b/ X\=ps Partial match: X @@ -8925,7 +9113,7 @@ Partial match: Xaa Partial match: Xaaa Xaaaa\=ps Partial match: Xaaaa - + /X\d{2,4}b/ X\=ps Partial match: X @@ -8937,7 +9125,7 @@ Partial match: X33 Partial match: X333 X3333\=ps Partial match: X3333 - + /X\d{2,4}?b/ X\=ps Partial match: X @@ -8949,7 +9137,7 @@ Partial match: X33 Partial match: X333 X3333\=ps Partial match: X3333 - + /X\d{2,4}+b/ X\=ps Partial match: X @@ -8961,7 +9149,7 @@ Partial match: X33 Partial match: X333 X3333\=ps Partial match: X3333 - + /X\D{2,4}b/ X\=ps Partial match: X @@ -8973,7 +9161,7 @@ Partial match: Xaa Partial match: Xaaa Xaaaa\=ps Partial match: Xaaaa - + /X\D{2,4}?b/ X\=ps Partial match: X @@ -8985,7 +9173,7 @@ Partial match: Xaa Partial match: Xaaa Xaaaa\=ps Partial match: Xaaaa - + /X\D{2,4}+b/ X\=ps Partial match: X @@ -8997,7 +9185,7 @@ Partial match: Xaa Partial match: Xaaa Xaaaa\=ps Partial match: Xaaaa - + /X[abc]{2,4}b/ X\=ps Partial match: X @@ -9009,7 +9197,7 @@ Partial match: Xaa Partial match: Xaaa Xaaaa\=ps Partial match: Xaaaa - + /X[abc]{2,4}?b/ X\=ps Partial match: X @@ -9021,7 +9209,7 @@ Partial match: Xaa Partial match: Xaaa Xaaaa\=ps Partial match: Xaaaa - + /X[abc]{2,4}+b/ X\=ps Partial match: X @@ -9033,7 +9221,7 @@ Partial match: Xaa Partial match: Xaaa Xaaaa\=ps Partial match: Xaaaa - + /X[^a]{2,4}b/ X\=ps Partial match: X @@ -9045,7 +9233,7 @@ Partial match: Xzz Partial match: Xzzz Xzzzz\=ps Partial match: Xzzzz - + /X[^a]{2,4}?b/ X\=ps Partial match: X @@ -9057,7 +9245,7 @@ Partial match: Xzz Partial match: Xzzz Xzzzz\=ps Partial match: Xzzzz - + /X[^a]{2,4}+b/ X\=ps Partial match: X @@ -9069,7 +9257,7 @@ Partial match: Xzz Partial match: Xzzz Xzzzz\=ps Partial match: Xzzzz - + /(Y)X\1{2,4}b/ YX\=ps Partial match: YX @@ -9081,7 +9269,7 @@ Partial match: YXYY Partial match: YXYYY YXYYYY\=ps Partial match: YXYYYY - + /(Y)X\1{2,4}?b/ YX\=ps Partial match: YX @@ -9093,7 +9281,7 @@ Partial match: YXYY Partial match: YXYYY YXYYYY\=ps Partial match: YXYYYY - + /(Y)X\1{2,4}+b/ YX\=ps Partial match: YX @@ -9105,7 +9293,7 @@ Partial match: YXYY Partial match: YXYYY YXYYYY\=ps Partial match: YXYYYY - + /\++\KZ|\d+X|9+Y/startchar ++++123999\=ps Partial match: 123999 @@ -9121,7 +9309,7 @@ Partial match: 123999 No match ZA\=ps No match - + /Z(?!)/ \= Expect no match Z\=ps @@ -9134,7 +9322,7 @@ No match 0: dog dogs\=ph Partial match: dogs - + /dog(sbody)??/ dogs\=ps 0: dog @@ -9146,7 +9334,7 @@ Partial match: dogs 0: dog dogs\=ph 0: dog - + /dogsbody|dog/ dogs\=ps 0: dog @@ -9164,7 +9352,7 @@ Partial match: the cat 0: abc abc\=ph 0: abc - + /abc\K123/startchar xyzabc123pqr 0: abc123 @@ -9173,9 +9361,9 @@ Partial match: the cat Partial match: abc12 xyzabc12\=ph Partial match: abc12 - + /(?<=abc)123/ - xyzabc123pqr + xyzabc123pqr 0: 123 xyzabc12\=ps Partial match: abc12 @@ -9280,7 +9468,7 @@ Partial match: +ab No match xyzabcdef\=notempty No match - + /^(?:(?=abc)|abc\K)/aftertext,startchar abcdef 0: @@ -9289,7 +9477,7 @@ No match 0: abc ^^^ 0+ def -\= Expect no match +\= Expect no match abcdef\=notempty No match @@ -9309,7 +9497,7 @@ No match xyz\=notempty_atstart 0: 0+ yz -\= Expect no match +\= Expect no match xyz\=notempty No match @@ -9320,7 +9508,7 @@ No match xyzabc 0: 0+ xyzabc -\= Expect no match +\= Expect no match xyzabc\=notempty No match xyzabc\=notempty_atstart @@ -9329,7 +9517,7 @@ No match No match xyz\=notempty No match - + /^(?<name>a|b\g<name>c)/ aaaa 0: a @@ -9337,7 +9525,7 @@ No match bacxxx 0: bac 1: bac - bbaccxxx + bbaccxxx 0: bbacc 1: bbacc bbbacccxx @@ -9351,7 +9539,7 @@ No match bacxxx 0: bac 1: bac - bbaccxxx + bbaccxxx 0: bbacc 1: bbacc bbbacccxx @@ -9365,7 +9553,7 @@ No match bacxxx 0: bac 1: bac - bbaccxxx + bbaccxxx 0: bbacc 1: bbacc bbbacccxx @@ -9379,7 +9567,7 @@ No match bacxxx 0: bac 1: bac - bbaccxxx + bbaccxxx 0: bbacc 1: bbacc bbbacccxx @@ -9393,7 +9581,7 @@ No match bacxxx 0: bac 1: bac - bbaccxxx + bbaccxxx 0: bbacc 1: bbacc bbbacccxx @@ -9409,7 +9597,7 @@ No match 0: bac 1: bac 2: bac - bbaccxxx + bbaccxxx 0: bbacc 1: bbacc 2: bbacc @@ -9422,7 +9610,7 @@ No match XaaX 0: aa 1: a - XAAX + XAAX 0: AA 1: A @@ -9430,15 +9618,15 @@ No match XaaX 0: aa 1: a -\= Expect no match - XAAX +\= Expect no match + XAAX No match /(?-i:\g<+1>)(?i:(a))/ XaaX 0: aa 1: a - XAAX + XAAX 0: AA 1: A @@ -9448,7 +9636,7 @@ No match abc 0: abc 1: a - accccbbb + accccbbb 0: accccbbb 1: a @@ -9467,27 +9655,26 @@ No match xbaax 0: a 1: a - xzzzax + xzzzax 0: a 1: a /(a)(?<=b\1)/ -Failed: error 125 at offset 10: lookbehind assertion is not fixed length /(a)(?<=b+(?1))/ -Failed: error 125 at offset 13: lookbehind assertion is not fixed length +Failed: error 125 at offset 3: lookbehind assertion is not fixed length /(a+)(?<=b(?1))/ -Failed: error 125 at offset 14: lookbehind assertion is not fixed length +Failed: error 125 at offset 4: lookbehind assertion is not fixed length /(a(?<=b(?1)))/ -Failed: error 125 at offset 13: lookbehind assertion is not fixed length +Failed: error 125 at offset 2: lookbehind assertion is not fixed length /(?<=b(?1))xyz/ Failed: error 115 at offset 8: reference to non-existent subpattern /(?<=b(?1))xyz(b+)pqrstuvew/ -Failed: error 125 at offset 26: lookbehind assertion is not fixed length +Failed: error 125 at offset 0: lookbehind assertion is not fixed length /(a|bc)\1/I Capturing subpattern count = 1 @@ -9534,6 +9721,7 @@ Subject length lower bound = 2 Capturing subpattern count = 1 Compile options: <none> Overall options: anchored +First code unit = 'F' Last code unit = ':' Subject length lower bound = 22 @@ -9584,7 +9772,7 @@ Subject length lower bound = 9 (?: [0-9a-f]{1,4} | # 1-4 hex digits or (?(1)0 | () ) ) # if null previously matched, fail; else null : # followed by colon - ){1,7} # end item; 1-7 of them required + ){1,7} # end item; 1-7 of them required [0-9a-f]{1,4} $ # final hex number at end of string (?(1)|.) # check that there was an empty component /Iix @@ -9611,10 +9799,10 @@ Subject length lower bound = 1 C B (1) a (group 1) /(?|(?<a>A)|(?<b>B))/ -Failed: error 165 at offset 15: different names for subpatterns of the same number are not allowed +Failed: error 165 at offset 16: different names for subpatterns of the same number are not allowed /(?:a(?<quote> (?<apostrophe>')|(?<realquote>")) | - b(?<quote> (?<apostrophe>')|(?<realquote>")) ) + b(?<quote> (?<apostrophe>')|(?<realquote>")) ) (?('quote')[a-z]+|[0-9]+)/Ix,dupnames Capturing subpattern count = 6 Max back reference = 4 @@ -9633,7 +9821,7 @@ Subject length lower bound = 3 1: " 2: <unset> 3: " - b"aaaaa + b"aaaaa 0: b"aaaaa 1: <unset> 2: <unset> @@ -9641,12 +9829,12 @@ Subject length lower bound = 3 4: " 5: <unset> 6: " -\= Expect no match +\= Expect no match b"11111 No match - a"11111 + a"11111 No match - + /^(?|(a)(b)(c)(?<D>d)|(?<D>e)) (?('D')X|Y)/IBx,dupnames ------------------------------------------------------------------ Bra @@ -9685,6 +9873,7 @@ Named capturing subpatterns: D 1 Compile options: dupnames extended Overall options: anchored dupnames extended +Starting code units: a e Subject length lower bound = 2 abcdX 0: abcdX @@ -9698,9 +9887,9 @@ Subject length lower bound = 2 \= Expect no match abcdY No match - ey + ey No match - + /(?<A>a) (b)(c) (?<A>d (?(R&A)$ | (?4)) )/IBx,dupnames ------------------------------------------------------------------ Bra @@ -9741,7 +9930,7 @@ Subject length lower bound = 4 3: c 4: dd \= Expect no match - abcdde + abcdde No match /abcd*/ @@ -9779,29 +9968,6 @@ Partial match: abca xxxxabcde\=ph Partial match: abcde -# This is not in the Perl-compatible test because Perl seems currently to be -# broken and not behaving as specified in that it *does* bumpalong after -# hitting (*COMMIT). - -/(?1)(A(*COMMIT)|B)D/ - ABD - 0: ABD - 1: B - XABD - 0: ABD - 1: B - BAD - 0: BAD - 1: A - ABXABD - 0: ABD - 1: B -\= Expect no match - ABX -No match - BAXBAD -No match - /(\3)(\1)(a)/alt_bsux,allow_empty_class,match_unset_backref,dupnames cat 0: a @@ -9838,7 +10004,7 @@ First code unit = 'i' Subject length lower bound = 1 i 0: i - + /()i(?(1)a)/I Capturing subpattern count = 1 Max back reference = 1 @@ -9862,10 +10028,10 @@ Subject length lower bound = 1 0: ab XAbX 0: Ab - CcC + CcC 0: c \= Expect no match - XABX + XABX No match /(?i)a(?s)b|c/B @@ -9925,7 +10091,7 @@ No match 0: xabcxd 1: abcxd 2: cx - + /^(?&t)*+(?(DEFINE)(?<t>.))$/B ------------------------------------------------------------------ Bra @@ -9950,7 +10116,7 @@ No match Bra ^ Brazero - Once + SBra Recurse KetRmax Cond @@ -9966,9 +10132,9 @@ No match # This one is here because Perl gives the match as "b" rather than "ab". I # believe this to be a Perl bug. - + /(?>a\Kb)z|(ab)/ - ab\=startchar + ab\=startchar 0: ab 1: ab @@ -9977,7 +10143,7 @@ No match 0: 1: 2: - 0abc + 0abc 0: 0 1: 0 2: 0 @@ -9989,9 +10155,9 @@ Failed: error 166 at offset 10: (*MARK) must have an argument Failed: error 166 at offset 6: (*MARK) must have an argument /abc(*FAIL:123)xyz/ -Failed: error 159 at offset 13: an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT) +Failed: error 159 at offset 10: an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT) -# This should, and does, fail. In Perl, it does not, which I think is a +# This should, and does, fail. In Perl, it does not, which I think is a # bug because replacing the B in the pattern by (B|D) does make it fail. /A(*COMMIT)B/aftertext,mark @@ -10010,7 +10176,7 @@ No match \= Expect no match AC No match - + # Mark names can be duplicated. Perl doesn't give a mark for this one, # though PCRE2 does. @@ -10018,35 +10184,35 @@ No match \= Expect no match XAQQ No match, mark = A - -# COMMIT at the start of a pattern should be the same as an anchor. Perl + +# COMMIT at the start of a pattern should be the same as an anchor. Perl # optimizations defeat this. So does the PCRE2 optimization unless we disable # it. /(*COMMIT)ABC/ ABCDEFG 0: ABC - + /(*COMMIT)ABC/no_start_optimize \= Expect no match DEFGABC No match - + /^(ab (c+(*THEN)cd) | xyz)/x \= Expect no match - abcccd + abcccd No match /^(ab (c+(*PRUNE)cd) | xyz)/x \= Expect no match - abcccd + abcccd No match /^(ab (c+(*FAIL)cd) | xyz)/x \= Expect no match - abcccd + abcccd No match - + # Perl gets some of these wrong /(?>.(*ACCEPT))*?5/ @@ -10083,7 +10249,7 @@ No match \= Expect no match A\nB No match - ACB\n + ACB\n No match /A\NB./Bs @@ -10098,19 +10264,19 @@ No match ------------------------------------------------------------------ ACBD 0: ACBD - ACB\n + ACB\n 0: ACB\x0a \= Expect no match - A\nB + A\nB No match - + /A\NB/newline=crlf A\nB 0: A\x0aB A\rB 0: A\x0dB \= Expect no match - A\r\nB + A\r\nB No match /\R+b/B @@ -10288,7 +10454,7 @@ No match \= Bad offsets abc\=offset=4 Failed: error -33: bad offset value - abc\=offset=-4 + abc\=offset=-4 ** Invalid value in 'offset=-4' \= Valid data abc\=offset=0 @@ -10335,7 +10501,7 @@ Failed: error 168 at offset 3: \c must be followed by a printable ASCII characte Failed: error 142 at offset 29: syntax error in subpattern name (missing terminator) /(?P<abn>(?P=axn)xxx)/B -Failed: error 115 at offset 15: reference to non-existent subpattern +Failed: error 115 at offset 12: reference to non-existent subpattern /(?P<abn>(?P=axn)xxx)(?<axn>yy)/B ------------------------------------------------------------------ @@ -10351,18 +10517,18 @@ Failed: error 115 at offset 15: reference to non-existent subpattern End ------------------------------------------------------------------ -# These tests are here because Perl gets the first one wrong. +# These tests are here because Perl gets the first one wrong. /(\R*)(.)/s \r\n 0: \x0d 1: 2: \x0d - \r\r\n\n\r + \r\r\n\n\r 0: \x0d\x0d\x0a\x0a\x0d 1: \x0d\x0d\x0a\x0a 2: \x0d - \r\r\n\n\r\n + \r\r\n\n\r\n 0: \x0d\x0d\x0a\x0a\x0d 1: \x0d\x0d\x0a\x0a 2: \x0d @@ -10372,11 +10538,11 @@ Failed: error 115 at offset 15: reference to non-existent subpattern 0: \x0d 1: <unset> 2: \x0d - \r\r\n\n\r + \r\r\n\n\r 0: \x0d\x0d\x0a\x0a\x0d 1: \x0a 2: \x0d - \r\r\n\n\r\n + \r\r\n\n\r\n 0: \x0d\x0d\x0a\x0a\x0d 1: \x0a 2: \x0d @@ -10386,16 +10552,16 @@ Failed: error 115 at offset 15: reference to non-existent subpattern 0: \x0d 1: 2: \x0d - \r\r\n\n\r + \r\r\n\n\r 0: \x0d\x0d\x0a\x0a\x0d 1: \x0d\x0d\x0a\x0a 2: \x0d - \r\r\n\n\r\n + \r\r\n\n\r\n 0: \x0d\x0d\x0a\x0a\x0d 1: \x0d\x0d\x0a\x0a 2: \x0d -# ------------- +# ------------- /^abc$/B ------------------------------------------------------------------ @@ -10422,7 +10588,7 @@ Failed: error 115 at offset 15: reference to non-existent subpattern 0: aaaaX 1: a 2: X -\= Expect no match +\= Expect no match aaaa No match @@ -10430,7 +10596,7 @@ No match aaaaX 0: aaaaX 1: X -\= Expect no match +\= Expect no match aaaa No match @@ -10456,18 +10622,20 @@ Last code unit = '4' Subject length lower bound = 5 /(?<=(abc)+)X/ -Failed: error 125 at offset 10: lookbehind assertion is not fixed length +Failed: error 125 at offset 0: lookbehind assertion is not fixed length /(^ab)/I Capturing subpattern count = 1 Compile options: <none> Overall options: anchored +First code unit = 'a' Subject length lower bound = 2 /(^ab)++/I Capturing subpattern count = 1 Compile options: <none> Overall options: anchored +First code unit = 'a' Subject length lower bound = 2 /(^ab|^)+/I @@ -10488,12 +10656,14 @@ Subject length lower bound = 0 Capturing subpattern count = 0 Compile options: <none> Overall options: anchored +First code unit = 'a' Subject length lower bound = 2 /(?:^ab)++/I Capturing subpattern count = 0 Compile options: <none> Overall options: anchored +First code unit = 'a' Subject length lower bound = 2 /(?:^ab|^)+/I @@ -10605,7 +10775,7 @@ Subject length lower bound = 1 /(abc)\1+/ -# Perl doesn't get these right IMO (the 3rd is PCRE2-specific) +# Perl doesn't get these right IMO (the 3rd is PCRE2-specific) /(?1)(?:(b(*ACCEPT))){0}/ b @@ -10614,8 +10784,8 @@ Subject length lower bound = 1 /(?1)(?:(b(*ACCEPT))){0}c/ bc 0: bc -\= Expect no match - b +\= Expect no match + b No match /(?1)(?:((*ACCEPT))){0}c/ @@ -10625,7 +10795,7 @@ No match 0: c /^.*?(?(?=a)a|b(*THEN)c)/ -\= Expect no match +\= Expect no match ba No match @@ -10634,20 +10804,20 @@ No match 0: ba /^.*?(?(?=a)a(*THEN)b|c)/ -\= Expect no match +\= Expect no match ac No match /^.*?(?(?=a)a(*THEN)b)c/ -\= Expect no match +\= Expect no match ac No match /^.*?(a(*THEN)b)c/ -\= Expect no match +\= Expect no match aabc No match - + /^.*? (?1) c (?(DEFINE)(a(*THEN)b))/x aabc 0: aabc @@ -10670,12 +10840,12 @@ No match 0: C 1: C MK: A -\= Expect no match +\= Expect no match D No match, mark = A - + /(*:A)A+(*SKIP:A)(B|Z)/mark -\= Expect no match +\= Expect no match AAAC No match, mark = A @@ -10686,17 +10856,17 @@ No match, mark = A 0: c c\=notempty 0: c - + /(?1)c(?(DEFINE)((*ACCEPT)b))/ c 0: c c\=notempty 0: c - + /(?>(*ACCEPT)b)c/ c 0: -\= Expect no match +\= Expect no match c\=notempty No match @@ -10711,7 +10881,7 @@ No match ac\=ovector=1 0: ac 0+ - + /(a)(b)x|abc/allaftertext abc\=ovector=2 0: abc @@ -10744,7 +10914,8 @@ Matched, but too many substrings /(?(DEFINE)(a(?2)|b)(b(?1)|a))(?:(?1)|(?2))/I Capturing subpattern count = 2 -Subject length lower bound = 1 +May match empty string +Subject length lower bound = 0 /(a(?2)|b)(b(?1)|a)(?:(?1)|(?2))/I Capturing subpattern count = 2 @@ -10783,7 +10954,7 @@ Subject length lower bound = 6 1: <unset> 2: <unset> 3: baz - foobarbazX + foobarbazX 0: bazX 1: <unset> 2: <unset> @@ -10966,26 +11137,26 @@ Matched, but too many substrings adz --->adz +0 ^ ^ - +1 ^ (a(*:A)(d|e(*:B))z|aeq) + +1 ^ ( +2 ^ a +3 ^^ (*:A) - +8 ^^ (d|e(*:B)) + +8 ^^ ( Latest Mark: A +9 ^^ d +10 ^ ^ | +18 ^ ^ z +19 ^ ^ | -+24 ^ ^ ++24 ^ ^ End of pattern 0: adz 1: adz 2: d aez --->aez +0 ^ ^ - +1 ^ (a(*:A)(d|e(*:B))z|aeq) + +1 ^ ( +2 ^ a +3 ^^ (*:A) - +8 ^^ (d|e(*:B)) + +8 ^^ ( Latest Mark: A +9 ^^ d +11 ^^ e @@ -10994,17 +11165,17 @@ Latest Mark: A Latest Mark: B +18 ^ ^ z +19 ^ ^ | -+24 ^ ^ ++24 ^ ^ End of pattern 0: aez 1: aez 2: e aeqwerty --->aeqwerty +0 ^ ^ - +1 ^ (a(*:A)(d|e(*:B))z|aeq) + +1 ^ ( +2 ^ a +3 ^^ (*:A) - +8 ^^ (d|e(*:B)) + +8 ^^ ( Latest Mark: A +9 ^^ d +11 ^^ e @@ -11016,7 +11187,7 @@ Latest Mark: B +21 ^^ e +22 ^ ^ q +23 ^ ^ ) -+24 ^ ^ ++24 ^ ^ End of pattern 0: aeq 1: aeq @@ -11083,7 +11254,7 @@ Subject length lower bound = 0 ------------------------------------------------------------------ Bra ^ - Once_NC + Once a++ Ket Once @@ -11099,7 +11270,7 @@ Subject length lower bound = 0 0: aaaazzzzb 1: zzzz \= Expect no match - aazz + aazz No match /(.)(\1|a(?2))/ @@ -11107,15 +11278,15 @@ No match 0: bab 1: b 2: ab - + /\1|(.)(?R)\1/ cbbbc 0: cbbbc 1: c - + /(.)((?(1)c|a)|a(?2))/ \= Expect no match - baa + baa No match /(?P<abn>(?P=abn)xxx)/B @@ -11223,14 +11394,14 @@ No match ------------------------------------------------------------------ /a[\NB]c/ -Failed: error 171 at offset 3: \N is not supported in a class +Failed: error 171 at offset 4: \N is not supported in a class aNc - + /a[B-\Nc]/ -Failed: error 150 at offset 5: invalid range in character class +Failed: error 150 at offset 6: invalid range in character class /a[B\Nc]/ -Failed: error 171 at offset 4: \N is not supported in a class +Failed: error 171 at offset 5: \N is not supported in a class /(a)(?2){0,1999}?(b)/ @@ -11239,14 +11410,14 @@ Failed: error 171 at offset 4: \N is not supported in a class # This test, with something more complicated than individual letters, causes # different behaviour in Perl. Perhaps it disables some optimization; no tag is # passed back for the failures, whereas in PCRE2 there is a tag. - + /(A|P)(*:A)(B|P) | (X|P)(X|P)(*:B)(Y|P)/x,mark AABC 0: AB 1: A 2: B MK: A - XXYZ + XXYZ 0: XXY 1: <unset> 2: <unset> @@ -11255,40 +11426,40 @@ MK: A 5: Y MK: B \= Expect no match - XAQQ + XAQQ No match, mark = A - XAQQXZZ + XAQQXZZ No match, mark = A - AXQQQ + AXQQQ No match, mark = A - AXXQQQ + AXXQQQ No match, mark = B # Perl doesn't give marks for these, though it does if the alternatives are -# replaced by single letters. - +# replaced by single letters. + /(b|q)(*:m)f|a(*:n)w/mark - aw + aw 0: aw MK: n -\= Expect no match +\= Expect no match abc No match, mark = m /(q|b)(*:m)f|a(*:n)w/mark - aw + aw 0: aw MK: n -\= Expect no match +\= Expect no match abc No match, mark = m -# After a partial match, the behaviour is as for a failure. +# After a partial match, the behaviour is as for a failure. /^a(*:X)bcde/mark abc\=ps Partial match, mark=X: abc - + # These are here because Perl doesn't return a mark, except for the first. /(?=(*:x))(q|)/aftertext,mark @@ -11418,7 +11589,7 @@ Partial match: ababa abababx 0: abababx 1: ab - ababababx + ababababx 0: ababababx 1: ab @@ -11432,10 +11603,10 @@ Partial match: ababa abababx 0: abababx 1: ab - ababababx + ababababx 0: ababababx 1: ab - + /^(..)(\1{2,3})ab/ abababab 0: abababab @@ -11447,7 +11618,7 @@ Partial match: ababa 0: \x0d \r\=ph Partial match: \x0d - + /^\R{2,3}x/ \r\=ps Partial match: \x0d @@ -11463,7 +11634,7 @@ Partial match: \x0d\x0d\x0d Partial match: \x0d\x0d\x0d \r\rx 0: \x0d\x0dx - \r\r\rx + \r\r\rx 0: \x0d\x0d\x0dx /^\R{2,3}?x/ @@ -11481,9 +11652,9 @@ Partial match: \x0d\x0d\x0d Partial match: \x0d\x0d\x0d \r\rx 0: \x0d\x0dx - \r\r\rx + \r\r\rx 0: \x0d\x0d\x0dx - + /^\R?x/ \r\=ps Partial match: \x0d @@ -11491,7 +11662,7 @@ Partial match: \x0d Partial match: \x0d x 0: x - \rx + \rx 0: \x0dx /^\R+x/ @@ -11503,7 +11674,7 @@ Partial match: \x0d Partial match: \x0d\x0a \r\n\=ph Partial match: \x0d\x0a - \rx + \rx 0: \x0dx /^a$/newline=crlf @@ -11537,7 +11708,7 @@ Partial match: a\x0d 0: \x0d \r\=ph Partial match: \x0d - + /.{2,3}/newline=crlf \r\=ps Partial match: \x0d @@ -11570,9 +11741,9 @@ Partial match: \x0d\x0d ABCDGHI\=ovector=01 Matched, but too many substrings 0: ABCD - + # These are all run as real matches in test 1; here we are just checking the -# settings of the anchored and startline bits. +# settings of the anchored and startline bits. /(?>.*?a)(?<=ba)/I Capturing subpattern count = 0 @@ -11602,6 +11773,7 @@ Subject length lower bound = 2 Capturing subpattern count = 0 Compile options: dotall Overall options: anchored dotall +First code unit = 'a' Subject length lower bound = 2 /.*?a(*SKIP)b/I @@ -11624,6 +11796,7 @@ Subject length lower bound = 2 Capturing subpattern count = 0 Compile options: dotall Overall options: anchored dotall +First code unit = 'a' Subject length lower bound = 2 /(?>.*?)(?<=(abcd)|(wxyz))/I @@ -11671,43 +11844,36 @@ Subject length lower bound = 3 /(?:(a)+(?C1)bb|aa(?C2)b)/ aab\=callout_capture Callout 1: last capture = 1 - 0: <unset> 1: a --->aab ^ ^ b Callout 1: last capture = 1 - 0: <unset> 1: a --->aab ^^ b Callout 2: last capture = 0 - 0: <unset> --->aab ^ ^ b 0: aab - + /(?:(a)++(?C1)bb|aa(?C2)b)/ aab\=callout_capture Callout 1: last capture = 1 - 0: <unset> 1: a --->aab ^ ^ b Callout 2: last capture = 0 - 0: <unset> --->aab ^ ^ b 0: aab - + /(?:(?>(a))(?C1)bb|aa(?C2)b)/ aab\=callout_capture Callout 1: last capture = 1 - 0: <unset> 1: a --->aab ^^ b Callout 2: last capture = 0 - 0: <unset> --->aab ^ ^ b 0: aab @@ -11715,15 +11881,12 @@ Callout 2: last capture = 0 /(?:(?1)(?C1)x|ab(?C2))((a)){0}/ aab\=callout_capture Callout 1: last capture = 0 - 0: <unset> --->aab ^^ x Callout 1: last capture = 0 - 0: <unset> --->aab ^^ x Callout 2: last capture = 0 - 0: <unset> --->aab ^ ^ ) 0: ab @@ -11731,45 +11894,39 @@ Callout 2: last capture = 0 /(?1)(?C1)((a)(?C2)){0}/ aab\=callout_capture Callout 2: last capture = 2 - 0: <unset> 1: <unset> 2: a --->aab - ^^ ) + ^^ ){0} Callout 1: last capture = 0 - 0: <unset> --->aab - ^^ ((a)(?C2)){0} + ^^ ( 0: a /(?:(a)+(?C1)bb|aa(?C2)b)++/ aab\=callout_capture Callout 1: last capture = 1 - 0: <unset> 1: a --->aab ^ ^ b Callout 1: last capture = 1 - 0: <unset> 1: a --->aab ^^ b Callout 2: last capture = 0 - 0: <unset> --->aab ^ ^ b 0: aab aab\=callout_capture,ovector=1 Callout 1: last capture = 1 - 0: <unset> + 1: a --->aab ^ ^ b Callout 1: last capture = 1 - 0: <unset> + 1: a --->aab ^^ b Callout 2: last capture = 0 - 0: <unset> --->aab ^ ^ b 0: aab @@ -11779,7 +11936,7 @@ Callout 2: last capture = 0 0: ab ab\=ovector=1 0: ab - + /(?<=123)(*MARK:xx)abc/mark xxxx123a\=ph Partial match, mark=xx: 123a @@ -11787,7 +11944,7 @@ Partial match, mark=xx: 123a xxxx123a\=ps Partial match, mark=xx: 123a <<< - + /123\Kabc/startchar xxxx123a\=ph Partial match: 123a @@ -11798,26 +11955,26 @@ Partial match: 123a bb --->bb +0 ^ ^ - +1 ^ (?(?=a)aa|bb) - +3 ^ (?=a) + +1 ^ (? + +3 ^ (?= +6 ^ a +11 ^ b +12 ^^ b +13 ^ ^ ) -+14 ^ ^ ++14 ^ ^ End of pattern 0: bb /(?C1)^(?C2)(?(?C99)(?=(?C3)a(?C4))(?C5)a(?C6)a(?C7)|(?C8)b(?C9)b(?C10))(?C11)/ bb --->bb 1 ^ ^ - 2 ^ (?(?C99)(?=(?C3)a(?C4))(?C5)a(?C6)a(?C7)|(?C8)b(?C9)b(?C10)) - 99 ^ (?=(?C3)a(?C4)) + 2 ^ (? + 99 ^ (?= 3 ^ a 8 ^ b 9 ^^ b 10 ^ ^ ) - 11 ^ ^ + 11 ^ ^ End of pattern 0: bb # Perl seems to have a bug with this one. @@ -11825,18 +11982,18 @@ Partial match: 123a /aaaaa(*COMMIT)(*PRUNE)b|a+c/ aaaaaac 0: aaaac - + # Here are some that Perl treats differently because of the way it handles -# backtracking verbs. +# backtracking verbs. /(?!a(*COMMIT)b)ac|ad/ ac 0: ac - ad + ad 0: ad /^(?!a(*THEN)b|ac)../ - ad + ad 0: ad \= Expect no match ac @@ -11845,7 +12002,7 @@ No match /^(?=a(*THEN)b|ac)/ ac 0: - + /\A.*?(?:a|b(*THEN)c)/ ba 0: ba @@ -11859,7 +12016,7 @@ No match 0: ba /(?:(a(*MARK:X)a+(*SKIP:X)b)){0}(?:(?1)|aac)/ - aac + aac 0: aac /\A.*?(a|b(*THEN)c)/ @@ -11868,24 +12025,23 @@ No match 1: a /^(A(*THEN)B|A(*THEN)D)/ - AD + AD 0: AD 1: AD - + /(?!b(*THEN)a)bn|bnn/ bnn 0: bn /(?(?=b(*SKIP)a)bn|bnn)/ -\= Expect no match bnn -No match + 0: bnn /(?=b(*THEN)a|)bn|bnn/ bnn 0: bn -# This test causes a segfault with Perl 5.18.0 +# This test causes a segfault with Perl 5.18.0 /^(?=(a)){0}b(?1)/ backgammon @@ -12545,7 +12701,7 @@ Subject length lower bound = 5 Ket a CBraPos 1 - a++ + a+ KetRpos a Ket @@ -12568,7 +12724,7 @@ Subject length lower bound = 5 cc Ket a++ - Once_NC + Once bb Alt cc @@ -12917,7 +13073,7 @@ Subject length lower bound = 5 ------------------------------------------------------------------ Bra [a-f]*+ - Once_NC + Once gg Alt hh @@ -12925,7 +13081,7 @@ Subject length lower bound = 5 # [a-f]*+ Brazero - Once_NC + Once gg Alt hh @@ -12933,7 +13089,7 @@ Subject length lower bound = 5 # [a-f]* Brazero - Once_NC + Once gg Alt hh @@ -12941,7 +13097,7 @@ Subject length lower bound = 5 a# [a-f]*+ Brazero - Once_NC + Once gg Alt hh @@ -13016,15 +13172,15 @@ Starting code units: a b c d Last code unit = 'd' Subject length lower bound = 1 -# End of special auto-possessive tests +# End of special auto-possessive tests /^A\o{1239}B/ Failed: error 164 at offset 8: non-octal character in \o{} (closing brace missing?) A\123B /^A\oB/ -Failed: error 155 at offset 3: missing opening brace after \o - +Failed: error 155 at offset 4: missing opening brace after \o + /^A\x{zz}B/ Failed: error 167 at offset 5: non-hex character in \x{} (closing brace missing?) @@ -13032,7 +13188,7 @@ Failed: error 167 at offset 5: non-hex character in \x{} (closing brace missing? Failed: error 167 at offset 7: non-hex character in \x{} (closing brace missing?) /^A\x{/ -Failed: error 167 at offset 5: non-hex character in \x{} (closing brace missing?) +Failed: error 178 at offset 5: digits missing in \x{} or \o{} /[ab]++/B,no_auto_possess ------------------------------------------------------------------ @@ -13067,16 +13223,16 @@ Failed: error 167 at offset 5: non-hex character in \x{} (closing brace missing? ------------------------------------------------------------------ /[a-[:digit:]]+/ -Failed: error 150 at offset 3: invalid range in character class +Failed: error 150 at offset 4: invalid range in character class /[A-[:digit:]]+/ -Failed: error 150 at offset 3: invalid range in character class +Failed: error 150 at offset 4: invalid range in character class /[a-[.xxx.]]+/ -Failed: error 150 at offset 3: invalid range in character class +Failed: error 150 at offset 4: invalid range in character class /[a-[=xxx=]]+/ -Failed: error 150 at offset 3: invalid range in character class +Failed: error 150 at offset 4: invalid range in character class /[a-[!xxx!]]+/ Failed: error 108 at offset 3: range out of order in character class @@ -13086,13 +13242,13 @@ Failed: error 108 at offset 3: range out of order in character class 0: A]]] /[a-\d]+/ -Failed: error 150 at offset 4: invalid range in character class +Failed: error 150 at offset 5: invalid range in character class /(?<0abc>xx)/ Failed: error 144 at offset 3: group name must start with a non-digit /(?&1abc)xx(?<1abc>y)/ -Failed: error 144 at offset 13: group name must start with a non-digit +Failed: error 144 at offset 3: group name must start with a non-digit /(?<ab-cd>xx)/ Failed: error 142 at offset 5: syntax error in subpattern name (missing terminator) @@ -13116,13 +13272,13 @@ Failed: error 144 at offset 3: group name must start with a non-digit Failed: error 144 at offset 4: group name must start with a non-digit /\g{4df}/ -Failed: error 144 at offset 3: group name must start with a non-digit +Failed: error 157 at offset 2: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number /(?&1abc)xx(?<1abc>y)/ -Failed: error 144 at offset 13: group name must start with a non-digit +Failed: error 144 at offset 3: group name must start with a non-digit /(?P>1abc)xx(?<1abc>y)/ -Failed: error 144 at offset 14: group name must start with a non-digit +Failed: error 144 at offset 4: group name must start with a non-digit /\g'3gh'/ Failed: error 157 at offset 2: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number @@ -13137,7 +13293,7 @@ Failed: error 144 at offset 4: group name must start with a non-digit Failed: error 144 at offset 4: group name must start with a non-digit /(?(4gh)abc)/ -Failed: error 126 at offset 4: malformed number or name after (?( +Failed: error 124 at offset 4: missing closing parenthesis for condition /(?(R&6yh)abc)/ Failed: error 144 at offset 5: group name must start with a non-digit @@ -13186,18 +13342,18 @@ Failed: error 144 at offset 5: group name must start with a non-digit ------------------------------------------------------------------ little red riding hood 0: red - a /red/ thing + a /red/ thing 0: red red is a colour 0: red - put it all on red + put it all on red 0: red \= Expect no match no reduction No match Alfred Winifred No match - + /[a[:<:]] should give error/ Failed: error 130 at offset 4: unknown POSIX class name @@ -13211,14 +13367,14 @@ Start of matched string is beyond its end - displaying from end to start. \= Expect no match xx\nxabcd No match - + # Test stack guard external calls. /(((a)))/stackguard=1 -Failed: error 133 at offset 2: parentheses are too deeply nested (stack check) +Failed: error 133 at offset 7: parentheses are too deeply nested (stack check) /(((a)))/stackguard=2 -Failed: error 133 at offset 3: parentheses are too deeply nested (stack check) +Failed: error 133 at offset 7: parentheses are too deeply nested (stack check) /(((a)))/stackguard=3 @@ -13231,7 +13387,7 @@ Failed: error 133 at offset 3: parentheses are too deeply nested (stack check) Bra ^ \w+ - Once_NC + Once \s*+ Ket AssertB @@ -13243,10 +13399,10 @@ Failed: error 133 at offset 3: parentheses are too deeply nested (stack check) ------------------------------------------------------------------ /\othing/ -Failed: error 155 at offset 1: missing opening brace after \o +Failed: error 155 at offset 2: missing opening brace after \o /\o{}/ -Failed: error 178 at offset 1: digits missing in \x{} or \o{} +Failed: error 178 at offset 3: digits missing in \x{} or \o{} /\o{whatever}/ Failed: error 164 at offset 3: non-octal character in \o{} (closing brace missing?) @@ -13265,9 +13421,9 @@ Failed: error 115 at offset 2: reference to non-existent subpattern /A\9B/ Failed: error 115 at offset 2: reference to non-existent subpattern -# This one is here because Perl fails to match "12" for this pattern when the $ +# This one is here because Perl fails to match "12" for this pattern when the $ # is present. - + /^(?(?=abc)\w{3}:|\d\d)$/ abc: 0: abc: @@ -13276,10 +13432,10 @@ Failed: error 115 at offset 2: reference to non-existent subpattern \= Expect no match 123 No match - xyz + xyz No match -# Perl gets this one wrong, giving "a" as the after text for ca and failing to +# Perl gets this one wrong, giving "a" as the after text for ca and failing to # match for cd. /(?(?=ab)ab)/aftertext @@ -13289,11 +13445,11 @@ No match ca 0: 0+ ca - cd + cd 0: 0+ cd - -# This should test both paths for processing OP_RECURSE. + +# This should test both paths for processing OP_RECURSE. /(?(R)a+|(?R)b)/ aaaabcde @@ -13310,14 +13466,14 @@ No match 0: a ba 0: b - cb + cb 0: b /(*NOTEMPTY_ATSTART)a*?b*?/aftertext ab 0: a 0+ b - cdab + cdab 0: 0+ dab @@ -13326,7 +13482,7 @@ Capturing subpattern count = 0 Subject length lower bound = 2 yesno 0: yes - + /(?(VERSION=8)yes){3}/BI,aftertext ------------------------------------------------------------------ Bra @@ -13350,7 +13506,7 @@ Subject length lower bound = 6 yesnononoyes 0: nonono \= Expect no match - yesno + yesno No match /(?:(?<VERSION>abc)|xyz)(?(VERSION)yes|no)/I @@ -13368,20 +13524,20 @@ Subject length lower bound = 5 \= Expect no match abcno No match - xyzyes + xyzyes No match /(?(VERSION<10)yes|no)/ -Failed: error 179 at offset 10: syntax error in (?(VERSION condition +Failed: error 179 at offset 10: syntax error or number too big in (?(VERSION condition /(?(VERSION>10)yes|no)/ -Failed: error 179 at offset 11: syntax error in (?(VERSION condition +Failed: error 179 at offset 11: syntax error or number too big in (?(VERSION condition /(?(VERSION>=10.0.0)yes|no)/ -Failed: error 179 at offset 16: syntax error in (?(VERSION condition +Failed: error 179 at offset 16: syntax error or number too big in (?(VERSION condition /(?(VERSION=10.101)yes|no)/ -Failed: error 179 at offset 17: syntax error in (?(VERSION condition +Failed: error 179 at offset 17: syntax error or number too big in (?(VERSION condition /abcd/I Capturing subpattern count = 0 @@ -13402,18 +13558,17 @@ Subject length lower bound = 1 abd 0: abd 1: ab - xyd + xyd 0: d /(|ab)*?d/I,no_start_optimize Capturing subpattern count = 1 Options: no_start_optimize -Last code unit = 'd' Subject length lower bound = 0 abd 0: abd 1: ab - xyd + xyd 0: d /\k<A>*(?<A>aa)(?<A>bb)/match_unset_backref,dupnames @@ -13500,7 +13655,7 @@ Failed: error -58 at offset 4 in replacement: expected closing curly bracket in /abc/replace=[9]XYZ 123abc123 Failed: error -48: no more memory - + /abc/replace=xyz 1abc2\=partial_hard Failed: error -34: bad option value @@ -13518,29 +13673,29 @@ Failed: error -34: bad option value /(?<=abc)(|def)/g,replace=<$0> 123abcxyzabcdef789abcpqr 4: 123abc<>xyzabc<><def>789abc<>pqr - + /./replace=$0 a 1: a - + /(.)(.)/replace=$2+$1 abc 1: b+ac - + /(?<A>.)(?<B>.)/replace=$B+$A abc 1: b+ac - + /(.)(.)/g,replace=$2$1 - abcdefgh + abcdefgh 4: badcfehg - + /(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=${*MARK} apple lemon blackberry 3: pear orange strawberry apple strudel 1: pear strudel - fruitless + fruitless 0: fruitless /(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/replace=${*MARK} sauce, @@ -13552,10 +13707,10 @@ Failed: error -34: bad option value 3: <pear> <orange> <strawberry> apple strudel 1: <pear> strudel - fruitless + fruitless 0: fruitless - -/(*:pear)apple/g,replace=${*MARKING} + +/(*:pear)apple/g,replace=${*MARKING} apple lemon blackberry Failed: error -35 at offset 11 in replacement: invalid replacement string @@ -13563,7 +13718,7 @@ Failed: error -35 at offset 11 in replacement: invalid replacement string apple lemon blackberry Failed: error -58 at offset 7 in replacement: expected closing curly bracket in replacement string -/(*:pear)apple/g,replace=${*mark} +/(*:pear)apple/g,replace=${*mark} apple lemon blackberry Failed: error -35 at offset 8 in replacement: invalid replacement string @@ -13627,13 +13782,13 @@ Get substring 3 failed (-54): requested value is not available Get substring 4 failed (-49): unknown substring 0L c 1L - + /x(?=ab\K)/ - xab\=get=0 + xab\=get=0 Start of matched string is beyond its end - displaying from end to start. 0: ab 0G (0) - xab\=copy=0 + xab\=copy=0 Start of matched string is beyond its end - displaying from end to start. 0: ab 0C (0) @@ -13674,12 +13829,14 @@ get substring list failed (-2): partial match Capturing subpattern count = 0 Compile options: <none> Overall options: anchored +First code unit = 'a' Subject length lower bound = 3 /^abc/info,no_dotstar_anchor Capturing subpattern count = 0 Compile options: no_dotstar_anchor Overall options: anchored no_dotstar_anchor +First code unit = 'a' Subject length lower bound = 3 /.*\d/info,auto_callout @@ -13797,7 +13954,7 @@ Failed: error 109 at offset 7: quantifier does not follow a repeatable item 456 0: 456 \= Expect no match - 356 + 356 No match '^(a)*+(\w)' @@ -13805,7 +13962,7 @@ No match 0: g 1: <unset> 2: g - g\=ovector=1 + g\=ovector=1 Matched, but too many substrings 0: g @@ -13813,10 +13970,10 @@ Matched, but too many substrings g 0: g 1: g - g\=ovector=1 + g\=ovector=1 Matched, but too many substrings 0: g - + # These two pattern showeds up compile-time bugs "((?2){0,1999}())?" @@ -13897,7 +14054,6 @@ Callout (10): "AB" /^a(b)c(?C1)def/ abcdef\=callout_capture Callout 1: last capture = 1 - 0: <unset> 1: b --->abcdef ^ ^ d @@ -13920,7 +14076,6 @@ Callout 1: last capture = 1 ------------------------------------------------------------------ abcdef\=callout_capture Callout (10): {AB} last capture = 1 - 0: <unset> 1: b --->abcdef ^ ^ d @@ -13955,15 +14110,15 @@ Callout {a}b} Bra Bra a - CalloutStr `code` 8 14 0 + CalloutStr `code` 8 14 4 Ket Bra a - CalloutStr `code` 8 14 0 + CalloutStr `code` 8 14 4 Ket Bra a - CalloutStr `code` 8 14 0 + CalloutStr `code` 8 14 4 Ket Ket End @@ -13974,7 +14129,7 @@ Callout {a}b} Bra ^ Cond - Callout 25 9 7 + Callout 25 9 3 Assert abc Ket @@ -13985,14 +14140,14 @@ Callout {a}b} Ket End ------------------------------------------------------------------ -Callout 25 (?=abc) +Callout 25 (?= abcdefg --->abcdefg - 25 ^ (?=abc) + 25 ^ (?= 0: abcd - xyz123 + xyz123 --->xyz123 - 25 ^ (?=abc) + 25 ^ (?= 0: xyz /^(?(?C$abc$)(?=abc)abcd|xyz)/B @@ -14000,7 +14155,7 @@ Callout 25 (?=abc) Bra ^ Cond - CalloutStr $abc$ 7 12 7 + CalloutStr $abc$ 7 12 3 Assert abc Ket @@ -14014,12 +14169,12 @@ Callout 25 (?=abc) abcdefg Callout (7): $abc$ --->abcdefg - ^ (?=abc) + ^ (?= 0: abcd - xyz123 + xyz123 Callout (7): $abc$ --->xyz123 - ^ (?=abc) + ^ (?= 0: xyz /^ab(?C'first')cd(?C"second")ef/ @@ -14036,13 +14191,13 @@ Callout (20): "second" aaaXY Callout (8): `code` --->aaaXY - ^^ ) + ^^ ){3} Callout (8): `code` --->aaaXY - ^ ^ ) + ^ ^ ){3} Callout (8): `code` --->aaaXY - ^ ^ ) + ^ ^ ){3} 0: aaaX # Binary zero in callout string @@ -14060,8 +14215,8 @@ Callout (5): 'x\x00z' /(?(?!)a|b)/ bbb 0: b -\= Expect no match - aaa +\= Expect no match + aaa No match # JIT gives a different error message for the infinite recursion @@ -14073,7 +14228,7 @@ Failed: error -52: nested recursion at the same subject position # Perl fails to diagnose the absence of an assertion "(?(?<E>.*!.*)?)" -Failed: error 128 at offset 3: assertion expected after (?( or (?(?C) +Failed: error 128 at offset 2: assertion expected after (?( or (?(?C) "X((?2)()*+){2}+"B ------------------------------------------------------------------ @@ -14124,7 +14279,7 @@ Failed: error 115 at offset 15: reference to non-existent subpattern Failed: error 115 at offset 15: reference to non-existent subpattern ";(?<=()((?3))((?2)))" -Failed: error 125 at offset 20: lookbehind assertion is not fixed length +Failed: error 125 at offset 1: lookbehind assertion is not fixed length # Perl loops on this (PCRE2 used to!) @@ -14163,9 +14318,9 @@ Subject length lower bound = 0 \= Expect no match \[9x!xxx(]{9999} No match - + /(abc)*/ - \[abc]{5} + \[abc]{5} 0: abcabcabcabcabc 1: abc @@ -14211,7 +14366,7 @@ Failed: error 115 at offset 2: reference to non-existent subpattern /A\8B\9C/ Failed: error 115 at offset 2: reference to non-existent subpattern A8B9C - + /(?x:((?'a')) # comment (with parentheses) and | vertical (?-x:#not a comment (?'b')) # this is a comment () (?'c')) # not a comment (?'d')/info @@ -14239,14 +14394,14 @@ Subject length lower bound = 1 1: 2: 2 3: - B32A + B32A 0: 3 1: 2: 3: 3 # These are some patterns that used to cause buffer overflows or other errors -# while compiling. +# while compiling. /.((?2)(?R)|\1|$)()/B ------------------------------------------------------------------ @@ -14306,7 +14461,7 @@ Failed: error 115 at offset 7: reference to non-existent subpattern "(?J)(?'d'(?'d'\g{d}))" "(?=!((?2)(?))({8(?<=(?1){29}8bbbb\x16\xd\xc6^($(\xa9H4){4}h}?1)B))\x15')" -Failed: error 125 at offset 72: lookbehind assertion is not fixed length +Failed: error 125 at offset 16: lookbehind assertion is not fixed length /A(?'')Z/ Failed: error 162 at offset 4: subpattern name expected @@ -14314,7 +14469,7 @@ Failed: error 162 at offset 4: subpattern name expected "(?J:(?|(?'R')(\k'R')|((?'R'))))" /(?<=|(\,\$(?73591620449005828816)\xa8.{7}){6}\x09)/ -Failed: error 161 at offset 32: number is too big +Failed: error 161 at offset 17: group number is too big /^(?:(?(1)x|)+)+$()/B ------------------------------------------------------------------ @@ -14335,16 +14490,16 @@ Failed: error 161 at offset 32: number is too big ------------------------------------------------------------------ /[[:>:]](?<)/ -Failed: error 124 at offset 10: letter or underscore expected after (?< or (?' +Failed: error 162 at offset 10: subpattern name expected /((?x)(*:0))#(?'/ -Failed: error 124 at offset 15: letter or underscore expected after (?< or (?' +Failed: error 162 at offset 15: subpattern name expected /(?C$[$)(?<]/ -Failed: error 124 at offset 10: letter or underscore expected after (?< or (?' +Failed: error 162 at offset 10: subpattern name expected /(?C$)$)(?<]/ -Failed: error 124 at offset 10: letter or underscore expected after (?< or (?' +Failed: error 162 at offset 10: subpattern name expected /(?(R))*+/B ------------------------------------------------------------------ @@ -14362,7 +14517,7 @@ Failed: error 124 at offset 10: letter or underscore expected after (?< or (?' 0: /((?x)(?#))#(?'/ -Failed: error 124 at offset 14: letter or underscore expected after (?< or (?' +Failed: error 162 at offset 14: subpattern name expected /((?x)(?#))#(?'abc')/I Capturing subpattern count = 2 @@ -14372,7 +14527,7 @@ First code unit = '#' Subject length lower bound = 1 /[[:\\](?<[::]/ -Failed: error 124 at offset 9: letter or underscore expected after (?< or (?' +Failed: error 162 at offset 9: subpattern name expected /[[:\\](?'abc')[a:]/I Capturing subpattern count = 1 @@ -14401,13 +14556,13 @@ Failed: error 106 at offset 353: missing terminating ] for character class ------------------------------------------------------------------ /(?R-:(?</ -Failed: error 114 at offset 8: missing closing parenthesis +Failed: error 158 at offset 3: (?R (recursive pattern call) must be followed by a closing parenthesis /(?R-:(?<)/ -Failed: error 129 at offset 3: (?R or (?[+-]digits must be followed by ) +Failed: error 158 at offset 3: (?R (recursive pattern call) must be followed by a closing parenthesis /(?(?C{\Q})(?!(?'/ -Failed: error 124 at offset 16: letter or underscore expected after (?< or (?' +Failed: error 162 at offset 16: subpattern name expected /(?(?C{\Q})(?!(?'abc')))/I Capturing subpattern count = 1 @@ -14478,7 +14633,7 @@ Subject length lower bound = 0 0: {4,5a}bc /\x0{ab}/ - \0{ab} + \0{ab} 0: \x00{ab} /^(a(b))\1\g1\g{1}\g-1\g{-1}\g{-02}Z/ @@ -14505,10 +14660,10 @@ No match 0: ab /(?(8000000000/ -Failed: error 114 at offset 13: missing closing parenthesis +Failed: error 161 at offset 8: group number is too big /((?(R8000000000)))/ -Failed: error 161 at offset 16: number is too big +Failed: error 161 at offset 9: group number is too big /0(?0)|(1)(*THEN)(*SKIP:0)(*FAIL)/ \= Expect no match @@ -14516,7 +14671,7 @@ Failed: error 161 at offset 16: number is too big No match /(?(1)()\983040\2)/ -Failed: error 115 at offset 13: reference to non-existent subpattern +Failed: error 161 at offset 14: group number is too big /(*LIMIT_MATCH=)abc/ Failed: error 160 at offset 14: (*VERB) not recognized or malformed @@ -14538,9 +14693,9 @@ No match aacb No match -/(*MARK:a\zb)z/alt_verbnames -Failed: error 140 at offset 9: invalid escape sequence in (*VERB) name - +/(*MARK:a\zb)z/alt_verbnames +Failed: error 140 at offset 10: invalid escape sequence in (*VERB) name + /(*:ab\t(d\)c)xxx/ Failed: error 122 at offset 12: unmatched closing parenthesis @@ -14553,20 +14708,38 @@ MK: ab\x09(d)c x 0: x MK: Axx)xB - + /(*:A\ExxxB)x/alt_verbnames,mark - x + x 0: x MK: AxxxB - + /(*: A \ and #comment \ B)x/x,alt_verbnames,mark - x + x 0: x MK: A and B - + +/(*: A \ and #comment + \ B)x/alt_verbnames,mark + x + 0: x +MK: A and #comment\x0a B + +/(*: A \ and #comment + \ B)x/x,mark + x + 0: x +MK: A \ and #comment\x0a \ B + +/(*: A \ and #comment + \ B)x/mark + x + 0: x +MK: A \ and #comment\x0a \ B + /(*:A -B)x/alt_verbnames,mark +B)x/alt_verbnames,mark x 0: x MK: A\x0aB @@ -14595,7 +14768,7 @@ No match \= Expect no match 1234abc\=offset_limit=6 No match - + /A/g,replace=-,use_offset_limit XAXAXAXAXA\=offset_limit=4 2: X-X-XAXAXA @@ -14614,20 +14787,20 @@ No match /abcd/null_context abcd\=null_context 0: abcd -\= Expect error +\= Expect error abcd\=null_context,find_limits ** Not allowed together: find_limits null_context - abcd\=allusedtext,startchar + abcd\=allusedtext,startchar ** Not allowed together: allusedtext startchar /abcd/replace=w\rx\x82y\o{333}z(\Q12\$34$$\x34\E5$$),substitute_extended abcd 1: w\x0dx\x82y\xdbz(12\$34$$\x345$) - + /a(bc)(DE)/replace=a\u$1\U$1\E$1\l$2\L$2\Eab\Uab\LYZ\EDone,substitute_extended abcDE 1: aBcBCbcdEdeabAByzDone - + /abcd/replace=xy\kz,substitute_extended abcd Failed: error -57 at offset 4 in replacement: bad escape sequence in replacement string @@ -14681,9 +14854,9 @@ Failed: error -49 at offset 10 in replacement: unknown substring /(?J)(?:(?<A>a)|(?<A>b))/replace=<$A> [a] 1: [<a>] - [b] + [b] 1: [<b>] -\= Expect error +\= Expect error (a)\=ovector=1 Failed: error -54 at offset 3 in replacement: requested value is not available @@ -14701,6 +14874,7 @@ Capturing subpattern count = 2 Max back reference = 1 Compile options: <none> Overall options: anchored +First code unit = 'o' Last code unit = '}' Subject length lower bound = 65535 @@ -14715,7 +14889,7 @@ Failed: error 142 at offset 7: syntax error in subpattern name (missing terminat Failed: error -49 at offset 3 in replacement: unknown substring /(?<!a{65535}a{5})x/I -Failed: error 187 at offset 16: lookbehind assertion is too long +Failed: error 187 at offset 0: lookbehind assertion is too long /(?<!a{65535})x/I Capturing subpattern count = 0 @@ -14726,11 +14900,11 @@ Subject length lower bound = 1 /(?=a\K)/replace=z BaCaD Failed: error -60: match with end before start is not supported - + /(?'abcdefghijklmnopqrstuvwxyzABCDEFG'toolong)/ Failed: error 148 at offset 36: subpattern name is too long (maximum 32 characters) - -/(?'abcdefghijklmnopqrstuvwxyzABCDEF'justright)/ + +/(?'abcdefghijklmnopqrstuvwxyzABCDEF'justright)/ # These two use zero-termination /abcd/max_pattern_length=3 @@ -14744,7 +14918,7 @@ Failed: error 188 at offset 0: pattern string is longer than the limit set by th /abcdef/hex,max_pattern_length=3 -# These two patterns used to take a long time to compile +# These patterns used to take a long time to compile "(.*) ((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))((?-2)(?-2)) @@ -14767,21 +14941,21 @@ May match empty string Options: extended Subject length lower bound = 0 -# When (?| is used and groups of the same number may be different, -# we have to rely on a count to catch overly complicated patterns. - "(?|()|())(.*) ((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))((?-2)(?-2)) ((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))((?-2)(?-2)) ((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))"xI -Failed: error 186 at offset 148: regular expression is too complicated +Capturing subpattern count = 13 +May match empty string +Options: extended +Subject length lower bound = 0 "(?|()|())(?<=a() ((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))((?-2)(?-2)) ((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))((?-2)(?-2)) ((?-2)(?-2))((?-2)(?-2))((?-2)(?-2)) a)"xI -Failed: error 186 at offset 154: regular expression is too complicated +Failed: error 135 at offset 9: lookbehind is too complicated # Test the use of malloc for caching group information when there are more # groups than fit into the on-stack workspace. @@ -14950,11 +15124,11 @@ Subject length lower bound = 0 /(A*)\E+/B,auto_callout ------------------------------------------------------------------ Bra - Callout 255 0 7 + Callout 255 0 1 SCBra 1 Callout 255 1 2 A* - Callout 255 3 0 + Callout 255 3 4 KetRmax Callout 255 7 0 Ket @@ -14964,10 +15138,10 @@ Subject length lower bound = 0 /()\Q\E*]/B,auto_callout ------------------------------------------------------------------ Bra - Callout 255 0 7 + Callout 255 0 1 Brazero SCBra 1 - Callout 255 1 0 + Callout 255 1 6 KetRmax Callout 255 7 1 ] @@ -14977,10 +15151,10 @@ Subject length lower bound = 0 ------------------------------------------------------------------ a[bc]d --->a[bc]d - +0 ^ ()\Q\E* - +1 ^ ) + +0 ^ ( + +1 ^ )\Q\E* +7 ^ ] - +8 ^^ + +8 ^^ End of pattern 0: ] 1: @@ -15077,12 +15251,12 @@ Subject length lower bound = 0 (?-x):?/extended /(8(*:6^\x09x\xa6l\)6!|\xd0:[^:|)\x09d\Z\d{85*m(?'(?<1!)*\W[*\xff]!!h\w]*\xbe;/alt_bsux,alt_verbnames,allow_empty_class,dollar_endonly,extended,multiline,never_utf,no_dotstar_anchor,no_start_optimize -Failed: error 124 at offset 49: letter or underscore expected after (?< or (?' +Failed: error 162 at offset 49: subpattern name expected /a|(b)c/replace=>$1<,substitute_unset_empty cat 1: c><t - xbcom + xbcom 1: x>b<om /a|(b)c/ @@ -15104,23 +15278,23 @@ Failed: error -49 at offset 9 in replacement: unknown substring /a|(?'X'b)c/replace=>$X<,substitute_unset_empty cat 1: c><t - xbcom + xbcom 1: x>b<om /a|(?'X'b)c/replace=>$Y<,substitute_unset_empty cat Failed: error -49 at offset 3 in replacement: unknown substring - cat\=substitute_unknown_unset + cat\=substitute_unknown_unset 1: c><t - cat\=substitute_unknown_unset,-substitute_unset_empty + cat\=substitute_unknown_unset,-substitute_unset_empty Failed: error -55 at offset 3 in replacement: requested value is not set /a|(b)c/replace=>$2<,substitute_unset_empty cat Failed: error -49 at offset 3 in replacement: unknown substring - cat\=substitute_unknown_unset + cat\=substitute_unknown_unset 1: c><t - cat\=substitute_unknown_unset,-substitute_unset_empty + cat\=substitute_unknown_unset,-substitute_unset_empty Failed: error -55 at offset 3 in replacement: requested value is not set /()()()/use_offset_limit @@ -15130,13 +15304,13 @@ Failed: error -55 at offset 3 in replacement: requested value is not set ** Invalid value in 'callout_fail=11000000000' \=callout_fail=1:11000000000 ** Invalid value in 'callout_fail=1:11000000000' - \=callout_data=11000000000 + \=callout_data=11000000000 ** Invalid value in 'callout_data=11000000000' - \=callout_data=-11000000000 + \=callout_data=-11000000000 ** Invalid value in 'callout_data=-11000000000' - \=offset_limit=1100000000000000000000 + \=offset_limit=1100000000000000000000 ** Invalid value in 'offset_limit=1100000000000000000000' - \=copy=11000000000 + \=copy=11000000000 ** Invalid value in 'copy=11000000000' /(*MARK:A\x00b)/mark @@ -15160,13 +15334,13 @@ MK: A\x00b MK: A\x00b /efg/hex -** Unexpected non-hex-digit 'g' in hex pattern: quote missing? +** Unexpected non-hex-digit 'g' at offset 2 in hex pattern: quote missing? /eff/hex ** Odd number of digits in hex pattern /effg/hex -** Unexpected non-hex-digit 'g' in hex pattern: quote missing? +** Unexpected non-hex-digit 'g' at offset 3 in hex pattern: quote missing? /(?J)(?'a'))(?'a')/ Failed: error 122 at offset 10: unmatched closing parenthesis @@ -15195,12 +15369,1219 @@ No match /\[AB]{6000000000000000000000}/expand ** Pattern repeat count too large -# End of testinput2 -Error -63: PCRE2_ERROR_BADDATA (unknown error number) +# Hex uses pattern length, not zero-terminated. This tests for overrunning +# the given length of a pattern. + +/'(*U'/hex +Failed: error 160 at offset 3: (*VERB) not recognized or malformed + +/'(*'/hex +Failed: error 109 at offset 1: quantifier does not follow a repeatable item + +/'('/hex +Failed: error 114 at offset 1: missing closing parenthesis + +//hex + +# These tests are here because Perl never allows a back reference in a +# lookbehind. PCRE2 supports some limited cases. + +/([ab])...(?<=\1)z/ + a11az + 0: a11az + 1: a + b11bz + 0: b11bz + 1: b +\= Expect no match + b11az +No match + +/(?|([ab]))...(?<=\1)z/ +Failed: error 125 at offset 13: lookbehind assertion is not fixed length + +/([ab])(\1)...(?<=\2)z/ + aa11az + 0: aa11az + 1: a + 2: a + +/(a\2)(b\1)(?<=\2)/ +Failed: error 125 at offset 10: lookbehind assertion is not fixed length + +/(?<A>[ab])...(?<=\k'A')z/ + a11az + 0: a11az + 1: a + b11bz + 0: b11bz + 1: b +\= Expect no match + b11az +No match + +/(?<A>[ab])...(?<=\k'A')(?<A>)z/dupnames +Failed: error 125 at offset 13: lookbehind assertion is not fixed length + +# Perl does not support \g+n + +/((\g+1X)?([ab]))+/ + aaXbbXa + 0: aaXbbXa + 1: bXa + 2: bX + 3: a + +/ab(?C1)c/auto_callout + abc +--->abc + +0 ^ a + +1 ^^ b + 1 ^ ^ c + +8 ^ ^ End of pattern + 0: abc + +/'ab(?C1)c'/hex,auto_callout + abc +--->abc + +0 ^ a + +1 ^^ b + 1 ^ ^ c + +8 ^ ^ End of pattern + 0: abc + +# Perl accepts these, but gives a warning. We can't warn, so give an error. + +/[a-[:digit:]]+/ +Failed: error 150 at offset 4: invalid range in character class + a-a9-a + +/[A-[:digit:]]+/ +Failed: error 150 at offset 4: invalid range in character class + A-A9-A + +/[a-\d]+/ +Failed: error 150 at offset 5: invalid range in character class + a-a9-a + +/(?<RA>abc)(?(R)xyz)/B +------------------------------------------------------------------ + Bra + CBra 1 + abc + Ket + Cond + Cond recurse any + xyz + Ket + Ket + End +------------------------------------------------------------------ + +/(?<R>abc)(?(R)xyz)/B +------------------------------------------------------------------ + Bra + CBra 1 + abc + Ket + Cond + 1 Cond ref + xyz + Ket + Ket + End +------------------------------------------------------------------ + +/(?=.*[A-Z])/I +Capturing subpattern count = 0 +May match empty string +Subject length lower bound = 0 + +/()(?<=(?0))/ +Failed: error 125 at offset 2: lookbehind assertion is not fixed length + +/(?<!|!(?<!))/ + +/(?<!|!|!||||||(?<!)||(?<!)!|!||(?<!)!|!(?<!)!|!|!|!||||!!|<!)!|!||||!|/ + +/{2,2{2,2/use_length + +/.>*?\g'0/use_length +Failed: error 157 at offset 6: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number + +/.>*?\g'0/ +Failed: error 157 at offset 6: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number + +/{̈́̈́{'{22{2{{2{'{22{{22{2{'{22{2{{2{{222{{2{'{22{2{22{2{'{22{2{{2{'{22{2{22{2{'{'{22{2{22{2{'{22{2{{2{'{22{2{22{2{'{222{2Ą̈́̈́{'{22{2{{2{'{22{{11{2{'{22{2{{2{{'{22{2{{2{'{22{{22{1{'{22{2{{2{{222{{2{'{22{2{22{2{'{/auto_callout + +// +\=get=i00000000000000000000000000000000 +** Group name in 'get' is too long +\=get=i2345678901234567890123456789012,get=i1245678901234567890123456789012 +** Too many characters in named 'get' modifiers + +"(?(?C))" +Failed: error 128 at offset 6: assertion expected after (?( or (?(?C) + +/(?(?(?(?(?(?))))))/ +Failed: error 128 at offset 2: assertion expected after (?( or (?(?C) + +/(?<=(?1))((?s))/anchored + +/(*:ab)*/ +Failed: error 109 at offset 6: quantifier does not follow a repeatable item + +%(*:(:(svvvvvvvvvv:]*[ Z!*;[]*[^[]*!^[+.+{{2,7}' _\\\\\\\\\\\\\)?.:.. *w////\\\Q\\\\\\\\\\\\\\\T\\\\\+/?/////'+\\\EEE?/////'+/*+/[^K]?]//(w)%never_backslash_c,alt_verbnames,auto_callout + +/./newline=crlf + \=ph +No match + +/(\x0e00\000000\xc)/replace=\P,substitute_extended + \x0e00\000000\xc +Failed: error -57 at offset 2 in replacement: bad escape sequence in replacement string + +//replace=0 + \=offset=7 +Failed: error -33: bad offset value + +".+\QX\E+"B,no_auto_possess +------------------------------------------------------------------ + Bra + Any+ + X+ + Ket + End +------------------------------------------------------------------ + +".+\QX\E+"B,auto_callout,no_auto_possess +------------------------------------------------------------------ + Bra + Callout 255 0 4 + Any+ + Callout 255 4 4 + X+ + Callout 255 8 0 + Ket + End +------------------------------------------------------------------ + +# This one is here because Perl gives an 'unmatched )' error which goes away +# if one of the \) sequences is removed - which is weird. PCRE finds it too +# complicated to find a minimum matching length. + +"()X|((((((((()))))))((((())))))\2())((((((\2\2)))\2)(\22((((\2\2)2))\2)))(2\ZZZ)+:)Z^|91ZiZZnter(ZZ |91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z+:)Z|91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z((Z*(\2(Z\':))\0)i|||||||||||||||loZ\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0nte!rnal errpr\2\\21r(2\ZZZ)+:)Z!|91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0(2\ZZZ)+:)Z^|91ZiZZnter(ZZ |91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0(2\ZZZ)+:)Z^)))int \)\0(2\ZZZ)+:)Z^|91ZiZZnter(ZZernZal ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \))\ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)))\2))))((((((\2\2))))))"I +Capturing subpattern count = 108 +Max back reference = 22 +Contains explicit CR or LF match +Subject length lower bound = 1 + +# This checks that new code for handling groups that may match an empty string +# works on a very large number of alternatives. This pattern used to provoke a +# complaint that it was too complicated. + +/(?:\[A|B|C|D|E|F|G|H|I|J|]{200}Z)/expand + +# This one used to compile rubbish instead of a compile error, and then +# behave unpredictably at match time. + +/.+(?(?C'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'))?!XXXX.=X/ +Failed: error 128 at offset 63: assertion expected after (?( or (?(?C) + .+(?(?C'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'))?!XXXX.=X + +/[:[:alnum:]-[[a:lnum:]+/ +Failed: error 150 at offset 11: invalid range in character class + +/((?(?C'')\QX\E(?!((?(?C'')(?!X=X));=)r*X=X));=)/ +Failed: error 128 at offset 11: assertion expected after (?( or (?(?C) + +/((?(?C'')\Q\E(?!((?(?C'')(?!X=X));=)r*X=X));=)/ + +/abcd/auto_callout + abcd\=callout_error=255:2 +--->abcd + +0 ^ a + +1 ^^ b +Failed: error -37: callout error code + +/()(\g+65534)/ +Failed: error 161 at offset 11: group number is too big + +/()(\g+65533)/ +Failed: error 115 at offset 10: reference to non-existent subpattern + +/\x00\x00\x00(\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\x00k\d+\x00\x00\x00\x00\x00\x00\2*\x00\x00\1*.){36}int^\x00\x00\x00(\1{50779}?)J\w2/I +Capturing subpattern count = 2 +Max back reference = 2 +First code unit = \xc1 +Last code unit = '2' +Subject length lower bound = 65535 + +/(a)(b)\2\1\1\1\1/I +Capturing subpattern count = 2 +Max back reference = 2 +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 7 + +/(?<a>a)(?<b>b)\g{b}\g{a}\g{a}\g{a}\g{a}(?<a>xx)(?<b>zz)/I,dupnames +Capturing subpattern count = 4 +Max back reference = 4 +Named capturing subpatterns: + a 1 + a 3 + b 2 + b 4 +Options: dupnames +First code unit = 'a' +Last code unit = 'z' +Subject length lower bound = 11 + +// + \=ovector=7777777777 +** Invalid value in 'ovector=7777777777' + +# This is here because Perl matches, even though a COMMIT is encountered +# outside of the recursion. + +/(?1)(A(*COMMIT)|B)D/ + BAXBAD +No match + +"(?1){2}(a)"B +------------------------------------------------------------------ + Bra + Recurse + Recurse + CBra 1 + a + Ket + Ket + End +------------------------------------------------------------------ + +"(?1){2,4}(a)"B +------------------------------------------------------------------ + Bra + Recurse + Recurse + Brazero + Bra + Bra + Recurse + Ket + Brazero + Bra + Recurse + Ket + Ket + CBra 1 + a + Ket + Ket + End +------------------------------------------------------------------ + +# This test differs from Perl for the first subject. Perl ends up with +# $1 set to 'B'; PCRE2 has it unset (which I think is right). + +/^(?: +(?:A| (?:B|B(*ACCEPT)) (?<=(.)) D) +(Z) +)+$/x + AZB + 0: AZB + 1: <unset> + 2: Z + AZBDZ + 0: AZBDZ + 1: B + 2: Z + +# The first of these, when run by Perl, gives the mark 'aa', which is wrong. + +'(?>a(*:aa))b|ac' mark + ac + 0: ac + +'(?:a(*:aa))b|ac' mark + ac + 0: ac + +/(R?){65}/ + (R?){65} + 0: + 1: + +/\[(a)]{60}/expand + aaaa +No match + +/(?<!\1((?U)1((?U))))(*F)/never_backslash_c,alt_bsux,anchored,extended + +/\g{3/ +Failed: error 157 at offset 2: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number + +/(a(?C1)(b)(c)d)+/ + abcdabcd\=callout_capture +Callout 1: last capture = 0 +--->abcdabcd + ^^ ( +Callout 1: last capture = 1 + 1: abcd + 2: b + 3: c +--->abcdabcd + ^ ^ ( + 0: abcdabcd + 1: abcd + 2: b + 3: c + +# Perl matches this one, but PCRE does not because (*ACCEPT) clears out any +# pending backtracks in the recursion. + +/^ (?(DEFINE) (..(*ACCEPT)|...) ) (?1)$/x +\= Expect no match + abc +No match + +# Perl gives no match for this one + +/(a(*MARK:m)(*ACCEPT)){0}(?1)/mark + abc + 0: a +MK: m + +/abc/endanchored + xyzabc + 0: abc +\= Expect no match + xyzabcdef +No match +\= Expect error + xyzabc\=ph +Failed: error -34: bad option value + +/abc/ + xyzabc\=endanchored + 0: abc +\= Expect no match + xyzabcdef\=endanchored +No match +\= Expect error + xyzabc\=ps,endanchored +Failed: error -34: bad option value + +/abc(*ACCEPT)d/endanchored + xyzabc + 0: abc +\= Expect no match + xyzabcdef +No match + +/abc|bcd/endanchored + xyzabcd + 0: bcd +\= Expect no match + xyzabcdef +No match + +/a(*ACCEPT)x|aa/endanchored + aaa + 0: a + +# Check auto-anchoring when there is a group that is never obeyed at +# the start of a branch. + +/(?(DEFINE)(a))^bc/I +Capturing subpattern count = 1 +Compile options: <none> +Overall options: anchored +First code unit = 'b' +Subject length lower bound = 2 + +/(a){0}.*bc/sI +Capturing subpattern count = 1 +Compile options: dotall +Overall options: anchored dotall +Last code unit = 'c' +Subject length lower bound = 2 + +# This should be anchored, as the condition is always false and there is +# no alternative branch. + +/(?(VERSION>=999)yes)^bc/I +Capturing subpattern count = 0 +Compile options: <none> +Overall options: anchored +Subject length lower bound = 2 + +# This should not be anchored. + +/(?(VERSION>=999)yes|no)^bc/I +Capturing subpattern count = 0 +Last code unit = 'c' +Subject length lower bound = 4 + +/(*LIMIT_HEAP=0)xxx/I +Capturing subpattern count = 0 +Heap limit = 0 +First code unit = 'x' +Last code unit = 'x' +Subject length lower bound = 3 + +/\d{0,3}(*:abc)(?C1)xxx/callout_info +Callout 1 x + +# ---------------------------------------------------------------------- + +# These are a whole pile of tests that touch lines of code that are not +# used by any other tests (at least when these were created). + +/^a+?x/i,no_start_optimize,no_auto_possess +\= Expect no match + aaa +No match + +/^[^a]{3,}?x/i,no_start_optimize,no_auto_possess +\= Expect no match + bbb +No match + cc +No match + +/^X\S/no_start_optimize,no_auto_possess +\= Expect no match + X +No match + +/^X\W/no_start_optimize,no_auto_possess +\= Expect no match + X +No match + +/^X\H/no_start_optimize,no_auto_possess +\= Expect no match + X +No match + +/^X\h/no_start_optimize,no_auto_possess +\= Expect no match + X +No match + +/^X\V/no_start_optimize,no_auto_possess +\= Expect no match + X +No match + +/^X\v/no_start_optimize,no_auto_possess +\= Expect no match + X +No match + +/^X\h/no_start_optimize,no_auto_possess +\= Expect no match + XY +No match + +/^X\V/no_start_optimize,no_auto_possess +\= Expect no match + X\n +No match + +/^X\v/no_start_optimize,no_auto_possess +\= Expect no match + XX +No match + +/^X.+?/s,no_start_optimize,no_auto_possess +\= Expect no match + X +No match + +/^X\R+?/no_start_optimize,no_auto_possess +\= Expect no match + XX +No match + +/^X\H+?/no_start_optimize,no_auto_possess +\= Expect no match + X +No match + +/^X\h+?/no_start_optimize,no_auto_possess +\= Expect no match + X +No match + +/^X\V+?/no_start_optimize,no_auto_possess +\= Expect no match + X +No match + X\n +No match + +/^X\D+?/no_start_optimize,no_auto_possess +\= Expect no match + X +No match + X9 +No match + +/^X\S+?/no_start_optimize,no_auto_possess +\= Expect no match + X +No match + X\n +No match + +/^X\W+?/no_start_optimize,no_auto_possess +\= Expect no match + X +No match + XX +No match + +/^X.+?Z/no_start_optimize,no_auto_possess +\= Expect no match + XY\n +No match + +/(*CRLF)^X.+?Z/no_start_optimize,no_auto_possess +\= Expect no match + XY\r\=ps +Partial match: XY\x0d + +/^X\R+?Z/no_start_optimize,no_auto_possess +\= Expect no match + X\nX +No match + X\n\r\n +No match + X\n\rY +No match + X\n\nY +No match + X\n\x{0c}Y +No match + +/(*BSR_ANYCRLF)^X\R+?Z/no_start_optimize,no_auto_possess +\= Expect no match + X\nX +No match + X\n\r\n +No match + X\n\rY +No match + X\n\nY +No match + X\n\x{0c}Y +No match + +/^X\H+?Z/no_start_optimize,no_auto_possess +\= Expect no match + XY\t +No match + XYY +No match + +/^X\h+?Z/no_start_optimize,no_auto_possess +\= Expect no match + X\t\t +No match + X\tY +No match + +/^X\V+?Z/no_start_optimize,no_auto_possess +\= Expect no match + XY\n +No match + XYY +No match + +/^X\v+?Z/no_start_optimize,no_auto_possess +\= Expect no match + X\n\n +No match + X\nY +No match + +/^X\D+?Z/no_start_optimize,no_auto_possess +\= Expect no match + XY9 +No match + XYY +No match + +/^X\d+?Z/no_start_optimize,no_auto_possess +\= Expect no match + X99 +No match + X9Y +No match + +/^X\S+?Z/no_start_optimize,no_auto_possess +\= Expect no match + XY\n +No match + XYY +No match + +/^X\s+?Z/no_start_optimize,no_auto_possess +\= Expect no match + X\n\n +No match + X\nY +No match + +/^X\W+?Z/no_start_optimize,no_auto_possess +\= Expect no match + X.A +No match + X++ +No match + +/^X\w+?Z/no_start_optimize,no_auto_possess +\= Expect no match + Xa. +No match + Xaa +No match + +/^X.{1,3}Z/s,no_start_optimize,no_auto_possess +\= Expect no match + Xa.bd +No match + +/^X\h+Z/no_start_optimize,no_auto_possess +\= Expect no match + X\t\t +No match + X\tY +No match + +/^X\V+Z/no_start_optimize,no_auto_possess +\= Expect no match + XY\n +No match + XYY +No match + +/^(X(*THEN)Y|AB){0}(?1)/ + ABX + 0: AB +\= Expect no match + XAB +No match + +/^(?!A(?C1)B)C/ + ABC\=callout_error=1,no_jit +No match + +/^(?!A(?C1)B)C/no_start_optimize + ABC\=callout_error=1 +--->ABC + 1 ^^ B +Failed: error -37: callout error code + +/^(?(?!A(?C1)B)C)/ + ABC\=callout_error=1 +--->ABC + 1 ^^ B +Failed: error -37: callout error code + +# ---------------------------------------------------------------------- + +/[a b c]/BxxI +------------------------------------------------------------------ + Bra + [a-c] + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: extended_more +Starting code units: a b c +Subject length lower bound = 1 + +/[a b c]/BxxxI +------------------------------------------------------------------ + Bra + [a-c] + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: extended extended_more +Starting code units: a b c +Subject length lower bound = 1 + +/[a b c]/B,extended_more +------------------------------------------------------------------ + Bra + [a-c] + Ket + End +------------------------------------------------------------------ + +/[ a b c ]/B,extended_more +------------------------------------------------------------------ + Bra + [a-c] + Ket + End +------------------------------------------------------------------ + +/[a b](?xx: [ 12 ] (?-xx:[ 34 ]) )y z/B +------------------------------------------------------------------ + Bra + [ ab] + Bra + [12] + Bra + [ 34] + Ket + Ket + y z + Ket + End +------------------------------------------------------------------ + +# Unsetting /x also unsets /xx + +/[a b](?xx: [ 12 ] (?-x:[ 34 ]) )y z/B +------------------------------------------------------------------ + Bra + [ ab] + Bra + [12] + Bra + [ 34] + Ket + Ket + y z + Ket + End +------------------------------------------------------------------ + +/(a)(?-n:(b))(c)/nB +------------------------------------------------------------------ + Bra + Bra + a + Ket + Bra + CBra 1 + b + Ket + Ket + Bra + c + Ket + Ket + End +------------------------------------------------------------------ + +# ---------------------------------------------------------------------- +# These test the dangerous PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL option. + +/\j\x{z}\o{82}\L\uabcd\u\U\g{\g/B,\bad_escape_is_literal +** Unrecognized modifier '\' in '\bad_escape_is_literal' + +/\N{\c/IB,bad_escape_is_literal +------------------------------------------------------------------ + Bra + N{c + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Extra options: bad_escape_is_literal +First code unit = 'N' +Last code unit = 'c' +Subject length lower bound = 3 + +/[\j\x{z}\o\gA-\Nb-\g]/B,bad_escape_is_literal +------------------------------------------------------------------ + Bra + [A-Nb-gjoxz{}] + Ket + End +------------------------------------------------------------------ + +/[Q-\N]/B,bad_escape_is_literal +Failed: error 108 at offset 4: range out of order in character class + +# ---------------------------------------------------------------------- + +/a\b(c/literal + a\\b(c + 0: a\b(c + +/a\b(c/literal,caseless + a\\b(c + 0: a\b(c + a\\B(c + 0: a\B(c + +/a\b(c/literal,firstline + XYYa\\b(c + 0: a\b(c +\= Expect no match + X\na\\b(c +No match + +/a\b?c/literal,use_offset_limit + XXXXa\\b?c\=offset_limit=4 + 0: a\b?c +\= Expect no match + XXXXa\\b?c\=offset_limit=3 +No match + +/a\b(c/literal,anchored,endanchored + a\\b(c + 0: a\b(c +\= Expect no match + Xa\\b(c +No match + a\\b(cX +No match + Xa\\b(cX +No match + +//literal,extended +Failed: error 192 at offset 0: invalid option bits with PCRE2_LITERAL + +/a\b(c/literal,auto_callout,no_start_optimize + XXXXa\\b(c +--->XXXXa\b(c + +0 ^ a + +0 ^ a + +0 ^ a + +0 ^ a + +0 ^ a + +1 ^^ \ + +2 ^ ^ b + +3 ^ ^ ( + +4 ^ ^ c + +5 ^ ^ End of pattern + 0: a\b(c + +/a\b(c/literal,auto_callout + XXXXa\\b(c +--->XXXXa\b(c + +0 ^ a + +1 ^^ \ + +2 ^ ^ b + +3 ^ ^ ( + +4 ^ ^ c + +5 ^ ^ End of pattern + 0: a\b(c + +/(*CR)abc/literal + (*CR)abc + 0: (*CR)abc + +/cat|dog/I,match_word +Capturing subpattern count = 0 +Max lookbehind = 1 +Extra options: match_word +Starting code units: c d +Subject length lower bound = 3 + the cat sat + 0: cat +\= Expect no match + caterpillar +No match + snowcat +No match + syndicate +No match + +/(cat)|dog/I,match_line,literal +Capturing subpattern count = 0 +Compile options: literal +Overall options: anchored literal +Extra options: match_line +First code unit = '(' +Subject length lower bound = 9 + (cat)|dog + 0: (cat)|dog +\= Expect no match + the cat sat +No match + caterpillar +No match + snowcat +No match + syndicate +No match + +/a whole line/match_line,multiline + Rhubarb \na whole line\n custard + 0: a whole line +\= Expect no match + Not a whole line +No match + +# Perl gets this wrong, failing to capture 'b' in group 1. + +/^(b+|a){1,2}?bc/ + bbc + 0: bbc + 1: b + +# And again here, for the "babc" subject string. + +/^(b*|ba){1,2}?bc/ + babc + 0: babc + 1: ba + bbabc + 0: bbabc + 1: ba + bababc + 0: bababc + 1: ba +\= Expect no match + bababbc +No match + babababc +No match + +/[[:digit:]-a]/ +Failed: error 150 at offset 10: invalid range in character class + +/[[:digit:]-[:print:]]/ +Failed: error 150 at offset 10: invalid range in character class + +/[\d-a]/ +Failed: error 150 at offset 3: invalid range in character class + +/[\H-z]/ +Failed: error 150 at offset 3: invalid range in character class + +/[\d-[:print:]]/ +Failed: error 150 at offset 3: invalid range in character class + +# Perl gets the second of these wrong, giving no match. + +"(?<=(a))\1?b"I +Capturing subpattern count = 1 +Max back reference = 1 +Max lookbehind = 1 +Last code unit = 'b' +Subject length lower bound = 1 + ab + 0: b + 1: a + aaab + 0: ab + 1: a + +"(?=(a))\1?b"I +Capturing subpattern count = 1 +Max back reference = 1 +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 1 + ab + 0: ab + 1: a + aaab + 0: ab + 1: a + +# JIT does not support callout_extra + +/(*NO_JIT)(a+)b/auto_callout,no_start_optimize,no_auto_possess +\= Expect no match + aac\=callout_extra +New match attempt +--->aac + +9 ^ ( ++10 ^ a+ ++12 ^ ^ ) ++13 ^ ^ b +Backtrack +--->aac ++12 ^^ ) ++13 ^^ b +Backtrack +No other matching paths +New match attempt +--->aac + +9 ^ ( ++10 ^ a+ ++12 ^^ ) ++13 ^^ b +Backtrack +No other matching paths +New match attempt +--->aac + +9 ^ ( ++10 ^ a+ +Backtrack +No other matching paths +New match attempt +--->aac + +9 ^ ( ++10 ^ a+ +No match + +/(*NO_JIT)a+(?C'XXX')b/no_start_optimize,no_auto_possess +\= Expect no match + aac\=callout_extra +New match attempt +Callout (15): 'XXX' +--->aac + ^ ^ b +Backtrack +Callout (15): 'XXX' +--->aac + ^^ b +Backtrack +No other matching paths +New match attempt +Callout (15): 'XXX' +--->aac + ^^ b +No match + +/\n/firstline + xyz\nabc + 0: \x0a + +/\nabc/firstline + xyz\nabc + 0: \x0aabc + +/\x{0a}abc/firstline,newline=crlf +\= Expect no match + xyz\r\nabc +No match + +/[abc]/firstline +\= Expect no match + \na +No match + +# These tests are matched in test 1 as they are Perl compatible. Here we are +# looking at what does and does not get auto-possessified. + +/(?(DEFINE)(?<optional_a>a?))^(?&optional_a)a$/B +------------------------------------------------------------------ + Bra + Cond + Cond false + CBra 1 + a? + Ket + Ket + ^ + Recurse + a + $ + Ket + End +------------------------------------------------------------------ + +/(?(DEFINE)(?<optional_a>a?)X)^(?&optional_a)a$/B +------------------------------------------------------------------ + Bra + Cond + Cond false + CBra 1 + a? + Ket + X + Ket + ^ + Recurse + a + $ + Ket + End +------------------------------------------------------------------ + +/^(a?)b(?1)a/B +------------------------------------------------------------------ + Bra + ^ + CBra 1 + a? + Ket + b + Recurse + a + Ket + End +------------------------------------------------------------------ + +/^(a?)+b(?1)a/B +------------------------------------------------------------------ + Bra + ^ + SCBra 1 + a? + KetRmax + b + Recurse + a + Ket + End +------------------------------------------------------------------ + +/^(a?)++b(?1)a/B +------------------------------------------------------------------ + Bra + ^ + SCBraPos 1 + a? + KetRpos + b + Recurse + a + Ket + End +------------------------------------------------------------------ + +/^(a?)+b/B +------------------------------------------------------------------ + Bra + ^ + SCBra 1 + a? + KetRmax + b + Ket + End +------------------------------------------------------------------ + +/(?=a+)a(a+)++b/B +------------------------------------------------------------------ + Bra + Assert + a++ + Ket + a + CBraPos 1 + a++ + KetRpos + b + Ket + End +------------------------------------------------------------------ + +# End of testinput2 +Error -65: PCRE2_ERROR_BADDATA (unknown error number) Error -62: bad serialized data Error -2: partial match Error -1: no match Error 0: PCRE2_ERROR_BADDATA (unknown error number) Error 100: no error -Error 188: pattern string is longer than the limit set by the application -Error 189: PCRE2_ERROR_BADDATA (unknown error number) +Error 101: \ at end of pattern +Error 191: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode +Error 200: PCRE2_ERROR_BADDATA (unknown error number) diff --git a/testdata/testoutput20 b/testdata/testoutput20 index 952b0bb..d6265fd 100644 --- a/testdata/testoutput20 +++ b/testdata/testoutput20 @@ -40,25 +40,25 @@ Named capturing subpatterns: Options: dupnames Starting code units: b f Subject length lower bound = 6 - foofoo + foofoo 0: foofoo 1: foo barbar 0: barbar 1: <unset> 2: bar - + #pop mark C 0: C 1: C MK: A -\= Expect no match - D +\= Expect no match + D No match, mark = A - + #pop - AmanaplanacanalPanama + AmanaplanacanalPanama 0: AmanaplanacanalPanama 1: <unset> 2: <unset> @@ -78,7 +78,7 @@ Subject length lower bound = 3 0: metcalfe 33 1: metcalfe 2: 33 - + # Check for an error when different tables are used. /abc/push,tables=1 @@ -97,13 +97,13 @@ Serialization failed: error -30: patterns do not all use the same character tabl #pop should give an error ** Can't pop off an empty stack pqr - + /abcd/pushcopy abcd 0: abcd - + #pop - abcd + abcd 0: abcd #pop should give an error @@ -113,21 +113,21 @@ Serialization failed: error -30: patterns do not all use the same character tabl #popcopy abcd 0: abcd - + #pop - abcd + abcd 0: abcd - + /abcd/push #save testsaved1 #pop should give an error ** Can't pop off an empty stack #load testsaved1 -#popcopy +#popcopy abcd 0: abcd - + #pop abcd 0: abcd @@ -135,4 +135,27 @@ Serialization failed: error -30: patterns do not all use the same character tabl #pop should give an error ** Can't pop off an empty stack +/abcd/pushtablescopy + abcd + 0: abcd + +#popcopy + abcd + 0: abcd + +#pop + abcd + 0: abcd + +# Must only specify one of these + +//push,pushcopy +** Not allowed together: push pushcopy + +//push,pushtablescopy +** Not allowed together: push pushtablescopy + +//pushcopy,pushtablescopy +** Not allowed together: pushcopy pushtablescopy + # End of testinput20 diff --git a/testdata/testoutput21 b/testdata/testoutput21 index 6bf3f60..cba1326 100644 --- a/testdata/testoutput21 +++ b/testdata/testoutput21 @@ -76,7 +76,7 @@ ------------------------------------------------------------------ /ab\Cde/never_backslash_c -Failed: error 183 at offset 3: using \C is disabled by the application +Failed: error 183 at offset 4: using \C is disabled by the application /ab\Cde/info Capturing subpattern count = 0 diff --git a/testdata/testoutput22-16 b/testdata/testoutput22-16 index 01c9153..88f827c 100644 --- a/testdata/testoutput22-16 +++ b/testdata/testoutput22-16 @@ -17,7 +17,7 @@ Subject length lower bound = 0 # 16-bit modes, but not in 32-bit mode. /(?<=ab\Cde)X/utf -Failed: error 136 at offset 10: \C is not allowed in a lookbehind assertion in UTF-16 mode +Failed: error 136 at offset 0: \C is not allowed in a lookbehind assertion in UTF-16 mode ab!deXYZ # Autopossessification tests @@ -166,4 +166,9 @@ No match a\x{100}b 0: a\x{100}b +/^ab\C/utf,no_start_optimize +\= Expect no match - tests \C at end of subject + ab +No match + # End of testinput22 diff --git a/testdata/testoutput22-32 b/testdata/testoutput22-32 index 100333f..ac485fc 100644 --- a/testdata/testoutput22-32 +++ b/testdata/testoutput22-32 @@ -164,4 +164,9 @@ No match a\x{100}b 0: a\x{100}b +/^ab\C/utf,no_start_optimize +\= Expect no match - tests \C at end of subject + ab +No match + # End of testinput22 diff --git a/testdata/testoutput22-8 b/testdata/testoutput22-8 index 4814039..3d31fbc 100644 --- a/testdata/testoutput22-8 +++ b/testdata/testoutput22-8 @@ -17,7 +17,7 @@ Subject length lower bound = 0 # 16-bit modes, but not in 32-bit mode. /(?<=ab\Cde)X/utf -Failed: error 136 at offset 10: \C is not allowed in a lookbehind assertion in UTF-8 mode +Failed: error 136 at offset 0: \C is not allowed in a lookbehind assertion in UTF-8 mode ab!deXYZ # Autopossessification tests @@ -168,4 +168,9 @@ No match a\x{100}b No match +/^ab\C/utf,no_start_optimize +\= Expect no match - tests \C at end of subject + ab +No match + # End of testinput22 diff --git a/testdata/testoutput23 b/testdata/testoutput23 index 1da1c39..c6f0aa2 100644 --- a/testdata/testoutput23 +++ b/testdata/testoutput23 @@ -3,6 +3,6 @@ # correct error message. /a\Cb/ -Failed: error 185 at offset 2: using \C is disabled in this PCRE2 library +Failed: error 185 at offset 3: using \C is disabled in this PCRE2 library # End of testinput23 diff --git a/testdata/testoutput24 b/testdata/testoutput24 new file mode 100644 index 0000000..9c59893 --- /dev/null +++ b/testdata/testoutput24 @@ -0,0 +1,624 @@ +# This file tests the auxiliary pattern conversion features of the PCRE2 +# library, in non-UTF mode. + +#forbid_utf +#newline_default lf any anycrlf + +# -------- Tests of glob conversion -------- + +# Set the glob separator explicitly so that different OS defaults are not a +# problem. Then test various errors. + +#pattern convert=glob,convert_glob_escape=\,convert_glob_separator=/ + +/abc/posix +** The convert and posix modifiers are mutually exclusive + +# Separator must be / \ or . + +/a*b/convert_glob_separator=% +** Invalid glob separator '%' + +# Can't have separator in a class + +"[ab/cd]" +(?s)\A[ab/cd](?<!/)\z + +"[,-/]" +(?s)\A[,-/](?<!/)\z + +/[ab/ +** Pattern conversion error at offset 3: missing terminating ] for character class + +# Length check + +/abc/convert_length=11 +** Pattern conversion error at offset 3: no more memory + +/abc/convert_length=12 +(?s)\Aabc\z + +# Now some actual tests + +/a?b[]xy]*c/ +(?s)\Aa[^/]b[\]xy](*COMMIT)[^/]*?c\z + azb]1234c + 0: azb]1234c + +# Tests from the gitwildmatch list, with some additions + +/foo/ +(?s)\Afoo\z + foo + 0: foo +/= Expect no match +No match + bar +No match + +// +(?s)\A\z + \ + 0: + +/???/ +(?s)\A[^/][^/][^/]\z + foo + 0: foo +\= Expect no match + foobar +No match + +/*/ +(?s)\A[^/]*+\z + foo + 0: foo + \ + 0: + +/f*/ +(?s)\Af(*COMMIT)[^/]*+\z + foo + 0: foo + f + 0: f + +/*f/ +(?s)\A[^/]*?f\z + oof + 0: oof +\= Expect no match + foo +No match + +/*foo*/ +(?s)\A[^/]*?foo(*COMMIT)[^/]*+\z + foo + 0: foo + food + 0: food + aprilfool + 0: aprilfool + +/*ob*a*r*/ +(?s)\A[^/]*?ob(*COMMIT)[^/]*?a(*COMMIT)[^/]*?r(*COMMIT)[^/]*+\z + foobar + 0: foobar + +/*ab/ +(?s)\A[^/]*?ab\z + aaaaaaabababab + 0: aaaaaaabababab + +/foo\*/ +(?s)\Afoo\*\z + foo* + 0: foo* + +/foo\*bar/ +(?s)\Afoo\*bar\z +\= Expect no match + foobar +No match + +/f\\oo/ +(?s)\Af\\oo\z + f\\oo + 0: f\oo + +/*[al]?/ +(?s)\A[^/]*?[al][^/]\z + ball + 0: ball + +/[ten]/ +(?s)\A[ten]\z +\= Expect no match + ten +No match + +/t[a-g]n/ +(?s)\At[a-g]n\z + ten + 0: ten + +/a[]]b/ +(?s)\Aa[\]]b\z + a]b + 0: a]b + +/a[]a-]b/ +(?s)\Aa[\]a\-]b\z + +/a[]-]b/ +(?s)\Aa[\]\-]b\z + a-b + 0: a-b + a]b + 0: a]b +\= Expect no match + aab +No match + +/a[]a-z]b/ +(?s)\Aa[\]a-z]b\z + aab + 0: aab + +/]/ +(?s)\A\]\z + ] + 0: ] + +/t[!a-g]n/ +(?s)\At[^/a-g]n\z + ton + 0: ton +\= Expect no match + ten +No match + +'[[:alpha:]][[:digit:]][[:upper:]]' +(?s)\A[[:alpha:]][[:digit:]][[:upper:]]\z + a1B + 0: a1B + +'[[:digit:][:upper:][:space:]]' +(?s)\A[[:digit:][:upper:][:space:]]\z + A + 0: A + 1 + 0: 1 + \ \= + 0: +\= Expect no match + a +No match + . +No match + +'[a-c[:digit:]x-z]' +(?s)\A[a-c[:digit:]x-z]\z + 5 + 0: 5 + b + 0: b + y + 0: y +\= Expect no match + q +No match + +# End of gitwildmatch tests + +/*.j?g/ +(?s)\A[^/]*?\.j[^/]g\z + pic01.jpg + 0: pic01.jpg + .jpg + 0: .jpg + pic02.jxg + 0: pic02.jxg +\= Expect no match + pic03.j/g +No match + +/A[+-0]B/ +(?s)\AA[+-0](?<!/)B\z + A+B + 0: A+B + A.B + 0: A.B + A0B + 0: A0B +\= Expect no match + A/B +No match + +/*x?z/ +(?s)\A[^/]*?x[^/]z\z + abc.xyz + 0: abc.xyz +\= Expect no match + .xyz + 0: .xyz + +/?x?z/ +(?s)\A[^/]x[^/]z\z + axyz + 0: axyz +\= Expect no match + .xyz + 0: .xyz + +"[,-0]x?z" +(?s)\A[,-0](?<!/)x[^/]z\z + ,xyz + 0: ,xyz +\= Expect no match + /xyz +No match + .xyz + 0: .xyz + +".x*" +(?s)\A\.x(*COMMIT)[^/]*+\z + .xabc + 0: .xabc + +/a[--0]z/ +(?s)\Aa[\--0](?<!/)z\z + a-z + 0: a-z + a.z + 0: a.z + a0z + 0: a0z +\= Expect no match + a/z +No match + a1z +No match + +/<[a-c-d]>/ +(?s)\A<[a-c\-d]>\z + <a> + 0: <a> + <b> + 0: <b> + <c> + 0: <c> + <d> + 0: <d> + <-> + 0: <-> + +/a[[:digit:].]z/ +(?s)\Aa[[:digit:].]z\z + a1z + 0: a1z + a.z + 0: a.z +\= Expect no match + a:z +No match + +/a[[:digit].]z/ +(?s)\Aa[\[:digit]\.\]z\z + a[.]z + 0: a[.]z + a:.]z + 0: a:.]z + ad.]z + 0: ad.]z + +/<[[:a[:digit:]b]>/ +(?s)\A<[\[:a[:digit:]b]>\z + <[> + 0: <[> + <:> + 0: <:> + <a> + 0: <a> + <9> + 0: <9> + <b> + 0: <b> +\= Expect no match + <d> +No match + +/a*b/convert_glob_separator=\ +(?s)\Aa(*COMMIT)[^\\]*?b\z + +/a*b/convert_glob_separator=. +(?s)\Aa(*COMMIT)[^\.]*?b\z + +/a*b/convert_glob_separator=/ +(?s)\Aa(*COMMIT)[^/]*?b\z + +# Non control character checking + +/A\B\\C\D/ +(?s)\AAB\\CD\z + +/\\{}\?\*+\[\]()|.^$/ +(?s)\A\\\{\}\?\*\+\[\]\(\)\|\.\^\$\z + +/*a*\/*b*/ +(?s)\A[^/]*?a(*COMMIT)[^/]*?/(*COMMIT)[^/]*?b(*COMMIT)[^/]*+\z + +/?a?\/?b?/ +(?s)\A[^/]a[^/]/[^/]b[^/]\z + +/[a\\b\c][]][-][\]\-]/ +(?s)\A[a\\bc][\]][\-][\]\-]\z + +/[^a\\b\c][!]][!-][^\]\-]/ +(?s)\A[^/a\\bc][^/\]][^/\-][^/\]\-]\z + +/[[:alnum:][:alpha:][:blank:][:cntrl:][:digit:][:graph:][:lower:][:print:][:punct:][:space:][:upper:][:word:][:xdigit:]]/ +(?s)\A[[:alnum:][:alpha:][:blank:][:cntrl:][:digit:][:graph:][:lower:][:print:][:punct:][:space:][:upper:][:word:][:xdigit:]](?<!/)\z + +"[/-/]" +(?s)\A[/-/](?<!/)\z + +/[-----]/ +(?s)\A[\--\-\-\-]\z + +/[------]/ +(?s)\A[\--\-\--\-]\z + +/[!------]/ +(?s)\A[^/\--\-\--\-]\z + +/[[:alpha:]-a]/ +(?s)\A[[:alpha:]\-a]\z + +/[[:alpha:]][[:punct:]][[:ascii:]]/ +(?s)\A[[:alpha:]][[:punct:]](?<!/)[[:ascii:]](?<!/)\z + +/[a-[:alpha:]]/ +** Pattern conversion error at offset 4: invalid syntax + +/[[:alpha:/ +** Pattern conversion error at offset 9: missing terminating ] for character class + +/[[:alpha:]/ +** Pattern conversion error at offset 10: missing terminating ] for character class + +/[[:alphaa:]]/ +(?s)\A[\[:alphaa:]\]\z + +/[[:xdigi:]]/ +(?s)\A[\[:xdigi:]\]\z + +/[[:xdigit::]]/ +(?s)\A[\[:xdigit::]\]\z + +/****/ +(?s) + +/**\/abc/ +(?s)(?:\A|/)abc\z + abc + 0: abc + x/abc + 0: /abc + xabc +No match + +/abc\/**/ +(?s)\Aabc/ + +/abc\/**\/abc/ +(?s)\Aabc/(*COMMIT)(?:.*?/)??abc\z + +/**\/*a*b*g*n*t/ +(?s)(?:\A|/)(?>[^/]*?a)(?>[^/]*?b)(?>[^/]*?g)(?>[^/]*?n)(?>[^/]*?t\z) + abcd/abcdefg/abcdefghijk/abcdefghijklmnop.txt + 0: /abcdefghijklmnop.txt + +/**\/*a*\/**/ +(?s)(?:\A|/)(?>[^/]*?a)(?>[^/]*?/) + xx/xx/xx/xax/xx/xb + 0: /xax/ + +/**\/*a*/ +(?s)(?:\A|/)(?>[^/]*?a)(?>[^/]*+\z) + xx/xx/xx/xax + 0: /xax + xx/xx/xx/xax/xx +No match + +/**\/*a*\/**\/*b*/ +(?s)(?:\A|/)(?>[^/]*?a)(?>[^/]*?/)(*COMMIT)(?:.*?/)??(?>[^/]*?b)(?>[^/]*+\z) + xx/xx/xx/xax/xx/xb + 0: /xax/xx/xb + xx/xx/xx/xax/xx/x +No match + +"**a"convert=glob +(?s)a\z + a + 0: a + c/b/a + 0: a + c/b/aaa + 0: a + +"a**/b"convert=glob +(?s)\Aa(*COMMIT).*?/b\z + a/b + 0: a/b + ab +No match + +"a/**b"convert=glob +(?s)\Aa/(*COMMIT).*?b\z + a/b + 0: a/b + ab +No match + +#pattern convert=glob:glob_no_starstar + +/***/ +(?s)\A[^/]*+\z + +/**a**/ +(?s)\A[^/]*?a(*COMMIT)[^/]*+\z + +#pattern convert=unset +#pattern convert=glob:glob_no_wild_separator + +/*/ +(?s) + +/*a*/ +(?s)a + +/**a**/ +(?s)a + +/a*b/ +(?s)\Aa(*COMMIT).*?b\z + +/*a*b*/ +(?s)a(*COMMIT).*?b + +/??a??/ +(?s)\A..a..\z + +#pattern convert=unset +#pattern convert=glob,convert_glob_escape=0 + +/a\b\cd/ +(?s)\Aa\\b\\cd\z + +/**\/a/ +(?s)\\/a\z + +/a`*b/convert_glob_escape=` +(?s)\Aa\*b\z + +/a`*b/convert_glob_escape=0 +(?s)\Aa`(*COMMIT)[^/]*?b\z + +/a`*b/convert_glob_escape=x +** Invalid glob escape 'x' + +# -------- Tests of extended POSIX conversion -------- + +#pattern convert=unset:posix_extended + +/<[[:a[:digit:]b]>/ +(*NUL)<[[:a[:digit:]b]> + <[> + 0: <[> + <:> + 0: <:> + <a> + 0: <a> + <9> + 0: <9> + <b> + 0: <b> +\= Expect no match + <d> +No match + +/a+\1b\\c|d[ab\c]/ +(*NUL)a+1b\\c|d[ab\\c] + +/<[]bc]>/ +(*NUL)<[]bc]> + <]> + 0: <]> + <b> + 0: <b> + <c> + 0: <c> + +/<[^]bc]>/ +(*NUL)<[^]bc]> + <.> + 0: <.> +\= Expect no match + <]> +No match + <b> +No match + +/(a)\1b/ +(*NUL)(a)1b + a1b + 0: a1b + 1: a +\= Expect no match + aab +No match + +/(ab)c)d]/ +(*NUL)(ab)c\)d\] + Xabc)d]Y + 0: abc)d] + 1: ab + +/a***b/ +(*NUL)a*b + +# -------- Tests of basic POSIX conversion -------- + +#pattern convert=unset:posix_basic + +/a*b+c\+[def](ab)\(cd\)/ +(*NUL)a*b\+c\+[def]\(ab\)(cd) + +/\(a\)\1b/ +(*NUL)(a)\1b + aab + 0: aab + 1: a +\= Expect no match + a1b +No match + +/how.to how\.to/ +(*NUL)how.to how\.to + how\nto how.to + 0: how\x0ato how.to +\= Expect no match + how\x{0}to how.to +No match + +/^how to \^how to/ +(*NUL)^how to \^how to + +/^*abc/ +(*NUL)^\*abc + +/*abc/ +(*NUL)\*abc + X*abcY + 0: *abc + +/**abc/ +(*NUL)\**abc + XabcY + 0: abc + X*abcY + 0: *abc + X**abcY + 0: **abc + +/*ab\(*cd\)/ +(*NUL)\*ab(\*cd) + +/^b\(c^d\)\(^e^f\)/ +(*NUL)^b(c\^d)(^e\^f) + +/a***b/ +(*NUL)a*b + +# End of testinput24 diff --git a/testdata/testoutput25 b/testdata/testoutput25 new file mode 100644 index 0000000..4990293 --- /dev/null +++ b/testdata/testoutput25 @@ -0,0 +1,19 @@ +# This file tests the auxiliary pattern conversion features of the PCRE2 +# library, in UTF mode. + +#newline_default lf any anycrlf + +# -------- Tests of glob conversion -------- + +# Set the glob separator explicitly so that different OS defaults are not a +# problem. Then test various errors. + +#pattern convert=glob,convert_glob_escape=\,convert_glob_separator=/ + +# The fact that this one works in 13 bytes in the 8-bit library shows that the +# output is in UTF-8, though pcre2test shows the character as an escape. + +/'>' c4 a3 '<'/hex,utf,convert_length=13 +(?s)\A>\x{123}<\z + +# End of testinput25 diff --git a/testdata/testoutput4 b/testdata/testoutput4 index 701d411..6056e6d 100644 --- a/testdata/testoutput4 +++ b/testdata/testoutput4 @@ -958,7 +958,7 @@ No match 0: M 0: \x{442} -/[^ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞĀĂĄĆĈĊČĎĐĒĔĖĘĚĜĞĠĢĤĦĨĪĬĮİIJĴĶĹĻĽĿŁŃŅŇŊŌŎŐŒŔŖŘŚŜŞŠŢŤŦŨŪŬŮŰŲŴŶŸŹŻŽƁƂƄƆƇƉƊƋƎƏƐƑƓƔƖƗƘƜƝƟƠƢƤƦƧƩƬƮƯƱƲƳƵƷƸƼDŽLJNJǍǏǑǓǕǗǙǛǞǠǢǤǦǨǪǬǮDZǴǶǷǸǺǼǾȀȂȄȆȈȊȌȎȐȒȔȖȘȚȜȞȠȢȤȦȨȪȬȮȰȲȺȻȽȾɁΆΈΉΊΌΎΏΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩΪΫϒϓϔϘϚϜϞϠϢϤϦϨϪϬϮϴϷϹϺϽϾϿЀЁЂЃЄЅІЇЈЉЊЋЌЍЎЏАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯѠѢѤѦѨѪѬѮѰѲѴѶѸѺѼѾҀҊҌҎҐҒҔҖҘҚҜҞҠҢҤҦҨҪҬҮҰҲҴҶҸҺҼҾӀӁӃӅӇӉӋӍӐӒӔӖӘӚӜӞӠӢӤӦӨӪӬӮӰӲӴӶӸԀԂԄԆԈԊԌԎԱԲԳԴԵԶԷԸԹԺԻԼԽԾԿՀՁՂՃՄՅՆՇՈՉՊՋՌՍՎՏՐՑՒՓՔՕՖႠႡႢႣႤႥႦႧႨႩႪႫႬႭႮႯႰႱႲႳႴႵႶႷႸႹႺႻႼႽႾႿჀჁჂჃჄჅḀḂḄḆḈḊḌḎḐḒḔḖḘḚḜḞḠḢḤḦḨḪḬḮḰḲḴḶḸḺḼḾṀṂṄṆṈṊṌṎṐṒṔṖṘṚṜṞṠṢṤṦṨṪṬṮṰṲṴṶṸṺṼṾẀẂẄẆẈẊẌẎẐẒẔẠẢẤẦẨẪẬẮẰẲẴẶẸẺẼẾỀỂỄỆỈỊỌỎỐỒỔỖỘỚỜỞỠỢỤỦỨỪỬỮỰỲỴỶỸἈἉἊἋἌἍἎἏἘἙἚἛἜἝἨἩἪἫἬἭἮἯἸἹἺἻἼἽἾἿὈὉὊὋὌὍὙὛὝὟὨὩὪὫὬὭὮὯᾸᾹᾺΆῈΈῊΉῘῙῚΊῨῩῪΎῬῸΌῺΏabcdefghijklmnopqrstuvwxyzªµºßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿāăąćĉċčďđēĕėęěĝğġģĥħĩīĭįıijĵķĸĺļľŀłńņňʼnŋōŏőœŕŗřśŝşšţťŧũūŭůűųŵŷźżžſƀƃƅƈƌƍƒƕƙƚƛƞơƣƥƨƪƫƭưƴƶƹƺƽƾƿdžljnjǎǐǒǔǖǘǚǜǝǟǡǣǥǧǩǫǭǯǰdzǵǹǻǽǿȁȃȅȇȉȋȍȏȑȓȕȗșțȝȟȡȣȥȧȩȫȭȯȱȳȴȵȶȷȸȹȼȿɀɐɑɒɓɔɕɖɗɘəɚɛɜɝɞɟɠɡɢɣɤɥɦɧɨɩɪɫɬɭɮɯɰɱɲɳɴɵɶɷɸɹɺɻɼɽɾɿʀʁʂʃʄʅʆʇʈʉʊʋʌʍʎʏʐʑʒʓʔʕʖʗʘʙʚʛʜʝʞʟʠʡʢʣʤʥʦʧʨʩʪʫʬʭʮʯΐάέήίΰαβγδεζηθικλμνξοπρςστυφχψωϊϋόύώϐϑϕϖϗϙϛϝϟϡϣϥϧϩϫϭϯϰϱϲϳϵϸϻϼабвгдежзийклмнопрстуфхцчшщъыьэюяѐёђѓєѕіїјљњћќѝўџѡѣѥѧѩѫѭѯѱѳѵѷѹѻѽѿҁҋҍҏґғҕҗҙқҝҟҡңҥҧҩҫҭүұҳҵҷҹһҽҿӂӄӆӈӊӌӎӑӓӕӗәӛӝӟӡӣӥӧөӫӭӯӱӳӵӷӹԁԃԅԇԉԋԍԏաբգդեզէըթժիլխծկհձղճմյնշոչպջռսվտրցւփքօֆևᴀᴁᴂᴃᴄᴅᴆᴇᴈᴉᴊᴋᴌᴍᴎᴏᴐᴑᴒᴓᴔᴕᴖᴗᴘᴙᴚᴛᴜᴝᴞᴟᴠᴡᴢᴣᴤᴥᴦᴧᴨᴩᴪᴫᵢᵣᵤᵥᵦᵧᵨᵩᵪᵫᵬᵭᵮᵯᵰᵱᵲᵳᵴᵵᵶᵷᵹᵺᵻᵼᵽᵾᵿᶀᶁᶂᶃᶄᶅᶆᶇᶈᶉᶊᶋᶌᶍᶎᶏᶐᶑᶒᶓᶔᶕᶖᶗᶘᶙᶚḁḃḅḇḉḋḍḏḑḓḕḗḙḛḝḟḡḣḥḧḩḫḭḯḱḳḵḷḹḻḽḿṁṃṅṇṉṋṍṏṑṓṕṗṙṛṝṟṡṣṥṧṩṫṭṯṱṳṵṷṹṻṽṿẁẃẅẇẉẋẍẏẑẓẕẖẗẘẙẚẛạảấầẩẫậắằẳẵặẹẻẽếềểễệỉịọỏốồổỗộớờởỡợụủứừửữựỳỵỷỹἀἁἂἃἄἅἆἇἐἑἒἓἔἕἠἡἢἣἤἥἦἧἰἱἲἳἴἵἶἷὀὁὂὃὄὅὐὑὒὓὔὕὖὗὠὡὢὣὤὥὦὧὰάὲέὴήὶίὸόὺύὼώᾀᾁᾂᾃᾄᾅᾆᾇᾐᾑᾒᾓᾔᾕᾖᾗᾠᾡᾢᾣᾤᾥᾦᾧᾰᾱᾲᾳᾴᾶᾷιῂῃῄῆῇῐῑῒΐῖῗῠῡῢΰῤῥῦῧῲῳῴῶῷⲁⲃⲅⲇⲉⲋⲍⲏⲑⲓⲕⲗⲙⲛⲝⲟⲡⲣⲥⲧⲩⲫⲭⲯⲱⲳⲵⲷⲹⲻⲽⲿⳁⳃⳅⳇⳉⳋⳍⳏⳑⳓⳕⳗⳙⳛⳝⳟⳡⳣⳤⴀⴁⴂⴃⴄⴅⴆⴇⴈⴉⴊⴋⴌⴍⴎⴏⴐⴑⴒⴓⴔⴕⴖⴗⴘⴙⴚⴛⴜⴝⴞⴟⴠⴡⴢⴣⴤⴥfffiflffifflſtstﬓﬔﬕﬖﬗ\d-_^]/utf +/[^ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞĀĂĄĆĈĊČĎĐĒĔĖĘĚĜĞĠĢĤĦĨĪĬĮİIJĴĶĹĻĽĿŁŃŅŇŊŌŎŐŒŔŖŘŚŜŞŠŢŤŦŨŪŬŮŰŲŴŶŸŹŻŽƁƂƄƆƇƉƊƋƎƏƐƑƓƔƖƗƘƜƝƟƠƢƤƦƧƩƬƮƯƱƲƳƵƷƸƼDŽLJNJǍǏǑǓǕǗǙǛǞǠǢǤǦǨǪǬǮDZǴǶǷǸǺǼǾȀȂȄȆȈȊȌȎȐȒȔȖȘȚȜȞȠȢȤȦȨȪȬȮȰȲȺȻȽȾɁΆΈΉΊΌΎΏΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩΪΫϒϓϔϘϚϜϞϠϢϤϦϨϪϬϮϴϷϹϺϽϾϿЀЁЂЃЄЅІЇЈЉЊЋЌЍЎЏАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯѠѢѤѦѨѪѬѮѰѲѴѶѸѺѼѾҀҊҌҎҐҒҔҖҘҚҜҞҠҢҤҦҨҪҬҮҰҲҴҶҸҺҼҾӀӁӃӅӇӉӋӍӐӒӔӖӘӚӜӞӠӢӤӦӨӪӬӮӰӲӴӶӸԀԂԄԆԈԊԌԎԱԲԳԴԵԶԷԸԹԺԻԼԽԾԿՀՁՂՃՄՅՆՇՈՉՊՋՌՍՎՏՐՑՒՓՔՕՖႠႡႢႣႤႥႦႧႨႩႪႫႬႭႮႯႰႱႲႳႴႵႶႷႸႹႺႻႼႽႾႿჀჁჂჃჄჅḀḂḄḆḈḊḌḎḐḒḔḖḘḚḜḞḠḢḤḦḨḪḬḮḰḲḴḶḸḺḼḾṀṂṄṆṈṊṌṎṐṒṔṖṘṚṜṞṠṢṤṦṨṪṬṮṰṲṴṶṸṺṼṾẀẂẄẆẈẊẌẎẐẒẔẠẢẤẦẨẪẬẮẰẲẴẶẸẺẼẾỀỂỄỆỈỊỌỎỐỒỔỖỘỚỜỞỠỢỤỦỨỪỬỮỰỲỴỶỸἈἉἊἋἌἍἎἏἘἙἚἛἜἝἨἩἪἫἬἭἮἯἸἹἺἻἼἽἾἿὈὉὊὋὌὍὙὛὝὟὨὩὪὫὬὭὮὯᾸᾹᾺΆῈΈῊΉῘῙῚΊῨῩῪΎῬῸΌῺΏabcdefghijklmnopqrstuvwxyzªµºßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿāăąćĉċčďđēĕėęěĝğġģĥħĩīĭįıijĵķĸĺļľŀłńņňʼnŋōŏőœŕŗřśŝşšţťŧũūŭůűųŵŷźżžſƀƃƅƈƌƍƒƕƙƚƛƞơƣƥƨƪƫƭưƴƶƹƺƽƾƿdžljnjǎǐǒǔǖǘǚǜǝǟǡǣǥǧǩǫǭǯǰdzǵǹǻǽǿȁȃȅȇȉȋȍȏȑȓȕȗșțȝȟȡȣȥȧȩȫȭȯȱȳȴȵȶȷȸȹȼȿɀɐɑɒɓɔɕɖɗɘəɚɛɜɝɞɟɠɡɢɣɤɥɦɧɨɩɪɫɬɭɮɯɰɱɲɳɴɵɶɷɸɹɺɻɼɽɾɿʀʁʂʃʄʅʆʇʈʉʊʋʌʍʎʏʐʑʒʓʔʕʖʗʘʙʚʛʜʝʞʟʠʡʢʣʤʥʦʧʨʩʪʫʬʭʮʯΐάέήίΰαβγδεζηθικλμνξοπρςστυφχψωϊϋόύώϐϑϕϖϗϙϛϝϟϡϣϥϧϩϫϭϯϰϱϲϳϵϸϻϼабвгдежзийклмнопрстуфхцчшщъыьэюяѐёђѓєѕіїјљњћќѝўџѡѣѥѧѩѫѭѯѱѳѵѷѹѻѽѿҁҋҍҏґғҕҗҙқҝҟҡңҥҧҩҫҭүұҳҵҷҹһҽҿӂӄӆӈӊӌӎӑӓӕӗәӛӝӟӡӣӥӧөӫӭӯӱӳӵӷӹԁԃԅԇԉԋԍԏաբգդեզէըթժիլխծկհձղճմյնշոչպջռսվտրցւփքօֆևᴀᴁᴂᴃᴄᴅᴆᴇᴈᴉᴊᴋᴌᴍᴎᴏᴐᴑᴒᴓᴔᴕᴖᴗᴘᴙᴚᴛᴜᴝᴞᴟᴠᴡᴢᴣᴤᴥᴦᴧᴨᴩᴪᴫᵢᵣᵤᵥᵦᵧᵨᵩᵪᵫᵬᵭᵮᵯᵰᵱᵲᵳᵴᵵᵶᵷᵹᵺᵻᵼᵽᵾᵿᶀᶁᶂᶃᶄᶅᶆᶇᶈᶉᶊᶋᶌᶍᶎᶏᶐᶑᶒᶓᶔᶕᶖᶗᶘᶙᶚḁḃḅḇḉḋḍḏḑḓḕḗḙḛḝḟḡḣḥḧḩḫḭḯḱḳḵḷḹḻḽḿṁṃṅṇṉṋṍṏṑṓṕṗṙṛṝṟṡṣṥṧṩṫṭṯṱṳṵṷṹṻṽṿẁẃẅẇẉẋẍẏẑẓẕẖẗẘẙẚẛạảấầẩẫậắằẳẵặẹẻẽếềểễệỉịọỏốồổỗộớờởỡợụủứừửữựỳỵỷỹἀἁἂἃἄἅἆἇἐἑἒἓἔἕἠἡἢἣἤἥἦἧἰἱἲἳἴἵἶἷὀὁὂὃὄὅὐὑὒὓὔὕὖὗὠὡὢὣὤὥὦὧὰάὲέὴήὶίὸόὺύὼώᾀᾁᾂᾃᾄᾅᾆᾇᾐᾑᾒᾓᾔᾕᾖᾗᾠᾡᾢᾣᾤᾥᾦᾧᾰᾱᾲᾳᾴᾶᾷιῂῃῄῆῇῐῑῒΐῖῗῠῡῢΰῤῥῦῧῲῳῴῶῷⲁⲃⲅⲇⲉⲋⲍⲏⲑⲓⲕⲗⲙⲛⲝⲟⲡⲣⲥⲧⲩⲫⲭⲯⲱⲳⲵⲷⲹⲻⲽⲿⳁⳃⳅⳇⳉⳋⳍⳏⳑⳓⳕⳗⳙⳛⳝⳟⳡⳣⳤⴀⴁⴂⴃⴄⴅⴆⴇⴈⴉⴊⴋⴌⴍⴎⴏⴐⴑⴒⴓⴔⴕⴖⴗⴘⴙⴚⴛⴜⴝⴞⴟⴠⴡⴢⴣⴤⴥfffiflffifflſtstﬓﬔﬕﬖﬗ\d_^]/utf /^[^d]*?$/ abc @@ -2716,6 +2716,13 @@ No match \x{1f88}\x{1f80} 0: \x{1f88}\x{1f80} +# Check a reference with more than one other case + +/^(\x{00b5})\1{2}$/i,utf + \x{00b5}\x{039c}\x{03bc} + 0: \x{b5}\x{39c}\x{3bc} + 1: \x{b5} + # Characters with more than one other case; test in classes /[z\x{00b5}]+/i,utf @@ -3703,4 +3710,23 @@ No match \x{20ac} No match +/(?=.*b)\pL/ + 11bb + 0: b + +/(?(?=.*b)(?=.*b)\pL|.*c)/ + 11bb + 0: b + +/^\x{123}+?$/utf,no_auto_possess + \x{123}\x{123}\x{123} + 0: \x{123}\x{123}\x{123} + +/^\x{123}+?$/i,utf,no_auto_possess + \x{123}\x{122}\x{123} + 0: \x{123}\x{122}\x{123} +\= Expect no match + \x{123}\x{124}\x{123} +No match + # End of testinput4 diff --git a/testdata/testoutput5 b/testdata/testoutput5 index f19ad8c..4b3171c 100644 --- a/testdata/testoutput5 +++ b/testdata/testoutput5 @@ -3,18 +3,22 @@ # results in 8-bit, 16-bit, and 32-bit modes are excluded (see tests 10 and # 12). +#newline_default lf any anycrlf + # PCRE2 and Perl disagree about the characteristics of certain Unicode -# characters. For example, 061C is considered by Perl to be Arabic, though -# is it not listed as such in the Unicode Scripts.txt file, and 2066-2069 are -# graphic and printable according to Perl, though they are actually "isolate" -# control characters. That is why the following tests are here rather than in -# test 4. +# characters. For example, 061C was considered by Perl to be Arabic, though +# it was not listed as such in the Unicode Scripts.txt file for Unicode 8. +# However, it *is* in that file for Unicode 10, but when I came to re-check, +# Perl had changed in the meantime, with 5.026 not recognizing it as Arabic. + +# 2066-2069 are graphic and printable according to Perl, though they are +# actually "isolate" control characters. That is why the following tests are +# here rather than in test 4. /^[\p{Arabic}]/utf -\= Expect no match \x{061c} -No match - + 0: \x{61c} + /^[[:graph:]]+$/utf,ucp \= Expect no match \x{61c} @@ -52,14 +56,14 @@ No match 0: \x{09}\x{1d}\x{85}\x{61c}\x{2028}\x{2029}\x{2065}\x{2066}\x{2067} \x{2068}\x{2069} 0: \x{2068}\x{2069} - + # Perl does not consider U+180e to be a space character. It is true that it # does not appear in the Unicode PropList.txt file as such, but in many other # sources it is listed as a space, and has been treated as such in PCRE for -# a long time. +# a long time. /^>[[:blank:]]*/utf,ucp - >\x{20}\x{a0}\x{1680}\x{180e}\x{2000}\x{202f}\x{9}\x{b}\x{2028} + >\x{20}\x{a0}\x{1680}\x{180e}\x{2000}\x{202f}\x{9}\x{b}\x{2028} 0: > \x{a0}\x{1680}\x{180e}\x{2000}\x{202f}\x{09} /^A\s+Z/utf,ucp @@ -71,7 +75,7 @@ No match 0: A\x{2005}Z A\x{85}\x{2005}Z 0: A\x{85}\x{2005}Z - + /^[[:graph:]]+$/utf,ucp \= Expect no match \x{180e} @@ -150,7 +154,7 @@ Last code unit = '.' Subject length lower bound = 4 \x{0041}\x{2262}\x{0391}\x{002e} 0: A\x{2262}\x{391}. - + /.{3,5}X/IB,utf ------------------------------------------------------------------ Bra @@ -192,6 +196,7 @@ Subject length lower bound = 3 Capturing subpattern count = 0 Compile options: utf Overall options: anchored utf +Starting code units: a b Subject length lower bound = 1 bar 0: b @@ -200,51 +205,29 @@ Subject length lower bound = 1 No match \x{ff} No match - \x{100} + \x{100} No match -/^[^ab]/IB,utf ------------------------------------------------------------------- - Bra - ^ - [\x00-`c-\xff] (neg) - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -Compile options: utf -Overall options: anchored utf -Subject length lower bound = 1 - c - 0: c - \x{ff} - 0: \x{ff} - \x{100} - 0: \x{100} -\= Expect no match - aaa -No match - /\x{100}*(\d+|"(?1)")/utf 1234 0: 1234 1: 1234 - "1234" + "1234" 0: "1234" 1: "1234" \x{100}1234 0: \x{100}1234 1: 1234 - "\x{100}1234" + "\x{100}1234" 0: \x{100}1234 1: 1234 - \x{100}\x{100}12ab + \x{100}\x{100}12ab 0: \x{100}\x{100}12 1: 12 - \x{100}\x{100}"12" + \x{100}\x{100}"12" 0: \x{100}\x{100}"12" 1: "12" -\= Expect no match +\= Expect no match \x{100}\x{100}abcd No match @@ -298,7 +281,7 @@ Failed: error 108 at offset 15: range out of order in character class \= Expect no match \x{105} No match - \x{ff} + \x{ff} No match /[\xFF]/IB @@ -329,23 +312,23 @@ Subject length lower bound = 1 0: \x{d6} \x{d6} 0: \x{d6} - + /[Ä-Ü]/utf Ö <-- Same with Study 0: \x{d6} \x{d6} 0: \x{d6} - + /[\x{c4}-\x{dc}]/utf Ö # Matches without Study 0: \x{d6} - \x{d6} + \x{d6} 0: \x{d6} /[\x{c4}-\x{dc}]/utf Ö <-- Same with Study 0: \x{d6} - \x{d6} + \x{d6} 0: \x{d6} /[^\x{100}]abc(xyz(?1))/IB,utf @@ -470,14 +453,17 @@ Subject length lower bound = 0 /\W/utf A.B 0: . - A\x{100}B + A\x{100}B 0: \x{100} - + /\w/utf - \x{100}X + \x{100}X 0: X -/^\ሴ/IB,utf +# Use no_start_optimize because the first code unit is different in 8-bit from +# the wider modes. + +/^\ሴ/IB,utf,no_start_optimize ------------------------------------------------------------------ Bra ^ @@ -486,16 +472,16 @@ Subject length lower bound = 0 End ------------------------------------------------------------------ Capturing subpattern count = 0 -Compile options: utf -Overall options: anchored utf -Subject length lower bound = 1 +Compile options: no_start_optimize utf +Overall options: anchored no_start_optimize utf +Subject length lower bound = 0 /()()()()()()()()()() ()()()()()()()()()() ()()()()()()()()()() ()()()()()()()()()() A (x) (?41) B/x,utf - AxxB + AxxB Matched, but too many substrings 0: AxxB 1: @@ -566,14 +552,14 @@ Matched, but too many substrings 0: a\x{0b}b a\x0cb 0: a\x{0c}b - a\x{85}b + a\x{85}b 0: a\x{85}b - a\x{2028}b + a\x{2028}b 0: a\x{2028}b - a\x{2029}b + a\x{2029}b 0: a\x{2029}b \= Expect no match - a\n\rb + a\n\rb No match /^a\R*b/bsr=unicode,utf @@ -589,11 +575,11 @@ No match 0: a\x{0b}b a\x0c\x{2028}\x{2029}b 0: a\x{0c}\x{2028}\x{2029}b - a\x{85}b + a\x{85}b 0: a\x{85}b - a\n\rb + a\n\rb 0: a\x{0a}\x{0d}b - a\n\r\x{85}\x0cb + a\n\r\x{85}\x0cb 0: a\x{0a}\x{0d}\x{85}\x{0c}b /^a\R+b/bsr=unicode,utf @@ -607,14 +593,14 @@ No match 0: a\x{0b}b a\x0c\x{2028}\x{2029}b 0: a\x{0c}\x{2028}\x{2029}b - a\x{85}b + a\x{85}b 0: a\x{85}b - a\n\rb + a\n\rb 0: a\x{0a}\x{0d}b - a\n\r\x{85}\x0cb + a\n\r\x{85}\x0cb 0: a\x{0a}\x{0d}\x{85}\x{0c}b \= Expect no match - ab + ab No match /^a\R{1,3}b/bsr=unicode,utf @@ -624,13 +610,13 @@ No match 0: a\x{0a}\x{0d}b a\n\r\x{85}b 0: a\x{0a}\x{0d}\x{85}b - a\r\n\r\nb + a\r\n\r\nb 0: a\x{0d}\x{0a}\x{0d}\x{0a}b - a\r\n\r\n\r\nb + a\r\n\r\n\r\nb 0: a\x{0d}\x{0a}\x{0d}\x{0a}\x{0d}\x{0a}b a\n\r\n\rb 0: a\x{0a}\x{0d}\x{0a}\x{0d}b - a\n\n\r\nb + a\n\n\r\nb 0: a\x{0a}\x{0a}\x{0d}\x{0a}b \= Expect no match a\n\n\n\rb @@ -644,9 +630,9 @@ No match X\x09X\x0b 0: X\x{09}X\x{0b} \= Expect no match - \x{a0} X\x0a + \x{a0} X\x0a No match - + /\H*\h+\V?\v{3,4}/utf \x09\x20\x{a0}X\x0a\x0b\x0c\x0d\x0a 0: \x{09} \x{a0}X\x{0a}\x{0b}\x{0c}\x{0d} @@ -654,19 +640,19 @@ No match 0: \x{09} \x{a0}\x{0a}\x{0b}\x{0c}\x{0d} \x09\x20\x{a0}\x0a\x0b\x0c 0: \x{09} \x{a0}\x{0a}\x{0b}\x{0c} -\= Expect no match +\= Expect no match \x09\x20\x{a0}\x0a\x0b No match - + /\H\h\V\v/utf \x{3001}\x{3000}\x{2030}\x{2028} 0: \x{3001}\x{3000}\x{2030}\x{2028} X\x{180e}X\x{85} 0: X\x{180e}X\x{85} \= Expect no match - \x{2009} X\x0a + \x{2009} X\x0a No match - + /\H*\h+\V?\v{3,4}/utf \x{1680}\x{180e}\x{2007}X\x{2028}\x{2029}\x0c\x0d\x0a 0: \x{1680}\x{180e}\x{2007}X\x{2028}\x{2029}\x{0c}\x{0d} @@ -674,10 +660,10 @@ No match 0: \x{09}\x{205f}\x{a0}\x{0a}\x{2029}\x{0c}\x{2028} \x09\x20\x{202f}\x0a\x0b\x0c 0: \x{09} \x{202f}\x{0a}\x{0b}\x{0c} -\= Expect no match +\= Expect no match \x09\x{200a}\x{a0}\x{2028}\x0b No match - + /[\h]/B,utf ------------------------------------------------------------------ Bra @@ -723,9 +709,9 @@ No match ------------------------------------------------------------------ /.*$/newline=any,utf - \x{1ec5} + \x{1ec5} 0: \x{1ec5} - + /a\Rb/I,bsr=anycrlf,utf Capturing subpattern count = 0 Options: utf @@ -742,7 +728,7 @@ Subject length lower bound = 3 \= Expect no match a\x{85}b No match - a\x0bb + a\x0bb No match /a\Rb/I,bsr=unicode,utf @@ -760,9 +746,9 @@ Subject length lower bound = 3 0: a\x{0d}\x{0a}b a\x{85}b 0: a\x{85}b - a\x0bb + a\x0bb 0: a\x{0b}b - + /a\R?b/I,bsr=anycrlf,utf Capturing subpattern count = 0 Options: utf @@ -779,7 +765,7 @@ Subject length lower bound = 2 \= Expect no match a\x{85}b No match - a\x0bb + a\x0bb No match /a\R?b/I,bsr=unicode,utf @@ -797,16 +783,16 @@ Subject length lower bound = 2 0: a\x{0d}\x{0a}b a\x{85}b 0: a\x{85}b - a\x0bb + a\x0bb 0: a\x{0b}b - + /.*a.*=.b.*/utf,newline=any QQQ\x{2029}ABCaXYZ=!bPQR 0: ABCaXYZ=!bPQR \= Expect no match a\x{2029}b No match - \x61\xe2\x80\xa9\x62 + \x61\xe2\x80\xa9\x62 No match /[[:a\x{100}b:]]/utf @@ -815,19 +801,19 @@ Failed: error 130 at offset 3: unknown POSIX class name /a[^]b/utf,alt_bsux,allow_empty_class,match_unset_backref a\x{1234}b 0: a\x{1234}b - a\nb + a\nb 0: a\x{0a}b \= Expect no match - ab + ab No match - + /a[^]+b/utf,alt_bsux,allow_empty_class,match_unset_backref aXb 0: aXb - a\nX\nX\x{1234}b + a\nX\nX\x{1234}b 0: a\x{0a}X\x{0a}X\x{1234}b \= Expect no match - ab + ab No match /(\x{de})\1/ @@ -850,7 +836,7 @@ Partial match: Xaa Partial match: Xaaa Xaaaa\=ps Partial match: Xaaaa - + /Xa{2,4}?b/utf X\=ps Partial match: X @@ -862,7 +848,7 @@ Partial match: Xaa Partial match: Xaaa Xaaaa\=ps Partial match: Xaaaa - + /Xa{2,4}+b/utf X\=ps Partial match: X @@ -874,7 +860,7 @@ Partial match: Xaa Partial match: Xaaa Xaaaa\=ps Partial match: Xaaaa - + /X\x{123}{2,4}b/utf X\=ps Partial match: X @@ -886,7 +872,7 @@ Partial match: X\x{123}\x{123} Partial match: X\x{123}\x{123}\x{123} X\x{123}\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123}\x{123} - + /X\x{123}{2,4}?b/utf X\=ps Partial match: X @@ -898,7 +884,7 @@ Partial match: X\x{123}\x{123} Partial match: X\x{123}\x{123}\x{123} X\x{123}\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123}\x{123} - + /X\x{123}{2,4}+b/utf X\=ps Partial match: X @@ -910,7 +896,7 @@ Partial match: X\x{123}\x{123} Partial match: X\x{123}\x{123}\x{123} X\x{123}\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123}\x{123} - + /X\x{123}{2,4}b/utf \= Expect no match Xx\=ps @@ -923,7 +909,7 @@ No match No match X\x{123}\x{123}\x{123}\x{123}x\=ps No match - + /X\x{123}{2,4}?b/utf \= Expect no match Xx\=ps @@ -936,7 +922,7 @@ No match No match X\x{123}\x{123}\x{123}\x{123}x\=ps No match - + /X\x{123}{2,4}+b/utf \= Expect no match Xx\=ps @@ -949,7 +935,7 @@ No match No match X\x{123}\x{123}\x{123}\x{123}x\=ps No match - + /X\d{2,4}b/utf X\=ps Partial match: X @@ -961,7 +947,7 @@ Partial match: X33 Partial match: X333 X3333\=ps Partial match: X3333 - + /X\d{2,4}?b/utf X\=ps Partial match: X @@ -973,7 +959,7 @@ Partial match: X33 Partial match: X333 X3333\=ps Partial match: X3333 - + /X\d{2,4}+b/utf X\=ps Partial match: X @@ -997,7 +983,7 @@ Partial match: Xaa Partial match: Xaaa Xaaaa\=ps Partial match: Xaaaa - + /X\D{2,4}?b/utf X\=ps Partial match: X @@ -1009,7 +995,7 @@ Partial match: Xaa Partial match: Xaaa Xaaaa\=ps Partial match: Xaaaa - + /X\D{2,4}+b/utf X\=ps Partial match: X @@ -1033,7 +1019,7 @@ Partial match: X\x{123}\x{123} Partial match: X\x{123}\x{123}\x{123} X\x{123}\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123}\x{123} - + /X\D{2,4}?b/utf X\=ps Partial match: X @@ -1045,7 +1031,7 @@ Partial match: X\x{123}\x{123} Partial match: X\x{123}\x{123}\x{123} X\x{123}\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123}\x{123} - + /X\D{2,4}+b/utf X\=ps Partial match: X @@ -1069,7 +1055,7 @@ Partial match: Xaa Partial match: Xaaa Xaaaa\=ps Partial match: Xaaaa - + /X[abc]{2,4}?b/utf X\=ps Partial match: X @@ -1081,7 +1067,7 @@ Partial match: Xaa Partial match: Xaaa Xaaaa\=ps Partial match: Xaaaa - + /X[abc]{2,4}+b/utf X\=ps Partial match: X @@ -1105,7 +1091,7 @@ Partial match: X\x{123}\x{123} Partial match: X\x{123}\x{123}\x{123} X\x{123}\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123}\x{123} - + /X[abc\x{123}]{2,4}?b/utf X\=ps Partial match: X @@ -1117,7 +1103,7 @@ Partial match: X\x{123}\x{123} Partial match: X\x{123}\x{123}\x{123} X\x{123}\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123}\x{123} - + /X[abc\x{123}]{2,4}+b/utf X\=ps Partial match: X @@ -1141,7 +1127,7 @@ Partial match: Xzz Partial match: Xzzz Xzzzz\=ps Partial match: Xzzzz - + /X[^a]{2,4}?b/utf X\=ps Partial match: X @@ -1153,7 +1139,7 @@ Partial match: Xzz Partial match: Xzzz Xzzzz\=ps Partial match: Xzzzz - + /X[^a]{2,4}+b/utf X\=ps Partial match: X @@ -1177,7 +1163,7 @@ Partial match: X\x{123}\x{123} Partial match: X\x{123}\x{123}\x{123} X\x{123}\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123}\x{123} - + /X[^a]{2,4}?b/utf X\=ps Partial match: X @@ -1189,7 +1175,7 @@ Partial match: X\x{123}\x{123} Partial match: X\x{123}\x{123}\x{123} X\x{123}\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123}\x{123} - + /X[^a]{2,4}+b/utf X\=ps Partial match: X @@ -1213,7 +1199,7 @@ Partial match: YXYY Partial match: YXYYY YXYYYY\=ps Partial match: YXYYYY - + /(Y)X\1{2,4}?b/utf YX\=ps Partial match: YX @@ -1225,7 +1211,7 @@ Partial match: YXYY Partial match: YXYYY YXYYYY\=ps Partial match: YXYYYY - + /(Y)X\1{2,4}+b/utf YX\=ps Partial match: YX @@ -1249,7 +1235,7 @@ Partial match: \x{123}X\x{123}\x{123} Partial match: \x{123}X\x{123}\x{123}\x{123} \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps Partial match: \x{123}X\x{123}\x{123}\x{123}\x{123} - + /(\x{123})X\1{2,4}?b/utf \x{123}X\=ps Partial match: \x{123}X @@ -1261,7 +1247,7 @@ Partial match: \x{123}X\x{123}\x{123} Partial match: \x{123}X\x{123}\x{123}\x{123} \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps Partial match: \x{123}X\x{123}\x{123}\x{123}\x{123} - + /(\x{123})X\1{2,4}+b/utf \x{123}X\=ps Partial match: \x{123}X @@ -1326,7 +1312,7 @@ Partial match: X 0: \x{a0}xxx\x{85} /\S \S/utf,tables=2 - \x{a2} \x{84} + \x{a2} \x{84} 0: \x{a2} \x{84} 'A#хц'Bx,newline=any,utf @@ -1345,7 +1331,7 @@ Partial match: X Ket End ------------------------------------------------------------------ - + /a+#хaa z#XX?/Bx,newline=any,utf ------------------------------------------------------------------ @@ -1398,11 +1384,11 @@ Failed: error 168 at offset 3: \c must be followed by a printable ASCII characte 0: \x{0d} 1: 2: \x{0d} - \r\r\n\n\r + \r\r\n\n\r 0: \x{0d}\x{0d}\x{0a}\x{0a}\x{0d} 1: \x{0d}\x{0d}\x{0a}\x{0a} 2: \x{0d} - \r\r\n\n\r\n + \r\r\n\n\r\n 0: \x{0d}\x{0d}\x{0a}\x{0a}\x{0d} 1: \x{0d}\x{0d}\x{0a}\x{0a} 2: \x{0d} @@ -1412,11 +1398,11 @@ Failed: error 168 at offset 3: \c must be followed by a printable ASCII characte 0: \x{0d} 1: <unset> 2: \x{0d} - \r\r\n\n\r + \r\r\n\n\r 0: \x{0d}\x{0d}\x{0a}\x{0a}\x{0d} 1: \x{0a} 2: \x{0d} - \r\r\n\n\r\n + \r\r\n\n\r\n 0: \x{0d}\x{0d}\x{0a}\x{0a}\x{0d} 1: \x{0a} 2: \x{0d} @@ -1456,7 +1442,7 @@ Partial match: for /f.*/s,utf for\=ph Partial match: for - + /\x{d7ff}\x{e000}/utf /\x{d800}/utf @@ -1631,7 +1617,7 @@ Partial match: abaBabA 0: \x{0d} \r\=ph Partial match: \x{0d} - + /.{2,3}/utf,newline=crlf \r\=ps Partial match: \x{0d} @@ -1746,7 +1732,7 @@ No match ------------------------------------------------------------------ /\ud800/utf,alt_bsux,allow_empty_class,match_unset_backref -Failed: error 173 at offset 5: disallowed Unicode code point (>= 0xd800 && <= 0xdfff) +Failed: error 173 at offset 6: disallowed Unicode code point (>= 0xd800 && <= 0xdfff) /^a+[a\x{200}]/B,utf ------------------------------------------------------------------ @@ -1853,10 +1839,10 @@ Subject length lower bound = 1 0: 1234 12-34 0: 12-34 - 12+\x{661}-34 + 12+\x{661}-34 0: 12+\x{661}-34 \= Expect no match - abcd + abcd No match /(?:[\PPa*]*){8,}/ @@ -1925,7 +1911,7 @@ No match 0: \x{2028}\x{2028} \x{2028}\x{2028}\x{2028} 0: \x{2028}\x{2028}\x{2028} - + /\p{Zl}/B,utf ------------------------------------------------------------------ Bra @@ -1976,9 +1962,9 @@ No match \x{dfff}\=no_utf_check 0: \x{dfff} \= Expect no match - \x{09f} + \x{09f} No match - + /^\p{Mn}/utf \x{1a1b} 0: \x{1a1b} @@ -2005,7 +1991,7 @@ No match No match \x{2c2} No match - + /^\p{Zs}/utf \ \ 0: @@ -2015,29 +2001,29 @@ No match 0: \x{1680} \x{2000} 0: \x{2000} - \x{2001} + \x{2001} 0: \x{2001} \= Expect no match \x{2028} No match - \x{200d} + \x{200d} No match - + # These are here because Perl has problems with the negative versions of the # properties and has changed how it behaves for caseless matching. - + /\p{^Lu}/i,utf 1234 0: 1 \= Expect no match - ABC + ABC No match /\P{Lu}/i,utf 1234 0: 1 \= Expect no match - ABC + ABC No match /\p{Ll}/i,utf @@ -2046,18 +2032,18 @@ No match Az 0: z \= Expect no match - ABC + ABC No match /\p{Lu}/i,utf A 0: A - a\x{10a0}B + a\x{10a0}B 0: \x{10a0} -\= Expect no match +\= Expect no match a No match - \x{1d00} + \x{1d00} No match /\p{Lu}/i,utf @@ -2066,24 +2052,24 @@ No match aZ 0: Z \= Expect no match - abc + abc No match /[\x{c0}\x{391}]/i,utf \x{c0} 0: \x{c0} - \x{e0} + \x{e0} 0: \x{e0} # The next two are special cases where the lengths of the different cases of # the same character differ. The first went wrong with heap frame storage; the -# second was broken in all cases. +# second was broken in all cases. /^\x{023a}+?(\x{0130}+)/i,utf \x{023a}\x{2c65}\x{0130} 0: \x{23a}\x{2c65}\x{130} 1: \x{130} - + /^\x{023a}+([^X])/i,utf \x{023a}\x{2c65}X 0: \x{23a}\x{2c65} @@ -2114,34 +2100,34 @@ No match /^\x{c0}$/i,utf \x{c0} 0: \x{c0} - \x{e0} + \x{e0} 0: \x{e0} /^\x{e0}$/i,utf \x{c0} 0: \x{c0} - \x{e0} + \x{e0} 0: \x{e0} # The next two should be Perl-compatible, but it fails to match \x{e0}. PCRE # will match it only with UCP support, because without that it has no notion -# of case for anything other than the ASCII letters. +# of case for anything other than the ASCII letters. /((?i)[\x{c0}])/utf \x{c0} 0: \x{c0} 1: \x{c0} - \x{e0} + \x{e0} 0: \x{e0} 1: \x{e0} /(?i:[\x{c0}])/utf \x{c0} 0: \x{c0} - \x{e0} + \x{e0} 0: \x{e0} -# These are PCRE's extra properties to help with Unicodizing \d etc. +# These are PCRE's extra properties to help with Unicodizing \d etc. /^\p{Xan}/utf ABCD @@ -2152,17 +2138,17 @@ No match 0: \x{6ca} \x{a6c} 0: \x{a6c} - \x{10a7} + \x{10a7} 0: \x{10a7} \= Expect no match - _ABC + _ABC No match /^\p{Xan}+/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_ 0: ABCD1234\x{6ca}\x{a6c}\x{10a7} \= Expect no match - _ABC + _ABC No match /^\p{Xan}+?/utf @@ -2172,15 +2158,15 @@ No match /^\p{Xan}*/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_ 0: ABCD1234\x{6ca}\x{a6c}\x{10a7} - + /^\p{Xan}{2,9}/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_ 0: ABCD1234\x{6ca} - + /^\p{Xan}{2,9}?/utf \x{6ca}\x{a6c}\x{10a7}_ 0: \x{6ca}\x{a6c} - + /^[\p{Xan}]/utf ABCD1234_ 0: A @@ -2190,26 +2176,26 @@ No match 0: \x{6ca} \x{a6c} 0: \x{a6c} - \x{10a7} + \x{10a7} 0: \x{10a7} \= Expect no match - _ABC + _ABC No match - + /^[\p{Xan}]+/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_ 0: ABCD1234\x{6ca}\x{a6c}\x{10a7} \= Expect no match - _ABC + _ABC No match /^>\p{Xsp}/utf >\x{1680}\x{2028}\x{0b} 0: >\x{1680} - >\x{a0} + >\x{a0} 0: >\x{a0} \= Expect no match - \x{0b} + \x{0b} No match /^>\p{Xsp}+/utf @@ -2223,19 +2209,19 @@ No match /^>\p{Xsp}*/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} - + /^>\p{Xsp}{2,9}/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} - + /^>\p{Xsp}{2,9}?/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 0: > \x{09} - + /^>[\p{Xsp}]/utf >\x{2028}\x{0b} 0: >\x{2028} - + /^>[\p{Xsp}]+/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} @@ -2243,10 +2229,10 @@ No match /^>\p{Xps}/utf >\x{1680}\x{2028}\x{0b} 0: >\x{1680} - >\x{a0} + >\x{a0} 0: >\x{a0} \= Expect no match - \x{0b} + \x{0b} No match /^>\p{Xps}+/utf @@ -2260,19 +2246,19 @@ No match /^>\p{Xps}*/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} - + /^>\p{Xps}{2,9}/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} - + /^>\p{Xps}{2,9}?/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 0: > \x{09} - + /^>[\p{Xps}]/utf >\x{2028}\x{0b} 0: >\x{2028} - + /^>[\p{Xps}]+/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} @@ -2288,10 +2274,10 @@ No match 0: \x{a6c} \x{10a7} 0: \x{10a7} - _ABC + _ABC 0: _ \= Expect no match - [] + [] No match /^\p{Xwd}+/utf @@ -2305,15 +2291,15 @@ No match /^\p{Xwd}*/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_ 0: ABCD1234\x{6ca}\x{a6c}\x{10a7}_ - + /^\p{Xwd}{2,9}/utf A_B12\x{6ca}\x{a6c}\x{10a7} 0: A_B12\x{6ca}\x{a6c}\x{10a7} - + /^\p{Xwd}{2,9}?/utf \x{6ca}\x{a6c}\x{10a7}_ 0: \x{6ca}\x{a6c} - + /^[\p{Xwd}]/utf ABCD1234_ 0: A @@ -2323,25 +2309,25 @@ No match 0: \x{6ca} \x{a6c} 0: \x{a6c} - \x{10a7} + \x{10a7} 0: \x{10a7} - _ABC + _ABC 0: _ \= Expect no match - [] + [] No match - + /^[\p{Xwd}]+/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_ 0: ABCD1234\x{6ca}\x{a6c}\x{10a7}_ -# A check not in UTF-8 mode +# A check not in UTF-8 mode /^[\p{Xwd}]+/ ABCD1234_ 0: ABCD1234_ - -# Some negative checks + +# Some negative checks /^[\P{Xwd}]+/utf !.+\x{019}\x{35a}AB @@ -2575,46 +2561,46 @@ No match End ------------------------------------------------------------------ -# Unicode properties for \b abd \B +# Unicode properties for \b abd \B /\b...\B/utf,ucp abc_ 0: abc - \x{37e}abc\x{376} + \x{37e}abc\x{376} 0: abc - \x{37e}\x{376}\x{371}\x{393}\x{394} + \x{37e}\x{376}\x{371}\x{393}\x{394} 0: \x{376}\x{371}\x{393} - !\x{c0}++\x{c1}\x{c2} + !\x{c0}++\x{c1}\x{c2} 0: ++\x{c1} - !\x{c0}+++++ + !\x{c0}+++++ 0: \x{c0}++ -# Without PCRE_UCP, non-ASCII always fail, even if < 256 +# Without PCRE_UCP, non-ASCII always fail, even if < 256 /\b...\B/utf abc_ 0: abc -\= Expect no match - \x{37e}abc\x{376} +\= Expect no match + \x{37e}abc\x{376} No match - \x{37e}\x{376}\x{371}\x{393}\x{394} + \x{37e}\x{376}\x{371}\x{393}\x{394} No match - !\x{c0}++\x{c1}\x{c2} + !\x{c0}++\x{c1}\x{c2} No match - !\x{c0}+++++ + !\x{c0}+++++ No match -# With PCRE_UCP, non-UTF8 chars that are < 256 still check properties +# With PCRE_UCP, non-UTF8 chars that are < 256 still check properties /\b...\B/ucp abc_ 0: abc - !\x{c0}++\x{c1}\x{c2} + !\x{c0}++\x{c1}\x{c2} 0: ++\xc1 - !\x{c0}+++++ + !\x{c0}+++++ 0: \xc0++ -# Some of these are silly, but they check various combinations +# Some of these are silly, but they check various combinations /[[:^alpha:][:^cntrl:]]+/B,utf,ucp ------------------------------------------------------------------ @@ -2625,7 +2611,7 @@ No match ------------------------------------------------------------------ 123 0: 123 - abc + abc 0: abc /[[:^cntrl:][:^alpha:]]+/B,utf,ucp @@ -2637,7 +2623,7 @@ No match ------------------------------------------------------------------ 123 0: 123 - abc + abc 0: abc /[[:alpha:]]+/B,utf,ucp @@ -2659,7 +2645,7 @@ No match ------------------------------------------------------------------ 123 0: 123 - abc + abc 0: abc /[^\d]+/B,utf,ucp @@ -2673,7 +2659,7 @@ No match 0: abc abc\x{123} 0: abc\x{123} - \x{660}abc + \x{660}abc 0: abc /\p{Lu}+9\p{Lu}+B\p{Lu}+b/B @@ -2785,7 +2771,7 @@ No match End ------------------------------------------------------------------ -# These behaved oddly in Perl, so they are kept in this test +# These behaved oddly in Perl, so they are kept in this test /(\x{23a}\x{23a}\x{23a})?\1/i,utf \= Expect no match @@ -2831,12 +2817,12 @@ No match \x{2c65}\x{2c65}\x{23a}\x{23a} 0: \x{2c65}\x{2c65}\x{23a}\x{23a} 1: \x{2c65}\x{2c65} - + /(ⱥⱥ)\1/i,utf - ⱥⱥȺȺ + ⱥⱥȺȺ 0: \x{2c65}\x{2c65}\x{23a}\x{23a} 1: \x{2c65}\x{2c65} - + /(\x{23a}\x{23a}\x{23a})\1Y/i,utf X\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}YZ 0: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}Y @@ -2847,7 +2833,7 @@ No match 0: \x{2c65}\x{2c65}\x{23a}\x{23a}Y 1: \x{2c65}\x{2c65} -# These scripts weren't yet in Perl when I added Unicode 6.0.0 to PCRE +# These scripts weren't yet in Perl when I added Unicode 6.0.0 to PCRE /^[\p{Batak}]/utf \x{1bc0} @@ -2857,7 +2843,7 @@ No match \= Expect no match \x{1bf4} No match - + /^[\p{Brahmi}]/utf \x{11000} 0: \x{11000} @@ -2866,7 +2852,7 @@ No match \= Expect no match \x{1104e} No match - + /^[\p{Mandaic}]/utf \x{840} 0: \x{840} @@ -2875,7 +2861,7 @@ No match \= Expect no match \x{85c} No match - \x{85d} + \x{85d} No match /(\X*)(.)/s,utf @@ -2889,7 +2875,7 @@ No match 0: Ste\x{301}re\x{301}o 1: te\x{301}r 2: \x{301}o - + /^\X/utf ́réo 0: \x{301} @@ -2938,8 +2924,8 @@ Partial match: ab 0: aa aba\=ps 0: aba - -# These Unicode 6.1.0 scripts are not known to Perl. + +# These Unicode 6.1.0 scripts are not known to Perl. /\p{Chakma}\d/utf,ucp \x{11100}\x{1113c} @@ -2962,7 +2948,7 @@ Partial match: A\x{300}\x{301} 0: A\x{301} A\x{301}\=ph Partial match: A\x{301} - + /^\X{2,3}/utf A\=ps Partial match: A @@ -2990,7 +2976,7 @@ Partial match: AA 0: A\x{300}\x{301}A\x{300}\x{301} A\x{300}\x{301}A\x{300}\x{301}\=ph Partial match: A\x{300}\x{301}A\x{300}\x{301} - + /^\X+/utf AA\=ps 0: AA @@ -3163,8 +3149,8 @@ No match \= Expect no match iskt No match - -# This property is a PCRE special + +# This property is a PCRE special /^\p{Xuc}/utf $abc @@ -3176,7 +3162,7 @@ No match \x{1234}abc 0: \x{1234} \= Expect no match - abc + abc No match /^\p{Xuc}+/utf @@ -3260,8 +3246,8 @@ No match No match \x{1234}abc No match - -# Some auto-possessification tests + +# Some auto-possessification tests /\pN+\z/B ------------------------------------------------------------------ @@ -3763,7 +3749,7 @@ No match End ------------------------------------------------------------------ -# End auto-possessification tests +# End auto-possessification tests /\w+/B,utf,ucp,auto_callout ------------------------------------------------------------------ @@ -3777,7 +3763,7 @@ No match abcd --->abcd +0 ^ \w+ - +3 ^ ^ + +3 ^ ^ End of pattern 0: abcd /[\p{N}]?+/B,no_auto_possess @@ -3910,7 +3896,7 @@ No match Ket End ------------------------------------------------------------------ - + /[R-T]+/Bi,utf,ucp ------------------------------------------------------------------ Bra @@ -3944,7 +3930,7 @@ Subject length lower bound = 1 \x{100}\x{200}\x{300} 0: \x{100}\x{200}\x{300} ^^^^^^^^^^^^^^ - + # Test UTF characters in a substitution /ábc/utf,replace=XሴZ @@ -3997,7 +3983,7 @@ Failed: error 122 at offset 1227: unmatched closing parenthesis /$(&.+[\p{Me}].\s\xdcC*?(?(<y>))(?<!^)$C((;*?(R))+(?(R)){0,6}?|){12\x8a\X*?\x8a\x0b\xd1^9\3*+(\xc1,\k'P'\xb4)\xcc(z\z(?JJ)(?'X'8};(\x0b\xd1^9\?'3*+(\xc1.]k+\x0b'Pm'\xb4\xcc4'\xd1'(?'X'))?-%--\x95$9*\4'|\xd1(''%\x95*$9)#(?'R')3\x07?('P\xed')\\x16:;()\x1e\x10*:(?<y>)\xd1+!~:(?)''(d'E:yD!\s(?'R'\x1e;\x10:U))|')g!\xb0*){29+))#(?'P'})*?/ "(*UTF)(*UCP)(.UTF).+X(\V+;\^(\D|)!999}(?(?C{7(?C')\H*\S*/^\x5\xa\\xd3\x85n?(;\D*(?m).[^mH+((*UCP)(*U:F)})(?!^)(?'" -Failed: error 124 at offset 113: letter or underscore expected after (?< or (?' +Failed: error 162 at offset 113: subpattern name expected /[\pS#moq]/ = @@ -4020,31 +4006,6 @@ MK: a\x{12345}b\x{09}(d)c ab12cde 7: <not digit; letter><not digit; letter><digit; not a letter><digit; not a letter><not digit; letter><not digit; letter><not digit; letter> -/[\W\p{Any}]/B ------------------------------------------------------------------- - Bra - [\x00-/:-@[-^`{-\xff\p{Any}] - Ket - End ------------------------------------------------------------------- - abc - 0: a - 123 - 0: 1 - -/[\W\pL]/B ------------------------------------------------------------------- - Bra - [\x00-/:-@[-^`{-\xff\p{L}] - Ket - End ------------------------------------------------------------------- - abc - 0: a -\= Expect no match - 123 -No match - /(*UCP)(*UTF)[[:>:]]X/B ------------------------------------------------------------------ Bra @@ -4124,7 +4085,7 @@ No match 0: \x{de} \x{200} 0: \x{200} -\= Expect no match +\= Expect no match \x{300} No match \x{37e} @@ -4161,4 +4122,583 @@ No match /(*UTF)C\x09((?<!'(?x)!*H? #\xcc\x9a[^$]/ Failed: error 114 at offset 39: missing closing parenthesis -# End of testinput5 +/[\D]/utf + \x{1d7cf} + 0: \x{1d7cf} + +/[\D\P{Nd}]/utf + \x{1d7cf} + 0: \x{1d7cf} + +/[^\D]/utf + a9b + 0: 9 +\= Expect no match + \x{1d7cf} +No match + +/[^\D\P{Nd}]/utf + a9b + 0: 9 + \x{1d7cf} + 0: \x{1d7cf} +\= Expect no match + \x{10000} +No match + +# Hex uses pattern length, not zero-terminated. This tests for overrunning +# the given length of a pattern. + +/'(*UTF)'/hex + +/'#('/hex,extended,utf + +/a(?<=A\XB)/utf +Failed: error 125 at offset 1: lookbehind assertion is not fixed length + +/ab(?<=A\RB)/utf +Failed: error 125 at offset 2: lookbehind assertion is not fixed length + +/../utf,auto_callout + \n\x{123}\x{123}\x{123}\x{123} +--->\x{0a}\x{123}\x{123}\x{123}\x{123} + +0 ^ . + +0 ^ . + +1 ^ ^ . + +2 ^ ^ End of pattern + 0: \x{123}\x{123} + +# This tests processing wide characters in extended mode. + +/XȀ/x,utf + +# These three test a bug fix that was not clearing up after a locale setting +# when the test or a subsequent one matched a wide character. + +//locale=C + +/[\P{Yi}]/utf +\x{2f000} + 0: \x{2f000} + +/[\P{Yi}]/utf,locale=C +\x{2f000} + 0: \x{2f000} + +/^(?<!(?=))/B,utf +------------------------------------------------------------------ + Bra + ^ + AssertB not + Assert + \x{10385c} + Ket + Ket + Ket + End +------------------------------------------------------------------ + +# Horizontal and vertical space lists ignore caseless + +/[\HH]/Bi,utf +------------------------------------------------------------------ + Bra + [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{10ffff}] + Ket + End +------------------------------------------------------------------ + +/[^\HH]/Bi,utf +------------------------------------------------------------------ + Bra + [^\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{10ffff}] + Ket + End +------------------------------------------------------------------ + +//g,utf + \=zero_terminate + 0: + +/^(?1)\p{Nd}{3}(a)/ + a123a + 0: a123a + 1: a + +/\p{Nd}{0,3}[\pL](*:abc)(?C1)xxx/callout_info +Callout 1 x + +# --------------------------------------------------------------------------- + +# A bunch of tests that hit lines of code that others do not (at least when +# these were created). + +/^[^a]{3,}?x/i,utf,no_start_optimize,no_auto_possess +\= Expect no match + bbb +No match + cc +No match + +/^[ac]{3,}?x/i,utf,no_start_optimize,no_auto_possess +\= Expect no match + aaa\x{100} +No match + +/^X\X/no_start_optimize,no_auto_possess +\= Expect no match + X +No match + +/^X\p{L&}+?/no_start_optimize,no_auto_possess +\= Expect no match + X +No match + +/^X\p{L}+?/no_start_optimize,no_auto_possess +\= Expect no match + X +No match + +/^X\p{Lu}+?/no_start_optimize,no_auto_possess +\= Expect no match + X +No match + +/^X\p{Arabic}+?/no_start_optimize,no_auto_possess +\= Expect no match + X +No match + +/^X\p{Xan}+?/ucp,no_start_optimize,no_auto_possess +\= Expect no match + X +No match + +/^X\s+?/ucp,no_start_optimize,no_auto_possess +\= Expect no match + X +No match + XX +No match + +/^X\S+?/ucp,no_start_optimize,no_auto_possess + XX + 0: XX +\= Expect no match + X +No match + +/^X\w+?/ucp,no_start_optimize,no_auto_possess +\= Expect no match + X +No match + +/^X[^\x{b5}]+?/i,utf,no_start_optimize,no_auto_possess +\= Expect no match + X +No match + +/^X[\x{b5}]+?/i,utf,no_start_optimize,no_auto_possess +\= Expect no match + X +No match + +/^X\p{Xuc}+?/utf,no_start_optimize,no_auto_possess +\= Expect no match + X +No match + +/^X.+?Z/s,utf,no_start_optimize,no_auto_possess +\= Expect no match + X +No match + +/^X\R+?/utf,no_start_optimize,no_auto_possess +\= Expect no match + X +No match + +/^X\H+?/utf,no_start_optimize,no_auto_possess +\= Expect no match + X +No match + +/^X\V+?/utf,no_start_optimize,no_auto_possess +\= Expect no match + X +No match + +/^X\s+?/utf,no_start_optimize,no_auto_possess +\= Expect no match + X +No match + XX +No match + +/^X\S+?/utf,no_start_optimize,no_auto_possess +\= Expect no match + X +No match + +/^X\p{Any}{1,3}?Z/s,no_start_optimize,no_auto_possess + XYYYZ + 0: XYYYZ +\= Expect no match + XY +No match + XYY +No match + XYYY +No match + XYYYYZ +No match + +/^X\p{L&}{1,3}?Z/s,no_start_optimize,no_auto_possess +\= Expect no match + XY +No match + XY! +No match + +/^X\p{L}{1,3}?Z/s,no_start_optimize,no_auto_possess +\= Expect no match + XY +No match + XY! +No match + +/^X\p{Lu}{1,3}?Z/s,no_start_optimize,no_auto_possess +\= Expect no match + XY +No match + XY! +No match + +/^X\P{Han}{1,3}?Z/s,utf,no_start_optimize,no_auto_possess +\= Expect no match + XY +No match + XY! +No match + XY\x{2f00}! +No match + +/^X\p{Xan}{1,3}?Z/s,no_start_optimize,no_auto_possess +\= Expect no match + XY +No match + XY! +No match + +/^X\p{Xsp}{1,3}?Z/s,no_start_optimize,no_auto_possess +\= Expect no match + X\n +No match + X\n! +No match + X\n\n! +No match + +/^X\P{Xsp}{1,3}?Z/s,no_start_optimize,no_auto_possess +\= Expect no match + XYY\n +No match + +/^X\p{Xwd}{1,3}?Z/s,no_start_optimize,no_auto_possess +\= Expect no match + XY +No match + XY! +No match + XYY! +No match + +/^X\x{b5}+?Z/i,utf,no_start_optimize,no_auto_possess +\= Expect no match + X +No match + X\x{b5} +No match + X\x{b5}\x{b5}Y +No match + +/^X\p{Xuc}+?Z/utf,no_start_optimize,no_auto_possess +\= Expect no match + X +No match + X$ +No match + X@@Y +No match + +/(*CRLF)^X.+?Z/utf,no_start_optimize,no_auto_possess +\= Expect partial match + XYY\r\=ph +Partial match: XYY\x{0d} +\= Expect no match + X +No match + +/^X.+?Z/s,utf,no_start_optimize,no_auto_possess +\= Expect no match + X +No match + XYY +No match + +/^X\R+?Z/utf,no_start_optimize,no_auto_possess +\= Expect no match + X\nX +No match + X\n\rX +No match + X\n\r\nX +No match + X\n\n +No match + X\n\x{0c} +No match + +/(*BSR_ANYCRLF)^X\R+?Z/utf,no_start_optimize,no_auto_possess +\= Expect no match + X\nX +No match + X\n\rX +No match + X\n\r\nX +No match + X\n\n +No match + X\n\x{0c} +No match + +/^X\H+?Z/utf,no_start_optimize,no_auto_possess +\= Expect no match + XY\t +No match + XYY +No match + +/^X\h+?Z/utf,no_start_optimize,no_auto_possess +\= Expect no match + X\t\t +No match + X\tY +No match + +/^X\V+?Z/utf,no_start_optimize,no_auto_possess +\= Expect no match + XY\n +No match + XYY +No match + +/^X\v+?Z/utf,no_start_optimize,no_auto_possess +\= Expect no match + X\n\n +No match + X\nY +No match + +/^X\D+?Z/utf,no_start_optimize,no_auto_possess +\= Expect no match + XY9 +No match + XYY +No match + +/^X\d+?Z/utf,no_start_optimize,no_auto_possess +\= Expect no match + X99 +No match + X9Y +No match + +/^X\S+?Z/utf,no_start_optimize,no_auto_possess +\= Expect no match + XY\n +No match + XYY +No match + +/^X\s+?Z/utf,no_start_optimize,no_auto_possess +\= Expect no match + X\n\n +No match + X\nY +No match + +/^X\W+?Z/utf,no_start_optimize,no_auto_possess +\= Expect no match + X.A +No match + X++ +No match + +/^X\p{L&}{1,3}Z/no_start_optimize,no_auto_possess +\= Expect no match + XY +No match + XY! +No match + +/^X\p{L}{1,3}Z/no_start_optimize,no_auto_possess +\= Expect no match + XY +No match + +/^X\p{Xan}{1,3}Z/no_start_optimize,no_auto_possess +\= Expect no match + XY +No match + +/^X\P{Xsp}{1,3}Z/no_start_optimize,no_auto_possess +\= Expect no match + XYY +No match + +/^X\p{Xuc}+Z/utf,no_start_optimize,no_auto_possess +\= Expect no match + X$ +No match + +# ---------------------------------------------------------------------- +# These test the dangerous PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL option. + +/\x{d800}/B,utf,bad_escape_is_literal +------------------------------------------------------------------ + Bra + x{d800} + Ket + End +------------------------------------------------------------------ + +/\ud800/B,utf,alt_bsux,bad_escape_is_literal +------------------------------------------------------------------ + Bra + ud800 + Ket + End +------------------------------------------------------------------ + +# ---------------------------------------------------------------------- + +/Aሴ+B/literal,utf,no_utf_check + Aሴ+B + 0: A\x{1234}+B + +# These are here because I upgraded to Unicode 10.0.0 before Perl did, so it +# doesn't recognize all these scripts. In time these three tests can be moved +# to test 4. + +/^(\p{Adlam}+)(\p{Bhaiksuki}+)(\p{Marchen}+)(\p{Newa}+)(\p{Osage}+) + (\p{Tangut}+)(\p{Masaram_Gondi}+)(\p{Nushu}+)(\p{Soyombo}+) + (\p{Zanabazar_Square}+)/x,utf + \x{1E900}\x{1E924}\x{1E953}\x{11C00}\x{11C2D}\x{11C3E}\x{11C70}\x{11C77}\x{11CAB}\x{11400}\x{1142F}\x{11455}\x{104B0}\x{104D8}\x{104FB}\x{16FE0}\x{18800}\x{18AF2}\x{11D00}\x{11D3A}\x{11D59}\x{16FE1}\x{1B170}\x{1B2FB}\x{11A50}\x{11A58}\x{11AA2}\x{11A00}\x{11A07}\x{11A47} + 0: \x{1e900}\x{1e924}\x{1e953}\x{11c00}\x{11c2d}\x{11c3e}\x{11c70}\x{11c77}\x{11cab}\x{11400}\x{1142f}\x{11455}\x{104b0}\x{104d8}\x{104fb}\x{16fe0}\x{18800}\x{18af2}\x{11d00}\x{11d3a}\x{11d59}\x{16fe1}\x{1b170}\x{1b2fb}\x{11a50}\x{11a58}\x{11aa2}\x{11a00}\x{11a07}\x{11a47} + 1: \x{1e900}\x{1e924}\x{1e953} + 2: \x{11c00}\x{11c2d}\x{11c3e} + 3: \x{11c70}\x{11c77}\x{11cab} + 4: \x{11400}\x{1142f}\x{11455} + 5: \x{104b0}\x{104d8}\x{104fb} + 6: \x{16fe0}\x{18800}\x{18af2} + 7: \x{11d00}\x{11d3a}\x{11d59} + 8: \x{16fe1}\x{1b170}\x{1b2fb} + 9: \x{11a50}\x{11a58}\x{11aa2} +10: \x{11a00}\x{11a07}\x{11a47} + +/^\x{1E900}\x{104B0}/i,utf + \x{1E900}\x{104B0} + 0: \x{1e900}\x{104b0} + \x{1E922}\x{104D8} + 0: \x{1e922}\x{104d8} + +/^(?:(\X)(?C))+$/utf + \x{1E900}\x{1E924}\x{1E953}\x{11C00}\x{11C2D}\x{11C3E}\x{11C70}\x{11C77}\x{11CAB}\x{11400}\x{1142F}\x{11455}\x{104B0}\x{104D8}\x{104FB}\x{16FE0}\x{18800}\x{18AF2}\x{11D00}\x{11D3A}\x{11D59}\x{16FE1}\x{1B170}\x{1B2FB}\x{11A50}\x{11A58}\x{11AA2}\x{11A00}\x{11A07}\x{11A47}\=callout_capture,callout_no_where +Callout 0: last capture = 1 + 1: \x{1e900} +Callout 0: last capture = 1 + 1: \x{1e924} +Callout 0: last capture = 1 + 1: \x{1e953} +Callout 0: last capture = 1 + 1: \x{11c00} +Callout 0: last capture = 1 + 1: \x{11c2d}\x{11c3e} +Callout 0: last capture = 1 + 1: \x{11c70} +Callout 0: last capture = 1 + 1: \x{11c77}\x{11cab} +Callout 0: last capture = 1 + 1: \x{11400} +Callout 0: last capture = 1 + 1: \x{1142f} +Callout 0: last capture = 1 + 1: \x{11455} +Callout 0: last capture = 1 + 1: \x{104b0} +Callout 0: last capture = 1 + 1: \x{104d8} +Callout 0: last capture = 1 + 1: \x{104fb} +Callout 0: last capture = 1 + 1: \x{16fe0} +Callout 0: last capture = 1 + 1: \x{18800} +Callout 0: last capture = 1 + 1: \x{18af2} +Callout 0: last capture = 1 + 1: \x{11d00}\x{11d3a} +Callout 0: last capture = 1 + 1: \x{11d59} +Callout 0: last capture = 1 + 1: \x{16fe1} +Callout 0: last capture = 1 + 1: \x{1b170} +Callout 0: last capture = 1 + 1: \x{1b2fb} +Callout 0: last capture = 1 + 1: \x{11a50}\x{11a58} +Callout 0: last capture = 1 + 1: \x{11aa2} +Callout 0: last capture = 1 + 1: \x{11a00}\x{11a07}\x{11a47} + 0: \x{1e900}\x{1e924}\x{1e953}\x{11c00}\x{11c2d}\x{11c3e}\x{11c70}\x{11c77}\x{11cab}\x{11400}\x{1142f}\x{11455}\x{104b0}\x{104d8}\x{104fb}\x{16fe0}\x{18800}\x{18af2}\x{11d00}\x{11d3a}\x{11d59}\x{16fe1}\x{1b170}\x{1b2fb}\x{11a50}\x{11a58}\x{11aa2}\x{11a00}\x{11a07}\x{11a47} + 1: \x{11a00}\x{11a07}\x{11a47} + +# These two are here because JIT is not yet updated. Also, the very first data +# line is handled differently by Perl. + +/^\X/utf + A\x{200d}B A ZWJ + 0: A\x{200d} + \x{261D}\x{1F3FB}B E_Base E_Modifier + 0: \x{261d}\x{1f3fb} + \x{1F466}\x{1F3FF}B E_Base_GAZ E_Modifier + 0: \x{1f466}\x{1f3ff} + \x{200d}\x{1F3A4}B ZWJ Glue_After_ZWJ + 0: \x{200d}\x{1f3a4} + \x{200d}\x{1F469}B ZWJ E_Base_GAZ + 0: \x{200d}\x{1f469} + \x{1F1E6}\x{1F1E7}B RegionalIndicator RegionalIndicator + 0: \x{1f1e6}\x{1f1e7} + \x{261D}\x{E0100}\x{1F3FB}B E_Base Extend E_Modifier + 0: \x{261d}\x{e0100}\x{1f3fb} + +# Regional indicators + +/^(\X)(\X)/utf,aftertext + \x{1F1E6}\x{1F1E7}\x{1F1E7}B + 0: \x{1f1e6}\x{1f1e7}\x{1f1e7} + 0+ B + 1: \x{1f1e6}\x{1f1e7} + 2: \x{1f1e7} + \x{1F1E6}\x{1F1E7}\x{1F1E7}\x{1F1E6}B + 0: \x{1f1e6}\x{1f1e7}\x{1f1e7}\x{1f1e6} + 0+ B + 1: \x{1f1e6}\x{1f1e7} + 2: \x{1f1e7}\x{1f1e6} + + +# End of testinput5 diff --git a/testdata/testoutput6 b/testdata/testoutput6 index 17616c8..b409fe0 100644 --- a/testdata/testoutput6 +++ b/testdata/testoutput6 @@ -713,7 +713,7 @@ No match /(ab|cd){3,4}/auto_callout ababab --->ababab - +0 ^ (ab|cd){3,4} + +0 ^ ( +1 ^ a +4 ^ c +2 ^^ b @@ -726,13 +726,13 @@ No match +4 ^ ^ c +2 ^ ^ b +3 ^ ^ | -+12 ^ ^ ++12 ^ ^ End of pattern +1 ^ ^ a +4 ^ ^ c 0: ababab abcdabcd --->abcdabcd - +0 ^ (ab|cd){3,4} + +0 ^ ( +1 ^ a +4 ^ c +2 ^^ b @@ -740,22 +740,22 @@ No match +1 ^ ^ a +4 ^ ^ c +5 ^ ^ d - +6 ^ ^ ) + +6 ^ ^ ){3,4} +1 ^ ^ a +4 ^ ^ c +2 ^ ^ b +3 ^ ^ | -+12 ^ ^ ++12 ^ ^ End of pattern +1 ^ ^ a +4 ^ ^ c +5 ^ ^ d - +6 ^ ^ ) -+12 ^ ^ + +6 ^ ^ ){3,4} ++12 ^ ^ End of pattern 0: abcdabcd 1: abcdab abcdcdcdcdcd --->abcdcdcdcdcd - +0 ^ (ab|cd){3,4} + +0 ^ ( +1 ^ a +4 ^ c +2 ^^ b @@ -763,17 +763,17 @@ No match +1 ^ ^ a +4 ^ ^ c +5 ^ ^ d - +6 ^ ^ ) + +6 ^ ^ ){3,4} +1 ^ ^ a +4 ^ ^ c +5 ^ ^ d - +6 ^ ^ ) -+12 ^ ^ + +6 ^ ^ ){3,4} ++12 ^ ^ End of pattern +1 ^ ^ a +4 ^ ^ c +5 ^ ^ d - +6 ^ ^ ) -+12 ^ ^ + +6 ^ ^ ){3,4} ++12 ^ ^ End of pattern 0: abcdcdcd 1: abcdcd @@ -2674,13 +2674,6 @@ No match aaa No match -/[\d-z]+/ - 12-34z - 0: 12-34z -\= Expect no match - aaa -No match - /\x5c/ \\ 0: \ @@ -5715,17 +5708,6 @@ No match 0: 0: -/^[\d-a]/ - abcde - 0: a - -things - 0: - - 0digit - 0: 0 -\= Expect no match - bcdef -No match - /[[:space:]]+/ > \x09\x0a\x0c\x0d\x0b< 0: \x09\x0a\x0c\x0d\x0b @@ -6628,14 +6610,14 @@ No match +0 ^ x +1 ^^ y +2 ^ ^ z - +3 ^ ^ + +3 ^ ^ End of pattern 0: xyz abcxyz --->abcxyz +0 ^ x +1 ^^ y +2 ^ ^ z - +3 ^ ^ + +3 ^ ^ End of pattern 0: xyz \= Expect no match abc @@ -6652,7 +6634,7 @@ No match +0 ^ x +1 ^^ y +2 ^ ^ z - +3 ^ ^ + +3 ^ ^ End of pattern 0: xyz \= Expect no match abc @@ -6686,7 +6668,7 @@ No match +15 ^ x +16 ^^ y +17 ^ ^ z -+18 ^ ^ ++18 ^ ^ End of pattern 0: xyz /(?C)ab/ @@ -6702,7 +6684,7 @@ No match --->ab +0 ^ a +1 ^^ b - +2 ^ ^ + +2 ^ ^ End of pattern 0: ab ab\=callout_none 0: ab @@ -6712,30 +6694,30 @@ No match --->"ab" +0 ^ ^ +1 ^ " - +2 ^^ ((?(?=[a])[^"])|b)* + +2 ^^ ( +21 ^^ " - +3 ^^ (?(?=[a])[^"]) + +3 ^^ (? +18 ^^ b - +5 ^^ (?=[a]) + +5 ^^ (?= +8 ^ [a] +11 ^^ ) +12 ^^ [^"] +16 ^ ^ ) +17 ^ ^ | +21 ^ ^ " - +3 ^ ^ (?(?=[a])[^"]) + +3 ^ ^ (? +18 ^ ^ b - +5 ^ ^ (?=[a]) + +5 ^ ^ (?= +8 ^ [a] -+19 ^ ^ ) ++19 ^ ^ )* +21 ^ ^ " - +3 ^ ^ (?(?=[a])[^"]) + +3 ^ ^ (? +18 ^ ^ b - +5 ^ ^ (?=[a]) + +5 ^ ^ (?= +8 ^ [a] +17 ^ ^ | +22 ^ ^ $ -+23 ^ ^ ++23 ^ ^ End of pattern 0: "ab" "ab"\=callout_none 0: "ab" @@ -7154,7 +7136,7 @@ Failed: error -52: nested recursion at the same subject position aaaabcde 0: aaaab -/((?(R2)a+|(?1)b))/ +/((?(R2)a+|(?1)b))()/ aaaabcde Failed: error -40: backreference condition or recursion test is not supported for DFA matching @@ -7517,7 +7499,6 @@ Callout (10): "AB" /^a(b)c(?C1)def/ abcdef\=callout_capture Callout 1: last capture = 0 - 0: --->abcdef ^ ^ d 0: abcdef @@ -7538,7 +7519,6 @@ Callout 1: last capture = 0 ------------------------------------------------------------------ abcdef\=callout_capture Callout (10): {AB} last capture = 0 - 0: --->abcdef ^ ^ d 0: abcdef @@ -7548,7 +7528,7 @@ Callout (10): {AB} last capture = 0 Bra ^ Cond - Callout 25 9 7 + Callout 25 9 3 Assert abc Ket @@ -7561,11 +7541,11 @@ Callout (10): {AB} last capture = 0 ------------------------------------------------------------------ abcdefg --->abcdefg - 25 ^ (?=abc) + 25 ^ (?= 0: abcd xyz123 --->xyz123 - 25 ^ (?=abc) + 25 ^ (?= 0: xyz /^(?(?C$abc$)(?=abc)abcd|xyz)/B @@ -7573,7 +7553,7 @@ Callout (10): {AB} last capture = 0 Bra ^ Cond - CalloutStr $abc$ 7 12 7 + CalloutStr $abc$ 7 12 3 Assert abc Ket @@ -7587,12 +7567,12 @@ Callout (10): {AB} last capture = 0 abcdefg Callout (7): $abc$ --->abcdefg - ^ (?=abc) + ^ (?= 0: abcd xyz123 Callout (7): $abc$ --->xyz123 - ^ (?=abc) + ^ (?= 0: xyz /^ab(?C'first')cd(?C"second")ef/ @@ -7609,13 +7589,13 @@ Callout (20): "second" aaaXY Callout (8): `code` --->aaaXY - ^^ ) + ^^ ){3} Callout (8): `code` --->aaaXY - ^ ^ ) + ^ ^ ){3} Callout (8): `code` --->aaaXY - ^ ^ ) + ^ ^ ){3} 0: aaaX # Binary zero in callout string @@ -7673,13 +7653,122 @@ No match 0: abcd /()()a+/no_auto_possess - aaa\=dfa,allcaptures + aaa\=allcaptures ** Ignored after DFA matching: allcaptures 0: aaa 1: aa 2: a - a\=dfa,allcaptures + a\=allcaptures ** Ignored after DFA matching: allcaptures 0: a +/(*LIMIT_DEPTH=100)^((.)(?1)|.)$/ +\= Expect depth limit exceeded + a[00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00] +Failed: error -53: matching depth limit exceeded + +/(02-)?[0-9]{3}-[0-9]{3}/ + 02-123-123 + 0: 02-123-123 + +/^(a(?2))(b)(?1)/ + abbab\=find_limits +Minimum match limit = 4 +Minimum depth limit = 2 + 0: abbab + +/abc/endanchored + xyzabc + 0: abc +\= Expect no match + xyzabcdef +No match +\= Expect error + xyzabc\=ph +Failed: error -34: bad option value + +/abc/ + xyzabc\=endanchored + 0: abc +\= Expect no match + xyzabcdef\=endanchored +No match +\= Expect error + xyzabc\=ps,endanchored +Failed: error -34: bad option value + +/abc|bcd/endanchored + xyzabcd + 0: bcd +\= Expect no match + xyzabcdef +No match + +/(*NUL)^.*/ + a\nb\x00ccc + 0: a\x0ab + +/(*NUL)^.*/s + a\nb\x00ccc + 0: a\x0ab\x00ccc + +/^x/m,newline=nul + ab\x00xy + 0: x + +/'#comment' 0d 0a 00 '^x\' 0a 'y'/x,newline=nul,hex + x\nyz + 0: x\x0ay + +/(*NUL)^X\NY/ + X\nY + 0: X\x0aY + X\rY + 0: X\x0dY +\= Expect no match + X\x00Y +No match + +/(?<=abc|)/ + abcde\=aftertext + 0: + 0+ abcde + +/(?<=|abc)/ + abcde\=aftertext + 0: + 0+ abcde + +/(?<=abc|)/endanchored + abcde\=aftertext + 0: + 0+ + +/(?<=|abc)/endanchored + abcde\=aftertext + 0: + 0+ + +/(*LIMIT_MATCH=100).*(?![|H]?.*(?![|H]?););.*(?![|H]?.*(?![|H]?););\x00\x00\x00\x00\x00\x00\x00(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?![|);)?.*(![|H]?);)?.*(?![|H]?);)?.*(?![|H]?);)?.*(?![|H]););![|H]?););[|H]?);|H]?);)\x00\x00\x00\x00\x00\x00H]?););?![|H]?);)?.*(?![|H]?););[||H]?);)?.*(?![|H]?););[|H]?);(?![|H]?););![|H]?););[|H]?);|H]?);)?.*(?![|H]?););;[\x00\x00\x00\x00\x00\x00\x00![|H]?););![|H]?););[|H]?);|H]?);)?.*(?![|H]?););/no_dotstar_anchor +.*(?![|H]?.*(?![|H]?););.*(?![|H]?.*(?![|H]?););\x00\x00\x00\x00\x00\x00\x00(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?![|);)?.*(![|H]?);)?.*(?![|H]?);)?.*(?![|H]?);)?.*(?![|H]););![|H]?););[|H]?);|H]?);)\x00\x00\x00\x00\x00\x00H]?););?![|H]?);)?.*(?![|H]?););[||H]?);)?.*(?![|H]?););[|H]?);(?![|H]?););![|H]?););[|H]?);|H]?);)?.*(?![|H]?););;[\x00\x00\x00\x00\x00\x00\x00![|H]?););![|H]?););[|H]?);|H]?);)?.*(?![|H]?);); +Failed: error -47: match limit exceeded + +/\n/firstline + xyz\nabc + 0: \x0a + +/\nabc/firstline + xyz\nabc + 0: \x0aabc + +/\x{0a}abc/firstline,newline=crlf +\= Expect no match + xyz\r\nabc +No match + +/[abc]/firstline +\= Expect no match + \na +No match + # End of testinput6 diff --git a/testdata/testoutput8-16-2 b/testdata/testoutput8-16-2 index 39415b7..47c9e56 100644 --- a/testdata/testoutput8-16-2 +++ b/testdata/testoutput8-16-2 @@ -187,7 +187,7 @@ Memory allocation (code space): 40 0 17 Bra 2 13 CBra 1 5 a - 7 4 Once + 7 4 SBra 9 2 Recurse 11 4 KetRmax 13 b @@ -759,18 +759,14 @@ Memory allocation (code space): 14 "(?1)(?#?'){2}(a)" ------------------------------------------------------------------ - 0 21 Bra - 2 4 Once - 4 14 Recurse - 6 4 Ket - 8 4 Once - 10 14 Recurse - 12 4 Ket - 14 5 CBra 1 - 17 a - 19 5 Ket - 21 21 Ket - 23 End + 0 13 Bra + 2 6 Recurse + 4 6 Recurse + 6 5 CBra 1 + 9 a + 11 5 Ket + 13 13 Ket + 15 End ------------------------------------------------------------------ /.((?2)(?R)|\1|$)()/ @@ -850,27 +846,19 @@ Memory allocation (code space): 14 /(?|(?|(?J:(?|(?x:(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?| ))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) /parens_nest_limit=1000,-fullbincode -Failed: error 184 at offset 1540: (?| and/or (?J: or (?x: parentheses are too deeply nested +Failed: error 184 at offset 1504: (?| and/or (?J: or (?x: parentheses are too deeply nested # Use "expand" to create some very long patterns with nested parentheses, in # order to test workspace overflow. Again, this varies with code unit width, -# and even with it fails in two modes, the error offset differs. It also varies +# and even when it fails in two modes, the error offset differs. It also varies # with link size - hence multiple tests with different values. -/(?'ABC'\[[bar](]{105}*THEN:\[A]{255}\[)]{106}/expand,-fullbincode -Failed: error 186 at offset 594: regular expression is too complicated - -/(?'ABC'\[[bar](]{106}*THEN:\[A]{255}\[)]{107}/expand,-fullbincode -Failed: error 186 at offset 594: regular expression is too complicated - -/(?'ABC'\[[bar](]{159}*THEN:\[A]{255}\[)]{160}/expand,-fullbincode -Failed: error 186 at offset 594: regular expression is too complicated +/(?'ABC'\[[bar](]{792}*THEN:\[A]{255}\[)]{793}/expand,-fullbincode,parens_nest_limit=1000 -/(?'ABC'\[[bar](]{199}*THEN:\[A]{255}\[)]{200}/expand,-fullbincode -Failed: error 186 at offset 594: regular expression is too complicated +/(?'ABC'\[[bar](]{793}*THEN:\[A]{255}\[)]{794}/expand,-fullbincode,parens_nest_limit=1000 -/(?'ABC'\[[bar](]{299}*THEN:\[A]{255}\[)]{300}/expand,-fullbincode -Failed: error 186 at offset 594: regular expression is too complicated +/(?'ABC'\[[bar](]{1793}*THEN:\[A]{255}\[)]{1794}/expand,-fullbincode,parens_nest_limit=2000 +Failed: error 186 at offset 12820: regular expression is too complicated /(?(1)(?1)){8,}+()/debug ------------------------------------------------------------------ @@ -1031,6 +1019,5 @@ Subject length lower bound = 0 Failed: error 114 at offset 509: missing closing parenthesis /([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))/-fullbincode -Failed: error 186 at offset 490: regular expression is too complicated # End of testinput8 diff --git a/testdata/testoutput8-16-3 b/testdata/testoutput8-16-3 index 61a98eb..722b0e1 100644 --- a/testdata/testoutput8-16-3 +++ b/testdata/testoutput8-16-3 @@ -187,7 +187,7 @@ Memory allocation (code space): 54 0 23 Bra 3 17 CBra 1 7 a - 9 6 Once + 9 6 SBra 12 3 Recurse 15 6 KetRmax 18 b @@ -759,18 +759,14 @@ Memory allocation (code space): 18 "(?1)(?#?'){2}(a)" ------------------------------------------------------------------ - 0 30 Bra - 3 6 Once - 6 21 Recurse - 9 6 Ket - 12 6 Once - 15 21 Recurse - 18 6 Ket - 21 6 CBra 1 - 25 a - 27 6 Ket - 30 30 Ket - 33 End + 0 18 Bra + 3 9 Recurse + 6 9 Recurse + 9 6 CBra 1 + 13 a + 15 6 Ket + 18 18 Ket + 21 End ------------------------------------------------------------------ /.((?2)(?R)|\1|$)()/ @@ -853,20 +849,15 @@ Memory allocation (code space): 18 # Use "expand" to create some very long patterns with nested parentheses, in # order to test workspace overflow. Again, this varies with code unit width, -# and even with it fails in two modes, the error offset differs. It also varies +# and even when it fails in two modes, the error offset differs. It also varies # with link size - hence multiple tests with different values. -/(?'ABC'\[[bar](]{105}*THEN:\[A]{255}\[)]{106}/expand,-fullbincode - -/(?'ABC'\[[bar](]{106}*THEN:\[A]{255}\[)]{107}/expand,-fullbincode - -/(?'ABC'\[[bar](]{159}*THEN:\[A]{255}\[)]{160}/expand,-fullbincode +/(?'ABC'\[[bar](]{792}*THEN:\[A]{255}\[)]{793}/expand,-fullbincode,parens_nest_limit=1000 -/(?'ABC'\[[bar](]{199}*THEN:\[A]{255}\[)]{200}/expand,-fullbincode -Failed: error 186 at offset 1147: regular expression is too complicated +/(?'ABC'\[[bar](]{793}*THEN:\[A]{255}\[)]{794}/expand,-fullbincode,parens_nest_limit=1000 -/(?'ABC'\[[bar](]{299}*THEN:\[A]{255}\[)]{300}/expand,-fullbincode -Failed: error 186 at offset 1147: regular expression is too complicated +/(?'ABC'\[[bar](]{1793}*THEN:\[A]{255}\[)]{1794}/expand,-fullbincode,parens_nest_limit=2000 +Failed: error 186 at offset 12820: regular expression is too complicated /(?(1)(?1)){8,}+()/debug ------------------------------------------------------------------ diff --git a/testdata/testoutput8-32-2 b/testdata/testoutput8-32-2 index 49ef506..30667a3 100644 --- a/testdata/testoutput8-32-2 +++ b/testdata/testoutput8-32-2 @@ -187,7 +187,7 @@ Memory allocation (code space): 80 0 17 Bra 2 13 CBra 1 5 a - 7 4 Once + 7 4 SBra 9 2 Recurse 11 4 KetRmax 13 b @@ -759,18 +759,14 @@ Memory allocation (code space): 28 "(?1)(?#?'){2}(a)" ------------------------------------------------------------------ - 0 21 Bra - 2 4 Once - 4 14 Recurse - 6 4 Ket - 8 4 Once - 10 14 Recurse - 12 4 Ket - 14 5 CBra 1 - 17 a - 19 5 Ket - 21 21 Ket - 23 End + 0 13 Bra + 2 6 Recurse + 4 6 Recurse + 6 5 CBra 1 + 9 a + 11 5 Ket + 13 13 Ket + 15 End ------------------------------------------------------------------ /.((?2)(?R)|\1|$)()/ @@ -853,20 +849,15 @@ Memory allocation (code space): 28 # Use "expand" to create some very long patterns with nested parentheses, in # order to test workspace overflow. Again, this varies with code unit width, -# and even with it fails in two modes, the error offset differs. It also varies +# and even when it fails in two modes, the error offset differs. It also varies # with link size - hence multiple tests with different values. -/(?'ABC'\[[bar](]{105}*THEN:\[A]{255}\[)]{106}/expand,-fullbincode - -/(?'ABC'\[[bar](]{106}*THEN:\[A]{255}\[)]{107}/expand,-fullbincode - -/(?'ABC'\[[bar](]{159}*THEN:\[A]{255}\[)]{160}/expand,-fullbincode +/(?'ABC'\[[bar](]{792}*THEN:\[A]{255}\[)]{793}/expand,-fullbincode,parens_nest_limit=1000 -/(?'ABC'\[[bar](]{199}*THEN:\[A]{255}\[)]{200}/expand,-fullbincode -Failed: error 186 at offset 979: regular expression is too complicated +/(?'ABC'\[[bar](]{793}*THEN:\[A]{255}\[)]{794}/expand,-fullbincode,parens_nest_limit=1000 -/(?'ABC'\[[bar](]{299}*THEN:\[A]{255}\[)]{300}/expand,-fullbincode -Failed: error 186 at offset 979: regular expression is too complicated +/(?'ABC'\[[bar](]{1793}*THEN:\[A]{255}\[)]{1794}/expand,-fullbincode,parens_nest_limit=2000 +Failed: error 186 at offset 12820: regular expression is too complicated /(?(1)(?1)){8,}+()/debug ------------------------------------------------------------------ diff --git a/testdata/testoutput8-32-3 b/testdata/testoutput8-32-3 index 49ef506..30667a3 100644 --- a/testdata/testoutput8-32-3 +++ b/testdata/testoutput8-32-3 @@ -187,7 +187,7 @@ Memory allocation (code space): 80 0 17 Bra 2 13 CBra 1 5 a - 7 4 Once + 7 4 SBra 9 2 Recurse 11 4 KetRmax 13 b @@ -759,18 +759,14 @@ Memory allocation (code space): 28 "(?1)(?#?'){2}(a)" ------------------------------------------------------------------ - 0 21 Bra - 2 4 Once - 4 14 Recurse - 6 4 Ket - 8 4 Once - 10 14 Recurse - 12 4 Ket - 14 5 CBra 1 - 17 a - 19 5 Ket - 21 21 Ket - 23 End + 0 13 Bra + 2 6 Recurse + 4 6 Recurse + 6 5 CBra 1 + 9 a + 11 5 Ket + 13 13 Ket + 15 End ------------------------------------------------------------------ /.((?2)(?R)|\1|$)()/ @@ -853,20 +849,15 @@ Memory allocation (code space): 28 # Use "expand" to create some very long patterns with nested parentheses, in # order to test workspace overflow. Again, this varies with code unit width, -# and even with it fails in two modes, the error offset differs. It also varies +# and even when it fails in two modes, the error offset differs. It also varies # with link size - hence multiple tests with different values. -/(?'ABC'\[[bar](]{105}*THEN:\[A]{255}\[)]{106}/expand,-fullbincode - -/(?'ABC'\[[bar](]{106}*THEN:\[A]{255}\[)]{107}/expand,-fullbincode - -/(?'ABC'\[[bar](]{159}*THEN:\[A]{255}\[)]{160}/expand,-fullbincode +/(?'ABC'\[[bar](]{792}*THEN:\[A]{255}\[)]{793}/expand,-fullbincode,parens_nest_limit=1000 -/(?'ABC'\[[bar](]{199}*THEN:\[A]{255}\[)]{200}/expand,-fullbincode -Failed: error 186 at offset 979: regular expression is too complicated +/(?'ABC'\[[bar](]{793}*THEN:\[A]{255}\[)]{794}/expand,-fullbincode,parens_nest_limit=1000 -/(?'ABC'\[[bar](]{299}*THEN:\[A]{255}\[)]{300}/expand,-fullbincode -Failed: error 186 at offset 979: regular expression is too complicated +/(?'ABC'\[[bar](]{1793}*THEN:\[A]{255}\[)]{1794}/expand,-fullbincode,parens_nest_limit=2000 +Failed: error 186 at offset 12820: regular expression is too complicated /(?(1)(?1)){8,}+()/debug ------------------------------------------------------------------ diff --git a/testdata/testoutput8-32-4 b/testdata/testoutput8-32-4 index 49ef506..30667a3 100644 --- a/testdata/testoutput8-32-4 +++ b/testdata/testoutput8-32-4 @@ -187,7 +187,7 @@ Memory allocation (code space): 80 0 17 Bra 2 13 CBra 1 5 a - 7 4 Once + 7 4 SBra 9 2 Recurse 11 4 KetRmax 13 b @@ -759,18 +759,14 @@ Memory allocation (code space): 28 "(?1)(?#?'){2}(a)" ------------------------------------------------------------------ - 0 21 Bra - 2 4 Once - 4 14 Recurse - 6 4 Ket - 8 4 Once - 10 14 Recurse - 12 4 Ket - 14 5 CBra 1 - 17 a - 19 5 Ket - 21 21 Ket - 23 End + 0 13 Bra + 2 6 Recurse + 4 6 Recurse + 6 5 CBra 1 + 9 a + 11 5 Ket + 13 13 Ket + 15 End ------------------------------------------------------------------ /.((?2)(?R)|\1|$)()/ @@ -853,20 +849,15 @@ Memory allocation (code space): 28 # Use "expand" to create some very long patterns with nested parentheses, in # order to test workspace overflow. Again, this varies with code unit width, -# and even with it fails in two modes, the error offset differs. It also varies +# and even when it fails in two modes, the error offset differs. It also varies # with link size - hence multiple tests with different values. -/(?'ABC'\[[bar](]{105}*THEN:\[A]{255}\[)]{106}/expand,-fullbincode - -/(?'ABC'\[[bar](]{106}*THEN:\[A]{255}\[)]{107}/expand,-fullbincode - -/(?'ABC'\[[bar](]{159}*THEN:\[A]{255}\[)]{160}/expand,-fullbincode +/(?'ABC'\[[bar](]{792}*THEN:\[A]{255}\[)]{793}/expand,-fullbincode,parens_nest_limit=1000 -/(?'ABC'\[[bar](]{199}*THEN:\[A]{255}\[)]{200}/expand,-fullbincode -Failed: error 186 at offset 979: regular expression is too complicated +/(?'ABC'\[[bar](]{793}*THEN:\[A]{255}\[)]{794}/expand,-fullbincode,parens_nest_limit=1000 -/(?'ABC'\[[bar](]{299}*THEN:\[A]{255}\[)]{300}/expand,-fullbincode -Failed: error 186 at offset 979: regular expression is too complicated +/(?'ABC'\[[bar](]{1793}*THEN:\[A]{255}\[)]{1794}/expand,-fullbincode,parens_nest_limit=2000 +Failed: error 186 at offset 12820: regular expression is too complicated /(?(1)(?1)){8,}+()/debug ------------------------------------------------------------------ diff --git a/testdata/testoutput8-8-2 b/testdata/testoutput8-8-2 index 0d15a82..4b03356 100644 --- a/testdata/testoutput8-8-2 +++ b/testdata/testoutput8-8-2 @@ -187,7 +187,7 @@ Memory allocation (code space): 28 0 24 Bra 3 18 CBra 1 8 a - 10 6 Once + 10 6 SBra 13 3 Recurse 16 6 KetRmax 19 b @@ -759,18 +759,14 @@ Memory allocation (code space): 10 "(?1)(?#?'){2}(a)" ------------------------------------------------------------------ - 0 31 Bra - 3 6 Once - 6 21 Recurse - 9 6 Ket - 12 6 Once - 15 21 Recurse - 18 6 Ket - 21 7 CBra 1 - 26 a - 28 7 Ket - 31 31 Ket - 34 End + 0 19 Bra + 3 9 Recurse + 6 9 Recurse + 9 7 CBra 1 + 14 a + 16 7 Ket + 19 19 Ket + 22 End ------------------------------------------------------------------ /.((?2)(?R)|\1|$)()/ @@ -850,26 +846,19 @@ Memory allocation (code space): 10 /(?|(?|(?J:(?|(?x:(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?| ))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) /parens_nest_limit=1000,-fullbincode -Failed: error 184 at offset 1540: (?| and/or (?J: or (?x: parentheses are too deeply nested +Failed: error 184 at offset 1504: (?| and/or (?J: or (?x: parentheses are too deeply nested # Use "expand" to create some very long patterns with nested parentheses, in # order to test workspace overflow. Again, this varies with code unit width, -# and even with it fails in two modes, the error offset differs. It also varies +# and even when it fails in two modes, the error offset differs. It also varies # with link size - hence multiple tests with different values. -/(?'ABC'\[[bar](]{105}*THEN:\[A]{255}\[)]{106}/expand,-fullbincode - -/(?'ABC'\[[bar](]{106}*THEN:\[A]{255}\[)]{107}/expand,-fullbincode -Failed: error 186 at offset 637: regular expression is too complicated - -/(?'ABC'\[[bar](]{159}*THEN:\[A]{255}\[)]{160}/expand,-fullbincode -Failed: error 186 at offset 637: regular expression is too complicated +/(?'ABC'\[[bar](]{792}*THEN:\[A]{255}\[)]{793}/expand,-fullbincode,parens_nest_limit=1000 -/(?'ABC'\[[bar](]{199}*THEN:\[A]{255}\[)]{200}/expand,-fullbincode -Failed: error 186 at offset 637: regular expression is too complicated +/(?'ABC'\[[bar](]{793}*THEN:\[A]{255}\[)]{794}/expand,-fullbincode,parens_nest_limit=1000 -/(?'ABC'\[[bar](]{299}*THEN:\[A]{255}\[)]{300}/expand,-fullbincode -Failed: error 186 at offset 637: regular expression is too complicated +/(?'ABC'\[[bar](]{1793}*THEN:\[A]{255}\[)]{1794}/expand,-fullbincode,parens_nest_limit=2000 +Failed: error 186 at offset 12820: regular expression is too complicated /(?(1)(?1)){8,}+()/debug ------------------------------------------------------------------ diff --git a/testdata/testoutput8-8-3 b/testdata/testoutput8-8-3 index 50d715d..3d33c77 100644 --- a/testdata/testoutput8-8-3 +++ b/testdata/testoutput8-8-3 @@ -187,7 +187,7 @@ Memory allocation (code space): 35 0 30 Bra 4 22 CBra 1 10 a - 12 8 Once + 12 8 SBra 16 4 Recurse 20 8 KetRmax 24 b @@ -759,18 +759,14 @@ Memory allocation (code space): 12 "(?1)(?#?'){2}(a)" ------------------------------------------------------------------ - 0 40 Bra - 4 8 Once - 8 28 Recurse - 12 8 Ket - 16 8 Once - 20 28 Recurse - 24 8 Ket - 28 8 CBra 1 - 34 a - 36 8 Ket - 40 40 Ket - 44 End + 0 24 Bra + 4 12 Recurse + 8 12 Recurse + 12 8 CBra 1 + 18 a + 20 8 Ket + 24 24 Ket + 28 End ------------------------------------------------------------------ /.((?2)(?R)|\1|$)()/ @@ -853,21 +849,15 @@ Memory allocation (code space): 12 # Use "expand" to create some very long patterns with nested parentheses, in # order to test workspace overflow. Again, this varies with code unit width, -# and even with it fails in two modes, the error offset differs. It also varies +# and even when it fails in two modes, the error offset differs. It also varies # with link size - hence multiple tests with different values. -/(?'ABC'\[[bar](]{105}*THEN:\[A]{255}\[)]{106}/expand,-fullbincode - -/(?'ABC'\[[bar](]{106}*THEN:\[A]{255}\[)]{107}/expand,-fullbincode - -/(?'ABC'\[[bar](]{159}*THEN:\[A]{255}\[)]{160}/expand,-fullbincode -Failed: error 186 at offset 936: regular expression is too complicated +/(?'ABC'\[[bar](]{792}*THEN:\[A]{255}\[)]{793}/expand,-fullbincode,parens_nest_limit=1000 -/(?'ABC'\[[bar](]{199}*THEN:\[A]{255}\[)]{200}/expand,-fullbincode -Failed: error 186 at offset 936: regular expression is too complicated +/(?'ABC'\[[bar](]{793}*THEN:\[A]{255}\[)]{794}/expand,-fullbincode,parens_nest_limit=1000 -/(?'ABC'\[[bar](]{299}*THEN:\[A]{255}\[)]{300}/expand,-fullbincode -Failed: error 186 at offset 936: regular expression is too complicated +/(?'ABC'\[[bar](]{1793}*THEN:\[A]{255}\[)]{1794}/expand,-fullbincode,parens_nest_limit=2000 +Failed: error 186 at offset 12820: regular expression is too complicated /(?(1)(?1)){8,}+()/debug ------------------------------------------------------------------ diff --git a/testdata/testoutput8-8-4 b/testdata/testoutput8-8-4 index 8ebfad5..db04971 100644 --- a/testdata/testoutput8-8-4 +++ b/testdata/testoutput8-8-4 @@ -187,7 +187,7 @@ Memory allocation (code space): 42 0 36 Bra 5 26 CBra 1 12 a - 14 10 Once + 14 10 SBra 19 5 Recurse 24 10 KetRmax 29 b @@ -759,18 +759,14 @@ Memory allocation (code space): 14 "(?1)(?#?'){2}(a)" ------------------------------------------------------------------ - 0 49 Bra - 5 10 Once - 10 35 Recurse - 15 10 Ket - 20 10 Once - 25 35 Recurse - 30 10 Ket - 35 9 CBra 1 - 42 a - 44 9 Ket - 49 49 Ket - 54 End + 0 29 Bra + 5 15 Recurse + 10 15 Recurse + 15 9 CBra 1 + 22 a + 24 9 Ket + 29 29 Ket + 34 End ------------------------------------------------------------------ /.((?2)(?R)|\1|$)()/ @@ -853,19 +849,15 @@ Memory allocation (code space): 14 # Use "expand" to create some very long patterns with nested parentheses, in # order to test workspace overflow. Again, this varies with code unit width, -# and even with it fails in two modes, the error offset differs. It also varies +# and even when it fails in two modes, the error offset differs. It also varies # with link size - hence multiple tests with different values. -/(?'ABC'\[[bar](]{105}*THEN:\[A]{255}\[)]{106}/expand,-fullbincode - -/(?'ABC'\[[bar](]{106}*THEN:\[A]{255}\[)]{107}/expand,-fullbincode - -/(?'ABC'\[[bar](]{159}*THEN:\[A]{255}\[)]{160}/expand,-fullbincode +/(?'ABC'\[[bar](]{792}*THEN:\[A]{255}\[)]{793}/expand,-fullbincode,parens_nest_limit=1000 -/(?'ABC'\[[bar](]{199}*THEN:\[A]{255}\[)]{200}/expand,-fullbincode +/(?'ABC'\[[bar](]{793}*THEN:\[A]{255}\[)]{794}/expand,-fullbincode,parens_nest_limit=1000 -/(?'ABC'\[[bar](]{299}*THEN:\[A]{255}\[)]{300}/expand,-fullbincode -Failed: error 186 at offset 1224: regular expression is too complicated +/(?'ABC'\[[bar](]{1793}*THEN:\[A]{255}\[)]{1794}/expand,-fullbincode,parens_nest_limit=2000 +Failed: error 186 at offset 12820: regular expression is too complicated /(?(1)(?1)){8,}+()/debug ------------------------------------------------------------------ diff --git a/testdata/testoutput9 b/testdata/testoutput9 index 750a7e0..6b014e5 100644 --- a/testdata/testoutput9 +++ b/testdata/testoutput9 @@ -307,14 +307,14 @@ Subject length lower bound = 1 ------------------------------------------------------------------ /\777/I -Failed: error 151 at offset 3: octal value is greater than \377 in 8-bit non-UTF-8 mode +Failed: error 151 at offset 4: octal value is greater than \377 in 8-bit non-UTF-8 mode /(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/mark Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN) XX /(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/mark,alt_verbnames -Failed: error 176 at offset 258: name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN) +Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN) XX /(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/mark @@ -328,10 +328,10 @@ MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789AB MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE /\u0100/alt_bsux,allow_empty_class,match_unset_backref,dupnames -Failed: error 177 at offset 5: character code point value in \u.... sequence is too large +Failed: error 177 at offset 6: character code point value in \u.... sequence is too large /[\u0100-\u0200]/alt_bsux,allow_empty_class,match_unset_backref,dupnames -Failed: error 177 at offset 6: character code point value in \u.... sequence is too large +Failed: error 177 at offset 7: character code point value in \u.... sequence is too large /[^\x00-a]{12,}[^b-\xff]*/B ------------------------------------------------------------------ @@ -364,4 +364,7 @@ Failed: error 177 at offset 6: character code point value in \u.... sequence is /(*MARK:a\x{100}b)z/alt_verbnames Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large +/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':ƿ)/ +Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN) + # End of testinput9 |