diff options
Diffstat (limited to 'testdata/testinput5')
-rw-r--r-- | testdata/testinput5 | 778 |
1 files changed, 561 insertions, 217 deletions
diff --git a/testdata/testinput5 b/testdata/testinput5 index 2e13a7c..0366136 100644 --- a/testdata/testinput5 +++ b/testdata/testinput5 @@ -3,17 +3,21 @@ # results in 8-bit, 16-bit, and 32-bit modes are excluded (see tests 10 and # 12). +#newline_default lf any anycrlf + # PCRE2 and Perl disagree about the characteristics of certain Unicode -# characters. For example, 061C is considered by Perl to be Arabic, though -# is it not listed as such in the Unicode Scripts.txt file, and 2066-2069 are -# graphic and printable according to Perl, though they are actually "isolate" -# control characters. That is why the following tests are here rather than in -# test 4. +# characters. For example, 061C was considered by Perl to be Arabic, though +# it was not listed as such in the Unicode Scripts.txt file for Unicode 8. +# However, it *is* in that file for Unicode 10, but when I came to re-check, +# Perl had changed in the meantime, with 5.026 not recognizing it as Arabic. + +# 2066-2069 are graphic and printable according to Perl, though they are +# actually "isolate" control characters. That is why the following tests are +# here rather than in test 4. /^[\p{Arabic}]/utf -\= Expect no match \x{061c} - + /^[[:graph:]]+$/utf,ucp \= Expect no match \x{61c} @@ -37,14 +41,14 @@ /^[[:^print:]]+$/utf,ucp \x{09}\x{1D}\x{85}\x{61c}\x{2028}\x{2029}\x{2065}\x{2066}\x{2067} \x{2068}\x{2069} - + # Perl does not consider U+180e to be a space character. It is true that it # does not appear in the Unicode PropList.txt file as such, but in many other # sources it is listed as a space, and has been treated as such in PCRE for -# a long time. +# a long time. /^>[[:blank:]]*/utf,ucp - >\x{20}\x{a0}\x{1680}\x{180e}\x{2000}\x{202f}\x{9}\x{b}\x{2028} + >\x{20}\x{a0}\x{1680}\x{180e}\x{2000}\x{202f}\x{9}\x{b}\x{2028} /^A\s+Z/utf,ucp A\x{85}\x{180e}\x{2005}Z @@ -52,7 +56,7 @@ /^A[\s]+Z/utf,ucp A\x{2005}Z A\x{85}\x{2005}Z - + /^[[:graph:]]+$/utf,ucp \= Expect no match \x{180e} @@ -104,7 +108,7 @@ /\x{0041}\x{2262}\x{0391}\x{002e}/IB,utf \x{0041}\x{2262}\x{0391}\x{002e} - + /.{3,5}X/IB,utf \x{212ab}\x{212ab}\x{212ab}\x{861}X @@ -116,23 +120,16 @@ \= Expect no match c \x{ff} - \x{100} + \x{100} -/^[^ab]/IB,utf - c - \x{ff} - \x{100} -\= Expect no match - aaa - /\x{100}*(\d+|"(?1)")/utf 1234 - "1234" + "1234" \x{100}1234 - "\x{100}1234" - \x{100}\x{100}12ab - \x{100}\x{100}"12" -\= Expect no match + "\x{100}1234" + \x{100}\x{100}12ab + \x{100}\x{100}"12" +\= Expect no match \x{100}\x{100}abcd /\x{100}*/IB,utf @@ -148,7 +145,7 @@ \x{104} \= Expect no match \x{105} - \x{ff} + \x{ff} /[\xFF]/IB >\xff< @@ -158,18 +155,18 @@ /[Ä-Ü]/utf Ö # Matches without Study \x{d6} - + /[Ä-Ü]/utf Ö <-- Same with Study \x{d6} - + /[\x{c4}-\x{dc}]/utf Ö # Matches without Study - \x{d6} + \x{d6} /[\x{c4}-\x{dc}]/utf Ö <-- Same with Study - \x{d6} + \x{d6} /[^\x{100}]abc(xyz(?1))/IB,utf @@ -183,19 +180,22 @@ /\W/utf A.B - A\x{100}B - + A\x{100}B + /\w/utf - \x{100}X + \x{100}X -/^\ሴ/IB,utf +# Use no_start_optimize because the first code unit is different in 8-bit from +# the wider modes. + +/^\ሴ/IB,utf,no_start_optimize /()()()()()()()()()() ()()()()()()()()()() ()()()()()()()()()() ()()()()()()()()()() A (x) (?41) B/x,utf - AxxB + AxxB /^[\x{100}\E-\Q\E\x{150}]/B,utf @@ -213,11 +213,11 @@ a\r\nb a\x0bb a\x0cb - a\x{85}b - a\x{2028}b - a\x{2029}b + a\x{85}b + a\x{2028}b + a\x{2029}b \= Expect no match - a\n\rb + a\n\rb /^a\R*b/bsr=unicode,utf ab @@ -226,9 +226,9 @@ a\r\nb a\x0bb a\x0c\x{2028}\x{2029}b - a\x{85}b - a\n\rb - a\n\r\x{85}\x0cb + a\x{85}b + a\n\rb + a\n\r\x{85}\x0cb /^a\R+b/bsr=unicode,utf a\nb @@ -236,20 +236,20 @@ a\r\nb a\x0bb a\x0c\x{2028}\x{2029}b - a\x{85}b - a\n\rb - a\n\r\x{85}\x0cb + a\x{85}b + a\n\rb + a\n\r\x{85}\x0cb \= Expect no match - ab + ab /^a\R{1,3}b/bsr=unicode,utf a\nb a\n\rb a\n\r\x{85}b - a\r\n\r\nb - a\r\n\r\n\r\nb + a\r\n\r\nb + a\r\n\r\n\r\nb a\n\r\n\rb - a\n\n\r\nb + a\n\n\r\nb \= Expect no match a\n\n\n\rb a\r @@ -258,28 +258,28 @@ X X\x0a X\x09X\x0b \= Expect no match - \x{a0} X\x0a - + \x{a0} X\x0a + /\H*\h+\V?\v{3,4}/utf \x09\x20\x{a0}X\x0a\x0b\x0c\x0d\x0a \x09\x20\x{a0}\x0a\x0b\x0c\x0d\x0a \x09\x20\x{a0}\x0a\x0b\x0c -\= Expect no match +\= Expect no match \x09\x20\x{a0}\x0a\x0b - + /\H\h\V\v/utf \x{3001}\x{3000}\x{2030}\x{2028} X\x{180e}X\x{85} \= Expect no match - \x{2009} X\x0a - + \x{2009} X\x0a + /\H*\h+\V?\v{3,4}/utf \x{1680}\x{180e}\x{2007}X\x{2028}\x{2029}\x0c\x0d\x0a \x09\x{205f}\x{a0}\x0a\x{2029}\x0c\x{2028}\x0a \x09\x20\x{202f}\x0a\x0b\x0c -\= Expect no match +\= Expect no match \x09\x{200a}\x{a0}\x{2028}\x0b - + /[\h]/B,utf >\x{1680} @@ -293,57 +293,57 @@ /[\V]/B,utf /.*$/newline=any,utf - \x{1ec5} - + \x{1ec5} + /a\Rb/I,bsr=anycrlf,utf a\rb a\nb a\r\nb \= Expect no match a\x{85}b - a\x0bb + a\x0bb /a\Rb/I,bsr=unicode,utf a\rb a\nb a\r\nb a\x{85}b - a\x0bb - + a\x0bb + /a\R?b/I,bsr=anycrlf,utf a\rb a\nb a\r\nb \= Expect no match a\x{85}b - a\x0bb + a\x0bb /a\R?b/I,bsr=unicode,utf a\rb a\nb a\r\nb a\x{85}b - a\x0bb - + a\x0bb + /.*a.*=.b.*/utf,newline=any QQQ\x{2029}ABCaXYZ=!bPQR \= Expect no match a\x{2029}b - \x61\xe2\x80\xa9\x62 + \x61\xe2\x80\xa9\x62 /[[:a\x{100}b:]]/utf /a[^]b/utf,alt_bsux,allow_empty_class,match_unset_backref a\x{1234}b - a\nb + a\nb \= Expect no match - ab - + ab + /a[^]+b/utf,alt_bsux,allow_empty_class,match_unset_backref aXb - a\nX\nX\x{1234}b + a\nX\nX\x{1234}b \= Expect no match - ab + ab /(\x{de})\1/ \x{de}\x{de} @@ -357,42 +357,42 @@ Xaa\=ps Xaaa\=ps Xaaaa\=ps - + /Xa{2,4}?b/utf X\=ps Xa\=ps Xaa\=ps Xaaa\=ps Xaaaa\=ps - + /Xa{2,4}+b/utf X\=ps Xa\=ps Xaa\=ps Xaaa\=ps Xaaaa\=ps - + /X\x{123}{2,4}b/utf X\=ps X\x{123}\=ps X\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\x{123}\=ps - + /X\x{123}{2,4}?b/utf X\=ps X\x{123}\=ps X\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\x{123}\=ps - + /X\x{123}{2,4}+b/utf X\=ps X\x{123}\=ps X\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\x{123}\=ps - + /X\x{123}{2,4}b/utf \= Expect no match Xx\=ps @@ -400,7 +400,7 @@ X\x{123}\x{123}x\=ps X\x{123}\x{123}\x{123}x\=ps X\x{123}\x{123}\x{123}\x{123}x\=ps - + /X\x{123}{2,4}?b/utf \= Expect no match Xx\=ps @@ -408,7 +408,7 @@ X\x{123}\x{123}x\=ps X\x{123}\x{123}\x{123}x\=ps X\x{123}\x{123}\x{123}\x{123}x\=ps - + /X\x{123}{2,4}+b/utf \= Expect no match Xx\=ps @@ -416,21 +416,21 @@ X\x{123}\x{123}x\=ps X\x{123}\x{123}\x{123}x\=ps X\x{123}\x{123}\x{123}\x{123}x\=ps - + /X\d{2,4}b/utf X\=ps X3\=ps X33\=ps X333\=ps X3333\=ps - + /X\d{2,4}?b/utf X\=ps X3\=ps X33\=ps X333\=ps X3333\=ps - + /X\d{2,4}+b/utf X\=ps X3\=ps @@ -444,14 +444,14 @@ Xaa\=ps Xaaa\=ps Xaaaa\=ps - + /X\D{2,4}?b/utf X\=ps Xa\=ps Xaa\=ps Xaaa\=ps Xaaaa\=ps - + /X\D{2,4}+b/utf X\=ps Xa\=ps @@ -465,14 +465,14 @@ X\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\x{123}\=ps - + /X\D{2,4}?b/utf X\=ps X\x{123}\=ps X\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\x{123}\=ps - + /X\D{2,4}+b/utf X\=ps X\x{123}\=ps @@ -486,14 +486,14 @@ Xaa\=ps Xaaa\=ps Xaaaa\=ps - + /X[abc]{2,4}?b/utf X\=ps Xa\=ps Xaa\=ps Xaaa\=ps Xaaaa\=ps - + /X[abc]{2,4}+b/utf X\=ps Xa\=ps @@ -507,14 +507,14 @@ X\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\x{123}\=ps - + /X[abc\x{123}]{2,4}?b/utf X\=ps X\x{123}\=ps X\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\x{123}\=ps - + /X[abc\x{123}]{2,4}+b/utf X\=ps X\x{123}\=ps @@ -528,14 +528,14 @@ Xzz\=ps Xzzz\=ps Xzzzz\=ps - + /X[^a]{2,4}?b/utf X\=ps Xz\=ps Xzz\=ps Xzzz\=ps Xzzzz\=ps - + /X[^a]{2,4}+b/utf X\=ps Xz\=ps @@ -549,14 +549,14 @@ X\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\x{123}\=ps - + /X[^a]{2,4}?b/utf X\=ps X\x{123}\=ps X\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\x{123}\=ps - + /X[^a]{2,4}+b/utf X\=ps X\x{123}\=ps @@ -570,14 +570,14 @@ YXYY\=ps YXYYY\=ps YXYYYY\=ps - + /(Y)X\1{2,4}?b/utf YX\=ps YXY\=ps YXYY\=ps YXYYY\=ps YXYYYY\=ps - + /(Y)X\1{2,4}+b/utf YX\=ps YXY\=ps @@ -591,14 +591,14 @@ \x{123}X\x{123}\x{123}\=ps \x{123}X\x{123}\x{123}\x{123}\=ps \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps - + /(\x{123})X\1{2,4}?b/utf \x{123}X\=ps \x{123}X\x{123}\=ps \x{123}X\x{123}\x{123}\=ps \x{123}X\x{123}\x{123}\x{123}\=ps \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps - + /(\x{123})X\1{2,4}+b/utf \x{123}X\=ps \x{123}X\x{123}\=ps @@ -640,13 +640,13 @@ AB\x{a0}xxx\x{85}XYZ /\S \S/utf,tables=2 - \x{a2} \x{84} + \x{a2} \x{84} 'A#хц'Bx,newline=any,utf 'A#хц PQ'Bx,newline=any,utf - + /a+#хaa z#XX?/Bx,newline=any,utf @@ -661,13 +661,13 @@ /(\R*)(.)/s,utf \r\n - \r\r\n\n\r - \r\r\n\n\r\n + \r\r\n\n\r + \r\r\n\n\r\n /(\R)*(.)/s,utf \r\n - \r\r\n\n\r - \r\r\n\n\r\n + \r\r\n\n\r + \r\r\n\n\r\n /[^\x{1234}]+/Ii,utf @@ -688,7 +688,7 @@ /f.*/s,utf for\=ph - + /\x{d7ff}\x{e000}/utf /\x{d800}/utf @@ -777,7 +777,7 @@ /./utf,newline=crlf \r\=ps \r\=ph - + /.{2,3}/utf,newline=crlf \r\=ps \r\=ph @@ -835,9 +835,9 @@ /[\p{Nd}+-]+/IB,utf 1234 12-34 - 12+\x{661}-34 + 12+\x{661}-34 \= Expect no match - abcd + abcd /(?:[\PPa*]*){8,}/ @@ -884,7 +884,7 @@ /\p{Zl}{2,3}+/B,utf \x{2028}\x{2028}\x{2028} - + /\p{Zl}/B,utf /\p{Lu}{3}+/B,utf @@ -904,8 +904,8 @@ /^\p{Cs}/utf \x{dfff}\=no_utf_check \= Expect no match - \x{09f} - + \x{09f} + /^\p{Mn}/utf \x{1a1b} @@ -923,60 +923,60 @@ \= Expect no match X \x{2c2} - + /^\p{Zs}/utf \ \ \x{a0} \x{1680} \x{2000} - \x{2001} + \x{2001} \= Expect no match \x{2028} - \x{200d} - + \x{200d} + # These are here because Perl has problems with the negative versions of the # properties and has changed how it behaves for caseless matching. - + /\p{^Lu}/i,utf 1234 \= Expect no match - ABC + ABC /\P{Lu}/i,utf 1234 \= Expect no match - ABC + ABC /\p{Ll}/i,utf a Az \= Expect no match - ABC + ABC /\p{Lu}/i,utf A - a\x{10a0}B -\= Expect no match + a\x{10a0}B +\= Expect no match a - \x{1d00} + \x{1d00} /\p{Lu}/i,utf A aZ \= Expect no match - abc + abc /[\x{c0}\x{391}]/i,utf \x{c0} - \x{e0} + \x{e0} # The next two are special cases where the lengths of the different cases of # the same character differ. The first went wrong with heap frame storage; the -# second was broken in all cases. +# second was broken in all cases. /^\x{023a}+?(\x{0130}+)/i,utf \x{023a}\x{2c65}\x{0130} - + /^\x{023a}+([^X])/i,utf \x{023a}\x{2c65}X @@ -994,71 +994,71 @@ /^\x{c0}$/i,utf \x{c0} - \x{e0} + \x{e0} /^\x{e0}$/i,utf \x{c0} - \x{e0} + \x{e0} # The next two should be Perl-compatible, but it fails to match \x{e0}. PCRE # will match it only with UCP support, because without that it has no notion -# of case for anything other than the ASCII letters. +# of case for anything other than the ASCII letters. /((?i)[\x{c0}])/utf \x{c0} - \x{e0} + \x{e0} /(?i:[\x{c0}])/utf \x{c0} - \x{e0} + \x{e0} -# These are PCRE's extra properties to help with Unicodizing \d etc. +# These are PCRE's extra properties to help with Unicodizing \d etc. /^\p{Xan}/utf ABCD 1234 \x{6ca} \x{a6c} - \x{10a7} + \x{10a7} \= Expect no match - _ABC + _ABC /^\p{Xan}+/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_ \= Expect no match - _ABC + _ABC /^\p{Xan}+?/utf \x{6ca}\x{a6c}\x{10a7}_ /^\p{Xan}*/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_ - + /^\p{Xan}{2,9}/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_ - + /^\p{Xan}{2,9}?/utf \x{6ca}\x{a6c}\x{10a7}_ - + /^[\p{Xan}]/utf ABCD1234_ 1234abcd_ \x{6ca} \x{a6c} - \x{10a7} + \x{10a7} \= Expect no match - _ABC - + _ABC + /^[\p{Xan}]+/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_ \= Expect no match - _ABC + _ABC /^>\p{Xsp}/utf >\x{1680}\x{2028}\x{0b} - >\x{a0} + >\x{a0} \= Expect no match - \x{0b} + \x{0b} /^>\p{Xsp}+/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} @@ -1068,24 +1068,24 @@ /^>\p{Xsp}*/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} - + /^>\p{Xsp}{2,9}/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} - + /^>\p{Xsp}{2,9}?/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} - + /^>[\p{Xsp}]/utf >\x{2028}\x{0b} - + /^>[\p{Xsp}]+/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} /^>\p{Xps}/utf >\x{1680}\x{2028}\x{0b} - >\x{a0} + >\x{a0} \= Expect no match - \x{0b} + \x{0b} /^>\p{Xps}+/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} @@ -1095,16 +1095,16 @@ /^>\p{Xps}*/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} - + /^>\p{Xps}{2,9}/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} - + /^>\p{Xps}{2,9}?/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} - + /^>[\p{Xps}]/utf >\x{2028}\x{0b} - + /^>[\p{Xps}]+/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} @@ -1114,9 +1114,9 @@ \x{6ca} \x{a6c} \x{10a7} - _ABC + _ABC \= Expect no match - [] + [] /^\p{Xwd}+/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_ @@ -1126,32 +1126,32 @@ /^\p{Xwd}*/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_ - + /^\p{Xwd}{2,9}/utf A_B12\x{6ca}\x{a6c}\x{10a7} - + /^\p{Xwd}{2,9}?/utf \x{6ca}\x{a6c}\x{10a7}_ - + /^[\p{Xwd}]/utf ABCD1234_ 1234abcd_ \x{6ca} \x{a6c} - \x{10a7} - _ABC + \x{10a7} + _ABC \= Expect no match - [] - + [] + /^[\p{Xwd}]+/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_ -# A check not in UTF-8 mode +# A check not in UTF-8 mode /^[\p{Xwd}]+/ ABCD1234_ - -# Some negative checks + +# Some negative checks /^[\P{Xwd}]+/utf !.+\x{019}\x{35a}AB @@ -1221,53 +1221,53 @@ /[[:xdigit:]]/B,ucp -# Unicode properties for \b abd \B +# Unicode properties for \b abd \B /\b...\B/utf,ucp abc_ - \x{37e}abc\x{376} - \x{37e}\x{376}\x{371}\x{393}\x{394} - !\x{c0}++\x{c1}\x{c2} - !\x{c0}+++++ + \x{37e}abc\x{376} + \x{37e}\x{376}\x{371}\x{393}\x{394} + !\x{c0}++\x{c1}\x{c2} + !\x{c0}+++++ -# Without PCRE_UCP, non-ASCII always fail, even if < 256 +# Without PCRE_UCP, non-ASCII always fail, even if < 256 /\b...\B/utf abc_ -\= Expect no match - \x{37e}abc\x{376} - \x{37e}\x{376}\x{371}\x{393}\x{394} - !\x{c0}++\x{c1}\x{c2} - !\x{c0}+++++ +\= Expect no match + \x{37e}abc\x{376} + \x{37e}\x{376}\x{371}\x{393}\x{394} + !\x{c0}++\x{c1}\x{c2} + !\x{c0}+++++ -# With PCRE_UCP, non-UTF8 chars that are < 256 still check properties +# With PCRE_UCP, non-UTF8 chars that are < 256 still check properties /\b...\B/ucp abc_ - !\x{c0}++\x{c1}\x{c2} - !\x{c0}+++++ + !\x{c0}++\x{c1}\x{c2} + !\x{c0}+++++ -# Some of these are silly, but they check various combinations +# Some of these are silly, but they check various combinations /[[:^alpha:][:^cntrl:]]+/B,utf,ucp 123 - abc + abc /[[:^cntrl:][:^alpha:]]+/B,utf,ucp 123 - abc + abc /[[:alpha:]]+/B,utf,ucp abc /[[:^alpha:]\S]+/B,utf,ucp 123 - abc + abc /[^\d]+/B,utf,ucp abc123 abc\x{123} - \x{660}abc + \x{660}abc /\p{Lu}+9\p{Lu}+B\p{Lu}+b/B @@ -1287,7 +1287,7 @@ /A+\p{N}A+\dB+\p{N}*B+\d*/B,ucp -# These behaved oddly in Perl, so they are kept in this test +# These behaved oddly in Perl, so they are kept in this test /(\x{23a}\x{23a}\x{23a})?\1/i,utf \= Expect no match @@ -1319,43 +1319,43 @@ /(\x{2c65}\x{2c65})\1/i,utf \x{2c65}\x{2c65}\x{23a}\x{23a} - + /(ⱥⱥ)\1/i,utf - ⱥⱥȺȺ - + ⱥⱥȺȺ + /(\x{23a}\x{23a}\x{23a})\1Y/i,utf X\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}YZ /(\x{2c65}\x{2c65})\1Y/i,utf X\x{2c65}\x{2c65}\x{23a}\x{23a}YZ -# These scripts weren't yet in Perl when I added Unicode 6.0.0 to PCRE +# These scripts weren't yet in Perl when I added Unicode 6.0.0 to PCRE /^[\p{Batak}]/utf \x{1bc0} \x{1bff} \= Expect no match \x{1bf4} - + /^[\p{Brahmi}]/utf \x{11000} \x{1106f} \= Expect no match \x{1104e} - + /^[\p{Mandaic}]/utf \x{840} \x{85e} \= Expect no match \x{85c} - \x{85d} + \x{85d} /(\X*)(.)/s,utf A\x{300} /^S(\X*)e(\X*)$/utf Stéréo - + /^\X/utf ́réo @@ -1387,8 +1387,8 @@ aa\=ps aa\=ph aba\=ps - -# These Unicode 6.1.0 scripts are not known to Perl. + +# These Unicode 6.1.0 scripts are not known to Perl. /\p{Chakma}\d/utf,ucp \x{11100}\x{1113c} @@ -1403,7 +1403,7 @@ A\x{300}\x{301}\=ph A\x{301}\=ps A\x{301}\=ph - + /^\X{2,3}/utf A\=ps A\=ph @@ -1419,7 +1419,7 @@ AA\=ph A\x{300}\x{301}A\x{300}\x{301}\=ps A\x{300}\x{301}A\x{300}\x{301}\=ph - + /^\X+/utf AA\=ps AA\=ph @@ -1486,8 +1486,8 @@ /is{2}t/i,utf \= Expect no match iskt - -# This property is a PCRE special + +# This property is a PCRE special /^\p{Xuc}/utf $abc @@ -1495,7 +1495,7 @@ `abc \x{1234}abc \= Expect no match - abc + abc /^\p{Xuc}+/utf $@`\x{a0}\x{1234}\x{e000}** @@ -1552,8 +1552,8 @@ @abc `abc \x{1234}abc - -# Some auto-possessification tests + +# Some auto-possessification tests /\pN+\z/B @@ -1603,7 +1603,7 @@ /\p{Xan}+\p{Nd} \P{Xan}+\p{Nd} \p{Xan}+\P{Nd} \P{Xan}+\P{Nd}/Bx,ucp -# End auto-possessification tests +# End auto-possessification tests /\w+/B,utf,ucp,auto_callout abcd @@ -1623,7 +1623,7 @@ /\d+\s{0,5}=\s*\S?=\w{0,4}\W*/B,utf,ucp /[RST]+/Bi,utf,ucp - + /[R-T]+/Bi,utf,ucp /[Q-U]+/Bi,utf,ucp @@ -1636,7 +1636,7 @@ /\x{100}\x{200}\K\x{300}/utf,startchar \x{100}\x{200}\x{300} - + # Test UTF characters in a substitution /ábc/utf,replace=XሴZ @@ -1675,15 +1675,6 @@ /((?<digit>\d)|(?<letter>\p{L}))/g,substitute_extended,replace=<${digit:+digit; :not digit; }${letter:+letter:not a letter}> ab12cde -/[\W\p{Any}]/B - abc - 123 - -/[\W\pL]/B - abc -\= Expect no match - 123 - /(*UCP)(*UTF)[[:>:]]X/B /abc/utf,replace=xyz @@ -1704,7 +1695,7 @@ /[^[:ascii:]\W]/utf,ucp,bincode \x{de} \x{200} -\= Expect no match +\= Expect no match \x{300} \x{37e} @@ -1716,4 +1707,357 @@ /(*UTF)C\x09((?<!'(?x)!*H? #\xcc\x9a[^$]/ -# End of testinput5 +/[\D]/utf + \x{1d7cf} + +/[\D\P{Nd}]/utf + \x{1d7cf} + +/[^\D]/utf + a9b +\= Expect no match + \x{1d7cf} + +/[^\D\P{Nd}]/utf + a9b + \x{1d7cf} +\= Expect no match + \x{10000} + +# Hex uses pattern length, not zero-terminated. This tests for overrunning +# the given length of a pattern. + +/'(*UTF)'/hex + +/'#('/hex,extended,utf + +/a(?<=A\XB)/utf + +/ab(?<=A\RB)/utf + +/../utf,auto_callout + \n\x{123}\x{123}\x{123}\x{123} + +# This tests processing wide characters in extended mode. + +/XȀ/x,utf + +# These three test a bug fix that was not clearing up after a locale setting +# when the test or a subsequent one matched a wide character. + +//locale=C + +/[\P{Yi}]/utf +\x{2f000} + +/[\P{Yi}]/utf,locale=C +\x{2f000} + +/^(?<!(?=))/B,utf + +# Horizontal and vertical space lists ignore caseless + +/[\HH]/Bi,utf + +/[^\HH]/Bi,utf + +//g,utf + \=zero_terminate + +/^(?1)\p{Nd}{3}(a)/ + a123a + +/\p{Nd}{0,3}[\pL](*:abc)(?C1)xxx/callout_info + +# --------------------------------------------------------------------------- + +# A bunch of tests that hit lines of code that others do not (at least when +# these were created). + +/^[^a]{3,}?x/i,utf,no_start_optimize,no_auto_possess +\= Expect no match + bbb + cc + +/^[ac]{3,}?x/i,utf,no_start_optimize,no_auto_possess +\= Expect no match + aaa\x{100} + +/^X\X/no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\p{L&}+?/no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\p{L}+?/no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\p{Lu}+?/no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\p{Arabic}+?/no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\p{Xan}+?/ucp,no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\s+?/ucp,no_start_optimize,no_auto_possess +\= Expect no match + X + XX + +/^X\S+?/ucp,no_start_optimize,no_auto_possess + XX +\= Expect no match + X + +/^X\w+?/ucp,no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X[^\x{b5}]+?/i,utf,no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X[\x{b5}]+?/i,utf,no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\p{Xuc}+?/utf,no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X.+?Z/s,utf,no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\R+?/utf,no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\H+?/utf,no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\V+?/utf,no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\s+?/utf,no_start_optimize,no_auto_possess +\= Expect no match + X + XX + +/^X\S+?/utf,no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\p{Any}{1,3}?Z/s,no_start_optimize,no_auto_possess + XYYYZ +\= Expect no match + XY + XYY + XYYY + XYYYYZ + +/^X\p{L&}{1,3}?Z/s,no_start_optimize,no_auto_possess +\= Expect no match + XY + XY! + +/^X\p{L}{1,3}?Z/s,no_start_optimize,no_auto_possess +\= Expect no match + XY + XY! + +/^X\p{Lu}{1,3}?Z/s,no_start_optimize,no_auto_possess +\= Expect no match + XY + XY! + +/^X\P{Han}{1,3}?Z/s,utf,no_start_optimize,no_auto_possess +\= Expect no match + XY + XY! + XY\x{2f00}! + +/^X\p{Xan}{1,3}?Z/s,no_start_optimize,no_auto_possess +\= Expect no match + XY + XY! + +/^X\p{Xsp}{1,3}?Z/s,no_start_optimize,no_auto_possess +\= Expect no match + X\n + X\n! + X\n\n! + +/^X\P{Xsp}{1,3}?Z/s,no_start_optimize,no_auto_possess +\= Expect no match + XYY\n + +/^X\p{Xwd}{1,3}?Z/s,no_start_optimize,no_auto_possess +\= Expect no match + XY + XY! + XYY! + +/^X\x{b5}+?Z/i,utf,no_start_optimize,no_auto_possess +\= Expect no match + X + X\x{b5} + X\x{b5}\x{b5}Y + +/^X\p{Xuc}+?Z/utf,no_start_optimize,no_auto_possess +\= Expect no match + X + X$ + X@@Y + +/(*CRLF)^X.+?Z/utf,no_start_optimize,no_auto_possess +\= Expect partial match + XYY\r\=ph +\= Expect no match + X + +/^X.+?Z/s,utf,no_start_optimize,no_auto_possess +\= Expect no match + X + XYY + +/^X\R+?Z/utf,no_start_optimize,no_auto_possess +\= Expect no match + X\nX + X\n\rX + X\n\r\nX + X\n\n + X\n\x{0c} + +/(*BSR_ANYCRLF)^X\R+?Z/utf,no_start_optimize,no_auto_possess +\= Expect no match + X\nX + X\n\rX + X\n\r\nX + X\n\n + X\n\x{0c} + +/^X\H+?Z/utf,no_start_optimize,no_auto_possess +\= Expect no match + XY\t + XYY + +/^X\h+?Z/utf,no_start_optimize,no_auto_possess +\= Expect no match + X\t\t + X\tY + +/^X\V+?Z/utf,no_start_optimize,no_auto_possess +\= Expect no match + XY\n + XYY + +/^X\v+?Z/utf,no_start_optimize,no_auto_possess +\= Expect no match + X\n\n + X\nY + +/^X\D+?Z/utf,no_start_optimize,no_auto_possess +\= Expect no match + XY9 + XYY + +/^X\d+?Z/utf,no_start_optimize,no_auto_possess +\= Expect no match + X99 + X9Y + +/^X\S+?Z/utf,no_start_optimize,no_auto_possess +\= Expect no match + XY\n + XYY + +/^X\s+?Z/utf,no_start_optimize,no_auto_possess +\= Expect no match + X\n\n + X\nY + +/^X\W+?Z/utf,no_start_optimize,no_auto_possess +\= Expect no match + X.A + X++ + +/^X\p{L&}{1,3}Z/no_start_optimize,no_auto_possess +\= Expect no match + XY + XY! + +/^X\p{L}{1,3}Z/no_start_optimize,no_auto_possess +\= Expect no match + XY + +/^X\p{Xan}{1,3}Z/no_start_optimize,no_auto_possess +\= Expect no match + XY + +/^X\P{Xsp}{1,3}Z/no_start_optimize,no_auto_possess +\= Expect no match + XYY + +/^X\p{Xuc}+Z/utf,no_start_optimize,no_auto_possess +\= Expect no match + X$ + +# ---------------------------------------------------------------------- +# These test the dangerous PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL option. + +/\x{d800}/B,utf,bad_escape_is_literal + +/\ud800/B,utf,alt_bsux,bad_escape_is_literal + +# ---------------------------------------------------------------------- + +/Aሴ+B/literal,utf,no_utf_check + Aሴ+B + +# These are here because I upgraded to Unicode 10.0.0 before Perl did, so it +# doesn't recognize all these scripts. In time these three tests can be moved +# to test 4. + +/^(\p{Adlam}+)(\p{Bhaiksuki}+)(\p{Marchen}+)(\p{Newa}+)(\p{Osage}+) + (\p{Tangut}+)(\p{Masaram_Gondi}+)(\p{Nushu}+)(\p{Soyombo}+) + (\p{Zanabazar_Square}+)/x,utf + \x{1E900}\x{1E924}\x{1E953}\x{11C00}\x{11C2D}\x{11C3E}\x{11C70}\x{11C77}\x{11CAB}\x{11400}\x{1142F}\x{11455}\x{104B0}\x{104D8}\x{104FB}\x{16FE0}\x{18800}\x{18AF2}\x{11D00}\x{11D3A}\x{11D59}\x{16FE1}\x{1B170}\x{1B2FB}\x{11A50}\x{11A58}\x{11AA2}\x{11A00}\x{11A07}\x{11A47} + +/^\x{1E900}\x{104B0}/i,utf + \x{1E900}\x{104B0} + \x{1E922}\x{104D8} + +/^(?:(\X)(?C))+$/utf + \x{1E900}\x{1E924}\x{1E953}\x{11C00}\x{11C2D}\x{11C3E}\x{11C70}\x{11C77}\x{11CAB}\x{11400}\x{1142F}\x{11455}\x{104B0}\x{104D8}\x{104FB}\x{16FE0}\x{18800}\x{18AF2}\x{11D00}\x{11D3A}\x{11D59}\x{16FE1}\x{1B170}\x{1B2FB}\x{11A50}\x{11A58}\x{11AA2}\x{11A00}\x{11A07}\x{11A47}\=callout_capture,callout_no_where + +# These two are here because JIT is not yet updated. Also, the very first data +# line is handled differently by Perl. + +/^\X/utf + A\x{200d}B A ZWJ + \x{261D}\x{1F3FB}B E_Base E_Modifier + \x{1F466}\x{1F3FF}B E_Base_GAZ E_Modifier + \x{200d}\x{1F3A4}B ZWJ Glue_After_ZWJ + \x{200d}\x{1F469}B ZWJ E_Base_GAZ + \x{1F1E6}\x{1F1E7}B RegionalIndicator RegionalIndicator + \x{261D}\x{E0100}\x{1F3FB}B E_Base Extend E_Modifier + +# Regional indicators + +/^(\X)(\X)/utf,aftertext + \x{1F1E6}\x{1F1E7}\x{1F1E7}B + \x{1F1E6}\x{1F1E7}\x{1F1E7}\x{1F1E6}B + + +# End of testinput5 |