summary refs log tree commit diff
path: root/testdata/testinput5
diff options
context:
space:
mode:
Diffstat (limited to 'testdata/testinput5')
-rw-r--r-- testdata/testinput5 778
1 files changed, 561 insertions, 217 deletions
diff --git a/testdata/testinput5 b/testdata/testinput5
index 2e13a7c..0366136 100644
--- a/testdata/testinput5
+++ b/testdata/testinput5
@@ -3,17 +3,21 @@
# results in 8-bit, 16-bit, and 32-bit modes are excluded (see tests 10 and
# 12).
+#newline_default lf any anycrlf
+
# PCRE2 and Perl disagree about the characteristics of certain Unicode
-# characters. For example, 061C is considered by Perl to be Arabic, though
-# is it not listed as such in the Unicode Scripts.txt file, and 2066-2069 are
-# graphic and printable according to Perl, though they are actually "isolate"
-# control characters. That is why the following tests are here rather than in
-# test 4.
+# characters. For example, 061C was considered by Perl to be Arabic, though
+# it was not listed as such in the Unicode Scripts.txt file for Unicode 8.
+# However, it *is* in that file for Unicode 10, but when I came to re-check,
+# Perl had changed in the meantime, with 5.026 not recognizing it as Arabic.
+
+# 2066-2069 are graphic and printable according to Perl, though they are
+# actually "isolate" control characters. That is why the following tests are
+# here rather than in test 4.
/^[\p{Arabic}]/utf
-\= Expect no match
\x{061c}
-
+
/^[[:graph:]]+$/utf,ucp
\= Expect no match
\x{61c}
@@ -37,14 +41,14 @@
/^[[:^print:]]+$/utf,ucp
\x{09}\x{1D}\x{85}\x{61c}\x{2028}\x{2029}\x{2065}\x{2066}\x{2067}
\x{2068}\x{2069}
-
+
# Perl does not consider U+180e to be a space character. It is true that it
# does not appear in the Unicode PropList.txt file as such, but in many other
# sources it is listed as a space, and has been treated as such in PCRE for
-# a long time.
+# a long time.
/^>[[:blank:]]*/utf,ucp
- >\x{20}\x{a0}\x{1680}\x{180e}\x{2000}\x{202f}\x{9}\x{b}\x{2028}
+ >\x{20}\x{a0}\x{1680}\x{180e}\x{2000}\x{202f}\x{9}\x{b}\x{2028}
/^A\s+Z/utf,ucp
A\x{85}\x{180e}\x{2005}Z
@@ -52,7 +56,7 @@
/^A[\s]+Z/utf,ucp
A\x{2005}Z
A\x{85}\x{2005}Z
-
+
/^[[:graph:]]+$/utf,ucp
\= Expect no match
\x{180e}
@@ -104,7 +108,7 @@
/\x{0041}\x{2262}\x{0391}\x{002e}/IB,utf
\x{0041}\x{2262}\x{0391}\x{002e}
-
+
/.{3,5}X/IB,utf
\x{212ab}\x{212ab}\x{212ab}\x{861}X
@@ -116,23 +120,16 @@
\= Expect no match
c
\x{ff}
- \x{100}
+ \x{100}
-/^[^ab]/IB,utf
- c
- \x{ff}
- \x{100}
-\= Expect no match
- aaa
-
/\x{100}*(\d+|"(?1)")/utf
1234
- "1234"
+ "1234"
\x{100}1234
- "\x{100}1234"
- \x{100}\x{100}12ab
- \x{100}\x{100}"12"
-\= Expect no match
+ "\x{100}1234"
+ \x{100}\x{100}12ab
+ \x{100}\x{100}"12"
+\= Expect no match
\x{100}\x{100}abcd
/\x{100}*/IB,utf
@@ -148,7 +145,7 @@
\x{104}
\= Expect no match
\x{105}
- \x{ff}
+ \x{ff}
/[\xFF]/IB
>\xff<
@@ -158,18 +155,18 @@
/[Ä-Ü]/utf
Ö # Matches without Study
\x{d6}
-
+
/[Ä-Ü]/utf
Ö <-- Same with Study
\x{d6}
-
+
/[\x{c4}-\x{dc}]/utf
Ö # Matches without Study
- \x{d6}
+ \x{d6}
/[\x{c4}-\x{dc}]/utf
Ö <-- Same with Study
- \x{d6}
+ \x{d6}
/[^\x{100}]abc(xyz(?1))/IB,utf
@@ -183,19 +180,22 @@
/\W/utf
A.B
- A\x{100}B
-
+ A\x{100}B
+
/\w/utf
- \x{100}X
+ \x{100}X
-/^\ሴ/IB,utf
+# Use no_start_optimize because the first code unit is different in 8-bit from
+# the wider modes.
+
+/^\ሴ/IB,utf,no_start_optimize
/()()()()()()()()()()
()()()()()()()()()()
()()()()()()()()()()
()()()()()()()()()()
A (x) (?41) B/x,utf
- AxxB
+ AxxB
/^[\x{100}\E-\Q\E\x{150}]/B,utf
@@ -213,11 +213,11 @@
a\r\nb
a\x0bb
a\x0cb
- a\x{85}b
- a\x{2028}b
- a\x{2029}b
+ a\x{85}b
+ a\x{2028}b
+ a\x{2029}b
\= Expect no match
- a\n\rb
+ a\n\rb
/^a\R*b/bsr=unicode,utf
ab
@@ -226,9 +226,9 @@
a\r\nb
a\x0bb
a\x0c\x{2028}\x{2029}b
- a\x{85}b
- a\n\rb
- a\n\r\x{85}\x0cb
+ a\x{85}b
+ a\n\rb
+ a\n\r\x{85}\x0cb
/^a\R+b/bsr=unicode,utf
a\nb
@@ -236,20 +236,20 @@
a\r\nb
a\x0bb
a\x0c\x{2028}\x{2029}b
- a\x{85}b
- a\n\rb
- a\n\r\x{85}\x0cb
+ a\x{85}b
+ a\n\rb
+ a\n\r\x{85}\x0cb
\= Expect no match
- ab
+ ab
/^a\R{1,3}b/bsr=unicode,utf
a\nb
a\n\rb
a\n\r\x{85}b
- a\r\n\r\nb
- a\r\n\r\n\r\nb
+ a\r\n\r\nb
+ a\r\n\r\n\r\nb
a\n\r\n\rb
- a\n\n\r\nb
+ a\n\n\r\nb
\= Expect no match
a\n\n\n\rb
a\r
@@ -258,28 +258,28 @@
X X\x0a
X\x09X\x0b
\= Expect no match
- \x{a0} X\x0a
-
+ \x{a0} X\x0a
+
/\H*\h+\V?\v{3,4}/utf
\x09\x20\x{a0}X\x0a\x0b\x0c\x0d\x0a
\x09\x20\x{a0}\x0a\x0b\x0c\x0d\x0a
\x09\x20\x{a0}\x0a\x0b\x0c
-\= Expect no match
+\= Expect no match
\x09\x20\x{a0}\x0a\x0b
-
+
/\H\h\V\v/utf
\x{3001}\x{3000}\x{2030}\x{2028}
X\x{180e}X\x{85}
\= Expect no match
- \x{2009} X\x0a
-
+ \x{2009} X\x0a
+
/\H*\h+\V?\v{3,4}/utf
\x{1680}\x{180e}\x{2007}X\x{2028}\x{2029}\x0c\x0d\x0a
\x09\x{205f}\x{a0}\x0a\x{2029}\x0c\x{2028}\x0a
\x09\x20\x{202f}\x0a\x0b\x0c
-\= Expect no match
+\= Expect no match
\x09\x{200a}\x{a0}\x{2028}\x0b
-
+
/[\h]/B,utf
>\x{1680}
@@ -293,57 +293,57 @@
/[\V]/B,utf
/.*$/newline=any,utf
- \x{1ec5}
-
+ \x{1ec5}
+
/a\Rb/I,bsr=anycrlf,utf
a\rb
a\nb
a\r\nb
\= Expect no match
a\x{85}b
- a\x0bb
+ a\x0bb
/a\Rb/I,bsr=unicode,utf
a\rb
a\nb
a\r\nb
a\x{85}b
- a\x0bb
-
+ a\x0bb
+
/a\R?b/I,bsr=anycrlf,utf
a\rb
a\nb
a\r\nb
\= Expect no match
a\x{85}b
- a\x0bb
+ a\x0bb
/a\R?b/I,bsr=unicode,utf
a\rb
a\nb
a\r\nb
a\x{85}b
- a\x0bb
-
+ a\x0bb
+
/.*a.*=.b.*/utf,newline=any
QQQ\x{2029}ABCaXYZ=!bPQR
\= Expect no match
a\x{2029}b
- \x61\xe2\x80\xa9\x62
+ \x61\xe2\x80\xa9\x62
/[[:a\x{100}b:]]/utf
/a[^]b/utf,alt_bsux,allow_empty_class,match_unset_backref
a\x{1234}b
- a\nb
+ a\nb
\= Expect no match
- ab
-
+ ab
+
/a[^]+b/utf,alt_bsux,allow_empty_class,match_unset_backref
aXb
- a\nX\nX\x{1234}b
+ a\nX\nX\x{1234}b
\= Expect no match
- ab
+ ab
/(\x{de})\1/
\x{de}\x{de}
@@ -357,42 +357,42 @@
Xaa\=ps
Xaaa\=ps
Xaaaa\=ps
-
+
/Xa{2,4}?b/utf
X\=ps
Xa\=ps
Xaa\=ps
Xaaa\=ps
Xaaaa\=ps
-
+
/Xa{2,4}+b/utf
X\=ps
Xa\=ps
Xaa\=ps
Xaaa\=ps
Xaaaa\=ps
-
+
/X\x{123}{2,4}b/utf
X\=ps
X\x{123}\=ps
X\x{123}\x{123}\=ps
X\x{123}\x{123}\x{123}\=ps
X\x{123}\x{123}\x{123}\x{123}\=ps
-
+
/X\x{123}{2,4}?b/utf
X\=ps
X\x{123}\=ps
X\x{123}\x{123}\=ps
X\x{123}\x{123}\x{123}\=ps
X\x{123}\x{123}\x{123}\x{123}\=ps
-
+
/X\x{123}{2,4}+b/utf
X\=ps
X\x{123}\=ps
X\x{123}\x{123}\=ps
X\x{123}\x{123}\x{123}\=ps
X\x{123}\x{123}\x{123}\x{123}\=ps
-
+
/X\x{123}{2,4}b/utf
\= Expect no match
Xx\=ps
@@ -400,7 +400,7 @@
X\x{123}\x{123}x\=ps
X\x{123}\x{123}\x{123}x\=ps
X\x{123}\x{123}\x{123}\x{123}x\=ps
-
+
/X\x{123}{2,4}?b/utf
\= Expect no match
Xx\=ps
@@ -408,7 +408,7 @@
X\x{123}\x{123}x\=ps
X\x{123}\x{123}\x{123}x\=ps
X\x{123}\x{123}\x{123}\x{123}x\=ps
-
+
/X\x{123}{2,4}+b/utf
\= Expect no match
Xx\=ps
@@ -416,21 +416,21 @@
X\x{123}\x{123}x\=ps
X\x{123}\x{123}\x{123}x\=ps
X\x{123}\x{123}\x{123}\x{123}x\=ps
-
+
/X\d{2,4}b/utf
X\=ps
X3\=ps
X33\=ps
X333\=ps
X3333\=ps
-
+
/X\d{2,4}?b/utf
X\=ps
X3\=ps
X33\=ps
X333\=ps
X3333\=ps
-
+
/X\d{2,4}+b/utf
X\=ps
X3\=ps
@@ -444,14 +444,14 @@
Xaa\=ps
Xaaa\=ps
Xaaaa\=ps
-
+
/X\D{2,4}?b/utf
X\=ps
Xa\=ps
Xaa\=ps
Xaaa\=ps
Xaaaa\=ps
-
+
/X\D{2,4}+b/utf
X\=ps
Xa\=ps
@@ -465,14 +465,14 @@
X\x{123}\x{123}\=ps
X\x{123}\x{123}\x{123}\=ps
X\x{123}\x{123}\x{123}\x{123}\=ps
-
+
/X\D{2,4}?b/utf
X\=ps
X\x{123}\=ps
X\x{123}\x{123}\=ps
X\x{123}\x{123}\x{123}\=ps
X\x{123}\x{123}\x{123}\x{123}\=ps
-
+
/X\D{2,4}+b/utf
X\=ps
X\x{123}\=ps
@@ -486,14 +486,14 @@
Xaa\=ps
Xaaa\=ps
Xaaaa\=ps
-
+
/X[abc]{2,4}?b/utf
X\=ps
Xa\=ps
Xaa\=ps
Xaaa\=ps
Xaaaa\=ps
-
+
/X[abc]{2,4}+b/utf
X\=ps
Xa\=ps
@@ -507,14 +507,14 @@
X\x{123}\x{123}\=ps
X\x{123}\x{123}\x{123}\=ps
X\x{123}\x{123}\x{123}\x{123}\=ps
-
+
/X[abc\x{123}]{2,4}?b/utf
X\=ps
X\x{123}\=ps
X\x{123}\x{123}\=ps
X\x{123}\x{123}\x{123}\=ps
X\x{123}\x{123}\x{123}\x{123}\=ps
-
+
/X[abc\x{123}]{2,4}+b/utf
X\=ps
X\x{123}\=ps
@@ -528,14 +528,14 @@
Xzz\=ps
Xzzz\=ps
Xzzzz\=ps
-
+
/X[^a]{2,4}?b/utf
X\=ps
Xz\=ps
Xzz\=ps
Xzzz\=ps
Xzzzz\=ps
-
+
/X[^a]{2,4}+b/utf
X\=ps
Xz\=ps
@@ -549,14 +549,14 @@
X\x{123}\x{123}\=ps
X\x{123}\x{123}\x{123}\=ps
X\x{123}\x{123}\x{123}\x{123}\=ps
-
+
/X[^a]{2,4}?b/utf
X\=ps
X\x{123}\=ps
X\x{123}\x{123}\=ps
X\x{123}\x{123}\x{123}\=ps
X\x{123}\x{123}\x{123}\x{123}\=ps
-
+
/X[^a]{2,4}+b/utf
X\=ps
X\x{123}\=ps
@@ -570,14 +570,14 @@
YXYY\=ps
YXYYY\=ps
YXYYYY\=ps
-
+
/(Y)X\1{2,4}?b/utf
YX\=ps
YXY\=ps
YXYY\=ps
YXYYY\=ps
YXYYYY\=ps
-
+
/(Y)X\1{2,4}+b/utf
YX\=ps
YXY\=ps
@@ -591,14 +591,14 @@
\x{123}X\x{123}\x{123}\=ps
\x{123}X\x{123}\x{123}\x{123}\=ps
\x{123}X\x{123}\x{123}\x{123}\x{123}\=ps
-
+
/(\x{123})X\1{2,4}?b/utf
\x{123}X\=ps
\x{123}X\x{123}\=ps
\x{123}X\x{123}\x{123}\=ps
\x{123}X\x{123}\x{123}\x{123}\=ps
\x{123}X\x{123}\x{123}\x{123}\x{123}\=ps
-
+
/(\x{123})X\1{2,4}+b/utf
\x{123}X\=ps
\x{123}X\x{123}\=ps
@@ -640,13 +640,13 @@
AB\x{a0}xxx\x{85}XYZ
/\S \S/utf,tables=2
- \x{a2} \x{84}
+ \x{a2} \x{84}
'A#хц'Bx,newline=any,utf
'A#хц
PQ'Bx,newline=any,utf
-
+
/a+#хaa
z#XX?/Bx,newline=any,utf
@@ -661,13 +661,13 @@
/(\R*)(.)/s,utf
\r\n
- \r\r\n\n\r
- \r\r\n\n\r\n
+ \r\r\n\n\r
+ \r\r\n\n\r\n
/(\R)*(.)/s,utf
\r\n
- \r\r\n\n\r
- \r\r\n\n\r\n
+ \r\r\n\n\r
+ \r\r\n\n\r\n
/[^\x{1234}]+/Ii,utf
@@ -688,7 +688,7 @@
/f.*/s,utf
for\=ph
-
+
/\x{d7ff}\x{e000}/utf
/\x{d800}/utf
@@ -777,7 +777,7 @@
/./utf,newline=crlf
\r\=ps
\r\=ph
-
+
/.{2,3}/utf,newline=crlf
\r\=ps
\r\=ph
@@ -835,9 +835,9 @@
/[\p{Nd}+-]+/IB,utf
1234
12-34
- 12+\x{661}-34
+ 12+\x{661}-34
\= Expect no match
- abcd
+ abcd
/(?:[\PPa*]*){8,}/
@@ -884,7 +884,7 @@
/\p{Zl}{2,3}+/B,utf



\x{2028}\x{2028}\x{2028}
-
+
/\p{Zl}/B,utf
/\p{Lu}{3}+/B,utf
@@ -904,8 +904,8 @@
/^\p{Cs}/utf
\x{dfff}\=no_utf_check
\= Expect no match
- \x{09f}
-
+ \x{09f}
+
/^\p{Mn}/utf
\x{1a1b}
@@ -923,60 +923,60 @@
\= Expect no match
X
\x{2c2}
-
+
/^\p{Zs}/utf
\ \
\x{a0}
\x{1680}
\x{2000}
- \x{2001}
+ \x{2001}
\= Expect no match
\x{2028}
- \x{200d}
-
+ \x{200d}
+
# These are here because Perl has problems with the negative versions of the
# properties and has changed how it behaves for caseless matching.
-
+
/\p{^Lu}/i,utf
1234
\= Expect no match
- ABC
+ ABC
/\P{Lu}/i,utf
1234
\= Expect no match
- ABC
+ ABC
/\p{Ll}/i,utf
a
Az
\= Expect no match
- ABC
+ ABC
/\p{Lu}/i,utf
A
- a\x{10a0}B
-\= Expect no match
+ a\x{10a0}B
+\= Expect no match
a
- \x{1d00}
+ \x{1d00}
/\p{Lu}/i,utf
A
aZ
\= Expect no match
- abc
+ abc
/[\x{c0}\x{391}]/i,utf
\x{c0}
- \x{e0}
+ \x{e0}
# The next two are special cases where the lengths of the different cases of
# the same character differ. The first went wrong with heap frame storage; the
-# second was broken in all cases.
+# second was broken in all cases.
/^\x{023a}+?(\x{0130}+)/i,utf
\x{023a}\x{2c65}\x{0130}
-
+
/^\x{023a}+([^X])/i,utf
\x{023a}\x{2c65}X
@@ -994,71 +994,71 @@
/^\x{c0}$/i,utf
\x{c0}
- \x{e0}
+ \x{e0}
/^\x{e0}$/i,utf
\x{c0}
- \x{e0}
+ \x{e0}
# The next two should be Perl-compatible, but it fails to match \x{e0}. PCRE
# will match it only with UCP support, because without that it has no notion
-# of case for anything other than the ASCII letters.
+# of case for anything other than the ASCII letters.
/((?i)[\x{c0}])/utf
\x{c0}
- \x{e0}
+ \x{e0}
/(?i:[\x{c0}])/utf
\x{c0}
- \x{e0}
+ \x{e0}
-# These are PCRE's extra properties to help with Unicodizing \d etc.
+# These are PCRE's extra properties to help with Unicodizing \d etc.
/^\p{Xan}/utf
ABCD
1234
\x{6ca}
\x{a6c}
- \x{10a7}
+ \x{10a7}
\= Expect no match
- _ABC
+ _ABC
/^\p{Xan}+/utf
ABCD1234\x{6ca}\x{a6c}\x{10a7}_
\= Expect no match
- _ABC
+ _ABC
/^\p{Xan}+?/utf
\x{6ca}\x{a6c}\x{10a7}_
/^\p{Xan}*/utf
ABCD1234\x{6ca}\x{a6c}\x{10a7}_
-
+
/^\p{Xan}{2,9}/utf
ABCD1234\x{6ca}\x{a6c}\x{10a7}_
-
+
/^\p{Xan}{2,9}?/utf
\x{6ca}\x{a6c}\x{10a7}_
-
+
/^[\p{Xan}]/utf
ABCD1234_
1234abcd_
\x{6ca}
\x{a6c}
- \x{10a7}
+ \x{10a7}
\= Expect no match
- _ABC
-
+ _ABC
+
/^[\p{Xan}]+/utf
ABCD1234\x{6ca}\x{a6c}\x{10a7}_
\= Expect no match
- _ABC
+ _ABC
/^>\p{Xsp}/utf
>\x{1680}\x{2028}\x{0b}
- >\x{a0}
+ >\x{a0}
\= Expect no match
- \x{0b}
+ \x{0b}
/^>\p{Xsp}+/utf
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
@@ -1068,24 +1068,24 @@
/^>\p{Xsp}*/utf
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
-
+
/^>\p{Xsp}{2,9}/utf
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
-
+
/^>\p{Xsp}{2,9}?/utf
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
-
+
/^>[\p{Xsp}]/utf
>\x{2028}\x{0b}
-
+
/^>[\p{Xsp}]+/utf
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
/^>\p{Xps}/utf
>\x{1680}\x{2028}\x{0b}
- >\x{a0}
+ >\x{a0}
\= Expect no match
- \x{0b}
+ \x{0b}
/^>\p{Xps}+/utf
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
@@ -1095,16 +1095,16 @@
/^>\p{Xps}*/utf
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
-
+
/^>\p{Xps}{2,9}/utf
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
-
+
/^>\p{Xps}{2,9}?/utf
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
-
+
/^>[\p{Xps}]/utf
>\x{2028}\x{0b}
-
+
/^>[\p{Xps}]+/utf
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
@@ -1114,9 +1114,9 @@
\x{6ca}
\x{a6c}
\x{10a7}
- _ABC
+ _ABC
\= Expect no match
- []
+ []
/^\p{Xwd}+/utf
ABCD1234\x{6ca}\x{a6c}\x{10a7}_
@@ -1126,32 +1126,32 @@
/^\p{Xwd}*/utf
ABCD1234\x{6ca}\x{a6c}\x{10a7}_
-
+
/^\p{Xwd}{2,9}/utf
A_B12\x{6ca}\x{a6c}\x{10a7}
-
+
/^\p{Xwd}{2,9}?/utf
\x{6ca}\x{a6c}\x{10a7}_
-
+
/^[\p{Xwd}]/utf
ABCD1234_
1234abcd_
\x{6ca}
\x{a6c}
- \x{10a7}
- _ABC
+ \x{10a7}
+ _ABC
\= Expect no match
- []
-
+ []
+
/^[\p{Xwd}]+/utf
ABCD1234\x{6ca}\x{a6c}\x{10a7}_
-# A check not in UTF-8 mode
+# A check not in UTF-8 mode
/^[\p{Xwd}]+/
ABCD1234_
-
-# Some negative checks
+
+# Some negative checks
/^[\P{Xwd}]+/utf
!.+\x{019}\x{35a}AB
@@ -1221,53 +1221,53 @@
/[[:xdigit:]]/B,ucp
-# Unicode properties for \b abd \B
+# Unicode properties for \b and \B
/\b...\B/utf,ucp
abc_
- \x{37e}abc\x{376}
- \x{37e}\x{376}\x{371}\x{393}\x{394}
- !\x{c0}++\x{c1}\x{c2}
- !\x{c0}+++++
+ \x{37e}abc\x{376}
+ \x{37e}\x{376}\x{371}\x{393}\x{394}
+ !\x{c0}++\x{c1}\x{c2}
+ !\x{c0}+++++
-# Without PCRE_UCP, non-ASCII always fail, even if < 256
+# Without PCRE_UCP, non-ASCII always fail, even if < 256
/\b...\B/utf
abc_
-\= Expect no match
- \x{37e}abc\x{376}
- \x{37e}\x{376}\x{371}\x{393}\x{394}
- !\x{c0}++\x{c1}\x{c2}
- !\x{c0}+++++
+\= Expect no match
+ \x{37e}abc\x{376}
+ \x{37e}\x{376}\x{371}\x{393}\x{394}
+ !\x{c0}++\x{c1}\x{c2}
+ !\x{c0}+++++
-# With PCRE_UCP, non-UTF8 chars that are < 256 still check properties
+# With PCRE_UCP, non-UTF8 chars that are < 256 still check properties
/\b...\B/ucp
abc_
- !\x{c0}++\x{c1}\x{c2}
- !\x{c0}+++++
+ !\x{c0}++\x{c1}\x{c2}
+ !\x{c0}+++++
-# Some of these are silly, but they check various combinations
+# Some of these are silly, but they check various combinations
/[[:^alpha:][:^cntrl:]]+/B,utf,ucp
123
- abc
+ abc
/[[:^cntrl:][:^alpha:]]+/B,utf,ucp
123
- abc
+ abc
/[[:alpha:]]+/B,utf,ucp
abc
/[[:^alpha:]\S]+/B,utf,ucp
123
- abc
+ abc
/[^\d]+/B,utf,ucp
abc123
abc\x{123}
- \x{660}abc
+ \x{660}abc
/\p{Lu}+9\p{Lu}+B\p{Lu}+b/B
@@ -1287,7 +1287,7 @@
/A+\p{N}A+\dB+\p{N}*B+\d*/B,ucp
-# These behaved oddly in Perl, so they are kept in this test
+# These behaved oddly in Perl, so they are kept in this test
/(\x{23a}\x{23a}\x{23a})?\1/i,utf
\= Expect no match
@@ -1319,43 +1319,43 @@
/(\x{2c65}\x{2c65})\1/i,utf
\x{2c65}\x{2c65}\x{23a}\x{23a}
-
+
/(ⱥⱥ)\1/i,utf
- ⱥⱥȺȺ
-
+ ⱥⱥȺȺ
+
/(\x{23a}\x{23a}\x{23a})\1Y/i,utf
X\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}YZ
/(\x{2c65}\x{2c65})\1Y/i,utf
X\x{2c65}\x{2c65}\x{23a}\x{23a}YZ
-# These scripts weren't yet in Perl when I added Unicode 6.0.0 to PCRE
+# These scripts weren't yet in Perl when I added Unicode 6.0.0 to PCRE
/^[\p{Batak}]/utf
\x{1bc0}
\x{1bff}
\= Expect no match
\x{1bf4}
-
+
/^[\p{Brahmi}]/utf
\x{11000}
\x{1106f}
\= Expect no match
\x{1104e}
-
+
/^[\p{Mandaic}]/utf
\x{840}
\x{85e}
\= Expect no match
\x{85c}
- \x{85d}
+ \x{85d}
/(\X*)(.)/s,utf
A\x{300}
/^S(\X*)e(\X*)$/utf
Stéréo
-
+
/^\X/utf
́réo
@@ -1387,8 +1387,8 @@
aa\=ps
aa\=ph
aba\=ps
-
-# These Unicode 6.1.0 scripts are not known to Perl.
+
+# These Unicode 6.1.0 scripts are not known to Perl.
/\p{Chakma}\d/utf,ucp
\x{11100}\x{1113c}
@@ -1403,7 +1403,7 @@
A\x{300}\x{301}\=ph
A\x{301}\=ps
A\x{301}\=ph
-
+
/^\X{2,3}/utf
A\=ps
A\=ph
@@ -1419,7 +1419,7 @@
AA\=ph
A\x{300}\x{301}A\x{300}\x{301}\=ps
A\x{300}\x{301}A\x{300}\x{301}\=ph
-
+
/^\X+/utf
AA\=ps
AA\=ph
@@ -1486,8 +1486,8 @@
/is{2}t/i,utf
\= Expect no match
iskt
-
-# This property is a PCRE special
+
+# This property is a PCRE special
/^\p{Xuc}/utf
$abc
@@ -1495,7 +1495,7 @@
`abc
\x{1234}abc
\= Expect no match
- abc
+ abc
/^\p{Xuc}+/utf
$@`\x{a0}\x{1234}\x{e000}**
@@ -1552,8 +1552,8 @@
@abc
`abc
\x{1234}abc
-
-# Some auto-possessification tests
+
+# Some auto-possessification tests
/\pN+\z/B
@@ -1603,7 +1603,7 @@
/\p{Xan}+\p{Nd} \P{Xan}+\p{Nd} \p{Xan}+\P{Nd} \P{Xan}+\P{Nd}/Bx,ucp
-# End auto-possessification tests
+# End auto-possessification tests
/\w+/B,utf,ucp,auto_callout
abcd
@@ -1623,7 +1623,7 @@
/\d+\s{0,5}=\s*\S?=\w{0,4}\W*/B,utf,ucp
/[RST]+/Bi,utf,ucp
-
+
/[R-T]+/Bi,utf,ucp
/[Q-U]+/Bi,utf,ucp
@@ -1636,7 +1636,7 @@
/\x{100}\x{200}\K\x{300}/utf,startchar
\x{100}\x{200}\x{300}
-
+
# Test UTF characters in a substitution
/ábc/utf,replace=XሴZ
@@ -1675,15 +1675,6 @@
/((?<digit>\d)|(?<letter>\p{L}))/g,substitute_extended,replace=<${digit:+digit; :not digit; }${letter:+letter:not a letter}>
ab12cde
-/[\W\p{Any}]/B
- abc
- 123
-
-/[\W\pL]/B
- abc
-\= Expect no match
- 123
-
/(*UCP)(*UTF)[[:>:]]X/B
/abc/utf,replace=xyz
@@ -1704,7 +1695,7 @@
/[^[:ascii:]\W]/utf,ucp,bincode
\x{de}
\x{200}
-\= Expect no match
+\= Expect no match
\x{300}
\x{37e}
@@ -1716,4 +1707,357 @@
/(*UTF)C\x09((?<!'(?x)!*H? #\xcc\x9a[^$]/
-# End of testinput5
+/[\D]/utf
+ \x{1d7cf}
+
+/[\D\P{Nd}]/utf
+ \x{1d7cf}
+
+/[^\D]/utf
+ a9b
+\= Expect no match
+ \x{1d7cf}
+
+/[^\D\P{Nd}]/utf
+ a9b
+ \x{1d7cf}
+\= Expect no match
+ \x{10000}
+
+# Hex uses pattern length, not zero-terminated. This tests for overrunning
+# the given length of a pattern.
+
+/'(*UTF)'/hex
+
+/'#('/hex,extended,utf
+
+/a(?<=A\XB)/utf
+
+/ab(?<=A\RB)/utf
+
+/../utf,auto_callout
+ \n\x{123}\x{123}\x{123}\x{123}
+
+# This tests processing wide characters in extended mode.
+
+/XȀ/x,utf
+
+# These three test a bug fix that was not clearing up after a locale setting
+# when the test or a subsequent one matched a wide character.
+
+//locale=C
+
+/[\P{Yi}]/utf
+\x{2f000}
+
+/[\P{Yi}]/utf,locale=C
+\x{2f000}
+
+/^(?<!(?=􃡜))/B,utf
+
+# Horizontal and vertical space lists ignore caseless
+
+/[\HH]/Bi,utf
+
+/[^\HH]/Bi,utf
+
+//g,utf
+ \=zero_terminate
+
+/^(?1)\p{Nd}{3}(a)/
+ a123a
+
+/\p{Nd}{0,3}[\pL](*:abc)(?C1)xxx/callout_info
+
+# ---------------------------------------------------------------------------
+
+# A bunch of tests that hit lines of code that others do not (at least when
+# these were created).
+
+/^[^a]{3,}?x/i,utf,no_start_optimize,no_auto_possess
+\= Expect no match
+ bbb
+ cc
+
+/^[ac]{3,}?x/i,utf,no_start_optimize,no_auto_possess
+\= Expect no match
+ aaa\x{100}
+
+/^X\X/no_start_optimize,no_auto_possess
+\= Expect no match
+ X
+
+/^X\p{L&}+?/no_start_optimize,no_auto_possess
+\= Expect no match
+ X
+
+/^X\p{L}+?/no_start_optimize,no_auto_possess
+\= Expect no match
+ X
+
+/^X\p{Lu}+?/no_start_optimize,no_auto_possess
+\= Expect no match
+ X
+
+/^X\p{Arabic}+?/no_start_optimize,no_auto_possess
+\= Expect no match
+ X
+
+/^X\p{Xan}+?/ucp,no_start_optimize,no_auto_possess
+\= Expect no match
+ X
+
+/^X\s+?/ucp,no_start_optimize,no_auto_possess
+\= Expect no match
+ X
+ XX
+
+/^X\S+?/ucp,no_start_optimize,no_auto_possess
+ XX
+\= Expect no match
+ X
+
+/^X\w+?/ucp,no_start_optimize,no_auto_possess
+\= Expect no match
+ X
+
+/^X[^\x{b5}]+?/i,utf,no_start_optimize,no_auto_possess
+\= Expect no match
+ X
+
+/^X[\x{b5}]+?/i,utf,no_start_optimize,no_auto_possess
+\= Expect no match
+ X
+
+/^X\p{Xuc}+?/utf,no_start_optimize,no_auto_possess
+\= Expect no match
+ X
+
+/^X.+?Z/s,utf,no_start_optimize,no_auto_possess
+\= Expect no match
+ X
+
+/^X\R+?/utf,no_start_optimize,no_auto_possess
+\= Expect no match
+ X
+
+/^X\H+?/utf,no_start_optimize,no_auto_possess
+\= Expect no match
+ X
+
+/^X\V+?/utf,no_start_optimize,no_auto_possess
+\= Expect no match
+ X
+
+/^X\s+?/utf,no_start_optimize,no_auto_possess
+\= Expect no match
+ X
+ XX
+
+/^X\S+?/utf,no_start_optimize,no_auto_possess
+\= Expect no match
+ X
+
+/^X\p{Any}{1,3}?Z/s,no_start_optimize,no_auto_possess
+ XYYYZ
+\= Expect no match
+ XY
+ XYY
+ XYYY
+ XYYYYZ
+
+/^X\p{L&}{1,3}?Z/s,no_start_optimize,no_auto_possess
+\= Expect no match
+ XY
+ XY!
+
+/^X\p{L}{1,3}?Z/s,no_start_optimize,no_auto_possess
+\= Expect no match
+ XY
+ XY!
+
+/^X\p{Lu}{1,3}?Z/s,no_start_optimize,no_auto_possess
+\= Expect no match
+ XY
+ XY!
+
+/^X\P{Han}{1,3}?Z/s,utf,no_start_optimize,no_auto_possess
+\= Expect no match
+ XY
+ XY!
+ XY\x{2f00}!
+
+/^X\p{Xan}{1,3}?Z/s,no_start_optimize,no_auto_possess
+\= Expect no match
+ XY
+ XY!
+
+/^X\p{Xsp}{1,3}?Z/s,no_start_optimize,no_auto_possess
+\= Expect no match
+ X\n
+ X\n!
+ X\n\n!
+
+/^X\P{Xsp}{1,3}?Z/s,no_start_optimize,no_auto_possess
+\= Expect no match
+ XYY\n
+
+/^X\p{Xwd}{1,3}?Z/s,no_start_optimize,no_auto_possess
+\= Expect no match
+ XY
+ XY!
+ XYY!
+
+/^X\x{b5}+?Z/i,utf,no_start_optimize,no_auto_possess
+\= Expect no match
+ X
+ X\x{b5}
+ X\x{b5}\x{b5}Y
+
+/^X\p{Xuc}+?Z/utf,no_start_optimize,no_auto_possess
+\= Expect no match
+ X
+ X$
+ X@@Y
+
+/(*CRLF)^X.+?Z/utf,no_start_optimize,no_auto_possess
+\= Expect partial match
+ XYY\r\=ph
+\= Expect no match
+ X
+
+/^X.+?Z/s,utf,no_start_optimize,no_auto_possess
+\= Expect no match
+ X
+ XYY
+
+/^X\R+?Z/utf,no_start_optimize,no_auto_possess
+\= Expect no match
+ X\nX
+ X\n\rX
+ X\n\r\nX
+ X\n\n
+ X\n\x{0c}
+
+/(*BSR_ANYCRLF)^X\R+?Z/utf,no_start_optimize,no_auto_possess
+\= Expect no match
+ X\nX
+ X\n\rX
+ X\n\r\nX
+ X\n\n
+ X\n\x{0c}
+
+/^X\H+?Z/utf,no_start_optimize,no_auto_possess
+\= Expect no match
+ XY\t
+ XYY
+
+/^X\h+?Z/utf,no_start_optimize,no_auto_possess
+\= Expect no match
+ X\t\t
+ X\tY
+
+/^X\V+?Z/utf,no_start_optimize,no_auto_possess
+\= Expect no match
+ XY\n
+ XYY
+
+/^X\v+?Z/utf,no_start_optimize,no_auto_possess
+\= Expect no match
+ X\n\n
+ X\nY
+
+/^X\D+?Z/utf,no_start_optimize,no_auto_possess
+\= Expect no match
+ XY9
+ XYY
+
+/^X\d+?Z/utf,no_start_optimize,no_auto_possess
+\= Expect no match
+ X99
+ X9Y
+
+/^X\S+?Z/utf,no_start_optimize,no_auto_possess
+\= Expect no match
+ XY\n
+ XYY
+
+/^X\s+?Z/utf,no_start_optimize,no_auto_possess
+\= Expect no match
+ X\n\n
+ X\nY
+
+/^X\W+?Z/utf,no_start_optimize,no_auto_possess
+\= Expect no match
+ X.A
+ X++
+
+/^X\p{L&}{1,3}Z/no_start_optimize,no_auto_possess
+\= Expect no match
+ XY
+ XY!
+
+/^X\p{L}{1,3}Z/no_start_optimize,no_auto_possess
+\= Expect no match
+ XY
+
+/^X\p{Xan}{1,3}Z/no_start_optimize,no_auto_possess
+\= Expect no match
+ XY
+
+/^X\P{Xsp}{1,3}Z/no_start_optimize,no_auto_possess
+\= Expect no match
+ XYY
+
+/^X\p{Xuc}+Z/utf,no_start_optimize,no_auto_possess
+\= Expect no match
+ X$
+
+# ----------------------------------------------------------------------
+# These test the dangerous PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL option.
+
+/\x{d800}/B,utf,bad_escape_is_literal
+
+/\ud800/B,utf,alt_bsux,bad_escape_is_literal
+
+# ----------------------------------------------------------------------
+
+/Aሴ+B/literal,utf,no_utf_check
+ Aሴ+B
+
+# These are here because I upgraded to Unicode 10.0.0 before Perl did, so it
+# doesn't recognize all these scripts. In time these three tests can be moved
+# to test 4.
+
+/^(\p{Adlam}+)(\p{Bhaiksuki}+)(\p{Marchen}+)(\p{Newa}+)(\p{Osage}+)
+ (\p{Tangut}+)(\p{Masaram_Gondi}+)(\p{Nushu}+)(\p{Soyombo}+)
+ (\p{Zanabazar_Square}+)/x,utf
+ \x{1E900}\x{1E924}\x{1E953}\x{11C00}\x{11C2D}\x{11C3E}\x{11C70}\x{11C77}\x{11CAB}\x{11400}\x{1142F}\x{11455}\x{104B0}\x{104D8}\x{104FB}\x{16FE0}\x{18800}\x{18AF2}\x{11D00}\x{11D3A}\x{11D59}\x{16FE1}\x{1B170}\x{1B2FB}\x{11A50}\x{11A58}\x{11AA2}\x{11A00}\x{11A07}\x{11A47}
+
+/^\x{1E900}\x{104B0}/i,utf
+ \x{1E900}\x{104B0}
+ \x{1E922}\x{104D8}
+
+/^(?:(\X)(?C))+$/utf
+ \x{1E900}\x{1E924}\x{1E953}\x{11C00}\x{11C2D}\x{11C3E}\x{11C70}\x{11C77}\x{11CAB}\x{11400}\x{1142F}\x{11455}\x{104B0}\x{104D8}\x{104FB}\x{16FE0}\x{18800}\x{18AF2}\x{11D00}\x{11D3A}\x{11D59}\x{16FE1}\x{1B170}\x{1B2FB}\x{11A50}\x{11A58}\x{11AA2}\x{11A00}\x{11A07}\x{11A47}\=callout_capture,callout_no_where
+
+# These two are here because JIT is not yet updated. Also, the very first data
+# line is handled differently by Perl.
+
+/^\X/utf
+ A\x{200d}B A ZWJ
+ \x{261D}\x{1F3FB}B E_Base E_Modifier
+ \x{1F466}\x{1F3FF}B E_Base_GAZ E_Modifier
+ \x{200d}\x{1F3A4}B ZWJ Glue_After_ZWJ
+ \x{200d}\x{1F469}B ZWJ E_Base_GAZ
+ \x{1F1E6}\x{1F1E7}B RegionalIndicator RegionalIndicator
+ \x{261D}\x{E0100}\x{1F3FB}B E_Base Extend E_Modifier
+
+# Regional indicators
+
+/^(\X)(\X)/utf,aftertext
+ \x{1F1E6}\x{1F1E7}\x{1F1E7}B
+ \x{1F1E6}\x{1F1E7}\x{1F1E7}\x{1F1E6}B
+
+
+# End of testinput5