summaryrefslogtreecommitdiff
path: root/testdata/testoutput10
diff options
context:
space:
mode:
Diffstat (limited to 'testdata/testoutput10')
-rw-r--r--testdata/testoutput10229
1 files changed, 109 insertions, 120 deletions
diff --git a/testdata/testoutput10 b/testdata/testoutput10
index ef248db..9761f0f 100644
--- a/testdata/testoutput10
+++ b/testdata/testoutput10
@@ -1,70 +1,10 @@
# This set of tests is for UTF-8 support and Unicode property support, with
# relevance only for the 8-bit library.
-/X(\C{3})/utf
- X\x{1234}
- 0: X\x{1234}
- 1: \x{1234}
-
-/X(\C{4})/utf
- X\x{1234}YZ
- 0: X\x{1234}Y
- 1: \x{1234}Y
-
-/X\C*/utf
- XYZabcdce
- 0: XYZabcdce
-
-/X\C*?/utf
- XYZabcde
- 0: X
-
-/X\C{3,5}/utf
- Xabcdefg
- 0: Xabcde
- X\x{1234}
- 0: X\x{1234}
- X\x{1234}YZ
- 0: X\x{1234}YZ
- X\x{1234}\x{512}
- 0: X\x{1234}\x{512}
- X\x{1234}\x{512}YZ
- 0: X\x{1234}\x{512}
-
-/X\C{3,5}?/utf
- Xabcdefg
- 0: Xabc
- X\x{1234}
- 0: X\x{1234}
- X\x{1234}YZ
- 0: X\x{1234}
- X\x{1234}\x{512}
- 0: X\x{1234}
-
-/a\Cb/utf
- aXb
- 0: aXb
- a\nb
- 0: a\x{0a}b
-
-/a\C\Cb/utf
- a\x{100}b
- 0: a\x{100}b
-
-/ab\Cde/utf
- abXde
- 0: abXde
-
-/a\C\Cb/utf
- a\x{100}b
- 0: a\x{100}b
- ** Failers
-No match
- a\x{12257}b
-No match
+# The next 4 patterns have UTF-8 errors
/[Ã]/utf
-Failed: error -8 at offset 0: UTF-8 error: byte 2 top bits not 0x80
+Failed: error -8 at offset 1: UTF-8 error: byte 2 top bits not 0x80
/Ã/utf
Failed: error -3 at offset 0: UTF-8 error: 1 byte missing at end
@@ -72,7 +12,13 @@ Failed: error -3 at offset 0: UTF-8 error: 1 byte missing at end
/ÃÃÃxxx/utf
Failed: error -8 at offset 0: UTF-8 error: byte 2 top bits not 0x80
+/‚‚‚‚‚‚‚Ã/utf
+Failed: error -22 at offset 2: UTF-8 error: isolated byte with 0x80 bit set
+
+# Now test subjects
+
/badutf/utf
+\= Expect UTF-8 errors
X\xdf
Failed: error -3: UTF-8 error: 1 byte missing at end at offset 1
XX\xef
@@ -146,13 +92,14 @@ Failed: error -20: UTF-8 error: overlong 5-byte sequence at offset 0
\xfc\x80\x80\x80\x80\x8f
Failed: error -21: UTF-8 error: overlong 6-byte sequence at offset 0
\x80
-Failed: error -22: UTF-8 error: isolated 0x80 byte at offset 0
+Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 0
\xfe
Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0
\xff
Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0
/badutf/utf
+\= Expect UTF-8 errors
XX\xfb\x80\x80\x80\x80
Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 2
XX\xfd\x80\x80\x80\x80\x80
@@ -161,6 +108,7 @@ Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at of
Failed: error -15: UTF-8 error: code points greater than 0x10ffff are not defined at offset 2
/shortutf/utf
+\= Expect UTF-8 errors
XX\xdf\=ph
Failed: error -3: UTF-8 error: 1 byte missing at end at offset 2
XX\xef\=ph
@@ -193,6 +141,7 @@ Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0
Failed: error -3: UTF-8 error: 1 byte missing at end at offset 0
/anything/utf
+\= Expect UTF-8 errors
X\xc0\x80
Failed: error -17: UTF-8 error: overlong 2-byte sequence at offset 1
XX\xc1\x8f
@@ -209,6 +158,15 @@ Failed: error -21: UTF-8 error: overlong 6-byte sequence at offset 0
Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0
\xff\x80\x80\x80\x80\x80
Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0
+ \xf8\x88\x80\x80\x80
+Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0
+ \xf9\x87\x80\x80\x80
+Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0
+ \xfc\x84\x80\x80\x80\x80
+Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0
+ \xfd\x83\x80\x80\x80\x80
+Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0
+\= Expect no match
\xc3\x8f
No match
\xe0\xaf\x80
@@ -219,14 +177,6 @@ No match
No match
\xf1\x8f\x80\x80
No match
- \xf8\x88\x80\x80\x80
-Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0
- \xf9\x87\x80\x80\x80
-Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0
- \xfc\x84\x80\x80\x80\x80
-Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0
- \xfd\x83\x80\x80\x80\x80
-Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0
\xf8\x88\x80\x80\x80\=no_utf_check
No match
\xf9\x87\x80\x80\x80\=no_utf_check
@@ -235,7 +185,62 @@ No match
No match
\xfd\x83\x80\x80\x80\x80\=no_utf_check
No match
+
+# Similar tests with offsets
+/badutf/utf
+\= Expect UTF-8 errors
+ X\xdfabcd
+Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
+ X\xdfabcd\=offset=1
+Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
+\= Expect no match
+ X\xdfabcd\=offset=2
+No match
+
+/(?<=x)badutf/utf
+\= Expect UTF-8 errors
+ X\xdfabcd
+Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
+ X\xdfabcd\=offset=1
+Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
+ X\xdfabcd\=offset=2
+Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
+ X\xdfabcd\xdf\=offset=3
+Failed: error -3: UTF-8 error: 1 byte missing at end at offset 6
+\= Expect no match
+ X\xdfabcd\=offset=3
+No match
+
+/(?<=xx)badutf/utf
+\= Expect UTF-8 errors
+ X\xdfabcd
+Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
+ X\xdfabcd\=offset=1
+Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
+ X\xdfabcd\=offset=2
+Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
+ X\xdfabcd\=offset=3
+Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
+
+/(?<=xxxx)badutf/utf
+\= Expect UTF-8 errors
+ X\xdfabcd
+Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
+ X\xdfabcd\=offset=1
+Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
+ X\xdfabcd\=offset=2
+Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
+ X\xdfabcd\=offset=3
+Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
+ X\xdfabc\xdf\=offset=6
+Failed: error -3: UTF-8 error: 1 byte missing at end at offset 5
+ X\xdfabc\xdf\=offset=7
+Failed: error -33: bad offset value
+\= Expect no match
+ X\xdfabcd\=offset=6
+No match
+
/\x{100}/IB,utf
------------------------------------------------------------------
Bra
@@ -448,29 +453,6 @@ First code unit = \xf0
Last code unit = \xab
Subject length lower bound = 1
-# This one is here not because it's different to Perl, but because the way
-# the captured single-byte is displayed. (In Perl it becomes a character, and you
-# can't tell the difference.)
-
-/X(\C)(.*)/utf
- X\x{1234}
- 0: X\x{1234}
- 1: \x{e1}
- 2: \x{88}\x{b4}
- X\nabc
- 0: X\x{0a}abc
- 1: \x{0a}
- 2: abc
-
-# This one is here because Perl gives out a grumbly error message (quite
-# correctly, but that messes up comparisons).
-
-/a\Cb/utf
- *** Failers
-No match
- a\x{100}b
-No match
-
/[^ab\xC0-\xF0]/IB,utf
------------------------------------------------------------------
Bra
@@ -499,8 +481,7 @@ Subject length lower bound = 1
0: \x{100}
\x{1000}
0: \x{1000}
- *** Failers
- 0: *
+\= Expect no match
\x{c0}
No match
\x{f0}
@@ -659,8 +640,6 @@ Subject length lower bound = 1
0: \x{100}
\x{100}Z
0: \x{100}
- *** Failers
-No match
/[\xff]/IB,utf
------------------------------------------------------------------
@@ -750,33 +729,35 @@ Failed: error 106 at offset 15: missing terminating ] for character class
# This tests the stricter UTF-8 check according to RFC 3629.
/X/utf
+\= Expect UTF-8 errors
\x{d800}
Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0
- \x{d800}\=no_utf_check
-No match
\x{da00}
Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0
- \x{da00}\=no_utf_check
-No match
\x{dfff}
Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0
- \x{dfff}\=no_utf_check
-No match
\x{110000}
Failed: error -15: UTF-8 error: code points greater than 0x10ffff are not defined at offset 0
- \x{110000}\=no_utf_check
-No match
\x{2000000}
Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0
- \x{2000000}\=no_utf_check
-No match
\x{7fffffff}
Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0
+\= Expect no match
+ \x{d800}\=no_utf_check
+No match
+ \x{da00}\=no_utf_check
+No match
+ \x{dfff}\=no_utf_check
+No match
+ \x{110000}\=no_utf_check
+No match
+ \x{2000000}\=no_utf_check
+No match
\x{7fffffff}\=no_utf_check
No match
/(*UTF8)\x{1234}/
- abcd\x{1234}pqr
+ abcd\x{1234}pqr
0: \x{1234}
/(*CRLF)(*UTF)(*BSR_UNICODE)a\Rb/I
@@ -887,16 +868,19 @@ Subject length lower bound = 3
/a+/utf
a\x{123}aa\=offset=1
0: aa
- a\x{123}aa\=offset=2
-Error -36 (bad UTF-8 offset)
a\x{123}aa\=offset=3
0: aa
a\x{123}aa\=offset=4
0: a
- a\x{123}aa\=offset=5
-No match
+\= Expect bad offset value
a\x{123}aa\=offset=6
Failed: error -33: bad offset value
+\= Expect bad UTF-8 offset
+ a\x{123}aa\=offset=2
+Error -36 (bad UTF-8 offset)
+\= Expect no match
+ a\x{123}aa\=offset=5
+No match
/\x{1234}+/Ii,utf
Capturing subpattern count = 0
@@ -1281,8 +1265,6 @@ Subject length lower bound = 1
0: \x{100}
\x{100}Z
0: \x{100}
- *** Failers
-No match
/[z-\x{100}]/IB,utf
------------------------------------------------------------------
@@ -1467,8 +1449,7 @@ Subject length lower bound = 1
0: \x{105}
\x{109}
0: \x{109}
- ** Failers
-No match
+\= Expect no match
\x{100}
No match
\x{10a}
@@ -1507,8 +1488,7 @@ Subject length lower bound = 1
0: \x{100}
\x{101}
0: \x{101}
- ** Failers
-No match
+\= Expect no match
\x{102}
No match
Y
@@ -1547,7 +1527,16 @@ Last code unit = 'B' (caseless)
Subject length lower bound = 2
/abc/utf,replace=Ã
- abc
+ abc
Failed: error -3: UTF-8 error: 1 byte missing at end
+/(?<=(a)(?-1))x/I,utf
+Capturing subpattern count = 1
+Max lookbehind = 2
+Options: utf
+First code unit = 'x'
+Subject length lower bound = 1
+ a\x80zx\=offset=3
+Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 1
+
# End of testinput10