diff options
Diffstat (limited to 'testdata/testoutput10')
-rw-r--r-- | testdata/testoutput10 | 229 |
1 files changed, 109 insertions, 120 deletions
diff --git a/testdata/testoutput10 b/testdata/testoutput10 index ef248db..9761f0f 100644 --- a/testdata/testoutput10 +++ b/testdata/testoutput10 @@ -1,70 +1,10 @@ # This set of tests is for UTF-8 support and Unicode property support, with # relevance only for the 8-bit library. -/X(\C{3})/utf - X\x{1234} - 0: X\x{1234} - 1: \x{1234} - -/X(\C{4})/utf - X\x{1234}YZ - 0: X\x{1234}Y - 1: \x{1234}Y - -/X\C*/utf - XYZabcdce - 0: XYZabcdce - -/X\C*?/utf - XYZabcde - 0: X - -/X\C{3,5}/utf - Xabcdefg - 0: Xabcde - X\x{1234} - 0: X\x{1234} - X\x{1234}YZ - 0: X\x{1234}YZ - X\x{1234}\x{512} - 0: X\x{1234}\x{512} - X\x{1234}\x{512}YZ - 0: X\x{1234}\x{512} - -/X\C{3,5}?/utf - Xabcdefg - 0: Xabc - X\x{1234} - 0: X\x{1234} - X\x{1234}YZ - 0: X\x{1234} - X\x{1234}\x{512} - 0: X\x{1234} - -/a\Cb/utf - aXb - 0: aXb - a\nb - 0: a\x{0a}b - -/a\C\Cb/utf - a\x{100}b - 0: a\x{100}b - -/ab\Cde/utf - abXde - 0: abXde - -/a\C\Cb/utf - a\x{100}b - 0: a\x{100}b - ** Failers -No match - a\x{12257}b -No match +# The next 4 patterns have UTF-8 errors /[Ã]/utf -Failed: error -8 at offset 0: UTF-8 error: byte 2 top bits not 0x80 +Failed: error -8 at offset 1: UTF-8 error: byte 2 top bits not 0x80 /Ã/utf Failed: error -3 at offset 0: UTF-8 error: 1 byte missing at end @@ -72,7 +12,13 @@ Failed: error -3 at offset 0: UTF-8 error: 1 byte missing at end /ÃÃÃxxx/utf Failed: error -8 at offset 0: UTF-8 error: byte 2 top bits not 0x80 +/‚‚‚‚‚‚‚Ã/utf +Failed: error -22 at offset 2: UTF-8 error: isolated byte with 0x80 bit set + +# Now test subjects + /badutf/utf +\= Expect UTF-8 errors X\xdf Failed: error -3: UTF-8 error: 1 byte missing at end at offset 1 XX\xef @@ -146,13 +92,14 @@ Failed: error -20: UTF-8 error: overlong 5-byte sequence at offset 0 \xfc\x80\x80\x80\x80\x8f Failed: error -21: UTF-8 error: overlong 6-byte sequence at offset 0 \x80 -Failed: error -22: UTF-8 error: isolated 0x80 byte at offset 0 +Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 0 \xfe Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0 \xff Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0 /badutf/utf +\= Expect UTF-8 errors XX\xfb\x80\x80\x80\x80 Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 2 XX\xfd\x80\x80\x80\x80\x80 @@ -161,6 +108,7 @@ Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at of Failed: error -15: UTF-8 error: code points greater than 0x10ffff are not defined at offset 2 /shortutf/utf +\= Expect UTF-8 errors XX\xdf\=ph Failed: error -3: UTF-8 error: 1 byte missing at end at offset 2 XX\xef\=ph @@ -193,6 +141,7 @@ Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0 Failed: error -3: UTF-8 error: 1 byte missing at end at offset 0 /anything/utf +\= Expect UTF-8 errors X\xc0\x80 Failed: error -17: UTF-8 error: overlong 2-byte sequence at offset 1 XX\xc1\x8f @@ -209,6 +158,15 @@ Failed: error -21: UTF-8 error: overlong 6-byte sequence at offset 0 Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0 \xff\x80\x80\x80\x80\x80 Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0 + \xf8\x88\x80\x80\x80 +Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0 + \xf9\x87\x80\x80\x80 +Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0 + \xfc\x84\x80\x80\x80\x80 +Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0 + \xfd\x83\x80\x80\x80\x80 +Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0 +\= Expect no match \xc3\x8f No match \xe0\xaf\x80 @@ -219,14 +177,6 @@ No match No match \xf1\x8f\x80\x80 No match - \xf8\x88\x80\x80\x80 -Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0 - \xf9\x87\x80\x80\x80 -Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0 - \xfc\x84\x80\x80\x80\x80 -Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0 - \xfd\x83\x80\x80\x80\x80 -Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0 \xf8\x88\x80\x80\x80\=no_utf_check No match \xf9\x87\x80\x80\x80\=no_utf_check @@ -235,7 +185,62 @@ No match No match \xfd\x83\x80\x80\x80\x80\=no_utf_check No match + +# Similar tests with offsets +/badutf/utf +\= Expect UTF-8 errors + X\xdfabcd +Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 + X\xdfabcd\=offset=1 +Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 +\= Expect no match + X\xdfabcd\=offset=2 +No match + +/(?<=x)badutf/utf +\= Expect UTF-8 errors + X\xdfabcd +Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 + X\xdfabcd\=offset=1 +Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 + X\xdfabcd\=offset=2 +Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 + X\xdfabcd\xdf\=offset=3 +Failed: error -3: UTF-8 error: 1 byte missing at end at offset 6 +\= Expect no match + X\xdfabcd\=offset=3 +No match + +/(?<=xx)badutf/utf +\= Expect UTF-8 errors + X\xdfabcd +Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 + X\xdfabcd\=offset=1 +Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 + X\xdfabcd\=offset=2 +Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 + X\xdfabcd\=offset=3 +Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 + +/(?<=xxxx)badutf/utf +\= Expect UTF-8 errors + X\xdfabcd +Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 + X\xdfabcd\=offset=1 +Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 + X\xdfabcd\=offset=2 +Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 + X\xdfabcd\=offset=3 +Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 + X\xdfabc\xdf\=offset=6 +Failed: error -3: UTF-8 error: 1 byte missing at end at offset 5 + X\xdfabc\xdf\=offset=7 +Failed: error -33: bad offset value +\= Expect no match + X\xdfabcd\=offset=6 +No match + /\x{100}/IB,utf ------------------------------------------------------------------ Bra @@ -448,29 +453,6 @@ First code unit = \xf0 Last code unit = \xab Subject length lower bound = 1 -# This one is here not because it's different to Perl, but because the way -# the captured single-byte is displayed. (In Perl it becomes a character, and you -# can't tell the difference.) - -/X(\C)(.*)/utf - X\x{1234} - 0: X\x{1234} - 1: \x{e1} - 2: \x{88}\x{b4} - X\nabc - 0: X\x{0a}abc - 1: \x{0a} - 2: abc - -# This one is here because Perl gives out a grumbly error message (quite -# correctly, but that messes up comparisons). - -/a\Cb/utf - *** Failers -No match - a\x{100}b -No match - /[^ab\xC0-\xF0]/IB,utf ------------------------------------------------------------------ Bra @@ -499,8 +481,7 @@ Subject length lower bound = 1 0: \x{100} \x{1000} 0: \x{1000} - *** Failers - 0: * +\= Expect no match \x{c0} No match \x{f0} @@ -659,8 +640,6 @@ Subject length lower bound = 1 0: \x{100} \x{100}Z 0: \x{100} - *** Failers -No match /[\xff]/IB,utf ------------------------------------------------------------------ @@ -750,33 +729,35 @@ Failed: error 106 at offset 15: missing terminating ] for character class # This tests the stricter UTF-8 check according to RFC 3629. /X/utf +\= Expect UTF-8 errors \x{d800} Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0 - \x{d800}\=no_utf_check -No match \x{da00} Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0 - \x{da00}\=no_utf_check -No match \x{dfff} Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0 - \x{dfff}\=no_utf_check -No match \x{110000} Failed: error -15: UTF-8 error: code points greater than 0x10ffff are not defined at offset 0 - \x{110000}\=no_utf_check -No match \x{2000000} Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0 - \x{2000000}\=no_utf_check -No match \x{7fffffff} Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0 +\= Expect no match + \x{d800}\=no_utf_check +No match + \x{da00}\=no_utf_check +No match + \x{dfff}\=no_utf_check +No match + \x{110000}\=no_utf_check +No match + \x{2000000}\=no_utf_check +No match \x{7fffffff}\=no_utf_check No match /(*UTF8)\x{1234}/ - abcd\x{1234}pqr + abcd\x{1234}pqr 0: \x{1234} /(*CRLF)(*UTF)(*BSR_UNICODE)a\Rb/I @@ -887,16 +868,19 @@ Subject length lower bound = 3 /a+/utf a\x{123}aa\=offset=1 0: aa - a\x{123}aa\=offset=2 -Error -36 (bad UTF-8 offset) a\x{123}aa\=offset=3 0: aa a\x{123}aa\=offset=4 0: a - a\x{123}aa\=offset=5 -No match +\= Expect bad offset value a\x{123}aa\=offset=6 Failed: error -33: bad offset value +\= Expect bad UTF-8 offset + a\x{123}aa\=offset=2 +Error -36 (bad UTF-8 offset) +\= Expect no match + a\x{123}aa\=offset=5 +No match /\x{1234}+/Ii,utf Capturing subpattern count = 0 @@ -1281,8 +1265,6 @@ Subject length lower bound = 1 0: \x{100} \x{100}Z 0: \x{100} - *** Failers -No match /[z-\x{100}]/IB,utf ------------------------------------------------------------------ @@ -1467,8 +1449,7 @@ Subject length lower bound = 1 0: \x{105} \x{109} 0: \x{109} - ** Failers -No match +\= Expect no match \x{100} No match \x{10a} @@ -1507,8 +1488,7 @@ Subject length lower bound = 1 0: \x{100} \x{101} 0: \x{101} - ** Failers -No match +\= Expect no match \x{102} No match Y @@ -1547,7 +1527,16 @@ Last code unit = 'B' (caseless) Subject length lower bound = 2 /abc/utf,replace=à - abc + abc Failed: error -3: UTF-8 error: 1 byte missing at end +/(?<=(a)(?-1))x/I,utf +Capturing subpattern count = 1 +Max lookbehind = 2 +Options: utf +First code unit = 'x' +Subject length lower bound = 1 + a\x80zx\=offset=3 +Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 1 + # End of testinput10 |