1 files changed, 109 insertions, 120 deletions
diff --git a/testdata/testoutput10 b/testdata/testoutput10
index ef248db..9761f0f 100644
--- a/testdata/testoutput10
+++ b/testdata/testoutput10
@@ -1,70 +1,10 @@
 # This set of tests is for UTF-8 support and Unicode property support, with
 # relevance only for the 8-bit library.
 
-/X(\C{3})/utf
-    X\x{1234}
- 0: X\x{1234}
- 1: \x{1234}
-
-/X(\C{4})/utf
-    X\x{1234}YZ
- 0: X\x{1234}Y
- 1: \x{1234}Y
-
-/X\C*/utf
-    XYZabcdce
- 0: XYZabcdce
-
-/X\C*?/utf
-    XYZabcde
- 0: X
-
-/X\C{3,5}/utf
-    Xabcdefg
- 0: Xabcde
-    X\x{1234}
- 0: X\x{1234}
-    X\x{1234}YZ
- 0: X\x{1234}YZ
-    X\x{1234}\x{512}
- 0: X\x{1234}\x{512}
-    X\x{1234}\x{512}YZ
- 0: X\x{1234}\x{512}
-
-/X\C{3,5}?/utf
-    Xabcdefg
- 0: Xabc
-    X\x{1234}
- 0: X\x{1234}
-    X\x{1234}YZ
- 0: X\x{1234}
-    X\x{1234}\x{512}
- 0: X\x{1234}
-
-/a\Cb/utf
-    aXb
- 0: aXb
-    a\nb
- 0: a\x{0a}b
-
-/a\C\Cb/utf
-    a\x{100}b
- 0: a\x{100}b
-
-/ab\Cde/utf
-    abXde
- 0: abXde
-
-/a\C\Cb/utf
-    a\x{100}b
- 0: a\x{100}b
-    ** Failers
-No match
-    a\x{12257}b
-No match
+# The next 4 patterns have UTF-8 errors
 
 /[Г]/utf
-Failed: error -8 at offset 0: UTF-8 error: byte 2 top bits not 0x80
+Failed: error -8 at offset 1: UTF-8 error: byte 2 top bits not 0x80
 
 /Г/utf
 Failed: error -3 at offset 0: UTF-8 error: 1 byte missing at end
@@ -72,7 +12,13 @@ Failed: error -3 at offset 0: UTF-8 error: 1 byte missing at end
 /ГГГxxx/utf
 Failed: error -8 at offset 0: UTF-8 error: byte 2 top bits not 0x80
 
+/Г‚‚‚‚‚‚‚‚Г/utf
+Failed: error -22 at offset 2: UTF-8 error: isolated byte with 0x80 bit set
+
+# Now test subjects
+
 /badutf/utf
+\= Expect UTF-8 errors
     X\xdf
 Failed: error -3: UTF-8 error: 1 byte missing at end at offset 1
     XX\xef
@@ -146,13 +92,14 @@ Failed: error -20: UTF-8 error: overlong 5-byte sequence at offset 0
     \xfc\x80\x80\x80\x80\x8f
 Failed: error -21: UTF-8 error: overlong 6-byte sequence at offset 0
     \x80
-Failed: error -22: UTF-8 error: isolated 0x80 byte at offset 0
+Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 0
     \xfe
 Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0
     \xff
 Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0
 
 /badutf/utf
+\= Expect UTF-8 errors
     XX\xfb\x80\x80\x80\x80
 Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 2
     XX\xfd\x80\x80\x80\x80\x80
@@ -161,6 +108,7 @@ Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at of
 Failed: error -15: UTF-8 error: code points greater than 0x10ffff are not defined at offset 2
 
 /shortutf/utf
+\= Expect UTF-8 errors
     XX\xdf\=ph
 Failed: error -3: UTF-8 error: 1 byte missing at end at offset 2
     XX\xef\=ph
@@ -193,6 +141,7 @@ Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0
 Failed: error -3: UTF-8 error: 1 byte missing at end at offset 0
 
 /anything/utf
+\= Expect UTF-8 errors
     X\xc0\x80
 Failed: error -17: UTF-8 error: overlong 2-byte sequence at offset 1
     XX\xc1\x8f
@@ -209,6 +158,15 @@ Failed: error -21: UTF-8 error: overlong 6-byte sequence at offset 0
 Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0
     \xff\x80\x80\x80\x80\x80
 Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0
+    \xf8\x88\x80\x80\x80
+Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0
+    \xf9\x87\x80\x80\x80
+Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0
+    \xfc\x84\x80\x80\x80\x80
+Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0
+    \xfd\x83\x80\x80\x80\x80
+Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0
+\= Expect no match
     \xc3\x8f
 No match
     \xe0\xaf\x80
@@ -219,14 +177,6 @@ No match
 No match
     \xf1\x8f\x80\x80
 No match
-    \xf8\x88\x80\x80\x80
-Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0
-    \xf9\x87\x80\x80\x80
-Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0
-    \xfc\x84\x80\x80\x80\x80
-Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0
-    \xfd\x83\x80\x80\x80\x80
-Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0
     \xf8\x88\x80\x80\x80\=no_utf_check
 No match
     \xf9\x87\x80\x80\x80\=no_utf_check
@@ -235,7 +185,62 @@ No match
 No match
     \xfd\x83\x80\x80\x80\x80\=no_utf_check
 No match
+    
+# Similar tests with offsets
 
+/badutf/utf
+\= Expect UTF-8 errors
+    X\xdfabcd
+Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
+    X\xdfabcd\=offset=1
+Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
+\= Expect no match
+    X\xdfabcd\=offset=2
+No match
+
+/(?<=x)badutf/utf
+\= Expect UTF-8 errors
+    X\xdfabcd
+Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
+    X\xdfabcd\=offset=1
+Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
+    X\xdfabcd\=offset=2
+Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
+    X\xdfabcd\xdf\=offset=3
+Failed: error -3: UTF-8 error: 1 byte missing at end at offset 6
+\= Expect no match
+    X\xdfabcd\=offset=3
+No match
+
+/(?<=xx)badutf/utf
+\= Expect UTF-8 errors
+    X\xdfabcd
+Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
+    X\xdfabcd\=offset=1
+Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
+    X\xdfabcd\=offset=2
+Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
+    X\xdfabcd\=offset=3
+Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
+
+/(?<=xxxx)badutf/utf
+\= Expect UTF-8 errors
+    X\xdfabcd
+Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
+    X\xdfabcd\=offset=1
+Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
+    X\xdfabcd\=offset=2
+Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
+    X\xdfabcd\=offset=3
+Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
+    X\xdfabc\xdf\=offset=6
+Failed: error -3: UTF-8 error: 1 byte missing at end at offset 5
+    X\xdfabc\xdf\=offset=7
+Failed: error -33: bad offset value
+\= Expect no match
+    X\xdfabcd\=offset=6
+No match
+ 
 /\x{100}/IB,utf
 ------------------------------------------------------------------
         Bra
@@ -448,29 +453,6 @@ First code unit = \xf0
 Last code unit = \xab
 Subject length lower bound = 1
 
-# This one is here not because it's different to Perl, but because the way
-# the captured single-byte is displayed. (In Perl it becomes a character, and you
-# can't tell the difference.)
-
-/X(\C)(.*)/utf
-    X\x{1234}
- 0: X\x{1234}
- 1: \x{e1}
- 2: \x{88}\x{b4}
-    X\nabc
- 0: X\x{0a}abc
- 1: \x{0a}
- 2: abc
-
-# This one is here because Perl gives out a grumbly error message (quite
-# correctly, but that messes up comparisons).
-
-/a\Cb/utf
-    *** Failers
-No match
-    a\x{100}b
-No match
-
 /[^ab\xC0-\xF0]/IB,utf
 ------------------------------------------------------------------
         Bra
@@ -499,8 +481,7 @@ Subject length lower bound = 1
  0: \x{100}
     \x{1000}
  0: \x{1000}
-    *** Failers
- 0: *
+\= Expect no match
     \x{c0}
 No match
     \x{f0}
@@ -659,8 +640,6 @@ Subject length lower bound = 1
  0: \x{100}
     \x{100}Z
  0: \x{100}
-    *** Failers
-No match
 
 /[\xff]/IB,utf
 ------------------------------------------------------------------
@@ -750,33 +729,35 @@ Failed: error 106 at offset 15: missing terminating ] for character class
 # This tests the stricter UTF-8 check according to RFC 3629.
 
 /X/utf
+\= Expect UTF-8 errors
     \x{d800}
 Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0
-    \x{d800}\=no_utf_check
-No match
     \x{da00}
 Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0
-    \x{da00}\=no_utf_check
-No match
     \x{dfff}
 Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0
-    \x{dfff}\=no_utf_check
-No match
     \x{110000}
 Failed: error -15: UTF-8 error: code points greater than 0x10ffff are not defined at offset 0
-    \x{110000}\=no_utf_check
-No match
     \x{2000000}
 Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0
-    \x{2000000}\=no_utf_check
-No match
     \x{7fffffff}
 Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0
+\= Expect no match
+    \x{d800}\=no_utf_check
+No match
+    \x{da00}\=no_utf_check
+No match
+    \x{dfff}\=no_utf_check
+No match
+    \x{110000}\=no_utf_check
+No match
+    \x{2000000}\=no_utf_check
+No match
     \x{7fffffff}\=no_utf_check
 No match
 
 /(*UTF8)\x{1234}/
-  abcd\x{1234}pqr
+    abcd\x{1234}pqr
  0: \x{1234}
 
 /(*CRLF)(*UTF)(*BSR_UNICODE)a\Rb/I
@@ -887,16 +868,19 @@ Subject length lower bound = 3
 /a+/utf
     a\x{123}aa\=offset=1
  0: aa
-    a\x{123}aa\=offset=2
-Error -36 (bad UTF-8 offset)
     a\x{123}aa\=offset=3
  0: aa
     a\x{123}aa\=offset=4
  0: a
-    a\x{123}aa\=offset=5
-No match
+\= Expect bad offset value
     a\x{123}aa\=offset=6
 Failed: error -33: bad offset value
+\= Expect bad UTF-8 offset     
+    a\x{123}aa\=offset=2
+Error -36 (bad UTF-8 offset)
+\= Expect no match
+    a\x{123}aa\=offset=5
+No match
 
 /\x{1234}+/Ii,utf
 Capturing subpattern count = 0
@@ -1281,8 +1265,6 @@ Subject length lower bound = 1
  0: \x{100}
     \x{100}Z
  0: \x{100}
-    *** Failers 
-No match
 
 /[z-\x{100}]/IB,utf
 ------------------------------------------------------------------
@@ -1467,8 +1449,7 @@ Subject length lower bound = 1
  0: \x{105}
     \x{109}  
  0: \x{109}
-    ** Failers
-No match
+\= Expect no match
     \x{100}
 No match
     \x{10a} 
@@ -1507,8 +1488,7 @@ Subject length lower bound = 1
  0: \x{100}
     \x{101} 
  0: \x{101}
-    ** Failers
-No match
+\= Expect no match
     \x{102}
 No match
     Y
@@ -1547,7 +1527,16 @@ Last code unit = 'B' (caseless)
 Subject length lower bound = 2
 
 /abc/utf,replace=Г
-   abc
+    abc
 Failed: error -3: UTF-8 error: 1 byte missing at end
 
+/(?<=(a)(?-1))x/I,utf
+Capturing subpattern count = 1
+Max lookbehind = 2
+Options: utf
+First code unit = 'x'
+Subject length lower bound = 1
+    a\x80zx\=offset=3
+Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 1
+
 # End of testinput10