From 6fd58cfc22b0968e71f67f378555aba74e392847 Mon Sep 17 00:00:00 2001
From: Steve Bennett
Date: Thu, 14 Dec 2017 20:27:53 +1000
Subject: utf8: Be more strict at rejecting invalid UTF-8 sequences.

RFC 3629 says:

  Implementations of the decoding algorithm above MUST protect against
  decoding invalid sequences

Signed-off-by: Steve Bennett
---
 tests/utftcl.test | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'tests/utftcl.test')

diff --git a/tests/utftcl.test b/tests/utftcl.test
index 33b8933..fac14ce 100644
--- a/tests/utftcl.test
+++ b/tests/utftcl.test
@@ -74,7 +74,8 @@ test utf-4.2 {Tcl_NumUtfChars: length 1} {
 test utf-4.3 {Tcl_NumUtfChars: long string} {
     testnumutfchars [bytestring "abc\xC2\xA2\xe4\xb9\x8e\uA2\u4e4e"]
 } {7}
-test utf-4.4 {Tcl_NumUtfChars: #u0000} {
+# This is an invalid utf-8 sequence. Not minimal, so should return 2
+test utf-4.4 {Tcl_NumUtfChars: #u0000} tcl {
     testnumutfchars [bytestring "\xC0\x80"]
 } {1}
 test utf-4.5 {Tcl_NumUtfChars: zero length, calc len} {
@@ -86,7 +87,7 @@ test utf-4.6 {Tcl_NumUtfChars: length 1, calc len} {
 test utf-4.7 {Tcl_NumUtfChars: long string, calc len} {
     testnumutfchars [bytestring "abc\xC2\xA2\xe4\xb9\x8e\uA2\u4e4e"] 1
 } {7}
-test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} {
+test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} tcl {
     testnumutfchars [bytestring "\xC0\x80"] 1
 } {1}
 
-- 
cgit v1.2.3