summaryrefslogtreecommitdiff
path: root/tests/testsuite/utf8basic.sh
diff options
context:
space:
mode:
Diffstat (limited to 'tests/testsuite/utf8basic.sh')
-rwxr-xr-xtests/testsuite/utf8basic.sh10
1 files changed, 10 insertions, 0 deletions
diff --git a/tests/testsuite/utf8basic.sh b/tests/testsuite/utf8basic.sh
new file mode 100755
index 0000000..625f1ff
--- /dev/null
+++ b/tests/testsuite/utf8basic.sh
@@ -0,0 +1,10 @@
+#/bin/sh
+
+# utf8basic.good originally generated with:
+# uconv --from-code UTF-8 --to-code UTF-8 --from-callback substitute UTF-8-test.txt > utf8basic.good
+# but modified to ignore UTF-16 surrogates which are apparently illegal. We return multiple replacement
+# characters there, but the spec apparently says we are only supposed to return 1 per UTF-16 surrogate
+# there are comments in the spec about "security vulnerability" but we always check if we're at the
+# end of our buffer before continuing processing each byte (shouldn't all decoders do this?), so there
+# shouldn't be a problem. Ignoring the UTF-16 non-conformance for now.
+../utf8norm < UTF-8-test.txt