terminal: add parser state-machine

The term-parser is used to parse any input from TTY-clients. It reads CSI, DCS, OSC and ST control sequences and normal escape sequences. It doesn't do anything with the parsed data besides detecting the sequence and returning it. The caller has to react to them. The parser also comes with its own UTF-8 helpers. The reason for that is that we don't want to assert() or hard-fail on parsing errors. Instead, we treat any invalid UTF-8 sequences as ISO-8859-1. This allows pasting invalid data into a terminal (which cannot be controlled through the TTY, anyway) and we still deal with it in a proper manner. This is _required_ for 8-bit and 7-bit DEC modes (including the g0-g3 mappings), so it's not just an ugly fallback because we can (it's still horribly ugly but at least we have an excuse).
author: David Herrmann <dh.herrmann@gmail.com> 2014-06-15 14:50:00 +0200
committer: David Herrmann <dh.herrmann@gmail.com> 2014-07-18 12:53:41 +0200
commit: 1c9633d669948155455e29b0c6e770995a8b1ca3 (patch)
tree: 9fac4ba6ccdd564b36caa2363999667aa601f182 /src/libsystemd-terminal/test-term-parser.c
parent: 28622e8f5b28412d97bf2f3a5df49c419be1e2c5 (diff)
1 files changed, 143 insertions, 0 deletions
diff --git a/src/libsystemd-terminal/test-term-parser.c b/src/libsystemd-terminal/test-term-parser.c
new file mode 100644
index 000000000..ed16f5f27
--- /dev/null
+++ b/src/libsystemd-terminal/test-term-parser.c
@@ -0,0 +1,143 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+/***
+  This file is part of systemd.
+
+  Copyright (C) 2014 David Herrmann <dh.herrmann@gmail.com>
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+/*
+ * Terminal Parser Tests
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "macro.h"
+#include "term-internal.h"
+#include "util.h"
+
+static void test_term_utf8_invalid(void) {
+        term_utf8 p = { };
+        const uint32_t *res;
+        size_t len;
+        /* Checks how term_utf8_decode() reacts to missing arguments and to a broken 2-byte sequence. */
+        res = term_utf8_decode(NULL, NULL, 0);
+        assert_se(res == NULL); /* without a parser the call is expected to return NULL */
+        /* A NULL length pointer is tolerated as long as a parser is passed. */
+        res = term_utf8_decode(&p, NULL, 0);
+        assert_se(res != NULL);
+        /* 'len' is preset to 5 before every call so the asserts prove the decoder overwrote it. */
+        len = 5;
+        res = term_utf8_decode(NULL, &len, 0);
+        assert_se(res == NULL);
+        assert_se(len == 0);
+        /* A single zero byte is expected to yield exactly one decoded value. */
+        len = 5;
+        res = term_utf8_decode(&p, &len, 0);
+        assert_se(res != NULL);
+        assert_se(len == 1);
+        /* 0xCF is a UTF-8 2-byte lead byte: the sequence is incomplete, so nothing is reported yet. */
+        len = 5;
+        res = term_utf8_decode(&p, &len, 0xCf);
+        assert_se(res == NULL);
+        assert_se(len == 0);
+        /* 0x00 cannot continue it: two values are expected back, presumably both raw bytes (cf. test_term_utf8_mix). */
+        len = 5;
+        res = term_utf8_decode(&p, &len, 0x0);
+        assert_se(res != NULL);
+        assert_se(len == 2);
+}
+
+static void test_term_utf8_range(void) {
+        term_utf8 p = { };
+        const uint32_t *res;
+        char u8[4];
+        uint32_t i, j;
+        size_t ulen, len;
+
+        /* Round-trip every code point from 0 up to and including U+10FFFF through
+         * term_utf8_encode() and term_utf8_decode(); the bound is inclusive so the last
+         * code point is covered too. Values for which the encoder returns 0 are skipped. */
+        for (i = 0; i <= 0x10FFFF; ++i) {
+                ulen = term_utf8_encode(u8, i);
+                if (!ulen)
+                        continue;
+
+                assert_se(i <= 127 || ulen >= 2); /* only 7-bit values may fit into one byte */
+                for (j = 0; j < ulen; ++j) {
+                        res = term_utf8_decode(&p, &len, u8[j]);
+                        if (!res) {
+                                assert_se(j + 1 != ulen); /* no output is only fine mid-sequence */
+                                continue;
+                        }
+                        assert_se(j + 1 == ulen); /* output must show up exactly on the final byte */
+                        assert_se(len == 1 && *res == i);
+                }
+        }
+}
+
+static void test_term_utf8_mix(void) {
+        static const char input[] = {
+                0x00,                           /* plain NUL byte */
+                0xC0, 0x80,                     /* 2-byte overlong encoding of 0 */
+                0xC0, 0x81,                     /* 2-byte overlong encoding of 1 */
+                0xE0, 0x80, 0x81,               /* 3-byte overlong encoding of 1 */
+                0xF0, 0x80, 0x80, 0x81,         /* 4-byte overlong encoding of 1 */
+                0xC0, 0x00,                     /* lead byte followed by a non-continuation byte */
+                0xC0, 0xC0, 0x81,               /* stray lead byte, then a 2-byte overlong 1 */
+                0xF8, 0x80, 0x80, 0x80, 0x81,   /* 5-byte form of 1 */
+                0xE0, 0x80, 0xC0, 0x81,         /* cut-off 3-byte sequence, then a 2-byte overlong 1 */
+                0xF0, 0x80, 0x80, 0xC0, 0x81,   /* cut-off 4-byte sequence, then a 2-byte overlong 1 */
+        };
+        static const uint32_t result[] = {
+                0x0000,                                 /* NUL comes back unchanged */
+                0x0000,                                 /* overlong forms decode to their value */
+                0x0001,
+                0x0001,
+                0x0001,
+                0x00C0, 0x0000,                         /* broken sequence: each byte comes back as-is */
+                0x00C0, 0x0001,                         /* stray lead as-is, then the decoded 1 */
+                0x00F8, 0x0080, 0x0080, 0x0080, 0x0081, /* 5-byte form: each byte comes back as-is */
+                0x00E0, 0x0080, 0x0001,                 /* cut-off bytes as-is, then the decoded 1 */
+                0x00F0, 0x0080, 0x0080, 0x0001,         /* cut-off bytes as-is, then the decoded 1 */
+        };
+        term_utf8 parser = { };
+        const uint32_t *res;
+        unsigned int pos, j = 0;
+        size_t len;
+        /* Feed the bytes one at a time; 'j' counts how many expected values were matched so far. */
+        for (pos = 0; pos < sizeof(input); ++pos) {
+                res = term_utf8_decode(&parser, &len, input[pos]);
+                if (res) {
+                        /* compare this batch against the next 'len' expected values */
+                        assert_se(j + len <= ELEMENTSOF(result));
+                        assert_se(!memcmp(res, &result[j], sizeof(uint32_t) * len));
+                        j += len;
+                }
+        }
+        /* all expected values must have been consumed by the time the input ends */
+        assert_se(j == ELEMENTSOF(result));
+}
+
+int main(int argc, char *argv[]) {
+        /* Run the UTF-8 helper tests in order; argc/argv are unused, failures surface via assert_se(). */
+        test_term_utf8_invalid();
+        test_term_utf8_range();
+        test_term_utf8_mix();
+        return EXIT_SUCCESS;
+}
author	David Herrmann <dh.herrmann@gmail.com>	2014-06-15 14:50:00 +0200
committer	David Herrmann <dh.herrmann@gmail.com>	2014-07-18 12:53:41 +0200
commit	1c9633d669948155455e29b0c6e770995a8b1ca3 (patch)
tree	9fac4ba6ccdd564b36caa2363999667aa601f182 /src/libsystemd-terminal/test-term-parser.c
parent	28622e8f5b28412d97bf2f3a5df49c419be1e2c5 (diff)