From 370e2cb260e7d8d063e00f6917e76952131ec741 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= <zbyszek@in.waw.pl>
Date: Sat, 2 Jun 2018 17:08:46 +0200
Subject: test-ellipsize: add tests for ellipsize_mem, fix bugs

First, ellipsize() and ellipsize_mem() should not read past the input
buffer. Those functions take an explicit length for the input data, so they
should not assume that the buffer is terminated by a nul.

Second, ellipsization was off in various cases where wide on multi-byte
characters were used.

We had some basic test for ellipsize(), but apparently it wasn't enough to
catch more serious cases.

Should fix https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=8686.
---
 src/basic/string-util.c | 61 ++++++++++++++++++++++++++++++-------------------
 1 file changed, 38 insertions(+), 23 deletions(-)

(limited to 'src/basic/string-util.c')

diff --git a/src/basic/string-util.c b/src/basic/string-util.c
index bdaa42084..b53686a11 100644
--- a/src/basic/string-util.c
+++ b/src/basic/string-util.c
@@ -473,8 +473,8 @@ static int write_ellipsis(char *buf, bool unicode) {
 }
 
 static char *ascii_ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
-        size_t x, need_space;
-        char *r;
+        size_t x, need_space, suffix_len;
+        char *t;
 
         assert(s);
         assert(percent <= 100);
@@ -510,8 +510,8 @@ static char *ascii_ellipsize_mem(const char *s, size_t old_length, size_t new_le
          * either for the UTF-8 encoded character or for three ASCII characters. */
         need_space = is_locale_utf8() ? 1 : 3;
 
-        r = new(char, new_length+3);
-        if (!r)
+        t = new(char, new_length+3);
+        if (!t)
                 return NULL;
 
         assert(new_length >= need_space);
@@ -519,13 +519,13 @@ static char *ascii_ellipsize_mem(const char *s, size_t old_length, size_t new_le
         x = ((new_length - need_space) * percent + 50) / 100;
         assert(x <= new_length - need_space);
 
-        memcpy(r, s, x);
-        write_ellipsis(r + x, false);
-        memcpy(r + x + 3,
-               s + old_length - (new_length - x - need_space),
-               new_length - x - need_space + 1);
+        memcpy(t, s, x);
+        write_ellipsis(t + x, false);
+        suffix_len = new_length - x - need_space;
+        memcpy(t + x + 3, s + old_length - suffix_len, suffix_len);
+        *(t + x + 3 + suffix_len) = '\0';
 
-        return r;
+        return t;
 }
 
 char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
@@ -563,35 +563,49 @@ char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigne
         assert(x <= new_length - 1);
 
         k = 0;
-        for (i = s; k < x && i < s + old_length; i = utf8_next_char(i)) {
+        for (i = s; i < s + old_length; i = utf8_next_char(i)) {
                 char32_t c;
+                int w;
 
                 r = utf8_encoded_to_unichar(i, &c);
                 if (r < 0)
                         return NULL;
-                k += unichar_iswide(c) ? 2 : 1;
-        }
 
-        if (k > x) /* last character was wide and went over quota */
-                x++;
+                w = unichar_iswide(c) ? 2 : 1;
+                if (k + w <= x)
+                        k += w;
+                else
+                        break;
+        }
 
-        for (j = s + old_length; k < new_length && j > i; ) {
+        for (j = s + old_length; j > i; ) {
                 char32_t c;
+                int w;
+                const char *jj;
 
-                j = utf8_prev_char(j);
-                r = utf8_encoded_to_unichar(j, &c);
+                jj = utf8_prev_char(j);
+                r = utf8_encoded_to_unichar(jj, &c);
                 if (r < 0)
                         return NULL;
-                k += unichar_iswide(c) ? 2 : 1;
+
+                w = unichar_iswide(c) ? 2 : 1;
+                if (k + w <= new_length) {
+                        k += w;
+                        j = jj;
+                } else
+                        break;
         }
         assert(i <= j);
 
         /* we don't actually need to ellipsize */
         if (i == j)
-                return memdup(s, old_length + 1);
+                return memdup_suffix0(s, old_length);
 
-        /* make space for ellipsis */
-        j = utf8_next_char(j);
+        /* make space for ellipsis, if possible */
+        if (j < s + old_length)
+                j = utf8_next_char(j);
+        else if (i > s)
+                i = utf8_prev_char(i);
 
         len = i - s;
         len2 = s + old_length - j;
@@ -606,7 +620,8 @@ char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigne
 
         memcpy(e, s, len);
         write_ellipsis(e + len, true);
-        memcpy(e + len + 3, j, len2 + 1);
+        memcpy(e + len + 3, j, len2);
+        *(e + len + 3 + len2) = '\0';
 
         return e;
 }
-- 
cgit v1.2.3