From 370e2cb260e7d8d063e00f6917e76952131ec741 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Sat, 2 Jun 2018 17:08:46 +0200 Subject: test-ellipsize: add tests for ellipsize_mem, fix bugs First, ellipsize() and ellipsize_mem() should not read past the input buffer. Those functions take an explicit length for the input data, so they should not assume that the buffer is terminated by a nul. Second, ellipsization was off in various cases where wide or multi-byte characters were used. We had some basic test for ellipsize(), but apparently it wasn't enough to catch more serious cases. Should fix https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=8686. --- src/basic/string-util.c | 61 ++++++++++++++++++++++++++++++------------------- 1 file changed, 38 insertions(+), 23 deletions(-) (limited to 'src/basic/string-util.c') diff --git a/src/basic/string-util.c b/src/basic/string-util.c index bdaa42084..b53686a11 100644 --- a/src/basic/string-util.c +++ b/src/basic/string-util.c @@ -473,8 +473,8 @@ static int write_ellipsis(char *buf, bool unicode) { } static char *ascii_ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) { - size_t x, need_space; - char *r; + size_t x, need_space, suffix_len; + char *t; assert(s); assert(percent <= 100); @@ -510,8 +510,8 @@ static char *ascii_ellipsize_mem(const char *s, size_t old_length, size_t new_le * either for the UTF-8 encoded character or for three ASCII characters. */ need_space = is_locale_utf8() ? 
1 : 3; - r = new(char, new_length+3); - if (!r) + t = new(char, new_length+3); + if (!t) return NULL; assert(new_length >= need_space); @@ -519,13 +519,13 @@ static char *ascii_ellipsize_mem(const char *s, size_t old_length, size_t new_le x = ((new_length - need_space) * percent + 50) / 100; assert(x <= new_length - need_space); - memcpy(r, s, x); - write_ellipsis(r + x, false); - memcpy(r + x + 3, - s + old_length - (new_length - x - need_space), - new_length - x - need_space + 1); + memcpy(t, s, x); + write_ellipsis(t + x, false); + suffix_len = new_length - x - need_space; + memcpy(t + x + 3, s + old_length - suffix_len, suffix_len); + *(t + x + 3 + suffix_len) = '\0'; - return r; + return t; } char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) { @@ -563,35 +563,49 @@ char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigne assert(x <= new_length - 1); k = 0; - for (i = s; k < x && i < s + old_length; i = utf8_next_char(i)) { + for (i = s; i < s + old_length; i = utf8_next_char(i)) { char32_t c; + int w; r = utf8_encoded_to_unichar(i, &c); if (r < 0) return NULL; - k += unichar_iswide(c) ? 2 : 1; - } - if (k > x) /* last character was wide and went over quota */ - x++; + w = unichar_iswide(c) ? 2 : 1; + if (k + w <= x) + k += w; + else + break; + } - for (j = s + old_length; k < new_length && j > i; ) { + for (j = s + old_length; j > i; ) { char32_t c; + int w; + const char *jj; - j = utf8_prev_char(j); - r = utf8_encoded_to_unichar(j, &c); + jj = utf8_prev_char(j); + r = utf8_encoded_to_unichar(jj, &c); if (r < 0) return NULL; - k += unichar_iswide(c) ? 2 : 1; + + w = unichar_iswide(c) ? 
2 : 1; + if (k + w <= new_length) { + k += w; + j = jj; + } else + break; } assert(i <= j); /* we don't actually need to ellipsize */ if (i == j) - return memdup(s, old_length + 1); + return memdup_suffix0(s, old_length); - /* make space for ellipsis */ - j = utf8_next_char(j); + /* make space for ellipsis, if possible */ + if (j < s + old_length) + j = utf8_next_char(j); + else if (i > s) + i = utf8_prev_char(i); len = i - s; len2 = s + old_length - j; @@ -606,7 +620,8 @@ char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigne memcpy(e, s, len); write_ellipsis(e + len, true); - memcpy(e + len + 3, j, len2 + 1); + memcpy(e + len + 3, j, len2); + *(e + len + 3 + len2) = '\0'; return e; } -- cgit v1.2.3