summaryrefslogtreecommitdiff
path: root/src/basic/string-util.c
diff options
context:
space:
mode:
authorZbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>2018-06-02 17:08:46 +0200
committerSven Eden <yamakuzure@gmx.net>2018-08-24 16:47:08 +0200
commit370e2cb260e7d8d063e00f6917e76952131ec741 (patch)
treecb81aad6294e95ecd3b474c538e29e06a57564dd /src/basic/string-util.c
parentf39f1b81b67bbe5137bdc87e76076407bd77f0c3 (diff)
test-ellipsize: add tests for ellipsize_mem, fix bugs
First, ellipsize() and ellipsize_mem() should not read past the input buffer. Those functions take an explicit length for the input data, so they should not assume that the buffer is terminated by a nul. Second, ellipsization was off in various cases where wide or multi-byte characters were used. We had some basic tests for ellipsize(), but apparently they weren't enough to catch more serious cases. Should fix https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=8686.
Diffstat (limited to 'src/basic/string-util.c')
-rw-r--r--src/basic/string-util.c61
1 files changed, 38 insertions, 23 deletions
diff --git a/src/basic/string-util.c b/src/basic/string-util.c
index bdaa42084..b53686a11 100644
--- a/src/basic/string-util.c
+++ b/src/basic/string-util.c
@@ -473,8 +473,8 @@ static int write_ellipsis(char *buf, bool unicode) {
}
static char *ascii_ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
- size_t x, need_space;
- char *r;
+ size_t x, need_space, suffix_len;
+ char *t;
assert(s);
assert(percent <= 100);
@@ -510,8 +510,8 @@ static char *ascii_ellipsize_mem(const char *s, size_t old_length, size_t new_le
* either for the UTF-8 encoded character or for three ASCII characters. */
need_space = is_locale_utf8() ? 1 : 3;
- r = new(char, new_length+3);
- if (!r)
+ t = new(char, new_length+3);
+ if (!t)
return NULL;
assert(new_length >= need_space);
@@ -519,13 +519,13 @@ static char *ascii_ellipsize_mem(const char *s, size_t old_length, size_t new_le
x = ((new_length - need_space) * percent + 50) / 100;
assert(x <= new_length - need_space);
- memcpy(r, s, x);
- write_ellipsis(r + x, false);
- memcpy(r + x + 3,
- s + old_length - (new_length - x - need_space),
- new_length - x - need_space + 1);
+ memcpy(t, s, x);
+ write_ellipsis(t + x, false);
+ suffix_len = new_length - x - need_space;
+ memcpy(t + x + 3, s + old_length - suffix_len, suffix_len);
+ *(t + x + 3 + suffix_len) = '\0';
- return r;
+ return t;
}
char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
@@ -563,35 +563,49 @@ char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigne
assert(x <= new_length - 1);
k = 0;
- for (i = s; k < x && i < s + old_length; i = utf8_next_char(i)) {
+ for (i = s; i < s + old_length; i = utf8_next_char(i)) {
char32_t c;
+ int w;
r = utf8_encoded_to_unichar(i, &c);
if (r < 0)
return NULL;
- k += unichar_iswide(c) ? 2 : 1;
- }
- if (k > x) /* last character was wide and went over quota */
- x++;
+ w = unichar_iswide(c) ? 2 : 1;
+ if (k + w <= x)
+ k += w;
+ else
+ break;
+ }
- for (j = s + old_length; k < new_length && j > i; ) {
+ for (j = s + old_length; j > i; ) {
char32_t c;
+ int w;
+ const char *jj;
- j = utf8_prev_char(j);
- r = utf8_encoded_to_unichar(j, &c);
+ jj = utf8_prev_char(j);
+ r = utf8_encoded_to_unichar(jj, &c);
if (r < 0)
return NULL;
- k += unichar_iswide(c) ? 2 : 1;
+
+ w = unichar_iswide(c) ? 2 : 1;
+ if (k + w <= new_length) {
+ k += w;
+ j = jj;
+ } else
+ break;
}
assert(i <= j);
/* we don't actually need to ellipsize */
if (i == j)
- return memdup(s, old_length + 1);
+ return memdup_suffix0(s, old_length);
- /* make space for ellipsis */
- j = utf8_next_char(j);
+ /* make space for ellipsis, if possible */
+ if (j < s + old_length)
+ j = utf8_next_char(j);
+ else if (i > s)
+ i = utf8_prev_char(i);
len = i - s;
len2 = s + old_length - j;
@@ -606,7 +620,8 @@ char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigne
memcpy(e, s, len);
write_ellipsis(e + len, true);
- memcpy(e + len + 3, j, len2 + 1);
+ memcpy(e + len + 3, j, len2);
+ *(e + len + 3 + len2) = '\0';
return e;
}