From 5cddd659cdd6021faee07533509598d1abe403e6 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 11 Apr 2018 19:52:25 +0200 Subject: string-util: tweak ellipsation a bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This primarily changes two things: 1. Ellipsation to 0, 1 or 2 characters is now supported. Previously we'd hit an assert if the new length was < 3; this is now permitted. The resulting strings still won't show much info, of course, but the code becomes a bit more generic and robust to use. 2. If UTF-8 mode is disabled and the input string is pure ASCII, then "..." is used for ellipsation, otherwise (as before) "…". This means on a pure-ASCII system we should remain pure-ASCII, matching behaviour otherwise exposed with special_glyph() and friends. Note that we'll use "…" for ellipsation as soon as either the locale settings indicate a UTF-8 mode or the input string already contains non-ASCII Unicode characters. Testing for these special cases is improved. 
--- src/basic/string-util.c | 93 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 68 insertions(+), 25 deletions(-) (limited to 'src/basic/string-util.c') diff --git a/src/basic/string-util.c b/src/basic/string-util.c index cb55f15e3..a75e2e0d3 100644 --- a/src/basic/string-util.c +++ b/src/basic/string-util.c @@ -15,6 +15,7 @@ #include "alloc-util.h" #include "gunicode.h" +//#include "locale-util.h" #include "macro.h" #include "string-util.h" //#include "terminal-util.h" @@ -456,62 +457,104 @@ bool string_has_cc(const char *p, const char *ok) { } static char *ascii_ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) { - size_t x; + size_t x, need_space; char *r; assert(s); assert(percent <= 100); - assert(new_length >= 3); + assert(new_length != (size_t) -1); - if (old_length <= 3 || old_length <= new_length) + if (old_length <= new_length) return strndup(s, old_length); - r = new0(char, new_length+3); + /* Special case short ellipsations */ + switch (new_length) { + + case 0: + return strdup(""); + + case 1: + if (is_locale_utf8()) + return strdup("…"); + else + return strdup("."); + + case 2: + if (!is_locale_utf8()) + return strdup(".."); + + break; + + default: + break; + } + + /* Calculate how much space the ellipsis will take up. If we are in UTF-8 mode we only need space for one + * character ("…"), otherwise for three characters ("..."). Note that in both cases we need 3 bytes of storage, + * either for the UTF-8 encoded character or for three ASCII characters. */ + need_space = is_locale_utf8() ? 
1 : 3; + + r = new(char, new_length+3); if (!r) return NULL; - x = (new_length * percent) / 100; + assert(new_length >= need_space); - if (x > new_length - 3) - x = new_length - 3; + x = ((new_length - need_space) * percent + 50) / 100; + assert(x <= new_length - need_space); memcpy(r, s, x); - r[x] = 0xe2; /* tri-dot ellipsis: … */ - r[x+1] = 0x80; - r[x+2] = 0xa6; + + if (is_locale_utf8()) { + r[x+0] = 0xe2; /* tri-dot ellipsis: … */ + r[x+1] = 0x80; + r[x+2] = 0xa6; + } else { + r[x+0] = '.'; + r[x+1] = '.'; + r[x+2] = '.'; + } + memcpy(r + x + 3, - s + old_length - (new_length - x - 1), - new_length - x - 1); + s + old_length - (new_length - x - need_space), + new_length - x - need_space + 1); return r; } char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) { - size_t x; - char *e; + size_t x, k, len, len2; const char *i, *j; - unsigned k, len, len2; + char *e; int r; + /* Note that 'old_length' refers to bytes in the string, while 'new_length' refers to character cells taken up + * on screen. This distinction doesn't matter for ASCII strings, but it does matter for non-ASCII UTF-8 + * strings. + * + * Ellipsation is done in a locale-dependent way: + * 1. If the string passed in is fully ASCII and the current locale is not UTF-8, three dots are used ("...") + * 2. Otherwise, a unicode ellipsis is used ("…") + * + * In other words: you'll get a unicode ellipsis as soon as either the string contains non-ASCII characters or + * the current locale is UTF-8. 
+ */ + assert(s); assert(percent <= 100); if (new_length == (size_t) -1) return strndup(s, old_length); - assert(new_length >= 3); + if (new_length == 0) + return strdup(""); - /* if no multibyte characters use ascii_ellipsize_mem for speed */ + /* If no multibyte characters use ascii_ellipsize_mem for speed */ if (ascii_is_valid(s)) return ascii_ellipsize_mem(s, old_length, new_length, percent); - if (old_length <= 3 || old_length <= new_length) - return strndup(s, old_length); - - x = (new_length * percent) / 100; - - if (x > new_length - 3) - x = new_length - 3; + x = ((new_length - 1) * percent) / 100; + assert(x <= new_length - 1); k = 0; for (i = s; k < x && i < s + old_length; i = utf8_next_char(i)) { @@ -556,7 +599,7 @@ char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigne */ memcpy(e, s, len); - e[len] = 0xe2; /* tri-dot ellipsis: … */ + e[len + 0] = 0xe2; /* tri-dot ellipsis: … */ e[len + 1] = 0x80; e[len + 2] = 0xa6; -- cgit v1.2.3