summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLennart Poettering <lennart@poettering.net>2018-04-11 19:52:25 +0200
committerSven Eden <yamakuzure@gmx.net>2018-08-24 16:47:08 +0200
commit5cddd659cdd6021faee07533509598d1abe403e6 (patch)
treeb8f69864e39780ac6056f2093c66206d715b5abb
parent992056e7763dab6d1c3babd15e53ae0a100ecbec (diff)
string-util: tweak ellipsation a bit
This primarily changes to things: 1. Ellipsation to 0, 1 or 2 characters is now supported. Previously we'd hit an assert if the new lengths was < 3, this is now permitted. The result strings won't show too much info still of course, but the code becomes a bit more generic and robust to use. 2. If a UTF-8 mode is disabled and the input string is pure ASCII, then "..." is used for ellipsation, otherwise (as before) "…". This means on a pure-ASCII system we should remain pure-ASCII, matching behaviour otherwise exposed with special_glyph() and friends. Note that we'll use "…" for ellipsiation as soon as either the locale settings indicate an UTF-8 mode or the input string already contains non-ASCII unicode characters. Testing for these special cases is improved.
-rw-r--r--src/basic/string-util.c93
-rw-r--r--src/test/test-ellipsize.c24
2 files changed, 92 insertions, 25 deletions
diff --git a/src/basic/string-util.c b/src/basic/string-util.c
index cb55f15e3..a75e2e0d3 100644
--- a/src/basic/string-util.c
+++ b/src/basic/string-util.c
@@ -15,6 +15,7 @@
#include "alloc-util.h"
#include "gunicode.h"
+//#include "locale-util.h"
#include "macro.h"
#include "string-util.h"
//#include "terminal-util.h"
@@ -456,62 +457,104 @@ bool string_has_cc(const char *p, const char *ok) {
}
static char *ascii_ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
- size_t x;
+ size_t x, need_space;
char *r;
assert(s);
assert(percent <= 100);
- assert(new_length >= 3);
+ assert(new_length != (size_t) -1);
- if (old_length <= 3 || old_length <= new_length)
+ if (old_length <= new_length)
return strndup(s, old_length);
- r = new0(char, new_length+3);
+ /* Special case short ellipsations */
+ switch (new_length) {
+
+ case 0:
+ return strdup("");
+
+ case 1:
+ if (is_locale_utf8())
+ return strdup("…");
+ else
+ return strdup(".");
+
+ case 2:
+ if (!is_locale_utf8())
+ return strdup("..");
+
+ break;
+
+ default:
+ break;
+ }
+
+ /* Calculate how much space the ellipsis will take up. If we are in UTF-8 mode we only need space for one
+ * character ("…"), otherwise for three characters ("..."). Note that in both cases we need 3 bytes of storage,
+ * either for the UTF-8 encoded character or for three ASCII characters. */
+ need_space = is_locale_utf8() ? 1 : 3;
+
+ r = new(char, new_length+3);
if (!r)
return NULL;
- x = (new_length * percent) / 100;
+ assert(new_length >= need_space);
- if (x > new_length - 3)
- x = new_length - 3;
+ x = ((new_length - need_space) * percent + 50) / 100;
+ assert(x <= new_length - need_space);
memcpy(r, s, x);
- r[x] = 0xe2; /* tri-dot ellipsis: … */
- r[x+1] = 0x80;
- r[x+2] = 0xa6;
+
+ if (is_locale_utf8()) {
+ r[x+0] = 0xe2; /* tri-dot ellipsis: … */
+ r[x+1] = 0x80;
+ r[x+2] = 0xa6;
+ } else {
+ r[x+0] = '.';
+ r[x+1] = '.';
+ r[x+2] = '.';
+ }
+
memcpy(r + x + 3,
- s + old_length - (new_length - x - 1),
- new_length - x - 1);
+ s + old_length - (new_length - x - need_space),
+ new_length - x - need_space + 1);
return r;
}
char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
- size_t x;
- char *e;
+ size_t x, k, len, len2;
const char *i, *j;
- unsigned k, len, len2;
+ char *e;
int r;
+ /* Note that 'old_length' refers to bytes in the string, while 'new_length' refers to character cells taken up
+ * on screen. This distinction doesn't matter for ASCII strings, but it does matter for non-ASCII UTF-8
+ * strings.
+ *
+ * Ellipsation is done in a locale-dependent way:
+ * 1. If the string passed in is fully ASCII and the current locale is not UTF-8, three dots are used ("...")
+ * 2. Otherwise, a unicode ellipsis is used ("…")
+ *
+ * In other words: you'll get a unicode ellipsis as soon as either the string contains non-ASCII characters or
+ * the current locale is UTF-8.
+ */
+
assert(s);
assert(percent <= 100);
if (new_length == (size_t) -1)
return strndup(s, old_length);
- assert(new_length >= 3);
+ if (new_length == 0)
+ return strdup("");
- /* if no multibyte characters use ascii_ellipsize_mem for speed */
+ /* If no multibyte characters use ascii_ellipsize_mem for speed */
if (ascii_is_valid(s))
return ascii_ellipsize_mem(s, old_length, new_length, percent);
- if (old_length <= 3 || old_length <= new_length)
- return strndup(s, old_length);
-
- x = (new_length * percent) / 100;
-
- if (x > new_length - 3)
- x = new_length - 3;
+ x = ((new_length - 1) * percent) / 100;
+ assert(x <= new_length - 1);
k = 0;
for (i = s; k < x && i < s + old_length; i = utf8_next_char(i)) {
@@ -556,7 +599,7 @@ char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigne
*/
memcpy(e, s, len);
- e[len] = 0xe2; /* tri-dot ellipsis: … */
+ e[len + 0] = 0xe2; /* tri-dot ellipsis: … */
e[len + 1] = 0x80;
e[len + 2] = 0xa6;
diff --git a/src/test/test-ellipsize.c b/src/test/test-ellipsize.c
index ba4b043fc..902bc3342 100644
--- a/src/test/test-ellipsize.c
+++ b/src/test/test-ellipsize.c
@@ -17,6 +17,30 @@ static void test_one(const char *p) {
_cleanup_free_ char *t;
t = ellipsize(p, columns(), 70);
puts(t);
+ free(t);
+ t = ellipsize(p, columns(), 0);
+ puts(t);
+ free(t);
+ t = ellipsize(p, columns(), 100);
+ puts(t);
+ free(t);
+ t = ellipsize(p, 0, 50);
+ puts(t);
+ free(t);
+ t = ellipsize(p, 1, 50);
+ puts(t);
+ free(t);
+ t = ellipsize(p, 2, 50);
+ puts(t);
+ free(t);
+ t = ellipsize(p, 3, 50);
+ puts(t);
+ free(t);
+ t = ellipsize(p, 4, 50);
+ puts(t);
+ free(t);
+ t = ellipsize(p, 5, 50);
+ puts(t);
}
int main(int argc, char *argv[]) {