diff options
Diffstat (limited to 'utf8.h')
-rw-r--r-- | utf8.h | 24 |
1 file changed, 19 insertions, 5 deletions
@@ -8,7 +8,7 @@ extern "C" {
 /**
  * UTF-8 utility functions
  *
- * (c) 2010 Steve Bennett <steveb@workware.net.au>
+ * (c) 2010-2016 Steve Bennett <steveb@workware.net.au>
  *
  * See LICENCE for licence details.
  */
@@ -29,7 +29,8 @@ int utf8_fromunicode(char *p, unsigned uc);
 #include <ctype.h>
 
 /* No utf-8 support. 1 byte = 1 char */
-#define utf8_strlen(S, B) ((B) < 0 ? strlen(S) : (B))
+#define utf8_strlen(S, B) ((B) < 0 ? (int)strlen(S) : (B))
+#define utf8_strwidth(S, B) utf8_strlen((S), (B))
 #define utf8_tounicode(S, CP) (*(CP) = (unsigned char)*(S), 1)
 #define utf8_getchars(CP, C) (*(CP) = (C), 1)
 #define utf8_upper(C) toupper(C)
@@ -38,6 +39,7 @@ int utf8_fromunicode(char *p, unsigned uc);
 #define utf8_index(C, I) (I)
 #define utf8_charlen(C) 1
 #define utf8_prev_len(S, L) 1
+#define utf8_width(C) 1
 
 #else
 #if !defined(JIM_BOOTSTRAP)
@@ -47,9 +49,8 @@ int utf8_fromunicode(char *p, unsigned uc);
 /**
  * Returns the length of the utf-8 sequence starting with 'c'.
  *
- * Returns 1-4, or -1 if this is not a valid start byte.
- *
- * Note that charlen=4 is not supported by the rest of the API.
+ * Returns 1-4.
+ * If 'c' is not a valid start byte, returns 1.
  */
 int utf8_charlen(int c);
 
@@ -67,6 +68,12 @@ int utf8_charlen(int c);
 int utf8_strlen(const char *str, int bytelen);
 
 /**
+ * Calculates the display width of the first 'charlen' characters in 'str'.
+ * See utf8_width()
+ */
+int utf8_strwidth(const char *str, int charlen);
+
+/**
  * Returns the byte index of the given character in the utf-8 string.
  *
  * The string *must* be null terminated.
@@ -125,6 +132,13 @@ int utf8_title(int uc);
  * Unicode code points > \uffff are returned unchanged.
  */
 int utf8_lower(int uc);
+
+/**
+ * Returns the width (in characters) of the given unicode codepoint.
+ * This is 1 for normal letters and 0 for combining characters and 2 for wide characters.
+ */
+int utf8_width(int ch);
+
 #endif /* JIM_BOOTSTRAP */
 
 #endif