summary | refs | log | tree | commit | diff
path: root/utf8.h
diff options
context:
space:
mode:
Diffstat (limited to 'utf8.h')
-rw-r--r-- utf8.h 24
1 files changed, 19 insertions, 5 deletions
diff --git a/utf8.h b/utf8.h
index 7288113..9970683 100644
--- a/utf8.h
+++ b/utf8.h
@@ -8,7 +8,7 @@ extern "C" {
/**
* UTF-8 utility functions
*
- * (c) 2010 Steve Bennett <steveb@workware.net.au>
+ * (c) 2010-2016 Steve Bennett <steveb@workware.net.au>
*
* See LICENCE for licence details.
*/
@@ -29,7 +29,8 @@ int utf8_fromunicode(char *p, unsigned uc);
#include <ctype.h>
/* No utf-8 support. 1 byte = 1 char */
-#define utf8_strlen(S, B) ((B) < 0 ? strlen(S) : (B))
+#define utf8_strlen(S, B) ((B) < 0 ? (int)strlen(S) : (B))
+#define utf8_strwidth(S, B) utf8_strlen((S), (B))
#define utf8_tounicode(S, CP) (*(CP) = (unsigned char)*(S), 1)
#define utf8_getchars(CP, C) (*(CP) = (C), 1)
#define utf8_upper(C) toupper(C)
@@ -38,6 +39,7 @@ int utf8_fromunicode(char *p, unsigned uc);
#define utf8_index(C, I) (I)
#define utf8_charlen(C) 1
#define utf8_prev_len(S, L) 1
+#define utf8_width(C) 1
#else
#if !defined(JIM_BOOTSTRAP)
@@ -47,9 +49,8 @@ int utf8_fromunicode(char *p, unsigned uc);
/**
* Returns the length of the utf-8 sequence starting with 'c'.
*
- * Returns 1-4, or -1 if this is not a valid start byte.
- *
- * Note that charlen=4 is not supported by the rest of the API.
+ * Returns 1-4.
+ * If 'c' is not a valid start byte, returns 1.
*/
int utf8_charlen(int c);
@@ -67,6 +68,12 @@ int utf8_charlen(int c);
int utf8_strlen(const char *str, int bytelen);
/**
+ * Calculates the display width of the first 'charlen' characters in 'str'.
+ * See utf8_width() for how the width of each character is determined.
+ */
+int utf8_strwidth(const char *str, int charlen);
+
+/**
* Returns the byte index of the given character in the utf-8 string.
*
* The string *must* be null terminated.
@@ -125,6 +132,13 @@ int utf8_title(int uc);
* Unicode code points > \uffff are returned unchanged.
*/
int utf8_lower(int uc);
+
+/**
+ * Returns the display width (in character cells) of the given unicode codepoint.
+ * This is 1 for normal letters, 0 for combining characters, and 2 for wide characters.
+ */
+int utf8_width(int ch);
+
#endif /* JIM_BOOTSTRAP */
#endif