summaryrefslogtreecommitdiff
path: root/lib/mbrtowc.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/mbrtowc.c')
-rw-r--r--lib/mbrtowc.c94
1 files changed, 69 insertions, 25 deletions
diff --git a/lib/mbrtowc.c b/lib/mbrtowc.c
index 22ac2c4..2f6df28 100644
--- a/lib/mbrtowc.c
+++ b/lib/mbrtowc.c
@@ -35,12 +35,60 @@
# include "streq.h"
# include "verify.h"
-#ifndef FALLTHROUGH
-# if __GNUC__ < 7
-# define FALLTHROUGH ((void) 0)
-# else
-# define FALLTHROUGH __attribute__ ((__fallthrough__))
+# ifndef FALLTHROUGH
+# if __GNUC__ < 7
+# define FALLTHROUGH ((void) 0)
+# else
+# define FALLTHROUGH __attribute__ ((__fallthrough__))
+# endif
# endif
+
+/* Returns a classification of special values of the encoding of the current
+ locale. */
+typedef enum {
+ enc_other, /* other */
+ enc_utf8, /* UTF-8 */
+ enc_eucjp, /* EUC-JP */
+ enc_94, /* EUC-KR, GB2312, BIG5 */
+ enc_euctw, /* EUC-TW */
+ enc_gb18030, /* GB18030 */
+ enc_sjis /* SJIS */
+} enc_t;
+static inline enc_t
+locale_enc (void)
+{
+ const char *encoding = locale_charset ();
+ if (STREQ_OPT (encoding, "UTF-8", 'U', 'T', 'F', '-', '8', 0, 0, 0, 0))
+ return enc_utf8;
+ if (STREQ_OPT (encoding, "EUC-JP", 'E', 'U', 'C', '-', 'J', 'P', 0, 0, 0))
+ return enc_eucjp;
+ if (STREQ_OPT (encoding, "EUC-KR", 'E', 'U', 'C', '-', 'K', 'R', 0, 0, 0)
+ || STREQ_OPT (encoding, "GB2312", 'G', 'B', '2', '3', '1', '2', 0, 0, 0)
+ || STREQ_OPT (encoding, "BIG5", 'B', 'I', 'G', '5', 0, 0, 0, 0, 0))
+ return enc_94;
+ if (STREQ_OPT (encoding, "EUC-TW", 'E', 'U', 'C', '-', 'T', 'W', 0, 0, 0))
+ return enc_euctw;
+ if (STREQ_OPT (encoding, "GB18030", 'G', 'B', '1', '8', '0', '3', '0', 0, 0))
+ return enc_gb18030;
+ if (STREQ_OPT (encoding, "SJIS", 'S', 'J', 'I', 'S', 0, 0, 0, 0, 0))
+ return enc_sjis;
+ return enc_other;
+}
+
+#if GNULIB_WCHAR_SINGLE
+/* When we know that the locale does not change, provide a speedup by
+ caching the value of locale_enc. */
+static int cached_locale_enc = -1;
+static inline enc_t
+locale_enc_cached (void)
+{
+ if (cached_locale_enc < 0)
+ cached_locale_enc = locale_enc ();
+ return cached_locale_enc;
+}
+#else
+/* By default, don't make assumptions, hence no caching. */
+# define locale_enc_cached locale_enc
#endif
verify (sizeof (mbstate_t) >= 4);
@@ -137,10 +185,9 @@ mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
if (m >= 4 || m >= MB_CUR_MAX)
goto invalid;
/* Here MB_CUR_MAX > 1 and 0 < m < 4. */
- {
- const char *encoding = locale_charset ();
-
- if (STREQ_OPT (encoding, "UTF-8", 'U', 'T', 'F', '-', '8', 0, 0, 0, 0))
+ switch (locale_enc_cached ())
+ {
+ case enc_utf8: /* UTF-8 */
{
/* Cf. unistr/u8-mblen.c. */
unsigned char c = (unsigned char) p[0];
@@ -197,8 +244,7 @@ mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
/* As a reference for this code, you can use the GNU libiconv
implementation. Look for uses of the RET_TOOFEW macro. */
- if (STREQ_OPT (encoding,
- "EUC-JP", 'E', 'U', 'C', '-', 'J', 'P', 0, 0, 0))
+ case enc_eucjp: /* EUC-JP */
{
if (m == 1)
{
@@ -221,12 +267,8 @@ mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
}
goto invalid;
}
- if (STREQ_OPT (encoding,
- "EUC-KR", 'E', 'U', 'C', '-', 'K', 'R', 0, 0, 0)
- || STREQ_OPT (encoding,
- "GB2312", 'G', 'B', '2', '3', '1', '2', 0, 0, 0)
- || STREQ_OPT (encoding,
- "BIG5", 'B', 'I', 'G', '5', 0, 0, 0, 0, 0))
+
+ case enc_94: /* EUC-KR, GB2312, BIG5 */
{
if (m == 1)
{
@@ -237,8 +279,8 @@ mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
}
goto invalid;
}
- if (STREQ_OPT (encoding,
- "EUC-TW", 'E', 'U', 'C', '-', 'T', 'W', 0, 0, 0))
+
+ case enc_euctw: /* EUC-TW */
{
if (m == 1)
{
@@ -256,8 +298,8 @@ mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
}
goto invalid;
}
- if (STREQ_OPT (encoding,
- "GB18030", 'G', 'B', '1', '8', '0', '3', '0', 0, 0))
+
+ case enc_gb18030: /* GB18030 */
{
if (m == 1)
{
@@ -290,7 +332,8 @@ mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
}
goto invalid;
}
- if (STREQ_OPT (encoding, "SJIS", 'S', 'J', 'I', 'S', 0, 0, 0, 0, 0))
+
+ case enc_sjis: /* SJIS */
{
if (m == 1)
{
@@ -303,9 +346,10 @@ mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
goto invalid;
}
- /* An unknown multibyte encoding. */
- goto incomplete;
- }
+ default:
+ /* An unknown multibyte encoding. */
+ goto incomplete;
+ }
incomplete:
{