summaryrefslogtreecommitdiff
path: root/lib/localeinfo.c
diff options
context:
space:
mode:
authorClint Adams <clint@debian.org>2017-01-04 20:39:58 -0500
committerClint Adams <clint@debian.org>2017-01-04 20:39:58 -0500
commit29d3f78df4630dbee0a141e2be8e77275724b3a8 (patch)
tree89a97129c5b799046be2af10bde88ea035b59bd1 /lib/localeinfo.c
parentd75f3c567505ad7acd2c1943207b367593652739 (diff)
New upstream version 4.3
Diffstat (limited to 'lib/localeinfo.c')
-rw-r--r--lib/localeinfo.c113
1 files changed, 113 insertions, 0 deletions
diff --git a/lib/localeinfo.c b/lib/localeinfo.c
new file mode 100644
index 0000000..ca96afc
--- /dev/null
+++ b/lib/localeinfo.c
@@ -0,0 +1,113 @@
+/* locale information
+
+ Copyright 2016 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
+ 02110-1301, USA. */
+
+/* Written by Paul Eggert. */
+
+#include <config.h>
+
+#include <localeinfo.h>
+
+#include <verify.h>
+
+#include <limits.h>
+#include <locale.h>
+#include <stdlib.h>
+#include <string.h>
+#include <wctype.h>
+
+/* The sbclen implementation relies on this. */
+verify (MB_LEN_MAX <= SCHAR_MAX);
+
+/* Return true if the locale uses UTF-8. */
+
+static bool
+is_using_utf8 (void)
+{
+ wchar_t wc;
+ mbstate_t mbs = {0};
+ return mbrtowc (&wc, "\xc4\x80", 2, &mbs) == 2 && wc == 0x100;
+}
+
+/* Initialize *LOCALEINFO from the current locale. */
+
+void
+init_localeinfo (struct localeinfo *localeinfo)
+{
+ int i;
+
+ localeinfo->multibyte = MB_CUR_MAX > 1;
+ localeinfo->using_utf8 = is_using_utf8 ();
+
+ for (i = CHAR_MIN; i <= CHAR_MAX; i++)
+ {
+ char c = i;
+ unsigned char uc = i;
+ mbstate_t s = {0};
+ wchar_t wc;
+ size_t len = mbrtowc (&wc, &c, 1, &s);
+ localeinfo->sbclen[uc] = len <= 1 ? 1 : - (int) - len;
+ localeinfo->sbctowc[uc] = len <= 1 ? wc : WEOF;
+ }
+}
+
+/* The set of wchar_t values C such that there's a useful locale
+ somewhere where C != towupper (C) && C != towlower (towupper (C)).
+ For example, 0x00B5 (U+00B5 MICRO SIGN) is in this table, because
+ towupper (0x00B5) == 0x039C (U+039C GREEK CAPITAL LETTER MU), and
+ towlower (0x039C) == 0x03BC (U+03BC GREEK SMALL LETTER MU). */
+static short const lonesome_lower[] =
+ {
+ 0x00B5, 0x0131, 0x017F, 0x01C5, 0x01C8, 0x01CB, 0x01F2, 0x0345,
+ 0x03C2, 0x03D0, 0x03D1, 0x03D5, 0x03D6, 0x03F0, 0x03F1,
+
+ /* U+03F2 GREEK LUNATE SIGMA SYMBOL lacks a specific uppercase
+ counterpart in locales predating Unicode 4.0.0 (April 2003). */
+ 0x03F2,
+
+ 0x03F5, 0x1E9B, 0x1FBE,
+ };
+
+/* Verify that the worst case fits. This is 1 for towupper, 1 for
+ towlower, and 1 for each entry in LONESOME_LOWER. */
+verify (1 + 1 + sizeof lonesome_lower / sizeof *lonesome_lower
+ <= CASE_FOLDED_BUFSIZE);
+
+/* Find the characters equal to C after case-folding, other than C
+ itself, and store them into FOLDED. Return the number of characters
+ stored. */
+
+int
+case_folded_counterparts (wchar_t c, wchar_t folded[CASE_FOLDED_BUFSIZE])
+{
+ int i;
+ int n = 0;
+ wint_t uc = towupper (c);
+ wint_t lc = towlower (uc);
+ if (uc != c)
+ folded[n++] = uc;
+ if (lc != uc && lc != c && towupper (lc) == uc)
+ folded[n++] = lc;
+ for (i = 0; i < sizeof lonesome_lower / sizeof *lonesome_lower; i++)
+ {
+ wint_t li = lonesome_lower[i];
+ if (li != lc && li != uc && li != c && towupper (li) == uc)
+ folded[n++] = li;
+ }
+ return n;
+}