summaryrefslogtreecommitdiff
path: root/lib/localcharset.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/localcharset.c')
-rw-r--r--lib/localcharset.c90
1 files changed, 74 insertions, 16 deletions
diff --git a/lib/localcharset.c b/lib/localcharset.c
index 7401479..fa5fcbc 100644
--- a/lib/localcharset.c
+++ b/lib/localcharset.c
@@ -1,8 +1,6 @@
-/* -*- buffer-read-only: t -*- vi: set ro: */
-/* DO NOT EDIT! GENERATED AUTOMATICALLY! */
/* Determine a canonical name for the current locale's character encoding.
- Copyright (C) 2000-2006, 2008-2012 Free Software Foundation, Inc.
+ Copyright (C) 2000-2006, 2008-2016 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -36,6 +34,7 @@
#if defined _WIN32 || defined __WIN32__
# define WINDOWS_NATIVE
+# include <locale.h>
#endif
#if defined __EMX__
@@ -67,6 +66,11 @@
# include <os2.h>
#endif
+/* For MB_CUR_MAX_L */
+#if defined DARWIN7
+# include <xlocale.h>
+#endif
+
#if ENABLE_RELOCATABLE
# include "relocatable.h"
#else
@@ -124,7 +128,7 @@ get_charset_aliases (void)
cp = charset_aliases;
if (cp == NULL)
{
-#if !(defined DARWIN7 || defined VMS || defined WINDOWS_NATIVE || defined __CYGWIN__)
+#if !(defined DARWIN7 || defined VMS || defined WINDOWS_NATIVE || defined __CYGWIN__ || defined OS2)
const char *dir;
const char *base = "charset.alias";
char *file_name;
@@ -338,6 +342,36 @@ get_charset_aliases (void)
"CP54936" "\0" "GB18030" "\0"
"CP65001" "\0" "UTF-8" "\0";
# endif
+# if defined OS2
+ /* To avoid the troubles of installing a separate file in the same
+ directory as the DLL and of retrieving the DLL's directory at
+ runtime, simply inline the aliases here. */
+
+ /* The list of encodings is taken from "List of OS/2 Codepages"
+ by Alex Taylor:
+ <http://altsan.org/os2/toolkits/uls/index.html#codepages>.
+ See also "IBM Globalization - Code page identifiers":
+ <http://www-01.ibm.com/software/globalization/cp/cp_cpgid.html>. */
+ cp = "CP813" "\0" "ISO-8859-7" "\0"
+ "CP878" "\0" "KOI8-R" "\0"
+ "CP819" "\0" "ISO-8859-1" "\0"
+ "CP912" "\0" "ISO-8859-2" "\0"
+ "CP913" "\0" "ISO-8859-3" "\0"
+ "CP914" "\0" "ISO-8859-4" "\0"
+ "CP915" "\0" "ISO-8859-5" "\0"
+ "CP916" "\0" "ISO-8859-8" "\0"
+ "CP920" "\0" "ISO-8859-9" "\0"
+ "CP921" "\0" "ISO-8859-13" "\0"
+ "CP923" "\0" "ISO-8859-15" "\0"
+ "CP954" "\0" "EUC-JP" "\0"
+ "CP964" "\0" "EUC-TW" "\0"
+ "CP970" "\0" "EUC-KR" "\0"
+ "CP1089" "\0" "ISO-8859-6" "\0"
+ "CP1208" "\0" "UTF-8" "\0"
+ "CP1381" "\0" "GB2312" "\0"
+ "CP1386" "\0" "GBK" "\0"
+ "CP3372" "\0" "EUC-JP" "\0";
+# endif
#endif
charset_aliases = cp;
@@ -458,14 +492,34 @@ locale_charset (void)
static char buf[2 + 10 + 1];
- /* The Windows API has a function returning the locale's codepage as a
- number: GetACP().
- When the output goes to a console window, it needs to be provided in
- GetOEMCP() encoding if the console is using a raster font, or in
- GetConsoleOutputCP() encoding if it is using a TrueType font.
- But in GUI programs and for output sent to files and pipes, GetACP()
- encoding is the best bet. */
- sprintf (buf, "CP%u", GetACP ());
+ /* The Windows API has a function returning the locale's codepage as
+ a number, but the value doesn't change according to what the
+ 'setlocale' call specified. So we use it as a last resort, in
+ case the string returned by 'setlocale' doesn't specify the
+ codepage. */
+ char *current_locale = setlocale (LC_ALL, NULL);
+ char *pdot;
+
+ /* If they set different locales for different categories,
+ 'setlocale' will return a semi-colon separated list of locale
+ values. To make sure we use the correct one, we choose LC_CTYPE. */
+ if (strchr (current_locale, ';'))
+ current_locale = setlocale (LC_CTYPE, NULL);
+
+ pdot = strrchr (current_locale, '.');
+ if (pdot && 2 + strlen (pdot + 1) + 1 <= sizeof (buf))
+ sprintf (buf, "CP%s", pdot + 1);
+ else
+ {
+ /* The Windows API has a function returning the locale's codepage as a
+ number: GetACP().
+ When the output goes to a console window, it needs to be provided in
+ GetOEMCP() encoding if the console is using a raster font, or in
+ GetConsoleOutputCP() encoding if it is using a TrueType font.
+ But in GUI programs and for output sent to files and pipes, GetACP()
+ encoding is the best bet. */
+ sprintf (buf, "CP%u", GetACP ());
+ }
codeset = buf;
#elif defined OS2
@@ -475,6 +529,8 @@ locale_charset (void)
ULONG cp[3];
ULONG cplen;
+ codeset = NULL;
+
/* Allow user to override the codeset, as set in the operating system,
with standard language environment variables. */
locale = getenv ("LC_ALL");
@@ -506,10 +562,12 @@ locale_charset (void)
}
}
- /* Resolve through the charset.alias file. */
- codeset = locale;
+ /* For the POSIX locale, don't use the system's codepage. */
+ if (strcmp (locale, "C") == 0 || strcmp (locale, "POSIX") == 0)
+ codeset = "";
}
- else
+
+ if (codeset == NULL)
{
/* OS/2 has a function returning the locale's codepage as a number. */
if (DosQueryCp (sizeof (cp), cp, &cplen))
@@ -547,7 +605,7 @@ locale_charset (void)
#ifdef DARWIN7
/* Mac OS X sets MB_CUR_MAX to 1 when LC_ALL=C, and "UTF-8"
(the default codeset) does not work when MB_CUR_MAX is 1. */
- if (strcmp (codeset, "UTF-8") == 0 && MB_CUR_MAX <= 1)
+ if (strcmp (codeset, "UTF-8") == 0 && MB_CUR_MAX_L (uselocale (NULL)) <= 1)
codeset = "ASCII";
#endif