diff options
Diffstat (limited to 'lib/localcharset.c')
-rw-r--r-- | lib/localcharset.c | 90 |
1 files changed, 74 insertions, 16 deletions
diff --git a/lib/localcharset.c b/lib/localcharset.c index 7401479..fa5fcbc 100644 --- a/lib/localcharset.c +++ b/lib/localcharset.c @@ -1,8 +1,6 @@ -/* -*- buffer-read-only: t -*- vi: set ro: */ -/* DO NOT EDIT! GENERATED AUTOMATICALLY! */ /* Determine a canonical name for the current locale's character encoding. - Copyright (C) 2000-2006, 2008-2012 Free Software Foundation, Inc. + Copyright (C) 2000-2006, 2008-2016 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -36,6 +34,7 @@ #if defined _WIN32 || defined __WIN32__ # define WINDOWS_NATIVE +# include <locale.h> #endif #if defined __EMX__ @@ -67,6 +66,11 @@ # include <os2.h> #endif +/* For MB_CUR_MAX_L */ +#if defined DARWIN7 +# include <xlocale.h> +#endif + #if ENABLE_RELOCATABLE # include "relocatable.h" #else @@ -124,7 +128,7 @@ get_charset_aliases (void) cp = charset_aliases; if (cp == NULL) { -#if !(defined DARWIN7 || defined VMS || defined WINDOWS_NATIVE || defined __CYGWIN__) +#if !(defined DARWIN7 || defined VMS || defined WINDOWS_NATIVE || defined __CYGWIN__ || defined OS2) const char *dir; const char *base = "charset.alias"; char *file_name; @@ -338,6 +342,36 @@ get_charset_aliases (void) "CP54936" "\0" "GB18030" "\0" "CP65001" "\0" "UTF-8" "\0"; # endif +# if defined OS2 + /* To avoid the troubles of installing a separate file in the same + directory as the DLL and of retrieving the DLL's directory at + runtime, simply inline the aliases here. */ + + /* The list of encodings is taken from "List of OS/2 Codepages" + by Alex Taylor: + <http://altsan.org/os2/toolkits/uls/index.html#codepages>. + See also "IBM Globalization - Code page identifiers": + <http://www-01.ibm.com/software/globalization/cp/cp_cpgid.html>. */ + cp = "CP813" "\0" "ISO-8859-7" "\0" + "CP878" "\0" "KOI8-R" "\0" + "CP819" "\0" "ISO-8859-1" "\0" + "CP912" "\0" "ISO-8859-2" "\0" + "CP913" "\0" "ISO-8859-3" "\0" + "CP914" "\0" "ISO-8859-4" "\0" + "CP915" "\0" "ISO-8859-5" "\0" + "CP916" "\0" "ISO-8859-8" "\0" + "CP920" "\0" "ISO-8859-9" "\0" + "CP921" "\0" "ISO-8859-13" "\0" + "CP923" "\0" "ISO-8859-15" "\0" + "CP954" "\0" "EUC-JP" "\0" + "CP964" "\0" "EUC-TW" "\0" + "CP970" "\0" "EUC-KR" "\0" + "CP1089" "\0" "ISO-8859-6" "\0" + "CP1208" "\0" "UTF-8" "\0" + "CP1381" "\0" "GB2312" "\0" + "CP1386" "\0" "GBK" "\0" + "CP3372" "\0" "EUC-JP" "\0"; +# endif #endif charset_aliases = cp; @@ -458,14 +492,34 @@ locale_charset (void) static char buf[2 + 10 + 1]; - /* The Windows API has a function returning the locale's codepage as a - number: GetACP(). - When the output goes to a console window, it needs to be provided in - GetOEMCP() encoding if the console is using a raster font, or in - GetConsoleOutputCP() encoding if it is using a TrueType font. - But in GUI programs and for output sent to files and pipes, GetACP() - encoding is the best bet. */ - sprintf (buf, "CP%u", GetACP ()); + /* The Windows API has a function returning the locale's codepage as + a number, but the value doesn't change according to what the + 'setlocale' call specified. So we use it as a last resort, in + case the string returned by 'setlocale' doesn't specify the + codepage. */ + char *current_locale = setlocale (LC_ALL, NULL); + char *pdot; + + /* If they set different locales for different categories, + 'setlocale' will return a semi-colon separated list of locale + values. To make sure we use the correct one, we choose LC_CTYPE. */ + if (strchr (current_locale, ';')) + current_locale = setlocale (LC_CTYPE, NULL); + + pdot = strrchr (current_locale, '.'); + if (pdot && 2 + strlen (pdot + 1) + 1 <= sizeof (buf)) + sprintf (buf, "CP%s", pdot + 1); + else + { + /* The Windows API has a function returning the locale's codepage as a + number: GetACP(). + When the output goes to a console window, it needs to be provided in + GetOEMCP() encoding if the console is using a raster font, or in + GetConsoleOutputCP() encoding if it is using a TrueType font. + But in GUI programs and for output sent to files and pipes, GetACP() + encoding is the best bet. */ + sprintf (buf, "CP%u", GetACP ()); + } codeset = buf; #elif defined OS2 @@ -475,6 +529,8 @@ locale_charset (void) ULONG cp[3]; ULONG cplen; + codeset = NULL; + /* Allow user to override the codeset, as set in the operating system, with standard language environment variables. */ locale = getenv ("LC_ALL"); @@ -506,10 +562,12 @@ locale_charset (void) } } - /* Resolve through the charset.alias file. */ - codeset = locale; + /* For the POSIX locale, don't use the system's codepage. */ + if (strcmp (locale, "C") == 0 || strcmp (locale, "POSIX") == 0) + codeset = ""; } - else + + if (codeset == NULL) { /* OS/2 has a function returning the locale's codepage as a number. */ if (DosQueryCp (sizeof (cp), cp, &cplen)) @@ -547,7 +605,7 @@ locale_charset (void) #ifdef DARWIN7 /* Mac OS X sets MB_CUR_MAX to 1 when LC_ALL=C, and "UTF-8" (the default codeset) does not work when MB_CUR_MAX is 1. */ - if (strcmp (codeset, "UTF-8") == 0 && MB_CUR_MAX <= 1) + if (strcmp (codeset, "UTF-8") == 0 && MB_CUR_MAX_L (uselocale (NULL)) <= 1) codeset = "ASCII"; #endif |