diff options
Diffstat (limited to 'src/audacious/chardet.c')
-rw-r--r-- | src/audacious/chardet.c | 194 |
1 files changed, 17 insertions, 177 deletions
diff --git a/src/audacious/chardet.c b/src/audacious/chardet.c index 145ec3f..08fe97e 100644 --- a/src/audacious/chardet.c +++ b/src/audacious/chardet.c @@ -1,6 +1,6 @@ /* * chardet.c - * Copyright 2006-2010 Yoshiki Yazawa, Matti Hämäläinen, and John Lindgren + * Copyright 2013 John Lindgren * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -17,196 +17,36 @@ * the use of this software. */ -#include <glib.h> -#include <string.h> #include <libaudcore/audstrings.h> +#include <libaudcore/hook.h> -#include "debug.h" -#include "i18n.h" #include "main.h" #include "misc.h" -#ifdef USE_CHARDET -# include <libguess.h> -#endif - -static char * cd_chardet_to_utf8 (const char * str, int len, - int * arg_bytes_read, int * arg_bytes_written); - -static char * str_to_utf8_fallback (const char * str) +static void chardet_update (void) { - char * out = g_strconcat (str, _(" (invalid UTF-8)"), NULL); + char * region = get_str (NULL, "chardet_detector"); + char * fallbacks = get_str (NULL, "chardet_fallback"); - for (char * c = out; * c; c ++) - { - if (* c & 0x80) - * c = '?'; - } + Index * list = str_list_to_index (fallbacks, ", "); + str_set_charsets (region[0] ? region : NULL, list); - return out; + str_unref (region); + str_unref (fallbacks); } -static char * cd_str_to_utf8 (const char * str) +void chardet_init (void) { - char *out_str; - - if (str == NULL) - return NULL; - - /* Note: Currently, playlist calls this function repeatedly, even - * if the string is already converted into utf-8. - * chardet_to_utf8() would convert a valid utf-8 string into a - * different utf-8 string, if fallback encodings were supplied and - * the given string could be treated as a string in one of - * fallback encodings. To avoid this, g_utf8_validate() had been - * used at the top of evaluation. - */ - - /* Note 2: g_utf8_validate() has so called encapsulated utf-8 - * problem, thus chardet_to_utf8() took the place of that. - */ - - /* Note 3: As introducing madplug, the problem of conversion from - * ISO-8859-1 to UTF-8 arose. This may be coped with g_convert() - * located near the end of chardet_to_utf8(), but it requires utf8 - * validation guard where g_utf8_validate() was. New - * dfa_validate_utf8() employs libguess' DFA engine to validate - * utf-8 and can properly distinguish examples of encapsulated - * utf-8. It is considered to be safe to use as a guard. - */ - - /* Already UTF-8? */ -#ifdef USE_CHARDET - if (libguess_validate_utf8(str, strlen(str))) - return g_strdup(str); -#else - if (g_utf8_validate(str, strlen(str), NULL)) - return g_strdup(str); -#endif + chardet_update (); - /* chardet encoding detector */ - if ((out_str = cd_chardet_to_utf8 (str, strlen (str), NULL, NULL))) - return out_str; - - /* all else fails, we mask off character codes >= 128, replace with '?' */ - return str_to_utf8_fallback(str); + hook_associate ("set chardet_detector", (HookFunction) chardet_update, NULL); + hook_associate ("set chardet_fallback", (HookFunction) chardet_update, NULL); } -static char * cd_chardet_to_utf8 (const char * str, int len, - int * arg_bytes_read, int * arg_bytes_write) +void chardet_cleanup (void) { - char *ret = NULL; - int * bytes_read, * bytes_write; - int my_bytes_read, my_bytes_write; - - bytes_read = arg_bytes_read != NULL ? arg_bytes_read : &my_bytes_read; - bytes_write = arg_bytes_write != NULL ? arg_bytes_write : &my_bytes_write; - - g_return_val_if_fail(str != NULL, NULL); - -#ifdef USE_CHARDET - if (libguess_validate_utf8(str, len)) -#else - if (g_utf8_validate(str, len, NULL)) -#endif - { - if (len < 0) - len = strlen (str); - - ret = g_malloc (len + 1); - memcpy (ret, str, len); - ret[len] = 0; - - if (arg_bytes_read != NULL) - * arg_bytes_read = len; - if (arg_bytes_write != NULL) - * arg_bytes_write = len; - - return ret; - } + hook_dissociate ("set chardet_detector", (HookFunction) chardet_update); + hook_dissociate ("set chardet_fallback", (HookFunction) chardet_update); -#ifdef USE_CHARDET - char * det = get_string (NULL, "chardet_detector"); - - if (det[0]) - { - AUDDBG("guess encoding (%s) %s\n", det, str); - const char * encoding = libguess_determine_encoding (str, len, det); - AUDDBG("encoding = %s\n", encoding); - if (encoding) - { - gsize read_gsize = 0, written_gsize = 0; - ret = g_convert (str, len, "UTF-8", encoding, & read_gsize, & written_gsize, NULL); - * bytes_read = read_gsize; - * bytes_write = written_gsize; - } - } - - g_free (det); -#endif - - /* If detection failed or was not enabled, try fallbacks (if there are any) */ - if (! ret) - { - char * fallbacks = get_string (NULL, "chardet_fallback"); - char * * split = g_strsplit_set (fallbacks, " ,:;|/", -1); - - for (char * * enc = split; * enc; enc ++) - { - gsize read_gsize = 0, written_gsize = 0; - ret = g_convert (str, len, "UTF-8", * enc, & read_gsize, & written_gsize, NULL); - * bytes_read = read_gsize; - * bytes_write = written_gsize; - - if (len == *bytes_read) - break; - else { - g_free(ret); - ret = NULL; - } - } - - g_strfreev (split); - g_free (fallbacks); - } - - /* First fallback: locale (duh!) */ - if (ret == NULL) - { - gsize read_gsize = 0, written_gsize = 0; - ret = g_locale_to_utf8 (str, len, & read_gsize, & written_gsize, NULL); - * bytes_read = read_gsize; - * bytes_write = written_gsize; - } - - /* The final fallback is ISO-8859-1, if no other is specified or conversions fail */ - if (ret == NULL) - { - gsize read_gsize = 0, written_gsize = 0; - ret = g_convert (str, len, "UTF-8", "ISO-8859-1", & read_gsize, & written_gsize, NULL); - * bytes_read = read_gsize; - * bytes_write = written_gsize; - } - - if (ret != NULL) - { - if (g_utf8_validate(ret, -1, NULL)) - return ret; - else - { - g_warning("g_utf8_validate() failed for converted string in cd_chardet_to_utf8: '%s'", ret); - g_free(ret); - return NULL; - } - } - - return NULL; /* If we have no idea, return NULL. */ -} - -void chardet_init (void) -{ -#ifdef USE_CHARDET - libguess_determine_encoding(NULL, -1, ""); -#endif - str_set_utf8_impl (cd_str_to_utf8, cd_chardet_to_utf8); + str_set_charsets (NULL, NULL); } |