summary | refs | log | tree | commit | diff
path: root/src/audacious/chardet.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/audacious/chardet.c')
-rw-r--r-- src/audacious/chardet.c 194
1 file changed, 17 insertions, 177 deletions
diff --git a/src/audacious/chardet.c b/src/audacious/chardet.c
index 145ec3f..08fe97e 100644
--- a/src/audacious/chardet.c
+++ b/src/audacious/chardet.c
@@ -1,6 +1,6 @@
/*
* chardet.c
- * Copyright 2006-2010 Yoshiki Yazawa, Matti Hämäläinen, and John Lindgren
+ * Copyright 2013 John Lindgren
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -17,196 +17,36 @@
* the use of this software.
*/
-#include <glib.h>
-#include <string.h>
#include <libaudcore/audstrings.h>
+#include <libaudcore/hook.h>
-#include "debug.h"
-#include "i18n.h"
#include "main.h"
#include "misc.h"
-#ifdef USE_CHARDET
-# include <libguess.h>
-#endif
-
-static char * cd_chardet_to_utf8 (const char * str, int len,
- int * arg_bytes_read, int * arg_bytes_written);
-
-static char * str_to_utf8_fallback (const char * str)
+static void chardet_update (void)
{
- char * out = g_strconcat (str, _(" (invalid UTF-8)"), NULL);
+ char * region = get_str (NULL, "chardet_detector");
+ char * fallbacks = get_str (NULL, "chardet_fallback");
- for (char * c = out; * c; c ++)
- {
- if (* c & 0x80)
- * c = '?';
- }
+ Index * list = str_list_to_index (fallbacks, ", ");
+ str_set_charsets (region[0] ? region : NULL, list);
- return out;
+ str_unref (region);
+ str_unref (fallbacks);
}
-static char * cd_str_to_utf8 (const char * str)
+void chardet_init (void)
{
- char *out_str;
-
- if (str == NULL)
- return NULL;
-
- /* Note: Currently, playlist calls this function repeatedly, even
- * if the string is already converted into utf-8.
- * chardet_to_utf8() would convert a valid utf-8 string into a
- * different utf-8 string, if fallback encodings were supplied and
- * the given string could be treated as a string in one of
- * fallback encodings. To avoid this, g_utf8_validate() had been
- * used at the top of evaluation.
- */
-
- /* Note 2: g_utf8_validate() has so called encapsulated utf-8
- * problem, thus chardet_to_utf8() took the place of that.
- */
-
- /* Note 3: As introducing madplug, the problem of conversion from
- * ISO-8859-1 to UTF-8 arose. This may be coped with g_convert()
- * located near the end of chardet_to_utf8(), but it requires utf8
- * validation guard where g_utf8_validate() was. New
- * dfa_validate_utf8() employs libguess' DFA engine to validate
- * utf-8 and can properly distinguish examples of encapsulated
- * utf-8. It is considered to be safe to use as a guard.
- */
-
- /* Already UTF-8? */
-#ifdef USE_CHARDET
- if (libguess_validate_utf8(str, strlen(str)))
- return g_strdup(str);
-#else
- if (g_utf8_validate(str, strlen(str), NULL))
- return g_strdup(str);
-#endif
+ chardet_update ();
- /* chardet encoding detector */
- if ((out_str = cd_chardet_to_utf8 (str, strlen (str), NULL, NULL)))
- return out_str;
-
- /* all else fails, we mask off character codes >= 128, replace with '?' */
- return str_to_utf8_fallback(str);
+ hook_associate ("set chardet_detector", (HookFunction) chardet_update, NULL);
+ hook_associate ("set chardet_fallback", (HookFunction) chardet_update, NULL);
}
-static char * cd_chardet_to_utf8 (const char * str, int len,
- int * arg_bytes_read, int * arg_bytes_write)
+void chardet_cleanup (void)
{
- char *ret = NULL;
- int * bytes_read, * bytes_write;
- int my_bytes_read, my_bytes_write;
-
- bytes_read = arg_bytes_read != NULL ? arg_bytes_read : &my_bytes_read;
- bytes_write = arg_bytes_write != NULL ? arg_bytes_write : &my_bytes_write;
-
- g_return_val_if_fail(str != NULL, NULL);
-
-#ifdef USE_CHARDET
- if (libguess_validate_utf8(str, len))
-#else
- if (g_utf8_validate(str, len, NULL))
-#endif
- {
- if (len < 0)
- len = strlen (str);
-
- ret = g_malloc (len + 1);
- memcpy (ret, str, len);
- ret[len] = 0;
-
- if (arg_bytes_read != NULL)
- * arg_bytes_read = len;
- if (arg_bytes_write != NULL)
- * arg_bytes_write = len;
-
- return ret;
- }
+ hook_dissociate ("set chardet_detector", (HookFunction) chardet_update);
+ hook_dissociate ("set chardet_fallback", (HookFunction) chardet_update);
-#ifdef USE_CHARDET
- char * det = get_string (NULL, "chardet_detector");
-
- if (det[0])
- {
- AUDDBG("guess encoding (%s) %s\n", det, str);
- const char * encoding = libguess_determine_encoding (str, len, det);
- AUDDBG("encoding = %s\n", encoding);
- if (encoding)
- {
- gsize read_gsize = 0, written_gsize = 0;
- ret = g_convert (str, len, "UTF-8", encoding, & read_gsize, & written_gsize, NULL);
- * bytes_read = read_gsize;
- * bytes_write = written_gsize;
- }
- }
-
- g_free (det);
-#endif
-
- /* If detection failed or was not enabled, try fallbacks (if there are any) */
- if (! ret)
- {
- char * fallbacks = get_string (NULL, "chardet_fallback");
- char * * split = g_strsplit_set (fallbacks, " ,:;|/", -1);
-
- for (char * * enc = split; * enc; enc ++)
- {
- gsize read_gsize = 0, written_gsize = 0;
- ret = g_convert (str, len, "UTF-8", * enc, & read_gsize, & written_gsize, NULL);
- * bytes_read = read_gsize;
- * bytes_write = written_gsize;
-
- if (len == *bytes_read)
- break;
- else {
- g_free(ret);
- ret = NULL;
- }
- }
-
- g_strfreev (split);
- g_free (fallbacks);
- }
-
- /* First fallback: locale (duh!) */
- if (ret == NULL)
- {
- gsize read_gsize = 0, written_gsize = 0;
- ret = g_locale_to_utf8 (str, len, & read_gsize, & written_gsize, NULL);
- * bytes_read = read_gsize;
- * bytes_write = written_gsize;
- }
-
- /* The final fallback is ISO-8859-1, if no other is specified or conversions fail */
- if (ret == NULL)
- {
- gsize read_gsize = 0, written_gsize = 0;
- ret = g_convert (str, len, "UTF-8", "ISO-8859-1", & read_gsize, & written_gsize, NULL);
- * bytes_read = read_gsize;
- * bytes_write = written_gsize;
- }
-
- if (ret != NULL)
- {
- if (g_utf8_validate(ret, -1, NULL))
- return ret;
- else
- {
- g_warning("g_utf8_validate() failed for converted string in cd_chardet_to_utf8: '%s'", ret);
- g_free(ret);
- return NULL;
- }
- }
-
- return NULL; /* If we have no idea, return NULL. */
-}
-
-void chardet_init (void)
-{
-#ifdef USE_CHARDET
- libguess_determine_encoding(NULL, -1, "");
-#endif
- str_set_utf8_impl (cd_str_to_utf8, cd_chardet_to_utf8);
+ str_set_charsets (NULL, NULL);
}