From 82426933e01d05934635d2e50b910031515c2b40 Mon Sep 17 00:00:00 2001 From: Alessandro Ghedini Date: Tue, 12 Aug 2014 11:49:18 +0200 Subject: Imported Upstream version 0.5.0 --- misc/charset_conv.c | 35 ++++++++++++++++++++++++++++++---- misc/ctype.h | 19 +++++++++++++++++++ misc/rendezvous.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++++ misc/rendezvous.h | 8 ++++++++ 4 files changed, 112 insertions(+), 4 deletions(-) create mode 100644 misc/ctype.h create mode 100644 misc/rendezvous.c create mode 100644 misc/rendezvous.h (limited to 'misc') diff --git a/misc/charset_conv.c b/misc/charset_conv.c index 2146f09..31f53cc 100644 --- a/misc/charset_conv.c +++ b/misc/charset_conv.c @@ -21,6 +21,7 @@ #include #include +#include #include #include "config.h" @@ -80,11 +81,24 @@ bool mp_charset_requires_guess(const char *user_cp) // Note that "utf8" is the UTF-8 codepage, while "utf8:..." specifies UTF-8 // by default, plus a codepage that is used if the input is not UTF-8. return bstrcasecmp0(res[0], "enca") == 0 || + bstrcasecmp0(res[0], "auto") == 0 || bstrcasecmp0(res[0], "guess") == 0 || (r > 1 && bstrcasecmp0(res[0], "utf-8") == 0) || (r > 1 && bstrcasecmp0(res[0], "utf8") == 0); } +static const char *const utf_bom[3] = {"\xEF\xBB\xBF", "\xFF\xFE", "\xFE\xFF"}; +static const char *const utf_enc[3] = {"utf-8", "utf-16le", "utf-16be"}; + +static const char *ms_bom_guess(bstr buf) +{ + for (int n = 0; n < 3; n++) { + if (bstr_startswith0(buf, utf_bom[n])) + return utf_enc[n]; + } + return NULL; +} + #if HAVE_ENCA static const char *enca_guess(struct mp_log *log, bstr buf, const char *language) { @@ -102,8 +116,7 @@ static const char *enca_guess(struct mp_log *log, bstr buf, const char *language detected_cp = tmp; enca_analyser_free(analyser); } else { - mp_err(log, "ENCA doesn't know language '%s'\n", - language); + mp_err(log, "ENCA doesn't know language '%s'\n", language); size_t langcnt; const char **languages = enca_get_languages(&langcnt); mp_err(log, 
"ENCA supported languages:"); @@ -143,6 +156,15 @@ const char *mp_charset_guess(struct mp_log *log, bstr buf, const char *user_cp, if (!mp_charset_requires_guess(user_cp)) return user_cp; + bool use_auto = strcasecmp(user_cp, "auto") == 0; + if (use_auto) { +#if HAVE_ENCA + user_cp = "enca"; +#else + user_cp = "UTF-8:UTF-8-BROKEN"; +#endif + } + // Do our own UTF-8 detection, because at least ENCA seems to get it // wrong sometimes (suggested by divVerent). int r = bstr_validate_utf8(buf); @@ -159,6 +181,12 @@ const char *mp_charset_guess(struct mp_log *log, bstr buf, const char *user_cp, const char *res = NULL; + if (use_auto) { + res = ms_bom_guess(buf); + if (res) + type = bstr0("auto"); + } + #if HAVE_ENCA if (bstrcasecmp0(type, "enca") == 0) res = enca_guess(log, buf, lang); @@ -173,8 +201,7 @@ const char *mp_charset_guess(struct mp_log *log, bstr buf, const char *user_cp, } if (res) { - mp_dbg(log, "%.*s detected charset: '%s'\n", - BSTR_P(type), res); + mp_dbg(log, "%.*s detected charset: '%s'\n", BSTR_P(type), res); } else { res = fallback; mp_dbg(log, "Detection with %.*s failed: fallback to %s\n", diff --git a/misc/ctype.h b/misc/ctype.h new file mode 100644 index 0000000..cbff799 --- /dev/null +++ b/misc/ctype.h @@ -0,0 +1,19 @@ +#ifndef MP_CTYPE_H_ +#define MP_CTYPE_H_ + +// Roughly follows C semantics, but doesn't account for EOF, allows char as +// parameter, and is locale independent (always uses "C" locale). 
+ +static inline int mp_isprint(char c) { return (unsigned char)c >= 32; } +static inline int mp_isspace(char c) { return c == ' ' || c == '\f' || c == '\n' || + c == '\r' || c == '\t' || c =='\v'; } +static inline int mp_isupper(char c) { return c >= 'A' && c <= 'Z'; } +static inline int mp_islower(char c) { return c >= 'a' && c <= 'z'; } +static inline int mp_isdigit(char c) { return c >= '0' && c <= '9'; } +static inline int mp_isalpha(char c) { return mp_isupper(c) || mp_islower(c); } +static inline int mp_isalnum(char c) { return mp_isalpha(c) || mp_isdigit(c); } + +static inline char mp_tolower(char c) { return mp_isupper(c) ? c - 'A' + 'a' : c; } +static inline char mp_toupper(char c) { return mp_islower(c) ? c - 'a' + 'A' : c; } + +#endif diff --git a/misc/rendezvous.c b/misc/rendezvous.c new file mode 100644 index 0000000..9af798d --- /dev/null +++ b/misc/rendezvous.c @@ -0,0 +1,54 @@ +#include + +#include "rendezvous.h" + +static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; +static pthread_cond_t wakeup = PTHREAD_COND_INITIALIZER; + +static struct waiter *waiters; + +struct waiter { + void *tag; + struct waiter *next; + intptr_t *value; +}; + +/* A barrier for 2 threads, which can exchange a value when they meet. + * The first thread to call this function will block. As soon as two threads + * are calling this function with the same tag value, they will unblock, and + * on each thread the call returns the value parameter of the _other_ thread. + * + * tag is an arbitrary value, but it must be a unique pointer. If there are + * more than 2 threads using the same tag, things won't work. Typically, it + * will have to point to a memory allocation or to the stack, while pointing + * it to static data is always a bug. + * + * This shouldn't be used for performance critical code (uses a linked list + * of _all_ waiters in the process, and temporarily wakes up _all_ waiters on + * each second call). 
+ * + * This is inspired by: http://9atom.org/magic/man2html/2/rendezvous */ +intptr_t mp_rendezvous(void *tag, intptr_t value) +{ + struct waiter wait = { .tag = tag, .value = &value }; + pthread_mutex_lock(&lock); + struct waiter **prev = &waiters; + while (*prev) { + if ((*prev)->tag == tag) { + intptr_t tmp = *(*prev)->value; + *(*prev)->value = value; + value = tmp; + (*prev)->value = NULL; // signals completion + *prev = (*prev)->next; // unlink + pthread_cond_broadcast(&wakeup); + goto done; + } + prev = &(*prev)->next; + } + *prev = &wait; + while (wait.value) + pthread_cond_wait(&wakeup, &lock); +done: + pthread_mutex_unlock(&lock); + return value; +} diff --git a/misc/rendezvous.h b/misc/rendezvous.h new file mode 100644 index 0000000..ffcc89a --- /dev/null +++ b/misc/rendezvous.h @@ -0,0 +1,8 @@ +#ifndef MP_RENDEZVOUS_H_ +#define MP_RENDEZVOUS_H_ + +#include + +intptr_t mp_rendezvous(void *tag, intptr_t value); + +#endif -- cgit v1.2.3