diff options
author | William Pitcock <nenolod@atheme.org> | 2010-07-02 07:08:01 -0500 |
---|---|---|
committer | William Pitcock <nenolod@atheme.org> | 2010-07-02 07:08:01 -0500 |
commit | 2f83a515722cedbc2c0258ce88ffc34b07a528b2 (patch) | |
tree | e9c96b37234a25786633955d4c96816c8921700d | |
parent | 6d80d73c82748dd8011a82f449dce941bf2fe52c (diff) |
Add support for the Baltic region.
-rw-r--r-- | src/libguess/guess.c | 1 | ||||
-rw-r--r-- | src/libguess/guess.scm | 34 | ||||
-rw-r--r-- | src/libguess/guess_impl.c | 50 | ||||
-rw-r--r-- | src/libguess/guess_tab.c | 72 | ||||
-rw-r--r-- | src/libguess/libguess.h | 2 |
5 files changed, 159 insertions, 0 deletions
diff --git a/src/libguess/guess.c b/src/libguess/guess.c index 0d8320d..2c55d95 100644 --- a/src/libguess/guess.c +++ b/src/libguess/guess.c @@ -40,6 +40,7 @@ guess_init(void) guess_impl_register(GUESS_REGION_GR, guess_gr); guess_impl_register(GUESS_REGION_HW, guess_hw); guess_impl_register(GUESS_REGION_PL, guess_pl); + guess_impl_register(GUESS_REGION_BL, guess_bl); } const char * diff --git a/src/libguess/guess.scm b/src/libguess/guess.scm index f7b7018..a5e96b4 100644 --- a/src/libguess/guess.scm +++ b/src/libguess/guess.scm @@ -580,3 +580,37 @@ (((#xd8 #xf6)) init 1.0) (((#xf8 #xfe)) init 1.0))) +;;; +;;; baltic (estonia/latvia/lithuania) +;;; + +(define-dfa iso8859_13 + (init + (((#x20 #x7e)) init 1.0) + ((#xa8) init 1.0) + ((#xaa) init 1.0) + ((#xaf) init 1.0) + ((#xb8) init 1.0) + ((#xba) init 1.0) + ((#xbf) init 1.0) + (((#xc0 #xd6)) init 1.0) + (((#xd8 #xf6)) init 1.0) + (((#xf8 #xfe)) init 1.0))) + +(define-dfa cp1257 + (init + (((#x20 #x7e)) init 1.0) + ((#x80) init 1.0) + ((#x82) init 1.0) + (((#x84 #x87)) init 1.0) + ((#x89) init 1.0) + ((#x8b) init 1.0) + (((#x8d #x8f)) init 1.0) + (((#x91 #x97)) init 1.0) + ((#x99) init 1.0) + ((#x9b) init 1.0) + (((#x9d #x9e)) init 1.0) + ((#xa0) init 1.0) + (((#xa2 #xa4)) init 1.0) + (((#xa6 #xff)) init 1.0))) + diff --git a/src/libguess/guess_impl.c b/src/libguess/guess_impl.c index 6aa076d..0a1b83f 100644 --- a/src/libguess/guess_impl.c +++ b/src/libguess/guess_impl.c @@ -54,6 +54,7 @@ #define ORDER_HW &utf8, &iso8859_8, &cp1255 #define ORDER_PL &utf8, &cp1250, &iso8859_2 #define ORDER_TR &utf8, &iso8859_9, &cp1254 +#define ORDER_BL &utf8, &iso8859_13, &cp1257 /* include DFA table generated by guess.scm */ #include "guess_tab.c" @@ -604,3 +605,52 @@ const char *guess_tr(const char *buf, int buflen) else return NULL; } + +const char *guess_bl(const char *buf, int buflen) +{ + int i; + const char *rv = NULL; + + /* encodings */ + guess_dfa cp1257 = DFA_INIT(guess_cp1257_st, guess_cp1257_ar, "CP1257"); + guess_dfa 
iso8859_13 = DFA_INIT(guess_iso8859_13_st, guess_iso8859_13_ar, "ISO-8859-13"); + guess_dfa utf8 = DFA_INIT(guess_utf8_st, guess_utf8_ar, "UTF-8"); + + guess_dfa *top = NULL; + guess_dfa *order[] = { ORDER_BL, NULL }; + + for (i = 0; i < buflen; i++) { + int c = (unsigned char) buf[i]; + + /* special treatment of BOM */ + if (i == 0 && c == 0xff) { + if (i < buflen - 1) { + c = (unsigned char) buf[i + 1]; + if (c == 0xfe) + return UCS_2LE; + } + } + if (i == 0 && c == 0xfe) { + if (i < buflen - 1) { + c = (unsigned char) buf[i + 1]; + if (c == 0xff) + return UCS_2BE; + } + } + + rv = dfa_process(order, c); + if(rv) + return rv; + + if (dfa_none(order)) { + /* we ran out the possibilities */ + return NULL; + } + } + + top = dfa_top(order); + if (top) + return top->name; + else + return NULL; +} diff --git a/src/libguess/guess_tab.c b/src/libguess/guess_tab.c index 83504a3..73876a4 100644 --- a/src/libguess/guess_tab.c +++ b/src/libguess/guess_tab.c @@ -1304,3 +1304,75 @@ static guess_arc guess_cp1250_ar[] = { { 0, 1.0 }, /* init -> init */ }; +static signed char guess_iso8859_13_st[][256] = { + { /* state init */ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 1, -1, 2, -1, -1, -1, -1, 3, + -1, -1, -1, -1, -1, -1, -1, -1, 4, -1, 5, -1, -1, -1, -1, 6, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, -1, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, -1, 
9, 9, 9, 9, 9, 9, 9, -1, + }, +}; + +static guess_arc guess_iso8859_13_ar[] = { + { 0, 1.0 }, /* init -> init */ + { 0, 1.0 }, /* init -> init */ + { 0, 1.0 }, /* init -> init */ + { 0, 1.0 }, /* init -> init */ + { 0, 1.0 }, /* init -> init */ + { 0, 1.0 }, /* init -> init */ + { 0, 1.0 }, /* init -> init */ + { 0, 1.0 }, /* init -> init */ + { 0, 1.0 }, /* init -> init */ + { 0, 1.0 }, /* init -> init */ +}; + +static signed char guess_cp1257_st[][256] = { + { /* state init */ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, + 1, -1, 2, -1, 3, 3, 3, 3, -1, 4, -1, 5, -1, 6, 6, 6, + -1, 7, 7, 7, 7, 7, 7, 7, -1, 8, -1, 9, -1, 10, 10, -1, + 11, -1, 12, 12, 12, -1, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + }, +}; + +static guess_arc guess_cp1257_ar[] = { + { 0, 1.0 }, /* init -> init */ + { 0, 1.0 }, /* init -> init */ + { 0, 1.0 }, /* init -> init */ + { 0, 1.0 }, /* init -> init */ + { 0, 1.0 }, /* init -> init */ + { 0, 1.0 }, /* init -> init */ + { 0, 1.0 }, /* init -> init */ + { 0, 1.0 }, /* init -> init */ + { 0, 1.0 }, /* init -> init */ + { 0, 1.0 }, /* init -> init */ + { 0, 1.0 }, /* init -> init */ + { 0, 1.0 }, /* init -> init */ + { 0, 1.0 }, /* init -> init */ + { 0, 1.0 }, /* init -> init */ +}; + diff --git a/src/libguess/libguess.h 
b/src/libguess/libguess.h index 32eb028..7ffcb9f 100644 --- a/src/libguess/libguess.h +++ b/src/libguess/libguess.h @@ -57,6 +57,7 @@ const char *guess_tr(const char *buf, int buflen); const char *guess_gr(const char *buf, int buflen); const char *guess_hw(const char *buf, int buflen); const char *guess_pl(const char *buf, int buflen); +const char *guess_bl(const char *buf, int buflen); typedef const char *(*guess_impl_f)(const char *buf, int len); @@ -74,6 +75,7 @@ int libguess_validate_utf8(const char *buf, int buflen); #define GUESS_REGION_GR "greek" #define GUESS_REGION_HW "hebrew" #define GUESS_REGION_PL "polish" +#define GUESS_REGION_BL "baltic" typedef void (*libguess_result_f)(const char *encodingname, const char *res); |