summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWilliam Pitcock <nenolod@atheme.org>2010-07-02 07:08:01 -0500
committerWilliam Pitcock <nenolod@atheme.org>2010-07-02 07:08:01 -0500
commit2f83a515722cedbc2c0258ce88ffc34b07a528b2 (patch)
treee9c96b37234a25786633955d4c96816c8921700d
parent6d80d73c82748dd8011a82f449dce941bf2fe52c (diff)
Add support for baltic region.
-rw-r--r--src/libguess/guess.c1
-rw-r--r--src/libguess/guess.scm34
-rw-r--r--src/libguess/guess_impl.c50
-rw-r--r--src/libguess/guess_tab.c72
-rw-r--r--src/libguess/libguess.h2
5 files changed, 159 insertions, 0 deletions
diff --git a/src/libguess/guess.c b/src/libguess/guess.c
index 0d8320d..2c55d95 100644
--- a/src/libguess/guess.c
+++ b/src/libguess/guess.c
@@ -40,6 +40,7 @@ guess_init(void)
guess_impl_register(GUESS_REGION_GR, guess_gr);
guess_impl_register(GUESS_REGION_HW, guess_hw);
guess_impl_register(GUESS_REGION_PL, guess_pl);
+ guess_impl_register(GUESS_REGION_BL, guess_bl);
}
const char *
diff --git a/src/libguess/guess.scm b/src/libguess/guess.scm
index f7b7018..a5e96b4 100644
--- a/src/libguess/guess.scm
+++ b/src/libguess/guess.scm
@@ -580,3 +580,37 @@
(((#xd8 #xf6)) init 1.0)
(((#xf8 #xfe)) init 1.0)))
+;;;
+;;; baltic (estonia/latvia/lithuania)
+;;;
+
+(define-dfa iso8859_13 ; single-state DFA for ISO-8859-13; every rule loops init -> init with 1.0
+ (init ; NOTE(review): bytes below #x20 (TAB/LF/CR included) and #x7f-#xa7 have no rule - confirm how unlisted bytes are treated
+ (((#x20 #x7e)) init 1.0) ; printable ASCII
+ ((#xa8) init 1.0) ; the six isolated bytes in #xa0-#xbf are listed one by one
+ ((#xaa) init 1.0)
+ ((#xaf) init 1.0)
+ ((#xb8) init 1.0)
+ ((#xba) init 1.0)
+ ((#xbf) init 1.0)
+ (((#xc0 #xd6)) init 1.0) ; NOTE(review): looks like the letter positions only - verify against the ISO-8859-13 chart
+ (((#xd8 #xf6)) init 1.0) ; #xd7 is skipped
+ (((#xf8 #xfe)) init 1.0))) ; #xf7 and #xff are skipped
+
+(define-dfa cp1257 ; single-state DFA for CP1257; every rule loops init -> init with 1.0
+ (init ; NOTE(review): bytes below #x20 (TAB/LF/CR included) and #x7f have no rule - confirm how unlisted bytes are treated
+ (((#x20 #x7e)) init 1.0) ; printable ASCII
+ ((#x80) init 1.0) ; #x81 is skipped
+ ((#x82) init 1.0) ; #x83 is skipped
+ (((#x84 #x87)) init 1.0) ; #x88 is skipped
+ ((#x89) init 1.0) ; #x8a is skipped
+ ((#x8b) init 1.0) ; #x8c is skipped
+ (((#x8d #x8f)) init 1.0) ; #x90 is skipped
+ (((#x91 #x97)) init 1.0) ; #x98 is skipped
+ ((#x99) init 1.0) ; #x9a is skipped
+ ((#x9b) init 1.0) ; #x9c is skipped
+ (((#x9d #x9e)) init 1.0) ; #x9f is skipped
+ ((#xa0) init 1.0) ; #xa1 is skipped
+ (((#xa2 #xa4)) init 1.0) ; #xa5 is skipped
+ (((#xa6 #xff)) init 1.0))) ; NOTE(review): the skipped bytes appear to be the unassigned CP1257 code points - verify against the Windows-1257 table
+
diff --git a/src/libguess/guess_impl.c b/src/libguess/guess_impl.c
index 6aa076d..0a1b83f 100644
--- a/src/libguess/guess_impl.c
+++ b/src/libguess/guess_impl.c
@@ -54,6 +54,7 @@
#define ORDER_HW &utf8, &iso8859_8, &cp1255
#define ORDER_PL &utf8, &cp1250, &iso8859_2
#define ORDER_TR &utf8, &iso8859_9, &cp1254
+#define ORDER_BL &utf8, &iso8859_13, &cp1257
/* include DFA table generated by guess.scm */
#include "guess_tab.c"
@@ -604,3 +605,52 @@ const char *guess_tr(const char *buf, int buflen)
else
return NULL;
}
+
+/*
+ * Guess the encoding of buf[0..buflen) among the ORDER_BL candidates.
+ * Returns UCS_2LE/UCS_2BE on a leading FF FE / FE FF byte-order mark, any
+ * non-NULL result of dfa_process(), else dfa_top()'s name, else NULL.
+ */
+const char *guess_bl(const char *buf, int buflen)
+{
+    int i;
+    const char *rv = NULL;
+
+    /* encodings */
+    guess_dfa cp1257 = DFA_INIT(guess_cp1257_st, guess_cp1257_ar, "CP1257");
+    guess_dfa iso8859_13 = DFA_INIT(guess_iso8859_13_st, guess_iso8859_13_ar, "ISO-8859-13");
+    guess_dfa utf8 = DFA_INIT(guess_utf8_st, guess_utf8_ar, "UTF-8");
+
+    guess_dfa *top = NULL;
+    guess_dfa *order[] = { ORDER_BL, NULL };
+
+    /* BOM check: bytes 0 and 1 are only compared here, never substituted for
+     * the loop's input byte, so a lone leading 0xff/0xfe still reaches the DFAs. */
+    if (buflen > 1) {
+        const unsigned char *p = (const unsigned char *) buf;
+
+        if (p[0] == 0xff && p[1] == 0xfe)
+            return UCS_2LE;
+        if (p[0] == 0xfe && p[1] == 0xff)
+            return UCS_2BE;
+    }
+
+    for (i = 0; i < buflen; i++) {
+        int c = (unsigned char) buf[i];
+
+        rv = dfa_process(order, c);
+        if (rv)
+            return rv;
+
+        if (dfa_none(order)) {
+            /* we ran out the possibilities */
+            return NULL;
+        }
+    }
+
+    top = dfa_top(order);
+    if (top)
+        return top->name;
+    else
+        return NULL;
+}
diff --git a/src/libguess/guess_tab.c b/src/libguess/guess_tab.c
index 83504a3..73876a4 100644
--- a/src/libguess/guess_tab.c
+++ b/src/libguess/guess_tab.c
@@ -1304,3 +1304,75 @@ static guess_arc guess_cp1250_ar[] = {
{ 0, 1.0 }, /* init -> init */
};
+static signed char guess_iso8859_13_st[][256] = { /* one 256-entry row per state, indexed by input byte; mirrors the iso8859_13 DFA in guess.scm */
+ { /* state init: -1 = byte has no rule in guess.scm; values 0..9 presumably index guess_iso8859_13_ar (10 entries) */
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0x00-0x0f */
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0x10-0x1f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30-0x3f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x4f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x5f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x6f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, /* 0x70-0x7f */
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0x80-0x8f */
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0x90-0x9f */
+ -1, -1, -1, -1, -1, -1, -1, -1, 1, -1, 2, -1, -1, -1, -1, 3, /* 0xa0-0xaf */
+ -1, -1, -1, -1, -1, -1, -1, -1, 4, -1, 5, -1, -1, -1, -1, 6, /* 0xb0-0xbf */
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, /* 0xc0-0xcf */
+ 7, 7, 7, 7, 7, 7, 7, -1, 8, 8, 8, 8, 8, 8, 8, 8, /* 0xd0-0xdf */
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, /* 0xe0-0xef */
+ 8, 8, 8, 8, 8, 8, 8, -1, 9, 9, 9, 9, 9, 9, 9, -1, /* 0xf0-0xff */
+ },
+};
+
+static guess_arc guess_iso8859_13_ar[] = { /* one entry per guess.scm rule, in rule order; 0 is the only state (init), 1.0 is the value given in the rule */
+ { 0, 1.0 }, /* init -> init, bytes 0x20-0x7e */
+ { 0, 1.0 }, /* init -> init, byte 0xa8 */
+ { 0, 1.0 }, /* init -> init, byte 0xaa */
+ { 0, 1.0 }, /* init -> init, byte 0xaf */
+ { 0, 1.0 }, /* init -> init, byte 0xb8 */
+ { 0, 1.0 }, /* init -> init, byte 0xba */
+ { 0, 1.0 }, /* init -> init, byte 0xbf */
+ { 0, 1.0 }, /* init -> init, bytes 0xc0-0xd6 */
+ { 0, 1.0 }, /* init -> init, bytes 0xd8-0xf6 */
+ { 0, 1.0 }, /* init -> init, bytes 0xf8-0xfe */
+};
+
+static signed char guess_cp1257_st[][256] = { /* one 256-entry row per state, indexed by input byte; mirrors the cp1257 DFA in guess.scm */
+ { /* state init: -1 = byte has no rule in guess.scm; values 0..13 presumably index guess_cp1257_ar (14 entries) */
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0x00-0x0f */
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0x10-0x1f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30-0x3f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x4f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x5f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x6f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, /* 0x70-0x7f */
+ 1, -1, 2, -1, 3, 3, 3, 3, -1, 4, -1, 5, -1, 6, 6, 6, /* 0x80-0x8f */
+ -1, 7, 7, 7, 7, 7, 7, 7, -1, 8, -1, 9, -1, 10, 10, -1, /* 0x90-0x9f */
+ 11, -1, 12, 12, 12, -1, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, /* 0xa0-0xaf */
+ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, /* 0xb0-0xbf */
+ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, /* 0xc0-0xcf */
+ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, /* 0xd0-0xdf */
+ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, /* 0xe0-0xef */
+ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, /* 0xf0-0xff */
+ },
+};
+
+static guess_arc guess_cp1257_ar[] = { /* one entry per guess.scm rule, in rule order; 0 is the only state (init), 1.0 is the value given in the rule */
+ { 0, 1.0 }, /* init -> init, bytes 0x20-0x7e */
+ { 0, 1.0 }, /* init -> init, byte 0x80 */
+ { 0, 1.0 }, /* init -> init, byte 0x82 */
+ { 0, 1.0 }, /* init -> init, bytes 0x84-0x87 */
+ { 0, 1.0 }, /* init -> init, byte 0x89 */
+ { 0, 1.0 }, /* init -> init, byte 0x8b */
+ { 0, 1.0 }, /* init -> init, bytes 0x8d-0x8f */
+ { 0, 1.0 }, /* init -> init, bytes 0x91-0x97 */
+ { 0, 1.0 }, /* init -> init, byte 0x99 */
+ { 0, 1.0 }, /* init -> init, byte 0x9b */
+ { 0, 1.0 }, /* init -> init, bytes 0x9d-0x9e */
+ { 0, 1.0 }, /* init -> init, byte 0xa0 */
+ { 0, 1.0 }, /* init -> init, bytes 0xa2-0xa4 */
+ { 0, 1.0 }, /* init -> init, bytes 0xa6-0xff */
+};
+
diff --git a/src/libguess/libguess.h b/src/libguess/libguess.h
index 32eb028..7ffcb9f 100644
--- a/src/libguess/libguess.h
+++ b/src/libguess/libguess.h
@@ -57,6 +57,7 @@ const char *guess_tr(const char *buf, int buflen);
const char *guess_gr(const char *buf, int buflen);
const char *guess_hw(const char *buf, int buflen);
const char *guess_pl(const char *buf, int buflen);
+const char *guess_bl(const char *buf, int buflen);
typedef const char *(*guess_impl_f)(const char *buf, int len);
@@ -74,6 +75,7 @@ int libguess_validate_utf8(const char *buf, int buflen);
#define GUESS_REGION_GR "greek"
#define GUESS_REGION_HW "hebrew"
#define GUESS_REGION_PL "polish"
+#define GUESS_REGION_BL "baltic"
typedef void (*libguess_result_f)(const char *encodingname, const char *res);