1 files changed, 330 insertions, 0 deletions
diff --git a/src/modules/filters/thmlgbf.cpp b/src/modules/filters/thmlgbf.cpp
new file mode 100644
index 0000000..66d9a20
--- /dev/null
+++ b/src/modules/filters/thmlgbf.cpp
@@ -0,0 +1,330 @@
+/***************************************************************************
+                     thmlgbf.cpp  -  ThML to GBF filter
+                             -------------------
+    begin                : 1999-10-28
+    copyright            : 2001 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include <thmlgbf.h>
+
+
+ThMLGBF::ThMLGBF()
+{
+}
+
+
+char ThMLGBF::ProcessText(char *text, int maxlen)
+{
+	char *to, *from, token[2048];
+	int tokpos = 0;
+	bool intoken 	= false;
+	int len;
+	bool ampersand = false;
+	bool sechead = false;
+	bool title = false;  
+
+	len = strlen(text) + 1;						// shift string to right of buffer
+	if (len < maxlen) {
+		memmove(&text[maxlen - len], text, len);
+		from = &text[maxlen - len];
+	}
+	else	from = text;							// -------------------------------
+	for (to = text; *from; from++) {
+		if (*from == '<') {
+			intoken = true;
+			tokpos = 0;
+			token[0] = 0;
+			token[1] = 0;
+			token[2] = 0;
+			ampersand = false;
+			continue;
+		}
+		else if (*from == '&') {
+			intoken = true;
+			tokpos = 0;
+			memset(token, 0, 2048);
+			ampersand = true;
+			continue;
+		}
+		if (*from == ';' && ampersand) {
+			intoken = false;
+	
+			if (!strncmp("nbsp", token, 4)) *to++ = ' ';
+			else if (!strncmp("quot", token, 4)) *to++ = '"';
+			else if (!strncmp("amp", token, 3)) *to++ = '&';
+			else if (!strncmp("lt", token, 2)) *to++ = '<';
+			else if (!strncmp("gt", token, 2)) *to++ = '>';
+			else if (!strncmp("brvbar", token, 6)) *to++ = '|';
+			else if (!strncmp("sect", token, 4)) *to++ = '§';
+			else if (!strncmp("copy", token, 4)) *to++ = '©';
+			else if (!strncmp("laquo", token, 5)) *to++ = '«';
+			else if (!strncmp("reg", token, 3)) *to++ = '®';
+			else if (!strncmp("acute", token, 5)) *to++ = '´';
+			else if (!strncmp("para", token, 4)) *to++ = '¶';
+			else if (!strncmp("raquo", token, 5)) *to++ = '»';
+			
+			else if (!strncmp("Aacute", token, 6)) *to++ = 'Á';
+			else if (!strncmp("Agrave", token, 6)) *to++ = 'À';
+			else if (!strncmp("Acirc", token, 5)) *to++ = 'Â';
+			else if (!strncmp("Auml", token, 4)) *to++ = 'Ä';
+			else if (!strncmp("Atilde", token, 6)) *to++ = 'Ã';
+			else if (!strncmp("Aring", token, 5)) *to++ = 'Å';
+			else if (!strncmp("aacute", token, 6)) *to++ = 'á';
+			else if (!strncmp("agrave", token, 6)) *to++ = 'à';
+			else if (!strncmp("acirc", token, 5)) *to++ = 'â';
+			else if (!strncmp("auml", token, 4)) *to++ = 'ä';
+			else if (!strncmp("atilde", token, 6)) *to++ = 'ã';
+			else if (!strncmp("aring", token, 5)) *to++ = 'å';
+			else if (!strncmp("Eacute", token, 6)) *to++ = 'É';
+			else if (!strncmp("Egrave", token, 6)) *to++ = 'È';
+			else if (!strncmp("Ecirc", token, 5)) *to++ = 'Ê';
+			else if (!strncmp("Euml", token, 4)) *to++ = 'Ë';
+			else if (!strncmp("eacute", token, 6)) *to++ = 'é';
+			else if (!strncmp("egrave", token, 6)) *to++ = 'è';
+			else if (!strncmp("ecirc", token, 5)) *to++ = 'ê';
+			else if (!strncmp("euml", token, 4)) *to++ = 'ë';
+			else if (!strncmp("Iacute", token, 6)) *to++ = 'Í';
+			else if (!strncmp("Igrave", token, 6)) *to++ = 'Ì';
+			else if (!strncmp("Icirc", token, 5)) *to++ = 'Î';
+			else if (!strncmp("Iuml", token, 4)) *to++ = 'Ï';
+			else if (!strncmp("iacute", token, 6)) *to++ = 'í';
+			else if (!strncmp("igrave", token, 6)) *to++ = 'ì';
+			else if (!strncmp("icirc", token, 5)) *to++ = 'î';
+			else if (!strncmp("iuml", token, 4)) *to++ = 'ï';
+			else if (!strncmp("Oacute", token, 6)) *to++ = 'Ó';
+			else if (!strncmp("Ograve", token, 6)) *to++ = 'Ò';
+			else if (!strncmp("Ocirc", token, 5)) *to++ = 'Ô';
+			else if (!strncmp("Ouml", token, 4)) *to++ = 'Ö';
+			else if (!strncmp("Otilde", token, 6)) *to++ = 'Õ';
+			else if (!strncmp("oacute", token, 6)) *to++ = 'ó';
+			else if (!strncmp("ograve", token, 6)) *to++ = 'ò';
+			else if (!strncmp("ocirc", token, 5)) *to++ = 'ô';
+			else if (!strncmp("ouml", token, 4)) *to++ = 'ö';
+			else if (!strncmp("otilde", token, 6)) *to++ = 'õ';
+			else if (!strncmp("Uacute", token, 6)) *to++ = 'Ú';
+			else if (!strncmp("Ugrave", token, 6)) *to++ = 'Ù';
+			else if (!strncmp("Ucirc", token, 5)) *to++ = 'Û';
+			else if (!strncmp("Uuml", token, 4)) *to++ = 'Ü';
+			else if (!strncmp("uacute", token, 6)) *to++ = 'ú';
+			else if (!strncmp("ugrave", token, 6)) *to++ = 'ù';
+			else if (!strncmp("ucirc", token, 5)) *to++ = 'û';
+			else if (!strncmp("uuml", token, 4)) *to++ = 'ü';
+			else if (!strncmp("Yacute", token, 6)) *to++ = 'Ý';
+			else if (!strncmp("yacute", token, 6)) *to++ = 'ý';
+			else if (!strncmp("yuml", token, 4)) *to++ = 'ÿ';
+			
+			else if (!strncmp("deg", token, 3)) *to++ = '°';
+			else if (!strncmp("plusmn", token, 6)) *to++ = '±';
+			else if (!strncmp("sup2", token, 4)) *to++ = '²';
+			else if (!strncmp("sup3", token, 4)) *to++ = '³';
+			else if (!strncmp("sup1", token, 4)) *to++ = '¹';
+			else if (!strncmp("nbsp", token, 4)) *to++ = 'º';
+			else if (!strncmp("pound", token, 5)) *to++ = '£';
+			else if (!strncmp("cent", token, 4)) *to++ = '¢';
+			else if (!strncmp("frac14", token, 6)) *to++ = '¼';
+			else if (!strncmp("frac12", token, 6)) *to++ = '½';
+			else if (!strncmp("frac34", token, 6)) *to++ = '¾';
+			else if (!strncmp("iquest", token, 6)) *to++ = '¿';
+			else if (!strncmp("iexcl", token, 5)) *to++ = '¡';
+			else if (!strncmp("ETH", token, 3)) *to++ = 'Ð';
+			else if (!strncmp("eth", token, 3)) *to++ = 'ð';
+			else if (!strncmp("THORN", token, 5)) *to++ = 'Þ';
+			else if (!strncmp("thorn", token, 5)) *to++ = 'þ';
+			else if (!strncmp("AElig", token, 5)) *to++ = 'Æ';
+			else if (!strncmp("aelig", token, 5)) *to++ = 'æ';
+			else if (!strncmp("Oslash", token, 6)) *to++ = 'Ø';
+			else if (!strncmp("curren", token, 6)) *to++ = '¤';
+			else if (!strncmp("Ccedil", token, 6)) *to++ = 'Ç';
+			else if (!strncmp("ccedil", token, 6)) *to++ = 'ç';
+			else if (!strncmp("szlig", token, 5)) *to++ = 'ß';
+			else if (!strncmp("Ntilde", token, 6)) *to++ = 'Ñ';
+			else if (!strncmp("ntilde", token, 6)) *to++ = 'ñ';
+			else if (!strncmp("yen", token, 3)) *to++ = '¥';
+			else if (!strncmp("not", token, 3)) *to++ = '¬';
+			else if (!strncmp("ordf", token, 4)) *to++ = 'ª';
+			else if (!strncmp("uml", token, 3)) *to++ = '¨';
+			else if (!strncmp("shy", token, 3)) *to++ = '';
+			else if (!strncmp("macr", token, 4)) *to++ = '¯';
+			continue;
+		
+		}
+		else if (*from == '>' && !ampersand) {
+			intoken = false;
+			// process desired tokens
+			if (!strncmp(token, "sync type=\"Strongs\" value=\"", 27)) {
+				*to++ = '<';
+				*to++ = 'W';
+				for (unsigned int i = 27; token[i] != '\"'; i++)
+					*to++ = token[i];
+				*to++ = '>';
+				continue;
+			}
+			if (!strncmp(token, "sync type=\"morph\" value=\"", 25)) {
+				*to++ = '<';
+				*to++ = 'W';
+				*to++ = 'T';
+				for (unsigned int i = 25; token[i] != '\"'; i++)
+					*to++ = token[i];
+				*to++ = '>';
+				continue;
+			}
+			else if (!strncmp(token, "scripRef", 8)) {
+				*to++ = '<';
+				*to++ = 'R';
+				*to++ = 'X';
+				*to++ = '>';
+				continue;
+			}
+			else if (!strncmp(token, "/scripRef", 9)) {
+				*to++ = '<';
+				*to++ = 'R';
+				*to++ = 'x';
+				*to++ = '>';
+				continue;
+			}
+			else if (!strncmp(token, "note", 4)) {
+				*to++ = '<';
+				*to++ = 'R';
+				*to++ = 'F';
+				*to++ = '>';
+				continue;
+			}
+			else if (!strncmp(token, "/note", 5)) {
+				*to++ = '<';
+				*to++ = 'R';
+				*to++ = 'f';
+				*to++ = '>';
+				continue;
+			}
+			else if (!strncmp(token, "sup", 3)) {
+				*to++ = '<';
+				*to++ = 'F';
+				*to++ = 'S';
+				*to++ = '>';
+			}
+			else if (!strncmp(token, "/sup", 4)) {
+				*to++ = '<';
+				*to++ = 'F';
+				*to++ = 's';
+				*to++ = '>';
+			}
+			else if (!strnicmp(token, "font color=#ff0000", 18)) {
+				*to++ = '<';
+				*to++ = 'F';
+				*to++ = 'R';
+				*to++ = '>';
+				continue;
+			}
+			else if (!strnicmp(token, "/font", 5)) {
+				*to++ = '<';
+				*to++ = 'F';
+				*to++ = 'r';
+				*to++ = '>';
+				continue;
+			}
+			else if (!strncmp(token, "div class=\"sechead\"", 19)) {
+				*to++ = '<';
+				*to++ = 'T';
+				*to++ = 'S';
+				*to++ = '>';
+				sechead = true;
+				continue;
+			}
+			else if (sechead && !strncmp(token, "/div", 19)) {
+				*to++ = '<';
+				*to++ = 'T';
+				*to++ = 's';
+				*to++ = '>';
+				sechead = false;
+				continue;
+			}
+			else if (!strncmp(token, "div class=\"title\"", 19)) {
+				*to++ = '<';
+				*to++ = 'T';
+				*to++ = 'T';
+				*to++ = '>';
+				title = true;
+				continue;
+			}
+			else if (title && !strncmp(token, "/div", 19)) {
+				*to++ = '<';
+				*to++ = 'T';
+				*to++ = 't';
+				*to++ = '>';
+				title = false;
+				continue;
+			}
+			else if (!strnicmp(token, "br", 2)) {
+				*to++ = '<';
+				*to++ = 'C';
+				*to++ = 'L';
+				*to++ = '>';
+				continue;
+			}
+			else switch(*token) {
+			case 'I':			// font tags
+			case 'i':
+				*to++ = '<';
+				*to++ = 'F';
+				*to++ = 'I';
+				*to++ = '>';
+				continue;
+			case 'B':		// bold start
+			case 'b':
+				*to++ = '<';
+				*to++ = 'F';
+				*to++ = 'B';
+				*to++ = '>';
+				continue;
+			case '/':
+				switch(token[1]) {
+				case 'P':
+				case 'p':
+					*to++ = '<';
+					*to++ = 'C';
+					*to++ = 'M';
+					*to++ = '>';
+					continue;
+				case 'I':
+				case 'i':		// italic end
+					*to++ = '<';
+					*to++ = 'F';
+					*to++ = 'i';
+					*to++ = '>';
+					continue;
+				case 'B':		// bold start
+				case 'b':
+					*to++ = '<';
+					*to++ = 'F';
+					*to++ = 'b';
+					*to++ = '>';
+					continue;
+				}
+			}
+			continue;
+		}
+		if (intoken) {
+			if (tokpos < 2045)
+				token[tokpos++] = *from;
+				token[tokpos+2] = 0;
+		}
+		else	*to++ = *from;
+	}
+	*to++ = 0;
+	*to = 0;          
+	return 0;
+}
+
+
+