summary | refs | log | tree | commit | diff
path: root/src/modules/filters/thmlgbf.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/modules/filters/thmlgbf.cpp')
-rw-r--r-- src/modules/filters/thmlgbf.cpp 337
1 file changed, 145 insertions, 192 deletions
diff --git a/src/modules/filters/thmlgbf.cpp b/src/modules/filters/thmlgbf.cpp
index 66d9a20..a65ddaf 100644
--- a/src/modules/filters/thmlgbf.cpp
+++ b/src/modules/filters/thmlgbf.cpp
@@ -15,18 +15,18 @@
***************************************************************************/
#include <stdlib.h>
-#include <string.h>
#include <thmlgbf.h>
+SWORD_NAMESPACE_START
// Default constructor for the ThML-to-GBF filter; the body is empty.
ThMLGBF::ThMLGBF() {}
-char ThMLGBF::ProcessText(char *text, int maxlen)
-{
- char *to, *from, token[2048];
+char ThMLGBF::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ const char *from;
+ char token[2048];
int tokpos = 0;
bool intoken = false;
int len;
@@ -34,13 +34,10 @@ char ThMLGBF::ProcessText(char *text, int maxlen)
bool sechead = false;
bool title = false;
- len = strlen(text) + 1; // shift string to right of buffer
- if (len < maxlen) {
- memmove(&text[maxlen - len], text, len);
- from = &text[maxlen - len];
- }
- else from = text; // -------------------------------
- for (to = text; *from; from++) {
+ SWBuf orig = text;
+ from = orig.c_str();
+
+ for (text = ""; *from; from++) {
if (*from == '<') {
intoken = true;
tokpos = 0;
@@ -60,102 +57,102 @@ char ThMLGBF::ProcessText(char *text, int maxlen)
if (*from == ';' && ampersand) {
intoken = false;
- if (!strncmp("nbsp", token, 4)) *to++ = ' ';
- else if (!strncmp("quot", token, 4)) *to++ = '"';
- else if (!strncmp("amp", token, 3)) *to++ = '&';
- else if (!strncmp("lt", token, 2)) *to++ = '<';
- else if (!strncmp("gt", token, 2)) *to++ = '>';
- else if (!strncmp("brvbar", token, 6)) *to++ = '|';
- else if (!strncmp("sect", token, 4)) *to++ = '§';
- else if (!strncmp("copy", token, 4)) *to++ = '©';
- else if (!strncmp("laquo", token, 5)) *to++ = '«';
- else if (!strncmp("reg", token, 3)) *to++ = '®';
- else if (!strncmp("acute", token, 5)) *to++ = '´';
- else if (!strncmp("para", token, 4)) *to++ = '¶';
- else if (!strncmp("raquo", token, 5)) *to++ = '»';
+ if (!strncmp("nbsp", token, 4)) text += ' ';
+ else if (!strncmp("quot", token, 4)) text += '"';
+ else if (!strncmp("amp", token, 3)) text += '&';
+ else if (!strncmp("lt", token, 2)) text += '<';
+ else if (!strncmp("gt", token, 2)) text += '>';
+ else if (!strncmp("brvbar", token, 6)) text += '|';
+ else if (!strncmp("sect", token, 4)) text += '§';
+ else if (!strncmp("copy", token, 4)) text += '©';
+ else if (!strncmp("laquo", token, 5)) text += '«';
+ else if (!strncmp("reg", token, 3)) text += '®';
+ else if (!strncmp("acute", token, 5)) text += '´';
+ else if (!strncmp("para", token, 4)) text += '¶';
+ else if (!strncmp("raquo", token, 5)) text += '»';
- else if (!strncmp("Aacute", token, 6)) *to++ = 'Á';
- else if (!strncmp("Agrave", token, 6)) *to++ = 'À';
- else if (!strncmp("Acirc", token, 5)) *to++ = 'Â';
- else if (!strncmp("Auml", token, 4)) *to++ = 'Ä';
- else if (!strncmp("Atilde", token, 6)) *to++ = 'Ã';
- else if (!strncmp("Aring", token, 5)) *to++ = 'Å';
- else if (!strncmp("aacute", token, 6)) *to++ = 'á';
- else if (!strncmp("agrave", token, 6)) *to++ = 'à';
- else if (!strncmp("acirc", token, 5)) *to++ = 'â';
- else if (!strncmp("auml", token, 4)) *to++ = 'ä';
- else if (!strncmp("atilde", token, 6)) *to++ = 'ã';
- else if (!strncmp("aring", token, 5)) *to++ = 'å';
- else if (!strncmp("Eacute", token, 6)) *to++ = 'É';
- else if (!strncmp("Egrave", token, 6)) *to++ = 'È';
- else if (!strncmp("Ecirc", token, 5)) *to++ = 'Ê';
- else if (!strncmp("Euml", token, 4)) *to++ = 'Ë';
- else if (!strncmp("eacute", token, 6)) *to++ = 'é';
- else if (!strncmp("egrave", token, 6)) *to++ = 'è';
- else if (!strncmp("ecirc", token, 5)) *to++ = 'ê';
- else if (!strncmp("euml", token, 4)) *to++ = 'ë';
- else if (!strncmp("Iacute", token, 6)) *to++ = 'Í';
- else if (!strncmp("Igrave", token, 6)) *to++ = 'Ì';
- else if (!strncmp("Icirc", token, 5)) *to++ = 'Î';
- else if (!strncmp("Iuml", token, 4)) *to++ = 'Ï';
- else if (!strncmp("iacute", token, 6)) *to++ = 'í';
- else if (!strncmp("igrave", token, 6)) *to++ = 'ì';
- else if (!strncmp("icirc", token, 5)) *to++ = 'î';
- else if (!strncmp("iuml", token, 4)) *to++ = 'ï';
- else if (!strncmp("Oacute", token, 6)) *to++ = 'Ó';
- else if (!strncmp("Ograve", token, 6)) *to++ = 'Ò';
- else if (!strncmp("Ocirc", token, 5)) *to++ = 'Ô';
- else if (!strncmp("Ouml", token, 4)) *to++ = 'Ö';
- else if (!strncmp("Otilde", token, 6)) *to++ = 'Õ';
- else if (!strncmp("oacute", token, 6)) *to++ = 'ó';
- else if (!strncmp("ograve", token, 6)) *to++ = 'ò';
- else if (!strncmp("ocirc", token, 5)) *to++ = 'ô';
- else if (!strncmp("ouml", token, 4)) *to++ = 'ö';
- else if (!strncmp("otilde", token, 6)) *to++ = 'õ';
- else if (!strncmp("Uacute", token, 6)) *to++ = 'Ú';
- else if (!strncmp("Ugrave", token, 6)) *to++ = 'Ù';
- else if (!strncmp("Ucirc", token, 5)) *to++ = 'Û';
- else if (!strncmp("Uuml", token, 4)) *to++ = 'Ü';
- else if (!strncmp("uacute", token, 6)) *to++ = 'ú';
- else if (!strncmp("ugrave", token, 6)) *to++ = 'ù';
- else if (!strncmp("ucirc", token, 5)) *to++ = 'û';
- else if (!strncmp("uuml", token, 4)) *to++ = 'ü';
- else if (!strncmp("Yacute", token, 6)) *to++ = 'Ý';
- else if (!strncmp("yacute", token, 6)) *to++ = 'ý';
- else if (!strncmp("yuml", token, 4)) *to++ = 'ÿ';
+ else if (!strncmp("Aacute", token, 6)) text += 'Á';
+ else if (!strncmp("Agrave", token, 6)) text += 'À';
+ else if (!strncmp("Acirc", token, 5)) text += 'Â';
+ else if (!strncmp("Auml", token, 4)) text += 'Ä';
+ else if (!strncmp("Atilde", token, 6)) text += 'Ã';
+ else if (!strncmp("Aring", token, 5)) text += 'Å';
+ else if (!strncmp("aacute", token, 6)) text += 'á';
+ else if (!strncmp("agrave", token, 6)) text += 'à';
+ else if (!strncmp("acirc", token, 5)) text += 'â';
+ else if (!strncmp("auml", token, 4)) text += 'ä';
+ else if (!strncmp("atilde", token, 6)) text += 'ã';
+ else if (!strncmp("aring", token, 5)) text += 'å';
+ else if (!strncmp("Eacute", token, 6)) text += 'É';
+ else if (!strncmp("Egrave", token, 6)) text += 'È';
+ else if (!strncmp("Ecirc", token, 5)) text += 'Ê';
+ else if (!strncmp("Euml", token, 4)) text += 'Ë';
+ else if (!strncmp("eacute", token, 6)) text += 'é';
+ else if (!strncmp("egrave", token, 6)) text += 'è';
+ else if (!strncmp("ecirc", token, 5)) text += 'ê';
+ else if (!strncmp("euml", token, 4)) text += 'ë';
+ else if (!strncmp("Iacute", token, 6)) text += 'Í';
+ else if (!strncmp("Igrave", token, 6)) text += 'Ì';
+ else if (!strncmp("Icirc", token, 5)) text += 'Î';
+ else if (!strncmp("Iuml", token, 4)) text += 'Ï';
+ else if (!strncmp("iacute", token, 6)) text += 'í';
+ else if (!strncmp("igrave", token, 6)) text += 'ì';
+ else if (!strncmp("icirc", token, 5)) text += 'î';
+ else if (!strncmp("iuml", token, 4)) text += 'ï';
+ else if (!strncmp("Oacute", token, 6)) text += 'Ó';
+ else if (!strncmp("Ograve", token, 6)) text += 'Ò';
+ else if (!strncmp("Ocirc", token, 5)) text += 'Ô';
+ else if (!strncmp("Ouml", token, 4)) text += 'Ö';
+ else if (!strncmp("Otilde", token, 6)) text += 'Õ';
+ else if (!strncmp("oacute", token, 6)) text += 'ó';
+ else if (!strncmp("ograve", token, 6)) text += 'ò';
+ else if (!strncmp("ocirc", token, 5)) text += 'ô';
+ else if (!strncmp("ouml", token, 4)) text += 'ö';
+ else if (!strncmp("otilde", token, 6)) text += 'õ';
+ else if (!strncmp("Uacute", token, 6)) text += 'Ú';
+ else if (!strncmp("Ugrave", token, 6)) text += 'Ù';
+ else if (!strncmp("Ucirc", token, 5)) text += 'Û';
+ else if (!strncmp("Uuml", token, 4)) text += 'Ü';
+ else if (!strncmp("uacute", token, 6)) text += 'ú';
+ else if (!strncmp("ugrave", token, 6)) text += 'ù';
+ else if (!strncmp("ucirc", token, 5)) text += 'û';
+ else if (!strncmp("uuml", token, 4)) text += 'ü';
+ else if (!strncmp("Yacute", token, 6)) text += 'Ý';
+ else if (!strncmp("yacute", token, 6)) text += 'ý';
+ else if (!strncmp("yuml", token, 4)) text += 'ÿ';
- else if (!strncmp("deg", token, 3)) *to++ = '°';
- else if (!strncmp("plusmn", token, 6)) *to++ = '±';
- else if (!strncmp("sup2", token, 4)) *to++ = '²';
- else if (!strncmp("sup3", token, 4)) *to++ = '³';
- else if (!strncmp("sup1", token, 4)) *to++ = '¹';
- else if (!strncmp("nbsp", token, 4)) *to++ = 'º';
- else if (!strncmp("pound", token, 5)) *to++ = '£';
- else if (!strncmp("cent", token, 4)) *to++ = '¢';
- else if (!strncmp("frac14", token, 6)) *to++ = '¼';
- else if (!strncmp("frac12", token, 6)) *to++ = '½';
- else if (!strncmp("frac34", token, 6)) *to++ = '¾';
- else if (!strncmp("iquest", token, 6)) *to++ = '¿';
- else if (!strncmp("iexcl", token, 5)) *to++ = '¡';
- else if (!strncmp("ETH", token, 3)) *to++ = 'Ð';
- else if (!strncmp("eth", token, 3)) *to++ = 'ð';
- else if (!strncmp("THORN", token, 5)) *to++ = 'Þ';
- else if (!strncmp("thorn", token, 5)) *to++ = 'þ';
- else if (!strncmp("AElig", token, 5)) *to++ = 'Æ';
- else if (!strncmp("aelig", token, 5)) *to++ = 'æ';
- else if (!strncmp("Oslash", token, 6)) *to++ = 'Ø';
- else if (!strncmp("curren", token, 6)) *to++ = '¤';
- else if (!strncmp("Ccedil", token, 6)) *to++ = 'Ç';
- else if (!strncmp("ccedil", token, 6)) *to++ = 'ç';
- else if (!strncmp("szlig", token, 5)) *to++ = 'ß';
- else if (!strncmp("Ntilde", token, 6)) *to++ = 'Ñ';
- else if (!strncmp("ntilde", token, 6)) *to++ = 'ñ';
- else if (!strncmp("yen", token, 3)) *to++ = '¥';
- else if (!strncmp("not", token, 3)) *to++ = '¬';
- else if (!strncmp("ordf", token, 4)) *to++ = 'ª';
- else if (!strncmp("uml", token, 3)) *to++ = '¨';
- else if (!strncmp("shy", token, 3)) *to++ = '­';
- else if (!strncmp("macr", token, 4)) *to++ = '¯';
+ else if (!strncmp("deg", token, 3)) text += '°';
+ else if (!strncmp("plusmn", token, 6)) text += '±';
+ else if (!strncmp("sup2", token, 4)) text += '²';
+ else if (!strncmp("sup3", token, 4)) text += '³';
+ else if (!strncmp("sup1", token, 4)) text += '¹';
+ else if (!strncmp("nbsp", token, 4)) text += 'º';
+ else if (!strncmp("pound", token, 5)) text += '£';
+ else if (!strncmp("cent", token, 4)) text += '¢';
+ else if (!strncmp("frac14", token, 6)) text += '¼';
+ else if (!strncmp("frac12", token, 6)) text += '½';
+ else if (!strncmp("frac34", token, 6)) text += '¾';
+ else if (!strncmp("iquest", token, 6)) text += '¿';
+ else if (!strncmp("iexcl", token, 5)) text += '¡';
+ else if (!strncmp("ETH", token, 3)) text += 'Ð';
+ else if (!strncmp("eth", token, 3)) text += 'ð';
+ else if (!strncmp("THORN", token, 5)) text += 'Þ';
+ else if (!strncmp("thorn", token, 5)) text += 'þ';
+ else if (!strncmp("AElig", token, 5)) text += 'Æ';
+ else if (!strncmp("aelig", token, 5)) text += 'æ';
+ else if (!strncmp("Oslash", token, 6)) text += 'Ø';
+ else if (!strncmp("curren", token, 6)) text += '¤';
+ else if (!strncmp("Ccedil", token, 6)) text += 'Ç';
+ else if (!strncmp("ccedil", token, 6)) text += 'ç';
+ else if (!strncmp("szlig", token, 5)) text += 'ß';
+ else if (!strncmp("Ntilde", token, 6)) text += 'Ñ';
+ else if (!strncmp("ntilde", token, 6)) text += 'ñ';
+ else if (!strncmp("yen", token, 3)) text += '¥';
+ else if (!strncmp("not", token, 3)) text += '¬';
+ else if (!strncmp("ordf", token, 4)) text += 'ª';
+ else if (!strncmp("uml", token, 3)) text += '¨';
+ else if (!strncmp("shy", token, 3)) text += '­';
+ else if (!strncmp("macr", token, 4)) text += '¯';
continue;
}
@@ -163,152 +160,95 @@ char ThMLGBF::ProcessText(char *text, int maxlen)
intoken = false;
// process desired tokens
if (!strncmp(token, "sync type=\"Strongs\" value=\"", 27)) {
- *to++ = '<';
- *to++ = 'W';
+ text += "<W";
for (unsigned int i = 27; token[i] != '\"'; i++)
- *to++ = token[i];
- *to++ = '>';
+ text += token[i];
+ text += '>';
continue;
}
if (!strncmp(token, "sync type=\"morph\" value=\"", 25)) {
- *to++ = '<';
- *to++ = 'W';
- *to++ = 'T';
+ text += "<WT";
for (unsigned int i = 25; token[i] != '\"'; i++)
- *to++ = token[i];
- *to++ = '>';
+ text += token[i];
+ text += '>';
continue;
}
else if (!strncmp(token, "scripRef", 8)) {
- *to++ = '<';
- *to++ = 'R';
- *to++ = 'X';
- *to++ = '>';
+ text += "<RX>";
continue;
}
- else if (!strncmp(token, "/scripRef", 9)) {
- *to++ = '<';
- *to++ = 'R';
- *to++ = 'x';
- *to++ = '>';
+ else if (!strncmp(token, "/scripRef", 9)) {
+ text += "<Rx>";
continue;
}
else if (!strncmp(token, "note", 4)) {
- *to++ = '<';
- *to++ = 'R';
- *to++ = 'F';
- *to++ = '>';
+ text += "<RF>";
continue;
}
else if (!strncmp(token, "/note", 5)) {
- *to++ = '<';
- *to++ = 'R';
- *to++ = 'f';
- *to++ = '>';
+ text += "<Rf>";
continue;
}
else if (!strncmp(token, "sup", 3)) {
- *to++ = '<';
- *to++ = 'F';
- *to++ = 'S';
- *to++ = '>';
+ text += "<FS>";
}
else if (!strncmp(token, "/sup", 4)) {
- *to++ = '<';
- *to++ = 'F';
- *to++ = 's';
- *to++ = '>';
+ text += "<Fs>";
}
else if (!strnicmp(token, "font color=#ff0000", 18)) {
- *to++ = '<';
- *to++ = 'F';
- *to++ = 'R';
- *to++ = '>';
+ text += "<FR>";
continue;
}
else if (!strnicmp(token, "/font", 5)) {
- *to++ = '<';
- *to++ = 'F';
- *to++ = 'r';
- *to++ = '>';
+ text += "<Fr>";
continue;
}
else if (!strncmp(token, "div class=\"sechead\"", 19)) {
- *to++ = '<';
- *to++ = 'T';
- *to++ = 'S';
- *to++ = '>';
+ text += "<TS>";
sechead = true;
continue;
}
else if (sechead && !strncmp(token, "/div", 19)) {
- *to++ = '<';
- *to++ = 'T';
- *to++ = 's';
- *to++ = '>';
+ text += "<Ts>";
sechead = false;
continue;
}
else if (!strncmp(token, "div class=\"title\"", 19)) {
- *to++ = '<';
- *to++ = 'T';
- *to++ = 'T';
- *to++ = '>';
+ text += "<TT>";
title = true;
continue;
}
else if (title && !strncmp(token, "/div", 19)) {
- *to++ = '<';
- *to++ = 'T';
- *to++ = 't';
- *to++ = '>';
+ text += "<Tt>";
title = false;
continue;
}
else if (!strnicmp(token, "br", 2)) {
- *to++ = '<';
- *to++ = 'C';
- *to++ = 'L';
- *to++ = '>';
+ text += "<CL>";
continue;
}
else switch(*token) {
case 'I': // font tags
case 'i':
- *to++ = '<';
- *to++ = 'F';
- *to++ = 'I';
- *to++ = '>';
+ text += "<FI>";
continue;
case 'B': // bold start
case 'b':
- *to++ = '<';
- *to++ = 'F';
- *to++ = 'B';
- *to++ = '>';
+ text += "<FB>";
continue;
case '/':
switch(token[1]) {
case 'P':
case 'p':
- *to++ = '<';
- *to++ = 'C';
- *to++ = 'M';
- *to++ = '>';
+ text += "<CM>";
continue;
case 'I':
case 'i': // italic end
- *to++ = '<';
- *to++ = 'F';
- *to++ = 'i';
- *to++ = '>';
+ text += "<Fi>";
continue;
case 'B': // bold end
case 'b':
- *to++ = '<';
- *to++ = 'F';
- *to++ = 'b';
- *to++ = '>';
+ text += "<Fb>";
continue;
}
}
@@ -319,12 +259,25 @@ char ThMLGBF::ProcessText(char *text, int maxlen)
token[tokpos++] = *from;
token[tokpos+2] = 0;
}
- else *to++ = *from;
+ else text += *from;
}
- *to++ = 0;
- *to = 0;
+
+ orig = text;
+ from = orig.c_str();
+ for (text = ""; *from; from++) { //loop to remove extra spaces
+ if ((strchr(" \t\n\r", *from))) {
+ while (*(from+1) && (strchr(" \t\n\r", *(from+1)))) {
+ from++;
+ }
+ text += " ";
+ }
+ else {
+ text += *from;
+ }
+ }
+ text += (char)0;
+
return 0;
}
-
-
+SWORD_NAMESPACE_END