diff options
Diffstat (limited to 'src/modules/filters/thmlgbf.cpp')
-rw-r--r-- | src/modules/filters/thmlgbf.cpp | 337 |
1 files changed, 145 insertions, 192 deletions
diff --git a/src/modules/filters/thmlgbf.cpp b/src/modules/filters/thmlgbf.cpp index 66d9a20..a65ddaf 100644 --- a/src/modules/filters/thmlgbf.cpp +++ b/src/modules/filters/thmlgbf.cpp @@ -15,18 +15,18 @@ ***************************************************************************/ #include <stdlib.h> -#include <string.h> #include <thmlgbf.h> +SWORD_NAMESPACE_START ThMLGBF::ThMLGBF() { } -char ThMLGBF::ProcessText(char *text, int maxlen) -{ - char *to, *from, token[2048]; +char ThMLGBF::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + const char *from; + char token[2048]; int tokpos = 0; bool intoken = false; int len; @@ -34,13 +34,10 @@ char ThMLGBF::ProcessText(char *text, int maxlen) bool sechead = false; bool title = false; - len = strlen(text) + 1; // shift string to right of buffer - if (len < maxlen) { - memmove(&text[maxlen - len], text, len); - from = &text[maxlen - len]; - } - else from = text; // ------------------------------- - for (to = text; *from; from++) { + SWBuf orig = text; + from = orig.c_str(); + + for (text = ""; *from; from++) { if (*from == '<') { intoken = true; tokpos = 0; @@ -60,102 +57,102 @@ char ThMLGBF::ProcessText(char *text, int maxlen) if (*from == ';' && ampersand) { intoken = false; - if (!strncmp("nbsp", token, 4)) *to++ = ' '; - else if (!strncmp("quot", token, 4)) *to++ = '"'; - else if (!strncmp("amp", token, 3)) *to++ = '&'; - else if (!strncmp("lt", token, 2)) *to++ = '<'; - else if (!strncmp("gt", token, 2)) *to++ = '>'; - else if (!strncmp("brvbar", token, 6)) *to++ = '|'; - else if (!strncmp("sect", token, 4)) *to++ = '§'; - else if (!strncmp("copy", token, 4)) *to++ = '©'; - else if (!strncmp("laquo", token, 5)) *to++ = '«'; - else if (!strncmp("reg", token, 3)) *to++ = '®'; - else if (!strncmp("acute", token, 5)) *to++ = '´'; - else if (!strncmp("para", token, 4)) *to++ = '¶'; - else if (!strncmp("raquo", token, 5)) *to++ = '»'; + if (!strncmp("nbsp", token, 4)) text += ' '; + else if (!strncmp("quot", token, 4)) text += '"'; + else if (!strncmp("amp", token, 3)) text += '&'; + else if (!strncmp("lt", token, 2)) text += '<'; + else if (!strncmp("gt", token, 2)) text += '>'; + else if (!strncmp("brvbar", token, 6)) text += '|'; + else if (!strncmp("sect", token, 4)) text += '§'; + else if (!strncmp("copy", token, 4)) text += '©'; + else if (!strncmp("laquo", token, 5)) text += '«'; + else if (!strncmp("reg", token, 3)) text += '®'; + else if (!strncmp("acute", token, 5)) text += '´'; + else if (!strncmp("para", token, 4)) text += '¶'; + else if (!strncmp("raquo", token, 5)) text += '»'; - else if (!strncmp("Aacute", token, 6)) *to++ = 'Á'; - else if (!strncmp("Agrave", token, 6)) *to++ = 'À'; - else if (!strncmp("Acirc", token, 5)) *to++ = 'Â'; - else if (!strncmp("Auml", token, 4)) *to++ = 'Ä'; - else if (!strncmp("Atilde", token, 6)) *to++ = 'Ã'; - else if (!strncmp("Aring", token, 5)) *to++ = 'Å'; - else if (!strncmp("aacute", token, 6)) *to++ = 'á'; - else if (!strncmp("agrave", token, 6)) *to++ = 'à'; - else if (!strncmp("acirc", token, 5)) *to++ = 'â'; - else if (!strncmp("auml", token, 4)) *to++ = 'ä'; - else if (!strncmp("atilde", token, 6)) *to++ = 'ã'; - else if (!strncmp("aring", token, 5)) *to++ = 'å'; - else if (!strncmp("Eacute", token, 6)) *to++ = 'É'; - else if (!strncmp("Egrave", token, 6)) *to++ = 'È'; - else if (!strncmp("Ecirc", token, 5)) *to++ = 'Ê'; - else if (!strncmp("Euml", token, 4)) *to++ = 'Ë'; - else if (!strncmp("eacute", token, 6)) *to++ = 'é'; - else if (!strncmp("egrave", token, 6)) *to++ = 'è'; - else if (!strncmp("ecirc", token, 5)) *to++ = 'ê'; - else if (!strncmp("euml", token, 4)) *to++ = 'ë'; - else if (!strncmp("Iacute", token, 6)) *to++ = 'Í'; - else if (!strncmp("Igrave", token, 6)) *to++ = 'Ì'; - else if (!strncmp("Icirc", token, 5)) *to++ = 'Î'; - else if (!strncmp("Iuml", token, 4)) *to++ = 'Ï'; - else if (!strncmp("iacute", token, 6)) *to++ = 'í'; - else if (!strncmp("igrave", token, 6)) *to++ = 'ì'; - else if (!strncmp("icirc", token, 5)) *to++ = 'î'; - else if (!strncmp("iuml", token, 4)) *to++ = 'ï'; - else if (!strncmp("Oacute", token, 6)) *to++ = 'Ó'; - else if (!strncmp("Ograve", token, 6)) *to++ = 'Ò'; - else if (!strncmp("Ocirc", token, 5)) *to++ = 'Ô'; - else if (!strncmp("Ouml", token, 4)) *to++ = 'Ö'; - else if (!strncmp("Otilde", token, 6)) *to++ = 'Õ'; - else if (!strncmp("oacute", token, 6)) *to++ = 'ó'; - else if (!strncmp("ograve", token, 6)) *to++ = 'ò'; - else if (!strncmp("ocirc", token, 5)) *to++ = 'ô'; - else if (!strncmp("ouml", token, 4)) *to++ = 'ö'; - else if (!strncmp("otilde", token, 6)) *to++ = 'õ'; - else if (!strncmp("Uacute", token, 6)) *to++ = 'Ú'; - else if (!strncmp("Ugrave", token, 6)) *to++ = 'Ù'; - else if (!strncmp("Ucirc", token, 5)) *to++ = 'Û'; - else if (!strncmp("Uuml", token, 4)) *to++ = 'Ü'; - else if (!strncmp("uacute", token, 6)) *to++ = 'ú'; - else if (!strncmp("ugrave", token, 6)) *to++ = 'ù'; - else if (!strncmp("ucirc", token, 5)) *to++ = 'û'; - else if (!strncmp("uuml", token, 4)) *to++ = 'ü'; - else if (!strncmp("Yacute", token, 6)) *to++ = 'Ý'; - else if (!strncmp("yacute", token, 6)) *to++ = 'ý'; - else if (!strncmp("yuml", token, 4)) *to++ = 'ÿ'; + else if (!strncmp("Aacute", token, 6)) text += 'Á'; + else if (!strncmp("Agrave", token, 6)) text += 'À'; + else if (!strncmp("Acirc", token, 5)) text += 'Â'; + else if (!strncmp("Auml", token, 4)) text += 'Ä'; + else if (!strncmp("Atilde", token, 6)) text += 'Ã'; + else if (!strncmp("Aring", token, 5)) text += 'Å'; + else if (!strncmp("aacute", token, 6)) text += 'á'; + else if (!strncmp("agrave", token, 6)) text += 'à'; + else if (!strncmp("acirc", token, 5)) text += 'â'; + else if (!strncmp("auml", token, 4)) text += 'ä'; + else if (!strncmp("atilde", token, 6)) text += 'ã'; + else if (!strncmp("aring", token, 5)) text += 'å'; + else if (!strncmp("Eacute", token, 6)) text += 'É'; + else if (!strncmp("Egrave", token, 6)) text += 'È'; + else if (!strncmp("Ecirc", token, 5)) text += 'Ê'; + else if (!strncmp("Euml", token, 4)) text += 'Ë'; + else if (!strncmp("eacute", token, 6)) text += 'é'; + else if (!strncmp("egrave", token, 6)) text += 'è'; + else if (!strncmp("ecirc", token, 5)) text += 'ê'; + else if (!strncmp("euml", token, 4)) text += 'ë'; + else if (!strncmp("Iacute", token, 6)) text += 'Í'; + else if (!strncmp("Igrave", token, 6)) text += 'Ì'; + else if (!strncmp("Icirc", token, 5)) text += 'Î'; + else if (!strncmp("Iuml", token, 4)) text += 'Ï'; + else if (!strncmp("iacute", token, 6)) text += 'í'; + else if (!strncmp("igrave", token, 6)) text += 'ì'; + else if (!strncmp("icirc", token, 5)) text += 'î'; + else if (!strncmp("iuml", token, 4)) text += 'ï'; + else if (!strncmp("Oacute", token, 6)) text += 'Ó'; + else if (!strncmp("Ograve", token, 6)) text += 'Ò'; + else if (!strncmp("Ocirc", token, 5)) text += 'Ô'; + else if (!strncmp("Ouml", token, 4)) text += 'Ö'; + else if (!strncmp("Otilde", token, 6)) text += 'Õ'; + else if (!strncmp("oacute", token, 6)) text += 'ó'; + else if (!strncmp("ograve", token, 6)) text += 'ò'; + else if (!strncmp("ocirc", token, 5)) text += 'ô'; + else if (!strncmp("ouml", token, 4)) text += 'ö'; + else if (!strncmp("otilde", token, 6)) text += 'õ'; + else if (!strncmp("Uacute", token, 6)) text += 'Ú'; + else if (!strncmp("Ugrave", token, 6)) text += 'Ù'; + else if (!strncmp("Ucirc", token, 5)) text += 'Û'; + else if (!strncmp("Uuml", token, 4)) text += 'Ü'; + else if (!strncmp("uacute", token, 6)) text += 'ú'; + else if (!strncmp("ugrave", token, 6)) text += 'ù'; + else if (!strncmp("ucirc", token, 5)) text += 'û'; + else if (!strncmp("uuml", token, 4)) text += 'ü'; + else if (!strncmp("Yacute", token, 6)) text += 'Ý'; + else if (!strncmp("yacute", token, 6)) text += 'ý'; + else if (!strncmp("yuml", token, 4)) text += 'ÿ'; - else if (!strncmp("deg", token, 3)) *to++ = '°'; - else if (!strncmp("plusmn", token, 6)) *to++ = '±'; - else if (!strncmp("sup2", token, 4)) *to++ = '²'; - else if (!strncmp("sup3", token, 4)) *to++ = '³'; - else if (!strncmp("sup1", token, 4)) *to++ = '¹'; - else if (!strncmp("nbsp", token, 4)) *to++ = 'º'; - else if (!strncmp("pound", token, 5)) *to++ = '£'; - else if (!strncmp("cent", token, 4)) *to++ = '¢'; - else if (!strncmp("frac14", token, 6)) *to++ = '¼'; - else if (!strncmp("frac12", token, 6)) *to++ = '½'; - else if (!strncmp("frac34", token, 6)) *to++ = '¾'; - else if (!strncmp("iquest", token, 6)) *to++ = '¿'; - else if (!strncmp("iexcl", token, 5)) *to++ = '¡'; - else if (!strncmp("ETH", token, 3)) *to++ = 'Ð'; - else if (!strncmp("eth", token, 3)) *to++ = 'ð'; - else if (!strncmp("THORN", token, 5)) *to++ = 'Þ'; - else if (!strncmp("thorn", token, 5)) *to++ = 'þ'; - else if (!strncmp("AElig", token, 5)) *to++ = 'Æ'; - else if (!strncmp("aelig", token, 5)) *to++ = 'æ'; - else if (!strncmp("Oslash", token, 6)) *to++ = 'Ø'; - else if (!strncmp("curren", token, 6)) *to++ = '¤'; - else if (!strncmp("Ccedil", token, 6)) *to++ = 'Ç'; - else if (!strncmp("ccedil", token, 6)) *to++ = 'ç'; - else if (!strncmp("szlig", token, 5)) *to++ = 'ß'; - else if (!strncmp("Ntilde", token, 6)) *to++ = 'Ñ'; - else if (!strncmp("ntilde", token, 6)) *to++ = 'ñ'; - else if (!strncmp("yen", token, 3)) *to++ = '¥'; - else if (!strncmp("not", token, 3)) *to++ = '¬'; - else if (!strncmp("ordf", token, 4)) *to++ = 'ª'; - else if (!strncmp("uml", token, 3)) *to++ = '¨'; - else if (!strncmp("shy", token, 3)) *to++ = ''; - else if (!strncmp("macr", token, 4)) *to++ = '¯'; + else if (!strncmp("deg", token, 3)) text += '°'; + else if (!strncmp("plusmn", token, 6)) text += '±'; + else if (!strncmp("sup2", token, 4)) text += '²'; + else if (!strncmp("sup3", token, 4)) text += '³'; + else if (!strncmp("sup1", token, 4)) text += '¹'; + else if (!strncmp("nbsp", token, 4)) text += 'º'; + else if (!strncmp("pound", token, 5)) text += '£'; + else if (!strncmp("cent", token, 4)) text += '¢'; + else if (!strncmp("frac14", token, 6)) text += '¼'; + else if (!strncmp("frac12", token, 6)) text += '½'; + else if (!strncmp("frac34", token, 6)) text += '¾'; + else if (!strncmp("iquest", token, 6)) text += '¿'; + else if (!strncmp("iexcl", token, 5)) text += '¡'; + else if (!strncmp("ETH", token, 3)) text += 'Ð'; + else if (!strncmp("eth", token, 3)) text += 'ð'; + else if (!strncmp("THORN", token, 5)) text += 'Þ'; + else if (!strncmp("thorn", token, 5)) text += 'þ'; + else if (!strncmp("AElig", token, 5)) text += 'Æ'; + else if (!strncmp("aelig", token, 5)) text += 'æ'; + else if (!strncmp("Oslash", token, 6)) text += 'Ø'; + else if (!strncmp("curren", token, 6)) text += '¤'; + else if (!strncmp("Ccedil", token, 6)) text += 'Ç'; + else if (!strncmp("ccedil", token, 6)) text += 'ç'; + else if (!strncmp("szlig", token, 5)) text += 'ß'; + else if (!strncmp("Ntilde", token, 6)) text += 'Ñ'; + else if (!strncmp("ntilde", token, 6)) text += 'ñ'; + else if (!strncmp("yen", token, 3)) text += '¥'; + else if (!strncmp("not", token, 3)) text += '¬'; + else if (!strncmp("ordf", token, 4)) text += 'ª'; + else if (!strncmp("uml", token, 3)) text += '¨'; + else if (!strncmp("shy", token, 3)) text += ''; + else if (!strncmp("macr", token, 4)) text += '¯'; continue; } @@ -163,152 +160,95 @@ char ThMLGBF::ProcessText(char *text, int maxlen) intoken = false; // process desired tokens if (!strncmp(token, "sync type=\"Strongs\" value=\"", 27)) { - *to++ = '<'; - *to++ = 'W'; + text += "<W"; for (unsigned int i = 27; token[i] != '\"'; i++) - *to++ = token[i]; - *to++ = '>'; + text += token[i]; + text += '>'; continue; } if (!strncmp(token, "sync type=\"morph\" value=\"", 25)) { - *to++ = '<'; - *to++ = 'W'; - *to++ = 'T'; + text += "<WT"; for (unsigned int i = 25; token[i] != '\"'; i++) - *to++ = token[i]; - *to++ = '>'; + text += token[i]; + text += '>'; continue; } else if (!strncmp(token, "scripRef", 8)) { - *to++ = '<'; - *to++ = 'R'; - *to++ = 'X'; - *to++ = '>'; + text += "<RX>"; continue; } - else if (!strncmp(token, "/scripRef", 9)) { - *to++ = '<'; - *to++ = 'R'; - *to++ = 'x'; - *to++ = '>'; + else if (!strncmp(token, "/scripRef", 9)) { + text += "<Rx>"; continue; } else if (!strncmp(token, "note", 4)) { - *to++ = '<'; - *to++ = 'R'; - *to++ = 'F'; - *to++ = '>'; + text += "<RF>"; continue; } else if (!strncmp(token, "/note", 5)) { - *to++ = '<'; - *to++ = 'R'; - *to++ = 'f'; - *to++ = '>'; + text += "<Rf>"; continue; } else if (!strncmp(token, "sup", 3)) { - *to++ = '<'; - *to++ = 'F'; - *to++ = 'S'; - *to++ = '>'; + text += "<FS>"; } else if (!strncmp(token, "/sup", 4)) { - *to++ = '<'; - *to++ = 'F'; - *to++ = 's'; - *to++ = '>'; + text += "<Fs>"; } else if (!strnicmp(token, "font color=#ff0000", 18)) { - *to++ = '<'; - *to++ = 'F'; - *to++ = 'R'; - *to++ = '>'; + text += "<FR>"; continue; } else if (!strnicmp(token, "/font", 5)) { - *to++ = '<'; - *to++ = 'F'; - *to++ = 'r'; - *to++ = '>'; + text += "<Fr>"; continue; } else if (!strncmp(token, "div class=\"sechead\"", 19)) { - *to++ = '<'; - *to++ = 'T'; - *to++ = 'S'; - *to++ = '>'; + text += "<TS>"; sechead = true; continue; } else if (sechead && !strncmp(token, "/div", 19)) { - *to++ = '<'; - *to++ = 'T'; - *to++ = 's'; - *to++ = '>'; + text += "<Ts>"; sechead = false; continue; } else if (!strncmp(token, "div class=\"title\"", 19)) { - *to++ = '<'; - *to++ = 'T'; - *to++ = 'T'; - *to++ = '>'; + text += "<TT>"; title = true; continue; } else if (title && !strncmp(token, "/div", 19)) { - *to++ = '<'; - *to++ = 'T'; - *to++ = 't'; - *to++ = '>'; + text += "<Tt>"; title = false; continue; } else if (!strnicmp(token, "br", 2)) { - *to++ = '<'; - *to++ = 'C'; - *to++ = 'L'; - *to++ = '>'; + text += "<CL>"; continue; } else switch(*token) { case 'I': // font tags case 'i': - *to++ = '<'; - *to++ = 'F'; - *to++ = 'I'; - *to++ = '>'; + text += "<FI>"; continue; case 'B': // bold start case 'b': - *to++ = '<'; - *to++ = 'F'; - *to++ = 'B'; - *to++ = '>'; + text += "<FB>"; continue; case '/': switch(token[1]) { case 'P': case 'p': - *to++ = '<'; - *to++ = 'C'; - *to++ = 'M'; - *to++ = '>'; + text += "<CM>"; continue; case 'I': case 'i': // italic end - *to++ = '<'; - *to++ = 'F'; - *to++ = 'i'; - *to++ = '>'; + text += "<Fi>"; continue; case 'B': // bold start case 'b': - *to++ = '<'; - *to++ = 'F'; - *to++ = 'b'; - *to++ = '>'; + text += "<Fb>"; continue; } } @@ -319,12 +259,25 @@ char ThMLGBF::ProcessText(char *text, int maxlen) token[tokpos++] = *from; token[tokpos+2] = 0; } - else *to++ = *from; + else text += *from; } - *to++ = 0; - *to = 0; + + orig = text; + from = orig.c_str(); + for (text = ""; *from; from++) { //loop to remove extra spaces + if ((strchr(" \t\n\r", *from))) { + while (*(from+1) && (strchr(" \t\n\r", *(from+1)))) { + from++; + } + text += " "; + } + else { + text += *from; + } + } + text += (char)0; + return 0; } - - +SWORD_NAMESPACE_END |