diff options
Diffstat (limited to 'src/modules/filters')
64 files changed, 6249 insertions, 3934 deletions
diff --git a/src/modules/filters/Makefile.am b/src/modules/filters/Makefile.am index c58fb5f..c9a6bef 100644 --- a/src/modules/filters/Makefile.am +++ b/src/modules/filters/Makefile.am @@ -1,22 +1,24 @@ filtersdir = $(top_srcdir)/src/modules/filters libsword_la_SOURCES += $(filtersdir)/swbasicfilter.cpp +libsword_la_SOURCES += $(filtersdir)/swoptfilter.cpp libsword_la_SOURCES += $(filtersdir)/gbfhtml.cpp libsword_la_SOURCES += $(filtersdir)/gbfhtmlhref.cpp +libsword_la_SOURCES += $(filtersdir)/gbfwebif.cpp libsword_la_SOURCES += $(filtersdir)/gbfplain.cpp libsword_la_SOURCES += $(filtersdir)/gbfrtf.cpp libsword_la_SOURCES += $(filtersdir)/plainhtml.cpp -libsword_la_SOURCES += $(filtersdir)/rwphtml.cpp -libsword_la_SOURCES += $(filtersdir)/rwprtf.cpp libsword_la_SOURCES += $(filtersdir)/cipherfil.cpp libsword_la_SOURCES += $(filtersdir)/rtfhtml.cpp libsword_la_SOURCES += $(filtersdir)/gbfstrongs.cpp libsword_la_SOURCES += $(filtersdir)/gbffootnotes.cpp libsword_la_SOURCES += $(filtersdir)/gbfheadings.cpp +libsword_la_SOURCES += $(filtersdir)/gbfredletterwords.cpp libsword_la_SOURCES += $(filtersdir)/gbfmorph.cpp libsword_la_SOURCES += $(filtersdir)/plainfootnotes.cpp + libsword_la_SOURCES += $(filtersdir)/thmlstrongs.cpp libsword_la_SOURCES += $(filtersdir)/thmlfootnotes.cpp libsword_la_SOURCES += $(filtersdir)/thmlheadings.cpp @@ -30,8 +32,24 @@ libsword_la_SOURCES += $(filtersdir)/thmlgbf.cpp libsword_la_SOURCES += $(filtersdir)/thmlrtf.cpp libsword_la_SOURCES += $(filtersdir)/thmlhtml.cpp libsword_la_SOURCES += $(filtersdir)/thmlhtmlhref.cpp +libsword_la_SOURCES += $(filtersdir)/thmlwebif.cpp + +libsword_la_SOURCES += $(filtersdir)/thmlosis.cpp +libsword_la_SOURCES += $(filtersdir)/gbfosis.cpp libsword_la_SOURCES += $(filtersdir)/thmlplain.cpp +libsword_la_SOURCES += $(filtersdir)/osisheadings.cpp +libsword_la_SOURCES += $(filtersdir)/osisfootnotes.cpp +libsword_la_SOURCES += $(filtersdir)/osishtmlhref.cpp +libsword_la_SOURCES += $(filtersdir)/osiswebif.cpp 
+libsword_la_SOURCES += $(filtersdir)/osismorph.cpp +libsword_la_SOURCES += $(filtersdir)/osisstrongs.cpp +libsword_la_SOURCES += $(filtersdir)/osisplain.cpp +libsword_la_SOURCES += $(filtersdir)/osisrtf.cpp +libsword_la_SOURCES += $(filtersdir)/osislemma.cpp +libsword_la_SOURCES += $(filtersdir)/osisredletterwords.cpp +libsword_la_SOURCES += $(filtersdir)/osisscripref.cpp + libsword_la_SOURCES += $(filtersdir)/unicodertf.cpp libsword_la_SOURCES += $(filtersdir)/latin1utf8.cpp libsword_la_SOURCES += $(filtersdir)/latin1utf16.cpp @@ -41,25 +59,34 @@ libsword_la_SOURCES += $(filtersdir)/scsuutf8.cpp libsword_la_SOURCES += $(filtersdir)/utf8html.cpp libsword_la_SOURCES += $(filtersdir)/utf8latin1.cpp -libsword_la_SOURCES += $(filtersdir)/thmlolb.cpp +libsword_la_SOURCES += $(filtersdir)/utf8cantillation.cpp +libsword_la_SOURCES += $(filtersdir)/utf8hebrewpoints.cpp +libsword_la_SOURCES += $(filtersdir)/utf8greekaccents.cpp libsword_la_SOURCES += $(filtersdir)/greeklexattribs.cpp -if ICU -ICUDEFS = -D_ICU_ SWICUSRC = $(filtersdir)/utf8transliterator.cpp SWICUSRC += $(filtersdir)/utf8nfc.cpp SWICUSRC += $(filtersdir)/utf8nfkd.cpp SWICUSRC += $(filtersdir)/utf8arshaping.cpp SWICUSRC += $(filtersdir)/utf8bidireorder.cpp + +if ICU +ICUDEFS = -D_ICU_ +DISTSWICUSRC = +SWREALICUSRC = $(SWICUSRC) +else +if ICUSWORD +ICUDEFS = -D_ICU_ -D_ICUSWORD_ +DISTSWICUSRC = +SWREALICUSRC = $(SWICUSRC) else -SWICUSRC = -ICUDEFS = +DISTSWICUSRC = $(SWICUSRC) +SWREALICUSRC = +endif endif -libsword_la_SOURCES += $(SWICUSRC) -DEFS += $(ICUDEFS) -libsword_la_SOURCES += $(filtersdir)/utf8cantillation.cpp -libsword_la_SOURCES += $(filtersdir)/utf8hebrewpoints.cpp -libsword_la_SOURCES += $(filtersdir)/utf8greekaccents.cpp +INCLUDES += $(ICUDEFS) +libsword_la_SOURCES += $(SWREALICUSRC) +EXTRA_DIST = $(DISTSWICUSRC) diff --git a/src/modules/filters/cipherfil.cpp b/src/modules/filters/cipherfil.cpp index ad55396..abdd0fc 100644 --- a/src/modules/filters/cipherfil.cpp +++ 
b/src/modules/filters/cipherfil.cpp @@ -1,13 +1,13 @@ /****************************************************************************** * - * cipherfil - SWFilter decendant to decipher a module + * cipherfil - SWFilter descendant to decipher a module */ #include <stdlib.h> -#include <string.h> #include <cipherfil.h> +SWORD_NAMESPACE_START CipherFilter::CipherFilter(const char *key) { cipher = new SWCipher((unsigned char *)key); @@ -24,15 +24,21 @@ SWCipher *CipherFilter::getCipher() { } -char CipherFilter::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) { - unsigned int len; -// len = strlen(text); - len = maxlen; - if (len > 0) { - cipher->cipherBuf(&len, text); - strncpy(text, cipher->Buf(), (len < (unsigned int)maxlen) ? len : maxlen); - } - text[maxlen] = 0; - text[maxlen+1] = 0; +char CipherFilter::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + if (text.length() > 2) { //check if it's large enough to substract 2 in the next step. + unsigned long len = text.length(); + if (!key) { // hack, using key to determine encipher, or decipher + cipher->cipherBuf(&len, text.getRawData()); //set buffer to enciphered text + memcpy(text.getRawData(), cipher->Buf(), len); +// text = cipher->Buf(); //get the deciphered buffer + } + else if ((unsigned long)key == 1) { + cipher->Buf(text.getRawData(), len); + memcpy(text.getRawData(), cipher->cipherBuf(&len), len); +// text = cipher->cipherBuf(&len); + } + } return 0; } + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/gbffootnotes.cpp b/src/modules/filters/gbffootnotes.cpp index c5b7b90..38f1106 100644 --- a/src/modules/filters/gbffootnotes.cpp +++ b/src/modules/filters/gbffootnotes.cpp @@ -1,62 +1,142 @@ /****************************************************************************** * - * gbffootnotes - SWFilter decendant to hide or show footnotes + * gbffootnotes - SWFilter descendant to hide or show footnotes * in a GBF module. 
*/ #include <stdlib.h> -#include <string.h> #include <gbffootnotes.h> +#include <swmodule.h> +#include <swbuf.h> +#include <versekey.h> +#include <utilxml.h> #ifndef __GNUC__ #else #include <unixstr.h> #endif +SWORD_NAMESPACE_START -const char GBFFootnotes::on[] = "On"; -const char GBFFootnotes::off[] = "Off"; -const char GBFFootnotes::optName[] = "Footnotes"; -const char GBFFootnotes::optTip[] = "Toggles Footnotes On and Off if they exist"; +const char oName[] = "Footnotes"; +const char oTip[] = "Toggles Footnotes On and Off if they exist"; +const SWBuf choices[3] = {"On", "Off", ""}; +const StringList oValues(&choices[0], &choices[2]); -GBFFootnotes::GBFFootnotes() { - option = false; - options.push_back(on); - options.push_back(off); + +GBFFootnotes::GBFFootnotes() : SWOptionFilter(oName, oTip, &oValues) { + setOptionValue("Off"); } GBFFootnotes::~GBFFootnotes() { } -void GBFFootnotes::setOptionValue(const char *ival) -{ - option = (!stricmp(ival, on)); -} -const char *GBFFootnotes::getOptionValue() +char GBFFootnotes::processText (SWBuf &text, const SWKey *key, const SWModule *module) { - return (option) ? 
on:off; -} + + SWBuf token; + bool intoken = false; + bool hide = false; + SWBuf tagText; + XMLTag startTag; + SWBuf refs = ""; + int footnoteNum = 1; + char buf[254]; + VerseKey parser = key->getText(); -char GBFFootnotes::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) -{ + SWBuf orig = text; + const char *from = orig.c_str(); + + for (text = ""; *from; from++) { + if (*from == '<') { + intoken = true; + token = ""; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + + XMLTag tag(token); + if (!strcmp(tag.getName(), "RF")) { + refs = ""; + startTag = tag; + hide = true; + tagText = ""; + continue; + } + else if (!strcmp(tag.getName(), "Rf")) { + if (module->isProcessEntryAttributes()) { + if(tagText.length() == 1 || !strcmp(module->Name(), "IGNT")) { + if (option) { // for ASV marks text in verse then put explanation at end of verse + text += " <FA>("; + text.append(tagText); + text += ")<Fr>"; + hide = false; + continue; + } + } + SWBuf fc = module->getEntryAttributes()["Footnote"]["count"]["value"]; + footnoteNum = (fc.length()) ? 
atoi(fc.c_str()) : 0; + sprintf(buf, "%i", ++footnoteNum); + module->getEntryAttributes()["Footnote"]["count"]["value"] = buf; + StringList attributes = startTag.getAttributeNames(); + for (StringList::iterator it = attributes.begin(); it != attributes.end(); it++) { + module->getEntryAttributes()["Footnote"][buf][it->c_str()] = startTag.getAttribute(it->c_str()); + } + module->getEntryAttributes()["Footnote"][buf]["body"] = tagText; + startTag.setAttribute("swordFootnote", buf); + } + hide = false; + if (option) { + text += startTag; + text.append(tagText); + } + else continue; + } + if (!hide) { + text += '<'; + text.append(token); + text += '>'; + } + else { + tagText += '<'; + tagText.append(token); + tagText += '>'; + } + continue; + } + if (intoken) { //copy token + token += *from; + } + else if (!hide) { //copy text which is not inside a token + text += *from; + } + else tagText += *from; + } + return 0; + + + + + + + + + /* if (!option) { // if we don't want footnotes - char *to, *from, token[4096]; // cheese. Fix. + char token[4096]; // cheese. Fix. 
int tokpos = 0; bool intoken = false; int len; bool hide = false; - len = strlen(text) + 1; // shift string to right of buffer - if (len < maxlen) { - memmove(&text[maxlen - len], text, len); - from = &text[maxlen - len]; - } - else from = text; // ------------------------------- - - for (to = text; *from; from++) { + const char *from; + SWBuf orig = text; + from = orig.c_str(); + for (text = ""; *from; from++) { if (*from == '<') { intoken = true; tokpos = 0; @@ -93,10 +173,9 @@ char GBFFootnotes::ProcessText(char *text, int maxlen, const SWKey *key, const S } // if not a footnote token, keep token in text if (!hide) { - *to++ = '<'; - for (char *tok = token; *tok; tok++) - *to++ = *tok; - *to++ = '>'; + text += '<'; + text += token; + text += '>'; } continue; } @@ -107,12 +186,12 @@ char GBFFootnotes::ProcessText(char *text, int maxlen, const SWKey *key, const S } else { if (!hide) { - *to++ = *from; + text += *from; } } } - *to++ = 0; - *to = 0; } - return 0; + return 0;*/ } + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/gbfheadings.cpp b/src/modules/filters/gbfheadings.cpp index 590e2fa..01a34ca 100644 --- a/src/modules/filters/gbfheadings.cpp +++ b/src/modules/filters/gbfheadings.cpp @@ -1,62 +1,47 @@ /****************************************************************************** * - * gbfheadings - SWFilter decendant to hide or show headings + * gbfheadings - SWFilter descendant to hide or show headings * in a GBF module. 
*/ #include <stdlib.h> -#include <string.h> #include <gbfheadings.h> #ifndef __GNUC__ #else #include <unixstr.h> #endif +SWORD_NAMESPACE_START -const char GBFHeadings::on[] = "On"; -const char GBFHeadings::off[] = "Off"; -const char GBFHeadings::optName[] = "Headings"; -const char GBFHeadings::optTip[] = "Toggles Headings On and Off if they exist"; +const char oName[] = "Headings"; +const char oTip[] = "Toggles Headings On and Off if they exist"; -GBFHeadings::GBFHeadings() { - option = false; - options.push_back(on); - options.push_back(off); +const SWBuf choices[3] = {"On", "Off", ""}; +const StringList oValues(&choices[0], &choices[2]); + +GBFHeadings::GBFHeadings() : SWOptionFilter(oName, oTip, &oValues) { + setOptionValue("Off"); } GBFHeadings::~GBFHeadings() { } -void GBFHeadings::setOptionValue(const char *ival) -{ - option = (!stricmp(ival, on)); -} - -const char *GBFHeadings::getOptionValue() -{ - return (option) ? on:off; -} -char GBFHeadings::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) -{ +char GBFHeadings::processText (SWBuf &text, const SWKey *key, const SWModule *module) { if (!option) { // if we don't want headings - char *to, *from, token[2048]; // cheese. Fix. + char token[2048]; // cheese. Fix. 
int tokpos = 0; bool intoken = false; int len; bool hide = false; - len = strlen(text) + 1; // shift string to right of buffer - if (len < maxlen) { - memmove(&text[maxlen - len], text, len); - from = &text[maxlen - len]; - } - else from = text; // ------------------------------- - - for (to = text; *from; from++) { + const char *from; + SWBuf orig = text; + from = orig.c_str(); + for (text = ""; *from; from++) { if (*from == '<') { intoken = true; tokpos = 0; @@ -82,10 +67,10 @@ char GBFHeadings::ProcessText(char *text, int maxlen, const SWKey *key, const SW } // if not a heading token, keep token in text if (!hide) { - *to++ = '<'; + text += '<'; for (char *tok = token; *tok; tok++) - *to++ = *tok; - *to++ = '>'; + text += *tok; + text += '>'; } continue; } @@ -96,12 +81,12 @@ char GBFHeadings::ProcessText(char *text, int maxlen, const SWKey *key, const SW } else { if (!hide) { - *to++ = *from; + text += *from; } } } - *to++ = 0; - *to = 0; } return 0; } + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/gbfhtml.cpp b/src/modules/filters/gbfhtml.cpp index 73d445a..775bc06 100644 --- a/src/modules/filters/gbfhtml.cpp +++ b/src/modules/filters/gbfhtml.cpp @@ -1,9 +1,8 @@ /*************************************************************************** - gbfhtml.cpp - description + gbfhtml.cpp - GBF to HTML filter ------------------- - begin : Thu Jun 24 1999 - copyright : (C) 1999 by Torsten Uhlmann - email : TUhlmann@gmx.de + begin : 2001-09-03 + copyright : 2001 by CrossWire Bible Society ***************************************************************************/ /*************************************************************************** @@ -16,521 +15,154 @@ ***************************************************************************/ #include <stdlib.h> -#include <string.h> #include <gbfhtml.h> +#include <ctype.h> +SWORD_NAMESPACE_START -GBFHTML::GBFHTML() -{ +GBFHTML::GBFHTML() { + setTokenStart("<"); + setTokenEnd(">"); + + setTokenCaseSensitive(true); + + 
addTokenSubstitute("Rf", ")</small></font>"); + addTokenSubstitute("Rx", "</a>"); + addTokenSubstitute("FI", "<i>"); // italics begin + addTokenSubstitute("Fi", "</i>"); + addTokenSubstitute("FB", "<n>"); // bold begin + addTokenSubstitute("Fb", "</n>"); + addTokenSubstitute("FR", "<font color=\"#FF0000\">"); // words of Jesus begin + addTokenSubstitute("Fr", "</font>"); + addTokenSubstitute("FU", "<u>"); // underline begin + addTokenSubstitute("Fu", "</u>"); + addTokenSubstitute("FO", "<cite>"); // Old Testament quote begin + addTokenSubstitute("Fo", "</cite>"); + addTokenSubstitute("FS", "<sup>"); // Superscript begin// Subscript begin + addTokenSubstitute("Fs", "</sup>"); + addTokenSubstitute("FV", "<sub>"); // Subscript begin + addTokenSubstitute("Fv", "</sub>"); + addTokenSubstitute("TT", "<big>"); // Book title begin + addTokenSubstitute("Tt", "</big>"); + addTokenSubstitute("PP", "<cite>"); // poetry begin + addTokenSubstitute("Pp", "</cite>"); + addTokenSubstitute("Fn", "</font>"); // font end + addTokenSubstitute("CL", "<br />"); // new line + addTokenSubstitute("CM", "<!P><br />"); // paragraph <!P> is a non showing comment that can be changed in the front end to <P> if desired + addTokenSubstitute("CG", ""); // ??? + addTokenSubstitute("CT", ""); // ??? 
+ addTokenSubstitute("JR", "<div align=\"right\">"); // right align begin + addTokenSubstitute("JC", "<div align=\"center\">"); // center align begin + addTokenSubstitute("JL", "</div>"); // align end + } -char GBFHTML::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) -{ - char *to, *from, token[2048]; - int tokpos = 0; - bool intoken = false; - bool hasFootnotePreTag = false; - bool isRightJustified = false; - bool isCentered = false; - int len; +bool GBFHTML::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) { const char *tok; + char val[128]; + char *valto; + const char *num; + MyUserData *u = (MyUserData *)userData; - len = strlen(text) + 1; // shift string to right of buffer - if (len < maxlen) { - memmove(&text[maxlen - len], text, len); - from = &text[maxlen - len]; - } - else - from = text; // ------------------------------- - - for (to = text; *from; from++) - { - if (*from == '\n') { - *from = ' '; - } - if (*from == '<') { - intoken = true; - tokpos = 0; - token[0] = 0; - token[1] = 0; - token[2] = 0; - continue; + if (!substituteToken(buf, token)) { + // deal with OSIS note tags. 
Just hide till OSISRTF + if (!strncmp(token, "note ", 5)) { + // let's stop text from going to output + u->suspendTextPassThru = true; } - if (*from == '>') { - intoken = false; - // process desired tokens - switch (*token) { - case 'W': // Strongs - switch(token[1]) - { - case 'G': // Greek - case 'H': // Hebrew - case 'T': // Tense - *to++ = ' '; - *to++ = '<'; - *to++ = 's'; - *to++ = 'm'; - *to++ = 'a'; - *to++ = 'l'; - *to++ = 'l'; - *to++ = '>'; - *to++ = '<'; - *to++ = 'e'; - *to++ = 'm'; - *to++ = '>'; - for (tok = token+2; *tok; tok++) - *to++ = *tok; - *to++ = '<'; - *to++ = '/'; - *to++ = 'e'; - *to++ = 'm'; - *to++ = '>'; - *to++ = '<'; - *to++ = '/'; - *to++ = 's'; - *to++ = 'm'; - *to++ = 'a'; - *to++ = 'l'; - *to++ = 'l'; - *to++ = '>'; - *to++ = ' '; - continue; - } - break; - case 'R': - switch(token[1]) - { - case 'X': - *to++ = '<'; - *to++ = 'a'; - *to++ = ' '; - *to++ = 'h'; - *to++ = 'r'; - *to++ = 'e'; - *to++ = 'f'; - *to++ = '='; - *to++ = '\"'; - for (tok = token + 3; *tok; tok++) { - if(*tok != '<' && *tok+1 != 'R' && *tok+2 != 'x') { - *to++ = *tok; - } - else { - break; - } - } - *to++ = '\"'; - *to++ = '>'; - continue; - case 'x': - *to++ = '<'; - *to++ = '/'; - *to++ = 'a'; - *to++ = '>'; - continue; - case 'B': //word(s) explained in footnote - *to++ = '<'; - *to++ = 'i'; - *to++ = '>'; - hasFootnotePreTag = true; //we have the RB tag - continue; - case 'F': // footnote begin - if (hasFootnotePreTag) { - *to++ = '<'; - *to++ = '/'; - *to++ = 'i'; - *to++ = '>'; - *to++ = ' '; - } - *to++ = '<'; - *to++ = 'f'; - *to++ = 'o'; - *to++ = 'n'; - *to++ = 't'; - *to++ = ' '; - *to++ = 'c'; - *to++ = 'o'; - *to++ = 'l'; - *to++ = 'o'; - *to++ = 'r'; - *to++ = '='; - *to++ = '\"'; - *to++ = '#'; - *to++ = '8'; - *to++ = '0'; - *to++ = '0'; - *to++ = '0'; - *to++ = '0'; - *to++ = '0'; - *to++ = '\"'; - *to++ = '>'; + + else if (!strncmp(token, "/note", 5)) { + u->suspendTextPassThru = false; + } - *to++ = ' '; - *to++ = '<'; - *to++ = 's'; - 
*to++ = 'm'; - *to++ = 'a'; - *to++ = 'l'; - *to++ = 'l'; - *to++ = '>'; - *to++ = '('; + else if (!strncmp(token, "w", 1)) { + // OSIS Word (temporary until OSISRTF is done) + valto = val; + num = strstr(token, "lemma=\"x-Strongs:"); + if (num) { + for (num+=17; ((*num) && (*num != '\"')); num++) + *valto++ = *num; + *valto = 0; + if (atoi((!isdigit(*val))?val+1:val) < 5627) { + buf += " <small><em><"; + for (tok = (!isdigit(*val))?val+1:val; *tok; tok++) + buf += *tok; + buf += "></em></small> "; + } + } + valto = val; + num = strstr(token, "morph=\"x-Robinson:"); + if (num) { + for (num+=18; ((*num) && (*num != '\"')); num++) + *valto++ = *num; + *valto = 0; + // normal robinsons tense + buf += " <small><em>("; + for (tok = val; *tok; tok++) + buf += *tok; + buf += ")</em></small> "; + } + } + + else if (!strncmp(token, "WG", 2) || !strncmp(token, "WH", 2)) { // strong's numbers + buf += " <small><em><"; + for (tok = token + 2; *tok; tok++) + buf += *tok; + buf += "></em></small> "; + } - continue; - case 'f': // footnote end - *to++ = ')'; - *to++ = '<'; - *to++ = '/'; - *to++ = 's'; - *to++ = 'm'; - *to++ = 'a'; - *to++ = 'l'; - *to++ = 'l'; - *to++ = '>'; - *to++ = ' '; - *to++ = '<'; - *to++ = '/'; - *to++ = 'f'; - *to++ = 'o'; - *to++ = 'n'; - *to++ = 't'; - *to++ = '>'; - hasFootnotePreTag = false; - continue; - } - break; + else if (!strncmp(token, "WTG", 3) || !strncmp(token, "WTH", 3)) { // strong's numbers tense + buf += " <small><em><"; + for (tok = token + 3; *tok; tok++) + if(*tok != '\"') + buf += *tok; + buf += ")</em></small> "; + } - case 'F': // font tags - switch(token[1]) - { - case 'I': // italic start - *to++ = '<'; - *to++ = 'i'; - *to++ = '>'; - continue; - case 'i': // italic end - *to++ = '<'; - *to++ = '/'; - *to++ = 'i'; - *to++ = '>'; - continue; - case 'B': // bold start - *to++ = '<'; - *to++ = 'b'; - *to++ = '>'; - continue; - case 'b': // bold end - *to++ = '<'; - *to++ = '/'; - *to++ = 'b'; - *to++ = '>'; - continue; - case 'R': 
// words of Jesus begin - *to++ = '<'; - *to++ = 'f'; - *to++ = 'o'; - *to++ = 'n'; - *to++ = 't'; - *to++ = ' '; - *to++ = 'c'; - *to++ = 'o'; - *to++ = 'l'; - *to++ = 'o'; - *to++ = 'r'; - *to++ = '='; - *to++ = '#'; - *to++ = 'F'; - *to++ = 'F'; - *to++ = '0'; - *to++ = '0'; - *to++ = '0'; - *to++ = '0'; - *to++ = '>'; - continue; - case 'r': // words of Jesus end - *to++ = '<'; - *to++ = '/'; - *to++ = 'f'; - *to++ = 'o'; - *to++ = 'n'; - *to++ = 't'; - *to++ = '>'; - continue; - case 'U': // Underline start - *to++ = '<'; - *to++ = 'u'; - *to++ = '>'; - continue; - case 'u': // Underline end - *to++ = '<'; - *to++ = '/'; - *to++ = 'u'; - *to++ = '>'; - continue; - case 'O': // Old Testament quote begin - *to++ = '<'; - *to++ = 'c'; - *to++ = 'i'; - *to++ = 't'; - *to++ = 'e'; - *to++ = '>'; - continue; - case 'o': // Old Testament quote end - *to++ = '<'; - *to++ = '/'; - *to++ = 'c'; - *to++ = 'i'; - *to++ = 't'; - *to++ = 'e'; - *to++ = '>'; - continue; - case 'S': // Superscript begin - *to++ = '<'; - *to++ = 's'; - *to++ = 'u'; - *to++ = 'p'; - *to++ = '>'; - continue; - case 's': // Superscript end - *to++ = '<'; - *to++ = '/'; - *to++ = 's'; - *to++ = 'u'; - *to++ = 'p'; - *to++ = '>'; - continue; - case 'V': // Subscript begin - *to++ = '<'; - *to++ = 's'; - *to++ = 'u'; - *to++ = 'b'; - *to++ = '>'; - continue; - case 'v': // Subscript end - *to++ = '<'; - *to++ = '/'; - *to++ = 's'; - *to++ = 'u'; - *to++ = 'b'; - *to++ = '>'; - continue; - case 'N': - *to++ = '<'; - *to++ = 'f'; - *to++ = 'o'; - *to++ = 'n'; - *to++ = 't'; - *to++ = ' '; - *to++ = 'f'; - *to++ = 'a'; - *to++ = 'c'; - *to++ = 'e'; - *to++ = '='; - *to++ = '"'; - for (tok = token + 2; *tok; tok++) - *to++ = *tok; - *to++ = '"'; - *to++ = '>'; - continue; - case 'n': - *to++ = '<'; - *to++ = '/'; - *to++ = 'f'; - *to++ = 'o'; - *to++ = 'n'; - *to++ = 't'; - *to++ = '>'; - continue; - } - break; - case 'C': // special character tags - switch(token[1]) - { - case 'A': // ASCII value - 
*to++ = (char)atoi(&token[2]); - continue; - case 'G': - //*to++ = ' '; - continue; - case 'L': // line break - *to++ = '<'; - *to++ = 'b'; - *to++ = 'r'; - *to++ = ' '; - *to++ = '/'; - *to++ = '>'; - *to++ = ' '; - continue; - case 'M': // new paragraph - *to++ = '<'; - *to++ = 'b'; - *to++ = 'r'; - *to++ = ' '; - *to++ = '/'; - *to++ = '>'; - continue; - case 'T': - //*to++ = ' '; - continue; - } - break; - case 'J': //Justification - switch(token[1]) - { - case 'R': //right - *to++ = '<'; - *to++ = 'd'; - *to++ = 'i'; - *to++ = 'v'; - *to++ = ' '; - *to++ = 'a'; - *to++ = 'l'; - *to++ = 'i'; - *to++ = 'g'; - *to++ = 'n'; - *to++ = '='; - *to++ = '\"'; - *to++ = 'r'; - *to++ = 'i'; - *to++ = 'g'; - *to++ = 'h'; - *to++ = 't'; - *to++ = '\"'; - *to++ = '>'; - isRightJustified = true; - continue; + else if (!strncmp(token, "RX", 2)) { + buf += "<i>"; + for (tok = token + 3; *tok; tok++) { + if(*tok != '<' && *tok+1 != 'R' && *tok+2 != 'x') { + buf += *tok; + } + else { + break; + } + } + buf += "</i>"; + } - case 'C': //center - *to++ = '<'; - *to++ = 'd'; - *to++ = 'i'; - *to++ = 'v'; - *to++ = ' '; - *to++ = 'a'; - *to++ = 'l'; - *to++ = 'i'; - *to++ = 'g'; - *to++ = 'n'; - *to++ = '='; - *to++ = '\"'; - *to++ = 'c'; - *to++ = 'e'; - *to++ = 'n'; - *to++ = 't'; - *to++ = 'e'; - *to++ = 'r'; - *to++ = '\"'; - *to++ = '>'; - isCentered = true; - continue; + else if (!strncmp(token, "RB", 2)) { + buf += "<i>"; + u->hasFootnotePreTag = true; + } - case 'L': //left, reset right and center - if (isCentered) { - *to++ = '<'; - *to++ = '/'; - *to++ = 'c'; - *to++ = 'e'; - *to++ = 'n'; - *to++ = 't'; - *to++ = 'e'; - *to++ = 'r'; - *to++ = '>'; - isCentered = false; - } - if (isRightJustified) { - *to++ = '<'; - *to++ = '/'; - *to++ = 'd'; - *to++ = 'i'; - *to++ = 'v'; - *to++ = '>'; - isRightJustified = false; - } - continue; - } - break; - case 'T': // title formatting - switch(token[1]) - { - case 'T': // Book title begin - *to++ = '<'; - *to++ = 'b'; - *to++ = 'i'; - 
*to++ = 'g'; - *to++ = '>'; - continue; - case 't': - *to++ = '<'; - *to++ = '/'; - *to++ = 'b'; - *to++ = 'i'; - *to++ = 'g'; - *to++ = '>'; - continue;/* - case 'S': - *to++ = '<'; - *to++ = 'b'; - *to++ = 'r'; - *to++ = ' '; - *to++ = '/'; - *to++ = '>'; - *to++ = '<'; - *to++ = 'b'; - *to++ = 'i'; - *to++ = 'g'; - *to++ = '>'; - continue; - case 's': - *to++ = '<'; - *to++ = '/'; - *to++ = 'b'; - *to++ = 'i'; - *to++ = 'g'; - *to++ = '>'; - *to++ = '<'; - *to++ = 'b'; - *to++ = 'r'; - *to++ = ' '; - *to++ = '/'; - *to++ = '>'; - continue;*/ - } - break; - - case 'P': // special formatting - switch(token[1]) - { - case 'P': // Poetry begin - *to++ = '<'; - *to++ = 'c'; - *to++ = 'i'; - *to++ = 't'; - *to++ = 'e'; - *to++ = '>'; - continue; - case 'p': - *to++ = '<'; - *to++ = '/'; - *to++ = 'c'; - *to++ = 'i'; - *to++ = 't'; - *to++ = 'e'; - *to++ = '>'; - continue; - } - break; + else if (!strncmp(token, "RF", 2)) { + if (u->hasFootnotePreTag) { + u->hasFootnotePreTag = false; + buf += "</i> "; } - continue; + buf += "<font color=\"#800000\"><small> ("; + } + + else if (!strncmp(token, "FN", 2)) { + buf += "<font face=\""; + for (tok = token + 2; *tok; tok++) + if(*tok != '\"') + buf += *tok; + buf += "\">"; + } + + else if (!strncmp(token, "CA", 2)) { // ASCII value + buf += (char)atoi(&token[2]); + } + + else { + return false; } - if (intoken) { - if (tokpos < 2045) { - token[tokpos++] = *from; - token[tokpos+2] = 0; - } - } - else - *to++ = *from; } - *to++ = 0; - *to = 0; - return 0; + return true; } + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/gbfhtmlhref.cpp b/src/modules/filters/gbfhtmlhref.cpp index 30b27ba..4061150 100644 --- a/src/modules/filters/gbfhtmlhref.cpp +++ b/src/modules/filters/gbfhtmlhref.cpp @@ -16,8 +16,13 @@ ***************************************************************************/ #include <stdlib.h> -#include <string.h> #include <gbfhtmlhref.h> +#include <swmodule.h> +#include <utilxml.h> +#include <versekey.h> +#include 
<ctype.h> + +SWORD_NAMESPACE_START GBFHTMLHREF::GBFHTMLHREF() { setTokenStart("<"); @@ -25,12 +30,13 @@ GBFHTMLHREF::GBFHTMLHREF() { setTokenCaseSensitive(true); - addTokenSubstitute("Rf", ")</small></font>"); + //addTokenSubstitute("Rf", ")</small></font>"); + addTokenSubstitute("FA", "<font color=\"#800000\">"); // for ASV footnotes to mark text addTokenSubstitute("Rx", "</a>"); addTokenSubstitute("FI", "<i>"); // italics begin addTokenSubstitute("Fi", "</i>"); - addTokenSubstitute("FB", "<n>"); // bold begin - addTokenSubstitute("Fb", "</n>"); + addTokenSubstitute("FB", "<b>"); // bold begin + addTokenSubstitute("Fb", "</b>"); addTokenSubstitute("FR", "<font color=\"#FF0000\">"); // words of Jesus begin addTokenSubstitute("Fr", "</font>"); addTokenSubstitute("FU", "<u>"); // underline begin @@ -57,87 +63,160 @@ GBFHTMLHREF::GBFHTMLHREF() { } -bool GBFHTMLHREF::handleToken(char **buf, const char *token, DualStringMap &userData) { +bool GBFHTMLHREF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) { const char *tok; + char val[128]; + char *valto; + const char *num; + MyUserData *u = (MyUserData *)userData; if (!substituteToken(buf, token)) { - if (!strncmp(token, "WG", 2) || !strncmp(token, "WH", 2)) { // strong's numbers - pushString(buf, " <small><em><<a href=\"#"); + XMLTag tag(token); + if (!strncmp(token, "w", 1)) { + // OSIS Word (temporary until OSISRTF is done) + valto = val; + num = strstr(token, "lemma=\"x-Strongs:"); + if (num) { + for (num+=17; ((*num) && (*num != '\"')); num++) + *valto++ = *num; + *valto = 0; + if (atoi((!isdigit(*val))?val+1:val) < 5627) { + buf += " <small><em><<a href=\"type=Strongs value="; + for (tok = val; *tok; tok++) + buf += *tok; + buf += "\">"; + for (tok = (!isdigit(*val))?val+1:val; *tok; tok++) + buf += *tok; + buf += "</a>></em></small> "; + //cout << buf; + + } + /* forget these for now + else { + // verb morph + sprintf(wordstr, "%03d", word-1); + 
module->getEntryAttributes()["Word"][wordstr]["Morph"] = val; + } + */ + } + valto = val; + num = strstr(token, "morph=\"x-Robinson:"); + if (num) { + for (num+=18; ((*num) && (*num != '\"')); num++) + *valto++ = *num; + *valto = 0; + buf += " <small><em>(<a href=\"type=morph class=Robinson value="; + for (tok = val; *tok; tok++) + // normal robinsons tense + buf += *tok; + buf += "\">"; + for (tok = val; *tok; tok++) + //if(*tok != '\"') + buf += *tok; + buf += "</a>)</em></small> "; + } + } + + else if (!strncmp(token, "WG", 2) || !strncmp(token, "WH", 2)) { // strong's numbers + buf += " <small><em><<a href=\"type=Strongs value="; for (tok = token+1; *tok; tok++) //if(token[i] != '\"') - *(*buf)++ = *tok; - *(*buf)++ = '\"'; - *(*buf)++ = '>'; + buf += *tok; + buf += "\">"; for (tok = token + 2; *tok; tok++) //if(token[i] != '\"') - *(*buf)++ = *tok; - pushString(buf, "</a>></em></small>"); + buf += *tok; + buf += "</a>></em></small>"; } else if (!strncmp(token, "WTG", 3) || !strncmp(token, "WTH", 3)) { // strong's numbers tense - pushString(buf, " <small><em>(<A HREF=\"#"); + buf += " <small><em>(<a href=\"type=Strongs value="; for (tok = token + 2; *tok; tok++) if(*tok != '\"') - *(*buf)++ = *tok; - *(*buf)++ = '\"'; - *(*buf)++ = '>'; + buf += *tok; + buf += "\">"; for (tok = token + 3; *tok; tok++) if(*tok != '\"') - *(*buf)++ = *tok; - pushString(buf, "</a>)</em></small>"); + buf += *tok; + buf += "</a>)</em></small>"; } else if (!strncmp(token, "WT", 2) && strncmp(token, "WTH", 3) && strncmp(token, "WTG", 3)) { // morph tags - pushString(buf, " <small><em>(<a href=\"M"); + buf += " <small><em>(<a href=\"type=morph class=none value="; for (tok = token + 2; *tok; tok++) if(*tok != '\"') - *(*buf)++ = *tok; - *(*buf)++ = '\"'; - *(*buf)++ = '>'; + buf += *tok; + buf += "\">"; for (tok = token + 2; *tok; tok++) if(*tok != '\"') - *(*buf)++ = *tok; - pushString(buf, "</a>)</em></small>"); + buf += *tok; + buf += "</a>)</em></small>"; } - else if 
(!strncmp(token, "RX", 2)) { - pushString(buf, "<a href=\""); + else if (!strcmp(tag.getName(), "RX")) { + buf += "<a href=\""; for (tok = token + 3; *tok; tok++) { if(*tok != '<' && *tok+1 != 'R' && *tok+2 != 'x') { - *(*buf)++ = *tok; + buf += *tok; } else { break; } } - *(*buf)++ = '\"'; - *(*buf)++ = '>'; + buf += "\">"; } - + else if (!strcmp(tag.getName(), "RF")) { + SWBuf type = tag.getAttribute("type"); + SWBuf footnoteNumber = tag.getAttribute("swordFootnote"); + VerseKey *vkey; + // see if we have a VerseKey * or descendant + try { + vkey = SWDYNAMIC_CAST(VerseKey, u->key); + } + catch ( ... ) { } + if (vkey) { + // leave this special osis type in for crossReference notes types? Might thml use this some day? Doesn't hurt. + //char ch = ((tag.getAttribute("type") && ((!strcmp(tag.getAttribute("type"), "crossReference")) || (!strcmp(tag.getAttribute("type"), "x-cross-ref")))) ? 'x':'n'); + buf.appendFormatted("<a href=\"noteID=%s.%c.%s\"><small><sup>*%c</sup></small></a> ", vkey->getText(), 'n', footnoteNumber.c_str(), 'n'); + } + u->suspendTextPassThru = true; + } + else if (!strcmp(tag.getName(), "Rf")) { + u->suspendTextPassThru = false; + } +/* else if (!strncmp(token, "RB", 2)) { - pushString(buf, "<i>"); - userData["hasFootnotePreTag"] = "true"; + buf += "<i> "; + u->hasFootnotePreTag = true; } + else if (!strncmp(token, "Rf", 2)) { + buf += " <a href=\"note="; + buf += u->lastTextNode.c_str(); + buf += "\">"; + buf += "<small><sup>*n</sup></small></a> "; + // let's let text resume to output again + u->suspendTextPassThru = false; + } + else if (!strncmp(token, "RF", 2)) { - if(userData["hasFootnotePreTag"] == "true") { - userData["hasFootnotePreTag"] = "false"; - pushString(buf, "</i> "); + if (u->hasFootnotePreTag) { + u->hasFootnotePreTag = false; + buf += "</i> "; } - pushString(buf, "<font color=\"#800000\"><small> ("); + u->suspendTextPassThru = true; } - +*/ else if (!strncmp(token, "FN", 2)) { - pushString(buf, "<font face=\""); + buf += 
"<font face=\""; for (tok = token + 2; *tok; tok++) if(*tok != '\"') - *(*buf)++ = *tok; - *(*buf)++ = '\"'; - *(*buf)++ = '>'; + buf += *tok; + buf += "\">"; } else if (!strncmp(token, "CA", 2)) { // ASCII value - *(*buf)++ = (char)atoi(&token[2]); + buf += (char)atoi(&token[2]); } else { @@ -146,3 +225,5 @@ bool GBFHTMLHREF::handleToken(char **buf, const char *token, DualStringMap &user } return true; } + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/gbfmorph.cpp b/src/modules/filters/gbfmorph.cpp index f8d336e..996baf9 100644 --- a/src/modules/filters/gbfmorph.cpp +++ b/src/modules/filters/gbfmorph.cpp @@ -1,62 +1,47 @@ /****************************************************************************** * - * gbfmorph - SWFilter decendant to hide or show morph tags + * gbfmorph - SWFilter descendant to hide or show morph tags * in a GBF module. */ #include <stdlib.h> -#include <string.h> #include <gbfmorph.h> #ifndef __GNUC__ #else #include <unixstr.h> #endif +SWORD_NAMESPACE_START -const char GBFMorph::on[] = "On"; -const char GBFMorph::off[] = "Off"; -const char GBFMorph::optName[] = "Morphological Tags"; -const char GBFMorph::optTip[] = "Toggles Morphological Tags On and Off if they exist"; +const char oName[] = "Morphological Tags"; +const char oTip[] = "Toggles Morphological Tags On and Off if they exist"; +const SWBuf choices[3] = {"On", "Off", ""}; +const StringList oValues(&choices[0], &choices[2]); -GBFMorph::GBFMorph() { - option = false; - options.push_back(on); - options.push_back(off); +GBFMorph::GBFMorph() : SWOptionFilter(oName, oTip, &oValues) { + setOptionValue("Off"); } GBFMorph::~GBFMorph() { } -void GBFMorph::setOptionValue(const char *ival) -{ - option = (!stricmp(ival, on)); -} - -const char *GBFMorph::getOptionValue() -{ - return (option) ? 
on:off; -} -char GBFMorph::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) -{ +char GBFMorph::processText(SWBuf &text, const SWKey *key, const SWModule *module) { if (!option) { // if we don't want morph tags - char *to, *from, token[2048]; // cheese. Fix. + const char *from; + char token[2048]; // cheese. Fix. int tokpos = 0; bool intoken = false; int len; bool lastspace = false; - len = strlen(text) + 1; // shift string to right of buffer - if (len < maxlen) { - memmove(&text[maxlen - len], text, len); - from = &text[maxlen - len]; - } - else from = text; // ------------------------------- + SWBuf orig = text; + from = orig.c_str(); - for (to = text; *from; from++) { + for (text = ""; *from; from++) { if (*from == '<') { intoken = true; tokpos = 0; @@ -70,15 +55,14 @@ char GBFMorph::ProcessText(char *text, int maxlen, const SWKey *key, const SWMod if (*token == 'W' && token[1] == 'T') { // Morph if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) { if (lastspace) - to--; + text--; } continue; } // if not a morph tag token, keep token in text - *to++ = '<'; - for (char *tok = token; *tok; tok++) - *to++ = *tok; - *to++ = '>'; + text += '<'; + text += token; + text += '>'; continue; } if (intoken) { @@ -87,12 +71,12 @@ char GBFMorph::ProcessText(char *text, int maxlen, const SWKey *key, const SWMod token[tokpos+2] = 0; } else { - *to++ = *from; + text += *from; lastspace = (*from == ' '); } } - *to++ = 0; - *to = 0; } return 0; } + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/gbfosis.cpp b/src/modules/filters/gbfosis.cpp new file mode 100644 index 0000000..b5dd8e1 --- /dev/null +++ b/src/modules/filters/gbfosis.cpp @@ -0,0 +1,439 @@ +/****************************************************************************** + * + * gbfstrongs - SWFilter descendant to hide or show strongs number + * in a GBF 
module. + */ + + +#include <stdlib.h> +#include <stdio.h> +#include <stdarg.h> +#include <gbfosis.h> +#include <swmodule.h> +#include <versekey.h> +#include <swlog.h> +#include <stdarg.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + +SWORD_NAMESPACE_START + +GBFOSIS::GBFOSIS() { +} + + +GBFOSIS::~GBFOSIS() { +} + + +char GBFOSIS::processText(SWBuf &text, const SWKey *key, const SWModule *module) { +/* + char *to, *from, token[2048]; // cheese. Fix. + int tokpos = 0; + bool intoken = false; + int len; + bool lastspace = false; + int word = 1; + char val[128]; + char buf[128]; + char wordstr[5]; + char *valto; + char *ch; + char *textStart, *textEnd; + char *wordStart, *wordEnd; + char *fromStart; + bool newText = false; + bool newWord = false; + SWBuf tmp; + bool suspendTextPassThru = false; + bool keepToken = false; + bool handled = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; + + textStart = from; + fromStart = from; + wordStart = text; + + static QuoteStack quoteStack; + + // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + textEnd = from-1; + wordEnd = to; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + keepToken = false; + suspendTextPassThru = false; + newWord = true; + handled = false; + + while (wordStart < (text+maxlen)) { +// if (strchr(" ,;.?!()'\"", *wordStart)) + if (strchr(";, .:?!()'\"", *wordStart) && wordStart[0] && wordStart[1]) + wordStart++; + else break; + } + while (wordEnd > wordStart) { + if (strchr(" ,;.:?!()'\"", *wordEnd)) + wordEnd--; + else break; + } + + // Scripture Reference + if (!strncmp(token, "scripRef", 8)) { + // pushString(buf, "<reference work=\"Bible.KJV\" reference=\""); + suspendTextPassThru = true; + newText = true; + 
handled = true; + } + else if (!strncmp(token, "/scripRef", 9)) { + tmp = ""; + tmp.append(textStart, (int)(textEnd - textStart)+1); + pushString(&to, convertToOSIS(tmp.c_str(), key)); + lastspace = false; + suspendTextPassThru = false; + handled = true; + } + + // Footnote + if (!strcmp(token, "RF")) { + // pushString(buf, "<reference work=\"Bible.KJV\" reference=\""); + pushString(&to, "<note type=\"x-StudyNote\">"); + newText = true; + lastspace = false; + handled = true; + } + else if (!strcmp(token, "Rf")) { + pushString(&to, "</note>"); + lastspace = false; + handled = true; + } + // hebrew titles + if (!strcmp(token, "TH")) { + pushString(&to, "<title type=\"psalm\">"); + newText = true; + lastspace = false; + handled = true; + } + else if (!strcmp(token, "Th")) { + pushString(&to, "</title>"); + lastspace = false; + handled = true; + } + // Italics assume transchange + if (!strcmp(token, "FI")) { + pushString(&to, "<transChange type=\"added\">"); + newText = true; + lastspace = false; + handled = true; + } + else if (!strcmp(token, "Fi")) { + pushString(&to, "</transChange>"); + lastspace = false; + handled = true; + } + // Paragraph break. 
For now use empty paragraph element + if (!strcmp(token, "CM")) { + pushString(&to, "<milestone type=\"x-p\" />"); + newText = true; + lastspace = false; + handled = true; + } + + // Figure + else if (!strncmp(token, "img ", 4)) { + const char *src = strstr(token, "src"); + if (!src) // assert we have a src attribute + continue; +// return false; + + pushString(&to, "<figure src=\""); + const char *c; + for (c = src;((*c) && (*c != '"')); c++); + +// uncomment for SWORD absolute path logic +// if (*(c+1) == '/') { +// pushString(buf, "file:"); +// pushString(buf, module->getConfigEntry("AbsoluteDataPath")); +// if (*((*buf)-1) == '/') +// c++; // skip '/' +// } +// end of uncomment for asolute path logic + + for (c++;((*c) && (*c != '"')); c++) + *to++ = *c; + + pushString(&to, "\" />"); + lastspace = false; + handled = true; + } + + // Strongs numbers + else if (*token == 'W' && (token[1] == 'G' || token[1] == 'H')) { // Strongs + bool divineName = false; + if (module->isProcessEntryAttributes()) { + valto = val; + for (unsigned int i = 1; ((token[i]) && (i < 150)); i++) + *valto++ = token[i]; + *valto = 0; + // normal strongs number + strstrip(val); + if (!strncmp(wordStart, "<w ", 3)) { + strtok(wordStart, ">"); + char *attStart = strstr(wordStart, "lemma"); + if (attStart) { + attStart += 7; + sprintf(buf, "x-Strongs:%s|", val); + } + else { + attStart = wordStart + 3; + sprintf(buf, "lemma=\"x-Strongs:%s\" ", val); + } + wordStart[strlen(wordStart)] = '>'; + memmove(attStart+strlen(buf), attStart, (to-attStart)+1); + memcpy(attStart, buf, strlen(buf)); + to+=strlen(buf); + } + else { + if (!strcmp(val, "H03068")) { //divineName + sprintf(buf, "<divineName><w lemma=\"x-Strongs:%s\">", val); + divineName = true; + } + else sprintf(buf, "<w lemma=\"x-Strongs:%s\">", val); + + memmove(wordStart+strlen(buf), wordStart, (to-wordStart)+1); + memcpy(wordStart, buf, strlen(buf)); + to+=strlen(buf); + + if (divineName) { + wordStart += 12; + pushString(&to, 
"</w></divineName>"); + } + else pushString(&to, "</w>"); + + module->getEntryAttributes()["Word"][wordstr]["Strongs"] = val; + lastspace = false; + } + } + handled = true; + } + + // Morphology + else if (*token == 'W' && token[1] == 'T' && (token[2] == 'G' || token[2] == 'H')) { // Strongs + valto = val; + for (unsigned int i = 1; ((token[i]) && (i < 150)); i++) + *valto++ = token[i]; + *valto = 0; + strstrip(val); + if (!strncmp(wordStart, "<w ", 3)) { + strtok(wordStart, ">"); + char *attStart = strstr(wordStart, "morph"); + if (attStart) { + attStart += 7; + sprintf(buf, "x-%s:%s|", "StrongsMorph", val); + } + else { + attStart = wordStart + 3; + sprintf(buf, "morph=\"x-%s:%s\" ", "StrongsMorph", val); + } + wordStart[strlen(wordStart)] = '>'; + memmove(attStart+strlen(buf), attStart, (to-attStart)+1); + memcpy(attStart, buf, strlen(buf)); + to+=strlen(buf); + } + else { + sprintf(buf, "<w morph=\"x-%s:%s\">", "StrongsMorph", val); + memmove(wordStart+strlen(buf), wordStart, (to-wordStart)+1); + memcpy(wordStart, buf, strlen(buf)); + to+=strlen(buf); + pushString(&to, "</w>"); + lastspace = false; + } + handled = true; + } + + if (!keepToken) { + if (!handled) { + SWLog::systemlog->LogError("Unprocessed Token: <%s>", token); +// exit(-1); + } + if (from[1] && strchr(" ,;.:?!()'\"", from[1])) { + if (lastspace) + to--; + } + if (newText) {textStart = from+1; newText = false; } +// if (newWord) {wordStart = to; newWord = false; } + continue; + } + // if not a strongs token, keep token in text + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + if (newText) {textStart = to; newWord = false; } +// if (newWord) {wordStart = to; newWord = false; } + continue; + } + if (intoken) { + if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + } + else { + switch (*from) { + case '\'': + case '\"': + case '`': +// quoteStack.handleQuote(fromStart, from, &to); + *to++ = *from; + 
from++; + break; + default: + if (newWord && (*from != ' ')) {wordStart = to; newWord = false; memset(to, 0, 10); } + if (!suspendTextPassThru) { + *to++ = *from; + lastspace = (*from == ' '); + } + } + } + } + + VerseKey *vkey = SWDYNAMIC_CAST(VerseKey, key); + if (vkey) { + char ref[254]; + if (vkey->Verse()) + sprintf(ref, "\t\t<verse osisID=\"%s\">", vkey->getOSISRef()); + else *ref = 0; + if (*ref) { + memmove(text+strlen(ref), text, maxlen-strlen(ref)-1); + memcpy(text, ref, strlen(ref)); + to+=strlen(ref); + if (vkey->Verse()) { + VerseKey tmp; + tmp = *vkey; + tmp.AutoNormalize(0); + tmp.Headings(1); + sprintf(ref, "</verse>"); + pushString(&to, ref); + tmp = MAXVERSE; + if (*vkey == tmp) { + tmp.Verse(0); +// sprintf(ref, "\t</div>"); +// pushString(&to, ref); + tmp = MAXCHAPTER; + tmp = MAXVERSE; + if (*vkey == tmp) { + tmp.Chapter(0); + tmp.Verse(0); +// sprintf(ref, "\t</div>"); +// pushString(&to, ref); + if (!quoteStack.empty()) { + SWLog::systemlog->LogError("popping unclosed quote at end of book"); + quoteStack.clear(); + } + } + } + } + +// +// else if (vkey->Chapter()) { +// sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef()); +// } +// else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef()); +// + } + } + *to++ = 0; + *to = 0; +*/ + return 0; +} + + +const char *GBFOSIS::convertToOSIS(const char *inRef, const SWKey *key) { + static SWBuf outRef; + + outRef = ""; + + VerseKey defLanguage; + ListKey verses = defLanguage.ParseVerseList(inRef, (*key), true); + const char *startFrag = inRef; + for (int i = 0; i < verses.Count(); i++) { + VerseKey *element = SWDYNAMIC_CAST(VerseKey, verses.GetElement(i)); + char buf[5120]; + char frag[800]; + if (element) { + memmove(frag, startFrag, ((const char *)element->userData - startFrag) + 1); + frag[((const char *)element->userData - startFrag) + 1] = 0; + startFrag = (const char *)element->userData + 1; + sprintf(buf, "<reference osisRef=\"%s-%s\">%s</reference>", 
element->LowerBound().getOSISRef(), element->UpperBound().getOSISRef(), frag); + } + else { + memmove(frag, startFrag, ((const char *)verses.GetElement(i)->userData - startFrag) + 1); + frag[((const char *)verses.GetElement(i)->userData - startFrag) + 1] = 0; + startFrag = (const char *)verses.GetElement(i)->userData + 1; + sprintf(buf, "<reference osisRef=\"%s\">%s</reference>", VerseKey(*verses.GetElement(i)).getOSISRef(), frag); + } + outRef+=buf; + } + return outRef.c_str(); +} + + +QuoteStack::QuoteStack() { + clear(); +} + + +void QuoteStack::clear() { + while (!quotes.empty()) quotes.pop(); +} + + +QuoteStack::~QuoteStack() { + clear(); +} + + +void QuoteStack::handleQuote(char *buf, char *quotePos, SWBuf &text) { +//QuoteInstance(char startChar = '\"', char level = 1, string uniqueID = "", char continueCount = 0) { + if (!quotes.empty()) { + QuoteInstance last = quotes.top(); + if (last.startChar == *quotePos) { + text += "</quote>"; + quotes.pop(); + } + else { + quotes.push(QuoteInstance(*quotePos, last.level+1)); + quotes.top().pushStartStream(text); + } + } + else { + quotes.push(QuoteInstance(*quotePos)); + quotes.top().pushStartStream(text); + } +} + +void QuoteStack::QuoteInstance::pushStartStream(SWBuf &text) { + text.appendFormatted("<quote level=\"%d\">", level); +} + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/gbfplain.cpp b/src/modules/filters/gbfplain.cpp index 65766d3..f519a25 100644 --- a/src/modules/filters/gbfplain.cpp +++ b/src/modules/filters/gbfplain.cpp @@ -1,34 +1,28 @@ /****************************************************************************** * - * gbfplain - SWFilter decendant to strip out all GBF tags or convert to + * gbfplain - SWFilter descendant to strip out all GBF tags or convert to * ASCII rendered symbols. 
*/ #include <stdlib.h> -#include <string.h> #include <gbfplain.h> +SWORD_NAMESPACE_START GBFPlain::GBFPlain() { } -char GBFPlain::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +char GBFPlain::processText (SWBuf &text, const SWKey *key, const SWModule *module) { - char *to, *from, token[2048]; + char token[2048]; int tokpos = 0; bool intoken = false; - int len; - - len = strlen(text) + 1; // shift string to right of buffer - if (len < maxlen) { - memmove(&text[maxlen - len], text, len); - from = &text[maxlen - len]; - } - else from = text; // ------------------------------- - - for (to = text; *from; from++) { + const char *from; + SWBuf orig = text; + from = orig.c_str(); + for (text = ""; *from; from++) { if (*from == '<') { intoken = true; tokpos = 0; @@ -46,34 +40,30 @@ char GBFPlain::ProcessText(char *text, int maxlen, const SWKey *key, const SWMod case 'G': // Greek case 'H': // Hebrew case 'T': // Tense - *to++ = ' '; - *to++ = '<'; + text += " <"; for (char *tok = token + 2; *tok; tok++) - *to++ = *tok; - *to++ = '>'; - *to++ = ' '; + text += *tok; + text += "> "; continue; } break; case 'R': switch(token[1]) { case 'F': // footnote begin - *to++ = ' '; - *to++ = '['; + text += " ["; continue; case 'f': // footnote end - *to++ = ']'; - *to++ = ' '; + text += "] "; continue; } break; case 'C': switch(token[1]) { case 'A': // ASCII value - *to++ = (char)atoi(&token[2]); + text += (char)atoi(&token[2]); continue; case 'G': - *to++ = '>'; + text += ">"; continue; /* Bug in WEB case 'L': @@ -82,11 +72,10 @@ char GBFPlain::ProcessText(char *text, int maxlen, const SWKey *key, const SWMod */ case 'L': // Bug in WEB. 
Use above entry when fixed case 'N': // new line - *to++ = '\n'; + text += '\n'; continue; case 'M': // new paragraph - *to++ = '\n'; - *to++ = '\n'; + text += "\n\n"; continue; } break; @@ -98,9 +87,9 @@ char GBFPlain::ProcessText(char *text, int maxlen, const SWKey *key, const SWMod token[tokpos++] = *from; token[tokpos+2] = 0; } - else *to++ = *from; + else text += *from; } - *to++ = 0; - *to = 0; return 0; } + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/gbfredletterwords.cpp b/src/modules/filters/gbfredletterwords.cpp new file mode 100644 index 0000000..df7438d --- /dev/null +++ b/src/modules/filters/gbfredletterwords.cpp @@ -0,0 +1,98 @@ +/****************************************************************************** + * + * GBFRedLetterWords - SWFilter descendant to toggle red coloring of words of + * Christ in a GBF module. + */ + + +#include <stdlib.h> +#include <gbfredletterwords.h> +#include <swmodule.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif +#include <ctype.h> + +SWORD_NAMESPACE_START + +const char oName[] = "Words of Christ in Red"; +const char oTip[] = "Toggles Red Coloring for Words of Christ On and Off if they are marked"; + +const SWBuf choices[3] = {"On", "Off", ""}; +const StringList oValues(&choices[0], &choices[2]); + +GBFRedLetterWords::GBFRedLetterWords() : SWOptionFilter(oName, oTip, &oValues) { + setOptionValue("Off"); +} + + +GBFRedLetterWords::~GBFRedLetterWords() { +} + + +char GBFRedLetterWords::processText(SWBuf &text, const SWKey *key, const SWModule *module) { +/** This function removes the red letter words in Bible like the WEB +* The words are marked by <FR> as start and <Fr> as end tag. +*/ + if (!option) { // if we don't want footnotes + char token[4096]; // cheese. Fix. 
+ int tokpos = 0; + bool intoken = false; + int len; + bool hide = false; + + const char *from; + SWBuf orig = text; + from = orig.c_str(); + for (text = ""; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; +// memset(token, 0, 4096); + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + /*switch (*token) { + case 'F': // Font attribute + switch(token[1]) { + case 'R': // Begin red letter words + hide = true; + break; + case 'r': // end red letter words + hide = false; + break; + } + continue; // skip token + }*/ + + //hide the token if either FR or Fr was detected + hide = (token[0] == 'F' && ( (token[1] == 'R') || (token[1] == 'r') )); + + // if not a red letter word token, keep token in text + if (!hide) { + text += '<'; + for (char *tok = token; *tok; tok++) + text += *tok; + text += '>'; + } + continue; + } + if (intoken) { + if (tokpos < 4090) + token[tokpos++] = *from; + token[tokpos+2] = 0; // +2 cuz we init token with 2 extra '0' because of switch statement + } + else { + text += *from; + } + } + } + return 0; +} + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/gbfrtf.cpp b/src/modules/filters/gbfrtf.cpp index 5f7d064..4a18fbf 100644 --- a/src/modules/filters/gbfrtf.cpp +++ b/src/modules/filters/gbfrtf.cpp @@ -1,35 +1,42 @@ /****************************************************************************** * - * gbfrtf - SWFilter decendant to convert all GBF tags to RTF tags + * gbfrtf - SWFilter descendant to convert all GBF tags to RTF tags */ -#include <stdlib.h> -#include <string.h> #include <gbfrtf.h> #include <ctype.h> +SWORD_NAMESPACE_START + GBFRTF::GBFRTF() { } -char GBFRTF::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +char GBFRTF::processText(SWBuf &text, const SWKey *key, const SWModule *module) { - unsigned char *to, *from; char token[2048]; + char val[128]; + char *valto; + char *num; int tokpos = 0; bool intoken = 
false; int len; const char *tok; + SWBuf strongnum; + SWBuf strongtense; + bool hideText = false; + int wordLen = 0; + int wordCount = 0; + int i; - len = strlen(text) + 1; // shift string to right of buffer - if (len < maxlen) { - memmove(&text[maxlen - len], text, len); - from = (unsigned char *)&text[maxlen - len]; - } - else from = (unsigned char *)text; // ------------------------------- - for (to = (unsigned char *)text; *from; from++) { + const char *from; + SWBuf orig = text; + from = orig.c_str(); + for (text = ""; *from; from++) { if (*from == '<') { + wordLen = wordCount; + wordCount = 0; intoken = true; tokpos = 0; token[0] = 0; @@ -40,39 +47,80 @@ char GBFRTF::ProcessText(char *text, int maxlen, const SWKey *key, const SWModul if (*from == '>') { intoken = false; // process desired tokens + // deal with OSIS note tags. Just hide till OSISRTF + if (!strncmp(token, "note ", 5)) { + hideText = true; + } + if (!strncmp(token, "/note", 5)) { + hideText = false; + } + switch (*token) { + case 'w': // OSIS Word (temporary until OSISRTF is done) + strongnum = ""; + strongtense = ""; + valto = val; + num = strstr(token, "lemma=\"x-Strongs:"); + if (num) { + for (num+=17; ((*num) && (*num != '\"')); num++) + *valto++ = *num; + *valto = 0; + if (atoi((!isdigit(*val))?val+1:val) < 5627) { + // normal strongs number + strongnum += "{\\cf3 \\sub <"; + for (tok = (!isdigit(*val))?val+1:val; *tok; tok++) + strongnum += *tok; + strongnum += ">}"; + } + /* forget these for now + else { + // verb morph + sprintf(wordstr, "%03d", word-1); + module->getEntryAttributes()["Word"][wordstr]["Morph"] = val; + } + */ + } + valto = val; + num = strstr(token, "morph=\"x-Robinson:"); + if (num) { + for (num+=18; ((*num) && (*num != '\"')); num++) + *valto++ = *num; + *valto = 0; + // normal robinsons tense + strongtense += "{\\cf4 \\sub ("; + for (tok = val; *tok; tok++) + strongtense += *tok; + strongtense += ")}"; + } + continue; + + case '/': + if (token[1] == 'w') { + if 
((wordCount > 0) || (strongnum != "{\\cf3 \\sub <3588>}")) { + //for (i = 0; i < strongnum.length(); i++) + text += strongnum; + //for (i = 0; i < strongtense.length(); i++) + text += strongtense; + } + } + continue; + case 'W': // Strongs switch(token[1]) { case 'G': // Greek case 'H': // Hebrew - *to++ = '{'; - *to++ = '\\'; - *to++ = 'f'; - *to++ = 's'; - *to++ = '1'; - *to++ = '7'; - *to++ = ' '; - *to++ = '<'; + text += "{\\cf3 \\sub <"; for (tok = token + 2; *tok; tok++) - *to++ = *tok; - *to++ = '>'; - *to++ = '}'; + text += *tok; + text += ">}"; continue; case 'T': // Tense - *to++ = '{'; - *to++ = '\\'; - *to++ = 'f'; - *to++ = 's'; - *to++ = '1'; - *to++ = '7'; - *to++ = ' '; - *to++ = '('; + text += "{\\cf4 \\sub ("; bool separate = false; for (tok = token + 2; *tok; tok++) { if (separate) { - *to++ = ';'; - *to++ = ' '; + text += "; "; separate = false; } switch (*tok) { @@ -80,7 +128,7 @@ char GBFRTF::ProcessText(char *text, int maxlen, const SWKey *key, const SWModul case 'H': for (tok++; *tok; tok++) { if (isdigit(*tok)) { - *to++ = *tok; + text += *tok; separate = true; } else { @@ -91,176 +139,134 @@ char GBFRTF::ProcessText(char *text, int maxlen, const SWKey *key, const SWModul break; default: for (; *tok; tok++) { - *to++ = *tok; + text += *tok; } } } - *to++ = ')'; - *to++ = '}'; + text += ")}"; continue; } break; case 'R': switch(token[1]) { case 'X': - *to++ = '#'; + text += "<a href=\"\">"; continue; case 'x': - *to++ = '|'; + text += "</a>"; continue; case 'F': // footnote begin - *to++ = '{'; - *to++ = '\\'; - *to++ = 'i'; - *to++ = '1'; - *to++ = ' '; - *to++ = '\\'; - *to++ = 'f'; - *to++ = 's'; - *to++ = '1'; - *to++ = '7'; - *to++ = ' '; - *to++ = '('; + text += "{\\i1 \\sub ("; continue; case 'f': // footnote end - *to++ = ')'; - *to++ = ' '; - *to++ = '}'; + text += ") }"; continue; } break; case 'F': // font tags switch(token[1]) { case 'I': // italic start - *to++ = '\\'; - *to++ = 'i'; - *to++ = '1'; - *to++ = ' '; + text += "\\i1 
"; continue; case 'i': // italic end - *to++ = '\\'; - *to++ = 'i'; - *to++ = '0'; - *to++ = ' '; + text += "\\i0 "; continue; case 'B': // bold start - *to++ = '\\'; - *to++ = 'b'; - *to++ = '1'; - *to++ = ' '; + text += "\\b1 "; continue; case 'b': // bold end - *to++ = '\\'; - *to++ = 'b'; - *to++ = '0'; - *to++ = ' '; + text += "\\b0 "; continue; case 'N': - *to++ = '{'; - if (!strnicmp(token+2, "Symbol", 6)) { - *to++ = '\\'; - *to++ = 'f'; - *to++ = '7'; - *to++ = ' '; - } + text += '{'; + if (!strnicmp(token+2, "Symbol", 6)) + text += "\\f7 "; + if (!strnicmp(token+2, "Courier", 7)) + text += "\\f8 "; continue; case 'n': - *to++ = '}'; + text += '}'; continue; case 'S': - *to++ = '{'; - *to++ = '\\'; - *to++ = 's'; - *to++ = 'u'; - *to++ = 'p'; - *to++ = 'e'; - *to++ = 'r'; - *to++ = ' '; + text += "{\\super "; continue; case 's': - *to++ = '}'; + text += '}'; continue; case 'R': - *to++ = '{'; - *to++ = '\\'; - *to++ = 'c'; - *to++ = 'f'; - *to++ = '6'; - *to++ = ' '; + text += "{\\cf6 "; continue; case 'r': - *to++ = '}'; + text += '}'; + continue; + case 'O': + case 'C': + text += "\\scaps1 "; + continue; + case 'o': + case 'c': + text += "\\scaps0 "; + continue; + case 'V': + text += "{\\sub "; + continue; + case 'v': + text += '}'; + continue; + case 'U': + text += "\\ul1 "; + continue; + case 'u': + text += "\\ul0 "; continue; } break; case 'C': // special character tags switch(token[1]) { case 'A': // ASCII value - *to++ = (char)atoi(&token[2]); + text += (char)atoi(&token[2]); continue; case 'G': - *to++ = '>'; + text += '>'; continue; case 'L': // line break - *to++ = '\\'; - *to++ = 'l'; - *to++ = 'i'; - *to++ = 'n'; - *to++ = 'e'; - *to++ = ' '; + text += "\\line "; continue; case 'M': // new paragraph - *to++ = '\\'; - *to++ = 'p'; - *to++ = 'a'; - *to++ = 'r'; - *to++ = ' '; + text += "\\par "; continue; case 'T': - *to++ = '<'; + text += '<'; } break; case 'T': // title formatting switch(token[1]) { case 'T': // Book title begin - *to++ = '{'; 
- *to++ = '\\'; - *to++ = 'f'; - *to++ = 's'; - *to++ = '2'; - *to++ = '2'; - *to++ = ' '; - continue; + text += "{\\large "; + continue; case 't': - *to++ = '}'; - continue; + text += '}'; + continue; case 'S': - *to++ = '\\'; - *to++ = 'p'; - *to++ = 'a'; - *to++ = 'r'; - *to++ = ' '; - *to++ = '{'; - *to++ = '\\'; - *to++ = 'i'; - *to++ = '1'; - *to++ = '\\'; - *to++ = 'b'; - *to++ = '1'; - *to++ = ' '; + text += "\\par {\\i1\\b1 "; continue; case 's': - *to++ = '}'; - *to++ = '\\'; - *to++ = 'p'; - *to++ = 'a'; - *to++ = 'r'; - *to++ = ' '; + text += "}\\par "; continue; } break; - + case 'J': // Strongs + switch(token[1]) { + case 'L': + text += "\\ql "; + case 'C': + text += "\\qc "; + case 'R': + text += "\\qr "; + case 'F': + text += "\\qj "; + } } continue; } @@ -269,9 +275,15 @@ char GBFRTF::ProcessText(char *text, int maxlen, const SWKey *key, const SWModul token[tokpos++] = *from; token[tokpos+2] = 0; } - else *to++ = *from; + else { + if (!hideText) { + wordCount++; + text += *from; + } + } } - *to++ = 0; - *to = 0; return 0; } + +SWORD_NAMESPACE_END + diff --git a/src/modules/filters/gbfstrongs.cpp b/src/modules/filters/gbfstrongs.cpp index 40fc958..bd5088a 100644 --- a/src/modules/filters/gbfstrongs.cpp +++ b/src/modules/filters/gbfstrongs.cpp @@ -1,98 +1,116 @@ /****************************************************************************** * - * gbfstrongs - SWFilter decendant to hide or show strongs number + * gbfstrongs - SWFilter descendant to hide or show strongs number * in a GBF module. 
*/ #include <stdlib.h> -#include <string.h> #include <gbfstrongs.h> +#include <swmodule.h> #ifndef __GNUC__ #else #include <unixstr.h> #endif +#include <ctype.h> +SWORD_NAMESPACE_START -const char GBFStrongs::on[] = "On"; -const char GBFStrongs::off[] = "Off"; -const char GBFStrongs::optName[] = "Strong's Numbers"; -const char GBFStrongs::optTip[] = "Toggles Strong's Numbers On and Off if they exist"; +const char oName[] = "Strong's Numbers"; +const char oTip[] = "Toggles Strong's Numbers On and Off if they exist"; +const SWBuf choices[3] = {"On", "Off", ""}; +const StringList oValues(&choices[0], &choices[2]); -GBFStrongs::GBFStrongs() { - option = false; - options.push_back(on); - options.push_back(off); +GBFStrongs::GBFStrongs() : SWOptionFilter(oName, oTip, &oValues) { + setOptionValue("Off"); } GBFStrongs::~GBFStrongs() { } -void GBFStrongs::setOptionValue(const char *ival) -{ - option = (!stricmp(ival, on)); -} -const char *GBFStrongs::getOptionValue() -{ - return (option) ? on:off; -} +char GBFStrongs::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + char token[2048]; // cheese. Fix. + int tokpos = 0; + bool intoken = false; + int len; + bool lastspace = false; + int word = 1; + char val[128]; + char wordstr[5]; + char *valto; + char *ch; + unsigned int textStart = 0, textEnd = 0; + bool newText = false; + SWBuf tmp; + const char *from; -char GBFStrongs::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) -{ - if (!option) { // if we don't want strongs - char *to, *from, token[2048]; // cheese. Fix. 
- int tokpos = 0; - bool intoken = false; - int len; - bool lastspace = false; + SWBuf orig = text; + from = orig.c_str(); - len = strlen(text) + 1; // shift string to right of buffer - if (len < maxlen) { - memmove(&text[maxlen - len], text, len); - from = &text[maxlen - len]; + for (text = ""; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + textEnd = text.size(); + continue; } - else from = text; // ------------------------------- + if (*from == '>') { // process tokens + intoken = false; - for (to = text; *from; from++) { - if (*from == '<') { - intoken = true; - tokpos = 0; - token[0] = 0; - token[1] = 0; - token[2] = 0; - continue; - } - if (*from == '>') { // process tokens - intoken = false; - if (*token == 'W' && (token[1] == 'G' || token[1] == 'H')) { // Strongs - if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) { - if (lastspace) - to--; - } - continue; + if (*token == 'W' && (token[1] == 'G' || token[1] == 'H')) { // Strongs + if (module->isProcessEntryAttributes()) { + valto = val; + for (unsigned int i = 2; ((token[i]) && (i < 150)); i++) + *valto++ = token[i]; + *valto = 0; + if (atoi((!isdigit(*val))?val+1:val) < 5627) { + // normal strongs number + sprintf(wordstr, "%03d", word++); + module->getEntryAttributes()["Word"][wordstr]["Strongs"] = val; + tmp = ""; + tmp.append(text.c_str()+textStart, (int)(textEnd - textStart)); + module->getEntryAttributes()["Word"][wordstr]["Text"] = tmp; + newText = true; + } + else { + // verb morph + sprintf(wordstr, "%03d", word-1); + module->getEntryAttributes()["Word"][wordstr]["Morph"] = val; + } + } + if (!option) { + if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) { + if 
(lastspace) + text--; + } + if (newText) {textStart = text.size(); newText = false; } + continue; } - // if not a strongs token, keep token in text - *to++ = '<'; - for (char *tok = token; *tok; tok++) - *to++ = *tok; - *to++ = '>'; - continue; - } - if (intoken) { - if (tokpos < 2045) - token[tokpos++] = *from; - token[tokpos+2] = 0; - } - else { - *to++ = *from; - lastspace = (*from == ' '); } + // if not a strongs token, keep token in text + text += '<'; + text += token; + text += '>'; + if (newText) {textStart = text.size(); newText = false; } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + text += *from; + lastspace = (*from == ' '); } - *to++ = 0; - *to = 0; } return 0; } + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/gbfthml.cpp b/src/modules/filters/gbfthml.cpp index ca03e71..03b8d8a 100644 --- a/src/modules/filters/gbfthml.cpp +++ b/src/modules/filters/gbfthml.cpp @@ -15,32 +15,27 @@ ***************************************************************************/ #include <stdlib.h> -#include <string.h> #include <gbfthml.h> +SWORD_NAMESPACE_START GBFThML::GBFThML() { } -char GBFThML::ProcessText(char *text, int maxlen) -{ - char *to, *from, token[2048]; +char GBFThML::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + const char *from; + char token[2048]; int tokpos = 0; bool intoken = false; int len; const char *tok; - len = strlen(text) + 1; // shift string to right of buffer - if (len < maxlen) { - memmove(&text[maxlen - len], text, len); - from = &text[maxlen - len]; - } - else from = text; // ------------------------------- + SWBuf orig = text; + from = orig.c_str(); - for (to = text; *from; from++) - { + for (text = ""; *from; from++) { if (*from == '<') { intoken = true; tokpos = 0; @@ -58,75 +53,17 @@ char GBFThML::ProcessText(char *text, int maxlen) switch(token[1]) { case 'G': case 'H': - *to++ = '<'; - *to++ = 's'; - *to++ = 'y'; - *to++ = 'n'; - 
*to++ = 'c'; - *to++ = ' '; - *to++ = 't'; - *to++ = 'y'; - *to++ = 'p'; - *to++ = 'e'; - *to++ = '='; - *to++ = '"'; - *to++ = 'S'; - *to++ = 't'; - *to++ = 'r'; - *to++ = 'o'; - *to++ = 'n'; - *to++ = 'g'; - *to++ = 's'; - *to++ = '"'; - *to++ = ' '; - *to++ = 'v'; - *to++ = 'a'; - *to++ = 'l'; - *to++ = 'u'; - *to++ = 'e'; - *to++ = '='; - *to++ = '"'; + text += "<sync type=\"Strongs\" value=\""; for (tok = token + 1; *tok; tok++) - *to++ = *tok; - *to++ = '"'; - *to++ = ' '; - *to++ = '/'; - *to++ = '>'; + text += *tok; + text += "\" />"; continue; case 'T': // Tense - *to++ = '<'; - *to++ = 's'; - *to++ = 'y'; - *to++ = 'n'; - *to++ = 'c'; - *to++ = ' '; - *to++ = 't'; - *to++ = 'y'; - *to++ = 'p'; - *to++ = 'e'; - *to++ = '='; - *to++ = '"'; - *to++ = 'M'; - *to++ = 'o'; - *to++ = 'r'; - *to++ = 'p'; - *to++ = 'h'; - *to++ = '"'; - *to++ = ' '; - *to++ = 'v'; - *to++ = 'a'; - *to++ = 'l'; - *to++ = 'u'; - *to++ = 'e'; - *to++ = '='; - *to++ = '"'; + text += "<sync type=\"Morph\" value=\""; for (tok = token + 2; *tok; tok++) - *to++ = *tok; - *to++ = '"'; - *to++ = ' '; - *to++ = '/'; - *to++ = '>'; + text += *tok; + text += "\" />"; continue; } break; @@ -134,234 +71,99 @@ char GBFThML::ProcessText(char *text, int maxlen) switch(token[1]) { case 'X': - *to++ = '<'; - *to++ = 'a'; - *to++ = ' '; - *to++ = 'h'; - *to++ = 'r'; - *to++ = 'e'; - *to++ = 'f'; - *to++ = '='; - *to++ = '\"'; - for (tok = token + 3; *tok; tok++) { + text += "<a href=\""; + for (tok = token + 3; *tok; tok++) { if(*tok != '<' && *tok+1 != 'R' && *tok+2 != 'x') { - *to++ = *tok; + text += *tok; } else { break; } - } - *to++ = '\"'; - *to++ = '>'; - continue; + } + text += "\">"; + continue; case 'x': - *to++ = '<'; - *to++ = '/'; - *to++ = 'a'; - *to++ = '>'; - continue; + text += "</a>"; + continue; case 'F': // footnote begin - *to++ = '<'; - *to++ = 'n'; - *to++ = 'o'; - *to++ = 't'; - *to++ = 'e'; - *to++ = ' '; - *to++ = 'p'; - *to++ = 'l'; - *to++ = 'a'; - *to++ = 'c'; - *to++ = 
'e'; - *to++ = '='; - *to++ = '"'; - *to++ = 'f'; - *to++ = 'o'; - *to++ = 'o'; - *to++ = 't'; - *to++ = '"'; - *to++ = '>'; - continue; + text += "<note>"; + continue; case 'f': // footnote end - *to++ = '<'; - *to++ = '/'; - *to++ = 'n'; - *to++ = 'o'; - *to++ = 't'; - *to++ = 'e'; - *to++ = '>'; - continue; + text += "</note>"; + continue; } break; case 'F': // font tags switch(token[1]) { case 'N': - *to++ = '<'; - *to++ = 'f'; - *to++ = 'o'; - *to++ = 'n'; - *to++ = 't'; - *to++ = ' '; - *to++ = 'f'; - *to++ = 'a'; - *to++ = 'c'; - *to++ = 'e'; - *to++ = '='; - *to++ = '"'; - for (tok = token + 2; *tok; tok++) - *to++ = *tok; - *to++ = '"'; - *to++ = '>'; - continue; + text += "<font face=\""; + for (tok = token + 2; *tok; tok++) + text += *tok; + text += "\">"; + continue; case 'n': - *to++ = '<'; - *to++ = '/'; - *to++ = 'f'; - *to++ = 'o'; - *to++ = 'n'; - *to++ = 't'; - *to++ = '>'; - continue; + text += "</font>"; + continue; case 'I': // italic start - *to++ = '<'; - *to++ = 'i'; - *to++ = '>'; - continue; + text += "<i>"; + continue; case 'i': // italic end - *to++ = '<'; - *to++ = '/'; - *to++ = 'i'; - *to++ = '>'; - continue; + text += "</i>"; + continue; case 'B': // bold start - *to++ = '<'; - *to++ = 'b'; - *to++ = '>'; - continue; + text += "<b>"; + continue; case 'b': // bold end - *to++ = '<'; - *to++ = '/'; - *to++ = 'b'; - *to++ = '>'; - continue; + text += "</b>"; + continue; case 'R': // words of Jesus begin - *to++ = '<'; - *to++ = 'f'; - *to++ = 'o'; - *to++ = 'n'; - *to++ = 't'; - *to++ = ' '; - *to++ = 'c'; - *to++ = 'o'; - *to++ = 'l'; - *to++ = 'o'; - *to++ = 'r'; - *to++ = '='; - *to++ = '\"'; - *to++ = '#'; - *to++ = 'f'; - *to++ = 'f'; - *to++ = '0'; - *to++ = '0'; - *to++ = '0'; - *to++ = '0'; - *to++ = '\"'; - *to++ = '>'; - continue; + text += "<font color=\"#ff0000\">"; + continue; case 'r': // words of Jesus end - *to++ = '<'; - *to++ = '/'; - *to++ = 'f'; - *to++ = 'o'; - *to++ = 'n'; - *to++ = 't'; - *to++ = '>'; - continue; 
+ text += "</font>"; + continue; case 'U': // Underline start - *to++ = '<'; - *to++ = 'u'; - *to++ = '>'; - continue; + text += "<u>"; + continue; case 'u': // Underline end - *to++ = '<'; - *to++ = '/'; - *to++ = 'u'; - *to++ = '>'; - continue; + text += "</u>"; + continue; case 'O': // Old Testament quote begin - *to++ = '<'; - *to++ = 'c'; - *to++ = 'i'; - *to++ = 't'; - *to++ = 'e'; - *to++ = '>'; - continue; + text += "<cite>"; + continue; case 'o': // Old Testament quote end - *to++ = '<'; - *to++ = '/'; - *to++ = 'c'; - *to++ = 'i'; - *to++ = 't'; - *to++ = 'e'; - *to++ = '>'; - continue; + text += "</cite>"; + continue; case 'S': // Superscript begin - *to++ = '<'; - *to++ = 's'; - *to++ = 'u'; - *to++ = 'p'; - *to++ = '>'; - continue; + text += "<sup>"; + continue; case 's': // Superscript end - *to++ = '<'; - *to++ = '/'; - *to++ = 's'; - *to++ = 'u'; - *to++ = 'p'; - *to++ = '>'; - continue; + text += "</sup>"; + continue; case 'V': // Subscript begin - *to++ = '<'; - *to++ = 's'; - *to++ = 'u'; - *to++ = 'b'; - *to++ = '>'; - continue; + text += "<sub>"; + continue; case 'v': // Subscript end - *to++ = '<'; - *to++ = '/'; - *to++ = 's'; - *to++ = 'u'; - *to++ = 'b'; - *to++ = '>'; - continue; + text += "</sub>"; + continue; } break; case 'C': // special character tags switch(token[1]) { case 'A': // ASCII value - *to++ = (char)atoi(&token[2]); + text += (char)atoi(&token[2]); continue; case 'G': //*to++ = ' '; continue; case 'L': // line break - *to++ = '<'; - *to++ = 'b'; - *to++ = 'r'; - *to++ = ' '; - *to++ = '/'; - *to++ = '>'; - *to++ = ' '; + text += "<br /> "; continue; case 'M': // new paragraph - *to++ = '<'; - *to++ = 'p'; - *to++ = ' '; - *to++ = '/'; - *to++ = '>'; + text += "<p />"; continue; case 'T': //*to++ = ' '; @@ -372,76 +174,28 @@ char GBFThML::ProcessText(char *text, int maxlen) switch(token[1]) { case 'T': // Book title begin - *to++ = '<'; - *to++ = 'b'; - *to++ = 'i'; - *to++ = 'g'; - *to++ = '>'; - continue; + text += "<big>"; 
+ continue; case 't': - *to++ = '<'; - *to++ = '/'; - *to++ = 'b'; - *to++ = 'i'; - *to++ = 'g'; - *to++ = '>'; - continue; + text += "</big>"; + continue; case 'S': - *to++ = '<'; - *to++ = 'd'; - *to++ = 'i'; - *to++ = 'v'; - *to++ = ' '; - *to++ = 'c'; - *to++ = 'l'; - *to++ = 'a'; - *to++ = 's'; - *to++ = 's'; - *to++ = '='; - *to++ = '\"'; - *to++ = 's'; - *to++ = 'e'; - *to++ = 'c'; - *to++ = 'h'; - *to++ = 'e'; - *to++ = 'a'; - *to++ = 'd'; - *to++ = '\"'; - *to++ = '>'; - continue; + text += "<div class=\"sechead\">"; + continue; case 's': - *to++ = '<'; - *to++ = '/'; - *to++ = 'd'; - *to++ = 'i'; - *to++ = 'v'; - *to++ = '>'; - continue; + text += "</div>"; + continue; } break; case 'P': // special formatting - switch(token[1]) - { + switch(token[1]) { case 'P': // Poetry begin - *to++ = '<'; - *to++ = 'v'; - *to++ = 'e'; - *to++ = 'r'; - *to++ = 's'; - *to++ = 'e'; - *to++ = '>'; - continue; + text += "<verse>"; + continue; case 'p': - *to++ = '<'; - *to++ = '/'; - *to++ = 'v'; - *to++ = 'e'; - *to++ = 'r'; - *to++ = 's'; - *to++ = 'e'; - *to++ = '>'; - continue; + text += "</verse>"; + continue; } break; } @@ -452,12 +206,11 @@ char GBFThML::ProcessText(char *text, int maxlen) token[tokpos++] = *from; token[tokpos+2] = 0; } - else *to++ = *from; + else text += *from; } - *to++ = 0; - *to = 0; return 0; } +SWORD_NAMESPACE_END diff --git a/src/modules/filters/gbfwebif.cpp b/src/modules/filters/gbfwebif.cpp new file mode 100644 index 0000000..131f359 --- /dev/null +++ b/src/modules/filters/gbfwebif.cpp @@ -0,0 +1,161 @@ +/*************************************************************************** + GBFWEBIF.cpp - GBF to HTML filter with hrefs + for strongs and morph tags + ------------------- + begin : 2001-09-03 + copyright : 2001 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; 
you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include <gbfwebif.h> +#include <ctype.h> +#include <utilweb.h> + +SWORD_NAMESPACE_START + +GBFWEBIF::GBFWEBIF() : baseURL(""), passageStudyURL(baseURL + "passagestudy.jsp") { +//all is done in GBFHTMLHREF since it inherits form this class + addTokenSubstitute("FR", "<span class=\"wordsOfJesus\">"); // words of Jesus begin + addTokenSubstitute("Fr", "</span>"); +} + +bool GBFWEBIF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) { + const char *tok; + char val[128]; + char *valto; + const char *num; + SWBuf url; + + if (!substituteToken(buf, token)) { + if (!strncmp(token, "w", 1)) { + // OSIS Word (temporary until OSISRTF is done) + valto = val; + num = strstr(token, "lemma=\"x-Strongs:"); + + if (num) { + for (num+=17; ((*num) && (*num != '\"')); num++) + *valto++ = *num; + *valto = 0; + + if (atoi((!isdigit(*val))?val+1:val) < 5627) { + buf += " <small><em><"; + url = ""; + for (tok = val; *tok; tok++) { + url += *tok; + } + if ((url.length() > 1) && strchr("GH", url[0])) { + if (isdigit(url[1])) + url = url.c_str()+1; + } + buf.appendFormatted("<a href=\"%s?showStrong=%s#cv\">", passageStudyURL.c_str(), encodeURL(url).c_str()); + + for (tok = (!isdigit(*val))?val+1:val; *tok; tok++) { + buf += *tok; + } + buf += "</a>></em></small> "; + } + } + valto = val; + num = strstr(token, "morph=\"x-Robinson:"); + if (num) { + for (num+=18; ((*num) && (*num != '\"')); num++) + *valto++ = *num; + *valto = 0; + buf += " <small><em>("; + url = ""; + for (tok = val; *tok; tok++) { + // normal robinsons tense + buf += *tok; + } + buf.appendFormatted("<a href=\"%s?showMorph=%s#cv\">", passageStudyURL.c_str(), encodeURL(url).c_str()); + + 
for (tok = val; *tok; tok++) { + buf += *tok; + } + buf += "</a>)</em></small> "; + } + } + + else if (!strncmp(token, "WG", 2) || !strncmp(token, "WH", 2)) { // strong's numbers + buf += " <small><em><"; + url = ""; + + for (tok = token+1; *tok; tok++) { + url += *tok; + } + if ((url.length() > 1) && strchr("GH", url[0])) { + if (isdigit(url[1])) + url = url.c_str()+1; + } + buf.appendFormatted("<a href=\"%s?showStrong=%s#cv\">", passageStudyURL.c_str(), encodeURL(url).c_str()); + + for (tok = token + 2; *tok; tok++) { + buf += *tok; + } + buf += "</a>></em></small>"; + } + + else if (!strncmp(token, "WTG", 3) || !strncmp(token, "WTH", 3)) { // strong's numbers tense + buf += " <small><em>("; + url = ""; + for (tok = token + 2; *tok; tok++) { + if(*tok != '\"') + url += *tok; + } + if ((url.length() > 1) && strchr("GH", url[0])) { + if (isdigit(url[1])) + url = url.c_str()+1; + } + buf.appendFormatted("<a href=\"%s?showStrong=%s#cv\">", passageStudyURL.c_str(), encodeURL(url).c_str()); + + for (tok = token + 3; *tok; tok++) + if(*tok != '\"') + buf += *tok; + buf += "</a>)</em></small>"; + } + + else if (!strncmp(token, "WT", 2) && strncmp(token, "WTH", 3) && strncmp(token, "WTG", 3)) { // morph tags + buf += " <small><em>("; + for (tok = token + 2; *tok; tok++) { + if(*tok != '\"') + buf += *tok; + } + buf.appendFormatted("<a href=\"%s?showMorph=%s#cv\">", passageStudyURL.c_str(), encodeURL(url).c_str()); + + for (tok = token + 2; *tok; tok++) { + if(*tok != '\"') + buf += *tok; + } + buf += "</a>)</em></small>"; + } + + else if (!strncmp(token, "RX", 2)) { + buf += "<a href=\""; + for (tok = token + 3; *tok; tok++) { + if(*tok != '<' && *tok+1 != 'R' && *tok+2 != 'x') { + buf += *tok; + } + else { + break; + } + } + + buf.appendFormatted("a href=\"%s?key=%s#cv\">", passageStudyURL.c_str(), encodeURL(url).c_str()); + } + + else { + return GBFHTMLHREF::handleToken(buf, token, userData); + } + } + return true; +} + +SWORD_NAMESPACE_END diff --git 
a/src/modules/filters/greeklexattribs.cpp b/src/modules/filters/greeklexattribs.cpp index fb166df..073ba39 100644 --- a/src/modules/filters/greeklexattribs.cpp +++ b/src/modules/filters/greeklexattribs.cpp @@ -1,46 +1,87 @@ /****************************************************************************** * - * greeklexattribs - SWFilter decendant to set entry attributes for greek - * lexicons + * greeklexattribs - SWFilter descendant to set entry attributes for greek + * lexicons */ #include <stdlib.h> -#include <string.h> +#include <ctype.h> +#include <string> #include <greeklexattribs.h> #include <swmodule.h> +using std::string; + +SWORD_NAMESPACE_START GreekLexAttribs::GreekLexAttribs() { } -char GreekLexAttribs::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) { +char GreekLexAttribs::processText(SWBuf &text, const SWKey *key, const SWModule *module) { if (module->isProcessEntryAttributes()) { - char *from; + const char *from; bool inAV = false; string phrase; string freq; - char *currentPhrase = 0; + char val[128], *valto; + char wordstr[7]; + const char *currentPhrase = 0, *ch = 0; + const char *currentPhraseEnd = 0; + int number = 0; - for (from = text; *from; from++) { + for (from = text.c_str(); *from; from++) { if (inAV) { if (currentPhrase == 0) { if (isalpha(*from)) currentPhrase = from; } else { - if ((!isalpha(*from)) && (*from != ' ')) { + if ((!isalpha(*from)) && (*from != ' ') && (*from != '+') && (*from !='(') && (*from != ')') && (*from != '\'')) { + if (*from == '<') { + if (!currentPhraseEnd) + currentPhraseEnd = from - 1; + for (; *from && *from != '>'; from++) { + if (!strncmp(from, "value=\"", 7)) { + valto = val; + from += 7; + for (unsigned int i = 0; from[i] != '\"' && i < 127; i++) + *valto++ = from[i]; + *valto = 0; + sprintf(wordstr, "%03d", number+1); + module->getEntryAttributes()["AVPhrase"][wordstr]["CompoundedWith"] = val; + from += strlen(val); + } + } + continue; + } + phrase = ""; - 
phrase.append(currentPhrase, (int)(from - currentPhrase)-1); + phrase.append(currentPhrase, (int)(((currentPhraseEnd>currentPhrase)?currentPhraseEnd:from) - currentPhrase)-1); currentPhrase = from; while (*from && isdigit(*from)) from++; freq = ""; freq.append(currentPhrase, (int)(from - currentPhrase)); - module->getEntryAttributes()["AVPhrase"][phrase]["Frequency"] = freq; - currentPhrase = 0; + if ((freq.length() > 0) && (phrase.length() > 0)) { + sprintf(wordstr, "%03d", ++number); + if ((strchr(phrase.c_str(), '(') > phrase.c_str()) && (strchr(phrase.c_str(), ')') > phrase.c_str() + 1)) { + string tmp = phrase.substr(0, phrase.find_first_of("(")); + phrase.erase(phrase.find_first_of("("), 1); + phrase.erase(phrase.find_first_of(")"), 1); + phrase.erase(0,phrase.find_first_not_of("\r\n\v\t ")); phrase.erase(phrase.find_last_not_of("\r\n\v\t ")+1); + module->getEntryAttributes()["AVPhrase"][wordstr]["Alt"] = phrase.c_str(); + phrase = tmp; + } + phrase.erase(0,phrase.find_first_not_of("\r\n\v\t ")); phrase.erase(phrase.find_last_not_of("\r\n\v\t ")+1); + freq.erase(0,freq.find_first_not_of("\r\n\v\t ")); freq.erase(freq.find_last_not_of("\r\n\v\t ")+1); + module->getEntryAttributes()["AVPhrase"][wordstr]["Phrase"] = phrase.c_str(); + module->getEntryAttributes()["AVPhrase"][wordstr]["Frequency"] = freq.c_str(); + currentPhrase = 0; + currentPhraseEnd = 0; + } } } if (*from == ';') inAV = false; @@ -56,3 +97,4 @@ char GreekLexAttribs::ProcessText(char *text, int maxlen, const SWKey *key, cons } +SWORD_NAMESPACE_END diff --git a/src/modules/filters/latin1utf16.cpp b/src/modules/filters/latin1utf16.cpp index 75ee998..8067381 100644 --- a/src/modules/filters/latin1utf16.cpp +++ b/src/modules/filters/latin1utf16.cpp @@ -1,6 +1,6 @@ /****************************************************************************** * - * Latin1UTF16 - SWFilter decendant to convert a Latin-1 character to UTF-16 + * Latin1UTF16 - SWFilter descendant to convert a Latin-1 character to UTF-16 
* */ @@ -9,112 +9,111 @@ #include <stdio.h> #include <latin1utf16.h> +SWORD_NAMESPACE_START + Latin1UTF16::Latin1UTF16() { } -char Latin1UTF16::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) -{ - unsigned char *from; - unsigned short *to; +char Latin1UTF16::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + const unsigned char *from; int len; + if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering + return -1; + - len = strlen(text) + 1; // shift string to right of buffer - if (len < maxlen) { - memmove(&text[maxlen - len], text, len); - from = (unsigned char*)&text[maxlen - len]; - } - else - from = (unsigned char*)text; - // ------------------------------- - - for (to = (unsigned short*)text; *from; from++) { - switch (*from) { + SWBuf orig = text; + from = (const unsigned char *)orig.c_str(); + + for (text = ""; *from; from++) { + text.setSize(text.size()+2); + switch (*from) { case 0x80: // '€' - *to++ = 0x20AC; + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x20AC; break; case 0x82: // '‚' - *to++ = 0x201A; + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x201A; break; case 0x83: // 'ƒ' - *to++ = 0x0192; + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x0192; break; case 0x84: // '„' - *to++ = 0x201E; + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x201E; break; case 0x85: // '…' - *to++ = 0x2026; + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x2026; break; case 0x86: // '†' - *to++ = 0x2020; + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x2020; break; case 0x87: // '‡' - *to++ = 0x2021; + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x2021; break; case 0x88: // 'ˆ' - *to++ = 0x02C6; + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x02C6; break; case 0x89: 
// '‰' - *to++ = 0x2030; + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x2030; break; case 0x8A: // 'Š' - *to++ = 0x0160; + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x0160; break; case 0x8B: // '‹' - *to++ = 0x2039; + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x2039; break; case 0x8C: // 'Œ' - *to++ = 0x0152; + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x0152; break; case 0x8E: // 'Ž' - *to++ = 0x017D; + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x017D; break; case 0x91: // '‘' - *to++ = 0x2018; + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x2018; break; case 0x92: // '’' - *to++ = 0x2019; + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x2019; break; case 0x93: // '“' - *to++ = 0x201C; + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x201C; break; case 0x94: // '”' - *to++ = 0x201D; + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x201D; break; case 0x95: // '•' - *to++ = 0x2022; + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x2022; break; case 0x96: // '–' - *to++ = 0x2013; + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x2013; break; case 0x97: // '—' - *to++ = 0x2014; + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x2014; break; case 0x98: // '˜' - *to++ = 0x02DC; + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x02DC; break; case 0x99: // '™' - *to++ = 0x2122; + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x2122; break; case 0x9A: // 'š' - *to++ = 0x0161; + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x0161; break; case 0x9B: // '›' - *to++ = 0x203A; + *((unsigned short 
*)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x203A; break; case 0x9C: // 'œ' - *to++ = 0x0153; + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x0153; break; case 0x9E: // 'ž' - *to++ = 0x017E; + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x017E; break; case 0x9F: // 'Ÿ' - *to++ = 0x0178; + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x0178; break; - default: - *to++ = (unsigned short)*from; - } + default: + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) *from; + } } - *to = 0; return 0; } + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/latin1utf8.cpp b/src/modules/filters/latin1utf8.cpp index 91af8dc..6d7d87b 100644 --- a/src/modules/filters/latin1utf8.cpp +++ b/src/modules/filters/latin1utf8.cpp @@ -1,6 +1,6 @@ /****************************************************************************** * - * Latin1UTF8 - SWFilter decendant to convert a Latin-1 character to UTF-8 + * Latin1UTF8 - SWFilter descendant to convert a Latin-1 character to UTF-8 * */ @@ -10,170 +10,164 @@ #include <latin1utf8.h> #include <swmodule.h> +SWORD_NAMESPACE_START + Latin1UTF8::Latin1UTF8() { } -char Latin1UTF8::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +char Latin1UTF8::processText(SWBuf &text, const SWKey *key, const SWModule *module) { - unsigned char *to, *from; - int len; - - len = strlen(text) + 1; - if (len == maxlen + 1) - maxlen = (maxlen + 1) * FILTERPAD; - // shift string to right of buffer - if (len < maxlen) { - memmove(&text[maxlen - len], text, len); - from = (unsigned char*)&text[maxlen - len]; - } - else - from = (unsigned char*)text; // ------------------------------- + const unsigned char *from; + if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering + return -1; + SWBuf orig = text; + from = (const unsigned char *)orig.c_str(); - for (to = (unsigned char*)text; *from; from++) { + for 
(text = ""; *from; from++) { if (*from < 0x80) { - *to++ = *from; + text += *from; } else if (*from < 0xc0) { switch(*from) { case 0x80: // '€' - *to++ = 0xe2; // 'â' - *to++ = 0x82; // '‚' - *to++ = 0xac; // '¬' + text += 0xe2; // 'â' + text += 0x82; // '‚' + text += 0xac; // '¬' break; case 0x82: // '‚' - *to++ = 0xe2; // 'â' - *to++ = 0x80; // '€' - *to++ = 0x9a; // 'š' + text += 0xe2; // 'â' + text += 0x80; // '€' + text += 0x9a; // 'š' break; case 0x83: // 'ƒ' - *to++ = 0xc6; // 'Æ' - *to++ = 0x92; // '’' + text += 0xc6; // 'Æ' + text += 0x92; // '’' break; case 0x84: // '„' - *to++ = 0xe2; // 'â' - *to++ = 0x80; // '€' - *to++ = 0x9e; // 'ž' + text += 0xe2; // 'â' + text += 0x80; // '€' + text += 0x9e; // 'ž' break; case 0x85: // '…' - *to++ = 0xe2; // 'â' - *to++ = 0x80; // '€' - *to++ = 0xa6; // '¦' + text += 0xe2; // 'â' + text += 0x80; // '€' + text += 0xa6; // '¦' break; case 0x86: // '†' - *to++ = 0xe2; // 'â' - *to++ = 0x80; // '€' - *to++ = 0xa0; // ' ' + text += 0xe2; // 'â' + text += 0x80; // '€' + text += 0xa0; // ' ' break; case 0x87: // '‡' - *to++ = 0xe2; // 'â' - *to++ = 0x80; // '€' - *to++ = 0xa1; // '¡' + text += 0xe2; // 'â' + text += 0x80; // '€' + text += 0xa1; // '¡' break; case 0x88: // 'ˆ' - *to++ = 0xcb; // 'Ë' - *to++ = 0x86; // '†' + text += 0xcb; // 'Ë' + text += 0x86; // '†' break; case 0x89: // '‰' - *to++ = 0xe2; // 'â' - *to++ = 0x80; // '€' - *to++ = 0xb0; // '°' + text += 0xe2; // 'â' + text += 0x80; // '€' + text += 0xb0; // '°' break; case 0x8A: // 'Š' - *to++ = 0xc5; // 'Å' - *to++ = 0xa0; // ' ' + text += 0xc5; // 'Å' + text += 0xa0; // ' ' break; case 0x8B: // '‹' - *to++ = 0xe2; // 'â' - *to++ = 0x80; // '€' - *to++ = 0xb9; // '¹' + text += 0xe2; // 'â' + text += 0x80; // '€' + text += 0xb9; // '¹' break; case 0x8C: // 'Œ' - *to++ = 0xc5; // 'Å' - *to++ = 0x92; // '’' + text += 0xc5; // 'Å' + text += 0x92; // '’' break; case 0x8E: // 'Ž' - *to++ = 0xc5; // 'Å' - *to++ = 0xbd; // '½' + text += 0xc5; // 'Å' + text += 
0xbd; // '½' break; case 0x91: // '‘' - *to++ = 0xe2; // 'â' - *to++ = 0x80; // '€' - *to++ = 0x98; // '˜' + text += 0xe2; // 'â' + text += 0x80; // '€' + text += 0x98; // '˜' break; case 0x92: // '’' - *to++ = 0xe2; // 'â' - *to++ = 0x80; // '€' - *to++ = 0x99; // '™' + text += 0xe2; // 'â' + text += 0x80; // '€' + text += 0x99; // '™' break; case 0x93: // '“' - *to++ = 0xe2; // 'â' - *to++ = 0x80; // '€' - *to++ = 0x9c; // 'œ' + text += 0xe2; // 'â' + text += 0x80; // '€' + text += 0x9c; // 'œ' break; case 0x94: // '”' - *to++ = 0xe2; // 'â' - *to++ = 0x80; // '€' - *to++ = 0x9d; // '' + text += 0xe2; // 'â' + text += 0x80; // '€' + text += 0x9d; // '' break; case 0x95: // '•' - *to++ = 0xe2; // 'â' - *to++ = 0x80; // '€' - *to++ = 0xa2; // '¢' + text += 0xe2; // 'â' + text += 0x80; // '€' + text += 0xa2; // '¢' break; case 0x96: // '–' - *to++ = 0xe2; // 'â' - *to++ = 0x80; // '€' - *to++ = 0x93; // '“' + text += 0xe2; // 'â' + text += 0x80; // '€' + text += 0x93; // '“' break; case 0x97: // '—' - *to++ = 0xe2; // 'â' - *to++ = 0x80; // '€' - *to++ = 0x94; // '”' + text += 0xe2; // 'â' + text += 0x80; // '€' + text += 0x94; // '”' break; case 0x98: // '˜' - *to++ = 0xcb; // 'Ë' - *to++ = 0x9c; // 'œ' + text += 0xcb; // 'Ë' + text += 0x9c; // 'œ' break; case 0x99: // '™' - *to++ = 0xe2; // 'â' - *to++ = 0x84; // '„' - *to++ = 0xa2; // '¢' + text += 0xe2; // 'â' + text += 0x84; // '„' + text += 0xa2; // '¢' break; case 0x9A: // 'š' - *to++ = 0xc5; // 'Å' - *to++ = 0xa1; // '¡' + text += 0xc5; // 'Å' + text += 0xa1; // '¡' break; case 0x9B: // '›' - *to++ = 0xe2; // 'â' - *to++ = 0x80; // '€' - *to++ = 0xba; // 'º' + text += 0xe2; // 'â' + text += 0x80; // '€' + text += 0xba; // 'º' break; case 0x9C: // 'œ' - *to++ = 0xc5; // 'Å' - *to++ = 0x93; // '“' + text += 0xc5; // 'Å' + text += 0x93; // '“' break; case 0x9E: // 'ž' - *to++ = 0xc5; // 'Å' - *to++ = 0xbe; // '¾' + text += 0xc5; // 'Å' + text += 0xbe; // '¾' break; case 0x9F: // 'Ÿ' - *to++ = 0xc5; // 'Å' - 
*to++ = 0xb8; // '¸' + text += 0xc5; // 'Å' + text += 0xb8; // '¸' break; default: - *to++ = 0xC2; - *to++ = *from; + text += 0xC2; + text += *from; } } else { - *to++ = 0xC3; - *to++ = (*from - 0x40); + text += 0xC3; + text += (*from - 0x40); } } - *to++ = 0; - *to = 0; return 0; } + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/osisfootnotes.cpp b/src/modules/filters/osisfootnotes.cpp new file mode 100644 index 0000000..59a7cae --- /dev/null +++ b/src/modules/filters/osisfootnotes.cpp @@ -0,0 +1,138 @@ +/****************************************************************************** + * + * osisfootnotes - SWFilter descendant to hide or show footnotes + * in an OSIS module. + */ + + +#include <stdlib.h> +#include <osisfootnotes.h> +#include <swmodule.h> +#include <swbuf.h> +#include <versekey.h> +#include <utilxml.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + +SWORD_NAMESPACE_START + +const char oName[] = "Footnotes"; +const char oTip[] = "Toggles Footnotes On and Off if they exist"; + +const SWBuf choices[3] = {"On", "Off", ""}; +const StringList oValues(&choices[0], &choices[2]); + +OSISFootnotes::OSISFootnotes() : SWOptionFilter(oName, oTip, &oValues) { + setOptionValue("Off"); +} + + +OSISFootnotes::~OSISFootnotes() { +} + + +char OSISFootnotes::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + SWBuf token; + bool intoken = false; + bool hide = false; + SWBuf tagText; + XMLTag startTag; + SWBuf refs = ""; + int footnoteNum = 1; + char buf[254]; + VerseKey parser = key->getText(); + + SWBuf orig = text; + const char *from = orig.c_str(); + + for (text = ""; *from; from++) { + + // remove all newlines temporarily to fix kjv2003 module + if ((*from == 10) || (*from == 13)) { + if ((text.length()>1) && (text[text.length()-2] != ' ') && (*(from+1) != ' ')) + text += ' '; + continue; + } + + + if (*from == '<') { + intoken = true; + token = ""; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + + 
XMLTag tag(token); + if (!strcmp(tag.getName(), "note")) { + if (!tag.isEndTag()) { + if (SWBuf("strongsMarkup") == tag.getAttribute("type")) { // handle bug in KJV2003 module where some note open tags were <note ... /> + tag.setEmpty(false); + } + if (!tag.isEmpty()) { +// if ((!tag.isEmpty()) || (SWBuf("strongsMarkup") == tag.getAttribute("type"))) { + refs = ""; + startTag = tag; + hide = true; + tagText = ""; + continue; + } + } + if (hide && tag.isEndTag()) { + if (module->isProcessEntryAttributes()) { + sprintf(buf, "%i", footnoteNum++); + StringList attributes = startTag.getAttributeNames(); + for (StringList::iterator it = attributes.begin(); it != attributes.end(); it++) { + module->getEntryAttributes()["Footnote"][buf][it->c_str()] = startTag.getAttribute(it->c_str()); + } + module->getEntryAttributes()["Footnote"][buf]["body"] = tagText; + startTag.setAttribute("swordFootnote", buf); + if ((startTag.getAttribute("type")) && (!strcmp(startTag.getAttribute("type"), "crossReference"))) { + if (!refs.length()) + refs = parser.ParseVerseList(tagText.c_str(), parser, true).getRangeText(); + module->getEntryAttributes()["Footnote"][buf]["refList"] = refs.c_str(); + } + } + hide = false; + if ((option) || ((startTag.getAttribute("type") && (!strcmp(startTag.getAttribute("type"), "crossReference"))))) { // we want the tag in the text; crossReferences are handled by another filter + text += startTag; + text.append(tagText); + } + else continue; + } + } + + // if not a heading token, keep token in text + if ((!strcmp(tag.getName(), "reference")) && (!tag.isEndTag())) { + SWBuf osisRef = tag.getAttribute("osisRef"); + if (refs.length()) + refs += "; "; + refs += osisRef; + } + if (!hide) { + text += '<'; + text.append(token); + text += '>'; + } + else { + tagText += '<'; + tagText.append(token); + tagText += '>'; + } + continue; + } + if (intoken) { //copy token + token += *from; + } + else if (!hide) { //copy text which is not inside a token + text += *from; + } + 
else tagText += *from; + } + return 0; +} + +SWORD_NAMESPACE_END + diff --git a/src/modules/filters/osisheadings.cpp b/src/modules/filters/osisheadings.cpp new file mode 100644 index 0000000..8ec9ce3 --- /dev/null +++ b/src/modules/filters/osisheadings.cpp @@ -0,0 +1,118 @@ +/****************************************************************************** + * + *osisheadings - SWFilter descendant to hide or show headings + * in an OSIS module. + */ + + +#include <stdlib.h> +#include <osisheadings.h> +#include <swmodule.h> +#include <utilxml.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + +SWORD_NAMESPACE_START + +const char oName[] = "Headings"; +const char oTip[] = "Toggles Headings On and Off if they exist"; + +const SWBuf choices[3] = {"On", "Off", ""}; +const StringList oValues(&choices[0], &choices[2]); + +OSISHeadings::OSISHeadings() : SWOptionFilter(oName, oTip, &oValues) { + setOptionValue("Off"); +} + + +OSISHeadings::~OSISHeadings() { +} + + +char OSISHeadings::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + SWBuf token; + bool intoken = false; + bool hide = false; + bool preverse = false; + SWBuf header; + int headerNum = 0; + int pvHeaderNum = 0; + char buf[254]; + + SWBuf orig = text; + const char *from = orig.c_str(); + + for (text = ""; *from; from++) { + if (*from == '<') { + intoken = true; + token = ""; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + + XMLTag tag(token); + if (!stricmp(tag.getName(), "title")) { + if ((tag.getAttribute("subtype")) && (!stricmp(tag.getAttribute("subtype"), "x-preverse"))) { + hide = true; + preverse = true; + header = ""; + continue; + } + if (!tag.isEndTag()) { + hide = true; + header = ""; + if (option) { // we want the tag in the text + text += '<'; + text.append(token); + text += '>'; + } + continue; + } + if (hide && tag.isEndTag()) { + + if (module->isProcessEntryAttributes() && option) { + if (preverse) { + sprintf(buf, "%i", pvHeaderNum++); + 
module->getEntryAttributes()["Heading"]["Preverse"][buf] = header; + } + else { + sprintf(buf, "%i", headerNum++); + module->getEntryAttributes()["Heading"]["Interverse"][buf] = header; + if (option) { // we want the tag in the text + text.append(header); + } + } + } + hide = false; + if ((!option) || (preverse)) { // we don't want the tag in the text anymore + preverse = false; + continue; + } + preverse = false; + } + } + + // if not a heading token, keep token in text + if (!hide) { + text += '<'; + text.append(token); + text += '>'; + } + continue; + } + if (intoken) { //copy token + token += *from; + } + else if (!hide) { //copy text which is not inside a token + text += *from; + } + else header += *from; + } + return 0; +} + +SWORD_NAMESPACE_END + diff --git a/src/modules/filters/osishtmlhref.cpp b/src/modules/filters/osishtmlhref.cpp new file mode 100644 index 0000000..26e6dff --- /dev/null +++ b/src/modules/filters/osishtmlhref.cpp @@ -0,0 +1,339 @@ +/*************************************************************************** + osishtmlhref.cpp - OSIS to HTML with hrefs filter + ------------------- + begin : 2003-06-24 + copyright : 2003 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <osishtmlhref.h> +#include <utilxml.h> +#include <versekey.h> +#include <swmodule.h> + +SWORD_NAMESPACE_START + + +OSISHTMLHREF::MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) { + osisQToTick = ((!module->getConfigEntry("OSISqToTick")) || (strcmp(module->getConfigEntry("OSISqToTick"), "false"))); +} + + +OSISHTMLHREF::OSISHTMLHREF() { + setTokenStart("<"); + setTokenEnd(">"); + + setEscapeStart("&"); + setEscapeEnd(";"); + + setEscapeStringCaseSensitive(true); + + addEscapeStringSubstitute("amp", "&"); + addEscapeStringSubstitute("apos", "'"); + addEscapeStringSubstitute("lt", "<"); + addEscapeStringSubstitute("gt", ">"); + addEscapeStringSubstitute("quot", "\""); + addTokenSubstitute("lg", "<br />"); + addTokenSubstitute("/lg", "<br />"); + + setTokenCaseSensitive(true); +} + + +bool OSISHTMLHREF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) { + // manually process if it wasn't a simple substitution + if (!substituteToken(buf, token)) { + MyUserData *u = (MyUserData *)userData; + XMLTag tag(token); + + // <w> tag + if (!strcmp(tag.getName(), "w")) { + + // start <w> tag + if ((!tag.isEmpty()) && (!tag.isEndTag())) { + u->w = token; + } + + // end or empty <w> tag + else { + bool endTag = tag.isEndTag(); + SWBuf lastText; + bool show = true; // to handle unplaced article in kjv2003-- temporary till combined + + if (endTag) { + tag = u->w.c_str(); + lastText = u->lastTextNode.c_str(); + } + else lastText = "stuff"; + + const char *attrib; + const char *val; + if (attrib = tag.getAttribute("xlit")) { + val = strchr(attrib, ':'); + val = (val) ? (val + 1) : attrib; + buf.appendFormatted(" %s", val); + } + if (attrib = tag.getAttribute("gloss")) { + val = strchr(attrib, ':'); + val = (val) ? 
(val + 1) : attrib; + buf.appendFormatted(" %s", val); + } + if (attrib = tag.getAttribute("lemma")) { + int count = tag.getAttributePartCount("lemma"); + int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0 + do { + attrib = tag.getAttribute("lemma", i); + if (i < 0) i = 0; // to handle our -1 condition + val = strchr(attrib, ':'); + val = (val) ? (val + 1) : attrib; + const char *val2 = val; + if ((strchr("GH", *val)) && (isdigit(val[1]))) + val2++; + if ((!strcmp(val2, "3588")) && (lastText.length() < 1)) + show = false; + else buf.appendFormatted(" <small><em><<a href=\"type=Strongs value=%s\">%s</a>></em></small> ", val, val2); + } while (++i < count); + } + if ((attrib = tag.getAttribute("morph")) && (show)) { + SWBuf savelemma = tag.getAttribute("savlm"); + if ((strstr(savelemma.c_str(), "3588")) && (lastText.length() < 1)) + show = false; + if (show) { + int count = tag.getAttributePartCount("morph"); + int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0 + do { + attrib = tag.getAttribute("morph", i); + if (i < 0) i = 0; // to handle our -1 condition + val = strchr(attrib, ':'); + val = (val) ? (val + 1) : attrib; + const char *val2 = val; + if ((*val == 'T') && (strchr("GH", val[1])) && (isdigit(val[2]))) + val2+=2; + buf.appendFormatted(" <small><em>(<a href=\"type=morph class=%s value=%s\">%s</a>)</em></small> ", tag.getAttribute("morph"), val, val2); + } while (++i < count); + } + } + if (attrib = tag.getAttribute("POS")) { + val = strchr(attrib, ':'); + val = (val) ? 
(val + 1) : attrib; + buf.appendFormatted(" %s", val); + } + + /*if (endTag) + buf += "}";*/ + } + } + + // <note> tag + else if (!strcmp(tag.getName(), "note")) { + if (!tag.isEndTag()) { + if (!tag.isEmpty()) { + SWBuf type = tag.getAttribute("type"); + + if (type != "strongsMarkup") { // leave strong's markup notes out, in the future we'll probably have different option filters to turn different note types on or off + SWBuf footnoteNumber = tag.getAttribute("swordFootnote"); + VerseKey *vkey; + // see if we have a VerseKey * or descendant + try { + vkey = SWDYNAMIC_CAST(VerseKey, u->key); + } + catch ( ... ) { } + if (vkey) { + char ch = ((tag.getAttribute("type") && ((!strcmp(tag.getAttribute("type"), "crossReference")) || (!strcmp(tag.getAttribute("type"), "x-cross-ref")))) ? 'x':'n'); + buf.appendFormatted("<a href=\"noteID=%s.%c.%s\"><small><sup>*%c</sup></small></a> ", vkey->getText(), ch, footnoteNumber.c_str(), ch); + } + } + u->suspendTextPassThru = true; + } + } + if (tag.isEndTag()) { + u->suspendTextPassThru = false; + } + } + + // <p> paragraph tag + else if (!strcmp(tag.getName(), "p")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { // non-empty start tag + buf += "<!P><br />"; + } + else if (tag.isEndTag()) { // end tag + buf += "<!/P><br />"; + userData->supressAdjacentWhitespace = true; + } + else { // empty paragraph break marker + buf += "<!P><br />"; + userData->supressAdjacentWhitespace = true; + } + } + + // <reference> tag + else if (!strcmp(tag.getName(), "reference")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + buf += "<a href=\"\">"; + } + else if (tag.isEndTag()) { + buf += "</a>"; + } + } + + // <l> poetry, etc + else if (!strcmp(tag.getName(), "l")) { + if (tag.isEmpty()) { + buf += "<br />"; + } + else if (tag.isEndTag()) { + buf += "<br />"; + } + else if (tag.getAttribute("sID")) { // empty line marker + buf += "<br />"; + } + } + + // <milestone type="line"/> + else if ((!strcmp(tag.getName(), "milestone")) && 
(tag.getAttribute("type")) && (!strcmp(tag.getAttribute("type"), "line"))) { + buf += "<br />"; + userData->supressAdjacentWhitespace = true; + } + + // <title> + else if (!strcmp(tag.getName(), "title")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + buf += "<b>"; + } + else if (tag.isEndTag()) { + buf += "</b><br />"; + } + } + + // <hi> hi? hi contrast? + else if (!strcmp(tag.getName(), "hi")) { + SWBuf type = tag.getAttribute("type"); + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + if (type == "b" || type == "x-b") { + buf += "<b> "; + u->inBold = true; + } + else { // all other types + buf += "<i> "; + u->inBold = false; + } + } + else if (tag.isEndTag()) { + if(u->inBold) { + buf += "</b>"; + u->inBold = false; + } + else + buf += "</i>"; + } + else { // empty hi marker + // what to do? is this even valid? + } + } + + // <q> quote + else if (!strcmp(tag.getName(), "q")) { + SWBuf type = tag.getAttribute("type"); + SWBuf who = tag.getAttribute("who"); + const char *lev = tag.getAttribute("level"); + int level = (lev) ? atoi(lev) : 1; + + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + /*buf += "{";*/ + + //alternate " and ' + if (u->osisQToTick) + buf += (level % 2) ? '\"' : '\''; + + if (who == "Jesus") { + buf += "<font color=\"red\"> "; + } + } + else if (tag.isEndTag()) { + //alternate " and ' + if (u->osisQToTick) + buf += (level % 2) ? '\"' : '\''; + //buf += "</font>"; + } + else { // empty quote marker + //alternate " and ' + if (u->osisQToTick) + buf += (level % 2) ? '\"' : '\''; + } + } + + // <transChange> + else if (!strcmp(tag.getName(), "transChange")) { + SWBuf type = tag.getAttribute("type"); + + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + +// just do all transChange tags this way for now +// if (type == "supplied") + buf += "<i>"; + } + else if (tag.isEndTag()) { + buf += "</i>"; + } + else { // empty transChange marker? 
+ } + } + + // image + else if (!strcmp(tag.getName(), "figure")) { + const char *src = tag.getAttribute("src"); + if (!src) // assert we have a src attribute + return false; + + char* filepath = new char[strlen(u->module->getConfigEntry("AbsoluteDataPath")) + strlen(token)]; + *filepath = 0; + strcpy(filepath, userData->module->getConfigEntry("AbsoluteDataPath")); + strcat(filepath, src); + +// we do this because BibleCS looks for this EXACT format for an image tag + buf+="<image src=\""; + buf+=filepath; + buf+="\" />"; +/* + char imgc; + for (c = filepath + strlen(filepath); c > filepath && *c != '.'; c--); + c++; + FILE* imgfile; + if (stricmp(c, "jpg") || stricmp(c, "jpeg")) { + imgfile = fopen(filepath, "r"); + if (imgfile != NULL) { + buf += "{\\nonshppict {\\pict\\jpegblip "; + while (feof(imgfile) != EOF) { + buf.appendFormatted("%2x", fgetc(imgfile)); + } + fclose(imgfile); + buf += "}}"; + } + } + else if (stricmp(c, "png")) { + buf += "{\\*\\shppict {\\pict\\pngblip "; + + buf += "}}"; + } +*/ + delete [] filepath; + } + + else { + return false; // we still didn't handle token + } + } + return true; +} + + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/osislemma.cpp b/src/modules/filters/osislemma.cpp new file mode 100644 index 0000000..e31c002 --- /dev/null +++ b/src/modules/filters/osislemma.cpp @@ -0,0 +1,78 @@ +/****************************************************************************** + * + * osislemma - SWFilter descendant to hide or show lemmata + * in a OSIS module. 
+ */ + + +#include <stdlib.h> +#include <osislemma.h> +#include <utilxml.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + +SWORD_NAMESPACE_START + +const char oName[] = "Lemmas"; +const char oTip[] = "Toggles Lemmas On and Off if they exist"; + +const SWBuf choices[3] = {"On", "Off", ""}; +const StringList oValues(&choices[0], &choices[2]); + +OSISLemma::OSISLemma() : SWOptionFilter(oName, oTip, &oValues) { + setOptionValue("Off"); +} + + +OSISLemma::~OSISLemma() { +} + + +char OSISLemma::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + if (!option) { // if we don't want lemmas + const char *from; + char token[2048]; // cheese. Fix. + int tokpos = 0; + bool intoken = false; + bool lastspace = false; + SWBuf orig = text; + from = orig.c_str(); + + for (text = ""; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + XMLTag tag(token); + if ((!strcmp(tag.getName(), "w")) && (!tag.isEndTag())) { // Lemma + SWBuf lemma = tag.getAttribute("lemma"); + if (lemma.length()) { + tag.setAttribute("lemma", 0); + tag.setAttribute("savlm", lemma.c_str()); + } + } + // keep tag, possibly with the lemma removed + text += tag; + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos] = 0; + } + else { + text += *from; + lastspace = (*from == ' '); + } + } + } + return 0; +} + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/osismorph.cpp b/src/modules/filters/osismorph.cpp new file mode 100644 index 0000000..1b2d116 --- /dev/null +++ b/src/modules/filters/osismorph.cpp @@ -0,0 +1,75 @@ +/****************************************************************************** + * + * osismorph - SWFilter descendant to hide or show morph tags + * in a OSIS module. 
+ */ + + +#include <stdlib.h> +#include <osismorph.h> +#include <utilxml.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + +SWORD_NAMESPACE_START + +const char oName[] = "Morphological Tags"; +const char oTip[] = "Toggles Morphological Tags On and Off if they exist"; + +const SWBuf choices[3] = {"On", "Off", ""}; +const StringList oValues(&choices[0], &choices[2]); + +OSISMorph::OSISMorph() : SWOptionFilter(oName, oTip, &oValues) { + setOptionValue("Off"); +} + + +OSISMorph::~OSISMorph() { +} + + +char OSISMorph::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + if (!option) { // if we don't want morph tags + const char *from; + char token[2048]; // cheese. Fix. + int tokpos = 0; + bool intoken = false; + bool lastspace = false; + SWBuf orig = text; + from = orig.c_str(); + + for (text = ""; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + XMLTag tag(token); + if ((!strcmp(tag.getName(), "w")) && (!tag.isEndTag())) { // Morph + if (tag.getAttribute("morph")) + tag.setAttribute("morph", 0); + } + // keep tag, possibly with the morph removed + text += tag; + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos] = 0; + } + else { + text += *from; + lastspace = (*from == ' '); + } + } + } + return 0; +} + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/osisplain.cpp b/src/modules/filters/osisplain.cpp new file mode 100644 index 0000000..7a12a27 --- /dev/null +++ b/src/modules/filters/osisplain.cpp @@ -0,0 +1,151 @@ +/*************************************************************************** + osisplain.cpp - OSIS to Plaintext filter + ------------------- + begin : 2003-02-15 + copyright : 2003 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + 
* * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <osisplain.h> +#include <utilxml.h> +#include <ctype.h> + +SWORD_NAMESPACE_START + +OSISPlain::OSISPlain() { + setTokenStart("<"); + setTokenEnd(">"); + + setEscapeStart("&"); + setEscapeEnd(";"); + + setEscapeStringCaseSensitive(true); + + addEscapeStringSubstitute("amp", "&"); + addEscapeStringSubstitute("apos", "'"); + addEscapeStringSubstitute("lt", "<"); + addEscapeStringSubstitute("gt", ">"); + addEscapeStringSubstitute("quot", "\""); + + + addTokenSubstitute("title", "\n"); + addTokenSubstitute("/title", "\n"); + addTokenSubstitute("/l", "\n"); + addTokenSubstitute("lg", "\n"); + addTokenSubstitute("/lg", "\n"); + + setTokenCaseSensitive(true); +} + + +bool OSISPlain::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) { + // manually process if it wasn't a simple substitution + if (!substituteToken(buf, token)) { + MyUserData *u = (MyUserData *)userData; + XMLTag tag(token); + if (((*token == 'w') && (token[1] == ' ')) || + ((*token == '/') && (token[1] == 'w') && (!token[2]))) { + bool start = false; + if (*token == 'w') { + if (token[strlen(token)-1] != '/') { + u->w = token; + return true; + } + start = true; + } + tag = (start) ? token : u->w.c_str(); + bool show = true; // to handle unplaced article in kjv2003-- temporary till combined + + SWBuf lastText = (start) ? "stuff" : u->lastTextNode.c_str(); + + const char *attrib; + const char *val; + if (attrib = tag.getAttribute("xlit")) { + val = strchr(attrib, ':'); + val = (val) ? 
(val + 1) : attrib; + buf.appendFormatted(" <%s>", val); + } + if (attrib = tag.getAttribute("gloss")) { + val = strchr(attrib, ':'); + val = (val) ? (val + 1) : attrib; + buf.appendFormatted(" <%s>", val); + } + if (attrib = tag.getAttribute("lemma")) { + int count = tag.getAttributePartCount("lemma"); + int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0 + do { + attrib = tag.getAttribute("lemma", i); + if (i < 0) i = 0; // to handle our -1 condition + val = strchr(attrib, ':'); + val = (val) ? (val + 1) : attrib; + if ((strchr("GH", *val)) && (isdigit(val[1]))) + val++; + if ((!strcmp(val, "3588")) && (lastText.length() < 1)) + show = false; + else buf.appendFormatted(" <%s>}", val); + } while (++i < count); + } + if ((attrib = tag.getAttribute("morph")) && (show)) { + int count = tag.getAttributePartCount("morph"); + int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0 + do { + attrib = tag.getAttribute("morph", i); + if (i < 0) i = 0; // to handle our -1 condition + val = strchr(attrib, ':'); + val = (val) ? (val + 1) : attrib; + if ((*val == 'T') && (strchr("GH", val[1])) && (isdigit(val[2]))) + val+=2; + buf.appendFormatted(" (%s)", val); + } while (++i < count); + } + if (attrib = tag.getAttribute("POS")) { + val = strchr(attrib, ':'); + val = (val) ? 
(val + 1) : attrib; + buf.appendFormatted(" <%s>", val); + } + } + + // <note> tag + else if (!strncmp(token, "note", 4)) { + if (!strstr(token, "strongsMarkup")) { // leave strong's markup notes out, in the future we'll probably have different option filters to turn different note types on or off + buf += " ("; + } + else u->suspendTextPassThru = true; + } + else if (!strncmp(token, "/note", 5)) { + if (!u->suspendTextPassThru) + buf += ")"; + else u->suspendTextPassThru = false; + } + + // <p> paragraph tag + else if (((*token == 'p') && ((token[1] == ' ') || (!token[1]))) || + ((*token == '/') && (token[1] == 'p') && (!token[2]))) { + userData->supressAdjacentWhitespace = true; + buf += "\n"; + } + + // <milestone type="line"/> + else if ((!strcmp(tag.getName(), "milestone")) && (tag.getAttribute("type")) && (!strcmp(tag.getAttribute("type"), "line"))) { + userData->supressAdjacentWhitespace = true; + buf += "\n"; + } + + else { + return false; // we still didn't handle token + } + } + return true; +} + + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/osisredletterwords.cpp b/src/modules/filters/osisredletterwords.cpp new file mode 100644 index 0000000..a9a4834 --- /dev/null +++ b/src/modules/filters/osisredletterwords.cpp @@ -0,0 +1,77 @@ +/****************************************************************************** + * + * OSISRedLetterWords - SWFilter descendant to toggle red coloring for words + * of Christ in an OSIS module. 
+ */ + + +#include <stdlib.h> +#include <osisredletterwords.h> +#include <swmodule.h> +#include <utilxml.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + +SWORD_NAMESPACE_START + +const char oName[] = "Words of Christ in Red"; +const char oTip[] = "Toggles Red Coloring for Words of Christ On and Off if they are marked"; + +const SWBuf choices[3] = {"On", "Off", ""}; +const StringList oValues(&choices[0], &choices[2]); + +OSISRedLetterWords::OSISRedLetterWords() : SWOptionFilter(oName, oTip, &oValues) { + setOptionValue("On"); +} + + +OSISRedLetterWords::~OSISRedLetterWords() { +} + + +char OSISRedLetterWords::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + SWBuf token; + bool intoken = false; + char buf[254]; + + SWBuf orig = text; + const char *from = orig.c_str(); + + if (!option) + for (text = ""; *from; from++) { + if (*from == '<') { + intoken = true; + token = ""; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + + XMLTag tag(token); + if (!stricmp(tag.getName(), "q")) { + if ((tag.getAttribute("who")) && (!stricmp(tag.getAttribute("who"), "Jesus"))) { + tag.setAttribute("who", 0); + text += tag; // tag toString already has < and > + continue; + } + } + // if we haven't modified, still use original token instead of tag, so we don't reorder attribs and stuff. It doesn't really matter, but this is less intrusive to the original markup. 
+ text += '<'; + text.append(token); + text += '>'; + continue; + } + if (intoken) { //copy token + token += *from; + } + else { //copy text which is not inside a token + text += *from; + } + } + return 0; +} + +SWORD_NAMESPACE_END + diff --git a/src/modules/filters/osisrtf.cpp b/src/modules/filters/osisrtf.cpp new file mode 100644 index 0000000..e94e8ae --- /dev/null +++ b/src/modules/filters/osisrtf.cpp @@ -0,0 +1,333 @@ +/*************************************************************************** + osisrtf.cpp - OSIS to RTF filter + ------------------- + begin : 2003-02-15 + copyright : 2003 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <osisrtf.h> +#include <utilxml.h> +#include <versekey.h> +#include <swmodule.h> + +SWORD_NAMESPACE_START + + +OSISRTF::MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) { + inXRefNote = false; + BiblicalText = false; + if (module) { + version = module->Name(); + BiblicalText = (!strcmp(module->Type(), "Biblical Texts")); + } + osisQToTick = ((!module->getConfigEntry("OSISqToTick")) || (strcmp(module->getConfigEntry("OSISqToTick"), "false"))); +} + + +OSISRTF::OSISRTF() { + setTokenStart("<"); + setTokenEnd(">"); + + setEscapeStart("&"); + setEscapeEnd(";"); + + setEscapeStringCaseSensitive(true); + + addEscapeStringSubstitute("amp", "&"); + addEscapeStringSubstitute("apos", "'"); + addEscapeStringSubstitute("lt", "<"); + addEscapeStringSubstitute("gt", ">"); + addEscapeStringSubstitute("quot", "\""); + addTokenSubstitute("lg", "{\\par}"); + addTokenSubstitute("/lg", "{\\par}"); + + setTokenCaseSensitive(true); +} + + +bool OSISRTF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) { + // manually process if it wasn't a simple substitution + if (!substituteToken(buf, token)) { + MyUserData *u = (MyUserData *)userData; + XMLTag tag(token); + + // <w> tag + if (!strcmp(tag.getName(), "w")) { + + // start <w> tag + if ((!tag.isEmpty()) && (!tag.isEndTag())) { + buf += "{"; + u->w = token; + } + + // end or empty <w> tag + else { + bool endTag = tag.isEndTag(); + SWBuf lastText; + bool show = true; // to handle unplaced article in kjv2003-- temporary till combined + + if (endTag) { + tag = u->w.c_str(); + lastText = u->lastTextNode.c_str(); + } + else lastText = "stuff"; + + const char *attrib; + const char *val; + if (attrib = tag.getAttribute("xlit")) { + val = strchr(attrib, ':'); + val = (val) ? 
(val + 1) : attrib; + buf.appendFormatted(" {\\fs15 <%s>}", val); + } + if (attrib = tag.getAttribute("gloss")) { + val = strchr(attrib, ':'); + val = (val) ? (val + 1) : attrib; + buf.appendFormatted(" {\\fs15 <%s>}", val); + } + if (attrib = tag.getAttribute("lemma")) { + int count = tag.getAttributePartCount("lemma"); + int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0 + do { + attrib = tag.getAttribute("lemma", i); + if (i < 0) i = 0; // to handle our -1 condition + val = strchr(attrib, ':'); + val = (val) ? (val + 1) : attrib; + if ((strchr("GH", *val)) && (isdigit(val[1]))) + val++; + if ((!strcmp(val, "3588")) && (lastText.length() < 1)) + show = false; + else buf.appendFormatted(" {\\cf3 \\sub <%s>}", val); + } while (++i < count); + } + if ((attrib = tag.getAttribute("morph")) && (show)) { + SWBuf savelemma = tag.getAttribute("savlm"); + if ((strstr(savelemma.c_str(), "3588")) && (lastText.length() < 1)) + show = false; + if (show) { + int count = tag.getAttributePartCount("morph"); + int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0 + do { + attrib = tag.getAttribute("morph", i); + if (i < 0) i = 0; // to handle our -1 condition + val = strchr(attrib, ':'); + val = (val) ? (val + 1) : attrib; + if ((*val == 'T') && (strchr("GH", val[1])) && (isdigit(val[2]))) + val+=2; + buf.appendFormatted(" {\\cf4 \\sub (%s)}", val); + } while (++i < count); + } + } + if (attrib = tag.getAttribute("POS")) { + val = strchr(attrib, ':'); + val = (val) ? 
(val + 1) : attrib; + buf.appendFormatted(" {\\fs15 <%s>}", val); + } + + if (endTag) + buf += "}"; + } + } + + // <note> tag + else if (!strcmp(tag.getName(), "note")) { + if (!tag.isEndTag()) { + if (!tag.isEmpty()) { + SWBuf type = tag.getAttribute("type"); + + if (type != "strongsMarkup") { // leave strong's markup notes out, in the future we'll probably have different option filters to turn different note types on or off + SWBuf footnoteNumber = tag.getAttribute("swordFootnote"); + VerseKey *vkey; + // see if we have a VerseKey * or descendant + try { + vkey = SWDYNAMIC_CAST(VerseKey, u->key); + } + catch ( ... ) { } + if (vkey) { + char ch = ((!strcmp(type.c_str(), "crossReference")) || (!strcmp(type.c_str(), "x-cross-ref"))) ? 'x':'n'; + buf.appendFormatted("{\\super <a href=\"\">*%c%i.%s</a>} ", ch, vkey->Verse(), footnoteNumber.c_str()); + u->inXRefNote = (ch == 'x'); + } + } + u->suspendTextPassThru = true; + } + } + if (tag.isEndTag()) { + u->suspendTextPassThru = false; + u->inXRefNote = false; + } + } + + // <p> paragraph tag + else if (!strcmp(tag.getName(), "p")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { // non-empty start tag + buf += "{\\par "; + } + else if (tag.isEndTag()) { // end tag + buf += "\\par}"; + userData->supressAdjacentWhitespace = true; + } + else { // empty paragraph break marker + buf += "{\\par\\par}"; + userData->supressAdjacentWhitespace = true; + } + } + + // <reference> tag + else if (!strcmp(tag.getName(), "reference")) { + if (!u->inXRefNote) { // only show these if we're not in an xref note + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + buf += "{<a href=\"\">"; + } + else if (tag.isEndTag()) { + buf += "</a>}"; + } + } + } + + // <l> poetry + else if (!strcmp(tag.getName(), "l")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + buf += "{"; + } + else if (tag.isEndTag()) { + buf += "\\par}"; + } + else if (tag.getAttribute("sID")) { // empty line marker + buf += "{\\par}"; + } + } + + // <milestone type="line"/> + 
else if ((!strcmp(tag.getName(), "milestone")) && (tag.getAttribute("type")) && (!strcmp(tag.getAttribute("type"), "line"))) { + buf += "{\\par}"; + userData->supressAdjacentWhitespace = true; + } + + // <title> + else if (!strcmp(tag.getName(), "title")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + buf += "{\\par\\i1\\b1 "; + } + else if (tag.isEndTag()) { + buf += "\\par}"; + } + } + + // <hi> + else if (!strcmp(tag.getName(), "hi")) { + SWBuf type = tag.getAttribute("type"); + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + if (type == "b" || type == "x-b") + buf += "{\\b1 "; + else // all other types + buf += "{\\i1 "; + } + else if (tag.isEndTag()) { + buf += "}"; + } + } + + // <q> quote + else if (!strcmp(tag.getName(), "q")) { + SWBuf type = tag.getAttribute("type"); + SWBuf who = tag.getAttribute("who"); + const char *lev = tag.getAttribute("level"); + int level = (lev) ? atoi(lev) : 1; + + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + buf += "{"; + + //alternate " and ' + if (u->osisQToTick) + buf += (level % 2) ? '\"' : '\''; + + if (who == "Jesus") + buf += "\\cf6 "; + } + else if (tag.isEndTag()) { + //alternate " and ' + if (u->osisQToTick) + buf += (level % 2) ? '\"' : '\''; + buf += "}"; + } + else { // empty quote marker + //alternate " and ' + if (u->osisQToTick) + buf += (level % 2) ? 
'\"' : '\''; + } + } + + // <transChange> + else if (!strcmp(tag.getName(), "transChange")) { + SWBuf type = tag.getAttribute("type"); + + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + +// just do all transChange tags this way for now +// if (type == "supplied") + buf += "{\\i1 "; + } + else if (tag.isEndTag()) { + buf += "}"; + } + } + + // image + else if (!strcmp(tag.getName(), "figure")) { + const char *src = tag.getAttribute("src"); + if (!src) // assert we have a src attribute + return false; + + char* filepath = new char[strlen(u->module->getConfigEntry("AbsoluteDataPath")) + strlen(token)]; + *filepath = 0; + strcpy(filepath, userData->module->getConfigEntry("AbsoluteDataPath")); + strcat(filepath, src); + +// we do this because BibleCS looks for this EXACT format for an image tag + buf+="<img src=\""; + buf+=filepath; + buf+="\" />"; +/* + char imgc; + for (c = filepath + strlen(filepath); c > filepath && *c != '.'; c--); + c++; + FILE* imgfile; + if (stricmp(c, "jpg") || stricmp(c, "jpeg")) { + imgfile = fopen(filepath, "r"); + if (imgfile != NULL) { + buf += "{\\nonshppict {\\pict\\jpegblip "; + while (feof(imgfile) != EOF) { + buf.appendFormatted("%2x", fgetc(imgfile)); + } + fclose(imgfile); + buf += "}}"; + } + } + else if (stricmp(c, "png")) { + buf += "{\\*\\shppict {\\pict\\pngblip "; + + buf += "}}"; + } +*/ + delete [] filepath; + } + else { + return false; // we still didn't handle token + } + } + return true; +} + + +SWORD_NAMESPACE_END + diff --git a/src/modules/filters/osisscripref.cpp b/src/modules/filters/osisscripref.cpp new file mode 100644 index 0000000..566e08a --- /dev/null +++ b/src/modules/filters/osisscripref.cpp @@ -0,0 +1,104 @@ +/****************************************************************************** + * + * OSISScripref - SWFilter descendant to hide or show scripture references + * in an OSIS module. 
+ */ + + +#include <stdlib.h> +#include <osisscripref.h> +#include <swmodule.h> +#include <utilxml.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + +SWORD_NAMESPACE_START + +const char oName[] = "Cross-references"; +const char oTip[] = "Toggles Scripture Cross-references On and Off if they exist"; + +const SWBuf choices[3] = {"On", "Off", ""}; +const StringList oValues(&choices[0], &choices[2]); + +OSISScripref::OSISScripref() : SWOptionFilter(oName, oTip, &oValues) { + setOptionValue("On"); +} + + +OSISScripref::~OSISScripref() { +} + + +char OSISScripref::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + SWBuf token; + bool intoken = false; + bool hide = false; + SWBuf tagText; + XMLTag startTag; + int tagTextNum = 1; + int footnoteNum = 1; + char buf[254]; + + SWBuf orig = text; + const char *from = orig.c_str(); + + for (text = ""; *from; from++) { + if (*from == '<') { + intoken = true; + token = ""; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + + XMLTag tag(token); + if (!strcmp(tag.getName(), "note")) { + if (!tag.isEndTag() && (!tag.isEmpty())) { + startTag = tag; + if ((tag.getAttribute("type")) && (!strcmp(tag.getAttribute("type"), "crossReference"))) { + hide = true; + tagText = ""; + if (option) { // we want the tag in the text + text += '<'; + text.append(token); + text += '>'; + } + continue; + } + } + if (hide && tag.isEndTag()) { + hide = false; + if (option) { // we want the tag in the text + text.append(tagText); // end tag gets added further down + } + else continue; // don't let the end tag get added to the text + } + } + + // if not a heading token, keep token in text + if (!hide) { + text += '<'; + text.append(token); + text += '>'; + } + else { + tagText += '<'; + tagText.append(token); + tagText += '>'; + } + continue; + } + if (intoken) { //copy token + token += *from; + } + else if (!hide) { //copy text which is not inside a token + text += *from; + } + else tagText += *from; + 
} + return 0; +} + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/osisstrongs.cpp b/src/modules/filters/osisstrongs.cpp new file mode 100644 index 0000000..7949b50 --- /dev/null +++ b/src/modules/filters/osisstrongs.cpp @@ -0,0 +1,128 @@ +/****************************************************************************** + * + * osisstrongs - SWFilter descendant to hide or show strongs number + * in a OSIS module. + */ + + +#include <stdlib.h> +#include <osisstrongs.h> +#include <swmodule.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif +#include <ctype.h> + +SWORD_NAMESPACE_START + +const char oName[] = "Strong's Numbers"; +const char oTip[] = "Toggles Strong's Numbers On and Off if they exist"; + +const SWBuf choices[3] = {"On", "Off", ""}; +const StringList oValues(&choices[0], &choices[2]); + + +OSISStrongs::OSISStrongs() : SWOptionFilter(oName, oTip, &oValues) { + setOptionValue("Off"); +} + + +OSISStrongs::~OSISStrongs() { +} + + +char OSISStrongs::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + const char *from; + char token[2048]; // cheese. Fix. 
+ int tokpos = 0; + bool intoken = false; + int len; + bool lastspace = false; + int word = 1; + char val[128]; + char wordstr[5]; + char *valto; + char *ch; + unsigned int textStart = 0, textEnd = 0; + bool newText = false; + SWBuf tmp; + + SWBuf orig = text; + from = orig.c_str(); + + len = strlen(text) + 1; // shift string to right of buffer + + for (text = ""; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + textEnd = text.size(); + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if ((*token == 'w') && (token[1] == ' ')) { // Word + if (module->isProcessEntryAttributes()) { + valto = val; + char *num = strstr(token, "lemma=\"x-Strongs:"); + if (num) { + for (num+=17; ((*num) && (*num != '\"')); num++) + *valto++ = *num; + *valto = 0; + if (atoi((!isdigit(*val))?val+1:val) < 5627) { + // normal strongs number + sprintf(wordstr, "%03d", word++); + module->getEntryAttributes()["Word"][wordstr]["Strongs"] = val; + tmp = ""; + tmp.append(text.c_str()+textStart, (int)(textEnd - textStart)); + module->getEntryAttributes()["Word"][wordstr]["Text"] = tmp; + newText = true; + } + else { + // verb morph + sprintf(wordstr, "%03d", word-1); + module->getEntryAttributes()["Word"][wordstr]["Morph"] = val; + } + } + } + if (!option) { + char *num = strstr(token, "lemma=\"x-Strongs:"); + if (num) { + memcpy(num, "savlm", 5); +/* + for (int i = 0; i < 17; i++) + *num++ = ' '; + for (; ((*num) && (*num!='\"')); num++) + *num = ' '; + if (*num) + *num = ' '; +*/ + } + } + } + // if not a strongs token, keep token in text + text += '<'; + for (char *tok = token; *tok; tok++) + text += *tok; + text += '>'; + if (newText) {textStart = text.size(); newText = false; } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + text += *from; + lastspace = (*from == ' '); + } + } + return 0; +} + +SWORD_NAMESPACE_END diff --git 
a/src/modules/filters/osiswebif.cpp b/src/modules/filters/osiswebif.cpp new file mode 100644 index 0000000..f2db7b7 --- /dev/null +++ b/src/modules/filters/osiswebif.cpp @@ -0,0 +1,212 @@ +/*************************************************************************** + OSISWEBIF.cpp - OSIS to HTML filter with hrefs + for strongs and morph tags + ------------------- + begin : 2003-10-23 + copyright : 2003 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <osiswebif.h> +#include <utilxml.h> +#include <versekey.h> +#include <swmodule.h> +#include <ctype.h> + + +SWORD_NAMESPACE_START + + +OSISWEBIF::OSISWEBIF() : baseURL(""), passageStudyURL(baseURL + "passagestudy.jsp") { +} + +bool OSISWEBIF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) { + // manually process if it wasn't a simple substitution + if (!substituteToken(buf, token)) { + MyUserData *u = (MyUserData *)userData; + XMLTag tag(token); + + // <w> tag + if (!strcmp(tag.getName(), "w")) { + + // start <w> tag + if ((!tag.isEmpty()) && (!tag.isEndTag())) { + u->w = token; + } + + // end or empty <w> tag + else { + bool endTag = tag.isEndTag(); + SWBuf lastText; + bool show = true; // to handle unplaced article in kjv2003-- temporary till combined + + if (endTag) { + tag = u->w.c_str(); + lastText = u->lastTextNode.c_str(); + } + else lastText = "stuff"; + + const char *attrib; + const char *val; + if (attrib = tag.getAttribute("xlit")) { + val = strchr(attrib, ':'); + val 
= (val) ? (val + 1) : attrib; + buf.appendFormatted(" %s", val); + } + if (attrib = tag.getAttribute("gloss")) { + val = strchr(attrib, ':'); + val = (val) ? (val + 1) : attrib; + buf.appendFormatted(" %s", val); + } + if (attrib = tag.getAttribute("lemma")) { + int count = tag.getAttributePartCount("lemma"); + int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0 + do { + attrib = tag.getAttribute("lemma", i); + if (i < 0) i = 0; // to handle our -1 condition + val = strchr(attrib, ':'); + val = (val) ? (val + 1) : attrib; + const char *val2 = val; + if ((strchr("GH", *val)) && (isdigit(val[1]))) + val2++; + if ((!strcmp(val2, "3588")) && (lastText.length() < 1)) + show = false; + else buf.appendFormatted(" <small><em><<a href=\"%s?showStrong=%s#cv\">%s</a>></em></small> ", passageStudyURL.c_str(), encodeURL(val2).c_str(), val2); + } while (++i < count); + } + if ((attrib = tag.getAttribute("morph")) && (show)) { + SWBuf savelemma = tag.getAttribute("savlm"); + if ((strstr(savelemma.c_str(), "3588")) && (lastText.length() < 1)) + show = false; + if (show) { + int count = tag.getAttributePartCount("morph"); + int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0 + do { + attrib = tag.getAttribute("morph", i); + if (i < 0) i = 0; // to handle our -1 condition + val = strchr(attrib, ':'); + val = (val) ? (val + 1) : attrib; + const char *val2 = val; + if ((*val == 'T') && (strchr("GH", val[1])) && (isdigit(val[2]))) + val2+=2; + buf.appendFormatted(" <small><em>(<a href=\"%s?showMorph=%s#cv\">%s</a>)</em></small> ", passageStudyURL.c_str(), encodeURL(val2).c_str(), val2); + } while (++i < count); + } + } + if (attrib = tag.getAttribute("POS")) { + val = strchr(attrib, ':'); + val = (val) ? 
(val + 1) : attrib; + buf.appendFormatted(" %s", val); + } + + /*if (endTag) + buf += "}";*/ + } + } + + // <note> tag + else if (!strcmp(tag.getName(), "note")) { + if (!tag.isEndTag()) { + if (!tag.isEmpty()) { + SWBuf type = tag.getAttribute("type"); + + if (type != "strongsMarkup") { // leave strong's markup notes out, in the future we'll probably have different option filters to turn different note types on or off + SWBuf footnoteNumber = tag.getAttribute("swordFootnote"); + VerseKey *vkey; + // see if we have a VerseKey * or descendant + try { + vkey = SWDYNAMIC_CAST(VerseKey, u->key); + } + catch ( ... ) { } + if (vkey) { + char ch = ((tag.getAttribute("type") && ((!strcmp(tag.getAttribute("type"), "crossReference")) || (!strcmp(tag.getAttribute("type"), "x-cross-ref")))) ? 'x':'n'); +// buf.appendFormatted("<a href=\"noteID=%s.%c.%s\"><small><sup>*%c</sup></small></a> ", vkey->getText(), ch, footnoteNumber.c_str(), ch); + } + } + u->suspendTextPassThru = true; + } + } + if (tag.isEndTag()) { + u->suspendTextPassThru = false; + } + } + // <title> + else if (!strcmp(tag.getName(), "title")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + buf += "<h3>"; + } + else if (tag.isEndTag()) { + buf += "</h3>"; + } + } + + // <q> quote + else if (!strcmp(tag.getName(), "q")) { + SWBuf type = tag.getAttribute("type"); + SWBuf who = tag.getAttribute("who"); + const char *lev = tag.getAttribute("level"); + int level = (lev) ? atoi(lev) : 1; + + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + /*buf += "{";*/ + + //alternate " and ' + if (u->osisQToTick) + buf += (level % 2) ? '\"' : '\''; + + if (who == "Jesus") { + buf += "<span class=\"wordsOfJesus\"> "; + } + } + else if (tag.isEndTag()) { + //alternate " and ' + if (u->osisQToTick) + buf += (level % 2) ? '\"' : '\''; + buf += "</span>"; + } + else { // empty quote marker + //alternate " and ' + if (u->osisQToTick) + buf += (level % 2) ? 
'\"' : '\''; + } + } + + // <transChange> + else if (!strcmp(tag.getName(), "transChange")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + SWBuf type = tag.getAttribute("type"); + u->lastTransChange = type; + +// just do all transChange tags this way for now + if ((type == "added") || (type == "supplied")) + buf += "<i>"; + else if (type == "tenseChange") + buf += "*"; + } + else if (tag.isEndTag()) { + SWBuf type = u->lastTransChange; + if ((type == "added") || (type == "supplied")) + buf += "</i>"; + } + else { // empty transChange marker? + } + } + else { + return OSISHTMLHREF::handleToken(buf, token, userData); + } + } + return true; +} + + +SWORD_NAMESPACE_END + diff --git a/src/modules/filters/plainfootnotes.cpp b/src/modules/filters/plainfootnotes.cpp index 96fc4d8..0ae50ef 100644 --- a/src/modules/filters/plainfootnotes.cpp +++ b/src/modules/filters/plainfootnotes.cpp @@ -19,84 +19,65 @@ #include <swkey.h> #include <stdlib.h> -#include <string.h> #ifndef __GNUC__ #else #include <unixstr.h> #endif -const char PLAINFootnotes::on[] = "On"; -const char PLAINFootnotes::off[] = "Off"; -const char PLAINFootnotes::optName[] = "Footnotes"; -const char PLAINFootnotes::optTip[] = "Toggles Footnotes On and Off In Bible Texts If They Exist"; +SWORD_NAMESPACE_START -PLAINFootnotes::PLAINFootnotes(){ - option = false; - options.push_back(on); - options.push_back(off); -} - -PLAINFootnotes::~PLAINFootnotes(){ -} +const char oName[] = "Footnotes"; +const char oTip[] = "Toggles Footnotes On and Off In Bible Texts If They Exist"; +const SWBuf choices[3] = {"On", "Off", ""}; +const StringList oValues(&choices[0], &choices[2]); -void PLAINFootnotes::setOptionValue(const char *ival) -{ - option = (!stricmp(ival, on)); +PLAINFootnotes::PLAINFootnotes() : SWOptionFilter(oName, oTip, &oValues) { + setOptionValue("Off"); } -const char *PLAINFootnotes::getOptionValue() -{ - return (option) ? 
on:off; +PLAINFootnotes::~PLAINFootnotes(){ } -char PLAINFootnotes::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) -{ - char token[2048]; - int tokpos = 0; - bool intoken = false; - bool lastspace = false; - +char PLAINFootnotes::processText(SWBuf &text, const SWKey *key, const SWModule *module) { if (!option) { // if we don't want footnotes - char *to, *from; - int len; - bool hide = false; + //char token[2048]; + //SWBuf token; + //int tokpos = 0; + //bool intoken = false; + //bool lastspace = false; - len = strlen(text) + 1; // shift string to right of buffer - if (len < maxlen) - { - memmove(&text[maxlen - len], text, len); - from = &text[maxlen - len]; - } - else from = text; // ------------------------------- + bool hide = false; - for (to = text; *from; from++) { - if (*from == '{') // Footnote start + SWBuf orig = text; + const char *from = orig.c_str(); + for (text = ""; *from; from++) { + if (*from == '{') // Footnote start { hide = true; continue; } - if (*from == '}') // Footnote end + else if (*from == '}') // Footnote end { - hide=false; + hide = false; continue; } - if (intoken) { - if (tokpos < 2045) - token[tokpos++] = *from; - token[tokpos+2] = 0; - } - else { + + //if (intoken) { + //if (tokpos < 2045) + // token += *from; + // token[tokpos+2] = 0; + //} + //else { if (!hide) { - *to++ = *from; - lastspace = (*from == ' '); + text = *from; + //lastspace = (*from == ' '); } - } + //} } - *to++ = 0; - *to = 0; } return 0; } +SWORD_NAMESPACE_END diff --git a/src/modules/filters/plainhtml.cpp b/src/modules/filters/plainhtml.cpp index fefb029..19f4bc2 100644 --- a/src/modules/filters/plainhtml.cpp +++ b/src/modules/filters/plainhtml.cpp @@ -15,120 +15,56 @@ * * ***************************************************************************/ -#include <stdlib.h> -#include <string.h> #include <plainhtml.h> +SWORD_NAMESPACE_START PLAINHTML::PLAINHTML() { } -char PLAINHTML::ProcessText(char *text, int maxlen, const SWKey *key, 
const SWModule *module) +char PLAINHTML::processText(SWBuf &text, const SWKey *key, const SWModule *module) { - char *to, *from; - int len; int count = 0; - len = strlen(text) + 1; // shift string to right of buffer - if (len < maxlen) { - memmove(&text[maxlen - len], text, len); - from = &text[maxlen - len]; - } - else from = text; // ------------------------------- - for (to = text; *from; from++) + SWBuf orig = text; + const char *from = orig.c_str(); + for (text = ""; *from; from++) { - if ((*from == '\n') && (from[1] == '\n')) // paragraph + if ((*from == '\n') && (from[1] == '\n')) // two newlinea are a paragraph { - *to++ = '<'; - *to++ = 'P'; - *to++ = '>'; + text += "<P>"; from++; continue; } else { - if ((*from == '\n')) // && (from[1] != '\n')) // new line + if ((*from == '\n')) // && (from[1] != '\n')) // only one new line { - *to++ = '<'; - *to++ = 'B'; - *to++ = 'R'; - *to++ = '>'; + text += "<BR>"; continue; } } - if (*from == '{') { - *to++ = '<'; - *to++ = 'F'; - *to++ = 'O'; - *to++ = 'N'; - *to++ = 'T'; - *to++ = ' '; - *to++ = 'C'; - *to++ = 'O'; - *to++ = 'L'; - *to++ = 'O'; - *to++ = 'R'; - *to++ = '='; - *to++ = '#'; - *to++ = '8'; - *to++ = '0'; - *to++ = '0'; - *to++ = '0'; - *to++ = '0'; - *to++ = '0'; - *to++ = '>'; - - *to++ = '<'; - *to++ = 'S'; - *to++ = 'M'; - *to++ = 'A'; - *to++ = 'L'; - *to++ = 'L'; - *to++ = '>'; - *to++ = ' '; - *to++ = '('; + if (*from == '{') { //footnote start + text += "<FONT COLOR=\"#80000\"><SMALL> ("; continue; } - - if (*from == '}') + else if (*from == '}') //footnote end { - *to++ = ')'; - *to++ = ' '; - *to++ = '<'; - *to++ = '/'; - *to++ = 'S'; - *to++ = 'M'; - *to++ = 'A'; - *to++ = 'L'; - *to++ = 'L'; - *to++ = '>'; - - *to++ = '<'; - *to++ = '/'; - *to++ = 'F'; - *to++ = 'O'; - *to++ = 'N'; - *to++ = 'T'; - *to++ = '>'; + text += ") </SMALL></FONT>"; continue; } - - if ((*from == ' ') && (count > 5000)) + else if ((*from == ' ') && (count > 5000)) { - *to++ = '<'; - *to++ = 'W'; - *to++ = 'B'; - 
*to++ = 'R'; - *to++ = '>'; + text += "<WBR>"; count = 0; continue; } - *to++ = *from; + text += *from; count++; } - *to++ = 0; - *to = 0; return 0; } + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/rtfhtml.cpp b/src/modules/filters/rtfhtml.cpp index f0b842b..6b228fb 100644 --- a/src/modules/filters/rtfhtml.cpp +++ b/src/modules/filters/rtfhtml.cpp @@ -16,54 +16,41 @@ ***************************************************************************/ #include <stdlib.h> -#include <string.h> #include <rtfhtml.h> +SWORD_NAMESPACE_START RTFHTML::RTFHTML() { } -char RTFHTML::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +char RTFHTML::processText(SWBuf &text, const SWKey *key, const SWModule *module) { - char *to, *from; - int len; bool center = false; - len = strlen(text) + 1; // shift string to right of buffer - if (len < maxlen) { - memmove(&text[maxlen - len], text, len); - from = &text[maxlen - len]; - } - else from = text; // ------------------------------- - for (to = text; *from; from++) { + const char *from; + SWBuf orig = text; + from = orig.c_str(); + for (text = ""; *from; from++) + { if (*from == '\\') // a RTF command { - if ((from[1] == 'p') && (from[2] == 'a') && (from[3] == 'r') && (from[4] == 'd')) - { // switch all modifier off + if ( !strncmp(from+1, "pard", 4) ) + //(from[1] == 'p') && (from[2] == 'a') && (from[3] == 'r') && (from[4] == 'd')) + { // switch all modifiers off if (center) { - *to++ = '<'; - *to++ = '/'; - *to++ = 'C'; - *to++ = 'E'; - *to++ = 'N'; - *to++ = 'T'; - *to++ = 'E'; - *to++ = 'R'; - *to++ = '>'; + text += "</CENTER>"; center = false; } from += 4; continue; } - if ((from[1] == 'p') && (from[2] == 'a') && (from[3] == 'r')) + if ( !strncmp(from+1, "par", 3) ) + //(from[1] == 'p') && (from[2] == 'a') && (from[3] == 'r')) { - *to++ = '<'; - *to++ = 'P'; - *to++ = '>'; - *to++ = '\n'; + text += "<P>\n"; from += 3; continue; } @@ -72,18 +59,12 @@ char RTFHTML::ProcessText(char *text, int maxlen, 
const SWKey *key, const SWModu from += 1; continue; } - if ((from[1] == 'q') && (from[2] == 'c')) // center on + if ( !strncmp(from+1, "qc", 2) ) + //(from[1] == 'q') && (from[2] == 'c')) // center on { if (!center) { - *to++ = '<'; - *to++ = 'C'; - *to++ = 'E'; - *to++ = 'N'; - *to++ = 'T'; - *to++ = 'E'; - *to++ = 'R'; - *to++ = '>'; + text += "<CENTER>"; center = true; } from += 2; @@ -91,9 +72,9 @@ char RTFHTML::ProcessText(char *text, int maxlen, const SWKey *key, const SWModu } } - *to++ = *from; + text += *from; } - *to++ = 0; - *to = 0; return 0; } + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/rwphtml.cpp b/src/modules/filters/rwphtml.cpp deleted file mode 100644 index 6f8ae4f..0000000 --- a/src/modules/filters/rwphtml.cpp +++ /dev/null @@ -1,187 +0,0 @@ -/*************************************************************************** - rwphtml.cpp - description - ------------------- - begin : Thu Jun 24 1999 - copyright : (C) 1999 by Torsten Uhlmann - email : TUhlmann@gmx.de - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. 
* - * * - ***************************************************************************/ - -#include <stdlib.h> -#include <string.h> -#include <ctype.h> -#include <rwphtml.h> - -RWPHTML::RWPHTML() -{ -} - - -char RWPHTML::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) -{ - char *to, *from; - signed char greek_str[500]; - bool inverse = false; - bool first_letter = false; - int len; - - len = strlen(text) + 1; // shift string to right of buffer - if (len < maxlen) { - memmove(&text[maxlen - len], text, len); - from = &text[maxlen - len]; - } else - from = text; - for (to = text; *from; from++) { - if (*from == '\\') { - ++from; - int i=0; - first_letter = true; - greek_str[0] = '\0'; - while (*from != '\\') { /* get the greek word or phrase */ - greek_str[i++] = *from; - greek_str[i + 1] = '\0'; - from++; - } /* convert to symbol font as best we can */ - strcpy(to,"<I> </I><FONT FACE=\"symbol\">"); - to += strlen(to); - for (int j = 0; j < i; j++) { - if ((first_letter) - && (greek_str[j] == 'h')) { - if (greek_str[j + 1] == 'o') { - *to++ = 'o'; - first_letter = false; - ++j; - continue; - } else if (greek_str[j + 1] == 'a') { - *to++ = 'a'; - first_letter = false; - ++j; - continue; - } else if (greek_str[j + 1] == 'w') { - *to++ = 'w'; - first_letter = false; - ++j; - continue; - } else if (greek_str[j + 1] == 'u') { - *to++ = 'u'; - first_letter = false; - ++j; - continue; - } else if (greek_str[j + 1] == - -109) { - *to++ = 'w'; - first_letter = false; - ++j; - continue; - } else if (greek_str[j + 1] == - -120) { - *to++ = 'h'; - first_letter = false; - ++j; - continue; - } else if (greek_str[j + 1] == 'i') { - *to++ = 'i'; - first_letter = false; - ++j; - continue; - }else if (greek_str[j + 1] == 'e') { - *to++ = 'e'; - first_letter = false; - ++j; - continue; - } - first_letter = false; - } - if ((greek_str[j] == 't') - && (greek_str[j + 1] == 'h')) { - *to++ = 'q'; - ++j; - continue; - } - if ((greek_str[j] == 'c') - && 
(greek_str[j + 1] == 'h')) { - *to++ = 'c'; - ++j; - continue; - } - if ((greek_str[j] == 'p') - && (greek_str[j + 1] == 'h')) { - ++j; - *to++ = 'f'; - continue; - } - if (greek_str[j] == -120) { - *to++ = 'h'; - continue; - } - if (greek_str[j] == -125) { - *to++ = 'a'; - continue; - } - if (greek_str[j] == -109) { - if(greek_str[j+1] == 'i') ++j; - *to++ = 'w'; - continue; - } - if (greek_str[j] == ' ') - first_letter = true; - if (greek_str[j] == 's') { - if(isalpha(greek_str[j + 1])) *to++ = 's'; - else if(!isprint(greek_str[j] )) *to++ = 's'; - else *to++ = 'V'; - continue; - } - if (greek_str[j] == '\'') { - continue; - } - *to++ = greek_str[j]; - } - strcpy(to,"</FONT><I> </I>"); - to += strlen(to); - continue; - } - if (*from == '#') { // verse markings (e.g. "#Mark 1:1|") - inverse = true; - strcpy(to,"<FONT COLOR=#0000FF>"); - to += strlen(to); - continue; - } - if ((*from == '|') && (inverse)) { - inverse = false; - strcpy(to,"</FONT>"); - to += strlen(to); - continue; - } - if (*from == '{') { - strcpy(to,"<BR><STRONG>"); - to += strlen(to); - if ((from - &text[maxlen - len]) > 10) { // not the beginning of the entry - strcpy(to,"<P>"); - to += strlen(to); - } - continue; - } - if (*from == '}') { - strcpy(to," </STRONG>"); - to += strlen(to); - continue; - } - if ((*from == '\n') && (from[1] == '\n')) { - strcpy(to,"<P>"); - to += strlen(to); - continue; - } - *to++ = *from; - } - *to++ = 0; - *to = 0; - return 0; -} diff --git a/src/modules/filters/rwprtf.cpp b/src/modules/filters/rwprtf.cpp deleted file mode 100644 index 8f7b074..0000000 --- a/src/modules/filters/rwprtf.cpp +++ /dev/null @@ -1,107 +0,0 @@ -/****************************************************************************** - * - * rwprtf - SWFilter decendant to convert all GBF tags to RTF tags - */ - - -#include <stdlib.h> -#include <string.h> -#include <rwprtf.h> - - -RWPRTF::RWPRTF() { - -} - - -char RWPRTF::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) 
-{ - char *to, *from; - bool ingreek = false; - bool inverse = false; - int len; - - len = strlen(text) + 1; // shift string to right of buffer - if (len < maxlen) { - memmove(&text[maxlen - len], text, len); - from = &text[maxlen - len]; - } - else from = text; // ------------------------------- - for (to = text; *from; from++) { - if (*from == '\\') { - if(!ingreek) { - ingreek = true; - *to++ = '['; - *to++ = '{'; - *to++ = '\\'; - *to++ = 'f'; - *to++ = '8'; - *to++ = ' '; - continue; - } - else { - ingreek = false; - *to++ = '}'; - *to++ = ']'; - continue; - } - } - - if ((ingreek) && ((*from == 'h') || (*from == 'H'))) - continue; // 'h's are mostly useless in RWP translitterations. The greek is more correct without them. - - if (*from == '#') { // verse markings (e.g. "#Mark 1:1|") - inverse = true; - *to++ = '{'; - *to++ = '\\'; - *to++ = 'c'; - *to++ = 'f'; - *to++ = '2'; - *to++ = ' '; - *to++ = '#'; - continue; - } - if ((*from == '|') && (inverse)) { - inverse = false; - *to++ = '|'; - *to++ = '}'; - continue; - } - - if (*from == '{') { - *to++ = '{'; - *to++ = '\\'; - *to++ = 'b'; - *to++ = ' '; - if ((from - &text[maxlen - len]) > 10) { // not the beginning of the entry - *to++ = '\\'; - *to++ = 'p'; - *to++ = 'a'; - *to++ = 'r'; - *to++ = ' '; - } - continue; - } - - if (*from == '}') { - // this is kinda neat... 
DO NOTHING - } - if ((*from == '\n') && (from[1] == '\n')) { - *to++ = '\\'; - *to++ = 'p'; - *to++ = 'a'; - *to++ = 'r'; - *to++ = '\\'; - *to++ = 'p'; - *to++ = 'a'; - *to++ = 'r'; - *to++ = ' '; - continue; - } - - *to++ = *from; - } - *to++ = 0; - *to = 0; - return 0; -} diff --git a/src/modules/filters/scsuutf8.cpp b/src/modules/filters/scsuutf8.cpp index d0d5ceb..aff265d 100644 --- a/src/modules/filters/scsuutf8.cpp +++ b/src/modules/filters/scsuutf8.cpp @@ -1,6 +1,6 @@ /****************************************************************************** * - * SCSUUTF8 - SWFilter decendant to convert a SCSU character to UTF-8 + * SCSUUTF8 - SWFilter descendant to convert a SCSU character to UTF-8 * */ @@ -22,6 +22,8 @@ #include <scsuutf8.h> +SWORD_NAMESPACE_START + SCSUUTF8::SCSUUTF8() { } @@ -59,11 +61,13 @@ unsigned char* SCSUUTF8::UTF8Output(unsigned long uchar, unsigned char* text) return text; } -char SCSUUTF8::ProcessText(char *text, int len, const SWKey *key, const SWModule *module) -{ +char SCSUUTF8::processText(SWBuf &text, const SWKey *key, const SWModule *module) { +/* unsigned char *to, *from; unsigned long buflen = len * FILTERPAD; char active = 0, mode = 0; + if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering + return -1; static unsigned short start[8] = {0x0000,0x0080,0x0100,0x0300,0x2000,0x2080,0x2100,0x3000}; static unsigned short slide[8] = {0x0080,0x00C0,0x0400,0x0600,0x0900,0x3040,0x30A0,0xFF00}; @@ -103,7 +107,7 @@ char SCSUUTF8::ProcessText(char *text, int len, const SWKey *key, const SWModule }; if (!len) - return 0; + return 0; memmove(&text[buflen - len], text, len); from = (unsigned char*)&text[buflen - len]; @@ -114,95 +118,95 @@ char SCSUUTF8::ProcessText(char *text, int len, const SWKey *key, const SWModule for (int i = 0; i < len;) { - if (i >= len) break; - c = from[i++]; + if (i >= len) break; + c = from[i++]; - if (c >= 0x80) + if (c >= 0x80) { to = UTF8Output (c - 0x80 + slide[active], to); } - else if (c >= 0x20 && c 
<= 0x7F) + else if (c >= 0x20 && c <= 0x7F) { to = UTF8Output (c, to); } - else if (c == 0x0 || c == 0x9 || c == 0xA || c == 0xC || c == 0xD) + else if (c == 0x0 || c == 0x9 || c == 0xA || c == 0xC || c == 0xD) { to = UTF8Output (c, to); } - else if (c >= 0x1 && c <= 0x8) /* SQn */ + else if (c >= 0x1 && c <= 0x8) // SQn { - if (i >= len) break; - /* single quote */ d = from[i++]; + if (i >= len) break; + d = from[i++]; // single quote to = UTF8Output (d < 0x80 ? d + start [c - 0x1] : d - 0x80 + slide [c - 0x1], to); } - else if (c >= 0x10 && c <= 0x17) /* SCn */ + else if (c >= 0x10 && c <= 0x17) // SCn { - /* change window */ active = c - 0x10; + active = c - 0x10; // change window } - else if (c >= 0x18 && c <= 0x1F) /* SDn */ + else if (c >= 0x18 && c <= 0x1F) // SDn { - /* define window */ active = c - 0x18; - if (i >= len) break; + active = c - 0x18; // define window + if (i >= len) break; slide [active] = win [from[i++]]; } - else if (c == 0xB) /* SDX */ + else if (c == 0xB) // SDX { - if (i >= len) break; - c = from[i++]; + if (i >= len) break; + c = from[i++]; - if (i >= len) break; - d = from[i++]; + if (i >= len) break; + d = from[i++]; slide [active = c>>5] = 0x10000 + (((c & 0x1F) << 8 | d) << 7); } - else if (c == 0xE) /* SQU */ + else if (c == 0xE) // SQU { - if (i >= len) break; - /* SQU */ c = from[i++]; + if (i >= len) break; + c = from[i++]; // SQU - if (i >= len) break; - to = UTF8Output (c << 8 | from[i++], to); - } - else if (c == 0xF) /* SCU */ + if (i >= len) break; + to = UTF8Output (c << 8 | from[i++], to); + } + else if (c == 0xF) // SCU { - /* change to Unicode mode */ mode = 1; + mode = 1; // change to Unicode mode while (mode) { - if (i >= len) break; - c = from[i++]; + if (i >= len) break; + c = from[i++]; - if (c <= 0xDF || c >= 0xF3) + if (c <= 0xDF || c >= 0xF3) { - if (i >= len) break; + if (i >= len) break; to = UTF8Output (c << 8 | from[i++], to); } - else if (c == 0xF0) /* UQU */ + else if (c == 0xF0) // UQU { - if (i >= len) 
break; + if (i >= len) break; c = from[i++]; - if (i >= len) break; - to = UTF8Output (c << 8 | from[i++], to); + if (i >= len) break; + to = UTF8Output (c << 8 | from[i++], to); } - else if (c >= 0xE0 && c <= 0xE7) /* UCn */ + else if (c >= 0xE0 && c <= 0xE7) // UCn { active = c - 0xE0; mode = 0; } - else if (c >= 0xE8 && c <= 0xEF) /* UDn */ + else if (c >= 0xE8 && c <= 0xEF) // UDn { - if (i >= len) break; + if (i >= len) break; slide [active=c-0xE8] = win [from[i++]]; mode = 0; } - else if (c == 0xF1) /* UDX */ + else if (c == 0xF1) // UDX { - if (i >= len) break; + if (i >= len) break; c = from[i++]; - if (i >= len) break; - d = from[i++]; + if (i >= len) break; + d = from[i++]; slide [active = c>>5] = 0x10000 + (((c & 0x1F) << 8 | d) << 7); mode = 0; @@ -215,6 +219,8 @@ char SCSUUTF8::ProcessText(char *text, int len, const SWKey *key, const SWModule *to++ = 0; *to = 0; +*/ return 0; } +SWORD_NAMESPACE_END diff --git a/src/modules/filters/swbasicfilter.cpp b/src/modules/filters/swbasicfilter.cpp index 2865085..55c8843 100644 --- a/src/modules/filters/swbasicfilter.cpp +++ b/src/modules/filters/swbasicfilter.cpp @@ -4,7 +4,7 @@ * many filters will need and can use as a starting * point. 
* - * $Id: swbasicfilter.cpp,v 1.17 2002/03/11 19:01:28 scribe Exp $ + * $Id: swbasicfilter.cpp,v 1.33 2003/10/24 02:43:46 scribe Exp $ * * Copyright 2001 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society @@ -23,16 +23,23 @@ */ #include <stdlib.h> -#include <string.h> #include <swbasicfilter.h> #include <stdio.h> #include <stdarg.h> +SWORD_NAMESPACE_START + +const char SWBasicFilter::INITIALIZE = 1; +const char SWBasicFilter::PRECHAR = 2; +const char SWBasicFilter::POSTCHAR = 4; +const char SWBasicFilter::FINALIZE = 8; + SWBasicFilter::SWBasicFilter() { - tokenStart = 0; - tokenEnd = 0; - escStart = 0; - escEnd = 0; + processStages = 0; + tokenStart = 0; + tokenEnd = 0; + escStart = 0; + escEnd = 0; setTokenStart("<"); setTokenEnd(">"); @@ -40,9 +47,9 @@ SWBasicFilter::SWBasicFilter() { setEscapeEnd(";"); escStringCaseSensitive = false; - tokenCaseSensitive = false; - passThruUnknownToken = false; - passThruUnknownEsc = false; + tokenCaseSensitive = false; + passThruUnknownToken = false; + passThruUnknownEsc = false; } @@ -87,10 +94,18 @@ void SWBasicFilter::addTokenSubstitute(const char *findString, const char *repla if (!tokenCaseSensitive) { stdstr(&buf, findString); toupperstr(buf); - tokenSubMap.insert(DualStringMap::value_type(buf, replaceString)); + tokenSubMap[buf] = replaceString; delete [] buf; } - else tokenSubMap.insert(DualStringMap::value_type(findString, replaceString)); + else tokenSubMap[findString] = replaceString; +} + + +void SWBasicFilter::replaceTokenSubstitute(const char *findString, const char *replaceString) { + if (tokenSubMap.find(findString) != tokenSubMap.end()) { + tokenSubMap.erase( tokenSubMap.find(findString) ); //erase entry + } + addTokenSubstitute(findString, replaceString); } @@ -106,19 +121,15 @@ void SWBasicFilter::addEscapeStringSubstitute(const char *findString, const char else escSubMap.insert(DualStringMap::value_type(findString, replaceString)); } - -void SWBasicFilter::pushString(char **buf, 
const char *format, ...) { - va_list argptr; - - va_start(argptr, format); - *buf += vsprintf(*buf, format, argptr); - va_end(argptr); - -// *buf += strlen(*buf); +void SWBasicFilter::replaceEscapeStringSubstitute(const char *findString, const char *replaceString) { + if (escSubMap.find(findString) != escSubMap.end()) { + escSubMap.erase( escSubMap.find(findString) ); //erase entry + } + addEscapeStringSubstitute(findString, replaceString); } -bool SWBasicFilter::substituteToken(char **buf, const char *token) { +bool SWBasicFilter::substituteToken(SWBuf &buf, const char *token) { DualStringMap::iterator it; if (!tokenCaseSensitive) { @@ -131,14 +142,14 @@ bool SWBasicFilter::substituteToken(char **buf, const char *token) { it = tokenSubMap.find(token); if (it != tokenSubMap.end()) { - pushString(buf, it->second.c_str()); + buf += it->second.c_str(); return true; } return false; } -bool SWBasicFilter::substituteEscapeString(char **buf, const char *escString) { +bool SWBasicFilter::substituteEscapeString(SWBuf &buf, const char *escString) { DualStringMap::iterator it; if (!escStringCaseSensitive) { @@ -151,72 +162,76 @@ bool SWBasicFilter::substituteEscapeString(char **buf, const char *escString) { it = escSubMap.find(escString); if (it != escSubMap.end()) { - pushString(buf, it->second.c_str()); + buf += it->second.c_str(); return true; } return false; } -bool SWBasicFilter::handleToken(char **buf, const char *token, DualStringMap &userData) { +bool SWBasicFilter::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) { return substituteToken(buf, token); } -bool SWBasicFilter::handleEscapeString(char **buf, const char *escString, DualStringMap &userData) { +bool SWBasicFilter::handleEscapeString(SWBuf &buf, const char *escString, BasicFilterUserData *userData) { return substituteEscapeString(buf, escString); } void SWBasicFilter::setEscapeStart(const char *escStart) { stdstr(&(this->escStart), escStart); + escStartLen = strlen(escStart); } void 
SWBasicFilter::setEscapeEnd(const char *escEnd) { stdstr(&(this->escEnd), escEnd); + escEndLen = strlen(escEnd); } void SWBasicFilter::setTokenStart(const char *tokenStart) { stdstr(&(this->tokenStart), tokenStart); + tokenStartLen = strlen(tokenStart); } void SWBasicFilter::setTokenEnd(const char *tokenEnd) { stdstr(&(this->tokenEnd), tokenEnd); + tokenEndLen = strlen(tokenEnd); } -char SWBasicFilter::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) { - this->key = key; - this->module = module; - char *to, *from, token[4096]; +char SWBasicFilter::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + char *from; + char token[4096]; int tokpos = 0; - bool intoken = false; - int len; + bool intoken = false; bool inEsc = false; - char escStartLen = strlen(escStart); - char escEndLen = strlen(escEnd); char escStartPos = 0, escEndPos = 0; - char tokenStartLen = strlen(tokenStart); - char tokenEndLen = strlen(tokenEnd); char tokenStartPos = 0, tokenEndPos = 0; - DualStringMap userData; - string lastTextNode; + SWBuf lastTextNode; + BasicFilterUserData *userData = createUserData(module, key); - bool suspendTextPassThru = false; - userData["suspendTextPassThru"] = "false"; + SWBuf orig = text; + from = orig.getRawData(); + text = ""; - len = strlen(text) + 1; // shift string to right of buffer - if (len < maxlen) { - memmove(&text[maxlen - len], text, len); - from = &text[maxlen - len]; + if (processStages & INITIALIZE) { + if (processStage(INITIALIZE, text, from, userData)) { // processStage handled it all + delete userData; + return 0; + } } - else from = text; // ------------------------------- + for (;*from; from++) { + + if (processStages & PRECHAR) { + if (processStage(PRECHAR, text, from, userData)) // processStage handled this char + continue; + } - for (to = text; *from; from++) { if (*from == tokenStart[tokenStartPos]) { if (tokenStartPos == (tokenStartLen - 1)) { intoken = true; @@ -247,15 +262,14 @@ char 
SWBasicFilter::ProcessText(char *text, int maxlen, const SWKey *key, const if (*from == escEnd[escEndPos]) { if (escEndPos == (escEndLen - 1)) { intoken = false; - userData["lastTextNode"] = lastTextNode; - if ((!handleEscapeString(&to, token, userData)) && (passThruUnknownEsc)) { - pushString(&to, escStart); - pushString(&to, token); - pushString(&to, escEnd); + userData->lastTextNode = lastTextNode; + if ((!handleEscapeString(text, token, userData)) && (passThruUnknownEsc)) { + text += escStart; + text += token; + text += escEnd; } escEndPos = escStartPos = tokenEndPos = tokenStartPos = 0; lastTextNode = ""; - suspendTextPassThru = (!userData["suspendTextPassThru"].compare("true")); continue; } } @@ -265,15 +279,14 @@ char SWBasicFilter::ProcessText(char *text, int maxlen, const SWKey *key, const if (*from == tokenEnd[tokenEndPos]) { if (tokenEndPos == (tokenEndLen - 1)) { intoken = false; - userData["lastTextNode"] = lastTextNode; - if ((!handleToken(&to, token, userData)) && (passThruUnknownToken)) { - pushString(&to, tokenStart); - pushString(&to, token); - pushString(&to, tokenEnd); + userData->lastTextNode = lastTextNode; + if ((!handleToken(text, token, userData)) && (passThruUnknownToken)) { + text += tokenStart; + text += token; + text += tokenEnd; } escEndPos = escStartPos = tokenEndPos = tokenStartPos = 0; lastTextNode = ""; - suspendTextPassThru = (!userData["suspendTextPassThru"].compare("true")); continue; } } @@ -285,15 +298,24 @@ char SWBasicFilter::ProcessText(char *text, int maxlen, const SWKey *key, const token[tokpos+2] = 0; } else { - if (!suspendTextPassThru) - *to++ = *from; - lastTextNode += *from; + if ((!userData->supressAdjacentWhitespace) || (*from != ' ')) { + if (!userData->suspendTextPassThru) + text += *from; + lastTextNode += *from; + } + userData->supressAdjacentWhitespace = false; } + + if (processStages & POSTCHAR) + processStage(POSTCHAR, text, from, userData); + } - *to++ = 0; - *to = 0; - return 0; -} + if (processStages & 
FINALIZE) + processStage(FINALIZE, text, from, userData); + delete userData; + return 0; +} +SWORD_NAMESPACE_END diff --git a/src/modules/filters/swoptfilter.cpp b/src/modules/filters/swoptfilter.cpp new file mode 100644 index 0000000..4eb3c82 --- /dev/null +++ b/src/modules/filters/swoptfilter.cpp @@ -0,0 +1,38 @@ +/****************************************************************************** + * + * swoptfilter - SWFilter descendant and base class for all option filters + */ + + +#include <swoptfilter.h> + +SWORD_NAMESPACE_START + + +SWOptionFilter::SWOptionFilter(const char *oName, const char *oTip, const StringList *oValues) { + optName = oName; + optTip = oTip; + optValues = oValues; +} + + +SWOptionFilter::~SWOptionFilter() { +} + + +void SWOptionFilter::setOptionValue(const char *ival) { + for (StringList::const_iterator loop = optValues->begin(); loop != optValues->end(); loop++) { + if (!stricmp(loop->c_str(), ival)) { + optionValue = *loop; + option = (!stricmp(ival, "On")); // convenience for boolean filters + break; + } + } +} + +const char *SWOptionFilter::getOptionValue() { + return optionValue; +} + + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/thmlfootnotes.cpp b/src/modules/filters/thmlfootnotes.cpp index d9b1f0e..8b52d98 100644 --- a/src/modules/filters/thmlfootnotes.cpp +++ b/src/modules/filters/thmlfootnotes.cpp @@ -1,103 +1,127 @@ /****************************************************************************** * - * thmlfootnotes - SWFilter decendant to hide or show footnotes + * thmlfootnotes - SWFilter descendant to hide or show footnotes * in a ThML module. 
*/ #include <stdlib.h> -#include <string.h> #include <thmlfootnotes.h> +#include <swmodule.h> +#include <swbuf.h> +#include <versekey.h> +#include <utilxml.h> #ifndef __GNUC__ #else #include <unixstr.h> #endif +SWORD_NAMESPACE_START -const char ThMLFootnotes::on[] = "On"; -const char ThMLFootnotes::off[] = "Off"; -const char ThMLFootnotes::optName[] = "Footnotes"; -const char ThMLFootnotes::optTip[] = "Toggles Footnotes On and Off if they exist"; +const char oName[] = "Footnotes"; +const char oTip[] = "Toggles Footnotes On and Off if they exist"; +const SWBuf choices[3] = {"On", "Off", ""}; +const StringList oValues(&choices[0], &choices[2]); -ThMLFootnotes::ThMLFootnotes() { - option = false; - options.push_back(on); - options.push_back(off); +ThMLFootnotes::ThMLFootnotes() : SWOptionFilter(oName, oTip, &oValues) { + setOptionValue("Off"); } ThMLFootnotes::~ThMLFootnotes() { } -void ThMLFootnotes::setOptionValue(const char *ival) -{ - option = (!stricmp(ival, on)); -} -const char *ThMLFootnotes::getOptionValue() -{ - return (option) ? on:off; -} +char ThMLFootnotes::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + SWBuf token; + bool intoken = false; + bool hide = false; + SWBuf tagText; + XMLTag startTag; + SWBuf refs = ""; + int footnoteNum = 1; + char buf[254]; + VerseKey parser = key->getText(); -char ThMLFootnotes::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) -{ - if (!option) { // if we don't want footnotes - char *to, *from, token[2048]; // cheese. Fix. 
- int tokpos = 0; - bool intoken = false; - int len; - bool hide = false; + SWBuf orig = text; + const char *from = orig.c_str(); - len = strlen(text) + 1; // shift string to right of buffer - if (len < maxlen) { - memmove(&text[maxlen - len], text, len); - from = &text[maxlen - len]; + for (text = ""; *from; from++) { + if (*from == '<') { + intoken = true; + token = ""; + continue; } - else from = text; // ------------------------------- + if (*from == '>') { // process tokens + intoken = false; - for (to = text; *from; from++) { - if (*from == '<') { - intoken = true; - tokpos = 0; - token[0] = 0; - token[1] = 0; - token[2] = 0; - continue; - } - if (*from == '>') { // process tokens - intoken = false; - if (!strncmp(token, "note", 4)) { - hide = true; - continue; + XMLTag tag(token); + if (!strcmp(tag.getName(), "note")) { + if (!tag.isEndTag()) { + if (!tag.isEmpty()) { + refs = ""; + startTag = tag; + hide = true; + tagText = ""; + continue; + } } - else if (!strncmp(token, "/note", 5)) { - hide = false; - continue; + if (hide && tag.isEndTag()) { + if (module->isProcessEntryAttributes()) { + SWBuf fc = module->getEntryAttributes()["Footnote"]["count"]["value"]; + footnoteNum = (fc.length()) ? 
atoi(fc.c_str()) : 0; + sprintf(buf, "%i", ++footnoteNum); + module->getEntryAttributes()["Footnote"]["count"]["value"] = buf; + StringList attributes = startTag.getAttributeNames(); + for (StringList::iterator it = attributes.begin(); it != attributes.end(); it++) { + module->getEntryAttributes()["Footnote"][buf][it->c_str()] = startTag.getAttribute(it->c_str()); + } + module->getEntryAttributes()["Footnote"][buf]["body"] = tagText; + startTag.setAttribute("swordFootnote", buf); + if ((startTag.getAttribute("type")) && (!strcmp(startTag.getAttribute("type"), "crossReference"))) { + if (!refs.length()) + refs = parser.ParseVerseList(tagText.c_str(), parser, true).getRangeText(); + module->getEntryAttributes()["Footnote"][buf]["refList"] = refs.c_str(); + } + } + hide = false; + if ((option) || ((startTag.getAttribute("type") && (!strcmp(startTag.getAttribute("type"), "crossReference"))))) { // we want the tag in the text; crossReferences are handled by another filter + text += startTag; + text.append(tagText); + } + else continue; } + } - // if not a footnote token, keep token in text - if (!hide) { - *to++ = '<'; - for (char *tok = token; *tok; tok++) - *to++ = *tok; - *to++ = '>'; - } - continue; + // if not a note token, keep token in text + if ((!strcmp(tag.getName(), "scripRef")) && (!tag.isEndTag())) { + SWBuf osisRef = tag.getAttribute("passage"); + if (refs.length()) + refs += "; "; + refs += osisRef; } - if (intoken) { - if (tokpos < 2045) - token[tokpos++] = *from; - token[tokpos+2] = 0; + if (!hide) { + text += '<'; + text.append(token); + text += '>'; } - else { - if (!hide) { - *to++ = *from; - } + else { + tagText += '<'; + tagText.append(token); + tagText += '>'; } + continue; + } + if (intoken) { //copy token + token += *from; } - *to++ = 0; - *to = 0; + else if (!hide) { //copy text which is not inside a token + text += *from; + } + else tagText += *from; } return 0; } + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/thmlgbf.cpp 
b/src/modules/filters/thmlgbf.cpp index 66d9a20..a65ddaf 100644 --- a/src/modules/filters/thmlgbf.cpp +++ b/src/modules/filters/thmlgbf.cpp @@ -15,18 +15,18 @@ ***************************************************************************/ #include <stdlib.h> -#include <string.h> #include <thmlgbf.h> +SWORD_NAMESPACE_START ThMLGBF::ThMLGBF() { } -char ThMLGBF::ProcessText(char *text, int maxlen) -{ - char *to, *from, token[2048]; +char ThMLGBF::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + const char *from; + char token[2048]; int tokpos = 0; bool intoken = false; int len; @@ -34,13 +34,10 @@ char ThMLGBF::ProcessText(char *text, int maxlen) bool sechead = false; bool title = false; - len = strlen(text) + 1; // shift string to right of buffer - if (len < maxlen) { - memmove(&text[maxlen - len], text, len); - from = &text[maxlen - len]; - } - else from = text; // ------------------------------- - for (to = text; *from; from++) { + SWBuf orig = text; + from = orig.c_str(); + + for (text = ""; *from; from++) { if (*from == '<') { intoken = true; tokpos = 0; @@ -60,102 +57,102 @@ char ThMLGBF::ProcessText(char *text, int maxlen) if (*from == ';' && ampersand) { intoken = false; - if (!strncmp("nbsp", token, 4)) *to++ = ' '; - else if (!strncmp("quot", token, 4)) *to++ = '"'; - else if (!strncmp("amp", token, 3)) *to++ = '&'; - else if (!strncmp("lt", token, 2)) *to++ = '<'; - else if (!strncmp("gt", token, 2)) *to++ = '>'; - else if (!strncmp("brvbar", token, 6)) *to++ = '|'; - else if (!strncmp("sect", token, 4)) *to++ = '§'; - else if (!strncmp("copy", token, 4)) *to++ = '©'; - else if (!strncmp("laquo", token, 5)) *to++ = '«'; - else if (!strncmp("reg", token, 3)) *to++ = '®'; - else if (!strncmp("acute", token, 5)) *to++ = '´'; - else if (!strncmp("para", token, 4)) *to++ = '¶'; - else if (!strncmp("raquo", token, 5)) *to++ = '»'; + if (!strncmp("nbsp", token, 4)) text += ' '; + else if (!strncmp("quot", token, 4)) text += '"'; + else if 
(!strncmp("amp", token, 3)) text += '&'; + else if (!strncmp("lt", token, 2)) text += '<'; + else if (!strncmp("gt", token, 2)) text += '>'; + else if (!strncmp("brvbar", token, 6)) text += '|'; + else if (!strncmp("sect", token, 4)) text += '§'; + else if (!strncmp("copy", token, 4)) text += '©'; + else if (!strncmp("laquo", token, 5)) text += '«'; + else if (!strncmp("reg", token, 3)) text += '®'; + else if (!strncmp("acute", token, 5)) text += '´'; + else if (!strncmp("para", token, 4)) text += '¶'; + else if (!strncmp("raquo", token, 5)) text += '»'; - else if (!strncmp("Aacute", token, 6)) *to++ = 'Á'; - else if (!strncmp("Agrave", token, 6)) *to++ = 'À'; - else if (!strncmp("Acirc", token, 5)) *to++ = 'Â'; - else if (!strncmp("Auml", token, 4)) *to++ = 'Ä'; - else if (!strncmp("Atilde", token, 6)) *to++ = 'Ã'; - else if (!strncmp("Aring", token, 5)) *to++ = 'Å'; - else if (!strncmp("aacute", token, 6)) *to++ = 'á'; - else if (!strncmp("agrave", token, 6)) *to++ = 'à'; - else if (!strncmp("acirc", token, 5)) *to++ = 'â'; - else if (!strncmp("auml", token, 4)) *to++ = 'ä'; - else if (!strncmp("atilde", token, 6)) *to++ = 'ã'; - else if (!strncmp("aring", token, 5)) *to++ = 'å'; - else if (!strncmp("Eacute", token, 6)) *to++ = 'É'; - else if (!strncmp("Egrave", token, 6)) *to++ = 'È'; - else if (!strncmp("Ecirc", token, 5)) *to++ = 'Ê'; - else if (!strncmp("Euml", token, 4)) *to++ = 'Ë'; - else if (!strncmp("eacute", token, 6)) *to++ = 'é'; - else if (!strncmp("egrave", token, 6)) *to++ = 'è'; - else if (!strncmp("ecirc", token, 5)) *to++ = 'ê'; - else if (!strncmp("euml", token, 4)) *to++ = 'ë'; - else if (!strncmp("Iacute", token, 6)) *to++ = 'Í'; - else if (!strncmp("Igrave", token, 6)) *to++ = 'Ì'; - else if (!strncmp("Icirc", token, 5)) *to++ = 'Î'; - else if (!strncmp("Iuml", token, 4)) *to++ = 'Ï'; - else if (!strncmp("iacute", token, 6)) *to++ = 'í'; - else if (!strncmp("igrave", token, 6)) *to++ = 'ì'; - else if (!strncmp("icirc", token, 5)) *to++ = 
'î'; - else if (!strncmp("iuml", token, 4)) *to++ = 'ï'; - else if (!strncmp("Oacute", token, 6)) *to++ = 'Ó'; - else if (!strncmp("Ograve", token, 6)) *to++ = 'Ò'; - else if (!strncmp("Ocirc", token, 5)) *to++ = 'Ô'; - else if (!strncmp("Ouml", token, 4)) *to++ = 'Ö'; - else if (!strncmp("Otilde", token, 6)) *to++ = 'Õ'; - else if (!strncmp("oacute", token, 6)) *to++ = 'ó'; - else if (!strncmp("ograve", token, 6)) *to++ = 'ò'; - else if (!strncmp("ocirc", token, 5)) *to++ = 'ô'; - else if (!strncmp("ouml", token, 4)) *to++ = 'ö'; - else if (!strncmp("otilde", token, 6)) *to++ = 'õ'; - else if (!strncmp("Uacute", token, 6)) *to++ = 'Ú'; - else if (!strncmp("Ugrave", token, 6)) *to++ = 'Ù'; - else if (!strncmp("Ucirc", token, 5)) *to++ = 'Û'; - else if (!strncmp("Uuml", token, 4)) *to++ = 'Ü'; - else if (!strncmp("uacute", token, 6)) *to++ = 'ú'; - else if (!strncmp("ugrave", token, 6)) *to++ = 'ù'; - else if (!strncmp("ucirc", token, 5)) *to++ = 'û'; - else if (!strncmp("uuml", token, 4)) *to++ = 'ü'; - else if (!strncmp("Yacute", token, 6)) *to++ = 'Ý'; - else if (!strncmp("yacute", token, 6)) *to++ = 'ý'; - else if (!strncmp("yuml", token, 4)) *to++ = 'ÿ'; + else if (!strncmp("Aacute", token, 6)) text += 'Á'; + else if (!strncmp("Agrave", token, 6)) text += 'À'; + else if (!strncmp("Acirc", token, 5)) text += 'Â'; + else if (!strncmp("Auml", token, 4)) text += 'Ä'; + else if (!strncmp("Atilde", token, 6)) text += 'Ã'; + else if (!strncmp("Aring", token, 5)) text += 'Å'; + else if (!strncmp("aacute", token, 6)) text += 'á'; + else if (!strncmp("agrave", token, 6)) text += 'à'; + else if (!strncmp("acirc", token, 5)) text += 'â'; + else if (!strncmp("auml", token, 4)) text += 'ä'; + else if (!strncmp("atilde", token, 6)) text += 'ã'; + else if (!strncmp("aring", token, 5)) text += 'å'; + else if (!strncmp("Eacute", token, 6)) text += 'É'; + else if (!strncmp("Egrave", token, 6)) text += 'È'; + else if (!strncmp("Ecirc", token, 5)) text += 'Ê'; + else if 
(!strncmp("Euml", token, 4)) text += 'Ë'; + else if (!strncmp("eacute", token, 6)) text += 'é'; + else if (!strncmp("egrave", token, 6)) text += 'è'; + else if (!strncmp("ecirc", token, 5)) text += 'ê'; + else if (!strncmp("euml", token, 4)) text += 'ë'; + else if (!strncmp("Iacute", token, 6)) text += 'Í'; + else if (!strncmp("Igrave", token, 6)) text += 'Ì'; + else if (!strncmp("Icirc", token, 5)) text += 'Î'; + else if (!strncmp("Iuml", token, 4)) text += 'Ï'; + else if (!strncmp("iacute", token, 6)) text += 'í'; + else if (!strncmp("igrave", token, 6)) text += 'ì'; + else if (!strncmp("icirc", token, 5)) text += 'î'; + else if (!strncmp("iuml", token, 4)) text += 'ï'; + else if (!strncmp("Oacute", token, 6)) text += 'Ó'; + else if (!strncmp("Ograve", token, 6)) text += 'Ò'; + else if (!strncmp("Ocirc", token, 5)) text += 'Ô'; + else if (!strncmp("Ouml", token, 4)) text += 'Ö'; + else if (!strncmp("Otilde", token, 6)) text += 'Õ'; + else if (!strncmp("oacute", token, 6)) text += 'ó'; + else if (!strncmp("ograve", token, 6)) text += 'ò'; + else if (!strncmp("ocirc", token, 5)) text += 'ô'; + else if (!strncmp("ouml", token, 4)) text += 'ö'; + else if (!strncmp("otilde", token, 6)) text += 'õ'; + else if (!strncmp("Uacute", token, 6)) text += 'Ú'; + else if (!strncmp("Ugrave", token, 6)) text += 'Ù'; + else if (!strncmp("Ucirc", token, 5)) text += 'Û'; + else if (!strncmp("Uuml", token, 4)) text += 'Ü'; + else if (!strncmp("uacute", token, 6)) text += 'ú'; + else if (!strncmp("ugrave", token, 6)) text += 'ù'; + else if (!strncmp("ucirc", token, 5)) text += 'û'; + else if (!strncmp("uuml", token, 4)) text += 'ü'; + else if (!strncmp("Yacute", token, 6)) text += 'Ý'; + else if (!strncmp("yacute", token, 6)) text += 'ý'; + else if (!strncmp("yuml", token, 4)) text += 'ÿ'; - else if (!strncmp("deg", token, 3)) *to++ = '°'; - else if (!strncmp("plusmn", token, 6)) *to++ = '±'; - else if (!strncmp("sup2", token, 4)) *to++ = '²'; - else if (!strncmp("sup3", token, 4)) 
*to++ = '³'; - else if (!strncmp("sup1", token, 4)) *to++ = '¹'; - else if (!strncmp("nbsp", token, 4)) *to++ = 'º'; - else if (!strncmp("pound", token, 5)) *to++ = '£'; - else if (!strncmp("cent", token, 4)) *to++ = '¢'; - else if (!strncmp("frac14", token, 6)) *to++ = '¼'; - else if (!strncmp("frac12", token, 6)) *to++ = '½'; - else if (!strncmp("frac34", token, 6)) *to++ = '¾'; - else if (!strncmp("iquest", token, 6)) *to++ = '¿'; - else if (!strncmp("iexcl", token, 5)) *to++ = '¡'; - else if (!strncmp("ETH", token, 3)) *to++ = 'Ð'; - else if (!strncmp("eth", token, 3)) *to++ = 'ð'; - else if (!strncmp("THORN", token, 5)) *to++ = 'Þ'; - else if (!strncmp("thorn", token, 5)) *to++ = 'þ'; - else if (!strncmp("AElig", token, 5)) *to++ = 'Æ'; - else if (!strncmp("aelig", token, 5)) *to++ = 'æ'; - else if (!strncmp("Oslash", token, 6)) *to++ = 'Ø'; - else if (!strncmp("curren", token, 6)) *to++ = '¤'; - else if (!strncmp("Ccedil", token, 6)) *to++ = 'Ç'; - else if (!strncmp("ccedil", token, 6)) *to++ = 'ç'; - else if (!strncmp("szlig", token, 5)) *to++ = 'ß'; - else if (!strncmp("Ntilde", token, 6)) *to++ = 'Ñ'; - else if (!strncmp("ntilde", token, 6)) *to++ = 'ñ'; - else if (!strncmp("yen", token, 3)) *to++ = '¥'; - else if (!strncmp("not", token, 3)) *to++ = '¬'; - else if (!strncmp("ordf", token, 4)) *to++ = 'ª'; - else if (!strncmp("uml", token, 3)) *to++ = '¨'; - else if (!strncmp("shy", token, 3)) *to++ = ''; - else if (!strncmp("macr", token, 4)) *to++ = '¯'; + else if (!strncmp("deg", token, 3)) text += '°'; + else if (!strncmp("plusmn", token, 6)) text += '±'; + else if (!strncmp("sup2", token, 4)) text += '²'; + else if (!strncmp("sup3", token, 4)) text += '³'; + else if (!strncmp("sup1", token, 4)) text += '¹'; + else if (!strncmp("nbsp", token, 4)) text += 'º'; + else if (!strncmp("pound", token, 5)) text += '£'; + else if (!strncmp("cent", token, 4)) text += '¢'; + else if (!strncmp("frac14", token, 6)) text += '¼'; + else if (!strncmp("frac12", token, 
6)) text += '½'; + else if (!strncmp("frac34", token, 6)) text += '¾'; + else if (!strncmp("iquest", token, 6)) text += '¿'; + else if (!strncmp("iexcl", token, 5)) text += '¡'; + else if (!strncmp("ETH", token, 3)) text += 'Ð'; + else if (!strncmp("eth", token, 3)) text += 'ð'; + else if (!strncmp("THORN", token, 5)) text += 'Þ'; + else if (!strncmp("thorn", token, 5)) text += 'þ'; + else if (!strncmp("AElig", token, 5)) text += 'Æ'; + else if (!strncmp("aelig", token, 5)) text += 'æ'; + else if (!strncmp("Oslash", token, 6)) text += 'Ø'; + else if (!strncmp("curren", token, 6)) text += '¤'; + else if (!strncmp("Ccedil", token, 6)) text += 'Ç'; + else if (!strncmp("ccedil", token, 6)) text += 'ç'; + else if (!strncmp("szlig", token, 5)) text += 'ß'; + else if (!strncmp("Ntilde", token, 6)) text += 'Ñ'; + else if (!strncmp("ntilde", token, 6)) text += 'ñ'; + else if (!strncmp("yen", token, 3)) text += '¥'; + else if (!strncmp("not", token, 3)) text += '¬'; + else if (!strncmp("ordf", token, 4)) text += 'ª'; + else if (!strncmp("uml", token, 3)) text += '¨'; + else if (!strncmp("shy", token, 3)) text += ''; + else if (!strncmp("macr", token, 4)) text += '¯'; continue; } @@ -163,152 +160,95 @@ char ThMLGBF::ProcessText(char *text, int maxlen) intoken = false; // process desired tokens if (!strncmp(token, "sync type=\"Strongs\" value=\"", 27)) { - *to++ = '<'; - *to++ = 'W'; + text += "<W"; for (unsigned int i = 27; token[i] != '\"'; i++) - *to++ = token[i]; - *to++ = '>'; + text += token[i]; + text += '>'; continue; } if (!strncmp(token, "sync type=\"morph\" value=\"", 25)) { - *to++ = '<'; - *to++ = 'W'; - *to++ = 'T'; + text += "<WT"; for (unsigned int i = 25; token[i] != '\"'; i++) - *to++ = token[i]; - *to++ = '>'; + text += token[i]; + text += '>'; continue; } else if (!strncmp(token, "scripRef", 8)) { - *to++ = '<'; - *to++ = 'R'; - *to++ = 'X'; - *to++ = '>'; + text += "<RX>"; continue; } - else if (!strncmp(token, "/scripRef", 9)) { - *to++ = '<'; - *to++ = 
'R'; - *to++ = 'x'; - *to++ = '>'; + else if (!strncmp(token, "/scripRef", 9)) { + text += "<Rx>"; continue; } else if (!strncmp(token, "note", 4)) { - *to++ = '<'; - *to++ = 'R'; - *to++ = 'F'; - *to++ = '>'; + text += "<RF>"; continue; } else if (!strncmp(token, "/note", 5)) { - *to++ = '<'; - *to++ = 'R'; - *to++ = 'f'; - *to++ = '>'; + text += "<Rf>"; continue; } else if (!strncmp(token, "sup", 3)) { - *to++ = '<'; - *to++ = 'F'; - *to++ = 'S'; - *to++ = '>'; + text += "<FS>"; } else if (!strncmp(token, "/sup", 4)) { - *to++ = '<'; - *to++ = 'F'; - *to++ = 's'; - *to++ = '>'; + text += "<Fs>"; } else if (!strnicmp(token, "font color=#ff0000", 18)) { - *to++ = '<'; - *to++ = 'F'; - *to++ = 'R'; - *to++ = '>'; + text += "<FR>"; continue; } else if (!strnicmp(token, "/font", 5)) { - *to++ = '<'; - *to++ = 'F'; - *to++ = 'r'; - *to++ = '>'; + text += "<Fr>"; continue; } else if (!strncmp(token, "div class=\"sechead\"", 19)) { - *to++ = '<'; - *to++ = 'T'; - *to++ = 'S'; - *to++ = '>'; + text += "<TS>"; sechead = true; continue; } else if (sechead && !strncmp(token, "/div", 19)) { - *to++ = '<'; - *to++ = 'T'; - *to++ = 's'; - *to++ = '>'; + text += "<Ts>"; sechead = false; continue; } else if (!strncmp(token, "div class=\"title\"", 19)) { - *to++ = '<'; - *to++ = 'T'; - *to++ = 'T'; - *to++ = '>'; + text += "<TT>"; title = true; continue; } else if (title && !strncmp(token, "/div", 19)) { - *to++ = '<'; - *to++ = 'T'; - *to++ = 't'; - *to++ = '>'; + text += "<Tt>"; title = false; continue; } else if (!strnicmp(token, "br", 2)) { - *to++ = '<'; - *to++ = 'C'; - *to++ = 'L'; - *to++ = '>'; + text += "<CL>"; continue; } else switch(*token) { case 'I': // font tags case 'i': - *to++ = '<'; - *to++ = 'F'; - *to++ = 'I'; - *to++ = '>'; + text += "<FI>"; continue; case 'B': // bold start case 'b': - *to++ = '<'; - *to++ = 'F'; - *to++ = 'B'; - *to++ = '>'; + text += "<FB>"; continue; case '/': switch(token[1]) { case 'P': case 'p': - *to++ = '<'; - *to++ = 'C'; - *to++ = 
'M'; - *to++ = '>'; + text += "<CM>"; continue; case 'I': case 'i': // italic end - *to++ = '<'; - *to++ = 'F'; - *to++ = 'i'; - *to++ = '>'; + text += "<Fi>"; continue; case 'B': // bold start case 'b': - *to++ = '<'; - *to++ = 'F'; - *to++ = 'b'; - *to++ = '>'; + text += "<Fb>"; continue; } } @@ -319,12 +259,25 @@ char ThMLGBF::ProcessText(char *text, int maxlen) token[tokpos++] = *from; token[tokpos+2] = 0; } - else *to++ = *from; + else text += *from; } - *to++ = 0; - *to = 0; + + orig = text; + from = orig.c_str(); + for (text = ""; *from; from++) { //loop to remove extra spaces + if ((strchr(" \t\n\r", *from))) { + while (*(from+1) && (strchr(" \t\n\r", *(from+1)))) { + from++; + } + text += " "; + } + else { + text += *from; + } + } + text += (char)0; + return 0; } - - +SWORD_NAMESPACE_END diff --git a/src/modules/filters/thmlheadings.cpp b/src/modules/filters/thmlheadings.cpp index 00b8a23..bc764bb 100644 --- a/src/modules/filters/thmlheadings.cpp +++ b/src/modules/filters/thmlheadings.cpp @@ -1,107 +1,96 @@ /****************************************************************************** * - * thmlheadings - SWFilter decendant to hide or show headings + * thmlheadings - SWFilter descendant to hide or show headings * in a ThML module. 
*/ #include <stdlib.h> -#include <string.h> #include <thmlheadings.h> +#include <utilxml.h> + +#include <iostream> + #ifndef __GNUC__ #else #include <unixstr.h> #endif +SWORD_NAMESPACE_START -const char ThMLHeadings::on[] = "On"; -const char ThMLHeadings::off[] = "Off"; -const char ThMLHeadings::optName[] = "Headings"; -const char ThMLHeadings::optTip[] = "Toggles Headings On and Off if they exist"; +const char oName[] = "Headings"; +const char oTip[] = "Toggles Headings On and Off if they exist"; +const SWBuf choices[3] = {"On", "Off", ""}; +const StringList oValues(&choices[0], &choices[2]); -ThMLHeadings::ThMLHeadings() { - option = false; - options.push_back(on); - options.push_back(off); +ThMLHeadings::ThMLHeadings() : SWOptionFilter(oName, oTip, &oValues) { + setOptionValue("Off"); } ThMLHeadings::~ThMLHeadings() { } -void ThMLHeadings::setOptionValue(const char *ival) -{ - option = (!stricmp(ival, on)); -} - -const char *ThMLHeadings::getOptionValue() -{ - return (option) ? on:off; -} -char ThMLHeadings::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) -{ +char ThMLHeadings::processText(SWBuf &text, const SWKey *key, const SWModule *module) { if (!option) { // if we don't want headings - char *to, *from, token[2048]; // cheese. Fix. 
- int tokpos = 0; + SWBuf token; bool intoken = false; - int len; bool hide = false; - len = strlen(text) + 1; // shift string to right of buffer - if (len < maxlen) { - memmove(&text[maxlen - len], text, len); - from = &text[maxlen - len]; - } - else from = text; // ------------------------------- + SWBuf orig = text; + const char *from = orig.c_str(); - for (to = text; *from; from++) { + for (text = ""; *from; from++) { if (*from == '<') { intoken = true; - tokpos = 0; - token[0] = 0; - token[1] = 0; - token[2] = 0; + token = ""; continue; } if (*from == '>') { // process tokens intoken = false; - if (!strnicmp(token, "div class=\"sechead\"", 19)) { + + XMLTag tag(token); + + if (!stricmp(tag.getName(), "div")) { //we only want a div tag + //std::cout << tag.toString() << " " << tag.isEndTag() << std::endl; + + if (tag.getAttribute("class") && !stricmp(tag.getAttribute("class"), "sechead")) { hide = true; continue; - } - if (!strnicmp(token, "div class=\"title\"", 17)) { + } + + if (tag.getAttribute("class") && !stricmp(tag.getAttribute("class"), "title")) { hide = true; continue; - } - else if (hide && !strnicmp(token, "/div", 4)) { - hide = false; - continue; + } + + if (hide && tag.isEndTag()) { + hide = false; + continue; + } + } // if not a heading token, keep token in text if (!hide) { - *to++ = '<'; - for (char *tok = token; *tok; tok++) - *to++ = *tok; - *to++ = '>'; + text += '<'; + text += token; + text += '>'; } continue; } - if (intoken) { - if (tokpos < 2045) - token[tokpos++] = *from; - token[tokpos+2] = 0; + + if (intoken) { //copy token + token += *from; } - else { - if (!hide) { - *to++ = *from; - } + else if (!hide) { //copy text which is not inside a token + text += *from; } } - *to++ = 0; - *to = 0; } return 0; } + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/thmlhtml.cpp b/src/modules/filters/thmlhtml.cpp index 9cb8679..40b3320 100644 --- a/src/modules/filters/thmlhtml.cpp +++ b/src/modules/filters/thmlhtml.cpp @@ -15,10 +15,11 @@ 
***************************************************************************/ #include <stdlib.h> -#include <string.h> #include <thmlhtml.h> #include <swmodule.h> +#include <utilxml.h> +SWORD_NAMESPACE_START ThMLHTML::ThMLHTML() { setTokenStart("<"); @@ -128,84 +129,98 @@ ThMLHTML::ThMLHTML() { */ setTokenCaseSensitive(true); - addTokenSubstitute("/scripRef", " </a>"); addTokenSubstitute("note", " <font color=\"#800000\"><small>("); addTokenSubstitute("/note", ")</small></font> "); } -bool ThMLHTML::handleToken(char **buf, const char *token, DualStringMap &userData) { - if (!substituteToken(buf, token)) { - // manually process if it wasn't a simple substitution - if (!strncmp(token, "sync type=\"Strongs\" value=\"", 27)) { - if (token[27] == 'H' || token[27] == 'G' || token[27] == 'A') { - pushString(buf, "<small><em>"); - for (const char *tok = token + 5; *tok; tok++) - if(*tok != '\"') - *(*buf)++ = *tok; - pushString(buf, "</em></small>"); - } - else if (token[27] == 'T') { - pushString(buf, "<small><i>"); - for (unsigned int i = 29; token[i] != '\"'; i++) - *(*buf)++ = token[i]; - pushString(buf, "</i></small>"); - } - } - else if (!strncmp(token, "sync type=\"morph\" value=\"", 25)) { - pushString(buf, "<small><em>"); - for (unsigned int i = 25; token[i] != '\"'; i++) - *(*buf)++ = token[i]; - pushString(buf, "</em></small>"); - } - else if (!strncmp(token, "sync type=\"lemma\" value=\"", 25)) { - pushString(buf, "<small><em>("); - for (unsigned int i = 25; token[i] != '\"'; i++) - *(*buf)++ = token[i]; - pushString(buf, ")</em></small>"); +bool ThMLHTML::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) { + if (!substituteToken(buf, token)) { // manually process if it wasn't a simple substitution + MyUserData *u = (MyUserData *)userData; + XMLTag tag(token); + if (!strcmp(tag.getName(), "sync")) { + if (tag.getAttribute("type") && tag.getAttribute("value") && !strcmp(tag.getAttribute("type"), "Strongs")) { + const char* value = 
tag.getAttribute("value"); + if (*value == 'H' || *value == 'G' || *value == 'A') { + value++; + buf += "<small><em>"; + buf += value; + buf += "</em></small>"; + } + else if (*value == 'T') { + value += 2; + + buf += "<small><i>"; + buf += value; + buf += "</i></small>"; + } + } + else if (tag.getAttribute("type") && tag.getAttribute("value") && !strcmp(tag.getAttribute("type"), "morph")) { + buf += "<small><em>"; + buf += tag.getAttribute("value"); + buf += "</em></small>"; + } + else if (tag.getAttribute("type") && tag.getAttribute("value") && !strcmp(tag.getAttribute("type"), "lemma")) { + buf += "<small><em>("; + buf += tag.getAttribute("value"); + buf += ")</em></small>"; + } } - else if (!strncmp(token, "scripRef", 8)) { - pushString(buf, "<a href=\""); - for (const char *tok = token + 9; *tok; tok++) - if(*tok != '\"') - *(*buf)++ = *tok; - *(*buf)++ = '\"'; - *(*buf)++ = '>'; + else if (!strcmp(tag.getName(), "div")) { + if (tag.isEndTag() && (u->SecHead)) { + buf += "</i></b><br />"; + u->SecHead = false; + } + else if (tag.getAttribute("class")) { + if (!strcmp(tag.getAttribute("class"), "sechead")) { + u->SecHead = true; + buf += "<br /><b><i>"; + } + else if (!strcmp(tag.getAttribute("class"), "title")) { + u->SecHead = true; + buf += "<br /><b><i>"; + } + } } - else if (!strncmp(token, "img ", 4)) { + else if (!strcmp(tag.getName(), "img")) { const char *src = strstr(token, "src"); if (!src) // assert we have a src attribute return false; - *(*buf)++ = '<'; + buf += '<'; for (const char *c = token; *c; c++) { if (c == src) { for (;((*c) && (*c != '"')); c++) - *(*buf)++ = *c; + buf += *c; if (!*c) { c--; continue; } - *(*buf)++ = '"'; + buf += '"'; if (*(c+1) == '/') { - pushString(buf, "file:"); - pushString(buf, module->getConfigEntry("AbsoluteDataPath")); - if (*((*buf)-1) == '/') + buf += "file:"; + buf += userData->module->getConfigEntry("AbsoluteDataPath"); + if (buf[buf.length()-2] == '/') c++; // skip '/' } continue; } - *(*buf)++ = *c; + buf 
+= *c; } - *(*buf)++ = '>'; + buf += '>'; } - else if(!strncmp(token, "note", 4)) { - pushString(buf, " <font color=\"#800000\"><small>("); - } + else if (!strcmp(tag.getName(), "scripRef")) { //do nothing with scrip refs, we leave them out + } else { - return false; // we still didn't handle token + buf += '<'; + buf += token; + buf += '>'; + +// return false; // we still didn't handle token } } return true; } + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/thmlhtmlhref.cpp b/src/modules/filters/thmlhtmlhref.cpp index ce7e3fd..b94b8ae 100644 --- a/src/modules/filters/thmlhtmlhref.cpp +++ b/src/modules/filters/thmlhtmlhref.cpp @@ -15,255 +15,195 @@ ***************************************************************************/ #include <stdlib.h> -#include <string.h> #include <thmlhtmlhref.h> #include <swmodule.h> +#include <utilxml.h> +#include <versekey.h> +SWORD_NAMESPACE_START -ThMLHTMLHREF::ThMLHTMLHREF() { - setTokenStart("<"); - setTokenEnd(">"); -/* - setEscapeStart("&"); - setEscapeEnd(";"); - setEscapeStringCaseSensitive(true); +ThMLHTMLHREF::MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) { + if (module) { + version = module->Name(); + BiblicalText = (!strcmp(module->Type(), "Biblical Texts")); + } +} - addEscapeStringSubstitute("nbsp", " "); - addEscapeStringSubstitute("quot", "\""); - addEscapeStringSubstitute("amp", "&"); - addEscapeStringSubstitute("lt", "<"); - addEscapeStringSubstitute("gt", ">"); - addEscapeStringSubstitute("brvbar", "|"); - addEscapeStringSubstitute("sect", "§"); - addEscapeStringSubstitute("copy", "©"); - addEscapeStringSubstitute("laquo", "«"); - addEscapeStringSubstitute("reg", "®"); - addEscapeStringSubstitute("acute", "´"); - addEscapeStringSubstitute("para", "¶"); - addEscapeStringSubstitute("raquo", "»"); - addEscapeStringSubstitute("Aacute", "Á"); - addEscapeStringSubstitute("Agrave", "À"); - addEscapeStringSubstitute("Acirc", "Â"); - 
addEscapeStringSubstitute("Auml", "Ä"); - addEscapeStringSubstitute("Atilde", "Ã"); - addEscapeStringSubstitute("Aring", "Å"); - addEscapeStringSubstitute("aacute", "á"); - addEscapeStringSubstitute("agrave", "à"); - addEscapeStringSubstitute("acirc", "â"); - addEscapeStringSubstitute("auml", "ä"); - addEscapeStringSubstitute("atilde", "ã"); - addEscapeStringSubstitute("aring", "å"); - addEscapeStringSubstitute("Eacute", "É"); - addEscapeStringSubstitute("Egrave", "È"); - addEscapeStringSubstitute("Ecirc", "Ê"); - addEscapeStringSubstitute("Euml", "Ë"); - addEscapeStringSubstitute("eacute", "é"); - addEscapeStringSubstitute("egrave", "è"); - addEscapeStringSubstitute("ecirc", "ê"); - addEscapeStringSubstitute("euml", "ë"); - addEscapeStringSubstitute("Iacute", "Í"); - addEscapeStringSubstitute("Igrave", "Ì"); - addEscapeStringSubstitute("Icirc", "Î"); - addEscapeStringSubstitute("Iuml", "Ï"); - addEscapeStringSubstitute("iacute", "í"); - addEscapeStringSubstitute("igrave", "ì"); - addEscapeStringSubstitute("icirc", "î"); - addEscapeStringSubstitute("iuml", "ï"); - addEscapeStringSubstitute("Oacute", "Ó"); - addEscapeStringSubstitute("Ograve", "Ò"); - addEscapeStringSubstitute("Ocirc", "Ô"); - addEscapeStringSubstitute("Ouml", "Ö"); - addEscapeStringSubstitute("Otilde", "Õ"); - addEscapeStringSubstitute("oacute", "ó"); - addEscapeStringSubstitute("ograve", "ò"); - addEscapeStringSubstitute("ocirc", "ô"); - addEscapeStringSubstitute("ouml", "ö"); - addEscapeStringSubstitute("otilde", "õ"); - addEscapeStringSubstitute("Uacute", "Ú"); - addEscapeStringSubstitute("Ugrave", "Ù"); - addEscapeStringSubstitute("Ucirc", "Û"); - addEscapeStringSubstitute("Uuml", "Ü"); - addEscapeStringSubstitute("uacute", "ú"); - addEscapeStringSubstitute("ugrave", "ù"); - addEscapeStringSubstitute("ucirc", "û"); - addEscapeStringSubstitute("uuml", "ü"); - addEscapeStringSubstitute("Yacute", "Ý"); - addEscapeStringSubstitute("yacute", "ý"); - addEscapeStringSubstitute("yuml", "ÿ"); 
+ThMLHTMLHREF::ThMLHTMLHREF() { + setTokenStart("<"); + setTokenEnd(">"); - addEscapeStringSubstitute("deg", "°"); - addEscapeStringSubstitute("plusmn", "±"); - addEscapeStringSubstitute("sup2", "²"); - addEscapeStringSubstitute("sup3", "³"); - addEscapeStringSubstitute("sup1", "¹"); - addEscapeStringSubstitute("nbsp", "º"); - addEscapeStringSubstitute("pound", "£"); - addEscapeStringSubstitute("cent", "¢"); - addEscapeStringSubstitute("frac14", "¼"); - addEscapeStringSubstitute("frac12", "½"); - addEscapeStringSubstitute("frac34", "¾"); - addEscapeStringSubstitute("iquest", "¿"); - addEscapeStringSubstitute("iexcl", "¡"); - addEscapeStringSubstitute("ETH", "Ð"); - addEscapeStringSubstitute("eth", "ð"); - addEscapeStringSubstitute("THORN", "Þ"); - addEscapeStringSubstitute("thorn", "þ"); - addEscapeStringSubstitute("AElig", "Æ"); - addEscapeStringSubstitute("aelig", "æ"); - addEscapeStringSubstitute("Oslash", "Ø"); - addEscapeStringSubstitute("curren", "¤"); - addEscapeStringSubstitute("Ccedil", "Ç"); - addEscapeStringSubstitute("ccedil", "ç"); - addEscapeStringSubstitute("szlig", "ß"); - addEscapeStringSubstitute("Ntilde", "Ñ"); - addEscapeStringSubstitute("ntilde", "ñ"); - addEscapeStringSubstitute("yen", "¥"); - addEscapeStringSubstitute("not", "¬"); - addEscapeStringSubstitute("ordf", "ª"); - addEscapeStringSubstitute("uml", "¨"); - addEscapeStringSubstitute("shy", ""); - addEscapeStringSubstitute("macr", "¯"); -*/ setTokenCaseSensitive(true); - - addTokenSubstitute("note", " <font color=\"#800000\"><small>("); - addTokenSubstitute("/note", ")</small></font> "); + addTokenSubstitute("scripture", "<i> "); addTokenSubstitute("/scripture", "</i> "); } -bool ThMLHTMLHREF::handleToken(char **buf, const char *token, DualStringMap &userData) { +bool ThMLHTMLHREF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) { const char *tok; - if (!substituteToken(buf, token)) { - // manually process if it wasn't a simple substitution - if 
(!strncmp(token, "sync ", 5)) { - pushString(buf, "<a href=\""); - for (tok = token + 5; *(tok+1); tok++) - if(*tok != '\"') - *(*buf)++ = *tok; - *(*buf)++ = '\"'; - *(*buf)++ = '>'; - - //scan for value and add it to the buffer - for (tok = token + 5; *tok; tok++) { - if (!strncmp(tok, "value=\"", 7)) { - tok += 7; - for (;*tok != '\"'; tok++) - *(*buf)++ = *tok; - break; - } + if (!substituteToken(buf, token)) { // manually process if it wasn't a simple substitution + MyUserData *u = (MyUserData *)userData; + + XMLTag tag(token); + if ((!tag.isEndTag()) && (!tag.isEmpty())) + u->startTag = tag; + + if (tag.getName() && !strcmp(tag.getName(), "sync")) { + SWBuf value = tag.getAttribute("value"); + if (tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "morph")) { //> + buf += "<small><em>(<a href=\""; + buf += "type="; + buf += tag.getAttribute("type"); + + //const char* value = tag.getAttribute("value"); + buf += " value="; + buf += (value.length()) ? value.c_str() : ""; + buf += "\">"; + buf += (value.length()) ? value.c_str() : ""; + buf += "</a>) </em></small>"; + } + else if (tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "Strongs")) { + buf += "<small><em><<a href=\""; + buf += "type="; + buf += tag.getAttribute("type"); + + //const char* value = tag.getAttribute("value"); + buf += " value="; + buf += (value.length()) ? value.c_str() : ""; + buf += "\">"; + value<<1; + buf += (value.length()) ? 
value.c_str() : ""; + buf += "</a>> </em></small>"; + } + else if (tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "Dict")) { + if (!tag.isEndTag()) + buf += "<b>"; + else buf += "</b>"; } - pushString(buf, "</a>"); + } - - else if (!strncmp(token, "scripture ", 10)) { - userData["inscriptRef"] = "true"; - pushString(buf, "<i>"); - } - - else if (!strncmp(token, "scripRef p", 10) || !strncmp(token, "scripRef v", 10)) { - userData["inscriptRef"] = "true"; - pushString(buf, "<a href=\""); - for (const char *tok = token + 9; *(tok+1); tok++) - if(*tok != '\"') - *(*buf)++ = *tok; - *(*buf)++ = '\"'; - *(*buf)++ = '>'; - } - - // we're starting a scripRef like "<scripRef>John 3:16</scripRef>" - else if (!strcmp(token, "scripRef")) { - userData["inscriptRef"] = "false"; - // let's stop text from going to output - userData["suspendTextPassThru"] = "true"; + // <note> tag + else if (!strcmp(tag.getName(), "note")) { + if (!tag.isEndTag()) { + if (!tag.isEmpty()) { + SWBuf type = tag.getAttribute("type"); + SWBuf footnoteNumber = tag.getAttribute("swordFootnote"); + VerseKey *vkey; + // see if we have a VerseKey * or descendant + try { + vkey = SWDYNAMIC_CAST(VerseKey, u->key); + } + catch ( ... ) { } + if (vkey) { + // leave this special osis type in for crossReference notes types? Might thml use this some day? Doesn't hurt. + char ch = ((tag.getAttribute("type") && ((!strcmp(tag.getAttribute("type"), "crossReference")) || (!strcmp(tag.getAttribute("type"), "x-cross-ref")))) ? 
'x':'n'); + buf.appendFormatted("<a href=\"noteID=%s.%c.%s\"><small><sup>*%c</sup></small></a> ", vkey->getText(), ch, footnoteNumber.c_str(), ch); + } + u->suspendTextPassThru = true; + } + } + if (tag.isEndTag()) { + u->suspendTextPassThru = false; + } } - - // we've ended a scripRef - else if (!strcmp(token, "/scripRef")) { - if (userData["inscriptRef"] == "true") { // like "<scripRef passage="John 3:16">John 3:16</scripRef>" - userData["inscriptRef"] = "false"; - pushString(buf, "</a>"); + // <scripRef> tag + else if (!strcmp(tag.getName(), "scripRef")) { + if (!tag.isEndTag()) { + if (!tag.isEmpty()) { + u->suspendTextPassThru = true; + } } - - else { // like "<scripRef>John 3:16</scripRef>" - pushString(buf, "<a href=\"passage="); - //char *strbuf = (char *)userData["lastTextNode"].c_str(); - pushString(buf, userData["lastTextNode"].c_str()); - *(*buf)++ = '\"'; - *(*buf)++ = '>'; - pushString(buf, userData["lastTextNode"].c_str()); + if (tag.isEndTag()) { // </scripRef> + if (!u->BiblicalText) { + SWBuf refList = u->startTag.getAttribute("passage"); + if (!refList.length()) + refList = u->lastTextNode; + SWBuf version = tag.getAttribute("version"); + buf += " <a href=\""; + if (version.length()) { + buf += "version="; + buf += version; + buf += " "; + } + buf += "passage="; + buf += refList.c_str(); + buf += "\">"; + buf += u->lastTextNode.c_str(); + buf += "</a> "; + } + else { + SWBuf footnoteNumber = u->startTag.getAttribute("swordFootnote"); + VerseKey *vkey; + // see if we have a VerseKey * or descendant + try { + vkey = SWDYNAMIC_CAST(VerseKey, u->key); + } + catch ( ... ) {} + if (vkey) { + // leave this special osis type in for crossReference notes types? Might thml use this some day? Doesn't hurt. 
+ buf.appendFormatted("<a href=\"noteID=%s.x.%s\"><small><sup>*x</sup></small></a> ", vkey->getText(), footnoteNumber.c_str()); + } + } + // let's let text resume to output again - userData["suspendTextPassThru"] = "false"; - pushString(buf, "</a>"); + u->suspendTextPassThru = false; } } - - else if (!strncmp(token, "div class=\"sechead\"", 19)) { - userData["SecHead"] = "true"; - pushString(buf, "<br /><b><i>"); - } - else if (!strncmp(token, "div class=\"title\"", 19)) { - userData["SecHead"] = "true"; - pushString(buf, "<br /><b><i>"); - } - else if (!strncmp(token, "/div", 4)) { - if (userData["SecHead"] == "true") { - pushString(buf, "</i></b><br />"); - userData["SecHead"] = "false"; + else if (tag.getName() && !strcmp(tag.getName(), "div")) { + if (tag.isEndTag() && u->SecHead) { + buf += "</i></b><br />"; + u->SecHead = false; + } + else if (tag.getAttribute("class")) { + if (!stricmp(tag.getAttribute("class"), "sechead")) { + u->SecHead = true; + buf += "<br /><b><i>"; + } + else if (!stricmp(tag.getAttribute("class"), "title")) { + u->SecHead = true; + buf += "<br /><b><i>"; + } } } - - else if (!strncmp(token, "sync type=\"Strongs\" value=\"T", 28)) { - pushString(buf, "<a href=\""); - for (tok = token + 5; *(tok+1); tok++) - if(*tok != '\"') - *(*buf)++ = *tok; - *(*buf)++ = '\"'; - *(*buf)++ = '>'; - for (tok = token + 29; *(tok+2); tok++) - if(*tok != '\"') - *(*buf)++ = *tok; - pushString(buf, "</a>"); - } - else if (!strncmp(token, "img ", 4)) { + else if (tag.getName() && (!strcmp(tag.getName(), "img") || !strcmp(tag.getName(), "image"))) { const char *src = strstr(token, "src"); if (!src) // assert we have a src attribute return false; - *(*buf)++ = '<'; + buf += '<'; for (const char *c = token; *c; c++) { if (c == src) { for (;((*c) && (*c != '"')); c++) - *(*buf)++ = *c; + buf += *c; if (!*c) { c--; continue; } - *(*buf)++ = '"'; + buf += '"'; if (*(c+1) == '/') { - pushString(buf, "file:"); - pushString(buf, 
module->getConfigEntry("AbsoluteDataPath")); - if (*((*buf)-1) == '/') + buf += "file:"; + buf += userData->module->getConfigEntry("AbsoluteDataPath"); + if (buf[buf.length()-2] == '/') c++; // skip '/' } continue; } - *(*buf)++ = *c; + buf += *c; } - *(*buf)++ = '>'; + buf += '>'; } - else if (!strncmp(token, "note", 4)) { - pushString(buf, " <small><font color=\"#800000\">("); - } else { - *(*buf)++ = '<'; - for (const char *tok = token; *tok; tok++) - *(*buf)++ = *tok; - *(*buf)++ = '>'; + buf += '<'; + /*for (const char *tok = token; *tok; tok++) + buf += *tok;*/ + buf += token; + buf += '>'; //return false; // we still didn't handle token } } return true; } + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/thmllemma.cpp b/src/modules/filters/thmllemma.cpp index 33856db..02939df 100644 --- a/src/modules/filters/thmllemma.cpp +++ b/src/modules/filters/thmllemma.cpp @@ -1,97 +1,69 @@ /****************************************************************************** * - * thmllemma - SWFilter decendant to hide or show lemmas + * thmllemma - SWFilter descendant to hide or show lemmas * in a ThML module. 
*/ #include <stdlib.h> -#include <string.h> #include <thmllemma.h> #ifndef __GNUC__ #else #include <unixstr.h> #endif +SWORD_NAMESPACE_START -const char ThMLLemma::on[] = "On"; -const char ThMLLemma::off[] = "Off"; -const char ThMLLemma::optName[] = "Lemmas"; -const char ThMLLemma::optTip[] = "Toggles Lemmas On and Off if they exist"; +const char oName[] = "Lemmas"; +const char oTip[] = "Toggles Lemmas On and Off if they exist"; -ThMLLemma::ThMLLemma() { - option = false; - options.push_back(on); - options.push_back(off); +const SWBuf choices[3] = {"On", "Off", ""}; +const StringList oValues(&choices[0], &choices[2]); + +ThMLLemma::ThMLLemma() : SWOptionFilter(oName, oTip, &oValues) { + setOptionValue("Off"); } ThMLLemma::~ThMLLemma() { } -void ThMLLemma::setOptionValue(const char *ival) -{ - option = (!stricmp(ival, on)); -} - -const char *ThMLLemma::getOptionValue() -{ - return (option) ? on:off; -} -char ThMLLemma::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) -{ +char ThMLLemma::processText(SWBuf &text, const SWKey *key, const SWModule *module) { if (!option) { // if we don't want lemmas - char *to, *from, token[2048]; // cheese. Fix. 
- int tokpos = 0; bool intoken = false; - int len; - bool lastspace = false; - len = strlen(text) + 1; // shift string to right of buffer - if (len < maxlen) { - memmove(&text[maxlen - len], text, len); - from = &text[maxlen - len]; - } - else from = text; // ------------------------------- - - for (to = text; *from; from++) { + SWBuf token; + SWBuf orig = text; + const char *from = orig.c_str(); + for (text = ""; *from; from++) { if (*from == '<') { intoken = true; - tokpos = 0; - token[0] = 0; - token[1] = 0; - token[2] = 0; + token = ""; continue; } - if (*from == '>') { // process tokens + else if (*from == '>') { // process tokens intoken = false; - if (!strnicmp(token, "sync type=\"lemma\" ", 18)) { // Lemma - if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) { - if (lastspace) - to--; - } - continue; + if (!strnicmp(token.c_str(), "sync", 4) && strstr(token.c_str(), " type=\"lemma\"")) { // Lemma + continue; } + // if not a lemma token, keep token in text - *to++ = '<'; - for (char *tok = token; *tok; tok++) - *to++ = *tok; - *to++ = '>'; + text += '<'; + text.append(token); + text += '>'; continue; } + if (intoken) { - if (tokpos < 2045) - token[tokpos++] = *from; - token[tokpos+2] = 0; - } - else { - *to++ = *from; - lastspace = (*from == ' '); + token += *from; } + else { + text += *from; + } } - *to++ = 0; - *to = 0; } return 0; } + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/thmlmorph.cpp b/src/modules/filters/thmlmorph.cpp index f95bede..606ae7e 100644 --- a/src/modules/filters/thmlmorph.cpp +++ b/src/modules/filters/thmlmorph.cpp @@ -1,98 +1,69 @@ /****************************************************************************** * - * thmlmorph - SWFilter decendant to hide or show morph tags + * thmlmorph - SWFilter descendant to hide or show morph tags * in a ThML module. 
*/ #include <stdlib.h> -#include <string.h> #include <thmlmorph.h> #ifndef __GNUC__ #else #include <unixstr.h> #endif +SWORD_NAMESPACE_START -const char ThMLMorph::on[] = "On"; -const char ThMLMorph::off[] = "Off"; -const char ThMLMorph::optName[] = "Morphological Tags"; -const char ThMLMorph::optTip[] = "Toggles Morphological Tags On and Off if they exist"; +const char oName[] = "Morphological Tags"; +const char oTip[] = "Toggles Morphological Tags On and Off if they exist"; +const SWBuf choices[3] = {"On", "Off", ""}; +const StringList oValues(&choices[0], &choices[2]); -ThMLMorph::ThMLMorph() { - option = false; - options.push_back(on); - options.push_back(off); +ThMLMorph::ThMLMorph() : SWOptionFilter(oName, oTip, &oValues) { + setOptionValue("Off"); } ThMLMorph::~ThMLMorph() { } -void ThMLMorph::setOptionValue(const char *ival) -{ - option = (!stricmp(ival, on)); -} - -const char *ThMLMorph::getOptionValue() -{ - return (option) ? on:off; -} -char ThMLMorph::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) -{ +char ThMLMorph::processText(SWBuf &text, const SWKey *key, const SWModule *module) { if (!option) { // if we don't want morph tags - char *to, *from, token[2048]; // cheese. Fix. 
- int tokpos = 0; bool intoken = false; - int len; - bool lastspace = false; - - len = strlen(text) + 1; // shift string to right of buffer - if (len < maxlen) { - memmove(&text[maxlen - len], text, len); - from = &text[maxlen - len]; - } - else from = text; // ------------------------------- - for (to = text; *from; from++) { + SWBuf token; + SWBuf orig = text; + const char *from = orig.c_str(); + for (text = ""; *from; from++) { if (*from == '<') { intoken = true; - tokpos = 0; - token[0] = 0; - token[1] = 0; - token[2] = 0; + token = ""; continue; } if (*from == '>') { // process tokens intoken = false; - if (!strnicmp(token, "sync type=\"morph\" ", 18)) { // Morph - if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) { - if (lastspace) - to--; - } + if (!strncmp(token.c_str(), "sync ", 5) && strstr(token.c_str(), "type=\"morph\"")) { // Morph continue; } + // if not a morph tag token, keep token in text - *to++ = '<'; - for (char *tok = token; *tok; tok++) - *to++ = *tok; - *to++ = '>'; + text += '<'; + text += token; + text += '>'; continue; } + if (intoken) { - if (tokpos < 2045) - token[tokpos++] = *from; - token[tokpos+2] = 0; + token += *from; } else { - *to++ = *from; - lastspace = (*from == ' '); + text += *from; } } - *to++ = 0; - *to = 0; } return 0; } + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/thmlolb.cpp b/src/modules/filters/thmlolb.cpp deleted file mode 100644 index 2b31fab..0000000 --- a/src/modules/filters/thmlolb.cpp +++ /dev/null @@ -1,243 +0,0 @@ -/*************************************************************************** - thmlolb.cpp - ThML to OLB filter - ------------------- - begin : 2001-05-10 - copyright : 2001 by CrossWire Bible Society - ***************************************************************************/ - -/*************************************************************************** 
- * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#include <stdlib.h> -#include <string.h> -#include <thmlolb.h> - - -ThMLOLB::ThMLOLB() -{ -} - - -char ThMLOLB::ProcessText(char *text, int maxlen) -{ - char *to, *from, token[2048]; - int tokpos = 0; - bool intoken = false; - int len; - bool ampersand = false; - int i; - - len = strlen(text) + 1; // shift string to right of buffer - if (len < maxlen) { - memmove(&text[maxlen - len], text, len); - from = &text[maxlen - len]; - } - else from = text; // ------------------------------- - for (to = text; *from; from++) - { - if (*from == '<') { - intoken = true; - tokpos = 0; - memset(token, 0, 2048); - ampersand = false; - continue; - } - else if (*from == '&') { - intoken = true; - tokpos = 0; - memset(token, 0, 2048); - ampersand = true; - continue; - } - if (*from == ';' && ampersand) { - intoken = false; - - if (!strncmp("nbsp", token, 4)) *to++ = ' '; - else if (!strncmp("quot", token, 4)) *to++ = '"'; - else if (!strncmp("amp", token, 3)) *to++ = '&'; - else if (!strncmp("lt", token, 2)) *to++ = '<'; - else if (!strncmp("gt", token, 2)) *to++ = '>'; - else if (!strncmp("brvbar", token, 6)) *to++ = '|'; - else if (!strncmp("sect", token, 4)) *to++ = '§'; - else if (!strncmp("copy", token, 4)) *to++ = '©'; - else if (!strncmp("laquo", token, 5)) *to++ = '«'; - else if (!strncmp("reg", token, 3)) *to++ = '®'; - else if (!strncmp("acute", token, 5)) *to++ = '´'; - else if (!strncmp("para", token, 4)) *to++ = '¶'; - else if (!strncmp("raquo", token, 5)) *to++ = '»'; - - else if (!strncmp("Aacute", token, 6)) *to++ = 'Á'; - else if (!strncmp("Agrave", token, 6)) *to++ = 'À'; - else if (!strncmp("Acirc", token, 5)) 
*to++ = 'Â'; - else if (!strncmp("Auml", token, 4)) *to++ = 'Ä'; - else if (!strncmp("Atilde", token, 6)) *to++ = 'Ã'; - else if (!strncmp("Aring", token, 5)) *to++ = 'Å'; - else if (!strncmp("aacute", token, 6)) *to++ = 'á'; - else if (!strncmp("agrave", token, 6)) *to++ = 'à'; - else if (!strncmp("acirc", token, 5)) *to++ = 'â'; - else if (!strncmp("auml", token, 4)) *to++ = 'ä'; - else if (!strncmp("atilde", token, 6)) *to++ = 'ã'; - else if (!strncmp("aring", token, 5)) *to++ = 'å'; - else if (!strncmp("Eacute", token, 6)) *to++ = 'É'; - else if (!strncmp("Egrave", token, 6)) *to++ = 'È'; - else if (!strncmp("Ecirc", token, 5)) *to++ = 'Ê'; - else if (!strncmp("Euml", token, 4)) *to++ = 'Ë'; - else if (!strncmp("eacute", token, 6)) *to++ = 'é'; - else if (!strncmp("egrave", token, 6)) *to++ = 'è'; - else if (!strncmp("ecirc", token, 5)) *to++ = 'ê'; - else if (!strncmp("euml", token, 4)) *to++ = 'ë'; - else if (!strncmp("Iacute", token, 6)) *to++ = 'Í'; - else if (!strncmp("Igrave", token, 6)) *to++ = 'Ì'; - else if (!strncmp("Icirc", token, 5)) *to++ = 'Î'; - else if (!strncmp("Iuml", token, 4)) *to++ = 'Ï'; - else if (!strncmp("iacute", token, 6)) *to++ = 'í'; - else if (!strncmp("igrave", token, 6)) *to++ = 'ì'; - else if (!strncmp("icirc", token, 5)) *to++ = 'î'; - else if (!strncmp("iuml", token, 4)) *to++ = 'ï'; - else if (!strncmp("Oacute", token, 6)) *to++ = 'Ó'; - else if (!strncmp("Ograve", token, 6)) *to++ = 'Ò'; - else if (!strncmp("Ocirc", token, 5)) *to++ = 'Ô'; - else if (!strncmp("Ouml", token, 4)) *to++ = 'Ö'; - else if (!strncmp("Otilde", token, 6)) *to++ = 'Õ'; - else if (!strncmp("oacute", token, 6)) *to++ = 'ó'; - else if (!strncmp("ograve", token, 6)) *to++ = 'ò'; - else if (!strncmp("ocirc", token, 5)) *to++ = 'ô'; - else if (!strncmp("ouml", token, 4)) *to++ = 'ö'; - else if (!strncmp("otilde", token, 6)) *to++ = 'õ'; - else if (!strncmp("Uacute", token, 6)) *to++ = 'Ú'; - else if (!strncmp("Ugrave", token, 6)) *to++ = 'Ù'; - else if 
(!strncmp("Ucirc", token, 5)) *to++ = 'Û'; - else if (!strncmp("Uuml", token, 4)) *to++ = 'Ü'; - else if (!strncmp("uacute", token, 6)) *to++ = 'ú'; - else if (!strncmp("ugrave", token, 6)) *to++ = 'ù'; - else if (!strncmp("ucirc", token, 5)) *to++ = 'û'; - else if (!strncmp("uuml", token, 4)) *to++ = 'ü'; - else if (!strncmp("Yacute", token, 6)) *to++ = 'Ý'; - else if (!strncmp("yacute", token, 6)) *to++ = 'ý'; - else if (!strncmp("yuml", token, 4)) *to++ = 'ÿ'; - - else if (!strncmp("deg", token, 3)) *to++ = '°'; - else if (!strncmp("plusmn", token, 6)) *to++ = '±'; - else if (!strncmp("sup2", token, 4)) *to++ = '²'; - else if (!strncmp("sup3", token, 4)) *to++ = '³'; - else if (!strncmp("sup1", token, 4)) *to++ = '¹'; - else if (!strncmp("nbsp", token, 4)) *to++ = 'º'; - else if (!strncmp("pound", token, 5)) *to++ = '£'; - else if (!strncmp("cent", token, 4)) *to++ = '¢'; - else if (!strncmp("frac14", token, 6)) *to++ = '¼'; - else if (!strncmp("frac12", token, 6)) *to++ = '½'; - else if (!strncmp("frac34", token, 6)) *to++ = '¾'; - else if (!strncmp("iquest", token, 6)) *to++ = '¿'; - else if (!strncmp("iexcl", token, 5)) *to++ = '¡'; - else if (!strncmp("ETH", token, 3)) *to++ = 'Ð'; - else if (!strncmp("eth", token, 3)) *to++ = 'ð'; - else if (!strncmp("THORN", token, 5)) *to++ = 'Þ'; - else if (!strncmp("thorn", token, 5)) *to++ = 'þ'; - else if (!strncmp("AElig", token, 5)) *to++ = 'Æ'; - else if (!strncmp("aelig", token, 5)) *to++ = 'æ'; - else if (!strncmp("Oslash", token, 6)) *to++ = 'Ø'; - else if (!strncmp("curren", token, 6)) *to++ = '¤'; - else if (!strncmp("Ccedil", token, 6)) *to++ = 'Ç'; - else if (!strncmp("ccedil", token, 6)) *to++ = 'ç'; - else if (!strncmp("szlig", token, 5)) *to++ = 'ß'; - else if (!strncmp("Ntilde", token, 6)) *to++ = 'Ñ'; - else if (!strncmp("ntilde", token, 6)) *to++ = 'ñ'; - else if (!strncmp("yen", token, 3)) *to++ = '¥'; - else if (!strncmp("not", token, 3)) *to++ = '¬'; - else if (!strncmp("ordf", token, 4)) *to++ = 
'ª'; - else if (!strncmp("uml", token, 3)) *to++ = '¨'; - else if (!strncmp("shy", token, 3)) *to++ = ''; - else if (!strncmp("macr", token, 4)) *to++ = '¯'; - continue; - - } - else if (*from == '>' && !ampersand) - { - intoken = false; - // process desired tokens - if (!strncmp(token, "sync type=\"Strongs\" value=\"G", 27)) { - *to++ = '<'; - for (i = 28; token[i] != '\"'; i++) - *to++ = token[i]; - *to++ = '>'; - continue; - } - else if (!strncmp(token, "sync type=\"Strongs\" value=\"H", 27)) { - *to++ = '<'; - for (i = 28; token[i] != '\"'; i++) - *to++ = token[i]; - *to++ = '>'; - continue; - } - else if (!strncmp(token, "scripRef", 8)) { - *to++ = '#'; - continue; - } - else if (!strncmp(token, "/scripRef", 9)) { - *to++ = ' '; - continue; - } - else if (!strncmp(token, "note ", 5)) { - *to++ = '{'; - continue; - } - else if (!strncmp(token, "/note", 5)) { - *to++ = '}'; - continue; - } - else if (!strnicmp(token, "font", 4)) { - *to++ = '\\'; - *to++ = '\\'; - continue; - } - else if (!strnicmp(token, "/font", 5)) { - *to++ = '\\'; - *to++ = '\\'; - continue; - } - else switch(*token) { - case 'I': // font tags - case 'i': - *to++ = '\\'; - *to++ = '@'; - continue; - case 'B': // bold start - case 'b': - *to++ = '\\'; - *to++ = '$'; - continue; - case '/': - switch(token[1]) { - case 'I': - case 'i': // italic end - *to++ = '\\'; - *to++ = '@'; - continue; - case 'B': // bold start - case 'b': - *to++ = '\\'; - *to++ = '$'; - continue; - } - } - continue; - } - if (intoken) { - if (tokpos < 2047) - token[tokpos++] = *from; - } - else *to++ = *from; - } - *to++ = 0; - *to = 0; - return 0; -} - - - diff --git a/src/modules/filters/thmlosis.cpp b/src/modules/filters/thmlosis.cpp new file mode 100644 index 0000000..7208610 --- /dev/null +++ b/src/modules/filters/thmlosis.cpp @@ -0,0 +1,385 @@ +/****************************************************************************** + * + * thmlstrongs - SWFilter descendant to hide or show strongs number + * in a ThML 
module. + */ + + +#include <stdlib.h> +#include <stdio.h> +#include <stdarg.h> +#include <thmlosis.h> +#include <swmodule.h> +#include <swlog.h> +#include <versekey.h> +#include <stdarg.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + +SWORD_NAMESPACE_START + +ThMLOSIS::ThMLOSIS() { +} + + +ThMLOSIS::~ThMLOSIS() { +} + + +char ThMLOSIS::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + /* + + const char *from; + char token[2048]; // cheese. Fix. + int tokpos = 0; + bool intoken = false; + int len; + bool lastspace = false; + int word = 1; + char val[128]; + char buf[128]; + char wordstr[5]; + char *valto; + char *ch; + char *textStart, *textEnd; + char *wordStart, *wordEnd; + bool newText = false; + bool newWord = false; + SWBuf tmp; + bool suspendTextPassThru = false; + bool keepToken = false; + bool handled = false; + SWBuf divEnd = ""; + + + wordStart = text; + + SWBuf orig = text; + from = orig.c_str(); + + for (text = ""; *from; from++) { + + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + textEnd = from-1; + wordEnd = to; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + keepToken = false; + suspendTextPassThru = false; + newWord = true; + handled = false; + + while (wordStart < (text+maxlen)) { +// if (strchr(" ,;.?!()'\"", *wordStart)) + if (strchr(";,: .?!()'\"", *wordStart)) + wordStart++; + else break; + } + while (wordEnd > wordStart) { + if (strchr(" ,;:.?!()'\"", *wordEnd)) + wordEnd--; + else break; + } + + // section titles + if (!strcmp(token, "div class=\"sechead\"")) { + pushString(&to, "<title>"); + divEnd = "</title>"; + newText = true; + lastspace = false; + handled = true; + } + else if (!strcmp(token, "/div")) { + pushString(&to, divEnd.c_str()); + lastspace = false; + handled = true; + } + // Scripture Reference + if (!strncmp(token, "scripRef", 8)) { + // pushString(buf, "<reference osisRef=\""); + suspendTextPassThru = true; + 
newText = true; + handled = true; + } + else if (!strncmp(token, "/scripRef", 9)) { + tmp = ""; + tmp.append(textStart, (int)(textEnd - textStart)+1); + pushString(&to, convertToOSIS(tmp.c_str(), key)); + suspendTextPassThru = false; + handled = true; + } +// Usage of italics to represent transChange isn't domaninant; +// solution: mark in OSIS instead, assume no semantics other than emphasis +// of italicized text +// if (!strcmp(module->Type(), "Biblical Texts")) { +// // Italics assume transchange for Biblical texts +// if (!stricmp(token, "i")) { +// pushString(&to, "<transChange type=\"added\">"); +// newText = true; +// lastspace = false; +// handled = true; +// } +// else if (!stricmp(token, "/i")) { +// pushString(&to, "</transChange>"); +// lastspace = false; +// handled = true; +// } +// } +// else { +// // otherwise, italics are just italics +//-- end italics for transchange + if (!stricmp(token, "i")) { + pushString(&to, "<hi type=\"i\">"); + newText = true; + lastspace = false; + handled = true; + } + else if (!stricmp(token, "/i")) { + pushString(&to, "</hi>"); + lastspace = false; + handled = true; + } +// } + + if (!strcmp(token, "b")) { + pushString(&to, "<hi type=\"b\">"); + newText = true; + lastspace = false; + handled = true; + } + else if (!strcmp(token, "/b")) { + pushString(&to, "</hi>"); + lastspace = false; + handled = true; + } + + // Footnote + if (!strcmp(token, "note")) { + pushString(&to, "<note>"); + newText = true; + lastspace = false; + handled = true; + } + else if (!strcmp(token, "/note")) { + pushString(&to, "</note>"); + lastspace = false; + handled = true; + } + + // Figure + else if (!strncmp(token, "img ", 4)) { + const char *src = strstr(token, "src"); + if (!src) // assert we have a src attribute + continue; +// return false; + + pushString(&to, "<figure src=\""); + const char *c; + for (c = src;((*c) && (*c != '"')); c++); + +// uncomment for SWORD absolute path logic +// if (*(c+1) == '/') { +// pushString(buf, "file:"); 
+// pushString(buf, module->getConfigEntry("AbsoluteDataPath")); +// if (*((*buf)-1) == '/') +// c++; // skip '/' +// } +// end of uncomment for asolute path logic + + for (c++;((*c) && (*c != '"')); c++) + *to++ = *c; + + pushString(&to, "\" />"); + handled = true; + } + + // Strongs numbers + else if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs + if (module->isProcessEntryAttributes()) { + valto = val; + for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) + *valto++ = token[i]; + *valto = 0; + if (atoi((!isdigit(*val))?val+1:val) < 5627) { + // normal strongs number + strstrip(val); + sprintf(buf, "<w lemma=\"x-Strong:%s\">", val); + memmove(wordStart+strlen(buf), wordStart, (to-wordStart)+1); + memcpy(wordStart, buf, strlen(buf)); + to+=strlen(buf); + pushString(&to, "</w>"); + module->getEntryAttributes()["Word"][wordstr]["Strongs"] = val; +// tmp = ""; +// tmp.append(textStart, (int)(wordEnd - wordStart)); +// module->getEntryAttributes()["Word"][wordstr]["Text"] = tmp; + } + else { + // verb morph + sprintf(wordstr, "%03d", word-1); + module->getEntryAttributes()["Word"][wordstr]["Morph"] = val; + } + } + handled = true; + } + + // Morphology + else if (!strncmp(token, "sync type=\"morph\"", 17)) { + for (ch = token+17; *ch; ch++) { + if (!strncmp(ch, "class=\"", 7)) { + valto = val; + for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) + *valto++ = ch[i]; + *valto = 0; + sprintf(wordstr, "%03d", word-1); + strstrip(val); + module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = val; + } + if (!strncmp(ch, "value=\"", 7)) { + valto = val; + for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) + *valto++ = ch[i]; + *valto = 0; + sprintf(wordstr, "%03d", word-1); + strstrip(val); + module->getEntryAttributes()["Word"][wordstr]["Morph"] = val; + } + } + if (!strncmp(wordStart, "<w ", 3)) { + + const char *cls = "Unknown", *morph; + + if (module->getEntryAttributes()["Word"][wordstr]["Morph"].size() > 0) { + if 
(module->getEntryAttributes()["Word"][wordstr]["MorphClass"].size() > 0) + cls = module->getEntryAttributes()["Word"][wordstr]["MorphClass"].c_str(); + morph = module->getEntryAttributes()["Word"][wordstr]["Morph"].c_str(); + + sprintf(buf, "morph=\"x-%s:%s\" ", cls, morph); + memmove(wordStart+3+strlen(buf), wordStart+3, (to-wordStart)+1); + memcpy(wordStart+3, buf, strlen(buf)); + to+=strlen(buf); + } + } + handled = true; + } + + if (!keepToken) { // if we don't want strongs + if (!handled) { + SWLog::systemlog->LogError("Unprocessed Token: <%s>", token); +// exit(-1); + } + if (strchr(" ,:;.?!()'\"", from[1])) { + if (lastspace) + to--; + } + if (newText) {textStart = from+1; newText = false; } +// if (newWord) {wordStart = to; newWord = false; } + continue; + } + // if not a strongs token, keep token in text + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + if (newText) {textStart = to; newWord = false; } +// if (newWord) {wordStart = to; newWord = false; } + continue; + } + if (intoken) { + if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + } + else { + if (newWord && (*from != ' ')) {wordStart = to; newWord = false; memset(to, 0, 10); } + if (!suspendTextPassThru) { + *to++ = *from; + lastspace = (*from == ' '); + } + } + } + + VerseKey *vkey = SWDYNAMIC_CAST(VerseKey, key); + if (vkey) { + char ref[254]; + if (vkey->Verse()) + sprintf(ref, "<verse osisID=\"%s\">", vkey->getOSISRef()); + else *ref = 0; + if (*ref) { + memmove(text+strlen(ref), text, maxlen-strlen(ref)-1); + memcpy(text, ref, strlen(ref)); + to+=strlen(ref); + if (vkey->Verse()) { + VerseKey tmp; + tmp = *vkey; + tmp.AutoNormalize(0); + tmp.Headings(1); + sprintf(ref, "</verse>"); + pushString(&to, ref); + tmp = MAXVERSE; + if (*vkey == tmp) { + tmp.Verse(0); +// sprintf(ref, "\t</div>"); +// pushString(&to, ref); + tmp = MAXCHAPTER; + tmp = MAXVERSE; + if (*vkey == tmp) { + tmp.Chapter(0); + 
tmp.Verse(0); +// sprintf(ref, "\t</div>"); +// pushString(&to, ref); + } + } + } + +// else if (vkey->Chapter()) +// sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef()); +// else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef()); + } + } + *to++ = 0; + *to = 0; +*/ + return 0; +} + + +const char *ThMLOSIS::convertToOSIS(const char *inRef, const SWKey *key) { + static SWBuf outRef; + + outRef = ""; + + VerseKey defLanguage; + ListKey verses = defLanguage.ParseVerseList(inRef, (*key), true); + const char *startFrag = inRef; + for (int i = 0; i < verses.Count(); i++) { + VerseKey *element = SWDYNAMIC_CAST(VerseKey, verses.GetElement(i)); + char buf[5120]; + char frag[5120]; + if (element) { + memmove(frag, startFrag, ((const char *)element->userData - startFrag) + 1); + frag[((const char *)element->userData - startFrag) + 1] = 0; + startFrag = (const char *)element->userData + 1; + sprintf(buf, "<reference osisRef=\"%s-%s\">%s</reference>", element->LowerBound().getOSISRef(), element->UpperBound().getOSISRef(), frag); + } + else { + memmove(frag, startFrag, ((const char *)verses.GetElement(i)->userData - startFrag) + 1); + frag[((const char *)verses.GetElement(i)->userData - startFrag) + 1] = 0; + startFrag = (const char *)verses.GetElement(i)->userData + 1; + sprintf(buf, "<reference osisRef=\"%s\">%s</reference>", VerseKey(*verses.GetElement(i)).getOSISRef(), frag); + } + outRef+=buf; + } + return outRef.c_str(); +} + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/thmlplain.cpp b/src/modules/filters/thmlplain.cpp index 5609f16..a04d6c3 100644 --- a/src/modules/filters/thmlplain.cpp +++ b/src/modules/filters/thmlplain.cpp @@ -1,35 +1,30 @@ /****************************************************************************** * - * thmlplain - SWFilter decendant to strip out all ThML tags or convert to + * thmlplain - SWFilter descendant to strip out all ThML tags or convert to * ASCII rendered symbols. 
*/ #include <stdlib.h> -#include <string.h> #include <thmlplain.h> +SWORD_NAMESPACE_START ThMLPlain::ThMLPlain() { } - -char ThMLPlain::ProcessText(char *text, int maxlen) +char ThMLPlain::processText(SWBuf &text, const SWKey *key, const SWModule *module) { - char *to, *from, token[2048]; + char token[2048]; int tokpos = 0; bool intoken = false; - int len; bool ampersand = false; - len = strlen(text) + 1; // shift string to right of buffer - if (len < maxlen) { - memmove(&text[maxlen - len], text, len); - from = &text[maxlen - len]; - } - else from = text; // ------------------------------- - - for (to = text; *from; from++) { + const char *from; + SWBuf orig = text; + from = orig.c_str(); + for (text = ""; *from; from++) + { if (*from == 10 || *from == 13) from++; if (*from == '<') { @@ -53,102 +48,102 @@ char ThMLPlain::ProcessText(char *text, int maxlen) if (*from == ';' && ampersand) { intoken = false; - if (!strncmp("nbsp", token, 4)) *to++ = ' '; - else if (!strncmp("quot", token, 4)) *to++ = '"'; - else if (!strncmp("amp", token, 3)) *to++ = '&'; - else if (!strncmp("lt", token, 2)) *to++ = '<'; - else if (!strncmp("gt", token, 2)) *to++ = '>'; - else if (!strncmp("brvbar", token, 6)) *to++ = '|'; - else if (!strncmp("sect", token, 4)) *to++ = '§'; - else if (!strncmp("copy", token, 4)) *to++ = '©'; - else if (!strncmp("laquo", token, 5)) *to++ = '«'; - else if (!strncmp("reg", token, 3)) *to++ = '®'; - else if (!strncmp("acute", token, 5)) *to++ = '´'; - else if (!strncmp("para", token, 4)) *to++ = '¶'; - else if (!strncmp("raquo", token, 5)) *to++ = '»'; + if (!strncmp("nbsp", token, 4)) text += ' '; + else if (!strncmp("quot", token, 4)) text += '"'; + else if (!strncmp("amp", token, 3)) text += '&'; + else if (!strncmp("lt", token, 2)) text += '<'; + else if (!strncmp("gt", token, 2)) text += '>'; + else if (!strncmp("brvbar", token, 6)) text += '|'; + else if (!strncmp("sect", token, 4)) text += '§'; + else if (!strncmp("copy", token, 4)) text += '©'; + 
else if (!strncmp("laquo", token, 5)) text += '«'; + else if (!strncmp("reg", token, 3)) text += '®'; + else if (!strncmp("acute", token, 5)) text += '´'; + else if (!strncmp("para", token, 4)) text += '¶'; + else if (!strncmp("raquo", token, 5)) text += '»'; - else if (!strncmp("Aacute", token, 6)) *to++ = 'Á'; - else if (!strncmp("Agrave", token, 6)) *to++ = 'À'; - else if (!strncmp("Acirc", token, 5)) *to++ = 'Â'; - else if (!strncmp("Auml", token, 4)) *to++ = 'Ä'; - else if (!strncmp("Atilde", token, 6)) *to++ = 'Ã'; - else if (!strncmp("Aring", token, 5)) *to++ = 'Å'; - else if (!strncmp("aacute", token, 6)) *to++ = 'á'; - else if (!strncmp("agrave", token, 6)) *to++ = 'à'; - else if (!strncmp("acirc", token, 5)) *to++ = 'â'; - else if (!strncmp("auml", token, 4)) *to++ = 'ä'; - else if (!strncmp("atilde", token, 6)) *to++ = 'ã'; - else if (!strncmp("aring", token, 5)) *to++ = 'å'; - else if (!strncmp("Eacute", token, 6)) *to++ = 'É'; - else if (!strncmp("Egrave", token, 6)) *to++ = 'È'; - else if (!strncmp("Ecirc", token, 5)) *to++ = 'Ê'; - else if (!strncmp("Euml", token, 4)) *to++ = 'Ë'; - else if (!strncmp("eacute", token, 6)) *to++ = 'é'; - else if (!strncmp("egrave", token, 6)) *to++ = 'è'; - else if (!strncmp("ecirc", token, 5)) *to++ = 'ê'; - else if (!strncmp("euml", token, 4)) *to++ = 'ë'; - else if (!strncmp("Iacute", token, 6)) *to++ = 'Í'; - else if (!strncmp("Igrave", token, 6)) *to++ = 'Ì'; - else if (!strncmp("Icirc", token, 5)) *to++ = 'Î'; - else if (!strncmp("Iuml", token, 4)) *to++ = 'Ï'; - else if (!strncmp("iacute", token, 6)) *to++ = 'í'; - else if (!strncmp("igrave", token, 6)) *to++ = 'ì'; - else if (!strncmp("icirc", token, 5)) *to++ = 'î'; - else if (!strncmp("iuml", token, 4)) *to++ = 'ï'; - else if (!strncmp("Oacute", token, 6)) *to++ = 'Ó'; - else if (!strncmp("Ograve", token, 6)) *to++ = 'Ò'; - else if (!strncmp("Ocirc", token, 5)) *to++ = 'Ô'; - else if (!strncmp("Ouml", token, 4)) *to++ = 'Ö'; - else if (!strncmp("Otilde", 
token, 6)) *to++ = 'Õ'; - else if (!strncmp("oacute", token, 6)) *to++ = 'ó'; - else if (!strncmp("ograve", token, 6)) *to++ = 'ò'; - else if (!strncmp("ocirc", token, 5)) *to++ = 'ô'; - else if (!strncmp("ouml", token, 4)) *to++ = 'ö'; - else if (!strncmp("otilde", token, 6)) *to++ = 'õ'; - else if (!strncmp("Uacute", token, 6)) *to++ = 'Ú'; - else if (!strncmp("Ugrave", token, 6)) *to++ = 'Ù'; - else if (!strncmp("Ucirc", token, 5)) *to++ = 'Û'; - else if (!strncmp("Uuml", token, 4)) *to++ = 'Ü'; - else if (!strncmp("uacute", token, 6)) *to++ = 'ú'; - else if (!strncmp("ugrave", token, 6)) *to++ = 'ù'; - else if (!strncmp("ucirc", token, 5)) *to++ = 'û'; - else if (!strncmp("uuml", token, 4)) *to++ = 'ü'; - else if (!strncmp("Yacute", token, 6)) *to++ = 'Ý'; - else if (!strncmp("yacute", token, 6)) *to++ = 'ý'; - else if (!strncmp("yuml", token, 4)) *to++ = 'ÿ'; + else if (!strncmp("Aacute", token, 6)) text += 'Á'; + else if (!strncmp("Agrave", token, 6)) text += 'À'; + else if (!strncmp("Acirc", token, 5)) text += 'Â'; + else if (!strncmp("Auml", token, 4)) text += 'Ä'; + else if (!strncmp("Atilde", token, 6)) text += 'Ã'; + else if (!strncmp("Aring", token, 5)) text += 'Å'; + else if (!strncmp("aacute", token, 6)) text += 'á'; + else if (!strncmp("agrave", token, 6)) text += 'à'; + else if (!strncmp("acirc", token, 5)) text += 'â'; + else if (!strncmp("auml", token, 4)) text += 'ä'; + else if (!strncmp("atilde", token, 6)) text += 'ã'; + else if (!strncmp("aring", token, 5)) text += 'å'; + else if (!strncmp("Eacute", token, 6)) text += 'É'; + else if (!strncmp("Egrave", token, 6)) text += 'È'; + else if (!strncmp("Ecirc", token, 5)) text += 'Ê'; + else if (!strncmp("Euml", token, 4)) text += 'Ë'; + else if (!strncmp("eacute", token, 6)) text += 'é'; + else if (!strncmp("egrave", token, 6)) text += 'è'; + else if (!strncmp("ecirc", token, 5)) text += 'ê'; + else if (!strncmp("euml", token, 4)) text += 'ë'; + else if (!strncmp("Iacute", token, 6)) text += 'Í'; + 
else if (!strncmp("Igrave", token, 6)) text += 'Ì'; + else if (!strncmp("Icirc", token, 5)) text += 'Î'; + else if (!strncmp("Iuml", token, 4)) text += 'Ï'; + else if (!strncmp("iacute", token, 6)) text += 'í'; + else if (!strncmp("igrave", token, 6)) text += 'ì'; + else if (!strncmp("icirc", token, 5)) text += 'î'; + else if (!strncmp("iuml", token, 4)) text += 'ï'; + else if (!strncmp("Oacute", token, 6)) text += 'Ó'; + else if (!strncmp("Ograve", token, 6)) text += 'Ò'; + else if (!strncmp("Ocirc", token, 5)) text += 'Ô'; + else if (!strncmp("Ouml", token, 4)) text += 'Ö'; + else if (!strncmp("Otilde", token, 6)) text += 'Õ'; + else if (!strncmp("oacute", token, 6)) text += 'ó'; + else if (!strncmp("ograve", token, 6)) text += 'ò'; + else if (!strncmp("ocirc", token, 5)) text += 'ô'; + else if (!strncmp("ouml", token, 4)) text += 'ö'; + else if (!strncmp("otilde", token, 6)) text += 'õ'; + else if (!strncmp("Uacute", token, 6)) text += 'Ú'; + else if (!strncmp("Ugrave", token, 6)) text += 'Ù'; + else if (!strncmp("Ucirc", token, 5)) text += 'Û'; + else if (!strncmp("Uuml", token, 4)) text += 'Ü'; + else if (!strncmp("uacute", token, 6)) text += 'ú'; + else if (!strncmp("ugrave", token, 6)) text += 'ù'; + else if (!strncmp("ucirc", token, 5)) text += 'û'; + else if (!strncmp("uuml", token, 4)) text += 'ü'; + else if (!strncmp("Yacute", token, 6)) text += 'Ý'; + else if (!strncmp("yacute", token, 6)) text += 'ý'; + else if (!strncmp("yuml", token, 4)) text += 'ÿ'; - else if (!strncmp("deg", token, 3)) *to++ = '°'; - else if (!strncmp("plusmn", token, 6)) *to++ = '±'; - else if (!strncmp("sup2", token, 4)) *to++ = '²'; - else if (!strncmp("sup3", token, 4)) *to++ = '³'; - else if (!strncmp("sup1", token, 4)) *to++ = '¹'; - else if (!strncmp("nbsp", token, 4)) *to++ = 'º'; - else if (!strncmp("pound", token, 5)) *to++ = '£'; - else if (!strncmp("cent", token, 4)) *to++ = '¢'; - else if (!strncmp("frac14", token, 6)) *to++ = '¼'; - else if (!strncmp("frac12", token, 
6)) *to++ = '½'; - else if (!strncmp("frac34", token, 6)) *to++ = '¾'; - else if (!strncmp("iquest", token, 6)) *to++ = '¿'; - else if (!strncmp("iexcl", token, 5)) *to++ = '¡'; - else if (!strncmp("ETH", token, 3)) *to++ = 'Ð'; - else if (!strncmp("eth", token, 3)) *to++ = 'ð'; - else if (!strncmp("THORN", token, 5)) *to++ = 'Þ'; - else if (!strncmp("thorn", token, 5)) *to++ = 'þ'; - else if (!strncmp("AElig", token, 5)) *to++ = 'Æ'; - else if (!strncmp("aelig", token, 5)) *to++ = 'æ'; - else if (!strncmp("Oslash", token, 6)) *to++ = 'Ø'; - else if (!strncmp("curren", token, 6)) *to++ = '¤'; - else if (!strncmp("Ccedil", token, 6)) *to++ = 'Ç'; - else if (!strncmp("ccedil", token, 6)) *to++ = 'ç'; - else if (!strncmp("szlig", token, 5)) *to++ = 'ß'; - else if (!strncmp("Ntilde", token, 6)) *to++ = 'Ñ'; - else if (!strncmp("ntilde", token, 6)) *to++ = 'ñ'; - else if (!strncmp("yen", token, 3)) *to++ = '¥'; - else if (!strncmp("not", token, 3)) *to++ = '¬'; - else if (!strncmp("ordf", token, 4)) *to++ = 'ª'; - else if (!strncmp("uml", token, 3)) *to++ = '¨'; - else if (!strncmp("shy", token, 3)) *to++ = ''; - else if (!strncmp("macr", token, 4)) *to++ = '¯'; + else if (!strncmp("deg", token, 3)) text += '°'; + else if (!strncmp("plusmn", token, 6)) text += '±'; + else if (!strncmp("sup2", token, 4)) text += '²'; + else if (!strncmp("sup3", token, 4)) text += '³'; + else if (!strncmp("sup1", token, 4)) text += '¹'; + else if (!strncmp("nbsp", token, 4)) text += 'º'; + else if (!strncmp("pound", token, 5)) text += '£'; + else if (!strncmp("cent", token, 4)) text += '¢'; + else if (!strncmp("frac14", token, 6)) text += '¼'; + else if (!strncmp("frac12", token, 6)) text += '½'; + else if (!strncmp("frac34", token, 6)) text += '¾'; + else if (!strncmp("iquest", token, 6)) text += '¿'; + else if (!strncmp("iexcl", token, 5)) text += '¡'; + else if (!strncmp("ETH", token, 3)) text += 'Ð'; + else if (!strncmp("eth", token, 3)) text += 'ð'; + else if (!strncmp("THORN", 
token, 5)) text += 'Þ'; + else if (!strncmp("thorn", token, 5)) text += 'þ'; + else if (!strncmp("AElig", token, 5)) text += 'Æ'; + else if (!strncmp("aelig", token, 5)) text += 'æ'; + else if (!strncmp("Oslash", token, 6)) text += 'Ø'; + else if (!strncmp("curren", token, 6)) text += '¤'; + else if (!strncmp("Ccedil", token, 6)) text += 'Ç'; + else if (!strncmp("ccedil", token, 6)) text += 'ç'; + else if (!strncmp("szlig", token, 5)) text += 'ß'; + else if (!strncmp("Ntilde", token, 6)) text += 'Ñ'; + else if (!strncmp("ntilde", token, 6)) text += 'ñ'; + else if (!strncmp("yen", token, 3)) text += '¥'; + else if (!strncmp("not", token, 3)) text += '¬'; + else if (!strncmp("ordf", token, 4)) text += 'ª'; + else if (!strncmp("uml", token, 3)) text += '¨'; + else if (!strncmp("shy", token, 3)) text += ''; + else if (!strncmp("macr", token, 4)) text += '¯'; continue; } @@ -156,32 +151,32 @@ char ThMLPlain::ProcessText(char *text, int maxlen) intoken = false; // process desired tokens if (!strncmp(token, "sync type=\"Strongs\" value=\"", 27)) { - *to++ = ' '; - *to++ = '<'; + text += ' '; + text += '<'; for (unsigned int i = 27; token[i] != '\"'; i++) - *to++ = token[i]; - *to++ = '>'; + text += token[i]; + text += '>'; continue; } if (!strncmp(token, "sync type=\"morph\" value=\"", 25)) { - *to++ = ' '; - *to++ = '('; + text += ' '; + text += '('; for (unsigned int i = 25; token[i] != '\"'; i++) - *to++ = token[i]; - *to++ = ')'; + text += token[i]; + text += ')'; continue; } if (!strncmp("note", token, 4)) { - *to++ = ' '; - *to++ = '('; + text += ' '; + text += '('; } else if (!strncmp("br", token, 2)) - *to++ = '\n'; + text += '\n'; else if (!strncmp("/p", token, 2)) - *to++ = '\n'; + text += '\n'; else if (!strncmp("/note", token, 5)) { - *to++ = ')'; - *to++ = ' '; + text += ')'; + text += ' '; } continue; } @@ -190,12 +185,26 @@ char ThMLPlain::ProcessText(char *text, int maxlen) token[tokpos++] = *from; token[tokpos+2] = 0; } - else *to++ = *from; + else text 
+= *from; } - *to++ = 0; - *to = 0; + + orig = text; + from = orig.c_str(); + for (text = ""; *from; from++) { //loop to remove extra spaces + if ((strchr(" \t\n\r", *from))) { + while (*(from+1) && (strchr(" \t\n\r", *(from+1)))) { + from++; + } + text += " "; + } + else { + text += *from; + } + } + text += (char)0; return 0; } +SWORD_NAMESPACE_END diff --git a/src/modules/filters/thmlrtf.cpp b/src/modules/filters/thmlrtf.cpp index 76289ec..4487921 100644 --- a/src/modules/filters/thmlrtf.cpp +++ b/src/modules/filters/thmlrtf.cpp @@ -15,12 +15,14 @@ ***************************************************************************/ #include <stdlib.h> -#include <string.h> #include <thmlrtf.h> +#include <swmodule.h> +#include <utilxml.h> +#include <versekey.h> +SWORD_NAMESPACE_START -ThMLRTF::ThMLRTF() -{ +ThMLRTF::ThMLRTF() { setTokenStart("<"); setTokenEnd(">"); @@ -30,6 +32,7 @@ ThMLRTF::ThMLRTF() setEscapeStringCaseSensitive(true); addEscapeStringSubstitute("nbsp", " "); + addEscapeStringSubstitute("apos", "'"); addEscapeStringSubstitute("quot", "\""); addEscapeStringSubstitute("amp", "&"); addEscapeStringSubstitute("lt", "<"); @@ -128,87 +131,178 @@ ThMLRTF::ThMLRTF() setTokenCaseSensitive(true); - addTokenSubstitute("/scripRef", "|}"); - addTokenSubstitute("/note", ") }"); - - addTokenSubstitute("br", "\\line "); - addTokenSubstitute("br /", "\\line "); - addTokenSubstitute("i", "{\\i1 "); - addTokenSubstitute("/i", "}"); - addTokenSubstitute("b", "{\\b1 "); - addTokenSubstitute("/b", "}"); - addTokenSubstitute("p", "\\par "); - - //we need uppercase forms for the moment to support a few early ThML modules that aren't XHTML compliant - addTokenSubstitute("BR", "\\line "); - addTokenSubstitute("I", "{\\i1 "); - addTokenSubstitute("/I", "}"); - addTokenSubstitute("B", "{\\b1 "); - addTokenSubstitute("/B", "}"); - addTokenSubstitute("P", "\\par "); + + addTokenSubstitute("br", "\\line "); + addTokenSubstitute("br /", "\\line "); + addTokenSubstitute("i", "{\\i1 "); + 
addTokenSubstitute("/i", "}"); + addTokenSubstitute("b", "{\\b1 "); + addTokenSubstitute("/b", "}"); + addTokenSubstitute("p", "\\par "); + + //we need uppercase forms for the moment to support a few early ThML modules that aren't XHTML compliant + addTokenSubstitute("BR", "\\line "); + addTokenSubstitute("I", "{\\i1 "); + addTokenSubstitute("/I", "}"); + addTokenSubstitute("B", "{\\b1 "); + addTokenSubstitute("/B", "}"); + addTokenSubstitute("P", "\\par "); + addTokenSubstitute("scripture", "{\\i1 "); + addTokenSubstitute("/scripture", "}"); } -bool ThMLRTF::handleToken(char **buf, const char *token, DualStringMap &userData) { - if (!substituteToken(buf, token)) { - // manually process if it wasn't a simple substitution - if (!strncmp(token, "sync type=\"Strongs\" value=\"", 27)) { -/* if (token[27] == 'H' || token[27] == 'G' || token[27] == 'A') { - pushString(buf, " {\\fs15 <"); - for (unsigned int i = 28; token[i] != '\"'; i++) - *(*buf)++ = token[i]; - pushString(buf, ">}"); - } - else if (token[27] == 'T') { - pushString(buf, " {\\fs15 ("); - for (unsigned int i = 28; token[i] != '\"'; i++) - *(*buf)++ = token[i]; - pushString(buf, ")}"); + +char ThMLRTF::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + SWBasicFilter::processText(text, key, module); //handle tokens as usual + const char *from; + SWBuf orig = text; + from = orig.c_str(); + for (text = ""; *from; from++) { //loop to remove extra spaces + if ((strchr(" \t\n\r", *from))) { + while (*(from+1) && (strchr(" \t\n\r", *(from+1)))) { + from++; } + text += " "; + } + else { + text += *from; + } + } + text += (char)0; + return 0; +} + + +ThMLRTF::MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) { + if (module) { + version = module->Name(); + BiblicalText = (!strcmp(module->Type(), "Biblical Texts")); + } +} + + +bool ThMLRTF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) { + const char *tok; + if 
(!substituteToken(buf, token)) { // manually process if it wasn't a simple substitution + MyUserData *u = (MyUserData *)userData; + XMLTag tag(token); + if ((!tag.isEndTag()) && (!tag.isEmpty())) + u->startTag = tag; + if (tag.getName() && !strcmp(tag.getName(), "sync")) { + SWBuf value = tag.getAttribute("value"); + if (tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "morph")) { //> + buf.appendFormatted(" {\\cf4 \\sub (%s)}", value.c_str()); + } + else if( tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "Strongs")) { + if (value[0] == 'H' || value[0] == 'G' || value[0] == 'A') { + value<<1; + buf.appendFormatted(" {\\cf3 \\sub <%s>}", value.c_str()); + } + else if (value[0] == 'T') { + value<<1; + buf.appendFormatted(" {\\cf4 \\sub (%s)}", value.c_str()); + } + } + else if (tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "Dict")) { + if (!tag.isEndTag()) + buf += "{\\b "; + else buf += "}"; + } } - else if (!strncmp(token, "sync type=\"morph\" ", 18)) { - pushString(buf, " {\\fs15 ("); - for (const char *tok = token + 5; *tok; tok++) { - if (!strncmp(tok, "value=\"", 7)) { - tok += 7; - for (;*tok != '\"'; tok++) - *(*buf)++ = *tok; - break; + // <note> tag + else if (!strcmp(tag.getName(), "note")) { + if (!tag.isEndTag()) { + if (!tag.isEmpty()) { + SWBuf type = tag.getAttribute("type"); + SWBuf footnoteNumber = tag.getAttribute("swordFootnote"); + VerseKey *vkey; + // see if we have a VerseKey * or descendant + try { + vkey = SWDYNAMIC_CAST(VerseKey, u->key); + } + catch ( ... ) { } + if (vkey) { + // leave this special osis type in for crossReference notes types? Might thml use this some day? Doesn't hurt. + char ch = ((tag.getAttribute("type") && ((!strcmp(tag.getAttribute("type"), "crossReference")) || (!strcmp(tag.getAttribute("type"), "x-cross-ref")))) ? 
'x':'n'); + buf.appendFormatted("{\\super <a href=\"\">*%c%i.%s</a>} ", ch, vkey->Verse(), footnoteNumber.c_str()); + } + u->suspendTextPassThru = true; } } + if (tag.isEndTag()) { + u->suspendTextPassThru = false; + } + } + - pushString(buf, ")}"); -*/ } - else if (!strncmp(token, "sync type=\"lemma\" value=\"", 25)) { - pushString(buf, "{\\fs15 ("); - for (unsigned int i = 25; token[i] != '\"'; i++) - *(*buf)++ = token[i]; - pushString(buf, ")}"); + else if (!strcmp(tag.getName(), "scripRef")) { + if (!tag.isEndTag()) { + if (!tag.isEmpty()) { + u->suspendTextPassThru = true; + } + } + if (tag.isEndTag()) { // </scripRef> + if (!u->BiblicalText) { + SWBuf refList = u->startTag.getAttribute("passage"); + if (!refList.length()) + refList = u->lastTextNode; + SWBuf version = tag.getAttribute("version"); + buf += "<a href=\"\">"; + buf += refList.c_str(); +// buf += u->lastTextNode.c_str(); + buf += "</a>"; + } + else { + SWBuf footnoteNumber = u->startTag.getAttribute("swordFootnote"); + VerseKey *vkey; + // see if we have a VerseKey * or descendant + try { + vkey = SWDYNAMIC_CAST(VerseKey, u->key); + } + catch ( ... ) {} + if (vkey) { + // leave this special osis type in for crossReference notes types? Might thml use this some day? Doesn't hurt. 
+ buf.appendFormatted("{\\super <a href=\"\">*x%i.%s</a>} ", vkey->Verse(), footnoteNumber.c_str()); + } + } + + // let's let text resume to output again + u->suspendTextPassThru = false; + } } - else if (!strncmp(token, "scripRef", 8)) { - pushString(buf, "{\\cf2 #"); + + else if (tag.getName() && !strcmp(tag.getName(), "div")) { + if (tag.isEndTag() && u->SecHead) { + buf += "\\par}"; + u->SecHead = false; + } + else if (tag.getAttribute("class")) { + if (!stricmp(tag.getAttribute("class"), "sechead")) { + u->SecHead = true; + buf += "{\\par\\i1\\b1 "; + } + else if (!stricmp(tag.getAttribute("class"), "title")) { + u->SecHead = true; + buf += "{\\par\\i1\\b1 "; + } + } } - else if (!strncmp(token, "div", 3)) { - *(*buf)++ = '{'; - if (!strncmp(token, "div class=\"title\"", 17)) { - pushString(buf, "\\par\\i1\\b1 "); - userData["sechead"] = "true"; - } - else if (!strncmp(token, "div class=\"sechead\"", 19)) { - pushString(buf, "\\par\\i1\\b1 "); - userData["sechead"] = "true"; - } - } - else if (!strncmp(token, "/div", 4)) { - *(*buf)++ = '}'; - if (userData["sechead"] == "true") { - pushString(buf, "\\par "); - userData["sechead"] == "false"; - } - } - else if (!strncmp(token, "note", 4)) { - pushString(buf, " {\\i1\\fs15 ("); - } + else if (tag.getName() && (!strcmp(tag.getName(), "img") || !strcmp(tag.getName(), "image"))) { + const char *src = tag.getAttribute("src"); + if (!src) // assert we have a src attribute + return false; + char* filepath = new char[strlen(u->module->getConfigEntry("AbsoluteDataPath")) + strlen(token)]; + *filepath = 0; + strcpy(filepath, userData->module->getConfigEntry("AbsoluteDataPath")); + strcat(filepath, src); + +// we do this because BibleCS looks for this EXACT format for an image tag + buf+="<img src=\""; + buf+=filepath; + buf+="\" />"; + delete [] filepath; + } else { return false; // we still didn't handle token } @@ -217,3 +311,4 @@ bool ThMLRTF::handleToken(char **buf, const char *token, DualStringMap &userData } 
+SWORD_NAMESPACE_END diff --git a/src/modules/filters/thmlscripref.cpp b/src/modules/filters/thmlscripref.cpp index 23edd6d..7b93f90 100644 --- a/src/modules/filters/thmlscripref.cpp +++ b/src/modules/filters/thmlscripref.cpp @@ -1,103 +1,126 @@ /****************************************************************************** * - * thmlscripref - SWFilter decendant to hide or show scripture references - * in a ThML module. + * thmlscripref - SWFilter descendant to hide or show scripture + * referebces in a ThML module. */ #include <stdlib.h> -#include <string.h> #include <thmlscripref.h> +#include <utilxml.h> +#include <versekey.h> +#include <swmodule.h> #ifndef __GNUC__ #else #include <unixstr.h> #endif +SWORD_NAMESPACE_START -const char ThMLScripref::on[] = "On"; -const char ThMLScripref::off[] = "Off"; -const char ThMLScripref::optName[] = "Scripture Cross-references"; -const char ThMLScripref::optTip[] = "Toggles Scripture Cross-references On and Off if they exist"; +const char oName[] = "Cross-references"; +const char oTip[] = "Toggles Scripture Cross-references On and Off if they exist"; +const SWBuf choices[3] = {"On", "Off", ""}; +const StringList oValues(&choices[0], &choices[2]); -ThMLScripref::ThMLScripref() { - option = false; - options.push_back(on); - options.push_back(off); +ThMLScripref::ThMLScripref() : SWOptionFilter(oName, oTip, &oValues) { + setOptionValue("On"); } ThMLScripref::~ThMLScripref() { } -void ThMLScripref::setOptionValue(const char *ival) -{ - option = (!stricmp(ival, on)); -} -const char *ThMLScripref::getOptionValue() -{ - return (option) ? 
on:off; -} +char ThMLScripref::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + SWBuf token; + bool intoken = false; + bool hide = false; + SWBuf tagText; + XMLTag startTag; + SWBuf refs = ""; + int footnoteNum = 1; + char buf[254]; + VerseKey parser = key->getText(); -char ThMLScripref::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) -{ - if (!option) { // if we don't want scriprefs - char *to, *from, token[2048]; // cheese. Fix. - int tokpos = 0; - bool intoken = false; - int len; - bool hide = false; + SWBuf orig = text; + const char *from = orig.c_str(); - len = strlen(text) + 1; // shift string to right of buffer - if (len < maxlen) { - memmove(&text[maxlen - len], text, len); - from = &text[maxlen - len]; + for (text = ""; *from; from++) { + if (*from == '<') { + intoken = true; + token = ""; + continue; } - else from = text; // ------------------------------- + if (*from == '>') { // process tokens + intoken = false; - for (to = text; *from; from++) { - if (*from == '<') { - intoken = true; - tokpos = 0; - token[0] = 0; - token[1] = 0; - token[2] = 0; - continue; - } - if (*from == '>') { // process tokens - intoken = false; - if (!strnicmp(token, "scripRef", 8)) { - hide = true; - continue; + XMLTag tag(token); + if (!strcmp(tag.getName(), "scripRef")) { + if (!tag.isEndTag()) { + if (!tag.isEmpty()) { + refs = ""; + startTag = tag; + hide = true; + tagText = ""; + continue; + } } - else if (!strnicmp(token, "/scripRef", 9)) { - hide = false; - continue; + if (hide && tag.isEndTag()) { + if (module->isProcessEntryAttributes()) { + SWBuf fc = module->getEntryAttributes()["Footnote"]["count"]["value"]; + footnoteNum = (fc.length()) ? 
atoi(fc.c_str()) : 0; + sprintf(buf, "%i", ++footnoteNum); + module->getEntryAttributes()["Footnote"]["count"]["value"] = buf; + StringList attributes = startTag.getAttributeNames(); + for (StringList::iterator it = attributes.begin(); it != attributes.end(); it++) { + module->getEntryAttributes()["Footnote"][buf][it->c_str()] = startTag.getAttribute(it->c_str()); + } + module->getEntryAttributes()["Footnote"][buf]["body"] = tagText; + startTag.setAttribute("swordFootnote", buf); + SWBuf passage = startTag.getAttribute("passage"); + if (passage.length()) + refs = parser.ParseVerseList(passage.c_str(), parser, true).getRangeText(); + else refs = parser.ParseVerseList(tagText.c_str(), parser, true).getRangeText(); + module->getEntryAttributes()["Footnote"][buf]["refList"] = refs.c_str(); + } + hide = false; + if (option) { // we want the tag in the text + text += startTag; + text.append(tagText); + } + else continue; } + } - // if not a scripref token, keep token in text - if (!hide) { - *to++ = '<'; - for (char *tok = token; *tok; tok++) - *to++ = *tok; - *to++ = '>'; - } - continue; + // if not a scripRef token, keep token in text + if ((!strcmp(tag.getName(), "scripRef")) && (!tag.isEndTag())) { + SWBuf osisRef = tag.getAttribute("passage"); + if (refs.length()) + refs += "; "; + refs += osisRef; } - if (intoken) { - if (tokpos < 2045) - token[tokpos++] = *from; - token[tokpos+2] = 0; + if (!hide) { + text += '<'; + text.append(token); + text += '>'; } - else { - if (!hide) { - *to++ = *from; - } + else { + tagText += '<'; + tagText.append(token); + tagText += '>'; } + continue; + } + if (intoken) { //copy token + token += *from; } - *to++ = 0; - *to = 0; + else if (!hide) { //copy text which is not inside a token + text += *from; + } + else tagText += *from; } return 0; } + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/thmlstrongs.cpp b/src/modules/filters/thmlstrongs.cpp index 8d0466c..4a53e25 100644 --- a/src/modules/filters/thmlstrongs.cpp +++ 
b/src/modules/filters/thmlstrongs.cpp @@ -1,50 +1,40 @@ /****************************************************************************** * - * thmlstrongs - SWFilter decendant to hide or show strongs number + * thmlstrongs - SWFilter descendant to hide or show strongs number * in a ThML module. */ #include <stdlib.h> #include <stdio.h> -#include <string.h> #include <thmlstrongs.h> #include <swmodule.h> #ifndef __GNUC__ #else #include <unixstr.h> #endif +#include <ctype.h> +SWORD_NAMESPACE_START -const char ThMLStrongs::on[] = "On"; -const char ThMLStrongs::off[] = "Off"; -const char ThMLStrongs::optName[] = "Strong's Numbers"; -const char ThMLStrongs::optTip[] = "Toggles Strong's Numbers On and Off if they exist"; +const char oName[] = "Strong's Numbers"; +const char oTip[] = "Toggles Strong's Numbers On and Off if they exist"; +const SWBuf choices[3] = {"On", "Off", ""}; +const StringList oValues(&choices[0], &choices[2]); -ThMLStrongs::ThMLStrongs() { - option = false; - options.push_back(on); - options.push_back(off); +ThMLStrongs::ThMLStrongs() : SWOptionFilter(oName, oTip, &oValues) { + setOptionValue("Off"); } ThMLStrongs::~ThMLStrongs() { } -void ThMLStrongs::setOptionValue(const char *ival) -{ - option = (!stricmp(ival, on)); -} - -const char *ThMLStrongs::getOptionValue() -{ - return (option) ? on:off; -} -char ThMLStrongs::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) -{ - char *to, *from, token[2048]; // cheese. Fix. +char ThMLStrongs::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + char token[2048]; // cheese. Fix. 
+ const char *from; int tokpos = 0; bool intoken = false; int len; @@ -54,23 +44,21 @@ char ThMLStrongs::ProcessText(char *text, int maxlen, const SWKey *key, const SW char wordstr[5]; char *valto; char *ch; + unsigned int textStart = 0, textEnd = 0; + SWBuf tmp; + bool newText = false; - len = strlen(text) + 1; // shift string to right of buffer - if (len < maxlen) { - memmove(&text[maxlen - len], text, len); - from = &text[maxlen - len]; - } - else from = text; - - // ------------------------------- + SWBuf orig = text; + from = orig.c_str(); - for (to = text; *from; from++) { + for (text = ""; *from; from++) { if (*from == '<') { intoken = true; tokpos = 0; token[0] = 0; token[1] = 0; token[2] = 0; + textEnd = text.length(); continue; } if (*from == '>') { // process tokens @@ -81,15 +69,28 @@ char ThMLStrongs::ProcessText(char *text, int maxlen, const SWKey *key, const SW for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) *valto++ = token[i]; *valto = 0; - sprintf(wordstr, "%03d", word++); - module->getEntryAttributes()["Word"][wordstr]["Strongs"] = val; + if (atoi((!isdigit(*val))?val+1:val) < 5627) { + // normal strongs number + sprintf(wordstr, "%03d", word++); + module->getEntryAttributes()["Word"][wordstr]["Strongs"] = val; + tmp = ""; + tmp.append(text.c_str()+textStart, (int)(textEnd - textStart)); + module->getEntryAttributes()["Word"][wordstr]["Text"] = tmp; + newText = true; + } + else { + // verb morph + sprintf(wordstr, "%03d", word-1); + module->getEntryAttributes()["Word"][wordstr]["Morph"] = val; + } } if (!option) { // if we don't want strongs if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) { if (lastspace) - to--; + text--; } + if (newText) {textStart = text.length(); newText = false; } continue; } } @@ -116,10 +117,10 @@ char ThMLStrongs::ProcessText(char *text, int maxlen, const SWKey *key, const SW 
} } // if not a strongs token, keep token in text - *to++ = '<'; - for (char *tok = token; *tok; tok++) - *to++ = *tok; - *to++ = '>'; + text += '<'; + text += token; + text += '>'; + if (newText) {textStart = text.length(); newText = false; } continue; } if (intoken) { @@ -128,11 +129,11 @@ char ThMLStrongs::ProcessText(char *text, int maxlen, const SWKey *key, const SW token[tokpos+2] = 0; } else { - *to++ = *from; + text += *from; lastspace = (*from == ' '); } } - *to++ = 0; - *to = 0; return 0; } + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/thmlvariants.cpp b/src/modules/filters/thmlvariants.cpp index fda0950..b8ab653 100644 --- a/src/modules/filters/thmlvariants.cpp +++ b/src/modules/filters/thmlvariants.cpp @@ -1,18 +1,18 @@ /****************************************************************************** * - * thmlvariants - SWFilter decendant to hide or show textual variants + * thmlvariants - SWFilter descendant to hide or show textual variants * in a ThML module. */ #include <stdlib.h> -#include <string.h> #include <thmlvariants.h> #ifndef __GNUC__ #else #include <unixstr.h> #endif +SWORD_NAMESPACE_START const char ThMLVariants::primary[] = "Primary Reading"; const char ThMLVariants::secondary[] = "Secondary Reading"; @@ -35,7 +35,9 @@ ThMLVariants::~ThMLVariants() { void ThMLVariants::setOptionValue(const char *ival) { - option = (!stricmp(ival, primary)); + if (!stricmp(ival, primary)) option = 0; + else if (!stricmp(ival, secondary)) option = 1; + else option = 2; } const char *ThMLVariants::getOptionValue() @@ -51,128 +53,54 @@ const char *ThMLVariants::getOptionValue() } } -char ThMLVariants::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +char ThMLVariants::processText(SWBuf &text, const SWKey *key, const SWModule *module) { - if (option == 0) { //we want primary only - char *to, *from, token[2048]; // cheese. Fix. 
- int tokpos = 0; + if ( option == 0 || option == 1) { //we want primary or variant only bool intoken = false; - int len; bool hide = false; - - len = strlen(text) + 1; // shift string to right of buffer - if (len < maxlen) { - memmove(&text[maxlen - len], text, len); - from = &text[maxlen - len]; - } - else from = text; - // ------------------------------- + SWBuf token; + SWBuf orig = text; + const char *from = orig.c_str(); - for (to = text; *from; from++) { + //we use a fixed comparison string to make sure the loop is as fast as the original two blocks with almost the same code + const char* variantCompareString = (option == 0) ? "div type=\"variant\" class=\"1\"" : "div type=\"variant\" class=\"2\""; + + for (text = ""; *from; from++) { if (*from == '<') { intoken = true; - tokpos = 0; - token[0] = 0; - token[1] = 0; - token[2] = 0; + token = ""; continue; } - if (*from == '>') { // process tokens + else if (*from == '>') { // process tokens intoken = false; - if (!strncmp(token, "div type=\"variant\"", 19)) { - hide = true; - continue; - } - else if (!strncmp(token, "/div", 4)) { - hide = false; - continue; + + if ( !strncmp(token.c_str(), variantCompareString, 28)) { //only one of the variants, length of the two strings is 28 in both cases + hide = true; + continue; } - - // if not a footnote token, keep token in text - if (!hide) { - *to++ = '<'; - for (char *tok = token; *tok; tok++) - *to++ = *tok; - *to++ = '>'; - } - continue; - } - if (intoken) { - if (tokpos < 2045) - token[tokpos++] = *from; - token[tokpos+2] = 0; - } - else { if (!hide) { - *to++ = *from; - } - } - } - *to++ = 0; - *to = 0; - - } - else if (option == 1) { //we want variant only - char *to, *from, token[2048]; // cheese. Fix. 
- int tokpos = 0; - bool intoken = false; - int len; - bool hide = false; - - len = strlen(text) + 1; // shift string to right of buffer - if (len < maxlen) { - memmove(&text[maxlen - len], text, len); - from = &text[maxlen - len]; - } - else from = text; - - // ------------------------------- - - for (to = text; *from; from++) { - if (*from == '<') { - intoken = true; - tokpos = 0; - token[0] = 0; - token[1] = 0; - token[2] = 0; - continue; - } - if (*from == '>') { // process tokens - intoken = false; - if (!strncmp(token, "div type=\"primary\"", 19)) { - hide = true; - continue; + text += '<'; + text.append(token); + text += '>'; } - else if (!strncmp(token, "/div", 4)) { - hide = false; - continue; + if (!strncmp(token.c_str(), "/div", 4)) { + hide = false; + continue; } - // if not a footnote token, keep token in text - if (!hide) { - *to++ = '<'; - for (char *tok = token; *tok; tok++) - *to++ = *tok; - *to++ = '>'; - } continue; } if (intoken) { - if (tokpos < 2045) - token[tokpos++] = *from; - token[tokpos+2] = 0; + token += *from; } - else { - if (!hide) { - *to++ = *from; - } + else if (!hide) { + text += *from; } } - *to++ = 0; - *to = 0; } + return 0; } @@ -181,3 +109,4 @@ char ThMLVariants::ProcessText(char *text, int maxlen, const SWKey *key, const S +SWORD_NAMESPACE_END diff --git a/src/modules/filters/thmlwebif.cpp b/src/modules/filters/thmlwebif.cpp new file mode 100644 index 0000000..f082e1e --- /dev/null +++ b/src/modules/filters/thmlwebif.cpp @@ -0,0 +1,104 @@ +/*************************************************************************** + ThMLWEBIF.cpp - ThML to HTML filter with hrefs + ------------------- + begin : 2001-09-03 + copyright : 2001 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU 
General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <thmlwebif.h> +#include <swmodule.h> +#include <utilweb.h> +#include <utilxml.h> +#include <ctype.h> + +SWORD_NAMESPACE_START + +ThMLWEBIF::ThMLWEBIF() : baseURL(""), passageStudyURL(baseURL + "passagestudy.jsp") { + //all's done in ThMLHTMLHREF +} + +bool ThMLWEBIF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) { + const char *tok; + + if (!substituteToken(buf, token)) { // manually process if it wasn't a simple substitution + MyUserData *u = (MyUserData *)userData; + XMLTag tag(token); + SWBuf url; + if (!strcmp(tag.getName(), "sync")) { + const char* value = tag.getAttribute("value"); + url = value; + if ((url.length() > 1) && strchr("GH", url[0])) { + if (isdigit(url[1])) + url = url.c_str()+1; + } + + if(tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "morph")){ + buf += "<small><em> ("; + buf.appendFormatted("<a href=\"%s?showMorph=%s#cv\">", passageStudyURL.c_str(), encodeURL(url).c_str() ); + } + else { + if (value) { + value++; //skip leading G, H or T + //url = value; + } + + buf += "<small><em> <"; + buf.appendFormatted("<a href=\"%s?showStrong=%s#cv\">", passageStudyURL.c_str(), encodeURL(url).c_str() ); + } + + buf += value; + buf += "</a>"; + + if (tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "morph")) { + buf += ") </em></small>"; + } + else { + buf += "> </em></small>"; + } + } + else if (!strcmp(tag.getName(), "scripRef")) { + if (tag.isEndTag()) { + if (u->inscriptRef) { // like "<scripRef passage="John 3:16">John 3:16</scripRef>" + u->inscriptRef = false; + buf += "</a>"; + } + else { // end of scripRef like "<scripRef>John 3:16</scripRef>" + url = u->lastTextNode; + buf.appendFormatted("<a href=\"%s?key=%s#cv\">", 
passageStudyURL.c_str(), encodeURL(url).c_str()); + buf += u->lastTextNode.c_str(); + buf += "</a>"; + + // let's let text resume to output again + u->suspendTextPassThru = false; + } + } + else if (tag.getAttribute("passage")) { //passage given + u->inscriptRef = true; + + buf.appendFormatted("<a href=\"%s?key=%s#cv\">", passageStudyURL.c_str(), encodeURL(tag.getAttribute("passage")).c_str()); + } + else { //no passage given + u->inscriptRef = false; + // let's stop text from going to output + u->suspendTextPassThru = true; + } + } + else { + return ThMLHTMLHREF::handleToken(buf,token,userData); + } + } + return true; +} + + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/unicodertf.cpp b/src/modules/filters/unicodertf.cpp index b53a2d7..0a2bca8 100644 --- a/src/modules/filters/unicodertf.cpp +++ b/src/modules/filters/unicodertf.cpp @@ -1,6 +1,6 @@ /****************************************************************************** * - * unicodertf - SWFilter decendant to convert a double byte unicode file + * unicodertf - SWFilter descendant to convert a double byte unicode file * to RTF tags */ @@ -9,62 +9,78 @@ #include <stdio.h> #include <unicodertf.h> +SWORD_NAMESPACE_START + UnicodeRTF::UnicodeRTF() { } -char UnicodeRTF::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +char UnicodeRTF::processText(SWBuf &text, const SWKey *key, const SWModule *module) { - unsigned char *to, *from, *maxto; - int len; - char digit[10]; - short ch; // must be signed per unicode spec (negative is ok for big numbers > 32768) + const unsigned char *from; + char digit[10]; + unsigned long ch; + signed short utf16; + unsigned char from2[7]; - len = strlenw(text) + 2; // shift string to right of buffer - if (len < maxlen) { - memmove(&text[maxlen - len], text, len); - from = (unsigned char*)&text[maxlen - len]; - } - else from = (unsigned char*)text; - maxto =(unsigned char*)text + maxlen; + SWBuf orig = text; + + from = (const unsigned char 
*)orig.c_str(); // ------------------------------- - for (to = (unsigned char*)text; *from && (to <= maxto); from++) { - ch = 0; - if ((*from & 128) != 128) { - *to++ = *from; - continue; - } - if ((*from & 128) && ((*from & 64) != 64)) { - // error - *from = 'x'; - continue; - } - *from <<= 1; - int subsequent; - for (subsequent = 1; (*from & 128); subsequent++) { - *from <<= 1; - from[subsequent] &= 63; - ch <<= 6; - ch |= from[subsequent]; - } - subsequent--; - *from <<=1; - char significantFirstBits = 8 - (2+subsequent); - - ch |= (((short)*from) << (((6*subsequent)+significantFirstBits)-8)); - from += subsequent; - *to++ = '\\'; - *to++ = 'u'; - sprintf(digit, "%d", ch); - for (char *dig = digit; *dig; dig++) - *to++ = *dig; - *to++ = '?'; + for (text = ""; *from; from++) { + ch = 0; + //case: ANSI + if ((*from & 128) != 128) { + text += *from; + continue; + } + //case: Invalid UTF-8 (illegal continuing byte in initial position) + if ((*from & 128) && ((*from & 64) != 64)) { + continue; + } + //case: 2+ byte codepoint + from2[0] = *from; + from2[0] <<= 1; + int subsequent; + for (subsequent = 1; (from2[0] & 128) && (subsequent < 7); subsequent++) { + from2[0] <<= 1; + from2[subsequent] = from[subsequent]; + from2[subsequent] &= 63; + ch <<= 6; + ch |= from2[subsequent]; + } + subsequent--; + from2[0] <<= 1; + char significantFirstBits = 8 - (2+subsequent); + + ch |= (((short)from2[0]) << (((6*subsequent)+significantFirstBits)-8)); + from += subsequent; + if (ch < 0x10000) { + utf16 = (signed short)ch; + text += '\\'; + text += 'u'; + sprintf(digit, "%d", utf16); + text += digit; + text += '?'; + } + else { + utf16 = (signed short)((ch - 0x10000) / 0x400 + 0xD800); + text += '\\'; + text += 'u'; + sprintf(digit, "%d", utf16); + text += digit; + text += '?'; + utf16 = (signed short)((ch - 0x10000) % 0x400 + 0xDC00); + text += '\\'; + text += 'u'; + sprintf(digit, "%d", utf16); + text += digit; + text += '?'; + } } - - if (to != maxto) { - *to++ = 0; - } - *to = 
0; + return 0; } + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/utf16utf8.cpp b/src/modules/filters/utf16utf8.cpp index 5a7719f..ef1593b 100644 --- a/src/modules/filters/utf16utf8.cpp +++ b/src/modules/filters/utf16utf8.cpp @@ -1,6 +1,6 @@ /****************************************************************************** * - * UTF16UTF8 - SWFilter decendant to convert UTF-16 to UTF-8 + * UTF16UTF8 - SWFilter descendant to convert UTF-16 to UTF-8 * */ @@ -9,38 +9,36 @@ #include <utf16utf8.h> +SWORD_NAMESPACE_START + UTF16UTF8::UTF16UTF8() { } -char UTF16UTF8::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +char UTF16UTF8::processText(SWBuf &text, const SWKey *key, const SWModule *module) { unsigned short *from; - unsigned char *to; int len; unsigned long uchar; unsigned short schar; + if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering + return -1; len = 0; - from = (unsigned short*) text; + from = (unsigned short*) text.c_str(); while (*from) { len += 2; from++; } - // shift string to right of buffer - if (len < maxlen) { - memmove(&text[maxlen - len], text, len); - from = (unsigned short*)&text[maxlen - len]; - } - else - from = (unsigned short*)text; - + SWBuf orig = text; + from = (unsigned short*)orig.c_str(); + // ------------------------------- - for (to = (unsigned char*)text; *from; from++) { + for (text = ""; *from; from++) { uchar = 0; if (*from < 0xD800 || *from > 0xDFFF) { @@ -66,26 +64,24 @@ char UTF16UTF8::ProcessText(char *text, int maxlen, const SWKey *key, const SWMo } if (uchar < 0x80) { - *to++ = uchar; + text += uchar; } else if (uchar < 0x800) { - *to++ = 0xc0 | (uchar >> 6); - *to++ = 0x80 | (uchar & 0x3f); + text += 0xc0 | (uchar >> 6); + text += 0x80 | (uchar & 0x3f); } else if (uchar < 0x10000) { - *to++ = 0xe0 | (uchar >> 12); - *to++ = 0x80 | (uchar >> 6) & 0x3f; - *to++ = 0x80 | uchar & 0x3f; + text += 0xe0 | (uchar >> 12); + text += 0x80 | (uchar >> 6) & 0x3f; + text += 0x80 | uchar & 
0x3f; } else if (uchar < 0x200000) { - *to++ = 0xF0 | (uchar >> 18); - *to++ = 0x80 | (uchar >> 12) & 0x3F; - *to++ = 0x80 | (uchar >> 6) & 0x3F; - *to++ = 0x80 | uchar & 0x3F; + text += 0xF0 | (uchar >> 18); + text += 0x80 | (uchar >> 12) & 0x3F; + text += 0x80 | (uchar >> 6) & 0x3F; + text += 0x80 | uchar & 0x3F; } } - *to++ = 0; - *to = 0; return 0; } @@ -93,3 +89,4 @@ char UTF16UTF8::ProcessText(char *text, int maxlen, const SWKey *key, const SWMo +SWORD_NAMESPACE_END diff --git a/src/modules/filters/utf8arshaping.cpp b/src/modules/filters/utf8arshaping.cpp index 5121f48..3246adc 100644 --- a/src/modules/filters/utf8arshaping.cpp +++ b/src/modules/filters/utf8arshaping.cpp @@ -1,13 +1,12 @@ /****************************************************************************** * -* utf8arshaping - SWFilter decendant to perform Arabic shaping on +* utf8arshaping - SWFilter descendant to perform Arabic shaping on * UTF-8 text */ #ifdef _ICU_ #include <stdlib.h> -#include <string.h> #ifdef __GNUC__ #include <unixstr.h> @@ -15,34 +14,39 @@ #include <utf8arshaping.h> -UTF8arShaping::UTF8arShaping() { - - conv = ucnv_open("UTF-8", &err); +SWORD_NAMESPACE_START +UTF8arShaping::UTF8arShaping() { + conv = ucnv_open("UTF-8", &err); } UTF8arShaping::~UTF8arShaping() { ucnv_close(conv); } -char UTF8arShaping::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +char UTF8arShaping::processText(SWBuf &text, const SWKey *key, const SWModule *module) { UChar *ustr, *ustr2; + if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering + return -1; - int32_t len = strlen(text); + int32_t len = text.length(); ustr = new UChar[len]; ustr2 = new UChar[len]; // Convert UTF-8 string to UTF-16 (UChars) - len = ucnv_toUChars(conv, ustr, len, text, -1, &err); + len = ucnv_toUChars(conv, ustr, len, text.c_str(), -1, &err); len = u_shapeArabic(ustr, len, ustr2, len, U_SHAPE_LETTERS_SHAPE | U_SHAPE_DIGITS_EN2AN, &err); - ucnv_fromUChars(conv, text, maxlen, ustr2, len, 
&err); + text.setSize(text.size()*2); + len = ucnv_fromUChars(conv, text.getRawData(), text.size(), ustr2, len, &err); + text.setSize(len); delete [] ustr2; delete [] ustr; return 0; } +SWORD_NAMESPACE_END #endif diff --git a/src/modules/filters/utf8bidireorder.cpp b/src/modules/filters/utf8bidireorder.cpp index 8fa7280..902047a 100644 --- a/src/modules/filters/utf8bidireorder.cpp +++ b/src/modules/filters/utf8bidireorder.cpp @@ -1,13 +1,12 @@ /****************************************************************************** * -* utf8cnormalizer - SWFilter decendant to perform reordering of UTF-8 +* utf8cnormalizer - SWFilter descendant to perform reordering of UTF-8 * text to visual order according to Unicode BiDi */ #ifdef _ICU_ #include <stdlib.h> -#include <string.h> #ifdef __GNUC__ #include <unixstr.h> @@ -15,6 +14,8 @@ #include <utf8bidireorder.h> +SWORD_NAMESPACE_START + UTF8BiDiReorder::UTF8BiDiReorder() { conv = ucnv_open("UTF-8", &err); @@ -25,15 +26,17 @@ UTF8BiDiReorder::~UTF8BiDiReorder() { ucnv_close(conv); } -char UTF8BiDiReorder::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +char UTF8BiDiReorder::processText(SWBuf &text, const SWKey *key, const SWModule *module) { UChar *ustr, *ustr2; + if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering + return -1; - int32_t len = strlen(text); + int32_t len = text.length(); ustr = new UChar[len]; //each char could become a surrogate pair // Convert UTF-8 string to UTF-16 (UChars) - len = ucnv_toUChars(conv, ustr, len, text, -1, &err); + len = ucnv_toUChars(conv, ustr, len, text.c_str(), -1, &err); ustr2 = new UChar[len]; UBiDi* bidi = ubidi_openSized(len + 1, 0, &err); @@ -45,11 +48,14 @@ char UTF8BiDiReorder::ProcessText(char *text, int maxlen, const SWKey *key, cons // len = ubidi_writeReverse(ustr, len, ustr2, len, // UBIDI_DO_MIRRORING | UBIDI_REMOVE_BIDI_CONTROLS, &err); - ucnv_fromUChars(conv, text, maxlen, ustr2, len, &err); + text.setSize(text.size()*2); + len = 
ucnv_fromUChars(conv, text.getRawData(), text.size(), ustr2, len, &err); + text.setSize(len); delete [] ustr2; delete [] ustr; return 0; } +SWORD_NAMESPACE_END #endif diff --git a/src/modules/filters/utf8cantillation.cpp b/src/modules/filters/utf8cantillation.cpp index 84cb513..6213620 100644 --- a/src/modules/filters/utf8cantillation.cpp +++ b/src/modules/filters/utf8cantillation.cpp @@ -1,6 +1,6 @@ /****************************************************************************** * - * UTF8Cantillation - SWFilter decendant to remove UTF-8 Hebrew cantillation + * UTF8Cantillation - SWFilter descendant to remove UTF-8 Hebrew cantillation * */ @@ -9,56 +9,47 @@ #include <stdio.h> #include <utf8cantillation.h> +SWORD_NAMESPACE_START -const char UTF8Cantillation::on[] = "On"; -const char UTF8Cantillation::off[] = "Off"; -const char UTF8Cantillation::optName[] = "Hebrew Cantillation"; -const char UTF8Cantillation::optTip[] = "Toggles Hebrew Cantillation Marks"; +const char oName[] = "Hebrew Cantillation"; +const char oTip[] = "Toggles Hebrew Cantillation Marks"; -UTF8Cantillation::UTF8Cantillation() { - option = false; - options.push_back(on); - options.push_back(off); +const SWBuf choices[3] = {"On", "Off", ""}; +const StringList oValues(&choices[0], &choices[2]); + +UTF8Cantillation::UTF8Cantillation() : SWOptionFilter(oName, oTip, &oValues) { + setOptionValue("Off"); } -UTF8Cantillation::~UTF8Cantillation(){}; -void UTF8Cantillation::setOptionValue(const char *ival) -{ - option = (!stricmp(ival, on)); -} +UTF8Cantillation::~UTF8Cantillation(){}; -const char *UTF8Cantillation::getOptionValue() -{ - return (option) ? 
on:off; -} -char UTF8Cantillation::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) -{ +char UTF8Cantillation::processText(SWBuf &text, const SWKey *key, const SWModule *module) { if (!option) { - unsigned char *to, *from; - to = (unsigned char*)text; - //The UTF-8 range 0xD6 0x90 to 0xD6 0xAF and 0xD7 0x84 consist of Hebrew cantillation marks so block those out. - for (from = (unsigned char*)text; *from; from++) { - if (*from != 0xD6) { - if (*from == 0xD7 && *(from + 1) == 0x84) { - from++; - } - else { - *to++ = *from; - } - } - else if (*(from + 1) < 0x90 || *(from + 1) > 0xAF) { - *to++ = *from; - from++; - *to++ = *from; - } - else { - from++; - } + //The UTF-8 range 0xD6 0x90 to 0xD6 0xAF and 0xD7 0x84 consist of Hebrew cantillation marks so block those out. + SWBuf orig = text; + const unsigned char* from = (unsigned char*)orig.c_str(); + for (text = ""; *from; from++) { + if (*from != 0xD6) { + if (*from == 0xD7 && *(from + 1) == 0x84) { + from++; + } + else { + text += *from; + } + } + else if (*(from + 1) < 0x90 || *(from + 1) > 0xAF) { + text += *from; + from++; + text += *from; + } + else { + from++; + } + } } - *to++ = 0; - *to = 0; - } return 0; } + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/utf8greekaccents.cpp b/src/modules/filters/utf8greekaccents.cpp index b0e5dc8..05ef59b 100644 --- a/src/modules/filters/utf8greekaccents.cpp +++ b/src/modules/filters/utf8greekaccents.cpp @@ -1,6 +1,6 @@ /****************************************************************************** * - * UTF8GreekAccents - SWFilter decendant to remove UTF-8 Greek accents + * UTF8GreekAccents - SWFilter descendant to remove UTF-8 Greek accents * */ @@ -9,239 +9,235 @@ #include <stdio.h> #include <utf8greekaccents.h> +SWORD_NAMESPACE_START -const char UTF8GreekAccents::on[] = "On"; -const char UTF8GreekAccents::off[] = "Off"; -const char UTF8GreekAccents::optName[] = "Greek Accents"; -const char UTF8GreekAccents::optTip[] = "Toggles Greek 
Accents"; +const char oName[] = "Greek Accents"; +const char oTip[] = "Toggles Greek Accents"; -UTF8GreekAccents::UTF8GreekAccents() { - option = true; - options.push_back(on); - options.push_back(off); +const SWBuf choices[3] = {"On", "Off", ""}; +const StringList oValues(&choices[0], &choices[2]); + +UTF8GreekAccents::UTF8GreekAccents() : SWOptionFilter(oName, oTip, &oValues) { + setOptionValue("On"); } UTF8GreekAccents::~UTF8GreekAccents(){}; -void UTF8GreekAccents::setOptionValue(const char *ival) -{ - option = (!stricmp(ival, on)); -} - -const char *UTF8GreekAccents::getOptionValue() -{ - return (option) ? on:off; -} -char UTF8GreekAccents::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) -{ - if (!option) { - unsigned char *to, *from; - - to = (unsigned char*)text; - for (from = (unsigned char*)text; *from; from++) { - //first just remove combining characters - if (*from == 0xE2 && *(from + 1) == 0x80 && *(from + 2) == 0x99) - from += 2; - else if (*from == 0xCC && *(from + 1)) { - if (*(from + 1) == 0x80 || *(from + 1) == 0x81 || *(from + 1) == 0x82 || *(from + 1) == 0x88 || *(from + 1) == 0x93 || *(from + 1) == 0x94) - from++; - } - else if (*from == 0xCD && *(from + 1) == 0xBA) - from++; - //now converted pre-composed characters to their alphabetic bases, discarding the accents - - //Greek - //capital alpha - else if ((*from == 0xCE && *(from + 1) == 0x86)) { - *to++ = 0xCE; - *to++ = 0x91; - from++; - } - //capital epsilon - else if ((*from == 0xCE && *(from + 1) == 0x88)) { - *to++ = 0xCE; - *to++ = 0x95; - from++; - } - //capital eta - else if ((*from == 0xCE && *(from + 1) == 0x89)) { - *to++ = 0xCE; - *to++ = 0x97; - from++; - } - //capital iota - else if ((*from == 0xCE && (*(from + 1) == 0x8A || *(from + 1) == 0xAA))) { - *to++ = 0xCE; - *to++ = 0x99; - from++; - } - //capital omicron - else if ((*from == 0xCE && *(from + 1) == 0x8C)) { - *to++ = 0xCE; - *to++ = 0x9F; - from++; - } - //capital upsilon - else if ((*from 
== 0xCE && (*(from + 1) == 0x8E || *(from + 1) == 0xAB))) { - *to++ = 0xCE; - *to++ = 0xA5; - from++; - } - //capital omega - else if ((*from == 0xCE && *(from + 1) == 0x8F)) { - *to++ = 0xCE; - *to++ = 0xA9; - from++; - } - - //alpha - else if ((*from == 0xCE && *(from + 1) == 0xAC)) { - *to++ = 0xCE; - *to++ = 0xB1; - from++; - } - //epsilon - else if ((*from == 0xCE && *(from + 1) == 0xAD)) { - *to++ = 0xCE; - *to++ = 0xB5; - from++; - } - //eta - else if ((*from == 0xCE && *(from + 1) == 0xAE)) { - *to++ = 0xCE; - *to++ = 0xB7; - from++; - } - //iota - else if ((*from == 0xCE && *(from + 1) == 0xAF) || (*from == 0xCF && *(from + 1) == 0x8A)) { - *to++ = 0xCE; - *to++ = 0xB9; - from++; - } - //omicron - else if ((*from == 0xCF && *(from + 1) == 0x8C)) { - *to++ = 0xCE; - *to++ = 0xBF; - from++; - } - //upsilon - else if ((*from == 0xCE && *(from + 1) == 0x88) || (*from == 0xCF && (*(from + 1) == 0x8B || *(from + 1) == 0x8D))) { - *to++ = 0xCF; - *to++ = 0x85; - from++; - } - //omega - else if ((*from == 0xCF && *(from + 1) == 0x8E)) { - *to++ = 0xCF; - *to++ = 0x89; - from++; - } - - //Extended Greek - //capital alpha - else if (*from == 0xE1 && ((*(from + 1) == 0xBC || *(from + 1) == 0xBE) && *(from + 2) >= 0x88 && *(from + 2) <= 0x8F) || (*(from + 1) == 0xBE && *(from + 2) >= 0xB8 && *(from + 2) <= 0xBC)) { - *to++ = 0xCE; - *to++ = 0x91; - from+=2; - } - //capital epsilon - else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0x98 && *(from + 2) <= 0x9D) || (*(from + 1) == 0xBF && (*(from + 2) == 0x88 || *(from + 2) == 0x89)))) { - *to++ = 0xCE; - *to++ = 0x95; - from+=2; - } - //capital eta - else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0xA8 && *(from + 2) <= 0xAF) || (*(from + 1) == 0xBE && *(from + 2) >= 0x98 && *(from + 2) <= 0x9F) || (*(from + 1) == 0xBF && *(from + 2) >= 0x8A && *(from + 2) <= 0x8C))) { - *to++ = 0xCE; - *to++ = 0x97; - from+=2; - } - //capital iota - else if (*from == 0xE1 && ((*(from + 1) == 0xBC && 
*(from + 2) >= 0xB8 && *(from + 2) <= 0xBF) || (*(from + 1) == 0xBF && *(from + 2) >= 0x98 && *(from + 2) <= 0x9B))) { - *to++ = 0xCE; - *to++ = 0x99; - from+=2; - } - //capital omicron - else if (*from == 0xE1 && ((*(from + 1) == 0xBD && *(from + 2) >= 0x88 && *(from + 2) <= 0x8D) || (*(from + 1) == 0xBF && *(from + 2) == 0xB8 || *(from + 2) == 0xB9))) { - *to++ = 0xCE; - *to++ = 0x9F; - from+=2; - } - //capital upsilon - else if (*from == 0xE1 && ((*(from + 1) == 0xBD && *(from + 2) >= 0x99 && *(from + 2) <= 0x9F) || (*(from + 1) == 0xBF && *(from + 2) >= 0xA8 && *(from + 2) <= 0xAB))) { - *to++ = 0xCE; - *to++ = 0xA5; - from+=2; - } - //capital omega - else if (*from == 0xE1 && (((*(from + 1) == 0xBD || *(from + 1) == 0xBE) && *(from + 2) >= 0xA8 && *(from + 2) <= 0xAF) || (*(from + 1) == 0xBF && *(from + 2) >= 0xBA && *(from + 2) <= 0xBC))) { - *to++ = 0xCE; - *to++ = 0xA9; - from+=2; - } - //capital rho - else if (*from == 0xE1 && *(from + 1) == 0xBF && *(from + 2) == 0xAC) { - *to++ = 0xCE; - *to++ = 0xA1; - from+=2; - } - - //alpha - else if (*from == 0xE1 && ((*(from + 1) == 0xBC || *(from + 1) == 0xBE) && *(from + 2) >= 0x80 && *(from + 2) <= 0x87) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB0 || *(from + 2) == 0xB1)) || (*(from + 1) == 0xBE && *(from + 2) >= 0xB0 && *(from + 2) <= 0xB7)) { - *to++ = 0xCE; - *to++ = 0xB1; - from+=2; - } - //epsilon - else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0x90 && *(from + 2) <= 0x95) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB2 || *(from + 2) == 0xB3)))) { - *to++ = 0xCE; - *to++ = 0xB5; - from+=2; - } - //eta - else if (*from == 0xE1 && ((*(from + 1) == 0xBE && *(from + 2) >= 0x90 && *(from + 2) <= 0x97) || (*(from + 1) == 0xBC && *(from + 2) >= 0xA0 && *(from + 2) <= 0xA7) || (*(from + 1) == 0xBF && *(from + 2) >= 0x82 && *(from + 2) <= 0x87) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB4 || *(from + 2) == 0xB5)))) { - *to++ = 0xCE; - *to++ = 0xB7; - from+=2; - } - //iota - else if (*from 
== 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0xB0 && *(from + 2) <= 0xB7) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB6 || *(from + 2) == 0xB7)) || (*(from + 1) == 0xBF && *(from + 2) >= 0x90 && *(from + 2) <= 0x97))) { - *to++ = 0xCE; - *to++ = 0xB9; - from+=2; - } - //omicron - else if (*from == 0xE1 && (*(from + 1) == 0xBD && ((*(from + 2) >= 0x80 && *(from + 2) <= 0x85) || (*(from + 2) == 0xB8 || *(from + 2) == 0xB9)))) { - *to++ = 0xCE; - *to++ = 0xBF; - from+=2; - } - //upsilon - else if (*from == 0xE1 && ((*(from + 1) == 0xBD && ((*(from + 2) >= 0x90 && *(from + 2) <= 0x97) || *(from + 2) == 0xBA || *(from + 2) == 0xBB)) || (*(from + 1) == 0xBF && ((*(from + 2) >= 0xA0 && *(from + 2) <= 0xA3) || *(from + 2) == 0xA6 || *(from + 2) == 0xA7)))) { - *to++ = 0xCF; - *to++ = 0x85; - from+=2; - } - //omega - else if (*from == 0xE1 && ((*(from + 1) == 0xBD && ((*(from + 2) >= 0xA0 && *(from + 2) <= 0xA7) || (*(from + 2) == 0xBC || *(from + 2) == 0xBD))) || (*(from + 1) == 0xBE && (*(from + 2) >= 0xA0 && *(from + 2) <= 0xA7)) || (*(from + 1) == 0xBF && *(from + 2) >= 0xB2 && *(from + 2) <= 0xB7))) { - *to++ = 0xCF; - *to++ = 0x89; - from+=2; - } - //rho - else if (*from == 0xE1 && *(from + 1) == 0xBF && (*(from + 2) == 0xA4 && *(from + 2) == 0xA5)) { - *to++ = 0xCF; - *to++ = 0x81; - from+=2; - } - else - *to++ = *from; +char UTF8GreekAccents::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + + if (!option) { //we don't want greek accents + //unsigned char *to, *from; + //to = (unsigned char*)text; + //for (from = (unsigned char*)text; *from; from++) { + + SWBuf orig = text; + const unsigned char* from = (unsigned char*)orig.c_str(); + for (text = ""; *from; from++) { + //first just remove combining characters + if (*from == 0xE2 && *(from + 1) == 0x80 && *(from + 2) == 0x99) { + from += 2; + } + else if (*from == 0xCC && *(from + 1)) { + if (*(from + 1) == 0x80 || *(from + 1) == 0x81 || *(from + 1) == 0x82 || *(from + 1) == 0x88 || 
*(from + 1) == 0x93 || *(from + 1) == 0x94) { + from++; + } + } + else if (*from == 0xCD && *(from + 1) == 0xBA) { + from++; + } + //now converted pre-composed characters to their alphabetic bases, discarding the accents + + //Greek + //capital alpha + else if ((*from == 0xCE && *(from + 1) == 0x86)) { + text += 0xCE; + text += 0x91; + from++; + } + //capital epsilon + else if ((*from == 0xCE && *(from + 1) == 0x88)) { + text += 0xCE; + text += 0x95; + from++; + } + //capital eta + else if ((*from == 0xCE && *(from + 1) == 0x89)) { + text += 0xCE; + text += 0x97; + from++; + } + //capital iota + else if ((*from == 0xCE && (*(from + 1) == 0x8A || *(from + 1) == 0xAA))) { + text += 0xCE; + text += 0x99; + from++; + } + //capital omicron + else if ((*from == 0xCE && *(from + 1) == 0x8C)) { + text += 0xCE; + text += 0x9F; + from++; + } + //capital upsilon + else if ((*from == 0xCE && (*(from + 1) == 0x8E || *(from + 1) == 0xAB))) { + text += 0xCE; + text += 0xA5; + from++; + } + //capital omega + else if ((*from == 0xCE && *(from + 1) == 0x8F)) { + text += 0xCE; + text += 0xA9; + from++; + } + + //alpha + else if ((*from == 0xCE && *(from + 1) == 0xAC)) { + text += 0xCE; + text += 0xB1; + from++; + } + //epsilon + else if ((*from == 0xCE && *(from + 1) == 0xAD)) { + text += 0xCE; + text += 0xB5; + from++; + } + //eta + else if ((*from == 0xCE && *(from + 1) == 0xAE)) { + text += 0xCE; + text += 0xB7; + from++; + } + //iota + else if ((*from == 0xCE && *(from + 1) == 0xAF) || (*from == 0xCF && *(from + 1) == 0x8A)) { + text += 0xCE; + text += 0xB9; + from++; + } + //omicron + else if ((*from == 0xCF && *(from + 1) == 0x8C)) { + text += 0xCE; + text += 0xBF; + from++; + } + //upsilon + else if ((*from == 0xCE && *(from + 1) == 0x88) || (*from == 0xCF && (*(from + 1) == 0x8B || *(from + 1) == 0x8D))) { + text += 0xCF; + text += 0x85; + from++; + } + //omega + else if ((*from == 0xCF && *(from + 1) == 0x8E)) { + text += 0xCF; + text += 0x89; + from++; + } + + //Extended 
Greek + //capital alpha + else if (*from == 0xE1 && ((*(from + 1) == 0xBC || *(from + 1) == 0xBE) && *(from + 2) >= 0x88 && *(from + 2) <= 0x8F) || (*(from + 1) == 0xBE && *(from + 2) >= 0xB8 && *(from + 2) <= 0xBC)) { + text += 0xCE; + text += 0x91; + from+=2; + } + //capital epsilon + else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0x98 && *(from + 2) <= 0x9D) || (*(from + 1) == 0xBF && (*(from + 2) == 0x88 || *(from + 2) == 0x89)))) { + text += 0xCE; + text += 0x95; + from+=2; + } + //capital eta + else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0xA8 && *(from + 2) <= 0xAF) || (*(from + 1) == 0xBE && *(from + 2) >= 0x98 && *(from + 2) <= 0x9F) || (*(from + 1) == 0xBF && *(from + 2) >= 0x8A && *(from + 2) <= 0x8C))) { + text += 0xCE; + text += 0x97; + from+=2; + } + //capital iota + else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0xB8 && *(from + 2) <= 0xBF) || (*(from + 1) == 0xBF && *(from + 2) >= 0x98 && *(from + 2) <= 0x9B))) { + text += 0xCE; + text += 0x99; + from+=2; + } + //capital omicron + else if (*from == 0xE1 && ((*(from + 1) == 0xBD && *(from + 2) >= 0x88 && *(from + 2) <= 0x8D) || (*(from + 1) == 0xBF && *(from + 2) == 0xB8 || *(from + 2) == 0xB9))) { + text += 0xCE; + text += 0x9F; + from+=2; + } + //capital upsilon + else if (*from == 0xE1 && ((*(from + 1) == 0xBD && *(from + 2) >= 0x99 && *(from + 2) <= 0x9F) || (*(from + 1) == 0xBF && *(from + 2) >= 0xA8 && *(from + 2) <= 0xAB))) { + text += 0xCE; + text += 0xA5; + from+=2; + } + //capital omega + else if (*from == 0xE1 && (((*(from + 1) == 0xBD || *(from + 1) == 0xBE) && *(from + 2) >= 0xA8 && *(from + 2) <= 0xAF) || (*(from + 1) == 0xBF && *(from + 2) >= 0xBA && *(from + 2) <= 0xBC))) { + text += 0xCE; + text += 0xA9; + from+=2; + } + //capital rho + else if (*from == 0xE1 && *(from + 1) == 0xBF && *(from + 2) == 0xAC) { + text += 0xCE; + text += 0xA1; + from+=2; + } + + //alpha + else if (*from == 0xE1 && ((*(from + 1) == 0xBC || *(from + 
1) == 0xBE) && *(from + 2) >= 0x80 && *(from + 2) <= 0x87) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB0 || *(from + 2) == 0xB1)) || (*(from + 1) == 0xBE && *(from + 2) >= 0xB0 && *(from + 2) <= 0xB7)) { + text += 0xCE; + text += 0xB1; + from+=2; + } + //epsilon + else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0x90 && *(from + 2) <= 0x95) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB2 || *(from + 2) == 0xB3)))) { + text += 0xCE; + text += 0xB5; + from+=2; + } + //eta + else if (*from == 0xE1 && ((*(from + 1) == 0xBE && *(from + 2) >= 0x90 && *(from + 2) <= 0x97) || (*(from + 1) == 0xBC && *(from + 2) >= 0xA0 && *(from + 2) <= 0xA7) || (*(from + 1) == 0xBF && *(from + 2) >= 0x82 && *(from + 2) <= 0x87) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB4 || *(from + 2) == 0xB5)))) { + text += 0xCE; + text += 0xB7; + from+=2; + } + //iota + else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0xB0 && *(from + 2) <= 0xB7) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB6 || *(from + 2) == 0xB7)) || (*(from + 1) == 0xBF && *(from + 2) >= 0x90 && *(from + 2) <= 0x97))) { + text += 0xCE; + text += 0xB9; + from+=2; + } + //omicron + else if (*from == 0xE1 && (*(from + 1) == 0xBD && ((*(from + 2) >= 0x80 && *(from + 2) <= 0x85) || (*(from + 2) == 0xB8 || *(from + 2) == 0xB9)))) { + text += 0xCE; + text += 0xBF; + from+=2; + } + //upsilon + else if (*from == 0xE1 && ((*(from + 1) == 0xBD && ((*(from + 2) >= 0x90 && *(from + 2) <= 0x97) || *(from + 2) == 0xBA || *(from + 2) == 0xBB)) || (*(from + 1) == 0xBF && ((*(from + 2) >= 0xA0 && *(from + 2) <= 0xA3) || *(from + 2) == 0xA6 || *(from + 2) == 0xA7)))) { + text += 0xCF; + text += 0x85; + from+=2; + } + //omega + else if (*from == 0xE1 && ((*(from + 1) == 0xBD && ((*(from + 2) >= 0xA0 && *(from + 2) <= 0xA7) || (*(from + 2) == 0xBC || *(from + 2) == 0xBD))) || (*(from + 1) == 0xBE && (*(from + 2) >= 0xA0 && *(from + 2) <= 0xA7)) || (*(from + 1) == 0xBF && *(from + 2) >= 0xB2 && *(from + 2) <= 
0xB7))) { + text += 0xCF; + text += 0x89; + from+=2; + } + //rho + else if (*from == 0xE1 && *(from + 1) == 0xBF && (*(from + 2) == 0xA4 && *(from + 2) == 0xA5)) { + text += 0xCF; + text += 0x81; + from+=2; + } + else { //no characters we filter + text += *from; + } + } } - *to++ = 0; - *to = 0; - } return 0; } @@ -250,3 +246,4 @@ char UTF8GreekAccents::ProcessText(char *text, int maxlen, const SWKey *key, con +SWORD_NAMESPACE_END diff --git a/src/modules/filters/utf8hebrewpoints.cpp b/src/modules/filters/utf8hebrewpoints.cpp index e5b50e1..0476db8 100644 --- a/src/modules/filters/utf8hebrewpoints.cpp +++ b/src/modules/filters/utf8hebrewpoints.cpp @@ -1,6 +1,6 @@ /****************************************************************************** * - * UTF8HebrewPoints - SWFilter decendant to remove UTF-8 Hebrew vowel points + * UTF8HebrewPoints - SWFilter descendant to remove UTF-8 Hebrew vowel points * */ @@ -9,47 +9,36 @@ #include <stdio.h> #include <utf8hebrewpoints.h> +SWORD_NAMESPACE_START -const char UTF8HebrewPoints::on[] = "On"; -const char UTF8HebrewPoints::off[] = "Off"; -const char UTF8HebrewPoints::optName[] = "Hebrew Vowel Points"; -const char UTF8HebrewPoints::optTip[] = "Toggles Hebrew Vowel Points"; +const char oName[] = "Hebrew Vowel Points"; +const char oTip[] = "Toggles Hebrew Vowel Points"; -UTF8HebrewPoints::UTF8HebrewPoints() { - option = true; - options.push_back(on); - options.push_back(off); +const SWBuf choices[3] = {"On", "Off", ""}; +const StringList oValues(&choices[0], &choices[2]); + +UTF8HebrewPoints::UTF8HebrewPoints() : SWOptionFilter(oName, oTip, &oValues) { + setOptionValue("On"); } UTF8HebrewPoints::~UTF8HebrewPoints(){}; -void UTF8HebrewPoints::setOptionValue(const char *ival) -{ - option = (!stricmp(ival, on)); -} - -const char *UTF8HebrewPoints::getOptionValue() -{ - return (option) ? 
on:off; -} -char UTF8HebrewPoints::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) -{ +char UTF8HebrewPoints::processText(SWBuf &text, const SWKey *key, const SWModule *module) { if (!option) { - unsigned char *to, *from; - - to = (unsigned char*)text; //The UTF-8 range 0xD6 0xB0 to 0xD6 0xBF excluding 0xD6 0x consist of Hebrew cantillation marks so block those out. - for (from = (unsigned char*)text; *from; from++) { + SWBuf orig = text; + const unsigned char* from = (unsigned char*)orig.c_str(); + for (text = ""; *from; from++) { if ((*from == 0xD6) && (*(from + 1) >= 0xB0 && *(from + 1) <= 0xBF) && (*(from + 1) != 0xBE)) { from++; } else { - *to++ = *from; + text += *from; } } - *to++ = 0; - *to = 0; - } + } return 0; } + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/utf8html.cpp b/src/modules/filters/utf8html.cpp index 7487815..94fbdc1 100644 --- a/src/modules/filters/utf8html.cpp +++ b/src/modules/filters/utf8html.cpp @@ -1,6 +1,6 @@ /****************************************************************************** * - * utf8html - SWFilter decendant to convert a UTF-8 stream to HTML escapes + * utf8html - SWFilter descendant to convert a UTF-8 stream to HTML escapes * */ @@ -9,29 +9,32 @@ #include <stdio.h> #include <utf8html.h> +SWORD_NAMESPACE_START + UTF8HTML::UTF8HTML() { } -char UTF8HTML::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +char UTF8HTML::processText(SWBuf &text, const SWKey *key, const SWModule *module) { - unsigned char *to, *from; + unsigned char *from; int len; char digit[10]; unsigned long ch; + if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering + return -1; + + len = strlenw(text.c_str()) + 2; // shift string to right of buffer + + SWBuf orig = text; + from = (unsigned char *)orig.c_str(); - len = strlenw(text) + 2; // shift string to right of buffer - if (len < maxlen) { - memmove(&text[maxlen - len], text, len); - from = (unsigned char*)&text[maxlen - len]; 
- } - else from = (unsigned char*)text; // ------------------------------- - for (to = (unsigned char*)text; *from; from++) { + for (text = ""; *from; from++) { ch = 0; if ((*from & 128) != 128) { // if (*from != ' ') - *to++ = *from; + text += *from; continue; } if ((*from & 128) && ((*from & 64) != 64)) { @@ -53,14 +56,14 @@ char UTF8HTML::ProcessText(char *text, int maxlen, const SWKey *key, const SWMod ch |= (((short)*from) << (((6*subsequent)+significantFirstBits)-8)); from += subsequent; - *to++ = '&'; - *to++ = '#'; + text += '&'; + text += '#'; sprintf(digit, "%d", ch); for (char *dig = digit; *dig; dig++) - *to++ = *dig; - *to++ = ';'; + text += *dig; + text += ';'; } - *to++ = 0; - *to = 0; return 0; } + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/utf8latin1.cpp b/src/modules/filters/utf8latin1.cpp index 6cc1acd..6ab4c9c 100644 --- a/src/modules/filters/utf8latin1.cpp +++ b/src/modules/filters/utf8latin1.cpp @@ -1,6 +1,6 @@ /****************************************************************************** * - * UTF8Latin1 - SWFilter decendant to convert UTF-8 to Latin-1 + * UTF8Latin1 - SWFilter descendant to convert UTF-8 to Latin-1 * */ @@ -9,31 +9,32 @@ #include <utf8latin1.h> +SWORD_NAMESPACE_START + UTF8Latin1::UTF8Latin1(char rchar) : replacementChar(rchar) { } -char UTF8Latin1::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +char UTF8Latin1::processText(SWBuf &text, const SWKey *key, const SWModule *module) { unsigned char *from; - unsigned short *to; int len; unsigned long uchar; unsigned char significantFirstBits, subsequent; - len = strlen(text) + 1; // shift string to right of buffer - if (len < maxlen) { - memmove(&text[maxlen - len], text, len); - from = (unsigned char*)&text[maxlen - len]; + if ((unsigned long)key < 2) {// hack, we're en(1)/de(0)ciphering + return -1; } - else - from = (unsigned char*)text; - + len = strlen(text.c_str()) + 1; // shift string to right of buffer + + SWBuf orig = text; + 
from = (unsigned char*)orig.c_str(); + // ------------------------------- - for (to = (unsigned short*)text; *from; from++) { + for (text = ""; *from; from++) { uchar = 0; if ((*from & 128) != 128) { // if (*from != ' ') @@ -60,15 +61,14 @@ char UTF8Latin1::ProcessText(char *text, int maxlen, const SWKey *key, const SWM } if (uchar < 0xff) { - *to++ = (unsigned char)uchar; + text += (unsigned char)uchar; } else { - *to++ = replacementChar; + text += replacementChar; } } - *to++ = 0; - *to = 0; - return 0; } +SWORD_NAMESPACE_END + diff --git a/src/modules/filters/utf8nfc.cpp b/src/modules/filters/utf8nfc.cpp index df9e090..5500224 100644 --- a/src/modules/filters/utf8nfc.cpp +++ b/src/modules/filters/utf8nfc.cpp @@ -1,13 +1,12 @@ /****************************************************************************** * -* utf8nfc - SWFilter decendant to perform NFC (canonical composition +* utf8nfc - SWFilter descendant to perform NFC (canonical composition * normalization) on UTF-8 text */ #ifdef _ICU_ #include <stdlib.h> -#include <string.h> #ifdef __GNUC__ #include <unixstr.h> @@ -15,6 +14,8 @@ #include <utf8nfc.h> +SWORD_NAMESPACE_START + UTF8NFC::UTF8NFC() { conv = ucnv_open("UTF-8", &err); } @@ -23,19 +24,24 @@ UTF8NFC::~UTF8NFC() { ucnv_close(conv); } -char UTF8NFC::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +char UTF8NFC::processText(SWBuf &text, const SWKey *key, const SWModule *module) { - int32_t len = strlen(text) * 2; + if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering + return -1; + + int32_t len = text.length() * 2; source = new UChar[len + 1]; //each char could become a surrogate pair // Convert UTF-8 string to UTF-16 (UChars) - len = ucnv_toUChars(conv, source, len, text, -1, &err); + len = ucnv_toUChars(conv, source, len, text.c_str(), -1, &err); target = new UChar[len + 1]; //canonical composition unorm_normalize(source, len, UNORM_NFC, 0, target, len, &err); - ucnv_fromUChars(conv, text, maxlen, target, -1, 
&err); + text.setSize(text.size()*2); + len = ucnv_fromUChars(conv, text.getRawData(), text.size(), target, -1, &err); + text.setSize(len); delete [] source; delete [] target; @@ -43,4 +49,5 @@ char UTF8NFC::ProcessText(char *text, int maxlen, const SWKey *key, const SWModu return 0; } +SWORD_NAMESPACE_END #endif diff --git a/src/modules/filters/utf8nfkd.cpp b/src/modules/filters/utf8nfkd.cpp index 450cbbf..6da24f8 100644 --- a/src/modules/filters/utf8nfkd.cpp +++ b/src/modules/filters/utf8nfkd.cpp @@ -1,13 +1,12 @@ /****************************************************************************** * -* utf8nfkd - SWFilter decendant to perform NFKD (compatability decomposition +* utf8nfkd - SWFilter descendant to perform NFKD (compatability decomposition * normalization) on UTF-8 text */ #ifdef _ICU_ #include <stdlib.h> -#include <string.h> #ifdef __GNUC__ #include <unixstr.h> @@ -15,6 +14,8 @@ #include <utf8nfkd.h> +SWORD_NAMESPACE_START + UTF8NFKD::UTF8NFKD() { conv = ucnv_open("UTF-8", &err); } @@ -23,24 +24,30 @@ UTF8NFKD::~UTF8NFKD() { ucnv_close(conv); } -char UTF8NFKD::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +char UTF8NFKD::processText(SWBuf &text, const SWKey *key, const SWModule *module) { - int32_t len = strlen(text) * 2; + if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering + return -1; + + int32_t len = text.length() * 2; source = new UChar[len + 1]; //each char could become a surrogate pair // Convert UTF-8 string to UTF-16 (UChars) - len = ucnv_toUChars(conv, source, len, text, -1, &err); + len = ucnv_toUChars(conv, source, len, text.c_str(), -1, &err); target = new UChar[len + 1]; //compatability decomposition unorm_normalize(source, len, UNORM_NFKD, 0, target, len, &err); - ucnv_fromUChars(conv, text, maxlen, target, -1, &err); + text.setSize(text.size()*2); + len = ucnv_fromUChars(conv, text.getRawData(), text.size(), target, -1, &err); + text.setSize(len); - delete [] source; - delete [] target; + 
delete [] source; + delete [] target; return 0; } +SWORD_NAMESPACE_END #endif diff --git a/src/modules/filters/utf8transliterator.cpp b/src/modules/filters/utf8transliterator.cpp index 7bc068a..b753c0c 100644 --- a/src/modules/filters/utf8transliterator.cpp +++ b/src/modules/filters/utf8transliterator.cpp @@ -1,27 +1,43 @@ /****************************************************************************** * -* utf8transliterators - SWFilter decendant to transliterate between +* utf8transliterators - SWFilter descendant to transliterate between * ICU-supported scripts. */ #ifdef _ICU_ #include <stdlib.h> -#include <string.h> #ifdef __GNUC__ #include <unixstr.h> #endif +#include <unicode/ucnv.h> +#include <unicode/uchar.h> #include <utf8transliterator.h> +#ifndef _ICUSWORD_ +#include "unicode/resbund.h" +#endif +#include <swlog.h> + +SWORD_NAMESPACE_START + const char UTF8Transliterator::optionstring[NUMTARGETSCRIPTS][16] = { "Off", "Latin", + "IPA", "Basic Latin", + "SBL", + "TC", "Beta", "BGreek", -/* + "SERA", + "Hugoye", + "UNGEGN", + "ISO", + "ALA-LC", + "BGN-PCGN", "Greek", "Hebrew", "Cyrillic", @@ -29,7 +45,6 @@ const char UTF8Transliterator::optionstring[NUMTARGETSCRIPTS][16] = { "Syriac", "Katakana", "Hiragana", - "Jamo", "Hangul", "Devanagari", "Tamil", @@ -46,19 +61,253 @@ const char UTF8Transliterator::optionstring[NUMTARGETSCRIPTS][16] = { "Ethiopic", "Gothic", "Ugaritic", - "Coptic" - */ + "Coptic", + "Meroitic", + "Linear B", + "Cypriot", + "Runic", + "Ogham", + "Thaana", + "Glagolitic", + "Tengwar", + "Cirth" }; const char UTF8Transliterator::optName[] = "Transliteration"; const char UTF8Transliterator::optTip[] = "Transliterates between scripts"; +SWTransMap UTF8Transliterator::transMap; + +#ifndef _ICUSWORD_ + +const char UTF8Transliterator::SW_RB_RULE_BASED_IDS[] = "RuleBasedTransliteratorIDs"; +const char UTF8Transliterator::SW_RB_RULE[] = "Rule"; +#ifdef SWICU_DATA +const char UTF8Transliterator::SW_RESDATA[] = SWICU_DATA; +#else +const char 
UTF8Transliterator::SW_RESDATA[] = "/usr/local/lib/sword/"; +#endif + +class SWCharString { + public: + inline SWCharString(const UnicodeString& str); + inline ~SWCharString(); + inline operator const char*() { return ptr; } + private: + char buf[128]; + char* ptr; +}; +SWCharString::SWCharString(const UnicodeString& str) { + // TODO This isn't quite right -- we should probably do + // preflighting here to determine the real length. + if (str.length() >= (int32_t)sizeof(buf)) { + ptr = new char[str.length() + 8]; + } else { + ptr = buf; + } + str.extract(0, 0x7FFFFFFF, ptr, ""); +} + +SWCharString::~SWCharString() { + if (ptr != buf) { + delete[] ptr; + } +} + +#endif // _ICUSWORD_ + + UTF8Transliterator::UTF8Transliterator() { option = 0; unsigned long i; for (i = 0; i < NUMTARGETSCRIPTS; i++) { options.push_back(optionstring[i]); } +#ifndef _ICUSWORD_ + utf8status = U_ZERO_ERROR; + Load(utf8status); +#endif +} + +void UTF8Transliterator::Load(UErrorCode &status) +{ +#ifndef _ICUSWORD_ + static const char translit_swordindex[] = "translit_swordindex"; + + UResourceBundle *bundle, *transIDs, *colBund; + bundle = ures_openDirect(SW_RESDATA, translit_swordindex, &status); + if (U_FAILURE(status)) { + SWLog::systemlog->LogError("no resource index to load"); + SWLog::systemlog->LogError("status %s", u_errorName(status)); + return; + } + + transIDs = ures_getByKey(bundle, SW_RB_RULE_BASED_IDS, 0, &status); + UParseError parseError; + + int32_t row, maxRows; + if (U_SUCCESS(status)) { + maxRows = ures_getSize(transIDs); + for (row = 0; row < maxRows; row++) { + colBund = ures_getByIndex(transIDs, row, 0, &status); + + if (U_SUCCESS(status) && ures_getSize(colBund) == 4) { + UnicodeString id = ures_getUnicodeStringByIndex(colBund, 0, &status); + UChar type = ures_getUnicodeStringByIndex(colBund, 1, &status).charAt(0); + UnicodeString resString = ures_getUnicodeStringByIndex(colBund, 2, &status); + SWLog::systemlog->LogInformation("ok so far"); + + if (U_SUCCESS(status)) { 
+ switch (type) { + case 0x66: // 'f' + case 0x69: // 'i' + // 'file' or 'internal'; + // row[2]=resource, row[3]=direction + { + UBool visible = (type == 0x0066 /*f*/); + UTransDirection dir = + (ures_getUnicodeStringByIndex(colBund, 3, &status).charAt(0) == + 0x0046 /*F*/) ? + UTRANS_FORWARD : UTRANS_REVERSE; + //registry->put(id, resString, dir, visible); + SWLog::systemlog->LogInformation("instantiating %s ...", resString.getBuffer()); + registerTrans(id, resString, dir, status); + SWLog::systemlog->LogInformation("done."); + } + break; + case 0x61: // 'a' + // 'alias'; row[2]=createInstance argument + //registry->put(id, resString, TRUE); + break; + } + } + else SWLog::systemlog->LogError("Failed to get resString"); + } + else SWLog::systemlog->LogError("Failed to get row"); + ures_close(colBund); + } + } + else + { + SWLog::systemlog->LogError("no resource index to load"); + SWLog::systemlog->LogError("status %s", u_errorName(status)); + } + + ures_close(transIDs); + ures_close(bundle); + +#endif // _ICUSWORD_ +} + +void UTF8Transliterator::registerTrans(const UnicodeString& ID, const UnicodeString& resource, + UTransDirection dir, UErrorCode &status ) +{ +#ifndef _ICUSWORD_ + SWLog::systemlog->LogInformation("registering ID locally %s", ID.getBuffer()); + SWTransData swstuff; + swstuff.resource = resource; + swstuff.dir = dir; + SWTransPair swpair; + swpair.first = ID; + swpair.second = swstuff; + transMap.insert(swpair); +#endif +} + +bool UTF8Transliterator::checkTrans(const UnicodeString& ID, UErrorCode &status ) +{ +#ifndef _ICUSWORD_ + Transliterator *trans = Transliterator::createInstance(ID, UTRANS_FORWARD, status); + if (!U_FAILURE(status)) + { + // already have it, clean up and return true + SWLog::systemlog->LogInformation("already have it %s", ID.getBuffer()); + delete trans; + return true; + } + status = U_ZERO_ERROR; + + SWTransMap::iterator swelement; + if ((swelement = transMap.find(ID)) != transMap.end()) + { + 
SWLog::systemlog->LogInformation("found element in map"); + SWTransData swstuff = (*swelement).second; + UParseError parseError; + //UErrorCode status; + //std::cout << "unregistering " << ID << std::endl; + //Transliterator::unregister(ID); + SWLog::systemlog->LogInformation("resource is %s", swstuff.resource.getBuffer()); + + // Get the rules + //std::cout << "importing: " << ID << ", " << resource << std::endl; + SWCharString ch(swstuff.resource); + UResourceBundle *bundle = ures_openDirect(SW_RESDATA, ch, &status); + const UnicodeString rules = ures_getUnicodeStringByKey(bundle, SW_RB_RULE, &status); + ures_close(bundle); + //parser.parse(rules, isReverse ? UTRANS_REVERSE : UTRANS_FORWARD, + // parseError, status); + if (U_FAILURE(status)) { + SWLog::systemlog->LogError("Failed to get rules"); + SWLog::systemlog->LogError("status %s", u_errorName(status)); + return false; + } + + + Transliterator *trans = Transliterator::createFromRules(ID, rules, swstuff.dir, + parseError,status); + if (U_FAILURE(status)) { + SWLog::systemlog->LogError("Failed to create transliterator"); + SWLog::systemlog->LogError("status %s", u_errorName(status)); + SWLog::systemlog->LogError("Parse error: line %s", parseError.line); + SWLog::systemlog->LogError("Parse error: offset %d", parseError.offset); + SWLog::systemlog->LogError("Parse error: preContext %s", *parseError.preContext); + SWLog::systemlog->LogError("Parse error: postContext %s", *parseError.postContext); + SWLog::systemlog->LogError("rules were"); +// SWLog::systemlog->LogError((const char *)rules); + return false; + } + + Transliterator::registerInstance(trans); + return true; + + //Transliterator *trans = instantiateTrans(ID, swstuff.resource, swstuff.dir, parseError, status); + //return trans; + } + else + { + return false; + } +#else +return true; +#endif // _ICUSWORD_ +} + +bool UTF8Transliterator::addTrans(const char* newTrans, SWBuf* transList) { +#ifdef _ICUSWORD_ + UErrorCode status; + if 
(checkTrans(UnicodeString(newTrans), status)) { +#endif + *transList += newTrans; + *transList += ";"; + return true; +#ifdef _ICUSWORD_ + } + else { + return false; + } +#endif +} + +Transliterator * UTF8Transliterator::createTrans(const UnicodeString& ID, UTransDirection dir, UErrorCode &status ) +{ + Transliterator *trans = Transliterator::createInstance(ID,UTRANS_FORWARD,status); + if (U_FAILURE(status)) { + delete trans; + return NULL; + } + else { + return trans; + } } void UTF8Transliterator::setOptionValue(const char *ival) @@ -75,20 +324,16 @@ const char *UTF8Transliterator::getOptionValue() return (NUMTARGETSCRIPTS > option) ? optionstring[option] : 0; } -char UTF8Transliterator::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +char UTF8Transliterator::processText(SWBuf &text, const SWKey *key, const SWModule *module) { if (option) { // if we want transliteration unsigned long i, j; UErrorCode err = U_ZERO_ERROR; UConverter * conv = NULL; conv = ucnv_open("UTF-8", &err); + SWBuf ID; bool compat = false; - bool noNFC = false; - - if (option == SE_JAMO) { - noNFC = true; - } // Convert UTF-8 string to UTF-16 (UChars) j = strlen(text); @@ -107,8 +352,9 @@ char UTF8Transliterator::ProcessText(char *text, int maxlen, const SWKey *key, c for (i = 0; i < len; i++) { j = ublock_getCode(source[i]); + scripts[SE_LATIN] = true; switch (j) { - case UBLOCK_BASIC_LATIN: scripts[SE_LATIN] = true; break; + //case UBLOCK_BASIC_LATIN: scripts[SE_LATIN] = true; break; case UBLOCK_GREEK: scripts[SE_GREEK] = true; break; case UBLOCK_HEBREW: scripts[SE_HEBREW] = true; break; case UBLOCK_CYRILLIC: scripts[SE_CYRILLIC] = true; break; @@ -132,8 +378,16 @@ char UTF8Transliterator::ProcessText(char *text, int maxlen, const SWKey *key, c case UBLOCK_ARMENIAN: scripts[SE_ARMENIAN] = true; break; case UBLOCK_ETHIOPIC: scripts[SE_ETHIOPIC] = true; break; case UBLOCK_GOTHIC: scripts[SE_GOTHIC] = true; break; - // needs Unicode 3.2? or 4.0? 
support from ICU - //case UBLOCK_UGARITIC: scripts[SE_UGARITIC] = true; break; + case UBLOCK_UGARITIC: scripts[SE_UGARITIC] = true; break; +// case UBLOCK_MEROITIC: scripts[SE_MEROITIC] = true; break; +// case UBLOCK_LINEARB: scripts[SE_LINEARB] = true; break; +// case UBLOCK_CYPRIOT: scripts[SE_CYPRIOT] = true; break; + case UBLOCK_RUNIC: scripts[SE_RUNIC] = true; break; + case UBLOCK_OGHAM: scripts[SE_OGHAM] = true; break; + case UBLOCK_THAANA: scripts[SE_THAANA] = true; break; +// case UBLOCK_GLAGOLITIC: scripts[SE_GLAGOLITIC] = true; break; +// case UBLOCK_TENGWAR: scripts[SE_TENGWAR] = true; break; +// case UBLOCK_CIRTH: scripts[SE_CIRTH] = true; break; case UBLOCK_CJK_RADICALS_SUPPLEMENT: case UBLOCK_KANGXI_RADICALS: case UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS: @@ -153,11 +407,11 @@ char UTF8Transliterator::ProcessText(char *text, int maxlen, const SWKey *key, c compat = true; break; - default: scripts[SE_LATIN] = true; + //default: scripts[SE_LATIN] = true; } - } + } scripts[option] = false; //turn off the reflexive transliteration - + //return if we have no transliteration to do for this text j = 0; for (i = 0; !j && i < NUMSCRIPTS; i++) { @@ -168,312 +422,468 @@ char UTF8Transliterator::ProcessText(char *text, int maxlen, const SWKey *key, c return 0; } - UnicodeString id; if (compat) { - id = UnicodeString("NFKD"); + addTrans("NFKD", &ID); } else { - id = UnicodeString("NFD"); + addTrans("NFD", &ID); } //Simple X to Latin transliterators if (scripts[SE_GREEK]) { - if (option == SE_BETA) - id += UnicodeString(";Greek-Beta"); - else if (option == SE_BGREEK) - id += UnicodeString(";Greek-BGreek"); + if (strnicmp (((SWModule*)module)->Lang(), "cop", 3)) { + if (option == SE_SBL) + addTrans("Greek-Latin/SBL", &ID); + else if (option == SE_TC) + addTrans("Greek-Latin/TC", &ID); + else if (option == SE_BETA) + addTrans("Greek-Latin/Beta", &ID); + else if (option == SE_BGREEK) + addTrans("Greek-Latin/BGreek", &ID); + else if (option == SE_UNGEGN) + 
addTrans("Greek-Latin/UNGEGN", &ID); + else if (option == SE_ISO) + addTrans("Greek-Latin/ISO", &ID); + else if (option == SE_ALALC) + addTrans("Greek-Latin/ALALC", &ID); + else if (option == SE_BGNPCGN) + addTrans("Greek-Latin/BGNPCGN", &ID); + else if (option == SE_IPA) + addTrans("Greek-IPA/Ancient", &ID); + else { + addTrans("Greek-Latin", &ID); + scripts[SE_LATIN] = true; + } + } else { - if (!strnicmp (((SWModule*)module)->Lang(), "cop", 3)) { - id += UnicodeString(";Coptic-Latin"); - } - else { - id += UnicodeString(";Greek-Latin"); - } - scripts[SE_LATIN] = true; + if (option == SE_SBL) + addTrans("Coptic-Latin/SBL", &ID); + else if (option == SE_TC) + addTrans("Coptic-Latin/TC", &ID); + else if (option == SE_BETA) + addTrans("Coptic-Latin/Beta", &ID); + else if (option == SE_IPA) + addTrans("Coptic-IPA", &ID); + else { + addTrans("Coptic-Latin", &ID); + scripts[SE_LATIN] = true; + } } } if (scripts[SE_HEBREW]) { - if (option == SE_BETA) - id += UnicodeString(";Hebrew-CCAT"); + if (option == SE_SBL) + addTrans("Hebrew-Latin/SBL", &ID); + else if (option == SE_TC) + addTrans("Hebrew-Latin/TC", &ID); + else if (option == SE_BETA) + addTrans("Hebrew-Latin/Beta", &ID); + else if (option == SE_UNGEGN) + addTrans("Hebrew-Latin/UNGEGN", &ID); + else if (option == SE_ALALC) + addTrans("Hebrew-Latin/ALALC", &ID); else if (option == SE_SYRIAC) - id += UnicodeString(";Hebrew-Syriac"); + addTrans("Hebrew-Syriac", &ID); else { - id += UnicodeString(";Hebrew-Latin"); - scripts[SE_LATIN] = true; + addTrans("Hebrew-Latin", &ID); + scripts[SE_LATIN] = true; } } if (scripts[SE_CYRILLIC]) { - id += UnicodeString(";Cyrillic-Latin"); - scripts[SE_LATIN] = true; + if (option == SE_GLAGOLITIC) + addTrans("Cyrillic-Glagolitic", &ID); + else { + addTrans("Cyrillic-Latin", &ID); + scripts[SE_LATIN] = true; + } } if (scripts[SE_ARABIC]) { - id += UnicodeString(";Arabic-Latin"); + addTrans("Arabic-Latin", &ID); scripts[SE_LATIN] = true; } if (scripts[SE_SYRIAC]) { - if (option == 
SE_BETA) - id += UnicodeString(";Syriac-CCAT"); + if (option == SE_TC) + addTrans("Syriac-Latin/TC", &ID); + else if (option == SE_BETA) + addTrans("Syriac-Latin/Beta", &ID); + else if (option == SE_HUGOYE) + addTrans("Syriac-Latin/Hugoye", &ID); else if (option == SE_HEBREW) - id += UnicodeString(";Syriac-Hebrew"); + addTrans("Syriac-Hebrew", &ID); else { - id += UnicodeString(";Syriac-Latin"); + addTrans("Syriac-Latin", &ID); scripts[SE_LATIN] = true; } } if (scripts[SE_THAI]) { - id += UnicodeString(";Thai-Latin"); + addTrans("Thai-Latin", &ID); scripts[SE_LATIN] = true; } if (scripts[SE_GEORGIAN]) { - id += UnicodeString(";Georgian-Latin"); - scripts[SE_LATIN] = true; + if (option == SE_ISO) + addTrans("Georgian-Latin/ISO", &ID); + else if (option == SE_ALALC) + addTrans("Georgian-Latin/ALALC", &ID); + else if (option == SE_BGNPCGN) + addTrans("Georgian-Latin/BGNPCGN", &ID); + else if (option == SE_IPA) + addTrans("Georgian-IPA", &ID); + else { + addTrans("Georgian-Latin", &ID); + scripts[SE_LATIN] = true; + } } if (scripts[SE_ARMENIAN]) { - id += UnicodeString(";Armenian-Latin"); - scripts[SE_LATIN] = true; - } + if (option == SE_ISO) + addTrans("Armenian-Latin/ISO", &ID); + else if (option == SE_ALALC) + addTrans("Armenian-Latin/ALALC", &ID); + else if (option == SE_BGNPCGN) + addTrans("Armenian-Latin/BGNPCGN", &ID); + else if (option == SE_IPA) + addTrans("Armenian-IPA", &ID); + else { + addTrans("Armenian-Latin", &ID); + scripts[SE_LATIN] = true; + } + } if (scripts[SE_ETHIOPIC]) { - id += UnicodeString(";Ethiopic-Latin"); - scripts[SE_LATIN] = true; + if (option == SE_UNGEGN) + addTrans("Ethiopic-Latin/UNGEGN", &ID); + else if (option == SE_ISO) + addTrans("Ethiopic-Latin/ISO", &ID); + else if (option == SE_ALALC) + addTrans("Ethiopic-Latin/ALALC", &ID); + else if (option == SE_SERA) + addTrans("Ethiopic-Latin/SERA", &ID); + else { + addTrans("Ethiopic-Latin", &ID); + scripts[SE_LATIN] = true; + } } if (scripts[SE_GOTHIC]) { - id += 
UnicodeString(";Gothic-Latin"); - scripts[SE_LATIN] = true; + if (option == SE_BASICLATIN) + addTrans("Gothic-Latin/Basic", &ID); + else if (option == SE_IPA) + addTrans("Gothic-IPA", &ID); + else { + addTrans("Gothic-Latin", &ID); + scripts[SE_LATIN] = true; + } } if (scripts[SE_UGARITIC]) { - id += UnicodeString(";Ugaritic-Latin"); + if (option == SE_SBL) + addTrans("Ugaritic-Latin/SBL", &ID); + else { + addTrans("Ugaritic-Latin", &ID); + scripts[SE_LATIN] = true; + } + } + if (scripts[SE_MEROITIC]) { + addTrans("Meroitic-Latin", &ID); scripts[SE_LATIN] = true; } - if (scripts[SE_HAN]) { - if (!strnicmp (((SWModule*)module)->Lang(), "ja", 2)) { - id += UnicodeString(";Kanji-OnRomaji"); - } - else { - id += UnicodeString(";Han-Pinyin"); - } + if (scripts[SE_LINEARB]) { + addTrans("LinearB-Latin", &ID); + scripts[SE_LATIN] = true; + } + if (scripts[SE_CYPRIOT]) { + addTrans("Cypriot-Latin", &ID); + scripts[SE_LATIN] = true; + } + if (scripts[SE_RUNIC]) { + addTrans("Runic-Latin", &ID); + scripts[SE_LATIN] = true; + } + if (scripts[SE_OGHAM]) { + addTrans("Ogham-Latin", &ID); + scripts[SE_LATIN] = true; + } + if (scripts[SE_THAANA]) { + if (option == SE_ALALC) + addTrans("Thaana-Latin/ALALC", &ID); + else if (option == SE_BGNPCGN) + addTrans("Thaana-Latin/BGNPCGN", &ID); + else { + addTrans("Thaana-Latin", &ID); + scripts[SE_LATIN] = true; + } + } + if (scripts[SE_GLAGOLITIC]) { + if (option == SE_ISO) + addTrans("Glagolitic-Latin/ISO", &ID); + else if (option == SE_ALALC) + addTrans("Glagolitic-Latin/ALALC", &ID); + else if (option == SE_ALALC) + addTrans("Glagolitic-Cyrillic", &ID); + else { + addTrans("Glagolitic-Latin", &ID); + scripts[SE_LATIN] = true; + } + } + if (scripts[SE_THAI]) { + addTrans("Thai-Latin", &ID); + scripts[SE_LATIN] = true; + } + if (scripts[SE_THAI]) { + addTrans("Thai-Latin", &ID); + scripts[SE_LATIN] = true; + } + + if (scripts[SE_HAN]) { + if (!strnicmp (((SWModule*)module)->Lang(), "ja", 2)) { + addTrans("Kanji-Romaji", &ID); + } + else 
{ + addTrans("Han-Latin", &ID); + } scripts[SE_LATIN] = true; } // Inter-Kana and Kana to Latin transliterators if (option == SE_HIRAGANA && scripts[SE_KATAKANA]) { - id += UnicodeString(";Katakana-Hiragana"); + addTrans("Katakana-Hiragana", &ID); scripts[SE_HIRAGANA] = true; } else if (option == SE_KATAKANA && scripts[SE_HIRAGANA]) { - id += UnicodeString(";Hiragana-Katakana"); + addTrans("Hiragana-Katakana", &ID); scripts[SE_KATAKANA] = true; } else { if (scripts[SE_KATAKANA]) { - id += UnicodeString(";Katakana-Latin"); + addTrans("Katakana-Latin", &ID); scripts[SE_LATIN] = true; } if (scripts[SE_HIRAGANA]) { - id += UnicodeString(";Hiragana-Latin"); + addTrans("Hiragana-Latin", &ID); scripts[SE_LATIN] = true; } } - // Inter-Korean and Korean to Latin transliterators - if (option == SE_HANGUL && scripts[SE_JAMO]) { - noNFC = false; - scripts[SE_HANGUL] = true; - } - else if (option == SE_JAMO && scripts[SE_HANGUL]) { - noNFC = true; - scripts[SE_JAMO] = true; + // Korean to Latin transliterators + if (scripts[SE_HANGUL]) { + addTrans("Hangul-Latin", &ID); + scripts[SE_LATIN] = true; } - else { - if (scripts[SE_HANGUL]) { - id += UnicodeString(";Hangul-Latin"); - scripts[SE_LATIN] = true; - } - if (scripts[SE_JAMO]) { - id += UnicodeString(";Jamo-Latin"); - scripts[SE_LATIN] = true; - } + if (scripts[SE_JAMO]) { + addTrans("Jamo-Latin", &ID); + scripts[SE_LATIN] = true; } // Indic-Latin if (option < SE_DEVANAGARI || option > SE_MALAYALAM) { // Indic to Latin if (scripts[SE_TAMIL]) { - id += UnicodeString(";Tamil-Latin"); + addTrans("Tamil-Latin", &ID); scripts[SE_LATIN] = true; } if (scripts[SE_BENGALI]) { - id += UnicodeString(";Bengali-Latin"); + addTrans("Bengali-Latin", &ID); scripts[SE_LATIN] = true; } if (scripts[SE_GURMUKHI]) { - id += UnicodeString(";Gurmukhi-Latin"); + addTrans("Gurmukhi-Latin", &ID); scripts[SE_LATIN] = true; } if (scripts[SE_GUJARATI]) { - id += UnicodeString(";Gujarati-Latin"); + addTrans("Gujarati-Latin", &ID); scripts[SE_LATIN] = 
true; } if (scripts[SE_ORIYA]) { - id += UnicodeString(";Oriya-Latin"); + addTrans("Oriya-Latin", &ID); scripts[SE_LATIN] = true; } if (scripts[SE_TELUGU]) { - id += UnicodeString(";Telugu-Latin"); + addTrans("Telugu-Latin", &ID); scripts[SE_LATIN] = true; } if (scripts[SE_KANNADA]) { - id += UnicodeString(";Kannada-Latin"); + addTrans("Kannada-Latin", &ID); scripts[SE_LATIN] = true; } if (scripts[SE_MALAYALAM]) { - id += UnicodeString(";Malayalam-Latin"); + addTrans("Malayalam-Latin", &ID); scripts[SE_LATIN] = true; } } else { if (scripts[SE_LATIN]) { - id += UnicodeString(";Latin-InterIndic"); + addTrans("Latin-InterIndic", &ID); } if (scripts[SE_DEVANAGARI]) { - id += UnicodeString(";Devanagari-InterIndic"); + addTrans("Devanagari-InterIndic", &ID); } if (scripts[SE_TAMIL]) { - id += UnicodeString(";Tamil-InterIndic"); + addTrans("Tamil-InterIndic", &ID); } if (scripts[SE_BENGALI]) { - id += UnicodeString(";Bengali-InterIndic"); + addTrans("Bengali-InterIndic", &ID); } if (scripts[SE_GURMUKHI]) { - id += UnicodeString(";Gurmurkhi-InterIndic"); + addTrans("Gurmurkhi-InterIndic", &ID); } if (scripts[SE_GUJARATI]) { - id += UnicodeString(";Gujarati-InterIndic"); + addTrans("Gujarati-InterIndic", &ID); } if (scripts[SE_ORIYA]) { - id += UnicodeString(";Oriya-InterIndic"); + addTrans("Oriya-InterIndic", &ID); } if (scripts[SE_TELUGU]) { - id += UnicodeString(";Telugu-InterIndic"); + addTrans("Telugu-InterIndic", &ID); } if (scripts[SE_KANNADA]) { - id += UnicodeString(";Kannada-InterIndic"); + addTrans("Kannada-InterIndic", &ID); } if (scripts[SE_MALAYALAM]) { - id += UnicodeString(";Malayalam-InterIndic"); + addTrans("Malayalam-InterIndic", &ID); } switch(option) { case SE_DEVANAGARI: - id += UnicodeString(";InterIndic-Devanagari"); + addTrans("InterIndic-Devanagari", &ID); break; case SE_TAMIL: - id += UnicodeString(";InterIndic-Tamil"); + addTrans("InterIndic-Tamil", &ID); break; case SE_BENGALI: - id += UnicodeString(";InterIndic-Bengali"); + 
addTrans("InterIndic-Bengali", &ID); break; case SE_GURMUKHI: - id += UnicodeString(";InterIndic-Gurmukhi"); + addTrans("InterIndic-Gurmukhi", &ID); break; case SE_GUJARATI: - id += UnicodeString(";InterIndic-Gujarati"); + addTrans("InterIndic-Gujarati", &ID); break; case SE_ORIYA: - id += UnicodeString(";InterIndic-Oriya"); + addTrans("InterIndic-Oriya", &ID); break; case SE_TELUGU: - id += UnicodeString(";InterIndic-Telugu"); + addTrans("InterIndic-Telugu", &ID); break; case SE_KANNADA: - id += UnicodeString(";InterIndic-Kannada"); + addTrans("InterIndic-Kannada", &ID); break; case SE_MALAYALAM: - id += UnicodeString(";InterIndic-Malayalam"); + addTrans("InterIndic-Malayalam", &ID); break; default: - id += UnicodeString(";InterIndic-Latin"); + addTrans("InterIndic-Latin", &ID); scripts[SE_LATIN] = true; break; } } + if (scripts[SE_TENGWAR]) { + addTrans("Tengwar-Latin", &ID); + scripts[SE_LATIN] = true; + } + if (scripts[SE_CIRTH]) { + addTrans("Cirth-Latin", &ID); + scripts[SE_LATIN] = true; + } + if (scripts[SE_LATIN]) { switch (option) { case SE_GREEK: - id += UnicodeString(";Latin-Greek"); + addTrans("Latin-Greek", &ID); break; case SE_HEBREW: - id += UnicodeString(";Latin-Hebrew"); + addTrans("Latin-Hebrew", &ID); break; case SE_CYRILLIC: - id += UnicodeString(";Latin-Cyrillic"); + addTrans("Latin-Cyrillic", &ID); break; case SE_ARABIC: - id += UnicodeString(";Latin-Arabic"); + addTrans("Latin-Arabic", &ID); break; case SE_SYRIAC: - id += UnicodeString(";Latin-Syriac"); + addTrans("Latin-Syriac", &ID); break; case SE_THAI: - id += UnicodeString(";Latin-Thai"); + addTrans("Latin-Thai", &ID); break; case SE_GEORGIAN: - id += UnicodeString(";Latin-Georgian"); + addTrans("Latin-Georgian", &ID); break; case SE_ARMENIAN: - id += UnicodeString(";Latin-Armenian"); + addTrans("Latin-Armenian", &ID); break; case SE_ETHIOPIC: - id += UnicodeString(";Latin-Ethiopic"); + addTrans("Latin-Ethiopic", &ID); break; case SE_GOTHIC: - id += UnicodeString(";Latin-Gothic"); + 
addTrans("Latin-Gothic", &ID); break; case SE_UGARITIC: - id += UnicodeString(";Latin-Ugaritic"); + addTrans("Latin-Ugaritic", &ID); break; case SE_COPTIC: - id += UnicodeString(";Latin-Coptic"); + addTrans("Latin-Coptic", &ID); break; case SE_KATAKANA: - id += UnicodeString(";Latin-Katakana"); + addTrans("Latin-Katakana", &ID); break; case SE_HIRAGANA: - id += UnicodeString(";Latin-Hiragana"); + addTrans("Latin-Hiragana", &ID); break; case SE_JAMO: - id += UnicodeString(";Latin-Jamo"); + addTrans("Latin-Jamo", &ID); break; case SE_HANGUL: - id += UnicodeString(";Latin-Hangul"); + addTrans("Latin-Hangul", &ID); + break; + case SE_MEROITIC: + addTrans("Latin-Meroitic", &ID); + break; + case SE_LINEARB: + addTrans("Latin-LinearB", &ID); + break; + case SE_CYPRIOT: + addTrans("Latin-Cypriot", &ID); + break; + case SE_RUNIC: + addTrans("Latin-Runic", &ID); + break; + case SE_OGHAM: + addTrans("Latin-Ogham", &ID); + break; + case SE_THAANA: + addTrans("Latin-Thaana", &ID); + break; + case SE_GLAGOLITIC: + addTrans("Latin-Glagolitic", &ID); + break; + case SE_TENGWAR: + addTrans("Latin-Tengwar", &ID); + break; + case SE_CIRTH: + addTrans("Latin-Cirth", &ID); break; } } if (option == SE_BASICLATIN) { - id += UnicodeString(";Any-Latin1"); - } - - if (noNFC) { - id += UnicodeString(";NFD"); - } else { - id += UnicodeString(";NFC"); + addTrans("Any-Latin1", &ID); } - UParseError perr; + addTrans("NFC", &ID); err = U_ZERO_ERROR; - Transliterator * trans = Transliterator::createInstance(id, UTRANS_FORWARD, perr, err); - if (trans) { + Transliterator * trans = createTrans(UnicodeString(ID), UTRANS_FORWARD, err); + if (trans && !U_FAILURE(err)) { UnicodeString target = UnicodeString(source); - trans->transliterate(target); - len = ucnv_fromUChars(conv, text, maxlen, target.getBuffer(), target.length(), &err); - if (len < maxlen) *(text + len) = 0; - else *(text + maxlen) = 0; - delete trans; + trans->transliterate(target); + text.setSize(text.size()*2); + len = 
ucnv_fromUChars(conv, text.getRawData(), text.size(), target.getBuffer(), target.length(), &err); + text.setSize(len); + delete trans; } ucnv_close(conv); } return 0; } + +SWORD_NAMESPACE_END #endif + + + diff --git a/src/modules/filters/utf8utf16.cpp b/src/modules/filters/utf8utf16.cpp index 9aea6fe..2fddf4c 100644 --- a/src/modules/filters/utf8utf16.cpp +++ b/src/modules/filters/utf8utf16.cpp @@ -1,6 +1,6 @@ /****************************************************************************** * - * UTF8UTF16 - SWFilter decendant to convert UTF-8 to UTF-16 + * UTF8UTF16 - SWFilter descendant to convert UTF-8 to UTF-16 * */ @@ -9,71 +9,70 @@ #include <utf8utf16.h> +SWORD_NAMESPACE_START + UTF8UTF16::UTF8UTF16() { } -char UTF8UTF16::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) -{ - unsigned char *from; - unsigned short *to; +char UTF8UTF16::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + const unsigned char *from; - int len; - unsigned long uchar; - unsigned char significantFirstBits, subsequent; - unsigned short schar; - - len = strlen(text) + 1; // shift string to right of buffer - if (len < maxlen) { - memmove(&text[maxlen - len], text, len); - from = (unsigned char*)&text[maxlen - len]; - } - else - from = (unsigned char*)text; + int len; + unsigned long uchar, uchars[10]; + unsigned char significantFirstBits, subsequent; + unsigned short schar; + if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering + return -1; + - // ------------------------------- - - for (to = (unsigned short*)text; *from; from++) { - uchar = 0; - if ((*from & 128) != 128) { - // if (*from != ' ') - uchar = *from; - } - else if ((*from & 128) && ((*from & 64) != 64)) { - // error, do nothing - continue; - } - else { - *from <<= 1; - for (subsequent = 1; (*from & 128); subsequent++) { - *from <<= 1; - from[subsequent] &= 63; - uchar <<= 6; - uchar |= from[subsequent]; - } - subsequent--; - *from <<=1; - significantFirstBits = 8 - 
(2+subsequent); - - uchar |= (((short)*from) << (((6*subsequent)+significantFirstBits)-8)); - from += subsequent; - } + SWBuf orig = text; + from = (const unsigned char *)orig.c_str(); - if (uchar < 0x1ffff) { - *to++ = (unsigned short)uchar; - } - else { - uchar -= 0x10000; - schar = 0xD800 | (uchar & 0x03ff); - uchar >>= 10; - uchar |= 0xDC00; - *to++ = (unsigned short)schar; - *to++ = (unsigned short)uchar; - } - } - *to = (unsigned short)0; + for (text = ""; *from; from++) { + uchar = 0; + if ((*from & 128) != 128) { + //if (*from != ' ') + uchar = *from; + } + else if ((*from & 128) && ((*from & 64) != 64)) { + // error, do nothing + continue; + } + else { + uchars[0] = *from; + uchars[0] <<= 1; + for (subsequent = 1; (uchars[0] & 128) && (subsequent < 10); subsequent++) { + uchars[0] <<= 1; + uchars[subsequent] = from[subsequent]; + uchars[subsequent] &= 63; + uchar <<= 6; + uchar |= uchars[subsequent]; + } + subsequent--; + uchars[0] <<=1; + significantFirstBits = 8 - (2+subsequent); + + uchar |= (((short)uchars[0]) << (((6*subsequent)+significantFirstBits)-8)); + from += subsequent; + } - return 0; + if (uchar < 0x1ffff) { + text.setSize(text.size()+2); + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short)uchar; + } + else { + uchar -= 0x10000; + schar = 0xD800 | (uchar & 0x03ff); + uchar >>= 10; + uchar |= 0xDC00; + text.setSize(text.size()+4); + *((unsigned short *)(text.getRawData()+(text.size()-4))) = (unsigned short)schar; + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short)uchar; + } + } + return 0; } +SWORD_NAMESPACE_END |