From 03134fa5f6f25d92724ce4c183f9bbe12a9e37dc Mon Sep 17 00:00:00 2001 From: "Roberto C. Sanchez" Date: Sat, 29 Mar 2014 10:53:59 -0400 Subject: Imported Upstream version 1.5.11 --- src/modules/filters/Makefile | 5 + src/modules/filters/Makefile.am | 105 +++ src/modules/filters/cipherfil.cpp | 46 ++ src/modules/filters/gbffootnotes.cpp | 193 ++++++ src/modules/filters/gbfheadings.cpp | 87 +++ src/modules/filters/gbfhtml.cpp | 181 ++++++ src/modules/filters/gbfhtmlhref.cpp | 288 +++++++++ src/modules/filters/gbfmorph.cpp | 77 +++ src/modules/filters/gbfosis.cpp | 420 ++++++++++++ src/modules/filters/gbfplain.cpp | 97 +++ src/modules/filters/gbfredletterwords.cpp | 93 +++ src/modules/filters/gbfrtf.cpp | 311 +++++++++ src/modules/filters/gbfstrongs.cpp | 126 ++++ src/modules/filters/gbfthml.cpp | 216 +++++++ src/modules/filters/gbfwebif.cpp | 191 ++++++ src/modules/filters/gbfwordjs.cpp | 282 ++++++++ src/modules/filters/greeklexattribs.cpp | 101 +++ src/modules/filters/latin1utf16.cpp | 119 ++++ src/modules/filters/latin1utf8.cpp | 173 +++++ src/modules/filters/osisfootnotes.cpp | 157 +++++ src/modules/filters/osisheadings.cpp | 144 +++++ src/modules/filters/osishtmlhref.cpp | 561 ++++++++++++++++ src/modules/filters/osislemma.cpp | 85 +++ src/modules/filters/osismorph.cpp | 85 +++ src/modules/filters/osismorphsegmentation.cpp | 106 +++ src/modules/filters/osisosis.cpp | 173 +++++ src/modules/filters/osisplain.cpp | 192 ++++++ src/modules/filters/osisredletterwords.cpp | 85 +++ src/modules/filters/osisrtf.cpp | 520 +++++++++++++++ src/modules/filters/osisscripref.cpp | 100 +++ src/modules/filters/osisstrongs.cpp | 257 ++++++++ src/modules/filters/osisvariants.cpp | 118 ++++ src/modules/filters/osiswebif.cpp | 198 ++++++ src/modules/filters/osiswordjs.cpp | 178 ++++++ src/modules/filters/papyriplain.cpp | 71 ++ src/modules/filters/plainfootnotes.cpp | 79 +++ src/modules/filters/plainhtml.cpp | 83 +++ src/modules/filters/rtfhtml.cpp | 81 +++ 
src/modules/filters/scsuutf8.cpp | 226 +++++++ src/modules/filters/swbasicfilter.cpp | 406 ++++++++++++ src/modules/filters/swoptfilter.cpp | 47 ++ src/modules/filters/teihtmlhref.cpp | 205 ++++++ src/modules/filters/teiplain.cpp | 116 ++++ src/modules/filters/teirtf.cpp | 182 ++++++ src/modules/filters/thmlfootnotes.cpp | 124 ++++ src/modules/filters/thmlgbf.cpp | 291 +++++++++ src/modules/filters/thmlheadings.cpp | 153 +++++ src/modules/filters/thmlhtml.cpp | 236 +++++++ src/modules/filters/thmlhtmlhref.cpp | 357 +++++++++++ src/modules/filters/thmllemma.cpp | 65 ++ src/modules/filters/thmlmorph.cpp | 65 ++ src/modules/filters/thmlosis.cpp | 575 +++++++++++++++++ src/modules/filters/thmlplain.cpp | 219 +++++++ src/modules/filters/thmlrtf.cpp | 346 ++++++++++ src/modules/filters/thmlscripref.cpp | 123 ++++ src/modules/filters/thmlstrongs.cpp | 146 +++++ src/modules/filters/thmlvariants.cpp | 118 ++++ src/modules/filters/thmlwebif.cpp | 103 +++ src/modules/filters/thmlwordjs.cpp | 296 +++++++++ src/modules/filters/unicodertf.cpp | 87 +++ src/modules/filters/utf16utf8.cpp | 90 +++ src/modules/filters/utf8arshaping.cpp | 51 ++ src/modules/filters/utf8bidireorder.cpp | 60 ++ src/modules/filters/utf8cantillation.cpp | 55 ++ src/modules/filters/utf8greekaccents.cpp | 261 ++++++++ src/modules/filters/utf8hebrewpoints.cpp | 44 ++ src/modules/filters/utf8html.cpp | 70 ++ src/modules/filters/utf8latin1.cpp | 75 +++ src/modules/filters/utf8nfc.cpp | 50 ++ src/modules/filters/utf8nfkd.cpp | 52 ++ src/modules/filters/utf8transliterator.cpp | 888 ++++++++++++++++++++++++++ src/modules/filters/utf8utf16.cpp | 78 +++ 72 files changed, 12644 insertions(+) create mode 100644 src/modules/filters/Makefile create mode 100644 src/modules/filters/Makefile.am create mode 100644 src/modules/filters/cipherfil.cpp create mode 100644 src/modules/filters/gbffootnotes.cpp create mode 100644 src/modules/filters/gbfheadings.cpp create mode 100644 src/modules/filters/gbfhtml.cpp create mode 
100644 src/modules/filters/gbfhtmlhref.cpp create mode 100644 src/modules/filters/gbfmorph.cpp create mode 100644 src/modules/filters/gbfosis.cpp create mode 100644 src/modules/filters/gbfplain.cpp create mode 100644 src/modules/filters/gbfredletterwords.cpp create mode 100644 src/modules/filters/gbfrtf.cpp create mode 100644 src/modules/filters/gbfstrongs.cpp create mode 100644 src/modules/filters/gbfthml.cpp create mode 100644 src/modules/filters/gbfwebif.cpp create mode 100644 src/modules/filters/gbfwordjs.cpp create mode 100644 src/modules/filters/greeklexattribs.cpp create mode 100644 src/modules/filters/latin1utf16.cpp create mode 100644 src/modules/filters/latin1utf8.cpp create mode 100644 src/modules/filters/osisfootnotes.cpp create mode 100644 src/modules/filters/osisheadings.cpp create mode 100644 src/modules/filters/osishtmlhref.cpp create mode 100644 src/modules/filters/osislemma.cpp create mode 100644 src/modules/filters/osismorph.cpp create mode 100644 src/modules/filters/osismorphsegmentation.cpp create mode 100644 src/modules/filters/osisosis.cpp create mode 100644 src/modules/filters/osisplain.cpp create mode 100644 src/modules/filters/osisredletterwords.cpp create mode 100644 src/modules/filters/osisrtf.cpp create mode 100644 src/modules/filters/osisscripref.cpp create mode 100644 src/modules/filters/osisstrongs.cpp create mode 100644 src/modules/filters/osisvariants.cpp create mode 100644 src/modules/filters/osiswebif.cpp create mode 100644 src/modules/filters/osiswordjs.cpp create mode 100644 src/modules/filters/papyriplain.cpp create mode 100644 src/modules/filters/plainfootnotes.cpp create mode 100644 src/modules/filters/plainhtml.cpp create mode 100644 src/modules/filters/rtfhtml.cpp create mode 100644 src/modules/filters/scsuutf8.cpp create mode 100644 src/modules/filters/swbasicfilter.cpp create mode 100644 src/modules/filters/swoptfilter.cpp create mode 100644 src/modules/filters/teihtmlhref.cpp create mode 100644 
src/modules/filters/teiplain.cpp create mode 100644 src/modules/filters/teirtf.cpp create mode 100644 src/modules/filters/thmlfootnotes.cpp create mode 100644 src/modules/filters/thmlgbf.cpp create mode 100644 src/modules/filters/thmlheadings.cpp create mode 100644 src/modules/filters/thmlhtml.cpp create mode 100644 src/modules/filters/thmlhtmlhref.cpp create mode 100644 src/modules/filters/thmllemma.cpp create mode 100644 src/modules/filters/thmlmorph.cpp create mode 100644 src/modules/filters/thmlosis.cpp create mode 100644 src/modules/filters/thmlplain.cpp create mode 100644 src/modules/filters/thmlrtf.cpp create mode 100644 src/modules/filters/thmlscripref.cpp create mode 100644 src/modules/filters/thmlstrongs.cpp create mode 100644 src/modules/filters/thmlvariants.cpp create mode 100644 src/modules/filters/thmlwebif.cpp create mode 100644 src/modules/filters/thmlwordjs.cpp create mode 100644 src/modules/filters/unicodertf.cpp create mode 100644 src/modules/filters/utf16utf8.cpp create mode 100644 src/modules/filters/utf8arshaping.cpp create mode 100644 src/modules/filters/utf8bidireorder.cpp create mode 100644 src/modules/filters/utf8cantillation.cpp create mode 100644 src/modules/filters/utf8greekaccents.cpp create mode 100644 src/modules/filters/utf8hebrewpoints.cpp create mode 100644 src/modules/filters/utf8html.cpp create mode 100644 src/modules/filters/utf8latin1.cpp create mode 100644 src/modules/filters/utf8nfc.cpp create mode 100644 src/modules/filters/utf8nfkd.cpp create mode 100644 src/modules/filters/utf8transliterator.cpp create mode 100644 src/modules/filters/utf8utf16.cpp (limited to 'src/modules/filters') diff --git a/src/modules/filters/Makefile b/src/modules/filters/Makefile new file mode 100644 index 0000000..1a2d00d --- /dev/null +++ b/src/modules/filters/Makefile @@ -0,0 +1,5 @@ + +root := ../../.. 
+ +all: + make -C ${root} diff --git a/src/modules/filters/Makefile.am b/src/modules/filters/Makefile.am new file mode 100644 index 0000000..7092c73 --- /dev/null +++ b/src/modules/filters/Makefile.am @@ -0,0 +1,105 @@ +filtersdir = $(top_srcdir)/src/modules/filters + +libsword_la_SOURCES += $(filtersdir)/swbasicfilter.cpp +libsword_la_SOURCES += $(filtersdir)/swoptfilter.cpp + +GBFFIL = $(filtersdir)/gbfhtml.cpp +GBFFIL += $(filtersdir)/gbfhtmlhref.cpp +GBFFIL += $(filtersdir)/gbfwebif.cpp +GBFFIL += $(filtersdir)/gbfplain.cpp +GBFFIL += $(filtersdir)/gbfrtf.cpp +GBFFIL += $(filtersdir)/gbfstrongs.cpp +GBFFIL += $(filtersdir)/gbffootnotes.cpp +GBFFIL += $(filtersdir)/gbfheadings.cpp +GBFFIL += $(filtersdir)/gbfredletterwords.cpp +GBFFIL += $(filtersdir)/gbfmorph.cpp +GBFFIL += $(filtersdir)/gbfwordjs.cpp + +THMLFIL = $(filtersdir)/thmlstrongs.cpp +THMLFIL += $(filtersdir)/thmlfootnotes.cpp +THMLFIL += $(filtersdir)/thmlheadings.cpp +THMLFIL += $(filtersdir)/thmlmorph.cpp +THMLFIL += $(filtersdir)/thmllemma.cpp +THMLFIL += $(filtersdir)/thmlscripref.cpp +THMLFIL += $(filtersdir)/thmlvariants.cpp +THMLFIL += $(filtersdir)/thmlgbf.cpp +THMLFIL += $(filtersdir)/thmlrtf.cpp +THMLFIL += $(filtersdir)/thmlhtml.cpp +THMLFIL += $(filtersdir)/thmlhtmlhref.cpp +THMLFIL += $(filtersdir)/thmlwebif.cpp +THMLFIL += $(filtersdir)/thmlwordjs.cpp + +TEIFIL = $(filtersdir)/teiplain.cpp +TEIFIL += $(filtersdir)/teirtf.cpp +TEIFIL += $(filtersdir)/teihtmlhref.cpp + +CONVFIL = $(filtersdir)/gbfthml.cpp +CONVFIL += $(filtersdir)/gbfosis.cpp +CONVFIL += $(filtersdir)/thmlosis.cpp +CONVFIL += $(filtersdir)/thmlplain.cpp +CONVFIL += $(filtersdir)/osisosis.cpp + +OSISFIL = $(filtersdir)/osisheadings.cpp +OSISFIL += $(filtersdir)/osisfootnotes.cpp +OSISFIL += $(filtersdir)/osishtmlhref.cpp +OSISFIL += $(filtersdir)/osiswebif.cpp +OSISFIL += $(filtersdir)/osismorph.cpp +OSISFIL += $(filtersdir)/osisstrongs.cpp +OSISFIL += $(filtersdir)/osisplain.cpp +OSISFIL += $(filtersdir)/osisrtf.cpp 
+OSISFIL += $(filtersdir)/osislemma.cpp +OSISFIL += $(filtersdir)/osisredletterwords.cpp +OSISFIL += $(filtersdir)/osisscripref.cpp +OSISFIL += $(filtersdir)/osisvariants.cpp +OSISFIL += $(filtersdir)/osiswordjs.cpp +OSISFIL += $(filtersdir)/osismorphsegmentation.cpp + +libsword_la_SOURCES += $(filtersdir)/latin1utf8.cpp +libsword_la_SOURCES += $(filtersdir)/latin1utf16.cpp +libsword_la_SOURCES += $(filtersdir)/utf8utf16.cpp +libsword_la_SOURCES += $(filtersdir)/utf16utf8.cpp +libsword_la_SOURCES += $(filtersdir)/scsuutf8.cpp +libsword_la_SOURCES += $(filtersdir)/utf8html.cpp +libsword_la_SOURCES += $(filtersdir)/utf8latin1.cpp + +libsword_la_SOURCES += $(filtersdir)/utf8cantillation.cpp +libsword_la_SOURCES += $(filtersdir)/utf8hebrewpoints.cpp +libsword_la_SOURCES += $(filtersdir)/utf8greekaccents.cpp + +libsword_la_SOURCES += $(filtersdir)/cipherfil.cpp + +PLFIL = $(filtersdir)/rtfhtml.cpp +PLFIL += $(filtersdir)/plainfootnotes.cpp +PLFIL += $(filtersdir)/plainhtml.cpp +PLFIL += $(filtersdir)/greeklexattribs.cpp +PLFIL += $(filtersdir)/unicodertf.cpp +PLFIL += $(filtersdir)/papyriplain.cpp + + +SWICUSRC = $(filtersdir)/utf8transliterator.cpp +SWICUSRC += $(filtersdir)/utf8nfc.cpp +SWICUSRC += $(filtersdir)/utf8nfkd.cpp +SWICUSRC += $(filtersdir)/utf8arshaping.cpp +SWICUSRC += $(filtersdir)/utf8bidireorder.cpp + +if ICU +ICUDEFS = -D_ICU_ +DISTSWICUSRC = +SWREALICUSRC = $(SWICUSRC) +else +if ICUSWORD +ICUDEFS = -D_ICU_ -D_ICUSWORD_ +DISTSWICUSRC = +SWREALICUSRC = $(SWICUSRC) +else +DISTSWICUSRC = $(SWICUSRC) +SWREALICUSRC = +endif +endif + +AM_CPPFLAGS += $(ICUDEFS) +libsword_la_SOURCES += $(SWREALICUSRC) +EXTRA_DIST = $(DISTSWICUSRC) + +libsword_la_SOURCES += $(OSISFIL) $(GBFFIL) \ + $(THMLFIL) $(CONVFIL) $(PLFIL) $(TEIFIL) diff --git a/src/modules/filters/cipherfil.cpp b/src/modules/filters/cipherfil.cpp new file mode 100644 index 0000000..24c665e --- /dev/null +++ b/src/modules/filters/cipherfil.cpp @@ -0,0 +1,46 @@ 
+/****************************************************************************** + * + * cipherfil - SWFilter descendant to decipher a module + */ + + +#include +#include +#include +#include + +SWORD_NAMESPACE_START + +CipherFilter::CipherFilter(const char *key) { + cipher = new SWCipher((unsigned char *)key); +} + + +CipherFilter::~CipherFilter() { + delete cipher; +} + + +SWCipher *CipherFilter::getCipher() { + return cipher; +} + + +char CipherFilter::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + if (text.length() > 2) { //check if it's large enough to substract 2 in the next step. + unsigned long len = text.length(); + if (!key) { // hack, using key to determine encipher, or decipher + cipher->cipherBuf(&len, text.getRawData()); //set buffer to enciphered text + memcpy(text.getRawData(), cipher->Buf(), len); +// text = cipher->Buf(); //get the deciphered buffer + } + else if ((unsigned long)key == 1) { + cipher->Buf(text.getRawData(), len); + memcpy(text.getRawData(), cipher->cipherBuf(&len), len); +// text = cipher->cipherBuf(&len); + } + } + return 0; +} + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/gbffootnotes.cpp b/src/modules/filters/gbffootnotes.cpp new file mode 100644 index 0000000..bef29b8 --- /dev/null +++ b/src/modules/filters/gbffootnotes.cpp @@ -0,0 +1,193 @@ +/****************************************************************************** + * + * gbffootnotes - SWFilter descendant to hide or show footnotes + * in a GBF module. 
+ */ + + +#include +#include +#include +#include +#include +#include +#include + +SWORD_NAMESPACE_START + +const char oName[] = "Footnotes"; +const char oTip[] = "Toggles Footnotes On and Off if they exist"; + +const SWBuf choices[3] = {"Off", "On", ""}; +const StringList oValues(&choices[0], &choices[2]); + + +GBFFootnotes::GBFFootnotes() : SWOptionFilter(oName, oTip, &oValues) { + setOptionValue("Off"); +} + + +GBFFootnotes::~GBFFootnotes() { +} + + +char GBFFootnotes::processText (SWBuf &text, const SWKey *key, const SWModule *module) +{ + + SWBuf token; + bool intoken = false; + bool hide = false; + SWBuf tagText; + XMLTag startTag; + SWBuf refs = ""; + int footnoteNum = 1; + char buf[254]; + VerseKey parser = key->getText(); + + SWBuf orig = text; + const char *from = orig.c_str(); + + //XMLTag tag; + + for (text = ""; *from; from++) { + if (*from == '<') { + intoken = true; + token = ""; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + + //XMLTag tag(token); + if (!strncmp(token, "RF",2)) { +// tag = token; + + refs = ""; + startTag = token; + hide = true; + tagText = ""; + continue; + } + else if (!strncmp(token, "Rf",2)) { + if (module->isProcessEntryAttributes()) { + //tag = token; + + if((tagText.length() == 1) || !strcmp(module->Name(), "IGNT")) { + if (option) { // for ASV marks text in verse then put explanation at end of verse + text.append(" ["); + text.append(tagText); + text.append("]"); + hide = false; + continue; + } + } + SWBuf fc = module->getEntryAttributes()["Footnote"]["count"]["value"]; + footnoteNum = (fc.length()) ? 
atoi(fc.c_str()) : 0; + sprintf(buf, "%i", ++footnoteNum); + module->getEntryAttributes()["Footnote"]["count"]["value"] = buf; + StringList attributes = startTag.getAttributeNames(); + for (StringList::const_iterator it = attributes.begin(); it != attributes.end(); it++) { + module->getEntryAttributes()["Footnote"][buf][it->c_str()] = startTag.getAttribute(it->c_str()); + } + module->getEntryAttributes()["Footnote"][buf]["body"] = tagText; + startTag.setAttribute("swordFootnote", buf); + } + hide = false; + if (option) { + text.append(startTag); + text.append(tagText); + } + else continue; + } + if (!hide) { + text.append('<'); + text.append(token); + text.append('>'); + } + else { + tagText.append('<'); + tagText.append(token); + tagText.append('>'); + } + continue; + } + if (intoken) { //copy token + token.append(*from); + } + else if (!hide) { //copy text which is not inside a token + text.append(*from); + } + else tagText.append(*from); + } + return 0; + + /* + if (!option) { // if we don't want footnotes + char token[4096]; // cheese. Fix. 
+ int tokpos = 0; + bool intoken = false; + int len; + bool hide = false; + + const char *from; + SWBuf orig = text; + from = orig.c_str(); + for (text = ""; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; +// memset(token, 0, 4096); + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + switch (*token) { + case 'R': // Reference + switch(token[1]) { + case 'F': // Begin footnote + hide = true; + break; + case 'f': // end footnote + hide = false; + break; + } + continue; // skip token + case 'W': + if (token[1] == 'T') { + switch (token[2]) { + case 'P': + case 'S': + case 'A': + continue; // remove this token + default: + break; + } + } + } + // if not a footnote token, keep token in text + if (!hide) { + text += '<'; + text += token; + text += '>'; + } + continue; + } + if (intoken) { + if (tokpos < 4090) + token[tokpos++] = *from; + token[tokpos+2] = 0; // +2 cuz we init token with 2 extra '0' because of switch statement + } + else { + if (!hide) { + text += *from; + } + } + } + } + return 0;*/ +} + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/gbfheadings.cpp b/src/modules/filters/gbfheadings.cpp new file mode 100644 index 0000000..81a4d94 --- /dev/null +++ b/src/modules/filters/gbfheadings.cpp @@ -0,0 +1,87 @@ +/****************************************************************************** + * + * gbfheadings - SWFilter descendant to hide or show headings + * in a GBF module. 
+ */ + + +#include +#include + +SWORD_NAMESPACE_START + + +const char oName[] = "Headings"; +const char oTip[] = "Toggles Headings On and Off if they exist"; + +const SWBuf choices[3] = {"Off", "On", ""}; +const StringList oValues(&choices[0], &choices[2]); + +GBFHeadings::GBFHeadings() : SWOptionFilter(oName, oTip, &oValues) { + setOptionValue("Off"); +} + + +GBFHeadings::~GBFHeadings() { +} + + +char GBFHeadings::processText (SWBuf &text, const SWKey *key, const SWModule *module) { + if (!option) { // if we don't want headings + char token[2048]; // cheese. Fix. + int tokpos = 0; + bool intoken = false; + bool hide = false; + + const char *from; + SWBuf orig = text; + from = orig.c_str(); + for (text = ""; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; +// memset(token, 0, 2048); + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + switch (*token) { + case 'T': // Reference + switch(token[1]) { + case 'S': // Begin heading + hide = true; + break; + case 's': // end heading + hide = false; + break; + } + continue; // skip token + } + // if not a heading token, keep token in text + if (!hide) { + text += '<'; + for (char *tok = token; *tok; tok++) + text += *tok; + text += '>'; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + if (!hide) { + text += *from; + } + } + } + } + return 0; +} + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/gbfhtml.cpp b/src/modules/filters/gbfhtml.cpp new file mode 100644 index 0000000..a9d8434 --- /dev/null +++ b/src/modules/filters/gbfhtml.cpp @@ -0,0 +1,181 @@ +/*************************************************************************** + gbfhtml.cpp - GBF to HTML filter + ------------------- + begin : 2001-09-03 + copyright : 2001 by CrossWire Bible Society + ***************************************************************************/ + 
+/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include +#include +#include + +SWORD_NAMESPACE_START + +GBFHTML::GBFHTML() { + setTokenStart("<"); + setTokenEnd(">"); + + setTokenCaseSensitive(true); + + addTokenSubstitute("Rf", ")"); + addTokenSubstitute("Rx", ""); + addTokenSubstitute("FI", ""); // italics begin + addTokenSubstitute("Fi", ""); + addTokenSubstitute("FB", ""); // bold begin + addTokenSubstitute("Fb", ""); + addTokenSubstitute("FR", ""); // words of Jesus begin + addTokenSubstitute("Fr", ""); + addTokenSubstitute("FU", ""); // underline begin + addTokenSubstitute("Fu", ""); + addTokenSubstitute("FO", ""); // Old Testament quote begin + addTokenSubstitute("Fo", ""); + addTokenSubstitute("FS", ""); // Superscript begin// Subscript begin + addTokenSubstitute("Fs", ""); + addTokenSubstitute("FV", ""); // Subscript begin + addTokenSubstitute("Fv", ""); + addTokenSubstitute("TT", ""); // Book title begin + addTokenSubstitute("Tt", ""); + addTokenSubstitute("PP", ""); // poetry begin + addTokenSubstitute("Pp", ""); + addTokenSubstitute("Fn", ""); // font end + addTokenSubstitute("CL", "
"); // new line + addTokenSubstitute("CM", "
"); // paragraph is a non showing comment that can be changed in the front end to

if desired + addTokenSubstitute("CG", ""); // ??? + addTokenSubstitute("CT", ""); // ??? + addTokenSubstitute("JR", "

"); // right align begin + addTokenSubstitute("JC", "
"); // center align begin + addTokenSubstitute("JL", "
"); // align end + +} + + +bool GBFHTML::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) { + const char *tok; + char val[128]; + char *valto; + const char *num; + MyUserData *u = (MyUserData *)userData; + + if (!substituteToken(buf, token)) { + // deal with OSIS note tags. Just hide till OSISRTF + if (!strncmp(token, "note ", 5)) { + // let's stop text from going to output + u->suspendTextPassThru = true; + } + + else if (!strncmp(token, "/note", 5)) { + u->suspendTextPassThru = false; + } + + else if (!strncmp(token, "w", 1)) { + // OSIS Word (temporary until OSISRTF is done) + valto = val; + num = strstr(token, "lemma=\"x-Strongs:"); + if (num) { + for (num+=17; ((*num) && (*num != '\"')); num++) + *valto++ = *num; + *valto = 0; + if (atoi((!isdigit(*val))?val+1:val) < 5627) { + buf += " <"; + for (tok = (!isdigit(*val))?val+1:val; *tok; tok++) + buf += *tok; + buf += "> "; + } + } else { + num = strstr(token, "lemma=\"strong:"); + if (num) { + for (num+=14; ((*num) && (*num != '\"')); num++) + *valto++ = *num; + *valto = 0; + if (atoi((!isdigit(*val))?val+1:val) < 5627) { + buf += " <"; + for (tok = (!isdigit(*val))?val+1:val; *tok; tok++) + buf += *tok; + buf += "> "; + } + } + } + valto = val; + num = strstr(token, "morph=\"x-Robinson:"); + if (num) { + for (num+=18; ((*num) && (*num != '\"')); num++) + *valto++ = *num; + *valto = 0; + // normal robinsons tense + buf += " ("; + for (tok = val; *tok; tok++) + buf += *tok; + buf += ") "; + } + } + + else if (!strncmp(token, "WG", 2) || !strncmp(token, "WH", 2)) { // strong's numbers + buf += " <"; + for (tok = token + 2; *tok; tok++) + buf += *tok; + buf += "> "; + } + + else if (!strncmp(token, "WTG", 3) || !strncmp(token, "WTH", 3)) { // strong's numbers tense + buf += " <"; + for (tok = token + 3; *tok; tok++) + if(*tok != '\"') + buf += *tok; + buf += ") "; + } + + else if (!strncmp(token, "RX", 2)) { + buf += ""; + for (tok = token + 3; *tok; tok++) { + if(*tok != '<' && *tok+1 != 
'R' && *tok+2 != 'x') { + buf += *tok; + } + else { + break; + } + } + buf += ""; + } + + else if (!strncmp(token, "RB", 2)) { + buf += ""; + u->hasFootnotePreTag = true; + } + + else if (!strncmp(token, "RF", 2)) { + if (u->hasFootnotePreTag) { + u->hasFootnotePreTag = false; + buf += " "; + } + buf += " ("; + } + + else if (!strncmp(token, "FN", 2)) { + buf += ""; + } + + else if (!strncmp(token, "CA", 2)) { // ASCII value + buf += (char)atoi(&token[2]); + } + + else { + return false; + } + } + return true; +} + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/gbfhtmlhref.cpp b/src/modules/filters/gbfhtmlhref.cpp new file mode 100644 index 0000000..7f1c254 --- /dev/null +++ b/src/modules/filters/gbfhtmlhref.cpp @@ -0,0 +1,288 @@ +/*************************************************************************** + gbfhtmlhref.cpp - GBF to HTML filter with hrefs + for strongs and morph tags + ------------------- + begin : 2001-09-03 + copyright : 2001 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include + +SWORD_NAMESPACE_START + +GBFHTMLHREF::MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) { + if (module) { + version = module->Name(); + } +} + +GBFHTMLHREF::GBFHTMLHREF() { + setTokenStart("<"); + setTokenEnd(">"); + + setTokenCaseSensitive(true); + + //addTokenSubstitute("Rf", ")"); + addTokenSubstitute("FA", ""); // for ASV footnotes to mark text + addTokenSubstitute("Rx", ""); + addTokenSubstitute("FI", ""); // italics begin + addTokenSubstitute("Fi", ""); + addTokenSubstitute("FB", ""); // bold begin + addTokenSubstitute("Fb", ""); + addTokenSubstitute("FR", ""); // words of Jesus begin + addTokenSubstitute("Fr", ""); + addTokenSubstitute("FU", ""); // underline begin + addTokenSubstitute("Fu", ""); + addTokenSubstitute("FO", ""); // Old Testament quote begin + addTokenSubstitute("Fo", ""); + addTokenSubstitute("FS", ""); // Superscript begin// Subscript begin + addTokenSubstitute("Fs", ""); + addTokenSubstitute("FV", ""); // Subscript begin + addTokenSubstitute("Fv", ""); + addTokenSubstitute("TT", ""); // Book title begin + addTokenSubstitute("Tt", ""); + addTokenSubstitute("PP", ""); // poetry begin + addTokenSubstitute("Pp", ""); + addTokenSubstitute("Fn", ""); // font end + addTokenSubstitute("CL", "
"); // new line + addTokenSubstitute("CM", "
"); // paragraph is a non showing comment that can be changed in the front end to

if desired + addTokenSubstitute("CG", ""); // ??? + addTokenSubstitute("CT", ""); // ??? + addTokenSubstitute("JR", "

"); // right align begin + addTokenSubstitute("JC", "
"); // center align begin + addTokenSubstitute("JL", "
"); // align end + +} + + +bool GBFHTMLHREF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) { + const char *tok; + MyUserData *u = (MyUserData *)userData; + + if (!substituteToken(buf, token)) { + XMLTag tag(token); + /*if (!strncmp(token, "w", 1)) { + // OSIS Word (temporary until OSISRTF is done) + valto = val; + num = strstr(token, "lemma=\"x-Strongs:"); + if (num) { + for (num+=17; ((*num) && (*num != '\"')); num++) + *valto++ = *num; + *valto = 0; + if (atoi((!isdigit(*val))?val+1:val) < 5627) { + buf += " <"; + for (tok = (!isdigit(*val))?val+1:val; *tok; tok++) + buf += *tok; + buf += "> "; + //cout << buf; + + } + // forget these for now + //else { + // verb morph + //sprintf(wordstr, "%03d", word-1); + //module->getEntryAttributes()["Word"][wordstr]["Morph"] = val; + //} + } + else { + num = strstr(token, "lemma=\"strong:"); + if (num) { + for (num+=14; ((*num) && (*num != '\"')); num++) + *valto++ = *num; + *valto = 0; + if (atoi((!isdigit(*val))?val+1:val) < 5627) { + buf += " <"; + for (tok = (!isdigit(*val))?val+1:val; *tok; tok++) + buf += *tok; + buf += "> "; + //cout << buf; + + } + // forget these for now + //else { + // verb morph + //sprintf(wordstr, "%03d", word-1); + //module->getEntryAttributes()["Word"][wordstr]["Morph"] = val; + //} + } + } + valto = val; + num = strstr(token, "morph=\"x-Robinson:"); + if (num) { + for (num+=18; ((*num) && (*num != '\"')); num++) + *valto++ = *num; + *valto = 0; + buf += " ("; + for (tok = val; *tok; tok++) + //if(*tok != '\"') + buf += *tok; + buf += ") "; + } + }*/ + + // else + if (!strncmp(token, "WG", 2)) { // strong's numbers + //buf += " <<"; + for (tok = token + 2; *tok; tok++) + //if(token[i] != '\"') + buf += *tok; + buf += ">"; + } + else if (!strncmp(token, "WH", 2)) { // strong's numbers + //buf += " <<"; + for (tok = token + 2; *tok; tok++) + //if(token[i] != '\"') + buf += *tok; + buf += ">"; + } + else if (!strncmp(token, "WTG", 3)) { // strong's numbers tense + 
//buf += " (("; + for (tok = token + 3; *tok; tok++) + if(*tok != '\"') + buf += *tok; + buf += ")"; + } + else if (!strncmp(token, "WTH", 3)) { // strong's numbers tense + //buf += " (("; + for (tok = token + 3; *tok; tok++) + if(*tok != '\"') + buf += *tok; + buf += ")"; + } + + else if (!strncmp(token, "WT", 2) && strncmp(token, "WTH", 3) && strncmp(token, "WTG", 3)) { // morph tags + //buf += " (("; + for (tok = token + 2; *tok; tok++) + if(*tok != '\"') + buf += *tok; + buf += ")"; + } + + else if (!strcmp(tag.getName(), "RX")) { + buf += ""; + } + else if (!strcmp(tag.getName(), "RF")) { + SWBuf type = tag.getAttribute("type"); + SWBuf footnoteNumber = tag.getAttribute("swordFootnote"); + VerseKey *vkey = NULL; + // see if we have a VerseKey * or descendant + SWTRY { + vkey = SWDYNAMIC_CAST(VerseKey, u->key); + } + SWCATCH ( ... ) { } + if (vkey) { + // leave this special osis type in for crossReference notes types? Might thml use this some day? Doesn't hurt. + //char ch = ((tag.getAttribute("type") && ((!strcmp(tag.getAttribute("type"), "crossReference")) || (!strcmp(tag.getAttribute("type"), "x-cross-ref")))) ? 
'x':'n'); + buf.appendFormatted("*n ", + URL::encode(footnoteNumber.c_str()).c_str(), + URL::encode(u->version.c_str()).c_str(), + URL::encode(vkey->getText()).c_str()); + } + u->suspendTextPassThru = true; + } + else if (!strcmp(tag.getName(), "Rf")) { + u->suspendTextPassThru = false; + } +/* + else if (!strncmp(token, "RB", 2)) { + buf += " "; + u->hasFootnotePreTag = true; + } + + else if (!strncmp(token, "Rf", 2)) { + buf += " lastTextNode.c_str(); + buf += "\">"; + buf += "*n "; + // let's let text resume to output again + u->suspendTextPassThru = false; + } + + else if (!strncmp(token, "RF", 2)) { + if (u->hasFootnotePreTag) { + u->hasFootnotePreTag = false; + buf += " "; + } + u->suspendTextPassThru = true; + } +*/ + else if (!strncmp(token, "FN", 2)) { + buf += ""; + } + + else if (!strncmp(token, "CA", 2)) { // ASCII value + buf += (char)atoi(&token[2]); + } + + else { + return false; + } + } + return true; +} + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/gbfmorph.cpp b/src/modules/filters/gbfmorph.cpp new file mode 100644 index 0000000..5226db7 --- /dev/null +++ b/src/modules/filters/gbfmorph.cpp @@ -0,0 +1,77 @@ +/****************************************************************************** + * + * gbfmorph - SWFilter descendant to hide or show morph tags + * in a GBF module. + */ + + +#include +#include + +SWORD_NAMESPACE_START + +const char oName[] = "Morphological Tags"; +const char oTip[] = "Toggles Morphological Tags On and Off if they exist"; + +const SWBuf choices[3] = {"Off", "On", ""}; +const StringList oValues(&choices[0], &choices[2]); + +GBFMorph::GBFMorph() : SWOptionFilter(oName, oTip, &oValues) { + setOptionValue("Off"); +} + + +GBFMorph::~GBFMorph() { +} + + +char GBFMorph::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + if (!option) { // if we don't want morph tags + const char *from; + char token[2048]; // cheese. Fix. 
+ int tokpos = 0; + bool intoken = false; + bool lastspace = false; + + SWBuf orig = text; + from = orig.c_str(); + + for (text = ""; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (*token == 'W' && token[1] == 'T') { // Morph + if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) { + if (lastspace) + text--; + } + continue; + } + // if not a morph tag token, keep token in text + text += '<'; + text += token; + text += '>'; + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + text += *from; + lastspace = (*from == ' '); + } + } + } + return 0; +} + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/gbfosis.cpp b/src/modules/filters/gbfosis.cpp new file mode 100644 index 0000000..00443f9 --- /dev/null +++ b/src/modules/filters/gbfosis.cpp @@ -0,0 +1,420 @@ +/****************************************************************************** + * + * gbfstrongs - SWFilter descendant to hide or show strongs number + * in a GBF module. 
+ */ + + +#include +#include +#include +#include +#include +#include +#include +#include + +SWORD_NAMESPACE_START + +GBFOSIS::GBFOSIS() { +} + + +GBFOSIS::~GBFOSIS() { +} + + +char GBFOSIS::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + char token[2048]; //cheesy, we seem to like cheese :) + int tokpos = 0; + bool intoken = false; + bool keepToken = false; + +// static QuoteStack quoteStack; + + SWBuf orig = text; + SWBuf tmp; + SWBuf value; + + bool suspendTextPassThru = false; + bool handled = false; + bool newWord = false; + bool newText = false; + bool lastspace = false; + + const char *wordStart = text.c_str(); + const char *wordEnd = NULL; + + const char *textStart = NULL; + const char *textEnd = NULL; + + SWBuf textNode = ""; + + SWBuf buf; + + text = ""; + for (const char* from = orig.c_str(); *from; ++from) { + if (*from == '<') { //start of new token detected + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + textEnd = from-1; //end of last text node found + wordEnd = text.c_str() + text.length();//not good, instead of wordEnd = to! + + continue; + } + + if (*from == '>') { // process tokens + intoken = false; + keepToken = false; + suspendTextPassThru = false; + newWord = true; + handled = false; + + while (wordStart < (text.c_str() + text.length())) { //hack + if (strchr(";,. 
:?!()'\"", *wordStart) && wordStart[0] && wordStart[1]) + wordStart++; + else break; + } + while (wordEnd > wordStart) { + if (strchr(" ,;:.?!()'\"", *wordEnd)) + wordEnd--; + else break; + } + + // Scripture Reference + if (!strncmp(token, "scripRef", 8)) { + suspendTextPassThru = true; + newText = true; + handled = true; + } + else if (!strncmp(token, "/scripRef", 9)) { + tmp = ""; + tmp.append(textStart, (int)(textEnd - textStart)+1); + text += VerseKey::convertToOSIS(tmp.c_str(), key); + + lastspace = false; + suspendTextPassThru = false; + handled = true; + } + + // Footnote + if (!strcmp(token, "RF") || !strncmp(token, "RF ", 3)) { //the GBFFootnotes filter adds the attribute "swordFootnote", we want to catch that, too + // pushString(buf, ""; + newText = true; + lastspace = false; + handled = true; + } + else if (!strcmp(token, "Rf")) { + text += ""; + lastspace = false; + handled = true; + } + // hebrew titles + if (!strcmp(token, "TH")) { + text += ""; + newText = true; + lastspace = false; + handled = true; + } + else if (!strcmp(token, "Th")) { + text += ""; + lastspace = false; + handled = true; + } + // Italics assume transchange + if (!strcmp(token, "FI")) { + text += ""; + newText = true; + lastspace = false; + handled = true; + } + else if (!strcmp(token, "Fi")) { + text += ""; + lastspace = false; + handled = true; + } + // less than + if (!strcmp(token, "CT")) { + text += "<"; + newText = true; + lastspace = false; + handled = true; + } + // greater than + if (!strcmp(token, "CG")) { + text += ">"; + newText = true; + lastspace = false; + handled = true; + } + // Paragraph break. For now use empty paragraph element + if (!strcmp(token, "CM")) { + text += ""; + newText = true; + lastspace = false; + handled = true; + } + + // Figure + else if (!strncmp(token, "img ", 4)) { + const char *src = strstr(token, "src"); + if (!src) // assert we have a src attribute + continue; +// return false; + + text += "
getConfigEntry("AbsoluteDataPath")); +// if (*((*buf)-1) == '/') +// c++; // skip '/' +// } +// end of uncomment for asolute path logic + + for (c++;((*c) && (*c != '"')); c++) { + text += *c; + } + text += "\" />"; + + lastspace = false; + handled = true; + } + + // Strongs numbers + else if (*token == 'W' && (token[1] == 'G' || token[1] == 'H')) { // Strongs + bool divineName = false; + value = token+1; + + // normal strongs number + //strstrip(val); + if (!strncmp(wordStart, " attribute! + if (!strcmp(value.c_str(), "H03068")) { //divineName + buf = ""; + buf.appendFormatted("", value.c_str()); + + divineName = true; + } + else { + buf = ""; + buf.appendFormatted("", value.c_str()); + } + + text.insert(wordStart - text.c_str(), buf); + + if (divineName) { + wordStart += 12; + text += ""; + } + else text += ""; + + lastspace = false; + } + handled = true; + } + + // Morphology + else if (*token == 'W' && token[1] == 'T') { + if (token[2] == 'G' || token[2] == 'H') { // Strongs + value = token+2; + } + else value = token+1; + + if (!strncmp(wordStart, " attribute fond + buf = ""; + buf.appendFormatted("", "robinson", value.c_str()); + text.insert(wordStart - text.c_str(), buf); + text += ""; + lastspace = false; + + } + handled = true; + } + + if (!keepToken) { + if (!handled) { + SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? 
(const char*)*key : ""); +// exit(-1); + } + if (from[1] && strchr(" ,;.:?!()'\"", from[1])) { + if (lastspace) { + text--; + } + } + if (newText) { + textStart = from+1; + newText = false; + } + continue; + } + + // if not a strongs token, keep token in text + text.appendFormatted("<%s>", token); + + if (newText) { + textStart = text.c_str() + text.length(); + newWord = false; + } + continue; + } + if (intoken) { + if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + } + else { + switch (*from) { + case '\'': + case '\"': + case '`': +// quoteStack.handleQuote(fromStart, from, &to); + text += *from; + //from++; //this line removes chars after an apostrophe! Needs fixing. + break; + default: + if (newWord && (*from != ' ')) { + wordStart = text.c_str() + text.length(); + newWord = false; + + //fix this if required? + //memset(to, 0, 10); + + } + + if (!suspendTextPassThru) { + text += (*from); + lastspace = (*from == ' '); + } + } + } + } + + VerseKey *vkey = SWDYNAMIC_CAST(VerseKey, key); + if (vkey) { + SWBuf ref = ""; + if (vkey->Verse()) { + ref.appendFormatted("\t\t", vkey->getOSISRef()); + } + + if (ref.length() > 0) { + + text = ref + text; + + if (vkey->Verse()) { + VerseKey tmp; + tmp = *vkey; + tmp.AutoNormalize(0); + tmp.Headings(1); + + text += ""; + + tmp = MAXVERSE; + if (*vkey == tmp) { + tmp.Verse(0); +// sprintf(ref, "\t
"); +// pushString(&to, ref); + tmp = MAXCHAPTER; + tmp = MAXVERSE; + if (*vkey == tmp) { + tmp.Chapter(0); + tmp.Verse(0); +// sprintf(ref, "\t
"); +// pushString(&to, ref); +/* + if (!quoteStack.empty()) { + SWLog::getSystemLog()->logError("popping unclosed quote at end of book"); + quoteStack.clear(); + } +*/ + } + } + } +// else if (vkey->Chapter()) { +// sprintf(ref, "\t
", vkey->getOSISRef()); +// } +// else sprintf(ref, "\t
", vkey->getOSISRef()); + } + } + return 0; +} + + +QuoteStack::QuoteStack() { + clear(); +} + + +void QuoteStack::clear() { + while (!quotes.empty()) quotes.pop(); +} + + +QuoteStack::~QuoteStack() { + clear(); +} + + +void QuoteStack::handleQuote(char *buf, char *quotePos, SWBuf &text) { +//QuoteInstance(char startChar = '\"', char level = 1, string uniqueID = "", char continueCount = 0) { + if (!quotes.empty()) { + QuoteInstance last = quotes.top(); + if (last.startChar == *quotePos) { + text += ""; + quotes.pop(); + } + else { + quotes.push(QuoteInstance(*quotePos, last.level+1)); + quotes.top().pushStartStream(text); + } + } + else { + quotes.push(QuoteInstance(*quotePos)); + quotes.top().pushStartStream(text); + } +} + +void QuoteStack::QuoteInstance::pushStartStream(SWBuf &text) { + text.appendFormatted("", level); +} + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/gbfplain.cpp b/src/modules/filters/gbfplain.cpp new file mode 100644 index 0000000..5657e20 --- /dev/null +++ b/src/modules/filters/gbfplain.cpp @@ -0,0 +1,97 @@ +/****************************************************************************** + * + * gbfplain - SWFilter descendant to strip out all GBF tags or convert to + * ASCII rendered symbols. 
+ */ + + +#include +#include +#include + +SWORD_NAMESPACE_START + +GBFPlain::GBFPlain() { +} + + +char GBFPlain::processText (SWBuf &text, const SWKey *key, const SWModule *module) +{ + char token[2048]; + int tokpos = 0; + bool intoken = false; + SWBuf orig = text; + const char* from = orig.c_str(); + + for (text = ""; *from; ++from) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { + intoken = false; + // process desired tokens + switch (*token) { + case 'W': // Strongs + switch(token[1]) { + case 'G': // Greek + case 'H': // Hebrew + case 'T': // Tense + text.append(" <"); + //for (char *tok = token + 2; *tok; tok++) + // text += *tok; + text.append(token+2); + text.append("> "); + continue; + } + break; + case 'R': + switch(token[1]) { + case 'F': // footnote begin + text.append(" ["); + continue; + case 'f': // footnote end + text.append("] "); + continue; + } + break; + case 'C': + switch(token[1]) { + case 'A': // ASCII value + text.append((char)atoi(&token[2])); + continue; + case 'G': + text.append('>'); + continue; +/* Bug in WEB + case 'L': + *to++ = '<'; + continue; +*/ + case 'L': // Bug in WEB. Use above entry when fixed + case 'N': // new line + text.append('\n'); + continue; + case 'M': // new paragraph + text.append("\n\n"); + continue; + } + break; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else text.append(*from); + } + return 0; +} + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/gbfredletterwords.cpp b/src/modules/filters/gbfredletterwords.cpp new file mode 100644 index 0000000..a79802d --- /dev/null +++ b/src/modules/filters/gbfredletterwords.cpp @@ -0,0 +1,93 @@ +/****************************************************************************** + * + * GBFRedLetterWords - SWFilter descendant to toggle red coloring of words of + * Christ in a GBF module. 
+ */ + + +#include +#include +#include +#include + +SWORD_NAMESPACE_START + +const char oName[] = "Words of Christ in Red"; +const char oTip[] = "Toggles Red Coloring for Words of Christ On and Off if they are marked"; + +const SWBuf choices[3] = {"Off", "On", ""}; +const StringList oValues(&choices[0], &choices[2]); + +GBFRedLetterWords::GBFRedLetterWords() : SWOptionFilter(oName, oTip, &oValues) { + setOptionValue("Off"); +} + + +GBFRedLetterWords::~GBFRedLetterWords() { +} + + +char GBFRedLetterWords::processText(SWBuf &text, const SWKey *key, const SWModule *module) { +/** This function removes the red letter words in Bible like the WEB +* The words are marked by as start and as end tag. +*/ + if (!option) { // if we don't want footnotes + char token[4096]; // cheese. Fix. + int tokpos = 0; + bool intoken = false; + bool hide = false; + + const char *from; + SWBuf orig = text; + from = orig.c_str(); + for (text = ""; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; +// memset(token, 0, 4096); + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + /*switch (*token) { + case 'F': // Font attribute + switch(token[1]) { + case 'R': // Begin red letter words + hide = true; + break; + case 'r': // end red letter words + hide = false; + break; + } + continue; // skip token + }*/ + + //hide the token if either FR or Fr was detected + hide = (token[0] == 'F' && ( (token[1] == 'R') || (token[1] == 'r') )); + + // if not a red letter word token, keep token in text + if (!hide) { + text += '<'; + for (char *tok = token; *tok; tok++) + text += *tok; + text += '>'; + } + continue; + } + if (intoken) { + if (tokpos < 4090) + token[tokpos++] = *from; + token[tokpos+2] = 0; // +2 cuz we init token with 2 extra '0' because of switch statement + } + else { + text += *from; + } + } + } + return 0; +} + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/gbfrtf.cpp 
b/src/modules/filters/gbfrtf.cpp new file mode 100644 index 0000000..eb39612 --- /dev/null +++ b/src/modules/filters/gbfrtf.cpp @@ -0,0 +1,311 @@ +/****************************************************************************** + * + * gbfrtf - SWFilter descendant to convert all GBF tags to RTF tags + */ + + +#include +#include +#include +#include + +SWORD_NAMESPACE_START + +GBFRTF::GBFRTF() { +} + + +char GBFRTF::processText(SWBuf &text, const SWKey *key, const SWModule *module) +{ + char token[2048]; + char val[128]; + char *valto; + char *num; + int tokpos = 0; + bool intoken = false; + const char *tok; + SWBuf strongnum; + SWBuf strongtense; + bool hideText = false; + int wordLen = 0; + int wordCount = 0; + + const char *from; + SWBuf orig = text; + from = orig.c_str(); + for (text = ""; *from; from++) { + if (*from == '<') { + wordLen = wordCount; + wordCount = 0; + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { + intoken = false; + // process desired tokens + // deal with OSIS note tags. 
Just hide till OSISRTF + if (!strncmp(token, "note ", 5)) { + hideText = true; + } + if (!strncmp(token, "/note", 5)) { + hideText = false; + } + + switch (*token) { + case 'w': // OSIS Word (temporary until OSISRTF is done) + strongnum = ""; + strongtense = ""; + valto = val; + num = strstr(token, "lemma=\"x-Strongs:"); + if (num) { + for (num+=17; ((*num) && (*num != '\"')); num++) + *valto++ = *num; + *valto = 0; + if (atoi((!isdigit(*val))?val+1:val) < 5627) { + // normal strongs number + strongnum += "{\\cf3 \\sub <"; + for (tok = (!isdigit(*val))?val+1:val; *tok; tok++) + strongnum += *tok; + strongnum += ">}"; + } + /* forget these for now + else { + // verb morph + sprintf(wordstr, "%03d", word-1); + module->getEntryAttributes()["Word"][wordstr]["Morph"] = val; + } + */ + } + else { + num = strstr(token, "lemma=\"strong:"); + if (num) { + for (num+=14; ((*num) && (*num != '\"')); num++) + *valto++ = *num; + *valto = 0; + if (atoi((!isdigit(*val))?val+1:val) < 5627) { + // normal strongs number + strongnum += "{\\cf3 \\sub <"; + for (tok = (!isdigit(*val))?val+1:val; *tok; tok++) + strongnum += *tok; + strongnum += ">}"; + } + /* forget these for now + else { + // verb morph + sprintf(wordstr, "%03d", word-1); + module->getEntryAttributes()["Word"][wordstr]["Morph"] = val; + } + */ + } + } + valto = val; + num = strstr(token, "morph=\"x-Robinson:"); + if (num) { + for (num+=18; ((*num) && (*num != '\"')); num++) + *valto++ = *num; + *valto = 0; + // normal robinsons tense + strongtense += "{\\cf4 \\sub ("; + for (tok = val; *tok; tok++) + strongtense += *tok; + strongtense += ")}"; + } + continue; + + case '/': + if (token[1] == 'w') { + if ((wordCount > 0) || (strongnum != "{\\cf3 \\sub <3588>}")) { + //for (i = 0; i < strongnum.length(); i++) + text += strongnum; + //for (i = 0; i < strongtense.length(); i++) + text += strongtense; + } + } + continue; + + case 'W': // Strongs + switch(token[1]) { + case 'G': // Greek + case 'H': // Hebrew + text += "{\\cf3 
\\sub <"; + for (tok = token + 2; *tok; tok++) + text += *tok; + text += ">}"; + continue; + + case 'T': // Tense + text += "{\\cf4 \\sub ("; + bool separate = false; + for (tok = token + 2; *tok; tok++) { + if (separate) { + text += "; "; + separate = false; + } + switch (*tok) { + case 'G': + case 'H': + for (tok++; *tok; tok++) { + if (isdigit(*tok)) { + text += *tok; + separate = true; + } + else { + tok--; + break; + } + } + break; + default: + for (; *tok; tok++) { + text += *tok; + } + } + } + text += ")}"; + continue; + } + break; + case 'R': + switch(token[1]) { + case 'X': + text += ""; + continue; + case 'x': + text += ""; + continue; + case 'F': // footnote begin + text += "{\\i1 \\sub [ "; + continue; + case 'f': // footnote end + text += " ] }"; + continue; + } + break; + case 'F': // font tags + switch(token[1]) { + case 'I': // italic start + text += "\\i1 "; + continue; + case 'i': // italic end + text += "\\i0 "; + continue; + case 'B': // bold start + text += "\\b1 "; + continue; + case 'b': // bold end + text += "\\b0 "; + continue; + case 'N': + text += '{'; + if (!strnicmp(token+2, "Symbol", 6)) + text += "\\f7 "; + if (!strnicmp(token+2, "Courier", 7)) + text += "\\f8 "; + continue; + case 'n': + text += '}'; + continue; + case 'S': + text += "{\\super "; + continue; + case 's': + text += '}'; + continue; + case 'R': + text += "{\\cf6 "; + continue; + case 'r': + text += '}'; + continue; + case 'O': + case 'C': + text += "\\scaps1 "; + continue; + case 'o': + case 'c': + text += "\\scaps0 "; + continue; + case 'V': + text += "{\\sub "; + continue; + case 'v': + text += '}'; + continue; + case 'U': + text += "\\ul1 "; + continue; + case 'u': + text += "\\ul0 "; + continue; + } + break; + case 'C': // special character tags + switch(token[1]) { + case 'A': // ASCII value + text += (char)atoi(&token[2]); + continue; + case 'G': + text += '>'; + continue; + case 'L': // line break + text += "\\line "; + continue; + case 'M': // new paragraph + 
text += "\\par "; + continue; + case 'T': + text += '<'; + } + break; + case 'T': // title formatting + switch(token[1]) + { + case 'T': // Book title begin + text += "{\\large "; + continue; + case 't': + text += '}'; + continue; + case 'S': + text += "\\par {\\i1\\b1 "; + continue; + case 's': + text += "}\\par "; + continue; + } + break; + case 'J': // Strongs + switch(token[1]) { + case 'L': + text += "\\ql "; + case 'C': + text += "\\qc "; + case 'R': + text += "\\qr "; + case 'F': + text += "\\qj "; + } + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + if (!hideText) { + wordCount++; + text += *from; + } + } + } + return 0; +} + +SWORD_NAMESPACE_END + diff --git a/src/modules/filters/gbfstrongs.cpp b/src/modules/filters/gbfstrongs.cpp new file mode 100644 index 0000000..610edb5 --- /dev/null +++ b/src/modules/filters/gbfstrongs.cpp @@ -0,0 +1,126 @@ +/****************************************************************************** + * + * gbfstrongs - SWFilter descendant to hide or show strongs number + * in a GBF module. + */ + + +#include +#include +#include +#include +#include + +SWORD_NAMESPACE_START + +const char oName[] = "Strong's Numbers"; +const char oTip[] = "Toggles Strong's Numbers On and Off if they exist"; + +const SWBuf choices[3] = {"Off", "On", ""}; +const StringList oValues(&choices[0], &choices[2]); + +GBFStrongs::GBFStrongs() : SWOptionFilter(oName, oTip, &oValues) { + setOptionValue("Off"); +} + + +GBFStrongs::~GBFStrongs() { +} + + +char GBFStrongs::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + char token[2048]; // cheese. Fix. 
+ int tokpos = 0; + bool intoken = false; + bool lastspace = false; + int word = 1; + char val[128]; + char wordstr[5]; + char *valto; + unsigned int textStart = 0, textEnd = 0; + bool newText = false; + SWBuf tmp; + const char *from; + + SWBuf orig = text; + from = orig.c_str(); + + for (text = ""; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + textEnd = text.size(); + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (*token == 'W' && (token[1] == 'G' || token[1] == 'H')) { // Strongs + if (module->isProcessEntryAttributes()) { + valto = val; + for (unsigned int i = 1; ((token[i]) && (i < 150)); i++) + *valto++ = token[i]; + *valto = 0; + if (atoi((!isdigit(*val))?val+1:val) < 5627) { + // normal strongs number + sprintf(wordstr, "%03d", word++); + module->getEntryAttributes()["Word"][wordstr]["PartsCount"] = "1"; + module->getEntryAttributes()["Word"][wordstr]["Lemma"] = val; + module->getEntryAttributes()["Word"][wordstr]["LemmaClass"] = "strong"; + tmp = ""; + tmp.append(text.c_str()+textStart, (int)(textEnd - textStart)); + module->getEntryAttributes()["Word"][wordstr]["Text"] = tmp; + newText = true; + } + else { + // verb morph + sprintf(wordstr, "%03d", word-1); + module->getEntryAttributes()["Word"][wordstr]["Morph"] = val; + module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = "OLBMorph"; + } + } + + if (!option) { + if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) { + if (lastspace) + text--; + } + if (newText) {textStart = text.size(); newText = false; } + continue; + } + } + if (module->isProcessEntryAttributes()) { + if ((*token == 'W') && (token[1] == 'T')) { // Morph + valto = val; + for (unsigned int i = 2; ((token[i]) && (i < 150)); i++) + *valto++ = token[i]; + *valto = 0; + sprintf(wordstr, "%03d", 
word-1); + module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = "GBFMorph"; + module->getEntryAttributes()["Word"][wordstr]["Morph"] = val; + newText = true; + } + } + // if not a strongs token, keep token in text + text += '<'; + text += token; + text += '>'; + if (newText) {textStart = text.size(); newText = false; } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + text += *from; + lastspace = (*from == ' '); + } + } + return 0; +} + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/gbfthml.cpp b/src/modules/filters/gbfthml.cpp new file mode 100644 index 0000000..2664f48 --- /dev/null +++ b/src/modules/filters/gbfthml.cpp @@ -0,0 +1,216 @@ +/*************************************************************************** + gbfthml.cpp - GBF to ThML filter + ------------------- + begin : 1999-10-27 + copyright : 2001 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#include +#include +#include + +SWORD_NAMESPACE_START + +GBFThML::GBFThML() +{ +} + + +char GBFThML::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + const char *from; + char token[2048]; + int tokpos = 0; + bool intoken = false; + const char *tok; + + SWBuf orig = text; + from = orig.c_str(); + + for (text = ""; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') + { + intoken = false; + // process desired tokens + switch (*token) { + case 'W': // Strongs + switch(token[1]) { + case 'G': + case 'H': + text += ""; + continue; + + case 'T': // Tense + text += ""; + continue; + } + break; + case 'R': + switch(token[1]) + { + case 'X': + text += ""; + continue; + case 'x': + text += ""; + continue; + case 'F': // footnote begin + text += ""; + continue; + case 'f': // footnote end + text += ""; + continue; + } + break; + case 'F': // font tags + switch(token[1]) + { + case 'N': + text += ""; + continue; + case 'n': + text += ""; + continue; + case 'I': // italic start + text += ""; + continue; + case 'i': // italic end + text += ""; + continue; + case 'B': // bold start + text += ""; + continue; + case 'b': // bold end + text += ""; + continue; + + case 'R': // words of Jesus begin + text += ""; + continue; + case 'r': // words of Jesus end + text += ""; + continue; + case 'U': // Underline start + text += ""; + continue; + case 'u': // Underline end + text += ""; + continue; + case 'O': // Old Testament quote begin + text += ""; + continue; + case 'o': // Old Testament quote end + text += ""; + continue; + case 'S': // Superscript begin + text += ""; + continue; + case 's': // Superscript end + text += ""; + continue; + case 'V': // Subscript begin + text += ""; + continue; + case 'v': // Subscript end + text += ""; + continue; + } + break; + case 'C': // 
special character tags + switch(token[1]) + { + case 'A': // ASCII value + text += (char)atoi(&token[2]); + continue; + case 'G': + //*to++ = ' '; + continue; + case 'L': // line break + text += "
"; + continue; + case 'M': // new paragraph + text += "

"; + continue; + case 'T': + //*to++ = ' '; + continue; + } + break; + case 'T': // title formatting + switch(token[1]) + { + case 'T': // Book title begin + text += ""; + continue; + case 't': + text += ""; + continue; + case 'S': + text += "

"; + continue; + case 's': + text += "
"; + continue; + } + break; + + case 'P': // special formatting + switch(token[1]) { + case 'P': // Poetry begin + text += ""; + continue; + case 'p': + text += ""; + continue; + } + break; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else text += *from; + } + return 0; +} + + + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/gbfwebif.cpp b/src/modules/filters/gbfwebif.cpp new file mode 100644 index 0000000..e651db6 --- /dev/null +++ b/src/modules/filters/gbfwebif.cpp @@ -0,0 +1,191 @@ +/*************************************************************************** + GBFWEBIF.cpp - GBF to HTML filter with hrefs + for strongs and morph tags + ------------------- + begin : 2001-09-03 + copyright : 2001 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#include +#include +#include + +SWORD_NAMESPACE_START + +GBFWEBIF::GBFWEBIF() : baseURL(""), passageStudyURL(baseURL + "passagestudy.jsp") { +//all is done in GBFHTMLHREF since it inherits form this class + addTokenSubstitute("FR", ""); // words of Jesus begin + addTokenSubstitute("Fr", ""); +} + +bool GBFWEBIF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) { + const char *tok; + char val[128]; + char *valto; + const char *num; + SWBuf url; + + if (!substituteToken(buf, token)) { + if (!strncmp(token, "w", 1)) { + // OSIS Word (temporary until OSISRTF is done) + valto = val; + num = strstr(token, "lemma=\"x-Strongs:"); + if (num) { + for (num+=17; ((*num) && (*num != '\"')); num++) + *valto++ = *num; + *valto = 0; + + if (atoi((!isdigit(*val))?val+1:val) < 5627) { + buf += " <"; + url = ""; + for (tok = val; *tok; tok++) { + url += *tok; + } + if ((url.length() > 1) && strchr("GH", url[0])) { + if (isdigit(url[1])) + url = url.c_str()+1; + } + buf.appendFormatted("", passageStudyURL.c_str(), URL::encode(url).c_str()); + + for (tok = (!isdigit(*val))?val+1:val; *tok; tok++) { + buf += *tok; + } + buf += "> "; + } + } + else { + num = strstr(token, "lemma=\"strong:"); + if (num) { + for (num+=14; ((*num) && (*num != '\"')); num++) + *valto++ = *num; + *valto = 0; + + if (atoi((!isdigit(*val))?val+1:val) < 5627) { + buf += " <"; + url = ""; + for (tok = val; *tok; tok++) { + url += *tok; + } + if ((url.length() > 1) && strchr("GH", url[0])) { + if (isdigit(url[1])) + url = url.c_str()+1; + } + buf.appendFormatted("", passageStudyURL.c_str(), URL::encode(url).c_str()); + + for (tok = (!isdigit(*val))?val+1:val; *tok; tok++) { + buf += *tok; + } + buf += "> "; + } + } + } + valto = val; + num = strstr(token, "morph=\"x-Robinson:"); + if (num) { + for (num+=18; ((*num) && (*num != '\"')); num++) + *valto++ = *num; + *valto = 0; + buf += " ("; + url = ""; 
+ for (tok = val; *tok; tok++) { + // normal robinsons tense + buf += *tok; + } + buf.appendFormatted("", passageStudyURL.c_str(), URL::encode(url).c_str()); + + for (tok = val; *tok; tok++) { + buf += *tok; + } + buf += ") "; + } + } + + else if (!strncmp(token, "WG", 2) || !strncmp(token, "WH", 2)) { // strong's numbers + buf += " <"; + url = ""; + + for (tok = token+1; *tok; tok++) { + url += *tok; + } + if ((url.length() > 1) && strchr("GH", url[0])) { + if (isdigit(url[1])) + url = url.c_str()+1; + } + buf.appendFormatted("", passageStudyURL.c_str(), URL::encode(url).c_str()); + + for (tok = token + 2; *tok; tok++) { + buf += *tok; + } + buf += ">"; + } + + else if (!strncmp(token, "WTG", 3) || !strncmp(token, "WTH", 3)) { // strong's numbers tense + buf += " ("; + url = ""; + for (tok = token + 2; *tok; tok++) { + if(*tok != '\"') + url += *tok; + } + if ((url.length() > 1) && strchr("GH", url[0])) { + if (isdigit(url[1])) + url = url.c_str()+1; + } + buf.appendFormatted("", passageStudyURL.c_str(), URL::encode(url).c_str()); + + for (tok = token + 3; *tok; tok++) + if(*tok != '\"') + buf += *tok; + buf += ")"; + } + + else if (!strncmp(token, "WT", 2) && strncmp(token, "WTH", 3) && strncmp(token, "WTG", 3)) { // morph tags + buf += " ("; + for (tok = token + 2; *tok; tok++) { + if(*tok != '\"') + buf += *tok; + } + buf.appendFormatted("", passageStudyURL.c_str(), URL::encode(url).c_str()); + + for (tok = token + 2; *tok; tok++) { + if(*tok != '\"') + buf += *tok; + } + buf += ")"; + } + + else if (!strncmp(token, "RX", 2)) { + buf += "", passageStudyURL.c_str(), URL::encode(url).c_str()); + } + // ok to leave these in + else if ((!strncmp(token, "span", 4)) + || (!strncmp(token, "/span", 5))) { + buf.appendFormatted("<%s>", token); + } + + else { + return GBFHTMLHREF::handleToken(buf, token, userData); + } + } + return true; +} + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/gbfwordjs.cpp b/src/modules/filters/gbfwordjs.cpp new file mode 100644 index 
0000000..f81ffac --- /dev/null +++ b/src/modules/filters/gbfwordjs.cpp @@ -0,0 +1,282 @@ +/****************************************************************************** + * + * gbfstrongs - SWFilter descendant to hide or show strongs number + * in a GBF module. + */ + + +#include +#include +#include +#include +#include +#include +#include + +SWORD_NAMESPACE_START + +const char oName[] = "Word Javascript"; +const char oTip[] = "Toggles Word Javascript data"; + +const SWBuf choices[3] = {"Off", "On", ""}; +const StringList oValues(&choices[0], &choices[2]); + + +GBFWordJS::GBFWordJS() : SWOptionFilter(oName, oTip, &oValues) { + setOptionValue("Off"); + + defaultGreekLex = 0; + defaultHebLex = 0; + defaultGreekParse = 0; + defaultHebParse = 0; + mgr = 0; +} + + +GBFWordJS::~GBFWordJS() { +} + + +char GBFWordJS::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + if (option) { + char token[2112]; // cheese. Fix. + int tokpos = 0; + bool intoken = false; + bool lastspace = false; + int word = 1; + char val[128]; + char wordstr[5]; + unsigned int textStart = 0, lastAppendLen = 0, textEnd = 0; + SWBuf tmp; + bool newText = false; + bool needWordOut = false; + AttributeValue *wordAttrs = 0; + SWBuf modName = (module)?module->Name():""; + SWBuf wordSrcPrefix = modName; + + const SWBuf orig = text; + const char * from = orig.c_str(); + VerseKey *vkey = 0; + if (key) { + vkey = SWDYNAMIC_CAST(VerseKey, key); + } + + for (text = ""; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + textEnd = text.length(); + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (*token == 'W' && (token[1] == 'G' || token[1] == 'H')) { // Strongs + strcpy(val,token+1); + if (atoi((!isdigit(*val))?val+1:val) < 5627) { + // normal strongs number + sprintf(wordstr, "%03d", word++); + needWordOut = (word > 2); + wordAttrs = &(module->getEntryAttributes()["Word"][wordstr]); + 
(*wordAttrs)["Lemma"] = val; + //printf("Adding: [\"Word\"][%s][\"Strongs\"] = %s\n", wordstr, val); + tmp = ""; + tmp.append(text.c_str()+textStart, (int)(textEnd - textStart)); + (*wordAttrs)["Text"] = tmp; + text.append(""); + SWBuf ts; + ts.appendFormatted("%d", textStart); + (*wordAttrs)["TextStart"] = ts; + //printf("Adding: [\"Word\"][%s][\"Text\"] = %s\n", wordstr, tmp.c_str()); + newText = true; + } + else { + // verb morph + if (wordAttrs) { + (*wordAttrs)["Morph"] = val; + } + //printf("Adding: [\"Word\"][%s][\"Morph\"] = %s\n", wordstr, val); + } + + } + if (*token == 'W' && token[1] == 'T') { // Morph + if (token[2] == 'G' || token[2] == 'H') { + strcpy(val, token+2); + } + else strcpy(val, token+1); + if (wordAttrs) { + (*wordAttrs)["Morph"] = val; + (*wordAttrs)["MorphClass"] = "StrongsMorph"; + } + newText = true; + } + // if not a strongs token, keep token in text + text += '<'; + text += token; + text += '>'; + if (needWordOut) { + char wstr[10]; + sprintf(wstr, "%03d", word-2); + AttributeValue *wAttrs = &(module->getEntryAttributes()["Word"][wstr]); + needWordOut = false; + SWBuf strong = (*wAttrs)["Lemma"]; + SWBuf morph = (*wAttrs)["Morph"]; + SWBuf morphClass = (*wAttrs)["MorphClass"]; + SWBuf wordText = (*wAttrs)["Text"]; + SWBuf textSt = (*wAttrs)["TextStart"]; + if (strong.size()) { + char gh = 0; + gh = isdigit(strong[0]) ? 0:strong[0]; + if (!gh) { + if (vkey) { + gh = vkey->Testament() ? 
'H' : 'G'; + } + } + else strong << 1; + + SWModule *sLex = 0; + SWModule *sMorph = 0; + if (gh == 'G') { + sLex = defaultGreekLex; + sMorph = defaultGreekParse; + } + if (gh == 'H') { + sLex = defaultHebLex; + sMorph = defaultHebParse; + } + SWBuf lexName = ""; + if (sLex) { + // we can pass the real lex name in, but we have some + // aliases in the javascript to optimize bandwidth + lexName = sLex->Name(); + if (lexName == "StrongsGreek") + lexName = "G"; + if (lexName == "StrongsHebrew") + lexName = "H"; + } + SWBuf wordID; + if (vkey) { + // optimize for bandwidth and use only the verse as the unique entry id + wordID.appendFormatted("%d", vkey->Verse()); + } + else { + wordID = key->getText(); + } + for (unsigned int i = 0; i < wordID.size(); i++) { + if ((!isdigit(wordID[i])) && (!isalpha(wordID[i]))) { + wordID[i] = '_'; + } + } + wordID.appendFormatted("_%s%d", wordSrcPrefix.c_str(), atoi(wstr)); + if (textSt.size()) { + int textStr = atoi(textSt.c_str()); + textStr += lastAppendLen; + SWBuf spanStart = ""; + + + +/* + if (sMorph) { + SWBuf popMorph = "%s", sMorph->Name(), morph.c_str(), wordID.c_str(), morph.c_str()); + morph = popMorph; + } +*/ + + // 'p' = 'fillpop' to save bandwidth + const char *m = strchr(morph.c_str(), ':'); + if (m) m++; + else m = morph.c_str(); + spanStart.appendFormatted("", lexName.c_str(), strong.c_str(), wordID.c_str(), m, modName.c_str()); + text.insert(textStr, spanStart); + lastAppendLen = spanStart.length(); + } + } + + } + if (newText) { + textStart = text.length(); newText = false; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + text += *from; + lastspace = (*from == ' '); + } + } + + char wstr[10]; + sprintf(wstr, "%03d", word-1); + AttributeValue *wAttrs = &(module->getEntryAttributes()["Word"][wstr]); + needWordOut = false; + SWBuf strong = (*wAttrs)["Lemma"]; + SWBuf morph = (*wAttrs)["Morph"]; + SWBuf morphClass = (*wAttrs)["MorphClass"]; + 
SWBuf wordText = (*wAttrs)["Text"]; + SWBuf textSt = (*wAttrs)["TextStart"]; + if (strong.size()) { + char gh = 0; + gh = isdigit(strong[0]) ? 0:strong[0]; + if (!gh) { + if (vkey) { + gh = vkey->Testament() ? 'H' : 'G'; + } + } + else strong << 1; + + SWModule *sLex = 0; + if (gh == 'G') { + sLex = defaultGreekLex; + } + if (gh == 'H') { + sLex = defaultHebLex; + } + SWBuf lexName = ""; + if (sLex) { + // we can pass the real lex name in, but we have some + // aliases in the javascript to optimize bandwidth + lexName = sLex->Name(); + if (lexName == "StrongsGreek") + lexName = "G"; + if (lexName == "StrongsHebrew") + lexName = "H"; + } + SWBuf wordID; + if (vkey) { + // optimize for bandwidth and use only the verse as the unique entry id + wordID.appendFormatted("%d", vkey->Verse()); + } + else { + wordID = key->getText(); + } + for (unsigned int i = 0; i < wordID.size(); i++) { + if ((!isdigit(wordID[i])) && (!isalpha(wordID[i]))) { + wordID[i] = '_'; + } + } + wordID.appendFormatted("_%s%d", wordSrcPrefix.c_str(), atoi(wstr)); + if (textSt.size()) { + int textStr = atoi(textSt.c_str()); + textStr += lastAppendLen; + SWBuf spanStart = ""; + // 'p' = 'fillpop' to save bandwidth + const char *m = strchr(morph.c_str(), ':'); + if (m) m++; + else m = morph.c_str(); + spanStart.appendFormatted("", lexName.c_str(), strong.c_str(), wordID.c_str(), m, modName.c_str()); + text.insert(textStr, spanStart); + } + } + } + + return 0; +} + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/greeklexattribs.cpp b/src/modules/filters/greeklexattribs.cpp new file mode 100644 index 0000000..1e82305 --- /dev/null +++ b/src/modules/filters/greeklexattribs.cpp @@ -0,0 +1,101 @@ +/****************************************************************************** + * + * greeklexattribs - SWFilter descendant to set entry attributes for greek + * lexicons + */ + + +#include +#include +#include +#include +#include +#include + +using std::string; + +SWORD_NAMESPACE_START + 
+GreekLexAttribs::GreekLexAttribs() { +} + + +char GreekLexAttribs::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + + if (module->isProcessEntryAttributes()) { + const char *from; + bool inAV = false; + string phrase; + string freq; + char val[128], *valto; + char wordstr[7]; + const char *currentPhrase = 0; + const char *currentPhraseEnd = 0; + int number = 0; + + + for (from = text.c_str(); *from; from++) { + if (inAV) { + if (currentPhrase == 0) { + if (isalpha(*from)) + currentPhrase = from; + } + else { + if ((!isalpha(*from)) && (*from != ' ') && (*from != '+') && (*from !='(') && (*from != ')') && (*from != '\'')) { + if (*from == '<') { + if (!currentPhraseEnd) + currentPhraseEnd = from - 1; + for (; *from && *from != '>'; from++) { + if (!strncmp(from, "value=\"", 7)) { + valto = val; + from += 7; + for (unsigned int i = 0; from[i] != '\"' && i < 127; i++) + *valto++ = from[i]; + *valto = 0; + sprintf(wordstr, "%03d", number+1); + module->getEntryAttributes()["AVPhrase"][wordstr]["CompoundedWith"] = val; + from += strlen(val); + } + } + continue; + } + + phrase = ""; + phrase.append(currentPhrase, (int)(((currentPhraseEnd>currentPhrase)?currentPhraseEnd:from) - currentPhrase)-1); + currentPhrase = from; + while (*from && isdigit(*from)) from++; + freq = ""; + freq.append(currentPhrase, (int)(from - currentPhrase)); + if ((freq.length() > 0) && (phrase.length() > 0)) { + sprintf(wordstr, "%03d", ++number); + if ((strchr(phrase.c_str(), '(') > phrase.c_str()) && (strchr(phrase.c_str(), ')') > phrase.c_str() + 1)) { + string tmp = phrase.substr(0, phrase.find_first_of("(")); + phrase.erase(phrase.find_first_of("("), 1); + phrase.erase(phrase.find_first_of(")"), 1); + phrase.erase(0,phrase.find_first_not_of("\r\n\v\t ")); phrase.erase(phrase.find_last_not_of("\r\n\v\t ")+1); + module->getEntryAttributes()["AVPhrase"][wordstr]["Alt"] = phrase.c_str(); + phrase = tmp; + } + phrase.erase(0,phrase.find_first_not_of("\r\n\v\t ")); 
phrase.erase(phrase.find_last_not_of("\r\n\v\t ")+1); + freq.erase(0,freq.find_first_not_of("\r\n\v\t ")); freq.erase(freq.find_last_not_of("\r\n\v\t ")+1); + module->getEntryAttributes()["AVPhrase"][wordstr]["Phrase"] = phrase.c_str(); + module->getEntryAttributes()["AVPhrase"][wordstr]["Frequency"] = freq.c_str(); + currentPhrase = 0; + currentPhraseEnd = 0; + } + } + } + if (*from == ';') inAV = false; + + } + else if (!strncmp(from, "AV-", 3)) { + inAV = true; + from+=2; + } + } + } + return 0; +} + + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/latin1utf16.cpp b/src/modules/filters/latin1utf16.cpp new file mode 100644 index 0000000..1392750 --- /dev/null +++ b/src/modules/filters/latin1utf16.cpp @@ -0,0 +1,119 @@ +/****************************************************************************** + * + * Latin1UTF16 - SWFilter descendant to convert a Latin-1 character to UTF-16 + * + */ + + +#include +#include +#include +#include + +SWORD_NAMESPACE_START + +Latin1UTF16::Latin1UTF16() { +} + + +char Latin1UTF16::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + const unsigned char *from; + if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering + return (char)-1; + + + SWBuf orig = text; + from = (const unsigned char *)orig.c_str(); + + for (text = ""; *from; from++) { + text.setSize(text.size()+2); + switch (*from) { + case 0x80: // '€' + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x20AC; + break; + case 0x82: // '‚' + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x201A; + break; + case 0x83: // 'ƒ' + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x0192; + break; + case 0x84: // '„' + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x201E; + break; + case 0x85: // '…' + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x2026; + break; + case 0x86: // '†' + *((unsigned short 
*)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x2020; + break; + case 0x87: // '‡' + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x2021; + break; + case 0x88: // 'ˆ' + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x02C6; + break; + case 0x89: // '‰' + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x2030; + break; + case 0x8A: // 'Š' + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x0160; + break; + case 0x8B: // '‹' + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x2039; + break; + case 0x8C: // 'Œ' + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x0152; + break; + case 0x8E: // 'Ž' + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x017D; + break; + case 0x91: // '‘' + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x2018; + break; + case 0x92: // '’' + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x2019; + break; + case 0x93: // '“' + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x201C; + break; + case 0x94: // '”' + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x201D; + break; + case 0x95: // '•' + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x2022; + break; + case 0x96: // '–' + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x2013; + break; + case 0x97: // '—' + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x2014; + break; + case 0x98: // '˜' + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x02DC; + break; + case 0x99: // '™' + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x2122; + break; + case 0x9A: // 'š' + *((unsigned short *)(text.getRawData()+(text.size()-2))) = 
(unsigned short) 0x0161; + break; + case 0x9B: // '›' + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x203A; + break; + case 0x9C: // 'œ' + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x0153; + break; + case 0x9E: // 'ž' + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x017E; + break; + case 0x9F: // 'Ÿ' + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x0178; + break; + default: + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) *from; + } + } + return 0; +} + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/latin1utf8.cpp b/src/modules/filters/latin1utf8.cpp new file mode 100644 index 0000000..6c0d7f1 --- /dev/null +++ b/src/modules/filters/latin1utf8.cpp @@ -0,0 +1,173 @@ +/****************************************************************************** + * + * Latin1UTF8 - SWFilter descendant to convert a Latin-1 character to UTF-8 + * + */ + + +#include +#include +#include +#include + +SWORD_NAMESPACE_START + +Latin1UTF8::Latin1UTF8() { +} + + +char Latin1UTF8::processText(SWBuf &text, const SWKey *key, const SWModule *module) +{ + const unsigned char *from; + + if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering + return (char)-1; + + SWBuf orig = text; + from = (const unsigned char *)orig.c_str(); + + for (text = ""; *from; from++) { + if (*from < 0x80) { + text += *from; + } + else if (*from < 0xc0) { + switch(*from) { + case 0x80: // '€' + text += 0xe2; // 'â' + text += 0x82; // '‚' + text += 0xac; // '¬' + break; + case 0x82: // '‚' + text += 0xe2; // 'â' + text += 0x80; // '€' + text += 0x9a; // 'š' + break; + case 0x83: // 'ƒ' + text += 0xc6; // 'Æ' + text += 0x92; // '’' + break; + case 0x84: // '„' + text += 0xe2; // 'â' + text += 0x80; // '€' + text += 0x9e; // 'ž' + break; + case 0x85: // '…' + text += 0xe2; // 'â' + text += 0x80; // '€' + text += 0xa6; // '¦' + break; + case 0x86: // '†' 
+ text += 0xe2; // 'â' + text += 0x80; // '€' + text += 0xa0; // ' ' + break; + case 0x87: // '‡' + text += 0xe2; // 'â' + text += 0x80; // '€' + text += 0xa1; // '¡' + break; + case 0x88: // 'ˆ' + text += 0xcb; // 'Ë' + text += 0x86; // '†' + break; + case 0x89: // '‰' + text += 0xe2; // 'â' + text += 0x80; // '€' + text += 0xb0; // '°' + break; + case 0x8A: // 'Š' + text += 0xc5; // 'Å' + text += 0xa0; // ' ' + break; + case 0x8B: // '‹' + text += 0xe2; // 'â' + text += 0x80; // '€' + text += 0xb9; // '¹' + break; + case 0x8C: // 'Œ' + text += 0xc5; // 'Å' + text += 0x92; // '’' + break; + case 0x8E: // 'Ž' + text += 0xc5; // 'Å' + text += 0xbd; // '½' + break; + case 0x91: // '‘' + text += 0xe2; // 'â' + text += 0x80; // '€' + text += 0x98; // '˜' + break; + case 0x92: // '’' + text += 0xe2; // 'â' + text += 0x80; // '€' + text += 0x99; // '™' + break; + case 0x93: // '“' + text += 0xe2; // 'â' + text += 0x80; // '€' + text += 0x9c; // 'œ' + break; + case 0x94: // '”' + text += 0xe2; // 'â' + text += 0x80; // '€' + text += 0x9d; // '' + break; + case 0x95: // '•' + text += 0xe2; // 'â' + text += 0x80; // '€' + text += 0xa2; // '¢' + break; + case 0x96: // '–' + text += 0xe2; // 'â' + text += 0x80; // '€' + text += 0x93; // '“' + break; + case 0x97: // '—' + text += 0xe2; // 'â' + text += 0x80; // '€' + text += 0x94; // '”' + break; + case 0x98: // '˜' + text += 0xcb; // 'Ë' + text += 0x9c; // 'œ' + break; + case 0x99: // '™' + text += 0xe2; // 'â' + text += 0x84; // '„' + text += 0xa2; // '¢' + break; + case 0x9A: // 'š' + text += 0xc5; // 'Å' + text += 0xa1; // '¡' + break; + case 0x9B: // '›' + text += 0xe2; // 'â' + text += 0x80; // '€' + text += 0xba; // 'º' + break; + case 0x9C: // 'œ' + text += 0xc5; // 'Å' + text += 0x93; // '“' + break; + case 0x9E: // 'ž' + text += 0xc5; // 'Å' + text += 0xbe; // '¾' + break; + case 0x9F: // 'Ÿ' + text += 0xc5; // 'Å' + text += 0xb8; // '¸' + break; + default: + text += 0xC2; + text += *from; + } + } + else { + text += 
0xC3; + text += (*from - 0x40); + } + } + return 0; +} + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/osisfootnotes.cpp b/src/modules/filters/osisfootnotes.cpp new file mode 100644 index 0000000..89c9c40 --- /dev/null +++ b/src/modules/filters/osisfootnotes.cpp @@ -0,0 +1,157 @@ +/****************************************************************************** + * + * osisfootnotes - SWFilter descendant to hide or show footnotes + * in an OSIS module. + */ + + +#include +#include +#include +#include +#include +#include +#include +#include + +SWORD_NAMESPACE_START + +const char oName[] = "Footnotes"; +const char oTip[] = "Toggles Footnotes On and Off if they exist"; + +const SWBuf choices[3] = {"Off", "On", ""}; +const StringList oValues(&choices[0], &choices[2]); + + +OSISFootnotes::OSISFootnotes() : SWOptionFilter(oName, oTip, &oValues) { + setOptionValue("Off"); +} + + +OSISFootnotes::~OSISFootnotes() { +} + + +char OSISFootnotes::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + SWBuf token; + bool intoken = false; + bool hide = false; + SWBuf tagText; + XMLTag startTag; + SWBuf refs = ""; + int footnoteNum = 1; + char buf[254]; + VerseKey parser(key->getText()); + + SWBuf orig = text; + const char *from = orig.c_str(); + + XMLTag tag; + bool strongsMarkup = false; + + + for (text = ""; *from; ++from) { + + // remove all newlines temporarily to fix kjv2003 module + if ((*from == 10) || (*from == 13)) { + if ((text.length()>1) && (text[text.length()-2] != ' ') && (*(from+1) != ' ')) + text.append(' '); + continue; + } + + + if (*from == '<') { + intoken = true; + token = ""; + continue; + } + + + + if (*from == '>') { // process tokens + intoken = false; + if (!strncmp(token, "note", 4) || !strncmp(token.c_str(), "/note", 5)) { + tag = token; + + if (!tag.isEndTag()) { + if (tag.getAttribute("type") && (!strcmp("x-strongsMarkup", tag.getAttribute("type")) + || !strcmp("strongsMarkup", tag.getAttribute("type"))) // deprecated + ) { + 
tag.setEmpty(false); // handle bug in KJV2003 module where some note open tags were + strongsMarkup = true; + } + + if (!tag.isEmpty()) { +// if ((!tag.isEmpty()) || (SWBuf("strongsMarkup") == tag.getAttribute("type"))) { + refs = ""; + startTag = tag; + hide = true; + tagText = ""; + continue; + } + } + if (hide && tag.isEndTag()) { + if (module->isProcessEntryAttributes() && !strongsMarkup) { //don`t parse strongsMarkup to EntryAttributes as Footnote + sprintf(buf, "%i", footnoteNum++); + StringList attributes = startTag.getAttributeNames(); + for (StringList::const_iterator it = attributes.begin(); it != attributes.end(); it++) { + module->getEntryAttributes()["Footnote"][buf][it->c_str()] = startTag.getAttribute(it->c_str()); + } + module->getEntryAttributes()["Footnote"][buf]["body"] = tagText; + startTag.setAttribute("swordFootnote", buf); + if ((startTag.getAttribute("type")) && (!strcmp(startTag.getAttribute("type"), "crossReference"))) { + if (!refs.length()) + refs = parser.ParseVerseList(tagText.c_str(), parser, true).getRangeText(); + module->getEntryAttributes()["Footnote"][buf]["refList"] = refs.c_str(); + } + } + hide = false; + if (option || (startTag.getAttribute("type") && !strcmp(startTag.getAttribute("type"), "crossReference"))) { // we want the tag in the text; crossReferences are handled by another filter + text.append(startTag); +// text.append(tagText); // we don't put the body back in because it is retrievable from EntryAttributes["Footnotes"][]["body"]. + } + else continue; + } + strongsMarkup = false; + } + + // if not a heading token, keep token in text + //if ((!strcmp(tag.getName(), "reference")) && (!tag.isEndTag())) { + // SWBuf osisRef = tag.getAttribute("osisRef"); + if (!strncmp(token, "reference", 9)) { + if (refs.length()) { + refs.append("; "); + } + + const char* attr = strstr(token.c_str() + 9, "osisRef=\""); + const char* end = attr ? 
strchr(attr+9, '"') : 0; + + if (attr && end) { + refs.append(attr+9, end-(attr+9)); + } + } + if (!hide) { + text.append('<'); + text.append(token); + text.append('>'); + } + else { + tagText.append('<'); + tagText.append(token); + tagText.append('>'); + } + continue; + } + if (intoken) { //copy token + token.append(*from); + } + else if (!hide) { //copy text which is not inside a token + text.append(*from); + } + else tagText.append(*from); + } + return 0; +} + +SWORD_NAMESPACE_END + diff --git a/src/modules/filters/osisheadings.cpp b/src/modules/filters/osisheadings.cpp new file mode 100644 index 0000000..a072335 --- /dev/null +++ b/src/modules/filters/osisheadings.cpp @@ -0,0 +1,144 @@ +/****************************************************************************** + * + *osisheadings - SWFilter descendant to hide or show headings + * in an OSIS module. + */ + + +#include +#include +#include +#include +#include +#include + +SWORD_NAMESPACE_START + +const char oName[] = "Headings"; +const char oTip[] = "Toggles Headings On and Off if they exist"; + +const SWBuf choices[3] = {"Off", "On", ""}; +const StringList oValues(&choices[0], &choices[2]); + +OSISHeadings::OSISHeadings() : SWOptionFilter(oName, oTip, &oValues) { + setOptionValue("Off"); +} + + +OSISHeadings::~OSISHeadings() { +} + + +char OSISHeadings::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + SWBuf token; + bool intoken = false; + bool hide = false; + bool preverse = false; + bool withinTitle = false; + bool canonical = false; + SWBuf header; + int headerNum = 0; + int pvHeaderNum = 0; + char buf[254]; + XMLTag startTag; + + SWBuf orig = text; + const char *from = orig.c_str(); + + XMLTag tag; + + for (text = ""; *from; ++from) { + if (*from == '<') { + intoken = true; + token = ""; + + continue; + } + if (*from == '>') { // process tokens + intoken = false; + + if (!strncmp(token.c_str(), "title", 5) || !strncmp(token.c_str(), "/title", 6)) { + withinTitle = 
(!strnicmp(token.c_str(), "title", 5)); + tag = token; + + if (!tag.isEndTag()) { //start tag + if (!tag.isEmpty()) { + startTag = tag; + } + } + + if ( (tag.getAttribute("subType") && !stricmp(tag.getAttribute("subType"), "x-preverse")) + || (tag.getAttribute("subtype") && !stricmp(tag.getAttribute("subtype"), "x-preverse")) // deprecated + ) { + hide = true; + preverse = true; + header = ""; + canonical = (tag.getAttribute("canonical") && (!stricmp(tag.getAttribute("canonical"), "true"))); + continue; + } + if (!tag.isEndTag()) { //start tag + hide = true; + header = ""; + if (option || canonical) { // we want the tag in the text + text.append('<'); + text.append(token); + text.append('>'); + } + continue; + } + if (hide && tag.isEndTag()) { + if (module->isProcessEntryAttributes() && ((option || canonical) || (!preverse))) { + if (preverse) { + sprintf(buf, "%i", pvHeaderNum++); + module->getEntryAttributes()["Heading"]["Preverse"][buf] = header; + } + else { + sprintf(buf, "%i", headerNum++); + module->getEntryAttributes()["Heading"]["Interverse"][buf] = header; + if (option || canonical) { // we want the tag in the text + text.append(header); + } + } + + StringList attributes = startTag.getAttributeNames(); + for (StringList::const_iterator it = attributes.begin(); it != attributes.end(); it++) { + module->getEntryAttributes()["Heading"][buf][it->c_str()] = startTag.getAttribute(it->c_str()); + } + } + + hide = false; + if (!(option || canonical) || preverse) { // we don't want the tag in the text anymore + preverse = false; + continue; + } + preverse = false; + } + } + + if (withinTitle) { + header.append('<'); + header.append(token); + header.append('>'); + } else { + // if not a heading token, keep token in text + if (!hide) { + text.append('<'); + text.append(token); + text.append('>'); + } + } + continue; + } + if (intoken) { //copy token + token.append(*from); + } + else if (!hide) { //copy text which is not inside a token + text.append(*from); + } + 
else header.append(*from); + } + return 0; +} + +SWORD_NAMESPACE_END + diff --git a/src/modules/filters/osishtmlhref.cpp b/src/modules/filters/osishtmlhref.cpp new file mode 100644 index 0000000..fe3e058 --- /dev/null +++ b/src/modules/filters/osishtmlhref.cpp @@ -0,0 +1,561 @@ +/*************************************************************************** + osishtmlhref.cpp - OSIS to HTML with hrefs filter + ------------------- + begin : 2003-06-24 + copyright : 2003 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation version 2 of the License. * + * * + ***************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +SWORD_NAMESPACE_START + +class OSISHTMLHREF::QuoteStack : public std::stack { +}; + +OSISHTMLHREF::MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) { + inBold = false; + inXRefNote = false; + suspendLevel = 0; + quoteStack = new QuoteStack(); + wordsOfChristStart = " "; + wordsOfChristEnd = " "; + if (module) { + osisQToTick = ((!module->getConfigEntry("OSISqToTick")) || (strcmp(module->getConfigEntry("OSISqToTick"), "false"))); + version = module->Name(); + BiblicalText = (!strcmp(module->Type(), "Biblical Texts")); + } + else { + osisQToTick = true; // default + version = ""; + } +} + +OSISHTMLHREF::MyUserData::~MyUserData() { + // Just in case the quotes are not well formed + while (!quoteStack->empty()) { + char *tagData = quoteStack->top(); + quoteStack->pop(); + delete [] tagData; + } + delete quoteStack; +} + +OSISHTMLHREF::OSISHTMLHREF() { + setTokenStart("<"); + 
setTokenEnd(">"); + + setEscapeStart("&"); + setEscapeEnd(";"); + + setEscapeStringCaseSensitive(true); + setPassThruNumericEscapeString(true); + + addAllowedEscapeString("quot"); + addAllowedEscapeString("apos"); + addAllowedEscapeString("amp"); + addAllowedEscapeString("lt"); + addAllowedEscapeString("gt"); + + setTokenCaseSensitive(true); + + addTokenSubstitute("lg", "
"); + addTokenSubstitute("/lg", "
"); + + morphFirst = false; +} + +// though this might be slightly slower, possibly causing an extra bool check, this is a renderFilter +// so speed isn't the absolute highest priority, and this is a very minor possible hit +static inline void outText(const char *t, SWBuf &o, BasicFilterUserData *u) { if (!u->suspendTextPassThru) o += t; else u->lastSuspendSegment += t; } +static inline void outText(char t, SWBuf &o, BasicFilterUserData *u) { if (!u->suspendTextPassThru) o += t; else u->lastSuspendSegment += t; } + +void processLemma(bool suspendTextPassThru, XMLTag &tag, SWBuf &buf) { + const char *attrib; + const char *val; + if ((attrib = tag.getAttribute("lemma"))) { + int count = tag.getAttributePartCount("lemma", ' '); + int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0 + do { + attrib = tag.getAttribute("lemma", i, ' '); + if (i < 0) i = 0; // to handle our -1 condition + val = strchr(attrib, ':'); + val = (val) ? (val + 1) : attrib; + SWBuf gh; + if(*val == 'G') + gh = "Greek"; + if(*val == 'H') + gh = "Hebrew"; + const char *val2 = val; + if ((strchr("GH", *val)) && (isdigit(val[1]))) + val2++; + //if ((!strcmp(val2, "3588")) && (lastText.length() < 1)) + // show = false; + //else { + if (!suspendTextPassThru) { + buf.appendFormatted("<%s>", + (gh.length()) ? gh.c_str() : "", + URL::encode(val2).c_str(), + val2); + } + //} + + } while (++i < count); + } +} + +void processMorph(bool suspendTextPassThru, XMLTag &tag, SWBuf &buf) { + const char * attrib; + const char *val; + if ((attrib = tag.getAttribute("morph"))) { // && (show)) { + SWBuf savelemma = tag.getAttribute("savlm"); + //if ((strstr(savelemma.c_str(), "3588")) && (lastText.length() < 1)) + // show = false; + //if (show) { + int count = tag.getAttributePartCount("morph", ' '); + int i = (count > 1) ? 
0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0 + do { + attrib = tag.getAttribute("morph", i, ' '); + if (i < 0) i = 0; // to handle our -1 condition + val = strchr(attrib, ':'); + val = (val) ? (val + 1) : attrib; + const char *val2 = val; + if ((*val == 'T') && (strchr("GH", val[1])) && (isdigit(val[2]))) + val2+=2; + if (!suspendTextPassThru) { + buf.appendFormatted("(%s)", + URL::encode(tag.getAttribute("morph")).c_str(), + URL::encode(val).c_str(), + val2); + } + } while (++i < count); + //} + } +} + +bool OSISHTMLHREF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) { + MyUserData *u = (MyUserData *)userData; + SWBuf scratch; + bool sub = (u->suspendTextPassThru) ? substituteToken(scratch, token) : substituteToken(buf, token); + if (!sub) { + // manually process if it wasn't a simple substitution + XMLTag tag(token); + + // tag + if (!strcmp(tag.getName(), "w")) { + + // start tag + if ((!tag.isEmpty()) && (!tag.isEndTag())) { + u->w = token; + } + + // end or empty tag + else { + bool endTag = tag.isEndTag(); + SWBuf lastText; + //bool show = true; // to handle unplaced article in kjv2003-- temporary till combined + + if (endTag) { + tag = u->w.c_str(); + lastText = u->lastTextNode.c_str(); + } + else lastText = "stuff"; + + const char *attrib; + const char *val; + if ((attrib = tag.getAttribute("xlit"))) { + val = strchr(attrib, ':'); + val = (val) ? (val + 1) : attrib; + outText(" ", buf, u); + outText(val, buf, u); + } + if ((attrib = tag.getAttribute("gloss"))) { + val = strchr(attrib, ':'); + val = (val) ? (val + 1) : attrib; + outText(" ", buf, u); + outText(val, buf, u); + } + if (!morphFirst) { + processLemma(u->suspendTextPassThru, tag, buf); + processMorph(u->suspendTextPassThru, tag, buf); + } + else { + processMorph(u->suspendTextPassThru, tag, buf); + processLemma(u->suspendTextPassThru, tag, buf); + } + if ((attrib = tag.getAttribute("POS"))) { + val = strchr(attrib, ':'); + val = (val) ? 
(val + 1) : attrib; + outText(" ", buf, u); + outText(val, buf, u); + } + + /*if (endTag) + buf += "}";*/ + } + } + + // tag + else if (!strcmp(tag.getName(), "note")) { + if (!tag.isEndTag()) { + SWBuf type = tag.getAttribute("type"); + bool strongsMarkup = (type == "x-strongsMarkup" || type == "strongsMarkup"); // the latter is deprecated + if (strongsMarkup) { + tag.setEmpty(false); // handle bug in KJV2003 module where some note open tags were + } + + if (!tag.isEmpty()) { + + if (!strongsMarkup) { // leave strong's markup notes out, in the future we'll probably have different option filters to turn different note types on or off + SWBuf footnoteNumber = tag.getAttribute("swordFootnote"); + VerseKey *vkey = NULL; + char ch = ((tag.getAttribute("type") && ((!strcmp(tag.getAttribute("type"), "crossReference")) || (!strcmp(tag.getAttribute("type"), "x-cross-ref")))) ? 'x':'n'); + + u->inXRefNote = true; // Why this change? Ben Morgan: Any note can have references in, so we need to set this to true for all notes +// u->inXRefNote = (ch == 'x'); + + // see if we have a VerseKey * or descendant + SWTRY { + vkey = SWDYNAMIC_CAST(VerseKey, u->key); + } + SWCATCH ( ... ) { } + if (vkey) { + //printf("URL = %s\n",URL::encode(vkey->getText()).c_str()); + buf.appendFormatted("*%c", + ch, + URL::encode(footnoteNumber.c_str()).c_str(), + URL::encode(u->version.c_str()).c_str(), + URL::encode(vkey->getText()).c_str(), + ch); + } + else { + buf.appendFormatted("*%c", + ch, + URL::encode(footnoteNumber.c_str()).c_str(), + URL::encode(u->version.c_str()).c_str(), + URL::encode(u->key->getText()).c_str(), + ch); + } + } + } + u->suspendTextPassThru = (++u->suspendLevel); + } + if (tag.isEndTag()) { + u->suspendTextPassThru = (--u->suspendLevel); + u->inXRefNote = false; + u->lastSuspendSegment = ""; // fix/work-around for nasb devineName in note bug + } + } + + //

paragraph tag + else if (!strcmp(tag.getName(), "p")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { // non-empty start tag + outText("
", buf, u); + } + else if (tag.isEndTag()) { // end tag + outText("
", buf, u); + userData->supressAdjacentWhitespace = true; + } + else { // empty paragraph break marker + outText("
", buf, u); + userData->supressAdjacentWhitespace = true; + } + } + + // tag + else if (!strcmp(tag.getName(), "reference")) { + if (!u->inXRefNote) { // only show these if we're not in an xref note + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + u->suspendTextPassThru = (++u->suspendLevel); + } + if (tag.isEndTag()) { + if (!u->BiblicalText) { + SWBuf refList = tag.getAttribute("passage"); + if (!refList.length()) + refList = u->lastTextNode; + SWBuf version = tag.getAttribute("version"); + + buf.appendFormatted(" ", + (refList.length()) ? URL::encode(refList.c_str()).c_str() : "", + (version.length()) ? URL::encode(version.c_str()).c_str() : ""); + buf += u->lastTextNode.c_str(); + buf += " "; + } + else { + SWBuf footnoteNumber = tag.getAttribute("swordFootnote"); + VerseKey *vkey = NULL; + // see if we have a VerseKey * or descendant + SWTRY { + vkey = SWDYNAMIC_CAST(VerseKey, u->key); + } + SWCATCH ( ... ) {} + if (vkey) { + // leave this special osis type in for crossReference notes types? Might thml use this some day? Doesn't hurt. + //buf.appendFormatted("*x ", vkey->getText(), footnoteNumber.c_str()); + buf.appendFormatted("*x", + URL::encode(footnoteNumber.c_str()).c_str(), + URL::encode(u->version.c_str()).c_str(), + URL::encode(vkey->getText()).c_str()); + + } + } + u->suspendTextPassThru = (--u->suspendLevel); + } + }/* + if (tag.isEndTag()) { + u->suspendTextPassThru = false; + }*/ + } + + // poetry, etc + else if (!strcmp(tag.getName(), "l")) { + // end line marker + if (tag.getAttribute("eID")) { + outText("
", buf, u); + } + // without eID or sID + // Note: this is improper osis. This should be + else if (tag.isEmpty() && !tag.getAttribute("sID")) { + outText("
", buf, u); + } + // end of the line + else if (tag.isEndTag()) { + outText("
", buf, u); + } + } + + // + else if (!strcmp(tag.getName(), "lb")) { + outText("
", buf, u); + userData->supressAdjacentWhitespace = true; + } + // + // + // + else if ((!strcmp(tag.getName(), "milestone")) && (tag.getAttribute("type"))) { + if(!strcmp(tag.getAttribute("type"), "line")) { + outText("
", buf, u); + userData->supressAdjacentWhitespace = true; + } + else if(!strcmp(tag.getAttribute("type"),"x-p")) { + if( tag.getAttribute("marker")) + outText(tag.getAttribute("marker"), buf, u); + else outText("", buf, u); + } + else if (!strcmp(tag.getAttribute("type"), "cQuote")) { + const char *tmp = tag.getAttribute("marker"); + bool hasMark = tmp; + SWBuf mark = tmp; + tmp = tag.getAttribute("level"); + int level = (tmp) ? atoi(tmp) : 1; + + // first check to see if we've been given an explicit mark + if (hasMark) + outText(mark, buf, u); + // finally, alternate " and ', if config says we should supply a mark + else if (u->osisQToTick) + outText((level % 2) ? '\"' : '\'', buf, u); + } + } + + // + else if (!strcmp(tag.getName(), "title")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + outText("<b>", buf, u); + } + else if (tag.isEndTag()) { + outText("</b><br />", buf, u); + } + } + + // <catchWord> & <rdg> tags (italicize) + else if (!strcmp(tag.getName(), "rdg") || !strcmp(tag.getName(), "catchWord")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + outText("<i>", buf, u); + } + else if (tag.isEndTag()) { + outText("</i>", buf, u); + } + } + + // divineName + else if (!strcmp(tag.getName(), "divineName")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + u->suspendTextPassThru = (++u->suspendLevel); + } + else if (tag.isEndTag()) { + SWBuf lastText = u->lastSuspendSegment.c_str(); + u->suspendTextPassThru = (--u->suspendLevel); + if (lastText.size()) { + toupperstr(lastText); + scratch.setFormatted("%c<font size=\"-1\">%s</font>", lastText[0], lastText.c_str()+1); + outText(scratch.c_str(), buf, u); + } + } + } + + // <hi> text highlighting + else if (!strcmp(tag.getName(), "hi")) { + SWBuf type = tag.getAttribute("type"); + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + if (type == "b" || type == "x-b") { + outText("<b>", buf, u); + u->inBold = true; + } + else { // all other types + outText("<i>", buf, u); + u->inBold = false; + } + } + else if 
(tag.isEndTag()) { + if(u->inBold) { + outText("</b>", buf, u); + u->inBold = false; + } + else outText("</i>", buf, u); + } + } + + // <q> quote + // Rules for a quote element: + // If the tag is empty with an sID or an eID then use whatever it specifies for quoting. + // Note: empty elements without sID or eID are ignored. + // If the tag is <q> then use it's specifications and push it onto a stack for </q> + // If the tag is </q> then use the pushed <q> for specification + // If there is a marker attribute, possibly empty, this overrides osisQToTick. + // If osisQToTick, then output the marker, using level to determine the type of mark. + else if (!strcmp(tag.getName(), "q")) { + SWBuf type = tag.getAttribute("type"); + SWBuf who = tag.getAttribute("who"); + const char *tmp = tag.getAttribute("level"); + int level = (tmp) ? atoi(tmp) : 1; + tmp = tag.getAttribute("marker"); + bool hasMark = tmp; + SWBuf mark = tmp; + + // open <q> or <q sID... /> + if ((!tag.isEmpty() && !tag.isEndTag()) || (tag.isEmpty() && tag.getAttribute("sID"))) { + // if <q> then remember it for the </q> + if (!tag.isEmpty()) { + char *tagData = 0; + stdstr(&tagData, tag.toString()); + u->quoteStack->push(tagData); + } + + // Do this first so quote marks are included as WoC + if (who == "Jesus") + outText(u->wordsOfChristStart, buf, u); + + // first check to see if we've been given an explicit mark + if (hasMark) + outText(mark, buf, u); + //alternate " and ' + else if (u->osisQToTick) + outText((level % 2) ? '\"' : '\'', buf, u); + } + // close </q> or <q eID... /> + else if ((tag.isEndTag()) || (tag.isEmpty() && tag.getAttribute("eID"))) { + // if it is </q> then pop the stack for the attributes + if (tag.isEndTag() && !u->quoteStack->empty()) { + char *tagData = u->quoteStack->top(); + u->quoteStack->pop(); + XMLTag qTag(tagData); + delete [] tagData; + + type = qTag.getAttribute("type"); + who = qTag.getAttribute("who"); + tmp = qTag.getAttribute("level"); + level = (tmp) ? 
atoi(tmp) : 1; + tmp = qTag.getAttribute("marker"); + hasMark = tmp; + mark = tmp; + } + + // first check to see if we've been given an explicit mark + if (hasMark) + outText(mark, buf, u); + // finally, alternate " and ', if config says we should supply a mark + else if (u->osisQToTick) + outText((level % 2) ? '\"' : '\'', buf, u); + + // Do this last so quote marks are included as WoC + if (who == "Jesus") + outText(u->wordsOfChristEnd, buf, u); + } + } + + // <transChange> + else if (!strcmp(tag.getName(), "transChange")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + SWBuf type = tag.getAttribute("type"); + u->lastTransChange = type; + + // just do all transChange tags this way for now + if ((type == "added") || (type == "supplied")) + outText("<i>", buf, u); + else if (type == "tenseChange") + buf += "*"; + } + else if (tag.isEndTag()) { + SWBuf type = u->lastTransChange; + if ((type == "added") || (type == "supplied")) + outText("</i>", buf, u); + } + else { // empty transChange marker? + } + } + + // image + else if (!strcmp(tag.getName(), "figure")) { + const char *src = tag.getAttribute("src"); + if (!src) // assert we have a src attribute + return false; + + SWBuf filepath; + if (userData->module) { + filepath = userData->module->getConfigEntry("AbsoluteDataPath"); + if ((filepath.size()) && (filepath[filepath.size()-1] != '/') && (src[0] != '/')) + filepath += '/'; + } + filepath += src; + + // images become clickable, if the UI supports showImage. 
+ outText("<a href=\"passagestudy.jsp?action=showImage&value=", buf, u); + outText(URL::encode(filepath.c_str()).c_str(), buf, u); + outText("&module=", buf, u); + outText(URL::encode(u->version.c_str()).c_str(), buf, u); + outText("\">", buf, u); + +// we do this because BibleCS looks for this EXACT format for an image tag + outText("<image border=0 src=\"", buf, u); + outText(filepath, buf, u); + outText("\" />", buf, u); + + outText("</a>", buf, u); + } + + else { + return false; // we still didn't handle token + } + } + return true; +} + + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/osislemma.cpp b/src/modules/filters/osislemma.cpp new file mode 100644 index 0000000..f5e6ff6 --- /dev/null +++ b/src/modules/filters/osislemma.cpp @@ -0,0 +1,85 @@ +/****************************************************************************** + * + * osislemma - SWFilter descendant to hide or show lemmata + * in a OSIS module. + */ + + +#include <stdlib.h> +#include <osislemma.h> +#include <utilxml.h> + +SWORD_NAMESPACE_START + +const char oName[] = "Lemmas"; +const char oTip[] = "Toggles Lemmas On and Off if they exist"; + +const SWBuf choices[3] = {"Off", "On", ""}; +const StringList oValues(&choices[0], &choices[2]); + +OSISLemma::OSISLemma() : SWOptionFilter(oName, oTip, &oValues) { + setOptionValue("Off"); +} + + +OSISLemma::~OSISLemma() { +} + + +char OSISLemma::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + SWBuf token; + bool intoken = false; + bool lastspace = false; + + const SWBuf orig = text; + const char * from = orig.c_str(); + + if (!option) { + for (text = ""; *from; ++from) { + if (*from == '<') { + intoken = true; + token = ""; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (token.startsWith("w ")) { // Word + XMLTag wtag(token); + int count = wtag.getAttributePartCount("lemma", ' '); + for (int i = 0; i < count; i++) { + SWBuf a = wtag.getAttribute("lemma", i, ' '); + const char *prefix = 
a.stripPrefix(':'); + if ((!prefix) || ((SWBuf)prefix).startsWith("lemma.")) { + // remove attribute part + wtag.setAttribute("lemma", 0, i, ' '); + i--; + count--; + } + } + token = wtag; + token.trim(); + // drop <> + token << 1; + token--; + } + + // keep token in text + text.append('<'); + text.append(token); + text.append('>'); + + continue; + } + if (intoken) { + token += *from; + } + else { + text.append(*from); + lastspace = (*from == ' '); + } + } + } + return 0; +} + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/osismorph.cpp b/src/modules/filters/osismorph.cpp new file mode 100644 index 0000000..69d44d5 --- /dev/null +++ b/src/modules/filters/osismorph.cpp @@ -0,0 +1,85 @@ +/****************************************************************************** + * + * osismorph - SWFilter descendant to hide or show morph tags + * in a OSIS module. + */ + + +#include <stdlib.h> +#include <osismorph.h> + +SWORD_NAMESPACE_START + +const char oName[] = "Morphological Tags"; +const char oTip[] = "Toggles Morphological Tags On and Off if they exist"; + +const SWBuf choices[3] = {"Off", "On", ""}; +const StringList oValues(&choices[0], &choices[2]); + +OSISMorph::OSISMorph() : SWOptionFilter(oName, oTip, &oValues) { + setOptionValue("Off"); +} + + +OSISMorph::~OSISMorph() { +} + + +char OSISMorph::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + if (!option) { // if we don't want morph tags + char token[2048]; // cheese. Fix. 
+ int tokpos = 0; + bool intoken = false; + bool lastspace = false; + SWBuf orig = text; + const char *from = orig.c_str(); + + //taken out of the loop for speed + const char* start = 0; + const char* end = 0; + + for (text = ""; *from; ++from) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + + if ((*token == 'w') && (token[1] == ' ')) { + start = strstr(token+2, "morph=\""); //we leave out "w " at the start + end = start ? strchr(start+7, '"') : 0; //search the end of the morph value + + if (start && end) { //start and end of the morph tag found + text.append('<'); + text.append(token, start-token); //the text before the morph attr + text.append(end+1); //text after the morph attr + text.append('>'); + + continue; //next loop + } + } + + text.append('<'); + text.append(token); + text.append('>'); + + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos] = 0; + } + else { + text.append(*from); + lastspace = (*from == ' '); + } + } + } + return 0; +} + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/osismorphsegmentation.cpp b/src/modules/filters/osismorphsegmentation.cpp new file mode 100644 index 0000000..bf32581 --- /dev/null +++ b/src/modules/filters/osismorphsegmentation.cpp @@ -0,0 +1,106 @@ +/****************************************************************************** + * + * osismorphsegmentation - SWFilter descendant to toggle splitting of morphemes + * (for morpheme segmented Hebrew in the WLC) + */ + + +#include <osismorphsegmentation.h> +#include <stdlib.h> +#include <utilxml.h> +#include <swmodule.h> +#include <swbuf.h> + +SWORD_NAMESPACE_START + +const char oName[] = "Morpheme Segmentation"; +const char oTip[] = "Toggles Morpheme Segmentation On and Off, when present"; + +const SWBuf choices[3] = {"Off", "On", ""}; +const StringList oValues(&choices[0], &choices[2]); + 
+OSISMorphSegmentation::OSISMorphSegmentation() : SWOptionFilter(oName, oTip, &oValues) { + setOptionValue("Off"); +} + + +OSISMorphSegmentation::~OSISMorphSegmentation() {} + + +char OSISMorphSegmentation::processText(SWBuf &text, const SWKey * /*key*/, const SWModule *module) { + SWBuf token; + bool intoken = false; + bool hide = false; + + SWBuf orig( text ); + const char *from = orig.c_str(); + + XMLTag tag; + SWBuf tagText = ""; + unsigned int morphemeNum = 0; + bool inMorpheme = false; + SWBuf buf; + + for (text = ""; *from; ++from) { + if (*from == '<') { + intoken = true; + token = ""; + continue; + } + + if (*from == '>') { // process tokens + intoken = false; + + if (!strncmp(token.c_str(), "seg ", 4) || !strncmp(token.c_str(), "/seg", 4)) { + tag = token; + + if (!tag.isEndTag() && tag.getAttribute("type") && !strcmp("morph", tag.getAttribute("type"))) { //<seg type="morph"> start tag + hide = !option; //only hide if option is Off + tagText = ""; + inMorpheme = true; + } + + if (tag.isEndTag()) { + buf.setFormatted("%.3d", morphemeNum++); + module->getEntryAttributes()["Morpheme"][buf]["body"] = tagText; + inMorpheme = false; + } + if (hide) { //hides start and end tags as long as hide is set + + if (tag.isEndTag()) { //</seg> + hide = false; + } + + continue; //leave out the current token + } + } //end of seg tag handling + + text.append('<'); + text.append(token); + text.append('>'); + + if (inMorpheme) { + tagText.append('<'); + tagText.append(token); + tagText.append('>'); + } + + hide = false; + + continue; + } //end of intoken part + + if (intoken) { //copy token + token.append(*from); + } + else { //copy text which is not inside of a tag + text.append(*from); + if (inMorpheme) { + tagText.append(*from); + } + } + } + return 0; +} + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/osisosis.cpp b/src/modules/filters/osisosis.cpp new file mode 100644 index 0000000..7da6089 --- /dev/null +++ b/src/modules/filters/osisosis.cpp @@ -0,0 +1,173 @@ 
+/*************************************************************************** + osisosis.cpp - internal OSIS to public OSIS filter + ------------------- + begin : 2004-03-13 + copyright : 2003 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation version 2 of the License. + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <osisosis.h> +#include <utilxml.h> +#include <versekey.h> +#include <swmodule.h> + +SWORD_NAMESPACE_START + + +OSISOSIS::MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) { + osisQToTick = ((!module->getConfigEntry("OSISqToTick")) || (strcmp(module->getConfigEntry("OSISqToTick"), "false"))); +} + + +OSISOSIS::OSISOSIS() { + setTokenStart("<"); + setTokenEnd(">"); + + setEscapeStart("&"); + setEscapeEnd(";"); + + setEscapeStringCaseSensitive(true); + setPassThruNumericEscapeString(true); + + addAllowedEscapeString("quot"); + addAllowedEscapeString("apos"); + addAllowedEscapeString("amp"); + addAllowedEscapeString("lt"); + addAllowedEscapeString("gt"); + + setTokenCaseSensitive(true); +} + + +char OSISOSIS::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + char status = SWBasicFilter::processText(text, key, module); + VerseKey *vkey = SWDYNAMIC_CAST(VerseKey, key); + if (vkey) { + SWBuf ref = ""; + if (vkey->Verse()) { + ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef()); + } + + if (ref.length() > 0) { + + text = ref + text; + + if (vkey->Verse()) { + VerseKey tmp; + tmp = *vkey; + tmp.AutoNormalize(0); + tmp.Headings(1); + + text += "</verse>"; + + tmp = MAXVERSE; + if (*vkey == 
tmp) { + tmp.Verse(0); +// sprintf(ref, "\t</div>"); +// pushString(&to, ref); + tmp = MAXCHAPTER; + tmp = MAXVERSE; + if (*vkey == tmp) { + tmp.Chapter(0); + tmp.Verse(0); +// sprintf(ref, "\t</div>"); +// pushString(&to, ref); + } + } + } + +// +// else if (vkey->Chapter()) { +// sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef()); +// } +// else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef()); +// + } + } + return status; +} + +bool OSISOSIS::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) { + // manually process if it wasn't a simple substitution + if (!substituteToken(buf, token)) { + MyUserData *u = (MyUserData *)userData; + XMLTag tag(token); + + if (!tag.isEmpty() && (!tag.isEndTag())) + u->startTag = tag; + + // <w> tag + if (!strcmp(tag.getName(), "w")) { + + // start <w> tag + if ((!tag.isEmpty()) && (!tag.isEndTag())) { + SWBuf attr = tag.getAttribute("lemma"); + if (attr.length()) { + if (!strncmp(attr.c_str(), "x-Strongs:", 10)) { + memcpy(attr.getRawData()+3, "strong", 6); + attr << 3; + tag.setAttribute("lemma", attr); + } + } + attr = tag.getAttribute("morph"); + if (attr.length()) { + if (!strncmp(attr.c_str(), "x-StrongsMorph:", 15)) { + memcpy(attr.getRawData()+3, "strong", 6); + attr << 3; + tag.setAttribute("lemma", attr); + } + if (!strncmp(attr.c_str(), "x-Robinson:", 11)) { + attr[2] = 'r'; + attr << 2; + tag.setAttribute("lemma", attr); + } + } + tag.setAttribute("wn", 0); + tag.setAttribute("savlm", 0); + tag.setAttribute("splitID", 0); + } + buf += tag; + } + + // <note> tag + else if (!strcmp(tag.getName(), "note")) { + if (!tag.isEndTag()) { + SWBuf type = tag.getAttribute("type"); + bool strongsMarkup = (type == "x-strongsMarkup" || type == "strongsMarkup"); // the latter is deprecated + if (strongsMarkup) { + tag.setEmpty(false); // handle bug in KJV2003 module where some note open tags were <note ... 
/> + } + + if (!tag.isEmpty()) { + tag.setAttribute("swordFootnote", 0); + + if (!strongsMarkup) { + buf += tag; + } + else u->suspendTextPassThru = true; + } + } + if (tag.isEndTag()) { + if (u->suspendTextPassThru == false) + buf+=tag; + else u->suspendTextPassThru = false; + } + } + + else { + return false; // we still didn't handle token + } + } + return true; +} + + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/osisplain.cpp b/src/modules/filters/osisplain.cpp new file mode 100644 index 0000000..62841a6 --- /dev/null +++ b/src/modules/filters/osisplain.cpp @@ -0,0 +1,192 @@ +/*************************************************************************** + osisplain.cpp - OSIS to Plaintext filter + ------------------- + begin : 2003-02-15 + copyright : 2003 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <osisplain.h> +#include <ctype.h> +#include <versekey.h> +#include <stringmgr.h> + +SWORD_NAMESPACE_START + +OSISPlain::OSISPlain() { + setTokenStart("<"); + setTokenEnd(">"); + + setEscapeStart("&"); + setEscapeEnd(";"); + + setEscapeStringCaseSensitive(true); + + addEscapeStringSubstitute("amp", "&"); + addEscapeStringSubstitute("apos", "'"); + addEscapeStringSubstitute("lt", "<"); + addEscapeStringSubstitute("gt", ">"); + addEscapeStringSubstitute("quot", "\""); + + setTokenCaseSensitive(true); + addTokenSubstitute("title", "\n"); + addTokenSubstitute("/title", "\n"); + addTokenSubstitute("/l", "\n"); + addTokenSubstitute("lg", "\n"); + addTokenSubstitute("/lg", "\n"); +} + + +bool OSISPlain::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) { + // manually process if it wasn't a simple substitution + if (!substituteToken(buf, token)) { + MyUserData *u = (MyUserData *)userData; + VerseKey *vk = SWDYNAMIC_CAST(VerseKey, u->key); + char testament = (vk) ? vk ->Testament() : 2; // default to NT + if (((*token == 'w') && (token[1] == ' ')) || + ((*token == '/') && (token[1] == 'w') && (!token[2]))) { + u->tag = token; + + bool start = false; + if (*token == 'w') { + if (token[strlen(token)-1] != '/') { + u->w = token; + return true; + } + start = true; + } + u->tag = (start) ? token : u->w.c_str(); + bool show = true; // to handle unplaced article in kjv2003-- temporary till combined + + SWBuf lastText = (start) ? "stuff" : u->lastTextNode.c_str(); + + const char *attrib; + const char *val; + if ((attrib = u->tag.getAttribute("xlit"))) { + val = strchr(attrib, ':'); + val = (val) ? (val + 1) : attrib; + buf.append(" <"); + buf.append(val); + buf.append('>'); + } + if ((attrib = u->tag.getAttribute("gloss"))) { + val = strchr(attrib, ':'); + val = (val) ? 
(val + 1) : attrib; + buf.append(" <"); + buf.append(val); + buf.append('>'); + } + if ((attrib = u->tag.getAttribute("lemma"))) { + int count = u->tag.getAttributePartCount("lemma", ' '); + int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0 + do { + char gh; + attrib = u->tag.getAttribute("lemma", i, ' '); + if (i < 0) i = 0; // to handle our -1 condition + val = strchr(attrib, ':'); + val = (val) ? (val + 1) : attrib; + if ((strchr("GH", *val)) && (isdigit(val[1]))) { + gh = *val; + val++; + } + else { + gh = (testament>1) ? 'G' : 'H'; + } + if ((!strcmp(val, "3588")) && (lastText.length() < 1)) + show = false; + else { + buf.append(" <"); + buf.append(gh); + buf.append(val); + buf.append(">"); + } + } while (++i < count); + } + if ((attrib = u->tag.getAttribute("morph")) && (show)) { + int count = u->tag.getAttributePartCount("morph", ' '); + int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0 + do { + attrib = u->tag.getAttribute("morph", i, ' '); + if (i < 0) i = 0; // to handle our -1 condition + val = strchr(attrib, ':'); + val = (val) ? (val + 1) : attrib; + if ((*val == 'T') && (strchr("GH", val[1])) && (isdigit(val[2]))) + val+=2; + buf.append(" ("); + buf.append(val); + buf.append(')'); + } while (++i < count); + } + if ((attrib = u->tag.getAttribute("POS"))) { + val = strchr(attrib, ':'); + val = (val) ? 
(val + 1) : attrib; + + buf.append(" <"); + buf.append(val); + buf.append('>'); + } + } + + // <note> tag + else if (!strncmp(token, "note", 4)) { + if (!strstr(token, "strongsMarkup")) { // leave strong's markup notes out, in the future we'll probably have different option filters to turn different note types on or off + buf.append(" ("); + } + else u->suspendTextPassThru = true; + } + else if (!strncmp(token, "/note", 5)) { + if (!u->suspendTextPassThru) + buf.append(')'); + else u->suspendTextPassThru = false; + } + + // <p> paragraph tag + else if (((*token == 'p') && ((token[1] == ' ') || (!token[1]))) || + ((*token == '/') && (token[1] == 'p') && (!token[2]))) { + userData->supressAdjacentWhitespace = true; + buf.append('\n'); + } + + // <lb .../> + else if (!strncmp(token, "lb", 2)) { + userData->supressAdjacentWhitespace = true; + buf.append('\n'); + } + else if (!strncmp(token, "l", 1) && strstr(token, "eID")) { + userData->supressAdjacentWhitespace = true; + buf.append('\n'); + } + else if (!strncmp(token, "/divineName", 11)) { + // Get the end portion of the string, and upper case it + char* end = buf.getRawData(); + end += buf.size() - u->lastTextNode.size(); + toupperstr(end); + } + + // <milestone type="line"/> + else if (!strncmp(token, "milestone", 9)) { + const char* type = strstr(token+10, "type=\""); + if (type && strncmp(type+6, "line", 4)) { //we check for type != line + userData->supressAdjacentWhitespace = true; + buf.append('\n'); + } + } + + else { + return false; // we still didn't handle token + } + } + return true; +} + + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/osisredletterwords.cpp b/src/modules/filters/osisredletterwords.cpp new file mode 100644 index 0000000..727332d --- /dev/null +++ b/src/modules/filters/osisredletterwords.cpp @@ -0,0 +1,85 @@ +/****************************************************************************** + * + * OSISRedLetterWords - SWFilter descendant to toggle red coloring for words + * of Christ 
in an OSIS module. + */ + + +#include <stdlib.h> +#include <osisredletterwords.h> +#include <swmodule.h> + + +SWORD_NAMESPACE_START + +const char oName[] = "Words of Christ in Red"; +const char oTip[] = "Toggles Red Coloring for Words of Christ On and Off if they are marked"; + +const SWBuf choices[3] = {"Off", "On", ""}; +const StringList oValues(&choices[0], &choices[2]); + +OSISRedLetterWords::OSISRedLetterWords() : SWOptionFilter(oName, oTip, &oValues) { + setOptionValue("On"); +} + + +OSISRedLetterWords::~OSISRedLetterWords() { +} + + +char OSISRedLetterWords::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + if (option) //leave in the red lettered words + return 0; + + SWBuf token; + bool intoken = false; + + SWBuf orig = text; + const char *from = orig.c_str(); + + //taken out of the loop + const char* start = 0; + const char* end = 0; + + for (text = ""; *from; from++) { + if (*from == '<') { + intoken = true; + token = ""; + continue; + } + else if (*from == '>') { // process tokens + intoken = false; + + if ((token[0] == 'q') && (token[1] == ' ')) { //q tag + start = strstr(token.c_str(), " who=\"Jesus\""); + if (start && (strlen(start) >= 12)) { //we found a quote of Jesus Christ + end = start+12; //marks the end of the who attribute value + + text.append('<'); + text.append(token, start - (token.c_str())); //the text before the who attr + text.append(end, token.c_str() + token.length() - end); //text after the who attr + text.append('>'); + + continue; + } + } + + //token not processed, append it. 
We don't want to alter the text + text.append('<'); + text.append(token); + text.append('>'); + continue; + } + + if (intoken) { //copy token + token.append(*from); + } + else { //copy text which is not inside a token + text.append(*from); + } + } + return 0; +} + +SWORD_NAMESPACE_END + diff --git a/src/modules/filters/osisrtf.cpp b/src/modules/filters/osisrtf.cpp new file mode 100644 index 0000000..0352335 --- /dev/null +++ b/src/modules/filters/osisrtf.cpp @@ -0,0 +1,520 @@ +/*************************************************************************** + osisrtf.cpp - OSIS to RTF filter + ------------------- + begin : 2003-02-15 + copyright : 2003 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation version 2 of the License. 
* + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <ctype.h> +#include <osisrtf.h> +#include <utilxml.h> +#include <utilstr.h> +#include <versekey.h> +#include <swmodule.h> +#include <stringmgr.h> +#include <stack> + +SWORD_NAMESPACE_START + +namespace { + class MyUserData : public BasicFilterUserData { + public: + bool osisQToTick; + bool BiblicalText; + bool inXRefNote; + int suspendLevel; + std::stack<char *> quoteStack; + SWBuf w; + SWBuf version; + MyUserData(const SWModule *module, const SWKey *key); + ~MyUserData(); + }; + + + MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) { + inXRefNote = false; + BiblicalText = false; + suspendLevel = 0; + if (module) { + version = module->Name(); + BiblicalText = (!strcmp(module->Type(), "Biblical Texts")); + } + osisQToTick = ((!module->getConfigEntry("OSISqToTick")) || (strcmp(module->getConfigEntry("OSISqToTick"), "false"))); + } + + + MyUserData::~MyUserData() { + // Just in case the quotes are not well formed + while (!quoteStack.empty()) { + char *tagData = quoteStack.top(); + quoteStack.pop(); + delete [] tagData; + } + } +static inline void outText(const char *t, SWBuf &o, BasicFilterUserData *u) { if (!u->suspendTextPassThru) o += t; else u->lastSuspendSegment += t; } +static inline void outText(char t, SWBuf &o, BasicFilterUserData *u) { if (!u->suspendTextPassThru) o += t; else u->lastSuspendSegment += t; } +}; + + +OSISRTF::OSISRTF() { + setTokenStart("<"); + setTokenEnd(">"); + + setEscapeStart("&"); + setEscapeEnd(";"); + + setEscapeStringCaseSensitive(true); + + addEscapeStringSubstitute("amp", "&"); + addEscapeStringSubstitute("apos", "'"); + addEscapeStringSubstitute("lt", "<"); + addEscapeStringSubstitute("gt", ">"); + addEscapeStringSubstitute("quot", "\""); + addTokenSubstitute("lg", "{\\par}"); + addTokenSubstitute("/lg", "{\\par}"); + + setTokenCaseSensitive(true); +} + + 
+BasicFilterUserData *OSISRTF::createUserData(const SWModule *module, const SWKey *key) { + return new MyUserData(module, key); +} + + +char OSISRTF::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + + // preprocess text buffer to escape RTF control codes + const char *from; + SWBuf orig = text; + from = orig.c_str(); + for (text = ""; *from; from++) { //loop to remove extra spaces + switch (*from) { + case '{': + case '}': + case '\\': + text += "\\"; + text += *from; + break; + default: + text += *from; + } + } + text += (char)0; + + SWBasicFilter::processText(text, key, module); //handle tokens as usual + + orig = text; + from = orig.c_str(); + for (text = ""; *from; from++) { //loop to remove extra spaces + if ((strchr(" \t\n\r", *from))) { + while (*(from+1) && (strchr(" \t\n\r", *(from+1)))) { + from++; + } + text += " "; + } + else { + text += *from; + } + } + text += (char)0; // probably not needed, but don't want to remove without investigating (same as above) + return 0; +} + + +bool OSISRTF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) { + // manually process if it wasn't a simple substitution + MyUserData *u = (MyUserData *)userData; + SWBuf scratch; + bool sub = (u->suspendTextPassThru) ? substituteToken(scratch, token) : substituteToken(buf, token); + if (!sub) { + XMLTag tag(token); + + // <w> tag + if (!strcmp(tag.getName(), "w")) { + + // start <w> tag + if ((!tag.isEmpty()) && (!tag.isEndTag())) { + outText('{', buf, u); + u->w = token; + } + + // end or empty <w> tag + else { + bool endTag = tag.isEndTag(); + SWBuf lastText; + bool show = true; // to handle unplaced article in kjv2003-- temporary till combined + + if (endTag) { + tag = u->w.c_str(); + lastText = u->lastTextNode.c_str(); + } + else lastText = "stuff"; + + const char *attrib; + const char *val; + if ((attrib = tag.getAttribute("xlit"))) { + val = strchr(attrib, ':'); + val = (val) ? 
(val + 1) : attrib; + scratch.setFormatted(" {\\fs15 <%s>}", val); + outText(scratch.c_str(), buf, u); + } + if ((attrib = tag.getAttribute("gloss"))) { + val = strchr(attrib, ':'); + val = (val) ? (val + 1) : attrib; + scratch.setFormatted(" {\\fs15 <%s>}", val); + outText(scratch.c_str(), buf, u); + } + if ((attrib = tag.getAttribute("lemma"))) { + int count = tag.getAttributePartCount("lemma", ' '); + int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0 + do { + attrib = tag.getAttribute("lemma", i, ' '); + if (i < 0) i = 0; // to handle our -1 condition + val = strchr(attrib, ':'); + val = (val) ? (val + 1) : attrib; + const char *val2 = val; + if ((strchr("GH", *val)) && (isdigit(val[1]))) + val2++; + if ((!strcmp(val2, "3588")) && (lastText.length() < 1)) + show = false; + else { + scratch.setFormatted(" {\\cf3 \\sub <%s>}", val2); + outText(scratch.c_str(), buf, u); + } + } while (++i < count); + } + if ((attrib = tag.getAttribute("morph")) && (show)) { + SWBuf savelemma = tag.getAttribute("savlm"); + if ((strstr(savelemma.c_str(), "3588")) && (lastText.length() < 1)) + show = false; + if (show) { + int count = tag.getAttributePartCount("morph", ' '); + int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0 + do { + attrib = tag.getAttribute("morph", i, ' '); + if (i < 0) i = 0; // to handle our -1 condition + val = strchr(attrib, ':'); + val = (val) ? (val + 1) : attrib; + const char *val2 = val; + if ((*val == 'T') && (strchr("GH", val[1])) && (isdigit(val[2]))) + val2+=2; + scratch.setFormatted(" {\\cf4 \\sub (%s)}", val2); + outText(scratch.c_str(), buf, u); + } while (++i < count); + } + } + if ((attrib = tag.getAttribute("POS"))) { + val = strchr(attrib, ':'); + val = (val) ? 
(val + 1) : attrib; + scratch.setFormatted(" {\\fs15 <%s>}", val); + outText(scratch.c_str(), buf, u); + } + + if (endTag) + outText('}', buf, u); + } + } + + // <note> tag + else if (!strcmp(tag.getName(), "note")) { + if (!tag.isEndTag()) { + if (!tag.isEmpty()) { + SWBuf type = tag.getAttribute("type"); + + if ( (type != "x-strongsMarkup") // leave strong's markup notes out, in the future we'll probably have different option filters to turn different note types on or off + && (type != "strongsMarkup") // deprecated + ) { + SWBuf footnoteNumber = tag.getAttribute("swordFootnote"); + VerseKey *vkey = NULL; + // see if we have a VerseKey * or descendant + SWTRY { + vkey = SWDYNAMIC_CAST(VerseKey, u->key); + } + SWCATCH ( ... ) { } + if (vkey) { + char ch = ((!strcmp(type.c_str(), "crossReference")) || (!strcmp(type.c_str(), "x-cross-ref"))) ? 'x':'n'; + scratch.setFormatted("{\\super <a href=\"\">*%c%i.%s</a>} ", ch, vkey->Verse(), footnoteNumber.c_str()); + outText(scratch.c_str(), buf, u); + u->inXRefNote = (ch == 'x'); + } + } + u->suspendTextPassThru = (++u->suspendLevel); + } + } + if (tag.isEndTag()) { + u->suspendTextPassThru = (--u->suspendLevel); + u->inXRefNote = false; + } + } + + // <p> paragraph tag + else if (!strcmp(tag.getName(), "p")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { // non-empty start tag + outText("{\\fi200\\par}", buf, u); + } + else if (tag.isEndTag()) { // end tag + outText("{\\par}", buf, u); + userData->supressAdjacentWhitespace = true; + } + else { // empty paragraph break marker + outText("{\\pard\\par\\par}", buf, u); + userData->supressAdjacentWhitespace = true; + } + } + + // <reference> tag + else if (!strcmp(tag.getName(), "reference")) { + if (!u->inXRefNote) { // only show these if we're not in an xref note + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + outText("{<a href=\"\">", buf, u); + } + else if (tag.isEndTag()) { + outText("</a>}", buf, u); + } + } + } + + // <l> poetry + else if (!strcmp(tag.getName(), 
"l")) { + // end line marker + if (tag.getAttribute("eID")) { + outText("{\\par}", buf, u); + } + // <l/> without eID or sID + // Note: this is improper osis. This should be <lb/> + else if (tag.isEmpty() && !tag.getAttribute("sID")) { + outText("{\\par}", buf, u); + } + // end of the line + else if (tag.isEndTag()) { + outText("{\\par}", buf, u); + } + } + + // <milestone type="line"/> or <lb.../> + else if ((!strcmp(tag.getName(), "lb")) || ((!strcmp(tag.getName(), "milestone")) && (tag.getAttribute("type")) && (!strcmp(tag.getAttribute("type"), "line")))) { + outText("{\\par}", buf, u); + userData->supressAdjacentWhitespace = true; + } + + // <title> + else if (!strcmp(tag.getName(), "title")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + outText("{\\par\\i1\\b1 ", buf, u); + } + else if (tag.isEndTag()) { + outText("\\par}", buf, u); + } + } + + // <catchWord> & <rdg> tags (italicize) + else if (!strcmp(tag.getName(), "rdg") || !strcmp(tag.getName(), "catchWord")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + outText("{\\i1 ", buf, u); + } + else if (tag.isEndTag()) { + outText('}', buf, u); + } + } + + // <hi> + else if (!strcmp(tag.getName(), "hi")) { + SWBuf type = tag.getAttribute("type"); + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + if (type == "b" || type == "x-b") + outText("{\\b1 ", buf, u); + else // all other types + outText("{\\i1 ", buf, u); + } + else if (tag.isEndTag()) { + outText('}', buf, u); + } + } + + // <q> quote + // Rules for a quote element: + // If the tag is empty with an sID or an eID then use whatever it specifies for quoting. + // Note: empty elements without sID or eID are ignored. + // If the tag is <q> then use it's specifications and push it onto a stack for </q> + // If the tag is </q> then use the pushed <q> for specification + // If there is a marker attribute, possibly empty, this overrides osisQToTick. + // If osisQToTick, then output the marker, using level to determine the type of mark. 
+ else if (!strcmp(tag.getName(), "q")) { + SWBuf type = tag.getAttribute("type"); + SWBuf who = tag.getAttribute("who"); + const char *tmp = tag.getAttribute("level"); + int level = (tmp) ? atoi(tmp) : 1; + tmp = tag.getAttribute("marker"); + bool hasMark = tmp; + SWBuf mark = tmp; + + // open <q> or <q sID... /> + if ((!tag.isEmpty() && !tag.isEndTag()) || (tag.isEmpty() && tag.getAttribute("sID"))) { + // if <q> then remember it for the </q> + if (!tag.isEmpty()) { + char *tagData = 0; + stdstr(&tagData, tag.toString()); + u->quoteStack.push(tagData); + } + + // Do this first so quote marks are included as WoC + if (who == "Jesus") + outText("\\cf6 ", buf, u); + + // first check to see if we've been given an explicit mark + if (hasMark) + outText(mark, buf, u); + //alternate " and ' + else if (u->osisQToTick) + outText((level % 2) ? '\"' : '\'', buf, u); + } + // close </q> or <q eID... /> + else if ((tag.isEndTag()) || (tag.getAttribute("eID"))) { + // if it is </q> then pop the stack for the attributes + if (tag.isEndTag() && !u->quoteStack.empty()) { + char *tagData = u->quoteStack.top(); + u->quoteStack.pop(); + XMLTag qTag(tagData); + delete [] tagData; + + type = qTag.getAttribute("type"); + who = qTag.getAttribute("who"); + tmp = qTag.getAttribute("level"); + level = (tmp) ? atoi(tmp) : 1; + tmp = qTag.getAttribute("marker"); + hasMark = tmp; + mark = tmp; + } + + // first check to see if we've been given an explicit mark + if (hasMark) + outText(mark, buf, u); + // finally, alternate " and ', if config says we should supply a mark + else if (u->osisQToTick) + outText((level % 2) ? 
'\"' : '\'', buf, u); + + // Do this last so quote marks are included as WoC + if (who == "Jesus") + outText("\\cf0 ", buf, u); + } + } + + + // <milestone type="cQuote" marker="x"/> + else if (!strcmp(tag.getName(), "milestone") && tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "cQuote")) { + const char *tmp = tag.getAttribute("marker"); + bool hasMark = tmp; + SWBuf mark = tmp; + tmp = tag.getAttribute("level"); + int level = (tmp) ? atoi(tmp) : 1; + + // first check to see if we've been given an explicit mark + if (hasMark) + outText(mark, buf, u); + // finally, alternate " and ', if config says we should supply a mark + else if (u->osisQToTick) + outText((level % 2) ? '\"' : '\'', buf, u); + } + + // <transChange> + else if (!strcmp(tag.getName(), "transChange")) { + SWBuf type = tag.getAttribute("type"); + + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + +// just do all transChange tags this way for now +// if (type == "supplied") + outText("{\\i1 ", buf, u); + } + else if (tag.isEndTag()) { + outText('}', buf, u); + } + } + + // <divineName> + else if (!strcmp(tag.getName(), "divineName")) { + + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + u->suspendTextPassThru = (++u->suspendLevel); + } + else if (tag.isEndTag()) { + SWBuf lastText = u->lastSuspendSegment.c_str(); + u->suspendTextPassThru = (--u->suspendLevel); + if (lastText.size()) { + toupperstr(lastText); + scratch.setFormatted("{\\fs19%c\\fs16%s}", lastText[0], lastText.c_str()+1); + outText(scratch.c_str(), buf, u); + } + } + } + + // <div> + else if (!strcmp(tag.getName(), "div")) { + + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + outText("\\par\\par\\pard ", buf, u); + } + else if (tag.isEndTag()) { + } + } + + // image + else if (!strcmp(tag.getName(), "figure")) { + const char *src = tag.getAttribute("src"); + if (!src) // assert we have a src attribute + return false; + + char* filepath = new char[strlen(u->module->getConfigEntry("AbsoluteDataPath")) + strlen(token)]; + 
*filepath = 0; + strcpy(filepath, userData->module->getConfigEntry("AbsoluteDataPath")); + strcat(filepath, src); + +// we do this because BibleCS looks for this EXACT format for an image tag + outText("<img src=\"", buf, u); + outText(filepath, buf, u); + outText("\" />", buf, u); +/* + char imgc; + for (c = filepath + strlen(filepath); c > filepath && *c != '.'; c--); + c++; + FILE* imgfile; + if (stricmp(c, "jpg") || stricmp(c, "jpeg")) { + imgfile = fopen(filepath, "r"); + if (imgfile != NULL) { + outText("{\\nonshppict {\\pict\\jpegblip ", buf, u); + while (feof(imgfile) != EOF) { + scratch.setFormatted("%2x", fgetc(imgfile)); + outText(scratch.c_str(), buf, u); + + } + fclose(imgfile); + outText("}}", buf, u); + } + } + else if (stricmp(c, "png")) { + outText("{\\*\\shppict {\\pict\\pngblip ", buf, u); + + outText("}}", buf, u); + } +*/ + delete [] filepath; + } + else { + return false; // we still didn't handle token + } + } + return true; +} + + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/osisscripref.cpp b/src/modules/filters/osisscripref.cpp new file mode 100644 index 0000000..437f5f5 --- /dev/null +++ b/src/modules/filters/osisscripref.cpp @@ -0,0 +1,100 @@ +/****************************************************************************** + * + * OSISScripref - SWFilter descendant to hide or show scripture references + * in an OSIS module. 
+ */ + + +#include <stdlib.h> +#include <osisscripref.h> +#include <swmodule.h> +#include <utilxml.h> + +SWORD_NAMESPACE_START + +const char oName[] = "Cross-references"; +const char oTip[] = "Toggles Scripture Cross-references On and Off if they exist"; + +const SWBuf choices[3] = {"Off", "On", ""}; +const StringList oValues(&choices[0], &choices[2]); + +OSISScripref::OSISScripref() : SWOptionFilter(oName, oTip, &oValues) { + setOptionValue("On"); +} + + +OSISScripref::~OSISScripref() { +} + + +char OSISScripref::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + SWBuf token; + bool intoken = false; + bool hide = false; + SWBuf tagText; + XMLTag startTag; + + SWBuf orig = text; + const char *from = orig.c_str(); + + XMLTag tag; + + for (text = ""; *from; ++from) { + if (*from == '<') { + intoken = true; + token = ""; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + + tag = token; + + if (!strncmp(token.c_str(), "note", 4) || !strncmp(token.c_str(), "/note", 5)) { + if (!tag.isEndTag() && !tag.isEmpty()) { + startTag = tag; + if ((tag.getAttribute("type")) && (!strcmp(tag.getAttribute("type"), "crossReference"))) { + hide = true; + tagText = ""; + if (option) { // we want the tag in the text + text.append('<'); + text.append(token); + text.append('>'); + } + continue; + } + } + if (hide && tag.isEndTag()) { + hide = false; + if (option) { // we want the tag in the text + text.append(tagText); // end tag gets added further down + } + else continue; // don't let the end tag get added to the text + } + } + + // if not a heading token, keep token in text + if (!hide) { + text.append('<'); + text.append(token); + text.append('>'); + } + else { + tagText.append('<'); + tagText.append(token); + tagText.append('>'); + } + continue; + } + if (intoken) { //copy token + token.append(*from); + } + else if (!hide) { //copy text which is not inside a token + text.append(*from); + } + else tagText.append(*from); + } + return 0; +} + 
+SWORD_NAMESPACE_END diff --git a/src/modules/filters/osisstrongs.cpp b/src/modules/filters/osisstrongs.cpp new file mode 100644 index 0000000..922f7fd --- /dev/null +++ b/src/modules/filters/osisstrongs.cpp @@ -0,0 +1,257 @@ +/****************************************************************************** + * + * osisstrongs - SWFilter descendant to hide or show strongs number + * in a OSIS module. + */ + + +#include <stdlib.h> +#include <stdio.h> +#include <ctype.h> +#include <osisstrongs.h> +#include <swmodule.h> +#include <versekey.h> +#include <utilxml.h> + +SWORD_NAMESPACE_START + +const char oName[] = "Strong's Numbers"; +const char oTip[] = "Toggles Strong's Numbers On and Off if they exist"; + +const SWBuf choices[3] = {"Off", "On", ""}; +const StringList oValues(&choices[0], &choices[2]); + + +OSISStrongs::OSISStrongs() : SWOptionFilter(oName, oTip, &oValues) { + setOptionValue("Off"); +} + + +OSISStrongs::~OSISStrongs() { +} + + +char OSISStrongs::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + SWBuf token; + bool intoken = false; + bool lastspace = false; + int wordNum = 1; + char wordstr[5]; + const char *wordStart = 0; + + const SWBuf orig = text; + const char * from = orig.c_str(); + + for (text = ""; *from; ++from) { + if (*from == '<') { + intoken = true; + token = ""; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (token.startsWith("w ")) { // Word + XMLTag wtag(token); + if (module->isProcessEntryAttributes()) { + wordStart = from+1; + char gh = 0; + VerseKey *vkey = 0; + if (key) { + vkey = SWDYNAMIC_CAST(VerseKey, key); + } + SWBuf lemma = ""; + SWBuf morph = ""; + SWBuf src = ""; + SWBuf morphClass = ""; + SWBuf lemmaClass = ""; + + const char *attrib; + sprintf(wordstr, "%03d", wordNum); + + // why is morph entry attribute processing done in here? Well, it's faster. It makes more local sense to place this code in osismorph. 
+ // easier to keep lemma and morph in same wordstr number too maybe. + if ((attrib = wtag.getAttribute("morph"))) { + int count = wtag.getAttributePartCount("morph", ' '); + int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0 + do { + SWBuf mClass = ""; + SWBuf mp = ""; + attrib = wtag.getAttribute("morph", i, ' '); + if (i < 0) i = 0; // to handle our -1 condition + + const char *m = strchr(attrib, ':'); + if (m) { + int len = m-attrib; + mClass.append(attrib, len); + attrib += (len+1); + } + if ((mClass == "x-Robinsons") || (mClass == "x-Robinson") || (mClass == "Robinson")) { + mClass = "robinson"; + } + if (i) { morphClass += " "; morph += " "; } + mp += attrib; + morphClass += mClass; + morph += mp; + if (count > 1) { + SWBuf tmp; + tmp.setFormatted("Morph.%d", i+1); + module->getEntryAttributes()["Word"][wordstr][tmp] = mp; + tmp.setFormatted("MorphClass.%d", i+1); + module->getEntryAttributes()["Word"][wordstr][tmp] = mClass; + } + } while (++i < count); + } + + if ((attrib = wtag.getAttribute("lemma"))) { + int count = wtag.getAttributePartCount("lemma", ' '); + int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0 + do { + gh = 0; + SWBuf lClass = ""; + SWBuf l = ""; + attrib = wtag.getAttribute("lemma", i, ' '); + if (i < 0) i = 0; // to handle our -1 condition + + const char *m = strchr(attrib, ':'); + if (m) { + int len = m-attrib; + lClass.append(attrib, len); + attrib += (len+1); + } + if ((lClass == "x-Strongs") || (lClass == "strong") || (lClass == "Strong")) { + if (isdigit(attrib[0])) { + if (vkey) { + gh = vkey->Testament() ? 
'H' : 'G'; + } + } + else { + gh = *attrib; + attrib++; + } + lClass = "strong"; + } + if (gh) l += gh; + l += attrib; + if (i) { lemmaClass += " "; lemma += " "; } + lemma += l; + lemmaClass += lClass; + if (count > 1) { + SWBuf tmp; + tmp.setFormatted("Lemma.%d", i+1); + module->getEntryAttributes()["Word"][wordstr][tmp] = l; + tmp.setFormatted("LemmaClass.%d", i+1); + module->getEntryAttributes()["Word"][wordstr][tmp] = lClass; + } + } while (++i < count); + module->getEntryAttributes()["Word"][wordstr]["PartCount"].setFormatted("%d", count); + } + + if ((attrib = wtag.getAttribute("src"))) { + int count = wtag.getAttributePartCount("src", ' '); + int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0 + do { + SWBuf mp = ""; + attrib = wtag.getAttribute("src", i, ' '); + if (i < 0) i = 0; // to handle our -1 condition + + if (i) src += " "; + mp += attrib; + src += mp; + if (count > 1) { + SWBuf tmp; + tmp.setFormatted("Src.%d", i+1); + module->getEntryAttributes()["Word"][wordstr][tmp] = mp; + } + } while (++i < count); + } + + + if (lemma.length()) + module->getEntryAttributes()["Word"][wordstr]["Lemma"] = lemma; + if (lemmaClass.length()) + module->getEntryAttributes()["Word"][wordstr]["LemmaClass"] = lemmaClass; + if (morph.length()) + module->getEntryAttributes()["Word"][wordstr]["Morph"] = morph; + if (morphClass.length()) + module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = morphClass; + if (src.length()) + module->getEntryAttributes()["Word"][wordstr]["Src"] = src; + + if (wtag.isEmpty()) { + int j; + for (j = token.length()-1; ((j>0) && (strchr(" /", token[j]))); j--); + token.size(j+1); + } + + token += " wn=\""; + token += wordstr; + token += "\""; + + if (wtag.isEmpty()) { + token += "/"; + } + + wordNum++; + } + + if (!option) { +/* + * Code which handles multiple lemma types. Kindof works but breaks at least WEBIF filters for strongs. 
+ * + int count = wtag.getAttributePartCount("lemma", ' '); + for (int i = 0; i < count; i++) { + SWBuf a = wtag.getAttribute("lemma", i, ' '); + const char *prefix = a.stripPrefix(':'); + if ((prefix) && (!strcmp(prefix, "x-Strongs") || !strcmp(prefix, "strong") || !strcmp(prefix, "Strong"))) { + // remove attribute part + wtag.setAttribute("lemma", 0, i, ' '); + i--; + count--; + } + } +* Instead the codee below just removes the lemma attribute +*****/ + const char *l = wtag.getAttribute("lemma"); + if (l) { + SWBuf savlm = l; + wtag.setAttribute("lemma", 0); + wtag.setAttribute("savlm", savlm); + token = wtag; + token.trim(); + // drop <> + token << 1; + token--; + } + } + } + if (token.startsWith("/w")) { // Word End + if (module->isProcessEntryAttributes()) { + if (wordStart) { + SWBuf tmp; + tmp.append(wordStart, (from-wordStart)-3); + sprintf(wordstr, "%03d", wordNum-1); + module->getEntryAttributes()["Word"][wordstr]["Text"] = tmp; + } + } + wordStart = 0; + } + + // keep token in text + text.append('<'); + text.append(token); + text.append('>'); + + continue; + } + if (intoken) { + token += *from; + } + else { + text.append(*from); + lastspace = (*from == ' '); + } + } + return 0; +} + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/osisvariants.cpp b/src/modules/filters/osisvariants.cpp new file mode 100644 index 0000000..91d700c --- /dev/null +++ b/src/modules/filters/osisvariants.cpp @@ -0,0 +1,118 @@ +/****************************************************************************** + * + * osisvariants - SWFilter descendant to hide or show textual variants + * in an OSIS module. 
+ */ + + +#include <stdlib.h> +#include <osisvariants.h> +#include <utilstr.h> + +SWORD_NAMESPACE_START + +const char OSISVariants::primary[] = "Primary Reading"; +const char OSISVariants::secondary[] = "Secondary Reading"; +const char OSISVariants::all[] = "All Readings"; + +const char OSISVariants::optName[] = "Textual Variants"; +const char OSISVariants::optTip[] = "Switch between Textual Variants modes"; + + +OSISVariants::OSISVariants() { + option = false; + options.push_back(primary); + options.push_back(secondary); + options.push_back(all); +} + + +OSISVariants::~OSISVariants() { +} + +void OSISVariants::setOptionValue(const char *ival) +{ + if (!stricmp(ival, primary)) option = 0; + else if (!stricmp(ival, secondary)) option = 1; + else option = 2; +} + +const char *OSISVariants::getOptionValue() +{ + if (option == 0) { + return primary; + } + else if (option == 1) { + return secondary; + } + else { + return all; + } +} + +char OSISVariants::processText(SWBuf &text, const SWKey *key, const SWModule *module) +{ + if (option == 0 || option == 1) { //we want primary or variant only + bool intoken = false; + bool hide = false; + bool invar = false; + + SWBuf token; + SWBuf orig = text; + const char *from = orig.c_str(); + + //we use a fixed comparision string to make sure the loop is as fast as the original two blocks with almost the same code + //const char* variantCompareString = (option == 0) ? 
"div type=\"variant\" class=\"1\"" : "div type=\"variant\" class=\"2\""; + + for (text = ""; *from; from++) { + if (*from == '<') { + intoken = true; + token = ""; + continue; + } + else if (*from == '>') { // process tokens + intoken = false; + + if (!strncmp(token.c_str(), "seg ", 4)) { //only one of the variants + invar = true; + hide = true; + continue; + } + if (!strncmp(token.c_str(), "div type=\"variant\"", 18)) { + invar = true; + continue; + } + if (!strncmp(token.c_str(), "/div", 4)) { + hide = false; + if (invar) { + invar = false; + continue; + } + } + if (!hide) { + text += '<'; + text.append(token); + text += '>'; + } + + continue; + } + if (intoken) { + token += *from; + } + else if (!hide) { + text += *from; + } + } + + } + + return 0; +} + + + + + + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/osiswebif.cpp b/src/modules/filters/osiswebif.cpp new file mode 100644 index 0000000..ecc58f7 --- /dev/null +++ b/src/modules/filters/osiswebif.cpp @@ -0,0 +1,198 @@ +/*************************************************************************** + OSISWEBIF.cpp - OSIS to HTML filter with hrefs + for strongs and morph tags + ------------------- + begin : 2003-10-23 + copyright : 2003 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
*
+ *                                                                         *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <osiswebif.h>
+#include <utilxml.h>
+#include <url.h>
+#include <versekey.h>
+#include <swmodule.h>
+#include <ctype.h>
+
+
+SWORD_NAMESPACE_START
+
+
+// passageStudyURL is baseURL (empty here) followed by "passagestudy.jsp".
+OSISWEBIF::OSISWEBIF() : baseURL(""), passageStudyURL(baseURL + "passagestudy.jsp"), javascript(false) {
+}
+
+
+// Allocate the per-render state and set the markup that wraps words of Christ.
+BasicFilterUserData *OSISWEBIF::createUserData(const SWModule *module, const SWKey *key) {
+	MyUserData *u = new MyUserData(module, key);
+	u->wordsOfChristStart = "<span class=\"wordsOfJesus\"> ";
+	u->wordsOfChristEnd   = "</span> ";
+	return u;
+}
+
+
+// Render one OSIS token as HTML appended to buf.  <w>, <note>, <title>, <div>,
+// <span> and <br> are handled here; every other token is delegated to
+// OSISHTMLHREF::handleToken() and its result returned.  Returns true otherwise.
+bool OSISWEBIF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+	MyUserData *u = (MyUserData *)userData;
+	SWBuf scratch;
+	bool sub = (u->suspendTextPassThru) ? substituteToken(scratch, token) : substituteToken(buf, token);
+	if (!sub) {
+  // manually process if it wasn't a simple substitution
+		XMLTag tag(token);
+
+		// <w> tag
+		if (!strcmp(tag.getName(), "w")) {
+
+			// start <w> tag
+			if ((!tag.isEmpty()) && (!tag.isEndTag())) {
+				u->w = token;
+			}
+
+			// end or empty <w> tag
+			else {
+				bool endTag = tag.isEndTag();
+				SWBuf lastText;
+				bool show = true;	// to handle unplaced article in kjv2003-- temporary till combined
+
+				if (endTag) {
+					// attributes are read from the remembered start tag
+					tag = u->w.c_str();
+					lastText = u->lastTextNode.c_str();
+				}
+				else lastText = "stuff";
+
+				const char *attrib;
+				const char *val;
+				if ((attrib = tag.getAttribute("xlit"))) {
+					// value after the first ':' (whole value when there is none); output is disabled
+					val = strchr(attrib, ':');
+					val = (val) ? (val + 1) : attrib;
+//					buf.appendFormatted(" %s", val);
+				}
+				if ((attrib = tag.getAttribute("gloss"))) {
+					val = strchr(attrib, ':');
+					val = (val) ? (val + 1) : attrib;
+					buf.appendFormatted(" %s", val);
+				}
+				if ((attrib = tag.getAttribute("lemma"))) {
+					int count = tag.getAttributePartCount("lemma", ' ');
+					int i = (count > 1) ? 0 : -1;		// -1 for whole value cuz it's faster, but does the same thing as 0
+					do {
+						attrib = tag.getAttribute("lemma", i, ' ');
+						if (i < 0) i = 0;	// to handle our -1 condition
+						val = strchr(attrib, ':');
+						val = (val) ? (val + 1) : attrib;
+						const char *val2 = val;
+						// skip a leading G/H prefix when a digit follows
+						if ((strchr("GH", *val)) && (isdigit(val[1])))
+							val2++;
+						if ((!strcmp(val2, "3588")) && (lastText.length() < 1))
+							show = false;
+						else	buf.appendFormatted(" <small><em><<a href=\"%s?showStrong=%s#cv\">%s</a>></em></small> ", passageStudyURL.c_str(), URL::encode(val2).c_str(), val2);
+					} while (++i < count);
+				}
+				if ((attrib = tag.getAttribute("morph")) && (show)) {
+					SWBuf savelemma = tag.getAttribute("savlm");
+					if ((strstr(savelemma.c_str(), "3588")) && (lastText.length() < 1))
+						show = false;
+					if (show) {
+						int count = tag.getAttributePartCount("morph", ' ');
+						int i = (count > 1) ? 0 : -1;		// -1 for whole value cuz it's faster, but does the same thing as 0
+						do {
+							attrib = tag.getAttribute("morph", i, ' ');
+							if (i < 0) i = 0;	// to handle our -1 condition
+							val = strchr(attrib, ':');
+							val = (val) ? (val + 1) : attrib;
+							const char *val2 = val;
+							if ((*val == 'T') && (strchr("GH", val[1])) && (isdigit(val[2])))
+								val2+=2;
+							buf.appendFormatted(" <small><em>(<a href=\"%s?showMorph=%s#cv\">%s</a>)</em></small> ", passageStudyURL.c_str(), URL::encode(val2).c_str(), val2);
+						} while (++i < count);
+					}
+				}
+				if ((attrib = tag.getAttribute("POS"))) {
+					val = strchr(attrib, ':');
+					val = (val) ? (val + 1) : attrib;
+					buf.appendFormatted(" %s", val);
+				}
+
+				/*if (endTag)
+					buf += "}";*/
+			}
+		}
+
+		// <note> tag
+		else if (!strcmp(tag.getName(), "note")) {
+			if (!tag.isEndTag()) {
+				SWBuf type = tag.getAttribute("type");
+				bool strongsMarkup = (type == "x-strongsMarkup" || type == "strongsMarkup");	// the latter is deprecated
+				if (strongsMarkup) {
+					tag.setEmpty(false);	// handle bug in KJV2003 module where some note open tags were <note ... />
+				}
+
+				if (!tag.isEmpty()) {
+					if (!strongsMarkup) {	// leave strong's markup notes out, in the future we'll probably have different option filters to turn different note types on or off
+						SWBuf footnoteNumber = tag.getAttribute("swordFootnote");
+						SWBuf modName = (u->module) ? u->module->Name() : "";
+						VerseKey *vkey = NULL;
+						// see if we have a VerseKey * or descendant
+						SWTRY {
+							vkey = SWDYNAMIC_CAST(VerseKey, u->key);
+						}
+						SWCATCH ( ... ) {	}
+						if (vkey) {
+							// 'x' marks a cross-reference note, 'n' any other note
+							char ch = ((tag.getAttribute("type") && ((!strcmp(tag.getAttribute("type"), "crossReference")) || (!strcmp(tag.getAttribute("type"), "x-cross-ref")))) ? 'x':'n');
+//							buf.appendFormatted("<a href=\"noteID=%s.%c.%s\"><small><sup>*%c</sup></small></a> ", vkey->getText(), ch, footnoteNumber.c_str(), ch);
+							buf.appendFormatted("<span class=\"fn\" onclick=\"f(\'%s\',\'%s\',\'%s\');\" >%c</span>", modName.c_str(), u->key->getText(), footnoteNumber.c_str(), ch);
+						}
+					}
+					// note content is withheld from the output until the matching end tag
+					u->suspendTextPassThru = (++u->suspendLevel);
+				}
+			}
+			if (tag.isEndTag()) {
+				u->suspendTextPassThru = (--u->suspendLevel);
+
+			}
+		}
+
+		// <title>
+		else if (!strcmp(tag.getName(), "title")) {
+			if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+				buf += "<h3>";
+			}
+			else if (tag.isEndTag()) {
+				buf += "</h3>";
+			}
+		}
+
+		// ok to leave these in
+		else if (!strcmp(tag.getName(), "div")) {
+			buf += tag;
+		}
+		else if (!strcmp(tag.getName(), "span")) {
+			buf += tag;
+		}
+		else if (!strcmp(tag.getName(), "br")) {
+			buf += tag;
+		}
+
+		// handled appropriately in base class
+		// <catchWord> & <rdg> tags (italicize)
+		// <hi> text highlighting
+		// <q> quote
+		// <milestone type="cQuote" marker="x"/>
+		// <transChange>
+		else {
+			return OSISHTMLHREF::handleToken(buf, token, userData);
+		}
+	}
+	return true;
+}
+
+
+SWORD_NAMESPACE_END
+
diff --git a/src/modules/filters/osiswordjs.cpp b/src/modules/filters/osiswordjs.cpp
new file mode 100644
index 0000000..dc805b4
--- /dev/null
+++ b/src/modules/filters/osiswordjs.cpp
@@ -0,0 +1,178 @@
+/****************************************************************************** + * + * osisstrongs - SWFilter descendant to hide or show strongs number + * in a OSIS module. + */ + + +#include <stdlib.h> +#include <stdio.h> +#include <osiswordjs.h> +#include <swmodule.h> +#include <ctype.h> +#include <utilxml.h> +#include <utilstr.h> +#include <versekey.h> +#include <stdio.h> + +SWORD_NAMESPACE_START + +const char oName[] = "Word Javascript"; +const char oTip[] = "Toggles Word Javascript data"; + +const SWBuf choices[3] = {"Off", "On", ""}; +const StringList oValues(&choices[0], &choices[2]); + + +OSISWordJS::OSISWordJS() : SWOptionFilter(oName, oTip, &oValues) { + setOptionValue("Off"); + + defaultGreekLex = 0; + defaultHebLex = 0; + defaultGreekParse = 0; + defaultHebParse = 0; + mgr = 0; +} + + +OSISWordJS::~OSISWordJS() { +} + + +char OSISWordJS::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + if (option) { + char token[2112]; // cheese. Fix. + int tokpos = 0; + bool intoken = false; + bool lastspace = false; + int wordNum = 1; + char wordstr[5]; + SWBuf modName = (module)?module->Name():""; + // add TR to w src in KJV then remove this next line + SWBuf wordSrcPrefix = (modName == "KJV")?SWBuf("TR"):modName; + + VerseKey *vkey = 0; + if (key) { + vkey = SWDYNAMIC_CAST(VerseKey, key); + } + + const SWBuf orig = text; + const char * from = orig.c_str(); + + for (text = ""; *from; ++from) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if ((*token == 'w') && (token[1] == ' ')) { // Word + XMLTag wtag(token); + sprintf(wordstr, "%03d", wordNum); + SWBuf lemmaClass; + SWBuf lemma; + SWBuf morph; + SWBuf src; + char gh = 0; + int count = atoi(module->getEntryAttributes()["Word"][wordstr]["PartCount"].c_str()); + for (int i = 0; i < count; i++) { + + // for now, lemma class can just be equal to last lemma class 
in multi part word + SWBuf tmp = "LemmaClass"; + if (count > 1) tmp.appendFormatted(".%d", i+1); + lemmaClass = module->getEntryAttributes()["Word"][wordstr][tmp]; + + tmp = "Lemma"; + if (count > 1) tmp.appendFormatted(".%d", i+1); + tmp = (module->getEntryAttributes()["Word"][wordstr][tmp].c_str()); + + // if we're strongs, + if (lemmaClass == "strong") { + gh = tmp[0]; + tmp << 1; + } + if (lemma.size()) lemma += "|"; + lemma += tmp; + + tmp = "Morph"; + if (count > 1) tmp.appendFormatted(".%d", i+1); + tmp = (module->getEntryAttributes()["Word"][wordstr][tmp].c_str()); + if (morph.size()) morph += "|"; + morph += tmp; + + tmp = "Src"; + if (count > 1) tmp.appendFormatted(".%d", i+1); + tmp = (module->getEntryAttributes()["Word"][wordstr][tmp].c_str()); + if (!tmp.length()) tmp.appendFormatted("%d", wordNum); + tmp.insert(0, wordSrcPrefix); + if (src.size()) src += "|"; + src += tmp; + } + + SWBuf lexName = ""; + // we can pass the real lex name in, but we have some + // aliases in the javascript to optimize bandwidth + if ((gh == 'G') && (defaultGreekLex)) { + lexName = (!strcmp(defaultGreekLex->Name(), "StrongsGreek"))?"G":defaultGreekLex->Name(); + } + else if ((gh == 'H') && (defaultHebLex)) { + lexName = (!strcmp(defaultHebLex->Name(), "StrongsHebrew"))?"H":defaultHebLex->Name(); + } + + SWBuf xlit = wtag.getAttribute("xlit"); + + if ((lemmaClass != "strong") && (xlit.startsWith("betacode:"))) { + lexName = "betacode"; +// const char *m = strchr(xlit.c_str(), ':'); +// strong = ++m; + } + SWBuf wordID; + if (vkey) { + // optimize for bandwidth and use only the verse as the unique entry id + wordID.appendFormatted("%d", vkey->Verse()); + } + else { + wordID = key->getText(); + } + wordID.appendFormatted("_%s", src.c_str()); + // clean up our word ID for XHTML + for (unsigned int i = 0; i < wordID.size(); i++) { + if ((!isdigit(wordID[i])) && (!isalpha(wordID[i]))) { + wordID[i] = '_'; + } + } + // 'p' = 'fillpop' to save bandwidth + 
text.appendFormatted("<span class=\"clk\" onclick=\"p('%s','%s','%s','%s','','%s');\" >", lexName.c_str(), lemma.c_str(), wordID.c_str(), morph.c_str(), modName.c_str()); + wordNum++; + } + if ((*token == '/') && (token[1] == 'w') && option) { // Word + text += "</w></span>"; + continue; + } + + // if not a strongs token, keep token in text + text.append('<'); + text.append(token); + text.append('>'); + + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + text.append(*from); + lastspace = (*from == ' '); + } + } + } + return 0; +} + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/papyriplain.cpp b/src/modules/filters/papyriplain.cpp new file mode 100644 index 0000000..423bfda --- /dev/null +++ b/src/modules/filters/papyriplain.cpp @@ -0,0 +1,71 @@ +/****************************************************************************** + * + * papyriplain - SWFilter descendant to strip out all Papyri tags + */ + + +#include <stdlib.h> +#include <papyriplain.h> + +SWORD_NAMESPACE_START + +PapyriPlain::PapyriPlain() { +} + + +char PapyriPlain::processText (SWBuf &text, const SWKey *key, const SWModule *module) +{ + SWBuf orig = text; + const char *from = orig.c_str(); + + for (text = ""; *from; ++from) { + + // remove hyphen and whitespace if that is all that separates words + // also be sure we're not a double hyphen '--' + if ((*from == '-') && (text.length() > 0) && (text[text.length()-1] != '-')) { + char remove = 0; + const char *c; + for (c = from+1; *c; c++) { + if ((*c == 10) || (*c == 13)) { + remove = 1; + } + if (!strchr(" \t\n", *c)) { + if (remove) remove++; + break; + } + } + if (remove > 1) { + from = c-1; + continue; + } + } + + // remove all newlines + if ((*from == 10) || (*from == 13)) { + if ((text.length()>1) && (text[text.length()-2] != ' ') && (*(from+1) != ' ')) + text.append(' '); + continue; + } + + + // strip odd characters + switch (*from) { + case '(': + case ')': + case '[': 
+ case ']': + case '{': + case '}': + case '<': + case '>': + continue; + } + + // if we've made it this far + text.append(*from); + + } + return 0; +} + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/plainfootnotes.cpp b/src/modules/filters/plainfootnotes.cpp new file mode 100644 index 0000000..0baf313 --- /dev/null +++ b/src/modules/filters/plainfootnotes.cpp @@ -0,0 +1,79 @@ +/*************************************************************************** + plainfootnotes.cpp - description + ------------------- + begin : Wed Oct 13 1999 + copyright : (C) 1999 by The team of BibleTime + email : info@bibletime.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#include <plainfootnotes.h> +#include <swkey.h> + +#include <stdlib.h> + +SWORD_NAMESPACE_START + +const char oName[] = "Footnotes"; +const char oTip[] = "Toggles Footnotes On and Off In Bible Texts If They Exist"; + +const SWBuf choices[3] = {"Off", "On", ""}; +const StringList oValues(&choices[0], &choices[2]); + +PLAINFootnotes::PLAINFootnotes() : SWOptionFilter(oName, oTip, &oValues) { + setOptionValue("Off"); +} + +PLAINFootnotes::~PLAINFootnotes(){ +} + + +char PLAINFootnotes::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + if (!option) { // if we don't want footnotes + //char token[2048]; + //SWBuf token; + //int tokpos = 0; + //bool intoken = false; + //bool lastspace = false; + + bool hide = false; + + SWBuf orig = text; + const char *from = orig.c_str(); + for (text = ""; *from; from++) { + if (*from == '{') // Footnote start + { + hide = true; + continue; + } + else if (*from == '}') // Footnote end + { + hide = false; + continue; + } + + //if (intoken) { + //if (tokpos < 2045) + // token += *from; + // token[tokpos+2] = 0; + //} + //else { + if (!hide) { + text = *from; + //lastspace = (*from == ' '); + } + //} + } + } + return 0; +} + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/plainhtml.cpp b/src/modules/filters/plainhtml.cpp new file mode 100644 index 0000000..f5f2a5c --- /dev/null +++ b/src/modules/filters/plainhtml.cpp @@ -0,0 +1,83 @@ +/*************************************************************************** + plainhtml.cpp - description + ------------------- + begin : Thu Jun 24 1999 + copyright : (C) 1999 by Torsten Uhlmann + email : TUhlmann@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU 
General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include <plainhtml.h> +#include <swbuf.h> + +SWORD_NAMESPACE_START + +PLAINHTML::PLAINHTML() +{ +} + + +char PLAINHTML::processText(SWBuf &text, const SWKey *key, const SWModule *module) +{ + int count = 0; + + SWBuf orig = text; + const char *from = orig.c_str(); + for (text = ""; *from; from++) + { + if ((*from == '\n') && (from[1] == '\n')) // two newlinea are a paragraph + { + text += "<P>"; + from++; + continue; + } else { + if ((*from == '\n')) // && (from[1] != '\n')) // only one new line + { + text += "<BR>"; + continue; + } + } + + if (*from == '{') { //footnote start + text += "<FONT COLOR=\"#80000\"><SMALL> ("; + continue; + } + else if (*from == '}') //footnote end + { + text += ") </SMALL></FONT>"; + continue; + } + else if (*from == '<') { + text += "<"; + continue; + } + else if (*from == '>') { + text += ">"; + continue; + } + else if (*from == '&') { + text += "&"; + continue; + } + else if ((*from == ' ') && (count > 5000)) + { + text += "<WBR>"; + count = 0; + continue; + } + + text += *from; + count++; + } + return 0; +} + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/rtfhtml.cpp b/src/modules/filters/rtfhtml.cpp new file mode 100644 index 0000000..cac5068 --- /dev/null +++ b/src/modules/filters/rtfhtml.cpp @@ -0,0 +1,81 @@ +/*************************************************************************** + rtfhtml.cpp - description + ------------------- + begin : Wed Oct 13 1999 + copyright : (C) 1999 by The team of BibleTime + email : info@bibletime.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under 
the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <rtfhtml.h> +#include <swbuf.h> + +SWORD_NAMESPACE_START + +RTFHTML::RTFHTML() { + +} + + +char RTFHTML::processText(SWBuf &text, const SWKey *key, const SWModule *module) +{ + bool center = false; + + const char *from; + SWBuf orig = text; + from = orig.c_str(); + for (text = ""; *from; from++) + { + if (*from == '\\') // a RTF command + { + if ( !strncmp(from+1, "pard", 4) ) + //(from[1] == 'p') && (from[2] == 'a') && (from[3] == 'r') && (from[4] == 'd')) + { // switch all modifiers off + if (center) + { + text += "</CENTER>"; + center = false; + } + from += 4; + continue; + } + if ( !strncmp(from+1, "par", 3) ) + //(from[1] == 'p') && (from[2] == 'a') && (from[3] == 'r')) + { + text += "<P>\n"; + from += 3; + continue; + } + if (from[1] == ' ') + { + from += 1; + continue; + } + if ( !strncmp(from+1, "qc", 2) ) + //(from[1] == 'q') && (from[2] == 'c')) // center on + { + if (!center) + { + text += "<CENTER>"; + center = true; + } + from += 2; + continue; + } + } + + text += *from; + } + return 0; +} + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/scsuutf8.cpp b/src/modules/filters/scsuutf8.cpp new file mode 100644 index 0000000..0daff4a --- /dev/null +++ b/src/modules/filters/scsuutf8.cpp @@ -0,0 +1,226 @@ +/****************************************************************************** + * + * SCSUUTF8 - SWFilter descendant to convert a SCSU character to UTF-8 + * + */ + + +/* This class is based on: + * http://czyborra.com/scsu/scsu.c written by Roman Czyborra@dds.nl + * on Andrea's balcony in North Amsterdam on 1998-08-04 + * Thanks to Richard Verhoeven <rcb5@win.tue.nl> for his suggestion + * to correct the haphazard "if" after UQU to "else if" on 1998-10-01 + * 
+ * This is a deflator to UTF-8 output for input compressed in SCSU, + * the (Reuters) Standard Compression Scheme for Unicode as described + * in http://www.unicode.org/unicode/reports/tr6.html + */ + +#include <stdlib.h> +#include <stdio.h> +#include <swmodule.h> + +#include <scsuutf8.h> + +SWORD_NAMESPACE_START + +SCSUUTF8::SCSUUTF8() { +} + + +unsigned char* SCSUUTF8::UTF8Output(unsigned long uchar, unsigned char* text) +{ + /* join UTF-16 surrogates without any pairing sanity checks */ + + static int d; + + if (uchar >= 0xd800 && uchar <= 0xdbff) { d = uchar & 0x3f; return text; } + if (uchar >= 0xdc00 && uchar <= 0xdfff) { uchar = uchar + 0x2400 + d * 0x400; } + + /* output one character as UTF-8 multibyte sequence */ + + if (uchar < 0x80) { + *text++ = c; + } + else if (uchar < 0x800) { + *text++ = 0xc0 | uchar >> 6; + *text++ = 0x80 | (uchar & 0x3f); + } + else if (uchar < 0x10000) { + *text++ = 0xe0 | uchar >> 12; + *text++ = 0x80 | (uchar >> 6 & 0x3f); + *text++ = 0x80 | (uchar & 0x3f); + } + else if (uchar < 0x200000) { + *text++ = 0xf0 | uchar >> 18; + *text++ = 0x80 | (uchar >> 12 & 0x3f); + *text++ = 0x80 | (uchar >> 6 & 0x3f); + *text++ = 0x80 | (uchar & 0x3f); + } + + return text; +} + +char SCSUUTF8::processText(SWBuf &text, const SWKey *key, const SWModule *module) { +/* + unsigned char *to, *from; + unsigned long buflen = len * FILTERPAD; + char active = 0, mode = 0; + if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering + return -1; + + static unsigned short start[8] = {0x0000,0x0080,0x0100,0x0300,0x2000,0x2080,0x2100,0x3000}; + static unsigned short slide[8] = {0x0080,0x00C0,0x0400,0x0600,0x0900,0x3040,0x30A0,0xFF00}; + static unsigned short win[256] = { + 0x0000, 0x0080, 0x0100, 0x0180, 0x0200, 0x0280, 0x0300, 0x0380, + 0x0400, 0x0480, 0x0500, 0x0580, 0x0600, 0x0680, 0x0700, 0x0780, + 0x0800, 0x0880, 0x0900, 0x0980, 0x0A00, 0x0A80, 0x0B00, 0x0B80, + 0x0C00, 0x0C80, 0x0D00, 0x0D80, 0x0E00, 0x0E80, 0x0F00, 0x0F80, + 0x1000, 0x1080, 
0x1100, 0x1180, 0x1200, 0x1280, 0x1300, 0x1380, + 0x1400, 0x1480, 0x1500, 0x1580, 0x1600, 0x1680, 0x1700, 0x1780, + 0x1800, 0x1880, 0x1900, 0x1980, 0x1A00, 0x1A80, 0x1B00, 0x1B80, + 0x1C00, 0x1C80, 0x1D00, 0x1D80, 0x1E00, 0x1E80, 0x1F00, 0x1F80, + 0x2000, 0x2080, 0x2100, 0x2180, 0x2200, 0x2280, 0x2300, 0x2380, + 0x2400, 0x2480, 0x2500, 0x2580, 0x2600, 0x2680, 0x2700, 0x2780, + 0x2800, 0x2880, 0x2900, 0x2980, 0x2A00, 0x2A80, 0x2B00, 0x2B80, + 0x2C00, 0x2C80, 0x2D00, 0x2D80, 0x2E00, 0x2E80, 0x2F00, 0x2F80, + 0x3000, 0x3080, 0x3100, 0x3180, 0x3200, 0x3280, 0x3300, 0x3800, + 0xE000, 0xE080, 0xE100, 0xE180, 0xE200, 0xE280, 0xE300, 0xE380, + 0xE400, 0xE480, 0xE500, 0xE580, 0xE600, 0xE680, 0xE700, 0xE780, + 0xE800, 0xE880, 0xE900, 0xE980, 0xEA00, 0xEA80, 0xEB00, 0xEB80, + 0xEC00, 0xEC80, 0xED00, 0xED80, 0xEE00, 0xEE80, 0xEF00, 0xEF80, + 0xF000, 0xF080, 0xF100, 0xF180, 0xF200, 0xF280, 0xF300, 0xF380, + 0xF400, 0xF480, 0xF500, 0xF580, 0xF600, 0xF680, 0xF700, 0xF780, + 0xF800, 0xF880, 0xF900, 0xF980, 0xFA00, 0xFA80, 0xFB00, 0xFB80, + 0xFC00, 0xFC80, 0xFD00, 0xFD80, 0xFE00, 0xFE80, 0xFF00, 0xFF80, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x00C0, 0x0250, 0x0370, 0x0530, 0x3040, 0x30A0, 0xFF60 + }; + + if (!len) + return 0; + + memmove(&text[buflen - len], text, len); + from = (unsigned char*)&text[buflen - len]; + to = (unsigned char *)text; + + // 
------------------------------- + + for (int i = 0; i < len;) { + + + if (i >= len) break; + c = from[i++]; + + if (c >= 0x80) + { + to = UTF8Output (c - 0x80 + slide[active], to); + } + else if (c >= 0x20 && c <= 0x7F) + { + to = UTF8Output (c, to); + } + else if (c == 0x0 || c == 0x9 || c == 0xA || c == 0xC || c == 0xD) + { + to = UTF8Output (c, to); + } + else if (c >= 0x1 && c <= 0x8) // SQn + { + if (i >= len) break; + d = from[i++]; // single quote + + to = UTF8Output (d < 0x80 ? d + start [c - 0x1] : + d - 0x80 + slide [c - 0x1], to); + } + else if (c >= 0x10 && c <= 0x17) // SCn + { + active = c - 0x10; // change window + } + else if (c >= 0x18 && c <= 0x1F) // SDn + { + active = c - 0x18; // define window + if (i >= len) break; + slide [active] = win [from[i++]]; + } + else if (c == 0xB) // SDX + { + if (i >= len) break; + c = from[i++]; + + if (i >= len) break; + d = from[i++]; + + slide [active = c>>5] = 0x10000 + (((c & 0x1F) << 8 | d) << 7); + } + else if (c == 0xE) // SQU + { + if (i >= len) break; + c = from[i++]; // SQU + + if (i >= len) break; + to = UTF8Output (c << 8 | from[i++], to); + } + else if (c == 0xF) // SCU + { + mode = 1; // change to Unicode mode + + while (mode) + { + if (i >= len) break; + c = from[i++]; + + if (c <= 0xDF || c >= 0xF3) + { + if (i >= len) break; + to = UTF8Output (c << 8 | from[i++], to); + } + else if (c == 0xF0) // UQU + { + if (i >= len) break; + c = from[i++]; + + if (i >= len) break; + to = UTF8Output (c << 8 | from[i++], to); + } + else if (c >= 0xE0 && c <= 0xE7) // UCn + { + active = c - 0xE0; mode = 0; + } + else if (c >= 0xE8 && c <= 0xEF) // UDn + { + if (i >= len) break; + slide [active=c-0xE8] = win [from[i++]]; mode = 0; + } + else if (c == 0xF1) // UDX + { + if (i >= len) break; + c = from[i++]; + + if (i >= len) break; + d = from[i++]; + + slide [active = c>>5] = + 0x10000 + (((c & 0x1F) << 8 | d) << 7); mode = 0; + } + } + } + + + } + + *to++ = 0; + *to = 0; +*/ + return 0; +} + +SWORD_NAMESPACE_END 
diff --git a/src/modules/filters/swbasicfilter.cpp b/src/modules/filters/swbasicfilter.cpp new file mode 100644 index 0000000..ef10e45 --- /dev/null +++ b/src/modules/filters/swbasicfilter.cpp @@ -0,0 +1,406 @@ +/****************************************************************************** + * swbasicfilter.h - definition of class SWBasicFilter. An SWFilter + * impl that provides some basic methods that + * many filters will need and can use as a starting + * point. + * + * $Id: swbasicfilter.cpp 2167 2008-05-16 23:23:39Z scribe $ + * + * Copyright 2001 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + */ + +#include <stdlib.h> +#include <swbasicfilter.h> +#include <stdio.h> +#include <stdarg.h> +#include <utilstr.h> +#include <stringmgr.h> +#include <map> +#include <set> + +SWORD_NAMESPACE_START + +typedef std::map<SWBuf, SWBuf> DualStringMap; +typedef std::set<SWBuf> StringSet; + +// I hate bridge patterns but this isolates std::map from a ton of filters +class SWBasicFilter::Private { +public: + DualStringMap tokenSubMap; + DualStringMap escSubMap; + StringSet escPassSet; +}; + +const char SWBasicFilter::INITIALIZE = 1; +const char SWBasicFilter::PRECHAR = 2; +const char SWBasicFilter::POSTCHAR = 4; +const char SWBasicFilter::FINALIZE = 8; + +SWBasicFilter::SWBasicFilter() { + + p = new Private; + + processStages = 0; + tokenStart = 0; + tokenEnd = 0; + escStart = 0; + escEnd = 0; + + setTokenStart("<"); + setTokenEnd(">"); + setEscapeStart("&"); + setEscapeEnd(";"); + + escStringCaseSensitive = false; + tokenCaseSensitive = false; + passThruUnknownToken = false; + passThruUnknownEsc = false; + passThruNumericEsc = false; +} + + +SWBasicFilter::~SWBasicFilter() { + if (tokenStart) + delete [] tokenStart; + + if (tokenEnd) + delete [] tokenEnd; + + if (escStart) + delete [] escStart; + + if (escEnd) + delete [] escEnd; + + delete p; +} + + +void SWBasicFilter::setPassThruUnknownToken(bool val) { + passThruUnknownToken = val; +} + + +void SWBasicFilter::setPassThruUnknownEscapeString(bool val) { + passThruUnknownEsc = val; +} + +void SWBasicFilter::setPassThruNumericEscapeString(bool val) { + passThruUnknownEsc = val; +} + + +void SWBasicFilter::setTokenCaseSensitive(bool val) { + tokenCaseSensitive = val; +} + + +void SWBasicFilter::setEscapeStringCaseSensitive(bool val) { + escStringCaseSensitive = val; +} + + +void SWBasicFilter::addTokenSubstitute(const char *findString, const char *replaceString) { + char *buf = 0; + + if (!tokenCaseSensitive) { + stdstr(&buf, findString); + toupperstr(buf); + p->tokenSubMap[buf] = replaceString; + delete [] buf; + } 
+ else p->tokenSubMap[findString] = replaceString; +} + + +void SWBasicFilter::removeTokenSubstitute(const char *findString) { + if (p->tokenSubMap.find(findString) != p->tokenSubMap.end()) { + p->tokenSubMap.erase( p->tokenSubMap.find(findString) ); + } +} + +void SWBasicFilter::addAllowedEscapeString(const char *findString) { + char *buf = 0; + + if (!escStringCaseSensitive) { + stdstr(&buf, findString); + toupperstr(buf); + p->escPassSet.insert(StringSet::value_type(buf)); + delete [] buf; + } + else p->escPassSet.insert(StringSet::value_type(findString)); +} + +void SWBasicFilter::removeAllowedEscapeString(const char *findString) { + if (p->escPassSet.find(findString) != p->escPassSet.end()) { + p->escPassSet.erase( p->escPassSet.find(findString) ); + } +} + +void SWBasicFilter::addEscapeStringSubstitute(const char *findString, const char *replaceString) { + char *buf = 0; + + if (!escStringCaseSensitive) { + stdstr(&buf, findString); + toupperstr(buf); + p->escSubMap.insert(DualStringMap::value_type(buf, replaceString)); + delete [] buf; + } + else p->escSubMap.insert(DualStringMap::value_type(findString, replaceString)); +} + +void SWBasicFilter::removeEscapeStringSubstitute(const char *findString) { + if (p->escSubMap.find(findString) != p->escSubMap.end()) { + p->escSubMap.erase( p->escSubMap.find(findString) ); + } +} + +bool SWBasicFilter::substituteToken(SWBuf &buf, const char *token) { + DualStringMap::iterator it; + + if (!tokenCaseSensitive) { + char *tmp = 0; + stdstr(&tmp, token); + toupperstr(tmp); + it = p->tokenSubMap.find(tmp); + delete [] tmp; + } else + it = p->tokenSubMap.find(token); + + if (it != p->tokenSubMap.end()) { + buf += it->second.c_str(); + return true; + } + return false; +} + +void SWBasicFilter::appendEscapeString(SWBuf &buf, const char *escString) { + buf += escStart; + buf += escString; + buf += escEnd; +} + +bool SWBasicFilter::passAllowedEscapeString(SWBuf &buf, const char *escString) { + StringSet::iterator it; + + if 
(!escStringCaseSensitive) { + char *tmp = 0; + stdstr(&tmp, escString); + toupperstr(tmp); + it = p->escPassSet.find(tmp); + delete [] tmp; + } else + it = p->escPassSet.find(escString); + + if (it != p->escPassSet.end()) { + appendEscapeString(buf, escString); + return true; + } + + return false; +} + +bool SWBasicFilter::handleNumericEscapeString(SWBuf &buf, const char *escString) { + if (passThruNumericEsc) { + appendEscapeString(buf, escString); + return true; + } + return false; +} + +bool SWBasicFilter::substituteEscapeString(SWBuf &buf, const char *escString) { + DualStringMap::iterator it; + + if (*escString == '#') { + return handleNumericEscapeString(buf, escString); + } + + if (passAllowedEscapeString(buf, escString)) { + return true; + } + + if (!escStringCaseSensitive) { + char *tmp = 0; + stdstr(&tmp, escString); + toupperstr(tmp); + it = p->escSubMap.find(tmp); + delete [] tmp; + } else + it = p->escSubMap.find(escString); + + if (it != p->escSubMap.end()) { + buf += it->second.c_str(); + return true; + } + return false; +} + + +bool SWBasicFilter::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) { + return substituteToken(buf, token); +} + + +bool SWBasicFilter::handleEscapeString(SWBuf &buf, const char *escString, BasicFilterUserData *userData) { + return substituteEscapeString(buf, escString); +} + + +void SWBasicFilter::setEscapeStart(const char *escStart) { + stdstr(&(this->escStart), escStart); + escStartLen = strlen(escStart); +} + + +void SWBasicFilter::setEscapeEnd(const char *escEnd) { + stdstr(&(this->escEnd), escEnd); + escEndLen = strlen(escEnd); +} + + +void SWBasicFilter::setTokenStart(const char *tokenStart) { + stdstr(&(this->tokenStart), tokenStart); + tokenStartLen = strlen(tokenStart); +} + + +void SWBasicFilter::setTokenEnd(const char *tokenEnd) { + stdstr(&(this->tokenEnd), tokenEnd); + tokenEndLen = strlen(tokenEnd); +} + + +char SWBasicFilter::processText(SWBuf &text, const SWKey *key, const SWModule 
*module) { + char *from; + char token[4096]; + int tokpos = 0; + bool intoken = false; + bool inEsc = false; + int escStartPos = 0, escEndPos = 0; + int tokenStartPos = 0, tokenEndPos = 0; + SWBuf lastTextNode; + BasicFilterUserData *userData = createUserData(module, key); + + SWBuf orig = text; + from = orig.getRawData(); + text = ""; + + if (processStages & INITIALIZE) { + if (processStage(INITIALIZE, text, from, userData)) { // processStage handled it all + delete userData; + return 0; + } + } + + for (;*from; from++) { + + if (processStages & PRECHAR) { + if (processStage(PRECHAR, text, from, userData)) // processStage handled this char + continue; + } + + if (*from == tokenStart[tokenStartPos]) { + if (tokenStartPos == (tokenStartLen - 1)) { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + inEsc = false; + } + else tokenStartPos++; + continue; + } + + if (*from == escStart[escStartPos]) { + if (escStartPos == (escStartLen - 1)) { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + inEsc = true; + } + else escStartPos++; + continue; + } + + if (inEsc) { + if (*from == escEnd[escEndPos]) { + if (escEndPos == (escEndLen - 1)) { + intoken = inEsc = false; + userData->lastTextNode = lastTextNode; + + if (!userData->suspendTextPassThru) { //if text through is disabled no tokens should pass, too + if ((!handleEscapeString(text, token, userData)) && (passThruUnknownEsc)) { + appendEscapeString(text, token); + } + } + escEndPos = escStartPos = tokenEndPos = tokenStartPos = 0; + lastTextNode = ""; + continue; + } + } + } + + if (!inEsc) { + if (*from == tokenEnd[tokenEndPos]) { + if (tokenEndPos == (tokenEndLen - 1)) { + intoken = false; + userData->lastTextNode = lastTextNode; + if ((!handleToken(text, token, userData)) && (passThruUnknownToken)) { + text += tokenStart; + text += token; + text += tokenEnd; + } + escEndPos = escStartPos = tokenEndPos = tokenStartPos = 0; + lastTextNode = ""; + continue; + } 
+ } + } + + if (intoken) { + if (tokpos < 4090) { + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + } + else { + if ((!userData->supressAdjacentWhitespace) || (*from != ' ')) { + if (!userData->suspendTextPassThru) { + text.append(*from); + userData->lastSuspendSegment.size(0); + } + else userData->lastSuspendSegment.append(*from); + lastTextNode.append(*from); + } + userData->supressAdjacentWhitespace = false; + } + + if (processStages & POSTCHAR) + processStage(POSTCHAR, text, from, userData); + + } + + if (processStages & FINALIZE) + processStage(FINALIZE, text, from, userData); + + delete userData; + return 0; +} + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/swoptfilter.cpp b/src/modules/filters/swoptfilter.cpp new file mode 100644 index 0000000..6921190 --- /dev/null +++ b/src/modules/filters/swoptfilter.cpp @@ -0,0 +1,47 @@ +/****************************************************************************** + * + * swoptfilter - SWFilter descendant and base class for all option filters + */ + + +#include <swoptfilter.h> +#include <utilstr.h> + +SWORD_NAMESPACE_START + + +SWOptionFilter::SWOptionFilter() { + static StringList empty; + static const char *empty2 = ""; + optName = empty2; + optTip = empty2; + optValues = ∅ +} + +SWOptionFilter::SWOptionFilter(const char *oName, const char *oTip, const StringList *oValues) { + optName = oName; + optTip = oTip; + optValues = oValues; +} + + +SWOptionFilter::~SWOptionFilter() { +} + + +void SWOptionFilter::setOptionValue(const char *ival) { + for (StringList::const_iterator loop = optValues->begin(); loop != optValues->end(); loop++) { + if (!stricmp(loop->c_str(), ival)) { + optionValue = *loop; + option = (!strnicmp(ival, "On", 2)); // convenience for boolean filters + break; + } + } +} + +const char *SWOptionFilter::getOptionValue() { + return optionValue; +} + + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/teihtmlhref.cpp b/src/modules/filters/teihtmlhref.cpp new file mode 100644 index 
0000000..1d213f4
--- /dev/null
+++ b/src/modules/filters/teihtmlhref.cpp
@@ -0,0 +1,205 @@
/***************************************************************************
                     teihtmlhref.cpp  -   TEI to HTMLHREF filter
                             -------------------
    begin                : 2006-07-03
    copyright            : 2006 by CrossWire Bible Society
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 ***************************************************************************/

#include <stdlib.h>
#include <ctype.h>
#include <teihtmlhref.h>
#include <utilxml.h>
#include <swmodule.h>
#include <url.h>


SWORD_NAMESPACE_START


/**
 * Per-call state of the filter.  Records the module name in version and
 * whether the module type is "Biblical Texts"; both keep their defaults when
 * module is 0.
 */
TEIHTMLHREF::MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) {
	BiblicalText = false;
	if (module) {
		version = module->Name();
		BiblicalText = (!strcmp(module->Type(), "Biblical Texts"));
	}
}


/**
 * Configures XML style delimiters, case-sensitive matching, and lets the
 * five predefined XML entities pass through unchanged.
 */
TEIHTMLHREF::TEIHTMLHREF() {
	setTokenStart("<");
	setTokenEnd(">");

	setEscapeStart("&");
	setEscapeEnd(";");

	setEscapeStringCaseSensitive(true);

	addAllowedEscapeString("quot");
	addAllowedEscapeString("apos");
	addAllowedEscapeString("amp");
	addAllowedEscapeString("lt");
	addAllowedEscapeString("gt");

	setTokenCaseSensitive(true);
}

/**
 * Translates one TEI tag into HTML appended to buf.
 *
 * @param buf      output buffer
 * @param token    tag text without the surrounding delimiters
 * @param userData really a MyUserData; carries lastHi, version and key
 * @return true when the tag was handled (by substitution or by one of the
 *         branches below), false for an unknown tag
 */
bool TEIHTMLHREF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
  // manually process if it wasn't a simple substitution
	if (!substituteToken(buf, token)) {
		MyUserData *u = (MyUserData *)userData;
		XMLTag tag(token);

		// <p> paragraph tag
		if (!strcmp(tag.getName(), "p")) {
			if ((!tag.isEndTag()) && (!tag.isEmpty())) {	// non-empty start tag
				buf += "<!P><br />";
			}
			else if (tag.isEndTag()) {	// end tag
				buf += "<!/P><br />";
				//userData->supressAdjacentWhitespace = true;
			}
			else {					// empty paragraph break marker
				buf += "<!P><br />";
				//userData->supressAdjacentWhitespace = true;
			}
		}

		// <hi>
		else if (!strcmp(tag.getName(), "hi")) {
			if ((!tag.isEndTag()) && (!tag.isEmpty())) {
				SWBuf rend = tag.getAttribute("rend");

				// remembered so the end tag can emit the matching closer;
				// only one level is kept, nested <hi> overwrite it
				u->lastHi = rend;
				if (rend == "ital")
					buf += "<i>";
				else if (rend == "bold")
					buf += "<b>";
				else if (rend == "sup")
					buf += "<small><sup>";

			}
			else if (tag.isEndTag()) {
				SWBuf rend = u->lastHi;
				if (rend == "ital")
					buf += "</i>";
				else if (rend == "bold")
					buf += "</b>";
				else if (rend == "sup")
					buf += "</sup></small>";
			}
		}

		// <entryFree>
		else if (!strcmp(tag.getName(), "entryFree")) {
			if ((!tag.isEndTag()) && (!tag.isEmpty())) {
				SWBuf n = tag.getAttribute("n");
				if (n != "") {
					buf += "<b>";
					buf += n;
					buf += "</b>";
				}
			}
		}

		// <sense>
		else if (!strcmp(tag.getName(), "sense")) {
			if ((!tag.isEndTag()) && (!tag.isEmpty())) {
				SWBuf n = tag.getAttribute("n");
				if (n != "") {
					buf += "<br /><b>";
					buf += n;
					buf += "</b>";
				}
			}
		}

		// <div>
		else if (!strcmp(tag.getName(), "div")) {

			if ((!tag.isEndTag()) && (!tag.isEmpty())) {
				buf += "<!P>";
			}
			else if (tag.isEndTag()) {
			}
		}

		// <pos>, <gen>, <case>, <gram>, <number>, <pron>, <def>
		// NOTE(review): this list used to name <mood> as well, but no branch
		// tests for it -- confirm whether <mood> should be italicized too
		else if (!strcmp(tag.getName(), "pos") ||
				 !strcmp(tag.getName(), "gen") ||
				 !strcmp(tag.getName(), "case") ||
				 !strcmp(tag.getName(), "gram") ||
				 !strcmp(tag.getName(), "number") ||
				 !strcmp(tag.getName(), "pron") ||
				 !strcmp(tag.getName(), "def")) {
			if ((!tag.isEndTag()) && (!tag.isEmpty())) {
				buf += "<i>";
			}
			else if (tag.isEndTag()) {
				buf += "</i>";
			}
		}

		// <tr>
		else if (!strcmp(tag.getName(), "tr")) {
			if ((!tag.isEndTag()) && (!tag.isEmpty())) {
				buf += "<i>";
			}
			else if (tag.isEndTag()) {
				buf += "</i>";
			}
		}

		// orth
		else if (!strcmp(tag.getName(), "orth")) {
			if ((!tag.isEndTag()) && (!tag.isEmpty())) {
				buf += "<b>";
			}
			else if (tag.isEndTag()) {
				buf += "</b>";
			}
		}

		// <etym>, <usg>
		else if (!strcmp(tag.getName(), "etym") ||
				 !strcmp(tag.getName(), "usg")) {
			// do nothing here
		}

		// <note> tag
		else if (!strcmp(tag.getName(), "note")) {
			if (!tag.isEndTag()) {
				if (!tag.isEmpty()) {
					// the note body is hidden; only the link emitted at
					// the end tag appears in the output
					u->suspendTextPassThru = true;
				}
			}
			if (tag.isEndTag()) {
				// NOTE(review): the attribute is read from the END tag;
				// presumably only the start tag carries swordFootnote, in
				// which case this is always empty -- confirm.  u->key is
				// dereferenced without a null check.
				SWBuf footnoteNumber = tag.getAttribute("swordFootnote");

				buf.appendFormatted("<a href=\"passagestudy.jsp?action=showNote&type=n&value=%s&module=%s&passage=%s\"><small><sup>*n</sup></small></a>",
					URL::encode(footnoteNumber.c_str()).c_str(),
					URL::encode(u->version.c_str()).c_str(),
					URL::encode(u->key->getText()).c_str());

				u->suspendTextPassThru = false;
			}
		}

		else {
			return false;  // we still didn't handle token
		}

	}
	return true;
}


SWORD_NAMESPACE_END

diff --git a/src/modules/filters/teiplain.cpp b/src/modules/filters/teiplain.cpp
new file mode 100644
index 0000000..c721d84
--- /dev/null
+++ b/src/modules/filters/teiplain.cpp
@@ -0,0 +1,116 @@
/***************************************************************************
                     teiplain.cpp  -   TEI to Plaintext filter
                             -------------------
    begin                : 2006-07-05
    copyright            : 2006 by CrossWire Bible Society
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.
* + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <teiplain.h> +#include <ctype.h> + +SWORD_NAMESPACE_START + +TEIPlain::TEIPlain() { + setTokenStart("<"); + setTokenEnd(">"); + + setEscapeStart("&"); + setEscapeEnd(";"); + + setEscapeStringCaseSensitive(true); + + addEscapeStringSubstitute("amp", "&"); + addEscapeStringSubstitute("apos", "'"); + addEscapeStringSubstitute("lt", "<"); + addEscapeStringSubstitute("gt", ">"); + addEscapeStringSubstitute("quot", "\""); + + setTokenCaseSensitive(true); +} + + +bool TEIPlain::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) { + // manually process if it wasn't a simple substitution + if (!substituteToken(buf, token)) { + //MyUserData *u = (MyUserData *)userData; + XMLTag tag(token); + + // <p> paragraph tag + if (!strcmp(tag.getName(), "p")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { // non-empty start tag + buf += "\n"; + } + else if (tag.isEndTag()) { // end tag + buf += "\n"; + userData->supressAdjacentWhitespace = true; + } + else { // empty paragraph break marker + buf += "\n\n"; + userData->supressAdjacentWhitespace = true; + } + } + + // <entryFree> + else if (!strcmp(tag.getName(), "entryFree")) { + SWBuf n = tag.getAttribute("n"); + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + if (n != "") { + buf += n; + buf += ". "; + } + } + } + + // <sense> + else if (!strcmp(tag.getName(), "sense")) { + SWBuf n = tag.getAttribute("n"); + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + if (n != "") { + buf += n; + buf += ". 
"; + } + } + else if (tag.isEndTag()) { + buf += "\n"; + } + } + + // <div> + else if (!strcmp(tag.getName(), "div")) { + + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + buf.append("\n\n\n"); + } + else if (tag.isEndTag()) { + } + } + + // <etym> + else if (!strcmp(tag.getName(), "etym")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + buf += "["; + } + else if (tag.isEndTag()) { + buf += "]"; + } + } + + else { + return false; // we still didn't handle token + } + } + return true; +} + + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/teirtf.cpp b/src/modules/filters/teirtf.cpp new file mode 100644 index 0000000..006f099 --- /dev/null +++ b/src/modules/filters/teirtf.cpp @@ -0,0 +1,182 @@ +/*************************************************************************** + teirtf.cpp - TEI to RTF filter + ------------------- + begin : 2006-07-03 + copyright : 2006 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <ctype.h> +#include <teirtf.h> +#include <utilxml.h> +#include <swmodule.h> +#include <versekey.h> + +SWORD_NAMESPACE_START + + +TEIRTF::MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) { + BiblicalText = false; + if (module) { + version = module->Name(); + BiblicalText = (!strcmp(module->Type(), "Biblical Texts")); + } +} + + +TEIRTF::TEIRTF() { + setTokenStart("<"); + setTokenEnd(">"); + + setEscapeStart("&"); + setEscapeEnd(";"); + + setEscapeStringCaseSensitive(true); + + addEscapeStringSubstitute("amp", "&"); + addEscapeStringSubstitute("apos", "'"); + addEscapeStringSubstitute("lt", "<"); + addEscapeStringSubstitute("gt", ">"); + addEscapeStringSubstitute("quot", "\""); + + setTokenCaseSensitive(true); +} + + +bool TEIRTF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) { + // manually process if it wasn't a simple substitution + if (!substituteToken(buf, token)) { + MyUserData *u = (MyUserData *)userData; + XMLTag tag(token); + + // <p> paragraph tag + if (!strcmp(tag.getName(), "p")) { + if (!tag.isEndTag()) { // non-empty start tag + buf += "{\\sb100\\fi200\\par}"; + } + } + + // <hi> + else if (!strcmp(tag.getName(), "hi")) { + SWBuf rend = tag.getAttribute("rend"); + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + if (rend == "ital") + buf += "{\\i1 "; + else if (rend == "bold") + buf += "{\\b1 "; + else if (rend == "sup") + buf += "{\\super "; + + } + else if (tag.isEndTag()) { + buf += "}"; + } + } + + // <entryFree> + else if (!strcmp(tag.getName(), "entryFree")) { + SWBuf n = tag.getAttribute("n"); + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + if (n != "") { + buf += "{\\b1 "; + buf += n; + buf += ". 
}"; } + } + } + + // <sense> + else if (!strcmp(tag.getName(), "sense")) { + SWBuf n = tag.getAttribute("n"); + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + if (n != "") { + buf += "{\\sb100\\par\\b1 "; + buf += n; + buf += ". }"; + } + } + } + + // <div> + else if (!strcmp(tag.getName(), "div")) { + + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + buf.append("{\\pard\\sa300}"); + } + else if (tag.isEndTag()) { + } + } + + // <pos>, <gen>, <case>, <gram>, <number>, <mood> + else if (!strcmp(tag.getName(), "pos") || !strcmp(tag.getName(), "gen") || !strcmp(tag.getName(), "case") || !strcmp(tag.getName(), "gram") || !strcmp(tag.getName(), "number") || !strcmp(tag.getName(), "mood")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + buf += "{\\i1 "; + } + else if (tag.isEndTag()) { + buf += "}"; + } + } + + // <tr> + else if (!strcmp(tag.getName(), "tr")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + buf += "{\\i1 "; + } + else if (tag.isEndTag()) { + buf += "}"; + } + } + + // <etym> + else if (!strcmp(tag.getName(), "etym")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + buf += "["; + } + else if (tag.isEndTag()) { + buf += "]"; + } + } + + // <note> tag + else if (!strcmp(tag.getName(), "note")) { + if (!tag.isEndTag()) { + if (!tag.isEmpty()) { + SWBuf type = tag.getAttribute("type"); + + SWBuf footnoteNumber = tag.getAttribute("swordFootnote"); + VerseKey *vkey; + // see if we have a VerseKey * or descendant + SWTRY { + vkey = SWDYNAMIC_CAST(VerseKey, u->key); + } + SWCATCH ( ... 
) { } + if (vkey) { + buf.appendFormatted("{\\super <a href=\"\">*%s</a>} ", footnoteNumber.c_str()); + } + u->suspendTextPassThru = true; + } + } + if (tag.isEndTag()) { + u->suspendTextPassThru = false; + } + } + + else { + return false; // we still didn't handle token + } + + } + return true; +} + + +SWORD_NAMESPACE_END + diff --git a/src/modules/filters/thmlfootnotes.cpp b/src/modules/filters/thmlfootnotes.cpp new file mode 100644 index 0000000..23c43b4 --- /dev/null +++ b/src/modules/filters/thmlfootnotes.cpp @@ -0,0 +1,124 @@ +/****************************************************************************** + * + * thmlfootnotes - SWFilter descendant to hide or show footnotes + * in a ThML module. + */ + + +#include <stdlib.h> +#include <stdio.h> +#include <thmlfootnotes.h> +#include <swmodule.h> +#include <swbuf.h> +#include <versekey.h> +#include <utilxml.h> + +SWORD_NAMESPACE_START + +const char oName[] = "Footnotes"; +const char oTip[] = "Toggles Footnotes On and Off if they exist"; + +const SWBuf choices[3] = {"Off", "On", ""}; +const StringList oValues(&choices[0], &choices[2]); + +ThMLFootnotes::ThMLFootnotes() : SWOptionFilter(oName, oTip, &oValues) { + setOptionValue("Off"); +} + + +ThMLFootnotes::~ThMLFootnotes() { +} + + +char ThMLFootnotes::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + SWBuf token; + bool intoken = false; + bool hide = false; + SWBuf tagText; + XMLTag startTag; + SWBuf refs = ""; + int footnoteNum = 1; + char buf[254]; + VerseKey parser = key->getText(); + + SWBuf orig = text; + const char *from = orig.c_str(); + + for (text = ""; *from; from++) { + if (*from == '<') { + intoken = true; + token = ""; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + + XMLTag tag(token); + if (!strcmp(tag.getName(), "note")) { + if (!tag.isEndTag()) { + if (!tag.isEmpty()) { + refs = ""; + startTag = tag; + hide = true; + tagText = ""; + continue; + } + } + if (hide && tag.isEndTag()) { + if 
(module->isProcessEntryAttributes()) { + SWBuf fc = module->getEntryAttributes()["Footnote"]["count"]["value"]; + footnoteNum = (fc.length()) ? atoi(fc.c_str()) : 0; + sprintf(buf, "%i", ++footnoteNum); + module->getEntryAttributes()["Footnote"]["count"]["value"] = buf; + StringList attributes = startTag.getAttributeNames(); + for (StringList::iterator it = attributes.begin(); it != attributes.end(); it++) { + module->getEntryAttributes()["Footnote"][buf][it->c_str()] = startTag.getAttribute(it->c_str()); + } + module->getEntryAttributes()["Footnote"][buf]["body"] = tagText; + startTag.setAttribute("swordFootnote", buf); + if ((startTag.getAttribute("type")) && (!strcmp(startTag.getAttribute("type"), "crossReference"))) { + if (!refs.length()) + refs = parser.ParseVerseList(tagText.c_str(), parser, true).getRangeText(); + module->getEntryAttributes()["Footnote"][buf]["refList"] = refs.c_str(); + } + } + hide = false; + if ((option) || ((startTag.getAttribute("type") && (!strcmp(startTag.getAttribute("type"), "crossReference"))))) { // we want the tag in the text; crossReferences are handled by another filter + text += startTag; + text.append(tagText); + } + else continue; + } + } + + // if not a note token, keep token in text + if ((!strcmp(tag.getName(), "scripRef")) && (!tag.isEndTag())) { + SWBuf osisRef = tag.getAttribute("passage"); + if (refs.length()) + refs += "; "; + refs += osisRef; + } + if (!hide) { + text += '<'; + text.append(token); + text += '>'; + } + else { + tagText += '<'; + tagText.append(token); + tagText += '>'; + } + continue; + } + if (intoken) { //copy token + token += *from; + } + else if (!hide) { //copy text which is not inside a token + text += *from; + } + else tagText += *from; + } + return 0; +} + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/thmlgbf.cpp b/src/modules/filters/thmlgbf.cpp new file mode 100644 index 0000000..f8703b1 --- /dev/null +++ b/src/modules/filters/thmlgbf.cpp @@ -0,0 +1,291 @@ 
+/*************************************************************************** + thmlgbf.cpp - ThML to GBF filter + ------------------- + begin : 1999-10-28 + copyright : 2001 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <thmlgbf.h> +#include <utilstr.h> +#include <swbuf.h> + +SWORD_NAMESPACE_START + +ThMLGBF::ThMLGBF() +{ +} + + +char ThMLGBF::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + const char *from; + char token[2048]; + int tokpos = 0; + bool intoken = false; + bool ampersand = false; + bool sechead = false; + bool title = false; + + SWBuf orig = text; + from = orig.c_str(); + + for (text = ""; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + ampersand = false; + continue; + } + else if (*from == '&') { + intoken = true; + tokpos = 0; + memset(token, 0, 2048); + ampersand = true; + continue; + } + if (*from == ';' && ampersand) { + intoken = false; + + if (!strncmp("nbsp", token, 4)) text += ' '; + else if (!strncmp("quot", token, 4)) text += '"'; + else if (!strncmp("amp", token, 3)) text += '&'; + else if (!strncmp("lt", token, 2)) text += '<'; + else if (!strncmp("gt", token, 2)) text += '>'; + else if (!strncmp("brvbar", token, 6)) text += '¦'; + else if (!strncmp("sect", token, 4)) text += '§'; + else if (!strncmp("copy", token, 4)) text += '©'; + else if (!strncmp("laquo", token, 5)) text += '«'; + else if (!strncmp("reg", token, 3)) 
text += '®'; + else if (!strncmp("acute", token, 5)) text += '´'; + else if (!strncmp("para", token, 4)) text += '¶'; + else if (!strncmp("raquo", token, 5)) text += '»'; + + else if (!strncmp("Aacute", token, 6)) text += 'Á'; + else if (!strncmp("Agrave", token, 6)) text += 'À'; + else if (!strncmp("Acirc", token, 5)) text += 'Â'; + else if (!strncmp("Auml", token, 4)) text += 'Ä'; + else if (!strncmp("Atilde", token, 6)) text += 'Ã'; + else if (!strncmp("Aring", token, 5)) text += 'Å'; + else if (!strncmp("aacute", token, 6)) text += 'á'; + else if (!strncmp("agrave", token, 6)) text += 'à'; + else if (!strncmp("acirc", token, 5)) text += 'â'; + else if (!strncmp("auml", token, 4)) text += 'ä'; + else if (!strncmp("atilde", token, 6)) text += 'ã'; + else if (!strncmp("aring", token, 5)) text += 'å'; + else if (!strncmp("Eacute", token, 6)) text += 'É'; + else if (!strncmp("Egrave", token, 6)) text += 'È'; + else if (!strncmp("Ecirc", token, 5)) text += 'Ê'; + else if (!strncmp("Euml", token, 4)) text += 'Ë'; + else if (!strncmp("eacute", token, 6)) text += 'é'; + else if (!strncmp("egrave", token, 6)) text += 'è'; + else if (!strncmp("ecirc", token, 5)) text += 'ê'; + else if (!strncmp("euml", token, 4)) text += 'ë'; + else if (!strncmp("Iacute", token, 6)) text += 'Í'; + else if (!strncmp("Igrave", token, 6)) text += 'Ì'; + else if (!strncmp("Icirc", token, 5)) text += 'Î'; + else if (!strncmp("Iuml", token, 4)) text += 'Ï'; + else if (!strncmp("iacute", token, 6)) text += 'í'; + else if (!strncmp("igrave", token, 6)) text += 'ì'; + else if (!strncmp("icirc", token, 5)) text += 'î'; + else if (!strncmp("iuml", token, 4)) text += 'ï'; + else if (!strncmp("Oacute", token, 6)) text += 'Ó'; + else if (!strncmp("Ograve", token, 6)) text += 'Ò'; + else if (!strncmp("Ocirc", token, 5)) text += 'Ô'; + else if (!strncmp("Ouml", token, 4)) text += 'Ö'; + else if (!strncmp("Otilde", token, 6)) text += 'Õ'; + else if (!strncmp("oacute", token, 6)) text += 'ó'; + else if 
(!strncmp("ograve", token, 6)) text += 'ò'; + else if (!strncmp("ocirc", token, 5)) text += 'ô'; + else if (!strncmp("ouml", token, 4)) text += 'ö'; + else if (!strncmp("otilde", token, 6)) text += 'õ'; + else if (!strncmp("Uacute", token, 6)) text += 'Ú'; + else if (!strncmp("Ugrave", token, 6)) text += 'Ù'; + else if (!strncmp("Ucirc", token, 5)) text += 'Û'; + else if (!strncmp("Uuml", token, 4)) text += 'Ü'; + else if (!strncmp("uacute", token, 6)) text += 'ú'; + else if (!strncmp("ugrave", token, 6)) text += 'ù'; + else if (!strncmp("ucirc", token, 5)) text += 'û'; + else if (!strncmp("uuml", token, 4)) text += 'ü'; + else if (!strncmp("Yacute", token, 6)) text += 'Ý'; + else if (!strncmp("yacute", token, 6)) text += 'ý'; + else if (!strncmp("yuml", token, 4)) text += 'ÿ'; + + else if (!strncmp("deg", token, 3)) text += '°'; + else if (!strncmp("plusmn", token, 6)) text += '±'; + else if (!strncmp("sup2", token, 4)) text += '²'; + else if (!strncmp("sup3", token, 4)) text += '³'; + else if (!strncmp("sup1", token, 4)) text += '¹'; + else if (!strncmp("nbsp", token, 4)) text += 'º'; + else if (!strncmp("pound", token, 5)) text += '£'; + else if (!strncmp("cent", token, 4)) text += '¢'; + else if (!strncmp("frac14", token, 6)) text += '¼'; + else if (!strncmp("frac12", token, 6)) text += '½'; + else if (!strncmp("frac34", token, 6)) text += '¾'; + else if (!strncmp("iquest", token, 6)) text += '¿'; + else if (!strncmp("iexcl", token, 5)) text += '¡'; + else if (!strncmp("ETH", token, 3)) text += 'Ð'; + else if (!strncmp("eth", token, 3)) text += 'ð'; + else if (!strncmp("THORN", token, 5)) text += 'Þ'; + else if (!strncmp("thorn", token, 5)) text += 'þ'; + else if (!strncmp("AElig", token, 5)) text += 'Æ'; + else if (!strncmp("aelig", token, 5)) text += 'æ'; + else if (!strncmp("Oslash", token, 6)) text += 'Ø'; + else if (!strncmp("curren", token, 6)) text += '¤'; + else if (!strncmp("Ccedil", token, 6)) text += 'Ç'; + else if (!strncmp("ccedil", token, 6)) text 
+= 'ç'; + else if (!strncmp("szlig", token, 5)) text += 'ß'; + else if (!strncmp("Ntilde", token, 6)) text += 'Ñ'; + else if (!strncmp("ntilde", token, 6)) text += 'ñ'; + else if (!strncmp("yen", token, 3)) text += '¥'; + else if (!strncmp("not", token, 3)) text += '¬'; + else if (!strncmp("ordf", token, 4)) text += 'ª'; + else if (!strncmp("uml", token, 3)) text += '¨'; + else if (!strncmp("shy", token, 3)) text += '­'; + else if (!strncmp("macr", token, 4)) text += '¯'; + else if (!strncmp("micro", token, 5)) text += "µ"; + else if (!strncmp("middot", token, 6)) text +="·"; + else if (!strncmp("cedil", token, 5)) text += "¸"; + else if (!strncmp("ordm", token, 4)) text += "º"; + else if (!strncmp("times", token, 5)) text += "×"; + else if (!strncmp("divide", token, 6)) text +="÷"; + else if (!strncmp("oslash", token, 6)) text +="ø"; + continue; + + } + else if (*from == '>' && !ampersand) { + intoken = false; + // process desired tokens + if (!strncmp(token, "sync type=\"Strongs\" value=\"", 27)) { + text += "<W"; + for (unsigned int i = 27; token[i] != '\"'; i++) + text += token[i]; + text += '>'; + continue; + } + if (!strncmp(token, "sync type=\"morph\" value=\"", 25)) { + text += "<WT"; + for (unsigned int i = 25; token[i] != '\"'; i++) + text += token[i]; + text += '>'; + continue; + } + else if (!strncmp(token, "scripRef", 8)) { + text += "<RX>"; + continue; + } + else if (!strncmp(token, "/scripRef", 9)) { + text += "<Rx>"; + continue; + } + else if (!strncmp(token, "note", 4)) { + text += "<RF>"; + continue; + } + else if (!strncmp(token, "/note", 5)) { + text += "<Rf>"; + continue; + } + else if (!strncmp(token, "sup", 3)) { + text += "<FS>"; + } + else if (!strncmp(token, "/sup", 4)) { + text += "<Fs>"; + } + else if (!strnicmp(token, "font color=#ff0000", 18)) { + text += "<FR>"; + continue; + } + else if (!strnicmp(token, "/font", 5)) { + text += "<Fr>"; + continue; + } + else if (!strncmp(token, "div class=\"sechead\"", 19)) { + text += "<TS>"; + 
sechead = true; + continue; + } + else if (sechead && !strncmp(token, "/div", 19)) { + text += "<Ts>"; + sechead = false; + continue; + } + else if (!strncmp(token, "div class=\"title\"", 19)) { + text += "<TT>"; + title = true; + continue; + } + else if (title && !strncmp(token, "/div", 19)) { + text += "<Tt>"; + title = false; + continue; + } + else if (!strnicmp(token, "br", 2)) { + text += "<CL>"; + continue; + } + else switch(*token) { + case 'I': // font tags + case 'i': + text += "<FI>"; + continue; + case 'B': // bold start + case 'b': + text += "<FB>"; + continue; + case '/': + switch(token[1]) { + case 'P': + case 'p': + text += "<CM>"; + continue; + case 'I': + case 'i': // italic end + text += "<Fi>"; + continue; + case 'B': // bold start + case 'b': + text += "<Fb>"; + continue; + } + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else text += *from; + } + + orig = text; + from = orig.c_str(); + for (text = ""; *from; from++) { //loop to remove extra spaces + if ((strchr(" \t\n\r", *from))) { + while (*(from+1) && (strchr(" \t\n\r", *(from+1)))) { + from++; + } + text += " "; + } + else { + text += *from; + } + } + text += (char)0; + + return 0; +} + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/thmlheadings.cpp b/src/modules/filters/thmlheadings.cpp new file mode 100644 index 0000000..4d6134f --- /dev/null +++ b/src/modules/filters/thmlheadings.cpp @@ -0,0 +1,153 @@ +/****************************************************************************** + * + * thmlheadings - SWFilter descendant to hide or show headings + * in a ThML module. 
#include <stdlib.h>
#include <stdio.h>
#include <thmlheadings.h>
#include <utilxml.h>
#include <utilstr.h>
#include <swmodule.h>
#include <stdio.h>

SWORD_NAMESPACE_START

// option name and tool tip handed to the SWOptionFilter base
const char oName[] = "Headings";
const char oTip[] = "Toggles Headings On and Off if they exist";

// selectable values; the third, empty element only marks the end of the range
const SWBuf choices[3] = {"Off", "On", ""};
const StringList oValues(&choices[0], &choices[2]);

// Registers the option and selects "Off" as the initial value.
ThMLHeadings::ThMLHeadings() : SWOptionFilter(oName, oTip, &oValues) {
	setOptionValue("Off");
}


ThMLHeadings::~ThMLHeadings() {
}


// Walks the text character by character and pulls <div class="sechead"> /
// <div class="title"> headings out of it.
//
// A non-empty heading start tag switches collection on (hide) and marks the
// heading as preverse; every heading is currently treated as preverse (see
// the commented-out alternatives below). At the matching </div>, when the
// module processes entry attributes and (option || !preverse), the collected
// heading is stored under getEntryAttributes()["Heading"]["Preverse"][n] or
// ["Interverse"][n], and the start tag's attributes under ["Heading"][n].
// Text outside headings and all other tags are copied through unchanged.
//
// text   - in: original text; out: filtered text
// key    - not used in this function
// module - dereferenced whenever a collected heading closes
// Returns 0 in all cases.
char ThMLHeadings::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
	SWBuf token;
	bool intoken    = false;	// between '<' and '>'
	bool isheader   = false;	// last div tag seen carried a heading class
	bool hide       = false;	// collecting heading content instead of copying it
	bool preverse   = false;	// heading being collected is a preverse heading
	bool withinDiv  = false;	// last div tag seen was a start (not an end) tag
	SWBuf header;				// collected heading content
	int headerNum   = 0;		// next index under "Interverse"
	int pvHeaderNum = 0;		// next index under "Preverse"
	char buf[254];				// heading index rendered as text
	XMLTag startTag;			// the div tag that opened the current heading

	SWBuf orig = text;
	const char *from = orig.c_str();

	XMLTag tag;

	for (text = ""; *from; ++from) {
		if (*from == '<') {
			intoken = true;
			token = "";

			continue;
		}
		if (*from == '>') {	// process tokens
			intoken = false;

			// only div start/end tags are parsed; matching is by prefix
			if (!strnicmp(token.c_str(), "div", 3) || !strnicmp(token.c_str(), "/div", 4)) {
				withinDiv =  (!strnicmp(token.c_str(), "div", 3));
				tag = token;
				if (hide && tag.isEndTag()) {
					if (module->isProcessEntryAttributes() && (option || (!preverse))) {
						if (preverse) {
							sprintf(buf, "%i", pvHeaderNum++);
							module->getEntryAttributes()["Heading"]["Preverse"][buf] = header;
						}
						else {
							sprintf(buf, "%i", headerNum++);
							module->getEntryAttributes()["Heading"]["Interverse"][buf] = header;
							if (option) {	// we want the tag in the text
								text.append(header);
							}
						}

						// record the opening tag's attributes under the same index
						StringList attributes = startTag.getAttributeNames();
						for (StringList::const_iterator it = attributes.begin(); it != attributes.end(); it++) {
							module->getEntryAttributes()["Heading"][buf][it->c_str()] = startTag.getAttribute(it->c_str());
						}
					}

					hide = false;
					if (!option || preverse) {	// we don't want the tag in the text anymore
						preverse = false;
						continue;
					}
					preverse = false;
				}
				if (tag.getAttribute("class") && ((!stricmp(tag.getAttribute("class"), "sechead"))
										 ||  (!stricmp(tag.getAttribute("class"), "title")))) {

					isheader = true;

					if (!tag.isEndTag()) { //start tag
						if (!tag.isEmpty()) {
							startTag = tag;

/* how do we tell a ThML preverse title from one that should be in the text?  probably if any text is before the title...  just assuming all are preverse for now
					}
					if (tag.getAttribute("subtype") && !stricmp(tag.getAttribute("subtype"), "x-preverse")) {
*/
							// start collecting; the start tag itself is not copied anywhere
							hide = true;
							preverse = true;
							header = "";
							continue;
						}	// move back up under startTag = tag
					}
/* this is where non-preverse will go eventually
					if (!tag.isEndTag()) { //start tag
						hide = true;
						header = "";
						if (option) {	// we want the tag in the text
							text.append('<');
							text.append(token);
							text.append('>');
						}
						continue;
					}
*/
				}
				else
					isheader = false;
			}

			// tokens seen while the last div was a heading start tag go into the heading
			if (withinDiv && isheader) {
				header.append('<');
				header.append(token);
				header.append('>');
			} else {
				// if not a heading token, keep token in text
				if (!hide) {
					text.append('<');
					text.append(token);
					text.append('>');
				}
			}
			continue;
		}
		if (intoken) { //copy token
			token.append(*from);
		}
		else if (!hide) { //copy text which is not inside a token
			text.append(*from);
		}
		else header.append(*from);	// text inside a heading being collected
	}
	return 0;
}


SWORD_NAMESPACE_END
+/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <thmlhtml.h> +#include <swmodule.h> +#include <utilxml.h> + +SWORD_NAMESPACE_START + +ThMLHTML::ThMLHTML() { + setTokenStart("<"); + setTokenEnd(">"); + + setEscapeStart("&"); + setEscapeEnd(";"); + + setEscapeStringCaseSensitive(true); + setPassThruNumericEscapeString(true); + + addAllowedEscapeString("quot"); + addAllowedEscapeString("amp"); + addAllowedEscapeString("lt"); + addAllowedEscapeString("gt"); + + addAllowedEscapeString("nbsp"); + addAllowedEscapeString("brvbar"); // "¦" + addAllowedEscapeString("sect"); // "§" + addAllowedEscapeString("copy"); // "©" + addAllowedEscapeString("laquo"); // "«" + addAllowedEscapeString("reg"); // "®" + addAllowedEscapeString("acute"); // "´" + addAllowedEscapeString("para"); // "¶" + addAllowedEscapeString("raquo"); // "»" + + addAllowedEscapeString("Aacute"); // "Á" + addAllowedEscapeString("Agrave"); // "À" + addAllowedEscapeString("Acirc"); // "Â" + addAllowedEscapeString("Auml"); // "Ä" + addAllowedEscapeString("Atilde"); // "Ã" + addAllowedEscapeString("Aring"); // "Å" + addAllowedEscapeString("aacute"); // "á" + addAllowedEscapeString("agrave"); // "à" + addAllowedEscapeString("acirc"); // "â" + addAllowedEscapeString("auml"); // "ä" + addAllowedEscapeString("atilde"); // "ã" + addAllowedEscapeString("aring"); // "å" + addAllowedEscapeString("Eacute"); // "É" + addAllowedEscapeString("Egrave"); // "È" + addAllowedEscapeString("Ecirc"); // "Ê" + addAllowedEscapeString("Euml"); // "Ë" + addAllowedEscapeString("eacute"); // "é" + 
addAllowedEscapeString("egrave"); // "è" + addAllowedEscapeString("ecirc"); // "ê" + addAllowedEscapeString("euml"); // "ë" + addAllowedEscapeString("Iacute"); // "Í" + addAllowedEscapeString("Igrave"); // "Ì" + addAllowedEscapeString("Icirc"); // "Î" + addAllowedEscapeString("Iuml"); // "Ï" + addAllowedEscapeString("iacute"); // "í" + addAllowedEscapeString("igrave"); // "ì" + addAllowedEscapeString("icirc"); // "î" + addAllowedEscapeString("iuml"); // "ï" + addAllowedEscapeString("Oacute"); // "Ó" + addAllowedEscapeString("Ograve"); // "Ò" + addAllowedEscapeString("Ocirc"); // "Ô" + addAllowedEscapeString("Ouml"); // "Ö" + addAllowedEscapeString("Otilde"); // "Õ" + addAllowedEscapeString("oacute"); // "ó" + addAllowedEscapeString("ograve"); // "ò" + addAllowedEscapeString("ocirc"); // "ô" + addAllowedEscapeString("ouml"); // "ö" + addAllowedEscapeString("otilde"); // "õ" + addAllowedEscapeString("Uacute"); // "Ú" + addAllowedEscapeString("Ugrave"); // "Ù" + addAllowedEscapeString("Ucirc"); // "Û" + addAllowedEscapeString("Uuml"); // "Ü" + addAllowedEscapeString("uacute"); // "ú" + addAllowedEscapeString("ugrave"); // "ù" + addAllowedEscapeString("ucirc"); // "û" + addAllowedEscapeString("uuml"); // "ü" + addAllowedEscapeString("Yacute"); // "Ý" + addAllowedEscapeString("yacute"); // "ý" + addAllowedEscapeString("yuml"); // "ÿ" + + addAllowedEscapeString("deg"); // "°" + addAllowedEscapeString("plusmn"); // "±" + addAllowedEscapeString("sup2"); // "²" + addAllowedEscapeString("sup3"); // "³" + addAllowedEscapeString("sup1"); // "¹" + addAllowedEscapeString("nbsp"); // "º" + addAllowedEscapeString("pound"); // "£" + addAllowedEscapeString("cent"); // "¢" + addAllowedEscapeString("frac14"); // "¼" + addAllowedEscapeString("frac12"); // "½" + addAllowedEscapeString("frac34"); // "¾" + addAllowedEscapeString("iquest"); // "¿" + addAllowedEscapeString("iexcl"); // "¡" + addAllowedEscapeString("ETH"); // "Ð" + addAllowedEscapeString("eth"); // "ð" + 
addAllowedEscapeString("THORN"); // "Þ" + addAllowedEscapeString("thorn"); // "þ" + addAllowedEscapeString("AElig"); // "Æ" + addAllowedEscapeString("aelig"); // "æ" + addAllowedEscapeString("Oslash"); // "Ø" + addAllowedEscapeString("curren"); // "¤" + addAllowedEscapeString("Ccedil"); // "Ç" + addAllowedEscapeString("ccedil"); // "ç" + addAllowedEscapeString("szlig"); // "ß" + addAllowedEscapeString("Ntilde"); // "Ñ" + addAllowedEscapeString("ntilde"); // "ñ" + addAllowedEscapeString("yen"); // "¥" + addAllowedEscapeString("not"); // "¬" + addAllowedEscapeString("ordf"); // "ª" + addAllowedEscapeString("uml"); // "¨" + addAllowedEscapeString("shy"); // "­" + addAllowedEscapeString("macr"); // "¯" + + addAllowedEscapeString("micro"); // "µ" + addAllowedEscapeString("middot"); // "·" + addAllowedEscapeString("cedil"); // "¸" + addAllowedEscapeString("ordm"); // "º" + addAllowedEscapeString("times"); // "×" + addAllowedEscapeString("divide"); // "÷" + addAllowedEscapeString("oslash"); // "ø" + + setTokenCaseSensitive(true); + + addTokenSubstitute("note", " <font color=\"#800000\"><small>("); + addTokenSubstitute("/note", ")</small></font> "); +} + + +bool ThMLHTML::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) { + if (!substituteToken(buf, token)) { // manually process if it wasn't a simple substitution + MyUserData *u = (MyUserData *)userData; + XMLTag tag(token); + if (!strcmp(tag.getName(), "sync")) { + if (tag.getAttribute("type") && tag.getAttribute("value") && !strcmp(tag.getAttribute("type"), "Strongs")) { + const char* value = tag.getAttribute("value"); + if (*value == 'H' || *value == 'G' || *value == 'A') { + value++; + buf += "<small><em>"; + buf += value; + buf += "</em></small>"; + } + else if (*value == 'T') { + value += 2; + + buf += "<small><i>"; + buf += value; + buf += "</i></small>"; + } + } + else if (tag.getAttribute("type") && tag.getAttribute("value") && !strcmp(tag.getAttribute("type"), "morph")) { + buf += 
"<small><em>"; + buf += tag.getAttribute("value"); + buf += "</em></small>"; + } + else if (tag.getAttribute("type") && tag.getAttribute("value") && !strcmp(tag.getAttribute("type"), "lemma")) { + buf += "<small><em>("; + buf += tag.getAttribute("value"); + buf += ")</em></small>"; + } + } + else if (!strcmp(tag.getName(), "div")) { + if (tag.isEndTag() && (u->SecHead)) { + buf += "</i></b><br />"; + u->SecHead = false; + } + else if (tag.getAttribute("class")) { + if (!strcmp(tag.getAttribute("class"), "sechead")) { + u->SecHead = true; + buf += "<br /><b><i>"; + } + else if (!strcmp(tag.getAttribute("class"), "title")) { + u->SecHead = true; + buf += "<br /><b><i>"; + } + } + } + else if (!strcmp(tag.getName(), "img")) { + const char *src = strstr(token, "src"); + if (!src) // assert we have a src attribute + return false; + + buf += '<'; + for (const char *c = token; *c; c++) { + if (c == src) { + for (;((*c) && (*c != '"')); c++) + buf += *c; + + if (!*c) { c--; continue; } + + buf += '"'; + if (*(c+1) == '/') { + buf += "file:"; + buf += userData->module->getConfigEntry("AbsoluteDataPath"); + if (buf[buf.length()-2] == '/') + c++; // skip '/' + } + continue; + } + buf += *c; + } + buf += '>'; + } + else if (!strcmp(tag.getName(), "scripRef")) { //do nothing with scrip refs, we leave them out + + } + else { + buf += '<'; + buf += token; + buf += '>'; + +// return false; // we still didn't handle token + } + } + return true; +} + + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/thmlhtmlhref.cpp b/src/modules/filters/thmlhtmlhref.cpp new file mode 100644 index 0000000..0596f75 --- /dev/null +++ b/src/modules/filters/thmlhtmlhref.cpp @@ -0,0 +1,357 @@ +/*************************************************************************** + thmlhtmlhref.cpp - ThML to HTML filter with hrefs + ------------------- + begin : 2001-09-03 + copyright : 2001 by CrossWire Bible Society + ***************************************************************************/ + 
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+#include <stdlib.h>
+#include <thmlhtmlhref.h>
+#include <swmodule.h>
+#include <utilxml.h>
+#include <utilstr.h>
+#include <versekey.h>
+#include <url.h>
+
+SWORD_NAMESPACE_START
+
+
+/**
+ * Named character escapes registered with addAllowedEscapeString() by the
+ * ThMLHTMLHREF constructor: the four XML basics followed by the Latin-1
+ * entity names.
+ */
+static const char *const allowedEscapeNames[] = {
+	"quot", "amp", "lt", "gt",
+
+	"nbsp", "brvbar", "sect", "copy", "laquo", "reg", "acute", "para", "raquo",
+
+	"Aacute", "Agrave", "Acirc", "Auml", "Atilde", "Aring",
+	"aacute", "agrave", "acirc", "auml", "atilde", "aring",
+	"Eacute", "Egrave", "Ecirc", "Euml",
+	"eacute", "egrave", "ecirc", "euml",
+	"Iacute", "Igrave", "Icirc", "Iuml",
+	"iacute", "igrave", "icirc", "iuml",
+	"Oacute", "Ograve", "Ocirc", "Ouml", "Otilde",
+	"oacute", "ograve", "ocirc", "ouml", "otilde",
+	"Uacute", "Ugrave", "Ucirc", "Uuml",
+	"uacute", "ugrave", "ucirc", "uuml",
+	"Yacute", "yacute", "yuml",
+
+	"deg", "plusmn", "sup2", "sup3", "sup1", "pound", "cent",
+	"frac14", "frac12", "frac34", "iquest", "iexcl",
+	"ETH", "eth", "THORN", "thorn", "AElig", "aelig", "Oslash", "curren",
+	"Ccedil", "ccedil", "szlig", "Ntilde", "ntilde",
+	"yen", "not", "ordf", "uml", "shy", "macr",
+
+	"micro", "middot", "cedil", "ordm", "times", "divide", "oslash"
+};
+
+
+/**
+ * Per-render state for ThMLHTMLHREF.
+ *
+ * @param module module being rendered; may be NULL
+ * @param key    key being rendered, forwarded to BasicFilterUserData
+ */
+ThMLHTMLHREF::MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) {
+	// handleToken() reads both flags unconditionally, so they must hold a
+	// defined value even when no module was supplied.
+	BiblicalText = false;
+	SecHead = false;
+	if (module) {
+		version = module->Name();
+		BiblicalText = (!strcmp(module->Type(), "Biblical Texts"));
+	}
+}
+
+
+/**
+ * Configures the token ("<" ... ">") and escape ("&" ... ";") delimiters,
+ * registers the allowed named escapes and the one plain token substitution.
+ */
+ThMLHTMLHREF::ThMLHTMLHREF() {
+	setTokenStart("<");
+	setTokenEnd(">");
+
+	setEscapeStart("&");
+	setEscapeEnd(";");
+
+	setEscapeStringCaseSensitive(true);
+	setPassThruNumericEscapeString(true);
+
+	for (size_t i = 0; i < sizeof(allowedEscapeNames) / sizeof(allowedEscapeNames[0]); ++i)
+		addAllowedEscapeString(allowedEscapeNames[i]);
+
+	setTokenCaseSensitive(true);
+//	addTokenSubstitute("scripture", "<i> ");
+	addTokenSubstitute("/scripture", "</i> ");
+}
+
+
+/**
+ * Renders one ThML token (the text between "<" and ">") into buf.
+ *
+ * Tokens with a registered substitution are handled by substituteToken().
+ * Otherwise sync, note, scripture, scripRef, div and img/image tags are
+ * turned into HTML (links use the passagestudy.jsp?action=... form) and any
+ * other tag is copied to the output unchanged.
+ *
+ * @param buf      output buffer that is appended to
+ * @param token    tag text without the surrounding angle brackets
+ * @param userData must be the MyUserData created for this render
+ * @return false only for an img/image tag without a usable src="..."
+ *         attribute; true otherwise
+ */
+bool ThMLHTMLHREF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+	if (substituteToken(buf, token))
+		return true;	// a simple substitution did the whole job
+
+	MyUserData *u = (MyUserData *)userData;
+
+	XMLTag tag(token);
+	if ((!tag.isEndTag()) && (!tag.isEmpty()))
+		u->startTag = tag;	// remembered so the end tag can see the start tag's attributes
+
+	// Some branches used to hand a possibly-NULL name straight to strcmp();
+	// normalise once so every comparison below is safe.
+	const char *name = tag.getName();
+	if (!name)
+		name = "";
+
+	if (!strcmp(name, "sync")) {
+		SWBuf value = tag.getAttribute("value");
+		const char *syncType = tag.getAttribute("type");
+		if (!syncType) {
+			// no type attribute: nothing to render
+		}
+		else if (!strcmp(syncType, "morph")) {
+			if (value.length())
+				buf.appendFormatted("<small><em>(<a href=\"passagestudy.jsp?action=showMorph&type=Greek&value=%s\">%s</a>)</em></small>",
+					URL::encode(value.c_str()).c_str(),
+					value.c_str());
+		}
+		else if (!strcmp(syncType, "lemma")) {
+			if (value.length())
+				// empty "type=" is deliberate.
+				// The angle brackets shown around the link are text, so they are escaped.
+				buf.appendFormatted("<small><em>&lt;<a href=\"passagestudy.jsp?action=showStrongs&type=&value=%s\">%s</a>&gt;</em></small>",
+					URL::encode(value.c_str()).c_str(),
+					value.c_str());
+		}
+		else if (!strcmp(syncType, "Strongs")) {
+			// the first character selects the language and is then dropped
+			char ch = *value;
+			value<<1;
+			buf.appendFormatted("<small><em>&lt;<a href=\"passagestudy.jsp?action=showStrongs&type=%s&value=%s\">",
+				((ch == 'H') ? "Hebrew" : "Greek"),
+				URL::encode(value.c_str()).c_str());
+			buf += (value.length()) ? value.c_str() : "";
+			buf += "</a>&gt;</em></small>";
+		}
+		else if (!strcmp(syncType, "Dict")) {
+			buf += (tag.isEndTag() ? "</b>" : "<b>");
+		}
+	}
+	// <note> tag
+	else if (!strcmp(name, "note")) {
+		if (tag.isEndTag()) {
+			u->suspendTextPassThru = false;
+		}
+		else if (!tag.isEmpty()) {
+			SWBuf footnoteNumber = tag.getAttribute("swordFootnote");
+			const char *noteType = tag.getAttribute("type");
+			VerseKey *vkey = NULL;
+			// see if we have a VerseKey * or descendant
+			SWTRY {
+				vkey = SWDYNAMIC_CAST(VerseKey, u->key);
+			}
+			SWCATCH ( ... ) { }
+
+			// passage text comes from the verse key when there is one, else
+			// from the plain key, else it is left empty
+			const char *passage = "";
+			if (vkey)
+				passage = vkey->getText();
+			else if (u->key)
+				passage = u->key->getText();
+
+			// leave this special osis type in for crossReference notes types?  Might thml use this some day? Doesn't hurt.
+			char ch = (noteType && ((!strcmp(noteType, "crossReference")) || (!strcmp(noteType, "x-cross-ref")))) ? 'x' : 'n';
+			buf.appendFormatted("<a href=\"passagestudy.jsp?action=showNote&type=%c&value=%s&module=%s&passage=%s\"><small><sup>*%c</sup></small></a>",
+				ch,
+				URL::encode(footnoteNumber.c_str()).c_str(),
+				URL::encode(u->version.c_str()).c_str(),
+				URL::encode(passage).c_str(),
+				ch);
+			// the note body is not rendered inline
+			u->suspendTextPassThru = true;
+		}
+	}
+	else if (!strcmp(name, "scripture")) {
+		buf += (tag.isEndTag() ? "</i>" : "<i>");
+	}
+	// <scripRef> tag
+	else if (!strcmp(name, "scripRef")) {
+		if (!tag.isEndTag()) {
+			// hold the reference text back until the end tag is seen
+			if (!tag.isEmpty())
+				u->suspendTextPassThru = true;
+		}
+		else {	// </scripRef>
+			if (!u->BiblicalText) {
+				SWBuf refList = u->startTag.getAttribute("passage");
+				if (!refList.length())
+					refList = u->lastTextNode;
+				SWBuf version = tag.getAttribute("version");
+
+				buf.appendFormatted("<a href=\"passagestudy.jsp?action=showRef&type=scripRef&value=%s&module=%s\">",
+					(refList.length()) ? URL::encode(refList.c_str()).c_str() : "",
+					(version.length()) ? URL::encode(version.c_str()).c_str() : "");
+				buf += u->lastTextNode.c_str();
+				buf += "</a>";
+			}
+			else {
+				SWBuf footnoteNumber = u->startTag.getAttribute("swordFootnote");
+				VerseKey *vkey = NULL;
+				// see if we have a VerseKey * or descendant
+				SWTRY {
+					vkey = SWDYNAMIC_CAST(VerseKey, u->key);
+				}
+				SWCATCH ( ... ) {}
+				if (vkey) {
+					// leave this special osis type in for crossReference notes types?  Might thml use this some day? Doesn't hurt.
+					buf.appendFormatted("<a href=\"passagestudy.jsp?action=showNote&type=x&value=%s&module=%s&passage=%s\"><small><sup>*x</sup></small></a>",
+						URL::encode(footnoteNumber.c_str()).c_str(),
+						URL::encode(u->version.c_str()).c_str(),
+						URL::encode(vkey->getText()).c_str());
+				}
+			}
+
+			// let's let text resume to output again
+			u->suspendTextPassThru = false;
+		}
+	}
+	else if (!strcmp(name, "div")) {
+		const char *cls = tag.getAttribute("class");
+		if (tag.isEndTag() && u->SecHead) {
+			buf += "</i></b><br />";
+			u->SecHead = false;
+		}
+		else if (cls && (!stricmp(cls, "sechead") || !stricmp(cls, "title"))) {
+			// section heads and titles are both rendered bold italic
+			u->SecHead = true;
+			buf += "<br /><b><i>";
+		}
+		else {
+			buf += tag;
+		}
+	}
+	else if (!strcmp(name, "img") || !strcmp(name, "image")) {
+		const char *src = strstr(token, "src");
+		if (!src)		// assert we have a src attribute
+			return false;
+
+		const char *c, *d;
+		if (((c = strchr(src+3, '"')) == NULL) ||
+		    ((d = strchr( ++c , '"')) == NULL))	// identify endpoints.
+			return false;			// abandon hope.
+
+		// Prefix for image paths that start with '/'; an absent module or
+		// config entry degrades to an empty prefix instead of a NULL append.
+		const char *dataPath = (userData->module) ? userData->module->getConfigEntry("AbsoluteDataPath") : NULL;
+		if (!dataPath)
+			dataPath = "";
+
+		SWBuf imagename = "file:";
+		if (*c == '/')				// as below, inside for loop.
+			imagename += dataPath;
+		while (c != d)				// move bits into the name.
+			imagename += *(c++);
+
+		// images become clickable, if the UI supports showImage.
+		buf.appendFormatted("<a href=\"passagestudy.jsp?action=showImage&value=%s&module=%s\"><",
+			URL::encode(imagename.c_str()).c_str(),
+			URL::encode(u->version.c_str()).c_str());
+
+		// copy the tag text, rewriting the src value on the way through
+		for (c = token; *c; c++) {
+			if ((*c == '/') && (*(c+1) == '\0'))
+				continue;	// drop the '/' of a self-closing tag; it is re-added below
+			if (c == src) {
+				for (;((*c) && (*c != '"')); c++)
+					buf += *c;
+
+				if (!*c) { c--; continue; }
+
+				buf += '"';
+				if (*(c+1) == '/') {
+					buf += "file:";
+					buf += dataPath;
+					// NOTE(review): this inspects the second-to-last character of buf,
+					// not the last one - confirm whether length()-1 was intended.
+					if (buf[buf.length()-2] == '/')
+						c++;		// skip '/'
+				}
+				continue;
+			}
+			buf += *c;
+		}
+		buf += " border=0 /></a>";
+	}
+	else {
+		// unknown tag: pass it through untouched
+		buf += '<';
+		buf += token;
+		buf += '>';
+	}
+	return true;
+}
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmllemma.cpp b/src/modules/filters/thmllemma.cpp
new file mode 100644
index 0000000..3e5761d
--- /dev/null
+++ b/src/modules/filters/thmllemma.cpp
@@ -0,0 +1,65 @@
+/******************************************************************************
+ *
+ * thmllemma -	SWFilter descendant to hide or show lemmas
+ *			in a ThML module.
+ */ + + +#include <stdlib.h> +#include <thmllemma.h> + +SWORD_NAMESPACE_START + +const char oName[] = "Lemmas"; +const char oTip[] = "Toggles Lemmas On and Off if they exist"; + +const SWBuf choices[3] = {"Off", "On", ""}; +const StringList oValues(&choices[0], &choices[2]); + +ThMLLemma::ThMLLemma() : SWOptionFilter(oName, oTip, &oValues) { + setOptionValue("Off"); +} + + +ThMLLemma::~ThMLLemma() { +} + + +char ThMLLemma::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + if (!option) { // if we don't want lemmas + bool intoken = false; + + SWBuf token; + SWBuf orig = text; + const char *from = orig.c_str(); + for (text = ""; *from; from++) { + if (*from == '<') { + intoken = true; + token = ""; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (!strncmp(token.c_str(), "sync ", 5) && strstr(token.c_str(), "type=\"lemma\"")) { // Lemma + continue; + } + + // if not a lemma token, keep token in text + text += '<'; + text += token; + text += '>'; + continue; + } + + if (intoken) { + token += *from; + } + else { + text += *from; + } + } + } + return 0; +} + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/thmlmorph.cpp b/src/modules/filters/thmlmorph.cpp new file mode 100644 index 0000000..0fbef56 --- /dev/null +++ b/src/modules/filters/thmlmorph.cpp @@ -0,0 +1,65 @@ +/****************************************************************************** + * + * thmlmorph - SWFilter descendant to hide or show morph tags + * in a ThML module. 
+ */ + + +#include <stdlib.h> +#include <thmlmorph.h> + +SWORD_NAMESPACE_START + +const char oName[] = "Morphological Tags"; +const char oTip[] = "Toggles Morphological Tags On and Off if they exist"; + +const SWBuf choices[3] = {"Off", "On", ""}; +const StringList oValues(&choices[0], &choices[2]); + +ThMLMorph::ThMLMorph() : SWOptionFilter(oName, oTip, &oValues) { + setOptionValue("Off"); +} + + +ThMLMorph::~ThMLMorph() { +} + + +char ThMLMorph::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + if (!option) { // if we don't want morph tags + bool intoken = false; + + SWBuf token; + SWBuf orig = text; + const char *from = orig.c_str(); + for (text = ""; *from; from++) { + if (*from == '<') { + intoken = true; + token = ""; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (!strncmp(token.c_str(), "sync ", 5) && strstr(token.c_str(), "type=\"morph\"")) { // Morph + continue; + } + + // if not a morph tag token, keep token in text + text += '<'; + text += token; + text += '>'; + continue; + } + + if (intoken) { + token += *from; + } + else { + text += *from; + } + } + } + return 0; +} + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/thmlosis.cpp b/src/modules/filters/thmlosis.cpp new file mode 100644 index 0000000..939be82 --- /dev/null +++ b/src/modules/filters/thmlosis.cpp @@ -0,0 +1,575 @@ +/****************************************************************************** + * + * thmlstrongs - SWFilter descendant to hide or show strongs number + * in a ThML module. + */ + + +#include <stdlib.h> +#include <stdio.h> +#include <stdarg.h> +#include <ctype.h> +#include <thmlosis.h> +#include <swmodule.h> +#include <swlog.h> +#include <versekey.h> +#include <utilstr.h> +#include <utilxml.h> + + +SWORD_NAMESPACE_START + +ThMLOSIS::ThMLOSIS() { +} + + +ThMLOSIS::~ThMLOSIS() { +} + + +char ThMLOSIS::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + char token[2048]; // cheese. Fix. 
+ int tokpos = 0; + bool intoken = false; + bool keepToken = false; + bool ampersand = false; + +// static QuoteStack quoteStack; + + bool lastspace = false; + char val[128]; + SWBuf buf; + char *valto; + char *ch; + + const char *wordStart = text.c_str(); + const char *wordEnd = NULL; + + const char *textStart = NULL; + const char *textEnd = NULL; + + bool suspendTextPassThru = false; + bool handled = false; + bool newText = false; + bool newWord = false; + +// SWBuf tmp; + SWBuf divEnd = ""; + + SWBuf orig = text; + const char* from = orig.c_str(); + + text = ""; + for (from = orig.c_str(); *from; ++from) { + + // handle silly <variant word> items in greek whnu, remove when module is fixed + if ((*from == '<') && (*(from+1) < 0)) { + text += "<"; + continue; + } + + if (*from == '<') { //start of new token detected + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + ampersand = false; + textEnd = from-1; + wordEnd = text.c_str() + text.length();//not good, instead of wordEnd = to! 
+ +// wordEnd = to; + continue; + } + + if (*from == '&') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + ampersand = true; + continue; + } + + if (*from == ';' && ampersand) { + intoken = false; + ampersand = false; + + if (*token == '#') { + text += '&'; + text += token; + text += ';'; + } + else if (!strncmp("nbsp", token, 4)) text += ' '; + else if (!strncmp("quot", token, 4)) text += '"'; + else if (!strncmp("amp", token, 3)) text += '&'; + else if (!strncmp("lt", token, 2)) text += '<'; + else if (!strncmp("gt", token, 2)) text += '>'; + else if (!strncmp("brvbar", token, 6)) text += '¦'; + else if (!strncmp("sect", token, 4)) text += '§'; + else if (!strncmp("copy", token, 4)) text += '©'; + else if (!strncmp("laquo", token, 5)) text += '«'; + else if (!strncmp("reg", token, 3)) text += '®'; + else if (!strncmp("acute", token, 5)) text += '´'; + else if (!strncmp("para", token, 4)) text += '¶'; + else if (!strncmp("raquo", token, 5)) text += '»'; + else if (!strncmp("Aacute", token, 6)) text += 'Á'; + else if (!strncmp("Agrave", token, 6)) text += 'À'; + else if (!strncmp("Acirc", token, 5)) text += 'Â'; + else if (!strncmp("Auml", token, 4)) text += 'Ä'; + else if (!strncmp("Atilde", token, 6)) text += 'Ã'; + else if (!strncmp("Aring", token, 5)) text += 'Å'; + else if (!strncmp("aacute", token, 6)) text += 'á'; + else if (!strncmp("agrave", token, 6)) text += 'à'; + else if (!strncmp("acirc", token, 5)) text += 'â'; + else if (!strncmp("auml", token, 4)) text += 'ä'; + else if (!strncmp("atilde", token, 6)) text += 'ã'; + else if (!strncmp("aring", token, 5)) text += 'å'; + else if (!strncmp("Eacute", token, 6)) text += 'É'; + else if (!strncmp("Egrave", token, 6)) text += 'È'; + else if (!strncmp("Ecirc", token, 5)) text += 'Ê'; + else if (!strncmp("Euml", token, 4)) text += 'Ë'; + else if (!strncmp("eacute", token, 6)) text += 'é'; + else if (!strncmp("egrave", token, 6)) text += 'è'; + else if (!strncmp("ecirc", 
token, 5)) text += 'ê'; + else if (!strncmp("euml", token, 4)) text += 'ë'; + else if (!strncmp("Iacute", token, 6)) text += 'Í'; + else if (!strncmp("Igrave", token, 6)) text += 'Ì'; + else if (!strncmp("Icirc", token, 5)) text += 'Î'; + else if (!strncmp("Iuml", token, 4)) text += 'Ï'; + else if (!strncmp("iacute", token, 6)) text += 'í'; + else if (!strncmp("igrave", token, 6)) text += 'ì'; + else if (!strncmp("icirc", token, 5)) text += 'î'; + else if (!strncmp("iuml", token, 4)) text += 'ï'; + else if (!strncmp("Oacute", token, 6)) text += 'Ó'; + else if (!strncmp("Ograve", token, 6)) text += 'Ò'; + else if (!strncmp("Ocirc", token, 5)) text += 'Ô'; + else if (!strncmp("Ouml", token, 4)) text += 'Ö'; + else if (!strncmp("Otilde", token, 6)) text += 'Õ'; + else if (!strncmp("oacute", token, 6)) text += 'ó'; + else if (!strncmp("ograve", token, 6)) text += 'ò'; + else if (!strncmp("ocirc", token, 5)) text += 'ô'; + else if (!strncmp("ouml", token, 4)) text += 'ö'; + else if (!strncmp("otilde", token, 6)) text += 'õ'; + else if (!strncmp("Uacute", token, 6)) text += 'Ú'; + else if (!strncmp("Ugrave", token, 6)) text += 'Ù'; + else if (!strncmp("Ucirc", token, 5)) text += 'Û'; + else if (!strncmp("Uuml", token, 4)) text += 'Ü'; + else if (!strncmp("uacute", token, 6)) text += 'ú'; + else if (!strncmp("ugrave", token, 6)) text += 'ù'; + else if (!strncmp("ucirc", token, 5)) text += 'û'; + else if (!strncmp("uuml", token, 4)) text += 'ü'; + else if (!strncmp("Yacute", token, 6)) text += 'Ý'; + else if (!strncmp("yacute", token, 6)) text += 'ý'; + else if (!strncmp("yuml", token, 4)) text += 'ÿ'; + + else if (!strncmp("deg", token, 3)) text += '°'; + else if (!strncmp("plusmn", token, 6)) text += '±'; + else if (!strncmp("sup2", token, 4)) text += '²'; + else if (!strncmp("sup3", token, 4)) text += '³'; + else if (!strncmp("sup1", token, 4)) text += '¹'; + else if (!strncmp("nbsp", token, 4)) text += 'º'; + else if (!strncmp("pound", token, 5)) text += '£'; + else if 
(!strncmp("cent", token, 4)) text += '¢'; + else if (!strncmp("frac14", token, 6)) text += '¼'; + else if (!strncmp("frac12", token, 6)) text += '½'; + else if (!strncmp("frac34", token, 6)) text += '¾'; + else if (!strncmp("iquest", token, 6)) text += '¿'; + else if (!strncmp("iexcl", token, 5)) text += '¡'; + else if (!strncmp("ETH", token, 3)) text += 'Ð'; + else if (!strncmp("eth", token, 3)) text += 'ð'; + else if (!strncmp("THORN", token, 5)) text += 'Þ'; + else if (!strncmp("thorn", token, 5)) text += 'þ'; + else if (!strncmp("AElig", token, 5)) text += 'Æ'; + else if (!strncmp("aelig", token, 5)) text += 'æ'; + else if (!strncmp("Oslash", token, 6)) text += 'Ø'; + else if (!strncmp("curren", token, 6)) text += '¤'; + else if (!strncmp("Ccedil", token, 6)) text += 'Ç'; + else if (!strncmp("ccedil", token, 6)) text += 'ç'; + else if (!strncmp("szlig", token, 5)) text += 'ß'; + else if (!strncmp("Ntilde", token, 6)) text += 'Ñ'; + else if (!strncmp("ntilde", token, 6)) text += 'ñ'; + else if (!strncmp("yen", token, 3)) text += '¥'; + else if (!strncmp("not", token, 3)) text += '¬'; + else if (!strncmp("ordf", token, 4)) text += 'ª'; + else if (!strncmp("uml", token, 3)) text += '¨'; + else if (!strncmp("shy", token, 3)) text += '­'; + else if (!strncmp("macr", token, 4)) text += '¯'; + else if (!strncmp("micro", token, 5)) text += "µ"; + else if (!strncmp("middot", token, 6)) text +="·"; + else if (!strncmp("cedil", token, 5)) text += "¸"; + else if (!strncmp("ordm", token, 4)) text += "º"; + else if (!strncmp("times", token, 5)) text += "×"; + else if (!strncmp("divide", token, 6)) text +="÷"; + else if (!strncmp("oslash", token, 6)) text +="ø"; + continue; + } + + // handle silly <variant word> items in greek whnu, remove when module is fixed + if ((*from == '>') && (*(from-1) < 0)) { + text += ">"; + continue; + } + + if (*from == '>') { // process tokens + intoken = false; + keepToken = false; + suspendTextPassThru = false; + newWord = true; + handled = 
false; + + while (wordStart < (text.c_str() + text.length())) { //hack + if (strchr(";,. :?!()'\"", *wordStart) && wordStart[0] && wordStart[1]) + wordStart++; + else break; + } + while (wordEnd > wordStart) { + if (strchr(" ,;:.?!()'\"", *wordEnd)) + wordEnd--; + else break; + } + + // variants + if (!strncmp(token, "div type=\"variant\"", 18)) { + XMLTag tag = token; + text.append("<seg type=\"x-variant\""); + SWBuf cls = "x-class:"; + cls += tag.getAttribute("class"); + if (cls.length()>8) + text.appendFormatted(" subType=\"%s\"", cls.c_str()); + + text += ">"; + divEnd = "</seg>"; + newText = true; + lastspace = false; + handled = true; + } + // section titles + if (!strcmp(token, "div class=\"sechead\"")) { +// pushString(&to, "<title>"); + text.append("<title>"); + divEnd = ""; + newText = true; + lastspace = false; + handled = true; + } + else if (!strcmp(token, "/div")) { + //pushString(&to, divEnd.c_str()); + text.append(divEnd); + lastspace = false; + handled = true; + } + // Scripture Reference + if (!strncmp(token, "scripRef", 8)) { + // pushString(buf, "Type(), "Biblical Texts")) { +// // Italics assume transchange for Biblical texts +// if (!stricmp(token, "i")) { +// pushString(&to, ""); +// newText = true; +// lastspace = false; +// handled = true; +// } +// else if (!stricmp(token, "/i")) { +// pushString(&to, ""); +// lastspace = false; +// handled = true; +// } +// } +// else { +// // otherwise, italics are just italics +//-- end italics for transchange + if (!stricmp(token, "i")) { +// pushString(&to, ""); + text.append(""); + newText = true; + lastspace = false; + handled = true; + } + else if (!stricmp(token, "/i")) { +// pushString(&to, ""); + text.append(""); + lastspace = false; + handled = true; + } +// } + + if (!strcmp(token, "b")) { +// pushString(&to, ""); + text.append(""); + newText = true; + lastspace = false; + handled = true; + } + else if (!strcmp(token, "/b")) { +// pushString(&to, ""); + text.append(""); + lastspace = false; + 
handled = true; + } + + // Footnote + if (!strcmp(token, "note")) { + //pushString(&to, ""); + text.append(""); + newText = true; + lastspace = false; + handled = true; + } + else if (!strcmp(token, "/note")) { + // pushString(&to, ""); + text.append(""); + lastspace = false; + handled = true; + } + + // Figure + else if (!strncmp(token, "img ", 4)) { + const char *src = strstr(token, "src"); + if (!src) // assert we have a src attribute + continue; +// return false; + + //pushString(&to, "

getConfigEntry("AbsoluteDataPath")); +// if (*((*buf)-1) == '/') +// c++; // skip '/' +// } +// end of uncomment for asolute path logic + +// for (c++;((*c) && (*c != '"')); c++) +// *to++ = *c; + + //pushString(&to, "\" />"); + text.append("\" />"); + handled = true; + } + + // Strongs numbers + else if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs + valto = val; + for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) + *valto++ = token[i]; + *valto = 0; + if (atoi((!isdigit(*val))?val+1:val) < 5627) { + // normal strongs number + strstrip(val); + + if (!strncmp(wordStart, " attribute! + buf = ""; + buf.appendFormatted("", val); + text.insert(wordStart - text.c_str(), buf); + text += ""; + lastspace = false; + } + } + // OLB verb morph, leave it out of OSIS tag + else { + } + handled = true; + } + + // Morphology + else if (!strncmp(token, "sync type=\"morph\"", 17)) { + SWBuf cls = ""; + SWBuf morph = ""; + for (ch = token+17; *ch; ch++) { + if (!strncmp(ch, "class=\"", 7)) { + valto = val; + for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) + *valto++ = ch[i]; + *valto = 0; + strstrip(val); + cls = val; + } + if (!strncmp(ch, "value=\"", 7)) { + valto = val; + for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) + *valto++ = ch[i]; + *valto = 0; + strstrip(val); + morph = val; + } + } + if (!strncmp(wordStart, " attribute fond + buf = ""; + buf.appendFormatted("", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); + text.insert(wordStart - text.c_str(), buf); + text += ""; + lastspace = false; + + } + handled = true; + } + + if (!keepToken) { + if (!handled) { + SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? 
(const char*)*key : ""); +// exit(-1); + } + if (from[1] && strchr(" ,;.:?!()'\"", from[1])) { + if (lastspace) { + text--; + } + } + if (newText) { + textStart = from+1; + newText = false; + } + continue; + } + + // if not a strongs token, keep token in text + text.appendFormatted("<%s>", token); + + if (newText) { + textStart = text.c_str() + text.length(); + newWord = false; + } + continue; + } + if (intoken) { + if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + } + else { + switch (*from) { + case '\'': + case '\"': + case '`': +// quoteStack.handleQuote(fromStart, from, &to); + text += *from; + //from++; //this line removes chars after an apostrophe! Needs fixing. + break; + default: + if (newWord && (*from != ' ')) { + wordStart = text.c_str() + text.length(); + newWord = false; + + //fix this if required? + //memset(to, 0, 10); + + } + + if (!suspendTextPassThru) { + text += (*from); + lastspace = (*from == ' '); + } + } + } + } + + VerseKey *vkey = SWDYNAMIC_CAST(VerseKey, key); + if (vkey) { + SWBuf ref = ""; + if (vkey->Verse()) { + ref.appendFormatted("\t\t", vkey->getOSISRef()); + } + + if (ref.length() > 0) { + + text = ref + text; + + if (vkey->Verse()) { + VerseKey tmp; + tmp = *vkey; + tmp.AutoNormalize(0); + tmp.Headings(1); + + text += ""; + + tmp = MAXVERSE; + if (*vkey == tmp) { + tmp.Verse(0); +// sprintf(ref, "\t
"); +// pushString(&to, ref); + tmp = MAXCHAPTER; + tmp = MAXVERSE; + if (*vkey == tmp) { + tmp.Chapter(0); + tmp.Verse(0); +// sprintf(ref, "\t
"); +// pushString(&to, ref); +/* + if (!quoteStack.empty()) { + SWLog::getSystemLog()->logError("popping unclosed quote at end of book"); + quoteStack.clear(); + } +*/ + } + } + } +// else if (vkey->Chapter()) { +// sprintf(ref, "\t
", vkey->getOSISRef()); +// } +// else sprintf(ref, "\t
", vkey->getOSISRef()); + } + } + return 0; +} + + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/thmlplain.cpp b/src/modules/filters/thmlplain.cpp new file mode 100644 index 0000000..8f8379a --- /dev/null +++ b/src/modules/filters/thmlplain.cpp @@ -0,0 +1,219 @@ +/****************************************************************************** + * + * thmlplain - SWFilter descendant to strip out all ThML tags or convert to + * ASCII rendered symbols. + */ + + +#include +#include +#include + +SWORD_NAMESPACE_START + +ThMLPlain::ThMLPlain() { +} + +char ThMLPlain::processText(SWBuf &text, const SWKey *key, const SWModule *module) +{ + char token[2048]; + int tokpos = 0; + bool intoken = false; + bool ampersand = false; + + const char *from; + SWBuf orig = text; + from = orig.c_str(); + for (text = ""; *from; from++) + { + if (*from == 10 || *from == 13) + from++; + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + ampersand = false; + continue; + } + else if (*from == '&') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + ampersand = true; + continue; + } + if (*from == ';' && ampersand) { + intoken = false; + ampersand = false; + + if (!strncmp("nbsp", token, 4)) text += ' '; + else if (!strncmp("quot", token, 4)) text += '"'; + else if (!strncmp("amp", token, 3)) text += '&'; + else if (!strncmp("lt", token, 2)) text += '<'; + else if (!strncmp("gt", token, 2)) text += '>'; + else if (!strncmp("brvbar", token, 6)) text += '¦'; + else if (!strncmp("sect", token, 4)) text += '§'; + else if (!strncmp("copy", token, 4)) text += '©'; + else if (!strncmp("laquo", token, 5)) text += '«'; + else if (!strncmp("reg", token, 3)) text += '®'; + else if (!strncmp("acute", token, 5)) text += '´'; + else if (!strncmp("para", token, 4)) text += '¶'; + else if (!strncmp("raquo", token, 5)) text += '»'; + + else if (!strncmp("Aacute", token, 6)) text += 'Á'; + else if (!strncmp("Agrave", 
token, 6)) text += 'À'; + else if (!strncmp("Acirc", token, 5)) text += 'Â'; + else if (!strncmp("Auml", token, 4)) text += 'Ä'; + else if (!strncmp("Atilde", token, 6)) text += 'Ã'; + else if (!strncmp("Aring", token, 5)) text += 'Å'; + else if (!strncmp("aacute", token, 6)) text += 'á'; + else if (!strncmp("agrave", token, 6)) text += 'à'; + else if (!strncmp("acirc", token, 5)) text += 'â'; + else if (!strncmp("auml", token, 4)) text += 'ä'; + else if (!strncmp("atilde", token, 6)) text += 'ã'; + else if (!strncmp("aring", token, 5)) text += 'å'; + else if (!strncmp("Eacute", token, 6)) text += 'É'; + else if (!strncmp("Egrave", token, 6)) text += 'È'; + else if (!strncmp("Ecirc", token, 5)) text += 'Ê'; + else if (!strncmp("Euml", token, 4)) text += 'Ë'; + else if (!strncmp("eacute", token, 6)) text += 'é'; + else if (!strncmp("egrave", token, 6)) text += 'è'; + else if (!strncmp("ecirc", token, 5)) text += 'ê'; + else if (!strncmp("euml", token, 4)) text += 'ë'; + else if (!strncmp("Iacute", token, 6)) text += 'Í'; + else if (!strncmp("Igrave", token, 6)) text += 'Ì'; + else if (!strncmp("Icirc", token, 5)) text += 'Î'; + else if (!strncmp("Iuml", token, 4)) text += 'Ï'; + else if (!strncmp("iacute", token, 6)) text += 'í'; + else if (!strncmp("igrave", token, 6)) text += 'ì'; + else if (!strncmp("icirc", token, 5)) text += 'î'; + else if (!strncmp("iuml", token, 4)) text += 'ï'; + else if (!strncmp("Oacute", token, 6)) text += 'Ó'; + else if (!strncmp("Ograve", token, 6)) text += 'Ò'; + else if (!strncmp("Ocirc", token, 5)) text += 'Ô'; + else if (!strncmp("Ouml", token, 4)) text += 'Ö'; + else if (!strncmp("Otilde", token, 6)) text += 'Õ'; + else if (!strncmp("oacute", token, 6)) text += 'ó'; + else if (!strncmp("ograve", token, 6)) text += 'ò'; + else if (!strncmp("ocirc", token, 5)) text += 'ô'; + else if (!strncmp("ouml", token, 4)) text += 'ö'; + else if (!strncmp("otilde", token, 6)) text += 'õ'; + else if (!strncmp("Uacute", token, 6)) text += 'Ú'; + 
else if (!strncmp("Ugrave", token, 6)) text += 'Ù'; + else if (!strncmp("Ucirc", token, 5)) text += 'Û'; + else if (!strncmp("Uuml", token, 4)) text += 'Ü'; + else if (!strncmp("uacute", token, 6)) text += 'ú'; + else if (!strncmp("ugrave", token, 6)) text += 'ù'; + else if (!strncmp("ucirc", token, 5)) text += 'û'; + else if (!strncmp("uuml", token, 4)) text += 'ü'; + else if (!strncmp("Yacute", token, 6)) text += 'Ý'; + else if (!strncmp("yacute", token, 6)) text += 'ý'; + else if (!strncmp("yuml", token, 4)) text += 'ÿ'; + + else if (!strncmp("deg", token, 3)) text += '°'; + else if (!strncmp("plusmn", token, 6)) text += '±'; + else if (!strncmp("sup2", token, 4)) text += '²'; + else if (!strncmp("sup3", token, 4)) text += '³'; + else if (!strncmp("sup1", token, 4)) text += '¹'; + else if (!strncmp("nbsp", token, 4)) text += 'º'; + else if (!strncmp("pound", token, 5)) text += '£'; + else if (!strncmp("cent", token, 4)) text += '¢'; + else if (!strncmp("frac14", token, 6)) text += '¼'; + else if (!strncmp("frac12", token, 6)) text += '½'; + else if (!strncmp("frac34", token, 6)) text += '¾'; + else if (!strncmp("iquest", token, 6)) text += '¿'; + else if (!strncmp("iexcl", token, 5)) text += '¡'; + else if (!strncmp("ETH", token, 3)) text += 'Ð'; + else if (!strncmp("eth", token, 3)) text += 'ð'; + else if (!strncmp("THORN", token, 5)) text += 'Þ'; + else if (!strncmp("thorn", token, 5)) text += 'þ'; + else if (!strncmp("AElig", token, 5)) text += 'Æ'; + else if (!strncmp("aelig", token, 5)) text += 'æ'; + else if (!strncmp("Oslash", token, 6)) text += 'Ø'; + else if (!strncmp("curren", token, 6)) text += '¤'; + else if (!strncmp("Ccedil", token, 6)) text += 'Ç'; + else if (!strncmp("ccedil", token, 6)) text += 'ç'; + else if (!strncmp("szlig", token, 5)) text += 'ß'; + else if (!strncmp("Ntilde", token, 6)) text += 'Ñ'; + else if (!strncmp("ntilde", token, 6)) text += 'ñ'; + else if (!strncmp("yen", token, 3)) text += '¥'; + else if (!strncmp("not", token, 3)) 
text += '¬'; + else if (!strncmp("ordf", token, 4)) text += 'ª'; + else if (!strncmp("uml", token, 3)) text += '¨'; + else if (!strncmp("shy", token, 3)) text += '­'; + else if (!strncmp("macr", token, 4)) text += '¯'; + else if (!strncmp("micro", token, 5)) text += "µ"; + else if (!strncmp("middot", token, 6)) text +="·"; + else if (!strncmp("cedil", token, 5)) text += "¸"; + else if (!strncmp("ordm", token, 4)) text += "º"; + else if (!strncmp("times", token, 5)) text += "×"; + else if (!strncmp("divide", token, 6)) text +="÷"; + else if (!strncmp("oslash", token, 6)) text +="ø"; + continue; + + } + else if (*from == '>' && !ampersand) { + intoken = false; + // process desired tokens + if (!strncmp(token, "sync type=\"Strongs\" value=\"", 27)) { + text += ' '; + text += '<'; + for (unsigned int i = 27; token[i] != '\"'; i++) + text += token[i]; + text += '>'; + continue; + } + if (!strncmp(token, "sync type=\"morph\" value=\"", 25)) { + text += ' '; + text += '('; + for (unsigned int i = 25; token[i] != '\"'; i++) + text += token[i]; + text += ')'; + continue; + } + if (!strncmp("note", token, 4)) { + text += ' '; + text += '('; + } + else if (!strncmp("br", token, 2)) + text += '\n'; + else if (!strncmp("/p", token, 2)) + text += '\n'; + else if (!strncmp("/note", token, 5)) { + text += ')'; + text += ' '; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else text += *from; + } + + orig = text; + from = orig.c_str(); + for (text = ""; *from; from++) { //loop to remove extra spaces + if ((strchr(" \t\n\r", *from))) { + while (*(from+1) && (strchr(" \t\n\r", *(from+1)))) { + from++; + } + text += " "; + } + else { + text += *from; + } + } + text += (char)0; + + return 0; +} + + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/thmlrtf.cpp b/src/modules/filters/thmlrtf.cpp new file mode 100644 index 0000000..23e4a90 --- /dev/null +++ b/src/modules/filters/thmlrtf.cpp @@ -0,0 +1,346 @@ 
+/***************************************************************************
+                     thmlrtf.cpp  -  ThML to RTF filter
+                             -------------------
+    begin                : 1999-10-27
+    copyright            : 2001 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+
+// NOTE(review): the header names of the #include lines below are missing in
+// this copy of the patch -- restore them from upstream before building.
+#include
+#include
+#include
+#include
+#include
+#include
+
+SWORD_NAMESPACE_START
+
+/**
+ * Set up the ThML -> RTF filter tables.
+ *
+ * Tokens are delimited by '<' and '>', escapes by '&' and ';'.  Both lookups
+ * are case sensitive.  Every escape name is registered exactly once.
+ */
+ThMLRTF::ThMLRTF() {
+	setTokenStart("<");
+	setTokenEnd(">");
+
+	setEscapeStart("&");
+	setEscapeEnd(";");
+
+	setEscapeStringCaseSensitive(true);
+
+	addEscapeStringSubstitute("nbsp", "\302\240");
+	addEscapeStringSubstitute("apos", "'");
+	addEscapeStringSubstitute("quot", "\"");
+	addEscapeStringSubstitute("amp", "&");
+	addEscapeStringSubstitute("lt", "<");
+	addEscapeStringSubstitute("gt", ">");
+	addEscapeStringSubstitute("brvbar", "¦");
+	addEscapeStringSubstitute("sect", "§");
+	addEscapeStringSubstitute("copy", "©");
+	addEscapeStringSubstitute("laquo", "«");
+	addEscapeStringSubstitute("reg", "®");
+	addEscapeStringSubstitute("acute", "´");
+	addEscapeStringSubstitute("para", "¶");
+	addEscapeStringSubstitute("raquo", "»");
+
+	addEscapeStringSubstitute("Aacute", "Á");
+	addEscapeStringSubstitute("Agrave", "À");
+	addEscapeStringSubstitute("Acirc", "Â");
+	addEscapeStringSubstitute("Auml", "Ä");
+	addEscapeStringSubstitute("Atilde", "Ã");
+	addEscapeStringSubstitute("Aring", "Å");
+	addEscapeStringSubstitute("aacute", "á");
+	addEscapeStringSubstitute("agrave", "à");
+	addEscapeStringSubstitute("acirc", "â");
+	addEscapeStringSubstitute("auml", "ä");
+	addEscapeStringSubstitute("atilde", "ã");
+	addEscapeStringSubstitute("aring", "å");
+	addEscapeStringSubstitute("Eacute", "É");
+	addEscapeStringSubstitute("Egrave", "È");
+	addEscapeStringSubstitute("Ecirc", "Ê");
+	addEscapeStringSubstitute("Euml", "Ë");
+	addEscapeStringSubstitute("eacute", "é");
+	addEscapeStringSubstitute("egrave", "è");
+	addEscapeStringSubstitute("ecirc", "ê");
+	addEscapeStringSubstitute("euml", "ë");
+	addEscapeStringSubstitute("Iacute", "Í");
+	addEscapeStringSubstitute("Igrave", "Ì");
+	addEscapeStringSubstitute("Icirc", "Î");
+	addEscapeStringSubstitute("Iuml", "Ï");
+	addEscapeStringSubstitute("iacute", "í");
+	addEscapeStringSubstitute("igrave", "ì");
+	addEscapeStringSubstitute("icirc", "î");
+	addEscapeStringSubstitute("iuml", "ï");
+	addEscapeStringSubstitute("Oacute", "Ó");
+	addEscapeStringSubstitute("Ograve", "Ò");
+	addEscapeStringSubstitute("Ocirc", "Ô");
+	addEscapeStringSubstitute("Ouml", "Ö");
+	addEscapeStringSubstitute("Otilde", "Õ");
+	addEscapeStringSubstitute("oacute", "ó");
+	addEscapeStringSubstitute("ograve", "ò");
+	addEscapeStringSubstitute("ocirc", "ô");
+	addEscapeStringSubstitute("ouml", "ö");
+	addEscapeStringSubstitute("otilde", "õ");
+	addEscapeStringSubstitute("Uacute", "Ú");
+	addEscapeStringSubstitute("Ugrave", "Ù");
+	addEscapeStringSubstitute("Ucirc", "Û");
+	addEscapeStringSubstitute("Uuml", "Ü");
+	addEscapeStringSubstitute("uacute", "ú");
+	addEscapeStringSubstitute("ugrave", "ù");
+	addEscapeStringSubstitute("ucirc", "û");
+	addEscapeStringSubstitute("uuml", "ü");
+	addEscapeStringSubstitute("Yacute", "Ý");
+	addEscapeStringSubstitute("yacute", "ý");
+	addEscapeStringSubstitute("yuml", "ÿ");
+
+	addEscapeStringSubstitute("deg", "°");
+	addEscapeStringSubstitute("plusmn", "±");
+	addEscapeStringSubstitute("sup2", "²");
+	addEscapeStringSubstitute("sup3", "³");
+	addEscapeStringSubstitute("sup1", "¹");
+	// A second "nbsp" entry mapping to "º" used to sit here.  It duplicated the
+	// "ordm" entry below and conflicted with the "nbsp" entry at the top, so it
+	// has been dropped.
+	addEscapeStringSubstitute("pound", "£");
+	addEscapeStringSubstitute("cent", "¢");
+	addEscapeStringSubstitute("frac14", "¼");
+	addEscapeStringSubstitute("frac12", "½");
+	addEscapeStringSubstitute("frac34", "¾");
+	addEscapeStringSubstitute("iquest", "¿");
+	addEscapeStringSubstitute("iexcl", "¡");
+	addEscapeStringSubstitute("ETH", "Ð");
+	addEscapeStringSubstitute("eth", "ð");
+	addEscapeStringSubstitute("THORN", "Þ");
+	addEscapeStringSubstitute("thorn", "þ");
+	addEscapeStringSubstitute("AElig", "Æ");
+	addEscapeStringSubstitute("aelig", "æ");
+	addEscapeStringSubstitute("Oslash", "Ø");
+	addEscapeStringSubstitute("curren", "¤");
+	addEscapeStringSubstitute("Ccedil", "Ç");
+	addEscapeStringSubstitute("ccedil", "ç");
+	addEscapeStringSubstitute("szlig", "ß");
+	addEscapeStringSubstitute("Ntilde", "Ñ");
+	addEscapeStringSubstitute("ntilde", "ñ");
+	addEscapeStringSubstitute("yen", "¥");
+	addEscapeStringSubstitute("not", "¬");
+	addEscapeStringSubstitute("ordf", "ª");
+	addEscapeStringSubstitute("uml", "¨");
+	addEscapeStringSubstitute("shy", "­");
+	addEscapeStringSubstitute("macr", "¯");
+
+	addEscapeStringSubstitute("micro", "µ");
+	addEscapeStringSubstitute("middot", "·");
+	addEscapeStringSubstitute("cedil", "¸");
+	addEscapeStringSubstitute("ordm", "º");
+	addEscapeStringSubstitute("times", "×");
+	addEscapeStringSubstitute("divide", "÷");
+	addEscapeStringSubstitute("oslash", "ø");
+
+	setTokenCaseSensitive(true);
+
+
+	addTokenSubstitute("br", "\\line ");
+	addTokenSubstitute("br /", "\\line ");
+	addTokenSubstitute("i", "{\\i1 ");
+	addTokenSubstitute("/i", "}");
+	addTokenSubstitute("b", "{\\b1 ");
+	addTokenSubstitute("/b", "}");
+	addTokenSubstitute("p", "{\\fi200\\par}");
+	addTokenSubstitute("p /", "\\pard\\par\\par ");
+
+	//we need uppercase forms for the moment to support a few early ThML modules that aren't XHTML compliant
+	addTokenSubstitute("BR", "\\line ");
+	addTokenSubstitute("I", "{\\i1 ");
+	addTokenSubstitute("/I", "}");
+	addTokenSubstitute("B", "{\\b1 ");
+	addTokenSubstitute("/B", "}");
+	addTokenSubstitute("P", "\\par ");
+	addTokenSubstitute("scripture", "{\\i1 ");
+	addTokenSubstitute("/scripture", "}");
+	addTokenSubstitute("center", "\\qc ");
+	addTokenSubstitute("/center", "\\pard ");
+}
+
+
+/**
+ * Convert the ThML in @p text to RTF, in place.
+ *
+ * Three passes: (1) prefix '{', '}' and '\' with a backslash, (2) let
+ * SWBasicFilter::processText() run the token/escape handling, (3) collapse
+ * every run of space, tab, CR and LF into a single space.
+ *
+ * @param text    buffer to convert; replaced with the RTF result
+ * @param key     passed through to SWBasicFilter::processText()
+ * @param module  passed through to SWBasicFilter::processText()
+ * @return always 0
+ */
+char ThMLRTF::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+
+	// preprocess text buffer to escape RTF control codes
+	const char *from;
+	SWBuf orig = text;
+	from = orig.c_str();
+	for (text = ""; *from; from++) {	// loop to escape RTF special characters
+		switch (*from) {
+		case '{':
+		case '}':
+		case '\\':
+			text += "\\";
+			text += *from;
+			break;
+		default:
+			text += *from;
+		}
+	}
+	text += (char)0;
+
+	SWBasicFilter::processText(text, key, module);  //handle tokens as usual
+
+	orig = text;
+	from = orig.c_str();
+	for (text = ""; *from; from++) {  //loop to remove extra spaces
+		if ((strchr(" \t\n\r", *from))) {
+			while (*(from+1) && (strchr(" \t\n\r", *(from+1)))) {
+				from++;
+			}
+			text += " ";
+		}
+		else {
+			text += *from;
+		}
+	}
+	text += (char)0;	// probably not needed, but don't want to remove without investigating (same as above)
+	return 0;
+}
+
+
+/**
+ * Per-render state for ThMLRTF.
+ *
+ * SecHead and BiblicalText both start out false.  When a module is supplied,
+ * version takes its name and BiblicalText records whether its type is
+ * "Biblical Texts".
+ */
+ThMLRTF::MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) {
+	this->SecHead = false;
+	// handleToken() reads BiblicalText, so it must have a value even without a module
+	this->BiblicalText = false;
+	if (module) {
+		version = module->Name();
+		BiblicalText = (!strcmp(module->Type(), "Biblical Texts"));
+	}
+}
+
+
+/**
+ * Handle one ThML token that may need more than a table substitution.
+ *
+ * @param buf       output buffer the RTF is appended to
+ * @param token     token text without the surrounding '<' and '>'
+ * @param userData  the MyUserData instance of this render
+ * @return true when the token was handled here (or by substituteToken()),
+ *         false when it was not recognised or could not be processed
+ */
+bool ThMLRTF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+	if (!substituteToken(buf, token)) { // manually process if it wasn't a simple substitution
+		MyUserData *u = (MyUserData *)userData;
+		XMLTag tag(token);
+		if ((!tag.isEndTag()) && (!tag.isEmpty()))
+			u->startTag = tag;
+
+		// checked once so that no branch below hands a null name to strcmp()
+		const char *name = tag.getName();
+		if (!name)
+			return false;
+
+		if (!strcmp(name, "sync")) {
+			SWBuf value = tag.getAttribute("value");
+			if (tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "morph")) {
+				buf.appendFormatted(" {\\cf4 \\sub (%s)}", value.c_str());
+			}
+			else if( tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "Strongs")) {
+				if (value[0] == 'H' || value[0] == 'G' || value[0] == 'A') {
+					value<<1;	// drop the leading letter
+					buf.appendFormatted(" {\\cf3 \\sub <%s>}", value.c_str());
+				}
+				else if (value[0] == 'T') {
+					value<<1;	// drop the leading letter
+					buf.appendFormatted(" {\\cf4 \\sub (%s)}", value.c_str());
+				}
+			}
+			else if (tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "Dict")) {
+				if (!tag.isEndTag())
+					buf += "{\\b ";
+				else	buf += "}";
+			}
+		}
+		// note tag
+		else if (!strcmp(name, "note")) {
+			if (!tag.isEndTag()) {
+				if (!tag.isEmpty()) {
+					SWBuf type = tag.getAttribute("type");
+					SWBuf footnoteNumber = tag.getAttribute("swordFootnote");
+					VerseKey *vkey = NULL;
+					// see if we have a VerseKey * or descendant
+					SWTRY {
+						vkey = SWDYNAMIC_CAST(VerseKey, u->key);
+					}
+					SWCATCH ( ... ) {	}
+					if (vkey) {
+						// leave this special osis type in for crossReference notes types?  Might thml use this some day? Doesn't hurt.
+						char ch = ((tag.getAttribute("type") && ((!strcmp(tag.getAttribute("type"), "crossReference")) || (!strcmp(tag.getAttribute("type"), "x-cross-ref")))) ? 'x':'n');
+						buf.appendFormatted("{\\super *%c%i.%s} ", ch, vkey->Verse(), footnoteNumber.c_str());
+					}
+					u->suspendTextPassThru = true;
+				}
+			}
+			if (tag.isEndTag()) {
+				u->suspendTextPassThru = false;
+			}
+		}
+
+
+		else if (!strcmp(name, "scripRef")) {
+			if (!tag.isEndTag()) {
+				if (!tag.isEmpty()) {
+					u->suspendTextPassThru = true;
+				}
+			}
+			if (tag.isEndTag()) {	// closing scripRef
+				if (!u->BiblicalText) {
+					SWBuf refList = u->startTag.getAttribute("passage");
+					if (!refList.length())
+						refList = u->lastTextNode;
+					SWBuf version = tag.getAttribute("version");
+					// NOTE(review): the two empty literals around the reference list look
+					// truncated in this copy of the patch -- confirm against upstream.
+					buf += "";
+					buf += refList.c_str();
+//					buf += u->lastTextNode.c_str();
+					buf += "";
+				}
+				else {
+					SWBuf footnoteNumber = u->startTag.getAttribute("swordFootnote");
+					VerseKey *vkey = NULL;
+					// see if we have a VerseKey * or descendant
+					SWTRY {
+						vkey = SWDYNAMIC_CAST(VerseKey, u->key);
+					}
+					SWCATCH ( ... ) {}
+					if (vkey) {
+						// leave this special osis type in for crossReference notes types?  Might thml use this some day? Doesn't hurt.
+						buf.appendFormatted("{\\super *x%i.%s} ", vkey->Verse(), footnoteNumber.c_str());
+					}
+				}
+
+				// let's let text resume to output again
+				u->suspendTextPassThru = false;
+			}
+		}
+
+		else if (!strcmp(name, "div")) {
+			if (tag.isEndTag() && u->SecHead) {
+				buf += "\\par}";
+				u->SecHead = false;
+			}
+			else if (tag.getAttribute("class")) {
+				if (!stricmp(tag.getAttribute("class"), "sechead")) {
+					u->SecHead = true;
+					buf += "{\\par\\i1\\b1 ";
+				}
+				else if (!stricmp(tag.getAttribute("class"), "title")) {
+					u->SecHead = true;
+					buf += "{\\par\\i1\\b1 ";
+				}
+			}
+		}
+		else if (!strcmp(name, "img") || !strcmp(name, "image")) {
+			const char *src = tag.getAttribute("src");
+			if (!src)		// assert we have a src attribute
+				return false;
+
+			// the path prefix comes from the module configuration; without it no path can be built
+			const char *dataPath = (u->module) ? u->module->getConfigEntry("AbsoluteDataPath") : 0;
+			if (!dataPath)
+				return false;
+
+			// sized from the two strings that are actually concatenated, plus the terminator
+			char* filepath = new char[strlen(dataPath) + strlen(src) + 1];
+			strcpy(filepath, dataPath);
+			strcat(filepath, src);
+
+// we do this because BibleCS looks for this EXACT format for an image tag
+			// NOTE(review): the literal below looks truncated in this copy of the patch
+			// (filepath is built but never appended) -- confirm against upstream.
+			buf+="";
+			delete [] filepath;
+		}
+		else {
+			return false;  // we still didn't handle token
+		}
+	}
+	return true;
+}
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmlscripref.cpp b/src/modules/filters/thmlscripref.cpp
new file mode 100644
index 0000000..df2b3d2
--- /dev/null
+++ b/src/modules/filters/thmlscripref.cpp
@@ -0,0 +1,123 @@
+/******************************************************************************
+ *
+ * thmlscripref -	SWFilter descendant to hide or show scripture
+ *			references in a ThML module.
+ */
+
+
+// NOTE(review): the header names of the #include lines below are missing in
+// this copy of the patch -- restore them from upstream before building.
+#include
+#include
+#include
+#include
+#include
+#include
+
+SWORD_NAMESPACE_START
+
+const char oName[] = "Cross-references";
+const char oTip[] = "Toggles Scripture Cross-references On and Off if they exist";
+
+const SWBuf choices[3] = {"Off", "On", ""};
+const StringList oValues(&choices[0], &choices[2]);
+
+/** Create the option filter; cross-references start out switched "On". */
+ThMLScripref::ThMLScripref() : SWOptionFilter(oName, oTip, &oValues) {
+	setOptionValue("On");
+}
+
+
+ThMLScripref::~ThMLScripref() {
+}
+
+
+/**
+ * Show or hide scripRef elements in @p text, and record each one as a
+ * "Footnote" entry attribute when the module processes entry attributes.
+ *
+ * A non-empty opening scripRef tag starts collecting the element body.  At
+ * the matching end tag the body, the start tag's attributes and the parsed
+ * reference list are stored under the next footnote number.  The start tag
+ * and its body are written back only when the option is on.
+ *
+ * @param text    ThML buffer, rewritten in place
+ * @param key     current key; its text seeds the verse-list parser (may be null)
+ * @param module  module owning the entry attributes (may be null, in which
+ *                case no attributes are recorded)
+ * @return always 0
+ */
+char ThMLScripref::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+	SWBuf token;
+	bool intoken    = false;
+	bool hide       = false;
+	SWBuf tagText;
+	XMLTag startTag;
+	SWBuf refs = "";
+	int footnoteNum = 1;
+	char buf[254];
+	// a null key used to be dereferenced here
+	// NOTE(review): assumes VerseKey accepts a null string like a default-constructed key -- confirm
+	const char *keyText = (key) ? key->getText() : 0;
+	VerseKey parser = keyText;
+
+	SWBuf orig = text;
+	const char *from = orig.c_str();
+
+	for (text = ""; *from; from++) {
+		if (*from == '<') {
+			intoken = true;
+			token = "";
+			continue;
+		}
+		if (*from == '>') {	// process tokens
+			intoken = false;
+
+			XMLTag tag(token);
+			// evaluated once; a tag without a name is never a scripRef
+			const char *tagName = tag.getName();
+			const bool isScripRef = (tagName && !strcmp(tagName, "scripRef"));
+			if (isScripRef) {
+				if (!tag.isEndTag()) {
+					if (!tag.isEmpty()) {
+						refs = "";
+						startTag = tag;
+						hide = true;
+						tagText = "";
+						continue;
+					}
+				}
+				if (hide && tag.isEndTag()) {
+					if (module && module->isProcessEntryAttributes()) {
+						// next footnote number = stored count + 1 (count starts at 0 when unset)
+						SWBuf fc = module->getEntryAttributes()["Footnote"]["count"]["value"];
+						footnoteNum = (fc.length()) ? atoi(fc.c_str()) : 0;
+						sprintf(buf, "%i", ++footnoteNum);
+						module->getEntryAttributes()["Footnote"]["count"]["value"] = buf;
+						StringList attributes = startTag.getAttributeNames();
+						for (StringList::iterator it = attributes.begin(); it != attributes.end(); it++) {
+							module->getEntryAttributes()["Footnote"][buf][it->c_str()] = startTag.getAttribute(it->c_str());
+						}
+						module->getEntryAttributes()["Footnote"][buf]["body"] = tagText;
+						startTag.setAttribute("swordFootnote", buf);
+						// prefer the explicit passage attribute, else parse the element body
+						SWBuf passage = startTag.getAttribute("passage");
+						if (passage.length())
+							refs = parser.ParseVerseList(passage.c_str(), parser, true).getRangeText();
+						else	refs = parser.ParseVerseList(tagText.c_str(), parser, true).getRangeText();
+						module->getEntryAttributes()["Footnote"][buf]["refList"] = refs.c_str();
+					}
+					hide = false;
+					if (option) {	// we want the tag in the text
+						text += startTag;
+						text.append(tagText);
+					}
+					else	continue;
+				}
+			}
+
+			// if not a scripRef token, keep token in text
+			if (isScripRef && (!tag.isEndTag())) {
+				SWBuf osisRef = tag.getAttribute("passage");
+				if (refs.length())
+					refs += "; ";
+				refs += osisRef;
+			}
+			if (!hide) {
+				text += '<';
+				text.append(token);
+				text += '>';
+			}
+			else {
+				tagText += '<';
+				tagText.append(token);
+				tagText += '>';
+			}
+			continue;
+		}
+		if (intoken) { //copy token
+			token += *from;
+		}
+		else if (!hide) { //copy text which is not inside a token
+			text += *from;
+		}
+		else tagText += *from;
+	}
+	return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmlstrongs.cpp b/src/modules/filters/thmlstrongs.cpp
new file mode 100644
index 0000000..c1ab08c
--- /dev/null
+++ b/src/modules/filters/thmlstrongs.cpp
@@ -0,0 +1,146 @@
+/******************************************************************************
+ *
+ * thmlstrongs -	SWFilter descendant to hide or show strongs number
+ *			in a ThML module.
+ */
+
+
+// NOTE(review): the header names of the #include lines below are missing in
+// this copy of the patch -- restore them from upstream before building.
+#include
+#include
+#include
+#include
+#include
+#include
+
+SWORD_NAMESPACE_START
+
+const char oName[] = "Strong's Numbers";
+const char oTip[] = "Toggles Strong's Numbers On and Off if they exist";
+
+const SWBuf choices[3] = {"Off", "On", ""};
+const StringList oValues(&choices[0], &choices[2]);
+
+/** Create the option filter; Strong's numbers start out switched "Off". */
+ThMLStrongs::ThMLStrongs() : SWOptionFilter(oName, oTip, &oValues) {
+	setOptionValue("Off");
+}
+
+
+ThMLStrongs::~ThMLStrongs() {
+}
+
+
+/**
+ * Strip or keep Strongs sync tokens in @p text, and record word attributes
+ * when the module processes entry attributes.
+ *
+ * Every Strongs sync token whose number is below 5627 becomes a
+ * ["Word"][NNN] entry (PartCount, Lemma, LemmaClass, Text).  A following
+ * morph sync token adds MorphClass / Morph to the previous word.  With the
+ * option off, Strongs tokens are dropped from the output; all other tokens
+ * are copied through.
+ *
+ * @param text    ThML buffer, rewritten in place
+ * @param key     unused
+ * @param module  module owning the entry attributes (may be null, in which
+ *                case no attributes are recorded)
+ * @return always 0
+ */
+char ThMLStrongs::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+	char token[2048]; // cheese.  Fix.
+	const char *from;
+	int tokpos = 0;
+	bool intoken = false;
+	bool lastspace = false;
+	int word = 1;
+	char val[128];
+	char wordstr[16];	// "%03d" of any int fits; 5 bytes overflowed from word 10000 on
+	char *valto;
+	char *ch;
+	unsigned int textStart = 0, textEnd = 0;
+	SWBuf tmp;
+	bool newText = false;
+
+	SWBuf orig = text;
+	from = orig.c_str();
+
+	for (text = ""; *from; from++) {
+		if (*from == '<') {
+			intoken = true;
+			tokpos = 0;
+			token[0] = 0;
+			token[1] = 0;
+			token[2] = 0;
+			textEnd = text.length();
+			continue;
+		}
+		if (*from == '>') {	// process tokens
+			intoken = false;
+			if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) {	// Strongs
+				if (module && module->isProcessEntryAttributes()) {
+					valto = val;
+					// The value starts at offset 27.  Stop at the end of what was
+					// actually stored in token so no stale bytes are copied.
+					for (unsigned int i = 27; i < (unsigned int)tokpos && token[i] != '\"' && i < 150; i++)
+						*valto++ = token[i];
+					*valto = 0;
+					// skip one leading non-digit prefix character, but never step past the terminator
+					const char *digits = val;
+					if (*digits && !isdigit((unsigned char)*digits))
+						digits++;
+					if (atoi(digits) < 5627) {
+						// normal strongs number
+						sprintf(wordstr, "%03d", word);
+						module->getEntryAttributes()["Word"][wordstr]["PartCount"] = "1";
+						module->getEntryAttributes()["Word"][wordstr]["Lemma"] = val;
+						module->getEntryAttributes()["Word"][wordstr]["LemmaClass"] = "strong";
+						tmp = "";
+						tmp.append(text.c_str()+textStart, (int)(textEnd - textStart));
+						module->getEntryAttributes()["Word"][wordstr]["Text"] = tmp;
+						newText = true;
+					}
+					else {
+/*
+						// verb morph
+						sprintf(wordstr, "%03d", word);
+						module->getEntryAttributes()["Word"][wordstr]["Morph"] = val;
+						module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = "OLBMorph";
+*/
+						word--;	// for now, completely ignore this word attribute.
+					}
+					word++;
+				}
+
+				if (!option) {	// if we don't want strongs
+					// drop the space written before the token when punctuation follows it
+					if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) {
+						if (lastspace)
+							text--;
+					}
+					if (newText) {textStart = text.length(); newText = false; }
+					continue;
+				}
+			}
+			if (module && module->isProcessEntryAttributes()) {
+				if (!strncmp(token, "sync type=\"morph\"", 17)) {
+					for (ch = token+17; *ch; ch++) {
+						if (!strncmp(ch, "class=\"", 7)) {
+							valto = val;
+							// ch[i] is tested first so the scan ends at the token's terminator
+							for (unsigned int i = 7; ch[i] && ch[i] != '\"' && i < 127; i++)
+								*valto++ = ch[i];
+							*valto = 0;
+							sprintf(wordstr, "%03d", word-1);
+							if ((!stricmp(val, "Robinsons")) || (!stricmp(val, "Robinson"))) {
+								strcpy(val, "robinson");
+							}
+							module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = val;
+						}
+						if (!strncmp(ch, "value=\"", 7)) {
+							valto = val;
+							for (unsigned int i = 7; ch[i] && ch[i] != '\"' && i < 127; i++)
+								*valto++ = ch[i];
+							*valto = 0;
+							sprintf(wordstr, "%03d", word-1);
+							module->getEntryAttributes()["Word"][wordstr]["Morph"] = val;
+						}
+					}
+					newText = true;
+				}
+			}
+			// if not a strongs token, keep token in text
+			text += '<';
+			text += token;
+			text += '>';
+			if (newText) {textStart = text.length(); newText = false; }
+			continue;
+		}
+		if (intoken) {
+			if (tokpos < 2045)
+				token[tokpos++] = *from;
+			// runs for every character: keeps the bytes after the token zeroed
+			token[tokpos+2] = 0;
+		}
+		else	{
+			text += *from;
+			lastspace = (*from == ' ');
+		}
+	}
+	return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmlvariants.cpp b/src/modules/filters/thmlvariants.cpp
new file mode 100644
index 0000000..49f9b65
--- /dev/null
+++ b/src/modules/filters/thmlvariants.cpp
@@ -0,0 +1,118 @@
+/******************************************************************************
+ *
+ * thmlvariants -	SWFilter descendant to hide or show textual variants
+ *			in a ThML module.
+ */
+
+
+#include
+#include
+#include
+
+SWORD_NAMESPACE_START
+
+const char ThMLVariants::primary[] = "Primary Reading";
+const char ThMLVariants::secondary[] = "Secondary Reading";
+const char ThMLVariants::all[] = "All Readings";
+
+const char ThMLVariants::optName[] = "Textual Variants";
+const char ThMLVariants::optTip[] = "Switch between Textual Variants modes";
+
+
+/** Start with option 0 and offer the three reading modes in a fixed order. */
+ThMLVariants::ThMLVariants() {
+	option = 0;
+	options.push_back(primary);
+	options.push_back(secondary);
+	options.push_back(all);
+}
+
+
+ThMLVariants::~ThMLVariants() {
+}
+
+/**
+ * Select the reading mode by name (case-insensitive).
+ * The primary name gives 0, the secondary name gives 1, anything else gives 2.
+ */
+void ThMLVariants::setOptionValue(const char *ival)
+{
+	if (!stricmp(ival, primary)) {
+		option = 0;
+		return;
+	}
+	option = (!stricmp(ival, secondary)) ? 1 : 2;
+}
+
+/** Name of the current mode: 0 is primary, 1 is secondary, otherwise all. */
+const char *ThMLVariants::getOptionValue()
+{
+	if (option == 0)
+		return primary;
+	return (option == 1) ? secondary : all;
+}
+
+/**
+ * Filter the variant divs of @p text according to the current mode.
+ *
+ * Only modes 0 and 1 change the text.  A div token that opens with the
+ * mode's comparison string is dropped together with everything up to the
+ * next "/div".  For any other variant div only the opening and closing
+ * tokens are dropped and the content is kept.
+ *
+ * @param text    ThML buffer, rewritten in place
+ * @param key     unused
+ * @param module  unused
+ * @return always 0
+ */
+char ThMLVariants::processText(SWBuf &text, const SWKey *key, const SWModule *module)
+{
+	// every mode other than 0 and 1 leaves the text untouched
+	if (option != 0 && option != 1)
+		return 0;
+
+	// one fixed comparison string per mode; both are 28 characters long
+	const char *const hiddenVariantTag = (option == 0)
+		? "div type=\"variant\" class=\"1\""
+		: "div type=\"variant\" class=\"2\"";
+
+	bool insideTag = false;		// between '<' and '>'
+	bool suppress = false;		// output is currently being dropped
+	bool insideVariant = false;	// a variant div is open
+	SWBuf tagBody;
+	SWBuf source = text;
+
+	text = "";
+	for (const char *cur = source.c_str(); *cur; cur++) {
+		const char c = *cur;
+		if (c == '<') {
+			insideTag = true;
+			tagBody = "";
+		}
+		else if (c == '>') {	// a complete token has been collected
+			insideTag = false;
+			bool emitTag = true;
+
+			if (!strncmp(tagBody.c_str(), hiddenVariantTag, 28)) {
+				insideVariant = true;
+				suppress = true;
+				emitTag = false;
+			}
+			else if (!strncmp(tagBody.c_str(), "div type=\"variant\"", 18)) {
+				insideVariant = true;
+				emitTag = false;
+			}
+			else if (!strncmp(tagBody.c_str(), "/div", 4)) {
+				suppress = false;
+				if (insideVariant) {	// closes a variant div: swallow the end tag as well
+					insideVariant = false;
+					emitTag = false;
+				}
+			}
+
+			if (emitTag && !suppress) {
+				text += '<';
+				text.append(tagBody);
+				text += '>';
+			}
+		}
+		else if (insideTag) {
+			tagBody += c;
+		}
+		else if (!suppress) {
+			text += c;
+		}
+	}
+
+	return 0;
+}
+
+
+
+
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmlwebif.cpp b/src/modules/filters/thmlwebif.cpp
new file mode 100644
index 0000000..7428754
--- /dev/null
+++ b/src/modules/filters/thmlwebif.cpp
@@ -0,0 +1,103 @@
+/***************************************************************************
+                          ThMLWEBIF.cpp  -  ThML to HTML filter with hrefs
+                             -------------------
+    begin                    : 2001-09-03
+    copyright            : 2001 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.
* + * * + ***************************************************************************/ + +#include +#include +#include +#include +#include +#include + +SWORD_NAMESPACE_START + +ThMLWEBIF::ThMLWEBIF() : baseURL(""), passageStudyURL(baseURL + "passagestudy.jsp") { + //all's done in ThMLHTMLHREF +} + +bool ThMLWEBIF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) { + + if (!substituteToken(buf, token)) { // manually process if it wasn't a simple substitution + MyUserData *u = (MyUserData *)userData; + XMLTag tag(token); + SWBuf url; + if (!strcmp(tag.getName(), "sync")) { + const char* value = tag.getAttribute("value"); + url = value; + if ((url.length() > 1) && strchr("GH", url[0])) { + if (isdigit(url[1])) + url = url.c_str()+1; + } + + if(tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "morph")){ + buf += " ("; + buf.appendFormatted("", passageStudyURL.c_str(), URL::encode(url).c_str() ); + } + else { + if (value) { + value++; //skip leading G, H or T + //url = value; + } + + buf += " <"; + buf.appendFormatted("", passageStudyURL.c_str(), URL::encode(url).c_str() ); + } + + buf += value; + buf += ""; + + if (tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "morph")) { + buf += ") "; + } + else { + buf += "> "; + } + } + else if (!strcmp(tag.getName(), "scripRef")) { + if (tag.isEndTag()) { + if (u->inscriptRef) { // like "John 3:16" + u->inscriptRef = false; + buf += ""; + } + else { // end of scripRef like "John 3:16" + url = u->lastTextNode; + buf.appendFormatted("", passageStudyURL.c_str(), URL::encode(url).c_str()); + buf += u->lastTextNode.c_str(); + buf += ""; + + // let's let text resume to output again + u->suspendTextPassThru = false; + } + } + else if (tag.getAttribute("passage")) { //passage given + u->inscriptRef = true; + + buf.appendFormatted("", passageStudyURL.c_str(), URL::encode(tag.getAttribute("passage")).c_str()); + } + else { //no passage given + u->inscriptRef = false; + // let's stop text 
from going to output + u->suspendTextPassThru = true; + } + } + else { + return ThMLHTMLHREF::handleToken(buf,token,userData); + } + } + return true; +} + + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/thmlwordjs.cpp b/src/modules/filters/thmlwordjs.cpp new file mode 100644 index 0000000..ad8eef0 --- /dev/null +++ b/src/modules/filters/thmlwordjs.cpp @@ -0,0 +1,296 @@ +/****************************************************************************** + * + * thmlstrongs - SWFilter descendant to hide or show strongs number + * in a ThML module. + */ + + +#include +#include +#include +#include +#include +#include +#include +#include + +SWORD_NAMESPACE_START + +const char oName[] = "Word Javascript"; +const char oTip[] = "Toggles Word Javascript data"; + +const SWBuf choices[3] = {"Off", "On", ""}; +const StringList oValues(&choices[0], &choices[2]); + + +ThMLWordJS::ThMLWordJS() : SWOptionFilter(oName, oTip, &oValues) { + setOptionValue("Off"); + + defaultGreekLex = 0; + defaultHebLex = 0; + defaultGreekParse = 0; + defaultHebParse = 0; + mgr = 0; +} + + +ThMLWordJS::~ThMLWordJS() { +} + + +char ThMLWordJS::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + if (option) { + char token[2112]; // cheese. Fix. 
+ int tokpos = 0; + bool intoken = false; + bool lastspace = false; + int word = 1; + char val[128]; + char *valto; + char *ch; + char wordstr[5]; + unsigned int textStart = 0, lastAppendLen = 0, textEnd = 0; + SWBuf tmp; + bool newText = false; + bool needWordOut = false; + AttributeValue *wordAttrs = 0; + SWBuf modName = (module)?module->Name():""; + SWBuf wordSrcPrefix = modName; + + const SWBuf orig = text; + const char * from = orig.c_str(); + VerseKey *vkey = 0; + if (key) { + vkey = SWDYNAMIC_CAST(VerseKey, key); + } + + for (text = ""; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + textEnd = text.length(); + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs + valto = val; + for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) + *valto++ = token[i]; + *valto = 0; + if (atoi((!isdigit(*val))?val+1:val) < 5627) { + // normal strongs number + sprintf(wordstr, "%03d", word++); + needWordOut = (word > 2); + wordAttrs = &(module->getEntryAttributes()["Word"][wordstr]); + (*wordAttrs)["Strongs"] = val; + //printf("Adding: [\"Word\"][%s][\"Strongs\"] = %s\n", wordstr, val); + tmp = ""; + tmp.append(text.c_str()+textStart, (int)(textEnd - textStart)); + (*wordAttrs)["Text"] = tmp; + text.append(""); + SWBuf ts; + ts.appendFormatted("%d", textStart); + (*wordAttrs)["TextStart"] = ts; + //printf("Adding: [\"Word\"][%s][\"Text\"] = %s\n", wordstr, tmp.c_str()); + newText = true; + } + else { + // verb morph + (*wordAttrs)["Morph"] = val; + //printf("Adding: [\"Word\"][%s][\"Morph\"] = %s\n", wordstr, val); + } + + } + if (!strncmp(token, "sync type=\"morph\"", 17)) { + for (ch = token+17; *ch; ch++) { + if (!strncmp(ch, "class=\"", 7)) { + valto = val; + for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) + *valto++ = ch[i]; + *valto = 0; + (*wordAttrs)["MorphClass"] = val; + //printf("Adding: 
[\"Word\"][%s][\"MorphClass\"] = %s\n", wordstr, val); + } + if (!strncmp(ch, "value=\"", 7)) { + valto = val; + for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) + *valto++ = ch[i]; + *valto = 0; + (*wordAttrs)["Morph"] = val; + //printf("Adding: [\"Word\"][%s][\"Morph\"] = %s\n", wordstr, val); + } + } + newText = true; + } + // if not a strongs token, keep token in text + text += '<'; + text += token; + text += '>'; + if (needWordOut) { + char wstr[10]; + sprintf(wstr, "%03d", word-2); + AttributeValue *wAttrs = &(module->getEntryAttributes()["Word"][wstr]); + needWordOut = false; + SWBuf strong = (*wAttrs)["Strongs"]; + SWBuf morph = (*wAttrs)["Morph"]; + SWBuf morphClass = (*wAttrs)["MorphClass"]; + SWBuf wordText = (*wAttrs)["Text"]; + SWBuf textSt = (*wAttrs)["TextStart"]; + if (strong.size()) { + char gh = 0; + gh = isdigit(strong[0]) ? 0:strong[0]; + if (!gh) { + if (vkey) { + gh = vkey->Testament() ? 'H' : 'G'; + } + } + else strong << 1; + + SWModule *sLex = 0; + SWModule *sMorph = 0; + if (gh == 'G') { + sLex = defaultGreekLex; + sMorph = defaultGreekParse; + } + if (gh == 'H') { + sLex = defaultHebLex; + sMorph = defaultHebParse; + } + SWBuf lexName = ""; + if (sLex) { + // we can pass the real lex name in, but we have some + // aliases in the javascript to optimize bandwidth + lexName = sLex->Name(); + if (lexName == "StrongsGreek") + lexName = "G"; + if (lexName == "StrongsHebrew") + lexName = "H"; + } + SWBuf wordID; + if (vkey) { + // optimize for bandwidth and use only the verse as the unique entry id + wordID.appendFormatted("%d", vkey->Verse()); + } + else { + wordID = key->getText(); + } + for (unsigned int i = 0; i < wordID.size(); i++) { + if ((!isdigit(wordID[i])) && (!isalpha(wordID[i]))) { + wordID[i] = '_'; + } + } + wordID.appendFormatted("_%s%d", wordSrcPrefix.c_str(), atoi(wstr)); + if (textSt.size()) { + int textStr = atoi(textSt.c_str()); + textStr += lastAppendLen; + SWBuf spanStart = ""; + + + +/* + if (sMorph) { + SWBuf 
popMorph = "%s", sMorph->Name(), morph.c_str(), wordID.c_str(), morph.c_str()); + morph = popMorph; + } +*/ + + // 'p' = 'fillpop' to save bandwidth + const char *m = strchr(morph.c_str(), ':'); + if (m) m++; + else m = morph.c_str(); + spanStart.appendFormatted("", lexName.c_str(), strong.c_str(), wordID.c_str(), m, modName.c_str()); + text.insert(textStr, spanStart); + lastAppendLen = spanStart.length(); + } + } + + } + if (newText) { + textStart = text.length(); newText = false; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + text += *from; + lastspace = (*from == ' '); + } + } + + char wstr[10]; + sprintf(wstr, "%03d", word-1); + AttributeValue *wAttrs = &(module->getEntryAttributes()["Word"][wstr]); + needWordOut = false; + SWBuf strong = (*wAttrs)["Strongs"]; + SWBuf morph = (*wAttrs)["Morph"]; + SWBuf morphClass = (*wAttrs)["MorphClass"]; + SWBuf wordText = (*wAttrs)["Text"]; + SWBuf textSt = (*wAttrs)["TextStart"]; + if (strong.size()) { + char gh = 0; + gh = isdigit(strong[0]) ? 0:strong[0]; + if (!gh) { + if (vkey) { + gh = vkey->Testament() ? 
'H' : 'G'; + } + } + else strong << 1; + + SWModule *sLex = 0; + if (gh == 'G') { + sLex = defaultGreekLex; + } + if (gh == 'H') { + sLex = defaultHebLex; + } + SWBuf lexName = ""; + if (sLex) { + // we can pass the real lex name in, but we have some + // aliases in the javascript to optimize bandwidth + lexName = sLex->Name(); + if (lexName == "StrongsGreek") + lexName = "G"; + if (lexName == "StrongsHebrew") + lexName = "H"; + } + SWBuf wordID; + if (vkey) { + // optimize for bandwidth and use only the verse as the unique entry id + wordID.appendFormatted("%d", vkey->Verse()); + } + else { + wordID = key->getText(); + } + for (unsigned int i = 0; i < wordID.size(); i++) { + if ((!isdigit(wordID[i])) && (!isalpha(wordID[i]))) { + wordID[i] = '_'; + } + } + wordID.appendFormatted("_%s%d", wordSrcPrefix.c_str(), atoi(wstr)); + if (textSt.size()) { + int textStr = atoi(textSt.c_str()); + textStr += lastAppendLen; + SWBuf spanStart = ""; + // 'p' = 'fillpop' to save bandwidth + const char *m = strchr(morph.c_str(), ':'); + if (m) m++; + else m = morph.c_str(); + spanStart.appendFormatted("", lexName.c_str(), strong.c_str(), wordID.c_str(), m, modName.c_str()); + text.insert(textStr, spanStart); + } + } + } + + return 0; +} + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/unicodertf.cpp b/src/modules/filters/unicodertf.cpp new file mode 100644 index 0000000..8c2a1f6 --- /dev/null +++ b/src/modules/filters/unicodertf.cpp @@ -0,0 +1,87 @@ +/****************************************************************************** + * + * unicodertf - SWFilter descendant to convert a double byte unicode file + * to RTF tags + */ + + +#include +#include +#include +#include + +SWORD_NAMESPACE_START + +UnicodeRTF::UnicodeRTF() { +} + + +char UnicodeRTF::processText(SWBuf &text, const SWKey *key, const SWModule *module) +{ + const unsigned char *from; + char digit[10]; + unsigned long ch; + signed short utf16; + unsigned char from2[7]; + + SWBuf orig = text; + + from = (const 
unsigned char *)orig.c_str(); + + // ------------------------------- + for (text = ""; *from; from++) { + ch = 0; + //case: ANSI + if ((*from & 128) != 128) { + text += *from; + continue; + } + //case: Invalid UTF-8 (illegal continuing byte in initial position) + if ((*from & 128) && ((*from & 64) != 64)) { + continue; + } + //case: 2+ byte codepoint + from2[0] = *from; + from2[0] <<= 1; + int subsequent; + for (subsequent = 1; (from2[0] & 128) && (subsequent < 7); subsequent++) { + from2[0] <<= 1; + from2[subsequent] = from[subsequent]; + from2[subsequent] &= 63; + ch <<= 6; + ch |= from2[subsequent]; + } + subsequent--; + from2[0] <<= 1; + char significantFirstBits = 8 - (2+subsequent); + + ch |= (((short)from2[0]) << (((6*subsequent)+significantFirstBits)-8)); + from += subsequent; + if (ch < 0x10000) { + utf16 = (signed short)ch; + text += '\\'; + text += 'u'; + sprintf(digit, "%d", utf16); + text += digit; + text += '?'; + } + else { + utf16 = (signed short)((ch - 0x10000) / 0x400 + 0xD800); + text += '\\'; + text += 'u'; + sprintf(digit, "%d", utf16); + text += digit; + text += '?'; + utf16 = (signed short)((ch - 0x10000) % 0x400 + 0xDC00); + text += '\\'; + text += 'u'; + sprintf(digit, "%d", utf16); + text += digit; + text += '?'; + } + } + + return 0; +} + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/utf16utf8.cpp b/src/modules/filters/utf16utf8.cpp new file mode 100644 index 0000000..ae0845f --- /dev/null +++ b/src/modules/filters/utf16utf8.cpp @@ -0,0 +1,90 @@ +/****************************************************************************** + * + * UTF16UTF8 - SWFilter descendant to convert UTF-16 to UTF-8 + * + */ + +#include +#include + +#include +#include + +SWORD_NAMESPACE_START + +UTF16UTF8::UTF16UTF8() { +} + + +char UTF16UTF8::processText(SWBuf &text, const SWKey *key, const SWModule *module) +{ + unsigned short *from; + + int len; + unsigned long uchar; + unsigned short schar; + len = 0; + from = (unsigned short*) text.c_str(); + while 
(*from) { + len += 2; + from++; + } + + SWBuf orig = text; + from = (unsigned short*)orig.c_str(); + + + // ------------------------------- + + for (text = ""; *from; from++) { + uchar = 0; + + if (*from < 0xD800 || *from > 0xDFFF) { + uchar = *from; + } + else if (*from >= 0xD800 && *from <= 0xDBFF) { + uchar = *from; + schar = *(from+1); + if (schar < 0xDC00 || schar > 0xDFFF) { + //error: high surrogate is not followed by a low surrogate (DC00-DFFF), skip it + continue; + } + uchar &= 0x03ff; + schar &= 0x03ff; + uchar <<= 10; + uchar |= schar; + uchar += 0x10000; + from++; + } + else { + //error, do nothing + continue; + } + + if (uchar < 0x80) { + text += uchar; + } + else if (uchar < 0x800) { + text += 0xc0 | (uchar >> 6); + text += 0x80 | (uchar & 0x3f); + } + else if (uchar < 0x10000) { + text += 0xe0 | (uchar >> 12); + text += 0x80 | ((uchar >> 6) & 0x3f); + text += 0x80 | (uchar & 0x3f); + } + else if (uchar < 0x200000) { + text += 0xF0 | (uchar >> 18); + text += 0x80 | ((uchar >> 12) & 0x3F); + text += 0x80 | ((uchar >> 6) & 0x3F); + text += 0x80 | (uchar & 0x3F); + } + } + + return 0; +} + + + + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/utf8arshaping.cpp b/src/modules/filters/utf8arshaping.cpp new file mode 100644 index 0000000..702fb62 --- /dev/null +++ b/src/modules/filters/utf8arshaping.cpp @@ -0,0 +1,51 @@ +/****************************************************************************** +* +* utf8arshaping - SWFilter descendant to perform Arabic shaping on +* UTF-8 text +*/ + +#ifdef _ICU_ + +#include + +#include + +#include +#include + +SWORD_NAMESPACE_START + +UTF8arShaping::UTF8arShaping() { + conv = ucnv_open("UTF-8", &err); +} + +UTF8arShaping::~UTF8arShaping() { + ucnv_close(conv); +} + +char UTF8arShaping::processText(SWBuf &text, const SWKey *key, const SWModule *module) +{ + UChar *ustr, *ustr2; + if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering + return -1; + + int32_t len = text.length(); + ustr = new UChar[len]; + ustr2 = new UChar[len]; + + // Convert UTF-8 string to
UTF-16 (UChars) + len = ucnv_toUChars(conv, ustr, len, text.c_str(), -1, &err); + + len = u_shapeArabic(ustr, len, ustr2, len, U_SHAPE_LETTERS_SHAPE | U_SHAPE_DIGITS_EN2AN, &err); + + text.setSize(text.size()*2); + len = ucnv_fromUChars(conv, text.getRawData(), text.size(), ustr2, len, &err); + text.setSize(len); + + delete [] ustr2; + delete [] ustr; + return 0; +} + +SWORD_NAMESPACE_END +#endif diff --git a/src/modules/filters/utf8bidireorder.cpp b/src/modules/filters/utf8bidireorder.cpp new file mode 100644 index 0000000..783602c --- /dev/null +++ b/src/modules/filters/utf8bidireorder.cpp @@ -0,0 +1,60 @@ +/****************************************************************************** +* +* utf8cnormalizer - SWFilter descendant to perform reordering of UTF-8 +* text to visual order according to Unicode BiDi +*/ + +#ifdef _ICU_ + +#include + +#include + +#include +#include + +SWORD_NAMESPACE_START + +UTF8BiDiReorder::UTF8BiDiReorder() { + + conv = ucnv_open("UTF-8", &err); + +} + +UTF8BiDiReorder::~UTF8BiDiReorder() { + ucnv_close(conv); +} + +char UTF8BiDiReorder::processText(SWBuf &text, const SWKey *key, const SWModule *module) +{ + UChar *ustr, *ustr2; + if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering + return -1; + + int32_t len = text.length(); + ustr = new UChar[len]; //each char could become a surrogate pair + + // Convert UTF-8 string to UTF-16 (UChars) + len = ucnv_toUChars(conv, ustr, len, text.c_str(), -1, &err); + ustr2 = new UChar[len]; + + UBiDi* bidi = ubidi_openSized(len + 1, 0, &err); + ubidi_setPara(bidi, ustr, len, UBIDI_DEFAULT_RTL, NULL, &err); + len = ubidi_writeReordered(bidi, ustr2, len, + UBIDI_DO_MIRRORING | UBIDI_REMOVE_BIDI_CONTROLS, &err); + ubidi_close(bidi); + +// len = ubidi_writeReverse(ustr, len, ustr2, len, +// UBIDI_DO_MIRRORING | UBIDI_REMOVE_BIDI_CONTROLS, &err); + + text.setSize(text.size()*2); + len = ucnv_fromUChars(conv, text.getRawData(), text.size(), ustr2, len, &err); + text.setSize(len); + + delete [] 
ustr2; + delete [] ustr; + return 0; +} + +SWORD_NAMESPACE_END +#endif diff --git a/src/modules/filters/utf8cantillation.cpp b/src/modules/filters/utf8cantillation.cpp new file mode 100644 index 0000000..6213620 --- /dev/null +++ b/src/modules/filters/utf8cantillation.cpp @@ -0,0 +1,55 @@ +/****************************************************************************** + * + * UTF8Cantillation - SWFilter descendant to remove UTF-8 Hebrew cantillation + * + */ + + +#include +#include +#include + +SWORD_NAMESPACE_START + +const char oName[] = "Hebrew Cantillation"; +const char oTip[] = "Toggles Hebrew Cantillation Marks"; + +const SWBuf choices[3] = {"On", "Off", ""}; +const StringList oValues(&choices[0], &choices[2]); + +UTF8Cantillation::UTF8Cantillation() : SWOptionFilter(oName, oTip, &oValues) { + setOptionValue("Off"); +} + + +UTF8Cantillation::~UTF8Cantillation(){}; + + +char UTF8Cantillation::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + if (!option) { + //The UTF-8 range 0xD6 0x90 to 0xD6 0xAF and 0xD7 0x84 consist of Hebrew cantillation marks so block those out. 
+ SWBuf orig = text; + const unsigned char* from = (unsigned char*)orig.c_str(); + for (text = ""; *from; from++) { + if (*from != 0xD6) { + if (*from == 0xD7 && *(from + 1) == 0x84) { + from++; + } + else { + text += *from; + } + } + else if (*(from + 1) < 0x90 || *(from + 1) > 0xAF) { + text += *from; + if (!*(from + 1)) break; /* lone 0xD6 at end of text: do not copy or step past the terminator */ + text += *(++from); + } + else { + from++; + } + } + } + return 0; +} + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/utf8greekaccents.cpp b/src/modules/filters/utf8greekaccents.cpp new file mode 100644 index 0000000..df85968 --- /dev/null +++ b/src/modules/filters/utf8greekaccents.cpp @@ -0,0 +1,261 @@ +/****************************************************************************** + * + * UTF8GreekAccents - SWFilter descendant to remove UTF-8 Greek accents + * + */ + + +#include +#include +#include + + +#ifdef _ICU_ +#include +sword::UTF8NFKD decompose; +#endif + +SWORD_NAMESPACE_START + +const char oName[] = "Greek Accents"; +const char oTip[] = "Toggles Greek Accents"; + +const SWBuf choices[3] = {"On", "Off", ""}; +const StringList oValues(&choices[0], &choices[2]); + +UTF8GreekAccents::UTF8GreekAccents() : SWOptionFilter(oName, oTip, &oValues) { + setOptionValue("On"); +} + +UTF8GreekAccents::~UTF8GreekAccents(){}; + + +char UTF8GreekAccents::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + + if (!option) { //we don't want greek accents + //unsigned char *to, *from; + //to = (unsigned char*)text; + //for (from = (unsigned char*)text; *from; from++) { +#ifdef _ICU_ + decompose.processText(text, (SWKey *)2); // note the hack of 2 to mimic a real key.
TODO: remove all hacks +#endif + + SWBuf orig = text; + const unsigned char* from = (unsigned char*)orig.c_str(); + for (text = ""; *from; from++) { + //first just remove combining characters + if (*from == 0xE2 && *(from + 1) == 0x80 && *(from + 2) == 0x99) { + from += 2; + } + else if (*from == 0xCC && *(from + 1)) { + if (*(from + 1) == 0x80 || *(from + 1) == 0x81 || *(from + 1) == 0x82 || *(from + 1) == 0x88 || *(from + 1) == 0x93 || *(from + 1) == 0x94) { + from++; + } + else text += *from; /* not one of the marks we strip: keep the 0xCC lead byte so its trail byte, copied on the next pass, stays valid UTF-8 */ + } + else if (*from == 0xCD && (*(from + 1) == 0xBA || *(from + 1) == 0x82)) { + from++; + } + //now converted pre-composed characters to their alphabetic bases, discarding the accents + + //Greek + //capital alpha + else if ((*from == 0xCE && *(from + 1) == 0x86)) { + text += 0xCE; + text += 0x91; + from++; + } + //capital epsilon + else if ((*from == 0xCE && *(from + 1) == 0x88)) { + text += 0xCE; + text += 0x95; + from++; + } + //capital eta + else if ((*from == 0xCE && *(from + 1) == 0x89)) { + text += 0xCE; + text += 0x97; + from++; + } + //capital iota + else if ((*from == 0xCE && (*(from + 1) == 0x8A || *(from + 1) == 0xAA))) { + text += 0xCE; + text += 0x99; + from++; + } + //capital omicron + else if ((*from == 0xCE && *(from + 1) == 0x8C)) { + text += 0xCE; + text += 0x9F; + from++; + } + //capital upsilon + else if ((*from == 0xCE && (*(from + 1) == 0x8E || *(from + 1) == 0xAB))) { + text += 0xCE; + text += 0xA5; + from++; + } + //capital omega + else if ((*from == 0xCE && *(from + 1) == 0x8F)) { + text += 0xCE; + text += 0xA9; + from++; + } + + //alpha + else if ((*from == 0xCE && *(from + 1) == 0xAC)) { + text += 0xCE; + text += 0xB1; + from++; + } + //epsilon + else if ((*from == 0xCE && *(from + 1) == 0xAD)) { + text += 0xCE; + text += 0xB5; + from++; + } + //eta + else if ((*from == 0xCE && *(from + 1) == 0xAE)) { + text += 0xCE; + text += 0xB7; + from++; + } + //iota + else if ((*from == 0xCE && *(from + 1) == 0xAF) || (*from == 0xCF && *(from + 1) == 0x8A)) { + text += 0xCE; +
text += 0xB9; + from++; + } + //omicron + else if ((*from == 0xCF && *(from + 1) == 0x8C)) { + text += 0xCE; + text += 0xBF; + from++; + } + //upsilon (CE B0 = U+03B0; the former CE 88 test was already consumed by the capital epsilon branch) + else if ((*from == 0xCE && *(from + 1) == 0xB0) || (*from == 0xCF && (*(from + 1) == 0x8B || *(from + 1) == 0x8D))) { + text += 0xCF; + text += 0x85; + from++; + } + //omega + else if ((*from == 0xCF && *(from + 1) == 0x8E)) { + text += 0xCF; + text += 0x89; + from++; + } + + //Extended Greek + //capital alpha + else if (*from == 0xE1 && (((*(from + 1) == 0xBC || *(from + 1) == 0xBE) && *(from + 2) >= 0x88 && *(from + 2) <= 0x8F) || (*(from + 1) == 0xBE && *(from + 2) >= 0xB8 && *(from + 2) <= 0xBC))) { + text += 0xCE; + text += 0x91; + from+=2; + } + //capital epsilon + else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0x98 && *(from + 2) <= 0x9D) || (*(from + 1) == 0xBF && (*(from + 2) == 0x88 || *(from + 2) == 0x89)))) { + text += 0xCE; + text += 0x95; + from+=2; + } + //capital eta + else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0xA8 && *(from + 2) <= 0xAF) || (*(from + 1) == 0xBE && *(from + 2) >= 0x98 && *(from + 2) <= 0x9F) || (*(from + 1) == 0xBF && *(from + 2) >= 0x8A && *(from + 2) <= 0x8C))) { + text += 0xCE; + text += 0x97; + from+=2; + } + //capital iota + else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0xB8 && *(from + 2) <= 0xBF) || (*(from + 1) == 0xBF && *(from + 2) >= 0x98 && *(from + 2) <= 0x9B))) { + text += 0xCE; + text += 0x99; + from+=2; + } + //capital omicron + else if (*from == 0xE1 && (((*(from + 1) == 0xBD && *(from + 2) >= 0x88 && *(from + 2) <= 0x8D)) || ((*(from + 1) == 0xBF && (*(from + 2) == 0xB8 || *(from + 2) == 0xB9))))) { + text += 0xCE; + text += 0x9F; + from+=2; + } + //capital upsilon + else if (*from == 0xE1 && ((*(from + 1) == 0xBD && *(from + 2) >= 0x99 && *(from + 2) <= 0x9F) || (*(from + 1) == 0xBF && *(from + 2) >= 0xA8 && *(from + 2) <= 0xAB))) { + text += 0xCE; + text += 0xA5; + from+=2; + } + //capital omega +
else if (*from == 0xE1 && (((*(from + 1) == 0xBD || *(from + 1) == 0xBE) && *(from + 2) >= 0xA8 && *(from + 2) <= 0xAF) || (*(from + 1) == 0xBF && *(from + 2) >= 0xBA && *(from + 2) <= 0xBC))) { + text += 0xCE; + text += 0xA9; + from+=2; + } + //capital rho + else if (*from == 0xE1 && *(from + 1) == 0xBF && *(from + 2) == 0xAC) { + text += 0xCE; + text += 0xA1; + from+=2; + } + + //alpha + else if (*from == 0xE1 && ( + ((*(from + 1) == 0xBC || *(from + 1) == 0xBE) && *(from + 2) >= 0x80 && *(from + 2) <= 0x87) + || (*(from + 1) == 0xBD && (*(from + 2) == 0xB0 || *(from + 2) == 0xB1)) + || (*(from + 1) == 0xBE && *(from + 2) >= 0xB0 && *(from + 2) <= 0xB7))) { + text += 0xCE; + text += 0xB1; + from+=2; + } + //epsilon + else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0x90 && *(from + 2) <= 0x95) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB2 || *(from + 2) == 0xB3)))) { + text += 0xCE; + text += 0xB5; + from+=2; + } + //eta + else if (*from == 0xE1 && ((*(from + 1) == 0xBE && *(from + 2) >= 0x90 && *(from + 2) <= 0x97) || (*(from + 1) == 0xBC && *(from + 2) >= 0xA0 && *(from + 2) <= 0xA7) || (*(from + 1) == 0xBF && *(from + 2) >= 0x82 && *(from + 2) <= 0x87) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB4 || *(from + 2) == 0xB5)))) { + text += 0xCE; + text += 0xB7; + from+=2; + } + //iota + else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0xB0 && *(from + 2) <= 0xB7) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB6 || *(from + 2) == 0xB7)) || (*(from + 1) == 0xBF && *(from + 2) >= 0x90 && *(from + 2) <= 0x97))) { + text += 0xCE; + text += 0xB9; + from+=2; + } + //omicron + else if (*from == 0xE1 && (*(from + 1) == 0xBD && ((*(from + 2) >= 0x80 && *(from + 2) <= 0x85) || (*(from + 2) == 0xB8 || *(from + 2) == 0xB9)))) { + text += 0xCE; + text += 0xBF; + from+=2; + } + //upsilon + else if (*from == 0xE1 && ((*(from + 1) == 0xBD && ((*(from + 2) >= 0x90 && *(from + 2) <= 0x97) || *(from + 2) == 0xBA || *(from + 2) == 0xBB)) || (*(from 
+ 1) == 0xBF && ((*(from + 2) >= 0xA0 && *(from + 2) <= 0xA3) || *(from + 2) == 0xA6 || *(from + 2) == 0xA7)))) { + text += 0xCF; + text += 0x85; + from+=2; + } + //omega + else if (*from == 0xE1 && ((*(from + 1) == 0xBD && ((*(from + 2) >= 0xA0 && *(from + 2) <= 0xA7) || (*(from + 2) == 0xBC || *(from + 2) == 0xBD))) || (*(from + 1) == 0xBE && (*(from + 2) >= 0xA0 && *(from + 2) <= 0xA7)) || (*(from + 1) == 0xBF && *(from + 2) >= 0xB2 && *(from + 2) <= 0xB7))) { + text += 0xCF; + text += 0x89; + from+=2; + } + //rho (E1 BF A4 or E1 BF A5; the two trail-byte tests are alternatives) + else if (*from == 0xE1 && *(from + 1) == 0xBF && (*(from + 2) == 0xA4 || *(from + 2) == 0xA5)) { + text += 0xCF; + text += 0x81; + from+=2; + } + else { //no characters we filter + text += *from; + } + } + } + return 0; +} + + + + + + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/utf8hebrewpoints.cpp b/src/modules/filters/utf8hebrewpoints.cpp new file mode 100644 index 0000000..0476db8 --- /dev/null +++ b/src/modules/filters/utf8hebrewpoints.cpp @@ -0,0 +1,44 @@ +/****************************************************************************** + * + * UTF8HebrewPoints - SWFilter descendant to remove UTF-8 Hebrew vowel points + * + */ + + +#include +#include +#include + +SWORD_NAMESPACE_START + +const char oName[] = "Hebrew Vowel Points"; +const char oTip[] = "Toggles Hebrew Vowel Points"; + +const SWBuf choices[3] = {"On", "Off", ""}; +const StringList oValues(&choices[0], &choices[2]); + +UTF8HebrewPoints::UTF8HebrewPoints() : SWOptionFilter(oName, oTip, &oValues) { + setOptionValue("On"); +} + +UTF8HebrewPoints::~UTF8HebrewPoints(){}; + + +char UTF8HebrewPoints::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + if (!option) { + //The UTF-8 range 0xD6 0xB0 to 0xD6 0xBF, excluding 0xD6 0xBE, consists of Hebrew vowel points so block those out.
+ SWBuf orig = text; + const unsigned char* from = (unsigned char*)orig.c_str(); + for (text = ""; *from; from++) { + if ((*from == 0xD6) && (*(from + 1) >= 0xB0 && *(from + 1) <= 0xBF) && (*(from + 1) != 0xBE)) { + from++; + } + else { + text += *from; + } + } + } + return 0; +} + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/utf8html.cpp b/src/modules/filters/utf8html.cpp new file mode 100644 index 0000000..088f669 --- /dev/null +++ b/src/modules/filters/utf8html.cpp @@ -0,0 +1,70 @@ +/****************************************************************************** + * + * utf8html - SWFilter descendant to convert a UTF-8 stream to HTML escapes + * + */ + + +#include +#include +#include +#include + +SWORD_NAMESPACE_START + +UTF8HTML::UTF8HTML() { +} + + +char UTF8HTML::processText(SWBuf &text, const SWKey *key, const SWModule *module) +{ + unsigned char *from; + int len; + char digit[24]; /* holds any unsigned long in decimal plus the NUL, even for malformed lead bytes */ + unsigned long ch; + if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering + return (char)-1; + + len = strlen(text.c_str()) + 2; // shift string to right of buffer + + SWBuf orig = text; + from = (unsigned char *)orig.c_str(); + + // ------------------------------- + for (text = ""; *from; from++) { + ch = 0; + if ((*from & 128) != 128) { +// if (*from != ' ') + text += *from; + continue; + } + if ((*from & 128) && ((*from & 64) != 64)) { + // error + *from = 'x'; + continue; + } + *from <<= 1; + int subsequent; + for (subsequent = 1; (*from & 128) && from[subsequent]; subsequent++) { /* stop at the terminator when the sequence is truncated */ + *from <<= 1; + from[subsequent] &= 63; + ch <<= 6; + ch |= from[subsequent]; + } + subsequent--; + *from <<=1; + char significantFirstBits = 8 - (2+subsequent); + + ch |= (((short)*from) << (((6*subsequent)+significantFirstBits)-8)); + from += subsequent; + text += '&'; + text += '#'; + sprintf(digit, "%lu", ch); + for (char *dig = digit; *dig; dig++) + text += *dig; + text += ';'; + } + return 0; +} + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/utf8latin1.cpp
b/src/modules/filters/utf8latin1.cpp new file mode 100644 index 0000000..08b288d --- /dev/null +++ b/src/modules/filters/utf8latin1.cpp @@ -0,0 +1,75 @@ +/****************************************************************************** + * + * UTF8Latin1 - SWFilter descendant to convert UTF-8 to Latin-1 + * + */ + +#include +#include + +#include +#include + +SWORD_NAMESPACE_START + +UTF8Latin1::UTF8Latin1(char rchar) : replacementChar(rchar) { +} + + +char UTF8Latin1::processText(SWBuf &text, const SWKey *key, const SWModule *module) +{ + unsigned char *from; + + int len; + unsigned long uchar; + unsigned char significantFirstBits, subsequent; + + if ((unsigned long)key < 2) {// hack, we're en(1)/de(0)ciphering + return (char)-1; + } + len = strlen(text.c_str()) + 1; // shift string to right of buffer + + SWBuf orig = text; + from = (unsigned char*)orig.c_str(); + + + // ------------------------------- + + for (text = ""; *from; from++) { + uchar = 0; + if ((*from & 128) != 128) { + // if (*from != ' ') + uchar = *from; + } + else if ((*from & 128) && ((*from & 64) != 64)) { + // error, do nothing + continue; + } + else { + *from <<= 1; + for (subsequent = 1; (*from & 128) && from[subsequent]; subsequent++) { /* stop at the terminator when the sequence is truncated */ + *from <<= 1; + from[subsequent] &= 63; + uchar <<= 6; + uchar |= from[subsequent]; + } + subsequent--; + *from <<=1; + significantFirstBits = 8 - (2+subsequent); + + uchar |= (((short)*from) << (((6*subsequent)+significantFirstBits)-8)); + from += subsequent; + } + + if (uchar <= 0xff) { /* Latin-1 runs through U+00FF inclusive */ + text += (unsigned char)uchar; + } + else { + text += replacementChar; + } + } + return 0; +} + +SWORD_NAMESPACE_END + diff --git a/src/modules/filters/utf8nfc.cpp b/src/modules/filters/utf8nfc.cpp new file mode 100644 index 0000000..15b76b5 --- /dev/null +++ b/src/modules/filters/utf8nfc.cpp @@ -0,0 +1,50 @@ +/****************************************************************************** +* +* utf8nfc - SWFilter descendant to perform NFC (canonical composition +* normalization) on UTF-8 text +*/
+ +#ifdef _ICU_ + +#include + +#include +#include +#include +#include + +#include +#include + +SWORD_NAMESPACE_START + +UTF8NFC::UTF8NFC() { + conv = ucnv_open("UTF-8", &err); +} + +UTF8NFC::~UTF8NFC() { + ucnv_close(conv); +} + +char UTF8NFC::processText(SWBuf &text, const SWKey *key, const SWModule *module) +{ + if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering + return -1; + + UErrorCode status = U_ZERO_ERROR; + UnicodeString source(text.getRawData(), text.length(), conv, status); + UnicodeString target; + + status = U_ZERO_ERROR; + Normalizer::normalize(source, UNORM_NFC, 0, target, status); + + status = U_ZERO_ERROR; + text.setSize(text.size()*2); // potentially, it can grow to 2x the original size + int32_t len = target.extract(text.getRawData(), text.size(), conv, status); + text.setSize(len); + + return 0; +} + +SWORD_NAMESPACE_END +#endif diff --git a/src/modules/filters/utf8nfkd.cpp b/src/modules/filters/utf8nfkd.cpp new file mode 100644 index 0000000..a19d36b --- /dev/null +++ b/src/modules/filters/utf8nfkd.cpp @@ -0,0 +1,52 @@ +/****************************************************************************** +* +* utf8nfkd - SWFilter descendant to perform NFKD (compatability decomposition +* normalization) on UTF-8 text +*/ + +#ifdef _ICU_ + +#include + +#include + +#include +#include + +SWORD_NAMESPACE_START + +UTF8NFKD::UTF8NFKD() { + conv = ucnv_open("UTF-8", &err); +} + +UTF8NFKD::~UTF8NFKD() { + ucnv_close(conv); +} + +char UTF8NFKD::processText(SWBuf &text, const SWKey *key, const SWModule *module) +{ + if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering + return -1; + + int32_t len = 5 + text.length() * 5; + source = new UChar[len + 1]; //each char could become a surrogate pair + + // Convert UTF-8 string to UTF-16 (UChars) + int32_t ulen = ucnv_toUChars(conv, source, len, text.c_str(), -1, &err); + target = new UChar[len + 1]; + + //compatability decomposition + ulen = unorm_normalize(source, ulen, UNORM_NFKD, 0, target, 
len, &err); + + text.setSize(len); + len = ucnv_fromUChars(conv, text.getRawData(), len, target, ulen, &err); + text.setSize(len); + + delete [] source; + delete [] target; + + return 0; +} + +SWORD_NAMESPACE_END +#endif diff --git a/src/modules/filters/utf8transliterator.cpp b/src/modules/filters/utf8transliterator.cpp new file mode 100644 index 0000000..d99741b --- /dev/null +++ b/src/modules/filters/utf8transliterator.cpp @@ -0,0 +1,888 @@ +/****************************************************************************** +* +* utf8transliterators - SWFilter descendant to transliterate between +* ICU-supported scripts. +*/ + +#ifdef _ICU_ + +#include + +#include + +#include +#include +#include +#include + +#ifndef _ICUSWORD_ +#include "unicode/resbund.h" +#endif +#include + +SWORD_NAMESPACE_START + +const char UTF8Transliterator::optionstring[NUMTARGETSCRIPTS][16] = { + "Off", + "Latin", + "IPA", + "Basic Latin", + "SBL", + "TC", + "Beta", + "BGreek", + "SERA", + "Hugoye", + "UNGEGN", + "ISO", + "ALA-LC", + "BGN-PCGN", + "Greek", + "Hebrew", + "Cyrillic", + "Arabic", + "Syriac", + "Katakana", + "Hiragana", + "Hangul", + "Devanagari", + "Tamil", + "Bengali", + "Gurmukhi", + "Gujarati", + "Oriya", + "Telugu", + "Kannada", + "Malayalam", + "Thai", + "Georgian", + "Armenian", + "Ethiopic", + "Gothic", + "Ugaritic", + "Coptic", + "Meroitic", + "Linear B", + "Cypriot", + "Runic", + "Ogham", + "Thaana", + "Glagolitic", +// "Tengwar", +// "Cirth" +}; + +const char UTF8Transliterator::optName[] = "Transliteration"; +const char UTF8Transliterator::optTip[] = "Transliterates between scripts"; + +SWTransMap UTF8Transliterator::transMap; + +#ifndef _ICUSWORD_ + +const char UTF8Transliterator::SW_RB_RULE_BASED_IDS[] = "RuleBasedTransliteratorIDs"; +const char UTF8Transliterator::SW_RB_RULE[] = "Rule"; +#ifdef SWICU_DATA +const char UTF8Transliterator::SW_RESDATA[] = SWICU_DATA; +#else +const char UTF8Transliterator::SW_RESDATA[] = "/usr/local/lib/sword/"; +#endif + +class 
SWCharString { + public: + inline SWCharString(const UnicodeString& str); + inline ~SWCharString(); + inline operator const char*() { return ptr; } + private: + char buf[128]; + char* ptr; +}; +SWCharString::SWCharString(const UnicodeString& str) { + // TODO This isn't quite right -- we should probably do + // preflighting here to determine the real length. + if (str.length() >= (int32_t)sizeof(buf)) { + ptr = new char[str.length() + 8]; + } else { + ptr = buf; + } + str.extract(0, 0x7FFFFFFF, ptr, ""); +} + +SWCharString::~SWCharString() { + if (ptr != buf) { + delete[] ptr; + } +} + +#endif // _ICUSWORD_ + + +UTF8Transliterator::UTF8Transliterator() { + option = 0; + unsigned long i; + for (i = 0; i < NUMTARGETSCRIPTS; i++) { + options.push_back(optionstring[i]); + } +#ifndef _ICUSWORD_ + utf8status = U_ZERO_ERROR; + Load(utf8status); +#endif +} + +void UTF8Transliterator::Load(UErrorCode &status) +{ +#ifndef _ICUSWORD_ + static const char translit_swordindex[] = "translit_swordindex"; + + UResourceBundle *bundle = 0, *transIDs = 0, *colBund = 0; + bundle = ures_openDirect(SW_RESDATA, translit_swordindex, &status); + if (U_FAILURE(status)) { + SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: no resource index to load"); + SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: status %s", u_errorName(status)); + return; + } + + transIDs = ures_getByKey(bundle, SW_RB_RULE_BASED_IDS, 0, &status); + //UParseError parseError; + + int32_t row, maxRows; + if (U_SUCCESS(status)) { + maxRows = ures_getSize(transIDs); + for (row = 0; row < maxRows; row++) { + colBund = ures_getByIndex(transIDs, row, 0, &status); + + if (U_SUCCESS(status) && ures_getSize(colBund) == 4) { + UnicodeString id = ures_getUnicodeStringByIndex(colBund, 0, &status); + UChar type = ures_getUnicodeStringByIndex(colBund, 1, &status).charAt(0); + UnicodeString resString = ures_getUnicodeStringByIndex(colBund, 2, &status); + SWLog::getSystemLog()->logDebug("ok so far"); + + if 
(U_SUCCESS(status)) { + switch (type) { + case 0x66: // 'f' + case 0x69: // 'i' + // 'file' or 'internal'; + // row[2]=resource, row[3]=direction + { + //UBool visible = (type == 0x0066 /*f*/); + UTransDirection dir = + (ures_getUnicodeStringByIndex(colBund, 3, &status).charAt(0) == + 0x0046 /*F*/) ? + UTRANS_FORWARD : UTRANS_REVERSE; + //registry->put(id, resString, dir, visible); + SWLog::getSystemLog()->logDebug("instantiating %s ...", resString.getBuffer()); + registerTrans(id, resString, dir, status); + SWLog::getSystemLog()->logDebug("done."); + } + break; + case 0x61: // 'a' + // 'alias'; row[2]=createInstance argument + //registry->put(id, resString, TRUE); + break; + } + } + else SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: Failed to get resString"); + } + else SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: Failed to get row"); + ures_close(colBund); + } + } + else + { + SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: no resource index to load"); + SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: status %s", u_errorName(status)); + } + + ures_close(transIDs); + ures_close(bundle); + +#endif // _ICUSWORD_ +} + +void UTF8Transliterator::registerTrans(const UnicodeString& ID, const UnicodeString& resource, + UTransDirection dir, UErrorCode &status ) +{ +#ifndef _ICUSWORD_ + SWLog::getSystemLog()->logDebug("registering ID locally %s", ID.getBuffer()); + SWTransData swstuff; + swstuff.resource = resource; + swstuff.dir = dir; + SWTransPair swpair; + swpair.first = ID; + swpair.second = swstuff; + transMap.insert(swpair); +#endif +} + +bool UTF8Transliterator::checkTrans(const UnicodeString& ID, UErrorCode &status ) +{ +#ifndef _ICUSWORD_ + Transliterator *trans = Transliterator::createInstance(ID, UTRANS_FORWARD, status); + if (!U_FAILURE(status)) + { + // already have it, clean up and return true + SWLog::getSystemLog()->logDebug("already have it %s", ID.getBuffer()); + delete trans; + return true; + } + status = 
U_ZERO_ERROR; + + SWTransMap::iterator swelement; + if ((swelement = transMap.find(ID)) != transMap.end()) + { + SWLog::getSystemLog()->logDebug("found element in map"); + SWTransData swstuff = (*swelement).second; + UParseError parseError; + //UErrorCode status; + //std::cout << "unregistering " << ID << std::endl; + //Transliterator::unregister(ID); + SWLog::getSystemLog()->logDebug("resource is %s", swstuff.resource.getBuffer()); + + // Get the rules + //std::cout << "importing: " << ID << ", " << resource << std::endl; + SWCharString ch(swstuff.resource); + UResourceBundle *bundle = ures_openDirect(SW_RESDATA, ch, &status); + const UnicodeString rules = ures_getUnicodeStringByKey(bundle, SW_RB_RULE, &status); + ures_close(bundle); + //parser.parse(rules, isReverse ? UTRANS_REVERSE : UTRANS_FORWARD, + // parseError, status); + if (U_FAILURE(status)) { + SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: Failed to get rules"); + SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: status %s", u_errorName(status)); + return false; + } + + + Transliterator *trans = Transliterator::createFromRules(ID, rules, swstuff.dir, + parseError,status); + if (U_FAILURE(status)) { + SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: Failed to create transliterator"); + SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: status %s", u_errorName(status)); + SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: Parse error: line %d", parseError.line); /* line is an integer, not a string */ + SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: Parse error: offset %d", parseError.offset); + SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: Parse error: preContext %s", (const char *)SWCharString(UnicodeString(parseError.preContext))); /* UChar text must be converted before %s can print it */ + SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: Parse error: postContext %s", (const char *)SWCharString(UnicodeString(parseError.postContext))); + SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: rules were"); +// SWLog::getSystemLog()->logError((const char *)rules); + return false; + } + +
Transliterator::registerInstance(trans); + return true; + + //Transliterator *trans = instantiateTrans(ID, swstuff.resource, swstuff.dir, parseError, status); + //return trans; + } + else + { + return false; + } +#else +return true; +#endif // _ICUSWORD_ +} + +bool UTF8Transliterator::addTrans(const char* newTrans, SWBuf* transList) { +#ifdef _ICUSWORD_ + UErrorCode status = U_ZERO_ERROR; /* start from a defined success value instead of stack garbage */ + if (checkTrans(UnicodeString(newTrans), status)) { +#endif + *transList += newTrans; + *transList += ";"; + return true; +#ifdef _ICUSWORD_ + } + else { + return false; + } +#endif +} + +Transliterator * UTF8Transliterator::createTrans(const UnicodeString& ID, UTransDirection dir, UErrorCode &status ) +{ + Transliterator *trans = Transliterator::createInstance(ID,UTRANS_FORWARD,status); /* NOTE(review): dir is not consulted here; UTRANS_FORWARD is always used */ + if (U_FAILURE(status)) { + delete trans; + return NULL; + } + else { + return trans; + } +} + +void UTF8Transliterator::setOptionValue(const char *ival) +{ + unsigned char i = option = NUMTARGETSCRIPTS - 1; /* last valid row: optionstring[] has NUMTARGETSCRIPTS rows, so index NUMTARGETSCRIPTS is past the end */ + while (i && stricmp(ival, optionstring[i])) { + i--; + option = i; + } +} + +const char *UTF8Transliterator::getOptionValue() +{ + return (NUMTARGETSCRIPTS > option) ?
optionstring[option] : 0; +} + +char UTF8Transliterator::processText(SWBuf &text, const SWKey *key, const SWModule *module) +{ + if (option) { // if we want transliteration + unsigned long i, j; + UErrorCode err = U_ZERO_ERROR; + UConverter * conv = NULL; + conv = ucnv_open("UTF-8", &err); + SWBuf ID; + + bool compat = false; + + // Convert UTF-8 string to UTF-16 (UChars) + j = strlen(text); + int32_t len = (j * 2) + 1; + UChar *source = new UChar[len]; + err = U_ZERO_ERROR; + len = ucnv_toUChars(conv, source, len, text, j, &err); + source[len] = 0; + + // Figure out which scripts are used in the string + unsigned char scripts[NUMSCRIPTS]; + + for (i = 0; i < NUMSCRIPTS; i++) { + scripts[i] = false; + } + + for (i = 0; i < (unsigned long)len; i++) { + j = ublock_getCode(source[i]); + scripts[SE_LATIN] = true; + switch (j) { + //case UBLOCK_BASIC_LATIN: scripts[SE_LATIN] = true; break; + case UBLOCK_GREEK: scripts[SE_GREEK] = true; break; + case UBLOCK_HEBREW: scripts[SE_HEBREW] = true; break; + case UBLOCK_CYRILLIC: scripts[SE_CYRILLIC] = true; break; + case UBLOCK_ARABIC: scripts[SE_ARABIC] = true; break; + case UBLOCK_SYRIAC: scripts[SE_SYRIAC] = true; break; + case UBLOCK_KATAKANA: scripts[SE_KATAKANA] = true; break; + case UBLOCK_HIRAGANA: scripts[SE_HIRAGANA] = true; break; + case UBLOCK_HANGUL_SYLLABLES: scripts[SE_HANGUL] = true; break; + case UBLOCK_HANGUL_JAMO: scripts[SE_JAMO] = true; break; + case UBLOCK_DEVANAGARI: scripts[SE_DEVANAGARI] = true; break; + case UBLOCK_TAMIL: scripts[SE_TAMIL] = true; break; + case UBLOCK_BENGALI: scripts[SE_BENGALI] = true; break; + case UBLOCK_GURMUKHI: scripts[SE_GURMUKHI] = true; break; + case UBLOCK_GUJARATI: scripts[SE_GUJARATI] = true; break; + case UBLOCK_ORIYA: scripts[SE_ORIYA] = true; break; + case UBLOCK_TELUGU: scripts[SE_TELUGU] = true; break; + case UBLOCK_KANNADA: scripts[SE_KANNADA] = true; break; + case UBLOCK_MALAYALAM: scripts[SE_MALAYALAM] = true; break; + case UBLOCK_THAI: scripts[SE_THAI] = true; 
break; + case UBLOCK_GEORGIAN: scripts[SE_GEORGIAN] = true; break; + case UBLOCK_ARMENIAN: scripts[SE_ARMENIAN] = true; break; + case UBLOCK_ETHIOPIC: scripts[SE_ETHIOPIC] = true; break; + case UBLOCK_GOTHIC: scripts[SE_GOTHIC] = true; break; + case UBLOCK_UGARITIC: scripts[SE_UGARITIC] = true; break; +// case UBLOCK_MEROITIC: scripts[SE_MEROITIC] = true; break; +// case UBLOCK_LINEARB: scripts[SE_LINEARB] = true; break; +// case UBLOCK_CYPRIOT: scripts[SE_CYPRIOT] = true; break; + case UBLOCK_RUNIC: scripts[SE_RUNIC] = true; break; + case UBLOCK_OGHAM: scripts[SE_OGHAM] = true; break; + case UBLOCK_THAANA: scripts[SE_THAANA] = true; break; +// case UBLOCK_GLAGOLITIC: scripts[SE_GLAGOLITIC] = true; break; +// case UBLOCK_TENGWAR: scripts[SE_TENGWAR] = true; break; +// case UBLOCK_CIRTH: scripts[SE_CIRTH] = true; break; + case UBLOCK_CJK_RADICALS_SUPPLEMENT: + case UBLOCK_KANGXI_RADICALS: + case UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS: + case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION: + case UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A: + case UBLOCK_CJK_UNIFIED_IDEOGRAPHS: + scripts[SE_HAN] = true; + break; + case UBLOCK_CJK_COMPATIBILITY: + case UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS: + case UBLOCK_CJK_COMPATIBILITY_FORMS: + scripts[SE_HAN] = true; + compat = true; + break; + case UBLOCK_HANGUL_COMPATIBILITY_JAMO: + scripts[SE_HANGUL] = true; + compat = true; + break; + + //default: scripts[SE_LATIN] = true; + } + } + scripts[option] = false; //turn off the reflexive transliteration + + //return if we have no transliteration to do for this text + j = 0; + for (i = 0; !j && i < NUMSCRIPTS; i++) { + if (scripts[i]) j++; + } + if (!j) { + ucnv_close(conv); + return 0; + } + + if (compat) { + addTrans("NFKD", &ID); + } + else { + addTrans("NFD", &ID); + } + + //Simple X to Latin transliterators + if (scripts[SE_GREEK]) { + if (strnicmp (((SWModule*)module)->Lang(), "cop", 3)) { + if (option == SE_SBL) + addTrans("Greek-Latin/SBL", &ID); + else if (option == SE_TC) + 
addTrans("Greek-Latin/TC", &ID); + else if (option == SE_BETA) + addTrans("Greek-Latin/Beta", &ID); + else if (option == SE_BGREEK) + addTrans("Greek-Latin/BGreek", &ID); + else if (option == SE_UNGEGN) + addTrans("Greek-Latin/UNGEGN", &ID); + else if (option == SE_ISO) + addTrans("Greek-Latin/ISO", &ID); + else if (option == SE_ALALC) + addTrans("Greek-Latin/ALALC", &ID); + else if (option == SE_BGNPCGN) + addTrans("Greek-Latin/BGNPCGN", &ID); + else if (option == SE_IPA) + addTrans("Greek-IPA/Ancient", &ID); + else { + addTrans("Greek-Latin", &ID); + scripts[SE_LATIN] = true; + } + } + else { + if (option == SE_SBL) + addTrans("Coptic-Latin/SBL", &ID); + else if (option == SE_TC) + addTrans("Coptic-Latin/TC", &ID); + else if (option == SE_BETA) + addTrans("Coptic-Latin/Beta", &ID); + else if (option == SE_IPA) + addTrans("Coptic-IPA", &ID); + else { + addTrans("Coptic-Latin", &ID); + scripts[SE_LATIN] = true; + } + } + } + if (scripts[SE_HEBREW]) { + if (option == SE_SBL) + addTrans("Hebrew-Latin/SBL", &ID); + else if (option == SE_TC) + addTrans("Hebrew-Latin/TC", &ID); + else if (option == SE_BETA) + addTrans("Hebrew-Latin/Beta", &ID); + else if (option == SE_UNGEGN) + addTrans("Hebrew-Latin/UNGEGN", &ID); + else if (option == SE_ALALC) + addTrans("Hebrew-Latin/ALALC", &ID); + else if (option == SE_SYRIAC) + addTrans("Hebrew-Syriac", &ID); + else { + addTrans("Hebrew-Latin", &ID); + scripts[SE_LATIN] = true; + } + } + if (scripts[SE_CYRILLIC]) { + if (option == SE_GLAGOLITIC) + addTrans("Cyrillic-Glagolitic", &ID); + else { + addTrans("Cyrillic-Latin", &ID); + scripts[SE_LATIN] = true; + } + } + if (scripts[SE_ARABIC]) { + addTrans("Arabic-Latin", &ID); + scripts[SE_LATIN] = true; + } + if (scripts[SE_SYRIAC]) { + if (option == SE_TC) + addTrans("Syriac-Latin/TC", &ID); + else if (option == SE_BETA) + addTrans("Syriac-Latin/Beta", &ID); + else if (option == SE_HUGOYE) + addTrans("Syriac-Latin/Hugoye", &ID); + else if (option == SE_HEBREW) + 
addTrans("Syriac-Hebrew", &ID); + else { + addTrans("Syriac-Latin", &ID); + scripts[SE_LATIN] = true; + } + } + if (scripts[SE_THAI]) { + addTrans("Thai-Latin", &ID); + scripts[SE_LATIN] = true; + } + if (scripts[SE_GEORGIAN]) { + if (option == SE_ISO) + addTrans("Georgian-Latin/ISO", &ID); + else if (option == SE_ALALC) + addTrans("Georgian-Latin/ALALC", &ID); + else if (option == SE_BGNPCGN) + addTrans("Georgian-Latin/BGNPCGN", &ID); + else if (option == SE_IPA) + addTrans("Georgian-IPA", &ID); + else { + addTrans("Georgian-Latin", &ID); + scripts[SE_LATIN] = true; + } + } + if (scripts[SE_ARMENIAN]) { + if (option == SE_ISO) + addTrans("Armenian-Latin/ISO", &ID); + else if (option == SE_ALALC) + addTrans("Armenian-Latin/ALALC", &ID); + else if (option == SE_BGNPCGN) + addTrans("Armenian-Latin/BGNPCGN", &ID); + else if (option == SE_IPA) + addTrans("Armenian-IPA", &ID); + else { + addTrans("Armenian-Latin", &ID); + scripts[SE_LATIN] = true; + } + } + if (scripts[SE_ETHIOPIC]) { + if (option == SE_UNGEGN) + addTrans("Ethiopic-Latin/UNGEGN", &ID); + else if (option == SE_ISO) + addTrans("Ethiopic-Latin/ISO", &ID); + else if (option == SE_ALALC) + addTrans("Ethiopic-Latin/ALALC", &ID); + else if (option == SE_SERA) + addTrans("Ethiopic-Latin/SERA", &ID); + else { + addTrans("Ethiopic-Latin", &ID); + scripts[SE_LATIN] = true; + } + } + if (scripts[SE_GOTHIC]) { + if (option == SE_BASICLATIN) + addTrans("Gothic-Latin/Basic", &ID); + else if (option == SE_IPA) + addTrans("Gothic-IPA", &ID); + else { + addTrans("Gothic-Latin", &ID); + scripts[SE_LATIN] = true; + } + } + if (scripts[SE_UGARITIC]) { + if (option == SE_SBL) + addTrans("Ugaritic-Latin/SBL", &ID); + else { + addTrans("Ugaritic-Latin", &ID); + scripts[SE_LATIN] = true; + } + } + if (scripts[SE_MEROITIC]) { + addTrans("Meroitic-Latin", &ID); + scripts[SE_LATIN] = true; + } + if (scripts[SE_LINEARB]) { + addTrans("LinearB-Latin", &ID); + scripts[SE_LATIN] = true; + } + if (scripts[SE_CYPRIOT]) { + 
addTrans("Cypriot-Latin", &ID); + scripts[SE_LATIN] = true; + } + if (scripts[SE_RUNIC]) { + addTrans("Runic-Latin", &ID); + scripts[SE_LATIN] = true; + } + if (scripts[SE_OGHAM]) { + addTrans("Ogham-Latin", &ID); + scripts[SE_LATIN] = true; + } + if (scripts[SE_THAANA]) { + if (option == SE_ALALC) + addTrans("Thaana-Latin/ALALC", &ID); + else if (option == SE_BGNPCGN) + addTrans("Thaana-Latin/BGNPCGN", &ID); + else { + addTrans("Thaana-Latin", &ID); + scripts[SE_LATIN] = true; + } + } + if (scripts[SE_GLAGOLITIC]) { + if (option == SE_ISO) + addTrans("Glagolitic-Latin/ISO", &ID); + else if (option == SE_ALALC) + addTrans("Glagolitic-Latin/ALALC", &ID); + else if (option == SE_ALALC) + addTrans("Glagolitic-Cyrillic", &ID); + else { + addTrans("Glagolitic-Latin", &ID); + scripts[SE_LATIN] = true; + } + } + if (scripts[SE_THAI]) { + addTrans("Thai-Latin", &ID); + scripts[SE_LATIN] = true; + } + if (scripts[SE_THAI]) { + addTrans("Thai-Latin", &ID); + scripts[SE_LATIN] = true; + } + + if (scripts[SE_HAN]) { + if (!strnicmp (((SWModule*)module)->Lang(), "ja", 2)) { + addTrans("Kanji-Romaji", &ID); + } + else { + addTrans("Han-Latin", &ID); + } + scripts[SE_LATIN] = true; + } + + // Inter-Kana and Kana to Latin transliterators + if (option == SE_HIRAGANA && scripts[SE_KATAKANA]) { + addTrans("Katakana-Hiragana", &ID); + scripts[SE_HIRAGANA] = true; + } + else if (option == SE_KATAKANA && scripts[SE_HIRAGANA]) { + addTrans("Hiragana-Katakana", &ID); + scripts[SE_KATAKANA] = true; + } + else { + if (scripts[SE_KATAKANA]) { + addTrans("Katakana-Latin", &ID); + scripts[SE_LATIN] = true; + } + if (scripts[SE_HIRAGANA]) { + addTrans("Hiragana-Latin", &ID); + scripts[SE_LATIN] = true; + } + } + + // Korean to Latin transliterators + if (scripts[SE_HANGUL]) { + addTrans("Hangul-Latin", &ID); + scripts[SE_LATIN] = true; + } + if (scripts[SE_JAMO]) { + addTrans("Jamo-Latin", &ID); + scripts[SE_LATIN] = true; + } + + // Indic-Latin + if (option < SE_DEVANAGARI || option > 
SE_MALAYALAM) { + // Indic to Latin + if (scripts[SE_TAMIL]) { + addTrans("Tamil-Latin", &ID); + scripts[SE_LATIN] = true; + } + if (scripts[SE_BENGALI]) { + addTrans("Bengali-Latin", &ID); + scripts[SE_LATIN] = true; + } + if (scripts[SE_GURMUKHI]) { + addTrans("Gurmukhi-Latin", &ID); + scripts[SE_LATIN] = true; + } + if (scripts[SE_GUJARATI]) { + addTrans("Gujarati-Latin", &ID); + scripts[SE_LATIN] = true; + } + if (scripts[SE_ORIYA]) { + addTrans("Oriya-Latin", &ID); + scripts[SE_LATIN] = true; + } + if (scripts[SE_TELUGU]) { + addTrans("Telugu-Latin", &ID); + scripts[SE_LATIN] = true; + } + if (scripts[SE_KANNADA]) { + addTrans("Kannada-Latin", &ID); + scripts[SE_LATIN] = true; + } + if (scripts[SE_MALAYALAM]) { + addTrans("Malayalam-Latin", &ID); + scripts[SE_LATIN] = true; + } + } + else { + if (scripts[SE_LATIN]) { + addTrans("Latin-InterIndic", &ID); + } + if (scripts[SE_DEVANAGARI]) { + addTrans("Devanagari-InterIndic", &ID); + } + if (scripts[SE_TAMIL]) { + addTrans("Tamil-InterIndic", &ID); + } + if (scripts[SE_BENGALI]) { + addTrans("Bengali-InterIndic", &ID); + } + if (scripts[SE_GURMUKHI]) { + addTrans("Gurmurkhi-InterIndic", &ID); + } + if (scripts[SE_GUJARATI]) { + addTrans("Gujarati-InterIndic", &ID); + } + if (scripts[SE_ORIYA]) { + addTrans("Oriya-InterIndic", &ID); + } + if (scripts[SE_TELUGU]) { + addTrans("Telugu-InterIndic", &ID); + } + if (scripts[SE_KANNADA]) { + addTrans("Kannada-InterIndic", &ID); + } + if (scripts[SE_MALAYALAM]) { + addTrans("Malayalam-InterIndic", &ID); + } + + switch(option) { + case SE_DEVANAGARI: + addTrans("InterIndic-Devanagari", &ID); + break; + case SE_TAMIL: + addTrans("InterIndic-Tamil", &ID); + break; + case SE_BENGALI: + addTrans("InterIndic-Bengali", &ID); + break; + case SE_GURMUKHI: + addTrans("InterIndic-Gurmukhi", &ID); + break; + case SE_GUJARATI: + addTrans("InterIndic-Gujarati", &ID); + break; + case SE_ORIYA: + addTrans("InterIndic-Oriya", &ID); + break; + case SE_TELUGU: + 
addTrans("InterIndic-Telugu", &ID); + break; + case SE_KANNADA: + addTrans("InterIndic-Kannada", &ID); + break; + case SE_MALAYALAM: + addTrans("InterIndic-Malayalam", &ID); + break; + default: + addTrans("InterIndic-Latin", &ID); + scripts[SE_LATIN] = true; + break; + } + } + +// if (scripts[SE_TENGWAR]) { +// addTrans("Tengwar-Latin", &ID); +// scripts[SE_LATIN] = true; +// } +// if (scripts[SE_CIRTH]) { +// addTrans("Cirth-Latin", &ID); +// scripts[SE_LATIN] = true; +// } + + if (scripts[SE_LATIN]) { + switch (option) { + case SE_GREEK: + addTrans("Latin-Greek", &ID); + break; + case SE_HEBREW: + addTrans("Latin-Hebrew", &ID); + break; + case SE_CYRILLIC: + addTrans("Latin-Cyrillic", &ID); + break; + case SE_ARABIC: + addTrans("Latin-Arabic", &ID); + break; + case SE_SYRIAC: + addTrans("Latin-Syriac", &ID); + break; + case SE_THAI: + addTrans("Latin-Thai", &ID); + break; + case SE_GEORGIAN: + addTrans("Latin-Georgian", &ID); + break; + case SE_ARMENIAN: + addTrans("Latin-Armenian", &ID); + break; + case SE_ETHIOPIC: + addTrans("Latin-Ethiopic", &ID); + break; + case SE_GOTHIC: + addTrans("Latin-Gothic", &ID); + break; + case SE_UGARITIC: + addTrans("Latin-Ugaritic", &ID); + break; + case SE_COPTIC: + addTrans("Latin-Coptic", &ID); + break; + case SE_KATAKANA: + addTrans("Latin-Katakana", &ID); + break; + case SE_HIRAGANA: + addTrans("Latin-Hiragana", &ID); + break; + case SE_JAMO: + addTrans("Latin-Jamo", &ID); + break; + case SE_HANGUL: + addTrans("Latin-Hangul", &ID); + break; + case SE_MEROITIC: + addTrans("Latin-Meroitic", &ID); + break; + case SE_LINEARB: + addTrans("Latin-LinearB", &ID); + break; + case SE_CYPRIOT: + addTrans("Latin-Cypriot", &ID); + break; + case SE_RUNIC: + addTrans("Latin-Runic", &ID); + break; + case SE_OGHAM: + addTrans("Latin-Ogham", &ID); + break; + case SE_THAANA: + addTrans("Latin-Thaana", &ID); + break; + case SE_GLAGOLITIC: + addTrans("Latin-Glagolitic", &ID); + break; +// case SE_TENGWAR: +// addTrans("Latin-Tengwar", &ID); +// 
break; +// case SE_CIRTH: +// addTrans("Latin-Cirth", &ID); +// break; + } + } + + if (option == SE_BASICLATIN) { + addTrans("Any-Latin1", &ID); + } + + addTrans("NFC", &ID); + + err = U_ZERO_ERROR; + Transliterator * trans = createTrans(UnicodeString(ID), UTRANS_FORWARD, err); + if (trans && !U_FAILURE(err)) { + UnicodeString target = UnicodeString(source); + trans->transliterate(target); + text.setSize(text.size()*2); + len = ucnv_fromUChars(conv, text.getRawData(), text.size(), target.getBuffer(), target.length(), &err); + text.setSize(len); + delete trans; + } + ucnv_close(conv); + } + return 0; +} + +SWORD_NAMESPACE_END +#endif + + + diff --git a/src/modules/filters/utf8utf16.cpp b/src/modules/filters/utf8utf16.cpp new file mode 100644 index 0000000..5c1614c --- /dev/null +++ b/src/modules/filters/utf8utf16.cpp @@ -0,0 +1,78 @@ +/****************************************************************************** + * + * UTF8UTF16 - SWFilter descendant to convert UTF-8 to UTF-16 + * + */ + +#include +#include + +#include +#include + +SWORD_NAMESPACE_START + +UTF8UTF16::UTF8UTF16() { +} + +char UTF8UTF16::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + const unsigned char *from; + unsigned long ch; + signed short utf16; + unsigned char from2[7]; + + SWBuf orig = text; + + from = (const unsigned char *)orig.c_str(); + + // ------------------------------- + for (text = ""; *from; from++) { + ch = 0; + //case: ANSI + if ((*from & 128) != 128) { + text.setSize(text.size()+2); + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short)*from; + continue; + } + //case: Invalid UTF-8 (illegal continuing byte in initial position) + if ((*from & 128) && ((*from & 64) != 64)) { + continue; + } + //case: 2+ byte codepoint + from2[0] = *from; + from2[0] <<= 1; + int subsequent; + for (subsequent = 1; (from2[0] & 128) && (subsequent < 7); subsequent++) { + from2[0] <<= 1; + from2[subsequent] = from[subsequent]; + from2[subsequent] &= 63; + 
ch <<= 6; + ch |= from2[subsequent]; + } + subsequent--; + from2[0] <<= 1; + char significantFirstBits = 8 - (2+subsequent); + + ch |= (((short)from2[0]) << (((6*subsequent)+significantFirstBits)-8)); + from += subsequent; + if (ch < 0x10000) { + text.setSize(text.size()+2); + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short)ch; + } + else { + utf16 = (signed short)((ch - 0x10000) / 0x400 + 0xD800); + text.setSize(text.size()+2); + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short)utf16; + utf16 = (signed short)((ch - 0x10000) % 0x400 + 0xDC00); + text.setSize(text.size()+2); + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short)utf16; + } + } + text.setSize(text.size()+2); + *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short)0; + + return 0; + +} + +SWORD_NAMESPACE_END -- cgit v1.2.3