summary refs log tree commit diff
path: root/src/modules/filters
diff options
context:
space:
mode:
author Roberto C. Sanchez <roberto@connexer.com> 2014-03-29 10:53:59 -0400
committer Roberto C. Sanchez <roberto@connexer.com> 2014-03-29 10:53:59 -0400
commit 03134fa5f6f25d92724ce4c183f9bbe12a9e37dc (patch)
tree 847326a4de82f0241ac87cbbc427a1b92a696a02 /src/modules/filters
parent d7469385b05b9510338407fa123e9ad090f80af6 (diff)
Imported Upstream version 1.5.11
Diffstat (limited to 'src/modules/filters')
-rw-r--r--src/modules/filters/Makefile5
-rw-r--r--src/modules/filters/Makefile.am105
-rw-r--r--src/modules/filters/cipherfil.cpp46
-rw-r--r--src/modules/filters/gbffootnotes.cpp193
-rw-r--r--src/modules/filters/gbfheadings.cpp87
-rw-r--r--src/modules/filters/gbfhtml.cpp181
-rw-r--r--src/modules/filters/gbfhtmlhref.cpp288
-rw-r--r--src/modules/filters/gbfmorph.cpp77
-rw-r--r--src/modules/filters/gbfosis.cpp420
-rw-r--r--src/modules/filters/gbfplain.cpp97
-rw-r--r--src/modules/filters/gbfredletterwords.cpp93
-rw-r--r--src/modules/filters/gbfrtf.cpp311
-rw-r--r--src/modules/filters/gbfstrongs.cpp126
-rw-r--r--src/modules/filters/gbfthml.cpp216
-rw-r--r--src/modules/filters/gbfwebif.cpp191
-rw-r--r--src/modules/filters/gbfwordjs.cpp282
-rw-r--r--src/modules/filters/greeklexattribs.cpp101
-rw-r--r--src/modules/filters/latin1utf16.cpp119
-rw-r--r--src/modules/filters/latin1utf8.cpp173
-rw-r--r--src/modules/filters/osisfootnotes.cpp157
-rw-r--r--src/modules/filters/osisheadings.cpp144
-rw-r--r--src/modules/filters/osishtmlhref.cpp561
-rw-r--r--src/modules/filters/osislemma.cpp85
-rw-r--r--src/modules/filters/osismorph.cpp85
-rw-r--r--src/modules/filters/osismorphsegmentation.cpp106
-rw-r--r--src/modules/filters/osisosis.cpp173
-rw-r--r--src/modules/filters/osisplain.cpp192
-rw-r--r--src/modules/filters/osisredletterwords.cpp85
-rw-r--r--src/modules/filters/osisrtf.cpp520
-rw-r--r--src/modules/filters/osisscripref.cpp100
-rw-r--r--src/modules/filters/osisstrongs.cpp257
-rw-r--r--src/modules/filters/osisvariants.cpp118
-rw-r--r--src/modules/filters/osiswebif.cpp198
-rw-r--r--src/modules/filters/osiswordjs.cpp178
-rw-r--r--src/modules/filters/papyriplain.cpp71
-rw-r--r--src/modules/filters/plainfootnotes.cpp79
-rw-r--r--src/modules/filters/plainhtml.cpp83
-rw-r--r--src/modules/filters/rtfhtml.cpp81
-rw-r--r--src/modules/filters/scsuutf8.cpp226
-rw-r--r--src/modules/filters/swbasicfilter.cpp406
-rw-r--r--src/modules/filters/swoptfilter.cpp47
-rw-r--r--src/modules/filters/teihtmlhref.cpp205
-rw-r--r--src/modules/filters/teiplain.cpp116
-rw-r--r--src/modules/filters/teirtf.cpp182
-rw-r--r--src/modules/filters/thmlfootnotes.cpp124
-rw-r--r--src/modules/filters/thmlgbf.cpp291
-rw-r--r--src/modules/filters/thmlheadings.cpp153
-rw-r--r--src/modules/filters/thmlhtml.cpp236
-rw-r--r--src/modules/filters/thmlhtmlhref.cpp357
-rw-r--r--src/modules/filters/thmllemma.cpp65
-rw-r--r--src/modules/filters/thmlmorph.cpp65
-rw-r--r--src/modules/filters/thmlosis.cpp575
-rw-r--r--src/modules/filters/thmlplain.cpp219
-rw-r--r--src/modules/filters/thmlrtf.cpp346
-rw-r--r--src/modules/filters/thmlscripref.cpp123
-rw-r--r--src/modules/filters/thmlstrongs.cpp146
-rw-r--r--src/modules/filters/thmlvariants.cpp118
-rw-r--r--src/modules/filters/thmlwebif.cpp103
-rw-r--r--src/modules/filters/thmlwordjs.cpp296
-rw-r--r--src/modules/filters/unicodertf.cpp87
-rw-r--r--src/modules/filters/utf16utf8.cpp90
-rw-r--r--src/modules/filters/utf8arshaping.cpp51
-rw-r--r--src/modules/filters/utf8bidireorder.cpp60
-rw-r--r--src/modules/filters/utf8cantillation.cpp55
-rw-r--r--src/modules/filters/utf8greekaccents.cpp261
-rw-r--r--src/modules/filters/utf8hebrewpoints.cpp44
-rw-r--r--src/modules/filters/utf8html.cpp70
-rw-r--r--src/modules/filters/utf8latin1.cpp75
-rw-r--r--src/modules/filters/utf8nfc.cpp50
-rw-r--r--src/modules/filters/utf8nfkd.cpp52
-rw-r--r--src/modules/filters/utf8transliterator.cpp888
-rw-r--r--src/modules/filters/utf8utf16.cpp78
72 files changed, 12644 insertions, 0 deletions
diff --git a/src/modules/filters/Makefile b/src/modules/filters/Makefile
new file mode 100644
index 0000000..1a2d00d
--- /dev/null
+++ b/src/modules/filters/Makefile
@@ -0,0 +1,5 @@
+# Convenience wrapper: building here rebuilds the whole tree from its top-level directory.
+root := ../../..
+.PHONY: all
+all:
+	$(MAKE) -C ${root}
diff --git a/src/modules/filters/Makefile.am b/src/modules/filters/Makefile.am
new file mode 100644
index 0000000..7092c73
--- /dev/null
+++ b/src/modules/filters/Makefile.am
@@ -0,0 +1,105 @@
+filtersdir = $(top_srcdir)/src/modules/filters
+# Basic and option filter sources are added to libsword directly.
+libsword_la_SOURCES += $(filtersdir)/swbasicfilter.cpp
+libsword_la_SOURCES += $(filtersdir)/swoptfilter.cpp
+# GBF filters, collected in GBFFIL and added to libsword at the end of this file.
+GBFFIL = $(filtersdir)/gbfhtml.cpp
+GBFFIL += $(filtersdir)/gbfhtmlhref.cpp
+GBFFIL += $(filtersdir)/gbfwebif.cpp
+GBFFIL += $(filtersdir)/gbfplain.cpp
+GBFFIL += $(filtersdir)/gbfrtf.cpp
+GBFFIL += $(filtersdir)/gbfstrongs.cpp
+GBFFIL += $(filtersdir)/gbffootnotes.cpp
+GBFFIL += $(filtersdir)/gbfheadings.cpp
+GBFFIL += $(filtersdir)/gbfredletterwords.cpp
+GBFFIL += $(filtersdir)/gbfmorph.cpp
+GBFFIL += $(filtersdir)/gbfwordjs.cpp
+# ThML filters, collected in THMLFIL.
+THMLFIL = $(filtersdir)/thmlstrongs.cpp
+THMLFIL += $(filtersdir)/thmlfootnotes.cpp
+THMLFIL += $(filtersdir)/thmlheadings.cpp
+THMLFIL += $(filtersdir)/thmlmorph.cpp
+THMLFIL += $(filtersdir)/thmllemma.cpp
+THMLFIL += $(filtersdir)/thmlscripref.cpp
+THMLFIL += $(filtersdir)/thmlvariants.cpp
+THMLFIL += $(filtersdir)/thmlgbf.cpp
+THMLFIL += $(filtersdir)/thmlrtf.cpp
+THMLFIL += $(filtersdir)/thmlhtml.cpp
+THMLFIL += $(filtersdir)/thmlhtmlhref.cpp
+THMLFIL += $(filtersdir)/thmlwebif.cpp
+THMLFIL += $(filtersdir)/thmlwordjs.cpp
+# TEI filters, collected in TEIFIL.
+TEIFIL = $(filtersdir)/teiplain.cpp
+TEIFIL += $(filtersdir)/teirtf.cpp
+TEIFIL += $(filtersdir)/teihtmlhref.cpp
+# Format-to-format conversion filters, collected in CONVFIL.
+CONVFIL = $(filtersdir)/gbfthml.cpp
+CONVFIL += $(filtersdir)/gbfosis.cpp
+CONVFIL += $(filtersdir)/thmlosis.cpp
+CONVFIL += $(filtersdir)/thmlplain.cpp
+CONVFIL += $(filtersdir)/osisosis.cpp
+# OSIS filters, collected in OSISFIL.
+OSISFIL = $(filtersdir)/osisheadings.cpp
+OSISFIL += $(filtersdir)/osisfootnotes.cpp
+OSISFIL += $(filtersdir)/osishtmlhref.cpp
+OSISFIL += $(filtersdir)/osiswebif.cpp
+OSISFIL += $(filtersdir)/osismorph.cpp
+OSISFIL += $(filtersdir)/osisstrongs.cpp
+OSISFIL += $(filtersdir)/osisplain.cpp
+OSISFIL += $(filtersdir)/osisrtf.cpp
+OSISFIL += $(filtersdir)/osislemma.cpp
+OSISFIL += $(filtersdir)/osisredletterwords.cpp
+OSISFIL += $(filtersdir)/osisscripref.cpp
+OSISFIL += $(filtersdir)/osisvariants.cpp
+OSISFIL += $(filtersdir)/osiswordjs.cpp
+OSISFIL += $(filtersdir)/osismorphsegmentation.cpp
+# Character-encoding conversion filters are added to libsword directly.
+libsword_la_SOURCES += $(filtersdir)/latin1utf8.cpp
+libsword_la_SOURCES += $(filtersdir)/latin1utf16.cpp
+libsword_la_SOURCES += $(filtersdir)/utf8utf16.cpp
+libsword_la_SOURCES += $(filtersdir)/utf16utf8.cpp
+libsword_la_SOURCES += $(filtersdir)/scsuutf8.cpp
+libsword_la_SOURCES += $(filtersdir)/utf8html.cpp
+libsword_la_SOURCES += $(filtersdir)/utf8latin1.cpp
+# UTF-8 filters for cantillation, Hebrew points and Greek accents.
+libsword_la_SOURCES += $(filtersdir)/utf8cantillation.cpp
+libsword_la_SOURCES += $(filtersdir)/utf8hebrewpoints.cpp
+libsword_la_SOURCES += $(filtersdir)/utf8greekaccents.cpp
+
+libsword_la_SOURCES += $(filtersdir)/cipherfil.cpp
+# Remaining filters (plain text, RTF, lexicon attributes, ...), collected in PLFIL.
+PLFIL = $(filtersdir)/rtfhtml.cpp
+PLFIL += $(filtersdir)/plainfootnotes.cpp
+PLFIL += $(filtersdir)/plainhtml.cpp
+PLFIL += $(filtersdir)/greeklexattribs.cpp
+PLFIL += $(filtersdir)/unicodertf.cpp
+PLFIL += $(filtersdir)/papyriplain.cpp
+
+# Filters that are only compiled when ICU support is enabled (see the conditionals below).
+SWICUSRC = $(filtersdir)/utf8transliterator.cpp
+SWICUSRC += $(filtersdir)/utf8nfc.cpp
+SWICUSRC += $(filtersdir)/utf8nfkd.cpp
+SWICUSRC += $(filtersdir)/utf8arshaping.cpp
+SWICUSRC += $(filtersdir)/utf8bidireorder.cpp
+# ICU or ICUSWORD: compile SWICUSRC and define _ICU_ (plus _ICUSWORD_); otherwise only list it in EXTRA_DIST.
+if ICU
+ICUDEFS = -D_ICU_
+DISTSWICUSRC =
+SWREALICUSRC = $(SWICUSRC)
+else
+if ICUSWORD
+ICUDEFS = -D_ICU_ -D_ICUSWORD_
+DISTSWICUSRC =
+SWREALICUSRC = $(SWICUSRC)
+else
+DISTSWICUSRC = $(SWICUSRC)
+SWREALICUSRC =
+endif
+endif
+
+AM_CPPFLAGS += $(ICUDEFS)
+libsword_la_SOURCES += $(SWREALICUSRC)
+EXTRA_DIST = $(DISTSWICUSRC)
+# Finally add the per-format filter lists collected above.
+libsword_la_SOURCES += $(OSISFIL) $(GBFFIL) \
+ $(THMLFIL) $(CONVFIL) $(PLFIL) $(TEIFIL)
diff --git a/src/modules/filters/cipherfil.cpp b/src/modules/filters/cipherfil.cpp
new file mode 100644
index 0000000..24c665e
--- /dev/null
+++ b/src/modules/filters/cipherfil.cpp
@@ -0,0 +1,46 @@
+/******************************************************************************
+ *
+ * cipherfil - SWFilter descendant to decipher a module
+ */
+
+
+#include <stdlib.h>
+#include <cipherfil.h>
+#include <swcipher.h>
+#include <swbuf.h>
+
+SWORD_NAMESPACE_START
+
+CipherFilter::CipherFilter(const char *key) {	// creates the cipher engine for the given key
+	cipher = new SWCipher(reinterpret_cast<unsigned char *>(const_cast<char *>(key)));	// released in the destructor
+}
+
+
+CipherFilter::~CipherFilter() { // releases the SWCipher allocated by the constructor
+ delete cipher;
+}
+
+
+SWCipher *CipherFilter::getCipher() { // returns the filter's cipher object; the filter keeps ownership and deletes it
+ return cipher;
+}
+
+
+char CipherFilter::processText(SWBuf &text, const SWKey *key, const SWModule *module) { // runs text through the cipher in place; always returns 0
+ if (text.length() > 2) { // buffers of two bytes or fewer are left untouched
+ unsigned long len = text.length();
+ if (!key) { // hack, using key to determine encipher, or decipher: a NULL key selects this branch
+ cipher->cipherBuf(&len, text.getRawData()); //set buffer to enciphered text
+ memcpy(text.getRawData(), cipher->Buf(), len); // copy Buf() (presumably the deciphered data) back over the text
+// text = cipher->Buf(); //get the deciphered buffer
+ }
+ else if ((unsigned long)key == 1) { // key is the literal pointer value 1, not a real SWKey; any other key leaves text unchanged
+ cipher->Buf(text.getRawData(), len); // presumably hands the text to the cipher as its plain buffer - confirm in SWCipher
+ memcpy(text.getRawData(), cipher->cipherBuf(&len), len); // NOTE(review): assumes len does not grow beyond text's buffer
+// text = cipher->cipherBuf(&len);
+ }
+ }
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/gbffootnotes.cpp b/src/modules/filters/gbffootnotes.cpp
new file mode 100644
index 0000000..bef29b8
--- /dev/null
+++ b/src/modules/filters/gbffootnotes.cpp
@@ -0,0 +1,193 @@
+/******************************************************************************
+ *
+ * gbffootnotes - SWFilter descendant to hide or show footnotes
+ * in a GBF module.
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <gbffootnotes.h>
+#include <swmodule.h>
+#include <swbuf.h>
+#include <versekey.h>
+#include <utilxml.h>
+
+SWORD_NAMESPACE_START
+
+const char oName[] = "Footnotes"; // option name handed to SWOptionFilter by the constructor
+const char oTip[] = "Toggles Footnotes On and Off if they exist"; // option description handed to SWOptionFilter
+
+const SWBuf choices[3] = {"Off", "On", ""}; // the trailing "" lies outside the range used for oValues
+const StringList oValues(&choices[0], &choices[2]); // presumably an iterator-range constructor, i.e. "Off" and "On"
+
+
+GBFFootnotes::GBFFootnotes() : SWOptionFilter(oName, oTip, &oValues) { // registers the "Footnotes" option with its value list
+ setOptionValue("Off"); // the option starts out as "Off"
+}
+
+
+GBFFootnotes::~GBFFootnotes() { // nothing to release
+}
+
+
+/*
+ * Hides or shows GBF footnotes (<RF ...> body <Rf>) depending on the option value.
+ * With the option off each footnote, including its tags, is removed from text; with
+ * it on the footnote is written back.  When the module processes entry attributes
+ * every footnote is also stored under the module's "Footnote" entry attributes.
+ * key is not used, so a NULL key is safe; module is dereferenced as soon as an <Rf>
+ * token is met.  Always returns 0.
+ */
+char GBFFootnotes::processText (SWBuf &text, const SWKey *key, const SWModule *module)
+{
+	SWBuf token;
+	bool intoken = false;
+	bool hide = false;	// true between <RF> and <Rf>
+	SWBuf tagText;		// footnote body collected while hide is set
+	XMLTag startTag;	// the <RF ...> token that opened the current footnote
+	int footnoteNum = 1;
+	char buf[254];
+
+	SWBuf orig = text;
+	const char *from = orig.c_str();
+
+	for (text = ""; *from; from++) {
+		if (*from == '<') {
+			intoken = true;
+			token = "";
+			continue;
+		}
+		if (*from == '>') {	// process tokens
+			intoken = false;
+
+			if (!strncmp(token, "RF",2)) {
+				// footnote start: remember the tag and begin collecting the body
+				startTag = token;
+				hide = true;
+				tagText = "";
+				continue;
+			}
+			else if (!strncmp(token, "Rf",2)) {
+				if (module->isProcessEntryAttributes()) {
+					// one-character bodies, and every footnote of the IGNT module, go inline
+					if((tagText.length() == 1) || !strcmp(module->Name(), "IGNT")) {
+						if (option) { // for ASV marks text in verse then put explanation at end of verse
+							text.append(" <FS>[");
+							text.append(tagText);
+							text.append("]<Fs>");
+							hide = false;
+							continue;
+						}
+					}
+					// bump the entry's footnote counter and file tag attributes and body under it
+					SWBuf fc = module->getEntryAttributes()["Footnote"]["count"]["value"];
+					footnoteNum = (fc.length()) ? atoi(fc.c_str()) : 0;
+					sprintf(buf, "%i", ++footnoteNum);
+					module->getEntryAttributes()["Footnote"]["count"]["value"] = buf;
+					StringList attributes = startTag.getAttributeNames();
+					for (StringList::const_iterator it = attributes.begin(); it != attributes.end(); it++) {
+						module->getEntryAttributes()["Footnote"][buf][it->c_str()] = startTag.getAttribute(it->c_str());
+					}
+					module->getEntryAttributes()["Footnote"][buf]["body"] = tagText;
+					startTag.setAttribute("swordFootnote", buf);
+				}
+				hide = false;
+				if (option) {
+					text.append(startTag);
+					text.append(tagText);
+				}
+				else	continue;
+			}
+			if (!hide) {
+				text.append('<');
+				text.append(token);
+				text.append('>');
+			}
+			else {
+				tagText.append('<');
+				tagText.append(token);
+				tagText.append('>');
+			}
+			continue;
+		}
+		if (intoken) { //copy token
+			token.append(*from);
+		}
+		else if (!hide) { //copy text which is not inside a token
+			text.append(*from);
+		}
+		else tagText.append(*from);
+	}
+	return 0;
+
+	/* Earlier implementation, kept for reference only (never compiled):
+	if (!option) {	// if we don't want footnotes
+		char token[4096]; // cheese.  Fix.
+		int tokpos = 0;
+		bool intoken = false;
+		int len;
+		bool hide = false;
+
+		const char *from;
+		SWBuf orig = text;
+		from = orig.c_str();
+		for (text = ""; *from; from++) {
+			if (*from == '<') {
+				intoken = true;
+				tokpos = 0;
+//				memset(token, 0, 4096);
+				token[0] = 0;
+				token[1] = 0;
+				token[2] = 0;
+				continue;
+			}
+			if (*from == '>') {	// process tokens
+				intoken = false;
+				switch (*token) {
+				case 'R':				// Reference
+					switch(token[1]) {
+					case 'F':               // Begin footnote
+						hide = true;
+						break;
+					case 'f':               // end footnote
+						hide = false;
+						break;
+					}
+					continue;	// skip token
+				case 'W':
+					if (token[1] == 'T') {
+						switch (token[2]) {
+						case 'P':
+						case 'S':
+						case 'A':
+							continue; // remove this token
+						default:
+							break;
+						}
+					}
+				}
+				// if not a footnote token, keep token in text
+				if (!hide) {
+					text += '<';
+					text += token;
+					text += '>';
+				}
+				continue;
+			}
+			if (intoken) {
+				if (tokpos < 4090)
+					token[tokpos++] = *from;
+					token[tokpos+2] = 0;	// +2 cuz we init token with 2 extra '0' because of switch statement
+			}
+			else	{
+				if (!hide) {
+					text += *from;
+				}
+			}
+		}
+	}
+	return 0;*/
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/gbfheadings.cpp b/src/modules/filters/gbfheadings.cpp
new file mode 100644
index 0000000..81a4d94
--- /dev/null
+++ b/src/modules/filters/gbfheadings.cpp
@@ -0,0 +1,87 @@
+/******************************************************************************
+ *
+ * gbfheadings - SWFilter descendant to hide or show headings
+ * in a GBF module.
+ */
+
+
+#include <stdlib.h>
+#include <gbfheadings.h>
+
+SWORD_NAMESPACE_START
+
+
+const char oName[] = "Headings"; // option name handed to SWOptionFilter by the constructor
+const char oTip[] = "Toggles Headings On and Off if they exist"; // option description handed to SWOptionFilter
+
+const SWBuf choices[3] = {"Off", "On", ""}; // the trailing "" lies outside the range used for oValues
+const StringList oValues(&choices[0], &choices[2]); // presumably an iterator-range constructor, i.e. "Off" and "On"
+
+GBFHeadings::GBFHeadings() : SWOptionFilter(oName, oTip, &oValues) { // registers the "Headings" option with its value list
+ setOptionValue("Off"); // the option starts out as "Off"
+}
+
+
+GBFHeadings::~GBFHeadings() { // nothing to release
+}
+
+
+// Removes GBF heading markup from text when the Headings option is off: every
+// token starting with 'T' is dropped and everything between <TS...> and <Ts...>
+// is hidden.  With the option on the text is left untouched.  key and module are
+// not used.  Always returns 0.
+char GBFHeadings::processText (SWBuf &text, const SWKey *key, const SWModule *module) {
+	if (!option) {	// if we don't want headings
+		char token[2048] = {0};	// zeroed so a '>' seen before any '<' never reads garbage
+		int tokpos = 0;
+		bool intoken = false;
+		bool hide = false;
+
+		const char *from;
+		SWBuf orig = text;
+		from = orig.c_str();
+		for (text = ""; *from; from++) {
+			if (*from == '<') {
+				intoken = true;
+				tokpos = 0;
+				token[0] = token[1] = token[2] = 0;	// the switch below inspects token[0] and token[1]
+				continue;
+			}
+			if (*from == '>') {	// process tokens
+				intoken = false;
+				switch (*token) {
+				case 'T':				// title / heading tokens
+					switch(token[1]) {
+					case 'S':               // Begin heading
+						hide = true;
+						break;
+					case 's':               // end heading
+						hide = false;
+						break;
+					}
+					continue;	// skip token
+				}
+				// if not a heading token, keep token in text
+				if (!hide) {
+					text += '<';
+					text += token;
+					text += '>';
+				}
+				continue;
+			}
+			if (intoken) {
+				if (tokpos < 2045)
+					token[tokpos++] = *from;	// tokens longer than 2045 characters are silently truncated
+				token[tokpos+2] = 0;
+			}
+			else	{
+				if (!hide) {
+					text += *from;
+				}
+			}
+		}
+	}
+	return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/gbfhtml.cpp b/src/modules/filters/gbfhtml.cpp
new file mode 100644
index 0000000..a9d8434
--- /dev/null
+++ b/src/modules/filters/gbfhtml.cpp
@@ -0,0 +1,181 @@
+/***************************************************************************
+ gbfhtml.cpp - GBF to HTML filter
+ -------------------
+ begin : 2001-09-03
+ copyright : 2001 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <gbfhtml.h>
+#include <ctype.h>
+
+SWORD_NAMESPACE_START
+
+// Sets '<' and '>' as the token delimiters and registers the GBF tokens that map
+// to a fixed piece of HTML; all other tokens are left to handleToken().
+GBFHTML::GBFHTML() {
+	setTokenStart("<");
+	setTokenEnd(">");
+	setTokenCaseSensitive(true);	// start/end tokens differ only in case, e.g. FI / Fi
+
+	addTokenSubstitute("Rf", ")</small></font>");
+	addTokenSubstitute("Rx", "</a>");
+	addTokenSubstitute("FI", "<i>"); // italics begin
+	addTokenSubstitute("Fi", "</i>");
+	addTokenSubstitute("FB", "<b>"); // bold begin (was "<n>", which is not an HTML element)
+	addTokenSubstitute("Fb", "</b>");
+	addTokenSubstitute("FR", "<font color=\"#FF0000\">"); // words of Jesus begin
+	addTokenSubstitute("Fr", "</font>");
+	addTokenSubstitute("FU", "<u>"); // underline begin
+	addTokenSubstitute("Fu", "</u>");
+	addTokenSubstitute("FO", "<cite>"); //  Old Testament quote begin
+	addTokenSubstitute("Fo", "</cite>");
+	addTokenSubstitute("FS", "<sup>"); // Superscript begin
+	addTokenSubstitute("Fs", "</sup>");
+	addTokenSubstitute("FV", "<sub>"); // Subscript begin
+	addTokenSubstitute("Fv", "</sub>");
+	addTokenSubstitute("TT", "<big>"); // Book title begin
+	addTokenSubstitute("Tt", "</big>");
+	addTokenSubstitute("PP", "<cite>"); //  poetry  begin
+	addTokenSubstitute("Pp", "</cite>");
+	addTokenSubstitute("Fn", "</font>"); //  font  end
+	addTokenSubstitute("CL", "<br />"); //  new line
+	addTokenSubstitute("CM", "<!P><br />"); //  paragraph <!P> is a non showing comment that can be changed in the front end to <P> if desired
+	addTokenSubstitute("CG", ""); //  ???
+	addTokenSubstitute("CT", ""); // ???
+	addTokenSubstitute("JR", "<div align=\"right\">"); // right align begin
+	addTokenSubstitute("JC", "<div align=\"center\">"); // center align begin
+	addTokenSubstitute("JL", "</div>"); // align end
+}
+
+
+// Copies the attribute value starting at src (up to its closing quote or the end of
+// the string) into dst, truncated to dstSize - 1 characters and NUL-terminated.
+static void copyAttrValue(char *dst, size_t dstSize, const char *src) {
+	size_t n = 0;
+	for (; *src && *src != '\"' && n + 1 < dstSize; src++)
+		dst[n++] = *src;
+	dst[n] = 0;
+}
+
+// Appends a Strong's number as " <small><em>&lt;N&gt;</em></small> ".  One leading
+// non-digit (e.g. the G of "G1234") is skipped; numbers of 5627 and above are dropped.
+static void appendStrongs(SWBuf &buf, const char *val) {
+	const char *digits = (*val && !isdigit((unsigned char)*val)) ? val + 1 : val;
+	if (atoi(digits) < 5627) {
+		buf += " <small><em>&lt;";
+		buf += digits;
+		buf += "&gt;</em></small> ";
+	}
+}
+
+
+// Appends the HTML for one GBF token (or, temporarily, OSIS <note>/<w> token) to buf.
+// Tokens with a fixed substitution are handled by substituteToken(); the rest are
+// handled here.  Returns false for tokens that are not recognised, true otherwise.
+bool GBFHTML::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+	const char *tok;
+	char val[128];	// attribute values longer than 127 characters are truncated
+	const char *num;
+	MyUserData *u = (MyUserData *)userData;
+
+	if (!substituteToken(buf, token)) {
+		// deal with OSIS note tags.  Just hide till OSISRTF
+		if (!strncmp(token, "note ", 5)) {
+			// let's stop text from going to output
+			u->suspendTextPassThru = true;
+		}
+
+		else if (!strncmp(token, "/note", 5)) {
+			u->suspendTextPassThru = false;
+		}
+
+		else if (!strncmp(token, "w", 1)) {
+			// OSIS Word (temporary until OSISRTF is done)
+			num = strstr(token, "lemma=\"x-Strongs:");
+			if (num) {
+				copyAttrValue(val, sizeof(val), num + 17);
+				appendStrongs(buf, val);
+			} else {
+				num = strstr(token, "lemma=\"strong:");
+				if (num) {
+					copyAttrValue(val, sizeof(val), num + 14);
+					appendStrongs(buf, val);
+				}
+			}
+			num = strstr(token, "morph=\"x-Robinson:");
+			if (num) {
+				copyAttrValue(val, sizeof(val), num + 18);
+				// normal robinsons tense
+				buf += " <small><em>(";
+				buf += val;
+				buf += ")</em></small> ";
+			}
+		}
+
+		else if (!strncmp(token, "WG", 2) || !strncmp(token, "WH", 2)) { // strong's numbers
+			buf += " <small><em>&lt;";
+			for (tok = token + 2; *tok; tok++)
+				buf += *tok;
+			buf += "&gt;</em></small> ";
+		}
+
+		else if (!strncmp(token, "WTG", 3) || !strncmp(token, "WTH", 3)) { // strong's numbers tense
+			buf += " <small><em>(";	// opening bracket now matches the closing ")" below
+			for (tok = token + 3; *tok; tok++)
+				if(*tok != '\"')
+					buf += *tok;
+			buf += ")</em></small> ";
+		}
+
+		else if (!strncmp(token, "RX", 2)) {
+			buf += "<i>";
+			// copy what follows "RX" and its separator, up to the end of the token or a
+			// "<Rx"; a bare "RX" has nothing after it, so never step past its terminator
+			for (tok = token + (token[2] ? 3 : 2); *tok && strncmp(tok, "<Rx", 3); tok++)
+				buf += *tok;
+			buf += "</i>";
+		}
+
+		else if (!strncmp(token, "RB", 2)) {
+			buf += "<i>";
+			u->hasFootnotePreTag = true;
+		}
+
+		else if (!strncmp(token, "RF", 2)) {
+			if (u->hasFootnotePreTag) {
+				u->hasFootnotePreTag = false;
+				buf += "</i> ";
+			}
+			buf += "<font color=\"#800000\"><small> (";
+		}
+
+		else if (!strncmp(token, "FN", 2)) {
+			buf += "<font face=\"";
+			for (tok = token + 2; *tok; tok++)
+				if(*tok != '\"')
+					buf += *tok;
+			buf += "\">";
+		}
+
+		else if (!strncmp(token, "CA", 2)) {	// ASCII value
+			buf += (char)atoi(&token[2]);
+		}
+
+		else {
+			return false;
+		}
+	}
+	return true;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/gbfhtmlhref.cpp b/src/modules/filters/gbfhtmlhref.cpp
new file mode 100644
index 0000000..7f1c254
--- /dev/null
+++ b/src/modules/filters/gbfhtmlhref.cpp
@@ -0,0 +1,288 @@
+/***************************************************************************
+ gbfhtmlhref.cpp - GBF to HTML filter with hrefs
+ for strongs and morph tags
+ -------------------
+ begin : 2001-09-03
+ copyright : 2001 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <gbfhtmlhref.h>
+#include <swmodule.h>
+#include <utilxml.h>
+#include <versekey.h>
+#include <ctype.h>
+#include <url.h>
+
+SWORD_NAMESPACE_START
+
+GBFHTMLHREF::MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) {
+	if (!module)
+		return;		// no module: version keeps its initial value
+	version = module->Name();	// module name, used later when building note links
+}
+
+GBFHTMLHREF::GBFHTMLHREF() {
+	static const char *const substitutes[][2] = {	// GBF token -> HTML, registered below in this order
+		{"FA", "<font color=\"#800000\">"},	// for ASV footnotes to mark text
+		{"Rx", "</a>"},
+		{"FI", "<i>"},		// italics begin
+		{"Fi", "</i>"},
+		{"FB", "<b>"},		// bold begin
+		{"Fb", "</b>"},
+		{"FR", "<font color=\"#FF0000\">"},	// words of Jesus begin
+		{"Fr", "</font>"},
+		{"FU", "<u>"},		// underline begin
+		{"Fu", "</u>"},
+		{"FO", "<cite>"},	// Old Testament quote begin
+		{"Fo", "</cite>"},
+		{"FS", "<sup>"},	// superscript begin
+		{"Fs", "</sup>"},
+		{"FV", "<sub>"},	// subscript begin
+		{"Fv", "</sub>"},
+		{"TT", "<big>"},	// book title begin
+		{"Tt", "</big>"},
+		{"PP", "<cite>"},	// poetry begin
+		{"Pp", "</cite>"},
+		{"Fn", "</font>"},	// font end
+		{"CL", "<br />"},	// new line
+		{"CM", "<!P><br />"},	// paragraph; <!P> is a non showing comment a front end may change to <P>
+		{"CG", ""},		// ???
+		{"CT", ""},		// ???
+		{"JR", "<div align=\"right\">"},	// right align begin
+		{"JC", "<div align=\"center\">"},	// center align begin
+		{"JL", "</div>"},	// align end
+	};
+	setTokenStart("<");
+	setTokenEnd(">");
+	setTokenCaseSensitive(true);
+	for (unsigned int i = 0; i < sizeof(substitutes) / sizeof(substitutes[0]); i++)
+		addTokenSubstitute(substitutes[i][0], substitutes[i][1]);
+}
+
+
+// Appends the HTML for one GBF token to buf; Strong's, morphology and footnote tokens
+// become passagestudy.jsp links.  Tokens with a fixed substitution are handled by
+// substituteToken().  Returns false for tokens that are not recognised, true otherwise.
+bool GBFHTMLHREF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+	const char *tok;
+	MyUserData *u = (MyUserData *)userData;
+
+	if (!substituteToken(buf, token)) {
+		XMLTag tag(token);
+		/*if (!strncmp(token, "w", 1)) {
+			// OSIS Word (temporary until OSISRTF is done)
+			valto = val;
+			num = strstr(token, "lemma=\"x-Strongs:");
+			if (num) {
+				for (num+=17; ((*num) && (*num != '\"')); num++)
+					*valto++ = *num;
+				*valto = 0;
+				if (atoi((!isdigit(*val))?val+1:val) < 5627) {
+					buf += " <small><em>&lt;<a href=\"type=Strongs value=";
+					for (tok = val; *tok; tok++)
+							buf += *tok;
+					buf += "\">";
+					for (tok = (!isdigit(*val))?val+1:val; *tok; tok++)
+							buf += *tok;
+					buf += "</a>&gt;</em></small> ";
+					//cout << buf;
+
+				}
+				//	forget these for now
+				//else {
+					// verb morph
+					//sprintf(wordstr, "%03d", word-1);
+					//module->getEntryAttributes()["Word"][wordstr]["Morph"] = val;
+				//}
+			}
+			else {
+				num = strstr(token, "lemma=\"strong:");
+				if (num) {
+					for (num+=14; ((*num) && (*num != '\"')); num++)
+						*valto++ = *num;
+					*valto = 0;
+					if (atoi((!isdigit(*val))?val+1:val) < 5627) {
+						buf += " <small><em>&lt;<a href=\"type=Strongs value=";
+						for (tok = val; *tok; tok++)
+								buf += *tok;
+						buf += "\">";
+						for (tok = (!isdigit(*val))?val+1:val; *tok; tok++)
+								buf += *tok;
+						buf += "</a>&gt;</em></small> ";
+						//cout << buf;
+
+					}
+					//	forget these for now
+					//else {
+						// verb morph
+						//sprintf(wordstr, "%03d", word-1);
+						//module->getEntryAttributes()["Word"][wordstr]["Morph"] = val;
+					//}
+				}
+			}
+			valto = val;
+			num = strstr(token, "morph=\"x-Robinson:");
+			if (num) {
+				for (num+=18; ((*num) && (*num != '\"')); num++)
+					*valto++ = *num;
+				*valto = 0;
+				buf += " <small><em>(<a href=\"type=morph class=Robinson value=";
+				for (tok = val; *tok; tok++)
+				// normal robinsons tense
+						buf += *tok;
+				buf += "\">";
+				for (tok = val; *tok; tok++)
+					//if(*tok != '\"')
+						buf += *tok;
+				buf += "</a>)</em></small> ";
+			}
+		}*/
+		if (!strncmp(token, "WG", 2)) { // strong's numbers
+			//buf += " <small><em>&lt;<a href=\"type=Strongs value=";
+			buf += " <small><em>&lt;<a href=\"passagestudy.jsp?action=showStrongs&type=Greek&value=";
+			for (tok = token+2; *tok; tok++)
+				//if(token[i] != '\"')
+					buf += *tok;
+			buf += "\">";
+			for (tok = token + 2; *tok; tok++)
+				//if(token[i] != '\"')
+					buf += *tok;
+			buf += "</a>&gt;</em></small>";
+		}
+		else if (!strncmp(token, "WH", 2)) { // strong's numbers
+			//buf += " <small><em>&lt;<a href=\"type=Strongs value=";
+			buf += " <small><em>&lt;<a href=\"passagestudy.jsp?action=showStrongs&type=Hebrew&value=";
+			for (tok = token+2; *tok; tok++)
+				//if(token[i] != '\"')
+					buf += *tok;
+			buf += "\">";
+			for (tok = token + 2; *tok; tok++)
+				//if(token[i] != '\"')
+					buf += *tok;
+			buf += "</a>&gt;</em></small>";
+		}
+		else if (!strncmp(token, "WTG", 3)) { // strong's numbers tense
+			//buf += " <small><em>(<a href=\"type=Strongs value=";
+			buf += " <small><em>(<a href=\"passagestudy.jsp?action=showStrongs&type=Greek&value=";
+			for (tok = token + 3; *tok; tok++)
+				if(*tok != '\"')
+					buf += *tok;
+			buf += "\">";
+			for (tok = token + 3; *tok; tok++)
+				if(*tok != '\"')
+					buf += *tok;
+			buf += "</a>)</em></small>";
+		}
+		else if (!strncmp(token, "WTH", 3)) { // strong's numbers tense
+			//buf += " <small><em>(<a href=\"type=Strongs value=";
+			buf += " <small><em>(<a href=\"passagestudy.jsp?action=showStrongs&type=Hebrew&value=";
+			for (tok = token + 3; *tok; tok++)
+				if(*tok != '\"')
+					buf += *tok;
+			buf += "\">";
+			for (tok = token + 3; *tok; tok++)
+				if(*tok != '\"')
+					buf += *tok;
+			buf += "</a>)</em></small>";
+		}
+
+		else if (!strncmp(token, "WT", 2) && strncmp(token, "WTH", 3) && strncmp(token, "WTG", 3)) { // morph tags
+			//buf += " <small><em>(<a href=\"type=morph class=none value=";
+			buf += " <small><em>(<a href=\"passagestudy.jsp?action=showMorph&type=Greek&value=";
+
+			for (tok = token + 2; *tok; tok++)
+				if(*tok != '\"')
+					buf += *tok;
+			buf += "\">";
+			for (tok = token + 2; *tok; tok++)
+				if(*tok != '\"')
+					buf += *tok;
+			buf += "</a>)</em></small>";
+		}
+
+		else if (!strcmp(tag.getName(), "RX")) {
+			buf += "<a href=\"";
+			for (tok = token + (token[2] ? 3 : 2); *tok; tok++) {	// a bare "RX" has nothing after it: never step past its terminator
+				if (strncmp(tok, "<Rx", 3)) {	// copy the reference up to the end of the token or a "<Rx"
+					buf += *tok;
+				}
+				else {
+					break;
+				}
+			}
+			buf += "\">";
+		}
+		else if (!strcmp(tag.getName(), "RF")) {
+			SWBuf footnoteNumber = tag.getAttribute("swordFootnote");
+			VerseKey *vkey = NULL;
+			// see if we have a VerseKey * or descendant
+			SWTRY {
+				vkey = SWDYNAMIC_CAST(VerseKey, u->key);
+			}
+			SWCATCH ( ... ) {	}
+			if (vkey) {
+				// leave this special osis type in for crossReference notes types?  Might thml use this some day? Doesn't hurt.
+				//char ch = ((tag.getAttribute("type") && ((!strcmp(tag.getAttribute("type"), "crossReference")) || (!strcmp(tag.getAttribute("type"), "x-cross-ref")))) ? 'x':'n');
+				buf.appendFormatted("<a href=\"passagestudy.jsp?action=showNote&type=n&value=%s&module=%s&passage=%s\"><small><sup>*n</sup></small></a> ",
+					URL::encode(footnoteNumber.c_str()).c_str(),
+					URL::encode(u->version.c_str()).c_str(),
+					URL::encode(vkey->getText()).c_str());
+			}
+			u->suspendTextPassThru = true;	// the footnote body itself is not written to buf
+		}
+		else if (!strcmp(tag.getName(), "Rf")) {
+			u->suspendTextPassThru = false;
+		}
+/*
+		else if (!strncmp(token, "RB", 2)) {
+			buf += "<i> ";
+			u->hasFootnotePreTag = true;
+		}
+
+		else if (!strncmp(token, "Rf", 2)) {
+			buf += "&nbsp<a href=\"note=";
+			buf += u->lastTextNode.c_str();
+			buf += "\">";
+			buf += "<small><sup>*n</sup></small></a>&nbsp";
+			// let's let text resume to output again
+			u->suspendTextPassThru = false;
+		}
+
+		else if (!strncmp(token, "RF", 2)) {
+			if (u->hasFootnotePreTag) {
+				u->hasFootnotePreTag = false;
+				buf += "</i> ";
+			}
+			u->suspendTextPassThru = true;
+		}
+*/
+		else if (!strncmp(token, "FN", 2)) {
+			buf += "<font face=\"";
+			for (tok = token + 2; *tok; tok++)
+				if(*tok != '\"')
+					buf += *tok;
+			buf += "\">";
+		}
+
+		else if (!strncmp(token, "CA", 2)) {	// ASCII value
+			buf += (char)atoi(&token[2]);
+		}
+
+		else {
+			return false;
+		}
+	}
+	return true;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/gbfmorph.cpp b/src/modules/filters/gbfmorph.cpp
new file mode 100644
index 0000000..5226db7
--- /dev/null
+++ b/src/modules/filters/gbfmorph.cpp
@@ -0,0 +1,77 @@
+/******************************************************************************
+ *
+ * gbfmorph - SWFilter descendant to hide or show morph tags
+ * in a GBF module.
+ */
+
+
+#include <stdlib.h>
+#include <gbfmorph.h>
+
+SWORD_NAMESPACE_START
+
+const char oName[] = "Morphological Tags"; // option name handed to SWOptionFilter by the constructor
+const char oTip[] = "Toggles Morphological Tags On and Off if they exist"; // option description handed to SWOptionFilter
+
+const SWBuf choices[3] = {"Off", "On", ""}; // the trailing "" lies outside the range used for oValues
+const StringList oValues(&choices[0], &choices[2]); // presumably an iterator-range constructor, i.e. "Off" and "On"
+
+GBFMorph::GBFMorph() : SWOptionFilter(oName, oTip, &oValues) { // registers the "Morphological Tags" option with its value list
+ setOptionValue("Off"); // the option starts out as "Off"
+}
+
+
+GBFMorph::~GBFMorph() { // nothing to release
+}
+
+
+// Strips GBF morphology tokens (<WT...>) from text when the option is off, also dropping
+// a space left in front of following punctuation.  key and module are unused.  Returns 0.
+char GBFMorph::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+	if (!option) {	// if we don't want morph tags
+		const char *from;
+		char token[2048] = {0};	// zeroed so a '>' seen before any '<' never reads garbage
+		int tokpos = 0;
+		bool intoken = false;
+		bool lastspace = false;	// true while the last character written to text is a space
+
+		SWBuf orig = text;
+		from = orig.c_str();
+		for (text = ""; *from; from++) {
+			if (*from == '<') {
+				intoken = true;
+				tokpos = 0;
+				token[0] = token[1] = token[2] = 0;
+				continue;
+			}
+			if (*from == '>') {	// process tokens
+				intoken = false;
+				if (*token == 'W' && token[1] == 'T') {	// Morph
+					if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) {
+						if (lastspace)
+							text--;
+					}
+					continue;
+				}
+				// if not a morph tag token, keep token in text
+				text += '<';
+				text += token;
+				text += '>';
+				lastspace = false;	// text now ends in '>', which a later morph tag must not trim
+				continue;
+			}
+			if (intoken) {
+				if (tokpos < 2045)
+					token[tokpos++] = *from;
+				token[tokpos+2] = 0;
+			}
+			else	{
+				text += *from;
+				lastspace = (*from == ' ');
+			}
+		}
+	}
+	return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/gbfosis.cpp b/src/modules/filters/gbfosis.cpp
new file mode 100644
index 0000000..00443f9
--- /dev/null
+++ b/src/modules/filters/gbfosis.cpp
@@ -0,0 +1,420 @@
+/******************************************************************************
+ *
+ * gbfosis - SWFilter descendant to convert GBF markup in a module
+ * to OSIS markup.
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <gbfosis.h>
+#include <swmodule.h>
+#include <versekey.h>
+#include <swlog.h>
+#include <stdarg.h>
+
+SWORD_NAMESPACE_START
+
+GBFOSIS::GBFOSIS() { // empty body: no setup is done here
+}
+
+
+GBFOSIS::~GBFOSIS() { // empty body: nothing is released here
+}
+
+
+char GBFOSIS::processText(SWBuf &text, const SWKey *key, const SWModule *module) { // Rewrites the GBF tokens in text as OSIS markup and, when key is a VerseKey with a non-zero verse, wraps the result in <verse>. module is not used here. Always returns 0.
+ char token[2048]; //cheesy, we seem to like cheese :)
+ int tokpos = 0;
+ bool intoken = false;
+ bool keepToken = false;
+
+// static QuoteStack quoteStack;
+
+ SWBuf orig = text; // scan a copy so text can be rebuilt from scratch
+ SWBuf tmp;
+ SWBuf value;
+
+ bool suspendTextPassThru = false;
+ bool handled = false;
+ bool newWord = false;
+ bool newText = false;
+ bool lastspace = false;
+
+ const char *wordStart = text.c_str(); // NOTE(review): points into text's buffer, which may move when text grows -- confirm against SWBuf
+ const char *wordEnd = NULL;
+
+ const char *textStart = NULL;
+ const char *textEnd = NULL;
+
+ SWBuf textNode = "";
+
+ SWBuf buf;
+
+ text = "";
+ for (const char* from = orig.c_str(); *from; ++from) {
+ if (*from == '<') { //start of new token detected
+ intoken = true;
+ tokpos = 0;
+ token[0] = 0;
+ token[1] = 0;
+ token[2] = 0;
+ textEnd = from-1; //end of last text node found
+ wordEnd = text.c_str() + text.length();//not good, instead of wordEnd = to!
+
+ continue;
+ }
+
+ if (*from == '>') { // process tokens
+ intoken = false;
+ keepToken = false;
+ suspendTextPassThru = false;
+ newWord = true;
+ handled = false;
+
+ while (wordStart < (text.c_str() + text.length())) { //hack
+ if (strchr(";,. :?!()'\"", *wordStart) && wordStart[0] && wordStart[1])
+ wordStart++;
+ else break;
+ }
+ while (wordEnd > wordStart) {
+ if (strchr(" ,;:.?!()'\"", *wordEnd))
+ wordEnd--;
+ else break;
+ }
+
+ // Scripture Reference
+ if (!strncmp(token, "scripRef", 8)) {
+ suspendTextPassThru = true;
+ newText = true;
+ handled = true;
+ }
+ else if (!strncmp(token, "/scripRef", 9)) {
+ tmp = "";
+ if (textStart && textEnd) tmp.append(textStart, (int)(textEnd - textStart)+1); // guard: a closing tag without an opener leaves textStart NULL
+ text += VerseKey::convertToOSIS(tmp.c_str(), key);
+
+ lastspace = false;
+ suspendTextPassThru = false;
+ handled = true;
+ }
+
+ // Footnote
+ if (!strcmp(token, "RF") || !strncmp(token, "RF ", 3)) { //the GBFFootnotes filter adds the attribute "swordFootnote", we want to catch that, too
+ // pushString(buf, "<reference work=\"Bible.KJV\" reference=\"");
+ text += "<note type=\"x-StudyNote\">";
+ newText = true;
+ lastspace = false;
+ handled = true;
+ }
+ else if (!strcmp(token, "Rf")) {
+ text += "</note>";
+ lastspace = false;
+ handled = true;
+ }
+ // hebrew titles
+ if (!strcmp(token, "TH")) {
+ text += "<title type=\"psalm\">";
+ newText = true;
+ lastspace = false;
+ handled = true;
+ }
+ else if (!strcmp(token, "Th")) {
+ text += "</title>";
+ lastspace = false;
+ handled = true;
+ }
+ // Italics assume transchange
+ if (!strcmp(token, "FI")) {
+ text += "<transChange type=\"added\">";
+ newText = true;
+ lastspace = false;
+ handled = true;
+ }
+ else if (!strcmp(token, "Fi")) {
+ text += "</transChange>";
+ lastspace = false;
+ handled = true;
+ }
+ // less than
+ if (!strcmp(token, "CT")) {
+ text += "&lt;";
+ newText = true;
+ lastspace = false;
+ handled = true;
+ }
+ // greater than
+ if (!strcmp(token, "CG")) {
+ text += "&gt;";
+ newText = true;
+ lastspace = false;
+ handled = true;
+ }
+ // Paragraph break. For now use empty paragraph element
+ if (!strcmp(token, "CM")) {
+ text += "<milestone type=\"x-p\" />";
+ newText = true;
+ lastspace = false;
+ handled = true;
+ }
+
+ // Figure
+ else if (!strncmp(token, "img ", 4)) {
+ const char *src = strstr(token, "src");
+ if (!src) // assert we have a src attribute
+ continue;
+// return false;
+
+ text += "<figure src=\"";
+ const char *c;
+ for (c = src;((*c) && (*c != '"')); c++); // advance to the opening quote of the src value
+
+// uncomment for SWORD absolute path logic
+// if (*(c+1) == '/') {
+// pushString(buf, "file:");
+// pushString(buf, module->getConfigEntry("AbsoluteDataPath"));
+// if (*((*buf)-1) == '/')
+// c++; // skip '/'
+// }
+// end of uncomment for asolute path logic
+
+ for (c += (*c ? 1 : 0);((*c) && (*c != '"')); c++) { // step over the opening quote only if one was found, so c never passes the terminator
+ text += *c;
+ }
+ text += "\" />";
+
+ lastspace = false;
+ handled = true;
+ }
+
+ // Strongs numbers
+ else if (*token == 'W' && (token[1] == 'G' || token[1] == 'H')) { // Strongs
+ bool divineName = false;
+ value = token+1;
+
+ // normal strongs number
+ //strstrip(val);
+ if (!strncmp(wordStart, "<w ", 3)) {
+ const char *attStart = strstr(wordStart, "lemma");
+ if (attStart) {
+ attStart += 7;
+
+ buf = "";
+ buf.appendFormatted("strong:%s ", value.c_str());
+ }
+ else { // no lemma attribute
+ attStart = wordStart + 3;
+
+ buf = "";
+ buf.appendFormatted("lemma=\"strong:%s\" ", value.c_str()); // fixed: the empty buf used to be passed as the format string, so nothing was formatted
+ }
+
+ text.insert(attStart - text.c_str(), buf);
+ }
+ else { //wordStart doesn't point to an existing <w> attribute!
+ if (!strcmp(value.c_str(), "H03068")) { //divineName
+ buf = "";
+ buf.appendFormatted("<divineName><w lemma=\"strong:%s\">", value.c_str());
+
+ divineName = true;
+ }
+ else {
+ buf = "";
+ buf.appendFormatted("<w lemma=\"strong:%s\">", value.c_str());
+ }
+
+ text.insert(wordStart - text.c_str(), buf);
+
+ if (divineName) {
+ wordStart += 12;
+ text += "</w></divineName>";
+ }
+ else text += "</w>";
+
+ lastspace = false;
+ }
+ handled = true;
+ }
+
+ // Morphology
+ else if (*token == 'W' && token[1] == 'T') {
+ if (token[2] == 'G' || token[2] == 'H') { // Strongs
+ value = token+2;
+ }
+ else value = token+1;
+
+ if (!strncmp(wordStart, "<w ", 3)) {
+ const char *attStart = strstr(wordStart, "morph");
+ if (attStart) { //existing morph attribute, append this one to it
+ attStart += 7;
+ buf = "";
+ buf.appendFormatted("%s:%s ", "robinson", value.c_str());
+ }
+ else { // no lemma attribute
+ attStart = wordStart + 3;
+ buf = "";
+ buf.appendFormatted("morph=\"%s:%s\" ", "robinson", value.c_str());
+ }
+
+ text.insert(attStart - text.c_str(), buf); //hack, we have to
+ }
+ else { //no existing <w> attribute fond
+ buf = "";
+ buf.appendFormatted("<w morph=\"%s:%s\">", "robinson", value.c_str());
+ text.insert(wordStart - text.c_str(), buf);
+ text += "</w>";
+ lastspace = false;
+
+ }
+ handled = true;
+ }
+
+ if (!keepToken) { // keepToken is only ever assigned false in this function, so this branch is always taken
+ if (!handled) {
+ SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? (const char*)*key : "<unknown>");
+// exit(-1);
+ }
+ if (from[1] && strchr(" ,;.:?!()'\"", from[1])) {
+ if (lastspace) {
+ text--;
+ }
+ }
+ if (newText) {
+ textStart = from+1;
+ newText = false;
+ }
+ continue;
+ }
+
+ // if not a strongs token, keep token in text
+ text.appendFormatted("<%s>", token);
+
+ if (newText) {
+ textStart = text.c_str() + text.length();
+ newWord = false; // NOTE(review): possibly meant newText; left as is because this path is not reached
+ }
+ continue;
+ }
+ if (intoken) {
+ if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { // line feeds and carriage returns inside a token are skipped
+ token[tokpos++] = *from;
+ token[tokpos+2] = 0;
+ }
+ }
+ else {
+ switch (*from) {
+ case '\'':
+ case '\"':
+ case '`':
+// quoteStack.handleQuote(fromStart, from, &to);
+ text += *from;
+ //from++; //this line removes chars after an apostrophe! Needs fixing.
+ break;
+ default:
+ if (newWord && (*from != ' ')) {
+ wordStart = text.c_str() + text.length();
+ newWord = false;
+
+ //fix this if required?
+ //memset(to, 0, 10);
+
+ }
+
+ if (!suspendTextPassThru) {
+ text += (*from);
+ lastspace = (*from == ' ');
+ }
+ }
+ }
+ }
+
+ VerseKey *vkey = SWDYNAMIC_CAST(VerseKey, key);
+ if (vkey) {
+ SWBuf ref = "";
+ if (vkey->Verse()) {
+ ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef());
+ }
+
+ if (ref.length() > 0) {
+
+ text = ref + text;
+
+ if (vkey->Verse()) {
+ VerseKey tmp;
+ tmp = *vkey;
+ tmp.AutoNormalize(0);
+ tmp.Headings(1);
+
+ text += "</verse>";
+
+ tmp = MAXVERSE;
+ if (*vkey == tmp) {
+ tmp.Verse(0);
+// sprintf(ref, "\t</div>");
+// pushString(&to, ref);
+ tmp = MAXCHAPTER;
+ tmp = MAXVERSE;
+ if (*vkey == tmp) {
+ tmp.Chapter(0);
+ tmp.Verse(0);
+// sprintf(ref, "\t</div>");
+// pushString(&to, ref);
+/*
+ if (!quoteStack.empty()) {
+ SWLog::getSystemLog()->logError("popping unclosed quote at end of book");
+ quoteStack.clear();
+ }
+*/
+ }
+ }
+ }
+// else if (vkey->Chapter()) {
+// sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef());
+// }
+// else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef());
+ }
+ }
+ return 0;
+}
+
+
+QuoteStack::QuoteStack() { // construction just runs clear()
+ clear();
+}
+
+
+void QuoteStack::clear() { // pops entries until the quotes stack is empty
+ while (!quotes.empty()) quotes.pop();
+}
+
+
+QuoteStack::~QuoteStack() { // empties the stack on destruction
+ clear();
+}
+
+
+void QuoteStack::handleQuote(char *buf, char *quotePos, SWBuf &text) { // Closes the innermost quote when *quotePos matches its start character, otherwise opens a new one; buf is not used here
+//QuoteInstance(char startChar = '\"', char level = 1, string uniqueID = "", char continueCount = 0) {
+ if (!quotes.empty()) {
+ QuoteInstance last = quotes.top();
+ if (last.startChar == *quotePos) { // same character as the open quote: treat it as the closing mark
+ text += "</quote>";
+ quotes.pop();
+ }
+ else { // different character: open a nested quote one level deeper
+ quotes.push(QuoteInstance(*quotePos, last.level+1));
+ quotes.top().pushStartStream(text);
+ }
+ }
+ else { // no quote is open: start one (the level comes from QuoteInstance's default argument)
+ quotes.push(QuoteInstance(*quotePos));
+ quotes.top().pushStartStream(text);
+ }
+}
+
+void QuoteStack::QuoteInstance::pushStartStream(SWBuf &text) { // appends the opening <quote> tag carrying this instance's level
+ text.appendFormatted("<quote level=\"%d\">", level);
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/gbfplain.cpp b/src/modules/filters/gbfplain.cpp
new file mode 100644
index 0000000..5657e20
--- /dev/null
+++ b/src/modules/filters/gbfplain.cpp
@@ -0,0 +1,97 @@
+/******************************************************************************
+ *
+ * gbfplain - SWFilter descendant to strip out all GBF tags or convert to
+ * ASCII rendered symbols.
+ */
+
+
+#include <stdlib.h>
+#include <gbfplain.h>
+#include <swbuf.h>
+
+SWORD_NAMESPACE_START
+
+GBFPlain::GBFPlain() { // empty body: no setup is done here
+}
+
+
+char GBFPlain::processText (SWBuf &text, const SWKey *key, const SWModule *module) // Rebuilds text with GBF tokens removed or replaced by plain characters; key and module are not used here. Always returns 0.
+{
+ char token[2048];
+ int tokpos = 0;
+ bool intoken = false;
+ SWBuf orig = text; // scan a copy so text can be rebuilt from scratch
+ const char* from = orig.c_str();
+
+ for (text = ""; *from; ++from) {
+ if (*from == '<') { // start of a token: reset the token buffer
+ intoken = true;
+ tokpos = 0;
+ token[0] = 0;
+ token[1] = 0;
+ token[2] = 0;
+ continue;
+ }
+ if (*from == '>') {
+ intoken = false;
+ // process desired tokens
+ switch (*token) {
+ case 'W': // Strongs
+ switch(token[1]) {
+ case 'G': // Greek
+ case 'H': // Hebrew
+ case 'T': // Tense
+ text.append(" <");
+ //for (char *tok = token + 2; *tok; tok++)
+ // text += *tok;
+ text.append(token+2); // everything after the two-letter tag code
+ text.append("> ");
+ continue;
+ }
+ break;
+ case 'R':
+ switch(token[1]) {
+ case 'F': // footnote begin
+ text.append(" [");
+ continue;
+ case 'f': // footnote end
+ text.append("] ");
+ continue;
+ }
+ break;
+ case 'C':
+ switch(token[1]) {
+ case 'A': // ASCII value
+ text.append((char)atoi(&token[2]));
+ continue;
+ case 'G':
+ text.append('>');
+ continue;
+/* Bug in WEB
+ case 'L':
+ *to++ = '<';
+ continue;
+*/
+ case 'L': // Bug in WEB. Use above entry when fixed
+ case 'N': // new line
+ text.append('\n');
+ continue;
+ case 'M': // new paragraph
+ text.append("\n\n");
+ continue;
+ }
+ break;
+ }
+ continue; // any token not handled above is dropped from the output
+ }
+ if (intoken) {
+ if (tokpos < 2045) // characters beyond this limit are silently dropped
+ token[tokpos++] = *from;
+ token[tokpos+2] = 0; // writes a zero two positions past the next write index
+ }
+ else text.append(*from);
+ }
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/gbfredletterwords.cpp b/src/modules/filters/gbfredletterwords.cpp
new file mode 100644
index 0000000..a79802d
--- /dev/null
+++ b/src/modules/filters/gbfredletterwords.cpp
@@ -0,0 +1,93 @@
+/******************************************************************************
+ *
+ * GBFRedLetterWords - SWFilter descendant to toggle red coloring of words of
+ * Christ in a GBF module.
+ */
+
+
+#include <stdlib.h>
+#include <gbfredletterwords.h>
+#include <swmodule.h>
+#include <ctype.h>
+
+SWORD_NAMESPACE_START
+
+const char oName[] = "Words of Christ in Red"; // option name handed to SWOptionFilter by the GBFRedLetterWords constructor
+const char oTip[] = "Toggles Red Coloring for Words of Christ On and Off if they are marked"; // option tip handed to SWOptionFilter alongside oName
+
+const SWBuf choices[3] = {"Off", "On", ""}; // the third entry is only addressed as the end of the range below
+const StringList oValues(&choices[0], &choices[2]); // built from the pointer pair; presumably the half-open range holding "Off" and "On" -- confirm StringList's constructor
+
+GBFRedLetterWords::GBFRedLetterWords() : SWOptionFilter(oName, oTip, &oValues) { // passes the option name, tip and value list to the base class
+ setOptionValue("Off"); // the filter starts with the option set to "Off"
+}
+
+
+GBFRedLetterWords::~GBFRedLetterWords() { // empty body: nothing is released here
+}
+
+
+char GBFRedLetterWords::processText(SWBuf &text, const SWKey *key, const SWModule *module) { // key and module are not used here; always returns 0
+/** With the option off, this function drops the <FR> (start) and <Fr> (end) markers
+* used for red letter words in Bibles like the WEB; the text between them is kept.
+*/
+ if (!option) { // if we don't want red letter markup
+ char token[4096]; // cheese. Fix.
+ int tokpos = 0;
+ bool intoken = false;
+ bool hide = false;
+
+ const char *from;
+ SWBuf orig = text; // scan a copy so text can be rebuilt from scratch
+ from = orig.c_str();
+ for (text = ""; *from; from++) {
+ if (*from == '<') { // start of a token: reset the token buffer
+ intoken = true;
+ tokpos = 0;
+// memset(token, 0, 4096);
+ token[0] = 0;
+ token[1] = 0;
+ token[2] = 0;
+ continue;
+ }
+ if (*from == '>') { // process tokens
+ intoken = false;
+ /*switch (*token) {
+ case 'F': // Font attribute
+ switch(token[1]) {
+ case 'R': // Begin red letter words
+ hide = true;
+ break;
+ case 'r': // end red letter words
+ hide = false;
+ break;
+ }
+ continue; // skip token
+ }*/
+
+ //hide the token if either FR or Fr was detected
+ hide = (token[0] == 'F' && ( (token[1] == 'R') || (token[1] == 'r') )); // only the first two characters are compared
+
+ // if not a red letter word token, keep token in text
+ if (!hide) {
+ text += '<';
+ for (char *tok = token; *tok; tok++)
+ text += *tok;
+ text += '>';
+ }
+ continue;
+ }
+ if (intoken) {
+ if (tokpos < 4090) // characters beyond this limit are silently dropped
+ token[tokpos++] = *from;
+ token[tokpos+2] = 0; // +2 cuz we init token with 2 extra '0' because of switch statement
+ }
+ else {
+ text += *from; // ordinary text is always copied through
+ }
+ }
+ }
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/gbfrtf.cpp b/src/modules/filters/gbfrtf.cpp
new file mode 100644
index 0000000..eb39612
--- /dev/null
+++ b/src/modules/filters/gbfrtf.cpp
@@ -0,0 +1,311 @@
+/******************************************************************************
+ *
+ * gbfrtf - SWFilter descendant to convert all GBF tags to RTF tags
+ */
+
+
+#include <gbfrtf.h>
+#include <utilstr.h>
+#include <ctype.h>
+#include <swbuf.h>
+
+SWORD_NAMESPACE_START
+
+GBFRTF::GBFRTF() { // empty body: no setup is done here
+}
+
+
+char GBFRTF::processText(SWBuf &text, const SWKey *key, const SWModule *module) // Rebuilds text with GBF tokens (and a few OSIS <w>/<note> tags) replaced by RTF control words; key and module are not used here. Always returns 0.
+{
+ char token[2048];
+ char val[128]; // holds one attribute value; the copies into it below are bounded to its size
+ char *valto;
+ char *num;
+ int tokpos = 0;
+ bool intoken = false;
+ const char *tok;
+ SWBuf strongnum;
+ SWBuf strongtense;
+ bool hideText = false;
+ int wordLen = 0;
+ int wordCount = 0;
+
+ const char *from;
+ SWBuf orig = text; // scan a copy so text can be rebuilt from scratch
+ from = orig.c_str();
+ for (text = ""; *from; from++) {
+ if (*from == '<') {
+ wordLen = wordCount;
+ wordCount = 0;
+ intoken = true;
+ tokpos = 0;
+ token[0] = 0;
+ token[1] = 0;
+ token[2] = 0;
+ continue;
+ }
+ if (*from == '>') {
+ intoken = false;
+ // process desired tokens
+ // deal with OSIS note tags. Just hide till OSISRTF
+ if (!strncmp(token, "note ", 5)) {
+ hideText = true;
+ }
+ if (!strncmp(token, "/note", 5)) {
+ hideText = false;
+ }
+
+ switch (*token) {
+ case 'w': // OSIS Word (temporary until OSISRTF is done)
+ strongnum = "";
+ strongtense = "";
+ valto = val;
+ num = strstr(token, "lemma=\"x-Strongs:");
+ if (num) {
+ for (num+=17; ((*num) && (*num != '\"') && (valto < val + sizeof(val) - 1)); num++) // bounded: a token can be far longer than val
+ *valto++ = *num;
+ *valto = 0;
+ if (atoi((!isdigit(*val))?val+1:val) < 5627) {
+ // normal strongs number
+ strongnum += "{\\cf3 \\sub <";
+ for (tok = (!isdigit(*val))?val+1:val; *tok; tok++)
+ strongnum += *tok;
+ strongnum += ">}";
+ }
+ /* forget these for now
+ else {
+ // verb morph
+ sprintf(wordstr, "%03d", word-1);
+ module->getEntryAttributes()["Word"][wordstr]["Morph"] = val;
+ }
+ */
+ }
+ else {
+ num = strstr(token, "lemma=\"strong:");
+ if (num) {
+ for (num+=14; ((*num) && (*num != '\"') && (valto < val + sizeof(val) - 1)); num++) // bounded: a token can be far longer than val
+ *valto++ = *num;
+ *valto = 0;
+ if (atoi((!isdigit(*val))?val+1:val) < 5627) {
+ // normal strongs number
+ strongnum += "{\\cf3 \\sub <";
+ for (tok = (!isdigit(*val))?val+1:val; *tok; tok++)
+ strongnum += *tok;
+ strongnum += ">}";
+ }
+ /* forget these for now
+ else {
+ // verb morph
+ sprintf(wordstr, "%03d", word-1);
+ module->getEntryAttributes()["Word"][wordstr]["Morph"] = val;
+ }
+ */
+ }
+ }
+ valto = val;
+ num = strstr(token, "morph=\"x-Robinson:");
+ if (num) {
+ for (num+=18; ((*num) && (*num != '\"') && (valto < val + sizeof(val) - 1)); num++) // bounded: a token can be far longer than val
+ *valto++ = *num;
+ *valto = 0;
+ // normal robinsons tense
+ strongtense += "{\\cf4 \\sub (";
+ for (tok = val; *tok; tok++)
+ strongtense += *tok;
+ strongtense += ")}";
+ }
+ continue;
+
+ case '/':
+ if (token[1] == 'w') { // closing </w>: emit what the opening <w> collected
+ if ((wordCount > 0) || (strongnum != "{\\cf3 \\sub <3588>}")) {
+ //for (i = 0; i < strongnum.length(); i++)
+ text += strongnum;
+ //for (i = 0; i < strongtense.length(); i++)
+ text += strongtense;
+ }
+ }
+ continue;
+
+ case 'W': // Strongs
+ switch(token[1]) {
+ case 'G': // Greek
+ case 'H': // Hebrew
+ text += "{\\cf3 \\sub <";
+ for (tok = token + 2; *tok; tok++)
+ text += *tok;
+ text += ">}";
+ continue;
+
+ case 'T': // Tense
+ text += "{\\cf4 \\sub (";
+ bool separate = false;
+ for (tok = token + 2; *tok; tok++) {
+ if (separate) {
+ text += "; ";
+ separate = false;
+ }
+ switch (*tok) {
+ case 'G':
+ case 'H':
+ for (tok++; *tok; tok++) {
+ if (isdigit(*tok)) {
+ text += *tok;
+ separate = true;
+ }
+ else {
+ tok--;
+ break;
+ }
+ }
+ break;
+ default:
+ for (; *tok; tok++) {
+ text += *tok;
+ }
+ }
+ }
+ text += ")}";
+ continue;
+ }
+ break;
+ case 'R':
+ switch(token[1]) {
+ case 'X':
+ text += "<a href=\"\">";
+ continue;
+ case 'x':
+ text += "</a>";
+ continue;
+ case 'F': // footnote begin
+ text += "{\\i1 \\sub [ ";
+ continue;
+ case 'f': // footnote end
+ text += " ] }";
+ continue;
+ }
+ break;
+ case 'F': // font tags
+ switch(token[1]) {
+ case 'I': // italic start
+ text += "\\i1 ";
+ continue;
+ case 'i': // italic end
+ text += "\\i0 ";
+ continue;
+ case 'B': // bold start
+ text += "\\b1 ";
+ continue;
+ case 'b': // bold end
+ text += "\\b0 ";
+ continue;
+ case 'N':
+ text += '{';
+ if (!strnicmp(token+2, "Symbol", 6))
+ text += "\\f7 ";
+ if (!strnicmp(token+2, "Courier", 7))
+ text += "\\f8 ";
+ continue;
+ case 'n':
+ text += '}';
+ continue;
+ case 'S':
+ text += "{\\super ";
+ continue;
+ case 's':
+ text += '}';
+ continue;
+ case 'R':
+ text += "{\\cf6 ";
+ continue;
+ case 'r':
+ text += '}';
+ continue;
+ case 'O':
+ case 'C':
+ text += "\\scaps1 ";
+ continue;
+ case 'o':
+ case 'c':
+ text += "\\scaps0 ";
+ continue;
+ case 'V':
+ text += "{\\sub ";
+ continue;
+ case 'v':
+ text += '}';
+ continue;
+ case 'U':
+ text += "\\ul1 ";
+ continue;
+ case 'u':
+ text += "\\ul0 ";
+ continue;
+ }
+ break;
+ case 'C': // special character tags
+ switch(token[1]) {
+ case 'A': // ASCII value
+ text += (char)atoi(&token[2]);
+ continue;
+ case 'G':
+ text += '>';
+ continue;
+ case 'L': // line break
+ text += "\\line ";
+ continue;
+ case 'M': // new paragraph
+ text += "\\par ";
+ continue;
+ case 'T':
+ text += '<';
+ }
+ break;
+ case 'T': // title formatting
+ switch(token[1])
+ {
+ case 'T': // Book title begin
+ text += "{\\large ";
+ continue;
+ case 't':
+ text += '}';
+ continue;
+ case 'S':
+ text += "\\par {\\i1\\b1 ";
+ continue;
+ case 's':
+ text += "}\\par ";
+ continue;
+ }
+ break;
+ case 'J': // justification
+ switch(token[1]) {
+ case 'L':
+ text += "\\ql "; break; // fixed: the cases used to fall through, so <JL> emitted all four alignment words
+ case 'C':
+ text += "\\qc "; break;
+ case 'R':
+ text += "\\qr "; break;
+ case 'F':
+ text += "\\qj "; break;
+ }
+ }
+ continue; // any token not handled above is dropped from the output
+ }
+ if (intoken) {
+ if (tokpos < 2045) // characters beyond this limit are silently dropped
+ token[tokpos++] = *from;
+ token[tokpos+2] = 0;
+ }
+ else {
+ if (!hideText) { // text inside an OSIS <note ...> is suppressed
+ wordCount++;
+ text += *from;
+ }
+ }
+ }
+ return 0;
+}
+
+SWORD_NAMESPACE_END
+
diff --git a/src/modules/filters/gbfstrongs.cpp b/src/modules/filters/gbfstrongs.cpp
new file mode 100644
index 0000000..610edb5
--- /dev/null
+++ b/src/modules/filters/gbfstrongs.cpp
@@ -0,0 +1,126 @@
+/******************************************************************************
+ *
+ * gbfstrongs - SWFilter descendant to hide or show strongs number
+ * in a GBF module.
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <gbfstrongs.h>
+#include <swmodule.h>
+#include <ctype.h>
+
+SWORD_NAMESPACE_START
+
+const char oName[] = "Strong's Numbers"; // option name handed to SWOptionFilter by the GBFStrongs constructor
+const char oTip[] = "Toggles Strong's Numbers On and Off if they exist"; // option tip handed to SWOptionFilter alongside oName
+
+const SWBuf choices[3] = {"Off", "On", ""}; // the third entry is only addressed as the end of the range below
+const StringList oValues(&choices[0], &choices[2]); // built from the pointer pair; presumably the half-open range holding "Off" and "On" -- confirm StringList's constructor
+
+GBFStrongs::GBFStrongs() : SWOptionFilter(oName, oTip, &oValues) { // passes the option name, tip and value list to the base class
+ setOptionValue("Off"); // the filter starts with the option set to "Off"
+}
+
+
+GBFStrongs::~GBFStrongs() { // empty body: nothing is released here
+}
+
+
+char GBFStrongs::processText(SWBuf &text, const SWKey *key, const SWModule *module) { // Records Strong's/morph tokens as entry attributes when the module asks for them, and drops <WG..>/<WH..> tokens from text when the option is off. key is not used here. Always returns 0.
+ char token[2048]; // cheese. Fix.
+ int tokpos = 0;
+ bool intoken = false;
+ bool lastspace = false;
+ int word = 1;
+ char val[128]; // holds one token value; the copies into it below are bounded to its size
+ char wordstr[5];
+ char *valto;
+ unsigned int textStart = 0, textEnd = 0;
+ bool newText = false;
+ SWBuf tmp;
+ const char *from;
+
+ SWBuf orig = text; // scan a copy so text can be rebuilt from scratch
+ from = orig.c_str();
+
+ for (text = ""; *from; from++) {
+ if (*from == '<') {
+ intoken = true;
+ tokpos = 0;
+ token[0] = 0;
+ token[1] = 0;
+ token[2] = 0;
+ textEnd = text.size(); // output length at the point where this token starts
+ continue;
+ }
+ if (*from == '>') { // process tokens
+ intoken = false;
+ if (*token == 'W' && (token[1] == 'G' || token[1] == 'H')) { // Strongs
+ if (module->isProcessEntryAttributes()) {
+ valto = val;
+ for (unsigned int i = 1; ((token[i]) && (i < sizeof(val))); i++) // fixed: the old limit of 150 could write past the 128-byte val
+ *valto++ = token[i];
+ *valto = 0;
+ if (atoi((!isdigit(*val))?val+1:val) < 5627) {
+ // normal strongs number
+ sprintf(wordstr, "%03d", word++);
+ module->getEntryAttributes()["Word"][wordstr]["PartsCount"] = "1";
+ module->getEntryAttributes()["Word"][wordstr]["Lemma"] = val;
+ module->getEntryAttributes()["Word"][wordstr]["LemmaClass"] = "strong";
+ tmp = "";
+ tmp.append(text.c_str()+textStart, (int)(textEnd - textStart));
+ module->getEntryAttributes()["Word"][wordstr]["Text"] = tmp;
+ newText = true;
+ }
+ else {
+ // verb morph
+ sprintf(wordstr, "%03d", word-1);
+ module->getEntryAttributes()["Word"][wordstr]["Morph"] = val;
+ module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = "OLBMorph";
+ }
+ }
+
+ if (!option) { // Strong's numbers are switched off: drop the token from the output
+ if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) {
+ if (lastspace)
+ text--;
+ }
+ if (newText) {textStart = text.size(); newText = false; }
+ continue;
+ }
+ }
+ if (module->isProcessEntryAttributes()) {
+ if ((*token == 'W') && (token[1] == 'T')) { // Morph
+ valto = val;
+ for (unsigned int i = 2; ((token[i]) && (i < sizeof(val))); i++) // fixed: the old limit of 150 could write past the 128-byte val
+ *valto++ = token[i];
+ *valto = 0;
+ sprintf(wordstr, "%03d", word-1);
+ module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = "GBFMorph";
+ module->getEntryAttributes()["Word"][wordstr]["Morph"] = val;
+ newText = true;
+ }
+ }
+ // if not a strongs token, keep token in text
+ text += '<';
+ text += token;
+ text += '>';
+ if (newText) {textStart = text.size(); newText = false; }
+ continue;
+ }
+ if (intoken) {
+ if (tokpos < 2045) // characters beyond this limit are silently dropped
+ token[tokpos++] = *from;
+ token[tokpos+2] = 0;
+ }
+ else {
+ text += *from;
+ lastspace = (*from == ' ');
+ }
+ }
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/gbfthml.cpp b/src/modules/filters/gbfthml.cpp
new file mode 100644
index 0000000..2664f48
--- /dev/null
+++ b/src/modules/filters/gbfthml.cpp
@@ -0,0 +1,216 @@
+/***************************************************************************
+ gbfthml.cpp - GBF to ThML filter
+ -------------------
+ begin : 1999-10-27
+ copyright : 2001 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <gbfthml.h>
+#include <swbuf.h>
+
+SWORD_NAMESPACE_START
+
+GBFThML::GBFThML() // empty body: no setup is done here
+{
+}
+
+
+char GBFThML::processText(SWBuf &text, const SWKey *key, const SWModule *module) { // Rebuilds text with GBF tokens replaced by ThML/HTML-style tags; key and module are not used here. Always returns 0.
+ const char *from;
+ char token[2048];
+ int tokpos = 0;
+ bool intoken = false;
+ const char *tok;
+
+ SWBuf orig = text; // scan a copy so text can be rebuilt from scratch
+ from = orig.c_str();
+
+ for (text = ""; *from; from++) {
+ if (*from == '<') {
+ intoken = true;
+ tokpos = 0;
+ token[0] = 0;
+ token[1] = 0;
+ token[2] = 0;
+ continue;
+ }
+ if (*from == '>')
+ {
+ intoken = false;
+ // process desired tokens
+ switch (*token) {
+ case 'W': // Strongs
+ switch(token[1]) {
+ case 'G':
+ case 'H':
+ text += "<sync type=\"Strongs\" value=\"";
+ for (tok = token + 1; *tok; tok++)
+ text += *tok;
+ text += "\" />";
+ continue;
+
+ case 'T': // Tense
+ text += "<sync type=\"Morph\" value=\"";
+ for (tok = token + 2; *tok; tok++)
+ text += *tok;
+ text += "\" />";
+ continue;
+ }
+ break;
+ case 'R':
+ switch(token[1])
+ {
+ case 'X':
+ text += "<a href=\"";
+ for (tok = token + 3; *tok; tok++) {
+ if(!(tok[0] == '<' && tok[1] == 'R' && tok[2] == 'x')) { // fixed: "*tok+1" added 1 to the character, which cut the href at the first 'Q' or 'v'
+ text += *tok;
+ }
+ else {
+ break;
+ }
+ }
+ text += "\">";
+ continue;
+ case 'x':
+ text += "</a>";
+ continue;
+ case 'F': // footnote begin
+ text += "<note>";
+ continue;
+ case 'f': // footnote end
+ text += "</note>";
+ continue;
+ }
+ break;
+ case 'F': // font tags
+ switch(token[1])
+ {
+ case 'N':
+ text += "<font face=\"";
+ for (tok = token + 2; *tok; tok++)
+ text += *tok;
+ text += "\">";
+ continue;
+ case 'n':
+ text += "</font>";
+ continue;
+ case 'I': // italic start
+ text += "<i>";
+ continue;
+ case 'i': // italic end
+ text += "</i>";
+ continue;
+ case 'B': // bold start
+ text += "<b>";
+ continue;
+ case 'b': // bold end
+ text += "</b>";
+ continue;
+
+ case 'R': // words of Jesus begin
+ text += "<font color=\"#ff0000\">";
+ continue;
+ case 'r': // words of Jesus end
+ text += "</font>";
+ continue;
+ case 'U': // Underline start
+ text += "<u>";
+ continue;
+ case 'u': // Underline end
+ text += "</u>";
+ continue;
+ case 'O': // Old Testament quote begin
+ text += "<cite>";
+ continue;
+ case 'o': // Old Testament quote end
+ text += "</cite>";
+ continue;
+ case 'S': // Superscript begin
+ text += "<sup>";
+ continue;
+ case 's': // Superscript end
+ text += "</sup>";
+ continue;
+ case 'V': // Subscript begin
+ text += "<sub>";
+ continue;
+ case 'v': // Subscript end
+ text += "</sub>";
+ continue;
+ }
+ break;
+ case 'C': // special character tags
+ switch(token[1])
+ {
+ case 'A': // ASCII value
+ text += (char)atoi(&token[2]);
+ continue;
+ case 'G':
+ //*to++ = ' ';
+ continue;
+ case 'L': // line break
+ text += "<br /> ";
+ continue;
+ case 'M': // new paragraph
+ text += "<p />";
+ continue;
+ case 'T':
+ //*to++ = ' ';
+ continue;
+ }
+ break;
+ case 'T': // title formatting
+ switch(token[1])
+ {
+ case 'T': // Book title begin
+ text += "<big>";
+ continue;
+ case 't':
+ text += "</big>";
+ continue;
+ case 'S':
+ text += "<div class=\"sechead\">";
+ continue;
+ case 's':
+ text += "</div>";
+ continue;
+ }
+ break;
+
+ case 'P': // special formatting
+ switch(token[1]) {
+ case 'P': // Poetry begin
+ text += "<verse>";
+ continue;
+ case 'p':
+ text += "</verse>";
+ continue;
+ }
+ break;
+ }
+ continue; // any token not handled above is dropped from the output
+ }
+ if (intoken) {
+ if (tokpos < 2045) // characters beyond this limit are silently dropped
+ token[tokpos++] = *from;
+ token[tokpos+2] = 0;
+ }
+ else text += *from;
+ }
+ return 0;
+}
+
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/gbfwebif.cpp b/src/modules/filters/gbfwebif.cpp
new file mode 100644
index 0000000..e651db6
--- /dev/null
+++ b/src/modules/filters/gbfwebif.cpp
@@ -0,0 +1,191 @@
+/***************************************************************************
+ GBFWEBIF.cpp - GBF to HTML filter with hrefs
+ for strongs and morph tags
+ -------------------
+ begin : 2001-09-03
+ copyright : 2001 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <gbfwebif.h>
+#include <ctype.h>
+#include <url.h>
+
+SWORD_NAMESPACE_START
+
+GBFWEBIF::GBFWEBIF() : baseURL(""), passageStudyURL(baseURL + "passagestudy.jsp") { // NOTE(review): passageStudyURL is built from baseURL, so baseURL presumably has to be declared first in the class -- confirm in gbfwebif.h
+//tokens not handled by GBFWEBIF::handleToken fall back to GBFHTMLHREF::handleToken
+ addTokenSubstitute("FR", "<span class=\"wordsOfJesus\">"); // words of Jesus begin
+ addTokenSubstitute("Fr", "</span>"); // words of Jesus end
+}
+
+bool GBFWEBIF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) { // Appends the HTML for one GBF token to buf, linking Strong's and morph codes to passageStudyURL. Returns true when handled here, otherwise whatever GBFHTMLHREF::handleToken returns.
+ const char *tok;
+ char val[128]; // holds one attribute value; the copies into it below are bounded to its size
+ char *valto;
+ const char *num;
+ SWBuf url;
+
+ if (!substituteToken(buf, token)) {
+ if (!strncmp(token, "w", 1)) {
+ // OSIS Word (temporary until OSISRTF is done)
+ valto = val;
+ num = strstr(token, "lemma=\"x-Strongs:");
+ if (num) {
+ for (num+=17; ((*num) && (*num != '\"') && (valto < val + sizeof(val) - 1)); num++) // bounded so a long attribute cannot overrun val
+ *valto++ = *num;
+ *valto = 0;
+
+ if (atoi((!isdigit(*val))?val+1:val) < 5627) {
+ buf += " <small><em>&lt;";
+ url = "";
+ for (tok = val; *tok; tok++) {
+ url += *tok;
+ }
+ if ((url.length() > 1) && strchr("GH", url[0])) {
+ if (isdigit(url[1]))
+ url = url.c_str()+1;
+ }
+ buf.appendFormatted("<a href=\"%s?showStrong=%s#cv\">", passageStudyURL.c_str(), URL::encode(url).c_str());
+
+ for (tok = (!isdigit(*val))?val+1:val; *tok; tok++) {
+ buf += *tok;
+ }
+ buf += "</a>&gt;</em></small> ";
+ }
+ }
+ else {
+ num = strstr(token, "lemma=\"strong:");
+ if (num) {
+ for (num+=14; ((*num) && (*num != '\"') && (valto < val + sizeof(val) - 1)); num++) // bounded so a long attribute cannot overrun val
+ *valto++ = *num;
+ *valto = 0;
+
+ if (atoi((!isdigit(*val))?val+1:val) < 5627) {
+ buf += " <small><em>&lt;";
+ url = "";
+ for (tok = val; *tok; tok++) {
+ url += *tok;
+ }
+ if ((url.length() > 1) && strchr("GH", url[0])) {
+ if (isdigit(url[1]))
+ url = url.c_str()+1;
+ }
+ buf.appendFormatted("<a href=\"%s?showStrong=%s#cv\">", passageStudyURL.c_str(), URL::encode(url).c_str());
+
+ for (tok = (!isdigit(*val))?val+1:val; *tok; tok++) {
+ buf += *tok;
+ }
+ buf += "</a>&gt;</em></small> ";
+ }
+ }
+ }
+ valto = val;
+ num = strstr(token, "morph=\"x-Robinson:");
+ if (num) {
+ for (num+=18; ((*num) && (*num != '\"') && (valto < val + sizeof(val) - 1)); num++) // bounded so a long attribute cannot overrun val
+ *valto++ = *num;
+ *valto = 0;
+ buf += " <small><em>(";
+ url = "";
+ for (tok = val; *tok; tok++) {
+ // normal robinsons tense
+ url += *tok; // fixed: the code used to go into buf, leaving showMorph= empty and printing the code twice
+ }
+ buf.appendFormatted("<a href=\"%s?showMorph=%s#cv\">", passageStudyURL.c_str(), URL::encode(url).c_str());
+
+ for (tok = val; *tok; tok++) {
+ buf += *tok;
+ }
+ buf += "</a>)</em></small> ";
+ }
+ }
+
+ else if (!strncmp(token, "WG", 2) || !strncmp(token, "WH", 2)) { // strong's numbers
+ buf += " <small><em>&lt;";
+ url = "";
+
+ for (tok = token+1; *tok; tok++) {
+ url += *tok;
+ }
+ if ((url.length() > 1) && strchr("GH", url[0])) {
+ if (isdigit(url[1]))
+ url = url.c_str()+1;
+ }
+ buf.appendFormatted("<a href=\"%s?showStrong=%s#cv\">", passageStudyURL.c_str(), URL::encode(url).c_str());
+
+ for (tok = token + 2; *tok; tok++) {
+ buf += *tok;
+ }
+ buf += "</a>&gt;</em></small>";
+ }
+
+ else if (!strncmp(token, "WTG", 3) || !strncmp(token, "WTH", 3)) { // strong's numbers tense
+ buf += " <small><em>(";
+ url = "";
+ for (tok = token + 2; *tok; tok++) {
+ if(*tok != '\"')
+ url += *tok;
+ }
+ if ((url.length() > 1) && strchr("GH", url[0])) {
+ if (isdigit(url[1]))
+ url = url.c_str()+1;
+ }
+ buf.appendFormatted("<a href=\"%s?showStrong=%s#cv\">", passageStudyURL.c_str(), URL::encode(url).c_str());
+
+ for (tok = token + 3; *tok; tok++)
+ if(*tok != '\"')
+ buf += *tok;
+ buf += "</a>)</em></small>";
+ }
+
+ else if (!strncmp(token, "WT", 2) && strncmp(token, "WTH", 3) && strncmp(token, "WTG", 3)) { // morph tags
+ buf += " <small><em>(";
+ for (tok = token + 2; *tok; tok++) {
+ if(*tok != '\"')
+ url += *tok; // fixed: the code used to go into buf; url is still empty when this branch starts
+ }
+ buf.appendFormatted("<a href=\"%s?showMorph=%s#cv\">", passageStudyURL.c_str(), URL::encode(url).c_str());
+
+ for (tok = token + 2; *tok; tok++) {
+ if(*tok != '\"')
+ buf += *tok;
+ }
+ buf += "</a>)</em></small>";
+ }
+
+ else if (!strncmp(token, "RX", 2)) {
+ url = ""; // fixed: collect the reference in url instead of opening a second, broken href in buf
+ for (tok = token + 3; *tok; tok++) {
+ if(!(tok[0] == '<' && tok[1] == 'R' && tok[2] == 'x')) { // fixed: "*tok+1" added 1 to the character instead of reading the next one
+ url += *tok;
+ }
+ else {
+ break;
+ }
+ }
+
+ buf.appendFormatted("<a href=\"%s?key=%s#cv\">", passageStudyURL.c_str(), URL::encode(url).c_str()); // fixed: the leading '<' was missing
+ }
+ // ok to leave these in
+ else if ((!strncmp(token, "span", 4))
+ || (!strncmp(token, "/span", 5))) {
+ buf.appendFormatted("<%s>", token);
+ }
+
+ else {
+ return GBFHTMLHREF::handleToken(buf, token, userData);
+ }
+ }
+ return true;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/gbfwordjs.cpp b/src/modules/filters/gbfwordjs.cpp
new file mode 100644
index 0000000..f81ffac
--- /dev/null
+++ b/src/modules/filters/gbfwordjs.cpp
@@ -0,0 +1,282 @@
+/******************************************************************************
+ *
+ * gbfwordjs - SWFilter descendant to toggle word-level Javascript data
+ * in a GBF module.
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <gbfwordjs.h>
+#include <swmodule.h>
+#include <ctype.h>
+#include <utilstr.h>
+#include <versekey.h>
+
+SWORD_NAMESPACE_START
+
+// Name and tooltip handed to SWOptionFilter by the GBFWordJS constructor.
+const char oName[] = "Word Javascript";
+const char oTip[] = "Toggles Word Javascript data";
+
+// Selectable values. Only the first two entries are copied into oValues;
+// the trailing empty string merely marks the end of the copied range.
+const SWBuf choices[3] = {"Off", "On", ""};
+const StringList oValues(choices, choices + 2);
+
+
+/**
+ * Registers the option name, tip and value list with SWOptionFilter,
+ * selects "Off" and clears every module/manager pointer.
+ */
+GBFWordJS::GBFWordJS() : SWOptionFilter(oName, oTip, &oValues) {
+	setOptionValue("Off");
+
+	// no lexicon or parsing modules are known until a caller supplies them
+	defaultGreekLex   = 0;
+	defaultGreekParse = 0;
+	defaultHebLex     = 0;
+	defaultHebParse   = 0;
+	mgr               = 0;
+}
+
+
+// Destructor: the body is empty, nothing is released here.
+GBFWordJS::~GBFWordJS() {}
+
+
+/**
+ * Copies src into dest, truncating to destSize-1 characters and always
+ * NUL-terminating, so an over-long token cannot overrun the value buffer.
+ */
+static void copyTokenValue(char *dest, size_t destSize, const char *src) {
+	strncpy(dest, src, destSize - 1);
+	dest[destSize - 1] = 0;
+}
+
+
+/**
+ * Inserts the opening clickable <span> for one previously recorded word.
+ *
+ * Reads ["Word"][NNN] from the module's entry attributes (NNN is wordNum
+ * formatted as "%03d") and, when a lemma and a TextStart offset are present,
+ * inserts the span into text at TextStart + lastAppendLen.
+ *
+ * @param text          output buffer being built by processText
+ * @param module        module whose entry attributes hold the word data (non-null)
+ * @param key           current key; its text is the word id when vkey is null (may be null)
+ * @param vkey          key as a VerseKey, or null
+ * @param wordNum       number of the word to emit
+ * @param greekLex      lexicon used for 'G' lemmas (may be null)
+ * @param hebLex        lexicon used for 'H' lemmas (may be null)
+ * @param modName       module name, used in the word id and as last p() argument
+ * @param lastAppendLen in: length of the previously inserted span; out: length
+ *                      of the span inserted by this call (unchanged if none)
+ */
+static void insertWordSpan(SWBuf &text, const SWModule *module, const SWKey *key, VerseKey *vkey, int wordNum, SWModule *greekLex, SWModule *hebLex, const SWBuf &modName, unsigned int &lastAppendLen) {
+	char wstr[12];
+	sprintf(wstr, "%03d", wordNum);
+	AttributeValue *wAttrs = &(module->getEntryAttributes()["Word"][wstr]);
+	SWBuf strong = (*wAttrs)["Lemma"];
+	SWBuf morph = (*wAttrs)["Morph"];
+	// The next two values are not used below; the lookups are kept because
+	// operator[] presumably creates missing entries (map-like) - TODO confirm.
+	SWBuf morphClass = (*wAttrs)["MorphClass"];
+	SWBuf wordText = (*wAttrs)["Text"];
+	SWBuf textSt = (*wAttrs)["TextStart"];
+	if (!strong.size())
+		return;
+
+	// A leading non-digit is the language letter; otherwise derive it from the key.
+	char gh = isdigit(strong[0]) ? 0:strong[0];
+	if (!gh) {
+		if (vkey) {
+			// NOTE(review): any non-zero Testament() selects 'H' - confirm
+			// that this distinguishes Old from New Testament as intended.
+			gh = vkey->Testament() ? 'H' : 'G';
+		}
+	}
+	else strong << 1;	// drop the language letter from the lemma
+
+	SWModule *sLex = 0;
+	if (gh == 'G') {
+		sLex = greekLex;
+	}
+	if (gh == 'H') {
+		sLex = hebLex;
+	}
+	SWBuf lexName = "";
+	if (sLex) {
+		// we can pass the real lex name in, but we have some
+		// aliases in the javascript to optimize bandwidth
+		lexName = sLex->Name();
+		if (lexName == "StrongsGreek")
+			lexName = "G";
+		if (lexName == "StrongsHebrew")
+			lexName = "H";
+	}
+
+	SWBuf wordID;
+	if (vkey) {
+		// optimize for bandwidth and use only the verse as the unique entry id
+		wordID.appendFormatted("%d", vkey->Verse());
+	}
+	else if (key) {
+		wordID = key->getText();
+	}
+	// keep only characters that are safe inside an identifier
+	for (unsigned int i = 0; i < wordID.size(); i++) {
+		if ((!isdigit(wordID[i])) && (!isalpha(wordID[i]))) {
+			wordID[i] = '_';
+		}
+	}
+	wordID.appendFormatted("_%s%d", modName.c_str(), atoi(wstr));
+
+	if (textSt.size()) {
+		int textStr = atoi(textSt.c_str());
+		textStr += lastAppendLen;	// account for the span inserted before this one
+		SWBuf spanStart = "";
+		// only the part of the morph code after ':' is passed on
+		const char *m = strchr(morph.c_str(), ':');
+		if (m) m++;
+		else m = morph.c_str();
+		// 'p' = 'fillpop' to save bandwidth
+		spanStart.appendFormatted("<span class=\"clk\" onclick=\"p('%s','%s','%s','%s','','%s');\" >", lexName.c_str(), strong.c_str(), wordID.c_str(), m, modName.c_str());
+		text.insert(textStr, spanStart);
+		lastAppendLen = spanStart.length();
+	}
+}
+
+
+/**
+ * Records every Strong's-tagged word of a GBF entry in the module's entry
+ * attributes and wraps it in a clickable <span>.
+ *
+ * For each <WG...>/<WH...> token whose number is below 5627 a new
+ * ["Word"][NNN] attribute set is written (Lemma, Text, TextStart) and
+ * "</span>" is appended; larger numbers and <WT...> tokens are stored as the
+ * Morph of the current word. All tokens are copied through to the output.
+ * The opening <span> of a word is inserted one word late (when the following
+ * word is recorded), and after the loop for the last word.
+ *
+ * Does nothing when the option is off or when no module is supplied.
+ *
+ * @param text   entry text, rewritten in place
+ * @param key    current key (may be null)
+ * @param module module owning the entry attributes (may be null)
+ * @return always 0
+ */
+char GBFWordJS::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+	// All word data lives in the module's entry attributes, so without a
+	// module there is nothing to record (the old code dereferenced null here).
+	if (!option || !module)
+		return 0;
+
+	char token[2112]; // cheese.  Fix.
+	int tokpos = 0;
+	bool intoken = false;
+	int word = 1;			// number of the next word to record
+	char val[128];
+	char wordstr[12];		// large enough for any "%03d"-formatted int
+	unsigned int textStart = 0, lastAppendLen = 0, textEnd = 0;
+	SWBuf tmp;
+	bool newText = false;
+	bool needWordOut = false;
+	AttributeValue *wordAttrs = 0;
+	const SWBuf modName = module->Name();
+
+	const SWBuf orig = text;
+	const char *from = orig.c_str();
+	VerseKey *vkey = 0;
+	if (key) {
+		vkey = SWDYNAMIC_CAST(VerseKey, key);
+	}
+
+	for (text = ""; *from; from++) {
+		if (*from == '<') {
+			intoken = true;
+			tokpos = 0;
+			token[0] = 0;
+			token[1] = 0;
+			token[2] = 0;
+			textEnd = text.length();	// the word text ends where the token begins
+			continue;
+		}
+		if (*from == '>') {	// process tokens
+			intoken = false;
+			if (*token == 'W' && (token[1] == 'G' || token[1] == 'H')) {	// Strongs
+				copyTokenValue(val, sizeof(val), token+1);
+				if (atoi((!isdigit(*val))?val+1:val) < 5627) {
+					// normal strongs number
+					sprintf(wordstr, "%03d", word++);
+					needWordOut = (word > 2);
+					wordAttrs = &(module->getEntryAttributes()["Word"][wordstr]);
+					(*wordAttrs)["Lemma"] = val;
+					tmp = "";
+					tmp.append(text.c_str()+textStart, (int)(textEnd - textStart));
+					(*wordAttrs)["Text"] = tmp;
+					text.append("</span>");
+					SWBuf ts;
+					ts.appendFormatted("%d", textStart);
+					(*wordAttrs)["TextStart"] = ts;
+					newText = true;
+				}
+				else {
+					// verb morph
+					if (wordAttrs) {
+						(*wordAttrs)["Morph"] = val;
+					}
+				}
+			}
+			if (*token == 'W' && token[1] == 'T') {	// Morph
+				if (token[2] == 'G' || token[2] == 'H') {
+					copyTokenValue(val, sizeof(val), token+2);
+				}
+				else copyTokenValue(val, sizeof(val), token+1);
+				if (wordAttrs) {
+					(*wordAttrs)["Morph"] = val;
+					(*wordAttrs)["MorphClass"] = "StrongsMorph";
+				}
+				newText = true;
+			}
+			// the token itself is always kept in the text
+			text += '<';
+			text += token;
+			text += '>';
+			if (needWordOut) {
+				// emit the span of the word before the one just recorded
+				needWordOut = false;
+				insertWordSpan(text, module, key, vkey, word-2, defaultGreekLex, defaultHebLex, modName, lastAppendLen);
+			}
+			if (newText) {
+				textStart = text.length();
+				newText = false;
+			}
+			continue;
+		}
+		if (intoken) {
+			if (tokpos < 2045) {
+				token[tokpos++] = *from;
+			}
+			// keep the token terminated, with slack for the token[1]/token[2] peeks above
+			token[tokpos+2] = 0;
+		}
+		else {
+			text += *from;
+		}
+	}
+
+	// emit the span of the last recorded word
+	insertWordSpan(text, module, key, vkey, word-1, defaultGreekLex, defaultHebLex, modName, lastAppendLen);
+
+	return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/greeklexattribs.cpp b/src/modules/filters/greeklexattribs.cpp
new file mode 100644
index 0000000..1e82305
--- /dev/null
+++ b/src/modules/filters/greeklexattribs.cpp
@@ -0,0 +1,101 @@
+/******************************************************************************
+ *
+ * greeklexattribs - SWFilter descendant to set entry attributes for greek
+ * lexicons
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <string>
+#include <greeklexattribs.h>
+#include <swmodule.h>
+
+using std::string;
+
+SWORD_NAMESPACE_START
+
+// Constructor: the body is empty, nothing is initialised here.
+GreekLexAttribs::GreekLexAttribs() {}
+
+
+/**
+ * Extracts the "AV-" usage list of a Greek lexicon entry into entry attributes.
+ *
+ * Scanning starts at each "AV-" marker and stops at the next ';'. Inside that
+ * region every phrase followed by a run of digits is stored as
+ * ["AVPhrase"][NNN]["Phrase"] / ["Frequency"]; a parenthesised part of the
+ * phrase is split off into ["Alt"]; a value="..." attribute found inside a
+ * tag is stored as ["CompoundedWith"] of the phrase about to be numbered.
+ * The text itself is not modified.
+ *
+ * Runs only when a module is supplied and it has entry-attribute processing
+ * enabled.
+ *
+ * @param text   entry text to scan
+ * @param key    unused
+ * @param module module receiving the attributes (may be null)
+ * @return always 0
+ */
+char GreekLexAttribs::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+
+	if (module && module->isProcessEntryAttributes()) {
+		const char *from;
+		bool inAV = false;
+		string phrase;
+		string freq;
+		char val[128], *valto;
+		char wordstr[12];	// large enough for any "%03d"-formatted int
+		const char *currentPhrase = 0;
+		const char *currentPhraseEnd = 0;
+		int number = 0;
+
+
+		for (from = text.c_str(); *from; from++) {
+			if (inAV) {
+				if (currentPhrase == 0) {
+					// a phrase starts at the first letter
+					if (isalpha(*from))
+						currentPhrase = from;
+				}
+				else {
+					if ((!isalpha(*from)) && (*from != ' ') && (*from != '+') && (*from !='(') && (*from != ')') && (*from != '\'')) {
+						if (*from == '<') {
+							if (!currentPhraseEnd)
+								currentPhraseEnd = from - 1;
+							for (; *from && *from != '>'; from++) {
+								if (!strncmp(from, "value=\"", 7)) {
+									valto = val;
+									from += 7;
+									// stop at the closing quote, the buffer limit or the end of the text
+									for (unsigned int i = 0; from[i] && from[i] != '\"' && i < 127; i++)
+										*valto++ = from[i];
+									*valto = 0;
+									sprintf(wordstr, "%03d", number+1);
+									module->getEntryAttributes()["AVPhrase"][wordstr]["CompoundedWith"] = val;
+									from += strlen(val);
+									// unterminated value: do not step past the terminator
+									if (!*from)
+										break;
+								}
+							}
+							// unterminated tag: the outer increment must not run past the terminator
+							if (!*from)
+								break;
+							continue;
+						}
+
+						phrase = "";
+						phrase.append(currentPhrase, (int)(((currentPhraseEnd>currentPhrase)?currentPhraseEnd:from) - currentPhrase)-1);
+						currentPhrase = from;
+						while (*from && isdigit(*from)) from++;
+						freq = "";
+						freq.append(currentPhrase, (int)(from - currentPhrase));
+						if ((freq.length() > 0) && (phrase.length() > 0)) {
+							sprintf(wordstr, "%03d", ++number);
+							if ((strchr(phrase.c_str(), '(') > phrase.c_str()) && (strchr(phrase.c_str(), ')') > phrase.c_str() + 1)) {
+								// text before '(' is the phrase; the phrase without the parentheses is the alternative
+								string tmp = phrase.substr(0, phrase.find_first_of("("));
+								phrase.erase(phrase.find_first_of("("), 1);
+								phrase.erase(phrase.find_first_of(")"), 1);
+								phrase.erase(0,phrase.find_first_not_of("\r\n\v\t ")); phrase.erase(phrase.find_last_not_of("\r\n\v\t ")+1);
+								module->getEntryAttributes()["AVPhrase"][wordstr]["Alt"] = phrase.c_str();
+								phrase = tmp;
+							}
+							// trim surrounding whitespace before storing
+							phrase.erase(0,phrase.find_first_not_of("\r\n\v\t ")); phrase.erase(phrase.find_last_not_of("\r\n\v\t ")+1);
+							freq.erase(0,freq.find_first_not_of("\r\n\v\t ")); freq.erase(freq.find_last_not_of("\r\n\v\t ")+1);
+							module->getEntryAttributes()["AVPhrase"][wordstr]["Phrase"] = phrase.c_str();
+							module->getEntryAttributes()["AVPhrase"][wordstr]["Frequency"] = freq.c_str();
+							currentPhrase = 0;
+							currentPhraseEnd = 0;
+						}
+					}
+				}
+				// the digit scan may have reached the end of the text
+				if (!*from)
+					break;
+				if (*from == ';') inAV = false;
+
+			}
+			else if (!strncmp(from, "AV-", 3)) {
+				inAV = true;
+				from+=2;	// together with the loop increment this skips "AV-"
+			}
+		}
+	}
+	return 0;
+}
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/latin1utf16.cpp b/src/modules/filters/latin1utf16.cpp
new file mode 100644
index 0000000..1392750
--- /dev/null
+++ b/src/modules/filters/latin1utf16.cpp
@@ -0,0 +1,119 @@
+/******************************************************************************
+ *
+ * Latin1UTF16 - SWFilter descendant to convert a Latin-1 character to UTF-16
+ *
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <latin1utf16.h>
+#include <swbuf.h>
+
+SWORD_NAMESPACE_START
+
+// Constructor: the body is empty, nothing is initialised here.
+Latin1UTF16::Latin1UTF16() {}
+
+
+/**
+ * Rewrites text so that every input byte becomes one 16-bit code unit.
+ *
+ * Bytes 0x80-0x9F are translated through the table below; every other byte
+ * keeps its numeric value. Each unit is stored with the host's native
+ * unsigned short layout.
+ *
+ * @param text   buffer converted in place
+ * @param key    values 0 and 1 are the cipher-pass markers; conversion is skipped for them
+ * @param module unused
+ * @return -1 when called for a cipher pass, otherwise 0
+ */
+char Latin1UTF16::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+	// 16-bit values for input bytes 0x80..0x9F, indexed by (byte - 0x80).
+	// Bytes without a dedicated mapping (0x81, 0x8D, 0x8F, 0x90, 0x9D) map to themselves.
+	static const unsigned short highCodes[32] = {
+		0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,	// 0x80 - 0x87
+		0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F,	// 0x88 - 0x8F
+		0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,	// 0x90 - 0x97
+		0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178	// 0x98 - 0x9F
+	};
+
+	if ((unsigned long)key < 2)	// hack, we're en(1)/de(0)ciphering
+		return (char)-1;
+
+	SWBuf orig = text;
+	const unsigned char *from = (const unsigned char *)orig.c_str();
+
+	text = "";
+	while (*from) {
+		const unsigned char ch = *from++;
+		const unsigned short unit = (ch >= 0x80 && ch <= 0x9F) ? highCodes[ch - 0x80] : (unsigned short) ch;
+
+		// grow by one code unit and write it into the two new bytes
+		text.setSize(text.size()+2);
+		*((unsigned short *)(text.getRawData()+(text.size()-2))) = unit;
+	}
+	return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/latin1utf8.cpp b/src/modules/filters/latin1utf8.cpp
new file mode 100644
index 0000000..6c0d7f1
--- /dev/null
+++ b/src/modules/filters/latin1utf8.cpp
@@ -0,0 +1,173 @@
+/******************************************************************************
+ *
+ * Latin1UTF8 - SWFilter descendant to convert a Latin-1 character to UTF-8
+ *
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <latin1utf8.h>
+#include <swmodule.h>
+
+SWORD_NAMESPACE_START
+
+// Constructor: the body is empty, nothing is initialised here.
+Latin1UTF8::Latin1UTF8() {}
+
+
+/**
+ * Rewrites text, replacing every byte >= 0x80 with a multi-byte sequence.
+ *
+ * Bytes below 0x80 are copied unchanged. Bytes 0x80-0x9F are replaced by the
+ * sequence listed in the table below, bytes 0xA0-0xBF by 0xC2 followed by the
+ * byte, and bytes 0xC0-0xFF by 0xC3 followed by (byte - 0x40).
+ *
+ * @param text   buffer converted in place
+ * @param key    values 0 and 1 are the cipher-pass markers; conversion is skipped for them
+ * @param module unused
+ * @return -1 when called for a cipher pass, otherwise 0
+ */
+char Latin1UTF8::processText(SWBuf &text, const SWKey *key, const SWModule *module)
+{
+	// Replacement sequences for input bytes 0x80..0x9F, indexed by (byte - 0x80).
+	// Bytes without a dedicated mapping (0x81, 0x8D, 0x8F, 0x90, 0x9D) get 0xC2 + byte.
+	static const char *const highSeqs[32] = {
+		"\xe2\x82\xac",	// 0x80
+		"\xc2\x81",	// 0x81
+		"\xe2\x80\x9a",	// 0x82
+		"\xc6\x92",	// 0x83
+		"\xe2\x80\x9e",	// 0x84
+		"\xe2\x80\xa6",	// 0x85
+		"\xe2\x80\xa0",	// 0x86
+		"\xe2\x80\xa1",	// 0x87
+		"\xcb\x86",	// 0x88
+		"\xe2\x80\xb0",	// 0x89
+		"\xc5\xa0",	// 0x8A
+		"\xe2\x80\xb9",	// 0x8B
+		"\xc5\x92",	// 0x8C
+		"\xc2\x8d",	// 0x8D
+		"\xc5\xbd",	// 0x8E
+		"\xc2\x8f",	// 0x8F
+		"\xc2\x90",	// 0x90
+		"\xe2\x80\x98",	// 0x91
+		"\xe2\x80\x99",	// 0x92
+		"\xe2\x80\x9c",	// 0x93
+		"\xe2\x80\x9d",	// 0x94
+		"\xe2\x80\xa2",	// 0x95
+		"\xe2\x80\x93",	// 0x96
+		"\xe2\x80\x94",	// 0x97
+		"\xcb\x9c",	// 0x98
+		"\xe2\x84\xa2",	// 0x99
+		"\xc5\xa1",	// 0x9A
+		"\xe2\x80\xba",	// 0x9B
+		"\xc5\x93",	// 0x9C
+		"\xc2\x9d",	// 0x9D
+		"\xc5\xbe",	// 0x9E
+		"\xc5\xb8"	// 0x9F
+	};
+
+	if ((unsigned long)key < 2)	// hack, we're en(1)/de(0)ciphering
+		return (char)-1;
+
+	SWBuf orig = text;
+	const unsigned char *from = (const unsigned char *)orig.c_str();
+
+	text = "";
+	while (*from) {
+		const unsigned char ch = *from++;
+
+		if (ch < 0x80) {
+			// single-byte range: copied as is
+			text += (char)ch;
+		}
+		else if (ch < 0xA0) {
+			text += highSeqs[ch - 0x80];
+		}
+		else if (ch < 0xC0) {
+			text += (char)0xC2;
+			text += (char)ch;
+		}
+		else {
+			text += (char)0xC3;
+			text += (char)(ch - 0x40);
+		}
+	}
+	return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/osisfootnotes.cpp b/src/modules/filters/osisfootnotes.cpp
new file mode 100644
index 0000000..89c9c40
--- /dev/null
+++ b/src/modules/filters/osisfootnotes.cpp
@@ -0,0 +1,157 @@
+/******************************************************************************
+ *
+ * osisfootnotes - SWFilter descendant to hide or show footnotes
+ * in an OSIS module.
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <osisfootnotes.h>
+#include <swmodule.h>
+#include <swbuf.h>
+#include <versekey.h>
+#include <utilxml.h>
+#include <utilstr.h>
+
+SWORD_NAMESPACE_START
+
+// Name and tooltip handed to SWOptionFilter by the OSISFootnotes constructor.
+const char oName[] = "Footnotes";
+const char oTip[] = "Toggles Footnotes On and Off if they exist";
+
+// Selectable values. Only the first two entries are copied into oValues;
+// the trailing empty string merely marks the end of the copied range.
+const SWBuf choices[3] = {"Off", "On", ""};
+const StringList oValues(choices, choices + 2);
+
+
+// Registers the option name, tip and value list with SWOptionFilter and selects "Off".
+OSISFootnotes::OSISFootnotes()
+	: SWOptionFilter(oName, oTip, &oValues)
+{
+	setOptionValue("Off");
+}
+
+
+// Destructor: the body is empty, nothing is released here.
+OSISFootnotes::~OSISFootnotes() {}
+
+
+/**
+ * Strips the bodies of OSIS <note> elements out of the text and, when the
+ * module processes entry attributes, records them under
+ * EntryAttributes["Footnote"][n].
+ *
+ * A non-empty <note ...> start tag switches to "hide" mode: everything up to
+ * the matching </note> is collected in tagText instead of the output. At
+ * </note> the start tag (without body) and the end tag are written back only
+ * when the option is on or the note is of type "crossReference"; otherwise
+ * the whole note disappears. osisRef values of <reference> tokens are
+ * gathered in refs and used as the refList of cross-reference notes.
+ *
+ * NOTE(review): key and module are dereferenced without a null check -
+ * confirm that callers always pass both.
+ *
+ * @param text   entry text, rewritten in place
+ * @param key    current key; its text seeds the VerseKey used for parsing references
+ * @param module module whose entry attributes receive the footnotes
+ * @return always 0
+ */
+char OSISFootnotes::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ SWBuf token;
+ bool intoken = false;
+ bool hide = false;
+ SWBuf tagText;
+ XMLTag startTag;
+ SWBuf refs = "";
+ int footnoteNum = 1;
+ char buf[254];
+ VerseKey parser(key->getText());
+
+ SWBuf orig = text;
+ const char *from = orig.c_str();
+
+ XMLTag tag;
+ bool strongsMarkup = false;
+
+
+ for (text = ""; *from; ++from) {
+
+ // remove all newlines temporarily to fix kjv2003 module
+ // NOTE(review): the check looks at the second-to-last output character
+ // (length()-2), not the last one - confirm this is intended.
+ if ((*from == 10) || (*from == 13)) {
+ if ((text.length()>1) && (text[text.length()-2] != ' ') && (*(from+1) != ' '))
+ text.append(' ');
+ continue;
+ }
+
+
+ // start of a token: begin collecting it
+ if (*from == '<') {
+ intoken = true;
+ token = "";
+ continue;
+ }
+
+
+
+ if (*from == '>') { // process tokens
+ intoken = false;
+ if (!strncmp(token, "note", 4) || !strncmp(token.c_str(), "/note", 5)) {
+ tag = token;
+
+ if (!tag.isEndTag()) {
+ if (tag.getAttribute("type") && (!strcmp("x-strongsMarkup", tag.getAttribute("type"))
+ || !strcmp("strongsMarkup", tag.getAttribute("type"))) // deprecated
+ ) {
+ tag.setEmpty(false); // handle bug in KJV2003 module where some note open tags were <note ... />
+ strongsMarkup = true;
+ }
+
+ // a note with a body: remember its start tag and start hiding the body
+ if (!tag.isEmpty()) {
+// if ((!tag.isEmpty()) || (SWBuf("strongsMarkup") == tag.getAttribute("type"))) {
+ refs = "";
+ startTag = tag;
+ hide = true;
+ tagText = "";
+ continue;
+ }
+ }
+ // end of a hidden note: store it and decide whether the tags go back into the text
+ if (hide && tag.isEndTag()) {
+ if (module->isProcessEntryAttributes() && !strongsMarkup) { //don`t parse strongsMarkup to EntryAttributes as Footnote
+ sprintf(buf, "%i", footnoteNum++);
+ // copy every attribute of the start tag, then the collected body
+ StringList attributes = startTag.getAttributeNames();
+ for (StringList::const_iterator it = attributes.begin(); it != attributes.end(); it++) {
+ module->getEntryAttributes()["Footnote"][buf][it->c_str()] = startTag.getAttribute(it->c_str());
+ }
+ module->getEntryAttributes()["Footnote"][buf]["body"] = tagText;
+ startTag.setAttribute("swordFootnote", buf);
+ if ((startTag.getAttribute("type")) && (!strcmp(startTag.getAttribute("type"), "crossReference"))) {
+ // no <reference> tokens were seen: parse the note body as a verse list instead
+ if (!refs.length())
+ refs = parser.ParseVerseList(tagText.c_str(), parser, true).getRangeText();
+ module->getEntryAttributes()["Footnote"][buf]["refList"] = refs.c_str();
+ }
+ }
+ hide = false;
+ if (option || (startTag.getAttribute("type") && !strcmp(startTag.getAttribute("type"), "crossReference"))) { // we want the tag in the text; crossReferences are handled by another filter
+ text.append(startTag);
+// text.append(tagText); // we don't put the body back in because it is retrievable from EntryAttributes["Footnotes"][]["body"].
+ }
+ else continue;
+ }
+ strongsMarkup = false;
+ }
+
+ // if not a heading token, keep token in text
+ //if ((!strcmp(tag.getName(), "reference")) && (!tag.isEndTag())) {
+ // SWBuf osisRef = tag.getAttribute("osisRef");
+ // collect the osisRef value of every <reference> token, separated by "; "
+ if (!strncmp(token, "reference", 9)) {
+ if (refs.length()) {
+ refs.append("; ");
+ }
+
+ const char* attr = strstr(token.c_str() + 9, "osisRef=\"");
+ const char* end = attr ? strchr(attr+9, '"') : 0;
+
+ if (attr && end) {
+ refs.append(attr+9, end-(attr+9));
+ }
+ }
+ // the token goes to the output, or to the note body while hiding
+ if (!hide) {
+ text.append('<');
+ text.append(token);
+ text.append('>');
+ }
+ else {
+ tagText.append('<');
+ tagText.append(token);
+ tagText.append('>');
+ }
+ continue;
+ }
+ if (intoken) { //copy token
+ token.append(*from);
+ }
+ else if (!hide) { //copy text which is not inside a token
+ text.append(*from);
+ }
+ else tagText.append(*from);
+ }
+ return 0;
+}
+
+SWORD_NAMESPACE_END
+
diff --git a/src/modules/filters/osisheadings.cpp b/src/modules/filters/osisheadings.cpp
new file mode 100644
index 0000000..a072335
--- /dev/null
+++ b/src/modules/filters/osisheadings.cpp
@@ -0,0 +1,144 @@
+/******************************************************************************
+ *
+ * osisheadings - SWFilter descendant to hide or show headings
+ * in an OSIS module.
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <osisheadings.h>
+#include <swmodule.h>
+#include <utilxml.h>
+#include <utilstr.h>
+
+SWORD_NAMESPACE_START
+
+// Name and tooltip handed to SWOptionFilter by the OSISHeadings constructor.
+const char oName[] = "Headings";
+const char oTip[] = "Toggles Headings On and Off if they exist";
+
+// Selectable values. Only the first two entries are copied into oValues;
+// the trailing empty string merely marks the end of the copied range.
+const SWBuf choices[3] = {"Off", "On", ""};
+const StringList oValues(choices, choices + 2);
+
+// Registers the option name, tip and value list with SWOptionFilter and selects "Off".
+OSISHeadings::OSISHeadings()
+	: SWOptionFilter(oName, oTip, &oValues)
+{
+	setOptionValue("Off");
+}
+
+
+// Destructor: the body is empty, nothing is released here.
+OSISHeadings::~OSISHeadings() {}
+
+
+/**
+ * Hides or shows OSIS <title> elements and records their content under
+ * EntryAttributes["Heading"].
+ *
+ * Text between <title ...> and </title> is collected in header instead of the
+ * output. At </title> the header is stored as ["Heading"]["Preverse"][n] for
+ * titles marked x-preverse, otherwise as ["Heading"]["Interverse"][n]; the
+ * attributes of the last non-empty start tag are stored under
+ * ["Heading"][n]. Interverse headings are written back into the text only
+ * when the option is on or the title is canonical; preverse headings are
+ * never written back.
+ *
+ * NOTE(review): module is dereferenced without a null check - confirm that
+ * callers always pass one.
+ *
+ * @param text   entry text, rewritten in place
+ * @param key    unused
+ * @param module module whose entry attributes receive the headings
+ * @return always 0
+ */
+char OSISHeadings::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ SWBuf token;
+ bool intoken = false;
+ bool hide = false;
+ bool preverse = false;
+ bool withinTitle = false;
+ bool canonical = false;
+ SWBuf header;
+ int headerNum = 0;
+ int pvHeaderNum = 0;
+ char buf[254];
+ XMLTag startTag;
+
+ SWBuf orig = text;
+ const char *from = orig.c_str();
+
+ XMLTag tag;
+
+ for (text = ""; *from; ++from) {
+ // start of a token: begin collecting it
+ if (*from == '<') {
+ intoken = true;
+ token = "";
+
+ continue;
+ }
+ if (*from == '>') { // process tokens
+ intoken = false;
+
+ if (!strncmp(token.c_str(), "title", 5) || !strncmp(token.c_str(), "/title", 6)) {
+ // true for a start tag, false for "/title"
+ withinTitle = (!strnicmp(token.c_str(), "title", 5));
+ tag = token;
+
+ if (!tag.isEndTag()) { //start tag
+ if (!tag.isEmpty()) {
+ startTag = tag;
+ }
+ }
+
+ // preverse title: hide its content and keep the tag itself out of the text
+ if ( (tag.getAttribute("subType") && !stricmp(tag.getAttribute("subType"), "x-preverse"))
+ || (tag.getAttribute("subtype") && !stricmp(tag.getAttribute("subtype"), "x-preverse")) // deprecated
+ ) {
+ hide = true;
+ preverse = true;
+ header = "";
+ canonical = (tag.getAttribute("canonical") && (!stricmp(tag.getAttribute("canonical"), "true")));
+ continue;
+ }
+ // canonical is only recomputed for preverse titles, so the value
+ // tested below may stem from an earlier title
+ if (!tag.isEndTag()) { //start tag
+ hide = true;
+ header = "";
+ if (option || canonical) { // we want the tag in the text
+ text.append('<');
+ text.append(token);
+ text.append('>');
+ }
+ continue;
+ }
+ // end of a hidden title: store the collected header
+ if (hide && tag.isEndTag()) {
+ if (module->isProcessEntryAttributes() && ((option || canonical) || (!preverse))) {
+ if (preverse) {
+ sprintf(buf, "%i", pvHeaderNum++);
+ module->getEntryAttributes()["Heading"]["Preverse"][buf] = header;
+ }
+ else {
+ sprintf(buf, "%i", headerNum++);
+ module->getEntryAttributes()["Heading"]["Interverse"][buf] = header;
+ if (option || canonical) { // we want the tag in the text
+ text.append(header);
+ }
+ }
+
+ // copy every attribute of the remembered start tag
+ StringList attributes = startTag.getAttributeNames();
+ for (StringList::const_iterator it = attributes.begin(); it != attributes.end(); it++) {
+ module->getEntryAttributes()["Heading"][buf][it->c_str()] = startTag.getAttribute(it->c_str());
+ }
+ }
+
+ hide = false;
+ if (!(option || canonical) || preverse) { // we don't want the tag in the text anymore
+ preverse = false;
+ continue;
+ }
+ preverse = false;
+ }
+ }
+
+ // tokens inside a title become part of the header
+ if (withinTitle) {
+ header.append('<');
+ header.append(token);
+ header.append('>');
+ } else {
+ // if not a heading token, keep token in text
+ if (!hide) {
+ text.append('<');
+ text.append(token);
+ text.append('>');
+ }
+ }
+ continue;
+ }
+ if (intoken) { //copy token
+ token.append(*from);
+ }
+ else if (!hide) { //copy text which is not inside a token
+ text.append(*from);
+ }
+ else header.append(*from);
+ }
+ return 0;
+}
+
+SWORD_NAMESPACE_END
+
diff --git a/src/modules/filters/osishtmlhref.cpp b/src/modules/filters/osishtmlhref.cpp
new file mode 100644
index 0000000..fe3e058
--- /dev/null
+++ b/src/modules/filters/osishtmlhref.cpp
@@ -0,0 +1,561 @@
+/***************************************************************************
+ osishtmlhref.cpp - OSIS to HTML with hrefs filter
+ -------------------
+ begin : 2003-06-24
+ copyright : 2003 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation version 2 of the License. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <ctype.h>
+#include <osishtmlhref.h>
+#include <utilxml.h>
+#include <utilstr.h>
+#include <versekey.h>
+#include <swmodule.h>
+#include <url.h>
+#include <stringmgr.h>
+#include <stack>
+
+SWORD_NAMESPACE_START
+
+// Stack of heap-allocated tag strings, one per open non-empty <q> start tag.
+// Entries are pushed by handleToken() and released with delete [] when they are
+// popped there or in ~MyUserData().
+class OSISHTMLHREF::QuoteStack : public std::stack<char *> {
+};
+
+/**
+ * Initialise the per-render state used by OSISHTMLHREF::handleToken().
+ *
+ * @param module module being rendered; may be NULL, in which case defaults are used
+ * @param key    key of the entry being rendered (forwarded to BasicFilterUserData)
+ */
+OSISHTMLHREF::MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) {
+	inBold = false;
+	inXRefNote = false;
+	suspendLevel = 0;
+	quoteStack = new QuoteStack();
+	wordsOfChristStart = "<font color=\"red\"> ";
+	wordsOfChristEnd = "</font> ";
+	if (module) {
+		// quote marks are generated unless the module config says OSISqToTick=false
+		const char *qToTick = module->getConfigEntry("OSISqToTick");
+		osisQToTick = ((!qToTick) || (strcmp(qToTick, "false")));
+		version = module->Name();
+		BiblicalText = (!strcmp(module->Type(), "Biblical Texts"));
+	}
+	else {
+		osisQToTick = true;	// default
+		version = "";
+		// Was left uninitialised on this path although handleToken() reads it
+		// when rendering </reference>; default to "not a Biblical text".
+		BiblicalText = false;
+	}
+}
+
+// Release the quote stack together with any tag strings still queued on it.
+OSISHTMLHREF::MyUserData::~MyUserData() {
+	// <q> elements may be unbalanced in the source text, so entries can be left over
+	for (; !quoteStack->empty(); quoteStack->pop()) {
+		delete [] quoteStack->top();
+	}
+	delete quoteStack;
+}
+
+// Configure the token/escape delimiters and the fixed substitutions for this filter.
+OSISHTMLHREF::OSISHTMLHREF() {
+	// entity names registered through addAllowedEscapeString()
+	static const char *const allowedEntities[] = { "quot", "apos", "amp", "lt", "gt" };
+	// line-group start and end tags are both replaced by the same markup
+	static const char *const lineGroupTokens[] = { "lg", "/lg" };
+
+	setTokenStart("<");
+	setTokenEnd(">");
+
+	setEscapeStart("&");
+	setEscapeEnd(";");
+
+	setEscapeStringCaseSensitive(true);
+	setPassThruNumericEscapeString(true);
+
+	for (unsigned int e = 0; e < sizeof(allowedEntities) / sizeof(allowedEntities[0]); ++e) {
+		addAllowedEscapeString(allowedEntities[e]);
+	}
+
+	setTokenCaseSensitive(true);
+
+	for (unsigned int t = 0; t < sizeof(lineGroupTokens) / sizeof(lineGroupTokens[0]); ++t) {
+		addTokenSubstitute(lineGroupTokens[t], "<br />");
+	}
+
+	morphFirst = false;
+}
+
+// Output helpers: append t to the rendered buffer o, or, while text pass-through
+// is suspended, to u->lastSuspendSegment instead. The extra bool check is a
+// negligible cost for a render filter.
+static inline void outText(const char *t, SWBuf &o, BasicFilterUserData *u) {
+	if (u->suspendTextPassThru)
+		u->lastSuspendSegment += t;
+	else
+		o += t;
+}
+static inline void outText(char t, SWBuf &o, BasicFilterUserData *u) {
+	if (u->suspendTextPassThru)
+		u->lastSuspendSegment += t;
+	else
+		o += t;
+}
+
+/**
+ * Append one Strong's link per space-separated part of the tag's "lemma"
+ * attribute to buf.
+ *
+ * For each part, any "prefix:" up to the first ':' is dropped. A leading 'G' or
+ * 'H' selects the "Greek"/"Hebrew" link type and is stripped from the displayed
+ * number when it is followed by a digit.
+ *
+ * @param suspendTextPassThru when true nothing is appended
+ * @param tag                 the <w> tag whose lemma attribute is rendered
+ * @param buf                 output buffer
+ */
+void processLemma(bool suspendTextPassThru, XMLTag &tag, SWBuf &buf) {
+	if (!tag.getAttribute("lemma"))
+		return;
+
+	int count = tag.getAttributePartCount("lemma", ' ');
+	int i = (count > 1) ? 0 : -1;		// -1 for whole value cuz it's faster, but does the same thing as 0
+	do {
+		const char *attrib = tag.getAttribute("lemma", i, ' ');
+		if (i < 0) i = 0;	// to handle our -1 condition
+		if (!attrib)
+			continue;	// no such part; nothing to render
+
+		const char *val = strchr(attrib, ':');
+		val = (val) ? (val + 1) : attrib;
+
+		const char *gh = "";
+		if (*val == 'G')
+			gh = "Greek";
+		else if (*val == 'H')
+			gh = "Hebrew";
+
+		// Compare explicitly instead of strchr("GH", *val): strchr also matches the
+		// terminating NUL, which made the old code read val[1] past the end of an
+		// empty value. isdigit() needs an unsigned char argument.
+		const char *val2 = val;
+		if (((*val == 'G') || (*val == 'H')) && (isdigit((unsigned char)val[1])))
+			val2++;
+
+		if (!suspendTextPassThru) {
+			buf.appendFormatted("<small><em>&lt;<a href=\"passagestudy.jsp?action=showStrongs&type=%s&value=%s\">%s</a>&gt;</em></small>",
+					gh,
+					URL::encode(val2).c_str(),
+					val2);
+		}
+	} while (++i < count);
+}
+
+/**
+ * Append one morphology link per space-separated part of the tag's "morph"
+ * attribute to buf.
+ *
+ * For each part, any "prefix:" up to the first ':' is dropped. The link's type
+ * parameter is the complete morph attribute and its value parameter is the part
+ * value. The displayed text drops a leading "TG"/"TH" when a digit follows.
+ *
+ * @param suspendTextPassThru when true nothing is appended
+ * @param tag                 the <w> tag whose morph attribute is rendered
+ * @param buf                 output buffer
+ */
+void processMorph(bool suspendTextPassThru, XMLTag &tag, SWBuf &buf) {
+	if (!tag.getAttribute("morph"))
+		return;
+
+	int count = tag.getAttributePartCount("morph", ' ');
+	int i = (count > 1) ? 0 : -1;		// -1 for whole value cuz it's faster, but does the same thing as 0
+	do {
+		const char *attrib = tag.getAttribute("morph", i, ' ');
+		if (i < 0) i = 0;	// to handle our -1 condition
+		if (!attrib)
+			continue;	// no such part; nothing to render
+
+		const char *val = strchr(attrib, ':');
+		val = (val) ? (val + 1) : attrib;
+
+		// Compare explicitly instead of strchr("GH", val[1]): strchr also matches
+		// the terminating NUL, which made the old code read val[2] past the end of
+		// the value "T". isdigit() needs an unsigned char argument.
+		const char *val2 = val;
+		if ((*val == 'T') && ((val[1] == 'G') || (val[1] == 'H')) && (isdigit((unsigned char)val[2])))
+			val2 += 2;
+
+		if (!suspendTextPassThru) {
+			buf.appendFormatted("<small><em>(<a href=\"passagestudy.jsp?action=showMorph&type=%s&value=%s\">%s</a>)</em></small>",
+					URL::encode(tag.getAttribute("morph")).c_str(),
+					URL::encode(val).c_str(),
+					val2);
+		}
+	} while (++i < count);
+}
+
+// Render a single OSIS token (the text between '<' and '>') as HTML appended to buf.
+//
+// buf      - output buffer for the rendered text
+// token    - raw tag text without the surrounding angle brackets
+// userData - per-render state; used here as OSISHTMLHREF::MyUserData
+//
+// Returns true when the token was handled, either by a registered substitution
+// or by one of the tag branches below, and false when it was not recognised
+// (or when a <figure> has no src attribute).
+bool OSISHTMLHREF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+ MyUserData *u = (MyUserData *)userData;
+ SWBuf scratch;
+ // while pass-through is suspended the substitution is written to scratch, not to buf
+ bool sub = (u->suspendTextPassThru) ? substituteToken(scratch, token) : substituteToken(buf, token);
+ if (!sub) {
+ // manually process if it wasn't a simple substitution
+ XMLTag tag(token);
+
+ // <w> tag
+ if (!strcmp(tag.getName(), "w")) {
+
+ // start <w> tag
+ // only remembered here; its attributes are rendered when the matching </w> arrives
+ if ((!tag.isEmpty()) && (!tag.isEndTag())) {
+ u->w = token;
+ }
+
+ // end or empty <w> tag
+ else {
+ bool endTag = tag.isEndTag();
+ // lastText is assigned below but not read by any active code in this branch
+ SWBuf lastText;
+ //bool show = true; // to handle unplaced article in kjv2003-- temporary till combined
+
+ if (endTag) {
+ // for an end tag, render the attributes of the saved start tag
+ tag = u->w.c_str();
+ lastText = u->lastTextNode.c_str();
+ }
+ else lastText = "stuff";
+
+ const char *attrib;
+ const char *val;
+ // for xlit, gloss and POS: skip anything up to the first ':' and emit the rest after a space
+ if ((attrib = tag.getAttribute("xlit"))) {
+ val = strchr(attrib, ':');
+ val = (val) ? (val + 1) : attrib;
+ outText(" ", buf, u);
+ outText(val, buf, u);
+ }
+ if ((attrib = tag.getAttribute("gloss"))) {
+ val = strchr(attrib, ':');
+ val = (val) ? (val + 1) : attrib;
+ outText(" ", buf, u);
+ outText(val, buf, u);
+ }
+ // morphFirst only controls the order of the lemma and morph links
+ if (!morphFirst) {
+ processLemma(u->suspendTextPassThru, tag, buf);
+ processMorph(u->suspendTextPassThru, tag, buf);
+ }
+ else {
+ processMorph(u->suspendTextPassThru, tag, buf);
+ processLemma(u->suspendTextPassThru, tag, buf);
+ }
+ if ((attrib = tag.getAttribute("POS"))) {
+ val = strchr(attrib, ':');
+ val = (val) ? (val + 1) : attrib;
+ outText(" ", buf, u);
+ outText(val, buf, u);
+ }
+
+ /*if (endTag)
+ buf += "}";*/
+ }
+ }
+
+ // <note> tag
+ else if (!strcmp(tag.getName(), "note")) {
+ if (!tag.isEndTag()) {
+ SWBuf type = tag.getAttribute("type");
+ bool strongsMarkup = (type == "x-strongsMarkup" || type == "strongsMarkup"); // the latter is deprecated
+ if (strongsMarkup) {
+ tag.setEmpty(false); // handle bug in KJV2003 module where some note open tags were <note ... />
+ }
+
+ if (!tag.isEmpty()) {
+
+ if (!strongsMarkup) { // leave strong's markup notes out, in the future we'll probably have different option filters to turn different note types on or off
+ SWBuf footnoteNumber = tag.getAttribute("swordFootnote");
+ VerseKey *vkey = NULL;
+ // 'x' for cross-reference notes, 'n' for every other note type
+ char ch = ((tag.getAttribute("type") && ((!strcmp(tag.getAttribute("type"), "crossReference")) || (!strcmp(tag.getAttribute("type"), "x-cross-ref")))) ? 'x':'n');
+
+ u->inXRefNote = true; // Why this change? Ben Morgan: Any note can have references in, so we need to set this to true for all notes
+// u->inXRefNote = (ch == 'x');
+
+ // see if we have a VerseKey * or descendant
+ SWTRY {
+ vkey = SWDYNAMIC_CAST(VerseKey, u->key);
+ }
+ SWCATCH ( ... ) { }
+ // both branches emit the same link; they differ only in which key object supplies the passage text
+ if (vkey) {
+ //printf("URL = %s\n",URL::encode(vkey->getText()).c_str());
+ buf.appendFormatted("<a href=\"passagestudy.jsp?action=showNote&type=%c&value=%s&module=%s&passage=%s\"><small><sup>*%c</sup></small></a>",
+ ch,
+ URL::encode(footnoteNumber.c_str()).c_str(),
+ URL::encode(u->version.c_str()).c_str(),
+ URL::encode(vkey->getText()).c_str(),
+ ch);
+ }
+ else {
+ // NOTE(review): u->key is dereferenced here without a null check — confirm callers always supply a key
+ buf.appendFormatted("<a href=\"passagestudy.jsp?action=showNote&type=%c&value=%s&module=%s&passage=%s\"><small><sup>*%c</sup></small></a>",
+ ch,
+ URL::encode(footnoteNumber.c_str()).c_str(),
+ URL::encode(u->version.c_str()).c_str(),
+ URL::encode(u->key->getText()).c_str(),
+ ch);
+ }
+ }
+ }
+ // the note body is diverted (see outText) until the matching end tag
+ // NOTE(review): this also runs for an empty <note .../> that is not strongsMarkup, which has no end tag to undo it — confirm such tags cannot occur
+ u->suspendTextPassThru = (++u->suspendLevel);
+ }
+ if (tag.isEndTag()) {
+ u->suspendTextPassThru = (--u->suspendLevel);
+ u->inXRefNote = false;
+ u->lastSuspendSegment = ""; // fix/work-around for nasb devineName in note bug
+ }
+ }
+
+ // <p> paragraph tag
+ else if (!strcmp(tag.getName(), "p")) {
+ if ((!tag.isEndTag()) && (!tag.isEmpty())) { // non-empty start tag
+ outText("<!P><br />", buf, u);
+ }
+ else if (tag.isEndTag()) { // end tag
+ outText("<!/P><br />", buf, u);
+ userData->supressAdjacentWhitespace = true;
+ }
+ else { // empty paragraph break marker
+ outText("<!P><br />", buf, u);
+ userData->supressAdjacentWhitespace = true;
+ }
+ }
+
+ // <reference> tag
+ else if (!strcmp(tag.getName(), "reference")) {
+ if (!u->inXRefNote) { // only show these if we're not in an xref note
+ // the reference text is diverted while inside the element and rendered at the end tag
+ if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+ u->suspendTextPassThru = (++u->suspendLevel);
+ }
+ if (tag.isEndTag()) {
+ if (!u->BiblicalText) {
+ // not a "Biblical Texts" module: render the reference as an inline link
+ // tag is the end tag here; the passage falls back to the last text node when it has no "passage" attribute
+ SWBuf refList = tag.getAttribute("passage");
+ if (!refList.length())
+ refList = u->lastTextNode;
+ SWBuf version = tag.getAttribute("version");
+
+ buf.appendFormatted("&nbsp;<a href=\"passagestudy.jsp?action=showRef&type=scripRef&value=%s&module=%s\">",
+ (refList.length()) ? URL::encode(refList.c_str()).c_str() : "",
+ (version.length()) ? URL::encode(version.c_str()).c_str() : "");
+ buf += u->lastTextNode.c_str();
+ buf += "</a>&nbsp;";
+ }
+ else {
+ // "Biblical Texts" module: render a "*x" note marker instead of the reference text
+ SWBuf footnoteNumber = tag.getAttribute("swordFootnote");
+ VerseKey *vkey = NULL;
+ // see if we have a VerseKey * or descendant
+ SWTRY {
+ vkey = SWDYNAMIC_CAST(VerseKey, u->key);
+ }
+ SWCATCH ( ... ) {}
+ if (vkey) {
+ // leave this special osis type in for crossReference notes types? Might thml use this some day? Doesn't hurt.
+ //buf.appendFormatted("<a href=\"noteID=%s.x.%s\"><small><sup>*x</sup></small></a> ", vkey->getText(), footnoteNumber.c_str());
+ buf.appendFormatted("<a href=\"passagestudy.jsp?action=showNote&type=x&value=%s&module=%s&passage=%s\"><small><sup>*x</sup></small></a>",
+ URL::encode(footnoteNumber.c_str()).c_str(),
+ URL::encode(u->version.c_str()).c_str(),
+ URL::encode(vkey->getText()).c_str());
+
+ }
+ }
+ u->suspendTextPassThru = (--u->suspendLevel);
+ }
+ }/*
+ if (tag.isEndTag()) {
+ u->suspendTextPassThru = false;
+ }*/
+ }
+
+ // <l> poetry, etc
+ else if (!strcmp(tag.getName(), "l")) {
+ // end line marker
+ if (tag.getAttribute("eID")) {
+ outText("<br />", buf, u);
+ }
+ // <l/> without eID or sID
+ // Note: this is improper osis. This should be <lb/>
+ else if (tag.isEmpty() && !tag.getAttribute("sID")) {
+ outText("<br />", buf, u);
+ }
+ // end of the line
+ else if (tag.isEndTag()) {
+ outText("<br />", buf, u);
+ }
+ }
+
+ // <lb.../>
+ else if (!strcmp(tag.getName(), "lb")) {
+ outText("<br />", buf, u);
+ userData->supressAdjacentWhitespace = true;
+ }
+ // <milestone type="line"/>
+ // <milestone type="x-p"/>
+ // <milestone type="cQuote" marker="x"/>
+ else if ((!strcmp(tag.getName(), "milestone")) && (tag.getAttribute("type"))) {
+ if(!strcmp(tag.getAttribute("type"), "line")) {
+ outText("<br />", buf, u);
+ userData->supressAdjacentWhitespace = true;
+ }
+ else if(!strcmp(tag.getAttribute("type"),"x-p")) {
+ if( tag.getAttribute("marker"))
+ outText(tag.getAttribute("marker"), buf, u);
+ else outText("<!p>", buf, u);
+ }
+ else if (!strcmp(tag.getAttribute("type"), "cQuote")) {
+ const char *tmp = tag.getAttribute("marker");
+ bool hasMark = tmp;
+ SWBuf mark = tmp;
+ tmp = tag.getAttribute("level");
+ int level = (tmp) ? atoi(tmp) : 1;
+
+ // first check to see if we've been given an explicit mark
+ if (hasMark)
+ outText(mark, buf, u);
+ // finally, alternate " and ', if config says we should supply a mark
+ else if (u->osisQToTick)
+ outText((level % 2) ? '\"' : '\'', buf, u);
+ }
+ }
+
+ // <title>
+ else if (!strcmp(tag.getName(), "title")) {
+ if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+ outText("<b>", buf, u);
+ }
+ else if (tag.isEndTag()) {
+ outText("</b><br />", buf, u);
+ }
+ }
+
+ // <catchWord> & <rdg> tags (italicize)
+ else if (!strcmp(tag.getName(), "rdg") || !strcmp(tag.getName(), "catchWord")) {
+ if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+ outText("<i>", buf, u);
+ }
+ else if (tag.isEndTag()) {
+ outText("</i>", buf, u);
+ }
+ }
+
+ // divineName
+ else if (!strcmp(tag.getName(), "divineName")) {
+ // the name is diverted into lastSuspendSegment and rendered at the end tag
+ if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+ u->suspendTextPassThru = (++u->suspendLevel);
+ }
+ else if (tag.isEndTag()) {
+ SWBuf lastText = u->lastSuspendSegment.c_str();
+ u->suspendTextPassThru = (--u->suspendLevel);
+ if (lastText.size()) {
+ // upper-case everything; all but the first character go in a smaller font
+ toupperstr(lastText);
+ scratch.setFormatted("%c<font size=\"-1\">%s</font>", lastText[0], lastText.c_str()+1);
+ outText(scratch.c_str(), buf, u);
+ }
+ }
+ }
+
+ // <hi> text highlighting
+ else if (!strcmp(tag.getName(), "hi")) {
+ SWBuf type = tag.getAttribute("type");
+ if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+ if (type == "b" || type == "x-b") {
+ outText("<b>", buf, u);
+ u->inBold = true;
+ }
+ else { // all other types
+ outText("<i>", buf, u);
+ u->inBold = false;
+ }
+ }
+ // inBold is a single flag, so the end tag is rendered from the most recent <hi> start tag
+ else if (tag.isEndTag()) {
+ if(u->inBold) {
+ outText("</b>", buf, u);
+ u->inBold = false;
+ }
+ else outText("</i>", buf, u);
+ }
+ }
+
+ // <q> quote
+ // Rules for a quote element:
+ // If the tag is empty with an sID or an eID then use whatever it specifies for quoting.
+ // Note: empty elements without sID or eID are ignored.
+ // If the tag is <q> then use its specifications and push it onto a stack for </q>
+ // If the tag is </q> then use the pushed <q> for specification
+ // If there is a marker attribute, possibly empty, this overrides osisQToTick.
+ // If osisQToTick, then output the marker, using level to determine the type of mark.
+ else if (!strcmp(tag.getName(), "q")) {
+ SWBuf type = tag.getAttribute("type");
+ SWBuf who = tag.getAttribute("who");
+ const char *tmp = tag.getAttribute("level");
+ int level = (tmp) ? atoi(tmp) : 1;
+ tmp = tag.getAttribute("marker");
+ bool hasMark = tmp;
+ SWBuf mark = tmp;
+
+ // open <q> or <q sID... />
+ if ((!tag.isEmpty() && !tag.isEndTag()) || (tag.isEmpty() && tag.getAttribute("sID"))) {
+ // if <q> then remember it for the </q>
+ if (!tag.isEmpty()) {
+ // the copy is released with delete [] when it is popped
+ char *tagData = 0;
+ stdstr(&tagData, tag.toString());
+ u->quoteStack->push(tagData);
+ }
+
+ // Do this first so quote marks are included as WoC
+ if (who == "Jesus")
+ outText(u->wordsOfChristStart, buf, u);
+
+ // first check to see if we've been given an explicit mark
+ if (hasMark)
+ outText(mark, buf, u);
+ //alternate " and '
+ else if (u->osisQToTick)
+ outText((level % 2) ? '\"' : '\'', buf, u);
+ }
+ // close </q> or <q eID... />
+ else if ((tag.isEndTag()) || (tag.isEmpty() && tag.getAttribute("eID"))) {
+ // if it is </q> then pop the stack for the attributes
+ if (tag.isEndTag() && !u->quoteStack->empty()) {
+ char *tagData = u->quoteStack->top();
+ u->quoteStack->pop();
+ XMLTag qTag(tagData);
+ delete [] tagData;
+
+ type = qTag.getAttribute("type");
+ who = qTag.getAttribute("who");
+ tmp = qTag.getAttribute("level");
+ level = (tmp) ? atoi(tmp) : 1;
+ tmp = qTag.getAttribute("marker");
+ hasMark = tmp;
+ mark = tmp;
+ }
+
+ // first check to see if we've been given an explicit mark
+ if (hasMark)
+ outText(mark, buf, u);
+ // finally, alternate " and ', if config says we should supply a mark
+ else if (u->osisQToTick)
+ outText((level % 2) ? '\"' : '\'', buf, u);
+
+ // Do this last so quote marks are included as WoC
+ if (who == "Jesus")
+ outText(u->wordsOfChristEnd, buf, u);
+ }
+ }
+
+ // <transChange>
+ else if (!strcmp(tag.getName(), "transChange")) {
+ if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+ SWBuf type = tag.getAttribute("type");
+ // remembered so the end tag knows whether an <i> has to be closed
+ u->lastTransChange = type;
+
+ // just do all transChange tags this way for now
+ if ((type == "added") || (type == "supplied"))
+ outText("<i>", buf, u);
+ // NOTE(review): written straight to buf rather than through outText(), unlike the other branches — confirm this is intended
+ else if (type == "tenseChange")
+ buf += "*";
+ }
+ else if (tag.isEndTag()) {
+ SWBuf type = u->lastTransChange;
+ if ((type == "added") || (type == "supplied"))
+ outText("</i>", buf, u);
+ }
+ else { // empty transChange marker?
+ }
+ }
+
+ // image
+ else if (!strcmp(tag.getName(), "figure")) {
+ const char *src = tag.getAttribute("src");
+ if (!src) // assert we have a src attribute
+ return false;
+
+ // prefix src with the module's AbsoluteDataPath, adding a '/' when neither side has one
+ SWBuf filepath;
+ if (userData->module) {
+ filepath = userData->module->getConfigEntry("AbsoluteDataPath");
+ if ((filepath.size()) && (filepath[filepath.size()-1] != '/') && (src[0] != '/'))
+ filepath += '/';
+ }
+ filepath += src;
+
+ // images become clickable, if the UI supports showImage.
+ outText("<a href=\"passagestudy.jsp?action=showImage&value=", buf, u);
+ outText(URL::encode(filepath.c_str()).c_str(), buf, u);
+ outText("&module=", buf, u);
+ outText(URL::encode(u->version.c_str()).c_str(), buf, u);
+ outText("\">", buf, u);
+
+// we do this because BibleCS looks for this EXACT format for an image tag
+ outText("<image border=0 src=\"", buf, u);
+ outText(filepath, buf, u);
+ outText("\" />", buf, u);
+
+ outText("</a>", buf, u);
+ }
+
+ else {
+ return false; // we still didn't handle token
+ }
+ }
+ return true;
+}
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/osislemma.cpp b/src/modules/filters/osislemma.cpp
new file mode 100644
index 0000000..f5e6ff6
--- /dev/null
+++ b/src/modules/filters/osislemma.cpp
@@ -0,0 +1,85 @@
+/******************************************************************************
+ *
+ * osislemma - SWFilter descendant to hide or show lemmata
+ * in an OSIS module.
+ */
+
+
+#include <stdlib.h>
+#include <osislemma.h>
+#include <utilxml.h>
+
+SWORD_NAMESPACE_START
+
+// Option name and tool tip handed to SWOptionFilter for this filter.
+const char oName[] = "Lemmas";
+const char oTip[] = "Toggles Lemmas On and Off if they exist";
+
+// oValues is built from the range [&choices[0], &choices[2]), i.e. "Off" and "On";
+// the trailing "" only marks the end of that range and is not one of the values
+// (assumes StringList has a standard iterator-range constructor — TODO confirm).
+const SWBuf choices[3] = {"Off", "On", ""};
+const StringList oValues(&choices[0], &choices[2]);
+
+// The filter starts with the option set to "Off".
+OSISLemma::OSISLemma() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("Off");
+}
+
+
+// Nothing to release.
+OSISLemma::~OSISLemma() {
+}
+
+
+/**
+ * When the option is Off, rewrite text with lemma information removed from
+ * every <w ...> tag. A part of the "lemma" attribute is removed when it has no
+ * "prefix:" or when its prefix starts with "lemma."; other parts are kept.
+ * When the option is On the text is left untouched.
+ *
+ * @param text   entry text, modified in place
+ * @param key    unused
+ * @param module unused
+ * @return always 0
+ */
+char OSISLemma::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+	// Nothing to strip: return before copying the text (the copy used to be
+	// made unconditionally).
+	if (option)
+		return 0;
+
+	SWBuf token;
+	bool intoken = false;
+
+	const SWBuf orig = text;
+	const char *from = orig.c_str();
+
+	for (text = ""; *from; ++from) {
+		if (*from == '<') {
+			intoken = true;
+			token = "";
+			continue;
+		}
+		if (*from == '>') {	// process tokens
+			intoken = false;
+			if (token.startsWith("w ")) {	// Word
+				XMLTag wtag(token);
+				int count = wtag.getAttributePartCount("lemma", ' ');
+				for (int i = 0; i < count; i++) {
+					SWBuf a = wtag.getAttribute("lemma", i, ' ');
+					const char *prefix = a.stripPrefix(':');
+					if ((!prefix) || SWBuf(prefix).startsWith("lemma.")) {
+						// remove attribute part; the following parts shift down,
+						// so revisit the same index
+						wtag.setAttribute("lemma", 0, i, ' ');
+						i--;
+						count--;
+					}
+				}
+				token = wtag;
+				token.trim();
+				// drop <>
+				token << 1;
+				token--;
+			}
+
+			// keep token in text
+			text.append('<');
+			text.append(token);
+			text.append('>');
+
+			continue;
+		}
+		if (intoken) {
+			token += *from;
+		}
+		else {
+			text.append(*from);
+		}
+	}
+	return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/osismorph.cpp b/src/modules/filters/osismorph.cpp
new file mode 100644
index 0000000..69d44d5
--- /dev/null
+++ b/src/modules/filters/osismorph.cpp
@@ -0,0 +1,85 @@
+/******************************************************************************
+ *
+ * osismorph - SWFilter descendant to hide or show morph tags
+ * in an OSIS module.
+ */
+
+
+#include <stdlib.h>
+#include <osismorph.h>
+
+SWORD_NAMESPACE_START
+
+// Option name and tool tip handed to SWOptionFilter for this filter.
+const char oName[] = "Morphological Tags";
+const char oTip[] = "Toggles Morphological Tags On and Off if they exist";
+
+// oValues is built from the range [&choices[0], &choices[2]), i.e. "Off" and "On";
+// the trailing "" only marks the end of that range and is not one of the values
+// (assumes StringList has a standard iterator-range constructor — TODO confirm).
+const SWBuf choices[3] = {"Off", "On", ""};
+const StringList oValues(&choices[0], &choices[2]);
+
+// The filter starts with the option set to "Off".
+OSISMorph::OSISMorph() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("Off");
+}
+
+
+// Nothing to release.
+OSISMorph::~OSISMorph() {
+}
+
+
+/**
+ * When the option is Off, rewrite text with the morph="..." attribute cut out
+ * of every <w ...> tag. All other tags and all text are copied unchanged. When
+ * the option is On the text is left untouched.
+ *
+ * @param text   entry text, modified in place
+ * @param key    unused
+ * @param module unused
+ * @return always 0
+ */
+char OSISMorph::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+	if (option)	// morph tags are wanted: nothing to do
+		return 0;
+
+	// The token used to be collected in a fixed char[2048], which silently
+	// truncated longer tags and was read uninitialised if '>' came before any
+	// '<'. A growable SWBuf, as used by the sibling filters, avoids both.
+	SWBuf token;
+	bool intoken = false;
+	const SWBuf orig = text;
+	const char *from = orig.c_str();
+
+	for (text = ""; *from; ++from) {
+		if (*from == '<') {
+			intoken = true;
+			token = "";
+			continue;
+		}
+		if (*from == '>') {	// process tokens
+			intoken = false;
+
+			const char *tok = token.c_str();
+			if ((tok[0] == 'w') && (tok[1] == ' ')) {
+				const char *start = strstr(tok + 2, "morph=\"");	// we leave out "w " at the start
+				const char *end = start ? strchr(start + 7, '"') : 0;	// closing quote of the morph value
+
+				if (start && end) {	// complete morph attribute found
+					text.append('<');
+					text.append(tok, start - tok);	// the text before the morph attr
+					text.append(end + 1);		// text after the morph attr
+					text.append('>');
+
+					continue;
+				}
+			}
+
+			// any other token is kept as is
+			text.append('<');
+			text.append(token);
+			text.append('>');
+
+			continue;
+		}
+		if (intoken) {
+			token.append(*from);
+		}
+		else {
+			text.append(*from);
+		}
+	}
+	return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/osismorphsegmentation.cpp b/src/modules/filters/osismorphsegmentation.cpp
new file mode 100644
index 0000000..bf32581
--- /dev/null
+++ b/src/modules/filters/osismorphsegmentation.cpp
@@ -0,0 +1,106 @@
+/******************************************************************************
+ *
+ * osismorphsegmentation - SWFilter descendant to toggle splitting of morphemes
+ * (for morpheme segmented Hebrew in the WLC)
+ */
+
+
+#include <osismorphsegmentation.h>
+#include <stdlib.h>
+#include <utilxml.h>
+#include <swmodule.h>
+#include <swbuf.h>
+
+SWORD_NAMESPACE_START
+
+// Option name and tool tip handed to SWOptionFilter for this filter.
+const char oName[] = "Morpheme Segmentation";
+const char oTip[] = "Toggles Morpheme Segmentation On and Off, when present";
+
+// oValues is built from the range [&choices[0], &choices[2]), i.e. "Off" and "On";
+// the trailing "" only marks the end of that range and is not one of the values
+// (assumes StringList has a standard iterator-range constructor — TODO confirm).
+const SWBuf choices[3] = {"Off", "On", ""};
+const StringList oValues(&choices[0], &choices[2]);
+
+// The filter starts with the option set to "Off".
+OSISMorphSegmentation::OSISMorphSegmentation() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("Off");
+}
+
+
+// Nothing to release.
+OSISMorphSegmentation::~OSISMorphSegmentation() {}
+
+
+// Walks the entry text once. The content collected since the last
+// <seg type="morph"> start tag is stored under the "Morpheme" entry attribute,
+// keyed by a zero-padded running number, each time a </seg> end tag is seen.
+// While the option is Off, the <seg type="morph"> start tag and its end tag are
+// left out of the output. Always returns 0.
+char OSISMorphSegmentation::processText(SWBuf &text, const SWKey * /*key*/, const SWModule *module) {
+	const SWBuf source(text);
+
+	SWBuf tok;			// tag text currently being collected
+	SWBuf morphemeBody = "";	// content gathered since the last morph start tag
+	SWBuf attrKey;			// formatted morpheme number
+	XMLTag seg;
+	unsigned int nextMorpheme = 0;
+	bool readingTag = false;
+	bool suppress = false;		// true while seg tags are being left out
+	bool insideMorpheme = false;
+
+	text = "";
+	for (const char *p = source.c_str(); *p; ++p) {
+		const char c = *p;
+
+		if (c == '<') {
+			readingTag = true;
+			tok = "";
+			continue;
+		}
+
+		if (c != '>') {
+			if (readingTag) {
+				// still inside a tag: collect it
+				tok.append(c);
+			}
+			else {
+				// plain text is always kept, and mirrored into the morpheme body
+				text.append(c);
+				if (insideMorpheme) {
+					morphemeBody.append(c);
+				}
+			}
+			continue;
+		}
+
+		// c == '>': a complete tag is in tok
+		readingTag = false;
+
+		const bool isSegTag = !strncmp(tok.c_str(), "seg ", 4) || !strncmp(tok.c_str(), "/seg", 4);
+		if (isSegTag) {
+			seg = tok;
+			const bool closing = seg.isEndTag();
+
+			if (!closing && seg.getAttribute("type") && !strcmp("morph", seg.getAttribute("type"))) {
+				// <seg type="morph"> start tag; only hide it when the option is Off
+				suppress = !option;
+				morphemeBody = "";
+				insideMorpheme = true;
+			}
+
+			if (closing) {
+				attrKey.setFormatted("%.3d", nextMorpheme++);
+				module->getEntryAttributes()["Morpheme"][attrKey]["body"] = morphemeBody;
+				insideMorpheme = false;
+			}
+
+			if (suppress) {
+				// leave this seg tag out; the end tag stops the hiding
+				if (closing) {
+					suppress = false;
+				}
+				continue;
+			}
+		}
+
+		text.append('<');
+		text.append(tok);
+		text.append('>');
+
+		if (insideMorpheme) {
+			morphemeBody.append('<');
+			morphemeBody.append(tok);
+			morphemeBody.append('>');
+		}
+
+		suppress = false;
+	}
+	return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/osisosis.cpp b/src/modules/filters/osisosis.cpp
new file mode 100644
index 0000000..7da6089
--- /dev/null
+++ b/src/modules/filters/osisosis.cpp
@@ -0,0 +1,173 @@
+/***************************************************************************
+ osisosis.cpp - internal OSIS to public OSIS filter
+ -------------------
+ begin : 2004-03-13
+ copyright : 2003 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation version 2 of the License.
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <osisosis.h>
+#include <utilxml.h>
+#include <versekey.h>
+#include <swmodule.h>
+
+SWORD_NAMESPACE_START
+
+
+/**
+ * Initialise the per-render state for the OSISOSIS filter.
+ *
+ * osisQToTick is true unless the module config contains OSISqToTick=false.
+ *
+ * @param module module being rendered; may be NULL, in which case osisQToTick is true
+ * @param key    key of the entry being rendered (forwarded to BasicFilterUserData)
+ */
+OSISOSIS::MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) {
+	// module was dereferenced unconditionally here; fall back to the default
+	// when there is none, as OSISHTMLHREF::MyUserData does
+	osisQToTick = true;
+	if (module) {
+		const char *qToTick = module->getConfigEntry("OSISqToTick");
+		osisQToTick = ((!qToTick) || (strcmp(qToTick, "false")));
+	}
+}
+
+
+// Configure the token/escape delimiters and the registered escape strings.
+OSISOSIS::OSISOSIS() {
+	// entity names registered through addAllowedEscapeString()
+	static const char *const allowedEntities[] = { "quot", "apos", "amp", "lt", "gt" };
+
+	setTokenStart("<");
+	setTokenEnd(">");
+
+	setEscapeStart("&");
+	setEscapeEnd(";");
+
+	setEscapeStringCaseSensitive(true);
+	setPassThruNumericEscapeString(true);
+
+	for (unsigned int e = 0; e < sizeof(allowedEntities) / sizeof(allowedEntities[0]); ++e) {
+		addAllowedEscapeString(allowedEntities[e]);
+	}
+
+	setTokenCaseSensitive(true);
+}
+
+
+/**
+ * Run the basic token filter over text and, when key is a VerseKey positioned
+ * on a verse (Verse() != 0), wrap the result in
+ * <verse osisID="...">...</verse>.
+ *
+ * @param text   entry text, modified in place
+ * @param key    key of the entry; only VerseKey (or descendants) trigger wrapping
+ * @param module module being rendered (forwarded to SWBasicFilter)
+ * @return status returned by SWBasicFilter::processText
+ */
+char OSISOSIS::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+	char status = SWBasicFilter::processText(text, key, module);
+	VerseKey *vkey = SWDYNAMIC_CAST(VerseKey, key);
+
+	// The old body also built a temporary VerseKey and moved it to the last
+	// verse/chapter, but the code that consumed it (closing </div> output) was
+	// commented out, so the work was dead and has been removed.
+	if (vkey && vkey->Verse()) {
+		SWBuf ref = "";
+		ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef());
+
+		text = ref + text;
+		text += "</verse>";
+	}
+	return status;
+}
+
+/**
+ * Convert one internal OSIS token to its public OSIS form and append it to buf.
+ *
+ * <w> start tags get their lemma/morph prefixes rewritten
+ * ("x-Strongs:" -> "strong:", "x-StrongsMorph:" -> "strongMorph:",
+ * "x-Robinson:" -> "robinson:") and the wn, savlm and splitID attributes
+ * removed. <note> tags lose swordFootnote, and strongsMarkup notes are dropped
+ * together with their content.
+ *
+ * @param buf      output buffer
+ * @param token    raw tag text without the surrounding angle brackets
+ * @param userData per-render state; used here as OSISOSIS::MyUserData
+ * @return true when the token was handled, false when it was not recognised
+ */
+bool OSISOSIS::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+	// simple substitutions need no further work
+	if (substituteToken(buf, token))
+		return true;
+
+	MyUserData *u = (MyUserData *)userData;
+	XMLTag tag(token);
+
+	if (!tag.isEmpty() && (!tag.isEndTag()))
+		u->startTag = tag;
+
+	// <w> tag
+	if (!strcmp(tag.getName(), "w")) {
+
+		// start <w> tag
+		if ((!tag.isEmpty()) && (!tag.isEndTag())) {
+			SWBuf attr = tag.getAttribute("lemma");
+			if (attr.length()) {
+				if (!strncmp(attr.c_str(), "x-Strongs:", 10)) {
+					// overwrite "trongs" with "strong", then drop the first 3 chars
+					memcpy(attr.getRawData()+3, "strong", 6);
+					attr << 3;
+					tag.setAttribute("lemma", attr);
+				}
+			}
+			attr = tag.getAttribute("morph");
+			if (attr.length()) {
+				// The rewritten morph value used to be stored into "lemma"
+				// (copy/paste slip), which overwrote the lemma and left morph
+				// with its internal prefix. It now goes back into "morph".
+				if (!strncmp(attr.c_str(), "x-StrongsMorph:", 15)) {
+					memcpy(attr.getRawData()+3, "strong", 6);
+					attr << 3;
+					tag.setAttribute("morph", attr);
+				}
+				if (!strncmp(attr.c_str(), "x-Robinson:", 11)) {
+					attr[2] = 'r';
+					attr << 2;
+					tag.setAttribute("morph", attr);
+				}
+			}
+			// passing 0 removes these attributes from the output tag
+			tag.setAttribute("wn", 0);
+			tag.setAttribute("savlm", 0);
+			tag.setAttribute("splitID", 0);
+		}
+		buf += tag;
+	}
+
+	// <note> tag
+	else if (!strcmp(tag.getName(), "note")) {
+		if (!tag.isEndTag()) {
+			SWBuf type = tag.getAttribute("type");
+			bool strongsMarkup = (type == "x-strongsMarkup" || type == "strongsMarkup");	// the latter is deprecated
+			if (strongsMarkup) {
+				tag.setEmpty(false);	// handle bug in KJV2003 module where some note open tags were <note ... />
+			}
+
+			if (!tag.isEmpty()) {
+				tag.setAttribute("swordFootnote", 0);
+
+				if (!strongsMarkup) {
+					buf += tag;
+				}
+				else u->suspendTextPassThru = true;	// drop the note content as well
+			}
+		}
+		if (tag.isEndTag()) {
+			// the end tag is only emitted when its start tag was
+			if (u->suspendTextPassThru == false)
+				buf += tag;
+			else u->suspendTextPassThru = false;
+		}
+	}
+
+	else {
+		return false;	// we still didn't handle token
+	}
+	return true;
+}
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/osisplain.cpp b/src/modules/filters/osisplain.cpp
new file mode 100644
index 0000000..62841a6
--- /dev/null
+++ b/src/modules/filters/osisplain.cpp
@@ -0,0 +1,192 @@
+/***************************************************************************
+ osisplain.cpp - OSIS to Plaintext filter
+ -------------------
+ begin : 2003-02-15
+ copyright : 2003 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <osisplain.h>
+#include <ctype.h>
+#include <versekey.h>
+#include <stringmgr.h>
+
+SWORD_NAMESPACE_START
+
+// Configure the delimiters, the entity replacements and the tags that are
+// replaced by a newline.
+OSISPlain::OSISPlain() {
+	// escape string -> replacement text
+	static const char *const entityMap[][2] = {
+		{ "amp",  "&"  },
+		{ "apos", "'"  },
+		{ "lt",   "<"  },
+		{ "gt",   ">"  },
+		{ "quot", "\"" }
+	};
+	// tokens that are each replaced by a single newline
+	static const char *const newlineTokens[] = { "title", "/title", "/l", "lg", "/lg" };
+
+	setTokenStart("<");
+	setTokenEnd(">");
+
+	setEscapeStart("&");
+	setEscapeEnd(";");
+
+	setEscapeStringCaseSensitive(true);
+
+	for (unsigned int e = 0; e < sizeof(entityMap) / sizeof(entityMap[0]); ++e) {
+		addEscapeStringSubstitute(entityMap[e][0], entityMap[e][1]);
+	}
+
+	setTokenCaseSensitive(true);
+	for (unsigned int t = 0; t < sizeof(newlineTokens) / sizeof(newlineTokens[0]); ++t) {
+		addTokenSubstitute(newlineTokens[t], "\n");
+	}
+}
+
+
+/**
+ * Render one OSIS token as plain text appended to buf.
+ *
+ * <w> attributes (xlit, gloss, lemma, morph, POS) are appended after the word
+ * in angle brackets or parentheses; notes are wrapped in " (...)" unless they
+ * are strongsMarkup notes, which are dropped; paragraph and line tags become
+ * newlines; the text of a divineName element is upper-cased in place.
+ *
+ * @param buf      output buffer
+ * @param token    raw tag text without the surrounding angle brackets
+ * @param userData per-render state; used here as OSISPlain::MyUserData
+ * @return true when the token was handled, false when it was not recognised
+ */
+bool OSISPlain::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+	// manually process if it wasn't a simple substitution
+	if (substituteToken(buf, token))
+		return true;
+
+	MyUserData *u = (MyUserData *)userData;
+	VerseKey *vk = SWDYNAMIC_CAST(VerseKey, u->key);
+	char testament = (vk) ? vk->Testament() : 2;	// default to NT
+
+	// <w ...> start/empty tag or </w> end tag
+	if (((*token == 'w') && (token[1] == ' ')) ||
+		((*token == '/') && (token[1] == 'w') && (!token[2]))) {
+		u->tag = token;
+
+		bool start = false;
+		if (*token == 'w') {
+			if (token[strlen(token)-1] != '/') {
+				// non-empty start tag: remember it and render at </w>
+				u->w = token;
+				return true;
+			}
+			start = true;
+		}
+		// an empty <w .../> renders its own attributes, </w> those of the saved start tag
+		u->tag = (start) ? token : u->w.c_str();
+		bool show = true;	// to handle unplaced article in kjv2003-- temporary till combined
+
+		SWBuf lastText = (start) ? "stuff" : u->lastTextNode.c_str();
+
+		const char *attrib;
+		const char *val;
+		if ((attrib = u->tag.getAttribute("xlit"))) {
+			val = strchr(attrib, ':');
+			val = (val) ? (val + 1) : attrib;
+			buf.append(" <");
+			buf.append(val);
+			buf.append('>');
+		}
+		if ((attrib = u->tag.getAttribute("gloss"))) {
+			val = strchr(attrib, ':');
+			val = (val) ? (val + 1) : attrib;
+			buf.append(" <");
+			buf.append(val);
+			buf.append('>');
+		}
+		if ((attrib = u->tag.getAttribute("lemma"))) {
+			int count = u->tag.getAttributePartCount("lemma", ' ');
+			int i = (count > 1) ? 0 : -1;		// -1 for whole value cuz it's faster, but does the same thing as 0
+			do {
+				char gh;
+				attrib = u->tag.getAttribute("lemma", i, ' ');
+				if (i < 0) i = 0;	// to handle our -1 condition
+				val = strchr(attrib, ':');
+				val = (val) ? (val + 1) : attrib;
+				// Compare explicitly instead of strchr("GH", *val): strchr also
+				// matches the terminating NUL, which made the old code read val[1]
+				// past the end of an empty value. isdigit() needs an unsigned char.
+				if (((*val == 'G') || (*val == 'H')) && (isdigit((unsigned char)val[1]))) {
+					gh = *val;
+					val++;
+				}
+				else {
+					// no explicit language letter: pick it from the testament
+					gh = (testament>1) ? 'G' : 'H';
+				}
+				if ((!strcmp(val, "3588")) && (lastText.length() < 1))
+					show = false;
+				else {
+					buf.append(" <");
+					buf.append(gh);
+					buf.append(val);
+					buf.append(">");
+				}
+			} while (++i < count);
+		}
+		if ((attrib = u->tag.getAttribute("morph")) && (show)) {
+			int count = u->tag.getAttributePartCount("morph", ' ');
+			int i = (count > 1) ? 0 : -1;		// -1 for whole value cuz it's faster, but does the same thing as 0
+			do {
+				attrib = u->tag.getAttribute("morph", i, ' ');
+				if (i < 0) i = 0;	// to handle our -1 condition
+				val = strchr(attrib, ':');
+				val = (val) ? (val + 1) : attrib;
+				// same NUL-safe comparison as for the lemma above
+				if ((*val == 'T') && ((val[1] == 'G') || (val[1] == 'H')) && (isdigit((unsigned char)val[2])))
+					val+=2;
+				buf.append(" (");
+				buf.append(val);
+				buf.append(')');
+			} while (++i < count);
+		}
+		if ((attrib = u->tag.getAttribute("POS"))) {
+			val = strchr(attrib, ':');
+			val = (val) ? (val + 1) : attrib;
+
+			buf.append(" <");
+			buf.append(val);
+			buf.append('>');
+		}
+	}
+
+	// <note> tag
+	else if (!strncmp(token, "note", 4)) {
+		if (!strstr(token, "strongsMarkup")) {	// leave strong's markup notes out, in the future we'll probably have different option filters to turn different note types on or off
+			buf.append(" (");
+		}
+		else	u->suspendTextPassThru = true;
+	}
+	else if (!strncmp(token, "/note", 5)) {
+		if (!u->suspendTextPassThru)
+			buf.append(')');
+		else	u->suspendTextPassThru = false;
+	}
+
+	// <p> paragraph tag
+	else if (((*token == 'p') && ((token[1] == ' ') || (!token[1]))) ||
+		((*token == '/') && (token[1] == 'p') && (!token[2]))) {
+		userData->supressAdjacentWhitespace = true;
+		buf.append('\n');
+	}
+
+	// <lb .../>
+	else if (!strncmp(token, "lb", 2)) {
+		userData->supressAdjacentWhitespace = true;
+		buf.append('\n');
+	}
+	// tokens starting with "l" that carry an eID (line end marker)
+	else if (!strncmp(token, "l", 1) && strstr(token, "eID")) {
+		userData->supressAdjacentWhitespace = true;
+		buf.append('\n');
+	}
+	else if (!strncmp(token, "/divineName", 11)) {
+		// Upper-case the last lastTextNode.size() characters of the output.
+		// The size check is new: without it a last text node longer than buf
+		// moved the pointer to before the start of the buffer.
+		if (u->lastTextNode.size() <= buf.size()) {
+			char *end = buf.getRawData();
+			end += buf.size() - u->lastTextNode.size();
+			toupperstr(end);
+		}
+	}
+
+	// <milestone type="line"/>
+	else if (!strncmp(token, "milestone", 9)) {
+		// token+10 lies past the terminator when the token is exactly
+		// "milestone", so only search when there is a tenth character
+		const char *type = (token[9]) ? strstr(token+10, "type=\"") : 0;
+		// NOTE(review): the newline is appended for every type EXCEPT "line";
+		// the heading above suggests the opposite was meant — confirm before changing.
+		if (type && strncmp(type+6, "line", 4)) {	//we check for type != line
+			userData->supressAdjacentWhitespace = true;
+			buf.append('\n');
+		}
+	}
+
+	else {
+		return false;	// we still didn't handle token
+	}
+	return true;
+}
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/osisredletterwords.cpp b/src/modules/filters/osisredletterwords.cpp
new file mode 100644
index 0000000..727332d
--- /dev/null
+++ b/src/modules/filters/osisredletterwords.cpp
@@ -0,0 +1,85 @@
+/******************************************************************************
+ *
+ * OSISRedLetterWords - SWFilter descendant to toggle red coloring for words
+ * of Christ in an OSIS module.
+ */
+
+
+#include <stdlib.h>
+#include <osisredletterwords.h>
+#include <swmodule.h>
+
+
+SWORD_NAMESPACE_START
+
+const char oName[] = "Words of Christ in Red"; // option name passed to the SWOptionFilter base by the constructor
+const char oTip[] = "Toggles Red Coloring for Words of Christ On and Off if they are marked"; // option tip passed to the SWOptionFilter base
+
+const SWBuf choices[3] = {"Off", "On", ""}; // candidate option values; the third, empty entry is never part of oValues
+const StringList oValues(&choices[0], &choices[2]); // built from &choices[0] up to &choices[2] -- presumably a half-open range, i.e. "Off" and "On"
+
+/**
+ * Hands the option name, tip and value list to the SWOptionFilter base
+ * and selects "On" as the initial option value.
+ */
+OSISRedLetterWords::OSISRedLetterWords()
+    : SWOptionFilter(oName, oTip, &oValues)
+{
+    setOptionValue("On");
+}
+
+
+/** Empty destructor; no cleanup is performed here. */
+OSISRedLetterWords::~OSISRedLetterWords()
+{
+}
+
+
+/**
+ * Strips the who="Jesus" attribute from <q ...> tags when the option is off.
+ *
+ * With the option on, the text is returned untouched. Otherwise the text is
+ * rebuilt character by character: every tag is copied through verbatim,
+ * except that a tag starting with "q " has the first occurrence of
+ * ' who="Jesus"' cut out of it. Text outside of tags is never changed.
+ *
+ * @param text   entry text; rewritten in place
+ * @param key    unused here
+ * @param module unused here
+ * @return always 0
+ */
+char OSISRedLetterWords::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+    // option on: leave the red lettered words in
+    if (option)
+        return 0;
+
+    const SWBuf source = text;   // untouched copy we read from while rebuilding text
+    SWBuf tag;                   // contents of the tag currently being collected (without <>)
+    bool insideTag = false;
+
+    text = "";
+    for (const char *cur = source.c_str(); *cur; ++cur) {
+        const char ch = *cur;
+
+        // start of a tag: begin collecting its contents
+        if (ch == '<') {
+            insideTag = true;
+            tag = "";
+            continue;
+        }
+
+        // ordinary character: goes either into the pending tag or straight to the output
+        if (ch != '>') {
+            if (insideTag)
+                tag.append(ch);
+            else
+                text.append(ch);
+            continue;
+        }
+
+        // '>' reached: emit the collected tag, minus the who attribute for quotes of Jesus
+        insideTag = false;
+
+        const char *whoAttr = 0;
+        if ((tag[0] == 'q') && (tag[1] == ' '))
+            whoAttr = strstr(tag.c_str(), " who=\"Jesus\"");
+
+        text.append('<');
+        if (whoAttr && (strlen(whoAttr) >= 12)) {
+            const char *rest = whoAttr + 12;                        // first char after the 12-char attribute
+            text.append(tag, whoAttr - (tag.c_str()));              // everything before the attribute
+            text.append(rest, tag.c_str() + tag.length() - rest);   // everything after the attribute
+        }
+        else {
+            // not a quote of Jesus: copy the tag unchanged
+            text.append(tag);
+        }
+        text.append('>');
+    }
+    return 0;
+}
+
+SWORD_NAMESPACE_END
+
diff --git a/src/modules/filters/osisrtf.cpp b/src/modules/filters/osisrtf.cpp
new file mode 100644
index 0000000..0352335
--- /dev/null
+++ b/src/modules/filters/osisrtf.cpp
@@ -0,0 +1,520 @@
+/***************************************************************************
+ osisrtf.cpp - OSIS to RTF filter
+ -------------------
+ begin : 2003-02-15
+ copyright : 2003 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation version 2 of the License. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <ctype.h>
+#include <osisrtf.h>
+#include <utilxml.h>
+#include <utilstr.h>
+#include <versekey.h>
+#include <swmodule.h>
+#include <stringmgr.h>
+#include <stack>
+
+SWORD_NAMESPACE_START
+
+namespace {
+    /**
+     * Per-conversion state for the OSIS to RTF filter, created by
+     * OSISRTF::createUserData() and read/written by OSISRTF::handleToken().
+     */
+    class MyUserData : public BasicFilterUserData {
+    public:
+        bool osisQToTick;               // supply " / ' for <q> elements that carry no explicit marker
+        bool BiblicalText;              // true when module->Type() is "Biblical Texts"
+        bool inXRefNote;                // set by handleToken while inside a cross-reference note
+        int suspendLevel;               // nesting counter behind suspendTextPassThru
+        std::stack<char *> quoteStack;  // new[]-allocated copies of open <q> tags, newest on top
+        SWBuf w;                        // token of the most recent opening <w> tag
+        SWBuf version;                  // module->Name(), empty when there is no module
+        MyUserData(const SWModule *module, const SWKey *key);
+        ~MyUserData();
+    };
+
+
+    /**
+     * Initialises the state from the module configuration.
+     *
+     * osisQToTick is true unless the module's "OSISqToTick" config entry
+     * exists and equals "false". Without a module it defaults to true, the
+     * same value an absent entry yields; the previous code dereferenced
+     * the null module pointer here.
+     *
+     * @param module module being rendered; may be null
+     * @param key    key being rendered; forwarded to BasicFilterUserData
+     */
+    MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) {
+        inXRefNote = false;
+        BiblicalText = false;
+        suspendLevel = 0;
+        osisQToTick = true;
+        if (module) {
+            version = module->Name();
+            BiblicalText = (!strcmp(module->Type(), "Biblical Texts"));
+            const char *qToTick = module->getConfigEntry("OSISqToTick");
+            osisQToTick = ((!qToTick) || (strcmp(qToTick, "false")));
+        }
+    }
+
+
+    /** Frees any <q> tag copies left on the stack by unbalanced quotes. */
+    MyUserData::~MyUserData() {
+        // Just in case the quotes are not well formed
+        while (!quoteStack.empty()) {
+            char *tagData = quoteStack.top();
+            quoteStack.pop();
+            delete [] tagData;
+        }
+    }
+
+    /** Appends t to the output o, or to u->lastSuspendSegment while text pass-through is suspended. */
+    static inline void outText(const char *t, SWBuf &o, BasicFilterUserData *u) { if (!u->suspendTextPassThru) o += t; else u->lastSuspendSegment += t; }
+    /** Single-character overload of outText with the same routing rule. */
+    static inline void outText(char t, SWBuf &o, BasicFilterUserData *u) { if (!u->suspendTextPassThru) o += t; else u->lastSuspendSegment += t; }
+}
+
+
+OSISRTF::OSISRTF() { // sets the token/escape delimiters and the fixed substitutions for this filter
+ setTokenStart("<"); // tokens (tags) are delimited by "<" ... ">"
+ setTokenEnd(">");
+
+ setEscapeStart("&"); // escapes (entities) are delimited by "&" ... ";"
+ setEscapeEnd(";");
+
+ setEscapeStringCaseSensitive(true);
+
+ addEscapeStringSubstitute("amp", "&"); // the five predefined XML entities are replaced by their literal characters
+ addEscapeStringSubstitute("apos", "'");
+ addEscapeStringSubstitute("lt", "<");
+ addEscapeStringSubstitute("gt", ">");
+ addEscapeStringSubstitute("quot", "\"");
+ addTokenSubstitute("lg", "{\\par}"); // <lg> and </lg> are both replaced by an RTF {\par} group
+ addTokenSubstitute("/lg", "{\\par}");
+
+ setTokenCaseSensitive(true);
+}
+
+
+/**
+ * Allocates the OSIS to RTF conversion state for one module/key pair.
+ *
+ * @return a new MyUserData created with new
+ */
+BasicFilterUserData *OSISRTF::createUserData(const SWModule *module, const SWKey *key) {
+    BasicFilterUserData *state = new MyUserData(module, key);
+    return state;
+}
+
+
+/**
+ * Converts OSIS text to RTF in three passes.
+ *
+ *  1. Every '{', '}' and '\' in the input is prefixed with a backslash so it
+ *     is not mistaken for an RTF control character.
+ *  2. SWBasicFilter::processText() handles tokens and escapes as usual.
+ *  3. Each run of spaces, tabs, newlines and carriage returns in the result
+ *     is collapsed into a single space.
+ *
+ * @param text   entry text; rewritten in place
+ * @param key    forwarded to SWBasicFilter::processText()
+ * @param module forwarded to SWBasicFilter::processText()
+ * @return always 0
+ */
+char OSISRTF::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+
+    // pass 1: escape RTF control characters
+    SWBuf orig = text;
+    const char *from = orig.c_str();
+    text = "";
+    while (*from) {
+        const char ch = *from++;
+        if ((ch == '{') || (ch == '}') || (ch == '\\'))
+            text += "\\";
+        text += ch;
+    }
+    text += (char)0;
+
+    // pass 2: handle tokens as usual
+    SWBasicFilter::processText(text, key, module);
+
+    // pass 3: collapse runs of whitespace into one space
+    orig = text;
+    text = "";
+    for (from = orig.c_str(); *from; ++from) {
+        if (!strchr(" \t\n\r", *from)) {
+            text += *from;
+            continue;
+        }
+        // swallow the rest of the whitespace run, then emit a single space for it
+        while (from[1] && (strchr(" \t\n\r", from[1])))
+            ++from;
+        text += " ";
+    }
+    text += (char)0;	// probably not needed, but don't want to remove without investigating (same as above)
+    return 0;
+}
+
+
+/**
+ * Turns one OSIS token (the text between '<' and '>') into RTF.
+ *
+ * Simple substitutions registered in the constructor are tried first. If
+ * none applies, the token is parsed as an XMLTag and dispatched on its name.
+ * All output goes through outText(), so it lands in buf normally and in
+ * u->lastSuspendSegment while text pass-through is suspended.
+ *
+ * @param buf      output buffer
+ * @param token    tag contents without the surrounding '<' and '>'
+ * @param userData the MyUserData created by createUserData()
+ * @return true when the token was handled (by substitution or by one of the
+ *         branches below), false when it was left unhandled (unknown
+ *         element, or a <figure> without a src attribute)
+ */
+bool OSISRTF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+    // manually process if it wasn't a simple substitution
+    MyUserData *u = (MyUserData *)userData;
+    SWBuf scratch;
+    // while suspended, a substitution is written to scratch instead of buf
+    bool sub = (u->suspendTextPassThru) ? substituteToken(scratch, token) : substituteToken(buf, token);
+    if (!sub) {
+        XMLTag tag(token);
+
+        // <w> tag
+        if (!strcmp(tag.getName(), "w")) {
+
+            // start <w> tag: open an RTF group and remember the tag for the matching </w>
+            if ((!tag.isEmpty()) && (!tag.isEndTag())) {
+                outText('{', buf, u);
+                u->w = token;
+            }
+
+            // end or empty <w> tag
+            else {
+                bool endTag = tag.isEndTag();
+                SWBuf lastText;
+                bool show = true;	// to handle unplaced article in kjv2003-- temporary till combined
+
+                if (endTag) {
+                    // the attributes live on the opening tag, so re-parse the one we saved
+                    tag = u->w.c_str();
+                    lastText = u->lastTextNode.c_str();
+                }
+                else lastText = "stuff";
+
+                const char *attrib;
+                const char *val;
+                if ((attrib = tag.getAttribute("xlit"))) {
+                    // drop an optional "prefix:" from the value
+                    val = strchr(attrib, ':');
+                    val = (val) ? (val + 1) : attrib;
+                    scratch.setFormatted(" {\\fs15 <%s>}", val);
+                    outText(scratch.c_str(), buf, u);
+                }
+                if ((attrib = tag.getAttribute("gloss"))) {
+                    val = strchr(attrib, ':');
+                    val = (val) ? (val + 1) : attrib;
+                    scratch.setFormatted(" {\\fs15 <%s>}", val);
+                    outText(scratch.c_str(), buf, u);
+                }
+                if ((attrib = tag.getAttribute("lemma"))) {
+                    int count = tag.getAttributePartCount("lemma", ' ');
+                    int i = (count > 1) ? 0 : -1;		// -1 for whole value cuz it's faster, but does the same thing as 0
+                    do {
+                        attrib = tag.getAttribute("lemma", i, ' ');
+                        if (i < 0) i = 0;	// to handle our -1 condition
+                        val = strchr(attrib, ':');
+                        val = (val) ? (val + 1) : attrib;
+                        const char *val2 = val;
+                        // skip a leading G/H when it is followed by a digit
+                        if ((strchr("GH", *val)) && (isdigit(val[1])))
+                            val2++;
+                        // lemma 3588 on a word with no text is not shown
+                        if ((!strcmp(val2, "3588")) && (lastText.length() < 1))
+                            show = false;
+                        else {
+                            scratch.setFormatted(" {\\cf3 \\sub <%s>}", val2);
+                            outText(scratch.c_str(), buf, u);
+                        }
+                    } while (++i < count);
+                }
+                if ((attrib = tag.getAttribute("morph")) && (show)) {
+                    // "savlm" holds the lemma when the Strong's option filter has removed "lemma"
+                    SWBuf savelemma = tag.getAttribute("savlm");
+                    if ((strstr(savelemma.c_str(), "3588")) && (lastText.length() < 1))
+                        show = false;
+                    if (show) {
+                        int count = tag.getAttributePartCount("morph", ' ');
+                        int i = (count > 1) ? 0 : -1;		// -1 for whole value cuz it's faster, but does the same thing as 0
+                        do {
+                            attrib = tag.getAttribute("morph", i, ' ');
+                            if (i < 0) i = 0;	// to handle our -1 condition
+                            val = strchr(attrib, ':');
+                            val = (val) ? (val + 1) : attrib;
+                            const char *val2 = val;
+                            // skip a leading TG/TH when it is followed by a digit
+                            if ((*val == 'T') && (strchr("GH", val[1])) && (isdigit(val[2])))
+                                val2+=2;
+                            scratch.setFormatted(" {\\cf4 \\sub (%s)}", val2);
+                            outText(scratch.c_str(), buf, u);
+                        } while (++i < count);
+                    }
+                }
+                if ((attrib = tag.getAttribute("POS"))) {
+                    val = strchr(attrib, ':');
+                    val = (val) ? (val + 1) : attrib;
+                    scratch.setFormatted(" {\\fs15 <%s>}", val);
+                    outText(scratch.c_str(), buf, u);
+                }
+
+                // close the group opened by the start tag
+                if (endTag)
+                    outText('}', buf, u);
+            }
+        }
+
+        // <note> tag
+        else if (!strcmp(tag.getName(), "note")) {
+            if (!tag.isEndTag()) {
+                if (!tag.isEmpty()) {
+                    SWBuf type = tag.getAttribute("type");
+
+                    if (	   (type != "x-strongsMarkup")	// leave strong's markup notes out, in the future we'll probably have different option filters to turn different note types on or off
+                            && (type != "strongsMarkup")	// deprecated
+                            ) {
+                        SWBuf footnoteNumber = tag.getAttribute("swordFootnote");
+                        VerseKey *vkey = NULL;
+                        // see if we have a VerseKey * or descendant
+                        SWTRY {
+                            vkey = SWDYNAMIC_CAST(VerseKey, u->key);
+                        }
+                        SWCATCH ( ... ) {	}
+                        if (vkey) {
+                            // 'x' marks cross-reference notes, 'n' every other note
+                            char ch = ((!strcmp(type.c_str(), "crossReference")) || (!strcmp(type.c_str(), "x-cross-ref"))) ? 'x':'n';
+                            scratch.setFormatted("{\\super <a href=\"\">*%c%i.%s</a>} ", ch, vkey->Verse(), footnoteNumber.c_str());
+                            outText(scratch.c_str(), buf, u);
+                            u->inXRefNote = (ch == 'x');
+                        }
+                    }
+                    // the note body itself is suspended, not written to buf
+                    u->suspendTextPassThru = (++u->suspendLevel);
+                }
+            }
+            if (tag.isEndTag()) {
+                u->suspendTextPassThru = (--u->suspendLevel);
+                u->inXRefNote = false;
+            }
+        }
+
+        // <p> paragraph tag
+        else if (!strcmp(tag.getName(), "p")) {
+            if ((!tag.isEndTag()) && (!tag.isEmpty())) {	// non-empty start tag
+                outText("{\\fi200\\par}", buf, u);
+            }
+            else if (tag.isEndTag()) {	// end tag
+                outText("{\\par}", buf, u);
+                userData->supressAdjacentWhitespace = true;
+            }
+            else {					// empty paragraph break marker
+                outText("{\\pard\\par\\par}", buf, u);
+                userData->supressAdjacentWhitespace = true;
+            }
+        }
+
+        // <reference> tag
+        else if (!strcmp(tag.getName(), "reference")) {
+            if (!u->inXRefNote) {	// only show these if we're not in an xref note
+                if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+                    outText("{<a href=\"\">", buf, u);
+                }
+                else if (tag.isEndTag()) {
+                    outText("</a>}", buf, u);
+                }
+            }
+        }
+
+        // <l> poetry
+        else if (!strcmp(tag.getName(), "l")) {
+            // end line marker
+            if (tag.getAttribute("eID")) {
+                outText("{\\par}", buf, u);
+            }
+            // <l/> without eID or sID
+            // Note: this is improper osis. This should be <lb/>
+            else if (tag.isEmpty() && !tag.getAttribute("sID")) {
+                outText("{\\par}", buf, u);
+            }
+            // end of the line
+            else if (tag.isEndTag()) {
+                outText("{\\par}", buf, u);
+            }
+        }
+
+        // <milestone type="line"/> or <lb.../>
+        else if ((!strcmp(tag.getName(), "lb")) || ((!strcmp(tag.getName(), "milestone")) && (tag.getAttribute("type")) && (!strcmp(tag.getAttribute("type"), "line")))) {
+            outText("{\\par}", buf, u);
+            userData->supressAdjacentWhitespace = true;
+        }
+
+        // <title>
+        else if (!strcmp(tag.getName(), "title")) {
+            if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+                outText("{\\par\\i1\\b1 ", buf, u);
+            }
+            else if (tag.isEndTag()) {
+                outText("\\par}", buf, u);
+            }
+        }
+
+        // <catchWord> & <rdg> tags (italicize)
+        else if (!strcmp(tag.getName(), "rdg") || !strcmp(tag.getName(), "catchWord")) {
+            if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+                outText("{\\i1 ", buf, u);
+            }
+            else if (tag.isEndTag()) {
+                outText('}', buf, u);
+            }
+        }
+
+        // <hi>
+        else if (!strcmp(tag.getName(), "hi")) {
+            SWBuf type = tag.getAttribute("type");
+            if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+                if (type == "b" || type == "x-b")
+                    outText("{\\b1 ", buf, u);
+                else	// all other types
+                    outText("{\\i1 ", buf, u);
+            }
+            else if (tag.isEndTag()) {
+                outText('}', buf, u);
+            }
+        }
+
+        // <q> quote
+        // Rules for a quote element:
+        // If the tag is empty with an sID or an eID then use whatever it specifies for quoting.
+        //    Note: empty elements without sID or eID are ignored.
+        // If the tag is <q> then use its specifications and push it onto a stack for </q>
+        // If the tag is </q> then use the pushed <q> for specification
+        // If there is a marker attribute, possibly empty, this overrides osisQToTick.
+        // If osisQToTick, then output the marker, using level to determine the type of mark.
+        else if (!strcmp(tag.getName(), "q")) {
+            SWBuf type      = tag.getAttribute("type");
+            SWBuf who       = tag.getAttribute("who");
+            const char *tmp = tag.getAttribute("level");
+            int level       = (tmp) ? atoi(tmp) : 1;
+            tmp             = tag.getAttribute("marker");
+            bool hasMark    = tmp;
+            SWBuf mark      = tmp;
+
+            // open <q> or <q sID... />
+            if ((!tag.isEmpty() && !tag.isEndTag()) || (tag.isEmpty() && tag.getAttribute("sID"))) {
+                // if <q> then remember it for the </q>
+                if (!tag.isEmpty()) {
+                    char *tagData = 0;
+                    stdstr(&tagData, tag.toString());
+                    u->quoteStack.push(tagData);
+                }
+
+                // Do this first so quote marks are included as WoC
+                if (who == "Jesus")
+                    outText("\\cf6 ", buf, u);
+
+                // first check to see if we've been given an explicit mark
+                if (hasMark)
+                    outText(mark, buf, u);
+                //alternate " and '
+                else if (u->osisQToTick)
+                    outText((level % 2) ? '\"' : '\'', buf, u);
+            }
+            // close </q> or <q eID... />
+            else if ((tag.isEndTag()) || (tag.getAttribute("eID"))) {
+                // if it is </q> then pop the stack for the attributes
+                if (tag.isEndTag() && !u->quoteStack.empty()) {
+                    char *tagData  = u->quoteStack.top();
+                    u->quoteStack.pop();
+                    XMLTag qTag(tagData);
+                    delete [] tagData;
+
+                    type    = qTag.getAttribute("type");
+                    who     = qTag.getAttribute("who");
+                    tmp     = qTag.getAttribute("level");
+                    level   = (tmp) ? atoi(tmp) : 1;
+                    tmp     = qTag.getAttribute("marker");
+                    hasMark = tmp;
+                    mark    = tmp;
+                }
+
+                // first check to see if we've been given an explicit mark
+                if (hasMark)
+                    outText(mark, buf, u);
+                // finally, alternate " and ', if config says we should supply a mark
+                else if (u->osisQToTick)
+                    outText((level % 2) ? '\"' : '\'', buf, u);
+
+                // Do this last so quote marks are included as WoC
+                if (who == "Jesus")
+                    outText("\\cf0 ", buf, u);
+            }
+        }
+
+
+        // <milestone type="cQuote" marker="x"/>
+        else if (!strcmp(tag.getName(), "milestone") && tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "cQuote")) {
+            const char *tmp = tag.getAttribute("marker");
+            bool hasMark    = tmp;
+            SWBuf mark      = tmp;
+            tmp             = tag.getAttribute("level");
+            int level       = (tmp) ? atoi(tmp) : 1;
+
+            // first check to see if we've been given an explicit mark
+            if (hasMark)
+                outText(mark, buf, u);
+            // finally, alternate " and ', if config says we should supply a mark
+            else if (u->osisQToTick)
+                outText((level % 2) ? '\"' : '\'', buf, u);
+        }
+
+        // <transChange>
+        else if (!strcmp(tag.getName(), "transChange")) {
+            SWBuf type = tag.getAttribute("type");
+
+            if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+
+// just do all transChange tags this way for now
+//				if (type == "supplied")
+                outText("{\\i1 ", buf, u);
+            }
+            else if (tag.isEndTag()) {
+                outText('}', buf, u);
+            }
+        }
+
+        // <divineName>
+        else if (!strcmp(tag.getName(), "divineName")) {
+
+            // suspend output so the name's text is collected in lastSuspendSegment
+            if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+                u->suspendTextPassThru = (++u->suspendLevel);
+            }
+            else if (tag.isEndTag()) {
+                SWBuf lastText = u->lastSuspendSegment.c_str();
+                u->suspendTextPassThru = (--u->suspendLevel);
+                if (lastText.size()) {
+                    // upper-case the name; first letter at \fs19, the rest at \fs16
+                    toupperstr(lastText);
+                    scratch.setFormatted("{\\fs19%c\\fs16%s}", lastText[0], lastText.c_str()+1);
+                    outText(scratch.c_str(), buf, u);
+                }
+            }
+        }
+
+        // <div>
+        else if (!strcmp(tag.getName(), "div")) {
+
+            if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+                outText("\\par\\par\\pard ", buf, u);
+            }
+            else if (tag.isEndTag()) {
+            }
+        }
+
+        // image
+        else if (!strcmp(tag.getName(), "figure")) {
+            const char *src = tag.getAttribute("src");
+            if (!src)		// assert we have a src attribute
+                return false;
+
+            // Build "<AbsoluteDataPath><src>" in an SWBuf. The earlier version
+            // used a hand-sized new[] buffer and called strlen()/strcpy() on
+            // the config entry directly, which dereferenced null when the
+            // module or its AbsoluteDataPath entry was missing.
+            SWBuf filepath;
+            if (u->module)
+                filepath = u->module->getConfigEntry("AbsoluteDataPath");
+            filepath += src;
+
+// we do this because BibleCS looks for this EXACT format for an image tag
+            outText("<img src=\"", buf, u);
+            outText(filepath.c_str(), buf, u);
+            outText("\" />", buf, u);
+            // NOTE: embedding the image bytes as an RTF \pict group (jpegblip/pngblip)
+            // was sketched here in a commented-out block but never enabled.
+        }
+        else {
+            return false;  // we still didn't handle token
+        }
+    }
+    return true;
+}
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/osisscripref.cpp b/src/modules/filters/osisscripref.cpp
new file mode 100644
index 0000000..437f5f5
--- /dev/null
+++ b/src/modules/filters/osisscripref.cpp
@@ -0,0 +1,100 @@
+/******************************************************************************
+ *
+ * OSISScripref - SWFilter descendant to hide or show scripture references
+ * in an OSIS module.
+ */
+
+
+#include <stdlib.h>
+#include <osisscripref.h>
+#include <swmodule.h>
+#include <utilxml.h>
+
+SWORD_NAMESPACE_START
+
+const char oName[] = "Cross-references"; // option name passed to the SWOptionFilter base by the constructor
+const char oTip[] = "Toggles Scripture Cross-references On and Off if they exist"; // option tip passed to the SWOptionFilter base
+
+const SWBuf choices[3] = {"Off", "On", ""}; // candidate option values; the third, empty entry is never part of oValues
+const StringList oValues(&choices[0], &choices[2]); // built from &choices[0] up to &choices[2] -- presumably a half-open range, i.e. "Off" and "On"
+
+/**
+ * Hands the option name, tip and value list to the SWOptionFilter base
+ * and selects "On" as the initial option value.
+ */
+OSISScripref::OSISScripref()
+    : SWOptionFilter(oName, oTip, &oValues)
+{
+    setOptionValue("On");
+}
+
+
+/** Empty destructor; no cleanup is performed here. */
+OSISScripref::~OSISScripref()
+{
+}
+
+
+/**
+ * Hides or keeps <note type="crossReference"> elements.
+ *
+ * The text is rebuilt character by character. When a non-empty opening note
+ * tag with type "crossReference" is seen, everything up to the next closing
+ * note tag is diverted into a side buffer. With the option on, the opening
+ * tag, the buffered content and the closing tag are all written to the
+ * output; with the option off, all three are dropped.
+ *
+ * @param text   entry text; rewritten in place
+ * @param key    unused here
+ * @param module unused here
+ * @return always 0
+ */
+char OSISScripref::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+    SWBuf token;             // contents of the tag currently being collected (without <>)
+    bool intoken = false;
+    bool hide = false;       // true while inside a cross-reference note
+    SWBuf tagText;           // content of the cross-reference note collected so far
+    XMLTag startTag;
+
+    SWBuf orig = text;
+    const char *from = orig.c_str();
+
+    XMLTag tag;
+
+    for (text = ""; *from; ++from) {
+        const char ch = *from;
+
+        // start of a tag
+        if (ch == '<') {
+            intoken = true;
+            token = "";
+            continue;
+        }
+
+        // ordinary character: tag contents, visible text, or hidden note text
+        if (ch != '>') {
+            if (intoken)
+                token.append(ch);
+            else if (!hide)
+                text.append(ch);
+            else
+                tagText.append(ch);
+            continue;
+        }
+
+        // '>' reached: process the collected tag
+        intoken = false;
+        tag = token;
+
+        const bool noteTag = !strncmp(token.c_str(), "note", 4) || !strncmp(token.c_str(), "/note", 5);
+        if (noteTag) {
+            if (!tag.isEndTag() && !tag.isEmpty()) {
+                startTag = tag;
+                if ((tag.getAttribute("type")) && (!strcmp(tag.getAttribute("type"), "crossReference"))) {
+                    // opening tag of a cross-reference note: start diverting
+                    hide = true;
+                    tagText = "";
+                    if (option) {	// we want the tag in the text
+                        text.append('<');
+                        text.append(token);
+                        text.append('>');
+                    }
+                    continue;
+                }
+            }
+            if (hide && tag.isEndTag()) {
+                // closing tag of the hidden note: stop diverting
+                hide = false;
+                if (!option)
+                    continue;	// don't let the end tag get added to the text
+                text.append(tagText);	// end tag gets added further down
+            }
+        }
+
+        // any other tag is copied to wherever text currently goes
+        SWBuf &dest = hide ? tagText : text;
+        dest.append('<');
+        dest.append(token);
+        dest.append('>');
+    }
+    return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/osisstrongs.cpp b/src/modules/filters/osisstrongs.cpp
new file mode 100644
index 0000000..922f7fd
--- /dev/null
+++ b/src/modules/filters/osisstrongs.cpp
@@ -0,0 +1,257 @@
+/******************************************************************************
+ *
+ * osisstrongs - SWFilter descendant to hide or show strongs number
+ * in a OSIS module.
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <osisstrongs.h>
+#include <swmodule.h>
+#include <versekey.h>
+#include <utilxml.h>
+
+SWORD_NAMESPACE_START
+
+const char oName[] = "Strong's Numbers"; // option name passed to the SWOptionFilter base by the constructor
+const char oTip[] = "Toggles Strong's Numbers On and Off if they exist"; // option tip passed to the SWOptionFilter base
+
+const SWBuf choices[3] = {"Off", "On", ""}; // candidate option values; the third, empty entry is never part of oValues
+const StringList oValues(&choices[0], &choices[2]); // built from &choices[0] up to &choices[2] -- presumably a half-open range, i.e. "Off" and "On"
+
+
+/**
+ * Hands the option name, tip and value list to the SWOptionFilter base
+ * and selects "Off" as the initial option value.
+ */
+OSISStrongs::OSISStrongs()
+    : SWOptionFilter(oName, oTip, &oValues)
+{
+    setOptionValue("Off");
+}
+
+
+/** Empty destructor; no cleanup is performed here. */
+OSISStrongs::~OSISStrongs()
+{
+}
+
+
+/**
+ * Records word attributes and hides Strong's numbers when the option is off.
+ *
+ * The text is rebuilt character by character; only "<w ...>" tags are ever
+ * modified:
+ *  - When the module processes entry attributes, the morph, lemma and src
+ *    attributes of each <w> are stored under
+ *    getEntryAttributes()["Word"][NNN], the tag gains a wn="NNN" attribute,
+ *    and at </w> the enclosed text is stored as "Text". NNN is a running
+ *    word number, zero-padded to at least three digits.
+ *  - When the option is off, the lemma attribute is renamed to "savlm".
+ *
+ * The word-number buffer used to be 5 bytes filled by sprintf(), which
+ * overflowed from word 10000 on; it is now larger and filled by snprintf().
+ * A null module is treated as "no entry attribute processing" instead of
+ * being dereferenced.
+ *
+ * @param text   entry text; rewritten in place
+ * @param key    key being rendered; used to pick a G/H prefix for bare numbers
+ * @param module module whose entry attributes are filled in; may be null
+ * @return always 0
+ */
+char OSISStrongs::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+    SWBuf token;
+    bool intoken = false;
+    int wordNum = 1;
+    char wordstr[16];              // room for any int formatted with "%03d"
+    const char *wordStart = 0;     // first char after the current opening <w> tag
+
+    const SWBuf orig = text;
+    const char * from = orig.c_str();
+
+    for (text = ""; *from; ++from) {
+        if (*from == '<') {
+            intoken = true;
+            token = "";
+            continue;
+        }
+        if (*from == '>') {	// process tokens
+            intoken = false;
+            if (token.startsWith("w ")) {	// Word
+                XMLTag wtag(token);
+                if (module && module->isProcessEntryAttributes()) {
+                    wordStart = from+1;
+                    char gh = 0;
+                    VerseKey *vkey = 0;
+                    if (key) {
+                        vkey = SWDYNAMIC_CAST(VerseKey, key);
+                    }
+                    SWBuf lemma      = "";
+                    SWBuf morph      = "";
+                    SWBuf src        = "";
+                    SWBuf morphClass = "";
+                    SWBuf lemmaClass = "";
+
+                    const char *attrib;
+                    snprintf(wordstr, sizeof(wordstr), "%03d", wordNum);
+
+                    // why is morph entry attribute processing done in here?  Well, it's faster.  It makes more local sense to place this code in osismorph.
+                    // easier to keep lemma and morph in same wordstr number too maybe.
+                    if ((attrib = wtag.getAttribute("morph"))) {
+                        int count = wtag.getAttributePartCount("morph", ' ');
+                        int i = (count > 1) ? 0 : -1;		// -1 for whole value cuz it's faster, but does the same thing as 0
+                        do {
+                            SWBuf mClass = "";
+                            SWBuf mp = "";
+                            attrib = wtag.getAttribute("morph", i, ' ');
+                            if (i < 0) i = 0;	// to handle our -1 condition
+
+                            // split "class:value" into mClass and the remaining value
+                            const char *m = strchr(attrib, ':');
+                            if (m) {
+                                int len = m-attrib;
+                                mClass.append(attrib, len);
+                                attrib += (len+1);
+                            }
+                            if ((mClass == "x-Robinsons") || (mClass == "x-Robinson") || (mClass == "Robinson")) {
+                                mClass = "robinson";
+                            }
+                            if (i) { morphClass += " "; morph += " "; }
+                            mp += attrib;
+                            morphClass += mClass;
+                            morph += mp;
+                            // with several parts, each one is also stored under a numbered key
+                            if (count > 1) {
+                                SWBuf tmp;
+                                tmp.setFormatted("Morph.%d", i+1);
+                                module->getEntryAttributes()["Word"][wordstr][tmp] = mp;
+                                tmp.setFormatted("MorphClass.%d", i+1);
+                                module->getEntryAttributes()["Word"][wordstr][tmp] = mClass;
+                            }
+                        } while (++i < count);
+                    }
+
+                    if ((attrib = wtag.getAttribute("lemma"))) {
+                        int count = wtag.getAttributePartCount("lemma", ' ');
+                        int i = (count > 1) ? 0 : -1;		// -1 for whole value cuz it's faster, but does the same thing as 0
+                        do {
+                            gh = 0;
+                            SWBuf lClass = "";
+                            SWBuf l = "";
+                            attrib = wtag.getAttribute("lemma", i, ' ');
+                            if (i < 0) i = 0;	// to handle our -1 condition
+
+                            const char *m = strchr(attrib, ':');
+                            if (m) {
+                                int len = m-attrib;
+                                lClass.append(attrib, len);
+                                attrib += (len+1);
+                            }
+                            if ((lClass == "x-Strongs") || (lClass == "strong") || (lClass == "Strong")) {
+                                if (isdigit(attrib[0])) {
+                                    if (vkey) {
+                                        // NOTE(review): this yields 'G' only when Testament() returns 0 --
+                                        // confirm against VerseKey that this is the intended OT/NT test.
+                                        gh = vkey->Testament() ? 'H' : 'G';
+                                    }
+                                }
+                                else {
+                                    // the number already carries its own prefix letter
+                                    gh = *attrib;
+                                    attrib++;
+                                }
+                                lClass = "strong";
+                            }
+                            if (gh) l += gh;
+                            l += attrib;
+                            if (i) { lemmaClass += " "; lemma += " "; }
+                            lemma += l;
+                            lemmaClass += lClass;
+                            if (count > 1) {
+                                SWBuf tmp;
+                                tmp.setFormatted("Lemma.%d", i+1);
+                                module->getEntryAttributes()["Word"][wordstr][tmp] = l;
+                                tmp.setFormatted("LemmaClass.%d", i+1);
+                                module->getEntryAttributes()["Word"][wordstr][tmp] = lClass;
+                            }
+                        } while (++i < count);
+                        module->getEntryAttributes()["Word"][wordstr]["PartCount"].setFormatted("%d", count);
+                    }
+
+                    if ((attrib = wtag.getAttribute("src"))) {
+                        int count = wtag.getAttributePartCount("src", ' ');
+                        int i = (count > 1) ? 0 : -1;		// -1 for whole value cuz it's faster, but does the same thing as 0
+                        do {
+                            SWBuf mp = "";
+                            attrib = wtag.getAttribute("src", i, ' ');
+                            if (i < 0) i = 0;	// to handle our -1 condition
+
+                            if (i) src += " ";
+                            mp += attrib;
+                            src += mp;
+                            if (count > 1) {
+                                SWBuf tmp;
+                                tmp.setFormatted("Src.%d", i+1);
+                                module->getEntryAttributes()["Word"][wordstr][tmp] = mp;
+                            }
+                        } while (++i < count);
+                    }
+
+
+                    // store the combined (space-joined) values, skipping empty ones
+                    if (lemma.length())
+                        module->getEntryAttributes()["Word"][wordstr]["Lemma"] = lemma;
+                    if (lemmaClass.length())
+                        module->getEntryAttributes()["Word"][wordstr]["LemmaClass"] = lemmaClass;
+                    if (morph.length())
+                        module->getEntryAttributes()["Word"][wordstr]["Morph"] = morph;
+                    if (morphClass.length())
+                        module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = morphClass;
+                    if (src.length())
+                        module->getEntryAttributes()["Word"][wordstr]["Src"] = src;
+
+                    // for an empty tag, cut the trailing " /" so wn can be appended before it
+                    if (wtag.isEmpty()) {
+                        int j;
+                        for (j = token.length()-1; ((j>0) && (strchr(" /", token[j]))); j--);
+                        token.size(j+1);
+                    }
+
+                    token += " wn=\"";
+                    token += wordstr;
+                    token += "\"";
+
+                    if (wtag.isEmpty()) {
+                        token += "/";
+                    }
+
+                    wordNum++;
+                }
+
+                if (!option) {
+                    // An alternative that removed only the Strong's parts of a
+                    // multi-part lemma attribute was tried here; it kind of worked
+                    // but broke at least the WEBIF filters for strongs. Instead the
+                    // code below just removes the whole lemma attribute, keeping
+                    // its value in "savlm".
+                    const char *l = wtag.getAttribute("lemma");
+                    if (l) {
+                        SWBuf savlm = l;
+                        wtag.setAttribute("lemma", 0);
+                        wtag.setAttribute("savlm", savlm);
+                        token = wtag;
+                        token.trim();
+                        // drop <>
+                        token << 1;
+                        token--;
+                    }
+                }
+            }
+            if (token.startsWith("/w")) {	// Word End
+                if (module && module->isProcessEntryAttributes()) {
+                    if (wordStart) {
+                        SWBuf tmp;
+                        // from points at the '>' of "</w>", so the 3 chars before it are "</w"
+                        tmp.append(wordStart, (from-wordStart)-3);
+                        snprintf(wordstr, sizeof(wordstr), "%03d", wordNum-1);
+                        module->getEntryAttributes()["Word"][wordstr]["Text"] = tmp;
+                    }
+                }
+                wordStart = 0;
+            }
+
+            // keep token in text
+            text.append('<');
+            text.append(token);
+            text.append('>');
+
+            continue;
+        }
+        if (intoken) {
+            token += *from;
+        }
+        else {
+            text.append(*from);
+        }
+    }
+    return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/osisvariants.cpp b/src/modules/filters/osisvariants.cpp
new file mode 100644
index 0000000..91d700c
--- /dev/null
+++ b/src/modules/filters/osisvariants.cpp
@@ -0,0 +1,118 @@
+/******************************************************************************
+ *
+ * osisvariants - SWFilter descendant to hide or show textual variants
+ * in an OSIS module.
+ */
+
+
+#include <stdlib.h>
+#include <osisvariants.h>
+#include <utilstr.h>
+
+SWORD_NAMESPACE_START
+
+const char OSISVariants::primary[] = "Primary Reading"; // option value that setOptionValue() maps to option 0
+const char OSISVariants::secondary[] = "Secondary Reading"; // option value that setOptionValue() maps to option 1
+const char OSISVariants::all[] = "All Readings"; // option value returned by getOptionValue() for any other option
+
+const char OSISVariants::optName[] = "Textual Variants"; // presumably the option's display name -- not referenced in the visible code
+const char OSISVariants::optTip[] = "Switch between Textual Variants modes"; // presumably the option's tip text -- not referenced in the visible code
+
+
+/**
+ * Starts with option 0 (the value getOptionValue() reports as the primary
+ * reading) and registers the three selectable option values.
+ */
+OSISVariants::OSISVariants()
+{
+    option = false;
+
+    // the order here is the order in which the values are listed
+    options.push_back(primary);
+    options.push_back(secondary);
+    options.push_back(all);
+}
+
+
+/** Empty destructor; no cleanup is performed here. */
+OSISVariants::~OSISVariants()
+{
+}
+
+/**
+ * Selects the variant mode from its name, compared case-insensitively.
+ *
+ * @param ival the primary name selects 0, the secondary name selects 1,
+ *             and anything else selects 2 (all readings)
+ */
+void OSISVariants::setOptionValue(const char *ival)
+{
+    if (stricmp(ival, primary) == 0) {
+        option = 0;
+        return;
+    }
+    if (stricmp(ival, secondary) == 0) {
+        option = 1;
+        return;
+    }
+    option = 2;
+}
+
+/**
+ * Reports the current variant mode by name.
+ *
+ * @return primary for option 0, secondary for option 1, all otherwise
+ */
+const char *OSISVariants::getOptionValue()
+{
+    if (option == 0)
+        return primary;
+    if (option == 1)
+        return secondary;
+    return all;
+}
+
+/**
+ * Filters variant markup when a single reading (option 0 or 1) is selected.
+ *
+ * For any other option the text is left untouched. Otherwise the text is
+ * rebuilt character by character:
+ *  - a tag starting with "seg " is dropped and hides everything after it,
+ *  - a tag starting with 'div type="variant"' is dropped,
+ *  - a tag starting with "/div" ends hiding, and is itself dropped when it
+ *    follows one of the two tags above,
+ *  - every other tag and all text are copied unless currently hidden.
+ *
+ * @param text   entry text; rewritten in place
+ * @param key    unused here
+ * @param module unused here
+ * @return always 0
+ */
+char OSISVariants::processText(SWBuf &text, const SWKey *key, const SWModule *module)
+{
+    // "all readings": nothing to filter
+    if ((option != 0) && (option != 1))
+        return 0;
+
+    bool intoken = false;
+    bool hide = false;     // true while output is suppressed
+    bool invar = false;    // true after a seg / variant div tag until the next /div
+
+    SWBuf token;
+    SWBuf orig = text;
+    const char *from = orig.c_str();
+
+    for (text = ""; *from; from++) {
+        // start of a tag
+        if (*from == '<') {
+            intoken = true;
+            token = "";
+            continue;
+        }
+
+        // ordinary character: tag contents or (possibly hidden) text
+        if (*from != '>') {
+            if (intoken)
+                token += *from;
+            else if (!hide)
+                text += *from;
+            continue;
+        }
+
+        // '>' reached: process the collected tag
+        intoken = false;
+        const char *name = token.c_str();
+
+        if (!strncmp(name, "seg ", 4)) {	//only one of the variants
+            invar = true;
+            hide = true;
+            continue;
+        }
+        if (!strncmp(name, "div type=\"variant\"", 18)) {
+            invar = true;
+            continue;
+        }
+        if (!strncmp(name, "/div", 4)) {
+            hide = false;
+            if (invar) {
+                invar = false;
+                continue;
+            }
+        }
+
+        if (hide)
+            continue;
+        text += '<';
+        text.append(token);
+        text += '>';
+    }
+
+    return 0;
+}
+
+
+
+
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/osiswebif.cpp b/src/modules/filters/osiswebif.cpp
new file mode 100644
index 0000000..ecc58f7
--- /dev/null
+++ b/src/modules/filters/osiswebif.cpp
@@ -0,0 +1,198 @@
+/***************************************************************************
+ OSISWEBIF.cpp - OSIS to HTML filter with hrefs
+ for strongs and morph tags
+ -------------------
+ begin : 2003-10-23
+ copyright : 2003 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <osiswebif.h>
+#include <utilxml.h>
+#include <url.h>
+#include <versekey.h>
+#include <swmodule.h>
+#include <ctype.h>
+
+
+SWORD_NAMESPACE_START
+
+
+// Constructs the filter with an empty base URL, a passage-study URL of
+// baseURL + "passagestudy.jsp", and javascript disabled.
+// NOTE(review): passageStudyURL is initialised from baseURL, which relies on
+// baseURL being declared before passageStudyURL in the class -- confirm in the header.
+OSISWEBIF::OSISWEBIF() : baseURL(""), passageStudyURL(baseURL + "passagestudy.jsp"), javascript(false) {
+}
+
+
+// Allocates the per-call user data for this filter and presets the markup
+// that wraps the words of Christ.  The new object is returned to the caller.
+BasicFilterUserData *OSISWEBIF::createUserData(const SWModule *module, const SWKey *key) {
+	MyUserData *data = new MyUserData(module, key);
+
+	data->wordsOfChristStart = "<span class=\"wordsOfJesus\"> ";
+	data->wordsOfChristEnd = "</span> ";
+
+	return data;
+}
+
+
+// Converts one OSIS token to HTML for the web interface.
+// A table substitution is tried first.  Failing that, <w>, <note>, <title>,
+// <div>, <span> and <br> are handled here and every other tag is passed to
+// OSISHTMLHREF::handleToken().
+//
+// buf       output buffer the HTML is appended to
+// token     tag text as handed over by the caller
+// userData  per-call state; cast to MyUserData
+// Returns true for tokens handled here, otherwise the base class result.
+bool OSISWEBIF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+ MyUserData *u = (MyUserData *)userData;
+ SWBuf scratch;
+ // while text pass-through is suspended the substitution goes to a scratch
+ // buffer that is never read again
+ bool sub = (u->suspendTextPassThru) ? substituteToken(scratch, token) : substituteToken(buf, token);
+ if (!sub) {
+ // manually process if it wasn't a simple substitution
+ XMLTag tag(token);
+
+ // <w> tag
+ if (!strcmp(tag.getName(), "w")) {
+
+ // start <w> tag
+ if ((!tag.isEmpty()) && (!tag.isEndTag())) {
+ // remember the start tag; its attributes are rendered at the end tag
+ u->w = token;
+ }
+
+ // end or empty <w> tag
+ else {
+ bool endTag = tag.isEndTag();
+ SWBuf lastText;
+ bool show = true; // to handle unplaced article in kjv2003-- temporary till combined
+
+ if (endTag) {
+ // re-parse the saved start tag so its attributes can be read
+ tag = u->w.c_str();
+ lastText = u->lastTextNode.c_str();
+ }
+ else lastText = "stuff";
+
+ const char *attrib;
+ const char *val;
+ // each attribute value is used from just after its first ':' if it has one
+ if ((attrib = tag.getAttribute("xlit"))) {
+ val = strchr(attrib, ':');
+ val = (val) ? (val + 1) : attrib;
+// buf.appendFormatted(" %s", val);
+ }
+ if ((attrib = tag.getAttribute("gloss"))) {
+ val = strchr(attrib, ':');
+ val = (val) ? (val + 1) : attrib;
+ buf.appendFormatted(" %s", val);
+ }
+ if ((attrib = tag.getAttribute("lemma"))) {
+ int count = tag.getAttributePartCount("lemma", ' ');
+ int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0
+ do {
+ attrib = tag.getAttribute("lemma", i, ' ');
+ if (i < 0) i = 0; // to handle our -1 condition
+ val = strchr(attrib, ':');
+ val = (val) ? (val + 1) : attrib;
+ const char *val2 = val;
+ // NOTE(review): strchr() also matches the terminating NUL, so an empty
+ // val passes the first test and val[1] is then read -- confirm val
+ // cannot be empty here
+ if ((strchr("GH", *val)) && (isdigit(val[1])))
+ val2++;
+ // lemma 3588 with no preceding text is not shown
+ if ((!strcmp(val2, "3588")) && (lastText.length() < 1))
+ show = false;
+ else buf.appendFormatted(" <small><em>&lt;<a href=\"%s?showStrong=%s#cv\">%s</a>&gt;</em></small> ", passageStudyURL.c_str(), URL::encode(val2).c_str(), val2);
+ } while (++i < count);
+ }
+ if ((attrib = tag.getAttribute("morph")) && (show)) {
+ SWBuf savelemma = tag.getAttribute("savlm");
+ if ((strstr(savelemma.c_str(), "3588")) && (lastText.length() < 1))
+ show = false;
+ if (show) {
+ int count = tag.getAttributePartCount("morph", ' ');
+ int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0
+ do {
+ attrib = tag.getAttribute("morph", i, ' ');
+ if (i < 0) i = 0; // to handle our -1 condition
+ val = strchr(attrib, ':');
+ val = (val) ? (val + 1) : attrib;
+ const char *val2 = val;
+ // skip a leading "TG"/"TH" prefix when a digit follows it
+ if ((*val == 'T') && (strchr("GH", val[1])) && (isdigit(val[2])))
+ val2+=2;
+ buf.appendFormatted(" <small><em>(<a href=\"%s?showMorph=%s#cv\">%s</a>)</em></small> ", passageStudyURL.c_str(), URL::encode(val2).c_str(), val2);
+ } while (++i < count);
+ }
+ }
+ if ((attrib = tag.getAttribute("POS"))) {
+ val = strchr(attrib, ':');
+ val = (val) ? (val + 1) : attrib;
+ buf.appendFormatted(" %s", val);
+ }
+
+ /*if (endTag)
+ buf += "}";*/
+ }
+ }
+
+ // <note> tag
+ else if (!strcmp(tag.getName(), "note")) {
+ if (!tag.isEndTag()) {
+ SWBuf type = tag.getAttribute("type");
+ bool strongsMarkup = (type == "x-strongsMarkup" || type == "strongsMarkup"); // the latter is deprecated
+ if (strongsMarkup) {
+ tag.setEmpty(false); // handle bug in KJV2003 module where some note open tags were <note ... />
+ }
+
+ if (!tag.isEmpty()) {
+ if (!strongsMarkup) { // leave strong's markup notes out, in the future we'll probably have different option filters to turn different note types on or off
+ SWBuf footnoteNumber = tag.getAttribute("swordFootnote");
+ SWBuf modName = (u->module) ? u->module->Name() : "";
+ VerseKey *vkey = NULL;
+ // see if we have a VerseKey * or descendant
+ SWTRY {
+ vkey = SWDYNAMIC_CAST(VerseKey, u->key);
+ }
+ SWCATCH ( ... ) { }
+ if (vkey) {
+ // 'x' marks a cross reference, 'n' any other note
+ char ch = ((tag.getAttribute("type") && ((!strcmp(tag.getAttribute("type"), "crossReference")) || (!strcmp(tag.getAttribute("type"), "x-cross-ref")))) ? 'x':'n');
+// buf.appendFormatted("<a href=\"noteID=%s.%c.%s\"><small><sup>*%c</sup></small></a> ", vkey->getText(), ch, footnoteNumber.c_str(), ch);
+ buf.appendFormatted("<span class=\"fn\" onclick=\"f(\'%s\',\'%s\',\'%s\');\" >%c</span>", modName.c_str(), u->key->getText(), footnoteNumber.c_str(), ch);
+ }
+ }
+ // the note body is not passed through; the level counts nested notes
+ u->suspendTextPassThru = (++u->suspendLevel);
+ }
+ }
+ if (tag.isEndTag()) {
+ u->suspendTextPassThru = (--u->suspendLevel);
+
+ }
+ }
+
+ // <title>
+ else if (!strcmp(tag.getName(), "title")) {
+ if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+ buf += "<h3>";
+ }
+ else if (tag.isEndTag()) {
+ buf += "</h3>";
+ }
+ }
+
+ // ok to leave these in
+ else if (!strcmp(tag.getName(), "div")) {
+ buf += tag;
+ }
+ else if (!strcmp(tag.getName(), "span")) {
+ buf += tag;
+ }
+ else if (!strcmp(tag.getName(), "br")) {
+ buf += tag;
+ }
+
+ // handled appropriately in base class
+ // <catchWord> & <rdg> tags (italicize)
+ // <hi> text highlighting
+ // <q> quote
+ // <milestone type="cQuote" marker="x"/>
+ // <transChange>
+ else {
+ return OSISHTMLHREF::handleToken(buf, token, userData);
+ }
+ }
+ return true;
+}
+
+
+SWORD_NAMESPACE_END
+
diff --git a/src/modules/filters/osiswordjs.cpp b/src/modules/filters/osiswordjs.cpp
new file mode 100644
index 0000000..dc805b4
--- /dev/null
+++ b/src/modules/filters/osiswordjs.cpp
@@ -0,0 +1,178 @@
+/******************************************************************************
+ *
+ * osiswordjs - SWFilter descendant to toggle Word Javascript data
+ * in an OSIS module.
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <osiswordjs.h>
+#include <swmodule.h>
+#include <ctype.h>
+#include <utilxml.h>
+#include <utilstr.h>
+#include <versekey.h>
+#include <stdio.h>
+
+SWORD_NAMESPACE_START
+
+// Name and tool tip under which this option filter is registered.
+const char oName[] = "Word Javascript";
+const char oTip[] = "Toggles Word Javascript data";
+
+// Selectable values.  oValues is built from the half-open pointer range
+// [&choices[0], &choices[2]), i.e. "Off" and "On"; the empty third element
+// only marks the end (presumably via StringList's iterator-range constructor).
+const SWBuf choices[3] = {"Off", "On", ""};
+const StringList oValues(&choices[0], &choices[2]);
+
+
+// Registers the option with the base class, defaults it to "Off" and clears
+// the lexicon/parse module pointers and the manager pointer.
+OSISWordJS::OSISWordJS() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("Off");
+
+ defaultGreekLex = 0;
+ defaultHebLex = 0;
+ defaultGreekParse = 0;
+ defaultHebParse = 0;
+ mgr = 0;
+}
+
+
+// Empty destructor body; the pointers cleared in the constructor are not
+// deleted here.
+OSISWordJS::~OSISWordJS() {
+}
+
+
+// Wraps every OSIS <w ...> element in a clickable <span> whose onclick handler
+// p(lexicon, lemma, wordID, morph, '', module) carries the word's lemma, morph
+// and source data, read from the module's "Word" entry attributes.
+// Does nothing when the option is off or no module is supplied.
+//
+// text    in/out buffer holding the entry markup
+// key     current key (may be 0); used to build the word ID
+// module  module being rendered (may be 0); source of the entry attributes
+// Always returns 0.
+char OSISWordJS::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+	// All word data lives in the module's entry attributes.  Without a module
+	// there is nothing to attach, so leave the text untouched instead of
+	// dereferencing a null pointer at the first <w> tag.
+	if (!option || !module)
+		return 0;
+
+	char token[2112]; // cheese.  Fix.
+	int tokpos = 0;
+	bool intoken = false;
+	int wordNum = 1;
+	char wordstr[16];	// fits any int printed with "%03d" (5 bytes overflowed past 9999 words)
+	SWBuf modName = module->Name();
+	// add TR to w src in KJV then remove this next line
+	SWBuf wordSrcPrefix = (modName == "KJV")?SWBuf("TR"):modName;
+
+	VerseKey *vkey = 0;
+	if (key) {
+		vkey = SWDYNAMIC_CAST(VerseKey, key);
+	}
+
+	// a stray '>' before any '<' must see an empty token, not stack garbage
+	token[0] = 0;
+	token[1] = 0;
+	token[2] = 0;
+
+	const SWBuf orig = text;
+	const char * from = orig.c_str();
+
+	for (text = ""; *from; ++from) {
+		if (*from == '<') {
+			intoken = true;
+			tokpos = 0;
+			token[0] = 0;
+			token[1] = 0;
+			token[2] = 0;
+			continue;
+		}
+		if (*from == '>') {	// process tokens
+			intoken = false;
+			if ((*token == 'w') && (token[1] == ' ')) {	// Word
+				XMLTag wtag(token);
+				sprintf(wordstr, "%03d", wordNum);
+				SWBuf lemmaClass;
+				SWBuf lemma;
+				SWBuf morph;
+				SWBuf src;
+				char gh = 0;
+				int count = atoi(module->getEntryAttributes()["Word"][wordstr]["PartCount"].c_str());
+				for (int i = 0; i < count; i++) {
+
+					// for now, lemma class can just be equal to last lemma class in multi part word
+					SWBuf tmp = "LemmaClass";
+					if (count > 1) tmp.appendFormatted(".%d", i+1);
+					lemmaClass = module->getEntryAttributes()["Word"][wordstr][tmp];
+
+					tmp = "Lemma";
+					if (count > 1) tmp.appendFormatted(".%d", i+1);
+					tmp = (module->getEntryAttributes()["Word"][wordstr][tmp].c_str());
+
+					// if we're strongs, remember the G/H prefix and strip it
+					if (lemmaClass == "strong") {
+						gh = tmp[0];
+						tmp << 1;
+					}
+					if (lemma.size()) lemma += "|";
+					lemma += tmp;
+
+					tmp = "Morph";
+					if (count > 1) tmp.appendFormatted(".%d", i+1);
+					tmp = (module->getEntryAttributes()["Word"][wordstr][tmp].c_str());
+					if (morph.size()) morph += "|";
+					morph += tmp;
+
+					tmp = "Src";
+					if (count > 1) tmp.appendFormatted(".%d", i+1);
+					tmp = (module->getEntryAttributes()["Word"][wordstr][tmp].c_str());
+					// no explicit source: fall back to the running word number
+					if (!tmp.length()) tmp.appendFormatted("%d", wordNum);
+					tmp.insert(0, wordSrcPrefix);
+					if (src.size()) src += "|";
+					src += tmp;
+				}
+
+				SWBuf lexName = "";
+				// we can pass the real lex name in, but we have some
+				// aliases in the javascript to optimize bandwidth
+				if ((gh == 'G') && (defaultGreekLex)) {
+					lexName = (!strcmp(defaultGreekLex->Name(), "StrongsGreek"))?"G":defaultGreekLex->Name();
+				}
+				else if ((gh == 'H') && (defaultHebLex)) {
+					lexName = (!strcmp(defaultHebLex->Name(), "StrongsHebrew"))?"H":defaultHebLex->Name();
+				}
+
+				SWBuf xlit = wtag.getAttribute("xlit");
+
+				if ((lemmaClass != "strong") && (xlit.startsWith("betacode:"))) {
+					lexName = "betacode";
+				}
+				SWBuf wordID;
+				if (vkey) {
+					// optimize for bandwidth and use only the verse as the unique entry id
+					wordID.appendFormatted("%d", vkey->Verse());
+				}
+				else if (key) {
+					// key may legitimately be null (see the guard above)
+					wordID = key->getText();
+				}
+				wordID.appendFormatted("_%s", src.c_str());
+				// clean up our word ID for XHTML
+				for (unsigned int i = 0; i < wordID.size(); i++) {
+					// <ctype.h> functions require an unsigned char value
+					const unsigned char ch = (unsigned char)wordID[i];
+					if ((!isdigit(ch)) && (!isalpha(ch))) {
+						wordID[i] = '_';
+					}
+				}
+				// 'p' = 'fillpop' to save bandwidth
+				text.appendFormatted("<span class=\"clk\" onclick=\"p('%s','%s','%s','%s','','%s');\" >", lexName.c_str(), lemma.c_str(), wordID.c_str(), morph.c_str(), modName.c_str());
+				wordNum++;
+			}
+			if ((*token == '/') && (token[1] == 'w')) {	// end of Word
+				text += "</w></span>";
+				continue;
+			}
+
+			// keep the token itself in the text (the <w ...> tag stays inside the span)
+			text.append('<');
+			text.append(token);
+			text.append('>');
+
+			continue;
+		}
+		if (intoken) {
+			// over-long tokens are truncated; three NULs always follow the data
+			if (tokpos < 2045)
+				token[tokpos++] = *from;
+			token[tokpos+2] = 0;
+		}
+		else {
+			text.append(*from);
+		}
+	}
+	return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/papyriplain.cpp b/src/modules/filters/papyriplain.cpp
new file mode 100644
index 0000000..423bfda
--- /dev/null
+++ b/src/modules/filters/papyriplain.cpp
@@ -0,0 +1,71 @@
+/******************************************************************************
+ *
+ * papyriplain - SWFilter descendant to strip out all Papyri tags
+ */
+
+
+#include <stdlib.h>
+#include <papyriplain.h>
+
+SWORD_NAMESPACE_START
+
+// Empty constructor body; this filter sets up no state here.
+PapyriPlain::PapyriPlain() {
+}
+
+
+// Rewrites `text` in place as plain text: joins words that were hyphenated
+// across a line break, turns line breaks into single spaces and drops
+// bracket characters ( ) [ ] { } < >.  Always returns 0.
+char PapyriPlain::processText (SWBuf &text, const SWKey *key, const SWModule *module)
+{
+ SWBuf orig = text;
+ const char *from = orig.c_str();
+
+ for (text = ""; *from; ++from) {
+
+ // remove hyphen and whitespace if that is all that separates words
+ // also be sure we're not a double hyphen '--'
+ if ((*from == '-') && (text.length() > 0) && (text[text.length()-1] != '-')) {
+ // remove: 0 = no line break seen, 1 = line break seen,
+ // 2 = line break seen and then a non-blank character reached
+ char remove = 0;
+ const char *c;
+ for (c = from+1; *c; c++) {
+ if ((*c == 10) || (*c == 13)) {
+ remove = 1;
+ }
+ // NOTE(review): '\r' (13) is not in this set, so a carriage return
+ // ends the scan immediately -- confirm that is intended
+ if (!strchr(" \t\n", *c)) {
+ if (remove) remove++;
+ break;
+ }
+ }
+ if (remove > 1) {
+ // drop the hyphen; the loop increment resumes at the character that ended the scan
+ from = c-1;
+ continue;
+ }
+ }
+
+ // remove all newlines
+ if ((*from == 10) || (*from == 13)) {
+ // NOTE(review): this tests the second-to-last output character
+ // (length()-2), not the last one -- confirm that is intended
+ if ((text.length()>1) && (text[text.length()-2] != ' ') && (*(from+1) != ' '))
+ text.append(' ');
+ continue;
+ }
+
+
+ // strip odd characters
+ switch (*from) {
+ case '(':
+ case ')':
+ case '[':
+ case ']':
+ case '{':
+ case '}':
+ case '<':
+ case '>':
+ continue;
+ }
+
+ // if we've made it this far
+ text.append(*from);
+
+ }
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/plainfootnotes.cpp b/src/modules/filters/plainfootnotes.cpp
new file mode 100644
index 0000000..0baf313
--- /dev/null
+++ b/src/modules/filters/plainfootnotes.cpp
@@ -0,0 +1,79 @@
+/***************************************************************************
+ plainfootnotes.cpp - description
+ -------------------
+ begin : Wed Oct 13 1999
+ copyright : (C) 1999 by The team of BibleTime
+ email : info@bibletime.de
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <plainfootnotes.h>
+#include <swkey.h>
+
+#include <stdlib.h>
+
+SWORD_NAMESPACE_START
+
+// Name and tool tip under which this option filter is registered.
+const char oName[] = "Footnotes";
+const char oTip[] = "Toggles Footnotes On and Off In Bible Texts If They Exist";
+
+// Selectable values.  oValues is built from the half-open pointer range
+// [&choices[0], &choices[2]), i.e. "Off" and "On"; the empty third element
+// only marks the end (presumably via StringList's iterator-range constructor).
+const SWBuf choices[3] = {"Off", "On", ""};
+const StringList oValues(&choices[0], &choices[2]);
+
+// Registers the option with the base class and defaults it to "Off".
+PLAINFootnotes::PLAINFootnotes() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("Off");
+}
+
+// Empty destructor body; nothing is released here.
+PLAINFootnotes::~PLAINFootnotes(){
+}
+
+
+// Removes footnotes, which are delimited by '{' and '}', when the Footnotes
+// option is off.  With the option on the text is left untouched.
+//
+// text  in/out buffer holding the entry text
+// Always returns 0.
+char PLAINFootnotes::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+	if (!option) {	// if we don't want footnotes
+		bool hide = false;
+
+		SWBuf orig = text;
+		const char *from = orig.c_str();
+		for (text = ""; *from; from++) {
+			if (*from == '{') {		// Footnote start
+				hide = true;
+			}
+			else if (*from == '}') {	// Footnote end
+				hide = false;
+			}
+			else if (!hide) {
+				// Append the character.  The former `text = *from` replaced
+				// the whole buffer each time, leaving only the last character.
+				text += *from;
+			}
+		}
+	}
+	return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/plainhtml.cpp b/src/modules/filters/plainhtml.cpp
new file mode 100644
index 0000000..f5f2a5c
--- /dev/null
+++ b/src/modules/filters/plainhtml.cpp
@@ -0,0 +1,83 @@
+/***************************************************************************
+ plainhtml.cpp - description
+ -------------------
+ begin : Thu Jun 24 1999
+ copyright : (C) 1999 by Torsten Uhlmann
+ email : TUhlmann@gmx.de
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <plainhtml.h>
+#include <swbuf.h>
+
+SWORD_NAMESPACE_START
+
+// Empty constructor body; this filter sets up no state here.
+PLAINHTML::PLAINHTML()
+{
+}
+
+
+// Converts plain text to simple HTML in place.
+//   - a double newline becomes <P>, a single newline <BR>
+//   - '{' and '}' open and close a small coloured footnote in parentheses
+//   - '<', '>' and '&' are escaped as entities
+//   - a space is replaced by <WBR> once more than 5000 characters have been
+//     copied verbatim since the last <WBR>
+// Always returns 0.
+//
+// NOTE(review): the colour literal "#80000" has only five hex digits;
+// "#800000" may have been intended.  Kept unchanged here.
+char PLAINHTML::processText(SWBuf &text, const SWKey *key, const SWModule *module)
+{
+	int sinceBreak = 0;	// characters copied verbatim since the last <WBR>
+
+	SWBuf source = text;
+	text = "";
+
+	for (const char *cur = source.c_str(); *cur; cur++) {
+		switch (*cur) {
+		case '\n':
+			if (cur[1] == '\n') {	// two newlines are a paragraph
+				text += "<P>";
+				cur++;
+			}
+			else {			// only one new line
+				text += "<BR>";
+			}
+			continue;
+
+		case '{':	// footnote start
+			text += "<FONT COLOR=\"#80000\"><SMALL> (";
+			continue;
+
+		case '}':	// footnote end
+			text += ") </SMALL></FONT>";
+			continue;
+
+		case '<':
+			text += "&lt;";
+			continue;
+
+		case '>':
+			text += "&gt;";
+			continue;
+
+		case '&':
+			text += "&amp;";
+			continue;
+
+		case ' ':
+			if (sinceBreak > 5000) {
+				text += "<WBR>";
+				sinceBreak = 0;
+				continue;
+			}
+			break;	// an ordinary space is copied like any other character
+
+		default:
+			break;
+		}
+
+		text += *cur;
+		sinceBreak++;
+	}
+	return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/rtfhtml.cpp b/src/modules/filters/rtfhtml.cpp
new file mode 100644
index 0000000..cac5068
--- /dev/null
+++ b/src/modules/filters/rtfhtml.cpp
@@ -0,0 +1,81 @@
+/***************************************************************************
+ rtfhtml.cpp - description
+ -------------------
+ begin : Wed Oct 13 1999
+ copyright : (C) 1999 by The team of BibleTime
+ email : info@bibletime.de
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <rtfhtml.h>
+#include <swbuf.h>
+
+SWORD_NAMESPACE_START
+
+// Empty constructor body; this filter sets up no state here.
+RTFHTML::RTFHTML() {
+
+}
+
+
+// Translates a handful of RTF commands to HTML in place:
+//   \pard  closes an open <CENTER>
+//   \par   emits "<P>\n"
+//   "\ "   (backslash + space) is dropped
+//   \qc    opens <CENTER> if not already open
+// Anything else, including an unrecognised backslash, is copied through.
+// Always returns 0.
+char RTFHTML::processText(SWBuf &text, const SWKey *key, const SWModule *module)
+{
+	bool centered = false;
+
+	SWBuf source = text;
+	const char *cur = source.c_str();
+
+	for (text = ""; *cur; cur++) {
+		if (*cur == '\\') {	// a RTF command
+			const char *cmd = cur + 1;
+			int consumed = -1;	// characters after the backslash to skip; -1 = not recognised
+
+			if (strncmp(cmd, "pard", 4) == 0) {
+				// switch all modifiers off
+				if (centered) {
+					text += "</CENTER>";
+					centered = false;
+				}
+				consumed = 4;
+			}
+			else if (strncmp(cmd, "par", 3) == 0) {
+				text += "<P>\n";
+				consumed = 3;
+			}
+			else if (*cmd == ' ') {
+				consumed = 1;
+			}
+			else if (strncmp(cmd, "qc", 2) == 0) {	// center on
+				if (!centered) {
+					text += "<CENTER>";
+					centered = true;
+				}
+				consumed = 2;
+			}
+
+			if (consumed >= 0) {
+				cur += consumed;
+				continue;
+			}
+		}
+
+		text += *cur;
+	}
+	return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/scsuutf8.cpp b/src/modules/filters/scsuutf8.cpp
new file mode 100644
index 0000000..0daff4a
--- /dev/null
+++ b/src/modules/filters/scsuutf8.cpp
@@ -0,0 +1,226 @@
+/******************************************************************************
+ *
+ * SCSUUTF8 - SWFilter descendant to convert a SCSU character to UTF-8
+ *
+ */
+
+
+/* This class is based on:
+ * http://czyborra.com/scsu/scsu.c written by Roman Czyborra@dds.nl
+ * on Andrea's balcony in North Amsterdam on 1998-08-04
+ * Thanks to Richard Verhoeven <rcb5@win.tue.nl> for his suggestion
+ * to correct the haphazard "if" after UQU to "else if" on 1998-10-01
+ *
+ * This is a deflator to UTF-8 output for input compressed in SCSU,
+ * the (Reuters) Standard Compression Scheme for Unicode as described
+ * in http://www.unicode.org/unicode/reports/tr6.html
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <swmodule.h>
+
+#include <scsuutf8.h>
+
+SWORD_NAMESPACE_START
+
+// Empty constructor body; this filter sets up no state here.
+SCSUUTF8::SCSUUTF8() {
+}
+
+
+// Writes one character to `text` as a UTF-8 multibyte sequence and returns
+// the advanced write pointer.
+//
+// uchar  a code point, or one UTF-16 surrogate code unit.  A high surrogate
+//        (0xD800-0xDBFF) is only remembered and produces no output; the
+//        following low surrogate (0xDC00-0xDFFF) is combined with it.
+// text   write position; the caller must provide room for up to 4 bytes
+// Values of 0x200000 and above produce no output.
+unsigned char* SCSUUTF8::UTF8Output(unsigned long uchar, unsigned char* text)
+{
+	/* join UTF-16 surrogates without any pairing sanity checks */
+
+	// payload of the most recent high surrogate; function-static, so it is
+	// shared by every caller of this function
+	static unsigned long highBits;
+
+	if (uchar >= 0xd800 && uchar <= 0xdbff) {
+		// keep all 10 payload bits (the former 0x3f mask kept only 6 and
+		// mis-decoded U+20000 and above)
+		highBits = uchar & 0x3ff;
+		return text;
+	}
+	if (uchar >= 0xdc00 && uchar <= 0xdfff) {
+		// 0x10000 + (high - 0xd800) * 0x400 + (low - 0xdc00),
+		// with 0x10000 - 0xdc00 folded into the constant 0x2400
+		uchar = uchar + 0x2400 + highBits * 0x400;
+	}
+
+	/* output one character as UTF-8 multibyte sequence */
+
+	if (uchar < 0x80) {
+		// was `c`, which is not the value being encoded
+		*text++ = (unsigned char)uchar;
+	}
+	else if (uchar < 0x800) {
+		*text++ = (unsigned char)(0xc0 | (uchar >> 6));
+		*text++ = (unsigned char)(0x80 | (uchar & 0x3f));
+	}
+	else if (uchar < 0x10000) {
+		*text++ = (unsigned char)(0xe0 | (uchar >> 12));
+		*text++ = (unsigned char)(0x80 | ((uchar >> 6) & 0x3f));
+		*text++ = (unsigned char)(0x80 | (uchar & 0x3f));
+	}
+	else if (uchar < 0x200000) {
+		*text++ = (unsigned char)(0xf0 | (uchar >> 18));
+		*text++ = (unsigned char)(0x80 | ((uchar >> 12) & 0x3f));
+		*text++ = (unsigned char)(0x80 | ((uchar >> 6) & 0x3f));
+		*text++ = (unsigned char)(0x80 | (uchar & 0x3f));
+	}
+
+	return text;
+}
+
+// Currently a no-op: the whole SCSU decoder below is commented out, so `text`
+// is left untouched and 0 is returned.
+// NOTE(review): the disabled code refers to `len` and FILTERPAD and treats
+// `text` as a raw buffer, none of which this signature provides -- it appears
+// to predate the SWBuf interface and would need porting before re-enabling.
+char SCSUUTF8::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+/*
+ unsigned char *to, *from;
+ unsigned long buflen = len * FILTERPAD;
+ char active = 0, mode = 0;
+ if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering
+ return -1;
+
+ static unsigned short start[8] = {0x0000,0x0080,0x0100,0x0300,0x2000,0x2080,0x2100,0x3000};
+ static unsigned short slide[8] = {0x0080,0x00C0,0x0400,0x0600,0x0900,0x3040,0x30A0,0xFF00};
+ static unsigned short win[256] = {
+ 0x0000, 0x0080, 0x0100, 0x0180, 0x0200, 0x0280, 0x0300, 0x0380,
+ 0x0400, 0x0480, 0x0500, 0x0580, 0x0600, 0x0680, 0x0700, 0x0780,
+ 0x0800, 0x0880, 0x0900, 0x0980, 0x0A00, 0x0A80, 0x0B00, 0x0B80,
+ 0x0C00, 0x0C80, 0x0D00, 0x0D80, 0x0E00, 0x0E80, 0x0F00, 0x0F80,
+ 0x1000, 0x1080, 0x1100, 0x1180, 0x1200, 0x1280, 0x1300, 0x1380,
+ 0x1400, 0x1480, 0x1500, 0x1580, 0x1600, 0x1680, 0x1700, 0x1780,
+ 0x1800, 0x1880, 0x1900, 0x1980, 0x1A00, 0x1A80, 0x1B00, 0x1B80,
+ 0x1C00, 0x1C80, 0x1D00, 0x1D80, 0x1E00, 0x1E80, 0x1F00, 0x1F80,
+ 0x2000, 0x2080, 0x2100, 0x2180, 0x2200, 0x2280, 0x2300, 0x2380,
+ 0x2400, 0x2480, 0x2500, 0x2580, 0x2600, 0x2680, 0x2700, 0x2780,
+ 0x2800, 0x2880, 0x2900, 0x2980, 0x2A00, 0x2A80, 0x2B00, 0x2B80,
+ 0x2C00, 0x2C80, 0x2D00, 0x2D80, 0x2E00, 0x2E80, 0x2F00, 0x2F80,
+ 0x3000, 0x3080, 0x3100, 0x3180, 0x3200, 0x3280, 0x3300, 0x3800,
+ 0xE000, 0xE080, 0xE100, 0xE180, 0xE200, 0xE280, 0xE300, 0xE380,
+ 0xE400, 0xE480, 0xE500, 0xE580, 0xE600, 0xE680, 0xE700, 0xE780,
+ 0xE800, 0xE880, 0xE900, 0xE980, 0xEA00, 0xEA80, 0xEB00, 0xEB80,
+ 0xEC00, 0xEC80, 0xED00, 0xED80, 0xEE00, 0xEE80, 0xEF00, 0xEF80,
+ 0xF000, 0xF080, 0xF100, 0xF180, 0xF200, 0xF280, 0xF300, 0xF380,
+ 0xF400, 0xF480, 0xF500, 0xF580, 0xF600, 0xF680, 0xF700, 0xF780,
+ 0xF800, 0xF880, 0xF900, 0xF980, 0xFA00, 0xFA80, 0xFB00, 0xFB80,
+ 0xFC00, 0xFC80, 0xFD00, 0xFD80, 0xFE00, 0xFE80, 0xFF00, 0xFF80,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x00C0, 0x0250, 0x0370, 0x0530, 0x3040, 0x30A0, 0xFF60
+ };
+
+ if (!len)
+ return 0;
+
+ memmove(&text[buflen - len], text, len);
+ from = (unsigned char*)&text[buflen - len];
+ to = (unsigned char *)text;
+
+ // -------------------------------
+
+ for (int i = 0; i < len;) {
+
+
+ if (i >= len) break;
+ c = from[i++];
+
+ if (c >= 0x80)
+ {
+ to = UTF8Output (c - 0x80 + slide[active], to);
+ }
+ else if (c >= 0x20 && c <= 0x7F)
+ {
+ to = UTF8Output (c, to);
+ }
+ else if (c == 0x0 || c == 0x9 || c == 0xA || c == 0xC || c == 0xD)
+ {
+ to = UTF8Output (c, to);
+ }
+ else if (c >= 0x1 && c <= 0x8) // SQn
+ {
+ if (i >= len) break;
+ d = from[i++]; // single quote
+
+ to = UTF8Output (d < 0x80 ? d + start [c - 0x1] :
+ d - 0x80 + slide [c - 0x1], to);
+ }
+ else if (c >= 0x10 && c <= 0x17) // SCn
+ {
+ active = c - 0x10; // change window
+ }
+ else if (c >= 0x18 && c <= 0x1F) // SDn
+ {
+ active = c - 0x18; // define window
+ if (i >= len) break;
+ slide [active] = win [from[i++]];
+ }
+ else if (c == 0xB) // SDX
+ {
+ if (i >= len) break;
+ c = from[i++];
+
+ if (i >= len) break;
+ d = from[i++];
+
+ slide [active = c>>5] = 0x10000 + (((c & 0x1F) << 8 | d) << 7);
+ }
+ else if (c == 0xE) // SQU
+ {
+ if (i >= len) break;
+ c = from[i++]; // SQU
+
+ if (i >= len) break;
+ to = UTF8Output (c << 8 | from[i++], to);
+ }
+ else if (c == 0xF) // SCU
+ {
+ mode = 1; // change to Unicode mode
+
+ while (mode)
+ {
+ if (i >= len) break;
+ c = from[i++];
+
+ if (c <= 0xDF || c >= 0xF3)
+ {
+ if (i >= len) break;
+ to = UTF8Output (c << 8 | from[i++], to);
+ }
+ else if (c == 0xF0) // UQU
+ {
+ if (i >= len) break;
+ c = from[i++];
+
+ if (i >= len) break;
+ to = UTF8Output (c << 8 | from[i++], to);
+ }
+ else if (c >= 0xE0 && c <= 0xE7) // UCn
+ {
+ active = c - 0xE0; mode = 0;
+ }
+ else if (c >= 0xE8 && c <= 0xEF) // UDn
+ {
+ if (i >= len) break;
+ slide [active=c-0xE8] = win [from[i++]]; mode = 0;
+ }
+ else if (c == 0xF1) // UDX
+ {
+ if (i >= len) break;
+ c = from[i++];
+
+ if (i >= len) break;
+ d = from[i++];
+
+ slide [active = c>>5] =
+ 0x10000 + (((c & 0x1F) << 8 | d) << 7); mode = 0;
+ }
+ }
+ }
+
+
+ }
+
+ *to++ = 0;
+ *to = 0;
+*/
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/swbasicfilter.cpp b/src/modules/filters/swbasicfilter.cpp
new file mode 100644
index 0000000..ef10e45
--- /dev/null
+++ b/src/modules/filters/swbasicfilter.cpp
@@ -0,0 +1,406 @@
+/******************************************************************************
+ * swbasicfilter.cpp - implementation of class SWBasicFilter. An SWFilter
+ * impl that provides some basic methods that
+ * many filters will need and can use as a starting
+ * point.
+ *
+ * $Id: swbasicfilter.cpp 2167 2008-05-16 23:23:39Z scribe $
+ *
+ * Copyright 2001 CrossWire Bible Society (http://www.crosswire.org)
+ * CrossWire Bible Society
+ * P. O. Box 2528
+ * Tempe, AZ 85280-2528
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ */
+
+#include <stdlib.h>
+#include <swbasicfilter.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <utilstr.h>
+#include <stringmgr.h>
+#include <map>
+#include <set>
+
+SWORD_NAMESPACE_START
+
+// Lookup-table types used only inside this file.
+typedef std::map<SWBuf, SWBuf> DualStringMap;
+typedef std::set<SWBuf> StringSet;
+
+// I hate bridge patterns but this isolates std::map from a ton of filters
+class SWBasicFilter::Private {
+public:
+ DualStringMap tokenSubMap; // token text -> replacement text
+ DualStringMap escSubMap; // escape string -> replacement text
+ StringSet escPassSet; // escape strings that are passed through unchanged
+};
+
+// Bit flags for `processStages`; processText() tests each one with `&` before
+// calling processStage() for that stage.
+const char SWBasicFilter::INITIALIZE = 1;
+const char SWBasicFilter::PRECHAR = 2;
+const char SWBasicFilter::POSTCHAR = 4;
+const char SWBasicFilter::FINALIZE = 8;
+
+// Sets up the defaults: tokens delimited by "<" and ">", escape strings by
+// "&" and ";", case-insensitive matching, no pass-through of unknown items,
+// and no extra processing stages.
+SWBasicFilter::SWBasicFilter() {
+
+ p = new Private;
+
+ processStages = 0;
+ // the delimiter pointers are zeroed before the set*() calls below store into
+ // them (presumably stdstr() releases any previous buffer -- confirm)
+ tokenStart = 0;
+ tokenEnd = 0;
+ escStart = 0;
+ escEnd = 0;
+
+ setTokenStart("<");
+ setTokenEnd(">");
+ setEscapeStart("&");
+ setEscapeEnd(";");
+
+ escStringCaseSensitive = false;
+ tokenCaseSensitive = false;
+ passThruUnknownToken = false;
+ passThruUnknownEsc = false;
+ passThruNumericEsc = false;
+}
+
+
+// Releases the four delimiter strings and the private lookup tables.
+SWBasicFilter::~SWBasicFilter() {
+	// delete [] on a null pointer is a no-op, so no guards are needed
+	delete [] tokenStart;
+	delete [] tokenEnd;
+	delete [] escStart;
+	delete [] escEnd;
+
+	delete p;
+}
+
+
+// When true, processText() copies tokens that handleToken() did not handle
+// to the output unchanged, delimiters included.
+void SWBasicFilter::setPassThruUnknownToken(bool val) {
+ passThruUnknownToken = val;
+}
+
+
+// When true, processText() copies escape strings that handleEscapeString()
+// did not handle to the output unchanged, delimiters included.
+void SWBasicFilter::setPassThruUnknownEscapeString(bool val) {
+ passThruUnknownEsc = val;
+}
+
+// Enables or disables pass-through of numeric escape strings (those starting
+// with '#'); handleNumericEscapeString() consults this flag.
+void SWBasicFilter::setPassThruNumericEscapeString(bool val) {
+	// previously assigned passThruUnknownEsc, so this setter silently changed
+	// the unknown-escape policy and never touched the numeric one
+	passThruNumericEsc = val;
+}
+
+
+// Chooses whether token lookups use the token text as given (true) or a
+// toupperstr()-normalised copy (false).
+void SWBasicFilter::setTokenCaseSensitive(bool val) {
+ tokenCaseSensitive = val;
+}
+
+
+// Chooses whether escape-string lookups use the text as given (true) or a
+// toupperstr()-normalised copy (false).
+void SWBasicFilter::setEscapeStringCaseSensitive(bool val) {
+ escStringCaseSensitive = val;
+}
+
+
+// Registers (or overwrites) the replacement text for a token.  In
+// case-insensitive mode the key is stored after toupperstr() normalisation.
+void SWBasicFilter::addTokenSubstitute(const char *findString, const char *replaceString) {
+	if (tokenCaseSensitive) {
+		p->tokenSubMap[findString] = replaceString;
+		return;
+	}
+
+	char *upperKey = 0;
+	stdstr(&upperKey, findString);
+	toupperstr(upperKey);
+	p->tokenSubMap[upperKey] = replaceString;
+	delete [] upperKey;
+}
+
+
+// Removes the substitution registered for `findString`, if any.
+// In case-insensitive mode addTokenSubstitute() stores keys after
+// toupperstr() normalisation, so the normalised key is removed as well.
+// The former exact-key lookup alone could never find such entries unless the
+// caller happened to pass the already-normalised spelling.
+void SWBasicFilter::removeTokenSubstitute(const char *findString) {
+	// exact key, as before
+	p->tokenSubMap.erase(SWBuf(findString));
+
+	if (!tokenCaseSensitive) {
+		char *upperKey = 0;
+		stdstr(&upperKey, findString);
+		if (upperKey) {
+			toupperstr(upperKey);
+			p->tokenSubMap.erase(SWBuf(upperKey));
+			delete [] upperKey;
+		}
+	}
+}
+
+// Adds an escape string to the set that is passed through unchanged.  In
+// case-insensitive mode the entry is stored after toupperstr() normalisation.
+void SWBasicFilter::addAllowedEscapeString(const char *findString) {
+	if (escStringCaseSensitive) {
+		p->escPassSet.insert(StringSet::value_type(findString));
+		return;
+	}
+
+	char *upperKey = 0;
+	stdstr(&upperKey, findString);
+	toupperstr(upperKey);
+	p->escPassSet.insert(StringSet::value_type(upperKey));
+	delete [] upperKey;
+}
+
+// Removes `findString` from the set of pass-through escape strings, if present.
+// In case-insensitive mode addAllowedEscapeString() stores entries after
+// toupperstr() normalisation, so the normalised spelling is removed as well.
+// The former exact-key lookup alone could not find such entries.
+void SWBasicFilter::removeAllowedEscapeString(const char *findString) {
+	// exact key, as before
+	p->escPassSet.erase(SWBuf(findString));
+
+	if (!escStringCaseSensitive) {
+		char *upperKey = 0;
+		stdstr(&upperKey, findString);
+		if (upperKey) {
+			toupperstr(upperKey);
+			p->escPassSet.erase(SWBuf(upperKey));
+			delete [] upperKey;
+		}
+	}
+}
+
+// Registers the replacement text for an escape string.  insert() is used, so
+// an existing entry for the same key is kept.  In case-insensitive mode the
+// key is stored after toupperstr() normalisation.
+void SWBasicFilter::addEscapeStringSubstitute(const char *findString, const char *replaceString) {
+	if (escStringCaseSensitive) {
+		p->escSubMap.insert(DualStringMap::value_type(findString, replaceString));
+		return;
+	}
+
+	char *upperKey = 0;
+	stdstr(&upperKey, findString);
+	toupperstr(upperKey);
+	p->escSubMap.insert(DualStringMap::value_type(upperKey, replaceString));
+	delete [] upperKey;
+}
+
+// Removes the substitution registered for escape string `findString`, if any.
+// In case-insensitive mode addEscapeStringSubstitute() stores keys after
+// toupperstr() normalisation, so the normalised key is removed as well.
+// The former exact-key lookup alone could not find such entries.
+void SWBasicFilter::removeEscapeStringSubstitute(const char *findString) {
+	// exact key, as before
+	p->escSubMap.erase(SWBuf(findString));
+
+	if (!escStringCaseSensitive) {
+		char *upperKey = 0;
+		stdstr(&upperKey, findString);
+		if (upperKey) {
+			toupperstr(upperKey);
+			p->escSubMap.erase(SWBuf(upperKey));
+			delete [] upperKey;
+		}
+	}
+}
+
+// Looks `token` up in the token substitution table and, when found, appends
+// the replacement to `buf`.  Returns true if a substitution was made.
+bool SWBasicFilter::substituteToken(SWBuf &buf, const char *token) {
+	DualStringMap::iterator match;
+
+	if (tokenCaseSensitive) {
+		match = p->tokenSubMap.find(token);
+	}
+	else {
+		// look up the toupperstr()-normalised spelling
+		char *upperToken = 0;
+		stdstr(&upperToken, token);
+		toupperstr(upperToken);
+		match = p->tokenSubMap.find(upperToken);
+		delete [] upperToken;
+	}
+
+	if (match == p->tokenSubMap.end())
+		return false;
+
+	buf += match->second.c_str();
+	return true;
+}
+
+// Appends `escString` to `buf` wrapped in the configured escape start and
+// end delimiters.
+void SWBasicFilter::appendEscapeString(SWBuf &buf, const char *escString) {
+ buf += escStart;
+ buf += escString;
+ buf += escEnd;
+}
+
+// If `escString` is in the pass-through set, appends it to `buf` with its
+// delimiters, in the caller's original spelling, and returns true.
+// Otherwise returns false and leaves `buf` untouched.
+bool SWBasicFilter::passAllowedEscapeString(SWBuf &buf, const char *escString) {
+	StringSet::iterator match;
+
+	if (escStringCaseSensitive) {
+		match = p->escPassSet.find(escString);
+	}
+	else {
+		// look up the toupperstr()-normalised spelling
+		char *upperEsc = 0;
+		stdstr(&upperEsc, escString);
+		toupperstr(upperEsc);
+		match = p->escPassSet.find(upperEsc);
+		delete [] upperEsc;
+	}
+
+	if (match == p->escPassSet.end())
+		return false;
+
+	appendEscapeString(buf, escString);
+	return true;
+}
+
+// Passes a numeric escape string through unchanged when numeric pass-through
+// is enabled.  Returns true if it was appended to `buf`.
+bool SWBasicFilter::handleNumericEscapeString(SWBuf &buf, const char *escString) {
+	if (!passThruNumericEsc)
+		return false;
+
+	appendEscapeString(buf, escString);
+	return true;
+}
+
+// Resolves one escape string.  Strings starting with '#' go to
+// handleNumericEscapeString(); pass-through strings are copied unchanged;
+// anything else is looked up in the escape substitution table.
+// Returns true if something was appended to `buf`.
+bool SWBasicFilter::substituteEscapeString(SWBuf &buf, const char *escString) {
+	if (*escString == '#')
+		return handleNumericEscapeString(buf, escString);
+
+	if (passAllowedEscapeString(buf, escString))
+		return true;
+
+	DualStringMap::iterator match;
+	if (escStringCaseSensitive) {
+		match = p->escSubMap.find(escString);
+	}
+	else {
+		// look up the toupperstr()-normalised spelling
+		char *upperEsc = 0;
+		stdstr(&upperEsc, escString);
+		toupperstr(upperEsc);
+		match = p->escSubMap.find(upperEsc);
+		delete [] upperEsc;
+	}
+
+	if (match == p->escSubMap.end())
+		return false;
+
+	buf += match->second.c_str();
+	return true;
+}
+
+
+// Default token handler: plain table substitution; `userData` is not used.
+// Returns true if a substitution was appended to `buf`.
+bool SWBasicFilter::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+ return substituteToken(buf, token);
+}
+
+
+// Default escape-string handler: delegates to substituteEscapeString();
+// `userData` is not used.  Returns true if something was appended to `buf`.
+bool SWBasicFilter::handleEscapeString(SWBuf &buf, const char *escString, BasicFilterUserData *userData) {
+ return substituteEscapeString(buf, escString);
+}
+
+
+// Sets the sequence that opens an escape string and caches its length.
+// stdstr() stores the string into the member (the destructor delete[]s it).
+// NOTE(review): strlen() is applied to the argument, so it must not be null.
+void SWBasicFilter::setEscapeStart(const char *escStart) {
+ stdstr(&(this->escStart), escStart);
+ escStartLen = strlen(escStart);
+}
+
+
+// Sets the sequence that closes an escape string and caches its length.
+// stdstr() stores the string into the member (the destructor delete[]s it).
+// NOTE(review): strlen() is applied to the argument, so it must not be null.
+void SWBasicFilter::setEscapeEnd(const char *escEnd) {
+ stdstr(&(this->escEnd), escEnd);
+ escEndLen = strlen(escEnd);
+}
+
+
+// Sets the sequence that opens a token and caches its length.
+// stdstr() stores the string into the member (the destructor delete[]s it).
+// NOTE(review): strlen() is applied to the argument, so it must not be null.
+void SWBasicFilter::setTokenStart(const char *tokenStart) {
+ stdstr(&(this->tokenStart), tokenStart);
+ tokenStartLen = strlen(tokenStart);
+}
+
+
+// Sets the sequence that closes a token and caches its length.
+// stdstr() stores the string into the member (the destructor delete[]s it).
+// NOTE(review): strlen() is applied to the argument, so it must not be null.
+void SWBasicFilter::setTokenEnd(const char *tokenEnd) {
+ stdstr(&(this->tokenEnd), tokenEnd);
+ tokenEndLen = strlen(tokenEnd);
+}
+
+
+// Main filter loop.  Scans a copy of `text` character by character, collects
+// tokens (between tokenStart/tokenEnd) and escape strings (between
+// escStart/escEnd), hands them to handleToken() / handleEscapeString(), and
+// rebuilds `text` from the handler output plus the plain characters.
+// The optional processStage() hooks run as selected by `processStages`.
+//
+// text    in/out buffer; replaced by the filtered result
+// key     passed to createUserData()
+// module  passed to createUserData()
+// Always returns 0.
+char SWBasicFilter::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ char *from;
+ char token[4096];
+ int tokpos = 0;
+ bool intoken = false;
+ bool inEsc = false;
+ // match positions inside the (possibly multi-character) delimiter strings
+ int escStartPos = 0, escEndPos = 0;
+ int tokenStartPos = 0, tokenEndPos = 0;
+ SWBuf lastTextNode;
+ BasicFilterUserData *userData = createUserData(module, key);
+
+ SWBuf orig = text;
+ from = orig.getRawData();
+ text = "";
+
+ if (processStages & INITIALIZE) {
+ if (processStage(INITIALIZE, text, from, userData)) { // processStage handled it all
+ delete userData;
+ return 0;
+ }
+ }
+
+ for (;*from; from++) {
+
+ if (processStages & PRECHAR) {
+ if (processStage(PRECHAR, text, from, userData)) // processStage handled this char
+ continue;
+ }
+
+ // start of a token: begin collecting into `token`
+ if (*from == tokenStart[tokenStartPos]) {
+ if (tokenStartPos == (tokenStartLen - 1)) {
+ intoken = true;
+ tokpos = 0;
+ token[0] = 0;
+ token[1] = 0;
+ token[2] = 0;
+ inEsc = false;
+ }
+ else tokenStartPos++;
+ continue;
+ }
+
+ // start of an escape string: collected in the same `token` buffer
+ if (*from == escStart[escStartPos]) {
+ if (escStartPos == (escStartLen - 1)) {
+ intoken = true;
+ tokpos = 0;
+ token[0] = 0;
+ token[1] = 0;
+ token[2] = 0;
+ inEsc = true;
+ }
+ else escStartPos++;
+ continue;
+ }
+
+ // end of an escape string
+ if (inEsc) {
+ if (*from == escEnd[escEndPos]) {
+ if (escEndPos == (escEndLen - 1)) {
+ intoken = inEsc = false;
+ userData->lastTextNode = lastTextNode;
+
+ if (!userData->suspendTextPassThru) { //if text through is disabled no tokens should pass, too
+ if ((!handleEscapeString(text, token, userData)) && (passThruUnknownEsc)) {
+ appendEscapeString(text, token);
+ }
+ }
+ escEndPos = escStartPos = tokenEndPos = tokenStartPos = 0;
+ lastTextNode = "";
+ continue;
+ }
+ }
+ }
+
+ // end of a token; handleToken() is called even while text pass-through
+ // is suspended
+ if (!inEsc) {
+ if (*from == tokenEnd[tokenEndPos]) {
+ if (tokenEndPos == (tokenEndLen - 1)) {
+ intoken = false;
+ userData->lastTextNode = lastTextNode;
+ if ((!handleToken(text, token, userData)) && (passThruUnknownToken)) {
+ text += tokenStart;
+ text += token;
+ text += tokenEnd;
+ }
+ escEndPos = escStartPos = tokenEndPos = tokenStartPos = 0;
+ lastTextNode = "";
+ continue;
+ }
+ }
+ }
+
+ if (intoken) {
+ // characters beyond 4090 are dropped; token[tokpos+2] = 0 keeps NULs
+ // after the collected data
+ if (tokpos < 4090) {
+ token[tokpos++] = *from;
+ token[tokpos+2] = 0;
+ }
+ }
+ else {
+ // plain text: copied to the output, or kept in lastSuspendSegment while
+ // pass-through is suspended; always recorded in lastTextNode
+ if ((!userData->supressAdjacentWhitespace) || (*from != ' ')) {
+ if (!userData->suspendTextPassThru) {
+ text.append(*from);
+ userData->lastSuspendSegment.size(0);
+ }
+ else userData->lastSuspendSegment.append(*from);
+ lastTextNode.append(*from);
+ }
+ userData->supressAdjacentWhitespace = false;
+ }
+
+ if (processStages & POSTCHAR)
+ processStage(POSTCHAR, text, from, userData);
+
+ }
+
+ if (processStages & FINALIZE)
+ processStage(FINALIZE, text, from, userData);
+
+ delete userData;
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/swoptfilter.cpp b/src/modules/filters/swoptfilter.cpp
new file mode 100644
index 0000000..6921190
--- /dev/null
+++ b/src/modules/filters/swoptfilter.cpp
@@ -0,0 +1,47 @@
+/******************************************************************************
+ *
+ * swoptfilter - SWFilter descendant and base class for all option filters
+ */
+
+
+#include <swoptfilter.h>
+#include <utilstr.h>
+
+SWORD_NAMESPACE_START
+
+
+/**
+ * Default constructor: the option gets an empty name, an empty tip and an
+ * empty list of permitted values.
+ */
+SWOptionFilter::SWOptionFilter() {
+	// function-local statics, so whatever is stored below stays valid
+	// after this constructor has returned
+	static const char *noText = "";
+	static StringList noValues;
+
+	optValues = &noValues;
+	optTip    = noText;
+	optName   = noText;
+}
+
+/**
+ * Constructs an option filter from caller-supplied settings.
+ *
+ * @param oName    name of the option
+ * @param oTip     descriptive tip for the option
+ * @param oValues  list of values setOptionValue() will accept
+ */
+SWOptionFilter::SWOptionFilter(const char *oName, const char *oTip, const StringList *oValues) {
+	optValues = oValues;
+	optTip    = oTip;
+	optName   = oName;
+}
+
+
+/** Destructor; this class performs no cleanup of its own. */
+SWOptionFilter::~SWOptionFilter() {}
+
+
+/**
+ * Selects the option value that matches ival, ignoring case.
+ *
+ * Only values present in the permitted-value list are accepted; when ival
+ * matches none of them the current setting is left untouched.
+ *
+ * @param ival  requested value
+ */
+void SWOptionFilter::setOptionValue(const char *ival) {
+	StringList::const_iterator candidate = optValues->begin();
+	const StringList::const_iterator last = optValues->end();
+
+	while (candidate != last) {
+		if (stricmp(candidate->c_str(), ival) == 0) {
+			// store the list's own spelling rather than the caller's
+			optionValue = *candidate;
+			// convenience flag for boolean filters
+			option = (strnicmp(ival, "On", 2) == 0);
+			return;
+		}
+		++candidate;
+	}
+}
+
+/** @return the value most recently accepted by setOptionValue() */
+const char *SWOptionFilter::getOptionValue() {
+	return optionValue;
+}
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/teihtmlhref.cpp b/src/modules/filters/teihtmlhref.cpp
new file mode 100644
index 0000000..1d213f4
--- /dev/null
+++ b/src/modules/filters/teihtmlhref.cpp
@@ -0,0 +1,205 @@
+/***************************************************************************
+ teihtmlhref.cpp - TEI to HTMLHREF filter
+ -------------------
+ begin : 2006-07-03
+ copyright : 2006 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <ctype.h>
+#include <teihtmlhref.h>
+#include <utilxml.h>
+#include <swmodule.h>
+#include <url.h>
+
+
+SWORD_NAMESPACE_START
+
+
+/**
+ * Per-call filter state.  When a module is supplied, its name is kept in
+ * `version` and `BiblicalText` records whether its type is "Biblical Texts".
+ */
+TEIHTMLHREF::MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) {
+	BiblicalText = false;
+	if (!module)
+		return;
+
+	version = module->Name();
+	BiblicalText = (strcmp(module->Type(), "Biblical Texts") == 0);
+}
+
+
+/**
+ * Configures the base filter for TEI input: '<' and '>' delimit tokens,
+ * '&' and ';' delimit escape strings, and both are matched case-sensitively.
+ */
+TEIHTMLHREF::TEIHTMLHREF() {
+ setTokenStart("<");
+ setTokenEnd(">");
+
+ setEscapeStart("&");
+ setEscapeEnd(";");
+
+ setEscapeStringCaseSensitive(true);
+
+ // the five predefined XML entities are registered as allowed escape strings
+ addAllowedEscapeString("quot");
+ addAllowedEscapeString("apos");
+ addAllowedEscapeString("amp");
+ addAllowedEscapeString("lt");
+ addAllowedEscapeString("gt");
+
+ setTokenCaseSensitive(true);
+}
+
+/**
+ * Converts one TEI tag into the HTML produced by this filter.
+ *
+ * @param buf       output buffer the generated markup is appended to
+ * @param token     text of the tag, without the surrounding token delimiters
+ * @param userData  per-call state; cast to TEIHTMLHREF::MyUserData
+ * @return true when the token was consumed (by a registered substitution
+ *         or by one of the branches below), false for an unrecognised tag
+ */
+bool TEIHTMLHREF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+	// simple substitutions take precedence over manual processing
+	if (substituteToken(buf, token))
+		return true;
+
+	MyUserData *u = (MyUserData *)userData;
+	XMLTag tag(token);
+	const bool isEnd = tag.isEndTag();
+	const bool isStart = (!isEnd) && (!tag.isEmpty());	// non-empty start tag
+
+	// <p>
+	if (!strcmp(tag.getName(), "p")) {
+		if (isEnd)
+			buf += "<!/P><br />";
+		else	// non-empty start tag and empty paragraph marker alike
+			buf += "<!P><br />";
+	}
+
+	// <hi>
+	else if (!strcmp(tag.getName(), "hi")) {
+		if (isStart) {
+			SWBuf rend = tag.getAttribute("rend");
+
+			// remembered so the matching end tag can close the same markup
+			u->lastHi = rend;
+			if (rend == "ital")
+				buf += "<i>";
+			else if (rend == "bold")
+				buf += "<b>";
+			else if (rend == "sup")
+				buf += "<small><sup>";
+		}
+		else if (isEnd) {
+			SWBuf rend = u->lastHi;
+			if (rend == "ital")
+				buf += "</i>";
+			else if (rend == "bold")
+				buf += "</b>";
+			else if (rend == "sup")
+				buf += "</sup></small>";
+		}
+	}
+
+	// <entryFree>
+	else if (!strcmp(tag.getName(), "entryFree")) {
+		if (isStart) {
+			SWBuf n = tag.getAttribute("n");
+			if (n != "") {
+				buf += "<b>";
+				buf += n;
+				buf += "</b>";
+			}
+		}
+	}
+
+	// <sense>
+	else if (!strcmp(tag.getName(), "sense")) {
+		if (isStart) {
+			SWBuf n = tag.getAttribute("n");
+			if (n != "") {
+				buf += "<br /><b>";
+				buf += n;
+				buf += "</b>";
+			}
+		}
+	}
+
+	// <div>
+	else if (!strcmp(tag.getName(), "div")) {
+		if (isStart)
+			buf += "<!P>";
+	}
+
+	// <pos>, <gen>, <case>, <gram>, <number>, <mood>, <pron>, <def>
+	// ("mood" was listed here but missing from the test; TEIRTF renders it
+	// in italics together with the other grammar tags)
+	else if (!strcmp(tag.getName(), "pos") ||
+			!strcmp(tag.getName(), "gen") ||
+			!strcmp(tag.getName(), "case") ||
+			!strcmp(tag.getName(), "gram") ||
+			!strcmp(tag.getName(), "number") ||
+			!strcmp(tag.getName(), "mood") ||
+			!strcmp(tag.getName(), "pron") ||
+			!strcmp(tag.getName(), "def")) {
+		if (isStart)
+			buf += "<i>";
+		else if (isEnd)
+			buf += "</i>";
+	}
+
+	// <tr>
+	else if (!strcmp(tag.getName(), "tr")) {
+		if (isStart)
+			buf += "<i>";
+		else if (isEnd)
+			buf += "</i>";
+	}
+
+	// <orth>
+	else if (!strcmp(tag.getName(), "orth")) {
+		if (isStart)
+			buf += "<b>";
+		else if (isEnd)
+			buf += "</b>";
+	}
+
+	// <etym>, <usg>
+	else if (!strcmp(tag.getName(), "etym") ||
+			!strcmp(tag.getName(), "usg")) {
+		// recognised, but nothing is emitted
+	}
+
+	// <note>
+	else if (!strcmp(tag.getName(), "note")) {
+		if (isStart) {
+			// the note body is withheld from the output
+			u->suspendTextPassThru = true;
+		}
+		if (isEnd) {
+			// NOTE(review): the attribute is read from the END tag, which
+			// normally carries no attributes, so the number is presumably
+			// always empty here; fixing that needs the start tag to be
+			// remembered in MyUserData.
+			SWBuf footnoteNumber = tag.getAttribute("swordFootnote");
+
+			buf.appendFormatted("<a href=\"passagestudy.jsp?action=showNote&type=n&value=%s&module=%s&passage=%s\"><small><sup>*n</sup></small></a>",
+				URL::encode(footnoteNumber.c_str()).c_str(),
+				URL::encode(u->version.c_str()).c_str(),
+				URL::encode(u->key->getText()).c_str());
+
+			u->suspendTextPassThru = false;
+		}
+	}
+
+	else {
+		return false;  // we still didn't handle token
+	}
+
+	return true;
+}
+
+
+SWORD_NAMESPACE_END
+
diff --git a/src/modules/filters/teiplain.cpp b/src/modules/filters/teiplain.cpp
new file mode 100644
index 0000000..c721d84
--- /dev/null
+++ b/src/modules/filters/teiplain.cpp
@@ -0,0 +1,116 @@
+/***************************************************************************
+ teiplain.cpp - TEI to Plaintext filter
+ -------------------
+ begin : 2006-07-05
+ copyright : 2006 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <teiplain.h>
+#include <ctype.h>
+
+SWORD_NAMESPACE_START
+
+/**
+ * Configures the base filter for TEI input: '<' and '>' delimit tokens,
+ * '&' and ';' delimit escape strings, and both are matched case-sensitively.
+ */
+TEIPlain::TEIPlain() {
+ setTokenStart("<");
+ setTokenEnd(">");
+
+ setEscapeStart("&");
+ setEscapeEnd(";");
+
+ setEscapeStringCaseSensitive(true);
+
+ // the five predefined XML entities are replaced by their literal characters
+ addEscapeStringSubstitute("amp", "&");
+ addEscapeStringSubstitute("apos", "'");
+ addEscapeStringSubstitute("lt", "<");
+ addEscapeStringSubstitute("gt", ">");
+ addEscapeStringSubstitute("quot", "\"");
+
+ setTokenCaseSensitive(true);
+}
+
+
+/**
+ * Renders one TEI tag as plain text.
+ *
+ * @param buf       output buffer the replacement text is appended to
+ * @param token     text of the tag, without the surrounding token delimiters
+ * @param userData  per-call state; only supressAdjacentWhitespace is written
+ * @return true when the token was consumed, false for an unrecognised tag
+ */
+bool TEIPlain::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+	// a registered simple substitution settles the matter
+	if (substituteToken(buf, token))
+		return true;
+
+	XMLTag tag(token);
+	const bool closing = tag.isEndTag();
+	const bool opening = (!closing) && (!tag.isEmpty());
+
+	// <p> paragraph tag
+	if (!strcmp(tag.getName(), "p")) {
+		// start and end tags give one line break, the empty marker gives two
+		buf += (opening || closing) ? "\n" : "\n\n";
+		if (!opening)
+			userData->supressAdjacentWhitespace = true;
+	}
+
+	// <entryFree>
+	else if (!strcmp(tag.getName(), "entryFree")) {
+		SWBuf label = tag.getAttribute("n");
+		if (opening && label.length()) {
+			buf += label;
+			buf += ". ";
+		}
+	}
+
+	// <sense>
+	else if (!strcmp(tag.getName(), "sense")) {
+		SWBuf label = tag.getAttribute("n");
+		if (opening) {
+			if (label.length()) {
+				buf += label;
+				buf += ". ";
+			}
+		}
+		else if (closing) {
+			buf += "\n";
+		}
+	}
+
+	// <div>
+	else if (!strcmp(tag.getName(), "div")) {
+		if (opening)
+			buf.append("\n\n\n");
+	}
+
+	// <etym>
+	else if (!strcmp(tag.getName(), "etym")) {
+		if (opening)
+			buf += "[";
+		else if (closing)
+			buf += "]";
+	}
+
+	else {
+		return false;  // we still didn't handle token
+	}
+
+	return true;
+}
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/teirtf.cpp b/src/modules/filters/teirtf.cpp
new file mode 100644
index 0000000..006f099
--- /dev/null
+++ b/src/modules/filters/teirtf.cpp
@@ -0,0 +1,182 @@
+/***************************************************************************
+ teirtf.cpp - TEI to RTF filter
+ -------------------
+ begin : 2006-07-03
+ copyright : 2006 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <ctype.h>
+#include <teirtf.h>
+#include <utilxml.h>
+#include <swmodule.h>
+#include <versekey.h>
+
+SWORD_NAMESPACE_START
+
+
+/**
+ * Per-call filter state.  When a module is supplied, its name is kept in
+ * `version` and `BiblicalText` records whether its type is "Biblical Texts".
+ */
+TEIRTF::MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) {
+	BiblicalText = false;
+	if (!module)
+		return;
+
+	version = module->Name();
+	BiblicalText = (strcmp(module->Type(), "Biblical Texts") == 0);
+}
+
+
+/**
+ * Configures the base filter for TEI input: '<' and '>' delimit tokens,
+ * '&' and ';' delimit escape strings, and both are matched case-sensitively.
+ */
+TEIRTF::TEIRTF() {
+ setTokenStart("<");
+ setTokenEnd(">");
+
+ setEscapeStart("&");
+ setEscapeEnd(";");
+
+ setEscapeStringCaseSensitive(true);
+
+ // the five predefined XML entities are replaced by their literal characters
+ addEscapeStringSubstitute("amp", "&");
+ addEscapeStringSubstitute("apos", "'");
+ addEscapeStringSubstitute("lt", "<");
+ addEscapeStringSubstitute("gt", ">");
+ addEscapeStringSubstitute("quot", "\"");
+
+ setTokenCaseSensitive(true);
+}
+
+
+/**
+ * Converts one TEI tag into RTF.
+ *
+ * @param buf       output buffer the generated RTF is appended to
+ * @param token     text of the tag, without the surrounding token delimiters
+ * @param userData  per-call state; cast to TEIRTF::MyUserData
+ * @return true when the token was consumed (by a registered substitution
+ *         or by one of the branches below), false for an unrecognised tag
+ */
+bool TEIRTF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+	// simple substitutions take precedence over manual processing
+	if (substituteToken(buf, token))
+		return true;
+
+	MyUserData *u = (MyUserData *)userData;
+	XMLTag tag(token);
+	const bool isEnd = tag.isEndTag();
+	const bool isStart = (!isEnd) && (!tag.isEmpty());	// non-empty start tag
+
+	// <p> paragraph tag
+	if (!strcmp(tag.getName(), "p")) {
+		if (!isEnd) {	// start tag or empty paragraph marker
+			buf += "{\\sb100\\fi200\\par}";
+		}
+	}
+
+	// <hi>
+	else if (!strcmp(tag.getName(), "hi")) {
+		if (isStart) {
+			SWBuf rend = tag.getAttribute("rend");
+			if (rend == "ital")
+				buf += "{\\i1 ";
+			else if (rend == "bold")
+				buf += "{\\b1 ";
+			else if (rend == "sup")
+				buf += "{\\super ";
+			else
+				// unknown rendition: still open a (plain) group, because
+				// the end tag below always closes one
+				buf += "{";
+		}
+		else if (isEnd) {
+			buf += "}";
+		}
+	}
+
+	// <entryFree>
+	else if (!strcmp(tag.getName(), "entryFree")) {
+		if (isStart) {
+			SWBuf n = tag.getAttribute("n");
+			if (n != "") {
+				buf += "{\\b1 ";
+				buf += n;
+				buf += ". }";
+			}
+		}
+	}
+
+	// <sense>
+	else if (!strcmp(tag.getName(), "sense")) {
+		if (isStart) {
+			SWBuf n = tag.getAttribute("n");
+			if (n != "") {
+				buf += "{\\sb100\\par\\b1 ";
+				buf += n;
+				buf += ". }";
+			}
+		}
+	}
+
+	// <div>
+	else if (!strcmp(tag.getName(), "div")) {
+		if (isStart)
+			buf.append("{\\pard\\sa300}");
+	}
+
+	// <pos>, <gen>, <case>, <gram>, <number>, <mood>
+	else if (!strcmp(tag.getName(), "pos") ||
+			!strcmp(tag.getName(), "gen") ||
+			!strcmp(tag.getName(), "case") ||
+			!strcmp(tag.getName(), "gram") ||
+			!strcmp(tag.getName(), "number") ||
+			!strcmp(tag.getName(), "mood")) {
+		if (isStart)
+			buf += "{\\i1 ";
+		else if (isEnd)
+			buf += "}";
+	}
+
+	// <tr>
+	else if (!strcmp(tag.getName(), "tr")) {
+		if (isStart)
+			buf += "{\\i1 ";
+		else if (isEnd)
+			buf += "}";
+	}
+
+	// <etym>
+	else if (!strcmp(tag.getName(), "etym")) {
+		if (isStart)
+			buf += "[";
+		else if (isEnd)
+			buf += "]";
+	}
+
+	// <note> tag
+	else if (!strcmp(tag.getName(), "note")) {
+		if (isStart) {
+			SWBuf footnoteNumber = tag.getAttribute("swordFootnote");
+			// must start out null: if the cast below throws, the value is
+			// still tested afterwards
+			VerseKey *vkey = 0;
+			// see if we have a VerseKey * or descendant
+			SWTRY {
+				vkey = SWDYNAMIC_CAST(VerseKey, u->key);
+			}
+			SWCATCH ( ... ) { }
+			if (vkey) {
+				buf.appendFormatted("{\\super <a href=\"\">*%s</a>} ", footnoteNumber.c_str());
+			}
+			// the note body is withheld from the output
+			u->suspendTextPassThru = true;
+		}
+		if (isEnd) {
+			u->suspendTextPassThru = false;
+		}
+	}
+
+	else {
+		return false;  // we still didn't handle token
+	}
+
+	return true;
+}
+
+
+SWORD_NAMESPACE_END
+
diff --git a/src/modules/filters/thmlfootnotes.cpp b/src/modules/filters/thmlfootnotes.cpp
new file mode 100644
index 0000000..23c43b4
--- /dev/null
+++ b/src/modules/filters/thmlfootnotes.cpp
@@ -0,0 +1,124 @@
+/******************************************************************************
+ *
+ * thmlfootnotes - SWFilter descendant to hide or show footnotes
+ * in a ThML module.
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <thmlfootnotes.h>
+#include <swmodule.h>
+#include <swbuf.h>
+#include <versekey.h>
+#include <utilxml.h>
+
+SWORD_NAMESPACE_START
+
+// Option name and tip handed to the SWOptionFilter base class.
+const char oName[] = "Footnotes";
+const char oTip[] = "Toggles Footnotes On and Off if they exist";
+
+// Only "Off" and "On" end up in oValues: the range passed to the list
+// constructor stops before the third (empty) array entry.
+const SWBuf choices[3] = {"Off", "On", ""};
+const StringList oValues(choices, choices + 2);
+
+/** Creates the filter with the option initially set to "Off". */
+ThMLFootnotes::ThMLFootnotes() : SWOptionFilter(oName, oTip, &oValues) {
+	setOptionValue("Off");
+}
+
+
+/** Destructor; nothing to clean up. */
+ThMLFootnotes::~ThMLFootnotes() {}
+
+
+/**
+ * Walks the entry text tag by tag and handles every <note> element: when
+ * the module processes entry attributes, the note is recorded under the
+ * "Footnote" entry attribute; the note itself stays in the text only if
+ * the option is on or the note is a cross reference.
+ *
+ * @param text    entry text; rebuilt in place from a private copy
+ * @param key     entry key; its text seeds the VerseKey used to parse reference lists
+ * @param module  module whose entry attributes receive the footnote data
+ * @return always 0
+ */
+char ThMLFootnotes::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ SWBuf token;
+ bool intoken = false;
+ // true while inside a <note>; text and tags are then collected in tagText
+ bool hide = false;
+ SWBuf tagText;
+ XMLTag startTag;
+ // passages of the <scripRef> tags seen inside the current note, "; "-separated
+ SWBuf refs = "";
+ int footnoteNum = 1;
+ char buf[254];
+ VerseKey parser = key->getText();
+
+ SWBuf orig = text;
+ const char *from = orig.c_str();
+
+ for (text = ""; *from; from++) {
+ if (*from == '<') {
+ intoken = true;
+ token = "";
+ continue;
+ }
+ if (*from == '>') { // process tokens
+ intoken = false;
+
+ XMLTag tag(token);
+ if (!strcmp(tag.getName(), "note")) {
+ if (!tag.isEndTag()) {
+ if (!tag.isEmpty()) {
+ // non-empty start tag: remember it and start collecting the note body
+ refs = "";
+ startTag = tag;
+ hide = true;
+ tagText = "";
+ continue;
+ }
+ }
+ if (hide && tag.isEndTag()) {
+ if (module->isProcessEntryAttributes()) {
+ // the running footnote count is kept in the entry attributes themselves
+ SWBuf fc = module->getEntryAttributes()["Footnote"]["count"]["value"];
+ footnoteNum = (fc.length()) ? atoi(fc.c_str()) : 0;
+ sprintf(buf, "%i", ++footnoteNum);
+ module->getEntryAttributes()["Footnote"]["count"]["value"] = buf;
+ // copy every attribute of the start tag, then the note body
+ StringList attributes = startTag.getAttributeNames();
+ for (StringList::iterator it = attributes.begin(); it != attributes.end(); it++) {
+ module->getEntryAttributes()["Footnote"][buf][it->c_str()] = startTag.getAttribute(it->c_str());
+ }
+ module->getEntryAttributes()["Footnote"][buf]["body"] = tagText;
+ startTag.setAttribute("swordFootnote", buf);
+ if ((startTag.getAttribute("type")) && (!strcmp(startTag.getAttribute("type"), "crossReference"))) {
+ // no <scripRef> passages collected: parse the note body as a verse list instead
+ if (!refs.length())
+ refs = parser.ParseVerseList(tagText.c_str(), parser, true).getRangeText();
+ module->getEntryAttributes()["Footnote"][buf]["refList"] = refs.c_str();
+ }
+ }
+ hide = false;
+ if ((option) || ((startTag.getAttribute("type") && (!strcmp(startTag.getAttribute("type"), "crossReference"))))) { // we want the tag in the text; crossReferences are handled by another filter
+ text += startTag;
+ text.append(tagText);
+ }
+ else continue;
+ }
+ }
+
+ // if not a note token, keep token in text
+ if ((!strcmp(tag.getName(), "scripRef")) && (!tag.isEndTag())) {
+ SWBuf osisRef = tag.getAttribute("passage");
+ if (refs.length())
+ refs += "; ";
+ refs += osisRef;
+ }
+ if (!hide) {
+ text += '<';
+ text.append(token);
+ text += '>';
+ }
+ else {
+ tagText += '<';
+ tagText.append(token);
+ tagText += '>';
+ }
+ continue;
+ }
+ if (intoken) { //copy token
+ token += *from;
+ }
+ else if (!hide) { //copy text which is not inside a token
+ text += *from;
+ }
+ else tagText += *from;
+ }
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmlgbf.cpp b/src/modules/filters/thmlgbf.cpp
new file mode 100644
index 0000000..f8703b1
--- /dev/null
+++ b/src/modules/filters/thmlgbf.cpp
@@ -0,0 +1,291 @@
+/***************************************************************************
+ thmlgbf.cpp - ThML to GBF filter
+ -------------------
+ begin : 1999-10-28
+ copyright : 2001 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <thmlgbf.h>
+#include <utilstr.h>
+#include <swbuf.h>
+
+SWORD_NAMESPACE_START
+
+/** Constructor; the filter needs no set-up. */
+ThMLGBF::ThMLGBF() {}
+
+
+char ThMLGBF::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ const char *from;
+ char token[2048];
+ int tokpos = 0;
+ bool intoken = false;
+ bool ampersand = false;
+ bool sechead = false;
+ bool title = false;
+
+ SWBuf orig = text;
+ from = orig.c_str();
+
+ for (text = ""; *from; from++) {
+ if (*from == '<') {
+ intoken = true;
+ tokpos = 0;
+ token[0] = 0;
+ token[1] = 0;
+ token[2] = 0;
+ ampersand = false;
+ continue;
+ }
+ else if (*from == '&') {
+ intoken = true;
+ tokpos = 0;
+ memset(token, 0, 2048);
+ ampersand = true;
+ continue;
+ }
+ if (*from == ';' && ampersand) {
+ intoken = false;
+
+ if (!strncmp("nbsp", token, 4)) text += ' ';
+ else if (!strncmp("quot", token, 4)) text += '"';
+ else if (!strncmp("amp", token, 3)) text += '&';
+ else if (!strncmp("lt", token, 2)) text += '<';
+ else if (!strncmp("gt", token, 2)) text += '>';
+ else if (!strncmp("brvbar", token, 6)) text += '';
+ else if (!strncmp("sect", token, 4)) text += '';
+ else if (!strncmp("copy", token, 4)) text += '';
+ else if (!strncmp("laquo", token, 5)) text += '';
+ else if (!strncmp("reg", token, 3)) text += '';
+ else if (!strncmp("acute", token, 5)) text += '';
+ else if (!strncmp("para", token, 4)) text += '';
+ else if (!strncmp("raquo", token, 5)) text += '';
+
+ else if (!strncmp("Aacute", token, 6)) text += '';
+ else if (!strncmp("Agrave", token, 6)) text += '';
+ else if (!strncmp("Acirc", token, 5)) text += '';
+ else if (!strncmp("Auml", token, 4)) text += '';
+ else if (!strncmp("Atilde", token, 6)) text += '';
+ else if (!strncmp("Aring", token, 5)) text += '';
+ else if (!strncmp("aacute", token, 6)) text += '';
+ else if (!strncmp("agrave", token, 6)) text += '';
+ else if (!strncmp("acirc", token, 5)) text += '';
+ else if (!strncmp("auml", token, 4)) text += '';
+ else if (!strncmp("atilde", token, 6)) text += '';
+ else if (!strncmp("aring", token, 5)) text += '';
+ else if (!strncmp("Eacute", token, 6)) text += '';
+ else if (!strncmp("Egrave", token, 6)) text += '';
+ else if (!strncmp("Ecirc", token, 5)) text += '';
+ else if (!strncmp("Euml", token, 4)) text += '';
+ else if (!strncmp("eacute", token, 6)) text += '';
+ else if (!strncmp("egrave", token, 6)) text += '';
+ else if (!strncmp("ecirc", token, 5)) text += '';
+ else if (!strncmp("euml", token, 4)) text += '';
+ else if (!strncmp("Iacute", token, 6)) text += '';
+ else if (!strncmp("Igrave", token, 6)) text += '';
+ else if (!strncmp("Icirc", token, 5)) text += '';
+ else if (!strncmp("Iuml", token, 4)) text += '';
+ else if (!strncmp("iacute", token, 6)) text += '';
+ else if (!strncmp("igrave", token, 6)) text += '';
+ else if (!strncmp("icirc", token, 5)) text += '';
+ else if (!strncmp("iuml", token, 4)) text += '';
+ else if (!strncmp("Oacute", token, 6)) text += '';
+ else if (!strncmp("Ograve", token, 6)) text += '';
+ else if (!strncmp("Ocirc", token, 5)) text += '';
+ else if (!strncmp("Ouml", token, 4)) text += '';
+ else if (!strncmp("Otilde", token, 6)) text += '';
+ else if (!strncmp("oacute", token, 6)) text += '';
+ else if (!strncmp("ograve", token, 6)) text += '';
+ else if (!strncmp("ocirc", token, 5)) text += '';
+ else if (!strncmp("ouml", token, 4)) text += '';
+ else if (!strncmp("otilde", token, 6)) text += '';
+ else if (!strncmp("Uacute", token, 6)) text += '';
+ else if (!strncmp("Ugrave", token, 6)) text += '';
+ else if (!strncmp("Ucirc", token, 5)) text += '';
+ else if (!strncmp("Uuml", token, 4)) text += '';
+ else if (!strncmp("uacute", token, 6)) text += '';
+ else if (!strncmp("ugrave", token, 6)) text += '';
+ else if (!strncmp("ucirc", token, 5)) text += '';
+ else if (!strncmp("uuml", token, 4)) text += '';
+ else if (!strncmp("Yacute", token, 6)) text += '';
+ else if (!strncmp("yacute", token, 6)) text += '';
+ else if (!strncmp("yuml", token, 4)) text += '';
+
+ else if (!strncmp("deg", token, 3)) text += '';
+ else if (!strncmp("plusmn", token, 6)) text += '';
+ else if (!strncmp("sup2", token, 4)) text += '';
+ else if (!strncmp("sup3", token, 4)) text += '';
+ else if (!strncmp("sup1", token, 4)) text += '';
+ else if (!strncmp("nbsp", token, 4)) text += '';
+ else if (!strncmp("pound", token, 5)) text += '';
+ else if (!strncmp("cent", token, 4)) text += '';
+ else if (!strncmp("frac14", token, 6)) text += '';
+ else if (!strncmp("frac12", token, 6)) text += '';
+ else if (!strncmp("frac34", token, 6)) text += '';
+ else if (!strncmp("iquest", token, 6)) text += '';
+ else if (!strncmp("iexcl", token, 5)) text += '';
+ else if (!strncmp("ETH", token, 3)) text += '';
+ else if (!strncmp("eth", token, 3)) text += '';
+ else if (!strncmp("THORN", token, 5)) text += '';
+ else if (!strncmp("thorn", token, 5)) text += '';
+ else if (!strncmp("AElig", token, 5)) text += '';
+ else if (!strncmp("aelig", token, 5)) text += '';
+ else if (!strncmp("Oslash", token, 6)) text += '';
+ else if (!strncmp("curren", token, 6)) text += '';
+ else if (!strncmp("Ccedil", token, 6)) text += '';
+ else if (!strncmp("ccedil", token, 6)) text += '';
+ else if (!strncmp("szlig", token, 5)) text += '';
+ else if (!strncmp("Ntilde", token, 6)) text += '';
+ else if (!strncmp("ntilde", token, 6)) text += '';
+ else if (!strncmp("yen", token, 3)) text += '';
+ else if (!strncmp("not", token, 3)) text += '';
+ else if (!strncmp("ordf", token, 4)) text += '';
+ else if (!strncmp("uml", token, 3)) text += '';
+ else if (!strncmp("shy", token, 3)) text += '';
+ else if (!strncmp("macr", token, 4)) text += '';
+ else if (!strncmp("micro", token, 5)) text += "";
+ else if (!strncmp("middot", token, 6)) text +="";
+ else if (!strncmp("cedil", token, 5)) text += "";
+ else if (!strncmp("ordm", token, 4)) text += "";
+ else if (!strncmp("times", token, 5)) text += "";
+ else if (!strncmp("divide", token, 6)) text +="";
+ else if (!strncmp("oslash", token, 6)) text +="";
+ continue;
+
+ }
+ else if (*from == '>' && !ampersand) {
+ intoken = false;
+ // process desired tokens
+ if (!strncmp(token, "sync type=\"Strongs\" value=\"", 27)) {
+ text += "<W";
+ for (unsigned int i = 27; token[i] != '\"'; i++)
+ text += token[i];
+ text += '>';
+ continue;
+ }
+ if (!strncmp(token, "sync type=\"morph\" value=\"", 25)) {
+ text += "<WT";
+ for (unsigned int i = 25; token[i] != '\"'; i++)
+ text += token[i];
+ text += '>';
+ continue;
+ }
+ else if (!strncmp(token, "scripRef", 8)) {
+ text += "<RX>";
+ continue;
+ }
+ else if (!strncmp(token, "/scripRef", 9)) {
+ text += "<Rx>";
+ continue;
+ }
+ else if (!strncmp(token, "note", 4)) {
+ text += "<RF>";
+ continue;
+ }
+ else if (!strncmp(token, "/note", 5)) {
+ text += "<Rf>";
+ continue;
+ }
+ else if (!strncmp(token, "sup", 3)) {
+ text += "<FS>";
+ }
+ else if (!strncmp(token, "/sup", 4)) {
+ text += "<Fs>";
+ }
+ else if (!strnicmp(token, "font color=#ff0000", 18)) {
+ text += "<FR>";
+ continue;
+ }
+ else if (!strnicmp(token, "/font", 5)) {
+ text += "<Fr>";
+ continue;
+ }
+ else if (!strncmp(token, "div class=\"sechead\"", 19)) {
+ text += "<TS>";
+ sechead = true;
+ continue;
+ }
+ else if (sechead && !strncmp(token, "/div", 19)) {
+ text += "<Ts>";
+ sechead = false;
+ continue;
+ }
+ else if (!strncmp(token, "div class=\"title\"", 19)) {
+ text += "<TT>";
+ title = true;
+ continue;
+ }
+ else if (title && !strncmp(token, "/div", 19)) {
+ text += "<Tt>";
+ title = false;
+ continue;
+ }
+ else if (!strnicmp(token, "br", 2)) {
+ text += "<CL>";
+ continue;
+ }
+ else switch(*token) {
+ case 'I': // font tags
+ case 'i':
+ text += "<FI>";
+ continue;
+ case 'B': // bold start
+ case 'b':
+ text += "<FB>";
+ continue;
+ case '/':
+ switch(token[1]) {
+ case 'P':
+ case 'p':
+ text += "<CM>";
+ continue;
+ case 'I':
+ case 'i': // italic end
+ text += "<Fi>";
+ continue;
+ case 'B': // bold start
+ case 'b':
+ text += "<Fb>";
+ continue;
+ }
+ }
+ continue;
+ }
+ if (intoken) {
+ if (tokpos < 2045)
+ token[tokpos++] = *from;
+ token[tokpos+2] = 0;
+ }
+ else text += *from;
+ }
+
+ orig = text;
+ from = orig.c_str();
+ for (text = ""; *from; from++) { //loop to remove extra spaces
+ if ((strchr(" \t\n\r", *from))) {
+ while (*(from+1) && (strchr(" \t\n\r", *(from+1)))) {
+ from++;
+ }
+ text += " ";
+ }
+ else {
+ text += *from;
+ }
+ }
+ text += (char)0;
+
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmlheadings.cpp b/src/modules/filters/thmlheadings.cpp
new file mode 100644
index 0000000..4d6134f
--- /dev/null
+++ b/src/modules/filters/thmlheadings.cpp
@@ -0,0 +1,153 @@
+/******************************************************************************
+ *
+ * thmlheadings - SWFilter descendant to hide or show headings
+ * in a ThML module.
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <thmlheadings.h>
+#include <utilxml.h>
+#include <utilstr.h>
+#include <swmodule.h>
+#include <stdio.h>
+
+SWORD_NAMESPACE_START
+
+// Option name and tip handed to the SWOptionFilter base class.
+const char oName[] = "Headings";
+const char oTip[] = "Toggles Headings On and Off if they exist";
+
+// Only "Off" and "On" end up in oValues: the range passed to the list
+// constructor stops before the third (empty) array entry.
+const SWBuf choices[3] = {"Off", "On", ""};
+const StringList oValues(choices, choices + 2);
+
+/** Creates the filter with the option initially set to "Off". */
+ThMLHeadings::ThMLHeadings() : SWOptionFilter(oName, oTip, &oValues) {
+	setOptionValue("Off");
+}
+
+
+/** Destructor; nothing to clean up. */
+ThMLHeadings::~ThMLHeadings() {}
+
+
+/**
+ * Walks the entry text tag by tag and pulls heading divs (<div> elements
+ * whose class is "sechead" or "title") out of the text, storing them in
+ * the module's "Heading" entry attributes when attribute processing is
+ * enabled.
+ *
+ * NOTE(review): every heading start tag sets `preverse`, so as written
+ * only the "Preverse" branch below appears reachable and headings are
+ * recorded only while the option is on - confirm against callers.
+ *
+ * @param text    entry text; rebuilt in place from a private copy
+ * @param key     not used
+ * @param module  module whose entry attributes receive the headings
+ * @return always 0
+ */
+char ThMLHeadings::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ SWBuf token;
+ bool intoken = false;
+ bool isheader = false;
+ // true while inside a heading div; text is then collected in header
+ bool hide = false;
+ bool preverse = false;
+ bool withinDiv = false;
+ SWBuf header;
+ int headerNum = 0;
+ int pvHeaderNum = 0;
+ char buf[254];
+ XMLTag startTag;
+
+ SWBuf orig = text;
+ const char *from = orig.c_str();
+
+ XMLTag tag;
+
+ for (text = ""; *from; ++from) {
+ if (*from == '<') {
+ intoken = true;
+ token = "";
+
+ continue;
+ }
+ if (*from == '>') { // process tokens
+ intoken = false;
+
+ if (!strnicmp(token.c_str(), "div", 3) || !strnicmp(token.c_str(), "/div", 4)) {
+ withinDiv = (!strnicmp(token.c_str(), "div", 3));
+ tag = token;
+ if (hide && tag.isEndTag()) {
+ // end of a heading div: store the collected heading
+ if (module->isProcessEntryAttributes() && (option || (!preverse))) {
+ if (preverse) {
+ sprintf(buf, "%i", pvHeaderNum++);
+ module->getEntryAttributes()["Heading"]["Preverse"][buf] = header;
+ }
+ else {
+ sprintf(buf, "%i", headerNum++);
+ module->getEntryAttributes()["Heading"]["Interverse"][buf] = header;
+ if (option) { // we want the tag in the text
+ text.append(header);
+ }
+ }
+
+ // the start tag's attributes are stored under the same index
+ StringList attributes = startTag.getAttributeNames();
+ for (StringList::const_iterator it = attributes.begin(); it != attributes.end(); it++) {
+ module->getEntryAttributes()["Heading"][buf][it->c_str()] = startTag.getAttribute(it->c_str());
+ }
+ }
+
+ hide = false;
+ if (!option || preverse) { // we don't want the tag in the text anymore
+ preverse = false;
+ continue;
+ }
+ preverse = false;
+ }
+ if (tag.getAttribute("class") && ((!stricmp(tag.getAttribute("class"), "sechead"))
+ || (!stricmp(tag.getAttribute("class"), "title")))) {
+
+ isheader = true;
+
+ if (!tag.isEndTag()) { //start tag
+ if (!tag.isEmpty()) {
+ startTag = tag;
+
+/* how do we tell a ThML preverse title from one that should be in the text? probably if any text is before the title... just assuming all are preverse for now
+ }
+ if (tag.getAttribute("subtype") && !stricmp(tag.getAttribute("subtype"), "x-preverse")) {
+*/
+ hide = true;
+ preverse = true;
+ header = "";
+ continue;
+ } // move back up under startTag = tag
+ }
+/* this is where non-preverse will go eventually
+ if (!tag.isEndTag()) { //start tag
+ hide = true;
+ header = "";
+ if (option) { // we want the tag in the text
+ text.append('<');
+ text.append(token);
+ text.append('>');
+ }
+ continue;
+ }
+*/
+ }
+ else
+ isheader = false;
+ }
+
+ // tags met while the last div seen was a heading start tag go into the heading
+ if (withinDiv && isheader) {
+ header.append('<');
+ header.append(token);
+ header.append('>');
+ } else {
+ // if not a heading token, keep token in text
+ if (!hide) {
+ text.append('<');
+ text.append(token);
+ text.append('>');
+ }
+ }
+ continue;
+ }
+ if (intoken) { //copy token
+ token.append(*from);
+ }
+ else if (!hide) { //copy text which is not inside a token
+ text.append(*from);
+ }
+ else header.append(*from);
+ }
+ return 0;
+}
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmlhtml.cpp b/src/modules/filters/thmlhtml.cpp
new file mode 100644
index 0000000..efb09cd
--- /dev/null
+++ b/src/modules/filters/thmlhtml.cpp
@@ -0,0 +1,236 @@
+/***************************************************************************
+ thmlhtml.cpp - ThML to HTML filter
+ -------------------
+ begin : 1999-10-27
+ copyright : 2001 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <thmlhtml.h>
+#include <swmodule.h>
+#include <utilxml.h>
+
+SWORD_NAMESPACE_START
+
+/**
+ * Configures the ThML to HTML filter.
+ *
+ * Tokens are delimited by '<' and '>', escapes by '&' and ';'.  Escape and
+ * token names are matched case-sensitively, numeric escapes are passed
+ * through, and the named entities listed below are registered as allowed.
+ * <note> and </note> are registered as plain token substitutions.
+ */
+ThMLHTML::ThMLHTML() {
+	// Named entities registered as allowed, in registration order.
+	// "nbsp" appears twice on purpose: the original registration list does too.
+	static const char *const allowedEscapes[] = {
+		"quot", "amp", "lt", "gt",
+
+		"nbsp", "brvbar", "sect", "copy", "laquo", "reg", "acute", "para", "raquo",
+
+		"Aacute", "Agrave", "Acirc", "Auml", "Atilde", "Aring",
+		"aacute", "agrave", "acirc", "auml", "atilde", "aring",
+		"Eacute", "Egrave", "Ecirc", "Euml",
+		"eacute", "egrave", "ecirc", "euml",
+		"Iacute", "Igrave", "Icirc", "Iuml",
+		"iacute", "igrave", "icirc", "iuml",
+		"Oacute", "Ograve", "Ocirc", "Ouml", "Otilde",
+		"oacute", "ograve", "ocirc", "ouml", "otilde",
+		"Uacute", "Ugrave", "Ucirc", "Uuml",
+		"uacute", "ugrave", "ucirc", "uuml",
+		"Yacute", "yacute", "yuml",
+
+		"deg", "plusmn", "sup2", "sup3", "sup1", "nbsp", "pound", "cent",
+		"frac14", "frac12", "frac34", "iquest", "iexcl",
+		"ETH", "eth", "THORN", "thorn", "AElig", "aelig",
+		"Oslash", "curren", "Ccedil", "ccedil", "szlig", "Ntilde", "ntilde",
+		"yen", "not", "ordf", "uml", "shy", "macr",
+
+		"micro", "middot", "cedil", "ordm", "times", "divide", "oslash"
+	};
+	static const unsigned int allowedEscapeCount = sizeof(allowedEscapes) / sizeof(allowedEscapes[0]);
+
+	setTokenStart("<");
+	setTokenEnd(">");
+
+	setEscapeStart("&");
+	setEscapeEnd(";");
+
+	setEscapeStringCaseSensitive(true);
+	setPassThruNumericEscapeString(true);
+
+	for (unsigned int idx = 0; idx < allowedEscapeCount; ++idx) {
+		addAllowedEscapeString(allowedEscapes[idx]);
+	}
+
+	setTokenCaseSensitive(true);
+
+	// Notes are rendered inline, small and dark red, wrapped in parentheses.
+	addTokenSubstitute("note", " <font color=\"#800000\"><small>(");
+	addTokenSubstitute("/note", ")</small></font> ");
+}
+
+
+/**
+ * Renders one ThML token as HTML and appends the result to buf.
+ *
+ * Tokens registered with addTokenSubstitute() are replaced by
+ * substituteToken().  Everything else is handled here:
+ *   - <sync type="Strongs|morph|lemma" value="..."> becomes small emphasised text,
+ *   - <div class="sechead|title"> ... </div> becomes a bold-italic heading,
+ *   - <img> is copied through, with a src that starts with '/' prefixed by
+ *     "file:" and the module's "AbsoluteDataPath" config entry,
+ *   - <scripRef> tags are dropped,
+ *   - any other token is copied through unchanged.
+ *
+ * @param buf      output buffer the rendered markup is appended to
+ * @param token    token text without the surrounding '<' and '>'
+ * @param userData per-render state; cast to MyUserData for the SecHead flag
+ *                 and used to reach the module configuration for images
+ * @return false only for an <img> token that contains no "src" text,
+ *         true otherwise
+ */
+bool ThMLHTML::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+	if (!substituteToken(buf, token)) { // manually process if it wasn't a simple substitution
+		MyUserData *u = (MyUserData *)userData;
+		XMLTag tag(token);
+
+		// A tag without a name must not reach strcmp(); treat it as an
+		// unknown tag so it is passed through by the final branch.
+		const char *name = tag.getName();
+		if (!name)
+			name = "";
+
+		if (!strcmp(name, "sync")) {
+			if (tag.getAttribute("type") && tag.getAttribute("value") && !strcmp(tag.getAttribute("type"), "Strongs")) {
+				const char* value = tag.getAttribute("value");
+				if (*value == 'H' || *value == 'G' || *value == 'A') {
+					value++; // drop the one-letter prefix
+					buf += "<small><em>";
+					buf += value;
+					buf += "</em></small>";
+				}
+				else if (*value == 'T') {
+					// Values starting with 'T' carry a two-character prefix that is
+					// dropped.  Never step past the terminator when the value is just "T".
+					value += (value[1] ? 2 : 1);
+
+					buf += "<small><i>";
+					buf += value;
+					buf += "</i></small>";
+				}
+			}
+			else if (tag.getAttribute("type") && tag.getAttribute("value") && !strcmp(tag.getAttribute("type"), "morph")) {
+				buf += "<small><em>";
+				buf += tag.getAttribute("value");
+				buf += "</em></small>";
+			}
+			else if (tag.getAttribute("type") && tag.getAttribute("value") && !strcmp(tag.getAttribute("type"), "lemma")) {
+				buf += "<small><em>(";
+				buf += tag.getAttribute("value");
+				buf += ")</em></small>";
+			}
+		}
+		else if (!strcmp(name, "div")) {
+			if (tag.isEndTag() && (u->SecHead)) {
+				// close the heading opened by the matching start tag
+				buf += "</i></b><br />";
+				u->SecHead = false;
+			}
+			else if (tag.getAttribute("class")) {
+				// section heads and titles are rendered identically
+				if (!strcmp(tag.getAttribute("class"), "sechead") || !strcmp(tag.getAttribute("class"), "title")) {
+					u->SecHead = true;
+					buf += "<br /><b><i>";
+				}
+			}
+		}
+		else if (!strcmp(name, "img")) {
+			const char *src = strstr(token, "src");
+			if (!src) // assert we have a src attribute
+				return false;
+
+			buf += '<';
+			for (const char *c = token; *c; c++) {
+				if (c == src) {
+					// copy everything up to the opening quote of the src value
+					for (;((*c) && (*c != '"')); c++)
+						buf += *c;
+
+					// token ended before any quote: step back so the outer loop terminates
+					if (!*c) { c--; continue; }
+
+					buf += '"';
+					if (*(c+1) == '/') {
+						// absolute path inside the module: prefix with the module data path
+						buf += "file:";
+						buf += userData->module->getConfigEntry("AbsoluteDataPath");
+						if (buf[buf.length()-2] == '/')
+							c++; // skip '/'
+					}
+					continue;
+				}
+				buf += *c;
+			}
+			buf += '>';
+		}
+		else if (!strcmp(name, "scripRef")) { //do nothing with scrip refs, we leave them out
+
+		}
+		else {
+			// unknown token: pass it through unchanged
+			buf += '<';
+			buf += token;
+			buf += '>';
+
+//			return false; // we still didn't handle token
+		}
+	}
+	return true;
+}
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmlhtmlhref.cpp b/src/modules/filters/thmlhtmlhref.cpp
new file mode 100644
index 0000000..0596f75
--- /dev/null
+++ b/src/modules/filters/thmlhtmlhref.cpp
@@ -0,0 +1,357 @@
+/***************************************************************************
+ thmlhtmlhref.cpp - ThML to HTML filter with hrefs
+ -------------------
+ begin : 2001-09-03
+ copyright : 2001 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+#include <stdlib.h>
+#include <thmlhtmlhref.h>
+#include <swmodule.h>
+#include <utilxml.h>
+#include <utilstr.h>
+#include <versekey.h>
+#include <url.h>
+
+SWORD_NAMESPACE_START
+
+
+/**
+ * Creates the per-render state used by ThMLHTMLHREF::handleToken().
+ *
+ * @param module module being rendered; may be NULL, in which case version
+ *               stays empty and the module is not treated as a Biblical text
+ * @param key    key being rendered, forwarded to BasicFilterUserData
+ */
+ThMLHTMLHREF::MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) {
+	// handleToken() reads both flags unconditionally, so they need defined
+	// values even when no module is supplied.
+	BiblicalText = false;
+	SecHead = false;
+	if (module) {
+		version = module->Name();
+		BiblicalText = (!strcmp(module->Type(), "Biblical Texts"));
+	}
+}
+
+
+/**
+ * Configures the ThML to HTML-with-hrefs filter.
+ *
+ * Tokens are delimited by '<' and '>', escapes by '&' and ';'.  Escape and
+ * token names are matched case-sensitively, numeric escapes are passed
+ * through, and the named entities listed below are registered as allowed.
+ * Only </scripture> is registered as a plain token substitution.
+ */
+ThMLHTMLHREF::ThMLHTMLHREF() {
+	// Named entities registered as allowed, in registration order.
+	// "nbsp" appears twice on purpose: the original registration list does too.
+	static const char *const allowedEscapes[] = {
+		"quot", "amp", "lt", "gt",
+
+		"nbsp", "brvbar", "sect", "copy", "laquo", "reg", "acute", "para", "raquo",
+
+		"Aacute", "Agrave", "Acirc", "Auml", "Atilde", "Aring",
+		"aacute", "agrave", "acirc", "auml", "atilde", "aring",
+		"Eacute", "Egrave", "Ecirc", "Euml",
+		"eacute", "egrave", "ecirc", "euml",
+		"Iacute", "Igrave", "Icirc", "Iuml",
+		"iacute", "igrave", "icirc", "iuml",
+		"Oacute", "Ograve", "Ocirc", "Ouml", "Otilde",
+		"oacute", "ograve", "ocirc", "ouml", "otilde",
+		"Uacute", "Ugrave", "Ucirc", "Uuml",
+		"uacute", "ugrave", "ucirc", "uuml",
+		"Yacute", "yacute", "yuml",
+
+		"deg", "plusmn", "sup2", "sup3", "sup1", "nbsp", "pound", "cent",
+		"frac14", "frac12", "frac34", "iquest", "iexcl",
+		"ETH", "eth", "THORN", "thorn", "AElig", "aelig",
+		"Oslash", "curren", "Ccedil", "ccedil", "szlig", "Ntilde", "ntilde",
+		"yen", "not", "ordf", "uml", "shy", "macr",
+
+		"micro", "middot", "cedil", "ordm", "times", "divide", "oslash"
+	};
+	static const unsigned int allowedEscapeCount = sizeof(allowedEscapes) / sizeof(allowedEscapes[0]);
+
+	setTokenStart("<");
+	setTokenEnd(">");
+
+	setEscapeStart("&");
+	setEscapeEnd(";");
+
+	setEscapeStringCaseSensitive(true);
+	setPassThruNumericEscapeString(true);
+
+	for (unsigned int idx = 0; idx < allowedEscapeCount; ++idx) {
+		addAllowedEscapeString(allowedEscapes[idx]);
+	}
+
+	setTokenCaseSensitive(true);
+//	addTokenSubstitute("scripture", "<i> ");
+	addTokenSubstitute("/scripture", "</i> ");
+}
+
+
+/**
+ * Renders one ThML token as HTML with "passagestudy.jsp" hyperlinks and
+ * appends the result to buf.
+ *
+ * Tokens registered with addTokenSubstitute() are replaced by
+ * substituteToken().  Everything else is handled here:
+ *   - <sync type="morph|lemma|Strongs|Dict"> becomes a lookup link (or bold for Dict),
+ *   - <note> becomes a footnote / cross-reference marker link and its text is suppressed,
+ *   - <scripture> becomes italics,
+ *   - <scripRef> becomes a reference link (non-Biblical modules) or a
+ *     cross-reference marker (Biblical texts keyed by a VerseKey),
+ *   - <div class="sechead|title"> becomes a bold-italic heading,
+ *   - <img>/<image> becomes a clickable image,
+ *   - any other token is copied through unchanged.
+ *
+ * @param buf      output buffer the rendered markup is appended to
+ * @param token    token text without the surrounding '<' and '>'
+ * @param userData per-render state; cast to MyUserData
+ * @return false only for an image token whose src value cannot be located,
+ *         true otherwise
+ */
+bool ThMLHTMLHREF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+	if (!substituteToken(buf, token)) { // manually process if it wasn't a simple substitution
+		MyUserData *u = (MyUserData *)userData;
+
+		XMLTag tag(token);
+		// remember the most recent non-empty start tag; end-tag handling reads its attributes
+		if ((!tag.isEndTag()) && (!tag.isEmpty()))
+			u->startTag = tag;
+
+		// A tag without a name must not reach strcmp(); treat it as an
+		// unknown tag so it is passed through by the final branch.
+		const char *name = tag.getName();
+		if (!name)
+			name = "";
+
+		if (!strcmp(name, "sync")) {
+			SWBuf value = tag.getAttribute("value");
+			const char *type = tag.getAttribute("type");
+			if (!type)
+				type = ""; // matches none of the branches below
+
+			if (!strcmp(type, "morph")) { //&gt;
+				if(value.length())
+					buf.appendFormatted("<small><em>(<a href=\"passagestudy.jsp?action=showMorph&type=Greek&value=%s\">%s</a>)</em></small>",
+						URL::encode(value.c_str()).c_str(),
+						value.c_str());
+			}
+			else if (!strcmp(type, "lemma")) { //&gt;
+				if(value.length())
+					// empty "type=" is deliberate.
+					buf.appendFormatted("<small><em>&lt;<a href=\"passagestudy.jsp?action=showStrongs&type=&value=%s\">%s</a>&gt;</em></small>",
+						URL::encode(value.c_str()).c_str(),
+						value.c_str());
+			}
+			else if (!strcmp(type, "Strongs")) {
+				// first character selects the language, the rest is the number
+				char ch = *value;
+				value<<1;
+				buf.appendFormatted("<small><em>&lt;<a href=\"passagestudy.jsp?action=showStrongs&type=%s&value=%s\">",
+						    ((ch == 'H') ? "Hebrew" : "Greek"),
+						    URL::encode(value.c_str()).c_str());
+				buf += (value.length()) ? value.c_str() : "";
+				buf += "</a>&gt;</em></small>";
+			}
+			else if (!strcmp(type, "Dict")) {
+				buf += (tag.isEndTag() ? "</b>" : "<b>");
+			}
+
+		}
+		// <note> tag
+		else if (!strcmp(name, "note")) {
+			if (!tag.isEndTag()) {
+				if (!tag.isEmpty()) {
+					SWBuf footnoteNumber = tag.getAttribute("swordFootnote");
+					VerseKey *vkey = NULL;
+					// see if we have a VerseKey * or descendant
+					SWTRY {
+						vkey = SWDYNAMIC_CAST(VerseKey, u->key);
+					}
+					SWCATCH ( ... ) {	}
+
+					// leave this special osis type in for crossReference notes types?  Might thml use this some day? Doesn't hurt.
+					const char *noteType = tag.getAttribute("type");
+					char ch = ((noteType && ((!strcmp(noteType, "crossReference")) || (!strcmp(noteType, "x-cross-ref")))) ? 'x':'n');
+
+					// The link is the same either way; only the source of the
+					// passage text differs (the VerseKey when there is one).
+					buf.appendFormatted("<a href=\"passagestudy.jsp?action=showNote&type=%c&value=%s&module=%s&passage=%s\"><small><sup>*%c</sup></small></a>",
+						ch,
+						URL::encode(footnoteNumber.c_str()).c_str(),
+						URL::encode(u->version.c_str()).c_str(),
+						URL::encode(vkey ? vkey->getText() : u->key->getText()).c_str(),
+						ch);
+
+					// the note body is not rendered inline
+					u->suspendTextPassThru = true;
+				}
+			}
+			if (tag.isEndTag()) {
+				u->suspendTextPassThru = false;
+			}
+		}
+		else if (!strcmp(name, "scripture")) {
+			buf += (tag.isEndTag() ? "</i>" : "<i>");
+		}
+		// <scripRef> tag
+		else if (!strcmp(name, "scripRef")) {
+			if (!tag.isEndTag()) {
+				if (!tag.isEmpty()) {
+					// hold back the reference text until the end tag is seen
+					u->suspendTextPassThru = true;
+				}
+			}
+			if (tag.isEndTag()) {	//	</scripRef>
+				if (!u->BiblicalText) {
+					SWBuf refList = u->startTag.getAttribute("passage");
+					if (!refList.length())
+						refList = u->lastTextNode;
+					// Attributes live on the start tag; the end tag carries none,
+					// so "version" is read from the remembered start tag like "passage".
+					SWBuf version = u->startTag.getAttribute("version");
+
+					buf.appendFormatted("<a href=\"passagestudy.jsp?action=showRef&type=scripRef&value=%s&module=%s\">",
+						(refList.length()) ? URL::encode(refList.c_str()).c_str() : "",
+						(version.length()) ? URL::encode(version.c_str()).c_str() : "");
+					buf += u->lastTextNode.c_str();
+					buf += "</a>";
+				}
+				else {
+					SWBuf footnoteNumber = u->startTag.getAttribute("swordFootnote");
+					VerseKey *vkey = NULL;
+					// see if we have a VerseKey * or descendant
+					SWTRY {
+						vkey = SWDYNAMIC_CAST(VerseKey, u->key);
+					}
+					SWCATCH ( ... ) {}
+					if (vkey) {
+						// leave this special osis type in for crossReference notes types?  Might thml use this some day? Doesn't hurt.
+						//buf.appendFormatted("<a href=\"noteID=%s.x.%s\"><small><sup>*x</sup></small></a> ", vkey->getText(), footnoteNumber.c_str());
+						buf.appendFormatted("<a href=\"passagestudy.jsp?action=showNote&type=x&value=%s&module=%s&passage=%s\"><small><sup>*x</sup></small></a>",
+							URL::encode(footnoteNumber.c_str()).c_str(),
+							URL::encode(u->version.c_str()).c_str(),
+							URL::encode(vkey->getText()).c_str());
+
+					}
+				}
+
+				// let's let text resume to output again
+				u->suspendTextPassThru = false;
+			}
+		}
+		else if (!strcmp(name, "div")) {
+			const char *cls = tag.getAttribute("class");
+			if (tag.isEndTag() && u->SecHead) {
+				// close the heading opened by the matching start tag
+				buf += "</i></b><br />";
+				u->SecHead = false;
+			}
+			else if (cls && (!stricmp(cls, "sechead") || !stricmp(cls, "title"))) {
+				// section heads and titles are rendered identically
+				u->SecHead = true;
+				buf += "<br /><b><i>";
+			}
+			else {
+				// any other div is appended via the tag object itself
+				buf += tag;
+			}
+		}
+		else if (!strcmp(name, "img") || !strcmp(name, "image")) {
+			const char *src = strstr(token, "src");
+			if (!src)		// assert we have a src attribute
+				return false;
+
+			const char *c, *d;
+			if (((c = strchr(src+3, '"')) == NULL) ||
+			    ((d = strchr( ++c , '"')) == NULL))	// identify endpoints.
+				return false;			// abandon hope.
+
+			// c..d now spans the src value between its quotes
+			SWBuf imagename = "file:";
+			if (*c == '/')				// as below, inside for loop.
+				imagename += userData->module->getConfigEntry("AbsoluteDataPath");
+			while (c != d)				// move bits into the name.
+				imagename += *(c++);
+
+			// images become clickable, if the UI supports showImage.
+			buf.appendFormatted("<a href=\"passagestudy.jsp?action=showImage&value=%s&module=%s\"><",
+					    URL::encode(imagename.c_str()).c_str(),
+					    URL::encode(u->version.c_str()).c_str());
+
+			for (c = token; *c; c++) {
+				// drop the trailing '/' of a self-closing tag; it is re-added below
+				if ((*c == '/') && (*(c+1) == '\0'))
+					continue;
+				if (c == src) {
+					// copy everything up to the opening quote of the src value
+					for (;((*c) && (*c != '"')); c++)
+						buf += *c;
+
+					// token ended before any quote: step back so the outer loop terminates
+					if (!*c) { c--; continue; }
+
+					buf += '"';
+					if (*(c+1) == '/') {
+						// absolute path inside the module: prefix with the module data path
+						buf += "file:";
+						buf += userData->module->getConfigEntry("AbsoluteDataPath");
+						if (buf[buf.length()-2] == '/')
+							c++;		// skip '/'
+					}
+					continue;
+				}
+				buf += *c;
+			}
+			buf += " border=0 /></a>";
+		}
+		else {
+			// unknown token: pass it through unchanged
+			buf += '<';
+			/*for (const char *tok = token; *tok; tok++)
+				buf += *tok;*/
+			buf += token;
+			buf += '>';
+			//return false;  // we still didn't handle token
+		}
+	}
+	return true;
+}
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmllemma.cpp b/src/modules/filters/thmllemma.cpp
new file mode 100644
index 0000000..3e5761d
--- /dev/null
+++ b/src/modules/filters/thmllemma.cpp
@@ -0,0 +1,65 @@
+/******************************************************************************
+ *
+ * thmllemma - SWFilter descendant to hide or show lemmas
+ * in a ThML module.
+ */
+
+
+#include <stdlib.h>
+#include <thmllemma.h>
+
+SWORD_NAMESPACE_START
+
+const char oName[] = "Lemmas"; // option name handed to SWOptionFilter by the ThMLLemma constructor
+const char oTip[] = "Toggles Lemmas On and Off if they exist"; // option description handed to SWOptionFilter
+
+const SWBuf choices[3] = {"Off", "On", ""}; // the two option values followed by an empty trailing entry
+const StringList oValues(&choices[0], &choices[2]); // built from the range [&choices[0], &choices[2]); presumably a half-open iterator range, i.e. "Off" and "On" only - TODO confirm
+
+ThMLLemma::ThMLLemma() : SWOptionFilter(oName, oTip, &oValues) { // hands the option name, tip and value list to the base class
+ setOptionValue("Off"); // the filter starts with the option set to "Off"
+}
+
+
+ThMLLemma::~ThMLLemma() { // empty body: no cleanup is performed here
+}
+
+
+/**
+ * Removes lemma markers from ThML text when the option is off.
+ *
+ * When option is false, every token that starts with "sync " and contains
+ * type="lemma" is dropped; all other tokens and all text outside tokens are
+ * copied through unchanged.  When option is true the text is left untouched.
+ *
+ * @param text   text to filter; rewritten in place
+ * @param key    unused by this filter
+ * @param module unused by this filter
+ * @return always 0
+ */
+char ThMLLemma::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+	if (!option) {	// if we don't want lemmas
+		bool intoken = false;
+
+		SWBuf token;
+		SWBuf orig = text;
+		const char *from = orig.c_str();
+		for (text = ""; *from; from++) {
+			if (*from == '<') {
+				intoken = true;
+				token = "";
+				continue;
+			}
+			// Only a '>' that closes an open token ends it.  A stray '>' in
+			// running text is ordinary text; treating it as a token end would
+			// re-emit (or drop) whatever token was seen last.
+			if (intoken && *from == '>') {	// process tokens
+				intoken = false;
+				if (!strncmp(token.c_str(), "sync ", 5) && strstr(token.c_str(), "type=\"lemma\"")) {	// Lemma
+					continue;
+				}
+
+				// if not a lemma token, keep token in text
+				text += '<';
+				text += token;
+				text += '>';
+				continue;
+			}
+
+			if (intoken) {
+				token += *from;
+			}
+			else	{
+				text += *from;
+			}
+		}
+	}
+	return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmlmorph.cpp b/src/modules/filters/thmlmorph.cpp
new file mode 100644
index 0000000..0fbef56
--- /dev/null
+++ b/src/modules/filters/thmlmorph.cpp
@@ -0,0 +1,65 @@
+/******************************************************************************
+ *
+ * thmlmorph - SWFilter descendant to hide or show morph tags
+ * in a ThML module.
+ */
+
+
+#include <stdlib.h>
+#include <thmlmorph.h>
+
+SWORD_NAMESPACE_START
+
+const char oName[] = "Morphological Tags"; // option name handed to SWOptionFilter by the ThMLMorph constructor
+const char oTip[] = "Toggles Morphological Tags On and Off if they exist"; // option description handed to SWOptionFilter
+
+const SWBuf choices[3] = {"Off", "On", ""}; // the two option values followed by an empty trailing entry
+const StringList oValues(&choices[0], &choices[2]); // built from the range [&choices[0], &choices[2]); presumably a half-open iterator range, i.e. "Off" and "On" only - TODO confirm
+
+ThMLMorph::ThMLMorph() : SWOptionFilter(oName, oTip, &oValues) { // hands the option name, tip and value list to the base class
+ setOptionValue("Off"); // the filter starts with the option set to "Off"
+}
+
+
+ThMLMorph::~ThMLMorph() { // empty body: no cleanup is performed here
+}
+
+
+/**
+ * Removes morphology markers from ThML text when the option is off.
+ *
+ * When option is false, every token that starts with "sync " and contains
+ * type="morph" is dropped; all other tokens and all text outside tokens are
+ * copied through unchanged.  When option is true the text is left untouched.
+ *
+ * @param text   text to filter; rewritten in place
+ * @param key    unused by this filter
+ * @param module unused by this filter
+ * @return always 0
+ */
+char ThMLMorph::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+	if (!option) {	// if we don't want morph tags
+		bool intoken = false;
+
+		SWBuf token;
+		SWBuf orig = text;
+		const char *from = orig.c_str();
+		for (text = ""; *from; from++) {
+			if (*from == '<') {
+				intoken = true;
+				token = "";
+				continue;
+			}
+			// Only a '>' that closes an open token ends it.  A stray '>' in
+			// running text is ordinary text; treating it as a token end would
+			// re-emit (or drop) whatever token was seen last.
+			if (intoken && *from == '>') {	// process tokens
+				intoken = false;
+				if (!strncmp(token.c_str(), "sync ", 5) && strstr(token.c_str(), "type=\"morph\"")) {	// Morph
+					continue;
+				}
+
+				// if not a morph tag token, keep token in text
+				text += '<';
+				text += token;
+				text += '>';
+				continue;
+			}
+
+			if (intoken) {
+				token += *from;
+			}
+			else	{
+				text += *from;
+			}
+		}
+	}
+	return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmlosis.cpp b/src/modules/filters/thmlosis.cpp
new file mode 100644
index 0000000..939be82
--- /dev/null
+++ b/src/modules/filters/thmlosis.cpp
@@ -0,0 +1,575 @@
+/******************************************************************************
+ *
+ * thmlosis - SWFilter descendant to convert ThML markup
+ * in a module to OSIS markup.
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <ctype.h>
+#include <thmlosis.h>
+#include <swmodule.h>
+#include <swlog.h>
+#include <versekey.h>
+#include <utilstr.h>
+#include <utilxml.h>
+
+
+SWORD_NAMESPACE_START
+
+ThMLOSIS::ThMLOSIS() { // empty body: no setup is performed here
+}
+
+
+ThMLOSIS::~ThMLOSIS() { // empty body: no cleanup is performed here
+}
+
+
+char ThMLOSIS::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ char token[2048]; // cheese. Fix.
+ int tokpos = 0;
+ bool intoken = false;
+ bool keepToken = false;
+ bool ampersand = false;
+
+// static QuoteStack quoteStack;
+
+ bool lastspace = false;
+ char val[128];
+ SWBuf buf;
+ char *valto;
+ char *ch;
+
+ const char *wordStart = text.c_str();
+ const char *wordEnd = NULL;
+
+ const char *textStart = NULL;
+ const char *textEnd = NULL;
+
+ bool suspendTextPassThru = false;
+ bool handled = false;
+ bool newText = false;
+ bool newWord = false;
+
+// SWBuf tmp;
+ SWBuf divEnd = "";
+
+ SWBuf orig = text;
+ const char* from = orig.c_str();
+
+ text = "";
+ for (from = orig.c_str(); *from; ++from) {
+
+ // handle silly <variant word> items in greek whnu, remove when module is fixed
+ if ((*from == '<') && (*(from+1) < 0)) {
+ text += "&lt;";
+ continue;
+ }
+
+ if (*from == '<') { //start of new token detected
+ intoken = true;
+ tokpos = 0;
+ token[0] = 0;
+ token[1] = 0;
+ token[2] = 0;
+ ampersand = false;
+ textEnd = from-1;
+ wordEnd = text.c_str() + text.length();//not good, instead of wordEnd = to!
+
+// wordEnd = to;
+ continue;
+ }
+
+ if (*from == '&') {
+ intoken = true;
+ tokpos = 0;
+ token[0] = 0;
+ token[1] = 0;
+ token[2] = 0;
+ ampersand = true;
+ continue;
+ }
+
+ if (*from == ';' && ampersand) {
+ intoken = false;
+ ampersand = false;
+
+ if (*token == '#') {
+ text += '&';
+ text += token;
+ text += ';';
+ }
+ else if (!strncmp("nbsp", token, 4)) text += ' ';
+ else if (!strncmp("quot", token, 4)) text += '"';
+ else if (!strncmp("amp", token, 3)) text += '&';
+ else if (!strncmp("lt", token, 2)) text += '<';
+ else if (!strncmp("gt", token, 2)) text += '>';
+ else if (!strncmp("brvbar", token, 6)) text += '';
+ else if (!strncmp("sect", token, 4)) text += '';
+ else if (!strncmp("copy", token, 4)) text += '';
+ else if (!strncmp("laquo", token, 5)) text += '';
+ else if (!strncmp("reg", token, 3)) text += '';
+ else if (!strncmp("acute", token, 5)) text += '';
+ else if (!strncmp("para", token, 4)) text += '';
+ else if (!strncmp("raquo", token, 5)) text += '';
+ else if (!strncmp("Aacute", token, 6)) text += '';
+ else if (!strncmp("Agrave", token, 6)) text += '';
+ else if (!strncmp("Acirc", token, 5)) text += '';
+ else if (!strncmp("Auml", token, 4)) text += '';
+ else if (!strncmp("Atilde", token, 6)) text += '';
+ else if (!strncmp("Aring", token, 5)) text += '';
+ else if (!strncmp("aacute", token, 6)) text += '';
+ else if (!strncmp("agrave", token, 6)) text += '';
+ else if (!strncmp("acirc", token, 5)) text += '';
+ else if (!strncmp("auml", token, 4)) text += '';
+ else if (!strncmp("atilde", token, 6)) text += '';
+ else if (!strncmp("aring", token, 5)) text += '';
+ else if (!strncmp("Eacute", token, 6)) text += '';
+ else if (!strncmp("Egrave", token, 6)) text += '';
+ else if (!strncmp("Ecirc", token, 5)) text += '';
+ else if (!strncmp("Euml", token, 4)) text += '';
+ else if (!strncmp("eacute", token, 6)) text += '';
+ else if (!strncmp("egrave", token, 6)) text += '';
+ else if (!strncmp("ecirc", token, 5)) text += '';
+ else if (!strncmp("euml", token, 4)) text += '';
+ else if (!strncmp("Iacute", token, 6)) text += '';
+ else if (!strncmp("Igrave", token, 6)) text += '';
+ else if (!strncmp("Icirc", token, 5)) text += '';
+ else if (!strncmp("Iuml", token, 4)) text += '';
+ else if (!strncmp("iacute", token, 6)) text += '';
+ else if (!strncmp("igrave", token, 6)) text += '';
+ else if (!strncmp("icirc", token, 5)) text += '';
+ else if (!strncmp("iuml", token, 4)) text += '';
+ else if (!strncmp("Oacute", token, 6)) text += '';
+ else if (!strncmp("Ograve", token, 6)) text += '';
+ else if (!strncmp("Ocirc", token, 5)) text += '';
+ else if (!strncmp("Ouml", token, 4)) text += '';
+ else if (!strncmp("Otilde", token, 6)) text += '';
+ else if (!strncmp("oacute", token, 6)) text += '';
+ else if (!strncmp("ograve", token, 6)) text += '';
+ else if (!strncmp("ocirc", token, 5)) text += '';
+ else if (!strncmp("ouml", token, 4)) text += '';
+ else if (!strncmp("otilde", token, 6)) text += '';
+ else if (!strncmp("Uacute", token, 6)) text += '';
+ else if (!strncmp("Ugrave", token, 6)) text += '';
+ else if (!strncmp("Ucirc", token, 5)) text += '';
+ else if (!strncmp("Uuml", token, 4)) text += '';
+ else if (!strncmp("uacute", token, 6)) text += '';
+ else if (!strncmp("ugrave", token, 6)) text += '';
+ else if (!strncmp("ucirc", token, 5)) text += '';
+ else if (!strncmp("uuml", token, 4)) text += '';
+ else if (!strncmp("Yacute", token, 6)) text += '';
+ else if (!strncmp("yacute", token, 6)) text += '';
+ else if (!strncmp("yuml", token, 4)) text += '';
+
+ else if (!strncmp("deg", token, 3)) text += '';
+ else if (!strncmp("plusmn", token, 6)) text += '';
+ else if (!strncmp("sup2", token, 4)) text += '';
+ else if (!strncmp("sup3", token, 4)) text += '';
+ else if (!strncmp("sup1", token, 4)) text += '';
+ else if (!strncmp("nbsp", token, 4)) text += '';
+ else if (!strncmp("pound", token, 5)) text += '';
+ else if (!strncmp("cent", token, 4)) text += '';
+ else if (!strncmp("frac14", token, 6)) text += '';
+ else if (!strncmp("frac12", token, 6)) text += '';
+ else if (!strncmp("frac34", token, 6)) text += '';
+ else if (!strncmp("iquest", token, 6)) text += '';
+ else if (!strncmp("iexcl", token, 5)) text += '';
+ else if (!strncmp("ETH", token, 3)) text += '';
+ else if (!strncmp("eth", token, 3)) text += '';
+ else if (!strncmp("THORN", token, 5)) text += '';
+ else if (!strncmp("thorn", token, 5)) text += '';
+ else if (!strncmp("AElig", token, 5)) text += '';
+ else if (!strncmp("aelig", token, 5)) text += '';
+ else if (!strncmp("Oslash", token, 6)) text += '';
+ else if (!strncmp("curren", token, 6)) text += '';
+ else if (!strncmp("Ccedil", token, 6)) text += '';
+ else if (!strncmp("ccedil", token, 6)) text += '';
+ else if (!strncmp("szlig", token, 5)) text += '';
+ else if (!strncmp("Ntilde", token, 6)) text += '';
+ else if (!strncmp("ntilde", token, 6)) text += '';
+ else if (!strncmp("yen", token, 3)) text += '';
+ else if (!strncmp("not", token, 3)) text += '';
+ else if (!strncmp("ordf", token, 4)) text += '';
+ else if (!strncmp("uml", token, 3)) text += '';
+ else if (!strncmp("shy", token, 3)) text += '';
+ else if (!strncmp("macr", token, 4)) text += '';
+ else if (!strncmp("micro", token, 5)) text += "";
+ else if (!strncmp("middot", token, 6)) text +="";
+ else if (!strncmp("cedil", token, 5)) text += "";
+ else if (!strncmp("ordm", token, 4)) text += "";
+ else if (!strncmp("times", token, 5)) text += "";
+ else if (!strncmp("divide", token, 6)) text +="";
+ else if (!strncmp("oslash", token, 6)) text +="";
+ continue;
+ }
+
+ // handle silly <variant word> items in greek whnu, remove when module is fixed
+ if ((*from == '>') && (*(from-1) < 0)) {
+ text += "&gt;";
+ continue;
+ }
+
+ if (*from == '>') { // process tokens
+ intoken = false;
+ keepToken = false;
+ suspendTextPassThru = false;
+ newWord = true;
+ handled = false;
+
+ while (wordStart < (text.c_str() + text.length())) { //hack
+ if (strchr(";,. :?!()'\"", *wordStart) && wordStart[0] && wordStart[1])
+ wordStart++;
+ else break;
+ }
+ while (wordEnd > wordStart) {
+ if (strchr(" ,;:.?!()'\"", *wordEnd))
+ wordEnd--;
+ else break;
+ }
+
+ // variants
+ if (!strncmp(token, "div type=\"variant\"", 18)) {
+ XMLTag tag = token;
+ text.append("<seg type=\"x-variant\"");
+ SWBuf cls = "x-class:";
+ cls += tag.getAttribute("class");
+ if (cls.length()>8)
+ text.appendFormatted(" subType=\"%s\"", cls.c_str());
+
+ text += ">";
+ divEnd = "</seg>";
+ newText = true;
+ lastspace = false;
+ handled = true;
+ }
+ // section titles
+ if (!strcmp(token, "div class=\"sechead\"")) {
+// pushString(&to, "<title>");
+ text.append("<title>");
+ divEnd = "</title>";
+ newText = true;
+ lastspace = false;
+ handled = true;
+ }
+ else if (!strcmp(token, "/div")) {
+ //pushString(&to, divEnd.c_str());
+ text.append(divEnd);
+ lastspace = false;
+ handled = true;
+ }
+ // Scripture Reference
+ if (!strncmp(token, "scripRef", 8)) {
+ // pushString(buf, "<reference osisRef=\"");
+ suspendTextPassThru = true;
+ newText = true;
+ handled = true;
+ }
+ else if (!strncmp(token, "/scripRef", 9)) {
+ SWBuf tmp;
+ tmp = "";
+ tmp.append(textStart, (int)(textEnd - textStart)+1);
+ //pushString(&to, convertToOSIS(tmp.c_str(), key));
+ text.append(VerseKey::convertToOSIS(tmp.c_str(), key));
+ suspendTextPassThru = false;
+ handled = true;
+ }
+// Usage of italics to represent transChange isn't domaninant;
+// solution: mark in OSIS instead, assume no semantics other than emphasis
+// of italicized text
+// if (!strcmp(module->Type(), "Biblical Texts")) {
+// // Italics assume transchange for Biblical texts
+// if (!stricmp(token, "i")) {
+// pushString(&to, "<transChange type=\"added\">");
+// newText = true;
+// lastspace = false;
+// handled = true;
+// }
+// else if (!stricmp(token, "/i")) {
+// pushString(&to, "</transChange>");
+// lastspace = false;
+// handled = true;
+// }
+// }
+// else {
+// // otherwise, italics are just italics
+//-- end italics for transchange
+ if (!stricmp(token, "i")) {
+// pushString(&to, "<hi type=\"i\">");
+ text.append("<hi type=\"i\">");
+ newText = true;
+ lastspace = false;
+ handled = true;
+ }
+ else if (!stricmp(token, "/i")) {
+// pushString(&to, "</hi>");
+ text.append("</hi>");
+ lastspace = false;
+ handled = true;
+ }
+// }
+
+ if (!strcmp(token, "b")) {
+// pushString(&to, "<hi type=\"b\">");
+ text.append("<hi type=\"b\">");
+ newText = true;
+ lastspace = false;
+ handled = true;
+ }
+ else if (!strcmp(token, "/b")) {
+// pushString(&to, "</hi>");
+ text.append("</hi>");
+ lastspace = false;
+ handled = true;
+ }
+
+ // Footnote
+ if (!strcmp(token, "note")) {
+ //pushString(&to, "<note>");
+ text.append("<note>");
+ newText = true;
+ lastspace = false;
+ handled = true;
+ }
+ else if (!strcmp(token, "/note")) {
+ // pushString(&to, "</note>");
+ text.append("</note>");
+ lastspace = false;
+ handled = true;
+ }
+
+ // Figure
+ else if (!strncmp(token, "img ", 4)) {
+ const char *src = strstr(token, "src");
+ if (!src) // assert we have a src attribute
+ continue;
+// return false;
+
+ //pushString(&to, "<figure src=\"");
+ text.append("<figure src=\"");
+
+ const char* end = strchr(src+2, '"'); //start search behind src="
+
+ if (end) { //append the path
+ text.append(src+2, end - (src+2));
+ }
+
+// const char *c;
+// for (c = src;((*c) && (*c != '"')); c++);
+
+// uncomment for SWORD absolute path logic
+// if (*(c+1) == '/') {
+// pushString(buf, "file:");
+// pushString(buf, module->getConfigEntry("AbsoluteDataPath"));
+// if (*((*buf)-1) == '/')
+// c++; // skip '/'
+// }
+// end of uncomment for asolute path logic
+
+// for (c++;((*c) && (*c != '"')); c++)
+// *to++ = *c;
+
+ //pushString(&to, "\" />");
+ text.append("\" />");
+ handled = true;
+ }
+
+ // Strongs numbers
+ else if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs
+ valto = val;
+ for (unsigned int i = 27; token[i] != '\"' && i < 150; i++)
+ *valto++ = token[i];
+ *valto = 0;
+ if (atoi((!isdigit(*val))?val+1:val) < 5627) {
+ // normal strongs number
+ strstrip(val);
+
+ if (!strncmp(wordStart, "<w ", 3)) {
+ const char *attStart = strstr(wordStart, "lemma");
+ if (attStart) { //existing morph attribute, append this one to it
+ attStart += 7;
+ buf = "";
+ buf.appendFormatted("strong:%s ", val);
+ }
+ else { // no lemma attribute
+ attStart = wordStart + 3;
+ buf = "";
+ buf.appendFormatted(buf, "lemma=\"strong:%s\" ", val);
+ }
+
+ text.insert(attStart - text.c_str(), buf);
+ }
+ else { //wordStart doesn't point to an existing <w> attribute!
+ buf = "";
+ buf.appendFormatted("<w lemma=\"strong:%s\">", val);
+ text.insert(wordStart - text.c_str(), buf);
+ text += "</w>";
+ lastspace = false;
+ }
+ }
+ // OLB verb morph, leave it out of OSIS tag
+ else {
+ }
+ handled = true;
+ }
+
+ // Morphology
+ else if (!strncmp(token, "sync type=\"morph\"", 17)) {
+ SWBuf cls = "";
+ SWBuf morph = "";
+ for (ch = token+17; *ch; ch++) {
+ if (!strncmp(ch, "class=\"", 7)) {
+ valto = val;
+ for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++)
+ *valto++ = ch[i];
+ *valto = 0;
+ strstrip(val);
+ cls = val;
+ }
+ if (!strncmp(ch, "value=\"", 7)) {
+ valto = val;
+ for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++)
+ *valto++ = ch[i];
+ *valto = 0;
+ strstrip(val);
+ morph = val;
+ }
+ }
+ if (!strncmp(wordStart, "<w ", 3)) {
+ const char *attStart = strstr(wordStart, "morph");
+ if (attStart) { //existing morph attribute, append this one to it
+ attStart += 7;
+ buf = "";
+ buf.appendFormatted("%s:%s ", ((cls.length())?cls.c_str():"robinson"), morph.c_str());
+ }
+ else { // no lemma attribute
+ attStart = wordStart + 3;
+ buf = "";
+ buf.appendFormatted("morph=\"%s:%s\" ", ((cls.length())?cls.c_str():"robinson"), morph.c_str());
+ }
+
+ text.insert(attStart - text.c_str(), buf); //hack, we have to
+ }
+ else { //no existing <w> attribute fond
+ buf = "";
+ buf.appendFormatted("<w morph=\"%s:%s\">", ((cls.length())?cls.c_str():"robinson"), morph.c_str());
+ text.insert(wordStart - text.c_str(), buf);
+ text += "</w>";
+ lastspace = false;
+
+ }
+ handled = true;
+ }
+
+ if (!keepToken) {
+ if (!handled) {
+ SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? (const char*)*key : "<unknown>");
+// exit(-1);
+ }
+ if (from[1] && strchr(" ,;.:?!()'\"", from[1])) {
+ if (lastspace) {
+ text--;
+ }
+ }
+ if (newText) {
+ textStart = from+1;
+ newText = false;
+ }
+ continue;
+ }
+
+ // if not a strongs token, keep token in text
+ text.appendFormatted("<%s>", token);
+
+ if (newText) {
+ textStart = text.c_str() + text.length();
+ newWord = false;
+ }
+ continue;
+ }
+ if (intoken) {
+ if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) {
+ token[tokpos++] = *from;
+ token[tokpos+2] = 0;
+ }
+ }
+ else {
+ switch (*from) {
+ case '\'':
+ case '\"':
+ case '`':
+// quoteStack.handleQuote(fromStart, from, &to);
+ text += *from;
+ //from++; //this line removes chars after an apostrophe! Needs fixing.
+ break;
+ default:
+ if (newWord && (*from != ' ')) {
+ wordStart = text.c_str() + text.length();
+ newWord = false;
+
+ //fix this if required?
+ //memset(to, 0, 10);
+
+ }
+
+ if (!suspendTextPassThru) {
+ text += (*from);
+ lastspace = (*from == ' ');
+ }
+ }
+ }
+ }
+
+ VerseKey *vkey = SWDYNAMIC_CAST(VerseKey, key);
+ if (vkey) {
+ SWBuf ref = "";
+ if (vkey->Verse()) {
+ ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef());
+ }
+
+ if (ref.length() > 0) {
+
+ text = ref + text;
+
+ if (vkey->Verse()) {
+ VerseKey tmp;
+ tmp = *vkey;
+ tmp.AutoNormalize(0);
+ tmp.Headings(1);
+
+ text += "</verse>";
+
+ tmp = MAXVERSE;
+ if (*vkey == tmp) {
+ tmp.Verse(0);
+// sprintf(ref, "\t</div>");
+// pushString(&to, ref);
+ tmp = MAXCHAPTER;
+ tmp = MAXVERSE;
+ if (*vkey == tmp) {
+ tmp.Chapter(0);
+ tmp.Verse(0);
+// sprintf(ref, "\t</div>");
+// pushString(&to, ref);
+/*
+ if (!quoteStack.empty()) {
+ SWLog::getSystemLog()->logError("popping unclosed quote at end of book");
+ quoteStack.clear();
+ }
+*/
+ }
+ }
+ }
+// else if (vkey->Chapter()) {
+// sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef());
+// }
+// else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef());
+ }
+ }
+ return 0;
+}
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmlplain.cpp b/src/modules/filters/thmlplain.cpp
new file mode 100644
index 0000000..8f8379a
--- /dev/null
+++ b/src/modules/filters/thmlplain.cpp
@@ -0,0 +1,219 @@
+/******************************************************************************
+ *
+ * thmlplain - SWFilter descendant to strip out all ThML tags or convert to
+ * ASCII rendered symbols.
+ */
+
+
+#include <stdlib.h>
+#include <thmlplain.h>
+#include <swbuf.h>
+
+SWORD_NAMESPACE_START
+
+// Default constructor: the filter keeps no state of its own, so there is
+// nothing to set up here.
+ThMLPlain::ThMLPlain() {
+}
+
+char ThMLPlain::processText(SWBuf &text, const SWKey *key, const SWModule *module)
+{
+ char token[2048];
+ int tokpos = 0;
+ bool intoken = false;
+ bool ampersand = false;
+
+ const char *from;
+ SWBuf orig = text;
+ from = orig.c_str();
+ for (text = ""; *from; from++)
+ {
+ if (*from == 10 || *from == 13)
+ from++;
+ if (*from == '<') {
+ intoken = true;
+ tokpos = 0;
+ token[0] = 0;
+ token[1] = 0;
+ token[2] = 0;
+ ampersand = false;
+ continue;
+ }
+ else if (*from == '&') {
+ intoken = true;
+ tokpos = 0;
+ token[0] = 0;
+ token[1] = 0;
+ token[2] = 0;
+ ampersand = true;
+ continue;
+ }
+ if (*from == ';' && ampersand) {
+ intoken = false;
+ ampersand = false;
+
+ if (!strncmp("nbsp", token, 4)) text += ' ';
+ else if (!strncmp("quot", token, 4)) text += '"';
+ else if (!strncmp("amp", token, 3)) text += '&';
+ else if (!strncmp("lt", token, 2)) text += '<';
+ else if (!strncmp("gt", token, 2)) text += '>';
+ else if (!strncmp("brvbar", token, 6)) text += '';
+ else if (!strncmp("sect", token, 4)) text += '';
+ else if (!strncmp("copy", token, 4)) text += '';
+ else if (!strncmp("laquo", token, 5)) text += '';
+ else if (!strncmp("reg", token, 3)) text += '';
+ else if (!strncmp("acute", token, 5)) text += '';
+ else if (!strncmp("para", token, 4)) text += '';
+ else if (!strncmp("raquo", token, 5)) text += '';
+
+ else if (!strncmp("Aacute", token, 6)) text += '';
+ else if (!strncmp("Agrave", token, 6)) text += '';
+ else if (!strncmp("Acirc", token, 5)) text += '';
+ else if (!strncmp("Auml", token, 4)) text += '';
+ else if (!strncmp("Atilde", token, 6)) text += '';
+ else if (!strncmp("Aring", token, 5)) text += '';
+ else if (!strncmp("aacute", token, 6)) text += '';
+ else if (!strncmp("agrave", token, 6)) text += '';
+ else if (!strncmp("acirc", token, 5)) text += '';
+ else if (!strncmp("auml", token, 4)) text += '';
+ else if (!strncmp("atilde", token, 6)) text += '';
+ else if (!strncmp("aring", token, 5)) text += '';
+ else if (!strncmp("Eacute", token, 6)) text += '';
+ else if (!strncmp("Egrave", token, 6)) text += '';
+ else if (!strncmp("Ecirc", token, 5)) text += '';
+ else if (!strncmp("Euml", token, 4)) text += '';
+ else if (!strncmp("eacute", token, 6)) text += '';
+ else if (!strncmp("egrave", token, 6)) text += '';
+ else if (!strncmp("ecirc", token, 5)) text += '';
+ else if (!strncmp("euml", token, 4)) text += '';
+ else if (!strncmp("Iacute", token, 6)) text += '';
+ else if (!strncmp("Igrave", token, 6)) text += '';
+ else if (!strncmp("Icirc", token, 5)) text += '';
+ else if (!strncmp("Iuml", token, 4)) text += '';
+ else if (!strncmp("iacute", token, 6)) text += '';
+ else if (!strncmp("igrave", token, 6)) text += '';
+ else if (!strncmp("icirc", token, 5)) text += '';
+ else if (!strncmp("iuml", token, 4)) text += '';
+ else if (!strncmp("Oacute", token, 6)) text += '';
+ else if (!strncmp("Ograve", token, 6)) text += '';
+ else if (!strncmp("Ocirc", token, 5)) text += '';
+ else if (!strncmp("Ouml", token, 4)) text += '';
+ else if (!strncmp("Otilde", token, 6)) text += '';
+ else if (!strncmp("oacute", token, 6)) text += '';
+ else if (!strncmp("ograve", token, 6)) text += '';
+ else if (!strncmp("ocirc", token, 5)) text += '';
+ else if (!strncmp("ouml", token, 4)) text += '';
+ else if (!strncmp("otilde", token, 6)) text += '';
+ else if (!strncmp("Uacute", token, 6)) text += '';
+ else if (!strncmp("Ugrave", token, 6)) text += '';
+ else if (!strncmp("Ucirc", token, 5)) text += '';
+ else if (!strncmp("Uuml", token, 4)) text += '';
+ else if (!strncmp("uacute", token, 6)) text += '';
+ else if (!strncmp("ugrave", token, 6)) text += '';
+ else if (!strncmp("ucirc", token, 5)) text += '';
+ else if (!strncmp("uuml", token, 4)) text += '';
+ else if (!strncmp("Yacute", token, 6)) text += '';
+ else if (!strncmp("yacute", token, 6)) text += '';
+ else if (!strncmp("yuml", token, 4)) text += '';
+
+ else if (!strncmp("deg", token, 3)) text += '';
+ else if (!strncmp("plusmn", token, 6)) text += '';
+ else if (!strncmp("sup2", token, 4)) text += '';
+ else if (!strncmp("sup3", token, 4)) text += '';
+ else if (!strncmp("sup1", token, 4)) text += '';
+ else if (!strncmp("nbsp", token, 4)) text += '';
+ else if (!strncmp("pound", token, 5)) text += '';
+ else if (!strncmp("cent", token, 4)) text += '';
+ else if (!strncmp("frac14", token, 6)) text += '';
+ else if (!strncmp("frac12", token, 6)) text += '';
+ else if (!strncmp("frac34", token, 6)) text += '';
+ else if (!strncmp("iquest", token, 6)) text += '';
+ else if (!strncmp("iexcl", token, 5)) text += '';
+ else if (!strncmp("ETH", token, 3)) text += '';
+ else if (!strncmp("eth", token, 3)) text += '';
+ else if (!strncmp("THORN", token, 5)) text += '';
+ else if (!strncmp("thorn", token, 5)) text += '';
+ else if (!strncmp("AElig", token, 5)) text += '';
+ else if (!strncmp("aelig", token, 5)) text += '';
+ else if (!strncmp("Oslash", token, 6)) text += '';
+ else if (!strncmp("curren", token, 6)) text += '';
+ else if (!strncmp("Ccedil", token, 6)) text += '';
+ else if (!strncmp("ccedil", token, 6)) text += '';
+ else if (!strncmp("szlig", token, 5)) text += '';
+ else if (!strncmp("Ntilde", token, 6)) text += '';
+ else if (!strncmp("ntilde", token, 6)) text += '';
+ else if (!strncmp("yen", token, 3)) text += '';
+ else if (!strncmp("not", token, 3)) text += '';
+ else if (!strncmp("ordf", token, 4)) text += '';
+ else if (!strncmp("uml", token, 3)) text += '';
+ else if (!strncmp("shy", token, 3)) text += '';
+ else if (!strncmp("macr", token, 4)) text += '';
+ else if (!strncmp("micro", token, 5)) text += "";
+ else if (!strncmp("middot", token, 6)) text +="";
+ else if (!strncmp("cedil", token, 5)) text += "";
+ else if (!strncmp("ordm", token, 4)) text += "";
+ else if (!strncmp("times", token, 5)) text += "";
+ else if (!strncmp("divide", token, 6)) text +="";
+ else if (!strncmp("oslash", token, 6)) text +="";
+ continue;
+
+ }
+ else if (*from == '>' && !ampersand) {
+ intoken = false;
+ // process desired tokens
+ if (!strncmp(token, "sync type=\"Strongs\" value=\"", 27)) {
+ text += ' ';
+ text += '<';
+ for (unsigned int i = 27; token[i] != '\"'; i++)
+ text += token[i];
+ text += '>';
+ continue;
+ }
+ if (!strncmp(token, "sync type=\"morph\" value=\"", 25)) {
+ text += ' ';
+ text += '(';
+ for (unsigned int i = 25; token[i] != '\"'; i++)
+ text += token[i];
+ text += ')';
+ continue;
+ }
+ if (!strncmp("note", token, 4)) {
+ text += ' ';
+ text += '(';
+ }
+ else if (!strncmp("br", token, 2))
+ text += '\n';
+ else if (!strncmp("/p", token, 2))
+ text += '\n';
+ else if (!strncmp("/note", token, 5)) {
+ text += ')';
+ text += ' ';
+ }
+ continue;
+ }
+ if (intoken) {
+ if (tokpos < 2045)
+ token[tokpos++] = *from;
+ token[tokpos+2] = 0;
+ }
+ else text += *from;
+ }
+
+ orig = text;
+ from = orig.c_str();
+ for (text = ""; *from; from++) { //loop to remove extra spaces
+ if ((strchr(" \t\n\r", *from))) {
+ while (*(from+1) && (strchr(" \t\n\r", *(from+1)))) {
+ from++;
+ }
+ text += " ";
+ }
+ else {
+ text += *from;
+ }
+ }
+ text += (char)0;
+
+ return 0;
+}
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmlrtf.cpp b/src/modules/filters/thmlrtf.cpp
new file mode 100644
index 0000000..23e4a90
--- /dev/null
+++ b/src/modules/filters/thmlrtf.cpp
@@ -0,0 +1,346 @@
+/***************************************************************************
+ thmlrtf.cpp - ThML to RTF filter
+ -------------------
+ begin : 1999-10-27
+ copyright : 2001 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <thmlrtf.h>
+#include <swmodule.h>
+#include <utilxml.h>
+#include <utilstr.h>
+#include <versekey.h>
+
+SWORD_NAMESPACE_START
+
+// Sets up the ThML -> RTF filter: "<...>" delimits tokens, "&...;" delimits
+// escapes, and the tables below give the direct replacements for both.
+// Anything not listed here is left to handleToken().
+//
+// The replacement strings for the accented / symbol entities are the single
+// ISO-8859-1 bytes for those entity names.
+// NOTE(review): these literals were empty in the copy of the file this was
+// rebuilt from (non-ASCII bytes stripped), which made the filter delete the
+// entities instead of translating them; confirm the values against upstream.
+ThMLRTF::ThMLRTF() {
+	setTokenStart("<");
+	setTokenEnd(">");
+
+	setEscapeStart("&");
+	setEscapeEnd(";");
+
+	setEscapeStringCaseSensitive(true);
+
+	addEscapeStringSubstitute("nbsp", "\302\240");
+	addEscapeStringSubstitute("apos", "'");
+	addEscapeStringSubstitute("quot", "\"");
+	addEscapeStringSubstitute("amp", "&");
+	addEscapeStringSubstitute("lt", "<");
+	addEscapeStringSubstitute("gt", ">");
+	addEscapeStringSubstitute("brvbar", "\xA6");
+	addEscapeStringSubstitute("sect", "\xA7");
+	addEscapeStringSubstitute("copy", "\xA9");
+	addEscapeStringSubstitute("laquo", "\xAB");
+	addEscapeStringSubstitute("reg", "\xAE");
+	addEscapeStringSubstitute("acute", "\xB4");
+	addEscapeStringSubstitute("para", "\xB6");
+	addEscapeStringSubstitute("raquo", "\xBB");
+
+	addEscapeStringSubstitute("Aacute", "\xC1");
+	addEscapeStringSubstitute("Agrave", "\xC0");
+	addEscapeStringSubstitute("Acirc", "\xC2");
+	addEscapeStringSubstitute("Auml", "\xC4");
+	addEscapeStringSubstitute("Atilde", "\xC3");
+	addEscapeStringSubstitute("Aring", "\xC5");
+	addEscapeStringSubstitute("aacute", "\xE1");
+	addEscapeStringSubstitute("agrave", "\xE0");
+	addEscapeStringSubstitute("acirc", "\xE2");
+	addEscapeStringSubstitute("auml", "\xE4");
+	addEscapeStringSubstitute("atilde", "\xE3");
+	addEscapeStringSubstitute("aring", "\xE5");
+	addEscapeStringSubstitute("Eacute", "\xC9");
+	addEscapeStringSubstitute("Egrave", "\xC8");
+	addEscapeStringSubstitute("Ecirc", "\xCA");
+	addEscapeStringSubstitute("Euml", "\xCB");
+	addEscapeStringSubstitute("eacute", "\xE9");
+	addEscapeStringSubstitute("egrave", "\xE8");
+	addEscapeStringSubstitute("ecirc", "\xEA");
+	addEscapeStringSubstitute("euml", "\xEB");
+	addEscapeStringSubstitute("Iacute", "\xCD");
+	addEscapeStringSubstitute("Igrave", "\xCC");
+	addEscapeStringSubstitute("Icirc", "\xCE");
+	addEscapeStringSubstitute("Iuml", "\xCF");
+	addEscapeStringSubstitute("iacute", "\xED");
+	addEscapeStringSubstitute("igrave", "\xEC");
+	addEscapeStringSubstitute("icirc", "\xEE");
+	addEscapeStringSubstitute("iuml", "\xEF");
+	addEscapeStringSubstitute("Oacute", "\xD3");
+	addEscapeStringSubstitute("Ograve", "\xD2");
+	addEscapeStringSubstitute("Ocirc", "\xD4");
+	addEscapeStringSubstitute("Ouml", "\xD6");
+	addEscapeStringSubstitute("Otilde", "\xD5");
+	addEscapeStringSubstitute("oacute", "\xF3");
+	addEscapeStringSubstitute("ograve", "\xF2");
+	addEscapeStringSubstitute("ocirc", "\xF4");
+	addEscapeStringSubstitute("ouml", "\xF6");
+	addEscapeStringSubstitute("otilde", "\xF5");
+	addEscapeStringSubstitute("Uacute", "\xDA");
+	addEscapeStringSubstitute("Ugrave", "\xD9");
+	addEscapeStringSubstitute("Ucirc", "\xDB");
+	addEscapeStringSubstitute("Uuml", "\xDC");
+	addEscapeStringSubstitute("uacute", "\xFA");
+	addEscapeStringSubstitute("ugrave", "\xF9");
+	addEscapeStringSubstitute("ucirc", "\xFB");
+	addEscapeStringSubstitute("uuml", "\xFC");
+	addEscapeStringSubstitute("Yacute", "\xDD");
+	addEscapeStringSubstitute("yacute", "\xFD");
+	addEscapeStringSubstitute("yuml", "\xFF");
+
+	addEscapeStringSubstitute("deg", "\xB0");
+	addEscapeStringSubstitute("plusmn", "\xB1");
+	addEscapeStringSubstitute("sup2", "\xB2");
+	addEscapeStringSubstitute("sup3", "\xB3");
+	addEscapeStringSubstitute("sup1", "\xB9");
+	// a second "nbsp" registration used to sit here; it is dropped so the
+	// UTF-8 no-break space registered at the top is the only mapping
+	addEscapeStringSubstitute("pound", "\xA3");
+	addEscapeStringSubstitute("cent", "\xA2");
+	addEscapeStringSubstitute("frac14", "\xBC");
+	addEscapeStringSubstitute("frac12", "\xBD");
+	addEscapeStringSubstitute("frac34", "\xBE");
+	addEscapeStringSubstitute("iquest", "\xBF");
+	addEscapeStringSubstitute("iexcl", "\xA1");
+	addEscapeStringSubstitute("ETH", "\xD0");
+	addEscapeStringSubstitute("eth", "\xF0");
+	addEscapeStringSubstitute("THORN", "\xDE");
+	addEscapeStringSubstitute("thorn", "\xFE");
+	addEscapeStringSubstitute("AElig", "\xC6");
+	addEscapeStringSubstitute("aelig", "\xE6");
+	addEscapeStringSubstitute("Oslash", "\xD8");
+	addEscapeStringSubstitute("curren", "\xA4");
+	addEscapeStringSubstitute("Ccedil", "\xC7");
+	addEscapeStringSubstitute("ccedil", "\xE7");
+	addEscapeStringSubstitute("szlig", "\xDF");
+	addEscapeStringSubstitute("Ntilde", "\xD1");
+	addEscapeStringSubstitute("ntilde", "\xF1");
+	addEscapeStringSubstitute("yen", "\xA5");
+	addEscapeStringSubstitute("not", "\xAC");
+	addEscapeStringSubstitute("ordf", "\xAA");
+	addEscapeStringSubstitute("uml", "\xA8");
+	addEscapeStringSubstitute("shy", "\xAD");
+	addEscapeStringSubstitute("macr", "\xAF");
+
+	addEscapeStringSubstitute("micro", "\xB5");
+	addEscapeStringSubstitute("middot", "\xB7");
+	addEscapeStringSubstitute("cedil", "\xB8");
+	addEscapeStringSubstitute("ordm", "\xBA");
+	addEscapeStringSubstitute("times", "\xD7");
+	addEscapeStringSubstitute("divide", "\xF7");
+	addEscapeStringSubstitute("oslash", "\xF8");
+
+	setTokenCaseSensitive(true);
+
+
+	addTokenSubstitute("br", "\\line ");
+	addTokenSubstitute("br /", "\\line ");
+	addTokenSubstitute("i", "{\\i1 ");
+	addTokenSubstitute("/i", "}");
+	addTokenSubstitute("b", "{\\b1 ");
+	addTokenSubstitute("/b", "}");
+	addTokenSubstitute("p", "{\\fi200\\par}");
+	addTokenSubstitute("p /", "\\pard\\par\\par ");
+
+	//we need uppercase forms for the moment to support a few early ThML modules that aren't XHTML compliant
+	addTokenSubstitute("BR", "\\line ");
+	addTokenSubstitute("I", "{\\i1 ");
+	addTokenSubstitute("/I", "}");
+	addTokenSubstitute("B", "{\\b1 ");
+	addTokenSubstitute("/B", "}");
+	addTokenSubstitute("P", "\\par ");
+	addTokenSubstitute("scripture", "{\\i1 ");
+	addTokenSubstitute("/scripture", "}");
+	addTokenSubstitute("center", "\\qc ");
+	addTokenSubstitute("/center", "\\pard ");
+}
+
+
+// Runs the ThML -> RTF conversion on text, in place, in three steps:
+//   1. backslash-escape the characters RTF treats as control codes
+//      ('{', '}' and '\') that are already present in the text,
+//   2. let SWBasicFilter::processText do the token / escape handling,
+//   3. squeeze every run of blanks, tabs and line breaks into one space.
+// Always returns 0.
+char ThMLRTF::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+
+	// step 1: escape RTF control characters
+	SWBuf orig = text;
+	const char *from = orig.c_str();
+	text = "";
+	while (*from) {
+		if (*from == '{' || *from == '}' || *from == '\\')
+			text += "\\";
+		text += *from;
+		from++;
+	}
+	text += (char)0;
+
+	// step 2: regular token and escape processing
+	SWBasicFilter::processText(text, key, module);
+
+	// step 3: collapse whitespace runs
+	orig = text;
+	from = orig.c_str();
+	text = "";
+	while (*from) {
+		if (strchr(" \t\n\r", *from)) {
+			while (from[1] && strchr(" \t\n\r", from[1]))
+				from++;
+			text += " ";
+		}
+		else text += *from;
+		from++;
+	}
+	text += (char)0;	// probably not needed, but don't want to remove without investigating (same as above)
+	return 0;
+}
+
+
+// Per-call state for ThMLRTF::handleToken.
+//
+// SecHead and BiblicalText always start out false; when a module is given,
+// version takes the module name and BiblicalText records whether the module
+// type is "Biblical Texts".  (The previous code left BiblicalText
+// uninitialised when module was null and declared a local startTag that
+// only shadowed the member.)
+ThMLRTF::MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) {
+	this->SecHead = false;
+	this->BiblicalText = false;
+	if (module) {
+		version = module->Name();
+		BiblicalText = (!strcmp(module->Type(), "Biblical Texts"));
+	}
+}
+
+
+// Handles one ThML token (the text between '<' and '>') and appends the RTF
+// for it to buf.
+//
+// Simple replacements registered in the constructor are tried first.  The
+// tags handled by hand are:
+//   sync      - morph / Strongs / Dict markers
+//   note      - footnote marker; the note text itself is suppressed
+//   scripRef  - cross reference link or marker; the inner text is suppressed
+//   div       - class "sechead" / "title" headings
+//   img/image - image tag with the module's AbsoluteDataPath prepended
+//
+// userData must point at the MyUserData created for this call.
+// Returns true when the token was handled, false otherwise.
+bool ThMLRTF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+	if (substituteToken(buf, token))
+		return true;	// a plain table substitution was enough
+
+	// manually process if it wasn't a simple substitution
+	MyUserData *u = (MyUserData *)userData;
+	XMLTag tag(token);
+	if ((!tag.isEndTag()) && (!tag.isEmpty()))
+		u->startTag = tag;
+
+	// a tag without a name matches none of the branches below and is
+	// reported as unhandled instead of being passed to strcmp as null
+	const char *name = tag.getName();
+	if (!name)
+		name = "";
+
+	if (!strcmp(name, "sync")) {
+		SWBuf value = tag.getAttribute("value");
+		const char *type = tag.getAttribute("type");
+		if (type && !strcmp(type, "morph")) {
+			buf.appendFormatted(" {\\cf4 \\sub (%s)}", value.c_str());
+		}
+		else if (type && !strcmp(type, "Strongs")) {
+			if (value[0] == 'H' || value[0] == 'G' || value[0] == 'A') {
+				value<<1;	// drop the leading letter
+				buf.appendFormatted(" {\\cf3 \\sub <%s>}", value.c_str());
+			}
+			else if (value[0] == 'T') {
+				value<<1;	// drop the leading letter
+				buf.appendFormatted(" {\\cf4 \\sub (%s)}", value.c_str());
+			}
+		}
+		else if (type && !strcmp(type, "Dict")) {
+			if (!tag.isEndTag())
+				buf += "{\\b ";
+			else buf += "}";
+		}
+	}
+	// <note> tag
+	else if (!strcmp(name, "note")) {
+		if (!tag.isEndTag()) {
+			if (!tag.isEmpty()) {
+				SWBuf footnoteNumber = tag.getAttribute("swordFootnote");
+				VerseKey *vkey = NULL;
+				// see if we have a VerseKey * or descendant
+				SWTRY {
+					vkey = SWDYNAMIC_CAST(VerseKey, u->key);
+				}
+				SWCATCH ( ... ) { }
+				if (vkey) {
+					// leave this special osis type in for crossReference notes types?  Might thml use this some day? Doesn't hurt.
+					char ch = ((tag.getAttribute("type") && ((!strcmp(tag.getAttribute("type"), "crossReference")) || (!strcmp(tag.getAttribute("type"), "x-cross-ref")))) ? 'x':'n');
+					buf.appendFormatted("{\\super <a href=\"\">*%c%i.%s</a>} ", ch, vkey->Verse(), footnoteNumber.c_str());
+				}
+				u->suspendTextPassThru = true;
+			}
+		}
+		if (tag.isEndTag()) {
+			u->suspendTextPassThru = false;
+		}
+	}
+
+	else if (!strcmp(name, "scripRef")) {
+		if (!tag.isEndTag()) {
+			if (!tag.isEmpty()) {
+				u->suspendTextPassThru = true;
+			}
+		}
+		if (tag.isEndTag()) {	// </scripRef>
+			if (!u->BiblicalText) {
+				// prefer the passage attribute of the opening tag, fall
+				// back to the text that stood between the tags
+				SWBuf refList = u->startTag.getAttribute("passage");
+				if (!refList.length())
+					refList = u->lastTextNode;
+				buf += "<a href=\"\">";
+				buf += refList.c_str();
+				buf += "</a>";
+			}
+			else {
+				SWBuf footnoteNumber = u->startTag.getAttribute("swordFootnote");
+				VerseKey *vkey = NULL;
+				// see if we have a VerseKey * or descendant
+				SWTRY {
+					vkey = SWDYNAMIC_CAST(VerseKey, u->key);
+				}
+				SWCATCH ( ... ) {}
+				if (vkey) {
+					// leave this special osis type in for crossReference notes types?  Might thml use this some day? Doesn't hurt.
+					buf.appendFormatted("{\\super <a href=\"\">*x%i.%s</a>} ", vkey->Verse(), footnoteNumber.c_str());
+				}
+			}
+
+			// let's let text resume to output again
+			u->suspendTextPassThru = false;
+		}
+	}
+
+	else if (!strcmp(name, "div")) {
+		if (tag.isEndTag() && u->SecHead) {
+			buf += "\\par}";
+			u->SecHead = false;
+		}
+		else if (tag.getAttribute("class")) {
+			if (!stricmp(tag.getAttribute("class"), "sechead")) {
+				u->SecHead = true;
+				buf += "{\\par\\i1\\b1 ";
+			}
+			else if (!stricmp(tag.getAttribute("class"), "title")) {
+				u->SecHead = true;
+				buf += "{\\par\\i1\\b1 ";
+			}
+		}
+	}
+	else if (!strcmp(name, "img") || !strcmp(name, "image")) {
+		const char *src = tag.getAttribute("src");
+		if (!src)		// assert we have a src attribute
+			return false;
+
+		// Build <AbsoluteDataPath><src> in an SWBuf: no manual buffer sizing,
+		// and a missing module or config entry just leaves the prefix empty.
+		SWBuf filepath;
+		if (u->module) {
+			const char *dataPath = u->module->getConfigEntry("AbsoluteDataPath");
+			if (dataPath)
+				filepath = dataPath;
+		}
+		filepath += src;
+
+		// we do this because BibleCS looks for this EXACT format for an image tag
+		buf+="<img src=\"";
+		buf+=filepath.c_str();
+		buf+="\" />";
+	}
+	else {
+		return false;	// we still didn't handle token
+	}
+	return true;
+}
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmlscripref.cpp b/src/modules/filters/thmlscripref.cpp
new file mode 100644
index 0000000..df2b3d2
--- /dev/null
+++ b/src/modules/filters/thmlscripref.cpp
@@ -0,0 +1,123 @@
+/******************************************************************************
+ *
+ * thmlscripref - SWFilter descendant to hide or show scripture
+ * references in a ThML module.
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <thmlscripref.h>
+#include <utilxml.h>
+#include <versekey.h>
+#include <swmodule.h>
+
+SWORD_NAMESPACE_START
+
+// Name and tooltip handed to the SWOptionFilter base by the constructor.
+const char oName[] = "Cross-references";
+const char oTip[] = "Toggles Scripture Cross-references On and Off if they exist";
+
+// Selectable option values.  oValues is built from the range
+// [&choices[0], &choices[2]); presumably that is a half-open iterator range,
+// giving "Off" and "On" with the trailing "" only marking the end -- confirm
+// against the StringList definition.
+const SWBuf choices[3] = {"Off", "On", ""};
+const StringList oValues(&choices[0], &choices[2]);
+
+// Registers the option name, tip and value list with SWOptionFilter and
+// starts with the option set to "On".
+ThMLScripref::ThMLScripref() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("On");
+}
+
+
+// Nothing to release; the filter owns no resources of its own.
+ThMLScripref::~ThMLScripref() {
+}
+
+
+// Shows or hides <scripRef> cross references in a ThML text, in place.
+//
+// Every non-empty <scripRef>...</scripRef> element is collected while the
+// text is scanned.  When the module processes entry attributes, the element
+// is also recorded under the module's "Footnote" entry attributes (its tag
+// attributes, its body and the parsed reference list) and the opening tag
+// gets a "swordFootnote" attribute carrying the footnote number.  With the
+// option on the element is written back to the text, otherwise it is left
+// out.  All other markup and text passes through unchanged.
+//
+// key and module may be null: without a key the reference parser starts from
+// an empty key text, without a module no entry attributes are written.
+// NOTE(review): confirm that constructing a VerseKey from "" is acceptable.
+// Always returns 0.
+char ThMLScripref::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+	SWBuf token;
+	bool intoken    = false;
+	bool hide       = false;
+	SWBuf tagText;
+	XMLTag startTag;
+	SWBuf refs = "";
+	int footnoteNum = 1;
+	char buf[254];
+	VerseKey parser = key ? key->getText() : "";
+
+	SWBuf orig = text;
+	const char *from = orig.c_str();
+
+	for (text = ""; *from; from++) {
+		if (*from == '<') {
+			intoken = true;
+			token = "";
+			continue;
+		}
+		if (*from == '>') {	// process tokens
+			intoken = false;
+
+			XMLTag tag(token);
+			// guard against a tag without a name before comparing it
+			const char *name = tag.getName();
+			const bool isScripRef = name && !strcmp(name, "scripRef");
+
+			if (isScripRef) {
+				if (!tag.isEndTag()) {
+					if (!tag.isEmpty()) {
+						// opening tag: start collecting the element
+						refs = "";
+						startTag = tag;
+						hide = true;
+						tagText = "";
+						continue;
+					}
+				}
+				if (hide && tag.isEndTag()) {
+					if (module && module->isProcessEntryAttributes()) {
+						SWBuf fc = module->getEntryAttributes()["Footnote"]["count"]["value"];
+						footnoteNum = (fc.length()) ? atoi(fc.c_str()) : 0;
+						sprintf(buf, "%i", ++footnoteNum);
+						module->getEntryAttributes()["Footnote"]["count"]["value"] = buf;
+						StringList attributes = startTag.getAttributeNames();
+						for (StringList::iterator it = attributes.begin(); it != attributes.end(); it++) {
+							module->getEntryAttributes()["Footnote"][buf][it->c_str()] = startTag.getAttribute(it->c_str());
+						}
+						module->getEntryAttributes()["Footnote"][buf]["body"] = tagText;
+						startTag.setAttribute("swordFootnote", buf);
+						// parse the passage attribute when present, else the body text
+						SWBuf passage = startTag.getAttribute("passage");
+						if (passage.length())
+							refs = parser.ParseVerseList(passage.c_str(), parser, true).getRangeText();
+						else	refs = parser.ParseVerseList(tagText.c_str(), parser, true).getRangeText();
+						module->getEntryAttributes()["Footnote"][buf]["refList"] = refs.c_str();
+					}
+					hide = false;
+					if (option) {	// we want the tag in the text
+						text += startTag;
+						text.append(tagText);
+					}
+					else	continue;
+				}
+			}
+
+			// if not a scripRef token, keep token in text
+			if (isScripRef && (!tag.isEndTag())) {
+				SWBuf osisRef = tag.getAttribute("passage");
+				if (refs.length())
+					refs += "; ";
+				refs += osisRef;
+			}
+			if (!hide) {
+				text += '<';
+				text.append(token);
+				text += '>';
+			}
+			else {
+				tagText += '<';
+				tagText.append(token);
+				tagText += '>';
+			}
+			continue;
+		}
+		if (intoken) { //copy token
+			token += *from;
+		}
+		else if (!hide) { //copy text which is not inside a token
+			text += *from;
+		}
+		else tagText += *from;
+	}
+	return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmlstrongs.cpp b/src/modules/filters/thmlstrongs.cpp
new file mode 100644
index 0000000..c1ab08c
--- /dev/null
+++ b/src/modules/filters/thmlstrongs.cpp
@@ -0,0 +1,146 @@
+/******************************************************************************
+ *
+ * thmlstrongs - SWFilter descendant to hide or show strongs number
+ * in a ThML module.
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <thmlstrongs.h>
+#include <swmodule.h>
+#include <utilstr.h>
+#include <ctype.h>
+
+SWORD_NAMESPACE_START
+
+// Name and tooltip under which this option filter is registered.
+const char oName[] = "Strong's Numbers";
+const char oTip[] = "Toggles Strong's Numbers On and Off if they exist";
+
+// Selectable values; the trailing empty entry only marks the end of the
+// range handed to oValues, so the list itself holds "Off" and "On".
+const SWBuf choices[3] = {"Off", "On", ""};
+const StringList oValues(choices, choices + 2);
+
+
+/** Registers the option and starts with Strong's numbers switched off. */
+ThMLStrongs::ThMLStrongs()
+	: SWOptionFilter(oName, oTip, &oValues)
+{
+	setOptionValue("Off");
+}
+
+
+/** Nothing to release. */
+ThMLStrongs::~ThMLStrongs()
+{
+}
+
+
+/**
+ * Walks the ThML text token by token.
+ *
+ * For every <sync type="Strongs" value="..."> token:
+ *  - when the module processes entry attributes, the number is recorded
+ *    under ["Word"][NNN] together with the text seen since the previous
+ *    recorded word (numbers of 5627 and above are ignored for now);
+ *  - when the option is off, the token is removed from the text, and a
+ *    preceding space is dropped if punctuation or a space follows.
+ * <sync type="morph" ...> tokens have their class/value recorded against the
+ * most recently numbered word.  All tokens that are not removed are copied
+ * through unchanged.
+ *
+ * @param text   in/out buffer holding the entry text
+ * @param key    unused here
+ * @param module module whose entry attributes are filled; may be null, in
+ *               which case no attributes are recorded
+ * @return always 0
+ */
+char ThMLStrongs::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+	char token[2048]; // cheese.  Fix.
+	const char *from;
+	int tokpos = 0;
+	bool intoken = false;
+	bool lastspace = false;
+	int word = 1;
+	char val[128];
+	char wordstr[16];	// roomy enough for any int printed with "%03d"
+	char *valto;
+	char *ch;
+	unsigned int textStart = 0, textEnd = 0;
+	SWBuf tmp;
+	bool newText = false;
+	const bool wantAttributes = module && module->isProcessEntryAttributes();
+
+	SWBuf orig = text;
+	from = orig.c_str();
+
+	// a '>' may show up before any '<'; make sure token is a valid string by then
+	token[0] = 0;
+
+	for (text = ""; *from; from++) {
+		if (*from == '<') {
+			intoken = true;
+			tokpos = 0;
+			token[0] = 0;
+			token[1] = 0;
+			token[2] = 0;
+			textEnd = text.length();
+			continue;
+		}
+		if (*from == '>') {	// process tokens
+			intoken = false;
+			if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) {	// Strongs
+				if (wantAttributes) {
+					// the number starts after: sync type="Strongs" value="
+					// only bytes actually collected for this token are read
+					valto = val;
+					for (unsigned int i = 27; i < (unsigned int)tokpos && token[i] != '\"' && i < 150; i++)
+						*valto++ = token[i];
+					*valto = 0;
+					// skip one leading non-digit (e.g. G/H prefix), but never step over an empty value
+					if (atoi((*val && !isdigit((unsigned char)*val)) ? val+1 : val) < 5627) {
+						// normal strongs number
+						sprintf(wordstr, "%03d", word);
+						module->getEntryAttributes()["Word"][wordstr]["PartCount"] = "1";
+						module->getEntryAttributes()["Word"][wordstr]["Lemma"] = val;
+						module->getEntryAttributes()["Word"][wordstr]["LemmaClass"] = "strong";
+						tmp = "";
+						tmp.append(text.c_str()+textStart, (int)(textEnd - textStart));
+						module->getEntryAttributes()["Word"][wordstr]["Text"] = tmp;
+						newText = true;
+					}
+					else {
+/*
+						// verb morph
+						sprintf(wordstr, "%03d", word);
+						module->getEntryAttributes()["Word"][wordstr]["Morph"] = val;
+						module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = "OLBMorph";
+*/
+						word--;	// for now, completely ignore this word attribute.
+					}
+					word++;
+				}
+
+				if (!option) {	// if we don't want strongs
+					if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) {
+						if (lastspace)
+							text--;
+					}
+					if (newText) {textStart = text.length(); newText = false; }
+					continue;
+				}
+			}
+			if (wantAttributes) {
+				if (!strncmp(token, "sync type=\"morph\"", 17)) {
+					for (ch = token+17; *ch; ch++) {
+						if (!strncmp(ch, "class=\"", 7)) {
+							valto = val;
+							// stop at the closing quote or at the end of the token, whichever comes first
+							for (unsigned int i = 7; ch[i] && ch[i] != '\"' && i < 127; i++)
+								*valto++ = ch[i];
+							*valto = 0;
+							sprintf(wordstr, "%03d", word-1);
+							if ((!stricmp(val, "Robinsons")) || (!stricmp(val, "Robinson"))) {
+								strcpy(val, "robinson");
+							}
+							module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = val;
+						}
+						if (!strncmp(ch, "value=\"", 7)) {
+							valto = val;
+							for (unsigned int i = 7; ch[i] && ch[i] != '\"' && i < 127; i++)
+								*valto++ = ch[i];
+							*valto = 0;
+							sprintf(wordstr, "%03d", word-1);
+							module->getEntryAttributes()["Word"][wordstr]["Morph"] = val;
+						}
+					}
+					newText = true;
+				}
+			}
+			// token was not removed above: keep it in the text
+			text += '<';
+			text += token;
+			text += '>';
+			if (newText) {textStart = text.length(); newText = false; }
+			continue;
+		}
+		if (intoken) {
+			// tokens longer than the buffer are silently truncated
+			if (tokpos < 2045)
+				token[tokpos++] = *from;
+			token[tokpos+2] = 0;
+		}
+		else {
+			text += *from;
+			lastspace = (*from == ' ');
+		}
+	}
+	return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmlvariants.cpp b/src/modules/filters/thmlvariants.cpp
new file mode 100644
index 0000000..49f9b65
--- /dev/null
+++ b/src/modules/filters/thmlvariants.cpp
@@ -0,0 +1,118 @@
+/******************************************************************************
+ *
+ * thmlvariants - SWFilter descendant to hide or show textual variants
+ * in a ThML module.
+ */
+
+
+#include <stdlib.h>
+#include <thmlvariants.h>
+#include <utilstr.h>
+
+SWORD_NAMESPACE_START
+
+// The three selectable option values.
+const char ThMLVariants::primary[] = "Primary Reading";
+const char ThMLVariants::secondary[] = "Secondary Reading";
+const char ThMLVariants::all[] = "All Readings";
+
+// Option name and tooltip.
+const char ThMLVariants::optName[] = "Textual Variants";
+const char ThMLVariants::optTip[] = "Switch between Textual Variants modes";
+
+
+/** Starts in the first mode and publishes the three selectable values. */
+ThMLVariants::ThMLVariants()
+{
+	option = false;
+
+	options.push_back(primary);
+	options.push_back(secondary);
+	options.push_back(all);
+}
+
+
+/** Nothing to release. */
+ThMLVariants::~ThMLVariants()
+{
+}
+
+/**
+ * Selects the variant mode by name (case-insensitive): 0 for the primary
+ * reading, 1 for the secondary reading, 2 for anything else.
+ */
+void ThMLVariants::setOptionValue(const char *ival)
+{
+	if (stricmp(ival, primary) == 0) {
+		option = 0;
+		return;
+	}
+	if (stricmp(ival, secondary) == 0) {
+		option = 1;
+		return;
+	}
+	option = 2;
+}
+
+/** Returns the name of the currently selected variant mode. */
+const char *ThMLVariants::getOptionValue()
+{
+	switch (option) {
+	case 0:
+		return primary;
+	case 1:
+		return secondary;
+	default:
+		return all;
+	}
+}
+
+/**
+ * Filters <div type="variant" ...> sections out of ThML text.
+ *
+ * In mode 0 the content of class="1" variant divs is dropped, in mode 1 the
+ * content of class="2" variant divs.  The opening and closing tags of every
+ * variant div are removed either way.  Any other mode leaves the text
+ * untouched.
+ *
+ * @return always 0
+ */
+char ThMLVariants::processText(SWBuf &text, const SWKey *key, const SWModule *module)
+{
+	if (option != 0 && option != 1)
+		return 0;
+
+	// one fixed comparison string per mode keeps the loop identical for both;
+	// each of the two strings is 28 characters long
+	const char *hiddenVariantTag = (option == 0)
+		? "div type=\"variant\" class=\"1\""
+		: "div type=\"variant\" class=\"2\"";
+
+	bool insideTag = false;
+	bool suppress = false;
+	bool insideVariant = false;
+	SWBuf tagBody;
+	SWBuf source = text;
+
+	text = "";
+	for (const char *p = source.c_str(); *p; p++) {
+		const char c = *p;
+
+		if (c == '<') {
+			insideTag = true;
+			tagBody = "";
+			continue;
+		}
+
+		if (c != '>') {
+			// ordinary character: part of a tag, visible text, or hidden text
+			if (insideTag) {
+				tagBody += c;
+			}
+			else if (!suppress) {
+				text += c;
+			}
+			continue;
+		}
+
+		// end of a tag: decide what to do with it
+		insideTag = false;
+
+		if (strncmp(tagBody.c_str(), hiddenVariantTag, 28) == 0) {
+			insideVariant = true;
+			suppress = true;
+			continue;
+		}
+		if (strncmp(tagBody.c_str(), "div type=\"variant\"", 18) == 0) {
+			insideVariant = true;
+			continue;
+		}
+		if (strncmp(tagBody.c_str(), "/div", 4) == 0) {
+			suppress = false;
+			if (insideVariant) {
+				insideVariant = false;
+				continue;
+			}
+		}
+		if (!suppress) {
+			text += '<';
+			text.append(tagBody);
+			text += '>';
+		}
+	}
+
+	return 0;
+}
+
+
+
+
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmlwebif.cpp b/src/modules/filters/thmlwebif.cpp
new file mode 100644
index 0000000..7428754
--- /dev/null
+++ b/src/modules/filters/thmlwebif.cpp
@@ -0,0 +1,103 @@
+/***************************************************************************
+ ThMLWEBIF.cpp - ThML to HTML filter with hrefs
+ -------------------
+ begin : 2001-09-03
+ copyright : 2001 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <thmlwebif.h>
+#include <swmodule.h>
+#include <url.h>
+#include <utilxml.h>
+#include <ctype.h>
+
+SWORD_NAMESPACE_START
+
+/**
+ * Sets an empty base URL and derives the passage-study page URL from it.
+ * Everything else is set up by ThMLHTMLHREF.
+ */
+ThMLWEBIF::ThMLWEBIF()
+	: baseURL(""),
+	  passageStudyURL(baseURL + "passagestudy.jsp")
+{
+}
+
+/**
+ * Renders <sync> and <scripRef> tokens as links to the passage-study page;
+ * every other token is delegated to ThMLHTMLHREF::handleToken.
+ *
+ *  - <sync type="morph" value="X">  ->  " (<a href="...?showMorph=X#cv">X</a>) "
+ *  - any other <sync value="X">     ->  " &lt;<a href="...?showStrong=N#cv">X'</a>&gt; "
+ *    where N is X without a leading G/H when a digit follows, and X' is X
+ *    without its first character
+ *  - <scripRef passage="P">         ->  opens a link to P, closed by </scripRef>
+ *  - <scripRef> without passage     ->  the enclosed text becomes the link key
+ *
+ * A <sync> token without a value attribute is treated as having an empty one.
+ *
+ * @param buf      output buffer the rendering is appended to
+ * @param token    token text without the surrounding angle brackets
+ * @param userData per-call filter state (a MyUserData)
+ * @return true when handled here, otherwise the base class result
+ */
+bool ThMLWEBIF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+
+	if (!substituteToken(buf, token)) {	// manually process if it wasn't a simple substitution
+		MyUserData *u = (MyUserData *)userData;
+		XMLTag tag(token);
+		SWBuf url;
+		if (!strcmp(tag.getName(), "sync")) {
+			const char *value = tag.getAttribute("value");
+			if (!value)
+				value = "";	// missing attribute: avoid handing a null pointer to SWBuf
+			url = value;
+			if ((url.length() > 1) && strchr("GH", url[0])) {
+				if (isdigit(url[1]))
+					url = url.c_str()+1;
+			}
+
+			const char *type = tag.getAttribute("type");
+			const bool isMorph = type && !strcmp(type, "morph");
+
+			if (isMorph) {
+				buf += "<small><em> (";
+				buf.appendFormatted("<a href=\"%s?showMorph=%s#cv\">", passageStudyURL.c_str(), URL::encode(url).c_str() );
+			}
+			else {
+				if (*value) {
+					value++;	// skip leading G, H or T; never step past the terminator
+				}
+
+				buf += "<small><em> &lt;";
+				buf.appendFormatted("<a href=\"%s?showStrong=%s#cv\">", passageStudyURL.c_str(), URL::encode(url).c_str() );
+			}
+
+			buf += value;
+			buf += "</a>";
+
+			if (isMorph) {
+				buf += ") </em></small>";
+			}
+			else {
+				buf += "&gt; </em></small>";
+			}
+		}
+		else if (!strcmp(tag.getName(), "scripRef")) {
+			if (tag.isEndTag()) {
+				if (u->inscriptRef) {	// like "<scripRef passage="John 3:16">John 3:16</scripRef>"
+					u->inscriptRef = false;
+					buf += "</a>";
+				}
+				else {	// end of scripRef like "<scripRef>John 3:16</scripRef>"
+					url = u->lastTextNode;
+					buf.appendFormatted("<a href=\"%s?key=%s#cv\">", passageStudyURL.c_str(), URL::encode(url).c_str());
+					buf += u->lastTextNode.c_str();
+					buf += "</a>";
+
+					// let's let text resume to output again
+					u->suspendTextPassThru = false;
+				}
+			}
+			else if (tag.getAttribute("passage")) {	//passage given
+				u->inscriptRef = true;
+
+				buf.appendFormatted("<a href=\"%s?key=%s#cv\">", passageStudyURL.c_str(), URL::encode(tag.getAttribute("passage")).c_str());
+			}
+			else {	//no passage given
+				u->inscriptRef = false;
+				// let's stop text from going to output
+				u->suspendTextPassThru = true;
+			}
+		}
+		else {
+			return ThMLHTMLHREF::handleToken(buf,token,userData);
+		}
+	}
+	return true;
+}
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmlwordjs.cpp b/src/modules/filters/thmlwordjs.cpp
new file mode 100644
index 0000000..ad8eef0
--- /dev/null
+++ b/src/modules/filters/thmlwordjs.cpp
@@ -0,0 +1,296 @@
+/******************************************************************************
+ *
+ * thmlwordjs - SWFilter descendant to wrap words that carry Strong's numbers
+ * in clickable Javascript spans in a ThML module.
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <thmlwordjs.h>
+#include <swmodule.h>
+#include <ctype.h>
+#include <utilxml.h>
+#include <utilstr.h>
+#include <versekey.h>
+
+SWORD_NAMESPACE_START
+
+// Name and tooltip under which this option filter is registered.
+const char oName[] = "Word Javascript";
+const char oTip[] = "Toggles Word Javascript data";
+
+// Selectable values; the trailing empty entry only marks the end of the
+// range handed to oValues, so the list itself holds "Off" and "On".
+const SWBuf choices[3] = {"Off", "On", ""};
+const StringList oValues(choices, choices + 2);
+
+
+/**
+ * Registers the option, switches it off, and clears the default lexicon,
+ * parsing-module and manager pointers.
+ */
+ThMLWordJS::ThMLWordJS()
+	: SWOptionFilter(oName, oTip, &oValues)
+{
+	setOptionValue("Off");
+
+	defaultGreekLex = 0;
+	defaultHebLex = 0;
+	defaultGreekParse = 0;
+	defaultHebParse = 0;
+	mgr = 0;
+}
+
+
+/** Nothing to release here. */
+ThMLWordJS::~ThMLWordJS()
+{
+}
+
+
+/**
+ * Inserts the opening <span class="clk" onclick="p(...)"> for one recorded word.
+ *
+ * Reads the attributes stored under ["Word"][NNN] (NNN = wordNum printed with
+ * "%03d").  When a Strong's number and a TextStart offset are present, the
+ * span is inserted into text at TextStart + lastAppendLen.
+ *
+ * @param text          buffer being built by ThMLWordJS::processText
+ * @param module        module whose entry attributes are read (not null)
+ * @param key           current key, used for the word id when vkey is null; may be null
+ * @param vkey          key as a VerseKey, or null
+ * @param wordNum       number of the word to emit
+ * @param lastAppendLen length of the previously inserted span
+ * @param greekLex      lexicon used when the number is Greek ('G'), may be null
+ * @param hebLex        lexicon used when the number is Hebrew ('H'), may be null
+ * @param wordSrcPrefix prefix placed before the word number in the word id
+ * @param modName       module name passed as last argument of p(...)
+ * @return length of the span just inserted, or lastAppendLen when nothing was inserted
+ */
+static unsigned int insertWordSpan(SWBuf &text, const SWModule *module, const SWKey *key, VerseKey *vkey,
+		int wordNum, unsigned int lastAppendLen, SWModule *greekLex, SWModule *hebLex,
+		const SWBuf &wordSrcPrefix, const SWBuf &modName) {
+	char wstr[16];
+	sprintf(wstr, "%03d", wordNum);
+	AttributeValue *wAttrs = &(module->getEntryAttributes()["Word"][wstr]);
+	SWBuf strong = (*wAttrs)["Strongs"];
+	SWBuf morph = (*wAttrs)["Morph"];
+	// the next two values are not used below; the lookups are kept exactly as
+	// before in case reading them has an effect on the attribute set
+	SWBuf morphClass = (*wAttrs)["MorphClass"];
+	SWBuf wordText = (*wAttrs)["Text"];
+	SWBuf textSt = (*wAttrs)["TextStart"];
+	if (!strong.size())
+		return lastAppendLen;
+
+	// a leading non-digit is the language marker; otherwise ask the verse key
+	char gh = isdigit(strong[0]) ? 0 : strong[0];
+	if (!gh) {
+		if (vkey) {
+			gh = vkey->Testament() ? 'H' : 'G';
+		}
+	}
+	else strong << 1;	// drop the marker from the number
+
+	SWModule *sLex = 0;
+	if (gh == 'G') {
+		sLex = greekLex;
+	}
+	if (gh == 'H') {
+		sLex = hebLex;
+	}
+	SWBuf lexName = "";
+	if (sLex) {
+		// we can pass the real lex name in, but we have some
+		// aliases in the javascript to optimize bandwidth
+		lexName = sLex->Name();
+		if (lexName == "StrongsGreek")
+			lexName = "G";
+		if (lexName == "StrongsHebrew")
+			lexName = "H";
+	}
+	SWBuf wordID;
+	if (vkey) {
+		// optimize for bandwidth and use only the verse as the unique entry id
+		wordID.appendFormatted("%d", vkey->Verse());
+	}
+	else if (key) {
+		wordID = key->getText();
+	}
+	for (unsigned int i = 0; i < wordID.size(); i++) {
+		if ((!isdigit(wordID[i])) && (!isalpha(wordID[i]))) {
+			wordID[i] = '_';
+		}
+	}
+	wordID.appendFormatted("_%s%d", wordSrcPrefix.c_str(), atoi(wstr));
+	if (!textSt.size())
+		return lastAppendLen;
+
+	int textStr = atoi(textSt.c_str());
+	textStr += lastAppendLen;	// TextStart was recorded before the previous span went in
+	SWBuf spanStart = "";
+	// 'p' = 'fillpop' to save bandwidth
+	const char *m = strchr(morph.c_str(), ':');
+	if (m) m++;
+	else m = morph.c_str();
+	spanStart.appendFormatted("<span class=\"clk\" onclick=\"p('%s','%s','%s','%s','','%s');\" >", lexName.c_str(), strong.c_str(), wordID.c_str(), m, modName.c_str());
+	text.insert(textStr, spanStart);
+	return spanStart.length();
+}
+
+
+/**
+ * When the option is on, records each <sync type="Strongs"> / <sync type="morph">
+ * token in the module's ["Word"] entry attributes, appends "</span>" after
+ * each numbered word and inserts the matching opening span (see
+ * insertWordSpan) in front of it.  All tokens are kept in the text.
+ *
+ * @param text   in/out buffer holding the entry text
+ * @param key    current key; may be null
+ * @param module module whose entry attributes are filled; when null the text
+ *               is left untouched
+ * @return always 0
+ */
+char ThMLWordJS::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+	// nothing to do when switched off; without a module there is nowhere to
+	// keep the word attributes this filter relies on
+	if (!option || !module)
+		return 0;
+
+	char token[2112]; // cheese.  Fix.
+	int tokpos = 0;
+	bool intoken = false;
+	int word = 1;
+	char val[128];
+	char *valto;
+	char *ch;
+	char wordstr[16];	// roomy enough for any int printed with "%03d"
+	unsigned int textStart = 0, lastAppendLen = 0, textEnd = 0;
+	SWBuf tmp;
+	bool newText = false;
+	bool needWordOut = false;
+	AttributeValue *wordAttrs = 0;	// attributes of the most recent numbered word, if any
+	SWBuf modName = module->Name();
+	SWBuf wordSrcPrefix = modName;
+
+	const SWBuf orig = text;
+	const char * from = orig.c_str();
+	VerseKey *vkey = 0;
+	if (key) {
+		vkey = SWDYNAMIC_CAST(VerseKey, key);
+	}
+
+	// a '>' may show up before any '<'; make sure token is a valid string by then
+	token[0] = 0;
+
+	for (text = ""; *from; from++) {
+		if (*from == '<') {
+			intoken = true;
+			tokpos = 0;
+			token[0] = 0;
+			token[1] = 0;
+			token[2] = 0;
+			textEnd = text.length();
+			continue;
+		}
+		if (*from == '>') {	// process tokens
+			intoken = false;
+			if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) {	// Strongs
+				// the number starts after: sync type="Strongs" value="
+				// only bytes actually collected for this token are read
+				valto = val;
+				for (unsigned int i = 27; i < (unsigned int)tokpos && token[i] != '\"' && i < 150; i++)
+					*valto++ = token[i];
+				*valto = 0;
+				// skip one leading non-digit, but never step over an empty value
+				if (atoi((*val && !isdigit((unsigned char)*val)) ? val+1 : val) < 5627) {
+					// normal strongs number
+					sprintf(wordstr, "%03d", word++);
+					needWordOut = (word > 2);
+					wordAttrs = &(module->getEntryAttributes()["Word"][wordstr]);
+					(*wordAttrs)["Strongs"] = val;
+					tmp = "";
+					tmp.append(text.c_str()+textStart, (int)(textEnd - textStart));
+					(*wordAttrs)["Text"] = tmp;
+					text.append("</span>");
+					SWBuf ts;
+					ts.appendFormatted("%d", textStart);
+					(*wordAttrs)["TextStart"] = ts;
+					newText = true;
+				}
+				else if (wordAttrs) {
+					// verb morph; ignored when no word has been numbered yet
+					(*wordAttrs)["Morph"] = val;
+				}
+
+			}
+			if (!strncmp(token, "sync type=\"morph\"", 17)) {
+				for (ch = token+17; *ch; ch++) {
+					if (!strncmp(ch, "class=\"", 7)) {
+						valto = val;
+						// stop at the closing quote or at the end of the token
+						for (unsigned int i = 7; ch[i] && ch[i] != '\"' && i < 127; i++)
+							*valto++ = ch[i];
+						*valto = 0;
+						if (wordAttrs)
+							(*wordAttrs)["MorphClass"] = val;
+					}
+					if (!strncmp(ch, "value=\"", 7)) {
+						valto = val;
+						for (unsigned int i = 7; ch[i] && ch[i] != '\"' && i < 127; i++)
+							*valto++ = ch[i];
+						*valto = 0;
+						if (wordAttrs)
+							(*wordAttrs)["Morph"] = val;
+					}
+				}
+				newText = true;
+			}
+			// every token, Strong's or not, is kept in the text
+			text += '<';
+			text += token;
+			text += '>';
+			if (needWordOut) {
+				// the word before the one just numbered is complete now
+				needWordOut = false;
+				lastAppendLen = insertWordSpan(text, module, key, vkey, word-2, lastAppendLen,
+						defaultGreekLex, defaultHebLex, wordSrcPrefix, modName);
+			}
+			if (newText) {
+				textStart = text.length(); newText = false;
+			}
+			continue;
+		}
+		if (intoken) {
+			// tokens longer than the limit are silently truncated
+			if (tokpos < 2045)
+				token[tokpos++] = *from;
+			token[tokpos+2] = 0;
+		}
+		else {
+			text += *from;
+		}
+	}
+
+	// the last numbered word has not been given its opening span yet
+	insertWordSpan(text, module, key, vkey, word-1, lastAppendLen,
+			defaultGreekLex, defaultHebLex, wordSrcPrefix, modName);
+
+	return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/unicodertf.cpp b/src/modules/filters/unicodertf.cpp
new file mode 100644
index 0000000..8c2a1f6
--- /dev/null
+++ b/src/modules/filters/unicodertf.cpp
@@ -0,0 +1,87 @@
+/******************************************************************************
+ *
+ * unicodertf - SWFilter descendant to convert UTF-8 text to RTF
+ * Unicode (\uN?) escapes
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <unicodertf.h>
+#include <swbuf.h>
+
+SWORD_NAMESPACE_START
+
+/** Nothing to set up. */
+UnicodeRTF::UnicodeRTF() {
+}
+
+
+/**
+ * Replaces every multi-byte UTF-8 sequence in text by RTF "\uN?" escapes,
+ * where N is the UTF-16 code unit printed as a signed 16-bit decimal.  Code
+ * points of 0x10000 and above are written as two escapes (surrogate pair).
+ * Bytes below 0x80 are copied unchanged; a continuation byte found where a
+ * lead byte is expected is dropped.  If the text ends in the middle of a
+ * multi-byte sequence, the incomplete sequence is dropped.
+ *
+ * @param text   in/out buffer
+ * @param key    unused
+ * @param module unused
+ * @return always 0
+ */
+char UnicodeRTF::processText(SWBuf &text, const SWKey *key, const SWModule *module)
+{
+	const unsigned char *from;
+	char digit[10];
+	unsigned long ch;
+	signed short utf16;
+	unsigned char from2[7];
+
+	SWBuf orig = text;
+
+	from = (const unsigned char *)orig.c_str();
+
+	// -------------------------------
+	for (text = ""; *from; from++) {
+		ch = 0;
+		//case: ANSI
+		if ((*from & 128) != 128) {
+			text += *from;
+			continue;
+		}
+		//case: Invalid UTF-8 (illegal continuing byte in initial position)
+		if ((*from & 128) && ((*from & 64) != 64)) {
+			continue;
+		}
+		//case: 2+ byte codepoint
+		// each leading 1 bit shifted out of the lead byte announces one continuation byte
+		from2[0] = *from;
+		from2[0] <<= 1;
+		int subsequent;
+		bool truncated = false;
+		for (subsequent = 1; (from2[0] & 128) && (subsequent < 7); subsequent++) {
+			if (!from[subsequent]) {
+				// text ends inside the sequence: never read past the terminator
+				truncated = true;
+				break;
+			}
+			from2[0] <<= 1;
+			from2[subsequent] = from[subsequent];
+			from2[subsequent] &= 63;
+			ch <<= 6;
+			ch |= from2[subsequent];
+		}
+		if (truncated)
+			break;
+		subsequent--;
+		from2[0] <<= 1;
+		char significantFirstBits = 8 - (2+subsequent);
+
+		// payload bits of the lead byte go above the continuation bits
+		ch |= (((short)from2[0]) << (((6*subsequent)+significantFirstBits)-8));
+		from += subsequent;
+		if (ch < 0x10000) {
+			utf16 = (signed short)ch;
+			text += '\\';
+			text += 'u';
+			sprintf(digit, "%d", utf16);
+			text += digit;
+			text += '?';
+		}
+		else {
+			// high surrogate
+			utf16 = (signed short)((ch - 0x10000) / 0x400 + 0xD800);
+			text += '\\';
+			text += 'u';
+			sprintf(digit, "%d", utf16);
+			text += digit;
+			text += '?';
+			// low surrogate
+			utf16 = (signed short)((ch - 0x10000) % 0x400 + 0xDC00);
+			text += '\\';
+			text += 'u';
+			sprintf(digit, "%d", utf16);
+			text += digit;
+			text += '?';
+		}
+	}
+
+	return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/utf16utf8.cpp b/src/modules/filters/utf16utf8.cpp
new file mode 100644
index 0000000..ae0845f
--- /dev/null
+++ b/src/modules/filters/utf16utf8.cpp
@@ -0,0 +1,90 @@
+/******************************************************************************
+ *
+ * UTF16UTF8 - SWFilter descendant to convert UTF-16 to UTF-8
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include <utf16utf8.h>
+#include <swbuf.h>
+
+SWORD_NAMESPACE_START
+
+/** Nothing to set up. */
+UTF16UTF8::UTF16UTF8() {
+}
+
+
+/**
+ * Re-encodes text, read as zero-terminated UTF-16 code units in host byte
+ * order, as UTF-8.  A high surrogate followed by a low surrogate is combined
+ * into one code point; a high surrogate without a following low surrogate,
+ * or a stray low surrogate, is skipped.
+ *
+ * @param text   in/out buffer
+ * @param key    unused
+ * @param module unused
+ * @return always 0
+ */
+char UTF16UTF8::processText(SWBuf &text, const SWKey *key, const SWModule *module)
+{
+	unsigned long uchar;
+	unsigned short schar;
+
+	SWBuf orig = text;
+	const unsigned short *from = (const unsigned short *)orig.c_str();
+
+
+	// -------------------------------
+
+	for (text = ""; *from; from++) {
+		uchar = 0;
+
+		if (*from < 0xD800 || *from > 0xDFFF) {
+			uchar = *from;
+		}
+		else if (*from >= 0xD800 && *from <= 0xDBFF) {
+			uchar = *from;
+			schar = *(from+1);
+			// the unit after a high surrogate has to be a low surrogate
+			if (schar < 0xDC00 || schar > 0xDFFF) {
+				//error, do nothing
+				continue;
+			}
+			uchar &= 0x03ff;
+			schar &= 0x03ff;
+			uchar <<= 10;
+			uchar |= schar;
+			uchar += 0x10000;
+			from++;
+		}
+		else {
+			//error, do nothing
+			continue;
+		}
+
+		if (uchar < 0x80) {
+			text += (char)uchar;
+		}
+		else if (uchar < 0x800) {
+			text += (char)(0xc0 | (uchar >> 6));
+			text += (char)(0x80 | (uchar & 0x3f));
+		}
+		else if (uchar < 0x10000) {
+			text += (char)(0xe0 | (uchar >> 12));
+			text += (char)(0x80 | ((uchar >> 6) & 0x3f));
+			text += (char)(0x80 | (uchar & 0x3f));
+		}
+		else if (uchar < 0x200000) {
+			text += (char)(0xF0 | (uchar >> 18));
+			text += (char)(0x80 | ((uchar >> 12) & 0x3F));
+			text += (char)(0x80 | ((uchar >> 6) & 0x3F));
+			text += (char)(0x80 | (uchar & 0x3F));
+		}
+	}
+
+	return 0;
+}
+
+
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/utf8arshaping.cpp b/src/modules/filters/utf8arshaping.cpp
new file mode 100644
index 0000000..702fb62
--- /dev/null
+++ b/src/modules/filters/utf8arshaping.cpp
@@ -0,0 +1,51 @@
+/******************************************************************************
+*
+* utf8arshaping - SWFilter descendant to perform Arabic shaping on
+* UTF-8 text
+*/
+
+#ifdef _ICU_
+
+#include <stdlib.h>
+
+#include <utilstr.h>
+
+#include <utf8arshaping.h>
+#include <swbuf.h>
+
+SWORD_NAMESPACE_START
+
+/** Opens the UTF-8 converter used by processText. */
+UTF8arShaping::UTF8arShaping() {
+	// ICU functions return immediately when the status already holds a
+	// failure code, so it has to start out as "no error"
+	err = U_ZERO_ERROR;
+	conv = ucnv_open("UTF-8", &err);
+}
+
+/** Closes the converter opened by the constructor. */
+UTF8arShaping::~UTF8arShaping() {
+	ucnv_close(conv);
+}
+
+/**
+ * Applies Arabic letter shaping and European-to-Arabic digit conversion to
+ * UTF-8 text: the text is converted to UTF-16, run through u_shapeArabic and
+ * converted back.  If ICU reports a failure along the way, text is left as
+ * it was.
+ *
+ * @param text   in/out UTF-8 buffer
+ * @param key    values 0 and 1 mark en/deciphering calls, which are refused
+ * @param module unused
+ * @return -1 for en/deciphering calls, otherwise 0
+ */
+char UTF8arShaping::processText(SWBuf &text, const SWKey *key, const SWModule *module)
+{
+	UChar *ustr, *ustr2;
+	if ((unsigned long)key < 2)	// hack, we're en(1)/de(0)ciphering
+		return -1;
+
+	// a failure left over from an earlier call would turn every ICU call
+	// below into a no-op
+	err = U_ZERO_ERROR;
+
+	int32_t len = text.length();
+	ustr = new UChar[len];
+	ustr2 = new UChar[len];
+
+	// Convert UTF-8 string to UTF-16 (UChars)
+	len = ucnv_toUChars(conv, ustr, len, text.c_str(), -1, &err);
+
+	len = u_shapeArabic(ustr, len, ustr2, len, U_SHAPE_LETTERS_SHAPE | U_SHAPE_DIGITS_EN2AN, &err);
+
+	if (U_SUCCESS(err)) {
+		// convert the shaped text back, giving the output twice the input size to grow into
+		text.setSize(text.size()*2);
+		len = ucnv_fromUChars(conv, text.getRawData(), text.size(), ustr2, len, &err);
+		text.setSize(len);
+	}
+
+	delete [] ustr2;
+	delete [] ustr;
+	return 0;
+}
+
+SWORD_NAMESPACE_END
+#endif
diff --git a/src/modules/filters/utf8bidireorder.cpp b/src/modules/filters/utf8bidireorder.cpp
new file mode 100644
index 0000000..783602c
--- /dev/null
+++ b/src/modules/filters/utf8bidireorder.cpp
@@ -0,0 +1,60 @@
+/******************************************************************************
+*
+* utf8bidireorder - SWFilter descendant to perform reordering of UTF-8
+* text to visual order according to Unicode BiDi
+*/
+
+#ifdef _ICU_
+
+#include <stdlib.h>
+
+#include <utilstr.h>
+
+#include <utf8bidireorder.h>
+#include <swbuf.h>
+
+SWORD_NAMESPACE_START
+
+/** Opens the UTF-8 converter used by processText. */
+UTF8BiDiReorder::UTF8BiDiReorder() {
+
+	// ICU functions return immediately when the status already holds a
+	// failure code, so it has to start out as "no error"
+	err = U_ZERO_ERROR;
+	conv = ucnv_open("UTF-8", &err);
+
+}
+
+/** Closes the converter opened by the constructor. */
+UTF8BiDiReorder::~UTF8BiDiReorder() {
+	ucnv_close(conv);
+}
+
+/**
+ * Reorders UTF-8 text into visual order: the text is converted to UTF-16,
+ * laid out as one paragraph with a default right-to-left base direction,
+ * written back reordered (with mirroring, BiDi control characters removed)
+ * and converted to UTF-8 again.  If ICU reports a failure along the way,
+ * text is left as it was.
+ *
+ * @param text   in/out UTF-8 buffer
+ * @param key    values 0 and 1 mark en/deciphering calls, which are refused
+ * @param module unused
+ * @return -1 for en/deciphering calls, otherwise 0
+ */
+char UTF8BiDiReorder::processText(SWBuf &text, const SWKey *key, const SWModule *module)
+{
+	UChar *ustr, *ustr2;
+	if ((unsigned long)key < 2)	// hack, we're en(1)/de(0)ciphering
+		return -1;
+
+	// a failure left over from an earlier call would turn every ICU call
+	// below into a no-op
+	err = U_ZERO_ERROR;
+
+	int32_t len = text.length();
+	ustr = new UChar[len];	// one UChar per input byte is the capacity offered to the converter
+
+	// Convert UTF-8 string to UTF-16 (UChars)
+	len = ucnv_toUChars(conv, ustr, len, text.c_str(), -1, &err);
+	ustr2 = new UChar[len];
+
+	UBiDi* bidi = ubidi_openSized(len + 1, 0, &err);
+	ubidi_setPara(bidi, ustr, len, UBIDI_DEFAULT_RTL, NULL, &err);
+	len = ubidi_writeReordered(bidi, ustr2, len,
+		UBIDI_DO_MIRRORING | UBIDI_REMOVE_BIDI_CONTROLS, &err);
+	ubidi_close(bidi);
+
+//	len = ubidi_writeReverse(ustr, len, ustr2, len,
+//		UBIDI_DO_MIRRORING | UBIDI_REMOVE_BIDI_CONTROLS, &err);
+
+	if (U_SUCCESS(err)) {
+		// convert the reordered text back, giving the output twice the input size to grow into
+		text.setSize(text.size()*2);
+		len = ucnv_fromUChars(conv, text.getRawData(), text.size(), ustr2, len, &err);
+		text.setSize(len);
+	}
+
+	delete [] ustr2;
+	delete [] ustr;
+	return 0;
+}
+
+SWORD_NAMESPACE_END
+#endif
diff --git a/src/modules/filters/utf8cantillation.cpp b/src/modules/filters/utf8cantillation.cpp
new file mode 100644
index 0000000..6213620
--- /dev/null
+++ b/src/modules/filters/utf8cantillation.cpp
@@ -0,0 +1,55 @@
+/******************************************************************************
+ *
+ * UTF8Cantillation - SWFilter descendant to remove UTF-8 Hebrew cantillation
+ *
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <utf8cantillation.h>
+
+SWORD_NAMESPACE_START
+
+// Name and tooltip under which this option filter is registered.
+const char oName[] = "Hebrew Cantillation";
+const char oTip[] = "Toggles Hebrew Cantillation Marks";
+
+// Selectable values; the trailing empty entry only marks the end of the
+// range handed to oValues, so the list itself holds "On" and "Off".
+const SWBuf choices[3] = {"On", "Off", ""};
+const StringList oValues(choices, choices + 2);
+
+/** Registers the option and starts with cantillation marks switched off. */
+UTF8Cantillation::UTF8Cantillation()
+	: SWOptionFilter(oName, oTip, &oValues)
+{
+	setOptionValue("Off");
+}
+
+
+/** Nothing to release. */
+UTF8Cantillation::~UTF8Cantillation()
+{
+}
+
+
+/**
+ * When the option is off, removes the two-byte UTF-8 sequences 0xD6 0x90
+ * through 0xD6 0xAF and 0xD7 0x84 (Hebrew cantillation marks) from text.
+ * Everything else is copied unchanged, including a lone 0xD6 at the very
+ * end of the text.
+ *
+ * @param text   in/out UTF-8 buffer
+ * @param key    unused
+ * @param module unused
+ * @return always 0
+ */
+char UTF8Cantillation::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+	if (!option) {
+		//The UTF-8 range 0xD6 0x90 to 0xD6 0xAF and 0xD7 0x84 consist of Hebrew cantillation marks so block those out.
+		SWBuf orig = text;
+		const unsigned char* from = (unsigned char*)orig.c_str();
+		for (text = ""; *from; from++) {
+			if (*from != 0xD6) {
+				if (*from == 0xD7 && *(from + 1) == 0x84) {
+					from++;
+				}
+				else {
+					text += *from;
+				}
+			}
+			else if (!*(from + 1)) {
+				// 0xD6 is the last byte: keep it and stop, so the terminator is
+				// neither copied into the output nor stepped over
+				text += *from;
+				break;
+			}
+			else if (*(from + 1) < 0x90 || *(from + 1) > 0xAF) {
+				// 0xD6 sequence outside the cantillation range: keep both bytes
+				text += *from;
+				from++;
+				text += *from;
+			}
+			else {
+				from++;
+			}
+		}
+	}
+	return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/utf8greekaccents.cpp b/src/modules/filters/utf8greekaccents.cpp
new file mode 100644
index 0000000..df85968
--- /dev/null
+++ b/src/modules/filters/utf8greekaccents.cpp
@@ -0,0 +1,261 @@
+/******************************************************************************
+ *
+ * UTF8GreekAccents - SWFilter descendant to remove UTF-8 Greek accents
+ *
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <utf8greekaccents.h>
+
+
+#ifdef _ICU_
+#include <utf8nfkd.h>
+sword::UTF8NFKD decompose;
+#endif
+
+SWORD_NAMESPACE_START
+
+// Name and tooltip under which this option filter is registered.
+const char oName[] = "Greek Accents";
+const char oTip[] = "Toggles Greek Accents";
+
+// Selectable values; the trailing empty entry only marks the end of the
+// range handed to oValues, so the list itself holds "On" and "Off".
+const SWBuf choices[3] = {"On", "Off", ""};
+const StringList oValues(choices, choices + 2);
+
+/** Registers the option and starts with accents switched on. */
+UTF8GreekAccents::UTF8GreekAccents()
+	: SWOptionFilter(oName, oTip, &oValues)
+{
+	setOptionValue("On");
+}
+
+/** Nothing to release. */
+UTF8GreekAccents::~UTF8GreekAccents()
+{
+}
+
+
+/**
+ * When the option is off, strips Greek accents from UTF-8 text:
+ *  - with ICU available the text is first passed through the UTF8NFKD filter;
+ *  - the sequences E2 80 99, CC 80/81/82/88/93/94 and CD BA/82 are removed;
+ *  - precomposed accented letters from the Greek (CE xx / CF xx) and Greek
+ *    Extended (E1 BC..BF xx) blocks are replaced by their plain base letter.
+ * All other bytes are copied unchanged.
+ *
+ * @param text   in/out UTF-8 buffer
+ * @param key    unused
+ * @param module unused
+ * @return always 0
+ */
+char UTF8GreekAccents::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+
+	if (!option) { //we don't want greek accents
+#ifdef _ICU_
+		decompose.processText(text, (SWKey *)2);  // note the hack of 2 to mimic a real key. TODO: remove all hacks
+#endif
+
+		SWBuf orig = text;
+		const unsigned char* from = (unsigned char*)orig.c_str();
+		for (text = ""; *from; from++) {
+			//first just remove combining characters
+			if (*from == 0xE2 && *(from + 1) == 0x80 && *(from + 2) == 0x99) {
+				from += 2;
+			}
+			// only the listed combining marks are removed; any other 0xCC
+			// sequence falls through to the final branch and is copied intact
+			else if (*from == 0xCC && (*(from + 1) == 0x80 || *(from + 1) == 0x81 || *(from + 1) == 0x82 || *(from + 1) == 0x88 || *(from + 1) == 0x93 || *(from + 1) == 0x94)) {
+				from++;
+			}
+			else if (*from == 0xCD && (*(from + 1) == 0xBA || *(from + 1) == 0x82)) {
+				from++;
+			}
+			//now converted pre-composed characters to their alphabetic bases, discarding the accents
+
+			//Greek
+			//capital alpha
+			else if ((*from == 0xCE && *(from + 1) == 0x86)) {
+				text += 0xCE;
+				text += 0x91;
+				from++;
+			}
+			//capital epsilon
+			else if ((*from == 0xCE && *(from + 1) == 0x88)) {
+				text += 0xCE;
+				text += 0x95;
+				from++;
+			}
+			//capital eta
+			else if ((*from == 0xCE && *(from + 1) == 0x89)) {
+				text += 0xCE;
+				text += 0x97;
+				from++;
+			}
+			//capital iota
+			else if ((*from == 0xCE && (*(from + 1) == 0x8A || *(from + 1) == 0xAA))) {
+				text += 0xCE;
+				text += 0x99;
+				from++;
+			}
+			//capital omicron
+			else if ((*from == 0xCE && *(from + 1) == 0x8C)) {
+				text += 0xCE;
+				text += 0x9F;
+				from++;
+			}
+			//capital upsilon
+			else if ((*from == 0xCE && (*(from + 1) == 0x8E || *(from + 1) == 0xAB))) {
+				text += 0xCE;
+				text += 0xA5;
+				from++;
+			}
+			//capital omega
+			else if ((*from == 0xCE && *(from + 1) == 0x8F)) {
+				text += 0xCE;
+				text += 0xA9;
+				from++;
+			}
+
+			//alpha
+			else if ((*from == 0xCE && *(from + 1) == 0xAC)) {
+				text += 0xCE;
+				text += 0xB1;
+				from++;
+			}
+			//epsilon
+			else if ((*from == 0xCE && *(from + 1) == 0xAD)) {
+				text += 0xCE;
+				text += 0xB5;
+				from++;
+			}
+			//eta
+			else if ((*from == 0xCE && *(from + 1) == 0xAE)) {
+				text += 0xCE;
+				text += 0xB7;
+				from++;
+			}
+			//iota
+			else if ((*from == 0xCE && *(from + 1) == 0xAF) || (*from == 0xCF && *(from + 1) == 0x8A)) {
+				text += 0xCE;
+				text += 0xB9;
+				from++;
+			}
+			//omicron
+			else if ((*from == 0xCF && *(from + 1) == 0x8C)) {
+				text += 0xCE;
+				text += 0xBF;
+				from++;
+			}
+			//upsilon
+			// CE B0 is U+03B0 (upsilon with dialytika and tonos); the former
+			// test for CE 88 could never match here because capital epsilon
+			// above already takes that sequence
+			else if ((*from == 0xCE && *(from + 1) == 0xB0) || (*from == 0xCF && (*(from + 1) == 0x8B || *(from + 1) == 0x8D))) {
+				text += 0xCF;
+				text += 0x85;
+				from++;
+			}
+			//omega
+			else if ((*from == 0xCF && *(from + 1) == 0x8E)) {
+				text += 0xCF;
+				text += 0x89;
+				from++;
+			}
+
+			//Extended Greek
+			//capital alpha
+			else if (*from == 0xE1 && (((*(from + 1) == 0xBC || *(from + 1) == 0xBE) && *(from + 2) >= 0x88 && *(from + 2) <= 0x8F) || (*(from + 1) == 0xBE && *(from + 2) >= 0xB8 && *(from + 2) <= 0xBC))) {
+				text += 0xCE;
+				text += 0x91;
+				from+=2;
+			}
+			//capital epsilon
+			else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0x98 && *(from + 2) <= 0x9D) || (*(from + 1) == 0xBF && (*(from + 2) == 0x88 || *(from + 2) == 0x89)))) {
+				text += 0xCE;
+				text += 0x95;
+				from+=2;
+			}
+			//capital eta
+			else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0xA8 && *(from + 2) <= 0xAF) || (*(from + 1) == 0xBE && *(from + 2) >= 0x98 && *(from + 2) <= 0x9F) || (*(from + 1) == 0xBF && *(from + 2) >= 0x8A && *(from + 2) <= 0x8C))) {
+				text += 0xCE;
+				text += 0x97;
+				from+=2;
+			}
+			//capital iota
+			else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0xB8 && *(from + 2) <= 0xBF) || (*(from + 1) == 0xBF && *(from + 2) >= 0x98 && *(from + 2) <= 0x9B))) {
+				text += 0xCE;
+				text += 0x99;
+				from+=2;
+			}
+			//capital omicron
+			else if (*from == 0xE1 && (((*(from + 1) == 0xBD && *(from + 2) >= 0x88 && *(from + 2) <= 0x8D)) || ((*(from + 1) == 0xBF && (*(from + 2) == 0xB8 || *(from + 2) == 0xB9))))) {
+				text += 0xCE;
+				text += 0x9F;
+				from+=2;
+			}
+			//capital upsilon
+			else if (*from == 0xE1 && ((*(from + 1) == 0xBD && *(from + 2) >= 0x99 && *(from + 2) <= 0x9F) || (*(from + 1) == 0xBF && *(from + 2) >= 0xA8 && *(from + 2) <= 0xAB))) {
+				text += 0xCE;
+				text += 0xA5;
+				from+=2;
+			}
+			//capital omega
+			else if (*from == 0xE1 && (((*(from + 1) == 0xBD || *(from + 1) == 0xBE) && *(from + 2) >= 0xA8 && *(from + 2) <= 0xAF) || (*(from + 1) == 0xBF && *(from + 2) >= 0xBA && *(from + 2) <= 0xBC))) {
+				text += 0xCE;
+				text += 0xA9;
+				from+=2;
+			}
+			//capital rho
+			else if (*from == 0xE1 && *(from + 1) == 0xBF && *(from + 2) == 0xAC) {
+				text += 0xCE;
+				text += 0xA1;
+				from+=2;
+			}
+
+			//alpha
+			else if (*from == 0xE1 && (
+				   ((*(from + 1) == 0xBC || *(from + 1) == 0xBE) && *(from + 2) >= 0x80 && *(from + 2) <= 0x87)
+				|| (*(from + 1) == 0xBD && (*(from + 2) == 0xB0 || *(from + 2) == 0xB1))
+				|| (*(from + 1) == 0xBE && *(from + 2) >= 0xB0 && *(from + 2) <= 0xB7))) {
+				text += 0xCE;
+				text += 0xB1;
+				from+=2;
+			}
+			//epsilon
+			else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0x90 && *(from + 2) <= 0x95) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB2 || *(from + 2) == 0xB3)))) {
+				text += 0xCE;
+				text += 0xB5;
+				from+=2;
+			}
+			//eta
+			else if (*from == 0xE1 && ((*(from + 1) == 0xBE && *(from + 2) >= 0x90 && *(from + 2) <= 0x97) || (*(from + 1) == 0xBC && *(from + 2) >= 0xA0 && *(from + 2) <= 0xA7) || (*(from + 1) == 0xBF && *(from + 2) >= 0x82 && *(from + 2) <= 0x87) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB4 || *(from + 2) == 0xB5)))) {
+				text += 0xCE;
+				text += 0xB7;
+				from+=2;
+			}
+			//iota
+			else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0xB0 && *(from + 2) <= 0xB7) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB6 || *(from + 2) == 0xB7)) || (*(from + 1) == 0xBF && *(from + 2) >= 0x90 && *(from + 2) <= 0x97))) {
+				text += 0xCE;
+				text += 0xB9;
+				from+=2;
+			}
+			//omicron
+			else if (*from == 0xE1 && (*(from + 1) == 0xBD && ((*(from + 2) >= 0x80 && *(from + 2) <= 0x85) || (*(from + 2) == 0xB8 || *(from + 2) == 0xB9)))) {
+				text += 0xCE;
+				text += 0xBF;
+				from+=2;
+			}
+			//upsilon
+			else if (*from == 0xE1 && ((*(from + 1) == 0xBD && ((*(from + 2) >= 0x90 && *(from + 2) <= 0x97) || *(from + 2) == 0xBA || *(from + 2) == 0xBB)) || (*(from + 1) == 0xBF && ((*(from + 2) >= 0xA0 && *(from + 2) <= 0xA3) || *(from + 2) == 0xA6 || *(from + 2) == 0xA7)))) {
+				text += 0xCF;
+				text += 0x85;
+				from+=2;
+			}
+			//omega
+			else if (*from == 0xE1 && ((*(from + 1) == 0xBD && ((*(from + 2) >= 0xA0 && *(from + 2) <= 0xA7) || (*(from + 2) == 0xBC || *(from + 2) == 0xBD))) || (*(from + 1) == 0xBE && (*(from + 2) >= 0xA0 && *(from + 2) <= 0xA7)) || (*(from + 1) == 0xBF && *(from + 2) >= 0xB2 && *(from + 2) <= 0xB7))) {
+				text += 0xCF;
+				text += 0x89;
+				from+=2;
+			}
+			//rho
+			// either breathing form (E1 BF A4 or E1 BF A5); "&&" here could never be true
+			else if (*from == 0xE1 && *(from + 1) == 0xBF && (*(from + 2) == 0xA4 || *(from + 2) == 0xA5)) {
+				text += 0xCF;
+				text += 0x81;
+				from+=2;
+			}
+			else { //no characters we filter
+				text += *from;
+			}
+		}
+	}
+	return 0;
+}
+
+
+
+
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/utf8hebrewpoints.cpp b/src/modules/filters/utf8hebrewpoints.cpp
new file mode 100644
index 0000000..0476db8
--- /dev/null
+++ b/src/modules/filters/utf8hebrewpoints.cpp
@@ -0,0 +1,44 @@
+/******************************************************************************
+ *
+ * UTF8HebrewPoints - SWFilter descendant to remove UTF-8 Hebrew vowel points
+ *
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <utf8hebrewpoints.h>
+
+SWORD_NAMESPACE_START
+
+const char oName[] = "Hebrew Vowel Points"; // option name handed to the SWOptionFilter base constructor below
+const char oTip[] = "Toggles Hebrew Vowel Points"; // option tip handed to the same constructor
+
+const SWBuf choices[3] = {"On", "Off", ""}; // candidate option values
+const StringList oValues(&choices[0], &choices[2]); // built from the pointer pair (&choices[0], &choices[2]); presumably a half-open range that leaves out the trailing "" -- TODO confirm
+
+UTF8HebrewPoints::UTF8HebrewPoints() : SWOptionFilter(oName, oTip, &oValues) { // registers the name, tip and value list defined above
+ setOptionValue("On"); // the filter starts with its option set to "On"
+}
+
+UTF8HebrewPoints::~UTF8HebrewPoints(){}; // empty body: this class adds no cleanup of its own
+
+
+/**
+ * Removes Hebrew points from UTF-8 text while the option is switched off.
+ *
+ * Every two-byte sequence 0xD6 0xB0 .. 0xD6 0xBF is dropped, except
+ * 0xD6 0xBE, which is kept. All other bytes are copied through unchanged.
+ *
+ * @param text   UTF-8 buffer, rewritten in place
+ * @param key    unused
+ * @param module unused
+ * @return always 0
+ */
+char UTF8HebrewPoints::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+	if (option)
+		return 0;	// option is on: leave the text as it is
+
+	// Work from a copy so the result can be rebuilt directly in text.
+	SWBuf orig = text;
+	const unsigned char *cursor = (unsigned char*)orig.c_str();
+
+	text = "";
+	while (*cursor) {
+		// Reading cursor[1] is safe: at worst it is the terminating NUL.
+		const unsigned char trail = cursor[1];
+		const bool isPoint = (*cursor == 0xD6)
+			&& (trail >= 0xB0 && trail <= 0xBF)
+			&& (trail != 0xBE);
+
+		if (isPoint) {
+			cursor += 2;	// skip both bytes of the point
+		}
+		else {
+			text += *cursor;
+			cursor++;
+		}
+	}
+	return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/utf8html.cpp b/src/modules/filters/utf8html.cpp
new file mode 100644
index 0000000..088f669
--- /dev/null
+++ b/src/modules/filters/utf8html.cpp
@@ -0,0 +1,70 @@
+/******************************************************************************
+ *
+ * utf8html - SWFilter descendant to convert a UTF-8 stream to HTML escapes
+ *
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <utf8html.h>
+#include <swbuf.h>
+
+SWORD_NAMESPACE_START
+
+UTF8HTML::UTF8HTML() { // empty body: nothing is initialised here
+}
+
+
+/**
+ * Replaces every multi-byte UTF-8 sequence in text with a decimal HTML
+ * numeric character reference ("&#NNNN;"). 7-bit bytes are copied unchanged
+ * and stray continuation bytes (10xxxxxx in lead position) are dropped.
+ *
+ * The input is only read, never modified, and decoding stops at the
+ * terminating NUL: a sequence cut short by the end of the text is discarded
+ * instead of being read past the end of the buffer.
+ *
+ * @param text   UTF-8 buffer, rewritten in place
+ * @param key    the values 0 and 1 mark a de/enciphering pass; nothing is done
+ * @param module unused
+ * @return (char)-1 for a ciphering pass, otherwise 0
+ */
+char UTF8HTML::processText(SWBuf &text, const SWKey *key, const SWModule *module)
+{
+	if ((unsigned long)key < 2)	// hack, we're en(1)/de(0)ciphering
+		return (char)-1;
+
+	SWBuf orig = text;
+	const unsigned char *from = (const unsigned char *)orig.c_str();
+	char digit[24];			// room for any unsigned long in decimal plus NUL
+
+	text = "";
+	while (*from) {
+		const unsigned char lead = *from++;
+
+		if (!(lead & 128)) {		// plain 7-bit character
+			text += lead;
+			continue;
+		}
+		if (!(lead & 64)) {		// continuation byte without a lead byte: skip
+			continue;
+		}
+
+		// Every leading 1 bit after the first announces one continuation byte.
+		int subsequent = 0;
+		for (unsigned char bits = (unsigned char)(lead << 1); bits & 128; bits = (unsigned char)(bits << 1))
+			subsequent++;
+
+		// The lead byte contributes its low (6 - subsequent) bits (none for 0xFE/0xFF).
+		unsigned long ch = (unsigned long)(lead & (0x3F >> subsequent));
+
+		bool truncated = false;
+		for (int i = 0; i < subsequent; i++) {
+			if (!*from) {		// text ends inside the sequence
+				truncated = true;
+				break;
+			}
+			ch = (ch << 6) | (unsigned long)(*from & 63);
+			from++;
+		}
+		if (truncated)
+			break;			// from already points at the terminator
+
+		sprintf(digit, "%lu", ch);
+		text += '&';
+		text += '#';
+		for (const char *dig = digit; *dig; dig++)
+			text += *dig;
+		text += ';';
+	}
+	return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/utf8latin1.cpp b/src/modules/filters/utf8latin1.cpp
new file mode 100644
index 0000000..08b288d
--- /dev/null
+++ b/src/modules/filters/utf8latin1.cpp
@@ -0,0 +1,75 @@
+/******************************************************************************
+ *
+ * UTF8Latin1 - SWFilter descendant to convert UTF-8 to Latin-1
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include <utf8latin1.h>
+#include <swbuf.h>
+
+SWORD_NAMESPACE_START
+
+UTF8Latin1::UTF8Latin1(char rchar) : replacementChar(rchar) { // rchar is what processText emits for characters it cannot store in one byte
+}
+
+
+/**
+ * Converts UTF-8 text to Latin-1 (one byte per character).
+ *
+ * Code points U+0000..U+00FF are written as a single byte; anything larger is
+ * written as replacementChar. Stray continuation bytes are dropped. Decoding
+ * stops at the terminating NUL, so a sequence cut short by the end of the
+ * text is discarded instead of being read past the end of the buffer.
+ *
+ * @param text   UTF-8 buffer, rewritten in place as Latin-1
+ * @param key    the values 0 and 1 mark a de/enciphering pass; nothing is done
+ * @param module unused
+ * @return (char)-1 for a ciphering pass, otherwise 0
+ */
+char UTF8Latin1::processText(SWBuf &text, const SWKey *key, const SWModule *module)
+{
+	if ((unsigned long)key < 2) {	// hack, we're en(1)/de(0)ciphering
+		return (char)-1;
+	}
+
+	SWBuf orig = text;
+	const unsigned char *from = (const unsigned char *)orig.c_str();
+
+	text = "";
+	while (*from) {
+		const unsigned char lead = *from++;
+		unsigned long uchar;
+
+		if (!(lead & 128)) {		// plain 7-bit character
+			uchar = lead;
+		}
+		else if (!(lead & 64)) {	// continuation byte without a lead byte
+			continue;		// error, do nothing
+		}
+		else {
+			// Every leading 1 bit after the first announces one continuation byte.
+			int subsequent = 0;
+			for (unsigned char bits = (unsigned char)(lead << 1); bits & 128; bits = (unsigned char)(bits << 1))
+				subsequent++;
+
+			// The lead byte contributes its low (6 - subsequent) bits.
+			uchar = (unsigned long)(lead & (0x3F >> subsequent));
+
+			bool truncated = false;
+			for (int i = 0; i < subsequent; i++) {
+				if (!*from) {	// text ends inside the sequence
+					truncated = true;
+					break;
+				}
+				uchar = (uchar << 6) | (unsigned long)(*from & 63);
+				from++;
+			}
+			if (truncated)
+				break;
+		}
+
+		// U+00FF is still a Latin-1 character, so the bound is inclusive.
+		if (uchar <= 0xff) {
+			text += (unsigned char)uchar;
+		}
+		else {
+			text += replacementChar;
+		}
+	}
+	return 0;
+}
+
+SWORD_NAMESPACE_END
+
diff --git a/src/modules/filters/utf8nfc.cpp b/src/modules/filters/utf8nfc.cpp
new file mode 100644
index 0000000..15b76b5
--- /dev/null
+++ b/src/modules/filters/utf8nfc.cpp
@@ -0,0 +1,50 @@
+/******************************************************************************
+*
+* utf8nfc - SWFilter descendant to perform NFC (canonical composition
+* normalization) on UTF-8 text
+*/
+
+#ifdef _ICU_
+
+#include <stdlib.h>
+
+#include <utilstr.h>
+#include <unicode/unistr.h>
+#include <unicode/normlzr.h>
+#include <unicode/unorm.h>
+
+#include <utf8nfc.h>
+#include <swbuf.h>
+
+SWORD_NAMESPACE_START
+
+/** Opens the UTF-8 converter used by processText. */
+UTF8NFC::UTF8NFC() {
+	// ICU functions return immediately when handed a failure code, so the
+	// status must be cleared first (err is declared outside this block,
+	// presumably as a class member -- it is not initialised anywhere visible).
+	err = U_ZERO_ERROR;
+	conv = ucnv_open("UTF-8", &err);
+}
+
+UTF8NFC::~UTF8NFC() {
+ ucnv_close(conv); // closes the converter opened in the constructor
+}
+
+/**
+ * Rewrites text in Unicode Normalization Form C.
+ *
+ * @param text   UTF-8 buffer, rewritten in place
+ * @param key    the values 0 and 1 mark a de/enciphering pass; nothing is done
+ * @param module unused
+ * @return -1 for a ciphering pass, otherwise 0
+ */
+char UTF8NFC::processText(SWBuf &text, const SWKey *key, const SWModule *module)
+{
+	if ((unsigned long)key < 2)	// hack, we're en(1)/de(0)ciphering
+		return -1;
+
+	UErrorCode status = U_ZERO_ERROR;
+	UnicodeString source(text.getRawData(), text.length(), conv, status);
+	UnicodeString target;
+
+	status = U_ZERO_ERROR;
+	Normalizer::normalize(source, UNORM_NFC, 0, target, status);
+
+	status = U_ZERO_ERROR;
+	text.setSize(text.size()*2);	// first guess: twice the original size
+	int32_t len = target.extract(text.getRawData(), text.size(), conv, status);
+	if (status == U_BUFFER_OVERFLOW_ERROR) {
+		// The guess was too small; extract() reported the length it needs,
+		// so grow the buffer to exactly that and convert again.
+		status = U_ZERO_ERROR;
+		text.setSize(len);
+		len = target.extract(text.getRawData(), text.size(), conv, status);
+	}
+	text.setSize(len);
+
+	return 0;
+}
+
+SWORD_NAMESPACE_END
+#endif
diff --git a/src/modules/filters/utf8nfkd.cpp b/src/modules/filters/utf8nfkd.cpp
new file mode 100644
index 0000000..a19d36b
--- /dev/null
+++ b/src/modules/filters/utf8nfkd.cpp
@@ -0,0 +1,52 @@
+/******************************************************************************
+*
+* utf8nfkd - SWFilter descendant to perform NFKD (compatibility decomposition
+* normalization) on UTF-8 text
+*/
+
+#ifdef _ICU_
+
+#include <stdlib.h>
+
+#include <utilstr.h>
+
+#include <utf8nfkd.h>
+#include <swbuf.h>
+
+SWORD_NAMESPACE_START
+
+/** Opens the UTF-8 converter used by processText. */
+UTF8NFKD::UTF8NFKD() {
+	// ICU functions return immediately when handed a failure code, so the
+	// status must be cleared first (err is declared outside this block,
+	// presumably as a class member -- it is not initialised anywhere visible).
+	err = U_ZERO_ERROR;
+	conv = ucnv_open("UTF-8", &err);
+}
+
+UTF8NFKD::~UTF8NFKD() {
+ ucnv_close(conv); // closes the converter opened in the constructor
+}
+
+/**
+ * Rewrites text in Unicode Normalization Form KD (compatibility
+ * decomposition).
+ *
+ * text is left untouched when any ICU step fails.
+ *
+ * @param text   UTF-8 buffer, rewritten in place on success
+ * @param key    the values 0 and 1 mark a de/enciphering pass; nothing is done
+ * @param module unused
+ * @return -1 for a ciphering pass, otherwise 0
+ */
+char UTF8NFKD::processText(SWBuf &text, const SWKey *key, const SWModule *module)
+{
+	if ((unsigned long)key < 2)	// hack, we're en(1)/de(0)ciphering
+		return -1;
+
+	// err outlives this call; clear it so a failure recorded by an earlier
+	// call cannot turn every ICU function below into a no-op.
+	err = U_ZERO_ERROR;
+
+	int32_t len = 5 + text.length() * 5;
+	source = new UChar[len + 1];
+	target = new UChar[len + 1];
+
+	// Convert UTF-8 string to UTF-16 (UChars)
+	int32_t ulen = ucnv_toUChars(conv, source, len, text.c_str(), -1, &err);
+
+	// compatibility decomposition
+	if (!U_FAILURE(err)) {
+		ulen = unorm_normalize(source, ulen, UNORM_NFKD, 0, target, len, &err);
+		if (err == U_BUFFER_OVERFLOW_ERROR) {
+			// ulen is the size actually required: grow target and retry.
+			delete [] target;
+			len = ulen;
+			target = new UChar[len + 1];
+			err = U_ZERO_ERROR;
+			ulen = unorm_normalize(source, ulen > 0 ? u_strlen(source) : 0, UNORM_NFKD, 0, target, len, &err);
+		}
+	}
+
+	if (!U_FAILURE(err)) {
+		// One UTF-16 unit never needs more than three UTF-8 bytes.
+		int32_t cap = ulen * 3;
+		text.setSize(cap);
+		len = ucnv_fromUChars(conv, text.getRawData(), cap, target, ulen, &err);
+		text.setSize(len);
+	}
+
+	delete [] source;
+	delete [] target;
+
+	return 0;
+}
+
+SWORD_NAMESPACE_END
+#endif
diff --git a/src/modules/filters/utf8transliterator.cpp b/src/modules/filters/utf8transliterator.cpp
new file mode 100644
index 0000000..d99741b
--- /dev/null
+++ b/src/modules/filters/utf8transliterator.cpp
@@ -0,0 +1,888 @@
+/******************************************************************************
+*
+* utf8transliterators - SWFilter descendant to transliterate between
+* ICU-supported scripts.
+*/
+
+#ifdef _ICU_
+
+#include <stdlib.h>
+
+#include <utilstr.h>
+
+#include <unicode/ucnv.h>
+#include <unicode/uchar.h>
+#include <utf8transliterator.h>
+#include <swmodule.h>
+
+#ifndef _ICUSWORD_
+#include "unicode/resbund.h"
+#endif
+#include <swlog.h>
+
+SWORD_NAMESPACE_START
+
+const char UTF8Transliterator::optionstring[NUMTARGETSCRIPTS][16] = { // option value names; the row index is the number stored in option
+ "Off", // index 0: processText does nothing while option is 0
+ "Latin",
+ "IPA",
+ "Basic Latin",
+ "SBL",
+ "TC",
+ "Beta",
+ "BGreek",
+ "SERA",
+ "Hugoye",
+ "UNGEGN",
+ "ISO",
+ "ALA-LC",
+ "BGN-PCGN",
+ "Greek",
+ "Hebrew",
+ "Cyrillic",
+ "Arabic",
+ "Syriac",
+ "Katakana",
+ "Hiragana",
+ "Hangul",
+ "Devanagari",
+ "Tamil",
+ "Bengali",
+ "Gurmukhi",
+ "Gujarati",
+ "Oriya",
+ "Telugu",
+ "Kannada",
+ "Malayalam",
+ "Thai",
+ "Georgian",
+ "Armenian",
+ "Ethiopic",
+ "Gothic",
+ "Ugaritic",
+ "Coptic",
+ "Meroitic",
+ "Linear B",
+ "Cypriot",
+ "Runic",
+ "Ogham",
+ "Thaana",
+ "Glagolitic",
+// "Tengwar",
+// "Cirth"
+};
+
+const char UTF8Transliterator::optName[] = "Transliteration"; // not referenced in this file; presumably the option's display name -- TODO confirm
+const char UTF8Transliterator::optTip[] = "Transliterates between scripts"; // not referenced in this file; presumably the option's tip text -- TODO confirm
+
+SWTransMap UTF8Transliterator::transMap; // filled by registerTrans() and searched by checkTrans()
+
+#ifndef _ICUSWORD_
+
+const char UTF8Transliterator::SW_RB_RULE_BASED_IDS[] = "RuleBasedTransliteratorIDs"; // resource key looked up by Load()
+const char UTF8Transliterator::SW_RB_RULE[] = "Rule"; // resource key looked up by checkTrans()
+#ifdef SWICU_DATA
+const char UTF8Transliterator::SW_RESDATA[] = SWICU_DATA; // resource location passed to ures_openDirect(), taken from the build
+#else
+const char UTF8Transliterator::SW_RESDATA[] = "/usr/local/lib/sword/"; // fallback resource location when SWICU_DATA is not defined
+#endif
+
+/**
+ * Holds a char-string rendering of a UnicodeString for C APIs that take
+ * const char*. Short strings live in an inline buffer; longer ones are
+ * allocated on the heap and released by the destructor.
+ */
+class SWCharString {
+	public:
+		inline SWCharString(const UnicodeString& str);
+		inline ~SWCharString();
+		inline operator const char*() { return ptr; }
+	private:
+		// Not copyable: a copy would either alias another object's inline
+		// buffer or delete[] the same heap block twice. Declared, never defined.
+		SWCharString(const SWCharString&);
+		SWCharString& operator=(const SWCharString&);
+
+		char buf[128];
+		char* ptr;	// points at buf, or at a heap block when buf is too small
+};
+SWCharString::SWCharString(const UnicodeString& str) {
+	// TODO This isn't quite right -- we should probably do
+	// preflighting here to determine the real length.
+	if (str.length() >= (int32_t)sizeof(buf)) {
+		ptr = new char[str.length() + 8];
+	} else {
+		ptr = buf;
+	}
+	// The "" codepage argument asks ICU for its plain invariant-character conversion.
+	str.extract(0, 0x7FFFFFFF, ptr, "");
+}
+
+SWCharString::~SWCharString() {
+	if (ptr != buf) {
+		delete[] ptr;
+	}
+}
+
+#endif // _ICUSWORD_
+
+
+/**
+ * Starts with transliteration off (option 0), publishes every entry of
+ * optionstring as a selectable value and, unless _ICUSWORD_ is defined,
+ * loads the transliterator resource index.
+ */
+UTF8Transliterator::UTF8Transliterator() {
+	option = 0;
+
+	for (unsigned long idx = 0; idx < NUMTARGETSCRIPTS; ++idx)
+		options.push_back(optionstring[idx]);
+
+#ifndef _ICUSWORD_
+	utf8status = U_ZERO_ERROR;
+	Load(utf8status);
+#endif
+}
+
+/**
+ * Reads the "translit_swordindex" resource bundle found under SW_RESDATA and
+ * records every 'f' (file) or 'i' (internal) row with registerTrans(). Rows of
+ * type 'a' (alias) are recognised but ignored. Compiles to an empty function
+ * when _ICUSWORD_ is defined.
+ *
+ * @param status ICU status, in and out; a failure code is left in it when the
+ *               index or one of its rows could not be read
+ */
+void UTF8Transliterator::Load(UErrorCode &status)
+{
+#ifndef _ICUSWORD_
+	static const char translit_swordindex[] = "translit_swordindex";
+
+	UResourceBundle *bundle = 0, *transIDs = 0, *colBund = 0;
+	bundle = ures_openDirect(SW_RESDATA, translit_swordindex, &status);
+	if (U_FAILURE(status)) {
+		SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: no resource index to load");
+		SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: status %s", u_errorName(status));
+		return;
+	}
+
+	transIDs = ures_getByKey(bundle, SW_RB_RULE_BASED_IDS, 0, &status);
+	//UParseError parseError;
+
+	int32_t row, maxRows;
+	if (U_SUCCESS(status)) {
+		maxRows = ures_getSize(transIDs);
+		for (row = 0; row < maxRows; row++) {
+			colBund = ures_getByIndex(transIDs, row, 0, &status);
+
+			// A usable row has four columns: id, type, resource, direction.
+			if (U_SUCCESS(status) && ures_getSize(colBund) == 4) {
+				UnicodeString id = ures_getUnicodeStringByIndex(colBund, 0, &status);
+				UChar type = ures_getUnicodeStringByIndex(colBund, 1, &status).charAt(0);
+				UnicodeString resString = ures_getUnicodeStringByIndex(colBund, 2, &status);
+				SWLog::getSystemLog()->logDebug("ok so far");
+
+				if (U_SUCCESS(status)) {
+					switch (type) {
+					case 0x66: // 'f'
+					case 0x69: // 'i'
+						// 'file' or 'internal';
+						// row[2]=resource, row[3]=direction
+						{
+							//UBool visible = (type == 0x0066 /*f*/);
+							UTransDirection dir =
+								(ures_getUnicodeStringByIndex(colBund, 3, &status).charAt(0) ==
+								 0x0046 /*F*/) ?
+								UTRANS_FORWARD : UTRANS_REVERSE;
+							//registry->put(id, resString, dir, visible);
+							// getBuffer() yields UTF-16 units, which "%s" cannot
+							// print; log a char rendering instead.
+							SWCharString resName(resString);
+							SWLog::getSystemLog()->logDebug("instantiating %s ...", (const char *)resName);
+							registerTrans(id, resString, dir, status);
+							SWLog::getSystemLog()->logDebug("done.");
+						}
+						break;
+					case 0x61: // 'a'
+						// 'alias'; row[2]=createInstance argument
+						//registry->put(id, resString, TRUE);
+						break;
+					}
+				}
+				else SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: Failed to get resString");
+			}
+			else SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: Failed to get row");
+			ures_close(colBund);
+		}
+	}
+	else
+	{
+		SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: no resource index to load");
+		SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: status %s", u_errorName(status));
+	}
+
+	ures_close(transIDs);
+	ures_close(bundle);
+
+#endif // _ICUSWORD_
+}
+
+/**
+ * Remembers, in transMap, which resource and direction belong to a
+ * transliterator ID so checkTrans() can build it later. Does nothing when
+ * _ICUSWORD_ is defined.
+ *
+ * @param ID       transliterator ID, used as the map key
+ * @param resource name of the resource holding the rules
+ * @param dir      direction the rules are to be compiled in
+ * @param status   not used by this function
+ */
+void UTF8Transliterator::registerTrans(const UnicodeString& ID, const UnicodeString& resource,
+		UTransDirection dir, UErrorCode &status )
+{
+#ifndef _ICUSWORD_
+	// getBuffer() yields UTF-16 units, which "%s" cannot print; log a char
+	// rendering of the ID instead.
+	SWCharString idName(ID);
+	SWLog::getSystemLog()->logDebug("registering ID locally %s", (const char *)idName);
+
+	SWTransData swstuff;
+	swstuff.resource = resource;
+	swstuff.dir = dir;
+
+	SWTransPair swpair;
+	swpair.first = ID;
+	swpair.second = swstuff;
+	transMap.insert(swpair);
+#endif
+}
+
+/**
+ * Makes sure ICU can instantiate the transliterator named ID.
+ *
+ * If ICU already knows the ID nothing more is done. Otherwise the ID is looked
+ * up in transMap, its rules are read from the matching resource bundle, and a
+ * rule-based transliterator is created and registered with ICU. When
+ * _ICUSWORD_ is defined the function simply returns true.
+ *
+ * @param ID     transliterator ID to check
+ * @param status ICU status, in and out
+ * @return true when the transliterator is (now) available, false otherwise
+ */
+bool UTF8Transliterator::checkTrans(const UnicodeString& ID, UErrorCode &status )
+{
+#ifndef _ICUSWORD_
+	// Char rendering of the ID for log output ("%s" cannot print UTF-16).
+	SWCharString idName(ID);
+
+	Transliterator *trans = Transliterator::createInstance(ID, UTRANS_FORWARD, status);
+	if (!U_FAILURE(status))
+	{
+		// already have it, clean up and return true
+		SWLog::getSystemLog()->logDebug("already have it %s", (const char *)idName);
+		delete trans;
+		return true;
+	}
+	status = U_ZERO_ERROR;
+
+	SWTransMap::iterator swelement;
+	if ((swelement = transMap.find(ID)) != transMap.end())
+	{
+		SWLog::getSystemLog()->logDebug("found element in map");
+		SWTransData swstuff = (*swelement).second;
+		UParseError parseError;
+		//UErrorCode status;
+		//std::cout << "unregistering " << ID << std::endl;
+		//Transliterator::unregister(ID);
+
+		// Get the rules
+		//std::cout << "importing: " << ID << ", " << resource << std::endl;
+		SWCharString ch(swstuff.resource);
+		SWLog::getSystemLog()->logDebug("resource is %s", (const char *)ch);
+		UResourceBundle *bundle = ures_openDirect(SW_RESDATA, ch, &status);
+		const UnicodeString rules = ures_getUnicodeStringByKey(bundle, SW_RB_RULE, &status);
+		ures_close(bundle);
+		//parser.parse(rules, isReverse ? UTRANS_REVERSE : UTRANS_FORWARD,
+		//        parseError, status);
+		if (U_FAILURE(status)) {
+			SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: Failed to get rules");
+			SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: status %s", u_errorName(status));
+			return false;
+		}
+
+
+		Transliterator *trans = Transliterator::createFromRules(ID, rules, swstuff.dir,
+				parseError,status);
+		if (U_FAILURE(status)) {
+			// line and offset are integers; the two context fields are UChar
+			// arrays and need converting before "%s" can print them.
+			SWCharString preContext((UnicodeString(parseError.preContext)));
+			SWCharString postContext((UnicodeString(parseError.postContext)));
+			SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: Failed to create transliterator");
+			SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: status %s", u_errorName(status));
+			SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: Parse error: line %d", (int)parseError.line);
+			SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: Parse error: offset %d", (int)parseError.offset);
+			SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: Parse error: preContext %s", (const char *)preContext);
+			SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: Parse error: postContext %s", (const char *)postContext);
+			SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: rules were");
+//			SWLog::getSystemLog()->logError((const char *)rules);
+			return false;
+		}
+
+		Transliterator::registerInstance(trans);
+		return true;
+
+		//Transliterator *trans = instantiateTrans(ID, swstuff.resource, swstuff.dir, parseError, status);
+		//return trans;
+	}
+	else
+	{
+		return false;
+	}
+#else
+return true;
+#endif // _ICUSWORD_
+}
+
+/**
+ * Appends the transliterator ID newTrans, followed by ';', to the compound
+ * ID being assembled in transList.
+ *
+ * When _ICUSWORD_ is defined the ID is first passed to checkTrans() and is
+ * only appended if that succeeds.
+ * NOTE(review): checkTrans() only does real work when _ICUSWORD_ is NOT
+ * defined, so the two guards look inverted relative to each other -- confirm
+ * which build is meant to perform the check.
+ *
+ * @param newTrans  ID to append
+ * @param transList compound ID under construction
+ * @return true when the ID was appended, false when checkTrans() rejected it
+ */
+bool UTF8Transliterator::addTrans(const char* newTrans, SWBuf* transList) {
+#ifdef _ICUSWORD_
+	UErrorCode status = U_ZERO_ERROR;	// ICU status values must start out cleared
+	if (checkTrans(UnicodeString(newTrans), status)) {
+#endif
+		*transList += newTrans;
+		*transList += ";";
+		return true;
+#ifdef _ICUSWORD_
+	}
+	else {
+		return false;
+	}
+#endif
+}
+
+/**
+ * Creates an ICU transliterator for ID.
+ *
+ * @param ID     simple or compound transliterator ID
+ * @param dir    direction to create it in (previously ignored: the call was
+ *               hard-wired to UTRANS_FORWARD)
+ * @param status ICU status, in and out
+ * @return the new transliterator, owned by the caller, or NULL on failure
+ */
+Transliterator * UTF8Transliterator::createTrans(const UnicodeString& ID, UTransDirection dir, UErrorCode &status )
+{
+	Transliterator *trans = Transliterator::createInstance(ID, dir, status);
+	if (U_FAILURE(status)) {
+		delete trans;
+		return NULL;
+	}
+	else {
+		return trans;
+	}
+}
+
+/**
+ * Selects the target script by name.
+ *
+ * optionstring is searched from its last entry down to index 1 with a
+ * case-insensitive comparison; the first match becomes option. When nothing
+ * matches, option is set to 0 ("Off").
+ *
+ * @param ival name of the wanted option value
+ */
+void UTF8Transliterator::setOptionValue(const char *ival)
+{
+	option = 0;
+	// Start at the last valid row; the table has no row NUMTARGETSCRIPTS.
+	for (unsigned char i = NUMTARGETSCRIPTS - 1; i > 0; i--) {
+		if (!stricmp(ival, optionstring[i])) {
+			option = i;
+			break;
+		}
+	}
+}
+
+const char *UTF8Transliterator::getOptionValue()
+{
+ return (NUMTARGETSCRIPTS > option) ? optionstring[option] : 0; // name of the current option, or 0 when option is outside the table
+}
+
+/**
+ * Transliterates text into the script or romanisation scheme selected by
+ * option; does nothing while option is 0.
+ *
+ * The UTF-8 input is converted to UTF-16, the Unicode blocks that occur in it
+ * are recorded, a compound ICU transliterator ID (for example
+ * "NFD;Greek-Latin;NFC;") is assembled from them with addTrans(), and the
+ * transliterated result is written back to text as UTF-8. text is left
+ * unchanged when the compound transliterator cannot be created.
+ *
+ * @param text   UTF-8 buffer, rewritten in place
+ * @param key    unused
+ * @param module its Lang() decides between Greek and Coptic rules ("cop") for
+ *               Greek-block text and between Kanji-Romaji ("ja") and Han-Latin
+ *               for Han text; a null module is treated as having no language
+ * @return always 0
+ */
+char UTF8Transliterator::processText(SWBuf &text, const SWKey *key, const SWModule *module)
+{
+	if (!option)	// transliteration is switched off
+		return 0;
+
+	unsigned long i, j;
+	UErrorCode err = U_ZERO_ERROR;
+	UConverter * conv = NULL;
+	conv = ucnv_open("UTF-8", &err);
+	SWBuf ID;
+
+	bool compat = false;
+
+	// Convert UTF-8 string to UTF-16 (UChars)
+	j = strlen(text);
+	int32_t len = (j * 2) + 1;
+	UChar *source = new UChar[len];	// released before every return below
+	err = U_ZERO_ERROR;
+	len = ucnv_toUChars(conv, source, len, text, j, &err);
+	source[len] = 0;
+
+	// Figure out which scripts are used in the string
+	unsigned char scripts[NUMSCRIPTS];
+
+	for (i = 0; i < NUMSCRIPTS; i++) {
+		scripts[i] = false;
+	}
+
+	for (i = 0; i < (unsigned long)len; i++) {
+		j = ublock_getCode(source[i]);
+		scripts[SE_LATIN] = true;
+		switch (j) {
+		//case UBLOCK_BASIC_LATIN: scripts[SE_LATIN] = true; break;
+		case UBLOCK_GREEK: scripts[SE_GREEK] = true; break;
+		case UBLOCK_HEBREW: scripts[SE_HEBREW] = true; break;
+		case UBLOCK_CYRILLIC: scripts[SE_CYRILLIC] = true; break;
+		case UBLOCK_ARABIC: scripts[SE_ARABIC] = true; break;
+		case UBLOCK_SYRIAC: scripts[SE_SYRIAC] = true; break;
+		case UBLOCK_KATAKANA: scripts[SE_KATAKANA] = true; break;
+		case UBLOCK_HIRAGANA: scripts[SE_HIRAGANA] = true; break;
+		case UBLOCK_HANGUL_SYLLABLES: scripts[SE_HANGUL] = true; break;
+		case UBLOCK_HANGUL_JAMO: scripts[SE_JAMO] = true; break;
+		case UBLOCK_DEVANAGARI: scripts[SE_DEVANAGARI] = true; break;
+		case UBLOCK_TAMIL: scripts[SE_TAMIL] = true; break;
+		case UBLOCK_BENGALI: scripts[SE_BENGALI] = true; break;
+		case UBLOCK_GURMUKHI: scripts[SE_GURMUKHI] = true; break;
+		case UBLOCK_GUJARATI: scripts[SE_GUJARATI] = true; break;
+		case UBLOCK_ORIYA: scripts[SE_ORIYA] = true; break;
+		case UBLOCK_TELUGU: scripts[SE_TELUGU] = true; break;
+		case UBLOCK_KANNADA: scripts[SE_KANNADA] = true; break;
+		case UBLOCK_MALAYALAM: scripts[SE_MALAYALAM] = true; break;
+		case UBLOCK_THAI: scripts[SE_THAI] = true; break;
+		case UBLOCK_GEORGIAN: scripts[SE_GEORGIAN] = true; break;
+		case UBLOCK_ARMENIAN: scripts[SE_ARMENIAN] = true; break;
+		case UBLOCK_ETHIOPIC: scripts[SE_ETHIOPIC] = true; break;
+		case UBLOCK_GOTHIC: scripts[SE_GOTHIC] = true; break;
+		case UBLOCK_UGARITIC: scripts[SE_UGARITIC] = true; break;
+//		case UBLOCK_MEROITIC: scripts[SE_MEROITIC] = true; break;
+//		case UBLOCK_LINEARB: scripts[SE_LINEARB] = true; break;
+//		case UBLOCK_CYPRIOT: scripts[SE_CYPRIOT] = true; break;
+		case UBLOCK_RUNIC: scripts[SE_RUNIC] = true; break;
+		case UBLOCK_OGHAM: scripts[SE_OGHAM] = true; break;
+		case UBLOCK_THAANA: scripts[SE_THAANA] = true; break;
+//		case UBLOCK_GLAGOLITIC: scripts[SE_GLAGOLITIC] = true; break;
+//		case UBLOCK_TENGWAR: scripts[SE_TENGWAR] = true; break;
+//		case UBLOCK_CIRTH: scripts[SE_CIRTH] = true; break;
+		case UBLOCK_CJK_RADICALS_SUPPLEMENT:
+		case UBLOCK_KANGXI_RADICALS:
+		case UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS:
+		case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION:
+		case UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A:
+		case UBLOCK_CJK_UNIFIED_IDEOGRAPHS:
+			scripts[SE_HAN] = true;
+			break;
+		case UBLOCK_CJK_COMPATIBILITY:
+		case UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS:
+		case UBLOCK_CJK_COMPATIBILITY_FORMS:
+			scripts[SE_HAN] = true;
+			compat = true;
+			break;
+		case UBLOCK_HANGUL_COMPATIBILITY_JAMO:
+			scripts[SE_HANGUL] = true;
+			compat = true;
+			break;
+
+		//default: scripts[SE_LATIN] = true;
+		}
+	}
+	scripts[option] = false; //turn off the reflexive transliteration
+
+	//return if we have no transliteration to do for this text
+	j = 0;
+	for (i = 0; !j && i < NUMSCRIPTS; i++) {
+		if (scripts[i]) j++;
+	}
+	if (!j) {
+		delete [] source;
+		ucnv_close(conv);
+		return 0;
+	}
+
+	if (compat) {
+		addTrans("NFKD", &ID);
+	}
+	else {
+		addTrans("NFD", &ID);
+	}
+
+	//Simple X to Latin transliterators
+	if (scripts[SE_GREEK]) {
+		// Coptic modules keep their text in the Greek block.
+		if (strnicmp (module ? ((SWModule*)module)->Lang() : "", "cop", 3)) {
+			if (option == SE_SBL)
+				addTrans("Greek-Latin/SBL", &ID);
+			else if (option == SE_TC)
+				addTrans("Greek-Latin/TC", &ID);
+			else if (option == SE_BETA)
+				addTrans("Greek-Latin/Beta", &ID);
+			else if (option == SE_BGREEK)
+				addTrans("Greek-Latin/BGreek", &ID);
+			else if (option == SE_UNGEGN)
+				addTrans("Greek-Latin/UNGEGN", &ID);
+			else if (option == SE_ISO)
+				addTrans("Greek-Latin/ISO", &ID);
+			else if (option == SE_ALALC)
+				addTrans("Greek-Latin/ALALC", &ID);
+			else if (option == SE_BGNPCGN)
+				addTrans("Greek-Latin/BGNPCGN", &ID);
+			else if (option == SE_IPA)
+				addTrans("Greek-IPA/Ancient", &ID);
+			else {
+				addTrans("Greek-Latin", &ID);
+				scripts[SE_LATIN] = true;
+			}
+		}
+		else {
+			if (option == SE_SBL)
+				addTrans("Coptic-Latin/SBL", &ID);
+			else if (option == SE_TC)
+				addTrans("Coptic-Latin/TC", &ID);
+			else if (option == SE_BETA)
+				addTrans("Coptic-Latin/Beta", &ID);
+			else if (option == SE_IPA)
+				addTrans("Coptic-IPA", &ID);
+			else {
+				addTrans("Coptic-Latin", &ID);
+				scripts[SE_LATIN] = true;
+			}
+		}
+	}
+	if (scripts[SE_HEBREW]) {
+		if (option == SE_SBL)
+			addTrans("Hebrew-Latin/SBL", &ID);
+		else if (option == SE_TC)
+			addTrans("Hebrew-Latin/TC", &ID);
+		else if (option == SE_BETA)
+			addTrans("Hebrew-Latin/Beta", &ID);
+		else if (option == SE_UNGEGN)
+			addTrans("Hebrew-Latin/UNGEGN", &ID);
+		else if (option == SE_ALALC)
+			addTrans("Hebrew-Latin/ALALC", &ID);
+		else if (option == SE_SYRIAC)
+			addTrans("Hebrew-Syriac", &ID);
+		else {
+			addTrans("Hebrew-Latin", &ID);
+			scripts[SE_LATIN] = true;
+		}
+	}
+	if (scripts[SE_CYRILLIC]) {
+		if (option == SE_GLAGOLITIC)
+			addTrans("Cyrillic-Glagolitic", &ID);
+		else {
+			addTrans("Cyrillic-Latin", &ID);
+			scripts[SE_LATIN] = true;
+		}
+	}
+	if (scripts[SE_ARABIC]) {
+		addTrans("Arabic-Latin", &ID);
+		scripts[SE_LATIN] = true;
+	}
+	if (scripts[SE_SYRIAC]) {
+		if (option == SE_TC)
+			addTrans("Syriac-Latin/TC", &ID);
+		else if (option == SE_BETA)
+			addTrans("Syriac-Latin/Beta", &ID);
+		else if (option == SE_HUGOYE)
+			addTrans("Syriac-Latin/Hugoye", &ID);
+		else if (option == SE_HEBREW)
+			addTrans("Syriac-Hebrew", &ID);
+		else {
+			addTrans("Syriac-Latin", &ID);
+			scripts[SE_LATIN] = true;
+		}
+	}
+	if (scripts[SE_THAI]) {
+		addTrans("Thai-Latin", &ID);
+		scripts[SE_LATIN] = true;
+	}
+	if (scripts[SE_GEORGIAN]) {
+		if (option == SE_ISO)
+			addTrans("Georgian-Latin/ISO", &ID);
+		else if (option == SE_ALALC)
+			addTrans("Georgian-Latin/ALALC", &ID);
+		else if (option == SE_BGNPCGN)
+			addTrans("Georgian-Latin/BGNPCGN", &ID);
+		else if (option == SE_IPA)
+			addTrans("Georgian-IPA", &ID);
+		else {
+			addTrans("Georgian-Latin", &ID);
+			scripts[SE_LATIN] = true;
+		}
+	}
+	if (scripts[SE_ARMENIAN]) {
+		if (option == SE_ISO)
+			addTrans("Armenian-Latin/ISO", &ID);
+		else if (option == SE_ALALC)
+			addTrans("Armenian-Latin/ALALC", &ID);
+		else if (option == SE_BGNPCGN)
+			addTrans("Armenian-Latin/BGNPCGN", &ID);
+		else if (option == SE_IPA)
+			addTrans("Armenian-IPA", &ID);
+		else {
+			addTrans("Armenian-Latin", &ID);
+			scripts[SE_LATIN] = true;
+		}
+	}
+	if (scripts[SE_ETHIOPIC]) {
+		if (option == SE_UNGEGN)
+			addTrans("Ethiopic-Latin/UNGEGN", &ID);
+		else if (option == SE_ISO)
+			addTrans("Ethiopic-Latin/ISO", &ID);
+		else if (option == SE_ALALC)
+			addTrans("Ethiopic-Latin/ALALC", &ID);
+		else if (option == SE_SERA)
+			addTrans("Ethiopic-Latin/SERA", &ID);
+		else {
+			addTrans("Ethiopic-Latin", &ID);
+			scripts[SE_LATIN] = true;
+		}
+	}
+	if (scripts[SE_GOTHIC]) {
+		if (option == SE_BASICLATIN)
+			addTrans("Gothic-Latin/Basic", &ID);
+		else if (option == SE_IPA)
+			addTrans("Gothic-IPA", &ID);
+		else {
+			addTrans("Gothic-Latin", &ID);
+			scripts[SE_LATIN] = true;
+		}
+	}
+	if (scripts[SE_UGARITIC]) {
+		if (option == SE_SBL)
+			addTrans("Ugaritic-Latin/SBL", &ID);
+		else {
+			addTrans("Ugaritic-Latin", &ID);
+			scripts[SE_LATIN] = true;
+		}
+	}
+	if (scripts[SE_MEROITIC]) {
+		addTrans("Meroitic-Latin", &ID);
+		scripts[SE_LATIN] = true;
+	}
+	if (scripts[SE_LINEARB]) {
+		addTrans("LinearB-Latin", &ID);
+		scripts[SE_LATIN] = true;
+	}
+	if (scripts[SE_CYPRIOT]) {
+		addTrans("Cypriot-Latin", &ID);
+		scripts[SE_LATIN] = true;
+	}
+	if (scripts[SE_RUNIC]) {
+		addTrans("Runic-Latin", &ID);
+		scripts[SE_LATIN] = true;
+	}
+	if (scripts[SE_OGHAM]) {
+		addTrans("Ogham-Latin", &ID);
+		scripts[SE_LATIN] = true;
+	}
+	if (scripts[SE_THAANA]) {
+		if (option == SE_ALALC)
+			addTrans("Thaana-Latin/ALALC", &ID);
+		else if (option == SE_BGNPCGN)
+			addTrans("Thaana-Latin/BGNPCGN", &ID);
+		else {
+			addTrans("Thaana-Latin", &ID);
+			scripts[SE_LATIN] = true;
+		}
+	}
+	if (scripts[SE_GLAGOLITIC]) {
+		if (option == SE_ISO)
+			addTrans("Glagolitic-Latin/ISO", &ID);
+		else if (option == SE_ALALC)
+			addTrans("Glagolitic-Latin/ALALC", &ID);
+		else if (option == SE_CYRILLIC)	// was a second, unreachable SE_ALALC test
+			addTrans("Glagolitic-Cyrillic", &ID);
+		else {
+			addTrans("Glagolitic-Latin", &ID);
+			scripts[SE_LATIN] = true;
+		}
+	}
+	// (Thai-Latin is already queued above; two repeated copies were removed.)
+
+	if (scripts[SE_HAN]) {
+		if (!strnicmp (module ? ((SWModule*)module)->Lang() : "", "ja", 2)) {
+			addTrans("Kanji-Romaji", &ID);
+		}
+		else {
+			addTrans("Han-Latin", &ID);
+		}
+		scripts[SE_LATIN] = true;
+	}
+
+	// Inter-Kana and Kana to Latin transliterators
+	if (option == SE_HIRAGANA && scripts[SE_KATAKANA]) {
+		addTrans("Katakana-Hiragana", &ID);
+		scripts[SE_HIRAGANA] = true;
+	}
+	else if (option == SE_KATAKANA && scripts[SE_HIRAGANA]) {
+		addTrans("Hiragana-Katakana", &ID);
+		scripts[SE_KATAKANA] = true;
+	}
+	else {
+		if (scripts[SE_KATAKANA]) {
+			addTrans("Katakana-Latin", &ID);
+			scripts[SE_LATIN] = true;
+		}
+		if (scripts[SE_HIRAGANA]) {
+			addTrans("Hiragana-Latin", &ID);
+			scripts[SE_LATIN] = true;
+		}
+	}
+
+	// Korean to Latin transliterators
+	if (scripts[SE_HANGUL]) {
+		addTrans("Hangul-Latin", &ID);
+		scripts[SE_LATIN] = true;
+	}
+	if (scripts[SE_JAMO]) {
+		addTrans("Jamo-Latin", &ID);
+		scripts[SE_LATIN] = true;
+	}
+
+	// Indic-Latin
+	if (option < SE_DEVANAGARI || option > SE_MALAYALAM) {
+		// Indic to Latin
+		if (scripts[SE_TAMIL]) {
+			addTrans("Tamil-Latin", &ID);
+			scripts[SE_LATIN] = true;
+		}
+		if (scripts[SE_BENGALI]) {
+			addTrans("Bengali-Latin", &ID);
+			scripts[SE_LATIN] = true;
+		}
+		if (scripts[SE_GURMUKHI]) {
+			addTrans("Gurmukhi-Latin", &ID);
+			scripts[SE_LATIN] = true;
+		}
+		if (scripts[SE_GUJARATI]) {
+			addTrans("Gujarati-Latin", &ID);
+			scripts[SE_LATIN] = true;
+		}
+		if (scripts[SE_ORIYA]) {
+			addTrans("Oriya-Latin", &ID);
+			scripts[SE_LATIN] = true;
+		}
+		if (scripts[SE_TELUGU]) {
+			addTrans("Telugu-Latin", &ID);
+			scripts[SE_LATIN] = true;
+		}
+		if (scripts[SE_KANNADA]) {
+			addTrans("Kannada-Latin", &ID);
+			scripts[SE_LATIN] = true;
+		}
+		if (scripts[SE_MALAYALAM]) {
+			addTrans("Malayalam-Latin", &ID);
+			scripts[SE_LATIN] = true;
+		}
+	}
+	else {
+		// Target is an Indic script: go through the InterIndic pivot.
+		if (scripts[SE_LATIN]) {
+			addTrans("Latin-InterIndic", &ID);
+		}
+		if (scripts[SE_DEVANAGARI]) {
+			addTrans("Devanagari-InterIndic", &ID);
+		}
+		if (scripts[SE_TAMIL]) {
+			addTrans("Tamil-InterIndic", &ID);
+		}
+		if (scripts[SE_BENGALI]) {
+			addTrans("Bengali-InterIndic", &ID);
+		}
+		if (scripts[SE_GURMUKHI]) {
+			addTrans("Gurmukhi-InterIndic", &ID);	// was misspelt "Gurmurkhi"
+		}
+		if (scripts[SE_GUJARATI]) {
+			addTrans("Gujarati-InterIndic", &ID);
+		}
+		if (scripts[SE_ORIYA]) {
+			addTrans("Oriya-InterIndic", &ID);
+		}
+		if (scripts[SE_TELUGU]) {
+			addTrans("Telugu-InterIndic", &ID);
+		}
+		if (scripts[SE_KANNADA]) {
+			addTrans("Kannada-InterIndic", &ID);
+		}
+		if (scripts[SE_MALAYALAM]) {
+			addTrans("Malayalam-InterIndic", &ID);
+		}
+
+		switch(option) {
+		case SE_DEVANAGARI:
+			addTrans("InterIndic-Devanagari", &ID);
+			break;
+		case SE_TAMIL:
+			addTrans("InterIndic-Tamil", &ID);
+			break;
+		case SE_BENGALI:
+			addTrans("InterIndic-Bengali", &ID);
+			break;
+		case SE_GURMUKHI:
+			addTrans("InterIndic-Gurmukhi", &ID);
+			break;
+		case SE_GUJARATI:
+			addTrans("InterIndic-Gujarati", &ID);
+			break;
+		case SE_ORIYA:
+			addTrans("InterIndic-Oriya", &ID);
+			break;
+		case SE_TELUGU:
+			addTrans("InterIndic-Telugu", &ID);
+			break;
+		case SE_KANNADA:
+			addTrans("InterIndic-Kannada", &ID);
+			break;
+		case SE_MALAYALAM:
+			addTrans("InterIndic-Malayalam", &ID);
+			break;
+		default:
+			addTrans("InterIndic-Latin", &ID);
+			scripts[SE_LATIN] = true;
+			break;
+		}
+	}
+
+//	if (scripts[SE_TENGWAR]) {
+//		addTrans("Tengwar-Latin", &ID);
+//		scripts[SE_LATIN] = true;
+//	}
+//	if (scripts[SE_CIRTH]) {
+//		addTrans("Cirth-Latin", &ID);
+//		scripts[SE_LATIN] = true;
+//	}
+
+	// Everything has been brought to Latin by now; go on to the target script.
+	if (scripts[SE_LATIN]) {
+		switch (option) {
+		case SE_GREEK:
+			addTrans("Latin-Greek", &ID);
+			break;
+		case SE_HEBREW:
+			addTrans("Latin-Hebrew", &ID);
+			break;
+		case SE_CYRILLIC:
+			addTrans("Latin-Cyrillic", &ID);
+			break;
+		case SE_ARABIC:
+			addTrans("Latin-Arabic", &ID);
+			break;
+		case SE_SYRIAC:
+			addTrans("Latin-Syriac", &ID);
+			break;
+		case SE_THAI:
+			addTrans("Latin-Thai", &ID);
+			break;
+		case SE_GEORGIAN:
+			addTrans("Latin-Georgian", &ID);
+			break;
+		case SE_ARMENIAN:
+			addTrans("Latin-Armenian", &ID);
+			break;
+		case SE_ETHIOPIC:
+			addTrans("Latin-Ethiopic", &ID);
+			break;
+		case SE_GOTHIC:
+			addTrans("Latin-Gothic", &ID);
+			break;
+		case SE_UGARITIC:
+			addTrans("Latin-Ugaritic", &ID);
+			break;
+		case SE_COPTIC:
+			addTrans("Latin-Coptic", &ID);
+			break;
+		case SE_KATAKANA:
+			addTrans("Latin-Katakana", &ID);
+			break;
+		case SE_HIRAGANA:
+			addTrans("Latin-Hiragana", &ID);
+			break;
+		case SE_JAMO:
+			addTrans("Latin-Jamo", &ID);
+			break;
+		case SE_HANGUL:
+			addTrans("Latin-Hangul", &ID);
+			break;
+		case SE_MEROITIC:
+			addTrans("Latin-Meroitic", &ID);
+			break;
+		case SE_LINEARB:
+			addTrans("Latin-LinearB", &ID);
+			break;
+		case SE_CYPRIOT:
+			addTrans("Latin-Cypriot", &ID);
+			break;
+		case SE_RUNIC:
+			addTrans("Latin-Runic", &ID);
+			break;
+		case SE_OGHAM:
+			addTrans("Latin-Ogham", &ID);
+			break;
+		case SE_THAANA:
+			addTrans("Latin-Thaana", &ID);
+			break;
+		case SE_GLAGOLITIC:
+			addTrans("Latin-Glagolitic", &ID);
+			break;
+//		case SE_TENGWAR:
+//			addTrans("Latin-Tengwar", &ID);
+//			break;
+//		case SE_CIRTH:
+//			addTrans("Latin-Cirth", &ID);
+//			break;
+		}
+	}
+
+	if (option == SE_BASICLATIN) {
+		addTrans("Any-Latin1", &ID);
+	}
+
+	addTrans("NFC", &ID);
+
+	err = U_ZERO_ERROR;
+	Transliterator * trans = createTrans(UnicodeString(ID), UTRANS_FORWARD, err);
+	if (trans && !U_FAILURE(err)) {
+		UnicodeString target = UnicodeString(source);
+		trans->transliterate(target);
+		text.setSize(text.size()*2);	// first guess: twice the original size
+		len = ucnv_fromUChars(conv, text.getRawData(), text.size(), target.getBuffer(), target.length(), &err);
+		if (err == U_BUFFER_OVERFLOW_ERROR) {
+			// The guess was too small; len is the size actually required.
+			err = U_ZERO_ERROR;
+			text.setSize(len);
+			len = ucnv_fromUChars(conv, text.getRawData(), text.size(), target.getBuffer(), target.length(), &err);
+		}
+		text.setSize(len);
+		delete trans;
+	}
+	delete [] source;
+	ucnv_close(conv);
+	return 0;
+}
+
+SWORD_NAMESPACE_END
+#endif
+
+
+
diff --git a/src/modules/filters/utf8utf16.cpp b/src/modules/filters/utf8utf16.cpp
new file mode 100644
index 0000000..5c1614c
--- /dev/null
+++ b/src/modules/filters/utf8utf16.cpp
@@ -0,0 +1,78 @@
+/******************************************************************************
+ *
+ * UTF8UTF16 - SWFilter descendant to convert UTF-8 to UTF-16
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include <utf8utf16.h>
+#include <swbuf.h>
+
+SWORD_NAMESPACE_START
+
+// Default constructor: performs no initialization of its own.
+UTF8UTF16::UTF8UTF16() {}
+
+// Appends one 16-bit code unit to buf, stored in the host's native byte order.
+static void appendUTF16Unit(SWBuf &buf, unsigned short unit) {
+    buf.setSize(buf.size() + 2);
+    *((unsigned short *)(buf.getRawData() + (buf.size() - 2))) = unit;
+}
+
+/******************************************************************************
+ * UTF8UTF16::processText - re-encodes the contents of text from UTF-8 to
+ *                          UTF-16, replacing the buffer in place
+ *
+ * ENT: text   - buffer holding UTF-8 on entry (read up to its first NUL
+ *               byte); on return it holds the UTF-16 code units followed
+ *               by a 16-bit zero terminator
+ *      key    - unused by this filter
+ *      module - unused by this filter
+ *
+ * RET: always 0
+ *
+ * Ill-formed input is dropped rather than converted: stray continuation
+ * bytes, lead bytes announcing more than three continuation bytes,
+ * sequences cut short by a non-continuation byte or by the end of the
+ * input, and values above U+10FFFF.
+ */
+char UTF8UTF16::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+    // Work from a copy so that text can be rebuilt from scratch.
+    const SWBuf orig = text;
+    const unsigned char *from = (const unsigned char *)orig.c_str();
+
+    text = "";
+    while (*from) {
+        const unsigned char lead = *from++;
+
+        // case: ASCII (single byte, high bit clear)
+        if (!(lead & 0x80)) {
+            appendUTF16Unit(text, (unsigned short)lead);
+            continue;
+        }
+        // case: invalid UTF-8 (continuation byte in initial position)
+        if (!(lead & 0x40)) {
+            continue;
+        }
+
+        // case: 2+ byte codepoint
+        // Each 1 bit following the top bit of the lead byte announces one
+        // continuation byte.
+        int subsequent = 0;
+        for (unsigned char bit = 0x40; bit && (lead & bit); bit >>= 1) {
+            subsequent++;
+        }
+        // UTF-8 sequences are at most four bytes long; anything longer cannot
+        // be represented in UTF-16.  Its continuation bytes are discarded by
+        // the stray-continuation check on the following iterations.
+        if (subsequent > 3) {
+            continue;
+        }
+
+        // The payload bits of the lead byte sit below its length marker.
+        unsigned long ch = lead & (0x3F >> subsequent);
+        bool complete = true;
+        for (int i = 0; i < subsequent; i++) {
+            // A NUL is never a continuation byte, so this check also keeps us
+            // from stepping past the end of the buffer when the input ends in
+            // the middle of a sequence.
+            if ((*from & 0xC0) != 0x80) {
+                complete = false;
+                break;
+            }
+            ch = (ch << 6) | (unsigned long)(*from & 0x3F);
+            from++;
+        }
+        // On a broken sequence 'from' is left on the offending byte so that it
+        // is processed in its own right by the next iteration.
+        if (!complete || ch > 0x10FFFF) {
+            continue;
+        }
+
+        if (ch < 0x10000) {
+            appendUTF16Unit(text, (unsigned short)ch);
+        }
+        else {
+            // Supplementary plane: emit a high/low surrogate pair.
+            const unsigned long offset = ch - 0x10000;
+            appendUTF16Unit(text, (unsigned short)(0xD800 + (offset >> 10)));
+            appendUTF16Unit(text, (unsigned short)(0xDC00 + (offset & 0x3FF)));
+        }
+    }
+    // 16-bit zero terminator
+    appendUTF16Unit(text, (unsigned short)0);
+
+    return 0;
+}
+
+SWORD_NAMESPACE_END