summaryrefslogtreecommitdiff
path: root/src/modules/filters
diff options
context:
space:
mode:
authorRoberto C. Sanchez <roberto@connexer.com>2014-03-29 10:53:33 -0400
committerRoberto C. Sanchez <roberto@connexer.com>2014-03-29 10:53:33 -0400
commit8d3fc864d094eeadc721f8e93436b37a5fab173e (patch)
tree05e201c67dca55b4ccdf90ad479a25d95e3b1e63 /src/modules/filters
Imported Upstream version 1.5.3
Diffstat (limited to 'src/modules/filters')
-rw-r--r--src/modules/filters/Makefile5
-rw-r--r--src/modules/filters/Makefile.am65
-rw-r--r--src/modules/filters/cipherfil.cpp38
-rw-r--r--src/modules/filters/gbffootnotes.cpp118
-rw-r--r--src/modules/filters/gbfheadings.cpp107
-rw-r--r--src/modules/filters/gbfhtml.cpp536
-rw-r--r--src/modules/filters/gbfhtmlhref.cpp148
-rw-r--r--src/modules/filters/gbfmorph.cpp98
-rw-r--r--src/modules/filters/gbfplain.cpp106
-rw-r--r--src/modules/filters/gbfrtf.cpp277
-rw-r--r--src/modules/filters/gbfstrongs.cpp98
-rw-r--r--src/modules/filters/gbfthml.cpp463
-rw-r--r--src/modules/filters/greeklexattribs.cpp58
-rw-r--r--src/modules/filters/latin1utf16.cpp120
-rw-r--r--src/modules/filters/latin1utf8.cpp179
-rw-r--r--src/modules/filters/plainfootnotes.cpp102
-rw-r--r--src/modules/filters/plainhtml.cpp134
-rw-r--r--src/modules/filters/rtfhtml.cpp99
-rw-r--r--src/modules/filters/rwphtml.cpp187
-rw-r--r--src/modules/filters/rwprtf.cpp107
-rw-r--r--src/modules/filters/scsuutf8.cpp220
-rw-r--r--src/modules/filters/swbasicfilter.cpp299
-rw-r--r--src/modules/filters/thmlfootnotes.cpp103
-rw-r--r--src/modules/filters/thmlgbf.cpp330
-rw-r--r--src/modules/filters/thmlheadings.cpp107
-rw-r--r--src/modules/filters/thmlhtml.cpp211
-rw-r--r--src/modules/filters/thmlhtmlhref.cpp269
-rw-r--r--src/modules/filters/thmllemma.cpp97
-rw-r--r--src/modules/filters/thmlmorph.cpp98
-rw-r--r--src/modules/filters/thmlolb.cpp243
-rw-r--r--src/modules/filters/thmlplain.cpp201
-rw-r--r--src/modules/filters/thmlrtf.cpp219
-rw-r--r--src/modules/filters/thmlscripref.cpp103
-rw-r--r--src/modules/filters/thmlstrongs.cpp138
-rw-r--r--src/modules/filters/thmlvariants.cpp183
-rw-r--r--src/modules/filters/unicodertf.cpp70
-rw-r--r--src/modules/filters/utf16utf8.cpp95
-rw-r--r--src/modules/filters/utf8arshaping.cpp48
-rw-r--r--src/modules/filters/utf8bidireorder.cpp55
-rw-r--r--src/modules/filters/utf8cantillation.cpp64
-rw-r--r--src/modules/filters/utf8greekaccents.cpp252
-rw-r--r--src/modules/filters/utf8hebrewpoints.cpp55
-rw-r--r--src/modules/filters/utf8html.cpp66
-rw-r--r--src/modules/filters/utf8latin1.cpp74
-rw-r--r--src/modules/filters/utf8nfc.cpp46
-rw-r--r--src/modules/filters/utf8nfkd.cpp46
-rw-r--r--src/modules/filters/utf8transliterator.cpp479
-rw-r--r--src/modules/filters/utf8utf16.cpp79
48 files changed, 7295 insertions, 0 deletions
diff --git a/src/modules/filters/Makefile b/src/modules/filters/Makefile
new file mode 100644
index 0000000..1a2d00d
--- /dev/null
+++ b/src/modules/filters/Makefile
@@ -0,0 +1,5 @@
+
+# Top of the build tree, relative to this directory.
+root := ../../..
+
+# "all" names a command to run, not a file to be produced.
+.PHONY: all
+
+# Delegate the build to the top-level makefile. $(MAKE) (instead of a literal
+# "make") re-invokes the same make program and hands down flags such as -j/-n.
+all:
+	$(MAKE) -C ${root}
diff --git a/src/modules/filters/Makefile.am b/src/modules/filters/Makefile.am
new file mode 100644
index 0000000..c58fb5f
--- /dev/null
+++ b/src/modules/filters/Makefile.am
@@ -0,0 +1,65 @@
+# Directory that holds the filter sources, relative to the top source directory.
+filtersdir = $(top_srcdir)/src/modules/filters
+
+# Each line below appends one filter source file to libsword_la_SOURCES.
+libsword_la_SOURCES += $(filtersdir)/swbasicfilter.cpp
+
+libsword_la_SOURCES += $(filtersdir)/gbfhtml.cpp
+libsword_la_SOURCES += $(filtersdir)/gbfhtmlhref.cpp
+libsword_la_SOURCES += $(filtersdir)/gbfplain.cpp
+libsword_la_SOURCES += $(filtersdir)/gbfrtf.cpp
+libsword_la_SOURCES += $(filtersdir)/plainhtml.cpp
+libsword_la_SOURCES += $(filtersdir)/rwphtml.cpp
+libsword_la_SOURCES += $(filtersdir)/rwprtf.cpp
+libsword_la_SOURCES += $(filtersdir)/cipherfil.cpp
+libsword_la_SOURCES += $(filtersdir)/rtfhtml.cpp
+
+libsword_la_SOURCES += $(filtersdir)/gbfstrongs.cpp
+libsword_la_SOURCES += $(filtersdir)/gbffootnotes.cpp
+libsword_la_SOURCES += $(filtersdir)/gbfheadings.cpp
+libsword_la_SOURCES += $(filtersdir)/gbfmorph.cpp
+libsword_la_SOURCES += $(filtersdir)/plainfootnotes.cpp
+libsword_la_SOURCES += $(filtersdir)/thmlstrongs.cpp
+libsword_la_SOURCES += $(filtersdir)/thmlfootnotes.cpp
+libsword_la_SOURCES += $(filtersdir)/thmlheadings.cpp
+libsword_la_SOURCES += $(filtersdir)/thmlmorph.cpp
+libsword_la_SOURCES += $(filtersdir)/thmllemma.cpp
+libsword_la_SOURCES += $(filtersdir)/thmlscripref.cpp
+libsword_la_SOURCES += $(filtersdir)/thmlvariants.cpp
+
+libsword_la_SOURCES += $(filtersdir)/gbfthml.cpp
+libsword_la_SOURCES += $(filtersdir)/thmlgbf.cpp
+libsword_la_SOURCES += $(filtersdir)/thmlrtf.cpp
+libsword_la_SOURCES += $(filtersdir)/thmlhtml.cpp
+libsword_la_SOURCES += $(filtersdir)/thmlhtmlhref.cpp
+libsword_la_SOURCES += $(filtersdir)/thmlplain.cpp
+
+libsword_la_SOURCES += $(filtersdir)/unicodertf.cpp
+libsword_la_SOURCES += $(filtersdir)/latin1utf8.cpp
+libsword_la_SOURCES += $(filtersdir)/latin1utf16.cpp
+libsword_la_SOURCES += $(filtersdir)/utf8utf16.cpp
+libsword_la_SOURCES += $(filtersdir)/utf16utf8.cpp
+libsword_la_SOURCES += $(filtersdir)/scsuutf8.cpp
+libsword_la_SOURCES += $(filtersdir)/utf8html.cpp
+libsword_la_SOURCES += $(filtersdir)/utf8latin1.cpp
+
+libsword_la_SOURCES += $(filtersdir)/thmlolb.cpp
+
+libsword_la_SOURCES += $(filtersdir)/greeklexattribs.cpp
+
+# When the ICU automake conditional is true, the five sources listed in
+# SWICUSRC are compiled as well and -D_ICU_ is added to DEFS. Otherwise both
+# variables stay empty, so the two "+=" lines after the conditional add nothing.
+if ICU
+ICUDEFS = -D_ICU_
+SWICUSRC = $(filtersdir)/utf8transliterator.cpp
+SWICUSRC += $(filtersdir)/utf8nfc.cpp
+SWICUSRC += $(filtersdir)/utf8nfkd.cpp
+SWICUSRC += $(filtersdir)/utf8arshaping.cpp
+SWICUSRC += $(filtersdir)/utf8bidireorder.cpp
+else
+SWICUSRC =
+ICUDEFS =
+endif
+libsword_la_SOURCES += $(SWICUSRC)
+DEFS += $(ICUDEFS)
+
+# These three are added unconditionally, outside the ICU conditional.
+libsword_la_SOURCES += $(filtersdir)/utf8cantillation.cpp
+libsword_la_SOURCES += $(filtersdir)/utf8hebrewpoints.cpp
+libsword_la_SOURCES += $(filtersdir)/utf8greekaccents.cpp
+
diff --git a/src/modules/filters/cipherfil.cpp b/src/modules/filters/cipherfil.cpp
new file mode 100644
index 0000000..ad55396
--- /dev/null
+++ b/src/modules/filters/cipherfil.cpp
@@ -0,0 +1,38 @@
+/******************************************************************************
+ *
+ * cipherfil - SWFilter descendant to decipher a module
+ */
+
+
+#include <stdlib.h>
+#include <string.h>
+#include <cipherfil.h>
+
+
+// Creates the filter and allocates the SWCipher it works with.
+// `key` is handed to SWCipher as a pointer to unsigned bytes.
+CipherFilter::CipherFilter(const char *key) {
+ cipher = new SWCipher((unsigned char *)key);
+}
+
+
+// Frees the SWCipher allocated by the constructor.
+CipherFilter::~CipherFilter() {
+ delete cipher;
+}
+
+
+// Returns the cipher object held by this filter. The filter deletes it in
+// its destructor, so callers must not delete it themselves.
+SWCipher *CipherFilter::getCipher() {
+ return cipher;
+}
+
+
+// Hands `text` to the SWCipher (cipherBuf) and copies the cipher's buffer
+// (Buf()) back over `text`.
+//   text        - buffer to process; overwritten in place
+//   maxlen      - number of bytes of `text` given to the cipher
+//   key, module - not used by this filter
+// Always returns 0.
+// NOTE(review): terminators are written at text[maxlen] and text[maxlen+1],
+// so the caller's buffer presumably holds at least maxlen + 2 bytes -- confirm
+// against the callers.
+char CipherFilter::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) {
+ unsigned int len;
+// len = strlen(text);
+ len = maxlen;
+ if (len > 0) {
+ // len is passed by address, so cipherBuf may change it
+ cipher->cipherBuf(&len, text);
+ // copy back min(len, maxlen) bytes; strncpy stops reading the source at its first NUL
+ strncpy(text, cipher->Buf(), (len < (unsigned int)maxlen) ? len : maxlen);
+ }
+ // two terminating zero bytes, written just past the maxlen data bytes
+ text[maxlen] = 0;
+ text[maxlen+1] = 0;
+ return 0;
+}
diff --git a/src/modules/filters/gbffootnotes.cpp b/src/modules/filters/gbffootnotes.cpp
new file mode 100644
index 0000000..c5b7b90
--- /dev/null
+++ b/src/modules/filters/gbffootnotes.cpp
@@ -0,0 +1,118 @@
+/******************************************************************************
+ *
+ * gbffootnotes - SWFilter descendant to hide or show footnotes
+ * in a GBF module.
+ */
+
+
+#include <stdlib.h>
+#include <string.h>
+#include <gbffootnotes.h>
+#ifndef __GNUC__
+#else
+#include <unixstr.h>
+#endif
+
+
+// String constants for this filter's option: its two values, name and tip.
+const char GBFFootnotes::on[] = "On";
+const char GBFFootnotes::off[] = "Off";
+const char GBFFootnotes::optName[] = "Footnotes";
+const char GBFFootnotes::optTip[] = "Toggles Footnotes On and Off if they exist";
+
+
+// Registers both option values and starts with the option switched off.
+GBFFootnotes::GBFFootnotes()
+{
+    options.push_back(on);
+    options.push_back(off);
+    option = false;
+}
+
+
+// Nothing to release.
+GBFFootnotes::~GBFFootnotes()
+{
+}
+
+
+// Switches the option on when `ival` equals "On" (compared without regard
+// to case); any other value switches it off.
+void GBFFootnotes::setOptionValue(const char *ival)
+{
+    const bool wantsOn = (stricmp(ival, on) == 0);
+    option = wantsOn;
+}
+
+
+// Returns "On" or "Off" according to the current setting.
+const char *GBFFootnotes::getOptionValue()
+{
+    if (option)
+        return on;
+    return off;
+}
+
+/*
+ * Removes footnotes from GBF text, in place, while the option is off.
+ * Does nothing while the option is on.
+ *
+ *   text        - NUL-terminated GBF text; rewritten in place
+ *   maxlen      - buffer size; a shorter text is first moved to the end of
+ *                 the buffer so the result can be written from the start
+ *   key, module - not used
+ *
+ * Text between <RF...> and <Rf...> is dropped. Every token starting with 'R'
+ * is removed, as are tokens starting with WTP, WTS or WTA. Other tokens are
+ * copied through unless they sit inside a hidden footnote.
+ * Always returns 0.
+ */
+char GBFFootnotes::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+{
+    if (!option) {    // if we don't want footnotes
+        char *to, *from, token[4096]; // cheese.  Fix.
+        int tokpos = 0;
+        bool intoken = false;
+        int len;
+        bool hide = false;
+
+        len = strlen(text) + 1;    // shift string to right of buffer
+        if (len < maxlen) {
+            memmove(&text[maxlen - len], text, len);
+            from = &text[maxlen - len];
+        }
+        else
+            from = text;
+
+        for (to = text; *from; from++) {
+            if (*from == '<') {
+                intoken = true;
+                tokpos = 0;
+                // the switch below looks at up to three characters
+                token[0] = 0;
+                token[1] = 0;
+                token[2] = 0;
+                continue;
+            }
+            // Only a '>' that closes an open token is a token end. A stray
+            // '>' is ordinary text; treating it as a token end would re-run
+            // the previous token or read an uninitialised buffer.
+            if (intoken && *from == '>') {    // process tokens
+                intoken = false;
+                switch (*token) {
+                case 'R':    // Reference
+                    switch (token[1]) {
+                    case 'F':    // Begin footnote
+                        hide = true;
+                        break;
+                    case 'f':    // end footnote
+                        hide = false;
+                        break;
+                    }
+                    // NOTE(review): this drops every R token, not only RF/Rf -- confirm intended
+                    continue;    // skip token
+                case 'W':
+                    if (token[1] == 'T') {
+                        switch (token[2]) {
+                        case 'P':
+                        case 'S':
+                        case 'A':
+                            continue;    // remove this token
+                        default:
+                            break;
+                        }
+                    }
+                }
+                // if not a footnote token, keep token in text
+                if (!hide) {
+                    *to++ = '<';
+                    for (char *tok = token; *tok; tok++)
+                        *to++ = *tok;
+                    *to++ = '>';
+                }
+                continue;
+            }
+            if (intoken) {
+                if (tokpos < 4090)
+                    token[tokpos++] = *from;
+                token[tokpos+2] = 0;    // +2: keep two zero bytes after the token for the switch above
+            }
+            else if (!hide) {
+                *to++ = *from;
+            }
+        }
+        *to++ = 0;
+        *to = 0;
+    }
+    return 0;
+}
diff --git a/src/modules/filters/gbfheadings.cpp b/src/modules/filters/gbfheadings.cpp
new file mode 100644
index 0000000..590e2fa
--- /dev/null
+++ b/src/modules/filters/gbfheadings.cpp
@@ -0,0 +1,107 @@
+/******************************************************************************
+ *
+ * gbfheadings - SWFilter descendant to hide or show headings
+ * in a GBF module.
+ */
+
+
+#include <stdlib.h>
+#include <string.h>
+#include <gbfheadings.h>
+#ifndef __GNUC__
+#else
+#include <unixstr.h>
+#endif
+
+
+// String constants for this filter's option: its two values, name and tip.
+const char GBFHeadings::on[] = "On";
+const char GBFHeadings::off[] = "Off";
+const char GBFHeadings::optName[] = "Headings";
+const char GBFHeadings::optTip[] = "Toggles Headings On and Off if they exist";
+
+
+// Registers both option values and starts with the option switched off.
+GBFHeadings::GBFHeadings()
+{
+    options.push_back(on);
+    options.push_back(off);
+    option = false;
+}
+
+
+// Nothing to release.
+GBFHeadings::~GBFHeadings()
+{
+}
+
+
+// Switches the option on when `ival` equals "On" (compared without regard
+// to case); any other value switches it off.
+void GBFHeadings::setOptionValue(const char *ival)
+{
+    const bool wantsOn = (stricmp(ival, on) == 0);
+    option = wantsOn;
+}
+
+
+// Returns "On" or "Off" according to the current setting.
+const char *GBFHeadings::getOptionValue()
+{
+    if (option)
+        return on;
+    return off;
+}
+
+/*
+ * Removes headings from GBF text, in place, while the option is off.
+ * Does nothing while the option is on.
+ *
+ *   text        - NUL-terminated GBF text; rewritten in place
+ *   maxlen      - buffer size; a shorter text is first moved to the end of
+ *                 the buffer so the result can be written from the start
+ *   key, module - not used
+ *
+ * Text between <TS...> and <Ts...> is dropped and every token starting with
+ * 'T' is removed. Other tokens are copied through unless they sit inside a
+ * hidden heading. Always returns 0.
+ */
+char GBFHeadings::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+{
+    if (!option) {    // if we don't want headings
+        char *to, *from, token[2048]; // cheese.  Fix.
+        int tokpos = 0;
+        bool intoken = false;
+        int len;
+        bool hide = false;
+
+        len = strlen(text) + 1;    // shift string to right of buffer
+        if (len < maxlen) {
+            memmove(&text[maxlen - len], text, len);
+            from = &text[maxlen - len];
+        }
+        else
+            from = text;
+
+        for (to = text; *from; from++) {
+            if (*from == '<') {
+                intoken = true;
+                tokpos = 0;
+                // the switch below looks at up to two characters
+                token[0] = 0;
+                token[1] = 0;
+                token[2] = 0;
+                continue;
+            }
+            // Only a '>' that closes an open token is a token end. A stray
+            // '>' is ordinary text; treating it as a token end would re-run
+            // the previous token or read an uninitialised buffer.
+            if (intoken && *from == '>') {    // process tokens
+                intoken = false;
+                switch (*token) {
+                case 'T':    // Title
+                    switch (token[1]) {
+                    case 'S':    // Begin heading
+                        hide = true;
+                        break;
+                    case 's':    // end heading
+                        hide = false;
+                        break;
+                    }
+                    // NOTE(review): this drops every T token, not only TS/Ts -- confirm intended
+                    continue;    // skip token
+                }
+                // if not a heading token, keep token in text
+                if (!hide) {
+                    *to++ = '<';
+                    for (char *tok = token; *tok; tok++)
+                        *to++ = *tok;
+                    *to++ = '>';
+                }
+                continue;
+            }
+            if (intoken) {
+                if (tokpos < 2045)
+                    token[tokpos++] = *from;
+                token[tokpos+2] = 0;    // +2: keep two zero bytes after the token for the switch above
+            }
+            else if (!hide) {
+                *to++ = *from;
+            }
+        }
+        *to++ = 0;
+        *to = 0;
+    }
+    return 0;
+}
diff --git a/src/modules/filters/gbfhtml.cpp b/src/modules/filters/gbfhtml.cpp
new file mode 100644
index 0000000..73d445a
--- /dev/null
+++ b/src/modules/filters/gbfhtml.cpp
@@ -0,0 +1,536 @@
+/***************************************************************************
+ gbfhtml.cpp - description
+ -------------------
+ begin : Thu Jun 24 1999
+ copyright : (C) 1999 by Torsten Uhlmann
+ email : TUhlmann@gmx.de
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include <gbfhtml.h>
+
+
+// Nothing to set up: ProcessText keeps all of its state in local variables.
+GBFHTML::GBFHTML()
+{
+}
+
+
+// Copies the NUL-terminated string `s` to *to and advances `to` past it.
+// No terminator is written; ProcessText terminates the output when done.
+static void gbfhtmlAppend(char *&to, const char *s)
+{
+    while (*s)
+        *to++ = *s++;
+}
+
+
+/*
+ * Converts GBF markup in `text` to HTML, in place.
+ *
+ *   text        - NUL-terminated GBF text; overwritten with the HTML result
+ *   maxlen      - buffer size; a shorter text is first moved to the end of
+ *                 the buffer so the (usually longer) result can be written
+ *                 from the start without overtaking the unread input
+ *   key, module - not used
+ *
+ * Newlines become spaces. Tokens that are not recognised are dropped.
+ * Always returns 0.
+ *
+ * NOTE(review): nothing here checks that the output fits in the buffer;
+ * presumably callers supply a generously padded buffer -- confirm.
+ */
+char GBFHTML::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+{
+    char *to, *from, token[2048];
+    int tokpos = 0;
+    bool intoken = false;
+    bool hasFootnotePreTag = false;
+    bool isRightJustified = false;
+    bool isCentered = false;
+    int len;
+    const char *tok;
+
+    len = strlen(text) + 1;    // shift string to right of buffer
+    if (len < maxlen) {
+        memmove(&text[maxlen - len], text, len);
+        from = &text[maxlen - len];
+    }
+    else
+        from = text;
+
+    for (to = text; *from; from++) {
+        if (*from == '\n') {
+            *from = ' ';
+        }
+        if (*from == '<') {
+            intoken = true;
+            tokpos = 0;
+            // the switches below look at the first two characters
+            token[0] = 0;
+            token[1] = 0;
+            token[2] = 0;
+            continue;
+        }
+        // Only a '>' that closes an open token is a token end. A stray '>'
+        // is ordinary text; treating it as a token end would re-run the
+        // previous token or read an uninitialised buffer.
+        if (intoken && *from == '>') {
+            intoken = false;
+            // process desired tokens; anything not listed is dropped
+            switch (*token) {
+            case 'W':    // Strongs
+                switch (token[1]) {
+                case 'G':    // Greek
+                case 'H':    // Hebrew
+                case 'T':    // Tense
+                    gbfhtmlAppend(to, " <small><em>");
+                    gbfhtmlAppend(to, token + 2);
+                    gbfhtmlAppend(to, "</em></small> ");
+                    break;
+                }
+                break;
+
+            case 'R':
+                switch (token[1]) {
+                case 'X':    // cross reference begin
+                    gbfhtmlAppend(to, "<a href=\"");
+                    // token + 3 is always inside the zeroed padding that
+                    // follows the token, even for a bare "RX". Copy the rest
+                    // of the token, stopping at a '<' should one ever occur.
+                    for (tok = token + 3; *tok && *tok != '<'; tok++)
+                        *to++ = *tok;
+                    gbfhtmlAppend(to, "\">");
+                    break;
+                case 'x':    // cross reference end
+                    gbfhtmlAppend(to, "</a>");
+                    break;
+                case 'B':    // word(s) explained in footnote
+                    gbfhtmlAppend(to, "<i>");
+                    hasFootnotePreTag = true;    // we have the RB tag
+                    break;
+                case 'F':    // footnote begin
+                    if (hasFootnotePreTag)
+                        gbfhtmlAppend(to, "</i> ");
+                    gbfhtmlAppend(to, "<font color=\"#800000\"> <small>(");
+                    break;
+                case 'f':    // footnote end
+                    gbfhtmlAppend(to, ")</small> </font>");
+                    hasFootnotePreTag = false;
+                    break;
+                }
+                break;
+
+            case 'F':    // font tags
+                switch (token[1]) {
+                case 'I':    // italic start
+                    gbfhtmlAppend(to, "<i>");
+                    break;
+                case 'i':    // italic end
+                    gbfhtmlAppend(to, "</i>");
+                    break;
+                case 'B':    // bold start
+                    gbfhtmlAppend(to, "<b>");
+                    break;
+                case 'b':    // bold end
+                    gbfhtmlAppend(to, "</b>");
+                    break;
+                case 'R':    // words of Jesus begin
+                    // attribute value quoted, as for the footnote colour above
+                    gbfhtmlAppend(to, "<font color=\"#FF0000\">");
+                    break;
+                case 'r':    // words of Jesus end
+                    gbfhtmlAppend(to, "</font>");
+                    break;
+                case 'U':    // Underline start
+                    gbfhtmlAppend(to, "<u>");
+                    break;
+                case 'u':    // Underline end
+                    gbfhtmlAppend(to, "</u>");
+                    break;
+                case 'O':    // Old Testament quote begin
+                    gbfhtmlAppend(to, "<cite>");
+                    break;
+                case 'o':    // Old Testament quote end
+                    gbfhtmlAppend(to, "</cite>");
+                    break;
+                case 'S':    // Superscript begin
+                    gbfhtmlAppend(to, "<sup>");
+                    break;
+                case 's':    // Superscript end
+                    gbfhtmlAppend(to, "</sup>");
+                    break;
+                case 'V':    // Subscript begin
+                    gbfhtmlAppend(to, "<sub>");
+                    break;
+                case 'v':    // Subscript end
+                    gbfhtmlAppend(to, "</sub>");
+                    break;
+                case 'N':    // font face begin; the face name follows "FN"
+                    gbfhtmlAppend(to, "<font face=\"");
+                    gbfhtmlAppend(to, token + 2);
+                    gbfhtmlAppend(to, "\">");
+                    break;
+                case 'n':    // font face end
+                    gbfhtmlAppend(to, "</font>");
+                    break;
+                }
+                break;
+
+            case 'C':    // special character tags
+                switch (token[1]) {
+                case 'A':    // ASCII value
+                    *to++ = (char)atoi(&token[2]);
+                    break;
+                case 'G':    // dropped
+                    break;
+                case 'L':    // line break
+                    gbfhtmlAppend(to, "<br /> ");
+                    break;
+                case 'M':    // new paragraph
+                    gbfhtmlAppend(to, "<br />");
+                    break;
+                case 'T':    // dropped
+                    break;
+                }
+                break;
+
+            case 'J':    // Justification
+                switch (token[1]) {
+                case 'R':    // right
+                    gbfhtmlAppend(to, "<div align=\"right\">");
+                    isRightJustified = true;
+                    break;
+                case 'C':    // center
+                    gbfhtmlAppend(to, "<div align=\"center\">");
+                    isCentered = true;
+                    break;
+                case 'L':    // left, reset right and center
+                    // both alignments are opened as <div>, so both close with </div>
+                    if (isCentered) {
+                        gbfhtmlAppend(to, "</div>");
+                        isCentered = false;
+                    }
+                    if (isRightJustified) {
+                        gbfhtmlAppend(to, "</div>");
+                        isRightJustified = false;
+                    }
+                    break;
+                }
+                break;
+
+            case 'T':    // title formatting
+                switch (token[1]) {
+                case 'T':    // Book title begin
+                    gbfhtmlAppend(to, "<big>");
+                    break;
+                case 't':    // Book title end
+                    gbfhtmlAppend(to, "</big>");
+                    break;
+                // TS / Ts are deliberately not converted here; they are dropped
+                }
+                break;
+
+            case 'P':    // special formatting
+                switch (token[1]) {
+                case 'P':    // Poetry begin
+                    gbfhtmlAppend(to, "<cite>");
+                    break;
+                case 'p':    // Poetry end
+                    gbfhtmlAppend(to, "</cite>");
+                    break;
+                }
+                break;
+            }
+            continue;
+        }
+        if (intoken) {
+            if (tokpos < 2045) {
+                token[tokpos++] = *from;
+                token[tokpos+2] = 0;    // keep two zero bytes after the token
+            }
+        }
+        else
+            *to++ = *from;
+    }
+    *to++ = 0;
+    *to = 0;
+    return 0;
+}
diff --git a/src/modules/filters/gbfhtmlhref.cpp b/src/modules/filters/gbfhtmlhref.cpp
new file mode 100644
index 0000000..30b27ba
--- /dev/null
+++ b/src/modules/filters/gbfhtmlhref.cpp
@@ -0,0 +1,148 @@
+/***************************************************************************
+ gbfhtmlhref.cpp - GBF to HTML filter with hrefs
+ for strongs and morph tags
+ -------------------
+ begin : 2001-09-03
+ copyright : 2001 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include <gbfhtmlhref.h>
+
+// Sets '<' and '>' as the token delimiters, makes token matching case
+// sensitive, and registers the fixed GBF-token -> HTML replacements.
+// Tokens that carry data (Strong's numbers, morphology, cross references,
+// font faces, ...) are dealt with in handleToken().
+GBFHTMLHREF::GBFHTMLHREF() {
+    setTokenStart("<");
+    setTokenEnd(">");
+
+    setTokenCaseSensitive(true);
+
+    addTokenSubstitute("Rf", ")</small></font>");    // footnote end
+    addTokenSubstitute("Rx", "</a>");    // cross reference end
+    addTokenSubstitute("FI", "<i>");    // italics begin
+    addTokenSubstitute("Fi", "</i>");
+    addTokenSubstitute("FB", "<b>");    // bold begin (was "<n>", which is not an HTML element)
+    addTokenSubstitute("Fb", "</b>");
+    addTokenSubstitute("FR", "<font color=\"#FF0000\">");    // words of Jesus begin
+    addTokenSubstitute("Fr", "</font>");
+    addTokenSubstitute("FU", "<u>");    // underline begin
+    addTokenSubstitute("Fu", "</u>");
+    addTokenSubstitute("FO", "<cite>");    // Old Testament quote begin
+    addTokenSubstitute("Fo", "</cite>");
+    addTokenSubstitute("FS", "<sup>");    // Superscript begin
+    addTokenSubstitute("Fs", "</sup>");
+    addTokenSubstitute("FV", "<sub>");    // Subscript begin
+    addTokenSubstitute("Fv", "</sub>");
+    addTokenSubstitute("TT", "<big>");    // Book title begin
+    addTokenSubstitute("Tt", "</big>");
+    addTokenSubstitute("PP", "<cite>");    // poetry begin
+    addTokenSubstitute("Pp", "</cite>");
+    addTokenSubstitute("Fn", "</font>");    // font end
+    addTokenSubstitute("CL", "<br />");    // new line
+    addTokenSubstitute("CM", "<!P><br />");    // paragraph <!P> is a non showing comment that can be changed in the front end to <P> if desired
+    addTokenSubstitute("CG", "");    // ???
+    addTokenSubstitute("CT", "");    // ???
+    addTokenSubstitute("JR", "<div align=\"right\">");    // right align begin
+    addTokenSubstitute("JC", "<div align=\"center\">");    // center align begin
+    addTokenSubstitute("JL", "</div>");    // align end
+
+}
+
+
+/*
+ * Writes the HTML for one GBF token.
+ *
+ *   buf      - address of the output cursor; text is written at *buf and
+ *              the cursor is advanced past it
+ *   token    - token text without its '<' and '>' delimiters
+ *   userData - per-run state; "hasFootnotePreTag" records that an <RB> was
+ *              seen so the following <RF> can close its <i>
+ *
+ * substituteToken() is tried first; the branches below only run when it
+ * returns false. Returns true when the token was handled (by substitution or
+ * by one of the branches) and false for a token this filter does not know.
+ */
+bool GBFHTMLHREF::handleToken(char **buf, const char *token, DualStringMap &userData) {
+    const char *tok;
+
+    if (!substituteToken(buf, token)) {
+        if (!strncmp(token, "WG", 2) || !strncmp(token, "WH", 2)) {    // strong's numbers
+            pushString(buf, " <small><em>&lt;<a href=\"#");
+            // the link target keeps the G/H letter, the visible text does not
+            for (tok = token + 1; *tok; tok++)
+                *(*buf)++ = *tok;
+            *(*buf)++ = '\"';
+            *(*buf)++ = '>';
+            for (tok = token + 2; *tok; tok++)
+                *(*buf)++ = *tok;
+            pushString(buf, "</a>&gt;</em></small>");
+        }
+
+        else if (!strncmp(token, "WTG", 3) || !strncmp(token, "WTH", 3)) {    // strong's numbers tense
+            pushString(buf, " <small><em>(<A HREF=\"#");
+            for (tok = token + 2; *tok; tok++)
+                if (*tok != '\"')
+                    *(*buf)++ = *tok;
+            *(*buf)++ = '\"';
+            *(*buf)++ = '>';
+            for (tok = token + 3; *tok; tok++)
+                if (*tok != '\"')
+                    *(*buf)++ = *tok;
+            pushString(buf, "</a>)</em></small>");
+        }
+
+        // WTG / WTH were taken by the branch above, so any WT token that
+        // reaches this point is a morph tag
+        else if (!strncmp(token, "WT", 2)) {    // morph tags
+            pushString(buf, " <small><em>(<a href=\"M");
+            for (tok = token + 2; *tok; tok++)
+                if (*tok != '\"')
+                    *(*buf)++ = *tok;
+            *(*buf)++ = '\"';
+            *(*buf)++ = '>';
+            for (tok = token + 2; *tok; tok++)
+                if (*tok != '\"')
+                    *(*buf)++ = *tok;
+            pushString(buf, "</a>)</em></small>");
+        }
+
+        else if (!strncmp(token, "RX", 2)) {    // cross reference begin
+            pushString(buf, "<a href=\"");
+            // Skip "RX" and the one character after it, without stepping
+            // past the terminator of a bare "RX" token.
+            tok = token + 2;
+            if (*tok)
+                tok++;
+            // Copy the rest of the token, stopping at a '<' should one occur.
+            // (The old test compared *tok+1 and *tok+2, i.e. the character
+            // code plus one or two, and so stopped at any 'Q' or 'v'.)
+            for (; *tok && *tok != '<'; tok++)
+                *(*buf)++ = *tok;
+            *(*buf)++ = '\"';
+            *(*buf)++ = '>';
+        }
+
+        else if (!strncmp(token, "RB", 2)) {    // word(s) explained in footnote
+            pushString(buf, "<i>");
+            userData["hasFootnotePreTag"] = "true";
+        }
+
+        else if (!strncmp(token, "RF", 2)) {    // footnote begin
+            if (userData["hasFootnotePreTag"] == "true") {
+                userData["hasFootnotePreTag"] = "false";
+                pushString(buf, "</i> ");
+            }
+            pushString(buf, "<font color=\"#800000\"><small> (");
+        }
+
+        else if (!strncmp(token, "FN", 2)) {    // font face begin
+            pushString(buf, "<font face=\"");
+            for (tok = token + 2; *tok; tok++)
+                if (*tok != '\"')
+                    *(*buf)++ = *tok;
+            *(*buf)++ = '\"';
+            *(*buf)++ = '>';
+        }
+
+        else if (!strncmp(token, "CA", 2)) {    // ASCII value
+            *(*buf)++ = (char)atoi(&token[2]);
+        }
+
+        else {
+            return false;
+        }
+    }
+    return true;
+}
diff --git a/src/modules/filters/gbfmorph.cpp b/src/modules/filters/gbfmorph.cpp
new file mode 100644
index 0000000..f8d336e
--- /dev/null
+++ b/src/modules/filters/gbfmorph.cpp
@@ -0,0 +1,98 @@
+/******************************************************************************
+ *
+ * gbfmorph - SWFilter descendant to hide or show morph tags
+ * in a GBF module.
+ */
+
+
+#include <stdlib.h>
+#include <string.h>
+#include <gbfmorph.h>
+#ifndef __GNUC__
+#else
+#include <unixstr.h>
+#endif
+
+
+// String constants for this filter's option: its two values, name and tip.
+const char GBFMorph::on[] = "On";
+const char GBFMorph::off[] = "Off";
+const char GBFMorph::optName[] = "Morphological Tags";
+const char GBFMorph::optTip[] = "Toggles Morphological Tags On and Off if they exist";
+
+
+// Registers both option values and starts with the option switched off.
+GBFMorph::GBFMorph()
+{
+    options.push_back(on);
+    options.push_back(off);
+    option = false;
+}
+
+
+// Nothing to release.
+GBFMorph::~GBFMorph()
+{
+}
+
+
+// Switches the option on when `ival` equals "On" (compared without regard
+// to case); any other value switches it off.
+void GBFMorph::setOptionValue(const char *ival)
+{
+    const bool wantsOn = (stricmp(ival, on) == 0);
+    option = wantsOn;
+}
+
+
+// Returns "On" or "Off" according to the current setting.
+const char *GBFMorph::getOptionValue()
+{
+    if (option)
+        return on;
+    return off;
+}
+
+/*
+ * Removes morphology tokens (<WT...>) from GBF text, in place, while the
+ * option is off. Does nothing while the option is on.
+ *
+ *   text        - NUL-terminated GBF text; rewritten in place
+ *   maxlen      - buffer size; a shorter text is first moved to the end of
+ *                 the buffer so the result can be written from the start
+ *   key, module - not used
+ *
+ * When a removed token is followed by a space or punctuation mark and the
+ * last character written was a space, that space is taken back so no gap is
+ * left in front of the punctuation. All other tokens are copied through.
+ * Always returns 0.
+ */
+char GBFMorph::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+{
+    if (!option) {    // if we don't want morph tags
+        char *to, *from, token[2048]; // cheese.  Fix.
+        int tokpos = 0;
+        bool intoken = false;
+        int len;
+        bool lastspace = false;    // true only while the last byte written is a space
+
+        len = strlen(text) + 1;    // shift string to right of buffer
+        if (len < maxlen) {
+            memmove(&text[maxlen - len], text, len);
+            from = &text[maxlen - len];
+        }
+        else
+            from = text;
+
+        for (to = text; *from; from++) {
+            if (*from == '<') {
+                intoken = true;
+                tokpos = 0;
+                token[0] = 0;
+                token[1] = 0;
+                token[2] = 0;
+                continue;
+            }
+            // Only a '>' that closes an open token is a token end. A stray
+            // '>' is ordinary text; treating it as a token end would re-run
+            // the previous token or read an uninitialised buffer.
+            if (intoken && *from == '>') {    // process tokens
+                intoken = false;
+                if (*token == 'W' && token[1] == 'T') {    // Morph
+                    const char next = from[1];
+                    if ((next == ' ') || (next == ',') || (next == ';') || (next == '.') || (next == '?') || (next == '!') || (next == ')') || (next == '\'') || (next == '\"')) {
+                        if (lastspace) {
+                            to--;    // take back the space written before the tag
+                            lastspace = false;
+                        }
+                    }
+                    continue;
+                }
+                // if not a morph tag token, keep token in text
+                *to++ = '<';
+                for (char *tok = token; *tok; tok++)
+                    *to++ = *tok;
+                *to++ = '>';
+                // The last byte written is now '>'. Without this reset a
+                // following morph tag could "take back" that '>' and leave a
+                // truncated token in the output.
+                lastspace = false;
+                continue;
+            }
+            if (intoken) {
+                if (tokpos < 2045)
+                    token[tokpos++] = *from;
+                token[tokpos+2] = 0;    // keep two zero bytes after the token
+            }
+            else {
+                *to++ = *from;
+                lastspace = (*from == ' ');
+            }
+        }
+        *to++ = 0;
+        *to = 0;
+    }
+    return 0;
+}
diff --git a/src/modules/filters/gbfplain.cpp b/src/modules/filters/gbfplain.cpp
new file mode 100644
index 0000000..65766d3
--- /dev/null
+++ b/src/modules/filters/gbfplain.cpp
@@ -0,0 +1,106 @@
+/******************************************************************************
+ *
+ * gbfplain - SWFilter descendant to strip out all GBF tags or convert to
+ * ASCII rendered symbols.
+ */
+
+
+#include <stdlib.h>
+#include <string.h>
+#include <gbfplain.h>
+
+
+// Nothing to set up: ProcessText keeps all of its state in local variables.
+GBFPlain::GBFPlain() {
+}
+
+
+/*
+ * Converts GBF text to plain text, in place.
+ *
+ *   text        - NUL-terminated GBF text; overwritten with the result
+ *   maxlen      - buffer size; a shorter text is first moved to the end of
+ *                 the buffer so the result can be written from the start
+ *   key, module - not used
+ *
+ * Strong's / tense tokens become " <number> ", footnotes are wrapped in
+ * " [" and "] ", and a few character tokens are turned into the characters
+ * they stand for. Every other token is dropped. Always returns 0.
+ */
+char GBFPlain::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+{
+    char *to, *from, token[2048];
+    int tokpos = 0;
+    bool intoken = false;
+    int len;
+
+    len = strlen(text) + 1;    // shift string to right of buffer
+    if (len < maxlen) {
+        memmove(&text[maxlen - len], text, len);
+        from = &text[maxlen - len];
+    }
+    else
+        from = text;
+
+    for (to = text; *from; from++) {
+        if (*from == '<') {
+            intoken = true;
+            tokpos = 0;
+            token[0] = 0;
+            token[1] = 0;
+            token[2] = 0;
+            continue;
+        }
+        // Only a '>' that closes an open token is a token end. A stray '>'
+        // is ordinary text; treating it as a token end would re-run the
+        // previous token or read an uninitialised buffer.
+        if (intoken && *from == '>') {
+            intoken = false;
+            // process desired tokens; anything not listed is dropped
+            switch (*token) {
+            case 'W':    // Strongs
+                switch (token[1]) {
+                case 'G':    // Greek
+                case 'H':    // Hebrew
+                case 'T':    // Tense
+                    *to++ = ' ';
+                    *to++ = '<';
+                    for (char *tok = token + 2; *tok; tok++)
+                        *to++ = *tok;
+                    *to++ = '>';
+                    *to++ = ' ';
+                    break;
+                }
+                break;
+            case 'R':
+                switch (token[1]) {
+                case 'F':    // footnote begin
+                    *to++ = ' ';
+                    *to++ = '[';
+                    break;
+                case 'f':    // footnote end
+                    *to++ = ']';
+                    *to++ = ' ';
+                    break;
+                }
+                break;
+            case 'C':
+                switch (token[1]) {
+                case 'A':    // ASCII value
+                    *to++ = (char)atoi(&token[2]);
+                    break;
+                case 'G':
+                    *to++ = '>';
+                    break;
+                // <CL> ought to produce '<', but is treated as a new line
+                // because of a bug in WEB (see the original note); restore
+                // the '<' mapping once that is fixed.
+                case 'L':
+                case 'N':    // new line
+                    *to++ = '\n';
+                    break;
+                case 'M':    // new paragraph
+                    *to++ = '\n';
+                    *to++ = '\n';
+                    break;
+                }
+                break;
+            }
+            continue;
+        }
+        if (intoken) {
+            if (tokpos < 2045)
+                token[tokpos++] = *from;
+            token[tokpos+2] = 0;    // keep two zero bytes after the token
+        }
+        else
+            *to++ = *from;
+    }
+    *to++ = 0;
+    *to = 0;
+    return 0;
+}
diff --git a/src/modules/filters/gbfrtf.cpp b/src/modules/filters/gbfrtf.cpp
new file mode 100644
index 0000000..5f7d064
--- /dev/null
+++ b/src/modules/filters/gbfrtf.cpp
@@ -0,0 +1,277 @@
+/******************************************************************************
+ *
+ * gbfrtf - SWFilter descendant to convert all GBF tags to RTF tags
+ */
+
+
+#include <stdlib.h>
+#include <string.h>
+#include <gbfrtf.h>
+#include <ctype.h>
+
+// Default constructor; performs no work of its own.
+GBFRTF::GBFRTF()
+{
+}
+
+
+// Copies the NUL-terminated string str to the write position `to` (without
+// the terminator) and returns the advanced write position.
+static unsigned char *gbfrtfEmit(unsigned char *to, const char *str)
+{
+    while (*str)
+        *to++ = (unsigned char)*str++;
+    return to;
+}
+
+
+/*
+ * Converts GBF markup in text to RTF control words, in place.
+ *
+ * text        - NUL-terminated buffer that is rewritten in place
+ * maxlen      - size of the buffer; when it is larger than the string, the
+ *               input is first moved to the end of the buffer so that the
+ *               output is written behind the read position.  The RTF output
+ *               is usually longer than the GBF input, so the caller is
+ *               presumably expected to supply enough slack -- nothing here
+ *               checks for it.
+ * key, module - not used by this filter
+ *
+ * Unrecognised tags are removed.  Always returns 0.
+ */
+char GBFRTF::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+{
+    unsigned char *to, *from;
+    char token[2048];
+    int tokpos = 0;
+    bool intoken = false;
+    int len;
+    const char *tok;
+
+    // Start with an empty token so it is never read uninitialised.
+    token[0] = token[1] = token[2] = 0;
+
+    len = strlen(text) + 1;    // shift string to right of buffer
+    if (len < maxlen) {
+        memmove(&text[maxlen - len], text, len);
+        from = (unsigned char *)&text[maxlen - len];
+    }
+    else from = (unsigned char *)text;
+
+    for (to = (unsigned char *)text; *from; from++) {
+        if (*from == '<') {
+            intoken = true;
+            tokpos = 0;
+            token[0] = token[1] = token[2] = 0;
+            continue;
+        }
+        if (*from == '>') {
+            // A '>' that closes nothing is dropped, as before, but it no
+            // longer re-processes whatever tag happened to be seen last.
+            if (!intoken)
+                continue;
+            intoken = false;
+            // process desired tokens
+            switch (*token) {
+            case 'W':    // Strongs
+                switch (token[1]) {
+                case 'G':    // Greek
+                case 'H':    // Hebrew
+                    to = gbfrtfEmit(to, "{\\fs17 <");
+                    to = gbfrtfEmit(to, token + 2);
+                    to = gbfrtfEmit(to, ">}");
+                    break;
+                case 'T': {  // Tense
+                    bool separate = false;
+                    to = gbfrtfEmit(to, "{\\fs17 (");
+                    for (tok = token + 2; *tok; tok++) {
+                        if (separate) {
+                            to = gbfrtfEmit(to, "; ");
+                            separate = false;
+                        }
+                        if (*tok == 'G' || *tok == 'H') {
+                            // copy the run of digits following the letter
+                            for (tok++; *tok; tok++) {
+                                // cast: isdigit() is undefined for negative char values
+                                if (isdigit((unsigned char)*tok)) {
+                                    *to++ = *tok;
+                                    separate = true;
+                                }
+                                else {
+                                    tok--;
+                                    break;
+                                }
+                            }
+                        }
+                        else {
+                            for (; *tok; tok++)
+                                *to++ = *tok;
+                        }
+                        // Stop on the terminator instead of stepping past it
+                        // and relying on the padding NULs behind the token.
+                        if (!*tok)
+                            break;
+                    }
+                    to = gbfrtfEmit(to, ")}");
+                    break;
+                }
+                }
+                break;
+            case 'R':
+                switch (token[1]) {
+                case 'X':
+                    *to++ = '#';
+                    break;
+                case 'x':
+                    *to++ = '|';
+                    break;
+                case 'F':    // footnote begin
+                    to = gbfrtfEmit(to, "{\\i1 \\fs17 (");
+                    break;
+                case 'f':    // footnote end
+                    to = gbfrtfEmit(to, ") }");
+                    break;
+                }
+                break;
+            case 'F':    // font tags
+                switch (token[1]) {
+                case 'I':    // italic start
+                    to = gbfrtfEmit(to, "\\i1 ");
+                    break;
+                case 'i':    // italic end
+                    to = gbfrtfEmit(to, "\\i0 ");
+                    break;
+                case 'B':    // bold start
+                    to = gbfrtfEmit(to, "\\b1 ");
+                    break;
+                case 'b':    // bold end
+                    to = gbfrtfEmit(to, "\\b0 ");
+                    break;
+                case 'N':    // font name start; only "Symbol" selects a font
+                    *to++ = '{';
+                    if (!strnicmp(token+2, "Symbol", 6))
+                        to = gbfrtfEmit(to, "\\f7 ");
+                    break;
+                case 'n':    // font name end
+                    *to++ = '}';
+                    break;
+                case 'S':    // superscript start
+                    to = gbfrtfEmit(to, "{\\super ");
+                    break;
+                case 's':    // superscript end
+                    *to++ = '}';
+                    break;
+                case 'R':    // colour 6 group start
+                    to = gbfrtfEmit(to, "{\\cf6 ");
+                    break;
+                case 'r':    // colour 6 group end
+                    *to++ = '}';
+                    break;
+                }
+                break;
+            case 'C':    // special character tags
+                switch (token[1]) {
+                case 'A':    // ASCII value
+                    *to++ = (char)atoi(&token[2]);
+                    break;
+                case 'G':
+                    *to++ = '>';
+                    break;
+                case 'L':    // line break
+                    to = gbfrtfEmit(to, "\\line ");
+                    break;
+                case 'M':    // new paragraph
+                    to = gbfrtfEmit(to, "\\par ");
+                    break;
+                case 'T':
+                    *to++ = '<';
+                    break;
+                }
+                break;
+            case 'T':    // title formatting
+                switch (token[1]) {
+                case 'T':    // Book title begin
+                    to = gbfrtfEmit(to, "{\\fs22 ");
+                    break;
+                case 't':    // Book title end
+                    *to++ = '}';
+                    break;
+                case 'S':    // section title begin
+                    to = gbfrtfEmit(to, "\\par {\\i1\\b1 ");
+                    break;
+                case 's':    // section title end
+                    to = gbfrtfEmit(to, "}\\par ");
+                    break;
+                }
+                break;
+            }
+            continue;
+        }
+        if (intoken) {
+            // Collect the tag text; over-long tags are silently truncated.
+            if (tokpos < 2045)
+                token[tokpos++] = *from;
+            token[tokpos+2] = 0;
+        }
+        else *to++ = *from;
+    }
+    *to++ = 0;
+    *to = 0;
+    return 0;
+}
diff --git a/src/modules/filters/gbfstrongs.cpp b/src/modules/filters/gbfstrongs.cpp
new file mode 100644
index 0000000..40fc958
--- /dev/null
+++ b/src/modules/filters/gbfstrongs.cpp
@@ -0,0 +1,98 @@
+/******************************************************************************
+ *
+ * gbfstrongs - SWFilter descendant to hide or show strongs number
+ * in a GBF module.
+ */
+
+
+#include <stdlib.h>
+#include <string.h>
+#include <gbfstrongs.h>
+#ifndef __GNUC__
+#else
+#include <unixstr.h>
+#endif
+
+
+// Strings for the Strong's-number option: the two values compared against
+// and returned by setOptionValue()/getOptionValue(), followed by the
+// option's name and its tooltip text.
+const char GBFStrongs::on[] = "On";
+const char GBFStrongs::off[] = "Off";
+const char GBFStrongs::optName[] = "Strong's Numbers";
+const char GBFStrongs::optTip[] = "Toggles Strong's Numbers On and Off if they exist";
+
+
+// Registers the "On" and "Off" choices and starts with the option off,
+// i.e. ProcessText() strips Strong's numbers until it is switched on.
+GBFStrongs::GBFStrongs()
+{
+    options.push_back(on);
+    options.push_back(off);
+    option = false;
+}
+
+
+// Destructor; nothing to release here.
+GBFStrongs::~GBFStrongs()
+{
+}
+
+// Switches the option on when ival equals "On" (compared case-insensitively
+// via stricmp); any other value switches it off.
+void GBFStrongs::setOptionValue(const char *ival)
+{
+    const bool wantsOn = (stricmp(ival, on) == 0);
+    option = wantsOn;
+}
+
+// Returns "On" or "Off" according to the current option state.
+const char *GBFStrongs::getOptionValue()
+{
+    if (option)
+        return on;
+    return off;
+}
+
+/*
+ * Removes Strong's number tags (<WG..>, <WH..>) from GBF text, in place,
+ * when the option is off.  When the option is on the text is left untouched.
+ *
+ * If a removed tag is directly followed by a space or by one of , ; . ? ! ) ' "
+ * and the last character written was a space, that space is removed as well
+ * so no gap is left before the punctuation.  All other tags are copied
+ * through unchanged.
+ *
+ * text        - NUL-terminated buffer that is rewritten in place
+ * maxlen      - size of the buffer; when it is larger than the string, the
+ *               input is first moved to the end of the buffer
+ * key, module - not used by this filter
+ *
+ * Always returns 0.
+ */
+char GBFStrongs::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+{
+    if (!option) {    // if we don't want strongs
+        char *to, *from, token[2048];    // cheese.  Fix.
+        int tokpos = 0;
+        bool intoken = false;
+        int len;
+        bool lastspace = false;    // true while the last byte written is a text space
+
+        // Start with an empty token so it is never read uninitialised.
+        token[0] = token[1] = token[2] = 0;
+
+        len = strlen(text) + 1;    // shift string to right of buffer
+        if (len < maxlen) {
+            memmove(&text[maxlen - len], text, len);
+            from = &text[maxlen - len];
+        }
+        else from = text;
+
+        for (to = text; *from; from++) {
+            if (*from == '<') {
+                intoken = true;
+                tokpos = 0;
+                token[0] = token[1] = token[2] = 0;
+                continue;
+            }
+            // Only a '>' that closes an open tag is treated as markup; a
+            // stray '>' falls through and is copied like ordinary text
+            // instead of re-emitting the previously seen tag.
+            if (*from == '>' && intoken) {
+                intoken = false;
+                if (*token == 'W' && (token[1] == 'G' || token[1] == 'H')) {    // Strongs
+                    // from[1] is checked for NUL first because strchr()
+                    // also matches the terminator of the search set.
+                    if (lastspace && from[1] && strchr(" ,;.?!)'\"", from[1])) {
+                        to--;
+                        lastspace = false;
+                    }
+                    continue;
+                }
+                // if not a strongs token, keep token in text
+                *to++ = '<';
+                for (char *tok = token; *tok; tok++)
+                    *to++ = *tok;
+                *to++ = '>';
+                // The last byte written is now '>', not a space.  Without
+                // this, a Strong's tag following this tag would step back
+                // over the '>' and corrupt the markup just written.
+                lastspace = false;
+                continue;
+            }
+            if (intoken) {
+                // Collect the tag text; over-long tags are silently truncated.
+                if (tokpos < 2045)
+                    token[tokpos++] = *from;
+                token[tokpos+2] = 0;
+            }
+            else {
+                *to++ = *from;
+                lastspace = (*from == ' ');
+            }
+        }
+        *to++ = 0;
+        *to = 0;
+    }
+    return 0;
+}
diff --git a/src/modules/filters/gbfthml.cpp b/src/modules/filters/gbfthml.cpp
new file mode 100644
index 0000000..ca03e71
--- /dev/null
+++ b/src/modules/filters/gbfthml.cpp
@@ -0,0 +1,463 @@
+/***************************************************************************
+ gbfthml.cpp - GBF to ThML filter
+ -------------------
+ begin : 1999-10-27
+ copyright : 2001 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include <gbfthml.h>
+
+
+// Default constructor; performs no work of its own.
+GBFThML::GBFThML() {
+}
+
+
+// Copies the NUL-terminated string str to the write position `to` (without
+// the terminator) and returns the advanced write position.
+static char *gbfthmlEmit(char *to, const char *str)
+{
+    while (*str)
+        *to++ = *str++;
+    return to;
+}
+
+
+/*
+ * Converts GBF markup in text to ThML markup, in place.
+ *
+ * text   - NUL-terminated buffer that is rewritten in place
+ * maxlen - size of the buffer; when it is larger than the string, the input
+ *          is first moved to the end of the buffer so that the output is
+ *          written behind the read position.  The ThML output is usually
+ *          longer than the GBF input, so the caller is presumably expected
+ *          to supply enough slack -- nothing here checks for it.
+ *
+ * Unrecognised tags are removed.  Always returns 0.
+ */
+char GBFThML::ProcessText(char *text, int maxlen)
+{
+    char *to, *from, token[2048];
+    int tokpos = 0;
+    bool intoken = false;
+    int len;
+
+    // Start with an empty token so it is never read uninitialised.
+    token[0] = token[1] = token[2] = 0;
+
+    len = strlen(text) + 1;    // shift string to right of buffer
+    if (len < maxlen) {
+        memmove(&text[maxlen - len], text, len);
+        from = &text[maxlen - len];
+    }
+    else from = text;
+
+    for (to = text; *from; from++)
+    {
+        if (*from == '<') {
+            intoken = true;
+            tokpos = 0;
+            token[0] = token[1] = token[2] = 0;
+            continue;
+        }
+        if (*from == '>')
+        {
+            // A '>' that closes nothing is dropped, as before, but it no
+            // longer re-processes whatever tag happened to be seen last.
+            if (!intoken)
+                continue;
+            intoken = false;
+            // process desired tokens
+            switch (*token) {
+            case 'W':    // Strongs
+                switch (token[1]) {
+                case 'G':
+                case 'H':
+                    // the value keeps the G/H letter, hence token + 1
+                    to = gbfthmlEmit(to, "<sync type=\"Strongs\" value=\"");
+                    to = gbfthmlEmit(to, token + 1);
+                    to = gbfthmlEmit(to, "\" />");
+                    break;
+                case 'T':    // Tense
+                    to = gbfthmlEmit(to, "<sync type=\"Morph\" value=\"");
+                    to = gbfthmlEmit(to, token + 2);
+                    to = gbfthmlEmit(to, "\" />");
+                    break;
+                }
+                break;
+            case 'R':
+                switch (token[1]) {
+                case 'X':    // cross reference begin
+                    to = gbfthmlEmit(to, "<a href=\"");
+                    // The reference starts at the fourth character of the
+                    // tag.  The old stop test (*tok+1 != 'R' && *tok+2 != 'x')
+                    // did arithmetic on the character instead of indexing,
+                    // so it cut the reference at the first 'Q' or 'v'.  A
+                    // token can never contain '<', so the rest of the token
+                    // is the reference.  Tags shorter than that yield an
+                    // empty href.
+                    if (tokpos > 3)
+                        to = gbfthmlEmit(to, token + 3);
+                    to = gbfthmlEmit(to, "\">");
+                    break;
+                case 'x':    // cross reference end
+                    to = gbfthmlEmit(to, "</a>");
+                    break;
+                case 'F':    // footnote begin
+                    to = gbfthmlEmit(to, "<note place=\"foot\">");
+                    break;
+                case 'f':    // footnote end
+                    to = gbfthmlEmit(to, "</note>");
+                    break;
+                }
+                break;
+            case 'F':    // font tags
+                switch (token[1]) {
+                case 'N':    // font name begin
+                    to = gbfthmlEmit(to, "<font face=\"");
+                    to = gbfthmlEmit(to, token + 2);
+                    to = gbfthmlEmit(to, "\">");
+                    break;
+                case 'n':    // font name end
+                    to = gbfthmlEmit(to, "</font>");
+                    break;
+                case 'I':    // italic start
+                    to = gbfthmlEmit(to, "<i>");
+                    break;
+                case 'i':    // italic end
+                    to = gbfthmlEmit(to, "</i>");
+                    break;
+                case 'B':    // bold start
+                    to = gbfthmlEmit(to, "<b>");
+                    break;
+                case 'b':    // bold end
+                    to = gbfthmlEmit(to, "</b>");
+                    break;
+                case 'R':    // words of Jesus begin
+                    to = gbfthmlEmit(to, "<font color=\"#ff0000\">");
+                    break;
+                case 'r':    // words of Jesus end
+                    to = gbfthmlEmit(to, "</font>");
+                    break;
+                case 'U':    // Underline start
+                    to = gbfthmlEmit(to, "<u>");
+                    break;
+                case 'u':    // Underline end
+                    to = gbfthmlEmit(to, "</u>");
+                    break;
+                case 'O':    // Old Testament quote begin
+                    to = gbfthmlEmit(to, "<cite>");
+                    break;
+                case 'o':    // Old Testament quote end
+                    to = gbfthmlEmit(to, "</cite>");
+                    break;
+                case 'S':    // Superscript begin
+                    to = gbfthmlEmit(to, "<sup>");
+                    break;
+                case 's':    // Superscript end
+                    to = gbfthmlEmit(to, "</sup>");
+                    break;
+                case 'V':    // Subscript begin
+                    to = gbfthmlEmit(to, "<sub>");
+                    break;
+                case 'v':    // Subscript end
+                    to = gbfthmlEmit(to, "</sub>");
+                    break;
+                }
+                break;
+            case 'C':    // special character tags
+                switch (token[1]) {
+                case 'A':    // ASCII value
+                    *to++ = (char)atoi(&token[2]);
+                    break;
+                case 'L':    // line break
+                    to = gbfthmlEmit(to, "<br /> ");
+                    break;
+                case 'M':    // new paragraph
+                    to = gbfthmlEmit(to, "<p />");
+                    break;
+                // <CG> and <CT> produce no output in this filter.
+                }
+                break;
+            case 'T':    // title formatting
+                switch (token[1]) {
+                case 'T':    // Book title begin
+                    to = gbfthmlEmit(to, "<big>");
+                    break;
+                case 't':    // Book title end
+                    to = gbfthmlEmit(to, "</big>");
+                    break;
+                case 'S':    // section heading begin
+                    to = gbfthmlEmit(to, "<div class=\"sechead\">");
+                    break;
+                case 's':    // section heading end
+                    to = gbfthmlEmit(to, "</div>");
+                    break;
+                }
+                break;
+            case 'P':    // special formatting
+                switch (token[1]) {
+                case 'P':    // Poetry begin
+                    to = gbfthmlEmit(to, "<verse>");
+                    break;
+                case 'p':    // Poetry end
+                    to = gbfthmlEmit(to, "</verse>");
+                    break;
+                }
+                break;
+            }
+            continue;
+        }
+        if (intoken) {
+            // Collect the tag text; over-long tags are silently truncated.
+            if (tokpos < 2045)
+                token[tokpos++] = *from;
+            token[tokpos+2] = 0;
+        }
+        else *to++ = *from;
+    }
+    *to++ = 0;
+    *to = 0;
+    return 0;
+}
+
+
+
diff --git a/src/modules/filters/greeklexattribs.cpp b/src/modules/filters/greeklexattribs.cpp
new file mode 100644
index 0000000..fb166df
--- /dev/null
+++ b/src/modules/filters/greeklexattribs.cpp
@@ -0,0 +1,58 @@
+/******************************************************************************
+ *
+ * greeklexattribs - SWFilter descendant to set entry attributes for greek
+ * lexicons
+ */
+
+
+#include <stdlib.h>
+#include <string.h>
+#include <greeklexattribs.h>
+#include <swmodule.h>
+
+
+// Default constructor; performs no work of its own.
+GreekLexAttribs::GreekLexAttribs()
+{
+}
+
+
+/*
+ * Scans a lexicon entry for its "AV-" section and records, for every phrase
+ * found there, the number that follows it as
+ *   module->getEntryAttributes()["AVPhrase"][phrase]["Frequency"].
+ * The section ends at the next ';'.  The text itself is not modified, and
+ * nothing is done unless module->isProcessEntryAttributes() is true.
+ *
+ * text   - NUL-terminated entry text
+ * maxlen - not used
+ * key    - not used
+ * module - module whose entry attributes are filled in
+ *
+ * Always returns 0.
+ */
+char GreekLexAttribs::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) {
+
+    if (module->isProcessEntryAttributes()) {
+        char *from;
+        bool inAV = false;
+        string phrase;
+        string freq;
+        char *currentPhrase = 0;    // start of the phrase being collected, 0 if none
+
+        for (from = text; *from; from++) {
+            if (inAV) {
+                // The casts keep isalpha()/isdigit() defined for bytes
+                // above 0x7f, which are negative as plain char on many
+                // platforms.
+                if (currentPhrase == 0) {
+                    if (isalpha((unsigned char)*from))
+                        currentPhrase = from;
+                }
+                else {
+                    if ((!isalpha((unsigned char)*from)) && (*from != ' ')) {
+                        // phrase = text up to here minus its last character
+                        // (normally the blank in front of the number)
+                        phrase = "";
+                        phrase.append(currentPhrase, (int)(from - currentPhrase)-1);
+                        currentPhrase = from;
+                        while (*from && isdigit((unsigned char)*from)) from++;
+                        freq = "";
+                        freq.append(currentPhrase, (int)(from - currentPhrase));
+                        module->getEntryAttributes()["AVPhrase"][phrase]["Frequency"] = freq;
+                        currentPhrase = 0;
+                        // The digits may run up to the end of the text; stop
+                        // here so the loop increment cannot step past the
+                        // terminating NUL.
+                        if (!*from)
+                            break;
+                    }
+                }
+                if (*from == ';') inAV = false;
+
+            }
+            else if (!strncmp(from, "AV-", 3)) {
+                inAV = true;
+                from+=2;    // the loop increment skips the third character
+            }
+        }
+    }
+    return 0;
+}
+
+
diff --git a/src/modules/filters/latin1utf16.cpp b/src/modules/filters/latin1utf16.cpp
new file mode 100644
index 0000000..75ee998
--- /dev/null
+++ b/src/modules/filters/latin1utf16.cpp
@@ -0,0 +1,120 @@
+/******************************************************************************
+ *
+ * Latin1UTF16 - SWFilter descendant to convert a Latin-1 character to UTF-16
+ *
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <latin1utf16.h>
+
+// Default constructor; performs no work of its own.
+Latin1UTF16::Latin1UTF16()
+{
+}
+
+
+/*
+ * Widens every byte of text to one 16-bit code unit, in place.
+ *
+ * Bytes 0x80-0x9F are translated through the table below (it holds the
+ * Windows-1252 assignments; the five unassigned positions map to
+ * themselves).  Every other byte keeps its numeric value.  The result is
+ * terminated by a single 16-bit zero.
+ *
+ * text        - NUL-terminated buffer that is rewritten in place
+ * maxlen      - size of the buffer; when it is larger than the string, the
+ *               input is first moved to the end of the buffer.  The output
+ *               needs two bytes per input byte; nothing here checks that
+ *               the buffer is large enough for that.
+ * key, module - not used by this filter
+ *
+ * Always returns 0.
+ */
+char Latin1UTF16::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+{
+    // Code points for the bytes 0x80 .. 0x9F, indexed by (byte - 0x80).
+    static const unsigned short highControls[32] = {
+        0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
+        0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F,
+        0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
+        0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178
+    };
+
+    unsigned char *src = (unsigned char*)text;
+    unsigned short *dst = (unsigned short*)text;
+
+    // shift string to right of buffer
+    int size = strlen(text) + 1;
+    if (size < maxlen) {
+        memmove(&text[maxlen - size], text, size);
+        src = (unsigned char*)&text[maxlen - size];
+    }
+
+    while (*src) {
+        const unsigned char byte = *src++;
+        if (byte >= 0x80 && byte <= 0x9F)
+            *dst++ = highControls[byte - 0x80];
+        else
+            *dst++ = (unsigned short)byte;
+    }
+    *dst = 0;
+    return 0;
+}
diff --git a/src/modules/filters/latin1utf8.cpp b/src/modules/filters/latin1utf8.cpp
new file mode 100644
index 0000000..91af8dc
--- /dev/null
+++ b/src/modules/filters/latin1utf8.cpp
@@ -0,0 +1,179 @@
+/******************************************************************************
+ *
+ * Latin1UTF8 - SWFilter descendant to convert a Latin-1 character to UTF-8
+ *
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <latin1utf8.h>
+#include <swmodule.h>
+
+// Default constructor; performs no work of its own.
+Latin1UTF8::Latin1UTF8()
+{
+}
+
+
+/*
+ * Re-encodes single-byte text as UTF-8, in place.
+ *
+ * Bytes below 0x80 are copied.  Bytes 0x80-0x9F are replaced by the byte
+ * sequence from the table below (the UTF-8 forms of the Windows-1252
+ * assignments; unassigned positions become 0xC2 followed by the byte).
+ * Bytes 0xA0-0xBF become 0xC2 + byte, and bytes from 0xC0 up become
+ * 0xC3 + (byte - 0x40).
+ *
+ * text        - NUL-terminated buffer that is rewritten in place
+ * maxlen      - size of the buffer; when the string length equals maxlen
+ *               the size is taken to be (maxlen + 1) * FILTERPAD instead.
+ *               When the resulting size is larger than the string, the
+ *               input is first moved to the end of the buffer.
+ * key, module - not used by this filter
+ *
+ * Always returns 0.
+ */
+char Latin1UTF8::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+{
+    // UTF-8 sequences for the bytes 0x80 .. 0x9F, indexed by (byte - 0x80);
+    // each entry is zero-terminated.
+    static const unsigned char highControls[32][4] = {
+        {0xe2, 0x82, 0xac, 0}, {0xc2, 0x81, 0, 0},    {0xe2, 0x80, 0x9a, 0}, {0xc6, 0x92, 0, 0},
+        {0xe2, 0x80, 0x9e, 0}, {0xe2, 0x80, 0xa6, 0}, {0xe2, 0x80, 0xa0, 0}, {0xe2, 0x80, 0xa1, 0},
+        {0xcb, 0x86, 0, 0},    {0xe2, 0x80, 0xb0, 0}, {0xc5, 0xa0, 0, 0},    {0xe2, 0x80, 0xb9, 0},
+        {0xc5, 0x92, 0, 0},    {0xc2, 0x8d, 0, 0},    {0xc5, 0xbd, 0, 0},    {0xc2, 0x8f, 0, 0},
+        {0xc2, 0x90, 0, 0},    {0xe2, 0x80, 0x98, 0}, {0xe2, 0x80, 0x99, 0}, {0xe2, 0x80, 0x9c, 0},
+        {0xe2, 0x80, 0x9d, 0}, {0xe2, 0x80, 0xa2, 0}, {0xe2, 0x80, 0x93, 0}, {0xe2, 0x80, 0x94, 0},
+        {0xcb, 0x9c, 0, 0},    {0xe2, 0x84, 0xa2, 0}, {0xc5, 0xa1, 0, 0},    {0xe2, 0x80, 0xba, 0},
+        {0xc5, 0x93, 0, 0},    {0xc2, 0x9d, 0, 0},    {0xc5, 0xbe, 0, 0},    {0xc5, 0xb8, 0, 0}
+    };
+
+    unsigned char *src = (unsigned char*)text;
+    unsigned char *dst = (unsigned char*)text;
+
+    int size = strlen(text) + 1;
+    if (size == maxlen + 1)
+        maxlen = (maxlen + 1) * FILTERPAD;
+    // shift string to right of buffer
+    if (size < maxlen) {
+        memmove(&text[maxlen - size], text, size);
+        src = (unsigned char*)&text[maxlen - size];
+    }
+
+    while (*src) {
+        const unsigned char byte = *src++;
+        if (byte < 0x80) {
+            *dst++ = byte;
+        }
+        else if (byte < 0xa0) {
+            for (const unsigned char *seq = highControls[byte - 0x80]; *seq; seq++)
+                *dst++ = *seq;
+        }
+        else if (byte < 0xc0) {
+            *dst++ = 0xC2;
+            *dst++ = byte;
+        }
+        else {
+            *dst++ = 0xC3;
+            *dst++ = (byte - 0x40);
+        }
+    }
+    *dst++ = 0;
+    *dst = 0;
+    return 0;
+}
diff --git a/src/modules/filters/plainfootnotes.cpp b/src/modules/filters/plainfootnotes.cpp
new file mode 100644
index 0000000..96fc4d8
--- /dev/null
+++ b/src/modules/filters/plainfootnotes.cpp
@@ -0,0 +1,102 @@
+/***************************************************************************
+ plainfootnotes.cpp - description
+ -------------------
+ begin : Wed Oct 13 1999
+ copyright : (C) 1999 by The team of BibleTime
+ email : info@bibletime.de
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <plainfootnotes.h>
+#include <swkey.h>
+
+#include <stdlib.h>
+#include <string.h>
+#ifndef __GNUC__
+#else
+#include <unixstr.h>
+#endif
+
+// Strings for the footnote option: the two values compared against and
+// returned by setOptionValue()/getOptionValue(), followed by the option's
+// name and its tooltip text.
+const char PLAINFootnotes::on[] = "On";
+const char PLAINFootnotes::off[] = "Off";
+const char PLAINFootnotes::optName[] = "Footnotes";
+const char PLAINFootnotes::optTip[] = "Toggles Footnotes On and Off In Bible Texts If They Exist";
+
+// Registers the "On" and "Off" choices and starts with the option off,
+// i.e. ProcessText() strips footnotes until it is switched on.
+PLAINFootnotes::PLAINFootnotes()
+{
+    options.push_back(on);
+    options.push_back(off);
+    option = false;
+}
+
+// Destructor; nothing to release here.
+PLAINFootnotes::~PLAINFootnotes()
+{
+}
+
+
+// Switches the option on when ival equals "On" (compared case-insensitively
+// via stricmp); any other value switches it off.
+void PLAINFootnotes::setOptionValue(const char *ival)
+{
+    const bool wantsOn = (stricmp(ival, on) == 0);
+    option = wantsOn;
+}
+
+// Returns "On" or "Off" according to the current option state.
+const char *PLAINFootnotes::getOptionValue()
+{
+    if (option)
+        return on;
+    return off;
+}
+
+
+/*
+ * Removes footnotes from plain text, in place, when the option is off.
+ * A footnote is everything from '{' up to the next '}', braces included.
+ * When the option is on the text is left untouched.
+ *
+ * text        - NUL-terminated buffer that is rewritten in place
+ * maxlen      - size of the buffer; when it is larger than the string, the
+ *               input is first moved to the end of the buffer
+ * key, module - not used by this filter
+ *
+ * Always returns 0.
+ */
+char PLAINFootnotes::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+{
+    // The former token buffer, its position counter and the intoken and
+    // lastspace flags were removed: intoken was never set, so the code
+    // guarded by it could not run, and lastspace was never read.
+    if (!option) {    // if we don't want footnotes
+        char *to, *from;
+        int len;
+        bool hide = false;    // true while inside a { ... } footnote
+
+        len = strlen(text) + 1;    // shift string to right of buffer
+        if (len < maxlen)
+        {
+            memmove(&text[maxlen - len], text, len);
+            from = &text[maxlen - len];
+        }
+        else from = text;
+
+        for (to = text; *from; from++) {
+            if (*from == '{')    // Footnote start
+            {
+                hide = true;
+                continue;
+            }
+            if (*from == '}')    // Footnote end
+            {
+                hide = false;
+                continue;
+            }
+            if (!hide)
+                *to++ = *from;
+        }
+        *to++ = 0;
+        *to = 0;
+    }
+    return 0;
+}
+
diff --git a/src/modules/filters/plainhtml.cpp b/src/modules/filters/plainhtml.cpp
new file mode 100644
index 0000000..fefb029
--- /dev/null
+++ b/src/modules/filters/plainhtml.cpp
@@ -0,0 +1,134 @@
+/***************************************************************************
+ plainhtml.cpp - description
+ -------------------
+ begin : Thu Jun 24 1999
+ copyright : (C) 1999 by Torsten Uhlmann
+ email : TUhlmann@gmx.de
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include <plainhtml.h>
+
+
+// Default constructor; performs no work of its own.
+PLAINHTML::PLAINHTML() {
+}
+
+
+// Copies the NUL-terminated string str to the write position `to` (without
+// the terminator) and returns the advanced write position.
+static char *plainhtmlEmit(char *to, const char *str)
+{
+    while (*str)
+        *to++ = *str++;
+    return to;
+}
+
+
+/*
+ * Converts plain text to simple HTML, in place.
+ *
+ * Two consecutive newlines become <P>, a single newline becomes <BR>,
+ * '{' opens and '}' closes a small, coloured, parenthesised footnote span,
+ * and a space met after more than 5000 copied characters is replaced by
+ * <WBR>.  Everything else is copied unchanged.
+ *
+ * text        - NUL-terminated buffer that is rewritten in place
+ * maxlen      - size of the buffer; when it is larger than the string, the
+ *               input is first moved to the end of the buffer.  The output
+ *               can be longer than the input; nothing here checks that the
+ *               buffer is large enough for that.
+ * key, module - not used by this filter
+ *
+ * Always returns 0.
+ */
+char PLAINHTML::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+{
+    char *src = text;
+    char *dst = text;
+    int copied = 0;    // characters copied since the start or the last <WBR>
+
+    // shift string to right of buffer
+    int size = strlen(text) + 1;
+    if (size < maxlen) {
+        memmove(&text[maxlen - size], text, size);
+        src = &text[maxlen - size];
+    }
+
+    for (; *src; src++) {
+        const char ch = *src;
+
+        if (ch == '\n') {
+            if (src[1] == '\n') {    // paragraph: consume both newlines
+                dst = plainhtmlEmit(dst, "<P>");
+                src++;
+            }
+            else {                   // new line
+                dst = plainhtmlEmit(dst, "<BR>");
+            }
+        }
+        else if (ch == '{') {
+            dst = plainhtmlEmit(dst, "<FONT COLOR=#800000><SMALL> (");
+        }
+        else if (ch == '}') {
+            dst = plainhtmlEmit(dst, ") </SMALL></FONT>");
+        }
+        else if (ch == ' ' && copied > 5000) {
+            // the space itself is not copied
+            dst = plainhtmlEmit(dst, "<WBR>");
+            copied = 0;
+        }
+        else {
+            *dst++ = ch;
+            copied++;
+        }
+    }
+    *dst++ = 0;
+    *dst = 0;
+    return 0;
+}
diff --git a/src/modules/filters/rtfhtml.cpp b/src/modules/filters/rtfhtml.cpp
new file mode 100644
index 0000000..f0b842b
--- /dev/null
+++ b/src/modules/filters/rtfhtml.cpp
@@ -0,0 +1,99 @@
+/***************************************************************************
+ rtfhtml.cpp - description
+ -------------------
+ begin : Wed Oct 13 1999
+ copyright : (C) 1999 by The team of BibleTime
+ email : info@bibletime.de
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include <rtfhtml.h>
+
+
+// Default constructor; performs no work of its own.
+RTFHTML::RTFHTML()
+{
+}
+
+
+// Copies the NUL-terminated string str to the write position `to` (without
+// the terminator) and returns the advanced write position.
+static char *rtfhtmlEmit(char *to, const char *str)
+{
+    while (*str)
+        *to++ = *str++;
+    return to;
+}
+
+
+/*
+ * Translates a handful of RTF control words to HTML, in place:
+ *   \pard  closes an open <CENTER> (and is otherwise dropped)
+ *   \par   becomes "<P>" followed by a newline
+ *   "\ "   (backslash + space) is dropped
+ *   \qc    opens <CENTER> unless one is already open
+ * Any other backslash, and all other text, is copied unchanged.
+ *
+ * text        - NUL-terminated buffer that is rewritten in place
+ * maxlen      - size of the buffer; when it is larger than the string, the
+ *               input is first moved to the end of the buffer
+ * key, module - not used by this filter
+ *
+ * Always returns 0.
+ */
+char RTFHTML::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+{
+    char *src = text;
+    char *dst = text;
+    bool centered = false;
+
+    // shift string to right of buffer
+    int size = strlen(text) + 1;
+    if (size < maxlen) {
+        memmove(&text[maxlen - size], text, size);
+        src = &text[maxlen - size];
+    }
+
+    for (; *src; src++) {
+        if (*src != '\\') {
+            *dst++ = *src;
+            continue;
+        }
+
+        // a RTF command; each branch leaves src on the command's last
+        // character, the loop increment then moves past it
+        const char *cmd = src + 1;
+        if (!strncmp(cmd, "pard", 4)) {    // switch all modifier off
+            if (centered) {
+                dst = rtfhtmlEmit(dst, "</CENTER>");
+                centered = false;
+            }
+            src += 4;
+        }
+        else if (!strncmp(cmd, "par", 3)) {
+            dst = rtfhtmlEmit(dst, "<P>\n");
+            src += 3;
+        }
+        else if (*cmd == ' ') {
+            src += 1;
+        }
+        else if (!strncmp(cmd, "qc", 2)) {    // center on
+            if (!centered) {
+                dst = rtfhtmlEmit(dst, "<CENTER>");
+                centered = true;
+            }
+            src += 2;
+        }
+        else {
+            *dst++ = *src;    // unrecognised command: keep the backslash
+        }
+    }
+    *dst++ = 0;
+    *dst = 0;
+    return 0;
+}
diff --git a/src/modules/filters/rwphtml.cpp b/src/modules/filters/rwphtml.cpp
new file mode 100644
index 0000000..6f8ae4f
--- /dev/null
+++ b/src/modules/filters/rwphtml.cpp
@@ -0,0 +1,187 @@
+/***************************************************************************
+ rwphtml.cpp - description
+ -------------------
+ begin : Thu Jun 24 1999
+ copyright : (C) 1999 by Torsten Uhlmann
+ email : TUhlmann@gmx.de
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <rwphtml.h>
+
+// Default constructor; performs no initialization.
+RWPHTML::RWPHTML() {
+}
+
+
+/*
+ * Converts RWP markup in the NUL-terminated string text to HTML, in place.
+ *
+ *   \greek\     transliterated Greek between two backslashes is mapped letter
+ *               by letter to symbol-font characters and wrapped in
+ *               <FONT FACE="symbol">
+ *   #ref|       '#' opens a blue <FONT>; the next '|' closes it
+ *   {heading}   '{' emits "<BR><STRONG>" (followed by "<P>" when it is more
+ *               than ten characters into the entry); '}' emits " </STRONG>"
+ *   blank line  a newline that is followed by another newline becomes "<P>"
+ *
+ * When the string (with its NUL) is shorter than maxlen it is first moved to
+ * the end of the buffer so output can be written from the front.  key and
+ * module are not used.  Always returns 0.
+ *
+ * Compared with the earlier version this one
+ *   - stops the Greek scan at the end of the string instead of running past
+ *     it when the closing backslash is missing,
+ *   - never writes past greek_str (over-long phrases are truncated),
+ *   - terminates greek_str directly after the last letter, so the one-letter
+ *     look-ahead never reads an unwritten slot,
+ *   - passes only unsigned char values to the <ctype.h> predicates,
+ *   - measures "beginning of the entry" from where the input really starts.
+ */
+char RWPHTML::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+{
+    char *to, *from;
+    signed char greek_str[500];
+    bool inverse = false;
+    bool first_letter = false;
+    int len;
+
+    len = strlen(text) + 1;                     // shift string to right of buffer
+    if (len < maxlen) {
+        memmove(&text[maxlen - len], text, len);
+        from = &text[maxlen - len];
+    } else
+        from = text;
+    const char *const entryStart = from;        // first input character, shifted or not
+
+    for (to = text; *from; from++) {
+        if (*from == '\\') {
+            ++from;
+            int i = 0;
+            first_letter = true;
+            /* collect the greek word or phrase up to the closing backslash */
+            while (*from && *from != '\\') {
+                if (i < (int)sizeof(greek_str) - 1)
+                    greek_str[i++] = *from;
+                from++;
+            }
+            greek_str[i] = '\0';                // look-ahead below reads at most index i
+
+            /* convert to symbol font as best we can */
+            strcpy(to, "<I> </I><FONT FACE=\"symbol\">");
+            to += strlen(to);
+            for (int j = 0; j < i; j++) {
+                const signed char cur = greek_str[j];
+                const signed char next = greek_str[j + 1];
+
+                // 'h' before a vowel while first_letter is set: drop the 'h'
+                // and emit only the vowel
+                if (first_letter && cur == 'h') {
+                    char vowel = 0;
+                    switch (next) {
+                    case 'o': case 'a': case 'w': case 'u': case 'i': case 'e':
+                        vowel = next;
+                        break;
+                    case -109:
+                        vowel = 'w';
+                        break;
+                    case -120:
+                        vowel = 'h';
+                        break;
+                    }
+                    first_letter = false;
+                    if (vowel) {
+                        *to++ = vowel;
+                        ++j;
+                        continue;
+                    }
+                }
+                // digraphs th / ch / ph map to one symbol-font letter each
+                if (next == 'h' && (cur == 't' || cur == 'c' || cur == 'p')) {
+                    *to++ = (cur == 't') ? 'q' : (cur == 'c') ? 'c' : 'f';
+                    ++j;
+                    continue;
+                }
+                if (cur == -120) {
+                    *to++ = 'h';
+                    continue;
+                }
+                if (cur == -125) {
+                    *to++ = 'a';
+                    continue;
+                }
+                if (cur == -109) {
+                    if (next == 'i') ++j;       // a following 'i' is swallowed
+                    *to++ = 'w';
+                    continue;
+                }
+                if (cur == ' ')
+                    first_letter = true;        // the space itself is still copied below
+                if (cur == 's') {
+                    // 's' when a letter follows, otherwise 'V'
+                    if (isalpha((unsigned char)next)) *to++ = 's';
+                    // NOTE(review): this tests the 's' itself, which is always
+                    // printable, so the branch never fires; kept as found.
+                    else if (!isprint((unsigned char)cur)) *to++ = 's';
+                    else *to++ = 'V';
+                    continue;
+                }
+                if (cur == '\'') {
+                    continue;                   // apostrophes are dropped
+                }
+                *to++ = cur;
+            }
+            strcpy(to, "</FONT><I> </I>");
+            to += strlen(to);
+            if (!*from)
+                break;                          // unterminated phrase: input is exhausted
+            continue;
+        }
+        if (*from == '#') {                     // verse markings (e.g. "#Mark 1:1|")
+            inverse = true;
+            strcpy(to, "<FONT COLOR=#0000FF>");
+            to += strlen(to);
+            continue;
+        }
+        if ((*from == '|') && (inverse)) {
+            inverse = false;
+            strcpy(to, "</FONT>");
+            to += strlen(to);
+            continue;
+        }
+        if (*from == '{') {
+            strcpy(to, "<BR><STRONG>");
+            to += strlen(to);
+            if ((from - entryStart) > 10) {     // not the beginning of the entry
+                strcpy(to, "<P>");
+                to += strlen(to);
+            }
+            continue;
+        }
+        if (*from == '}') {
+            strcpy(to, " </STRONG>");
+            to += strlen(to);
+            continue;
+        }
+        if ((*from == '\n') && (from[1] == '\n')) {
+            strcpy(to, "<P>");
+            to += strlen(to);
+            continue;
+        }
+        *to++ = *from;
+    }
+    *to++ = 0;
+    *to = 0;
+    return 0;
+}
diff --git a/src/modules/filters/rwprtf.cpp b/src/modules/filters/rwprtf.cpp
new file mode 100644
index 0000000..8f7b074
--- /dev/null
+++ b/src/modules/filters/rwprtf.cpp
@@ -0,0 +1,107 @@
+/******************************************************************************
+ *
+ * rwprtf - SWFilter descendant to convert RWP markup to RTF tags
+ */
+
+
+#include <stdlib.h>
+#include <string.h>
+#include <rwprtf.h>
+
+
+// Default constructor; performs no initialization.
+RWPRTF::RWPRTF()
+{
+}
+
+
+// Copies the characters of s (without its terminating NUL) to dst and returns
+// the position just past them.
+static char *rwprtfEmit(char *dst, const char *s)
+{
+    const size_t n = strlen(s);
+    memcpy(dst, s, n);
+    return dst + n;
+}
+
+
+/*
+ * Converts RWP markup in the NUL-terminated string text to RTF, in place.
+ *
+ *   \greek\     the opening backslash becomes "[{\f8 ", the closing one "}]";
+ *               every 'h' / 'H' in between is dropped
+ *   #ref|       '#' becomes "{\cf2 #"; the next '|' becomes "|}"
+ *   {heading}   '{' becomes "{\b " (plus "\par " when it is more than ten
+ *               characters into the entry); '}' is copied through and so
+ *               closes that RTF group
+ *   blank line  a newline that is followed by another newline becomes
+ *               "\par\par "
+ *
+ * When the string (with its NUL) is shorter than maxlen it is first moved to
+ * the end of the buffer so output can be written from the front.  key and
+ * module are not used.  Always returns 0.
+ *
+ * The distance used for the "beginning of the entry" test is measured from
+ * the real start of the input; previously it was taken from
+ * &text[maxlen - len] even when the string had not been moved there.
+ */
+char RWPRTF::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+{
+    char *to, *from;
+    bool ingreek = false;
+    bool inverse = false;
+    int len;
+
+    len = strlen(text) + 1;                     // shift string to right of buffer
+    if (len < maxlen) {
+        memmove(&text[maxlen - len], text, len);
+        from = &text[maxlen - len];
+    }
+    else from = text;
+    const char *const entryStart = from;        // first input character, shifted or not
+
+    for (to = text; *from; from++) {
+        if (*from == '\\') {                    // toggles the greek run
+            to = rwprtfEmit(to, ingreek ? "}]" : "[{\\f8 ");
+            ingreek = !ingreek;
+            continue;
+        }
+
+        // 'h's are mostly useless in RWP transliterations. The greek is more
+        // correct without them.
+        if ((ingreek) && ((*from == 'h') || (*from == 'H')))
+            continue;
+
+        if (*from == '#') {                     // verse markings (e.g. "#Mark 1:1|")
+            inverse = true;
+            to = rwprtfEmit(to, "{\\cf2 #");
+            continue;
+        }
+        if ((*from == '|') && (inverse)) {
+            inverse = false;
+            to = rwprtfEmit(to, "|}");
+            continue;
+        }
+
+        if (*from == '{') {
+            to = rwprtfEmit(to, "{\\b ");
+            if ((from - entryStart) > 10)       // not the beginning of the entry
+                to = rwprtfEmit(to, "\\par ");
+            continue;
+        }
+
+        // '}' needs no special case: copied verbatim below, it closes the
+        // "{\b " group opened for '{'.
+
+        if ((*from == '\n') && (from[1] == '\n')) {
+            to = rwprtfEmit(to, "\\par\\par ");
+            continue;
+        }
+
+        *to++ = *from;
+    }
+    *to++ = 0;
+    *to = 0;
+    return 0;
+}
diff --git a/src/modules/filters/scsuutf8.cpp b/src/modules/filters/scsuutf8.cpp
new file mode 100644
index 0000000..d0d5ceb
--- /dev/null
+++ b/src/modules/filters/scsuutf8.cpp
@@ -0,0 +1,220 @@
+/******************************************************************************
+ *
+ * SCSUUTF8 - SWFilter descendant to convert a SCSU character to UTF-8
+ *
+ */
+
+
+/* This class is based on:
+ * http://czyborra.com/scsu/scsu.c written by Roman Czyborra@dds.nl
+ * on Andrea's balcony in North Amsterdam on 1998-08-04
+ * Thanks to Richard Verhoeven <rcb5@win.tue.nl> for his suggestion
+ * to correct the haphazard "if" after UQU to "else if" on 1998-10-01
+ *
+ * This is a deflator to UTF-8 output for input compressed in SCSU,
+ * the (Reuters) Standard Compression Scheme for Unicode as described
+ * in http://www.unicode.org/unicode/reports/tr6.html
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <swmodule.h>
+
+#include <scsuutf8.h>
+
+// Default constructor; performs no initialization.
+SCSUUTF8::SCSUUTF8()
+{
+}
+
+
+/*
+ * Appends one character to text as a UTF-8 sequence and returns the position
+ * just past what was written.
+ *
+ * uchar is either a code point or one UTF-16 surrogate.  A high surrogate
+ * (0xD800-0xDBFF) writes nothing: its ten payload bits are remembered and
+ * combined with the low surrogate (0xDC00-0xDFFF) passed on a later call.  No
+ * pairing sanity checks are made.  Values of 0x200000 and above write nothing.
+ *
+ * Fixes relative to the earlier version: the one-byte branch now writes uchar
+ * itself (it used to write an unrelated variable c), and the high surrogate
+ * keeps all ten payload bits (mask 0x3ff; 0x3f lost everything above U+1FFFF).
+ */
+unsigned char* SCSUUTF8::UTF8Output(unsigned long uchar, unsigned char* text)
+{
+    /* join UTF-16 surrogates without any pairing sanity checks */
+
+    static unsigned long pendingHigh;           // payload of the last high surrogate
+
+    if (uchar >= 0xd800 && uchar <= 0xdbff) {
+        pendingHigh = uchar & 0x3ff;
+        return text;
+    }
+    if (uchar >= 0xdc00 && uchar <= 0xdfff) {
+        // 0xdc00 + 0x2400 == 0x10000, so this yields
+        // 0x10000 + (high << 10) + (low - 0xdc00)
+        uchar = uchar + 0x2400 + pendingHigh * 0x400;
+        pendingHigh = 0;
+    }
+
+    /* output one character as UTF-8 multibyte sequence */
+
+    if (uchar < 0x80) {
+        *text++ = (unsigned char)uchar;
+    }
+    else if (uchar < 0x800) {
+        *text++ = (unsigned char)(0xc0 | (uchar >> 6));
+        *text++ = (unsigned char)(0x80 | (uchar & 0x3f));
+    }
+    else if (uchar < 0x10000) {
+        *text++ = (unsigned char)(0xe0 | (uchar >> 12));
+        *text++ = (unsigned char)(0x80 | ((uchar >> 6) & 0x3f));
+        *text++ = (unsigned char)(0x80 | (uchar & 0x3f));
+    }
+    else if (uchar < 0x200000) {
+        *text++ = (unsigned char)(0xf0 | (uchar >> 18));
+        *text++ = (unsigned char)(0x80 | ((uchar >> 12) & 0x3f));
+        *text++ = (unsigned char)(0x80 | ((uchar >> 6) & 0x3f));
+        *text++ = (unsigned char)(0x80 | (uchar & 0x3f));
+    }
+
+    return text;
+}
+
+/*
+ * Decodes len bytes of SCSU-compressed data in text to UTF-8, in place, and
+ * terminates the result with two NUL bytes.  Returns 0.
+ *
+ * The input is first moved to offset len * FILTERPAD - len, so the buffer is
+ * assumed to hold at least len * FILTERPAD bytes (TODO confirm with callers).
+ * key and module are not used.
+ *
+ * c and d are not declared in this function; presumably they are data members
+ * of the class (TODO confirm against the header).
+ *
+ * Fixes relative to the earlier version:
+ *   - win[0x67] is 0x3380 (x * 0x80) rather than 0x3800;
+ *   - the dynamic window offsets are held in unsigned long, so the extended
+ *     windows defined by SDX / UDX (0x10000 and above) are no longer
+ *     truncated to 16 bits;
+ *   - the dynamic windows start from their default offsets on every call
+ *     instead of keeping whatever an earlier call defined.
+ */
+char SCSUUTF8::ProcessText(char *text, int len, const SWKey *key, const SWModule *module)
+{
+    unsigned char *to, *from;
+    unsigned long buflen = len * FILTERPAD;
+    int active = 0;                             // index of the active dynamic window
+    char mode = 0;                              // non-zero while in Unicode mode
+
+    // static window offsets, used by the SQn single-byte quotes
+    static const unsigned short start[8] = {0x0000,0x0080,0x0100,0x0300,0x2000,0x2080,0x2100,0x3000};
+    // default offsets of the dynamic windows
+    static const unsigned long slideDefault[8] = {0x0080,0x00C0,0x0400,0x0600,0x0900,0x3040,0x30A0,0xFF00};
+    // window offset selected by the byte that follows SDn / UDn
+    static const unsigned short win[256] = {
+        0x0000, 0x0080, 0x0100, 0x0180, 0x0200, 0x0280, 0x0300, 0x0380,
+        0x0400, 0x0480, 0x0500, 0x0580, 0x0600, 0x0680, 0x0700, 0x0780,
+        0x0800, 0x0880, 0x0900, 0x0980, 0x0A00, 0x0A80, 0x0B00, 0x0B80,
+        0x0C00, 0x0C80, 0x0D00, 0x0D80, 0x0E00, 0x0E80, 0x0F00, 0x0F80,
+        0x1000, 0x1080, 0x1100, 0x1180, 0x1200, 0x1280, 0x1300, 0x1380,
+        0x1400, 0x1480, 0x1500, 0x1580, 0x1600, 0x1680, 0x1700, 0x1780,
+        0x1800, 0x1880, 0x1900, 0x1980, 0x1A00, 0x1A80, 0x1B00, 0x1B80,
+        0x1C00, 0x1C80, 0x1D00, 0x1D80, 0x1E00, 0x1E80, 0x1F00, 0x1F80,
+        0x2000, 0x2080, 0x2100, 0x2180, 0x2200, 0x2280, 0x2300, 0x2380,
+        0x2400, 0x2480, 0x2500, 0x2580, 0x2600, 0x2680, 0x2700, 0x2780,
+        0x2800, 0x2880, 0x2900, 0x2980, 0x2A00, 0x2A80, 0x2B00, 0x2B80,
+        0x2C00, 0x2C80, 0x2D00, 0x2D80, 0x2E00, 0x2E80, 0x2F00, 0x2F80,
+        0x3000, 0x3080, 0x3100, 0x3180, 0x3200, 0x3280, 0x3300, 0x3380,
+        0xE000, 0xE080, 0xE100, 0xE180, 0xE200, 0xE280, 0xE300, 0xE380,
+        0xE400, 0xE480, 0xE500, 0xE580, 0xE600, 0xE680, 0xE700, 0xE780,
+        0xE800, 0xE880, 0xE900, 0xE980, 0xEA00, 0xEA80, 0xEB00, 0xEB80,
+        0xEC00, 0xEC80, 0xED00, 0xED80, 0xEE00, 0xEE80, 0xEF00, 0xEF80,
+        0xF000, 0xF080, 0xF100, 0xF180, 0xF200, 0xF280, 0xF300, 0xF380,
+        0xF400, 0xF480, 0xF500, 0xF580, 0xF600, 0xF680, 0xF700, 0xF780,
+        0xF800, 0xF880, 0xF900, 0xF980, 0xFA00, 0xFA80, 0xFB00, 0xFB80,
+        0xFC00, 0xFC80, 0xFD00, 0xFD80, 0xFE00, 0xFE80, 0xFF00, 0xFF80,
+        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        0x0000, 0x00C0, 0x0250, 0x0370, 0x0530, 0x3040, 0x30A0, 0xFF60
+    };
+
+    if (!len)
+        return 0;
+
+    // per-call copy of the dynamic windows, wide enough for extended offsets
+    unsigned long slide[8];
+    for (int w = 0; w < 8; w++)
+        slide[w] = slideDefault[w];
+
+    // shift the input to the far end of the buffer
+    memmove(&text[buflen - len], text, len);
+    from = (unsigned char*)&text[buflen - len];
+    to = (unsigned char *)text;
+
+    // Every read of an operand byte is preceded by a bounds check; running
+    // out of input in the middle of a sequence simply ends decoding.
+    for (int i = 0; i < len;) {
+        c = from[i++];
+
+        if (c >= 0x80)                          /* character in the active window */
+        {
+            to = UTF8Output (c - 0x80 + slide[active], to);
+        }
+        else if (c >= 0x20 && c <= 0x7F)        /* ASCII */
+        {
+            to = UTF8Output (c, to);
+        }
+        else if (c == 0x0 || c == 0x9 || c == 0xA || c == 0xC || c == 0xD)
+        {
+            to = UTF8Output (c, to);            /* NUL, TAB, LF, FF, CR pass through */
+        }
+        else if (c >= 0x1 && c <= 0x8)          /* SQn */
+        {
+            if (i >= len) break;
+            /* single quote */ d = from[i++];
+
+            to = UTF8Output (d < 0x80 ? d + start [c - 0x1] :
+                             d - 0x80 + slide [c - 0x1], to);
+        }
+        else if (c >= 0x10 && c <= 0x17)        /* SCn */
+        {
+            /* change window */ active = c - 0x10;
+        }
+        else if (c >= 0x18 && c <= 0x1F)        /* SDn */
+        {
+            /* define window */ active = c - 0x18;
+            if (i >= len) break;
+            slide [active] = win [from[i++]];
+        }
+        else if (c == 0xB)                      /* SDX */
+        {
+            if (i >= len) break;
+            c = from[i++];
+
+            if (i >= len) break;
+            d = from[i++];
+
+            slide [active = c>>5] = 0x10000 + (((c & 0x1F) << 8 | d) << 7);
+        }
+        else if (c == 0xE)                      /* SQU */
+        {
+            if (i >= len) break;
+            /* SQU */ c = from[i++];
+
+            if (i >= len) break;
+            to = UTF8Output (c << 8 | from[i++], to);
+        }
+        else if (c == 0xF)                      /* SCU */
+        {
+            /* change to Unicode mode */ mode = 1;
+
+            // a break here leaves only this inner loop; the outer loop then
+            // ends by itself because i >= len
+            while (mode)
+            {
+                if (i >= len) break;
+                c = from[i++];
+
+                if (c <= 0xDF || c >= 0xF3)     /* plain two-byte unit */
+                {
+                    if (i >= len) break;
+                    to = UTF8Output (c << 8 | from[i++], to);
+                }
+                else if (c == 0xF0)             /* UQU */
+                {
+                    if (i >= len) break;
+                    c = from[i++];
+
+                    if (i >= len) break;
+                    to = UTF8Output (c << 8 | from[i++], to);
+                }
+                else if (c >= 0xE0 && c <= 0xE7) /* UCn */
+                {
+                    active = c - 0xE0; mode = 0;
+                }
+                else if (c >= 0xE8 && c <= 0xEF) /* UDn */
+                {
+                    if (i >= len) break;
+                    slide [active=c-0xE8] = win [from[i++]]; mode = 0;
+                }
+                else if (c == 0xF1)             /* UDX */
+                {
+                    if (i >= len) break;
+                    c = from[i++];
+
+                    if (i >= len) break;
+                    d = from[i++];
+
+                    slide [active = c>>5] =
+                        0x10000 + (((c & 0x1F) << 8 | d) << 7); mode = 0;
+                }
+            }
+        }
+    }
+
+    *to++ = 0;
+    *to = 0;
+    return 0;
+}
+
+
diff --git a/src/modules/filters/swbasicfilter.cpp b/src/modules/filters/swbasicfilter.cpp
new file mode 100644
index 0000000..2865085
--- /dev/null
+++ b/src/modules/filters/swbasicfilter.cpp
@@ -0,0 +1,299 @@
+/******************************************************************************
+ * swbasicfilter.cpp - implementation of class SWBasicFilter. An SWFilter
+ * impl that provides some basic methods that
+ * many filters will need and can use as a starting
+ * point.
+ *
+ * $Id: swbasicfilter.cpp,v 1.17 2002/03/11 19:01:28 scribe Exp $
+ *
+ * Copyright 2001 CrossWire Bible Society (http://www.crosswire.org)
+ * CrossWire Bible Society
+ * P. O. Box 2528
+ * Tempe, AZ 85280-2528
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <swbasicfilter.h>
+#include <stdio.h>
+#include <stdarg.h>
+
+// Sets up the default delimiters ("<" ">" for tokens, "&" ";" for escape
+// strings) and switches case sensitivity and unknown-item pass-through off.
+SWBasicFilter::SWBasicFilter() {
+    // the delimiter pointers must be null before the setters see them
+    tokenStart = tokenEnd = escStart = escEnd = 0;
+
+    setTokenStart("<");
+    setTokenEnd(">");
+    setEscapeStart("&");
+    setEscapeEnd(";");
+
+    escStringCaseSensitive = tokenCaseSensitive = false;
+    passThruUnknownToken = passThruUnknownEsc = false;
+}
+
+
+// When val is true, tokens that handleToken() does not handle are copied to
+// the output together with their delimiters (see ProcessText).
+void SWBasicFilter::setPassThruUnknownToken(bool val)
+{
+    this->passThruUnknownToken = val;
+}
+
+
+// When val is true, escape strings that handleEscapeString() does not handle
+// are copied to the output together with their delimiters (see ProcessText).
+void SWBasicFilter::setPassThruUnknownEscapeString(bool val)
+{
+    this->passThruUnknownEsc = val;
+}
+
+
+// Chooses whether token substitutions are stored and looked up verbatim
+// (true) or upper-cased first (false).
+void SWBasicFilter::setTokenCaseSensitive(bool val)
+{
+    this->tokenCaseSensitive = val;
+}
+
+
+// Chooses whether escape-string substitutions are stored and looked up
+// verbatim (true) or upper-cased first (false).
+void SWBasicFilter::setEscapeStringCaseSensitive(bool val)
+{
+    this->escStringCaseSensitive = val;
+}
+
+
+// Releases the four delimiter strings.
+SWBasicFilter::~SWBasicFilter() {
+    // delete [] on a null pointer does nothing, so no guards are needed
+    delete [] tokenStart;
+    delete [] tokenEnd;
+    delete [] escStart;
+    delete [] escEnd;
+}
+
+
+// Registers replaceString as the output for the token findString.  Unless
+// tokens are case sensitive, the key is stored upper-cased.
+void SWBasicFilter::addTokenSubstitute(const char *findString, const char *replaceString) {
+    if (tokenCaseSensitive) {
+        tokenSubMap.insert(DualStringMap::value_type(findString, replaceString));
+        return;
+    }
+
+    char *upperKey = 0;
+    stdstr(&upperKey, findString);
+    toupperstr(upperKey);
+    tokenSubMap.insert(DualStringMap::value_type(upperKey, replaceString));
+    delete [] upperKey;
+}
+
+
+// Registers replaceString as the output for the escape string findString.
+// Unless escape strings are case sensitive, the key is stored upper-cased.
+void SWBasicFilter::addEscapeStringSubstitute(const char *findString, const char *replaceString) {
+    if (escStringCaseSensitive) {
+        escSubMap.insert(DualStringMap::value_type(findString, replaceString));
+        return;
+    }
+
+    char *upperKey = 0;
+    stdstr(&upperKey, findString);
+    toupperstr(upperKey);
+    escSubMap.insert(DualStringMap::value_type(upperKey, replaceString));
+    delete [] upperKey;
+}
+
+
+// printf-style append: formats into the memory at *buf and advances *buf by
+// the number of characters vsprintf reports.  No bounds checking is done, so
+// the caller must provide enough room.
+void SWBasicFilter::pushString(char **buf, const char *format, ...) {
+    va_list args;
+
+    va_start(args, format);
+    const int written = vsprintf(*buf, format, args);
+    va_end(args);
+
+    *buf += written;
+}
+
+
+// Looks token up in the token substitution map (upper-casing it first unless
+// tokens are case sensitive).  On a hit the replacement text is appended at
+// *buf, *buf is advanced, and true is returned; otherwise false.
+//
+// The replacement is written through "%s" so that a '%' inside it is copied
+// literally instead of being interpreted as a printf conversion.
+bool SWBasicFilter::substituteToken(char **buf, const char *token) {
+    DualStringMap::iterator it;
+
+    if (!tokenCaseSensitive) {
+        char *tmp = 0;
+        stdstr(&tmp, token);
+        toupperstr(tmp);
+        it = tokenSubMap.find(tmp);
+        delete [] tmp;
+    } else
+        it = tokenSubMap.find(token);
+
+    if (it != tokenSubMap.end()) {
+        pushString(buf, "%s", it->second.c_str());
+        return true;
+    }
+    return false;
+}
+
+
+// Looks escString up in the escape-string substitution map (upper-casing it
+// first unless escape strings are case sensitive).  On a hit the replacement
+// text is appended at *buf, *buf is advanced, and true is returned; otherwise
+// false.
+//
+// The replacement is written through "%s" so that a '%' inside it is copied
+// literally instead of being interpreted as a printf conversion.
+bool SWBasicFilter::substituteEscapeString(char **buf, const char *escString) {
+    DualStringMap::iterator it;
+
+    if (!escStringCaseSensitive) {
+        char *tmp = 0;
+        stdstr(&tmp, escString);
+        toupperstr(tmp);
+        it = escSubMap.find(tmp);
+        delete [] tmp;
+    } else
+        it = escSubMap.find(escString);
+
+    if (it != escSubMap.end()) {
+        pushString(buf, "%s", it->second.c_str());
+        return true;
+    }
+    return false;
+}
+
+
+// Default token handler: plain table substitution.  userData is not used
+// here.  Returns whether the token was substituted.
+bool SWBasicFilter::handleToken(char **buf, const char *token, DualStringMap &userData)
+{
+    const bool substituted = substituteToken(buf, token);
+    return substituted;
+}
+
+
+// Default escape-string handler: plain table substitution.  userData is not
+// used here.  Returns whether the escape string was substituted.
+bool SWBasicFilter::handleEscapeString(char **buf, const char *escString, DualStringMap &userData)
+{
+    const bool substituted = substituteEscapeString(buf, escString);
+    return substituted;
+}
+
+
+// Stores the delimiter that opens an escape string (the parameter shadows the
+// member of the same name).
+void SWBasicFilter::setEscapeStart(const char *escStart)
+{
+    stdstr(&this->escStart, escStart);
+}
+
+
+// Stores the delimiter that closes an escape string (the parameter shadows
+// the member of the same name).
+void SWBasicFilter::setEscapeEnd(const char *escEnd)
+{
+    stdstr(&this->escEnd, escEnd);
+}
+
+
+// Stores the delimiter that opens a token (the parameter shadows the member
+// of the same name).
+void SWBasicFilter::setTokenStart(const char *tokenStart)
+{
+    stdstr(&this->tokenStart, tokenStart);
+}
+
+
+// Stores the delimiter that closes a token (the parameter shadows the member
+// of the same name).
+void SWBasicFilter::setTokenEnd(const char *tokenEnd)
+{
+    stdstr(&this->tokenEnd, tokenEnd);
+}
+
+
+/*
+ * Scans the NUL-terminated string text, in place, for tokens (between
+ * tokenStart and tokenEnd) and escape strings (between escStart and escEnd).
+ * Each one is handed to handleToken() / handleEscapeString(); when the handler
+ * returns false and the matching pass-through flag is set, the item is copied
+ * to the output with its delimiters.  Text outside tokens is copied through
+ * unless a handler has set userData["suspendTextPassThru"] to "true".  The
+ * text seen since the previous token is offered to the handlers as
+ * userData["lastTextNode"].
+ *
+ * When the string (with its NUL) is shorter than maxlen it is first moved to
+ * the end of the buffer so output can be written from the front.  key and
+ * module are stored in the object before processing.  Always returns 0.
+ *
+ * Unknown tokens and the delimiters are written through "%s" so that a '%'
+ * inside them is copied literally instead of being interpreted as a printf
+ * conversion.  token now starts out empty, so an end delimiter met before any
+ * start delimiter hands the handlers "" rather than uninitialized memory.
+ */
+char SWBasicFilter::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) {
+    this->key = key;
+    this->module = module;
+    char *to, *from, token[4096];
+    int tokpos = 0;
+    bool intoken = false;
+    int len;
+    bool inEsc = false;
+    char escStartLen = strlen(escStart);
+    char escEndLen = strlen(escEnd);
+    char escStartPos = 0, escEndPos = 0;
+    char tokenStartLen = strlen(tokenStart);
+    char tokenEndLen = strlen(tokenEnd);
+    char tokenStartPos = 0, tokenEndPos = 0;
+    DualStringMap userData;
+    string lastTextNode;
+
+    token[0] = 0;
+
+    bool suspendTextPassThru = false;
+    userData["suspendTextPassThru"] = "false";
+
+    len = strlen(text) + 1;                     // shift string to right of buffer
+    if (len < maxlen) {
+        memmove(&text[maxlen - len], text, len);
+        from = &text[maxlen - len];
+    }
+    else from = text;
+
+
+    for (to = text; *from; from++) {
+        // start of a token: the last character of tokenStart opens it
+        if (*from == tokenStart[tokenStartPos]) {
+            if (tokenStartPos == (tokenStartLen - 1)) {
+                intoken = true;
+                tokpos = 0;
+                token[0] = 0;
+                token[1] = 0;
+                token[2] = 0;
+                inEsc = false;
+            }
+            else tokenStartPos++;
+            continue;
+        }
+
+        // start of an escape string: collected in the same token buffer
+        if (*from == escStart[escStartPos]) {
+            if (escStartPos == (escStartLen - 1)) {
+                intoken = true;
+                tokpos = 0;
+                token[0] = 0;
+                token[1] = 0;
+                token[2] = 0;
+                inEsc = true;
+            }
+            else escStartPos++;
+            continue;
+        }
+
+        // end of an escape string
+        // NOTE(review): escEndPos is never advanced here, so only one-character
+        // end delimiters can match; the same holds for tokenEndPos below.
+        if (inEsc) {
+            if (*from == escEnd[escEndPos]) {
+                if (escEndPos == (escEndLen - 1)) {
+                    intoken = false;
+                    userData["lastTextNode"] = lastTextNode;
+                    if ((!handleEscapeString(&to, token, userData)) && (passThruUnknownEsc)) {
+                        pushString(&to, "%s", escStart);
+                        pushString(&to, "%s", token);
+                        pushString(&to, "%s", escEnd);
+                    }
+                    escEndPos = escStartPos = tokenEndPos = tokenStartPos = 0;
+                    lastTextNode = "";
+                    suspendTextPassThru = (!userData["suspendTextPassThru"].compare("true"));
+                    continue;
+                }
+            }
+        }
+
+        // end of a token
+        // NOTE(review): this is not guarded by intoken, so an end delimiter in
+        // plain text hands the previous token to the handler again.
+        if (!inEsc) {
+            if (*from == tokenEnd[tokenEndPos]) {
+                if (tokenEndPos == (tokenEndLen - 1)) {
+                    intoken = false;
+                    userData["lastTextNode"] = lastTextNode;
+                    if ((!handleToken(&to, token, userData)) && (passThruUnknownToken)) {
+                        pushString(&to, "%s", tokenStart);
+                        pushString(&to, "%s", token);
+                        pushString(&to, "%s", tokenEnd);
+                    }
+                    escEndPos = escStartPos = tokenEndPos = tokenStartPos = 0;
+                    lastTextNode = "";
+                    suspendTextPassThru = (!userData["suspendTextPassThru"].compare("true"));
+                    continue;
+                }
+            }
+        }
+
+        if (intoken) {
+            // characters beyond 4090 are dropped; the +2 write keeps the
+            // buffer terminated two slots ahead of the fill position
+            if (tokpos < 4090)
+                token[tokpos++] = *from;
+            token[tokpos+2] = 0;
+        }
+        else {
+            if (!suspendTextPassThru)
+                *to++ = *from;
+            lastTextNode += *from;
+        }
+    }
+    *to++ = 0;
+    *to = 0;
+    return 0;
+}
+
+
+
diff --git a/src/modules/filters/thmlfootnotes.cpp b/src/modules/filters/thmlfootnotes.cpp
new file mode 100644
index 0000000..d9b1f0e
--- /dev/null
+++ b/src/modules/filters/thmlfootnotes.cpp
@@ -0,0 +1,103 @@
+/******************************************************************************
+ *
+ * thmlfootnotes - SWFilter descendant to hide or show footnotes
+ * in a ThML module.
+ */
+
+
+#include <stdlib.h>
+#include <string.h>
+#include <thmlfootnotes.h>
+#ifndef __GNUC__
+#else
+#include <unixstr.h>
+#endif
+
+
+// String constants of the footnotes option: its two values (compared against
+// in setOptionValue and returned by getOptionValue), its name and its tip text.
+const char ThMLFootnotes::on[] = "On";
+const char ThMLFootnotes::off[] = "Off";
+const char ThMLFootnotes::optName[] = "Footnotes";
+const char ThMLFootnotes::optTip[] = "Toggles Footnotes On and Off if they exist";
+
+
+// Registers the "On" and "Off" values and starts with the option cleared,
+// which makes ProcessText strip footnotes.
+ThMLFootnotes::ThMLFootnotes()
+{
+    options.push_back(on);
+    options.push_back(off);
+    option = false;
+}
+
+
+// Destructor; nothing to release here.
+ThMLFootnotes::~ThMLFootnotes()
+{
+}
+
+// Turns the option on when ival equals "On" (compared without regard to
+// case) and off for any other value.
+void ThMLFootnotes::setOptionValue(const char *ival)
+{
+    option = (stricmp(ival, on) == 0);
+}
+
+// Returns "On" or "Off" according to the current setting.
+const char *ThMLFootnotes::getOptionValue()
+{
+    if (option)
+        return on;
+    return off;
+}
+
+/*
+ * When the footnotes option is off, removes every <note ...> ... </note>
+ * section (the tags and everything between them) from the NUL-terminated
+ * string text, in place.  All other tags and text are kept.  When the option
+ * is on, text is left untouched.  Always returns 0.
+ *
+ * When the string (with its NUL) is shorter than maxlen it is first moved to
+ * the end of the buffer so output can be written from the front.  key and
+ * module are not used.
+ *
+ * A '>' that does not close a tag is now treated as ordinary text.
+ * Previously it was handled as the end of a tag, which read an uninitialized
+ * token when no '<' had been seen yet and otherwise emitted the previous tag
+ * a second time.
+ */
+char ThMLFootnotes::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+{
+    if (!option) {                              // if we don't want footnotes
+        char *to, *from, token[2048];           // cheese. Fix.
+        int tokpos = 0;
+        bool intoken = false;
+        int len;
+        bool hide = false;
+
+        token[0] = 0;
+
+        len = strlen(text) + 1;                 // shift string to right of buffer
+        if (len < maxlen) {
+            memmove(&text[maxlen - len], text, len);
+            from = &text[maxlen - len];
+        }
+        else from = text;
+
+        for (to = text; *from; from++) {
+            if (*from == '<') {                 // a tag starts: begin collecting
+                intoken = true;
+                tokpos = 0;
+                token[0] = 0;
+                token[1] = 0;
+                token[2] = 0;
+                continue;
+            }
+            if (*from == '>' && intoken) {      // process tokens
+                intoken = false;
+                if (!strncmp(token, "note", 4)) {
+                    hide = true;
+                    continue;
+                }
+                else if (!strncmp(token, "/note", 5)) {
+                    hide = false;
+                    continue;
+                }
+
+                // if not a footnote token, keep token in text
+                if (!hide) {
+                    *to++ = '<';
+                    for (char *tok = token; *tok; tok++)
+                        *to++ = *tok;
+                    *to++ = '>';
+                }
+                continue;
+            }
+            if (intoken) {
+                // characters beyond 2045 are dropped; the +2 write keeps the
+                // buffer terminated two slots ahead of the fill position
+                if (tokpos < 2045)
+                    token[tokpos++] = *from;
+                token[tokpos+2] = 0;
+            }
+            else {
+                if (!hide) {
+                    *to++ = *from;
+                }
+            }
+        }
+        *to++ = 0;
+        *to = 0;
+    }
+    return 0;
+}
diff --git a/src/modules/filters/thmlgbf.cpp b/src/modules/filters/thmlgbf.cpp
new file mode 100644
index 0000000..66d9a20
--- /dev/null
+++ b/src/modules/filters/thmlgbf.cpp
@@ -0,0 +1,330 @@
+/***************************************************************************
+ thmlgbf.cpp - ThML to GBF filter
+ -------------------
+ begin : 1999-10-28
+ copyright : 2001 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include <thmlgbf.h>
+
+
+// Default constructor; performs no initialization.
+ThMLGBF::ThMLGBF() {
+}
+
+
+// Named character entities understood by ThMLGBF::ProcessText and the single
+// byte each one is replaced with.  Bytes above 0x7F are spelled as hex escapes
+// so they do not depend on the encoding this source file is saved in.
+// NOTE(review): the values are taken to be ISO-8859-1 code points - confirm
+// the output encoding.
+static const struct ThMLGBFEntity {
+    const char *name;
+    char value;
+} thmlgbfEntities[] = {
+    {"nbsp", ' '},      {"quot", '"'},      {"amp", '&'},       {"lt", '<'},
+    {"gt", '>'},        {"brvbar", '|'},    {"sect", '\xA7'},   {"copy", '\xA9'},
+    {"laquo", '\xAB'},  {"reg", '\xAE'},    {"acute", '\xB4'},  {"para", '\xB6'},
+    {"raquo", '\xBB'},
+
+    {"Aacute", '\xC1'}, {"Agrave", '\xC0'}, {"Acirc", '\xC2'},  {"Auml", '\xC4'},
+    {"Atilde", '\xC3'}, {"Aring", '\xC5'},  {"aacute", '\xE1'}, {"agrave", '\xE0'},
+    {"acirc", '\xE2'},  {"auml", '\xE4'},   {"atilde", '\xE3'}, {"aring", '\xE5'},
+    {"Eacute", '\xC9'}, {"Egrave", '\xC8'}, {"Ecirc", '\xCA'},  {"Euml", '\xCB'},
+    {"eacute", '\xE9'}, {"egrave", '\xE8'}, {"ecirc", '\xEA'},  {"euml", '\xEB'},
+    {"Iacute", '\xCD'}, {"Igrave", '\xCC'}, {"Icirc", '\xCE'},  {"Iuml", '\xCF'},
+    {"iacute", '\xED'}, {"igrave", '\xEC'}, {"icirc", '\xEE'},  {"iuml", '\xEF'},
+    {"Oacute", '\xD3'}, {"Ograve", '\xD2'}, {"Ocirc", '\xD4'},  {"Ouml", '\xD6'},
+    {"Otilde", '\xD5'}, {"oacute", '\xF3'}, {"ograve", '\xF2'}, {"ocirc", '\xF4'},
+    {"ouml", '\xF6'},   {"otilde", '\xF5'}, {"Uacute", '\xDA'}, {"Ugrave", '\xD9'},
+    {"Ucirc", '\xDB'},  {"Uuml", '\xDC'},   {"uacute", '\xFA'}, {"ugrave", '\xF9'},
+    {"ucirc", '\xFB'},  {"uuml", '\xFC'},   {"Yacute", '\xDD'}, {"yacute", '\xFD'},
+    {"yuml", '\xFF'},
+
+    {"deg", '\xB0'},    {"plusmn", '\xB1'}, {"sup2", '\xB2'},   {"sup3", '\xB3'},
+    {"sup1", '\xB9'},   {"ordm", '\xBA'},   {"pound", '\xA3'},  {"cent", '\xA2'},
+    {"frac14", '\xBC'}, {"frac12", '\xBD'}, {"frac34", '\xBE'}, {"iquest", '\xBF'},
+    {"iexcl", '\xA1'},  {"ETH", '\xD0'},    {"eth", '\xF0'},    {"THORN", '\xDE'},
+    {"thorn", '\xFE'},  {"AElig", '\xC6'},  {"aelig", '\xE6'},  {"Oslash", '\xD8'},
+    {"curren", '\xA4'}, {"Ccedil", '\xC7'}, {"ccedil", '\xE7'}, {"szlig", '\xDF'},
+    {"Ntilde", '\xD1'}, {"ntilde", '\xF1'}, {"yen", '\xA5'},    {"not", '\xAC'},
+    {"ordf", '\xAA'},   {"uml", '\xA8'},    {"shy", '\xAD'},    {"macr", '\xAF'}
+};
+
+
+// Finds the first table entry whose name is a prefix of token.  On a hit the
+// replacement byte is stored in *out and true is returned.
+static bool thmlgbfDecodeEntity(const char *token, char *out)
+{
+    const size_t count = sizeof(thmlgbfEntities) / sizeof(thmlgbfEntities[0]);
+    for (size_t n = 0; n < count; n++) {
+        const char *name = thmlgbfEntities[n].name;
+        if (!strncmp(name, token, strlen(name))) {
+            *out = thmlgbfEntities[n].value;
+            return true;
+        }
+    }
+    return false;
+}
+
+
+// Copies the characters of s (without its terminating NUL) to dst and returns
+// the position just past them.
+static char *thmlgbfEmit(char *dst, const char *s)
+{
+    const size_t n = strlen(s);
+    memcpy(dst, s, n);
+    return dst + n;
+}
+
+
+/*
+ * Converts ThML markup in the NUL-terminated string text to GBF, in place.
+ *
+ * Character entities (&name;) listed in thmlgbfEntities are replaced by one
+ * byte each; unknown entities are dropped.  Tags are mapped as follows, and
+ * tags not listed are dropped:
+ *
+ *   sync type="Strongs" value="N"   <WN>
+ *   sync type="morph" value="M"     <WTM>
+ *   scripRef / /scripRef            <RX> / <Rx>
+ *   note / /note                    <RF> / <Rf>
+ *   sup / /sup                      <FS> / <Fs>
+ *   font color=#ff0000 / /font      <FR> / <Fr>
+ *   div class="sechead" ... /div    <TS> ... <Ts>
+ *   div class="title" ... /div      <TT> ... <Tt>
+ *   br                              <CL>
+ *   i... / b...                     <FI> / <FB>
+ *   /p, /i, /b                      <CM>, <Fi>, <Fb>
+ *
+ * When the string (with its NUL) is shorter than maxlen it is first moved to
+ * the end of the buffer so output can be written from the front.  Always
+ * returns 0.
+ *
+ * Fixes relative to the earlier version:
+ *   - &ordm; is decoded ("nbsp" was tested a second time in its place);
+ *   - div class="title" is matched on its 17 characters, so the tag is still
+ *     recognised when more attributes follow; /div is matched on 4;
+ *   - copying a sync value stops at the end of the token when the closing
+ *     quote is missing;
+ *   - ';' and '>' only end an entity or tag that is actually open, so a later
+ *     literal ';' or '>' no longer re-emits the previous entity or tag, and
+ *     token is never read before it has been written;
+ *   - non-ASCII replacement bytes no longer depend on the source encoding.
+ */
+char ThMLGBF::ProcessText(char *text, int maxlen)
+{
+    char *to, *from, token[2048];
+    int tokpos = 0;
+    bool intoken = false;
+    int len;
+    bool ampersand = false;
+    bool sechead = false;
+    bool title = false;
+
+    token[0] = 0;
+
+    len = strlen(text) + 1;                     // shift string to right of buffer
+    if (len < maxlen) {
+        memmove(&text[maxlen - len], text, len);
+        from = &text[maxlen - len];
+    }
+    else from = text;
+
+    for (to = text; *from; from++) {
+        if (*from == '<') {                     // a tag starts
+            intoken = true;
+            tokpos = 0;
+            token[0] = 0;
+            token[1] = 0;
+            token[2] = 0;
+            ampersand = false;
+            continue;
+        }
+        else if (*from == '&') {                // an entity starts
+            intoken = true;
+            tokpos = 0;
+            memset(token, 0, 2048);
+            ampersand = true;
+            continue;
+        }
+        if (*from == ';' && ampersand && intoken) {
+            intoken = false;
+            char decoded;
+            if (thmlgbfDecodeEntity(token, &decoded))
+                *to++ = decoded;
+            continue;
+        }
+        else if (*from == '>' && !ampersand && intoken) {
+            intoken = false;
+            // process desired tokens
+            if (!strncmp(token, "sync type=\"Strongs\" value=\"", 27)) {
+                to = thmlgbfEmit(to, "<W");
+                for (unsigned int i = 27; token[i] && token[i] != '\"'; i++)
+                    *to++ = token[i];
+                *to++ = '>';
+            }
+            else if (!strncmp(token, "sync type=\"morph\" value=\"", 25)) {
+                to = thmlgbfEmit(to, "<WT");
+                for (unsigned int i = 25; token[i] && token[i] != '\"'; i++)
+                    *to++ = token[i];
+                *to++ = '>';
+            }
+            else if (!strncmp(token, "scripRef", 8))
+                to = thmlgbfEmit(to, "<RX>");
+            else if (!strncmp(token, "/scripRef", 9))
+                to = thmlgbfEmit(to, "<Rx>");
+            else if (!strncmp(token, "note", 4))
+                to = thmlgbfEmit(to, "<RF>");
+            else if (!strncmp(token, "/note", 5))
+                to = thmlgbfEmit(to, "<Rf>");
+            else if (!strncmp(token, "sup", 3))
+                to = thmlgbfEmit(to, "<FS>");
+            else if (!strncmp(token, "/sup", 4))
+                to = thmlgbfEmit(to, "<Fs>");
+            else if (!strnicmp(token, "font color=#ff0000", 18))
+                to = thmlgbfEmit(to, "<FR>");
+            else if (!strnicmp(token, "/font", 5))
+                to = thmlgbfEmit(to, "<Fr>");
+            else if (!strncmp(token, "div class=\"sechead\"", 19)) {
+                to = thmlgbfEmit(to, "<TS>");
+                sechead = true;
+            }
+            else if (sechead && !strncmp(token, "/div", 4)) {
+                to = thmlgbfEmit(to, "<Ts>");
+                sechead = false;
+            }
+            else if (!strncmp(token, "div class=\"title\"", 17)) {
+                to = thmlgbfEmit(to, "<TT>");
+                title = true;
+            }
+            else if (title && !strncmp(token, "/div", 4)) {
+                to = thmlgbfEmit(to, "<Tt>");
+                title = false;
+            }
+            else if (!strnicmp(token, "br", 2))
+                to = thmlgbfEmit(to, "<CL>");
+            else switch (*token) {
+                case 'I':                       // italic start
+                case 'i':
+                    to = thmlgbfEmit(to, "<FI>");
+                    break;
+                case 'B':                       // bold start
+                case 'b':
+                    to = thmlgbfEmit(to, "<FB>");
+                    break;
+                case '/':
+                    switch (token[1]) {
+                    case 'P':                   // paragraph end
+                    case 'p':
+                        to = thmlgbfEmit(to, "<CM>");
+                        break;
+                    case 'I':                   // italic end
+                    case 'i':
+                        to = thmlgbfEmit(to, "<Fi>");
+                        break;
+                    case 'B':                   // bold end
+                    case 'b':
+                        to = thmlgbfEmit(to, "<Fb>");
+                        break;
+                    }
+                    break;
+            }
+            continue;
+        }
+        if (intoken) {
+            // characters beyond 2045 are dropped; the +2 write keeps the
+            // buffer terminated two slots ahead of the fill position
+            if (tokpos < 2045)
+                token[tokpos++] = *from;
+            token[tokpos+2] = 0;
+        }
+        else *to++ = *from;
+    }
+    *to++ = 0;
+    *to = 0;
+    return 0;
+}
+
+
+
diff --git a/src/modules/filters/thmlheadings.cpp b/src/modules/filters/thmlheadings.cpp
new file mode 100644
index 0000000..00b8a23
--- /dev/null
+++ b/src/modules/filters/thmlheadings.cpp
@@ -0,0 +1,107 @@
+/******************************************************************************
+ *
+ * thmlheadings - SWFilter descendant to hide or show headings
+ * in a ThML module.
+ */
+
+
+#include <stdlib.h>
+#include <string.h>
+#include <thmlheadings.h>
+#ifndef __GNUC__
+#else
+#include <unixstr.h>
+#endif
+
+
+// Strings for this filter's option: the two selectable values ("On"/"Off",
+// also used by setOptionValue/getOptionValue), the option name, and its tip.
+const char ThMLHeadings::on[] = "On";
+const char ThMLHeadings::off[] = "Off";
+const char ThMLHeadings::optName[] = "Headings";
+const char ThMLHeadings::optTip[] = "Toggles Headings On and Off if they exist";
+
+
+// Lists the selectable values ("On" first, then "Off") and starts with the
+// option switched off.
+ThMLHeadings::ThMLHeadings()
+{
+    options.push_back(on);
+    options.push_back(off);
+    option = false;
+}
+
+
+// Empty destructor: this class performs no cleanup of its own.
+ThMLHeadings::~ThMLHeadings() {}
+
+// Switches the option on when ival equals "On" (case-insensitive compare);
+// any other value switches it off.
+void ThMLHeadings::setOptionValue(const char *ival)
+{
+    const bool wantOn = (stricmp(ival, on) == 0);
+    option = wantOn;
+}
+
+// Returns "On" while the option is enabled and "Off" otherwise.
+const char *ThMLHeadings::getOptionValue()
+{
+    if (option)
+        return on;
+    return off;
+}
+
+/******************************************************************************
+ * ThMLHeadings::ProcessText - removes heading markup from ThML text in place
+ *
+ * Does nothing while the option is on.  While it is off, each
+ * <div class="sechead"> / <div class="title"> tag, everything up to the next
+ * </div>, and that </div> itself are dropped; all other tags and text are
+ * copied through unchanged.
+ *
+ * text   - NUL-terminated buffer, rewritten in place
+ * maxlen - when the string is shorter than this, it is first moved so that it
+ *          ends at text[maxlen - 1], leaving the front free for output
+ * key    - unused
+ * module - unused
+ *
+ * Returns 0 in all cases.
+ */
+char ThMLHeadings::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+{
+    if (!option) {    // if we don't want headings
+        char *to, *from, token[2048];    // cheese.  Fix.
+        int tokpos = 0;
+        bool intoken = false;
+        bool hide = false;
+        int len;
+
+        len = strlen(text) + 1;    // shift string to right of buffer
+        if (len < maxlen) {
+            memmove(&text[maxlen - len], text, len);
+            from = &text[maxlen - len];
+        }
+        else from = text;
+
+        for (to = text; *from; from++) {
+            if (*from == '<') {
+                intoken = true;
+                tokpos = 0;
+                token[0] = 0;
+                token[1] = 0;
+                token[2] = 0;
+                continue;
+            }
+            // Only a '>' that closes an open '<' ends a token.  A stray '>'
+            // used to re-run the checks below on whatever was left in token
+            // (uninitialized before the first tag) and could re-emit an old
+            // tag; it is now treated as ordinary text.
+            if (*from == '>' && intoken) {    // process tokens
+                intoken = false;
+                if (!strnicmp(token, "div class=\"sechead\"", 19)) {
+                    hide = true;
+                    continue;
+                }
+                if (!strnicmp(token, "div class=\"title\"", 17)) {
+                    hide = true;
+                    continue;
+                }
+                else if (hide && !strnicmp(token, "/div", 4)) {
+                    hide = false;
+                    continue;
+                }
+
+                // if not a heading token, keep token in text
+                if (!hide) {
+                    *to++ = '<';
+                    for (char *tok = token; *tok; tok++)
+                        *to++ = *tok;
+                    *to++ = '>';
+                }
+                continue;
+            }
+            if (intoken) {
+                // Tokens longer than 2045 characters are truncated; the
+                // bytes just past the stored text are kept zero so token is
+                // always NUL-terminated.
+                if (tokpos < 2045)
+                    token[tokpos++] = *from;
+                token[tokpos+2] = 0;
+            }
+            else if (!hide) {
+                *to++ = *from;
+            }
+        }
+        // NOTE(review): when nothing was removed, the second terminator lands
+        // one byte past the input's own NUL -- confirm callers always provide
+        // that extra byte.
+        *to++ = 0;
+        *to = 0;
+    }
+    return 0;
+}
diff --git a/src/modules/filters/thmlhtml.cpp b/src/modules/filters/thmlhtml.cpp
new file mode 100644
index 0000000..9cb8679
--- /dev/null
+++ b/src/modules/filters/thmlhtml.cpp
@@ -0,0 +1,211 @@
+/***************************************************************************
+ thmlhtml.cpp - ThML to HTML filter
+ -------------------
+ begin : 1999-10-27
+ copyright : 2001 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include <thmlhtml.h>
+#include <swmodule.h>
+
+
+// Configures the filter: sets '<' and '>' as the token start/end markers,
+// turns on case-sensitive token matching, and registers fixed replacements
+// for the "/scripRef", "note" and "/note" tokens.  The escape-string
+// ("&name;") configuration below is commented out, so none of it is active.
+ThMLHTML::ThMLHTML() {
+ setTokenStart("<");
+ setTokenEnd(">");
+/*
+ setEscapeStart("&");
+ setEscapeEnd(";");
+
+ setEscapeStringCaseSensitive(true);
+
+ addEscapeStringSubstitute("nbsp", " ");
+ addEscapeStringSubstitute("quot", "\"");
+ addEscapeStringSubstitute("amp", "&");
+ addEscapeStringSubstitute("lt", "<");
+ addEscapeStringSubstitute("gt", ">");
+ addEscapeStringSubstitute("brvbar", "|");
+ addEscapeStringSubstitute("sect", "§");
+ addEscapeStringSubstitute("copy", "©");
+ addEscapeStringSubstitute("laquo", "«");
+ addEscapeStringSubstitute("reg", "®");
+ addEscapeStringSubstitute("acute", "´");
+ addEscapeStringSubstitute("para", "¶");
+ addEscapeStringSubstitute("raquo", "»");
+
+ addEscapeStringSubstitute("Aacute", "Á");
+ addEscapeStringSubstitute("Agrave", "À");
+ addEscapeStringSubstitute("Acirc", "Â");
+ addEscapeStringSubstitute("Auml", "Ä");
+ addEscapeStringSubstitute("Atilde", "Ã");
+ addEscapeStringSubstitute("Aring", "Å");
+ addEscapeStringSubstitute("aacute", "á");
+ addEscapeStringSubstitute("agrave", "à");
+ addEscapeStringSubstitute("acirc", "â");
+ addEscapeStringSubstitute("auml", "ä");
+ addEscapeStringSubstitute("atilde", "ã");
+ addEscapeStringSubstitute("aring", "å");
+ addEscapeStringSubstitute("Eacute", "É");
+ addEscapeStringSubstitute("Egrave", "È");
+ addEscapeStringSubstitute("Ecirc", "Ê");
+ addEscapeStringSubstitute("Euml", "Ë");
+ addEscapeStringSubstitute("eacute", "é");
+ addEscapeStringSubstitute("egrave", "è");
+ addEscapeStringSubstitute("ecirc", "ê");
+ addEscapeStringSubstitute("euml", "ë");
+ addEscapeStringSubstitute("Iacute", "Í");
+ addEscapeStringSubstitute("Igrave", "Ì");
+ addEscapeStringSubstitute("Icirc", "Î");
+ addEscapeStringSubstitute("Iuml", "Ï");
+ addEscapeStringSubstitute("iacute", "í");
+ addEscapeStringSubstitute("igrave", "ì");
+ addEscapeStringSubstitute("icirc", "î");
+ addEscapeStringSubstitute("iuml", "ï");
+ addEscapeStringSubstitute("Oacute", "Ó");
+ addEscapeStringSubstitute("Ograve", "Ò");
+ addEscapeStringSubstitute("Ocirc", "Ô");
+ addEscapeStringSubstitute("Ouml", "Ö");
+ addEscapeStringSubstitute("Otilde", "Õ");
+ addEscapeStringSubstitute("oacute", "ó");
+ addEscapeStringSubstitute("ograve", "ò");
+ addEscapeStringSubstitute("ocirc", "ô");
+ addEscapeStringSubstitute("ouml", "ö");
+ addEscapeStringSubstitute("otilde", "õ");
+ addEscapeStringSubstitute("Uacute", "Ú");
+ addEscapeStringSubstitute("Ugrave", "Ù");
+ addEscapeStringSubstitute("Ucirc", "Û");
+ addEscapeStringSubstitute("Uuml", "Ü");
+ addEscapeStringSubstitute("uacute", "ú");
+ addEscapeStringSubstitute("ugrave", "ù");
+ addEscapeStringSubstitute("ucirc", "û");
+ addEscapeStringSubstitute("uuml", "ü");
+ addEscapeStringSubstitute("Yacute", "Ý");
+ addEscapeStringSubstitute("yacute", "ý");
+ addEscapeStringSubstitute("yuml", "ÿ");
+
+ addEscapeStringSubstitute("deg", "°");
+ addEscapeStringSubstitute("plusmn", "±");
+ addEscapeStringSubstitute("sup2", "²");
+ addEscapeStringSubstitute("sup3", "³");
+ addEscapeStringSubstitute("sup1", "¹");
+ // NOTE(review): "nbsp" is already registered above; the entity for "º" is
+ // presumably "ordm" -- confirm before re-enabling this table.
+ addEscapeStringSubstitute("nbsp", "º");
+ addEscapeStringSubstitute("pound", "£");
+ addEscapeStringSubstitute("cent", "¢");
+ addEscapeStringSubstitute("frac14", "¼");
+ addEscapeStringSubstitute("frac12", "½");
+ addEscapeStringSubstitute("frac34", "¾");
+ addEscapeStringSubstitute("iquest", "¿");
+ addEscapeStringSubstitute("iexcl", "¡");
+ addEscapeStringSubstitute("ETH", "Ð");
+ addEscapeStringSubstitute("eth", "ð");
+ addEscapeStringSubstitute("THORN", "Þ");
+ addEscapeStringSubstitute("thorn", "þ");
+ addEscapeStringSubstitute("AElig", "Æ");
+ addEscapeStringSubstitute("aelig", "æ");
+ addEscapeStringSubstitute("Oslash", "Ø");
+ addEscapeStringSubstitute("curren", "¤");
+ addEscapeStringSubstitute("Ccedil", "Ç");
+ addEscapeStringSubstitute("ccedil", "ç");
+ addEscapeStringSubstitute("szlig", "ß");
+ addEscapeStringSubstitute("Ntilde", "Ñ");
+ addEscapeStringSubstitute("ntilde", "ñ");
+ addEscapeStringSubstitute("yen", "¥");
+ addEscapeStringSubstitute("not", "¬");
+ addEscapeStringSubstitute("ordf", "ª");
+ addEscapeStringSubstitute("uml", "¨");
+ addEscapeStringSubstitute("shy", "­");
+ addEscapeStringSubstitute("macr", "¯");
+*/
+ setTokenCaseSensitive(true);
+
+ // Tokens that map to a fixed replacement string; anything else is matched
+ // by prefix in handleToken().
+ addTokenSubstitute("/scripRef", " </a>");
+ addTokenSubstitute("note", " <font color=\"#800000\"><small>(");
+ addTokenSubstitute("/note", ")</small></font> ");
+}
+
+
+/**
+ * Converts one ThML token to HTML, writing the result at *buf and advancing
+ * *buf past what was written.
+ *
+ * Tokens registered as fixed substitutions are handled by substituteToken();
+ * the remaining ones are recognized here by prefix: Strongs / morph / lemma
+ * "sync" tags, scripRef, img (a src beginning with '/' is prefixed with
+ * "file:" plus the module's AbsoluteDataPath entry) and note tags that carry
+ * attributes.
+ *
+ * @param buf      in/out write cursor into the output buffer
+ * @param token    NUL-terminated token text (presumably the text between the
+ *                 '<' and '>' markers configured in the constructor)
+ * @param userData not used by this filter
+ * @return true when the token was handled; false when it was not recognized
+ *         or is an img token without a src attribute
+ */
+bool ThMLHTML::handleToken(char **buf, const char *token, DualStringMap &userData) {
+    if (!substituteToken(buf, token)) {
+        // manually process if it wasn't a simple substitution
+        if (!strncmp(token, "sync type=\"Strongs\" value=\"", 27)) {
+            const char kind = token[27];
+            if (kind == 'H' || kind == 'G' || kind == 'A') {
+                pushString(buf, "<small><em>");
+                for (const char *tok = token + 5; *tok; tok++)
+                    if (*tok != '\"')
+                        *(*buf)++ = *tok;
+                pushString(buf, "</em></small>");
+            }
+            else if (kind == 'T') {
+                pushString(buf, "<small><i>");
+                // The value text starts two characters after the 'T'.
+                // token[28] may already be the terminator, so only step
+                // past it when it is a real character, and stop at the NUL
+                // if the closing quote is missing.
+                if (token[28]) {
+                    for (unsigned int i = 29; token[i] && token[i] != '\"'; i++)
+                        *(*buf)++ = token[i];
+                }
+                pushString(buf, "</i></small>");
+            }
+        }
+        else if (!strncmp(token, "sync type=\"morph\" value=\"", 25)) {
+            pushString(buf, "<small><em>");
+            // copy the value up to its closing quote (or the end of token)
+            for (unsigned int i = 25; token[i] && token[i] != '\"'; i++)
+                *(*buf)++ = token[i];
+            pushString(buf, "</em></small>");
+        }
+        else if (!strncmp(token, "sync type=\"lemma\" value=\"", 25)) {
+            pushString(buf, "<small><em>(");
+            for (unsigned int i = 25; token[i] && token[i] != '\"'; i++)
+                *(*buf)++ = token[i];
+            pushString(buf, ")</em></small>");
+        }
+        else if (!strncmp(token, "scripRef", 8)) {
+            pushString(buf, "<a href=\"");
+            // A bare "scripRef" ends at token[8]; skipping to token + 9
+            // is only valid when there is a character at index 8.
+            if (token[8]) {
+                for (const char *tok = token + 9; *tok; tok++)
+                    if (*tok != '\"')
+                        *(*buf)++ = *tok;
+            }
+            *(*buf)++ = '\"';
+            *(*buf)++ = '>';
+        }
+        else if (!strncmp(token, "img ", 4)) {
+            const char *src = strstr(token, "src");
+            if (!src)        // assert we have a src attribute
+                return false;
+
+            *(*buf)++ = '<';
+            for (const char *c = token; *c; c++) {
+                if (c == src) {
+                    // copy `src=` up to the opening quote
+                    for (; ((*c) && (*c != '"')); c++)
+                        *(*buf)++ = *c;
+
+                    // no quote found: back up so the outer loop ends cleanly
+                    if (!*c) { c--; continue; }
+
+                    *(*buf)++ = '"';
+                    if (*(c+1) == '/') {
+                        pushString(buf, "file:");
+                        pushString(buf, module->getConfigEntry("AbsoluteDataPath"));
+                        // avoid a doubled '/' when the data path already ends in one
+                        if (*((*buf)-1) == '/')
+                            c++;        // skip '/'
+                    }
+                    continue;
+                }
+                *(*buf)++ = *c;
+            }
+            *(*buf)++ = '>';
+        }
+        else if (!strncmp(token, "note", 4)) {
+            // note tags with attributes (the bare "note" is a substitution)
+            pushString(buf, " <font color=\"#800000\"><small>(");
+        }
+        else {
+            return false;  // we still didn't handle token
+        }
+    }
+    return true;
+}
+
+
diff --git a/src/modules/filters/thmlhtmlhref.cpp b/src/modules/filters/thmlhtmlhref.cpp
new file mode 100644
index 0000000..ce7e3fd
--- /dev/null
+++ b/src/modules/filters/thmlhtmlhref.cpp
@@ -0,0 +1,269 @@
+/***************************************************************************
+ thmlhtmlhref.cpp - ThML to HTML filter with hrefs
+ -------------------
+ begin : 2001-09-03
+ copyright : 2001 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include <thmlhtmlhref.h>
+#include <swmodule.h>
+
+
+// Configures the filter: sets '<' and '>' as the token start/end markers,
+// turns on case-sensitive token matching, and registers fixed replacements
+// for the "note", "/note" and "/scripture" tokens.  The escape-string
+// ("&name;") configuration below is commented out, so none of it is active.
+ThMLHTMLHREF::ThMLHTMLHREF() {
+ setTokenStart("<");
+ setTokenEnd(">");
+/*
+ setEscapeStart("&");
+ setEscapeEnd(";");
+
+ setEscapeStringCaseSensitive(true);
+
+ addEscapeStringSubstitute("nbsp", " ");
+ addEscapeStringSubstitute("quot", "\"");
+ addEscapeStringSubstitute("amp", "&");
+ addEscapeStringSubstitute("lt", "<");
+ addEscapeStringSubstitute("gt", ">");
+ addEscapeStringSubstitute("brvbar", "|");
+ addEscapeStringSubstitute("sect", "§");
+ addEscapeStringSubstitute("copy", "©");
+ addEscapeStringSubstitute("laquo", "«");
+ addEscapeStringSubstitute("reg", "®");
+ addEscapeStringSubstitute("acute", "´");
+ addEscapeStringSubstitute("para", "¶");
+ addEscapeStringSubstitute("raquo", "»");
+
+ addEscapeStringSubstitute("Aacute", "Á");
+ addEscapeStringSubstitute("Agrave", "À");
+ addEscapeStringSubstitute("Acirc", "Â");
+ addEscapeStringSubstitute("Auml", "Ä");
+ addEscapeStringSubstitute("Atilde", "Ã");
+ addEscapeStringSubstitute("Aring", "Å");
+ addEscapeStringSubstitute("aacute", "á");
+ addEscapeStringSubstitute("agrave", "à");
+ addEscapeStringSubstitute("acirc", "â");
+ addEscapeStringSubstitute("auml", "ä");
+ addEscapeStringSubstitute("atilde", "ã");
+ addEscapeStringSubstitute("aring", "å");
+ addEscapeStringSubstitute("Eacute", "É");
+ addEscapeStringSubstitute("Egrave", "È");
+ addEscapeStringSubstitute("Ecirc", "Ê");
+ addEscapeStringSubstitute("Euml", "Ë");
+ addEscapeStringSubstitute("eacute", "é");
+ addEscapeStringSubstitute("egrave", "è");
+ addEscapeStringSubstitute("ecirc", "ê");
+ addEscapeStringSubstitute("euml", "ë");
+ addEscapeStringSubstitute("Iacute", "Í");
+ addEscapeStringSubstitute("Igrave", "Ì");
+ addEscapeStringSubstitute("Icirc", "Î");
+ addEscapeStringSubstitute("Iuml", "Ï");
+ addEscapeStringSubstitute("iacute", "í");
+ addEscapeStringSubstitute("igrave", "ì");
+ addEscapeStringSubstitute("icirc", "î");
+ addEscapeStringSubstitute("iuml", "ï");
+ addEscapeStringSubstitute("Oacute", "Ó");
+ addEscapeStringSubstitute("Ograve", "Ò");
+ addEscapeStringSubstitute("Ocirc", "Ô");
+ addEscapeStringSubstitute("Ouml", "Ö");
+ addEscapeStringSubstitute("Otilde", "Õ");
+ addEscapeStringSubstitute("oacute", "ó");
+ addEscapeStringSubstitute("ograve", "ò");
+ addEscapeStringSubstitute("ocirc", "ô");
+ addEscapeStringSubstitute("ouml", "ö");
+ addEscapeStringSubstitute("otilde", "õ");
+ addEscapeStringSubstitute("Uacute", "Ú");
+ addEscapeStringSubstitute("Ugrave", "Ù");
+ addEscapeStringSubstitute("Ucirc", "Û");
+ addEscapeStringSubstitute("Uuml", "Ü");
+ addEscapeStringSubstitute("uacute", "ú");
+ addEscapeStringSubstitute("ugrave", "ù");
+ addEscapeStringSubstitute("ucirc", "û");
+ addEscapeStringSubstitute("uuml", "ü");
+ addEscapeStringSubstitute("Yacute", "Ý");
+ addEscapeStringSubstitute("yacute", "ý");
+ addEscapeStringSubstitute("yuml", "ÿ");
+
+ addEscapeStringSubstitute("deg", "°");
+ addEscapeStringSubstitute("plusmn", "±");
+ addEscapeStringSubstitute("sup2", "²");
+ addEscapeStringSubstitute("sup3", "³");
+ addEscapeStringSubstitute("sup1", "¹");
+ // NOTE(review): "nbsp" is already registered above; the entity for "º" is
+ // presumably "ordm" -- confirm before re-enabling this table.
+ addEscapeStringSubstitute("nbsp", "º");
+ addEscapeStringSubstitute("pound", "£");
+ addEscapeStringSubstitute("cent", "¢");
+ addEscapeStringSubstitute("frac14", "¼");
+ addEscapeStringSubstitute("frac12", "½");
+ addEscapeStringSubstitute("frac34", "¾");
+ addEscapeStringSubstitute("iquest", "¿");
+ addEscapeStringSubstitute("iexcl", "¡");
+ addEscapeStringSubstitute("ETH", "Ð");
+ addEscapeStringSubstitute("eth", "ð");
+ addEscapeStringSubstitute("THORN", "Þ");
+ addEscapeStringSubstitute("thorn", "þ");
+ addEscapeStringSubstitute("AElig", "Æ");
+ addEscapeStringSubstitute("aelig", "æ");
+ addEscapeStringSubstitute("Oslash", "Ø");
+ addEscapeStringSubstitute("curren", "¤");
+ addEscapeStringSubstitute("Ccedil", "Ç");
+ addEscapeStringSubstitute("ccedil", "ç");
+ addEscapeStringSubstitute("szlig", "ß");
+ addEscapeStringSubstitute("Ntilde", "Ñ");
+ addEscapeStringSubstitute("ntilde", "ñ");
+ addEscapeStringSubstitute("yen", "¥");
+ addEscapeStringSubstitute("not", "¬");
+ addEscapeStringSubstitute("ordf", "ª");
+ addEscapeStringSubstitute("uml", "¨");
+ addEscapeStringSubstitute("shy", "­");
+ addEscapeStringSubstitute("macr", "¯");
+*/
+ setTokenCaseSensitive(true);
+
+ // Tokens that map to a fixed replacement string; anything else is matched
+ // by prefix in handleToken().
+ addTokenSubstitute("note", " <font color=\"#800000\"><small>(");
+ addTokenSubstitute("/note", ")</small></font> ");
+ addTokenSubstitute("/scripture", "</i> ");
+}
+
+
+/**
+ * Converts one ThML token to HTML with hyperlinks, writing the result at
+ * *buf and advancing *buf past what was written.
+ *
+ * Tokens registered as fixed substitutions are handled by substituteToken();
+ * the rest are recognized here by prefix.  Unrecognized tokens are copied to
+ * the output unchanged, wrapped in '<' and '>'.
+ *
+ * State kept in userData between calls:
+ *   "inscriptRef"         - "true" after a scripture tag or a scripRef tag
+ *                           with attributes
+ *   "suspendTextPassThru" - set to "true" between a bare <scripRef> and its
+ *                           </scripRef>
+ *   "lastTextNode"        - read here to build the link for a bare scripRef
+ *                           (presumably filled in by the base filter)
+ *   "SecHead"             - "true" while inside a sechead/title div
+ *
+ * @param buf      in/out write cursor into the output buffer
+ * @param token    NUL-terminated token text
+ * @param userData per-pass state map described above
+ * @return true, except for an img token without a src attribute (false)
+ */
+bool ThMLHTMLHREF::handleToken(char **buf, const char *token, DualStringMap &userData) {
+    const char *tok;
+    if (!substituteToken(buf, token)) {
+        // manually process if it wasn't a simple substitution
+        if (!strncmp(token, "sync ", 5)) {
+            pushString(buf, "<a href=\"");
+            // Copy the attributes without quotes and without the final
+            // character.  The *tok test stops a bare "sync " from looking
+            // one byte past its terminator.
+            for (tok = token + 5; *tok && *(tok+1); tok++)
+                if (*tok != '\"')
+                    *(*buf)++ = *tok;
+            *(*buf)++ = '\"';
+            *(*buf)++ = '>';
+
+            // scan for value and add it to the buffer
+            for (tok = token + 5; *tok; tok++) {
+                if (!strncmp(tok, "value=\"", 7)) {
+                    tok += 7;
+                    // stop at the closing quote, or at the end of the
+                    // token when the quote is missing
+                    for (; *tok && *tok != '\"'; tok++)
+                        *(*buf)++ = *tok;
+                    break;
+                }
+            }
+            pushString(buf, "</a>");
+        }
+
+        else if (!strncmp(token, "scripture ", 10)) {
+            userData["inscriptRef"] = "true";
+            pushString(buf, "<i>");
+        }
+
+        else if (!strncmp(token, "scripRef p", 10) || !strncmp(token, "scripRef v", 10)) {
+            userData["inscriptRef"] = "true";
+            pushString(buf, "<a href=\"");
+            for (const char *attr = token + 9; *(attr+1); attr++)
+                if (*attr != '\"')
+                    *(*buf)++ = *attr;
+            *(*buf)++ = '\"';
+            *(*buf)++ = '>';
+        }
+
+        // we're starting a scripRef like "<scripRef>John 3:16</scripRef>"
+        else if (!strcmp(token, "scripRef")) {
+            userData["inscriptRef"] = "false";
+            // let's stop text from going to output
+            userData["suspendTextPassThru"] = "true";
+        }
+
+        // we've ended a scripRef
+        else if (!strcmp(token, "/scripRef")) {
+            if (userData["inscriptRef"] == "true") { // like "<scripRef passage="John 3:16">John 3:16</scripRef>"
+                userData["inscriptRef"] = "false";
+                pushString(buf, "</a>");
+            }
+
+            else { // like "<scripRef>John 3:16</scripRef>"
+                pushString(buf, "<a href=\"passage=");
+                pushString(buf, userData["lastTextNode"].c_str());
+                *(*buf)++ = '\"';
+                *(*buf)++ = '>';
+                pushString(buf, userData["lastTextNode"].c_str());
+                // let's let text resume to output again
+                userData["suspendTextPassThru"] = "false";
+                pushString(buf, "</a>");
+            }
+        }
+
+        else if (!strncmp(token, "div class=\"sechead\"", 19)) {
+            userData["SecHead"] = "true";
+            pushString(buf, "<br /><b><i>");
+        }
+        // 17 is the length of the literal.  The previous length of 19 made
+        // strncmp compare the literal's terminator too, so a title div
+        // carrying any further attribute was never recognized.
+        else if (!strncmp(token, "div class=\"title\"", 17)) {
+            userData["SecHead"] = "true";
+            pushString(buf, "<br /><b><i>");
+        }
+        else if (!strncmp(token, "/div", 4)) {
+            if (userData["SecHead"] == "true") {
+                pushString(buf, "</i></b><br />");
+                userData["SecHead"] = "false";
+            }
+        }
+
+        // NOTE(review): this branch can never run -- every token starting
+        // with "sync " is taken by the first branch above.  Kept unchanged;
+        // confirm the intended output for "T" values before reordering.
+        else if (!strncmp(token, "sync type=\"Strongs\" value=\"T", 28)) {
+            pushString(buf, "<a href=\"");
+            for (tok = token + 5; *(tok+1); tok++)
+                if (*tok != '\"')
+                    *(*buf)++ = *tok;
+            *(*buf)++ = '\"';
+            *(*buf)++ = '>';
+            for (tok = token + 29; *(tok+2); tok++)
+                if (*tok != '\"')
+                    *(*buf)++ = *tok;
+            pushString(buf, "</a>");
+        }
+        else if (!strncmp(token, "img ", 4)) {
+            const char *src = strstr(token, "src");
+            if (!src)        // assert we have a src attribute
+                return false;
+
+            *(*buf)++ = '<';
+            for (const char *c = token; *c; c++) {
+                if (c == src) {
+                    // copy `src=` up to the opening quote
+                    for (; ((*c) && (*c != '"')); c++)
+                        *(*buf)++ = *c;
+
+                    // no quote found: back up so the outer loop ends cleanly
+                    if (!*c) { c--; continue; }
+
+                    *(*buf)++ = '"';
+                    if (*(c+1) == '/') {
+                        pushString(buf, "file:");
+                        pushString(buf, module->getConfigEntry("AbsoluteDataPath"));
+                        // avoid a doubled '/' when the data path already ends in one
+                        if (*((*buf)-1) == '/')
+                            c++;        // skip '/'
+                    }
+                    continue;
+                }
+                *(*buf)++ = *c;
+            }
+            *(*buf)++ = '>';
+        }
+        else if (!strncmp(token, "note", 4)) {
+            // note tags with attributes (the bare "note" is a substitution)
+            pushString(buf, " <small><font color=\"#800000\">(");
+        }
+        else {
+            // unknown token: pass it through untouched
+            *(*buf)++ = '<';
+            for (const char *raw = token; *raw; raw++)
+                *(*buf)++ = *raw;
+            *(*buf)++ = '>';
+            //return false;  // we still didn't handle token
+        }
+    }
+    return true;
+}
+
+
diff --git a/src/modules/filters/thmllemma.cpp b/src/modules/filters/thmllemma.cpp
new file mode 100644
index 0000000..33856db
--- /dev/null
+++ b/src/modules/filters/thmllemma.cpp
@@ -0,0 +1,97 @@
+/******************************************************************************
+ *
+ * thmllemma - SWFilter descendant to hide or show lemmas
+ * in a ThML module.
+ */
+
+
+#include <stdlib.h>
+#include <string.h>
+#include <thmllemma.h>
+#ifndef __GNUC__
+#else
+#include <unixstr.h>
+#endif
+
+
+// Strings for this filter's option: the two selectable values ("On"/"Off",
+// also used by setOptionValue/getOptionValue), the option name, and its tip.
+const char ThMLLemma::on[] = "On";
+const char ThMLLemma::off[] = "Off";
+const char ThMLLemma::optName[] = "Lemmas";
+const char ThMLLemma::optTip[] = "Toggles Lemmas On and Off if they exist";
+
+// Lists the selectable values ("On" first, then "Off") and starts with the
+// option switched off.
+ThMLLemma::ThMLLemma()
+{
+    options.push_back(on);
+    options.push_back(off);
+    option = false;
+}
+
+
+// Empty destructor: this class performs no cleanup of its own.
+ThMLLemma::~ThMLLemma() {}
+
+// Switches the option on when ival equals "On" (case-insensitive compare);
+// any other value switches it off.
+void ThMLLemma::setOptionValue(const char *ival)
+{
+    const bool wantOn = (stricmp(ival, on) == 0);
+    option = wantOn;
+}
+
+// Returns "On" while the option is enabled and "Off" otherwise.
+const char *ThMLLemma::getOptionValue()
+{
+    if (option)
+        return on;
+    return off;
+}
+
+/******************************************************************************
+ * ThMLLemma::ProcessText - removes lemma tags from ThML text in place
+ *
+ * Does nothing while the option is on.  While it is off, every
+ * <sync type="lemma" ...> tag is dropped; all other tags and text are copied
+ * through.  When a dropped tag sat between a space and a punctuation mark,
+ * that space is removed as well.
+ *
+ * text   - NUL-terminated buffer, rewritten in place
+ * maxlen - when the string is shorter than this, it is first moved so that it
+ *          ends at text[maxlen - 1], leaving the front free for output
+ * key    - unused
+ * module - unused
+ *
+ * Returns 0 in all cases.
+ */
+char ThMLLemma::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+{
+    if (!option) {    // if we don't want lemmas
+        char *to, *from, token[2048];    // cheese.  Fix.
+        int tokpos = 0;
+        bool intoken = false;
+        bool lastspace = false;    // last byte written to the output was ' '
+        int len;
+
+        len = strlen(text) + 1;    // shift string to right of buffer
+        if (len < maxlen) {
+            memmove(&text[maxlen - len], text, len);
+            from = &text[maxlen - len];
+        }
+        else from = text;
+
+        for (to = text; *from; from++) {
+            if (*from == '<') {
+                intoken = true;
+                tokpos = 0;
+                token[0] = 0;
+                token[1] = 0;
+                token[2] = 0;
+                continue;
+            }
+            // Only a '>' that closes an open '<' ends a token.  A stray '>'
+            // used to re-run the checks below on whatever was left in token
+            // (uninitialized before the first tag) and could re-emit an old
+            // tag; it is now treated as ordinary text.
+            if (*from == '>' && intoken) {    // process tokens
+                intoken = false;
+                if (!strnicmp(token, "sync type=\"lemma\" ", 18)) {    // Lemma
+                    const char next = from[1];
+                    const bool punct = (next == ' ') || (next == ',') || (next == ';')
+                        || (next == '.') || (next == '?') || (next == '!')
+                        || (next == ')') || (next == '\'') || (next == '\"');
+                    // drop the space left dangling in front of the tag
+                    if (punct && lastspace)
+                        to--;
+                    continue;
+                }
+                // if not a lemma token, keep token in text
+                *to++ = '<';
+                for (char *tok = token; *tok; tok++)
+                    *to++ = *tok;
+                *to++ = '>';
+                // The output now ends in '>', not a space.  Without this
+                // reset a following lemma tag could back up over the '>'
+                // just written.
+                lastspace = false;
+                continue;
+            }
+            if (intoken) {
+                // Tokens longer than 2045 characters are truncated; the
+                // bytes just past the stored text are kept zero so token is
+                // always NUL-terminated.
+                if (tokpos < 2045)
+                    token[tokpos++] = *from;
+                token[tokpos+2] = 0;
+            }
+            else {
+                *to++ = *from;
+                lastspace = (*from == ' ');
+            }
+        }
+        // NOTE(review): when nothing was removed, the second terminator lands
+        // one byte past the input's own NUL -- confirm callers always provide
+        // that extra byte.
+        *to++ = 0;
+        *to = 0;
+    }
+    return 0;
+}
diff --git a/src/modules/filters/thmlmorph.cpp b/src/modules/filters/thmlmorph.cpp
new file mode 100644
index 0000000..f95bede
--- /dev/null
+++ b/src/modules/filters/thmlmorph.cpp
@@ -0,0 +1,98 @@
+/******************************************************************************
+ *
+ * thmlmorph - SWFilter descendant to hide or show morph tags
+ * in a ThML module.
+ */
+
+
+#include <stdlib.h>
+#include <string.h>
+#include <thmlmorph.h>
+#ifndef __GNUC__
+#else
+#include <unixstr.h>
+#endif
+
+
+// Strings for this filter's option: the two selectable values ("On"/"Off",
+// also used by setOptionValue/getOptionValue), the option name, and its tip.
+const char ThMLMorph::on[] = "On";
+const char ThMLMorph::off[] = "Off";
+const char ThMLMorph::optName[] = "Morphological Tags";
+const char ThMLMorph::optTip[] = "Toggles Morphological Tags On and Off if they exist";
+
+
+// Lists the selectable values ("On" first, then "Off") and starts with the
+// option switched off.
+ThMLMorph::ThMLMorph()
+{
+    options.push_back(on);
+    options.push_back(off);
+    option = false;
+}
+
+
+// Empty destructor: this class performs no cleanup of its own.
+ThMLMorph::~ThMLMorph() {}
+
+// Switches the option on when ival equals "On" (case-insensitive compare);
+// any other value switches it off.
+void ThMLMorph::setOptionValue(const char *ival)
+{
+    const bool wantOn = (stricmp(ival, on) == 0);
+    option = wantOn;
+}
+
+// Returns "On" while the option is enabled and "Off" otherwise.
+const char *ThMLMorph::getOptionValue()
+{
+    if (option)
+        return on;
+    return off;
+}
+
+/******************************************************************************
+ * ThMLMorph::ProcessText - removes morph tags from ThML text in place
+ *
+ * Does nothing while the option is on.  While it is off, every
+ * <sync type="morph" ...> tag is dropped; all other tags and text are copied
+ * through.  When a dropped tag sat between a space and a punctuation mark,
+ * that space is removed as well.
+ *
+ * text   - NUL-terminated buffer, rewritten in place
+ * maxlen - when the string is shorter than this, it is first moved so that it
+ *          ends at text[maxlen - 1], leaving the front free for output
+ * key    - unused
+ * module - unused
+ *
+ * Returns 0 in all cases.
+ */
+char ThMLMorph::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+{
+    if (!option) {    // if we don't want morph tags
+        char *to, *from, token[2048];    // cheese.  Fix.
+        int tokpos = 0;
+        bool intoken = false;
+        bool lastspace = false;    // last byte written to the output was ' '
+        int len;
+
+        len = strlen(text) + 1;    // shift string to right of buffer
+        if (len < maxlen) {
+            memmove(&text[maxlen - len], text, len);
+            from = &text[maxlen - len];
+        }
+        else from = text;
+
+        for (to = text; *from; from++) {
+            if (*from == '<') {
+                intoken = true;
+                tokpos = 0;
+                token[0] = 0;
+                token[1] = 0;
+                token[2] = 0;
+                continue;
+            }
+            // Only a '>' that closes an open '<' ends a token.  A stray '>'
+            // used to re-run the checks below on whatever was left in token
+            // (uninitialized before the first tag) and could re-emit an old
+            // tag; it is now treated as ordinary text.
+            if (*from == '>' && intoken) {    // process tokens
+                intoken = false;
+                if (!strnicmp(token, "sync type=\"morph\" ", 18)) {    // Morph
+                    const char next = from[1];
+                    const bool punct = (next == ' ') || (next == ',') || (next == ';')
+                        || (next == '.') || (next == '?') || (next == '!')
+                        || (next == ')') || (next == '\'') || (next == '\"');
+                    // drop the space left dangling in front of the tag
+                    if (punct && lastspace)
+                        to--;
+                    continue;
+                }
+                // if not a morph tag token, keep token in text
+                *to++ = '<';
+                for (char *tok = token; *tok; tok++)
+                    *to++ = *tok;
+                *to++ = '>';
+                // The output now ends in '>', not a space.  Without this
+                // reset a following morph tag could back up over the '>'
+                // just written.
+                lastspace = false;
+                continue;
+            }
+            if (intoken) {
+                // Tokens longer than 2045 characters are truncated; the
+                // bytes just past the stored text are kept zero so token is
+                // always NUL-terminated.
+                if (tokpos < 2045)
+                    token[tokpos++] = *from;
+                token[tokpos+2] = 0;
+            }
+            else {
+                *to++ = *from;
+                lastspace = (*from == ' ');
+            }
+        }
+        // NOTE(review): when nothing was removed, the second terminator lands
+        // one byte past the input's own NUL -- confirm callers always provide
+        // that extra byte.
+        *to++ = 0;
+        *to = 0;
+    }
+    return 0;
+}
diff --git a/src/modules/filters/thmlolb.cpp b/src/modules/filters/thmlolb.cpp
new file mode 100644
index 0000000..2b31fab
--- /dev/null
+++ b/src/modules/filters/thmlolb.cpp
@@ -0,0 +1,243 @@
+/***************************************************************************
+ thmlolb.cpp - ThML to OLB filter
+ -------------------
+ begin : 2001-05-10
+ copyright : 2001 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include <thmlolb.h>
+
+
+// Nothing to set up: the constructor body is intentionally empty.
+ThMLOLB::ThMLOLB() {}
+
+
+// Named entities recognized by ThMLOLB::ProcessText, in match order, with the
+// single byte each one is replaced by.  Bytes above 0x7F are ISO-8859-1 codes
+// written as hex escapes so the result does not depend on how this source
+// file is encoded.  A name matches when it is a prefix of the entity text;
+// the first match wins.
+static const struct {
+    const char *name;
+    char value;
+} thmlolbEntities[] = {
+    {"nbsp", ' '},       {"quot", '"'},       {"amp", '&'},
+    {"lt", '<'},         {"gt", '>'},         {"brvbar", '|'},
+    {"sect", '\xA7'},    {"copy", '\xA9'},    {"laquo", '\xAB'},
+    {"reg", '\xAE'},     {"acute", '\xB4'},   {"para", '\xB6'},
+    {"raquo", '\xBB'},
+
+    {"Aacute", '\xC1'},  {"Agrave", '\xC0'},  {"Acirc", '\xC2'},
+    {"Auml", '\xC4'},    {"Atilde", '\xC3'},  {"Aring", '\xC5'},
+    {"aacute", '\xE1'},  {"agrave", '\xE0'},  {"acirc", '\xE2'},
+    {"auml", '\xE4'},    {"atilde", '\xE3'},  {"aring", '\xE5'},
+    {"Eacute", '\xC9'},  {"Egrave", '\xC8'},  {"Ecirc", '\xCA'},
+    {"Euml", '\xCB'},    {"eacute", '\xE9'},  {"egrave", '\xE8'},
+    {"ecirc", '\xEA'},   {"euml", '\xEB'},
+    {"Iacute", '\xCD'},  {"Igrave", '\xCC'},  {"Icirc", '\xCE'},
+    {"Iuml", '\xCF'},    {"iacute", '\xED'},  {"igrave", '\xEC'},
+    {"icirc", '\xEE'},   {"iuml", '\xEF'},
+    {"Oacute", '\xD3'},  {"Ograve", '\xD2'},  {"Ocirc", '\xD4'},
+    {"Ouml", '\xD6'},    {"Otilde", '\xD5'},  {"oacute", '\xF3'},
+    {"ograve", '\xF2'},  {"ocirc", '\xF4'},   {"ouml", '\xF6'},
+    {"otilde", '\xF5'},
+    {"Uacute", '\xDA'},  {"Ugrave", '\xD9'},  {"Ucirc", '\xDB'},
+    {"Uuml", '\xDC'},    {"uacute", '\xFA'},  {"ugrave", '\xF9'},
+    {"ucirc", '\xFB'},   {"uuml", '\xFC'},
+    {"Yacute", '\xDD'},  {"yacute", '\xFD'},  {"yuml", '\xFF'},
+
+    {"deg", '\xB0'},     {"plusmn", '\xB1'},  {"sup2", '\xB2'},
+    {"sup3", '\xB3'},    {"sup1", '\xB9'},
+    // was a second, unreachable "nbsp" entry; 0xBA is the ordm character
+    {"ordm", '\xBA'},
+    {"pound", '\xA3'},   {"cent", '\xA2'},    {"frac14", '\xBC'},
+    {"frac12", '\xBD'},  {"frac34", '\xBE'},  {"iquest", '\xBF'},
+    {"iexcl", '\xA1'},   {"ETH", '\xD0'},     {"eth", '\xF0'},
+    {"THORN", '\xDE'},   {"thorn", '\xFE'},   {"AElig", '\xC6'},
+    {"aelig", '\xE6'},   {"Oslash", '\xD8'},  {"curren", '\xA4'},
+    {"Ccedil", '\xC7'},  {"ccedil", '\xE7'},  {"szlig", '\xDF'},
+    {"Ntilde", '\xD1'},  {"ntilde", '\xF1'},  {"yen", '\xA5'},
+    {"not", '\xAC'},     {"ordf", '\xAA'},    {"uml", '\xA8'},
+    {"shy", '\xAD'},     {"macr", '\xAF'}
+};
+
+
+/**
+ * Rewrites ThML text in place into the marker syntax this filter emits.
+ *
+ *  - "&name;" entities listed in thmlolbEntities become one byte; unknown
+ *    entities are dropped.
+ *  - Strongs sync tags with a G or H value become "<number>".
+ *  - scripRef -> '#', /scripRef -> ' ', note -> '{', /note -> '}'.
+ *  - font and /font -> "\\" (two backslashes).
+ *  - tags starting with i or /i -> "\@", with b or /b -> "\$".
+ *  - every other tag is dropped; plain text is copied through.
+ *
+ * NOTE(review): the i/b rule looks only at the first letter of the tag, so a
+ * tag such as "br" or "img" is also turned into a bold/italic marker.  That
+ * is unchanged here -- confirm whether it is intended.
+ *
+ * @param text   NUL-terminated buffer, rewritten in place
+ * @param maxlen when the string is shorter than this, it is first moved so
+ *               that it ends at text[maxlen - 1], leaving the front free
+ * @return always 0
+ */
+char ThMLOLB::ProcessText(char *text, int maxlen)
+{
+    char *to, *from, token[2048];
+    int tokpos = 0;
+    bool intoken = false;
+    bool ampersand = false;    // the open token is an "&...;" entity
+    int len;
+    int i;
+    const unsigned int entityCount = sizeof(thmlolbEntities) / sizeof(thmlolbEntities[0]);
+
+    len = strlen(text) + 1;    // shift string to right of buffer
+    if (len < maxlen) {
+        memmove(&text[maxlen - len], text, len);
+        from = &text[maxlen - len];
+    }
+    else from = text;
+
+    for (to = text; *from; from++) {
+        if (*from == '<') {
+            intoken = true;
+            tokpos = 0;
+            memset(token, 0, sizeof(token));
+            ampersand = false;
+            continue;
+        }
+        else if (*from == '&') {
+            intoken = true;
+            tokpos = 0;
+            memset(token, 0, sizeof(token));
+            ampersand = true;
+            continue;
+        }
+
+        // A ';' only closes an entity that is actually open.  Without the
+        // intoken test, every later ';' in plain text re-emitted the
+        // previous entity and was itself swallowed.
+        if (*from == ';' && ampersand && intoken) {
+            intoken = false;
+            for (unsigned int e = 0; e < entityCount; e++) {
+                const char *name = thmlolbEntities[e].name;
+                if (!strncmp(name, token, strlen(name))) {
+                    *to++ = thmlolbEntities[e].value;
+                    break;
+                }
+            }
+            continue;
+        }
+        // Likewise a '>' only closes a tag that is open; a stray one is
+        // copied as text instead of re-processing the previous tag.
+        else if (*from == '>' && !ampersand && intoken) {
+            intoken = false;
+            // process desired tokens
+            if (!strncmp(token, "sync type=\"Strongs\" value=\"", 27)
+                    && (token[27] == 'G' || token[27] == 'H')) {
+                // The old compare length of 27 stopped before the G/H
+                // letter, so any Strongs value was accepted.  The number
+                // follows the letter; stop at the closing quote or at the
+                // end of the token if the quote is missing.
+                *to++ = '<';
+                for (i = 28; token[i] && token[i] != '\"'; i++)
+                    *to++ = token[i];
+                *to++ = '>';
+                continue;
+            }
+            else if (!strncmp(token, "scripRef", 8)) {
+                *to++ = '#';
+                continue;
+            }
+            else if (!strncmp(token, "/scripRef", 9)) {
+                *to++ = ' ';
+                continue;
+            }
+            else if (!strncmp(token, "note ", 5)) {
+                *to++ = '{';
+                continue;
+            }
+            else if (!strncmp(token, "/note", 5)) {
+                *to++ = '}';
+                continue;
+            }
+            else if (!strnicmp(token, "font", 4) || !strnicmp(token, "/font", 5)) {
+                *to++ = '\\';
+                *to++ = '\\';
+                continue;
+            }
+            else switch (*token) {
+            case 'I':    // italic start
+            case 'i':
+                *to++ = '\\';
+                *to++ = '@';
+                continue;
+            case 'B':    // bold start
+            case 'b':
+                *to++ = '\\';
+                *to++ = '$';
+                continue;
+            case '/':
+                switch (token[1]) {
+                case 'I':
+                case 'i':    // italic end
+                    *to++ = '\\';
+                    *to++ = '@';
+                    continue;
+                case 'B':    // bold end
+                case 'b':
+                    *to++ = '\\';
+                    *to++ = '$';
+                    continue;
+                }
+            }
+            continue;
+        }
+
+        if (intoken) {
+            // token was zero-filled when it was opened, so stopping at 2047
+            // characters keeps it NUL-terminated
+            if (tokpos < 2047)
+                token[tokpos++] = *from;
+        }
+        else *to++ = *from;
+    }
+    // NOTE(review): when nothing was removed, the second terminator lands one
+    // byte past the input's own NUL -- confirm callers always provide it.
+    *to++ = 0;
+    *to = 0;
+    return 0;
+}
+
+
+
+
+
+
diff --git a/src/modules/filters/thmlplain.cpp b/src/modules/filters/thmlplain.cpp
new file mode 100644
index 0000000..5609f16
--- /dev/null
+++ b/src/modules/filters/thmlplain.cpp
@@ -0,0 +1,201 @@
+/******************************************************************************
+ *
+ * thmlplain - SWFilter descendant to strip out all ThML tags or convert to
+ * ASCII rendered symbols.
+ */
+
+
+#include <stdlib.h>
+#include <string.h>
+#include <thmlplain.h>
+
+
+ThMLPlain::ThMLPlain() { // nothing to set up: ProcessText keeps all of its working state in locals
+}
+
+
+char ThMLPlain::ProcessText(char *text, int maxlen)
+{
+ char *to, *from, token[2048];
+ int tokpos = 0;
+ bool intoken = false;
+ int len;
+ bool ampersand = false;
+
+ len = strlen(text) + 1; // shift string to right of buffer
+ if (len < maxlen) {
+ memmove(&text[maxlen - len], text, len);
+ from = &text[maxlen - len];
+ }
+ else from = text; // -------------------------------
+
+ for (to = text; *from; from++) {
+ if (*from == 10 || *from == 13)
+ from++;
+ if (*from == '<') {
+ intoken = true;
+ tokpos = 0;
+ token[0] = 0;
+ token[1] = 0;
+ token[2] = 0;
+ ampersand = false;
+ continue;
+ }
+ else if (*from == '&') {
+ intoken = true;
+ tokpos = 0;
+ token[0] = 0;
+ token[1] = 0;
+ token[2] = 0;
+ ampersand = true;
+ continue;
+ }
+ if (*from == ';' && ampersand) {
+ intoken = false;
+
+ if (!strncmp("nbsp", token, 4)) *to++ = ' ';
+ else if (!strncmp("quot", token, 4)) *to++ = '"';
+ else if (!strncmp("amp", token, 3)) *to++ = '&';
+ else if (!strncmp("lt", token, 2)) *to++ = '<';
+ else if (!strncmp("gt", token, 2)) *to++ = '>';
+ else if (!strncmp("brvbar", token, 6)) *to++ = '|';
+ else if (!strncmp("sect", token, 4)) *to++ = '§';
+ else if (!strncmp("copy", token, 4)) *to++ = '©';
+ else if (!strncmp("laquo", token, 5)) *to++ = '«';
+ else if (!strncmp("reg", token, 3)) *to++ = '®';
+ else if (!strncmp("acute", token, 5)) *to++ = '´';
+ else if (!strncmp("para", token, 4)) *to++ = '¶';
+ else if (!strncmp("raquo", token, 5)) *to++ = '»';
+
+ else if (!strncmp("Aacute", token, 6)) *to++ = 'Á';
+ else if (!strncmp("Agrave", token, 6)) *to++ = 'À';
+ else if (!strncmp("Acirc", token, 5)) *to++ = 'Â';
+ else if (!strncmp("Auml", token, 4)) *to++ = 'Ä';
+ else if (!strncmp("Atilde", token, 6)) *to++ = 'Ã';
+ else if (!strncmp("Aring", token, 5)) *to++ = 'Å';
+ else if (!strncmp("aacute", token, 6)) *to++ = 'á';
+ else if (!strncmp("agrave", token, 6)) *to++ = 'à';
+ else if (!strncmp("acirc", token, 5)) *to++ = 'â';
+ else if (!strncmp("auml", token, 4)) *to++ = 'ä';
+ else if (!strncmp("atilde", token, 6)) *to++ = 'ã';
+ else if (!strncmp("aring", token, 5)) *to++ = 'å';
+ else if (!strncmp("Eacute", token, 6)) *to++ = 'É';
+ else if (!strncmp("Egrave", token, 6)) *to++ = 'È';
+ else if (!strncmp("Ecirc", token, 5)) *to++ = 'Ê';
+ else if (!strncmp("Euml", token, 4)) *to++ = 'Ë';
+ else if (!strncmp("eacute", token, 6)) *to++ = 'é';
+ else if (!strncmp("egrave", token, 6)) *to++ = 'è';
+ else if (!strncmp("ecirc", token, 5)) *to++ = 'ê';
+ else if (!strncmp("euml", token, 4)) *to++ = 'ë';
+ else if (!strncmp("Iacute", token, 6)) *to++ = 'Í';
+ else if (!strncmp("Igrave", token, 6)) *to++ = 'Ì';
+ else if (!strncmp("Icirc", token, 5)) *to++ = 'Î';
+ else if (!strncmp("Iuml", token, 4)) *to++ = 'Ï';
+ else if (!strncmp("iacute", token, 6)) *to++ = 'í';
+ else if (!strncmp("igrave", token, 6)) *to++ = 'ì';
+ else if (!strncmp("icirc", token, 5)) *to++ = 'î';
+ else if (!strncmp("iuml", token, 4)) *to++ = 'ï';
+ else if (!strncmp("Oacute", token, 6)) *to++ = 'Ó';
+ else if (!strncmp("Ograve", token, 6)) *to++ = 'Ò';
+ else if (!strncmp("Ocirc", token, 5)) *to++ = 'Ô';
+ else if (!strncmp("Ouml", token, 4)) *to++ = 'Ö';
+ else if (!strncmp("Otilde", token, 6)) *to++ = 'Õ';
+ else if (!strncmp("oacute", token, 6)) *to++ = 'ó';
+ else if (!strncmp("ograve", token, 6)) *to++ = 'ò';
+ else if (!strncmp("ocirc", token, 5)) *to++ = 'ô';
+ else if (!strncmp("ouml", token, 4)) *to++ = 'ö';
+ else if (!strncmp("otilde", token, 6)) *to++ = 'õ';
+ else if (!strncmp("Uacute", token, 6)) *to++ = 'Ú';
+ else if (!strncmp("Ugrave", token, 6)) *to++ = 'Ù';
+ else if (!strncmp("Ucirc", token, 5)) *to++ = 'Û';
+ else if (!strncmp("Uuml", token, 4)) *to++ = 'Ü';
+ else if (!strncmp("uacute", token, 6)) *to++ = 'ú';
+ else if (!strncmp("ugrave", token, 6)) *to++ = 'ù';
+ else if (!strncmp("ucirc", token, 5)) *to++ = 'û';
+ else if (!strncmp("uuml", token, 4)) *to++ = 'ü';
+ else if (!strncmp("Yacute", token, 6)) *to++ = 'Ý';
+ else if (!strncmp("yacute", token, 6)) *to++ = 'ý';
+ else if (!strncmp("yuml", token, 4)) *to++ = 'ÿ';
+
+ else if (!strncmp("deg", token, 3)) *to++ = '°';
+ else if (!strncmp("plusmn", token, 6)) *to++ = '±';
+ else if (!strncmp("sup2", token, 4)) *to++ = '²';
+ else if (!strncmp("sup3", token, 4)) *to++ = '³';
+ else if (!strncmp("sup1", token, 4)) *to++ = '¹';
+ else if (!strncmp("nbsp", token, 4)) *to++ = 'º';
+ else if (!strncmp("pound", token, 5)) *to++ = '£';
+ else if (!strncmp("cent", token, 4)) *to++ = '¢';
+ else if (!strncmp("frac14", token, 6)) *to++ = '¼';
+ else if (!strncmp("frac12", token, 6)) *to++ = '½';
+ else if (!strncmp("frac34", token, 6)) *to++ = '¾';
+ else if (!strncmp("iquest", token, 6)) *to++ = '¿';
+ else if (!strncmp("iexcl", token, 5)) *to++ = '¡';
+ else if (!strncmp("ETH", token, 3)) *to++ = 'Ð';
+ else if (!strncmp("eth", token, 3)) *to++ = 'ð';
+ else if (!strncmp("THORN", token, 5)) *to++ = 'Þ';
+ else if (!strncmp("thorn", token, 5)) *to++ = 'þ';
+ else if (!strncmp("AElig", token, 5)) *to++ = 'Æ';
+ else if (!strncmp("aelig", token, 5)) *to++ = 'æ';
+ else if (!strncmp("Oslash", token, 6)) *to++ = 'Ø';
+ else if (!strncmp("curren", token, 6)) *to++ = '¤';
+ else if (!strncmp("Ccedil", token, 6)) *to++ = 'Ç';
+ else if (!strncmp("ccedil", token, 6)) *to++ = 'ç';
+ else if (!strncmp("szlig", token, 5)) *to++ = 'ß';
+ else if (!strncmp("Ntilde", token, 6)) *to++ = 'Ñ';
+ else if (!strncmp("ntilde", token, 6)) *to++ = 'ñ';
+ else if (!strncmp("yen", token, 3)) *to++ = '¥';
+ else if (!strncmp("not", token, 3)) *to++ = '¬';
+ else if (!strncmp("ordf", token, 4)) *to++ = 'ª';
+ else if (!strncmp("uml", token, 3)) *to++ = '¨';
+ else if (!strncmp("shy", token, 3)) *to++ = '­';
+ else if (!strncmp("macr", token, 4)) *to++ = '¯';
+ continue;
+
+ }
+ else if (*from == '>' && !ampersand) {
+ intoken = false;
+ // process desired tokens
+ if (!strncmp(token, "sync type=\"Strongs\" value=\"", 27)) {
+ *to++ = ' ';
+ *to++ = '<';
+ for (unsigned int i = 27; token[i] != '\"'; i++)
+ *to++ = token[i];
+ *to++ = '>';
+ continue;
+ }
+ if (!strncmp(token, "sync type=\"morph\" value=\"", 25)) {
+ *to++ = ' ';
+ *to++ = '(';
+ for (unsigned int i = 25; token[i] != '\"'; i++)
+ *to++ = token[i];
+ *to++ = ')';
+ continue;
+ }
+ if (!strncmp("note", token, 4)) {
+ *to++ = ' ';
+ *to++ = '(';
+ }
+ else if (!strncmp("br", token, 2))
+ *to++ = '\n';
+ else if (!strncmp("/p", token, 2))
+ *to++ = '\n';
+ else if (!strncmp("/note", token, 5)) {
+ *to++ = ')';
+ *to++ = ' ';
+ }
+ continue;
+ }
+ if (intoken) {
+ if (tokpos < 2045)
+ token[tokpos++] = *from;
+ token[tokpos+2] = 0;
+ }
+ else *to++ = *from;
+ }
+ *to++ = 0;
+ *to = 0;
+
+ return 0;
+}
+
+
diff --git a/src/modules/filters/thmlrtf.cpp b/src/modules/filters/thmlrtf.cpp
new file mode 100644
index 0000000..76289ec
--- /dev/null
+++ b/src/modules/filters/thmlrtf.cpp
@@ -0,0 +1,219 @@
+/***************************************************************************
+ thmlrtf.cpp - ThML to RTF filter
+ -------------------
+ begin : 1999-10-27
+ copyright : 2001 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include <thmlrtf.h>
+
+
+ThMLRTF::ThMLRTF() // registers the tag/entity delimiters and the fixed ThML-to-RTF substitution tables
+{
+ setTokenStart("<");
+ setTokenEnd(">");
+
+ setEscapeStart("&");
+ setEscapeEnd(";");
+
+ setEscapeStringCaseSensitive(true);
+
+ addEscapeStringSubstitute("nbsp", " ");
+ addEscapeStringSubstitute("quot", "\"");
+ addEscapeStringSubstitute("amp", "&");
+ addEscapeStringSubstitute("lt", "<");
+ addEscapeStringSubstitute("gt", ">");
+ addEscapeStringSubstitute("brvbar", "|");
+ addEscapeStringSubstitute("sect", "§");
+ addEscapeStringSubstitute("copy", "©");
+ addEscapeStringSubstitute("laquo", "«");
+ addEscapeStringSubstitute("reg", "®");
+ addEscapeStringSubstitute("acute", "´");
+ addEscapeStringSubstitute("para", "¶");
+ addEscapeStringSubstitute("raquo", "»");
+
+ addEscapeStringSubstitute("Aacute", "Á");
+ addEscapeStringSubstitute("Agrave", "À");
+ addEscapeStringSubstitute("Acirc", "Â");
+ addEscapeStringSubstitute("Auml", "Ä");
+ addEscapeStringSubstitute("Atilde", "Ã");
+ addEscapeStringSubstitute("Aring", "Å");
+ addEscapeStringSubstitute("aacute", "á");
+ addEscapeStringSubstitute("agrave", "à");
+ addEscapeStringSubstitute("acirc", "â");
+ addEscapeStringSubstitute("auml", "ä");
+ addEscapeStringSubstitute("atilde", "ã");
+ addEscapeStringSubstitute("aring", "å");
+ addEscapeStringSubstitute("Eacute", "É");
+ addEscapeStringSubstitute("Egrave", "È");
+ addEscapeStringSubstitute("Ecirc", "Ê");
+ addEscapeStringSubstitute("Euml", "Ë");
+ addEscapeStringSubstitute("eacute", "é");
+ addEscapeStringSubstitute("egrave", "è");
+ addEscapeStringSubstitute("ecirc", "ê");
+ addEscapeStringSubstitute("euml", "ë");
+ addEscapeStringSubstitute("Iacute", "Í");
+ addEscapeStringSubstitute("Igrave", "Ì");
+ addEscapeStringSubstitute("Icirc", "Î");
+ addEscapeStringSubstitute("Iuml", "Ï");
+ addEscapeStringSubstitute("iacute", "í");
+ addEscapeStringSubstitute("igrave", "ì");
+ addEscapeStringSubstitute("icirc", "î");
+ addEscapeStringSubstitute("iuml", "ï");
+ addEscapeStringSubstitute("Oacute", "Ó");
+ addEscapeStringSubstitute("Ograve", "Ò");
+ addEscapeStringSubstitute("Ocirc", "Ô");
+ addEscapeStringSubstitute("Ouml", "Ö");
+ addEscapeStringSubstitute("Otilde", "Õ");
+ addEscapeStringSubstitute("oacute", "ó");
+ addEscapeStringSubstitute("ograve", "ò");
+ addEscapeStringSubstitute("ocirc", "ô");
+ addEscapeStringSubstitute("ouml", "ö");
+ addEscapeStringSubstitute("otilde", "õ");
+ addEscapeStringSubstitute("Uacute", "Ú");
+ addEscapeStringSubstitute("Ugrave", "Ù");
+ addEscapeStringSubstitute("Ucirc", "Û");
+ addEscapeStringSubstitute("Uuml", "Ü");
+ addEscapeStringSubstitute("uacute", "ú");
+ addEscapeStringSubstitute("ugrave", "ù");
+ addEscapeStringSubstitute("ucirc", "û");
+ addEscapeStringSubstitute("uuml", "ü");
+ addEscapeStringSubstitute("Yacute", "Ý");
+ addEscapeStringSubstitute("yacute", "ý");
+ addEscapeStringSubstitute("yuml", "ÿ");
+
+ addEscapeStringSubstitute("deg", "°");
+ addEscapeStringSubstitute("plusmn", "±");
+ addEscapeStringSubstitute("sup2", "²");
+ addEscapeStringSubstitute("sup3", "³");
+ addEscapeStringSubstitute("sup1", "¹");
+ addEscapeStringSubstitute("ordm", "º"); // was a second "nbsp" registration, which left &ordm; unmapped and gave &nbsp; two conflicting values
+ addEscapeStringSubstitute("pound", "£");
+ addEscapeStringSubstitute("cent", "¢");
+ addEscapeStringSubstitute("frac14", "¼");
+ addEscapeStringSubstitute("frac12", "½");
+ addEscapeStringSubstitute("frac34", "¾");
+ addEscapeStringSubstitute("iquest", "¿");
+ addEscapeStringSubstitute("iexcl", "¡");
+ addEscapeStringSubstitute("ETH", "Ð");
+ addEscapeStringSubstitute("eth", "ð");
+ addEscapeStringSubstitute("THORN", "Þ");
+ addEscapeStringSubstitute("thorn", "þ");
+ addEscapeStringSubstitute("AElig", "Æ");
+ addEscapeStringSubstitute("aelig", "æ");
+ addEscapeStringSubstitute("Oslash", "Ø");
+ addEscapeStringSubstitute("curren", "¤");
+ addEscapeStringSubstitute("Ccedil", "Ç");
+ addEscapeStringSubstitute("ccedil", "ç");
+ addEscapeStringSubstitute("szlig", "ß");
+ addEscapeStringSubstitute("Ntilde", "Ñ");
+ addEscapeStringSubstitute("ntilde", "ñ");
+ addEscapeStringSubstitute("yen", "¥");
+ addEscapeStringSubstitute("not", "¬");
+ addEscapeStringSubstitute("ordf", "ª");
+ addEscapeStringSubstitute("uml", "¨");
+ addEscapeStringSubstitute("shy", "\xAD"); // soft hyphen, spelled as an escape because the raw character is invisible
+ addEscapeStringSubstitute("macr", "¯");
+
+ setTokenCaseSensitive(true);
+
+ addTokenSubstitute("/scripRef", "|}");
+ addTokenSubstitute("/note", ") }");
+
+ addTokenSubstitute("br", "\\line ");
+ addTokenSubstitute("br /", "\\line ");
+ addTokenSubstitute("i", "{\\i1 ");
+ addTokenSubstitute("/i", "}");
+ addTokenSubstitute("b", "{\\b1 ");
+ addTokenSubstitute("/b", "}");
+ addTokenSubstitute("p", "\\par ");
+
+ //we need uppercase forms for the moment to support a few early ThML modules that aren't XHTML compliant
+ addTokenSubstitute("BR", "\\line ");
+ addTokenSubstitute("I", "{\\i1 ");
+ addTokenSubstitute("/I", "}");
+ addTokenSubstitute("B", "{\\b1 ");
+ addTokenSubstitute("/B", "}");
+ addTokenSubstitute("P", "\\par ");
+}
+
+bool ThMLRTF::handleToken(char **buf, const char *token, DualStringMap &userData) { // writes the RTF for one ThML tag to *buf; returns false when the tag is not recognised
+ if (!substituteToken(buf, token)) {
+ // manually process if it wasn't a simple substitution
+ if (!strncmp(token, "sync type=\"Strongs\" value=\"", 27)) { // Strong's tags are matched but emit nothing: the old rendering below is commented out
+/* if (token[27] == 'H' || token[27] == 'G' || token[27] == 'A') {
+ pushString(buf, " {\\fs15 <");
+ for (unsigned int i = 28; token[i] != '\"'; i++)
+ *(*buf)++ = token[i];
+ pushString(buf, ">}");
+ }
+ else if (token[27] == 'T') {
+ pushString(buf, " {\\fs15 (");
+ for (unsigned int i = 28; token[i] != '\"'; i++)
+ *(*buf)++ = token[i];
+ pushString(buf, ")}");
+ }
+ }
+ else if (!strncmp(token, "sync type=\"morph\" ", 18)) {
+ pushString(buf, " {\\fs15 (");
+ for (const char *tok = token + 5; *tok; tok++) {
+ if (!strncmp(tok, "value=\"", 7)) {
+ tok += 7;
+ for (;*tok != '\"'; tok++)
+ *(*buf)++ = *tok;
+ break;
+ }
+ }
+
+ pushString(buf, ")}");
+*/ }
+ else if (!strncmp(token, "sync type=\"lemma\" value=\"", 25)) {
+ pushString(buf, "{\\fs15 (");
+ for (unsigned int i = 25; token[i] && token[i] != '\"'; i++) // also stop at the end of the token when the closing quote is missing
+ *(*buf)++ = token[i];
+ pushString(buf, ")}");
+ }
+ else if (!strncmp(token, "scripRef", 8)) {
+ pushString(buf, "{\\cf2 #");
+ }
+ else if (!strncmp(token, "div", 3)) {
+ *(*buf)++ = '{';
+ if (!strncmp(token, "div class=\"title\"", 17)) {
+ pushString(buf, "\\par\\i1\\b1 ");
+ userData["sechead"] = "true"; // remembered so the matching /div can add a paragraph break
+ }
+ else if (!strncmp(token, "div class=\"sechead\"", 19)) {
+ pushString(buf, "\\par\\i1\\b1 ");
+ userData["sechead"] = "true";
+ }
+ }
+ else if (!strncmp(token, "/div", 4)) {
+ *(*buf)++ = '}';
+ if (userData["sechead"] == "true") {
+ pushString(buf, "\\par ");
+ userData["sechead"] = "false"; // was `==`, a comparison with no effect, so the flag was never cleared
+ }
+ }
+ else if (!strncmp(token, "note", 4)) {
+ pushString(buf, " {\\i1\\fs15 (");
+ }
+
+ else {
+ return false; // we still didn't handle token
+ }
+ }
+ return true;
+}
+
+
diff --git a/src/modules/filters/thmlscripref.cpp b/src/modules/filters/thmlscripref.cpp
new file mode 100644
index 0000000..23edd6d
--- /dev/null
+++ b/src/modules/filters/thmlscripref.cpp
@@ -0,0 +1,103 @@
+/******************************************************************************
+ *
+ * thmlscripref - SWFilter descendant to hide or show scripture references
+ * in a ThML module.
+ */
+
+
+#include <stdlib.h>
+#include <string.h>
+#include <thmlscripref.h>
+#ifndef __GNUC__
+#else
+#include <unixstr.h>
+#endif
+
+
+const char ThMLScripref::on[] = "On"; // option value that keeps scripRef elements in the text
+const char ThMLScripref::off[] = "Off"; // option value under which ProcessText removes scripRef elements
+const char ThMLScripref::optName[] = "Scripture Cross-references"; // option name string
+const char ThMLScripref::optTip[] = "Toggles Scripture Cross-references On and Off if they exist"; // descriptive tip string for the option
+
+
+ThMLScripref::ThMLScripref() { // starts with the option off and registers the two selectable values
+ option = false; // off: ProcessText strips scripRef elements until "On" is selected
+ options.push_back(on);
+ options.push_back(off);
+}
+
+
+ThMLScripref::~ThMLScripref() { // nothing to release in this body
+}
+
+void ThMLScripref::setOptionValue(const char *ival) // turns the option on or off from its value string
+{
+ option = (!stricmp(ival, on)); // true only when stricmp reports ival equal to "On"; any other string turns the option off
+}
+
+const char *ThMLScripref::getOptionValue() // returns "On" or "Off" according to the current option state
+{
+ return (option) ? on:off;
+}
+
+char ThMLScripref::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) // removes <scripRef>...</scripRef> spans from `text` in place when the option is off; key and module are not used; always returns 0
+{
+ if (!option) { // if we don't want scriprefs
+ char *to, *from, token[2048]; // cheese. Fix.
+ int tokpos = 0;
+ bool intoken = false;
+ int len;
+ bool hide = false; // true while inside a scripRef element
+
+ len = strlen(text) + 1; // shift string to right of buffer
+ if (len < maxlen) {
+ memmove(&text[maxlen - len], text, len);
+ from = &text[maxlen - len];
+ }
+ else from = text; // -------------------------------
+
+ for (to = text; *from; from++) {
+ if (*from == '<') { // start collecting a new tag
+ intoken = true;
+ tokpos = 0;
+ token[0] = 0;
+ token[1] = 0;
+ token[2] = 0;
+ continue;
+ }
+ if (*from == '>') { // process tokens
+ intoken = false;
+ if (!strnicmp(token, "scripRef", 8)) { // opening tag: drop it and start hiding
+ hide = true;
+ continue;
+ }
+ else if (!strnicmp(token, "/scripRef", 9)) { // closing tag: drop it and stop hiding
+ hide = false;
+ continue;
+ }
+
+ // if not a scripref token, keep token in text
+ if (!hide) {
+ *to++ = '<';
+ for (char *tok = token; *tok; tok++)
+ *to++ = *tok;
+ *to++ = '>';
+ }
+ continue;
+ }
+ if (intoken) {
+ if (tokpos < 2045) // characters beyond the token buffer are discarded
+ token[tokpos++] = *from;
+ token[tokpos+2] = 0; // keeps the token NUL-terminated as it grows
+ }
+ else {
+ if (!hide) { // text inside a scripRef element is dropped
+ *to++ = *from;
+ }
+ }
+ }
+ *to++ = 0;
+ *to = 0;
+ }
+ return 0;
+}
diff --git a/src/modules/filters/thmlstrongs.cpp b/src/modules/filters/thmlstrongs.cpp
new file mode 100644
index 0000000..8d0466c
--- /dev/null
+++ b/src/modules/filters/thmlstrongs.cpp
@@ -0,0 +1,138 @@
+/******************************************************************************
+ *
+ * thmlstrongs - SWFilter descendant to hide or show strongs number
+ * in a ThML module.
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <thmlstrongs.h>
+#include <swmodule.h>
+#ifndef __GNUC__
+#else
+#include <unixstr.h>
+#endif
+
+
+const char ThMLStrongs::on[] = "On"; // option value that keeps Strong's sync tags in the text
+const char ThMLStrongs::off[] = "Off"; // option value under which ProcessText removes Strong's sync tags
+const char ThMLStrongs::optName[] = "Strong's Numbers"; // option name string
+const char ThMLStrongs::optTip[] = "Toggles Strong's Numbers On and Off if they exist"; // descriptive tip string for the option
+
+
+ThMLStrongs::ThMLStrongs() { // starts with the option off and registers the two selectable values
+ option = false; // off: ProcessText strips Strong's sync tags until "On" is selected
+ options.push_back(on);
+ options.push_back(off);
+}
+
+
+ThMLStrongs::~ThMLStrongs() { // nothing to release in this body
+}
+
+void ThMLStrongs::setOptionValue(const char *ival) // turns the option on or off from its value string
+{
+ option = (!stricmp(ival, on)); // true only when stricmp reports ival equal to "On"; any other string turns the option off
+}
+
+const char *ThMLStrongs::getOptionValue() // returns "On" or "Off" according to the current option state
+{
+ return (option) ? on:off;
+}
+
+char ThMLStrongs::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) // records Strong's/morph entry attributes and, when the option is off, removes Strong's sync tags from `text` in place; always returns 0
+{
+ char *to, *from, token[2048]; // cheese. Fix.
+ int tokpos = 0;
+ bool intoken = false;
+ int len;
+ bool lastspace = false; // whether the last character copied to the output was a space
+ int word = 1; // running number of the Strong's tags seen so far
+ char val[128];
+ char wordstr[5]; // NOTE(review): "%03d" fits only while the number stays below 10000
+ char *valto;
+ char *ch;
+
+ len = strlen(text) + 1; // shift string to right of buffer
+ if (len < maxlen) {
+ memmove(&text[maxlen - len], text, len);
+ from = &text[maxlen - len];
+ }
+ else from = text;
+
+ // -------------------------------
+
+ for (to = text; *from; from++) {
+ if (*from == '<') {
+ intoken = true;
+ tokpos = 0;
+ token[0] = 0;
+ token[1] = 0;
+ token[2] = 0;
+ continue;
+ }
+ if (*from == '>') { // process tokens
+ intoken = false;
+ if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs
+ if (module->isProcessEntryAttributes()) {
+ valto = val;
+ for (unsigned int i = 27; i < (unsigned int)tokpos && token[i] != '\"' && i < 150; i++) // tokpos is the token length: a short or unterminated tag no longer copies stale buffer bytes
+ *valto++ = token[i];
+ *valto = 0;
+ sprintf(wordstr, "%03d", word++);
+ module->getEntryAttributes()["Word"][wordstr]["Strongs"] = val;
+ }
+
+ if (!option) { // if we don't want strongs
+ if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) {
+ if (lastspace) // avoid leaving a space in front of the following punctuation or a doubled space
+ to--;
+ }
+ continue;
+ }
+ }
+ if (module->isProcessEntryAttributes()) {
+ if (!strncmp(token, "sync type=\"morph\"", 17)) {
+ for (ch = token+17; *ch; ch++) {
+ if (!strncmp(ch, "class=\"", 7)) {
+ valto = val;
+ for (unsigned int i = 7; ch[i] && ch[i] != '\"' && i < 127; i++) // also stop at the end of the token when the closing quote is missing
+ *valto++ = ch[i];
+ *valto = 0;
+ sprintf(wordstr, "%03d", word-1);
+ module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = val;
+ }
+ if (!strncmp(ch, "value=\"", 7)) {
+ valto = val;
+ for (unsigned int i = 7; ch[i] && ch[i] != '\"' && i < 127; i++) // same guard as the class copy above
+ *valto++ = ch[i];
+ *valto = 0;
+ sprintf(wordstr, "%03d", word-1);
+ module->getEntryAttributes()["Word"][wordstr]["Morph"] = val;
+ }
+ }
+ }
+ }
+ // if not a strongs token, keep token in text
+ *to++ = '<';
+ for (char *tok = token; *tok; tok++)
+ *to++ = *tok;
+ *to++ = '>';
+ continue;
+ }
+ if (intoken) {
+ if (tokpos < 2045) // characters beyond the token buffer are discarded
+ token[tokpos++] = *from;
+ token[tokpos+2] = 0; // keeps the token NUL-terminated as it grows
+ }
+ else {
+ *to++ = *from;
+ lastspace = (*from == ' ');
+ }
+ }
+ *to++ = 0;
+ *to = 0;
+ return 0;
+}
diff --git a/src/modules/filters/thmlvariants.cpp b/src/modules/filters/thmlvariants.cpp
new file mode 100644
index 0000000..fda0950
--- /dev/null
+++ b/src/modules/filters/thmlvariants.cpp
@@ -0,0 +1,183 @@
+/******************************************************************************
+ *
+ * thmlvariants - SWFilter descendant to hide or show textual variants
+ * in a ThML module.
+ */
+
+
+#include <stdlib.h>
+#include <string.h>
+#include <thmlvariants.h>
+#ifndef __GNUC__
+#else
+#include <unixstr.h>
+#endif
+
+
+const char ThMLVariants::primary[] = "Primary Reading"; // value string returned by getOptionValue for option 0
+const char ThMLVariants::secondary[] = "Secondary Reading"; // value string returned by getOptionValue for option 1
+const char ThMLVariants::all[] = "All Readings"; // value string returned by getOptionValue for any other option value
+
+const char ThMLVariants::optName[] = "Textual Variants"; // option name string
+const char ThMLVariants::optTip[] = "Switch between Textual Variants modes"; // descriptive tip string for the option
+
+
+ThMLVariants::ThMLVariants() { // starts in mode 0 and registers the three selectable values
+ option = false; // i.e. 0, which getOptionValue reports as "Primary Reading"
+ options.push_back(primary);
+ options.push_back(secondary);
+ options.push_back(all);
+}
+
+
+ThMLVariants::~ThMLVariants() { // nothing to release in this body
+}
+
+void ThMLVariants::setOptionValue(const char *ival) // selects the reading mode from its value string; unrecognised strings select "all"
+{
+ option = (!stricmp(ival, primary)) ? 0 : (!stricmp(ival, secondary)) ? 1 : 2; // 0/1/other = primary/secondary/all, as getOptionValue and ProcessText test it (was 1 for primary, 0 otherwise). NOTE(review): assumes `option` can hold 2 - confirm its declared type
+}
+
+const char *ThMLVariants::getOptionValue() // maps the numeric mode back to its value string
+{
+ if (option == 0) {
+ return primary;
+ }
+ else if (option == 1) {
+ return secondary;
+ }
+ else { // any value other than 0 or 1
+ return all;
+ }
+}
+
+char ThMLVariants::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) // in mode 0 removes variant divs, in mode 1 removes primary divs, otherwise leaves `text` untouched; key and module are not used; always returns 0
+{
+ if (option == 0) { //we want primary only
+ char *to, *from, token[2048]; // cheese. Fix.
+ int tokpos = 0;
+ bool intoken = false;
+ int len;
+ bool hide = false; // true while inside a div that is being removed
+
+ len = strlen(text) + 1; // shift string to right of buffer
+ if (len < maxlen) {
+ memmove(&text[maxlen - len], text, len);
+ from = &text[maxlen - len];
+ }
+ else from = text;
+
+ // -------------------------------
+
+ for (to = text; *from; from++) {
+ if (*from == '<') { // start collecting a new tag
+ intoken = true;
+ tokpos = 0;
+ token[0] = 0;
+ token[1] = 0;
+ token[2] = 0;
+ continue;
+ }
+ if (*from == '>') { // process tokens
+ intoken = false;
+ if (!strncmp(token, "div type=\"variant\"", 19)) { // NOTE(review): 19 includes the literal's terminating NUL, so only a tag that is exactly this text matches - confirm that is intended
+ hide = true;
+ continue;
+ }
+ else if (!strncmp(token, "/div", 4)) { // any closing div ends hiding and is itself dropped
+ hide = false;
+ continue;
+ }
+
+ // if not a variant div token, keep token in text
+ if (!hide) {
+ *to++ = '<';
+ for (char *tok = token; *tok; tok++)
+ *to++ = *tok;
+ *to++ = '>';
+ }
+ continue;
+ }
+ if (intoken) {
+ if (tokpos < 2045) // characters beyond the token buffer are discarded
+ token[tokpos++] = *from;
+ token[tokpos+2] = 0; // keeps the token NUL-terminated as it grows
+ }
+ else {
+ if (!hide) { // text inside a hidden div is dropped
+ *to++ = *from;
+ }
+ }
+ }
+ *to++ = 0;
+ *to = 0;
+
+ }
+ else if (option == 1) { //we want variant only
+ char *to, *from, token[2048]; // cheese. Fix.
+ int tokpos = 0;
+ bool intoken = false;
+ int len;
+ bool hide = false; // true while inside a div that is being removed
+
+ len = strlen(text) + 1; // shift string to right of buffer
+ if (len < maxlen) {
+ memmove(&text[maxlen - len], text, len);
+ from = &text[maxlen - len];
+ }
+ else from = text;
+
+ // -------------------------------
+
+ for (to = text; *from; from++) {
+ if (*from == '<') { // start collecting a new tag
+ intoken = true;
+ tokpos = 0;
+ token[0] = 0;
+ token[1] = 0;
+ token[2] = 0;
+ continue;
+ }
+ if (*from == '>') { // process tokens
+ intoken = false;
+ if (!strncmp(token, "div type=\"primary\"", 19)) { // NOTE(review): same exact-match behaviour as the variant test in the branch above
+ hide = true;
+ continue;
+ }
+ else if (!strncmp(token, "/div", 4)) { // any closing div ends hiding and is itself dropped
+ hide = false;
+ continue;
+ }
+
+ // if not a primary div token, keep token in text
+ if (!hide) {
+ *to++ = '<';
+ for (char *tok = token; *tok; tok++)
+ *to++ = *tok;
+ *to++ = '>';
+ }
+ continue;
+ }
+ if (intoken) {
+ if (tokpos < 2045) // characters beyond the token buffer are discarded
+ token[tokpos++] = *from;
+ token[tokpos+2] = 0; // keeps the token NUL-terminated as it grows
+ }
+ else {
+ if (!hide) { // text inside a hidden div is dropped
+ *to++ = *from;
+ }
+ }
+ }
+ *to++ = 0;
+ *to = 0;
+
+ }
+ return 0;
+}
+
+
+
+
+
+
diff --git a/src/modules/filters/unicodertf.cpp b/src/modules/filters/unicodertf.cpp
new file mode 100644
index 0000000..b53a2d7
--- /dev/null
+++ b/src/modules/filters/unicodertf.cpp
@@ -0,0 +1,70 @@
+/******************************************************************************
+ *
+ * unicodertf - SWFilter descendant to convert a double byte unicode file
+ * to RTF tags
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <unicodertf.h>
+
+UnicodeRTF::UnicodeRTF() { // nothing to set up: ProcessText keeps all of its working state in locals
+}
+
+
+char UnicodeRTF::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) // rewrites multi-byte sequences in `text` as RTF \u<decimal>? escapes in place; key and module are not used; always returns 0
+{
+ unsigned char *to, *from, *maxto;
+ int len;
+ char digit[10];
+ short ch; // must be signed per unicode spec (negative is ok for big numbers > 32768)
+
+ len = strlenw(text) + 2; // shift string to right of buffer (strlenw is declared outside this file)
+ if (len < maxlen) {
+ memmove(&text[maxlen - len], text, len);
+ from = (unsigned char*)&text[maxlen - len];
+ }
+ else from = (unsigned char*)text;
+ maxto =(unsigned char*)text + maxlen;
+
+ // -------------------------------
+ for (to = (unsigned char*)text; *from && (to <= maxto); from++) { // NOTE(review): one pass can write several bytes, so this check alone does not keep `to` inside the buffer
+ ch = 0;
+ if ((*from & 128) != 128) { // high bit clear: copy the byte unchanged
+ *to++ = *from;
+ continue;
+ }
+ if ((*from & 128) && ((*from & 64) != 64)) { // 10xxxxxx byte with no lead byte before it
+ // error
+ *from = 'x'; // overwrites the input byte only; nothing is written to the output for it
+ continue;
+ }
+ *from <<= 1; // the lead byte is shifted in place to count its leading 1 bits
+ int subsequent;
+ for (subsequent = 1; (*from & 128); subsequent++) { // one pass per following byte announced by the lead byte
+ *from <<= 1;
+ from[subsequent] &= 63; // keep the low six bits of the following byte
+ ch <<= 6;
+ ch |= from[subsequent]; // NOTE(review): following bytes are read without checking for the end of the text
+ }
+ subsequent--;
+ *from <<=1;
+ char significantFirstBits = 8 - (2+subsequent); // number of payload bits carried by the lead byte
+
+ ch |= (((short)*from) << (((6*subsequent)+significantFirstBits)-8)); // places the lead byte's payload bits above the bits gathered in the loop
+ from += subsequent;
+ *to++ = '\\';
+ *to++ = 'u';
+ sprintf(digit, "%d", ch); // the value is written as a signed decimal number
+ for (char *dig = digit; *dig; dig++)
+ *to++ = *dig;
+ *to++ = '?';
+ }
+
+ if (to != maxto) {
+ *to++ = 0;
+ }
+ *to = 0;
+ return 0;
+}
diff --git a/src/modules/filters/utf16utf8.cpp b/src/modules/filters/utf16utf8.cpp
new file mode 100644
index 0000000..5a7719f
--- /dev/null
+++ b/src/modules/filters/utf16utf8.cpp
@@ -0,0 +1,95 @@
+/******************************************************************************
+ *
+ * UTF16UTF8 - SWFilter descendant to convert UTF-16 to UTF-8
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include <utf16utf8.h>
+
+UTF16UTF8::UTF16UTF8() { // nothing to set up: ProcessText keeps all of its working state in locals
+}
+
+
+char UTF16UTF8::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) // converts the 0-terminated UTF-16 string in `text` to UTF-8 in place; key and module are not used; always returns 0
+{
+ unsigned short *from;
+ unsigned char *to;
+
+ int len;
+ unsigned long uchar;
+ unsigned short schar;
+
+ len = 2; // byte length including the 16-bit terminator, so the shifted copy below stays terminated inside the buffer (was 0)
+ from = (unsigned short*) text;
+ while (*from) {
+ len += 2;
+ from++;
+ }
+
+ // shift string to right of buffer
+ if (len < maxlen) {
+ memmove(&text[maxlen - len], text, len);
+ from = (unsigned short*)&text[maxlen - len];
+ }
+ else
+ from = (unsigned short*)text;
+
+
+ // -------------------------------
+
+ for (to = (unsigned char*)text; *from; from++) {
+ uchar = 0;
+
+ if (*from < 0xD800 || *from > 0xDFFF) { // ordinary code unit outside the surrogate range
+ uchar = *from;
+ }
+ else if (*from >= 0xD800 && *from <= 0xDBFF) { // high surrogate: must be followed by a low surrogate
+ uchar = *from;
+ schar = *(from+1);
+ if (schar < 0xDC00 || schar > 0xDFFF) { // validate the LOW surrogate; this tested uchar before, which rejected every pair
+ //error, do nothing
+ continue;
+ }
+ uchar &= 0x03ff;
+ schar &= 0x03ff;
+ uchar <<= 10;
+ uchar |= schar;
+ uchar += 0x10000;
+ from++; // the low surrogate has been consumed as well
+ }
+ else { // low surrogate with no high surrogate before it
+ //error, do nothing
+ continue;
+ }
+
+ if (uchar < 0x80) {
+ *to++ = uchar;
+ }
+ else if (uchar < 0x800) {
+ *to++ = 0xc0 | (uchar >> 6);
+ *to++ = 0x80 | (uchar & 0x3f);
+ }
+ else if (uchar < 0x10000) {
+ *to++ = 0xe0 | (uchar >> 12);
+ *to++ = 0x80 | (uchar >> 6) & 0x3f;
+ *to++ = 0x80 | uchar & 0x3f;
+ }
+ else if (uchar < 0x200000) {
+ *to++ = 0xF0 | (uchar >> 18);
+ *to++ = 0x80 | (uchar >> 12) & 0x3F;
+ *to++ = 0x80 | (uchar >> 6) & 0x3F;
+ *to++ = 0x80 | uchar & 0x3F;
+ }
+ }
+ *to++ = 0;
+ *to = 0;
+
+ return 0;
+}
+
+
+
+
diff --git a/src/modules/filters/utf8arshaping.cpp b/src/modules/filters/utf8arshaping.cpp
new file mode 100644
index 0000000..5121f48
--- /dev/null
+++ b/src/modules/filters/utf8arshaping.cpp
@@ -0,0 +1,48 @@
+/******************************************************************************
+*
+* utf8arshaping - SWFilter descendant to perform Arabic shaping on
+* UTF-8 text
+*/
+
+#ifdef _ICU_
+
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef __GNUC__
+#include <unixstr.h>
+#endif
+
+#include <utf8arshaping.h>
+
+UTF8arShaping::UTF8arShaping() { // opens the UTF-8 converter used by ProcessText
+ err = U_ZERO_ERROR; // ICU functions do nothing when the incoming status is already a failure, so it must start clean
+ conv = ucnv_open("UTF-8", &err);
+
+}
+
+UTF8arShaping::~UTF8arShaping() { // releases the converter opened by the constructor
+ ucnv_close(conv);
+}
+
+char UTF8arShaping::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) // applies Arabic letter shaping and digit conversion to the UTF-8 in `text` in place; key and module are not used; always returns 0
+{
+ UChar *ustr, *ustr2;
+ err = U_ZERO_ERROR; // clear any earlier failure; with a failure status the ICU calls below return without doing anything
+ int32_t len = strlen(text);
+ ustr = new UChar[len]; // the UTF-16 form never needs more units than the UTF-8 form has bytes
+ ustr2 = new UChar[len];
+
+ // Convert UTF-8 string to UTF-16 (UChars)
+ len = ucnv_toUChars(conv, ustr, len, text, -1, &err);
+
+ len = u_shapeArabic(ustr, len, ustr2, len, U_SHAPE_LETTERS_SHAPE | U_SHAPE_DIGITS_EN2AN, &err);
+
+ ucnv_fromUChars(conv, text, maxlen, ustr2, len, &err);
+
+ delete [] ustr2;
+ delete [] ustr;
+ return 0;
+}
+
+#endif
diff --git a/src/modules/filters/utf8bidireorder.cpp b/src/modules/filters/utf8bidireorder.cpp
new file mode 100644
index 0000000..8fa7280
--- /dev/null
+++ b/src/modules/filters/utf8bidireorder.cpp
@@ -0,0 +1,55 @@
+/******************************************************************************
+*
+* utf8bidireorder - SWFilter descendant to perform reordering of UTF-8
+* text to visual order according to Unicode BiDi
+*/
+
+#ifdef _ICU_
+
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef __GNUC__
+#include <unixstr.h>
+#endif
+
+#include <utf8bidireorder.h>
+
+UTF8BiDiReorder::UTF8BiDiReorder() { // opens the UTF-8 converter used by ProcessText
+ err = U_ZERO_ERROR; // ICU functions do nothing when the incoming status is already a failure, so it must start clean
+ conv = ucnv_open("UTF-8", &err);
+
+}
+
+UTF8BiDiReorder::~UTF8BiDiReorder() { // releases the converter opened by the constructor
+ ucnv_close(conv);
+}
+
+char UTF8BiDiReorder::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) // reorders the UTF-8 in `text` into visual order in place; key and module are not used; always returns 0
+{
+ UChar *ustr, *ustr2;
+ err = U_ZERO_ERROR; // clear any earlier failure; with a failure status the ICU calls below return without doing anything
+ int32_t len = strlen(text);
+ ustr = new UChar[len]; // the UTF-16 form never needs more units than the UTF-8 form has bytes
+
+ // Convert UTF-8 string to UTF-16 (UChars)
+ len = ucnv_toUChars(conv, ustr, len, text, -1, &err);
+ ustr2 = new UChar[len];
+
+ UBiDi* bidi = ubidi_openSized(len + 1, 0, &err);
+ ubidi_setPara(bidi, ustr, len, UBIDI_DEFAULT_RTL, NULL, &err);
+ len = ubidi_writeReordered(bidi, ustr2, len,
+ UBIDI_DO_MIRRORING | UBIDI_REMOVE_BIDI_CONTROLS, &err);
+ ubidi_close(bidi);
+
+// len = ubidi_writeReverse(ustr, len, ustr2, len,
+// UBIDI_DO_MIRRORING | UBIDI_REMOVE_BIDI_CONTROLS, &err);
+
+ ucnv_fromUChars(conv, text, maxlen, ustr2, len, &err);
+
+ delete [] ustr2;
+ delete [] ustr;
+ return 0;
+}
+
+#endif
diff --git a/src/modules/filters/utf8cantillation.cpp b/src/modules/filters/utf8cantillation.cpp
new file mode 100644
index 0000000..84cb513
--- /dev/null
+++ b/src/modules/filters/utf8cantillation.cpp
@@ -0,0 +1,64 @@
+/******************************************************************************
+ *
+ * UTF8Cantillation - SWFilter descendant to remove UTF-8 Hebrew cantillation
+ *
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <utf8cantillation.h>
+
+
+const char UTF8Cantillation::on[] = "On"; // option value that leaves the text unchanged
+const char UTF8Cantillation::off[] = "Off"; // option value under which ProcessText removes the marks
+const char UTF8Cantillation::optName[] = "Hebrew Cantillation"; // option name string
+const char UTF8Cantillation::optTip[] = "Toggles Hebrew Cantillation Marks"; // descriptive tip string for the option
+
+UTF8Cantillation::UTF8Cantillation() { // starts with the option off and registers the two selectable values
+ option = false; // off: ProcessText removes the marks until "On" is selected
+ options.push_back(on);
+ options.push_back(off);
+}
+
+UTF8Cantillation::~UTF8Cantillation(){} // nothing to release; the stray ';' after the body was removed
+
+void UTF8Cantillation::setOptionValue(const char *ival) // turns the option on or off from its value string
+{
+ option = (!stricmp(ival, on)); // true only when stricmp reports ival equal to "On"; any other string turns the option off
+}
+
+const char *UTF8Cantillation::getOptionValue() // returns "On" or "Off" according to the current option state
+{
+ return (option) ? on:off;
+}
+
+char UTF8Cantillation::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) // removes cantillation byte pairs from `text` in place when the option is off; maxlen, key and module are not used; always returns 0
+{
+ if (!option) {
+ unsigned char *to, *from;
+ to = (unsigned char*)text;
+ //The UTF-8 range 0xD6 0x90 to 0xD6 0xAF and 0xD7 0x84 consist of Hebrew cantillation marks so block those out.
+ for (from = (unsigned char*)text; *from; from++) {
+ if (*from != 0xD6) {
+ if (*from == 0xD7 && *(from + 1) == 0x84) { // drop the two-byte sequence 0xD7 0x84
+ from++;
+ }
+ else {
+ *to++ = *from;
+ }
+ }
+ else if (*(from + 1) < 0x90 || *(from + 1) > 0xAF) { // 0xD6 followed by a byte outside the blocked range: keep both bytes
+ *to++ = *from;
+ if (!*(from + 1)) break; // lone 0xD6 at the end of the text: stop here instead of stepping past the terminator
+ *to++ = *++from;
+ }
+ else { // 0xD6 0x90..0xAF: drop both bytes
+ from++;
+ }
+ }
+ *to++ = 0;
+ *to = 0;
+ }
+ return 0;
+}
diff --git a/src/modules/filters/utf8greekaccents.cpp b/src/modules/filters/utf8greekaccents.cpp
new file mode 100644
index 0000000..b0e5dc8
--- /dev/null
+++ b/src/modules/filters/utf8greekaccents.cpp
@@ -0,0 +1,252 @@
+/******************************************************************************
+ *
+ * UTF8GreekAccents - SWFilter descendant to remove UTF-8 Greek accents
+ *
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <utf8greekaccents.h>
+
+
+// Strings for the Greek accents option: the two values it can take
+// (registered by the constructor and returned by getOptionValue) and its
+// name and tip text (presumably what a front end displays).
+const char UTF8GreekAccents::on[] = "On";
+const char UTF8GreekAccents::off[] = "Off";
+const char UTF8GreekAccents::optName[] = "Greek Accents";
+const char UTF8GreekAccents::optTip[] = "Toggles Greek Accents";
+
+// Starts with the option on (accents are kept) and registers the two
+// selectable values.
+UTF8GreekAccents::UTF8GreekAccents()
+{
+	option = true;
+	options.push_back(on);
+	options.push_back(off);
+}
+
+// Nothing to release.
+UTF8GreekAccents::~UTF8GreekAccents() {}
+
+// Turns the option on when ival compares equal to "On" under stricmp;
+// any other value turns it off.
+void UTF8GreekAccents::setOptionValue(const char *ival)
+{
+	option = (stricmp(ival, on) == 0);
+}
+
+// Returns "On" or "Off" according to the current setting.
+const char *UTF8GreekAccents::getOptionValue()
+{
+	if (option)
+		return on;
+	return off;
+}
+
+// Pre-composed two-byte Greek letters and the unaccented letter each one is
+// replaced with: { lead, trail, base lead, base trail }.
+struct GreekAccentFold2 {
+	unsigned char lead, trail, out1, out2;
+};
+static const GreekAccentFold2 greekAccentFold2[] = {
+	{ 0xCE, 0x86, 0xCE, 0x91 },	// capital alpha
+	{ 0xCE, 0x88, 0xCE, 0x95 },	// capital epsilon
+	{ 0xCE, 0x89, 0xCE, 0x97 },	// capital eta
+	{ 0xCE, 0x8A, 0xCE, 0x99 },	// capital iota
+	{ 0xCE, 0xAA, 0xCE, 0x99 },
+	{ 0xCE, 0x8C, 0xCE, 0x9F },	// capital omicron
+	{ 0xCE, 0x8E, 0xCE, 0xA5 },	// capital upsilon
+	{ 0xCE, 0xAB, 0xCE, 0xA5 },
+	{ 0xCE, 0x8F, 0xCE, 0xA9 },	// capital omega
+	{ 0xCE, 0xAC, 0xCE, 0xB1 },	// alpha
+	{ 0xCE, 0xAD, 0xCE, 0xB5 },	// epsilon
+	{ 0xCE, 0xAE, 0xCE, 0xB7 },	// eta
+	{ 0xCE, 0xAF, 0xCE, 0xB9 },	// iota
+	{ 0xCF, 0x8A, 0xCE, 0xB9 },
+	{ 0xCF, 0x8C, 0xCE, 0xBF },	// omicron
+	{ 0xCE, 0xB0, 0xCF, 0x85 },	// upsilon (U+03B0; the old test for CE 88 could never match here)
+	{ 0xCF, 0x8B, 0xCF, 0x85 },
+	{ 0xCF, 0x8D, 0xCF, 0x85 },
+	{ 0xCF, 0x8E, 0xCF, 0x89 }	// omega
+};
+
+// Greek Extended letters (three bytes, first byte 0xE1):
+// { second byte, lowest third byte, highest third byte, base lead, base trail }.
+// The ranges do not overlap, so the order of the rows does not matter.
+struct GreekAccentFold3 {
+	unsigned char second, lo, hi, out1, out2;
+};
+static const GreekAccentFold3 greekAccentFold3[] = {
+	//capital alpha
+	{ 0xBC, 0x88, 0x8F, 0xCE, 0x91 }, { 0xBE, 0x88, 0x8F, 0xCE, 0x91 }, { 0xBE, 0xB8, 0xBC, 0xCE, 0x91 },
+	//capital epsilon
+	{ 0xBC, 0x98, 0x9D, 0xCE, 0x95 }, { 0xBF, 0x88, 0x89, 0xCE, 0x95 },
+	//capital eta
+	{ 0xBC, 0xA8, 0xAF, 0xCE, 0x97 }, { 0xBE, 0x98, 0x9F, 0xCE, 0x97 }, { 0xBF, 0x8A, 0x8C, 0xCE, 0x97 },
+	//capital iota
+	{ 0xBC, 0xB8, 0xBF, 0xCE, 0x99 }, { 0xBF, 0x98, 0x9B, 0xCE, 0x99 },
+	//capital omicron
+	{ 0xBD, 0x88, 0x8D, 0xCE, 0x9F }, { 0xBF, 0xB8, 0xB9, 0xCE, 0x9F },
+	//capital upsilon
+	{ 0xBD, 0x99, 0x9F, 0xCE, 0xA5 }, { 0xBF, 0xA8, 0xAB, 0xCE, 0xA5 },
+	//capital omega
+	{ 0xBD, 0xA8, 0xAF, 0xCE, 0xA9 }, { 0xBE, 0xA8, 0xAF, 0xCE, 0xA9 }, { 0xBF, 0xBA, 0xBC, 0xCE, 0xA9 },
+	//capital rho
+	{ 0xBF, 0xAC, 0xAC, 0xCE, 0xA1 },
+	//alpha
+	{ 0xBC, 0x80, 0x87, 0xCE, 0xB1 }, { 0xBE, 0x80, 0x87, 0xCE, 0xB1 }, { 0xBD, 0xB0, 0xB1, 0xCE, 0xB1 }, { 0xBE, 0xB0, 0xB7, 0xCE, 0xB1 },
+	//epsilon
+	{ 0xBC, 0x90, 0x95, 0xCE, 0xB5 }, { 0xBD, 0xB2, 0xB3, 0xCE, 0xB5 },
+	//eta
+	{ 0xBE, 0x90, 0x97, 0xCE, 0xB7 }, { 0xBC, 0xA0, 0xA7, 0xCE, 0xB7 }, { 0xBF, 0x82, 0x87, 0xCE, 0xB7 }, { 0xBD, 0xB4, 0xB5, 0xCE, 0xB7 },
+	//iota
+	{ 0xBC, 0xB0, 0xB7, 0xCE, 0xB9 }, { 0xBD, 0xB6, 0xB7, 0xCE, 0xB9 }, { 0xBF, 0x90, 0x97, 0xCE, 0xB9 },
+	//omicron
+	{ 0xBD, 0x80, 0x85, 0xCE, 0xBF }, { 0xBD, 0xB8, 0xB9, 0xCE, 0xBF },
+	//upsilon
+	{ 0xBD, 0x90, 0x97, 0xCF, 0x85 }, { 0xBD, 0xBA, 0xBB, 0xCF, 0x85 }, { 0xBF, 0xA0, 0xA3, 0xCF, 0x85 }, { 0xBF, 0xA6, 0xA7, 0xCF, 0x85 },
+	//omega
+	{ 0xBD, 0xA0, 0xA7, 0xCF, 0x89 }, { 0xBD, 0xBC, 0xBD, 0xCF, 0x89 }, { 0xBE, 0xA0, 0xA7, 0xCF, 0x89 }, { 0xBF, 0xB2, 0xB7, 0xCF, 0x89 },
+	//rho (the old test required the third byte to equal A4 and A5 at once)
+	{ 0xBF, 0xA4, 0xA5, 0xCF, 0x81 }
+};
+
+// Strips Greek accents from the NUL-terminated UTF-8 string in `text`, in
+// place, when the option is off: a few combining marks are removed and
+// pre-composed accented letters are replaced by their base letter.  The output
+// is never longer than the input.  maxlen, key and module are not used.
+// Always returns 0.
+char UTF8GreekAccents::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+{
+	if (!option) {
+		unsigned char *from = (unsigned char*)text;
+		unsigned char *to = from;
+		unsigned int i;
+
+		while (*from) {
+			const unsigned char b0 = from[0];
+			const unsigned char b1 = from[1];		// at worst the terminator
+			const unsigned char b2 = b1 ? from[2] : 0;	// never read beyond the terminator
+			bool folded = false;
+
+			//first just remove combining characters
+			if (b0 == 0xE2 && b1 == 0x80 && b2 == 0x99) {	// U+2019
+				from += 3;
+				continue;
+			}
+			if (b0 == 0xCC && (b1 == 0x80 || b1 == 0x81 || b1 == 0x82 || b1 == 0x88 || b1 == 0x93 || b1 == 0x94)) {
+				from += 2;
+				continue;
+			}
+			if (b0 == 0xCD && b1 == 0xBA) {			// U+037A
+				from += 2;
+				continue;
+			}
+
+			//now convert pre-composed characters to their alphabetic bases, discarding the accents
+			if (b0 == 0xCE || b0 == 0xCF) {
+				for (i = 0; i < sizeof(greekAccentFold2) / sizeof(greekAccentFold2[0]); i++) {
+					if (greekAccentFold2[i].lead == b0 && greekAccentFold2[i].trail == b1) {
+						*to++ = greekAccentFold2[i].out1;
+						*to++ = greekAccentFold2[i].out2;
+						from += 2;
+						folded = true;
+						break;
+					}
+				}
+			}
+			else if (b0 == 0xE1 && b2) {
+				for (i = 0; i < sizeof(greekAccentFold3) / sizeof(greekAccentFold3[0]); i++) {
+					if (greekAccentFold3[i].second == b1 && b2 >= greekAccentFold3[i].lo && b2 <= greekAccentFold3[i].hi) {
+						*to++ = greekAccentFold3[i].out1;
+						*to++ = greekAccentFold3[i].out2;
+						from += 3;
+						folded = true;
+						break;
+					}
+				}
+			}
+
+			// Everything else is copied unchanged.  This includes 0xCC followed by
+			// a mark that is not in the list above; previously that lead byte was
+			// dropped, leaving an orphaned continuation byte in the output.
+			if (!folded)
+				*to++ = *from++;
+		}
+		*to = 0;
+		// Keep the historical second terminator only where it is known to lie
+		// inside the original string (i.e. the text got shorter); otherwise it
+		// would be written one byte past the caller's terminator.
+		if (to < from)
+			to[1] = 0;
+	}
+	return 0;
+}
+
+
+
+
+
+
diff --git a/src/modules/filters/utf8hebrewpoints.cpp b/src/modules/filters/utf8hebrewpoints.cpp
new file mode 100644
index 0000000..e5b50e1
--- /dev/null
+++ b/src/modules/filters/utf8hebrewpoints.cpp
@@ -0,0 +1,55 @@
+/******************************************************************************
+ *
+ * UTF8HebrewPoints - SWFilter descendant to remove UTF-8 Hebrew vowel points
+ *
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <utf8hebrewpoints.h>
+
+
+// Strings for the Hebrew vowel points option: the two values it can take
+// (registered by the constructor and returned by getOptionValue) and its
+// name and tip text (presumably what a front end displays).
+const char UTF8HebrewPoints::on[] = "On";
+const char UTF8HebrewPoints::off[] = "Off";
+const char UTF8HebrewPoints::optName[] = "Hebrew Vowel Points";
+const char UTF8HebrewPoints::optTip[] = "Toggles Hebrew Vowel Points";
+
+// Starts with the option on (points are kept) and registers the two
+// selectable values.
+UTF8HebrewPoints::UTF8HebrewPoints()
+{
+	option = true;
+	options.push_back(on);
+	options.push_back(off);
+}
+
+// Nothing to release.
+UTF8HebrewPoints::~UTF8HebrewPoints() {}
+
+// Turns the option on when ival compares equal to "On" under stricmp;
+// any other value turns it off.
+void UTF8HebrewPoints::setOptionValue(const char *ival)
+{
+	option = (stricmp(ival, on) == 0);
+}
+
+// Returns "On" or "Off" according to the current setting.
+const char *UTF8HebrewPoints::getOptionValue()
+{
+	if (option)
+		return on;
+	return off;
+}
+
+// Removes Hebrew points from the NUL-terminated UTF-8 string in `text`, in
+// place, when the option is off.  The output is never longer than the input.
+// maxlen, key and module are not used.  Always returns 0.
+char UTF8HebrewPoints::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+{
+	if (!option) {
+		unsigned char *to, *from;
+
+		to = (unsigned char*)text;
+		//The UTF-8 range 0xD6 0xB0 to 0xD6 0xBF (U+05B0 to U+05BF), excluding 0xD6 0xBE (U+05BE), consists of Hebrew points so block those out.
+		for (from = (unsigned char*)text; *from; from++) {
+			const bool isPoint = (*from == 0xD6) && (*(from + 1) >= 0xB0 && *(from + 1) <= 0xBF) && (*(from + 1) != 0xBE);
+			if (isPoint) {
+				from++;		// skip the second byte too; the lead byte is not copied
+			}
+			else {
+				*to++ = *from;
+			}
+		}
+		*to = 0;
+		// Keep the historical second terminator only where it is known to lie
+		// inside the original string (i.e. something was removed); otherwise it
+		// would be written one byte past the caller's terminator.
+		if (to < from)
+			to[1] = 0;
+	}
+	return 0;
+}
diff --git a/src/modules/filters/utf8html.cpp b/src/modules/filters/utf8html.cpp
new file mode 100644
index 0000000..7487815
--- /dev/null
+++ b/src/modules/filters/utf8html.cpp
@@ -0,0 +1,66 @@
+/******************************************************************************
+ *
+ * utf8html - SWFilter descendant to convert a UTF-8 stream to HTML escapes
+ *
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <utf8html.h>
+
+// The filter keeps no state, so there is nothing to set up.
+UTF8HTML::UTF8HTML() {}
+
+
+// Rewrites the UTF-8 string in `text` in place: 7-bit bytes are copied as they
+// are and every multi-byte sequence becomes a decimal character reference
+// ("&#NNNN;").  The input is first moved to the right end of the buffer
+// (maxlen bytes) so the longer output can be written from the left.
+//
+// Bytes that cannot start a sequence and sequences that are cut short are
+// skipped.  If an escape would overwrite input that has not been read yet
+// (buffer too small), conversion stops there and the output is truncated.
+// key and module are not used.  Always returns 0.
+char UTF8HTML::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+{
+	unsigned char *to, *from;
+	int len;
+	char digit[24];			// room for any unsigned long in decimal plus the terminator
+	unsigned long ch;
+
+	len = strlenw(text) + 2;						// shift string to right of buffer
+	if (len < maxlen) {
+		memmove(&text[maxlen - len], text, len);
+		from = (unsigned char*)&text[maxlen - len];
+	}
+	else	from = (unsigned char*)text;
+	// -------------------------------
+	for (to = (unsigned char*)text; *from; from++) {
+		if ((*from & 0x80) == 0) {		// plain 7-bit byte
+			*to++ = *from;
+			continue;
+		}
+		if ((*from & 0x40) == 0) {
+			// error: continuation byte without a lead byte
+			continue;
+		}
+
+		// the number of leading 1 bits after the first gives the number of
+		// continuation bytes
+		int subsequent = 0;
+		unsigned char mask;
+		for (mask = 0x40; (*from & mask); mask >>= 1)
+			subsequent++;
+		if (subsequent > 5)			// 0xFE and 0xFF never start a sequence
+			continue;
+
+		ch = *from & (0x3F >> subsequent);
+		int i;
+		for (i = 1; i <= subsequent; i++) {
+			if ((from[i] & 0xC0) != 0x80)
+				break;			// cut short; this also stops at the terminator
+			ch = (ch << 6) | (from[i] & 0x3F);
+		}
+		if (i <= subsequent)
+			continue;			// skip the lead byte only
+
+		// '&' '#' digits ';'
+		const int need = sprintf(digit, "%lu", ch) + 3;
+		if (to + need > from + subsequent + 1)
+			break;				// would overwrite input not read yet
+
+		from += subsequent;
+		*to++ = '&';
+		*to++ = '#';
+		for (char *dig = digit; *dig; dig++)
+			*to++ = *dig;
+		*to++ = ';';
+	}
+	*to = 0;
+	// Second terminator as before, but only where it is known to lie inside
+	// the region this function already owns.
+	if (to < from)
+		to[1] = 0;
+	return 0;
+}
diff --git a/src/modules/filters/utf8latin1.cpp b/src/modules/filters/utf8latin1.cpp
new file mode 100644
index 0000000..6cc1acd
--- /dev/null
+++ b/src/modules/filters/utf8latin1.cpp
@@ -0,0 +1,74 @@
+/******************************************************************************
+ *
+ * UTF8Latin1 - SWFilter descendant to convert UTF-8 to Latin-1
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include <utf8latin1.h>
+
+// rchar is the byte written in place of characters ProcessText cannot
+// represent in Latin-1.
+UTF8Latin1::UTF8Latin1(char rchar)
+	: replacementChar(rchar)
+{
+}
+
+
+// Converts the NUL-terminated UTF-8 string in `text` to Latin-1 in place.
+// Code points up to U+00FF become one byte each; anything above is replaced
+// by replacementChar.  Bytes that cannot start a sequence and sequences that
+// are cut short are skipped.
+//
+// The output is written as single bytes (it used to be written through an
+// unsigned short pointer, two bytes per character), so it is never longer
+// than the input and the text no longer has to be shifted to the end of the
+// buffer first.  maxlen, key and module are not used.  Always returns 0.
+char UTF8Latin1::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+{
+	unsigned char *from = (unsigned char*)text;
+	unsigned char *to = (unsigned char*)text;
+	unsigned long uchar;
+
+	while (*from) {
+		if ((*from & 0x80) == 0) {
+			uchar = *from++;
+		}
+		else if ((*from & 0x40) == 0) {
+			// error, do nothing
+			from++;
+			continue;
+		}
+		else {
+			// the number of leading 1 bits after the first gives the number of
+			// continuation bytes
+			int subsequent = 0;
+			unsigned char mask;
+			for (mask = 0x40; (*from & mask); mask >>= 1)
+				subsequent++;
+
+			int i = 0;
+			uchar = 0;
+			if (subsequent <= 5) {		// 0xFE and 0xFF never start a sequence
+				uchar = *from & (0x3F >> subsequent);
+				for (i = 1; i <= subsequent && (from[i] & 0xC0) == 0x80; i++)
+					uchar = (uchar << 6) | (from[i] & 0x3F);
+			}
+			if (i <= subsequent) {		// invalid or cut short: skip the lead byte only
+				from++;
+				continue;
+			}
+			from += subsequent + 1;
+		}
+
+		if (uchar <= 0xff) {			// U+00FF is still Latin-1
+			*to++ = (unsigned char)uchar;
+		}
+		else {
+			*to++ = replacementChar;
+		}
+	}
+	*to = 0;
+	// Second terminator as before, but only where it is known to lie inside
+	// the original string.
+	if (to < from)
+		to[1] = 0;
+
+	return 0;
+}
+
diff --git a/src/modules/filters/utf8nfc.cpp b/src/modules/filters/utf8nfc.cpp
new file mode 100644
index 0000000..df9e090
--- /dev/null
+++ b/src/modules/filters/utf8nfc.cpp
@@ -0,0 +1,46 @@
+/******************************************************************************
+*
+* utf8nfc - SWFilter descendant to perform NFC (canonical composition
+* normalization) on UTF-8 text
+*/
+
+#ifdef _ICU_
+
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef __GNUC__
+#include <unixstr.h>
+#endif
+
+#include <utf8nfc.h>
+
+// Opens the UTF-8 converter used by ProcessText.
+UTF8NFC::UTF8NFC() {
+	// ICU functions return immediately when the error code already holds a
+	// failure, so it has to start out clean.
+	err = U_ZERO_ERROR;
+	conv = ucnv_open("UTF-8", &err);
+}
+
+// Closes the converter opened by the constructor.
+UTF8NFC::~UTF8NFC()
+{
+	ucnv_close(conv);
+}
+
+// Replaces the UTF-8 string in `text` with its NFC form.  maxlen is passed to
+// ICU as the capacity of `text`.  If any conversion step fails the text is
+// left as it was.  key and module are not used.  Always returns 0.
+char UTF8NFC::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+{
+	int32_t len = strlen(text) * 2;
+	source = new UChar[len + 1];
+	target = 0;
+
+	// A failure left over from an earlier call would turn every ICU call
+	// below into a no-op.
+	err = U_ZERO_ERROR;
+
+	// Convert UTF-8 string to UTF-16 (UChars)
+	len = ucnv_toUChars(conv, source, len, text, -1, &err);
+
+	if (U_SUCCESS(err)) {
+		//canonical composition
+		// Normalization can lengthen the string, so start with some slack and
+		// retry with the exact size ICU reports if that was not enough.
+		int32_t capacity = len * 3 + 1;
+		target = new UChar[capacity];
+		int32_t tlen = unorm_normalize(source, len, UNORM_NFC, 0, target, capacity, &err);
+		if (err == U_BUFFER_OVERFLOW_ERROR) {
+			delete [] target;
+			capacity = tlen + 1;
+			target = new UChar[capacity];
+			err = U_ZERO_ERROR;
+			tlen = unorm_normalize(source, len, UNORM_NFC, 0, target, capacity, &err);
+		}
+
+		if (U_SUCCESS(err)) {
+			// pass the real length; target is not guaranteed to be terminated
+			int32_t written = ucnv_fromUChars(conv, text, maxlen, target, tlen, &err);
+			if (written >= maxlen && maxlen > 0)
+				text[maxlen - 1] = 0;	// result did not fit: make sure it is terminated
+		}
+	}
+
+	delete [] source;
+	delete [] target;
+	source = 0;
+	target = 0;
+
+	return 0;
+}
+
+#endif
diff --git a/src/modules/filters/utf8nfkd.cpp b/src/modules/filters/utf8nfkd.cpp
new file mode 100644
index 0000000..450cbbf
--- /dev/null
+++ b/src/modules/filters/utf8nfkd.cpp
@@ -0,0 +1,46 @@
+/******************************************************************************
+*
+* utf8nfkd - SWFilter descendant to perform NFKD (compatibility decomposition
+* normalization) on UTF-8 text
+*/
+
+#ifdef _ICU_
+
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef __GNUC__
+#include <unixstr.h>
+#endif
+
+#include <utf8nfkd.h>
+
+// Opens the UTF-8 converter used by ProcessText.
+UTF8NFKD::UTF8NFKD() {
+	// ICU functions return immediately when the error code already holds a
+	// failure, so it has to start out clean.
+	err = U_ZERO_ERROR;
+	conv = ucnv_open("UTF-8", &err);
+}
+
+// Closes the converter opened by the constructor.
+UTF8NFKD::~UTF8NFKD()
+{
+	ucnv_close(conv);
+}
+
+// Replaces the UTF-8 string in `text` with its NFKD form.  maxlen is passed to
+// ICU as the capacity of `text`.  If any conversion step fails the text is
+// left as it was.  key and module are not used.  Always returns 0.
+char UTF8NFKD::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+{
+	int32_t len = strlen(text) * 2;
+	source = new UChar[len + 1];
+	target = 0;
+
+	// A failure left over from an earlier call would turn every ICU call
+	// below into a no-op.
+	err = U_ZERO_ERROR;
+
+	// Convert UTF-8 string to UTF-16 (UChars)
+	len = ucnv_toUChars(conv, source, len, text, -1, &err);
+
+	if (U_SUCCESS(err)) {
+		//compatibility decomposition
+		// Decomposition usually lengthens the string, so start with some slack
+		// and retry with the exact size ICU reports if that was not enough.
+		int32_t capacity = len * 3 + 1;
+		target = new UChar[capacity];
+		int32_t tlen = unorm_normalize(source, len, UNORM_NFKD, 0, target, capacity, &err);
+		if (err == U_BUFFER_OVERFLOW_ERROR) {
+			delete [] target;
+			capacity = tlen + 1;
+			target = new UChar[capacity];
+			err = U_ZERO_ERROR;
+			tlen = unorm_normalize(source, len, UNORM_NFKD, 0, target, capacity, &err);
+		}
+
+		if (U_SUCCESS(err)) {
+			// pass the real length; target is not guaranteed to be terminated
+			int32_t written = ucnv_fromUChars(conv, text, maxlen, target, tlen, &err);
+			if (written >= maxlen && maxlen > 0)
+				text[maxlen - 1] = 0;	// result did not fit: make sure it is terminated
+		}
+	}
+
+	delete [] source;
+	delete [] target;
+	source = 0;
+	target = 0;
+
+	return 0;
+}
+
+#endif
diff --git a/src/modules/filters/utf8transliterator.cpp b/src/modules/filters/utf8transliterator.cpp
new file mode 100644
index 0000000..7bc068a
--- /dev/null
+++ b/src/modules/filters/utf8transliterator.cpp
@@ -0,0 +1,479 @@
+/******************************************************************************
+*
+* utf8transliterators - SWFilter descendant to transliterate between
+* ICU-supported scripts.
+*/
+
+#ifdef _ICU_
+
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef __GNUC__
+#include <unixstr.h>
+#endif
+
+#include <utf8transliterator.h>
+
+// Names of the selectable targets, indexed by option value: the constructor
+// registers entries 0..NUMTARGETSCRIPTS-1 and getOptionValue returns
+// optionstring[option].  Only the first five names are filled in; the ones in
+// the comment below are not offered.
+// NOTE(review): NUMTARGETSCRIPTS is defined elsewhere; if it is larger than 5
+// the remaining rows are empty strings -- confirm.
+const char UTF8Transliterator::optionstring[NUMTARGETSCRIPTS][16] = {
+ "Off",
+ "Latin",
+ "Basic Latin",
+ "Beta",
+ "BGreek",
+/*
+ "Greek",
+ "Hebrew",
+ "Cyrillic",
+ "Arabic",
+ "Syriac",
+ "Katakana",
+ "Hiragana",
+ "Jamo",
+ "Hangul",
+ "Devanagari",
+ "Tamil",
+ "Bengali",
+ "Gurmukhi",
+ "Gujarati",
+ "Oriya",
+ "Telugu",
+ "Kannada",
+ "Malayalam",
+ "Thai",
+ "Georgian",
+ "Armenian",
+ "Ethiopic",
+ "Gothic",
+ "Ugaritic",
+ "Coptic"
+ */
+};
+
+// Name and tip text of the option (presumably what a front end displays).
+const char UTF8Transliterator::optName[] = "Transliteration";
+const char UTF8Transliterator::optTip[] = "Transliterates between scripts";
+
+// Starts with option 0 ("Off") and registers every entry of optionstring as a
+// selectable value.
+UTF8Transliterator::UTF8Transliterator()
+{
+	option = 0;
+	for (unsigned long n = 0; n < NUMTARGETSCRIPTS; ++n)
+		options.push_back(optionstring[n]);
+}
+
+// Selects the target whose name in optionstring compares equal to ival under
+// stricmp.  An unknown name selects option 0 ("Off").
+void UTF8Transliterator::setOptionValue(const char *ival)
+{
+	// The search used to start at optionstring[NUMTARGETSCRIPTS], which is one
+	// row past the end of the table.
+	option = 0;
+	for (int i = NUMTARGETSCRIPTS - 1; i > 0; i--) {
+		if (!stricmp(ival, optionstring[i])) {
+			option = i;
+			break;
+		}
+	}
+}
+
+// Returns the name of the selected target, or 0 when option is not a valid
+// index into optionstring.
+const char *UTF8Transliterator::getOptionValue()
+{
+	if (option < NUMTARGETSCRIPTS)
+		return optionstring[option];
+	return 0;
+}
+
+// Appends one rule to the compound transliterator ID and records that the text
+// will contain Latin afterwards, so the Latin-to-target step can be added.
+static void addToLatin(UnicodeString &id, unsigned char *scripts, const char *rule)
+{
+	id += UnicodeString(rule);
+	scripts[SE_LATIN] = true;
+}
+
+// True when the module's language code starts with the first n characters of
+// prefix (compared with strnicmp).  False when there is no module or no
+// language code.
+static bool langStartsWith(const SWModule *module, const char *prefix, int n)
+{
+	if (!module)
+		return false;
+	const char *lang = ((SWModule*)module)->Lang();
+	return lang && !strnicmp(lang, prefix, n);
+}
+
+// Rule that takes InterIndic to the selected Indic target, or 0 when the
+// option is not one of the Indic targets handled here.
+static const char *interIndicToTarget(int option)
+{
+	switch (option) {
+	case SE_DEVANAGARI: return ";InterIndic-Devanagari";
+	case SE_TAMIL: return ";InterIndic-Tamil";
+	case SE_BENGALI: return ";InterIndic-Bengali";
+	case SE_GURMUKHI: return ";InterIndic-Gurmukhi";
+	case SE_GUJARATI: return ";InterIndic-Gujarati";
+	case SE_ORIYA: return ";InterIndic-Oriya";
+	case SE_TELUGU: return ";InterIndic-Telugu";
+	case SE_KANNADA: return ";InterIndic-Kannada";
+	case SE_MALAYALAM: return ";InterIndic-Malayalam";
+	}
+	return 0;
+}
+
+// Rule that takes Latin to the selected target, or 0 when no such step is
+// needed for the option.
+static const char *latinToTarget(int option)
+{
+	switch (option) {
+	case SE_GREEK: return ";Latin-Greek";
+	case SE_HEBREW: return ";Latin-Hebrew";
+	case SE_CYRILLIC: return ";Latin-Cyrillic";
+	case SE_ARABIC: return ";Latin-Arabic";
+	case SE_SYRIAC: return ";Latin-Syriac";
+	case SE_THAI: return ";Latin-Thai";
+	case SE_GEORGIAN: return ";Latin-Georgian";
+	case SE_ARMENIAN: return ";Latin-Armenian";
+	case SE_ETHIOPIC: return ";Latin-Ethiopic";
+	case SE_GOTHIC: return ";Latin-Gothic";
+	case SE_UGARITIC: return ";Latin-Ugaritic";
+	case SE_COPTIC: return ";Latin-Coptic";
+	case SE_KATAKANA: return ";Latin-Katakana";
+	case SE_HIRAGANA: return ";Latin-Hiragana";
+	case SE_JAMO: return ";Latin-Jamo";
+	case SE_HANGUL: return ";Latin-Hangul";
+	}
+	return 0;
+}
+
+// Transliterates the UTF-8 string in `text` to the script selected by `option`
+// and writes the result back into `text` (at most maxlen bytes, always
+// terminated).  The scripts present in the text are detected from the Unicode
+// block of each UTF-16 unit, a compound ICU transliterator ID is built from
+// them, and the text is run through it.  Nothing is done when the option is 0
+// ("Off"), when no script other than the target is present, or when ICU cannot
+// build the transliterator.  `module`, when given, supplies the language code
+// used to tell Coptic from Greek and Japanese from Chinese.  key is not used.
+// Always returns 0.
+char UTF8Transliterator::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+{
+	if (!option)	// no transliteration wanted
+		return 0;
+
+	int32_t i;
+	UErrorCode err = U_ZERO_ERROR;
+	UConverter * conv = ucnv_open("UTF-8", &err);
+
+	bool compat = false;
+	bool noNFC = (option == SE_JAMO);
+
+	// Convert UTF-8 string to UTF-16 (UChars)
+	unsigned long j = strlen(text);
+	int32_t len = (j * 2) + 1;
+	UChar *source = new UChar[len];
+	err = U_ZERO_ERROR;
+	len = ucnv_toUChars(conv, source, len, text, j, &err);
+	if (U_FAILURE(err)) {
+		delete [] source;
+		ucnv_close(conv);
+		return 0;
+	}
+	source[len] = 0;
+
+	// Figure out which scripts are used in the string
+	unsigned char scripts[NUMSCRIPTS];
+
+	for (i = 0; i < NUMSCRIPTS; i++) {
+		scripts[i] = false;
+	}
+
+	for (i = 0; i < len; i++) {
+		switch (ublock_getCode(source[i])) {
+		case UBLOCK_BASIC_LATIN: scripts[SE_LATIN] = true; break;
+		case UBLOCK_GREEK: scripts[SE_GREEK] = true; break;
+		case UBLOCK_HEBREW: scripts[SE_HEBREW] = true; break;
+		case UBLOCK_CYRILLIC: scripts[SE_CYRILLIC] = true; break;
+		case UBLOCK_ARABIC: scripts[SE_ARABIC] = true; break;
+		case UBLOCK_SYRIAC: scripts[SE_SYRIAC] = true; break;
+		case UBLOCK_KATAKANA: scripts[SE_KATAKANA] = true; break;
+		case UBLOCK_HIRAGANA: scripts[SE_HIRAGANA] = true; break;
+		case UBLOCK_HANGUL_SYLLABLES: scripts[SE_HANGUL] = true; break;
+		case UBLOCK_HANGUL_JAMO: scripts[SE_JAMO] = true; break;
+		case UBLOCK_DEVANAGARI: scripts[SE_DEVANAGARI] = true; break;
+		case UBLOCK_TAMIL: scripts[SE_TAMIL] = true; break;
+		case UBLOCK_BENGALI: scripts[SE_BENGALI] = true; break;
+		case UBLOCK_GURMUKHI: scripts[SE_GURMUKHI] = true; break;
+		case UBLOCK_GUJARATI: scripts[SE_GUJARATI] = true; break;
+		case UBLOCK_ORIYA: scripts[SE_ORIYA] = true; break;
+		case UBLOCK_TELUGU: scripts[SE_TELUGU] = true; break;
+		case UBLOCK_KANNADA: scripts[SE_KANNADA] = true; break;
+		case UBLOCK_MALAYALAM: scripts[SE_MALAYALAM] = true; break;
+		case UBLOCK_THAI: scripts[SE_THAI] = true; break;
+		case UBLOCK_GEORGIAN: scripts[SE_GEORGIAN] = true; break;
+		case UBLOCK_ARMENIAN: scripts[SE_ARMENIAN] = true; break;
+		case UBLOCK_ETHIOPIC: scripts[SE_ETHIOPIC] = true; break;
+		case UBLOCK_GOTHIC: scripts[SE_GOTHIC] = true; break;
+		// needs Unicode 3.2? or 4.0? support from ICU
+		//case UBLOCK_UGARITIC: scripts[SE_UGARITIC] = true; break;
+		case UBLOCK_CJK_RADICALS_SUPPLEMENT:
+		case UBLOCK_KANGXI_RADICALS:
+		case UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS:
+		case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION:
+		case UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A:
+		case UBLOCK_CJK_UNIFIED_IDEOGRAPHS:
+			scripts[SE_HAN] = true;
+			break;
+		case UBLOCK_CJK_COMPATIBILITY:
+		case UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS:
+		case UBLOCK_CJK_COMPATIBILITY_FORMS:
+			scripts[SE_HAN] = true;
+			compat = true;
+			break;
+		case UBLOCK_HANGUL_COMPATIBILITY_JAMO:
+			scripts[SE_HANGUL] = true;
+			compat = true;
+			break;
+
+		default: scripts[SE_LATIN] = true;
+		}
+	}
+	if (option < NUMSCRIPTS)
+		scripts[option] = false;	//turn off the reflexive transliteration
+
+	//return if we have no transliteration to do for this text
+	bool anyScript = false;
+	for (i = 0; !anyScript && i < NUMSCRIPTS; i++) {
+		if (scripts[i]) anyScript = true;
+	}
+	if (!anyScript) {
+		delete [] source;
+		ucnv_close(conv);
+		return 0;
+	}
+
+	UnicodeString id;
+	if (compat) {
+		id = UnicodeString("NFKD");
+	}
+	else {
+		id = UnicodeString("NFD");
+	}
+
+	//Simple X to Latin transliterators
+	if (scripts[SE_GREEK]) {
+		if (option == SE_BETA)
+			id += UnicodeString(";Greek-Beta");
+		else if (option == SE_BGREEK)
+			id += UnicodeString(";Greek-BGreek");
+		else if (langStartsWith(module, "cop", 3))
+			addToLatin(id, scripts, ";Coptic-Latin");
+		else
+			addToLatin(id, scripts, ";Greek-Latin");
+	}
+	if (scripts[SE_HEBREW]) {
+		if (option == SE_BETA)
+			id += UnicodeString(";Hebrew-CCAT");
+		else if (option == SE_SYRIAC)
+			id += UnicodeString(";Hebrew-Syriac");
+		else
+			addToLatin(id, scripts, ";Hebrew-Latin");
+	}
+	if (scripts[SE_CYRILLIC]) addToLatin(id, scripts, ";Cyrillic-Latin");
+	if (scripts[SE_ARABIC]) addToLatin(id, scripts, ";Arabic-Latin");
+	if (scripts[SE_SYRIAC]) {
+		if (option == SE_BETA)
+			id += UnicodeString(";Syriac-CCAT");
+		else if (option == SE_HEBREW)
+			id += UnicodeString(";Syriac-Hebrew");
+		else
+			addToLatin(id, scripts, ";Syriac-Latin");
+	}
+	if (scripts[SE_THAI]) addToLatin(id, scripts, ";Thai-Latin");
+	if (scripts[SE_GEORGIAN]) addToLatin(id, scripts, ";Georgian-Latin");
+	if (scripts[SE_ARMENIAN]) addToLatin(id, scripts, ";Armenian-Latin");
+	if (scripts[SE_ETHIOPIC]) addToLatin(id, scripts, ";Ethiopic-Latin");
+	if (scripts[SE_GOTHIC]) addToLatin(id, scripts, ";Gothic-Latin");
+	if (scripts[SE_UGARITIC]) addToLatin(id, scripts, ";Ugaritic-Latin");
+	if (scripts[SE_HAN]) {
+		if (langStartsWith(module, "ja", 2))
+			addToLatin(id, scripts, ";Kanji-OnRomaji");
+		else
+			addToLatin(id, scripts, ";Han-Pinyin");
+	}
+
+	// Inter-Kana and Kana to Latin transliterators
+	if (option == SE_HIRAGANA && scripts[SE_KATAKANA]) {
+		id += UnicodeString(";Katakana-Hiragana");
+		scripts[SE_HIRAGANA] = true;
+	}
+	else if (option == SE_KATAKANA && scripts[SE_HIRAGANA]) {
+		id += UnicodeString(";Hiragana-Katakana");
+		scripts[SE_KATAKANA] = true;
+	}
+	else {
+		if (scripts[SE_KATAKANA]) addToLatin(id, scripts, ";Katakana-Latin");
+		if (scripts[SE_HIRAGANA]) addToLatin(id, scripts, ";Hiragana-Latin");
+	}
+
+	// Inter-Korean and Korean to Latin transliterators
+	if (option == SE_HANGUL && scripts[SE_JAMO]) {
+		noNFC = false;
+		scripts[SE_HANGUL] = true;
+	}
+	else if (option == SE_JAMO && scripts[SE_HANGUL]) {
+		noNFC = true;
+		scripts[SE_JAMO] = true;
+	}
+	else {
+		if (scripts[SE_HANGUL]) addToLatin(id, scripts, ";Hangul-Latin");
+		if (scripts[SE_JAMO]) addToLatin(id, scripts, ";Jamo-Latin");
+	}
+
+	// Indic-Latin
+	if (option < SE_DEVANAGARI || option > SE_MALAYALAM) {
+		// Indic to Latin
+		// NOTE(review): there is no Devanagari-Latin step here, so Devanagari
+		// text is left alone for non-Indic targets -- confirm that is intended.
+		if (scripts[SE_TAMIL]) addToLatin(id, scripts, ";Tamil-Latin");
+		if (scripts[SE_BENGALI]) addToLatin(id, scripts, ";Bengali-Latin");
+		if (scripts[SE_GURMUKHI]) addToLatin(id, scripts, ";Gurmukhi-Latin");
+		if (scripts[SE_GUJARATI]) addToLatin(id, scripts, ";Gujarati-Latin");
+		if (scripts[SE_ORIYA]) addToLatin(id, scripts, ";Oriya-Latin");
+		if (scripts[SE_TELUGU]) addToLatin(id, scripts, ";Telugu-Latin");
+		if (scripts[SE_KANNADA]) addToLatin(id, scripts, ";Kannada-Latin");
+		if (scripts[SE_MALAYALAM]) addToLatin(id, scripts, ";Malayalam-Latin");
+	}
+	else {
+		if (scripts[SE_LATIN]) id += UnicodeString(";Latin-InterIndic");
+		if (scripts[SE_DEVANAGARI]) id += UnicodeString(";Devanagari-InterIndic");
+		if (scripts[SE_TAMIL]) id += UnicodeString(";Tamil-InterIndic");
+		if (scripts[SE_BENGALI]) id += UnicodeString(";Bengali-InterIndic");
+		// was misspelled "Gurmurkhi", which made ICU reject the whole compound ID
+		if (scripts[SE_GURMUKHI]) id += UnicodeString(";Gurmukhi-InterIndic");
+		if (scripts[SE_GUJARATI]) id += UnicodeString(";Gujarati-InterIndic");
+		if (scripts[SE_ORIYA]) id += UnicodeString(";Oriya-InterIndic");
+		if (scripts[SE_TELUGU]) id += UnicodeString(";Telugu-InterIndic");
+		if (scripts[SE_KANNADA]) id += UnicodeString(";Kannada-InterIndic");
+		if (scripts[SE_MALAYALAM]) id += UnicodeString(";Malayalam-InterIndic");
+
+		const char *indicRule = interIndicToTarget(option);
+		if (indicRule)
+			id += UnicodeString(indicRule);
+		else
+			addToLatin(id, scripts, ";InterIndic-Latin");
+	}
+
+	if (scripts[SE_LATIN]) {
+		const char *latinRule = latinToTarget(option);
+		if (latinRule)
+			id += UnicodeString(latinRule);
+	}
+
+	if (option == SE_BASICLATIN) {
+		id += UnicodeString(";Any-Latin1");
+	}
+
+	if (noNFC) {
+		id += UnicodeString(";NFD");
+	} else {
+		id += UnicodeString(";NFC");
+	}
+
+	UParseError perr;
+
+	err = U_ZERO_ERROR;
+	Transliterator * trans = Transliterator::createInstance(id, UTRANS_FORWARD, perr, err);
+	if (trans) {
+		UnicodeString target = UnicodeString(source);
+		trans->transliterate(target);
+		err = U_ZERO_ERROR;
+		len = ucnv_fromUChars(conv, text, maxlen, target.getBuffer(), target.length(), &err);
+		if (len < maxlen) *(text + len) = 0;
+		else if (maxlen > 0) *(text + maxlen - 1) = 0;	// text[maxlen] is outside the buffer
+		delete trans;
+	}
+	delete [] source;	// used to be leaked on every path
+	ucnv_close(conv);
+	return 0;
+}
+#endif
diff --git a/src/modules/filters/utf8utf16.cpp b/src/modules/filters/utf8utf16.cpp
new file mode 100644
index 0000000..9aea6fe
--- /dev/null
+++ b/src/modules/filters/utf8utf16.cpp
@@ -0,0 +1,79 @@
+/******************************************************************************
+ *
+ * UTF8UTF16 - SWFilter descendant to convert UTF-8 to UTF-16
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include <utf8utf16.h>
+
+// The filter keeps no state, so there is nothing to set up.
+UTF8UTF16::UTF8UTF16() {}
+
+
+// Converts the NUL-terminated UTF-8 string in `text` to UTF-16 (native byte
+// order, terminated by a zero unit), in place.  The input is first moved to
+// the right end of the buffer (maxlen bytes) so the output can be written
+// from the left.
+//
+// Code points above U+FFFF are written as a surrogate pair; values above
+// U+10FFFF become U+FFFD.  Bytes that cannot start a sequence and sequences
+// that are cut short are skipped.  If the next character would overwrite
+// input that has not been read yet, or would leave no room for the
+// terminator, conversion stops and the output is truncated.
+// key and module are not used.  Always returns 0.
+char UTF8UTF16::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+{
+	unsigned char *from;
+	unsigned short *to;
+
+	int len;
+	unsigned long uchar;
+
+	len = strlen(text) + 1;						// shift string to right of buffer
+	if (len < maxlen) {
+		memmove(&text[maxlen - len], text, len);
+		from = (unsigned char*)&text[maxlen - len];
+	}
+	else
+		from = (unsigned char*)text;
+
+	// one past the last byte this function may touch
+	unsigned char *bufEnd = (unsigned char*)text + ((len < maxlen) ? maxlen : len);
+
+	// -------------------------------
+
+	to = (unsigned short*)text;
+	while (*from) {
+		unsigned char *next = from + 1;
+
+		if ((*from & 0x80) == 0) {
+			uchar = *from;
+		}
+		else if ((*from & 0x40) == 0) {
+			// error, do nothing
+			from = next;
+			continue;
+		}
+		else {
+			// the number of leading 1 bits after the first gives the number of
+			// continuation bytes
+			int subsequent = 0;
+			unsigned char mask;
+			for (mask = 0x40; (*from & mask); mask >>= 1)
+				subsequent++;
+
+			int i = 0;
+			uchar = 0;
+			if (subsequent <= 5) {		// 0xFE and 0xFF never start a sequence
+				uchar = *from & (0x3F >> subsequent);
+				for (i = 1; i <= subsequent && (from[i] & 0xC0) == 0x80; i++)
+					uchar = (uchar << 6) | (from[i] & 0x3F);
+			}
+			if (i <= subsequent) {		// invalid or cut short: skip the lead byte only
+				from = next;
+				continue;
+			}
+			next = from + subsequent + 1;
+		}
+
+		if (uchar > 0x10FFFF)
+			uchar = 0xFFFD;			// cannot be expressed in UTF-16
+
+		const int units = (uchar < 0x10000) ? 1 : 2;
+		if ((unsigned char*)(to + units) > next || (unsigned char*)(to + units + 1) > bufEnd)
+			break;				// no room: stop rather than corrupt the input
+
+		if (units == 1) {
+			*to++ = (unsigned short)uchar;
+		}
+		else {
+			uchar -= 0x10000;
+			*to++ = (unsigned short)(0xD800 | (uchar >> 10));	// high surrogate first
+			*to++ = (unsigned short)(0xDC00 | (uchar & 0x03ff));	// then the low surrogate
+		}
+		from = next;
+	}
+
+	// terminate with a full zero unit when it fits, with a single byte otherwise
+	if ((unsigned char*)(to + 1) <= bufEnd)
+		*to = (unsigned short)0;
+	else
+		*(unsigned char*)to = 0;
+
+	return 0;
+}
+