summaryrefslogtreecommitdiff
path: root/src/modules/filters
diff options
context:
space:
mode:
Diffstat (limited to 'src/modules/filters')
-rw-r--r--src/modules/filters/Makefile.am51
-rw-r--r--src/modules/filters/cipherfil.cpp30
-rw-r--r--src/modules/filters/gbffootnotes.cpp151
-rw-r--r--src/modules/filters/gbfheadings.cpp57
-rw-r--r--src/modules/filters/gbfhtml.cpp640
-rw-r--r--src/modules/filters/gbfhtmlhref.cpp165
-rw-r--r--src/modules/filters/gbfmorph.cpp58
-rw-r--r--src/modules/filters/gbfosis.cpp439
-rw-r--r--src/modules/filters/gbfplain.cpp51
-rw-r--r--src/modules/filters/gbfredletterwords.cpp98
-rw-r--r--src/modules/filters/gbfrtf.cpp294
-rw-r--r--src/modules/filters/gbfstrongs.cpp148
-rw-r--r--src/modules/filters/gbfthml.cpp403
-rw-r--r--src/modules/filters/gbfwebif.cpp161
-rw-r--r--src/modules/filters/greeklexattribs.cpp64
-rw-r--r--src/modules/filters/latin1utf16.cpp93
-rw-r--r--src/modules/filters/latin1utf8.cpp182
-rw-r--r--src/modules/filters/osisfootnotes.cpp138
-rw-r--r--src/modules/filters/osisheadings.cpp118
-rw-r--r--src/modules/filters/osishtmlhref.cpp339
-rw-r--r--src/modules/filters/osislemma.cpp78
-rw-r--r--src/modules/filters/osismorph.cpp75
-rw-r--r--src/modules/filters/osisplain.cpp151
-rw-r--r--src/modules/filters/osisredletterwords.cpp77
-rw-r--r--src/modules/filters/osisrtf.cpp333
-rw-r--r--src/modules/filters/osisscripref.cpp104
-rw-r--r--src/modules/filters/osisstrongs.cpp128
-rw-r--r--src/modules/filters/osiswebif.cpp212
-rw-r--r--src/modules/filters/plainfootnotes.cpp83
-rw-r--r--src/modules/filters/plainhtml.cpp100
-rw-r--r--src/modules/filters/rtfhtml.cpp59
-rw-r--r--src/modules/filters/rwphtml.cpp187
-rw-r--r--src/modules/filters/rwprtf.cpp107
-rw-r--r--src/modules/filters/scsuutf8.cpp96
-rw-r--r--src/modules/filters/swbasicfilter.cpp152
-rw-r--r--src/modules/filters/swoptfilter.cpp38
-rw-r--r--src/modules/filters/thmlfootnotes.cpp156
-rw-r--r--src/modules/filters/thmlgbf.cpp337
-rw-r--r--src/modules/filters/thmlheadings.cpp101
-rw-r--r--src/modules/filters/thmlhtml.cpp117
-rw-r--r--src/modules/filters/thmlhtmlhref.cpp350
-rw-r--r--src/modules/filters/thmllemma.cpp86
-rw-r--r--src/modules/filters/thmlmorph.cpp77
-rw-r--r--src/modules/filters/thmlolb.cpp243
-rw-r--r--src/modules/filters/thmlosis.cpp385
-rw-r--r--src/modules/filters/thmlplain.cpp259
-rw-r--r--src/modules/filters/thmlrtf.cpp243
-rw-r--r--src/modules/filters/thmlscripref.cpp157
-rw-r--r--src/modules/filters/thmlstrongs.cpp83
-rw-r--r--src/modules/filters/thmlvariants.cpp133
-rw-r--r--src/modules/filters/thmlwebif.cpp104
-rw-r--r--src/modules/filters/unicodertf.cpp114
-rw-r--r--src/modules/filters/utf16utf8.cpp47
-rw-r--r--src/modules/filters/utf8arshaping.cpp22
-rw-r--r--src/modules/filters/utf8bidireorder.cpp18
-rw-r--r--src/modules/filters/utf8cantillation.cpp77
-rw-r--r--src/modules/filters/utf8greekaccents.cpp449
-rw-r--r--src/modules/filters/utf8hebrewpoints.cpp45
-rw-r--r--src/modules/filters/utf8html.cpp37
-rw-r--r--src/modules/filters/utf8latin1.cpp32
-rw-r--r--src/modules/filters/utf8nfc.cpp19
-rw-r--r--src/modules/filters/utf8nfkd.cpp23
-rw-r--r--src/modules/filters/utf8transliterator.cpp694
-rw-r--r--src/modules/filters/utf8utf16.cpp115
64 files changed, 6249 insertions, 3934 deletions
diff --git a/src/modules/filters/Makefile.am b/src/modules/filters/Makefile.am
index c58fb5f..c9a6bef 100644
--- a/src/modules/filters/Makefile.am
+++ b/src/modules/filters/Makefile.am
@@ -1,22 +1,24 @@
filtersdir = $(top_srcdir)/src/modules/filters
libsword_la_SOURCES += $(filtersdir)/swbasicfilter.cpp
+libsword_la_SOURCES += $(filtersdir)/swoptfilter.cpp
libsword_la_SOURCES += $(filtersdir)/gbfhtml.cpp
libsword_la_SOURCES += $(filtersdir)/gbfhtmlhref.cpp
+libsword_la_SOURCES += $(filtersdir)/gbfwebif.cpp
libsword_la_SOURCES += $(filtersdir)/gbfplain.cpp
libsword_la_SOURCES += $(filtersdir)/gbfrtf.cpp
libsword_la_SOURCES += $(filtersdir)/plainhtml.cpp
-libsword_la_SOURCES += $(filtersdir)/rwphtml.cpp
-libsword_la_SOURCES += $(filtersdir)/rwprtf.cpp
libsword_la_SOURCES += $(filtersdir)/cipherfil.cpp
libsword_la_SOURCES += $(filtersdir)/rtfhtml.cpp
libsword_la_SOURCES += $(filtersdir)/gbfstrongs.cpp
libsword_la_SOURCES += $(filtersdir)/gbffootnotes.cpp
libsword_la_SOURCES += $(filtersdir)/gbfheadings.cpp
+libsword_la_SOURCES += $(filtersdir)/gbfredletterwords.cpp
libsword_la_SOURCES += $(filtersdir)/gbfmorph.cpp
libsword_la_SOURCES += $(filtersdir)/plainfootnotes.cpp
+
libsword_la_SOURCES += $(filtersdir)/thmlstrongs.cpp
libsword_la_SOURCES += $(filtersdir)/thmlfootnotes.cpp
libsword_la_SOURCES += $(filtersdir)/thmlheadings.cpp
@@ -30,8 +32,24 @@ libsword_la_SOURCES += $(filtersdir)/thmlgbf.cpp
libsword_la_SOURCES += $(filtersdir)/thmlrtf.cpp
libsword_la_SOURCES += $(filtersdir)/thmlhtml.cpp
libsword_la_SOURCES += $(filtersdir)/thmlhtmlhref.cpp
+libsword_la_SOURCES += $(filtersdir)/thmlwebif.cpp
+
+libsword_la_SOURCES += $(filtersdir)/thmlosis.cpp
+libsword_la_SOURCES += $(filtersdir)/gbfosis.cpp
libsword_la_SOURCES += $(filtersdir)/thmlplain.cpp
+libsword_la_SOURCES += $(filtersdir)/osisheadings.cpp
+libsword_la_SOURCES += $(filtersdir)/osisfootnotes.cpp
+libsword_la_SOURCES += $(filtersdir)/osishtmlhref.cpp
+libsword_la_SOURCES += $(filtersdir)/osiswebif.cpp
+libsword_la_SOURCES += $(filtersdir)/osismorph.cpp
+libsword_la_SOURCES += $(filtersdir)/osisstrongs.cpp
+libsword_la_SOURCES += $(filtersdir)/osisplain.cpp
+libsword_la_SOURCES += $(filtersdir)/osisrtf.cpp
+libsword_la_SOURCES += $(filtersdir)/osislemma.cpp
+libsword_la_SOURCES += $(filtersdir)/osisredletterwords.cpp
+libsword_la_SOURCES += $(filtersdir)/osisscripref.cpp
+
libsword_la_SOURCES += $(filtersdir)/unicodertf.cpp
libsword_la_SOURCES += $(filtersdir)/latin1utf8.cpp
libsword_la_SOURCES += $(filtersdir)/latin1utf16.cpp
@@ -41,25 +59,34 @@ libsword_la_SOURCES += $(filtersdir)/scsuutf8.cpp
libsword_la_SOURCES += $(filtersdir)/utf8html.cpp
libsword_la_SOURCES += $(filtersdir)/utf8latin1.cpp
-libsword_la_SOURCES += $(filtersdir)/thmlolb.cpp
+libsword_la_SOURCES += $(filtersdir)/utf8cantillation.cpp
+libsword_la_SOURCES += $(filtersdir)/utf8hebrewpoints.cpp
+libsword_la_SOURCES += $(filtersdir)/utf8greekaccents.cpp
libsword_la_SOURCES += $(filtersdir)/greeklexattribs.cpp
-if ICU
-ICUDEFS = -D_ICU_
SWICUSRC = $(filtersdir)/utf8transliterator.cpp
SWICUSRC += $(filtersdir)/utf8nfc.cpp
SWICUSRC += $(filtersdir)/utf8nfkd.cpp
SWICUSRC += $(filtersdir)/utf8arshaping.cpp
SWICUSRC += $(filtersdir)/utf8bidireorder.cpp
+
+if ICU
+ICUDEFS = -D_ICU_
+DISTSWICUSRC =
+SWREALICUSRC = $(SWICUSRC)
+else
+if ICUSWORD
+ICUDEFS = -D_ICU_ -D_ICUSWORD_
+DISTSWICUSRC =
+SWREALICUSRC = $(SWICUSRC)
else
-SWICUSRC =
-ICUDEFS =
+DISTSWICUSRC = $(SWICUSRC)
+SWREALICUSRC =
+endif
endif
-libsword_la_SOURCES += $(SWICUSRC)
-DEFS += $(ICUDEFS)
-libsword_la_SOURCES += $(filtersdir)/utf8cantillation.cpp
-libsword_la_SOURCES += $(filtersdir)/utf8hebrewpoints.cpp
-libsword_la_SOURCES += $(filtersdir)/utf8greekaccents.cpp
+INCLUDES += $(ICUDEFS)
+libsword_la_SOURCES += $(SWREALICUSRC)
+EXTRA_DIST = $(DISTSWICUSRC)
diff --git a/src/modules/filters/cipherfil.cpp b/src/modules/filters/cipherfil.cpp
index ad55396..abdd0fc 100644
--- a/src/modules/filters/cipherfil.cpp
+++ b/src/modules/filters/cipherfil.cpp
@@ -1,13 +1,13 @@
/******************************************************************************
*
- * cipherfil - SWFilter decendant to decipher a module
+ * cipherfil - SWFilter descendant to decipher a module
*/
#include <stdlib.h>
-#include <string.h>
#include <cipherfil.h>
+SWORD_NAMESPACE_START
CipherFilter::CipherFilter(const char *key) {
cipher = new SWCipher((unsigned char *)key);
@@ -24,15 +24,21 @@ SWCipher *CipherFilter::getCipher() {
}
-char CipherFilter::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) {
- unsigned int len;
-// len = strlen(text);
- len = maxlen;
- if (len > 0) {
- cipher->cipherBuf(&len, text);
- strncpy(text, cipher->Buf(), (len < (unsigned int)maxlen) ? len : maxlen);
- }
- text[maxlen] = 0;
- text[maxlen+1] = 0;
+char CipherFilter::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ if (text.length() > 2) { // only buffers longer than 2 bytes are processed. NOTE(review): the earlier remark about subtracting 2 "in the next step" matches nothing below -- confirm the guard is still wanted
+ unsigned long len = text.length();
+ if (!key) { // hack: key is not used as a key here, its pointer value selects the direction; null takes this branch
+ cipher->cipherBuf(&len, text.getRawData()); // hand the enciphered text to the cipher (len is passed by address)
+ memcpy(text.getRawData(), cipher->Buf(), len); // overwrite text in place with the cipher's deciphered buffer
+// text = cipher->Buf(); //get the deciphered buffer
+ }
+ else if ((unsigned long)key == 1) { // hack: the sentinel pointer value 1 selects the opposite direction
+ cipher->Buf(text.getRawData(), len);
+ memcpy(text.getRawData(), cipher->cipherBuf(&len), len);
+// text = cipher->cipherBuf(&len);
+ }
+ }
return 0;
}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/gbffootnotes.cpp b/src/modules/filters/gbffootnotes.cpp
index c5b7b90..38f1106 100644
--- a/src/modules/filters/gbffootnotes.cpp
+++ b/src/modules/filters/gbffootnotes.cpp
@@ -1,62 +1,142 @@
/******************************************************************************
*
- * gbffootnotes - SWFilter decendant to hide or show footnotes
+ * gbffootnotes - SWFilter descendant to hide or show footnotes
* in a GBF module.
*/
#include <stdlib.h>
-#include <string.h>
#include <gbffootnotes.h>
+#include <swmodule.h>
+#include <swbuf.h>
+#include <versekey.h>
+#include <utilxml.h>
#ifndef __GNUC__
#else
#include <unixstr.h>
#endif
+SWORD_NAMESPACE_START
-const char GBFFootnotes::on[] = "On";
-const char GBFFootnotes::off[] = "Off";
-const char GBFFootnotes::optName[] = "Footnotes";
-const char GBFFootnotes::optTip[] = "Toggles Footnotes On and Off if they exist";
+const char oName[] = "Footnotes";
+const char oTip[] = "Toggles Footnotes On and Off if they exist";
+const SWBuf choices[3] = {"On", "Off", ""};
+const StringList oValues(&choices[0], &choices[2]);
-GBFFootnotes::GBFFootnotes() {
- option = false;
- options.push_back(on);
- options.push_back(off);
+
+GBFFootnotes::GBFFootnotes() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("Off");
}
GBFFootnotes::~GBFFootnotes() {
}
-void GBFFootnotes::setOptionValue(const char *ival)
-{
- option = (!stricmp(ival, on));
-}
-const char *GBFFootnotes::getOptionValue()
+char GBFFootnotes::processText (SWBuf &text, const SWKey *key, const SWModule *module)
{
- return (option) ? on:off;
-}
+
+ // Scanner state for the tag-by-tag pass below.
+ SWBuf token; // characters of the tag currently being read (between '<' and '>')
+ bool intoken = false; // true while inside a '<' ... '>' tag
+ bool hide = false; // true while between <RF> and <Rf>: output is diverted to tagText
+ SWBuf tagText; // footnote body collected while hide is set
+ XMLTag startTag; // the opening <RF ...> tag of the footnote being collected
+ SWBuf refs = "";
+ int footnoteNum = 1;
+ char buf[254]; // decimal footnote number, used as the entry-attribute key
+ // No VerseKey is built from key here: nothing below reads one, and
+ // dereferencing key unconditionally crashed when the caller passed none.
-char GBFFootnotes::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
-{
+ SWBuf orig = text;
+ const char *from = orig.c_str();
+
+ for (text = ""; *from; from++) {
+ if (*from == '<') {
+ intoken = true;
+ token = "";
+ continue;
+ }
+ if (*from == '>') { // process tokens
+ intoken = false;
+
+ XMLTag tag(token);
+ if (!strcmp(tag.getName(), "RF")) {
+ refs = "";
+ startTag = tag;
+ hide = true;
+ tagText = "";
+ continue;
+ }
+ else if (!strcmp(tag.getName(), "Rf")) {
+ if (module->isProcessEntryAttributes()) {
+ if(tagText.length() == 1 || !strcmp(module->Name(), "IGNT")) {
+ if (option) { // for ASV marks text in verse then put explanation at end of verse
+ text += " <FA>(";
+ text.append(tagText);
+ text += ")<Fr>";
+ hide = false;
+ continue;
+ }
+ }
+ SWBuf fc = module->getEntryAttributes()["Footnote"]["count"]["value"];
+ footnoteNum = (fc.length()) ? atoi(fc.c_str()) : 0;
+ sprintf(buf, "%i", ++footnoteNum);
+ module->getEntryAttributes()["Footnote"]["count"]["value"] = buf;
+ StringList attributes = startTag.getAttributeNames();
+ for (StringList::iterator it = attributes.begin(); it != attributes.end(); it++) {
+ module->getEntryAttributes()["Footnote"][buf][it->c_str()] = startTag.getAttribute(it->c_str());
+ }
+ module->getEntryAttributes()["Footnote"][buf]["body"] = tagText;
+ startTag.setAttribute("swordFootnote", buf);
+ }
+ hide = false;
+ if (option) {
+ text += startTag;
+ text.append(tagText);
+ }
+ else continue;
+ }
+ if (!hide) {
+ text += '<';
+ text.append(token);
+ text += '>';
+ }
+ else {
+ tagText += '<';
+ tagText.append(token);
+ tagText += '>';
+ }
+ continue;
+ }
+ if (intoken) { //copy token
+ token += *from;
+ }
+ else if (!hide) { //copy text which is not inside a token
+ text += *from;
+ }
+ else tagText += *from;
+ }
+ return 0;
+
+
+
+
+
+
+
+
+ /*
if (!option) { // if we don't want footnotes
- char *to, *from, token[4096]; // cheese. Fix.
+ char token[4096]; // cheese. Fix.
int tokpos = 0;
bool intoken = false;
int len;
bool hide = false;
- len = strlen(text) + 1; // shift string to right of buffer
- if (len < maxlen) {
- memmove(&text[maxlen - len], text, len);
- from = &text[maxlen - len];
- }
- else from = text; // -------------------------------
-
- for (to = text; *from; from++) {
+ const char *from;
+ SWBuf orig = text;
+ from = orig.c_str();
+ for (text = ""; *from; from++) {
if (*from == '<') {
intoken = true;
tokpos = 0;
@@ -93,10 +173,9 @@ char GBFFootnotes::ProcessText(char *text, int maxlen, const SWKey *key, const S
}
// if not a footnote token, keep token in text
if (!hide) {
- *to++ = '<';
- for (char *tok = token; *tok; tok++)
- *to++ = *tok;
- *to++ = '>';
+ text += '<';
+ text += token;
+ text += '>';
}
continue;
}
@@ -107,12 +186,12 @@ char GBFFootnotes::ProcessText(char *text, int maxlen, const SWKey *key, const S
}
else {
if (!hide) {
- *to++ = *from;
+ text += *from;
}
}
}
- *to++ = 0;
- *to = 0;
}
- return 0;
+ return 0;*/
}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/gbfheadings.cpp b/src/modules/filters/gbfheadings.cpp
index 590e2fa..01a34ca 100644
--- a/src/modules/filters/gbfheadings.cpp
+++ b/src/modules/filters/gbfheadings.cpp
@@ -1,62 +1,47 @@
/******************************************************************************
*
- * gbfheadings - SWFilter decendant to hide or show headings
+ * gbfheadings - SWFilter descendant to hide or show headings
* in a GBF module.
*/
#include <stdlib.h>
-#include <string.h>
#include <gbfheadings.h>
#ifndef __GNUC__
#else
#include <unixstr.h>
#endif
+SWORD_NAMESPACE_START
-const char GBFHeadings::on[] = "On";
-const char GBFHeadings::off[] = "Off";
-const char GBFHeadings::optName[] = "Headings";
-const char GBFHeadings::optTip[] = "Toggles Headings On and Off if they exist";
+const char oName[] = "Headings";
+const char oTip[] = "Toggles Headings On and Off if they exist";
-GBFHeadings::GBFHeadings() {
- option = false;
- options.push_back(on);
- options.push_back(off);
+const SWBuf choices[3] = {"On", "Off", ""};
+const StringList oValues(&choices[0], &choices[2]);
+
+GBFHeadings::GBFHeadings() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("Off");
}
GBFHeadings::~GBFHeadings() {
}
-void GBFHeadings::setOptionValue(const char *ival)
-{
- option = (!stricmp(ival, on));
-}
-
-const char *GBFHeadings::getOptionValue()
-{
- return (option) ? on:off;
-}
-char GBFHeadings::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
-{
+char GBFHeadings::processText (SWBuf &text, const SWKey *key, const SWModule *module) {
if (!option) { // if we don't want headings
- char *to, *from, token[2048]; // cheese. Fix.
+ char token[2048]; // cheese. Fix.
int tokpos = 0;
bool intoken = false;
int len;
bool hide = false;
- len = strlen(text) + 1; // shift string to right of buffer
- if (len < maxlen) {
- memmove(&text[maxlen - len], text, len);
- from = &text[maxlen - len];
- }
- else from = text; // -------------------------------
-
- for (to = text; *from; from++) {
+ const char *from;
+ SWBuf orig = text;
+ from = orig.c_str();
+ for (text = ""; *from; from++) {
if (*from == '<') {
intoken = true;
tokpos = 0;
@@ -82,10 +67,10 @@ char GBFHeadings::ProcessText(char *text, int maxlen, const SWKey *key, const SW
}
// if not a heading token, keep token in text
if (!hide) {
- *to++ = '<';
+ text += '<';
for (char *tok = token; *tok; tok++)
- *to++ = *tok;
- *to++ = '>';
+ text += *tok;
+ text += '>';
}
continue;
}
@@ -96,12 +81,12 @@ char GBFHeadings::ProcessText(char *text, int maxlen, const SWKey *key, const SW
}
else {
if (!hide) {
- *to++ = *from;
+ text += *from;
}
}
}
- *to++ = 0;
- *to = 0;
}
return 0;
}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/gbfhtml.cpp b/src/modules/filters/gbfhtml.cpp
index 73d445a..775bc06 100644
--- a/src/modules/filters/gbfhtml.cpp
+++ b/src/modules/filters/gbfhtml.cpp
@@ -1,9 +1,8 @@
/***************************************************************************
- gbfhtml.cpp - description
+ gbfhtml.cpp - GBF to HTML filter
-------------------
- begin : Thu Jun 24 1999
- copyright : (C) 1999 by Torsten Uhlmann
- email : TUhlmann@gmx.de
+ begin : 2001-09-03
+ copyright : 2001 by CrossWire Bible Society
***************************************************************************/
/***************************************************************************
@@ -16,521 +15,154 @@
***************************************************************************/
#include <stdlib.h>
-#include <string.h>
#include <gbfhtml.h>
+#include <ctype.h>
+SWORD_NAMESPACE_START
-GBFHTML::GBFHTML()
-{
+GBFHTML::GBFHTML() {
+ // Tokens are GBF tags delimited by '<' and '>'; matching is case
+ // sensitive because GBF uses case to tell start tags from end tags
+ // (e.g. FI / Fi).
+ setTokenStart("<");
+ setTokenEnd(">");
+
+ setTokenCaseSensitive(true);
+
+ // Fixed one-to-one replacements. Tags that need computed output are
+ // handled in handleToken().
+ addTokenSubstitute("Rf", ")</small></font>");
+ addTokenSubstitute("Rx", "</a>");
+ addTokenSubstitute("FI", "<i>"); // italics begin
+ addTokenSubstitute("Fi", "</i>");
+ addTokenSubstitute("FB", "<b>"); // bold begin
+ addTokenSubstitute("Fb", "</b>");
+ addTokenSubstitute("FR", "<font color=\"#FF0000\">"); // words of Jesus begin
+ addTokenSubstitute("Fr", "</font>");
+ addTokenSubstitute("FU", "<u>"); // underline begin
+ addTokenSubstitute("Fu", "</u>");
+ addTokenSubstitute("FO", "<cite>"); // Old Testament quote begin
+ addTokenSubstitute("Fo", "</cite>");
+ addTokenSubstitute("FS", "<sup>"); // Superscript begin
+ addTokenSubstitute("Fs", "</sup>");
+ addTokenSubstitute("FV", "<sub>"); // Subscript begin
+ addTokenSubstitute("Fv", "</sub>");
+ addTokenSubstitute("TT", "<big>"); // Book title begin
+ addTokenSubstitute("Tt", "</big>");
+ addTokenSubstitute("PP", "<cite>"); // poetry begin
+ addTokenSubstitute("Pp", "</cite>");
+ addTokenSubstitute("Fn", "</font>"); // font end
+ addTokenSubstitute("CL", "<br />"); // new line
+ addTokenSubstitute("CM", "<!P><br />"); // paragraph <!P> is a non showing comment that can be changed in the front end to <P> if desired
+ addTokenSubstitute("CG", ""); // ???
+ addTokenSubstitute("CT", ""); // ???
+ addTokenSubstitute("JR", "<div align=\"right\">"); // right align begin
+ addTokenSubstitute("JC", "<div align=\"center\">"); // center align begin
+ addTokenSubstitute("JL", "</div>"); // align end
+
}
-char GBFHTML::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
-{
- char *to, *from, token[2048];
- int tokpos = 0;
- bool intoken = false;
- bool hasFootnotePreTag = false;
- bool isRightJustified = false;
- bool isCentered = false;
- int len;
+bool GBFHTML::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
const char *tok;
+ char val[128];
+ char *valto;
+ const char *num;
+ MyUserData *u = (MyUserData *)userData;
- len = strlen(text) + 1; // shift string to right of buffer
- if (len < maxlen) {
- memmove(&text[maxlen - len], text, len);
- from = &text[maxlen - len];
- }
- else
- from = text; // -------------------------------
-
- for (to = text; *from; from++)
- {
- if (*from == '\n') {
- *from = ' ';
- }
- if (*from == '<') {
- intoken = true;
- tokpos = 0;
- token[0] = 0;
- token[1] = 0;
- token[2] = 0;
- continue;
+ if (!substituteToken(buf, token)) {
+ // deal with OSIS note tags. Just hide till OSISRTF
+ if (!strncmp(token, "note ", 5)) {
+ // let's stop text from going to output
+ u->suspendTextPassThru = true;
}
- if (*from == '>') {
- intoken = false;
- // process desired tokens
- switch (*token) {
- case 'W': // Strongs
- switch(token[1])
- {
- case 'G': // Greek
- case 'H': // Hebrew
- case 'T': // Tense
- *to++ = ' ';
- *to++ = '<';
- *to++ = 's';
- *to++ = 'm';
- *to++ = 'a';
- *to++ = 'l';
- *to++ = 'l';
- *to++ = '>';
- *to++ = '<';
- *to++ = 'e';
- *to++ = 'm';
- *to++ = '>';
- for (tok = token+2; *tok; tok++)
- *to++ = *tok;
- *to++ = '<';
- *to++ = '/';
- *to++ = 'e';
- *to++ = 'm';
- *to++ = '>';
- *to++ = '<';
- *to++ = '/';
- *to++ = 's';
- *to++ = 'm';
- *to++ = 'a';
- *to++ = 'l';
- *to++ = 'l';
- *to++ = '>';
- *to++ = ' ';
- continue;
- }
- break;
- case 'R':
- switch(token[1])
- {
- case 'X':
- *to++ = '<';
- *to++ = 'a';
- *to++ = ' ';
- *to++ = 'h';
- *to++ = 'r';
- *to++ = 'e';
- *to++ = 'f';
- *to++ = '=';
- *to++ = '\"';
- for (tok = token + 3; *tok; tok++) {
- if(*tok != '<' && *tok+1 != 'R' && *tok+2 != 'x') {
- *to++ = *tok;
- }
- else {
- break;
- }
- }
- *to++ = '\"';
- *to++ = '>';
- continue;
- case 'x':
- *to++ = '<';
- *to++ = '/';
- *to++ = 'a';
- *to++ = '>';
- continue;
- case 'B': //word(s) explained in footnote
- *to++ = '<';
- *to++ = 'i';
- *to++ = '>';
- hasFootnotePreTag = true; //we have the RB tag
- continue;
- case 'F': // footnote begin
- if (hasFootnotePreTag) {
- *to++ = '<';
- *to++ = '/';
- *to++ = 'i';
- *to++ = '>';
- *to++ = ' ';
- }
- *to++ = '<';
- *to++ = 'f';
- *to++ = 'o';
- *to++ = 'n';
- *to++ = 't';
- *to++ = ' ';
- *to++ = 'c';
- *to++ = 'o';
- *to++ = 'l';
- *to++ = 'o';
- *to++ = 'r';
- *to++ = '=';
- *to++ = '\"';
- *to++ = '#';
- *to++ = '8';
- *to++ = '0';
- *to++ = '0';
- *to++ = '0';
- *to++ = '0';
- *to++ = '0';
- *to++ = '\"';
- *to++ = '>';
+
+ else if (!strncmp(token, "/note", 5)) {
+ u->suspendTextPassThru = false;
+ }
- *to++ = ' ';
- *to++ = '<';
- *to++ = 's';
- *to++ = 'm';
- *to++ = 'a';
- *to++ = 'l';
- *to++ = 'l';
- *to++ = '>';
- *to++ = '(';
+ else if (!strncmp(token, "w", 1)) {
+ // OSIS Word (temporary until OSISRTF is done)
+ // Attribute values are copied into val[]; each copy is capped at
+ // sizeof(val) - 1 characters so an over-long attribute is truncated
+ // instead of overrunning the buffer.
+ valto = val;
+ num = strstr(token, "lemma=\"x-Strongs:");
+ if (num) {
+ for (num+=17; ((*num) && (*num != '\"') && (valto < val + sizeof(val) - 1)); num++)
+ *valto++ = *num;
+ *valto = 0;
+ // skip one leading non-digit character (presumably the G/H language
+ // prefix -- TODO confirm); an empty value is left as is so we never
+ // step past the terminator
+ tok = (*val && !isdigit((unsigned char)*val)) ? val+1 : val;
+ if (atoi(tok) < 5627) {
+ buf += " <small><em>&lt;";
+ for (; *tok; tok++)
+ buf += *tok;
+ buf += "&gt;</em></small> ";
+ }
+ }
+ valto = val;
+ num = strstr(token, "morph=\"x-Robinson:");
+ if (num) {
+ for (num+=18; ((*num) && (*num != '\"') && (valto < val + sizeof(val) - 1)); num++)
+ *valto++ = *num;
+ *valto = 0;
+ // normal robinsons tense
+ buf += " <small><em>(";
+ for (tok = val; *tok; tok++)
+ buf += *tok;
+ buf += ")</em></small> ";
+ }
+ }
+
+ else if (!strncmp(token, "WG", 2) || !strncmp(token, "WH", 2)) { // strong's numbers
+ buf += " <small><em>&lt;";
+ for (tok = token + 2; *tok; tok++)
+ buf += *tok;
+ buf += "&gt;</em></small> ";
+ }
- continue;
- case 'f': // footnote end
- *to++ = ')';
- *to++ = '<';
- *to++ = '/';
- *to++ = 's';
- *to++ = 'm';
- *to++ = 'a';
- *to++ = 'l';
- *to++ = 'l';
- *to++ = '>';
- *to++ = ' ';
- *to++ = '<';
- *to++ = '/';
- *to++ = 'f';
- *to++ = 'o';
- *to++ = 'n';
- *to++ = 't';
- *to++ = '>';
- hasFootnotePreTag = false;
- continue;
- }
- break;
+ else if (!strncmp(token, "WTG", 3) || !strncmp(token, "WTH", 3)) { // strong's numbers tense
+ // emitted as "(value)"; the opening bracket has to pair with the ")" appended after the loop
+ buf += " <small><em>(";
+ for (tok = token + 3; *tok; tok++)
+ if(*tok != '\"')
+ buf += *tok;
+ buf += ")</em></small> ";
+ }
- case 'F': // font tags
- switch(token[1])
- {
- case 'I': // italic start
- *to++ = '<';
- *to++ = 'i';
- *to++ = '>';
- continue;
- case 'i': // italic end
- *to++ = '<';
- *to++ = '/';
- *to++ = 'i';
- *to++ = '>';
- continue;
- case 'B': // bold start
- *to++ = '<';
- *to++ = 'b';
- *to++ = '>';
- continue;
- case 'b': // bold end
- *to++ = '<';
- *to++ = '/';
- *to++ = 'b';
- *to++ = '>';
- continue;
- case 'R': // words of Jesus begin
- *to++ = '<';
- *to++ = 'f';
- *to++ = 'o';
- *to++ = 'n';
- *to++ = 't';
- *to++ = ' ';
- *to++ = 'c';
- *to++ = 'o';
- *to++ = 'l';
- *to++ = 'o';
- *to++ = 'r';
- *to++ = '=';
- *to++ = '#';
- *to++ = 'F';
- *to++ = 'F';
- *to++ = '0';
- *to++ = '0';
- *to++ = '0';
- *to++ = '0';
- *to++ = '>';
- continue;
- case 'r': // words of Jesus end
- *to++ = '<';
- *to++ = '/';
- *to++ = 'f';
- *to++ = 'o';
- *to++ = 'n';
- *to++ = 't';
- *to++ = '>';
- continue;
- case 'U': // Underline start
- *to++ = '<';
- *to++ = 'u';
- *to++ = '>';
- continue;
- case 'u': // Underline end
- *to++ = '<';
- *to++ = '/';
- *to++ = 'u';
- *to++ = '>';
- continue;
- case 'O': // Old Testament quote begin
- *to++ = '<';
- *to++ = 'c';
- *to++ = 'i';
- *to++ = 't';
- *to++ = 'e';
- *to++ = '>';
- continue;
- case 'o': // Old Testament quote end
- *to++ = '<';
- *to++ = '/';
- *to++ = 'c';
- *to++ = 'i';
- *to++ = 't';
- *to++ = 'e';
- *to++ = '>';
- continue;
- case 'S': // Superscript begin
- *to++ = '<';
- *to++ = 's';
- *to++ = 'u';
- *to++ = 'p';
- *to++ = '>';
- continue;
- case 's': // Superscript end
- *to++ = '<';
- *to++ = '/';
- *to++ = 's';
- *to++ = 'u';
- *to++ = 'p';
- *to++ = '>';
- continue;
- case 'V': // Subscript begin
- *to++ = '<';
- *to++ = 's';
- *to++ = 'u';
- *to++ = 'b';
- *to++ = '>';
- continue;
- case 'v': // Subscript end
- *to++ = '<';
- *to++ = '/';
- *to++ = 's';
- *to++ = 'u';
- *to++ = 'b';
- *to++ = '>';
- continue;
- case 'N':
- *to++ = '<';
- *to++ = 'f';
- *to++ = 'o';
- *to++ = 'n';
- *to++ = 't';
- *to++ = ' ';
- *to++ = 'f';
- *to++ = 'a';
- *to++ = 'c';
- *to++ = 'e';
- *to++ = '=';
- *to++ = '"';
- for (tok = token + 2; *tok; tok++)
- *to++ = *tok;
- *to++ = '"';
- *to++ = '>';
- continue;
- case 'n':
- *to++ = '<';
- *to++ = '/';
- *to++ = 'f';
- *to++ = 'o';
- *to++ = 'n';
- *to++ = 't';
- *to++ = '>';
- continue;
- }
- break;
- case 'C': // special character tags
- switch(token[1])
- {
- case 'A': // ASCII value
- *to++ = (char)atoi(&token[2]);
- continue;
- case 'G':
- //*to++ = ' ';
- continue;
- case 'L': // line break
- *to++ = '<';
- *to++ = 'b';
- *to++ = 'r';
- *to++ = ' ';
- *to++ = '/';
- *to++ = '>';
- *to++ = ' ';
- continue;
- case 'M': // new paragraph
- *to++ = '<';
- *to++ = 'b';
- *to++ = 'r';
- *to++ = ' ';
- *to++ = '/';
- *to++ = '>';
- continue;
- case 'T':
- //*to++ = ' ';
- continue;
- }
- break;
- case 'J': //Justification
- switch(token[1])
- {
- case 'R': //right
- *to++ = '<';
- *to++ = 'd';
- *to++ = 'i';
- *to++ = 'v';
- *to++ = ' ';
- *to++ = 'a';
- *to++ = 'l';
- *to++ = 'i';
- *to++ = 'g';
- *to++ = 'n';
- *to++ = '=';
- *to++ = '\"';
- *to++ = 'r';
- *to++ = 'i';
- *to++ = 'g';
- *to++ = 'h';
- *to++ = 't';
- *to++ = '\"';
- *to++ = '>';
- isRightJustified = true;
- continue;
+ else if (!strncmp(token, "RX", 2)) {
+ buf += "<i>";
+ // Skip "RX" plus the one separator character after it, without
+ // stepping past the terminator when the token is just "RX".
+ tok = token + 2;
+ if (*tok)
+ tok++;
+ // Copy the reference text, stopping at an embedded "<Rx" end tag.
+ // (The previous test "*tok+1 != 'R'" compared the character value
+ // plus one, so copying stopped at any 'Q' or 'v' in the text.)
+ for (; *tok; tok++) {
+ if (!strncmp(tok, "<Rx", 3)) {
+ break;
+ }
+ buf += *tok;
+ }
+ buf += "</i>";
+ }
- case 'C': //center
- *to++ = '<';
- *to++ = 'd';
- *to++ = 'i';
- *to++ = 'v';
- *to++ = ' ';
- *to++ = 'a';
- *to++ = 'l';
- *to++ = 'i';
- *to++ = 'g';
- *to++ = 'n';
- *to++ = '=';
- *to++ = '\"';
- *to++ = 'c';
- *to++ = 'e';
- *to++ = 'n';
- *to++ = 't';
- *to++ = 'e';
- *to++ = 'r';
- *to++ = '\"';
- *to++ = '>';
- isCentered = true;
- continue;
+ else if (!strncmp(token, "RB", 2)) {
+ buf += "<i>";
+ u->hasFootnotePreTag = true;
+ }
- case 'L': //left, reset right and center
- if (isCentered) {
- *to++ = '<';
- *to++ = '/';
- *to++ = 'c';
- *to++ = 'e';
- *to++ = 'n';
- *to++ = 't';
- *to++ = 'e';
- *to++ = 'r';
- *to++ = '>';
- isCentered = false;
- }
- if (isRightJustified) {
- *to++ = '<';
- *to++ = '/';
- *to++ = 'd';
- *to++ = 'i';
- *to++ = 'v';
- *to++ = '>';
- isRightJustified = false;
- }
- continue;
- }
- break;
- case 'T': // title formatting
- switch(token[1])
- {
- case 'T': // Book title begin
- *to++ = '<';
- *to++ = 'b';
- *to++ = 'i';
- *to++ = 'g';
- *to++ = '>';
- continue;
- case 't':
- *to++ = '<';
- *to++ = '/';
- *to++ = 'b';
- *to++ = 'i';
- *to++ = 'g';
- *to++ = '>';
- continue;/*
- case 'S':
- *to++ = '<';
- *to++ = 'b';
- *to++ = 'r';
- *to++ = ' ';
- *to++ = '/';
- *to++ = '>';
- *to++ = '<';
- *to++ = 'b';
- *to++ = 'i';
- *to++ = 'g';
- *to++ = '>';
- continue;
- case 's':
- *to++ = '<';
- *to++ = '/';
- *to++ = 'b';
- *to++ = 'i';
- *to++ = 'g';
- *to++ = '>';
- *to++ = '<';
- *to++ = 'b';
- *to++ = 'r';
- *to++ = ' ';
- *to++ = '/';
- *to++ = '>';
- continue;*/
- }
- break;
-
- case 'P': // special formatting
- switch(token[1])
- {
- case 'P': // Poetry begin
- *to++ = '<';
- *to++ = 'c';
- *to++ = 'i';
- *to++ = 't';
- *to++ = 'e';
- *to++ = '>';
- continue;
- case 'p':
- *to++ = '<';
- *to++ = '/';
- *to++ = 'c';
- *to++ = 'i';
- *to++ = 't';
- *to++ = 'e';
- *to++ = '>';
- continue;
- }
- break;
+ else if (!strncmp(token, "RF", 2)) {
+ if (u->hasFootnotePreTag) {
+ u->hasFootnotePreTag = false;
+ buf += "</i> ";
}
- continue;
+ buf += "<font color=\"#800000\"><small> (";
+ }
+
+ else if (!strncmp(token, "FN", 2)) {
+ buf += "<font face=\"";
+ for (tok = token + 2; *tok; tok++)
+ if(*tok != '\"')
+ buf += *tok;
+ buf += "\">";
+ }
+
+ else if (!strncmp(token, "CA", 2)) { // ASCII value
+ buf += (char)atoi(&token[2]);
+ }
+
+ else {
+ return false;
}
- if (intoken) {
- if (tokpos < 2045) {
- token[tokpos++] = *from;
- token[tokpos+2] = 0;
- }
- }
- else
- *to++ = *from;
}
- *to++ = 0;
- *to = 0;
- return 0;
+ return true;
}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/gbfhtmlhref.cpp b/src/modules/filters/gbfhtmlhref.cpp
index 30b27ba..4061150 100644
--- a/src/modules/filters/gbfhtmlhref.cpp
+++ b/src/modules/filters/gbfhtmlhref.cpp
@@ -16,8 +16,13 @@
***************************************************************************/
#include <stdlib.h>
-#include <string.h>
#include <gbfhtmlhref.h>
+#include <swmodule.h>
+#include <utilxml.h>
+#include <versekey.h>
+#include <ctype.h>
+
+SWORD_NAMESPACE_START
GBFHTMLHREF::GBFHTMLHREF() {
setTokenStart("<");
@@ -25,12 +30,13 @@ GBFHTMLHREF::GBFHTMLHREF() {
setTokenCaseSensitive(true);
- addTokenSubstitute("Rf", ")</small></font>");
+ //addTokenSubstitute("Rf", ")</small></font>");
+ addTokenSubstitute("FA", "<font color=\"#800000\">"); // for ASV footnotes to mark text
addTokenSubstitute("Rx", "</a>");
addTokenSubstitute("FI", "<i>"); // italics begin
addTokenSubstitute("Fi", "</i>");
- addTokenSubstitute("FB", "<n>"); // bold begin
- addTokenSubstitute("Fb", "</n>");
+ addTokenSubstitute("FB", "<b>"); // bold begin
+ addTokenSubstitute("Fb", "</b>");
addTokenSubstitute("FR", "<font color=\"#FF0000\">"); // words of Jesus begin
addTokenSubstitute("Fr", "</font>");
addTokenSubstitute("FU", "<u>"); // underline begin
@@ -57,87 +63,160 @@ GBFHTMLHREF::GBFHTMLHREF() {
}
-bool GBFHTMLHREF::handleToken(char **buf, const char *token, DualStringMap &userData) {
+bool GBFHTMLHREF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
const char *tok;
+ char val[128];
+ char *valto;
+ const char *num;
+ MyUserData *u = (MyUserData *)userData;
if (!substituteToken(buf, token)) {
- if (!strncmp(token, "WG", 2) || !strncmp(token, "WH", 2)) { // strong's numbers
- pushString(buf, " <small><em>&lt;<a href=\"#");
+ XMLTag tag(token);
+ if (!strncmp(token, "w", 1)) {
+ // OSIS Word (temporary until OSISRTF is done)
+ valto = val;
+ num = strstr(token, "lemma=\"x-Strongs:");
+ if (num) {
+ for (num+=17; ((*num) && (*num != '\"')); num++)
+ *valto++ = *num;
+ *valto = 0;
+ if (atoi((!isdigit(*val))?val+1:val) < 5627) {
+ buf += " <small><em>&lt;<a href=\"type=Strongs value=";
+ for (tok = val; *tok; tok++)
+ buf += *tok;
+ buf += "\">";
+ for (tok = (!isdigit(*val))?val+1:val; *tok; tok++)
+ buf += *tok;
+ buf += "</a>&gt;</em></small> ";
+ //cout << buf;
+
+ }
+ /* forget these for now
+ else {
+ // verb morph
+ sprintf(wordstr, "%03d", word-1);
+ module->getEntryAttributes()["Word"][wordstr]["Morph"] = val;
+ }
+ */
+ }
+ valto = val;
+ num = strstr(token, "morph=\"x-Robinson:");
+ if (num) {
+ for (num+=18; ((*num) && (*num != '\"')); num++)
+ *valto++ = *num;
+ *valto = 0;
+ buf += " <small><em>(<a href=\"type=morph class=Robinson value=";
+ for (tok = val; *tok; tok++)
+ // normal robinsons tense
+ buf += *tok;
+ buf += "\">";
+ for (tok = val; *tok; tok++)
+ //if(*tok != '\"')
+ buf += *tok;
+ buf += "</a>)</em></small> ";
+ }
+ }
+
+ else if (!strncmp(token, "WG", 2) || !strncmp(token, "WH", 2)) { // strong's numbers
+ buf += " <small><em>&lt;<a href=\"type=Strongs value=";
for (tok = token+1; *tok; tok++)
//if(token[i] != '\"')
- *(*buf)++ = *tok;
- *(*buf)++ = '\"';
- *(*buf)++ = '>';
+ buf += *tok;
+ buf += "\">";
for (tok = token + 2; *tok; tok++)
//if(token[i] != '\"')
- *(*buf)++ = *tok;
- pushString(buf, "</a>&gt;</em></small>");
+ buf += *tok;
+ buf += "</a>&gt;</em></small>";
}
else if (!strncmp(token, "WTG", 3) || !strncmp(token, "WTH", 3)) { // strong's numbers tense
- pushString(buf, " <small><em>(<A HREF=\"#");
+ buf += " <small><em>(<a href=\"type=Strongs value=";
for (tok = token + 2; *tok; tok++)
if(*tok != '\"')
- *(*buf)++ = *tok;
- *(*buf)++ = '\"';
- *(*buf)++ = '>';
+ buf += *tok;
+ buf += "\">";
for (tok = token + 3; *tok; tok++)
if(*tok != '\"')
- *(*buf)++ = *tok;
- pushString(buf, "</a>)</em></small>");
+ buf += *tok;
+ buf += "</a>)</em></small>";
}
else if (!strncmp(token, "WT", 2) && strncmp(token, "WTH", 3) && strncmp(token, "WTG", 3)) { // morph tags
- pushString(buf, " <small><em>(<a href=\"M");
+ buf += " <small><em>(<a href=\"type=morph class=none value=";
for (tok = token + 2; *tok; tok++)
if(*tok != '\"')
- *(*buf)++ = *tok;
- *(*buf)++ = '\"';
- *(*buf)++ = '>';
+ buf += *tok;
+ buf += "\">";
for (tok = token + 2; *tok; tok++)
if(*tok != '\"')
- *(*buf)++ = *tok;
- pushString(buf, "</a>)</em></small>");
+ buf += *tok;
+ buf += "</a>)</em></small>";
}
- else if (!strncmp(token, "RX", 2)) {
- pushString(buf, "<a href=\"");
+ else if (!strcmp(tag.getName(), "RX")) {
+ buf += "<a href=\"";
for (tok = token + 3; *tok; tok++) {
if(*tok != '<' && *tok+1 != 'R' && *tok+2 != 'x') {
- *(*buf)++ = *tok;
+ buf += *tok;
}
else {
break;
}
}
- *(*buf)++ = '\"';
- *(*buf)++ = '>';
+ buf += "\">";
}
-
+ else if (!strcmp(tag.getName(), "RF")) {
+ SWBuf type = tag.getAttribute("type");
+ SWBuf footnoteNumber = tag.getAttribute("swordFootnote");
+ VerseKey *vkey;
+ // see if we have a VerseKey * or descendant
+ try {
+ vkey = SWDYNAMIC_CAST(VerseKey, u->key);
+ }
+ catch ( ... ) { }
+ if (vkey) {
+ // leave this special osis type in for crossReference notes types? Might thml use this some day? Doesn't hurt.
+ //char ch = ((tag.getAttribute("type") && ((!strcmp(tag.getAttribute("type"), "crossReference")) || (!strcmp(tag.getAttribute("type"), "x-cross-ref")))) ? 'x':'n');
+ buf.appendFormatted("<a href=\"noteID=%s.%c.%s\"><small><sup>*%c</sup></small></a> ", vkey->getText(), 'n', footnoteNumber.c_str(), 'n');
+ }
+ u->suspendTextPassThru = true;
+ }
+ else if (!strcmp(tag.getName(), "Rf")) {
+ u->suspendTextPassThru = false;
+ }
+/*
else if (!strncmp(token, "RB", 2)) {
- pushString(buf, "<i>");
- userData["hasFootnotePreTag"] = "true";
+ buf += "<i> ";
+ u->hasFootnotePreTag = true;
}
+ else if (!strncmp(token, "Rf", 2)) {
+ buf += "&nbsp<a href=\"note=";
+ buf += u->lastTextNode.c_str();
+ buf += "\">";
+ buf += "<small><sup>*n</sup></small></a>&nbsp";
+ // let's let text resume to output again
+ u->suspendTextPassThru = false;
+ }
+
else if (!strncmp(token, "RF", 2)) {
- if(userData["hasFootnotePreTag"] == "true") {
- userData["hasFootnotePreTag"] = "false";
- pushString(buf, "</i> ");
+ if (u->hasFootnotePreTag) {
+ u->hasFootnotePreTag = false;
+ buf += "</i> ";
}
- pushString(buf, "<font color=\"#800000\"><small> (");
+ u->suspendTextPassThru = true;
}
-
+*/
else if (!strncmp(token, "FN", 2)) {
- pushString(buf, "<font face=\"");
+ buf += "<font face=\"";
for (tok = token + 2; *tok; tok++)
if(*tok != '\"')
- *(*buf)++ = *tok;
- *(*buf)++ = '\"';
- *(*buf)++ = '>';
+ buf += *tok;
+ buf += "\">";
}
else if (!strncmp(token, "CA", 2)) { // ASCII value
- *(*buf)++ = (char)atoi(&token[2]);
+ buf += (char)atoi(&token[2]);
}
else {
@@ -146,3 +225,5 @@ bool GBFHTMLHREF::handleToken(char **buf, const char *token, DualStringMap &user
}
return true;
}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/gbfmorph.cpp b/src/modules/filters/gbfmorph.cpp
index f8d336e..996baf9 100644
--- a/src/modules/filters/gbfmorph.cpp
+++ b/src/modules/filters/gbfmorph.cpp
@@ -1,62 +1,47 @@
/******************************************************************************
*
- * gbfmorph - SWFilter decendant to hide or show morph tags
+ * gbfmorph - SWFilter descendant to hide or show morph tags
* in a GBF module.
*/
#include <stdlib.h>
-#include <string.h>
#include <gbfmorph.h>
#ifndef __GNUC__
#else
#include <unixstr.h>
#endif
+SWORD_NAMESPACE_START
-const char GBFMorph::on[] = "On";
-const char GBFMorph::off[] = "Off";
-const char GBFMorph::optName[] = "Morphological Tags";
-const char GBFMorph::optTip[] = "Toggles Morphological Tags On and Off if they exist";
+const char oName[] = "Morphological Tags";
+const char oTip[] = "Toggles Morphological Tags On and Off if they exist";
+const SWBuf choices[3] = {"On", "Off", ""};
+const StringList oValues(&choices[0], &choices[2]);
-GBFMorph::GBFMorph() {
- option = false;
- options.push_back(on);
- options.push_back(off);
+GBFMorph::GBFMorph() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("Off");
}
GBFMorph::~GBFMorph() {
}
-void GBFMorph::setOptionValue(const char *ival)
-{
- option = (!stricmp(ival, on));
-}
-
-const char *GBFMorph::getOptionValue()
-{
- return (option) ? on:off;
-}
-char GBFMorph::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
-{
+char GBFMorph::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
if (!option) { // if we don't want morph tags
- char *to, *from, token[2048]; // cheese. Fix.
+ const char *from;
+ char token[2048]; // cheese. Fix.
int tokpos = 0;
bool intoken = false;
int len;
bool lastspace = false;
- len = strlen(text) + 1; // shift string to right of buffer
- if (len < maxlen) {
- memmove(&text[maxlen - len], text, len);
- from = &text[maxlen - len];
- }
- else from = text; // -------------------------------
+ SWBuf orig = text;
+ from = orig.c_str();
- for (to = text; *from; from++) {
+ for (text = ""; *from; from++) {
if (*from == '<') {
intoken = true;
tokpos = 0;
@@ -70,15 +55,14 @@ char GBFMorph::ProcessText(char *text, int maxlen, const SWKey *key, const SWMod
if (*token == 'W' && token[1] == 'T') { // Morph
if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) {
if (lastspace)
- to--;
+ text--;
}
continue;
}
// if not a morph tag token, keep token in text
- *to++ = '<';
- for (char *tok = token; *tok; tok++)
- *to++ = *tok;
- *to++ = '>';
+ text += '<';
+ text += token;
+ text += '>';
continue;
}
if (intoken) {
@@ -87,12 +71,12 @@ char GBFMorph::ProcessText(char *text, int maxlen, const SWKey *key, const SWMod
token[tokpos+2] = 0;
}
else {
- *to++ = *from;
+ text += *from;
lastspace = (*from == ' ');
}
}
- *to++ = 0;
- *to = 0;
}
return 0;
}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/gbfosis.cpp b/src/modules/filters/gbfosis.cpp
new file mode 100644
index 0000000..b5dd8e1
--- /dev/null
+++ b/src/modules/filters/gbfosis.cpp
@@ -0,0 +1,439 @@
+/******************************************************************************
+ *
+ * gbfosis - SWFilter descendant intended to convert GBF markup to OSIS
+ * (the processText conversion body is currently commented out).
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <gbfosis.h>
+#include <swmodule.h>
+#include <versekey.h>
+#include <swlog.h>
+#include <stdarg.h>
+#ifndef __GNUC__
+#else
+#include <unixstr.h>
+#endif
+
+SWORD_NAMESPACE_START
+
+GBFOSIS::GBFOSIS() {
+}
+
+
+GBFOSIS::~GBFOSIS() {
+}
+
+
+char GBFOSIS::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+/*
+ char *to, *from, token[2048]; // cheese. Fix.
+ int tokpos = 0;
+ bool intoken = false;
+ int len;
+ bool lastspace = false;
+ int word = 1;
+ char val[128];
+ char buf[128];
+ char wordstr[5];
+ char *valto;
+ char *ch;
+ char *textStart, *textEnd;
+ char *wordStart, *wordEnd;
+ char *fromStart;
+ bool newText = false;
+ bool newWord = false;
+ SWBuf tmp;
+ bool suspendTextPassThru = false;
+ bool keepToken = false;
+ bool handled = false;
+
+ len = strlen(text) + 1; // shift string to right of buffer
+ if (len < maxlen) {
+ memmove(&text[maxlen - len], text, len);
+ from = &text[maxlen - len];
+ }
+ else from = text;
+
+ textStart = from;
+ fromStart = from;
+ wordStart = text;
+
+ static QuoteStack quoteStack;
+
+ // -------------------------------
+
+ for (to = text; *from; from++) {
+ if (*from == '<') {
+ intoken = true;
+ tokpos = 0;
+ token[0] = 0;
+ token[1] = 0;
+ token[2] = 0;
+ textEnd = from-1;
+ wordEnd = to;
+ continue;
+ }
+ if (*from == '>') { // process tokens
+ intoken = false;
+ keepToken = false;
+ suspendTextPassThru = false;
+ newWord = true;
+ handled = false;
+
+ while (wordStart < (text+maxlen)) {
+// if (strchr(" ,;.?!()'\"", *wordStart))
+ if (strchr(";, .:?!()'\"", *wordStart) && wordStart[0] && wordStart[1])
+ wordStart++;
+ else break;
+ }
+ while (wordEnd > wordStart) {
+ if (strchr(" ,;.:?!()'\"", *wordEnd))
+ wordEnd--;
+ else break;
+ }
+
+ // Scripture Reference
+ if (!strncmp(token, "scripRef", 8)) {
+ // pushString(buf, "<reference work=\"Bible.KJV\" reference=\"");
+ suspendTextPassThru = true;
+ newText = true;
+ handled = true;
+ }
+ else if (!strncmp(token, "/scripRef", 9)) {
+ tmp = "";
+ tmp.append(textStart, (int)(textEnd - textStart)+1);
+ pushString(&to, convertToOSIS(tmp.c_str(), key));
+ lastspace = false;
+ suspendTextPassThru = false;
+ handled = true;
+ }
+
+ // Footnote
+ if (!strcmp(token, "RF")) {
+ // pushString(buf, "<reference work=\"Bible.KJV\" reference=\"");
+ pushString(&to, "<note type=\"x-StudyNote\">");
+ newText = true;
+ lastspace = false;
+ handled = true;
+ }
+ else if (!strcmp(token, "Rf")) {
+ pushString(&to, "</note>");
+ lastspace = false;
+ handled = true;
+ }
+ // hebrew titles
+ if (!strcmp(token, "TH")) {
+ pushString(&to, "<title type=\"psalm\">");
+ newText = true;
+ lastspace = false;
+ handled = true;
+ }
+ else if (!strcmp(token, "Th")) {
+ pushString(&to, "</title>");
+ lastspace = false;
+ handled = true;
+ }
+ // Italics assume transchange
+ if (!strcmp(token, "FI")) {
+ pushString(&to, "<transChange type=\"added\">");
+ newText = true;
+ lastspace = false;
+ handled = true;
+ }
+ else if (!strcmp(token, "Fi")) {
+ pushString(&to, "</transChange>");
+ lastspace = false;
+ handled = true;
+ }
+ // Paragraph break. For now use empty paragraph element
+ if (!strcmp(token, "CM")) {
+ pushString(&to, "<milestone type=\"x-p\" />");
+ newText = true;
+ lastspace = false;
+ handled = true;
+ }
+
+ // Figure
+ else if (!strncmp(token, "img ", 4)) {
+ const char *src = strstr(token, "src");
+ if (!src) // assert we have a src attribute
+ continue;
+// return false;
+
+ pushString(&to, "<figure src=\"");
+ const char *c;
+ for (c = src;((*c) && (*c != '"')); c++);
+
+// uncomment for SWORD absolute path logic
+// if (*(c+1) == '/') {
+// pushString(buf, "file:");
+// pushString(buf, module->getConfigEntry("AbsoluteDataPath"));
+// if (*((*buf)-1) == '/')
+// c++; // skip '/'
+// }
+// end of uncomment for absolute path logic
+
+ for (c++;((*c) && (*c != '"')); c++)
+ *to++ = *c;
+
+ pushString(&to, "\" />");
+ lastspace = false;
+ handled = true;
+ }
+
+ // Strongs numbers
+ else if (*token == 'W' && (token[1] == 'G' || token[1] == 'H')) { // Strongs
+ bool divineName = false;
+ if (module->isProcessEntryAttributes()) {
+ valto = val;
+ for (unsigned int i = 1; ((token[i]) && (i < 150)); i++)
+ *valto++ = token[i];
+ *valto = 0;
+ // normal strongs number
+ strstrip(val);
+ if (!strncmp(wordStart, "<w ", 3)) {
+ strtok(wordStart, ">");
+ char *attStart = strstr(wordStart, "lemma");
+ if (attStart) {
+ attStart += 7;
+ sprintf(buf, "x-Strongs:%s|", val);
+ }
+ else {
+ attStart = wordStart + 3;
+ sprintf(buf, "lemma=\"x-Strongs:%s\" ", val);
+ }
+ wordStart[strlen(wordStart)] = '>';
+ memmove(attStart+strlen(buf), attStart, (to-attStart)+1);
+ memcpy(attStart, buf, strlen(buf));
+ to+=strlen(buf);
+ }
+ else {
+ if (!strcmp(val, "H03068")) { //divineName
+ sprintf(buf, "<divineName><w lemma=\"x-Strongs:%s\">", val);
+ divineName = true;
+ }
+ else sprintf(buf, "<w lemma=\"x-Strongs:%s\">", val);
+
+ memmove(wordStart+strlen(buf), wordStart, (to-wordStart)+1);
+ memcpy(wordStart, buf, strlen(buf));
+ to+=strlen(buf);
+
+ if (divineName) {
+ wordStart += 12;
+ pushString(&to, "</w></divineName>");
+ }
+ else pushString(&to, "</w>");
+
+ module->getEntryAttributes()["Word"][wordstr]["Strongs"] = val;
+ lastspace = false;
+ }
+ }
+ handled = true;
+ }
+
+ // Morphology
+ else if (*token == 'W' && token[1] == 'T' && (token[2] == 'G' || token[2] == 'H')) { // Strongs
+ valto = val;
+ for (unsigned int i = 1; ((token[i]) && (i < 150)); i++)
+ *valto++ = token[i];
+ *valto = 0;
+ strstrip(val);
+ if (!strncmp(wordStart, "<w ", 3)) {
+ strtok(wordStart, ">");
+ char *attStart = strstr(wordStart, "morph");
+ if (attStart) {
+ attStart += 7;
+ sprintf(buf, "x-%s:%s|", "StrongsMorph", val);
+ }
+ else {
+ attStart = wordStart + 3;
+ sprintf(buf, "morph=\"x-%s:%s\" ", "StrongsMorph", val);
+ }
+ wordStart[strlen(wordStart)] = '>';
+ memmove(attStart+strlen(buf), attStart, (to-attStart)+1);
+ memcpy(attStart, buf, strlen(buf));
+ to+=strlen(buf);
+ }
+ else {
+ sprintf(buf, "<w morph=\"x-%s:%s\">", "StrongsMorph", val);
+ memmove(wordStart+strlen(buf), wordStart, (to-wordStart)+1);
+ memcpy(wordStart, buf, strlen(buf));
+ to+=strlen(buf);
+ pushString(&to, "</w>");
+ lastspace = false;
+ }
+ handled = true;
+ }
+
+ if (!keepToken) {
+ if (!handled) {
+ SWLog::systemlog->LogError("Unprocessed Token: <%s>", token);
+// exit(-1);
+ }
+ if (from[1] && strchr(" ,;.:?!()'\"", from[1])) {
+ if (lastspace)
+ to--;
+ }
+ if (newText) {textStart = from+1; newText = false; }
+// if (newWord) {wordStart = to; newWord = false; }
+ continue;
+ }
+ // if not a strongs token, keep token in text
+ *to++ = '<';
+ for (char *tok = token; *tok; tok++)
+ *to++ = *tok;
+ *to++ = '>';
+ if (newText) {textStart = to; newWord = false; }
+// if (newWord) {wordStart = to; newWord = false; }
+ continue;
+ }
+ if (intoken) {
+ if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) {
+ token[tokpos++] = *from;
+ token[tokpos+2] = 0;
+ }
+ }
+ else {
+ switch (*from) {
+ case '\'':
+ case '\"':
+ case '`':
+// quoteStack.handleQuote(fromStart, from, &to);
+ *to++ = *from;
+ from++;
+ break;
+ default:
+ if (newWord && (*from != ' ')) {wordStart = to; newWord = false; memset(to, 0, 10); }
+ if (!suspendTextPassThru) {
+ *to++ = *from;
+ lastspace = (*from == ' ');
+ }
+ }
+ }
+ }
+
+ VerseKey *vkey = SWDYNAMIC_CAST(VerseKey, key);
+ if (vkey) {
+ char ref[254];
+ if (vkey->Verse())
+ sprintf(ref, "\t\t<verse osisID=\"%s\">", vkey->getOSISRef());
+ else *ref = 0;
+ if (*ref) {
+ memmove(text+strlen(ref), text, maxlen-strlen(ref)-1);
+ memcpy(text, ref, strlen(ref));
+ to+=strlen(ref);
+ if (vkey->Verse()) {
+ VerseKey tmp;
+ tmp = *vkey;
+ tmp.AutoNormalize(0);
+ tmp.Headings(1);
+ sprintf(ref, "</verse>");
+ pushString(&to, ref);
+ tmp = MAXVERSE;
+ if (*vkey == tmp) {
+ tmp.Verse(0);
+// sprintf(ref, "\t</div>");
+// pushString(&to, ref);
+ tmp = MAXCHAPTER;
+ tmp = MAXVERSE;
+ if (*vkey == tmp) {
+ tmp.Chapter(0);
+ tmp.Verse(0);
+// sprintf(ref, "\t</div>");
+// pushString(&to, ref);
+ if (!quoteStack.empty()) {
+ SWLog::systemlog->LogError("popping unclosed quote at end of book");
+ quoteStack.clear();
+ }
+ }
+ }
+ }
+
+//
+// else if (vkey->Chapter()) {
+// sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef());
+// }
+// else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef());
+//
+ }
+ }
+ *to++ = 0;
+ *to = 0;
+*/
+ return 0;
+}
+
+
+const char *GBFOSIS::convertToOSIS(const char *inRef, const SWKey *key) {
+ static SWBuf outRef;
+
+ outRef = "";
+
+ VerseKey defLanguage;
+ ListKey verses = defLanguage.ParseVerseList(inRef, (*key), true);
+ const char *startFrag = inRef;
+ for (int i = 0; i < verses.Count(); i++) {
+ VerseKey *element = SWDYNAMIC_CAST(VerseKey, verses.GetElement(i));
+ char buf[5120];
+ char frag[800];
+ if (element) {
+ memmove(frag, startFrag, ((const char *)element->userData - startFrag) + 1);
+ frag[((const char *)element->userData - startFrag) + 1] = 0;
+ startFrag = (const char *)element->userData + 1;
+ sprintf(buf, "<reference osisRef=\"%s-%s\">%s</reference>", element->LowerBound().getOSISRef(), element->UpperBound().getOSISRef(), frag);
+ }
+ else {
+ memmove(frag, startFrag, ((const char *)verses.GetElement(i)->userData - startFrag) + 1);
+ frag[((const char *)verses.GetElement(i)->userData - startFrag) + 1] = 0;
+ startFrag = (const char *)verses.GetElement(i)->userData + 1;
+ sprintf(buf, "<reference osisRef=\"%s\">%s</reference>", VerseKey(*verses.GetElement(i)).getOSISRef(), frag);
+ }
+ outRef+=buf;
+ }
+ return outRef.c_str();
+}
+
+
+QuoteStack::QuoteStack() {
+ clear();
+}
+
+
+void QuoteStack::clear() {
+ while (!quotes.empty()) quotes.pop();
+}
+
+
+QuoteStack::~QuoteStack() {
+ clear();
+}
+
+
+void QuoteStack::handleQuote(char *buf, char *quotePos, SWBuf &text) {
+//QuoteInstance(char startChar = '\"', char level = 1, string uniqueID = "", char continueCount = 0) {
+ if (!quotes.empty()) {
+ QuoteInstance last = quotes.top();
+ if (last.startChar == *quotePos) {
+ text += "</quote>";
+ quotes.pop();
+ }
+ else {
+ quotes.push(QuoteInstance(*quotePos, last.level+1));
+ quotes.top().pushStartStream(text);
+ }
+ }
+ else {
+ quotes.push(QuoteInstance(*quotePos));
+ quotes.top().pushStartStream(text);
+ }
+}
+
+void QuoteStack::QuoteInstance::pushStartStream(SWBuf &text) {
+ text.appendFormatted("<quote level=\"%d\">", level);
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/gbfplain.cpp b/src/modules/filters/gbfplain.cpp
index 65766d3..f519a25 100644
--- a/src/modules/filters/gbfplain.cpp
+++ b/src/modules/filters/gbfplain.cpp
@@ -1,34 +1,28 @@
/******************************************************************************
*
- * gbfplain - SWFilter decendant to strip out all GBF tags or convert to
+ * gbfplain - SWFilter descendant to strip out all GBF tags or convert to
* ASCII rendered symbols.
*/
#include <stdlib.h>
-#include <string.h>
#include <gbfplain.h>
+SWORD_NAMESPACE_START
GBFPlain::GBFPlain() {
}
-char GBFPlain::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+char GBFPlain::processText (SWBuf &text, const SWKey *key, const SWModule *module)
{
- char *to, *from, token[2048];
+ char token[2048];
int tokpos = 0;
bool intoken = false;
- int len;
-
- len = strlen(text) + 1; // shift string to right of buffer
- if (len < maxlen) {
- memmove(&text[maxlen - len], text, len);
- from = &text[maxlen - len];
- }
- else from = text; // -------------------------------
-
- for (to = text; *from; from++) {
+ const char *from;
+ SWBuf orig = text;
+ from = orig.c_str();
+ for (text = ""; *from; from++) {
if (*from == '<') {
intoken = true;
tokpos = 0;
@@ -46,34 +40,30 @@ char GBFPlain::ProcessText(char *text, int maxlen, const SWKey *key, const SWMod
case 'G': // Greek
case 'H': // Hebrew
case 'T': // Tense
- *to++ = ' ';
- *to++ = '<';
+ text += " <";
for (char *tok = token + 2; *tok; tok++)
- *to++ = *tok;
- *to++ = '>';
- *to++ = ' ';
+ text += *tok;
+ text += "> ";
continue;
}
break;
case 'R':
switch(token[1]) {
case 'F': // footnote begin
- *to++ = ' ';
- *to++ = '[';
+ text += " [";
continue;
case 'f': // footnote end
- *to++ = ']';
- *to++ = ' ';
+ text += "] ";
continue;
}
break;
case 'C':
switch(token[1]) {
case 'A': // ASCII value
- *to++ = (char)atoi(&token[2]);
+ text += (char)atoi(&token[2]);
continue;
case 'G':
- *to++ = '>';
+ text += ">";
continue;
/* Bug in WEB
case 'L':
@@ -82,11 +72,10 @@ char GBFPlain::ProcessText(char *text, int maxlen, const SWKey *key, const SWMod
*/
case 'L': // Bug in WEB. Use above entry when fixed
case 'N': // new line
- *to++ = '\n';
+ text += '\n';
continue;
case 'M': // new paragraph
- *to++ = '\n';
- *to++ = '\n';
+ text += "\n\n";
continue;
}
break;
@@ -98,9 +87,9 @@ char GBFPlain::ProcessText(char *text, int maxlen, const SWKey *key, const SWMod
token[tokpos++] = *from;
token[tokpos+2] = 0;
}
- else *to++ = *from;
+ else text += *from;
}
- *to++ = 0;
- *to = 0;
return 0;
}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/gbfredletterwords.cpp b/src/modules/filters/gbfredletterwords.cpp
new file mode 100644
index 0000000..df7438d
--- /dev/null
+++ b/src/modules/filters/gbfredletterwords.cpp
@@ -0,0 +1,98 @@
+/******************************************************************************
+ *
+ * GBFRedLetterWords - SWFilter descendant to toggle red coloring of words of
+ * Christ in a GBF module.
+ */
+
+
+#include <stdlib.h>
+#include <gbfredletterwords.h>
+#include <swmodule.h>
+#ifndef __GNUC__
+#else
+#include <unixstr.h>
+#endif
+#include <ctype.h>
+
+SWORD_NAMESPACE_START
+
+const char oName[] = "Words of Christ in Red";
+const char oTip[] = "Toggles Red Coloring for Words of Christ On and Off if they are marked";
+
+const SWBuf choices[3] = {"On", "Off", ""};
+const StringList oValues(&choices[0], &choices[2]);
+
+GBFRedLetterWords::GBFRedLetterWords() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("Off");
+}
+
+
+GBFRedLetterWords::~GBFRedLetterWords() {
+}
+
+
+char GBFRedLetterWords::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+/** This function strips the red-letter markup (not the words themselves) from Bibles like the WEB.
+* The words are marked by <FR> as start and <Fr> as end tag.
+*/
+ if (!option) { // if we don't want red letter words
+ char token[4096]; // cheese. Fix.
+ int tokpos = 0;
+ bool intoken = false;
+ int len;
+ bool hide = false;
+
+ const char *from;
+ SWBuf orig = text;
+ from = orig.c_str();
+ for (text = ""; *from; from++) {
+ if (*from == '<') {
+ intoken = true;
+ tokpos = 0;
+// memset(token, 0, 4096);
+ token[0] = 0;
+ token[1] = 0;
+ token[2] = 0;
+ continue;
+ }
+ if (*from == '>') { // process tokens
+ intoken = false;
+ /*switch (*token) {
+ case 'F': // Font attribute
+ switch(token[1]) {
+ case 'R': // Begin red letter words
+ hide = true;
+ break;
+ case 'r': // end red letter words
+ hide = false;
+ break;
+ }
+ continue; // skip token
+ }*/
+
+ //hide the token if either FR or Fr was detected
+ hide = (token[0] == 'F' && ( (token[1] == 'R') || (token[1] == 'r') ));
+
+ // if not a red letter word token, keep token in text
+ if (!hide) {
+ text += '<';
+ for (char *tok = token; *tok; tok++)
+ text += *tok;
+ text += '>';
+ }
+ continue;
+ }
+ if (intoken) {
+ if (tokpos < 4090)
+ token[tokpos++] = *from;
+ token[tokpos+2] = 0; // +2 cuz we init token with 2 extra '0' because of switch statement
+ }
+ else {
+ text += *from;
+ }
+ }
+ }
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/gbfrtf.cpp b/src/modules/filters/gbfrtf.cpp
index 5f7d064..4a18fbf 100644
--- a/src/modules/filters/gbfrtf.cpp
+++ b/src/modules/filters/gbfrtf.cpp
@@ -1,35 +1,42 @@
/******************************************************************************
*
- * gbfrtf - SWFilter decendant to convert all GBF tags to RTF tags
+ * gbfrtf - SWFilter descendant to convert all GBF tags to RTF tags
*/
-#include <stdlib.h>
-#include <string.h>
#include <gbfrtf.h>
#include <ctype.h>
+SWORD_NAMESPACE_START
+
GBFRTF::GBFRTF() {
}
-char GBFRTF::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+char GBFRTF::processText(SWBuf &text, const SWKey *key, const SWModule *module)
{
- unsigned char *to, *from;
char token[2048];
+ char val[128];
+ char *valto;
+ char *num;
int tokpos = 0;
bool intoken = false;
int len;
const char *tok;
+ SWBuf strongnum;
+ SWBuf strongtense;
+ bool hideText = false;
+ int wordLen = 0;
+ int wordCount = 0;
+ int i;
- len = strlen(text) + 1; // shift string to right of buffer
- if (len < maxlen) {
- memmove(&text[maxlen - len], text, len);
- from = (unsigned char *)&text[maxlen - len];
- }
- else from = (unsigned char *)text; // -------------------------------
- for (to = (unsigned char *)text; *from; from++) {
+ const char *from;
+ SWBuf orig = text;
+ from = orig.c_str();
+ for (text = ""; *from; from++) {
if (*from == '<') {
+ wordLen = wordCount;
+ wordCount = 0;
intoken = true;
tokpos = 0;
token[0] = 0;
@@ -40,39 +47,80 @@ char GBFRTF::ProcessText(char *text, int maxlen, const SWKey *key, const SWModul
if (*from == '>') {
intoken = false;
// process desired tokens
+ // deal with OSIS note tags. Just hide till OSISRTF
+ if (!strncmp(token, "note ", 5)) {
+ hideText = true;
+ }
+ if (!strncmp(token, "/note", 5)) {
+ hideText = false;
+ }
+
switch (*token) {
+ case 'w': // OSIS Word (temporary until OSISRTF is done)
+ strongnum = "";
+ strongtense = "";
+ valto = val;
+ num = strstr(token, "lemma=\"x-Strongs:");
+ if (num) {
+ for (num+=17; ((*num) && (*num != '\"')); num++)
+ *valto++ = *num;
+ *valto = 0;
+ if (atoi((!isdigit(*val))?val+1:val) < 5627) {
+ // normal strongs number
+ strongnum += "{\\cf3 \\sub <";
+ for (tok = (!isdigit(*val))?val+1:val; *tok; tok++)
+ strongnum += *tok;
+ strongnum += ">}";
+ }
+ /* forget these for now
+ else {
+ // verb morph
+ sprintf(wordstr, "%03d", word-1);
+ module->getEntryAttributes()["Word"][wordstr]["Morph"] = val;
+ }
+ */
+ }
+ valto = val;
+ num = strstr(token, "morph=\"x-Robinson:");
+ if (num) {
+ for (num+=18; ((*num) && (*num != '\"')); num++)
+ *valto++ = *num;
+ *valto = 0;
+ // normal robinsons tense
+ strongtense += "{\\cf4 \\sub (";
+ for (tok = val; *tok; tok++)
+ strongtense += *tok;
+ strongtense += ")}";
+ }
+ continue;
+
+ case '/':
+ if (token[1] == 'w') {
+ if ((wordCount > 0) || (strongnum != "{\\cf3 \\sub <3588>}")) {
+ //for (i = 0; i < strongnum.length(); i++)
+ text += strongnum;
+ //for (i = 0; i < strongtense.length(); i++)
+ text += strongtense;
+ }
+ }
+ continue;
+
case 'W': // Strongs
switch(token[1]) {
case 'G': // Greek
case 'H': // Hebrew
- *to++ = '{';
- *to++ = '\\';
- *to++ = 'f';
- *to++ = 's';
- *to++ = '1';
- *to++ = '7';
- *to++ = ' ';
- *to++ = '<';
+ text += "{\\cf3 \\sub <";
for (tok = token + 2; *tok; tok++)
- *to++ = *tok;
- *to++ = '>';
- *to++ = '}';
+ text += *tok;
+ text += ">}";
continue;
case 'T': // Tense
- *to++ = '{';
- *to++ = '\\';
- *to++ = 'f';
- *to++ = 's';
- *to++ = '1';
- *to++ = '7';
- *to++ = ' ';
- *to++ = '(';
+ text += "{\\cf4 \\sub (";
bool separate = false;
for (tok = token + 2; *tok; tok++) {
if (separate) {
- *to++ = ';';
- *to++ = ' ';
+ text += "; ";
separate = false;
}
switch (*tok) {
@@ -80,7 +128,7 @@ char GBFRTF::ProcessText(char *text, int maxlen, const SWKey *key, const SWModul
case 'H':
for (tok++; *tok; tok++) {
if (isdigit(*tok)) {
- *to++ = *tok;
+ text += *tok;
separate = true;
}
else {
@@ -91,176 +139,134 @@ char GBFRTF::ProcessText(char *text, int maxlen, const SWKey *key, const SWModul
break;
default:
for (; *tok; tok++) {
- *to++ = *tok;
+ text += *tok;
}
}
}
- *to++ = ')';
- *to++ = '}';
+ text += ")}";
continue;
}
break;
case 'R':
switch(token[1]) {
case 'X':
- *to++ = '#';
+ text += "<a href=\"\">";
continue;
case 'x':
- *to++ = '|';
+ text += "</a>";
continue;
case 'F': // footnote begin
- *to++ = '{';
- *to++ = '\\';
- *to++ = 'i';
- *to++ = '1';
- *to++ = ' ';
- *to++ = '\\';
- *to++ = 'f';
- *to++ = 's';
- *to++ = '1';
- *to++ = '7';
- *to++ = ' ';
- *to++ = '(';
+ text += "{\\i1 \\sub (";
continue;
case 'f': // footnote end
- *to++ = ')';
- *to++ = ' ';
- *to++ = '}';
+ text += ") }";
continue;
}
break;
case 'F': // font tags
switch(token[1]) {
case 'I': // italic start
- *to++ = '\\';
- *to++ = 'i';
- *to++ = '1';
- *to++ = ' ';
+ text += "\\i1 ";
continue;
case 'i': // italic end
- *to++ = '\\';
- *to++ = 'i';
- *to++ = '0';
- *to++ = ' ';
+ text += "\\i0 ";
continue;
case 'B': // bold start
- *to++ = '\\';
- *to++ = 'b';
- *to++ = '1';
- *to++ = ' ';
+ text += "\\b1 ";
continue;
case 'b': // bold end
- *to++ = '\\';
- *to++ = 'b';
- *to++ = '0';
- *to++ = ' ';
+ text += "\\b0 ";
continue;
case 'N':
- *to++ = '{';
- if (!strnicmp(token+2, "Symbol", 6)) {
- *to++ = '\\';
- *to++ = 'f';
- *to++ = '7';
- *to++ = ' ';
- }
+ text += '{';
+ if (!strnicmp(token+2, "Symbol", 6))
+ text += "\\f7 ";
+ if (!strnicmp(token+2, "Courier", 7))
+ text += "\\f8 ";
continue;
case 'n':
- *to++ = '}';
+ text += '}';
continue;
case 'S':
- *to++ = '{';
- *to++ = '\\';
- *to++ = 's';
- *to++ = 'u';
- *to++ = 'p';
- *to++ = 'e';
- *to++ = 'r';
- *to++ = ' ';
+ text += "{\\super ";
continue;
case 's':
- *to++ = '}';
+ text += '}';
continue;
case 'R':
- *to++ = '{';
- *to++ = '\\';
- *to++ = 'c';
- *to++ = 'f';
- *to++ = '6';
- *to++ = ' ';
+ text += "{\\cf6 ";
continue;
case 'r':
- *to++ = '}';
+ text += '}';
+ continue;
+ case 'O':
+ case 'C':
+ text += "\\scaps1 ";
+ continue;
+ case 'o':
+ case 'c':
+ text += "\\scaps0 ";
+ continue;
+ case 'V':
+ text += "{\\sub ";
+ continue;
+ case 'v':
+ text += '}';
+ continue;
+ case 'U':
+ text += "\\ul1 ";
+ continue;
+ case 'u':
+ text += "\\ul0 ";
continue;
}
break;
case 'C': // special character tags
switch(token[1]) {
case 'A': // ASCII value
- *to++ = (char)atoi(&token[2]);
+ text += (char)atoi(&token[2]);
continue;
case 'G':
- *to++ = '>';
+ text += '>';
continue;
case 'L': // line break
- *to++ = '\\';
- *to++ = 'l';
- *to++ = 'i';
- *to++ = 'n';
- *to++ = 'e';
- *to++ = ' ';
+ text += "\\line ";
continue;
case 'M': // new paragraph
- *to++ = '\\';
- *to++ = 'p';
- *to++ = 'a';
- *to++ = 'r';
- *to++ = ' ';
+ text += "\\par ";
continue;
case 'T':
- *to++ = '<';
+ text += '<';
}
break;
case 'T': // title formatting
switch(token[1])
{
case 'T': // Book title begin
- *to++ = '{';
- *to++ = '\\';
- *to++ = 'f';
- *to++ = 's';
- *to++ = '2';
- *to++ = '2';
- *to++ = ' ';
- continue;
+ text += "{\\large ";
+ continue;
case 't':
- *to++ = '}';
- continue;
+ text += '}';
+ continue;
case 'S':
- *to++ = '\\';
- *to++ = 'p';
- *to++ = 'a';
- *to++ = 'r';
- *to++ = ' ';
- *to++ = '{';
- *to++ = '\\';
- *to++ = 'i';
- *to++ = '1';
- *to++ = '\\';
- *to++ = 'b';
- *to++ = '1';
- *to++ = ' ';
+ text += "\\par {\\i1\\b1 ";
continue;
case 's':
- *to++ = '}';
- *to++ = '\\';
- *to++ = 'p';
- *to++ = 'a';
- *to++ = 'r';
- *to++ = ' ';
+ text += "}\\par ";
continue;
}
break;
-
+ case 'J': // Justification
+ switch(token[1]) {
+ case 'L':
+ text += "\\ql ";
+ case 'C':
+ text += "\\qc ";
+ case 'R':
+ text += "\\qr ";
+ case 'F':
+ text += "\\qj ";
+ }
}
continue;
}
@@ -269,9 +275,15 @@ char GBFRTF::ProcessText(char *text, int maxlen, const SWKey *key, const SWModul
token[tokpos++] = *from;
token[tokpos+2] = 0;
}
- else *to++ = *from;
+ else {
+ if (!hideText) {
+ wordCount++;
+ text += *from;
+ }
+ }
}
- *to++ = 0;
- *to = 0;
return 0;
}
+
+SWORD_NAMESPACE_END
+
diff --git a/src/modules/filters/gbfstrongs.cpp b/src/modules/filters/gbfstrongs.cpp
index 40fc958..bd5088a 100644
--- a/src/modules/filters/gbfstrongs.cpp
+++ b/src/modules/filters/gbfstrongs.cpp
@@ -1,98 +1,116 @@
/******************************************************************************
*
- * gbfstrongs - SWFilter decendant to hide or show strongs number
+ * gbfstrongs - SWFilter descendant to hide or show strongs number
* in a GBF module.
*/
#include <stdlib.h>
-#include <string.h>
#include <gbfstrongs.h>
+#include <swmodule.h>
#ifndef __GNUC__
#else
#include <unixstr.h>
#endif
+#include <ctype.h>
+SWORD_NAMESPACE_START
-const char GBFStrongs::on[] = "On";
-const char GBFStrongs::off[] = "Off";
-const char GBFStrongs::optName[] = "Strong's Numbers";
-const char GBFStrongs::optTip[] = "Toggles Strong's Numbers On and Off if they exist";
+const char oName[] = "Strong's Numbers";
+const char oTip[] = "Toggles Strong's Numbers On and Off if they exist";
+const SWBuf choices[3] = {"On", "Off", ""};
+const StringList oValues(&choices[0], &choices[2]);
-GBFStrongs::GBFStrongs() {
- option = false;
- options.push_back(on);
- options.push_back(off);
+GBFStrongs::GBFStrongs() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("Off");
}
GBFStrongs::~GBFStrongs() {
}
-void GBFStrongs::setOptionValue(const char *ival)
-{
- option = (!stricmp(ival, on));
-}
-const char *GBFStrongs::getOptionValue()
-{
- return (option) ? on:off;
-}
+/**
+ * Scans GBF text for <WG...>/<WH...> (Strong's) tokens.
+ * When module->isProcessEntryAttributes() is set, each such token is recorded
+ * under the "Word" entry attributes: values below 5627 as "Strongs" (together
+ * with the text that preceded the token), larger values as "Morph" on the
+ * previously numbered word.
+ * When `option` is false the Strong's tokens are dropped from the output;
+ * every other token is copied through unchanged.
+ * text is rewritten in place; the return value is always 0.
+ */
+char GBFStrongs::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ char token[2048]; // cheese. Fix.
+ int tokpos = 0;
+ bool intoken = false;
+ int len;
+ bool lastspace = false;
+ int word = 1;
+ char val[128];
+ char wordstr[5];
+ char *valto;
+ char *ch;
+ unsigned int textStart = 0, textEnd = 0;
+ bool newText = false;
+ SWBuf tmp;
+ const char *from;
-char GBFStrongs::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
-{
- if (!option) { // if we don't want strongs
- char *to, *from, token[2048]; // cheese. Fix.
- int tokpos = 0;
- bool intoken = false;
- int len;
- bool lastspace = false;
+ SWBuf orig = text;
+ from = orig.c_str();
- len = strlen(text) + 1; // shift string to right of buffer
- if (len < maxlen) {
- memmove(&text[maxlen - len], text, len);
- from = &text[maxlen - len];
+ for (text = ""; *from; from++) {
+ if (*from == '<') {
+ intoken = true;
+ tokpos = 0;
+ token[0] = 0;
+ token[1] = 0;
+ token[2] = 0;
+ textEnd = text.size(); // output length at the point where this token starts
+ continue;
}
- else from = text; // -------------------------------
+ if (*from == '>') { // process tokens
+ intoken = false;
- for (to = text; *from; from++) {
- if (*from == '<') {
- intoken = true;
- tokpos = 0;
- token[0] = 0;
- token[1] = 0;
- token[2] = 0;
- continue;
- }
- if (*from == '>') { // process tokens
- intoken = false;
- if (*token == 'W' && (token[1] == 'G' || token[1] == 'H')) { // Strongs
- if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) {
- if (lastspace)
- to--;
- }
- continue;
+ if (*token == 'W' && (token[1] == 'G' || token[1] == 'H')) { // Strongs
+ if (module->isProcessEntryAttributes()) {
+ valto = val;
+ // copy at most sizeof(val)-1 characters so the terminator below always fits in val
+ for (unsigned int i = 2; ((token[i]) && (i < sizeof(val) + 1)); i++)
+ *valto++ = token[i];
+ *valto = 0;
+ // skip one leading non-digit, but never step past the terminator of an empty value
+ if (atoi((*val && !isdigit((unsigned char)*val))?val+1:val) < 5627) {
+ // normal strongs number
+ sprintf(wordstr, "%03d", word++);
+ module->getEntryAttributes()["Word"][wordstr]["Strongs"] = val;
+ tmp = "";
+ tmp.append(text.c_str()+textStart, (int)(textEnd - textStart));
+ module->getEntryAttributes()["Word"][wordstr]["Text"] = tmp;
+ newText = true;
+ }
+ else {
+ // verb morph
+ sprintf(wordstr, "%03d", word-1); // attaches to the most recently numbered word
+ module->getEntryAttributes()["Word"][wordstr]["Morph"] = val;
+ }
+ }
+ if (!option) {
+ if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) {
+ if (lastspace)
+ text--; // presumably drops the space left in front of the removed token - confirm SWBuf::operator--
+ }
+ if (newText) {textStart = text.size(); newText = false; }
+ continue;
}
- // if not a strongs token, keep token in text
- *to++ = '<';
- for (char *tok = token; *tok; tok++)
- *to++ = *tok;
- *to++ = '>';
- continue;
- }
- if (intoken) {
- if (tokpos < 2045)
- token[tokpos++] = *from;
- token[tokpos+2] = 0;
- }
- else {
- *to++ = *from;
- lastspace = (*from == ' ');
}
+ // if not a strongs token, keep token in text
+ text += '<';
+ text += token;
+ text += '>';
+ if (newText) {textStart = text.size(); newText = false; }
+ continue;
+ }
+ if (intoken) {
+ if (tokpos < 2045)
+ token[tokpos++] = *from;
+ token[tokpos+2] = 0;
+ }
+ else {
+ text += *from;
+ lastspace = (*from == ' ');
}
- *to++ = 0;
- *to = 0;
}
return 0;
}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/gbfthml.cpp b/src/modules/filters/gbfthml.cpp
index ca03e71..03b8d8a 100644
--- a/src/modules/filters/gbfthml.cpp
+++ b/src/modules/filters/gbfthml.cpp
@@ -15,32 +15,27 @@
***************************************************************************/
#include <stdlib.h>
-#include <string.h>
#include <gbfthml.h>
+SWORD_NAMESPACE_START
GBFThML::GBFThML()
{
}
-char GBFThML::ProcessText(char *text, int maxlen)
-{
- char *to, *from, token[2048];
+char GBFThML::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ const char *from;
+ char token[2048];
int tokpos = 0;
bool intoken = false;
int len;
const char *tok;
- len = strlen(text) + 1; // shift string to right of buffer
- if (len < maxlen) {
- memmove(&text[maxlen - len], text, len);
- from = &text[maxlen - len];
- }
- else from = text; // -------------------------------
+ SWBuf orig = text;
+ from = orig.c_str();
- for (to = text; *from; from++)
- {
+ for (text = ""; *from; from++) {
if (*from == '<') {
intoken = true;
tokpos = 0;
@@ -58,75 +53,17 @@ char GBFThML::ProcessText(char *text, int maxlen)
switch(token[1]) {
case 'G':
case 'H':
- *to++ = '<';
- *to++ = 's';
- *to++ = 'y';
- *to++ = 'n';
- *to++ = 'c';
- *to++ = ' ';
- *to++ = 't';
- *to++ = 'y';
- *to++ = 'p';
- *to++ = 'e';
- *to++ = '=';
- *to++ = '"';
- *to++ = 'S';
- *to++ = 't';
- *to++ = 'r';
- *to++ = 'o';
- *to++ = 'n';
- *to++ = 'g';
- *to++ = 's';
- *to++ = '"';
- *to++ = ' ';
- *to++ = 'v';
- *to++ = 'a';
- *to++ = 'l';
- *to++ = 'u';
- *to++ = 'e';
- *to++ = '=';
- *to++ = '"';
+ text += "<sync type=\"Strongs\" value=\"";
for (tok = token + 1; *tok; tok++)
- *to++ = *tok;
- *to++ = '"';
- *to++ = ' ';
- *to++ = '/';
- *to++ = '>';
+ text += *tok;
+ text += "\" />";
continue;
case 'T': // Tense
- *to++ = '<';
- *to++ = 's';
- *to++ = 'y';
- *to++ = 'n';
- *to++ = 'c';
- *to++ = ' ';
- *to++ = 't';
- *to++ = 'y';
- *to++ = 'p';
- *to++ = 'e';
- *to++ = '=';
- *to++ = '"';
- *to++ = 'M';
- *to++ = 'o';
- *to++ = 'r';
- *to++ = 'p';
- *to++ = 'h';
- *to++ = '"';
- *to++ = ' ';
- *to++ = 'v';
- *to++ = 'a';
- *to++ = 'l';
- *to++ = 'u';
- *to++ = 'e';
- *to++ = '=';
- *to++ = '"';
+ text += "<sync type=\"Morph\" value=\"";
for (tok = token + 2; *tok; tok++)
- *to++ = *tok;
- *to++ = '"';
- *to++ = ' ';
- *to++ = '/';
- *to++ = '>';
+ text += *tok;
+ text += "\" />";
continue;
}
break;
@@ -134,234 +71,99 @@ char GBFThML::ProcessText(char *text, int maxlen)
switch(token[1])
{
case 'X':
- *to++ = '<';
- *to++ = 'a';
- *to++ = ' ';
- *to++ = 'h';
- *to++ = 'r';
- *to++ = 'e';
- *to++ = 'f';
- *to++ = '=';
- *to++ = '\"';
- for (tok = token + 3; *tok; tok++) {
+ text += "<a href=\"";
+ for (tok = token + 3; *tok; tok++) {
if(*tok != '<' && *tok+1 != 'R' && *tok+2 != 'x') {
- *to++ = *tok;
+ text += *tok;
}
else {
break;
}
- }
- *to++ = '\"';
- *to++ = '>';
- continue;
+ }
+ text += "\">";
+ continue;
case 'x':
- *to++ = '<';
- *to++ = '/';
- *to++ = 'a';
- *to++ = '>';
- continue;
+ text += "</a>";
+ continue;
case 'F': // footnote begin
- *to++ = '<';
- *to++ = 'n';
- *to++ = 'o';
- *to++ = 't';
- *to++ = 'e';
- *to++ = ' ';
- *to++ = 'p';
- *to++ = 'l';
- *to++ = 'a';
- *to++ = 'c';
- *to++ = 'e';
- *to++ = '=';
- *to++ = '"';
- *to++ = 'f';
- *to++ = 'o';
- *to++ = 'o';
- *to++ = 't';
- *to++ = '"';
- *to++ = '>';
- continue;
+ text += "<note>";
+ continue;
case 'f': // footnote end
- *to++ = '<';
- *to++ = '/';
- *to++ = 'n';
- *to++ = 'o';
- *to++ = 't';
- *to++ = 'e';
- *to++ = '>';
- continue;
+ text += "</note>";
+ continue;
}
break;
case 'F': // font tags
switch(token[1])
{
case 'N':
- *to++ = '<';
- *to++ = 'f';
- *to++ = 'o';
- *to++ = 'n';
- *to++ = 't';
- *to++ = ' ';
- *to++ = 'f';
- *to++ = 'a';
- *to++ = 'c';
- *to++ = 'e';
- *to++ = '=';
- *to++ = '"';
- for (tok = token + 2; *tok; tok++)
- *to++ = *tok;
- *to++ = '"';
- *to++ = '>';
- continue;
+ text += "<font face=\"";
+ for (tok = token + 2; *tok; tok++)
+ text += *tok;
+ text += "\">";
+ continue;
case 'n':
- *to++ = '<';
- *to++ = '/';
- *to++ = 'f';
- *to++ = 'o';
- *to++ = 'n';
- *to++ = 't';
- *to++ = '>';
- continue;
+ text += "</font>";
+ continue;
case 'I': // italic start
- *to++ = '<';
- *to++ = 'i';
- *to++ = '>';
- continue;
+ text += "<i>";
+ continue;
case 'i': // italic end
- *to++ = '<';
- *to++ = '/';
- *to++ = 'i';
- *to++ = '>';
- continue;
+ text += "</i>";
+ continue;
case 'B': // bold start
- *to++ = '<';
- *to++ = 'b';
- *to++ = '>';
- continue;
+ text += "<b>";
+ continue;
case 'b': // bold end
- *to++ = '<';
- *to++ = '/';
- *to++ = 'b';
- *to++ = '>';
- continue;
+ text += "</b>";
+ continue;
case 'R': // words of Jesus begin
- *to++ = '<';
- *to++ = 'f';
- *to++ = 'o';
- *to++ = 'n';
- *to++ = 't';
- *to++ = ' ';
- *to++ = 'c';
- *to++ = 'o';
- *to++ = 'l';
- *to++ = 'o';
- *to++ = 'r';
- *to++ = '=';
- *to++ = '\"';
- *to++ = '#';
- *to++ = 'f';
- *to++ = 'f';
- *to++ = '0';
- *to++ = '0';
- *to++ = '0';
- *to++ = '0';
- *to++ = '\"';
- *to++ = '>';
- continue;
+ text += "<font color=\"#ff0000\">";
+ continue;
case 'r': // words of Jesus end
- *to++ = '<';
- *to++ = '/';
- *to++ = 'f';
- *to++ = 'o';
- *to++ = 'n';
- *to++ = 't';
- *to++ = '>';
- continue;
+ text += "</font>";
+ continue;
case 'U': // Underline start
- *to++ = '<';
- *to++ = 'u';
- *to++ = '>';
- continue;
+ text += "<u>";
+ continue;
case 'u': // Underline end
- *to++ = '<';
- *to++ = '/';
- *to++ = 'u';
- *to++ = '>';
- continue;
+ text += "</u>";
+ continue;
case 'O': // Old Testament quote begin
- *to++ = '<';
- *to++ = 'c';
- *to++ = 'i';
- *to++ = 't';
- *to++ = 'e';
- *to++ = '>';
- continue;
+ text += "<cite>";
+ continue;
case 'o': // Old Testament quote end
- *to++ = '<';
- *to++ = '/';
- *to++ = 'c';
- *to++ = 'i';
- *to++ = 't';
- *to++ = 'e';
- *to++ = '>';
- continue;
+ text += "</cite>";
+ continue;
case 'S': // Superscript begin
- *to++ = '<';
- *to++ = 's';
- *to++ = 'u';
- *to++ = 'p';
- *to++ = '>';
- continue;
+ text += "<sup>";
+ continue;
case 's': // Superscript end
- *to++ = '<';
- *to++ = '/';
- *to++ = 's';
- *to++ = 'u';
- *to++ = 'p';
- *to++ = '>';
- continue;
+ text += "</sup>";
+ continue;
case 'V': // Subscript begin
- *to++ = '<';
- *to++ = 's';
- *to++ = 'u';
- *to++ = 'b';
- *to++ = '>';
- continue;
+ text += "<sub>";
+ continue;
case 'v': // Subscript end
- *to++ = '<';
- *to++ = '/';
- *to++ = 's';
- *to++ = 'u';
- *to++ = 'b';
- *to++ = '>';
- continue;
+ text += "</sub>";
+ continue;
}
break;
case 'C': // special character tags
switch(token[1])
{
case 'A': // ASCII value
- *to++ = (char)atoi(&token[2]);
+ text += (char)atoi(&token[2]);
continue;
case 'G':
//*to++ = ' ';
continue;
case 'L': // line break
- *to++ = '<';
- *to++ = 'b';
- *to++ = 'r';
- *to++ = ' ';
- *to++ = '/';
- *to++ = '>';
- *to++ = ' ';
+ text += "<br /> ";
continue;
case 'M': // new paragraph
- *to++ = '<';
- *to++ = 'p';
- *to++ = ' ';
- *to++ = '/';
- *to++ = '>';
+ text += "<p />";
continue;
case 'T':
//*to++ = ' ';
@@ -372,76 +174,28 @@ char GBFThML::ProcessText(char *text, int maxlen)
switch(token[1])
{
case 'T': // Book title begin
- *to++ = '<';
- *to++ = 'b';
- *to++ = 'i';
- *to++ = 'g';
- *to++ = '>';
- continue;
+ text += "<big>";
+ continue;
case 't':
- *to++ = '<';
- *to++ = '/';
- *to++ = 'b';
- *to++ = 'i';
- *to++ = 'g';
- *to++ = '>';
- continue;
+ text += "</big>";
+ continue;
case 'S':
- *to++ = '<';
- *to++ = 'd';
- *to++ = 'i';
- *to++ = 'v';
- *to++ = ' ';
- *to++ = 'c';
- *to++ = 'l';
- *to++ = 'a';
- *to++ = 's';
- *to++ = 's';
- *to++ = '=';
- *to++ = '\"';
- *to++ = 's';
- *to++ = 'e';
- *to++ = 'c';
- *to++ = 'h';
- *to++ = 'e';
- *to++ = 'a';
- *to++ = 'd';
- *to++ = '\"';
- *to++ = '>';
- continue;
+ text += "<div class=\"sechead\">";
+ continue;
case 's':
- *to++ = '<';
- *to++ = '/';
- *to++ = 'd';
- *to++ = 'i';
- *to++ = 'v';
- *to++ = '>';
- continue;
+ text += "</div>";
+ continue;
}
break;
case 'P': // special formatting
- switch(token[1])
- {
+ switch(token[1]) {
case 'P': // Poetry begin
- *to++ = '<';
- *to++ = 'v';
- *to++ = 'e';
- *to++ = 'r';
- *to++ = 's';
- *to++ = 'e';
- *to++ = '>';
- continue;
+ text += "<verse>";
+ continue;
case 'p':
- *to++ = '<';
- *to++ = '/';
- *to++ = 'v';
- *to++ = 'e';
- *to++ = 'r';
- *to++ = 's';
- *to++ = 'e';
- *to++ = '>';
- continue;
+ text += "</verse>";
+ continue;
}
break;
}
@@ -452,12 +206,11 @@ char GBFThML::ProcessText(char *text, int maxlen)
token[tokpos++] = *from;
token[tokpos+2] = 0;
}
- else *to++ = *from;
+ else text += *from;
}
- *to++ = 0;
- *to = 0;
return 0;
}
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/gbfwebif.cpp b/src/modules/filters/gbfwebif.cpp
new file mode 100644
index 0000000..131f359
--- /dev/null
+++ b/src/modules/filters/gbfwebif.cpp
@@ -0,0 +1,161 @@
+/***************************************************************************
+ GBFWEBIF.cpp - GBF to HTML filter with hrefs
+ for strongs and morph tags
+ -------------------
+ begin : 2001-09-03
+ copyright : 2001 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <gbfwebif.h>
+#include <ctype.h>
+#include <utilweb.h>
+
+SWORD_NAMESPACE_START
+
+GBFWEBIF::GBFWEBIF() : baseURL(""), passageStudyURL(baseURL + "passagestudy.jsp") { // NOTE(review): relies on baseURL being initialised before passageStudyURL - confirm member order in gbfwebif.h
+// only the words-of-Jesus substitutes are registered here; handleToken() hands tokens it does not recognise to GBFHTMLHREF::handleToken
+ addTokenSubstitute("FR", "<span class=\"wordsOfJesus\">"); // words of Jesus begin
+ addTokenSubstitute("Fr", "</span>"); // words of Jesus end
+}
+
+/**
+ * Renders one token as HTML for the web interface, linking Strong's numbers,
+ * morphology codes and scripture references to passageStudyURL.
+ *
+ * @param buf      output buffer the generated markup is appended to
+ * @param token    token text without the surrounding '<' and '>'
+ * @param userData per-call filter state, forwarded to GBFHTMLHREF::handleToken
+ * @return true when substituteToken() or one of the branches below handled
+ *         the token; otherwise the result of GBFHTMLHREF::handleToken
+ */
+bool GBFWEBIF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+	const char *tok;
+	char val[128];
+	char *valto;
+	const char *const valEnd = val + sizeof(val) - 1;	// last slot stays free for the terminating NUL
+	const char *num;
+	SWBuf url;
+
+	if (!substituteToken(buf, token)) {
+		if (!strncmp(token, "w", 1)) {
+			// OSIS Word (temporary until OSISRTF is done)
+			valto = val;
+			num = strstr(token, "lemma=\"x-Strongs:");
+
+			if (num) {
+				// copy the attribute value, truncating instead of overrunning val
+				for (num+=17; ((*num) && (*num != '\"') && (valto < valEnd)); num++)
+					*valto++ = *num;
+				*valto = 0;
+
+				// skip one leading non-digit (G/H prefix), but never step past an empty value
+				const char *digits = (*val && !isdigit((unsigned char)*val)) ? val+1 : val;
+
+				if (atoi(digits) < 5627) {
+					buf += " <small><em>&lt;";
+					url = val;
+					if ((url.length() > 1) && strchr("GH", url[0])) {
+						if (isdigit((unsigned char)url[1]))
+							url = url.c_str()+1;
+					}
+					buf.appendFormatted("<a href=\"%s?showStrong=%s#cv\">", passageStudyURL.c_str(), encodeURL(url).c_str());
+
+					buf += digits;
+					buf += "</a>&gt;</em></small> ";
+				}
+			}
+			valto = val;
+			num = strstr(token, "morph=\"x-Robinson:");
+			if (num) {
+				for (num+=18; ((*num) && (*num != '\"') && (valto < valEnd)); num++)
+					*valto++ = *num;
+				*valto = 0;
+				buf += " <small><em>(";
+				// normal robinsons tense: the code is the link target and is shown once, inside the anchor
+				url = val;
+				buf.appendFormatted("<a href=\"%s?showMorph=%s#cv\">", passageStudyURL.c_str(), encodeURL(url).c_str());
+
+				buf += val;
+				buf += "</a>)</em></small> ";
+			}
+		}
+
+		else if (!strncmp(token, "WG", 2) || !strncmp(token, "WH", 2)) { // strong's numbers
+			buf += " <small><em>&lt;";
+			url = token + 1;
+			if ((url.length() > 1) && strchr("GH", url[0])) {
+				if (isdigit((unsigned char)url[1]))
+					url = url.c_str()+1;
+			}
+			buf.appendFormatted("<a href=\"%s?showStrong=%s#cv\">", passageStudyURL.c_str(), encodeURL(url).c_str());
+
+			buf += token + 2;
+			buf += "</a>&gt;</em></small>";
+		}
+
+		else if (!strncmp(token, "WTG", 3) || !strncmp(token, "WTH", 3)) { // strong's numbers tense
+			buf += " <small><em>(";
+			url = "";
+			for (tok = token + 2; *tok; tok++) {
+				if (*tok != '\"')
+					url += *tok;
+			}
+			if ((url.length() > 1) && strchr("GH", url[0])) {
+				if (isdigit((unsigned char)url[1]))
+					url = url.c_str()+1;
+			}
+			buf.appendFormatted("<a href=\"%s?showStrong=%s#cv\">", passageStudyURL.c_str(), encodeURL(url).c_str());
+
+			for (tok = token + 3; *tok; tok++) {
+				if (*tok != '\"')
+					buf += *tok;
+			}
+			buf += "</a>)</em></small>";
+		}
+
+		else if (!strncmp(token, "WT", 2)) { // morph tags (WTG/WTH were handled by the branch above)
+			buf += " <small><em>(";
+			// the unquoted code is the link target; it is printed once, inside the anchor
+			url = "";
+			for (tok = token + 2; *tok; tok++) {
+				if (*tok != '\"')
+					url += *tok;
+			}
+			buf.appendFormatted("<a href=\"%s?showMorph=%s#cv\">", passageStudyURL.c_str(), encodeURL(url).c_str());
+
+			buf += url.c_str();
+			buf += "</a>)</em></small>";
+		}
+
+		else if (!strncmp(token, "RX", 2)) {
+			// the reference follows one separator character after "RX"; a bare "RX" has no reference
+			url = "";
+			for (tok = (token[2]) ? token + 3 : token + 2; *tok && strncmp(tok, "<Rx", 3); tok++)
+				url += *tok;
+
+			buf.appendFormatted("<a href=\"%s?key=%s#cv\">", passageStudyURL.c_str(), encodeURL(url).c_str());
+		}
+
+		else {
+			return GBFHTMLHREF::handleToken(buf, token, userData);
+		}
+	}
+	return true;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/greeklexattribs.cpp b/src/modules/filters/greeklexattribs.cpp
index fb166df..073ba39 100644
--- a/src/modules/filters/greeklexattribs.cpp
+++ b/src/modules/filters/greeklexattribs.cpp
@@ -1,46 +1,87 @@
/******************************************************************************
*
- * greeklexattribs - SWFilter decendant to set entry attributes for greek
- * lexicons
+ * greeklexattribs - SWFilter descendant to set entry attributes for greek
+ * lexicons
*/
#include <stdlib.h>
-#include <string.h>
+#include <ctype.h>
+#include <string>
#include <greeklexattribs.h>
#include <swmodule.h>
+using std::string;
+
+SWORD_NAMESPACE_START
GreekLexAttribs::GreekLexAttribs() {
}
-char GreekLexAttribs::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) {
+char GreekLexAttribs::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
if (module->isProcessEntryAttributes()) {
- char *from;
+ const char *from;
bool inAV = false;
string phrase;
string freq;
- char *currentPhrase = 0;
+ char val[128], *valto;
+ char wordstr[7];
+ const char *currentPhrase = 0, *ch = 0;
+ const char *currentPhraseEnd = 0;
+ int number = 0;
- for (from = text; *from; from++) {
+ for (from = text.c_str(); *from; from++) {
if (inAV) {
if (currentPhrase == 0) {
if (isalpha(*from))
currentPhrase = from;
}
else {
- if ((!isalpha(*from)) && (*from != ' ')) {
+ if ((!isalpha(*from)) && (*from != ' ') && (*from != '+') && (*from !='(') && (*from != ')') && (*from != '\'')) {
+ if (*from == '<') {
+ if (!currentPhraseEnd)
+ currentPhraseEnd = from - 1;
+ for (; *from && *from != '>'; from++) {
+ if (!strncmp(from, "value=\"", 7)) {
+ valto = val;
+ from += 7;
+ for (unsigned int i = 0; from[i] != '\"' && i < 127; i++)
+ *valto++ = from[i];
+ *valto = 0;
+ sprintf(wordstr, "%03d", number+1);
+ module->getEntryAttributes()["AVPhrase"][wordstr]["CompoundedWith"] = val;
+ from += strlen(val);
+ }
+ }
+ continue;
+ }
+
phrase = "";
- phrase.append(currentPhrase, (int)(from - currentPhrase)-1);
+ phrase.append(currentPhrase, (int)(((currentPhraseEnd>currentPhrase)?currentPhraseEnd:from) - currentPhrase)-1);
currentPhrase = from;
while (*from && isdigit(*from)) from++;
freq = "";
freq.append(currentPhrase, (int)(from - currentPhrase));
- module->getEntryAttributes()["AVPhrase"][phrase]["Frequency"] = freq;
- currentPhrase = 0;
+ if ((freq.length() > 0) && (phrase.length() > 0)) {
+ sprintf(wordstr, "%03d", ++number);
+ if ((strchr(phrase.c_str(), '(') > phrase.c_str()) && (strchr(phrase.c_str(), ')') > phrase.c_str() + 1)) {
+ string tmp = phrase.substr(0, phrase.find_first_of("("));
+ phrase.erase(phrase.find_first_of("("), 1);
+ phrase.erase(phrase.find_first_of(")"), 1);
+ phrase.erase(0,phrase.find_first_not_of("\r\n\v\t ")); phrase.erase(phrase.find_last_not_of("\r\n\v\t ")+1);
+ module->getEntryAttributes()["AVPhrase"][wordstr]["Alt"] = phrase.c_str();
+ phrase = tmp;
+ }
+ phrase.erase(0,phrase.find_first_not_of("\r\n\v\t ")); phrase.erase(phrase.find_last_not_of("\r\n\v\t ")+1);
+ freq.erase(0,freq.find_first_not_of("\r\n\v\t ")); freq.erase(freq.find_last_not_of("\r\n\v\t ")+1);
+ module->getEntryAttributes()["AVPhrase"][wordstr]["Phrase"] = phrase.c_str();
+ module->getEntryAttributes()["AVPhrase"][wordstr]["Frequency"] = freq.c_str();
+ currentPhrase = 0;
+ currentPhraseEnd = 0;
+ }
}
}
if (*from == ';') inAV = false;
@@ -56,3 +97,4 @@ char GreekLexAttribs::ProcessText(char *text, int maxlen, const SWKey *key, cons
}
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/latin1utf16.cpp b/src/modules/filters/latin1utf16.cpp
index 75ee998..8067381 100644
--- a/src/modules/filters/latin1utf16.cpp
+++ b/src/modules/filters/latin1utf16.cpp
@@ -1,6 +1,6 @@
/******************************************************************************
*
- * Latin1UTF16 - SWFilter decendant to convert a Latin-1 character to UTF-16
+ * Latin1UTF16 - SWFilter descendant to convert a Latin-1 character to UTF-16
*
*/
@@ -9,112 +9,111 @@
#include <stdio.h>
#include <latin1utf16.h>
+SWORD_NAMESPACE_START
+
Latin1UTF16::Latin1UTF16() {
}
-char Latin1UTF16::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
-{
- unsigned char *from;
- unsigned short *to;
+char Latin1UTF16::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ const unsigned char *from;
int len;
+ if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering
+ return -1;
+
- len = strlen(text) + 1; // shift string to right of buffer
- if (len < maxlen) {
- memmove(&text[maxlen - len], text, len);
- from = (unsigned char*)&text[maxlen - len];
- }
- else
- from = (unsigned char*)text;
- // -------------------------------
-
- for (to = (unsigned short*)text; *from; from++) {
- switch (*from) {
+ SWBuf orig = text;
+ from = (const unsigned char *)orig.c_str();
+
+ for (text = ""; *from; from++) {
+ text.setSize(text.size()+2);
+ switch (*from) {
case 0x80: // '€'
- *to++ = 0x20AC;
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x20AC;
break;
case 0x82: // '‚'
- *to++ = 0x201A;
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x201A;
break;
case 0x83: // 'ƒ'
- *to++ = 0x0192;
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x0192;
break;
case 0x84: // '„'
- *to++ = 0x201E;
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x201E;
break;
case 0x85: // '…'
- *to++ = 0x2026;
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x2026;
break;
case 0x86: // '†'
- *to++ = 0x2020;
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x2020;
break;
case 0x87: // '‡'
- *to++ = 0x2021;
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x2021;
break;
case 0x88: // 'ˆ'
- *to++ = 0x02C6;
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x02C6;
break;
case 0x89: // '‰'
- *to++ = 0x2030;
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x2030;
break;
case 0x8A: // 'Š'
- *to++ = 0x0160;
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x0160;
break;
case 0x8B: // '‹'
- *to++ = 0x2039;
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x2039;
break;
case 0x8C: // 'Œ'
- *to++ = 0x0152;
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x0152;
break;
case 0x8E: // 'Ž'
- *to++ = 0x017D;
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x017D;
break;
case 0x91: // '‘'
- *to++ = 0x2018;
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x2018;
break;
case 0x92: // '’'
- *to++ = 0x2019;
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x2019;
break;
case 0x93: // '“'
- *to++ = 0x201C;
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x201C;
break;
case 0x94: // '”'
- *to++ = 0x201D;
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x201D;
break;
case 0x95: // '•'
- *to++ = 0x2022;
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x2022;
break;
case 0x96: // '–'
- *to++ = 0x2013;
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x2013;
break;
case 0x97: // '—'
- *to++ = 0x2014;
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x2014;
break;
case 0x98: // '˜'
- *to++ = 0x02DC;
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x02DC;
break;
case 0x99: // '™'
- *to++ = 0x2122;
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x2122;
break;
case 0x9A: // 'š'
- *to++ = 0x0161;
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x0161;
break;
case 0x9B: // '›'
- *to++ = 0x203A;
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x203A;
break;
case 0x9C: // 'œ'
- *to++ = 0x0153;
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x0153;
break;
case 0x9E: // 'ž'
- *to++ = 0x017E;
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x017E;
break;
case 0x9F: // 'Ÿ'
- *to++ = 0x0178;
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x0178;
break;
- default:
- *to++ = (unsigned short)*from;
- }
+ default:
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) *from;
+ }
}
- *to = 0;
return 0;
}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/latin1utf8.cpp b/src/modules/filters/latin1utf8.cpp
index 91af8dc..6d7d87b 100644
--- a/src/modules/filters/latin1utf8.cpp
+++ b/src/modules/filters/latin1utf8.cpp
@@ -1,6 +1,6 @@
/******************************************************************************
*
- * Latin1UTF8 - SWFilter decendant to convert a Latin-1 character to UTF-8
+ * Latin1UTF8 - SWFilter descendant to convert a Latin-1 character to UTF-8
*
*/
@@ -10,170 +10,164 @@
#include <latin1utf8.h>
#include <swmodule.h>
+SWORD_NAMESPACE_START
+
Latin1UTF8::Latin1UTF8() {
}
-char Latin1UTF8::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+char Latin1UTF8::processText(SWBuf &text, const SWKey *key, const SWModule *module)
{
- unsigned char *to, *from;
- int len;
-
- len = strlen(text) + 1;
- if (len == maxlen + 1)
- maxlen = (maxlen + 1) * FILTERPAD;
- // shift string to right of buffer
- if (len < maxlen) {
- memmove(&text[maxlen - len], text, len);
- from = (unsigned char*)&text[maxlen - len];
- }
- else
- from = (unsigned char*)text; // -------------------------------
+ const unsigned char *from;
+ if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering
+ return -1;
+ SWBuf orig = text;
+ from = (const unsigned char *)orig.c_str();
- for (to = (unsigned char*)text; *from; from++) {
+ for (text = ""; *from; from++) {
if (*from < 0x80) {
- *to++ = *from;
+ text += *from;
}
else if (*from < 0xc0) {
switch(*from) {
case 0x80: // '€'
- *to++ = 0xe2; // 'â'
- *to++ = 0x82; // '‚'
- *to++ = 0xac; // '¬'
+ text += 0xe2; // 'â'
+ text += 0x82; // '‚'
+ text += 0xac; // '¬'
break;
case 0x82: // '‚'
- *to++ = 0xe2; // 'â'
- *to++ = 0x80; // '€'
- *to++ = 0x9a; // 'š'
+ text += 0xe2; // 'â'
+ text += 0x80; // '€'
+ text += 0x9a; // 'š'
break;
case 0x83: // 'ƒ'
- *to++ = 0xc6; // 'Æ'
- *to++ = 0x92; // '’'
+ text += 0xc6; // 'Æ'
+ text += 0x92; // '’'
break;
case 0x84: // '„'
- *to++ = 0xe2; // 'â'
- *to++ = 0x80; // '€'
- *to++ = 0x9e; // 'ž'
+ text += 0xe2; // 'â'
+ text += 0x80; // '€'
+ text += 0x9e; // 'ž'
break;
case 0x85: // '…'
- *to++ = 0xe2; // 'â'
- *to++ = 0x80; // '€'
- *to++ = 0xa6; // '¦'
+ text += 0xe2; // 'â'
+ text += 0x80; // '€'
+ text += 0xa6; // '¦'
break;
case 0x86: // '†'
- *to++ = 0xe2; // 'â'
- *to++ = 0x80; // '€'
- *to++ = 0xa0; // ' '
+ text += 0xe2; // 'â'
+ text += 0x80; // '€'
+ text += 0xa0; // ' '
break;
case 0x87: // '‡'
- *to++ = 0xe2; // 'â'
- *to++ = 0x80; // '€'
- *to++ = 0xa1; // '¡'
+ text += 0xe2; // 'â'
+ text += 0x80; // '€'
+ text += 0xa1; // '¡'
break;
case 0x88: // 'ˆ'
- *to++ = 0xcb; // 'Ë'
- *to++ = 0x86; // '†'
+ text += 0xcb; // 'Ë'
+ text += 0x86; // '†'
break;
case 0x89: // '‰'
- *to++ = 0xe2; // 'â'
- *to++ = 0x80; // '€'
- *to++ = 0xb0; // '°'
+ text += 0xe2; // 'â'
+ text += 0x80; // '€'
+ text += 0xb0; // '°'
break;
case 0x8A: // 'Š'
- *to++ = 0xc5; // 'Å'
- *to++ = 0xa0; // ' '
+ text += 0xc5; // 'Å'
+ text += 0xa0; // ' '
break;
case 0x8B: // '‹'
- *to++ = 0xe2; // 'â'
- *to++ = 0x80; // '€'
- *to++ = 0xb9; // '¹'
+ text += 0xe2; // 'â'
+ text += 0x80; // '€'
+ text += 0xb9; // '¹'
break;
case 0x8C: // 'Œ'
- *to++ = 0xc5; // 'Å'
- *to++ = 0x92; // '’'
+ text += 0xc5; // 'Å'
+ text += 0x92; // '’'
break;
case 0x8E: // 'Ž'
- *to++ = 0xc5; // 'Å'
- *to++ = 0xbd; // '½'
+ text += 0xc5; // 'Å'
+ text += 0xbd; // '½'
break;
case 0x91: // '‘'
- *to++ = 0xe2; // 'â'
- *to++ = 0x80; // '€'
- *to++ = 0x98; // '˜'
+ text += 0xe2; // 'â'
+ text += 0x80; // '€'
+ text += 0x98; // '˜'
break;
case 0x92: // '’'
- *to++ = 0xe2; // 'â'
- *to++ = 0x80; // '€'
- *to++ = 0x99; // '™'
+ text += 0xe2; // 'â'
+ text += 0x80; // '€'
+ text += 0x99; // '™'
break;
case 0x93: // '“'
- *to++ = 0xe2; // 'â'
- *to++ = 0x80; // '€'
- *to++ = 0x9c; // 'œ'
+ text += 0xe2; // 'â'
+ text += 0x80; // '€'
+ text += 0x9c; // 'œ'
break;
case 0x94: // '”'
- *to++ = 0xe2; // 'â'
- *to++ = 0x80; // '€'
- *to++ = 0x9d; // ''
+ text += 0xe2; // 'â'
+ text += 0x80; // '€'
+ text += 0x9d; // ''
break;
case 0x95: // '•'
- *to++ = 0xe2; // 'â'
- *to++ = 0x80; // '€'
- *to++ = 0xa2; // '¢'
+ text += 0xe2; // 'â'
+ text += 0x80; // '€'
+ text += 0xa2; // '¢'
break;
case 0x96: // '–'
- *to++ = 0xe2; // 'â'
- *to++ = 0x80; // '€'
- *to++ = 0x93; // '“'
+ text += 0xe2; // 'â'
+ text += 0x80; // '€'
+ text += 0x93; // '“'
break;
case 0x97: // '—'
- *to++ = 0xe2; // 'â'
- *to++ = 0x80; // '€'
- *to++ = 0x94; // '”'
+ text += 0xe2; // 'â'
+ text += 0x80; // '€'
+ text += 0x94; // '”'
break;
case 0x98: // '˜'
- *to++ = 0xcb; // 'Ë'
- *to++ = 0x9c; // 'œ'
+ text += 0xcb; // 'Ë'
+ text += 0x9c; // 'œ'
break;
case 0x99: // '™'
- *to++ = 0xe2; // 'â'
- *to++ = 0x84; // '„'
- *to++ = 0xa2; // '¢'
+ text += 0xe2; // 'â'
+ text += 0x84; // '„'
+ text += 0xa2; // '¢'
break;
case 0x9A: // 'š'
- *to++ = 0xc5; // 'Å'
- *to++ = 0xa1; // '¡'
+ text += 0xc5; // 'Å'
+ text += 0xa1; // '¡'
break;
case 0x9B: // '›'
- *to++ = 0xe2; // 'â'
- *to++ = 0x80; // '€'
- *to++ = 0xba; // 'º'
+ text += 0xe2; // 'â'
+ text += 0x80; // '€'
+ text += 0xba; // 'º'
break;
case 0x9C: // 'œ'
- *to++ = 0xc5; // 'Å'
- *to++ = 0x93; // '“'
+ text += 0xc5; // 'Å'
+ text += 0x93; // '“'
break;
case 0x9E: // 'ž'
- *to++ = 0xc5; // 'Å'
- *to++ = 0xbe; // '¾'
+ text += 0xc5; // 'Å'
+ text += 0xbe; // '¾'
break;
case 0x9F: // 'Ÿ'
- *to++ = 0xc5; // 'Å'
- *to++ = 0xb8; // '¸'
+ text += 0xc5; // 'Å'
+ text += 0xb8; // '¸'
break;
default:
- *to++ = 0xC2;
- *to++ = *from;
+ text += 0xC2;
+ text += *from;
}
}
else {
- *to++ = 0xC3;
- *to++ = (*from - 0x40);
+ text += 0xC3;
+ text += (*from - 0x40);
}
}
- *to++ = 0;
- *to = 0;
return 0;
}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/osisfootnotes.cpp b/src/modules/filters/osisfootnotes.cpp
new file mode 100644
index 0000000..59a7cae
--- /dev/null
+++ b/src/modules/filters/osisfootnotes.cpp
@@ -0,0 +1,138 @@
+/******************************************************************************
+ *
+ * osisfootnotes - SWFilter descendant to hide or show footnotes
+ * in an OSIS module.
+ */
+
+
+#include <stdlib.h>
+#include <osisfootnotes.h>
+#include <swmodule.h>
+#include <swbuf.h>
+#include <versekey.h>
+#include <utilxml.h>
+#ifndef __GNUC__
+#else
+#include <unixstr.h>
+#endif
+
+SWORD_NAMESPACE_START
+
+const char oName[] = "Footnotes"; // option name handed to the SWOptionFilter base
+const char oTip[] = "Toggles Footnotes On and Off if they exist"; // option description handed to the SWOptionFilter base
+
+const SWBuf choices[3] = {"On", "Off", ""}; // third entry is an empty placeholder
+const StringList oValues(&choices[0], &choices[2]); // built from the pointer range choices[0]..choices[2]; presumably half-open, i.e. "On" and "Off" only -- confirm against StringList
+
+OSISFootnotes::OSISFootnotes() : SWOptionFilter(oName, oTip, &oValues) { // passes name, tip and value list to the base class
+ setOptionValue("Off"); // initial option value is "Off"
+}
+
+
+OSISFootnotes::~OSISFootnotes() { // nothing to release
+}
+
+
+char OSISFootnotes::processText(SWBuf &text, const SWKey *key, const SWModule *module) { // rewrites text in place, collecting <note> elements and keeping or dropping them depending on option; always returns 0
+ SWBuf token; // characters of the tag currently being read (between '<' and '>')
+ bool intoken = false; // true while inside '<' ... '>'
+ bool hide = false; // true while inside a <note>; content is diverted into tagText
+ SWBuf tagText; // body of the note currently being collected
+ XMLTag startTag; // opening tag of the note currently being collected
+ SWBuf refs = ""; // "; "-separated osisRef values seen since the last note start
+ int footnoteNum = 1; // running number used as the entry-attribute key
+ char buf[254]; // holds footnoteNum formatted as text
+ VerseKey parser = key->getText(); // used below to parse verse lists found in cross-reference notes
+
+ SWBuf orig = text; // work from a copy because text is rebuilt from scratch
+ const char *from = orig.c_str();
+
+ for (text = ""; *from; from++) {
+
+ // remove all newlines temporarily to fix kjv2003 module
+ if ((*from == 10) || (*from == 13)) {
+ if ((text.length()>1) && (text[text.length()-2] != ' ') && (*(from+1) != ' ')) // NOTE(review): tests the second-to-last output character (length()-2), not the last -- confirm this is intended
+ text += ' ';
+ continue;
+ }
+
+
+ if (*from == '<') { // start of a tag: begin collecting a fresh token
+ intoken = true;
+ token = "";
+ continue;
+ }
+ if (*from == '>') { // process tokens
+ intoken = false;
+
+ XMLTag tag(token);
+ if (!strcmp(tag.getName(), "note")) {
+ if (!tag.isEndTag()) {
+ if (SWBuf("strongsMarkup") == tag.getAttribute("type")) { // handle bug in KJV2003 module where some note open tags were <note ... />
+ tag.setEmpty(false);
+ }
+ if (!tag.isEmpty()) { // a real opening tag: start diverting content into tagText
+// if ((!tag.isEmpty()) || (SWBuf("strongsMarkup") == tag.getAttribute("type"))) {
+ refs = "";
+ startTag = tag;
+ hide = true;
+ tagText = "";
+ continue;
+ }
+ }
+ if (hide && tag.isEndTag()) { // closing tag of the note being collected
+ if (module->isProcessEntryAttributes()) {
+ sprintf(buf, "%i", footnoteNum++);
+ StringList attributes = startTag.getAttributeNames();
+ for (StringList::iterator it = attributes.begin(); it != attributes.end(); it++) { // copy every attribute of the opening tag into the entry attributes
+ module->getEntryAttributes()["Footnote"][buf][it->c_str()] = startTag.getAttribute(it->c_str());
+ }
+ module->getEntryAttributes()["Footnote"][buf]["body"] = tagText;
+ startTag.setAttribute("swordFootnote", buf); // tag the note with its number for later filters
+ if ((startTag.getAttribute("type")) && (!strcmp(startTag.getAttribute("type"), "crossReference"))) {
+ if (!refs.length()) // no <reference osisRef=...> seen: fall back to parsing the note body
+ refs = parser.ParseVerseList(tagText.c_str(), parser, true).getRangeText();
+ module->getEntryAttributes()["Footnote"][buf]["refList"] = refs.c_str();
+ }
+ }
+ hide = false;
+ if ((option) || ((startTag.getAttribute("type") && (!strcmp(startTag.getAttribute("type"), "crossReference"))))) { // we want the tag in the text; crossReferences are handled by another filter
+ text += startTag;
+ text.append(tagText);
+ }
+ else continue; // note is dropped, including its end tag
+ }
+ }
+
+ // remember the osisRef of each opening <reference> tag, then copy the token to text or to the pending note body
+ if ((!strcmp(tag.getName(), "reference")) && (!tag.isEndTag())) {
+ SWBuf osisRef = tag.getAttribute("osisRef");
+ if (refs.length())
+ refs += "; ";
+ refs += osisRef;
+ }
+ if (!hide) {
+ text += '<';
+ text.append(token);
+ text += '>';
+ }
+ else {
+ tagText += '<';
+ tagText.append(token);
+ tagText += '>';
+ }
+ continue;
+ }
+ if (intoken) { //copy token
+ token += *from;
+ }
+ else if (!hide) { //copy text which is not inside a token
+ text += *from;
+ }
+ else tagText += *from; // text inside a note goes to the pending note body
+ }
+ return 0;
+}
+
+SWORD_NAMESPACE_END
+
diff --git a/src/modules/filters/osisheadings.cpp b/src/modules/filters/osisheadings.cpp
new file mode 100644
index 0000000..8ec9ce3
--- /dev/null
+++ b/src/modules/filters/osisheadings.cpp
@@ -0,0 +1,118 @@
+/******************************************************************************
+ *
+ *osisheadings - SWFilter descendant to hide or show headings
+ * in an OSIS module.
+ */
+
+
+#include <stdlib.h>
+#include <osisheadings.h>
+#include <swmodule.h>
+#include <utilxml.h>
+#ifndef __GNUC__
+#else
+#include <unixstr.h>
+#endif
+
+SWORD_NAMESPACE_START
+
+const char oName[] = "Headings"; // option name handed to the SWOptionFilter base
+const char oTip[] = "Toggles Headings On and Off if they exist"; // option description handed to the SWOptionFilter base
+
+const SWBuf choices[3] = {"On", "Off", ""}; // third entry is an empty placeholder
+const StringList oValues(&choices[0], &choices[2]); // built from the pointer range choices[0]..choices[2]; presumably half-open, i.e. "On" and "Off" only -- confirm against StringList
+
+OSISHeadings::OSISHeadings() : SWOptionFilter(oName, oTip, &oValues) { // passes name, tip and value list to the base class
+ setOptionValue("Off"); // initial option value is "Off"
+}
+
+
+OSISHeadings::~OSISHeadings() { // nothing to release
+}
+
+
+char OSISHeadings::processText(SWBuf &text, const SWKey *key, const SWModule *module) { // rewrites text in place, collecting <title> elements and keeping or dropping them depending on option; always returns 0
+ SWBuf token; // characters of the tag currently being read
+ bool intoken = false; // true while inside '<' ... '>'
+ bool hide = false; // true while inside a <title>; plain text is diverted into header
+ bool preverse = false; // true while the current title carries subtype="x-preverse"
+ SWBuf header; // text of the title currently being collected
+ int headerNum = 0; // running key for "Interverse" headings
+ int pvHeaderNum = 0; // running key for "Preverse" headings
+ char buf[254]; // holds the running key formatted as text
+
+ SWBuf orig = text; // work from a copy because text is rebuilt from scratch
+ const char *from = orig.c_str();
+
+ for (text = ""; *from; from++) {
+ if (*from == '<') { // start of a tag: begin collecting a fresh token
+ intoken = true;
+ token = "";
+ continue;
+ }
+ if (*from == '>') { // process tokens
+ intoken = false;
+
+ XMLTag tag(token);
+ if (!stricmp(tag.getName(), "title")) {
+ if ((tag.getAttribute("subtype")) && (!stricmp(tag.getAttribute("subtype"), "x-preverse"))) { // pre-verse title: its opening tag is never copied to text
+ hide = true;
+ preverse = true;
+ header = "";
+ continue;
+ }
+ if (!tag.isEndTag()) { // any other title tag that is not an end tag
+ hide = true;
+ header = "";
+ if (option) { // we want the tag in the text
+ text += '<';
+ text.append(token);
+ text += '>';
+ }
+ continue;
+ }
+ if (hide && tag.isEndTag()) { // closing tag of the title being collected
+
+ if (module->isProcessEntryAttributes() && option) { // headings are recorded only when both attribute processing and the option are on
+ if (preverse) {
+ sprintf(buf, "%i", pvHeaderNum++);
+ module->getEntryAttributes()["Heading"]["Preverse"][buf] = header;
+ }
+ else {
+ sprintf(buf, "%i", headerNum++);
+ module->getEntryAttributes()["Heading"]["Interverse"][buf] = header;
+ if (option) { // we want the tag in the text
+ text.append(header);
+ }
+ }
+ }
+ hide = false;
+ if ((!option) || (preverse)) { // we don't want the tag in the text anymore
+ preverse = false;
+ continue;
+ }
+ preverse = false;
+ }
+ }
+
+ // if not a heading token, keep token in text
+ if (!hide) {
+ text += '<';
+ text.append(token);
+ text += '>';
+ }
+ continue;
+ }
+ if (intoken) { //copy token
+ token += *from;
+ }
+ else if (!hide) { //copy text which is not inside a token
+ text += *from;
+ }
+ else header += *from; // text inside a title goes to the pending heading
+ }
+ return 0;
+}
+
+SWORD_NAMESPACE_END
+
diff --git a/src/modules/filters/osishtmlhref.cpp b/src/modules/filters/osishtmlhref.cpp
new file mode 100644
index 0000000..26e6dff
--- /dev/null
+++ b/src/modules/filters/osishtmlhref.cpp
@@ -0,0 +1,339 @@
+/***************************************************************************
+ osishtmlhref.cpp - OSIS to HTML with hrefs filter
+ -------------------
+ begin : 2003-06-24
+ copyright : 2003 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <osishtmlhref.h>
+#include <utilxml.h>
+#include <versekey.h>
+#include <swmodule.h>
+
+SWORD_NAMESPACE_START
+
+
+OSISHTMLHREF::MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) {
+    // Per-call state used by OSISHTMLHREF::handleToken.
+
+    // inBold is read when a </hi> end tag is handled; give it a defined value
+    // so an end tag without a preceding start tag does not read garbage.
+    inBold = false;
+
+    // <q> is rendered with quote marks unless the module configuration sets
+    // OSISqToTick to "false".  Without a module there is no configuration,
+    // so fall back to the default (quote marks on) instead of dereferencing null.
+    const char *qToTick = (module) ? module->getConfigEntry("OSISqToTick") : 0;
+    osisQToTick = ((!qToTick) || (strcmp(qToTick, "false")));
+}
+
+
+OSISHTMLHREF::OSISHTMLHREF() { // configures the inherited token and escape handling for OSIS input
+ setTokenStart("<"); // tokens (tags) are delimited by < and >
+ setTokenEnd(">");
+
+ setEscapeStart("&"); // escapes (entities) are delimited by & and ;
+ setEscapeEnd(";");
+
+ setEscapeStringCaseSensitive(true);
+
+ addEscapeStringSubstitute("amp", "&"); // the five predefined XML entities map to their literal characters
+ addEscapeStringSubstitute("apos", "'");
+ addEscapeStringSubstitute("lt", "<");
+ addEscapeStringSubstitute("gt", ">");
+ addEscapeStringSubstitute("quot", "\"");
+ addTokenSubstitute("lg", "<br />"); // line-group start and end both become an HTML line break
+ addTokenSubstitute("/lg", "<br />");
+
+ setTokenCaseSensitive(true);
+}
+
+
+/*
+ * Converts one OSIS tag into HTML-with-hrefs markup appended to buf.
+ *
+ * buf      - output buffer the rendered markup is appended to
+ * token    - tag text without the surrounding '<' and '>'
+ * userData - per-call state; actually an OSISHTMLHREF::MyUserData
+ *
+ * Returns true when the token was consumed (by a simple substitution or by
+ * one of the branches below), false when it was not handled here.
+ */
+bool OSISHTMLHREF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+    // manually process if it wasn't a simple substitution
+    if (!substituteToken(buf, token)) {
+        MyUserData *u = (MyUserData *)userData;
+        XMLTag tag(token);
+
+        // <w> tag
+        if (!strcmp(tag.getName(), "w")) {
+
+            // start <w> tag: remember it, its attributes are rendered at the end tag
+            if ((!tag.isEmpty()) && (!tag.isEndTag())) {
+                u->w = token;
+            }
+
+            // end or empty <w> tag
+            else {
+                bool endTag = tag.isEndTag();
+                SWBuf lastText;
+                bool show = true;    // to handle unplaced article in kjv2003-- temporary till combined
+
+                if (endTag) {
+                    // render the attributes of the remembered start tag
+                    tag = u->w.c_str();
+                    lastText = u->lastTextNode.c_str();
+                }
+                else lastText = "stuff";    // any non-empty value: an empty <w/> never counts as "no text"
+
+                const char *attrib;
+                const char *val;
+                if ((attrib = tag.getAttribute("xlit"))) {
+                    val = strchr(attrib, ':');
+                    val = (val) ? (val + 1) : attrib;    // drop an optional "prefix:" part
+                    buf.appendFormatted(" %s", val);
+                }
+                if ((attrib = tag.getAttribute("gloss"))) {
+                    val = strchr(attrib, ':');
+                    val = (val) ? (val + 1) : attrib;
+                    buf.appendFormatted(" %s", val);
+                }
+                if ((attrib = tag.getAttribute("lemma"))) {
+                    int count = tag.getAttributePartCount("lemma");
+                    int i = (count > 1) ? 0 : -1;        // -1 for whole value cuz it's faster, but does the same thing as 0
+                    do {
+                        attrib = tag.getAttribute("lemma", i);
+                        if (i < 0) i = 0;    // to handle our -1 condition
+                        val = strchr(attrib, ':');
+                        val = (val) ? (val + 1) : attrib;
+                        const char *val2 = val;
+                        // strchr() also matches the terminating NUL, so test *val
+                        // first to avoid reading past the end of an empty value
+                        if ((*val) && (strchr("GH", *val)) && (isdigit((unsigned char)val[1])))
+                            val2++;
+                        if ((!strcmp(val2, "3588")) && (lastText.length() < 1))
+                            show = false;
+                        else    buf.appendFormatted(" <small><em>&lt;<a href=\"type=Strongs value=%s\">%s</a>&gt;</em></small> ", val, val2);
+                    } while (++i < count);
+                }
+                if ((attrib = tag.getAttribute("morph")) && (show)) {
+                    SWBuf savelemma = tag.getAttribute("savlm");
+                    if ((strstr(savelemma.c_str(), "3588")) && (lastText.length() < 1))
+                        show = false;
+                    if (show) {
+                        int count = tag.getAttributePartCount("morph");
+                        int i = (count > 1) ? 0 : -1;        // -1 for whole value cuz it's faster, but does the same thing as 0
+                        do {
+                            attrib = tag.getAttribute("morph", i);
+                            if (i < 0) i = 0;    // to handle our -1 condition
+                            val = strchr(attrib, ':');
+                            val = (val) ? (val + 1) : attrib;
+                            const char *val2 = val;
+                            // same NUL guard as above, one character further in
+                            if ((*val == 'T') && (val[1]) && (strchr("GH", val[1])) && (isdigit((unsigned char)val[2])))
+                                val2+=2;
+                            buf.appendFormatted(" <small><em>(<a href=\"type=morph class=%s value=%s\">%s</a>)</em></small> ", tag.getAttribute("morph"), val, val2);
+                        } while (++i < count);
+                    }
+                }
+                if ((attrib = tag.getAttribute("POS"))) {
+                    val = strchr(attrib, ':');
+                    val = (val) ? (val + 1) : attrib;
+                    buf.appendFormatted(" %s", val);
+                }
+            }
+        }
+
+        // <note> tag
+        else if (!strcmp(tag.getName(), "note")) {
+            if (!tag.isEndTag()) {
+                if (!tag.isEmpty()) {
+                    SWBuf type = tag.getAttribute("type");
+
+                    if (type != "strongsMarkup") {    // leave strong's markup notes out, in the future we'll probably have different option filters to turn different note types on or off
+                        SWBuf footnoteNumber = tag.getAttribute("swordFootnote");
+                        // must start out null: if the cast throws, the catch
+                        // below leaves vkey untouched and it is tested next
+                        VerseKey *vkey = 0;
+                        // see if we have a VerseKey * or descendant
+                        try {
+                            vkey = SWDYNAMIC_CAST(VerseKey, u->key);
+                        }
+                        catch ( ... ) {    }
+                        if (vkey) {
+                            char ch = ((tag.getAttribute("type") && ((!strcmp(tag.getAttribute("type"), "crossReference")) || (!strcmp(tag.getAttribute("type"), "x-cross-ref")))) ? 'x':'n');
+                            buf.appendFormatted("<a href=\"noteID=%s.%c.%s\"><small><sup>*%c</sup></small></a> ", vkey->getText(), ch, footnoteNumber.c_str(), ch);
+                        }
+                    }
+                    // the note body itself is not shown inline
+                    u->suspendTextPassThru = true;
+                }
+            }
+            if (tag.isEndTag()) {
+                u->suspendTextPassThru = false;
+            }
+        }
+
+        // <p> paragraph tag
+        else if (!strcmp(tag.getName(), "p")) {
+            if ((!tag.isEndTag()) && (!tag.isEmpty())) {    // non-empty start tag
+                buf += "<!P><br />";
+            }
+            else if (tag.isEndTag()) {    // end tag
+                buf += "<!/P><br />";
+                userData->supressAdjacentWhitespace = true;
+            }
+            else {    // empty paragraph break marker
+                buf += "<!P><br />";
+                userData->supressAdjacentWhitespace = true;
+            }
+        }
+
+        // <reference> tag
+        else if (!strcmp(tag.getName(), "reference")) {
+            if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+                buf += "<a href=\"\">";
+            }
+            else if (tag.isEndTag()) {
+                buf += "</a>";
+            }
+        }
+
+        // <l> poetry, etc
+        else if (!strcmp(tag.getName(), "l")) {
+            if (tag.isEmpty()) {
+                buf += "<br />";
+            }
+            else if (tag.isEndTag()) {
+                buf += "<br />";
+            }
+            else if (tag.getAttribute("sID")) {    // empty line marker
+                buf += "<br />";
+            }
+        }
+
+        // <milestone type="line"/>
+        else if ((!strcmp(tag.getName(), "milestone")) && (tag.getAttribute("type")) && (!strcmp(tag.getAttribute("type"), "line"))) {
+            buf += "<br />";
+            userData->supressAdjacentWhitespace = true;
+        }
+
+        // <title>
+        else if (!strcmp(tag.getName(), "title")) {
+            if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+                buf += "<b>";
+            }
+            else if (tag.isEndTag()) {
+                buf += "</b><br />";
+            }
+        }
+
+        // <hi> highlighted text: bold for type "b"/"x-b", italic otherwise
+        else if (!strcmp(tag.getName(), "hi")) {
+            SWBuf type = tag.getAttribute("type");
+            if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+                if (type == "b" || type == "x-b") {
+                    buf += "<b> ";
+                    u->inBold = true;
+                }
+                else {    // all other types
+                    buf += "<i> ";
+                    u->inBold = false;
+                }
+            }
+            else if (tag.isEndTag()) {
+                // the end tag carries no type, so close whatever the start tag opened
+                if (u->inBold) {
+                    buf += "</b>";
+                    u->inBold = false;
+                }
+                else
+                    buf += "</i>";
+            }
+            else {    // empty hi marker
+                // what to do?  is this even valid?
+            }
+        }
+
+        // <q> quote
+        else if (!strcmp(tag.getName(), "q")) {
+            SWBuf who = tag.getAttribute("who");
+            const char *lev = tag.getAttribute("level");
+            int level = (lev) ? atoi(lev) : 1;
+
+            if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+                //alternate " and '
+                if (u->osisQToTick)
+                    buf += (level % 2) ? '\"' : '\'';
+
+                if (who == "Jesus") {
+                    buf += "<font color=\"red\"> ";
+                }
+            }
+            else if (tag.isEndTag()) {
+                //alternate " and '
+                if (u->osisQToTick)
+                    buf += (level % 2) ? '\"' : '\'';
+                //buf += "</font>";
+            }
+            else {    // empty quote marker
+                //alternate " and '
+                if (u->osisQToTick)
+                    buf += (level % 2) ? '\"' : '\'';
+            }
+        }
+
+        // <transChange>: every type is rendered as italics for now
+        else if (!strcmp(tag.getName(), "transChange")) {
+            if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+                buf += "<i>";
+            }
+            else if (tag.isEndTag()) {
+                buf += "</i>";
+            }
+            else {    // empty transChange marker?
+            }
+        }
+
+        // image
+        else if (!strcmp(tag.getName(), "figure")) {
+            const char *src = tag.getAttribute("src");
+            if (!src)        // assert we have a src attribute
+                return false;
+
+            // Build "<AbsoluteDataPath><src>" in an SWBuf rather than a
+            // hand-sized new[] buffer: no strlen() on a possibly missing
+            // config entry and nothing to delete.  SWBuf is already
+            // constructed from possibly-null attribute pointers above.
+            SWBuf filepath = u->module->getConfigEntry("AbsoluteDataPath");
+            filepath += src;
+
+            // we do this because BibleCS looks for this EXACT format for an image tag
+            buf+="<image src=\"";
+            buf+=filepath.c_str();
+            buf+="\" />";
+        }
+
+        else {
+            return false;  // we still didn't handle token
+        }
+    }
+    return true;
+}
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/osislemma.cpp b/src/modules/filters/osislemma.cpp
new file mode 100644
index 0000000..e31c002
--- /dev/null
+++ b/src/modules/filters/osislemma.cpp
@@ -0,0 +1,78 @@
+/******************************************************************************
+ *
+ * osislemma - SWFilter descendant to hide or show lemmata
+ * in a OSIS module.
+ */
+
+
+#include <stdlib.h>
+#include <osislemma.h>
+#include <utilxml.h>
+#ifndef __GNUC__
+#else
+#include <unixstr.h>
+#endif
+
+SWORD_NAMESPACE_START
+
+const char oName[] = "Lemmas"; // option name handed to the SWOptionFilter base
+const char oTip[] = "Toggles Lemmas On and Off if they exist"; // option description handed to the SWOptionFilter base
+
+const SWBuf choices[3] = {"On", "Off", ""}; // third entry is an empty placeholder
+const StringList oValues(&choices[0], &choices[2]); // built from the pointer range choices[0]..choices[2]; presumably half-open, i.e. "On" and "Off" only -- confirm against StringList
+
+OSISLemma::OSISLemma() : SWOptionFilter(oName, oTip, &oValues) { // passes name, tip and value list to the base class
+ setOptionValue("Off"); // initial option value is "Off"
+}
+
+
+OSISLemma::~OSISLemma() { // nothing to release
+}
+
+
+/*
+ * When the option is off, moves the "lemma" attribute of every <w> tag to
+ * "savlm" so later filters no longer render it.  When the option is on the
+ * text is left untouched.
+ *
+ * text   - module text, rewritten in place
+ * key    - unused here
+ * module - unused here
+ *
+ * Always returns 0.
+ */
+char OSISLemma::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+    if (option)        // lemmas are wanted: nothing to do
+        return 0;
+
+    // SWBuf grows as needed, so long tags are no longer cut off at a fixed
+    // buffer size, and it starts out empty instead of uninitialised.
+    SWBuf token;
+    bool intoken = false;
+    SWBuf orig = text;    // work from a copy because text is rebuilt from scratch
+
+    text = "";
+    for (const char *from = orig.c_str(); *from; from++) {
+        if (*from == '<') {
+            intoken = true;
+            token = "";
+            continue;
+        }
+        if (*from == '>') {    // process tokens
+            intoken = false;
+            XMLTag tag(token);
+            if ((!strcmp(tag.getName(), "w")) && (!tag.isEndTag())) {    // Lemma
+                SWBuf lemma = tag.getAttribute("lemma");
+                if (lemma.length()) {
+                    tag.setAttribute("lemma", 0);
+                    tag.setAttribute("savlm", lemma.c_str());
+                }
+            }
+            // keep tag, possibly with the lemma removed
+            text += tag;
+            continue;
+        }
+        if (intoken)
+            token += *from;
+        else
+            text += *from;
+    }
+    return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/osismorph.cpp b/src/modules/filters/osismorph.cpp
new file mode 100644
index 0000000..1b2d116
--- /dev/null
+++ b/src/modules/filters/osismorph.cpp
@@ -0,0 +1,75 @@
+/******************************************************************************
+ *
+ * osismorph - SWFilter descendant to hide or show morph tags
+ * in a OSIS module.
+ */
+
+
+#include <stdlib.h>
+#include <osismorph.h>
+#include <utilxml.h>
+#ifndef __GNUC__
+#else
+#include <unixstr.h>
+#endif
+
+SWORD_NAMESPACE_START
+
+const char oName[] = "Morphological Tags"; // option name handed to the SWOptionFilter base
+const char oTip[] = "Toggles Morphological Tags On and Off if they exist"; // option description handed to the SWOptionFilter base
+
+const SWBuf choices[3] = {"On", "Off", ""}; // third entry is an empty placeholder
+const StringList oValues(&choices[0], &choices[2]); // built from the pointer range choices[0]..choices[2]; presumably half-open, i.e. "On" and "Off" only -- confirm against StringList
+
+OSISMorph::OSISMorph() : SWOptionFilter(oName, oTip, &oValues) { // passes name, tip and value list to the base class
+ setOptionValue("Off"); // initial option value is "Off"
+}
+
+
+OSISMorph::~OSISMorph() { // nothing to release
+}
+
+
+/*
+ * When the option is off, removes the "morph" attribute from every <w> tag.
+ * When the option is on the text is left untouched.
+ *
+ * text   - module text, rewritten in place
+ * key    - unused here
+ * module - unused here
+ *
+ * Always returns 0.
+ */
+char OSISMorph::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+    if (option)        // morph tags are wanted: nothing to do
+        return 0;
+
+    // SWBuf grows as needed, so long tags are no longer cut off at a fixed
+    // buffer size, and it starts out empty instead of uninitialised.
+    SWBuf token;
+    bool intoken = false;
+    SWBuf orig = text;    // work from a copy because text is rebuilt from scratch
+
+    text = "";
+    for (const char *from = orig.c_str(); *from; from++) {
+        if (*from == '<') {
+            intoken = true;
+            token = "";
+            continue;
+        }
+        if (*from == '>') {    // process tokens
+            intoken = false;
+            XMLTag tag(token);
+            if ((!strcmp(tag.getName(), "w")) && (!tag.isEndTag())) {    // Morph
+                if (tag.getAttribute("morph"))
+                    tag.setAttribute("morph", 0);
+            }
+            // keep tag, possibly with the morph removed
+            text += tag;
+            continue;
+        }
+        if (intoken)
+            token += *from;
+        else
+            text += *from;
+    }
+    return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/osisplain.cpp b/src/modules/filters/osisplain.cpp
new file mode 100644
index 0000000..7a12a27
--- /dev/null
+++ b/src/modules/filters/osisplain.cpp
@@ -0,0 +1,151 @@
+/***************************************************************************
+ osisplain.cpp - OSIS to Plaintext filter
+ -------------------
+ begin : 2003-02-15
+ copyright : 2003 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <osisplain.h>
+#include <utilxml.h>
+#include <ctype.h>
+
+SWORD_NAMESPACE_START
+
+OSISPlain::OSISPlain() { // configures the inherited token and escape handling for OSIS input
+ setTokenStart("<"); // tokens (tags) are delimited by < and >
+ setTokenEnd(">");
+
+ setEscapeStart("&"); // escapes (entities) are delimited by & and ;
+ setEscapeEnd(";");
+
+ setEscapeStringCaseSensitive(true);
+
+ addEscapeStringSubstitute("amp", "&"); // the five predefined XML entities map to their literal characters
+ addEscapeStringSubstitute("apos", "'");
+ addEscapeStringSubstitute("lt", "<");
+ addEscapeStringSubstitute("gt", ">");
+ addEscapeStringSubstitute("quot", "\"");
+
+
+ addTokenSubstitute("title", "\n"); // these tags are each replaced by a newline
+ addTokenSubstitute("/title", "\n");
+ addTokenSubstitute("/l", "\n");
+ addTokenSubstitute("lg", "\n");
+ addTokenSubstitute("/lg", "\n");
+
+ setTokenCaseSensitive(true);
+}
+
+
+/*
+ * Converts one OSIS tag into plain text appended to buf.
+ *
+ * buf      - output buffer the rendered text is appended to
+ * token    - tag text without the surrounding '<' and '>'
+ * userData - per-call state; actually an OSISPlain::MyUserData
+ *
+ * Returns true when the token was consumed (by a simple substitution or by
+ * one of the branches below), false when it was not handled here.
+ */
+bool OSISPlain::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+    // manually process if it wasn't a simple substitution
+    if (!substituteToken(buf, token)) {
+        MyUserData *u = (MyUserData *)userData;
+        XMLTag tag(token);
+
+        // <w ...>, <w .../> or </w>
+        if (((*token == 'w') && (token[1] == ' ')) ||
+            ((*token == '/') && (token[1] == 'w') && (!token[2]))) {
+            bool start = false;
+            if (*token == 'w') {
+                if (token[strlen(token)-1] != '/') {
+                    // real start tag: remember it, its attributes are rendered at </w>
+                    u->w = token;
+                    return true;
+                }
+                start = true;    // empty <w .../>: render its own attributes now
+            }
+            tag = (start) ? token : u->w.c_str();
+            bool show = true;    // to handle unplaced article in kjv2003-- temporary till combined
+
+            // any non-empty value for an empty tag: it never counts as "no text"
+            SWBuf lastText = (start) ? "stuff" : u->lastTextNode.c_str();
+
+            const char *attrib;
+            const char *val;
+            if ((attrib = tag.getAttribute("xlit"))) {
+                val = strchr(attrib, ':');
+                val = (val) ? (val + 1) : attrib;    // drop an optional "prefix:" part
+                buf.appendFormatted(" <%s>", val);
+            }
+            if ((attrib = tag.getAttribute("gloss"))) {
+                val = strchr(attrib, ':');
+                val = (val) ? (val + 1) : attrib;
+                buf.appendFormatted(" <%s>", val);
+            }
+            if ((attrib = tag.getAttribute("lemma"))) {
+                int count = tag.getAttributePartCount("lemma");
+                int i = (count > 1) ? 0 : -1;        // -1 for whole value cuz it's faster, but does the same thing as 0
+                do {
+                    attrib = tag.getAttribute("lemma", i);
+                    if (i < 0) i = 0;    // to handle our -1 condition
+                    val = strchr(attrib, ':');
+                    val = (val) ? (val + 1) : attrib;
+                    // strchr() also matches the terminating NUL, so test *val
+                    // first to avoid reading past the end of an empty value
+                    if ((*val) && (strchr("GH", *val)) && (isdigit((unsigned char)val[1])))
+                        val++;
+                    if ((!strcmp(val, "3588")) && (lastText.length() < 1))
+                        show = false;
+                    // plain text output: no trailing '}' (that belongs to the RTF filter)
+                    else    buf.appendFormatted(" <%s>", val);
+                } while (++i < count);
+            }
+            if ((attrib = tag.getAttribute("morph")) && (show)) {
+                int count = tag.getAttributePartCount("morph");
+                int i = (count > 1) ? 0 : -1;        // -1 for whole value cuz it's faster, but does the same thing as 0
+                do {
+                    attrib = tag.getAttribute("morph", i);
+                    if (i < 0) i = 0;    // to handle our -1 condition
+                    val = strchr(attrib, ':');
+                    val = (val) ? (val + 1) : attrib;
+                    // same NUL guard as above, one character further in
+                    if ((*val == 'T') && (val[1]) && (strchr("GH", val[1])) && (isdigit((unsigned char)val[2])))
+                        val+=2;
+                    buf.appendFormatted(" (%s)", val);
+                } while (++i < count);
+            }
+            if ((attrib = tag.getAttribute("POS"))) {
+                val = strchr(attrib, ':');
+                val = (val) ? (val + 1) : attrib;
+                buf.appendFormatted(" <%s>", val);
+            }
+        }
+
+        // <note> tag
+        else if (!strncmp(token, "note", 4)) {
+            if (!strstr(token, "strongsMarkup")) {    // leave strong's markup notes out, in the future we'll probably have different option filters to turn different note types on or off
+                buf += " (";
+            }
+            else    u->suspendTextPassThru = true;
+        }
+        else if (!strncmp(token, "/note", 5)) {
+            // close the parenthesis opened above, or resume text after a suppressed note
+            if (!u->suspendTextPassThru)
+                buf += ")";
+            else    u->suspendTextPassThru = false;
+        }
+
+        // <p> paragraph tag
+        else if (((*token == 'p') && ((token[1] == ' ') || (!token[1]))) ||
+            ((*token == '/') && (token[1] == 'p') && (!token[2]))) {
+            userData->supressAdjacentWhitespace = true;
+            buf += "\n";
+        }
+
+        // <milestone type="line"/>
+        else if ((!strcmp(tag.getName(), "milestone")) && (tag.getAttribute("type")) && (!strcmp(tag.getAttribute("type"), "line"))) {
+            userData->supressAdjacentWhitespace = true;
+            buf += "\n";
+        }
+
+        else {
+            return false;  // we still didn't handle token
+        }
+    }
+    return true;
+}
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/osisredletterwords.cpp b/src/modules/filters/osisredletterwords.cpp
new file mode 100644
index 0000000..a9a4834
--- /dev/null
+++ b/src/modules/filters/osisredletterwords.cpp
@@ -0,0 +1,77 @@
+/******************************************************************************
+ *
+ * OSISRedLetterWords - SWFilter descendant to toggle red coloring for words
+ * of Christ in an OSIS module.
+ */
+
+
+#include <stdlib.h>
+#include <osisredletterwords.h>
+#include <swmodule.h>
+#include <utilxml.h>
+#ifndef __GNUC__
+#else
+#include <unixstr.h>
+#endif
+
+SWORD_NAMESPACE_START
+
+const char oName[] = "Words of Christ in Red"; // option name handed to the SWOptionFilter base
+const char oTip[] = "Toggles Red Coloring for Words of Christ On and Off if they are marked"; // option description handed to the SWOptionFilter base
+
+const SWBuf choices[3] = {"On", "Off", ""}; // third entry is an empty placeholder
+const StringList oValues(&choices[0], &choices[2]); // built from the pointer range choices[0]..choices[2]; presumably half-open, i.e. "On" and "Off" only -- confirm against StringList
+
+OSISRedLetterWords::OSISRedLetterWords() : SWOptionFilter(oName, oTip, &oValues) { // passes name, tip and value list to the base class
+ setOptionValue("On"); // initial option value is "On"
+}
+
+
+OSISRedLetterWords::~OSISRedLetterWords() { // nothing to release
+}
+
+
+/*
+ * When the option is off, removes the "who" attribute from every
+ * <q who="Jesus"> tag so later filters no longer colour the quote.
+ * When the option is on the text is left untouched.
+ *
+ * text   - module text, rewritten in place
+ * key    - unused here
+ * module - unused here
+ *
+ * Always returns 0.
+ */
+char OSISRedLetterWords::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+    if (option)        // red lettering is wanted: nothing to do, and nothing to copy
+        return 0;
+
+    SWBuf token;
+    bool intoken = false;
+    SWBuf orig = text;    // work from a copy because text is rebuilt from scratch
+
+    text = "";
+    for (const char *from = orig.c_str(); *from; from++) {
+        if (*from == '<') {
+            intoken = true;
+            token = "";
+            continue;
+        }
+        if (*from == '>') {    // process tokens
+            intoken = false;
+
+            XMLTag tag(token);
+            if (!stricmp(tag.getName(), "q")) {
+                if ((tag.getAttribute("who")) && (!stricmp(tag.getAttribute("who"), "Jesus"))) {
+                    tag.setAttribute("who", 0);
+                    text += tag;    // tag toString already has < and >
+                    continue;
+                }
+            }
+            // if we haven't modified, still use original token instead of tag, so we don't reorder attribs and stuff.  It doesn't really matter, but this is less intrusive to the original markup.
+            text += '<';
+            text.append(token);
+            text += '>';
+            continue;
+        }
+        if (intoken) {    //copy token
+            token += *from;
+        }
+        else    {    //copy text which is not inside a token
+            text += *from;
+        }
+    }
+    return 0;
+}
+
+SWORD_NAMESPACE_END
+
diff --git a/src/modules/filters/osisrtf.cpp b/src/modules/filters/osisrtf.cpp
new file mode 100644
index 0000000..e94e8ae
--- /dev/null
+++ b/src/modules/filters/osisrtf.cpp
@@ -0,0 +1,333 @@
+/***************************************************************************
+ osisrtf.cpp - OSIS to RTF filter
+ -------------------
+ begin : 2003-02-15
+ copyright : 2003 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <osisrtf.h>
+#include <utilxml.h>
+#include <versekey.h>
+#include <swmodule.h>
+
+SWORD_NAMESPACE_START
+
+
+OSISRTF::MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) {
+    // Per-call state used by OSISRTF::handleToken.
+    inXRefNote    = false;
+    BiblicalText  = false;
+    // <q> is rendered with quote marks unless the module configuration sets
+    // OSISqToTick to "false"; this is also the default when there is no module.
+    osisQToTick   = true;
+    if (module) {
+        version = module->Name();
+        BiblicalText = (!strcmp(module->Type(), "Biblical Texts"));
+        // read the config inside the null guard: module was checked above,
+        // so it must not be dereferenced unconditionally afterwards
+        const char *qToTick = module->getConfigEntry("OSISqToTick");
+        osisQToTick = ((!qToTick) || (strcmp(qToTick, "false")));
+    }
+}
+
+
+OSISRTF::OSISRTF() { // configures the inherited token and escape handling for OSIS input
+ setTokenStart("<"); // tokens (tags) are delimited by < and >
+ setTokenEnd(">");
+
+ setEscapeStart("&"); // escapes (entities) are delimited by & and ;
+ setEscapeEnd(";");
+
+ setEscapeStringCaseSensitive(true);
+
+ addEscapeStringSubstitute("amp", "&"); // the five predefined XML entities map to their literal characters
+ addEscapeStringSubstitute("apos", "'");
+ addEscapeStringSubstitute("lt", "<");
+ addEscapeStringSubstitute("gt", ">");
+ addEscapeStringSubstitute("quot", "\"");
+ addTokenSubstitute("lg", "{\\par}"); // line-group start and end both become an RTF paragraph break
+ addTokenSubstitute("/lg", "{\\par}");
+
+ setTokenCaseSensitive(true);
+}
+
+
+/*
+ * Converts one OSIS tag into RTF markup appended to buf.
+ *
+ * buf      - output buffer the rendered markup is appended to
+ * token    - tag text without the surrounding '<' and '>'
+ * userData - per-call state; actually an OSISRTF::MyUserData
+ *
+ * Returns true when the token was consumed (by a simple substitution or by
+ * one of the branches below), false when it was not handled here.
+ */
+bool OSISRTF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+    // manually process if it wasn't a simple substitution
+    if (!substituteToken(buf, token)) {
+        MyUserData *u = (MyUserData *)userData;
+        XMLTag tag(token);
+
+        // <w> tag
+        if (!strcmp(tag.getName(), "w")) {
+
+            // start <w> tag: open an RTF group and remember the tag,
+            // its attributes are rendered at the end tag
+            if ((!tag.isEmpty()) && (!tag.isEndTag())) {
+                buf += "{";
+                u->w = token;
+            }
+
+            // end or empty <w> tag
+            else {
+                bool endTag = tag.isEndTag();
+                SWBuf lastText;
+                bool show = true;    // to handle unplaced article in kjv2003-- temporary till combined
+
+                if (endTag) {
+                    // render the attributes of the remembered start tag
+                    tag = u->w.c_str();
+                    lastText = u->lastTextNode.c_str();
+                }
+                else lastText = "stuff";    // any non-empty value: an empty <w/> never counts as "no text"
+
+                const char *attrib;
+                const char *val;
+                if ((attrib = tag.getAttribute("xlit"))) {
+                    val = strchr(attrib, ':');
+                    val = (val) ? (val + 1) : attrib;    // drop an optional "prefix:" part
+                    buf.appendFormatted(" {\\fs15 <%s>}", val);
+                }
+                if ((attrib = tag.getAttribute("gloss"))) {
+                    val = strchr(attrib, ':');
+                    val = (val) ? (val + 1) : attrib;
+                    buf.appendFormatted(" {\\fs15 <%s>}", val);
+                }
+                if ((attrib = tag.getAttribute("lemma"))) {
+                    int count = tag.getAttributePartCount("lemma");
+                    int i = (count > 1) ? 0 : -1;        // -1 for whole value cuz it's faster, but does the same thing as 0
+                    do {
+                        attrib = tag.getAttribute("lemma", i);
+                        if (i < 0) i = 0;    // to handle our -1 condition
+                        val = strchr(attrib, ':');
+                        val = (val) ? (val + 1) : attrib;
+                        // strchr() also matches the terminating NUL, so test *val
+                        // first to avoid reading past the end of an empty value
+                        if ((*val) && (strchr("GH", *val)) && (isdigit((unsigned char)val[1])))
+                            val++;
+                        if ((!strcmp(val, "3588")) && (lastText.length() < 1))
+                            show = false;
+                        else    buf.appendFormatted(" {\\cf3 \\sub <%s>}", val);
+                    } while (++i < count);
+                }
+                if ((attrib = tag.getAttribute("morph")) && (show)) {
+                    SWBuf savelemma = tag.getAttribute("savlm");
+                    if ((strstr(savelemma.c_str(), "3588")) && (lastText.length() < 1))
+                        show = false;
+                    if (show) {
+                        int count = tag.getAttributePartCount("morph");
+                        int i = (count > 1) ? 0 : -1;        // -1 for whole value cuz it's faster, but does the same thing as 0
+                        do {
+                            attrib = tag.getAttribute("morph", i);
+                            if (i < 0) i = 0;    // to handle our -1 condition
+                            val = strchr(attrib, ':');
+                            val = (val) ? (val + 1) : attrib;
+                            // same NUL guard as above, one character further in
+                            if ((*val == 'T') && (val[1]) && (strchr("GH", val[1])) && (isdigit((unsigned char)val[2])))
+                                val+=2;
+                            buf.appendFormatted(" {\\cf4 \\sub (%s)}", val);
+                        } while (++i < count);
+                    }
+                }
+                if ((attrib = tag.getAttribute("POS"))) {
+                    val = strchr(attrib, ':');
+                    val = (val) ? (val + 1) : attrib;
+                    buf.appendFormatted(" {\\fs15 <%s>}", val);
+                }
+
+                // close the group opened by the start tag
+                if (endTag)
+                    buf += "}";
+            }
+        }
+
+        // <note> tag
+        else if (!strcmp(tag.getName(), "note")) {
+            if (!tag.isEndTag()) {
+                if (!tag.isEmpty()) {
+                    SWBuf type = tag.getAttribute("type");
+
+                    if (type != "strongsMarkup") {    // leave strong's markup notes out, in the future we'll probably have different option filters to turn different note types on or off
+                        SWBuf footnoteNumber = tag.getAttribute("swordFootnote");
+                        // must start out null: if the cast throws, the catch
+                        // below leaves vkey untouched and it is tested next
+                        VerseKey *vkey = 0;
+                        // see if we have a VerseKey * or descendant
+                        try {
+                            vkey = SWDYNAMIC_CAST(VerseKey, u->key);
+                        }
+                        catch ( ... ) {    }
+                        if (vkey) {
+                            char ch = ((!strcmp(type.c_str(), "crossReference")) || (!strcmp(type.c_str(), "x-cross-ref"))) ? 'x':'n';
+                            buf.appendFormatted("{\\super <a href=\"\">*%c%i.%s</a>} ", ch, vkey->Verse(), footnoteNumber.c_str());
+                            u->inXRefNote = (ch == 'x');
+                        }
+                    }
+                    // the note body itself is not shown inline
+                    u->suspendTextPassThru = true;
+                }
+            }
+            if (tag.isEndTag()) {
+                u->suspendTextPassThru = false;
+                u->inXRefNote = false;
+            }
+        }
+
+        // <p> paragraph tag
+        else if (!strcmp(tag.getName(), "p")) {
+            if ((!tag.isEndTag()) && (!tag.isEmpty())) {    // non-empty start tag
+                buf += "{\\par ";
+            }
+            else if (tag.isEndTag()) {    // end tag
+                buf += "\\par}";
+                userData->supressAdjacentWhitespace = true;
+            }
+            else {    // empty paragraph break marker
+                buf += "{\\par\\par}";
+                userData->supressAdjacentWhitespace = true;
+            }
+        }
+
+        // <reference> tag
+        else if (!strcmp(tag.getName(), "reference")) {
+            if (!u->inXRefNote) {    // only show these if we're not in an xref note
+                if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+                    buf += "{<a href=\"\">";
+                }
+                else if (tag.isEndTag()) {
+                    buf += "</a>}";
+                }
+            }
+        }
+
+        // <l> poetry
+        else if (!strcmp(tag.getName(), "l")) {
+            if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+                buf += "{";
+            }
+            else if (tag.isEndTag()) {
+                buf += "\\par}";
+            }
+            else if (tag.getAttribute("sID")) {    // empty line marker
+                buf += "{\\par}";
+            }
+        }
+
+        // <milestone type="line"/>
+        else if ((!strcmp(tag.getName(), "milestone")) && (tag.getAttribute("type")) && (!strcmp(tag.getAttribute("type"), "line"))) {
+            buf += "{\\par}";
+            userData->supressAdjacentWhitespace = true;
+        }
+
+        // <title>
+        else if (!strcmp(tag.getName(), "title")) {
+            if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+                buf += "{\\par\\i1\\b1 ";
+            }
+            else if (tag.isEndTag()) {
+                buf += "\\par}";
+            }
+        }
+
+        // <hi> highlighted text: bold for type "b"/"x-b", italic otherwise
+        else if (!strcmp(tag.getName(), "hi")) {
+            SWBuf type = tag.getAttribute("type");
+            if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+                if (type == "b" || type == "x-b")
+                    buf += "{\\b1 ";
+                else    // all other types
+                    buf += "{\\i1 ";
+            }
+            else if (tag.isEndTag()) {
+                buf += "}";
+            }
+        }
+
+        // <q> quote
+        else if (!strcmp(tag.getName(), "q")) {
+            SWBuf who = tag.getAttribute("who");
+            const char *lev = tag.getAttribute("level");
+            int level = (lev) ? atoi(lev) : 1;
+
+            if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+                buf += "{";
+
+                //alternate " and '
+                if (u->osisQToTick)
+                    buf += (level % 2) ? '\"' : '\'';
+
+                if (who == "Jesus")
+                    buf += "\\cf6 ";
+            }
+            else if (tag.isEndTag()) {
+                //alternate " and '
+                if (u->osisQToTick)
+                    buf += (level % 2) ? '\"' : '\'';
+                buf += "}";
+            }
+            else {    // empty quote marker
+                //alternate " and '
+                if (u->osisQToTick)
+                    buf += (level % 2) ? '\"' : '\'';
+            }
+        }
+
+        // <transChange>: every type is rendered as italics for now
+        else if (!strcmp(tag.getName(), "transChange")) {
+            if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+                buf += "{\\i1 ";
+            }
+            else if (tag.isEndTag()) {
+                buf += "}";
+            }
+        }
+
+        // image
+        else if (!strcmp(tag.getName(), "figure")) {
+            const char *src = tag.getAttribute("src");
+            if (!src)        // assert we have a src attribute
+                return false;
+
+            // Build "<AbsoluteDataPath><src>" in an SWBuf rather than a
+            // hand-sized new[] buffer: no strlen() on a possibly missing
+            // config entry and nothing to delete.  SWBuf is already
+            // constructed from possibly-null attribute pointers above.
+            SWBuf filepath = u->module->getConfigEntry("AbsoluteDataPath");
+            filepath += src;
+
+            // we do this because BibleCS looks for this EXACT format for an image tag
+            // TODO: embedding the image data as an RTF \pict group was sketched here but never finished
+            buf+="<img src=\"";
+            buf+=filepath.c_str();
+            buf+="\" />";
+        }
+        else {
+            return false;  // we still didn't handle token
+        }
+    }
+    return true;
+}
+
+
+SWORD_NAMESPACE_END
+
diff --git a/src/modules/filters/osisscripref.cpp b/src/modules/filters/osisscripref.cpp
new file mode 100644
index 0000000..566e08a
--- /dev/null
+++ b/src/modules/filters/osisscripref.cpp
@@ -0,0 +1,104 @@
+/******************************************************************************
+ *
+ * OSISScripref - SWFilter descendant to hide or show scripture references
+ * in an OSIS module.
+ */
+
+
+#include <stdlib.h>
+#include <osisscripref.h>
+#include <swmodule.h>
+#include <utilxml.h>
+#ifndef __GNUC__
+#else
+#include <unixstr.h>
+#endif
+
+SWORD_NAMESPACE_START
+
+const char oName[] = "Cross-references"; // option name handed to SWOptionFilter by the constructor
+const char oTip[] = "Toggles Scripture Cross-references On and Off if they exist"; // option description handed to SWOptionFilter
+
+const SWBuf choices[3] = {"On", "Off", ""}; // the third, empty entry is only used as the end marker of the range below
+const StringList oValues(&choices[0], &choices[2]); // presumably a half-open [first, last) range, i.e. "On" and "Off" only -- confirm against StringList
+
+OSISScripref::OSISScripref() : SWOptionFilter(oName, oTip, &oValues) { // passes the option name, tip and value list to the base class
+ setOptionValue("On"); // initial option value for this filter is "On"
+}
+
+
+OSISScripref::~OSISScripref() { // empty: nothing is released here
+}
+
+
+/**
+ * Shows or hides OSIS cross-reference notes in an entry.
+ *
+ * The entry is copied to a scratch buffer and rebuilt in place.  Everything
+ * between <note type="crossReference"> and the next note end tag is
+ * collected in a side buffer.  When the option is on, the start tag, the
+ * collected content and the end tag are written to the output; when it is
+ * off, all three are dropped.  All other text and tags pass through.
+ *
+ * @param text   entry text, rewritten in place
+ * @param key    not used by this filter
+ * @param module not used by this filter
+ * @return always 0
+ */
+char OSISScripref::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+	SWBuf token;
+	bool intoken = false;
+	bool hide = false;
+	SWBuf tagText;
+	XMLTag startTag;
+
+	SWBuf orig = text;
+	const char *from = orig.c_str();
+
+	for (text = ""; *from; from++) {
+		if (*from == '<') {
+			intoken = true;
+			token = "";
+			continue;
+		}
+		if (*from == '>') {	// process tokens
+			intoken = false;
+
+			XMLTag tag(token);
+			if (!strcmp(tag.getName(), "note")) {
+				if (!tag.isEndTag() && (!tag.isEmpty())) {
+					startTag = tag;
+					if ((tag.getAttribute("type")) && (!strcmp(tag.getAttribute("type"), "crossReference"))) {
+						hide = true;
+						tagText = "";
+						if (option) {	// we want the tag in the text
+							text += '<';
+							text.append(token);
+							text += '>';
+						}
+						continue;
+					}
+				}
+				if (hide && tag.isEndTag()) {
+					hide = false;
+					if (option) {	// we want the tag in the text
+						text.append(tagText);	// end tag gets added further down
+					}
+					else	continue;	// don't let the end tag get added to the text
+				}
+			}
+
+			// outside a cross-reference note the token goes straight to the
+			// output; inside one it is buffered with the note content
+			if (!hide) {
+				text += '<';
+				text.append(token);
+				text += '>';
+			}
+			else {
+				tagText += '<';
+				tagText.append(token);
+				tagText += '>';
+			}
+			continue;
+		}
+		if (intoken) {	// copy token
+			token += *from;
+		}
+		else if (!hide) {	// copy text which is not inside a token
+			text += *from;
+		}
+		else	tagText += *from;
+	}
+	return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/osisstrongs.cpp b/src/modules/filters/osisstrongs.cpp
new file mode 100644
index 0000000..7949b50
--- /dev/null
+++ b/src/modules/filters/osisstrongs.cpp
@@ -0,0 +1,128 @@
+/******************************************************************************
+ *
+ * osisstrongs - SWFilter descendant to hide or show strongs number
+ * in a OSIS module.
+ */
+
+
+#include <stdlib.h>
+#include <osisstrongs.h>
+#include <swmodule.h>
+#ifndef __GNUC__
+#else
+#include <unixstr.h>
+#endif
+#include <ctype.h>
+
+SWORD_NAMESPACE_START
+
+const char oName[] = "Strong's Numbers"; // option name handed to SWOptionFilter by the constructor
+const char oTip[] = "Toggles Strong's Numbers On and Off if they exist"; // option description handed to SWOptionFilter
+
+const SWBuf choices[3] = {"On", "Off", ""}; // the third, empty entry is only used as the end marker of the range below
+const StringList oValues(&choices[0], &choices[2]); // presumably a half-open [first, last) range, i.e. "On" and "Off" only -- confirm against StringList
+
+
+OSISStrongs::OSISStrongs() : SWOptionFilter(oName, oTip, &oValues) { // passes the option name, tip and value list to the base class
+ setOptionValue("Off"); // initial option value for this filter is "Off"
+}
+
+
+OSISStrongs::~OSISStrongs() { // empty: nothing is released here
+}
+
+
+/**
+ * Records and optionally disables Strong's numbers on OSIS <w> tags.
+ *
+ * The entry is copied to a scratch buffer and rebuilt in place.  For every
+ * start tag beginning with "w " that carries lemma="x-Strongs:...":
+ *  - when the module processes entry attributes, the value is stored under
+ *    ["Word"][NNN]["Strongs"] together with the preceding text, or under
+ *    ["Word"][NNN]["Morph"] when its numeric part is 5627 or greater;
+ *  - when the option is off, the attribute name "lemma" is overwritten with
+ *    "savlm" inside the tag.
+ * Every tag is written back to the output.
+ *
+ * @param text   entry text, rewritten in place
+ * @param key    not used by this filter
+ * @param module module whose entry attributes are filled in
+ * @return always 0
+ */
+char OSISStrongs::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+	const char *from;
+	char token[2048]; // cheese.  Fix.
+	int tokpos = 0;
+	bool intoken = false;
+	int word = 1;
+	char val[128];
+	char wordstr[16];	// roomy enough for any int formatted with "%03d"
+	char *valto;
+	unsigned int textStart = 0, textEnd = 0;
+	bool newText = false;
+	SWBuf tmp;
+
+	SWBuf orig = text;
+	from = orig.c_str();
+
+	// a '>' seen before any '<' must not inspect an uninitialised token
+	token[0] = 0;
+	token[1] = 0;
+	token[2] = 0;
+
+	for (text = ""; *from; from++) {
+		if (*from == '<') {
+			intoken = true;
+			tokpos = 0;
+			token[0] = 0;
+			token[1] = 0;
+			token[2] = 0;
+			textEnd = text.size();
+			continue;
+		}
+		if (*from == '>') {	// process tokens
+			intoken = false;
+			if ((*token == 'w') && (token[1] == ' ')) {	// Word
+				if (module->isProcessEntryAttributes()) {
+					valto = val;
+					char *num = strstr(token, "lemma=\"x-Strongs:");
+					if (num) {
+						// copy the attribute value, truncating instead of
+						// running past the end of val
+						for (num+=17; ((*num) && (*num != '\"')); num++) {
+							if (valto < val + sizeof(val) - 1)
+								*valto++ = *num;
+						}
+						*valto = 0;
+						// skip one leading non-digit (e.g. 'G' or 'H'), but
+						// never step past the terminator of an empty value
+						const char *digits = ((*val) && (!isdigit((unsigned char)*val))) ? val+1 : val;
+						if (atoi(digits) < 5627) {
+							// normal strongs number
+							sprintf(wordstr, "%03d", word++);
+							module->getEntryAttributes()["Word"][wordstr]["Strongs"] = val;
+							tmp = "";
+							tmp.append(text.c_str()+textStart, (int)(textEnd - textStart));
+							module->getEntryAttributes()["Word"][wordstr]["Text"] = tmp;
+							newText = true;
+						}
+						else {
+							// verb morph
+							sprintf(wordstr, "%03d", word-1);
+							module->getEntryAttributes()["Word"][wordstr]["Morph"] = val;
+						}
+					}
+				}
+				if (!option) {
+					char *num = strstr(token, "lemma=\"x-Strongs:");
+					if (num) {
+						// rename the attribute in place: "lemma" -> "savlm"
+						memcpy(num, "savlm", 5);
+					}
+				}
+			}
+			// the (possibly modified) token is always kept in the text
+			text += '<';
+			for (char *tok = token; *tok; tok++)
+				text += *tok;
+			text += '>';
+			if (newText) {textStart = text.size(); newText = false; }
+			continue;
+		}
+		if (intoken) {
+			if (tokpos < 2045)
+				token[tokpos++] = *from;
+			token[tokpos+2] = 0;
+		}
+		else {
+			text += *from;
+		}
+	}
+	return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/osiswebif.cpp b/src/modules/filters/osiswebif.cpp
new file mode 100644
index 0000000..f2db7b7
--- /dev/null
+++ b/src/modules/filters/osiswebif.cpp
@@ -0,0 +1,212 @@
+/***************************************************************************
+ OSISWEBIF.cpp - OSIS to HTML filter with hrefs
+ for strongs and morph tags
+ -------------------
+ begin : 2003-10-23
+ copyright : 2003 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <osiswebif.h>
+#include <utilxml.h>
+#include <versekey.h>
+#include <swmodule.h>
+#include <ctype.h>
+
+
+SWORD_NAMESPACE_START
+
+
+OSISWEBIF::OSISWEBIF() : baseURL(""), passageStudyURL(baseURL + "passagestudy.jsp") { // NOTE(review): passageStudyURL is built from baseURL, so baseURL must be declared before it in the class -- confirm in osiswebif.h
+}
+
+/**
+ * Renders one OSIS token as HTML for the web interface.
+ *
+ * Simple substitutions are tried first.  Otherwise <w>, <note>, <title>,
+ * <q> and <transChange> are handled here and every other tag is passed on
+ * to OSISHTMLHREF::handleToken().
+ *
+ * @param buf      output buffer the HTML is appended to
+ * @param token    tag text without the surrounding angle brackets
+ * @param userData per-call state; used here as MyUserData
+ * @return true when the token was substituted or handled by one of the
+ *         branches below, otherwise the result of
+ *         OSISHTMLHREF::handleToken()
+ */
+bool OSISWEBIF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+	// manually process if it wasn't a simple substitution
+	if (!substituteToken(buf, token)) {
+		MyUserData *u = (MyUserData *)userData;
+		XMLTag tag(token);
+
+		// <w> tag
+		if (!strcmp(tag.getName(), "w")) {
+
+			// start <w> tag
+			if ((!tag.isEmpty()) && (!tag.isEndTag())) {
+				u->w = token;
+			}
+
+			// end or empty <w> tag
+			else {
+				bool endTag = tag.isEndTag();
+				SWBuf lastText;
+				bool show = true;	// to handle unplaced article in kjv2003-- temporary till combined
+
+				if (endTag) {
+					// the attributes live on the remembered start tag
+					tag = u->w.c_str();
+					lastText = u->lastTextNode.c_str();
+				}
+				else lastText = "stuff";
+
+				const char *attrib;
+				const char *val;
+				if ((attrib = tag.getAttribute("xlit"))) {
+					val = strchr(attrib, ':');
+					val = (val) ? (val + 1) : attrib;
+					buf.appendFormatted(" %s", val);
+				}
+				if ((attrib = tag.getAttribute("gloss"))) {
+					val = strchr(attrib, ':');
+					val = (val) ? (val + 1) : attrib;
+					buf.appendFormatted(" %s", val);
+				}
+				if ((attrib = tag.getAttribute("lemma"))) {
+					int count = tag.getAttributePartCount("lemma");
+					int i = (count > 1) ? 0 : -1;		// -1 for whole value cuz it's faster, but does the same thing as 0
+					do {
+						attrib = tag.getAttribute("lemma", i);
+						if (i < 0) i = 0;	// to handle our -1 condition
+						val = strchr(attrib, ':');
+						val = (val) ? (val + 1) : attrib;
+						const char *val2 = val;
+						// strchr() also matches the terminating NUL, so test
+						// for it first to avoid reading past an empty value
+						if ((*val) && (strchr("GH", *val)) && (isdigit((unsigned char)val[1])))
+							val2++;
+						if ((!strcmp(val2, "3588")) && (lastText.length() < 1))
+							show = false;
+						else	buf.appendFormatted(" <small><em>&lt;<a href=\"%s?showStrong=%s#cv\">%s</a>&gt;</em></small> ", passageStudyURL.c_str(), encodeURL(val2).c_str(), val2);
+					} while (++i < count);
+				}
+				if ((attrib = tag.getAttribute("morph")) && (show)) {
+					SWBuf savelemma = tag.getAttribute("savlm");
+					if ((strstr(savelemma.c_str(), "3588")) && (lastText.length() < 1))
+						show = false;
+					if (show) {
+						int count = tag.getAttributePartCount("morph");
+						int i = (count > 1) ? 0 : -1;		// -1 for whole value cuz it's faster, but does the same thing as 0
+						do {
+							attrib = tag.getAttribute("morph", i);
+							if (i < 0) i = 0;	// to handle our -1 condition
+							val = strchr(attrib, ':');
+							val = (val) ? (val + 1) : attrib;
+							const char *val2 = val;
+							// same NUL guard as for lemma: a bare "T" must not
+							// make us look at val[2]
+							if ((*val == 'T') && (val[1]) && (strchr("GH", val[1])) && (isdigit((unsigned char)val[2])))
+								val2+=2;
+							buf.appendFormatted(" <small><em>(<a href=\"%s?showMorph=%s#cv\">%s</a>)</em></small> ", passageStudyURL.c_str(), encodeURL(val2).c_str(), val2);
+						} while (++i < count);
+					}
+				}
+				if ((attrib = tag.getAttribute("POS"))) {
+					val = strchr(attrib, ':');
+					val = (val) ? (val + 1) : attrib;
+					buf.appendFormatted(" %s", val);
+				}
+			}
+		}
+
+		// <note> tag
+		else if (!strcmp(tag.getName(), "note")) {
+			if (!tag.isEndTag()) {
+				if (!tag.isEmpty()) {
+					SWBuf type = tag.getAttribute("type");
+
+					if (type != "strongsMarkup") {	// leave strong's markup notes out, in the future we'll probably have different option filters to turn different note types on or off
+						SWBuf footnoteNumber = tag.getAttribute("swordFootnote");
+						// start from null so a swallowed exception cannot
+						// leave an indeterminate pointer to be tested below
+						VerseKey *vkey = 0;
+						// see if we have a VerseKey * or descendant
+						try {
+							vkey = SWDYNAMIC_CAST(VerseKey, u->key);
+						}
+						catch ( ... ) {	}
+						if (vkey) {
+							char ch = ((tag.getAttribute("type") && ((!strcmp(tag.getAttribute("type"), "crossReference")) || (!strcmp(tag.getAttribute("type"), "x-cross-ref")))) ? 'x':'n');
+							// footnote link output is currently disabled:
+//							buf.appendFormatted("<a href=\"noteID=%s.%c.%s\"><small><sup>*%c</sup></small></a> ", vkey->getText(), ch, footnoteNumber.c_str(), ch);
+						}
+					}
+					// note content is not passed through to the output
+					u->suspendTextPassThru = true;
+				}
+			}
+			if (tag.isEndTag()) {
+				u->suspendTextPassThru = false;
+			}
+		}
+		// <title>
+		else if (!strcmp(tag.getName(), "title")) {
+			if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+				buf += "<h3>";
+			}
+			else if (tag.isEndTag()) {
+				buf += "</h3>";
+			}
+		}
+
+		// <q> quote
+		else if (!strcmp(tag.getName(), "q")) {
+			SWBuf type = tag.getAttribute("type");
+			SWBuf who = tag.getAttribute("who");
+			const char *lev = tag.getAttribute("level");
+			int level = (lev) ? atoi(lev) : 1;
+
+			if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+				//alternate " and '
+				if (u->osisQToTick)
+					buf += (level % 2) ? '\"' : '\'';
+
+				if (who == "Jesus") {
+					buf += "<span class=\"wordsOfJesus\"> ";
+				}
+			}
+			else if (tag.isEndTag()) {
+				//alternate " and '
+				if (u->osisQToTick)
+					buf += (level % 2) ? '\"' : '\'';
+				// NOTE(review): the span is only opened for who="Jesus" but is
+				// closed for every </q>; balancing it needs state on the user data
+				buf += "</span>";
+			}
+			else {	// empty quote marker
+				//alternate " and '
+				if (u->osisQToTick)
+					buf += (level % 2) ? '\"' : '\'';
+			}
+		}
+
+		// <transChange>
+		else if (!strcmp(tag.getName(), "transChange")) {
+			if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+				SWBuf type = tag.getAttribute("type");
+				// remembered so the end tag knows what to close
+				u->lastTransChange = type;
+
+				if ((type == "added") || (type == "supplied"))
+					buf += "<i>";
+				else if (type == "tenseChange")
+					buf += "*";
+			}
+			else if (tag.isEndTag()) {
+				SWBuf type = u->lastTransChange;
+				if ((type == "added") || (type == "supplied"))
+					buf += "</i>";
+			}
+			else {	// empty transChange marker?
+			}
+		}
+		else {
+			return OSISHTMLHREF::handleToken(buf, token, userData);
+		}
+	}
+	return true;
+}
+
+
+SWORD_NAMESPACE_END
+
diff --git a/src/modules/filters/plainfootnotes.cpp b/src/modules/filters/plainfootnotes.cpp
index 96fc4d8..0ae50ef 100644
--- a/src/modules/filters/plainfootnotes.cpp
+++ b/src/modules/filters/plainfootnotes.cpp
@@ -19,84 +19,65 @@
#include <swkey.h>
#include <stdlib.h>
-#include <string.h>
#ifndef __GNUC__
#else
#include <unixstr.h>
#endif
-const char PLAINFootnotes::on[] = "On";
-const char PLAINFootnotes::off[] = "Off";
-const char PLAINFootnotes::optName[] = "Footnotes";
-const char PLAINFootnotes::optTip[] = "Toggles Footnotes On and Off In Bible Texts If They Exist";
+SWORD_NAMESPACE_START
-PLAINFootnotes::PLAINFootnotes(){
- option = false;
- options.push_back(on);
- options.push_back(off);
-}
-
-PLAINFootnotes::~PLAINFootnotes(){
-}
+const char oName[] = "Footnotes";
+const char oTip[] = "Toggles Footnotes On and Off In Bible Texts If They Exist";
+const SWBuf choices[3] = {"On", "Off", ""};
+const StringList oValues(&choices[0], &choices[2]);
-void PLAINFootnotes::setOptionValue(const char *ival)
-{
- option = (!stricmp(ival, on));
+PLAINFootnotes::PLAINFootnotes() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("Off");
}
-const char *PLAINFootnotes::getOptionValue()
-{
- return (option) ? on:off;
+PLAINFootnotes::~PLAINFootnotes(){
}
-char PLAINFootnotes::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
-{
- char token[2048];
- int tokpos = 0;
- bool intoken = false;
- bool lastspace = false;
-
+// When the option is off, rebuilds text without anything between '{' and '}'
+// (braces included); when the option is on, text is left untouched.  Returns 0.
+char PLAINFootnotes::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
if (!option) { // if we don't want footnotes
- char *to, *from;
- int len;
- bool hide = false;
+ //char token[2048];
+ //SWBuf token;
+ //int tokpos = 0;
+ //bool intoken = false;
+ //bool lastspace = false;
- len = strlen(text) + 1; // shift string to right of buffer
- if (len < maxlen)
- {
- memmove(&text[maxlen - len], text, len);
- from = &text[maxlen - len];
- }
- else from = text; // -------------------------------
+ bool hide = false;
- for (to = text; *from; from++) {
- if (*from == '{') // Footnote start
+ SWBuf orig = text;
+ const char *from = orig.c_str();
+ for (text = ""; *from; from++) {
+ if (*from == '{') // Footnote start
{
hide = true;
continue;
}
- if (*from == '}') // Footnote end
+ else if (*from == '}') // Footnote end
{
- hide=false;
+ hide = false;
continue;
}
- if (intoken) {
- if (tokpos < 2045)
- token[tokpos++] = *from;
- token[tokpos+2] = 0;
- }
- else {
+
+ //if (intoken) {
+ //if (tokpos < 2045)
+ // token += *from;
+ // token[tokpos+2] = 0;
+ //}
+ //else {
if (!hide) {
- *to++ = *from;
- lastspace = (*from == ' ');
+ // append: plain assignment would leave only the last character in text
+ text += *from;
+ //lastspace = (*from == ' ');
}
- }
+ //}
}
- *to++ = 0;
- *to = 0;
}
return 0;
}
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/plainhtml.cpp b/src/modules/filters/plainhtml.cpp
index fefb029..19f4bc2 100644
--- a/src/modules/filters/plainhtml.cpp
+++ b/src/modules/filters/plainhtml.cpp
@@ -15,120 +15,56 @@
* *
***************************************************************************/
-#include <stdlib.h>
-#include <string.h>
#include <plainhtml.h>
+SWORD_NAMESPACE_START
PLAINHTML::PLAINHTML()
{
}
-char PLAINHTML::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+// Rebuilds text as HTML: "\n\n" becomes <P>, a single "\n" becomes <BR>,
+// '{' and '}' open and close a small coloured footnote span, and a space seen
+// after more than 5000 copied characters becomes <WBR>.  Returns 0.
+char PLAINHTML::processText(SWBuf &text, const SWKey *key, const SWModule *module)
{
- char *to, *from;
- int len;
int count = 0;
- len = strlen(text) + 1; // shift string to right of buffer
- if (len < maxlen) {
- memmove(&text[maxlen - len], text, len);
- from = &text[maxlen - len];
- }
- else from = text; // -------------------------------
- for (to = text; *from; from++)
+ SWBuf orig = text;
+ const char *from = orig.c_str();
+ for (text = ""; *from; from++)
{
- if ((*from == '\n') && (from[1] == '\n')) // paragraph
+ if ((*from == '\n') && (from[1] == '\n')) // two newlines are a paragraph
{
- *to++ = '<';
- *to++ = 'P';
- *to++ = '>';
+ text += "<P>";
from++;
continue;
} else {
- if ((*from == '\n')) // && (from[1] != '\n')) // new line
+ if ((*from == '\n')) // && (from[1] != '\n')) // only one new line
{
- *to++ = '<';
- *to++ = 'B';
- *to++ = 'R';
- *to++ = '>';
+ text += "<BR>";
continue;
}
}
- if (*from == '{') {
- *to++ = '<';
- *to++ = 'F';
- *to++ = 'O';
- *to++ = 'N';
- *to++ = 'T';
- *to++ = ' ';
- *to++ = 'C';
- *to++ = 'O';
- *to++ = 'L';
- *to++ = 'O';
- *to++ = 'R';
- *to++ = '=';
- *to++ = '#';
- *to++ = '8';
- *to++ = '0';
- *to++ = '0';
- *to++ = '0';
- *to++ = '0';
- *to++ = '0';
- *to++ = '>';
-
- *to++ = '<';
- *to++ = 'S';
- *to++ = 'M';
- *to++ = 'A';
- *to++ = 'L';
- *to++ = 'L';
- *to++ = '>';
- *to++ = ' ';
- *to++ = '(';
+ if (*from == '{') { //footnote start
+ // six hex digits, same colour (#800000) the removed code emitted
+ text += "<FONT COLOR=\"#800000\"><SMALL> (";
continue;
}
-
- if (*from == '}')
+ else if (*from == '}') //footnote end
{
- *to++ = ')';
- *to++ = ' ';
- *to++ = '<';
- *to++ = '/';
- *to++ = 'S';
- *to++ = 'M';
- *to++ = 'A';
- *to++ = 'L';
- *to++ = 'L';
- *to++ = '>';
-
- *to++ = '<';
- *to++ = '/';
- *to++ = 'F';
- *to++ = 'O';
- *to++ = 'N';
- *to++ = 'T';
- *to++ = '>';
+ text += ") </SMALL></FONT>";
continue;
}
-
- if ((*from == ' ') && (count > 5000))
+ else if ((*from == ' ') && (count > 5000))
{
- *to++ = '<';
- *to++ = 'W';
- *to++ = 'B';
- *to++ = 'R';
- *to++ = '>';
+ text += "<WBR>";
count = 0;
continue;
}
- *to++ = *from;
+ text += *from;
count++;
}
- *to++ = 0;
- *to = 0;
return 0;
}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/rtfhtml.cpp b/src/modules/filters/rtfhtml.cpp
index f0b842b..6b228fb 100644
--- a/src/modules/filters/rtfhtml.cpp
+++ b/src/modules/filters/rtfhtml.cpp
@@ -16,54 +16,41 @@
***************************************************************************/
#include <stdlib.h>
-#include <string.h>
#include <rtfhtml.h>
+SWORD_NAMESPACE_START
RTFHTML::RTFHTML() {
}
-char RTFHTML::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+char RTFHTML::processText(SWBuf &text, const SWKey *key, const SWModule *module)
{
- char *to, *from;
- int len;
bool center = false;
- len = strlen(text) + 1; // shift string to right of buffer
- if (len < maxlen) {
- memmove(&text[maxlen - len], text, len);
- from = &text[maxlen - len];
- }
- else from = text; // -------------------------------
- for (to = text; *from; from++) {
+ const char *from;
+ SWBuf orig = text;
+ from = orig.c_str();
+ for (text = ""; *from; from++)
+ {
if (*from == '\\') // a RTF command
{
- if ((from[1] == 'p') && (from[2] == 'a') && (from[3] == 'r') && (from[4] == 'd'))
- { // switch all modifier off
+ if ( !strncmp(from+1, "pard", 4) )
+ //(from[1] == 'p') && (from[2] == 'a') && (from[3] == 'r') && (from[4] == 'd'))
+ { // switch all modifiers off
if (center)
{
- *to++ = '<';
- *to++ = '/';
- *to++ = 'C';
- *to++ = 'E';
- *to++ = 'N';
- *to++ = 'T';
- *to++ = 'E';
- *to++ = 'R';
- *to++ = '>';
+ text += "</CENTER>";
center = false;
}
from += 4;
continue;
}
- if ((from[1] == 'p') && (from[2] == 'a') && (from[3] == 'r'))
+ if ( !strncmp(from+1, "par", 3) )
+ //(from[1] == 'p') && (from[2] == 'a') && (from[3] == 'r'))
{
- *to++ = '<';
- *to++ = 'P';
- *to++ = '>';
- *to++ = '\n';
+ text += "<P>\n";
from += 3;
continue;
}
@@ -72,18 +59,12 @@ char RTFHTML::ProcessText(char *text, int maxlen, const SWKey *key, const SWModu
from += 1;
continue;
}
- if ((from[1] == 'q') && (from[2] == 'c')) // center on
+ if ( !strncmp(from+1, "qc", 2) )
+ //(from[1] == 'q') && (from[2] == 'c')) // center on
{
if (!center)
{
- *to++ = '<';
- *to++ = 'C';
- *to++ = 'E';
- *to++ = 'N';
- *to++ = 'T';
- *to++ = 'E';
- *to++ = 'R';
- *to++ = '>';
+ text += "<CENTER>";
center = true;
}
from += 2;
@@ -91,9 +72,9 @@ char RTFHTML::ProcessText(char *text, int maxlen, const SWKey *key, const SWModu
}
}
- *to++ = *from;
+ text += *from;
}
- *to++ = 0;
- *to = 0;
return 0;
}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/rwphtml.cpp b/src/modules/filters/rwphtml.cpp
deleted file mode 100644
index 6f8ae4f..0000000
--- a/src/modules/filters/rwphtml.cpp
+++ /dev/null
@@ -1,187 +0,0 @@
-/***************************************************************************
- rwphtml.cpp - description
- -------------------
- begin : Thu Jun 24 1999
- copyright : (C) 1999 by Torsten Uhlmann
- email : TUhlmann@gmx.de
- ***************************************************************************/
-
-/***************************************************************************
- * *
- * This program is free software; you can redistribute it and/or modify *
- * it under the terms of the GNU General Public License as published by *
- * the Free Software Foundation; either version 2 of the License, or *
- * (at your option) any later version. *
- * *
- ***************************************************************************/
-
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-#include <rwphtml.h>
-
-RWPHTML::RWPHTML()
-{
-}
-
-
-char RWPHTML::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
-{
- char *to, *from;
- signed char greek_str[500];
- bool inverse = false;
- bool first_letter = false;
- int len;
-
- len = strlen(text) + 1; // shift string to right of buffer
- if (len < maxlen) {
- memmove(&text[maxlen - len], text, len);
- from = &text[maxlen - len];
- } else
- from = text;
- for (to = text; *from; from++) {
- if (*from == '\\') {
- ++from;
- int i=0;
- first_letter = true;
- greek_str[0] = '\0';
- while (*from != '\\') { /* get the greek word or phrase */
- greek_str[i++] = *from;
- greek_str[i + 1] = '\0';
- from++;
- } /* convert to symbol font as best we can */
- strcpy(to,"<I> </I><FONT FACE=\"symbol\">");
- to += strlen(to);
- for (int j = 0; j < i; j++) {
- if ((first_letter)
- && (greek_str[j] == 'h')) {
- if (greek_str[j + 1] == 'o') {
- *to++ = 'o';
- first_letter = false;
- ++j;
- continue;
- } else if (greek_str[j + 1] == 'a') {
- *to++ = 'a';
- first_letter = false;
- ++j;
- continue;
- } else if (greek_str[j + 1] == 'w') {
- *to++ = 'w';
- first_letter = false;
- ++j;
- continue;
- } else if (greek_str[j + 1] == 'u') {
- *to++ = 'u';
- first_letter = false;
- ++j;
- continue;
- } else if (greek_str[j + 1] ==
- -109) {
- *to++ = 'w';
- first_letter = false;
- ++j;
- continue;
- } else if (greek_str[j + 1] ==
- -120) {
- *to++ = 'h';
- first_letter = false;
- ++j;
- continue;
- } else if (greek_str[j + 1] == 'i') {
- *to++ = 'i';
- first_letter = false;
- ++j;
- continue;
- }else if (greek_str[j + 1] == 'e') {
- *to++ = 'e';
- first_letter = false;
- ++j;
- continue;
- }
- first_letter = false;
- }
- if ((greek_str[j] == 't')
- && (greek_str[j + 1] == 'h')) {
- *to++ = 'q';
- ++j;
- continue;
- }
- if ((greek_str[j] == 'c')
- && (greek_str[j + 1] == 'h')) {
- *to++ = 'c';
- ++j;
- continue;
- }
- if ((greek_str[j] == 'p')
- && (greek_str[j + 1] == 'h')) {
- ++j;
- *to++ = 'f';
- continue;
- }
- if (greek_str[j] == -120) {
- *to++ = 'h';
- continue;
- }
- if (greek_str[j] == -125) {
- *to++ = 'a';
- continue;
- }
- if (greek_str[j] == -109) {
- if(greek_str[j+1] == 'i') ++j;
- *to++ = 'w';
- continue;
- }
- if (greek_str[j] == ' ')
- first_letter = true;
- if (greek_str[j] == 's') {
- if(isalpha(greek_str[j + 1])) *to++ = 's';
- else if(!isprint(greek_str[j] )) *to++ = 's';
- else *to++ = 'V';
- continue;
- }
- if (greek_str[j] == '\'') {
- continue;
- }
- *to++ = greek_str[j];
- }
- strcpy(to,"</FONT><I> </I>");
- to += strlen(to);
- continue;
- }
- if (*from == '#') { // verse markings (e.g. "#Mark 1:1|")
- inverse = true;
- strcpy(to,"<FONT COLOR=#0000FF>");
- to += strlen(to);
- continue;
- }
- if ((*from == '|') && (inverse)) {
- inverse = false;
- strcpy(to,"</FONT>");
- to += strlen(to);
- continue;
- }
- if (*from == '{') {
- strcpy(to,"<BR><STRONG>");
- to += strlen(to);
- if ((from - &text[maxlen - len]) > 10) { // not the beginning of the entry
- strcpy(to,"<P>");
- to += strlen(to);
- }
- continue;
- }
- if (*from == '}') {
- strcpy(to," </STRONG>");
- to += strlen(to);
- continue;
- }
- if ((*from == '\n') && (from[1] == '\n')) {
- strcpy(to,"<P>");
- to += strlen(to);
- continue;
- }
- *to++ = *from;
- }
- *to++ = 0;
- *to = 0;
- return 0;
-}
diff --git a/src/modules/filters/rwprtf.cpp b/src/modules/filters/rwprtf.cpp
deleted file mode 100644
index 8f7b074..0000000
--- a/src/modules/filters/rwprtf.cpp
+++ /dev/null
@@ -1,107 +0,0 @@
-/******************************************************************************
- *
- * rwprtf - SWFilter decendant to convert all GBF tags to RTF tags
- */
-
-
-#include <stdlib.h>
-#include <string.h>
-#include <rwprtf.h>
-
-
-RWPRTF::RWPRTF() {
-
-}
-
-
-char RWPRTF::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
-{
- char *to, *from;
- bool ingreek = false;
- bool inverse = false;
- int len;
-
- len = strlen(text) + 1; // shift string to right of buffer
- if (len < maxlen) {
- memmove(&text[maxlen - len], text, len);
- from = &text[maxlen - len];
- }
- else from = text; // -------------------------------
- for (to = text; *from; from++) {
- if (*from == '\\') {
- if(!ingreek) {
- ingreek = true;
- *to++ = '[';
- *to++ = '{';
- *to++ = '\\';
- *to++ = 'f';
- *to++ = '8';
- *to++ = ' ';
- continue;
- }
- else {
- ingreek = false;
- *to++ = '}';
- *to++ = ']';
- continue;
- }
- }
-
- if ((ingreek) && ((*from == 'h') || (*from == 'H')))
- continue; // 'h's are mostly useless in RWP translitterations. The greek is more correct without them.
-
- if (*from == '#') { // verse markings (e.g. "#Mark 1:1|")
- inverse = true;
- *to++ = '{';
- *to++ = '\\';
- *to++ = 'c';
- *to++ = 'f';
- *to++ = '2';
- *to++ = ' ';
- *to++ = '#';
- continue;
- }
- if ((*from == '|') && (inverse)) {
- inverse = false;
- *to++ = '|';
- *to++ = '}';
- continue;
- }
-
- if (*from == '{') {
- *to++ = '{';
- *to++ = '\\';
- *to++ = 'b';
- *to++ = ' ';
- if ((from - &text[maxlen - len]) > 10) { // not the beginning of the entry
- *to++ = '\\';
- *to++ = 'p';
- *to++ = 'a';
- *to++ = 'r';
- *to++ = ' ';
- }
- continue;
- }
-
- if (*from == '}') {
- // this is kinda neat... DO NOTHING
- }
- if ((*from == '\n') && (from[1] == '\n')) {
- *to++ = '\\';
- *to++ = 'p';
- *to++ = 'a';
- *to++ = 'r';
- *to++ = '\\';
- *to++ = 'p';
- *to++ = 'a';
- *to++ = 'r';
- *to++ = ' ';
- continue;
- }
-
- *to++ = *from;
- }
- *to++ = 0;
- *to = 0;
- return 0;
-}
diff --git a/src/modules/filters/scsuutf8.cpp b/src/modules/filters/scsuutf8.cpp
index d0d5ceb..aff265d 100644
--- a/src/modules/filters/scsuutf8.cpp
+++ b/src/modules/filters/scsuutf8.cpp
@@ -1,6 +1,6 @@
/******************************************************************************
*
- * SCSUUTF8 - SWFilter decendant to convert a SCSU character to UTF-8
+ * SCSUUTF8 - SWFilter descendant to convert a SCSU character to UTF-8
*
*/
@@ -22,6 +22,8 @@
#include <scsuutf8.h>
+SWORD_NAMESPACE_START
+
SCSUUTF8::SCSUUTF8() {
}
@@ -59,11 +61,13 @@ unsigned char* SCSUUTF8::UTF8Output(unsigned long uchar, unsigned char* text)
return text;
}
-char SCSUUTF8::ProcessText(char *text, int len, const SWKey *key, const SWModule *module)
-{
+char SCSUUTF8::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+/*
unsigned char *to, *from;
unsigned long buflen = len * FILTERPAD;
char active = 0, mode = 0;
+ if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering
+ return -1;
static unsigned short start[8] = {0x0000,0x0080,0x0100,0x0300,0x2000,0x2080,0x2100,0x3000};
static unsigned short slide[8] = {0x0080,0x00C0,0x0400,0x0600,0x0900,0x3040,0x30A0,0xFF00};
@@ -103,7 +107,7 @@ char SCSUUTF8::ProcessText(char *text, int len, const SWKey *key, const SWModule
};
if (!len)
- return 0;
+ return 0;
memmove(&text[buflen - len], text, len);
from = (unsigned char*)&text[buflen - len];
@@ -114,95 +118,95 @@ char SCSUUTF8::ProcessText(char *text, int len, const SWKey *key, const SWModule
for (int i = 0; i < len;) {
- if (i >= len) break;
- c = from[i++];
+ if (i >= len) break;
+ c = from[i++];
- if (c >= 0x80)
+ if (c >= 0x80)
{
to = UTF8Output (c - 0x80 + slide[active], to);
}
- else if (c >= 0x20 && c <= 0x7F)
+ else if (c >= 0x20 && c <= 0x7F)
{
to = UTF8Output (c, to);
}
- else if (c == 0x0 || c == 0x9 || c == 0xA || c == 0xC || c == 0xD)
+ else if (c == 0x0 || c == 0x9 || c == 0xA || c == 0xC || c == 0xD)
{
to = UTF8Output (c, to);
}
- else if (c >= 0x1 && c <= 0x8) /* SQn */
+ else if (c >= 0x1 && c <= 0x8) // SQn
{
- if (i >= len) break;
- /* single quote */ d = from[i++];
+ if (i >= len) break;
+ d = from[i++]; // single quote
to = UTF8Output (d < 0x80 ? d + start [c - 0x1] :
d - 0x80 + slide [c - 0x1], to);
}
- else if (c >= 0x10 && c <= 0x17) /* SCn */
+ else if (c >= 0x10 && c <= 0x17) // SCn
{
- /* change window */ active = c - 0x10;
+ active = c - 0x10; // change window
}
- else if (c >= 0x18 && c <= 0x1F) /* SDn */
+ else if (c >= 0x18 && c <= 0x1F) // SDn
{
- /* define window */ active = c - 0x18;
- if (i >= len) break;
+ active = c - 0x18; // define window
+ if (i >= len) break;
slide [active] = win [from[i++]];
}
- else if (c == 0xB) /* SDX */
+ else if (c == 0xB) // SDX
{
- if (i >= len) break;
- c = from[i++];
+ if (i >= len) break;
+ c = from[i++];
- if (i >= len) break;
- d = from[i++];
+ if (i >= len) break;
+ d = from[i++];
slide [active = c>>5] = 0x10000 + (((c & 0x1F) << 8 | d) << 7);
}
- else if (c == 0xE) /* SQU */
+ else if (c == 0xE) // SQU
{
- if (i >= len) break;
- /* SQU */ c = from[i++];
+ if (i >= len) break;
+ c = from[i++]; // SQU
- if (i >= len) break;
- to = UTF8Output (c << 8 | from[i++], to);
- }
- else if (c == 0xF) /* SCU */
+ if (i >= len) break;
+ to = UTF8Output (c << 8 | from[i++], to);
+ }
+ else if (c == 0xF) // SCU
{
- /* change to Unicode mode */ mode = 1;
+ mode = 1; // change to Unicode mode
while (mode)
{
- if (i >= len) break;
- c = from[i++];
+ if (i >= len) break;
+ c = from[i++];
- if (c <= 0xDF || c >= 0xF3)
+ if (c <= 0xDF || c >= 0xF3)
{
- if (i >= len) break;
+ if (i >= len) break;
to = UTF8Output (c << 8 | from[i++], to);
}
- else if (c == 0xF0) /* UQU */
+ else if (c == 0xF0) // UQU
{
- if (i >= len) break;
+ if (i >= len) break;
c = from[i++];
- if (i >= len) break;
- to = UTF8Output (c << 8 | from[i++], to);
+ if (i >= len) break;
+ to = UTF8Output (c << 8 | from[i++], to);
}
- else if (c >= 0xE0 && c <= 0xE7) /* UCn */
+ else if (c >= 0xE0 && c <= 0xE7) // UCn
{
active = c - 0xE0; mode = 0;
}
- else if (c >= 0xE8 && c <= 0xEF) /* UDn */
+ else if (c >= 0xE8 && c <= 0xEF) // UDn
{
- if (i >= len) break;
+ if (i >= len) break;
slide [active=c-0xE8] = win [from[i++]]; mode = 0;
}
- else if (c == 0xF1) /* UDX */
+ else if (c == 0xF1) // UDX
{
- if (i >= len) break;
+ if (i >= len) break;
c = from[i++];
- if (i >= len) break;
- d = from[i++];
+ if (i >= len) break;
+ d = from[i++];
slide [active = c>>5] =
0x10000 + (((c & 0x1F) << 8 | d) << 7); mode = 0;
@@ -215,6 +219,8 @@ char SCSUUTF8::ProcessText(char *text, int len, const SWKey *key, const SWModule
*to++ = 0;
*to = 0;
+*/
return 0;
}
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/swbasicfilter.cpp b/src/modules/filters/swbasicfilter.cpp
index 2865085..55c8843 100644
--- a/src/modules/filters/swbasicfilter.cpp
+++ b/src/modules/filters/swbasicfilter.cpp
@@ -4,7 +4,7 @@
* many filters will need and can use as a starting
* point.
*
- * $Id: swbasicfilter.cpp,v 1.17 2002/03/11 19:01:28 scribe Exp $
+ * $Id: swbasicfilter.cpp,v 1.33 2003/10/24 02:43:46 scribe Exp $
*
* Copyright 2001 CrossWire Bible Society (http://www.crosswire.org)
* CrossWire Bible Society
@@ -23,16 +23,23 @@
*/
#include <stdlib.h>
-#include <string.h>
#include <swbasicfilter.h>
#include <stdio.h>
#include <stdarg.h>
+SWORD_NAMESPACE_START
+
+const char SWBasicFilter::INITIALIZE = 1;
+const char SWBasicFilter::PRECHAR = 2;
+const char SWBasicFilter::POSTCHAR = 4;
+const char SWBasicFilter::FINALIZE = 8;
+
SWBasicFilter::SWBasicFilter() {
- tokenStart = 0;
- tokenEnd = 0;
- escStart = 0;
- escEnd = 0;
+ processStages = 0;
+ tokenStart = 0;
+ tokenEnd = 0;
+ escStart = 0;
+ escEnd = 0;
setTokenStart("<");
setTokenEnd(">");
@@ -40,9 +47,9 @@ SWBasicFilter::SWBasicFilter() {
setEscapeEnd(";");
escStringCaseSensitive = false;
- tokenCaseSensitive = false;
- passThruUnknownToken = false;
- passThruUnknownEsc = false;
+ tokenCaseSensitive = false;
+ passThruUnknownToken = false;
+ passThruUnknownEsc = false;
}
@@ -87,10 +94,18 @@ void SWBasicFilter::addTokenSubstitute(const char *findString, const char *repla
if (!tokenCaseSensitive) {
stdstr(&buf, findString);
toupperstr(buf);
- tokenSubMap.insert(DualStringMap::value_type(buf, replaceString));
+ tokenSubMap[buf] = replaceString;
delete [] buf;
}
- else tokenSubMap.insert(DualStringMap::value_type(findString, replaceString));
+ else tokenSubMap[findString] = replaceString;
+}
+
+
+void SWBasicFilter::replaceTokenSubstitute(const char *findString, const char *replaceString) {
+ if (tokenSubMap.find(findString) != tokenSubMap.end()) {
+ tokenSubMap.erase( tokenSubMap.find(findString) ); //erase entry
+ }
+ addTokenSubstitute(findString, replaceString);
}
@@ -106,19 +121,15 @@ void SWBasicFilter::addEscapeStringSubstitute(const char *findString, const char
else escSubMap.insert(DualStringMap::value_type(findString, replaceString));
}
-
-void SWBasicFilter::pushString(char **buf, const char *format, ...) {
- va_list argptr;
-
- va_start(argptr, format);
- *buf += vsprintf(*buf, format, argptr);
- va_end(argptr);
-
-// *buf += strlen(*buf);
+void SWBasicFilter::replaceEscapeStringSubstitute(const char *findString, const char *replaceString) {
+ if (escSubMap.find(findString) != escSubMap.end()) {
+ escSubMap.erase( escSubMap.find(findString) ); //erase entry
+ }
+ addEscapeStringSubstitute(findString, replaceString);
}
-bool SWBasicFilter::substituteToken(char **buf, const char *token) {
+bool SWBasicFilter::substituteToken(SWBuf &buf, const char *token) {
DualStringMap::iterator it;
if (!tokenCaseSensitive) {
@@ -131,14 +142,14 @@ bool SWBasicFilter::substituteToken(char **buf, const char *token) {
it = tokenSubMap.find(token);
if (it != tokenSubMap.end()) {
- pushString(buf, it->second.c_str());
+ buf += it->second.c_str();
return true;
}
return false;
}
-bool SWBasicFilter::substituteEscapeString(char **buf, const char *escString) {
+bool SWBasicFilter::substituteEscapeString(SWBuf &buf, const char *escString) {
DualStringMap::iterator it;
if (!escStringCaseSensitive) {
@@ -151,72 +162,76 @@ bool SWBasicFilter::substituteEscapeString(char **buf, const char *escString) {
it = escSubMap.find(escString);
if (it != escSubMap.end()) {
- pushString(buf, it->second.c_str());
+ buf += it->second.c_str();
return true;
}
return false;
}
-bool SWBasicFilter::handleToken(char **buf, const char *token, DualStringMap &userData) {
+bool SWBasicFilter::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
return substituteToken(buf, token);
}
-bool SWBasicFilter::handleEscapeString(char **buf, const char *escString, DualStringMap &userData) {
+bool SWBasicFilter::handleEscapeString(SWBuf &buf, const char *escString, BasicFilterUserData *userData) {
return substituteEscapeString(buf, escString);
}
void SWBasicFilter::setEscapeStart(const char *escStart) {
stdstr(&(this->escStart), escStart);
+ escStartLen = strlen(escStart);
}
void SWBasicFilter::setEscapeEnd(const char *escEnd) {
stdstr(&(this->escEnd), escEnd);
+ escEndLen = strlen(escEnd);
}
void SWBasicFilter::setTokenStart(const char *tokenStart) {
stdstr(&(this->tokenStart), tokenStart);
+ tokenStartLen = strlen(tokenStart);
}
void SWBasicFilter::setTokenEnd(const char *tokenEnd) {
stdstr(&(this->tokenEnd), tokenEnd);
+ tokenEndLen = strlen(tokenEnd);
}
-char SWBasicFilter::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) {
- this->key = key;
- this->module = module;
- char *to, *from, token[4096];
+char SWBasicFilter::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ char *from;
+ char token[4096];
int tokpos = 0;
- bool intoken = false;
- int len;
+ bool intoken = false;
bool inEsc = false;
- char escStartLen = strlen(escStart);
- char escEndLen = strlen(escEnd);
char escStartPos = 0, escEndPos = 0;
- char tokenStartLen = strlen(tokenStart);
- char tokenEndLen = strlen(tokenEnd);
char tokenStartPos = 0, tokenEndPos = 0;
- DualStringMap userData;
- string lastTextNode;
+ SWBuf lastTextNode;
+ BasicFilterUserData *userData = createUserData(module, key);
- bool suspendTextPassThru = false;
- userData["suspendTextPassThru"] = "false";
+ SWBuf orig = text;
+ from = orig.getRawData();
+ text = "";
- len = strlen(text) + 1; // shift string to right of buffer
- if (len < maxlen) {
- memmove(&text[maxlen - len], text, len);
- from = &text[maxlen - len];
+ if (processStages & INITIALIZE) {
+ if (processStage(INITIALIZE, text, from, userData)) { // processStage handled it all
+ delete userData;
+ return 0;
+ }
}
- else from = text; // -------------------------------
+ for (;*from; from++) {
+
+ if (processStages & PRECHAR) {
+ if (processStage(PRECHAR, text, from, userData)) // processStage handled this char
+ continue;
+ }
- for (to = text; *from; from++) {
if (*from == tokenStart[tokenStartPos]) {
if (tokenStartPos == (tokenStartLen - 1)) {
intoken = true;
@@ -247,15 +262,14 @@ char SWBasicFilter::ProcessText(char *text, int maxlen, const SWKey *key, const
if (*from == escEnd[escEndPos]) {
if (escEndPos == (escEndLen - 1)) {
intoken = false;
- userData["lastTextNode"] = lastTextNode;
- if ((!handleEscapeString(&to, token, userData)) && (passThruUnknownEsc)) {
- pushString(&to, escStart);
- pushString(&to, token);
- pushString(&to, escEnd);
+ userData->lastTextNode = lastTextNode;
+ if ((!handleEscapeString(text, token, userData)) && (passThruUnknownEsc)) {
+ text += escStart;
+ text += token;
+ text += escEnd;
}
escEndPos = escStartPos = tokenEndPos = tokenStartPos = 0;
lastTextNode = "";
- suspendTextPassThru = (!userData["suspendTextPassThru"].compare("true"));
continue;
}
}
@@ -265,15 +279,14 @@ char SWBasicFilter::ProcessText(char *text, int maxlen, const SWKey *key, const
if (*from == tokenEnd[tokenEndPos]) {
if (tokenEndPos == (tokenEndLen - 1)) {
intoken = false;
- userData["lastTextNode"] = lastTextNode;
- if ((!handleToken(&to, token, userData)) && (passThruUnknownToken)) {
- pushString(&to, tokenStart);
- pushString(&to, token);
- pushString(&to, tokenEnd);
+ userData->lastTextNode = lastTextNode;
+ if ((!handleToken(text, token, userData)) && (passThruUnknownToken)) {
+ text += tokenStart;
+ text += token;
+ text += tokenEnd;
}
escEndPos = escStartPos = tokenEndPos = tokenStartPos = 0;
lastTextNode = "";
- suspendTextPassThru = (!userData["suspendTextPassThru"].compare("true"));
continue;
}
}
@@ -285,15 +298,24 @@ char SWBasicFilter::ProcessText(char *text, int maxlen, const SWKey *key, const
token[tokpos+2] = 0;
}
else {
- if (!suspendTextPassThru)
- *to++ = *from;
- lastTextNode += *from;
+ if ((!userData->supressAdjacentWhitespace) || (*from != ' ')) {
+ if (!userData->suspendTextPassThru)
+ text += *from;
+ lastTextNode += *from;
+ }
+ userData->supressAdjacentWhitespace = false;
}
+
+ if (processStages & POSTCHAR)
+ processStage(POSTCHAR, text, from, userData);
+
}
- *to++ = 0;
- *to = 0;
- return 0;
-}
+ if (processStages & FINALIZE)
+ processStage(FINALIZE, text, from, userData);
+ delete userData;
+ return 0;
+}
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/swoptfilter.cpp b/src/modules/filters/swoptfilter.cpp
new file mode 100644
index 0000000..4eb3c82
--- /dev/null
+++ b/src/modules/filters/swoptfilter.cpp
@@ -0,0 +1,38 @@
+/******************************************************************************
+ *
+ * swoptfilter - SWFilter descendant and base class for all option filters
+ */
+
+
+#include <swoptfilter.h>
+
+SWORD_NAMESPACE_START
+
+
+SWOptionFilter::SWOptionFilter(const char *oName, const char *oTip, const StringList *oValues) {
+ optName = oName;
+ optTip = oTip;
+ optValues = oValues;
+}
+
+
+SWOptionFilter::~SWOptionFilter() {
+}
+
+
+void SWOptionFilter::setOptionValue(const char *ival) {
+ for (StringList::const_iterator loop = optValues->begin(); loop != optValues->end(); loop++) {
+ if (!stricmp(loop->c_str(), ival)) {
+ optionValue = *loop;
+ option = (!stricmp(ival, "On")); // convenience for boolean filters
+ break;
+ }
+ }
+}
+
+const char *SWOptionFilter::getOptionValue() {
+ return optionValue;
+}
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmlfootnotes.cpp b/src/modules/filters/thmlfootnotes.cpp
index d9b1f0e..8b52d98 100644
--- a/src/modules/filters/thmlfootnotes.cpp
+++ b/src/modules/filters/thmlfootnotes.cpp
@@ -1,103 +1,127 @@
/******************************************************************************
*
- * thmlfootnotes - SWFilter decendant to hide or show footnotes
+ * thmlfootnotes - SWFilter descendant to hide or show footnotes
* in a ThML module.
*/
#include <stdlib.h>
-#include <string.h>
#include <thmlfootnotes.h>
+#include <swmodule.h>
+#include <swbuf.h>
+#include <versekey.h>
+#include <utilxml.h>
#ifndef __GNUC__
#else
#include <unixstr.h>
#endif
+SWORD_NAMESPACE_START
-const char ThMLFootnotes::on[] = "On";
-const char ThMLFootnotes::off[] = "Off";
-const char ThMLFootnotes::optName[] = "Footnotes";
-const char ThMLFootnotes::optTip[] = "Toggles Footnotes On and Off if they exist";
+const char oName[] = "Footnotes";
+const char oTip[] = "Toggles Footnotes On and Off if they exist";
+const SWBuf choices[3] = {"On", "Off", ""};
+const StringList oValues(&choices[0], &choices[2]);
-ThMLFootnotes::ThMLFootnotes() {
- option = false;
- options.push_back(on);
- options.push_back(off);
+ThMLFootnotes::ThMLFootnotes() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("Off");
}
ThMLFootnotes::~ThMLFootnotes() {
}
-void ThMLFootnotes::setOptionValue(const char *ival)
-{
- option = (!stricmp(ival, on));
-}
-const char *ThMLFootnotes::getOptionValue()
-{
- return (option) ? on:off;
-}
+char ThMLFootnotes::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ SWBuf token;
+ bool intoken = false;
+ bool hide = false;
+ SWBuf tagText;
+ XMLTag startTag;
+ SWBuf refs = "";
+ int footnoteNum = 1;
+ char buf[254];
+ VerseKey parser = key->getText();
-char ThMLFootnotes::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
-{
- if (!option) { // if we don't want footnotes
- char *to, *from, token[2048]; // cheese. Fix.
- int tokpos = 0;
- bool intoken = false;
- int len;
- bool hide = false;
+ SWBuf orig = text;
+ const char *from = orig.c_str();
- len = strlen(text) + 1; // shift string to right of buffer
- if (len < maxlen) {
- memmove(&text[maxlen - len], text, len);
- from = &text[maxlen - len];
+ for (text = ""; *from; from++) {
+ if (*from == '<') {
+ intoken = true;
+ token = "";
+ continue;
}
- else from = text; // -------------------------------
+ if (*from == '>') { // process tokens
+ intoken = false;
- for (to = text; *from; from++) {
- if (*from == '<') {
- intoken = true;
- tokpos = 0;
- token[0] = 0;
- token[1] = 0;
- token[2] = 0;
- continue;
- }
- if (*from == '>') { // process tokens
- intoken = false;
- if (!strncmp(token, "note", 4)) {
- hide = true;
- continue;
+ XMLTag tag(token);
+ if (!strcmp(tag.getName(), "note")) {
+ if (!tag.isEndTag()) {
+ if (!tag.isEmpty()) {
+ refs = "";
+ startTag = tag;
+ hide = true;
+ tagText = "";
+ continue;
+ }
}
- else if (!strncmp(token, "/note", 5)) {
- hide = false;
- continue;
+ if (hide && tag.isEndTag()) {
+ if (module->isProcessEntryAttributes()) {
+ SWBuf fc = module->getEntryAttributes()["Footnote"]["count"]["value"];
+ footnoteNum = (fc.length()) ? atoi(fc.c_str()) : 0;
+ sprintf(buf, "%i", ++footnoteNum);
+ module->getEntryAttributes()["Footnote"]["count"]["value"] = buf;
+ StringList attributes = startTag.getAttributeNames();
+ for (StringList::iterator it = attributes.begin(); it != attributes.end(); it++) {
+ module->getEntryAttributes()["Footnote"][buf][it->c_str()] = startTag.getAttribute(it->c_str());
+ }
+ module->getEntryAttributes()["Footnote"][buf]["body"] = tagText;
+ startTag.setAttribute("swordFootnote", buf);
+ if ((startTag.getAttribute("type")) && (!strcmp(startTag.getAttribute("type"), "crossReference"))) {
+ if (!refs.length())
+ refs = parser.ParseVerseList(tagText.c_str(), parser, true).getRangeText();
+ module->getEntryAttributes()["Footnote"][buf]["refList"] = refs.c_str();
+ }
+ }
+ hide = false;
+ if ((option) || ((startTag.getAttribute("type") && (!strcmp(startTag.getAttribute("type"), "crossReference"))))) { // we want the tag in the text; crossReferences are handled by another filter
+ text += startTag;
+ text.append(tagText);
+ }
+ else continue;
}
+ }
- // if not a footnote token, keep token in text
- if (!hide) {
- *to++ = '<';
- for (char *tok = token; *tok; tok++)
- *to++ = *tok;
- *to++ = '>';
- }
- continue;
+ // if not a note token, keep token in text
+ if ((!strcmp(tag.getName(), "scripRef")) && (!tag.isEndTag())) {
+ SWBuf osisRef = tag.getAttribute("passage");
+ if (refs.length())
+ refs += "; ";
+ refs += osisRef;
}
- if (intoken) {
- if (tokpos < 2045)
- token[tokpos++] = *from;
- token[tokpos+2] = 0;
+ if (!hide) {
+ text += '<';
+ text.append(token);
+ text += '>';
}
- else {
- if (!hide) {
- *to++ = *from;
- }
+ else {
+ tagText += '<';
+ tagText.append(token);
+ tagText += '>';
}
+ continue;
+ }
+ if (intoken) { //copy token
+ token += *from;
}
- *to++ = 0;
- *to = 0;
+ else if (!hide) { //copy text which is not inside a token
+ text += *from;
+ }
+ else tagText += *from;
}
return 0;
}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmlgbf.cpp b/src/modules/filters/thmlgbf.cpp
index 66d9a20..a65ddaf 100644
--- a/src/modules/filters/thmlgbf.cpp
+++ b/src/modules/filters/thmlgbf.cpp
@@ -15,18 +15,18 @@
***************************************************************************/
#include <stdlib.h>
-#include <string.h>
#include <thmlgbf.h>
+SWORD_NAMESPACE_START
ThMLGBF::ThMLGBF()
{
}
-char ThMLGBF::ProcessText(char *text, int maxlen)
-{
- char *to, *from, token[2048];
+char ThMLGBF::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ const char *from;
+ char token[2048];
int tokpos = 0;
bool intoken = false;
int len;
@@ -34,13 +34,10 @@ char ThMLGBF::ProcessText(char *text, int maxlen)
bool sechead = false;
bool title = false;
- len = strlen(text) + 1; // shift string to right of buffer
- if (len < maxlen) {
- memmove(&text[maxlen - len], text, len);
- from = &text[maxlen - len];
- }
- else from = text; // -------------------------------
- for (to = text; *from; from++) {
+ SWBuf orig = text;
+ from = orig.c_str();
+
+ for (text = ""; *from; from++) {
if (*from == '<') {
intoken = true;
tokpos = 0;
@@ -60,102 +57,102 @@ char ThMLGBF::ProcessText(char *text, int maxlen)
if (*from == ';' && ampersand) {
intoken = false;
- if (!strncmp("nbsp", token, 4)) *to++ = ' ';
- else if (!strncmp("quot", token, 4)) *to++ = '"';
- else if (!strncmp("amp", token, 3)) *to++ = '&';
- else if (!strncmp("lt", token, 2)) *to++ = '<';
- else if (!strncmp("gt", token, 2)) *to++ = '>';
- else if (!strncmp("brvbar", token, 6)) *to++ = '|';
- else if (!strncmp("sect", token, 4)) *to++ = '§';
- else if (!strncmp("copy", token, 4)) *to++ = '©';
- else if (!strncmp("laquo", token, 5)) *to++ = '«';
- else if (!strncmp("reg", token, 3)) *to++ = '®';
- else if (!strncmp("acute", token, 5)) *to++ = '´';
- else if (!strncmp("para", token, 4)) *to++ = '¶';
- else if (!strncmp("raquo", token, 5)) *to++ = '»';
+ if (!strncmp("nbsp", token, 4)) text += ' ';
+ else if (!strncmp("quot", token, 4)) text += '"';
+ else if (!strncmp("amp", token, 3)) text += '&';
+ else if (!strncmp("lt", token, 2)) text += '<';
+ else if (!strncmp("gt", token, 2)) text += '>';
+ else if (!strncmp("brvbar", token, 6)) text += '|';
+ else if (!strncmp("sect", token, 4)) text += '§';
+ else if (!strncmp("copy", token, 4)) text += '©';
+ else if (!strncmp("laquo", token, 5)) text += '«';
+ else if (!strncmp("reg", token, 3)) text += '®';
+ else if (!strncmp("acute", token, 5)) text += '´';
+ else if (!strncmp("para", token, 4)) text += '¶';
+ else if (!strncmp("raquo", token, 5)) text += '»';
- else if (!strncmp("Aacute", token, 6)) *to++ = 'Á';
- else if (!strncmp("Agrave", token, 6)) *to++ = 'À';
- else if (!strncmp("Acirc", token, 5)) *to++ = 'Â';
- else if (!strncmp("Auml", token, 4)) *to++ = 'Ä';
- else if (!strncmp("Atilde", token, 6)) *to++ = 'Ã';
- else if (!strncmp("Aring", token, 5)) *to++ = 'Å';
- else if (!strncmp("aacute", token, 6)) *to++ = 'á';
- else if (!strncmp("agrave", token, 6)) *to++ = 'à';
- else if (!strncmp("acirc", token, 5)) *to++ = 'â';
- else if (!strncmp("auml", token, 4)) *to++ = 'ä';
- else if (!strncmp("atilde", token, 6)) *to++ = 'ã';
- else if (!strncmp("aring", token, 5)) *to++ = 'å';
- else if (!strncmp("Eacute", token, 6)) *to++ = 'É';
- else if (!strncmp("Egrave", token, 6)) *to++ = 'È';
- else if (!strncmp("Ecirc", token, 5)) *to++ = 'Ê';
- else if (!strncmp("Euml", token, 4)) *to++ = 'Ë';
- else if (!strncmp("eacute", token, 6)) *to++ = 'é';
- else if (!strncmp("egrave", token, 6)) *to++ = 'è';
- else if (!strncmp("ecirc", token, 5)) *to++ = 'ê';
- else if (!strncmp("euml", token, 4)) *to++ = 'ë';
- else if (!strncmp("Iacute", token, 6)) *to++ = 'Í';
- else if (!strncmp("Igrave", token, 6)) *to++ = 'Ì';
- else if (!strncmp("Icirc", token, 5)) *to++ = 'Î';
- else if (!strncmp("Iuml", token, 4)) *to++ = 'Ï';
- else if (!strncmp("iacute", token, 6)) *to++ = 'í';
- else if (!strncmp("igrave", token, 6)) *to++ = 'ì';
- else if (!strncmp("icirc", token, 5)) *to++ = 'î';
- else if (!strncmp("iuml", token, 4)) *to++ = 'ï';
- else if (!strncmp("Oacute", token, 6)) *to++ = 'Ó';
- else if (!strncmp("Ograve", token, 6)) *to++ = 'Ò';
- else if (!strncmp("Ocirc", token, 5)) *to++ = 'Ô';
- else if (!strncmp("Ouml", token, 4)) *to++ = 'Ö';
- else if (!strncmp("Otilde", token, 6)) *to++ = 'Õ';
- else if (!strncmp("oacute", token, 6)) *to++ = 'ó';
- else if (!strncmp("ograve", token, 6)) *to++ = 'ò';
- else if (!strncmp("ocirc", token, 5)) *to++ = 'ô';
- else if (!strncmp("ouml", token, 4)) *to++ = 'ö';
- else if (!strncmp("otilde", token, 6)) *to++ = 'õ';
- else if (!strncmp("Uacute", token, 6)) *to++ = 'Ú';
- else if (!strncmp("Ugrave", token, 6)) *to++ = 'Ù';
- else if (!strncmp("Ucirc", token, 5)) *to++ = 'Û';
- else if (!strncmp("Uuml", token, 4)) *to++ = 'Ü';
- else if (!strncmp("uacute", token, 6)) *to++ = 'ú';
- else if (!strncmp("ugrave", token, 6)) *to++ = 'ù';
- else if (!strncmp("ucirc", token, 5)) *to++ = 'û';
- else if (!strncmp("uuml", token, 4)) *to++ = 'ü';
- else if (!strncmp("Yacute", token, 6)) *to++ = 'Ý';
- else if (!strncmp("yacute", token, 6)) *to++ = 'ý';
- else if (!strncmp("yuml", token, 4)) *to++ = 'ÿ';
+ else if (!strncmp("Aacute", token, 6)) text += 'Á';
+ else if (!strncmp("Agrave", token, 6)) text += 'À';
+ else if (!strncmp("Acirc", token, 5)) text += 'Â';
+ else if (!strncmp("Auml", token, 4)) text += 'Ä';
+ else if (!strncmp("Atilde", token, 6)) text += 'Ã';
+ else if (!strncmp("Aring", token, 5)) text += 'Å';
+ else if (!strncmp("aacute", token, 6)) text += 'á';
+ else if (!strncmp("agrave", token, 6)) text += 'à';
+ else if (!strncmp("acirc", token, 5)) text += 'â';
+ else if (!strncmp("auml", token, 4)) text += 'ä';
+ else if (!strncmp("atilde", token, 6)) text += 'ã';
+ else if (!strncmp("aring", token, 5)) text += 'å';
+ else if (!strncmp("Eacute", token, 6)) text += 'É';
+ else if (!strncmp("Egrave", token, 6)) text += 'È';
+ else if (!strncmp("Ecirc", token, 5)) text += 'Ê';
+ else if (!strncmp("Euml", token, 4)) text += 'Ë';
+ else if (!strncmp("eacute", token, 6)) text += 'é';
+ else if (!strncmp("egrave", token, 6)) text += 'è';
+ else if (!strncmp("ecirc", token, 5)) text += 'ê';
+ else if (!strncmp("euml", token, 4)) text += 'ë';
+ else if (!strncmp("Iacute", token, 6)) text += 'Í';
+ else if (!strncmp("Igrave", token, 6)) text += 'Ì';
+ else if (!strncmp("Icirc", token, 5)) text += 'Î';
+ else if (!strncmp("Iuml", token, 4)) text += 'Ï';
+ else if (!strncmp("iacute", token, 6)) text += 'í';
+ else if (!strncmp("igrave", token, 6)) text += 'ì';
+ else if (!strncmp("icirc", token, 5)) text += 'î';
+ else if (!strncmp("iuml", token, 4)) text += 'ï';
+ else if (!strncmp("Oacute", token, 6)) text += 'Ó';
+ else if (!strncmp("Ograve", token, 6)) text += 'Ò';
+ else if (!strncmp("Ocirc", token, 5)) text += 'Ô';
+ else if (!strncmp("Ouml", token, 4)) text += 'Ö';
+ else if (!strncmp("Otilde", token, 6)) text += 'Õ';
+ else if (!strncmp("oacute", token, 6)) text += 'ó';
+ else if (!strncmp("ograve", token, 6)) text += 'ò';
+ else if (!strncmp("ocirc", token, 5)) text += 'ô';
+ else if (!strncmp("ouml", token, 4)) text += 'ö';
+ else if (!strncmp("otilde", token, 6)) text += 'õ';
+ else if (!strncmp("Uacute", token, 6)) text += 'Ú';
+ else if (!strncmp("Ugrave", token, 6)) text += 'Ù';
+ else if (!strncmp("Ucirc", token, 5)) text += 'Û';
+ else if (!strncmp("Uuml", token, 4)) text += 'Ü';
+ else if (!strncmp("uacute", token, 6)) text += 'ú';
+ else if (!strncmp("ugrave", token, 6)) text += 'ù';
+ else if (!strncmp("ucirc", token, 5)) text += 'û';
+ else if (!strncmp("uuml", token, 4)) text += 'ü';
+ else if (!strncmp("Yacute", token, 6)) text += 'Ý';
+ else if (!strncmp("yacute", token, 6)) text += 'ý';
+ else if (!strncmp("yuml", token, 4)) text += 'ÿ';
- else if (!strncmp("deg", token, 3)) *to++ = '°';
- else if (!strncmp("plusmn", token, 6)) *to++ = '±';
- else if (!strncmp("sup2", token, 4)) *to++ = '²';
- else if (!strncmp("sup3", token, 4)) *to++ = '³';
- else if (!strncmp("sup1", token, 4)) *to++ = '¹';
- else if (!strncmp("nbsp", token, 4)) *to++ = 'º';
- else if (!strncmp("pound", token, 5)) *to++ = '£';
- else if (!strncmp("cent", token, 4)) *to++ = '¢';
- else if (!strncmp("frac14", token, 6)) *to++ = '¼';
- else if (!strncmp("frac12", token, 6)) *to++ = '½';
- else if (!strncmp("frac34", token, 6)) *to++ = '¾';
- else if (!strncmp("iquest", token, 6)) *to++ = '¿';
- else if (!strncmp("iexcl", token, 5)) *to++ = '¡';
- else if (!strncmp("ETH", token, 3)) *to++ = 'Ð';
- else if (!strncmp("eth", token, 3)) *to++ = 'ð';
- else if (!strncmp("THORN", token, 5)) *to++ = 'Þ';
- else if (!strncmp("thorn", token, 5)) *to++ = 'þ';
- else if (!strncmp("AElig", token, 5)) *to++ = 'Æ';
- else if (!strncmp("aelig", token, 5)) *to++ = 'æ';
- else if (!strncmp("Oslash", token, 6)) *to++ = 'Ø';
- else if (!strncmp("curren", token, 6)) *to++ = '¤';
- else if (!strncmp("Ccedil", token, 6)) *to++ = 'Ç';
- else if (!strncmp("ccedil", token, 6)) *to++ = 'ç';
- else if (!strncmp("szlig", token, 5)) *to++ = 'ß';
- else if (!strncmp("Ntilde", token, 6)) *to++ = 'Ñ';
- else if (!strncmp("ntilde", token, 6)) *to++ = 'ñ';
- else if (!strncmp("yen", token, 3)) *to++ = '¥';
- else if (!strncmp("not", token, 3)) *to++ = '¬';
- else if (!strncmp("ordf", token, 4)) *to++ = 'ª';
- else if (!strncmp("uml", token, 3)) *to++ = '¨';
- else if (!strncmp("shy", token, 3)) *to++ = '­';
- else if (!strncmp("macr", token, 4)) *to++ = '¯';
+ else if (!strncmp("deg", token, 3)) text += '°';
+ else if (!strncmp("plusmn", token, 6)) text += '±';
+ else if (!strncmp("sup2", token, 4)) text += '²';
+ else if (!strncmp("sup3", token, 4)) text += '³';
+ else if (!strncmp("sup1", token, 4)) text += '¹';
+ else if (!strncmp("nbsp", token, 4)) text += 'º';
+ else if (!strncmp("pound", token, 5)) text += '£';
+ else if (!strncmp("cent", token, 4)) text += '¢';
+ else if (!strncmp("frac14", token, 6)) text += '¼';
+ else if (!strncmp("frac12", token, 6)) text += '½';
+ else if (!strncmp("frac34", token, 6)) text += '¾';
+ else if (!strncmp("iquest", token, 6)) text += '¿';
+ else if (!strncmp("iexcl", token, 5)) text += '¡';
+ else if (!strncmp("ETH", token, 3)) text += 'Ð';
+ else if (!strncmp("eth", token, 3)) text += 'ð';
+ else if (!strncmp("THORN", token, 5)) text += 'Þ';
+ else if (!strncmp("thorn", token, 5)) text += 'þ';
+ else if (!strncmp("AElig", token, 5)) text += 'Æ';
+ else if (!strncmp("aelig", token, 5)) text += 'æ';
+ else if (!strncmp("Oslash", token, 6)) text += 'Ø';
+ else if (!strncmp("curren", token, 6)) text += '¤';
+ else if (!strncmp("Ccedil", token, 6)) text += 'Ç';
+ else if (!strncmp("ccedil", token, 6)) text += 'ç';
+ else if (!strncmp("szlig", token, 5)) text += 'ß';
+ else if (!strncmp("Ntilde", token, 6)) text += 'Ñ';
+ else if (!strncmp("ntilde", token, 6)) text += 'ñ';
+ else if (!strncmp("yen", token, 3)) text += '¥';
+ else if (!strncmp("not", token, 3)) text += '¬';
+ else if (!strncmp("ordf", token, 4)) text += 'ª';
+ else if (!strncmp("uml", token, 3)) text += '¨';
+ else if (!strncmp("shy", token, 3)) text += '­';
+ else if (!strncmp("macr", token, 4)) text += '¯';
continue;
}
@@ -163,152 +160,95 @@ char ThMLGBF::ProcessText(char *text, int maxlen)
intoken = false;
// process desired tokens
if (!strncmp(token, "sync type=\"Strongs\" value=\"", 27)) {
- *to++ = '<';
- *to++ = 'W';
+ text += "<W";
for (unsigned int i = 27; token[i] != '\"'; i++)
- *to++ = token[i];
- *to++ = '>';
+ text += token[i];
+ text += '>';
continue;
}
if (!strncmp(token, "sync type=\"morph\" value=\"", 25)) {
- *to++ = '<';
- *to++ = 'W';
- *to++ = 'T';
+ text += "<WT";
for (unsigned int i = 25; token[i] != '\"'; i++)
- *to++ = token[i];
- *to++ = '>';
+ text += token[i];
+ text += '>';
continue;
}
else if (!strncmp(token, "scripRef", 8)) {
- *to++ = '<';
- *to++ = 'R';
- *to++ = 'X';
- *to++ = '>';
+ text += "<RX>";
continue;
}
- else if (!strncmp(token, "/scripRef", 9)) {
- *to++ = '<';
- *to++ = 'R';
- *to++ = 'x';
- *to++ = '>';
+ else if (!strncmp(token, "/scripRef", 9)) {
+ text += "<Rx>";
continue;
}
else if (!strncmp(token, "note", 4)) {
- *to++ = '<';
- *to++ = 'R';
- *to++ = 'F';
- *to++ = '>';
+ text += "<RF>";
continue;
}
else if (!strncmp(token, "/note", 5)) {
- *to++ = '<';
- *to++ = 'R';
- *to++ = 'f';
- *to++ = '>';
+ text += "<Rf>";
continue;
}
else if (!strncmp(token, "sup", 3)) {
- *to++ = '<';
- *to++ = 'F';
- *to++ = 'S';
- *to++ = '>';
+ text += "<FS>";
}
else if (!strncmp(token, "/sup", 4)) {
- *to++ = '<';
- *to++ = 'F';
- *to++ = 's';
- *to++ = '>';
+ text += "<Fs>";
}
else if (!strnicmp(token, "font color=#ff0000", 18)) {
- *to++ = '<';
- *to++ = 'F';
- *to++ = 'R';
- *to++ = '>';
+ text += "<FR>";
continue;
}
else if (!strnicmp(token, "/font", 5)) {
- *to++ = '<';
- *to++ = 'F';
- *to++ = 'r';
- *to++ = '>';
+ text += "<Fr>";
continue;
}
else if (!strncmp(token, "div class=\"sechead\"", 19)) {
- *to++ = '<';
- *to++ = 'T';
- *to++ = 'S';
- *to++ = '>';
+ text += "<TS>";
sechead = true;
continue;
}
else if (sechead && !strncmp(token, "/div", 19)) {
- *to++ = '<';
- *to++ = 'T';
- *to++ = 's';
- *to++ = '>';
+ text += "<Ts>";
sechead = false;
continue;
}
else if (!strncmp(token, "div class=\"title\"", 19)) {
- *to++ = '<';
- *to++ = 'T';
- *to++ = 'T';
- *to++ = '>';
+ text += "<TT>";
title = true;
continue;
}
else if (title && !strncmp(token, "/div", 19)) {
- *to++ = '<';
- *to++ = 'T';
- *to++ = 't';
- *to++ = '>';
+ text += "<Tt>";
title = false;
continue;
}
else if (!strnicmp(token, "br", 2)) {
- *to++ = '<';
- *to++ = 'C';
- *to++ = 'L';
- *to++ = '>';
+ text += "<CL>";
continue;
}
else switch(*token) {
case 'I': // font tags
case 'i':
- *to++ = '<';
- *to++ = 'F';
- *to++ = 'I';
- *to++ = '>';
+ text += "<FI>";
continue;
case 'B': // bold start
case 'b':
- *to++ = '<';
- *to++ = 'F';
- *to++ = 'B';
- *to++ = '>';
+ text += "<FB>";
continue;
case '/':
switch(token[1]) {
case 'P':
case 'p':
- *to++ = '<';
- *to++ = 'C';
- *to++ = 'M';
- *to++ = '>';
+ text += "<CM>";
continue;
case 'I':
case 'i': // italic end
- *to++ = '<';
- *to++ = 'F';
- *to++ = 'i';
- *to++ = '>';
+ text += "<Fi>";
continue;
case 'B': // bold start
case 'b':
- *to++ = '<';
- *to++ = 'F';
- *to++ = 'b';
- *to++ = '>';
+ text += "<Fb>";
continue;
}
}
@@ -319,12 +259,25 @@ char ThMLGBF::ProcessText(char *text, int maxlen)
token[tokpos++] = *from;
token[tokpos+2] = 0;
}
- else *to++ = *from;
+ else text += *from;
}
- *to++ = 0;
- *to = 0;
+
+ orig = text;
+ from = orig.c_str();
+ for (text = ""; *from; from++) { //loop to remove extra spaces
+ if ((strchr(" \t\n\r", *from))) {
+ while (*(from+1) && (strchr(" \t\n\r", *(from+1)))) {
+ from++;
+ }
+ text += " ";
+ }
+ else {
+ text += *from;
+ }
+ }
+ text += (char)0;
+
return 0;
}
-
-
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmlheadings.cpp b/src/modules/filters/thmlheadings.cpp
index 00b8a23..bc764bb 100644
--- a/src/modules/filters/thmlheadings.cpp
+++ b/src/modules/filters/thmlheadings.cpp
@@ -1,107 +1,96 @@
/******************************************************************************
*
- * thmlheadings - SWFilter decendant to hide or show headings
+ * thmlheadings - SWFilter descendant to hide or show headings
* in a ThML module.
*/
#include <stdlib.h>
-#include <string.h>
#include <thmlheadings.h>
+#include <utilxml.h>
+
+#include <iostream>
+
#ifndef __GNUC__
#else
#include <unixstr.h>
#endif
+SWORD_NAMESPACE_START
-const char ThMLHeadings::on[] = "On";
-const char ThMLHeadings::off[] = "Off";
-const char ThMLHeadings::optName[] = "Headings";
-const char ThMLHeadings::optTip[] = "Toggles Headings On and Off if they exist";
+const char oName[] = "Headings";
+const char oTip[] = "Toggles Headings On and Off if they exist";
+const SWBuf choices[3] = {"On", "Off", ""};
+const StringList oValues(&choices[0], &choices[2]);
-ThMLHeadings::ThMLHeadings() {
- option = false;
- options.push_back(on);
- options.push_back(off);
+ThMLHeadings::ThMLHeadings() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("Off");
}
ThMLHeadings::~ThMLHeadings() {
}
-void ThMLHeadings::setOptionValue(const char *ival)
-{
- option = (!stricmp(ival, on));
-}
-
-const char *ThMLHeadings::getOptionValue()
-{
- return (option) ? on:off;
-}
-char ThMLHeadings::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
-{
+char ThMLHeadings::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
if (!option) { // if we don't want headings
- char *to, *from, token[2048]; // cheese. Fix.
- int tokpos = 0;
+ SWBuf token;
bool intoken = false;
- int len;
bool hide = false;
- len = strlen(text) + 1; // shift string to right of buffer
- if (len < maxlen) {
- memmove(&text[maxlen - len], text, len);
- from = &text[maxlen - len];
- }
- else from = text; // -------------------------------
+ SWBuf orig = text;
+ const char *from = orig.c_str();
- for (to = text; *from; from++) {
+ for (text = ""; *from; from++) {
if (*from == '<') {
intoken = true;
- tokpos = 0;
- token[0] = 0;
- token[1] = 0;
- token[2] = 0;
+ token = "";
continue;
}
if (*from == '>') { // process tokens
intoken = false;
- if (!strnicmp(token, "div class=\"sechead\"", 19)) {
+
+ XMLTag tag(token);
+
+ if (!stricmp(tag.getName(), "div")) { //we only want a div tag
+ //std::cout << tag.toString() << " " << tag.isEndTag() << std::endl;
+
+ if (tag.getAttribute("class") && !stricmp(tag.getAttribute("class"), "sechead")) {
hide = true;
continue;
- }
- if (!strnicmp(token, "div class=\"title\"", 17)) {
+ }
+
+ if (tag.getAttribute("class") && !stricmp(tag.getAttribute("class"), "title")) {
hide = true;
continue;
- }
- else if (hide && !strnicmp(token, "/div", 4)) {
- hide = false;
- continue;
+ }
+
+ if (hide && tag.isEndTag()) {
+ hide = false;
+ continue;
+ }
+
}
// if not a heading token, keep token in text
if (!hide) {
- *to++ = '<';
- for (char *tok = token; *tok; tok++)
- *to++ = *tok;
- *to++ = '>';
+ text += '<';
+ text += token;
+ text += '>';
}
continue;
}
- if (intoken) {
- if (tokpos < 2045)
- token[tokpos++] = *from;
- token[tokpos+2] = 0;
+
+ if (intoken) { //copy token
+ token += *from;
}
- else {
- if (!hide) {
- *to++ = *from;
- }
+ else if (!hide) { //copy text which is not inside a token
+ text += *from;
}
}
- *to++ = 0;
- *to = 0;
}
return 0;
}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmlhtml.cpp b/src/modules/filters/thmlhtml.cpp
index 9cb8679..40b3320 100644
--- a/src/modules/filters/thmlhtml.cpp
+++ b/src/modules/filters/thmlhtml.cpp
@@ -15,10 +15,11 @@
***************************************************************************/
#include <stdlib.h>
-#include <string.h>
#include <thmlhtml.h>
#include <swmodule.h>
+#include <utilxml.h>
+SWORD_NAMESPACE_START
ThMLHTML::ThMLHTML() {
setTokenStart("<");
@@ -128,84 +129,98 @@ ThMLHTML::ThMLHTML() {
*/
setTokenCaseSensitive(true);
- addTokenSubstitute("/scripRef", " </a>");
addTokenSubstitute("note", " <font color=\"#800000\"><small>(");
addTokenSubstitute("/note", ")</small></font> ");
}
-bool ThMLHTML::handleToken(char **buf, const char *token, DualStringMap &userData) {
- if (!substituteToken(buf, token)) {
- // manually process if it wasn't a simple substitution
- if (!strncmp(token, "sync type=\"Strongs\" value=\"", 27)) {
- if (token[27] == 'H' || token[27] == 'G' || token[27] == 'A') {
- pushString(buf, "<small><em>");
- for (const char *tok = token + 5; *tok; tok++)
- if(*tok != '\"')
- *(*buf)++ = *tok;
- pushString(buf, "</em></small>");
- }
- else if (token[27] == 'T') {
- pushString(buf, "<small><i>");
- for (unsigned int i = 29; token[i] != '\"'; i++)
- *(*buf)++ = token[i];
- pushString(buf, "</i></small>");
- }
- }
- else if (!strncmp(token, "sync type=\"morph\" value=\"", 25)) {
- pushString(buf, "<small><em>");
- for (unsigned int i = 25; token[i] != '\"'; i++)
- *(*buf)++ = token[i];
- pushString(buf, "</em></small>");
- }
- else if (!strncmp(token, "sync type=\"lemma\" value=\"", 25)) {
- pushString(buf, "<small><em>(");
- for (unsigned int i = 25; token[i] != '\"'; i++)
- *(*buf)++ = token[i];
- pushString(buf, ")</em></small>");
+bool ThMLHTML::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+ if (!substituteToken(buf, token)) { // manually process if it wasn't a simple substitution
+ MyUserData *u = (MyUserData *)userData;
+ XMLTag tag(token);
+ if (!strcmp(tag.getName(), "sync")) {
+ if (tag.getAttribute("type") && tag.getAttribute("value") && !strcmp(tag.getAttribute("type"), "Strongs")) {
+ const char* value = tag.getAttribute("value");
+ if (*value == 'H' || *value == 'G' || *value == 'A') {
+ value++;
+ buf += "<small><em>";
+ buf += value;
+ buf += "</em></small>";
+ }
+ else if (*value == 'T') {
+ value += 2;
+
+ buf += "<small><i>";
+ buf += value;
+ buf += "</i></small>";
+ }
+ }
+ else if (tag.getAttribute("type") && tag.getAttribute("value") && !strcmp(tag.getAttribute("type"), "morph")) {
+ buf += "<small><em>";
+ buf += tag.getAttribute("value");
+ buf += "</em></small>";
+ }
+ else if (tag.getAttribute("type") && tag.getAttribute("value") && !strcmp(tag.getAttribute("type"), "lemma")) {
+ buf += "<small><em>(";
+ buf += tag.getAttribute("value");
+ buf += ")</em></small>";
+ }
}
- else if (!strncmp(token, "scripRef", 8)) {
- pushString(buf, "<a href=\"");
- for (const char *tok = token + 9; *tok; tok++)
- if(*tok != '\"')
- *(*buf)++ = *tok;
- *(*buf)++ = '\"';
- *(*buf)++ = '>';
+ else if (!strcmp(tag.getName(), "div")) {
+ if (tag.isEndTag() && (u->SecHead)) {
+ buf += "</i></b><br />";
+ u->SecHead = false;
+ }
+ else if (tag.getAttribute("class")) {
+ if (!strcmp(tag.getAttribute("class"), "sechead")) {
+ u->SecHead = true;
+ buf += "<br /><b><i>";
+ }
+ else if (!strcmp(tag.getAttribute("class"), "title")) {
+ u->SecHead = true;
+ buf += "<br /><b><i>";
+ }
+ }
}
- else if (!strncmp(token, "img ", 4)) {
+ else if (!strcmp(tag.getName(), "img")) {
const char *src = strstr(token, "src");
if (!src) // assert we have a src attribute
return false;
- *(*buf)++ = '<';
+ buf += '<';
for (const char *c = token; *c; c++) {
if (c == src) {
for (;((*c) && (*c != '"')); c++)
- *(*buf)++ = *c;
+ buf += *c;
if (!*c) { c--; continue; }
- *(*buf)++ = '"';
+ buf += '"';
if (*(c+1) == '/') {
- pushString(buf, "file:");
- pushString(buf, module->getConfigEntry("AbsoluteDataPath"));
- if (*((*buf)-1) == '/')
+ buf += "file:";
+ buf += userData->module->getConfigEntry("AbsoluteDataPath");
+ if (buf[buf.length()-2] == '/')
c++; // skip '/'
}
continue;
}
- *(*buf)++ = *c;
+ buf += *c;
}
- *(*buf)++ = '>';
+ buf += '>';
}
- else if(!strncmp(token, "note", 4)) {
- pushString(buf, " <font color=\"#800000\"><small>(");
- }
+ else if (!strcmp(tag.getName(), "scripRef")) { //do nothing with scrip refs, we leave them out
+ }
else {
- return false; // we still didn't handle token
+ buf += '<';
+ buf += token;
+ buf += '>';
+
+// return false; // we still didn't handle token
}
}
return true;
}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmlhtmlhref.cpp b/src/modules/filters/thmlhtmlhref.cpp
index ce7e3fd..b94b8ae 100644
--- a/src/modules/filters/thmlhtmlhref.cpp
+++ b/src/modules/filters/thmlhtmlhref.cpp
@@ -15,255 +15,195 @@
***************************************************************************/
#include <stdlib.h>
-#include <string.h>
#include <thmlhtmlhref.h>
#include <swmodule.h>
+#include <utilxml.h>
+#include <versekey.h>
+SWORD_NAMESPACE_START
-ThMLHTMLHREF::ThMLHTMLHREF() {
- setTokenStart("<");
- setTokenEnd(">");
-/*
- setEscapeStart("&");
- setEscapeEnd(";");
- setEscapeStringCaseSensitive(true);
+ThMLHTMLHREF::MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) {
+ if (module) {
+ version = module->Name();
+ BiblicalText = (!strcmp(module->Type(), "Biblical Texts"));
+ }
+}
- addEscapeStringSubstitute("nbsp", " ");
- addEscapeStringSubstitute("quot", "\"");
- addEscapeStringSubstitute("amp", "&");
- addEscapeStringSubstitute("lt", "<");
- addEscapeStringSubstitute("gt", ">");
- addEscapeStringSubstitute("brvbar", "|");
- addEscapeStringSubstitute("sect", "§");
- addEscapeStringSubstitute("copy", "©");
- addEscapeStringSubstitute("laquo", "«");
- addEscapeStringSubstitute("reg", "®");
- addEscapeStringSubstitute("acute", "´");
- addEscapeStringSubstitute("para", "¶");
- addEscapeStringSubstitute("raquo", "»");
- addEscapeStringSubstitute("Aacute", "Á");
- addEscapeStringSubstitute("Agrave", "À");
- addEscapeStringSubstitute("Acirc", "Â");
- addEscapeStringSubstitute("Auml", "Ä");
- addEscapeStringSubstitute("Atilde", "Ã");
- addEscapeStringSubstitute("Aring", "Å");
- addEscapeStringSubstitute("aacute", "á");
- addEscapeStringSubstitute("agrave", "à");
- addEscapeStringSubstitute("acirc", "â");
- addEscapeStringSubstitute("auml", "ä");
- addEscapeStringSubstitute("atilde", "ã");
- addEscapeStringSubstitute("aring", "å");
- addEscapeStringSubstitute("Eacute", "É");
- addEscapeStringSubstitute("Egrave", "È");
- addEscapeStringSubstitute("Ecirc", "Ê");
- addEscapeStringSubstitute("Euml", "Ë");
- addEscapeStringSubstitute("eacute", "é");
- addEscapeStringSubstitute("egrave", "è");
- addEscapeStringSubstitute("ecirc", "ê");
- addEscapeStringSubstitute("euml", "ë");
- addEscapeStringSubstitute("Iacute", "Í");
- addEscapeStringSubstitute("Igrave", "Ì");
- addEscapeStringSubstitute("Icirc", "Î");
- addEscapeStringSubstitute("Iuml", "Ï");
- addEscapeStringSubstitute("iacute", "í");
- addEscapeStringSubstitute("igrave", "ì");
- addEscapeStringSubstitute("icirc", "î");
- addEscapeStringSubstitute("iuml", "ï");
- addEscapeStringSubstitute("Oacute", "Ó");
- addEscapeStringSubstitute("Ograve", "Ò");
- addEscapeStringSubstitute("Ocirc", "Ô");
- addEscapeStringSubstitute("Ouml", "Ö");
- addEscapeStringSubstitute("Otilde", "Õ");
- addEscapeStringSubstitute("oacute", "ó");
- addEscapeStringSubstitute("ograve", "ò");
- addEscapeStringSubstitute("ocirc", "ô");
- addEscapeStringSubstitute("ouml", "ö");
- addEscapeStringSubstitute("otilde", "õ");
- addEscapeStringSubstitute("Uacute", "Ú");
- addEscapeStringSubstitute("Ugrave", "Ù");
- addEscapeStringSubstitute("Ucirc", "Û");
- addEscapeStringSubstitute("Uuml", "Ü");
- addEscapeStringSubstitute("uacute", "ú");
- addEscapeStringSubstitute("ugrave", "ù");
- addEscapeStringSubstitute("ucirc", "û");
- addEscapeStringSubstitute("uuml", "ü");
- addEscapeStringSubstitute("Yacute", "Ý");
- addEscapeStringSubstitute("yacute", "ý");
- addEscapeStringSubstitute("yuml", "ÿ");
+ThMLHTMLHREF::ThMLHTMLHREF() {
+ setTokenStart("<");
+ setTokenEnd(">");
- addEscapeStringSubstitute("deg", "°");
- addEscapeStringSubstitute("plusmn", "±");
- addEscapeStringSubstitute("sup2", "²");
- addEscapeStringSubstitute("sup3", "³");
- addEscapeStringSubstitute("sup1", "¹");
- addEscapeStringSubstitute("nbsp", "º");
- addEscapeStringSubstitute("pound", "£");
- addEscapeStringSubstitute("cent", "¢");
- addEscapeStringSubstitute("frac14", "¼");
- addEscapeStringSubstitute("frac12", "½");
- addEscapeStringSubstitute("frac34", "¾");
- addEscapeStringSubstitute("iquest", "¿");
- addEscapeStringSubstitute("iexcl", "¡");
- addEscapeStringSubstitute("ETH", "Ð");
- addEscapeStringSubstitute("eth", "ð");
- addEscapeStringSubstitute("THORN", "Þ");
- addEscapeStringSubstitute("thorn", "þ");
- addEscapeStringSubstitute("AElig", "Æ");
- addEscapeStringSubstitute("aelig", "æ");
- addEscapeStringSubstitute("Oslash", "Ø");
- addEscapeStringSubstitute("curren", "¤");
- addEscapeStringSubstitute("Ccedil", "Ç");
- addEscapeStringSubstitute("ccedil", "ç");
- addEscapeStringSubstitute("szlig", "ß");
- addEscapeStringSubstitute("Ntilde", "Ñ");
- addEscapeStringSubstitute("ntilde", "ñ");
- addEscapeStringSubstitute("yen", "¥");
- addEscapeStringSubstitute("not", "¬");
- addEscapeStringSubstitute("ordf", "ª");
- addEscapeStringSubstitute("uml", "¨");
- addEscapeStringSubstitute("shy", "­");
- addEscapeStringSubstitute("macr", "¯");
-*/
setTokenCaseSensitive(true);
-
- addTokenSubstitute("note", " <font color=\"#800000\"><small>(");
- addTokenSubstitute("/note", ")</small></font> ");
+ addTokenSubstitute("scripture", "<i> ");
addTokenSubstitute("/scripture", "</i> ");
}
-bool ThMLHTMLHREF::handleToken(char **buf, const char *token, DualStringMap &userData) {
+bool ThMLHTMLHREF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
const char *tok;
- if (!substituteToken(buf, token)) {
- // manually process if it wasn't a simple substitution
- if (!strncmp(token, "sync ", 5)) {
- pushString(buf, "<a href=\"");
- for (tok = token + 5; *(tok+1); tok++)
- if(*tok != '\"')
- *(*buf)++ = *tok;
- *(*buf)++ = '\"';
- *(*buf)++ = '>';
-
- //scan for value and add it to the buffer
- for (tok = token + 5; *tok; tok++) {
- if (!strncmp(tok, "value=\"", 7)) {
- tok += 7;
- for (;*tok != '\"'; tok++)
- *(*buf)++ = *tok;
- break;
- }
+ if (!substituteToken(buf, token)) { // manually process if it wasn't a simple substitution
+ MyUserData *u = (MyUserData *)userData;
+
+ XMLTag tag(token);
+ if ((!tag.isEndTag()) && (!tag.isEmpty()))
+ u->startTag = tag;
+
+ if (tag.getName() && !strcmp(tag.getName(), "sync")) {
+ SWBuf value = tag.getAttribute("value");
+ if (tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "morph")) { //&gt;
+ buf += "<small><em>(<a href=\"";
+ buf += "type=";
+ buf += tag.getAttribute("type");
+
+ //const char* value = tag.getAttribute("value");
+ buf += " value=";
+ buf += (value.length()) ? value.c_str() : "";
+ buf += "\">";
+ buf += (value.length()) ? value.c_str() : "";
+ buf += "</a>) </em></small>";
+ }
+ else if (tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "Strongs")) {
+ buf += "<small><em>&lt;<a href=\"";
+ buf += "type=";
+ buf += tag.getAttribute("type");
+
+ //const char* value = tag.getAttribute("value");
+ buf += " value=";
+ buf += (value.length()) ? value.c_str() : "";
+ buf += "\">";
+ value<<1;
+ buf += (value.length()) ? value.c_str() : "";
+ buf += "</a>&gt; </em></small>";
+ }
+ else if (tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "Dict")) {
+ if (!tag.isEndTag())
+ buf += "<b>";
+ else buf += "</b>";
}
- pushString(buf, "</a>");
+
}
-
- else if (!strncmp(token, "scripture ", 10)) {
- userData["inscriptRef"] = "true";
- pushString(buf, "<i>");
- }
-
- else if (!strncmp(token, "scripRef p", 10) || !strncmp(token, "scripRef v", 10)) {
- userData["inscriptRef"] = "true";
- pushString(buf, "<a href=\"");
- for (const char *tok = token + 9; *(tok+1); tok++)
- if(*tok != '\"')
- *(*buf)++ = *tok;
- *(*buf)++ = '\"';
- *(*buf)++ = '>';
- }
-
- // we're starting a scripRef like "<scripRef>John 3:16</scripRef>"
- else if (!strcmp(token, "scripRef")) {
- userData["inscriptRef"] = "false";
- // let's stop text from going to output
- userData["suspendTextPassThru"] = "true";
+ // <note> tag
+ else if (!strcmp(tag.getName(), "note")) {
+ if (!tag.isEndTag()) {
+ if (!tag.isEmpty()) {
+ SWBuf type = tag.getAttribute("type");
+ SWBuf footnoteNumber = tag.getAttribute("swordFootnote");
+ VerseKey *vkey;
+ // see if we have a VerseKey * or descendant
+ try {
+ vkey = SWDYNAMIC_CAST(VerseKey, u->key);
+ }
+ catch ( ... ) { }
+ if (vkey) {
+ // leave this special OSIS type in for cross-reference note types? ThML might use this some day; it doesn't hurt.
+ char ch = ((tag.getAttribute("type") && ((!strcmp(tag.getAttribute("type"), "crossReference")) || (!strcmp(tag.getAttribute("type"), "x-cross-ref")))) ? 'x':'n');
+ buf.appendFormatted("<a href=\"noteID=%s.%c.%s\"><small><sup>*%c</sup></small></a> ", vkey->getText(), ch, footnoteNumber.c_str(), ch);
+ }
+ u->suspendTextPassThru = true;
+ }
+ }
+ if (tag.isEndTag()) {
+ u->suspendTextPassThru = false;
+ }
}
-
- // we've ended a scripRef
- else if (!strcmp(token, "/scripRef")) {
- if (userData["inscriptRef"] == "true") { // like "<scripRef passage="John 3:16">John 3:16</scripRef>"
- userData["inscriptRef"] = "false";
- pushString(buf, "</a>");
+ // <scripRef> tag
+ else if (!strcmp(tag.getName(), "scripRef")) {
+ if (!tag.isEndTag()) {
+ if (!tag.isEmpty()) {
+ u->suspendTextPassThru = true;
+ }
}
-
- else { // like "<scripRef>John 3:16</scripRef>"
- pushString(buf, "<a href=\"passage=");
- //char *strbuf = (char *)userData["lastTextNode"].c_str();
- pushString(buf, userData["lastTextNode"].c_str());
- *(*buf)++ = '\"';
- *(*buf)++ = '>';
- pushString(buf, userData["lastTextNode"].c_str());
+ if (tag.isEndTag()) { // </scripRef>
+ if (!u->BiblicalText) {
+ SWBuf refList = u->startTag.getAttribute("passage");
+ if (!refList.length())
+ refList = u->lastTextNode;
+ SWBuf version = tag.getAttribute("version");
+ buf += "&nbsp<a href=\"";
+ if (version.length()) {
+ buf += "version=";
+ buf += version;
+ buf += " ";
+ }
+ buf += "passage=";
+ buf += refList.c_str();
+ buf += "\">";
+ buf += u->lastTextNode.c_str();
+ buf += "</a>&nbsp";
+ }
+ else {
+ SWBuf footnoteNumber = u->startTag.getAttribute("swordFootnote");
+ VerseKey *vkey;
+ // see if we have a VerseKey * or descendant
+ try {
+ vkey = SWDYNAMIC_CAST(VerseKey, u->key);
+ }
+ catch ( ... ) {}
+ if (vkey) {
+ // leave this special OSIS type in for cross-reference note types? ThML might use this some day; it doesn't hurt.
+ buf.appendFormatted("<a href=\"noteID=%s.x.%s\"><small><sup>*x</sup></small></a> ", vkey->getText(), footnoteNumber.c_str());
+ }
+ }
+
// let's let text resume to output again
- userData["suspendTextPassThru"] = "false";
- pushString(buf, "</a>");
+ u->suspendTextPassThru = false;
}
}
-
- else if (!strncmp(token, "div class=\"sechead\"", 19)) {
- userData["SecHead"] = "true";
- pushString(buf, "<br /><b><i>");
- }
- else if (!strncmp(token, "div class=\"title\"", 19)) {
- userData["SecHead"] = "true";
- pushString(buf, "<br /><b><i>");
- }
- else if (!strncmp(token, "/div", 4)) {
- if (userData["SecHead"] == "true") {
- pushString(buf, "</i></b><br />");
- userData["SecHead"] = "false";
+ else if (tag.getName() && !strcmp(tag.getName(), "div")) {
+ if (tag.isEndTag() && u->SecHead) {
+ buf += "</i></b><br />";
+ u->SecHead = false;
+ }
+ else if (tag.getAttribute("class")) {
+ if (!stricmp(tag.getAttribute("class"), "sechead")) {
+ u->SecHead = true;
+ buf += "<br /><b><i>";
+ }
+ else if (!stricmp(tag.getAttribute("class"), "title")) {
+ u->SecHead = true;
+ buf += "<br /><b><i>";
+ }
}
}
-
- else if (!strncmp(token, "sync type=\"Strongs\" value=\"T", 28)) {
- pushString(buf, "<a href=\"");
- for (tok = token + 5; *(tok+1); tok++)
- if(*tok != '\"')
- *(*buf)++ = *tok;
- *(*buf)++ = '\"';
- *(*buf)++ = '>';
- for (tok = token + 29; *(tok+2); tok++)
- if(*tok != '\"')
- *(*buf)++ = *tok;
- pushString(buf, "</a>");
- }
- else if (!strncmp(token, "img ", 4)) {
+ else if (tag.getName() && (!strcmp(tag.getName(), "img") || !strcmp(tag.getName(), "image"))) {
const char *src = strstr(token, "src");
if (!src) // assert we have a src attribute
return false;
- *(*buf)++ = '<';
+ buf += '<';
for (const char *c = token; *c; c++) {
if (c == src) {
for (;((*c) && (*c != '"')); c++)
- *(*buf)++ = *c;
+ buf += *c;
if (!*c) { c--; continue; }
- *(*buf)++ = '"';
+ buf += '"';
if (*(c+1) == '/') {
- pushString(buf, "file:");
- pushString(buf, module->getConfigEntry("AbsoluteDataPath"));
- if (*((*buf)-1) == '/')
+ buf += "file:";
+ buf += userData->module->getConfigEntry("AbsoluteDataPath");
+ if (buf[buf.length()-2] == '/')
c++; // skip '/'
}
continue;
}
- *(*buf)++ = *c;
+ buf += *c;
}
- *(*buf)++ = '>';
+ buf += '>';
}
- else if (!strncmp(token, "note", 4)) {
- pushString(buf, " <small><font color=\"#800000\">(");
- }
else {
- *(*buf)++ = '<';
- for (const char *tok = token; *tok; tok++)
- *(*buf)++ = *tok;
- *(*buf)++ = '>';
+ buf += '<';
+ /*for (const char *tok = token; *tok; tok++)
+ buf += *tok;*/
+ buf += token;
+ buf += '>';
//return false; // we still didn't handle token
}
}
return true;
}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmllemma.cpp b/src/modules/filters/thmllemma.cpp
index 33856db..02939df 100644
--- a/src/modules/filters/thmllemma.cpp
+++ b/src/modules/filters/thmllemma.cpp
@@ -1,97 +1,69 @@
/******************************************************************************
*
- * thmllemma - SWFilter decendant to hide or show lemmas
+ * thmllemma - SWFilter descendant to hide or show lemmas
* in a ThML module.
*/
#include <stdlib.h>
-#include <string.h>
#include <thmllemma.h>
#ifndef __GNUC__
#else
#include <unixstr.h>
#endif
+SWORD_NAMESPACE_START
-const char ThMLLemma::on[] = "On";
-const char ThMLLemma::off[] = "Off";
-const char ThMLLemma::optName[] = "Lemmas";
-const char ThMLLemma::optTip[] = "Toggles Lemmas On and Off if they exist";
+const char oName[] = "Lemmas";
+const char oTip[] = "Toggles Lemmas On and Off if they exist";
-ThMLLemma::ThMLLemma() {
- option = false;
- options.push_back(on);
- options.push_back(off);
+const SWBuf choices[3] = {"On", "Off", ""};
+const StringList oValues(&choices[0], &choices[2]);
+
+ThMLLemma::ThMLLemma() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("Off");
}
ThMLLemma::~ThMLLemma() {
}
-void ThMLLemma::setOptionValue(const char *ival)
-{
- option = (!stricmp(ival, on));
-}
-
-const char *ThMLLemma::getOptionValue()
-{
- return (option) ? on:off;
-}
-char ThMLLemma::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
-{
+char ThMLLemma::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
if (!option) { // if we don't want lemmas
- char *to, *from, token[2048]; // cheese. Fix.
- int tokpos = 0;
bool intoken = false;
- int len;
- bool lastspace = false;
- len = strlen(text) + 1; // shift string to right of buffer
- if (len < maxlen) {
- memmove(&text[maxlen - len], text, len);
- from = &text[maxlen - len];
- }
- else from = text; // -------------------------------
-
- for (to = text; *from; from++) {
+ SWBuf token;
+ SWBuf orig = text;
+ const char *from = orig.c_str();
+ for (text = ""; *from; from++) {
if (*from == '<') {
intoken = true;
- tokpos = 0;
- token[0] = 0;
- token[1] = 0;
- token[2] = 0;
+ token = "";
continue;
}
- if (*from == '>') { // process tokens
+ else if (*from == '>') { // process tokens
intoken = false;
- if (!strnicmp(token, "sync type=\"lemma\" ", 18)) { // Lemma
- if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) {
- if (lastspace)
- to--;
- }
- continue;
+ if (!strnicmp(token.c_str(), "sync", 4) && strstr(token.c_str(), " type=\"lemma\"")) { // Lemma
+ continue;
}
+
// if not a lemma token, keep token in text
- *to++ = '<';
- for (char *tok = token; *tok; tok++)
- *to++ = *tok;
- *to++ = '>';
+ text += '<';
+ text.append(token);
+ text += '>';
continue;
}
+
if (intoken) {
- if (tokpos < 2045)
- token[tokpos++] = *from;
- token[tokpos+2] = 0;
- }
- else {
- *to++ = *from;
- lastspace = (*from == ' ');
+ token += *from;
}
+ else {
+ text += *from;
+ }
}
- *to++ = 0;
- *to = 0;
}
return 0;
}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmlmorph.cpp b/src/modules/filters/thmlmorph.cpp
index f95bede..606ae7e 100644
--- a/src/modules/filters/thmlmorph.cpp
+++ b/src/modules/filters/thmlmorph.cpp
@@ -1,98 +1,69 @@
/******************************************************************************
*
- * thmlmorph - SWFilter decendant to hide or show morph tags
+ * thmlmorph - SWFilter descendant to hide or show morph tags
* in a ThML module.
*/
#include <stdlib.h>
-#include <string.h>
#include <thmlmorph.h>
#ifndef __GNUC__
#else
#include <unixstr.h>
#endif
+SWORD_NAMESPACE_START
-const char ThMLMorph::on[] = "On";
-const char ThMLMorph::off[] = "Off";
-const char ThMLMorph::optName[] = "Morphological Tags";
-const char ThMLMorph::optTip[] = "Toggles Morphological Tags On and Off if they exist";
+const char oName[] = "Morphological Tags";
+const char oTip[] = "Toggles Morphological Tags On and Off if they exist";
+const SWBuf choices[3] = {"On", "Off", ""};
+const StringList oValues(&choices[0], &choices[2]);
-ThMLMorph::ThMLMorph() {
- option = false;
- options.push_back(on);
- options.push_back(off);
+ThMLMorph::ThMLMorph() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("Off");
}
ThMLMorph::~ThMLMorph() {
}
-void ThMLMorph::setOptionValue(const char *ival)
-{
- option = (!stricmp(ival, on));
-}
-
-const char *ThMLMorph::getOptionValue()
-{
- return (option) ? on:off;
-}
-char ThMLMorph::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
-{
+char ThMLMorph::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
if (!option) { // if we don't want morph tags
- char *to, *from, token[2048]; // cheese. Fix.
- int tokpos = 0;
bool intoken = false;
- int len;
- bool lastspace = false;
-
- len = strlen(text) + 1; // shift string to right of buffer
- if (len < maxlen) {
- memmove(&text[maxlen - len], text, len);
- from = &text[maxlen - len];
- }
- else from = text; // -------------------------------
- for (to = text; *from; from++) {
+ SWBuf token;
+ SWBuf orig = text;
+ const char *from = orig.c_str();
+ for (text = ""; *from; from++) {
if (*from == '<') {
intoken = true;
- tokpos = 0;
- token[0] = 0;
- token[1] = 0;
- token[2] = 0;
+ token = "";
continue;
}
if (*from == '>') { // process tokens
intoken = false;
- if (!strnicmp(token, "sync type=\"morph\" ", 18)) { // Morph
- if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) {
- if (lastspace)
- to--;
- }
+ if (!strncmp(token.c_str(), "sync ", 5) && strstr(token.c_str(), "type=\"morph\"")) { // Morph
continue;
}
+
// if not a morph tag token, keep token in text
- *to++ = '<';
- for (char *tok = token; *tok; tok++)
- *to++ = *tok;
- *to++ = '>';
+ text += '<';
+ text += token;
+ text += '>';
continue;
}
+
if (intoken) {
- if (tokpos < 2045)
- token[tokpos++] = *from;
- token[tokpos+2] = 0;
+ token += *from;
}
else {
- *to++ = *from;
- lastspace = (*from == ' ');
+ text += *from;
}
}
- *to++ = 0;
- *to = 0;
}
return 0;
}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmlolb.cpp b/src/modules/filters/thmlolb.cpp
deleted file mode 100644
index 2b31fab..0000000
--- a/src/modules/filters/thmlolb.cpp
+++ /dev/null
@@ -1,243 +0,0 @@
-/***************************************************************************
- thmlolb.cpp - ThML to OLB filter
- -------------------
- begin : 2001-05-10
- copyright : 2001 by CrossWire Bible Society
- ***************************************************************************/
-
-/***************************************************************************
- * *
- * This program is free software; you can redistribute it and/or modify *
- * it under the terms of the GNU General Public License as published by *
- * the Free Software Foundation; either version 2 of the License, or *
- * (at your option) any later version. *
- * *
- ***************************************************************************/
-
-#include <stdlib.h>
-#include <string.h>
-#include <thmlolb.h>
-
-
-ThMLOLB::ThMLOLB()
-{
-}
-
-
-char ThMLOLB::ProcessText(char *text, int maxlen)
-{
- char *to, *from, token[2048];
- int tokpos = 0;
- bool intoken = false;
- int len;
- bool ampersand = false;
- int i;
-
- len = strlen(text) + 1; // shift string to right of buffer
- if (len < maxlen) {
- memmove(&text[maxlen - len], text, len);
- from = &text[maxlen - len];
- }
- else from = text; // -------------------------------
- for (to = text; *from; from++)
- {
- if (*from == '<') {
- intoken = true;
- tokpos = 0;
- memset(token, 0, 2048);
- ampersand = false;
- continue;
- }
- else if (*from == '&') {
- intoken = true;
- tokpos = 0;
- memset(token, 0, 2048);
- ampersand = true;
- continue;
- }
- if (*from == ';' && ampersand) {
- intoken = false;
-
- if (!strncmp("nbsp", token, 4)) *to++ = ' ';
- else if (!strncmp("quot", token, 4)) *to++ = '"';
- else if (!strncmp("amp", token, 3)) *to++ = '&';
- else if (!strncmp("lt", token, 2)) *to++ = '<';
- else if (!strncmp("gt", token, 2)) *to++ = '>';
- else if (!strncmp("brvbar", token, 6)) *to++ = '|';
- else if (!strncmp("sect", token, 4)) *to++ = '§';
- else if (!strncmp("copy", token, 4)) *to++ = '©';
- else if (!strncmp("laquo", token, 5)) *to++ = '«';
- else if (!strncmp("reg", token, 3)) *to++ = '®';
- else if (!strncmp("acute", token, 5)) *to++ = '´';
- else if (!strncmp("para", token, 4)) *to++ = '¶';
- else if (!strncmp("raquo", token, 5)) *to++ = '»';
-
- else if (!strncmp("Aacute", token, 6)) *to++ = 'Á';
- else if (!strncmp("Agrave", token, 6)) *to++ = 'À';
- else if (!strncmp("Acirc", token, 5)) *to++ = 'Â';
- else if (!strncmp("Auml", token, 4)) *to++ = 'Ä';
- else if (!strncmp("Atilde", token, 6)) *to++ = 'Ã';
- else if (!strncmp("Aring", token, 5)) *to++ = 'Å';
- else if (!strncmp("aacute", token, 6)) *to++ = 'á';
- else if (!strncmp("agrave", token, 6)) *to++ = 'à';
- else if (!strncmp("acirc", token, 5)) *to++ = 'â';
- else if (!strncmp("auml", token, 4)) *to++ = 'ä';
- else if (!strncmp("atilde", token, 6)) *to++ = 'ã';
- else if (!strncmp("aring", token, 5)) *to++ = 'å';
- else if (!strncmp("Eacute", token, 6)) *to++ = 'É';
- else if (!strncmp("Egrave", token, 6)) *to++ = 'È';
- else if (!strncmp("Ecirc", token, 5)) *to++ = 'Ê';
- else if (!strncmp("Euml", token, 4)) *to++ = 'Ë';
- else if (!strncmp("eacute", token, 6)) *to++ = 'é';
- else if (!strncmp("egrave", token, 6)) *to++ = 'è';
- else if (!strncmp("ecirc", token, 5)) *to++ = 'ê';
- else if (!strncmp("euml", token, 4)) *to++ = 'ë';
- else if (!strncmp("Iacute", token, 6)) *to++ = 'Í';
- else if (!strncmp("Igrave", token, 6)) *to++ = 'Ì';
- else if (!strncmp("Icirc", token, 5)) *to++ = 'Î';
- else if (!strncmp("Iuml", token, 4)) *to++ = 'Ï';
- else if (!strncmp("iacute", token, 6)) *to++ = 'í';
- else if (!strncmp("igrave", token, 6)) *to++ = 'ì';
- else if (!strncmp("icirc", token, 5)) *to++ = 'î';
- else if (!strncmp("iuml", token, 4)) *to++ = 'ï';
- else if (!strncmp("Oacute", token, 6)) *to++ = 'Ó';
- else if (!strncmp("Ograve", token, 6)) *to++ = 'Ò';
- else if (!strncmp("Ocirc", token, 5)) *to++ = 'Ô';
- else if (!strncmp("Ouml", token, 4)) *to++ = 'Ö';
- else if (!strncmp("Otilde", token, 6)) *to++ = 'Õ';
- else if (!strncmp("oacute", token, 6)) *to++ = 'ó';
- else if (!strncmp("ograve", token, 6)) *to++ = 'ò';
- else if (!strncmp("ocirc", token, 5)) *to++ = 'ô';
- else if (!strncmp("ouml", token, 4)) *to++ = 'ö';
- else if (!strncmp("otilde", token, 6)) *to++ = 'õ';
- else if (!strncmp("Uacute", token, 6)) *to++ = 'Ú';
- else if (!strncmp("Ugrave", token, 6)) *to++ = 'Ù';
- else if (!strncmp("Ucirc", token, 5)) *to++ = 'Û';
- else if (!strncmp("Uuml", token, 4)) *to++ = 'Ü';
- else if (!strncmp("uacute", token, 6)) *to++ = 'ú';
- else if (!strncmp("ugrave", token, 6)) *to++ = 'ù';
- else if (!strncmp("ucirc", token, 5)) *to++ = 'û';
- else if (!strncmp("uuml", token, 4)) *to++ = 'ü';
- else if (!strncmp("Yacute", token, 6)) *to++ = 'Ý';
- else if (!strncmp("yacute", token, 6)) *to++ = 'ý';
- else if (!strncmp("yuml", token, 4)) *to++ = 'ÿ';
-
- else if (!strncmp("deg", token, 3)) *to++ = '°';
- else if (!strncmp("plusmn", token, 6)) *to++ = '±';
- else if (!strncmp("sup2", token, 4)) *to++ = '²';
- else if (!strncmp("sup3", token, 4)) *to++ = '³';
- else if (!strncmp("sup1", token, 4)) *to++ = '¹';
- else if (!strncmp("nbsp", token, 4)) *to++ = 'º';
- else if (!strncmp("pound", token, 5)) *to++ = '£';
- else if (!strncmp("cent", token, 4)) *to++ = '¢';
- else if (!strncmp("frac14", token, 6)) *to++ = '¼';
- else if (!strncmp("frac12", token, 6)) *to++ = '½';
- else if (!strncmp("frac34", token, 6)) *to++ = '¾';
- else if (!strncmp("iquest", token, 6)) *to++ = '¿';
- else if (!strncmp("iexcl", token, 5)) *to++ = '¡';
- else if (!strncmp("ETH", token, 3)) *to++ = 'Ð';
- else if (!strncmp("eth", token, 3)) *to++ = 'ð';
- else if (!strncmp("THORN", token, 5)) *to++ = 'Þ';
- else if (!strncmp("thorn", token, 5)) *to++ = 'þ';
- else if (!strncmp("AElig", token, 5)) *to++ = 'Æ';
- else if (!strncmp("aelig", token, 5)) *to++ = 'æ';
- else if (!strncmp("Oslash", token, 6)) *to++ = 'Ø';
- else if (!strncmp("curren", token, 6)) *to++ = '¤';
- else if (!strncmp("Ccedil", token, 6)) *to++ = 'Ç';
- else if (!strncmp("ccedil", token, 6)) *to++ = 'ç';
- else if (!strncmp("szlig", token, 5)) *to++ = 'ß';
- else if (!strncmp("Ntilde", token, 6)) *to++ = 'Ñ';
- else if (!strncmp("ntilde", token, 6)) *to++ = 'ñ';
- else if (!strncmp("yen", token, 3)) *to++ = '¥';
- else if (!strncmp("not", token, 3)) *to++ = '¬';
- else if (!strncmp("ordf", token, 4)) *to++ = 'ª';
- else if (!strncmp("uml", token, 3)) *to++ = '¨';
- else if (!strncmp("shy", token, 3)) *to++ = '­';
- else if (!strncmp("macr", token, 4)) *to++ = '¯';
- continue;
-
- }
- else if (*from == '>' && !ampersand)
- {
- intoken = false;
- // process desired tokens
- if (!strncmp(token, "sync type=\"Strongs\" value=\"G", 27)) {
- *to++ = '<';
- for (i = 28; token[i] != '\"'; i++)
- *to++ = token[i];
- *to++ = '>';
- continue;
- }
- else if (!strncmp(token, "sync type=\"Strongs\" value=\"H", 27)) {
- *to++ = '<';
- for (i = 28; token[i] != '\"'; i++)
- *to++ = token[i];
- *to++ = '>';
- continue;
- }
- else if (!strncmp(token, "scripRef", 8)) {
- *to++ = '#';
- continue;
- }
- else if (!strncmp(token, "/scripRef", 9)) {
- *to++ = ' ';
- continue;
- }
- else if (!strncmp(token, "note ", 5)) {
- *to++ = '{';
- continue;
- }
- else if (!strncmp(token, "/note", 5)) {
- *to++ = '}';
- continue;
- }
- else if (!strnicmp(token, "font", 4)) {
- *to++ = '\\';
- *to++ = '\\';
- continue;
- }
- else if (!strnicmp(token, "/font", 5)) {
- *to++ = '\\';
- *to++ = '\\';
- continue;
- }
- else switch(*token) {
- case 'I': // font tags
- case 'i':
- *to++ = '\\';
- *to++ = '@';
- continue;
- case 'B': // bold start
- case 'b':
- *to++ = '\\';
- *to++ = '$';
- continue;
- case '/':
- switch(token[1]) {
- case 'I':
- case 'i': // italic end
- *to++ = '\\';
- *to++ = '@';
- continue;
- case 'B': // bold start
- case 'b':
- *to++ = '\\';
- *to++ = '$';
- continue;
- }
- }
- continue;
- }
- if (intoken) {
- if (tokpos < 2047)
- token[tokpos++] = *from;
- }
- else *to++ = *from;
- }
- *to++ = 0;
- *to = 0;
- return 0;
-}
-
-
-
diff --git a/src/modules/filters/thmlosis.cpp b/src/modules/filters/thmlosis.cpp
new file mode 100644
index 0000000..7208610
--- /dev/null
+++ b/src/modules/filters/thmlosis.cpp
@@ -0,0 +1,385 @@
+/******************************************************************************
+ *
+ * thmlosis - SWFilter descendant for converting ThML markup to
+ * OSIS markup.
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <thmlosis.h>
+#include <swmodule.h>
+#include <swlog.h>
+#include <versekey.h>
+#include <stdarg.h>
+#ifndef __GNUC__
+#else
+#include <unixstr.h>
+#endif
+
+SWORD_NAMESPACE_START
+
+ThMLOSIS::ThMLOSIS() {
+}
+
+
+ThMLOSIS::~ThMLOSIS() {
+}
+
+
+char ThMLOSIS::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ /*
+
+ const char *from;
+ char token[2048]; // cheese. Fix.
+ int tokpos = 0;
+ bool intoken = false;
+ int len;
+ bool lastspace = false;
+ int word = 1;
+ char val[128];
+ char buf[128];
+ char wordstr[5];
+ char *valto;
+ char *ch;
+ char *textStart, *textEnd;
+ char *wordStart, *wordEnd;
+ bool newText = false;
+ bool newWord = false;
+ SWBuf tmp;
+ bool suspendTextPassThru = false;
+ bool keepToken = false;
+ bool handled = false;
+ SWBuf divEnd = "";
+
+
+ wordStart = text;
+
+ SWBuf orig = text;
+ from = orig.c_str();
+
+ for (text = ""; *from; from++) {
+
+ if (*from == '<') {
+ intoken = true;
+ tokpos = 0;
+ token[0] = 0;
+ token[1] = 0;
+ token[2] = 0;
+ textEnd = from-1;
+ wordEnd = to;
+ continue;
+ }
+ if (*from == '>') { // process tokens
+ intoken = false;
+ keepToken = false;
+ suspendTextPassThru = false;
+ newWord = true;
+ handled = false;
+
+ while (wordStart < (text+maxlen)) {
+// if (strchr(" ,;.?!()'\"", *wordStart))
+ if (strchr(";,: .?!()'\"", *wordStart))
+ wordStart++;
+ else break;
+ }
+ while (wordEnd > wordStart) {
+ if (strchr(" ,;:.?!()'\"", *wordEnd))
+ wordEnd--;
+ else break;
+ }
+
+ // section titles
+ if (!strcmp(token, "div class=\"sechead\"")) {
+ pushString(&to, "<title>");
+ divEnd = "</title>";
+ newText = true;
+ lastspace = false;
+ handled = true;
+ }
+ else if (!strcmp(token, "/div")) {
+ pushString(&to, divEnd.c_str());
+ lastspace = false;
+ handled = true;
+ }
+ // Scripture Reference
+ if (!strncmp(token, "scripRef", 8)) {
+ // pushString(buf, "<reference osisRef=\"");
+ suspendTextPassThru = true;
+ newText = true;
+ handled = true;
+ }
+ else if (!strncmp(token, "/scripRef", 9)) {
+ tmp = "";
+ tmp.append(textStart, (int)(textEnd - textStart)+1);
+ pushString(&to, convertToOSIS(tmp.c_str(), key));
+ suspendTextPassThru = false;
+ handled = true;
+ }
+// Usage of italics to represent transChange isn't dominant;
+// solution: mark in OSIS instead, assume no semantics other than emphasis
+// of italicized text
+// if (!strcmp(module->Type(), "Biblical Texts")) {
+// // Italics assume transchange for Biblical texts
+// if (!stricmp(token, "i")) {
+// pushString(&to, "<transChange type=\"added\">");
+// newText = true;
+// lastspace = false;
+// handled = true;
+// }
+// else if (!stricmp(token, "/i")) {
+// pushString(&to, "</transChange>");
+// lastspace = false;
+// handled = true;
+// }
+// }
+// else {
+// // otherwise, italics are just italics
+//-- end italics for transchange
+ if (!stricmp(token, "i")) {
+ pushString(&to, "<hi type=\"i\">");
+ newText = true;
+ lastspace = false;
+ handled = true;
+ }
+ else if (!stricmp(token, "/i")) {
+ pushString(&to, "</hi>");
+ lastspace = false;
+ handled = true;
+ }
+// }
+
+ if (!strcmp(token, "b")) {
+ pushString(&to, "<hi type=\"b\">");
+ newText = true;
+ lastspace = false;
+ handled = true;
+ }
+ else if (!strcmp(token, "/b")) {
+ pushString(&to, "</hi>");
+ lastspace = false;
+ handled = true;
+ }
+
+ // Footnote
+ if (!strcmp(token, "note")) {
+ pushString(&to, "<note>");
+ newText = true;
+ lastspace = false;
+ handled = true;
+ }
+ else if (!strcmp(token, "/note")) {
+ pushString(&to, "</note>");
+ lastspace = false;
+ handled = true;
+ }
+
+ // Figure
+ else if (!strncmp(token, "img ", 4)) {
+ const char *src = strstr(token, "src");
+ if (!src) // assert we have a src attribute
+ continue;
+// return false;
+
+ pushString(&to, "<figure src=\"");
+ const char *c;
+ for (c = src;((*c) && (*c != '"')); c++);
+
+// uncomment for SWORD absolute path logic
+// if (*(c+1) == '/') {
+// pushString(buf, "file:");
+// pushString(buf, module->getConfigEntry("AbsoluteDataPath"));
+// if (*((*buf)-1) == '/')
+// c++; // skip '/'
+// }
+// end of uncomment for absolute path logic
+
+ for (c++;((*c) && (*c != '"')); c++)
+ *to++ = *c;
+
+ pushString(&to, "\" />");
+ handled = true;
+ }
+
+ // Strongs numbers
+ else if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs
+ if (module->isProcessEntryAttributes()) {
+ valto = val;
+ for (unsigned int i = 27; token[i] != '\"' && i < 150; i++)
+ *valto++ = token[i];
+ *valto = 0;
+ if (atoi((!isdigit(*val))?val+1:val) < 5627) {
+ // normal strongs number
+ strstrip(val);
+ sprintf(buf, "<w lemma=\"x-Strong:%s\">", val);
+ memmove(wordStart+strlen(buf), wordStart, (to-wordStart)+1);
+ memcpy(wordStart, buf, strlen(buf));
+ to+=strlen(buf);
+ pushString(&to, "</w>");
+ module->getEntryAttributes()["Word"][wordstr]["Strongs"] = val;
+// tmp = "";
+// tmp.append(textStart, (int)(wordEnd - wordStart));
+// module->getEntryAttributes()["Word"][wordstr]["Text"] = tmp;
+ }
+ else {
+ // verb morph
+ sprintf(wordstr, "%03d", word-1);
+ module->getEntryAttributes()["Word"][wordstr]["Morph"] = val;
+ }
+ }
+ handled = true;
+ }
+
+ // Morphology
+ else if (!strncmp(token, "sync type=\"morph\"", 17)) {
+ for (ch = token+17; *ch; ch++) {
+ if (!strncmp(ch, "class=\"", 7)) {
+ valto = val;
+ for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++)
+ *valto++ = ch[i];
+ *valto = 0;
+ sprintf(wordstr, "%03d", word-1);
+ strstrip(val);
+ module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = val;
+ }
+ if (!strncmp(ch, "value=\"", 7)) {
+ valto = val;
+ for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++)
+ *valto++ = ch[i];
+ *valto = 0;
+ sprintf(wordstr, "%03d", word-1);
+ strstrip(val);
+ module->getEntryAttributes()["Word"][wordstr]["Morph"] = val;
+ }
+ }
+ if (!strncmp(wordStart, "<w ", 3)) {
+
+ const char *cls = "Unknown", *morph;
+
+ if (module->getEntryAttributes()["Word"][wordstr]["Morph"].size() > 0) {
+ if (module->getEntryAttributes()["Word"][wordstr]["MorphClass"].size() > 0)
+ cls = module->getEntryAttributes()["Word"][wordstr]["MorphClass"].c_str();
+ morph = module->getEntryAttributes()["Word"][wordstr]["Morph"].c_str();
+
+ sprintf(buf, "morph=\"x-%s:%s\" ", cls, morph);
+ memmove(wordStart+3+strlen(buf), wordStart+3, (to-wordStart)+1);
+ memcpy(wordStart+3, buf, strlen(buf));
+ to+=strlen(buf);
+ }
+ }
+ handled = true;
+ }
+
+ if (!keepToken) { // if we don't want strongs
+ if (!handled) {
+ SWLog::systemlog->LogError("Unprocessed Token: <%s>", token);
+// exit(-1);
+ }
+ if (strchr(" ,:;.?!()'\"", from[1])) {
+ if (lastspace)
+ to--;
+ }
+ if (newText) {textStart = from+1; newText = false; }
+// if (newWord) {wordStart = to; newWord = false; }
+ continue;
+ }
+ // if not a strongs token, keep token in text
+ *to++ = '<';
+ for (char *tok = token; *tok; tok++)
+ *to++ = *tok;
+ *to++ = '>';
+ if (newText) {textStart = to; newWord = false; }
+// if (newWord) {wordStart = to; newWord = false; }
+ continue;
+ }
+ if (intoken) {
+ if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) {
+ token[tokpos++] = *from;
+ token[tokpos+2] = 0;
+ }
+ }
+ else {
+ if (newWord && (*from != ' ')) {wordStart = to; newWord = false; memset(to, 0, 10); }
+ if (!suspendTextPassThru) {
+ *to++ = *from;
+ lastspace = (*from == ' ');
+ }
+ }
+ }
+
+ VerseKey *vkey = SWDYNAMIC_CAST(VerseKey, key);
+ if (vkey) {
+ char ref[254];
+ if (vkey->Verse())
+ sprintf(ref, "<verse osisID=\"%s\">", vkey->getOSISRef());
+ else *ref = 0;
+ if (*ref) {
+ memmove(text+strlen(ref), text, maxlen-strlen(ref)-1);
+ memcpy(text, ref, strlen(ref));
+ to+=strlen(ref);
+ if (vkey->Verse()) {
+ VerseKey tmp;
+ tmp = *vkey;
+ tmp.AutoNormalize(0);
+ tmp.Headings(1);
+ sprintf(ref, "</verse>");
+ pushString(&to, ref);
+ tmp = MAXVERSE;
+ if (*vkey == tmp) {
+ tmp.Verse(0);
+// sprintf(ref, "\t</div>");
+// pushString(&to, ref);
+ tmp = MAXCHAPTER;
+ tmp = MAXVERSE;
+ if (*vkey == tmp) {
+ tmp.Chapter(0);
+ tmp.Verse(0);
+// sprintf(ref, "\t</div>");
+// pushString(&to, ref);
+ }
+ }
+ }
+
+// else if (vkey->Chapter())
+// sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef());
+// else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef());
+ }
+ }
+ *to++ = 0;
+ *to = 0;
+*/
+ return 0;
+}
+
+
+const char *ThMLOSIS::convertToOSIS(const char *inRef, const SWKey *key) {
+	// Wraps every reference parsed out of inRef in an OSIS <reference> element.
+	// The result lives in a function-static SWBuf, so the next call overwrites it.
+	static SWBuf outRef;
+	outRef = "";
+	VerseKey defLanguage;
+	ListKey verses = defLanguage.ParseVerseList(inRef, (*key), true);
+	const char *startFrag = inRef;
+	for (int i = 0; i < verses.Count(); i++) {
+		SWKey *item = verses.GetElement(i);
+		VerseKey *element = SWDYNAMIC_CAST(VerseKey, item);
+		const char *fragEnd = (const char *)item->userData;
+		int fragLen = (int)(fragEnd - startFrag) + 1;
+		// append piecewise: no fixed-size scratch buffers for sprintf/memmove to overrun
+		outRef += "<reference osisRef=\"";
+		if (element) {
+			outRef += element->LowerBound().getOSISRef();
+			outRef += "-";
+			outRef += element->UpperBound().getOSISRef();
+		}
+		else outRef += VerseKey(*item).getOSISRef();
+		outRef += "\">";
+		if (fragLen > 0) outRef.append(startFrag, fragLen);
+		outRef += "</reference>";
+		startFrag = fragEnd + 1;
+	}
+	return outRef.c_str();
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmlplain.cpp b/src/modules/filters/thmlplain.cpp
index 5609f16..a04d6c3 100644
--- a/src/modules/filters/thmlplain.cpp
+++ b/src/modules/filters/thmlplain.cpp
@@ -1,35 +1,30 @@
/******************************************************************************
*
- * thmlplain - SWFilter decendant to strip out all ThML tags or convert to
+ * thmlplain - SWFilter descendant to strip out all ThML tags or convert to
* ASCII rendered symbols.
*/
#include <stdlib.h>
-#include <string.h>
#include <thmlplain.h>
+SWORD_NAMESPACE_START
ThMLPlain::ThMLPlain() {
}
-
-char ThMLPlain::ProcessText(char *text, int maxlen)
+char ThMLPlain::processText(SWBuf &text, const SWKey *key, const SWModule *module)
{
- char *to, *from, token[2048];
+ char token[2048];
int tokpos = 0;
bool intoken = false;
- int len;
bool ampersand = false;
- len = strlen(text) + 1; // shift string to right of buffer
- if (len < maxlen) {
- memmove(&text[maxlen - len], text, len);
- from = &text[maxlen - len];
- }
- else from = text; // -------------------------------
-
- for (to = text; *from; from++) {
+ const char *from;
+ SWBuf orig = text;
+ from = orig.c_str();
+ for (text = ""; *from; from++)
+ {
if (*from == 10 || *from == 13)
from++;
if (*from == '<') {
@@ -53,102 +48,102 @@ char ThMLPlain::ProcessText(char *text, int maxlen)
if (*from == ';' && ampersand) {
intoken = false;
- if (!strncmp("nbsp", token, 4)) *to++ = ' ';
- else if (!strncmp("quot", token, 4)) *to++ = '"';
- else if (!strncmp("amp", token, 3)) *to++ = '&';
- else if (!strncmp("lt", token, 2)) *to++ = '<';
- else if (!strncmp("gt", token, 2)) *to++ = '>';
- else if (!strncmp("brvbar", token, 6)) *to++ = '|';
- else if (!strncmp("sect", token, 4)) *to++ = '§';
- else if (!strncmp("copy", token, 4)) *to++ = '©';
- else if (!strncmp("laquo", token, 5)) *to++ = '«';
- else if (!strncmp("reg", token, 3)) *to++ = '®';
- else if (!strncmp("acute", token, 5)) *to++ = '´';
- else if (!strncmp("para", token, 4)) *to++ = '¶';
- else if (!strncmp("raquo", token, 5)) *to++ = '»';
+ if (!strncmp("nbsp", token, 4)) text += ' ';
+ else if (!strncmp("quot", token, 4)) text += '"';
+ else if (!strncmp("amp", token, 3)) text += '&';
+ else if (!strncmp("lt", token, 2)) text += '<';
+ else if (!strncmp("gt", token, 2)) text += '>';
+ else if (!strncmp("brvbar", token, 6)) text += '|';
+ else if (!strncmp("sect", token, 4)) text += '§';
+ else if (!strncmp("copy", token, 4)) text += '©';
+ else if (!strncmp("laquo", token, 5)) text += '«';
+ else if (!strncmp("reg", token, 3)) text += '®';
+ else if (!strncmp("acute", token, 5)) text += '´';
+ else if (!strncmp("para", token, 4)) text += '¶';
+ else if (!strncmp("raquo", token, 5)) text += '»';
- else if (!strncmp("Aacute", token, 6)) *to++ = 'Á';
- else if (!strncmp("Agrave", token, 6)) *to++ = 'À';
- else if (!strncmp("Acirc", token, 5)) *to++ = 'Â';
- else if (!strncmp("Auml", token, 4)) *to++ = 'Ä';
- else if (!strncmp("Atilde", token, 6)) *to++ = 'Ã';
- else if (!strncmp("Aring", token, 5)) *to++ = 'Å';
- else if (!strncmp("aacute", token, 6)) *to++ = 'á';
- else if (!strncmp("agrave", token, 6)) *to++ = 'à';
- else if (!strncmp("acirc", token, 5)) *to++ = 'â';
- else if (!strncmp("auml", token, 4)) *to++ = 'ä';
- else if (!strncmp("atilde", token, 6)) *to++ = 'ã';
- else if (!strncmp("aring", token, 5)) *to++ = 'å';
- else if (!strncmp("Eacute", token, 6)) *to++ = 'É';
- else if (!strncmp("Egrave", token, 6)) *to++ = 'È';
- else if (!strncmp("Ecirc", token, 5)) *to++ = 'Ê';
- else if (!strncmp("Euml", token, 4)) *to++ = 'Ë';
- else if (!strncmp("eacute", token, 6)) *to++ = 'é';
- else if (!strncmp("egrave", token, 6)) *to++ = 'è';
- else if (!strncmp("ecirc", token, 5)) *to++ = 'ê';
- else if (!strncmp("euml", token, 4)) *to++ = 'ë';
- else if (!strncmp("Iacute", token, 6)) *to++ = 'Í';
- else if (!strncmp("Igrave", token, 6)) *to++ = 'Ì';
- else if (!strncmp("Icirc", token, 5)) *to++ = 'Î';
- else if (!strncmp("Iuml", token, 4)) *to++ = 'Ï';
- else if (!strncmp("iacute", token, 6)) *to++ = 'í';
- else if (!strncmp("igrave", token, 6)) *to++ = 'ì';
- else if (!strncmp("icirc", token, 5)) *to++ = 'î';
- else if (!strncmp("iuml", token, 4)) *to++ = 'ï';
- else if (!strncmp("Oacute", token, 6)) *to++ = 'Ó';
- else if (!strncmp("Ograve", token, 6)) *to++ = 'Ò';
- else if (!strncmp("Ocirc", token, 5)) *to++ = 'Ô';
- else if (!strncmp("Ouml", token, 4)) *to++ = 'Ö';
- else if (!strncmp("Otilde", token, 6)) *to++ = 'Õ';
- else if (!strncmp("oacute", token, 6)) *to++ = 'ó';
- else if (!strncmp("ograve", token, 6)) *to++ = 'ò';
- else if (!strncmp("ocirc", token, 5)) *to++ = 'ô';
- else if (!strncmp("ouml", token, 4)) *to++ = 'ö';
- else if (!strncmp("otilde", token, 6)) *to++ = 'õ';
- else if (!strncmp("Uacute", token, 6)) *to++ = 'Ú';
- else if (!strncmp("Ugrave", token, 6)) *to++ = 'Ù';
- else if (!strncmp("Ucirc", token, 5)) *to++ = 'Û';
- else if (!strncmp("Uuml", token, 4)) *to++ = 'Ü';
- else if (!strncmp("uacute", token, 6)) *to++ = 'ú';
- else if (!strncmp("ugrave", token, 6)) *to++ = 'ù';
- else if (!strncmp("ucirc", token, 5)) *to++ = 'û';
- else if (!strncmp("uuml", token, 4)) *to++ = 'ü';
- else if (!strncmp("Yacute", token, 6)) *to++ = 'Ý';
- else if (!strncmp("yacute", token, 6)) *to++ = 'ý';
- else if (!strncmp("yuml", token, 4)) *to++ = 'ÿ';
+ else if (!strncmp("Aacute", token, 6)) text += 'Á';
+ else if (!strncmp("Agrave", token, 6)) text += 'À';
+ else if (!strncmp("Acirc", token, 5)) text += 'Â';
+ else if (!strncmp("Auml", token, 4)) text += 'Ä';
+ else if (!strncmp("Atilde", token, 6)) text += 'Ã';
+ else if (!strncmp("Aring", token, 5)) text += 'Å';
+ else if (!strncmp("aacute", token, 6)) text += 'á';
+ else if (!strncmp("agrave", token, 6)) text += 'à';
+ else if (!strncmp("acirc", token, 5)) text += 'â';
+ else if (!strncmp("auml", token, 4)) text += 'ä';
+ else if (!strncmp("atilde", token, 6)) text += 'ã';
+ else if (!strncmp("aring", token, 5)) text += 'å';
+ else if (!strncmp("Eacute", token, 6)) text += 'É';
+ else if (!strncmp("Egrave", token, 6)) text += 'È';
+ else if (!strncmp("Ecirc", token, 5)) text += 'Ê';
+ else if (!strncmp("Euml", token, 4)) text += 'Ë';
+ else if (!strncmp("eacute", token, 6)) text += 'é';
+ else if (!strncmp("egrave", token, 6)) text += 'è';
+ else if (!strncmp("ecirc", token, 5)) text += 'ê';
+ else if (!strncmp("euml", token, 4)) text += 'ë';
+ else if (!strncmp("Iacute", token, 6)) text += 'Í';
+ else if (!strncmp("Igrave", token, 6)) text += 'Ì';
+ else if (!strncmp("Icirc", token, 5)) text += 'Î';
+ else if (!strncmp("Iuml", token, 4)) text += 'Ï';
+ else if (!strncmp("iacute", token, 6)) text += 'í';
+ else if (!strncmp("igrave", token, 6)) text += 'ì';
+ else if (!strncmp("icirc", token, 5)) text += 'î';
+ else if (!strncmp("iuml", token, 4)) text += 'ï';
+ else if (!strncmp("Oacute", token, 6)) text += 'Ó';
+ else if (!strncmp("Ograve", token, 6)) text += 'Ò';
+ else if (!strncmp("Ocirc", token, 5)) text += 'Ô';
+ else if (!strncmp("Ouml", token, 4)) text += 'Ö';
+ else if (!strncmp("Otilde", token, 6)) text += 'Õ';
+ else if (!strncmp("oacute", token, 6)) text += 'ó';
+ else if (!strncmp("ograve", token, 6)) text += 'ò';
+ else if (!strncmp("ocirc", token, 5)) text += 'ô';
+ else if (!strncmp("ouml", token, 4)) text += 'ö';
+ else if (!strncmp("otilde", token, 6)) text += 'õ';
+ else if (!strncmp("Uacute", token, 6)) text += 'Ú';
+ else if (!strncmp("Ugrave", token, 6)) text += 'Ù';
+ else if (!strncmp("Ucirc", token, 5)) text += 'Û';
+ else if (!strncmp("Uuml", token, 4)) text += 'Ü';
+ else if (!strncmp("uacute", token, 6)) text += 'ú';
+ else if (!strncmp("ugrave", token, 6)) text += 'ù';
+ else if (!strncmp("ucirc", token, 5)) text += 'û';
+ else if (!strncmp("uuml", token, 4)) text += 'ü';
+ else if (!strncmp("Yacute", token, 6)) text += 'Ý';
+ else if (!strncmp("yacute", token, 6)) text += 'ý';
+ else if (!strncmp("yuml", token, 4)) text += 'ÿ';
- else if (!strncmp("deg", token, 3)) *to++ = '°';
- else if (!strncmp("plusmn", token, 6)) *to++ = '±';
- else if (!strncmp("sup2", token, 4)) *to++ = '²';
- else if (!strncmp("sup3", token, 4)) *to++ = '³';
- else if (!strncmp("sup1", token, 4)) *to++ = '¹';
- else if (!strncmp("nbsp", token, 4)) *to++ = 'º';
- else if (!strncmp("pound", token, 5)) *to++ = '£';
- else if (!strncmp("cent", token, 4)) *to++ = '¢';
- else if (!strncmp("frac14", token, 6)) *to++ = '¼';
- else if (!strncmp("frac12", token, 6)) *to++ = '½';
- else if (!strncmp("frac34", token, 6)) *to++ = '¾';
- else if (!strncmp("iquest", token, 6)) *to++ = '¿';
- else if (!strncmp("iexcl", token, 5)) *to++ = '¡';
- else if (!strncmp("ETH", token, 3)) *to++ = 'Ð';
- else if (!strncmp("eth", token, 3)) *to++ = 'ð';
- else if (!strncmp("THORN", token, 5)) *to++ = 'Þ';
- else if (!strncmp("thorn", token, 5)) *to++ = 'þ';
- else if (!strncmp("AElig", token, 5)) *to++ = 'Æ';
- else if (!strncmp("aelig", token, 5)) *to++ = 'æ';
- else if (!strncmp("Oslash", token, 6)) *to++ = 'Ø';
- else if (!strncmp("curren", token, 6)) *to++ = '¤';
- else if (!strncmp("Ccedil", token, 6)) *to++ = 'Ç';
- else if (!strncmp("ccedil", token, 6)) *to++ = 'ç';
- else if (!strncmp("szlig", token, 5)) *to++ = 'ß';
- else if (!strncmp("Ntilde", token, 6)) *to++ = 'Ñ';
- else if (!strncmp("ntilde", token, 6)) *to++ = 'ñ';
- else if (!strncmp("yen", token, 3)) *to++ = '¥';
- else if (!strncmp("not", token, 3)) *to++ = '¬';
- else if (!strncmp("ordf", token, 4)) *to++ = 'ª';
- else if (!strncmp("uml", token, 3)) *to++ = '¨';
- else if (!strncmp("shy", token, 3)) *to++ = '­';
- else if (!strncmp("macr", token, 4)) *to++ = '¯';
+ else if (!strncmp("deg", token, 3)) text += '°';
+ else if (!strncmp("plusmn", token, 6)) text += '±';
+ else if (!strncmp("sup2", token, 4)) text += '²';
+ else if (!strncmp("sup3", token, 4)) text += '³';
+ else if (!strncmp("sup1", token, 4)) text += '¹';
+ else if (!strncmp("nbsp", token, 4)) text += 'º';
+ else if (!strncmp("pound", token, 5)) text += '£';
+ else if (!strncmp("cent", token, 4)) text += '¢';
+ else if (!strncmp("frac14", token, 6)) text += '¼';
+ else if (!strncmp("frac12", token, 6)) text += '½';
+ else if (!strncmp("frac34", token, 6)) text += '¾';
+ else if (!strncmp("iquest", token, 6)) text += '¿';
+ else if (!strncmp("iexcl", token, 5)) text += '¡';
+ else if (!strncmp("ETH", token, 3)) text += 'Ð';
+ else if (!strncmp("eth", token, 3)) text += 'ð';
+ else if (!strncmp("THORN", token, 5)) text += 'Þ';
+ else if (!strncmp("thorn", token, 5)) text += 'þ';
+ else if (!strncmp("AElig", token, 5)) text += 'Æ';
+ else if (!strncmp("aelig", token, 5)) text += 'æ';
+ else if (!strncmp("Oslash", token, 6)) text += 'Ø';
+ else if (!strncmp("curren", token, 6)) text += '¤';
+ else if (!strncmp("Ccedil", token, 6)) text += 'Ç';
+ else if (!strncmp("ccedil", token, 6)) text += 'ç';
+ else if (!strncmp("szlig", token, 5)) text += 'ß';
+ else if (!strncmp("Ntilde", token, 6)) text += 'Ñ';
+ else if (!strncmp("ntilde", token, 6)) text += 'ñ';
+ else if (!strncmp("yen", token, 3)) text += '¥';
+ else if (!strncmp("not", token, 3)) text += '¬';
+ else if (!strncmp("ordf", token, 4)) text += 'ª';
+ else if (!strncmp("uml", token, 3)) text += '¨';
+ else if (!strncmp("shy", token, 3)) text += '­';
+ else if (!strncmp("macr", token, 4)) text += '¯';
continue;
}
@@ -156,32 +151,32 @@ char ThMLPlain::ProcessText(char *text, int maxlen)
intoken = false;
// process desired tokens
if (!strncmp(token, "sync type=\"Strongs\" value=\"", 27)) {
- *to++ = ' ';
- *to++ = '<';
+ text += ' ';
+ text += '<';
for (unsigned int i = 27; token[i] != '\"'; i++)
- *to++ = token[i];
- *to++ = '>';
+ text += token[i];
+ text += '>';
continue;
}
if (!strncmp(token, "sync type=\"morph\" value=\"", 25)) {
- *to++ = ' ';
- *to++ = '(';
+ text += ' ';
+ text += '(';
for (unsigned int i = 25; token[i] != '\"'; i++)
- *to++ = token[i];
- *to++ = ')';
+ text += token[i];
+ text += ')';
continue;
}
if (!strncmp("note", token, 4)) {
- *to++ = ' ';
- *to++ = '(';
+ text += ' ';
+ text += '(';
}
else if (!strncmp("br", token, 2))
- *to++ = '\n';
+ text += '\n';
else if (!strncmp("/p", token, 2))
- *to++ = '\n';
+ text += '\n';
else if (!strncmp("/note", token, 5)) {
- *to++ = ')';
- *to++ = ' ';
+ text += ')';
+ text += ' ';
}
continue;
}
@@ -190,12 +185,26 @@ char ThMLPlain::ProcessText(char *text, int maxlen)
token[tokpos++] = *from;
token[tokpos+2] = 0;
}
- else *to++ = *from;
+ else text += *from;
}
- *to++ = 0;
- *to = 0;
+
+ orig = text;
+ from = orig.c_str();
+ for (text = ""; *from; from++) { //loop to remove extra spaces
+ if ((strchr(" \t\n\r", *from))) {
+ while (*(from+1) && (strchr(" \t\n\r", *(from+1)))) {
+ from++;
+ }
+ text += " ";
+ }
+ else {
+ text += *from;
+ }
+ }
+ text += (char)0;
return 0;
}
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmlrtf.cpp b/src/modules/filters/thmlrtf.cpp
index 76289ec..4487921 100644
--- a/src/modules/filters/thmlrtf.cpp
+++ b/src/modules/filters/thmlrtf.cpp
@@ -15,12 +15,14 @@
***************************************************************************/
#include <stdlib.h>
-#include <string.h>
#include <thmlrtf.h>
+#include <swmodule.h>
+#include <utilxml.h>
+#include <versekey.h>
+SWORD_NAMESPACE_START
-ThMLRTF::ThMLRTF()
-{
+ThMLRTF::ThMLRTF() {
setTokenStart("<");
setTokenEnd(">");
@@ -30,6 +32,7 @@ ThMLRTF::ThMLRTF()
setEscapeStringCaseSensitive(true);
addEscapeStringSubstitute("nbsp", " ");
+ addEscapeStringSubstitute("apos", "'");
addEscapeStringSubstitute("quot", "\"");
addEscapeStringSubstitute("amp", "&");
addEscapeStringSubstitute("lt", "<");
@@ -128,87 +131,178 @@ ThMLRTF::ThMLRTF()
setTokenCaseSensitive(true);
- addTokenSubstitute("/scripRef", "|}");
- addTokenSubstitute("/note", ") }");
-
- addTokenSubstitute("br", "\\line ");
- addTokenSubstitute("br /", "\\line ");
- addTokenSubstitute("i", "{\\i1 ");
- addTokenSubstitute("/i", "}");
- addTokenSubstitute("b", "{\\b1 ");
- addTokenSubstitute("/b", "}");
- addTokenSubstitute("p", "\\par ");
-
- //we need uppercase forms for the moment to support a few early ThML modules that aren't XHTML compliant
- addTokenSubstitute("BR", "\\line ");
- addTokenSubstitute("I", "{\\i1 ");
- addTokenSubstitute("/I", "}");
- addTokenSubstitute("B", "{\\b1 ");
- addTokenSubstitute("/B", "}");
- addTokenSubstitute("P", "\\par ");
+
+ addTokenSubstitute("br", "\\line ");
+ addTokenSubstitute("br /", "\\line ");
+ addTokenSubstitute("i", "{\\i1 ");
+ addTokenSubstitute("/i", "}");
+ addTokenSubstitute("b", "{\\b1 ");
+ addTokenSubstitute("/b", "}");
+ addTokenSubstitute("p", "\\par ");
+
+ //we need uppercase forms for the moment to support a few early ThML modules that aren't XHTML compliant
+ addTokenSubstitute("BR", "\\line ");
+ addTokenSubstitute("I", "{\\i1 ");
+ addTokenSubstitute("/I", "}");
+ addTokenSubstitute("B", "{\\b1 ");
+ addTokenSubstitute("/B", "}");
+ addTokenSubstitute("P", "\\par ");
+ addTokenSubstitute("scripture", "{\\i1 ");
+ addTokenSubstitute("/scripture", "}");
}
-bool ThMLRTF::handleToken(char **buf, const char *token, DualStringMap &userData) {
- if (!substituteToken(buf, token)) {
- // manually process if it wasn't a simple substitution
- if (!strncmp(token, "sync type=\"Strongs\" value=\"", 27)) {
-/* if (token[27] == 'H' || token[27] == 'G' || token[27] == 'A') {
- pushString(buf, " {\\fs15 <");
- for (unsigned int i = 28; token[i] != '\"'; i++)
- *(*buf)++ = token[i];
- pushString(buf, ">}");
- }
- else if (token[27] == 'T') {
- pushString(buf, " {\\fs15 (");
- for (unsigned int i = 28; token[i] != '\"'; i++)
- *(*buf)++ = token[i];
- pushString(buf, ")}");
+
+char ThMLRTF::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+	// Let the base filter do token/escape substitution first, then squeeze
+	// every run of blanks, tabs, CRs and LFs in the result to one space.
+	SWBasicFilter::processText(text, key, module);
+	static const char blanks[] = " \t\n\r";
+	SWBuf filtered = text;
+	const char *src = filtered.c_str();
+	text = "";
+	while (*src) {
+		if (!strchr(blanks, *src)) {
+			text += *src++;
+			continue;
+		}
+		for (src++; *src && strchr(blanks, *src); src++);	// skip rest of run
+		text += " ";
+	}
+	text += (char)0;	// a (char)0 is still appended, exactly as before
+	return 0;
+}
+
+
+ThMLRTF::MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) {
+	// handleToken reads SecHead and BiblicalText, so give both a defined value even without a module
+	SecHead = false;
+	BiblicalText = (module && !strcmp(module->Type(), "Biblical Texts"));
+	if (module) version = module->Name();
+}
+
+
+bool ThMLRTF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+ const char *tok;
+ if (!substituteToken(buf, token)) { // manually process if it wasn't a simple substitution
+ MyUserData *u = (MyUserData *)userData;
+ XMLTag tag(token);
+ if ((!tag.isEndTag()) && (!tag.isEmpty()))
+ u->startTag = tag;
+ if (tag.getName() && !strcmp(tag.getName(), "sync")) {
+ SWBuf value = tag.getAttribute("value");
+ if (tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "morph")) { //&gt;
+ buf.appendFormatted(" {\\cf4 \\sub (%s)}", value.c_str());
+ }
+ else if( tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "Strongs")) {
+ if (value[0] == 'H' || value[0] == 'G' || value[0] == 'A') {
+ value<<1;
+ buf.appendFormatted(" {\\cf3 \\sub <%s>}", value.c_str());
+ }
+ else if (value[0] == 'T') {
+ value<<1;
+ buf.appendFormatted(" {\\cf4 \\sub (%s)}", value.c_str());
+ }
+ }
+ else if (tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "Dict")) {
+ if (!tag.isEndTag())
+ buf += "{\\b ";
+ else buf += "}";
+ }
}
- else if (!strncmp(token, "sync type=\"morph\" ", 18)) {
- pushString(buf, " {\\fs15 (");
- for (const char *tok = token + 5; *tok; tok++) {
- if (!strncmp(tok, "value=\"", 7)) {
- tok += 7;
- for (;*tok != '\"'; tok++)
- *(*buf)++ = *tok;
- break;
+ // <note> tag
+ else if (!strcmp(tag.getName(), "note")) {
+ if (!tag.isEndTag()) {
+ if (!tag.isEmpty()) {
+ SWBuf type = tag.getAttribute("type");
+ SWBuf footnoteNumber = tag.getAttribute("swordFootnote");
+ VerseKey *vkey;
+ // see if we have a VerseKey * or descendant
+ try {
+ vkey = SWDYNAMIC_CAST(VerseKey, u->key);
+ }
+ catch ( ... ) { }
+ if (vkey) {
+ // leave this special osis type in for crossReference notes types? Might thml use this some day? Doesn't hurt.
+ char ch = ((tag.getAttribute("type") && ((!strcmp(tag.getAttribute("type"), "crossReference")) || (!strcmp(tag.getAttribute("type"), "x-cross-ref")))) ? 'x':'n');
+ buf.appendFormatted("{\\super <a href=\"\">*%c%i.%s</a>} ", ch, vkey->Verse(), footnoteNumber.c_str());
+ }
+ u->suspendTextPassThru = true;
}
}
+ if (tag.isEndTag()) {
+ u->suspendTextPassThru = false;
+ }
+ }
+
- pushString(buf, ")}");
-*/ }
- else if (!strncmp(token, "sync type=\"lemma\" value=\"", 25)) {
- pushString(buf, "{\\fs15 (");
- for (unsigned int i = 25; token[i] != '\"'; i++)
- *(*buf)++ = token[i];
- pushString(buf, ")}");
+ else if (!strcmp(tag.getName(), "scripRef")) {
+ if (!tag.isEndTag()) {
+ if (!tag.isEmpty()) {
+ u->suspendTextPassThru = true;
+ }
+ }
+ if (tag.isEndTag()) { // </scripRef>
+ if (!u->BiblicalText) {
+ SWBuf refList = u->startTag.getAttribute("passage");
+ if (!refList.length())
+ refList = u->lastTextNode;
+ SWBuf version = tag.getAttribute("version");
+ buf += "<a href=\"\">";
+ buf += refList.c_str();
+// buf += u->lastTextNode.c_str();
+ buf += "</a>";
+ }
+ else {
+ SWBuf footnoteNumber = u->startTag.getAttribute("swordFootnote");
+ VerseKey *vkey;
+ // see if we have a VerseKey * or descendant
+ try {
+ vkey = SWDYNAMIC_CAST(VerseKey, u->key);
+ }
+ catch ( ... ) {}
+ if (vkey) {
+ // leave this special osis type in for crossReference notes types? Might thml use this some day? Doesn't hurt.
+ buf.appendFormatted("{\\super <a href=\"\">*x%i.%s</a>} ", vkey->Verse(), footnoteNumber.c_str());
+ }
+ }
+
+ // let's let text resume to output again
+ u->suspendTextPassThru = false;
+ }
}
- else if (!strncmp(token, "scripRef", 8)) {
- pushString(buf, "{\\cf2 #");
+
+ else if (tag.getName() && !strcmp(tag.getName(), "div")) {
+ if (tag.isEndTag() && u->SecHead) {
+ buf += "\\par}";
+ u->SecHead = false;
+ }
+ else if (tag.getAttribute("class")) {
+ if (!stricmp(tag.getAttribute("class"), "sechead")) {
+ u->SecHead = true;
+ buf += "{\\par\\i1\\b1 ";
+ }
+ else if (!stricmp(tag.getAttribute("class"), "title")) {
+ u->SecHead = true;
+ buf += "{\\par\\i1\\b1 ";
+ }
+ }
}
- else if (!strncmp(token, "div", 3)) {
- *(*buf)++ = '{';
- if (!strncmp(token, "div class=\"title\"", 17)) {
- pushString(buf, "\\par\\i1\\b1 ");
- userData["sechead"] = "true";
- }
- else if (!strncmp(token, "div class=\"sechead\"", 19)) {
- pushString(buf, "\\par\\i1\\b1 ");
- userData["sechead"] = "true";
- }
- }
- else if (!strncmp(token, "/div", 4)) {
- *(*buf)++ = '}';
- if (userData["sechead"] == "true") {
- pushString(buf, "\\par ");
- userData["sechead"] == "false";
- }
- }
- else if (!strncmp(token, "note", 4)) {
- pushString(buf, " {\\i1\\fs15 (");
- }
+ else if (tag.getName() && (!strcmp(tag.getName(), "img") || !strcmp(tag.getName(), "image"))) {
+ const char *src = tag.getAttribute("src");
+ if (!src) // assert we have a src attribute
+ return false;
+ char* filepath = new char[strlen(u->module->getConfigEntry("AbsoluteDataPath")) + strlen(token)];
+ *filepath = 0;
+ strcpy(filepath, userData->module->getConfigEntry("AbsoluteDataPath"));
+ strcat(filepath, src);
+
+// we do this because BibleCS looks for this EXACT format for an image tag
+ buf+="<img src=\"";
+ buf+=filepath;
+ buf+="\" />";
+ delete [] filepath;
+ }
else {
return false; // we still didn't handle token
}
@@ -217,3 +311,4 @@ bool ThMLRTF::handleToken(char **buf, const char *token, DualStringMap &userData
}
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmlscripref.cpp b/src/modules/filters/thmlscripref.cpp
index 23edd6d..7b93f90 100644
--- a/src/modules/filters/thmlscripref.cpp
+++ b/src/modules/filters/thmlscripref.cpp
@@ -1,103 +1,126 @@
/******************************************************************************
*
- * thmlscripref - SWFilter decendant to hide or show scripture references
- * in a ThML module.
+ * thmlscripref - SWFilter descendant to hide or show scripture
+ * references in a ThML module.
*/
#include <stdlib.h>
-#include <string.h>
#include <thmlscripref.h>
+#include <utilxml.h>
+#include <versekey.h>
+#include <swmodule.h>
#ifndef __GNUC__
#else
#include <unixstr.h>
#endif
+SWORD_NAMESPACE_START
-const char ThMLScripref::on[] = "On";
-const char ThMLScripref::off[] = "Off";
-const char ThMLScripref::optName[] = "Scripture Cross-references";
-const char ThMLScripref::optTip[] = "Toggles Scripture Cross-references On and Off if they exist";
+const char oName[] = "Cross-references";
+const char oTip[] = "Toggles Scripture Cross-references On and Off if they exist";
+const SWBuf choices[3] = {"On", "Off", ""};
+const StringList oValues(&choices[0], &choices[2]);
-ThMLScripref::ThMLScripref() {
- option = false;
- options.push_back(on);
- options.push_back(off);
+ThMLScripref::ThMLScripref() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("On");
}
ThMLScripref::~ThMLScripref() {
}
-void ThMLScripref::setOptionValue(const char *ival)
-{
- option = (!stricmp(ival, on));
-}
-const char *ThMLScripref::getOptionValue()
-{
- return (option) ? on:off;
-}
+char ThMLScripref::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ SWBuf token;
+ bool intoken = false;
+ bool hide = false;
+ SWBuf tagText;
+ XMLTag startTag;
+ SWBuf refs = "";
+ int footnoteNum = 1;
+ char buf[254];
+ VerseKey parser = key->getText();
-char ThMLScripref::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
-{
- if (!option) { // if we don't want scriprefs
- char *to, *from, token[2048]; // cheese. Fix.
- int tokpos = 0;
- bool intoken = false;
- int len;
- bool hide = false;
+ SWBuf orig = text;
+ const char *from = orig.c_str();
- len = strlen(text) + 1; // shift string to right of buffer
- if (len < maxlen) {
- memmove(&text[maxlen - len], text, len);
- from = &text[maxlen - len];
+ for (text = ""; *from; from++) {
+ if (*from == '<') {
+ intoken = true;
+ token = "";
+ continue;
}
- else from = text; // -------------------------------
+ if (*from == '>') { // process tokens
+ intoken = false;
- for (to = text; *from; from++) {
- if (*from == '<') {
- intoken = true;
- tokpos = 0;
- token[0] = 0;
- token[1] = 0;
- token[2] = 0;
- continue;
- }
- if (*from == '>') { // process tokens
- intoken = false;
- if (!strnicmp(token, "scripRef", 8)) {
- hide = true;
- continue;
+ XMLTag tag(token);
+ if (!strcmp(tag.getName(), "scripRef")) {
+ if (!tag.isEndTag()) {
+ if (!tag.isEmpty()) {
+ refs = "";
+ startTag = tag;
+ hide = true;
+ tagText = "";
+ continue;
+ }
}
- else if (!strnicmp(token, "/scripRef", 9)) {
- hide = false;
- continue;
+ if (hide && tag.isEndTag()) {
+ if (module->isProcessEntryAttributes()) {
+ SWBuf fc = module->getEntryAttributes()["Footnote"]["count"]["value"];
+ footnoteNum = (fc.length()) ? atoi(fc.c_str()) : 0;
+ sprintf(buf, "%i", ++footnoteNum);
+ module->getEntryAttributes()["Footnote"]["count"]["value"] = buf;
+ StringList attributes = startTag.getAttributeNames();
+ for (StringList::iterator it = attributes.begin(); it != attributes.end(); it++) {
+ module->getEntryAttributes()["Footnote"][buf][it->c_str()] = startTag.getAttribute(it->c_str());
+ }
+ module->getEntryAttributes()["Footnote"][buf]["body"] = tagText;
+ startTag.setAttribute("swordFootnote", buf);
+ SWBuf passage = startTag.getAttribute("passage");
+ if (passage.length())
+ refs = parser.ParseVerseList(passage.c_str(), parser, true).getRangeText();
+ else refs = parser.ParseVerseList(tagText.c_str(), parser, true).getRangeText();
+ module->getEntryAttributes()["Footnote"][buf]["refList"] = refs.c_str();
+ }
+ hide = false;
+ if (option) { // we want the tag in the text
+ text += startTag;
+ text.append(tagText);
+ }
+ else continue;
}
+ }
- // if not a scripref token, keep token in text
- if (!hide) {
- *to++ = '<';
- for (char *tok = token; *tok; tok++)
- *to++ = *tok;
- *to++ = '>';
- }
- continue;
+ // if not a scripRef token, keep token in text
+ if ((!strcmp(tag.getName(), "scripRef")) && (!tag.isEndTag())) {
+ SWBuf osisRef = tag.getAttribute("passage");
+ if (refs.length())
+ refs += "; ";
+ refs += osisRef;
}
- if (intoken) {
- if (tokpos < 2045)
- token[tokpos++] = *from;
- token[tokpos+2] = 0;
+ if (!hide) {
+ text += '<';
+ text.append(token);
+ text += '>';
}
- else {
- if (!hide) {
- *to++ = *from;
- }
+ else {
+ tagText += '<';
+ tagText.append(token);
+ tagText += '>';
}
+ continue;
+ }
+ if (intoken) { //copy token
+ token += *from;
}
- *to++ = 0;
- *to = 0;
+ else if (!hide) { //copy text which is not inside a token
+ text += *from;
+ }
+ else tagText += *from;
}
return 0;
}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmlstrongs.cpp b/src/modules/filters/thmlstrongs.cpp
index 8d0466c..4a53e25 100644
--- a/src/modules/filters/thmlstrongs.cpp
+++ b/src/modules/filters/thmlstrongs.cpp
@@ -1,50 +1,40 @@
/******************************************************************************
*
- * thmlstrongs - SWFilter decendant to hide or show strongs number
+ * thmlstrongs - SWFilter descendant to hide or show strongs number
* in a ThML module.
*/
#include <stdlib.h>
#include <stdio.h>
-#include <string.h>
#include <thmlstrongs.h>
#include <swmodule.h>
#ifndef __GNUC__
#else
#include <unixstr.h>
#endif
+#include <ctype.h>
+SWORD_NAMESPACE_START
-const char ThMLStrongs::on[] = "On";
-const char ThMLStrongs::off[] = "Off";
-const char ThMLStrongs::optName[] = "Strong's Numbers";
-const char ThMLStrongs::optTip[] = "Toggles Strong's Numbers On and Off if they exist";
+const char oName[] = "Strong's Numbers";
+const char oTip[] = "Toggles Strong's Numbers On and Off if they exist";
+const SWBuf choices[3] = {"On", "Off", ""};
+const StringList oValues(&choices[0], &choices[2]);
-ThMLStrongs::ThMLStrongs() {
- option = false;
- options.push_back(on);
- options.push_back(off);
+ThMLStrongs::ThMLStrongs() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("Off");
}
ThMLStrongs::~ThMLStrongs() {
}
-void ThMLStrongs::setOptionValue(const char *ival)
-{
- option = (!stricmp(ival, on));
-}
-
-const char *ThMLStrongs::getOptionValue()
-{
- return (option) ? on:off;
-}
-char ThMLStrongs::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
-{
- char *to, *from, token[2048]; // cheese. Fix.
+char ThMLStrongs::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ char token[2048]; // cheese. Fix.
+ const char *from;
int tokpos = 0;
bool intoken = false;
int len;
@@ -54,23 +44,21 @@ char ThMLStrongs::ProcessText(char *text, int maxlen, const SWKey *key, const SW
char wordstr[5];
char *valto;
char *ch;
+ unsigned int textStart = 0, textEnd = 0;
+ SWBuf tmp;
+ bool newText = false;
- len = strlen(text) + 1; // shift string to right of buffer
- if (len < maxlen) {
- memmove(&text[maxlen - len], text, len);
- from = &text[maxlen - len];
- }
- else from = text;
-
- // -------------------------------
+ SWBuf orig = text;
+ from = orig.c_str();
- for (to = text; *from; from++) {
+ for (text = ""; *from; from++) {
if (*from == '<') {
intoken = true;
tokpos = 0;
token[0] = 0;
token[1] = 0;
token[2] = 0;
+ textEnd = text.length();
continue;
}
if (*from == '>') { // process tokens
@@ -81,15 +69,28 @@ char ThMLStrongs::ProcessText(char *text, int maxlen, const SWKey *key, const SW
for (unsigned int i = 27; token[i] != '\"' && i < 150; i++)
*valto++ = token[i];
*valto = 0;
- sprintf(wordstr, "%03d", word++);
- module->getEntryAttributes()["Word"][wordstr]["Strongs"] = val;
+ if (atoi((!isdigit(*val))?val+1:val) < 5627) {
+ // normal strongs number
+ sprintf(wordstr, "%03d", word++);
+ module->getEntryAttributes()["Word"][wordstr]["Strongs"] = val;
+ tmp = "";
+ tmp.append(text.c_str()+textStart, (int)(textEnd - textStart));
+ module->getEntryAttributes()["Word"][wordstr]["Text"] = tmp;
+ newText = true;
+ }
+ else {
+ // verb morph
+ sprintf(wordstr, "%03d", word-1);
+ module->getEntryAttributes()["Word"][wordstr]["Morph"] = val;
+ }
}
if (!option) { // if we don't want strongs
if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) {
if (lastspace)
- to--;
+ text--;
}
+ if (newText) {textStart = text.length(); newText = false; }
continue;
}
}
@@ -116,10 +117,10 @@ char ThMLStrongs::ProcessText(char *text, int maxlen, const SWKey *key, const SW
}
}
// if not a strongs token, keep token in text
- *to++ = '<';
- for (char *tok = token; *tok; tok++)
- *to++ = *tok;
- *to++ = '>';
+ text += '<';
+ text += token;
+ text += '>';
+ if (newText) {textStart = text.length(); newText = false; }
continue;
}
if (intoken) {
@@ -128,11 +129,11 @@ char ThMLStrongs::ProcessText(char *text, int maxlen, const SWKey *key, const SW
token[tokpos+2] = 0;
}
else {
- *to++ = *from;
+ text += *from;
lastspace = (*from == ' ');
}
}
- *to++ = 0;
- *to = 0;
return 0;
}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmlvariants.cpp b/src/modules/filters/thmlvariants.cpp
index fda0950..b8ab653 100644
--- a/src/modules/filters/thmlvariants.cpp
+++ b/src/modules/filters/thmlvariants.cpp
@@ -1,18 +1,18 @@
/******************************************************************************
*
- * thmlvariants - SWFilter decendant to hide or show textual variants
+ * thmlvariants - SWFilter descendant to hide or show textual variants
* in a ThML module.
*/
#include <stdlib.h>
-#include <string.h>
#include <thmlvariants.h>
#ifndef __GNUC__
#else
#include <unixstr.h>
#endif
+SWORD_NAMESPACE_START
const char ThMLVariants::primary[] = "Primary Reading";
const char ThMLVariants::secondary[] = "Secondary Reading";
@@ -35,7 +35,9 @@ ThMLVariants::~ThMLVariants() {
void ThMLVariants::setOptionValue(const char *ival)
{
- option = (!stricmp(ival, primary));
+ if (!stricmp(ival, primary)) option = 0;
+ else if (!stricmp(ival, secondary)) option = 1;
+ else option = 2;
}
const char *ThMLVariants::getOptionValue()
@@ -51,128 +53,54 @@ const char *ThMLVariants::getOptionValue()
}
}
-char ThMLVariants::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+char ThMLVariants::processText(SWBuf &text, const SWKey *key, const SWModule *module)
{
- if (option == 0) { //we want primary only
- char *to, *from, token[2048]; // cheese. Fix.
- int tokpos = 0;
+ if ( option == 0 || option == 1) { //we want primary or variant only
bool intoken = false;
- int len;
bool hide = false;
-
- len = strlen(text) + 1; // shift string to right of buffer
- if (len < maxlen) {
- memmove(&text[maxlen - len], text, len);
- from = &text[maxlen - len];
- }
- else from = text;
- // -------------------------------
+ SWBuf token;
+ SWBuf orig = text;
+ const char *from = orig.c_str();
- for (to = text; *from; from++) {
+ //we use a fixed comparison string to make sure the loop is as fast as the original two blocks with almost the same code
+ const char* variantCompareString = (option == 0) ? "div type=\"variant\" class=\"1\"" : "div type=\"variant\" class=\"2\"";
+
+ for (text = ""; *from; from++) {
if (*from == '<') {
intoken = true;
- tokpos = 0;
- token[0] = 0;
- token[1] = 0;
- token[2] = 0;
+ token = "";
continue;
}
- if (*from == '>') { // process tokens
+ else if (*from == '>') { // process tokens
intoken = false;
- if (!strncmp(token, "div type=\"variant\"", 19)) {
- hide = true;
- continue;
- }
- else if (!strncmp(token, "/div", 4)) {
- hide = false;
- continue;
+
+ if ( !strncmp(token.c_str(), variantCompareString, 28)) { //only one of the variants, length of the two strings is 28 in both cases
+ hide = true;
+ continue;
}
-
- // if not a footnote token, keep token in text
- if (!hide) {
- *to++ = '<';
- for (char *tok = token; *tok; tok++)
- *to++ = *tok;
- *to++ = '>';
- }
- continue;
- }
- if (intoken) {
- if (tokpos < 2045)
- token[tokpos++] = *from;
- token[tokpos+2] = 0;
- }
- else {
if (!hide) {
- *to++ = *from;
- }
- }
- }
- *to++ = 0;
- *to = 0;
-
- }
- else if (option == 1) { //we want variant only
- char *to, *from, token[2048]; // cheese. Fix.
- int tokpos = 0;
- bool intoken = false;
- int len;
- bool hide = false;
-
- len = strlen(text) + 1; // shift string to right of buffer
- if (len < maxlen) {
- memmove(&text[maxlen - len], text, len);
- from = &text[maxlen - len];
- }
- else from = text;
-
- // -------------------------------
-
- for (to = text; *from; from++) {
- if (*from == '<') {
- intoken = true;
- tokpos = 0;
- token[0] = 0;
- token[1] = 0;
- token[2] = 0;
- continue;
- }
- if (*from == '>') { // process tokens
- intoken = false;
- if (!strncmp(token, "div type=\"primary\"", 19)) {
- hide = true;
- continue;
+ text += '<';
+ text.append(token);
+ text += '>';
}
- else if (!strncmp(token, "/div", 4)) {
- hide = false;
- continue;
+ if (!strncmp(token.c_str(), "/div", 4)) {
+ hide = false;
+ continue;
}
- // if not a footnote token, keep token in text
- if (!hide) {
- *to++ = '<';
- for (char *tok = token; *tok; tok++)
- *to++ = *tok;
- *to++ = '>';
- }
continue;
}
if (intoken) {
- if (tokpos < 2045)
- token[tokpos++] = *from;
- token[tokpos+2] = 0;
+ token += *from;
}
- else {
- if (!hide) {
- *to++ = *from;
- }
+ else if (!hide) {
+ text += *from;
}
}
- *to++ = 0;
- *to = 0;
}
+
return 0;
}
@@ -181,3 +109,4 @@ char ThMLVariants::ProcessText(char *text, int maxlen, const SWKey *key, const S
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmlwebif.cpp b/src/modules/filters/thmlwebif.cpp
new file mode 100644
index 0000000..f082e1e
--- /dev/null
+++ b/src/modules/filters/thmlwebif.cpp
@@ -0,0 +1,104 @@
+/***************************************************************************
+ ThMLWEBIF.cpp - ThML to HTML filter with hrefs
+ -------------------
+ begin : 2001-09-03
+ copyright : 2001 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <thmlwebif.h>
+#include <swmodule.h>
+#include <utilweb.h>
+#include <utilxml.h>
+#include <ctype.h>
+
+SWORD_NAMESPACE_START
+
+ThMLWEBIF::ThMLWEBIF() : baseURL(""), passageStudyURL(baseURL + "passagestudy.jsp") {
+ //all's done in ThMLHTMLHREF
+}
+
+bool ThMLWEBIF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+ const char *tok;
+
+ if (!substituteToken(buf, token)) { // manually process if it wasn't a simple substitution
+ MyUserData *u = (MyUserData *)userData;
+ XMLTag tag(token);
+ SWBuf url;
+ if (!strcmp(tag.getName(), "sync")) {
+ const char* value = tag.getAttribute("value");
+ url = value;
+ if ((url.length() > 1) && strchr("GH", url[0])) {
+ if (isdigit(url[1]))
+ url = url.c_str()+1;
+ }
+
+ if(tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "morph")){
+ buf += "<small><em> (";
+ buf.appendFormatted("<a href=\"%s?showMorph=%s#cv\">", passageStudyURL.c_str(), encodeURL(url).c_str() );
+ }
+ else {
+ if (value) {
+ value++; //skip leading G, H or T
+ //url = value;
+ }
+
+ buf += "<small><em> &lt;";
+ buf.appendFormatted("<a href=\"%s?showStrong=%s#cv\">", passageStudyURL.c_str(), encodeURL(url).c_str() );
+ }
+
+ buf += value;
+ buf += "</a>";
+
+ if (tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "morph")) {
+ buf += ") </em></small>";
+ }
+ else {
+ buf += "&gt; </em></small>";
+ }
+ }
+ else if (!strcmp(tag.getName(), "scripRef")) {
+ if (tag.isEndTag()) {
+ if (u->inscriptRef) { // like "<scripRef passage="John 3:16">John 3:16</scripRef>"
+ u->inscriptRef = false;
+ buf += "</a>";
+ }
+ else { // end of scripRef like "<scripRef>John 3:16</scripRef>"
+ url = u->lastTextNode;
+ buf.appendFormatted("<a href=\"%s?key=%s#cv\">", passageStudyURL.c_str(), encodeURL(url).c_str());
+ buf += u->lastTextNode.c_str();
+ buf += "</a>";
+
+ // let's let text resume to output again
+ u->suspendTextPassThru = false;
+ }
+ }
+ else if (tag.getAttribute("passage")) { //passage given
+ u->inscriptRef = true;
+
+ buf.appendFormatted("<a href=\"%s?key=%s#cv\">", passageStudyURL.c_str(), encodeURL(tag.getAttribute("passage")).c_str());
+ }
+ else { //no passage given
+ u->inscriptRef = false;
+ // let's stop text from going to output
+ u->suspendTextPassThru = true;
+ }
+ }
+ else {
+ return ThMLHTMLHREF::handleToken(buf,token,userData);
+ }
+ }
+ return true;
+}
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/unicodertf.cpp b/src/modules/filters/unicodertf.cpp
index b53a2d7..0a2bca8 100644
--- a/src/modules/filters/unicodertf.cpp
+++ b/src/modules/filters/unicodertf.cpp
@@ -1,6 +1,6 @@
/******************************************************************************
*
- * unicodertf - SWFilter decendant to convert a double byte unicode file
+ * unicodertf - SWFilter descendant to convert a double byte unicode file
* to RTF tags
*/
@@ -9,62 +9,78 @@
#include <stdio.h>
#include <unicodertf.h>
+SWORD_NAMESPACE_START
+
UnicodeRTF::UnicodeRTF() {
}
-char UnicodeRTF::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+char UnicodeRTF::processText(SWBuf &text, const SWKey *key, const SWModule *module)
{
- unsigned char *to, *from, *maxto;
- int len;
- char digit[10];
- short ch; // must be signed per unicode spec (negative is ok for big numbers > 32768)
+ const unsigned char *from;
+ char digit[10];
+ unsigned long ch;
+ signed short utf16;
+ unsigned char from2[7];
- len = strlenw(text) + 2; // shift string to right of buffer
- if (len < maxlen) {
- memmove(&text[maxlen - len], text, len);
- from = (unsigned char*)&text[maxlen - len];
- }
- else from = (unsigned char*)text;
- maxto =(unsigned char*)text + maxlen;
+ SWBuf orig = text;
+
+ from = (const unsigned char *)orig.c_str();
// -------------------------------
- for (to = (unsigned char*)text; *from && (to <= maxto); from++) {
- ch = 0;
- if ((*from & 128) != 128) {
- *to++ = *from;
- continue;
- }
- if ((*from & 128) && ((*from & 64) != 64)) {
- // error
- *from = 'x';
- continue;
- }
- *from <<= 1;
- int subsequent;
- for (subsequent = 1; (*from & 128); subsequent++) {
- *from <<= 1;
- from[subsequent] &= 63;
- ch <<= 6;
- ch |= from[subsequent];
- }
- subsequent--;
- *from <<=1;
- char significantFirstBits = 8 - (2+subsequent);
-
- ch |= (((short)*from) << (((6*subsequent)+significantFirstBits)-8));
- from += subsequent;
- *to++ = '\\';
- *to++ = 'u';
- sprintf(digit, "%d", ch);
- for (char *dig = digit; *dig; dig++)
- *to++ = *dig;
- *to++ = '?';
+ for (text = ""; *from; from++) {
+ ch = 0;
+ //case: single-byte ASCII (high bit clear)
+ if ((*from & 128) != 128) {
+ text += *from;
+ continue;
+ }
+ //case: Invalid UTF-8 (illegal continuation byte in initial position)
+ if ((*from & 128) && ((*from & 64) != 64)) {
+ continue;
+ }
+ //case: 2+ byte codepoint
+ from2[0] = *from;
+ from2[0] <<= 1;
+ int subsequent;
+ for (subsequent = 1; (from2[0] & 128) && (subsequent < 7); subsequent++) {
+ from2[0] <<= 1;
+ from2[subsequent] = from[subsequent];
+ from2[subsequent] &= 63;
+ ch <<= 6;
+ ch |= from2[subsequent];
+ }
+ subsequent--;
+ from2[0] <<= 1;
+ char significantFirstBits = 8 - (2+subsequent);
+
+ ch |= (((short)from2[0]) << (((6*subsequent)+significantFirstBits)-8));
+ from += subsequent;
+ if (ch < 0x10000) {
+ utf16 = (signed short)ch;
+ text += '\\';
+ text += 'u';
+ sprintf(digit, "%d", utf16);
+ text += digit;
+ text += '?';
+ }
+ else {
+ utf16 = (signed short)((ch - 0x10000) / 0x400 + 0xD800);
+ text += '\\';
+ text += 'u';
+ sprintf(digit, "%d", utf16);
+ text += digit;
+ text += '?';
+ utf16 = (signed short)((ch - 0x10000) % 0x400 + 0xDC00);
+ text += '\\';
+ text += 'u';
+ sprintf(digit, "%d", utf16);
+ text += digit;
+ text += '?';
+ }
}
-
- if (to != maxto) {
- *to++ = 0;
- }
- *to = 0;
+
return 0;
}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/utf16utf8.cpp b/src/modules/filters/utf16utf8.cpp
index 5a7719f..ef1593b 100644
--- a/src/modules/filters/utf16utf8.cpp
+++ b/src/modules/filters/utf16utf8.cpp
@@ -1,6 +1,6 @@
/******************************************************************************
*
- * UTF16UTF8 - SWFilter decendant to convert UTF-16 to UTF-8
+ * UTF16UTF8 - SWFilter descendant to convert UTF-16 to UTF-8
*
*/
@@ -9,38 +9,36 @@
#include <utf16utf8.h>
+SWORD_NAMESPACE_START
+
UTF16UTF8::UTF16UTF8() {
}
-char UTF16UTF8::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+char UTF16UTF8::processText(SWBuf &text, const SWKey *key, const SWModule *module)
{
unsigned short *from;
- unsigned char *to;
int len;
unsigned long uchar;
unsigned short schar;
+ if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering
+ return -1;
len = 0;
- from = (unsigned short*) text;
+ from = (unsigned short*) text.c_str();
while (*from) {
len += 2;
from++;
}
- // shift string to right of buffer
- if (len < maxlen) {
- memmove(&text[maxlen - len], text, len);
- from = (unsigned short*)&text[maxlen - len];
- }
- else
- from = (unsigned short*)text;
-
+ SWBuf orig = text;
+ from = (unsigned short*)orig.c_str();
+
// -------------------------------
- for (to = (unsigned char*)text; *from; from++) {
+ for (text = ""; *from; from++) {
uchar = 0;
if (*from < 0xD800 || *from > 0xDFFF) {
@@ -66,26 +64,24 @@ char UTF16UTF8::ProcessText(char *text, int maxlen, const SWKey *key, const SWMo
}
if (uchar < 0x80) {
- *to++ = uchar;
+ text += uchar;
}
else if (uchar < 0x800) {
- *to++ = 0xc0 | (uchar >> 6);
- *to++ = 0x80 | (uchar & 0x3f);
+ text += 0xc0 | (uchar >> 6);
+ text += 0x80 | (uchar & 0x3f);
}
else if (uchar < 0x10000) {
- *to++ = 0xe0 | (uchar >> 12);
- *to++ = 0x80 | (uchar >> 6) & 0x3f;
- *to++ = 0x80 | uchar & 0x3f;
+ text += 0xe0 | (uchar >> 12);
+ text += 0x80 | (uchar >> 6) & 0x3f;
+ text += 0x80 | uchar & 0x3f;
}
else if (uchar < 0x200000) {
- *to++ = 0xF0 | (uchar >> 18);
- *to++ = 0x80 | (uchar >> 12) & 0x3F;
- *to++ = 0x80 | (uchar >> 6) & 0x3F;
- *to++ = 0x80 | uchar & 0x3F;
+ text += 0xF0 | (uchar >> 18);
+ text += 0x80 | (uchar >> 12) & 0x3F;
+ text += 0x80 | (uchar >> 6) & 0x3F;
+ text += 0x80 | uchar & 0x3F;
}
}
- *to++ = 0;
- *to = 0;
return 0;
}
@@ -93,3 +89,4 @@ char UTF16UTF8::ProcessText(char *text, int maxlen, const SWKey *key, const SWMo
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/utf8arshaping.cpp b/src/modules/filters/utf8arshaping.cpp
index 5121f48..3246adc 100644
--- a/src/modules/filters/utf8arshaping.cpp
+++ b/src/modules/filters/utf8arshaping.cpp
@@ -1,13 +1,12 @@
/******************************************************************************
*
-* utf8arshaping - SWFilter decendant to perform Arabic shaping on
+* utf8arshaping - SWFilter descendant to perform Arabic shaping on
* UTF-8 text
*/
#ifdef _ICU_
#include <stdlib.h>
-#include <string.h>
#ifdef __GNUC__
#include <unixstr.h>
@@ -15,34 +14,39 @@
#include <utf8arshaping.h>
-UTF8arShaping::UTF8arShaping() {
-
- conv = ucnv_open("UTF-8", &err);
+SWORD_NAMESPACE_START
+UTF8arShaping::UTF8arShaping() {
+ conv = ucnv_open("UTF-8", &err);
}
UTF8arShaping::~UTF8arShaping() {
ucnv_close(conv);
}
-char UTF8arShaping::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+char UTF8arShaping::processText(SWBuf &text, const SWKey *key, const SWModule *module)
{
UChar *ustr, *ustr2;
+ if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering
+ return -1;
- int32_t len = strlen(text);
+ int32_t len = text.length();
ustr = new UChar[len];
ustr2 = new UChar[len];
// Convert UTF-8 string to UTF-16 (UChars)
- len = ucnv_toUChars(conv, ustr, len, text, -1, &err);
+ len = ucnv_toUChars(conv, ustr, len, text.c_str(), -1, &err);
len = u_shapeArabic(ustr, len, ustr2, len, U_SHAPE_LETTERS_SHAPE | U_SHAPE_DIGITS_EN2AN, &err);
- ucnv_fromUChars(conv, text, maxlen, ustr2, len, &err);
+ text.setSize(text.size()*2);
+ len = ucnv_fromUChars(conv, text.getRawData(), text.size(), ustr2, len, &err);
+ text.setSize(len);
delete [] ustr2;
delete [] ustr;
return 0;
}
+SWORD_NAMESPACE_END
#endif
diff --git a/src/modules/filters/utf8bidireorder.cpp b/src/modules/filters/utf8bidireorder.cpp
index 8fa7280..902047a 100644
--- a/src/modules/filters/utf8bidireorder.cpp
+++ b/src/modules/filters/utf8bidireorder.cpp
@@ -1,13 +1,12 @@
/******************************************************************************
*
-* utf8cnormalizer - SWFilter decendant to perform reordering of UTF-8
+* utf8cnormalizer - SWFilter descendant to perform reordering of UTF-8
* text to visual order according to Unicode BiDi
*/
#ifdef _ICU_
#include <stdlib.h>
-#include <string.h>
#ifdef __GNUC__
#include <unixstr.h>
@@ -15,6 +14,8 @@
#include <utf8bidireorder.h>
+SWORD_NAMESPACE_START
+
UTF8BiDiReorder::UTF8BiDiReorder() {
conv = ucnv_open("UTF-8", &err);
@@ -25,15 +26,17 @@ UTF8BiDiReorder::~UTF8BiDiReorder() {
ucnv_close(conv);
}
-char UTF8BiDiReorder::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+char UTF8BiDiReorder::processText(SWBuf &text, const SWKey *key, const SWModule *module)
{
UChar *ustr, *ustr2;
+ if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering
+ return -1;
- int32_t len = strlen(text);
+ int32_t len = text.length();
ustr = new UChar[len]; //each char could become a surrogate pair
// Convert UTF-8 string to UTF-16 (UChars)
- len = ucnv_toUChars(conv, ustr, len, text, -1, &err);
+ len = ucnv_toUChars(conv, ustr, len, text.c_str(), -1, &err);
ustr2 = new UChar[len];
UBiDi* bidi = ubidi_openSized(len + 1, 0, &err);
@@ -45,11 +48,14 @@ char UTF8BiDiReorder::ProcessText(char *text, int maxlen, const SWKey *key, cons
// len = ubidi_writeReverse(ustr, len, ustr2, len,
// UBIDI_DO_MIRRORING | UBIDI_REMOVE_BIDI_CONTROLS, &err);
- ucnv_fromUChars(conv, text, maxlen, ustr2, len, &err);
+ text.setSize(text.size()*2);
+ len = ucnv_fromUChars(conv, text.getRawData(), text.size(), ustr2, len, &err);
+ text.setSize(len);
delete [] ustr2;
delete [] ustr;
return 0;
}
+SWORD_NAMESPACE_END
#endif
diff --git a/src/modules/filters/utf8cantillation.cpp b/src/modules/filters/utf8cantillation.cpp
index 84cb513..6213620 100644
--- a/src/modules/filters/utf8cantillation.cpp
+++ b/src/modules/filters/utf8cantillation.cpp
@@ -1,6 +1,6 @@
/******************************************************************************
*
- * UTF8Cantillation - SWFilter decendant to remove UTF-8 Hebrew cantillation
+ * UTF8Cantillation - SWFilter descendant to remove UTF-8 Hebrew cantillation
*
*/
@@ -9,56 +9,47 @@
#include <stdio.h>
#include <utf8cantillation.h>
+SWORD_NAMESPACE_START
-const char UTF8Cantillation::on[] = "On";
-const char UTF8Cantillation::off[] = "Off";
-const char UTF8Cantillation::optName[] = "Hebrew Cantillation";
-const char UTF8Cantillation::optTip[] = "Toggles Hebrew Cantillation Marks";
+const char oName[] = "Hebrew Cantillation";
+const char oTip[] = "Toggles Hebrew Cantillation Marks";
-UTF8Cantillation::UTF8Cantillation() {
- option = false;
- options.push_back(on);
- options.push_back(off);
+const SWBuf choices[3] = {"On", "Off", ""};
+const StringList oValues(&choices[0], &choices[2]);
+
+UTF8Cantillation::UTF8Cantillation() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("Off");
}
-UTF8Cantillation::~UTF8Cantillation(){};
-void UTF8Cantillation::setOptionValue(const char *ival)
-{
- option = (!stricmp(ival, on));
-}
+UTF8Cantillation::~UTF8Cantillation(){};
-const char *UTF8Cantillation::getOptionValue()
-{
- return (option) ? on:off;
-}
-char UTF8Cantillation::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
-{
+char UTF8Cantillation::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
if (!option) {
- unsigned char *to, *from;
- to = (unsigned char*)text;
- //The UTF-8 range 0xD6 0x90 to 0xD6 0xAF and 0xD7 0x84 consist of Hebrew cantillation marks so block those out.
- for (from = (unsigned char*)text; *from; from++) {
- if (*from != 0xD6) {
- if (*from == 0xD7 && *(from + 1) == 0x84) {
- from++;
- }
- else {
- *to++ = *from;
- }
- }
- else if (*(from + 1) < 0x90 || *(from + 1) > 0xAF) {
- *to++ = *from;
- from++;
- *to++ = *from;
- }
- else {
- from++;
- }
+ //The UTF-8 range 0xD6 0x90 to 0xD6 0xAF and 0xD7 0x84 consist of Hebrew cantillation marks so block those out.
+ SWBuf orig = text;
+ const unsigned char* from = (unsigned char*)orig.c_str();
+ for (text = ""; *from; from++) {
+ if (*from != 0xD6) {
+ if (*from == 0xD7 && *(from + 1) == 0x84) {
+ from++;
+ }
+ else {
+ text += *from;
+ }
+ }
+ else if (*(from + 1) < 0x90 || *(from + 1) > 0xAF) {
+ text += *from;
+ from++;
+ text += *from;
+ }
+ else {
+ from++;
+ }
+ }
}
- *to++ = 0;
- *to = 0;
- }
return 0;
}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/utf8greekaccents.cpp b/src/modules/filters/utf8greekaccents.cpp
index b0e5dc8..05ef59b 100644
--- a/src/modules/filters/utf8greekaccents.cpp
+++ b/src/modules/filters/utf8greekaccents.cpp
@@ -1,6 +1,6 @@
/******************************************************************************
*
- * UTF8GreekAccents - SWFilter decendant to remove UTF-8 Greek accents
+ * UTF8GreekAccents - SWFilter descendant to remove UTF-8 Greek accents
*
*/
@@ -9,239 +9,235 @@
#include <stdio.h>
#include <utf8greekaccents.h>
+SWORD_NAMESPACE_START
-const char UTF8GreekAccents::on[] = "On";
-const char UTF8GreekAccents::off[] = "Off";
-const char UTF8GreekAccents::optName[] = "Greek Accents";
-const char UTF8GreekAccents::optTip[] = "Toggles Greek Accents";
+const char oName[] = "Greek Accents";
+const char oTip[] = "Toggles Greek Accents";
-UTF8GreekAccents::UTF8GreekAccents() {
- option = true;
- options.push_back(on);
- options.push_back(off);
+const SWBuf choices[3] = {"On", "Off", ""};
+const StringList oValues(&choices[0], &choices[2]);
+
+UTF8GreekAccents::UTF8GreekAccents() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("On");
}
UTF8GreekAccents::~UTF8GreekAccents(){};
-void UTF8GreekAccents::setOptionValue(const char *ival)
-{
- option = (!stricmp(ival, on));
-}
-
-const char *UTF8GreekAccents::getOptionValue()
-{
- return (option) ? on:off;
-}
-char UTF8GreekAccents::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
-{
- if (!option) {
- unsigned char *to, *from;
-
- to = (unsigned char*)text;
- for (from = (unsigned char*)text; *from; from++) {
- //first just remove combining characters
- if (*from == 0xE2 && *(from + 1) == 0x80 && *(from + 2) == 0x99)
- from += 2;
- else if (*from == 0xCC && *(from + 1)) {
- if (*(from + 1) == 0x80 || *(from + 1) == 0x81 || *(from + 1) == 0x82 || *(from + 1) == 0x88 || *(from + 1) == 0x93 || *(from + 1) == 0x94)
- from++;
- }
- else if (*from == 0xCD && *(from + 1) == 0xBA)
- from++;
- //now converted pre-composed characters to their alphabetic bases, discarding the accents
-
- //Greek
- //capital alpha
- else if ((*from == 0xCE && *(from + 1) == 0x86)) {
- *to++ = 0xCE;
- *to++ = 0x91;
- from++;
- }
- //capital epsilon
- else if ((*from == 0xCE && *(from + 1) == 0x88)) {
- *to++ = 0xCE;
- *to++ = 0x95;
- from++;
- }
- //capital eta
- else if ((*from == 0xCE && *(from + 1) == 0x89)) {
- *to++ = 0xCE;
- *to++ = 0x97;
- from++;
- }
- //capital iota
- else if ((*from == 0xCE && (*(from + 1) == 0x8A || *(from + 1) == 0xAA))) {
- *to++ = 0xCE;
- *to++ = 0x99;
- from++;
- }
- //capital omicron
- else if ((*from == 0xCE && *(from + 1) == 0x8C)) {
- *to++ = 0xCE;
- *to++ = 0x9F;
- from++;
- }
- //capital upsilon
- else if ((*from == 0xCE && (*(from + 1) == 0x8E || *(from + 1) == 0xAB))) {
- *to++ = 0xCE;
- *to++ = 0xA5;
- from++;
- }
- //capital omega
- else if ((*from == 0xCE && *(from + 1) == 0x8F)) {
- *to++ = 0xCE;
- *to++ = 0xA9;
- from++;
- }
-
- //alpha
- else if ((*from == 0xCE && *(from + 1) == 0xAC)) {
- *to++ = 0xCE;
- *to++ = 0xB1;
- from++;
- }
- //epsilon
- else if ((*from == 0xCE && *(from + 1) == 0xAD)) {
- *to++ = 0xCE;
- *to++ = 0xB5;
- from++;
- }
- //eta
- else if ((*from == 0xCE && *(from + 1) == 0xAE)) {
- *to++ = 0xCE;
- *to++ = 0xB7;
- from++;
- }
- //iota
- else if ((*from == 0xCE && *(from + 1) == 0xAF) || (*from == 0xCF && *(from + 1) == 0x8A)) {
- *to++ = 0xCE;
- *to++ = 0xB9;
- from++;
- }
- //omicron
- else if ((*from == 0xCF && *(from + 1) == 0x8C)) {
- *to++ = 0xCE;
- *to++ = 0xBF;
- from++;
- }
- //upsilon
- else if ((*from == 0xCE && *(from + 1) == 0x88) || (*from == 0xCF && (*(from + 1) == 0x8B || *(from + 1) == 0x8D))) {
- *to++ = 0xCF;
- *to++ = 0x85;
- from++;
- }
- //omega
- else if ((*from == 0xCF && *(from + 1) == 0x8E)) {
- *to++ = 0xCF;
- *to++ = 0x89;
- from++;
- }
-
- //Extended Greek
- //capital alpha
- else if (*from == 0xE1 && ((*(from + 1) == 0xBC || *(from + 1) == 0xBE) && *(from + 2) >= 0x88 && *(from + 2) <= 0x8F) || (*(from + 1) == 0xBE && *(from + 2) >= 0xB8 && *(from + 2) <= 0xBC)) {
- *to++ = 0xCE;
- *to++ = 0x91;
- from+=2;
- }
- //capital epsilon
- else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0x98 && *(from + 2) <= 0x9D) || (*(from + 1) == 0xBF && (*(from + 2) == 0x88 || *(from + 2) == 0x89)))) {
- *to++ = 0xCE;
- *to++ = 0x95;
- from+=2;
- }
- //capital eta
- else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0xA8 && *(from + 2) <= 0xAF) || (*(from + 1) == 0xBE && *(from + 2) >= 0x98 && *(from + 2) <= 0x9F) || (*(from + 1) == 0xBF && *(from + 2) >= 0x8A && *(from + 2) <= 0x8C))) {
- *to++ = 0xCE;
- *to++ = 0x97;
- from+=2;
- }
- //capital iota
- else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0xB8 && *(from + 2) <= 0xBF) || (*(from + 1) == 0xBF && *(from + 2) >= 0x98 && *(from + 2) <= 0x9B))) {
- *to++ = 0xCE;
- *to++ = 0x99;
- from+=2;
- }
- //capital omicron
- else if (*from == 0xE1 && ((*(from + 1) == 0xBD && *(from + 2) >= 0x88 && *(from + 2) <= 0x8D) || (*(from + 1) == 0xBF && *(from + 2) == 0xB8 || *(from + 2) == 0xB9))) {
- *to++ = 0xCE;
- *to++ = 0x9F;
- from+=2;
- }
- //capital upsilon
- else if (*from == 0xE1 && ((*(from + 1) == 0xBD && *(from + 2) >= 0x99 && *(from + 2) <= 0x9F) || (*(from + 1) == 0xBF && *(from + 2) >= 0xA8 && *(from + 2) <= 0xAB))) {
- *to++ = 0xCE;
- *to++ = 0xA5;
- from+=2;
- }
- //capital omega
- else if (*from == 0xE1 && (((*(from + 1) == 0xBD || *(from + 1) == 0xBE) && *(from + 2) >= 0xA8 && *(from + 2) <= 0xAF) || (*(from + 1) == 0xBF && *(from + 2) >= 0xBA && *(from + 2) <= 0xBC))) {
- *to++ = 0xCE;
- *to++ = 0xA9;
- from+=2;
- }
- //capital rho
- else if (*from == 0xE1 && *(from + 1) == 0xBF && *(from + 2) == 0xAC) {
- *to++ = 0xCE;
- *to++ = 0xA1;
- from+=2;
- }
-
- //alpha
- else if (*from == 0xE1 && ((*(from + 1) == 0xBC || *(from + 1) == 0xBE) && *(from + 2) >= 0x80 && *(from + 2) <= 0x87) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB0 || *(from + 2) == 0xB1)) || (*(from + 1) == 0xBE && *(from + 2) >= 0xB0 && *(from + 2) <= 0xB7)) {
- *to++ = 0xCE;
- *to++ = 0xB1;
- from+=2;
- }
- //epsilon
- else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0x90 && *(from + 2) <= 0x95) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB2 || *(from + 2) == 0xB3)))) {
- *to++ = 0xCE;
- *to++ = 0xB5;
- from+=2;
- }
- //eta
- else if (*from == 0xE1 && ((*(from + 1) == 0xBE && *(from + 2) >= 0x90 && *(from + 2) <= 0x97) || (*(from + 1) == 0xBC && *(from + 2) >= 0xA0 && *(from + 2) <= 0xA7) || (*(from + 1) == 0xBF && *(from + 2) >= 0x82 && *(from + 2) <= 0x87) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB4 || *(from + 2) == 0xB5)))) {
- *to++ = 0xCE;
- *to++ = 0xB7;
- from+=2;
- }
- //iota
- else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0xB0 && *(from + 2) <= 0xB7) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB6 || *(from + 2) == 0xB7)) || (*(from + 1) == 0xBF && *(from + 2) >= 0x90 && *(from + 2) <= 0x97))) {
- *to++ = 0xCE;
- *to++ = 0xB9;
- from+=2;
- }
- //omicron
- else if (*from == 0xE1 && (*(from + 1) == 0xBD && ((*(from + 2) >= 0x80 && *(from + 2) <= 0x85) || (*(from + 2) == 0xB8 || *(from + 2) == 0xB9)))) {
- *to++ = 0xCE;
- *to++ = 0xBF;
- from+=2;
- }
- //upsilon
- else if (*from == 0xE1 && ((*(from + 1) == 0xBD && ((*(from + 2) >= 0x90 && *(from + 2) <= 0x97) || *(from + 2) == 0xBA || *(from + 2) == 0xBB)) || (*(from + 1) == 0xBF && ((*(from + 2) >= 0xA0 && *(from + 2) <= 0xA3) || *(from + 2) == 0xA6 || *(from + 2) == 0xA7)))) {
- *to++ = 0xCF;
- *to++ = 0x85;
- from+=2;
- }
- //omega
- else if (*from == 0xE1 && ((*(from + 1) == 0xBD && ((*(from + 2) >= 0xA0 && *(from + 2) <= 0xA7) || (*(from + 2) == 0xBC || *(from + 2) == 0xBD))) || (*(from + 1) == 0xBE && (*(from + 2) >= 0xA0 && *(from + 2) <= 0xA7)) || (*(from + 1) == 0xBF && *(from + 2) >= 0xB2 && *(from + 2) <= 0xB7))) {
- *to++ = 0xCF;
- *to++ = 0x89;
- from+=2;
- }
- //rho
- else if (*from == 0xE1 && *(from + 1) == 0xBF && (*(from + 2) == 0xA4 && *(from + 2) == 0xA5)) {
- *to++ = 0xCF;
- *to++ = 0x81;
- from+=2;
- }
- else
- *to++ = *from;
+char UTF8GreekAccents::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+
+ if (!option) { //we don't want greek accents
+ //unsigned char *to, *from;
+ //to = (unsigned char*)text;
+ //for (from = (unsigned char*)text; *from; from++) {
+
+ SWBuf orig = text;
+ const unsigned char* from = (unsigned char*)orig.c_str();
+ for (text = ""; *from; from++) {
+ //first just remove combining characters
+ if (*from == 0xE2 && *(from + 1) == 0x80 && *(from + 2) == 0x99) {
+ from += 2;
+ }
+ else if (*from == 0xCC && *(from + 1)) {
+ if (*(from + 1) == 0x80 || *(from + 1) == 0x81 || *(from + 1) == 0x82 || *(from + 1) == 0x88 || *(from + 1) == 0x93 || *(from + 1) == 0x94) {
+ from++;
+ }
+ }
+ else if (*from == 0xCD && *(from + 1) == 0xBA) {
+ from++;
+ }
+ //now convert pre-composed characters to their alphabetic bases, discarding the accents
+
+ //Greek
+ //capital alpha
+ else if ((*from == 0xCE && *(from + 1) == 0x86)) {
+ text += 0xCE;
+ text += 0x91;
+ from++;
+ }
+ //capital epsilon
+ else if ((*from == 0xCE && *(from + 1) == 0x88)) {
+ text += 0xCE;
+ text += 0x95;
+ from++;
+ }
+ //capital eta
+ else if ((*from == 0xCE && *(from + 1) == 0x89)) {
+ text += 0xCE;
+ text += 0x97;
+ from++;
+ }
+ //capital iota
+ else if ((*from == 0xCE && (*(from + 1) == 0x8A || *(from + 1) == 0xAA))) {
+ text += 0xCE;
+ text += 0x99;
+ from++;
+ }
+ //capital omicron
+ else if ((*from == 0xCE && *(from + 1) == 0x8C)) {
+ text += 0xCE;
+ text += 0x9F;
+ from++;
+ }
+ //capital upsilon
+ else if ((*from == 0xCE && (*(from + 1) == 0x8E || *(from + 1) == 0xAB))) {
+ text += 0xCE;
+ text += 0xA5;
+ from++;
+ }
+ //capital omega
+ else if ((*from == 0xCE && *(from + 1) == 0x8F)) {
+ text += 0xCE;
+ text += 0xA9;
+ from++;
+ }
+
+ //alpha
+ else if ((*from == 0xCE && *(from + 1) == 0xAC)) {
+ text += 0xCE;
+ text += 0xB1;
+ from++;
+ }
+ //epsilon
+ else if ((*from == 0xCE && *(from + 1) == 0xAD)) {
+ text += 0xCE;
+ text += 0xB5;
+ from++;
+ }
+ //eta
+ else if ((*from == 0xCE && *(from + 1) == 0xAE)) {
+ text += 0xCE;
+ text += 0xB7;
+ from++;
+ }
+ //iota
+ else if ((*from == 0xCE && *(from + 1) == 0xAF) || (*from == 0xCF && *(from + 1) == 0x8A)) {
+ text += 0xCE;
+ text += 0xB9;
+ from++;
+ }
+ //omicron
+ else if ((*from == 0xCF && *(from + 1) == 0x8C)) {
+ text += 0xCE;
+ text += 0xBF;
+ from++;
+ }
+ //upsilon
+ else if ((*from == 0xCE && *(from + 1) == 0x88) || (*from == 0xCF && (*(from + 1) == 0x8B || *(from + 1) == 0x8D))) {
+ text += 0xCF;
+ text += 0x85;
+ from++;
+ }
+ //omega
+ else if ((*from == 0xCF && *(from + 1) == 0x8E)) {
+ text += 0xCF;
+ text += 0x89;
+ from++;
+ }
+
+ //Extended Greek
+ //capital alpha
+ else if (*from == 0xE1 && ((*(from + 1) == 0xBC || *(from + 1) == 0xBE) && *(from + 2) >= 0x88 && *(from + 2) <= 0x8F) || (*(from + 1) == 0xBE && *(from + 2) >= 0xB8 && *(from + 2) <= 0xBC)) {
+ text += 0xCE;
+ text += 0x91;
+ from+=2;
+ }
+ //capital epsilon
+ else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0x98 && *(from + 2) <= 0x9D) || (*(from + 1) == 0xBF && (*(from + 2) == 0x88 || *(from + 2) == 0x89)))) {
+ text += 0xCE;
+ text += 0x95;
+ from+=2;
+ }
+ //capital eta
+ else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0xA8 && *(from + 2) <= 0xAF) || (*(from + 1) == 0xBE && *(from + 2) >= 0x98 && *(from + 2) <= 0x9F) || (*(from + 1) == 0xBF && *(from + 2) >= 0x8A && *(from + 2) <= 0x8C))) {
+ text += 0xCE;
+ text += 0x97;
+ from+=2;
+ }
+ //capital iota
+ else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0xB8 && *(from + 2) <= 0xBF) || (*(from + 1) == 0xBF && *(from + 2) >= 0x98 && *(from + 2) <= 0x9B))) {
+ text += 0xCE;
+ text += 0x99;
+ from+=2;
+ }
+ //capital omicron
+ else if (*from == 0xE1 && ((*(from + 1) == 0xBD && *(from + 2) >= 0x88 && *(from + 2) <= 0x8D) || (*(from + 1) == 0xBF && *(from + 2) == 0xB8 || *(from + 2) == 0xB9))) {
+ text += 0xCE;
+ text += 0x9F;
+ from+=2;
+ }
+ //capital upsilon
+ else if (*from == 0xE1 && ((*(from + 1) == 0xBD && *(from + 2) >= 0x99 && *(from + 2) <= 0x9F) || (*(from + 1) == 0xBF && *(from + 2) >= 0xA8 && *(from + 2) <= 0xAB))) {
+ text += 0xCE;
+ text += 0xA5;
+ from+=2;
+ }
+ //capital omega
+ else if (*from == 0xE1 && (((*(from + 1) == 0xBD || *(from + 1) == 0xBE) && *(from + 2) >= 0xA8 && *(from + 2) <= 0xAF) || (*(from + 1) == 0xBF && *(from + 2) >= 0xBA && *(from + 2) <= 0xBC))) {
+ text += 0xCE;
+ text += 0xA9;
+ from+=2;
+ }
+ //capital rho
+ else if (*from == 0xE1 && *(from + 1) == 0xBF && *(from + 2) == 0xAC) {
+ text += 0xCE;
+ text += 0xA1;
+ from+=2;
+ }
+
+ //alpha
+ else if (*from == 0xE1 && ((*(from + 1) == 0xBC || *(from + 1) == 0xBE) && *(from + 2) >= 0x80 && *(from + 2) <= 0x87) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB0 || *(from + 2) == 0xB1)) || (*(from + 1) == 0xBE && *(from + 2) >= 0xB0 && *(from + 2) <= 0xB7)) {
+ text += 0xCE;
+ text += 0xB1;
+ from+=2;
+ }
+ //epsilon
+ else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0x90 && *(from + 2) <= 0x95) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB2 || *(from + 2) == 0xB3)))) {
+ text += 0xCE;
+ text += 0xB5;
+ from+=2;
+ }
+ //eta
+ else if (*from == 0xE1 && ((*(from + 1) == 0xBE && *(from + 2) >= 0x90 && *(from + 2) <= 0x97) || (*(from + 1) == 0xBC && *(from + 2) >= 0xA0 && *(from + 2) <= 0xA7) || (*(from + 1) == 0xBF && *(from + 2) >= 0x82 && *(from + 2) <= 0x87) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB4 || *(from + 2) == 0xB5)))) {
+ text += 0xCE;
+ text += 0xB7;
+ from+=2;
+ }
+ //iota
+ else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0xB0 && *(from + 2) <= 0xB7) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB6 || *(from + 2) == 0xB7)) || (*(from + 1) == 0xBF && *(from + 2) >= 0x90 && *(from + 2) <= 0x97))) {
+ text += 0xCE;
+ text += 0xB9;
+ from+=2;
+ }
+ //omicron
+ else if (*from == 0xE1 && (*(from + 1) == 0xBD && ((*(from + 2) >= 0x80 && *(from + 2) <= 0x85) || (*(from + 2) == 0xB8 || *(from + 2) == 0xB9)))) {
+ text += 0xCE;
+ text += 0xBF;
+ from+=2;
+ }
+ //upsilon
+ else if (*from == 0xE1 && ((*(from + 1) == 0xBD && ((*(from + 2) >= 0x90 && *(from + 2) <= 0x97) || *(from + 2) == 0xBA || *(from + 2) == 0xBB)) || (*(from + 1) == 0xBF && ((*(from + 2) >= 0xA0 && *(from + 2) <= 0xA3) || *(from + 2) == 0xA6 || *(from + 2) == 0xA7)))) {
+ text += 0xCF;
+ text += 0x85;
+ from+=2;
+ }
+ //omega
+ else if (*from == 0xE1 && ((*(from + 1) == 0xBD && ((*(from + 2) >= 0xA0 && *(from + 2) <= 0xA7) || (*(from + 2) == 0xBC || *(from + 2) == 0xBD))) || (*(from + 1) == 0xBE && (*(from + 2) >= 0xA0 && *(from + 2) <= 0xA7)) || (*(from + 1) == 0xBF && *(from + 2) >= 0xB2 && *(from + 2) <= 0xB7))) {
+ text += 0xCF;
+ text += 0x89;
+ from+=2;
+ }
+ //rho
+ else if (*from == 0xE1 && *(from + 1) == 0xBF && (*(from + 2) == 0xA4 && *(from + 2) == 0xA5)) {
+ text += 0xCF;
+ text += 0x81;
+ from+=2;
+ }
+ else { //no characters we filter
+ text += *from;
+ }
+ }
}
- *to++ = 0;
- *to = 0;
- }
return 0;
}
@@ -250,3 +246,4 @@ char UTF8GreekAccents::ProcessText(char *text, int maxlen, const SWKey *key, con
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/utf8hebrewpoints.cpp b/src/modules/filters/utf8hebrewpoints.cpp
index e5b50e1..0476db8 100644
--- a/src/modules/filters/utf8hebrewpoints.cpp
+++ b/src/modules/filters/utf8hebrewpoints.cpp
@@ -1,6 +1,6 @@
/******************************************************************************
*
- * UTF8HebrewPoints - SWFilter decendant to remove UTF-8 Hebrew vowel points
+ * UTF8HebrewPoints - SWFilter descendant to remove UTF-8 Hebrew vowel points
*
*/
@@ -9,47 +9,36 @@
#include <stdio.h>
#include <utf8hebrewpoints.h>
+SWORD_NAMESPACE_START
-const char UTF8HebrewPoints::on[] = "On";
-const char UTF8HebrewPoints::off[] = "Off";
-const char UTF8HebrewPoints::optName[] = "Hebrew Vowel Points";
-const char UTF8HebrewPoints::optTip[] = "Toggles Hebrew Vowel Points";
+const char oName[] = "Hebrew Vowel Points";
+const char oTip[] = "Toggles Hebrew Vowel Points";
-UTF8HebrewPoints::UTF8HebrewPoints() {
- option = true;
- options.push_back(on);
- options.push_back(off);
+const SWBuf choices[3] = {"On", "Off", ""};
+const StringList oValues(&choices[0], &choices[2]);
+
+UTF8HebrewPoints::UTF8HebrewPoints() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("On");
}
UTF8HebrewPoints::~UTF8HebrewPoints(){};
-void UTF8HebrewPoints::setOptionValue(const char *ival)
-{
- option = (!stricmp(ival, on));
-}
-
-const char *UTF8HebrewPoints::getOptionValue()
-{
- return (option) ? on:off;
-}
-char UTF8HebrewPoints::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
-{
+char UTF8HebrewPoints::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
if (!option) {
- unsigned char *to, *from;
-
- to = (unsigned char*)text;
//The UTF-8 range 0xD6 0xB0 to 0xD6 0xBF, excluding 0xD6 0xBE, consists of Hebrew vowel points so block those out.
- for (from = (unsigned char*)text; *from; from++) {
+ SWBuf orig = text;
+ const unsigned char* from = (unsigned char*)orig.c_str();
+ for (text = ""; *from; from++) {
if ((*from == 0xD6) && (*(from + 1) >= 0xB0 && *(from + 1) <= 0xBF) && (*(from + 1) != 0xBE)) {
from++;
}
else {
- *to++ = *from;
+ text += *from;
}
}
- *to++ = 0;
- *to = 0;
- }
+ }
return 0;
}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/utf8html.cpp b/src/modules/filters/utf8html.cpp
index 7487815..94fbdc1 100644
--- a/src/modules/filters/utf8html.cpp
+++ b/src/modules/filters/utf8html.cpp
@@ -1,6 +1,6 @@
/******************************************************************************
*
- * utf8html - SWFilter decendant to convert a UTF-8 stream to HTML escapes
+ * utf8html - SWFilter descendant to convert a UTF-8 stream to HTML escapes
*
*/
@@ -9,29 +9,32 @@
#include <stdio.h>
#include <utf8html.h>
+SWORD_NAMESPACE_START
+
UTF8HTML::UTF8HTML() {
}
-char UTF8HTML::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+char UTF8HTML::processText(SWBuf &text, const SWKey *key, const SWModule *module)
{
- unsigned char *to, *from;
+ unsigned char *from;
int len;
char digit[10];
unsigned long ch;
+ if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering
+ return -1;
+
+ len = strlenw(text.c_str()) + 2; // shift string to right of buffer
+
+ SWBuf orig = text;
+ from = (unsigned char *)orig.c_str();
- len = strlenw(text) + 2; // shift string to right of buffer
- if (len < maxlen) {
- memmove(&text[maxlen - len], text, len);
- from = (unsigned char*)&text[maxlen - len];
- }
- else from = (unsigned char*)text;
// -------------------------------
- for (to = (unsigned char*)text; *from; from++) {
+ for (text = ""; *from; from++) {
ch = 0;
if ((*from & 128) != 128) {
// if (*from != ' ')
- *to++ = *from;
+ text += *from;
continue;
}
if ((*from & 128) && ((*from & 64) != 64)) {
@@ -53,14 +56,14 @@ char UTF8HTML::ProcessText(char *text, int maxlen, const SWKey *key, const SWMod
ch |= (((short)*from) << (((6*subsequent)+significantFirstBits)-8));
from += subsequent;
- *to++ = '&';
- *to++ = '#';
+ text += '&';
+ text += '#';
sprintf(digit, "%d", ch);
for (char *dig = digit; *dig; dig++)
- *to++ = *dig;
- *to++ = ';';
+ text += *dig;
+ text += ';';
}
- *to++ = 0;
- *to = 0;
return 0;
}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/utf8latin1.cpp b/src/modules/filters/utf8latin1.cpp
index 6cc1acd..6ab4c9c 100644
--- a/src/modules/filters/utf8latin1.cpp
+++ b/src/modules/filters/utf8latin1.cpp
@@ -1,6 +1,6 @@
/******************************************************************************
*
- * UTF8Latin1 - SWFilter decendant to convert UTF-8 to Latin-1
+ * UTF8Latin1 - SWFilter descendant to convert UTF-8 to Latin-1
*
*/
@@ -9,31 +9,32 @@
#include <utf8latin1.h>
+SWORD_NAMESPACE_START
+
UTF8Latin1::UTF8Latin1(char rchar) : replacementChar(rchar) {
}
-char UTF8Latin1::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+char UTF8Latin1::processText(SWBuf &text, const SWKey *key, const SWModule *module)
{
unsigned char *from;
- unsigned short *to;
int len;
unsigned long uchar;
unsigned char significantFirstBits, subsequent;
- len = strlen(text) + 1; // shift string to right of buffer
- if (len < maxlen) {
- memmove(&text[maxlen - len], text, len);
- from = (unsigned char*)&text[maxlen - len];
+ if ((unsigned long)key < 2) {// hack, we're en(1)/de(0)ciphering
+ return -1;
}
- else
- from = (unsigned char*)text;
-
+ len = strlen(text.c_str()) + 1; // shift string to right of buffer
+
+ SWBuf orig = text;
+ from = (unsigned char*)orig.c_str();
+
// -------------------------------
- for (to = (unsigned short*)text; *from; from++) {
+ for (text = ""; *from; from++) {
uchar = 0;
if ((*from & 128) != 128) {
// if (*from != ' ')
@@ -60,15 +61,14 @@ char UTF8Latin1::ProcessText(char *text, int maxlen, const SWKey *key, const SWM
}
if (uchar < 0xff) {
- *to++ = (unsigned char)uchar;
+ text += (unsigned char)uchar;
}
else {
- *to++ = replacementChar;
+ text += replacementChar;
}
}
- *to++ = 0;
- *to = 0;
-
return 0;
}
+SWORD_NAMESPACE_END
+
diff --git a/src/modules/filters/utf8nfc.cpp b/src/modules/filters/utf8nfc.cpp
index df9e090..5500224 100644
--- a/src/modules/filters/utf8nfc.cpp
+++ b/src/modules/filters/utf8nfc.cpp
@@ -1,13 +1,12 @@
/******************************************************************************
*
-* utf8nfc - SWFilter decendant to perform NFC (canonical composition
+* utf8nfc - SWFilter descendant to perform NFC (canonical composition
* normalization) on UTF-8 text
*/
#ifdef _ICU_
#include <stdlib.h>
-#include <string.h>
#ifdef __GNUC__
#include <unixstr.h>
@@ -15,6 +14,8 @@
#include <utf8nfc.h>
+SWORD_NAMESPACE_START
+
UTF8NFC::UTF8NFC() {
conv = ucnv_open("UTF-8", &err);
}
@@ -23,19 +24,24 @@ UTF8NFC::~UTF8NFC() {
ucnv_close(conv);
}
-char UTF8NFC::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+char UTF8NFC::processText(SWBuf &text, const SWKey *key, const SWModule *module)
{
- int32_t len = strlen(text) * 2;
+ if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering
+ return -1;
+
+ int32_t len = text.length() * 2;
source = new UChar[len + 1]; //each char could become a surrogate pair
// Convert UTF-8 string to UTF-16 (UChars)
- len = ucnv_toUChars(conv, source, len, text, -1, &err);
+ len = ucnv_toUChars(conv, source, len, text.c_str(), -1, &err);
target = new UChar[len + 1];
//canonical composition
unorm_normalize(source, len, UNORM_NFC, 0, target, len, &err);
- ucnv_fromUChars(conv, text, maxlen, target, -1, &err);
+ text.setSize(text.size()*2);
+ len = ucnv_fromUChars(conv, text.getRawData(), text.size(), target, -1, &err);
+ text.setSize(len);
delete [] source;
delete [] target;
@@ -43,4 +49,5 @@ char UTF8NFC::ProcessText(char *text, int maxlen, const SWKey *key, const SWModu
return 0;
}
+SWORD_NAMESPACE_END
#endif
diff --git a/src/modules/filters/utf8nfkd.cpp b/src/modules/filters/utf8nfkd.cpp
index 450cbbf..6da24f8 100644
--- a/src/modules/filters/utf8nfkd.cpp
+++ b/src/modules/filters/utf8nfkd.cpp
@@ -1,13 +1,12 @@
/******************************************************************************
*
-* utf8nfkd - SWFilter decendant to perform NFKD (compatability decomposition
+* utf8nfkd - SWFilter descendant to perform NFKD (compatibility decomposition
* normalization) on UTF-8 text
*/
#ifdef _ICU_
#include <stdlib.h>
-#include <string.h>
#ifdef __GNUC__
#include <unixstr.h>
@@ -15,6 +14,8 @@
#include <utf8nfkd.h>
+SWORD_NAMESPACE_START
+
UTF8NFKD::UTF8NFKD() {
conv = ucnv_open("UTF-8", &err);
}
@@ -23,24 +24,30 @@ UTF8NFKD::~UTF8NFKD() {
ucnv_close(conv);
}
-char UTF8NFKD::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+char UTF8NFKD::processText(SWBuf &text, const SWKey *key, const SWModule *module)
{
- int32_t len = strlen(text) * 2;
+ if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering
+ return -1;
+
+ int32_t len = text.length() * 2;
source = new UChar[len + 1]; //each char could become a surrogate pair
// Convert UTF-8 string to UTF-16 (UChars)
- len = ucnv_toUChars(conv, source, len, text, -1, &err);
+ len = ucnv_toUChars(conv, source, len, text.c_str(), -1, &err);
target = new UChar[len + 1];
//compatibility decomposition
unorm_normalize(source, len, UNORM_NFKD, 0, target, len, &err);
- ucnv_fromUChars(conv, text, maxlen, target, -1, &err);
+ text.setSize(text.size()*2);
+ len = ucnv_fromUChars(conv, text.getRawData(), text.size(), target, -1, &err);
+ text.setSize(len);
- delete [] source;
- delete [] target;
+ delete [] source;
+ delete [] target;
return 0;
}
+SWORD_NAMESPACE_END
#endif
diff --git a/src/modules/filters/utf8transliterator.cpp b/src/modules/filters/utf8transliterator.cpp
index 7bc068a..b753c0c 100644
--- a/src/modules/filters/utf8transliterator.cpp
+++ b/src/modules/filters/utf8transliterator.cpp
@@ -1,27 +1,43 @@
/******************************************************************************
*
-* utf8transliterators - SWFilter decendant to transliterate between
+* utf8transliterators - SWFilter descendant to transliterate between
* ICU-supported scripts.
*/
#ifdef _ICU_
#include <stdlib.h>
-#include <string.h>
#ifdef __GNUC__
#include <unixstr.h>
#endif
+#include <unicode/ucnv.h>
+#include <unicode/uchar.h>
#include <utf8transliterator.h>
+#ifndef _ICUSWORD_
+#include "unicode/resbund.h"
+#endif
+#include <swlog.h>
+
+SWORD_NAMESPACE_START
+
const char UTF8Transliterator::optionstring[NUMTARGETSCRIPTS][16] = {
"Off",
"Latin",
+ "IPA",
"Basic Latin",
+ "SBL",
+ "TC",
"Beta",
"BGreek",
-/*
+ "SERA",
+ "Hugoye",
+ "UNGEGN",
+ "ISO",
+ "ALA-LC",
+ "BGN-PCGN",
"Greek",
"Hebrew",
"Cyrillic",
@@ -29,7 +45,6 @@ const char UTF8Transliterator::optionstring[NUMTARGETSCRIPTS][16] = {
"Syriac",
"Katakana",
"Hiragana",
- "Jamo",
"Hangul",
"Devanagari",
"Tamil",
@@ -46,19 +61,253 @@ const char UTF8Transliterator::optionstring[NUMTARGETSCRIPTS][16] = {
"Ethiopic",
"Gothic",
"Ugaritic",
- "Coptic"
- */
+ "Coptic",
+ "Meroitic",
+ "Linear B",
+ "Cypriot",
+ "Runic",
+ "Ogham",
+ "Thaana",
+ "Glagolitic",
+ "Tengwar",
+ "Cirth"
};
const char UTF8Transliterator::optName[] = "Transliteration";
const char UTF8Transliterator::optTip[] = "Transliterates between scripts";
+SWTransMap UTF8Transliterator::transMap;
+
+#ifndef _ICUSWORD_
+
+const char UTF8Transliterator::SW_RB_RULE_BASED_IDS[] = "RuleBasedTransliteratorIDs";
+const char UTF8Transliterator::SW_RB_RULE[] = "Rule";
+#ifdef SWICU_DATA
+const char UTF8Transliterator::SW_RESDATA[] = SWICU_DATA;
+#else
+const char UTF8Transliterator::SW_RESDATA[] = "/usr/local/lib/sword/";
+#endif
+
+class SWCharString {
+ public:
+ inline SWCharString(const UnicodeString& str);
+ inline ~SWCharString();
+ inline operator const char*() { return ptr; }
+ private:
+ char buf[128];
+ char* ptr;
+};
+SWCharString::SWCharString(const UnicodeString& str) {
+ // TODO This isn't quite right -- we should probably do
+ // preflighting here to determine the real length.
+ if (str.length() >= (int32_t)sizeof(buf)) {
+ ptr = new char[str.length() + 8];
+ } else {
+ ptr = buf;
+ }
+ str.extract(0, 0x7FFFFFFF, ptr, "");
+}
+
+SWCharString::~SWCharString() {
+ if (ptr != buf) {
+ delete[] ptr;
+ }
+}
+
+#endif // _ICUSWORD_
+
+
UTF8Transliterator::UTF8Transliterator() {
option = 0;
unsigned long i;
for (i = 0; i < NUMTARGETSCRIPTS; i++) {
options.push_back(optionstring[i]);
}
+#ifndef _ICUSWORD_
+ utf8status = U_ZERO_ERROR;
+ Load(utf8status);
+#endif
+}
+
+void UTF8Transliterator::Load(UErrorCode &status)
+{
+#ifndef _ICUSWORD_
+ static const char translit_swordindex[] = "translit_swordindex";
+
+ UResourceBundle *bundle, *transIDs, *colBund;
+ bundle = ures_openDirect(SW_RESDATA, translit_swordindex, &status);
+ if (U_FAILURE(status)) {
+ SWLog::systemlog->LogError("no resource index to load");
+ SWLog::systemlog->LogError("status %s", u_errorName(status));
+ return;
+ }
+
+ transIDs = ures_getByKey(bundle, SW_RB_RULE_BASED_IDS, 0, &status);
+ UParseError parseError;
+
+ int32_t row, maxRows;
+ if (U_SUCCESS(status)) {
+ maxRows = ures_getSize(transIDs);
+ for (row = 0; row < maxRows; row++) {
+ colBund = ures_getByIndex(transIDs, row, 0, &status);
+
+ if (U_SUCCESS(status) && ures_getSize(colBund) == 4) {
+ UnicodeString id = ures_getUnicodeStringByIndex(colBund, 0, &status);
+ UChar type = ures_getUnicodeStringByIndex(colBund, 1, &status).charAt(0);
+ UnicodeString resString = ures_getUnicodeStringByIndex(colBund, 2, &status);
+ SWLog::systemlog->LogInformation("ok so far");
+
+ if (U_SUCCESS(status)) {
+ switch (type) {
+ case 0x66: // 'f'
+ case 0x69: // 'i'
+ // 'file' or 'internal';
+ // row[2]=resource, row[3]=direction
+ {
+ UBool visible = (type == 0x0066 /*f*/);
+ UTransDirection dir =
+ (ures_getUnicodeStringByIndex(colBund, 3, &status).charAt(0) ==
+ 0x0046 /*F*/) ?
+ UTRANS_FORWARD : UTRANS_REVERSE;
+ //registry->put(id, resString, dir, visible);
+ SWLog::systemlog->LogInformation("instantiating %s ...", resString.getBuffer());
+ registerTrans(id, resString, dir, status);
+ SWLog::systemlog->LogInformation("done.");
+ }
+ break;
+ case 0x61: // 'a'
+ // 'alias'; row[2]=createInstance argument
+ //registry->put(id, resString, TRUE);
+ break;
+ }
+ }
+ else SWLog::systemlog->LogError("Failed to get resString");
+ }
+ else SWLog::systemlog->LogError("Failed to get row");
+ ures_close(colBund);
+ }
+ }
+ else
+ {
+ SWLog::systemlog->LogError("no resource index to load");
+ SWLog::systemlog->LogError("status %s", u_errorName(status));
+ }
+
+ ures_close(transIDs);
+ ures_close(bundle);
+
+#endif // _ICUSWORD_
+}
+
+void UTF8Transliterator::registerTrans(const UnicodeString& ID, const UnicodeString& resource,
+ UTransDirection dir, UErrorCode &status )
+{
+#ifndef _ICUSWORD_
+ SWLog::systemlog->LogInformation("registering ID locally %s", ID.getBuffer());
+ SWTransData swstuff;
+ swstuff.resource = resource;
+ swstuff.dir = dir;
+ SWTransPair swpair;
+ swpair.first = ID;
+ swpair.second = swstuff;
+ transMap.insert(swpair);
+#endif
+}
+
+bool UTF8Transliterator::checkTrans(const UnicodeString& ID, UErrorCode &status )
+{
+#ifndef _ICUSWORD_
+ Transliterator *trans = Transliterator::createInstance(ID, UTRANS_FORWARD, status);
+ if (!U_FAILURE(status))
+ {
+ // already have it, clean up and return true
+ SWLog::systemlog->LogInformation("already have it %s", ID.getBuffer());
+ delete trans;
+ return true;
+ }
+ status = U_ZERO_ERROR;
+
+ SWTransMap::iterator swelement;
+ if ((swelement = transMap.find(ID)) != transMap.end())
+ {
+ SWLog::systemlog->LogInformation("found element in map");
+ SWTransData swstuff = (*swelement).second;
+ UParseError parseError;
+ //UErrorCode status;
+ //std::cout << "unregistering " << ID << std::endl;
+ //Transliterator::unregister(ID);
+ SWLog::systemlog->LogInformation("resource is %s", swstuff.resource.getBuffer());
+
+ // Get the rules
+ //std::cout << "importing: " << ID << ", " << resource << std::endl;
+ SWCharString ch(swstuff.resource);
+ UResourceBundle *bundle = ures_openDirect(SW_RESDATA, ch, &status);
+ const UnicodeString rules = ures_getUnicodeStringByKey(bundle, SW_RB_RULE, &status);
+ ures_close(bundle);
+ //parser.parse(rules, isReverse ? UTRANS_REVERSE : UTRANS_FORWARD,
+ // parseError, status);
+ if (U_FAILURE(status)) {
+ SWLog::systemlog->LogError("Failed to get rules");
+ SWLog::systemlog->LogError("status %s", u_errorName(status));
+ return false;
+ }
+
+
+ Transliterator *trans = Transliterator::createFromRules(ID, rules, swstuff.dir,
+ parseError,status);
+ if (U_FAILURE(status)) {
+ SWLog::systemlog->LogError("Failed to create transliterator");
+ SWLog::systemlog->LogError("status %s", u_errorName(status));
+ SWLog::systemlog->LogError("Parse error: line %s", parseError.line);
+ SWLog::systemlog->LogError("Parse error: offset %d", parseError.offset);
+ SWLog::systemlog->LogError("Parse error: preContext %s", *parseError.preContext);
+ SWLog::systemlog->LogError("Parse error: postContext %s", *parseError.postContext);
+ SWLog::systemlog->LogError("rules were");
+// SWLog::systemlog->LogError((const char *)rules);
+ return false;
+ }
+
+ Transliterator::registerInstance(trans);
+ return true;
+
+ //Transliterator *trans = instantiateTrans(ID, swstuff.resource, swstuff.dir, parseError, status);
+ //return trans;
+ }
+ else
+ {
+ return false;
+ }
+#else
+return true;
+#endif // _ICUSWORD_
+}
+
+bool UTF8Transliterator::addTrans(const char* newTrans, SWBuf* transList) {
+#ifdef _ICUSWORD_
+ UErrorCode status;
+ if (checkTrans(UnicodeString(newTrans), status)) {
+#endif
+ *transList += newTrans;
+ *transList += ";";
+ return true;
+#ifdef _ICUSWORD_
+ }
+ else {
+ return false;
+ }
+#endif
+}
+
+Transliterator * UTF8Transliterator::createTrans(const UnicodeString& ID, UTransDirection dir, UErrorCode &status )
+{
+ Transliterator *trans = Transliterator::createInstance(ID,UTRANS_FORWARD,status);
+ if (U_FAILURE(status)) {
+ delete trans;
+ return NULL;
+ }
+ else {
+ return trans;
+ }
}
void UTF8Transliterator::setOptionValue(const char *ival)
@@ -75,20 +324,16 @@ const char *UTF8Transliterator::getOptionValue()
return (NUMTARGETSCRIPTS > option) ? optionstring[option] : 0;
}
-char UTF8Transliterator::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+char UTF8Transliterator::processText(SWBuf &text, const SWKey *key, const SWModule *module)
{
if (option) { // if we want transliteration
unsigned long i, j;
UErrorCode err = U_ZERO_ERROR;
UConverter * conv = NULL;
conv = ucnv_open("UTF-8", &err);
+ SWBuf ID;
bool compat = false;
- bool noNFC = false;
-
- if (option == SE_JAMO) {
- noNFC = true;
- }
// Convert UTF-8 string to UTF-16 (UChars)
j = strlen(text);
@@ -107,8 +352,9 @@ char UTF8Transliterator::ProcessText(char *text, int maxlen, const SWKey *key, c
for (i = 0; i < len; i++) {
j = ublock_getCode(source[i]);
+ scripts[SE_LATIN] = true;
switch (j) {
- case UBLOCK_BASIC_LATIN: scripts[SE_LATIN] = true; break;
+ //case UBLOCK_BASIC_LATIN: scripts[SE_LATIN] = true; break;
case UBLOCK_GREEK: scripts[SE_GREEK] = true; break;
case UBLOCK_HEBREW: scripts[SE_HEBREW] = true; break;
case UBLOCK_CYRILLIC: scripts[SE_CYRILLIC] = true; break;
@@ -132,8 +378,16 @@ char UTF8Transliterator::ProcessText(char *text, int maxlen, const SWKey *key, c
case UBLOCK_ARMENIAN: scripts[SE_ARMENIAN] = true; break;
case UBLOCK_ETHIOPIC: scripts[SE_ETHIOPIC] = true; break;
case UBLOCK_GOTHIC: scripts[SE_GOTHIC] = true; break;
- // needs Unicode 3.2? or 4.0? support from ICU
- //case UBLOCK_UGARITIC: scripts[SE_UGARITIC] = true; break;
+ case UBLOCK_UGARITIC: scripts[SE_UGARITIC] = true; break;
+// case UBLOCK_MEROITIC: scripts[SE_MEROITIC] = true; break;
+// case UBLOCK_LINEARB: scripts[SE_LINEARB] = true; break;
+// case UBLOCK_CYPRIOT: scripts[SE_CYPRIOT] = true; break;
+ case UBLOCK_RUNIC: scripts[SE_RUNIC] = true; break;
+ case UBLOCK_OGHAM: scripts[SE_OGHAM] = true; break;
+ case UBLOCK_THAANA: scripts[SE_THAANA] = true; break;
+// case UBLOCK_GLAGOLITIC: scripts[SE_GLAGOLITIC] = true; break;
+// case UBLOCK_TENGWAR: scripts[SE_TENGWAR] = true; break;
+// case UBLOCK_CIRTH: scripts[SE_CIRTH] = true; break;
case UBLOCK_CJK_RADICALS_SUPPLEMENT:
case UBLOCK_KANGXI_RADICALS:
case UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS:
@@ -153,11 +407,11 @@ char UTF8Transliterator::ProcessText(char *text, int maxlen, const SWKey *key, c
compat = true;
break;
- default: scripts[SE_LATIN] = true;
+ //default: scripts[SE_LATIN] = true;
}
- }
+ }
scripts[option] = false; //turn off the reflexive transliteration
-
+
//return if we have no transliteration to do for this text
j = 0;
for (i = 0; !j && i < NUMSCRIPTS; i++) {
@@ -168,312 +422,468 @@ char UTF8Transliterator::ProcessText(char *text, int maxlen, const SWKey *key, c
return 0;
}
- UnicodeString id;
if (compat) {
- id = UnicodeString("NFKD");
+ addTrans("NFKD", &ID);
}
else {
- id = UnicodeString("NFD");
+ addTrans("NFD", &ID);
}
//Simple X to Latin transliterators
if (scripts[SE_GREEK]) {
- if (option == SE_BETA)
- id += UnicodeString(";Greek-Beta");
- else if (option == SE_BGREEK)
- id += UnicodeString(";Greek-BGreek");
+ if (strnicmp (((SWModule*)module)->Lang(), "cop", 3)) {
+ if (option == SE_SBL)
+ addTrans("Greek-Latin/SBL", &ID);
+ else if (option == SE_TC)
+ addTrans("Greek-Latin/TC", &ID);
+ else if (option == SE_BETA)
+ addTrans("Greek-Latin/Beta", &ID);
+ else if (option == SE_BGREEK)
+ addTrans("Greek-Latin/BGreek", &ID);
+ else if (option == SE_UNGEGN)
+ addTrans("Greek-Latin/UNGEGN", &ID);
+ else if (option == SE_ISO)
+ addTrans("Greek-Latin/ISO", &ID);
+ else if (option == SE_ALALC)
+ addTrans("Greek-Latin/ALALC", &ID);
+ else if (option == SE_BGNPCGN)
+ addTrans("Greek-Latin/BGNPCGN", &ID);
+ else if (option == SE_IPA)
+ addTrans("Greek-IPA/Ancient", &ID);
+ else {
+ addTrans("Greek-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
+ }
else {
- if (!strnicmp (((SWModule*)module)->Lang(), "cop", 3)) {
- id += UnicodeString(";Coptic-Latin");
- }
- else {
- id += UnicodeString(";Greek-Latin");
- }
- scripts[SE_LATIN] = true;
+ if (option == SE_SBL)
+ addTrans("Coptic-Latin/SBL", &ID);
+ else if (option == SE_TC)
+ addTrans("Coptic-Latin/TC", &ID);
+ else if (option == SE_BETA)
+ addTrans("Coptic-Latin/Beta", &ID);
+ else if (option == SE_IPA)
+ addTrans("Coptic-IPA", &ID);
+ else {
+ addTrans("Coptic-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
}
}
if (scripts[SE_HEBREW]) {
- if (option == SE_BETA)
- id += UnicodeString(";Hebrew-CCAT");
+ if (option == SE_SBL)
+ addTrans("Hebrew-Latin/SBL", &ID);
+ else if (option == SE_TC)
+ addTrans("Hebrew-Latin/TC", &ID);
+ else if (option == SE_BETA)
+ addTrans("Hebrew-Latin/Beta", &ID);
+ else if (option == SE_UNGEGN)
+ addTrans("Hebrew-Latin/UNGEGN", &ID);
+ else if (option == SE_ALALC)
+ addTrans("Hebrew-Latin/ALALC", &ID);
else if (option == SE_SYRIAC)
- id += UnicodeString(";Hebrew-Syriac");
+ addTrans("Hebrew-Syriac", &ID);
else {
- id += UnicodeString(";Hebrew-Latin");
- scripts[SE_LATIN] = true;
+ addTrans("Hebrew-Latin", &ID);
+ scripts[SE_LATIN] = true;
}
}
if (scripts[SE_CYRILLIC]) {
- id += UnicodeString(";Cyrillic-Latin");
- scripts[SE_LATIN] = true;
+ if (option == SE_GLAGOLITIC)
+ addTrans("Cyrillic-Glagolitic", &ID);
+ else {
+ addTrans("Cyrillic-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
}
if (scripts[SE_ARABIC]) {
- id += UnicodeString(";Arabic-Latin");
+ addTrans("Arabic-Latin", &ID);
scripts[SE_LATIN] = true;
}
if (scripts[SE_SYRIAC]) {
- if (option == SE_BETA)
- id += UnicodeString(";Syriac-CCAT");
+ if (option == SE_TC)
+ addTrans("Syriac-Latin/TC", &ID);
+ else if (option == SE_BETA)
+ addTrans("Syriac-Latin/Beta", &ID);
+ else if (option == SE_HUGOYE)
+ addTrans("Syriac-Latin/Hugoye", &ID);
else if (option == SE_HEBREW)
- id += UnicodeString(";Syriac-Hebrew");
+ addTrans("Syriac-Hebrew", &ID);
else {
- id += UnicodeString(";Syriac-Latin");
+ addTrans("Syriac-Latin", &ID);
scripts[SE_LATIN] = true;
}
}
if (scripts[SE_THAI]) {
- id += UnicodeString(";Thai-Latin");
+ addTrans("Thai-Latin", &ID);
scripts[SE_LATIN] = true;
}
if (scripts[SE_GEORGIAN]) {
- id += UnicodeString(";Georgian-Latin");
- scripts[SE_LATIN] = true;
+ if (option == SE_ISO)
+ addTrans("Georgian-Latin/ISO", &ID);
+ else if (option == SE_ALALC)
+ addTrans("Georgian-Latin/ALALC", &ID);
+ else if (option == SE_BGNPCGN)
+ addTrans("Georgian-Latin/BGNPCGN", &ID);
+ else if (option == SE_IPA)
+ addTrans("Georgian-IPA", &ID);
+ else {
+ addTrans("Georgian-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
}
if (scripts[SE_ARMENIAN]) {
- id += UnicodeString(";Armenian-Latin");
- scripts[SE_LATIN] = true;
- }
+ if (option == SE_ISO)
+ addTrans("Armenian-Latin/ISO", &ID);
+ else if (option == SE_ALALC)
+ addTrans("Armenian-Latin/ALALC", &ID);
+ else if (option == SE_BGNPCGN)
+ addTrans("Armenian-Latin/BGNPCGN", &ID);
+ else if (option == SE_IPA)
+ addTrans("Armenian-IPA", &ID);
+ else {
+ addTrans("Armenian-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
+ }
if (scripts[SE_ETHIOPIC]) {
- id += UnicodeString(";Ethiopic-Latin");
- scripts[SE_LATIN] = true;
+ if (option == SE_UNGEGN)
+ addTrans("Ethiopic-Latin/UNGEGN", &ID);
+ else if (option == SE_ISO)
+ addTrans("Ethiopic-Latin/ISO", &ID);
+ else if (option == SE_ALALC)
+ addTrans("Ethiopic-Latin/ALALC", &ID);
+ else if (option == SE_SERA)
+ addTrans("Ethiopic-Latin/SERA", &ID);
+ else {
+ addTrans("Ethiopic-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
}
if (scripts[SE_GOTHIC]) {
- id += UnicodeString(";Gothic-Latin");
- scripts[SE_LATIN] = true;
+ if (option == SE_BASICLATIN)
+ addTrans("Gothic-Latin/Basic", &ID);
+ else if (option == SE_IPA)
+ addTrans("Gothic-IPA", &ID);
+ else {
+ addTrans("Gothic-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
}
if (scripts[SE_UGARITIC]) {
- id += UnicodeString(";Ugaritic-Latin");
+ if (option == SE_SBL)
+ addTrans("Ugaritic-Latin/SBL", &ID);
+ else {
+ addTrans("Ugaritic-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
+ }
+ if (scripts[SE_MEROITIC]) {
+ addTrans("Meroitic-Latin", &ID);
scripts[SE_LATIN] = true;
}
- if (scripts[SE_HAN]) {
- if (!strnicmp (((SWModule*)module)->Lang(), "ja", 2)) {
- id += UnicodeString(";Kanji-OnRomaji");
- }
- else {
- id += UnicodeString(";Han-Pinyin");
- }
+ if (scripts[SE_LINEARB]) {
+ addTrans("LinearB-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
+ if (scripts[SE_CYPRIOT]) {
+ addTrans("Cypriot-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
+ if (scripts[SE_RUNIC]) {
+ addTrans("Runic-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
+ if (scripts[SE_OGHAM]) {
+ addTrans("Ogham-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
+ if (scripts[SE_THAANA]) {
+ if (option == SE_ALALC)
+ addTrans("Thaana-Latin/ALALC", &ID);
+ else if (option == SE_BGNPCGN)
+ addTrans("Thaana-Latin/BGNPCGN", &ID);
+ else {
+ addTrans("Thaana-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
+ }
+ if (scripts[SE_GLAGOLITIC]) { // Glagolitic source text: pick the target by the selected option
+ if (option == SE_ISO)
+ addTrans("Glagolitic-Latin/ISO", &ID);
+ else if (option == SE_ALALC)
+ addTrans("Glagolitic-Latin/ALALC", &ID);
+ else if (option == SE_CYRILLIC) // was a second SE_ALALC test, which made this branch unreachable
+ addTrans("Glagolitic-Cyrillic", &ID);
+ else {
+ addTrans("Glagolitic-Latin", &ID);
+ scripts[SE_LATIN] = true; // text now contains Latin, so the later Latin-to-target step applies
+ }
+ }
+ if (scripts[SE_THAI]) {
+ addTrans("Thai-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
+ if (scripts[SE_THAI]) {
+ addTrans("Thai-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
+
+ if (scripts[SE_HAN]) {
+ if (!strnicmp (((SWModule*)module)->Lang(), "ja", 2)) {
+ addTrans("Kanji-Romaji", &ID);
+ }
+ else {
+ addTrans("Han-Latin", &ID);
+ }
scripts[SE_LATIN] = true;
}
// Inter-Kana and Kana to Latin transliterators
if (option == SE_HIRAGANA && scripts[SE_KATAKANA]) {
- id += UnicodeString(";Katakana-Hiragana");
+ addTrans("Katakana-Hiragana", &ID);
scripts[SE_HIRAGANA] = true;
}
else if (option == SE_KATAKANA && scripts[SE_HIRAGANA]) {
- id += UnicodeString(";Hiragana-Katakana");
+ addTrans("Hiragana-Katakana", &ID);
scripts[SE_KATAKANA] = true;
}
else {
if (scripts[SE_KATAKANA]) {
- id += UnicodeString(";Katakana-Latin");
+ addTrans("Katakana-Latin", &ID);
scripts[SE_LATIN] = true;
}
if (scripts[SE_HIRAGANA]) {
- id += UnicodeString(";Hiragana-Latin");
+ addTrans("Hiragana-Latin", &ID);
scripts[SE_LATIN] = true;
}
}
- // Inter-Korean and Korean to Latin transliterators
- if (option == SE_HANGUL && scripts[SE_JAMO]) {
- noNFC = false;
- scripts[SE_HANGUL] = true;
- }
- else if (option == SE_JAMO && scripts[SE_HANGUL]) {
- noNFC = true;
- scripts[SE_JAMO] = true;
+ // Korean to Latin transliterators
+ if (scripts[SE_HANGUL]) {
+ addTrans("Hangul-Latin", &ID);
+ scripts[SE_LATIN] = true;
}
- else {
- if (scripts[SE_HANGUL]) {
- id += UnicodeString(";Hangul-Latin");
- scripts[SE_LATIN] = true;
- }
- if (scripts[SE_JAMO]) {
- id += UnicodeString(";Jamo-Latin");
- scripts[SE_LATIN] = true;
- }
+ if (scripts[SE_JAMO]) {
+ addTrans("Jamo-Latin", &ID);
+ scripts[SE_LATIN] = true;
}
// Indic-Latin
if (option < SE_DEVANAGARI || option > SE_MALAYALAM) {
// Indic to Latin
if (scripts[SE_TAMIL]) {
- id += UnicodeString(";Tamil-Latin");
+ addTrans("Tamil-Latin", &ID);
scripts[SE_LATIN] = true;
}
if (scripts[SE_BENGALI]) {
- id += UnicodeString(";Bengali-Latin");
+ addTrans("Bengali-Latin", &ID);
scripts[SE_LATIN] = true;
}
if (scripts[SE_GURMUKHI]) {
- id += UnicodeString(";Gurmukhi-Latin");
+ addTrans("Gurmukhi-Latin", &ID);
scripts[SE_LATIN] = true;
}
if (scripts[SE_GUJARATI]) {
- id += UnicodeString(";Gujarati-Latin");
+ addTrans("Gujarati-Latin", &ID);
scripts[SE_LATIN] = true;
}
if (scripts[SE_ORIYA]) {
- id += UnicodeString(";Oriya-Latin");
+ addTrans("Oriya-Latin", &ID);
scripts[SE_LATIN] = true;
}
if (scripts[SE_TELUGU]) {
- id += UnicodeString(";Telugu-Latin");
+ addTrans("Telugu-Latin", &ID);
scripts[SE_LATIN] = true;
}
if (scripts[SE_KANNADA]) {
- id += UnicodeString(";Kannada-Latin");
+ addTrans("Kannada-Latin", &ID);
scripts[SE_LATIN] = true;
}
if (scripts[SE_MALAYALAM]) {
- id += UnicodeString(";Malayalam-Latin");
+ addTrans("Malayalam-Latin", &ID);
scripts[SE_LATIN] = true;
}
}
else {
if (scripts[SE_LATIN]) {
- id += UnicodeString(";Latin-InterIndic");
+ addTrans("Latin-InterIndic", &ID);
}
if (scripts[SE_DEVANAGARI]) {
- id += UnicodeString(";Devanagari-InterIndic");
+ addTrans("Devanagari-InterIndic", &ID);
}
if (scripts[SE_TAMIL]) {
- id += UnicodeString(";Tamil-InterIndic");
+ addTrans("Tamil-InterIndic", &ID);
}
if (scripts[SE_BENGALI]) {
- id += UnicodeString(";Bengali-InterIndic");
+ addTrans("Bengali-InterIndic", &ID);
}
if (scripts[SE_GURMUKHI]) {
- id += UnicodeString(";Gurmurkhi-InterIndic");
+ addTrans("Gurmukhi-InterIndic", &ID); // spelling fixed: the misspelled "Gurmurkhi" ID matches no sibling ID (Gurmukhi-Latin, InterIndic-Gurmukhi)
}
if (scripts[SE_GUJARATI]) {
- id += UnicodeString(";Gujarati-InterIndic");
+ addTrans("Gujarati-InterIndic", &ID);
}
if (scripts[SE_ORIYA]) {
- id += UnicodeString(";Oriya-InterIndic");
+ addTrans("Oriya-InterIndic", &ID);
}
if (scripts[SE_TELUGU]) {
- id += UnicodeString(";Telugu-InterIndic");
+ addTrans("Telugu-InterIndic", &ID);
}
if (scripts[SE_KANNADA]) {
- id += UnicodeString(";Kannada-InterIndic");
+ addTrans("Kannada-InterIndic", &ID);
}
if (scripts[SE_MALAYALAM]) {
- id += UnicodeString(";Malayalam-InterIndic");
+ addTrans("Malayalam-InterIndic", &ID);
}
switch(option) {
case SE_DEVANAGARI:
- id += UnicodeString(";InterIndic-Devanagari");
+ addTrans("InterIndic-Devanagari", &ID);
break;
case SE_TAMIL:
- id += UnicodeString(";InterIndic-Tamil");
+ addTrans("InterIndic-Tamil", &ID);
break;
case SE_BENGALI:
- id += UnicodeString(";InterIndic-Bengali");
+ addTrans("InterIndic-Bengali", &ID);
break;
case SE_GURMUKHI:
- id += UnicodeString(";InterIndic-Gurmukhi");
+ addTrans("InterIndic-Gurmukhi", &ID);
break;
case SE_GUJARATI:
- id += UnicodeString(";InterIndic-Gujarati");
+ addTrans("InterIndic-Gujarati", &ID);
break;
case SE_ORIYA:
- id += UnicodeString(";InterIndic-Oriya");
+ addTrans("InterIndic-Oriya", &ID);
break;
case SE_TELUGU:
- id += UnicodeString(";InterIndic-Telugu");
+ addTrans("InterIndic-Telugu", &ID);
break;
case SE_KANNADA:
- id += UnicodeString(";InterIndic-Kannada");
+ addTrans("InterIndic-Kannada", &ID);
break;
case SE_MALAYALAM:
- id += UnicodeString(";InterIndic-Malayalam");
+ addTrans("InterIndic-Malayalam", &ID);
break;
default:
- id += UnicodeString(";InterIndic-Latin");
+ addTrans("InterIndic-Latin", &ID);
scripts[SE_LATIN] = true;
break;
}
}
+ if (scripts[SE_TENGWAR]) {
+ addTrans("Tengwar-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
+ if (scripts[SE_CIRTH]) {
+ addTrans("Cirth-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
+
if (scripts[SE_LATIN]) {
switch (option) {
case SE_GREEK:
- id += UnicodeString(";Latin-Greek");
+ addTrans("Latin-Greek", &ID);
break;
case SE_HEBREW:
- id += UnicodeString(";Latin-Hebrew");
+ addTrans("Latin-Hebrew", &ID);
break;
case SE_CYRILLIC:
- id += UnicodeString(";Latin-Cyrillic");
+ addTrans("Latin-Cyrillic", &ID);
break;
case SE_ARABIC:
- id += UnicodeString(";Latin-Arabic");
+ addTrans("Latin-Arabic", &ID);
break;
case SE_SYRIAC:
- id += UnicodeString(";Latin-Syriac");
+ addTrans("Latin-Syriac", &ID);
break;
case SE_THAI:
- id += UnicodeString(";Latin-Thai");
+ addTrans("Latin-Thai", &ID);
break;
case SE_GEORGIAN:
- id += UnicodeString(";Latin-Georgian");
+ addTrans("Latin-Georgian", &ID);
break;
case SE_ARMENIAN:
- id += UnicodeString(";Latin-Armenian");
+ addTrans("Latin-Armenian", &ID);
break;
case SE_ETHIOPIC:
- id += UnicodeString(";Latin-Ethiopic");
+ addTrans("Latin-Ethiopic", &ID);
break;
case SE_GOTHIC:
- id += UnicodeString(";Latin-Gothic");
+ addTrans("Latin-Gothic", &ID);
break;
case SE_UGARITIC:
- id += UnicodeString(";Latin-Ugaritic");
+ addTrans("Latin-Ugaritic", &ID);
break;
case SE_COPTIC:
- id += UnicodeString(";Latin-Coptic");
+ addTrans("Latin-Coptic", &ID);
break;
case SE_KATAKANA:
- id += UnicodeString(";Latin-Katakana");
+ addTrans("Latin-Katakana", &ID);
break;
case SE_HIRAGANA:
- id += UnicodeString(";Latin-Hiragana");
+ addTrans("Latin-Hiragana", &ID);
break;
case SE_JAMO:
- id += UnicodeString(";Latin-Jamo");
+ addTrans("Latin-Jamo", &ID);
break;
case SE_HANGUL:
- id += UnicodeString(";Latin-Hangul");
+ addTrans("Latin-Hangul", &ID);
+ break;
+ case SE_MEROITIC:
+ addTrans("Latin-Meroitic", &ID);
+ break;
+ case SE_LINEARB:
+ addTrans("Latin-LinearB", &ID);
+ break;
+ case SE_CYPRIOT:
+ addTrans("Latin-Cypriot", &ID);
+ break;
+ case SE_RUNIC:
+ addTrans("Latin-Runic", &ID);
+ break;
+ case SE_OGHAM:
+ addTrans("Latin-Ogham", &ID);
+ break;
+ case SE_THAANA:
+ addTrans("Latin-Thaana", &ID);
+ break;
+ case SE_GLAGOLITIC:
+ addTrans("Latin-Glagolitic", &ID);
+ break;
+ case SE_TENGWAR:
+ addTrans("Latin-Tengwar", &ID);
+ break;
+ case SE_CIRTH:
+ addTrans("Latin-Cirth", &ID);
break;
}
}
if (option == SE_BASICLATIN) {
- id += UnicodeString(";Any-Latin1");
- }
-
- if (noNFC) {
- id += UnicodeString(";NFD");
- } else {
- id += UnicodeString(";NFC");
+ addTrans("Any-Latin1", &ID);
}
- UParseError perr;
+ addTrans("NFC", &ID);
err = U_ZERO_ERROR;
- Transliterator * trans = Transliterator::createInstance(id, UTRANS_FORWARD, perr, err);
- if (trans) {
+ Transliterator * trans = createTrans(UnicodeString(ID), UTRANS_FORWARD, err);
+ if (trans && !U_FAILURE(err)) {
UnicodeString target = UnicodeString(source);
- trans->transliterate(target);
- len = ucnv_fromUChars(conv, text, maxlen, target.getBuffer(), target.length(), &err);
- if (len < maxlen) *(text + len) = 0;
- else *(text + maxlen) = 0;
- delete trans;
+ trans->transliterate(target);
+ text.setSize(text.size()*2);
+ len = ucnv_fromUChars(conv, text.getRawData(), text.size(), target.getBuffer(), target.length(), &err);
+ text.setSize(len);
+ delete trans;
}
ucnv_close(conv);
}
return 0;
}
+
+SWORD_NAMESPACE_END
#endif
+
+
+
diff --git a/src/modules/filters/utf8utf16.cpp b/src/modules/filters/utf8utf16.cpp
index 9aea6fe..2fddf4c 100644
--- a/src/modules/filters/utf8utf16.cpp
+++ b/src/modules/filters/utf8utf16.cpp
@@ -1,6 +1,6 @@
/******************************************************************************
*
- * UTF8UTF16 - SWFilter decendant to convert UTF-8 to UTF-16
+ * UTF8UTF16 - SWFilter descendant to convert UTF-8 to UTF-16
*
*/
@@ -9,71 +9,70 @@
#include <utf8utf16.h>
+SWORD_NAMESPACE_START
+
UTF8UTF16::UTF8UTF16() {
}
-char UTF8UTF16::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
-{
- unsigned char *from;
- unsigned short *to;
+char UTF8UTF16::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ const unsigned char *from;
- int len;
- unsigned long uchar;
- unsigned char significantFirstBits, subsequent;
- unsigned short schar;
-
- len = strlen(text) + 1; // shift string to right of buffer
- if (len < maxlen) {
- memmove(&text[maxlen - len], text, len);
- from = (unsigned char*)&text[maxlen - len];
- }
- else
- from = (unsigned char*)text;
+ int len;
+ unsigned long uchar, uchars[10];
+ unsigned char significantFirstBits, subsequent;
+ unsigned short schar;
+ if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering
+ return -1;
+
- // -------------------------------
-
- for (to = (unsigned short*)text; *from; from++) {
- uchar = 0;
- if ((*from & 128) != 128) {
- // if (*from != ' ')
- uchar = *from;
- }
- else if ((*from & 128) && ((*from & 64) != 64)) {
- // error, do nothing
- continue;
- }
- else {
- *from <<= 1;
- for (subsequent = 1; (*from & 128); subsequent++) {
- *from <<= 1;
- from[subsequent] &= 63;
- uchar <<= 6;
- uchar |= from[subsequent];
- }
- subsequent--;
- *from <<=1;
- significantFirstBits = 8 - (2+subsequent);
-
- uchar |= (((short)*from) << (((6*subsequent)+significantFirstBits)-8));
- from += subsequent;
- }
+ SWBuf orig = text;
+ from = (const unsigned char *)orig.c_str();
- if (uchar < 0x1ffff) {
- *to++ = (unsigned short)uchar;
- }
- else {
- uchar -= 0x10000;
- schar = 0xD800 | (uchar & 0x03ff);
- uchar >>= 10;
- uchar |= 0xDC00;
- *to++ = (unsigned short)schar;
- *to++ = (unsigned short)uchar;
- }
- }
- *to = (unsigned short)0;
+ for (text = ""; *from; from++) {
+ uchar = 0;
+ if ((*from & 128) != 128) {
+ //if (*from != ' ')
+ uchar = *from;
+ }
+ else if ((*from & 128) && ((*from & 64) != 64)) {
+ // error, do nothing
+ continue;
+ }
+ else {
+ uchars[0] = *from;
+ uchars[0] <<= 1;
+ for (subsequent = 1; (uchars[0] & 128) && (subsequent < 10); subsequent++) {
+ uchars[0] <<= 1;
+ uchars[subsequent] = from[subsequent];
+ uchars[subsequent] &= 63;
+ uchar <<= 6;
+ uchar |= uchars[subsequent];
+ }
+ subsequent--;
+ uchars[0] <<=1;
+ significantFirstBits = 8 - (2+subsequent);
+
+ uchar |= (((short)uchars[0]) << (((6*subsequent)+significantFirstBits)-8));
+ from += subsequent;
+ }
- return 0;
+ if (uchar < 0x10000) { // BMP code point: fits in a single UTF-16 code unit (was 0x1ffff, which truncated U+10000..U+1FFFE)
+ text.setSize(text.size()+2);
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short)uchar;
+ }
+ else { // supplementary code point: emit high surrogate, then low surrogate
+ uchar -= 0x10000;
+ schar = 0xD800 | ((uchar >> 10) & 0x03ff); // high surrogate carries the upper 10 bits
+ uchar &= 0x03ff;
+ uchar |= 0xDC00; // low surrogate carries the lower 10 bits
+ text.setSize(text.size()+4);
+ *((unsigned short *)(text.getRawData()+(text.size()-4))) = (unsigned short)schar;
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short)uchar;
+ }
+ }
+ return 0;
}
+SWORD_NAMESPACE_END