summaryrefslogtreecommitdiff
path: root/src/modules
diff options
context:
space:
mode:
Diffstat (limited to 'src/modules')
-rw-r--r--src/modules/Makefile5
-rw-r--r--src/modules/Makefile.am10
-rw-r--r--src/modules/comments/Makefile5
-rw-r--r--src/modules/comments/Makefile.am9
-rw-r--r--src/modules/comments/hrefcom/Makefile5
-rw-r--r--src/modules/comments/hrefcom/Makefile.am2
-rw-r--r--src/modules/comments/hrefcom/hrefcom.cpp82
-rw-r--r--src/modules/comments/rawcom/Makefile5
-rw-r--r--src/modules/comments/rawcom/Makefile.am2
-rw-r--r--src/modules/comments/rawcom/rawcom.cpp153
-rw-r--r--src/modules/comments/rawcom4/Makefile.am2
-rw-r--r--src/modules/comments/rawcom4/rawcom4.cpp153
-rw-r--r--src/modules/comments/rawfiles/Makefile5
-rw-r--r--src/modules/comments/rawfiles/Makefile.am2
-rw-r--r--src/modules/comments/rawfiles/rawfiles.cpp257
-rw-r--r--src/modules/comments/swcom.cpp105
-rw-r--r--src/modules/comments/zcom/Makefile5
-rw-r--r--src/modules/comments/zcom/Makefile.am2
-rw-r--r--src/modules/comments/zcom/zcom.cpp188
-rw-r--r--src/modules/common/Makefile4
-rw-r--r--src/modules/common/Makefile.am21
-rw-r--r--src/modules/common/compress.cpp.txt767
-rw-r--r--src/modules/common/entriesblk.cpp172
-rw-r--r--src/modules/common/lzsscomprs.cpp668
-rw-r--r--src/modules/common/rawstr.cpp565
-rw-r--r--src/modules/common/rawstr4.cpp572
-rw-r--r--src/modules/common/rawverse.cpp350
-rw-r--r--src/modules/common/rawverse4.cpp350
-rw-r--r--src/modules/common/sapphire.cpp216
-rw-r--r--src/modules/common/swcipher.cpp128
-rw-r--r--src/modules/common/swcomprs.cpp193
-rw-r--r--src/modules/common/swcomprs.doc802
-rw-r--r--src/modules/common/zipcomprs.cpp164
-rw-r--r--src/modules/common/zstr.cpp731
-rw-r--r--src/modules/common/zverse.cpp538
-rw-r--r--src/modules/filters/Makefile5
-rw-r--r--src/modules/filters/Makefile.am105
-rw-r--r--src/modules/filters/cipherfil.cpp46
-rw-r--r--src/modules/filters/gbffootnotes.cpp193
-rw-r--r--src/modules/filters/gbfheadings.cpp87
-rw-r--r--src/modules/filters/gbfhtml.cpp181
-rw-r--r--src/modules/filters/gbfhtmlhref.cpp288
-rw-r--r--src/modules/filters/gbfmorph.cpp77
-rw-r--r--src/modules/filters/gbfosis.cpp420
-rw-r--r--src/modules/filters/gbfplain.cpp97
-rw-r--r--src/modules/filters/gbfredletterwords.cpp93
-rw-r--r--src/modules/filters/gbfrtf.cpp311
-rw-r--r--src/modules/filters/gbfstrongs.cpp126
-rw-r--r--src/modules/filters/gbfthml.cpp216
-rw-r--r--src/modules/filters/gbfwebif.cpp191
-rw-r--r--src/modules/filters/gbfwordjs.cpp282
-rw-r--r--src/modules/filters/greeklexattribs.cpp101
-rw-r--r--src/modules/filters/latin1utf16.cpp119
-rw-r--r--src/modules/filters/latin1utf8.cpp173
-rw-r--r--src/modules/filters/osisfootnotes.cpp157
-rw-r--r--src/modules/filters/osisheadings.cpp144
-rw-r--r--src/modules/filters/osishtmlhref.cpp561
-rw-r--r--src/modules/filters/osislemma.cpp85
-rw-r--r--src/modules/filters/osismorph.cpp85
-rw-r--r--src/modules/filters/osismorphsegmentation.cpp106
-rw-r--r--src/modules/filters/osisosis.cpp173
-rw-r--r--src/modules/filters/osisplain.cpp192
-rw-r--r--src/modules/filters/osisredletterwords.cpp85
-rw-r--r--src/modules/filters/osisrtf.cpp520
-rw-r--r--src/modules/filters/osisscripref.cpp100
-rw-r--r--src/modules/filters/osisstrongs.cpp257
-rw-r--r--src/modules/filters/osisvariants.cpp118
-rw-r--r--src/modules/filters/osiswebif.cpp198
-rw-r--r--src/modules/filters/osiswordjs.cpp178
-rw-r--r--src/modules/filters/papyriplain.cpp71
-rw-r--r--src/modules/filters/plainfootnotes.cpp79
-rw-r--r--src/modules/filters/plainhtml.cpp83
-rw-r--r--src/modules/filters/rtfhtml.cpp81
-rw-r--r--src/modules/filters/scsuutf8.cpp226
-rw-r--r--src/modules/filters/swbasicfilter.cpp406
-rw-r--r--src/modules/filters/swoptfilter.cpp47
-rw-r--r--src/modules/filters/teihtmlhref.cpp205
-rw-r--r--src/modules/filters/teiplain.cpp116
-rw-r--r--src/modules/filters/teirtf.cpp182
-rw-r--r--src/modules/filters/thmlfootnotes.cpp124
-rw-r--r--src/modules/filters/thmlgbf.cpp291
-rw-r--r--src/modules/filters/thmlheadings.cpp153
-rw-r--r--src/modules/filters/thmlhtml.cpp236
-rw-r--r--src/modules/filters/thmlhtmlhref.cpp357
-rw-r--r--src/modules/filters/thmllemma.cpp65
-rw-r--r--src/modules/filters/thmlmorph.cpp65
-rw-r--r--src/modules/filters/thmlosis.cpp575
-rw-r--r--src/modules/filters/thmlplain.cpp219
-rw-r--r--src/modules/filters/thmlrtf.cpp346
-rw-r--r--src/modules/filters/thmlscripref.cpp123
-rw-r--r--src/modules/filters/thmlstrongs.cpp146
-rw-r--r--src/modules/filters/thmlvariants.cpp118
-rw-r--r--src/modules/filters/thmlwebif.cpp103
-rw-r--r--src/modules/filters/thmlwordjs.cpp296
-rw-r--r--src/modules/filters/unicodertf.cpp87
-rw-r--r--src/modules/filters/utf16utf8.cpp90
-rw-r--r--src/modules/filters/utf8arshaping.cpp51
-rw-r--r--src/modules/filters/utf8bidireorder.cpp60
-rw-r--r--src/modules/filters/utf8cantillation.cpp55
-rw-r--r--src/modules/filters/utf8greekaccents.cpp261
-rw-r--r--src/modules/filters/utf8hebrewpoints.cpp44
-rw-r--r--src/modules/filters/utf8html.cpp70
-rw-r--r--src/modules/filters/utf8latin1.cpp75
-rw-r--r--src/modules/filters/utf8nfc.cpp50
-rw-r--r--src/modules/filters/utf8nfkd.cpp52
-rw-r--r--src/modules/filters/utf8transliterator.cpp888
-rw-r--r--src/modules/filters/utf8utf16.cpp78
-rw-r--r--src/modules/genbook/Makefile5
-rw-r--r--src/modules/genbook/Makefile.am5
-rw-r--r--src/modules/genbook/rawgenbook/Makefile4
-rw-r--r--src/modules/genbook/rawgenbook/Makefile.am4
-rw-r--r--src/modules/genbook/rawgenbook/rawgenbook.cpp219
-rw-r--r--src/modules/genbook/swgenbook.cpp29
-rw-r--r--src/modules/lexdict/Makefile5
-rw-r--r--src/modules/lexdict/Makefile.am7
-rw-r--r--src/modules/lexdict/rawld/Makefile5
-rw-r--r--src/modules/lexdict/rawld/Makefile.am4
-rw-r--r--src/modules/lexdict/rawld/rawld.cpp194
-rw-r--r--src/modules/lexdict/rawld4/Makefile5
-rw-r--r--src/modules/lexdict/rawld4/Makefile.am4
-rw-r--r--src/modules/lexdict/rawld4/rawld4.cpp192
-rw-r--r--src/modules/lexdict/swld.cpp82
-rw-r--r--src/modules/lexdict/zld/Makefile5
-rw-r--r--src/modules/lexdict/zld/Makefile.am4
-rw-r--r--src/modules/lexdict/zld/zld.cpp189
-rw-r--r--src/modules/readme9
-rw-r--r--src/modules/swmodule.cpp1285
-rw-r--r--src/modules/tests/Makefile4
-rw-r--r--src/modules/tests/echomod.cpp27
-rw-r--r--src/modules/texts/Makefile5
-rw-r--r--src/modules/texts/Makefile.am7
-rw-r--r--src/modules/texts/rawtext/Makefile5
-rw-r--r--src/modules/texts/rawtext/Makefile.am2
-rw-r--r--src/modules/texts/rawtext/rawtext.cpp548
-rw-r--r--src/modules/texts/rawtext4/Makefile.am2
-rw-r--r--src/modules/texts/rawtext4/rawtext4.cpp548
-rw-r--r--src/modules/texts/swtext.cpp113
-rw-r--r--src/modules/texts/ztext/Makefile5
-rw-r--r--src/modules/texts/ztext/Makefile.am3
-rw-r--r--src/modules/texts/ztext/ztext.cpp195
140 files changed, 23602 insertions, 0 deletions
diff --git a/src/modules/Makefile b/src/modules/Makefile
new file mode 100644
index 0000000..ef8eccd
--- /dev/null
+++ b/src/modules/Makefile
@@ -0,0 +1,5 @@
+
+root := ../..
+
+all:
+ make -C ${root}
diff --git a/src/modules/Makefile.am b/src/modules/Makefile.am
new file mode 100644
index 0000000..944dc18
--- /dev/null
+++ b/src/modules/Makefile.am
@@ -0,0 +1,10 @@
+modulesdir = $(top_srcdir)/src/modules
+
+libsword_la_SOURCES += $(modulesdir)/swmodule.cpp
+
+include ../src/modules/common/Makefile.am
+include ../src/modules/filters/Makefile.am
+include ../src/modules/genbook/Makefile.am
+include ../src/modules/texts/Makefile.am
+include ../src/modules/comments/Makefile.am
+include ../src/modules/lexdict/Makefile.am
diff --git a/src/modules/comments/Makefile b/src/modules/comments/Makefile
new file mode 100644
index 0000000..1a2d00d
--- /dev/null
+++ b/src/modules/comments/Makefile
@@ -0,0 +1,5 @@
+
+root := ../../..
+
+all:
+ make -C ${root}
diff --git a/src/modules/comments/Makefile.am b/src/modules/comments/Makefile.am
new file mode 100644
index 0000000..7139cf8
--- /dev/null
+++ b/src/modules/comments/Makefile.am
@@ -0,0 +1,9 @@
+commentsdir = $(top_srcdir)/src/modules/comments
+
+libsword_la_SOURCES += $(commentsdir)/swcom.cpp
+
+include ../src/modules/comments/rawcom/Makefile.am
+include ../src/modules/comments/rawcom4/Makefile.am
+include ../src/modules/comments/rawfiles/Makefile.am
+include ../src/modules/comments/zcom/Makefile.am
+include ../src/modules/comments/hrefcom/Makefile.am
diff --git a/src/modules/comments/hrefcom/Makefile b/src/modules/comments/hrefcom/Makefile
new file mode 100644
index 0000000..35d6648
--- /dev/null
+++ b/src/modules/comments/hrefcom/Makefile
@@ -0,0 +1,5 @@
+
+root := ../../../..
+
+all:
+ make -C ${root}
diff --git a/src/modules/comments/hrefcom/Makefile.am b/src/modules/comments/hrefcom/Makefile.am
new file mode 100644
index 0000000..dba4294
--- /dev/null
+++ b/src/modules/comments/hrefcom/Makefile.am
@@ -0,0 +1,2 @@
+hrefcomdir = $(top_srcdir)/src/modules/comments/hrefcom
+libsword_la_SOURCES += $(hrefcomdir)/hrefcom.cpp
diff --git a/src/modules/comments/hrefcom/hrefcom.cpp b/src/modules/comments/hrefcom/hrefcom.cpp
new file mode 100644
index 0000000..7791da2
--- /dev/null
+++ b/src/modules/comments/hrefcom/hrefcom.cpp
@@ -0,0 +1,82 @@
+/******************************************************************************
+ * hrefcom.cpp - code for class 'HREFCom'- a module that produces HTML HREFs
+ * pointing to actual text desired. Uses standard
+ * files: ot and nt using indexs ??.bks ??.cps ??.vss
+ */
+
+
+#include <ctype.h>
+#include <stdio.h>
+#include <fcntl.h>
+
+#include <utilstr.h>
+#include <rawverse.h>
+#include <hrefcom.h>
+#include <swbuf.h>
+#include <versekey.h>
+
+SWORD_NAMESPACE_START
+
+ /******************************************************************************
+ * HREFCom Constructor - Initializes data for instance of HREFCom
+ *
+ * ENT: iname - Internal name for module
+ * iprefix - string to prepend to each HREF (e.g. "file://mods/com/jfb/")
+ * idesc - Name to display to user for module
+ * idisp - Display object to use for displaying
+ */
+
+HREFCom::HREFCom(const char *ipath, const char *iprefix, const char *iname, const char *idesc, SWDisplay *idisp) : RawVerse(ipath), SWCom(iname, idesc, idisp)
+{
+ prefix = 0;
+ stdstr(&prefix, iprefix);
+}
+
+
+/******************************************************************************
+ * HREFCom Destructor - Cleans up instance of HREFCom
+ */
+
+HREFCom::~HREFCom()
+{
+ if (prefix)
+ delete [] prefix;
+}
+
+
+/******************************************************************************
+ * HREFCom::operator char * - Returns the correct verse when char * cast
+ * is requested
+ *
+ * RET: string buffer with verse
+ */
+
+SWBuf &HREFCom::getRawEntryBuf() {
+ long start;
+ unsigned short size;
+ VerseKey *key = 0;
+
+ SWTRY {
+ key = SWDYNAMIC_CAST(VerseKey, this->key);
+ }
+ SWCATCH ( ... ) {}
+ if (!key)
+ key = new VerseKey(this->key);
+
+ findOffset(key->Testament(), key->Index(), &start, &size);
+ entrySize = size; // support getEntrySize call
+
+ SWBuf tmpbuf;
+
+ readText(key->Testament(), start, size, tmpbuf);
+ entryBuf = prefix;
+ entryBuf += tmpbuf.c_str();
+ prepText(entryBuf);
+
+ if (key != this->key)
+ delete key;
+
+ return entryBuf;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/comments/rawcom/Makefile b/src/modules/comments/rawcom/Makefile
new file mode 100644
index 0000000..35d6648
--- /dev/null
+++ b/src/modules/comments/rawcom/Makefile
@@ -0,0 +1,5 @@
+
+root := ../../../..
+
+all:
+ make -C ${root}
diff --git a/src/modules/comments/rawcom/Makefile.am b/src/modules/comments/rawcom/Makefile.am
new file mode 100644
index 0000000..116d706
--- /dev/null
+++ b/src/modules/comments/rawcom/Makefile.am
@@ -0,0 +1,2 @@
+rawcomdir = $(top_srcdir)/src/modules/comments/rawcom
+libsword_la_SOURCES += $(rawcomdir)/rawcom.cpp
diff --git a/src/modules/comments/rawcom/rawcom.cpp b/src/modules/comments/rawcom/rawcom.cpp
new file mode 100644
index 0000000..fd01c24
--- /dev/null
+++ b/src/modules/comments/rawcom/rawcom.cpp
@@ -0,0 +1,153 @@
+/******************************************************************************
+ * rawcom.cpp - code for class 'RawCom'- a module that reads raw commentary
+ * files: ot and nt using indexs ??.bks ??.cps ??.vss
+ */
+
+
+#include <ctype.h>
+#include <stdio.h>
+#include <fcntl.h>
+
+#include <filemgr.h>
+#include <rawverse.h>
+#include <rawcom.h>
+#include <versekey.h>
+
+SWORD_NAMESPACE_START
+
+ /******************************************************************************
+ * RawCom Constructor - Initializes data for instance of RawCom
+ *
+ * ENT: iname - Internal name for module
+ * idesc - Name to display to user for module
+ * idisp - Display object to use for displaying
+ */
+
+RawCom::RawCom(const char *ipath, const char *iname, const char *idesc, SWDisplay *idisp, SWTextEncoding encoding, SWTextDirection dir, SWTextMarkup markup, const char* ilang)
+ : RawVerse(ipath),
+ SWCom(iname, idesc, idisp, encoding, dir, markup, ilang){
+}
+
+
+/******************************************************************************
+ * RawCom Destructor - Cleans up instance of RawCom
+ */
+
+RawCom::~RawCom()
+{
+}
+
+
+bool RawCom::isWritable() {
+ return ((idxfp[0]->getFd() > 0) && ((idxfp[0]->mode & FileMgr::RDWR) == FileMgr::RDWR));
+}
+/******************************************************************************
+ * RawCom::getRawEntry() - Returns the correct verse when char * cast
+ * is requested
+ *
+ * RET: string buffer with verse
+ */
+
+SWBuf &RawCom::getRawEntryBuf() {
+ long start = 0;
+ unsigned short size = 0;
+ VerseKey *key = &getVerseKey();
+
+ findOffset(key->Testament(), key->Index(), &start, &size);
+ entrySize = size; // support getEntrySize call
+
+ entryBuf = "";
+ readText(key->Testament(), start, size, entryBuf);
+
+ rawFilter(entryBuf, 0); // hack, decipher
+ rawFilter(entryBuf, key);
+
+// if (!isUnicode())
+ prepText(entryBuf);
+
+ return entryBuf;
+}
+
+
+/******************************************************************************
+ * RawCom::increment - Increments module key a number of entries
+ *
+ * ENT: steps - Number of entries to jump forward
+ *
+ * RET: *this
+ */
+
+void RawCom::increment(int steps) {
+ long start;
+ unsigned short size;
+ VerseKey *tmpkey = &getVerseKey();
+
+ findOffset(tmpkey->Testament(), tmpkey->Index(), &start, &size);
+
+ SWKey lastgood = *tmpkey;
+ while (steps) {
+ long laststart = start;
+ unsigned short lastsize = size;
+ SWKey lasttry = *tmpkey;
+ (steps > 0) ? (*key)++ : (*key)--;
+ tmpkey = &getVerseKey();
+
+ if ((error = key->Error())) {
+ *key = lastgood;
+ break;
+ }
+ long index = tmpkey->Index();
+ findOffset(tmpkey->Testament(), index, &start, &size);
+ if (
+ (((laststart != start) || (lastsize != size)) // we're a different entry
+// && (start > 0)
+ && (size)) // and we actually have a size
+ ||(!skipConsecutiveLinks)) { // or we don't want to skip consecutive links
+ steps += (steps < 0) ? 1 : -1;
+ lastgood = *tmpkey;
+ }
+ }
+ error = (error) ? KEYERR_OUTOFBOUNDS : 0;
+}
+
+
+void RawCom::setEntry(const char *inbuf, long len) {
+ VerseKey *key = &getVerseKey();
+ doSetText(key->Testament(), key->Index(), inbuf, len);
+}
+
+
+void RawCom::linkEntry(const SWKey *inkey) {
+ VerseKey *destkey = &getVerseKey();
+ const VerseKey *srckey = 0;
+
+ // see if we have a VerseKey * or decendant
+ SWTRY {
+ srckey = SWDYNAMIC_CAST(VerseKey, inkey);
+ }
+ SWCATCH ( ... ) {}
+ // if we don't have a VerseKey * decendant, create our own
+ if (!srckey)
+ srckey = new VerseKey(inkey);
+
+ doLinkEntry(destkey->Testament(), destkey->Index(), srckey->Index());
+
+ if (inkey != srckey) // free our key if we created a VerseKey
+ delete srckey;
+}
+
+
+/******************************************************************************
+ * RawCom::deleteEntry - deletes this entry
+ *
+ * RET: *this
+ */
+
+void RawCom::deleteEntry() {
+
+ VerseKey *key = &getVerseKey();
+ doSetText(key->Testament(), key->Index(), "");
+}
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/comments/rawcom4/Makefile.am b/src/modules/comments/rawcom4/Makefile.am
new file mode 100644
index 0000000..346dbc5
--- /dev/null
+++ b/src/modules/comments/rawcom4/Makefile.am
@@ -0,0 +1,2 @@
+rawcom4dir = $(top_srcdir)/src/modules/comments/rawcom4
+libsword_la_SOURCES += $(rawcom4dir)/rawcom4.cpp
diff --git a/src/modules/comments/rawcom4/rawcom4.cpp b/src/modules/comments/rawcom4/rawcom4.cpp
new file mode 100644
index 0000000..e59ee39
--- /dev/null
+++ b/src/modules/comments/rawcom4/rawcom4.cpp
@@ -0,0 +1,153 @@
+/******************************************************************************
+ * rawcom4.cpp - code for class 'RawCom4'- a module that reads raw commentary
+ * files: ot and nt using indexs ??.bks ??.cps ??.vss
+ */
+
+
+#include <ctype.h>
+#include <stdio.h>
+#include <fcntl.h>
+
+#include <filemgr.h>
+#include <rawverse4.h>
+#include <rawcom4.h>
+#include <versekey.h>
+
+SWORD_NAMESPACE_START
+
+ /******************************************************************************
+ * RawCom4 Constructor - Initializes data for instance of RawCom4
+ *
+ * ENT: iname - Internal name for module
+ * idesc - Name to display to user for module
+ * idisp - Display object to use for displaying
+ */
+
+RawCom4::RawCom4(const char *ipath, const char *iname, const char *idesc, SWDisplay *idisp, SWTextEncoding encoding, SWTextDirection dir, SWTextMarkup markup, const char* ilang)
+ : RawVerse4(ipath),
+ SWCom(iname, idesc, idisp, encoding, dir, markup, ilang){
+}
+
+
+/******************************************************************************
+ * RawCom4 Destructor - Cleans up instance of RawCom4
+ */
+
+RawCom4::~RawCom4()
+{
+}
+
+
+bool RawCom4::isWritable() {
+ return ((idxfp[0]->getFd() > 0) && ((idxfp[0]->mode & FileMgr::RDWR) == FileMgr::RDWR));
+}
+/******************************************************************************
+ * RawCom4::getRawEntry() - Returns the correct verse when char * cast
+ * is requested
+ *
+ * RET: string buffer with verse
+ */
+
+SWBuf &RawCom4::getRawEntryBuf() {
+ long start = 0;
+ unsigned long size = 0;
+ VerseKey *key = &getVerseKey();
+
+ findOffset(key->Testament(), key->Index(), &start, &size);
+ entrySize = size; // support getEntrySize call
+
+ entryBuf = "";
+ readText(key->Testament(), start, size, entryBuf);
+
+ rawFilter(entryBuf, 0); // hack, decipher
+ rawFilter(entryBuf, key);
+
+// if (!isUnicode())
+ prepText(entryBuf);
+
+ return entryBuf;
+}
+
+
+/******************************************************************************
+ * RawCom4::increment - Increments module key a number of entries
+ *
+ * ENT: steps - Number of entries to jump forward
+ *
+ * RET: *this
+ */
+
+void RawCom4::increment(int steps) {
+ long start;
+ unsigned long size;
+ VerseKey *tmpkey = &getVerseKey();
+
+ findOffset(tmpkey->Testament(), tmpkey->Index(), &start, &size);
+
+ SWKey lastgood = *tmpkey;
+ while (steps) {
+ long laststart = start;
+ unsigned long lastsize = size;
+ SWKey lasttry = *tmpkey;
+ (steps > 0) ? (*key)++ : (*key)--;
+ tmpkey = &getVerseKey();
+
+ if ((error = key->Error())) {
+ *key = lastgood;
+ break;
+ }
+ long index = tmpkey->Index();
+ findOffset(tmpkey->Testament(), index, &start, &size);
+ if (
+ (((laststart != start) || (lastsize != size)) // we're a different entry
+// && (start > 0)
+ && (size)) // and we actually have a size
+ ||(!skipConsecutiveLinks)) { // or we don't want to skip consecutive links
+ steps += (steps < 0) ? 1 : -1;
+ lastgood = *tmpkey;
+ }
+ }
+ error = (error) ? KEYERR_OUTOFBOUNDS : 0;
+}
+
+
+void RawCom4::setEntry(const char *inbuf, long len) {
+ VerseKey *key = &getVerseKey();
+ doSetText(key->Testament(), key->Index(), inbuf, len);
+}
+
+
+void RawCom4::linkEntry(const SWKey *inkey) {
+ VerseKey *destkey = &getVerseKey();
+ const VerseKey *srckey = 0;
+
+ // see if we have a VerseKey * or decendant
+ SWTRY {
+ srckey = SWDYNAMIC_CAST(VerseKey, inkey);
+ }
+ SWCATCH ( ... ) {}
+ // if we don't have a VerseKey * decendant, create our own
+ if (!srckey)
+ srckey = new VerseKey(inkey);
+
+ doLinkEntry(destkey->Testament(), destkey->Index(), srckey->Index());
+
+ if (inkey != srckey) // free our key if we created a VerseKey
+ delete srckey;
+}
+
+
+/******************************************************************************
+ * RawCom4::deleteEntry - deletes this entry
+ *
+ * RET: *this
+ */
+
+void RawCom4::deleteEntry() {
+
+ VerseKey *key = &getVerseKey();
+ doSetText(key->Testament(), key->Index(), "");
+}
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/comments/rawfiles/Makefile b/src/modules/comments/rawfiles/Makefile
new file mode 100644
index 0000000..35d6648
--- /dev/null
+++ b/src/modules/comments/rawfiles/Makefile
@@ -0,0 +1,5 @@
+
+root := ../../../..
+
+all:
+ make -C ${root}
diff --git a/src/modules/comments/rawfiles/Makefile.am b/src/modules/comments/rawfiles/Makefile.am
new file mode 100644
index 0000000..a9b7dbe
--- /dev/null
+++ b/src/modules/comments/rawfiles/Makefile.am
@@ -0,0 +1,2 @@
+rawfilesdir = $(top_srcdir)/src/modules/comments/rawfiles
+libsword_la_SOURCES += $(rawfilesdir)/rawfiles.cpp
diff --git a/src/modules/comments/rawfiles/rawfiles.cpp b/src/modules/comments/rawfiles/rawfiles.cpp
new file mode 100644
index 0000000..3b614d9
--- /dev/null
+++ b/src/modules/comments/rawfiles/rawfiles.cpp
@@ -0,0 +1,257 @@
+/******************************************************************************
+ * rawfiles.cpp - code for class 'RawFiles'- a module that produces HTML HREFs
+ * pointing to actual text desired. Uses standard
+ * files: ot and nt using indexs ??.bks ??.cps ??.vss
+ */
+
+
+#include <ctype.h>
+#include <stdio.h>
+#include <fcntl.h>
+
+#include <rawverse.h>
+#include <rawfiles.h>
+#include <filemgr.h>
+#include <versekey.h>
+
+SWORD_NAMESPACE_START
+
+ /******************************************************************************
+ * RawFiles Constructor - Initializes data for instance of RawFiles
+ *
+ * ENT: iname - Internal name for module
+ * idesc - Name to display to user for module
+ * idisp - Display object to use for displaying
+ */
+
+RawFiles::RawFiles(const char *ipath, const char *iname, const char *idesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) : RawVerse(ipath, FileMgr::RDWR), SWCom(iname, idesc, idisp, enc, dir, mark, ilang)
+{
+}
+
+
+/******************************************************************************
+ * RawFiles Destructor - Cleans up instance of RawFiles
+ */
+
+RawFiles::~RawFiles()
+{
+}
+
+
+/** Is the module writable? :)
+* @return yes or no
+*/
+bool RawFiles::isWritable() {
+ return ((idxfp[0]->getFd() > 0) && ((idxfp[0]->mode & FileMgr::RDWR) == FileMgr::RDWR));
+}
+
+
+/******************************************************************************
+ * RawFiles::getRawEntry - Returns the correct verse when char * cast
+ * is requested
+ *
+ * RET: string buffer with verse
+ */
+
+SWBuf &RawFiles::getRawEntryBuf() {
+ FileDesc *datafile;
+ long start = 0;
+ unsigned short size = 0;
+ VerseKey *key = 0;
+
+ SWTRY {
+ key = SWDYNAMIC_CAST(VerseKey, this->key);
+ }
+ SWCATCH ( ... ) {}
+ if (!key)
+ key = new VerseKey(this->key);
+
+ findOffset(key->Testament(), key->Index(), &start, &size);
+
+ entryBuf = "";
+ if (size) {
+ SWBuf tmpbuf = path;
+ tmpbuf += '/';
+ readText(key->Testament(), start, size, entryBuf);
+ tmpbuf += entryBuf;
+ entryBuf = "";
+ datafile = FileMgr::getSystemFileMgr()->open(tmpbuf.c_str(), FileMgr::RDONLY);
+ if (datafile->getFd() > 0) {
+ size = datafile->seek(0, SEEK_END);
+ char *tmpBuf = new char [ size + 1 ];
+ memset(tmpBuf, 0, size + 1);
+ datafile->seek(0, SEEK_SET);
+ datafile->read(tmpBuf, size);
+ entryBuf = tmpBuf;
+ delete [] tmpBuf;
+// preptext(entrybuf);
+ }
+ FileMgr::getSystemFileMgr()->close(datafile);
+ }
+
+ if (key != this->key)
+ delete key;
+
+ return entryBuf;
+}
+
+
+/******************************************************************************
+ * RawFiles::setEntry(char *)- Update the modules current key entry with
+ * provided text
+ */
+
+void RawFiles::setEntry(const char *inbuf, long len) {
+ FileDesc *datafile;
+ long start;
+ unsigned short size;
+ VerseKey *key = 0;
+
+ len = (len<0)?strlen(inbuf):len;
+ SWTRY {
+ key = SWDYNAMIC_CAST(VerseKey, this->key);
+ }
+ SWCATCH ( ... ) {}
+ if (!key)
+ key = new VerseKey(this->key);
+
+ findOffset(key->Testament(), key->Index(), &start, &size);
+
+ if (size) {
+ SWBuf tmpbuf;
+ entryBuf = path;
+ entryBuf += '/';
+ readText(key->Testament(), start, size, tmpbuf);
+ entryBuf += tmpbuf;
+ }
+ else {
+ SWBuf tmpbuf;
+ entryBuf = path;
+ entryBuf += '/';
+ tmpbuf = getNextFilename();
+ doSetText(key->Testament(), key->Index(), tmpbuf);
+ entryBuf += tmpbuf;
+ }
+ datafile = FileMgr::getSystemFileMgr()->open(entryBuf, FileMgr::CREAT|FileMgr::WRONLY|FileMgr::TRUNC);
+ if (datafile->getFd() > 0) {
+ datafile->write(inbuf, len);
+ }
+ FileMgr::getSystemFileMgr()->close(datafile);
+
+ if (key != this->key)
+ delete key;
+}
+
+
+/******************************************************************************
+ * RawFiles::linkEntry(SWKey *)- Link the modules current key entry with
+ * another module entry
+ *
+ * RET: *this
+ */
+
+void RawFiles::linkEntry(const SWKey *inkey) {
+
+ long start;
+ unsigned short size;
+ const VerseKey *key = 0;
+
+ SWTRY {
+ key = SWDYNAMIC_CAST(VerseKey, inkey);
+ }
+ SWCATCH ( ... ) {}
+ if (!key)
+ key = new VerseKey(this->key);
+
+ findOffset(key->Testament(), key->Index(), &start, &size);
+
+ if (size) {
+ SWBuf tmpbuf;
+ readText(key->Testament(), start, size + 2, tmpbuf);
+
+ if (key != inkey)
+ delete key;
+ key = 0;
+
+ SWTRY {
+ key = SWDYNAMIC_CAST(VerseKey, inkey);
+ }
+ SWCATCH ( ... ) {}
+ if (!key)
+ key = new VerseKey(this->key);
+ doSetText(key->Testament(), key->Index(), tmpbuf.c_str());
+ }
+
+ if (key != inkey)
+ delete key;
+}
+
+
+/******************************************************************************
+ * RawFiles::deleteEntry - deletes this entry
+ *
+ * RET: *this
+ */
+
+void RawFiles::deleteEntry() {
+
+ VerseKey *key = 0;
+
+ SWTRY {
+ key = SWDYNAMIC_CAST(VerseKey, this->key);
+ }
+ SWCATCH ( ... ) {}
+ if (!key)
+ key = new VerseKey(this->key);
+
+ doSetText(key->Testament(), key->Index(), "");
+
+ if (key != this->key)
+ delete key;
+}
+
+
+/******************************************************************************
+ * RawFiles::getNextfilename - generates a valid filename in which to store
+ * an entry
+ *
+ * RET: filename
+ */
+
+char *RawFiles::getNextFilename() {
+ static char incfile[255];
+ long number;
+ FileDesc *datafile;
+
+ sprintf(incfile, "%s/incfile", path);
+ datafile = FileMgr::getSystemFileMgr()->open(incfile, FileMgr::RDONLY);
+ if (datafile->read(&number, 4) != 4)
+ number = 0;
+ number++;
+ FileMgr::getSystemFileMgr()->close(datafile);
+
+ datafile = FileMgr::getSystemFileMgr()->open(incfile, FileMgr::CREAT|FileMgr::WRONLY|FileMgr::TRUNC);
+ datafile->write(&number, 4);
+ FileMgr::getSystemFileMgr()->close(datafile);
+ sprintf(incfile, "%.7ld", number-1);
+ return incfile;
+}
+
+
+char RawFiles::createModule (const char *path) {
+ char *incfile = new char [ strlen (path) + 16 ];
+ static long zero = 0;
+ FileDesc *datafile;
+
+ sprintf(incfile, "%s/incfile", path);
+ datafile = FileMgr::getSystemFileMgr()->open(incfile, FileMgr::CREAT|FileMgr::WRONLY|FileMgr::TRUNC);
+ delete [] incfile;
+ datafile->write(&zero, 4);
+ FileMgr::getSystemFileMgr()->close(datafile);
+
+ return RawVerse::createModule (path);
+}
+
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/comments/swcom.cpp b/src/modules/comments/swcom.cpp
new file mode 100644
index 0000000..94f92c9
--- /dev/null
+++ b/src/modules/comments/swcom.cpp
@@ -0,0 +1,105 @@
+/******************************************************************************
+ * swcom.cpp - code for base class 'SWCom'- The basis for all commentary
+ * modules
+ */
+
+#include <swcom.h>
+#include <localemgr.h>
+#include <versekey.h>
+
+SWORD_NAMESPACE_START
+
+/******************************************************************************
+ * SWCom Constructor - Initializes data for instance of SWCom
+ *
+ * ENT: imodname - Internal name for module
+ * imoddesc - Name to display to user for module
+ * idisp - Display object to use for displaying
+ */
+
+SWCom::SWCom(const char *imodname, const char *imoddesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang): SWModule(imodname, imoddesc, idisp, (char *)"Commentaries", enc, dir, mark, ilang) {
+ delete key;
+ key = CreateKey();
+ tmpVK = new VerseKey();
+}
+
+
+/******************************************************************************
+ * SWCom Destructor - Cleans up instance of SWCom
+ */
+
+SWCom::~SWCom() {
+ delete tmpVK;
+}
+
+
+SWKey *SWCom::CreateKey() { return new VerseKey(); }
+
+
+long SWCom::Index() const {
+ VerseKey *key = 0;
+ SWTRY {
+ key = SWDYNAMIC_CAST(VerseKey, this->key);
+ }
+ SWCATCH ( ... ) {}
+ if (!key)
+ key = new VerseKey(this->key);
+
+ entryIndex = key->NewIndex();
+
+ if (key != this->key)
+ delete key;
+
+ return entryIndex;
+}
+
+long SWCom::Index(long iindex) {
+ VerseKey *key = 0;
+ SWTRY {
+ key = SWDYNAMIC_CAST(VerseKey, this->key);
+ }
+ SWCATCH ( ... ) {}
+ if (!key)
+ key = new VerseKey(this->key);
+
+ key->Testament(1);
+ key->Index(iindex);
+
+ if (key != this->key) {
+ this->key->copyFrom(*key);
+ delete key;
+ }
+
+ return Index();
+}
+
+
+VerseKey &SWCom::getVerseKey() const {
+ VerseKey *key = NULL;
+ // see if we have a VerseKey * or decendant
+ SWTRY {
+ key = SWDYNAMIC_CAST(VerseKey, this->key);
+ }
+ SWCATCH ( ... ) { }
+ if (!key) {
+ ListKey *lkTest = 0;
+ SWTRY {
+ lkTest = SWDYNAMIC_CAST(ListKey, this->key);
+ }
+ SWCATCH ( ... ) { }
+ if (lkTest) {
+ SWTRY {
+ key = SWDYNAMIC_CAST(VerseKey, lkTest->GetElement());
+ }
+ SWCATCH ( ... ) { }
+ }
+ }
+ if (!key) {
+ tmpVK->setLocale(LocaleMgr::getSystemLocaleMgr()->getDefaultLocaleName());
+ (*tmpVK) = *(this->key);
+ return (*tmpVK);
+ }
+ else return *key;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/comments/zcom/Makefile b/src/modules/comments/zcom/Makefile
new file mode 100644
index 0000000..35d6648
--- /dev/null
+++ b/src/modules/comments/zcom/Makefile
@@ -0,0 +1,5 @@
+
+root := ../../../..
+
+all:
+ make -C ${root}
diff --git a/src/modules/comments/zcom/Makefile.am b/src/modules/comments/zcom/Makefile.am
new file mode 100644
index 0000000..f22700c
--- /dev/null
+++ b/src/modules/comments/zcom/Makefile.am
@@ -0,0 +1,2 @@
+zcomdir = $(top_srcdir)/src/modules/comments/zcom
+libsword_la_SOURCES += $(zcomdir)/zcom.cpp
diff --git a/src/modules/comments/zcom/zcom.cpp b/src/modules/comments/zcom/zcom.cpp
new file mode 100644
index 0000000..cdaea02
--- /dev/null
+++ b/src/modules/comments/zcom/zcom.cpp
@@ -0,0 +1,188 @@
+/******************************************************************************
+ * rawcom.cpp - code for class 'zCom'- a module that reads raw commentary
+ * files: ot and nt using indexs ??.bks ??.cps ??.vss
+ */
+
+
+#include <ctype.h>
+#include <stdio.h>
+#include <fcntl.h>
+
+#include <zverse.h>
+#include <versekey.h>
+#include <zcom.h>
+#include <filemgr.h>
+
+SWORD_NAMESPACE_START
+
+/******************************************************************************
+ * zCom Constructor - Initializes data for instance of zCom
+ *
+ * ENT: ipath - path to data files
+ * iname - Internal name for module
+ * idesc - Name to display to user for module
+ * iblockType - verse, chapter, book, etc. of index chunks
+ * icomp - Compressor object
+ * idisp - Display object to use for displaying
+ */
+
+zCom::zCom(const char *ipath, const char *iname, const char *idesc, int iblockType, SWCompress *icomp, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) : zVerse(ipath, -1, iblockType, icomp), SWCom(iname, idesc, idisp, enc, dir, mark, ilang)/*, SWCompress()*/
+{
+ blockType = iblockType;
+ lastWriteKey = 0;
+}
+
+/******************************************************************************
+ * zCom Destructor - Cleans up instance of zCom
+ */
+
+zCom::~zCom() {
+ flushCache();
+
+ if (lastWriteKey)
+ delete lastWriteKey;
+}
+
+
+bool zCom::isWritable() {
+ return ((idxfp[0]->getFd() > 0) && ((idxfp[0]->mode & FileMgr::RDWR) == FileMgr::RDWR));
+}
+
+
+/******************************************************************************
+ * zCom::getRawEntry - Returns the correct verse when char * cast
+ * is requested
+ *
+ * RET: string buffer with verse
+ */
+SWBuf &zCom::getRawEntryBuf() {
+ long start = 0;
+ unsigned short size = 0;
+ VerseKey *key = &getVerseKey();
+
+ findOffset(key->Testament(), key->Index(), &start, &size);
+ entrySize = size; // support getEntrySize call
+
+ entryBuf = "";
+ zReadText(key->Testament(), start, size, entryBuf);
+
+ rawFilter(entryBuf, key);
+
+// if (!isUnicode())
+ prepText(entryBuf);
+
+ return entryBuf;
+}
+
+
+bool zCom::sameBlock(VerseKey *k1, VerseKey *k2) {
+ if (k1->Testament() != k2->Testament())
+ return false;
+
+ switch (blockType) {
+ case VERSEBLOCKS:
+ if (k1->Verse() != k2->Verse())
+ return false;
+ case CHAPTERBLOCKS:
+ if (k1->Chapter() != k2->Chapter())
+ return false;
+ case BOOKBLOCKS:
+ if (k1->Book() != k2->Book())
+ return false;
+ }
+ return true;
+}
+
+void zCom::setEntry(const char *inbuf, long len) {
+ VerseKey *key = &getVerseKey();
+
+ // see if we've jumped across blocks since last write
+ if (lastWriteKey) {
+ if (!sameBlock(lastWriteKey, key)) {
+ flushCache();
+ }
+ delete lastWriteKey;
+ }
+
+ doSetText(key->Testament(), key->Index(), inbuf, len);
+
+ lastWriteKey = (VerseKey *)key->clone(); // must delete
+}
+
+
+void zCom::linkEntry(const SWKey *inkey) {
+ VerseKey *destkey = &getVerseKey();
+ const VerseKey *srckey = 0;
+
+ // see if we have a VerseKey * or decendant
+ SWTRY {
+ srckey = (const VerseKey *) SWDYNAMIC_CAST(VerseKey, inkey);
+ }
+ SWCATCH ( ... ) {
+ }
+ // if we don't have a VerseKey * decendant, create our own
+ if (!srckey)
+ srckey = new VerseKey(inkey);
+
+ doLinkEntry(destkey->Testament(), destkey->Index(), srckey->Index());
+
+ if (inkey != srckey) // free our key if we created a VerseKey
+ delete srckey;
+}
+
+/******************************************************************************
+ * zCom::deleteEntry - deletes this entry
+ *
+ * RET: *this
+ */
+
+void zCom::deleteEntry() {
+
+ VerseKey *key = &getVerseKey();
+ doSetText(key->Testament(), key->Index(), "");
+}
+
+
+/******************************************************************************
+ * zCom::increment - Increments module key a number of entries
+ *
+ * ENT: increment - Number of entries to jump forward
+ *
+ * RET: *this
+ */
+
+void zCom::increment(int steps) {
+ long start;
+ unsigned short size;
+ VerseKey *tmpkey = &getVerseKey();
+
+ findOffset(tmpkey->Testament(), tmpkey->Index(), &start, &size);
+
+ SWKey lastgood = *tmpkey;
+ while (steps) {
+ long laststart = start;
+ unsigned short lastsize = size;
+ SWKey lasttry = *tmpkey;
+ (steps > 0) ? (*key)++ : (*key)--;
+ tmpkey = &getVerseKey();
+
+ if ((error = key->Error())) {
+ *key = lastgood;
+ break;
+ }
+ long index = tmpkey->Index();
+ findOffset(tmpkey->Testament(), index, &start, &size);
+ if (
+ (((laststart != start) || (lastsize != size)) // we're a different entry
+// && (start > 0)
+ && (size)) // and we actually have a size
+ ||(!skipConsecutiveLinks)) { // or we don't want to skip consecutive links
+ steps += (steps < 0) ? 1 : -1;
+ lastgood = *tmpkey;
+ }
+ }
+ error = (error) ? KEYERR_OUTOFBOUNDS : 0;
+}
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/common/Makefile b/src/modules/common/Makefile
new file mode 100644
index 0000000..81f7721
--- /dev/null
+++ b/src/modules/common/Makefile
@@ -0,0 +1,4 @@
+root := ../../..
+
+all:
+ make -C ${root}
diff --git a/src/modules/common/Makefile.am b/src/modules/common/Makefile.am
new file mode 100644
index 0000000..e688094
--- /dev/null
+++ b/src/modules/common/Makefile.am
@@ -0,0 +1,21 @@
+commondir = $(top_srcdir)/src/modules/common
+
+libsword_la_SOURCES += $(commondir)/rawstr.cpp
+libsword_la_SOURCES += $(commondir)/rawstr4.cpp
+libsword_la_SOURCES += $(commondir)/swcomprs.cpp
+libsword_la_SOURCES += $(commondir)/lzsscomprs.cpp
+
+if ZLIB
+SWZLIB = $(commondir)/zipcomprs.cpp
+else
+SWZLIB =
+endif
+libsword_la_SOURCES += $(SWZLIB)
+libsword_la_SOURCES += $(commondir)/rawverse.cpp
+libsword_la_SOURCES += $(commondir)/rawverse4.cpp
+libsword_la_SOURCES += $(commondir)/swcipher.cpp
+libsword_la_SOURCES += $(commondir)/zverse.cpp
+libsword_la_SOURCES += $(commondir)/zstr.cpp
+libsword_la_SOURCES += $(commondir)/entriesblk.cpp
+libsword_la_SOURCES += $(commondir)/sapphire.cpp
+
diff --git a/src/modules/common/compress.cpp.txt b/src/modules/common/compress.cpp.txt
new file mode 100644
index 0000000..5031adb
--- /dev/null
+++ b/src/modules/common/compress.cpp.txt
@@ -0,0 +1,767 @@
+Compression Info, 10-11-95
+Jeff Wheeler
+
+Source of Algorithm
+-------------------
+
+The compression algorithms used here are based upon the algorithms developed and published by Haruhiko Okumura in a paper entitled "Data Compression Algorithms of LARC and LHarc." This paper discusses three compression algorithms, LSZZ, LZARI, and LZHUF. LZSS is described as the "first" of these, and is described as providing moderate compression with good speed. LZARI is described as an improved LZSS, a combination of the LZSS algorithm with adaptive arithmetic compression. It is described as being slower than LZSS but with better compression. LZHUF (the basis of the common LHA compression program) was included in the paper, however, a free usage license was not included.
+
+The following are copies of the statements included at the beginning of each source code listing that was supplied in the working paper.
+
+ LZSS, dated 4/6/89, marked as "Use, distribute and
+ modify this program freely."
+
+ LZARI, dated 4/7/89, marked as "Use, distribute and
+ modify this program freely."
+
+ LZHUF, dated 11/20/88, written by Haruyasu Yoshizaki,
+ translated by Haruhiko Okumura on 4/7/89. Not
+ expressly marked as redistributable or modifiable.
+
+Since both LZSS and LZARI are marked as "use, distribute and modify freely" we have felt at liberty basing our compression algorithm on either of these.
+
+Selection of Algorithm
+----------------------
+
+Working samples of three possible compression algorithms are supplied in Okumura's paper. Which should be used?
+
+LZSS is the fastest at decompression, but does not generated as small a compressed file as the other methods. The other two methods provided, perhaps, a 15% improvement in compression. Or, put another way, on a 100K file, LZSS might compress it to 50K while the others might approach 40-45K. For STEP purposes, it was decided that decoding speed was of more importance than tighter compression. For these reasons, the first compression algorithm implemented is the LZSS algorithm.
+
+About LZSS Encoding
+-------------------
+
+(adapted from Haruhiko Okumura's paper)
+
+This scheme was proposed by Ziv and Lempel [1]. A slightly modified version is described by Storer and Szymanski [2]. An implementation using a binary tree has been proposed by Bell [3].
+
+The algorithm is quite simple.
+1. Keep a ring buffer which initially contains all space characters.
+2. Read several letters from the file to the buffer.
+3. Search the buffer for the longest string that matches the letters just read, and send its length and position into the buffer.
+
+If the ring buffer is 4096 bytes, the position can be stored in 12 bits. If the length is represented in 4 bits, the <position, length> pair is two bytes long. If the longest match is no more than two characters, then just one character is sent without encoding. The process starts again with the next character. An extra bit is sent each time to tell the decoder whether the next item is a character of a <position, length> pair.
+
+[1] J. Ziv and A. Lempel, IEEE Transactions IT-23, 337-343 (1977).
+[2] J. A. Storer and T. G. Szymanski, J. ACM, 29, 928-951 (1982).
+[3] T.C. Gell, IEEE Transactions COM-34, 1176-1182 (1986).
+
+void InitTree( // no return value
+ void); // no parameters
+
+void InsertNode( // no return value
+ short int Pos); // position in the buffer
+
+void DeleteNode( // no return value
+ short int Node); // node to be removed
+
+void Encode( // no return value
+ void); // no parameters
+
+void Decode( // no return value
+ void); // no parameters
+
+// The following are constant sizes used by the compression algorithm.
+//
+// N - This is the size of the ring buffer. It is set
+// to 4K. It is important to note that a position
+// within the ring buffer requires 12 bits.
+//
+// F - This is the maximum length of a character sequence
+// that can be taken from the ring buffer. It is set
+// to 18. Note that a length must be 3 before it is
+// worthwhile to store a position/length pair, so the
+// length can be encoded in only 4 bits. Or, put yet
+// another way, it is not necessary to encode a length
+// of 0-18, it is necessary to encode a length of
+// 3-18, which requires 4 bits.
+//
+// THRESHOLD - It takes 2 bytes to store an offset and
+// a length. If a character sequence only
+// requires 1 or 2 characters to store
+// uncompressed, then it is better to store
+// it uncompressed than as an offset into
+// the ring buffer.
+//
+// Note that the 12 bits used to store the position and the 4 bits
+// used to store the length equal a total of 16 bits, or 2 bytes.
+
+#define N 4096
+#define F 18
+#define THRESHOLD 3
+#define NOT_USED N
+
+// m_ring_buffer is a text buffer. It contains "nodes" of
+// uncompressed text that can be indexed by position. That is,
+// a substring of the ring buffer can be indexed by a position
+// and a length. When decoding, the compressed text may contain
+// a position in the ring buffer and a count of the number of
+// bytes from the ring buffer that are to be moved into the
+// uncompressed buffer.
+//
+// This ring buffer is not maintained as part of the compressed
+// text. Instead, it is reconstructed dynamically. That is,
+// it starts out empty and gets built as the text is decompressed.
+//
+// The ring buffer contain N bytes, with an additional F - 1 bytes
+// to facilitate string comparison.
+
+unsigned char m_ring_buffer[N + F - 1];
+
+// m_match_position and m_match_length are set by InsertNode().
+//
+// These variables indicate the position in the ring buffer
+// and the number of characters at that position that match
+// a given string.
+
+short int m_match_position;
+short int m_match_length;
+
+// m_lson, m_rson, and m_dad are the Japanese way of referring to
+// a tree structure. The dad is the parent and it has a right and
+// left son (child).
+//
+// For i = 0 to N-1, m_rson[i] and m_lson[i] will be the right
+// and left children of node i.
+//
+// For i = 0 to N-1, m_dad[i] is the parent of node i.
+//
+// For i = 0 to 255, rson[N + i + 1] is the root of the tree for
+// strings that begin with the character i. Note that this requires
+// one byte characters.
+//
+// These nodes store values of 0...(N-1). Memory requirements
+// can be reduces by using 2-byte integers instead of full 4-byte
+// integers (for 32-bit applications). Therefore, these are
+// defined as "short ints."
+
+short int m_lson[N + 1];
+short int m_rson[N + 257];
+short int m_dad[N + 1];
+
+/*
+ -------------------------------------------------------------------------
+ cLZSS::InitTree
+
+ This function initializes the tree nodes to "empty" states.
+ -------------------------------------------------------------------------
+*/
+
+void cLZSS::InitTree( // no return value
+ void) // no parameters
+ throw() // exception list
+
+ {
+ int i;
+
+ // For i = 0 to N - 1, m_rson[i] and m_lson[i] will be the right
+ // and left children of node i. These nodes need not be
+ // initialized. However, for debugging purposes, it is nice to
+ // have them initialized. Since this is only used for compression
+ // (not decompression), I don't mind spending the time to do it.
+ //
+ // For the same range of i, m_dad[i] is the parent of node i.
+ // These are initialized to a known value that can represent
+ // a "not used" state.
+
+ for (i = 0; i < N; i++)
+ {
+ m_lson[i] = NOT_USED;
+ m_rson[i] = NOT_USED;
+ m_dad[i] = NOT_USED;
+ }
+
+ // For i = 0 to 255, m_rson[N + i + 1] is the root of the tree
+ // for strings that begin with the character i. This is why
+ // the right child array is larger than the left child array.
+ // These are also initialzied to a "not used" state.
+ //
+ // Note that there are 256 of these, one for each of the possible
+ // 256 characters.
+
+ for (i = N + 1; i <= (N + 256); i++)
+ {
+ m_rson[i] = NOT_USED;
+ }
+
+ // Done.
+ }
+
+/*
+ -------------------------------------------------------------------------
+ cLZSS::InsertNode
+
+ This function inserts a string from the ring buffer into one of
+ the trees. It loads the match position and length member variables
+ for the longest match.
+
+ The string to be inserted is identified by the parameter Pos,
+ A full F bytes are inserted. So, m_ring_buffer[Pos ... Pos+F-1]
+ are inserted.
+
+ If the matched length is exactly F, then an old node is removed
+ in favor of the new one (because the old one will be deleted
+ sooner).
+
+ Note that Pos plays a dual role. It is used as both a position
+ in the ring buffer and also as a tree node. m_ring_buffer[Pos]
+ defines a character that is used to identify a tree node.
+ -------------------------------------------------------------------------
+*/
+
+void cLZSS::InsertNode( // no return value
+ short int Pos) // position in the buffer
+ throw() // exception list
+
+ {
+ short int i;
+ short int p;
+ int cmp;
+ unsigned char * key;
+
+ ASSERT(Pos >= 0);
+ ASSERT(Pos < N);
+
+ cmp = 1;
+ key = &(m_ring_buffer[Pos]);
+
+ // The last 256 entries in m_rson contain the root nodes for
+ // strings that begin with a letter. Get an index for the
+ // first letter in this string.
+
+ p = (short int) (N + 1 + key[0]);
+
+ // Set the left and right tree nodes for this position to "not
+ // used."
+
+ m_lson[Pos] = NOT_USED;
+ m_rson[Pos] = NOT_USED;
+
+ // Haven't matched anything yet.
+
+ m_match_length = 0;
+
+ for ( ; ; )
+ {
+ if (cmp >= 0)
+ {
+ if (m_rson[p] != NOT_USED)
+ {
+ p = m_rson[p];
+ }
+ else
+ {
+ m_rson[p] = Pos;
+ m_dad[Pos] = p;
+ return;
+ }
+ }
+ else
+ {
+ if (m_lson[p] != NOT_USED)
+ {
+ p = m_lson[p];
+ }
+ else
+ {
+ m_lson[p] = Pos;
+ m_dad[Pos] = p;
+ return;
+ }
+ }
+
+ // Should we go to the right or the left to look for the
+ // next match?
+
+ for (i = 1; i < F; i++)
+ {
+ cmp = key[i] - m_ring_buffer[p + i];
+ if (cmp != 0)
+ break;
+ }
+
+ if (i > m_match_length)
+ {
+ m_match_position = p;
+ m_match_length = i;
+
+ if (i >= F)
+ break;
+ }
+ }
+
+ m_dad[Pos] = m_dad[p];
+ m_lson[Pos] = m_lson[p];
+ m_rson[Pos] = m_rson[p];
+
+ m_dad[ m_lson[p] ] = Pos;
+ m_dad[ m_rson[p] ] = Pos;
+
+ if (m_rson[ m_dad[p] ] == p)
+ {
+ m_rson[ m_dad[p] ] = Pos;
+ }
+ else
+ {
+ m_lson[ m_dad[p] ] = Pos;
+ }
+
+ // Remove "p"
+
+ m_dad[p] = NOT_USED;
+ }
+
+/*
+ -------------------------------------------------------------------------
+ cLZSS::DeleteNode
+
+ This function removes the node "Node" from the tree.
+ -------------------------------------------------------------------------
+*/
+
+void cLZSS::DeleteNode( // no return value
+ short int Node) // node to be removed
+ throw() // exception list
+
+ {
+ short int q;
+
+ ASSERT(Node >= 0);
+ ASSERT(Node < (N+1));
+
+ if (m_dad[Node] == NOT_USED)
+ {
+ // not in tree, nothing to do
+ return;
+ }
+
+ if (m_rson[Node] == NOT_USED)
+ {
+ q = m_lson[Node];
+ }
+ else if (m_lson[Node] == NOT_USED)
+ {
+ q = m_rson[Node];
+ }
+ else
+ {
+ q = m_lson[Node];
+ if (m_rson[q] != NOT_USED)
+ {
+ do
+ {
+ q = m_rson[q];
+ }
+ while (m_rson[q] != NOT_USED);
+
+ m_rson[ m_dad[q] ] = m_lson[q];
+ m_dad[ m_lson[q] ] = m_dad[q];
+ m_lson[q] = m_lson[Node];
+ m_dad[ m_lson[Node] ] = q;
+ }
+
+ m_rson[q] = m_rson[Node];
+ m_dad[ m_rson[Node] ] = q;
+ }
+
+ m_dad[q] = m_dad[Node];
+
+ if (m_rson[ m_dad[Node] ] == Node)
+ {
+ m_rson[ m_dad[Node] ] = q;
+ }
+ else
+ {
+ m_lson[ m_dad[Node] ] = q;
+ }
+
+ m_dad[Node] = NOT_USED;
+ }
+
+/*
+ -------------------------------------------------------------------------
+ cLZSS::Encode
+
+ This function "encodes" the input stream into the output stream.
+ The GetChars() and SendChars() functions are used to separate
+ this method from the actual i/o.
+ -------------------------------------------------------------------------
+*/
+
+void cLZSS::Encode( // no return value
+ void) // no parameters
+
+ {
+ short int i; // an iterator
+ short int r; // node number in the binary tree
+ short int s; // position in the ring buffer
+ unsigned short int len; // len of initial string
+ short int last_match_length; // length of last match
+ short int code_buf_pos; // position in the output buffer
+ unsigned char code_buf[17]; // the output buffer
+ unsigned char mask; // bit mask for byte 0 of out buf
+ unsigned char c; // character read from string
+
+ // Start with a clean tree.
+
+ InitTree();
+
+ // code_buf[0] works as eight flags. A "1" represents that the
+ // unit is an unencoded letter (1 byte), and a "0" represents
+ // that the next unit is a <position,length> pair (2 bytes).
+ //
+ // code_buf[1..16] stores eight units of code. Since the best
+ // we can do is store eight <position,length> pairs, at most 16
+ // bytes are needed to store this.
+ //
+ // This is why the maximum size of the code buffer is 17 bytes.
+
+ code_buf[0] = 0;
+ code_buf_pos = 1;
+
+ // Mask iterates over the 8 bits in the code buffer. The first
+ // character ends up being stored in the low bit.
+ //
+ // bit 8 7 6 5 4 3 2 1
+ // | |
+ // | first sequence in code buffer
+ // |
+ // last sequence in code buffer
+
+ mask = 1;
+
+ s = 0;
+ r = (short int) N - (short int) F;
+
+ // Initialize the ring buffer with spaces...
+
+ // Note that the last F bytes of the ring buffer are not filled.
+ // This is because those F bytes will be filled in immediately
+ // with bytes from the input stream.
+
+ memset(m_ring_buffer, ' ', N - F);
+
+ // Read F bytes into the last F bytes of the ring buffer.
+ //
+ // This function loads the buffer with X characters and returns
+ // the actual amount loaded.
+
+ len = GetChars(&(m_ring_buffer[r]), F);
+
+ // Make sure there is something to be compressed.
+
+ if (len == 0)
+ return;
+
+ // Insert the F strings, each of which begins with one or more
+ // 'space' characters. Note the order in which these strings
+ // are inserted. This way, degenerate trees will be less likely
+ // to occur.
+
+ for (i = 1; i <= F; i++)
+ {
+ InsertNode((short int) (r - i));
+ }
+
+ // Finally, insert the whole string just read. The
+ // member variables match_length and match_position are set.
+
+ InsertNode(r);
+
+ // Now that we're preloaded, continue till done.
+
+ do
+ {
+
+ // m_match_length may be spuriously long near the end of
+ // text.
+
+ if (m_match_length > len)
+ {
+ m_match_length = len;
+ }
+
+ // Is it cheaper to store this as a single character? If so,
+ // make it so.
+
+ if (m_match_length < THRESHOLD)
+ {
+ // Send one character. Remember that code_buf[0] is the
+ // set of flags for the next eight items.
+
+ m_match_length = 1;
+ code_buf[0] |= mask;
+ code_buf[code_buf_pos++] = m_ring_buffer[r];
+ }
+
+ // Otherwise, we do indeed have a string that can be stored
+ // compressed to save space.
+
+ else
+ {
+ // The next 16 bits need to contain the position (12 bits)
+ // and the length (4 bits).
+
+ code_buf[code_buf_pos++] = (unsigned char) m_match_position;
+ code_buf[code_buf_pos++] = (unsigned char) (
+ ((m_match_position >> 4) & 0xf0) |
+ (m_match_length - THRESHOLD) );
+ }
+
+ // Shift the mask one bit to the left so that it will be ready
+ // to store the new bit.
+
+ mask = (unsigned char) (mask << 1);
+
+ // If the mask is now 0, then we know that we have a full set
+ // of flags and items in the code buffer. These need to be
+ // output.
+
+ if (mask == 0)
+ {
+ // code_buf is the buffer of characters to be output.
+ // code_buf_pos is the number of characters it contains.
+
+ SendChars(code_buf, code_buf_pos);
+
+ // Reset for next buffer...
+
+ code_buf[0] = 0;
+ code_buf_pos = 1;
+ mask = 1;
+ }
+
+ last_match_length = m_match_length;
+
+ // Delete old strings and read new bytes...
+
+ for (i = 0; i < last_match_length; i++)
+ {
+
+ // Get next character...
+
+ if (GetChars(&c, 1) != 1)
+ break;
+
+ // Delete "old strings"
+
+ DeleteNode(s);
+
+ // Put this character into the ring buffer.
+ //
+ // The original comment here says "If the position is near
+ // the end of the buffer, extend the buffer to make
+ // string comparison easier."
+ //
+ // That's a little misleading, because the "end" of the
+ // buffer is really what we consider to be the "beginning"
+ // of the buffer, that is, positions 0 through F.
+ //
+ // The idea is that the front end of the buffer is duplicated
+ // into the back end so that when you're looking at characters
+ // at the back end of the buffer, you can index ahead (beyond
+ // the normal end of the buffer) and see the characters
+ // that are at the front end of the buffer wihtout having
+ // to adjust the index.
+ //
+ // That is...
+ //
+ // 1234xxxxxxxxxxxxxxxxxxxxxxxxxxxxx1234
+ // | | |
+ // position 0 end of buffer |
+ // |
+ // duplicate of front of buffer
+
+ m_ring_buffer[s] = c;
+
+ if (s < F - 1)
+ {
+ m_ring_buffer[s + N] = c;
+ }
+
+ // Increment the position, and wrap around when we're at
+ // the end. Note that this relies on N being a power of 2.
+
+ s = (short int) ( (s + 1) & (N - 1) );
+ r = (short int) ( (r + 1) & (N - 1) );
+
+ // Register the string that is found in
+ // m_ring_buffer[r..r+F-1].
+
+ InsertNode(r);
+ }
+
+ // If we didn't quit because we hit the last_match_length,
+ // then we must have quit because we ran out of characters
+ // to process.
+
+ while (i++ < last_match_length)
+ {
+ DeleteNode(s);
+
+ s = (short int) ( (s + 1) & (N - 1) );
+ r = (short int) ( (r + 1) & (N - 1) );
+
+ // Note that len hitting 0 is the key that causes the
+ // do...while() to terminate. This is the only place
+ // within the loop that len is modified.
+ //
+ // Its original value is F (or a number less than F for
+ // short strings).
+
+ if (--len)
+ {
+ InsertNode(r); /* buffer may not be empty. */
+ }
+ }
+
+ // End of do...while() loop. Continue processing until there
+ // are no more characters to be compressed. The variable
+ // "len" is used to signal this condition.
+ }
+ while (len > 0);
+
+ // There could still be something in the output buffer. Send it
+ // now.
+
+ if (code_buf_pos > 1)
+ {
+ // code_buf is the encoded string to send.
+ // code_buf_ptr is the number of characters.
+
+ SendChars(code_buf, code_buf_pos);
+ }
+
+ // Done!
+ }
+
+/*
+ -------------------------------------------------------------------------
+ cLZSS::Decode
+
+ This function "decodes" the input stream into the output stream.
+ The GetChars() and SendChars() functions are used to separate
+ this method from the actual i/o.
+ -------------------------------------------------------------------------
+*/
+
+void cLZSS::Decode( // no return value
+ void) // no parameters
+
+ {
+ int k;
+ int r; // node number
+ unsigned char c[F]; // an array of chars
+ unsigned char flags; // 8 bits of flags
+ int flag_count; // which flag we're on
+ short int pos; // position in the ring buffer
+ short int len; // number of chars in ring buffer
+
+ // Initialize the ring buffer with a common string.
+ //
+ // Note that the last F bytes of the ring buffer are not filled.
+
+ memset(m_ring_buffer, ' ', N - F);
+
+ r = N - F;
+
+ flags = (char) 0;
+ flag_count = 0;
+
+ for ( ; ; )
+ {
+
+ // If there are more bits of interest in this flag, then
+ // shift that next interesting bit into the 1's position.
+ //
+ // If this flag has been exhausted, the next byte must
+ // be a flag.
+
+ if (flag_count > 0)
+ {
+ flags = (unsigned char) (flags >> 1);
+ flag_count--;
+ }
+ else
+ {
+ // Next byte must be a flag.
+
+ if (GetChars(&flags, 1) != 1)
+ break;
+
+ // Set the flag counter. While at first it might appear
+ // that this should be an 8 since there are 8 bits in the
+ // flag, it should really be a 7 because the shift must
+ // be performed 7 times in order to see all 8 bits.
+
+ flag_count = 7;
+ }
+
+ // If the low order bit of the flag is now set, then we know
+ // that the next byte is a single, unencoded character.
+
+ if (flags & 1)
+ {
+ if (GetChars(c, 1) != 1)
+ break;
+
+ if (SendChars(c, 1) != 1)
+ break;
+
+ // Add to buffer, and increment to next spot. Wrap at end.
+
+ m_ring_buffer[r] = c[0];
+ r = (short int) ( (r + 1) & (N - 1) );
+ }
+
+ // Otherwise, we know that the next two bytes are a
+ // <position,length> pair. The position is in 12 bits and
+ // the length is in 4 bits.
+
+ else
+ {
+ // Original code:
+ // if ((i = getc(infile)) == EOF)
+ // break;
+ // if ((j = getc(infile)) == EOF)
+ // break;
+ // i |= ((j & 0xf0) << 4);
+ // j = (j & 0x0f) + THRESHOLD;
+ //
+ // I've modified this to only make one input call, and
+ // have changed the variable names to something more
+ // obvious.
+
+ if (GetChars(c, 2) != 2)
+ break;
+
+ // Convert these two characters into the position and
+ // length. Note that the length is always at least
+ // THRESHOLD, which is why we're able to get a length
+ // of 18 out of only 4 bits.
+
+ pos = (short int) ( c[0] | ((c[1] & 0xf0) << 4) );
+
+ len = (short int) ( (c[1] & 0x0f) + THRESHOLD );
+
+ // There are now "len" characters at position "pos" in
+ // the ring buffer that can be pulled out. Note that
+ // len is never more than F.
+
+ for (k = 0; k < len; k++)
+ {
+ c[k] = m_ring_buffer[(pos + k) & (N - 1)];
+
+ // Add to buffer, and increment to next spot. Wrap at end.
+
+ m_ring_buffer[r] = c[k];
+ r = (short int) ( (r + 1) & (N - 1) );
+ }
+
+ // Add the "len" characters to the output stream.
+
+ if (SendChars(c, len) != len)
+ break;
+ }
+ }
+ }
+
diff --git a/src/modules/common/entriesblk.cpp b/src/modules/common/entriesblk.cpp
new file mode 100644
index 0000000..216abd8
--- /dev/null
+++ b/src/modules/common/entriesblk.cpp
@@ -0,0 +1,172 @@
+#include <entriesblk.h>
+#include <stdlib.h>
+#include <string.h>
+
+SWORD_NAMESPACE_START
+
+const int EntriesBlock::METAHEADERSIZE = 4;
+ // count(4);
+const int EntriesBlock::METAENTRYSIZE = 8;
+ // offset(4); size(4);
+
+EntriesBlock::EntriesBlock(const char *iBlock, unsigned long size) {
+ if (size) {
+ block = (char *)calloc(1, size);
+ memcpy(block, iBlock, size);
+ }
+ else {
+ block = (char *)calloc(1, sizeof(__u32));
+ }
+}
+
+
+EntriesBlock::EntriesBlock() {
+ block = (char *)calloc(1, sizeof(__u32));
+}
+
+
+EntriesBlock::~EntriesBlock() {
+ free(block);
+}
+
+
+void EntriesBlock::setCount(int count) {
+ __u32 rawCount = archtosword32(count);
+ memcpy(block, &rawCount, sizeof(__u32));
+}
+
+
+int EntriesBlock::getCount() {
+ __u32 count = 0;
+ memcpy(&count, block, sizeof(__u32));
+ count = swordtoarch32(count);
+ return count;
+}
+
+
+void EntriesBlock::getMetaEntry(int index, unsigned long *offset, unsigned long *size) {
+ __u32 rawOffset = 0;
+ __u32 rawSize = 0;
+ *offset = 0;
+ *size = 0;
+ if (index >= getCount()) // assert index < count
+ return;
+
+ // first 4 bytes is count, each 6 bytes after is each meta entry
+ memcpy(&rawOffset, block + METAHEADERSIZE + (index * METAENTRYSIZE), sizeof(rawOffset));
+ memcpy(&rawSize, block + METAHEADERSIZE + (index * METAENTRYSIZE) + sizeof(rawOffset), sizeof(rawSize));
+
+ *offset = (unsigned long)swordtoarch32(rawOffset);
+ *size = (unsigned long)swordtoarch32(rawSize);
+}
+
+
+void EntriesBlock::setMetaEntry(int index, unsigned long offset, unsigned long size) {
+ __u32 rawOffset = archtosword32(offset);
+ __u32 rawSize = archtosword32(size);
+
+ if (index >= getCount()) // assert index < count
+ return;
+
+ // first 4 bytes is count, each 6 bytes after is each meta entry
+ memcpy(block + METAHEADERSIZE + (index * METAENTRYSIZE), &rawOffset, sizeof(rawOffset));
+ memcpy(block + METAHEADERSIZE + (index * METAENTRYSIZE) + sizeof(rawOffset), &rawSize, sizeof(rawSize));
+}
+
+
+const char *EntriesBlock::getRawData(unsigned long *retSize) {
+ unsigned long max = 4;
+ int loop;
+ unsigned long offset;
+ unsigned long size;
+ for (loop = 0; loop < getCount(); loop++) {
+ getMetaEntry(loop, &offset, &size);
+ max = ((offset + size) > max) ? (offset + size) : max;
+ }
+ *retSize = max;
+ return block;
+}
+
+
+int EntriesBlock::addEntry(const char *entry) {
+ unsigned long dataSize;
+ getRawData(&dataSize);
+ unsigned long len = strlen(entry);
+ unsigned long offset;
+ unsigned long size;
+ int count = getCount();
+ unsigned long dataStart = METAHEADERSIZE + (count * METAENTRYSIZE);
+ // new meta entry + new data size + 1 because null
+ block = (char *)realloc(block, dataSize + METAENTRYSIZE + len + 1);
+ // shift right to make room for new meta entry
+ memmove(block + dataStart + METAENTRYSIZE, block + dataStart, dataSize - dataStart);
+
+ for (int loop = 0; loop < count; loop++) {
+ getMetaEntry(loop, &offset, &size);
+ if (offset) { // if not a deleted entry
+ offset += METAENTRYSIZE;
+ setMetaEntry(loop, offset, size);
+ }
+ }
+
+ offset = dataSize; // original dataSize before realloc
+ size = len + 1;
+ // add our text to the end
+ memcpy(block + offset + METAENTRYSIZE, entry, size);
+ // increment count
+ setCount(count + 1);
+ // add our meta entry
+ setMetaEntry(count, offset + METAENTRYSIZE, size);
+ // return index of our new entry
+ return count;
+}
+
+
+const char *EntriesBlock::getEntry(int entryIndex) {
+ unsigned long offset;
+ unsigned long size;
+ static const char *empty = "";
+
+ getMetaEntry(entryIndex, &offset, &size);
+ return (offset) ? block+offset : empty;
+}
+
+
+unsigned long EntriesBlock::getEntrySize(int entryIndex) {
+ unsigned long offset;
+ unsigned long size;
+
+ getMetaEntry(entryIndex, &offset, &size);
+ return (offset) ? size : 0;
+}
+
+
+void EntriesBlock::removeEntry(int entryIndex) {
+ unsigned long offset;
+ unsigned long size, size2;
+ unsigned long dataSize;
+ getRawData(&dataSize);
+ getMetaEntry(entryIndex, &offset, &size);
+ int count = getCount();
+
+ if (!offset) // already deleted
+ return;
+
+ // shift left to retrieve space used for old entry
+ memmove(block + offset, block + offset + size, dataSize - (offset + size));
+
+ // fix offset for all entries after our entry that were shifted left
+ for (int loop = entryIndex + 1; loop < count; loop++) {
+ getMetaEntry(loop, &offset, &size2);
+ if (offset) { // if not a deleted entry
+ offset -= size;
+ setMetaEntry(loop, offset, size2);
+ }
+ }
+
+ // zero out our meta entry
+ setMetaEntry(entryIndex, 0L, 0);
+}
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/common/lzsscomprs.cpp b/src/modules/common/lzsscomprs.cpp
new file mode 100644
index 0000000..bd8f768
--- /dev/null
+++ b/src/modules/common/lzsscomprs.cpp
@@ -0,0 +1,668 @@
+/******************************************************************************
+ * lzsscomprs.cpp - code for class 'LZSSCompress'- a driver class that
+ * provides LZSS compression
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <lzsscomprs.h>
+
+SWORD_NAMESPACE_START
+
+/******************************************************************************
+ * LZSSCompress Statics
+ */
+
+// m_ring_buffer is a text buffer. It contains "nodes" of
+// uncompressed text that can be indexed by position. That is,
+// a substring of the ring buffer can be indexed by a position
+// and a length. When decoding, the compressed text may contain
+// a position in the ring buffer and a count of the number of
+// bytes from the ring buffer that are to be moved into the
+// uncompressed buffer.
+//
+// This ring buffer is not maintained as part of the compressed
+// text. Instead, it is reconstructed dynamically. That is,
+// it starts out empty and gets built as the text is decompressed.
+//
+// The ring buffer contain N bytes, with an additional F - 1 bytes
+// to facilitate string comparison.
+
+unsigned char LZSSCompress::m_ring_buffer[N + F - 1];
+
+// m_match_position and m_match_length are set by InsertNode().
+//
+// These variables indicate the position in the ring buffer
+// and the number of characters at that position that match
+// a given string.
+
+short int LZSSCompress::m_match_position;
+short int LZSSCompress::m_match_length;
+
+// m_lson, m_rson, and m_dad are the Japanese way of referring to
+// a tree structure. The dad is the parent and it has a right and
+// left son (child).
+//
+// For i = 0 to N-1, m_rson[i] and m_lson[i] will be the right
+// and left children of node i.
+//
+// For i = 0 to N-1, m_dad[i] is the parent of node i.
+//
+// For i = 0 to 255, rson[N + i + 1] is the root of the tree for
+// strings that begin with the character i. Note that this requires
+// one byte characters.
+//
+// These nodes store values of 0...(N-1). Memory requirements
+// can be reduces by using 2-byte integers instead of full 4-byte
+// integers (for 32-bit applications). Therefore, these are
+// defined as "short ints."
+
+short int LZSSCompress::m_lson[N + 1];
+short int LZSSCompress::m_rson[N + 257];
+short int LZSSCompress::m_dad[N + 1];
+
+
+/******************************************************************************
+ * LZSSCompress Constructor - Initializes data for instance of LZSSCompress
+ *
+ */
+
+LZSSCompress::LZSSCompress() : SWCompress() {
+}
+
+
+/******************************************************************************
+ * LZSSCompress Destructor - Cleans up instance of LZSSCompress
+ */
+
+LZSSCompress::~LZSSCompress() {
+}
+
+
+/******************************************************************************
+ * LZSSCompress::InitTree - This function initializes the tree nodes to
+ * "empty" states.
+ */
+
+void LZSSCompress::InitTree(void) {
+ int i;
+
+ // For i = 0 to N - 1, m_rson[i] and m_lson[i] will be the right
+ // and left children of node i. These nodes need not be
+ // initialized. However, for debugging purposes, it is nice to
+ // have them initialized. Since this is only used for compression
+ // (not decompression), I don't mind spending the time to do it.
+ //
+ // For the same range of i, m_dad[i] is the parent of node i.
+ // These are initialized to a known value that can represent
+ // a "not used" state.
+
+ for (i = 0; i < N; i++) {
+ m_lson[i] = NOT_USED;
+ m_rson[i] = NOT_USED;
+ m_dad[i] = NOT_USED;
+ }
+
+ // For i = 0 to 255, m_rson[N + i + 1] is the root of the tree
+ // for strings that begin with the character i. This is why
+ // the right child array is larger than the left child array.
+ // These are also initialzied to a "not used" state.
+ //
+ // Note that there are 256 of these, one for each of the possible
+ // 256 characters.
+
+ for (i = N + 1; i <= (N + 256); i++) {
+ m_rson[i] = NOT_USED;
+ }
+}
+
+
+/******************************************************************************
+ * LZSSCompress::InsertNode - This function inserts a string from the ring
+ * buffer into one of the trees. It loads the
+ * match position and length member variables
+ * for the longest match.
+ *
+ * The string to be inserted is identified by
+ * the parameter Pos, A full F bytes are
+ * inserted. So,
+ * m_ring_buffer[Pos ... Pos+F-1]
+ * are inserted.
+ *
+ * If the matched length is exactly F, then an
+ * old node is removed in favor of the new one
+ * (because the old one will be deleted
+ * sooner).
+ *
+ * Note that Pos plays a dual role. It is
+ * used as both a position in the ring buffer
+ * and also as a tree node.
+ * m_ring_buffer[Pos] defines a character that
+ * is used to identify a tree node.
+ *
+ * ENT: pos - position in the buffer
+ */
+
+void LZSSCompress::InsertNode(short int Pos)
+{
+ short int i;
+ short int p;
+ int cmp;
+ unsigned char * key;
+
+/*
+ ASSERT(Pos >= 0);
+ ASSERT(Pos < N);
+*/
+
+ cmp = 1;
+ key = &(m_ring_buffer[Pos]);
+
+ // The last 256 entries in m_rson contain the root nodes for
+ // strings that begin with a letter. Get an index for the
+ // first letter in this string.
+
+ p = (short int) (N + 1 + key[0]);
+
+ // Set the left and right tree nodes for this position to "not
+ // used."
+
+ m_lson[Pos] = NOT_USED;
+ m_rson[Pos] = NOT_USED;
+
+ // Haven't matched anything yet.
+
+ m_match_length = 0;
+
+ for ( ; ; ) {
+ if (cmp >= 0) {
+ if (m_rson[p] != NOT_USED) {
+ p = m_rson[p];
+ }
+ else {
+ m_rson[p] = Pos;
+ m_dad[Pos] = p;
+ return;
+ }
+ }
+ else {
+ if (m_lson[p] != NOT_USED) {
+ p = m_lson[p];
+ }
+ else {
+ m_lson[p] = Pos;
+ m_dad[Pos] = p;
+ return;
+ }
+ }
+
+ // Should we go to the right or the left to look for the
+ // next match?
+
+ for (i = 1; i < F; i++) {
+ cmp = key[i] - m_ring_buffer[p + i];
+ if (cmp != 0)
+ break;
+ }
+
+ if (i > m_match_length) {
+ m_match_position = p;
+ m_match_length = i;
+
+ if (i >= F)
+ break;
+ }
+ }
+
+ m_dad[Pos] = m_dad[p];
+ m_lson[Pos] = m_lson[p];
+ m_rson[Pos] = m_rson[p];
+
+ m_dad[ m_lson[p] ] = Pos;
+ m_dad[ m_rson[p] ] = Pos;
+
+ if (m_rson[ m_dad[p] ] == p) {
+ m_rson[ m_dad[p] ] = Pos;
+ }
+ else {
+ m_lson[ m_dad[p] ] = Pos;
+ }
+
+ // Remove "p"
+
+ m_dad[p] = NOT_USED;
+}
+
+
+/******************************************************************************
+ * LZSSCompress::DeleteNode - This function removes the node "Node" from the
+ * tree.
+ *
+ * ENT: node - node to be removed
+ */
+
+void LZSSCompress::DeleteNode(short int Node)
+{
+ short int q;
+
+/*
+ ASSERT(Node >= 0);
+ ASSERT(Node < (N+1));
+*/
+
+ if (m_dad[Node] == NOT_USED) { // not in tree, nothing to do
+ return;
+ }
+
+ if (m_rson[Node] == NOT_USED) {
+ q = m_lson[Node];
+ }
+ else if (m_lson[Node] == NOT_USED) {
+ q = m_rson[Node];
+ }
+ else {
+ q = m_lson[Node];
+ if (m_rson[q] != NOT_USED) {
+ do {
+ q = m_rson[q];
+ } while (m_rson[q] != NOT_USED);
+
+ m_rson[ m_dad[q] ] = m_lson[q];
+ m_dad[ m_lson[q] ] = m_dad[q];
+ m_lson[q] = m_lson[Node];
+ m_dad[ m_lson[Node] ] = q;
+ }
+
+ m_rson[q] = m_rson[Node];
+ m_dad[ m_rson[Node] ] = q;
+ }
+
+ m_dad[q] = m_dad[Node];
+
+ if (m_rson[ m_dad[Node] ] == Node) {
+ m_rson[ m_dad[Node] ] = q;
+ }
+ else {
+ m_lson[ m_dad[Node] ] = q;
+ }
+
+ m_dad[Node] = NOT_USED;
+}
+
+
+/******************************************************************************
+ * LZSSCompress::Encode - This function "encodes" the input stream into the
+ * output stream.
+ * The GetChars() and SendChars() functions are
+ * used to separate this method from the actual
+ * i/o.
+ * NOTE: must set zlen for parent class to know length of
+ * compressed buffer.
+ */
+
+void LZSSCompress::Encode(void)
+{
+ short int i; // an iterator
+ short int r; // node number in the binary tree
+ short int s; // position in the ring buffer
+ unsigned short int len; // len of initial string
+ short int last_match_length; // length of last match
+ short int code_buf_pos; // position in the output buffer
+ unsigned char code_buf[17]; // the output buffer
+ unsigned char mask; // bit mask for byte 0 of out buf
+ unsigned char c; // character read from string
+
+ // Start with a clean tree.
+
+ InitTree();
+ direct = 0; // set direction needed by parent [Get|Send]Chars()
+
+ // code_buf[0] works as eight flags. A "1" represents that the
+ // unit is an unencoded letter (1 byte), and a "0" represents
+ // that the next unit is a <position,length> pair (2 bytes).
+ //
+ // code_buf[1..16] stores eight units of code. Since the best
+ // we can do is store eight <position,length> pairs, at most 16
+ // bytes are needed to store this.
+ //
+ // This is why the maximum size of the code buffer is 17 bytes.
+
+ code_buf[0] = 0;
+ code_buf_pos = 1;
+
+ // Mask iterates over the 8 bits in the code buffer. The first
+ // character ends up being stored in the low bit.
+ //
+ // bit 8 7 6 5 4 3 2 1
+ // | |
+ // | first sequence in code buffer
+ // |
+ // last sequence in code buffer
+
+ mask = 1;
+
+ s = 0;
+ r = (short int) N - (short int) F;
+
+ // Initialize the ring buffer with spaces...
+
+ // Note that the last F bytes of the ring buffer are not filled.
+ // This is because those F bytes will be filled in immediately
+ // with bytes from the input stream.
+
+ memset(m_ring_buffer, ' ', N - F);
+
+ // Read F bytes into the last F bytes of the ring buffer.
+ //
+ // This function loads the buffer with X characters and returns
+ // the actual amount loaded.
+
+ len = GetChars((char *) &(m_ring_buffer[r]), F);
+
+ // Make sure there is something to be compressed.
+
+ if (len == 0)
+ return;
+
+ // Insert the F strings, each of which begins with one or more
+ // 'space' characters. Note the order in which these strings
+ // are inserted. This way, degenerate trees will be less likely
+ // to occur.
+
+ for (i = 1; i <= F; i++) {
+ InsertNode((short int) (r - i));
+ }
+
+ // Finally, insert the whole string just read. The
+ // member variables match_length and match_position are set.
+
+ InsertNode(r);
+
+ // Now that we're preloaded, continue till done.
+
+ do {
+
+ // m_match_length may be spuriously long near the end of
+ // text.
+
+ if (m_match_length > len) {
+ m_match_length = len;
+ }
+
+ // Is it cheaper to store this as a single character? If so,
+ // make it so.
+
+ if (m_match_length < THRESHOLD) {
+ // Send one character. Remember that code_buf[0] is the
+ // set of flags for the next eight items.
+
+ m_match_length = 1;
+ code_buf[0] |= mask;
+ code_buf[code_buf_pos++] = m_ring_buffer[r];
+ }
+
+ // Otherwise, we do indeed have a string that can be stored
+ // compressed to save space.
+
+ else {
+ // The next 16 bits need to contain the position (12 bits)
+ // and the length (4 bits).
+
+ code_buf[code_buf_pos++] = (unsigned char) m_match_position;
+ code_buf[code_buf_pos++] = (unsigned char) (
+ ((m_match_position >> 4) & 0xf0) |
+ (m_match_length - THRESHOLD) );
+ }
+
+ // Shift the mask one bit to the left so that it will be ready
+ // to store the new bit.
+
+ mask = (unsigned char) (mask << 1);
+
+ // If the mask is now 0, then we know that we have a full set
+ // of flags and items in the code buffer. These need to be
+ // output.
+
+ if (!mask) {
+ // code_buf is the buffer of characters to be output.
+ // code_buf_pos is the number of characters it contains.
+
+ SendChars((char *) code_buf, code_buf_pos);
+
+ // Reset for next buffer...
+
+ code_buf[0] = 0;
+ code_buf_pos = 1;
+ mask = 1;
+ }
+
+ last_match_length = m_match_length;
+
+ // Delete old strings and read new bytes...
+
+ for (i = 0; i < last_match_length; i++) {
+ // Get next character...
+
+ if (GetChars((char *) &c, 1) != 1)
+ break;
+
+ // Delete "old strings"
+
+ DeleteNode(s);
+
+ // Put this character into the ring buffer.
+ //
+ // The original comment here says "If the position is near
+ // the end of the buffer, extend the buffer to make
+ // string comparison easier."
+ //
+ // That's a little misleading, because the "end" of the
+ // buffer is really what we consider to be the "beginning"
+ // of the buffer, that is, positions 0 through F.
+ //
+ // The idea is that the front end of the buffer is duplicated
+ // into the back end so that when you're looking at characters
+ // at the back end of the buffer, you can index ahead (beyond
+ // the normal end of the buffer) and see the characters
+ // that are at the front end of the buffer wihtout having
+ // to adjust the index.
+ //
+ // That is...
+ //
+ // 1234xxxxxxxxxxxxxxxxxxxxxxxxxxxxx1234
+ // | | |
+ // position 0 end of buffer |
+ // |
+ // duplicate of front of buffer
+
+ m_ring_buffer[s] = c;
+
+ if (s < F - 1) {
+ m_ring_buffer[s + N] = c;
+ }
+
+ // Increment the position, and wrap around when we're at
+ // the end. Note that this relies on N being a power of 2.
+
+ s = (short int) ( (s + 1) & (N - 1) );
+ r = (short int) ( (r + 1) & (N - 1) );
+
+ // Register the string that is found in
+ // m_ring_buffer[r..r+F-1].
+
+ InsertNode(r);
+ }
+
+ // If we didn't quit because we hit the last_match_length,
+ // then we must have quit because we ran out of characters
+ // to process.
+
+ while (i++ < last_match_length) {
+ DeleteNode(s);
+
+ s = (short int) ( (s + 1) & (N - 1) );
+ r = (short int) ( (r + 1) & (N - 1) );
+
+ // Note that len hitting 0 is the key that causes the
+ // do...while() to terminate. This is the only place
+ // within the loop that len is modified.
+ //
+ // Its original value is F (or a number less than F for
+ // short strings).
+
+ if (--len) {
+ InsertNode(r); /* buffer may not be empty. */
+ }
+ }
+
+ // End of do...while() loop. Continue processing until there
+ // are no more characters to be compressed. The variable
+ // "len" is used to signal this condition.
+ } while (len > 0);
+
+ // There could still be something in the output buffer. Send it
+ // now.
+
+ if (code_buf_pos > 1) {
+ // code_buf is the encoded string to send.
+ // code_buf_ptr is the number of characters.
+
+ SendChars((char *) code_buf, code_buf_pos);
+ }
+
+
+ // must set zlen for parent class to know length of compressed buffer
+ zlen = zpos;
+}
+
+
+/******************************************************************************
+ * LZSSCompress::Decode - This function "decodes" the input stream into the
+ * output stream.
+ * The GetChars() and SendChars() functions are
+ * used to separate this method from the actual
+ * i/o.
+ */
+
+void LZSSCompress::Decode(void)
+{
+ int k;
+ int r; // node number
+ unsigned char c[F]; // an array of chars
+ unsigned char flags; // 8 bits of flags
+ int flag_count; // which flag we're on
+ short int pos; // position in the ring buffer
+ short int len; // number of chars in ring buffer
+ unsigned long totalLen = 0;
+
+ direct = 1; // set direction needed by parent [Get|Send]Chars()
+
+ // Initialize the ring buffer with a common string.
+ //
+ // Note that the last F bytes of the ring buffer are not filled.
+
+ memset(m_ring_buffer, ' ', N - F);
+
+ r = N - F;
+
+ flags = (char) 0;
+ flag_count = 0;
+
+ for ( ; ; ) {
+
+ // If there are more bits of interest in this flag, then
+ // shift that next interesting bit into the 1's position.
+ //
+ // If this flag has been exhausted, the next byte must
+ // be a flag.
+
+ if (flag_count > 0) {
+ flags = (unsigned char) (flags >> 1);
+ flag_count--;
+ }
+ else {
+ // Next byte must be a flag.
+
+ if (GetChars((char *) &flags, 1) != 1)
+ break;
+
+ // Set the flag counter. While at first it might appear
+ // that this should be an 8 since there are 8 bits in the
+ // flag, it should really be a 7 because the shift must
+ // be performed 7 times in order to see all 8 bits.
+
+ flag_count = 7;
+ }
+
+ // If the low order bit of the flag is now set, then we know
+ // that the next byte is a single, unencoded character.
+
+ if (flags & 1) {
+ if (GetChars((char *) c, 1) != 1)
+ break;
+
+ if (SendChars((char *) c, 1) != 1) {
+ totalLen++;
+ break;
+ }
+
+ // Add to buffer, and increment to next spot. Wrap at end.
+
+ m_ring_buffer[r] = c[0];
+ r = (short int) ( (r + 1) & (N - 1) );
+ }
+
+ // Otherwise, we know that the next two bytes are a
+ // <position,length> pair. The position is in 12 bits and
+ // the length is in 4 bits.
+
+ else {
+ // Original code:
+ // if ((i = getc(infile)) == EOF)
+ // break;
+ // if ((j = getc(infile)) == EOF)
+ // break;
+ // i |= ((j & 0xf0) << 4);
+ // j = (j & 0x0f) + THRESHOLD;
+ //
+ // I've modified this to only make one input call, and
+ // have changed the variable names to something more
+ // obvious.
+
+ if (GetChars((char *) c, 2) != 2)
+ break;
+
+ // Convert these two characters into the position and
+ // length. Note that the length is always at least
+ // THRESHOLD, which is why we're able to get a length
+ // of 18 out of only 4 bits.
+
+ pos = (short int) ( c[0] | ((c[1] & 0xf0) << 4) );
+
+ len = (short int) ( (c[1] & 0x0f) + THRESHOLD );
+
+ // There are now "len" characters at position "pos" in
+ // the ring buffer that can be pulled out. Note that
+ // len is never more than F.
+
+ for (k = 0; k < len; k++) {
+ c[k] = m_ring_buffer[(pos + k) & (N - 1)];
+
+ // Add to buffer, and increment to next spot. Wrap at end.
+
+ m_ring_buffer[r] = c[k];
+ r = (short int) ( (r + 1) & (N - 1) );
+ }
+
+ // Add the "len" :characters to the output stream.
+
+ if (SendChars((char *) c, len) != (unsigned int)len) {
+ totalLen += len;
+ break;
+ }
+ }
+ }
+ slen = totalLen;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/common/rawstr.cpp b/src/modules/common/rawstr.cpp
new file mode 100644
index 0000000..d2da1e9
--- /dev/null
+++ b/src/modules/common/rawstr.cpp
@@ -0,0 +1,565 @@
+/******************************************************************************
+ * rawstr.cpp - code for class 'RawStr'- a module that reads raw text
+ * files: ot and nt using indexs ??.bks ??.cps ??.vss
+ * and provides lookup and parsing functions based on
+ * class StrKey
+ */
+
+
+#include <stdio.h>
+#include <fcntl.h>
+#include <errno.h>
+
+#include <stdlib.h>
+#include <utilstr.h>
+#include <rawstr.h>
+#include <sysdata.h>
+#include <swlog.h>
+#include <filemgr.h>
+#include <swbuf.h>
+#include <stringmgr.h>
+
+SWORD_NAMESPACE_START
+
+/******************************************************************************
+ * RawStr Statics
+ */
+
+int RawStr::instance = 0;
+char RawStr::nl = '\n';
+
+
+/******************************************************************************
+ * RawStr Constructor - Initializes data for instance of RawStr
+ *
+ * ENT: ipath - path of the directory where data and index files are located.
+ * be sure to include the trailing separator (e.g. '/' or '\')
+ * (e.g. 'modules/texts/rawtext/webster/')
+ */
+
+RawStr::RawStr(const char *ipath, int fileMode)
+{
+ SWBuf buf;
+
+ lastoff = -1;
+ path = 0;
+ stdstr(&path, ipath);
+
+ if (fileMode == -1) { // try read/write if possible
+ fileMode = FileMgr::RDWR;
+ }
+
+ buf.setFormatted("%s.idx", path);
+ idxfd = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
+
+ buf.setFormatted("%s.dat", path);
+ datfd = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
+
+ if (datfd < 0) {
+ SWLog::getSystemLog()->logError("%d", errno);
+ }
+
+ instance++;
+}
+
+
+/******************************************************************************
+ * RawStr Destructor - Cleans up instance of RawStr
+ */
+
+RawStr::~RawStr()
+{
+ if (path)
+ delete [] path;
+
+ --instance;
+
+ FileMgr::getSystemFileMgr()->close(idxfd);
+ FileMgr::getSystemFileMgr()->close(datfd);
+}
+
+
+/******************************************************************************
+ * RawStr::getidxbufdat - Gets the index string at the given idx offset
+ * NOTE: buf is allocated and must be freed by
+ * calling function
+ *
+ * ENT: ioffset - offset in dat file to lookup
+ * buf - address of pointer to allocate for storage of string
+ */
+
+void RawStr::getIDXBufDat(long ioffset, char **buf)
+{
+ int size;
+ char ch;
+ if (datfd > 0) {
+ datfd->seek(ioffset, SEEK_SET);
+ for (size = 0; datfd->read(&ch, 1) == 1; size++) {
+ if ((ch == '\\') || (ch == 10) || (ch == 13))
+ break;
+ }
+ *buf = (*buf) ? (char *)realloc(*buf, size*2 + 1) : (char *)malloc(size*2 + 1);
+ if (size) {
+ datfd->seek(ioffset, SEEK_SET);
+ datfd->read(*buf, size);
+ }
+ (*buf)[size] = 0;
+ toupperstr_utf8(*buf, size*2);
+ }
+ else {
+ *buf = (*buf) ? (char *)realloc(*buf, 1) : (char *)malloc(1);
+ **buf = 0;
+ }
+}
+
+
+/******************************************************************************
+ * RawStr::getidxbuf - Gets the index string at the given idx offset
+ * NOTE: buf is allocated and must be freed by
+ * calling function
+ *
+ * ENT: ioffset - offset in idx file to lookup
+ * buf - address of pointer to allocate for storage of string
+ */
+
+void RawStr::getIDXBuf(long ioffset, char **buf)
+{
+ long offset;
+
+ if (idxfd > 0) {
+ idxfd->seek(ioffset, SEEK_SET);
+ idxfd->read(&offset, 4);
+
+ offset = swordtoarch32(offset);
+
+ getIDXBufDat(offset, buf);
+ }
+}
+
+
+/******************************************************************************
+ * RawStr::findoffset - Finds the offset of the key string from the indexes
+ *
+ * ENT: key - key string to lookup
+ * start - address to store the starting offset
+ * size - address to store the size of the entry
+ * away - number of entries before of after to jump
+ * (default = 0)
+ *
+ * RET: error status -1 general error; -2 new file
+ */
+
+signed char RawStr::findOffset(const char *ikey, long *start, unsigned short *size, long away, long *idxoff)
+{
+ char *trybuf, *maxbuf, *key = 0, quitflag = 0;
+ signed char retval = -1;
+ long headoff, tailoff, tryoff = 0, maxoff = 0;
+ int diff = 0;
+
+ if (idxfd->getFd() >=0) {
+ tailoff = maxoff = idxfd->seek(0, SEEK_END) - 6;
+ retval = (tailoff >= 0) ? 0 : -2; // if NOT new file
+ if (*ikey) {
+ headoff = 0;
+
+ stdstr(&key, ikey, 3);
+ toupperstr_utf8(key, strlen(key)*3);
+
+ int keylen = strlen(key);
+ bool substr = false;
+
+ trybuf = maxbuf = 0;
+ getIDXBuf(maxoff, &maxbuf);
+
+ while (headoff < tailoff) {
+ tryoff = (lastoff == -1) ? headoff + ((((tailoff / 6) - (headoff / 6))) / 2) * 6 : lastoff;
+ lastoff = -1;
+ getIDXBuf(tryoff, &trybuf);
+
+ if (!*trybuf && tryoff) { // In case of extra entry at end of idx (not first entry)
+ tryoff += (tryoff > (maxoff / 2))?-6:6;
+ retval = -1;
+ break;
+ }
+
+ diff = strcmp(key, trybuf);
+
+ if (!diff)
+ break;
+
+ if (!strncmp(trybuf, key, keylen)) substr = true;
+
+ if (diff < 0)
+ tailoff = (tryoff == headoff) ? headoff : tryoff;
+ else headoff = tryoff;
+
+ if (tailoff == headoff + 6) {
+ if (quitflag++)
+ headoff = tailoff;
+ }
+ }
+
+ // didn't find exact match
+ if (headoff >= tailoff) {
+ tryoff = headoff;
+ if (!substr && ((tryoff != maxoff)||(strncmp(key, maxbuf, keylen)<0))) {
+ away--; // if our entry doesn't startwith our key, prefer the previous entry over the next
+ }
+ }
+ if (trybuf)
+ free(trybuf);
+ delete [] key;
+ if (maxbuf)
+ free(maxbuf);
+ }
+ else tryoff = 0;
+
+ idxfd->seek(tryoff, SEEK_SET);
+
+ *start = *size = 0;
+ idxfd->read(start, 4);
+ idxfd->read(size, 2);
+ if (idxoff)
+ *idxoff = tryoff;
+
+ *start = swordtoarch32(*start);
+ *size = swordtoarch16(*size);
+
+ while (away) {
+ long laststart = *start;
+ unsigned short lastsize = *size;
+ long lasttry = tryoff;
+ tryoff += (away > 0) ? 6 : -6;
+
+ bool bad = false;
+ if (((tryoff + (away*6)) < -6) || (tryoff + (away*6) > (maxoff+6)))
+ bad = true;
+ else if (idxfd->seek(tryoff, SEEK_SET) < 0)
+ bad = true;
+ if (bad) {
+ retval = -1;
+ *start = laststart;
+ *size = lastsize;
+ tryoff = lasttry;
+ if (idxoff)
+ *idxoff = tryoff;
+ break;
+ }
+ idxfd->read(start, 4);
+ idxfd->read(size, 2);
+ if (idxoff)
+ *idxoff = tryoff;
+
+ *start = swordtoarch32(*start);
+ *size = swordtoarch16(*size);
+
+ if (((laststart != *start) || (lastsize != *size)) && (*start >= 0) && (*size))
+ away += (away < 0) ? 1 : -1;
+ }
+
+ lastoff = tryoff;
+ }
+ else {
+ *start = 0;
+ *size = 0;
+ if (idxoff)
+ *idxoff = 0;
+ retval = -1;
+ }
+ return retval;
+}
+
+
+/******************************************************************************
+ * RawStr::preptext - Prepares the text before returning it to external
+ * objects
+ *
+ * ENT: buf - buffer where text is stored and where to store the prep'd
+ * text.
+ */
+
+void RawStr::prepText(SWBuf &buf) {
+ unsigned int to, from;
+ char space = 0, cr = 0, realdata = 0, nlcnt = 0;
+ char *rawBuf = buf.getRawData();
+ for (to = from = 0; rawBuf[from]; from++) {
+ switch (rawBuf[from]) {
+ case 10:
+ if (!realdata)
+ continue;
+ space = (cr) ? 0 : 1;
+ cr = 0;
+ nlcnt++;
+ if (nlcnt > 1) {
+// *to++ = nl;
+ rawBuf[to++] = 10;
+// *to++ = nl[1];
+// nlcnt = 0;
+ }
+ continue;
+ case 13:
+ if (!realdata)
+ continue;
+// *to++ = nl[0];
+ rawBuf[to++] = 10;
+ space = 0;
+ cr = 1;
+ continue;
+ }
+ realdata = 1;
+ nlcnt = 0;
+ if (space) {
+ space = 0;
+ if (rawBuf[from] != ' ') {
+ rawBuf[to++] = ' ';
+ from--;
+ continue;
+ }
+ }
+ rawBuf[to++] = rawBuf[from];
+ }
+ buf.setSize(to);
+
+ while (to > 1) { // remove trailing excess
+ to--;
+ if ((rawBuf[to] == 10) || (rawBuf[to] == ' '))
+ buf.setSize(to);
+ else break;
+ }
+}
+
+
+/******************************************************************************
+ * RawStr::readtext - gets text at a given offset
+ *
+ * ENT:
+ * start - starting offset where the text is located in the file
+ * size - size of text entry
+ * buf - buffer to store text
+ *
+ */
+
+void RawStr::readText(long istart, unsigned short *isize, char **idxbuf, SWBuf &buf)
+{
+ unsigned int ch;
+ char *idxbuflocal = 0;
+ getIDXBufDat(istart, &idxbuflocal);
+ long start = istart;
+
+ do {
+ if (*idxbuf)
+ delete [] *idxbuf;
+
+ buf = "";
+ buf.setFillByte(0);
+ buf.setSize(++(*isize));
+
+ *idxbuf = new char [ (*isize) ];
+
+ datfd->seek(start, SEEK_SET);
+ datfd->read(buf.getRawData(), (int)((*isize) - 1));
+
+ for (ch = 0; buf[ch]; ch++) { // skip over index string
+ if (buf[ch] == 10) {
+ ch++;
+ break;
+ }
+ }
+ buf = SWBuf(buf.c_str()+ch);
+ // resolve link
+ if (!strncmp(buf.c_str(), "@LINK", 5)) {
+ for (ch = 0; buf[ch]; ch++) { // null before nl
+ if (buf[ch] == 10) {
+ buf[ch] = 0;
+ break;
+ }
+ }
+ findOffset(buf.c_str() + 6, &start, isize);
+ }
+ else break;
+ }
+ while (true); // while we're resolving links
+
+ if (idxbuflocal) {
+ int localsize = strlen(idxbuflocal);
+ localsize = (localsize < (*isize - 1)) ? localsize : (*isize - 1);
+ strncpy(*idxbuf, idxbuflocal, localsize);
+ (*idxbuf)[localsize] = 0;
+ free(idxbuflocal);
+ }
+}
+
+
+/******************************************************************************
+ * RawLD::settext - Sets text for current offset
+ *
+ * ENT: key - key for this entry
+ * buf - buffer to store
+ * len - length of buffer (0 - null terminated)
+ */
+
+void RawStr::doSetText(const char *ikey, const char *buf, long len)
+{
+
+ long start, outstart;
+ long idxoff;
+ long endoff;
+ long shiftSize;
+ unsigned short size;
+ unsigned short outsize;
+ static const char nl[] = {13, 10};
+ char *tmpbuf = 0;
+ char *key = 0;
+ char *dbKey = 0;
+ char *idxBytes = 0;
+ char *outbuf = 0;
+ char *ch = 0;
+
+ char errorStatus = findOffset(ikey, &start, &size, 0, &idxoff);
+ stdstr(&key, ikey, 2);
+ toupperstr_utf8(key, strlen(key)*2);
+
+ len = (len < 0) ? strlen(buf) : len;
+
+ getIDXBufDat(start, &dbKey);
+
+ if (strcmp(key, dbKey) < 0) {
+ }
+ else if (strcmp(key, dbKey) > 0) {
+ if (errorStatus != (char)-2) // not a new file
+ idxoff += 6;
+ else idxoff = 0;
+ }
+ else if ((!strcmp(key, dbKey)) && (len>0 /*we're not deleting*/)) { // got absolute entry
+ do {
+ tmpbuf = new char [ size + 2 ];
+ memset(tmpbuf, 0, size + 2);
+ datfd->seek(start, SEEK_SET);
+ datfd->read(tmpbuf, (int)(size - 1));
+
+ for (ch = tmpbuf; *ch; ch++) { // skip over index string
+ if (*ch == 10) {
+ ch++;
+ break;
+ }
+ }
+ memmove(tmpbuf, ch, size - (unsigned short)(ch-tmpbuf));
+
+ // resolve link
+ if (!strncmp(tmpbuf, "@LINK", 5) && (len)) {
+ for (ch = tmpbuf; *ch; ch++) { // null before nl
+ if (*ch == 10) {
+ *ch = 0;
+ break;
+ }
+ }
+ findOffset(tmpbuf + 6, &start, &size, 0, &idxoff);
+ }
+ else break;
+ }
+ while (true); // while we're resolving links
+ }
+
+ endoff = idxfd->seek(0, SEEK_END);
+
+ shiftSize = endoff - idxoff;
+
+ if (shiftSize > 0) {
+ idxBytes = new char [ shiftSize ];
+ idxfd->seek(idxoff, SEEK_SET);
+ idxfd->read(idxBytes, shiftSize);
+ }
+
+ outbuf = new char [ len + strlen(key) + 5 ];
+ sprintf(outbuf, "%s%c%c", key, 13, 10);
+ size = strlen(outbuf);
+ memcpy(outbuf + size, buf, len);
+ size = outsize = size + (len);
+
+ start = outstart = datfd->seek(0, SEEK_END);
+
+ outstart = archtosword32(start);
+ outsize = archtosword16(size);
+
+ idxfd->seek(idxoff, SEEK_SET);
+ if (len > 0) {
+ datfd->seek(start, SEEK_SET);
+ datfd->write(outbuf, (int)size);
+
+ // add a new line to make data file easier to read in an editor
+ datfd->write(&nl, 2);
+
+ idxfd->write(&outstart, 4);
+ idxfd->write(&outsize, 2);
+ if (idxBytes) {
+ idxfd->write(idxBytes, shiftSize);
+ delete [] idxBytes;
+ }
+ }
+ else { // delete entry
+ if (idxBytes) {
+ idxfd->write(idxBytes+6, shiftSize-6);
+ idxfd->seek(-1, SEEK_CUR); // last valid byte
+ FileMgr::getSystemFileMgr()->trunc(idxfd); // truncate index
+ delete [] idxBytes;
+ }
+ }
+
+ delete [] key;
+ delete [] outbuf;
+ free(dbKey);
+}
+
+
+/******************************************************************************
+ * RawLD::linkentry - links one entry to another
+ *
+ * ENT: testmt - testament to find (0 - Bible/module introduction)
+ * destidxoff - dest offset into .vss
+ * srcidxoff - source offset into .vss
+ */
+
+void RawStr::doLinkEntry(const char *destkey, const char *srckey) {
+ char *text = new char [ strlen(destkey) + 7 ];
+ sprintf(text, "@LINK %s", destkey);
+ doSetText(srckey, text);
+ delete [] text;
+}
+
+
+/******************************************************************************
+ * RawLD::CreateModule - Creates new module files
+ *
+ * ENT: path - directory to store module files
+ * RET: error status
+ */
+
+signed char RawStr::createModule(const char *ipath)
+{
+ char *path = 0;
+ char *buf = new char [ strlen (ipath) + 20 ];
+ FileDesc *fd, *fd2;
+
+ stdstr(&path, ipath);
+
+ if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\'))
+ path[strlen(path)-1] = 0;
+
+ sprintf(buf, "%s.dat", path);
+ FileMgr::removeFile(buf);
+ fd = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE);
+ fd->getFd();
+ FileMgr::getSystemFileMgr()->close(fd);
+
+ sprintf(buf, "%s.idx", path);
+ FileMgr::removeFile(buf);
+ fd2 = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE);
+ fd2->getFd();
+ FileMgr::getSystemFileMgr()->close(fd2);
+
+ delete [] path;
+
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/common/rawstr4.cpp b/src/modules/common/rawstr4.cpp
new file mode 100644
index 0000000..cbc8384
--- /dev/null
+++ b/src/modules/common/rawstr4.cpp
@@ -0,0 +1,572 @@
+/******************************************************************************
+ * rawstr.cpp - code for class 'RawStr'- a module that reads raw text
+ * files: ot and nt using indexs ??.bks ??.cps ??.vss
+ * and provides lookup and parsing functions based on
+ * class StrKey
+ */
+
+#include <stdio.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <sys/types.h>
+
+#include <utilstr.h>
+#include <rawstr4.h>
+#include <sysdata.h>
+#include <swlog.h>
+#include <filemgr.h>
+#include <swbuf.h>
+#include <stringmgr.h>
+
+SWORD_NAMESPACE_START
+
+/******************************************************************************
+ * RawStr Statics
+ */
+
+int RawStr4::instance = 0;
+
+
+/******************************************************************************
+ * RawStr Constructor - Initializes data for instance of RawStr
+ *
+ * ENT: ipath - path of the directory where data and index files are located.
+ * be sure to include the trailing separator (e.g. '/' or '\')
+ * (e.g. 'modules/texts/rawtext/webster/')
+ */
+
+RawStr4::RawStr4(const char *ipath, int fileMode)
+{
+ SWBuf buf;
+
+ nl = '\n';
+ lastoff = -1;
+ path = 0;
+ stdstr(&path, ipath);
+
+ if (fileMode == -1) { // try read/write if possible
+ fileMode = FileMgr::RDWR;
+ }
+
+ buf.setFormatted("%s.idx", path);
+ idxfd = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
+
+ buf.setFormatted("%s.dat", path);
+ datfd = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
+
+ if (datfd < 0) {
+ SWLog::getSystemLog()->logError("%d", errno);
+ }
+
+ instance++;
+}
+
+
+/******************************************************************************
+ * RawStr Destructor - Cleans up instance of RawStr
+ */
+
+RawStr4::~RawStr4()
+{
+ if (path)
+ delete [] path;
+
+ --instance;
+
+ FileMgr::getSystemFileMgr()->close(idxfd);
+ FileMgr::getSystemFileMgr()->close(datfd);
+}
+
+
+/******************************************************************************
+ * RawStr4::getidxbufdat - Gets the index string at the given idx offset
+ * NOTE: buf is allocated and must be freed by
+ * calling function
+ *
+ * ENT: ioffset - offset in dat file to lookup
+ * buf - address of pointer to allocate for storage of string
+ */
+
+void RawStr4::getIDXBufDat(long ioffset, char **buf) {
+ int size;
+ char ch;
+ if (datfd > 0) {
+ datfd->seek(ioffset, SEEK_SET);
+ for (size = 0; datfd->read(&ch, 1) == 1; size++) {
+ if ((ch == '\\') || (ch == 10) || (ch == 13))
+ break;
+ }
+ *buf = (*buf) ? (char *)realloc(*buf, size*2 + 1) : (char *)malloc(size*2 + 1);
+ if (size) {
+ datfd->seek(ioffset, SEEK_SET);
+ datfd->read(*buf, size);
+ }
+ (*buf)[size] = 0;
+ toupperstr_utf8(*buf, size*2);
+ }
+ else {
+ *buf = (*buf) ? (char *)realloc(*buf, 1) : (char *)malloc(1);
+ **buf = 0;
+ }
+}
+
+
+/******************************************************************************
+ * RawStr4::getidxbuf - Gets the index string at the given idx offset
+ * NOTE: buf is allocated and must be freed by
+ * calling function
+ *
+ * ENT: ioffset - offset in idx file to lookup
+ * buf - address of pointer to allocate for storage of string
+ */
+
+void RawStr4::getIDXBuf(long ioffset, char **buf)
+{
+ long offset;
+
+ if (idxfd > 0) {
+ idxfd->seek(ioffset, SEEK_SET);
+ idxfd->read(&offset, 4);
+
+ offset = swordtoarch32(offset);
+
+ getIDXBufDat(offset, buf);
+
+/* What the heck is this supposed to do??????
+ for (trybuf = targetbuf = *buf; *trybuf; trybuf++, targetbuf++) {
+ *targetbuf = *trybuf;
+ }
+ *targetbuf = 0;
+ trybuf = 0;
+ toupperstr_utf8(targetbuf);
+*/
+ }
+}
+
+
+/******************************************************************************
+ * RawStr4::findoffset - Finds the offset of the key string from the indexes
+ *
+ * ENT: key - key string to lookup
+ * start - address to store the starting offset
+ * size - address to store the size of the entry
+ * away - number of entries before of after to jump
+ * (default = 0)
+ *
+ * RET: error status -1 general error; -2 new file
+ */
+
+signed char RawStr4::findOffset(const char *ikey, long *start, unsigned long *size, long away, long *idxoff)
+{
+ char *trybuf, *maxbuf, *key = 0, quitflag = 0;
+ signed char retval = -1;
+ long headoff, tailoff, tryoff = 0, maxoff = 0;
+ int diff = 0;
+
+ if (idxfd->getFd() >=0) {
+ tailoff = maxoff = idxfd->seek(0, SEEK_END) - 8;
+ retval = (tailoff >= 0) ? 0 : -2; // if NOT new file
+ if (*ikey) {
+ headoff = 0;
+
+ stdstr(&key, ikey, 3);
+ toupperstr_utf8(key, strlen(key)*3);
+
+ int keylen = strlen(key);
+ bool substr = false;
+
+ trybuf = maxbuf = 0;
+ getIDXBuf(maxoff, &maxbuf);
+
+ while (headoff < tailoff) {
+ tryoff = (lastoff == -1) ? headoff + ((((tailoff / 8) - (headoff / 8))) / 2) * 8 : lastoff;
+ lastoff = -1;
+ getIDXBuf(tryoff, &trybuf);
+
+ if (!*trybuf && tryoff) { // In case of extra entry at end of idx (not first entry)
+ tryoff += (tryoff > (maxoff / 2))?-8:8;
+ retval = -1;
+ break;
+ }
+
+ diff = strcmp(key, trybuf);
+
+ if (!diff)
+ break;
+
+ if (!strncmp(trybuf, key, keylen)) substr = true;
+
+ if (diff < 0)
+ tailoff = (tryoff == headoff) ? headoff : tryoff;
+ else headoff = tryoff;
+
+ if (tailoff == headoff + 8) {
+ if (quitflag++)
+ headoff = tailoff;
+ }
+ }
+
+ // didn't find exact match
+ if (headoff >= tailoff) {
+ tryoff = headoff;
+ if (!substr && ((tryoff != maxoff)||(strncmp(key, maxbuf, keylen)<0))) {
+ away--; // if our entry doesn't startwith our key, prefer the previous entry over the next
+ }
+ }
+ if (trybuf)
+ free(trybuf);
+ delete [] key;
+ if (maxbuf)
+ free(maxbuf);
+ }
+ else tryoff = 0;
+
+ idxfd->seek(tryoff, SEEK_SET);
+
+ *start = *size = 0;
+ idxfd->read(start, 4);
+ idxfd->read(size, 4);
+ if (idxoff)
+ *idxoff = tryoff;
+
+ *start = swordtoarch32(*start);
+ *size = swordtoarch32(*size);
+
+ while (away) {
+ long laststart = *start;
+ unsigned long lastsize = *size;
+ long lasttry = tryoff;
+ tryoff += (away > 0) ? 8 : -8;
+
+ bool bad = false;
+ if (((tryoff + (away*8)) < -8) || (tryoff + (away*8) > (maxoff+8)))
+ bad = true;
+ else if (idxfd->seek(tryoff, SEEK_SET) < 0)
+ bad = true;
+ if (bad) {
+ retval = -1;
+ *start = laststart;
+ *size = lastsize;
+ tryoff = lasttry;
+ if (idxoff)
+ *idxoff = tryoff;
+ break;
+ }
+ idxfd->read(start, 4);
+ idxfd->read(size, 4);
+ if (idxoff)
+ *idxoff = tryoff;
+
+ *start = swordtoarch32(*start);
+ *size = swordtoarch32(*size);
+
+ if (((laststart != *start) || (lastsize != *size)) && (*start >= 0) && (*size))
+ away += (away < 0) ? 1 : -1;
+ }
+
+ lastoff = tryoff;
+ }
+ else {
+ *start = 0;
+ *size = 0;
+ if (idxoff)
+ *idxoff = 0;
+ retval = -1;
+ }
+ return retval;
+}
+
+
+/******************************************************************************
+ * RawStr4::preptext - Prepares the text before returning it to external
+ * objects
+ *
+ * ENT: buf - buffer where text is stored and where to store the prep'd
+ * text.
+ */
+
+void RawStr4::prepText(SWBuf &buf) {
+ unsigned int to, from;
+ char space = 0, cr = 0, realdata = 0, nlcnt = 0;
+ char *rawBuf = buf.getRawData();
+ for (to = from = 0; rawBuf[from]; from++) {
+ switch (rawBuf[from]) {
+ case 10:
+ if (!realdata)
+ continue;
+ space = (cr) ? 0 : 1;
+ cr = 0;
+ nlcnt++;
+ if (nlcnt > 1) {
+// *to++ = nl;
+ rawBuf[to++] = 10;
+// *to++ = nl[1];
+// nlcnt = 0;
+ }
+ continue;
+ case 13:
+ if (!realdata)
+ continue;
+// *to++ = nl[0];
+ rawBuf[to++] = 10;
+ space = 0;
+ cr = 1;
+ continue;
+ }
+ realdata = 1;
+ nlcnt = 0;
+ if (space) {
+ space = 0;
+ if (rawBuf[from] != ' ') {
+ rawBuf[to++] = ' ';
+ from--;
+ continue;
+ }
+ }
+ rawBuf[to++] = rawBuf[from];
+ }
+ buf.setSize(to);
+
+ while (to > 1) { // remove trailing excess
+ to--;
+ if ((rawBuf[to] == 10) || (rawBuf[to] == ' '))
+ buf.setSize(to);
+ else break;
+ }
+}
+
+
+/******************************************************************************
+ * RawStr4::readtext - gets text at a given offset
+ *
+ * ENT:
+ * start - starting offset where the text is located in the file
+ * size - size of text entry
+ * buf - buffer to store text
+ *
+ */
+
+void RawStr4::readText(long istart, unsigned long *isize, char **idxbuf, SWBuf &buf)
+{
+ unsigned int ch;
+ char *idxbuflocal = 0;
+ getIDXBufDat(istart, &idxbuflocal);
+ long start = istart;
+
+ do {
+ if (*idxbuf)
+ delete [] *idxbuf;
+
+ buf = "";
+ buf.setFillByte(0);
+ buf.setSize(++(*isize));
+
+ *idxbuf = new char [ (*isize) ];
+
+ datfd->seek(start, SEEK_SET);
+ datfd->read(buf.getRawData(), (int)((*isize) - 1));
+
+ for (ch = 0; buf[ch]; ch++) { // skip over index string
+ if (buf[ch] == 10) {
+ ch++;
+ break;
+ }
+ }
+ buf = SWBuf(buf.c_str()+ch);
+ // resolve link
+ if (!strncmp(buf.c_str(), "@LINK", 5)) {
+ for (ch = 0; buf[ch]; ch++) { // null before nl
+ if (buf[ch] == 10) {
+ buf[ch] = 0;
+ break;
+ }
+ }
+ findOffset(buf.c_str() + 6, &start, isize);
+ }
+ else break;
+ }
+ while (true); // while we're resolving links
+
+ if (idxbuflocal) {
+ unsigned int localsize = strlen(idxbuflocal);
+ localsize = (localsize < (*isize - 1)) ? localsize : (*isize - 1);
+ strncpy(*idxbuf, idxbuflocal, localsize);
+ (*idxbuf)[localsize] = 0;
+ free(idxbuflocal);
+ }
+}
+
+
+/******************************************************************************
+ * RawLD::settext - Sets text for current offset
+ *
+ * ENT: key - key for this entry
+ * buf - buffer to store
+ * len - length of buffer (0 - null terminated)
+ */
+
+void RawStr4::doSetText(const char *ikey, const char *buf, long len) {
+
+ long start, outstart;
+ long idxoff;
+ long endoff;
+ long shiftSize;
+ unsigned long size;
+ unsigned long outsize;
+ static const char nl[] = {13, 10};
+ char *tmpbuf = 0;
+ char *key = 0;
+ char *dbKey = 0;
+ char *idxBytes = 0;
+ char *outbuf = 0;
+ char *ch = 0;
+
+ char errorStatus = findOffset(ikey, &start, &size, 0, &idxoff);
+ stdstr(&key, ikey, 3);
+ toupperstr_utf8(key, strlen(key)*3);
+
+ len = (len < 0) ? strlen(buf) : len;
+ getIDXBufDat(start, &dbKey);
+
+ if (strcmp(key, dbKey) < 0) {
+ }
+ else if (strcmp(key, dbKey) > 0) {
+ if (errorStatus != (char)-2) // not a new file
+ idxoff += 8;
+ else idxoff = 0;
+ }
+ else if ((!strcmp(key, dbKey)) && (len>0/*we're not deleting*/)) { // got absolute entry
+ do {
+ tmpbuf = new char [ size + 2 ];
+ memset(tmpbuf, 0, size + 2);
+ datfd->seek(start, SEEK_SET);
+ datfd->read(tmpbuf, (int)(size - 1));
+
+ for (ch = tmpbuf; *ch; ch++) { // skip over index string
+ if (*ch == 10) {
+ ch++;
+ break;
+ }
+ }
+ memmove(tmpbuf, ch, size - (unsigned long)(ch-tmpbuf));
+
+ // resolve link
+ if (!strncmp(tmpbuf, "@LINK", 5) && (len > 0)) {
+ for (ch = tmpbuf; *ch; ch++) { // null before nl
+ if (*ch == 10) {
+ *ch = 0;
+ break;
+ }
+ }
+ findOffset(tmpbuf + 8, &start, &size, 0, &idxoff);
+ ++size;
+ }
+ else break;
+ }
+ while (true); // while we're resolving links
+ }
+
+ endoff = idxfd->seek(0, SEEK_END);
+
+ shiftSize = endoff - idxoff;
+
+ if (shiftSize > 0) {
+ idxBytes = new char [ shiftSize ];
+ idxfd->seek(idxoff, SEEK_SET);
+ idxfd->read(idxBytes, shiftSize);
+ }
+
+ outbuf = new char [ len + strlen(key) + 5 ];
+ sprintf(outbuf, "%s%c%c", key, 13, 10);
+ size = strlen(outbuf);
+ memcpy(outbuf + size, buf, len);
+ size = outsize = size + len;
+
+ start = outstart = datfd->seek(0, SEEK_END);
+
+ outstart = archtosword32(start);
+ outsize = archtosword32(size);
+
+ idxfd->seek(idxoff, SEEK_SET);
+ if (len>0) {
+ datfd->seek(start, SEEK_SET);
+ datfd->write(outbuf, (long)size);
+
+ // add a new line to make data file easier to read in an editor
+ datfd->write(&nl, 2);
+
+ idxfd->write(&outstart, 4);
+ idxfd->write(&outsize, 4);
+ if (idxBytes) {
+ idxfd->write(idxBytes, shiftSize);
+ delete [] idxBytes;
+ }
+ }
+ else { // delete entry
+ if (idxBytes) {
+ idxfd->write(idxBytes+8, shiftSize-8);
+ idxfd->seek(-1, SEEK_CUR); // last valid byte
+ FileMgr::getSystemFileMgr()->trunc(idxfd); // truncate index
+ delete [] idxBytes;
+ }
+ }
+
+ delete [] key;
+ delete [] outbuf;
+ free(dbKey);
+}
+
+
+/******************************************************************************
+ * RawLD::linkentry - links one entry to another
+ *
+ * ENT: testmt - testament to find (0 - Bible/module introduction)
+ * destidxoff - dest offset into .vss
+ * srcidxoff - source offset into .vss
+ */
+
+void RawStr4::doLinkEntry(const char *destkey, const char *srckey) {
+ char *text = new char [ strlen(destkey) + 7 ];
+ sprintf(text, "@LINK %s", destkey);
+ doSetText(srckey, text);
+ delete [] text;
+}
+
+
+/******************************************************************************
+ * RawLD::CreateModule - Creates new module files
+ *
+ * ENT: path - directory to store module files
+ * RET: error status
+ */
+
+signed char RawStr4::createModule(const char *ipath)
+{
+ char *path = 0;
+ char *buf = new char [ strlen (ipath) + 20 ];
+ FileDesc *fd, *fd2;
+
+ stdstr(&path, ipath);
+
+ if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\'))
+ path[strlen(path)-1] = 0;
+
+ sprintf(buf, "%s.dat", path);
+ FileMgr::removeFile(buf);
+ fd = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE);
+ fd->getFd();
+ FileMgr::getSystemFileMgr()->close(fd);
+
+ sprintf(buf, "%s.idx", path);
+ FileMgr::removeFile(buf);
+ fd2 = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE);
+ fd2->getFd();
+ FileMgr::getSystemFileMgr()->close(fd2);
+
+ delete [] path;
+
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/common/rawverse.cpp b/src/modules/common/rawverse.cpp
new file mode 100644
index 0000000..934082c
--- /dev/null
+++ b/src/modules/common/rawverse.cpp
@@ -0,0 +1,350 @@
+/******************************************************************************
+ * rawverse.cpp - code for class 'RawVerse'- a module that reads raw text
+ * files: ot and nt using indexs ??.bks ??.cps ??.vss
+ * and provides lookup and parsing functions based on
+ * class VerseKey
+ */
+
+
+#include <ctype.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <errno.h>
+
+#include <utilstr.h>
+#include <rawverse.h>
+#include <versekey.h>
+#include <sysdata.h>
+#include <filemgr.h>
+#include <swbuf.h>
+
+
+SWORD_NAMESPACE_START
+
+/******************************************************************************
+ * RawVerse Statics
+ */
+
+int RawVerse::instance = 0;
+const char *RawVerse::nl = "\r\n";
+
+
+/******************************************************************************
+ * RawVerse Constructor - Initializes data for instance of RawVerse
+ *
+ * ENT: ipath - path of the directory where data and index files are located.
+ * be sure to include the trailing separator (e.g. '/' or '\')
+ * (e.g. 'modules/texts/rawtext/webster/')
+ */
+
+RawVerse::RawVerse(const char *ipath, int fileMode)
+{
+ SWBuf buf;
+
+ path = 0;
+ stdstr(&path, ipath);
+
+ if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\'))
+ path[strlen(path)-1] = 0;
+
+ if (fileMode == -1) { // try read/write if possible
+ fileMode = FileMgr::RDWR;
+ }
+
+ buf.setFormatted("%s/ot.vss", path);
+ idxfp[0] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
+
+ buf.setFormatted("%s/nt.vss", path);
+ idxfp[1] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
+
+ buf.setFormatted("%s/ot", path);
+ textfp[0] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
+
+ buf.setFormatted("%s/nt", path);
+ textfp[1] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
+
+ instance++;
+}
+
+
+/******************************************************************************
+ * RawVerse Destructor - Cleans up instance of RawVerse
+ */
+
+RawVerse::~RawVerse()
+{
+ int loop1;
+
+ if (path)
+ delete [] path;
+
+ --instance;
+
+ for (loop1 = 0; loop1 < 2; loop1++) {
+ FileMgr::getSystemFileMgr()->close(idxfp[loop1]);
+ FileMgr::getSystemFileMgr()->close(textfp[loop1]);
+ }
+}
+
+
+/******************************************************************************
+ * RawVerse::findoffset - Finds the offset of the key verse from the indexes
+ *
+ * ENT: testmt - testament to find (0 - Bible/module introduction)
+ * idxoff - offset into .vss
+ * start - address to store the starting offset
+ * size - address to store the size of the entry
+ */
+
+void RawVerse::findOffset(char testmt, long idxoff, long *start, unsigned short *size) {
+ idxoff *= 6;
+ if (!testmt)
+ testmt = ((idxfp[1]) ? 1:2);
+
+ if (idxfp[testmt-1]->getFd() >= 0) {
+ idxfp[testmt-1]->seek(idxoff, SEEK_SET);
+ idxfp[testmt-1]->read(start, 4);
+ long len = idxfp[testmt-1]->read(size, 2); // read size
+
+ *start = swordtoarch32(*start);
+ *size = swordtoarch16(*size);
+
+ if (len < 2) {
+ *size = (unsigned short)((*start) ? (textfp[testmt-1]->seek(0, SEEK_END) - (long)*start) : 0); // if for some reason we get an error reading size, make size to end of file
+ }
+ }
+ else {
+ *start = 0;
+ *size = 0;
+ }
+}
+
+
+/******************************************************************************
+ * RawVerse::preptext - Prepares the text before returning it to external
+ * objects
+ *
+ * ENT: buf - buffer where text is stored and where to store the prep'd
+ * text.
+ */
+
+void RawVerse::prepText(SWBuf &buf) {
+ unsigned int to, from;
+ char space = 0, cr = 0, realdata = 0, nlcnt = 0;
+ char *rawBuf = buf.getRawData();
+ for (to = from = 0; rawBuf[from]; from++) {
+ switch (rawBuf[from]) {
+ case 10:
+ if (!realdata)
+ continue;
+ space = (cr) ? 0 : 1;
+ cr = 0;
+ nlcnt++;
+ if (nlcnt > 1) {
+// *to++ = nl;
+ rawBuf[to++] = 10;
+// *to++ = nl[1];
+// nlcnt = 0;
+ }
+ continue;
+ case 13:
+ if (!realdata)
+ continue;
+// *to++ = nl[0];
+ rawBuf[to++] = 10;
+ space = 0;
+ cr = 1;
+ continue;
+ }
+ realdata = 1;
+ nlcnt = 0;
+ if (space) {
+ space = 0;
+ if (rawBuf[from] != ' ') {
+ rawBuf[to++] = ' ';
+ from--;
+ continue;
+ }
+ }
+ rawBuf[to++] = rawBuf[from];
+ }
+ buf.setSize(to);
+
+ while (to > 1) { // remove trailing excess
+ to--;
+ if ((rawBuf[to] == 10) || (rawBuf[to] == ' '))
+ buf.setSize(to);
+ else break;
+ }
+}
+
+
+/******************************************************************************
+ * RawVerse::readtext - gets text at a given offset
+ *
+ * ENT: testmt - testament file to search in (0 - Old; 1 - New)
+ * start - starting offset where the text is located in the file
+ * size - size of text entry + 2 (null)(null)
+ * buf - buffer to store text
+ *
+ */
+
+void RawVerse::readText(char testmt, long start, unsigned short size, SWBuf &buf) {
+ buf = "";
+ buf.setFillByte(0);
+ buf.setSize(size + 1);
+ if (!testmt)
+ testmt = ((idxfp[1]) ? 1:2);
+ if (size) {
+ if (textfp[testmt-1]->getFd() >= 0) {
+ textfp[testmt-1]->seek(start, SEEK_SET);
+ textfp[testmt-1]->read(buf.getRawData(), (int)size);
+ }
+ }
+}
+
+
+/******************************************************************************
+ * RawVerse::settext - Sets text for current offset
+ *
+ * ENT: testmt - testament to find (0 - Bible/module introduction)
+ * idxoff - offset into .vss
+ * buf - buffer to store
+ * len - length of buffer (0 - null terminated)
+ */
+
+void RawVerse::doSetText(char testmt, long idxoff, const char *buf, long len)
+{
+ long start, outstart;
+ unsigned short size;
+ unsigned short outsize;
+
+ idxoff *= 6;
+ if (!testmt)
+ testmt = ((idxfp[1]) ? 1:2);
+
+ size = outsize = (len < 0) ? strlen(buf) : len;
+
+ start = outstart = textfp[testmt-1]->seek(0, SEEK_END);
+ idxfp[testmt-1]->seek(idxoff, SEEK_SET);
+
+ if (size) {
+ textfp[testmt-1]->seek(start, SEEK_SET);
+ textfp[testmt-1]->write(buf, (int)size);
+
+ // add a new line to make data file easier to read in an editor
+ textfp[testmt-1]->write(nl, 2);
+ }
+ else {
+ start = 0;
+ }
+
+ outstart = archtosword32(start);
+ outsize = archtosword16(size);
+
+ idxfp[testmt-1]->write(&outstart, 4);
+ idxfp[testmt-1]->write(&outsize, 2);
+
+
+}
+
+
+/******************************************************************************
+ * RawVerse::linkentry - links one entry to another
+ *
+ * ENT: testmt - testament to find (0 - Bible/module introduction)
+ * destidxoff - dest offset into .vss
+ * srcidxoff - source offset into .vss
+ */
+
+void RawVerse::doLinkEntry(char testmt, long destidxoff, long srcidxoff) {
+ long start;
+ unsigned short size;
+
+ destidxoff *= 6;
+ srcidxoff *= 6;
+
+ if (!testmt)
+ testmt = ((idxfp[1]) ? 1:2);
+
+ // get source
+ idxfp[testmt-1]->seek(srcidxoff, SEEK_SET);
+ idxfp[testmt-1]->read(&start, 4);
+ idxfp[testmt-1]->read(&size, 2);
+
+ // write dest
+ idxfp[testmt-1]->seek(destidxoff, SEEK_SET);
+ idxfp[testmt-1]->write(&start, 4);
+ idxfp[testmt-1]->write(&size, 2);
+}
+
+
+/******************************************************************************
+ * RawVerse::CreateModule - Creates new module files
+ *
+ * ENT: path - directory to store module files
+ * RET: error status
+ */
+
+char RawVerse::createModule(const char *ipath)
+{
+ char *path = 0;
+ char *buf = new char [ strlen (ipath) + 20 ];
+ FileDesc *fd, *fd2;
+
+ stdstr(&path, ipath);
+
+ if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\'))
+ path[strlen(path)-1] = 0;
+
+ sprintf(buf, "%s/ot", path);
+ FileMgr::removeFile(buf);
+ fd = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE);
+ fd->getFd();
+ FileMgr::getSystemFileMgr()->close(fd);
+
+ sprintf(buf, "%s/nt", path);
+ FileMgr::removeFile(buf);
+ fd = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE);
+ fd->getFd();
+ FileMgr::getSystemFileMgr()->close(fd);
+
+ sprintf(buf, "%s/ot.vss", path);
+ FileMgr::removeFile(buf);
+ fd = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE);
+ fd->getFd();
+
+ sprintf(buf, "%s/nt.vss", path);
+ FileMgr::removeFile(buf);
+ fd2 = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE);
+ fd2->getFd();
+
+ VerseKey vk;
+ vk.Headings(1);
+ long offset = 0;
+ short size = 0;
+ for (vk = TOP; !vk.Error(); vk++) {
+ if (vk.Testament() == 1) {
+ fd->write(&offset, 4);
+ fd->write(&size, 2);
+ }
+ else {
+ fd2->write(&offset, 4);
+ fd2->write(&size, 2);
+ }
+ }
+
+ FileMgr::getSystemFileMgr()->close(fd);
+ FileMgr::getSystemFileMgr()->close(fd2);
+
+ delete [] path;
+ delete [] buf;
+/*
+ RawVerse rv(path);
+ VerseKey mykey("Rev 22:21");
+*/
+
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/common/rawverse4.cpp b/src/modules/common/rawverse4.cpp
new file mode 100644
index 0000000..bd438ec
--- /dev/null
+++ b/src/modules/common/rawverse4.cpp
@@ -0,0 +1,350 @@
+/******************************************************************************
+ * rawverse.cpp - code for class 'RawVerse4'- a module that reads raw text
+ * files: ot and nt using indexs ??.bks ??.cps ??.vss
+ * and provides lookup and parsing functions based on
+ * class VerseKey
+ */
+
+
+#include <ctype.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <errno.h>
+
+#include <utilstr.h>
+#include <rawverse4.h>
+#include <versekey.h>
+#include <sysdata.h>
+#include <filemgr.h>
+#include <swbuf.h>
+
+
+SWORD_NAMESPACE_START
+
+/******************************************************************************
+ * RawVerse4 Statics
+ */
+
+int RawVerse4::instance = 0;
+const char *RawVerse4::nl = "\r\n";
+
+
+/******************************************************************************
+ * RawVerse4 Constructor - Initializes data for instance of RawVerse4
+ *
+ * ENT: ipath - path of the directory where data and index files are located.
+ * be sure to include the trailing separator (e.g. '/' or '\')
+ * (e.g. 'modules/texts/rawtext/webster/')
+ */
+
+RawVerse4::RawVerse4(const char *ipath, int fileMode)
+{
+ SWBuf buf;
+
+ path = 0;
+ stdstr(&path, ipath);
+
+ if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\'))
+ path[strlen(path)-1] = 0;
+
+ if (fileMode == -1) { // try read/write if possible
+ fileMode = FileMgr::RDWR;
+ }
+
+ buf.setFormatted("%s/ot.vss", path);
+ idxfp[0] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
+
+ buf.setFormatted("%s/nt.vss", path);
+ idxfp[1] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
+
+ buf.setFormatted("%s/ot", path);
+ textfp[0] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
+
+ buf.setFormatted("%s/nt", path);
+ textfp[1] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
+
+ instance++;
+}
+
+
+/******************************************************************************
+ * RawVerse4 Destructor - Cleans up instance of RawVerse4
+ */
+
+RawVerse4::~RawVerse4()
+{
+ int loop1;
+
+ if (path)
+ delete [] path;
+
+ --instance;
+
+ for (loop1 = 0; loop1 < 2; loop1++) {
+ FileMgr::getSystemFileMgr()->close(idxfp[loop1]);
+ FileMgr::getSystemFileMgr()->close(textfp[loop1]);
+ }
+}
+
+
+/******************************************************************************
+ * RawVerse4::findoffset - Finds the offset of the key verse from the indexes
+ *
+ * ENT: testmt - testament to find (0 - Bible/module introduction)
+ * idxoff - offset into .vss
+ * start - address to store the starting offset
+ * size - address to store the size of the entry
+ */
+
+void RawVerse4::findOffset(char testmt, long idxoff, long *start, unsigned long *size) {
+ idxoff *= 8;
+ if (!testmt)
+ testmt = ((idxfp[1]) ? 1:2);
+
+ if (idxfp[testmt-1]->getFd() >= 0) {
+ idxfp[testmt-1]->seek(idxoff, SEEK_SET);
+ idxfp[testmt-1]->read(start, 4);
+ long len = idxfp[testmt-1]->read(size, 4); // read size
+
+ *start = swordtoarch32(*start);
+ *size = swordtoarch32(*size);
+
+ if (len < 2) {
+ *size = (unsigned long)((*start) ? (textfp[testmt-1]->seek(0, SEEK_END) - (long)*start) : 0); // if for some reason we get an error reading size, make size to end of file
+ }
+ }
+ else {
+ *start = 0;
+ *size = 0;
+ }
+}
+
+
+/******************************************************************************
+ * RawVerse4::preptext - Prepares the text before returning it to external
+ * objects
+ *
+ * ENT: buf - buffer where text is stored and where to store the prep'd
+ * text.
+ */
+
+void RawVerse4::prepText(SWBuf &buf) {
+ unsigned int to, from;
+ char space = 0, cr = 0, realdata = 0, nlcnt = 0;
+ char *rawBuf = buf.getRawData();
+ for (to = from = 0; rawBuf[from]; from++) {
+ switch (rawBuf[from]) {
+ case 10:
+ if (!realdata)
+ continue;
+ space = (cr) ? 0 : 1;
+ cr = 0;
+ nlcnt++;
+ if (nlcnt > 1) {
+// *to++ = nl;
+ rawBuf[to++] = 10;
+// *to++ = nl[1];
+// nlcnt = 0;
+ }
+ continue;
+ case 13:
+ if (!realdata)
+ continue;
+// *to++ = nl[0];
+ rawBuf[to++] = 10;
+ space = 0;
+ cr = 1;
+ continue;
+ }
+ realdata = 1;
+ nlcnt = 0;
+ if (space) {
+ space = 0;
+ if (rawBuf[from] != ' ') {
+ rawBuf[to++] = ' ';
+ from--;
+ continue;
+ }
+ }
+ rawBuf[to++] = rawBuf[from];
+ }
+ buf.setSize(to);
+
+ while (to > 1) { // remove trailing excess
+ to--;
+ if ((rawBuf[to] == 10) || (rawBuf[to] == ' '))
+ buf.setSize(to);
+ else break;
+ }
+}
+
+
+/******************************************************************************
+ * RawVerse4::readtext - gets text at a given offset
+ *
+ * ENT: testmt - testament file to search in (0 - Old; 1 - New)
+ * start - starting offset where the text is located in the file
+ * size - size of text entry + 2 (null)(null)
+ * buf - buffer to store text
+ *
+ */
+
+void RawVerse4::readText(char testmt, long start, unsigned long size, SWBuf &buf) {
+ buf = "";
+ buf.setFillByte(0);
+ buf.setSize(size + 1);
+ if (!testmt)
+ testmt = ((idxfp[1]) ? 1:2);
+ if (size) {
+ if (textfp[testmt-1]->getFd() >= 0) {
+ textfp[testmt-1]->seek(start, SEEK_SET);
+ textfp[testmt-1]->read(buf.getRawData(), (int)size);
+ }
+ }
+}
+
+
+/******************************************************************************
+ * RawVerse4::settext - Sets text for current offset
+ *
+ * ENT: testmt - testament to find (0 - Bible/module introduction)
+ * idxoff - offset into .vss
+ * buf - buffer to store
+ * len - length of buffer (0 - null terminated)
+ */
+
+void RawVerse4::doSetText(char testmt, long idxoff, const char *buf, long len)
+{
+ long start, outstart;
+ unsigned long size;
+ unsigned long outsize;
+
+ idxoff *= 8;
+ if (!testmt)
+ testmt = ((idxfp[1]) ? 1:2);
+
+ size = outsize = (len < 0) ? strlen(buf) : len;
+
+ start = outstart = textfp[testmt-1]->seek(0, SEEK_END);
+ idxfp[testmt-1]->seek(idxoff, SEEK_SET);
+
+ if (size) {
+ textfp[testmt-1]->seek(start, SEEK_SET);
+ textfp[testmt-1]->write(buf, (int)size);
+
+ // add a new line to make data file easier to read in an editor
+ textfp[testmt-1]->write(nl, 2);
+ }
+ else {
+ start = 0;
+ }
+
+ outstart = archtosword32(start);
+ outsize = archtosword32(size);
+
+ idxfp[testmt-1]->write(&outstart, 4);
+ idxfp[testmt-1]->write(&outsize, 4);
+
+
+}
+
+
+/******************************************************************************
+ * RawVerse4::linkentry - links one entry to another
+ *
+ * ENT: testmt - testament to find (0 - Bible/module introduction)
+ * destidxoff - dest offset into .vss
+ * srcidxoff - source offset into .vss
+ */
+
+void RawVerse4::doLinkEntry(char testmt, long destidxoff, long srcidxoff) {
+ long start;
+ unsigned long size;
+
+ destidxoff *= 8;
+ srcidxoff *= 8;
+
+ if (!testmt)
+ testmt = ((idxfp[1]) ? 1:2);
+
+ // get source
+ idxfp[testmt-1]->seek(srcidxoff, SEEK_SET);
+ idxfp[testmt-1]->read(&start, 4);
+ idxfp[testmt-1]->read(&size, 4);
+
+ // write dest
+ idxfp[testmt-1]->seek(destidxoff, SEEK_SET);
+ idxfp[testmt-1]->write(&start, 4);
+ idxfp[testmt-1]->write(&size, 4);
+}
+
+
+/******************************************************************************
+ * RawVerse4::CreateModule - Creates new module files
+ *
+ * ENT: path - directory to store module files
+ * RET: error status
+ */
+
+char RawVerse4::createModule(const char *ipath)
+{
+ char *path = 0;
+ char *buf = new char [ strlen (ipath) + 20 ];
+ FileDesc *fd, *fd2;
+
+ stdstr(&path, ipath);
+
+ if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\'))
+ path[strlen(path)-1] = 0;
+
+ sprintf(buf, "%s/ot", path);
+ FileMgr::removeFile(buf);
+ fd = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE);
+ fd->getFd();
+ FileMgr::getSystemFileMgr()->close(fd);
+
+ sprintf(buf, "%s/nt", path);
+ FileMgr::removeFile(buf);
+ fd = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE);
+ fd->getFd();
+ FileMgr::getSystemFileMgr()->close(fd);
+
+ sprintf(buf, "%s/ot.vss", path);
+ FileMgr::removeFile(buf);
+ fd = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE);
+ fd->getFd();
+
+ sprintf(buf, "%s/nt.vss", path);
+ FileMgr::removeFile(buf);
+ fd2 = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE);
+ fd2->getFd();
+
+ VerseKey vk;
+ vk.Headings(1);
+ long offset = 0;
+ long size = 0;
+ for (vk = TOP; !vk.Error(); vk++) {
+ if (vk.Testament() == 1) {
+ fd->write(&offset, 4);
+ fd->write(&size, 4);
+ }
+ else {
+ fd2->write(&offset, 4);
+ fd2->write(&size, 4);
+ }
+ }
+
+ FileMgr::getSystemFileMgr()->close(fd);
+ FileMgr::getSystemFileMgr()->close(fd2);
+
+ delete [] path;
+ delete [] buf;
+/*
+ RawVerse4 rv(path);
+ VerseKey mykey("Rev 22:21");
+*/
+
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/common/sapphire.cpp b/src/modules/common/sapphire.cpp
new file mode 100644
index 0000000..a2c5ce0
--- /dev/null
+++ b/src/modules/common/sapphire.cpp
@@ -0,0 +1,216 @@
+/* sapphire.cpp -- the Saphire II stream cipher class.
+ Dedicated to the Public Domain the author and inventor:
+ (Michael Paul Johnson). This code comes with no warranty.
+ Use it at your own risk.
+ Ported from the Pascal implementation of the Sapphire Stream
+ Cipher 9 December 1994.
+ Added hash pre- and post-processing 27 December 1994.
+ Modified initialization to make index variables key dependent,
+ made the output function more resistant to cryptanalysis,
+ and renamed to Sapphire II 2 January 1995
+*/
+
+
+#include <string.h>
+
+#include "sapphire.h"
+
+SWORD_NAMESPACE_START
+
+unsigned char sapphire::keyrand(int limit,
+ unsigned char *user_key,
+ unsigned char keysize,
+ unsigned char *rsum,
+ unsigned *keypos)
+ {
+ unsigned u, // Value from 0 to limit to return.
+ retry_limiter, // No infinite loops allowed.
+ mask; // Select just enough bits.
+
+ if (!limit) return 0; // Avoid divide by zero error.
+ retry_limiter = 0;
+ mask = 1; // Fill mask with enough bits to cover
+ while (mask < (unsigned)limit) // the desired range.
+ mask = (mask << 1) + 1;
+ do
+ {
+ *rsum = cards[*rsum] + user_key[(*keypos)++];
+ if (*keypos >= keysize)
+ {
+ *keypos = 0; // Recycle the user key.
+ *rsum += keysize; // key "aaaa" != key "aaaaaaaa"
+ }
+ u = mask & *rsum;
+ if (++retry_limiter > 11)
+ u %= limit; // Prevent very rare long loops.
+ }
+ while (u > (unsigned)limit);
+ return u;
+ }
+
+void sapphire::initialize(unsigned char *key, unsigned char keysize)
+ {
+ // Key size may be up to 256 bytes.
+ // Pass phrases may be used directly, with longer length
+ // compensating for the low entropy expected in such keys.
+ // Alternatively, shorter keys hashed from a pass phrase or
+ // generated randomly may be used. For random keys, lengths
+ // of from 4 to 16 bytes are recommended, depending on how
+ // secure you want this to be.
+
+ int i;
+ unsigned char toswap, swaptemp, rsum;
+ unsigned keypos;
+
+ // If we have been given no key, assume the default hash setup.
+
+ if (keysize < 1)
+ {
+ hash_init();
+ return;
+ }
+
+ // Start with cards all in order, one of each.
+
+ for (i=0;i<256;i++)
+ cards[i] = i;
+
+ // Swap the card at each position with some other card.
+
+ toswap = 0;
+ keypos = 0; // Start with first byte of user key.
+ rsum = 0;
+ for (i=255;i>=0;i--)
+ {
+ toswap = keyrand(i, key, keysize, &rsum, &keypos);
+ swaptemp = cards[i];
+ cards[i] = cards[toswap];
+ cards[toswap] = swaptemp;
+ }
+
+ // Initialize the indices and data dependencies.
+ // Indices are set to different values instead of all 0
+ // to reduce what is known about the state of the cards
+ // when the first byte is emitted.
+
+ rotor = cards[1];
+ ratchet = cards[3];
+ avalanche = cards[5];
+ last_plain = cards[7];
+ last_cipher = cards[rsum];
+
+ toswap = swaptemp = rsum = 0;
+ keypos = 0;
+ }
+
+void sapphire::hash_init(void)
+ {
+ // This function is used to initialize non-keyed hash
+ // computation.
+
+ int i, j;
+
+ // Initialize the indices and data dependencies.
+
+ rotor = 1;
+ ratchet = 3;
+ avalanche = 5;
+ last_plain = 7;
+ last_cipher = 11;
+
+ // Start with cards all in inverse order.
+
+ for (i=0, j=255;i<256;i++,j--)
+ cards[i] = (unsigned char) j;
+ }
+
+sapphire::sapphire(unsigned char *key, unsigned char keysize)
+ {
+ if (key && keysize)
+ initialize(key, keysize);
+ }
+
+void sapphire::burn(void)
+ {
+ // Destroy the key and state information in RAM.
+ memset(cards, 0, 256);
+ rotor = ratchet = avalanche = last_plain = last_cipher = 0;
+ }
+
+sapphire::~sapphire()
+ {
+ burn();
+ }
+
+unsigned char sapphire::encrypt(unsigned char b)
+ {
+#ifdef USBINARY
+ // Picture a single enigma rotor with 256 positions, rewired
+ // on the fly by card-shuffling.
+
+ // This cipher is a variant of one invented and written
+ // by Michael Paul Johnson in November, 1993.
+
+ unsigned char swaptemp;
+
+ // Shuffle the deck a little more.
+
+ ratchet += cards[rotor++];
+ swaptemp = cards[last_cipher];
+ cards[last_cipher] = cards[ratchet];
+ cards[ratchet] = cards[last_plain];
+ cards[last_plain] = cards[rotor];
+ cards[rotor] = swaptemp;
+ avalanche += cards[swaptemp];
+
+ // Output one byte from the state in such a way as to make it
+ // very hard to figure out which one you are looking at.
+
+ last_cipher = b^cards[(cards[ratchet] + cards[rotor]) & 0xFF] ^
+ cards[cards[(cards[last_plain] +
+ cards[last_cipher] +
+ cards[avalanche])&0xFF]];
+ last_plain = b;
+ return last_cipher;
+#else
+ return b;
+#endif
+ }
+
+unsigned char sapphire::decrypt(unsigned char b)
+ {
+ unsigned char swaptemp;
+
+ // Shuffle the deck a little more.
+
+ ratchet += cards[rotor++];
+ swaptemp = cards[last_cipher];
+ cards[last_cipher] = cards[ratchet];
+ cards[ratchet] = cards[last_plain];
+ cards[last_plain] = cards[rotor];
+ cards[rotor] = swaptemp;
+ avalanche += cards[swaptemp];
+
+ // Output one byte from the state in such a way as to make it
+ // very hard to figure out which one you are looking at.
+
+ last_plain = b^cards[(cards[ratchet] + cards[rotor]) & 0xFF] ^
+ cards[cards[(cards[last_plain] +
+ cards[last_cipher] +
+ cards[avalanche])&0xFF]];
+ last_cipher = b;
+ return last_plain;
+ }
+
+void sapphire::hash_final(unsigned char *hash, // Destination
+ unsigned char hashlength) // Size of hash.
+ {
+ int i;
+
+ for (i=255;i>=0;i--)
+ encrypt((unsigned char) i);
+ for (i=0;i<hashlength;i++)
+ hash[i] = encrypt(0);
+ }
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/common/swcipher.cpp b/src/modules/common/swcipher.cpp
new file mode 100644
index 0000000..bd4d551
--- /dev/null
+++ b/src/modules/common/swcipher.cpp
@@ -0,0 +1,128 @@
+/******************************************************************************
+ * swcipher.cpp - code for class 'SWCipher'- a driver class that provides
+ * cipher utilities.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <swcipher.h>
+
+SWORD_NAMESPACE_START
+
+/******************************************************************************
+ * SWCipher Constructor - Initializes data for instance of SWCipher
+ *
+ */
+
+SWCipher::SWCipher(unsigned char *key) {
+ master.initialize(key, strlen((char *)key));
+ buf = 0;
+}
+
+
+/******************************************************************************
+ * SWCipher Destructor - Cleans up instance of SWCipher
+ */
+
+SWCipher::~SWCipher()
+{
+ if (buf)
+ free(buf);
+}
+
+
+char *SWCipher::Buf(const char *ibuf, unsigned long ilen)
+{
+ if (ibuf) {
+
+ if (buf)
+ free(buf);
+
+ if (!ilen) {
+ len = strlen(buf);
+ ilen = len + 1;
+ }
+ else len = ilen;
+
+ buf = (char *) malloc(ilen);
+ memcpy(buf, ibuf, ilen);
+ cipher = false;
+ }
+
+ Decode();
+
+ return buf;
+}
+
+
+char *SWCipher::cipherBuf(unsigned long *ilen, const char *ibuf)
+{
+ if (ibuf) {
+
+ if (buf)
+ free(buf);
+
+ buf = (char *) malloc(*ilen+1);
+ memcpy(buf, ibuf, *ilen);
+ len = *ilen;
+ cipher = true;
+ }
+
+ Encode();
+
+ *ilen = len;
+ return buf;
+}
+
+
+/******************************************************************************
+ * SWCipher::Encode - This function "encodes" the input stream into the
+ * output stream.
+ * The GetChars() and SendChars() functions are
+ * used to separate this method from the actual
+ * i/o.
+ */
+
+void SWCipher::Encode(void)
+{
+ if (!cipher) {
+ work = master;
+ for (unsigned long i = 0; i < len; i++)
+ buf[i] = work.encrypt(buf[i]);
+ cipher = true;
+ }
+}
+
+
+/******************************************************************************
+ * SWCipher::Decode - This function "decodes" the input stream into the
+ * output stream.
+ * The GetChars() and SendChars() functions are
+ * used to separate this method from the actual
+ * i/o.
+ */
+
+void SWCipher::Decode(void)
+{
+ if (cipher) {
+ work = master;
+ unsigned long i;
+ for (i = 0; i < len; i++)
+ buf[i] = work.decrypt(buf[i]);
+ buf[i] = 0;
+ cipher = false;
+ }
+}
+
+
+/******************************************************************************
+ * SWCipher::setCipherKey - setter for a new CipherKey
+ *
+ */
+
+void SWCipher::setCipherKey(const char *ikey) {
+ unsigned char *key = (unsigned char *)ikey;
+ master.initialize(key, strlen((char *)key));
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/common/swcomprs.cpp b/src/modules/common/swcomprs.cpp
new file mode 100644
index 0000000..02d7d7b
--- /dev/null
+++ b/src/modules/common/swcomprs.cpp
@@ -0,0 +1,193 @@
+/******************************************************************************
+ * swcomprs.cpp - code for class 'SWCompress'- a driver class that provides
+ * compression utilities.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <swcomprs.h>
+
+SWORD_NAMESPACE_START
+
+/******************************************************************************
+ * SWCompress Constructor - Initializes data for instance of SWCompress
+ *
+ */
+
+SWCompress::SWCompress()
+{
+ buf = zbuf = 0;
+ Init();
+}
+
+
+/******************************************************************************
+ * SWCompress Destructor - Cleans up instance of SWCompress
+ */
+
+SWCompress::~SWCompress()
+{
+ if (zbuf)
+ free(zbuf);
+
+ if (buf)
+ free(buf);
+}
+
+
+void SWCompress::Init()
+{
+ if (buf)
+ free(buf);
+
+ if (zbuf)
+ free(zbuf);
+
+ buf = 0;
+ zbuf = 0;
+ direct = 0;
+ zlen = 0;
+ slen = 0;
+ zpos = 0;
+ pos = 0;
+}
+
+
+char *SWCompress::Buf(const char *ibuf, unsigned long *len) {
+ // setting an uncompressed buffer
+ if (ibuf) {
+ Init();
+ slen = (len) ? *len : strlen(ibuf);
+ buf = (char *) calloc(slen + 1, 1);
+ memcpy(buf, ibuf, slen);
+ }
+
+ // getting an uncompressed buffer
+ if (!buf) {
+ buf = (char *)calloc(1,1); // be sure we at least allocate an empty buf for return;
+ direct = 1;
+ Decode();
+// slen = strlen(buf);
+ if (len)
+ *len = slen;
+ }
+ return buf;
+}
+
+
+char *SWCompress::zBuf(unsigned long *len, char *ibuf)
+{
+ // setting a compressed buffer
+ if (ibuf) {
+ Init();
+ zbuf = (char *) malloc(*len);
+ memcpy(zbuf, ibuf, *len);
+ zlen = *len;
+ }
+
+ // getting a compressed buffer
+ if (!zbuf) {
+ direct = 0;
+ Encode();
+ }
+
+ *len = zlen;
+ return zbuf;
+}
+
+
+unsigned long SWCompress::GetChars(char *ibuf, unsigned long len)
+{
+ if (direct) {
+ len = (((zlen - zpos) > (unsigned)len) ? len : zlen - zpos);
+ if (len > 0) {
+ memmove(ibuf, &zbuf[zpos], len);
+ zpos += len;
+ }
+ }
+ else {
+// slen = strlen(buf);
+ len = (((slen - pos) > (unsigned)len) ? len : slen - pos);
+ if (len > 0) {
+ memmove(ibuf, &buf[pos], len);
+ pos += len;
+ }
+ }
+ return len;
+}
+
+
+unsigned long SWCompress::SendChars(char *ibuf, unsigned long len)
+{
+ if (direct) {
+ if (buf) {
+// slen = strlen(buf);
+ if ((pos + len) > (unsigned)slen) {
+ buf = (char *) realloc(buf, pos + len + 1024);
+ memset(&buf[pos], 0, len + 1024);
+ }
+ }
+ else buf = (char *)calloc(1, len + 1024);
+ memmove(&buf[pos], ibuf, len);
+ pos += len;
+ }
+ else {
+ if (zbuf) {
+ if ((zpos + len) > zlen) {
+ zbuf = (char *) realloc(zbuf, zpos + len + 1024);
+ zlen = zpos + len + 1024;
+ }
+ }
+ else {
+ zbuf = (char *)calloc(1, len + 1024);
+ zlen = len + 1024;
+ }
+ memmove(&zbuf[zpos], ibuf, len);
+ zpos += len;
+ }
+ return len;
+}
+
+
+/******************************************************************************
+ * SWCompress::Encode - This function "encodes" the input stream into the
+ * output stream.
+ * The GetChars() and SendChars() functions are
+ * used to separate this method from the actual
+ * i/o.
+ */
+
+void SWCompress::Encode(void)
+{
+ cycleStream();
+}
+
+
+/******************************************************************************
+ * SWCompress::Decode - This function "decodes" the input stream into the
+ * output stream.
+ * The GetChars() and SendChars() functions are
+ * used to separate this method from the actual
+ * i/o.
+ */
+
+void SWCompress::Decode(void)
+{
+ cycleStream();
+}
+
+
+void SWCompress::cycleStream() {
+ char buf[1024];
+ unsigned long len, totlen = 0;
+
+ do {
+ len = GetChars(buf, 1024);
+ if (len)
+ totlen += SendChars(buf, len);
+ } while (len == 1024);
+
+ zlen = slen = totlen;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/common/swcomprs.doc b/src/modules/common/swcomprs.doc
new file mode 100644
index 0000000..b6817f2
--- /dev/null
+++ b/src/modules/common/swcomprs.doc
@@ -0,0 +1,802 @@
+The following is the original information send from Parson's Technologies via
+Craig Rairden.
+_______________________________________________________________________________
+Compression Info, 10-11-95
+Jeff Wheeler
+
+Source of Algorithm
+-------------------
+
+The compression algorithms used here are based upon the algorithms developed
+and published by Haruhiko Okumura in a paper entitled "Data Compression
+Algorithms of LARC and LHarc." This paper discusses three compression
+algorithms, LSZZ, LZARI, and LZHUF. LZSS is described as the "first" of
+these, and is described as providing moderate compression with good speed.
+LZARI is described as an improved LZSS, a combination of the LZSS algorithm
+with adaptive arithmetic compression. It is described as being slower than
+LZSS but with better compression. LZHUF (the basis of the common LHA
+compression program) was included in the paper, however, a free usage license
+was not included.
+
+The following are copies of the statements included at the beginning of each
+source code listing that was supplied in the working paper.
+
+ LZSS, dated 4/6/89, marked as "Use, distribute and
+ modify this program freely."
+
+ LZARI, dated 4/7/89, marked as "Use, distribute and
+ modify this program freely."
+
+ LZHUF, dated 11/20/88, written by Haruyasu Yoshizaki,
+ translated by Haruhiko Okumura on 4/7/89. Not
+ expressly marked as redistributable or modifiable.
+
+Since both LZSS and LZARI are marked as "use, distribute and modify freely" we
+have felt at liberty basing our compression algorithm on either of these.
+
+Selection of Algorithm
+----------------------
+
+Working samples of three possible compression algorithms are supplied in
+Okumura's paper. Which should be used?
+
+LZSS is the fastest at decompression, but does not generated as small a
+compressed file as the other methods. The other two methods provided, perhaps,
+a 15% improvement in compression. Or, put another way, on a 100K file, LZSS
+might compress it to 50K while the others might approach 40-45K. For STEP
+purposes, it was decided that decoding speed was of more importance than
+tighter compression. For these reasons, the first compression algorithm
+implemented is the LZSS algorithm.
+
+About LZSS Encoding
+-------------------
+
+(adapted from Haruhiko Okumura's paper)
+
+This scheme was proposed by Ziv and Lempel [1]. A slightly modified version
+is described by Storer and Szymanski [2]. An implementation using a binary
+tree has been proposed by Bell [3].
+
+The algorithm is quite simple.
+1. Keep a ring buffer which initially contains all space characters.
+2. Read several letters from the file to the buffer.
+3. Search the buffer for the longest string that matches the letters just
+ read, and send its length and position into the buffer.
+
+If the ring buffer is 4096 bytes, the position can be stored in 12 bits. If the
+length is represented in 4 bits, the <position, length> pair is two bytes
+long. If the longest match is no more than two characters, then just one
+character is sent without encoding. The process starts again with the next
+character. An extra bit is sent each time to tell the decoder whether the
+next item is a character of a <position, length> pair.
+
+[1] J. Ziv and A. Lempel, IEEE Transactions IT-23, 337-343 (1977).
+[2] J. A. Storer and T. G. Szymanski, J. ACM, 29, 928-951 (1982).
+[3] T.C. Gell, IEEE Transactions COM-34, 1176-1182 (1986).
+
+class SWCompress {
+public:
+void InitTree( // no return value
+ void); // no parameters
+
+void InsertNode( // no return value
+ short int Pos); // position in the buffer
+
+void DeleteNode( // no return value
+ short int Node); // node to be removed
+
+void Encode( // no return value
+ void); // no parameters
+
+void Decode( // no return value
+ void); // no parameters
+};
+
+// The following are constant sizes used by the compression algorithm.
+//
+// N - This is the size of the ring buffer. It is set
+// to 4K. It is important to note that a position
+// within the ring buffer requires 12 bits.
+//
+// F - This is the maximum length of a character sequence
+// that can be taken from the ring buffer. It is set
+// to 18. Note that a length must be 3 before it is
+// worthwhile to store a position/length pair, so the
+// length can be encoded in only 4 bits. Or, put yet
+// another way, it is not necessary to encode a length
+// of 0-18, it is necessary to encode a length of
+// 3-18, which requires 4 bits.
+//
+// THRESHOLD - It takes 2 bytes to store an offset and
+// a length. If a character sequence only
+// requires 1 or 2 characters to store
+// uncompressed, then it is better to store
+// it uncompressed than as an offset into
+// the ring buffer.
+//
+// Note that the 12 bits used to store the position and the 4 bits
+// used to store the length equal a total of 16 bits, or 2 bytes.
+
+#define N 4096
+#define F 18
+#define THRESHOLD 3
+#define NOT_USED N
+
+// m_ring_buffer is a text buffer. It contains "nodes" of
+// uncompressed text that can be indexed by position. That is,
+// a substring of the ring buffer can be indexed by a position
+// and a length. When decoding, the compressed text may contain
+// a position in the ring buffer and a count of the number of
+// bytes from the ring buffer that are to be moved into the
+// uncompressed buffer.
+//
+// This ring buffer is not maintained as part of the compressed
+// text. Instead, it is reconstructed dynamically. That is,
+// it starts out empty and gets built as the text is decompressed.
+//
+// The ring buffer contain N bytes, with an additional F - 1 bytes
+// to facilitate string comparison.
+
+unsigned char m_ring_buffer[N + F - 1];
+
+// m_match_position and m_match_length are set by InsertNode().
+//
+// These variables indicate the position in the ring buffer
+// and the number of characters at that position that match
+// a given string.
+
+short int m_match_position;
+short int m_match_length;
+
+// m_lson, m_rson, and m_dad are the Japanese way of referring to
+// a tree structure. The dad is the parent and it has a right and
+// left son (child).
+//
+// For i = 0 to N-1, m_rson[i] and m_lson[i] will be the right
+// and left children of node i.
+//
+// For i = 0 to N-1, m_dad[i] is the parent of node i.
+//
+// For i = 0 to 255, rson[N + i + 1] is the root of the tree for
+// strings that begin with the character i. Note that this requires
+// one byte characters.
+//
+// These nodes store values of 0...(N-1). Memory requirements
+// can be reduces by using 2-byte integers instead of full 4-byte
+// integers (for 32-bit applications). Therefore, these are
+// defined as "short ints."
+
+short int m_lson[N + 1];
+short int m_rson[N + 257];
+short int m_dad[N + 1];
+
+
+
+
+/*
+ -------------------------------------------------------------------------
+ cLZSS::InitTree
+
+ This function initializes the tree nodes to "empty" states.
+ -------------------------------------------------------------------------
+*/
+
+void cLZSS::InitTree( // no return value
+ void) // no parameters
+ throw() // exception list
+
+ {
+ int i;
+
+ // For i = 0 to N - 1, m_rson[i] and m_lson[i] will be the right
+ // and left children of node i. These nodes need not be
+ // initialized. However, for debugging purposes, it is nice to
+ // have them initialized. Since this is only used for compression
+ // (not decompression), I don't mind spending the time to do it.
+ //
+ // For the same range of i, m_dad[i] is the parent of node i.
+ // These are initialized to a known value that can represent
+ // a "not used" state.
+
+ for (i = 0; i < N; i++)
+ {
+ m_lson[i] = NOT_USED;
+ m_rson[i] = NOT_USED;
+ m_dad[i] = NOT_USED;
+ }
+
+ // For i = 0 to 255, m_rson[N + i + 1] is the root of the tree
+ // for strings that begin with the character i. This is why
+ // the right child array is larger than the left child array.
+ // These are also initialzied to a "not used" state.
+ //
+ // Note that there are 256 of these, one for each of the possible
+ // 256 characters.
+
+ for (i = N + 1; i <= (N + 256); i++)
+ {
+ m_rson[i] = NOT_USED;
+ }
+
+ // Done.
+ }
+
+/*
+ -------------------------------------------------------------------------
+ cLZSS::InsertNode
+
+ This function inserts a string from the ring buffer into one of
+ the trees. It loads the match position and length member variables
+ for the longest match.
+
+ The string to be inserted is identified by the parameter Pos,
+ A full F bytes are inserted. So, m_ring_buffer[Pos ... Pos+F-1]
+ are inserted.
+
+ If the matched length is exactly F, then an old node is removed
+ in favor of the new one (because the old one will be deleted
+ sooner).
+
+ Note that Pos plays a dual role. It is used as both a position
+ in the ring buffer and also as a tree node. m_ring_buffer[Pos]
+ defines a character that is used to identify a tree node.
+ -------------------------------------------------------------------------
+*/
+
+void cLZSS::InsertNode( // no return value
+ short int Pos) // position in the buffer
+ throw() // exception list
+
+ {
+ short int i;
+ short int p;
+ int cmp;
+ unsigned char * key;
+
+ ASSERT(Pos >= 0);
+ ASSERT(Pos < N);
+
+ cmp = 1;
+ key = &(m_ring_buffer[Pos]);
+
+ // The last 256 entries in m_rson contain the root nodes for
+ // strings that begin with a letter. Get an index for the
+ // first letter in this string.
+
+ p = (short int) (N + 1 + key[0]);
+
+ // Set the left and right tree nodes for this position to "not
+ // used."
+
+ m_lson[Pos] = NOT_USED;
+ m_rson[Pos] = NOT_USED;
+
+ // Haven't matched anything yet.
+
+ m_match_length = 0;
+
+ for ( ; ; )
+ {
+ if (cmp >= 0)
+ {
+ if (m_rson[p] != NOT_USED)
+ {
+ p = m_rson[p];
+ }
+ else
+ {
+ m_rson[p] = Pos;
+ m_dad[Pos] = p;
+ return;
+ }
+ }
+ else
+ {
+ if (m_lson[p] != NOT_USED)
+ {
+ p = m_lson[p];
+ }
+ else
+ {
+ m_lson[p] = Pos;
+ m_dad[Pos] = p;
+ return;
+ }
+ }
+
+ // Should we go to the right or the left to look for the
+ // next match?
+
+ for (i = 1; i < F; i++)
+ {
+ cmp = key[i] - m_ring_buffer[p + i];
+ if (cmp != 0)
+ break;
+ }
+
+ if (i > m_match_length)
+ {
+ m_match_position = p;
+ m_match_length = i;
+
+ if (i >= F)
+ break;
+ }
+ }
+
+ m_dad[Pos] = m_dad[p];
+ m_lson[Pos] = m_lson[p];
+ m_rson[Pos] = m_rson[p];
+
+ m_dad[ m_lson[p] ] = Pos;
+ m_dad[ m_rson[p] ] = Pos;
+
+ if (m_rson[ m_dad[p] ] == p)
+ {
+ m_rson[ m_dad[p] ] = Pos;
+ }
+ else
+ {
+ m_lson[ m_dad[p] ] = Pos;
+ }
+
+ // Remove "p"
+
+ m_dad[p] = NOT_USED;
+ }
+
+/*
+ -------------------------------------------------------------------------
+ cLZSS::DeleteNode
+
+ This function removes the node "Node" from the tree.
+ -------------------------------------------------------------------------
+*/
+
+void cLZSS::DeleteNode( // no return value
+ short int Node) // node to be removed
+ throw() // exception list
+
+ {
+ short int q;
+
+ ASSERT(Node >= 0);
+ ASSERT(Node < (N+1));
+
+ if (m_dad[Node] == NOT_USED)
+ {
+ // not in tree, nothing to do
+ return;
+ }
+
+ if (m_rson[Node] == NOT_USED)
+ {
+ q = m_lson[Node];
+ }
+ else if (m_lson[Node] == NOT_USED)
+ {
+ q = m_rson[Node];
+ }
+ else
+ {
+ q = m_lson[Node];
+ if (m_rson[q] != NOT_USED)
+ {
+ do
+ {
+ q = m_rson[q];
+ }
+ while (m_rson[q] != NOT_USED);
+
+ m_rson[ m_dad[q] ] = m_lson[q];
+ m_dad[ m_lson[q] ] = m_dad[q];
+ m_lson[q] = m_lson[Node];
+ m_dad[ m_lson[Node] ] = q;
+ }
+
+ m_rson[q] = m_rson[Node];
+ m_dad[ m_rson[Node] ] = q;
+ }
+
+ m_dad[q] = m_dad[Node];
+
+ if (m_rson[ m_dad[Node] ] == Node)
+ {
+ m_rson[ m_dad[Node] ] = q;
+ }
+ else
+ {
+ m_lson[ m_dad[Node] ] = q;
+ }
+
+ m_dad[Node] = NOT_USED;
+ }
+
+/*
+ -------------------------------------------------------------------------
+ cLZSS::Encode
+
+ This function "encodes" the input stream into the output stream.
+ The GetChars() and SendChars() functions are used to separate
+ this method from the actual i/o.
+ -------------------------------------------------------------------------
+*/
+
+void cLZSS::Encode( // no return value
+ void) // no parameters
+
+ {
+ short int i; // an iterator
+ short int r; // node number in the binary tree
+ short int s; // position in the ring buffer
+ unsigned short int len; // len of initial string
+ short int last_match_length; // length of last match
+ short int code_buf_pos; // position in the output buffer
+ unsigned char code_buf[17]; // the output buffer
+ unsigned char mask; // bit mask for byte 0 of out buf
+ unsigned char c; // character read from string
+
+ // Start with a clean tree.
+
+ InitTree();
+
+ // code_buf[0] works as eight flags. A "1" represents that the
+ // unit is an unencoded letter (1 byte), and a "0" represents
+ // that the next unit is a <position,length> pair (2 bytes).
+ //
+ // code_buf[1..16] stores eight units of code. Since the best
+ // we can do is store eight <position,length> pairs, at most 16
+ // bytes are needed to store this.
+ //
+ // This is why the maximum size of the code buffer is 17 bytes.
+
+ code_buf[0] = 0;
+ code_buf_pos = 1;
+
+ // Mask iterates over the 8 bits in the code buffer. The first
+ // character ends up being stored in the low bit.
+ //
+ // bit 8 7 6 5 4 3 2 1
+ // | |
+ // | first sequence in code buffer
+ // |
+ // last sequence in code buffer
+
+ mask = 1;
+
+ s = 0;
+ r = (short int) N - (short int) F;
+
+ // Initialize the ring buffer with spaces...
+
+ // Note that the last F bytes of the ring buffer are not filled.
+ // This is because those F bytes will be filled in immediately
+ // with bytes from the input stream.
+
+ memset(m_ring_buffer, ' ', N - F);
+
+ // Read F bytes into the last F bytes of the ring buffer.
+ //
+ // This function loads the buffer with X characters and returns
+ // the actual amount loaded.
+
+ len = GetChars(&(m_ring_buffer[r]), F);
+
+ // Make sure there is something to be compressed.
+
+ if (len == 0)
+ return;
+
+ // Insert the F strings, each of which begins with one or more
+ // 'space' characters. Note the order in which these strings
+ // are inserted. This way, degenerate trees will be less likely
+ // to occur.
+
+ for (i = 1; i <= F; i++)
+ {
+ InsertNode((short int) (r - i));
+ }
+
+ // Finally, insert the whole string just read. The
+ // member variables match_length and match_position are set.
+
+ InsertNode(r);
+
+ // Now that we're preloaded, continue till done.
+
+ do
+ {
+
+ // m_match_length may be spuriously long near the end of
+ // text.
+
+ if (m_match_length > len)
+ {
+ m_match_length = len;
+ }
+
+ // Is it cheaper to store this as a single character? If so,
+ // make it so.
+
+ if (m_match_length < THRESHOLD)
+ {
+ // Send one character. Remember that code_buf[0] is the
+ // set of flags for the next eight items.
+
+ m_match_length = 1;
+ code_buf[0] |= mask;
+ code_buf[code_buf_pos++] = m_ring_buffer[r];
+ }
+
+ // Otherwise, we do indeed have a string that can be stored
+ // compressed to save space.
+
+ else
+ {
+ // The next 16 bits need to contain the position (12 bits)
+ // and the length (4 bits).
+
+ code_buf[code_buf_pos++] = (unsigned char) m_match_position;
+ code_buf[code_buf_pos++] = (unsigned char) (
+ ((m_match_position >> 4) & 0xf0) |
+ (m_match_length - THRESHOLD) );
+ }
+
+ // Shift the mask one bit to the left so that it will be ready
+ // to store the new bit.
+
+ mask = (unsigned char) (mask << 1);
+
+ // If the mask is now 0, then we know that we have a full set
+ // of flags and items in the code buffer. These need to be
+ // output.
+
+ if (mask == 0)
+ {
+ // code_buf is the buffer of characters to be output.
+ // code_buf_pos is the number of characters it contains.
+
+ SendChars(code_buf, code_buf_pos);
+
+ // Reset for next buffer...
+
+ code_buf[0] = 0;
+ code_buf_pos = 1;
+ mask = 1;
+ }
+
+ last_match_length = m_match_length;
+
+ // Delete old strings and read new bytes...
+
+ for (i = 0; i < last_match_length; i++)
+ {
+
+ // Get next character...
+
+ if (GetChars(&c, 1) != 1)
+ break;
+
+ // Delete "old strings"
+
+ DeleteNode(s);
+
+ // Put this character into the ring buffer.
+ //
+ // The original comment here says "If the position is near
+ // the end of the buffer, extend the buffer to make
+ // string comparison easier."
+ //
+ // That's a little misleading, because the "end" of the
+ // buffer is really what we consider to be the "beginning"
+ // of the buffer, that is, positions 0 through F.
+ //
+ // The idea is that the front end of the buffer is duplicated
+ // into the back end so that when you're looking at characters
+ // at the back end of the buffer, you can index ahead (beyond
+ // the normal end of the buffer) and see the characters
+ // that are at the front end of the buffer wihtout having
+ // to adjust the index.
+ //
+ // That is...
+ //
+ // 1234xxxxxxxxxxxxxxxxxxxxxxxxxxxxx1234
+ // | | |
+ // position 0 end of buffer |
+ // |
+ // duplicate of front of buffer
+
+ m_ring_buffer[s] = c;
+
+ if (s < F - 1)
+ {
+ m_ring_buffer[s + N] = c;
+ }
+
+ // Increment the position, and wrap around when we're at
+ // the end. Note that this relies on N being a power of 2.
+
+ s = (short int) ( (s + 1) & (N - 1) );
+ r = (short int) ( (r + 1) & (N - 1) );
+
+ // Register the string that is found in
+ // m_ring_buffer[r..r+F-1].
+
+ InsertNode(r);
+ }
+
+ // If we didn't quit because we hit the last_match_length,
+ // then we must have quit because we ran out of characters
+ // to process.
+
+ while (i++ < last_match_length)
+ {
+ DeleteNode(s);
+
+ s = (short int) ( (s + 1) & (N - 1) );
+ r = (short int) ( (r + 1) & (N - 1) );
+
+ // Note that len hitting 0 is the key that causes the
+ // do...while() to terminate. This is the only place
+ // within the loop that len is modified.
+ //
+ // Its original value is F (or a number less than F for
+ // short strings).
+
+ if (--len)
+ {
+ InsertNode(r); /* buffer may not be empty. */
+ }
+ }
+
+ // End of do...while() loop. Continue processing until there
+ // are no more characters to be compressed. The variable
+ // "len" is used to signal this condition.
+ }
+ while (len > 0);
+
+ // There could still be something in the output buffer. Send it
+ // now.
+
+ if (code_buf_pos > 1)
+ {
+ // code_buf is the encoded string to send.
+ // code_buf_ptr is the number of characters.
+
+ SendChars(code_buf, code_buf_pos);
+ }
+
+ // Done!
+ }
+
+/*
+ -------------------------------------------------------------------------
+ cLZSS::Decode
+
+ This function "decodes" the input stream into the output stream.
+ The GetChars() and SendChars() functions are used to separate
+ this method from the actual i/o.
+ -------------------------------------------------------------------------
+*/
+
+void cLZSS::Decode( // no return value
+ void) // no parameters
+
+ {
+ int k;
+ int r; // node number
+ unsigned char c[F]; // an array of chars
+ unsigned char flags; // 8 bits of flags
+ int flag_count; // which flag we're on
+ short int pos; // position in the ring buffer
+ short int len; // number of chars in ring buffer
+
+ // Initialize the ring buffer with a common string.
+ //
+ // Note that the last F bytes of the ring buffer are not filled.
+
+ memset(m_ring_buffer, ' ', N - F);
+
+ r = N - F;
+
+ flags = (char) 0;
+ flag_count = 0;
+
+ for ( ; ; )
+ {
+
+ // If there are more bits of interest in this flag, then
+ // shift that next interesting bit into the 1's position.
+ //
+ // If this flag has been exhausted, the next byte must
+ // be a flag.
+
+ if (flag_count > 0)
+ {
+ flags = (unsigned char) (flags >> 1);
+ flag_count--;
+ }
+ else
+ {
+ // Next byte must be a flag.
+
+ if (GetChars(&flags, 1) != 1)
+ break;
+
+ // Set the flag counter. While at first it might appear
+ // that this should be an 8 since there are 8 bits in the
+ // flag, it should really be a 7 because the shift must
+ // be performed 7 times in order to see all 8 bits.
+
+ flag_count = 7;
+ }
+
+ // If the low order bit of the flag is now set, then we know
+ // that the next byte is a single, unencoded character.
+
+ if (flags & 1)
+ {
+ if (GetChars(c, 1) != 1)
+ break;
+
+ if (SendChars(c, 1) != 1)
+ break;
+
+ // Add to buffer, and increment to next spot. Wrap at end.
+
+ m_ring_buffer[r] = c[0];
+ r = (short int) ( (r + 1) & (N - 1) );
+ }
+
+ // Otherwise, we know that the next two bytes are a
+ // <position,length> pair. The position is in 12 bits and
+ // the length is in 4 bits.
+
+ else
+ {
+ // Original code:
+ // if ((i = getc(infile)) == EOF)
+ // break;
+ // if ((j = getc(infile)) == EOF)
+ // break;
+ // i |= ((j & 0xf0) << 4);
+ // j = (j & 0x0f) + THRESHOLD;
+ //
+ // I've modified this to only make one input call, and
+ // have changed the variable names to something more
+ // obvious.
+
+ if (GetChars(c, 2) != 2)
+ break;
+
+ // Convert these two characters into the position and
+ // length. Note that the length is always at least
+ // THRESHOLD, which is why we're able to get a length
+ // of 18 out of only 4 bits.
+
+ pos = (short int) ( c[0] | ((c[1] & 0xf0) << 4) );
+
+ len = (short int) ( (c[1] & 0x0f) + THRESHOLD );
+
+ // There are now "len" characters at position "pos" in
+ // the ring buffer that can be pulled out. Note that
+ // len is never more than F.
+
+ for (k = 0; k < len; k++)
+ {
+ c[k] = m_ring_buffer[(pos + k) & (N - 1)];
+
+ // Add to buffer, and increment to next spot. Wrap at end.
+
+ m_ring_buffer[r] = c[k];
+ r = (short int) ( (r + 1) & (N - 1) );
+ }
+
+ // Add the "len" characters to the output stream.
+
+ if (SendChars(c, len) != len)
+ break;
+ }
+ }
+ }
+
diff --git a/src/modules/common/zipcomprs.cpp b/src/modules/common/zipcomprs.cpp
new file mode 100644
index 0000000..21726bf
--- /dev/null
+++ b/src/modules/common/zipcomprs.cpp
@@ -0,0 +1,164 @@
+/******************************************************************************
+ * swcomprs.cpp - code for class 'ZipCompress'- a driver class that provides
+ * compression utilities. - using zlib
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <zipcomprs.h>
+#include <zlib.h>
+
+SWORD_NAMESPACE_START
+
+/******************************************************************************
+ * ZipCompress Constructor - Initializes data for instance of ZipCompress
+ *
+ */
+
+ZipCompress::ZipCompress() : SWCompress()
+{
+// fprintf(stderr, "init compress\n");
+}
+
+
+/******************************************************************************
+ * ZipCompress Destructor - Cleans up instance of ZipCompress
+ */
+
+ZipCompress::~ZipCompress() {
+}
+
+
+/******************************************************************************
+ * ZipCompress::Encode - This function "encodes" the input stream into the
+ * output stream.
+ * The GetChars() and SendChars() functions are
+ * used to separate this method from the actual
+ * i/o.
+ * NOTE: must set zlen for parent class to know length of
+ * compressed buffer.
+ */
+
+void ZipCompress::Encode(void)
+{
+/*
+ZEXTERN int ZEXPORT compress OF((Bytef *dest, uLongf *destLen,
+ const Bytef *source, uLong sourceLen));
+ Compresses the source buffer into the destination buffer. sourceLen is
+ the byte length of the source buffer. Upon entry, destLen is the total
+ size of the destination buffer, which must be at least 0.1% larger than
+ sourceLen plus 12 bytes. Upon exit, destLen is the actual size of the
+ compressed buffer.
+ This function can be used to compress a whole file at once if the
+ input file is mmap'ed.
+ compress returns Z_OK if success, Z_MEM_ERROR if there was not
+ enough memory, Z_BUF_ERROR if there was not enough room in the output
+ buffer.
+*/
+ direct = 0; // set direction needed by parent [Get|Send]Chars()
+
+ // get buffer
+ char chunk[1024];
+ char *buf = (char *)calloc(1, 1024);
+ char *chunkbuf = buf;
+ unsigned long chunklen;
+ unsigned long len = 0;
+ while((chunklen = GetChars(chunk, 1023))) {
+ memcpy(chunkbuf, chunk, chunklen);
+ len += chunklen;
+ if (chunklen < 1023)
+ break;
+ else buf = (char *)realloc(buf, len + 1024);
+ chunkbuf = buf+len;
+ }
+
+
+ zlen = (long) (len*1.001)+15;
+ char *zbuf = new char[zlen+1];
+ if (len)
+ {
+ //printf("Doing compress\n");
+ if (compress((Bytef*)zbuf, &zlen, (const Bytef*)buf, len) != Z_OK)
+ {
+ printf("ERROR in compression\n");
+ }
+ else {
+ SendChars(zbuf, zlen);
+ }
+ }
+ else
+ {
+ fprintf(stderr, "ERROR: no buffer to compress\n");
+ }
+ delete [] zbuf;
+ free (buf);
+}
+
+
+/******************************************************************************
+ * ZipCompress::Decode - This function "decodes" the input stream into the
+ * output stream.
+ * The GetChars() and SendChars() functions are
+ * used to separate this method from the actual
+ * i/o.
+ */
+
+void ZipCompress::Decode(void)
+{
+/*
+ZEXTERN int ZEXPORT uncompress OF((Bytef *dest, uLongf *destLen,
+ const Bytef *source, uLong sourceLen));
+ Decompresses the source buffer into the destination buffer. sourceLen is
+ the byte length of the source buffer. Upon entry, destLen is the total
+ size of the destination buffer, which must be large enough to hold the
+ entire uncompressed data. (The size of the uncompressed data must have
+ been saved previously by the compressor and transmitted to the decompressor
+ by some mechanism outside the scope of this compression library.)
+ Upon exit, destLen is the actual size of the compressed buffer.
+ This function can be used to decompress a whole file at once if the
+ input file is mmap'ed.
+
+ uncompress returns Z_OK if success, Z_MEM_ERROR if there was not
+ enough memory, Z_BUF_ERROR if there was not enough room in the output
+ buffer, or Z_DATA_ERROR if the input data was corrupted.
+*/
+
+ // get buffer
+ char chunk[1024];
+ char *zbuf = (char *)calloc(1, 1024);
+ char *chunkbuf = zbuf;
+ int chunklen;
+ unsigned long zlen = 0;
+ while((chunklen = GetChars(chunk, 1023))) {
+ memcpy(chunkbuf, chunk, chunklen);
+ zlen += chunklen;
+ if (chunklen < 1023)
+ break;
+ else zbuf = (char *)realloc(zbuf, zlen + 1024);
+ chunkbuf = zbuf + zlen;
+ }
+
+ //printf("Decoding complength{%ld} uncomp{%ld}\n", zlen, blen);
+ if (zlen) {
+ unsigned long blen = zlen*20; // trust compression is less than 1000%
+ char *buf = new char[blen];
+ //printf("Doing decompress {%s}\n", zbuf);
+ slen = 0;
+ switch (uncompress((Bytef*)buf, &blen, (Bytef*)zbuf, zlen)){
+ case Z_OK: SendChars(buf, blen); slen = blen; break;
+ case Z_MEM_ERROR: fprintf(stderr, "ERROR: not enough memory during decompression.\n"); break;
+ case Z_BUF_ERROR: fprintf(stderr, "ERROR: not enough room in the out buffer during decompression.\n"); break;
+ case Z_DATA_ERROR: fprintf(stderr, "ERROR: corrupt data during decompression.\n"); break;
+ default: fprintf(stderr, "ERROR: an unknown error occured during decompression.\n"); break;
+ }
+ delete [] buf;
+ }
+ else {
+ fprintf(stderr, "ERROR: no buffer to decompress!\n");
+ }
+ //printf("Finished decoding\n");
+ free (zbuf);
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/common/zstr.cpp b/src/modules/common/zstr.cpp
new file mode 100644
index 0000000..2539ff0
--- /dev/null
+++ b/src/modules/common/zstr.cpp
@@ -0,0 +1,731 @@
+/******************************************************************************
+ * zstr.cpp - code for class 'zStr'- a module that reads compressed text
+ * files and provides lookup and parsing functions based on
+ * class StrKey
+ */
+
+#include <stdio.h>
+#include <fcntl.h>
+#include <errno.h>
+
+#include <stdlib.h>
+#include <utilstr.h>
+#include <zstr.h>
+#include <swcomprs.h>
+
+#include <sysdata.h>
+#include <entriesblk.h>
+#include <swlog.h>
+#include <stringmgr.h>
+#include <filemgr.h>
+#include <swbuf.h>
+
+SWORD_NAMESPACE_START
+
+/******************************************************************************
+ * zStr Statics
+ */
+
+int zStr::instance = 0;
+const int zStr::IDXENTRYSIZE = 8;
+const int zStr::ZDXENTRYSIZE = 8;
+
+
+/******************************************************************************
+ * zStr Constructor - Initializes data for instance of zStr
+ *
+ * ENT: ipath - path of the directory where data and index files are located.
+ */
+
+zStr::zStr(const char *ipath, int fileMode, long blockCount, SWCompress *icomp)
+{
+ SWBuf buf;
+
+ nl = '\n';
+ lastoff = -1;
+ path = 0;
+ stdstr(&path, ipath);
+
+ compressor = (icomp) ? icomp : new SWCompress();
+ this->blockCount = blockCount;
+
+ if (fileMode == -1) { // try read/write if possible
+ fileMode = FileMgr::RDWR;
+ }
+
+ buf.setFormatted("%s.idx", path);
+ idxfd = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
+
+ buf.setFormatted("%s.dat", path);
+ datfd = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
+
+ buf.setFormatted("%s.zdx", path);
+ zdxfd = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
+
+ buf.setFormatted("%s.zdt", path);
+ zdtfd = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
+
+ if (datfd <= 0) {
+ SWLog::getSystemLog()->logError("%d", errno);
+ }
+
+ cacheBlock = 0;
+ cacheBlockIndex = -1;
+ cacheDirty = false;
+
+ instance++;
+}
+
+
+/******************************************************************************
+ * zStr Destructor - Cleans up instance of zStr
+ */
+
+zStr::~zStr() {
+
+ flushCache();
+
+ if (path)
+ delete [] path;
+
+ --instance;
+
+ FileMgr::getSystemFileMgr()->close(idxfd);
+ FileMgr::getSystemFileMgr()->close(datfd);
+ FileMgr::getSystemFileMgr()->close(zdxfd);
+ FileMgr::getSystemFileMgr()->close(zdtfd);
+
+
+ if (compressor)
+ delete compressor;
+
+}
+
+
+/******************************************************************************
+ * zStr::getidxbufdat - Gets the index string at the given dat offset
+ * NOTE: buf is calloc'd, or if not null, realloc'd and must
+ * be free'd by calling function
+ *
+ * ENT: ioffset - offset in dat file to lookup
+ * buf - address of pointer to allocate for storage of string
+ */
+
+void zStr::getKeyFromDatOffset(long ioffset, char **buf) {
+ int size;
+ char ch;
+ if (datfd > 0) {
+ datfd->seek(ioffset, SEEK_SET);
+ for (size = 0; datfd->read(&ch, 1) == 1; size++) {
+ if ((ch == '\\') || (ch == 10) || (ch == 13))
+ break;
+ }
+ *buf = (*buf) ? (char *)realloc(*buf, size*2 + 1) : (char *)malloc(size*2 + 1);
+ if (size) {
+ datfd->seek(ioffset, SEEK_SET);
+ datfd->read(*buf, size);
+ }
+ (*buf)[size] = 0;
+ toupperstr_utf8(*buf, size*2);
+ }
+ else {
+ *buf = (*buf) ? (char *)realloc(*buf, 1) : (char *)malloc(1);
+ **buf = 0;
+ }
+}
+
+
+/******************************************************************************
+ * zStr::getidxbuf - Gets the index string at the given idx offset
+ * NOTE: buf is calloc'd, or if not null, realloc'd
+ * and must be freed by calling function
+ *
+ * ENT: ioffset - offset in idx file to lookup
+ * buf - address of pointer to allocate for storage of string
+ */
+
+void zStr::getKeyFromIdxOffset(long ioffset, char **buf) {
+ __u32 offset;
+
+ if (idxfd > 0) {
+ idxfd->seek(ioffset, SEEK_SET);
+ idxfd->read(&offset, sizeof(__u32));
+ offset = swordtoarch32(offset);
+ getKeyFromDatOffset(offset, buf);
+ }
+}
+
+
+/******************************************************************************
+ * zStr::findoffset - Finds the offset of the key string from the indexes
+ *
+ * ENT: key - key string to lookup
+ * offset - address to store the starting offset
+ * size - address to store the size of the entry
+ * away - number of entries before of after to jump
+ * (default = 0)
+ *
+ * RET: error status
+ */
+
+signed char zStr::findKeyIndex(const char *ikey, long *idxoff, long away) {
+ char *maxbuf = 0, *trybuf = 0, *key = 0, quitflag = 0;
+ signed char retval = 0;
+ __s32 headoff, tailoff, tryoff = 0, maxoff = 0;
+ __u32 start, size;
+ int diff = 0;
+
+ if (idxfd->getFd() >= 0) {
+ tailoff = maxoff = idxfd->seek(0, SEEK_END) - IDXENTRYSIZE;
+ if (*ikey) {
+ headoff = 0;
+ stdstr(&key, ikey, 3);
+ toupperstr_utf8(key, strlen(key)*3);
+
+ int keylen = strlen(key);
+ bool substr = false;
+
+ getKeyFromIdxOffset(maxoff, &maxbuf);
+
+ while (headoff < tailoff) {
+ tryoff = (lastoff == -1) ? headoff + (((((tailoff / IDXENTRYSIZE) - (headoff / IDXENTRYSIZE))) / 2) * IDXENTRYSIZE) : lastoff;
+ lastoff = -1;
+
+ getKeyFromIdxOffset(tryoff, &trybuf);
+
+ if (!*trybuf && tryoff) { // In case of extra entry at end of idx (not first entry)
+ tryoff += (tryoff > (maxoff / 2))?-IDXENTRYSIZE:IDXENTRYSIZE;
+ retval = -1;
+ break;
+ }
+
+ diff = strcmp(key, trybuf);
+
+ if (!diff)
+ break;
+
+ if (!strncmp(trybuf, key, keylen)) substr = true;
+
+ if (diff < 0)
+ tailoff = (tryoff == headoff) ? headoff : tryoff;
+ else headoff = tryoff;
+
+ if (tailoff == headoff + IDXENTRYSIZE) {
+ if (quitflag++)
+ headoff = tailoff;
+ }
+ }
+
+ // didn't find exact match
+ if (headoff >= tailoff) {
+ tryoff = headoff;
+ if (!substr && ((tryoff != maxoff)||(strncmp(key, maxbuf, keylen)<0))) {
+ away--; // if our entry doesn't startwith our key, prefer the previous entry over the next
+ }
+ }
+ if (trybuf)
+ free(trybuf);
+ delete [] key;
+ if (maxbuf)
+ free(maxbuf);
+ }
+ else { tryoff = 0; }
+
+ idxfd->seek(tryoff, SEEK_SET);
+
+ start = size = 0;
+ retval = (idxfd->read(&start, sizeof(__u32))==sizeof(__u32)) ? retval : -1;
+ retval = (idxfd->read(&size, sizeof(__u32))==sizeof(__u32)) ? retval : -1;
+ start = swordtoarch32(start);
+ size = swordtoarch32(size);
+
+ if (idxoff)
+ *idxoff = tryoff;
+
+ while (away) {
+ __u32 laststart = start;
+ __u32 lastsize = size;
+ __s32 lasttry = tryoff;
+ tryoff += (away > 0) ? IDXENTRYSIZE : -IDXENTRYSIZE;
+
+ bool bad = false;
+ if (((long)(tryoff + (away*IDXENTRYSIZE)) < -IDXENTRYSIZE) || (tryoff + (away*IDXENTRYSIZE) > (maxoff+IDXENTRYSIZE)))
+ bad = true;
+ else if (idxfd->seek(tryoff, SEEK_SET) < 0)
+ bad = true;
+ if (bad) {
+ retval = -1;
+ start = laststart;
+ size = lastsize;
+ tryoff = lasttry;
+ if (idxoff)
+ *idxoff = tryoff;
+ break;
+ }
+ idxfd->read(&start, sizeof(__u32));
+ idxfd->read(&size, sizeof(__u32));
+ start = swordtoarch32(start);
+ size = swordtoarch32(size);
+
+ if (idxoff)
+ *idxoff = tryoff;
+
+
+ if (((laststart != start) || (lastsize != size)) && (start >= 0) && (size))
+ away += (away < 0) ? 1 : -1;
+ }
+
+ lastoff = tryoff;
+ }
+ else {
+ if (idxoff)
+ *idxoff = 0;
+ retval = -1;
+ }
+ return retval;
+}
+
+
+/******************************************************************************
+ * zStr::preptext - Prepares the text before returning it to external
+ * objects
+ *
+ * ENT: buf - buffer where text is stored and where to store the prep'd
+ * text.
+ */
+
+void zStr::prepText(SWBuf &buf) {
+ unsigned int to, from;
+ char space = 0, cr = 0, realdata = 0, nlcnt = 0;
+ char *rawBuf = buf.getRawData();
+ for (to = from = 0; rawBuf[from]; from++) {
+ switch (rawBuf[from]) {
+ case 10:
+ if (!realdata)
+ continue;
+ space = (cr) ? 0 : 1;
+ cr = 0;
+ nlcnt++;
+ if (nlcnt > 1) {
+// *to++ = nl;
+ rawBuf[to++] = 10;
+// *to++ = nl[1];
+// nlcnt = 0;
+ }
+ continue;
+ case 13:
+ if (!realdata)
+ continue;
+// *to++ = nl[0];
+ rawBuf[to++] = 10;
+ space = 0;
+ cr = 1;
+ continue;
+ }
+ realdata = 1;
+ nlcnt = 0;
+ if (space) {
+ space = 0;
+ if (rawBuf[from] != ' ') {
+ rawBuf[to++] = ' ';
+ from--;
+ continue;
+ }
+ }
+ rawBuf[to++] = rawBuf[from];
+ }
+ buf.setSize(to);
+
+ while (to > 1) { // remove trailing excess
+ to--;
+ if ((rawBuf[to] == 10) || (rawBuf[to] == ' '))
+ buf.setSize(to);
+ else break;
+ }
+}
+
+
+/******************************************************************************
+ * zStr::getText - gets text at a given offset
+ *
+ * ENT:
+ * offset - idxoffset where the key is located.
+ * buf - buffer to store text
+ * idxbuf - buffer to store index key
+ * NOTE: buffer will be alloc'd / realloc'd and
+ * should be free'd by the client
+ *
+ */
+
+void zStr::getText(long offset, char **idxbuf, char **buf) {
+ char *ch;
+ char *idxbuflocal = 0;
+ getKeyFromIdxOffset(offset, &idxbuflocal);
+ __u32 start;
+ __u32 size;
+
+ do {
+ idxfd->seek(offset, SEEK_SET);
+ idxfd->read(&start, sizeof(__u32));
+ idxfd->read(&size, sizeof(__u32));
+ start = swordtoarch32(start);
+ size = swordtoarch32(size);
+
+ *buf = (*buf) ? (char *)realloc(*buf, size*2 + 1) : (char *)malloc(size*2 + 1);
+ *idxbuf = (*idxbuf) ? (char *)realloc(*idxbuf, size*2 + 1) : (char *)malloc(size*2 + 1);
+ memset(*buf, 0, size + 1);
+ memset(*idxbuf, 0, size + 1);
+ datfd->seek(start, SEEK_SET);
+ datfd->read(*buf, (int)(size));
+
+ for (ch = *buf; *ch; ch++) { // skip over index string
+ if (*ch == 10) {
+ ch++;
+ break;
+ }
+ }
+ memmove(*buf, ch, size - (unsigned long)(ch-*buf));
+
+ // resolve link
+ if (!strncmp(*buf, "@LINK", 5)) {
+ for (ch = *buf; *ch; ch++) { // null before nl
+ if (*ch == 10) {
+ *ch = 0;
+ break;
+ }
+ }
+ findKeyIndex(*buf + 6, &offset);
+ }
+ else break;
+ }
+ while (true); // while we're resolving links
+
+ if (idxbuflocal) {
+ __u32 localsize = strlen(idxbuflocal);
+ localsize = (localsize < (size - 1)) ? localsize : (size - 1);
+ strncpy(*idxbuf, idxbuflocal, localsize);
+ (*idxbuf)[localsize] = 0;
+ free(idxbuflocal);
+ }
+ __u32 block = 0;
+ __u32 entry = 0;
+ memmove(&block, *buf, sizeof(__u32));
+ memmove(&entry, *buf + sizeof(__u32), sizeof(__u32));
+ block = swordtoarch32(block);
+ entry = swordtoarch32(entry);
+ getCompressedText(block, entry, buf);
+}
+
+
+/******************************************************************************
+ * zStr::getCompressedText - Get text entry from a compressed index / zdata
+ * file.
+ */
+
+void zStr::getCompressedText(long block, long entry, char **buf) {
+
+ __u32 size = 0;
+
+ if (cacheBlockIndex != block) {
+ __u32 start = 0;
+
+ zdxfd->seek(block * ZDXENTRYSIZE, SEEK_SET);
+ zdxfd->read(&start, sizeof(__u32));
+ zdxfd->read(&size, sizeof(__u32));
+ start = swordtoarch32(start);
+ size = swordtoarch32(size);
+
+ SWBuf buf;
+ buf.setSize(size + 5);
+ zdtfd->seek(start, SEEK_SET);
+ zdtfd->read(buf.getRawData(), size);
+
+ flushCache();
+
+ unsigned long len = size;
+ buf.setSize(size);
+ rawZFilter(buf, 0); // 0 = decipher
+
+ compressor->zBuf(&len, buf.getRawData());
+ char *rawBuf = compressor->Buf(0, &len);
+ cacheBlock = new EntriesBlock(rawBuf, len);
+ cacheBlockIndex = block;
+ }
+ size = cacheBlock->getEntrySize(entry);
+ *buf = (*buf) ? (char *)realloc(*buf, size*2 + 1) : (char *)malloc(size*2 + 1);
+ strcpy(*buf, cacheBlock->getEntry(entry));
+}
+
+
+/******************************************************************************
+ * zLD::settext - Sets text for current offset
+ *
+ * ENT: key - key for this entry
+ * buf - buffer to store
+ * len - length of buffer (0 - null terminated)
+ */
+
+void zStr::setText(const char *ikey, const char *buf, long len) {
+
+ __u32 start, outstart;
+ __u32 size, outsize;
+ __s32 endoff;
+ long idxoff = 0;
+ __s32 shiftSize;
+ static const char nl[] = {13, 10};
+ char *tmpbuf = 0;
+ char *key = 0;
+ char *dbKey = 0;
+ char *idxBytes = 0;
+ char *outbuf = 0;
+ char *ch = 0;
+
+ len = (len < 0) ? strlen(buf) : len;
+ stdstr(&key, ikey, 3);
+ toupperstr_utf8(key, strlen(key)*3);
+
+ char notFound = findKeyIndex(ikey, &idxoff, 0);
+ if (!notFound) {
+ getKeyFromIdxOffset(idxoff, &dbKey);
+ int diff = strcmp(key, dbKey);
+ if (diff < 0) {
+ }
+ else if (diff > 0) {
+ idxoff += IDXENTRYSIZE;
+ }
+ else if ((!diff) && (len > 0 /*we're not deleting*/)) { // got absolute entry
+ do {
+ idxfd->seek(idxoff, SEEK_SET);
+ idxfd->read(&start, sizeof(__u32));
+ idxfd->read(&size, sizeof(__u32));
+ start = swordtoarch32(start);
+ size = swordtoarch32(size);
+
+ tmpbuf = new char [ size + 2 ];
+ memset(tmpbuf, 0, size + 2);
+ datfd->seek(start, SEEK_SET);
+ datfd->read(tmpbuf, size);
+
+ for (ch = tmpbuf; *ch; ch++) { // skip over index string
+ if (*ch == 10) {
+ ch++;
+ break;
+ }
+ }
+ memmove(tmpbuf, ch, size - (unsigned long)(ch-tmpbuf));
+
+ // resolve link
+ if (!strncmp(tmpbuf, "@LINK", 5) && (len)) {
+ for (ch = tmpbuf; *ch; ch++) { // null before nl
+ if (*ch == 10) {
+ *ch = 0;
+ break;
+ }
+ }
+ findKeyIndex(tmpbuf + IDXENTRYSIZE, &idxoff);
+ delete [] tmpbuf;
+ }
+ else break;
+ }
+ while (true); // while we're resolving links
+ }
+ }
+
+ endoff = idxfd->seek(0, SEEK_END);
+
+ shiftSize = endoff - idxoff;
+
+ if (shiftSize > 0) {
+ idxBytes = new char [ shiftSize ];
+ idxfd->seek(idxoff, SEEK_SET);
+ idxfd->read(idxBytes, shiftSize);
+ }
+
+ outbuf = new char [ len + strlen(key) + 5 ];
+ sprintf(outbuf, "%s%c%c", key, 13, 10);
+ size = strlen(outbuf);
+ if (len > 0) { // NOT a link
+ if (!cacheBlock) {
+ flushCache();
+ cacheBlock = new EntriesBlock();
+ cacheBlockIndex = (zdxfd->seek(0, SEEK_END) / ZDXENTRYSIZE);
+ }
+ else if (cacheBlock->getCount() >= blockCount) {
+ flushCache();
+ cacheBlock = new EntriesBlock();
+ cacheBlockIndex = (zdxfd->seek(0, SEEK_END) / ZDXENTRYSIZE);
+ }
+ __u32 entry = cacheBlock->addEntry(buf);
+ cacheDirty = true;
+ outstart = archtosword32(cacheBlockIndex);
+ outsize = archtosword32(entry);
+ memcpy (outbuf + size, &outstart, sizeof(__u32));
+ memcpy (outbuf + size + sizeof(__u32), &outsize, sizeof(__u32));
+ size += (sizeof(__u32) * 2);
+ }
+ else { // link
+ memcpy(outbuf + size, buf, len);
+ size += len;
+ }
+
+ start = datfd->seek(0, SEEK_END);
+
+ outstart = archtosword32(start);
+ outsize = archtosword32(size);
+
+ idxfd->seek(idxoff, SEEK_SET);
+ if (len > 0) {
+ datfd->seek(start, SEEK_SET);
+ datfd->write(outbuf, size);
+
+ // add a new line to make data file easier to read in an editor
+ datfd->write(&nl, 2);
+
+ idxfd->write(&outstart, sizeof(__u32));
+ idxfd->write(&outsize, sizeof(__u32));
+ if (idxBytes) {
+ idxfd->write(idxBytes, shiftSize);
+ }
+ }
+ else { // delete entry
+ if (idxBytes) {
+ idxfd->write(idxBytes+IDXENTRYSIZE, shiftSize-IDXENTRYSIZE);
+ idxfd->seek(-1, SEEK_CUR); // last valid byte
+ FileMgr::getSystemFileMgr()->trunc(idxfd); // truncate index
+ }
+ }
+
+ if (idxBytes)
+ delete [] idxBytes;
+ delete [] key;
+ delete [] outbuf;
+ free(dbKey);
+}
+
+
+/******************************************************************************
+ * zLD::linkentry - links one entry to another
+ *
+ * ENT: testmt - testament to find (0 - Bible/module introduction)
+ * destidxoff - dest offset into .vss
+ * srcidxoff - source offset into .vss
+ */
+
+void zStr::linkEntry(const char *destkey, const char *srckey) {
+ char *text = new char [ strlen(destkey) + 7 ];
+ sprintf(text, "@LINK %s", destkey);
+ setText(srckey, text);
+ delete [] text;
+}
+
+
+void zStr::flushCache() {
+ if (cacheBlock) {
+ if (cacheDirty) {
+ __u32 start = 0;
+ unsigned long size = 0;
+ __u32 outstart = 0, outsize = 0;
+
+ const char *rawBuf = cacheBlock->getRawData(&size);
+ compressor->Buf(rawBuf, &size);
+ compressor->zBuf(&size);
+
+ SWBuf buf;
+ buf.setSize(size + 5);
+ memcpy(buf.getRawData(), compressor->zBuf(&size), size); // 1 = encipher
+ buf.setSize(size);
+ rawZFilter(buf, 1); // 1 = encipher
+
+ long zdxSize = zdxfd->seek(0, SEEK_END);
+ unsigned long zdtSize = zdtfd->seek(0, SEEK_END);
+
+ if ((cacheBlockIndex * ZDXENTRYSIZE) > (zdxSize - ZDXENTRYSIZE)) { // New Block
+ start = zdtSize;
+ }
+ else {
+ zdxfd->seek(cacheBlockIndex * ZDXENTRYSIZE, SEEK_SET);
+ zdxfd->read(&start, sizeof(__u32));
+ zdxfd->read(&outsize, sizeof(__u32));
+ start = swordtoarch32(start);
+ outsize = swordtoarch32(outsize);
+ if (start + outsize >= zdtSize) { // last entry, just overwrite
+ // start is already set
+ }
+ else if (size < outsize) { // middle entry, but smaller, that's fine and let's preserve bigger size
+ size = outsize;
+ }
+ else { // middle and bigger-- we have serious problems, for now let's put it at the end = lots of wasted space
+ start = zdtSize;
+ }
+ }
+
+
+
+ outstart = archtosword32(start);
+ outsize = archtosword32((__u32)size);
+
+ zdxfd->seek(cacheBlockIndex * ZDXENTRYSIZE, SEEK_SET);
+ zdtfd->seek(start, SEEK_SET);
+ zdtfd->write(buf, size);
+
+ // add a new line to make data file easier to read in an editor
+ zdtfd->write(&nl, 2);
+
+ zdxfd->write(&outstart, sizeof(__u32));
+ zdxfd->write(&outsize, sizeof(__u32));
+ }
+ delete cacheBlock;
+ cacheBlock = 0;
+ }
+ cacheBlockIndex = -1;
+ cacheDirty = false;
+}
+
+
+/******************************************************************************
+ * zLD::CreateModule - Creates new module files
+ *
+ * ENT: path - directory to store module files
+ * RET: error status
+ */
+
+signed char zStr::createModule(const char *ipath) {
+ char *path = 0;
+ char *buf = new char [ strlen (ipath) + 20 ];
+ FileDesc *fd, *fd2;
+
+ stdstr(&path, ipath);
+
+ if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\'))
+ path[strlen(path)-1] = 0;
+
+ sprintf(buf, "%s.dat", path);
+ FileMgr::removeFile(buf);
+ fd = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE);
+ fd->getFd();
+ FileMgr::getSystemFileMgr()->close(fd);
+
+ sprintf(buf, "%s.idx", path);
+ FileMgr::removeFile(buf);
+ fd2 = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE);
+ fd2->getFd();
+ FileMgr::getSystemFileMgr()->close(fd2);
+
+ sprintf(buf, "%s.zdt", path);
+ FileMgr::removeFile(buf);
+ fd2 = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE);
+ fd2->getFd();
+ FileMgr::getSystemFileMgr()->close(fd2);
+
+ sprintf(buf, "%s.zdx", path);
+ FileMgr::removeFile(buf);
+ fd2 = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE);
+ fd2->getFd();
+ FileMgr::getSystemFileMgr()->close(fd2);
+
+ delete [] path;
+
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/common/zverse.cpp b/src/modules/common/zverse.cpp
new file mode 100644
index 0000000..3f91918
--- /dev/null
+++ b/src/modules/common/zverse.cpp
@@ -0,0 +1,538 @@
+/******************************************************************************
+ * zverse.h - code for class 'zVerse'- a module that reads raw text
+ * files: ot and nt using indexs ??.bks ??.cps ??.vss
+ * and provides lookup and parsing functions based on
+ * class VerseKey for compressed modules
+ */
+
+
+#include <ctype.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdlib.h>
+
+#include <utilstr.h>
+#include <versekey.h>
+#include <zverse.h>
+#include <sysdata.h>
+#include <swbuf.h>
+#include <filemgr.h>
+#include <swcomprs.h>
+
+
+SWORD_NAMESPACE_START
+
+/******************************************************************************
+ * zVerse Statics
+ */
+
+int zVerse::instance = 0;
+
+const char zVerse::uniqueIndexID[] = {'X', 'r', 'v', 'c', 'b'};
+
+/******************************************************************************
+ * zVerse Constructor - Initializes data for instance of zVerse
+ *
+ * ENT: ipath - path of the directory where data and index files are located.
+ * be sure to include the trailing separator (e.g. '/' or '\')
+ * (e.g. 'modules/texts/rawtext/webster/')
+ * fileMode - open mode for the files (FileMgr::RDONLY, etc.)
+ * blockType - verse, chapter, book, etc.
+ */
+
+zVerse::zVerse(const char *ipath, int fileMode, int blockType, SWCompress *icomp)
+{
+ // this line, instead of just defaulting, to keep FileMgr out of header
+ if (fileMode == -1) fileMode = FileMgr::RDONLY;
+
+ SWBuf buf;
+
+ nl = '\n';
+ path = 0;
+ cacheBufIdx = -1;
+ cacheTestament = 0;
+ cacheBuf = 0;
+ dirtyCache = false;
+ stdstr(&path, ipath);
+
+ if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\'))
+ path[strlen(path)-1] = 0;
+
+ compressor = (icomp) ? icomp : new SWCompress();
+
+ if (fileMode == -1) { // try read/write if possible
+ fileMode = FileMgr::RDWR;
+ }
+
+ buf.setFormatted("%s/ot.%czs", path, uniqueIndexID[blockType]);
+ idxfp[0] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
+
+ buf.setFormatted("%s/nt.%czs", path, uniqueIndexID[blockType]);
+ idxfp[1] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
+
+ buf.setFormatted("%s/ot.%czz", path, uniqueIndexID[blockType]);
+ textfp[0] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
+
+ buf.setFormatted("%s/nt.%czz", path, uniqueIndexID[blockType]);
+ textfp[1] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
+
+ buf.setFormatted("%s/ot.%czv", path, uniqueIndexID[blockType]);
+ compfp[0] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
+
+ buf.setFormatted("%s/nt.%czv", path, uniqueIndexID[blockType]);
+ compfp[1] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
+
+ instance++;
+}
+
+
+/******************************************************************************
+ * zVerse Destructor - Cleans up instance of zVerse
+ */
+
+zVerse::~zVerse()
+{
+ int loop1;
+
+ if (cacheBuf) {
+ flushCache();
+ free(cacheBuf);
+ }
+
+ if (path)
+ delete [] path;
+
+ if (compressor)
+ delete compressor;
+
+ --instance;
+
+ for (loop1 = 0; loop1 < 2; loop1++) {
+ FileMgr::getSystemFileMgr()->close(idxfp[loop1]);
+ FileMgr::getSystemFileMgr()->close(textfp[loop1]);
+ FileMgr::getSystemFileMgr()->close(compfp[loop1]);
+ }
+}
+
+
+/******************************************************************************
+ * zVerse::findoffset - Finds the offset of the key verse from the indexes
+ *
+ *
+ *
+ * ENT: testmt - testament to find (0 - Bible/module introduction)
+ * book - book to find (0 - testament introduction)
+ * chapter - chapter to find (0 - book introduction)
+ * verse - verse to find (0 - chapter introduction)
+ * start - address to store the starting offset
+ * size - address to store the size of the entry
+ */
+
+void zVerse::findOffset(char testmt, long idxoff, long *start, unsigned short *size)
+{
+ // set start to offset in
+ // set size to
+ // set
+ unsigned long ulBuffNum=0; // buffer number
+ unsigned long ulVerseStart=0; // verse offset within buffer
+ unsigned short usVerseSize=0; // verse size
+ unsigned long ulCompOffset=0; // compressed buffer start
+ unsigned long ulCompSize=0; // buffer size compressed
+ unsigned long ulUnCompSize=0; // buffer size uncompressed
+
+ *start = *size = 0;
+ //printf ("Finding offset %ld\n", idxoff);
+ idxoff *= 10;
+ if (!testmt) {
+ testmt = ((idxfp[0]) ? 1:2);
+ }
+
+ // assert we have and valid file descriptor
+ if (compfp[testmt-1]->getFd() < 1)
+ return;
+
+ long newOffset = compfp[testmt-1]->seek(idxoff, SEEK_SET);
+ if (newOffset == idxoff) {
+ if (compfp[testmt-1]->read(&ulBuffNum, 4) != 4) {
+ printf ("Error reading ulBuffNum\n");
+ return;
+ }
+ }
+ else return;
+
+ ulBuffNum = swordtoarch32(ulBuffNum);
+
+ if (compfp[testmt-1]->read(&ulVerseStart, 4) < 2)
+ {
+ printf ("Error reading ulVerseStart\n");
+ return;
+ }
+ if (compfp[testmt-1]->read(&usVerseSize, 2) < 2)
+ {
+ printf ("Error reading usVerseSize\n");
+ return;
+ }
+
+ *start = swordtoarch32(ulVerseStart);
+ *size = swordtoarch16(usVerseSize);
+
+ if (*size) {
+ if (((long) ulBuffNum == cacheBufIdx) && (testmt == cacheTestament) && (cacheBuf)) {
+ // have the text buffered
+ return;
+ }
+
+ //printf ("Got buffer number{%ld} versestart{%ld} versesize{%d}\n", ulBuffNum, ulVerseStart, usVerseSize);
+
+
+ if (idxfp[testmt-1]->seek(ulBuffNum*12, SEEK_SET)!=(long) ulBuffNum*12)
+ {
+ printf ("Error seeking compressed file index\n");
+ return;
+ }
+ if (idxfp[testmt-1]->read(&ulCompOffset, 4)<4)
+ {
+ printf ("Error reading ulCompOffset\n");
+ return;
+ }
+ if (idxfp[testmt-1]->read(&ulCompSize, 4)<4)
+ {
+ printf ("Error reading ulCompSize\n");
+ return;
+ }
+ if (idxfp[testmt-1]->read(&ulUnCompSize, 4)<4)
+ {
+ printf ("Error reading ulUnCompSize\n");
+ return;
+ }
+
+ ulCompOffset = swordtoarch32(ulCompOffset);
+ ulCompSize = swordtoarch32(ulCompSize);
+ ulUnCompSize = swordtoarch32(ulUnCompSize);
+
+ if (textfp[testmt-1]->seek(ulCompOffset, SEEK_SET)!=(long)ulCompOffset)
+ {
+ printf ("Error: could not seek to right place in compressed text\n");
+ return;
+ }
+ SWBuf pcCompText;
+ pcCompText.setSize(ulCompSize+5);
+
+ if (textfp[testmt-1]->read(pcCompText.getRawData(), ulCompSize)<(long)ulCompSize) {
+ printf ("Error reading compressed text\n");
+ return;
+ }
+ pcCompText.setSize(ulCompSize);
+ rawZFilter(pcCompText, 0); // 0 = decipher
+
+ compressor->zBuf(&ulCompSize, pcCompText.getRawData());
+
+ if (cacheBuf) {
+ flushCache();
+ free(cacheBuf);
+ }
+
+ unsigned long len = 0;
+ compressor->Buf(0, &len);
+ cacheBuf = (char *)calloc(len + 1, 1);
+ memcpy(cacheBuf, compressor->Buf(), len);
+
+ cacheTestament = testmt;
+ cacheBufIdx = ulBuffNum;
+ }
+}
+
+
+/******************************************************************************
+ * zVerse::zreadtext - gets text at a given offset
+ *
+ * ENT: testmt - testament file to search in (0 - Old; 1 - New)
+ * start - starting offset where the text is located in the file
+ * size - size of text entry + 1 (null)
+ * buf - buffer to store text
+ *
+ */
+
+void zVerse::zReadText(char testmt, long start, unsigned short size, SWBuf &inBuf) {
+ inBuf = "";
+ if ( (size > 0) && cacheBuf && ((unsigned)start < strlen(cacheBuf)) ){ //TODO: optimize this, remove strlen
+ inBuf.setFillByte(0);
+ inBuf.setSize(size+1);
+ strncpy(inBuf.getRawData(), &(cacheBuf[start]), size);
+ inBuf.setSize(strlen(inBuf.c_str()));
+ }
+}
+
+
+/******************************************************************************
+ * zVerse::settext - Sets text for current offset
+ *
+ * ENT: testmt - testament to find (0 - Bible/module introduction)
+ * idxoff - offset into .vss
+ * buf - buffer to store
+ * len - length of buffer (0 - null terminated)
+ */
+
+void zVerse::doSetText(char testmt, long idxoff, const char *buf, long len) {
+
+ len = (len < 0) ? strlen(buf) : len;
+ if (!testmt)
+ testmt = ((idxfp[0]) ? 1:2);
+ if ((!dirtyCache) || (cacheBufIdx < 0)) {
+ cacheBufIdx = idxfp[testmt-1]->seek(0, SEEK_END) / 12;
+ cacheTestament = testmt;
+ if (cacheBuf)
+ free(cacheBuf);
+ cacheBuf = (char *)calloc(len + 1, 1);
+ }
+ else cacheBuf = (char *)((cacheBuf)?realloc(cacheBuf, strlen(cacheBuf)+(len + 1)):calloc((len + 1), 1));
+
+ dirtyCache = true;
+
+ unsigned long start, outstart;
+ unsigned long outBufIdx = cacheBufIdx;
+ unsigned short size;
+ unsigned short outsize;
+
+ idxoff *= 10;
+ size = outsize = len;
+
+ start = strlen(cacheBuf);
+
+ if (!size)
+ start = outBufIdx = 0;
+
+ outBufIdx = archtosword32(outBufIdx);
+ outstart = archtosword32(start);
+ outsize = archtosword16(size);
+
+ compfp[testmt-1]->seek(idxoff, SEEK_SET);
+ compfp[testmt-1]->write(&outBufIdx, 4);
+ compfp[testmt-1]->write(&outstart, 4);
+ compfp[testmt-1]->write(&outsize, 2);
+ strcat(cacheBuf, buf);
+}
+
+
+void zVerse::flushCache() {
+ if (dirtyCache) {
+ unsigned long idxoff;
+ unsigned long start, outstart;
+ unsigned long size, outsize;
+ unsigned long zsize, outzsize;
+
+ idxoff = cacheBufIdx * 12;
+ if (cacheBuf) {
+ size = outsize = zsize = outzsize = strlen(cacheBuf);
+ if (size) {
+ // if (compressor) {
+ // delete compressor;
+ // compressor = new LZSSCompress();
+ // }
+ compressor->Buf(cacheBuf);
+ compressor->zBuf(&zsize);
+ outzsize = zsize;
+
+ SWBuf buf;
+ buf.setSize(zsize + 5);
+ memcpy(buf.getRawData(), compressor->zBuf(&zsize), zsize);
+ buf.setSize(zsize);
+ rawZFilter(buf, 1); // 1 = encipher
+
+ start = outstart = textfp[cacheTestament-1]->seek(0, SEEK_END);
+
+ outstart = archtosword32(start);
+ outsize = archtosword32(size);
+ outzsize = archtosword32(zsize);
+
+ textfp[cacheTestament-1]->write(buf, zsize);
+
+ idxfp[cacheTestament-1]->seek(idxoff, SEEK_SET);
+ idxfp[cacheTestament-1]->write(&outstart, 4);
+ idxfp[cacheTestament-1]->write(&outzsize, 4);
+ idxfp[cacheTestament-1]->write(&outsize, 4);
+ }
+ free(cacheBuf);
+ cacheBuf = 0;
+ }
+ dirtyCache = false;
+ }
+}
+
+/******************************************************************************
+ * RawVerse::linkentry - links one entry to another
+ *
+ * ENT: testmt - testament to find (0 - Bible/module introduction)
+ * destidxoff - dest offset into .vss
+ * srcidxoff - source offset into .vss
+ */
+
+void zVerse::doLinkEntry(char testmt, long destidxoff, long srcidxoff) {
+ long bufidx;
+ long start;
+ unsigned short size;
+
+ destidxoff *= 10;
+ srcidxoff *= 10;
+
+ if (!testmt)
+ testmt = ((idxfp[1]) ? 1:2);
+
+ // get source
+ compfp[testmt-1]->seek(srcidxoff, SEEK_SET);
+ compfp[testmt-1]->read(&bufidx, 4);
+ compfp[testmt-1]->read(&start, 4);
+ compfp[testmt-1]->read(&size, 2);
+
+ // write dest
+ compfp[testmt-1]->seek(destidxoff, SEEK_SET);
+ compfp[testmt-1]->write(&bufidx, 4);
+ compfp[testmt-1]->write(&start, 4);
+ compfp[testmt-1]->write(&size, 2);
+}
+
+
+/******************************************************************************
+ * RawVerse::CreateModule - Creates new module files
+ *
+ * ENT: path - directory to store module files
+ * RET: error status
+ */
+
+char zVerse::createModule(const char *ipath, int blockBound)
+{
+ char *path = 0;
+ char *buf = new char [ strlen (ipath) + 20 ];
+ FileDesc *fd, *fd2;
+
+ stdstr(&path, ipath);
+
+ if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\'))
+ path[strlen(path)-1] = 0;
+
+ sprintf(buf, "%s/ot.%czs", path, uniqueIndexID[blockBound]);
+ FileMgr::removeFile(buf);
+ fd = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE);
+ fd->getFd();
+ FileMgr::getSystemFileMgr()->close(fd);
+
+ sprintf(buf, "%s/nt.%czs", path, uniqueIndexID[blockBound]);
+ FileMgr::removeFile(buf);
+ fd = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE);
+ fd->getFd();
+ FileMgr::getSystemFileMgr()->close(fd);
+
+ sprintf(buf, "%s/ot.%czz", path, uniqueIndexID[blockBound]);
+ FileMgr::removeFile(buf);
+ fd = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE);
+ fd->getFd();
+ FileMgr::getSystemFileMgr()->close(fd);
+
+ sprintf(buf, "%s/nt.%czz", path, uniqueIndexID[blockBound]);
+ FileMgr::removeFile(buf);
+ fd2 = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE);
+ fd2->getFd();
+ FileMgr::getSystemFileMgr()->close(fd);
+
+ sprintf(buf, "%s/ot.%czv", path, uniqueIndexID[blockBound]);
+ FileMgr::removeFile(buf);
+ fd = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE);
+ fd->getFd();
+
+ sprintf(buf, "%s/nt.%czv", path, uniqueIndexID[blockBound]);
+ FileMgr::removeFile(buf);
+ fd2 = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE);
+ fd2->getFd();
+
+ VerseKey vk;
+ vk.Headings(1);
+ long offset = 0;
+ short size = 0;
+ for (vk = TOP; !vk.Error(); vk++) {
+ if (vk.Testament() == 1) {
+ fd->write(&offset, 4); //compBufIdxOffset
+ fd->write(&offset, 4);
+ fd->write(&size, 2);
+ }
+ else {
+ fd2->write(&offset, 4); //compBufIdxOffset
+ fd2->write(&offset, 4);
+ fd2->write(&size, 2);
+ }
+ }
+
+ FileMgr::getSystemFileMgr()->close(fd);
+ FileMgr::getSystemFileMgr()->close(fd2);
+
+ delete [] path;
+ delete [] buf;
+/*
+ RawVerse rv(path);
+ VerseKey mykey("Rev 22:21");
+*/
+
+ return 0;
+}
+
+
+/******************************************************************************
+ * zVerse::preptext - Prepares the text before returning it to external
+ * objects
+ *
+ * ENT: buf - buffer where text is stored and where to store the prep'd
+ * text.
+ */
+
+void zVerse::prepText(SWBuf &buf) {
+ unsigned int to, from;
+ char space = 0, cr = 0, realdata = 0, nlcnt = 0;
+ char *rawBuf = buf.getRawData();
+ for (to = from = 0; rawBuf[from]; from++) {
+ switch (rawBuf[from]) {
+ case 10:
+ if (!realdata)
+ continue;
+ space = (cr) ? 0 : 1;
+ cr = 0;
+ nlcnt++;
+ if (nlcnt > 1) {
+// *to++ = nl;
+ rawBuf[to++] = 10;
+// *to++ = nl[1];
+// nlcnt = 0;
+ }
+ continue;
+ case 13:
+ if (!realdata)
+ continue;
+// *to++ = nl[0];
+ rawBuf[to++] = 10;
+ space = 0;
+ cr = 1;
+ continue;
+ }
+ realdata = 1;
+ nlcnt = 0;
+ if (space) {
+ space = 0;
+ if (rawBuf[from] != ' ') {
+ rawBuf[to++] = ' ';
+ from--;
+ continue;
+ }
+ }
+ rawBuf[to++] = rawBuf[from];
+ }
+ buf.setSize(to);
+
+ while (to > 1) { // remove trailing excess
+ to--;
+ if ((rawBuf[to] == 10) || (rawBuf[to] == ' '))
+ buf.setSize(to);
+ else break;
+ }
+}
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/Makefile b/src/modules/filters/Makefile
new file mode 100644
index 0000000..1a2d00d
--- /dev/null
+++ b/src/modules/filters/Makefile
@@ -0,0 +1,5 @@
+
+root := ../../..
+
+all:
+ make -C ${root}
diff --git a/src/modules/filters/Makefile.am b/src/modules/filters/Makefile.am
new file mode 100644
index 0000000..7092c73
--- /dev/null
+++ b/src/modules/filters/Makefile.am
@@ -0,0 +1,105 @@
+filtersdir = $(top_srcdir)/src/modules/filters
+
+libsword_la_SOURCES += $(filtersdir)/swbasicfilter.cpp
+libsword_la_SOURCES += $(filtersdir)/swoptfilter.cpp
+
+GBFFIL = $(filtersdir)/gbfhtml.cpp
+GBFFIL += $(filtersdir)/gbfhtmlhref.cpp
+GBFFIL += $(filtersdir)/gbfwebif.cpp
+GBFFIL += $(filtersdir)/gbfplain.cpp
+GBFFIL += $(filtersdir)/gbfrtf.cpp
+GBFFIL += $(filtersdir)/gbfstrongs.cpp
+GBFFIL += $(filtersdir)/gbffootnotes.cpp
+GBFFIL += $(filtersdir)/gbfheadings.cpp
+GBFFIL += $(filtersdir)/gbfredletterwords.cpp
+GBFFIL += $(filtersdir)/gbfmorph.cpp
+GBFFIL += $(filtersdir)/gbfwordjs.cpp
+
+THMLFIL = $(filtersdir)/thmlstrongs.cpp
+THMLFIL += $(filtersdir)/thmlfootnotes.cpp
+THMLFIL += $(filtersdir)/thmlheadings.cpp
+THMLFIL += $(filtersdir)/thmlmorph.cpp
+THMLFIL += $(filtersdir)/thmllemma.cpp
+THMLFIL += $(filtersdir)/thmlscripref.cpp
+THMLFIL += $(filtersdir)/thmlvariants.cpp
+THMLFIL += $(filtersdir)/thmlgbf.cpp
+THMLFIL += $(filtersdir)/thmlrtf.cpp
+THMLFIL += $(filtersdir)/thmlhtml.cpp
+THMLFIL += $(filtersdir)/thmlhtmlhref.cpp
+THMLFIL += $(filtersdir)/thmlwebif.cpp
+THMLFIL += $(filtersdir)/thmlwordjs.cpp
+
+TEIFIL = $(filtersdir)/teiplain.cpp
+TEIFIL += $(filtersdir)/teirtf.cpp
+TEIFIL += $(filtersdir)/teihtmlhref.cpp
+
+CONVFIL = $(filtersdir)/gbfthml.cpp
+CONVFIL += $(filtersdir)/gbfosis.cpp
+CONVFIL += $(filtersdir)/thmlosis.cpp
+CONVFIL += $(filtersdir)/thmlplain.cpp
+CONVFIL += $(filtersdir)/osisosis.cpp
+
+OSISFIL = $(filtersdir)/osisheadings.cpp
+OSISFIL += $(filtersdir)/osisfootnotes.cpp
+OSISFIL += $(filtersdir)/osishtmlhref.cpp
+OSISFIL += $(filtersdir)/osiswebif.cpp
+OSISFIL += $(filtersdir)/osismorph.cpp
+OSISFIL += $(filtersdir)/osisstrongs.cpp
+OSISFIL += $(filtersdir)/osisplain.cpp
+OSISFIL += $(filtersdir)/osisrtf.cpp
+OSISFIL += $(filtersdir)/osislemma.cpp
+OSISFIL += $(filtersdir)/osisredletterwords.cpp
+OSISFIL += $(filtersdir)/osisscripref.cpp
+OSISFIL += $(filtersdir)/osisvariants.cpp
+OSISFIL += $(filtersdir)/osiswordjs.cpp
+OSISFIL += $(filtersdir)/osismorphsegmentation.cpp
+
+libsword_la_SOURCES += $(filtersdir)/latin1utf8.cpp
+libsword_la_SOURCES += $(filtersdir)/latin1utf16.cpp
+libsword_la_SOURCES += $(filtersdir)/utf8utf16.cpp
+libsword_la_SOURCES += $(filtersdir)/utf16utf8.cpp
+libsword_la_SOURCES += $(filtersdir)/scsuutf8.cpp
+libsword_la_SOURCES += $(filtersdir)/utf8html.cpp
+libsword_la_SOURCES += $(filtersdir)/utf8latin1.cpp
+
+libsword_la_SOURCES += $(filtersdir)/utf8cantillation.cpp
+libsword_la_SOURCES += $(filtersdir)/utf8hebrewpoints.cpp
+libsword_la_SOURCES += $(filtersdir)/utf8greekaccents.cpp
+
+libsword_la_SOURCES += $(filtersdir)/cipherfil.cpp
+
+PLFIL = $(filtersdir)/rtfhtml.cpp
+PLFIL += $(filtersdir)/plainfootnotes.cpp
+PLFIL += $(filtersdir)/plainhtml.cpp
+PLFIL += $(filtersdir)/greeklexattribs.cpp
+PLFIL += $(filtersdir)/unicodertf.cpp
+PLFIL += $(filtersdir)/papyriplain.cpp
+
+
+SWICUSRC = $(filtersdir)/utf8transliterator.cpp
+SWICUSRC += $(filtersdir)/utf8nfc.cpp
+SWICUSRC += $(filtersdir)/utf8nfkd.cpp
+SWICUSRC += $(filtersdir)/utf8arshaping.cpp
+SWICUSRC += $(filtersdir)/utf8bidireorder.cpp
+
+if ICU
+ICUDEFS = -D_ICU_
+DISTSWICUSRC =
+SWREALICUSRC = $(SWICUSRC)
+else
+if ICUSWORD
+ICUDEFS = -D_ICU_ -D_ICUSWORD_
+DISTSWICUSRC =
+SWREALICUSRC = $(SWICUSRC)
+else
+DISTSWICUSRC = $(SWICUSRC)
+SWREALICUSRC =
+endif
+endif
+
+AM_CPPFLAGS += $(ICUDEFS)
+libsword_la_SOURCES += $(SWREALICUSRC)
+EXTRA_DIST = $(DISTSWICUSRC)
+
+libsword_la_SOURCES += $(OSISFIL) $(GBFFIL) \
+ $(THMLFIL) $(CONVFIL) $(PLFIL) $(TEIFIL)
diff --git a/src/modules/filters/cipherfil.cpp b/src/modules/filters/cipherfil.cpp
new file mode 100644
index 0000000..24c665e
--- /dev/null
+++ b/src/modules/filters/cipherfil.cpp
@@ -0,0 +1,46 @@
+/******************************************************************************
+ *
+ * cipherfil - SWFilter descendant to decipher a module
+ */
+
+
+#include <stdlib.h>
+#include <cipherfil.h>
+#include <swcipher.h>
+#include <swbuf.h>
+
+SWORD_NAMESPACE_START
+
+CipherFilter::CipherFilter(const char *key) {
+ cipher = new SWCipher((unsigned char *)key);
+}
+
+
+CipherFilter::~CipherFilter() {
+ delete cipher;
+}
+
+
+SWCipher *CipherFilter::getCipher() {
+ return cipher;
+}
+
+
+char CipherFilter::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ if (text.length() > 2) { //check if it's large enough to substract 2 in the next step.
+ unsigned long len = text.length();
+ if (!key) { // hack, using key to determine encipher, or decipher
+ cipher->cipherBuf(&len, text.getRawData()); //set buffer to enciphered text
+ memcpy(text.getRawData(), cipher->Buf(), len);
+// text = cipher->Buf(); //get the deciphered buffer
+ }
+ else if ((unsigned long)key == 1) {
+ cipher->Buf(text.getRawData(), len);
+ memcpy(text.getRawData(), cipher->cipherBuf(&len), len);
+// text = cipher->cipherBuf(&len);
+ }
+ }
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/gbffootnotes.cpp b/src/modules/filters/gbffootnotes.cpp
new file mode 100644
index 0000000..bef29b8
--- /dev/null
+++ b/src/modules/filters/gbffootnotes.cpp
@@ -0,0 +1,193 @@
+/******************************************************************************
+ *
+ * gbffootnotes - SWFilter descendant to hide or show footnotes
+ * in a GBF module.
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <gbffootnotes.h>
+#include <swmodule.h>
+#include <swbuf.h>
+#include <versekey.h>
+#include <utilxml.h>
+
+SWORD_NAMESPACE_START
+
+const char oName[] = "Footnotes";
+const char oTip[] = "Toggles Footnotes On and Off if they exist";
+
+const SWBuf choices[3] = {"Off", "On", ""};
+const StringList oValues(&choices[0], &choices[2]);
+
+
+GBFFootnotes::GBFFootnotes() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("Off");
+}
+
+
+GBFFootnotes::~GBFFootnotes() {
+}
+
+
+char GBFFootnotes::processText (SWBuf &text, const SWKey *key, const SWModule *module)
+{
+
+ SWBuf token;
+ bool intoken = false;
+ bool hide = false;
+ SWBuf tagText;
+ XMLTag startTag;
+ SWBuf refs = "";
+ int footnoteNum = 1;
+ char buf[254];
+ VerseKey parser = key->getText();
+
+ SWBuf orig = text;
+ const char *from = orig.c_str();
+
+ //XMLTag tag;
+
+ for (text = ""; *from; from++) {
+ if (*from == '<') {
+ intoken = true;
+ token = "";
+ continue;
+ }
+ if (*from == '>') { // process tokens
+ intoken = false;
+
+ //XMLTag tag(token);
+ if (!strncmp(token, "RF",2)) {
+// tag = token;
+
+ refs = "";
+ startTag = token;
+ hide = true;
+ tagText = "";
+ continue;
+ }
+ else if (!strncmp(token, "Rf",2)) {
+ if (module->isProcessEntryAttributes()) {
+ //tag = token;
+
+ if((tagText.length() == 1) || !strcmp(module->Name(), "IGNT")) {
+ if (option) { // for ASV marks text in verse then put explanation at end of verse
+ text.append(" <FS>[");
+ text.append(tagText);
+ text.append("]<Fs>");
+ hide = false;
+ continue;
+ }
+ }
+ SWBuf fc = module->getEntryAttributes()["Footnote"]["count"]["value"];
+ footnoteNum = (fc.length()) ? atoi(fc.c_str()) : 0;
+ sprintf(buf, "%i", ++footnoteNum);
+ module->getEntryAttributes()["Footnote"]["count"]["value"] = buf;
+ StringList attributes = startTag.getAttributeNames();
+ for (StringList::const_iterator it = attributes.begin(); it != attributes.end(); it++) {
+ module->getEntryAttributes()["Footnote"][buf][it->c_str()] = startTag.getAttribute(it->c_str());
+ }
+ module->getEntryAttributes()["Footnote"][buf]["body"] = tagText;
+ startTag.setAttribute("swordFootnote", buf);
+ }
+ hide = false;
+ if (option) {
+ text.append(startTag);
+ text.append(tagText);
+ }
+ else continue;
+ }
+ if (!hide) {
+ text.append('<');
+ text.append(token);
+ text.append('>');
+ }
+ else {
+ tagText.append('<');
+ tagText.append(token);
+ tagText.append('>');
+ }
+ continue;
+ }
+ if (intoken) { //copy token
+ token.append(*from);
+ }
+ else if (!hide) { //copy text which is not inside a token
+ text.append(*from);
+ }
+ else tagText.append(*from);
+ }
+ return 0;
+
+ /*
+ if (!option) { // if we don't want footnotes
+ char token[4096]; // cheese. Fix.
+ int tokpos = 0;
+ bool intoken = false;
+ int len;
+ bool hide = false;
+
+ const char *from;
+ SWBuf orig = text;
+ from = orig.c_str();
+ for (text = ""; *from; from++) {
+ if (*from == '<') {
+ intoken = true;
+ tokpos = 0;
+// memset(token, 0, 4096);
+ token[0] = 0;
+ token[1] = 0;
+ token[2] = 0;
+ continue;
+ }
+ if (*from == '>') { // process tokens
+ intoken = false;
+ switch (*token) {
+ case 'R': // Reference
+ switch(token[1]) {
+ case 'F': // Begin footnote
+ hide = true;
+ break;
+ case 'f': // end footnote
+ hide = false;
+ break;
+ }
+ continue; // skip token
+ case 'W':
+ if (token[1] == 'T') {
+ switch (token[2]) {
+ case 'P':
+ case 'S':
+ case 'A':
+ continue; // remove this token
+ default:
+ break;
+ }
+ }
+ }
+ // if not a footnote token, keep token in text
+ if (!hide) {
+ text += '<';
+ text += token;
+ text += '>';
+ }
+ continue;
+ }
+ if (intoken) {
+ if (tokpos < 4090)
+ token[tokpos++] = *from;
+ token[tokpos+2] = 0; // +2 cuz we init token with 2 extra '0' because of switch statement
+ }
+ else {
+ if (!hide) {
+ text += *from;
+ }
+ }
+ }
+ }
+ return 0;*/
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/gbfheadings.cpp b/src/modules/filters/gbfheadings.cpp
new file mode 100644
index 0000000..81a4d94
--- /dev/null
+++ b/src/modules/filters/gbfheadings.cpp
@@ -0,0 +1,87 @@
+/******************************************************************************
+ *
+ * gbfheadings - SWFilter descendant to hide or show headings
+ * in a GBF module.
+ */
+
+
+#include <stdlib.h>
+#include <gbfheadings.h>
+
+SWORD_NAMESPACE_START
+
+
+const char oName[] = "Headings";
+const char oTip[] = "Toggles Headings On and Off if they exist";
+
+const SWBuf choices[3] = {"Off", "On", ""};
+const StringList oValues(&choices[0], &choices[2]);
+
+GBFHeadings::GBFHeadings() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("Off");
+}
+
+
+GBFHeadings::~GBFHeadings() {
+}
+
+
+char GBFHeadings::processText (SWBuf &text, const SWKey *key, const SWModule *module) {
+ if (!option) { // if we don't want headings
+ char token[2048]; // cheese. Fix.
+ int tokpos = 0;
+ bool intoken = false;
+ bool hide = false;
+
+ const char *from;
+ SWBuf orig = text;
+ from = orig.c_str();
+ for (text = ""; *from; from++) {
+ if (*from == '<') {
+ intoken = true;
+ tokpos = 0;
+// memset(token, 0, 2048);
+ token[0] = 0;
+ token[1] = 0;
+ token[2] = 0;
+ continue;
+ }
+ if (*from == '>') { // process tokens
+ intoken = false;
+ switch (*token) {
+ case 'T': // Reference
+ switch(token[1]) {
+ case 'S': // Begin heading
+ hide = true;
+ break;
+ case 's': // end heading
+ hide = false;
+ break;
+ }
+ continue; // skip token
+ }
+ // if not a heading token, keep token in text
+ if (!hide) {
+ text += '<';
+ for (char *tok = token; *tok; tok++)
+ text += *tok;
+ text += '>';
+ }
+ continue;
+ }
+ if (intoken) {
+ if (tokpos < 2045)
+ token[tokpos++] = *from;
+ token[tokpos+2] = 0;
+ }
+ else {
+ if (!hide) {
+ text += *from;
+ }
+ }
+ }
+ }
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/gbfhtml.cpp b/src/modules/filters/gbfhtml.cpp
new file mode 100644
index 0000000..a9d8434
--- /dev/null
+++ b/src/modules/filters/gbfhtml.cpp
@@ -0,0 +1,181 @@
+/***************************************************************************
+ gbfhtml.cpp - GBF to HTML filter
+ -------------------
+ begin : 2001-09-03
+ copyright : 2001 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <gbfhtml.h>
+#include <ctype.h>
+
+SWORD_NAMESPACE_START
+
+GBFHTML::GBFHTML() {
+ setTokenStart("<");
+ setTokenEnd(">");
+
+ setTokenCaseSensitive(true);
+
+ addTokenSubstitute("Rf", ")</small></font>");
+ addTokenSubstitute("Rx", "</a>");
+ addTokenSubstitute("FI", "<i>"); // italics begin
+ addTokenSubstitute("Fi", "</i>");
+ addTokenSubstitute("FB", "<n>"); // bold begin
+ addTokenSubstitute("Fb", "</n>");
+ addTokenSubstitute("FR", "<font color=\"#FF0000\">"); // words of Jesus begin
+ addTokenSubstitute("Fr", "</font>");
+ addTokenSubstitute("FU", "<u>"); // underline begin
+ addTokenSubstitute("Fu", "</u>");
+ addTokenSubstitute("FO", "<cite>"); // Old Testament quote begin
+ addTokenSubstitute("Fo", "</cite>");
+ addTokenSubstitute("FS", "<sup>"); // Superscript begin// Subscript begin
+ addTokenSubstitute("Fs", "</sup>");
+ addTokenSubstitute("FV", "<sub>"); // Subscript begin
+ addTokenSubstitute("Fv", "</sub>");
+ addTokenSubstitute("TT", "<big>"); // Book title begin
+ addTokenSubstitute("Tt", "</big>");
+ addTokenSubstitute("PP", "<cite>"); // poetry begin
+ addTokenSubstitute("Pp", "</cite>");
+ addTokenSubstitute("Fn", "</font>"); // font end
+ addTokenSubstitute("CL", "<br />"); // new line
+ addTokenSubstitute("CM", "<!P><br />"); // paragraph <!P> is a non showing comment that can be changed in the front end to <P> if desired
+ addTokenSubstitute("CG", ""); // ???
+ addTokenSubstitute("CT", ""); // ???
+ addTokenSubstitute("JR", "<div align=\"right\">"); // right align begin
+ addTokenSubstitute("JC", "<div align=\"center\">"); // center align begin
+ addTokenSubstitute("JL", "</div>"); // align end
+
+}
+
+
+bool GBFHTML::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+ const char *tok;
+ char val[128];
+ char *valto;
+ const char *num;
+ MyUserData *u = (MyUserData *)userData;
+
+ if (!substituteToken(buf, token)) {
+ // deal with OSIS note tags. Just hide till OSISRTF
+ if (!strncmp(token, "note ", 5)) {
+ // let's stop text from going to output
+ u->suspendTextPassThru = true;
+ }
+
+ else if (!strncmp(token, "/note", 5)) {
+ u->suspendTextPassThru = false;
+ }
+
+ else if (!strncmp(token, "w", 1)) {
+ // OSIS Word (temporary until OSISRTF is done)
+ valto = val;
+ num = strstr(token, "lemma=\"x-Strongs:");
+ if (num) {
+ for (num+=17; ((*num) && (*num != '\"')); num++)
+ *valto++ = *num;
+ *valto = 0;
+ if (atoi((!isdigit(*val))?val+1:val) < 5627) {
+ buf += " <small><em>&lt;";
+ for (tok = (!isdigit(*val))?val+1:val; *tok; tok++)
+ buf += *tok;
+ buf += "&gt;</em></small> ";
+ }
+ } else {
+ num = strstr(token, "lemma=\"strong:");
+ if (num) {
+ for (num+=14; ((*num) && (*num != '\"')); num++)
+ *valto++ = *num;
+ *valto = 0;
+ if (atoi((!isdigit(*val))?val+1:val) < 5627) {
+ buf += " <small><em>&lt;";
+ for (tok = (!isdigit(*val))?val+1:val; *tok; tok++)
+ buf += *tok;
+ buf += "&gt;</em></small> ";
+ }
+ }
+ }
+ valto = val;
+ num = strstr(token, "morph=\"x-Robinson:");
+ if (num) {
+ for (num+=18; ((*num) && (*num != '\"')); num++)
+ *valto++ = *num;
+ *valto = 0;
+ // normal robinsons tense
+ buf += " <small><em>(";
+ for (tok = val; *tok; tok++)
+ buf += *tok;
+ buf += ")</em></small> ";
+ }
+ }
+
+ else if (!strncmp(token, "WG", 2) || !strncmp(token, "WH", 2)) { // strong's numbers
+ buf += " <small><em>&lt;";
+ for (tok = token + 2; *tok; tok++)
+ buf += *tok;
+ buf += "&gt;</em></small> ";
+ }
+
+ else if (!strncmp(token, "WTG", 3) || !strncmp(token, "WTH", 3)) { // strong's numbers tense
+ buf += " <small><em>&lt;";
+ for (tok = token + 3; *tok; tok++)
+ if(*tok != '\"')
+ buf += *tok;
+ buf += ")</em></small> ";
+ }
+
+ else if (!strncmp(token, "RX", 2)) {
+ buf += "<i>";
+ for (tok = token + 3; *tok; tok++) {
+ if(*tok != '<' && *tok+1 != 'R' && *tok+2 != 'x') {
+ buf += *tok;
+ }
+ else {
+ break;
+ }
+ }
+ buf += "</i>";
+ }
+
+ else if (!strncmp(token, "RB", 2)) {
+ buf += "<i>";
+ u->hasFootnotePreTag = true;
+ }
+
+ else if (!strncmp(token, "RF", 2)) {
+ if (u->hasFootnotePreTag) {
+ u->hasFootnotePreTag = false;
+ buf += "</i> ";
+ }
+ buf += "<font color=\"#800000\"><small> (";
+ }
+
+ else if (!strncmp(token, "FN", 2)) {
+ buf += "<font face=\"";
+ for (tok = token + 2; *tok; tok++)
+ if(*tok != '\"')
+ buf += *tok;
+ buf += "\">";
+ }
+
+ else if (!strncmp(token, "CA", 2)) { // ASCII value
+ buf += (char)atoi(&token[2]);
+ }
+
+ else {
+ return false;
+ }
+ }
+ return true;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/gbfhtmlhref.cpp b/src/modules/filters/gbfhtmlhref.cpp
new file mode 100644
index 0000000..7f1c254
--- /dev/null
+++ b/src/modules/filters/gbfhtmlhref.cpp
@@ -0,0 +1,288 @@
+/***************************************************************************
+ gbfhtmlhref.cpp - GBF to HTML filter with hrefs
+ for strongs and morph tags
+ -------------------
+ begin : 2001-09-03
+ copyright : 2001 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <gbfhtmlhref.h>
+#include <swmodule.h>
+#include <utilxml.h>
+#include <versekey.h>
+#include <ctype.h>
+#include <url.h>
+
+SWORD_NAMESPACE_START
+
+GBFHTMLHREF::MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) {
+ if (module) {
+ version = module->Name();
+ }
+}
+
+GBFHTMLHREF::GBFHTMLHREF() {
+ setTokenStart("<");
+ setTokenEnd(">");
+
+ setTokenCaseSensitive(true);
+
+ //addTokenSubstitute("Rf", ")</small></font>");
+ addTokenSubstitute("FA", "<font color=\"#800000\">"); // for ASV footnotes to mark text
+ addTokenSubstitute("Rx", "</a>");
+ addTokenSubstitute("FI", "<i>"); // italics begin
+ addTokenSubstitute("Fi", "</i>");
+ addTokenSubstitute("FB", "<b>"); // bold begin
+ addTokenSubstitute("Fb", "</b>");
+ addTokenSubstitute("FR", "<font color=\"#FF0000\">"); // words of Jesus begin
+ addTokenSubstitute("Fr", "</font>");
+ addTokenSubstitute("FU", "<u>"); // underline begin
+ addTokenSubstitute("Fu", "</u>");
+ addTokenSubstitute("FO", "<cite>"); // Old Testament quote begin
+ addTokenSubstitute("Fo", "</cite>");
+ addTokenSubstitute("FS", "<sup>"); // Superscript begin// Subscript begin
+ addTokenSubstitute("Fs", "</sup>");
+ addTokenSubstitute("FV", "<sub>"); // Subscript begin
+ addTokenSubstitute("Fv", "</sub>");
+ addTokenSubstitute("TT", "<big>"); // Book title begin
+ addTokenSubstitute("Tt", "</big>");
+ addTokenSubstitute("PP", "<cite>"); // poetry begin
+ addTokenSubstitute("Pp", "</cite>");
+ addTokenSubstitute("Fn", "</font>"); // font end
+ addTokenSubstitute("CL", "<br />"); // new line
+ addTokenSubstitute("CM", "<!P><br />"); // paragraph <!P> is a non showing comment that can be changed in the front end to <P> if desired
+ addTokenSubstitute("CG", ""); // ???
+ addTokenSubstitute("CT", ""); // ???
+ addTokenSubstitute("JR", "<div align=\"right\">"); // right align begin
+ addTokenSubstitute("JC", "<div align=\"center\">"); // center align begin
+ addTokenSubstitute("JL", "</div>"); // align end
+
+}
+
+
+bool GBFHTMLHREF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+ const char *tok;
+ MyUserData *u = (MyUserData *)userData;
+
+ if (!substituteToken(buf, token)) {
+ XMLTag tag(token);
+ /*if (!strncmp(token, "w", 1)) {
+ // OSIS Word (temporary until OSISRTF is done)
+ valto = val;
+ num = strstr(token, "lemma=\"x-Strongs:");
+ if (num) {
+ for (num+=17; ((*num) && (*num != '\"')); num++)
+ *valto++ = *num;
+ *valto = 0;
+ if (atoi((!isdigit(*val))?val+1:val) < 5627) {
+ buf += " <small><em>&lt;<a href=\"type=Strongs value=";
+ for (tok = val; *tok; tok++)
+ buf += *tok;
+ buf += "\">";
+ for (tok = (!isdigit(*val))?val+1:val; *tok; tok++)
+ buf += *tok;
+ buf += "</a>&gt;</em></small> ";
+ //cout << buf;
+
+ }
+ // forget these for now
+ //else {
+ // verb morph
+ //sprintf(wordstr, "%03d", word-1);
+ //module->getEntryAttributes()["Word"][wordstr]["Morph"] = val;
+ //}
+ }
+ else {
+ num = strstr(token, "lemma=\"strong:");
+ if (num) {
+ for (num+=14; ((*num) && (*num != '\"')); num++)
+ *valto++ = *num;
+ *valto = 0;
+ if (atoi((!isdigit(*val))?val+1:val) < 5627) {
+ buf += " <small><em>&lt;<a href=\"type=Strongs value=";
+ for (tok = val; *tok; tok++)
+ buf += *tok;
+ buf += "\">";
+ for (tok = (!isdigit(*val))?val+1:val; *tok; tok++)
+ buf += *tok;
+ buf += "</a>&gt;</em></small> ";
+ //cout << buf;
+
+ }
+ // forget these for now
+ //else {
+ // verb morph
+ //sprintf(wordstr, "%03d", word-1);
+ //module->getEntryAttributes()["Word"][wordstr]["Morph"] = val;
+ //}
+ }
+ }
+ valto = val;
+ num = strstr(token, "morph=\"x-Robinson:");
+ if (num) {
+ for (num+=18; ((*num) && (*num != '\"')); num++)
+ *valto++ = *num;
+ *valto = 0;
+ buf += " <small><em>(<a href=\"type=morph class=Robinson value=";
+ for (tok = val; *tok; tok++)
+ // normal robinsons tense
+ buf += *tok;
+ buf += "\">";
+ for (tok = val; *tok; tok++)
+ //if(*tok != '\"')
+ buf += *tok;
+ buf += "</a>)</em></small> ";
+ }
+ }*/
+
+ // else
+ if (!strncmp(token, "WG", 2)) { // strong's numbers
+ //buf += " <small><em>&lt;<a href=\"type=Strongs value=";
+ buf += " <small><em>&lt;<a href=\"passagestudy.jsp?action=showStrongs&type=Greek&value=";
+ for (tok = token+2; *tok; tok++)
+ //if(token[i] != '\"')
+ buf += *tok;
+ buf += "\">";
+ for (tok = token + 2; *tok; tok++)
+ //if(token[i] != '\"')
+ buf += *tok;
+ buf += "</a>&gt;</em></small>";
+ }
+ else if (!strncmp(token, "WH", 2)) { // strong's numbers
+ //buf += " <small><em>&lt;<a href=\"type=Strongs value=";
+ buf += " <small><em>&lt;<a href=\"passagestudy.jsp?action=showStrongs&type=Hebrew&value=";
+ for (tok = token+2; *tok; tok++)
+ //if(token[i] != '\"')
+ buf += *tok;
+ buf += "\">";
+ for (tok = token + 2; *tok; tok++)
+ //if(token[i] != '\"')
+ buf += *tok;
+ buf += "</a>&gt;</em></small>";
+ }
+ else if (!strncmp(token, "WTG", 3)) { // strong's numbers tense
+ //buf += " <small><em>(<a href=\"type=Strongs value=";
+ buf += " <small><em>(<a href=\"passagestudy.jsp?action=showStrongs&type=Greek&value=";
+ for (tok = token + 3; *tok; tok++)
+ if(*tok != '\"')
+ buf += *tok;
+ buf += "\">";
+ for (tok = token + 3; *tok; tok++)
+ if(*tok != '\"')
+ buf += *tok;
+ buf += "</a>)</em></small>";
+ }
+ else if (!strncmp(token, "WTH", 3)) { // strong's numbers tense
+ //buf += " <small><em>(<a href=\"type=Strongs value=";
+ buf += " <small><em>(<a href=\"passagestudy.jsp?action=showStrongs&type=Hebrew&value=";
+ for (tok = token + 3; *tok; tok++)
+ if(*tok != '\"')
+ buf += *tok;
+ buf += "\">";
+ for (tok = token + 3; *tok; tok++)
+ if(*tok != '\"')
+ buf += *tok;
+ buf += "</a>)</em></small>";
+ }
+
+ else if (!strncmp(token, "WT", 2) && strncmp(token, "WTH", 3) && strncmp(token, "WTG", 3)) { // morph tags
+ //buf += " <small><em>(<a href=\"type=morph class=none value=";
+ buf += " <small><em>(<a href=\"passagestudy.jsp?action=showMorph&type=Greek&value=";
+
+ for (tok = token + 2; *tok; tok++)
+ if(*tok != '\"')
+ buf += *tok;
+ buf += "\">";
+ for (tok = token + 2; *tok; tok++)
+ if(*tok != '\"')
+ buf += *tok;
+ buf += "</a>)</em></small>";
+ }
+
+ else if (!strcmp(tag.getName(), "RX")) {
+ buf += "<a href=\"";
+ for (tok = token + 3; *tok; tok++) {
+ if(*tok != '<' && *tok+1 != 'R' && *tok+2 != 'x') {
+ buf += *tok;
+ }
+ else {
+ break;
+ }
+ }
+ buf += "\">";
+ }
+ else if (!strcmp(tag.getName(), "RF")) {
+ SWBuf type = tag.getAttribute("type");
+ SWBuf footnoteNumber = tag.getAttribute("swordFootnote");
+ VerseKey *vkey = NULL;
+ // see if we have a VerseKey * or descendant
+ SWTRY {
+ vkey = SWDYNAMIC_CAST(VerseKey, u->key);
+ }
+ SWCATCH ( ... ) { }
+ if (vkey) {
+ // leave this special osis type in for crossReference notes types? Might thml use this some day? Doesn't hurt.
+ //char ch = ((tag.getAttribute("type") && ((!strcmp(tag.getAttribute("type"), "crossReference")) || (!strcmp(tag.getAttribute("type"), "x-cross-ref")))) ? 'x':'n');
+ buf.appendFormatted("<a href=\"passagestudy.jsp?action=showNote&type=n&value=%s&module=%s&passage=%s\"><small><sup>*n</sup></small></a> ",
+ URL::encode(footnoteNumber.c_str()).c_str(),
+ URL::encode(u->version.c_str()).c_str(),
+ URL::encode(vkey->getText()).c_str());
+ }
+ u->suspendTextPassThru = true;
+ }
+ else if (!strcmp(tag.getName(), "Rf")) {
+ u->suspendTextPassThru = false;
+ }
+/*
+ else if (!strncmp(token, "RB", 2)) {
+ buf += "<i> ";
+ u->hasFootnotePreTag = true;
+ }
+
+ else if (!strncmp(token, "Rf", 2)) {
+ buf += "&nbsp<a href=\"note=";
+ buf += u->lastTextNode.c_str();
+ buf += "\">";
+ buf += "<small><sup>*n</sup></small></a>&nbsp";
+ // let's let text resume to output again
+ u->suspendTextPassThru = false;
+ }
+
+ else if (!strncmp(token, "RF", 2)) {
+ if (u->hasFootnotePreTag) {
+ u->hasFootnotePreTag = false;
+ buf += "</i> ";
+ }
+ u->suspendTextPassThru = true;
+ }
+*/
+ else if (!strncmp(token, "FN", 2)) {
+ buf += "<font face=\"";
+ for (tok = token + 2; *tok; tok++)
+ if(*tok != '\"')
+ buf += *tok;
+ buf += "\">";
+ }
+
+ else if (!strncmp(token, "CA", 2)) { // ASCII value
+ buf += (char)atoi(&token[2]);
+ }
+
+ else {
+ return false;
+ }
+ }
+ return true;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/gbfmorph.cpp b/src/modules/filters/gbfmorph.cpp
new file mode 100644
index 0000000..5226db7
--- /dev/null
+++ b/src/modules/filters/gbfmorph.cpp
@@ -0,0 +1,77 @@
+/******************************************************************************
+ *
+ * gbfmorph - SWFilter descendant to hide or show morph tags
+ * in a GBF module.
+ */
+
+
+#include <stdlib.h>
+#include <gbfmorph.h>
+
+SWORD_NAMESPACE_START
+
+const char oName[] = "Morphological Tags";
+const char oTip[] = "Toggles Morphological Tags On and Off if they exist";
+
+const SWBuf choices[3] = {"Off", "On", ""};
+const StringList oValues(&choices[0], &choices[2]);
+
+GBFMorph::GBFMorph() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("Off");
+}
+
+
+GBFMorph::~GBFMorph() {
+}
+
+
+char GBFMorph::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ if (!option) { // if we don't want morph tags
+ const char *from;
+ char token[2048]; // cheese. Fix.
+ int tokpos = 0;
+ bool intoken = false;
+ bool lastspace = false;
+
+ SWBuf orig = text;
+ from = orig.c_str();
+
+ for (text = ""; *from; from++) {
+ if (*from == '<') {
+ intoken = true;
+ tokpos = 0;
+ token[0] = 0;
+ token[1] = 0;
+ token[2] = 0;
+ continue;
+ }
+ if (*from == '>') { // process tokens
+ intoken = false;
+ if (*token == 'W' && token[1] == 'T') { // Morph
+ if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) {
+ if (lastspace)
+ text--;
+ }
+ continue;
+ }
+ // if not a morph tag token, keep token in text
+ text += '<';
+ text += token;
+ text += '>';
+ continue;
+ }
+ if (intoken) {
+ if (tokpos < 2045)
+ token[tokpos++] = *from;
+ token[tokpos+2] = 0;
+ }
+ else {
+ text += *from;
+ lastspace = (*from == ' ');
+ }
+ }
+ }
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/gbfosis.cpp b/src/modules/filters/gbfosis.cpp
new file mode 100644
index 0000000..00443f9
--- /dev/null
+++ b/src/modules/filters/gbfosis.cpp
@@ -0,0 +1,420 @@
+/******************************************************************************
+ *
+ * gbfstrongs - SWFilter descendant to hide or show strongs number
+ * in a GBF module.
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <gbfosis.h>
+#include <swmodule.h>
+#include <versekey.h>
+#include <swlog.h>
+#include <stdarg.h>
+
+SWORD_NAMESPACE_START
+
+GBFOSIS::GBFOSIS() {
+}
+
+
+GBFOSIS::~GBFOSIS() {
+}
+
+
+char GBFOSIS::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ char token[2048]; //cheesy, we seem to like cheese :)
+ int tokpos = 0;
+ bool intoken = false;
+ bool keepToken = false;
+
+// static QuoteStack quoteStack;
+
+ SWBuf orig = text;
+ SWBuf tmp;
+ SWBuf value;
+
+ bool suspendTextPassThru = false;
+ bool handled = false;
+ bool newWord = false;
+ bool newText = false;
+ bool lastspace = false;
+
+ const char *wordStart = text.c_str();
+ const char *wordEnd = NULL;
+
+ const char *textStart = NULL;
+ const char *textEnd = NULL;
+
+ SWBuf textNode = "";
+
+ SWBuf buf;
+
+ text = "";
+ for (const char* from = orig.c_str(); *from; ++from) {
+ if (*from == '<') { //start of new token detected
+ intoken = true;
+ tokpos = 0;
+ token[0] = 0;
+ token[1] = 0;
+ token[2] = 0;
+ textEnd = from-1; //end of last text node found
+ wordEnd = text.c_str() + text.length();//not good, instead of wordEnd = to!
+
+ continue;
+ }
+
+ if (*from == '>') { // process tokens
+ intoken = false;
+ keepToken = false;
+ suspendTextPassThru = false;
+ newWord = true;
+ handled = false;
+
+ while (wordStart < (text.c_str() + text.length())) { //hack
+ if (strchr(";,. :?!()'\"", *wordStart) && wordStart[0] && wordStart[1])
+ wordStart++;
+ else break;
+ }
+ while (wordEnd > wordStart) {
+ if (strchr(" ,;:.?!()'\"", *wordEnd))
+ wordEnd--;
+ else break;
+ }
+
+ // Scripture Reference
+ if (!strncmp(token, "scripRef", 8)) {
+ suspendTextPassThru = true;
+ newText = true;
+ handled = true;
+ }
+ else if (!strncmp(token, "/scripRef", 9)) {
+ tmp = "";
+ tmp.append(textStart, (int)(textEnd - textStart)+1);
+ text += VerseKey::convertToOSIS(tmp.c_str(), key);
+
+ lastspace = false;
+ suspendTextPassThru = false;
+ handled = true;
+ }
+
+ // Footnote
+ if (!strcmp(token, "RF") || !strncmp(token, "RF ", 3)) { //the GBFFootnotes filter adds the attribute "swordFootnote", we want to catch that, too
+ // pushString(buf, "<reference work=\"Bible.KJV\" reference=\"");
+ text += "<note type=\"x-StudyNote\">";
+ newText = true;
+ lastspace = false;
+ handled = true;
+ }
+ else if (!strcmp(token, "Rf")) {
+ text += "</note>";
+ lastspace = false;
+ handled = true;
+ }
+ // hebrew titles
+ if (!strcmp(token, "TH")) {
+ text += "<title type=\"psalm\">";
+ newText = true;
+ lastspace = false;
+ handled = true;
+ }
+ else if (!strcmp(token, "Th")) {
+ text += "</title>";
+ lastspace = false;
+ handled = true;
+ }
+ // Italics assume transchange
+ if (!strcmp(token, "FI")) {
+ text += "<transChange type=\"added\">";
+ newText = true;
+ lastspace = false;
+ handled = true;
+ }
+ else if (!strcmp(token, "Fi")) {
+ text += "</transChange>";
+ lastspace = false;
+ handled = true;
+ }
+ // less than
+ if (!strcmp(token, "CT")) {
+ text += "&lt;";
+ newText = true;
+ lastspace = false;
+ handled = true;
+ }
+ // greater than
+ if (!strcmp(token, "CG")) {
+ text += "&gt;";
+ newText = true;
+ lastspace = false;
+ handled = true;
+ }
+ // Paragraph break. For now use empty paragraph element
+ if (!strcmp(token, "CM")) {
+ text += "<milestone type=\"x-p\" />";
+ newText = true;
+ lastspace = false;
+ handled = true;
+ }
+
+ // Figure
+ else if (!strncmp(token, "img ", 4)) {
+ const char *src = strstr(token, "src");
+ if (!src) // assert we have a src attribute
+ continue;
+// return false;
+
+ text += "<figure src=\"";
+ const char *c;
+ for (c = src;((*c) && (*c != '"')); c++);
+
+// uncomment for SWORD absolute path logic
+// if (*(c+1) == '/') {
+// pushString(buf, "file:");
+// pushString(buf, module->getConfigEntry("AbsoluteDataPath"));
+// if (*((*buf)-1) == '/')
+// c++; // skip '/'
+// }
+// end of uncomment for asolute path logic
+
+ for (c++;((*c) && (*c != '"')); c++) {
+ text += *c;
+ }
+ text += "\" />";
+
+ lastspace = false;
+ handled = true;
+ }
+
+ // Strongs numbers
+ else if (*token == 'W' && (token[1] == 'G' || token[1] == 'H')) { // Strongs
+ bool divineName = false;
+ value = token+1;
+
+ // normal strongs number
+ //strstrip(val);
+ if (!strncmp(wordStart, "<w ", 3)) {
+ const char *attStart = strstr(wordStart, "lemma");
+ if (attStart) {
+ attStart += 7;
+
+ buf = "";
+ buf.appendFormatted("strong:%s ", value.c_str());
+ }
+ else { // no lemma attribute
+ attStart = wordStart + 3;
+
+ buf = "";
+ buf.appendFormatted(buf, "lemma=\"strong:%s\" ", value.c_str());
+ }
+
+ text.insert(attStart - text.c_str(), buf);
+ }
+ else { //wordStart doesn't point to an existing <w> attribute!
+ if (!strcmp(value.c_str(), "H03068")) { //divineName
+ buf = "";
+ buf.appendFormatted("<divineName><w lemma=\"strong:%s\">", value.c_str());
+
+ divineName = true;
+ }
+ else {
+ buf = "";
+ buf.appendFormatted("<w lemma=\"strong:%s\">", value.c_str());
+ }
+
+ text.insert(wordStart - text.c_str(), buf);
+
+ if (divineName) {
+ wordStart += 12;
+ text += "</w></divineName>";
+ }
+ else text += "</w>";
+
+ lastspace = false;
+ }
+ handled = true;
+ }
+
+ // Morphology
+ else if (*token == 'W' && token[1] == 'T') {
+ if (token[2] == 'G' || token[2] == 'H') { // Strongs
+ value = token+2;
+ }
+ else value = token+1;
+
+ if (!strncmp(wordStart, "<w ", 3)) {
+ const char *attStart = strstr(wordStart, "morph");
+ if (attStart) { //existing morph attribute, append this one to it
+ attStart += 7;
+ buf = "";
+ buf.appendFormatted("%s:%s ", "robinson", value.c_str());
+ }
+ else { // no lemma attribute
+ attStart = wordStart + 3;
+ buf = "";
+ buf.appendFormatted("morph=\"%s:%s\" ", "robinson", value.c_str());
+ }
+
+ text.insert(attStart - text.c_str(), buf); //hack, we have to
+ }
+ else { //no existing <w> attribute fond
+ buf = "";
+ buf.appendFormatted("<w morph=\"%s:%s\">", "robinson", value.c_str());
+ text.insert(wordStart - text.c_str(), buf);
+ text += "</w>";
+ lastspace = false;
+
+ }
+ handled = true;
+ }
+
+ if (!keepToken) {
+ if (!handled) {
+ SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? (const char*)*key : "<unknown>");
+// exit(-1);
+ }
+ if (from[1] && strchr(" ,;.:?!()'\"", from[1])) {
+ if (lastspace) {
+ text--;
+ }
+ }
+ if (newText) {
+ textStart = from+1;
+ newText = false;
+ }
+ continue;
+ }
+
+ // if not a strongs token, keep token in text
+ text.appendFormatted("<%s>", token);
+
+ if (newText) {
+ textStart = text.c_str() + text.length();
+ newWord = false;
+ }
+ continue;
+ }
+ if (intoken) {
+ if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) {
+ token[tokpos++] = *from;
+ token[tokpos+2] = 0;
+ }
+ }
+ else {
+ switch (*from) {
+ case '\'':
+ case '\"':
+ case '`':
+// quoteStack.handleQuote(fromStart, from, &to);
+ text += *from;
+ //from++; //this line removes chars after an apostrophe! Needs fixing.
+ break;
+ default:
+ if (newWord && (*from != ' ')) {
+ wordStart = text.c_str() + text.length();
+ newWord = false;
+
+ //fix this if required?
+ //memset(to, 0, 10);
+
+ }
+
+ if (!suspendTextPassThru) {
+ text += (*from);
+ lastspace = (*from == ' ');
+ }
+ }
+ }
+ }
+
+ VerseKey *vkey = SWDYNAMIC_CAST(VerseKey, key);
+ if (vkey) {
+ SWBuf ref = "";
+ if (vkey->Verse()) {
+ ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef());
+ }
+
+ if (ref.length() > 0) {
+
+ text = ref + text;
+
+ if (vkey->Verse()) {
+ VerseKey tmp;
+ tmp = *vkey;
+ tmp.AutoNormalize(0);
+ tmp.Headings(1);
+
+ text += "</verse>";
+
+ tmp = MAXVERSE;
+ if (*vkey == tmp) {
+ tmp.Verse(0);
+// sprintf(ref, "\t</div>");
+// pushString(&to, ref);
+ tmp = MAXCHAPTER;
+ tmp = MAXVERSE;
+ if (*vkey == tmp) {
+ tmp.Chapter(0);
+ tmp.Verse(0);
+// sprintf(ref, "\t</div>");
+// pushString(&to, ref);
+/*
+ if (!quoteStack.empty()) {
+ SWLog::getSystemLog()->logError("popping unclosed quote at end of book");
+ quoteStack.clear();
+ }
+*/
+ }
+ }
+ }
+// else if (vkey->Chapter()) {
+// sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef());
+// }
+// else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef());
+ }
+ }
+ return 0;
+}
+
+
+QuoteStack::QuoteStack() {
+ clear();
+}
+
+
+void QuoteStack::clear() {
+ while (!quotes.empty()) quotes.pop();
+}
+
+
+QuoteStack::~QuoteStack() {
+ clear();
+}
+
+
+void QuoteStack::handleQuote(char *buf, char *quotePos, SWBuf &text) {
+//QuoteInstance(char startChar = '\"', char level = 1, string uniqueID = "", char continueCount = 0) {
+ if (!quotes.empty()) {
+ QuoteInstance last = quotes.top();
+ if (last.startChar == *quotePos) {
+ text += "</quote>";
+ quotes.pop();
+ }
+ else {
+ quotes.push(QuoteInstance(*quotePos, last.level+1));
+ quotes.top().pushStartStream(text);
+ }
+ }
+ else {
+ quotes.push(QuoteInstance(*quotePos));
+ quotes.top().pushStartStream(text);
+ }
+}
+
+void QuoteStack::QuoteInstance::pushStartStream(SWBuf &text) {
+ text.appendFormatted("<quote level=\"%d\">", level);
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/gbfplain.cpp b/src/modules/filters/gbfplain.cpp
new file mode 100644
index 0000000..5657e20
--- /dev/null
+++ b/src/modules/filters/gbfplain.cpp
@@ -0,0 +1,97 @@
+/******************************************************************************
+ *
+ * gbfplain - SWFilter descendant to strip out all GBF tags or convert to
+ * ASCII rendered symbols.
+ */
+
+
+#include <stdlib.h>
+#include <gbfplain.h>
+#include <swbuf.h>
+
+SWORD_NAMESPACE_START
+
+GBFPlain::GBFPlain() {
+}
+
+
+char GBFPlain::processText (SWBuf &text, const SWKey *key, const SWModule *module)
+{
+ char token[2048];
+ int tokpos = 0;
+ bool intoken = false;
+ SWBuf orig = text;
+ const char* from = orig.c_str();
+
+ for (text = ""; *from; ++from) {
+ if (*from == '<') {
+ intoken = true;
+ tokpos = 0;
+ token[0] = 0;
+ token[1] = 0;
+ token[2] = 0;
+ continue;
+ }
+ if (*from == '>') {
+ intoken = false;
+ // process desired tokens
+ switch (*token) {
+ case 'W': // Strongs
+ switch(token[1]) {
+ case 'G': // Greek
+ case 'H': // Hebrew
+ case 'T': // Tense
+ text.append(" <");
+ //for (char *tok = token + 2; *tok; tok++)
+ // text += *tok;
+ text.append(token+2);
+ text.append("> ");
+ continue;
+ }
+ break;
+ case 'R':
+ switch(token[1]) {
+ case 'F': // footnote begin
+ text.append(" [");
+ continue;
+ case 'f': // footnote end
+ text.append("] ");
+ continue;
+ }
+ break;
+ case 'C':
+ switch(token[1]) {
+ case 'A': // ASCII value
+ text.append((char)atoi(&token[2]));
+ continue;
+ case 'G':
+ text.append('>');
+ continue;
+/* Bug in WEB
+ case 'L':
+ *to++ = '<';
+ continue;
+*/
+ case 'L': // Bug in WEB. Use above entry when fixed
+ case 'N': // new line
+ text.append('\n');
+ continue;
+ case 'M': // new paragraph
+ text.append("\n\n");
+ continue;
+ }
+ break;
+ }
+ continue;
+ }
+ if (intoken) {
+ if (tokpos < 2045)
+ token[tokpos++] = *from;
+ token[tokpos+2] = 0;
+ }
+ else text.append(*from);
+ }
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/gbfredletterwords.cpp b/src/modules/filters/gbfredletterwords.cpp
new file mode 100644
index 0000000..a79802d
--- /dev/null
+++ b/src/modules/filters/gbfredletterwords.cpp
@@ -0,0 +1,93 @@
+/******************************************************************************
+ *
+ * GBFRedLetterWords - SWFilter descendant to toggle red coloring of words of
+ * Christ in a GBF module.
+ */
+
+
+#include <stdlib.h>
+#include <gbfredletterwords.h>
+#include <swmodule.h>
+#include <ctype.h>
+
+SWORD_NAMESPACE_START
+
+const char oName[] = "Words of Christ in Red";
+const char oTip[] = "Toggles Red Coloring for Words of Christ On and Off if they are marked";
+
+const SWBuf choices[3] = {"Off", "On", ""};
+const StringList oValues(&choices[0], &choices[2]);
+
+GBFRedLetterWords::GBFRedLetterWords() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("Off");
+}
+
+
+GBFRedLetterWords::~GBFRedLetterWords() {
+}
+
+
+char GBFRedLetterWords::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+/** This function removes the red letter words in Bible like the WEB
+* The words are marked by <FR> as start and <Fr> as end tag.
+*/
+ if (!option) { // if we don't want footnotes
+ char token[4096]; // cheese. Fix.
+ int tokpos = 0;
+ bool intoken = false;
+ bool hide = false;
+
+ const char *from;
+ SWBuf orig = text;
+ from = orig.c_str();
+ for (text = ""; *from; from++) {
+ if (*from == '<') {
+ intoken = true;
+ tokpos = 0;
+// memset(token, 0, 4096);
+ token[0] = 0;
+ token[1] = 0;
+ token[2] = 0;
+ continue;
+ }
+ if (*from == '>') { // process tokens
+ intoken = false;
+ /*switch (*token) {
+ case 'F': // Font attribute
+ switch(token[1]) {
+ case 'R': // Begin red letter words
+ hide = true;
+ break;
+ case 'r': // end red letter words
+ hide = false;
+ break;
+ }
+ continue; // skip token
+ }*/
+
+ //hide the token if either FR or Fr was detected
+ hide = (token[0] == 'F' && ( (token[1] == 'R') || (token[1] == 'r') ));
+
+ // if not a red letter word token, keep token in text
+ if (!hide) {
+ text += '<';
+ for (char *tok = token; *tok; tok++)
+ text += *tok;
+ text += '>';
+ }
+ continue;
+ }
+ if (intoken) {
+ if (tokpos < 4090)
+ token[tokpos++] = *from;
+ token[tokpos+2] = 0; // +2 cuz we init token with 2 extra '0' because of switch statement
+ }
+ else {
+ text += *from;
+ }
+ }
+ }
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/gbfrtf.cpp b/src/modules/filters/gbfrtf.cpp
new file mode 100644
index 0000000..eb39612
--- /dev/null
+++ b/src/modules/filters/gbfrtf.cpp
@@ -0,0 +1,311 @@
+/******************************************************************************
+ *
+ * gbfrtf - SWFilter descendant to convert all GBF tags to RTF tags
+ */
+
+
+#include <gbfrtf.h>
+#include <utilstr.h>
+#include <ctype.h>
+#include <swbuf.h>
+
+SWORD_NAMESPACE_START
+
+GBFRTF::GBFRTF() {
+}
+
+
+char GBFRTF::processText(SWBuf &text, const SWKey *key, const SWModule *module)
+{
+ char token[2048];
+ char val[128];
+ char *valto;
+ char *num;
+ int tokpos = 0;
+ bool intoken = false;
+ const char *tok;
+ SWBuf strongnum;
+ SWBuf strongtense;
+ bool hideText = false;
+ int wordLen = 0;
+ int wordCount = 0;
+
+ const char *from;
+ SWBuf orig = text;
+ from = orig.c_str();
+ for (text = ""; *from; from++) {
+ if (*from == '<') {
+ wordLen = wordCount;
+ wordCount = 0;
+ intoken = true;
+ tokpos = 0;
+ token[0] = 0;
+ token[1] = 0;
+ token[2] = 0;
+ continue;
+ }
+ if (*from == '>') {
+ intoken = false;
+ // process desired tokens
+ // deal with OSIS note tags. Just hide till OSISRTF
+ if (!strncmp(token, "note ", 5)) {
+ hideText = true;
+ }
+ if (!strncmp(token, "/note", 5)) {
+ hideText = false;
+ }
+
+ switch (*token) {
+ case 'w': // OSIS Word (temporary until OSISRTF is done)
+ strongnum = "";
+ strongtense = "";
+ valto = val;
+ num = strstr(token, "lemma=\"x-Strongs:");
+ if (num) {
+ for (num+=17; ((*num) && (*num != '\"')); num++)
+ *valto++ = *num;
+ *valto = 0;
+ if (atoi((!isdigit(*val))?val+1:val) < 5627) {
+ // normal strongs number
+ strongnum += "{\\cf3 \\sub <";
+ for (tok = (!isdigit(*val))?val+1:val; *tok; tok++)
+ strongnum += *tok;
+ strongnum += ">}";
+ }
+ /* forget these for now
+ else {
+ // verb morph
+ sprintf(wordstr, "%03d", word-1);
+ module->getEntryAttributes()["Word"][wordstr]["Morph"] = val;
+ }
+ */
+ }
+ else {
+ num = strstr(token, "lemma=\"strong:");
+ if (num) {
+ for (num+=14; ((*num) && (*num != '\"')); num++)
+ *valto++ = *num;
+ *valto = 0;
+ if (atoi((!isdigit(*val))?val+1:val) < 5627) {
+ // normal strongs number
+ strongnum += "{\\cf3 \\sub <";
+ for (tok = (!isdigit(*val))?val+1:val; *tok; tok++)
+ strongnum += *tok;
+ strongnum += ">}";
+ }
+ /* forget these for now
+ else {
+ // verb morph
+ sprintf(wordstr, "%03d", word-1);
+ module->getEntryAttributes()["Word"][wordstr]["Morph"] = val;
+ }
+ */
+ }
+ }
+ valto = val;
+ num = strstr(token, "morph=\"x-Robinson:");
+ if (num) {
+ for (num+=18; ((*num) && (*num != '\"')); num++)
+ *valto++ = *num;
+ *valto = 0;
+ // normal robinsons tense
+ strongtense += "{\\cf4 \\sub (";
+ for (tok = val; *tok; tok++)
+ strongtense += *tok;
+ strongtense += ")}";
+ }
+ continue;
+
+ case '/':
+ if (token[1] == 'w') {
+ if ((wordCount > 0) || (strongnum != "{\\cf3 \\sub <3588>}")) {
+ //for (i = 0; i < strongnum.length(); i++)
+ text += strongnum;
+ //for (i = 0; i < strongtense.length(); i++)
+ text += strongtense;
+ }
+ }
+ continue;
+
+ case 'W': // Strongs
+ switch(token[1]) {
+ case 'G': // Greek
+ case 'H': // Hebrew
+ text += "{\\cf3 \\sub <";
+ for (tok = token + 2; *tok; tok++)
+ text += *tok;
+ text += ">}";
+ continue;
+
+ case 'T': // Tense
+ text += "{\\cf4 \\sub (";
+ bool separate = false;
+ for (tok = token + 2; *tok; tok++) {
+ if (separate) {
+ text += "; ";
+ separate = false;
+ }
+ switch (*tok) {
+ case 'G':
+ case 'H':
+ for (tok++; *tok; tok++) {
+ if (isdigit(*tok)) {
+ text += *tok;
+ separate = true;
+ }
+ else {
+ tok--;
+ break;
+ }
+ }
+ break;
+ default:
+ for (; *tok; tok++) {
+ text += *tok;
+ }
+ }
+ }
+ text += ")}";
+ continue;
+ }
+ break;
+ case 'R':
+ switch(token[1]) {
+ case 'X':
+ text += "<a href=\"\">";
+ continue;
+ case 'x':
+ text += "</a>";
+ continue;
+ case 'F': // footnote begin
+ text += "{\\i1 \\sub [ ";
+ continue;
+ case 'f': // footnote end
+ text += " ] }";
+ continue;
+ }
+ break;
+ case 'F': // font tags
+ switch(token[1]) {
+ case 'I': // italic start
+ text += "\\i1 ";
+ continue;
+ case 'i': // italic end
+ text += "\\i0 ";
+ continue;
+ case 'B': // bold start
+ text += "\\b1 ";
+ continue;
+ case 'b': // bold end
+ text += "\\b0 ";
+ continue;
+ case 'N':
+ text += '{';
+ if (!strnicmp(token+2, "Symbol", 6))
+ text += "\\f7 ";
+ if (!strnicmp(token+2, "Courier", 7))
+ text += "\\f8 ";
+ continue;
+ case 'n':
+ text += '}';
+ continue;
+ case 'S':
+ text += "{\\super ";
+ continue;
+ case 's':
+ text += '}';
+ continue;
+ case 'R':
+ text += "{\\cf6 ";
+ continue;
+ case 'r':
+ text += '}';
+ continue;
+ case 'O':
+ case 'C':
+ text += "\\scaps1 ";
+ continue;
+ case 'o':
+ case 'c':
+ text += "\\scaps0 ";
+ continue;
+ case 'V':
+ text += "{\\sub ";
+ continue;
+ case 'v':
+ text += '}';
+ continue;
+ case 'U':
+ text += "\\ul1 ";
+ continue;
+ case 'u':
+ text += "\\ul0 ";
+ continue;
+ }
+ break;
+ case 'C': // special character tags
+ switch(token[1]) {
+ case 'A': // ASCII value
+ text += (char)atoi(&token[2]);
+ continue;
+ case 'G':
+ text += '>';
+ continue;
+ case 'L': // line break
+ text += "\\line ";
+ continue;
+ case 'M': // new paragraph
+ text += "\\par ";
+ continue;
+ case 'T':
+ text += '<';
+ }
+ break;
+ case 'T': // title formatting
+ switch(token[1])
+ {
+ case 'T': // Book title begin
+ text += "{\\large ";
+ continue;
+ case 't':
+ text += '}';
+ continue;
+ case 'S':
+ text += "\\par {\\i1\\b1 ";
+ continue;
+ case 's':
+ text += "}\\par ";
+ continue;
+ }
+ break;
+ case 'J': // Strongs
+ switch(token[1]) {
+ case 'L':
+ text += "\\ql ";
+ case 'C':
+ text += "\\qc ";
+ case 'R':
+ text += "\\qr ";
+ case 'F':
+ text += "\\qj ";
+ }
+ }
+ continue;
+ }
+ if (intoken) {
+ if (tokpos < 2045)
+ token[tokpos++] = *from;
+ token[tokpos+2] = 0;
+ }
+ else {
+ if (!hideText) {
+ wordCount++;
+ text += *from;
+ }
+ }
+ }
+ return 0;
+}
+
+SWORD_NAMESPACE_END
+
diff --git a/src/modules/filters/gbfstrongs.cpp b/src/modules/filters/gbfstrongs.cpp
new file mode 100644
index 0000000..610edb5
--- /dev/null
+++ b/src/modules/filters/gbfstrongs.cpp
@@ -0,0 +1,126 @@
+/******************************************************************************
+ *
+ * gbfstrongs - SWFilter descendant to hide or show strongs number
+ * in a GBF module.
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <gbfstrongs.h>
+#include <swmodule.h>
+#include <ctype.h>
+
+SWORD_NAMESPACE_START
+
+const char oName[] = "Strong's Numbers";
+const char oTip[] = "Toggles Strong's Numbers On and Off if they exist";
+
+const SWBuf choices[3] = {"Off", "On", ""};
+const StringList oValues(&choices[0], &choices[2]);
+
+GBFStrongs::GBFStrongs() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("Off");
+}
+
+
+GBFStrongs::~GBFStrongs() {
+}
+
+
+char GBFStrongs::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ char token[2048]; // cheese. Fix.
+ int tokpos = 0;
+ bool intoken = false;
+ bool lastspace = false;
+ int word = 1;
+ char val[128];
+ char wordstr[5];
+ char *valto;
+ unsigned int textStart = 0, textEnd = 0;
+ bool newText = false;
+ SWBuf tmp;
+ const char *from;
+
+ SWBuf orig = text;
+ from = orig.c_str();
+
+ for (text = ""; *from; from++) {
+ if (*from == '<') {
+ intoken = true;
+ tokpos = 0;
+ token[0] = 0;
+ token[1] = 0;
+ token[2] = 0;
+ textEnd = text.size();
+ continue;
+ }
+ if (*from == '>') { // process tokens
+ intoken = false;
+ if (*token == 'W' && (token[1] == 'G' || token[1] == 'H')) { // Strongs
+ if (module->isProcessEntryAttributes()) {
+ valto = val;
+ for (unsigned int i = 1; ((token[i]) && (i < 150)); i++)
+ *valto++ = token[i];
+ *valto = 0;
+ if (atoi((!isdigit(*val))?val+1:val) < 5627) {
+ // normal strongs number
+ sprintf(wordstr, "%03d", word++);
+ module->getEntryAttributes()["Word"][wordstr]["PartsCount"] = "1";
+ module->getEntryAttributes()["Word"][wordstr]["Lemma"] = val;
+ module->getEntryAttributes()["Word"][wordstr]["LemmaClass"] = "strong";
+ tmp = "";
+ tmp.append(text.c_str()+textStart, (int)(textEnd - textStart));
+ module->getEntryAttributes()["Word"][wordstr]["Text"] = tmp;
+ newText = true;
+ }
+ else {
+ // verb morph
+ sprintf(wordstr, "%03d", word-1);
+ module->getEntryAttributes()["Word"][wordstr]["Morph"] = val;
+ module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = "OLBMorph";
+ }
+ }
+
+ if (!option) {
+ if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) {
+ if (lastspace)
+ text--;
+ }
+ if (newText) {textStart = text.size(); newText = false; }
+ continue;
+ }
+ }
+ if (module->isProcessEntryAttributes()) {
+ if ((*token == 'W') && (token[1] == 'T')) { // Morph
+ valto = val;
+ for (unsigned int i = 2; ((token[i]) && (i < 150)); i++)
+ *valto++ = token[i];
+ *valto = 0;
+ sprintf(wordstr, "%03d", word-1);
+ module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = "GBFMorph";
+ module->getEntryAttributes()["Word"][wordstr]["Morph"] = val;
+ newText = true;
+ }
+ }
+ // if not a strongs token, keep token in text
+ text += '<';
+ text += token;
+ text += '>';
+ if (newText) {textStart = text.size(); newText = false; }
+ continue;
+ }
+ if (intoken) {
+ if (tokpos < 2045)
+ token[tokpos++] = *from;
+ token[tokpos+2] = 0;
+ }
+ else {
+ text += *from;
+ lastspace = (*from == ' ');
+ }
+ }
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/gbfthml.cpp b/src/modules/filters/gbfthml.cpp
new file mode 100644
index 0000000..2664f48
--- /dev/null
+++ b/src/modules/filters/gbfthml.cpp
@@ -0,0 +1,216 @@
+/***************************************************************************
+ gbfthml.cpp - GBF to ThML filter
+ -------------------
+ begin : 1999-10-27
+ copyright : 2001 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <gbfthml.h>
+#include <swbuf.h>
+
+SWORD_NAMESPACE_START
+
+GBFThML::GBFThML()
+{
+}
+
+
+char GBFThML::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ const char *from;
+ char token[2048];
+ int tokpos = 0;
+ bool intoken = false;
+ const char *tok;
+
+ SWBuf orig = text;
+ from = orig.c_str();
+
+ for (text = ""; *from; from++) {
+ if (*from == '<') {
+ intoken = true;
+ tokpos = 0;
+ token[0] = 0;
+ token[1] = 0;
+ token[2] = 0;
+ continue;
+ }
+ if (*from == '>')
+ {
+ intoken = false;
+ // process desired tokens
+ switch (*token) {
+ case 'W': // Strongs
+ switch(token[1]) {
+ case 'G':
+ case 'H':
+ text += "<sync type=\"Strongs\" value=\"";
+ for (tok = token + 1; *tok; tok++)
+ text += *tok;
+ text += "\" />";
+ continue;
+
+ case 'T': // Tense
+ text += "<sync type=\"Morph\" value=\"";
+ for (tok = token + 2; *tok; tok++)
+ text += *tok;
+ text += "\" />";
+ continue;
+ }
+ break;
+ case 'R':
+ switch(token[1])
+ {
+ case 'X':
+ text += "<a href=\"";
+ for (tok = token + 3; *tok; tok++) {
+ if(*tok != '<' && *tok+1 != 'R' && *tok+2 != 'x') {
+ text += *tok;
+ }
+ else {
+ break;
+ }
+ }
+ text += "\">";
+ continue;
+ case 'x':
+ text += "</a>";
+ continue;
+ case 'F': // footnote begin
+ text += "<note>";
+ continue;
+ case 'f': // footnote end
+ text += "</note>";
+ continue;
+ }
+ break;
+ case 'F': // font tags
+ switch(token[1])
+ {
+ case 'N':
+ text += "<font face=\"";
+ for (tok = token + 2; *tok; tok++)
+ text += *tok;
+ text += "\">";
+ continue;
+ case 'n':
+ text += "</font>";
+ continue;
+ case 'I': // italic start
+ text += "<i>";
+ continue;
+ case 'i': // italic end
+ text += "</i>";
+ continue;
+ case 'B': // bold start
+ text += "<b>";
+ continue;
+ case 'b': // bold end
+ text += "</b>";
+ continue;
+
+ case 'R': // words of Jesus begin
+ text += "<font color=\"#ff0000\">";
+ continue;
+ case 'r': // words of Jesus end
+ text += "</font>";
+ continue;
+ case 'U': // Underline start
+ text += "<u>";
+ continue;
+ case 'u': // Underline end
+ text += "</u>";
+ continue;
+ case 'O': // Old Testament quote begin
+ text += "<cite>";
+ continue;
+ case 'o': // Old Testament quote end
+ text += "</cite>";
+ continue;
+ case 'S': // Superscript begin
+ text += "<sup>";
+ continue;
+ case 's': // Superscript end
+ text += "</sup>";
+ continue;
+ case 'V': // Subscript begin
+ text += "<sub>";
+ continue;
+ case 'v': // Subscript end
+ text += "</sub>";
+ continue;
+ }
+ break;
+ case 'C': // special character tags
+ switch(token[1])
+ {
+ case 'A': // ASCII value
+ text += (char)atoi(&token[2]);
+ continue;
+ case 'G':
+ //*to++ = ' ';
+ continue;
+ case 'L': // line break
+ text += "<br /> ";
+ continue;
+ case 'M': // new paragraph
+ text += "<p />";
+ continue;
+ case 'T':
+ //*to++ = ' ';
+ continue;
+ }
+ break;
+ case 'T': // title formatting
+ switch(token[1])
+ {
+ case 'T': // Book title begin
+ text += "<big>";
+ continue;
+ case 't':
+ text += "</big>";
+ continue;
+ case 'S':
+ text += "<div class=\"sechead\">";
+ continue;
+ case 's':
+ text += "</div>";
+ continue;
+ }
+ break;
+
+ case 'P': // special formatting
+ switch(token[1]) {
+ case 'P': // Poetry begin
+ text += "<verse>";
+ continue;
+ case 'p':
+ text += "</verse>";
+ continue;
+ }
+ break;
+ }
+ continue;
+ }
+ if (intoken) {
+ if (tokpos < 2045)
+ token[tokpos++] = *from;
+ token[tokpos+2] = 0;
+ }
+ else text += *from;
+ }
+ return 0;
+}
+
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/gbfwebif.cpp b/src/modules/filters/gbfwebif.cpp
new file mode 100644
index 0000000..e651db6
--- /dev/null
+++ b/src/modules/filters/gbfwebif.cpp
@@ -0,0 +1,191 @@
+/***************************************************************************
+ GBFWEBIF.cpp - GBF to HTML filter with hrefs
+ for strongs and morph tags
+ -------------------
+ begin : 2001-09-03
+ copyright : 2001 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <gbfwebif.h>
+#include <ctype.h>
+#include <url.h>
+
+SWORD_NAMESPACE_START
+
+GBFWEBIF::GBFWEBIF() : baseURL(""), passageStudyURL(baseURL + "passagestudy.jsp") {
+//all is done in GBFHTMLHREF since it inherits form this class
+ addTokenSubstitute("FR", "<span class=\"wordsOfJesus\">"); // words of Jesus begin
+ addTokenSubstitute("Fr", "</span>");
+}
+
+bool GBFWEBIF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+ const char *tok;
+ char val[128];
+ char *valto;
+ const char *num;
+ SWBuf url;
+
+ if (!substituteToken(buf, token)) {
+ if (!strncmp(token, "w", 1)) {
+ // OSIS Word (temporary until OSISRTF is done)
+ valto = val;
+ num = strstr(token, "lemma=\"x-Strongs:");
+ if (num) {
+ for (num+=17; ((*num) && (*num != '\"')); num++)
+ *valto++ = *num;
+ *valto = 0;
+
+ if (atoi((!isdigit(*val))?val+1:val) < 5627) {
+ buf += " <small><em>&lt;";
+ url = "";
+ for (tok = val; *tok; tok++) {
+ url += *tok;
+ }
+ if ((url.length() > 1) && strchr("GH", url[0])) {
+ if (isdigit(url[1]))
+ url = url.c_str()+1;
+ }
+ buf.appendFormatted("<a href=\"%s?showStrong=%s#cv\">", passageStudyURL.c_str(), URL::encode(url).c_str());
+
+ for (tok = (!isdigit(*val))?val+1:val; *tok; tok++) {
+ buf += *tok;
+ }
+ buf += "</a>&gt;</em></small> ";
+ }
+ }
+ else {
+ num = strstr(token, "lemma=\"strong:");
+ if (num) {
+ for (num+=14; ((*num) && (*num != '\"')); num++)
+ *valto++ = *num;
+ *valto = 0;
+
+ if (atoi((!isdigit(*val))?val+1:val) < 5627) {
+ buf += " <small><em>&lt;";
+ url = "";
+ for (tok = val; *tok; tok++) {
+ url += *tok;
+ }
+ if ((url.length() > 1) && strchr("GH", url[0])) {
+ if (isdigit(url[1]))
+ url = url.c_str()+1;
+ }
+ buf.appendFormatted("<a href=\"%s?showStrong=%s#cv\">", passageStudyURL.c_str(), URL::encode(url).c_str());
+
+ for (tok = (!isdigit(*val))?val+1:val; *tok; tok++) {
+ buf += *tok;
+ }
+ buf += "</a>&gt;</em></small> ";
+ }
+ }
+ }
+ valto = val;
+ num = strstr(token, "morph=\"x-Robinson:");
+ if (num) {
+ for (num+=18; ((*num) && (*num != '\"')); num++)
+ *valto++ = *num;
+ *valto = 0;
+ buf += " <small><em>(";
+ url = "";
+ for (tok = val; *tok; tok++) {
+ // normal robinsons tense
+ buf += *tok;
+ }
+ buf.appendFormatted("<a href=\"%s?showMorph=%s#cv\">", passageStudyURL.c_str(), URL::encode(url).c_str());
+
+ for (tok = val; *tok; tok++) {
+ buf += *tok;
+ }
+ buf += "</a>)</em></small> ";
+ }
+ }
+
+ else if (!strncmp(token, "WG", 2) || !strncmp(token, "WH", 2)) { // strong's numbers
+ buf += " <small><em>&lt;";
+ url = "";
+
+ for (tok = token+1; *tok; tok++) {
+ url += *tok;
+ }
+ if ((url.length() > 1) && strchr("GH", url[0])) {
+ if (isdigit(url[1]))
+ url = url.c_str()+1;
+ }
+ buf.appendFormatted("<a href=\"%s?showStrong=%s#cv\">", passageStudyURL.c_str(), URL::encode(url).c_str());
+
+ for (tok = token + 2; *tok; tok++) {
+ buf += *tok;
+ }
+ buf += "</a>&gt;</em></small>";
+ }
+
+ else if (!strncmp(token, "WTG", 3) || !strncmp(token, "WTH", 3)) { // strong's numbers tense
+ buf += " <small><em>(";
+ url = "";
+ for (tok = token + 2; *tok; tok++) {
+ if(*tok != '\"')
+ url += *tok;
+ }
+ if ((url.length() > 1) && strchr("GH", url[0])) {
+ if (isdigit(url[1]))
+ url = url.c_str()+1;
+ }
+ buf.appendFormatted("<a href=\"%s?showStrong=%s#cv\">", passageStudyURL.c_str(), URL::encode(url).c_str());
+
+ for (tok = token + 3; *tok; tok++)
+ if(*tok != '\"')
+ buf += *tok;
+ buf += "</a>)</em></small>";
+ }
+
+ else if (!strncmp(token, "WT", 2) && strncmp(token, "WTH", 3) && strncmp(token, "WTG", 3)) { // morph tags
+ buf += " <small><em>(";
+ for (tok = token + 2; *tok; tok++) {
+ if(*tok != '\"')
+ buf += *tok;
+ }
+ buf.appendFormatted("<a href=\"%s?showMorph=%s#cv\">", passageStudyURL.c_str(), URL::encode(url).c_str());
+
+ for (tok = token + 2; *tok; tok++) {
+ if(*tok != '\"')
+ buf += *tok;
+ }
+ buf += "</a>)</em></small>";
+ }
+
+ else if (!strncmp(token, "RX", 2)) {
+ buf += "<a href=\"";
+ for (tok = token + 3; *tok; tok++) {
+ if(*tok != '<' && *tok+1 != 'R' && *tok+2 != 'x') {
+ buf += *tok;
+ }
+ else {
+ break;
+ }
+ }
+
+ buf.appendFormatted("a href=\"%s?key=%s#cv\">", passageStudyURL.c_str(), URL::encode(url).c_str());
+ }
+ // ok to leave these in
+ else if ((!strncmp(token, "span", 4))
+ || (!strncmp(token, "/span", 5))) {
+ buf.appendFormatted("<%s>", token);
+ }
+
+ else {
+ return GBFHTMLHREF::handleToken(buf, token, userData);
+ }
+ }
+ return true;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/gbfwordjs.cpp b/src/modules/filters/gbfwordjs.cpp
new file mode 100644
index 0000000..f81ffac
--- /dev/null
+++ b/src/modules/filters/gbfwordjs.cpp
@@ -0,0 +1,282 @@
+/******************************************************************************
+ *
+ * gbfstrongs - SWFilter descendant to hide or show strongs number
+ * in a GBF module.
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <gbfwordjs.h>
+#include <swmodule.h>
+#include <ctype.h>
+#include <utilstr.h>
+#include <versekey.h>
+
+SWORD_NAMESPACE_START
+
+const char oName[] = "Word Javascript";
+const char oTip[] = "Toggles Word Javascript data";
+
+const SWBuf choices[3] = {"Off", "On", ""};
+const StringList oValues(&choices[0], &choices[2]);
+
+
+GBFWordJS::GBFWordJS() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("Off");
+
+ defaultGreekLex = 0;
+ defaultHebLex = 0;
+ defaultGreekParse = 0;
+ defaultHebParse = 0;
+ mgr = 0;
+}
+
+
+GBFWordJS::~GBFWordJS() {
+}
+
+
+char GBFWordJS::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ if (option) {
+ char token[2112]; // cheese. Fix.
+ int tokpos = 0;
+ bool intoken = false;
+ bool lastspace = false;
+ int word = 1;
+ char val[128];
+ char wordstr[5];
+ unsigned int textStart = 0, lastAppendLen = 0, textEnd = 0;
+ SWBuf tmp;
+ bool newText = false;
+ bool needWordOut = false;
+ AttributeValue *wordAttrs = 0;
+ SWBuf modName = (module)?module->Name():"";
+ SWBuf wordSrcPrefix = modName;
+
+ const SWBuf orig = text;
+ const char * from = orig.c_str();
+ VerseKey *vkey = 0;
+ if (key) {
+ vkey = SWDYNAMIC_CAST(VerseKey, key);
+ }
+
+ for (text = ""; *from; from++) {
+ if (*from == '<') {
+ intoken = true;
+ tokpos = 0;
+ token[0] = 0;
+ token[1] = 0;
+ token[2] = 0;
+ textEnd = text.length();
+ continue;
+ }
+ if (*from == '>') { // process tokens
+ intoken = false;
+ if (*token == 'W' && (token[1] == 'G' || token[1] == 'H')) { // Strongs
+ strcpy(val,token+1);
+ if (atoi((!isdigit(*val))?val+1:val) < 5627) {
+ // normal strongs number
+ sprintf(wordstr, "%03d", word++);
+ needWordOut = (word > 2);
+ wordAttrs = &(module->getEntryAttributes()["Word"][wordstr]);
+ (*wordAttrs)["Lemma"] = val;
+ //printf("Adding: [\"Word\"][%s][\"Strongs\"] = %s\n", wordstr, val);
+ tmp = "";
+ tmp.append(text.c_str()+textStart, (int)(textEnd - textStart));
+ (*wordAttrs)["Text"] = tmp;
+ text.append("</span>");
+ SWBuf ts;
+ ts.appendFormatted("%d", textStart);
+ (*wordAttrs)["TextStart"] = ts;
+ //printf("Adding: [\"Word\"][%s][\"Text\"] = %s\n", wordstr, tmp.c_str());
+ newText = true;
+ }
+ else {
+ // verb morph
+ if (wordAttrs) {
+ (*wordAttrs)["Morph"] = val;
+ }
+ //printf("Adding: [\"Word\"][%s][\"Morph\"] = %s\n", wordstr, val);
+ }
+
+ }
+ if (*token == 'W' && token[1] == 'T') { // Morph
+ if (token[2] == 'G' || token[2] == 'H') {
+ strcpy(val, token+2);
+ }
+ else strcpy(val, token+1);
+ if (wordAttrs) {
+ (*wordAttrs)["Morph"] = val;
+ (*wordAttrs)["MorphClass"] = "StrongsMorph";
+ }
+ newText = true;
+ }
+ // if not a strongs token, keep token in text
+ text += '<';
+ text += token;
+ text += '>';
+ if (needWordOut) {
+ char wstr[10];
+ sprintf(wstr, "%03d", word-2);
+ AttributeValue *wAttrs = &(module->getEntryAttributes()["Word"][wstr]);
+ needWordOut = false;
+ SWBuf strong = (*wAttrs)["Lemma"];
+ SWBuf morph = (*wAttrs)["Morph"];
+ SWBuf morphClass = (*wAttrs)["MorphClass"];
+ SWBuf wordText = (*wAttrs)["Text"];
+ SWBuf textSt = (*wAttrs)["TextStart"];
+ if (strong.size()) {
+ char gh = 0;
+ gh = isdigit(strong[0]) ? 0:strong[0];
+ if (!gh) {
+ if (vkey) {
+ gh = vkey->Testament() ? 'H' : 'G';
+ }
+ }
+ else strong << 1;
+
+ SWModule *sLex = 0;
+ SWModule *sMorph = 0;
+ if (gh == 'G') {
+ sLex = defaultGreekLex;
+ sMorph = defaultGreekParse;
+ }
+ if (gh == 'H') {
+ sLex = defaultHebLex;
+ sMorph = defaultHebParse;
+ }
+ SWBuf lexName = "";
+ if (sLex) {
+ // we can pass the real lex name in, but we have some
+ // aliases in the javascript to optimize bandwidth
+ lexName = sLex->Name();
+ if (lexName == "StrongsGreek")
+ lexName = "G";
+ if (lexName == "StrongsHebrew")
+ lexName = "H";
+ }
+ SWBuf wordID;
+ if (vkey) {
+ // optimize for bandwidth and use only the verse as the unique entry id
+ wordID.appendFormatted("%d", vkey->Verse());
+ }
+ else {
+ wordID = key->getText();
+ }
+ for (unsigned int i = 0; i < wordID.size(); i++) {
+ if ((!isdigit(wordID[i])) && (!isalpha(wordID[i]))) {
+ wordID[i] = '_';
+ }
+ }
+ wordID.appendFormatted("_%s%d", wordSrcPrefix.c_str(), atoi(wstr));
+ if (textSt.size()) {
+ int textStr = atoi(textSt.c_str());
+ textStr += lastAppendLen;
+ SWBuf spanStart = "";
+
+
+
+/*
+ if (sMorph) {
+ SWBuf popMorph = "<a onclick=\"";
+ popMorph.appendFormatted("p(\'%s\',\'%s\','%s','');\" >%s</a>", sMorph->Name(), morph.c_str(), wordID.c_str(), morph.c_str());
+ morph = popMorph;
+ }
+*/
+
+ // 'p' = 'fillpop' to save bandwidth
+ const char *m = strchr(morph.c_str(), ':');
+ if (m) m++;
+ else m = morph.c_str();
+ spanStart.appendFormatted("<span class=\"clk\" onclick=\"p('%s','%s','%s','%s','','%s');\" >", lexName.c_str(), strong.c_str(), wordID.c_str(), m, modName.c_str());
+ text.insert(textStr, spanStart);
+ lastAppendLen = spanStart.length();
+ }
+ }
+
+ }
+ if (newText) {
+ textStart = text.length(); newText = false;
+ }
+ continue;
+ }
+ if (intoken) {
+ if (tokpos < 2045)
+ token[tokpos++] = *from;
+ token[tokpos+2] = 0;
+ }
+ else {
+ text += *from;
+ lastspace = (*from == ' ');
+ }
+ }
+
+ char wstr[10];
+ sprintf(wstr, "%03d", word-1);
+ AttributeValue *wAttrs = &(module->getEntryAttributes()["Word"][wstr]);
+ needWordOut = false;
+ SWBuf strong = (*wAttrs)["Lemma"];
+ SWBuf morph = (*wAttrs)["Morph"];
+ SWBuf morphClass = (*wAttrs)["MorphClass"];
+ SWBuf wordText = (*wAttrs)["Text"];
+ SWBuf textSt = (*wAttrs)["TextStart"];
+ if (strong.size()) {
+ char gh = 0;
+ gh = isdigit(strong[0]) ? 0:strong[0];
+ if (!gh) {
+ if (vkey) {
+ gh = vkey->Testament() ? 'H' : 'G';
+ }
+ }
+ else strong << 1;
+
+ SWModule *sLex = 0;
+ if (gh == 'G') {
+ sLex = defaultGreekLex;
+ }
+ if (gh == 'H') {
+ sLex = defaultHebLex;
+ }
+ SWBuf lexName = "";
+ if (sLex) {
+ // we can pass the real lex name in, but we have some
+ // aliases in the javascript to optimize bandwidth
+ lexName = sLex->Name();
+ if (lexName == "StrongsGreek")
+ lexName = "G";
+ if (lexName == "StrongsHebrew")
+ lexName = "H";
+ }
+ SWBuf wordID;
+ if (vkey) {
+ // optimize for bandwidth and use only the verse as the unique entry id
+ wordID.appendFormatted("%d", vkey->Verse());
+ }
+ else {
+ wordID = key->getText();
+ }
+ for (unsigned int i = 0; i < wordID.size(); i++) {
+ if ((!isdigit(wordID[i])) && (!isalpha(wordID[i]))) {
+ wordID[i] = '_';
+ }
+ }
+ wordID.appendFormatted("_%s%d", wordSrcPrefix.c_str(), atoi(wstr));
+ if (textSt.size()) {
+ int textStr = atoi(textSt.c_str());
+ textStr += lastAppendLen;
+ SWBuf spanStart = "";
+ // 'p' = 'fillpop' to save bandwidth
+ const char *m = strchr(morph.c_str(), ':');
+ if (m) m++;
+ else m = morph.c_str();
+ spanStart.appendFormatted("<span class=\"clk\" onclick=\"p('%s','%s','%s','%s','','%s');\" >", lexName.c_str(), strong.c_str(), wordID.c_str(), m, modName.c_str());
+ text.insert(textStr, spanStart);
+ }
+ }
+ }
+
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/greeklexattribs.cpp b/src/modules/filters/greeklexattribs.cpp
new file mode 100644
index 0000000..1e82305
--- /dev/null
+++ b/src/modules/filters/greeklexattribs.cpp
@@ -0,0 +1,101 @@
+/******************************************************************************
+ *
+ * greeklexattribs - SWFilter descendant to set entry attributes for greek
+ * lexicons
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <string>
+#include <greeklexattribs.h>
+#include <swmodule.h>
+
+using std::string;
+
+SWORD_NAMESPACE_START
+
+GreekLexAttribs::GreekLexAttribs() {
+}
+
+
+char GreekLexAttribs::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+
+ if (module->isProcessEntryAttributes()) {
+ const char *from;
+ bool inAV = false;
+ string phrase;
+ string freq;
+ char val[128], *valto;
+ char wordstr[7];
+ const char *currentPhrase = 0;
+ const char *currentPhraseEnd = 0;
+ int number = 0;
+
+
+ for (from = text.c_str(); *from; from++) {
+ if (inAV) {
+ if (currentPhrase == 0) {
+ if (isalpha(*from))
+ currentPhrase = from;
+ }
+ else {
+ if ((!isalpha(*from)) && (*from != ' ') && (*from != '+') && (*from !='(') && (*from != ')') && (*from != '\'')) {
+ if (*from == '<') {
+ if (!currentPhraseEnd)
+ currentPhraseEnd = from - 1;
+ for (; *from && *from != '>'; from++) {
+ if (!strncmp(from, "value=\"", 7)) {
+ valto = val;
+ from += 7;
+ for (unsigned int i = 0; from[i] != '\"' && i < 127; i++)
+ *valto++ = from[i];
+ *valto = 0;
+ sprintf(wordstr, "%03d", number+1);
+ module->getEntryAttributes()["AVPhrase"][wordstr]["CompoundedWith"] = val;
+ from += strlen(val);
+ }
+ }
+ continue;
+ }
+
+ phrase = "";
+ phrase.append(currentPhrase, (int)(((currentPhraseEnd>currentPhrase)?currentPhraseEnd:from) - currentPhrase)-1);
+ currentPhrase = from;
+ while (*from && isdigit(*from)) from++;
+ freq = "";
+ freq.append(currentPhrase, (int)(from - currentPhrase));
+ if ((freq.length() > 0) && (phrase.length() > 0)) {
+ sprintf(wordstr, "%03d", ++number);
+ if ((strchr(phrase.c_str(), '(') > phrase.c_str()) && (strchr(phrase.c_str(), ')') > phrase.c_str() + 1)) {
+ string tmp = phrase.substr(0, phrase.find_first_of("("));
+ phrase.erase(phrase.find_first_of("("), 1);
+ phrase.erase(phrase.find_first_of(")"), 1);
+ phrase.erase(0,phrase.find_first_not_of("\r\n\v\t ")); phrase.erase(phrase.find_last_not_of("\r\n\v\t ")+1);
+ module->getEntryAttributes()["AVPhrase"][wordstr]["Alt"] = phrase.c_str();
+ phrase = tmp;
+ }
+ phrase.erase(0,phrase.find_first_not_of("\r\n\v\t ")); phrase.erase(phrase.find_last_not_of("\r\n\v\t ")+1);
+ freq.erase(0,freq.find_first_not_of("\r\n\v\t ")); freq.erase(freq.find_last_not_of("\r\n\v\t ")+1);
+ module->getEntryAttributes()["AVPhrase"][wordstr]["Phrase"] = phrase.c_str();
+ module->getEntryAttributes()["AVPhrase"][wordstr]["Frequency"] = freq.c_str();
+ currentPhrase = 0;
+ currentPhraseEnd = 0;
+ }
+ }
+ }
+ if (*from == ';') inAV = false;
+
+ }
+ else if (!strncmp(from, "AV-", 3)) {
+ inAV = true;
+ from+=2;
+ }
+ }
+ }
+ return 0;
+}
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/latin1utf16.cpp b/src/modules/filters/latin1utf16.cpp
new file mode 100644
index 0000000..1392750
--- /dev/null
+++ b/src/modules/filters/latin1utf16.cpp
@@ -0,0 +1,119 @@
+/******************************************************************************
+ *
+ * Latin1UTF16 - SWFilter descendant to convert a Latin-1 character to UTF-16
+ *
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <latin1utf16.h>
+#include <swbuf.h>
+
+SWORD_NAMESPACE_START
+
+Latin1UTF16::Latin1UTF16() {
+}
+
+
+char Latin1UTF16::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ const unsigned char *from;
+ if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering
+ return (char)-1;
+
+
+ SWBuf orig = text;
+ from = (const unsigned char *)orig.c_str();
+
+ for (text = ""; *from; from++) {
+ text.setSize(text.size()+2);
+ switch (*from) {
+ case 0x80: // '€'
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x20AC;
+ break;
+ case 0x82: // '‚'
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x201A;
+ break;
+ case 0x83: // 'ƒ'
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x0192;
+ break;
+ case 0x84: // '„'
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x201E;
+ break;
+ case 0x85: // '…'
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x2026;
+ break;
+ case 0x86: // '†'
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x2020;
+ break;
+ case 0x87: // '‡'
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x2021;
+ break;
+ case 0x88: // 'ˆ'
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x02C6;
+ break;
+ case 0x89: // '‰'
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x2030;
+ break;
+ case 0x8A: // 'Š'
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x0160;
+ break;
+ case 0x8B: // '‹'
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x2039;
+ break;
+ case 0x8C: // 'Œ'
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x0152;
+ break;
+ case 0x8E: // 'Ž'
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x017D;
+ break;
+ case 0x91: // '‘'
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x2018;
+ break;
+ case 0x92: // '’'
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x2019;
+ break;
+ case 0x93: // '“'
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x201C;
+ break;
+ case 0x94: // '”'
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x201D;
+ break;
+ case 0x95: // '•'
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x2022;
+ break;
+ case 0x96: // '–'
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x2013;
+ break;
+ case 0x97: // '—'
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x2014;
+ break;
+ case 0x98: // '˜'
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x02DC;
+ break;
+ case 0x99: // '™'
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x2122;
+ break;
+ case 0x9A: // 'š'
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x0161;
+ break;
+ case 0x9B: // '›'
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x203A;
+ break;
+ case 0x9C: // 'œ'
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x0153;
+ break;
+ case 0x9E: // 'ž'
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x017E;
+ break;
+ case 0x9F: // 'Ÿ'
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) 0x0178;
+ break;
+ default:
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short) *from;
+ }
+ }
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/latin1utf8.cpp b/src/modules/filters/latin1utf8.cpp
new file mode 100644
index 0000000..6c0d7f1
--- /dev/null
+++ b/src/modules/filters/latin1utf8.cpp
@@ -0,0 +1,173 @@
+/******************************************************************************
+ *
+ * Latin1UTF8 - SWFilter descendant to convert a Latin-1 character to UTF-8
+ *
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <latin1utf8.h>
+#include <swmodule.h>
+
+SWORD_NAMESPACE_START
+
+Latin1UTF8::Latin1UTF8() {
+}
+
+
+char Latin1UTF8::processText(SWBuf &text, const SWKey *key, const SWModule *module)
+{
+ const unsigned char *from;
+
+ if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering
+ return (char)-1;
+
+ SWBuf orig = text;
+ from = (const unsigned char *)orig.c_str();
+
+ for (text = ""; *from; from++) {
+ if (*from < 0x80) {
+ text += *from;
+ }
+ else if (*from < 0xc0) {
+ switch(*from) {
+ case 0x80: // '€'
+ text += 0xe2; // 'â'
+ text += 0x82; // '‚'
+ text += 0xac; // '¬'
+ break;
+ case 0x82: // '‚'
+ text += 0xe2; // 'â'
+ text += 0x80; // '€'
+ text += 0x9a; // 'š'
+ break;
+ case 0x83: // 'ƒ'
+ text += 0xc6; // 'Æ'
+ text += 0x92; // '’'
+ break;
+ case 0x84: // '„'
+ text += 0xe2; // 'â'
+ text += 0x80; // '€'
+ text += 0x9e; // 'ž'
+ break;
+ case 0x85: // '…'
+ text += 0xe2; // 'â'
+ text += 0x80; // '€'
+ text += 0xa6; // '¦'
+ break;
+ case 0x86: // '†'
+ text += 0xe2; // 'â'
+ text += 0x80; // '€'
+ text += 0xa0; // ' '
+ break;
+ case 0x87: // '‡'
+ text += 0xe2; // 'â'
+ text += 0x80; // '€'
+ text += 0xa1; // '¡'
+ break;
+ case 0x88: // 'ˆ'
+ text += 0xcb; // 'Ë'
+ text += 0x86; // '†'
+ break;
+ case 0x89: // '‰'
+ text += 0xe2; // 'â'
+ text += 0x80; // '€'
+ text += 0xb0; // '°'
+ break;
+ case 0x8A: // 'Š'
+ text += 0xc5; // 'Å'
+ text += 0xa0; // ' '
+ break;
+ case 0x8B: // '‹'
+ text += 0xe2; // 'â'
+ text += 0x80; // '€'
+ text += 0xb9; // '¹'
+ break;
+ case 0x8C: // 'Œ'
+ text += 0xc5; // 'Å'
+ text += 0x92; // '’'
+ break;
+ case 0x8E: // 'Ž'
+ text += 0xc5; // 'Å'
+ text += 0xbd; // '½'
+ break;
+ case 0x91: // '‘'
+ text += 0xe2; // 'â'
+ text += 0x80; // '€'
+ text += 0x98; // '˜'
+ break;
+ case 0x92: // '’'
+ text += 0xe2; // 'â'
+ text += 0x80; // '€'
+ text += 0x99; // '™'
+ break;
+ case 0x93: // '“'
+ text += 0xe2; // 'â'
+ text += 0x80; // '€'
+ text += 0x9c; // 'œ'
+ break;
+ case 0x94: // '”'
+ text += 0xe2; // 'â'
+ text += 0x80; // '€'
+ text += 0x9d; // ''
+ break;
+ case 0x95: // '•'
+ text += 0xe2; // 'â'
+ text += 0x80; // '€'
+ text += 0xa2; // '¢'
+ break;
+ case 0x96: // '–'
+ text += 0xe2; // 'â'
+ text += 0x80; // '€'
+ text += 0x93; // '“'
+ break;
+ case 0x97: // '—'
+ text += 0xe2; // 'â'
+ text += 0x80; // '€'
+ text += 0x94; // '”'
+ break;
+ case 0x98: // '˜'
+ text += 0xcb; // 'Ë'
+ text += 0x9c; // 'œ'
+ break;
+ case 0x99: // '™'
+ text += 0xe2; // 'â'
+ text += 0x84; // '„'
+ text += 0xa2; // '¢'
+ break;
+ case 0x9A: // 'š'
+ text += 0xc5; // 'Å'
+ text += 0xa1; // '¡'
+ break;
+ case 0x9B: // '›'
+ text += 0xe2; // 'â'
+ text += 0x80; // '€'
+ text += 0xba; // 'º'
+ break;
+ case 0x9C: // 'œ'
+ text += 0xc5; // 'Å'
+ text += 0x93; // '“'
+ break;
+ case 0x9E: // 'ž'
+ text += 0xc5; // 'Å'
+ text += 0xbe; // '¾'
+ break;
+ case 0x9F: // 'Ÿ'
+ text += 0xc5; // 'Å'
+ text += 0xb8; // '¸'
+ break;
+ default:
+ text += 0xC2;
+ text += *from;
+ }
+ }
+ else {
+ text += 0xC3;
+ text += (*from - 0x40);
+ }
+ }
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/osisfootnotes.cpp b/src/modules/filters/osisfootnotes.cpp
new file mode 100644
index 0000000..89c9c40
--- /dev/null
+++ b/src/modules/filters/osisfootnotes.cpp
@@ -0,0 +1,157 @@
+/******************************************************************************
+ *
+ * osisfootnotes - SWFilter descendant to hide or show footnotes
+ * in an OSIS module.
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <osisfootnotes.h>
+#include <swmodule.h>
+#include <swbuf.h>
+#include <versekey.h>
+#include <utilxml.h>
+#include <utilstr.h>
+
+SWORD_NAMESPACE_START
+
+const char oName[] = "Footnotes";
+const char oTip[] = "Toggles Footnotes On and Off if they exist";
+
+const SWBuf choices[3] = {"Off", "On", ""};
+const StringList oValues(&choices[0], &choices[2]);
+
+
+OSISFootnotes::OSISFootnotes() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("Off");
+}
+
+
+OSISFootnotes::~OSISFootnotes() {
+}
+
+
+char OSISFootnotes::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ SWBuf token;
+ bool intoken = false;
+ bool hide = false;
+ SWBuf tagText;
+ XMLTag startTag;
+ SWBuf refs = "";
+ int footnoteNum = 1;
+ char buf[254];
+ VerseKey parser(key->getText());
+
+ SWBuf orig = text;
+ const char *from = orig.c_str();
+
+ XMLTag tag;
+ bool strongsMarkup = false;
+
+
+ for (text = ""; *from; ++from) {
+
+ // remove all newlines temporarily to fix kjv2003 module
+ if ((*from == 10) || (*from == 13)) {
+ if ((text.length()>1) && (text[text.length()-2] != ' ') && (*(from+1) != ' '))
+ text.append(' ');
+ continue;
+ }
+
+
+ if (*from == '<') {
+ intoken = true;
+ token = "";
+ continue;
+ }
+
+
+
+ if (*from == '>') { // process tokens
+ intoken = false;
+ if (!strncmp(token, "note", 4) || !strncmp(token.c_str(), "/note", 5)) {
+ tag = token;
+
+ if (!tag.isEndTag()) {
+ if (tag.getAttribute("type") && (!strcmp("x-strongsMarkup", tag.getAttribute("type"))
+ || !strcmp("strongsMarkup", tag.getAttribute("type"))) // deprecated
+ ) {
+ tag.setEmpty(false); // handle bug in KJV2003 module where some note open tags were <note ... />
+ strongsMarkup = true;
+ }
+
+ if (!tag.isEmpty()) {
+// if ((!tag.isEmpty()) || (SWBuf("strongsMarkup") == tag.getAttribute("type"))) {
+ refs = "";
+ startTag = tag;
+ hide = true;
+ tagText = "";
+ continue;
+ }
+ }
+ if (hide && tag.isEndTag()) {
+ if (module->isProcessEntryAttributes() && !strongsMarkup) { //don`t parse strongsMarkup to EntryAttributes as Footnote
+ sprintf(buf, "%i", footnoteNum++);
+ StringList attributes = startTag.getAttributeNames();
+ for (StringList::const_iterator it = attributes.begin(); it != attributes.end(); it++) {
+ module->getEntryAttributes()["Footnote"][buf][it->c_str()] = startTag.getAttribute(it->c_str());
+ }
+ module->getEntryAttributes()["Footnote"][buf]["body"] = tagText;
+ startTag.setAttribute("swordFootnote", buf);
+ if ((startTag.getAttribute("type")) && (!strcmp(startTag.getAttribute("type"), "crossReference"))) {
+ if (!refs.length())
+ refs = parser.ParseVerseList(tagText.c_str(), parser, true).getRangeText();
+ module->getEntryAttributes()["Footnote"][buf]["refList"] = refs.c_str();
+ }
+ }
+ hide = false;
+ if (option || (startTag.getAttribute("type") && !strcmp(startTag.getAttribute("type"), "crossReference"))) { // we want the tag in the text; crossReferences are handled by another filter
+ text.append(startTag);
+// text.append(tagText); // we don't put the body back in because it is retrievable from EntryAttributes["Footnotes"][]["body"].
+ }
+ else continue;
+ }
+ strongsMarkup = false;
+ }
+
+ // if not a heading token, keep token in text
+ //if ((!strcmp(tag.getName(), "reference")) && (!tag.isEndTag())) {
+ // SWBuf osisRef = tag.getAttribute("osisRef");
+ if (!strncmp(token, "reference", 9)) {
+ if (refs.length()) {
+ refs.append("; ");
+ }
+
+ const char* attr = strstr(token.c_str() + 9, "osisRef=\"");
+ const char* end = attr ? strchr(attr+9, '"') : 0;
+
+ if (attr && end) {
+ refs.append(attr+9, end-(attr+9));
+ }
+ }
+ if (!hide) {
+ text.append('<');
+ text.append(token);
+ text.append('>');
+ }
+ else {
+ tagText.append('<');
+ tagText.append(token);
+ tagText.append('>');
+ }
+ continue;
+ }
+ if (intoken) { //copy token
+ token.append(*from);
+ }
+ else if (!hide) { //copy text which is not inside a token
+ text.append(*from);
+ }
+ else tagText.append(*from);
+ }
+ return 0;
+}
+
+SWORD_NAMESPACE_END
+
diff --git a/src/modules/filters/osisheadings.cpp b/src/modules/filters/osisheadings.cpp
new file mode 100644
index 0000000..a072335
--- /dev/null
+++ b/src/modules/filters/osisheadings.cpp
@@ -0,0 +1,144 @@
+/******************************************************************************
+ *
+ *osisheadings - SWFilter descendant to hide or show headings
+ * in an OSIS module.
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <osisheadings.h>
+#include <swmodule.h>
+#include <utilxml.h>
+#include <utilstr.h>
+
+SWORD_NAMESPACE_START
+
+const char oName[] = "Headings";
+const char oTip[] = "Toggles Headings On and Off if they exist";
+
+const SWBuf choices[3] = {"Off", "On", ""};
+const StringList oValues(&choices[0], &choices[2]);
+
+OSISHeadings::OSISHeadings() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("Off");
+}
+
+
+OSISHeadings::~OSISHeadings() {
+}
+
+
+char OSISHeadings::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ SWBuf token;
+ bool intoken = false;
+ bool hide = false;
+ bool preverse = false;
+ bool withinTitle = false;
+ bool canonical = false;
+ SWBuf header;
+ int headerNum = 0;
+ int pvHeaderNum = 0;
+ char buf[254];
+ XMLTag startTag;
+
+ SWBuf orig = text;
+ const char *from = orig.c_str();
+
+ XMLTag tag;
+
+ for (text = ""; *from; ++from) {
+ if (*from == '<') {
+ intoken = true;
+ token = "";
+
+ continue;
+ }
+ if (*from == '>') { // process tokens
+ intoken = false;
+
+ if (!strncmp(token.c_str(), "title", 5) || !strncmp(token.c_str(), "/title", 6)) {
+ withinTitle = (!strnicmp(token.c_str(), "title", 5));
+ tag = token;
+
+ if (!tag.isEndTag()) { //start tag
+ if (!tag.isEmpty()) {
+ startTag = tag;
+ }
+ }
+
+ if ( (tag.getAttribute("subType") && !stricmp(tag.getAttribute("subType"), "x-preverse"))
+ || (tag.getAttribute("subtype") && !stricmp(tag.getAttribute("subtype"), "x-preverse")) // deprecated
+ ) {
+ hide = true;
+ preverse = true;
+ header = "";
+ canonical = (tag.getAttribute("canonical") && (!stricmp(tag.getAttribute("canonical"), "true")));
+ continue;
+ }
+ if (!tag.isEndTag()) { //start tag
+ hide = true;
+ header = "";
+ if (option || canonical) { // we want the tag in the text
+ text.append('<');
+ text.append(token);
+ text.append('>');
+ }
+ continue;
+ }
+ if (hide && tag.isEndTag()) {
+ if (module->isProcessEntryAttributes() && ((option || canonical) || (!preverse))) {
+ if (preverse) {
+ sprintf(buf, "%i", pvHeaderNum++);
+ module->getEntryAttributes()["Heading"]["Preverse"][buf] = header;
+ }
+ else {
+ sprintf(buf, "%i", headerNum++);
+ module->getEntryAttributes()["Heading"]["Interverse"][buf] = header;
+ if (option || canonical) { // we want the tag in the text
+ text.append(header);
+ }
+ }
+
+ StringList attributes = startTag.getAttributeNames();
+ for (StringList::const_iterator it = attributes.begin(); it != attributes.end(); it++) {
+ module->getEntryAttributes()["Heading"][buf][it->c_str()] = startTag.getAttribute(it->c_str());
+ }
+ }
+
+ hide = false;
+ if (!(option || canonical) || preverse) { // we don't want the tag in the text anymore
+ preverse = false;
+ continue;
+ }
+ preverse = false;
+ }
+ }
+
+ if (withinTitle) {
+ header.append('<');
+ header.append(token);
+ header.append('>');
+ } else {
+ // if not a heading token, keep token in text
+ if (!hide) {
+ text.append('<');
+ text.append(token);
+ text.append('>');
+ }
+ }
+ continue;
+ }
+ if (intoken) { //copy token
+ token.append(*from);
+ }
+ else if (!hide) { //copy text which is not inside a token
+ text.append(*from);
+ }
+ else header.append(*from);
+ }
+ return 0;
+}
+
+SWORD_NAMESPACE_END
+
diff --git a/src/modules/filters/osishtmlhref.cpp b/src/modules/filters/osishtmlhref.cpp
new file mode 100644
index 0000000..fe3e058
--- /dev/null
+++ b/src/modules/filters/osishtmlhref.cpp
@@ -0,0 +1,561 @@
+/***************************************************************************
+ osishtmlhref.cpp - OSIS to HTML with hrefs filter
+ -------------------
+ begin : 2003-06-24
+ copyright : 2003 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation version 2 of the License. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <ctype.h>
+#include <osishtmlhref.h>
+#include <utilxml.h>
+#include <utilstr.h>
+#include <versekey.h>
+#include <swmodule.h>
+#include <url.h>
+#include <stringmgr.h>
+#include <stack>
+
+SWORD_NAMESPACE_START
+
+class OSISHTMLHREF::QuoteStack : public std::stack<char *> {
+};
+
+OSISHTMLHREF::MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) {
+ inBold = false;
+ inXRefNote = false;
+ suspendLevel = 0;
+ quoteStack = new QuoteStack();
+ wordsOfChristStart = "<font color=\"red\"> ";
+ wordsOfChristEnd = "</font> ";
+ if (module) {
+ osisQToTick = ((!module->getConfigEntry("OSISqToTick")) || (strcmp(module->getConfigEntry("OSISqToTick"), "false")));
+ version = module->Name();
+ BiblicalText = (!strcmp(module->Type(), "Biblical Texts"));
+ }
+ else {
+ osisQToTick = true; // default
+ version = "";
+ }
+}
+
+OSISHTMLHREF::MyUserData::~MyUserData() {
+ // Just in case the quotes are not well formed
+ while (!quoteStack->empty()) {
+ char *tagData = quoteStack->top();
+ quoteStack->pop();
+ delete [] tagData;
+ }
+ delete quoteStack;
+}
+
+OSISHTMLHREF::OSISHTMLHREF() {
+ setTokenStart("<");
+ setTokenEnd(">");
+
+ setEscapeStart("&");
+ setEscapeEnd(";");
+
+ setEscapeStringCaseSensitive(true);
+ setPassThruNumericEscapeString(true);
+
+ addAllowedEscapeString("quot");
+ addAllowedEscapeString("apos");
+ addAllowedEscapeString("amp");
+ addAllowedEscapeString("lt");
+ addAllowedEscapeString("gt");
+
+ setTokenCaseSensitive(true);
+
+ addTokenSubstitute("lg", "<br />");
+ addTokenSubstitute("/lg", "<br />");
+
+ morphFirst = false;
+}
+
+// though this might be slightly slower, possibly causing an extra bool check, this is a renderFilter
+// so speed isn't the absolute highest priority, and this is a very minor possible hit
+static inline void outText(const char *t, SWBuf &o, BasicFilterUserData *u) { if (!u->suspendTextPassThru) o += t; else u->lastSuspendSegment += t; }
+static inline void outText(char t, SWBuf &o, BasicFilterUserData *u) { if (!u->suspendTextPassThru) o += t; else u->lastSuspendSegment += t; }
+
+void processLemma(bool suspendTextPassThru, XMLTag &tag, SWBuf &buf) {
+ const char *attrib;
+ const char *val;
+ if ((attrib = tag.getAttribute("lemma"))) {
+ int count = tag.getAttributePartCount("lemma", ' ');
+ int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0
+ do {
+ attrib = tag.getAttribute("lemma", i, ' ');
+ if (i < 0) i = 0; // to handle our -1 condition
+ val = strchr(attrib, ':');
+ val = (val) ? (val + 1) : attrib;
+ SWBuf gh;
+ if(*val == 'G')
+ gh = "Greek";
+ if(*val == 'H')
+ gh = "Hebrew";
+ const char *val2 = val;
+ if ((strchr("GH", *val)) && (isdigit(val[1])))
+ val2++;
+ //if ((!strcmp(val2, "3588")) && (lastText.length() < 1))
+ // show = false;
+ //else {
+ if (!suspendTextPassThru) {
+ buf.appendFormatted("<small><em>&lt;<a href=\"passagestudy.jsp?action=showStrongs&type=%s&value=%s\">%s</a>&gt;</em></small>",
+ (gh.length()) ? gh.c_str() : "",
+ URL::encode(val2).c_str(),
+ val2);
+ }
+ //}
+
+ } while (++i < count);
+ }
+}
+
+void processMorph(bool suspendTextPassThru, XMLTag &tag, SWBuf &buf) {
+ const char * attrib;
+ const char *val;
+ if ((attrib = tag.getAttribute("morph"))) { // && (show)) {
+ SWBuf savelemma = tag.getAttribute("savlm");
+ //if ((strstr(savelemma.c_str(), "3588")) && (lastText.length() < 1))
+ // show = false;
+ //if (show) {
+ int count = tag.getAttributePartCount("morph", ' ');
+ int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0
+ do {
+ attrib = tag.getAttribute("morph", i, ' ');
+ if (i < 0) i = 0; // to handle our -1 condition
+ val = strchr(attrib, ':');
+ val = (val) ? (val + 1) : attrib;
+ const char *val2 = val;
+ if ((*val == 'T') && (strchr("GH", val[1])) && (isdigit(val[2])))
+ val2+=2;
+ if (!suspendTextPassThru) {
+ buf.appendFormatted("<small><em>(<a href=\"passagestudy.jsp?action=showMorph&type=%s&value=%s\">%s</a>)</em></small>",
+ URL::encode(tag.getAttribute("morph")).c_str(),
+ URL::encode(val).c_str(),
+ val2);
+ }
+ } while (++i < count);
+ //}
+ }
+}
+
+bool OSISHTMLHREF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+ MyUserData *u = (MyUserData *)userData;
+ SWBuf scratch;
+ bool sub = (u->suspendTextPassThru) ? substituteToken(scratch, token) : substituteToken(buf, token);
+ if (!sub) {
+ // manually process if it wasn't a simple substitution
+ XMLTag tag(token);
+
+ // <w> tag
+ if (!strcmp(tag.getName(), "w")) {
+
+ // start <w> tag
+ if ((!tag.isEmpty()) && (!tag.isEndTag())) {
+ u->w = token;
+ }
+
+ // end or empty <w> tag
+ else {
+ bool endTag = tag.isEndTag();
+ SWBuf lastText;
+ //bool show = true; // to handle unplaced article in kjv2003-- temporary till combined
+
+ if (endTag) {
+ tag = u->w.c_str();
+ lastText = u->lastTextNode.c_str();
+ }
+ else lastText = "stuff";
+
+ const char *attrib;
+ const char *val;
+ if ((attrib = tag.getAttribute("xlit"))) {
+ val = strchr(attrib, ':');
+ val = (val) ? (val + 1) : attrib;
+ outText(" ", buf, u);
+ outText(val, buf, u);
+ }
+ if ((attrib = tag.getAttribute("gloss"))) {
+ val = strchr(attrib, ':');
+ val = (val) ? (val + 1) : attrib;
+ outText(" ", buf, u);
+ outText(val, buf, u);
+ }
+ if (!morphFirst) {
+ processLemma(u->suspendTextPassThru, tag, buf);
+ processMorph(u->suspendTextPassThru, tag, buf);
+ }
+ else {
+ processMorph(u->suspendTextPassThru, tag, buf);
+ processLemma(u->suspendTextPassThru, tag, buf);
+ }
+ if ((attrib = tag.getAttribute("POS"))) {
+ val = strchr(attrib, ':');
+ val = (val) ? (val + 1) : attrib;
+ outText(" ", buf, u);
+ outText(val, buf, u);
+ }
+
+ /*if (endTag)
+ buf += "}";*/
+ }
+ }
+
+ // <note> tag
+ else if (!strcmp(tag.getName(), "note")) {
+ if (!tag.isEndTag()) {
+ SWBuf type = tag.getAttribute("type");
+ bool strongsMarkup = (type == "x-strongsMarkup" || type == "strongsMarkup"); // the latter is deprecated
+ if (strongsMarkup) {
+ tag.setEmpty(false); // handle bug in KJV2003 module where some note open tags were <note ... />
+ }
+
+ if (!tag.isEmpty()) {
+
+ if (!strongsMarkup) { // leave strong's markup notes out, in the future we'll probably have different option filters to turn different note types on or off
+ SWBuf footnoteNumber = tag.getAttribute("swordFootnote");
+ VerseKey *vkey = NULL;
+ char ch = ((tag.getAttribute("type") && ((!strcmp(tag.getAttribute("type"), "crossReference")) || (!strcmp(tag.getAttribute("type"), "x-cross-ref")))) ? 'x':'n');
+
+ u->inXRefNote = true; // Why this change? Ben Morgan: Any note can have references in, so we need to set this to true for all notes
+// u->inXRefNote = (ch == 'x');
+
+ // see if we have a VerseKey * or descendant
+ SWTRY {
+ vkey = SWDYNAMIC_CAST(VerseKey, u->key);
+ }
+ SWCATCH ( ... ) { }
+ if (vkey) {
+ //printf("URL = %s\n",URL::encode(vkey->getText()).c_str());
+ buf.appendFormatted("<a href=\"passagestudy.jsp?action=showNote&type=%c&value=%s&module=%s&passage=%s\"><small><sup>*%c</sup></small></a>",
+ ch,
+ URL::encode(footnoteNumber.c_str()).c_str(),
+ URL::encode(u->version.c_str()).c_str(),
+ URL::encode(vkey->getText()).c_str(),
+ ch);
+ }
+ else {
+ buf.appendFormatted("<a href=\"passagestudy.jsp?action=showNote&type=%c&value=%s&module=%s&passage=%s\"><small><sup>*%c</sup></small></a>",
+ ch,
+ URL::encode(footnoteNumber.c_str()).c_str(),
+ URL::encode(u->version.c_str()).c_str(),
+ URL::encode(u->key->getText()).c_str(),
+ ch);
+ }
+ }
+ }
+ u->suspendTextPassThru = (++u->suspendLevel);
+ }
+ if (tag.isEndTag()) {
+ u->suspendTextPassThru = (--u->suspendLevel);
+ u->inXRefNote = false;
+ u->lastSuspendSegment = ""; // fix/work-around for nasb devineName in note bug
+ }
+ }
+
+ // <p> paragraph tag
+ else if (!strcmp(tag.getName(), "p")) {
+ if ((!tag.isEndTag()) && (!tag.isEmpty())) { // non-empty start tag
+ outText("<!P><br />", buf, u);
+ }
+ else if (tag.isEndTag()) { // end tag
+ outText("<!/P><br />", buf, u);
+ userData->supressAdjacentWhitespace = true;
+ }
+ else { // empty paragraph break marker
+ outText("<!P><br />", buf, u);
+ userData->supressAdjacentWhitespace = true;
+ }
+ }
+
+ // <reference> tag
+ else if (!strcmp(tag.getName(), "reference")) {
+ if (!u->inXRefNote) { // only show these if we're not in an xref note
+ if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+ u->suspendTextPassThru = (++u->suspendLevel);
+ }
+ if (tag.isEndTag()) {
+ if (!u->BiblicalText) {
+ SWBuf refList = tag.getAttribute("passage");
+ if (!refList.length())
+ refList = u->lastTextNode;
+ SWBuf version = tag.getAttribute("version");
+
+ buf.appendFormatted("&nbsp;<a href=\"passagestudy.jsp?action=showRef&type=scripRef&value=%s&module=%s\">",
+ (refList.length()) ? URL::encode(refList.c_str()).c_str() : "",
+ (version.length()) ? URL::encode(version.c_str()).c_str() : "");
+ buf += u->lastTextNode.c_str();
+ buf += "</a>&nbsp;";
+ }
+ else {
+ SWBuf footnoteNumber = tag.getAttribute("swordFootnote");
+ VerseKey *vkey = NULL;
+ // see if we have a VerseKey * or descendant
+ SWTRY {
+ vkey = SWDYNAMIC_CAST(VerseKey, u->key);
+ }
+ SWCATCH ( ... ) {}
+ if (vkey) {
+ // leave this special osis type in for crossReference notes types? Might thml use this some day? Doesn't hurt.
+ //buf.appendFormatted("<a href=\"noteID=%s.x.%s\"><small><sup>*x</sup></small></a> ", vkey->getText(), footnoteNumber.c_str());
+ buf.appendFormatted("<a href=\"passagestudy.jsp?action=showNote&type=x&value=%s&module=%s&passage=%s\"><small><sup>*x</sup></small></a>",
+ URL::encode(footnoteNumber.c_str()).c_str(),
+ URL::encode(u->version.c_str()).c_str(),
+ URL::encode(vkey->getText()).c_str());
+
+ }
+ }
+ u->suspendTextPassThru = (--u->suspendLevel);
+ }
+ }/*
+ if (tag.isEndTag()) {
+ u->suspendTextPassThru = false;
+ }*/
+ }
+
+ // <l> poetry, etc
+ else if (!strcmp(tag.getName(), "l")) {
+ // end line marker
+ if (tag.getAttribute("eID")) {
+ outText("<br />", buf, u);
+ }
+ // <l/> without eID or sID
+ // Note: this is improper osis. This should be <lb/>
+ else if (tag.isEmpty() && !tag.getAttribute("sID")) {
+ outText("<br />", buf, u);
+ }
+ // end of the line
+ else if (tag.isEndTag()) {
+ outText("<br />", buf, u);
+ }
+ }
+
+ // <lb.../>
+ else if (!strcmp(tag.getName(), "lb")) {
+ outText("<br />", buf, u);
+ userData->supressAdjacentWhitespace = true;
+ }
+ // <milestone type="line"/>
+ // <milestone type="x-p"/>
+ // <milestone type="cQuote" marker="x"/>
+ else if ((!strcmp(tag.getName(), "milestone")) && (tag.getAttribute("type"))) {
+ if(!strcmp(tag.getAttribute("type"), "line")) {
+ outText("<br />", buf, u);
+ userData->supressAdjacentWhitespace = true;
+ }
+ else if(!strcmp(tag.getAttribute("type"),"x-p")) {
+ if( tag.getAttribute("marker"))
+ outText(tag.getAttribute("marker"), buf, u);
+ else outText("<!p>", buf, u);
+ }
+ else if (!strcmp(tag.getAttribute("type"), "cQuote")) {
+ const char *tmp = tag.getAttribute("marker");
+ bool hasMark = tmp;
+ SWBuf mark = tmp;
+ tmp = tag.getAttribute("level");
+ int level = (tmp) ? atoi(tmp) : 1;
+
+ // first check to see if we've been given an explicit mark
+ if (hasMark)
+ outText(mark, buf, u);
+ // finally, alternate " and ', if config says we should supply a mark
+ else if (u->osisQToTick)
+ outText((level % 2) ? '\"' : '\'', buf, u);
+ }
+ }
+
+ // <title>
+ else if (!strcmp(tag.getName(), "title")) {
+ if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+ outText("<b>", buf, u);
+ }
+ else if (tag.isEndTag()) {
+ outText("</b><br />", buf, u);
+ }
+ }
+
+ // <catchWord> & <rdg> tags (italicize)
+ else if (!strcmp(tag.getName(), "rdg") || !strcmp(tag.getName(), "catchWord")) {
+ if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+ outText("<i>", buf, u);
+ }
+ else if (tag.isEndTag()) {
+ outText("</i>", buf, u);
+ }
+ }
+
+ // divineName
+ else if (!strcmp(tag.getName(), "divineName")) {
+ if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+ u->suspendTextPassThru = (++u->suspendLevel);
+ }
+ else if (tag.isEndTag()) {
+ SWBuf lastText = u->lastSuspendSegment.c_str();
+ u->suspendTextPassThru = (--u->suspendLevel);
+ if (lastText.size()) {
+ toupperstr(lastText);
+ scratch.setFormatted("%c<font size=\"-1\">%s</font>", lastText[0], lastText.c_str()+1);
+ outText(scratch.c_str(), buf, u);
+ }
+ }
+ }
+
+ // <hi> text highlighting
+ else if (!strcmp(tag.getName(), "hi")) {
+ SWBuf type = tag.getAttribute("type");
+ if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+ if (type == "b" || type == "x-b") {
+ outText("<b>", buf, u);
+ u->inBold = true;
+ }
+ else { // all other types
+ outText("<i>", buf, u);
+ u->inBold = false;
+ }
+ }
+ else if (tag.isEndTag()) {
+ if(u->inBold) {
+ outText("</b>", buf, u);
+ u->inBold = false;
+ }
+ else outText("</i>", buf, u);
+ }
+ }
+
+ // <q> quote
+ // Rules for a quote element:
+ // If the tag is empty with an sID or an eID then use whatever it specifies for quoting.
+ // Note: empty elements without sID or eID are ignored.
+ // If the tag is <q> then use it's specifications and push it onto a stack for </q>
+ // If the tag is </q> then use the pushed <q> for specification
+ // If there is a marker attribute, possibly empty, this overrides osisQToTick.
+ // If osisQToTick, then output the marker, using level to determine the type of mark.
+ else if (!strcmp(tag.getName(), "q")) {
+ SWBuf type = tag.getAttribute("type");
+ SWBuf who = tag.getAttribute("who");
+ const char *tmp = tag.getAttribute("level");
+ int level = (tmp) ? atoi(tmp) : 1;
+ tmp = tag.getAttribute("marker");
+ bool hasMark = tmp;
+ SWBuf mark = tmp;
+
+ // open <q> or <q sID... />
+ if ((!tag.isEmpty() && !tag.isEndTag()) || (tag.isEmpty() && tag.getAttribute("sID"))) {
+ // if <q> then remember it for the </q>
+ if (!tag.isEmpty()) {
+ char *tagData = 0;
+ stdstr(&tagData, tag.toString());
+ u->quoteStack->push(tagData);
+ }
+
+ // Do this first so quote marks are included as WoC
+ if (who == "Jesus")
+ outText(u->wordsOfChristStart, buf, u);
+
+ // first check to see if we've been given an explicit mark
+ if (hasMark)
+ outText(mark, buf, u);
+ //alternate " and '
+ else if (u->osisQToTick)
+ outText((level % 2) ? '\"' : '\'', buf, u);
+ }
+ // close </q> or <q eID... />
+ else if ((tag.isEndTag()) || (tag.isEmpty() && tag.getAttribute("eID"))) {
+ // if it is </q> then pop the stack for the attributes
+ if (tag.isEndTag() && !u->quoteStack->empty()) {
+ char *tagData = u->quoteStack->top();
+ u->quoteStack->pop();
+ XMLTag qTag(tagData);
+ delete [] tagData;
+
+ type = qTag.getAttribute("type");
+ who = qTag.getAttribute("who");
+ tmp = qTag.getAttribute("level");
+ level = (tmp) ? atoi(tmp) : 1;
+ tmp = qTag.getAttribute("marker");
+ hasMark = tmp;
+ mark = tmp;
+ }
+
+ // first check to see if we've been given an explicit mark
+ if (hasMark)
+ outText(mark, buf, u);
+ // finally, alternate " and ', if config says we should supply a mark
+ else if (u->osisQToTick)
+ outText((level % 2) ? '\"' : '\'', buf, u);
+
+ // Do this last so quote marks are included as WoC
+ if (who == "Jesus")
+ outText(u->wordsOfChristEnd, buf, u);
+ }
+ }
+
+ // <transChange>
+ else if (!strcmp(tag.getName(), "transChange")) {
+ if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+ SWBuf type = tag.getAttribute("type");
+ u->lastTransChange = type;
+
+ // just do all transChange tags this way for now
+ if ((type == "added") || (type == "supplied"))
+ outText("<i>", buf, u);
+ else if (type == "tenseChange")
+ buf += "*";
+ }
+ else if (tag.isEndTag()) {
+ SWBuf type = u->lastTransChange;
+ if ((type == "added") || (type == "supplied"))
+ outText("</i>", buf, u);
+ }
+ else { // empty transChange marker?
+ }
+ }
+
+ // image
+ else if (!strcmp(tag.getName(), "figure")) {
+ const char *src = tag.getAttribute("src");
+ if (!src) // assert we have a src attribute
+ return false;
+
+ SWBuf filepath;
+ if (userData->module) {
+ filepath = userData->module->getConfigEntry("AbsoluteDataPath");
+ if ((filepath.size()) && (filepath[filepath.size()-1] != '/') && (src[0] != '/'))
+ filepath += '/';
+ }
+ filepath += src;
+
+ // images become clickable, if the UI supports showImage.
+ outText("<a href=\"passagestudy.jsp?action=showImage&value=", buf, u);
+ outText(URL::encode(filepath.c_str()).c_str(), buf, u);
+ outText("&module=", buf, u);
+ outText(URL::encode(u->version.c_str()).c_str(), buf, u);
+ outText("\">", buf, u);
+
+// we do this because BibleCS looks for this EXACT format for an image tag
+ outText("<image border=0 src=\"", buf, u);
+ outText(filepath, buf, u);
+ outText("\" />", buf, u);
+
+ outText("</a>", buf, u);
+ }
+
+ else {
+ return false; // we still didn't handle token
+ }
+ }
+ return true;
+}
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/osislemma.cpp b/src/modules/filters/osislemma.cpp
new file mode 100644
index 0000000..f5e6ff6
--- /dev/null
+++ b/src/modules/filters/osislemma.cpp
@@ -0,0 +1,85 @@
+/******************************************************************************
+ *
+ * osislemma - SWFilter descendant to hide or show lemmata
+ * in a OSIS module.
+ */
+
+
+#include <stdlib.h>
+#include <osislemma.h>
+#include <utilxml.h>
+
+SWORD_NAMESPACE_START
+
+const char oName[] = "Lemmas";
+const char oTip[] = "Toggles Lemmas On and Off if they exist";
+
+const SWBuf choices[3] = {"Off", "On", ""};
+const StringList oValues(&choices[0], &choices[2]);
+
+OSISLemma::OSISLemma() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("Off");
+}
+
+
+OSISLemma::~OSISLemma() {
+}
+
+
+char OSISLemma::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ SWBuf token;
+ bool intoken = false;
+ bool lastspace = false;
+
+ const SWBuf orig = text;
+ const char * from = orig.c_str();
+
+ if (!option) {
+ for (text = ""; *from; ++from) {
+ if (*from == '<') {
+ intoken = true;
+ token = "";
+ continue;
+ }
+ if (*from == '>') { // process tokens
+ intoken = false;
+ if (token.startsWith("w ")) { // Word
+ XMLTag wtag(token);
+ int count = wtag.getAttributePartCount("lemma", ' ');
+ for (int i = 0; i < count; i++) {
+ SWBuf a = wtag.getAttribute("lemma", i, ' ');
+ const char *prefix = a.stripPrefix(':');
+ if ((!prefix) || ((SWBuf)prefix).startsWith("lemma.")) {
+ // remove attribute part
+ wtag.setAttribute("lemma", 0, i, ' ');
+ i--;
+ count--;
+ }
+ }
+ token = wtag;
+ token.trim();
+ // drop <>
+ token << 1;
+ token--;
+ }
+
+ // keep token in text
+ text.append('<');
+ text.append(token);
+ text.append('>');
+
+ continue;
+ }
+ if (intoken) {
+ token += *from;
+ }
+ else {
+ text.append(*from);
+ lastspace = (*from == ' ');
+ }
+ }
+ }
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/osismorph.cpp b/src/modules/filters/osismorph.cpp
new file mode 100644
index 0000000..69d44d5
--- /dev/null
+++ b/src/modules/filters/osismorph.cpp
@@ -0,0 +1,85 @@
+/******************************************************************************
+ *
+ * osismorph - SWFilter descendant to hide or show morph tags
+ * in a OSIS module.
+ */
+
+
+#include <stdlib.h>
+#include <osismorph.h>
+
+SWORD_NAMESPACE_START
+
+const char oName[] = "Morphological Tags";
+const char oTip[] = "Toggles Morphological Tags On and Off if they exist";
+
+const SWBuf choices[3] = {"Off", "On", ""};
+const StringList oValues(&choices[0], &choices[2]);
+
+OSISMorph::OSISMorph() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("Off");
+}
+
+
+OSISMorph::~OSISMorph() {
+}
+
+
+char OSISMorph::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ if (!option) { // if we don't want morph tags
+ char token[2048]; // cheese. Fix.
+ int tokpos = 0;
+ bool intoken = false;
+ bool lastspace = false;
+ SWBuf orig = text;
+ const char *from = orig.c_str();
+
+ //taken out of the loop for speed
+ const char* start = 0;
+ const char* end = 0;
+
+ for (text = ""; *from; ++from) {
+ if (*from == '<') {
+ intoken = true;
+ tokpos = 0;
+ token[0] = 0;
+ continue;
+ }
+ if (*from == '>') { // process tokens
+ intoken = false;
+
+ if ((*token == 'w') && (token[1] == ' ')) {
+ start = strstr(token+2, "morph=\""); //we leave out "w " at the start
+ end = start ? strchr(start+7, '"') : 0; //search the end of the morph value
+
+ if (start && end) { //start and end of the morph tag found
+ text.append('<');
+ text.append(token, start-token); //the text before the morph attr
+ text.append(end+1); //text after the morph attr
+ text.append('>');
+
+ continue; //next loop
+ }
+ }
+
+ text.append('<');
+ text.append(token);
+ text.append('>');
+
+ continue;
+ }
+ if (intoken) {
+ if (tokpos < 2045)
+ token[tokpos++] = *from;
+ token[tokpos] = 0;
+ }
+ else {
+ text.append(*from);
+ lastspace = (*from == ' ');
+ }
+ }
+ }
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/osismorphsegmentation.cpp b/src/modules/filters/osismorphsegmentation.cpp
new file mode 100644
index 0000000..bf32581
--- /dev/null
+++ b/src/modules/filters/osismorphsegmentation.cpp
@@ -0,0 +1,106 @@
+/******************************************************************************
+ *
+ * osismorphsegmentation - SWFilter descendant to toggle splitting of morphemes
+ * (for morpheme segmented Hebrew in the WLC)
+ */
+
+
+#include <osismorphsegmentation.h>
+#include <stdlib.h>
+#include <utilxml.h>
+#include <swmodule.h>
+#include <swbuf.h>
+
+SWORD_NAMESPACE_START
+
+const char oName[] = "Morpheme Segmentation";
+const char oTip[] = "Toggles Morpheme Segmentation On and Off, when present";
+
+const SWBuf choices[3] = {"Off", "On", ""};
+const StringList oValues(&choices[0], &choices[2]);
+
+OSISMorphSegmentation::OSISMorphSegmentation() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("Off");
+}
+
+
+OSISMorphSegmentation::~OSISMorphSegmentation() {}
+
+
+char OSISMorphSegmentation::processText(SWBuf &text, const SWKey * /*key*/, const SWModule *module) {
+ SWBuf token;
+ bool intoken = false;
+ bool hide = false;
+
+ SWBuf orig( text );
+ const char *from = orig.c_str();
+
+ XMLTag tag;
+ SWBuf tagText = "";
+ unsigned int morphemeNum = 0;
+ bool inMorpheme = false;
+ SWBuf buf;
+
+ for (text = ""; *from; ++from) {
+ if (*from == '<') {
+ intoken = true;
+ token = "";
+ continue;
+ }
+
+ if (*from == '>') { // process tokens
+ intoken = false;
+
+ if (!strncmp(token.c_str(), "seg ", 4) || !strncmp(token.c_str(), "/seg", 4)) {
+ tag = token;
+
+ if (!tag.isEndTag() && tag.getAttribute("type") && !strcmp("morph", tag.getAttribute("type"))) { //<seg type="morph"> start tag
+ hide = !option; //only hide if option is Off
+ tagText = "";
+ inMorpheme = true;
+ }
+
+ if (tag.isEndTag()) {
+ buf.setFormatted("%.3d", morphemeNum++);
+ module->getEntryAttributes()["Morpheme"][buf]["body"] = tagText;
+ inMorpheme = false;
+ }
+ if (hide) { //hides start and end tags as long as hide is set
+
+ if (tag.isEndTag()) { //</seg>
+ hide = false;
+ }
+
+ continue; //leave out the current token
+ }
+ } //end of seg tag handling
+
+ text.append('<');
+ text.append(token);
+ text.append('>');
+
+ if (inMorpheme) {
+ tagText.append('<');
+ tagText.append(token);
+ tagText.append('>');
+ }
+
+ hide = false;
+
+ continue;
+ } //end of intoken part
+
+ if (intoken) { //copy token
+ token.append(*from);
+ }
+ else { //copy text which is not inside of a tag
+ text.append(*from);
+ if (inMorpheme) {
+ tagText.append(*from);
+ }
+ }
+ }
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/osisosis.cpp b/src/modules/filters/osisosis.cpp
new file mode 100644
index 0000000..7da6089
--- /dev/null
+++ b/src/modules/filters/osisosis.cpp
@@ -0,0 +1,173 @@
+/***************************************************************************
+ osisosis.cpp - internal OSIS to public OSIS filter
+ -------------------
+ begin : 2004-03-13
+ copyright : 2003 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation version 2 of the License.
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <osisosis.h>
+#include <utilxml.h>
+#include <versekey.h>
+#include <swmodule.h>
+
+SWORD_NAMESPACE_START
+
+
+OSISOSIS::MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) {
+ osisQToTick = ((!module->getConfigEntry("OSISqToTick")) || (strcmp(module->getConfigEntry("OSISqToTick"), "false")));
+}
+
+
+OSISOSIS::OSISOSIS() {
+ setTokenStart("<");
+ setTokenEnd(">");
+
+ setEscapeStart("&");
+ setEscapeEnd(";");
+
+ setEscapeStringCaseSensitive(true);
+ setPassThruNumericEscapeString(true);
+
+ addAllowedEscapeString("quot");
+ addAllowedEscapeString("apos");
+ addAllowedEscapeString("amp");
+ addAllowedEscapeString("lt");
+ addAllowedEscapeString("gt");
+
+ setTokenCaseSensitive(true);
+}
+
+
+char OSISOSIS::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ char status = SWBasicFilter::processText(text, key, module);
+ VerseKey *vkey = SWDYNAMIC_CAST(VerseKey, key);
+ if (vkey) {
+ SWBuf ref = "";
+ if (vkey->Verse()) {
+ ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef());
+ }
+
+ if (ref.length() > 0) {
+
+ text = ref + text;
+
+ if (vkey->Verse()) {
+ VerseKey tmp;
+ tmp = *vkey;
+ tmp.AutoNormalize(0);
+ tmp.Headings(1);
+
+ text += "</verse>";
+
+ tmp = MAXVERSE;
+ if (*vkey == tmp) {
+ tmp.Verse(0);
+// sprintf(ref, "\t</div>");
+// pushString(&to, ref);
+ tmp = MAXCHAPTER;
+ tmp = MAXVERSE;
+ if (*vkey == tmp) {
+ tmp.Chapter(0);
+ tmp.Verse(0);
+// sprintf(ref, "\t</div>");
+// pushString(&to, ref);
+ }
+ }
+ }
+
+//
+// else if (vkey->Chapter()) {
+// sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef());
+// }
+// else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef());
+//
+ }
+ }
+ return status;
+}
+
+bool OSISOSIS::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+ // manually process if it wasn't a simple substitution
+ if (!substituteToken(buf, token)) {
+ MyUserData *u = (MyUserData *)userData;
+ XMLTag tag(token);
+
+ if (!tag.isEmpty() && (!tag.isEndTag()))
+ u->startTag = tag;
+
+ // <w> tag
+ if (!strcmp(tag.getName(), "w")) {
+
+ // start <w> tag
+ if ((!tag.isEmpty()) && (!tag.isEndTag())) {
+ SWBuf attr = tag.getAttribute("lemma");
+ if (attr.length()) {
+ if (!strncmp(attr.c_str(), "x-Strongs:", 10)) {
+ memcpy(attr.getRawData()+3, "strong", 6);
+ attr << 3;
+ tag.setAttribute("lemma", attr);
+ }
+ }
+ attr = tag.getAttribute("morph");
+ if (attr.length()) {
+ if (!strncmp(attr.c_str(), "x-StrongsMorph:", 15)) {
+ memcpy(attr.getRawData()+3, "strong", 6);
+ attr << 3;
+ tag.setAttribute("lemma", attr);
+ }
+ if (!strncmp(attr.c_str(), "x-Robinson:", 11)) {
+ attr[2] = 'r';
+ attr << 2;
+ tag.setAttribute("lemma", attr);
+ }
+ }
+ tag.setAttribute("wn", 0);
+ tag.setAttribute("savlm", 0);
+ tag.setAttribute("splitID", 0);
+ }
+ buf += tag;
+ }
+
+ // <note> tag
+ else if (!strcmp(tag.getName(), "note")) {
+ if (!tag.isEndTag()) {
+ SWBuf type = tag.getAttribute("type");
+ bool strongsMarkup = (type == "x-strongsMarkup" || type == "strongsMarkup"); // the latter is deprecated
+ if (strongsMarkup) {
+ tag.setEmpty(false); // handle bug in KJV2003 module where some note open tags were <note ... />
+ }
+
+ if (!tag.isEmpty()) {
+ tag.setAttribute("swordFootnote", 0);
+
+ if (!strongsMarkup) {
+ buf += tag;
+ }
+ else u->suspendTextPassThru = true;
+ }
+ }
+ if (tag.isEndTag()) {
+ if (u->suspendTextPassThru == false)
+ buf+=tag;
+ else u->suspendTextPassThru = false;
+ }
+ }
+
+ else {
+ return false; // we still didn't handle token
+ }
+ }
+ return true;
+}
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/osisplain.cpp b/src/modules/filters/osisplain.cpp
new file mode 100644
index 0000000..62841a6
--- /dev/null
+++ b/src/modules/filters/osisplain.cpp
@@ -0,0 +1,192 @@
+/***************************************************************************
+ osisplain.cpp - OSIS to Plaintext filter
+ -------------------
+ begin : 2003-02-15
+ copyright : 2003 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <osisplain.h>
+#include <ctype.h>
+#include <versekey.h>
+#include <stringmgr.h>
+
+SWORD_NAMESPACE_START
+
+OSISPlain::OSISPlain() {
+ setTokenStart("<");
+ setTokenEnd(">");
+
+ setEscapeStart("&");
+ setEscapeEnd(";");
+
+ setEscapeStringCaseSensitive(true);
+
+ addEscapeStringSubstitute("amp", "&");
+ addEscapeStringSubstitute("apos", "'");
+ addEscapeStringSubstitute("lt", "<");
+ addEscapeStringSubstitute("gt", ">");
+ addEscapeStringSubstitute("quot", "\"");
+
+ setTokenCaseSensitive(true);
+ addTokenSubstitute("title", "\n");
+ addTokenSubstitute("/title", "\n");
+ addTokenSubstitute("/l", "\n");
+ addTokenSubstitute("lg", "\n");
+ addTokenSubstitute("/lg", "\n");
+}
+
+
+bool OSISPlain::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+ // manually process if it wasn't a simple substitution
+ if (!substituteToken(buf, token)) {
+ MyUserData *u = (MyUserData *)userData;
+ VerseKey *vk = SWDYNAMIC_CAST(VerseKey, u->key);
+ char testament = (vk) ? vk ->Testament() : 2; // default to NT
+ if (((*token == 'w') && (token[1] == ' ')) ||
+ ((*token == '/') && (token[1] == 'w') && (!token[2]))) {
+ u->tag = token;
+
+ bool start = false;
+ if (*token == 'w') {
+ if (token[strlen(token)-1] != '/') {
+ u->w = token;
+ return true;
+ }
+ start = true;
+ }
+ u->tag = (start) ? token : u->w.c_str();
+ bool show = true; // to handle unplaced article in kjv2003-- temporary till combined
+
+ SWBuf lastText = (start) ? "stuff" : u->lastTextNode.c_str();
+
+ const char *attrib;
+ const char *val;
+ if ((attrib = u->tag.getAttribute("xlit"))) {
+ val = strchr(attrib, ':');
+ val = (val) ? (val + 1) : attrib;
+ buf.append(" <");
+ buf.append(val);
+ buf.append('>');
+ }
+ if ((attrib = u->tag.getAttribute("gloss"))) {
+ val = strchr(attrib, ':');
+ val = (val) ? (val + 1) : attrib;
+ buf.append(" <");
+ buf.append(val);
+ buf.append('>');
+ }
+ if ((attrib = u->tag.getAttribute("lemma"))) {
+ int count = u->tag.getAttributePartCount("lemma", ' ');
+ int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0
+ do {
+ char gh;
+ attrib = u->tag.getAttribute("lemma", i, ' ');
+ if (i < 0) i = 0; // to handle our -1 condition
+ val = strchr(attrib, ':');
+ val = (val) ? (val + 1) : attrib;
+ if ((strchr("GH", *val)) && (isdigit(val[1]))) {
+ gh = *val;
+ val++;
+ }
+ else {
+ gh = (testament>1) ? 'G' : 'H';
+ }
+ if ((!strcmp(val, "3588")) && (lastText.length() < 1))
+ show = false;
+ else {
+ buf.append(" <");
+ buf.append(gh);
+ buf.append(val);
+ buf.append(">");
+ }
+ } while (++i < count);
+ }
+ if ((attrib = u->tag.getAttribute("morph")) && (show)) {
+ int count = u->tag.getAttributePartCount("morph", ' ');
+ int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0
+ do {
+ attrib = u->tag.getAttribute("morph", i, ' ');
+ if (i < 0) i = 0; // to handle our -1 condition
+ val = strchr(attrib, ':');
+ val = (val) ? (val + 1) : attrib;
+ if ((*val == 'T') && (strchr("GH", val[1])) && (isdigit(val[2])))
+ val+=2;
+ buf.append(" (");
+ buf.append(val);
+ buf.append(')');
+ } while (++i < count);
+ }
+ if ((attrib = u->tag.getAttribute("POS"))) {
+ val = strchr(attrib, ':');
+ val = (val) ? (val + 1) : attrib;
+
+ buf.append(" <");
+ buf.append(val);
+ buf.append('>');
+ }
+ }
+
+ // <note> tag
+ else if (!strncmp(token, "note", 4)) {
+ if (!strstr(token, "strongsMarkup")) { // leave strong's markup notes out, in the future we'll probably have different option filters to turn different note types on or off
+ buf.append(" (");
+ }
+ else u->suspendTextPassThru = true;
+ }
+ else if (!strncmp(token, "/note", 5)) {
+ if (!u->suspendTextPassThru)
+ buf.append(')');
+ else u->suspendTextPassThru = false;
+ }
+
+ // <p> paragraph tag
+ else if (((*token == 'p') && ((token[1] == ' ') || (!token[1]))) ||
+ ((*token == '/') && (token[1] == 'p') && (!token[2]))) {
+ userData->supressAdjacentWhitespace = true;
+ buf.append('\n');
+ }
+
+ // <lb .../>
+ else if (!strncmp(token, "lb", 2)) {
+ userData->supressAdjacentWhitespace = true;
+ buf.append('\n');
+ }
+ else if (!strncmp(token, "l", 1) && strstr(token, "eID")) {
+ userData->supressAdjacentWhitespace = true;
+ buf.append('\n');
+ }
+ else if (!strncmp(token, "/divineName", 11)) {
+ // Get the end portion of the string, and upper case it
+ char* end = buf.getRawData();
+ end += buf.size() - u->lastTextNode.size();
+ toupperstr(end);
+ }
+
+ // <milestone type="line"/>
+ else if (!strncmp(token, "milestone", 9)) {
+ const char* type = strstr(token+10, "type=\"");
+ if (type && strncmp(type+6, "line", 4)) { //we check for type != line
+ userData->supressAdjacentWhitespace = true;
+ buf.append('\n');
+ }
+ }
+
+ else {
+ return false; // we still didn't handle token
+ }
+ }
+ return true;
+}
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/osisredletterwords.cpp b/src/modules/filters/osisredletterwords.cpp
new file mode 100644
index 0000000..727332d
--- /dev/null
+++ b/src/modules/filters/osisredletterwords.cpp
@@ -0,0 +1,85 @@
+/******************************************************************************
+ *
+ * OSISRedLetterWords - SWFilter descendant to toggle red coloring for words
+ * of Christ in an OSIS module.
+ */
+
+
+#include <stdlib.h>
+#include <osisredletterwords.h>
+#include <swmodule.h>
+
+
+SWORD_NAMESPACE_START
+
+const char oName[] = "Words of Christ in Red";
+const char oTip[] = "Toggles Red Coloring for Words of Christ On and Off if they are marked";
+
+const SWBuf choices[3] = {"Off", "On", ""};
+const StringList oValues(&choices[0], &choices[2]);
+
+OSISRedLetterWords::OSISRedLetterWords() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("On");
+}
+
+
+OSISRedLetterWords::~OSISRedLetterWords() {
+}
+
+
+char OSISRedLetterWords::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ if (option) //leave in the red lettered words
+ return 0;
+
+ SWBuf token;
+ bool intoken = false;
+
+ SWBuf orig = text;
+ const char *from = orig.c_str();
+
+ //taken out of the loop
+ const char* start = 0;
+ const char* end = 0;
+
+ for (text = ""; *from; from++) {
+ if (*from == '<') {
+ intoken = true;
+ token = "";
+ continue;
+ }
+ else if (*from == '>') { // process tokens
+ intoken = false;
+
+ if ((token[0] == 'q') && (token[1] == ' ')) { //q tag
+ start = strstr(token.c_str(), " who=\"Jesus\"");
+ if (start && (strlen(start) >= 12)) { //we found a quote of Jesus Christ
+ end = start+12; //marks the end of the who attribute value
+
+ text.append('<');
+ text.append(token, start - (token.c_str())); //the text before the who attr
+ text.append(end, token.c_str() + token.length() - end); //text after the who attr
+ text.append('>');
+
+ continue;
+ }
+ }
+
+ //token not processed, append it. We don't want to alter the text
+ text.append('<');
+ text.append(token);
+ text.append('>');
+ continue;
+ }
+
+ if (intoken) { //copy token
+ token.append(*from);
+ }
+ else { //copy text which is not inside a token
+ text.append(*from);
+ }
+ }
+ return 0;
+}
+
+SWORD_NAMESPACE_END
+
diff --git a/src/modules/filters/osisrtf.cpp b/src/modules/filters/osisrtf.cpp
new file mode 100644
index 0000000..0352335
--- /dev/null
+++ b/src/modules/filters/osisrtf.cpp
@@ -0,0 +1,520 @@
+/***************************************************************************
+ osisrtf.cpp - OSIS to RTF filter
+ -------------------
+ begin : 2003-02-15
+ copyright : 2003 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation version 2 of the License. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <ctype.h>
+#include <osisrtf.h>
+#include <utilxml.h>
+#include <utilstr.h>
+#include <versekey.h>
+#include <swmodule.h>
+#include <stringmgr.h>
+#include <stack>
+
+SWORD_NAMESPACE_START
+
+namespace {
+ class MyUserData : public BasicFilterUserData {
+ public:
+ bool osisQToTick;
+ bool BiblicalText;
+ bool inXRefNote;
+ int suspendLevel;
+ std::stack<char *> quoteStack;
+ SWBuf w;
+ SWBuf version;
+ MyUserData(const SWModule *module, const SWKey *key);
+ ~MyUserData();
+ };
+
+
+ MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) {
+ inXRefNote = false;
+ BiblicalText = false;
+ suspendLevel = 0;
+ if (module) {
+ version = module->Name();
+ BiblicalText = (!strcmp(module->Type(), "Biblical Texts"));
+ }
+ osisQToTick = ((!module->getConfigEntry("OSISqToTick")) || (strcmp(module->getConfigEntry("OSISqToTick"), "false")));
+ }
+
+
+ MyUserData::~MyUserData() {
+ // Just in case the quotes are not well formed
+ while (!quoteStack.empty()) {
+ char *tagData = quoteStack.top();
+ quoteStack.pop();
+ delete [] tagData;
+ }
+ }
+static inline void outText(const char *t, SWBuf &o, BasicFilterUserData *u) { if (!u->suspendTextPassThru) o += t; else u->lastSuspendSegment += t; }
+static inline void outText(char t, SWBuf &o, BasicFilterUserData *u) { if (!u->suspendTextPassThru) o += t; else u->lastSuspendSegment += t; }
+};
+
+
+OSISRTF::OSISRTF() {
+ setTokenStart("<");
+ setTokenEnd(">");
+
+ setEscapeStart("&");
+ setEscapeEnd(";");
+
+ setEscapeStringCaseSensitive(true);
+
+ addEscapeStringSubstitute("amp", "&");
+ addEscapeStringSubstitute("apos", "'");
+ addEscapeStringSubstitute("lt", "<");
+ addEscapeStringSubstitute("gt", ">");
+ addEscapeStringSubstitute("quot", "\"");
+ addTokenSubstitute("lg", "{\\par}");
+ addTokenSubstitute("/lg", "{\\par}");
+
+ setTokenCaseSensitive(true);
+}
+
+
+BasicFilterUserData *OSISRTF::createUserData(const SWModule *module, const SWKey *key) {
+ return new MyUserData(module, key);
+}
+
+
+char OSISRTF::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+
+ // preprocess text buffer to escape RTF control codes
+ const char *from;
+ SWBuf orig = text;
+ from = orig.c_str();
+ for (text = ""; *from; from++) { //loop to remove extra spaces
+ switch (*from) {
+ case '{':
+ case '}':
+ case '\\':
+ text += "\\";
+ text += *from;
+ break;
+ default:
+ text += *from;
+ }
+ }
+ text += (char)0;
+
+ SWBasicFilter::processText(text, key, module); //handle tokens as usual
+
+ orig = text;
+ from = orig.c_str();
+ for (text = ""; *from; from++) { //loop to remove extra spaces
+ if ((strchr(" \t\n\r", *from))) {
+ while (*(from+1) && (strchr(" \t\n\r", *(from+1)))) {
+ from++;
+ }
+ text += " ";
+ }
+ else {
+ text += *from;
+ }
+ }
+ text += (char)0; // probably not needed, but don't want to remove without investigating (same as above)
+ return 0;
+}
+
+
+bool OSISRTF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+ // manually process if it wasn't a simple substitution
+ MyUserData *u = (MyUserData *)userData;
+ SWBuf scratch;
+ bool sub = (u->suspendTextPassThru) ? substituteToken(scratch, token) : substituteToken(buf, token);
+ if (!sub) {
+ XMLTag tag(token);
+
+ // <w> tag
+ if (!strcmp(tag.getName(), "w")) {
+
+ // start <w> tag
+ if ((!tag.isEmpty()) && (!tag.isEndTag())) {
+ outText('{', buf, u);
+ u->w = token;
+ }
+
+ // end or empty <w> tag
+ else {
+ bool endTag = tag.isEndTag();
+ SWBuf lastText;
+ bool show = true; // to handle unplaced article in kjv2003-- temporary till combined
+
+ if (endTag) {
+ tag = u->w.c_str();
+ lastText = u->lastTextNode.c_str();
+ }
+ else lastText = "stuff";
+
+ const char *attrib;
+ const char *val;
+ if ((attrib = tag.getAttribute("xlit"))) {
+ val = strchr(attrib, ':');
+ val = (val) ? (val + 1) : attrib;
+ scratch.setFormatted(" {\\fs15 <%s>}", val);
+ outText(scratch.c_str(), buf, u);
+ }
+ if ((attrib = tag.getAttribute("gloss"))) {
+ val = strchr(attrib, ':');
+ val = (val) ? (val + 1) : attrib;
+ scratch.setFormatted(" {\\fs15 <%s>}", val);
+ outText(scratch.c_str(), buf, u);
+ }
+ if ((attrib = tag.getAttribute("lemma"))) {
+ int count = tag.getAttributePartCount("lemma", ' ');
+ int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0
+ do {
+ attrib = tag.getAttribute("lemma", i, ' ');
+ if (i < 0) i = 0; // to handle our -1 condition
+ val = strchr(attrib, ':');
+ val = (val) ? (val + 1) : attrib;
+ const char *val2 = val;
+ if ((strchr("GH", *val)) && (isdigit(val[1])))
+ val2++;
+ if ((!strcmp(val2, "3588")) && (lastText.length() < 1))
+ show = false;
+ else {
+ scratch.setFormatted(" {\\cf3 \\sub <%s>}", val2);
+ outText(scratch.c_str(), buf, u);
+ }
+ } while (++i < count);
+ }
+ if ((attrib = tag.getAttribute("morph")) && (show)) {
+ SWBuf savelemma = tag.getAttribute("savlm");
+ if ((strstr(savelemma.c_str(), "3588")) && (lastText.length() < 1))
+ show = false;
+ if (show) {
+ int count = tag.getAttributePartCount("morph", ' ');
+ int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0
+ do {
+ attrib = tag.getAttribute("morph", i, ' ');
+ if (i < 0) i = 0; // to handle our -1 condition
+ val = strchr(attrib, ':');
+ val = (val) ? (val + 1) : attrib;
+ const char *val2 = val;
+ if ((*val == 'T') && (strchr("GH", val[1])) && (isdigit(val[2])))
+ val2+=2;
+ scratch.setFormatted(" {\\cf4 \\sub (%s)}", val2);
+ outText(scratch.c_str(), buf, u);
+ } while (++i < count);
+ }
+ }
+ if ((attrib = tag.getAttribute("POS"))) {
+ val = strchr(attrib, ':');
+ val = (val) ? (val + 1) : attrib;
+ scratch.setFormatted(" {\\fs15 <%s>}", val);
+ outText(scratch.c_str(), buf, u);
+ }
+
+ if (endTag)
+ outText('}', buf, u);
+ }
+ }
+
+ // <note> tag
+ else if (!strcmp(tag.getName(), "note")) {
+ if (!tag.isEndTag()) {
+ if (!tag.isEmpty()) {
+ SWBuf type = tag.getAttribute("type");
+
+ if ( (type != "x-strongsMarkup") // leave strong's markup notes out, in the future we'll probably have different option filters to turn different note types on or off
+ && (type != "strongsMarkup") // deprecated
+ ) {
+ SWBuf footnoteNumber = tag.getAttribute("swordFootnote");
+ VerseKey *vkey = NULL;
+ // see if we have a VerseKey * or descendant
+ SWTRY {
+ vkey = SWDYNAMIC_CAST(VerseKey, u->key);
+ }
+ SWCATCH ( ... ) { }
+ if (vkey) {
+ char ch = ((!strcmp(type.c_str(), "crossReference")) || (!strcmp(type.c_str(), "x-cross-ref"))) ? 'x':'n';
+ scratch.setFormatted("{\\super <a href=\"\">*%c%i.%s</a>} ", ch, vkey->Verse(), footnoteNumber.c_str());
+ outText(scratch.c_str(), buf, u);
+ u->inXRefNote = (ch == 'x');
+ }
+ }
+ u->suspendTextPassThru = (++u->suspendLevel);
+ }
+ }
+ if (tag.isEndTag()) {
+ u->suspendTextPassThru = (--u->suspendLevel);
+ u->inXRefNote = false;
+ }
+ }
+
+ // <p> paragraph tag
+ else if (!strcmp(tag.getName(), "p")) {
+ if ((!tag.isEndTag()) && (!tag.isEmpty())) { // non-empty start tag
+ outText("{\\fi200\\par}", buf, u);
+ }
+ else if (tag.isEndTag()) { // end tag
+ outText("{\\par}", buf, u);
+ userData->supressAdjacentWhitespace = true;
+ }
+ else { // empty paragraph break marker
+ outText("{\\pard\\par\\par}", buf, u);
+ userData->supressAdjacentWhitespace = true;
+ }
+ }
+
+ // <reference> tag
+ else if (!strcmp(tag.getName(), "reference")) {
+ if (!u->inXRefNote) { // only show these if we're not in an xref note
+ if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+ outText("{<a href=\"\">", buf, u);
+ }
+ else if (tag.isEndTag()) {
+ outText("</a>}", buf, u);
+ }
+ }
+ }
+
+ // <l> poetry
+ else if (!strcmp(tag.getName(), "l")) {
+ // end line marker
+ if (tag.getAttribute("eID")) {
+ outText("{\\par}", buf, u);
+ }
+ // <l/> without eID or sID
+ // Note: this is improper osis. This should be <lb/>
+ else if (tag.isEmpty() && !tag.getAttribute("sID")) {
+ outText("{\\par}", buf, u);
+ }
+ // end of the line
+ else if (tag.isEndTag()) {
+ outText("{\\par}", buf, u);
+ }
+ }
+
+ // <milestone type="line"/> or <lb.../>
+ else if ((!strcmp(tag.getName(), "lb")) || ((!strcmp(tag.getName(), "milestone")) && (tag.getAttribute("type")) && (!strcmp(tag.getAttribute("type"), "line")))) {
+ outText("{\\par}", buf, u);
+ userData->supressAdjacentWhitespace = true;
+ }
+
+ // <title>
+ else if (!strcmp(tag.getName(), "title")) {
+ if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+ outText("{\\par\\i1\\b1 ", buf, u);
+ }
+ else if (tag.isEndTag()) {
+ outText("\\par}", buf, u);
+ }
+ }
+
+ // <catchWord> & <rdg> tags (italicize)
+ else if (!strcmp(tag.getName(), "rdg") || !strcmp(tag.getName(), "catchWord")) {
+ if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+ outText("{\\i1 ", buf, u);
+ }
+ else if (tag.isEndTag()) {
+ outText('}', buf, u);
+ }
+ }
+
+ // <hi>
+ else if (!strcmp(tag.getName(), "hi")) {
+ SWBuf type = tag.getAttribute("type");
+ if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+ if (type == "b" || type == "x-b")
+ outText("{\\b1 ", buf, u);
+ else // all other types
+ outText("{\\i1 ", buf, u);
+ }
+ else if (tag.isEndTag()) {
+ outText('}', buf, u);
+ }
+ }
+
+ // <q> quote
+ // Rules for a quote element:
+ // If the tag is empty with an sID or an eID then use whatever it specifies for quoting.
+ // Note: empty elements without sID or eID are ignored.
+ // If the tag is <q> then use it's specifications and push it onto a stack for </q>
+ // If the tag is </q> then use the pushed <q> for specification
+ // If there is a marker attribute, possibly empty, this overrides osisQToTick.
+ // If osisQToTick, then output the marker, using level to determine the type of mark.
+ else if (!strcmp(tag.getName(), "q")) {
+ SWBuf type = tag.getAttribute("type");
+ SWBuf who = tag.getAttribute("who");
+ const char *tmp = tag.getAttribute("level");
+ int level = (tmp) ? atoi(tmp) : 1;
+ tmp = tag.getAttribute("marker");
+ bool hasMark = tmp;
+ SWBuf mark = tmp;
+
+ // open <q> or <q sID... />
+ if ((!tag.isEmpty() && !tag.isEndTag()) || (tag.isEmpty() && tag.getAttribute("sID"))) {
+ // if <q> then remember it for the </q>
+ if (!tag.isEmpty()) {
+ char *tagData = 0;
+ stdstr(&tagData, tag.toString());
+ u->quoteStack.push(tagData);
+ }
+
+ // Do this first so quote marks are included as WoC
+ if (who == "Jesus")
+ outText("\\cf6 ", buf, u);
+
+ // first check to see if we've been given an explicit mark
+ if (hasMark)
+ outText(mark, buf, u);
+ //alternate " and '
+ else if (u->osisQToTick)
+ outText((level % 2) ? '\"' : '\'', buf, u);
+ }
+ // close </q> or <q eID... />
+ else if ((tag.isEndTag()) || (tag.getAttribute("eID"))) {
+ // if it is </q> then pop the stack for the attributes
+ if (tag.isEndTag() && !u->quoteStack.empty()) {
+ char *tagData = u->quoteStack.top();
+ u->quoteStack.pop();
+ XMLTag qTag(tagData);
+ delete [] tagData;
+
+ type = qTag.getAttribute("type");
+ who = qTag.getAttribute("who");
+ tmp = qTag.getAttribute("level");
+ level = (tmp) ? atoi(tmp) : 1;
+ tmp = qTag.getAttribute("marker");
+ hasMark = tmp;
+ mark = tmp;
+ }
+
+ // first check to see if we've been given an explicit mark
+ if (hasMark)
+ outText(mark, buf, u);
+ // finally, alternate " and ', if config says we should supply a mark
+ else if (u->osisQToTick)
+ outText((level % 2) ? '\"' : '\'', buf, u);
+
+ // Do this last so quote marks are included as WoC
+ if (who == "Jesus")
+ outText("\\cf0 ", buf, u);
+ }
+ }
+
+
+ // <milestone type="cQuote" marker="x"/>
+ else if (!strcmp(tag.getName(), "milestone") && tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "cQuote")) {
+ const char *tmp = tag.getAttribute("marker");
+ bool hasMark = tmp;
+ SWBuf mark = tmp;
+ tmp = tag.getAttribute("level");
+ int level = (tmp) ? atoi(tmp) : 1;
+
+ // first check to see if we've been given an explicit mark
+ if (hasMark)
+ outText(mark, buf, u);
+ // finally, alternate " and ', if config says we should supply a mark
+ else if (u->osisQToTick)
+ outText((level % 2) ? '\"' : '\'', buf, u);
+ }
+
+ // <transChange>
+ else if (!strcmp(tag.getName(), "transChange")) {
+ SWBuf type = tag.getAttribute("type");
+
+ if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+
+// just do all transChange tags this way for now
+// if (type == "supplied")
+ outText("{\\i1 ", buf, u);
+ }
+ else if (tag.isEndTag()) {
+ outText('}', buf, u);
+ }
+ }
+
+ // <divineName>
+ else if (!strcmp(tag.getName(), "divineName")) {
+
+ if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+ u->suspendTextPassThru = (++u->suspendLevel);
+ }
+ else if (tag.isEndTag()) {
+ SWBuf lastText = u->lastSuspendSegment.c_str();
+ u->suspendTextPassThru = (--u->suspendLevel);
+ if (lastText.size()) {
+ toupperstr(lastText);
+ scratch.setFormatted("{\\fs19%c\\fs16%s}", lastText[0], lastText.c_str()+1);
+ outText(scratch.c_str(), buf, u);
+ }
+ }
+ }
+
+ // <div>
+ else if (!strcmp(tag.getName(), "div")) {
+
+ if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+ outText("\\par\\par\\pard ", buf, u);
+ }
+ else if (tag.isEndTag()) {
+ }
+ }
+
+ // image
+ else if (!strcmp(tag.getName(), "figure")) {
+ const char *src = tag.getAttribute("src");
+ if (!src) // assert we have a src attribute
+ return false;
+
+ char* filepath = new char[strlen(u->module->getConfigEntry("AbsoluteDataPath")) + strlen(token)];
+ *filepath = 0;
+ strcpy(filepath, userData->module->getConfigEntry("AbsoluteDataPath"));
+ strcat(filepath, src);
+
+// we do this because BibleCS looks for this EXACT format for an image tag
+ outText("<img src=\"", buf, u);
+ outText(filepath, buf, u);
+ outText("\" />", buf, u);
+/*
+ char imgc;
+ for (c = filepath + strlen(filepath); c > filepath && *c != '.'; c--);
+ c++;
+ FILE* imgfile;
+ if (stricmp(c, "jpg") || stricmp(c, "jpeg")) {
+ imgfile = fopen(filepath, "r");
+ if (imgfile != NULL) {
+ outText("{\\nonshppict {\\pict\\jpegblip ", buf, u);
+ while (feof(imgfile) != EOF) {
+ scratch.setFormatted("%2x", fgetc(imgfile));
+ outText(scratch.c_str(), buf, u);
+
+ }
+ fclose(imgfile);
+ outText("}}", buf, u);
+ }
+ }
+ else if (stricmp(c, "png")) {
+ outText("{\\*\\shppict {\\pict\\pngblip ", buf, u);
+
+ outText("}}", buf, u);
+ }
+*/
+ delete [] filepath;
+ }
+ else {
+ return false; // we still didn't handle token
+ }
+ }
+ return true;
+}
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/osisscripref.cpp b/src/modules/filters/osisscripref.cpp
new file mode 100644
index 0000000..437f5f5
--- /dev/null
+++ b/src/modules/filters/osisscripref.cpp
@@ -0,0 +1,100 @@
+/******************************************************************************
+ *
+ * OSISScripref - SWFilter descendant to hide or show scripture references
+ * in an OSIS module.
+ */
+
+
+#include <stdlib.h>
+#include <osisscripref.h>
+#include <swmodule.h>
+#include <utilxml.h>
+
+SWORD_NAMESPACE_START
+
+const char oName[] = "Cross-references";
+const char oTip[] = "Toggles Scripture Cross-references On and Off if they exist";
+
+const SWBuf choices[3] = {"Off", "On", ""};
+const StringList oValues(&choices[0], &choices[2]);
+
+OSISScripref::OSISScripref() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("On");
+}
+
+
+OSISScripref::~OSISScripref() {
+}
+
+
+char OSISScripref::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ SWBuf token;
+ bool intoken = false;
+ bool hide = false;
+ SWBuf tagText;
+ XMLTag startTag;
+
+ SWBuf orig = text;
+ const char *from = orig.c_str();
+
+ XMLTag tag;
+
+ for (text = ""; *from; ++from) {
+ if (*from == '<') {
+ intoken = true;
+ token = "";
+ continue;
+ }
+ if (*from == '>') { // process tokens
+ intoken = false;
+
+ tag = token;
+
+ if (!strncmp(token.c_str(), "note", 4) || !strncmp(token.c_str(), "/note", 5)) {
+ if (!tag.isEndTag() && !tag.isEmpty()) {
+ startTag = tag;
+ if ((tag.getAttribute("type")) && (!strcmp(tag.getAttribute("type"), "crossReference"))) {
+ hide = true;
+ tagText = "";
+ if (option) { // we want the tag in the text
+ text.append('<');
+ text.append(token);
+ text.append('>');
+ }
+ continue;
+ }
+ }
+ if (hide && tag.isEndTag()) {
+ hide = false;
+ if (option) { // we want the tag in the text
+ text.append(tagText); // end tag gets added further down
+ }
+ else continue; // don't let the end tag get added to the text
+ }
+ }
+
+ // if not a heading token, keep token in text
+ if (!hide) {
+ text.append('<');
+ text.append(token);
+ text.append('>');
+ }
+ else {
+ tagText.append('<');
+ tagText.append(token);
+ tagText.append('>');
+ }
+ continue;
+ }
+ if (intoken) { //copy token
+ token.append(*from);
+ }
+ else if (!hide) { //copy text which is not inside a token
+ text.append(*from);
+ }
+ else tagText.append(*from);
+ }
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/osisstrongs.cpp b/src/modules/filters/osisstrongs.cpp
new file mode 100644
index 0000000..922f7fd
--- /dev/null
+++ b/src/modules/filters/osisstrongs.cpp
@@ -0,0 +1,257 @@
+/******************************************************************************
+ *
+ * osisstrongs - SWFilter descendant to hide or show strongs number
+ * in a OSIS module.
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <osisstrongs.h>
+#include <swmodule.h>
+#include <versekey.h>
+#include <utilxml.h>
+
+SWORD_NAMESPACE_START
+
+const char oName[] = "Strong's Numbers";
+const char oTip[] = "Toggles Strong's Numbers On and Off if they exist";
+
+const SWBuf choices[3] = {"Off", "On", ""};
+const StringList oValues(&choices[0], &choices[2]);
+
+
+OSISStrongs::OSISStrongs() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("Off");
+}
+
+
+OSISStrongs::~OSISStrongs() {
+}
+
+
+char OSISStrongs::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ SWBuf token;
+ bool intoken = false;
+ bool lastspace = false;
+ int wordNum = 1;
+ char wordstr[5];
+ const char *wordStart = 0;
+
+ const SWBuf orig = text;
+ const char * from = orig.c_str();
+
+ for (text = ""; *from; ++from) {
+ if (*from == '<') {
+ intoken = true;
+ token = "";
+ continue;
+ }
+ if (*from == '>') { // process tokens
+ intoken = false;
+ if (token.startsWith("w ")) { // Word
+ XMLTag wtag(token);
+ if (module->isProcessEntryAttributes()) {
+ wordStart = from+1;
+ char gh = 0;
+ VerseKey *vkey = 0;
+ if (key) {
+ vkey = SWDYNAMIC_CAST(VerseKey, key);
+ }
+ SWBuf lemma = "";
+ SWBuf morph = "";
+ SWBuf src = "";
+ SWBuf morphClass = "";
+ SWBuf lemmaClass = "";
+
+ const char *attrib;
+ sprintf(wordstr, "%03d", wordNum);
+
+ // why is morph entry attribute processing done in here? Well, it's faster. It makes more local sense to place this code in osismorph.
+ // easier to keep lemma and morph in same wordstr number too maybe.
+ if ((attrib = wtag.getAttribute("morph"))) {
+ int count = wtag.getAttributePartCount("morph", ' ');
+ int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0
+ do {
+ SWBuf mClass = "";
+ SWBuf mp = "";
+ attrib = wtag.getAttribute("morph", i, ' ');
+ if (i < 0) i = 0; // to handle our -1 condition
+
+ const char *m = strchr(attrib, ':');
+ if (m) {
+ int len = m-attrib;
+ mClass.append(attrib, len);
+ attrib += (len+1);
+ }
+ if ((mClass == "x-Robinsons") || (mClass == "x-Robinson") || (mClass == "Robinson")) {
+ mClass = "robinson";
+ }
+ if (i) { morphClass += " "; morph += " "; }
+ mp += attrib;
+ morphClass += mClass;
+ morph += mp;
+ if (count > 1) {
+ SWBuf tmp;
+ tmp.setFormatted("Morph.%d", i+1);
+ module->getEntryAttributes()["Word"][wordstr][tmp] = mp;
+ tmp.setFormatted("MorphClass.%d", i+1);
+ module->getEntryAttributes()["Word"][wordstr][tmp] = mClass;
+ }
+ } while (++i < count);
+ }
+
+ if ((attrib = wtag.getAttribute("lemma"))) {
+ int count = wtag.getAttributePartCount("lemma", ' ');
+ int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0
+ do {
+ gh = 0;
+ SWBuf lClass = "";
+ SWBuf l = "";
+ attrib = wtag.getAttribute("lemma", i, ' ');
+ if (i < 0) i = 0; // to handle our -1 condition
+
+ const char *m = strchr(attrib, ':');
+ if (m) {
+ int len = m-attrib;
+ lClass.append(attrib, len);
+ attrib += (len+1);
+ }
+ if ((lClass == "x-Strongs") || (lClass == "strong") || (lClass == "Strong")) {
+ if (isdigit(attrib[0])) {
+ if (vkey) {
+ gh = vkey->Testament() ? 'H' : 'G';
+ }
+ }
+ else {
+ gh = *attrib;
+ attrib++;
+ }
+ lClass = "strong";
+ }
+ if (gh) l += gh;
+ l += attrib;
+ if (i) { lemmaClass += " "; lemma += " "; }
+ lemma += l;
+ lemmaClass += lClass;
+ if (count > 1) {
+ SWBuf tmp;
+ tmp.setFormatted("Lemma.%d", i+1);
+ module->getEntryAttributes()["Word"][wordstr][tmp] = l;
+ tmp.setFormatted("LemmaClass.%d", i+1);
+ module->getEntryAttributes()["Word"][wordstr][tmp] = lClass;
+ }
+ } while (++i < count);
+ module->getEntryAttributes()["Word"][wordstr]["PartCount"].setFormatted("%d", count);
+ }
+
+ if ((attrib = wtag.getAttribute("src"))) {
+ int count = wtag.getAttributePartCount("src", ' ');
+ int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0
+ do {
+ SWBuf mp = "";
+ attrib = wtag.getAttribute("src", i, ' ');
+ if (i < 0) i = 0; // to handle our -1 condition
+
+ if (i) src += " ";
+ mp += attrib;
+ src += mp;
+ if (count > 1) {
+ SWBuf tmp;
+ tmp.setFormatted("Src.%d", i+1);
+ module->getEntryAttributes()["Word"][wordstr][tmp] = mp;
+ }
+ } while (++i < count);
+ }
+
+
+ if (lemma.length())
+ module->getEntryAttributes()["Word"][wordstr]["Lemma"] = lemma;
+ if (lemmaClass.length())
+ module->getEntryAttributes()["Word"][wordstr]["LemmaClass"] = lemmaClass;
+ if (morph.length())
+ module->getEntryAttributes()["Word"][wordstr]["Morph"] = morph;
+ if (morphClass.length())
+ module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = morphClass;
+ if (src.length())
+ module->getEntryAttributes()["Word"][wordstr]["Src"] = src;
+
+ if (wtag.isEmpty()) {
+ int j;
+ for (j = token.length()-1; ((j>0) && (strchr(" /", token[j]))); j--);
+ token.size(j+1);
+ }
+
+ token += " wn=\"";
+ token += wordstr;
+ token += "\"";
+
+ if (wtag.isEmpty()) {
+ token += "/";
+ }
+
+ wordNum++;
+ }
+
+ if (!option) {
+/*
+ * Code which handles multiple lemma types. Kindof works but breaks at least WEBIF filters for strongs.
+ *
+ int count = wtag.getAttributePartCount("lemma", ' ');
+ for (int i = 0; i < count; i++) {
+ SWBuf a = wtag.getAttribute("lemma", i, ' ');
+ const char *prefix = a.stripPrefix(':');
+ if ((prefix) && (!strcmp(prefix, "x-Strongs") || !strcmp(prefix, "strong") || !strcmp(prefix, "Strong"))) {
+ // remove attribute part
+ wtag.setAttribute("lemma", 0, i, ' ');
+ i--;
+ count--;
+ }
+ }
+* Instead the codee below just removes the lemma attribute
+*****/
+ const char *l = wtag.getAttribute("lemma");
+ if (l) {
+ SWBuf savlm = l;
+ wtag.setAttribute("lemma", 0);
+ wtag.setAttribute("savlm", savlm);
+ token = wtag;
+ token.trim();
+ // drop <>
+ token << 1;
+ token--;
+ }
+ }
+ }
+ if (token.startsWith("/w")) { // Word End
+ if (module->isProcessEntryAttributes()) {
+ if (wordStart) {
+ SWBuf tmp;
+ tmp.append(wordStart, (from-wordStart)-3);
+ sprintf(wordstr, "%03d", wordNum-1);
+ module->getEntryAttributes()["Word"][wordstr]["Text"] = tmp;
+ }
+ }
+ wordStart = 0;
+ }
+
+ // keep token in text
+ text.append('<');
+ text.append(token);
+ text.append('>');
+
+ continue;
+ }
+ if (intoken) {
+ token += *from;
+ }
+ else {
+ text.append(*from);
+ lastspace = (*from == ' ');
+ }
+ }
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/osisvariants.cpp b/src/modules/filters/osisvariants.cpp
new file mode 100644
index 0000000..91d700c
--- /dev/null
+++ b/src/modules/filters/osisvariants.cpp
@@ -0,0 +1,118 @@
+/******************************************************************************
+ *
+ * osisvariants - SWFilter descendant to hide or show textual variants
+ * in an OSIS module.
+ */
+
+
+#include <stdlib.h>
+#include <osisvariants.h>
+#include <utilstr.h>
+
+SWORD_NAMESPACE_START
+
+const char OSISVariants::primary[] = "Primary Reading";
+const char OSISVariants::secondary[] = "Secondary Reading";
+const char OSISVariants::all[] = "All Readings";
+
+const char OSISVariants::optName[] = "Textual Variants";
+const char OSISVariants::optTip[] = "Switch between Textual Variants modes";
+
+
+OSISVariants::OSISVariants() {
+ option = false;
+ options.push_back(primary);
+ options.push_back(secondary);
+ options.push_back(all);
+}
+
+
+OSISVariants::~OSISVariants() {
+}
+
+void OSISVariants::setOptionValue(const char *ival)
+{
+ if (!stricmp(ival, primary)) option = 0;
+ else if (!stricmp(ival, secondary)) option = 1;
+ else option = 2;
+}
+
+const char *OSISVariants::getOptionValue()
+{
+ if (option == 0) {
+ return primary;
+ }
+ else if (option == 1) {
+ return secondary;
+ }
+ else {
+ return all;
+ }
+}
+
+char OSISVariants::processText(SWBuf &text, const SWKey *key, const SWModule *module)
+{
+ if (option == 0 || option == 1) { //we want primary or variant only
+ bool intoken = false;
+ bool hide = false;
+ bool invar = false;
+
+ SWBuf token;
+ SWBuf orig = text;
+ const char *from = orig.c_str();
+
+ //we use a fixed comparision string to make sure the loop is as fast as the original two blocks with almost the same code
+ //const char* variantCompareString = (option == 0) ? "div type=\"variant\" class=\"1\"" : "div type=\"variant\" class=\"2\"";
+
+ for (text = ""; *from; from++) {
+ if (*from == '<') {
+ intoken = true;
+ token = "";
+ continue;
+ }
+ else if (*from == '>') { // process tokens
+ intoken = false;
+
+ if (!strncmp(token.c_str(), "seg ", 4)) { //only one of the variants
+ invar = true;
+ hide = true;
+ continue;
+ }
+ if (!strncmp(token.c_str(), "div type=\"variant\"", 18)) {
+ invar = true;
+ continue;
+ }
+ if (!strncmp(token.c_str(), "/div", 4)) {
+ hide = false;
+ if (invar) {
+ invar = false;
+ continue;
+ }
+ }
+ if (!hide) {
+ text += '<';
+ text.append(token);
+ text += '>';
+ }
+
+ continue;
+ }
+ if (intoken) {
+ token += *from;
+ }
+ else if (!hide) {
+ text += *from;
+ }
+ }
+
+ }
+
+ return 0;
+}
+
+
+
+
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/osiswebif.cpp b/src/modules/filters/osiswebif.cpp
new file mode 100644
index 0000000..ecc58f7
--- /dev/null
+++ b/src/modules/filters/osiswebif.cpp
@@ -0,0 +1,198 @@
+/***************************************************************************
+ OSISWEBIF.cpp - OSIS to HTML filter with hrefs
+ for strongs and morph tags
+ -------------------
+ begin : 2003-10-23
+ copyright : 2003 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <osiswebif.h>
+#include <utilxml.h>
+#include <url.h>
+#include <versekey.h>
+#include <swmodule.h>
+#include <ctype.h>
+
+
+SWORD_NAMESPACE_START
+
+
+OSISWEBIF::OSISWEBIF() : baseURL(""), passageStudyURL(baseURL + "passagestudy.jsp"), javascript(false) {
+}
+
+
+BasicFilterUserData *OSISWEBIF::createUserData(const SWModule *module, const SWKey *key) {
+ MyUserData *u = new MyUserData(module, key);
+ u->wordsOfChristStart = "<span class=\"wordsOfJesus\"> ";
+ u->wordsOfChristEnd = "</span> ";
+ return u;
+}
+
+
+bool OSISWEBIF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+ MyUserData *u = (MyUserData *)userData;
+ SWBuf scratch;
+ bool sub = (u->suspendTextPassThru) ? substituteToken(scratch, token) : substituteToken(buf, token);
+ if (!sub) {
+ // manually process if it wasn't a simple substitution
+ XMLTag tag(token);
+
+ // <w> tag
+ if (!strcmp(tag.getName(), "w")) {
+
+ // start <w> tag
+ if ((!tag.isEmpty()) && (!tag.isEndTag())) {
+ u->w = token;
+ }
+
+ // end or empty <w> tag
+ else {
+ bool endTag = tag.isEndTag();
+ SWBuf lastText;
+ bool show = true; // to handle unplaced article in kjv2003-- temporary till combined
+
+ if (endTag) {
+ tag = u->w.c_str();
+ lastText = u->lastTextNode.c_str();
+ }
+ else lastText = "stuff";
+
+ const char *attrib;
+ const char *val;
+ if ((attrib = tag.getAttribute("xlit"))) {
+ val = strchr(attrib, ':');
+ val = (val) ? (val + 1) : attrib;
+// buf.appendFormatted(" %s", val);
+ }
+ if ((attrib = tag.getAttribute("gloss"))) {
+ val = strchr(attrib, ':');
+ val = (val) ? (val + 1) : attrib;
+ buf.appendFormatted(" %s", val);
+ }
+ if ((attrib = tag.getAttribute("lemma"))) {
+ int count = tag.getAttributePartCount("lemma", ' ');
+ int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0
+ do {
+ attrib = tag.getAttribute("lemma", i, ' ');
+ if (i < 0) i = 0; // to handle our -1 condition
+ val = strchr(attrib, ':');
+ val = (val) ? (val + 1) : attrib;
+ const char *val2 = val;
+ if ((strchr("GH", *val)) && (isdigit(val[1])))
+ val2++;
+ if ((!strcmp(val2, "3588")) && (lastText.length() < 1))
+ show = false;
+ else buf.appendFormatted(" <small><em>&lt;<a href=\"%s?showStrong=%s#cv\">%s</a>&gt;</em></small> ", passageStudyURL.c_str(), URL::encode(val2).c_str(), val2);
+ } while (++i < count);
+ }
+ if ((attrib = tag.getAttribute("morph")) && (show)) {
+ SWBuf savelemma = tag.getAttribute("savlm");
+ if ((strstr(savelemma.c_str(), "3588")) && (lastText.length() < 1))
+ show = false;
+ if (show) {
+ int count = tag.getAttributePartCount("morph", ' ');
+ int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0
+ do {
+ attrib = tag.getAttribute("morph", i, ' ');
+ if (i < 0) i = 0; // to handle our -1 condition
+ val = strchr(attrib, ':');
+ val = (val) ? (val + 1) : attrib;
+ const char *val2 = val;
+ if ((*val == 'T') && (strchr("GH", val[1])) && (isdigit(val[2])))
+ val2+=2;
+ buf.appendFormatted(" <small><em>(<a href=\"%s?showMorph=%s#cv\">%s</a>)</em></small> ", passageStudyURL.c_str(), URL::encode(val2).c_str(), val2);
+ } while (++i < count);
+ }
+ }
+ if ((attrib = tag.getAttribute("POS"))) {
+ val = strchr(attrib, ':');
+ val = (val) ? (val + 1) : attrib;
+ buf.appendFormatted(" %s", val);
+ }
+
+ /*if (endTag)
+ buf += "}";*/
+ }
+ }
+
+ // <note> tag
+ else if (!strcmp(tag.getName(), "note")) {
+ if (!tag.isEndTag()) {
+ SWBuf type = tag.getAttribute("type");
+ bool strongsMarkup = (type == "x-strongsMarkup" || type == "strongsMarkup"); // the latter is deprecated
+ if (strongsMarkup) {
+ tag.setEmpty(false); // handle bug in KJV2003 module where some note open tags were <note ... />
+ }
+
+ if (!tag.isEmpty()) {
+ if (!strongsMarkup) { // leave strong's markup notes out, in the future we'll probably have different option filters to turn different note types on or off
+ SWBuf footnoteNumber = tag.getAttribute("swordFootnote");
+ SWBuf modName = (u->module) ? u->module->Name() : "";
+ VerseKey *vkey = NULL;
+ // see if we have a VerseKey * or descendant
+ SWTRY {
+ vkey = SWDYNAMIC_CAST(VerseKey, u->key);
+ }
+ SWCATCH ( ... ) { }
+ if (vkey) {
+ char ch = ((tag.getAttribute("type") && ((!strcmp(tag.getAttribute("type"), "crossReference")) || (!strcmp(tag.getAttribute("type"), "x-cross-ref")))) ? 'x':'n');
+// buf.appendFormatted("<a href=\"noteID=%s.%c.%s\"><small><sup>*%c</sup></small></a> ", vkey->getText(), ch, footnoteNumber.c_str(), ch);
+ buf.appendFormatted("<span class=\"fn\" onclick=\"f(\'%s\',\'%s\',\'%s\');\" >%c</span>", modName.c_str(), u->key->getText(), footnoteNumber.c_str(), ch);
+ }
+ }
+ u->suspendTextPassThru = (++u->suspendLevel);
+ }
+ }
+ if (tag.isEndTag()) {
+ u->suspendTextPassThru = (--u->suspendLevel);
+
+ }
+ }
+
+ // <title>
+ else if (!strcmp(tag.getName(), "title")) {
+ if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+ buf += "<h3>";
+ }
+ else if (tag.isEndTag()) {
+ buf += "</h3>";
+ }
+ }
+
+ // ok to leave these in
+ else if (!strcmp(tag.getName(), "div")) {
+ buf += tag;
+ }
+ else if (!strcmp(tag.getName(), "span")) {
+ buf += tag;
+ }
+ else if (!strcmp(tag.getName(), "br")) {
+ buf += tag;
+ }
+
+ // handled appropriately in base class
+ // <catchWord> & <rdg> tags (italicize)
+ // <hi> text highlighting
+ // <q> quote
+ // <milestone type="cQuote" marker="x"/>
+ // <transChange>
+ else {
+ return OSISHTMLHREF::handleToken(buf, token, userData);
+ }
+ }
+ return true;
+}
+
+
+SWORD_NAMESPACE_END
+
diff --git a/src/modules/filters/osiswordjs.cpp b/src/modules/filters/osiswordjs.cpp
new file mode 100644
index 0000000..dc805b4
--- /dev/null
+++ b/src/modules/filters/osiswordjs.cpp
@@ -0,0 +1,178 @@
+/******************************************************************************
+ *
+ * osisstrongs - SWFilter descendant to hide or show strongs number
+ * in a OSIS module.
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <osiswordjs.h>
+#include <swmodule.h>
+#include <ctype.h>
+#include <utilxml.h>
+#include <utilstr.h>
+#include <versekey.h>
+#include <stdio.h>
+
+SWORD_NAMESPACE_START
+
+const char oName[] = "Word Javascript";
+const char oTip[] = "Toggles Word Javascript data";
+
+const SWBuf choices[3] = {"Off", "On", ""};
+const StringList oValues(&choices[0], &choices[2]);
+
+
+OSISWordJS::OSISWordJS() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("Off");
+
+ defaultGreekLex = 0;
+ defaultHebLex = 0;
+ defaultGreekParse = 0;
+ defaultHebParse = 0;
+ mgr = 0;
+}
+
+
+OSISWordJS::~OSISWordJS() {
+}
+
+
+char OSISWordJS::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ if (option) {
+ char token[2112]; // cheese. Fix.
+ int tokpos = 0;
+ bool intoken = false;
+ bool lastspace = false;
+ int wordNum = 1;
+ char wordstr[5];
+ SWBuf modName = (module)?module->Name():"";
+ // add TR to w src in KJV then remove this next line
+ SWBuf wordSrcPrefix = (modName == "KJV")?SWBuf("TR"):modName;
+
+ VerseKey *vkey = 0;
+ if (key) {
+ vkey = SWDYNAMIC_CAST(VerseKey, key);
+ }
+
+ const SWBuf orig = text;
+ const char * from = orig.c_str();
+
+ for (text = ""; *from; ++from) {
+ if (*from == '<') {
+ intoken = true;
+ tokpos = 0;
+ token[0] = 0;
+ token[1] = 0;
+ token[2] = 0;
+ continue;
+ }
+ if (*from == '>') { // process tokens
+ intoken = false;
+ if ((*token == 'w') && (token[1] == ' ')) { // Word
+ XMLTag wtag(token);
+ sprintf(wordstr, "%03d", wordNum);
+ SWBuf lemmaClass;
+ SWBuf lemma;
+ SWBuf morph;
+ SWBuf src;
+ char gh = 0;
+ int count = atoi(module->getEntryAttributes()["Word"][wordstr]["PartCount"].c_str());
+ for (int i = 0; i < count; i++) {
+
+ // for now, lemma class can just be equal to last lemma class in multi part word
+ SWBuf tmp = "LemmaClass";
+ if (count > 1) tmp.appendFormatted(".%d", i+1);
+ lemmaClass = module->getEntryAttributes()["Word"][wordstr][tmp];
+
+ tmp = "Lemma";
+ if (count > 1) tmp.appendFormatted(".%d", i+1);
+ tmp = (module->getEntryAttributes()["Word"][wordstr][tmp].c_str());
+
+ // if we're strongs,
+ if (lemmaClass == "strong") {
+ gh = tmp[0];
+ tmp << 1;
+ }
+ if (lemma.size()) lemma += "|";
+ lemma += tmp;
+
+ tmp = "Morph";
+ if (count > 1) tmp.appendFormatted(".%d", i+1);
+ tmp = (module->getEntryAttributes()["Word"][wordstr][tmp].c_str());
+ if (morph.size()) morph += "|";
+ morph += tmp;
+
+ tmp = "Src";
+ if (count > 1) tmp.appendFormatted(".%d", i+1);
+ tmp = (module->getEntryAttributes()["Word"][wordstr][tmp].c_str());
+ if (!tmp.length()) tmp.appendFormatted("%d", wordNum);
+ tmp.insert(0, wordSrcPrefix);
+ if (src.size()) src += "|";
+ src += tmp;
+ }
+
+ SWBuf lexName = "";
+ // we can pass the real lex name in, but we have some
+ // aliases in the javascript to optimize bandwidth
+ if ((gh == 'G') && (defaultGreekLex)) {
+ lexName = (!strcmp(defaultGreekLex->Name(), "StrongsGreek"))?"G":defaultGreekLex->Name();
+ }
+ else if ((gh == 'H') && (defaultHebLex)) {
+ lexName = (!strcmp(defaultHebLex->Name(), "StrongsHebrew"))?"H":defaultHebLex->Name();
+ }
+
+ SWBuf xlit = wtag.getAttribute("xlit");
+
+ if ((lemmaClass != "strong") && (xlit.startsWith("betacode:"))) {
+ lexName = "betacode";
+// const char *m = strchr(xlit.c_str(), ':');
+// strong = ++m;
+ }
+ SWBuf wordID;
+ if (vkey) {
+ // optimize for bandwidth and use only the verse as the unique entry id
+ wordID.appendFormatted("%d", vkey->Verse());
+ }
+ else {
+ wordID = key->getText();
+ }
+ wordID.appendFormatted("_%s", src.c_str());
+ // clean up our word ID for XHTML
+ for (unsigned int i = 0; i < wordID.size(); i++) {
+ if ((!isdigit(wordID[i])) && (!isalpha(wordID[i]))) {
+ wordID[i] = '_';
+ }
+ }
+ // 'p' = 'fillpop' to save bandwidth
+ text.appendFormatted("<span class=\"clk\" onclick=\"p('%s','%s','%s','%s','','%s');\" >", lexName.c_str(), lemma.c_str(), wordID.c_str(), morph.c_str(), modName.c_str());
+ wordNum++;
+ }
+ if ((*token == '/') && (token[1] == 'w') && option) { // Word
+ text += "</w></span>";
+ continue;
+ }
+
+ // if not a strongs token, keep token in text
+ text.append('<');
+ text.append(token);
+ text.append('>');
+
+ continue;
+ }
+ if (intoken) {
+ if (tokpos < 2045)
+ token[tokpos++] = *from;
+ token[tokpos+2] = 0;
+ }
+ else {
+ text.append(*from);
+ lastspace = (*from == ' ');
+ }
+ }
+ }
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/papyriplain.cpp b/src/modules/filters/papyriplain.cpp
new file mode 100644
index 0000000..423bfda
--- /dev/null
+++ b/src/modules/filters/papyriplain.cpp
@@ -0,0 +1,71 @@
+/******************************************************************************
+ *
+ * papyriplain - SWFilter descendant to strip out all Papyri tags
+ */
+
+
+#include <stdlib.h>
+#include <papyriplain.h>
+
+SWORD_NAMESPACE_START
+
+PapyriPlain::PapyriPlain() {
+}
+
+
+char PapyriPlain::processText (SWBuf &text, const SWKey *key, const SWModule *module)
+{
+ SWBuf orig = text;
+ const char *from = orig.c_str();
+
+ for (text = ""; *from; ++from) {
+
+ // remove hyphen and whitespace if that is all that separates words
+ // also be sure we're not a double hyphen '--'
+ if ((*from == '-') && (text.length() > 0) && (text[text.length()-1] != '-')) {
+ char remove = 0;
+ const char *c;
+ for (c = from+1; *c; c++) {
+ if ((*c == 10) || (*c == 13)) {
+ remove = 1;
+ }
+ if (!strchr(" \t\n", *c)) {
+ if (remove) remove++;
+ break;
+ }
+ }
+ if (remove > 1) {
+ from = c-1;
+ continue;
+ }
+ }
+
+ // remove all newlines
+ if ((*from == 10) || (*from == 13)) {
+ if ((text.length()>1) && (text[text.length()-2] != ' ') && (*(from+1) != ' '))
+ text.append(' ');
+ continue;
+ }
+
+
+ // strip odd characters
+ switch (*from) {
+ case '(':
+ case ')':
+ case '[':
+ case ']':
+ case '{':
+ case '}':
+ case '<':
+ case '>':
+ continue;
+ }
+
+ // if we've made it this far
+ text.append(*from);
+
+ }
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/plainfootnotes.cpp b/src/modules/filters/plainfootnotes.cpp
new file mode 100644
index 0000000..0baf313
--- /dev/null
+++ b/src/modules/filters/plainfootnotes.cpp
@@ -0,0 +1,79 @@
+/***************************************************************************
+ plainfootnotes.cpp - description
+ -------------------
+ begin : Wed Oct 13 1999
+ copyright : (C) 1999 by The team of BibleTime
+ email : info@bibletime.de
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <plainfootnotes.h>
+#include <swkey.h>
+
+#include <stdlib.h>
+
+SWORD_NAMESPACE_START
+
+const char oName[] = "Footnotes";
+const char oTip[] = "Toggles Footnotes On and Off In Bible Texts If They Exist";
+
+const SWBuf choices[3] = {"Off", "On", ""};
+const StringList oValues(&choices[0], &choices[2]);
+
+PLAINFootnotes::PLAINFootnotes() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("Off");
+}
+
+PLAINFootnotes::~PLAINFootnotes(){
+}
+
+
+char PLAINFootnotes::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ if (!option) { // if we don't want footnotes
+ //char token[2048];
+ //SWBuf token;
+ //int tokpos = 0;
+ //bool intoken = false;
+ //bool lastspace = false;
+
+ bool hide = false;
+
+ SWBuf orig = text;
+ const char *from = orig.c_str();
+ for (text = ""; *from; from++) {
+ if (*from == '{') // Footnote start
+ {
+ hide = true;
+ continue;
+ }
+ else if (*from == '}') // Footnote end
+ {
+ hide = false;
+ continue;
+ }
+
+ //if (intoken) {
+ //if (tokpos < 2045)
+ // token += *from;
+ // token[tokpos+2] = 0;
+ //}
+ //else {
+ if (!hide) {
+ text = *from;
+ //lastspace = (*from == ' ');
+ }
+ //}
+ }
+ }
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/plainhtml.cpp b/src/modules/filters/plainhtml.cpp
new file mode 100644
index 0000000..f5f2a5c
--- /dev/null
+++ b/src/modules/filters/plainhtml.cpp
@@ -0,0 +1,83 @@
+/***************************************************************************
+ plainhtml.cpp - description
+ -------------------
+ begin : Thu Jun 24 1999
+ copyright : (C) 1999 by Torsten Uhlmann
+ email : TUhlmann@gmx.de
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <plainhtml.h>
+#include <swbuf.h>
+
+SWORD_NAMESPACE_START
+
+PLAINHTML::PLAINHTML()
+{
+}
+
+
+char PLAINHTML::processText(SWBuf &text, const SWKey *key, const SWModule *module)
+{
+ int count = 0;
+
+ SWBuf orig = text;
+ const char *from = orig.c_str();
+ for (text = ""; *from; from++)
+ {
+ if ((*from == '\n') && (from[1] == '\n')) // two newlinea are a paragraph
+ {
+ text += "<P>";
+ from++;
+ continue;
+ } else {
+ if ((*from == '\n')) // && (from[1] != '\n')) // only one new line
+ {
+ text += "<BR>";
+ continue;
+ }
+ }
+
+ if (*from == '{') { //footnote start
+ text += "<FONT COLOR=\"#80000\"><SMALL> (";
+ continue;
+ }
+ else if (*from == '}') //footnote end
+ {
+ text += ") </SMALL></FONT>";
+ continue;
+ }
+ else if (*from == '<') {
+ text += "&lt;";
+ continue;
+ }
+ else if (*from == '>') {
+ text += "&gt;";
+ continue;
+ }
+ else if (*from == '&') {
+ text += "&amp;";
+ continue;
+ }
+ else if ((*from == ' ') && (count > 5000))
+ {
+ text += "<WBR>";
+ count = 0;
+ continue;
+ }
+
+ text += *from;
+ count++;
+ }
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/rtfhtml.cpp b/src/modules/filters/rtfhtml.cpp
new file mode 100644
index 0000000..cac5068
--- /dev/null
+++ b/src/modules/filters/rtfhtml.cpp
@@ -0,0 +1,81 @@
+/***************************************************************************
+ rtfhtml.cpp - description
+ -------------------
+ begin : Wed Oct 13 1999
+ copyright : (C) 1999 by The team of BibleTime
+ email : info@bibletime.de
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <rtfhtml.h>
+#include <swbuf.h>
+
+SWORD_NAMESPACE_START
+
+RTFHTML::RTFHTML() {
+
+}
+
+
+char RTFHTML::processText(SWBuf &text, const SWKey *key, const SWModule *module)
+{
+ bool center = false;
+
+ const char *from;
+ SWBuf orig = text;
+ from = orig.c_str();
+ for (text = ""; *from; from++)
+ {
+ if (*from == '\\') // a RTF command
+ {
+ if ( !strncmp(from+1, "pard", 4) )
+ //(from[1] == 'p') && (from[2] == 'a') && (from[3] == 'r') && (from[4] == 'd'))
+ { // switch all modifiers off
+ if (center)
+ {
+ text += "</CENTER>";
+ center = false;
+ }
+ from += 4;
+ continue;
+ }
+ if ( !strncmp(from+1, "par", 3) )
+ //(from[1] == 'p') && (from[2] == 'a') && (from[3] == 'r'))
+ {
+ text += "<P>\n";
+ from += 3;
+ continue;
+ }
+ if (from[1] == ' ')
+ {
+ from += 1;
+ continue;
+ }
+ if ( !strncmp(from+1, "qc", 2) )
+ //(from[1] == 'q') && (from[2] == 'c')) // center on
+ {
+ if (!center)
+ {
+ text += "<CENTER>";
+ center = true;
+ }
+ from += 2;
+ continue;
+ }
+ }
+
+ text += *from;
+ }
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/scsuutf8.cpp b/src/modules/filters/scsuutf8.cpp
new file mode 100644
index 0000000..0daff4a
--- /dev/null
+++ b/src/modules/filters/scsuutf8.cpp
@@ -0,0 +1,226 @@
+/******************************************************************************
+ *
+ * SCSUUTF8 - SWFilter descendant to convert a SCSU character to UTF-8
+ *
+ */
+
+
+/* This class is based on:
+ * http://czyborra.com/scsu/scsu.c written by Roman Czyborra@dds.nl
+ * on Andrea's balcony in North Amsterdam on 1998-08-04
+ * Thanks to Richard Verhoeven <rcb5@win.tue.nl> for his suggestion
+ * to correct the haphazard "if" after UQU to "else if" on 1998-10-01
+ *
+ * This is a deflator to UTF-8 output for input compressed in SCSU,
+ * the (Reuters) Standard Compression Scheme for Unicode as described
+ * in http://www.unicode.org/unicode/reports/tr6.html
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <swmodule.h>
+
+#include <scsuutf8.h>
+
+SWORD_NAMESPACE_START
+
+SCSUUTF8::SCSUUTF8() {
+}
+
+
+unsigned char* SCSUUTF8::UTF8Output(unsigned long uchar, unsigned char* text)
+{
+ /* join UTF-16 surrogates without any pairing sanity checks */
+
+ static int d;
+
+ if (uchar >= 0xd800 && uchar <= 0xdbff) { d = uchar & 0x3f; return text; }
+ if (uchar >= 0xdc00 && uchar <= 0xdfff) { uchar = uchar + 0x2400 + d * 0x400; }
+
+ /* output one character as UTF-8 multibyte sequence */
+
+ if (uchar < 0x80) {
+ *text++ = c;
+ }
+ else if (uchar < 0x800) {
+ *text++ = 0xc0 | uchar >> 6;
+ *text++ = 0x80 | (uchar & 0x3f);
+ }
+ else if (uchar < 0x10000) {
+ *text++ = 0xe0 | uchar >> 12;
+ *text++ = 0x80 | (uchar >> 6 & 0x3f);
+ *text++ = 0x80 | (uchar & 0x3f);
+ }
+ else if (uchar < 0x200000) {
+ *text++ = 0xf0 | uchar >> 18;
+ *text++ = 0x80 | (uchar >> 12 & 0x3f);
+ *text++ = 0x80 | (uchar >> 6 & 0x3f);
+ *text++ = 0x80 | (uchar & 0x3f);
+ }
+
+ return text;
+}
+
+char SCSUUTF8::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+/*
+ unsigned char *to, *from;
+ unsigned long buflen = len * FILTERPAD;
+ char active = 0, mode = 0;
+ if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering
+ return -1;
+
+ static unsigned short start[8] = {0x0000,0x0080,0x0100,0x0300,0x2000,0x2080,0x2100,0x3000};
+ static unsigned short slide[8] = {0x0080,0x00C0,0x0400,0x0600,0x0900,0x3040,0x30A0,0xFF00};
+ static unsigned short win[256] = {
+ 0x0000, 0x0080, 0x0100, 0x0180, 0x0200, 0x0280, 0x0300, 0x0380,
+ 0x0400, 0x0480, 0x0500, 0x0580, 0x0600, 0x0680, 0x0700, 0x0780,
+ 0x0800, 0x0880, 0x0900, 0x0980, 0x0A00, 0x0A80, 0x0B00, 0x0B80,
+ 0x0C00, 0x0C80, 0x0D00, 0x0D80, 0x0E00, 0x0E80, 0x0F00, 0x0F80,
+ 0x1000, 0x1080, 0x1100, 0x1180, 0x1200, 0x1280, 0x1300, 0x1380,
+ 0x1400, 0x1480, 0x1500, 0x1580, 0x1600, 0x1680, 0x1700, 0x1780,
+ 0x1800, 0x1880, 0x1900, 0x1980, 0x1A00, 0x1A80, 0x1B00, 0x1B80,
+ 0x1C00, 0x1C80, 0x1D00, 0x1D80, 0x1E00, 0x1E80, 0x1F00, 0x1F80,
+ 0x2000, 0x2080, 0x2100, 0x2180, 0x2200, 0x2280, 0x2300, 0x2380,
+ 0x2400, 0x2480, 0x2500, 0x2580, 0x2600, 0x2680, 0x2700, 0x2780,
+ 0x2800, 0x2880, 0x2900, 0x2980, 0x2A00, 0x2A80, 0x2B00, 0x2B80,
+ 0x2C00, 0x2C80, 0x2D00, 0x2D80, 0x2E00, 0x2E80, 0x2F00, 0x2F80,
+ 0x3000, 0x3080, 0x3100, 0x3180, 0x3200, 0x3280, 0x3300, 0x3800,
+ 0xE000, 0xE080, 0xE100, 0xE180, 0xE200, 0xE280, 0xE300, 0xE380,
+ 0xE400, 0xE480, 0xE500, 0xE580, 0xE600, 0xE680, 0xE700, 0xE780,
+ 0xE800, 0xE880, 0xE900, 0xE980, 0xEA00, 0xEA80, 0xEB00, 0xEB80,
+ 0xEC00, 0xEC80, 0xED00, 0xED80, 0xEE00, 0xEE80, 0xEF00, 0xEF80,
+ 0xF000, 0xF080, 0xF100, 0xF180, 0xF200, 0xF280, 0xF300, 0xF380,
+ 0xF400, 0xF480, 0xF500, 0xF580, 0xF600, 0xF680, 0xF700, 0xF780,
+ 0xF800, 0xF880, 0xF900, 0xF980, 0xFA00, 0xFA80, 0xFB00, 0xFB80,
+ 0xFC00, 0xFC80, 0xFD00, 0xFD80, 0xFE00, 0xFE80, 0xFF00, 0xFF80,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x00C0, 0x0250, 0x0370, 0x0530, 0x3040, 0x30A0, 0xFF60
+ };
+
+ if (!len)
+ return 0;
+
+ memmove(&text[buflen - len], text, len);
+ from = (unsigned char*)&text[buflen - len];
+ to = (unsigned char *)text;
+
+ // -------------------------------
+
+ for (int i = 0; i < len;) {
+
+
+ if (i >= len) break;
+ c = from[i++];
+
+ if (c >= 0x80)
+ {
+ to = UTF8Output (c - 0x80 + slide[active], to);
+ }
+ else if (c >= 0x20 && c <= 0x7F)
+ {
+ to = UTF8Output (c, to);
+ }
+ else if (c == 0x0 || c == 0x9 || c == 0xA || c == 0xC || c == 0xD)
+ {
+ to = UTF8Output (c, to);
+ }
+ else if (c >= 0x1 && c <= 0x8) // SQn
+ {
+ if (i >= len) break;
+ d = from[i++]; // single quote
+
+ to = UTF8Output (d < 0x80 ? d + start [c - 0x1] :
+ d - 0x80 + slide [c - 0x1], to);
+ }
+ else if (c >= 0x10 && c <= 0x17) // SCn
+ {
+ active = c - 0x10; // change window
+ }
+ else if (c >= 0x18 && c <= 0x1F) // SDn
+ {
+ active = c - 0x18; // define window
+ if (i >= len) break;
+ slide [active] = win [from[i++]];
+ }
+ else if (c == 0xB) // SDX
+ {
+ if (i >= len) break;
+ c = from[i++];
+
+ if (i >= len) break;
+ d = from[i++];
+
+ slide [active = c>>5] = 0x10000 + (((c & 0x1F) << 8 | d) << 7);
+ }
+ else if (c == 0xE) // SQU
+ {
+ if (i >= len) break;
+ c = from[i++]; // SQU
+
+ if (i >= len) break;
+ to = UTF8Output (c << 8 | from[i++], to);
+ }
+ else if (c == 0xF) // SCU
+ {
+ mode = 1; // change to Unicode mode
+
+ while (mode)
+ {
+ if (i >= len) break;
+ c = from[i++];
+
+ if (c <= 0xDF || c >= 0xF3)
+ {
+ if (i >= len) break;
+ to = UTF8Output (c << 8 | from[i++], to);
+ }
+ else if (c == 0xF0) // UQU
+ {
+ if (i >= len) break;
+ c = from[i++];
+
+ if (i >= len) break;
+ to = UTF8Output (c << 8 | from[i++], to);
+ }
+ else if (c >= 0xE0 && c <= 0xE7) // UCn
+ {
+ active = c - 0xE0; mode = 0;
+ }
+ else if (c >= 0xE8 && c <= 0xEF) // UDn
+ {
+ if (i >= len) break;
+ slide [active=c-0xE8] = win [from[i++]]; mode = 0;
+ }
+ else if (c == 0xF1) // UDX
+ {
+ if (i >= len) break;
+ c = from[i++];
+
+ if (i >= len) break;
+ d = from[i++];
+
+ slide [active = c>>5] =
+ 0x10000 + (((c & 0x1F) << 8 | d) << 7); mode = 0;
+ }
+ }
+ }
+
+
+ }
+
+ *to++ = 0;
+ *to = 0;
+*/
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/swbasicfilter.cpp b/src/modules/filters/swbasicfilter.cpp
new file mode 100644
index 0000000..ef10e45
--- /dev/null
+++ b/src/modules/filters/swbasicfilter.cpp
@@ -0,0 +1,406 @@
+/******************************************************************************
+ * swbasicfilter.h - definition of class SWBasicFilter. An SWFilter
+ * impl that provides some basic methods that
+ * many filters will need and can use as a starting
+ * point.
+ *
+ * $Id: swbasicfilter.cpp 2167 2008-05-16 23:23:39Z scribe $
+ *
+ * Copyright 2001 CrossWire Bible Society (http://www.crosswire.org)
+ * CrossWire Bible Society
+ * P. O. Box 2528
+ * Tempe, AZ 85280-2528
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ */
+
+#include <stdlib.h>
+#include <swbasicfilter.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <utilstr.h>
+#include <stringmgr.h>
+#include <map>
+#include <set>
+
+SWORD_NAMESPACE_START
+
+typedef std::map<SWBuf, SWBuf> DualStringMap;
+typedef std::set<SWBuf> StringSet;
+
+// I hate bridge patterns but this isolates std::map from a ton of filters
+class SWBasicFilter::Private {
+public:
+ DualStringMap tokenSubMap;
+ DualStringMap escSubMap;
+ StringSet escPassSet;
+};
+
+const char SWBasicFilter::INITIALIZE = 1;
+const char SWBasicFilter::PRECHAR = 2;
+const char SWBasicFilter::POSTCHAR = 4;
+const char SWBasicFilter::FINALIZE = 8;
+
+SWBasicFilter::SWBasicFilter() {
+
+ p = new Private;
+
+ processStages = 0;
+ tokenStart = 0;
+ tokenEnd = 0;
+ escStart = 0;
+ escEnd = 0;
+
+ setTokenStart("<");
+ setTokenEnd(">");
+ setEscapeStart("&");
+ setEscapeEnd(";");
+
+ escStringCaseSensitive = false;
+ tokenCaseSensitive = false;
+ passThruUnknownToken = false;
+ passThruUnknownEsc = false;
+ passThruNumericEsc = false;
+}
+
+
+SWBasicFilter::~SWBasicFilter() {
+ if (tokenStart)
+ delete [] tokenStart;
+
+ if (tokenEnd)
+ delete [] tokenEnd;
+
+ if (escStart)
+ delete [] escStart;
+
+ if (escEnd)
+ delete [] escEnd;
+
+ delete p;
+}
+
+
+void SWBasicFilter::setPassThruUnknownToken(bool val) {
+ passThruUnknownToken = val;
+}
+
+
+void SWBasicFilter::setPassThruUnknownEscapeString(bool val) {
+ passThruUnknownEsc = val;
+}
+
+void SWBasicFilter::setPassThruNumericEscapeString(bool val) {
+ passThruUnknownEsc = val;
+}
+
+
+void SWBasicFilter::setTokenCaseSensitive(bool val) {
+ tokenCaseSensitive = val;
+}
+
+
+void SWBasicFilter::setEscapeStringCaseSensitive(bool val) {
+ escStringCaseSensitive = val;
+}
+
+
+void SWBasicFilter::addTokenSubstitute(const char *findString, const char *replaceString) {
+ char *buf = 0;
+
+ if (!tokenCaseSensitive) {
+ stdstr(&buf, findString);
+ toupperstr(buf);
+ p->tokenSubMap[buf] = replaceString;
+ delete [] buf;
+ }
+ else p->tokenSubMap[findString] = replaceString;
+}
+
+
+void SWBasicFilter::removeTokenSubstitute(const char *findString) {
+ if (p->tokenSubMap.find(findString) != p->tokenSubMap.end()) {
+ p->tokenSubMap.erase( p->tokenSubMap.find(findString) );
+ }
+}
+
+void SWBasicFilter::addAllowedEscapeString(const char *findString) {
+ char *buf = 0;
+
+ if (!escStringCaseSensitive) {
+ stdstr(&buf, findString);
+ toupperstr(buf);
+ p->escPassSet.insert(StringSet::value_type(buf));
+ delete [] buf;
+ }
+ else p->escPassSet.insert(StringSet::value_type(findString));
+}
+
+void SWBasicFilter::removeAllowedEscapeString(const char *findString) {
+ if (p->escPassSet.find(findString) != p->escPassSet.end()) {
+ p->escPassSet.erase( p->escPassSet.find(findString) );
+ }
+}
+
+void SWBasicFilter::addEscapeStringSubstitute(const char *findString, const char *replaceString) {
+ char *buf = 0;
+
+ if (!escStringCaseSensitive) {
+ stdstr(&buf, findString);
+ toupperstr(buf);
+ p->escSubMap.insert(DualStringMap::value_type(buf, replaceString));
+ delete [] buf;
+ }
+ else p->escSubMap.insert(DualStringMap::value_type(findString, replaceString));
+}
+
+void SWBasicFilter::removeEscapeStringSubstitute(const char *findString) {
+ if (p->escSubMap.find(findString) != p->escSubMap.end()) {
+ p->escSubMap.erase( p->escSubMap.find(findString) );
+ }
+}
+
+bool SWBasicFilter::substituteToken(SWBuf &buf, const char *token) {
+ DualStringMap::iterator it;
+
+ if (!tokenCaseSensitive) {
+ char *tmp = 0;
+ stdstr(&tmp, token);
+ toupperstr(tmp);
+ it = p->tokenSubMap.find(tmp);
+ delete [] tmp;
+ } else
+ it = p->tokenSubMap.find(token);
+
+ if (it != p->tokenSubMap.end()) {
+ buf += it->second.c_str();
+ return true;
+ }
+ return false;
+}
+
+void SWBasicFilter::appendEscapeString(SWBuf &buf, const char *escString) {
+ buf += escStart;
+ buf += escString;
+ buf += escEnd;
+}
+
+bool SWBasicFilter::passAllowedEscapeString(SWBuf &buf, const char *escString) {
+ StringSet::iterator it;
+
+ if (!escStringCaseSensitive) {
+ char *tmp = 0;
+ stdstr(&tmp, escString);
+ toupperstr(tmp);
+ it = p->escPassSet.find(tmp);
+ delete [] tmp;
+ } else
+ it = p->escPassSet.find(escString);
+
+ if (it != p->escPassSet.end()) {
+ appendEscapeString(buf, escString);
+ return true;
+ }
+
+ return false;
+}
+
+bool SWBasicFilter::handleNumericEscapeString(SWBuf &buf, const char *escString) {
+ if (passThruNumericEsc) {
+ appendEscapeString(buf, escString);
+ return true;
+ }
+ return false;
+}
+
+bool SWBasicFilter::substituteEscapeString(SWBuf &buf, const char *escString) {
+ DualStringMap::iterator it;
+
+ if (*escString == '#') {
+ return handleNumericEscapeString(buf, escString);
+ }
+
+ if (passAllowedEscapeString(buf, escString)) {
+ return true;
+ }
+
+ if (!escStringCaseSensitive) {
+ char *tmp = 0;
+ stdstr(&tmp, escString);
+ toupperstr(tmp);
+ it = p->escSubMap.find(tmp);
+ delete [] tmp;
+ } else
+ it = p->escSubMap.find(escString);
+
+ if (it != p->escSubMap.end()) {
+ buf += it->second.c_str();
+ return true;
+ }
+ return false;
+}
+
+
+bool SWBasicFilter::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+ return substituteToken(buf, token);
+}
+
+
+bool SWBasicFilter::handleEscapeString(SWBuf &buf, const char *escString, BasicFilterUserData *userData) {
+ return substituteEscapeString(buf, escString);
+}
+
+
+void SWBasicFilter::setEscapeStart(const char *escStart) {
+ stdstr(&(this->escStart), escStart);
+ escStartLen = strlen(escStart);
+}
+
+
+void SWBasicFilter::setEscapeEnd(const char *escEnd) {
+ stdstr(&(this->escEnd), escEnd);
+ escEndLen = strlen(escEnd);
+}
+
+
+void SWBasicFilter::setTokenStart(const char *tokenStart) {
+ stdstr(&(this->tokenStart), tokenStart);
+ tokenStartLen = strlen(tokenStart);
+}
+
+
+void SWBasicFilter::setTokenEnd(const char *tokenEnd) {
+ stdstr(&(this->tokenEnd), tokenEnd);
+ tokenEndLen = strlen(tokenEnd);
+}
+
+
+char SWBasicFilter::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ char *from;
+ char token[4096];
+ int tokpos = 0;
+ bool intoken = false;
+ bool inEsc = false;
+ int escStartPos = 0, escEndPos = 0;
+ int tokenStartPos = 0, tokenEndPos = 0;
+ SWBuf lastTextNode;
+ BasicFilterUserData *userData = createUserData(module, key);
+
+ SWBuf orig = text;
+ from = orig.getRawData();
+ text = "";
+
+ if (processStages & INITIALIZE) {
+ if (processStage(INITIALIZE, text, from, userData)) { // processStage handled it all
+ delete userData;
+ return 0;
+ }
+ }
+
+ for (;*from; from++) {
+
+ if (processStages & PRECHAR) {
+ if (processStage(PRECHAR, text, from, userData)) // processStage handled this char
+ continue;
+ }
+
+ if (*from == tokenStart[tokenStartPos]) {
+ if (tokenStartPos == (tokenStartLen - 1)) {
+ intoken = true;
+ tokpos = 0;
+ token[0] = 0;
+ token[1] = 0;
+ token[2] = 0;
+ inEsc = false;
+ }
+ else tokenStartPos++;
+ continue;
+ }
+
+ if (*from == escStart[escStartPos]) {
+ if (escStartPos == (escStartLen - 1)) {
+ intoken = true;
+ tokpos = 0;
+ token[0] = 0;
+ token[1] = 0;
+ token[2] = 0;
+ inEsc = true;
+ }
+ else escStartPos++;
+ continue;
+ }
+
+ if (inEsc) {
+ if (*from == escEnd[escEndPos]) {
+ if (escEndPos == (escEndLen - 1)) {
+ intoken = inEsc = false;
+ userData->lastTextNode = lastTextNode;
+
+ if (!userData->suspendTextPassThru) { //if text through is disabled no tokens should pass, too
+ if ((!handleEscapeString(text, token, userData)) && (passThruUnknownEsc)) {
+ appendEscapeString(text, token);
+ }
+ }
+ escEndPos = escStartPos = tokenEndPos = tokenStartPos = 0;
+ lastTextNode = "";
+ continue;
+ }
+ }
+ }
+
+ if (!inEsc) {
+ if (*from == tokenEnd[tokenEndPos]) {
+ if (tokenEndPos == (tokenEndLen - 1)) {
+ intoken = false;
+ userData->lastTextNode = lastTextNode;
+ if ((!handleToken(text, token, userData)) && (passThruUnknownToken)) {
+ text += tokenStart;
+ text += token;
+ text += tokenEnd;
+ }
+ escEndPos = escStartPos = tokenEndPos = tokenStartPos = 0;
+ lastTextNode = "";
+ continue;
+ }
+ }
+ }
+
+ if (intoken) {
+ if (tokpos < 4090) {
+ token[tokpos++] = *from;
+ token[tokpos+2] = 0;
+ }
+ }
+ else {
+ if ((!userData->supressAdjacentWhitespace) || (*from != ' ')) {
+ if (!userData->suspendTextPassThru) {
+ text.append(*from);
+ userData->lastSuspendSegment.size(0);
+ }
+ else userData->lastSuspendSegment.append(*from);
+ lastTextNode.append(*from);
+ }
+ userData->supressAdjacentWhitespace = false;
+ }
+
+ if (processStages & POSTCHAR)
+ processStage(POSTCHAR, text, from, userData);
+
+ }
+
+ if (processStages & FINALIZE)
+ processStage(FINALIZE, text, from, userData);
+
+ delete userData;
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/swoptfilter.cpp b/src/modules/filters/swoptfilter.cpp
new file mode 100644
index 0000000..6921190
--- /dev/null
+++ b/src/modules/filters/swoptfilter.cpp
@@ -0,0 +1,47 @@
+/******************************************************************************
+ *
+ * swoptfilter - SWFilter descendant and base class for all option filters
+ */
+
+
+#include <swoptfilter.h>
+#include <utilstr.h>
+
+SWORD_NAMESPACE_START
+
+
+SWOptionFilter::SWOptionFilter() {
+ static StringList empty;
+ static const char *empty2 = "";
+ optName = empty2;
+ optTip = empty2;
+ optValues = &empty;
+}
+
+SWOptionFilter::SWOptionFilter(const char *oName, const char *oTip, const StringList *oValues) {
+ optName = oName;
+ optTip = oTip;
+ optValues = oValues;
+}
+
+
+SWOptionFilter::~SWOptionFilter() {
+}
+
+
+void SWOptionFilter::setOptionValue(const char *ival) {
+ for (StringList::const_iterator loop = optValues->begin(); loop != optValues->end(); loop++) {
+ if (!stricmp(loop->c_str(), ival)) {
+ optionValue = *loop;
+ option = (!strnicmp(ival, "On", 2)); // convenience for boolean filters
+ break;
+ }
+ }
+}
+
+const char *SWOptionFilter::getOptionValue() {
+ return optionValue;
+}
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/teihtmlhref.cpp b/src/modules/filters/teihtmlhref.cpp
new file mode 100644
index 0000000..1d213f4
--- /dev/null
+++ b/src/modules/filters/teihtmlhref.cpp
@@ -0,0 +1,205 @@
+/***************************************************************************
+ teirtf.cpp - TEI to HTMLHREF filter
+ -------------------
+ begin : 2006-07-03
+ copyright : 2006 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <ctype.h>
+#include <teihtmlhref.h>
+#include <utilxml.h>
+#include <swmodule.h>
+#include <url.h>
+
+
+SWORD_NAMESPACE_START
+
+
+TEIHTMLHREF::MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) {
+ BiblicalText = false;
+ if (module) {
+ version = module->Name();
+ BiblicalText = (!strcmp(module->Type(), "Biblical Texts"));
+ }
+}
+
+
+TEIHTMLHREF::TEIHTMLHREF() {
+ setTokenStart("<");
+ setTokenEnd(">");
+
+ setEscapeStart("&");
+ setEscapeEnd(";");
+
+ setEscapeStringCaseSensitive(true);
+
+ addAllowedEscapeString("quot");
+ addAllowedEscapeString("apos");
+ addAllowedEscapeString("amp");
+ addAllowedEscapeString("lt");
+ addAllowedEscapeString("gt");
+
+ setTokenCaseSensitive(true);
+}
+
+bool TEIHTMLHREF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+ // manually process if it wasn't a simple substitution
+ if (!substituteToken(buf, token)) {
+ MyUserData *u = (MyUserData *)userData;
+ XMLTag tag(token);
+
+ if (!strcmp(tag.getName(), "p")) {
+ if ((!tag.isEndTag()) && (!tag.isEmpty())) { // non-empty start tag
+ buf += "<!P><br />";
+ }
+ else if (tag.isEndTag()) { // end tag
+ buf += "<!/P><br />";
+ //userData->supressAdjacentWhitespace = true;
+ }
+ else { // empty paragraph break marker
+ buf += "<!P><br />";
+ //userData->supressAdjacentWhitespace = true;
+ }
+ }
+
+ // <hi>
+ else if (!strcmp(tag.getName(), "hi")) {
+ if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+ SWBuf rend = tag.getAttribute("rend");
+
+ u->lastHi = rend;
+ if (rend == "ital")
+ buf += "<i>";
+ else if (rend == "bold")
+ buf += "<b>";
+ else if (rend == "sup")
+ buf += "<small><sup>";
+
+ }
+ else if (tag.isEndTag()) {
+ SWBuf rend = u->lastHi;
+ if (rend == "ital")
+ buf += "</i>";
+ else if (rend == "bold")
+ buf += "</b>";
+ else if (rend == "sup")
+ buf += "</sup></small>";
+ }
+ }
+
+ // <entryFree>
+ else if (!strcmp(tag.getName(), "entryFree")) {
+ if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+ SWBuf n = tag.getAttribute("n");
+ if (n != "") {
+ buf += "<b>";
+ buf += n;
+ buf += "</b>";
+ }
+ }
+ }
+
+ // <sense>
+ else if (!strcmp(tag.getName(), "sense")) {
+ if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+ SWBuf n = tag.getAttribute("n");
+ if (n != "") {
+ buf += "<br /><b>";
+ buf += n;
+ buf += "</b>";
+ }
+ }
+ }
+
+ // <div>
+ else if (!strcmp(tag.getName(), "div")) {
+
+ if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+ buf += "<!P>";
+ }
+ else if (tag.isEndTag()) {
+ }
+ }
+
+ // <pos>, <gen>, <case>, <gram>, <number>, <mood>, <pron>, <def>
+ else if (!strcmp(tag.getName(), "pos") ||
+ !strcmp(tag.getName(), "gen") ||
+ !strcmp(tag.getName(), "case") ||
+ !strcmp(tag.getName(), "gram") ||
+ !strcmp(tag.getName(), "number") ||
+ !strcmp(tag.getName(), "pron") ||
+ !strcmp(tag.getName(), "def")) {
+ if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+ buf += "<i>";
+ }
+ else if (tag.isEndTag()) {
+ buf += "</i>";
+ }
+ }
+
+ // <tr>
+ else if (!strcmp(tag.getName(), "tr")) {
+ if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+ buf += "<i>";
+ }
+ else if (tag.isEndTag()) {
+ buf += "</i>";
+ }
+ }
+
+ // orth
+ else if (!strcmp(tag.getName(), "orth")) {
+ if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+ buf += "<b>";
+ }
+ else if (tag.isEndTag()) {
+ buf += "</b>";
+ }
+ }
+
+ // <etym>, <usg>
+ else if (!strcmp(tag.getName(), "etym") ||
+ !strcmp(tag.getName(), "usg")) {
+ // do nothing here
+ }
+
+ // <note> tag
+ else if (!strcmp(tag.getName(), "note")) {
+ if (!tag.isEndTag()) {
+ if (!tag.isEmpty()) {
+ u->suspendTextPassThru = true;
+ }
+ }
+ if (tag.isEndTag()) {
+ SWBuf footnoteNumber = tag.getAttribute("swordFootnote");
+
+ buf.appendFormatted("<a href=\"passagestudy.jsp?action=showNote&type=n&value=%s&module=%s&passage=%s\"><small><sup>*n</sup></small></a>",
+ URL::encode(footnoteNumber.c_str()).c_str(),
+ URL::encode(u->version.c_str()).c_str(),
+ URL::encode(u->key->getText()).c_str());
+
+ u->suspendTextPassThru = false;
+ }
+ }
+
+ else {
+ return false; // we still didn't handle token
+ }
+
+ }
+ return true;
+}
+
+
+SWORD_NAMESPACE_END
+
diff --git a/src/modules/filters/teiplain.cpp b/src/modules/filters/teiplain.cpp
new file mode 100644
index 0000000..c721d84
--- /dev/null
+++ b/src/modules/filters/teiplain.cpp
@@ -0,0 +1,116 @@
+/***************************************************************************
+ teiplain.cpp - TEI to Plaintext filter
+ -------------------
+ begin : 2006-07-05
+ copyright : 2006 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <teiplain.h>
+#include <ctype.h>
+
+SWORD_NAMESPACE_START
+
+TEIPlain::TEIPlain() {
+ setTokenStart("<");
+ setTokenEnd(">");
+
+ setEscapeStart("&");
+ setEscapeEnd(";");
+
+ setEscapeStringCaseSensitive(true);
+
+ addEscapeStringSubstitute("amp", "&");
+ addEscapeStringSubstitute("apos", "'");
+ addEscapeStringSubstitute("lt", "<");
+ addEscapeStringSubstitute("gt", ">");
+ addEscapeStringSubstitute("quot", "\"");
+
+ setTokenCaseSensitive(true);
+}
+
+
+bool TEIPlain::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+ // manually process if it wasn't a simple substitution
+ if (!substituteToken(buf, token)) {
+ //MyUserData *u = (MyUserData *)userData;
+ XMLTag tag(token);
+
+ // <p> paragraph tag
+ if (!strcmp(tag.getName(), "p")) {
+ if ((!tag.isEndTag()) && (!tag.isEmpty())) { // non-empty start tag
+ buf += "\n";
+ }
+ else if (tag.isEndTag()) { // end tag
+ buf += "\n";
+ userData->supressAdjacentWhitespace = true;
+ }
+ else { // empty paragraph break marker
+ buf += "\n\n";
+ userData->supressAdjacentWhitespace = true;
+ }
+ }
+
+ // <entryFree>
+ else if (!strcmp(tag.getName(), "entryFree")) {
+ SWBuf n = tag.getAttribute("n");
+ if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+ if (n != "") {
+ buf += n;
+ buf += ". ";
+ }
+ }
+ }
+
+ // <sense>
+ else if (!strcmp(tag.getName(), "sense")) {
+ SWBuf n = tag.getAttribute("n");
+ if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+ if (n != "") {
+ buf += n;
+ buf += ". ";
+ }
+ }
+ else if (tag.isEndTag()) {
+ buf += "\n";
+ }
+ }
+
+ // <div>
+ else if (!strcmp(tag.getName(), "div")) {
+
+ if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+ buf.append("\n\n\n");
+ }
+ else if (tag.isEndTag()) {
+ }
+ }
+
+ // <etym>
+ else if (!strcmp(tag.getName(), "etym")) {
+ if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+ buf += "[";
+ }
+ else if (tag.isEndTag()) {
+ buf += "]";
+ }
+ }
+
+ else {
+ return false; // we still didn't handle token
+ }
+ }
+ return true;
+}
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/teirtf.cpp b/src/modules/filters/teirtf.cpp
new file mode 100644
index 0000000..006f099
--- /dev/null
+++ b/src/modules/filters/teirtf.cpp
@@ -0,0 +1,182 @@
+/***************************************************************************
+ teirtf.cpp - TEI to RTF filter
+ -------------------
+ begin : 2006-07-03
+ copyright : 2006 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <ctype.h>
+#include <teirtf.h>
+#include <utilxml.h>
+#include <swmodule.h>
+#include <versekey.h>
+
+SWORD_NAMESPACE_START
+
+
+TEIRTF::MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) {
+ BiblicalText = false;
+ if (module) {
+ version = module->Name();
+ BiblicalText = (!strcmp(module->Type(), "Biblical Texts"));
+ }
+}
+
+
+TEIRTF::TEIRTF() {
+ setTokenStart("<");
+ setTokenEnd(">");
+
+ setEscapeStart("&");
+ setEscapeEnd(";");
+
+ setEscapeStringCaseSensitive(true);
+
+ addEscapeStringSubstitute("amp", "&");
+ addEscapeStringSubstitute("apos", "'");
+ addEscapeStringSubstitute("lt", "<");
+ addEscapeStringSubstitute("gt", ">");
+ addEscapeStringSubstitute("quot", "\"");
+
+ setTokenCaseSensitive(true);
+}
+
+
+bool TEIRTF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+ // manually process if it wasn't a simple substitution
+ if (!substituteToken(buf, token)) {
+ MyUserData *u = (MyUserData *)userData;
+ XMLTag tag(token);
+
+ // <p> paragraph tag
+ if (!strcmp(tag.getName(), "p")) {
+ if (!tag.isEndTag()) { // non-empty start tag
+ buf += "{\\sb100\\fi200\\par}";
+ }
+ }
+
+ // <hi>
+ else if (!strcmp(tag.getName(), "hi")) {
+ SWBuf rend = tag.getAttribute("rend");
+ if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+ if (rend == "ital")
+ buf += "{\\i1 ";
+ else if (rend == "bold")
+ buf += "{\\b1 ";
+ else if (rend == "sup")
+ buf += "{\\super ";
+
+ }
+ else if (tag.isEndTag()) {
+ buf += "}";
+ }
+ }
+
+ // <entryFree>
+ else if (!strcmp(tag.getName(), "entryFree")) {
+ SWBuf n = tag.getAttribute("n");
+ if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+ if (n != "") {
+ buf += "{\\b1 ";
+ buf += n;
+ buf += ". }"; }
+ }
+ }
+
+ // <sense>
+ else if (!strcmp(tag.getName(), "sense")) {
+ SWBuf n = tag.getAttribute("n");
+ if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+ if (n != "") {
+ buf += "{\\sb100\\par\\b1 ";
+ buf += n;
+ buf += ". }";
+ }
+ }
+ }
+
+ // <div>
+ else if (!strcmp(tag.getName(), "div")) {
+
+ if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+ buf.append("{\\pard\\sa300}");
+ }
+ else if (tag.isEndTag()) {
+ }
+ }
+
+ // <pos>, <gen>, <case>, <gram>, <number>, <mood>
+ else if (!strcmp(tag.getName(), "pos") || !strcmp(tag.getName(), "gen") || !strcmp(tag.getName(), "case") || !strcmp(tag.getName(), "gram") || !strcmp(tag.getName(), "number") || !strcmp(tag.getName(), "mood")) {
+ if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+ buf += "{\\i1 ";
+ }
+ else if (tag.isEndTag()) {
+ buf += "}";
+ }
+ }
+
+ // <tr>
+ else if (!strcmp(tag.getName(), "tr")) {
+ if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+ buf += "{\\i1 ";
+ }
+ else if (tag.isEndTag()) {
+ buf += "}";
+ }
+ }
+
+ // <etym>
+ else if (!strcmp(tag.getName(), "etym")) {
+ if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+ buf += "[";
+ }
+ else if (tag.isEndTag()) {
+ buf += "]";
+ }
+ }
+
+ // <note> tag
+ else if (!strcmp(tag.getName(), "note")) {
+ if (!tag.isEndTag()) {
+ if (!tag.isEmpty()) {
+ SWBuf type = tag.getAttribute("type");
+
+ SWBuf footnoteNumber = tag.getAttribute("swordFootnote");
+ VerseKey *vkey;
+ // see if we have a VerseKey * or descendant
+ SWTRY {
+ vkey = SWDYNAMIC_CAST(VerseKey, u->key);
+ }
+ SWCATCH ( ... ) { }
+ if (vkey) {
+ buf.appendFormatted("{\\super <a href=\"\">*%s</a>} ", footnoteNumber.c_str());
+ }
+ u->suspendTextPassThru = true;
+ }
+ }
+ if (tag.isEndTag()) {
+ u->suspendTextPassThru = false;
+ }
+ }
+
+ else {
+ return false; // we still didn't handle token
+ }
+
+ }
+ return true;
+}
+
+
+SWORD_NAMESPACE_END
+
diff --git a/src/modules/filters/thmlfootnotes.cpp b/src/modules/filters/thmlfootnotes.cpp
new file mode 100644
index 0000000..23c43b4
--- /dev/null
+++ b/src/modules/filters/thmlfootnotes.cpp
@@ -0,0 +1,124 @@
+/******************************************************************************
+ *
+ * thmlfootnotes - SWFilter descendant to hide or show footnotes
+ * in a ThML module.
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <thmlfootnotes.h>
+#include <swmodule.h>
+#include <swbuf.h>
+#include <versekey.h>
+#include <utilxml.h>
+
+SWORD_NAMESPACE_START
+
+const char oName[] = "Footnotes";
+const char oTip[] = "Toggles Footnotes On and Off if they exist";
+
+const SWBuf choices[3] = {"Off", "On", ""};
+const StringList oValues(&choices[0], &choices[2]);
+
+ThMLFootnotes::ThMLFootnotes() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("Off");
+}
+
+
+ThMLFootnotes::~ThMLFootnotes() {
+}
+
+
+char ThMLFootnotes::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ SWBuf token;
+ bool intoken = false;
+ bool hide = false;
+ SWBuf tagText;
+ XMLTag startTag;
+ SWBuf refs = "";
+ int footnoteNum = 1;
+ char buf[254];
+ VerseKey parser = key->getText();
+
+ SWBuf orig = text;
+ const char *from = orig.c_str();
+
+ for (text = ""; *from; from++) {
+ if (*from == '<') {
+ intoken = true;
+ token = "";
+ continue;
+ }
+ if (*from == '>') { // process tokens
+ intoken = false;
+
+ XMLTag tag(token);
+ if (!strcmp(tag.getName(), "note")) {
+ if (!tag.isEndTag()) {
+ if (!tag.isEmpty()) {
+ refs = "";
+ startTag = tag;
+ hide = true;
+ tagText = "";
+ continue;
+ }
+ }
+ if (hide && tag.isEndTag()) {
+ if (module->isProcessEntryAttributes()) {
+ SWBuf fc = module->getEntryAttributes()["Footnote"]["count"]["value"];
+ footnoteNum = (fc.length()) ? atoi(fc.c_str()) : 0;
+ sprintf(buf, "%i", ++footnoteNum);
+ module->getEntryAttributes()["Footnote"]["count"]["value"] = buf;
+ StringList attributes = startTag.getAttributeNames();
+ for (StringList::iterator it = attributes.begin(); it != attributes.end(); it++) {
+ module->getEntryAttributes()["Footnote"][buf][it->c_str()] = startTag.getAttribute(it->c_str());
+ }
+ module->getEntryAttributes()["Footnote"][buf]["body"] = tagText;
+ startTag.setAttribute("swordFootnote", buf);
+ if ((startTag.getAttribute("type")) && (!strcmp(startTag.getAttribute("type"), "crossReference"))) {
+ if (!refs.length())
+ refs = parser.ParseVerseList(tagText.c_str(), parser, true).getRangeText();
+ module->getEntryAttributes()["Footnote"][buf]["refList"] = refs.c_str();
+ }
+ }
+ hide = false;
+ if ((option) || ((startTag.getAttribute("type") && (!strcmp(startTag.getAttribute("type"), "crossReference"))))) { // we want the tag in the text; crossReferences are handled by another filter
+ text += startTag;
+ text.append(tagText);
+ }
+ else continue;
+ }
+ }
+
+ // if not a note token, keep token in text
+ if ((!strcmp(tag.getName(), "scripRef")) && (!tag.isEndTag())) {
+ SWBuf osisRef = tag.getAttribute("passage");
+ if (refs.length())
+ refs += "; ";
+ refs += osisRef;
+ }
+ if (!hide) {
+ text += '<';
+ text.append(token);
+ text += '>';
+ }
+ else {
+ tagText += '<';
+ tagText.append(token);
+ tagText += '>';
+ }
+ continue;
+ }
+ if (intoken) { //copy token
+ token += *from;
+ }
+ else if (!hide) { //copy text which is not inside a token
+ text += *from;
+ }
+ else tagText += *from;
+ }
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmlgbf.cpp b/src/modules/filters/thmlgbf.cpp
new file mode 100644
index 0000000..f8703b1
--- /dev/null
+++ b/src/modules/filters/thmlgbf.cpp
@@ -0,0 +1,291 @@
+/***************************************************************************
+ thmlgbf.cpp - ThML to GBF filter
+ -------------------
+ begin : 1999-10-28
+ copyright : 2001 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <thmlgbf.h>
+#include <utilstr.h>
+#include <swbuf.h>
+
+SWORD_NAMESPACE_START
+
+ThMLGBF::ThMLGBF()
+{
+}
+
+
+char ThMLGBF::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ const char *from;
+ char token[2048];
+ int tokpos = 0;
+ bool intoken = false;
+ bool ampersand = false;
+ bool sechead = false;
+ bool title = false;
+
+ SWBuf orig = text;
+ from = orig.c_str();
+
+ for (text = ""; *from; from++) {
+ if (*from == '<') {
+ intoken = true;
+ tokpos = 0;
+ token[0] = 0;
+ token[1] = 0;
+ token[2] = 0;
+ ampersand = false;
+ continue;
+ }
+ else if (*from == '&') {
+ intoken = true;
+ tokpos = 0;
+ memset(token, 0, 2048);
+ ampersand = true;
+ continue;
+ }
+ if (*from == ';' && ampersand) {
+ intoken = false;
+
+ if (!strncmp("nbsp", token, 4)) text += ' ';
+ else if (!strncmp("quot", token, 4)) text += '"';
+ else if (!strncmp("amp", token, 3)) text += '&';
+ else if (!strncmp("lt", token, 2)) text += '<';
+ else if (!strncmp("gt", token, 2)) text += '>';
+ else if (!strncmp("brvbar", token, 6)) text += '¦';
+ else if (!strncmp("sect", token, 4)) text += '§';
+ else if (!strncmp("copy", token, 4)) text += '©';
+ else if (!strncmp("laquo", token, 5)) text += '«';
+ else if (!strncmp("reg", token, 3)) text += '®';
+ else if (!strncmp("acute", token, 5)) text += '´';
+ else if (!strncmp("para", token, 4)) text += '¶';
+ else if (!strncmp("raquo", token, 5)) text += '»';
+
+ else if (!strncmp("Aacute", token, 6)) text += 'Á';
+ else if (!strncmp("Agrave", token, 6)) text += 'À';
+ else if (!strncmp("Acirc", token, 5)) text += 'Â';
+ else if (!strncmp("Auml", token, 4)) text += 'Ä';
+ else if (!strncmp("Atilde", token, 6)) text += 'Ã';
+ else if (!strncmp("Aring", token, 5)) text += 'Å';
+ else if (!strncmp("aacute", token, 6)) text += 'á';
+ else if (!strncmp("agrave", token, 6)) text += 'à';
+ else if (!strncmp("acirc", token, 5)) text += 'â';
+ else if (!strncmp("auml", token, 4)) text += 'ä';
+ else if (!strncmp("atilde", token, 6)) text += 'ã';
+ else if (!strncmp("aring", token, 5)) text += 'å';
+ else if (!strncmp("Eacute", token, 6)) text += 'É';
+ else if (!strncmp("Egrave", token, 6)) text += 'È';
+ else if (!strncmp("Ecirc", token, 5)) text += 'Ê';
+ else if (!strncmp("Euml", token, 4)) text += 'Ë';
+ else if (!strncmp("eacute", token, 6)) text += 'é';
+ else if (!strncmp("egrave", token, 6)) text += 'è';
+ else if (!strncmp("ecirc", token, 5)) text += 'ê';
+ else if (!strncmp("euml", token, 4)) text += 'ë';
+ else if (!strncmp("Iacute", token, 6)) text += 'Í';
+ else if (!strncmp("Igrave", token, 6)) text += 'Ì';
+ else if (!strncmp("Icirc", token, 5)) text += 'Î';
+ else if (!strncmp("Iuml", token, 4)) text += 'Ï';
+ else if (!strncmp("iacute", token, 6)) text += 'í';
+ else if (!strncmp("igrave", token, 6)) text += 'ì';
+ else if (!strncmp("icirc", token, 5)) text += 'î';
+ else if (!strncmp("iuml", token, 4)) text += 'ï';
+ else if (!strncmp("Oacute", token, 6)) text += 'Ó';
+ else if (!strncmp("Ograve", token, 6)) text += 'Ò';
+ else if (!strncmp("Ocirc", token, 5)) text += 'Ô';
+ else if (!strncmp("Ouml", token, 4)) text += 'Ö';
+ else if (!strncmp("Otilde", token, 6)) text += 'Õ';
+ else if (!strncmp("oacute", token, 6)) text += 'ó';
+ else if (!strncmp("ograve", token, 6)) text += 'ò';
+ else if (!strncmp("ocirc", token, 5)) text += 'ô';
+ else if (!strncmp("ouml", token, 4)) text += 'ö';
+ else if (!strncmp("otilde", token, 6)) text += 'õ';
+ else if (!strncmp("Uacute", token, 6)) text += 'Ú';
+ else if (!strncmp("Ugrave", token, 6)) text += 'Ù';
+ else if (!strncmp("Ucirc", token, 5)) text += 'Û';
+ else if (!strncmp("Uuml", token, 4)) text += 'Ü';
+ else if (!strncmp("uacute", token, 6)) text += 'ú';
+ else if (!strncmp("ugrave", token, 6)) text += 'ù';
+ else if (!strncmp("ucirc", token, 5)) text += 'û';
+ else if (!strncmp("uuml", token, 4)) text += 'ü';
+ else if (!strncmp("Yacute", token, 6)) text += 'Ý';
+ else if (!strncmp("yacute", token, 6)) text += 'ý';
+ else if (!strncmp("yuml", token, 4)) text += 'ÿ';
+
+ else if (!strncmp("deg", token, 3)) text += '°';
+ else if (!strncmp("plusmn", token, 6)) text += '±';
+ else if (!strncmp("sup2", token, 4)) text += '²';
+ else if (!strncmp("sup3", token, 4)) text += '³';
+ else if (!strncmp("sup1", token, 4)) text += '¹';
+ else if (!strncmp("nbsp", token, 4)) text += 'º';
+ else if (!strncmp("pound", token, 5)) text += '£';
+ else if (!strncmp("cent", token, 4)) text += '¢';
+ else if (!strncmp("frac14", token, 6)) text += '¼';
+ else if (!strncmp("frac12", token, 6)) text += '½';
+ else if (!strncmp("frac34", token, 6)) text += '¾';
+ else if (!strncmp("iquest", token, 6)) text += '¿';
+ else if (!strncmp("iexcl", token, 5)) text += '¡';
+ else if (!strncmp("ETH", token, 3)) text += 'Ð';
+ else if (!strncmp("eth", token, 3)) text += 'ð';
+ else if (!strncmp("THORN", token, 5)) text += 'Þ';
+ else if (!strncmp("thorn", token, 5)) text += 'þ';
+ else if (!strncmp("AElig", token, 5)) text += 'Æ';
+ else if (!strncmp("aelig", token, 5)) text += 'æ';
+ else if (!strncmp("Oslash", token, 6)) text += 'Ø';
+ else if (!strncmp("curren", token, 6)) text += '¤';
+ else if (!strncmp("Ccedil", token, 6)) text += 'Ç';
+ else if (!strncmp("ccedil", token, 6)) text += 'ç';
+ else if (!strncmp("szlig", token, 5)) text += 'ß';
+ else if (!strncmp("Ntilde", token, 6)) text += 'Ñ';
+ else if (!strncmp("ntilde", token, 6)) text += 'ñ';
+ else if (!strncmp("yen", token, 3)) text += '¥';
+ else if (!strncmp("not", token, 3)) text += '¬';
+ else if (!strncmp("ordf", token, 4)) text += 'ª';
+ else if (!strncmp("uml", token, 3)) text += '¨';
+ else if (!strncmp("shy", token, 3)) text += '­';
+ else if (!strncmp("macr", token, 4)) text += '¯';
+ else if (!strncmp("micro", token, 5)) text += "µ";
+ else if (!strncmp("middot", token, 6)) text +="·";
+ else if (!strncmp("cedil", token, 5)) text += "¸";
+ else if (!strncmp("ordm", token, 4)) text += "º";
+ else if (!strncmp("times", token, 5)) text += "×";
+ else if (!strncmp("divide", token, 6)) text +="÷";
+ else if (!strncmp("oslash", token, 6)) text +="ø";
+ continue;
+
+ }
+ else if (*from == '>' && !ampersand) {
+ intoken = false;
+ // process desired tokens
+ if (!strncmp(token, "sync type=\"Strongs\" value=\"", 27)) {
+ text += "<W";
+ for (unsigned int i = 27; token[i] != '\"'; i++)
+ text += token[i];
+ text += '>';
+ continue;
+ }
+ if (!strncmp(token, "sync type=\"morph\" value=\"", 25)) {
+ text += "<WT";
+ for (unsigned int i = 25; token[i] != '\"'; i++)
+ text += token[i];
+ text += '>';
+ continue;
+ }
+ else if (!strncmp(token, "scripRef", 8)) {
+ text += "<RX>";
+ continue;
+ }
+ else if (!strncmp(token, "/scripRef", 9)) {
+ text += "<Rx>";
+ continue;
+ }
+ else if (!strncmp(token, "note", 4)) {
+ text += "<RF>";
+ continue;
+ }
+ else if (!strncmp(token, "/note", 5)) {
+ text += "<Rf>";
+ continue;
+ }
+ else if (!strncmp(token, "sup", 3)) {
+ text += "<FS>";
+ }
+ else if (!strncmp(token, "/sup", 4)) {
+ text += "<Fs>";
+ }
+ else if (!strnicmp(token, "font color=#ff0000", 18)) {
+ text += "<FR>";
+ continue;
+ }
+ else if (!strnicmp(token, "/font", 5)) {
+ text += "<Fr>";
+ continue;
+ }
+ else if (!strncmp(token, "div class=\"sechead\"", 19)) {
+ text += "<TS>";
+ sechead = true;
+ continue;
+ }
+ else if (sechead && !strncmp(token, "/div", 19)) {
+ text += "<Ts>";
+ sechead = false;
+ continue;
+ }
+ else if (!strncmp(token, "div class=\"title\"", 19)) {
+ text += "<TT>";
+ title = true;
+ continue;
+ }
+ else if (title && !strncmp(token, "/div", 19)) {
+ text += "<Tt>";
+ title = false;
+ continue;
+ }
+ else if (!strnicmp(token, "br", 2)) {
+ text += "<CL>";
+ continue;
+ }
+ else switch(*token) {
+ case 'I': // font tags
+ case 'i':
+ text += "<FI>";
+ continue;
+ case 'B': // bold start
+ case 'b':
+ text += "<FB>";
+ continue;
+ case '/':
+ switch(token[1]) {
+ case 'P':
+ case 'p':
+ text += "<CM>";
+ continue;
+ case 'I':
+ case 'i': // italic end
+ text += "<Fi>";
+ continue;
+ case 'B': // bold start
+ case 'b':
+ text += "<Fb>";
+ continue;
+ }
+ }
+ continue;
+ }
+ if (intoken) {
+ if (tokpos < 2045)
+ token[tokpos++] = *from;
+ token[tokpos+2] = 0;
+ }
+ else text += *from;
+ }
+
+ orig = text;
+ from = orig.c_str();
+ for (text = ""; *from; from++) { //loop to remove extra spaces
+ if ((strchr(" \t\n\r", *from))) {
+ while (*(from+1) && (strchr(" \t\n\r", *(from+1)))) {
+ from++;
+ }
+ text += " ";
+ }
+ else {
+ text += *from;
+ }
+ }
+ text += (char)0;
+
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmlheadings.cpp b/src/modules/filters/thmlheadings.cpp
new file mode 100644
index 0000000..4d6134f
--- /dev/null
+++ b/src/modules/filters/thmlheadings.cpp
@@ -0,0 +1,153 @@
+/******************************************************************************
+ *
+ * thmlheadings - SWFilter descendant to hide or show headings
+ * in a ThML module.
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <thmlheadings.h>
+#include <utilxml.h>
+#include <utilstr.h>
+#include <swmodule.h>
+#include <stdio.h>
+
+SWORD_NAMESPACE_START
+
+const char oName[] = "Headings";
+const char oTip[] = "Toggles Headings On and Off if they exist";
+
+const SWBuf choices[3] = {"Off", "On", ""};
+const StringList oValues(&choices[0], &choices[2]);
+
+ThMLHeadings::ThMLHeadings() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("Off");
+}
+
+
+ThMLHeadings::~ThMLHeadings() {
+}
+
+
+char ThMLHeadings::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ SWBuf token;
+ bool intoken = false;
+ bool isheader = false;
+ bool hide = false;
+ bool preverse = false;
+ bool withinDiv = false;
+ SWBuf header;
+ int headerNum = 0;
+ int pvHeaderNum = 0;
+ char buf[254];
+ XMLTag startTag;
+
+ SWBuf orig = text;
+ const char *from = orig.c_str();
+
+ XMLTag tag;
+
+ for (text = ""; *from; ++from) {
+ if (*from == '<') {
+ intoken = true;
+ token = "";
+
+ continue;
+ }
+ if (*from == '>') { // process tokens
+ intoken = false;
+
+ if (!strnicmp(token.c_str(), "div", 3) || !strnicmp(token.c_str(), "/div", 4)) {
+ withinDiv = (!strnicmp(token.c_str(), "div", 3));
+ tag = token;
+ if (hide && tag.isEndTag()) {
+ if (module->isProcessEntryAttributes() && (option || (!preverse))) {
+ if (preverse) {
+ sprintf(buf, "%i", pvHeaderNum++);
+ module->getEntryAttributes()["Heading"]["Preverse"][buf] = header;
+ }
+ else {
+ sprintf(buf, "%i", headerNum++);
+ module->getEntryAttributes()["Heading"]["Interverse"][buf] = header;
+ if (option) { // we want the tag in the text
+ text.append(header);
+ }
+ }
+
+ StringList attributes = startTag.getAttributeNames();
+ for (StringList::const_iterator it = attributes.begin(); it != attributes.end(); it++) {
+ module->getEntryAttributes()["Heading"][buf][it->c_str()] = startTag.getAttribute(it->c_str());
+ }
+ }
+
+ hide = false;
+ if (!option || preverse) { // we don't want the tag in the text anymore
+ preverse = false;
+ continue;
+ }
+ preverse = false;
+ }
+ if (tag.getAttribute("class") && ((!stricmp(tag.getAttribute("class"), "sechead"))
+ || (!stricmp(tag.getAttribute("class"), "title")))) {
+
+ isheader = true;
+
+ if (!tag.isEndTag()) { //start tag
+ if (!tag.isEmpty()) {
+ startTag = tag;
+
+/* how do we tell a ThML preverse title from one that should be in the text? probably if any text is before the title... just assuming all are preverse for now
+ }
+ if (tag.getAttribute("subtype") && !stricmp(tag.getAttribute("subtype"), "x-preverse")) {
+*/
+ hide = true;
+ preverse = true;
+ header = "";
+ continue;
+ } // move back up under startTag = tag
+ }
+/* this is where non-preverse will go eventually
+ if (!tag.isEndTag()) { //start tag
+ hide = true;
+ header = "";
+ if (option) { // we want the tag in the text
+ text.append('<');
+ text.append(token);
+ text.append('>');
+ }
+ continue;
+ }
+*/
+ }
+ else
+ isheader = false;
+ }
+
+ if (withinDiv && isheader) {
+ header.append('<');
+ header.append(token);
+ header.append('>');
+ } else {
+ // if not a heading token, keep token in text
+ if (!hide) {
+ text.append('<');
+ text.append(token);
+ text.append('>');
+ }
+ }
+ continue;
+ }
+ if (intoken) { //copy token
+ token.append(*from);
+ }
+ else if (!hide) { //copy text which is not inside a token
+ text.append(*from);
+ }
+ else header.append(*from);
+ }
+ return 0;
+}
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmlhtml.cpp b/src/modules/filters/thmlhtml.cpp
new file mode 100644
index 0000000..efb09cd
--- /dev/null
+++ b/src/modules/filters/thmlhtml.cpp
@@ -0,0 +1,236 @@
+/***************************************************************************
+ thmlhtml.cpp - ThML to HTML filter
+ -------------------
+ begin : 1999-10-27
+ copyright : 2001 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <thmlhtml.h>
+#include <swmodule.h>
+#include <utilxml.h>
+
+SWORD_NAMESPACE_START
+
+ThMLHTML::ThMLHTML() {
+ setTokenStart("<");
+ setTokenEnd(">");
+
+ setEscapeStart("&");
+ setEscapeEnd(";");
+
+ setEscapeStringCaseSensitive(true);
+ setPassThruNumericEscapeString(true);
+
+ addAllowedEscapeString("quot");
+ addAllowedEscapeString("amp");
+ addAllowedEscapeString("lt");
+ addAllowedEscapeString("gt");
+
+ addAllowedEscapeString("nbsp");
+ addAllowedEscapeString("brvbar"); // "¦"
+ addAllowedEscapeString("sect"); // "§"
+ addAllowedEscapeString("copy"); // "©"
+ addAllowedEscapeString("laquo"); // "«"
+ addAllowedEscapeString("reg"); // "®"
+ addAllowedEscapeString("acute"); // "´"
+ addAllowedEscapeString("para"); // "¶"
+ addAllowedEscapeString("raquo"); // "»"
+
+ addAllowedEscapeString("Aacute"); // "Á"
+ addAllowedEscapeString("Agrave"); // "À"
+ addAllowedEscapeString("Acirc"); // "Â"
+ addAllowedEscapeString("Auml"); // "Ä"
+ addAllowedEscapeString("Atilde"); // "Ã"
+ addAllowedEscapeString("Aring"); // "Å"
+ addAllowedEscapeString("aacute"); // "á"
+ addAllowedEscapeString("agrave"); // "à"
+ addAllowedEscapeString("acirc"); // "â"
+ addAllowedEscapeString("auml"); // "ä"
+ addAllowedEscapeString("atilde"); // "ã"
+ addAllowedEscapeString("aring"); // "å"
+ addAllowedEscapeString("Eacute"); // "É"
+ addAllowedEscapeString("Egrave"); // "È"
+ addAllowedEscapeString("Ecirc"); // "Ê"
+ addAllowedEscapeString("Euml"); // "Ë"
+ addAllowedEscapeString("eacute"); // "é"
+ addAllowedEscapeString("egrave"); // "è"
+ addAllowedEscapeString("ecirc"); // "ê"
+ addAllowedEscapeString("euml"); // "ë"
+ addAllowedEscapeString("Iacute"); // "Í"
+ addAllowedEscapeString("Igrave"); // "Ì"
+ addAllowedEscapeString("Icirc"); // "Î"
+ addAllowedEscapeString("Iuml"); // "Ï"
+ addAllowedEscapeString("iacute"); // "í"
+ addAllowedEscapeString("igrave"); // "ì"
+ addAllowedEscapeString("icirc"); // "î"
+ addAllowedEscapeString("iuml"); // "ï"
+ addAllowedEscapeString("Oacute"); // "Ó"
+ addAllowedEscapeString("Ograve"); // "Ò"
+ addAllowedEscapeString("Ocirc"); // "Ô"
+ addAllowedEscapeString("Ouml"); // "Ö"
+ addAllowedEscapeString("Otilde"); // "Õ"
+ addAllowedEscapeString("oacute"); // "ó"
+ addAllowedEscapeString("ograve"); // "ò"
+ addAllowedEscapeString("ocirc"); // "ô"
+ addAllowedEscapeString("ouml"); // "ö"
+ addAllowedEscapeString("otilde"); // "õ"
+ addAllowedEscapeString("Uacute"); // "Ú"
+ addAllowedEscapeString("Ugrave"); // "Ù"
+ addAllowedEscapeString("Ucirc"); // "Û"
+ addAllowedEscapeString("Uuml"); // "Ü"
+ addAllowedEscapeString("uacute"); // "ú"
+ addAllowedEscapeString("ugrave"); // "ù"
+ addAllowedEscapeString("ucirc"); // "û"
+ addAllowedEscapeString("uuml"); // "ü"
+ addAllowedEscapeString("Yacute"); // "Ý"
+ addAllowedEscapeString("yacute"); // "ý"
+ addAllowedEscapeString("yuml"); // "ÿ"
+
+ addAllowedEscapeString("deg"); // "°"
+ addAllowedEscapeString("plusmn"); // "±"
+ addAllowedEscapeString("sup2"); // "²"
+ addAllowedEscapeString("sup3"); // "³"
+ addAllowedEscapeString("sup1"); // "¹"
+ addAllowedEscapeString("nbsp"); // "º"
+ addAllowedEscapeString("pound"); // "£"
+ addAllowedEscapeString("cent"); // "¢"
+ addAllowedEscapeString("frac14"); // "¼"
+ addAllowedEscapeString("frac12"); // "½"
+ addAllowedEscapeString("frac34"); // "¾"
+ addAllowedEscapeString("iquest"); // "¿"
+ addAllowedEscapeString("iexcl"); // "¡"
+ addAllowedEscapeString("ETH"); // "Ð"
+ addAllowedEscapeString("eth"); // "ð"
+ addAllowedEscapeString("THORN"); // "Þ"
+ addAllowedEscapeString("thorn"); // "þ"
+ addAllowedEscapeString("AElig"); // "Æ"
+ addAllowedEscapeString("aelig"); // "æ"
+ addAllowedEscapeString("Oslash"); // "Ø"
+ addAllowedEscapeString("curren"); // "¤"
+ addAllowedEscapeString("Ccedil"); // "Ç"
+ addAllowedEscapeString("ccedil"); // "ç"
+ addAllowedEscapeString("szlig"); // "ß"
+ addAllowedEscapeString("Ntilde"); // "Ñ"
+ addAllowedEscapeString("ntilde"); // "ñ"
+ addAllowedEscapeString("yen"); // "¥"
+ addAllowedEscapeString("not"); // "¬"
+ addAllowedEscapeString("ordf"); // "ª"
+ addAllowedEscapeString("uml"); // "¨"
+ addAllowedEscapeString("shy"); // "­"
+ addAllowedEscapeString("macr"); // "¯"
+
+ addAllowedEscapeString("micro"); // "µ"
+ addAllowedEscapeString("middot"); // "·"
+ addAllowedEscapeString("cedil"); // "¸"
+ addAllowedEscapeString("ordm"); // "º"
+ addAllowedEscapeString("times"); // "×"
+ addAllowedEscapeString("divide"); // "÷"
+ addAllowedEscapeString("oslash"); // "ø"
+
+ setTokenCaseSensitive(true);
+
+ addTokenSubstitute("note", " <font color=\"#800000\"><small>(");
+ addTokenSubstitute("/note", ")</small></font> ");
+}
+
+
+bool ThMLHTML::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+ if (!substituteToken(buf, token)) { // manually process if it wasn't a simple substitution
+ MyUserData *u = (MyUserData *)userData;
+ XMLTag tag(token);
+ if (!strcmp(tag.getName(), "sync")) {
+ if (tag.getAttribute("type") && tag.getAttribute("value") && !strcmp(tag.getAttribute("type"), "Strongs")) {
+ const char* value = tag.getAttribute("value");
+ if (*value == 'H' || *value == 'G' || *value == 'A') {
+ value++;
+ buf += "<small><em>";
+ buf += value;
+ buf += "</em></small>";
+ }
+ else if (*value == 'T') {
+ value += 2;
+
+ buf += "<small><i>";
+ buf += value;
+ buf += "</i></small>";
+ }
+ }
+ else if (tag.getAttribute("type") && tag.getAttribute("value") && !strcmp(tag.getAttribute("type"), "morph")) {
+ buf += "<small><em>";
+ buf += tag.getAttribute("value");
+ buf += "</em></small>";
+ }
+ else if (tag.getAttribute("type") && tag.getAttribute("value") && !strcmp(tag.getAttribute("type"), "lemma")) {
+ buf += "<small><em>(";
+ buf += tag.getAttribute("value");
+ buf += ")</em></small>";
+ }
+ }
+ else if (!strcmp(tag.getName(), "div")) {
+ if (tag.isEndTag() && (u->SecHead)) {
+ buf += "</i></b><br />";
+ u->SecHead = false;
+ }
+ else if (tag.getAttribute("class")) {
+ if (!strcmp(tag.getAttribute("class"), "sechead")) {
+ u->SecHead = true;
+ buf += "<br /><b><i>";
+ }
+ else if (!strcmp(tag.getAttribute("class"), "title")) {
+ u->SecHead = true;
+ buf += "<br /><b><i>";
+ }
+ }
+ }
+ else if (!strcmp(tag.getName(), "img")) {
+ const char *src = strstr(token, "src");
+ if (!src) // assert we have a src attribute
+ return false;
+
+ buf += '<';
+ for (const char *c = token; *c; c++) {
+ if (c == src) {
+ for (;((*c) && (*c != '"')); c++)
+ buf += *c;
+
+ if (!*c) { c--; continue; }
+
+ buf += '"';
+ if (*(c+1) == '/') {
+ buf += "file:";
+ buf += userData->module->getConfigEntry("AbsoluteDataPath");
+ if (buf[buf.length()-2] == '/')
+ c++; // skip '/'
+ }
+ continue;
+ }
+ buf += *c;
+ }
+ buf += '>';
+ }
+ else if (!strcmp(tag.getName(), "scripRef")) { //do nothing with scrip refs, we leave them out
+
+ }
+ else {
+ buf += '<';
+ buf += token;
+ buf += '>';
+
+// return false; // we still didn't handle token
+ }
+ }
+ return true;
+}
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmlhtmlhref.cpp b/src/modules/filters/thmlhtmlhref.cpp
new file mode 100644
index 0000000..0596f75
--- /dev/null
+++ b/src/modules/filters/thmlhtmlhref.cpp
@@ -0,0 +1,357 @@
+/***************************************************************************
+ thmlhtmlhref.cpp - ThML to HTML filter with hrefs
+ -------------------
+ begin : 2001-09-03
+ copyright : 2001 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+#include <stdlib.h>
+#include <thmlhtmlhref.h>
+#include <swmodule.h>
+#include <utilxml.h>
+#include <utilstr.h>
+#include <versekey.h>
+#include <url.h>
+
+SWORD_NAMESPACE_START
+
+
+ThMLHTMLHREF::MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) {
+ if (module) {
+ version = module->Name();
+ BiblicalText = (!strcmp(module->Type(), "Biblical Texts"));
+ SecHead = false;
+ }
+}
+
+
+ThMLHTMLHREF::ThMLHTMLHREF() {
+ setTokenStart("<");
+ setTokenEnd(">");
+
+ setEscapeStart("&");
+ setEscapeEnd(";");
+
+ setEscapeStringCaseSensitive(true);
+ setPassThruNumericEscapeString(true);
+
+ addAllowedEscapeString("quot");
+ addAllowedEscapeString("amp");
+ addAllowedEscapeString("lt");
+ addAllowedEscapeString("gt");
+
+ addAllowedEscapeString("nbsp");
+ addAllowedEscapeString("brvbar"); // "Å "
+ addAllowedEscapeString("sect"); // "§"
+ addAllowedEscapeString("copy"); // "©"
+ addAllowedEscapeString("laquo"); // "«"
+ addAllowedEscapeString("reg"); // "®"
+ addAllowedEscapeString("acute"); // "Ž"
+ addAllowedEscapeString("para"); // "¶"
+ addAllowedEscapeString("raquo"); // "»"
+
+ addAllowedEscapeString("Aacute"); // "Ã"
+ addAllowedEscapeString("Agrave"); // "À"
+ addAllowedEscapeString("Acirc"); // "Â"
+ addAllowedEscapeString("Auml"); // "Ä"
+ addAllowedEscapeString("Atilde"); // "Ã"
+ addAllowedEscapeString("Aring"); // "Ã…"
+ addAllowedEscapeString("aacute"); // "á"
+ addAllowedEscapeString("agrave"); // "à"
+ addAllowedEscapeString("acirc"); // "â"
+ addAllowedEscapeString("auml"); // "ä"
+ addAllowedEscapeString("atilde"); // "ã"
+ addAllowedEscapeString("aring"); // "Ã¥"
+ addAllowedEscapeString("Eacute"); // "É"
+ addAllowedEscapeString("Egrave"); // "È"
+ addAllowedEscapeString("Ecirc"); // "Ê"
+ addAllowedEscapeString("Euml"); // "Ë"
+ addAllowedEscapeString("eacute"); // "é"
+ addAllowedEscapeString("egrave"); // "è"
+ addAllowedEscapeString("ecirc"); // "ê"
+ addAllowedEscapeString("euml"); // "ë"
+ addAllowedEscapeString("Iacute"); // "Ã"
+ addAllowedEscapeString("Igrave"); // "Ì"
+ addAllowedEscapeString("Icirc"); // "ÃŽ"
+ addAllowedEscapeString("Iuml"); // "Ã"
+ addAllowedEscapeString("iacute"); // "í"
+ addAllowedEscapeString("igrave"); // "ì"
+ addAllowedEscapeString("icirc"); // "î"
+ addAllowedEscapeString("iuml"); // "ï"
+ addAllowedEscapeString("Oacute"); // "Ó"
+ addAllowedEscapeString("Ograve"); // "Ã’"
+ addAllowedEscapeString("Ocirc"); // "Ô"
+ addAllowedEscapeString("Ouml"); // "Ö"
+ addAllowedEscapeString("Otilde"); // "Õ"
+ addAllowedEscapeString("oacute"); // "ó"
+ addAllowedEscapeString("ograve"); // "ò"
+ addAllowedEscapeString("ocirc"); // "ô"
+ addAllowedEscapeString("ouml"); // "ö"
+ addAllowedEscapeString("otilde"); // "õ"
+ addAllowedEscapeString("Uacute"); // "Ú"
+ addAllowedEscapeString("Ugrave"); // "Ù"
+ addAllowedEscapeString("Ucirc"); // "Û"
+ addAllowedEscapeString("Uuml"); // "Ü"
+ addAllowedEscapeString("uacute"); // "ú"
+ addAllowedEscapeString("ugrave"); // "ù"
+ addAllowedEscapeString("ucirc"); // "û"
+ addAllowedEscapeString("uuml"); // "ü"
+ addAllowedEscapeString("Yacute"); // "Ã"
+ addAllowedEscapeString("yacute"); // "ý"
+ addAllowedEscapeString("yuml"); // "ÿ"
+
+ addAllowedEscapeString("deg"); // "°"
+ addAllowedEscapeString("plusmn"); // "±"
+ addAllowedEscapeString("sup2"); // "²"
+ addAllowedEscapeString("sup3"); // "³"
+ addAllowedEscapeString("sup1"); // "¹"
+ addAllowedEscapeString("nbsp"); // "º"
+ addAllowedEscapeString("pound"); // "£"
+ addAllowedEscapeString("cent"); // "¢"
+ addAllowedEscapeString("frac14"); // "Å’"
+ addAllowedEscapeString("frac12"); // "Å“"
+ addAllowedEscapeString("frac34"); // "Ÿ"
+ addAllowedEscapeString("iquest"); // "¿"
+ addAllowedEscapeString("iexcl"); // "¡"
+ addAllowedEscapeString("ETH"); // "Ã"
+ addAllowedEscapeString("eth"); // "ð"
+ addAllowedEscapeString("THORN"); // "Þ"
+ addAllowedEscapeString("thorn"); // "þ"
+ addAllowedEscapeString("AElig"); // "Æ"
+ addAllowedEscapeString("aelig"); // "æ"
+ addAllowedEscapeString("Oslash"); // "Ø"
+ addAllowedEscapeString("curren"); // "€"
+ addAllowedEscapeString("Ccedil"); // "Ç"
+ addAllowedEscapeString("ccedil"); // "ç"
+ addAllowedEscapeString("szlig"); // "ß"
+ addAllowedEscapeString("Ntilde"); // "Ñ"
+ addAllowedEscapeString("ntilde"); // "ñ"
+ addAllowedEscapeString("yen"); // "Â¥"
+ addAllowedEscapeString("not"); // "¬"
+ addAllowedEscapeString("ordf"); // "ª"
+ addAllowedEscapeString("uml"); // "Å¡"
+ addAllowedEscapeString("shy"); // "­"
+ addAllowedEscapeString("macr"); // "¯"
+
+ addAllowedEscapeString("micro"); // "µ"
+ addAllowedEscapeString("middot"); // "·"
+ addAllowedEscapeString("cedil"); // "ž"
+ addAllowedEscapeString("ordm"); // "º"
+ addAllowedEscapeString("times"); // "×"
+ addAllowedEscapeString("divide"); // "÷"
+ addAllowedEscapeString("oslash"); // "ø"
+
+ setTokenCaseSensitive(true);
+// addTokenSubstitute("scripture", "<i> ");
+ addTokenSubstitute("/scripture", "</i> ");
+}
+
+
+bool ThMLHTMLHREF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+ if (!substituteToken(buf, token)) { // manually process if it wasn't a simple substitution
+ MyUserData *u = (MyUserData *)userData;
+
+ XMLTag tag(token);
+ if ((!tag.isEndTag()) && (!tag.isEmpty()))
+ u->startTag = tag;
+
+ if (tag.getName() && !strcmp(tag.getName(), "sync")) {
+ SWBuf value = tag.getAttribute("value");
+ if (tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "morph")) { //&gt;
+ if(value.length())
+ buf.appendFormatted("<small><em>(<a href=\"passagestudy.jsp?action=showMorph&type=Greek&value=%s\">%s</a>)</em></small>",
+ URL::encode(value.c_str()).c_str(),
+ value.c_str());
+ }
+ else if (tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "lemma")) { //&gt;
+ if(value.length())
+ // empty "type=" is deliberate.
+ buf.appendFormatted("<small><em>&lt;<a href=\"passagestudy.jsp?action=showStrongs&type=&value=%s\">%s</a>&gt;</em></small>",
+ URL::encode(value.c_str()).c_str(),
+ value.c_str());
+ }
+ else if (tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "Strongs")) {
+ char ch = *value;
+ value<<1;
+ buf.appendFormatted("<small><em>&lt;<a href=\"passagestudy.jsp?action=showStrongs&type=%s&value=%s\">",
+ ((ch == 'H') ? "Hebrew" : "Greek"),
+ URL::encode(value.c_str()).c_str());
+ buf += (value.length()) ? value.c_str() : "";
+ buf += "</a>&gt;</em></small>";
+ }
+ else if (tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "Dict")) {
+ buf += (tag.isEndTag() ? "</b>" : "<b>");
+ }
+
+ }
+ // <note> tag
+ else if (!strcmp(tag.getName(), "note")) {
+ if (!tag.isEndTag()) {
+ if (!tag.isEmpty()) {
+ SWBuf type = tag.getAttribute("type");
+ SWBuf footnoteNumber = tag.getAttribute("swordFootnote");
+ VerseKey *vkey = NULL;
+ // see if we have a VerseKey * or descendant
+ SWTRY {
+ vkey = SWDYNAMIC_CAST(VerseKey, u->key);
+ }
+ SWCATCH ( ... ) { }
+ if (vkey) {
+ // leave this special osis type in for crossReference notes types? Might thml use this some day? Doesn't hurt.
+ char ch = ((tag.getAttribute("type") && ((!strcmp(tag.getAttribute("type"), "crossReference")) || (!strcmp(tag.getAttribute("type"), "x-cross-ref")))) ? 'x':'n');
+ buf.appendFormatted("<a href=\"passagestudy.jsp?action=showNote&type=%c&value=%s&module=%s&passage=%s\"><small><sup>*%c</sup></small></a>",
+ ch,
+ URL::encode(footnoteNumber.c_str()).c_str(),
+ URL::encode(u->version.c_str()).c_str(),
+ URL::encode(vkey->getText()).c_str(),
+ ch);
+ }
+ else {
+ char ch = ((tag.getAttribute("type") && ((!strcmp(tag.getAttribute("type"), "crossReference")) || (!strcmp(tag.getAttribute("type"), "x-cross-ref")))) ? 'x':'n');
+ buf.appendFormatted("<a href=\"passagestudy.jsp?action=showNote&type=%c&value=%s&module=%s&passage=%s\"><small><sup>*%c</sup></small></a>",
+ ch,
+ URL::encode(footnoteNumber.c_str()).c_str(),
+ URL::encode(u->version.c_str()).c_str(),
+ URL::encode(u->key->getText()).c_str(),
+ ch);
+ }
+ u->suspendTextPassThru = true;
+ }
+ }
+ if (tag.isEndTag()) {
+ u->suspendTextPassThru = false;
+ }
+ }
+ else if (!strcmp(tag.getName(), "scripture")) {
+ buf += (tag.isEndTag() ? "</i>" : "<i>");
+ }
+ // <scripRef> tag
+ else if (!strcmp(tag.getName(), "scripRef")) {
+ if (!tag.isEndTag()) {
+ if (!tag.isEmpty()) {
+ u->suspendTextPassThru = true;
+ }
+ }
+ if (tag.isEndTag()) { // </scripRef>
+ if (!u->BiblicalText) {
+ SWBuf refList = u->startTag.getAttribute("passage");
+ if (!refList.length())
+ refList = u->lastTextNode;
+ SWBuf version = tag.getAttribute("version");
+
+ buf.appendFormatted("<a href=\"passagestudy.jsp?action=showRef&type=scripRef&value=%s&module=%s\">",
+ (refList.length()) ? URL::encode(refList.c_str()).c_str() : "",
+ (version.length()) ? URL::encode(version.c_str()).c_str() : "");
+ buf += u->lastTextNode.c_str();
+ buf += "</a>";
+ }
+ else {
+ SWBuf footnoteNumber = u->startTag.getAttribute("swordFootnote");
+ VerseKey *vkey = NULL;
+ // see if we have a VerseKey * or descendant
+ SWTRY {
+ vkey = SWDYNAMIC_CAST(VerseKey, u->key);
+ }
+ SWCATCH ( ... ) {}
+ if (vkey) {
+ // leave this special osis type in for crossReference notes types? Might thml use this some day? Doesn't hurt.
+ //buf.appendFormatted("<a href=\"noteID=%s.x.%s\"><small><sup>*x</sup></small></a> ", vkey->getText(), footnoteNumber.c_str());
+ buf.appendFormatted("<a href=\"passagestudy.jsp?action=showNote&type=x&value=%s&module=%s&passage=%s\"><small><sup>*x</sup></small></a>",
+ URL::encode(footnoteNumber.c_str()).c_str(),
+ URL::encode(u->version.c_str()).c_str(),
+ URL::encode(vkey->getText()).c_str());
+
+ }
+ }
+
+ // let's let text resume to output again
+ u->suspendTextPassThru = false;
+ }
+ }
+ else if (tag.getName() && !strcmp(tag.getName(), "div")) {
+ if (tag.isEndTag() && u->SecHead) {
+ buf += "</i></b><br />";
+ u->SecHead = false;
+ }
+ else if (tag.getAttribute("class")) {
+ if (!stricmp(tag.getAttribute("class"), "sechead")) {
+ u->SecHead = true;
+ buf += "<br /><b><i>";
+ }
+ else if (!stricmp(tag.getAttribute("class"), "title")) {
+ u->SecHead = true;
+ buf += "<br /><b><i>";
+ }
+ else {
+ buf += tag;
+ }
+ }
+ else {
+ buf += tag;
+ }
+ }
+ else if (tag.getName() && (!strcmp(tag.getName(), "img") || !strcmp(tag.getName(), "image"))) {
+ const char *src = strstr(token, "src");
+ if (!src) // assert we have a src attribute
+ return false;
+
+ const char *c, *d;
+ if (((c = strchr(src+3, '"')) == NULL) ||
+ ((d = strchr( ++c , '"')) == NULL)) // identify endpoints.
+ return false; // abandon hope.
+
+ SWBuf imagename = "file:";
+ if (*c == '/') // as below, inside for loop.
+ imagename += userData->module->getConfigEntry("AbsoluteDataPath");
+ while (c != d) // move bits into the name.
+ imagename += *(c++);
+
+ // images become clickable, if the UI supports showImage.
+ buf.appendFormatted("<a href=\"passagestudy.jsp?action=showImage&value=%s&module=%s\"><",
+ URL::encode(imagename.c_str()).c_str(),
+ URL::encode(u->version.c_str()).c_str());
+
+ for (c = token; *c; c++) {
+ if ((*c == '/') && (*(c+1) == '\0'))
+ continue;
+ if (c == src) {
+ for (;((*c) && (*c != '"')); c++)
+ buf += *c;
+
+ if (!*c) { c--; continue; }
+
+ buf += '"';
+ if (*(c+1) == '/') {
+ buf += "file:";
+ buf += userData->module->getConfigEntry("AbsoluteDataPath");
+ if (buf[buf.length()-2] == '/')
+ c++; // skip '/'
+ }
+ continue;
+ }
+ buf += *c;
+ }
+ buf += " border=0 /></a>";
+ }
+ else {
+ buf += '<';
+ /*for (const char *tok = token; *tok; tok++)
+ buf += *tok;*/
+ buf += token;
+ buf += '>';
+ //return false; // we still didn't handle token
+ }
+ }
+ return true;
+}
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmllemma.cpp b/src/modules/filters/thmllemma.cpp
new file mode 100644
index 0000000..3e5761d
--- /dev/null
+++ b/src/modules/filters/thmllemma.cpp
@@ -0,0 +1,65 @@
+/******************************************************************************
+ *
+ * thmllemma - SWFilter descendant to hide or show lemmas
+ * in a ThML module.
+ */
+
+
+#include <stdlib.h>
+#include <thmllemma.h>
+
+SWORD_NAMESPACE_START
+
+const char oName[] = "Lemmas";
+const char oTip[] = "Toggles Lemmas On and Off if they exist";
+
+const SWBuf choices[3] = {"Off", "On", ""};
+const StringList oValues(&choices[0], &choices[2]);
+
+ThMLLemma::ThMLLemma() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("Off");
+}
+
+
+ThMLLemma::~ThMLLemma() {
+}
+
+
+char ThMLLemma::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ if (!option) { // if we don't want lemmas
+ bool intoken = false;
+
+ SWBuf token;
+ SWBuf orig = text;
+ const char *from = orig.c_str();
+ for (text = ""; *from; from++) {
+ if (*from == '<') {
+ intoken = true;
+ token = "";
+ continue;
+ }
+ if (*from == '>') { // process tokens
+ intoken = false;
+ if (!strncmp(token.c_str(), "sync ", 5) && strstr(token.c_str(), "type=\"lemma\"")) { // Lemma
+ continue;
+ }
+
+ // if not a lemma token, keep token in text
+ text += '<';
+ text += token;
+ text += '>';
+ continue;
+ }
+
+ if (intoken) {
+ token += *from;
+ }
+ else {
+ text += *from;
+ }
+ }
+ }
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmlmorph.cpp b/src/modules/filters/thmlmorph.cpp
new file mode 100644
index 0000000..0fbef56
--- /dev/null
+++ b/src/modules/filters/thmlmorph.cpp
@@ -0,0 +1,65 @@
+/******************************************************************************
+ *
+ * thmlmorph - SWFilter descendant to hide or show morph tags
+ * in a ThML module.
+ */
+
+
+#include <stdlib.h>
+#include <thmlmorph.h>
+
+SWORD_NAMESPACE_START
+
+const char oName[] = "Morphological Tags";
+const char oTip[] = "Toggles Morphological Tags On and Off if they exist";
+
+const SWBuf choices[3] = {"Off", "On", ""};
+const StringList oValues(&choices[0], &choices[2]);
+
+ThMLMorph::ThMLMorph() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("Off");
+}
+
+
+ThMLMorph::~ThMLMorph() {
+}
+
+
+char ThMLMorph::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ if (!option) { // if we don't want morph tags
+ bool intoken = false;
+
+ SWBuf token;
+ SWBuf orig = text;
+ const char *from = orig.c_str();
+ for (text = ""; *from; from++) {
+ if (*from == '<') {
+ intoken = true;
+ token = "";
+ continue;
+ }
+ if (*from == '>') { // process tokens
+ intoken = false;
+ if (!strncmp(token.c_str(), "sync ", 5) && strstr(token.c_str(), "type=\"morph\"")) { // Morph
+ continue;
+ }
+
+ // if not a morph tag token, keep token in text
+ text += '<';
+ text += token;
+ text += '>';
+ continue;
+ }
+
+ if (intoken) {
+ token += *from;
+ }
+ else {
+ text += *from;
+ }
+ }
+ }
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmlosis.cpp b/src/modules/filters/thmlosis.cpp
new file mode 100644
index 0000000..939be82
--- /dev/null
+++ b/src/modules/filters/thmlosis.cpp
@@ -0,0 +1,575 @@
+/******************************************************************************
+ *
+ * thmlstrongs - SWFilter descendant to hide or show strongs number
+ * in a ThML module.
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <ctype.h>
+#include <thmlosis.h>
+#include <swmodule.h>
+#include <swlog.h>
+#include <versekey.h>
+#include <utilstr.h>
+#include <utilxml.h>
+
+
+SWORD_NAMESPACE_START
+
+ThMLOSIS::ThMLOSIS() {
+}
+
+
+ThMLOSIS::~ThMLOSIS() {
+}
+
+
+char ThMLOSIS::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ char token[2048]; // cheese. Fix.
+ int tokpos = 0;
+ bool intoken = false;
+ bool keepToken = false;
+ bool ampersand = false;
+
+// static QuoteStack quoteStack;
+
+ bool lastspace = false;
+ char val[128];
+ SWBuf buf;
+ char *valto;
+ char *ch;
+
+ const char *wordStart = text.c_str();
+ const char *wordEnd = NULL;
+
+ const char *textStart = NULL;
+ const char *textEnd = NULL;
+
+ bool suspendTextPassThru = false;
+ bool handled = false;
+ bool newText = false;
+ bool newWord = false;
+
+// SWBuf tmp;
+ SWBuf divEnd = "";
+
+ SWBuf orig = text;
+ const char* from = orig.c_str();
+
+ text = "";
+ for (from = orig.c_str(); *from; ++from) {
+
+ // handle silly <variant word> items in greek whnu, remove when module is fixed
+ if ((*from == '<') && (*(from+1) < 0)) {
+ text += "&lt;";
+ continue;
+ }
+
+ if (*from == '<') { //start of new token detected
+ intoken = true;
+ tokpos = 0;
+ token[0] = 0;
+ token[1] = 0;
+ token[2] = 0;
+ ampersand = false;
+ textEnd = from-1;
+ wordEnd = text.c_str() + text.length();//not good, instead of wordEnd = to!
+
+// wordEnd = to;
+ continue;
+ }
+
+ if (*from == '&') {
+ intoken = true;
+ tokpos = 0;
+ token[0] = 0;
+ token[1] = 0;
+ token[2] = 0;
+ ampersand = true;
+ continue;
+ }
+
+ if (*from == ';' && ampersand) {
+ intoken = false;
+ ampersand = false;
+
+ if (*token == '#') {
+ text += '&';
+ text += token;
+ text += ';';
+ }
+ else if (!strncmp("nbsp", token, 4)) text += ' ';
+ else if (!strncmp("quot", token, 4)) text += '"';
+ else if (!strncmp("amp", token, 3)) text += '&';
+ else if (!strncmp("lt", token, 2)) text += '<';
+ else if (!strncmp("gt", token, 2)) text += '>';
+ else if (!strncmp("brvbar", token, 6)) text += '¦';
+ else if (!strncmp("sect", token, 4)) text += '§';
+ else if (!strncmp("copy", token, 4)) text += '©';
+ else if (!strncmp("laquo", token, 5)) text += '«';
+ else if (!strncmp("reg", token, 3)) text += '®';
+ else if (!strncmp("acute", token, 5)) text += '´';
+ else if (!strncmp("para", token, 4)) text += '¶';
+ else if (!strncmp("raquo", token, 5)) text += '»';
+ else if (!strncmp("Aacute", token, 6)) text += 'Á';
+ else if (!strncmp("Agrave", token, 6)) text += 'À';
+ else if (!strncmp("Acirc", token, 5)) text += 'Â';
+ else if (!strncmp("Auml", token, 4)) text += 'Ä';
+ else if (!strncmp("Atilde", token, 6)) text += 'Ã';
+ else if (!strncmp("Aring", token, 5)) text += 'Å';
+ else if (!strncmp("aacute", token, 6)) text += 'á';
+ else if (!strncmp("agrave", token, 6)) text += 'à';
+ else if (!strncmp("acirc", token, 5)) text += 'â';
+ else if (!strncmp("auml", token, 4)) text += 'ä';
+ else if (!strncmp("atilde", token, 6)) text += 'ã';
+ else if (!strncmp("aring", token, 5)) text += 'å';
+ else if (!strncmp("Eacute", token, 6)) text += 'É';
+ else if (!strncmp("Egrave", token, 6)) text += 'È';
+ else if (!strncmp("Ecirc", token, 5)) text += 'Ê';
+ else if (!strncmp("Euml", token, 4)) text += 'Ë';
+ else if (!strncmp("eacute", token, 6)) text += 'é';
+ else if (!strncmp("egrave", token, 6)) text += 'è';
+ else if (!strncmp("ecirc", token, 5)) text += 'ê';
+ else if (!strncmp("euml", token, 4)) text += 'ë';
+ else if (!strncmp("Iacute", token, 6)) text += 'Í';
+ else if (!strncmp("Igrave", token, 6)) text += 'Ì';
+ else if (!strncmp("Icirc", token, 5)) text += 'Î';
+ else if (!strncmp("Iuml", token, 4)) text += 'Ï';
+ else if (!strncmp("iacute", token, 6)) text += 'í';
+ else if (!strncmp("igrave", token, 6)) text += 'ì';
+ else if (!strncmp("icirc", token, 5)) text += 'î';
+ else if (!strncmp("iuml", token, 4)) text += 'ï';
+ else if (!strncmp("Oacute", token, 6)) text += 'Ó';
+ else if (!strncmp("Ograve", token, 6)) text += 'Ò';
+ else if (!strncmp("Ocirc", token, 5)) text += 'Ô';
+ else if (!strncmp("Ouml", token, 4)) text += 'Ö';
+ else if (!strncmp("Otilde", token, 6)) text += 'Õ';
+ else if (!strncmp("oacute", token, 6)) text += 'ó';
+ else if (!strncmp("ograve", token, 6)) text += 'ò';
+ else if (!strncmp("ocirc", token, 5)) text += 'ô';
+ else if (!strncmp("ouml", token, 4)) text += 'ö';
+ else if (!strncmp("otilde", token, 6)) text += 'õ';
+ else if (!strncmp("Uacute", token, 6)) text += 'Ú';
+ else if (!strncmp("Ugrave", token, 6)) text += 'Ù';
+ else if (!strncmp("Ucirc", token, 5)) text += 'Û';
+ else if (!strncmp("Uuml", token, 4)) text += 'Ü';
+ else if (!strncmp("uacute", token, 6)) text += 'ú';
+ else if (!strncmp("ugrave", token, 6)) text += 'ù';
+ else if (!strncmp("ucirc", token, 5)) text += 'û';
+ else if (!strncmp("uuml", token, 4)) text += 'ü';
+ else if (!strncmp("Yacute", token, 6)) text += 'Ý';
+ else if (!strncmp("yacute", token, 6)) text += 'ý';
+ else if (!strncmp("yuml", token, 4)) text += 'ÿ';
+
+ else if (!strncmp("deg", token, 3)) text += '°';
+ else if (!strncmp("plusmn", token, 6)) text += '±';
+ else if (!strncmp("sup2", token, 4)) text += '²';
+ else if (!strncmp("sup3", token, 4)) text += '³';
+ else if (!strncmp("sup1", token, 4)) text += '¹';
+ else if (!strncmp("nbsp", token, 4)) text += 'º';
+ else if (!strncmp("pound", token, 5)) text += '£';
+ else if (!strncmp("cent", token, 4)) text += '¢';
+ else if (!strncmp("frac14", token, 6)) text += '¼';
+ else if (!strncmp("frac12", token, 6)) text += '½';
+ else if (!strncmp("frac34", token, 6)) text += '¾';
+ else if (!strncmp("iquest", token, 6)) text += '¿';
+ else if (!strncmp("iexcl", token, 5)) text += '¡';
+ else if (!strncmp("ETH", token, 3)) text += 'Ð';
+ else if (!strncmp("eth", token, 3)) text += 'ð';
+ else if (!strncmp("THORN", token, 5)) text += 'Þ';
+ else if (!strncmp("thorn", token, 5)) text += 'þ';
+ else if (!strncmp("AElig", token, 5)) text += 'Æ';
+ else if (!strncmp("aelig", token, 5)) text += 'æ';
+ else if (!strncmp("Oslash", token, 6)) text += 'Ø';
+ else if (!strncmp("curren", token, 6)) text += '¤';
+ else if (!strncmp("Ccedil", token, 6)) text += 'Ç';
+ else if (!strncmp("ccedil", token, 6)) text += 'ç';
+ else if (!strncmp("szlig", token, 5)) text += 'ß';
+ else if (!strncmp("Ntilde", token, 6)) text += 'Ñ';
+ else if (!strncmp("ntilde", token, 6)) text += 'ñ';
+ else if (!strncmp("yen", token, 3)) text += '¥';
+ else if (!strncmp("not", token, 3)) text += '¬';
+ else if (!strncmp("ordf", token, 4)) text += 'ª';
+ else if (!strncmp("uml", token, 3)) text += '¨';
+ else if (!strncmp("shy", token, 3)) text += '­';
+ else if (!strncmp("macr", token, 4)) text += '¯';
+ else if (!strncmp("micro", token, 5)) text += "µ";
+ else if (!strncmp("middot", token, 6)) text +="·";
+ else if (!strncmp("cedil", token, 5)) text += "¸";
+ else if (!strncmp("ordm", token, 4)) text += "º";
+ else if (!strncmp("times", token, 5)) text += "×";
+ else if (!strncmp("divide", token, 6)) text +="÷";
+ else if (!strncmp("oslash", token, 6)) text +="ø";
+ continue;
+ }
+
+ // handle silly <variant word> items in greek whnu, remove when module is fixed
+ if ((*from == '>') && (*(from-1) < 0)) {
+ text += "&gt;";
+ continue;
+ }
+
+ if (*from == '>') { // process tokens
+ intoken = false;
+ keepToken = false;
+ suspendTextPassThru = false;
+ newWord = true;
+ handled = false;
+
+ while (wordStart < (text.c_str() + text.length())) { //hack
+ if (strchr(";,. :?!()'\"", *wordStart) && wordStart[0] && wordStart[1])
+ wordStart++;
+ else break;
+ }
+ while (wordEnd > wordStart) {
+ if (strchr(" ,;:.?!()'\"", *wordEnd))
+ wordEnd--;
+ else break;
+ }
+
+ // variants
+ if (!strncmp(token, "div type=\"variant\"", 18)) {
+ XMLTag tag = token;
+ text.append("<seg type=\"x-variant\"");
+ SWBuf cls = "x-class:";
+ cls += tag.getAttribute("class");
+ if (cls.length()>8)
+ text.appendFormatted(" subType=\"%s\"", cls.c_str());
+
+ text += ">";
+ divEnd = "</seg>";
+ newText = true;
+ lastspace = false;
+ handled = true;
+ }
+ // section titles
+ if (!strcmp(token, "div class=\"sechead\"")) {
+// pushString(&to, "<title>");
+ text.append("<title>");
+ divEnd = "</title>";
+ newText = true;
+ lastspace = false;
+ handled = true;
+ }
+ else if (!strcmp(token, "/div")) {
+ //pushString(&to, divEnd.c_str());
+ text.append(divEnd);
+ lastspace = false;
+ handled = true;
+ }
+ // Scripture Reference
+ if (!strncmp(token, "scripRef", 8)) {
+ // pushString(buf, "<reference osisRef=\"");
+ suspendTextPassThru = true;
+ newText = true;
+ handled = true;
+ }
+ else if (!strncmp(token, "/scripRef", 9)) {
+ SWBuf tmp;
+ tmp = "";
+ tmp.append(textStart, (int)(textEnd - textStart)+1);
+ //pushString(&to, convertToOSIS(tmp.c_str(), key));
+ text.append(VerseKey::convertToOSIS(tmp.c_str(), key));
+ suspendTextPassThru = false;
+ handled = true;
+ }
+// Usage of italics to represent transChange isn't domaninant;
+// solution: mark in OSIS instead, assume no semantics other than emphasis
+// of italicized text
+// if (!strcmp(module->Type(), "Biblical Texts")) {
+// // Italics assume transchange for Biblical texts
+// if (!stricmp(token, "i")) {
+// pushString(&to, "<transChange type=\"added\">");
+// newText = true;
+// lastspace = false;
+// handled = true;
+// }
+// else if (!stricmp(token, "/i")) {
+// pushString(&to, "</transChange>");
+// lastspace = false;
+// handled = true;
+// }
+// }
+// else {
+// // otherwise, italics are just italics
+//-- end italics for transchange
+ if (!stricmp(token, "i")) {
+// pushString(&to, "<hi type=\"i\">");
+ text.append("<hi type=\"i\">");
+ newText = true;
+ lastspace = false;
+ handled = true;
+ }
+ else if (!stricmp(token, "/i")) {
+// pushString(&to, "</hi>");
+ text.append("</hi>");
+ lastspace = false;
+ handled = true;
+ }
+// }
+
+ if (!strcmp(token, "b")) {
+// pushString(&to, "<hi type=\"b\">");
+ text.append("<hi type=\"b\">");
+ newText = true;
+ lastspace = false;
+ handled = true;
+ }
+ else if (!strcmp(token, "/b")) {
+// pushString(&to, "</hi>");
+ text.append("</hi>");
+ lastspace = false;
+ handled = true;
+ }
+
+ // Footnote
+ if (!strcmp(token, "note")) {
+ //pushString(&to, "<note>");
+ text.append("<note>");
+ newText = true;
+ lastspace = false;
+ handled = true;
+ }
+ else if (!strcmp(token, "/note")) {
+ // pushString(&to, "</note>");
+ text.append("</note>");
+ lastspace = false;
+ handled = true;
+ }
+
+ // Figure
+ else if (!strncmp(token, "img ", 4)) {
+ const char *src = strstr(token, "src");
+ if (!src) // assert we have a src attribute
+ continue;
+// return false;
+
+ //pushString(&to, "<figure src=\"");
+ text.append("<figure src=\"");
+
+ const char* end = strchr(src+2, '"'); //start search behind src="
+
+ if (end) { //append the path
+ text.append(src+2, end - (src+2));
+ }
+
+// const char *c;
+// for (c = src;((*c) && (*c != '"')); c++);
+
+// uncomment for SWORD absolute path logic
+// if (*(c+1) == '/') {
+// pushString(buf, "file:");
+// pushString(buf, module->getConfigEntry("AbsoluteDataPath"));
+// if (*((*buf)-1) == '/')
+// c++; // skip '/'
+// }
+// end of uncomment for asolute path logic
+
+// for (c++;((*c) && (*c != '"')); c++)
+// *to++ = *c;
+
+ //pushString(&to, "\" />");
+ text.append("\" />");
+ handled = true;
+ }
+
+ // Strongs numbers
+ else if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs
+ valto = val;
+ for (unsigned int i = 27; token[i] != '\"' && i < 150; i++)
+ *valto++ = token[i];
+ *valto = 0;
+ if (atoi((!isdigit(*val))?val+1:val) < 5627) {
+ // normal strongs number
+ strstrip(val);
+
+ if (!strncmp(wordStart, "<w ", 3)) {
+ const char *attStart = strstr(wordStart, "lemma");
+ if (attStart) { //existing morph attribute, append this one to it
+ attStart += 7;
+ buf = "";
+ buf.appendFormatted("strong:%s ", val);
+ }
+ else { // no lemma attribute
+ attStart = wordStart + 3;
+ buf = "";
+ buf.appendFormatted(buf, "lemma=\"strong:%s\" ", val);
+ }
+
+ text.insert(attStart - text.c_str(), buf);
+ }
+ else { //wordStart doesn't point to an existing <w> attribute!
+ buf = "";
+ buf.appendFormatted("<w lemma=\"strong:%s\">", val);
+ text.insert(wordStart - text.c_str(), buf);
+ text += "</w>";
+ lastspace = false;
+ }
+ }
+ // OLB verb morph, leave it out of OSIS tag
+ else {
+ }
+ handled = true;
+ }
+
+ // Morphology
+ else if (!strncmp(token, "sync type=\"morph\"", 17)) {
+ SWBuf cls = "";
+ SWBuf morph = "";
+ for (ch = token+17; *ch; ch++) {
+ if (!strncmp(ch, "class=\"", 7)) {
+ valto = val;
+ for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++)
+ *valto++ = ch[i];
+ *valto = 0;
+ strstrip(val);
+ cls = val;
+ }
+ if (!strncmp(ch, "value=\"", 7)) {
+ valto = val;
+ for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++)
+ *valto++ = ch[i];
+ *valto = 0;
+ strstrip(val);
+ morph = val;
+ }
+ }
+ if (!strncmp(wordStart, "<w ", 3)) {
+ const char *attStart = strstr(wordStart, "morph");
+ if (attStart) { //existing morph attribute, append this one to it
+ attStart += 7;
+ buf = "";
+ buf.appendFormatted("%s:%s ", ((cls.length())?cls.c_str():"robinson"), morph.c_str());
+ }
+ else { // no lemma attribute
+ attStart = wordStart + 3;
+ buf = "";
+ buf.appendFormatted("morph=\"%s:%s\" ", ((cls.length())?cls.c_str():"robinson"), morph.c_str());
+ }
+
+ text.insert(attStart - text.c_str(), buf); //hack, we have to
+ }
+ else { //no existing <w> attribute fond
+ buf = "";
+ buf.appendFormatted("<w morph=\"%s:%s\">", ((cls.length())?cls.c_str():"robinson"), morph.c_str());
+ text.insert(wordStart - text.c_str(), buf);
+ text += "</w>";
+ lastspace = false;
+
+ }
+ handled = true;
+ }
+
+ if (!keepToken) {
+ if (!handled) {
+ SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? (const char*)*key : "<unknown>");
+// exit(-1);
+ }
+ if (from[1] && strchr(" ,;.:?!()'\"", from[1])) {
+ if (lastspace) {
+ text--;
+ }
+ }
+ if (newText) {
+ textStart = from+1;
+ newText = false;
+ }
+ continue;
+ }
+
+ // if not a strongs token, keep token in text
+ text.appendFormatted("<%s>", token);
+
+ if (newText) {
+ textStart = text.c_str() + text.length();
+ newWord = false;
+ }
+ continue;
+ }
+ if (intoken) {
+ if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) {
+ token[tokpos++] = *from;
+ token[tokpos+2] = 0;
+ }
+ }
+ else {
+ switch (*from) {
+ case '\'':
+ case '\"':
+ case '`':
+// quoteStack.handleQuote(fromStart, from, &to);
+ text += *from;
+ //from++; //this line removes chars after an apostrophe! Needs fixing.
+ break;
+ default:
+ if (newWord && (*from != ' ')) {
+ wordStart = text.c_str() + text.length();
+ newWord = false;
+
+ //fix this if required?
+ //memset(to, 0, 10);
+
+ }
+
+ if (!suspendTextPassThru) {
+ text += (*from);
+ lastspace = (*from == ' ');
+ }
+ }
+ }
+ }
+
+ VerseKey *vkey = SWDYNAMIC_CAST(VerseKey, key);
+ if (vkey) {
+ SWBuf ref = "";
+ if (vkey->Verse()) {
+ ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef());
+ }
+
+ if (ref.length() > 0) {
+
+ text = ref + text;
+
+ if (vkey->Verse()) {
+ VerseKey tmp;
+ tmp = *vkey;
+ tmp.AutoNormalize(0);
+ tmp.Headings(1);
+
+ text += "</verse>";
+
+ tmp = MAXVERSE;
+ if (*vkey == tmp) {
+ tmp.Verse(0);
+// sprintf(ref, "\t</div>");
+// pushString(&to, ref);
+ tmp = MAXCHAPTER;
+ tmp = MAXVERSE;
+ if (*vkey == tmp) {
+ tmp.Chapter(0);
+ tmp.Verse(0);
+// sprintf(ref, "\t</div>");
+// pushString(&to, ref);
+/*
+ if (!quoteStack.empty()) {
+ SWLog::getSystemLog()->logError("popping unclosed quote at end of book");
+ quoteStack.clear();
+ }
+*/
+ }
+ }
+ }
+// else if (vkey->Chapter()) {
+// sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef());
+// }
+// else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef());
+ }
+ }
+ return 0;
+}
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmlplain.cpp b/src/modules/filters/thmlplain.cpp
new file mode 100644
index 0000000..8f8379a
--- /dev/null
+++ b/src/modules/filters/thmlplain.cpp
@@ -0,0 +1,219 @@
+/******************************************************************************
+ *
+ * thmlplain - SWFilter descendant to strip out all ThML tags or convert to
+ * ASCII rendered symbols.
+ */
+
+
+#include <stdlib.h>
+#include <thmlplain.h>
+#include <swbuf.h>
+
+SWORD_NAMESPACE_START
+
+ThMLPlain::ThMLPlain() {
+}
+
+char ThMLPlain::processText(SWBuf &text, const SWKey *key, const SWModule *module)
+{
+ char token[2048];
+ int tokpos = 0;
+ bool intoken = false;
+ bool ampersand = false;
+
+ const char *from;
+ SWBuf orig = text;
+ from = orig.c_str();
+ for (text = ""; *from; from++)
+ {
+ if (*from == 10 || *from == 13)
+ from++;
+ if (*from == '<') {
+ intoken = true;
+ tokpos = 0;
+ token[0] = 0;
+ token[1] = 0;
+ token[2] = 0;
+ ampersand = false;
+ continue;
+ }
+ else if (*from == '&') {
+ intoken = true;
+ tokpos = 0;
+ token[0] = 0;
+ token[1] = 0;
+ token[2] = 0;
+ ampersand = true;
+ continue;
+ }
+ if (*from == ';' && ampersand) {
+ intoken = false;
+ ampersand = false;
+
+ if (!strncmp("nbsp", token, 4)) text += ' ';
+ else if (!strncmp("quot", token, 4)) text += '"';
+ else if (!strncmp("amp", token, 3)) text += '&';
+ else if (!strncmp("lt", token, 2)) text += '<';
+ else if (!strncmp("gt", token, 2)) text += '>';
+ else if (!strncmp("brvbar", token, 6)) text += '¦';
+ else if (!strncmp("sect", token, 4)) text += '§';
+ else if (!strncmp("copy", token, 4)) text += '©';
+ else if (!strncmp("laquo", token, 5)) text += '«';
+ else if (!strncmp("reg", token, 3)) text += '®';
+ else if (!strncmp("acute", token, 5)) text += '´';
+ else if (!strncmp("para", token, 4)) text += '¶';
+ else if (!strncmp("raquo", token, 5)) text += '»';
+
+ else if (!strncmp("Aacute", token, 6)) text += 'Á';
+ else if (!strncmp("Agrave", token, 6)) text += 'À';
+ else if (!strncmp("Acirc", token, 5)) text += 'Â';
+ else if (!strncmp("Auml", token, 4)) text += 'Ä';
+ else if (!strncmp("Atilde", token, 6)) text += 'Ã';
+ else if (!strncmp("Aring", token, 5)) text += 'Å';
+ else if (!strncmp("aacute", token, 6)) text += 'á';
+ else if (!strncmp("agrave", token, 6)) text += 'à';
+ else if (!strncmp("acirc", token, 5)) text += 'â';
+ else if (!strncmp("auml", token, 4)) text += 'ä';
+ else if (!strncmp("atilde", token, 6)) text += 'ã';
+ else if (!strncmp("aring", token, 5)) text += 'å';
+ else if (!strncmp("Eacute", token, 6)) text += 'É';
+ else if (!strncmp("Egrave", token, 6)) text += 'È';
+ else if (!strncmp("Ecirc", token, 5)) text += 'Ê';
+ else if (!strncmp("Euml", token, 4)) text += 'Ë';
+ else if (!strncmp("eacute", token, 6)) text += 'é';
+ else if (!strncmp("egrave", token, 6)) text += 'è';
+ else if (!strncmp("ecirc", token, 5)) text += 'ê';
+ else if (!strncmp("euml", token, 4)) text += 'ë';
+ else if (!strncmp("Iacute", token, 6)) text += 'Í';
+ else if (!strncmp("Igrave", token, 6)) text += 'Ì';
+ else if (!strncmp("Icirc", token, 5)) text += 'Î';
+ else if (!strncmp("Iuml", token, 4)) text += 'Ï';
+ else if (!strncmp("iacute", token, 6)) text += 'í';
+ else if (!strncmp("igrave", token, 6)) text += 'ì';
+ else if (!strncmp("icirc", token, 5)) text += 'î';
+ else if (!strncmp("iuml", token, 4)) text += 'ï';
+ else if (!strncmp("Oacute", token, 6)) text += 'Ó';
+ else if (!strncmp("Ograve", token, 6)) text += 'Ò';
+ else if (!strncmp("Ocirc", token, 5)) text += 'Ô';
+ else if (!strncmp("Ouml", token, 4)) text += 'Ö';
+ else if (!strncmp("Otilde", token, 6)) text += 'Õ';
+ else if (!strncmp("oacute", token, 6)) text += 'ó';
+ else if (!strncmp("ograve", token, 6)) text += 'ò';
+ else if (!strncmp("ocirc", token, 5)) text += 'ô';
+ else if (!strncmp("ouml", token, 4)) text += 'ö';
+ else if (!strncmp("otilde", token, 6)) text += 'õ';
+ else if (!strncmp("Uacute", token, 6)) text += 'Ú';
+ else if (!strncmp("Ugrave", token, 6)) text += 'Ù';
+ else if (!strncmp("Ucirc", token, 5)) text += 'Û';
+ else if (!strncmp("Uuml", token, 4)) text += 'Ü';
+ else if (!strncmp("uacute", token, 6)) text += 'ú';
+ else if (!strncmp("ugrave", token, 6)) text += 'ù';
+ else if (!strncmp("ucirc", token, 5)) text += 'û';
+ else if (!strncmp("uuml", token, 4)) text += 'ü';
+ else if (!strncmp("Yacute", token, 6)) text += 'Ý';
+ else if (!strncmp("yacute", token, 6)) text += 'ý';
+ else if (!strncmp("yuml", token, 4)) text += 'ÿ';
+
+ else if (!strncmp("deg", token, 3)) text += '°';
+ else if (!strncmp("plusmn", token, 6)) text += '±';
+ else if (!strncmp("sup2", token, 4)) text += '²';
+ else if (!strncmp("sup3", token, 4)) text += '³';
+ else if (!strncmp("sup1", token, 4)) text += '¹';
+ else if (!strncmp("nbsp", token, 4)) text += 'º';
+ else if (!strncmp("pound", token, 5)) text += '£';
+ else if (!strncmp("cent", token, 4)) text += '¢';
+ else if (!strncmp("frac14", token, 6)) text += '¼';
+ else if (!strncmp("frac12", token, 6)) text += '½';
+ else if (!strncmp("frac34", token, 6)) text += '¾';
+ else if (!strncmp("iquest", token, 6)) text += '¿';
+ else if (!strncmp("iexcl", token, 5)) text += '¡';
+ else if (!strncmp("ETH", token, 3)) text += 'Ð';
+ else if (!strncmp("eth", token, 3)) text += 'ð';
+ else if (!strncmp("THORN", token, 5)) text += 'Þ';
+ else if (!strncmp("thorn", token, 5)) text += 'þ';
+ else if (!strncmp("AElig", token, 5)) text += 'Æ';
+ else if (!strncmp("aelig", token, 5)) text += 'æ';
+ else if (!strncmp("Oslash", token, 6)) text += 'Ø';
+ else if (!strncmp("curren", token, 6)) text += '¤';
+ else if (!strncmp("Ccedil", token, 6)) text += 'Ç';
+ else if (!strncmp("ccedil", token, 6)) text += 'ç';
+ else if (!strncmp("szlig", token, 5)) text += 'ß';
+ else if (!strncmp("Ntilde", token, 6)) text += 'Ñ';
+ else if (!strncmp("ntilde", token, 6)) text += 'ñ';
+ else if (!strncmp("yen", token, 3)) text += '¥';
+ else if (!strncmp("not", token, 3)) text += '¬';
+ else if (!strncmp("ordf", token, 4)) text += 'ª';
+ else if (!strncmp("uml", token, 3)) text += '¨';
+ else if (!strncmp("shy", token, 3)) text += '­';
+ else if (!strncmp("macr", token, 4)) text += '¯';
+ else if (!strncmp("micro", token, 5)) text += "µ";
+ else if (!strncmp("middot", token, 6)) text +="·";
+ else if (!strncmp("cedil", token, 5)) text += "¸";
+ else if (!strncmp("ordm", token, 4)) text += "º";
+ else if (!strncmp("times", token, 5)) text += "×";
+ else if (!strncmp("divide", token, 6)) text +="÷";
+ else if (!strncmp("oslash", token, 6)) text +="ø";
+ continue;
+
+ }
+ else if (*from == '>' && !ampersand) {
+ intoken = false;
+ // process desired tokens
+ if (!strncmp(token, "sync type=\"Strongs\" value=\"", 27)) {
+ text += ' ';
+ text += '<';
+ for (unsigned int i = 27; token[i] != '\"'; i++)
+ text += token[i];
+ text += '>';
+ continue;
+ }
+ if (!strncmp(token, "sync type=\"morph\" value=\"", 25)) {
+ text += ' ';
+ text += '(';
+ for (unsigned int i = 25; token[i] != '\"'; i++)
+ text += token[i];
+ text += ')';
+ continue;
+ }
+ if (!strncmp("note", token, 4)) {
+ text += ' ';
+ text += '(';
+ }
+ else if (!strncmp("br", token, 2))
+ text += '\n';
+ else if (!strncmp("/p", token, 2))
+ text += '\n';
+ else if (!strncmp("/note", token, 5)) {
+ text += ')';
+ text += ' ';
+ }
+ continue;
+ }
+ if (intoken) {
+ if (tokpos < 2045)
+ token[tokpos++] = *from;
+ token[tokpos+2] = 0;
+ }
+ else text += *from;
+ }
+
+ orig = text;
+ from = orig.c_str();
+ for (text = ""; *from; from++) { //loop to remove extra spaces
+ if ((strchr(" \t\n\r", *from))) {
+ while (*(from+1) && (strchr(" \t\n\r", *(from+1)))) {
+ from++;
+ }
+ text += " ";
+ }
+ else {
+ text += *from;
+ }
+ }
+ text += (char)0;
+
+ return 0;
+}
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmlrtf.cpp b/src/modules/filters/thmlrtf.cpp
new file mode 100644
index 0000000..23e4a90
--- /dev/null
+++ b/src/modules/filters/thmlrtf.cpp
@@ -0,0 +1,346 @@
+/***************************************************************************
+ thmlrtf.cpp - ThML to RTF filter
+ -------------------
+ begin : 1999-10-27
+ copyright : 2001 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <thmlrtf.h>
+#include <swmodule.h>
+#include <utilxml.h>
+#include <utilstr.h>
+#include <versekey.h>
+
+SWORD_NAMESPACE_START
+
+ThMLRTF::ThMLRTF() {
+ setTokenStart("<");
+ setTokenEnd(">");
+
+ setEscapeStart("&");
+ setEscapeEnd(";");
+
+ setEscapeStringCaseSensitive(true);
+
+ addEscapeStringSubstitute("nbsp", "\302\240");
+ addEscapeStringSubstitute("apos", "'");
+ addEscapeStringSubstitute("quot", "\"");
+ addEscapeStringSubstitute("amp", "&");
+ addEscapeStringSubstitute("lt", "<");
+ addEscapeStringSubstitute("gt", ">");
+ addEscapeStringSubstitute("brvbar", "¦");
+ addEscapeStringSubstitute("sect", "§");
+ addEscapeStringSubstitute("copy", "©");
+ addEscapeStringSubstitute("laquo", "«");
+ addEscapeStringSubstitute("reg", "®");
+ addEscapeStringSubstitute("acute", "´");
+ addEscapeStringSubstitute("para", "¶");
+ addEscapeStringSubstitute("raquo", "»");
+
+ addEscapeStringSubstitute("Aacute", "Á");
+ addEscapeStringSubstitute("Agrave", "À");
+ addEscapeStringSubstitute("Acirc", "Â");
+ addEscapeStringSubstitute("Auml", "Ä");
+ addEscapeStringSubstitute("Atilde", "Ã");
+ addEscapeStringSubstitute("Aring", "Å");
+ addEscapeStringSubstitute("aacute", "á");
+ addEscapeStringSubstitute("agrave", "à");
+ addEscapeStringSubstitute("acirc", "â");
+ addEscapeStringSubstitute("auml", "ä");
+ addEscapeStringSubstitute("atilde", "ã");
+ addEscapeStringSubstitute("aring", "å");
+ addEscapeStringSubstitute("Eacute", "É");
+ addEscapeStringSubstitute("Egrave", "È");
+ addEscapeStringSubstitute("Ecirc", "Ê");
+ addEscapeStringSubstitute("Euml", "Ë");
+ addEscapeStringSubstitute("eacute", "é");
+ addEscapeStringSubstitute("egrave", "è");
+ addEscapeStringSubstitute("ecirc", "ê");
+ addEscapeStringSubstitute("euml", "ë");
+ addEscapeStringSubstitute("Iacute", "Í");
+ addEscapeStringSubstitute("Igrave", "Ì");
+ addEscapeStringSubstitute("Icirc", "Î");
+ addEscapeStringSubstitute("Iuml", "Ï");
+ addEscapeStringSubstitute("iacute", "í");
+ addEscapeStringSubstitute("igrave", "ì");
+ addEscapeStringSubstitute("icirc", "î");
+ addEscapeStringSubstitute("iuml", "ï");
+ addEscapeStringSubstitute("Oacute", "Ó");
+ addEscapeStringSubstitute("Ograve", "Ò");
+ addEscapeStringSubstitute("Ocirc", "Ô");
+ addEscapeStringSubstitute("Ouml", "Ö");
+ addEscapeStringSubstitute("Otilde", "Õ");
+ addEscapeStringSubstitute("oacute", "ó");
+ addEscapeStringSubstitute("ograve", "ò");
+ addEscapeStringSubstitute("ocirc", "ô");
+ addEscapeStringSubstitute("ouml", "ö");
+ addEscapeStringSubstitute("otilde", "õ");
+ addEscapeStringSubstitute("Uacute", "Ú");
+ addEscapeStringSubstitute("Ugrave", "Ù");
+ addEscapeStringSubstitute("Ucirc", "Û");
+ addEscapeStringSubstitute("Uuml", "Ü");
+ addEscapeStringSubstitute("uacute", "ú");
+ addEscapeStringSubstitute("ugrave", "ù");
+ addEscapeStringSubstitute("ucirc", "û");
+ addEscapeStringSubstitute("uuml", "ü");
+ addEscapeStringSubstitute("Yacute", "Ý");
+ addEscapeStringSubstitute("yacute", "ý");
+ addEscapeStringSubstitute("yuml", "ÿ");
+
+ addEscapeStringSubstitute("deg", "°");
+ addEscapeStringSubstitute("plusmn", "±");
+ addEscapeStringSubstitute("sup2", "²");
+ addEscapeStringSubstitute("sup3", "³");
+ addEscapeStringSubstitute("sup1", "¹");
+ addEscapeStringSubstitute("nbsp", "º");
+ addEscapeStringSubstitute("pound", "£");
+ addEscapeStringSubstitute("cent", "¢");
+ addEscapeStringSubstitute("frac14", "¼");
+ addEscapeStringSubstitute("frac12", "½");
+ addEscapeStringSubstitute("frac34", "¾");
+ addEscapeStringSubstitute("iquest", "¿");
+ addEscapeStringSubstitute("iexcl", "¡");
+ addEscapeStringSubstitute("ETH", "Ð");
+ addEscapeStringSubstitute("eth", "ð");
+ addEscapeStringSubstitute("THORN", "Þ");
+ addEscapeStringSubstitute("thorn", "þ");
+ addEscapeStringSubstitute("AElig", "Æ");
+ addEscapeStringSubstitute("aelig", "æ");
+ addEscapeStringSubstitute("Oslash", "Ø");
+ addEscapeStringSubstitute("curren", "¤");
+ addEscapeStringSubstitute("Ccedil", "Ç");
+ addEscapeStringSubstitute("ccedil", "ç");
+ addEscapeStringSubstitute("szlig", "ß");
+ addEscapeStringSubstitute("Ntilde", "Ñ");
+ addEscapeStringSubstitute("ntilde", "ñ");
+ addEscapeStringSubstitute("yen", "¥");
+ addEscapeStringSubstitute("not", "¬");
+ addEscapeStringSubstitute("ordf", "ª");
+ addEscapeStringSubstitute("uml", "¨");
+ addEscapeStringSubstitute("shy", "­");
+ addEscapeStringSubstitute("macr", "¯");
+
+ addEscapeStringSubstitute("micro", "µ");
+ addEscapeStringSubstitute("middot", "·");
+ addEscapeStringSubstitute("cedil", "¸");
+ addEscapeStringSubstitute("ordm", "º");
+ addEscapeStringSubstitute("times", "×");
+ addEscapeStringSubstitute("divide", "÷");
+ addEscapeStringSubstitute("oslash", "ø");
+
+ setTokenCaseSensitive(true);
+
+
+ addTokenSubstitute("br", "\\line ");
+ addTokenSubstitute("br /", "\\line ");
+ addTokenSubstitute("i", "{\\i1 ");
+ addTokenSubstitute("/i", "}");
+ addTokenSubstitute("b", "{\\b1 ");
+ addTokenSubstitute("/b", "}");
+ addTokenSubstitute("p", "{\\fi200\\par}");
+ addTokenSubstitute("p /", "\\pard\\par\\par ");
+
+ //we need uppercase forms for the moment to support a few early ThML modules that aren't XHTML compliant
+ addTokenSubstitute("BR", "\\line ");
+ addTokenSubstitute("I", "{\\i1 ");
+ addTokenSubstitute("/I", "}");
+ addTokenSubstitute("B", "{\\b1 ");
+ addTokenSubstitute("/B", "}");
+ addTokenSubstitute("P", "\\par ");
+ addTokenSubstitute("scripture", "{\\i1 ");
+ addTokenSubstitute("/scripture", "}");
+ addTokenSubstitute("center", "\\qc ");
+ addTokenSubstitute("/center", "\\pard ");
+}
+
+
+char ThMLRTF::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+
+ // preprocess text buffer to escape RTF control codes
+ const char *from;
+ SWBuf orig = text;
+ from = orig.c_str();
+ for (text = ""; *from; from++) { //loop to remove extra spaces
+ switch (*from) {
+ case '{':
+ case '}':
+ case '\\':
+ text += "\\";
+ text += *from;
+ break;
+ default:
+ text += *from;
+ }
+ }
+ text += (char)0;
+
+ SWBasicFilter::processText(text, key, module); //handle tokens as usual
+
+ orig = text;
+ from = orig.c_str();
+ for (text = ""; *from; from++) { //loop to remove extra spaces
+ if ((strchr(" \t\n\r", *from))) {
+ while (*(from+1) && (strchr(" \t\n\r", *(from+1)))) {
+ from++;
+ }
+ text += " ";
+ }
+ else {
+ text += *from;
+ }
+ }
+ text += (char)0; // probably not needed, but don't want to remove without investigating (same as above)
+ return 0;
+}
+
+
+ThMLRTF::MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) {
+ this->SecHead = false;
+ XMLTag startTag = "";
+ if (module) {
+ version = module->Name();
+ BiblicalText = (!strcmp(module->Type(), "Biblical Texts"));
+ }
+}
+
+
+bool ThMLRTF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+ if (!substituteToken(buf, token)) { // manually process if it wasn't a simple substitution
+ MyUserData *u = (MyUserData *)userData;
+ XMLTag tag(token);
+ if ((!tag.isEndTag()) && (!tag.isEmpty()))
+ u->startTag = tag;
+ if (tag.getName() && !strcmp(tag.getName(), "sync")) {
+ SWBuf value = tag.getAttribute("value");
+ if (tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "morph")) { //&gt;
+ buf.appendFormatted(" {\\cf4 \\sub (%s)}", value.c_str());
+ }
+ else if( tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "Strongs")) {
+ if (value[0] == 'H' || value[0] == 'G' || value[0] == 'A') {
+ value<<1;
+ buf.appendFormatted(" {\\cf3 \\sub <%s>}", value.c_str());
+ }
+ else if (value[0] == 'T') {
+ value<<1;
+ buf.appendFormatted(" {\\cf4 \\sub (%s)}", value.c_str());
+ }
+ }
+ else if (tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "Dict")) {
+ if (!tag.isEndTag())
+ buf += "{\\b ";
+ else buf += "}";
+ }
+ }
+ // <note> tag
+ else if (!strcmp(tag.getName(), "note")) {
+ if (!tag.isEndTag()) {
+ if (!tag.isEmpty()) {
+ SWBuf type = tag.getAttribute("type");
+ SWBuf footnoteNumber = tag.getAttribute("swordFootnote");
+ VerseKey *vkey = NULL;
+ // see if we have a VerseKey * or descendant
+ SWTRY {
+ vkey = SWDYNAMIC_CAST(VerseKey, u->key);
+ }
+ SWCATCH ( ... ) { }
+ if (vkey) {
+ // leave this special osis type in for crossReference notes types? Might thml use this some day? Doesn't hurt.
+ char ch = ((tag.getAttribute("type") && ((!strcmp(tag.getAttribute("type"), "crossReference")) || (!strcmp(tag.getAttribute("type"), "x-cross-ref")))) ? 'x':'n');
+ buf.appendFormatted("{\\super <a href=\"\">*%c%i.%s</a>} ", ch, vkey->Verse(), footnoteNumber.c_str());
+ }
+ u->suspendTextPassThru = true;
+ }
+ }
+ if (tag.isEndTag()) {
+ u->suspendTextPassThru = false;
+ }
+ }
+
+
+ else if (!strcmp(tag.getName(), "scripRef")) {
+ if (!tag.isEndTag()) {
+ if (!tag.isEmpty()) {
+ u->suspendTextPassThru = true;
+ }
+ }
+ if (tag.isEndTag()) { // </scripRef>
+ if (!u->BiblicalText) {
+ SWBuf refList = u->startTag.getAttribute("passage");
+ if (!refList.length())
+ refList = u->lastTextNode;
+ SWBuf version = tag.getAttribute("version");
+ buf += "<a href=\"\">";
+ buf += refList.c_str();
+// buf += u->lastTextNode.c_str();
+ buf += "</a>";
+ }
+ else {
+ SWBuf footnoteNumber = u->startTag.getAttribute("swordFootnote");
+ VerseKey *vkey = NULL;
+ // see if we have a VerseKey * or descendant
+ SWTRY {
+ vkey = SWDYNAMIC_CAST(VerseKey, u->key);
+ }
+ SWCATCH ( ... ) {}
+ if (vkey) {
+ // leave this special osis type in for crossReference notes types? Might thml use this some day? Doesn't hurt.
+ buf.appendFormatted("{\\super <a href=\"\">*x%i.%s</a>} ", vkey->Verse(), footnoteNumber.c_str());
+ }
+ }
+
+ // let's let text resume to output again
+ u->suspendTextPassThru = false;
+ }
+ }
+
+ else if (tag.getName() && !strcmp(tag.getName(), "div")) {
+ if (tag.isEndTag() && u->SecHead) {
+ buf += "\\par}";
+ u->SecHead = false;
+ }
+ else if (tag.getAttribute("class")) {
+ if (!stricmp(tag.getAttribute("class"), "sechead")) {
+ u->SecHead = true;
+ buf += "{\\par\\i1\\b1 ";
+ }
+ else if (!stricmp(tag.getAttribute("class"), "title")) {
+ u->SecHead = true;
+ buf += "{\\par\\i1\\b1 ";
+ }
+ }
+ }
+ else if (tag.getName() && (!strcmp(tag.getName(), "img") || !strcmp(tag.getName(), "image"))) {
+ const char *src = tag.getAttribute("src");
+ if (!src) // assert we have a src attribute
+ return false;
+
+ char* filepath = new char[strlen(u->module->getConfigEntry("AbsoluteDataPath")) + strlen(token)];
+ *filepath = 0;
+ strcpy(filepath, userData->module->getConfigEntry("AbsoluteDataPath"));
+ strcat(filepath, src);
+
+// we do this because BibleCS looks for this EXACT format for an image tag
+ buf+="<img src=\"";
+ buf+=filepath;
+ buf+="\" />";
+ delete [] filepath;
+ }
+ else {
+ return false; // we still didn't handle token
+ }
+ }
+ return true;
+}
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmlscripref.cpp b/src/modules/filters/thmlscripref.cpp
new file mode 100644
index 0000000..df2b3d2
--- /dev/null
+++ b/src/modules/filters/thmlscripref.cpp
@@ -0,0 +1,123 @@
+/******************************************************************************
+ *
+ * thmlscripref - SWFilter descendant to hide or show scripture
+ * referebces in a ThML module.
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <thmlscripref.h>
+#include <utilxml.h>
+#include <versekey.h>
+#include <swmodule.h>
+
+SWORD_NAMESPACE_START
+
+const char oName[] = "Cross-references";
+const char oTip[] = "Toggles Scripture Cross-references On and Off if they exist";
+
+const SWBuf choices[3] = {"Off", "On", ""};
+const StringList oValues(&choices[0], &choices[2]);
+
+ThMLScripref::ThMLScripref() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("On");
+}
+
+
+ThMLScripref::~ThMLScripref() {
+}
+
+
+char ThMLScripref::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ SWBuf token;
+ bool intoken = false;
+ bool hide = false;
+ SWBuf tagText;
+ XMLTag startTag;
+ SWBuf refs = "";
+ int footnoteNum = 1;
+ char buf[254];
+ VerseKey parser = key->getText();
+
+ SWBuf orig = text;
+ const char *from = orig.c_str();
+
+ for (text = ""; *from; from++) {
+ if (*from == '<') {
+ intoken = true;
+ token = "";
+ continue;
+ }
+ if (*from == '>') { // process tokens
+ intoken = false;
+
+ XMLTag tag(token);
+ if (!strcmp(tag.getName(), "scripRef")) {
+ if (!tag.isEndTag()) {
+ if (!tag.isEmpty()) {
+ refs = "";
+ startTag = tag;
+ hide = true;
+ tagText = "";
+ continue;
+ }
+ }
+ if (hide && tag.isEndTag()) {
+ if (module->isProcessEntryAttributes()) {
+ SWBuf fc = module->getEntryAttributes()["Footnote"]["count"]["value"];
+ footnoteNum = (fc.length()) ? atoi(fc.c_str()) : 0;
+ sprintf(buf, "%i", ++footnoteNum);
+ module->getEntryAttributes()["Footnote"]["count"]["value"] = buf;
+ StringList attributes = startTag.getAttributeNames();
+ for (StringList::iterator it = attributes.begin(); it != attributes.end(); it++) {
+ module->getEntryAttributes()["Footnote"][buf][it->c_str()] = startTag.getAttribute(it->c_str());
+ }
+ module->getEntryAttributes()["Footnote"][buf]["body"] = tagText;
+ startTag.setAttribute("swordFootnote", buf);
+ SWBuf passage = startTag.getAttribute("passage");
+ if (passage.length())
+ refs = parser.ParseVerseList(passage.c_str(), parser, true).getRangeText();
+ else refs = parser.ParseVerseList(tagText.c_str(), parser, true).getRangeText();
+ module->getEntryAttributes()["Footnote"][buf]["refList"] = refs.c_str();
+ }
+ hide = false;
+ if (option) { // we want the tag in the text
+ text += startTag;
+ text.append(tagText);
+ }
+ else continue;
+ }
+ }
+
+ // if not a scripRef token, keep token in text
+ if ((!strcmp(tag.getName(), "scripRef")) && (!tag.isEndTag())) {
+ SWBuf osisRef = tag.getAttribute("passage");
+ if (refs.length())
+ refs += "; ";
+ refs += osisRef;
+ }
+ if (!hide) {
+ text += '<';
+ text.append(token);
+ text += '>';
+ }
+ else {
+ tagText += '<';
+ tagText.append(token);
+ tagText += '>';
+ }
+ continue;
+ }
+ if (intoken) { //copy token
+ token += *from;
+ }
+ else if (!hide) { //copy text which is not inside a token
+ text += *from;
+ }
+ else tagText += *from;
+ }
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmlstrongs.cpp b/src/modules/filters/thmlstrongs.cpp
new file mode 100644
index 0000000..c1ab08c
--- /dev/null
+++ b/src/modules/filters/thmlstrongs.cpp
@@ -0,0 +1,146 @@
+/******************************************************************************
+ *
+ * thmlstrongs - SWFilter descendant to hide or show strongs number
+ * in a ThML module.
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <thmlstrongs.h>
+#include <swmodule.h>
+#include <utilstr.h>
+#include <ctype.h>
+
+SWORD_NAMESPACE_START
+
+const char oName[] = "Strong's Numbers";
+const char oTip[] = "Toggles Strong's Numbers On and Off if they exist";
+
+const SWBuf choices[3] = {"Off", "On", ""};
+const StringList oValues(&choices[0], &choices[2]);
+
+ThMLStrongs::ThMLStrongs() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("Off");
+}
+
+
+ThMLStrongs::~ThMLStrongs() {
+}
+
+
+char ThMLStrongs::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ char token[2048]; // cheese. Fix.
+ const char *from;
+ int tokpos = 0;
+ bool intoken = false;
+ bool lastspace = false;
+ int word = 1;
+ char val[128];
+ char wordstr[5];
+ char *valto;
+ char *ch;
+ unsigned int textStart = 0, textEnd = 0;
+ SWBuf tmp;
+ bool newText = false;
+
+ SWBuf orig = text;
+ from = orig.c_str();
+
+ for (text = ""; *from; from++) {
+ if (*from == '<') {
+ intoken = true;
+ tokpos = 0;
+ token[0] = 0;
+ token[1] = 0;
+ token[2] = 0;
+ textEnd = text.length();
+ continue;
+ }
+ if (*from == '>') { // process tokens
+ intoken = false;
+ if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs
+ if (module->isProcessEntryAttributes()) {
+ valto = val;
+ for (unsigned int i = 27; token[i] != '\"' && i < 150; i++)
+ *valto++ = token[i];
+ *valto = 0;
+ if (atoi((!isdigit(*val))?val+1:val) < 5627) {
+ // normal strongs number
+ sprintf(wordstr, "%03d", word);
+ module->getEntryAttributes()["Word"][wordstr]["PartCount"] = "1";
+ module->getEntryAttributes()["Word"][wordstr]["Lemma"] = val;
+ module->getEntryAttributes()["Word"][wordstr]["LemmaClass"] = "strong";
+ tmp = "";
+ tmp.append(text.c_str()+textStart, (int)(textEnd - textStart));
+ module->getEntryAttributes()["Word"][wordstr]["Text"] = tmp;
+ newText = true;
+ }
+ else {
+/*
+ // verb morph
+ sprintf(wordstr, "%03d", word);
+ module->getEntryAttributes()["Word"][wordstr]["Morph"] = val;
+ module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = "OLBMorph";
+*/
+ word--; // for now, completely ignore this word attribute.
+ }
+ word++;
+ }
+
+ if (!option) { // if we don't want strongs
+ if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) {
+ if (lastspace)
+ text--;
+ }
+ if (newText) {textStart = text.length(); newText = false; }
+ continue;
+ }
+ }
+ if (module->isProcessEntryAttributes()) {
+ if (!strncmp(token, "sync type=\"morph\"", 17)) {
+ for (ch = token+17; *ch; ch++) {
+ if (!strncmp(ch, "class=\"", 7)) {
+ valto = val;
+ for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++)
+ *valto++ = ch[i];
+ *valto = 0;
+ sprintf(wordstr, "%03d", word-1);
+ if ((!stricmp(val, "Robinsons")) || (!stricmp(val, "Robinson"))) {
+ strcpy(val, "robinson");
+ }
+ module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = val;
+ }
+ if (!strncmp(ch, "value=\"", 7)) {
+ valto = val;
+ for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++)
+ *valto++ = ch[i];
+ *valto = 0;
+ sprintf(wordstr, "%03d", word-1);
+ module->getEntryAttributes()["Word"][wordstr]["Morph"] = val;
+ }
+ }
+ newText = true;
+ }
+ }
+ // if not a strongs token, keep token in text
+ text += '<';
+ text += token;
+ text += '>';
+ if (newText) {textStart = text.length(); newText = false; }
+ continue;
+ }
+ if (intoken) {
+ if (tokpos < 2045)
+ token[tokpos++] = *from;
+ token[tokpos+2] = 0;
+ }
+ else {
+ text += *from;
+ lastspace = (*from == ' ');
+ }
+ }
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmlvariants.cpp b/src/modules/filters/thmlvariants.cpp
new file mode 100644
index 0000000..49f9b65
--- /dev/null
+++ b/src/modules/filters/thmlvariants.cpp
@@ -0,0 +1,118 @@
+/******************************************************************************
+ *
+ * thmlvariants - SWFilter descendant to hide or show textual variants
+ * in a ThML module.
+ */
+
+
+#include <stdlib.h>
+#include <thmlvariants.h>
+#include <utilstr.h>
+
+SWORD_NAMESPACE_START
+
+const char ThMLVariants::primary[] = "Primary Reading";
+const char ThMLVariants::secondary[] = "Secondary Reading";
+const char ThMLVariants::all[] = "All Readings";
+
+const char ThMLVariants::optName[] = "Textual Variants";
+const char ThMLVariants::optTip[] = "Switch between Textual Variants modes";
+
+
+ThMLVariants::ThMLVariants() {
+ option = false;
+ options.push_back(primary);
+ options.push_back(secondary);
+ options.push_back(all);
+}
+
+
+ThMLVariants::~ThMLVariants() {
+}
+
+void ThMLVariants::setOptionValue(const char *ival)
+{
+ if (!stricmp(ival, primary)) option = 0;
+ else if (!stricmp(ival, secondary)) option = 1;
+ else option = 2;
+}
+
+const char *ThMLVariants::getOptionValue()
+{
+ if (option == 0) {
+ return primary;
+ }
+ else if (option == 1) {
+ return secondary;
+ }
+ else {
+ return all;
+ }
+}
+
+char ThMLVariants::processText(SWBuf &text, const SWKey *key, const SWModule *module)
+{
+ if ( option == 0 || option == 1) { //we want primary or variant only
+ bool intoken = false;
+ bool hide = false;
+ bool invar = false;
+
+ SWBuf token;
+ SWBuf orig = text;
+ const char *from = orig.c_str();
+
+ //we use a fixed comparision string to make sure the loop is as fast as the original two blocks with almost the same code
+ const char* variantCompareString = (option == 0) ? "div type=\"variant\" class=\"1\"" : "div type=\"variant\" class=\"2\"";
+
+ for (text = ""; *from; from++) {
+ if (*from == '<') {
+ intoken = true;
+ token = "";
+ continue;
+ }
+ else if (*from == '>') { // process tokens
+ intoken = false;
+
+ if ( !strncmp(token.c_str(), variantCompareString, 28)) { //only one of the variants, length of the two strings is 28 in both cases
+ invar = true;
+ hide = true;
+ continue;
+ }
+ if (!strncmp(token.c_str(), "div type=\"variant\"", 18)) {
+ invar = true;
+ continue;
+ }
+ if (!strncmp(token.c_str(), "/div", 4)) {
+ hide = false;
+ if (invar) {
+ invar = false;
+ continue;
+ }
+ }
+ if (!hide) {
+ text += '<';
+ text.append(token);
+ text += '>';
+ }
+
+ continue;
+ }
+ if (intoken) {
+ token += *from;
+ }
+ else if (!hide) {
+ text += *from;
+ }
+ }
+
+ }
+
+ return 0;
+}
+
+
+
+
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmlwebif.cpp b/src/modules/filters/thmlwebif.cpp
new file mode 100644
index 0000000..7428754
--- /dev/null
+++ b/src/modules/filters/thmlwebif.cpp
@@ -0,0 +1,103 @@
+/***************************************************************************
+ ThMLWEBIF.cpp - ThML to HTML filter with hrefs
+ -------------------
+ begin : 2001-09-03
+ copyright : 2001 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <thmlwebif.h>
+#include <swmodule.h>
+#include <url.h>
+#include <utilxml.h>
+#include <ctype.h>
+
+SWORD_NAMESPACE_START
+
+ThMLWEBIF::ThMLWEBIF() : baseURL(""), passageStudyURL(baseURL + "passagestudy.jsp") {
+ //all's done in ThMLHTMLHREF
+}
+
+bool ThMLWEBIF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+
+ if (!substituteToken(buf, token)) { // manually process if it wasn't a simple substitution
+ MyUserData *u = (MyUserData *)userData;
+ XMLTag tag(token);
+ SWBuf url;
+ if (!strcmp(tag.getName(), "sync")) {
+ const char* value = tag.getAttribute("value");
+ url = value;
+ if ((url.length() > 1) && strchr("GH", url[0])) {
+ if (isdigit(url[1]))
+ url = url.c_str()+1;
+ }
+
+ if(tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "morph")){
+ buf += "<small><em> (";
+ buf.appendFormatted("<a href=\"%s?showMorph=%s#cv\">", passageStudyURL.c_str(), URL::encode(url).c_str() );
+ }
+ else {
+ if (value) {
+ value++; //skip leading G, H or T
+ //url = value;
+ }
+
+ buf += "<small><em> &lt;";
+ buf.appendFormatted("<a href=\"%s?showStrong=%s#cv\">", passageStudyURL.c_str(), URL::encode(url).c_str() );
+ }
+
+ buf += value;
+ buf += "</a>";
+
+ if (tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "morph")) {
+ buf += ") </em></small>";
+ }
+ else {
+ buf += "&gt; </em></small>";
+ }
+ }
+ else if (!strcmp(tag.getName(), "scripRef")) {
+ if (tag.isEndTag()) {
+ if (u->inscriptRef) { // like "<scripRef passage="John 3:16">John 3:16</scripRef>"
+ u->inscriptRef = false;
+ buf += "</a>";
+ }
+ else { // end of scripRef like "<scripRef>John 3:16</scripRef>"
+ url = u->lastTextNode;
+ buf.appendFormatted("<a href=\"%s?key=%s#cv\">", passageStudyURL.c_str(), URL::encode(url).c_str());
+ buf += u->lastTextNode.c_str();
+ buf += "</a>";
+
+ // let's let text resume to output again
+ u->suspendTextPassThru = false;
+ }
+ }
+ else if (tag.getAttribute("passage")) { //passage given
+ u->inscriptRef = true;
+
+ buf.appendFormatted("<a href=\"%s?key=%s#cv\">", passageStudyURL.c_str(), URL::encode(tag.getAttribute("passage")).c_str());
+ }
+ else { //no passage given
+ u->inscriptRef = false;
+ // let's stop text from going to output
+ u->suspendTextPassThru = true;
+ }
+ }
+ else {
+ return ThMLHTMLHREF::handleToken(buf,token,userData);
+ }
+ }
+ return true;
+}
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmlwordjs.cpp b/src/modules/filters/thmlwordjs.cpp
new file mode 100644
index 0000000..ad8eef0
--- /dev/null
+++ b/src/modules/filters/thmlwordjs.cpp
@@ -0,0 +1,296 @@
+/******************************************************************************
+ *
+ * thmlstrongs - SWFilter descendant to hide or show strongs number
+ * in a ThML module.
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <thmlwordjs.h>
+#include <swmodule.h>
+#include <ctype.h>
+#include <utilxml.h>
+#include <utilstr.h>
+#include <versekey.h>
+
+SWORD_NAMESPACE_START
+
+const char oName[] = "Word Javascript";
+const char oTip[] = "Toggles Word Javascript data";
+
+const SWBuf choices[3] = {"Off", "On", ""};
+const StringList oValues(&choices[0], &choices[2]);
+
+
+ThMLWordJS::ThMLWordJS() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("Off");
+
+ defaultGreekLex = 0;
+ defaultHebLex = 0;
+ defaultGreekParse = 0;
+ defaultHebParse = 0;
+ mgr = 0;
+}
+
+
+ThMLWordJS::~ThMLWordJS() {
+}
+
+
+char ThMLWordJS::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ if (option) {
+ char token[2112]; // cheese. Fix.
+ int tokpos = 0;
+ bool intoken = false;
+ bool lastspace = false;
+ int word = 1;
+ char val[128];
+ char *valto;
+ char *ch;
+ char wordstr[5];
+ unsigned int textStart = 0, lastAppendLen = 0, textEnd = 0;
+ SWBuf tmp;
+ bool newText = false;
+ bool needWordOut = false;
+ AttributeValue *wordAttrs = 0;
+ SWBuf modName = (module)?module->Name():"";
+ SWBuf wordSrcPrefix = modName;
+
+ const SWBuf orig = text;
+ const char * from = orig.c_str();
+ VerseKey *vkey = 0;
+ if (key) {
+ vkey = SWDYNAMIC_CAST(VerseKey, key);
+ }
+
+ for (text = ""; *from; from++) {
+ if (*from == '<') {
+ intoken = true;
+ tokpos = 0;
+ token[0] = 0;
+ token[1] = 0;
+ token[2] = 0;
+ textEnd = text.length();
+ continue;
+ }
+ if (*from == '>') { // process tokens
+ intoken = false;
+ if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs
+ valto = val;
+ for (unsigned int i = 27; token[i] != '\"' && i < 150; i++)
+ *valto++ = token[i];
+ *valto = 0;
+ if (atoi((!isdigit(*val))?val+1:val) < 5627) {
+ // normal strongs number
+ sprintf(wordstr, "%03d", word++);
+ needWordOut = (word > 2);
+ wordAttrs = &(module->getEntryAttributes()["Word"][wordstr]);
+ (*wordAttrs)["Strongs"] = val;
+ //printf("Adding: [\"Word\"][%s][\"Strongs\"] = %s\n", wordstr, val);
+ tmp = "";
+ tmp.append(text.c_str()+textStart, (int)(textEnd - textStart));
+ (*wordAttrs)["Text"] = tmp;
+ text.append("</span>");
+ SWBuf ts;
+ ts.appendFormatted("%d", textStart);
+ (*wordAttrs)["TextStart"] = ts;
+ //printf("Adding: [\"Word\"][%s][\"Text\"] = %s\n", wordstr, tmp.c_str());
+ newText = true;
+ }
+ else {
+ // verb morph
+ (*wordAttrs)["Morph"] = val;
+ //printf("Adding: [\"Word\"][%s][\"Morph\"] = %s\n", wordstr, val);
+ }
+
+ }
+ if (!strncmp(token, "sync type=\"morph\"", 17)) {
+ for (ch = token+17; *ch; ch++) {
+ if (!strncmp(ch, "class=\"", 7)) {
+ valto = val;
+ for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++)
+ *valto++ = ch[i];
+ *valto = 0;
+ (*wordAttrs)["MorphClass"] = val;
+ //printf("Adding: [\"Word\"][%s][\"MorphClass\"] = %s\n", wordstr, val);
+ }
+ if (!strncmp(ch, "value=\"", 7)) {
+ valto = val;
+ for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++)
+ *valto++ = ch[i];
+ *valto = 0;
+ (*wordAttrs)["Morph"] = val;
+ //printf("Adding: [\"Word\"][%s][\"Morph\"] = %s\n", wordstr, val);
+ }
+ }
+ newText = true;
+ }
+ // if not a strongs token, keep token in text
+ text += '<';
+ text += token;
+ text += '>';
+ if (needWordOut) {
+ char wstr[10];
+ sprintf(wstr, "%03d", word-2);
+ AttributeValue *wAttrs = &(module->getEntryAttributes()["Word"][wstr]);
+ needWordOut = false;
+ SWBuf strong = (*wAttrs)["Strongs"];
+ SWBuf morph = (*wAttrs)["Morph"];
+ SWBuf morphClass = (*wAttrs)["MorphClass"];
+ SWBuf wordText = (*wAttrs)["Text"];
+ SWBuf textSt = (*wAttrs)["TextStart"];
+ if (strong.size()) {
+ char gh = 0;
+ gh = isdigit(strong[0]) ? 0:strong[0];
+ if (!gh) {
+ if (vkey) {
+ gh = vkey->Testament() ? 'H' : 'G';
+ }
+ }
+ else strong << 1;
+
+ SWModule *sLex = 0;
+ SWModule *sMorph = 0;
+ if (gh == 'G') {
+ sLex = defaultGreekLex;
+ sMorph = defaultGreekParse;
+ }
+ if (gh == 'H') {
+ sLex = defaultHebLex;
+ sMorph = defaultHebParse;
+ }
+ SWBuf lexName = "";
+ if (sLex) {
+ // we can pass the real lex name in, but we have some
+ // aliases in the javascript to optimize bandwidth
+ lexName = sLex->Name();
+ if (lexName == "StrongsGreek")
+ lexName = "G";
+ if (lexName == "StrongsHebrew")
+ lexName = "H";
+ }
+ SWBuf wordID;
+ if (vkey) {
+ // optimize for bandwidth and use only the verse as the unique entry id
+ wordID.appendFormatted("%d", vkey->Verse());
+ }
+ else {
+ wordID = key->getText();
+ }
+ for (unsigned int i = 0; i < wordID.size(); i++) {
+ if ((!isdigit(wordID[i])) && (!isalpha(wordID[i]))) {
+ wordID[i] = '_';
+ }
+ }
+ wordID.appendFormatted("_%s%d", wordSrcPrefix.c_str(), atoi(wstr));
+ if (textSt.size()) {
+ int textStr = atoi(textSt.c_str());
+ textStr += lastAppendLen;
+ SWBuf spanStart = "";
+
+
+
+/*
+ if (sMorph) {
+ SWBuf popMorph = "<a onclick=\"";
+ popMorph.appendFormatted("p(\'%s\',\'%s\','%s','');\" >%s</a>", sMorph->Name(), morph.c_str(), wordID.c_str(), morph.c_str());
+ morph = popMorph;
+ }
+*/
+
+ // 'p' = 'fillpop' to save bandwidth
+ const char *m = strchr(morph.c_str(), ':');
+ if (m) m++;
+ else m = morph.c_str();
+ spanStart.appendFormatted("<span class=\"clk\" onclick=\"p('%s','%s','%s','%s','','%s');\" >", lexName.c_str(), strong.c_str(), wordID.c_str(), m, modName.c_str());
+ text.insert(textStr, spanStart);
+ lastAppendLen = spanStart.length();
+ }
+ }
+
+ }
+ if (newText) {
+ textStart = text.length(); newText = false;
+ }
+ continue;
+ }
+ if (intoken) {
+ if (tokpos < 2045)
+ token[tokpos++] = *from;
+ token[tokpos+2] = 0;
+ }
+ else {
+ text += *from;
+ lastspace = (*from == ' ');
+ }
+ }
+
+ char wstr[10];
+ sprintf(wstr, "%03d", word-1);
+ AttributeValue *wAttrs = &(module->getEntryAttributes()["Word"][wstr]);
+ needWordOut = false;
+ SWBuf strong = (*wAttrs)["Strongs"];
+ SWBuf morph = (*wAttrs)["Morph"];
+ SWBuf morphClass = (*wAttrs)["MorphClass"];
+ SWBuf wordText = (*wAttrs)["Text"];
+ SWBuf textSt = (*wAttrs)["TextStart"];
+ if (strong.size()) {
+ char gh = 0;
+ gh = isdigit(strong[0]) ? 0:strong[0];
+ if (!gh) {
+ if (vkey) {
+ gh = vkey->Testament() ? 'H' : 'G';
+ }
+ }
+ else strong << 1;
+
+ SWModule *sLex = 0;
+ if (gh == 'G') {
+ sLex = defaultGreekLex;
+ }
+ if (gh == 'H') {
+ sLex = defaultHebLex;
+ }
+ SWBuf lexName = "";
+ if (sLex) {
+ // we can pass the real lex name in, but we have some
+ // aliases in the javascript to optimize bandwidth
+ lexName = sLex->Name();
+ if (lexName == "StrongsGreek")
+ lexName = "G";
+ if (lexName == "StrongsHebrew")
+ lexName = "H";
+ }
+ SWBuf wordID;
+ if (vkey) {
+ // optimize for bandwidth and use only the verse as the unique entry id
+ wordID.appendFormatted("%d", vkey->Verse());
+ }
+ else {
+ wordID = key->getText();
+ }
+ for (unsigned int i = 0; i < wordID.size(); i++) {
+ if ((!isdigit(wordID[i])) && (!isalpha(wordID[i]))) {
+ wordID[i] = '_';
+ }
+ }
+ wordID.appendFormatted("_%s%d", wordSrcPrefix.c_str(), atoi(wstr));
+ if (textSt.size()) {
+ int textStr = atoi(textSt.c_str());
+ textStr += lastAppendLen;
+ SWBuf spanStart = "";
+ // 'p' = 'fillpop' to save bandwidth
+ const char *m = strchr(morph.c_str(), ':');
+ if (m) m++;
+ else m = morph.c_str();
+ spanStart.appendFormatted("<span class=\"clk\" onclick=\"p('%s','%s','%s','%s','','%s');\" >", lexName.c_str(), strong.c_str(), wordID.c_str(), m, modName.c_str());
+ text.insert(textStr, spanStart);
+ }
+ }
+ }
+
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/unicodertf.cpp b/src/modules/filters/unicodertf.cpp
new file mode 100644
index 0000000..8c2a1f6
--- /dev/null
+++ b/src/modules/filters/unicodertf.cpp
@@ -0,0 +1,87 @@
+/******************************************************************************
+ *
+ * unicodertf - SWFilter descendant to convert a double byte unicode file
+ * to RTF tags
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <unicodertf.h>
+#include <swbuf.h>
+
+SWORD_NAMESPACE_START
+
+UnicodeRTF::UnicodeRTF() {
+}
+
+
+char UnicodeRTF::processText(SWBuf &text, const SWKey *key, const SWModule *module)
+{
+ const unsigned char *from;
+ char digit[10];
+ unsigned long ch;
+ signed short utf16;
+ unsigned char from2[7];
+
+ SWBuf orig = text;
+
+ from = (const unsigned char *)orig.c_str();
+
+ // -------------------------------
+ for (text = ""; *from; from++) {
+ ch = 0;
+ //case: ANSI
+ if ((*from & 128) != 128) {
+ text += *from;
+ continue;
+ }
+ //case: Invalid UTF-8 (illegal continuing byte in initial position)
+ if ((*from & 128) && ((*from & 64) != 64)) {
+ continue;
+ }
+ //case: 2+ byte codepoint
+ from2[0] = *from;
+ from2[0] <<= 1;
+ int subsequent;
+ for (subsequent = 1; (from2[0] & 128) && (subsequent < 7); subsequent++) {
+ from2[0] <<= 1;
+ from2[subsequent] = from[subsequent];
+ from2[subsequent] &= 63;
+ ch <<= 6;
+ ch |= from2[subsequent];
+ }
+ subsequent--;
+ from2[0] <<= 1;
+ char significantFirstBits = 8 - (2+subsequent);
+
+ ch |= (((short)from2[0]) << (((6*subsequent)+significantFirstBits)-8));
+ from += subsequent;
+ if (ch < 0x10000) {
+ utf16 = (signed short)ch;
+ text += '\\';
+ text += 'u';
+ sprintf(digit, "%d", utf16);
+ text += digit;
+ text += '?';
+ }
+ else {
+ utf16 = (signed short)((ch - 0x10000) / 0x400 + 0xD800);
+ text += '\\';
+ text += 'u';
+ sprintf(digit, "%d", utf16);
+ text += digit;
+ text += '?';
+ utf16 = (signed short)((ch - 0x10000) % 0x400 + 0xDC00);
+ text += '\\';
+ text += 'u';
+ sprintf(digit, "%d", utf16);
+ text += digit;
+ text += '?';
+ }
+ }
+
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/utf16utf8.cpp b/src/modules/filters/utf16utf8.cpp
new file mode 100644
index 0000000..ae0845f
--- /dev/null
+++ b/src/modules/filters/utf16utf8.cpp
@@ -0,0 +1,90 @@
+/******************************************************************************
+ *
+ * UTF16UTF8 - SWFilter descendant to convert UTF-16 to UTF-8
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include <utf16utf8.h>
+#include <swbuf.h>
+
+SWORD_NAMESPACE_START
+
+UTF16UTF8::UTF16UTF8() {
+}
+
+
+char UTF16UTF8::processText(SWBuf &text, const SWKey *key, const SWModule *module)
+{
+ unsigned short *from;
+
+ int len;
+ unsigned long uchar;
+ unsigned short schar;
+ len = 0;
+ from = (unsigned short*) text.c_str();
+ while (*from) {
+ len += 2;
+ from++;
+ }
+
+ SWBuf orig = text;
+ from = (unsigned short*)orig.c_str();
+
+
+ // -------------------------------
+
+ for (text = ""; *from; from++) {
+ uchar = 0;
+
+ if (*from < 0xD800 || *from > 0xDFFF) {
+ uchar = *from;
+ }
+ else if (*from >= 0xD800 && *from <= 0xDBFF) {
+ uchar = *from;
+ schar = *(from+1);
+ if (uchar < 0xDC00 || uchar > 0xDFFF) {
+ //error, do nothing
+ continue;
+ }
+ uchar &= 0x03ff;
+ schar &= 0x03ff;
+ uchar <<= 10;
+ uchar |= schar;
+ uchar += 0x10000;
+ from++;
+ }
+ else {
+ //error, do nothing
+ continue;
+ }
+
+ if (uchar < 0x80) {
+ text += uchar;
+ }
+ else if (uchar < 0x800) {
+ text += 0xc0 | (uchar >> 6);
+ text += 0x80 | (uchar & 0x3f);
+ }
+ else if (uchar < 0x10000) {
+ text += 0xe0 | (uchar >> 12);
+ text += 0x80 | ((uchar >> 6) & 0x3f);
+ text += 0x80 | (uchar & 0x3f);
+ }
+ else if (uchar < 0x200000) {
+ text += 0xF0 | (uchar >> 18);
+ text += 0x80 | ((uchar >> 12) & 0x3F);
+ text += 0x80 | ((uchar >> 6) & 0x3F);
+ text += 0x80 | (uchar & 0x3F);
+ }
+ }
+
+ return 0;
+}
+
+
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/utf8arshaping.cpp b/src/modules/filters/utf8arshaping.cpp
new file mode 100644
index 0000000..702fb62
--- /dev/null
+++ b/src/modules/filters/utf8arshaping.cpp
@@ -0,0 +1,51 @@
+/******************************************************************************
+*
+* utf8arshaping - SWFilter descendant to perform Arabic shaping on
+* UTF-8 text
+*/
+
+#ifdef _ICU_
+
+#include <stdlib.h>
+
+#include <utilstr.h>
+
+#include <utf8arshaping.h>
+#include <swbuf.h>
+
+SWORD_NAMESPACE_START
+
+UTF8arShaping::UTF8arShaping() {
+ conv = ucnv_open("UTF-8", &err);
+}
+
+UTF8arShaping::~UTF8arShaping() {
+ ucnv_close(conv);
+}
+
+char UTF8arShaping::processText(SWBuf &text, const SWKey *key, const SWModule *module)
+{
+ UChar *ustr, *ustr2;
+ if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering
+ return -1;
+
+ int32_t len = text.length();
+ ustr = new UChar[len];
+ ustr2 = new UChar[len];
+
+ // Convert UTF-8 string to UTF-16 (UChars)
+ len = ucnv_toUChars(conv, ustr, len, text.c_str(), -1, &err);
+
+ len = u_shapeArabic(ustr, len, ustr2, len, U_SHAPE_LETTERS_SHAPE | U_SHAPE_DIGITS_EN2AN, &err);
+
+ text.setSize(text.size()*2);
+ len = ucnv_fromUChars(conv, text.getRawData(), text.size(), ustr2, len, &err);
+ text.setSize(len);
+
+ delete [] ustr2;
+ delete [] ustr;
+ return 0;
+}
+
+SWORD_NAMESPACE_END
+#endif
diff --git a/src/modules/filters/utf8bidireorder.cpp b/src/modules/filters/utf8bidireorder.cpp
new file mode 100644
index 0000000..783602c
--- /dev/null
+++ b/src/modules/filters/utf8bidireorder.cpp
@@ -0,0 +1,60 @@
+/******************************************************************************
+*
+* utf8cnormalizer - SWFilter descendant to perform reordering of UTF-8
+* text to visual order according to Unicode BiDi
+*/
+
+#ifdef _ICU_
+
+#include <stdlib.h>
+
+#include <utilstr.h>
+
+#include <utf8bidireorder.h>
+#include <swbuf.h>
+
+SWORD_NAMESPACE_START
+
+UTF8BiDiReorder::UTF8BiDiReorder() {
+
+ conv = ucnv_open("UTF-8", &err);
+
+}
+
+UTF8BiDiReorder::~UTF8BiDiReorder() {
+ ucnv_close(conv);
+}
+
+char UTF8BiDiReorder::processText(SWBuf &text, const SWKey *key, const SWModule *module)
+{
+ UChar *ustr, *ustr2;
+ if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering
+ return -1;
+
+ int32_t len = text.length();
+ ustr = new UChar[len]; //each char could become a surrogate pair
+
+ // Convert UTF-8 string to UTF-16 (UChars)
+ len = ucnv_toUChars(conv, ustr, len, text.c_str(), -1, &err);
+ ustr2 = new UChar[len];
+
+ UBiDi* bidi = ubidi_openSized(len + 1, 0, &err);
+ ubidi_setPara(bidi, ustr, len, UBIDI_DEFAULT_RTL, NULL, &err);
+ len = ubidi_writeReordered(bidi, ustr2, len,
+ UBIDI_DO_MIRRORING | UBIDI_REMOVE_BIDI_CONTROLS, &err);
+ ubidi_close(bidi);
+
+// len = ubidi_writeReverse(ustr, len, ustr2, len,
+// UBIDI_DO_MIRRORING | UBIDI_REMOVE_BIDI_CONTROLS, &err);
+
+ text.setSize(text.size()*2);
+ len = ucnv_fromUChars(conv, text.getRawData(), text.size(), ustr2, len, &err);
+ text.setSize(len);
+
+ delete [] ustr2;
+ delete [] ustr;
+ return 0;
+}
+
+SWORD_NAMESPACE_END
+#endif
diff --git a/src/modules/filters/utf8cantillation.cpp b/src/modules/filters/utf8cantillation.cpp
new file mode 100644
index 0000000..6213620
--- /dev/null
+++ b/src/modules/filters/utf8cantillation.cpp
@@ -0,0 +1,55 @@
+/******************************************************************************
+ *
+ * UTF8Cantillation - SWFilter descendant to remove UTF-8 Hebrew cantillation
+ *
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <utf8cantillation.h>
+
+SWORD_NAMESPACE_START
+
+const char oName[] = "Hebrew Cantillation";
+const char oTip[] = "Toggles Hebrew Cantillation Marks";
+
+const SWBuf choices[3] = {"On", "Off", ""};
+const StringList oValues(&choices[0], &choices[2]);
+
+UTF8Cantillation::UTF8Cantillation() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("Off");
+}
+
+
+UTF8Cantillation::~UTF8Cantillation(){};
+
+
+char UTF8Cantillation::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ if (!option) {
+ //The UTF-8 range 0xD6 0x90 to 0xD6 0xAF and 0xD7 0x84 consist of Hebrew cantillation marks so block those out.
+ SWBuf orig = text;
+ const unsigned char* from = (unsigned char*)orig.c_str();
+ for (text = ""; *from; from++) {
+ if (*from != 0xD6) {
+ if (*from == 0xD7 && *(from + 1) == 0x84) {
+ from++;
+ }
+ else {
+ text += *from;
+ }
+ }
+ else if (*(from + 1) < 0x90 || *(from + 1) > 0xAF) {
+ text += *from;
+ from++;
+ text += *from;
+ }
+ else {
+ from++;
+ }
+ }
+ }
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/utf8greekaccents.cpp b/src/modules/filters/utf8greekaccents.cpp
new file mode 100644
index 0000000..df85968
--- /dev/null
+++ b/src/modules/filters/utf8greekaccents.cpp
@@ -0,0 +1,261 @@
+/******************************************************************************
+ *
+ * UTF8GreekAccents - SWFilter descendant to remove UTF-8 Greek accents
+ *
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <utf8greekaccents.h>
+
+
+#ifdef _ICU_
+#include <utf8nfkd.h>
+sword::UTF8NFKD decompose;
+#endif
+
+SWORD_NAMESPACE_START
+
+const char oName[] = "Greek Accents";
+const char oTip[] = "Toggles Greek Accents";
+
+const SWBuf choices[3] = {"On", "Off", ""};
+const StringList oValues(&choices[0], &choices[2]);
+
+UTF8GreekAccents::UTF8GreekAccents() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("On");
+}
+
+UTF8GreekAccents::~UTF8GreekAccents(){};
+
+
+char UTF8GreekAccents::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+
+ if (!option) { //we don't want greek accents
+ //unsigned char *to, *from;
+ //to = (unsigned char*)text;
+ //for (from = (unsigned char*)text; *from; from++) {
+#ifdef _ICU_
+ decompose.processText(text, (SWKey *)2); // note the hack of 2 to mimic a real key. TODO: remove all hacks
+#endif
+
+ SWBuf orig = text;
+ const unsigned char* from = (unsigned char*)orig.c_str();
+ for (text = ""; *from; from++) {
+ //first just remove combining characters
+ if (*from == 0xE2 && *(from + 1) == 0x80 && *(from + 2) == 0x99) {
+ from += 2;
+ }
+ else if (*from == 0xCC && *(from + 1)) {
+ if (*(from + 1) == 0x80 || *(from + 1) == 0x81 || *(from + 1) == 0x82 || *(from + 1) == 0x88 || *(from + 1) == 0x93 || *(from + 1) == 0x94) {
+ from++;
+ }
+ }
+ else if (*from == 0xCD && (*(from + 1) == 0xBA || *(from + 1) == 0x82)) {
+ from++;
+ }
+ //now converted pre-composed characters to their alphabetic bases, discarding the accents
+
+ //Greek
+ //capital alpha
+ else if ((*from == 0xCE && *(from + 1) == 0x86)) {
+ text += 0xCE;
+ text += 0x91;
+ from++;
+ }
+ //capital epsilon
+ else if ((*from == 0xCE && *(from + 1) == 0x88)) {
+ text += 0xCE;
+ text += 0x95;
+ from++;
+ }
+ //capital eta
+ else if ((*from == 0xCE && *(from + 1) == 0x89)) {
+ text += 0xCE;
+ text += 0x97;
+ from++;
+ }
+ //capital iota
+ else if ((*from == 0xCE && (*(from + 1) == 0x8A || *(from + 1) == 0xAA))) {
+ text += 0xCE;
+ text += 0x99;
+ from++;
+ }
+ //capital omicron
+ else if ((*from == 0xCE && *(from + 1) == 0x8C)) {
+ text += 0xCE;
+ text += 0x9F;
+ from++;
+ }
+ //capital upsilon
+ else if ((*from == 0xCE && (*(from + 1) == 0x8E || *(from + 1) == 0xAB))) {
+ text += 0xCE;
+ text += 0xA5;
+ from++;
+ }
+ //capital omega
+ else if ((*from == 0xCE && *(from + 1) == 0x8F)) {
+ text += 0xCE;
+ text += 0xA9;
+ from++;
+ }
+
+ //alpha
+ else if ((*from == 0xCE && *(from + 1) == 0xAC)) {
+ text += 0xCE;
+ text += 0xB1;
+ from++;
+ }
+ //epsilon
+ else if ((*from == 0xCE && *(from + 1) == 0xAD)) {
+ text += 0xCE;
+ text += 0xB5;
+ from++;
+ }
+ //eta
+ else if ((*from == 0xCE && *(from + 1) == 0xAE)) {
+ text += 0xCE;
+ text += 0xB7;
+ from++;
+ }
+ //iota
+ else if ((*from == 0xCE && *(from + 1) == 0xAF) || (*from == 0xCF && *(from + 1) == 0x8A)) {
+ text += 0xCE;
+ text += 0xB9;
+ from++;
+ }
+ //omicron
+ else if ((*from == 0xCF && *(from + 1) == 0x8C)) {
+ text += 0xCE;
+ text += 0xBF;
+ from++;
+ }
+ //upsilon
+ else if ((*from == 0xCE && *(from + 1) == 0x88) || (*from == 0xCF && (*(from + 1) == 0x8B || *(from + 1) == 0x8D))) {
+ text += 0xCF;
+ text += 0x85;
+ from++;
+ }
+ //omega
+ else if ((*from == 0xCF && *(from + 1) == 0x8E)) {
+ text += 0xCF;
+ text += 0x89;
+ from++;
+ }
+
+ //Extended Greek
+ //capital alpha
+ else if (*from == 0xE1 && (((*(from + 1) == 0xBC || *(from + 1) == 0xBE) && *(from + 2) >= 0x88 && *(from + 2) <= 0x8F) || (*(from + 1) == 0xBE && *(from + 2) >= 0xB8 && *(from + 2) <= 0xBC))) {
+ text += 0xCE;
+ text += 0x91;
+ from+=2;
+ }
+ //capital epsilon
+ else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0x98 && *(from + 2) <= 0x9D) || (*(from + 1) == 0xBF && (*(from + 2) == 0x88 || *(from + 2) == 0x89)))) {
+ text += 0xCE;
+ text += 0x95;
+ from+=2;
+ }
+ //capital eta
+ else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0xA8 && *(from + 2) <= 0xAF) || (*(from + 1) == 0xBE && *(from + 2) >= 0x98 && *(from + 2) <= 0x9F) || (*(from + 1) == 0xBF && *(from + 2) >= 0x8A && *(from + 2) <= 0x8C))) {
+ text += 0xCE;
+ text += 0x97;
+ from+=2;
+ }
+ //capital iota
+ else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0xB8 && *(from + 2) <= 0xBF) || (*(from + 1) == 0xBF && *(from + 2) >= 0x98 && *(from + 2) <= 0x9B))) {
+ text += 0xCE;
+ text += 0x99;
+ from+=2;
+ }
+ //capital omicron
+ else if (*from == 0xE1 && (((*(from + 1) == 0xBD && *(from + 2) >= 0x88 && *(from + 2) <= 0x8D)) || ((*(from + 1) == 0xBF && (*(from + 2) == 0xB8 || *(from + 2) == 0xB9))))) {
+ text += 0xCE;
+ text += 0x9F;
+ from+=2;
+ }
+ //capital upsilon
+ else if (*from == 0xE1 && ((*(from + 1) == 0xBD && *(from + 2) >= 0x99 && *(from + 2) <= 0x9F) || (*(from + 1) == 0xBF && *(from + 2) >= 0xA8 && *(from + 2) <= 0xAB))) {
+ text += 0xCE;
+ text += 0xA5;
+ from+=2;
+ }
+ //capital omega
+ else if (*from == 0xE1 && (((*(from + 1) == 0xBD || *(from + 1) == 0xBE) && *(from + 2) >= 0xA8 && *(from + 2) <= 0xAF) || (*(from + 1) == 0xBF && *(from + 2) >= 0xBA && *(from + 2) <= 0xBC))) {
+ text += 0xCE;
+ text += 0xA9;
+ from+=2;
+ }
+ //capital rho
+ else if (*from == 0xE1 && *(from + 1) == 0xBF && *(from + 2) == 0xAC) {
+ text += 0xCE;
+ text += 0xA1;
+ from+=2;
+ }
+
+ //alpha
+ else if (*from == 0xE1 && (
+ ((*(from + 1) == 0xBC || *(from + 1) == 0xBE) && *(from + 2) >= 0x80 && *(from + 2) <= 0x87)
+ || (*(from + 1) == 0xBD && (*(from + 2) == 0xB0 || *(from + 2) == 0xB1))
+ || (*(from + 1) == 0xBE && *(from + 2) >= 0xB0 && *(from + 2) <= 0xB7))) {
+ text += 0xCE;
+ text += 0xB1;
+ from+=2;
+ }
+ //epsilon
+ else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0x90 && *(from + 2) <= 0x95) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB2 || *(from + 2) == 0xB3)))) {
+ text += 0xCE;
+ text += 0xB5;
+ from+=2;
+ }
+ //eta
+ else if (*from == 0xE1 && ((*(from + 1) == 0xBE && *(from + 2) >= 0x90 && *(from + 2) <= 0x97) || (*(from + 1) == 0xBC && *(from + 2) >= 0xA0 && *(from + 2) <= 0xA7) || (*(from + 1) == 0xBF && *(from + 2) >= 0x82 && *(from + 2) <= 0x87) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB4 || *(from + 2) == 0xB5)))) {
+ text += 0xCE;
+ text += 0xB7;
+ from+=2;
+ }
+ //iota
+ else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0xB0 && *(from + 2) <= 0xB7) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB6 || *(from + 2) == 0xB7)) || (*(from + 1) == 0xBF && *(from + 2) >= 0x90 && *(from + 2) <= 0x97))) {
+ text += 0xCE;
+ text += 0xB9;
+ from+=2;
+ }
+ //omicron
+ else if (*from == 0xE1 && (*(from + 1) == 0xBD && ((*(from + 2) >= 0x80 && *(from + 2) <= 0x85) || (*(from + 2) == 0xB8 || *(from + 2) == 0xB9)))) {
+ text += 0xCE;
+ text += 0xBF;
+ from+=2;
+ }
+ //upsilon
+ else if (*from == 0xE1 && ((*(from + 1) == 0xBD && ((*(from + 2) >= 0x90 && *(from + 2) <= 0x97) || *(from + 2) == 0xBA || *(from + 2) == 0xBB)) || (*(from + 1) == 0xBF && ((*(from + 2) >= 0xA0 && *(from + 2) <= 0xA3) || *(from + 2) == 0xA6 || *(from + 2) == 0xA7)))) {
+ text += 0xCF;
+ text += 0x85;
+ from+=2;
+ }
+ //omega
+ else if (*from == 0xE1 && ((*(from + 1) == 0xBD && ((*(from + 2) >= 0xA0 && *(from + 2) <= 0xA7) || (*(from + 2) == 0xBC || *(from + 2) == 0xBD))) || (*(from + 1) == 0xBE && (*(from + 2) >= 0xA0 && *(from + 2) <= 0xA7)) || (*(from + 1) == 0xBF && *(from + 2) >= 0xB2 && *(from + 2) <= 0xB7))) {
+ text += 0xCF;
+ text += 0x89;
+ from+=2;
+ }
+ //rho
+ else if (*from == 0xE1 && *(from + 1) == 0xBF && (*(from + 2) == 0xA4 && *(from + 2) == 0xA5)) {
+ text += 0xCF;
+ text += 0x81;
+ from+=2;
+ }
+ else { //no characters we filter
+ text += *from;
+ }
+ }
+ }
+ return 0;
+}
+
+
+
+
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/utf8hebrewpoints.cpp b/src/modules/filters/utf8hebrewpoints.cpp
new file mode 100644
index 0000000..0476db8
--- /dev/null
+++ b/src/modules/filters/utf8hebrewpoints.cpp
@@ -0,0 +1,44 @@
+/******************************************************************************
+ *
+ * UTF8HebrewPoints - SWFilter descendant to remove UTF-8 Hebrew vowel points
+ *
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <utf8hebrewpoints.h>
+
+SWORD_NAMESPACE_START
+
+const char oName[] = "Hebrew Vowel Points";
+const char oTip[] = "Toggles Hebrew Vowel Points";
+
+const SWBuf choices[3] = {"On", "Off", ""};
+const StringList oValues(&choices[0], &choices[2]);
+
+UTF8HebrewPoints::UTF8HebrewPoints() : SWOptionFilter(oName, oTip, &oValues) {
+ setOptionValue("On");
+}
+
+UTF8HebrewPoints::~UTF8HebrewPoints(){};
+
+
+char UTF8HebrewPoints::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ if (!option) {
+ //The UTF-8 range 0xD6 0xB0 to 0xD6 0xBF excluding 0xD6 0x consist of Hebrew cantillation marks so block those out.
+ SWBuf orig = text;
+ const unsigned char* from = (unsigned char*)orig.c_str();
+ for (text = ""; *from; from++) {
+ if ((*from == 0xD6) && (*(from + 1) >= 0xB0 && *(from + 1) <= 0xBF) && (*(from + 1) != 0xBE)) {
+ from++;
+ }
+ else {
+ text += *from;
+ }
+ }
+ }
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/utf8html.cpp b/src/modules/filters/utf8html.cpp
new file mode 100644
index 0000000..088f669
--- /dev/null
+++ b/src/modules/filters/utf8html.cpp
@@ -0,0 +1,70 @@
+/******************************************************************************
+ *
+ * utf8html - SWFilter descendant to convert a UTF-8 stream to HTML escapes
+ *
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <utf8html.h>
+#include <swbuf.h>
+
+SWORD_NAMESPACE_START
+
+UTF8HTML::UTF8HTML() {
+}
+
+
+char UTF8HTML::processText(SWBuf &text, const SWKey *key, const SWModule *module)
+{
+ unsigned char *from;
+ int len;
+ char digit[10];
+ unsigned long ch;
+ if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering
+ return (char)-1;
+
+ len = strlen(text.c_str()) + 2; // shift string to right of buffer
+
+ SWBuf orig = text;
+ from = (unsigned char *)orig.c_str();
+
+ // -------------------------------
+ for (text = ""; *from; from++) {
+ ch = 0;
+ if ((*from & 128) != 128) {
+// if (*from != ' ')
+ text += *from;
+ continue;
+ }
+ if ((*from & 128) && ((*from & 64) != 64)) {
+ // error
+ *from = 'x';
+ continue;
+ }
+ *from <<= 1;
+ int subsequent;
+ for (subsequent = 1; (*from & 128); subsequent++) {
+ *from <<= 1;
+ from[subsequent] &= 63;
+ ch <<= 6;
+ ch |= from[subsequent];
+ }
+ subsequent--;
+ *from <<=1;
+ char significantFirstBits = 8 - (2+subsequent);
+
+ ch |= (((short)*from) << (((6*subsequent)+significantFirstBits)-8));
+ from += subsequent;
+ text += '&';
+ text += '#';
+ sprintf(digit, "%ld", ch);
+ for (char *dig = digit; *dig; dig++)
+ text += *dig;
+ text += ';';
+ }
+ return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/utf8latin1.cpp b/src/modules/filters/utf8latin1.cpp
new file mode 100644
index 0000000..08b288d
--- /dev/null
+++ b/src/modules/filters/utf8latin1.cpp
@@ -0,0 +1,75 @@
+/******************************************************************************
+ *
+ * UTF8Latin1 - SWFilter descendant to convert UTF-8 to Latin-1
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include <utf8latin1.h>
+#include <swbuf.h>
+
+SWORD_NAMESPACE_START
+
+UTF8Latin1::UTF8Latin1(char rchar) : replacementChar(rchar) {
+}
+
+
+char UTF8Latin1::processText(SWBuf &text, const SWKey *key, const SWModule *module)
+{
+ unsigned char *from;
+
+ int len;
+ unsigned long uchar;
+ unsigned char significantFirstBits, subsequent;
+
+ if ((unsigned long)key < 2) {// hack, we're en(1)/de(0)ciphering
+ return (char)-1;
+ }
+ len = strlen(text.c_str()) + 1; // shift string to right of buffer
+
+ SWBuf orig = text;
+ from = (unsigned char*)orig.c_str();
+
+
+ // -------------------------------
+
+ for (text = ""; *from; from++) {
+ uchar = 0;
+ if ((*from & 128) != 128) {
+ // if (*from != ' ')
+ uchar = *from;
+ }
+ else if ((*from & 128) && ((*from & 64) != 64)) {
+ // error, do nothing
+ continue;
+ }
+ else {
+ *from <<= 1;
+ for (subsequent = 1; (*from & 128); subsequent++) {
+ *from <<= 1;
+ from[subsequent] &= 63;
+ uchar <<= 6;
+ uchar |= from[subsequent];
+ }
+ subsequent--;
+ *from <<=1;
+ significantFirstBits = 8 - (2+subsequent);
+
+ uchar |= (((short)*from) << (((6*subsequent)+significantFirstBits)-8));
+ from += subsequent;
+ }
+
+ if (uchar < 0xff) {
+ text += (unsigned char)uchar;
+ }
+ else {
+ text += replacementChar;
+ }
+ }
+ return 0;
+}
+
+SWORD_NAMESPACE_END
+
diff --git a/src/modules/filters/utf8nfc.cpp b/src/modules/filters/utf8nfc.cpp
new file mode 100644
index 0000000..15b76b5
--- /dev/null
+++ b/src/modules/filters/utf8nfc.cpp
@@ -0,0 +1,50 @@
+/******************************************************************************
+*
+* utf8nfc - SWFilter descendant to perform NFC (canonical composition
+* normalization) on UTF-8 text
+*/
+
+#ifdef _ICU_
+
+#include <stdlib.h>
+
+#include <utilstr.h>
+#include <unicode/unistr.h>
+#include <unicode/normlzr.h>
+#include <unicode/unorm.h>
+
+#include <utf8nfc.h>
+#include <swbuf.h>
+
+SWORD_NAMESPACE_START
+
+UTF8NFC::UTF8NFC() {
+ conv = ucnv_open("UTF-8", &err);
+}
+
+UTF8NFC::~UTF8NFC() {
+ ucnv_close(conv);
+}
+
+char UTF8NFC::processText(SWBuf &text, const SWKey *key, const SWModule *module)
+{
+ if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering
+ return -1;
+
+ UErrorCode status = U_ZERO_ERROR;
+ UnicodeString source(text.getRawData(), text.length(), conv, status);
+ UnicodeString target;
+
+ status = U_ZERO_ERROR;
+ Normalizer::normalize(source, UNORM_NFC, 0, target, status);
+
+ status = U_ZERO_ERROR;
+ text.setSize(text.size()*2); // potentially, it can grow to 2x the original size
+ int32_t len = target.extract(text.getRawData(), text.size(), conv, status);
+ text.setSize(len);
+
+ return 0;
+}
+
+SWORD_NAMESPACE_END
+#endif
diff --git a/src/modules/filters/utf8nfkd.cpp b/src/modules/filters/utf8nfkd.cpp
new file mode 100644
index 0000000..a19d36b
--- /dev/null
+++ b/src/modules/filters/utf8nfkd.cpp
@@ -0,0 +1,52 @@
+/******************************************************************************
+*
+* utf8nfkd - SWFilter descendant to perform NFKD (compatability decomposition
+* normalization) on UTF-8 text
+*/
+
+#ifdef _ICU_
+
+#include <stdlib.h>
+
+#include <utilstr.h>
+
+#include <utf8nfkd.h>
+#include <swbuf.h>
+
+SWORD_NAMESPACE_START
+
+UTF8NFKD::UTF8NFKD() {
+ conv = ucnv_open("UTF-8", &err);
+}
+
+UTF8NFKD::~UTF8NFKD() {
+ ucnv_close(conv);
+}
+
+char UTF8NFKD::processText(SWBuf &text, const SWKey *key, const SWModule *module)
+{
+ if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering
+ return -1;
+
+ int32_t len = 5 + text.length() * 5;
+ source = new UChar[len + 1]; //each char could become a surrogate pair
+
+ // Convert UTF-8 string to UTF-16 (UChars)
+ int32_t ulen = ucnv_toUChars(conv, source, len, text.c_str(), -1, &err);
+ target = new UChar[len + 1];
+
+ //compatability decomposition
+ ulen = unorm_normalize(source, ulen, UNORM_NFKD, 0, target, len, &err);
+
+ text.setSize(len);
+ len = ucnv_fromUChars(conv, text.getRawData(), len, target, ulen, &err);
+ text.setSize(len);
+
+ delete [] source;
+ delete [] target;
+
+ return 0;
+}
+
+SWORD_NAMESPACE_END
+#endif
diff --git a/src/modules/filters/utf8transliterator.cpp b/src/modules/filters/utf8transliterator.cpp
new file mode 100644
index 0000000..d99741b
--- /dev/null
+++ b/src/modules/filters/utf8transliterator.cpp
@@ -0,0 +1,888 @@
+/******************************************************************************
+*
+* utf8transliterators - SWFilter descendant to transliterate between
+* ICU-supported scripts.
+*/
+
+#ifdef _ICU_
+
+#include <stdlib.h>
+
+#include <utilstr.h>
+
+#include <unicode/ucnv.h>
+#include <unicode/uchar.h>
+#include <utf8transliterator.h>
+#include <swmodule.h>
+
+#ifndef _ICUSWORD_
+#include "unicode/resbund.h"
+#endif
+#include <swlog.h>
+
+SWORD_NAMESPACE_START
+
+const char UTF8Transliterator::optionstring[NUMTARGETSCRIPTS][16] = {
+ "Off",
+ "Latin",
+ "IPA",
+ "Basic Latin",
+ "SBL",
+ "TC",
+ "Beta",
+ "BGreek",
+ "SERA",
+ "Hugoye",
+ "UNGEGN",
+ "ISO",
+ "ALA-LC",
+ "BGN-PCGN",
+ "Greek",
+ "Hebrew",
+ "Cyrillic",
+ "Arabic",
+ "Syriac",
+ "Katakana",
+ "Hiragana",
+ "Hangul",
+ "Devanagari",
+ "Tamil",
+ "Bengali",
+ "Gurmukhi",
+ "Gujarati",
+ "Oriya",
+ "Telugu",
+ "Kannada",
+ "Malayalam",
+ "Thai",
+ "Georgian",
+ "Armenian",
+ "Ethiopic",
+ "Gothic",
+ "Ugaritic",
+ "Coptic",
+ "Meroitic",
+ "Linear B",
+ "Cypriot",
+ "Runic",
+ "Ogham",
+ "Thaana",
+ "Glagolitic",
+// "Tengwar",
+// "Cirth"
+};
+
+const char UTF8Transliterator::optName[] = "Transliteration";
+const char UTF8Transliterator::optTip[] = "Transliterates between scripts";
+
+SWTransMap UTF8Transliterator::transMap;
+
+#ifndef _ICUSWORD_
+
+const char UTF8Transliterator::SW_RB_RULE_BASED_IDS[] = "RuleBasedTransliteratorIDs";
+const char UTF8Transliterator::SW_RB_RULE[] = "Rule";
+#ifdef SWICU_DATA
+const char UTF8Transliterator::SW_RESDATA[] = SWICU_DATA;
+#else
+const char UTF8Transliterator::SW_RESDATA[] = "/usr/local/lib/sword/";
+#endif
+
+class SWCharString {
+ public:
+ inline SWCharString(const UnicodeString& str);
+ inline ~SWCharString();
+ inline operator const char*() { return ptr; }
+ private:
+ char buf[128];
+ char* ptr;
+};
+SWCharString::SWCharString(const UnicodeString& str) {
+ // TODO This isn't quite right -- we should probably do
+ // preflighting here to determine the real length.
+ if (str.length() >= (int32_t)sizeof(buf)) {
+ ptr = new char[str.length() + 8];
+ } else {
+ ptr = buf;
+ }
+ str.extract(0, 0x7FFFFFFF, ptr, "");
+}
+
+SWCharString::~SWCharString() {
+ if (ptr != buf) {
+ delete[] ptr;
+ }
+}
+
+#endif // _ICUSWORD_
+
+
+UTF8Transliterator::UTF8Transliterator() {
+ option = 0;
+ unsigned long i;
+ for (i = 0; i < NUMTARGETSCRIPTS; i++) {
+ options.push_back(optionstring[i]);
+ }
+#ifndef _ICUSWORD_
+ utf8status = U_ZERO_ERROR;
+ Load(utf8status);
+#endif
+}
+
+void UTF8Transliterator::Load(UErrorCode &status)
+{
+#ifndef _ICUSWORD_
+ static const char translit_swordindex[] = "translit_swordindex";
+
+ UResourceBundle *bundle = 0, *transIDs = 0, *colBund = 0;
+ bundle = ures_openDirect(SW_RESDATA, translit_swordindex, &status);
+ if (U_FAILURE(status)) {
+ SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: no resource index to load");
+ SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: status %s", u_errorName(status));
+ return;
+ }
+
+ transIDs = ures_getByKey(bundle, SW_RB_RULE_BASED_IDS, 0, &status);
+ //UParseError parseError;
+
+ int32_t row, maxRows;
+ if (U_SUCCESS(status)) {
+ maxRows = ures_getSize(transIDs);
+ for (row = 0; row < maxRows; row++) {
+ colBund = ures_getByIndex(transIDs, row, 0, &status);
+
+ if (U_SUCCESS(status) && ures_getSize(colBund) == 4) {
+ UnicodeString id = ures_getUnicodeStringByIndex(colBund, 0, &status);
+ UChar type = ures_getUnicodeStringByIndex(colBund, 1, &status).charAt(0);
+ UnicodeString resString = ures_getUnicodeStringByIndex(colBund, 2, &status);
+ SWLog::getSystemLog()->logDebug("ok so far");
+
+ if (U_SUCCESS(status)) {
+ switch (type) {
+ case 0x66: // 'f'
+ case 0x69: // 'i'
+ // 'file' or 'internal';
+ // row[2]=resource, row[3]=direction
+ {
+ //UBool visible = (type == 0x0066 /*f*/);
+ UTransDirection dir =
+ (ures_getUnicodeStringByIndex(colBund, 3, &status).charAt(0) ==
+ 0x0046 /*F*/) ?
+ UTRANS_FORWARD : UTRANS_REVERSE;
+ //registry->put(id, resString, dir, visible);
+ SWLog::getSystemLog()->logDebug("instantiating %s ...", resString.getBuffer());
+ registerTrans(id, resString, dir, status);
+ SWLog::getSystemLog()->logDebug("done.");
+ }
+ break;
+ case 0x61: // 'a'
+ // 'alias'; row[2]=createInstance argument
+ //registry->put(id, resString, TRUE);
+ break;
+ }
+ }
+ else SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: Failed to get resString");
+ }
+ else SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: Failed to get row");
+ ures_close(colBund);
+ }
+ }
+ else
+ {
+ SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: no resource index to load");
+ SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: status %s", u_errorName(status));
+ }
+
+ ures_close(transIDs);
+ ures_close(bundle);
+
+#endif // _ICUSWORD_
+}
+
+void UTF8Transliterator::registerTrans(const UnicodeString& ID, const UnicodeString& resource,
+ UTransDirection dir, UErrorCode &status )
+{
+#ifndef _ICUSWORD_
+ SWLog::getSystemLog()->logDebug("registering ID locally %s", ID.getBuffer());
+ SWTransData swstuff;
+ swstuff.resource = resource;
+ swstuff.dir = dir;
+ SWTransPair swpair;
+ swpair.first = ID;
+ swpair.second = swstuff;
+ transMap.insert(swpair);
+#endif
+}
+
+bool UTF8Transliterator::checkTrans(const UnicodeString& ID, UErrorCode &status )
+{
+#ifndef _ICUSWORD_
+ Transliterator *trans = Transliterator::createInstance(ID, UTRANS_FORWARD, status);
+ if (!U_FAILURE(status))
+ {
+ // already have it, clean up and return true
+ SWLog::getSystemLog()->logDebug("already have it %s", ID.getBuffer());
+ delete trans;
+ return true;
+ }
+ status = U_ZERO_ERROR;
+
+ SWTransMap::iterator swelement;
+ if ((swelement = transMap.find(ID)) != transMap.end())
+ {
+ SWLog::getSystemLog()->logDebug("found element in map");
+ SWTransData swstuff = (*swelement).second;
+ UParseError parseError;
+ //UErrorCode status;
+ //std::cout << "unregistering " << ID << std::endl;
+ //Transliterator::unregister(ID);
+ SWLog::getSystemLog()->logDebug("resource is %s", swstuff.resource.getBuffer());
+
+ // Get the rules
+ //std::cout << "importing: " << ID << ", " << resource << std::endl;
+ SWCharString ch(swstuff.resource);
+ UResourceBundle *bundle = ures_openDirect(SW_RESDATA, ch, &status);
+ const UnicodeString rules = ures_getUnicodeStringByKey(bundle, SW_RB_RULE, &status);
+ ures_close(bundle);
+ //parser.parse(rules, isReverse ? UTRANS_REVERSE : UTRANS_FORWARD,
+ // parseError, status);
+ if (U_FAILURE(status)) {
+ SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: Failed to get rules");
+ SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: status %s", u_errorName(status));
+ return false;
+ }
+
+
+ Transliterator *trans = Transliterator::createFromRules(ID, rules, swstuff.dir,
+ parseError,status);
+ if (U_FAILURE(status)) {
+ SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: Failed to create transliterator");
+ SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: status %s", u_errorName(status));
+ SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: Parse error: line %s", parseError.line);
+ SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: Parse error: offset %d", parseError.offset);
+ SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: Parse error: preContext %s", *parseError.preContext);
+ SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: Parse error: postContext %s", *parseError.postContext);
+ SWLog::getSystemLog()->logError("UTF8Transliterator: ICU: rules were");
+// SWLog::getSystemLog()->logError((const char *)rules);
+ return false;
+ }
+
+ Transliterator::registerInstance(trans);
+ return true;
+
+ //Transliterator *trans = instantiateTrans(ID, swstuff.resource, swstuff.dir, parseError, status);
+ //return trans;
+ }
+ else
+ {
+ return false;
+ }
+#else
+return true;
+#endif // _ICUSWORD_
+}
+
+bool UTF8Transliterator::addTrans(const char* newTrans, SWBuf* transList) {
+#ifdef _ICUSWORD_
+ UErrorCode status;
+ if (checkTrans(UnicodeString(newTrans), status)) {
+#endif
+ *transList += newTrans;
+ *transList += ";";
+ return true;
+#ifdef _ICUSWORD_
+ }
+ else {
+ return false;
+ }
+#endif
+}
+
+Transliterator * UTF8Transliterator::createTrans(const UnicodeString& ID, UTransDirection dir, UErrorCode &status )
+{
+ Transliterator *trans = Transliterator::createInstance(ID,UTRANS_FORWARD,status);
+ if (U_FAILURE(status)) {
+ delete trans;
+ return NULL;
+ }
+ else {
+ return trans;
+ }
+}
+
+void UTF8Transliterator::setOptionValue(const char *ival)
+{
+ unsigned char i = option = NUMTARGETSCRIPTS;
+ while (i && stricmp(ival, optionstring[i])) {
+ i--;
+ option = i;
+ }
+}
+
+const char *UTF8Transliterator::getOptionValue()
+{
+ return (NUMTARGETSCRIPTS > option) ? optionstring[option] : 0;
+}
+
+char UTF8Transliterator::processText(SWBuf &text, const SWKey *key, const SWModule *module)
+{
+ if (option) { // if we want transliteration
+ unsigned long i, j;
+ UErrorCode err = U_ZERO_ERROR;
+ UConverter * conv = NULL;
+ conv = ucnv_open("UTF-8", &err);
+ SWBuf ID;
+
+ bool compat = false;
+
+ // Convert UTF-8 string to UTF-16 (UChars)
+ j = strlen(text);
+ int32_t len = (j * 2) + 1;
+ UChar *source = new UChar[len];
+ err = U_ZERO_ERROR;
+ len = ucnv_toUChars(conv, source, len, text, j, &err);
+ source[len] = 0;
+
+ // Figure out which scripts are used in the string
+ unsigned char scripts[NUMSCRIPTS];
+
+ for (i = 0; i < NUMSCRIPTS; i++) {
+ scripts[i] = false;
+ }
+
+ for (i = 0; i < (unsigned long)len; i++) {
+ j = ublock_getCode(source[i]);
+ scripts[SE_LATIN] = true;
+ switch (j) {
+ //case UBLOCK_BASIC_LATIN: scripts[SE_LATIN] = true; break;
+ case UBLOCK_GREEK: scripts[SE_GREEK] = true; break;
+ case UBLOCK_HEBREW: scripts[SE_HEBREW] = true; break;
+ case UBLOCK_CYRILLIC: scripts[SE_CYRILLIC] = true; break;
+ case UBLOCK_ARABIC: scripts[SE_ARABIC] = true; break;
+ case UBLOCK_SYRIAC: scripts[SE_SYRIAC] = true; break;
+ case UBLOCK_KATAKANA: scripts[SE_KATAKANA] = true; break;
+ case UBLOCK_HIRAGANA: scripts[SE_HIRAGANA] = true; break;
+ case UBLOCK_HANGUL_SYLLABLES: scripts[SE_HANGUL] = true; break;
+ case UBLOCK_HANGUL_JAMO: scripts[SE_JAMO] = true; break;
+ case UBLOCK_DEVANAGARI: scripts[SE_DEVANAGARI] = true; break;
+ case UBLOCK_TAMIL: scripts[SE_TAMIL] = true; break;
+ case UBLOCK_BENGALI: scripts[SE_BENGALI] = true; break;
+ case UBLOCK_GURMUKHI: scripts[SE_GURMUKHI] = true; break;
+ case UBLOCK_GUJARATI: scripts[SE_GUJARATI] = true; break;
+ case UBLOCK_ORIYA: scripts[SE_ORIYA] = true; break;
+ case UBLOCK_TELUGU: scripts[SE_TELUGU] = true; break;
+ case UBLOCK_KANNADA: scripts[SE_KANNADA] = true; break;
+ case UBLOCK_MALAYALAM: scripts[SE_MALAYALAM] = true; break;
+ case UBLOCK_THAI: scripts[SE_THAI] = true; break;
+ case UBLOCK_GEORGIAN: scripts[SE_GEORGIAN] = true; break;
+ case UBLOCK_ARMENIAN: scripts[SE_ARMENIAN] = true; break;
+ case UBLOCK_ETHIOPIC: scripts[SE_ETHIOPIC] = true; break;
+ case UBLOCK_GOTHIC: scripts[SE_GOTHIC] = true; break;
+ case UBLOCK_UGARITIC: scripts[SE_UGARITIC] = true; break;
+// case UBLOCK_MEROITIC: scripts[SE_MEROITIC] = true; break;
+// case UBLOCK_LINEARB: scripts[SE_LINEARB] = true; break;
+// case UBLOCK_CYPRIOT: scripts[SE_CYPRIOT] = true; break;
+ case UBLOCK_RUNIC: scripts[SE_RUNIC] = true; break;
+ case UBLOCK_OGHAM: scripts[SE_OGHAM] = true; break;
+ case UBLOCK_THAANA: scripts[SE_THAANA] = true; break;
+// case UBLOCK_GLAGOLITIC: scripts[SE_GLAGOLITIC] = true; break;
+// case UBLOCK_TENGWAR: scripts[SE_TENGWAR] = true; break;
+// case UBLOCK_CIRTH: scripts[SE_CIRTH] = true; break;
+ case UBLOCK_CJK_RADICALS_SUPPLEMENT:
+ case UBLOCK_KANGXI_RADICALS:
+ case UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS:
+ case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION:
+ case UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A:
+ case UBLOCK_CJK_UNIFIED_IDEOGRAPHS:
+ scripts[SE_HAN] = true;
+ break;
+ case UBLOCK_CJK_COMPATIBILITY:
+ case UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS:
+ case UBLOCK_CJK_COMPATIBILITY_FORMS:
+ scripts[SE_HAN] = true;
+ compat = true;
+ break;
+ case UBLOCK_HANGUL_COMPATIBILITY_JAMO:
+ scripts[SE_HANGUL] = true;
+ compat = true;
+ break;
+
+ //default: scripts[SE_LATIN] = true;
+ }
+ }
+ scripts[option] = false; //turn off the reflexive transliteration
+
+ //return if we have no transliteration to do for this text
+ j = 0;
+ for (i = 0; !j && i < NUMSCRIPTS; i++) {
+ if (scripts[i]) j++;
+ }
+ if (!j) {
+ ucnv_close(conv);
+ return 0;
+ }
+
+ if (compat) {
+ addTrans("NFKD", &ID);
+ }
+ else {
+ addTrans("NFD", &ID);
+ }
+
+ //Simple X to Latin transliterators
+ if (scripts[SE_GREEK]) {
+ if (strnicmp (((SWModule*)module)->Lang(), "cop", 3)) {
+ if (option == SE_SBL)
+ addTrans("Greek-Latin/SBL", &ID);
+ else if (option == SE_TC)
+ addTrans("Greek-Latin/TC", &ID);
+ else if (option == SE_BETA)
+ addTrans("Greek-Latin/Beta", &ID);
+ else if (option == SE_BGREEK)
+ addTrans("Greek-Latin/BGreek", &ID);
+ else if (option == SE_UNGEGN)
+ addTrans("Greek-Latin/UNGEGN", &ID);
+ else if (option == SE_ISO)
+ addTrans("Greek-Latin/ISO", &ID);
+ else if (option == SE_ALALC)
+ addTrans("Greek-Latin/ALALC", &ID);
+ else if (option == SE_BGNPCGN)
+ addTrans("Greek-Latin/BGNPCGN", &ID);
+ else if (option == SE_IPA)
+ addTrans("Greek-IPA/Ancient", &ID);
+ else {
+ addTrans("Greek-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
+ }
+ else {
+ if (option == SE_SBL)
+ addTrans("Coptic-Latin/SBL", &ID);
+ else if (option == SE_TC)
+ addTrans("Coptic-Latin/TC", &ID);
+ else if (option == SE_BETA)
+ addTrans("Coptic-Latin/Beta", &ID);
+ else if (option == SE_IPA)
+ addTrans("Coptic-IPA", &ID);
+ else {
+ addTrans("Coptic-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
+ }
+ }
+ if (scripts[SE_HEBREW]) {
+ if (option == SE_SBL)
+ addTrans("Hebrew-Latin/SBL", &ID);
+ else if (option == SE_TC)
+ addTrans("Hebrew-Latin/TC", &ID);
+ else if (option == SE_BETA)
+ addTrans("Hebrew-Latin/Beta", &ID);
+ else if (option == SE_UNGEGN)
+ addTrans("Hebrew-Latin/UNGEGN", &ID);
+ else if (option == SE_ALALC)
+ addTrans("Hebrew-Latin/ALALC", &ID);
+ else if (option == SE_SYRIAC)
+ addTrans("Hebrew-Syriac", &ID);
+ else {
+ addTrans("Hebrew-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
+ }
+ if (scripts[SE_CYRILLIC]) {
+ if (option == SE_GLAGOLITIC)
+ addTrans("Cyrillic-Glagolitic", &ID);
+ else {
+ addTrans("Cyrillic-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
+ }
+ if (scripts[SE_ARABIC]) {
+ addTrans("Arabic-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
+ if (scripts[SE_SYRIAC]) {
+ if (option == SE_TC)
+ addTrans("Syriac-Latin/TC", &ID);
+ else if (option == SE_BETA)
+ addTrans("Syriac-Latin/Beta", &ID);
+ else if (option == SE_HUGOYE)
+ addTrans("Syriac-Latin/Hugoye", &ID);
+ else if (option == SE_HEBREW)
+ addTrans("Syriac-Hebrew", &ID);
+ else {
+ addTrans("Syriac-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
+ }
+ if (scripts[SE_THAI]) {
+ addTrans("Thai-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
+ if (scripts[SE_GEORGIAN]) {
+ if (option == SE_ISO)
+ addTrans("Georgian-Latin/ISO", &ID);
+ else if (option == SE_ALALC)
+ addTrans("Georgian-Latin/ALALC", &ID);
+ else if (option == SE_BGNPCGN)
+ addTrans("Georgian-Latin/BGNPCGN", &ID);
+ else if (option == SE_IPA)
+ addTrans("Georgian-IPA", &ID);
+ else {
+ addTrans("Georgian-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
+ }
+ if (scripts[SE_ARMENIAN]) {
+ if (option == SE_ISO)
+ addTrans("Armenian-Latin/ISO", &ID);
+ else if (option == SE_ALALC)
+ addTrans("Armenian-Latin/ALALC", &ID);
+ else if (option == SE_BGNPCGN)
+ addTrans("Armenian-Latin/BGNPCGN", &ID);
+ else if (option == SE_IPA)
+ addTrans("Armenian-IPA", &ID);
+ else {
+ addTrans("Armenian-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
+ }
+ if (scripts[SE_ETHIOPIC]) {
+ if (option == SE_UNGEGN)
+ addTrans("Ethiopic-Latin/UNGEGN", &ID);
+ else if (option == SE_ISO)
+ addTrans("Ethiopic-Latin/ISO", &ID);
+ else if (option == SE_ALALC)
+ addTrans("Ethiopic-Latin/ALALC", &ID);
+ else if (option == SE_SERA)
+ addTrans("Ethiopic-Latin/SERA", &ID);
+ else {
+ addTrans("Ethiopic-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
+ }
+ if (scripts[SE_GOTHIC]) {
+ if (option == SE_BASICLATIN)
+ addTrans("Gothic-Latin/Basic", &ID);
+ else if (option == SE_IPA)
+ addTrans("Gothic-IPA", &ID);
+ else {
+ addTrans("Gothic-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
+ }
+ if (scripts[SE_UGARITIC]) {
+ if (option == SE_SBL)
+ addTrans("Ugaritic-Latin/SBL", &ID);
+ else {
+ addTrans("Ugaritic-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
+ }
+ if (scripts[SE_MEROITIC]) {
+ addTrans("Meroitic-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
+ if (scripts[SE_LINEARB]) {
+ addTrans("LinearB-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
+ if (scripts[SE_CYPRIOT]) {
+ addTrans("Cypriot-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
+ if (scripts[SE_RUNIC]) {
+ addTrans("Runic-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
+ if (scripts[SE_OGHAM]) {
+ addTrans("Ogham-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
+ if (scripts[SE_THAANA]) {
+ if (option == SE_ALALC)
+ addTrans("Thaana-Latin/ALALC", &ID);
+ else if (option == SE_BGNPCGN)
+ addTrans("Thaana-Latin/BGNPCGN", &ID);
+ else {
+ addTrans("Thaana-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
+ }
+ if (scripts[SE_GLAGOLITIC]) {
+ if (option == SE_ISO)
+ addTrans("Glagolitic-Latin/ISO", &ID);
+ else if (option == SE_ALALC)
+ addTrans("Glagolitic-Latin/ALALC", &ID);
+ else if (option == SE_ALALC)
+ addTrans("Glagolitic-Cyrillic", &ID);
+ else {
+ addTrans("Glagolitic-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
+ }
+ if (scripts[SE_THAI]) {
+ addTrans("Thai-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
+ if (scripts[SE_THAI]) {
+ addTrans("Thai-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
+
+ if (scripts[SE_HAN]) {
+ if (!strnicmp (((SWModule*)module)->Lang(), "ja", 2)) {
+ addTrans("Kanji-Romaji", &ID);
+ }
+ else {
+ addTrans("Han-Latin", &ID);
+ }
+ scripts[SE_LATIN] = true;
+ }
+
+ // Inter-Kana and Kana to Latin transliterators
+ if (option == SE_HIRAGANA && scripts[SE_KATAKANA]) {
+ addTrans("Katakana-Hiragana", &ID);
+ scripts[SE_HIRAGANA] = true;
+ }
+ else if (option == SE_KATAKANA && scripts[SE_HIRAGANA]) {
+ addTrans("Hiragana-Katakana", &ID);
+ scripts[SE_KATAKANA] = true;
+ }
+ else {
+ if (scripts[SE_KATAKANA]) {
+ addTrans("Katakana-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
+ if (scripts[SE_HIRAGANA]) {
+ addTrans("Hiragana-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
+ }
+
+ // Korean to Latin transliterators
+ if (scripts[SE_HANGUL]) {
+ addTrans("Hangul-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
+ if (scripts[SE_JAMO]) {
+ addTrans("Jamo-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
+
+ // Indic-Latin
+ if (option < SE_DEVANAGARI || option > SE_MALAYALAM) {
+ // Indic to Latin
+ if (scripts[SE_TAMIL]) {
+ addTrans("Tamil-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
+ if (scripts[SE_BENGALI]) {
+ addTrans("Bengali-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
+ if (scripts[SE_GURMUKHI]) {
+ addTrans("Gurmukhi-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
+ if (scripts[SE_GUJARATI]) {
+ addTrans("Gujarati-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
+ if (scripts[SE_ORIYA]) {
+ addTrans("Oriya-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
+ if (scripts[SE_TELUGU]) {
+ addTrans("Telugu-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
+ if (scripts[SE_KANNADA]) {
+ addTrans("Kannada-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
+ if (scripts[SE_MALAYALAM]) {
+ addTrans("Malayalam-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ }
+ }
+ else {
+ if (scripts[SE_LATIN]) {
+ addTrans("Latin-InterIndic", &ID);
+ }
+ if (scripts[SE_DEVANAGARI]) {
+ addTrans("Devanagari-InterIndic", &ID);
+ }
+ if (scripts[SE_TAMIL]) {
+ addTrans("Tamil-InterIndic", &ID);
+ }
+ if (scripts[SE_BENGALI]) {
+ addTrans("Bengali-InterIndic", &ID);
+ }
+ if (scripts[SE_GURMUKHI]) {
+ addTrans("Gurmurkhi-InterIndic", &ID);
+ }
+ if (scripts[SE_GUJARATI]) {
+ addTrans("Gujarati-InterIndic", &ID);
+ }
+ if (scripts[SE_ORIYA]) {
+ addTrans("Oriya-InterIndic", &ID);
+ }
+ if (scripts[SE_TELUGU]) {
+ addTrans("Telugu-InterIndic", &ID);
+ }
+ if (scripts[SE_KANNADA]) {
+ addTrans("Kannada-InterIndic", &ID);
+ }
+ if (scripts[SE_MALAYALAM]) {
+ addTrans("Malayalam-InterIndic", &ID);
+ }
+
+ switch(option) {
+ case SE_DEVANAGARI:
+ addTrans("InterIndic-Devanagari", &ID);
+ break;
+ case SE_TAMIL:
+ addTrans("InterIndic-Tamil", &ID);
+ break;
+ case SE_BENGALI:
+ addTrans("InterIndic-Bengali", &ID);
+ break;
+ case SE_GURMUKHI:
+ addTrans("InterIndic-Gurmukhi", &ID);
+ break;
+ case SE_GUJARATI:
+ addTrans("InterIndic-Gujarati", &ID);
+ break;
+ case SE_ORIYA:
+ addTrans("InterIndic-Oriya", &ID);
+ break;
+ case SE_TELUGU:
+ addTrans("InterIndic-Telugu", &ID);
+ break;
+ case SE_KANNADA:
+ addTrans("InterIndic-Kannada", &ID);
+ break;
+ case SE_MALAYALAM:
+ addTrans("InterIndic-Malayalam", &ID);
+ break;
+ default:
+ addTrans("InterIndic-Latin", &ID);
+ scripts[SE_LATIN] = true;
+ break;
+ }
+ }
+
+// if (scripts[SE_TENGWAR]) {
+// addTrans("Tengwar-Latin", &ID);
+// scripts[SE_LATIN] = true;
+// }
+// if (scripts[SE_CIRTH]) {
+// addTrans("Cirth-Latin", &ID);
+// scripts[SE_LATIN] = true;
+// }
+
+ if (scripts[SE_LATIN]) {
+ switch (option) {
+ case SE_GREEK:
+ addTrans("Latin-Greek", &ID);
+ break;
+ case SE_HEBREW:
+ addTrans("Latin-Hebrew", &ID);
+ break;
+ case SE_CYRILLIC:
+ addTrans("Latin-Cyrillic", &ID);
+ break;
+ case SE_ARABIC:
+ addTrans("Latin-Arabic", &ID);
+ break;
+ case SE_SYRIAC:
+ addTrans("Latin-Syriac", &ID);
+ break;
+ case SE_THAI:
+ addTrans("Latin-Thai", &ID);
+ break;
+ case SE_GEORGIAN:
+ addTrans("Latin-Georgian", &ID);
+ break;
+ case SE_ARMENIAN:
+ addTrans("Latin-Armenian", &ID);
+ break;
+ case SE_ETHIOPIC:
+ addTrans("Latin-Ethiopic", &ID);
+ break;
+ case SE_GOTHIC:
+ addTrans("Latin-Gothic", &ID);
+ break;
+ case SE_UGARITIC:
+ addTrans("Latin-Ugaritic", &ID);
+ break;
+ case SE_COPTIC:
+ addTrans("Latin-Coptic", &ID);
+ break;
+ case SE_KATAKANA:
+ addTrans("Latin-Katakana", &ID);
+ break;
+ case SE_HIRAGANA:
+ addTrans("Latin-Hiragana", &ID);
+ break;
+ case SE_JAMO:
+ addTrans("Latin-Jamo", &ID);
+ break;
+ case SE_HANGUL:
+ addTrans("Latin-Hangul", &ID);
+ break;
+ case SE_MEROITIC:
+ addTrans("Latin-Meroitic", &ID);
+ break;
+ case SE_LINEARB:
+ addTrans("Latin-LinearB", &ID);
+ break;
+ case SE_CYPRIOT:
+ addTrans("Latin-Cypriot", &ID);
+ break;
+ case SE_RUNIC:
+ addTrans("Latin-Runic", &ID);
+ break;
+ case SE_OGHAM:
+ addTrans("Latin-Ogham", &ID);
+ break;
+ case SE_THAANA:
+ addTrans("Latin-Thaana", &ID);
+ break;
+ case SE_GLAGOLITIC:
+ addTrans("Latin-Glagolitic", &ID);
+ break;
+// case SE_TENGWAR:
+// addTrans("Latin-Tengwar", &ID);
+// break;
+// case SE_CIRTH:
+// addTrans("Latin-Cirth", &ID);
+// break;
+ }
+ }
+
+ if (option == SE_BASICLATIN) {
+ addTrans("Any-Latin1", &ID);
+ }
+
+ addTrans("NFC", &ID);
+
+ err = U_ZERO_ERROR;
+ Transliterator * trans = createTrans(UnicodeString(ID), UTRANS_FORWARD, err);
+ if (trans && !U_FAILURE(err)) {
+ UnicodeString target = UnicodeString(source);
+ trans->transliterate(target);
+ text.setSize(text.size()*2);
+ len = ucnv_fromUChars(conv, text.getRawData(), text.size(), target.getBuffer(), target.length(), &err);
+ text.setSize(len);
+ delete trans;
+ }
+ ucnv_close(conv);
+ }
+ return 0;
+}
+
+SWORD_NAMESPACE_END
+#endif
+
+
+
diff --git a/src/modules/filters/utf8utf16.cpp b/src/modules/filters/utf8utf16.cpp
new file mode 100644
index 0000000..5c1614c
--- /dev/null
+++ b/src/modules/filters/utf8utf16.cpp
@@ -0,0 +1,78 @@
+/******************************************************************************
+ *
+ * UTF8UTF16 - SWFilter descendant to convert UTF-8 to UTF-16
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include <utf8utf16.h>
+#include <swbuf.h>
+
+SWORD_NAMESPACE_START
+
+UTF8UTF16::UTF8UTF16() {
+}
+
+char UTF8UTF16::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ const unsigned char *from;
+ unsigned long ch;
+ signed short utf16;
+ unsigned char from2[7];
+
+ SWBuf orig = text;
+
+ from = (const unsigned char *)orig.c_str();
+
+ // -------------------------------
+ for (text = ""; *from; from++) {
+ ch = 0;
+ //case: ANSI
+ if ((*from & 128) != 128) {
+ text.setSize(text.size()+2);
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short)*from;
+ continue;
+ }
+ //case: Invalid UTF-8 (illegal continuing byte in initial position)
+ if ((*from & 128) && ((*from & 64) != 64)) {
+ continue;
+ }
+ //case: 2+ byte codepoint
+ from2[0] = *from;
+ from2[0] <<= 1;
+ int subsequent;
+ for (subsequent = 1; (from2[0] & 128) && (subsequent < 7); subsequent++) {
+ from2[0] <<= 1;
+ from2[subsequent] = from[subsequent];
+ from2[subsequent] &= 63;
+ ch <<= 6;
+ ch |= from2[subsequent];
+ }
+ subsequent--;
+ from2[0] <<= 1;
+ char significantFirstBits = 8 - (2+subsequent);
+
+ ch |= (((short)from2[0]) << (((6*subsequent)+significantFirstBits)-8));
+ from += subsequent;
+ if (ch < 0x10000) {
+ text.setSize(text.size()+2);
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short)ch;
+ }
+ else {
+ utf16 = (signed short)((ch - 0x10000) / 0x400 + 0xD800);
+ text.setSize(text.size()+2);
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short)utf16;
+ utf16 = (signed short)((ch - 0x10000) % 0x400 + 0xDC00);
+ text.setSize(text.size()+2);
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short)utf16;
+ }
+ }
+ text.setSize(text.size()+2);
+ *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short)0;
+
+ return 0;
+
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/genbook/Makefile b/src/modules/genbook/Makefile
new file mode 100644
index 0000000..1a2d00d
--- /dev/null
+++ b/src/modules/genbook/Makefile
@@ -0,0 +1,5 @@
+
+root := ../../..
+
+all:
+ make -C ${root}
diff --git a/src/modules/genbook/Makefile.am b/src/modules/genbook/Makefile.am
new file mode 100644
index 0000000..02f6ab1
--- /dev/null
+++ b/src/modules/genbook/Makefile.am
@@ -0,0 +1,5 @@
+genbookdir = $(top_srcdir)/src/modules/genbook
+
+libsword_la_SOURCES += $(genbookdir)/swgenbook.cpp
+
+include ../src/modules/genbook/rawgenbook/Makefile.am
diff --git a/src/modules/genbook/rawgenbook/Makefile b/src/modules/genbook/rawgenbook/Makefile
new file mode 100644
index 0000000..aab8056
--- /dev/null
+++ b/src/modules/genbook/rawgenbook/Makefile
@@ -0,0 +1,4 @@
+root := ../../../..
+
+all:
+ make -C ${root}
diff --git a/src/modules/genbook/rawgenbook/Makefile.am b/src/modules/genbook/rawgenbook/Makefile.am
new file mode 100644
index 0000000..a176d75
--- /dev/null
+++ b/src/modules/genbook/rawgenbook/Makefile.am
@@ -0,0 +1,4 @@
+rawgenbookdir = $(top_srcdir)/src/modules/genbook/rawgenbook
+
+libsword_la_SOURCES += $(rawgenbookdir)/rawgenbook.cpp
+
diff --git a/src/modules/genbook/rawgenbook/rawgenbook.cpp b/src/modules/genbook/rawgenbook/rawgenbook.cpp
new file mode 100644
index 0000000..22cc51f
--- /dev/null
+++ b/src/modules/genbook/rawgenbook/rawgenbook.cpp
@@ -0,0 +1,219 @@
+/******************************************************************************
+ * rawtext.cpp - code for class 'RawGenBook'- a module that reads raw text files:
+ * ot and nt using indexs ??.bks ??.cps ??.vss
+ */
+
+
+#include <stdio.h>
+#include <fcntl.h>
+
+#include <rawgenbook.h>
+#include <rawstr.h>
+#include <utilstr.h>
+#include <filemgr.h>
+#include <sysdata.h>
+#include <treekeyidx.h>
+#include <versetreekey.h>
+
+SWORD_NAMESPACE_START
+
+/******************************************************************************
+ * RawGenBook Constructor - Initializes data for instance of RawGenBook
+ *
+ * ENT: iname - Internal name for module
+ * idesc - Name to display to user for module
+ * idisp - Display object to use for displaying
+ */
+
+RawGenBook::RawGenBook(const char *ipath, const char *iname, const char *idesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang, const char *keyType)
+ : SWGenBook(iname, idesc, idisp, enc, dir, mark, ilang) {
+
+ char *buf = new char [ strlen (ipath) + 20 ];
+
+ path = 0;
+ stdstr(&path, ipath);
+ verseKey = !strcmp("VerseKey", keyType);
+
+ if (verseKey) Type("Biblical Texts");
+
+ if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\'))
+ path[strlen(path)-1] = 0;
+
+ delete key;
+ key = CreateKey();
+
+
+ sprintf(buf, "%s.bdt", path);
+ bdtfd = FileMgr::getSystemFileMgr()->open(buf, FileMgr::RDWR, true);
+
+ delete [] buf;
+
+}
+
+
+/******************************************************************************
+ * RawGenBook Destructor - Cleans up instance of RawGenBook
+ */
+
+RawGenBook::~RawGenBook() {
+
+ FileMgr::getSystemFileMgr()->close(bdtfd);
+
+ if (path)
+ delete [] path;
+
+}
+
+
+bool RawGenBook::isWritable() {
+ return ((bdtfd->getFd() > 0) && ((bdtfd->mode & FileMgr::RDWR) == FileMgr::RDWR));
+}
+
+
+/******************************************************************************
+ * RawGenBook::getRawEntry - Returns the correct verse when char * cast
+ * is requested
+ *
+ * RET: string buffer with verse
+ */
+
+SWBuf &RawGenBook::getRawEntryBuf() {
+
+ __u32 offset = 0;
+ __u32 size = 0;
+
+ TreeKey *key = 0;
+ SWTRY {
+ key = SWDYNAMIC_CAST(TreeKey, (this->key));
+ }
+ SWCATCH ( ... ) {}
+
+ if (!key) {
+ VerseTreeKey *tkey = 0;
+ SWTRY {
+ tkey = SWDYNAMIC_CAST(VerseTreeKey, (this->key));
+ }
+ SWCATCH ( ... ) {}
+ if (tkey) key = tkey->getTreeKey();
+ }
+
+ if (!key) {
+ key = (TreeKeyIdx *)CreateKey();
+ (*key) = *(this->key);
+ }
+
+ int dsize;
+ key->getUserData(&dsize);
+ entryBuf = "";
+ if (dsize > 7) {
+ memcpy(&offset, key->getUserData(), 4);
+ offset = swordtoarch32(offset);
+
+ memcpy(&size, key->getUserData() + 4, 4);
+ size = swordtoarch32(size);
+
+ entrySize = size; // support getEntrySize call
+
+ entryBuf.setFillByte(0);
+ entryBuf.setSize(size);
+ bdtfd->seek(offset, SEEK_SET);
+ bdtfd->read(entryBuf.getRawData(), size);
+
+ rawFilter(entryBuf, 0); // hack, decipher
+ rawFilter(entryBuf, key);
+
+// if (!isUnicode())
+ RawStr::prepText(entryBuf);
+ }
+
+ if (key != this->key) // free our key if we created a VerseKey
+ delete key;
+
+ return entryBuf;
+}
+
+
+void RawGenBook::setEntry(const char *inbuf, long len) {
+
+ __u32 offset = archtosword32(bdtfd->seek(0, SEEK_END));
+ __u32 size = 0;
+ TreeKeyIdx *key = ((TreeKeyIdx *)this->key);
+
+ char userData[8];
+
+ if (len < 0)
+ len = strlen(inbuf);
+
+ bdtfd->write(inbuf, len);
+
+ size = archtosword32(len);
+ memcpy(userData, &offset, 4);
+ memcpy(userData+4, &size, 4);
+ key->setUserData(userData, 8);
+ key->save();
+}
+
+
+void RawGenBook::linkEntry(const SWKey *inkey) {
+ TreeKeyIdx *srckey = 0;
+ TreeKeyIdx *key = ((TreeKeyIdx *)this->key);
+ // see if we have a VerseKey * or decendant
+ SWTRY {
+ srckey = SWDYNAMIC_CAST(TreeKeyIdx, inkey);
+ }
+ SWCATCH ( ... ) {}
+ // if we don't have a VerseKey * decendant, create our own
+ if (!srckey) {
+ srckey = (TreeKeyIdx *)CreateKey();
+ (*srckey) = *inkey;
+ }
+
+ key->setUserData(srckey->getUserData(), 8);
+ key->save();
+
+ if (inkey != srckey) // free our key if we created a VerseKey
+ delete srckey;
+}
+
+
+/******************************************************************************
+ * RawGenBook::deleteEntry - deletes this entry
+ *
+ * RET: *this
+ */
+
+void RawGenBook::deleteEntry() {
+ TreeKeyIdx *key = ((TreeKeyIdx *)this->key);
+ key->remove();
+}
+
+
+char RawGenBook::createModule(const char *ipath) {
+ char *path = 0;
+ char *buf = new char [ strlen (ipath) + 20 ];
+ FileDesc *fd;
+ signed char retval;
+
+ stdstr(&path, ipath);
+
+ if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\'))
+ path[strlen(path)-1] = 0;
+
+ sprintf(buf, "%s.bdt", path);
+ FileMgr::removeFile(buf);
+ fd = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE);
+ fd->getFd();
+ FileMgr::getSystemFileMgr()->close(fd);
+
+ retval = TreeKeyIdx::create(path);
+ delete [] path;
+ return retval;
+}
+
+
+SWKey *RawGenBook::CreateKey() {
+ TreeKeyIdx *newKey = new TreeKeyIdx(path);
+ return (verseKey) ? (SWKey *)new VerseTreeKey(newKey) : newKey;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/genbook/swgenbook.cpp b/src/modules/genbook/swgenbook.cpp
new file mode 100644
index 0000000..0ce16aa
--- /dev/null
+++ b/src/modules/genbook/swgenbook.cpp
@@ -0,0 +1,29 @@
+/******************************************************************************
+ * swld.cpp - code for base class 'SWLD'. SWLD is the basis for all
+ * types of Lexicon and Dictionary modules (hence the 'LD').
+ */
+
+#include <swgenbook.h>
+
+SWORD_NAMESPACE_START
+
+/******************************************************************************
+ * SWLD Constructor - Initializes data for instance of SWLD
+ *
+ * ENT: imodname - Internal name for module
+ * imoddesc - Name to display to user for module
+ * idisp - Display object to use for displaying
+ */
+
+SWGenBook::SWGenBook(const char *imodname, const char *imoddesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) : SWModule(imodname, imoddesc, idisp, (char *)"Generic Books", enc, dir, mark, ilang) {
+}
+
+
+/******************************************************************************
+ * SWLD Destructor - Cleans up instance of SWLD
+ */
+
+SWGenBook::~SWGenBook() {
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/lexdict/Makefile b/src/modules/lexdict/Makefile
new file mode 100644
index 0000000..1a2d00d
--- /dev/null
+++ b/src/modules/lexdict/Makefile
@@ -0,0 +1,5 @@
+
+root := ../../..
+
+all:
+ make -C ${root}
diff --git a/src/modules/lexdict/Makefile.am b/src/modules/lexdict/Makefile.am
new file mode 100644
index 0000000..8cfe68b
--- /dev/null
+++ b/src/modules/lexdict/Makefile.am
@@ -0,0 +1,7 @@
+lexdictdir = $(top_srcdir)/src/modules/lexdict
+
+libsword_la_SOURCES += $(lexdictdir)/swld.cpp
+
+include ../src/modules/lexdict/rawld/Makefile.am
+include ../src/modules/lexdict/rawld4/Makefile.am
+include ../src/modules/lexdict/zld/Makefile.am
diff --git a/src/modules/lexdict/rawld/Makefile b/src/modules/lexdict/rawld/Makefile
new file mode 100644
index 0000000..35d6648
--- /dev/null
+++ b/src/modules/lexdict/rawld/Makefile
@@ -0,0 +1,5 @@
+
+root := ../../../..
+
+all:
+ make -C ${root}
diff --git a/src/modules/lexdict/rawld/Makefile.am b/src/modules/lexdict/rawld/Makefile.am
new file mode 100644
index 0000000..2a2d996
--- /dev/null
+++ b/src/modules/lexdict/rawld/Makefile.am
@@ -0,0 +1,4 @@
+rawlddir = $(top_srcdir)/src/modules/lexdict/rawld
+
+libsword_la_SOURCES += $(rawlddir)/rawld.cpp
+
diff --git a/src/modules/lexdict/rawld/rawld.cpp b/src/modules/lexdict/rawld/rawld.cpp
new file mode 100644
index 0000000..08be215
--- /dev/null
+++ b/src/modules/lexdict/rawld/rawld.cpp
@@ -0,0 +1,194 @@
+/******************************************************************************
+ * rawld.cpp - code for class 'RawLD'- a module that reads raw lexicon and
+ * dictionary files: *.dat *.idx
+ */
+
+
+#include <ctype.h>
+#include <stdio.h>
+#include <fcntl.h>
+
+#include <utilstr.h>
+#include <rawstr.h>
+#include <rawld.h>
+#include <filemgr.h>
+
+SWORD_NAMESPACE_START
+
+ /******************************************************************************
+ * RawLD Constructor - Initializes data for instance of RawLD
+ *
+ * ENT: ipath - path and filename of files (no extension)
+ * iname - Internal name for module
+ * idesc - Name to display to user for module
+ * idisp - Display object to use for displaying
+ */
+
+RawLD::RawLD(const char *ipath, const char *iname, const char *idesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) : RawStr(ipath), SWLD(iname, idesc, idisp, enc, dir, mark, ilang)
+{
+}
+
+
+/******************************************************************************
+ * RawLD Destructor - Cleans up instance of RawLD
+ */
+
+RawLD::~RawLD()
+{
+}
+
+
+bool RawLD::isWritable() {
+ return ((idxfd->getFd() > 0) && ((idxfd->mode & FileMgr::RDWR) == FileMgr::RDWR));
+}
+
+
+/******************************************************************************
+ * RawLD::strongsPad - Pads a key if (it-1) is 100% digits to 5 places
+ * allows for final to be alpha, e.g. '123B'
+ *
+ * ENT: buf - buffer to check and pad
+ */
+
+void RawLD::strongsPad(char *buf)
+{
+ char *check;
+ int size = 0;
+ int len = strlen(buf);
+ char subLet = 0;
+ bool bang = false;
+ if ((len < 8) && (len > 0)) {
+ for (check = buf; *(check+1); check++) {
+ if (!isdigit(*check))
+ break;
+ else size++;
+ }
+
+ if (size && ((size == (len-1)) || (size == (len-2)))) {
+ if (*check == '!') {
+ bang = true;
+ check++;
+ }
+ if (isalpha(*check)) {
+ subLet = toupper(*check);
+ *(check-(bang?1:0)) = 0;
+ }
+ sprintf(buf, "%.5d", atoi(buf));
+ if (subLet) {
+ check = buf+(strlen(buf));
+ if (bang) {
+ *check++ = '!';
+ }
+ *check++ = subLet;
+ *check = 0;
+ }
+ }
+ }
+}
+
+
+/******************************************************************************
+ * RawLD::getEntry - Looks up entry from data file. 'Snaps' to closest
+ * entry and sets 'entrybuf'.
+ *
+ * ENT: away - number of entries offset from key (default = 0)
+ *
+ * RET: error status
+ */
+
+char RawLD::getEntry(long away)
+{
+ long start = 0;
+ unsigned short size = 0;
+ char *idxbuf = 0;
+ char retval = 0;
+
+ char *buf = new char [ strlen(*key) + 6 ];
+ strcpy(buf, *key);
+
+ strongsPad(buf);
+
+ if (!(retval = findOffset(buf, &start, &size, away))) {
+ readText(start, &size, &idxbuf, entryBuf);
+ rawFilter(entryBuf, 0); // hack, decipher
+ rawFilter(entryBuf, key);
+ entrySize = size; // support getEntrySize call
+ if (!key->Persist()) // If we have our own key
+ *key = idxbuf; // reset it to entry index buffer
+
+ stdstr(&entkeytxt, idxbuf); // set entry key text that module 'snapped' to.
+ delete [] idxbuf;
+ }
+ else {
+ entryBuf = "";
+ }
+
+ delete [] buf;
+ return retval;
+}
+
+
+/******************************************************************************
+ * RawLD::getRawEntry - Returns the correct entry when char * cast
+ * is requested
+ *
+ * RET: string buffer with entry
+ */
+
+SWBuf &RawLD::getRawEntryBuf() {
+
+ char ret = getEntry();
+ if (!ret) {
+// if (!isUnicode())
+ prepText(entryBuf);
+ }
+ else error = ret;
+
+ return entryBuf;
+}
+
+
+/******************************************************************************
+ * RawLD::increment - Increments module key a number of entries
+ *
+ * ENT: increment - Number of entries to jump forward
+ *
+ * RET: *this
+ */
+
+void RawLD::increment(int steps) {
+ char tmperror;
+
+ if (key->isTraversable()) {
+ *key += steps;
+ error = key->Error();
+ steps = 0;
+ }
+
+ tmperror = (getEntry(steps)) ? KEYERR_OUTOFBOUNDS : 0;
+ error = (error)?error:tmperror;
+ *key = entkeytxt;
+}
+
+
+void RawLD::setEntry(const char *inbuf, long len) {
+ doSetText(*key, inbuf, len);
+}
+
+
+void RawLD::linkEntry(const SWKey *inkey) {
+ doLinkEntry(*key, *inkey);
+}
+
+
+/******************************************************************************
+ * RawFiles::deleteEntry - deletes this entry
+ *
+ * RET: *this
+ */
+
+void RawLD::deleteEntry() {
+ doSetText(*key, "");
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/lexdict/rawld4/Makefile b/src/modules/lexdict/rawld4/Makefile
new file mode 100644
index 0000000..35d6648
--- /dev/null
+++ b/src/modules/lexdict/rawld4/Makefile
@@ -0,0 +1,5 @@
+
+root := ../../../..
+
+all:
+ make -C ${root}
diff --git a/src/modules/lexdict/rawld4/Makefile.am b/src/modules/lexdict/rawld4/Makefile.am
new file mode 100644
index 0000000..697e2e5
--- /dev/null
+++ b/src/modules/lexdict/rawld4/Makefile.am
@@ -0,0 +1,4 @@
+rawld4dir = $(top_srcdir)/src/modules/lexdict/rawld4
+
+libsword_la_SOURCES += $(rawld4dir)/rawld4.cpp
+
diff --git a/src/modules/lexdict/rawld4/rawld4.cpp b/src/modules/lexdict/rawld4/rawld4.cpp
new file mode 100644
index 0000000..0fd1058
--- /dev/null
+++ b/src/modules/lexdict/rawld4/rawld4.cpp
@@ -0,0 +1,192 @@
+/******************************************************************************
+ * rawld.cpp - code for class 'RawLD'- a module that reads raw lexicon and
+ * dictionary files: *.dat *.idx
+ */
+
+
+#include <ctype.h>
+#include <stdio.h>
+#include <fcntl.h>
+
+#include <filemgr.h>
+#include <utilstr.h>
+#include <rawstr4.h>
+#include <rawld4.h>
+
+SWORD_NAMESPACE_START
+
+ /******************************************************************************
+ * RawLD Constructor - Initializes data for instance of RawLD
+ *
+ * ENT: ipath - path and filename of files (no extension)
+ * iname - Internal name for module
+ * idesc - Name to display to user for module
+ * idisp - Display object to use for displaying
+ */
+
+RawLD4::RawLD4(const char *ipath, const char *iname, const char *idesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) : RawStr4(ipath), SWLD(iname, idesc, idisp, enc, dir, mark, ilang)
+{
+}
+
+
+/******************************************************************************
+ * RawLD Destructor - Cleans up instance of RawLD
+ */
+
+RawLD4::~RawLD4()
+{
+}
+
+
+bool RawLD4::isWritable() {
+ return ((idxfd->getFd() > 0) && ((idxfd->mode & FileMgr::RDWR) == FileMgr::RDWR));
+}
+
+
+/******************************************************************************
+ * RawLD4::strongsPad - Pads a key if (it-1) is 100% digits to 5 places
+ * allows for final to be alpha, e.g. '123B'
+ *
+ * ENT: buf - buffer to check and pad
+ */
+
+void RawLD4::strongsPad(char *buf)
+{
+ char *check;
+ int size = 0;
+ int len = strlen(buf);
+ char subLet = 0;
+ bool bang = false;
+ if ((len < 8) && (len > 0)) {
+ for (check = buf; *(check+1); check++) {
+ if (!isdigit(*check))
+ break;
+ else size++;
+ }
+
+ if (size && ((size == (len-1)) || (size == (len-2)))) {
+ if (*check == '!') {
+ bang = true;
+ check++;
+ }
+ if (isalpha(*check)) {
+ subLet = toupper(*check);
+ *(check-(bang?1:0)) = 0;
+ }
+ sprintf(buf, "%.5d", atoi(buf));
+ if (subLet) {
+ check = buf+(strlen(buf));
+ if (bang) {
+ *check++ = '!';
+ }
+ *check++ = subLet;
+ *check = 0;
+ }
+ }
+ }
+}
+
+
+/******************************************************************************
+ * RawLD4::getEntry - Looks up entry from data file. 'Snaps' to closest
+ * entry and sets 'entrybuf'.
+ *
+ * ENT: away - number of entries offset from key (default = 0)
+ *
+ * RET: error status
+ */
+
+char RawLD4::getEntry(long away)
+{
+ long start = 0;
+ unsigned long size = 0;
+ char *idxbuf = 0;
+ char retval = 0;
+
+ char *buf = new char [ strlen(*key) + 6 ];
+ strcpy(buf, *key);
+
+ strongsPad(buf);
+
+ entryBuf = "";
+ if (!(retval = findOffset(buf, &start, &size, away))) {
+ readText(start, &size, &idxbuf, entryBuf);
+ rawFilter(entryBuf, 0); // hack, decipher
+ rawFilter(entryBuf, key);
+ entrySize = size; // support getEntrySize call
+ if (!key->Persist()) // If we have our own key
+ *key = idxbuf; // reset it to entry index buffer
+
+ stdstr(&entkeytxt, idxbuf); // set entry key text that module 'snapped' to.
+ delete [] idxbuf;
+ }
+
+ delete [] buf;
+ return retval;
+}
+
+
+/******************************************************************************
+ * RawLD4::getRawEntry - Returns the correct entry when char * cast
+ * is requested
+ *
+ * RET: string buffer with entry
+ */
+
+SWBuf &RawLD4::getRawEntryBuf() {
+
+ char ret = getEntry();
+ if (!ret) {
+// if (!isUnicode())
+ prepText(entryBuf);
+ }
+ else error = ret;
+
+ return entryBuf;
+}
+
+
+/******************************************************************************
+ * RawLD4::increment - Increments module key a number of entries
+ *
+ * ENT: increment - Number of entries to jump forward
+ *
+ * RET: *this
+ */
+
+void RawLD4::increment(int steps) {
+ char tmperror;
+
+ if (key->isTraversable()) {
+ *key += steps;
+ error = key->Error();
+ steps = 0;
+ }
+
+ tmperror = (getEntry(steps)) ? KEYERR_OUTOFBOUNDS : 0;
+ error = (error)?error:tmperror;
+ *key = entkeytxt;
+}
+
+
+void RawLD4::setEntry(const char *inbuf, long len) {
+ doSetText(*key, inbuf, len);
+}
+
+
+void RawLD4::linkEntry(const SWKey *inkey) {
+ doLinkEntry(*key, *inkey);
+}
+
+
+/******************************************************************************
+ * RawFiles::deleteEntry - deletes this entry
+ *
+ * RET: *this
+ */
+
+void RawLD4::deleteEntry() {
+ doSetText(*key, "");
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/lexdict/swld.cpp b/src/modules/lexdict/swld.cpp
new file mode 100644
index 0000000..518e5c0
--- /dev/null
+++ b/src/modules/lexdict/swld.cpp
@@ -0,0 +1,82 @@
+/******************************************************************************
+ * swld.cpp - code for base class 'SWLD'. SWLD is the basis for all
+ * types of Lexicon and Dictionary modules (hence the 'LD').
+ */
+
+#include <swld.h>
+#include <strkey.h>
+
+SWORD_NAMESPACE_START
+
+/******************************************************************************
+ * SWLD Constructor - Initializes data for instance of SWLD
+ *
+ * ENT: imodname - Internal name for module
+ * imoddesc - Name to display to user for module
+ * idisp - Display object to use for displaying
+ */
+
+SWLD::SWLD(const char *imodname, const char *imoddesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) : SWModule(imodname, imoddesc, idisp, (char *)"Lexicons / Dictionaries", enc, dir, mark, ilang)
+{
+ delete key;
+ key = CreateKey();
+ entkeytxt = new char [1];
+ *entkeytxt = 0;
+}
+
+
+/******************************************************************************
+ * SWLD Destructor - Cleans up instance of SWLD
+ */
+
+SWLD::~SWLD()
+{
+ if (entkeytxt)
+ delete [] entkeytxt;
+}
+
+
+SWKey *SWLD::CreateKey() { return new StrKey(); }
+
+
+/******************************************************************************
+ * SWLD::KeyText - Sets/gets module KeyText, getting from saved text if key is
+ * persistent
+ *
+ * ENT: ikeytext - value which to set keytext
+ * [0] - only get
+ *
+ * RET: pointer to keytext
+ */
+
+const char *SWLD::KeyText(const char *ikeytext)
+{
+ if (key->Persist() && !ikeytext) {
+ getRawEntryBuf(); // force module key to snap to entry
+ return entkeytxt;
+ }
+ else return SWModule::KeyText(ikeytext);
+}
+
+
+/******************************************************************************
+ * SWLD::setPosition(SW_POSITION) - Positions this key if applicable
+ */
+
+void SWLD::setPosition(SW_POSITION p) {
+ if (!key->isTraversable()) {
+ switch (p) {
+ case POS_TOP:
+ *key = "";
+ break;
+ case POS_BOTTOM:
+ *key = "zzzzzzzzz";
+ break;
+ }
+ }
+ else *key = p;
+ getRawEntryBuf();
+}
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/lexdict/zld/Makefile b/src/modules/lexdict/zld/Makefile
new file mode 100644
index 0000000..35d6648
--- /dev/null
+++ b/src/modules/lexdict/zld/Makefile
@@ -0,0 +1,5 @@
+
+root := ../../../..
+
+all:
+ make -C ${root}
diff --git a/src/modules/lexdict/zld/Makefile.am b/src/modules/lexdict/zld/Makefile.am
new file mode 100644
index 0000000..81e4d7c
--- /dev/null
+++ b/src/modules/lexdict/zld/Makefile.am
@@ -0,0 +1,4 @@
+zlddir = $(top_srcdir)/src/modules/lexdict/zld
+
+libsword_la_SOURCES += $(zlddir)/zld.cpp
+
diff --git a/src/modules/lexdict/zld/zld.cpp b/src/modules/lexdict/zld/zld.cpp
new file mode 100644
index 0000000..bcb51ab
--- /dev/null
+++ b/src/modules/lexdict/zld/zld.cpp
@@ -0,0 +1,189 @@
+/******************************************************************************
+ * rawld.cpp - code for class 'RawLD'- a module that reads raw lexicon and
+ * dictionary files: *.dat *.idx
+ */
+
+
+#include <ctype.h>
+#include <stdio.h>
+#include <fcntl.h>
+
+#include <utilstr.h>
+#include <zstr.h>
+#include <zld.h>
+#include <filemgr.h>
+
+SWORD_NAMESPACE_START
+
+ /******************************************************************************
+ * RawLD Constructor - Initializes data for instance of RawLD
+ *
+ * ENT: ipath - path and filename of files (no extension)
+ * iname - Internal name for module
+ * idesc - Name to display to user for module
+ * idisp - Display object to use for displaying
+ */
+
+zLD::zLD(const char *ipath, const char *iname, const char *idesc, long blockCount, SWCompress *icomp, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) : zStr(ipath, -1, blockCount, icomp), SWLD(iname, idesc, idisp, enc, dir, mark, ilang) {
+
+}
+
+
+/******************************************************************************
+ * RawLD Destructor - Cleans up instance of RawLD
+ */
+
+zLD::~zLD() {
+
+}
+
+
+bool zLD::isWritable() {
+ return ((idxfd->getFd() > 0) && ((idxfd->mode & FileMgr::RDWR) == FileMgr::RDWR));
+}
+
+
+/******************************************************************************
+ * zLD::strongsPad - Pads a key if it is 100% digits to 5 places
+ *
+ * ENT: buf - buffer to check and pad
+ */
+
+void zLD::strongsPad(char *buf) {
+ char *check;
+ int size = 0;
+ int len = strlen(buf);
+ char subLet = 0;
+ bool bang = false;
+ if ((len < 8) && (len > 0)) {
+ for (check = buf; *(check+1); check++) {
+ if (!isdigit(*check))
+ break;
+ else size++;
+ }
+
+ if (size && ((size == (len-1)) || (size == (len-2)))) {
+ if (*check == '!') {
+ bang = true;
+ check++;
+ }
+ if (isalpha(*check)) {
+ subLet = toupper(*check);
+ *(check-(bang?1:0)) = 0;
+ }
+ sprintf(buf, "%.5d", atoi(buf));
+ if (subLet) {
+ check = buf+(strlen(buf));
+ if (bang) {
+ *check++ = '!';
+ }
+ *check++ = subLet;
+ *check = 0;
+ }
+ }
+ }
+}
+
+
+/******************************************************************************
+ * zLD::getEntry - Looks up entry from data file. 'Snaps' to closest
+ * entry and sets 'entrybuf'.
+ *
+ * ENT: away - number of entries offset from key (default = 0)
+ *
+ * RET: error status
+ */
+
+char zLD::getEntry(long away) {
+ char *idxbuf = 0;
+ char *ebuf = 0;
+ char retval = 0;
+ long index;
+ unsigned long size;
+ char *buf = new char [ strlen(*key) + 6 ];
+ strcpy(buf, *key);
+
+ strongsPad(buf);
+
+ entryBuf = "";
+ if (!(retval = findKeyIndex(buf, &index, away))) {
+ getText(index, &idxbuf, &ebuf);
+ size = strlen(ebuf) + 1;
+ entryBuf = ebuf;
+
+ rawFilter(entryBuf, key);
+
+ entrySize = size; // support getEntrySize call
+ if (!key->Persist()) // If we have our own key
+ *key = idxbuf; // reset it to entry index buffer
+
+ stdstr(&entkeytxt, idxbuf); // set entry key text that module 'snapped' to.
+ free(idxbuf);
+ free(ebuf);
+ }
+
+ delete [] buf;
+ return retval;
+}
+
+
+/******************************************************************************
+ * zLD::getRawEntry - Returns the correct entry when char * cast
+ * is requested
+ *
+ * RET: string buffer with entry
+ */
+
+SWBuf &zLD::getRawEntryBuf() {
+ if (!getEntry() /*&& !isUnicode()*/) {
+ prepText(entryBuf);
+ }
+
+ return entryBuf;
+}
+
+
+/******************************************************************************
+ * zLD::increment - Increments module key a number of entries
+ *
+ * ENT: increment - Number of entries to jump forward
+ *
+ * RET: *this
+ */
+
+void zLD::increment(int steps) {
+ char tmperror;
+
+ if (key->isTraversable()) {
+ *key += steps;
+ error = key->Error();
+ steps = 0;
+ }
+
+ tmperror = (getEntry(steps)) ? KEYERR_OUTOFBOUNDS : 0;
+ error = (error)?error:tmperror;
+ *key = entkeytxt;
+}
+
+
+void zLD::setEntry(const char *inbuf, long len) {
+ setText(*key, inbuf, len);
+}
+
+
+void zLD::linkEntry(const SWKey *inkey) {
+ zStr::linkEntry(*key, *inkey);
+}
+
+
+/******************************************************************************
+ * RawFiles::deleteEntry - deletes this entry
+ *
+ * RET: *this
+ */
+
+void zLD::deleteEntry() {
+ setText(*key, "");
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/readme b/src/modules/readme
new file mode 100644
index 0000000..92cc99e
--- /dev/null
+++ b/src/modules/readme
@@ -0,0 +1,9 @@
+This directory contains all different module types that are usable by the SWORD
+API.
+
+ comments Commentaries
+ common common utility objects
+ lexdict Lexicons/Dictionaries
+ maps Maps
+ parsers Language Parsers
+ texts Scripture Texts
diff --git a/src/modules/swmodule.cpp b/src/modules/swmodule.cpp
new file mode 100644
index 0000000..8461953
--- /dev/null
+++ b/src/modules/swmodule.cpp
@@ -0,0 +1,1285 @@
+/******************************************************************************
+ * swmodule.cpp -code for base class 'module'. Module is the basis for all
+ * types of modules (e.g. texts, commentaries, maps, lexicons,
+ * etc.)
+ */
+
+#include <vector>
+
+#include <sysdata.h>
+#include <swmodule.h>
+#include <utilstr.h>
+#include <regex.h> // GNU
+#include <swfilter.h>
+#include <versekey.h> // KLUDGE for Search
+#include <treekeyidx.h> // KLUDGE for Search
+#include <swoptfilter.h>
+#include <filemgr.h>
+#include <stringmgr.h>
+#ifndef _MSC_VER
+#include <iostream>
+#endif
+
+#ifdef USELUCENE
+#include <CLucene.h>
+#include <CLucene/CLBackwards.h>
+
+//Lucence includes
+//#include "CLucene.h"
+//#include "CLucene/util/Reader.h"
+//#include "CLucene/util/Misc.h"
+//#include "CLucene/util/dirent.h"
+
+using namespace lucene::index;
+using namespace lucene::analysis;
+using namespace lucene::util;
+using namespace lucene::store;
+using namespace lucene::document;
+using namespace lucene::queryParser;
+using namespace lucene::search;
+#endif
+
+using std::vector;
+
+SWORD_NAMESPACE_START
+
+SWDisplay SWModule::rawdisp;
+
+typedef std::list<SWBuf> StringList;
+
+/******************************************************************************
+ * SWModule Constructor - Initializes data for instance of SWModule
+ *
+ * ENT: imodname - Internal name for module
+ * imoddesc - Name to display to user for module
+ * idisp - Display object to use for displaying
+ * imodtype - Type of Module (All modules will be displayed with
+ * others of same type under their modtype heading
+ * unicode - if this module is unicode
+ */
+
+SWModule::SWModule(const char *imodname, const char *imoddesc, SWDisplay *idisp, char *imodtype, SWTextEncoding encoding, SWTextDirection direction, SWTextMarkup markup, const char* imodlang) {
+ key = CreateKey();
+ entryBuf = "";
+ config = &ownConfig;
+ modname = 0;
+ error = 0;
+ moddesc = 0;
+ modtype = 0;
+ modlang = 0;
+ this->encoding = encoding;
+ this->direction = direction;
+ this->markup = markup;
+ entrySize= -1;
+ disp = (idisp) ? idisp : &rawdisp;
+ stdstr(&modname, imodname);
+ stdstr(&moddesc, imoddesc);
+ stdstr(&modtype, imodtype);
+ stdstr(&modlang, imodlang);
+ stripFilters = new FilterList();
+ rawFilters = new FilterList();
+ renderFilters = new FilterList();
+ optionFilters = new OptionFilterList();
+ encodingFilters = new FilterList();
+ skipConsecutiveLinks = true;
+ procEntAttr = true;
+}
+
+
+/******************************************************************************
+ * SWModule Destructor - Cleans up instance of SWModule
+ */
+
+SWModule::~SWModule()
+{
+ if (modname)
+ delete [] modname;
+ if (moddesc)
+ delete [] moddesc;
+ if (modtype)
+ delete [] modtype;
+ if (modlang)
+ delete [] modlang;
+
+ if (key) {
+ if (!key->Persist())
+ delete key;
+ }
+
+ stripFilters->clear();
+ rawFilters->clear();
+ renderFilters->clear();
+ optionFilters->clear();
+ encodingFilters->clear();
+ entryAttributes.clear();
+
+ delete stripFilters;
+ delete rawFilters;
+ delete renderFilters;
+ delete optionFilters;
+ delete encodingFilters;
+}
+
+
+/******************************************************************************
+ * SWModule::CreateKey - Allocates a key of specific type for module
+ *
+ * RET: pointer to allocated key
+ */
+
+SWKey *SWModule::CreateKey()
+{
+ return new SWKey();
+}
+
+
+/******************************************************************************
+ * SWModule::Error - Gets and clears error status
+ *
+ * RET: error status
+ */
+
+char SWModule::Error()
+{
+ char retval = error;
+
+ error = 0;
+ return retval;
+}
+
+
+/******************************************************************************
+ * SWModule::Name - Sets/gets module name
+ *
+ * ENT: imodname - value which to set modname
+ * [0] - only get
+ *
+ * RET: pointer to modname
+ */
+
+char *SWModule::Name(const char *imodname) {
+ return stdstr(&modname, imodname);
+}
+
+char *SWModule::Name() const {
+ return modname;
+}
+
+
+/******************************************************************************
+ * SWModule::Description - Sets/gets module description
+ *
+ * ENT: imoddesc - value which to set moddesc
+ * [0] - only get
+ *
+ * RET: pointer to moddesc
+ */
+
+char *SWModule::Description(const char *imoddesc) {
+ return stdstr(&moddesc, imoddesc);
+}
+
+char *SWModule::Description() const {
+ return moddesc;
+}
+
+
+/******************************************************************************
+ * SWModule::Type - Sets/gets module type
+ *
+ * ENT: imodtype - value which to set modtype
+ * [0] - only get
+ *
+ * RET: pointer to modtype
+ */
+
+char *SWModule::Type(const char *imodtype) {
+ return stdstr(&modtype, imodtype);
+}
+
+char *SWModule::Type() const {
+ return modtype;
+}
+
+/******************************************************************************
+ * SWModule::Direction - Sets/gets module direction
+ *
+ * ENT: newdir - value which to set direction
+ * [-1] - only get
+ *
+ * RET: char direction
+ */
+char SWModule::Direction(signed char newdir) {
+ if (newdir != -1)
+ direction = newdir;
+ return direction;
+}
+
+/******************************************************************************
+ * SWModule::Encoding - Sets/gets module encoding
+ *
+ * ENT: newdir - value which to set direction
+ * [-1] - only get
+ *
+ * RET: char encoding
+ */
+char SWModule::Encoding(signed char newenc) {
+ if (newenc != -1)
+ encoding = newenc;
+ return encoding;
+}
+
+/******************************************************************************
+ * SWModule::Markup - Sets/gets module markup
+ *
+ * ENT: newdir - value which to set direction
+ * [-1] - only get
+ *
+ * RET: char markup
+ */
+char SWModule::Markup(signed char newmark) {
+ if (newmark != -1)
+ markup = newmark;
+ return markup;
+}
+
+
+/******************************************************************************
+ * SWModule::Lang - Sets/gets module language
+ *
+ * ENT: imodlang - value which to set modlang
+ * [0] - only get
+ *
+ * RET: pointer to modname
+ */
+
+char *SWModule::Lang(const char *imodlang)
+{
+ return stdstr(&modlang, imodlang);
+}
+
+
+/******************************************************************************
+ * SWModule::Disp - Sets/gets display driver
+ *
+ * ENT: idisp - value which to set disp
+ * [0] - only get
+ *
+ * RET: pointer to disp
+ */
+
+SWDisplay *SWModule::getDisplay() const {
+ return disp;
+}
+
+void SWModule::setDisplay(SWDisplay *idisp) {
+ disp = idisp;
+}
+
+
+/******************************************************************************
+ * SWModule::Display - Calls this modules display object and passes itself
+ *
+ * RET: error status
+ */
+
+char SWModule::Display() {
+ disp->Display(*this);
+ return 0;
+}
+
+
+/******************************************************************************
+ * SWModule::getKey - Gets the key from this module that points to the position
+ * record
+ *
+ * RET: key object
+ */
+
+SWKey *SWModule::getKey() const {
+ return key;
+}
+
+
+/******************************************************************************
+ * SWModule::setKey - Sets a key to this module for position to a particular
+ * record
+ *
+ * ENT: ikey - key with which to set this module
+ *
+ * RET: error status
+ */
+
+char SWModule::setKey(const SWKey *ikey) {
+ SWKey *oldKey = 0;
+
+ if (key) {
+ if (!key->Persist()) // if we have our own copy
+ oldKey = key;
+ }
+
+ if (!ikey->Persist()) { // if we are to keep our own copy
+ key = CreateKey();
+ *key = *ikey;
+ }
+ else key = (SWKey *)ikey; // if we are to just point to an external key
+
+ if (oldKey)
+ delete oldKey;
+
+ return 0;
+}
+
+
+/******************************************************************************
+ * SWModule::setPosition(SW_POSITION) - Positions this modules to an entry
+ *
+ * ENT: p - position (e.g. TOP, BOTTOM)
+ *
+ * RET: *this
+ */
+
+void SWModule::setPosition(SW_POSITION p) {
+ *key = p;
+ char saveError = key->Error();
+
+ switch (p) {
+ case POS_TOP:
+ (*this)++;
+ (*this)--;
+ break;
+
+ case POS_BOTTOM:
+ (*this)--;
+ (*this)++;
+ break;
+ }
+
+ error = saveError;
+}
+
+
+/******************************************************************************
+ * SWModule::increment - Increments module key a number of entries
+ *
+ * ENT: increment - Number of entries to jump forward
+ *
+ * RET: *this
+ */
+
+void SWModule::increment(int steps) {
+ (*key) += steps;
+ error = key->Error();
+}
+
+
+/******************************************************************************
+ * SWModule::decrement - Decrements module key a number of entries
+ *
+ * ENT: decrement - Number of entries to jump backward
+ *
+ * RET: *this
+ */
+
+void SWModule::decrement(int steps) {
+ (*key) -= steps;
+ error = key->Error();
+}
+
+
+/******************************************************************************
+ * SWModule::Search - Searches a module for a string
+ *
+ * ENT: istr - string for which to search
+ * searchType - type of search to perform
+ * >=0 - regex
+ * -1 - phrase
+ * -2 - multiword
+ * -3 - entryAttrib (eg. Word//Lemma/G1234/)
+ * -4 - clucene
+ * flags - options flags for search
+ * justCheckIfSupported - if set, don't search, only tell if this
+ * function supports requested search.
+ *
+ * RET: ListKey set to verses that contain istr
+ */
+
+ListKey &SWModule::search(const char *istr, int searchType, int flags, SWKey *scope, bool *justCheckIfSupported, void (*percent)(char, void *), void *percentUserData) {
+
+ listKey.ClearList();
+ SWBuf term = istr;
+
+#ifdef USELUCENE
+ SWBuf target = getConfigEntry("AbsoluteDataPath");
+ char ch = target.c_str()[strlen(target.c_str())-1];
+ if ((ch != '/') && (ch != '\\'))
+ target.append('/');
+ target.append("lucene");
+#endif
+ if (justCheckIfSupported) {
+ *justCheckIfSupported = (searchType >= -3);
+#ifdef USELUCENE
+ if ((searchType == -4) && (IndexReader::indexExists(target.c_str()))) {
+ *justCheckIfSupported = true;
+ }
+#endif
+ return listKey;
+ }
+
+ SWKey *saveKey = 0;
+ SWKey *searchKey = 0;
+ SWKey *resultKey = CreateKey();
+ regex_t preg;
+ vector<SWBuf> words;
+ const char *sres;
+ terminateSearch = false;
+ char perc = 1;
+ bool savePEA = isProcessEntryAttributes();
+ // determine if we might be doing special strip searches. useful for knowing if we can use shortcuts
+ bool specialStrips = (getConfigEntry("LocalStripFilter") || strchr(istr, '<'));
+
+ processEntryAttributes(searchType == -3);
+
+
+ if (!key->Persist()) {
+ saveKey = CreateKey();
+ *saveKey = *key;
+ }
+ else saveKey = key;
+
+ searchKey = (scope)?scope->clone():(key->Persist())?key->clone():0;
+ if (searchKey) {
+ searchKey->Persist(1);
+ setKey(*searchKey);
+ }
+
+ (*percent)(perc, percentUserData);
+ // MAJOR KLUDGE: VerseKey::Index still return index within testament.
+ // VerseKey::NewIndex should be moved to Index and Index should be some
+ // VerseKey specific name
+ VerseKey *vkcheck = 0;
+ SWTRY {
+ vkcheck = SWDYNAMIC_CAST(VerseKey, key);
+ }
+ SWCATCH (...) {}
+ // end MAJOR KLUDGE
+
+ *this = BOTTOM;
+ // fix below when we find out the bug
+ long highIndex = (vkcheck)?32300/*vkcheck->NewIndex()*/:key->Index();
+ if (!highIndex)
+ highIndex = 1; // avoid division by zero errors.
+ *this = TOP;
+ if (searchType >= 0) {
+ flags |=searchType|REG_NOSUB|REG_EXTENDED;
+ regcomp(&preg, istr, flags);
+ }
+
+ (*percent)(++perc, percentUserData);
+
+
+#ifdef USELUCENE
+ if (searchType == -4) { // lucene
+ //Buffers for the wchar<->utf8 char* conversion
+ const unsigned short int MAX_CONV_SIZE = 2047;
+ wchar_t wcharBuffer[MAX_CONV_SIZE + 1];
+ char utfBuffer[MAX_CONV_SIZE + 1];
+
+ lucene::index::IndexReader *ir = 0;
+ lucene::search::IndexSearcher *is = 0;
+ Query *q = 0;
+ Hits *h = 0;
+ SWTRY {
+ ir = IndexReader::open(target);
+ is = new IndexSearcher(ir);
+ (*percent)(10, percentUserData);
+
+ standard::StandardAnalyzer analyzer;
+ lucene_utf8towcs(wcharBuffer, istr, MAX_CONV_SIZE); //TODO Is istr always utf8?
+ q = QueryParser::parse(wcharBuffer, _T("content"), &analyzer);
+ (*percent)(20, percentUserData);
+ h = is->search(q);
+ (*percent)(80, percentUserData);
+
+ // iterate thru each good module position that meets the search
+ for (long i = 0; i < h->length(); i++) {
+ Document &doc = h->doc(i);
+
+ // set a temporary verse key to this module position
+ lucene_wcstoutf8(utfBuffer, doc.get(_T("key")), MAX_CONV_SIZE);
+ *resultKey = utfBuffer; //TODO Does a key always accept utf8?
+
+ // check to see if it sets ok (in our range?) and if so, add to our return list
+ *getKey() = *resultKey;
+ if (*getKey() == *resultKey) {
+ listKey << *resultKey;
+ listKey.GetElement()->userData = (void *)((__u32)(h->score(i)*100));
+ }
+ }
+ (*percent)(98, percentUserData);
+ }
+ SWCATCH (...) {
+ q = 0;
+ // invalid clucene query
+ }
+ delete h;
+ delete q;
+
+ delete is;
+ if (ir) {
+ ir->close();
+ }
+ }
+#endif
+
+ // some pre-loop processing
+ switch (searchType) {
+
+ // phrase
+ case -1:
+ // let's see if we're told to ignore case. If so, then we'll touppstr our term
+ if ((flags & REG_ICASE) == REG_ICASE) toupperstr(term);
+ break;
+
+ // multi-word
+ case -2:
+ // let's break the term down into our words vector
+ while (1) {
+ const char *word = term.stripPrefix(' ');
+ if (!word) {
+ words.push_back(term);
+ break;
+ }
+ words.push_back(word);
+ }
+ if ((flags & REG_ICASE) == REG_ICASE) {
+ for (unsigned int i = 0; i < words.size(); i++) {
+ toupperstr(words[i]);
+ }
+ }
+ break;
+
+ // entry attributes
+ case -3:
+ // let's break the attribute segs down. We'll reuse our words vector for each segment
+ while (1) {
+ const char *word = term.stripPrefix('/');
+ if (!word) {
+ words.push_back(term);
+ break;
+ }
+ words.push_back(word);
+ }
+ break;
+ }
+
+
+ // our main loop to iterate the module and find the stuff
+ perc = 5;
+ (*percent)(perc, percentUserData);
+
+
+ while ((searchType > -4) && !Error() && !terminateSearch) {
+ long mindex = 0;
+ if (vkcheck)
+ mindex = vkcheck->NewIndex();
+ else mindex = key->Index();
+ float per = (float)mindex / highIndex;
+ per *= 93;
+ per += 5;
+ char newperc = (char)per;
+// char newperc = (char)(5+(93*(((float)((vkcheck)?vkcheck->NewIndex():key->Index()))/highIndex)));
+ if (newperc > perc) {
+ perc = newperc;
+ (*percent)(perc, percentUserData);
+ }
+ else if (newperc < perc) {
+#ifndef _MSC_VER
+ std::cerr << "Serious error: new percentage complete is less than previous value\n";
+ std::cerr << "using vk? " << ((vkcheck)?"yes":"no") << "\n";
+ std::cerr << "index: " << ((vkcheck)?vkcheck->NewIndex():key->Index()) << "\n";
+ std::cerr << "highIndex: " << highIndex << "\n";
+ std::cerr << "newperc ==" << (int)newperc << "%" << "is smaller than\n";
+ std::cerr << "perc == " << (int )perc << "% \n";
+#endif
+ }
+ if (searchType >= 0) {
+ if (!regexec(&preg, StripText(), 0, 0, 0)) {
+ *resultKey = *getKey();
+ resultKey->clearBound();
+ listKey << *resultKey;
+ }
+ }
+
+ // phrase
+ else {
+ SWBuf textBuf;
+ switch (searchType) {
+
+ // phrase
+ case -1:
+ textBuf = StripText();
+ if ((flags & REG_ICASE) == REG_ICASE) toupperstr(textBuf);
+ sres = strstr(textBuf.c_str(), term.c_str());
+ if (sres) { //it's also in the StripText(), so we have a valid search result item now
+ *resultKey = *getKey();
+ resultKey->clearBound();
+ listKey << *resultKey;
+ }
+ break;
+
+ // multiword
+ case -2: { // enclose our allocations
+ int loopCount = 0;
+ unsigned int foundWords = 0;
+ do {
+ textBuf = ((loopCount == 0)&&(!specialStrips)) ? getRawEntry() : StripText();
+ foundWords = 0;
+
+ for (unsigned int i = 0; i < words.size(); i++) {
+ if ((flags & REG_ICASE) == REG_ICASE) toupperstr(textBuf);
+ sres = strstr(textBuf.c_str(), words[i].c_str());
+ if (!sres) {
+ break; //for loop
+ }
+ foundWords++;
+ }
+
+ loopCount++;
+ } while ( (loopCount < 2) && (foundWords == words.size()));
+
+ if ((loopCount == 2) && (foundWords == words.size())) { //we found the right words in both raw and stripped text, which means it's a valid result item
+ *resultKey = *getKey();
+ resultKey->clearBound();
+ listKey << *resultKey;
+ }
+ } break;
+
+ // entry attributes
+ case -3:
+ RenderText(); // force parse
+ AttributeTypeList &entryAttribs = getEntryAttributes();
+ AttributeTypeList::iterator i1Start, i1End;
+ AttributeList::iterator i2Start, i2End;
+ AttributeValue::iterator i3Start, i3End;
+
+ if ((words.size()) && (words[0].length())) {
+ i1Start = entryAttribs.find(words[0]);
+ i1End = i1Start;
+ if (i1End != entryAttribs.end())
+ i1End++;
+ }
+ else {
+ i1Start = entryAttribs.begin();
+ i1End = entryAttribs.end();
+ }
+ for (;i1Start != i1End; i1Start++) {
+ if ((words.size()>1) && (words[1].length())) {
+ i2Start = i1Start->second.find(words[1]);
+ i2End = i2Start;
+ if (i2End != i1Start->second.end())
+ i2End++;
+ }
+ else {
+ i2Start = i1Start->second.begin();
+ i2End = i1Start->second.end();
+ }
+ for (;i2Start != i2End; i2Start++) {
+ if ((words.size()>2) && (words[2].length())) {
+ i3Start = i2Start->second.find(words[2]);
+ i3End = i3Start;
+ if (i3End != i2Start->second.end())
+ i3End++;
+ }
+ else {
+ i3Start = i2Start->second.begin();
+ i3End = i2Start->second.end();
+ }
+ for (;i3Start != i3End; i3Start++) {
+ if ((words.size()>3) && (words[3].length())) {
+ if (flags & SEARCHFLAG_MATCHWHOLEENTRY) {
+ bool found = !(((flags & REG_ICASE) == REG_ICASE) ? sword::stricmp(i3Start->second.c_str(), words[3]) : strcmp(i3Start->second.c_str(), words[3]));
+ sres = (found) ? i3Start->second.c_str() : 0;
+ }
+ else {
+ sres = ((flags & REG_ICASE) == REG_ICASE) ? stristr(i3Start->second.c_str(), words[3]) : strstr(i3Start->second.c_str(), words[3]);
+ }
+ if (sres) {
+ *resultKey = *getKey();
+ resultKey->clearBound();
+ listKey << *resultKey;
+ break;
+ }
+ }
+ }
+ if (i3Start != i3End)
+ break;
+ }
+ if (i2Start != i2End)
+ break;
+ }
+ break;
+ } // end switch
+ }
+ (*this)++;
+ }
+
+
+ // cleaup work
+ if (searchType >= 0)
+ regfree(&preg);
+
+ setKey(*saveKey);
+
+ if (!saveKey->Persist())
+ delete saveKey;
+
+ if (searchKey)
+ delete searchKey;
+ delete resultKey;
+
+ listKey = TOP;
+ processEntryAttributes(savePEA);
+
+
+ (*percent)(100, percentUserData);
+
+
+ return listKey;
+}
+
+
+/******************************************************************************
+ * SWModule::StripText() - calls all stripfilters on current text
+ *
+ * ENT: buf - buf to massage instead of this modules current text
+ * len - max len of buf
+ *
+ * RET: this module's text at current key location massaged by Strip filters
+ */
+
+const char *SWModule::StripText(const char *buf, int len) {
+ return RenderText(buf, len, false);
+}
+
+
+/******************************************************************************
+ * SWModule::RenderText - calls all renderfilters on current text
+ *
+ * ENT: buf - buffer to Render instead of current module position
+ *
+ * RET: this module's text at current key location massaged by RenderText filters
+ */
+
+ const char *SWModule::RenderText(const char *buf, int len, bool render) {
+ entryAttributes.clear();
+
+ static SWBuf local;
+ if (buf)
+ local = buf;
+
+ SWBuf &tmpbuf = (buf) ? local : getRawEntryBuf();
+ SWKey *key = 0;
+ static const char *null = "";
+
+ if (tmpbuf) {
+ unsigned long size = (len < 0) ? ((getEntrySize()<0) ? strlen(tmpbuf) : getEntrySize()) : len;
+ if (size > 0) {
+ key = (SWKey *)*this;
+
+ optionFilter(tmpbuf, key);
+
+ if (render) {
+ renderFilter(tmpbuf, key);
+ encodingFilter(tmpbuf, key);
+ }
+ else stripFilter(tmpbuf, key);
+ }
+ }
+ else {
+ tmpbuf = null;
+ }
+
+ return tmpbuf;
+}
+
+
+/******************************************************************************
+ * SWModule::RenderText - calls all renderfilters on current text
+ *
+ * ENT: tmpKey - key to use to grab text
+ *
+ * RET: this module's text at current key location massaged by RenderFilers
+ */
+
+ const char *SWModule::RenderText(SWKey *tmpKey) {
+ SWKey *saveKey;
+ const char *retVal;
+
+ if (!key->Persist()) {
+ saveKey = CreateKey();
+ *saveKey = *key;
+ }
+ else saveKey = key;
+
+ setKey(*tmpKey);
+
+ retVal = RenderText();
+
+ setKey(*saveKey);
+
+ if (!saveKey->Persist())
+ delete saveKey;
+
+ return retVal;
+}
+
+
+/******************************************************************************
+ * SWModule::StripText - calls all StripTextFilters on current text
+ *
+ * ENT: tmpKey - key to use to grab text
+ *
+ * RET: this module's text at specified key location massaged by Strip filters
+ */
+
+const char *SWModule::StripText(SWKey *tmpKey) {
+ SWKey *saveKey;
+ const char *retVal;
+
+ if (!key->Persist()) {
+ saveKey = CreateKey();
+ *saveKey = *key;
+ }
+ else saveKey = key;
+
+ setKey(*tmpKey);
+
+ retVal = StripText();
+
+ setKey(*saveKey);
+
+ if (!saveKey->Persist())
+ delete saveKey;
+
+ return retVal;
+}
+
+
+const char *SWModule::getConfigEntry(const char *key) const {
+ ConfigEntMap::iterator it = config->find(key);
+ return (it != config->end()) ? it->second.c_str() : 0;
+}
+
+
+void SWModule::setConfig(ConfigEntMap *config) {
+ this->config = config;
+}
+
+
+bool SWModule::hasSearchFramework() {
+#ifdef USELUCENE
+ return true;
+#else
+ return SWSearchable::hasSearchFramework();
+#endif
+}
+
+void SWModule::deleteSearchFramework() {
+#ifdef USELUCENE
+ SWBuf target = getConfigEntry("AbsoluteDataPath");
+ char ch = target.c_str()[strlen(target.c_str())-1];
+ if ((ch != '/') && (ch != '\\'))
+ target.append('/');
+ target.append("lucene");
+
+ FileMgr::removeDir(target.c_str());
+#else
+ SWSearchable::deleteSearchFramework();
+#endif
+}
+
+
+signed char SWModule::createSearchFramework(void (*percent)(char, void *), void *percentUserData) {
+#ifdef USELUCENE
+ SWKey *saveKey = 0;
+ SWKey *searchKey = 0;
+ SWKey textkey;
+ SWBuf c;
+
+
+ // turn all filters to default values
+ StringList filterSettings;
+ for (OptionFilterList::iterator filter = optionFilters->begin(); filter != optionFilters->end(); filter++) {
+ filterSettings.push_back((*filter)->getOptionValue());
+ (*filter)->setOptionValue(*((*filter)->getOptionValues().begin()));
+
+ if (!strcmp("Greek Accents", (*filter)->getOptionName())) {
+ (*filter)->setOptionValue("Off");
+ }
+ }
+
+
+ // be sure we give CLucene enough file handles
+ FileMgr::getSystemFileMgr()->flush();
+
+ // save key information so as not to disrupt original
+ // module position
+ if (!key->Persist()) {
+ saveKey = CreateKey();
+ *saveKey = *key;
+ }
+ else saveKey = key;
+
+ searchKey = (key->Persist())?key->clone():0;
+ if (searchKey) {
+ searchKey->Persist(1);
+ setKey(*searchKey);
+ }
+
+ RAMDirectory *ramDir = NULL;
+ IndexWriter *coreWriter = NULL;
+ IndexWriter *fsWriter = NULL;
+ Directory *d = NULL;
+
+ standard::StandardAnalyzer *an = new standard::StandardAnalyzer();
+ SWBuf target = getConfigEntry("AbsoluteDataPath");
+ bool includeKeyInSearch = getConfig().has("SearchOption", "IncludeKeyInSearch");
+ char ch = target.c_str()[strlen(target.c_str())-1];
+ if ((ch != '/') && (ch != '\\'))
+ target.append('/');
+ target.append("lucene");
+ FileMgr::createParent(target+"/dummy");
+
+ ramDir = new RAMDirectory();
+ coreWriter = new IndexWriter(ramDir, an, true);
+
+
+
+ char perc = 1;
+ VerseKey *vkcheck = 0;
+ vkcheck = SWDYNAMIC_CAST(VerseKey, key);
+
+ TreeKeyIdx *tkcheck = 0;
+ tkcheck = SWDYNAMIC_CAST(TreeKeyIdx, key);
+
+
+ *this = BOTTOM;
+ long highIndex = (vkcheck)?32300/*vkcheck->NewIndex()*/:key->Index();
+ if (!highIndex)
+ highIndex = 1; // avoid division by zero errors.
+
+ bool savePEA = isProcessEntryAttributes();
+ processEntryAttributes(true);
+
+ // prox chapter blocks
+ // position module at the beginning
+ *this = TOP;
+
+ VerseKey chapMax;
+ SWBuf proxBuf;
+ SWBuf proxLem;
+ SWBuf strong;
+
+ const short int MAX_CONV_SIZE = 2047;
+ wchar_t wcharBuffer[MAX_CONV_SIZE + 1];
+
+ char err = Error();
+ while (!err) {
+ long mindex = 0;
+ if (vkcheck)
+ mindex = vkcheck->NewIndex();
+ else mindex = key->Index();
+
+ proxBuf = "";
+ proxLem = "";
+
+ // computer percent complete so we can report to our progress callback
+ float per = (float)mindex / highIndex;
+ // between 5%-98%
+ per *= 93; per += 5;
+ char newperc = (char)per;
+ if (newperc > perc) {
+ perc = newperc;
+ (*percent)(perc, percentUserData);
+ }
+
+ // get "content" field
+ const char *content = StripText();
+
+ bool good = false;
+
+ // start out entry
+ Document *doc = new Document();
+ // get "key" field
+ SWBuf keyText = (vkcheck) ? vkcheck->getOSISRef() : getKeyText();
+ if (content && *content) {
+ good = true;
+
+
+ // build "strong" field
+ AttributeTypeList::iterator words;
+ AttributeList::iterator word;
+ AttributeValue::iterator strongVal;
+
+ strong="";
+ words = getEntryAttributes().find("Word");
+ if (words != getEntryAttributes().end()) {
+ for (word = words->second.begin();word != words->second.end(); word++) {
+ int partCount = atoi(word->second["PartCount"]);
+ if (!partCount) partCount = 1;
+ for (int i = 0; i < partCount; i++) {
+ SWBuf tmp = "Lemma";
+ if (partCount > 1) tmp.appendFormatted(".%d", i+1);
+ strongVal = word->second.find(tmp);
+ if (strongVal != word->second.end()) {
+ // cheeze. skip empty article tags that weren't assigned to any text
+ if (strongVal->second == "G3588") {
+ if (word->second.find("Text") == word->second.end())
+ continue; // no text? let's skip
+ }
+ strong.append(strongVal->second);
+ strong.append(' ');
+ }
+ }
+ }
+ }
+
+ lucene_utf8towcs(wcharBuffer, keyText, MAX_CONV_SIZE); //keyText must be utf8
+ doc->add( *Field::Text(_T("key"), wcharBuffer ) );
+
+ if (includeKeyInSearch) {
+ c = keyText;
+ c += " ";
+ c += content;
+ content = c.c_str();
+ }
+
+ lucene_utf8towcs(wcharBuffer, content, MAX_CONV_SIZE); //content must be utf8
+ doc->add( *Field::UnStored(_T("content"), wcharBuffer) );
+
+ if (strong.length() > 0) {
+ lucene_utf8towcs(wcharBuffer, strong, MAX_CONV_SIZE);
+ doc->add( *Field::UnStored(_T("lemma"), wcharBuffer) );
+//printf("setting fields (%s).\ncontent: %s\nlemma: %s\n", (const char *)*key, content, strong.c_str());
+ }
+
+//printf("setting fields (%s).\n", (const char *)*key);
+//fflush(stdout);
+ }
+ // don't write yet, cuz we have to see if we're the first of a prox block (5:1 or chapter5/verse1
+
+ // for VerseKeys use chapter
+ if (vkcheck) {
+ chapMax = *vkcheck;
+ // we're the first verse in a chapter
+ if (vkcheck->Verse() == 1) {
+ chapMax = MAXVERSE;
+ VerseKey saveKey = *vkcheck;
+ while ((!err) && (*vkcheck <= chapMax)) {
+//printf("building proxBuf from (%s).\nproxBuf.c_str(): %s\n", (const char *)*key, proxBuf.c_str());
+//printf("building proxBuf from (%s).\n", (const char *)*key);
+
+ content = StripText();
+ if (content && *content) {
+ // build "strong" field
+ strong = "";
+ AttributeTypeList::iterator words;
+ AttributeList::iterator word;
+ AttributeValue::iterator strongVal;
+
+ words = getEntryAttributes().find("Word");
+ if (words != getEntryAttributes().end()) {
+ for (word = words->second.begin();word != words->second.end(); word++) {
+ int partCount = atoi(word->second["PartCount"]);
+ if (!partCount) partCount = 1;
+ for (int i = 0; i < partCount; i++) {
+ SWBuf tmp = "Lemma";
+ if (partCount > 1) tmp.appendFormatted(".%d", i+1);
+ strongVal = word->second.find(tmp);
+ if (strongVal != word->second.end()) {
+ // cheeze. skip empty article tags that weren't assigned to any text
+ if (strongVal->second == "G3588") {
+ if (word->second.find("Text") == word->second.end())
+ continue; // no text? let's skip
+ }
+ strong.append(strongVal->second);
+ strong.append(' ');
+ }
+ }
+ }
+ }
+ proxBuf += content;
+ proxBuf.append(' ');
+ proxLem += strong;
+ if (proxLem.length())
+ proxLem.append("\n");
+ }
+ (*this)++;
+ err = Error();
+ }
+ err = 0;
+ *vkcheck = saveKey;
+ }
+ }
+
+ // for TreeKeys use siblings if we have no children
+ else if (tkcheck) {
+ if (!tkcheck->hasChildren()) {
+ if (!tkcheck->previousSibling()) {
+ do {
+//printf("building proxBuf from (%s).\n", (const char *)*key);
+//fflush(stdout);
+
+ content = StripText();
+ if (content && *content) {
+ // build "strong" field
+ strong = "";
+ AttributeTypeList::iterator words;
+ AttributeList::iterator word;
+ AttributeValue::iterator strongVal;
+
+ words = getEntryAttributes().find("Word");
+ if (words != getEntryAttributes().end()) {
+ for (word = words->second.begin();word != words->second.end(); word++) {
+ int partCount = atoi(word->second["PartCount"]);
+ if (!partCount) partCount = 1;
+ for (int i = 0; i < partCount; i++) {
+ SWBuf tmp = "Lemma";
+ if (partCount > 1) tmp.appendFormatted(".%d", i+1);
+ strongVal = word->second.find(tmp);
+ if (strongVal != word->second.end()) {
+ // cheeze. skip empty article tags that weren't assigned to any text
+ if (strongVal->second == "G3588") {
+ if (word->second.find("Text") == word->second.end())
+ continue; // no text? let's skip
+ }
+ strong.append(strongVal->second);
+ strong.append(' ');
+ }
+ }
+ }
+ }
+
+ proxBuf += content;
+ proxBuf.append(' ');
+ proxLem += strong;
+ if (proxLem.length())
+ proxLem.append("\n");
+ }
+ } while (tkcheck->nextSibling());
+ tkcheck->parent();
+ tkcheck->firstChild();
+ }
+ else tkcheck->nextSibling(); // reposition from our previousSibling test
+ }
+ }
+
+ if (proxBuf.length() > 0) {
+
+ lucene_utf8towcs(wcharBuffer, proxBuf, MAX_CONV_SIZE); //keyText must be utf8
+
+//printf("proxBuf after (%s).\nprox: %s\nproxLem: %s\n", (const char *)*key, proxBuf.c_str(), proxLem.c_str());
+
+ doc->add( *Field::UnStored(_T("prox"), wcharBuffer) );
+ good = true;
+ }
+ if (proxLem.length() > 0) {
+ lucene_utf8towcs(wcharBuffer, proxLem, MAX_CONV_SIZE); //keyText must be utf8
+ doc->add( *Field::UnStored(_T("proxlem"), wcharBuffer) );
+ good = true;
+ }
+ if (good) {
+//printf("writing (%s).\n", (const char *)*key);
+//fflush(stdout);
+ coreWriter->addDocument(doc);
+ }
+ delete doc;
+
+ (*this)++;
+ err = Error();
+ }
+
+ // Optimizing automatically happens with the call to addIndexes
+ //coreWriter->optimize();
+ coreWriter->close();
+
+ if (IndexReader::indexExists(target.c_str())) {
+ d = FSDirectory::getDirectory(target.c_str(), false);
+ if (IndexReader::isLocked(d)) {
+ IndexReader::unlock(d);
+ }
+
+ fsWriter = new IndexWriter( d, an, false);
+ } else {
+ d = FSDirectory::getDirectory(target.c_str(), true);
+ fsWriter = new IndexWriter( d ,an, true);
+ }
+
+ Directory *dirs[] = { ramDir, 0 };
+ fsWriter->addIndexes(dirs);
+ fsWriter->close();
+
+ delete ramDir;
+ delete coreWriter;
+ delete fsWriter;
+ delete an;
+
+ // reposition module back to where it was before we were called
+ setKey(*saveKey);
+
+ if (!saveKey->Persist())
+ delete saveKey;
+
+ if (searchKey)
+ delete searchKey;
+
+ processEntryAttributes(savePEA);
+
+ // reset option filters back to original values
+ StringList::iterator origVal = filterSettings.begin();
+ for (OptionFilterList::iterator filter = optionFilters->begin(); filter != optionFilters->end(); filter++) {
+ (*filter)->setOptionValue(*origVal++);
+ }
+
+ return 0;
+#else
+ return SWSearchable::createSearchFramework(percent, percentUserData);
+#endif
+}
+
+/** OptionFilterBuffer a text buffer
+ * @param filters the FilterList of filters to iterate
+ * @param buf the buffer to filter
+ * @param key key location from where this buffer was extracted
+ */
+void SWModule::filterBuffer(OptionFilterList *filters, SWBuf &buf, SWKey *key) {
+ OptionFilterList::iterator it;
+ for (it = filters->begin(); it != filters->end(); it++) {
+ (*it)->processText(buf, key, this);
+ }
+}
+
+/** FilterBuffer a text buffer
+ * @param filters the FilterList of filters to iterate
+ * @param buf the buffer to filter
+ * @param key key location from where this buffer was extracted
+ */
+void SWModule::filterBuffer(FilterList *filters, SWBuf &buf, SWKey *key) {
+ FilterList::iterator it;
+ for (it = filters->begin(); it != filters->end(); it++) {
+ (*it)->processText(buf, key, this);
+ }
+}
+
+signed char SWModule::createModule(const char*) {
+ return -1;
+}
+
+void SWModule::setEntry(const char*, long) {
+}
+
+void SWModule::linkEntry(const SWKey*) {
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/tests/Makefile b/src/modules/tests/Makefile
new file mode 100644
index 0000000..81f7721
--- /dev/null
+++ b/src/modules/tests/Makefile
@@ -0,0 +1,4 @@
+root := ../../..
+
+all:
+ make -C ${root}
diff --git a/src/modules/tests/echomod.cpp b/src/modules/tests/echomod.cpp
new file mode 100644
index 0000000..2640773
--- /dev/null
+++ b/src/modules/tests/echomod.cpp
@@ -0,0 +1,27 @@
+/******************************************************************************
+ * echomod.cpp - code for class 'echomod'- a dummy test text module that just
+ * echos back the key
+ */
+
+#include <echomod.h>
+
+
+SWORD_NAMESPACE_START
+
+EchoMod::EchoMod() : SWText("echomod", "Echos back key")
+{
+}
+
+
+EchoMod::~EchoMod() {
+}
+
+
+SWBuf &EchoMod::getRawEntryBuf()
+{
+ static SWBuf retVal;
+ retVal = *key;
+ return retVal;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/texts/Makefile b/src/modules/texts/Makefile
new file mode 100644
index 0000000..1a2d00d
--- /dev/null
+++ b/src/modules/texts/Makefile
@@ -0,0 +1,5 @@
+
+root := ../../..
+
+all:
+ make -C ${root}
diff --git a/src/modules/texts/Makefile.am b/src/modules/texts/Makefile.am
new file mode 100644
index 0000000..f5d81b4
--- /dev/null
+++ b/src/modules/texts/Makefile.am
@@ -0,0 +1,7 @@
+textsdir = $(top_srcdir)/src/modules/texts
+
+libsword_la_SOURCES += $(textsdir)/swtext.cpp
+
+include ../src/modules/texts/rawtext/Makefile.am
+include ../src/modules/texts/rawtext4/Makefile.am
+include ../src/modules/texts/ztext/Makefile.am
diff --git a/src/modules/texts/rawtext/Makefile b/src/modules/texts/rawtext/Makefile
new file mode 100644
index 0000000..35d6648
--- /dev/null
+++ b/src/modules/texts/rawtext/Makefile
@@ -0,0 +1,5 @@
+
+root := ../../../..
+
+all:
+ make -C ${root}
diff --git a/src/modules/texts/rawtext/Makefile.am b/src/modules/texts/rawtext/Makefile.am
new file mode 100644
index 0000000..5d77e44
--- /dev/null
+++ b/src/modules/texts/rawtext/Makefile.am
@@ -0,0 +1,2 @@
+rawtextdir = $(top_srcdir)/src/modules/texts/rawtext
+libsword_la_SOURCES += $(rawtextdir)/rawtext.cpp
diff --git a/src/modules/texts/rawtext/rawtext.cpp b/src/modules/texts/rawtext/rawtext.cpp
new file mode 100644
index 0000000..3245da6
--- /dev/null
+++ b/src/modules/texts/rawtext/rawtext.cpp
@@ -0,0 +1,548 @@
+/******************************************************************************
+ * rawtext.cpp - code for class 'RawText'- a module that reads raw text files:
+ * ot and nt using indexs ??.bks ??.cps ??.vss
+ */
+
+
+#include <stdio.h>
+#include <fcntl.h>
+#include <sysdata.h>
+
+#include <utilstr.h>
+#include <rawverse.h>
+#include <rawtext.h>
+#include <rawstr.h>
+#include <filemgr.h>
+#include <versekey.h>
+#include <stringmgr.h>
+
+#include <regex.h> // GNU
+#include <map>
+#include <list>
+#include <algorithm>
+
+#ifndef USELUCENE
+using std::map;
+using std::list;
+using std::find;
+#endif
+
+SWORD_NAMESPACE_START
+
+#ifndef USELUCENE
+typedef map < SWBuf, list<long> > strlist;
+typedef list<long> longlist;
+#endif
+
+/******************************************************************************
+ * RawText Constructor - Initializes data for instance of RawText
+ *
+ * ENT: iname - Internal name for module
+ * idesc - Name to display to user for module
+ * idisp - Display object to use for displaying
+ */
+
+RawText::RawText(const char *ipath, const char *iname, const char *idesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang)
+ : SWText(iname, idesc, idisp, enc, dir, mark, ilang),
+ RawVerse(ipath) {
+
+#ifndef USELUCENE
+ SWBuf fname;
+ fname = path;
+ char ch = fname.c_str()[strlen(fname.c_str())-1];
+ if ((ch != '/') && (ch != '\\'))
+ fname += "/";
+
+ for (int loop = 0; loop < 2; loop++) {
+ fastSearch[loop] = 0;
+ SWBuf fastidxname =(fname + ((loop)?"ntwords.dat":"otwords.dat"));
+ if (FileMgr::existsFile(fastidxname.c_str())) {
+ fastidxname = (fname + ((loop)?"ntwords.idx":"otwords.idx"));
+ if (FileMgr::existsFile(fastidxname.c_str()))
+ fastSearch[loop] = new RawStr((fname + ((loop)?"ntwords":"otwords")).c_str());
+ }
+ }
+#endif
+}
+
+
+/******************************************************************************
+ * RawText Destructor - Cleans up instance of RawText
+ */
+
+RawText::~RawText() {
+#ifndef USELUCENE
+ if (fastSearch[0])
+ delete fastSearch[0];
+
+ if (fastSearch[1])
+ delete fastSearch[1];
+#endif
+}
+
+
+bool RawText::isWritable() {
+ return ((idxfp[0]->getFd() > 0) && ((idxfp[0]->mode & FileMgr::RDWR) == FileMgr::RDWR));
+}
+
+
+/******************************************************************************
+ * RawText::getRawEntry - Returns the correct verse when char * cast
+ * is requested
+ *
+ * RET: string buffer with verse
+ */
+
+SWBuf &RawText::getRawEntryBuf() {
+ long start = 0;
+ unsigned short size = 0;
+ VerseKey &key = getVerseKey();
+
+ findOffset(key.Testament(), key.Index(), &start, &size);
+ entrySize = size; // support getEntrySize call
+
+ entryBuf = "";
+ readText(key.Testament(), start, size, entryBuf);
+
+ rawFilter(entryBuf, 0); // hack, decipher
+ rawFilter(entryBuf, &key);
+
+// if (!isUnicode())
+ prepText(entryBuf);
+
+ return entryBuf;
+}
+
+
+signed char RawText::createSearchFramework(void (*percent)(char, void *), void *percentUserData) {
+#ifndef USELUCENE
+ SWKey *savekey = 0;
+ SWKey *searchkey = 0;
+ SWKey textkey;
+ char *word = 0;
+ char *wordBuf = 0;
+
+ // dictionary holds words associated with a list
+ // containing every module position that contains
+ // the word. [0] Old Testament; [1] NT
+ map < SWBuf, list<long> > dictionary[2];
+
+
+ // save key information so as not to disrupt original
+ // module position
+ if (!key->Persist()) {
+ savekey = CreateKey();
+ *savekey = *key;
+ }
+ else savekey = key;
+
+ searchkey = (key->Persist())?key->clone():0;
+ if (searchkey) {
+ searchkey->Persist(1);
+ setKey(*searchkey);
+ }
+
+ // position module at the beginning
+ *this = TOP;
+
+ VerseKey *lkey = (VerseKey *)key;
+
+ // iterate thru each entry in module
+ while (!Error()) {
+ long index = lkey->Index();
+ wordBuf = (char *)calloc(sizeof(char), strlen(StripText()) + 1);
+ strcpy(wordBuf, StripText());
+
+ // grab each word from the text
+ word = strtok(wordBuf, " !.,?;:()-=+/\\|{}[]\"<>");
+ while (word) {
+
+ // make word upper case
+ toupperstr(word);
+
+ // lookup word in dictionary (or make entry in dictionary
+ // for this word) and add this module position (index) to
+ // the word's associated list of module positions
+ dictionary[lkey->Testament()-1][word].push_back(index);
+ word = strtok(NULL, " !.,?;:()-=+/\\|{}[]\"<>");
+ }
+ free(wordBuf);
+ (*this)++;
+ }
+
+ // reposition module back to where it was before we were called
+ setKey(*savekey);
+
+ if (!savekey->Persist())
+ delete savekey;
+
+ if (searchkey)
+ delete searchkey;
+
+
+ // --------- Let's output an index from our dictionary -----------
+ FileDesc *datfd;
+ FileDesc *idxfd;
+ strlist::iterator it;
+ longlist::iterator it2;
+ unsigned long offset, entryoff;
+ unsigned short size;
+
+ SWBuf fname;
+ fname = path;
+ char ch = fname.c_str()[strlen(fname.c_str())-1];
+ if ((ch != '/') && (ch != '\\'))
+ fname += "/";
+
+ // for old and new testament do...
+ for (int loop = 0; loop < 2; loop++) {
+ datfd = FileMgr::getSystemFileMgr()->open((fname + ((loop)?"ntwords.dat":"otwords.dat")).c_str(), FileMgr::CREAT|FileMgr::WRONLY, 00644);
+ if (datfd->getFd() == -1)
+ return -1;
+ idxfd = FileMgr::getSystemFileMgr()->open((fname + ((loop)?"ntwords.idx":"otwords.idx")).c_str(), FileMgr::CREAT|FileMgr::WRONLY, 00644);
+ if (idxfd->getFd() == -1) {
+ FileMgr::getSystemFileMgr()->close(datfd);
+ return -1;
+ }
+
+ // iterate thru each word in the dictionary
+ for (it = dictionary[loop].begin(); it != dictionary[loop].end(); it++) {
+ printf("%s: ", it->first.c_str());
+
+ // get our current offset in our word.dat file and write this as the start
+ // of the next entry in our database
+ offset = datfd->seek(0, SEEK_CUR);
+ idxfd->write(&offset, 4);
+
+ // write our word out to the word.dat file, delineating with a \n
+ datfd->write(it->first.c_str(), strlen(it->first.c_str()));
+ datfd->write("\n", 1);
+
+ // force our mod position list for this word to be unique (remove
+ // duplicates that may exist if the word was found more than once
+ // in the verse
+ it->second.unique();
+
+ // iterate thru each mod position for this word and output it to
+ // our database
+ unsigned short count = 0;
+ for (it2 = it->second.begin(); it2 != it->second.end(); it2++) {
+ entryoff= *it2;
+ datfd->write(&entryoff, 4);
+ count++;
+ }
+
+ // now see what our new position is in our word.dat file and
+ // determine the size of this database entry
+ size = datfd->seek(0, SEEK_CUR) - offset;
+
+ // store the size of this database entry
+ idxfd->write(&size, 2);
+ printf("%d entries (size: %d)\n", count, size);
+ }
+ FileMgr::getSystemFileMgr()->close(datfd);
+ FileMgr::getSystemFileMgr()->close(idxfd);
+ }
+ return 0;
+#else
+ return SWModule::createSearchFramework(percent, percentUserData);
+#endif
+}
+
+
+void RawText::deleteSearchFramework() {
+#ifndef USELUCENE
+ SWBuf target = path;
+ char ch = target.c_str()[strlen(target.c_str())-1];
+ if ((ch != '/') && (ch != '\\'))
+ target += "/lucene";
+ FileMgr::removeFile(target + "ntwords.dat");
+ FileMgr::removeFile(target + "otwords.dat");
+ FileMgr::removeFile(target + "ntwords.idx");
+ FileMgr::removeFile(target + "otwords.idx");
+#else
+ SWModule::deleteSearchFramework();
+#endif
+}
+
+
+/******************************************************************************
+ * SWModule::search - Searches a module for a string
+ *
+ * ENT: istr - string for which to search
+ * searchType - type of search to perform
+ * >=0 - regex
+ * -1 - phrase
+ * -2 - multiword
+ * flags - options flags for search
+ * justCheckIfSupported - if set, don't search, only tell if this
+ * function supports requested search.
+ *
+ * RET: ListKey set to verses that contain istr
+ */
+
+ListKey &RawText::search(const char *istr, int searchType, int flags, SWKey *scope, bool *justCheckIfSupported, void (*percent)(char, void *), void *percentUserData) {
+#ifndef USELUCENE
+ listKey.ClearList();
+
+ if ((fastSearch[0]) && (fastSearch[1])) {
+
+ switch (searchType) {
+ case -2: {
+
+ if ((flags & REG_ICASE) != REG_ICASE) // if haven't chosen to
+ // ignore case
+ break; // can't handle fast case sensitive searches
+
+ // test to see if our scope for this search is bounded by a
+ // VerseKey
+ VerseKey *testKeyType = 0;
+ SWTRY {
+ testKeyType = SWDYNAMIC_CAST(VerseKey, ((scope)?scope:key));
+ }
+ SWCATCH ( ... ) {}
+ // if we don't have a VerseKey * decendant we can't handle
+ // because of scope.
+ // In the future, add bool SWKey::isValid(const char *tryString);
+ if (!testKeyType)
+ break;
+
+
+ // check if we just want to see if search is supported.
+ // If we've gotten this far, then it is supported.
+ if (justCheckIfSupported) {
+ *justCheckIfSupported = true;
+ return listKey;
+ }
+
+ SWKey saveKey = *testKeyType; // save current place
+
+ char error = 0;
+ char **words = 0;
+ char *wordBuf = 0;
+ int wordCount = 0;
+ long start;
+ unsigned short size;
+ char *idxbuf = 0;
+ SWBuf datBuf;
+ list <long> indexes;
+ list <long> indexes2;
+ VerseKey vk;
+ vk = TOP;
+
+ (*percent)(10, percentUserData);
+
+ // toupper our copy of search string
+ stdstr(&wordBuf, istr);
+ toupperstr(wordBuf);
+
+ // get list of individual words
+ words = (char **)calloc(sizeof(char *), 10);
+ int allocWords = 10;
+ words[wordCount] = strtok(wordBuf, " ");
+ while (words[wordCount]) {
+ wordCount++;
+ if (wordCount == allocWords) {
+ allocWords+=10;
+ words = (char **)realloc(words, sizeof(char *)*allocWords);
+ }
+ words[wordCount] = strtok(NULL, " ");
+ }
+
+ (*percent)(20, percentUserData);
+
+ // clear our result set
+ indexes.erase(indexes.begin(), indexes.end());
+
+ // search both old and new testament indexes
+ for (int j = 0; j < 2; j++) {
+ // iterate thru each word the user passed to us.
+ for (int i = 0; i < wordCount; i++) {
+
+ // clear this word's result set
+ indexes2.erase(indexes2.begin(), indexes2.end());
+ error = 0;
+
+ // iterate thru every word in the database that starts
+ // with our search word
+ for (int away = 0; !error; away++) {
+ idxbuf = 0;
+
+ // find our word in the database and jump ahead _away_
+ error = fastSearch[j]->findOffset(words[i], &start, &size, away);
+
+ // get the word from the database
+ fastSearch[j]->getIDXBufDat(start, &idxbuf);
+
+ // check to see if it starts with our target word
+ if (strlen(idxbuf) > strlen(words[i]))
+ idxbuf[strlen(words[i])] = 0;
+// else words[i][strlen(idxbuf)] = 0;
+ if (!strcmp(idxbuf, words[i])) {
+
+ // get data for this word from database
+ delete [] idxbuf;
+ idxbuf = 0;
+ datBuf = "";
+ fastSearch[j]->readText(start, &size, &idxbuf, datBuf);
+
+ // we know that the data consists of sizof(long)
+ // records each a valid module position that constains
+ // this word
+ //
+ // iterate thru each of these module positions
+ long *keyindex = (long *)datBuf.getRawData();
+ while (keyindex < (long *)(datBuf.getRawData() + size - (strlen(idxbuf) + 1))) {
+ if (i) { // if we're not on our first word
+
+ // check to see if this word is already in the result set.
+ // This is our AND functionality
+ if (find(indexes.begin(), indexes.end(), *keyindex) != indexes.end())
+ // add to new result set
+ indexes2.push_back(*keyindex);
+ }
+ else indexes2.push_back(*keyindex);
+ keyindex++;
+ }
+ }
+ else error = 1; // no more matches
+ free(idxbuf);
+ }
+
+ // make new result set final result set
+ indexes = indexes2;
+
+ percent((char)(20 + (float)((j*wordCount)+i)/(wordCount * 2) * 78), percentUserData);
+ }
+
+ // indexes contains our good verses, lets return them in a listKey
+ indexes.sort();
+
+ // iterate thru each good module position that meets the search
+ for (longlist::iterator it = indexes.begin(); it != indexes.end(); it++) {
+
+ // set a temporary verse key to this module position
+ vk.Testament(j+1);
+ vk.Error();
+ vk.Index(*it);
+
+ // check scope
+ // Try to set our scope key to this verse key
+ if (scope) {
+ *testKeyType = vk;
+
+ // check to see if it set ok and if so, add to our return list
+ if (*testKeyType == vk)
+ listKey << (const char *) vk;
+ }
+ else listKey << (const char*) vk;
+ }
+ }
+ (*percent)(98, percentUserData);
+
+ free(words);
+ free(wordBuf);
+
+ *testKeyType = saveKey; // set current place back to original
+
+ listKey = TOP;
+ (*percent)(100, percentUserData);
+ return listKey;
+ }
+
+ default:
+ break;
+ }
+ }
+
+ // check if we just want to see if search is supported
+ if (justCheckIfSupported) {
+ *justCheckIfSupported = false;
+ return listKey;
+ }
+
+#endif
+ // if we don't support this search, fall back to base class
+ return SWModule::search(istr, searchType, flags, scope, justCheckIfSupported, percent, percentUserData);
+}
+
+
+void RawText::setEntry(const char *inbuf, long len) {
+ VerseKey &key = getVerseKey();
+ doSetText(key.Testament(), key.Index(), inbuf, len);
+}
+
+
+void RawText::linkEntry(const SWKey *inkey) {
+ VerseKey &destkey = getVerseKey();
+ const VerseKey *srckey = 0;
+
+ // see if we have a VerseKey * or decendant
+ SWTRY {
+ srckey = SWDYNAMIC_CAST(VerseKey, inkey);
+ }
+ SWCATCH ( ... ) {}
+ // if we don't have a VerseKey * decendant, create our own
+ if (!srckey)
+ srckey = new VerseKey(inkey);
+
+ doLinkEntry(destkey.Testament(), destkey.Index(), srckey->Index());
+
+ if (inkey != srckey) // free our key if we created a VerseKey
+ delete srckey;
+}
+
+
+/******************************************************************************
+ * RawText::deleteEntry - deletes this entry
+ *
+ * RET: *this
+ */
+
+void RawText::deleteEntry() {
+ VerseKey &key = getVerseKey();
+ doSetText(key.Testament(), key.Index(), "");
+}
+
+/******************************************************************************
+ * RawText::increment - Increments module key a number of entries
+ *
+ * ENT: increment - Number of entries to jump forward
+ *
+ * RET: *this
+ */
+
+void RawText::increment(int steps) {
+ long start;
+ unsigned short size;
+ VerseKey *tmpkey = &getVerseKey();
+
+ findOffset(tmpkey->Testament(), tmpkey->Index(), &start, &size);
+
+ SWKey lastgood = *tmpkey;
+ while (steps) {
+ long laststart = start;
+ unsigned short lastsize = size;
+ SWKey lasttry = *tmpkey;
+ (steps > 0) ? (*key)++ : (*key)--;
+ tmpkey = &getVerseKey();
+
+ if ((error = key->Error())) {
+ *key = lastgood;
+ break;
+ }
+ long index = tmpkey->Index();
+ findOffset(tmpkey->Testament(), index, &start, &size);
+ if (
+ (((laststart != start) || (lastsize != size)) // we're a different entry
+// && (start > 0)
+ && (size)) // and we actually have a size
+ ||(!skipConsecutiveLinks)) { // or we don't want to skip consecutive links
+ steps += (steps < 0) ? 1 : -1;
+ lastgood = *tmpkey;
+ }
+ }
+ error = (error) ? KEYERR_OUTOFBOUNDS : 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/texts/rawtext4/Makefile.am b/src/modules/texts/rawtext4/Makefile.am
new file mode 100644
index 0000000..1f3791d
--- /dev/null
+++ b/src/modules/texts/rawtext4/Makefile.am
@@ -0,0 +1,2 @@
+rawtext4dir = $(top_srcdir)/src/modules/texts/rawtext4
+libsword_la_SOURCES += $(rawtext4dir)/rawtext4.cpp
diff --git a/src/modules/texts/rawtext4/rawtext4.cpp b/src/modules/texts/rawtext4/rawtext4.cpp
new file mode 100644
index 0000000..a06691e
--- /dev/null
+++ b/src/modules/texts/rawtext4/rawtext4.cpp
@@ -0,0 +1,548 @@
+/******************************************************************************
+ * rawtext4.cpp - code for class 'RawText4'- a module that reads raw text files:
+ * ot and nt using indexs ??.bks ??.cps ??.vss
+ */
+
+
+#include <stdio.h>
+#include <fcntl.h>
+#include <sysdata.h>
+
+#include <utilstr.h>
+#include <rawverse4.h>
+#include <rawtext4.h>
+#include <rawstr4.h>
+#include <filemgr.h>
+#include <versekey.h>
+#include <stringmgr.h>
+
+#include <regex.h> // GNU
+#include <map>
+#include <list>
+#include <algorithm>
+
+#ifndef USELUCENE
+using std::map;
+using std::list;
+using std::find;
+#endif
+
+SWORD_NAMESPACE_START
+
+#ifndef USELUCENE
+typedef map < SWBuf, list<long> > strlist;
+typedef list<long> longlist;
+#endif
+
+/******************************************************************************
+ * RawText4 Constructor - Initializes data for instance of RawText4
+ *
+ * ENT: iname - Internal name for module
+ * idesc - Name to display to user for module
+ * idisp - Display object to use for displaying
+ */
+
+RawText4::RawText4(const char *ipath, const char *iname, const char *idesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang)
+ : SWText(iname, idesc, idisp, enc, dir, mark, ilang),
+ RawVerse4(ipath) {
+
+#ifndef USELUCENE
+ SWBuf fname;
+ fname = path;
+ char ch = fname.c_str()[strlen(fname.c_str())-1];
+ if ((ch != '/') && (ch != '\\'))
+ fname += "/";
+
+ for (int loop = 0; loop < 2; loop++) {
+ fastSearch[loop] = 0;
+ SWBuf fastidxname =(fname + ((loop)?"ntwords.dat":"otwords.dat"));
+ if (FileMgr::existsFile(fastidxname.c_str())) {
+ fastidxname = (fname + ((loop)?"ntwords.idx":"otwords.idx"));
+ if (FileMgr::existsFile(fastidxname.c_str()))
+ fastSearch[loop] = new RawStr4((fname + ((loop)?"ntwords":"otwords")).c_str());
+ }
+ }
+#endif
+}
+
+
+/******************************************************************************
+ * RawText4 Destructor - Cleans up instance of RawText4
+ */
+
+RawText4::~RawText4() {
+#ifndef USELUCENE
+ if (fastSearch[0])
+ delete fastSearch[0];
+
+ if (fastSearch[1])
+ delete fastSearch[1];
+#endif
+}
+
+
+bool RawText4::isWritable() {
+ return ((idxfp[0]->getFd() > 0) && ((idxfp[0]->mode & FileMgr::RDWR) == FileMgr::RDWR));
+}
+
+
+/******************************************************************************
+ * RawText4::getRawEntry - Returns the correct verse when char * cast
+ * is requested
+ *
+ * RET: string buffer with verse
+ */
+
+SWBuf &RawText4::getRawEntryBuf() {
+ long start = 0;
+ unsigned long size = 0;
+ VerseKey &key = getVerseKey();
+
+ findOffset(key.Testament(), key.Index(), &start, &size);
+ entrySize = size; // support getEntrySize call
+
+ entryBuf = "";
+ readText(key.Testament(), start, size, entryBuf);
+
+ rawFilter(entryBuf, 0); // hack, decipher
+ rawFilter(entryBuf, &key);
+
+// if (!isUnicode())
+ prepText(entryBuf);
+
+ return entryBuf;
+}
+
+
+signed char RawText4::createSearchFramework(void (*percent)(char, void *), void *percentUserData) {
+#ifndef USELUCENE
+ SWKey *savekey = 0;
+ SWKey *searchkey = 0;
+ SWKey textkey;
+ char *word = 0;
+ char *wordBuf = 0;
+
+ // dictionary holds words associated with a list
+ // containing every module position that contains
+ // the word. [0] Old Testament; [1] NT
+ map < SWBuf, list<long> > dictionary[2];
+
+
+ // save key information so as not to disrupt original
+ // module position
+ if (!key->Persist()) {
+ savekey = CreateKey();
+ *savekey = *key;
+ }
+ else savekey = key;
+
+ searchkey = (key->Persist())?key->clone():0;
+ if (searchkey) {
+ searchkey->Persist(1);
+ setKey(*searchkey);
+ }
+
+ // position module at the beginning
+ *this = TOP;
+
+ VerseKey *lkey = (VerseKey *)key;
+
+ // iterate thru each entry in module
+ while (!Error()) {
+ long index = lkey->Index();
+ wordBuf = (char *)calloc(sizeof(char), strlen(StripText()) + 1);
+ strcpy(wordBuf, StripText());
+
+ // grab each word from the text
+ word = strtok(wordBuf, " !.,?;:()-=+/\\|{}[]\"<>");
+ while (word) {
+
+ // make word upper case
+ toupperstr(word);
+
+ // lookup word in dictionary (or make entry in dictionary
+ // for this word) and add this module position (index) to
+ // the word's associated list of module positions
+ dictionary[lkey->Testament()-1][word].push_back(index);
+ word = strtok(NULL, " !.,?;:()-=+/\\|{}[]\"<>");
+ }
+ free(wordBuf);
+ (*this)++;
+ }
+
+ // reposition module back to where it was before we were called
+ setKey(*savekey);
+
+ if (!savekey->Persist())
+ delete savekey;
+
+ if (searchkey)
+ delete searchkey;
+
+
+ // --------- Let's output an index from our dictionary -----------
+ FileDesc *datfd;
+ FileDesc *idxfd;
+ strlist::iterator it;
+ longlist::iterator it2;
+ unsigned long offset, entryoff;
+ unsigned long size;
+
+ SWBuf fname;
+ fname = path;
+ char ch = fname.c_str()[strlen(fname.c_str())-1];
+ if ((ch != '/') && (ch != '\\'))
+ fname += "/";
+
+ // for old and new testament do...
+ for (int loop = 0; loop < 2; loop++) {
+ datfd = FileMgr::getSystemFileMgr()->open((fname + ((loop)?"ntwords.dat":"otwords.dat")).c_str(), FileMgr::CREAT|FileMgr::WRONLY, 00644);
+ if (datfd->getFd() == -1)
+ return -1;
+ idxfd = FileMgr::getSystemFileMgr()->open((fname + ((loop)?"ntwords.idx":"otwords.idx")).c_str(), FileMgr::CREAT|FileMgr::WRONLY, 00644);
+ if (idxfd->getFd() == -1) {
+ FileMgr::getSystemFileMgr()->close(datfd);
+ return -1;
+ }
+
+ // iterate thru each word in the dictionary
+ for (it = dictionary[loop].begin(); it != dictionary[loop].end(); it++) {
+ printf("%s: ", it->first.c_str());
+
+ // get our current offset in our word.dat file and write this as the start
+ // of the next entry in our database
+ offset = datfd->seek(0, SEEK_CUR);
+ idxfd->write(&offset, 4);
+
+ // write our word out to the word.dat file, delineating with a \n
+ datfd->write(it->first.c_str(), strlen(it->first.c_str()));
+ datfd->write("\n", 1);
+
+ // force our mod position list for this word to be unique (remove
+ // duplicates that may exist if the word was found more than once
+ // in the verse
+ it->second.unique();
+
+ // iterate thru each mod position for this word and output it to
+ // our database
+ unsigned short count = 0;
+ for (it2 = it->second.begin(); it2 != it->second.end(); it2++) {
+ entryoff= *it2;
+ datfd->write(&entryoff, 4);
+ count++;
+ }
+
+ // now see what our new position is in our word.dat file and
+ // determine the size of this database entry
+ size = datfd->seek(0, SEEK_CUR) - offset;
+
+ // store the size of this database entry
+ idxfd->write(&size, 4);
+ printf("%d entries (size: %d)\n", count, size);
+ }
+ FileMgr::getSystemFileMgr()->close(datfd);
+ FileMgr::getSystemFileMgr()->close(idxfd);
+ }
+ return 0;
+#else
+ return SWModule::createSearchFramework(percent, percentUserData);
+#endif
+}
+
+
+void RawText4::deleteSearchFramework() {
+#ifndef USELUCENE
+ SWBuf target = path;
+ char ch = target.c_str()[strlen(target.c_str())-1];
+ if ((ch != '/') && (ch != '\\'))
+ target += "/lucene";
+ FileMgr::removeFile(target + "ntwords.dat");
+ FileMgr::removeFile(target + "otwords.dat");
+ FileMgr::removeFile(target + "ntwords.idx");
+ FileMgr::removeFile(target + "otwords.idx");
+#else
+ SWModule::deleteSearchFramework();
+#endif
+}
+
+
+/******************************************************************************
+ * SWModule::search - Searches a module for a string
+ *
+ * ENT: istr - string for which to search
+ * searchType - type of search to perform
+ * >=0 - regex
+ * -1 - phrase
+ * -2 - multiword
+ * flags - options flags for search
+ * justCheckIfSupported - if set, don't search, only tell if this
+ * function supports requested search.
+ *
+ * RET: ListKey set to verses that contain istr
+ */
+
+ListKey &RawText4::search(const char *istr, int searchType, int flags, SWKey *scope, bool *justCheckIfSupported, void (*percent)(char, void *), void *percentUserData) {
+#ifndef USELUCENE
+ listKey.ClearList();
+
+ if ((fastSearch[0]) && (fastSearch[1])) {
+
+ switch (searchType) {
+ case -2: {
+
+ if ((flags & REG_ICASE) != REG_ICASE) // if haven't chosen to
+ // ignore case
+ break; // can't handle fast case sensitive searches
+
+ // test to see if our scope for this search is bounded by a
+ // VerseKey
+ VerseKey *testKeyType = 0;
+ SWTRY {
+ testKeyType = SWDYNAMIC_CAST(VerseKey, ((scope)?scope:key));
+ }
+ SWCATCH ( ... ) {}
+ // if we don't have a VerseKey * decendant we can't handle
+ // because of scope.
+ // In the future, add bool SWKey::isValid(const char *tryString);
+ if (!testKeyType)
+ break;
+
+
+ // check if we just want to see if search is supported.
+ // If we've gotten this far, then it is supported.
+ if (justCheckIfSupported) {
+ *justCheckIfSupported = true;
+ return listKey;
+ }
+
+ SWKey saveKey = *testKeyType; // save current place
+
+ char error = 0;
+ char **words = 0;
+ char *wordBuf = 0;
+ int wordCount = 0;
+ long start;
+ unsigned long size;
+ char *idxbuf = 0;
+ SWBuf datBuf;
+ list <long> indexes;
+ list <long> indexes2;
+ VerseKey vk;
+ vk = TOP;
+
+ (*percent)(10, percentUserData);
+
+ // toupper our copy of search string
+ stdstr(&wordBuf, istr);
+ toupperstr(wordBuf);
+
+ // get list of individual words
+ words = (char **)calloc(sizeof(char *), 10);
+ int allocWords = 10;
+ words[wordCount] = strtok(wordBuf, " ");
+ while (words[wordCount]) {
+ wordCount++;
+ if (wordCount == allocWords) {
+ allocWords+=10;
+ words = (char **)realloc(words, sizeof(char *)*allocWords);
+ }
+ words[wordCount] = strtok(NULL, " ");
+ }
+
+ (*percent)(20, percentUserData);
+
+ // clear our result set
+ indexes.erase(indexes.begin(), indexes.end());
+
+ // search both old and new testament indexes
+ for (int j = 0; j < 2; j++) {
+ // iterate thru each word the user passed to us.
+ for (int i = 0; i < wordCount; i++) {
+
+ // clear this word's result set
+ indexes2.erase(indexes2.begin(), indexes2.end());
+ error = 0;
+
+ // iterate thru every word in the database that starts
+ // with our search word
+ for (int away = 0; !error; away++) {
+ idxbuf = 0;
+
+ // find our word in the database and jump ahead _away_
+ error = fastSearch[j]->findOffset(words[i], &start, &size, away);
+
+ // get the word from the database
+ fastSearch[j]->getIDXBufDat(start, &idxbuf);
+
+ // check to see if it starts with our target word
+ if (strlen(idxbuf) > strlen(words[i]))
+ idxbuf[strlen(words[i])] = 0;
+// else words[i][strlen(idxbuf)] = 0;
+ if (!strcmp(idxbuf, words[i])) {
+
+ // get data for this word from database
+ delete [] idxbuf;
+ idxbuf = 0;
+ datBuf = "";
+ fastSearch[j]->readText(start, &size, &idxbuf, datBuf);
+
+ // we know that the data consists of sizof(long)
+ // records each a valid module position that constains
+ // this word
+ //
+ // iterate thru each of these module positions
+ long *keyindex = (long *)datBuf.getRawData();
+ while (keyindex < (long *)(datBuf.getRawData() + size - (strlen(idxbuf) + 1))) {
+ if (i) { // if we're not on our first word
+
+ // check to see if this word is already in the result set.
+ // This is our AND functionality
+ if (find(indexes.begin(), indexes.end(), *keyindex) != indexes.end())
+ // add to new result set
+ indexes2.push_back(*keyindex);
+ }
+ else indexes2.push_back(*keyindex);
+ keyindex++;
+ }
+ }
+ else error = 1; // no more matches
+ free(idxbuf);
+ }
+
+ // make new result set final result set
+ indexes = indexes2;
+
+ percent((char)(20 + (float)((j*wordCount)+i)/(wordCount * 2) * 78), percentUserData);
+ }
+
+ // indexes contains our good verses, lets return them in a listKey
+ indexes.sort();
+
+ // iterate thru each good module position that meets the search
+ for (longlist::iterator it = indexes.begin(); it != indexes.end(); it++) {
+
+ // set a temporary verse key to this module position
+ vk.Testament(j+1);
+ vk.Error();
+ vk.Index(*it);
+
+ // check scope
+ // Try to set our scope key to this verse key
+ if (scope) {
+ *testKeyType = vk;
+
+ // check to see if it set ok and if so, add to our return list
+ if (*testKeyType == vk)
+ listKey << (const char *) vk;
+ }
+ else listKey << (const char*) vk;
+ }
+ }
+ (*percent)(98, percentUserData);
+
+ free(words);
+ free(wordBuf);
+
+ *testKeyType = saveKey; // set current place back to original
+
+ listKey = TOP;
+ (*percent)(100, percentUserData);
+ return listKey;
+ }
+
+ default:
+ break;
+ }
+ }
+
+ // check if we just want to see if search is supported
+ if (justCheckIfSupported) {
+ *justCheckIfSupported = false;
+ return listKey;
+ }
+
+#endif
+ // if we don't support this search, fall back to base class
+ return SWModule::search(istr, searchType, flags, scope, justCheckIfSupported, percent, percentUserData);
+}
+
+
+void RawText4::setEntry(const char *inbuf, long len) {
+ VerseKey &key = getVerseKey();
+ doSetText(key.Testament(), key.Index(), inbuf, len);
+}
+
+
+void RawText4::linkEntry(const SWKey *inkey) {
+ VerseKey &destkey = getVerseKey();
+ const VerseKey *srckey = 0;
+
+ // see if we have a VerseKey * or decendant
+ SWTRY {
+ srckey = SWDYNAMIC_CAST(VerseKey, inkey);
+ }
+ SWCATCH ( ... ) {}
+ // if we don't have a VerseKey * decendant, create our own
+ if (!srckey)
+ srckey = new VerseKey(inkey);
+
+ doLinkEntry(destkey.Testament(), destkey.Index(), srckey->Index());
+
+ if (inkey != srckey) // free our key if we created a VerseKey
+ delete srckey;
+}
+
+
+/******************************************************************************
+ * RawText4::deleteEntry - deletes this entry
+ *
+ * RET: *this
+ */
+
+void RawText4::deleteEntry() {
+ VerseKey &key = getVerseKey();
+ doSetText(key.Testament(), key.Index(), "");
+}
+
+/******************************************************************************
+ * RawText4::increment - Increments module key a number of entries
+ *
+ * ENT: increment - Number of entries to jump forward
+ *
+ * RET: *this
+ */
+
+void RawText4::increment(int steps) {
+ long start;
+ unsigned long size;
+ VerseKey *tmpkey = &getVerseKey();
+
+ findOffset(tmpkey->Testament(), tmpkey->Index(), &start, &size);
+
+ SWKey lastgood = *tmpkey;
+ while (steps) {
+ long laststart = start;
+ unsigned long lastsize = size;
+ SWKey lasttry = *tmpkey;
+ (steps > 0) ? (*key)++ : (*key)--;
+ tmpkey = &getVerseKey();
+
+ if ((error = key->Error())) {
+ *key = lastgood;
+ break;
+ }
+ long index = tmpkey->Index();
+ findOffset(tmpkey->Testament(), index, &start, &size);
+ if (
+ (((laststart != start) || (lastsize != size)) // we're a different entry
+// && (start > 0)
+ && (size)) // and we actually have a size
+ ||(!skipConsecutiveLinks)) { // or we don't want to skip consecutive links
+ steps += (steps < 0) ? 1 : -1;
+ lastgood = *tmpkey;
+ }
+ }
+ error = (error) ? KEYERR_OUTOFBOUNDS : 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/texts/swtext.cpp b/src/modules/texts/swtext.cpp
new file mode 100644
index 0000000..d0ff386
--- /dev/null
+++ b/src/modules/texts/swtext.cpp
@@ -0,0 +1,113 @@
+/******************************************************************************
+ * swtext.cpp - code for base class 'SWText'- The basis for all text modules
+ */
+
+#include <swtext.h>
+#include <listkey.h>
+#include <localemgr.h>
+#include <versekey.h>
+
+SWORD_NAMESPACE_START
+
+/******************************************************************************
+ * SWText Constructor - Initializes data for instance of SWText
+ *
+ * ENT: imodname - Internal name for module
+ * imoddesc - Name to display to user for module
+ * idisp - Display object to use for displaying
+ */
+
+SWText::SWText(const char *imodname, const char *imoddesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang): SWModule(imodname, imoddesc, idisp, (char *)"Biblical Texts", enc, dir, mark, ilang) {
+ tmpVK = new VerseKey();
+ delete key;
+ key = CreateKey();
+ skipConsecutiveLinks = false;
+}
+
+
+/******************************************************************************
+ * SWText Destructor - Cleans up instance of SWText
+ */
+
+SWText::~SWText() {
+ delete tmpVK;
+}
+
+
+/******************************************************************************
+ * SWText CreateKey - Create the correct key (VerseKey) for use with SWText
+ */
+
+SWKey *SWText::CreateKey() {
+ return new VerseKey();
+}
+
+
+long SWText::Index() const {
+ VerseKey *key = 0;
+ SWTRY {
+ key = SWDYNAMIC_CAST(VerseKey, this->key);
+ }
+ SWCATCH ( ... ) {}
+ if (!key)
+ key = new VerseKey(this->key);
+
+ entryIndex = key->NewIndex();
+
+ if (key != this->key)
+ delete key;
+
+ return entryIndex;
+}
+
+long SWText::Index(long iindex) {
+ VerseKey *key = 0;
+ SWTRY {
+ key = SWDYNAMIC_CAST(VerseKey, this->key);
+ }
+ SWCATCH ( ... ) {}
+ if (!key)
+ key = new VerseKey(this->key);
+
+ key->Testament(1);
+ key->Index(iindex);
+
+ if (key != this->key) {
+ this->key->copyFrom(*key);
+ delete key;
+ }
+
+ return Index();
+}
+
+
+VerseKey &SWText::getVerseKey() const {
+ VerseKey *key = NULL;
+ // see if we have a VerseKey * or decendant
+ SWTRY {
+ key = SWDYNAMIC_CAST(VerseKey, this->key);
+ }
+ SWCATCH ( ... ) { }
+ if (!key) {
+ ListKey *lkTest = 0;
+ SWTRY {
+ lkTest = SWDYNAMIC_CAST(ListKey, this->key);
+ }
+ SWCATCH ( ... ) { }
+ if (lkTest) {
+ SWTRY {
+ key = SWDYNAMIC_CAST(VerseKey, lkTest->GetElement());
+ }
+ SWCATCH ( ... ) { }
+ }
+ }
+ if (!key) {
+ tmpVK->setLocale(LocaleMgr::getSystemLocaleMgr()->getDefaultLocaleName());
+ (*tmpVK) = *(this->key);
+ return (*tmpVK);
+ }
+ else return *key;
+}
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/texts/ztext/Makefile b/src/modules/texts/ztext/Makefile
new file mode 100644
index 0000000..35d6648
--- /dev/null
+++ b/src/modules/texts/ztext/Makefile
@@ -0,0 +1,5 @@
+
+root := ../../../..
+
+all:
+ make -C ${root}
diff --git a/src/modules/texts/ztext/Makefile.am b/src/modules/texts/ztext/Makefile.am
new file mode 100644
index 0000000..817107c
--- /dev/null
+++ b/src/modules/texts/ztext/Makefile.am
@@ -0,0 +1,3 @@
+ztextdir = $(top_srcdir)/src/modules/texts/ztext
+
+libsword_la_SOURCES += $(ztextdir)/ztext.cpp
diff --git a/src/modules/texts/ztext/ztext.cpp b/src/modules/texts/ztext/ztext.cpp
new file mode 100644
index 0000000..b461d49
--- /dev/null
+++ b/src/modules/texts/ztext/ztext.cpp
@@ -0,0 +1,195 @@
+/******************************************************************************
+ * ztext.cpp - code for class 'zText'- a module that reads compressed text
+ * files: ot and nt using indexs ??.vss
+ */
+
+
+#include <ctype.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <sysdata.h>
+#include <versekey.h>
+#include <filemgr.h>
+
+#include <ztext.h>
+
+#include <regex.h> // GNU
+
+
+SWORD_NAMESPACE_START
+
+/******************************************************************************
+ * zText Constructor - Initializes data for instance of zText
+ *
+ * ENT: ipath - path to data files
+ * iname - Internal name for module
+ * idesc - Name to display to user for module
+ * iblockType - verse, chapter, book, etc. of index chunks
+ * icomp - Compressor object
+ * idisp - Display object to use for displaying
+ */
+
+zText::zText(const char *ipath, const char *iname, const char *idesc, int iblockType, SWCompress *icomp, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang)
+ : zVerse(ipath, FileMgr::RDWR, iblockType, icomp), SWText(iname, idesc, idisp, enc, dir, mark, ilang) {
+ blockType = iblockType;
+ lastWriteKey = 0;
+}
+
+
+/******************************************************************************
+ * zText Destructor - Cleans up instance of zText
+ */
+
+zText::~zText()
+{
+ flushCache();
+
+ if (lastWriteKey)
+ delete lastWriteKey;
+
+}
+
+
+bool zText::isWritable() { return ((idxfp[0]->getFd() > 0) && ((idxfp[0]->mode & FileMgr::RDWR) == FileMgr::RDWR)); }
+
+
+/******************************************************************************
+ * zText::getRawEntry - Returns the current verse buffer
+ *
+ * RET: buffer with verse
+ */
+
+SWBuf &zText::getRawEntryBuf() {
+ long start = 0;
+ unsigned short size = 0;
+ VerseKey &key = getVerseKey();
+
+ findOffset(key.Testament(), key.Index(), &start, &size);
+ entrySize = size; // support getEntrySize call
+
+ entryBuf = "";
+ zReadText(key.Testament(), start, size, entryBuf);
+
+ rawFilter(entryBuf, &key);
+
+// if (!isUnicode())
+ prepText(entryBuf);
+
+ return entryBuf;
+}
+
+
+bool zText::sameBlock(VerseKey *k1, VerseKey *k2) {
+ if (k1->Testament() != k2->Testament())
+ return false;
+
+ switch (blockType) {
+ case VERSEBLOCKS:
+ if (k1->Verse() != k2->Verse())
+ return false;
+ case CHAPTERBLOCKS:
+ if (k1->Chapter() != k2->Chapter())
+ return false;
+ case BOOKBLOCKS:
+ if (k1->Book() != k2->Book())
+ return false;
+ }
+ return true;
+}
+
+
+void zText::setEntry(const char *inbuf, long len) {
+ VerseKey &key = getVerseKey();
+
+ // see if we've jumped across blocks since last write
+ if (lastWriteKey) {
+ if (!sameBlock(lastWriteKey, &key)) {
+ flushCache();
+ }
+ delete lastWriteKey;
+ }
+
+ doSetText(key.Testament(), key.Index(), inbuf, len);
+
+ lastWriteKey = (VerseKey *)key.clone(); // must delete
+}
+
+
+void zText::linkEntry(const SWKey *inkey) {
+ VerseKey &destkey = getVerseKey();
+ const VerseKey *srckey = 0;
+
+ // see if we have a VerseKey * or decendant
+ SWTRY {
+ srckey = (const VerseKey *) SWDYNAMIC_CAST(VerseKey, inkey);
+ }
+ SWCATCH ( ... ) {
+ }
+ // if we don't have a VerseKey * decendant, create our own
+ if (!srckey)
+ srckey = new VerseKey(inkey);
+
+ doLinkEntry(destkey.Testament(), destkey.Index(), srckey->Index());
+
+ if (inkey != srckey) // free our key if we created a VerseKey
+ delete srckey;
+}
+
+
+/******************************************************************************
+ * zFiles::deleteEntry - deletes this entry
+ *
+ */
+
+void zText::deleteEntry() {
+
+ VerseKey &key = getVerseKey();
+
+ doSetText(key.Testament(), key.Index(), "");
+}
+
+
+/******************************************************************************
+ * zText::increment - Increments module key a number of entries
+ *
+ * ENT: increment - Number of entries to jump forward
+ *
+ */
+
+void zText::increment(int steps) {
+ long start;
+ unsigned short size;
+ VerseKey *tmpkey = &getVerseKey();
+
+ findOffset(tmpkey->Testament(), tmpkey->Index(), &start, &size);
+
+ SWKey lastgood = *tmpkey;
+ while (steps) {
+ long laststart = start;
+ unsigned short lastsize = size;
+ SWKey lasttry = *tmpkey;
+ (steps > 0) ? (*key)++ : (*key)--;
+ tmpkey = &getVerseKey();
+
+ if ((error = key->Error())) {
+ *key = lastgood;
+ break;
+ }
+ long index = tmpkey->Index();
+ findOffset(tmpkey->Testament(), index, &start, &size);
+
+ if (
+ (((laststart != start) || (lastsize != size)) // we're a different entry
+// && (start > 0)
+ && (size)) // and we actually have a size
+ ||(!skipConsecutiveLinks)) { // or we don't want to skip consecutive links
+ steps += (steps < 0) ? 1 : -1;
+ lastgood = *tmpkey;
+ }
+ }
+ error = (error) ? KEYERR_OUTOFBOUNDS : 0;
+}
+
+
+
+SWORD_NAMESPACE_END