summaryrefslogtreecommitdiff
path: root/src/modules/common
diff options
context:
space:
mode:
authorDimitri John Ledkov <xnox@ubuntu.com>2014-05-11 22:09:52 +0100
committerDimitri John Ledkov <xnox@ubuntu.com>2014-05-11 22:09:52 +0100
commit3525014850e3800ac7b28fd34e7f7af427f1c620 (patch)
tree3d1b8a17b86cfa9af178ceb818a4dc9daf52a46b /src/modules/common
sword (1.7.2+dfsg-2) unstable; urgency=medium
* Correct shared library symlink. (Closes: #747420) # imported from the archive
Diffstat (limited to 'src/modules/common')
-rw-r--r--src/modules/common/Makefile4
-rw-r--r--src/modules/common/Makefile.am23
-rw-r--r--src/modules/common/bz2comprs.cpp181
-rw-r--r--src/modules/common/entriesblk.cpp194
-rw-r--r--src/modules/common/lzsscomprs.cpp732
-rw-r--r--src/modules/common/lzsscomprs.txt802
-rw-r--r--src/modules/common/rawstr.cpp529
-rw-r--r--src/modules/common/rawstr4.cpp538
-rw-r--r--src/modules/common/rawverse.cpp311
-rw-r--r--src/modules/common/rawverse4.cpp312
-rw-r--r--src/modules/common/sapphire.cpp236
-rw-r--r--src/modules/common/swcipher.cpp147
-rw-r--r--src/modules/common/swcomprs.cpp211
-rw-r--r--src/modules/common/xzcomprs.cpp181
-rw-r--r--src/modules/common/zipcomprs.cpp183
-rw-r--r--src/modules/common/zstr.cpp700
-rw-r--r--src/modules/common/zverse.cpp507
17 files changed, 5791 insertions, 0 deletions
diff --git a/src/modules/common/Makefile b/src/modules/common/Makefile
new file mode 100644
index 0000000..81f7721
--- /dev/null
+++ b/src/modules/common/Makefile
@@ -0,0 +1,4 @@
+root := ../../..
+
+# Convenience wrapper: building in this directory just runs the top-level build.
+# $(MAKE) (rather than a literal "make") passes -j/-n/-k and the jobserver on to
+# the sub-make, and .PHONY keeps a stray file named "all" from masking the target.
+.PHONY: all
+all:
+	$(MAKE) -C ${root}
diff --git a/src/modules/common/Makefile.am b/src/modules/common/Makefile.am
new file mode 100644
index 0000000..90a3f98
--- /dev/null
+++ b/src/modules/common/Makefile.am
@@ -0,0 +1,23 @@
+commondir = $(top_srcdir)/src/modules/common
+
+libsword_la_SOURCES += $(commondir)/rawstr.cpp
+libsword_la_SOURCES += $(commondir)/rawstr4.cpp
+libsword_la_SOURCES += $(commondir)/swcomprs.cpp
+libsword_la_SOURCES += $(commondir)/lzsscomprs.cpp
+
+if HAVE_LIBZ
+SWZLIB = $(commondir)/zipcomprs.cpp
+SWZLIB += $(commondir)/bz2comprs.cpp
+SWZLIB += $(commondir)/xzcomprs.cpp
+else
+SWZLIB =
+endif
+libsword_la_SOURCES += $(SWZLIB)
+libsword_la_SOURCES += $(commondir)/rawverse.cpp
+libsword_la_SOURCES += $(commondir)/rawverse4.cpp
+libsword_la_SOURCES += $(commondir)/swcipher.cpp
+libsword_la_SOURCES += $(commondir)/zverse.cpp
+libsword_la_SOURCES += $(commondir)/zstr.cpp
+libsword_la_SOURCES += $(commondir)/entriesblk.cpp
+libsword_la_SOURCES += $(commondir)/sapphire.cpp
+
diff --git a/src/modules/common/bz2comprs.cpp b/src/modules/common/bz2comprs.cpp
new file mode 100644
index 0000000..16f6d11
--- /dev/null
+++ b/src/modules/common/bz2comprs.cpp
@@ -0,0 +1,181 @@
+/******************************************************************************
+ *
+ * bz2comprs.cpp - Bzip2Compress, a driver class that provides bzip2
+ * compression (Burrows–Wheeler with Huffman coding)
+ *
+ * $Id: bz2comprs.cpp 2858 2013-07-08 03:08:10Z chrislit $
+ *
+ * Copyright 2013 CrossWire Bible Society (http://www.crosswire.org)
+ * CrossWire Bible Society
+ * P. O. Box 2528
+ * Tempe, AZ 85280-2528
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ */
+
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <bz2comprs.h>
+#include <zlib.h>
+
+SWORD_NAMESPACE_START
+
+/******************************************************************************
+ * Bzip2Compress Constructor - Initializes data for instance of Bzip2Compress
+ *
+ */
+
+Bzip2Compress::Bzip2Compress() : SWCompress() {
+}
+
+
+/******************************************************************************
+ * Bzip2Compress Destructor - Cleans up instance of Bzip2Compress
+ */
+
+Bzip2Compress::~Bzip2Compress() {
+}
+
+
+/******************************************************************************
+ * Bzip2Compress::Encode - This function "encodes" the input stream into the
+ * output stream.
+ * The GetChars() and SendChars() functions are
+ * used to separate this method from the actual
+ * i/o.
+ * NOTE: must set zlen for parent class to know length of
+ * compressed buffer.
+ */
+
+void Bzip2Compress::Encode(void)
+{
+/*
+ * Buffers the complete input obtained from GetChars(), compresses it with a
+ * single call and passes the compressed bytes to SendChars().  When the
+ * compression succeeds, zlen holds the length of the compressed data.
+ *
+ * NOTE: despite the class name, this implementation calls zlib's compress().
+ * Its contract, quoted from zlib.h:
+ *
+ZEXTERN int ZEXPORT compress OF((Bytef *dest, uLongf *destLen,
+ const Bytef *source, uLong sourceLen));
+ Compresses the source buffer into the destination buffer. sourceLen is
+ the byte length of the source buffer. Upon entry, destLen is the total
+ size of the destination buffer, which must be at least 0.1% larger than
+ sourceLen plus 12 bytes. Upon exit, destLen is the actual size of the
+ compressed buffer.
+ This function can be used to compress a whole file at once if the
+ input file is mmap'ed.
+ compress returns Z_OK if success, Z_MEM_ERROR if there was not
+ enough memory, Z_BUF_ERROR if there was not enough room in the output
+ buffer.
+*/
+	direct = 0;	// set direction needed by parent [Get|Send]Chars()
+
+	// Pull the input in chunks of at most 1023 bytes.  buf always has at
+	// least 1024 bytes available past len, so the next chunk always fits.
+	char chunk[1024];
+	char *buf = (char *)calloc(1, 1024);
+	if (!buf) {
+		fprintf(stderr, "ERROR: out of memory while buffering data to compress\n");
+		return;
+	}
+	char *chunkbuf = buf;
+	unsigned long chunklen;
+	unsigned long len = 0;
+	while((chunklen = GetChars(chunk, 1023))) {
+		memcpy(chunkbuf, chunk, chunklen);
+		len += chunklen;
+		if (chunklen < 1023)	// short read: that was the last chunk
+			break;
+
+		// Grow through a temporary so that a failed realloc neither leaks
+		// the old buffer nor leaves us writing through a NULL pointer.
+		char *grown = (char *)realloc(buf, len + 1024);
+		if (!grown) {
+			fprintf(stderr, "ERROR: out of memory while buffering data to compress\n");
+			free(buf);
+			return;
+		}
+		buf = grown;
+		chunkbuf = buf+len;
+	}
+
+
+	// Destination size demanded by compress(): source + 0.1% + 12 bytes
+	// (rounded up a little).  compress() overwrites zlen with the real size.
+	zlen = (long) (len*1.001)+15;
+	char *zbuf = new char[zlen+1];
+	if (len)
+	{
+		if (compress((Bytef*)zbuf, &zlen, (const Bytef*)buf, len) != Z_OK)
+		{
+			// reported on stderr like every other error in this file
+			fprintf(stderr, "ERROR in compression\n");
+		}
+		else {
+			SendChars(zbuf, zlen);
+		}
+	}
+	else
+	{
+		fprintf(stderr, "ERROR: no buffer to compress\n");
+	}
+	delete [] zbuf;
+	free (buf);
+}
+
+
+/******************************************************************************
+ * Bzip2Compress::Decode - This function "decodes" the input stream into the
+ * output stream.
+ * The GetChars() and SendChars() functions are
+ * used to separate this method from the actual
+ * i/o.
+ */
+
+void Bzip2Compress::Decode(void)
+{
+/*
+ * Buffers the complete compressed input obtained from GetChars(),
+ * decompresses it with a single call and passes the result to SendChars().
+ * slen is set to the decompressed length (0 when decompression fails).
+ *
+ * NOTE: despite the class name, this implementation calls zlib's
+ * uncompress().  Its contract, quoted from zlib.h:
+ *
+ZEXTERN int ZEXPORT uncompress OF((Bytef *dest, uLongf *destLen,
+ const Bytef *source, uLong sourceLen));
+ Decompresses the source buffer into the destination buffer. sourceLen is
+ the byte length of the source buffer. Upon entry, destLen is the total
+ size of the destination buffer, which must be large enough to hold the
+ entire uncompressed data. (The size of the uncompressed data must have
+ been saved previously by the compressor and transmitted to the decompressor
+ by some mechanism outside the scope of this compression library.)
+ Upon exit, destLen is the actual size of the uncompressed buffer.
+ This function can be used to decompress a whole file at once if the
+ input file is mmap'ed.
+
+ uncompress returns Z_OK if success, Z_MEM_ERROR if there was not
+ enough memory, Z_BUF_ERROR if there was not enough room in the output
+ buffer, or Z_DATA_ERROR if the input data was corrupted.
+*/
+
+	// Pull the input in chunks of at most 1023 bytes.  zbuf always has at
+	// least 1024 bytes available past zlen, so the next chunk always fits.
+	char chunk[1024];
+	char *zbuf = (char *)calloc(1, 1024);
+	if (!zbuf) {
+		fprintf(stderr, "ERROR: not enough memory during decompression.\n");
+		return;
+	}
+	char *chunkbuf = zbuf;
+	unsigned long chunklen;	// same type Encode() uses for GetChars() results
+	unsigned long zlen = 0;	// local byte count; hides the member of the same name
+	while((chunklen = GetChars(chunk, 1023))) {
+		memcpy(chunkbuf, chunk, chunklen);
+		zlen += chunklen;
+		if (chunklen < 1023)	// short read: that was the last chunk
+			break;
+
+		// Grow through a temporary so that a failed realloc neither leaks
+		// the old buffer nor leaves us writing through a NULL pointer.
+		char *grown = (char *)realloc(zbuf, zlen + 1024);
+		if (!grown) {
+			fprintf(stderr, "ERROR: not enough memory during decompression.\n");
+			free(zbuf);
+			return;
+		}
+		zbuf = grown;
+		chunkbuf = zbuf + zlen;
+	}
+
+	if (zlen) {
+		// The uncompressed size is not stored with the data, so guess: assume
+		// the data expands to at most 20 times its compressed size.  Anything
+		// larger is reported through the Z_BUF_ERROR case below.
+		unsigned long blen = zlen*20;
+		char *buf = new char[blen];
+		slen = 0;
+		switch (uncompress((Bytef*)buf, &blen, (Bytef*)zbuf, zlen)){
+			case Z_OK: SendChars(buf, blen); slen = blen; break;
+			case Z_MEM_ERROR: fprintf(stderr, "ERROR: not enough memory during decompression.\n"); break;
+			case Z_BUF_ERROR: fprintf(stderr, "ERROR: not enough room in the out buffer during decompression.\n"); break;
+			case Z_DATA_ERROR: fprintf(stderr, "ERROR: corrupt data during decompression.\n"); break;
+			default: fprintf(stderr, "ERROR: an unknown error occured during decompression.\n"); break;
+		}
+		delete [] buf;
+	}
+	else {
+		fprintf(stderr, "ERROR: no buffer to decompress!\n");
+	}
+	free (zbuf);
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/common/entriesblk.cpp b/src/modules/common/entriesblk.cpp
new file mode 100644
index 0000000..4872d28
--- /dev/null
+++ b/src/modules/common/entriesblk.cpp
@@ -0,0 +1,194 @@
+/******************************************************************************
+ *
+ * entriesblk.cpp - EntriesBlock facilitates compressed lex/dict modules
+ *
+ * $Id: entriesblk.cpp 2833 2013-06-29 06:40:28Z chrislit $
+ *
+ * Copyright 2001-2013 CrossWire Bible Society (http://www.crosswire.org)
+ * CrossWire Bible Society
+ * P. O. Box 2528
+ * Tempe, AZ 85280-2528
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ */
+
+#include <entriesblk.h>
+#include <stdlib.h>
+#include <string.h>
+
+SWORD_NAMESPACE_START
+
+const int EntriesBlock::METAHEADERSIZE = 4;
+ // count(4);
+const int EntriesBlock::METAENTRYSIZE = 8;
+ // offset(4); size(4);
+
+/**
+ * Builds a block from a copy of an existing raw block.
+ *
+ * @param iBlock raw block data to copy; may be NULL, which yields an empty block
+ * @param size   number of bytes available at iBlock; 0 yields an empty block
+ */
+EntriesBlock::EntriesBlock(const char *iBlock, unsigned long size) {
+	// getCount()/setCount() always touch the first sizeof(__u32) bytes, so
+	// never allocate less than that, even for a truncated (1..3 byte) input.
+	// calloc zero-fills, so any bytes not supplied read as a count of 0.
+	const unsigned long allocSize = (size > sizeof(__u32)) ? size : sizeof(__u32);
+	block = (char *)calloc(1, allocSize);
+	if (block && iBlock && size) {
+		memcpy(block, iBlock, size);
+	}
+}
+
+
+EntriesBlock::EntriesBlock() { // creates an empty block
+ block = (char *)calloc(1, sizeof(__u32)); // only the 4-byte entry count, zero-filled by calloc (count == 0)
+}
+
+
+EntriesBlock::~EntriesBlock() { // releases the block buffer
+ free(block); // block comes from calloc/realloc, so free() is the matching release
+}
+
+
+/**
+ * Writes the number of entries into the first 4 bytes of the block.
+ *
+ * @param count new entry count; converted with archtosword32() before storing
+ */
+void EntriesBlock::setCount(int count) {
+	const __u32 stored = archtosword32(count);
+	memcpy(block, &stored, sizeof(__u32));
+}
+
+
+/**
+ * Reads the number of entries from the first 4 bytes of the block.
+ *
+ * @return the stored count after conversion with swordtoarch32()
+ */
+int EntriesBlock::getCount() {
+	__u32 stored = 0;
+	memcpy(&stored, block, sizeof(__u32));
+	const __u32 entryCount = swordtoarch32(stored);
+	return entryCount;
+}
+
+
+/**
+ * Fetches the meta entry (data offset and data size) of one entry.
+ *
+ * @param index  zero-based entry index
+ * @param offset receives the entry's data offset from the start of the block;
+ *               0 when index is out of range (0 is also what removeEntry()
+ *               stores for a deleted entry)
+ * @param size   receives the entry's data size; 0 when index is out of range
+ */
+void EntriesBlock::getMetaEntry(int index, unsigned long *offset, unsigned long *size) {
+	__u32 rawOffset = 0;
+	__u32 rawSize = 0;
+	*offset = 0;
+	*size = 0;
+	// only indexes in [0, count) are backed by a meta entry; a negative index
+	// would otherwise address memory in front of the block
+	if (index < 0 || index >= getCount())
+		return;
+
+	// first 4 bytes is count, each 8 bytes after is one meta entry: offset(4); size(4);
+	const char *metaEntry = block + METAHEADERSIZE + (index * METAENTRYSIZE);
+	memcpy(&rawOffset, metaEntry, sizeof(rawOffset));
+	memcpy(&rawSize, metaEntry + sizeof(rawOffset), sizeof(rawSize));
+
+	*offset = (unsigned long)swordtoarch32(rawOffset);
+	*size = (unsigned long)swordtoarch32(rawSize);
+}
+
+
+/**
+ * Stores the meta entry (data offset and data size) of one entry.
+ * Does nothing when index is outside [0, count).
+ *
+ * @param index  zero-based entry index
+ * @param offset data offset from the start of the block (0 marks a deleted entry)
+ * @param size   data size in bytes
+ */
+void EntriesBlock::setMetaEntry(int index, unsigned long offset, unsigned long size) {
+	// only indexes in [0, count) are backed by a meta entry; a negative index
+	// would otherwise overwrite memory in front of the block
+	if (index < 0 || index >= getCount())
+		return;
+
+	const __u32 rawOffset = archtosword32(offset);
+	const __u32 rawSize = archtosword32(size);
+
+	// first 4 bytes is count, each 8 bytes after is one meta entry: offset(4); size(4);
+	char *metaEntry = block + METAHEADERSIZE + (index * METAENTRYSIZE);
+	memcpy(metaEntry, &rawOffset, sizeof(rawOffset));
+	memcpy(metaEntry + sizeof(rawOffset), &rawSize, sizeof(rawSize));
+}
+
+
+/**
+ * Exposes the raw block together with the number of bytes in use.
+ *
+ * @param retSize receives the highest (offset + size) found among the meta
+ *                entries, and never less than 4
+ * @return pointer to the start of the block
+ */
+const char *EntriesBlock::getRawData(unsigned long *retSize) {
+	unsigned long extent = 4;
+	const int entryCount = getCount();
+	for (int idx = 0; idx < entryCount; ++idx) {
+		unsigned long entryOffset;
+		unsigned long entrySize;
+		getMetaEntry(idx, &entryOffset, &entrySize);
+
+		const unsigned long entryEnd = entryOffset + entrySize;
+		if (entryEnd > extent) {
+			extent = entryEnd;
+		}
+	}
+	*retSize = extent;
+	return block;
+}
+
+
+int EntriesBlock::addEntry(const char *entry) { // appends a NUL-terminated string as a new entry; returns its index
+ unsigned long dataSize;
+ getRawData(&dataSize); // bytes currently in use: header + meta entries + entry data
+ unsigned long len = strlen(entry);
+ unsigned long offset;
+ unsigned long size;
+ int count = getCount();
+ unsigned long dataStart = METAHEADERSIZE + (count * METAENTRYSIZE); // first byte after the existing meta entries
+ // grow by one meta entry plus the new text and its terminating NUL
+ block = (char *)realloc(block, dataSize + METAENTRYSIZE + len + 1); // NOTE(review): realloc result is not checked for NULL
+ // shift all entry data right to make room for the new meta entry
+ memmove(block + dataStart + METAENTRYSIZE, block + dataStart, dataSize - dataStart);
+
+ for (int loop = 0; loop < count; loop++) { // existing data moved, so existing offsets must follow
+ getMetaEntry(loop, &offset, &size);
+ if (offset) { // if not a deleted entry
+ offset += METAENTRYSIZE;
+ setMetaEntry(loop, offset, size);
+ }
+ }
+
+ offset = dataSize; // end of the data as measured before the realloc and the shift
+ size = len + 1;
+ // add our text to the end (old end plus the METAENTRYSIZE shift)
+ memcpy(block + offset + METAENTRYSIZE, entry, size);
+ // increment count first: setMetaEntry ignores indexes >= count
+ setCount(count + 1);
+ // add our meta entry
+ setMetaEntry(count, offset + METAENTRYSIZE, size);
+ // return index of our new entry
+ return count;
+}
+
+
+/**
+ * Looks up the text of one entry.
+ *
+ * @param entryIndex zero-based entry index
+ * @return pointer into the block at the entry's data, or an empty string when
+ *         the entry's offset is 0 (deleted entry or index beyond the count)
+ */
+const char *EntriesBlock::getEntry(int entryIndex) {
+	static const char *empty = "";
+	unsigned long entryOffset;
+	unsigned long entrySize;
+
+	getMetaEntry(entryIndex, &entryOffset, &entrySize);
+	if (!entryOffset) {
+		return empty;
+	}
+	return block + entryOffset;
+}
+
+
+/**
+ * Reports the stored size of one entry.
+ *
+ * @param entryIndex zero-based entry index
+ * @return the size recorded in the meta entry, or 0 when the entry's offset
+ *         is 0 (deleted entry or index beyond the count)
+ */
+unsigned long EntriesBlock::getEntrySize(int entryIndex) {
+	unsigned long entryOffset;
+	unsigned long entrySize;
+
+	getMetaEntry(entryIndex, &entryOffset, &entrySize);
+	if (!entryOffset) {
+		return 0;
+	}
+	return entrySize;
+}
+
+
+void EntriesBlock::removeEntry(int entryIndex) { // deletes one entry's data and marks its meta entry as deleted (offset 0)
+ unsigned long offset;
+ unsigned long size, size2; // size: removed entry; size2: scratch for the entries being re-offset
+ unsigned long dataSize;
+ getRawData(&dataSize); // bytes currently in use, measured before anything moves
+ getMetaEntry(entryIndex, &offset, &size);
+ int count = getCount();
+
+ if (!offset) // already deleted, or index beyond the count (getMetaEntry then reports offset 0)
+ return;
+
+ // shift the data that follows left to reclaim the space of the removed entry
+ memmove(block + offset, block + offset + size, dataSize - (offset + size));
+
+ // fix offset for all entries after our entry that were shifted left
+ for (int loop = entryIndex + 1; loop < count; loop++) { // NOTE(review): presumes higher indexes always hold later data (addEntry appends) - confirm
+ getMetaEntry(loop, &offset, &size2);
+ if (offset) { // if not a deleted entry
+ offset -= size;
+ setMetaEntry(loop, offset, size2);
+ }
+ }
+
+ // zero out our meta entry; the slot and the count stay, and the buffer is not shrunk here
+ setMetaEntry(entryIndex, 0L, 0);
+}
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/common/lzsscomprs.cpp b/src/modules/common/lzsscomprs.cpp
new file mode 100644
index 0000000..ef1bc8c
--- /dev/null
+++ b/src/modules/common/lzsscomprs.cpp
@@ -0,0 +1,732 @@
+/******************************************************************************
+ *
+ * lzsscomprs.cpp - LZSSCompress: a driver class that provides LZSS
+ * compression
+ *
+ * $Id: lzsscomprs.cpp 2935 2013-08-02 11:06:30Z scribe $
+ *
+ * Copyright 1996-2013 CrossWire Bible Society (http://www.crosswire.org)
+ * CrossWire Bible Society
+ * P. O. Box 2528
+ * Tempe, AZ 85280-2528
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <lzsscomprs.h>
+
+// The following are constant sizes used by the compression algorithm.
+//
+// N - This is the size of the ring buffer. It is set
+// to 4K. It is important to note that a position
+// within the ring buffer requires 12 bits.
+//
+// F - This is the maximum length of a character sequence
+// that can be taken from the ring buffer. It is set
+// to 18. Note that a length must be 3 before it is
+// worthwhile to store a position/length pair, so the
+// length can be encoded in only 4 bits. Or, put yet
+// another way, it is not necessary to encode a length
+// of 0-18, it is necessary to encode a length of
+// 3-18, which requires 4 bits.
+//
+// THRESHOLD - It takes 2 bytes to store an offset and
+// a length. If a character sequence only
+// requires 1 or 2 characters to store
+// uncompressed, then it is better to store
+// it uncompressed than as an offset into
+// the ring buffer.
+//
+// Note that the 12 bits used to store the position and the 4 bits
+// used to store the length equal a total of 16 bits, or 2 bytes.
+
+#define N 4096
+#define F 18
+#define THRESHOLD 3
+#define NOT_USED N
+
+
+SWORD_NAMESPACE_START
+
+class LZSSCompress::Private { // working state and tree helpers of the LZSS coder
+public: // NOTE: every data member is static, so all LZSSCompress instances share this state
+ static unsigned char m_ring_buffer[N + F - 1]; // N-byte ring buffer plus F - 1 extra bytes to ease string comparison
+ static short int m_match_position; // ring-buffer position of the longest match; set by InsertNode()
+ static short int m_match_length; // length of that match; set by InsertNode()
+ static short int m_lson[N + 1]; // left child of each node
+ static short int m_rson[N + 257]; // right child of each node; [N + 1 .. N + 256] are the per-character tree roots
+ static short int m_dad[N + 1]; // parent of each node; NOT_USED means the node is not in a tree
+ void InitTree(); // puts every node and root into the "not used" state
+ void InsertNode(short int Pos); // inserts the string at Pos and records the longest match
+ void DeleteNode(short int Node); // removes Node from its tree
+};
+
+/******************************************************************************
+ * LZSSCompress Statics
+ */
+
+// m_ring_buffer is a text buffer. It contains "nodes" of
+// uncompressed text that can be indexed by position. That is,
+// a substring of the ring buffer can be indexed by a position
+// and a length. When decoding, the compressed text may contain
+// a position in the ring buffer and a count of the number of
+// bytes from the ring buffer that are to be moved into the
+// uncompressed buffer.
+//
+// This ring buffer is not maintained as part of the compressed
+// text. Instead, it is reconstructed dynamically. That is,
+// it starts out empty and gets built as the text is decompressed.
+//
+// The ring buffer contain N bytes, with an additional F - 1 bytes
+// to facilitate string comparison.
+
+unsigned char LZSSCompress::Private::m_ring_buffer[N + F - 1];
+
+// m_match_position and m_match_length are set by InsertNode().
+//
+// These variables indicate the position in the ring buffer
+// and the number of characters at that position that match
+// a given string.
+
+short int LZSSCompress::Private::m_match_position;
+short int LZSSCompress::Private::m_match_length;
+
+// m_lson, m_rson, and m_dad are the Japanese way of referring to
+// a tree structure. The dad is the parent and it has a right and
+// left son (child).
+//
+// For i = 0 to N-1, m_rson[i] and m_lson[i] will be the right
+// and left children of node i.
+//
+// For i = 0 to N-1, m_dad[i] is the parent of node i.
+//
+// For i = 0 to 255, m_rson[N + i + 1] is the root of the tree for
+// strings that begin with the character i. Note that this requires
+// one byte characters.
+//
+// These nodes store values of 0...(N-1). Memory requirements
+// can be reduced by using 2-byte integers instead of full 4-byte
+// integers (for 32-bit applications). Therefore, these are
+// defined as "short ints."
+
+short int LZSSCompress::Private::m_lson[N + 1];
+short int LZSSCompress::Private::m_rson[N + 257];
+short int LZSSCompress::Private::m_dad[N + 1];
+
+
+/******************************************************************************
+ * LZSSCompress Constructor - Initializes data for instance of LZSSCompress
+ *
+ */
+
+LZSSCompress::LZSSCompress() : SWCompress() {
+ p = new Private();
+}
+
+
+/******************************************************************************
+ * LZSSCompress Destructor - Cleans up instance of LZSSCompress
+ */
+
+LZSSCompress::~LZSSCompress() {
+ delete p;
+}
+
+
+/******************************************************************************
+ * LZSSCompress::InitTree - This function initializes the tree nodes to
+ * "empty" states.
+ */
+
+void LZSSCompress::Private::InitTree(void) {
+ int i;
+
+ // For i = 0 to N - 1, m_rson[i] and m_lson[i] will be the right
+ // and left children of node i. These nodes need not be
+ // initialized. However, for debugging purposes, it is nice to
+ // have them initialized. Since this is only used for compression
+ // (not decompression), I don't mind spending the time to do it.
+ //
+ // For the same range of i, m_dad[i] is the parent of node i.
+ // These are initialized to a known value that can represent
+ // a "not used" state.
+
+ for (i = 0; i < N; i++) {
+ m_lson[i] = NOT_USED;
+ m_rson[i] = NOT_USED;
+ m_dad[i] = NOT_USED;
+ }
+
+ // For i = 0 to 255, m_rson[N + i + 1] is the root of the tree
+ // for strings that begin with the character i. This is why
+ // the right child array is larger than the left child array.
+ // These are also initialized to a "not used" state.
+ //
+ // Note that there are 256 of these, one for each of the possible
+ // 256 characters.
+
+ for (i = N + 1; i <= (N + 256); i++) { // index N itself is skipped: it is neither a node nor a root
+ m_rson[i] = NOT_USED;
+ }
+}
+
+
+/******************************************************************************
+ * LZSSCompress::InsertNode - This function inserts a string from the ring
+ * buffer into one of the trees. It loads the
+ * match position and length member variables
+ * for the longest match.
+ *
+ * The string to be inserted is identified by
+ * the parameter Pos. A full F bytes are
+ * inserted. So,
+ * m_ring_buffer[Pos ... Pos+F-1]
+ * are inserted.
+ *
+ * If the matched length is exactly F, then an
+ * old node is removed in favor of the new one
+ * (because the old one will be deleted
+ * sooner).
+ *
+ * Note that Pos plays a dual role. It is
+ * used as both a position in the ring buffer
+ * and also as a tree node.
+ * m_ring_buffer[Pos] defines a character that
+ * is used to identify a tree node.
+ *
+ * ENT: pos - position in the buffer
+ */
+
+void LZSSCompress::Private::InsertNode(short int Pos)
+{
+ short int i; // number of leading bytes that matched at the current node
+ short int p; // tree node currently being visited
+ int cmp; // sign of the last byte comparison: picks the right (>= 0) or left (< 0) branch
+ unsigned char * key; // the string being inserted
+
+/*
+ ASSERT(Pos >= 0);
+ ASSERT(Pos < N);
+*/
+
+ cmp = 1; // makes the first step follow the right link, the only link a root slot has
+ key = &(m_ring_buffer[Pos]);
+
+ // The last 256 entries in m_rson contain the root nodes for
+ // strings that begin with a letter. Get an index for the
+ // first letter in this string.
+
+ p = (short int) (N + 1 + key[0]);
+
+ // Set the left and right tree nodes for this position to "not
+ // used."
+
+ m_lson[Pos] = NOT_USED;
+ m_rson[Pos] = NOT_USED;
+
+ // Haven't matched anything yet.
+
+ m_match_length = 0;
+
+ for ( ; ; ) {
+ if (cmp >= 0) {
+ if (m_rson[p] != NOT_USED) {
+ p = m_rson[p];
+ }
+ else {
+ m_rson[p] = Pos; // free slot found: attach Pos as a leaf and finish
+ m_dad[Pos] = p;
+ return;
+ }
+ }
+ else {
+ if (m_lson[p] != NOT_USED) {
+ p = m_lson[p];
+ }
+ else {
+ m_lson[p] = Pos; // free slot found: attach Pos as a leaf and finish
+ m_dad[Pos] = p;
+ return;
+ }
+ }
+
+ // Should we go to the right or the left to look for the
+ // next match?
+
+ for (i = 1; i < F; i++) { // byte 0 already matches: the root was selected by key[0]
+ cmp = key[i] - m_ring_buffer[p + i];
+ if (cmp != 0)
+ break;
+ }
+
+ if (i > m_match_length) { // longest match so far: remember it
+ m_match_position = p;
+ m_match_length = i;
+
+ if (i >= F) // all F bytes matched: leave the loop and replace node p by Pos
+ break;
+ }
+ }
+
+ m_dad[Pos] = m_dad[p]; // Pos takes over p's place: same parent and children
+ m_lson[Pos] = m_lson[p];
+ m_rson[Pos] = m_rson[p];
+
+ m_dad[ m_lson[p] ] = Pos; // a missing child is NOT_USED (== N); m_dad has N + 1 slots, so index N is in bounds
+ m_dad[ m_rson[p] ] = Pos;
+
+ if (m_rson[ m_dad[p] ] == p) { // relink p's parent through whichever side pointed at p
+ m_rson[ m_dad[p] ] = Pos;
+ }
+ else {
+ m_lson[ m_dad[p] ] = Pos;
+ }
+
+ // Remove "p"
+
+ m_dad[p] = NOT_USED;
+}
+
+
+/******************************************************************************
+ * LZSSCompress::DeleteNode - This function removes the node "Node" from the
+ * tree.
+ *
+ * ENT: node - node to be removed
+ */
+
+void LZSSCompress::Private::DeleteNode(short int Node)
+{
+ short int q; // node that takes Node's place (NOT_USED when Node has no children)
+
+/*
+ ASSERT(Node >= 0);
+ ASSERT(Node < (N+1));
+*/
+
+ if (m_dad[Node] == NOT_USED) { // not in tree, nothing to do
+ return;
+ }
+
+ if (m_rson[Node] == NOT_USED) { // no right child: the left child (possibly NOT_USED) moves up
+ q = m_lson[Node];
+ }
+ else if (m_lson[Node] == NOT_USED) { // no left child: the right child moves up
+ q = m_rson[Node];
+ }
+ else { // two children: the rightmost node of the left subtree replaces Node
+ q = m_lson[Node];
+ if (m_rson[q] != NOT_USED) {
+ do {
+ q = m_rson[q];
+ } while (m_rson[q] != NOT_USED);
+
+ m_rson[ m_dad[q] ] = m_lson[q]; // detach q: its left subtree takes q's old place
+ m_dad[ m_lson[q] ] = m_dad[q]; // m_lson[q] may be NOT_USED (== N); m_dad has N + 1 slots, so index N is in bounds
+ m_lson[q] = m_lson[Node]; // q adopts Node's left subtree
+ m_dad[ m_lson[Node] ] = q;
+ }
+
+ m_rson[q] = m_rson[Node]; // q adopts Node's right subtree
+ m_dad[ m_rson[Node] ] = q;
+ }
+
+ m_dad[q] = m_dad[Node]; // q may be NOT_USED (== N) here; m_dad has N + 1 slots, so index N is in bounds
+
+ if (m_rson[ m_dad[Node] ] == Node) { // relink Node's parent through whichever side pointed at Node
+ m_rson[ m_dad[Node] ] = q;
+ }
+ else {
+ m_lson[ m_dad[Node] ] = q;
+ }
+
+ m_dad[Node] = NOT_USED; // mark Node as no longer in a tree
+}
+
+
+/******************************************************************************
+ * LZSSCompress::Encode - This function "encodes" the input stream into the
+ * output stream.
+ * The GetChars() and SendChars() functions are
+ * used to separate this method from the actual
+ * i/o.
+ * NOTE: must set zlen for parent class to know length of
+ * compressed buffer.
+ */
+
+void LZSSCompress::Encode(void)
+{
+ short int i; // an iterator
+ short int r; // node number in the binary tree
+ short int s; // position in the ring buffer
+ unsigned short int len; // len of initial string
+ short int last_match_length; // length of last match
+ short int code_buf_pos; // position in the output buffer
+ unsigned char code_buf[17]; // the output buffer
+ unsigned char mask; // bit mask for byte 0 of out buf
+ unsigned char c; // character read from string
+
+ // Start with a clean tree.
+
+ p->InitTree();
+ direct = 0; // set direction needed by parent [Get|Send]Chars()
+
+ // code_buf[0] works as eight flags. A "1" represents that the
+ // unit is an unencoded letter (1 byte), and a "0" represents
+ // that the next unit is a <position,length> pair (2 bytes).
+ //
+ // code_buf[1..16] stores eight units of code. Since the best
+ // we can do is store eight <position,length> pairs, at most 16
+ // bytes are needed to store this.
+ //
+ // This is why the maximum size of the code buffer is 17 bytes.
+
+ code_buf[0] = 0;
+ code_buf_pos = 1;
+
+ // Mask iterates over the 8 bits in the code buffer. The first
+ // character ends up being stored in the low bit.
+ //
+ // bit 8 7 6 5 4 3 2 1
+ // | |
+ // | first sequence in code buffer
+ // |
+ // last sequence in code buffer
+
+ mask = 1;
+
+ s = 0;
+ r = (short int) N - (short int) F;
+
+ // Initialize the ring buffer with spaces...
+
+ // Note that the last F bytes of the ring buffer are not filled.
+ // This is because those F bytes will be filled in immediately
+ // with bytes from the input stream.
+
+ memset(p->m_ring_buffer, ' ', N - F);
+
+ // Read F bytes into the last F bytes of the ring buffer.
+ //
+ // This function loads the buffer with X characters and returns
+ // the actual amount loaded.
+
+ len = GetChars((char *) &(p->m_ring_buffer[r]), F);
+
+ // Make sure there is something to be compressed.
+
+ if (len == 0)
+ return;
+
+ // Insert the F strings, each of which begins with one or more
+ // 'space' characters. Note the order in which these strings
+ // are inserted. This way, degenerate trees will be less likely
+ // to occur.
+
+ for (i = 1; i <= F; i++) {
+ p->InsertNode((short int) (r - i));
+ }
+
+ // Finally, insert the whole string just read. The
+ // member variables match_length and match_position are set.
+
+ p->InsertNode(r);
+
+ // Now that we're preloaded, continue till done.
+
+ do {
+
+ // m_match_length may be spuriously long near the end of
+ // text.
+
+ if (p->m_match_length > len) {
+ p->m_match_length = len;
+ }
+
+ // Is it cheaper to store this as a single character? If so,
+ // make it so.
+
+ if (p->m_match_length < THRESHOLD) {
+ // Send one character. Remember that code_buf[0] is the
+ // set of flags for the next eight items.
+
+ p->m_match_length = 1;
+ code_buf[0] |= mask;
+ code_buf[code_buf_pos++] = p->m_ring_buffer[r];
+ }
+
+ // Otherwise, we do indeed have a string that can be stored
+ // compressed to save space.
+
+ else {
+ // The next 16 bits need to contain the position (12 bits)
+ // and the length (4 bits).
+
+ code_buf[code_buf_pos++] = (unsigned char) p->m_match_position;
+ code_buf[code_buf_pos++] = (unsigned char) (
+ ((p->m_match_position >> 4) & 0xf0) |
+ (p->m_match_length - THRESHOLD) );
+ }
+
+ // Shift the mask one bit to the left so that it will be ready
+ // to store the new bit.
+
+ mask = (unsigned char) (mask << 1);
+
+ // If the mask is now 0, then we know that we have a full set
+ // of flags and items in the code buffer. These need to be
+ // output.
+
+ if (!mask) {
+ // code_buf is the buffer of characters to be output.
+ // code_buf_pos is the number of characters it contains.
+
+ SendChars((char *) code_buf, code_buf_pos);
+
+ // Reset for next buffer...
+
+ code_buf[0] = 0;
+ code_buf_pos = 1;
+ mask = 1;
+ }
+
+ last_match_length = p->m_match_length;
+
+ // Delete old strings and read new bytes...
+
+ for (i = 0; i < last_match_length; i++) {
+ // Get next character...
+
+ if (GetChars((char *) &c, 1) != 1)
+ break;
+
+ // Delete "old strings"
+
+ p->DeleteNode(s);
+
+ // Put this character into the ring buffer.
+ //
+ // The original comment here says "If the position is near
+ // the end of the buffer, extend the buffer to make
+ // string comparison easier."
+ //
+ // That's a little misleading, because the "end" of the
+ // buffer is really what we consider to be the "beginning"
+ // of the buffer, that is, positions 0 through F.
+ //
+ // The idea is that the front end of the buffer is duplicated
+ // into the back end so that when you're looking at characters
+ // at the back end of the buffer, you can index ahead (beyond
+ // the normal end of the buffer) and see the characters
+ // that are at the front end of the buffer without having
+ // to adjust the index.
+ //
+ // That is...
+ //
+ // 1234xxxxxxxxxxxxxxxxxxxxxxxxxxxxx1234
+ // | | |
+ // position 0 end of buffer |
+ // |
+ // duplicate of front of buffer
+
+ p->m_ring_buffer[s] = c;
+
+ if (s < F - 1) {
+ p->m_ring_buffer[s + N] = c;
+ }
+
+ // Increment the position, and wrap around when we're at
+ // the end. Note that this relies on N being a power of 2.
+
+ s = (short int) ( (s + 1) & (N - 1) );
+ r = (short int) ( (r + 1) & (N - 1) );
+
+ // Register the string that is found in
+ // m_ring_buffer[r..r+F-1].
+
+ p->InsertNode(r);
+ }
+
+ // If we didn't quit because we hit the last_match_length,
+ // then we must have quit because we ran out of characters
+ // to process.
+
+ while (i++ < last_match_length) {
+ p->DeleteNode(s);
+
+ s = (short int) ( (s + 1) & (N - 1) );
+ r = (short int) ( (r + 1) & (N - 1) );
+
+ // Note that len hitting 0 is the key that causes the
+ // do...while() to terminate. This is the only place
+ // within the loop that len is modified.
+ //
+ // Its original value is F (or a number less than F for
+ // short strings).
+
+ if (--len) {
+ p->InsertNode(r); /* buffer may not be empty. */
+ }
+ }
+
+ // End of do...while() loop. Continue processing until there
+ // are no more characters to be compressed. The variable
+ // "len" is used to signal this condition.
+ } while (len > 0);
+
+ // There could still be something in the output buffer. Send it
+ // now.
+
+ if (code_buf_pos > 1) {
+ // code_buf is the encoded string to send.
+ // code_buf_pos is the number of characters.
+
+ SendChars((char *) code_buf, code_buf_pos);
+ }
+
+
+ // must set zlen for parent class to know length of compressed buffer
+ zlen = zpos;
+}
+
+
+/******************************************************************************
+ * LZSSCompress::Decode - This function "decodes" the input stream into the
+ * output stream.
+ * The GetChars() and SendChars() functions are
+ * used to separate this method from the actual
+ * i/o.
+ */
+
+void LZSSCompress::Decode(void)
+{
+ int k;
+ int r; // node number
+ unsigned char c[F]; // an array of chars
+ unsigned char flags; // 8 bits of flags
+ int flag_count; // which flag we're on
+ short int pos; // position in the ring buffer
+ short int len; // number of chars in ring buffer
+ unsigned long totalLen = 0;
+
+ direct = 1; // set direction needed by parent [Get|Send]Chars()
+
+ // Initialize the ring buffer with a common string.
+ //
+ // Note that the last F bytes of the ring buffer are not filled.
+
+ memset(p->m_ring_buffer, ' ', N - F);
+
+ r = N - F;
+
+ flags = (char) 0;
+ flag_count = 0;
+
+ for ( ; ; ) {
+
+ // If there are more bits of interest in this flag, then
+ // shift that next interesting bit into the 1's position.
+ //
+ // If this flag has been exhausted, the next byte must
+ // be a flag.
+
+ if (flag_count > 0) {
+ flags = (unsigned char) (flags >> 1);
+ flag_count--;
+ }
+ else {
+ // Next byte must be a flag.
+
+ if (GetChars((char *) &flags, 1) != 1)
+ break;
+
+ // Set the flag counter. While at first it might appear
+ // that this should be an 8 since there are 8 bits in the
+ // flag, it should really be a 7 because the shift must
+ // be performed 7 times in order to see all 8 bits.
+
+ flag_count = 7;
+ }
+
+ // If the low order bit of the flag is now set, then we know
+ // that the next byte is a single, unencoded character.
+
+ if (flags & 1) {
+ if (GetChars((char *) c, 1) != 1)
+ break;
+
+ if (SendChars((char *) c, 1) != 1) {
+ break;
+ }
+ totalLen++;
+
+ // Add to buffer, and increment to next spot. Wrap at end.
+
+ p->m_ring_buffer[r] = c[0];
+ r = (short int) ( (r + 1) & (N - 1) );
+ }
+
+ // Otherwise, we know that the next two bytes are a
+ // <position,length> pair. The position is in 12 bits and
+ // the length is in 4 bits.
+
+ else {
+ // Original code:
+ // if ((i = getc(infile)) == EOF)
+ // break;
+ // if ((j = getc(infile)) == EOF)
+ // break;
+ // i |= ((j & 0xf0) << 4);
+ // j = (j & 0x0f) + THRESHOLD;
+ //
+ // I've modified this to only make one input call, and
+ // have changed the variable names to something more
+ // obvious.
+
+ if (GetChars((char *) c, 2) != 2)
+ break;
+
+ // Convert these two characters into the position and
+ // length. Note that the length is always at least
+ // THRESHOLD, which is why we're able to get a length
+ // of 18 out of only 4 bits.
+
+ pos = (short int) ( c[0] | ((c[1] & 0xf0) << 4) );
+
+ len = (short int) ( (c[1] & 0x0f) + THRESHOLD );
+
+ // There are now "len" characters at position "pos" in
+ // the ring buffer that can be pulled out. Note that
+ // len is never more than F.
+
+ for (k = 0; k < len; k++) {
+ c[k] = p->m_ring_buffer[(pos + k) & (N - 1)];
+
+ // Add to buffer, and increment to next spot. Wrap at end.
+
+ p->m_ring_buffer[r] = c[k];
+ r = (short int) ( (r + 1) & (N - 1) );
+ }
+
+ // Add the "len" characters to the output stream.
+
+ if (SendChars((char *) c, len) != (unsigned int)len) {
+ break;
+ }
+ totalLen += len;
+ }
+ }
+ slen = totalLen;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/common/lzsscomprs.txt b/src/modules/common/lzsscomprs.txt
new file mode 100644
index 0000000..b6817f2
--- /dev/null
+++ b/src/modules/common/lzsscomprs.txt
@@ -0,0 +1,802 @@
+The following is the original information sent from Parson's Technologies via
+Craig Rairden.
+_______________________________________________________________________________
+Compression Info, 10-11-95
+Jeff Wheeler
+
+Source of Algorithm
+-------------------
+
+The compression algorithms used here are based upon the algorithms developed
+and published by Haruhiko Okumura in a paper entitled "Data Compression
+Algorithms of LARC and LHarc." This paper discusses three compression
+algorithms, LZSS, LZARI, and LZHUF. LZSS is described as the "first" of
+these, and is described as providing moderate compression with good speed.
+LZARI is described as an improved LZSS, a combination of the LZSS algorithm
+with adaptive arithmetic compression. It is described as being slower than
+LZSS but with better compression. LZHUF (the basis of the common LHA
+compression program) was included in the paper, however, a free usage license
+was not included.
+
+The following are copies of the statements included at the beginning of each
+source code listing that was supplied in the working paper.
+
+ LZSS, dated 4/6/89, marked as "Use, distribute and
+ modify this program freely."
+
+ LZARI, dated 4/7/89, marked as "Use, distribute and
+ modify this program freely."
+
+ LZHUF, dated 11/20/88, written by Haruyasu Yoshizaki,
+ translated by Haruhiko Okumura on 4/7/89. Not
+ expressly marked as redistributable or modifiable.
+
+Since both LZSS and LZARI are marked as "use, distribute and modify freely" we
+have felt at liberty basing our compression algorithm on either of these.
+
+Selection of Algorithm
+----------------------
+
+Working samples of three possible compression algorithms are supplied in
+Okumura's paper. Which should be used?
+
+LZSS is the fastest at decompression, but does not generate as small a
+compressed file as the other methods. The other two methods provided, perhaps,
+a 15% improvement in compression. Or, put another way, on a 100K file, LZSS
+might compress it to 50K while the others might approach 40-45K. For STEP
+purposes, it was decided that decoding speed was of more importance than
+tighter compression. For these reasons, the first compression algorithm
+implemented is the LZSS algorithm.
+
+About LZSS Encoding
+-------------------
+
+(adapted from Haruhiko Okumura's paper)
+
+This scheme was proposed by Ziv and Lempel [1]. A slightly modified version
+is described by Storer and Szymanski [2]. An implementation using a binary
+tree has been proposed by Bell [3].
+
+The algorithm is quite simple.
+1. Keep a ring buffer which initially contains all space characters.
+2. Read several letters from the file to the buffer.
+3. Search the buffer for the longest string that matches the letters just
+ read, and send its length and position into the buffer.
+
+If the ring buffer is 4096 bytes, the position can be stored in 12 bits. If the
+length is represented in 4 bits, the <position, length> pair is two bytes
+long. If the longest match is no more than two characters, then just one
+character is sent without encoding. The process starts again with the next
+character. An extra bit is sent each time to tell the decoder whether the
+next item is a character or a <position, length> pair.
+
+[1] J. Ziv and A. Lempel, IEEE Transactions IT-23, 337-343 (1977).
+[2] J. A. Storer and T. G. Szymanski, J. ACM, 29, 928-951 (1982).
+[3] T. C. Bell, IEEE Transactions COM-34, 1176-1182 (1986).
+
+// Interface of the LZSS compressor described in this document.
+// NOTE(review): the method definitions further down are qualified with the
+// class name cLZSS rather than SWCompress.
+class SWCompress {
+public:
+// Resets the tree arrays to the "not used" state.
+void InitTree( // no return value
+ void); // no parameters
+
+// Inserts the string starting at ring-buffer position Pos into a tree and
+// records the longest match found on the way.
+void InsertNode( // no return value
+ short int Pos); // position in the buffer
+
+// Removes node Node from its tree (no-op when it is not in a tree).
+void DeleteNode( // no return value
+ short int Node); // node to be removed
+
+// Compresses the input stream into the output stream.
+void Encode( // no return value
+ void); // no parameters
+
+// Expands a compressed input stream into the output stream.
+void Decode( // no return value
+ void); // no parameters
+};
+
+// The following are constant sizes used by the compression algorithm.
+//
+// N - This is the size of the ring buffer. It is set
+// to 4K. It is important to note that a position
+// within the ring buffer requires 12 bits.
+//
+// F - This is the maximum length of a character sequence
+// that can be taken from the ring buffer. It is set
+// to 18. Note that a length must be 3 before it is
+// worthwhile to store a position/length pair, so the
+// length can be encoded in only 4 bits. Or, put yet
+// another way, it is not necessary to encode a length
+// of 0-18, it is necessary to encode a length of
+// 3-18, which requires 4 bits.
+//
+// THRESHOLD - It takes 2 bytes to store an offset and
+// a length. If a character sequence only
+// requires 1 or 2 characters to store
+// uncompressed, then it is better to store
+// it uncompressed than as an offset into
+// the ring buffer.
+//
+// NOT_USED - Marker stored in the tree arrays for "no node
+// here". It has the value N, which is one past
+// the last valid ring buffer position (N - 1).
+//
+// Note that the 12 bits used to store the position and the 4 bits
+// used to store the length equal a total of 16 bits, or 2 bytes.
+
+#define N 4096
+#define F 18
+#define THRESHOLD 3
+#define NOT_USED N
+
+// m_ring_buffer is a text buffer. It contains "nodes" of
+// uncompressed text that can be indexed by position. That is,
+// a substring of the ring buffer can be indexed by a position
+// and a length. When decoding, the compressed text may contain
+// a position in the ring buffer and a count of the number of
+// bytes from the ring buffer that are to be moved into the
+// uncompressed buffer.
+//
+// This ring buffer is not maintained as part of the compressed
+// text. Instead, it is reconstructed dynamically. That is,
+// it starts out empty and gets built as the text is decompressed.
+//
+// The ring buffer contains N bytes, with an additional F - 1 bytes
+// to facilitate string comparison.
+
+unsigned char m_ring_buffer[N + F - 1];
+
+// m_match_position and m_match_length are set by InsertNode().
+//
+// These variables indicate the position in the ring buffer
+// and the number of characters at that position that match
+// a given string.
+
+short int m_match_position;
+short int m_match_length;
+
+// m_lson, m_rson, and m_dad are the Japanese way of referring to
+// a tree structure. The dad is the parent and it has a right and
+// left son (child).
+//
+// For i = 0 to N-1, m_rson[i] and m_lson[i] will be the right
+// and left children of node i.
+//
+// For i = 0 to N-1, m_dad[i] is the parent of node i.
+//
+// For i = 0 to 255, m_rson[N + i + 1] is the root of the tree for
+// strings that begin with the character i. Note that this requires
+// one byte characters.
+//
+// These nodes store values of 0...(N-1). Memory requirements
+// can be reduced by using 2-byte integers instead of full 4-byte
+// integers (for 32-bit applications). Therefore, these are
+// defined as "short ints."
+
+short int m_lson[N + 1];
+short int m_rson[N + 257];
+short int m_dad[N + 1];
+
+
+
+
+/*
+ -------------------------------------------------------------------------
+ cLZSS::InitTree
+
+ Puts every tree link into the "not used" state so that encoding
+ starts from empty trees.
+ -------------------------------------------------------------------------
+*/
+
+void cLZSS::InitTree(void) throw()
+{
+    // Ordinary nodes 0 .. N-1. Only m_dad really has to be reset (it is
+    // what marks a node as "not in a tree"); the child links are cleared
+    // as well so that the arrays are easy to inspect while debugging.
+    for (int node = 0; node != N; ++node)
+    {
+        m_lson[node] = m_rson[node] = m_dad[node] = NOT_USED;
+    }
+
+    // Root slots: m_rson[N + 1 + ch] is the root of the tree holding the
+    // strings whose first byte is ch, one slot for each of the 256
+    // possible byte values.
+    for (int ch = 0; ch < 256; ++ch)
+    {
+        m_rson[N + 1 + ch] = NOT_USED;
+    }
+}
+
+/*
+ -------------------------------------------------------------------------
+ cLZSS::InsertNode
+
+ This function inserts a string from the ring buffer into one of
+ the trees. It loads the match position and length member variables
+ for the longest match.
+
+ The string to be inserted is identified by the parameter Pos.
+ A full F bytes are inserted. So, m_ring_buffer[Pos ... Pos+F-1]
+ are inserted.
+
+ If the matched length is exactly F, then an old node is removed
+ in favor of the new one (because the old one will be deleted
+ sooner).
+
+ Note that Pos plays a dual role. It is used as both a position
+ in the ring buffer and also as a tree node. m_ring_buffer[Pos]
+ defines a character that is used to identify a tree node.
+ -------------------------------------------------------------------------
+*/
+
+void cLZSS::InsertNode( // no return value
+ short int Pos) // position in the buffer
+ throw() // exception list
+
+ {
+ short int i;
+ short int p;
+ int cmp;
+ unsigned char * key;
+
+ ASSERT(Pos >= 0);
+ ASSERT(Pos < N);
+
+ // cmp starts out positive so that the first pass through the loop
+ // descends through m_rson[p]; the root slots exist only in m_rson.
+
+ cmp = 1;
+ key = &(m_ring_buffer[Pos]);
+
+ // The last 256 entries in m_rson contain the root nodes for
+ // strings that begin with a letter. Get an index for the
+ // first letter in this string.
+
+ p = (short int) (N + 1 + key[0]);
+
+ // Set the left and right tree nodes for this position to "not
+ // used."
+
+ m_lson[Pos] = NOT_USED;
+ m_rson[Pos] = NOT_USED;
+
+ // Haven't matched anything yet.
+
+ m_match_length = 0;
+
+ for ( ; ; )
+ {
+ // Step to the right child when the key compared greater than or
+ // equal to the current node, otherwise to the left child. If the
+ // child slot is empty, the new node is attached there and we are
+ // done.
+
+ if (cmp >= 0)
+ {
+ if (m_rson[p] != NOT_USED)
+ {
+ p = m_rson[p];
+ }
+ else
+ {
+ m_rson[p] = Pos;
+ m_dad[Pos] = p;
+ return;
+ }
+ }
+ else
+ {
+ if (m_lson[p] != NOT_USED)
+ {
+ p = m_lson[p];
+ }
+ else
+ {
+ m_lson[p] = Pos;
+ m_dad[Pos] = p;
+ return;
+ }
+ }
+
+ // Should we go to the right or the left to look for the
+ // next match?
+ //
+ // The comparison starts at index 1 because the first character
+ // already selected the tree. On exit, i is the number of leading
+ // characters that key and the string at p have in common.
+
+ for (i = 1; i < F; i++)
+ {
+ cmp = key[i] - m_ring_buffer[p + i];
+ if (cmp != 0)
+ break;
+ }
+
+ // Remember the longest match seen so far. A match of the full
+ // F characters ends the search.
+
+ if (i > m_match_length)
+ {
+ m_match_position = p;
+ m_match_length = i;
+
+ if (i >= F)
+ break;
+ }
+ }
+
+ // Only reached after a full-length match: the new node Pos takes
+ // over the parent and both children of the old node p.
+
+ m_dad[Pos] = m_dad[p];
+ m_lson[Pos] = m_lson[p];
+ m_rson[Pos] = m_rson[p];
+
+ // Point the children back at their new parent. A child link may be
+ // NOT_USED (== N), in which case the write lands in element N of
+ // m_dad, which the array size of N + 1 provides.
+
+ m_dad[ m_lson[p] ] = Pos;
+ m_dad[ m_rson[p] ] = Pos;
+
+ // Replace p by Pos in whichever child slot of the parent held p.
+
+ if (m_rson[ m_dad[p] ] == p)
+ {
+ m_rson[ m_dad[p] ] = Pos;
+ }
+ else
+ {
+ m_lson[ m_dad[p] ] = Pos;
+ }
+
+ // Remove "p"
+
+ m_dad[p] = NOT_USED;
+ }
+
+/*
+ -------------------------------------------------------------------------
+ cLZSS::DeleteNode
+
+ This function removes the node "Node" from the tree. A node whose
+ m_dad entry is NOT_USED is not in any tree and is left alone.
+ -------------------------------------------------------------------------
+*/
+
+void cLZSS::DeleteNode( // no return value
+ short int Node) // node to be removed
+ throw() // exception list
+
+ {
+ // q is the node (or NOT_USED) that takes over Node's place.
+ short int q;
+
+ ASSERT(Node >= 0);
+ ASSERT(Node < (N+1));
+
+ if (m_dad[Node] == NOT_USED)
+ {
+ // not in tree, nothing to do
+ return;
+ }
+
+ // With at most one child, that child (possibly NOT_USED) simply
+ // moves up into Node's place.
+
+ if (m_rson[Node] == NOT_USED)
+ {
+ q = m_lson[Node];
+ }
+ else if (m_lson[Node] == NOT_USED)
+ {
+ q = m_rson[Node];
+ }
+ else
+ {
+ // Two children: the replacement is the right-most node of the
+ // left subtree.
+
+ q = m_lson[Node];
+ if (m_rson[q] != NOT_USED)
+ {
+ do
+ {
+ q = m_rson[q];
+ }
+ while (m_rson[q] != NOT_USED);
+
+ // Unhook q from its current parent (its left child takes its
+ // place there), then give q Node's left subtree.
+
+ m_rson[ m_dad[q] ] = m_lson[q];
+ m_dad[ m_lson[q] ] = m_dad[q];
+ m_lson[q] = m_lson[Node];
+ m_dad[ m_lson[Node] ] = q;
+ }
+
+ // q also inherits Node's right subtree.
+
+ m_rson[q] = m_rson[Node];
+ m_dad[ m_rson[Node] ] = q;
+ }
+
+ // Hook q in below Node's former parent, in the same child slot that
+ // Node occupied.
+
+ m_dad[q] = m_dad[Node];
+
+ if (m_rson[ m_dad[Node] ] == Node)
+ {
+ m_rson[ m_dad[Node] ] = q;
+ }
+ else
+ {
+ m_lson[ m_dad[Node] ] = q;
+ }
+
+ // Mark Node as no longer being part of a tree.
+
+ m_dad[Node] = NOT_USED;
+ }
+
+/*
+ -------------------------------------------------------------------------
+ cLZSS::Encode
+
+ This function "encodes" the input stream into the output stream.
+ The GetChars() and SendChars() functions are used to separate
+ this method from the actual i/o.
+
+ The output is a sequence of groups. Each group is one flag byte
+ followed by up to eight items; an item is either one literal byte
+ or a two-byte <position,length> pair.
+ -------------------------------------------------------------------------
+*/
+
+void cLZSS::Encode( // no return value
+ void) // no parameters
+
+ {
+ short int i; // an iterator
+ short int r; // node number in the binary tree
+ short int s; // position in the ring buffer
+ unsigned short int len; // len of initial string
+ short int last_match_length; // length of last match
+ short int code_buf_pos; // position in the output buffer
+ unsigned char code_buf[17]; // the output buffer
+ unsigned char mask; // bit mask for byte 0 of out buf
+ unsigned char c; // character read from string
+
+ // Start with a clean tree.
+
+ InitTree();
+
+ // code_buf[0] works as eight flags. A "1" represents that the
+ // unit is an unencoded letter (1 byte), and a "0" represents
+ // that the next unit is a <position,length> pair (2 bytes).
+ //
+ // code_buf[1..16] stores eight units of code. Since the best
+ // we can do is store eight <position,length> pairs, at most 16
+ // bytes are needed to store this.
+ //
+ // This is why the maximum size of the code buffer is 17 bytes.
+
+ code_buf[0] = 0;
+ code_buf_pos = 1;
+
+ // Mask iterates over the 8 bits in the code buffer. The first
+ // character ends up being stored in the low bit.
+ //
+ // bit 8 7 6 5 4 3 2 1
+ // | |
+ // | first sequence in code buffer
+ // |
+ // last sequence in code buffer
+
+ mask = 1;
+
+ // s is where the next input byte will be written (and where the
+ // oldest string gets deleted); r is the start of the string that is
+ // currently being encoded.
+
+ s = 0;
+ r = (short int) N - (short int) F;
+
+ // Initialize the ring buffer with spaces...
+
+ // Note that the last F bytes of the ring buffer are not filled.
+ // This is because those F bytes will be filled in immediately
+ // with bytes from the input stream.
+
+ memset(m_ring_buffer, ' ', N - F);
+
+ // Read F bytes into the last F bytes of the ring buffer.
+ //
+ // This function loads the buffer with X characters and returns
+ // the actual amount loaded.
+
+ len = GetChars(&(m_ring_buffer[r]), F);
+
+ // Make sure there is something to be compressed.
+
+ if (len == 0)
+ return;
+
+ // Insert the F strings, each of which begins with one or more
+ // 'space' characters. Note the order in which these strings
+ // are inserted. This way, degenerate trees will be less likely
+ // to occur.
+
+ for (i = 1; i <= F; i++)
+ {
+ InsertNode((short int) (r - i));
+ }
+
+ // Finally, insert the whole string just read. The
+ // member variables match_length and match_position are set.
+
+ InsertNode(r);
+
+ // Now that we're preloaded, continue till done.
+
+ do
+ {
+
+ // m_match_length may be spuriously long near the end of
+ // text.
+
+ if (m_match_length > len)
+ {
+ m_match_length = len;
+ }
+
+ // Is it cheaper to store this as a single character? If so,
+ // make it so.
+
+ if (m_match_length < THRESHOLD)
+ {
+ // Send one character. Remember that code_buf[0] is the
+ // set of flags for the next eight items.
+
+ m_match_length = 1;
+ code_buf[0] |= mask;
+ code_buf[code_buf_pos++] = m_ring_buffer[r];
+ }
+
+ // Otherwise, we do indeed have a string that can be stored
+ // compressed to save space.
+
+ else
+ {
+ // The next 16 bits need to contain the position (12 bits)
+ // and the length (4 bits).
+ //
+ // First byte: low 8 bits of the position. Second byte: the
+ // high 4 bits of the position in its upper nibble and
+ // (length - THRESHOLD) in its lower nibble.
+
+ code_buf[code_buf_pos++] = (unsigned char) m_match_position;
+ code_buf[code_buf_pos++] = (unsigned char) (
+ ((m_match_position >> 4) & 0xf0) |
+ (m_match_length - THRESHOLD) );
+ }
+
+ // Shift the mask one bit to the left so that it will be ready
+ // to store the new bit.
+
+ mask = (unsigned char) (mask << 1);
+
+ // If the mask is now 0, then we know that we have a full set
+ // of flags and items in the code buffer. These need to be
+ // output.
+
+ if (mask == 0)
+ {
+ // code_buf is the buffer of characters to be output.
+ // code_buf_pos is the number of characters it contains.
+
+ SendChars(code_buf, code_buf_pos);
+
+ // Reset for next buffer...
+
+ code_buf[0] = 0;
+ code_buf_pos = 1;
+ mask = 1;
+ }
+
+ // InsertNode() below overwrites m_match_length, so keep the
+ // number of characters just encoded in a local.
+
+ last_match_length = m_match_length;
+
+ // Delete old strings and read new bytes...
+
+ for (i = 0; i < last_match_length; i++)
+ {
+
+ // Get next character...
+
+ if (GetChars(&c, 1) != 1)
+ break;
+
+ // Delete "old strings"
+
+ DeleteNode(s);
+
+ // Put this character into the ring buffer.
+ //
+ // The original comment here says "If the position is near
+ // the end of the buffer, extend the buffer to make
+ // string comparison easier."
+ //
+ // That's a little misleading, because the "end" of the
+ // buffer is really what we consider to be the "beginning"
+ // of the buffer, that is, positions 0 through F.
+ //
+ // The idea is that the front end of the buffer is duplicated
+ // into the back end so that when you're looking at characters
+ // at the back end of the buffer, you can index ahead (beyond
+ // the normal end of the buffer) and see the characters
+ // that are at the front end of the buffer without having
+ // to adjust the index.
+ //
+ // That is...
+ //
+ // 1234xxxxxxxxxxxxxxxxxxxxxxxxxxxxx1234
+ // | | |
+ // position 0 end of buffer |
+ // |
+ // duplicate of front of buffer
+
+ m_ring_buffer[s] = c;
+
+ if (s < F - 1)
+ {
+ m_ring_buffer[s + N] = c;
+ }
+
+ // Increment the position, and wrap around when we're at
+ // the end. Note that this relies on N being a power of 2.
+
+ s = (short int) ( (s + 1) & (N - 1) );
+ r = (short int) ( (r + 1) & (N - 1) );
+
+ // Register the string that is found in
+ // m_ring_buffer[r..r+F-1].
+
+ InsertNode(r);
+ }
+
+ // If we didn't quit because we hit the last_match_length,
+ // then we must have quit because we ran out of characters
+ // to process.
+
+ while (i++ < last_match_length)
+ {
+ DeleteNode(s);
+
+ s = (short int) ( (s + 1) & (N - 1) );
+ r = (short int) ( (r + 1) & (N - 1) );
+
+ // Note that len hitting 0 is the key that causes the
+ // do...while() to terminate. This is the only place
+ // within the loop that len is modified.
+ //
+ // Its original value is F (or a number less than F for
+ // short strings).
+
+ if (--len)
+ {
+ InsertNode(r); /* buffer may not be empty. */
+ }
+ }
+
+ // End of do...while() loop. Continue processing until there
+ // are no more characters to be compressed. The variable
+ // "len" is used to signal this condition.
+ }
+ while (len > 0);
+
+ // There could still be something in the output buffer. Send it
+ // now.
+
+ if (code_buf_pos > 1)
+ {
+ // code_buf is the encoded string to send.
+ // code_buf_pos is the number of characters.
+
+ SendChars(code_buf, code_buf_pos);
+ }
+
+ // Done!
+ }
+
+/*
+ -------------------------------------------------------------------------
+ cLZSS::Decode
+
+ Expands a compressed input stream into the output stream. All i/o
+ goes through GetChars() and SendChars(), so this method does not
+ depend on where the data comes from or goes to.
+
+ The input is a sequence of groups: one flag byte followed by up to
+ eight items. Flag bits are consumed from the low bit upwards. A set
+ bit means the item is one literal byte; a clear bit means the item
+ is a two-byte <position,length> reference into the ring buffer.
+ Decoding stops as soon as input runs out or output is refused.
+ -------------------------------------------------------------------------
+*/
+
+void cLZSS::Decode(void)
+{
+    unsigned char chunk[F];         // bytes read from / sent to the streams
+    unsigned char flag_bits = 0;    // current flag byte, shifted as consumed
+    int shifts_left = 0;            // shifts still available in flag_bits
+    int write_at = N - F;           // next ring buffer slot to fill
+
+    // The ring buffer starts out as spaces, exactly as in Encode(). The
+    // last F bytes are not filled.
+    memset(m_ring_buffer, ' ', N - F);
+
+    while (true)
+    {
+        // Bring the flag for the next item into bit 0. A fresh flag byte
+        // allows 7 shifts, because its bit 0 is usable without shifting.
+        if (shifts_left <= 0)
+        {
+            if (GetChars(&flag_bits, 1) != 1)
+                return;
+            shifts_left = 7;
+        }
+        else
+        {
+            flag_bits = (unsigned char) (flag_bits >> 1);
+            --shifts_left;
+        }
+
+        if ((flag_bits & 1) != 0)
+        {
+            // Literal: pass one byte through and remember it.
+            if (GetChars(chunk, 1) != 1)
+                return;
+
+            if (SendChars(chunk, 1) != 1)
+                return;
+
+            m_ring_buffer[write_at] = chunk[0];
+            write_at = (write_at + 1) & (N - 1);
+            continue;
+        }
+
+        // Reference: two bytes holding a 12-bit position and a 4-bit
+        // length. The stored length is biased by THRESHOLD, which is how
+        // 4 bits cover lengths up to F.
+        if (GetChars(chunk, 2) != 2)
+            return;
+
+        const short int src = (short int) ( chunk[0] | ((chunk[1] & 0xf0) << 4) );
+        const short int count = (short int) ( (chunk[1] & 0x0f) + THRESHOLD );
+
+        // Copy "count" bytes out of the ring buffer, appending each one to
+        // the ring buffer again as it is produced. Both indexes wrap at N.
+        for (int n = 0; n < count; ++n)
+        {
+            const unsigned char ch = m_ring_buffer[(src + n) & (N - 1)];
+
+            chunk[n] = ch;
+            m_ring_buffer[write_at] = ch;
+            write_at = (write_at + 1) & (N - 1);
+        }
+
+        if (SendChars(chunk, count) != count)
+            return;
+    }
+}
+
+
diff --git a/src/modules/common/rawstr.cpp b/src/modules/common/rawstr.cpp
new file mode 100644
index 0000000..788ab6e
--- /dev/null
+++ b/src/modules/common/rawstr.cpp
@@ -0,0 +1,529 @@
+/******************************************************************************
+ *
+ * rawstr.cpp - code for class 'RawStr' - a module that reads raw text
+ * entries from a data file (.dat) using an index file
+ * (.idx) and provides lookup and parsing functions
+ * based on class StrKey
+ *
+ * $Id: rawstr.cpp 2833 2013-06-29 06:40:28Z chrislit $
+ *
+ * Copyright 1998-2013 CrossWire Bible Society (http://www.crosswire.org)
+ * CrossWire Bible Society
+ * P. O. Box 2528
+ * Tempe, AZ 85280-2528
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ */
+
+#include <stdio.h>
+#include <fcntl.h>
+#include <errno.h>
+
+#include <stdlib.h>
+#include <utilstr.h>
+#include <rawstr.h>
+#include <sysdata.h>
+#include <swlog.h>
+#include <filemgr.h>
+#include <swbuf.h>
+#include <stringmgr.h>
+
+SWORD_NAMESPACE_START
+
+/******************************************************************************
+ * RawStr Statics
+ */
+
+// Number of live RawStr objects; the constructor increments it and the
+// destructor decrements it.
+int RawStr::instance = 0;
+// Newline character constant.
+char RawStr::nl = '\n';
+// Size in bytes of one index record. findOffset() reads each record as a
+// 4-byte start offset followed by a 2-byte size.
+const int RawStr::IDXENTRYSIZE = 6;
+
+
+
+/******************************************************************************
+ * RawStr Constructor - Initializes data for instance of RawStr
+ *
+ * ENT: ipath         - path prefix of the module files; ".idx" and ".dat"
+ *                      are appended to it to form the index and data file
+ *                      names
+ *      fileMode      - mode passed to FileMgr::open(); -1 selects
+ *                      FileMgr::RDWR
+ *      caseSensitive - when false, keys are upper-cased before they are
+ *                      compared
+ */
+
+RawStr::RawStr(const char *ipath, int fileMode, bool caseSensitive) : caseSensitive(caseSensitive)
+{
+    SWBuf buf;
+
+    lastoff = -1;    // no cached search position yet
+    path = 0;
+    stdstr(&path, ipath);
+
+    if (fileMode == -1) { // try read/write if possible
+        fileMode = FileMgr::RDWR;
+    }
+
+    buf.setFormatted("%s.idx", path);
+    idxfd = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
+
+    buf.setFormatted("%s.dat", path);
+    datfd = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
+
+    // datfd is a pointer, so it has to be tested against null. The former
+    // test "datfd < 0" was an ordered comparison between a pointer and the
+    // integer zero, which could never be true and is rejected by current
+    // compilers.
+    if (!datfd) {
+        SWLog::getSystemLog()->logError("%d", errno);
+    }
+
+    instance++;
+}
+
+
+/******************************************************************************
+ * RawStr Destructor - Cleans up instance of RawStr
+ *
+ * Frees the stored path, updates the instance counter and hands both file
+ * descriptors back to the system FileMgr.
+ */
+
+RawStr::~RawStr()
+{
+    // delete [] accepts a null pointer, so no guard is required
+    delete [] path;
+
+    instance--;
+
+    FileMgr::getSystemFileMgr()->close(idxfd);
+    FileMgr::getSystemFileMgr()->close(datfd);
+}
+
+
+/******************************************************************************
+ * RawStr::getIDXBufDat - Gets the key string stored at the given offset of
+ *                        the data file
+ *                        NOTE: buf is (re)allocated with malloc/realloc and
+ *                        must be released with free() by the calling
+ *                        function
+ *
+ * ENT: ioffset - offset in dat file to lookup
+ *      buf     - address of pointer to allocate for storage of string;
+ *                *buf must be 0 or a buffer obtained from malloc/realloc
+ *
+ * The key ends at the first backslash, LF or CR. It is upper-cased unless
+ * the object is case sensitive. Without a data file an empty string is
+ * returned.
+ */
+
+void RawStr::getIDXBufDat(long ioffset, char **buf) const
+{
+    // datfd is a pointer: test it against null. The former "datfd > 0" was
+    // an ordered pointer/integer comparison, which current compilers
+    // reject.
+    if (!datfd) {
+        *buf = (*buf) ? (char *)realloc(*buf, 1) : (char *)malloc(1);
+        **buf = 0;
+        return;
+    }
+
+    // First pass: count the bytes that belong to the key.
+    int size = 0;
+    char ch;
+    datfd->seek(ioffset, SEEK_SET);
+    while (datfd->read(&ch, 1) == 1) {
+        if ((ch == '\\') || (ch == 10) || (ch == 13))
+            break;
+        size++;
+    }
+
+    // Twice the key length is reserved because upper-casing UTF-8 text may
+    // change its byte length; toupperstr_utf8() is told about that limit.
+    *buf = (*buf) ? (char *)realloc(*buf, size*2 + 1) : (char *)malloc(size*2 + 1);
+
+    // Second pass: read the key itself.
+    if (size) {
+        datfd->seek(ioffset, SEEK_SET);
+        datfd->read(*buf, size);
+    }
+    (*buf)[size] = 0;
+    if (!caseSensitive) toupperstr_utf8(*buf, size*2);
+}
+
+
+/******************************************************************************
+ * RawStr::getIDXBuf - Gets the key string of the index record at the given
+ *                     idx offset
+ *                     NOTE: buf is (re)allocated with malloc/realloc and
+ *                     must be released with free() by the calling function
+ *
+ * ENT: ioffset - offset in idx file to lookup
+ *      buf     - address of pointer to allocate for storage of string;
+ *                *buf must be 0 or a buffer obtained from malloc/realloc
+ *
+ * If there is no index file, or the record cannot be read, an empty string
+ * is returned instead of leaving *buf untouched or looking up a garbage
+ * offset.
+ */
+
+void RawStr::getIDXBuf(long ioffset, char **buf) const
+{
+    // idxfd is a pointer: test it against null (the former "idxfd > 0" was
+    // an ordered pointer/integer comparison).
+    if (idxfd) {
+        __u32 offset = 0;
+
+        idxfd->seek(ioffset, SEEK_SET);
+
+        // The first 4 bytes of an index record hold the offset of the entry
+        // in the data file. Only use them if they were really read.
+        if (idxfd->read(&offset, 4) == 4) {
+            offset = swordtoarch32(offset);
+            getIDXBufDat(offset, buf);
+            return;
+        }
+    }
+
+    // realloc() on a null pointer behaves like malloc(). If the allocation
+    // fails, *buf is left as it was.
+    char *empty = (char *)realloc(*buf, 1);
+    if (empty) {
+        *empty = 0;
+        *buf = empty;
+    }
+}
+
+
+/******************************************************************************
+ * RawStr::findOffset - Finds the offset of the key string from the indexes
+ *
+ * ENT: ikey - key string to lookup
+ * start - address to store the starting offset
+ * size - address to store the size of the entry
+ * away - number of entries before or after to jump
+ * (default = 0)
+ * idxoff - if not 0, address to store the offset of the index
+ * record that was finally selected
+ *
+ * RET: error status -1 general error; -2 new file
+ */
+
+signed char RawStr::findOffset(const char *ikey, __u32 *start, __u16 *size, long away, __u32 *idxoff) const
+{
+ char *trybuf, *maxbuf, *key = 0, quitflag = 0;
+ signed char retval = -1;
+ long headoff, tailoff, tryoff = 0, maxoff = 0;
+ int diff = 0;
+ bool awayFromSubstrCheck = false;
+
+ if (idxfd->getFd() >=0) {
+ // Offset of the last index record (records are 6 bytes each). A
+ // negative value means the index holds no record at all.
+ tailoff = maxoff = idxfd->seek(0, SEEK_END) - 6;
+ retval = (tailoff >= 0) ? 0 : -2; // if NOT new file
+ if (*ikey && retval != -2) {
+ headoff = 0;
+
+ // Work on a private copy of the key, upper-cased unless the
+ // object is case sensitive.
+ // NOTE(review): the third stdstr() argument looks like a
+ // buffer-size multiplier leaving room for the case conversion
+ // -- confirm against utilstr.
+ stdstr(&key, ikey, 3);
+ if (!caseSensitive) toupperstr_utf8(key, strlen(key)*3);
+
+ int keylen = strlen(key);
+ bool substr = false;
+
+ // maxbuf holds the key of the last index record.
+ trybuf = maxbuf = 0;
+ getIDXBuf(maxoff, &maxbuf);
+
+ // Binary search over the index records between headoff and
+ // tailoff.
+ while (headoff < tailoff) {
+ // Probe the record-aligned midpoint, except that the very
+ // first probe uses the position remembered from the previous
+ // call (lastoff) when there is one.
+ tryoff = (lastoff == -1) ? headoff + ((((tailoff / 6) - (headoff / 6))) / 2) * 6 : lastoff;
+ lastoff = -1;
+ getIDXBuf(tryoff, &trybuf);
+
+ if (!*trybuf && tryoff) { // In case of extra entry at end of idx (not first entry)
+ tryoff += (tryoff > (maxoff / 2))?-6:6;
+ retval = -1;
+ break;
+ }
+
+ diff = strcmp(key, trybuf);
+
+ if (!diff)
+ break;
+
+ // Remember that some probed key starts with the search key.
+ if (!strncmp(trybuf, key, keylen)) substr = true;
+
+ // Narrow the range; both arms of the conditional yield tryoff.
+ if (diff < 0)
+ tailoff = (tryoff == headoff) ? headoff : tryoff;
+ else headoff = tryoff;
+
+ // Once only two adjacent records are left, allow one more
+ // probe and then collapse the range so that the loop ends.
+ if (tailoff == headoff + 6) {
+ if (quitflag++)
+ headoff = tailoff;
+ }
+ }
+
+ // didn't find exact match
+ if (headoff >= tailoff) {
+ tryoff = headoff;
+ if (!substr && ((tryoff != maxoff)||(strncmp(key, maxbuf, keylen)<0))) {
+ awayFromSubstrCheck = true;
+ away--; // if our entry doesn't startwith our key, prefer the previous entry over the next
+ }
+ }
+ if (trybuf)
+ free(trybuf);
+ delete [] key;
+ if (maxbuf)
+ free(maxbuf);
+ }
+ else tryoff = 0;
+
+ // Read the record that the search settled on: a 4-byte start offset
+ // followed by a 2-byte size, both converted to host byte order.
+ idxfd->seek(tryoff, SEEK_SET);
+
+ __u32 tmpStart;
+ __u16 tmpSize;
+ *start = *size = tmpStart = tmpSize = 0;
+ idxfd->read(&tmpStart, 4);
+ idxfd->read(&tmpSize, 2);
+ if (idxoff)
+ *idxoff = tryoff;
+
+ *start = swordtoarch32(tmpStart);
+ *size = swordtoarch16(tmpSize);
+
+ // Step "away" records forward (away > 0) or backward (away < 0).
+ while (away) {
+ unsigned long laststart = *start;
+ unsigned short lastsize = *size;
+ long lasttry = tryoff;
+ tryoff += (away > 0) ? 6 : -6;
+
+ // Stop when the walk would leave the index or the seek fails, and
+ // fall back to the last record that was read successfully.
+ bool bad = false;
+ if (((tryoff + (away*6)) < -6) || (tryoff + (away*6) > (maxoff+6)))
+ bad = true;
+ else if (idxfd->seek(tryoff, SEEK_SET) < 0)
+ bad = true;
+ if (bad) {
+ // Not reported as an error when the step was only the implicit
+ // "prefer the previous entry" adjustment made above.
+ if(!awayFromSubstrCheck)
+ retval = -1;
+ *start = laststart;
+ *size = lastsize;
+ tryoff = lasttry;
+ if (idxoff)
+ *idxoff = tryoff;
+ break;
+ }
+ idxfd->read(&tmpStart, 4);
+ idxfd->read(&tmpSize, 2);
+ if (idxoff)
+ *idxoff = tryoff;
+
+ *start = swordtoarch32(tmpStart);
+ *size = swordtoarch16(tmpSize);
+
+ // A record only counts as a step if it differs from the previous
+ // one and has a non-zero size.
+ if (((laststart != *start) || (lastsize != *size)) && (*size))
+ away += (away < 0) ? 1 : -1;
+ }
+
+ // Remember where we ended up; the next search starts probing here.
+ lastoff = tryoff;
+ }
+ else {
+ // No usable index file: report a general error with zeroed outputs.
+ *start = 0;
+ *size = 0;
+ if (idxoff)
+ *idxoff = 0;
+ retval = -1;
+ }
+ return retval;
+}
+
+
+/******************************************************************************
+ * RawStr::readText - gets text at a given offset
+ *
+ * ENT:
+ * istart - starting offset where the entry is located in the data file
+ * isize - address of the size of the entry; the value is modified
+ * (incremented on every pass, and replaced by the size of the
+ * target entry when a link is followed)
+ * idxbuf - address of a pointer that receives the key stored at istart;
+ * any previous buffer is released with delete [] and a new one
+ * is allocated with new []
+ * buf - buffer to store text
+ *
+ */
+
+void RawStr::readText(__u32 istart, __u16 *isize, char **idxbuf, SWBuf &buf) const
+{
+ unsigned int ch;
+ // Key of the entry originally asked for; it is what ends up in *idxbuf
+ // even when links are followed.
+ char *idxbuflocal = 0;
+ getIDXBufDat(istart, &idxbuflocal);
+ __u32 start = istart;
+
+ do {
+ if (*idxbuf)
+ delete [] *idxbuf;
+
+ // Zero-filled buffer one byte larger than the entry, so the data
+ // read below is always terminated. Note that *isize is incremented.
+ buf = "";
+ buf.setFillByte(0);
+ buf.setSize(++(*isize));
+
+ *idxbuf = new char [ (*isize) ];
+
+ datfd->seek(start, SEEK_SET);
+ datfd->read(buf.getRawData(), (int)((*isize) - 1));
+
+ for (ch = 0; buf[ch]; ch++) { // skip over index string
+ if (buf[ch] == 10) {
+ ch++;
+ break;
+ }
+ }
+ // Keep only what follows the first line (the key).
+ buf = SWBuf(buf.c_str()+ch);
+ // resolve link
+ if (!strncmp(buf.c_str(), "@LINK", 5)) {
+ for (ch = 0; buf[ch]; ch++) { // null before nl
+ if (buf[ch] == 10) {
+ buf[ch] = 0;
+ break;
+ }
+ }
+ // The target key follows "@LINK" and one separator character.
+ // NOTE(review): the return value of findOffset() is ignored and
+ // nothing limits the number of links followed -- confirm that
+ // link cycles cannot occur in module data.
+ findOffset(buf.c_str() + 6, &start, isize);
+ }
+ else break;
+ }
+ while (true); // while we're resolving links
+
+ // Copy the original key into *idxbuf, truncated to fit the buffer
+ // allocated above, and release the temporary copy.
+ if (idxbuflocal) {
+ int localsize = strlen(idxbuflocal);
+ localsize = (localsize < (*isize - 1)) ? localsize : (*isize - 1);
+ strncpy(*idxbuf, idxbuflocal, localsize);
+ (*idxbuf)[localsize] = 0;
+ free(idxbuflocal);
+ }
+}
+
+
+/******************************************************************************
+ * RawStr::doSetText - Sets (or deletes) the text for the given key
+ *
+ * ENT: ikey - key for this entry
+ *      buf  - buffer to store
+ *      len  - length of buffer; a negative value means buf is null
+ *             terminated; 0 deletes the index record at the position found
+ *             for the key
+ *
+ * The new entry (key, CR LF, text) is appended to the data file and an index
+ * record pointing at it is inserted at the position found for the key. If
+ * the existing entry for the key is an "@LINK" entry, the link is followed
+ * first.
+ */
+
+void RawStr::doSetText(const char *ikey, const char *buf, long len)
+{
+
+    __u32 start, outstart;
+    __u32 idxoff;
+    __u32 endoff;
+    __s32 shiftSize;
+    __u16 size;
+    __u16 outsize;
+    static const char nl[] = {13, 10};
+    char *key = 0;
+    char *dbKey = 0;
+    char *idxBytes = 0;
+    char *outbuf = 0;
+
+    char errorStatus = findOffset(ikey, &start, &size, 0, &idxoff);
+    stdstr(&key, ikey, 2);
+    if (!caseSensitive) toupperstr_utf8(key, strlen(key)*2);
+
+    len = (len < 0) ? strlen(buf) : len;
+
+    // key stored at the position that findOffset() selected
+    getIDXBufDat(start, &dbKey);
+
+    // Compare once instead of up to three times. A key that sorts before
+    // the stored one keeps idxoff as it is.
+    const int keyOrder = strcmp(key, dbKey);
+
+    if (keyOrder > 0) {
+        // the new record goes after the one that was found
+        if (errorStatus != (char)-2)    // not a new file
+            idxoff += 6;
+        else idxoff = 0;
+    }
+    else if ((keyOrder == 0) && (len > 0 /*we're not deleting*/)) {    // got absolute entry
+        bool followedLink;
+        do {
+            followedLink = false;
+
+            char *tmpbuf = new char [ size + 2 ];
+            memset(tmpbuf, 0, size + 2);
+            datfd->seek(start, SEEK_SET);
+            datfd->read(tmpbuf, (int)(size - 1));
+
+            char *ch;
+            for (ch = tmpbuf; *ch; ch++) {    // skip over index string
+                if (*ch == 10) {
+                    ch++;
+                    break;
+                }
+            }
+            memmove(tmpbuf, ch, size - (unsigned short)(ch-tmpbuf));
+
+            // resolve link (len is known to be > 0 in this branch)
+            if (!strncmp(tmpbuf, "@LINK", 5)) {
+                for (ch = tmpbuf; *ch; ch++) {    // null before nl
+                    if (*ch == 10) {
+                        *ch = 0;
+                        break;
+                    }
+                }
+                findOffset(tmpbuf + 6, &start, &size, 0, &idxoff);
+                followedLink = true;
+            }
+
+            // The scratch buffer used to be leaked on every pass.
+            delete [] tmpbuf;
+        }
+        while (followedLink);    // while we're resolving links
+    }
+
+    endoff = idxfd->seek(0, SEEK_END);
+
+    // Save the index records from idxoff to the end of the file; they are
+    // written back after the new record (insert) or minus the first record
+    // (delete).
+    shiftSize = endoff - idxoff;
+
+    if (shiftSize > 0) {
+        idxBytes = new char [ shiftSize ];
+        idxfd->seek(idxoff, SEEK_SET);
+        idxfd->read(idxBytes, shiftSize);
+    }
+
+    // data file entry: key, CR, LF, text
+    outbuf = new char [ len + strlen(key) + 5 ];
+    sprintf(outbuf, "%s%c%c", key, 13, 10);
+    size = strlen(outbuf);
+    memcpy(outbuf + size, buf, len);
+    size = outsize = size + (len);
+
+    start = outstart = datfd->seek(0, SEEK_END);
+
+    // index records are stored in the module byte order
+    outstart = archtosword32(start);
+    outsize = archtosword16(size);
+
+    idxfd->seek(idxoff, SEEK_SET);
+    if (len > 0) {
+        datfd->seek(start, SEEK_SET);
+        datfd->write(outbuf, (int)size);
+
+        // add a new line to make data file easier to read in an editor
+        datfd->write(&nl, 2);
+
+        idxfd->write(&outstart, 4);
+        idxfd->write(&outsize, 2);
+        if (idxBytes) {
+            idxfd->write(idxBytes, shiftSize);
+            delete [] idxBytes;
+        }
+    }
+    else {    // delete entry
+        if (idxBytes) {
+            idxfd->write(idxBytes+6, shiftSize-6);
+            idxfd->seek(-1, SEEK_CUR);    // last valid byte
+            FileMgr::getSystemFileMgr()->trunc(idxfd);    // truncate index
+            delete [] idxBytes;
+        }
+    }
+
+    delete [] key;
+    delete [] outbuf;
+    free(dbKey);
+}
+
+
+/******************************************************************************
+ * RawStr::doLinkEntry - links one entry to another
+ *
+ * ENT: destkey - key of the entry that is the target of the link
+ *      srckey  - key of the entry that becomes a link
+ *
+ * The link is stored as an ordinary entry under srckey whose text is
+ * "@LINK " followed by destkey.
+ */
+
+void RawStr::doLinkEntry(const char *destkey, const char *srckey) {
+    // "@LINK " is 6 characters; one more byte holds the terminator
+    const size_t textLen = strlen(destkey) + 6;
+    char *linkText = new char [ textLen + 1 ];
+
+    sprintf(linkText, "@LINK %s", destkey);
+    doSetText(srckey, linkText);
+
+    delete [] linkText;
+}
+
+/******************************************************************************
+ * RawStr::createModule - Creates new (empty) module files
+ *
+ * ENT: ipath - path prefix of the module files; a trailing '/' or '\' is
+ *              dropped and ".dat" / ".idx" are appended
+ * RET: error status: 0 normally, -1 if ipath is null
+ *
+ * Existing files with those names are removed first.
+ * NOTE(review): failures to create the files are not reported; 0 is returned
+ * in that case as well, as before.
+ */
+
+signed char RawStr::createModule(const char *ipath)
+{
+    if (!ipath)
+        return -1;
+
+    char *path = 0;
+    char *buf = new char [ strlen (ipath) + 20 ];
+    FileDesc *fd, *fd2;
+
+    stdstr(&path, ipath);
+
+    // Strip one trailing path separator. The length check keeps an empty
+    // path from being indexed at -1.
+    const size_t pathLen = strlen(path);
+    if (pathLen && ((path[pathLen-1] == '/') || (path[pathLen-1] == '\\')))
+        path[pathLen-1] = 0;
+
+    // data file
+    sprintf(buf, "%s.dat", path);
+    FileMgr::removeFile(buf);
+    fd = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE);
+    fd->getFd();    // requesting the descriptor is what gets the file created
+    FileMgr::getSystemFileMgr()->close(fd);
+
+    // index file
+    sprintf(buf, "%s.idx", path);
+    FileMgr::removeFile(buf);
+    fd2 = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE);
+    fd2->getFd();
+    FileMgr::getSystemFileMgr()->close(fd2);
+
+    delete [] path;
+    delete [] buf;    // used to be leaked
+
+    return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/common/rawstr4.cpp b/src/modules/common/rawstr4.cpp
new file mode 100644
index 0000000..e2ce899
--- /dev/null
+++ b/src/modules/common/rawstr4.cpp
@@ -0,0 +1,538 @@
+/******************************************************************************
+ *
+ * rawstr4.cpp - code for class 'RawStr4'- a module that reads raw text
+ * files using a .idx index file and a .dat data file
+ * and provides lookup and parsing functions based on
+ * class StrKey
+ *
+ * $Id: rawstr4.cpp 2833 2013-06-29 06:40:28Z chrislit $
+ *
+ * Copyright 2001-2013 CrossWire Bible Society (http://www.crosswire.org)
+ * CrossWire Bible Society
+ * P. O. Box 2528
+ * Tempe, AZ 85280-2528
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ */
+
+#include <stdio.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <sys/types.h>
+
+#include <utilstr.h>
+#include <rawstr4.h>
+#include <sysdata.h>
+#include <swlog.h>
+#include <filemgr.h>
+#include <swbuf.h>
+#include <stringmgr.h>
+
+SWORD_NAMESPACE_START
+
+/******************************************************************************
+ * RawStr Statics
+ */
+
+// incremented by the RawStr4 constructor and decremented by its destructor
+int RawStr4::instance = 0;
+// size in bytes of one .idx record: a 4-byte data offset followed by a 4-byte size
+const int RawStr4::IDXENTRYSIZE = 8;
+
+
+/******************************************************************************
+ * RawStr Constructor - Initializes data for instance of RawStr
+ *
+ * ENT: ipath - path of the directory where data and index files are located.
+ * be sure to include the trailing separator (e.g. '/' or '\')
+ * (e.g. 'modules/texts/rawtext/webster/')
+ */
+
+RawStr4::RawStr4(const char *ipath, int fileMode, bool caseSensitive) : caseSensitive(caseSensitive)
+{
+	// Opens <ipath>.idx and <ipath>.dat through the system FileMgr.
+	// fileMode == -1 selects read/write.
+	SWBuf buf;
+
+	nl = '\n';
+	lastoff = -1;	// no cached index position yet; findOffset() treats -1 as "unset"
+	path = 0;
+	stdstr(&path, ipath);
+
+	if (fileMode == -1) { // try read/write if possible
+		fileMode = FileMgr::RDWR;
+	}
+
+	buf.setFormatted("%s.idx", path);
+	idxfd = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
+
+	buf.setFormatted("%s.dat", path);
+	datfd = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
+
+	// datfd is a pointer: the former "datfd < 0" ordered comparison could never
+	// detect a failed open and is rejected by current compilers
+	if (!datfd) {
+		SWLog::getSystemLog()->logError("%d", errno);
+	}
+
+	instance++;
+}
+
+
+/******************************************************************************
+ * RawStr Destructor - Cleans up instance of RawStr
+ */
+
+RawStr4::~RawStr4()
+{
+	// release the path copy made in the constructor; deleting a null pointer is a no-op
+	delete [] path;
+
+	instance--;
+
+	// hand both descriptors back to the file manager, index first
+	FileMgr::getSystemFileMgr()->close(idxfd);
+	FileMgr::getSystemFileMgr()->close(datfd);
+}
+
+
+/******************************************************************************
+ * RawStr4::getidxbufdat - Gets the index string at the given idx offset
+ * NOTE: buf is allocated and must be freed by
+ * calling function
+ *
+ * ENT: ioffset - offset in dat file to lookup
+ * buf - address of pointer to allocate for storage of string
+ */
+
+void RawStr4::getIDXBufDat(long ioffset, char **buf) const
+{
+	// Reads the key string stored at ioffset in the .dat file into *buf.
+	// *buf is (re)allocated with malloc/realloc, so the caller releases it with free().
+	// When the data file is not usable, *buf is set to an empty string.
+	int size;
+	char ch;
+	// datfd is a pointer: test it for null and ask the descriptor whether it is usable,
+	// the same way findOffset() checks idxfd (the former "datfd > 0" compared a pointer with 0)
+	if (datfd && datfd->getFd() >= 0) {
+		datfd->seek(ioffset, SEEK_SET);
+		// measure the key: it ends at a backslash, LF, CR, or when nothing more can be read
+		for (size = 0; datfd->read(&ch, 1) == 1; size++) {
+			if ((ch == '\\') || (ch == 10) || (ch == 13))
+				break;
+		}
+		// twice the key length (+ NUL) is allocated; toupperstr_utf8() below is given size*2 as its limit
+		*buf = (*buf) ? (char *)realloc(*buf, size*2 + 1) : (char *)malloc(size*2 + 1);
+		if (size) {
+			datfd->seek(ioffset, SEEK_SET);
+			datfd->read(*buf, size);
+		}
+		(*buf)[size] = 0;
+		if (!caseSensitive) toupperstr_utf8(*buf, size*2);
+	}
+	else {
+		*buf = (*buf) ? (char *)realloc(*buf, 1) : (char *)malloc(1);
+		**buf = 0;
+	}
+}
+
+
+/******************************************************************************
+ * RawStr4::getidxbuf - Gets the index string at the given idx offset
+ * NOTE: buf is allocated and must be freed by
+ * calling function
+ *
+ * ENT: ioffset - offset in idx file to lookup
+ * buf - address of pointer to allocate for storage of string
+ */
+
+void RawStr4::getIDXBuf(long ioffset, char **buf) const
+{
+	// Reads the 4-byte data offset stored at ioffset in the .idx file and fetches the
+	// key string found at that offset in the .dat file (see getIDXBufDat for the
+	// allocation contract of *buf).  *buf is left untouched when the index is not usable.
+	__u32 offset = 0;	// stays 0 if the read below fails, instead of being indeterminate
+
+	// idxfd is a pointer: the former "idxfd > 0" compared a pointer with 0
+	if (idxfd && idxfd->getFd() >= 0) {
+		idxfd->seek(ioffset, SEEK_SET);
+
+		idxfd->read(&offset, 4);
+		offset = swordtoarch32(offset);
+
+		getIDXBufDat(offset, buf);
+	}
+}
+
+
+/******************************************************************************
+ * RawStr4::findoffset - Finds the offset of the key string from the indexes
+ *
+ * ENT: key - key string to lookup
+ * start - address to store the starting offset
+ * size - address to store the size of the entry
+ * away - number of entries before or after to jump
+ * (default = 0)
+ *
+ * RET: error status -1 general error; -2 new file
+ */
+
+signed char RawStr4::findOffset(const char *ikey, __u32 *start, __u32 *size, long away, __u32 *idxoff) const
+{
+ // Binary-searches the 8-byte records of the .idx file for ikey, then optionally
+ // steps 'away' distinct entries forward (positive) or backward (negative).
+ // Outputs: *start / *size of the entry reached, and its index offset in *idxoff when idxoff is non-null.
+ // Returns 0 on success, -1 on general error, -2 when the index file is empty.
+ char *trybuf, *maxbuf, *key = 0, quitflag = 0;
+ signed char retval = -1;
+ long headoff, tailoff, tryoff = 0, maxoff = 0;
+ int diff = 0;
+ bool awayFromSubstrCheck = false;
+
+ if (idxfd->getFd() >=0) {
+ // offset of the last 8-byte record; negative when the index holds no record
+ tailoff = maxoff = idxfd->seek(0, SEEK_END) - 8;
+
+ retval = (tailoff >= 0) ? 0 : -2; // if NOT new file
+ if (*ikey && retval != -2) {
+ headoff = 0;
+
+ // NOTE(review): the 3 passed to stdstr and strlen(key)*3 below look like head-room for
+ // upper-casing UTF-8 - confirm against stdstr / toupperstr_utf8
+ stdstr(&key, ikey, 3);
+ if (!caseSensitive) toupperstr_utf8(key, strlen(key)*3);
+
+ int keylen = strlen(key);
+ bool substr = false;
+
+ trybuf = maxbuf = 0;
+ // key of the last record, used after the search to decide whether to step back
+ getIDXBuf(maxoff, &maxbuf);
+
+ while (headoff < tailoff) {
+ // first probe reuses the position cached by the previous call, if any;
+ // otherwise probe the record-aligned midpoint of [headoff, tailoff]
+ tryoff = (lastoff == -1) ? headoff + ((((tailoff / 8) - (headoff / 8))) / 2) * 8 : lastoff;
+ lastoff = -1;
+ getIDXBuf(tryoff, &trybuf);
+
+ if (!*trybuf && tryoff) { // In case of extra entry at end of idx (not first entry)
+ tryoff += (tryoff > (maxoff / 2))?-8:8;
+ retval = -1;
+ break;
+ }
+
+ diff = strcmp(key, trybuf);
+
+ if (!diff)
+ break;
+
+ // remember that at least one probed entry starts with the key
+ if (!strncmp(trybuf, key, keylen)) substr = true;
+
+ if (diff < 0)
+ tailoff = (tryoff == headoff) ? headoff : tryoff;
+ else headoff = tryoff;
+
+ // range narrowed to two adjacent records: allow one more pass, then force the loop to end
+ if (tailoff == headoff + 8) {
+ if (quitflag++)
+ headoff = tailoff;
+ }
+ }
+
+ // didn't find exact match
+ if (headoff >= tailoff) {
+ tryoff = headoff;
+ if (!substr && ((tryoff != maxoff)||(strncmp(key, maxbuf, keylen)<0))) {
+ awayFromSubstrCheck = true;
+ away--; // if our entry doesn't startwith our key, prefer the previous entry over the next
+ }
+ }
+ // trybuf / maxbuf were allocated with malloc/realloc by getIDXBufDat; key with new[] by stdstr
+ if (trybuf)
+ free(trybuf);
+ delete [] key;
+ if (maxbuf)
+ free(maxbuf);
+ }
+ // empty key or empty index: position on the first record
+ else tryoff = 0;
+
+ idxfd->seek(tryoff, SEEK_SET);
+
+ // read the record at tryoff: 4-byte start followed by 4-byte size
+ __u32 tmpStart, tmpSize;
+ *start = *size = tmpStart = tmpSize = 0;
+ idxfd->read(&tmpStart, 4);
+ idxfd->read(&tmpSize, 4);
+ if (idxoff)
+ *idxoff = tryoff;
+
+ *start = swordtoarch32(tmpStart);
+ *size = swordtoarch32(tmpSize);
+
+ // step one record at a time until 'away' distinct entries have been passed
+ while (away) {
+ unsigned long laststart = *start;
+ unsigned long lastsize = *size;
+ long lasttry = tryoff;
+ tryoff += (away > 0) ? 8 : -8;
+
+ bool bad = false;
+ if (((tryoff + (away*8)) < -8) || (tryoff + (away*8) > (maxoff+8)))
+ bad = true;
+ else if (idxfd->seek(tryoff, SEEK_SET) < 0)
+ bad = true;
+ if (bad) {
+ // ran off the index: restore the last good record; this is reported as an error
+ // unless the step was only the implicit "prefer previous entry" adjustment
+ if(!awayFromSubstrCheck)
+ retval = -1;
+ *start = laststart;
+ *size = lastsize;
+ tryoff = lasttry;
+ if (idxoff)
+ *idxoff = tryoff;
+ break;
+ }
+ idxfd->read(&tmpStart, 4);
+ idxfd->read(&tmpSize, 4);
+ if (idxoff)
+ *idxoff = tryoff;
+
+ *start = swordtoarch32(tmpStart);
+ *size = swordtoarch32(tmpSize);
+
+ // only records that differ from the previous one and have a non-zero size count as a step
+ if (((laststart != *start) || (lastsize != *size)) && (*size))
+ away += (away < 0) ? 1 : -1;
+ }
+
+ // cache the final position; the next call uses it as its first probe
+ lastoff = tryoff;
+ }
+ else {
+ *start = 0;
+ *size = 0;
+ if (idxoff)
+ *idxoff = 0;
+ retval = -1;
+ }
+ return retval;
+}
+
+
+/******************************************************************************
+ * RawStr4::readtext - gets text at a given offset
+ *
+ * ENT:
+ * start - starting offset where the text is located in the file
+ * size - size of text entry
+ * buf - buffer to store text
+ *
+ */
+
+void RawStr4::readText(__u32 istart, __u32 *isize, char **idxbuf, SWBuf &buf) const
+{
+ // Reads the entry at istart into buf, following "@LINK <key>" entries until real text is reached.
+ // *isize is updated in place (it is incremented on every pass and overwritten when a link is followed).
+ // *idxbuf is replaced by a new[] buffer that receives the key string found at istart.
+ unsigned int ch;
+ char *idxbuflocal = 0;
+ // key string of the entry originally requested, fetched before any link is followed
+ getIDXBufDat(istart, &idxbuflocal);
+ __u32 start = istart;
+
+ do {
+ // drop the buffer from the caller or from the previous pass
+ if (*idxbuf)
+ delete [] *idxbuf;
+
+ buf = "";
+ buf.setFillByte(0);
+ buf.setSize(++(*isize));
+
+ *idxbuf = new char [ (*isize) ];
+
+ // read one byte less than the (already incremented) size
+ datfd->seek(start, SEEK_SET);
+ datfd->read(buf.getRawData(), (int)((*isize) - 1));
+
+ for (ch = 0; buf[ch]; ch++) { // skip over index string
+ if (buf[ch] == 10) {
+ ch++;
+ break;
+ }
+ }
+ buf = SWBuf(buf.c_str()+ch);
+ // resolve link
+ if (!strncmp(buf.c_str(), "@LINK", 5)) {
+ for (ch = 0; buf[ch]; ch++) { // null before nl
+ if (buf[ch] == 10) {
+ buf[ch] = 0;
+ break;
+ }
+ }
+ // + 6 skips the "@LINK " prefix, leaving the target key
+ findOffset(buf.c_str() + 6, &start, isize);
+ }
+ else break;
+ }
+ while (true); // while we're resolving links
+
+ if (idxbuflocal) {
+ // copy the original key into *idxbuf, truncated to fit in *isize - 1 characters plus the NUL
+ unsigned int localsize = strlen(idxbuflocal);
+ localsize = (localsize < (*isize - 1)) ? localsize : (*isize - 1);
+ strncpy(*idxbuf, idxbuflocal, localsize);
+ (*idxbuf)[localsize] = 0;
+ free(idxbuflocal);
+ }
+}
+
+
+/******************************************************************************
+ * RawStr4::doSetText - Sets text for the entry with the given key
+ *
+ * ENT: ikey - key for this entry
+ * buf - buffer to store
+ * len - length of buffer (negative - buf is null terminated; 0 - delete the entry)
+ */
+
+void RawStr4::doSetText(const char *ikey, const char *buf, long len) {
+	// Stores buf as the text of entry ikey, or removes the entry's index record when len is 0.
+	// New text is always appended to the .dat file; the .idx file is rewritten from the
+	// affected record onward.  A negative len means buf is NUL-terminated.
+
+	__u32 start, outstart;
+	__u32 idxoff;
+	__u32 endoff;
+	__s32 shiftSize;
+	__u32 size;
+	__u32 outsize;
+	static const char nl[] = {13, 10};
+	char *tmpbuf = 0;
+	char *key = 0;
+	char *dbKey = 0;
+	char *idxBytes = 0;
+	char *outbuf = 0;
+	char *ch = 0;
+
+	char errorStatus = findOffset(ikey, &start, &size, 0, &idxoff);
+	stdstr(&key, ikey, 3);
+	if (!caseSensitive) toupperstr_utf8(key, strlen(key)*3);
+
+	len = (len < 0) ? strlen(buf) : len;
+	// key actually stored at the position findOffset() settled on
+	getIDXBufDat(start, &dbKey);
+
+	const int keyCmp = strcmp(key, dbKey);
+	if (keyCmp < 0) {
+		// new key sorts before the entry found: insert at idxoff
+	}
+	else if (keyCmp > 0) {
+		if (errorStatus != (char)-2)	// not a new file
+			idxoff += 8;
+		else idxoff = 0;
+	}
+	else if (len>0/*we're not deleting*/) { // got absolute entry
+		do {
+			tmpbuf = new char [ size + 2 ];
+			memset(tmpbuf, 0, size + 2);
+			datfd->seek(start, SEEK_SET);
+			datfd->read(tmpbuf, (int)(size - 1));
+
+			for (ch = tmpbuf; *ch; ch++) {		// skip over index string
+				if (*ch == 10) {
+					ch++;
+					break;
+				}
+			}
+			memmove(tmpbuf, ch, size - (unsigned long)(ch-tmpbuf));
+
+			// resolve link
+			if (!strncmp(tmpbuf, "@LINK", 5) && (len > 0)) {
+				for (ch = tmpbuf; *ch; ch++) {		// null before nl
+					if (*ch == 10) {
+						*ch = 0;
+						break;
+					}
+				}
+				// "@LINK " is 6 characters (see doLinkEntry / readText); the former + 8
+				// dropped the first two characters of the target key
+				findOffset(tmpbuf + 6, &start, &size, 0, &idxoff);
+				++size;
+				// release this pass's buffer before the next pass allocates a new one
+				delete [] tmpbuf;
+				tmpbuf = 0;
+			}
+			else break;
+		}
+		while (true);	// while we're resolving links
+	}
+
+	endoff = idxfd->seek(0, SEEK_END);
+
+	// bytes of index from the affected record to the end of the file
+	shiftSize = endoff - idxoff;
+
+	if (shiftSize > 0) {
+		idxBytes = new char [ shiftSize ];
+		idxfd->seek(idxoff, SEEK_SET);
+		idxfd->read(idxBytes, shiftSize);
+	}
+
+	// data record layout: key, CR, LF, text
+	outbuf = new char [ len + strlen(key) + 5 ];
+	sprintf(outbuf, "%s%c%c", key, 13, 10);
+	size = strlen(outbuf);
+	memcpy(outbuf + size, buf, len);
+	size = outsize = size + len;
+
+	start = outstart = datfd->seek(0, SEEK_END);
+
+	outstart = archtosword32(start);
+	outsize = archtosword32(size);
+
+	idxfd->seek(idxoff, SEEK_SET);
+	if (len>0) {
+		datfd->seek(start, SEEK_SET);
+		datfd->write(outbuf, (long)size);
+
+		// add a new line to make data file easier to read in an editor
+		datfd->write(&nl, 2);
+
+		idxfd->write(&outstart, 4);
+		idxfd->write(&outsize, 4);
+		if (idxBytes) {
+			idxfd->write(idxBytes, shiftSize);
+			delete [] idxBytes;
+		}
+	}
+	else {	// delete entry
+		if (idxBytes) {
+			// rewrite everything after the 8-byte record being removed, then cut the file there
+			idxfd->write(idxBytes+8, shiftSize-8);
+			idxfd->seek(-1, SEEK_CUR);	// last valid byte
+			FileMgr::getSystemFileMgr()->trunc(idxfd);	// truncate index
+			delete [] idxBytes;
+		}
+	}
+
+	delete [] tmpbuf;	// was leaked before; a no-op when no existing entry was read
+	delete [] key;
+	delete [] outbuf;
+	free(dbKey);
+}
+
+
+/******************************************************************************
+ * RawStr4::doLinkEntry - links one entry to another
+ *
+ * ENT: destkey - key of the entry being linked to
+ * srckey - key of the entry whose text becomes "@LINK <destkey>"
+ */
+
+void RawStr4::doLinkEntry(const char *destkey, const char *srckey) {
+	// room for the 6-character "@LINK " prefix, the destination key and the NUL
+	const size_t linkLen = strlen(destkey) + 7;
+	char *linkText = new char [ linkLen ];
+
+	sprintf(linkText, "@LINK %s", destkey);
+
+	// the link marker is stored as the text of the source entry
+	doSetText(srckey, linkText);
+
+	delete [] linkText;
+}
+
+
+/******************************************************************************
+ * RawStr4::createModule - Creates new module files
+ *
+ * ENT: ipath - base path of the module files; ".dat" and ".idx" are appended
+ * RET: error status
+ */
+
+signed char RawStr4::createModule(const char *ipath)
+{
+	// Creates a fresh, empty pair of module files: <ipath>.dat and <ipath>.idx.
+	// Any existing file of the same name is removed first.  Always returns 0.
+	char *path = 0;
+	char *buf = new char [ strlen (ipath) + 20 ];	// path + longest suffix, with slack
+	FileDesc *fd, *fd2;
+
+	stdstr(&path, ipath);
+
+	// strip one trailing path separator; guard against an empty path so we never index path[-1]
+	const size_t pathLen = strlen(path);
+	if (pathLen && ((path[pathLen-1] == '/') || (path[pathLen-1] == '\\')))
+		path[pathLen-1] = 0;
+
+	sprintf(buf, "%s.dat", path);
+	FileMgr::removeFile(buf);
+	fd = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE);
+	fd->getFd();	// NOTE(review): presumably forces the file to be opened/created now - confirm against FileMgr
+	FileMgr::getSystemFileMgr()->close(fd);
+
+	sprintf(buf, "%s.idx", path);
+	FileMgr::removeFile(buf);
+	fd2 = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE);
+	fd2->getFd();
+	FileMgr::getSystemFileMgr()->close(fd2);
+
+	delete [] path;
+	delete [] buf;	// was leaked before
+
+	return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/common/rawverse.cpp b/src/modules/common/rawverse.cpp
new file mode 100644
index 0000000..5527d38
--- /dev/null
+++ b/src/modules/common/rawverse.cpp
@@ -0,0 +1,311 @@
+/******************************************************************************
+ *
+ * rawverse.cpp - code for class 'RawVerse'- a module that reads raw text
+ * files: ot and nt using indexs ??.bks ??.cps ??.vss
+ * and provides lookup and parsing functions based on
+ * class VerseKey
+ *
+ *
+ * Copyright 1997-2013 CrossWire Bible Society (http://www.crosswire.org)
+ * CrossWire Bible Society
+ * P. O. Box 2528
+ * Tempe, AZ 85280-2528
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ */
+
+
+
+#include <ctype.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <errno.h>
+
+#include <utilstr.h>
+#include <rawverse.h>
+#include <versekey.h>
+#include <sysdata.h>
+#include <filemgr.h>
+#include <swbuf.h>
+
+
+SWORD_NAMESPACE_START
+
+/******************************************************************************
+ * RawVerse Statics
+ */
+
+// incremented by the RawVerse constructor and decremented by its destructor
+int RawVerse::instance = 0;
+// CR/LF pair that doSetText() appends after each entry written to the text file
+const char *RawVerse::nl = "\r\n";
+
+
+/******************************************************************************
+ * RawVerse Constructor - Initializes data for instance of RawVerse
+ *
+ * ENT: ipath - path of the directory where data and index files are located.
+ * be sure to include the trailing separator (e.g. '/' or '\')
+ * (e.g. 'modules/texts/rawtext/webster/')
+ */
+
+RawVerse::RawVerse(const char *ipath, int fileMode)
+{
+	SWBuf fileName;
+
+	path = 0;
+	stdstr(&path, ipath);
+
+	// drop a single trailing path separator, if there is one
+	const size_t lastPos = strlen(path) - 1;
+	if (path[lastPos] == '/' || path[lastPos] == '\\')
+		path[lastPos] = 0;
+
+	// -1 means the caller has no preference: try read/write
+	if (fileMode == -1)
+		fileMode = FileMgr::RDWR;
+
+	// slot 0 holds the "ot" files, slot 1 the "nt" files
+	fileName.setFormatted("%s/ot.vss", path);
+	idxfp[0] = FileMgr::getSystemFileMgr()->open(fileName, fileMode, true);
+
+	fileName.setFormatted("%s/nt.vss", path);
+	idxfp[1] = FileMgr::getSystemFileMgr()->open(fileName, fileMode, true);
+
+	fileName.setFormatted("%s/ot", path);
+	textfp[0] = FileMgr::getSystemFileMgr()->open(fileName, fileMode, true);
+
+	fileName.setFormatted("%s/nt", path);
+	textfp[1] = FileMgr::getSystemFileMgr()->open(fileName, fileMode, true);
+
+	instance++;
+}
+
+
+/******************************************************************************
+ * RawVerse Destructor - Cleans up instance of RawVerse
+ */
+
+RawVerse::~RawVerse()
+{
+	// release the path copy made in the constructor; deleting a null pointer is a no-op
+	delete [] path;
+
+	instance--;
+
+	// close the index and text descriptor of each slot in turn
+	for (int slot = 0; slot < 2; ++slot) {
+		FileMgr::getSystemFileMgr()->close(idxfp[slot]);
+		FileMgr::getSystemFileMgr()->close(textfp[slot]);
+	}
+}
+
+
+/******************************************************************************
+ * RawVerse::findoffset - Finds the offset of the key verse from the indexes
+ *
+ * ENT: testmt - testament to find (0 - Bible/module introduction)
+ * idxoff - offset into .vss
+ * start - address to store the starting offset
+ * size - address to store the size of the entry
+ */
+
+void RawVerse::findOffset(char testmt, long idxoff, long *start, unsigned short *size) const {
+	// each index record is 6 bytes: a 4-byte start followed by a 2-byte size
+	idxoff *= 6;
+	if (!testmt)
+		testmt = ((idxfp[1]) ? 1:2);
+
+	FileDesc *index = idxfp[testmt-1];
+
+	if (index->getFd() < 0) {
+		// index not usable: report an empty entry
+		*start = 0;
+		*size = 0;
+		return;
+	}
+
+	__s32 rawStart;
+	__u16 rawSize;
+
+	index->seek(idxoff, SEEK_SET);
+	index->read(&rawStart, 4);
+	const long sizeBytesRead = index->read(&rawSize, 2);
+
+	*start = swordtoarch32(rawStart);
+	*size = swordtoarch16(rawSize);
+
+	if (sizeBytesRead < 2) {
+		// the size could not be read: use "from start to the end of the text file", or 0 when start is 0
+		*size = (unsigned short)((*start) ? (textfp[testmt-1]->seek(0, SEEK_END) - (long)*start) : 0);
+	}
+}
+
+
+/******************************************************************************
+ * RawVerse::readtext - gets text at a given offset
+ *
+ * ENT: testmt - testament file to search in (0 - Old; 1 - New)
+ * start - starting offset where the text is located in the file
+ * size - size of text entry + 2 (null)(null)
+ * buf - buffer to store text
+ *
+ */
+
+void RawVerse::readText(char testmt, long start, unsigned short size, SWBuf &buf) const {
+	// start from an empty, zero-filled buffer with room for size bytes plus one
+	buf = "";
+	buf.setFillByte(0);
+	buf.setSize(size + 1);
+
+	if (!testmt)
+		testmt = ((idxfp[1]) ? 1:2);
+
+	// nothing to read for an empty entry
+	if (!size)
+		return;
+
+	FileDesc *text = textfp[testmt-1];
+	if (text->getFd() < 0)
+		return;
+
+	text->seek(start, SEEK_SET);
+	text->read(buf.getRawData(), (int)size);
+}
+
+
+/******************************************************************************
+ * RawVerse::settext - Sets text for current offset
+ *
+ * ENT: testmt - testament to find (0 - Bible/module introduction)
+ * idxoff - offset into .vss
+ * buf - buffer to store
+ * len - length of buffer (negative - buf is null terminated)
+ */
+
+void RawVerse::doSetText(char testmt, long idxoff, const char *buf, long len)
+{
+	// each index record is 6 bytes: a 4-byte start followed by a 2-byte size
+	idxoff *= 6;
+	if (!testmt)
+		testmt = ((idxfp[1]) ? 1:2);
+
+	FileDesc *text = textfp[testmt-1];
+	FileDesc *index = idxfp[testmt-1];
+
+	// a negative len means buf is NUL-terminated
+	__u16 size = (len < 0) ? strlen(buf) : len;
+
+	// new text always goes to the current end of the text file
+	__s32 start = text->seek(0, SEEK_END);
+	index->seek(idxoff, SEEK_SET);
+
+	if (!size) {
+		// empty entry: nothing is written to the text file and the record points at 0
+		start = 0;
+	}
+	else {
+		text->seek(start, SEEK_SET);
+		text->write(buf, (int)size);
+
+		// add a new line to make data file easier to read in an editor
+		text->write(nl, 2);
+	}
+
+	// convert to the on-disk byte order before writing the record
+	start = archtosword32(start);
+	size = archtosword16(size);
+
+	index->write(&start, 4);
+	index->write(&size, 2);
+}
+
+
+/******************************************************************************
+ * RawVerse::linkentry - links one entry to another
+ *
+ * ENT: testmt - testament to find (0 - Bible/module introduction)
+ * destidxoff - dest offset into .vss
+ * srcidxoff - source offset into .vss
+ */
+
+void RawVerse::doLinkEntry(char testmt, long destidxoff, long srcidxoff) {
+	// turn record numbers into byte offsets (6 bytes per record)
+	destidxoff *= 6;
+	srcidxoff *= 6;
+
+	if (!testmt)
+		testmt = ((idxfp[1]) ? 1:2);
+
+	FileDesc *index = idxfp[testmt-1];
+	__s32 rawStart;
+	__u16 rawSize;
+
+	// fetch the source record as stored on disk (no byte-order conversion needed for a plain copy)
+	index->seek(srcidxoff, SEEK_SET);
+	index->read(&rawStart, 4);
+	index->read(&rawSize, 2);
+
+	// overwrite the destination record with it
+	index->seek(destidxoff, SEEK_SET);
+	index->write(&rawStart, 4);
+	index->write(&rawSize, 2);
+}
+
+
+/******************************************************************************
+ * RawVerse::createModule - Creates new module files
+ *
+ * ENT: path - directory to store module files
+ * RET: error status
+ */
+
+char RawVerse::createModule(const char *ipath, const char *v11n)
+{
+	char *path = 0;
+	char *fileName = new char [ strlen (ipath) + 20 ];
+	FileDesc *otIndex, *ntIndex;
+
+	stdstr(&path, ipath);
+
+	// drop a single trailing path separator, if there is one
+	const size_t lastPos = strlen(path) - 1;
+	if (path[lastPos] == '/' || path[lastPos] == '\\')
+		path[lastPos] = 0;
+
+	// the two text files are created empty and closed straight away
+	sprintf(fileName, "%s/ot", path);
+	FileMgr::removeFile(fileName);
+	otIndex = FileMgr::getSystemFileMgr()->open(fileName, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE);
+	otIndex->getFd();
+	FileMgr::getSystemFileMgr()->close(otIndex);
+
+	sprintf(fileName, "%s/nt", path);
+	FileMgr::removeFile(fileName);
+	otIndex = FileMgr::getSystemFileMgr()->open(fileName, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE);
+	otIndex->getFd();
+	FileMgr::getSystemFileMgr()->close(otIndex);
+
+	// the two index files stay open so they can be filled below
+	sprintf(fileName, "%s/ot.vss", path);
+	FileMgr::removeFile(fileName);
+	otIndex = FileMgr::getSystemFileMgr()->open(fileName, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE);
+	otIndex->getFd();
+
+	sprintf(fileName, "%s/nt.vss", path);
+	FileMgr::removeFile(fileName);
+	ntIndex = FileMgr::getSystemFileMgr()->open(fileName, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE);
+	ntIndex->getFd();
+
+	VerseKey vk;
+	vk.setVersificationSystem(v11n);
+	vk.setIntros(1);
+
+	// an all-zero record (start 0, size 0), already in on-disk byte order
+	__s32 emptyStart = 0;
+	__u16 emptySize = 0;
+	emptyStart = archtosword32(emptyStart);
+	emptySize = archtosword16(emptySize);
+
+	// one empty record per key position; testaments below 2 go to ot.vss, the rest to nt.vss
+	for (vk = TOP; !vk.popError(); vk++) {
+		FileDesc *target = (vk.getTestament() < 2) ? otIndex : ntIndex;
+		target->write(&emptyStart, 4);
+		target->write(&emptySize, 2);
+	}
+	// one extra trailing record in nt.vss
+	ntIndex->write(&emptyStart, 4);
+	ntIndex->write(&emptySize, 2);
+
+	FileMgr::getSystemFileMgr()->close(otIndex);
+	FileMgr::getSystemFileMgr()->close(ntIndex);
+
+	delete [] path;
+	delete [] fileName;
+
+	return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/common/rawverse4.cpp b/src/modules/common/rawverse4.cpp
new file mode 100644
index 0000000..b87ea0d
--- /dev/null
+++ b/src/modules/common/rawverse4.cpp
@@ -0,0 +1,312 @@
+/******************************************************************************
+ *
+ * rawverse4.cpp - code for class 'RawVerse4'- a module that reads raw
+ * text files:
+ * ot and nt using indexs ??.bks ??.cps ??.vss
+ * and provides lookup and parsing functions based on
+ * class VerseKey
+ *
+ * $Id: rawverse4.cpp 2833 2013-06-29 06:40:28Z chrislit $
+ *
+ * Copyright 2007-2013 CrossWire Bible Society (http://www.crosswire.org)
+ * CrossWire Bible Society
+ * P. O. Box 2528
+ * Tempe, AZ 85280-2528
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ */
+
+
+
+#include <ctype.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <errno.h>
+
+#include <utilstr.h>
+#include <rawverse4.h>
+#include <versekey.h>
+#include <sysdata.h>
+#include <filemgr.h>
+#include <swbuf.h>
+
+
+SWORD_NAMESPACE_START
+
+/******************************************************************************
+ * RawVerse4 Statics
+ */
+
+// incremented by the RawVerse4 constructor and decremented by its destructor
+int RawVerse4::instance = 0;
+// CR/LF pair that doSetText() appends after each entry written to the text file
+const char *RawVerse4::nl = "\r\n";
+
+
+/******************************************************************************
+ * RawVerse4 Constructor - Initializes data for instance of RawVerse4
+ *
+ * ENT: ipath - path of the directory where data and index files are located.
+ * be sure to include the trailing separator (e.g. '/' or '\')
+ * (e.g. 'modules/texts/rawtext/webster/')
+ */
+
+RawVerse4::RawVerse4(const char *ipath, int fileMode)
+{
+	SWBuf fileName;
+
+	path = 0;
+	stdstr(&path, ipath);
+
+	// drop a single trailing path separator, if there is one
+	const size_t lastPos = strlen(path) - 1;
+	if (path[lastPos] == '/' || path[lastPos] == '\\')
+		path[lastPos] = 0;
+
+	// -1 means the caller has no preference: try read/write
+	if (fileMode == -1)
+		fileMode = FileMgr::RDWR;
+
+	// slot 0 holds the "ot" files, slot 1 the "nt" files
+	fileName.setFormatted("%s/ot.vss", path);
+	idxfp[0] = FileMgr::getSystemFileMgr()->open(fileName, fileMode, true);
+
+	fileName.setFormatted("%s/nt.vss", path);
+	idxfp[1] = FileMgr::getSystemFileMgr()->open(fileName, fileMode, true);
+
+	fileName.setFormatted("%s/ot", path);
+	textfp[0] = FileMgr::getSystemFileMgr()->open(fileName, fileMode, true);
+
+	fileName.setFormatted("%s/nt", path);
+	textfp[1] = FileMgr::getSystemFileMgr()->open(fileName, fileMode, true);
+
+	instance++;
+}
+
+
+/******************************************************************************
+ * RawVerse4 Destructor - Cleans up instance of RawVerse4
+ */
+
+RawVerse4::~RawVerse4()
+{
+	// release the path copy made in the constructor; deleting a null pointer is a no-op
+	delete [] path;
+
+	instance--;
+
+	// close the index and text descriptor of each slot in turn
+	for (int slot = 0; slot < 2; ++slot) {
+		FileMgr::getSystemFileMgr()->close(idxfp[slot]);
+		FileMgr::getSystemFileMgr()->close(textfp[slot]);
+	}
+}
+
+
+/******************************************************************************
+ * RawVerse4::findoffset - Finds the offset of the key verse from the indexes
+ *
+ * ENT: testmt - testament to find (0 - Bible/module introduction)
+ * idxoff - offset into .vss
+ * start - address to store the starting offset
+ * size - address to store the size of the entry
+ */
+
+void RawVerse4::findOffset(char testmt, long idxoff, long *start, unsigned long *size) const {
+	// Looks up record number idxoff in the testament's .vss index and returns the
+	// entry's start offset and size.  Both are set to 0 when the index is not usable.
+	idxoff *= 8;	// each index record is 8 bytes: a 4-byte start followed by a 4-byte size
+	if (!testmt)
+		testmt = ((idxfp[1]) ? 1:2);
+
+	if (idxfp[testmt-1]->getFd() >= 0) {
+		idxfp[testmt-1]->seek(idxoff, SEEK_SET);
+		// zero-initialised so a failed read never leaves indeterminate values behind
+		__u32 tmpStart = 0;
+		__u32 tmpSize = 0;
+		idxfp[testmt-1]->read(&tmpStart, 4);
+		long len = idxfp[testmt-1]->read(&tmpSize, 4);		// read size
+
+		*start = swordtoarch32(tmpStart);
+		*size = swordtoarch32(tmpSize);
+
+		// the size field is 4 bytes wide here, so anything shorter is a failed read
+		// (the former "len < 2" accepted a truncated 2- or 3-byte size)
+		if (len < 4) {
+			*size = (unsigned long)((*start) ? (textfp[testmt-1]->seek(0, SEEK_END) - (long)*start) : 0);	// if for some reason we get an error reading size, make size to end of file
+		}
+	}
+	else {
+		*start = 0;
+		*size = 0;
+	}
+}
+
+
+/******************************************************************************
+ * RawVerse4::readtext - gets text at a given offset
+ *
+ * ENT: testmt - testament file to search in (0 - Old; 1 - New)
+ * start - starting offset where the text is located in the file
+ * size - size of text entry + 2 (null)(null)
+ * buf - buffer to store text
+ *
+ */
+
+void RawVerse4::readText(char testmt, long start, unsigned long size, SWBuf &buf) const {
+	// start from an empty, zero-filled buffer with room for size bytes plus one
+	buf = "";
+	buf.setFillByte(0);
+	buf.setSize(size + 1);
+
+	if (!testmt)
+		testmt = ((idxfp[1]) ? 1:2);
+
+	// nothing to read for an empty entry
+	if (!size)
+		return;
+
+	FileDesc *text = textfp[testmt-1];
+	if (text->getFd() < 0)
+		return;
+
+	text->seek(start, SEEK_SET);
+	text->read(buf.getRawData(), (int)size);
+}
+
+
+/******************************************************************************
+ * RawVerse4::settext - Sets text for current offset
+ *
+ * ENT: testmt - testament to find (0 - Bible/module introduction)
+ * idxoff - offset into .vss
+ * buf - buffer to store
+ * len - length of buffer (negative - buf is null terminated)
+ */
+
+void RawVerse4::doSetText(char testmt, long idxoff, const char *buf, long len)
+{
+	// each index record is 8 bytes: a 4-byte start followed by a 4-byte size
+	idxoff *= 8;
+	if (!testmt)
+		testmt = ((idxfp[1]) ? 1:2);
+
+	FileDesc *text = textfp[testmt-1];
+	FileDesc *index = idxfp[testmt-1];
+
+	// a negative len means buf is NUL-terminated
+	__u32 size = (len < 0) ? strlen(buf) : len;
+
+	// new text always goes to the current end of the text file
+	__u32 start = text->seek(0, SEEK_END);
+	index->seek(idxoff, SEEK_SET);
+
+	if (!size) {
+		// empty entry: nothing is written to the text file and the record points at 0
+		start = 0;
+	}
+	else {
+		text->seek(start, SEEK_SET);
+		text->write(buf, (int)size);
+
+		// add a new line to make data file easier to read in an editor
+		text->write(nl, 2);
+	}
+
+	// convert to the on-disk byte order before writing the record
+	start = archtosword32(start);
+	size = archtosword32(size);
+
+	index->write(&start, 4);
+	index->write(&size, 4);
+}
+
+
+/******************************************************************************
+ * RawVerse4::linkentry - links one entry to another
+ *
+ * ENT: testmt - testament to find (0 - Bible/module introduction)
+ * destidxoff - dest offset into .vss
+ * srcidxoff - source offset into .vss
+ */
+
+void RawVerse4::doLinkEntry(char testmt, long destidxoff, long srcidxoff) {
+	// turn record numbers into byte offsets (8 bytes per record)
+	destidxoff *= 8;
+	srcidxoff *= 8;
+
+	if (!testmt)
+		testmt = ((idxfp[1]) ? 1:2);
+
+	FileDesc *index = idxfp[testmt-1];
+	__u32 rawStart;
+	__u32 rawSize;
+
+	// fetch the source record as stored on disk (no byte-order conversion needed for a plain copy)
+	index->seek(srcidxoff, SEEK_SET);
+	index->read(&rawStart, 4);
+	index->read(&rawSize, 4);
+
+	// overwrite the destination record with it
+	index->seek(destidxoff, SEEK_SET);
+	index->write(&rawStart, 4);
+	index->write(&rawSize, 4);
+}
+
+
+/******************************************************************************
+ * RawVerse4::CreateModule - Creates new module files
+ *
+ * ENT: path - directory to store module files
+ * RET: error status
+ */
+
+char RawVerse4::createModule(const char *ipath, const char *v11n)
+{
+	char *path = 0;
+	char *fileName = new char [ strlen (ipath) + 20 ];
+	FileDesc *otIndex, *ntIndex;
+
+	stdstr(&path, ipath);
+
+	// drop a single trailing path separator, if there is one
+	const size_t lastPos = strlen(path) - 1;
+	if (path[lastPos] == '/' || path[lastPos] == '\\')
+		path[lastPos] = 0;
+
+	// the two text files are created empty and closed straight away
+	sprintf(fileName, "%s/ot", path);
+	FileMgr::removeFile(fileName);
+	otIndex = FileMgr::getSystemFileMgr()->open(fileName, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE);
+	otIndex->getFd();
+	FileMgr::getSystemFileMgr()->close(otIndex);
+
+	sprintf(fileName, "%s/nt", path);
+	FileMgr::removeFile(fileName);
+	otIndex = FileMgr::getSystemFileMgr()->open(fileName, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE);
+	otIndex->getFd();
+	FileMgr::getSystemFileMgr()->close(otIndex);
+
+	// the two index files stay open so they can be filled below
+	sprintf(fileName, "%s/ot.vss", path);
+	FileMgr::removeFile(fileName);
+	otIndex = FileMgr::getSystemFileMgr()->open(fileName, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE);
+	otIndex->getFd();
+
+	sprintf(fileName, "%s/nt.vss", path);
+	FileMgr::removeFile(fileName);
+	ntIndex = FileMgr::getSystemFileMgr()->open(fileName, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE);
+	ntIndex->getFd();
+
+	VerseKey vk;
+	vk.setVersificationSystem(v11n);
+	vk.setIntros(1);
+
+	// an all-zero record (start 0, size 0), already in on-disk byte order
+	__u32 emptyStart = 0;
+	__u32 emptySize = 0;
+	emptyStart = archtosword32(emptyStart);
+	emptySize = archtosword32(emptySize);
+
+	// one empty record per key position; testaments below 2 go to ot.vss, the rest to nt.vss
+	for (vk = TOP; !vk.popError(); vk++) {
+		FileDesc *target = (vk.getTestament() < 2) ? otIndex : ntIndex;
+		target->write(&emptyStart, 4);
+		target->write(&emptySize, 4);
+	}
+	// one extra trailing record in nt.vss
+	ntIndex->write(&emptyStart, 4);
+	ntIndex->write(&emptySize, 4);
+
+	FileMgr::getSystemFileMgr()->close(otIndex);
+	FileMgr::getSystemFileMgr()->close(ntIndex);
+
+	delete [] path;
+	delete [] fileName;
+
+	return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/common/sapphire.cpp b/src/modules/common/sapphire.cpp
new file mode 100644
index 0000000..8cc3e15
--- /dev/null
+++ b/src/modules/common/sapphire.cpp
@@ -0,0 +1,236 @@
+/******************************************************************************
+ *
+ * sapphire.cpp - the Sapphire II stream cipher class
+ *
+ * $Id: sapphire.cpp 2833 2013-06-29 06:40:28Z chrislit $
+ *
+ * Copyright 1999-2013 CrossWire Bible Society (http://www.crosswire.org)
+ * CrossWire Bible Society
+ * P. O. Box 2528
+ * Tempe, AZ 85280-2528
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ */
+
+/******************************************************************************
+ *
+ * Original license notice & credits:
+ * Dedicated to the Public Domain the author and inventor:
+ * (Michael Paul Johnson). This code comes with no warranty.
+ * Use it at your own risk.
+ * Ported from the Pascal implementation of the Sapphire Stream
+ * Cipher 9 December 1994.
+ * Added hash pre- and post-processing 27 December 1994.
+ * Modified initialization to make index variables key dependent,
+ * made the output function more resistant to cryptanalysis,
+ * and renamed to Sapphire II 2 January 1995
+ *
+ */
+
+#include <string.h>
+
+#include "sapphire.h"
+
+SWORD_NAMESPACE_START
+
+/******************************************************************************
+ * sapphire::keyrand - Draws a key-dependent value no greater than limit;
+ * initialize() uses it to choose which card to swap.
+ *
+ * ENT: limit - largest value that may be returned
+ * user_key - key bytes
+ * keysize - number of bytes in user_key
+ * rsum - running sum carried between calls (read and updated)
+ * keypos - index of the next key byte to use (read and updated)
+ *
+ * RET: a value in 0..limit (converted to unsigned char); 0 when limit is 0
+ */
+unsigned char sapphire::keyrand(int limit,
+ unsigned char *user_key,
+ unsigned char keysize,
+ unsigned char *rsum,
+ unsigned *keypos)
+ {
+ unsigned u, // Value from 0 to limit to return.
+ retry_limiter, // No infinite loops allowed.
+ mask; // Select just enough bits.
+
+ if (!limit) return 0; // Avoid divide by zero error.
+ retry_limiter = 0;
+ mask = 1; // Fill mask with enough bits to cover
+ while (mask < (unsigned)limit) // the desired range.
+ mask = (mask << 1) + 1;
+ do
+ {
+ // Fold the next key byte into the running sum through the current deck.
+ *rsum = cards[*rsum] + user_key[(*keypos)++];
+ if (*keypos >= keysize)
+ {
+ *keypos = 0; // Recycle the user key.
+ *rsum += keysize; // key "aaaa" != key "aaaaaaaa"
+ }
+ // Candidate value: the low bits of the running sum selected by mask.
+ u = mask & *rsum;
+ // From the 12th attempt on, the candidate is reduced modulo limit so
+ // that the loop below is guaranteed to end.
+ if (++retry_limiter > 11)
+ u %= limit; // Prevent very rare long loops.
+ }
+ // Redraw while the candidate is above limit.
+ while (u > (unsigned)limit);
+ return u;
+ }
+
+/******************************************************************************
+ * sapphire::initialize - Sets up the card deck and the index variables from
+ * a user key.
+ *
+ * ENT: key - key bytes
+ * keysize - number of bytes in key; 0 selects the key-less hash_init()
+ * state instead
+ */
+void sapphire::initialize(unsigned char *key, unsigned char keysize)
+ {
+ // Key size may be up to 256 bytes.
+ // NOTE(review): keysize is an unsigned char, so the largest size that
+ // can actually be passed in is 255.
+ // Pass phrases may be used directly, with longer length
+ // compensating for the low entropy expected in such keys.
+ // Alternatively, shorter keys hashed from a pass phrase or
+ // generated randomly may be used. For random keys, lengths
+ // of from 4 to 16 bytes are recommended, depending on how
+ // secure you want this to be.
+
+ int i;
+ unsigned char toswap, swaptemp, rsum;
+ unsigned keypos;
+
+ // If we have been given no key, assume the default hash setup.
+
+ if (keysize < 1)
+ {
+ hash_init();
+ return;
+ }
+
+ // Start with cards all in order, one of each.
+
+ for (i=0;i<256;i++)
+ cards[i] = i;
+
+ // Swap the card at each position with some other card.
+
+ toswap = 0;
+ keypos = 0; // Start with first byte of user key.
+ rsum = 0;
+ // Walk from the last card down to the first; keyrand() picks the swap
+ // partner from positions 0..i. rsum and keypos carry over between calls.
+ for (i=255;i>=0;i--)
+ {
+ toswap = keyrand(i, key, keysize, &rsum, &keypos);
+ swaptemp = cards[i];
+ cards[i] = cards[toswap];
+ cards[toswap] = swaptemp;
+ }
+
+ // Initialize the indices and data dependencies.
+ // Indices are set to different values instead of all 0
+ // to reduce what is known about the state of the cards
+ // when the first byte is emitted.
+
+ rotor = cards[1];
+ ratchet = cards[3];
+ avalanche = cards[5];
+ last_plain = cards[7];
+ last_cipher = cards[rsum];
+
+ // Zero the key-derived temporaries before returning (presumably so they
+ // do not linger on the stack).
+ toswap = swaptemp = rsum = 0;
+ keypos = 0;
+ }
+
+void sapphire::hash_init(void)
+	{
+	// Puts the object into the fixed starting state used when no key is
+	// supplied (non-keyed hash computation).
+
+	// Deck in descending order: cards[0] = 255 ... cards[255] = 0.
+
+	for (int slot = 0; slot < 256; ++slot)
+		cards[slot] = (unsigned char) (255 - slot);
+
+	// Fixed, distinct starting values for the indices and data
+	// dependencies.
+
+	last_cipher = 11;
+	last_plain = 7;
+	avalanche = 5;
+	ratchet = 3;
+	rotor = 1;
+	}
+
+sapphire::sapphire(unsigned char *key, unsigned char keysize)
+	{
+	// Without both a key pointer and a non-zero size, nothing is set up
+	// here and the state is left untouched.
+	if (!key || !keysize)
+		return;
+
+	initialize(key, keysize);
+	}
+
+void sapphire::burn(void)
+	{
+	// Wipe the key-derived state held in RAM: first the five index
+	// variables, then the whole deck.
+	last_cipher = 0;
+	last_plain = 0;
+	avalanche = 0;
+	ratchet = 0;
+	rotor = 0;
+	memset(cards, 0, 256);
+	}
+
+sapphire::~sapphire()
+	{
+	// Never leave cipher state behind when the object goes away.
+	burn();
+	}
+
+/******************************************************************************
+ * sapphire::encrypt - Enciphers one byte and advances the cipher state.
+ *
+ * ENT: b - plaintext byte
+ * RET: ciphertext byte
+ */
+unsigned char sapphire::encrypt(unsigned char b)
+ {
+ // Picture a single enigma rotor with 256 positions, rewired
+ // on the fly by card-shuffling.
+
+ // This cipher is a variant of one invented and written
+ // by Michael Paul Johnson in November, 1993.
+
+ unsigned char swaptemp;
+
+ // Shuffle the deck a little more.
+
+ // Advance ratchet by the card under rotor, then step rotor.
+ ratchet += cards[rotor++];
+ // Four-way rotation of cards:
+ // last_cipher <- ratchet <- last_plain <- rotor <- old last_cipher.
+ swaptemp = cards[last_cipher];
+ cards[last_cipher] = cards[ratchet];
+ cards[ratchet] = cards[last_plain];
+ cards[last_plain] = cards[rotor];
+ cards[rotor] = swaptemp;
+ avalanche += cards[swaptemp];
+
+ // Output one byte from the state in such a way as to make it
+ // very hard to figure out which one you are looking at.
+
+ // The sums are masked with 0xFF so they stay valid indices into cards.
+ last_cipher = b^cards[(cards[ratchet] + cards[rotor]) & 0xFF] ^
+ cards[cards[(cards[last_plain] +
+ cards[last_cipher] +
+ cards[avalanche])&0xFF]];
+ // Remember this plaintext byte; it feeds the next call's shuffle.
+ last_plain = b;
+ return last_cipher;
+ }
+
+/******************************************************************************
+ * sapphire::decrypt - Deciphers one byte and advances the cipher state.
+ * Performs the same shuffle as encrypt(); only the roles of last_plain
+ * and last_cipher in the output step are exchanged.
+ *
+ * ENT: b - ciphertext byte
+ * RET: plaintext byte
+ */
+unsigned char sapphire::decrypt(unsigned char b)
+ {
+ unsigned char swaptemp;
+
+ // Shuffle the deck a little more.
+
+ // Advance ratchet by the card under rotor, then step rotor.
+ ratchet += cards[rotor++];
+ // Four-way rotation of cards:
+ // last_cipher <- ratchet <- last_plain <- rotor <- old last_cipher.
+ swaptemp = cards[last_cipher];
+ cards[last_cipher] = cards[ratchet];
+ cards[ratchet] = cards[last_plain];
+ cards[last_plain] = cards[rotor];
+ cards[rotor] = swaptemp;
+ avalanche += cards[swaptemp];
+
+ // Output one byte from the state in such a way as to make it
+ // very hard to figure out which one you are looking at.
+
+ // The sums are masked with 0xFF so they stay valid indices into cards.
+ last_plain = b^cards[(cards[ratchet] + cards[rotor]) & 0xFF] ^
+ cards[cards[(cards[last_plain] +
+ cards[last_cipher] +
+ cards[avalanche])&0xFF]];
+ // Remember this ciphertext byte; it feeds the next call's shuffle.
+ last_cipher = b;
+ return last_plain;
+ }
+
+void sapphire::hash_final(unsigned char *hash, // Destination
+		unsigned char hashlength) // Size of hash.
+	{
+	// Post-processing: run the byte values 255 down to 0 through the
+	// cipher, discarding the output.
+	int value = 256;
+	while (value-- > 0)
+		encrypt((unsigned char) value);
+
+	// The hash is the next hashlength output bytes for a zero input.
+	unsigned char *out = hash;
+	unsigned char *const stop = hash + hashlength;
+	for (; out != stop; ++out)
+		*out = encrypt(0);
+	}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/common/swcipher.cpp b/src/modules/common/swcipher.cpp
new file mode 100644
index 0000000..16279dc
--- /dev/null
+++ b/src/modules/common/swcipher.cpp
@@ -0,0 +1,147 @@
+/******************************************************************************
+ *
+ * swcipher.cpp - code for class 'SWCipher'- a driver class that
+ * provides cipher utilities
+ *
+ * $Id: swcipher.cpp 2833 2013-06-29 06:40:28Z chrislit $
+ *
+ * Copyright 1999-2013 CrossWire Bible Society (http://www.crosswire.org)
+ * CrossWire Bible Society
+ * P. O. Box 2528
+ * Tempe, AZ 85280-2528
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ */
+
+
+#include <stdlib.h>
+#include <string.h>
+#include <swcipher.h>
+
+SWORD_NAMESPACE_START
+
+/******************************************************************************
+ * SWCipher Constructor - Initializes data for instance of SWCipher
+ *
+ */
+
+SWCipher::SWCipher(unsigned char *key) {
+	// key is a NUL-terminated cipher key.  Its length is handed to
+	// sapphire::initialize(), whose size parameter is an unsigned char.
+	master.initialize(key, strlen((char *)key));
+
+	// No buffer is held yet.  len and cipher are given defined values as
+	// well, because Buf() and cipherBuf() read them (through Decode() and
+	// Encode()) even when they are called before any buffer has been set.
+	buf = 0;
+	len = 0;
+	cipher = false;
+}
+
+
+/******************************************************************************
+ * SWCipher Destructor - Cleans up instance of SWCipher
+ */
+
+SWCipher::~SWCipher()
+{
+	// free() accepts a null pointer, so no separate check is required.
+	free(buf);
+}
+
+
+/******************************************************************************
+ * SWCipher::Buf - Sets and/or gets the plain (deciphered) buffer
+ *
+ * ENT: ibuf - plain data to take a copy of, or 0 to only fetch the buffer
+ *	ilen - number of bytes in ibuf; 0 means ibuf is a NUL-terminated
+ *		string whose length is measured here
+ * RET: the internal buffer, deciphered if it was held enciphered
+ */
+
+char *SWCipher::Buf(const char *ibuf, unsigned long ilen)
+{
+	if (ibuf) {
+
+		if (buf)
+			free(buf);
+
+		// Measure the *incoming* string.  The previous code called
+		// strlen() on buf, which has just been freed (or is null).
+		len = (ilen) ? ilen : strlen(ibuf);
+
+		// Always keep one spare byte and terminate the copy: Decode()
+		// writes a NUL at buf[len], which must lie inside the allocation
+		// even when the caller supplied an explicit length.
+		buf = (char *) malloc(len + 1);
+		memcpy(buf, ibuf, len);
+		buf[len] = 0;
+		cipher = false;
+	}
+
+	Decode();
+
+	return buf;
+}
+
+
+char *SWCipher::cipherBuf(unsigned long *ilen, const char *ibuf)
+{
+	// Setting: take a copy of *ilen enciphered bytes from ibuf.  One extra
+	// byte is allocated beyond the data.
+	if (ibuf) {
+		const unsigned long size = *ilen;
+
+		free(buf);		// no-op when nothing is held yet
+		buf = (char *) malloc(size+1);
+		memcpy(buf, ibuf, size);
+		len = size;
+		cipher = true;
+	}
+
+	// Getting: make sure what is held is in enciphered form, then report
+	// its length back through ilen.
+	Encode();
+	*ilen = len;
+
+	return buf;
+}
+
+
+/******************************************************************************
+ * SWCipher::Encode - Enciphers the held buffer in place, byte by byte,
+ *			starting from a fresh copy of the master
+ *			cipher state.  Does nothing when the buffer
+ *			is already enciphered.
+ */
+
+void SWCipher::Encode(void)
+{
+	// Already enciphered: nothing to do.
+	if (cipher)
+		return;
+
+	// Every pass starts from a copy of the master state.
+	work = master;
+
+	unsigned long at = 0;
+	while (at < len) {
+		buf[at] = work.encrypt(buf[at]);
+		++at;
+	}
+	cipher = true;
+}
+
+
+/******************************************************************************
+ * SWCipher::Decode - Deciphers the held buffer in place, byte by byte,
+ *			starting from a fresh copy of the master
+ *			cipher state, and NUL-terminates the result.
+ *			Does nothing when the buffer is already plain.
+ */
+
+void SWCipher::Decode(void)
+{
+	// Already plain: nothing to do.
+	if (!cipher)
+		return;
+
+	// Every pass starts from a copy of the master state.
+	work = master;
+
+	for (unsigned long at = 0; at < len; ++at)
+		buf[at] = work.decrypt(buf[at]);
+
+	// Terminate just past the data.
+	buf[len] = 0;
+	cipher = false;
+}
+
+
+/******************************************************************************
+ * SWCipher::setCipherKey - setter for a new CipherKey
+ *
+ */
+
+void SWCipher::setCipherKey(const char *ikey) {
+	// Re-key the master state from the NUL-terminated string ikey.
+	master.initialize((unsigned char *)ikey, strlen(ikey));
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/common/swcomprs.cpp b/src/modules/common/swcomprs.cpp
new file mode 100644
index 0000000..9df8e7d
--- /dev/null
+++ b/src/modules/common/swcomprs.cpp
@@ -0,0 +1,211 @@
+/******************************************************************************
+ *
+ * swcomprs.cpp - a driver class that provides compression utilities
+ *
+ * $Id: swcomprs.cpp 2833 2013-06-29 06:40:28Z chrislit $
+ *
+ * Copyright 1996-2013 CrossWire Bible Society (http://www.crosswire.org)
+ * CrossWire Bible Society
+ * P. O. Box 2528
+ * Tempe, AZ 85280-2528
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ */
+
+
+#include <stdlib.h>
+#include <string.h>
+#include <swcomprs.h>
+
+SWORD_NAMESPACE_START
+
+/******************************************************************************
+ * SWCompress Constructor - Initializes data for instance of SWCompress
+ *
+ */
+
+SWCompress::SWCompress()
+{
+	// Both pointers must be null before Init() runs, because Init() frees
+	// whatever they point to.
+	zbuf = 0;
+	buf = 0;
+	Init();
+}
+
+
+/******************************************************************************
+ * SWCompress Destructor - Cleans up instance of SWCompress
+ */
+
+SWCompress::~SWCompress()
+{
+	// free() accepts null pointers, so both buffers can be released
+	// unconditionally.
+	free(zbuf);
+	free(buf);
+}
+
+
+void SWCompress::Init()
+{
+	// Release whatever is held (free() ignores null pointers) ...
+	free(buf);
+	free(zbuf);
+
+	// ... and return every field to its empty state.
+	buf = zbuf = 0;
+	direct = 0;
+	zlen = slen = 0;
+	zpos = pos = 0;
+}
+
+
+/******************************************************************************
+ * SWCompress::Buf - Sets and/or gets the uncompressed buffer
+ *
+ * ENT: ibuf - uncompressed data to take a copy of, or 0 to only fetch
+ *	len - optional; when setting, *len is the number of bytes in ibuf
+ *		(without it ibuf is measured with strlen); on return *len
+ *		holds the length of the uncompressed data
+ * RET: the uncompressed buffer (never null)
+ */
+
+char *SWCompress::Buf(const char *ibuf, unsigned long *len) {
+	// setting an uncompressed buffer
+	if (ibuf) {
+		Init();
+		slen = (len) ? *len : strlen(ibuf);
+		buf = (char *) calloc(slen + 1, 1);	// +1 keeps the copy NUL-terminated
+		memcpy(buf, ibuf, slen);
+	}
+
+	// getting an uncompressed buffer
+	if (!buf) {
+		buf = (char *)calloc(1,1); // be sure we at least allocate an empty buf for return;
+		direct = 1;
+		Decode();
+	}
+
+	// Report the length on every path.  It used to be written only right
+	// after a decode, so fetching an already-held buffer left *len unset.
+	if (len)
+		*len = slen;
+	return buf;
+}
+
+
+char *SWCompress::zBuf(unsigned long *len, char *ibuf)
+{
+	// Setting: reset, then take a copy of *len compressed bytes from ibuf.
+	if (ibuf) {
+		Init();
+		const unsigned long size = *len;
+		zbuf = (char *) malloc(size);
+		memcpy(zbuf, ibuf, size);
+		zlen = size;
+	}
+
+	// Getting: with no compressed data held, produce it by encoding.
+	if (!zbuf) {
+		direct = 0;
+		Encode();
+	}
+
+	*len = zlen;
+
+	return zbuf;
+}
+
+
+unsigned long SWCompress::GetChars(char *ibuf, unsigned long len)
+{
+	// Copies up to len bytes of pending input into ibuf and returns how
+	// many were copied.  direct selects the source: the compressed buffer
+	// when set, the uncompressed one otherwise.
+	if (!direct) {
+		// clamp the request to what is left of the uncompressed buffer
+		len = (((slen - pos) > (unsigned)len) ? len : slen - pos);
+		if (len > 0) {
+			memmove(ibuf, &buf[pos], len);
+			pos += len;
+		}
+		return len;
+	}
+
+	// clamp the request to what is left of the compressed buffer
+	len = (((zlen - zpos) > (unsigned)len) ? len : zlen - zpos);
+	if (len > 0) {
+		memmove(ibuf, &zbuf[zpos], len);
+		zpos += len;
+	}
+	return len;
+}
+
+
+unsigned long SWCompress::SendChars(char *ibuf, unsigned long len)
+{
+	// Appends len bytes from ibuf to the output and returns len.  direct
+	// selects the destination: the uncompressed buffer when set, the
+	// compressed one otherwise.  Either buffer is grown with 1024 bytes of
+	// slack when the data would not fit.
+	if (direct) {
+		if (!buf) {
+			buf = (char *)calloc(1, len + 1024);
+		}
+		else if ((pos + len) > (unsigned)slen) {
+			buf = (char *) realloc(buf, pos + len + 1024);
+			memset(&buf[pos], 0, len + 1024);	// zero from the write position on
+		}
+		memmove(&buf[pos], ibuf, len);
+		pos += len;
+		return len;
+	}
+
+	if (!zbuf) {
+		zbuf = (char *)calloc(1, len + 1024);
+		zlen = len + 1024;
+	}
+	else if ((zpos + len) > zlen) {
+		zbuf = (char *) realloc(zbuf, zpos + len + 1024);
+		zlen = zpos + len + 1024;
+	}
+	memmove(&zbuf[zpos], ibuf, len);
+	zpos += len;
+	return len;
+}
+
+
+/******************************************************************************
+ * SWCompress::Encode - This function "encodes" the input stream into the
+ * output stream.
+ * The GetChars() and SendChars() functions are
+ * used to separate this method from the actual
+ * i/o.
+ */
+
+void SWCompress::Encode(void)
+{
+	// The base class performs no compression; data is passed through as is.
+	cycleStream();
+}
+
+
+/******************************************************************************
+ * SWCompress::Decode - This function "decodes" the input stream into the
+ * output stream.
+ * The GetChars() and SendChars() functions are
+ * used to separate this method from the actual
+ * i/o.
+ */
+
+void SWCompress::Decode(void)
+{
+	// The base class performs no decompression; data is passed through as is.
+	cycleStream();
+}
+
+
+void SWCompress::cycleStream() {
+	// Moves everything GetChars() yields over to SendChars() in pieces of
+	// up to 1024 bytes; a short (or empty) read ends the transfer.
+	char piece[1024];
+	unsigned long moved = 0;
+
+	for (;;) {
+		const unsigned long got = GetChars(piece, 1024);
+		if (got)
+			moved += SendChars(piece, got);
+		if (got != 1024)
+			break;
+	}
+
+	// Both length fields are set to the number of bytes transferred.
+	zlen = slen = moved;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/common/xzcomprs.cpp b/src/modules/common/xzcomprs.cpp
new file mode 100644
index 0000000..db8a4a8
--- /dev/null
+++ b/src/modules/common/xzcomprs.cpp
@@ -0,0 +1,181 @@
+/******************************************************************************
+ *
+ * xzcomprs.cpp - XzCompress, a driver class that provides xz (LZMA2)
+ * compression
+ *
+ * $Id: xzcomprs.cpp 2850 2013-07-02 09:57:20Z chrislit $
+ *
+ * Copyright 2013 CrossWire Bible Society (http://www.crosswire.org)
+ * CrossWire Bible Society
+ * P. O. Box 2528
+ * Tempe, AZ 85280-2528
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ */
+
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <xzcomprs.h>
+#include <zlib.h>
+
+SWORD_NAMESPACE_START
+
+/******************************************************************************
+ * XzCompress Constructor - Initializes data for instance of XzCompress
+ *
+ */
+
+XzCompress::XzCompress()
+	: SWCompress()
+{
+	// nothing beyond the base class set-up
+}
+
+
+/******************************************************************************
+ * XzCompress Destructor - Cleans up instance of XzCompress
+ */
+
+XzCompress::~XzCompress()
+{
+	// no resources of its own to release
+}
+
+
+/******************************************************************************
+ * XzCompress::Encode - This function "encodes" the input stream into the
+ * output stream.
+ * The GetChars() and SendChars() functions are
+ * used to separate this method from the actual
+ * i/o.
+ * NOTE: must set zlen for parent class to know length of
+ * compressed buffer.
+ */
+
+void XzCompress::Encode(void)
+{
+	// Collects the whole uncompressed input through GetChars(), compresses
+	// it in one zlib compress() call and hands the result to SendChars().
+	// Problems are reported on stderr; nothing is sent in that case.
+
+	direct = 0;	// set direction needed by parent [Get|Send]Chars()
+
+	// get buffer: read in pieces of up to 1023 bytes, growing the gather
+	// buffer so that there is always room for the next piece
+	char chunk[1024];
+	char *buf = (char *)calloc(1, 1024);
+	char *chunkbuf = buf;
+	unsigned long chunklen;
+	unsigned long len = 0;
+	while((chunklen = GetChars(chunk, 1023))) {
+		memcpy(chunkbuf, chunk, chunklen);
+		len += chunklen;
+		if (chunklen < 1023)
+			break;	// short read: input exhausted
+		buf = (char *)realloc(buf, len + 1024);
+		chunkbuf = buf+len;
+	}
+
+	// compress() needs a destination at least 0.1% larger than the source
+	// plus 12 bytes; on return it stores the compressed size in zlen.
+	zlen = (long) (len*1.001)+15;
+	if (len)
+	{
+		// the output buffer is only needed when there is input
+		char *zbuf = new char[zlen+1];
+		if (compress((Bytef*)zbuf, &zlen, (const Bytef*)buf, len) != Z_OK)
+		{
+			// diagnostics belong on stderr, like every other error
+			// reported by this class (this one used to go to stdout)
+			fprintf(stderr, "ERROR in compression\n");
+		}
+		else {
+			SendChars(zbuf, zlen);
+		}
+		delete [] zbuf;
+	}
+	else
+	{
+		fprintf(stderr, "ERROR: no buffer to compress\n");
+	}
+	free (buf);
+}
+
+
+/******************************************************************************
+ * XzCompress::Decode - This function "decodes" the input stream into the
+ * output stream.
+ * The GetChars() and SendChars() functions are
+ * used to separate this method from the actual
+ * i/o.
+ */
+
+void XzCompress::Decode(void)
+{
+	// Collects the whole compressed input through GetChars(), inflates it
+	// in one zlib uncompress() call and hands the result to SendChars().
+	// uncompress() must be given a destination that is large enough; since
+	// the original size is not known here, 20 times the compressed size is
+	// assumed to be sufficient.
+
+	// gather the compressed input in pieces of up to 1023 bytes
+	char piece[1024];
+	char *packed = (char *)calloc(1, 1024);
+	char *writeAt = packed;
+	unsigned long packedLen = 0;
+	int got;
+	while ((got = GetChars(piece, 1023))) {
+		memcpy(writeAt, piece, got);
+		packedLen += got;
+		if (got < 1023)
+			break;	// short read: input exhausted
+		packed = (char *)realloc(packed, packedLen + 1024);
+		writeAt = packed + packedLen;
+	}
+
+	if (!packedLen) {
+		fprintf(stderr, "ERROR: no buffer to decompress!\n");
+		free (packed);
+		return;
+	}
+
+	unsigned long plainLen = packedLen*20;
+	char *plain = new char[plainLen];
+	slen = 0;
+
+	// on success uncompress() stores the real output size in plainLen
+	const int rc = uncompress((Bytef*)plain, &plainLen, (Bytef*)packed, packedLen);
+	if (rc == Z_OK) {
+		SendChars(plain, plainLen);
+		slen = plainLen;
+	}
+	else if (rc == Z_MEM_ERROR) {
+		fprintf(stderr, "ERROR: not enough memory during decompression.\n");
+	}
+	else if (rc == Z_BUF_ERROR) {
+		fprintf(stderr, "ERROR: not enough room in the out buffer during decompression.\n");
+	}
+	else if (rc == Z_DATA_ERROR) {
+		fprintf(stderr, "ERROR: corrupt data during decompression.\n");
+	}
+	else {
+		fprintf(stderr, "ERROR: an unknown error occured during decompression.\n");
+	}
+
+	delete [] plain;
+	free (packed);
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/common/zipcomprs.cpp b/src/modules/common/zipcomprs.cpp
new file mode 100644
index 0000000..3e44abd
--- /dev/null
+++ b/src/modules/common/zipcomprs.cpp
@@ -0,0 +1,183 @@
+/******************************************************************************
+ *
+ * zipcomprs.cpp - ZipCompress, a driver class that provides zlib
+ * compression
+ *
+ * $Id: zipcomprs.cpp 2833 2013-06-29 06:40:28Z chrislit $
+ *
+ * Copyright 2000-2013 CrossWire Bible Society (http://www.crosswire.org)
+ * CrossWire Bible Society
+ * P. O. Box 2528
+ * Tempe, AZ 85280-2528
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ */
+
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <zipcomprs.h>
+#include <zlib.h>
+
+SWORD_NAMESPACE_START
+
+/******************************************************************************
+ * ZipCompress Constructor - Initializes data for instance of ZipCompress
+ *
+ */
+
+ZipCompress::ZipCompress()
+	: SWCompress()
+{
+	// nothing beyond the base class set-up
+}
+
+
+/******************************************************************************
+ * ZipCompress Destructor - Cleans up instance of ZipCompress
+ */
+
+ZipCompress::~ZipCompress()
+{
+	// no resources of its own to release
+}
+
+
+/******************************************************************************
+ * ZipCompress::Encode - This function "encodes" the input stream into the
+ * output stream.
+ * The GetChars() and SendChars() functions are
+ * used to separate this method from the actual
+ * i/o.
+ * NOTE: must set zlen for parent class to know length of
+ * compressed buffer.
+ */
+
+void ZipCompress::Encode(void)
+{
+	// Collects the whole uncompressed input through GetChars(), compresses
+	// it in one zlib compress() call and hands the result to SendChars().
+	// Problems are reported on stderr; nothing is sent in that case.
+
+	direct = 0;	// set direction needed by parent [Get|Send]Chars()
+
+	// get buffer: read in pieces of up to 1023 bytes, growing the gather
+	// buffer so that there is always room for the next piece
+	char chunk[1024];
+	char *buf = (char *)calloc(1, 1024);
+	char *chunkbuf = buf;
+	unsigned long chunklen;
+	unsigned long len = 0;
+	while((chunklen = GetChars(chunk, 1023))) {
+		memcpy(chunkbuf, chunk, chunklen);
+		len += chunklen;
+		if (chunklen < 1023)
+			break;	// short read: input exhausted
+		buf = (char *)realloc(buf, len + 1024);
+		chunkbuf = buf+len;
+	}
+
+	// compress() needs a destination at least 0.1% larger than the source
+	// plus 12 bytes; on return it stores the compressed size in zlen.
+	zlen = (long) (len*1.001)+15;
+	if (len)
+	{
+		// the output buffer is only needed when there is input
+		char *zbuf = new char[zlen+1];
+		if (compress((Bytef*)zbuf, &zlen, (const Bytef*)buf, len) != Z_OK)
+		{
+			// diagnostics belong on stderr, like every other error
+			// reported by this class (this one used to go to stdout)
+			fprintf(stderr, "ERROR in compression\n");
+		}
+		else {
+			SendChars(zbuf, zlen);
+		}
+		delete [] zbuf;
+	}
+	else
+	{
+		fprintf(stderr, "ERROR: no buffer to compress\n");
+	}
+	free (buf);
+}
+
+
+/******************************************************************************
+ * ZipCompress::Decode - This function "decodes" the input stream into the
+ * output stream.
+ * The GetChars() and SendChars() functions are
+ * used to separate this method from the actual
+ * i/o.
+ */
+
+void ZipCompress::Decode(void)
+{
+	// Collects the whole compressed input through GetChars(), inflates it
+	// in one zlib uncompress() call and hands the result to SendChars().
+	// uncompress() must be given a destination that is large enough; since
+	// the original size is not known here, 20 times the compressed size is
+	// assumed to be sufficient.
+
+	// gather the compressed input in pieces of up to 1023 bytes
+	char piece[1024];
+	char *packed = (char *)calloc(1, 1024);
+	char *writeAt = packed;
+	unsigned long packedLen = 0;
+	int got;
+	while ((got = GetChars(piece, 1023))) {
+		memcpy(writeAt, piece, got);
+		packedLen += got;
+		if (got < 1023)
+			break;	// short read: input exhausted
+		packed = (char *)realloc(packed, packedLen + 1024);
+		writeAt = packed + packedLen;
+	}
+
+	if (!packedLen) {
+		fprintf(stderr, "ERROR: no buffer to decompress!\n");
+		free (packed);
+		return;
+	}
+
+	unsigned long plainLen = packedLen*20;
+	char *plain = new char[plainLen];
+	slen = 0;
+
+	// on success uncompress() stores the real output size in plainLen
+	const int rc = uncompress((Bytef*)plain, &plainLen, (Bytef*)packed, packedLen);
+	if (rc == Z_OK) {
+		SendChars(plain, plainLen);
+		slen = plainLen;
+	}
+	else if (rc == Z_MEM_ERROR) {
+		fprintf(stderr, "ERROR: not enough memory during decompression.\n");
+	}
+	else if (rc == Z_BUF_ERROR) {
+		fprintf(stderr, "ERROR: not enough room in the out buffer during decompression.\n");
+	}
+	else if (rc == Z_DATA_ERROR) {
+		fprintf(stderr, "ERROR: corrupt data during decompression.\n");
+	}
+	else {
+		fprintf(stderr, "ERROR: an unknown error occured during decompression.\n");
+	}
+
+	delete [] plain;
+	free (packed);
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/common/zstr.cpp b/src/modules/common/zstr.cpp
new file mode 100644
index 0000000..a745502
--- /dev/null
+++ b/src/modules/common/zstr.cpp
@@ -0,0 +1,700 @@
+/******************************************************************************
+ *
+ * zstr.cpp - code for class 'zStr'- a module that reads compressed text
+ * files and provides lookup and parsing functions based on
+ * class StrKey
+ *
+ * $Id: zstr.cpp 2980 2013-09-14 21:51:47Z scribe $
+ *
+ * Copyright 2001-2013 CrossWire Bible Society (http://www.crosswire.org)
+ * CrossWire Bible Society
+ * P. O. Box 2528
+ * Tempe, AZ 85280-2528
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ */
+
+
+#include <stdio.h>
+#include <fcntl.h>
+#include <errno.h>
+
+#include <stdlib.h>
+#include <utilstr.h>
+#include <zstr.h>
+#include <swcomprs.h>
+
+#include <sysdata.h>
+#include <entriesblk.h>
+#include <swlog.h>
+#include <stringmgr.h>
+#include <filemgr.h>
+#include <swbuf.h>
+
+SWORD_NAMESPACE_START
+
+/******************************************************************************
+ * zStr Statics
+ */
+
+int zStr::instance = 0; // number of live zStr objects: incremented by the constructor, decremented by the destructor
+const int zStr::IDXENTRYSIZE = 8; // bytes per .idx record: 4-byte start + 4-byte size
+const int zStr::ZDXENTRYSIZE = 8; // bytes per .zdx record: 4-byte start + 4-byte size
+
+
+/******************************************************************************
+ * zStr Constructor - Initializes data for instance of zStr
+ *
+ * ENT: ipath - path of the directory where data and index files are located.
+ */
+
+zStr::zStr(const char *ipath, int fileMode, long blockCount, SWCompress *icomp, bool caseSensitive) : caseSensitive(caseSensitive)
+{
+	// Sets up an instance over the four files <ipath>.idx, <ipath>.dat,
+	// <ipath>.zdx and <ipath>.zdt.
+	//   ipath         - path prefix shared by the four files
+	//   fileMode      - FileMgr open mode; -1 selects FileMgr::RDWR
+	//   blockCount    - number of entries after which setText() starts a new block
+	//   icomp         - compressor to use (deleted by the destructor); a plain
+	//                   SWCompress is created when null
+	//   caseSensitive - when false, keys are upper-cased before they are compared
+	SWBuf buf;
+
+	lastoff = -1;
+	path = 0;
+	stdstr(&path, ipath);
+
+	compressor = (icomp) ? icomp : new SWCompress();
+	this->blockCount = blockCount;
+
+	if (fileMode == -1) { // try read/write if possible
+		fileMode = FileMgr::RDWR;
+	}
+
+	buf.setFormatted("%s.idx", path);
+	idxfd = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
+
+	buf.setFormatted("%s.dat", path);
+	datfd = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
+
+	buf.setFormatted("%s.zdx", path);
+	zdxfd = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
+
+	buf.setFormatted("%s.zdt", path);
+	zdtfd = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
+
+	// datfd is a pointer: test it for null rather than with '<= 0', which is
+	// an ordered pointer/integer comparison and ill-formed in C++11 and later.
+	if (!datfd) {
+		SWLog::getSystemLog()->logError("%d", errno);
+	}
+
+	cacheBlock = 0;
+	cacheBlockIndex = -1;
+	cacheDirty = false;
+
+	instance++;
+}
+
+
+/******************************************************************************
+ * zStr Destructor - Cleans up instance of zStr
+ */
+
+zStr::~zStr() {
+
+	// Write out any pending block before the files go away.
+	flushCache();
+
+	delete [] path;		// deleting a null pointer is a no-op
+
+	--instance;
+
+	FileMgr *fileMgr = FileMgr::getSystemFileMgr();
+	FileDesc *openFiles[] = { idxfd, datfd, zdxfd, zdtfd };
+	for (unsigned int i = 0; i < sizeof(openFiles) / sizeof(openFiles[0]); ++i) {
+		fileMgr->close(openFiles[i]);
+	}
+
+	delete compressor;
+
+}
+
+
+/******************************************************************************
+ * zStr::getKeyFromDatOffset - Gets the index string at the given dat offset
+ * NOTE: buf is malloc'd, or if not null, realloc'd and must
+ * be free'd by the calling function
+ *
+ * ENT: ioffset - offset in dat file to lookup
+ * buf - address of pointer to allocate for storage of string
+ */
+
+void zStr::getKeyFromDatOffset(long ioffset, char **buf) const
+{
+	// Reads the key that starts at ioffset in the .dat file. The key ends at
+	// the first '\\', LF or CR, or at end of file. *buf is (re)allocated with
+	// malloc/realloc and receives the null-terminated key, upper-cased unless
+	// the instance is case sensitive. Without a dat file *buf becomes "".
+	int size;
+	char ch;
+	if (datfd) {	// null test; 'datfd > 0' is ill-formed in C++11 and later
+		// first pass: measure the key
+		datfd->seek(ioffset, SEEK_SET);
+		for (size = 0; datfd->read(&ch, 1) == 1; size++) {
+			if ((ch == '\\') || (ch == 10) || (ch == 13))
+				break;
+		}
+		// twice the key length is reserved; the same bound is handed to toupperstr_utf8 below
+		*buf = (*buf) ? (char *)realloc(*buf, size*2 + 1) : (char *)malloc(size*2 + 1);
+		if (size) {
+			// second pass: read the key itself
+			datfd->seek(ioffset, SEEK_SET);
+			datfd->read(*buf, size);
+		}
+		(*buf)[size] = 0;
+		if (!caseSensitive) toupperstr_utf8(*buf, size*2);
+	}
+	else {
+		*buf = (*buf) ? (char *)realloc(*buf, 1) : (char *)malloc(1);
+		**buf = 0;
+	}
+}
+
+
+/******************************************************************************
+ * zStr::getKeyFromIdxOffset - Gets the index string at the given idx offset
+ * NOTE: buf is malloc'd, or if not null, realloc'd
+ * and must be freed by the calling function
+ *
+ * ENT: ioffset - offset in idx file to lookup
+ * buf - address of pointer to allocate for storage of string
+ */
+
+void zStr::getKeyFromIdxOffset(long ioffset, char **buf) const
+{
+	// Reads the 4-byte dat offset stored at ioffset in the .idx file and
+	// fetches the key found there (see getKeyFromDatOffset for the *buf
+	// allocation rules). *buf is left untouched when there is no idx file.
+	__u32 offset = 0;	// stays 0 if the read below comes up short
+
+	if (idxfd) {	// null test; 'idxfd > 0' is ill-formed in C++11 and later
+		idxfd->seek(ioffset, SEEK_SET);
+		idxfd->read(&offset, 4);
+		offset = swordtoarch32(offset);
+		getKeyFromDatOffset(offset, buf);
+	}
+}
+
+
+/******************************************************************************
+ * zStr::findKeyIndex - Finds the idx offset of the key string from the indexes
+ *
+ * ENT: ikey - key string to lookup
+ * idxoff - address to store the idx offset of the located entry
+ * away - number of entries before or after to jump
+ * (default = 0)
+ *
+ * RET: error status
+ */
+
+signed char zStr::findKeyIndex(const char *ikey, long *idxoff, long away) const // binary search of the .idx for ikey; returns 0, or -1 on error
+{
+ char *maxbuf = 0, *trybuf = 0, *key = 0, quitflag = 0;
+ signed char retval = 0;
+ __s32 headoff, tailoff, tryoff = 0, maxoff = 0;
+ __u32 start, size;
+ int diff = 0;
+ bool awayFromSubstrCheck = false;
+
+ if (idxfd->getFd() >= 0) {
+ tailoff = maxoff = idxfd->seek(0, SEEK_END) - IDXENTRYSIZE; // offset of the last idx record
+ if (*ikey) {
+ headoff = 0;
+ stdstr(&key, ikey, 3); // private copy of the key, released with delete [] below; presumably padded 3x for the in-place upper-casing -- confirm against stdstr
+ if (!caseSensitive) toupperstr_utf8(key, strlen(key)*3);
+
+ int keylen = strlen(key);
+ bool substr = false; // set once any probed key starts with our key
+
+ getKeyFromIdxOffset(maxoff, &maxbuf); // key of the last record, used after the loop
+
+ while (headoff < tailoff) {
+ tryoff = (lastoff == -1) ? headoff + (((((tailoff / IDXENTRYSIZE) - (headoff / IDXENTRYSIZE))) / 2) * IDXENTRYSIZE) : lastoff; // midpoint record, unless an earlier call left a hint in lastoff
+ lastoff = -1; // the hint is used for one probe only
+
+ getKeyFromIdxOffset(tryoff, &trybuf);
+
+ if (!*trybuf && tryoff) { // In case of extra entry at end of idx (not first entry)
+ tryoff += (tryoff > (maxoff / 2))?-IDXENTRYSIZE:IDXENTRYSIZE; // step one record toward the middle of the file
+ retval = -1;
+ break;
+ }
+
+ diff = strcmp(key, trybuf);
+
+ if (!diff)
+ break;
+
+ if (!strncmp(trybuf, key, keylen)) substr = true;
+
+ if (diff < 0)
+ tailoff = (tryoff == headoff) ? headoff : tryoff; // key sorts before the probe: continue in the lower half
+ else headoff = tryoff; // key sorts after the probe: continue in the upper half
+
+ if (tailoff == headoff + IDXENTRYSIZE) {
+ if (quitflag++) // second time head and tail are adjacent: force the loop to end
+ headoff = tailoff;
+ }
+ }
+
+ // didn't find exact match
+ if (headoff >= tailoff) {
+ tryoff = headoff;
+ if (!substr && ((tryoff != maxoff)||(strncmp(key, maxbuf, keylen)<0))) {
+ awayFromSubstrCheck = true;
+ away--; // if our entry doesn't startwith our key, prefer the previous entry over the next
+ }
+ }
+ if (trybuf)
+ free(trybuf);
+ delete [] key;
+ if (maxbuf)
+ free(maxbuf);
+ }
+ else { tryoff = 0; } // empty key: use the first record
+
+ idxfd->seek(tryoff, SEEK_SET);
+
+ start = size = 0;
+ retval = (idxfd->read(&start, 4) == 4) ? retval : -1; // a short read turns the result into an error
+ retval = (idxfd->read(&size, 4) == 4) ? retval : -1;
+ start = swordtoarch32(start);
+ size = swordtoarch32(size);
+
+ if (idxoff)
+ *idxoff = tryoff;
+
+ while (away) { // move away records forward (away > 0) or backward (away < 0)
+ __u32 laststart = start;
+ __u32 lastsize = size;
+ __s32 lasttry = tryoff;
+ tryoff += (away > 0) ? IDXENTRYSIZE : -IDXENTRYSIZE;
+
+ bool bad = false;
+ if (((long)(tryoff + (away*IDXENTRYSIZE)) < -IDXENTRYSIZE) || (tryoff + (away*IDXENTRYSIZE) > (maxoff+IDXENTRYSIZE))) // the remaining jump would leave the index file
+ bad = true;
+ else if (idxfd->seek(tryoff, SEEK_SET) < 0)
+ bad = true;
+ if (bad) {
+ if(!awayFromSubstrCheck) // not an error when away was only adjusted by the prefix check above
+ retval = -1;
+ start = laststart;
+ size = lastsize;
+ tryoff = lasttry;
+ if (idxoff)
+ *idxoff = tryoff;
+ break;
+ }
+ idxfd->read(&start, 4);
+ idxfd->read(&size, 4);
+ start = swordtoarch32(start);
+ size = swordtoarch32(size);
+
+ if (idxoff)
+ *idxoff = tryoff;
+
+
+ if (((laststart != start) || (lastsize != size)) && (start >= 0) && (size)) // count the step only if the record differs from the previous one and is non-empty (start is unsigned, so start >= 0 always holds)
+ away += (away < 0) ? 1 : -1;
+ }
+
+ lastoff = tryoff; // becomes the first probe of the next search
+ }
+ else {
+ if (idxoff)
+ *idxoff = 0;
+ retval = -1;
+ }
+ return retval;
+}
+
+
+/******************************************************************************
+ * zStr::getText - gets text at a given offset
+ *
+ * ENT:
+ * offset - idxoffset where the key is located.
+ * buf - buffer to store text
+ * idxbuf - buffer to store index key
+ * NOTE: buffer will be alloc'd / realloc'd and
+ * should be free'd by the client
+ *
+ */
+
+void zStr::getText(long offset, char **idxbuf, char **buf) const {
+	// Fetches the entry whose idx record sits at 'offset'.
+	//   *idxbuf receives the key found at 'offset' (before any link is followed)
+	//   *buf    receives the entry text taken from the compressed block
+	// Both buffers are malloc'd / realloc'd here and must be free'd by the caller.
+	char *ch;
+	char *idxbuflocal = 0;
+	getKeyFromIdxOffset(offset, &idxbuflocal);
+	__u32 start = 0;
+	__u32 size = 0;
+	__u32 payload = 0;	// bytes left in *buf once the key line has been stripped
+
+	do {
+		// reset first so a short read yields an empty record, not stale or uninitialised values
+		start = size = 0;
+		idxfd->seek(offset, SEEK_SET);
+		idxfd->read(&start, 4);
+		idxfd->read(&size, 4);
+		start = swordtoarch32(start);
+		size = swordtoarch32(size);
+
+		*buf = (*buf) ? (char *)realloc(*buf, size*2 + 1) : (char *)malloc(size*2 + 1);
+		*idxbuf = (*idxbuf) ? (char *)realloc(*idxbuf, size*2 + 1) : (char *)malloc(size*2 + 1);
+		memset(*buf, 0, size + 1);
+		memset(*idxbuf, 0, size + 1);
+		datfd->seek(start, SEEK_SET);
+		datfd->read(*buf, (int)(size));
+
+		for (ch = *buf; *ch; ch++) {		// skip over index string
+			if (*ch == 10) {
+				ch++;
+				break;
+			}
+		}
+		payload = size - (__u32)(ch - *buf);
+		memmove(*buf, ch, payload);
+
+		// resolve link
+		if (!strncmp(*buf, "@LINK", 5)) {
+			for (ch = *buf; *ch; ch++) {		// null before nl
+				if (*ch == 10) {
+					*ch = 0;
+					break;
+				}
+			}
+			findKeyIndex(*buf + 6, &offset);	// 6 == strlen("@LINK ")
+		}
+		else break;
+	}
+	while (true);	// while we're resolving links
+
+	if (idxbuflocal) {
+		__u32 localsize = strlen(idxbuflocal);
+		// *idxbuf was sized from 'size'; for an empty record 'size - 1' would
+		// wrap around and let strncpy overflow the buffer
+		const __u32 maxKeyLen = (size > 0) ? (size - 1) : 0;
+		if (localsize > maxKeyLen)
+			localsize = maxKeyLen;
+		strncpy(*idxbuf, idxbuflocal, localsize);
+		(*idxbuf)[localsize] = 0;
+		free(idxbuflocal);
+	}
+
+	// the payload must hold a block number and an entry number (4 bytes each)
+	if (payload < 2 * sizeof(__u32)) {
+		**buf = 0;
+		return;
+	}
+
+	__u32 block = 0;
+	__u32 entry = 0;
+	memmove(&block, *buf, sizeof(__u32));
+	memmove(&entry, *buf + sizeof(__u32), sizeof(__u32));
+	block = swordtoarch32(block);
+	entry = swordtoarch32(entry);
+	getCompressedText(block, entry, buf);
+}
+
+
+/******************************************************************************
+ * zStr::getCompressedText - Get text entry from a compressed index / zdata
+ * file.
+ */
+
+void zStr::getCompressedText(long block, long entry, char **buf) const {
+
+	// Load and decompress the requested block unless it is already the cached one.
+	if (cacheBlockIndex != block) {
+		__u32 zdtStart = 0;
+		__u32 zdtSize = 0;
+
+		// locate the block inside the .zdt file
+		zdxfd->seek(block * ZDXENTRYSIZE, SEEK_SET);
+		zdxfd->read(&zdtStart, 4);
+		zdxfd->read(&zdtSize, 4);
+		zdtStart = swordtoarch32(zdtStart);
+		zdtSize = swordtoarch32(zdtSize);
+
+		SWBuf packed;
+		packed.setSize(zdtSize + 5);
+		zdtfd->seek(zdtStart, SEEK_SET);
+		zdtfd->read(packed.getRawData(), zdtSize);
+
+		// the previously cached block is written back (if dirty) and dropped
+		flushCache();
+
+		unsigned long len = zdtSize;
+		packed.setSize(zdtSize);
+		rawZFilter(packed, 0);	// 0 = decipher
+
+		compressor->zBuf(&len, packed.getRawData());
+		char *rawBuf = compressor->Buf(0, &len);
+		cacheBlock = new EntriesBlock(rawBuf, len);
+		cacheBlockIndex = block;
+	}
+
+	// copy the requested entry out of the cached block
+	const __u32 entrySize = cacheBlock->getEntrySize(entry);
+	if (*buf)
+		*buf = (char *)realloc(*buf, entrySize*2 + 1);
+	else
+		*buf = (char *)malloc(entrySize*2 + 1);
+	strcpy(*buf, cacheBlock->getEntry(entry));
+}
+
+
+/******************************************************************************
+ * zStr::setText - Sets (or deletes) the text for a key
+ *
+ * ENT: ikey - key for this entry
+ * buf - buffer to store
+ * len - length of buffer (< 0 - null terminated; 0 - delete the entry)
+ */
+
+void zStr::setText(const char *ikey, const char *buf, long len) {
+	// Stores 'buf' as the text for 'ikey', or removes the idx record when len == 0.
+	//   ikey - key for this entry
+	//   buf  - text to store
+	//   len  - length of buf; a negative value means buf is null terminated
+	// The text goes into the cached EntriesBlock; the .dat record holds the
+	// key line followed by the block number and the entry number.
+
+	static const char nl[] = {13, 10};
+
+	__u32 start, outstart;
+	__u32 size, outsize;
+	__s32 endoff;
+	long idxoff = 0;
+	__s32 shiftSize;
+	char *tmpbuf = 0;
+	char *key = 0;
+	char *dbKey = 0;
+	char *idxBytes = 0;
+	char *outbuf = 0;
+	char *ch = 0;
+
+	len = (len < 0) ? strlen(buf) : len;
+	stdstr(&key, ikey, 3);
+	if (!caseSensitive) toupperstr_utf8(key, strlen(key)*3);
+
+	char notFound = findKeyIndex(ikey, &idxoff, 0);
+	if (!notFound) {
+		getKeyFromIdxOffset(idxoff, &dbKey);
+		int diff = strcmp(key, dbKey);
+		if (diff > 0) {
+			// our key sorts after the one found: insert behind it
+			idxoff += IDXENTRYSIZE;
+		}
+		else if ((!diff) && (len > 0 /*we're not deleting*/)) { // got absolute entry
+			do {
+				start = size = 0;
+				idxfd->seek(idxoff, SEEK_SET);
+				idxfd->read(&start, 4);
+				idxfd->read(&size, 4);
+				start = swordtoarch32(start);
+				size = swordtoarch32(size);
+
+				tmpbuf = new char [ size + 2 ];
+				memset(tmpbuf, 0, size + 2);
+				datfd->seek(start, SEEK_SET);
+				datfd->read(tmpbuf, size);
+
+				for (ch = tmpbuf; *ch; ch++) {		// skip over index string
+					if (*ch == 10) {
+						ch++;
+						break;
+					}
+				}
+				memmove(tmpbuf, ch, size - (unsigned long)(ch-tmpbuf));
+
+				// resolve link
+				const bool isLink = !strncmp(tmpbuf, "@LINK", 5) && (len);
+				if (isLink) {
+					for (ch = tmpbuf; *ch; ch++) {		// null before nl
+						if (*ch == 10) {
+							*ch = 0;
+							break;
+						}
+					}
+					// the target key follows "@LINK " (6 characters), as in getText()
+					findKeyIndex(tmpbuf + 6, &idxoff);
+				}
+				// released on every path; the non-link exit used to leak it
+				delete [] tmpbuf;
+				tmpbuf = 0;
+				if (!isLink)
+					break;
+			}
+			while (true);	// while we're resolving links
+		}
+	}
+
+	endoff = idxfd->seek(0, SEEK_END);
+
+	shiftSize = endoff - idxoff;
+
+	// keep a copy of every idx record from the insertion point onward
+	if (shiftSize > 0) {
+		idxBytes = new char [ shiftSize ];
+		idxfd->seek(idxoff, SEEK_SET);
+		idxfd->read(idxBytes, shiftSize);
+	}
+
+	// room for: key + CR LF + (text, or block and entry numbers) + terminator.
+	// The two numbers need 8 bytes even when len is smaller than that.
+	outbuf = new char [ len + strlen(key) + 2 + (2 * sizeof(__u32)) + 1 ];
+	sprintf(outbuf, "%s%c%c", key, 13, 10);
+	size = strlen(outbuf);
+	if (len > 0) {	// NOT a link
+		if (!cacheBlock) {
+			flushCache();
+			cacheBlock = new EntriesBlock();
+			cacheBlockIndex = (zdxfd->seek(0, SEEK_END) / ZDXENTRYSIZE);
+		}
+		else if (cacheBlock->getCount() >= blockCount) {
+			// the current block is full: write it out and start a new one
+			flushCache();
+			cacheBlock = new EntriesBlock();
+			cacheBlockIndex = (zdxfd->seek(0, SEEK_END) / ZDXENTRYSIZE);
+		}
+		__u32 entry = cacheBlock->addEntry(buf);
+		cacheDirty = true;
+		outstart = archtosword32(cacheBlockIndex);
+		outsize = archtosword32(entry);
+		memcpy (outbuf + size, &outstart, sizeof(__u32));
+		memcpy (outbuf + size + sizeof(__u32), &outsize, sizeof(__u32));
+		size += (sizeof(__u32) * 2);
+	}
+	else {	// link
+		memcpy(outbuf + size, buf, len);
+		size += len;
+	}
+
+	start = datfd->seek(0, SEEK_END);
+
+	outstart = archtosword32(start);
+	outsize = archtosword32(size);
+
+	idxfd->seek(idxoff, SEEK_SET);
+	if (len > 0) {
+		datfd->seek(start, SEEK_SET);
+		datfd->write(outbuf, size);
+
+		// add a new line to make data file easier to read in an editor
+		datfd->write(&nl, 2);
+
+		idxfd->write(&outstart, 4);
+		idxfd->write(&outsize, 4);
+		if (idxBytes) {
+			idxfd->write(idxBytes, shiftSize);
+		}
+	}
+	else {	// delete entry
+		if (idxBytes) {
+			idxfd->write(idxBytes+IDXENTRYSIZE, shiftSize-IDXENTRYSIZE);
+			idxfd->seek(-1, SEEK_CUR);	// last valid byte
+			FileMgr::getSystemFileMgr()->trunc(idxfd);	// truncate index
+		}
+	}
+
+	delete [] idxBytes;
+	delete [] key;
+	delete [] outbuf;
+	free(dbKey);
+}
+
+
+/******************************************************************************
+ * zStr::linkEntry - links one entry to another
+ *
+ * ENT: destkey - key of the entry being linked to
+ * srckey - key of the link entry to create
+ */
+
+void zStr::linkEntry(const char *destkey, const char *srckey) {
+	// A link is an ordinary entry stored under srckey whose text is "@LINK <destkey>".
+	const unsigned long prefixLen = 6;	// strlen("@LINK ")
+	const unsigned long needed = prefixLen + strlen(destkey) + 1;	// + terminator
+	char *linkText = new char [ needed ];
+	sprintf(linkText, "@LINK %s", destkey);
+	setText(srckey, linkText);
+	delete [] linkText;
+}
+
+
+void zStr::flushCache() const {
+	// Writes the cached block back to the .zdt/.zdx files when it is dirty,
+	// then drops the cache (cacheBlock = 0, cacheBlockIndex = -1).
+
+	static const char nl[] = {13, 10};
+
+	if (cacheBlock) {
+		if (cacheDirty) {
+			__u32 start = 0;
+			unsigned long size = 0;
+			__u32 outstart = 0, outsize = 0;
+
+			// compress the raw block
+			const char *rawBuf = cacheBlock->getRawData(&size);
+			compressor->Buf(rawBuf, &size);
+			compressor->zBuf(&size);
+
+			SWBuf buf;
+			buf.setSize(size + 5);
+			memcpy(buf.getRawData(), compressor->zBuf(&size), size);
+			buf.setSize(size);
+			rawZFilter(buf, 1);	// 1 = encipher
+
+			long zdxSize = zdxfd->seek(0, SEEK_END);
+			unsigned long zdtSize = zdtfd->seek(0, SEEK_END);
+
+			if ((cacheBlockIndex * ZDXENTRYSIZE) > (zdxSize - ZDXENTRYSIZE)) {	// New Block
+				start = zdtSize;
+			}
+			else {
+				zdxfd->seek(cacheBlockIndex * ZDXENTRYSIZE, SEEK_SET);
+				zdxfd->read(&start, 4);
+				zdxfd->read(&outsize, 4);
+				start = swordtoarch32(start);
+				outsize = swordtoarch32(outsize);
+				if (start + outsize >= zdtSize) {	// last entry, just overwrite
+					// start is already set
+				}
+				else if (size < outsize) {	// middle entry, but smaller, that's fine and let's preserve bigger size
+					// pad the buffer up to the old size so the write below
+					// never reads past the data we actually hold
+					buf.setFillByte(0);
+					buf.setSize(outsize);
+					size = outsize;
+				}
+				else {	// middle and bigger-- we have serious problems, for now let's put it at the end = lots of wasted space
+					start = zdtSize;
+				}
+			}
+
+			outstart = archtosword32(start);
+			outsize = archtosword32((__u32)size);
+
+			zdxfd->seek(cacheBlockIndex * ZDXENTRYSIZE, SEEK_SET);
+			zdtfd->seek(start, SEEK_SET);
+			zdtfd->write(buf, size);
+
+			// add a new line to make data file easier to read in an editor
+			zdtfd->write(&nl, 2);
+
+			zdxfd->write(&outstart, 4);
+			zdxfd->write(&outsize, 4);
+		}
+		delete cacheBlock;
+		cacheBlock = 0;
+	}
+	cacheBlockIndex = -1;
+	cacheDirty = false;
+}
+
+
+/******************************************************************************
+ * zStr::createModule - Creates new module files
+ *
+ * ENT: ipath - path prefix of the module files (.dat, .idx, .zdt, .zdx)
+ * RET: error status
+ */
+
+signed char zStr::createModule(const char *ipath) {
+	// Creates (after removing any existing copy) the four empty module files
+	// <ipath>.dat, <ipath>.idx, <ipath>.zdt and <ipath>.zdx. A trailing '/' or
+	// '\\' on ipath is dropped first. Always returns 0.
+	static const char *extensions[] = { "dat", "idx", "zdt", "zdx" };
+
+	char *path = 0;
+	char *buf = new char [ strlen (ipath) + 20 ];
+
+	stdstr(&path, ipath);
+
+	// guard against an empty path before looking at its last character
+	const size_t pathLen = strlen(path);
+	if (pathLen && ((path[pathLen-1] == '/') || (path[pathLen-1] == '\\')))
+		path[pathLen-1] = 0;
+
+	for (unsigned int i = 0; i < sizeof(extensions) / sizeof(extensions[0]); ++i) {
+		sprintf(buf, "%s.%s", path, extensions[i]);
+		FileMgr::removeFile(buf);
+		FileDesc *fd = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE);
+		fd->getFd();	// kept from the original; presumably this is what actually creates the file -- confirm against FileMgr
+		FileMgr::getSystemFileMgr()->close(fd);
+	}
+
+	delete [] path;
+	delete [] buf;	// was leaked before
+
+	return 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/common/zverse.cpp b/src/modules/common/zverse.cpp
new file mode 100644
index 0000000..c280d98
--- /dev/null
+++ b/src/modules/common/zverse.cpp
@@ -0,0 +1,507 @@
+/******************************************************************************
+ *
+ * zverse.cpp - code for class 'zVerse'- a module that reads raw text
+ * files: ot and nt using indexs ??.bks ??.cps ??.vss
+ * and provides lookup and parsing functions based on
+ * class VerseKey for compressed modules
+ *
+ * $Id: zverse.cpp 2833 2013-06-29 06:40:28Z chrislit $
+ *
+ * Copyright 1996-2013 CrossWire Bible Society (http://www.crosswire.org)
+ * CrossWire Bible Society
+ * P. O. Box 2528
+ * Tempe, AZ 85280-2528
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ */
+
+
+
+#include <ctype.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdlib.h>
+
+#include <utilstr.h>
+#include <versekey.h>
+#include <zverse.h>
+#include <sysdata.h>
+#include <swbuf.h>
+#include <filemgr.h>
+#include <swcomprs.h>
+
+
+SWORD_NAMESPACE_START
+
+/******************************************************************************
+ * zVerse Statics
+ */
+
+int zVerse::instance = 0; // number of live zVerse objects: incremented by the constructor, decremented by the destructor
+
+const char zVerse::uniqueIndexID[] = {'X', 'r', 'v', 'c', 'b'}; // indexed by blockType / blockBound; the selected letter becomes part of each data file name
+
+/******************************************************************************
+ * zVerse Constructor - Initializes data for instance of zVerse
+ *
+ * ENT: ipath - path of the directory where data and index files are located.
+ * be sure to include the trailing separator (e.g. '/' or '\')
+ * (e.g. 'modules/texts/rawtext/webster/')
+ * fileMode - open mode for the files (FileMgr::RDONLY, etc.)
+ * blockType - verse, chapter, book, etc.
+ */
+
+zVerse::zVerse(const char *ipath, int fileMode, int blockType, SWCompress *icomp)
+{
+	// Opens the six module files found in directory ipath:
+	//   ot/nt.<id>zs -> idxfp, ot/nt.<id>zz -> textfp, ot/nt.<id>zv -> compfp
+	// where <id> is uniqueIndexID[blockType].
+	//   fileMode - FileMgr open mode; -1 selects FileMgr::RDONLY
+	//   icomp    - compressor to use (deleted by the destructor); a plain
+	//              SWCompress is created when null
+
+	// this line, instead of just defaulting, to keep FileMgr out of header
+	if (fileMode == -1) fileMode = FileMgr::RDONLY;
+
+	SWBuf buf;
+
+	nl = '\n';
+	path = 0;
+	cacheBufIdx = -1;
+	cacheTestament = 0;
+	cacheBuf = 0;
+	dirtyCache = false;
+	stdstr(&path, ipath);
+
+	// drop a trailing path separator; guard against an empty path
+	const size_t pathLen = strlen(path);
+	if (pathLen && ((path[pathLen-1] == '/') || (path[pathLen-1] == '\\')))
+		path[pathLen-1] = 0;
+
+	compressor = (icomp) ? icomp : new SWCompress();
+
+	// (a second 'fileMode == -1' test used to sit here; it could never be
+	// true because the default was already applied above)
+
+	buf.setFormatted("%s/ot.%czs", path, uniqueIndexID[blockType]);
+	idxfp[0] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
+
+	buf.setFormatted("%s/nt.%czs", path, uniqueIndexID[blockType]);
+	idxfp[1] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
+
+	buf.setFormatted("%s/ot.%czz", path, uniqueIndexID[blockType]);
+	textfp[0] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
+
+	buf.setFormatted("%s/nt.%czz", path, uniqueIndexID[blockType]);
+	textfp[1] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
+
+	buf.setFormatted("%s/ot.%czv", path, uniqueIndexID[blockType]);
+	compfp[0] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
+
+	buf.setFormatted("%s/nt.%czv", path, uniqueIndexID[blockType]);
+	compfp[1] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
+
+	instance++;
+}
+
+
+/******************************************************************************
+ * zVerse Destructor - Cleans up instance of zVerse
+ */
+
+zVerse::~zVerse()
+{
+	// Pending text is written out first; flushCache() may already release
+	// and null cacheBuf, in which case the free() below is a no-op.
+	if (cacheBuf) {
+		flushCache();
+		free(cacheBuf);
+	}
+
+	delete [] path;		// deleting a null pointer is a no-op
+	delete compressor;
+
+	--instance;
+
+	FileMgr *fileMgr = FileMgr::getSystemFileMgr();
+	for (int t = 0; t < 2; ++t) {
+		fileMgr->close(idxfp[t]);
+		fileMgr->close(textfp[t]);
+		fileMgr->close(compfp[t]);
+	}
+}
+
+
+/******************************************************************************
+ * zVerse::findOffset - Finds the offset of the key verse from the indexes
+ *
+ *
+ *
+ * ENT: testmt - testament to find (0 - Bible/module introduction)
+ * idxoff - record number in the verse index (each record is 10 bytes)
+ * start - address to store the starting offset
+ * size - address to store the size of the entry
+ * buffnum - address to store the number of the compressed buffer holding the entry
+ */
+
+void zVerse::findOffset(char testmt, long idxoff, long *start, unsigned short *size, unsigned long *buffnum) const
+{
+	// Reads record number idxoff (10 bytes: buffer number, verse start, verse
+	// size) from the verse index of the given testament. The three outputs are
+	// zeroed first and stay zero on any failure.
+	__u32 ulBuffNum    = 0;	          // buffer number
+	__u32 ulVerseStart = 0;	       // verse offset within buffer
+	__u16 usVerseSize  = 0;	       // verse size
+
+	*start = *size = *buffnum = 0;
+	//fprintf(stderr, "Finding offset %ld\n", idxoff);
+	idxoff *= 10;
+	if (!testmt) {
+		testmt = ((idxfp[0]) ? 1:2);
+	}
+
+	// assert we have and valid file descriptor
+	if (compfp[testmt-1]->getFd() < 1)
+		return;
+
+	long newOffset = compfp[testmt-1]->seek(idxoff, SEEK_SET);
+	if (newOffset != idxoff)
+		return;
+
+	if (compfp[testmt-1]->read(&ulBuffNum, 4) != 4) {
+		fprintf(stderr, "Error reading ulBuffNum\n");
+		return;
+	}
+
+	// all four bytes are required; the old '< 2' test let a 2- or 3-byte read through
+	if (compfp[testmt-1]->read(&ulVerseStart, 4) != 4) {
+		fprintf(stderr, "Error reading ulVerseStart\n");
+		return;
+	}
+
+	if (compfp[testmt-1]->read(&usVerseSize, 2) != 2) {
+		fprintf(stderr, "Error reading usVerseSize\n");
+		return;
+	}
+
+	*buffnum = swordtoarch32(ulBuffNum);
+	*start = swordtoarch32(ulVerseStart);
+	*size = swordtoarch16(usVerseSize);
+
+}
+
+
+/******************************************************************************
+ * zVerse::zReadText - gets text at a given offset
+ *
+ * ENT: testmt - testament file to search in (1 - Old; 2 - New; 0 - pick a default)
+ * start - starting offset of the text within the uncompressed buffer
+ * size - size of text entry
+ * ulBuffNum - number of the compressed buffer holding the text
+ * inBuf - buffer to store text
+ *
+ */
+
+void zVerse::zReadText(char testmt, long start, unsigned short size, unsigned long ulBuffNum, SWBuf &inBuf) const {
+	// Copies 'size' bytes starting at 'start' of uncompressed buffer number
+	// ulBuffNum into inBuf. The buffer is decompressed into cacheBuf first
+	// unless it is already the cached one. On a read or seek error the function
+	// returns early and leaves inBuf untouched.
+
+	if (!testmt) {
+		testmt = ((idxfp[0]) ? 1:2);
+	}
+	const int t = testmt - 1;
+
+	// assert we have and valid file descriptor
+	if (compfp[t]->getFd() < 1)
+		return;
+
+	const bool alreadyCached = ((long) ulBuffNum == cacheBufIdx) && (testmt == cacheTestament) && (cacheBuf);
+
+	if (size && !alreadyCached) {
+		__u32 compOffset = 0;	// compressed buffer start
+		__u32 compSize = 0;	// buffer size compressed
+		__u32 unCompSize = 0;	// buffer size uncompressed
+
+		//fprintf(stderr, "Got buffer number{%ld} versestart{%ld} versesize{%d}\n", ulBuffNum, ulVerseStart, usVerseSize);
+
+		// each buffer index record is 12 bytes: offset, compressed size, uncompressed size
+		if (idxfp[t]->seek(ulBuffNum*12, SEEK_SET) != (long) ulBuffNum*12) {
+			fprintf(stderr, "Error seeking compressed file index\n");
+			return;
+		}
+		if (idxfp[t]->read(&compOffset, 4) < 4) {
+			fprintf(stderr, "Error reading ulCompOffset\n");
+			return;
+		}
+		if (idxfp[t]->read(&compSize, 4) < 4) {
+			fprintf(stderr, "Error reading ulCompSize\n");
+			return;
+		}
+		if (idxfp[t]->read(&unCompSize, 4) < 4) {
+			fprintf(stderr, "Error reading ulUnCompSize\n");
+			return;
+		}
+
+		compOffset = swordtoarch32(compOffset);
+		compSize = swordtoarch32(compSize);
+		unCompSize = swordtoarch32(unCompSize);
+
+		if (textfp[t]->seek(compOffset, SEEK_SET) != (long)compOffset) {
+			fprintf(stderr, "Error: could not seek to right place in compressed text\n");
+			return;
+		}
+
+		SWBuf packed;
+		packed.setSize(compSize+5);
+
+		if (textfp[t]->read(packed.getRawData(), compSize) < (long)compSize) {
+			fprintf(stderr, "Error reading compressed text\n");
+			return;
+		}
+		packed.setSize(compSize);
+		rawZFilter(packed, 0);	// 0 = decipher
+
+		unsigned long packedLen = compSize;
+		compressor->zBuf(&packedLen, packed.getRawData());
+
+		// write back and drop whatever was cached before
+		if (cacheBuf) {
+			flushCache();
+			free(cacheBuf);
+		}
+
+		unsigned long plainLen = 0;
+		compressor->Buf(0, &plainLen);
+		cacheBuf = (char *)calloc(plainLen + 1, 1);
+		memcpy(cacheBuf, compressor->Buf(), plainLen);
+		cacheBufSize = strlen(cacheBuf);  // TODO: can we just use len?
+		cacheTestament = testmt;
+		cacheBufIdx = ulBuffNum;
+	}
+
+	inBuf = "";
+	if ((size > 0) && cacheBuf && ((unsigned)start < cacheBufSize)) {
+		inBuf.setFillByte(0);
+		inBuf.setSize(size+1);
+		strncpy(inBuf.getRawData(), &(cacheBuf[start]), size);
+		inBuf.setSize(strlen(inBuf.c_str()));
+	}
+}
+
+
+/******************************************************************************
+ * zVerse::doSetText - Sets text for the given index record
+ *
+ * ENT: testmt - testament to find (0 - Bible/module introduction)
+ * idxoff - record number in the verse index
+ * buf - buffer to store
+ * len - length of buffer (< 0 - null terminated)
+ */
+
+void zVerse::doSetText(char testmt, long idxoff, const char *buf, long len) {
+	// Appends 'buf' to the pending (uncompressed) cache buffer and writes the
+	// 10-byte verse index record number idxoff pointing at it.
+	//   testmt - testament (0 picks a default)
+	//   idxoff - record number in the verse index
+	//   buf    - text to store
+	//   len    - length of buf; a negative value means buf is null terminated
+	// NOTE: the size field of the record is 16 bits wide, so longer text is
+	// recorded with a truncated size.
+
+	len = (len < 0) ? strlen(buf) : len;
+	if (!testmt)
+		testmt = ((idxfp[0]) ? 1:2);
+	if ((!dirtyCache) || (cacheBufIdx < 0)) {
+		// nothing pending: start a new buffer after the last one on file
+		cacheBufIdx = idxfp[testmt-1]->seek(0, SEEK_END) / 12;
+		cacheTestament = testmt;
+		if (cacheBuf)
+			free(cacheBuf);
+		cacheBuf = (char *)calloc(len + 1, 1);
+	}
+	else cacheBuf = (char *)((cacheBuf)?realloc(cacheBuf, strlen(cacheBuf)+(len + 1)):calloc((len + 1), 1));
+
+	dirtyCache = true;
+
+	__u32 start;
+	__u16 size;
+	__u32 outBufIdx = cacheBufIdx;
+
+	idxoff *= 10;
+	size = len;
+
+	start = strlen(cacheBuf);
+
+	// an empty entry is recorded as all zeros
+	if (!size)
+		start = outBufIdx = 0;
+
+	outBufIdx = archtosword32(outBufIdx);
+	start  = archtosword32(start);
+	size   = archtosword16(size);
+
+	compfp[testmt-1]->seek(idxoff, SEEK_SET);
+	compfp[testmt-1]->write(&outBufIdx, 4);
+	compfp[testmt-1]->write(&start, 4);
+	compfp[testmt-1]->write(&size, 2);
+
+	// the buffer has room for exactly len more characters; strcat would copy
+	// strlen(buf) of them and overflow whenever the caller passed a smaller len
+	strncat(cacheBuf, buf, len);
+}
+
+
+void zVerse::flushCache() const {
+	// Compresses the pending cache buffer, appends it to the text file and
+	// records it in the buffer index. Does nothing unless the cache is dirty.
+	if (!dirtyCache)
+		return;
+
+	const __u32 recordOff = cacheBufIdx * 12;	// 12-byte buffer index records
+
+	if (cacheBuf) {
+		const __u32 plainSize = strlen(cacheBuf);
+		if (plainSize) {
+//			if (compressor) {
+//				delete compressor;
+//				compressor = new LZSSCompress();
+//			}
+			compressor->Buf(cacheBuf);
+			unsigned long packedLen;
+			compressor->zBuf(&packedLen);
+			__u32 packedSize = packedLen;
+
+			SWBuf packed;
+			packed.setSize(packedSize + 5);
+			memcpy(packed.getRawData(), compressor->zBuf(&packedLen), packedLen);
+			packedSize = packedLen;
+			packed.setSize(packedSize);
+			rawZFilter(packed, 1);	// 1 = encipher
+
+			FileDesc *textFile = textfp[cacheTestament-1];
+			FileDesc *indexFile = idxfp[cacheTestament-1];
+
+			// the compressed buffer always goes to the end of the text file
+			const __u32 fileStart = textFile->seek(0, SEEK_END);
+
+			const __u32 diskStart = archtosword32(fileStart);
+			const __u32 diskPlainSize = archtosword32(plainSize);
+			const __u32 diskPackedSize = archtosword32(packedSize);
+
+			textFile->write(packed, packedSize);
+
+			// index record: offset, compressed size, uncompressed size
+			indexFile->seek(recordOff, SEEK_SET);
+			indexFile->write(&diskStart, 4);
+			indexFile->write(&diskPackedSize, 4);
+			indexFile->write(&diskPlainSize, 4);
+		}
+		free(cacheBuf);
+		cacheBuf = 0;
+	}
+	dirtyCache = false;
+}
+
+/******************************************************************************
+ * zVerse::doLinkEntry - links one entry to another
+ *
+ * ENT: testmt - testament to find (0 - Bible/module introduction)
+ * destidxoff - dest record number in the verse index
+ * srcidxoff - source record number in the verse index
+ */
+
+void zVerse::doLinkEntry(char testmt, long destidxoff, long srcidxoff) {
+	// Copies the 10-byte verse index record number srcidxoff over record
+	// number destidxoff, so both records refer to the same text.
+	__s32 bufidx = 0;
+	__s32 start = 0;
+	__u16 size = 0;
+
+	destidxoff *= 10;
+	srcidxoff  *= 10;
+
+	// same default as findOffset(), zReadText() and doSetText(); this one
+	// function used to look at idxfp[1] instead
+	if (!testmt)
+		testmt = ((idxfp[0]) ? 1:2);
+
+	// get source; give up rather than write a half-read record over the destination
+	compfp[testmt-1]->seek(srcidxoff, SEEK_SET);
+	if (compfp[testmt-1]->read(&bufidx, 4) != 4)
+		return;
+	if (compfp[testmt-1]->read(&start, 4) != 4)
+		return;
+	if (compfp[testmt-1]->read(&size, 2) != 2)
+		return;
+
+	// write dest
+	compfp[testmt-1]->seek(destidxoff, SEEK_SET);
+	compfp[testmt-1]->write(&bufidx, 4);
+	compfp[testmt-1]->write(&start, 4);
+	compfp[testmt-1]->write(&size, 2);
+}
+
+
+/******************************************************************************
+ * zVerse::createModule - Creates new module files
+ *
+ * ENT: ipath - directory to store module files
+ * RET: error status
+ */
+
+char zVerse::createModule(const char *ipath, int blockBound, const char *v11n)
+{
+	// Creates a fresh, empty module in directory ipath:
+	//   ot/nt.<id>zs and ot/nt.<id>zz are created empty,
+	//   ot/nt.<id>zv get one zeroed 10-byte record per verse key of the
+	//   versification system v11n (intros included),
+	// where <id> is uniqueIndexID[blockBound]. Always returns 0.
+	static const char *testaments[] = { "ot", "nt" };
+	static const char *emptySuffixes[] = { "zs", "zz" };
+
+	char *path = 0;
+	char *buf = new char [ strlen (ipath) + 20 ];
+	FileDesc *fd, *fd2;
+
+	stdstr(&path, ipath);
+
+	// drop a trailing path separator; guard against an empty path
+	const size_t pathLen = strlen(path);
+	if (pathLen && ((path[pathLen-1] == '/') || (path[pathLen-1] == '\\')))
+		path[pathLen-1] = 0;
+
+	// ot.?zs, nt.?zs, ot.?zz, nt.?zz: create and close again. Each file is
+	// closed through its own descriptor; nt.?zz used to be left open because
+	// the wrong pointer was passed to close().
+	for (int s = 0; s < 2; ++s) {
+		for (int t = 0; t < 2; ++t) {
+			sprintf(buf, "%s/%s.%c%s", path, testaments[t], uniqueIndexID[blockBound], emptySuffixes[s]);
+			FileMgr::removeFile(buf);
+			fd = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE);
+			fd->getFd();
+			FileMgr::getSystemFileMgr()->close(fd);
+		}
+	}
+
+	// the two verse index files stay open while they are filled below
+	sprintf(buf, "%s/ot.%czv", path, uniqueIndexID[blockBound]);
+	FileMgr::removeFile(buf);
+	fd = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE);
+	fd->getFd();
+
+	sprintf(buf, "%s/nt.%czv", path, uniqueIndexID[blockBound]);
+	FileMgr::removeFile(buf);
+	fd2 = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE);
+	fd2->getFd();
+
+	VerseKey vk;
+	vk.setVersificationSystem(v11n);
+	vk.setIntros(true);
+
+	__s32 offset = 0;
+	__s16 size = 0;
+	offset = archtosword32(offset);
+	size   = archtosword16(size);
+
+	for (vk = TOP; !vk.popError(); vk++) {
+		if (vk.getTestament() < 2) {
+			fd->write(&offset, 4);	//compBufIdxOffset
+			fd->write(&offset, 4);
+			fd->write(&size, 2);
+		}
+		else {
+			fd2->write(&offset, 4);	//compBufIdxOffset
+			fd2->write(&offset, 4);
+			fd2->write(&size, 2);
+		}
+	}
+	// one extra record at the end of the nt index
+	fd2->write(&offset, 4);	//compBufIdxOffset
+	fd2->write(&offset, 4);
+	fd2->write(&size, 2);
+
+	FileMgr::getSystemFileMgr()->close(fd);
+	FileMgr::getSystemFileMgr()->close(fd2);
+
+	delete [] path;
+	delete [] buf;
+
+	return 0;
+}
+
+
+SWORD_NAMESPACE_END