diff options
author | Dimitri John Ledkov <xnox@ubuntu.com> | 2014-05-11 22:09:52 +0100 |
---|---|---|
committer | Dimitri John Ledkov <xnox@ubuntu.com> | 2014-05-11 22:09:52 +0100 |
commit | 3525014850e3800ac7b28fd34e7f7af427f1c620 (patch) | |
tree | 3d1b8a17b86cfa9af178ceb818a4dc9daf52a46b /src/modules/common |
sword (1.7.2+dfsg-2) unstable; urgency=medium
* Correct shared library symlink. (Closes: #747420)
# imported from the archive
Diffstat (limited to 'src/modules/common')
-rw-r--r-- | src/modules/common/Makefile | 4 | ||||
-rw-r--r-- | src/modules/common/Makefile.am | 23 | ||||
-rw-r--r-- | src/modules/common/bz2comprs.cpp | 181 | ||||
-rw-r--r-- | src/modules/common/entriesblk.cpp | 194 | ||||
-rw-r--r-- | src/modules/common/lzsscomprs.cpp | 732 | ||||
-rw-r--r-- | src/modules/common/lzsscomprs.txt | 802 | ||||
-rw-r--r-- | src/modules/common/rawstr.cpp | 529 | ||||
-rw-r--r-- | src/modules/common/rawstr4.cpp | 538 | ||||
-rw-r--r-- | src/modules/common/rawverse.cpp | 311 | ||||
-rw-r--r-- | src/modules/common/rawverse4.cpp | 312 | ||||
-rw-r--r-- | src/modules/common/sapphire.cpp | 236 | ||||
-rw-r--r-- | src/modules/common/swcipher.cpp | 147 | ||||
-rw-r--r-- | src/modules/common/swcomprs.cpp | 211 | ||||
-rw-r--r-- | src/modules/common/xzcomprs.cpp | 181 | ||||
-rw-r--r-- | src/modules/common/zipcomprs.cpp | 183 | ||||
-rw-r--r-- | src/modules/common/zstr.cpp | 700 | ||||
-rw-r--r-- | src/modules/common/zverse.cpp | 507 |
17 files changed, 5791 insertions, 0 deletions
diff --git a/src/modules/common/Makefile b/src/modules/common/Makefile new file mode 100644 index 0000000..81f7721 --- /dev/null +++ b/src/modules/common/Makefile @@ -0,0 +1,4 @@ +root := ../../.. + +all: + make -C ${root} diff --git a/src/modules/common/Makefile.am b/src/modules/common/Makefile.am new file mode 100644 index 0000000..90a3f98 --- /dev/null +++ b/src/modules/common/Makefile.am @@ -0,0 +1,23 @@ +commondir = $(top_srcdir)/src/modules/common + +libsword_la_SOURCES += $(commondir)/rawstr.cpp +libsword_la_SOURCES += $(commondir)/rawstr4.cpp +libsword_la_SOURCES += $(commondir)/swcomprs.cpp +libsword_la_SOURCES += $(commondir)/lzsscomprs.cpp + +if HAVE_LIBZ +SWZLIB = $(commondir)/zipcomprs.cpp +SWZLIB += $(commondir)/bz2comprs.cpp +SWZLIB += $(commondir)/xzcomprs.cpp +else +SWZLIB = +endif +libsword_la_SOURCES += $(SWZLIB) +libsword_la_SOURCES += $(commondir)/rawverse.cpp +libsword_la_SOURCES += $(commondir)/rawverse4.cpp +libsword_la_SOURCES += $(commondir)/swcipher.cpp +libsword_la_SOURCES += $(commondir)/zverse.cpp +libsword_la_SOURCES += $(commondir)/zstr.cpp +libsword_la_SOURCES += $(commondir)/entriesblk.cpp +libsword_la_SOURCES += $(commondir)/sapphire.cpp + diff --git a/src/modules/common/bz2comprs.cpp b/src/modules/common/bz2comprs.cpp new file mode 100644 index 0000000..16f6d11 --- /dev/null +++ b/src/modules/common/bz2comprs.cpp @@ -0,0 +1,181 @@ +/****************************************************************************** + * + * bz2comprs.cpp - Bzip2Compress, a driver class that provides bzip2 + * compression (Burrows–Wheeler with Huffman coding) + * + * $Id: bz2comprs.cpp 2858 2013-07-08 03:08:10Z chrislit $ + * + * Copyright 2013 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. 
Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + + +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <bz2comprs.h> +#include <zlib.h> + +SWORD_NAMESPACE_START + +/****************************************************************************** + * Bzip2Compress Constructor - Initializes data for instance of Bzip2Compress + * + */ + +Bzip2Compress::Bzip2Compress() : SWCompress() { +} + + +/****************************************************************************** + * Bzip2Compress Destructor - Cleans up instance of Bzip2Compress + */ + +Bzip2Compress::~Bzip2Compress() { +} + + +/****************************************************************************** + * Bzip2Compress::Encode - This function "encodes" the input stream into the + * output stream. + * The GetChars() and SendChars() functions are + * used to separate this method from the actual + * i/o. + * NOTE: must set zlen for parent class to know length of + * compressed buffer. + */ + +void Bzip2Compress::Encode(void) +{ +/* +ZEXTERN int ZEXPORT compress OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen)); + Compresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total + size of the destination buffer, which must be at least 0.1% larger than + sourceLen plus 12 bytes. Upon exit, destLen is the actual size of the + compressed buffer. + This function can be used to compress a whole file at once if the + input file is mmap'ed. 
+ compress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer. +*/ + direct = 0; // set direction needed by parent [Get|Send]Chars() + + // get buffer + char chunk[1024]; + char *buf = (char *)calloc(1, 1024); + char *chunkbuf = buf; + unsigned long chunklen; + unsigned long len = 0; + while((chunklen = GetChars(chunk, 1023))) { + memcpy(chunkbuf, chunk, chunklen); + len += chunklen; + if (chunklen < 1023) + break; + else buf = (char *)realloc(buf, len + 1024); + chunkbuf = buf+len; + } + + + zlen = (long) (len*1.001)+15; + char *zbuf = new char[zlen+1]; + if (len) + { + //printf("Doing compress\n"); + if (compress((Bytef*)zbuf, &zlen, (const Bytef*)buf, len) != Z_OK) + { + printf("ERROR in compression\n"); + } + else { + SendChars(zbuf, zlen); + } + } + else + { + fprintf(stderr, "ERROR: no buffer to compress\n"); + } + delete [] zbuf; + free (buf); +} + + +/****************************************************************************** + * Bzip2Compress::Decode - This function "decodes" the input stream into the + * output stream. + * The GetChars() and SendChars() functions are + * used to separate this method from the actual + * i/o. + */ + +void Bzip2Compress::Decode(void) +{ +/* +ZEXTERN int ZEXPORT uncompress OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen)); + Decompresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total + size of the destination buffer, which must be large enough to hold the + entire uncompressed data. (The size of the uncompressed data must have + been saved previously by the compressor and transmitted to the decompressor + by some mechanism outside the scope of this compression library.) + Upon exit, destLen is the actual size of the compressed buffer. + This function can be used to decompress a whole file at once if the + input file is mmap'ed. 
+ + uncompress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer, or Z_DATA_ERROR if the input data was corrupted. +*/ + + // get buffer + char chunk[1024]; + char *zbuf = (char *)calloc(1, 1024); + char *chunkbuf = zbuf; + int chunklen; + unsigned long zlen = 0; + while((chunklen = GetChars(chunk, 1023))) { + memcpy(chunkbuf, chunk, chunklen); + zlen += chunklen; + if (chunklen < 1023) + break; + else zbuf = (char *)realloc(zbuf, zlen + 1024); + chunkbuf = zbuf + zlen; + } + + //printf("Decoding complength{%ld} uncomp{%ld}\n", zlen, blen); + if (zlen) { + unsigned long blen = zlen*20; // trust compression is less than 1000% + char *buf = new char[blen]; + //printf("Doing decompress {%s}\n", zbuf); + slen = 0; + switch (uncompress((Bytef*)buf, &blen, (Bytef*)zbuf, zlen)){ + case Z_OK: SendChars(buf, blen); slen = blen; break; + case Z_MEM_ERROR: fprintf(stderr, "ERROR: not enough memory during decompression.\n"); break; + case Z_BUF_ERROR: fprintf(stderr, "ERROR: not enough room in the out buffer during decompression.\n"); break; + case Z_DATA_ERROR: fprintf(stderr, "ERROR: corrupt data during decompression.\n"); break; + default: fprintf(stderr, "ERROR: an unknown error occured during decompression.\n"); break; + } + delete [] buf; + } + else { + fprintf(stderr, "ERROR: no buffer to decompress!\n"); + } + //printf("Finished decoding\n"); + free (zbuf); +} + +SWORD_NAMESPACE_END diff --git a/src/modules/common/entriesblk.cpp b/src/modules/common/entriesblk.cpp new file mode 100644 index 0000000..4872d28 --- /dev/null +++ b/src/modules/common/entriesblk.cpp @@ -0,0 +1,194 @@ +/****************************************************************************** + * + * entriesblk.cpp - EntriesBlock facilitates compressed lex/dict modules + * + * $Id: entriesblk.cpp 2833 2013-06-29 06:40:28Z chrislit $ + * + * Copyright 2001-2013 CrossWire Bible Society (http://www.crosswire.org) + * 
CrossWire Bible Society + * P. O. Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + +#include <entriesblk.h> +#include <stdlib.h> +#include <string.h> + +SWORD_NAMESPACE_START + +const int EntriesBlock::METAHEADERSIZE = 4; + // count(4); +const int EntriesBlock::METAENTRYSIZE = 8; + // offset(4); size(4); + +EntriesBlock::EntriesBlock(const char *iBlock, unsigned long size) { + if (size) { + block = (char *)calloc(1, size); + memcpy(block, iBlock, size); + } + else { + block = (char *)calloc(1, sizeof(__u32)); + } +} + + +EntriesBlock::EntriesBlock() { + block = (char *)calloc(1, sizeof(__u32)); +} + + +EntriesBlock::~EntriesBlock() { + free(block); +} + + +void EntriesBlock::setCount(int count) { + __u32 rawCount = archtosword32(count); + memcpy(block, &rawCount, sizeof(__u32)); +} + + +int EntriesBlock::getCount() { + __u32 count = 0; + memcpy(&count, block, sizeof(__u32)); + count = swordtoarch32(count); + return count; +} + + +void EntriesBlock::getMetaEntry(int index, unsigned long *offset, unsigned long *size) { + __u32 rawOffset = 0; + __u32 rawSize = 0; + *offset = 0; + *size = 0; + if (index >= getCount()) // assert index < count + return; + + // first 4 bytes is count, each 8 bytes after is each meta entry + memcpy(&rawOffset, block + METAHEADERSIZE + (index * METAENTRYSIZE), sizeof(rawOffset)); + memcpy(&rawSize, block + METAHEADERSIZE + (index * METAENTRYSIZE) + sizeof(rawOffset), sizeof(rawSize)); + + *offset = (unsigned long)swordtoarch32(rawOffset); + *size = (unsigned long)swordtoarch32(rawSize); +} + + +void 
EntriesBlock::setMetaEntry(int index, unsigned long offset, unsigned long size) { + __u32 rawOffset = archtosword32(offset); + __u32 rawSize = archtosword32(size); + + if (index >= getCount()) // assert index < count + return; + + // first 4 bytes is count, each 8 bytes after is each meta entry + memcpy(block + METAHEADERSIZE + (index * METAENTRYSIZE), &rawOffset, sizeof(rawOffset)); + memcpy(block + METAHEADERSIZE + (index * METAENTRYSIZE) + sizeof(rawOffset), &rawSize, sizeof(rawSize)); +} + + +const char *EntriesBlock::getRawData(unsigned long *retSize) { + unsigned long max = 4; + int loop; + unsigned long offset; + unsigned long size; + for (loop = 0; loop < getCount(); loop++) { + getMetaEntry(loop, &offset, &size); + max = ((offset + size) > max) ? (offset + size) : max; + } + *retSize = max; + return block; +} + + +int EntriesBlock::addEntry(const char *entry) { + unsigned long dataSize; + getRawData(&dataSize); + unsigned long len = strlen(entry); + unsigned long offset; + unsigned long size; + int count = getCount(); + unsigned long dataStart = METAHEADERSIZE + (count * METAENTRYSIZE); + // new meta entry + new data size + 1 because null + block = (char *)realloc(block, dataSize + METAENTRYSIZE + len + 1); + // shift right to make room for new meta entry + memmove(block + dataStart + METAENTRYSIZE, block + dataStart, dataSize - dataStart); + + for (int loop = 0; loop < count; loop++) { + getMetaEntry(loop, &offset, &size); + if (offset) { // if not a deleted entry + offset += METAENTRYSIZE; + setMetaEntry(loop, offset, size); + } + } + + offset = dataSize; // original dataSize before realloc + size = len + 1; + // add our text to the end + memcpy(block + offset + METAENTRYSIZE, entry, size); + // increment count + setCount(count + 1); + // add our meta entry + setMetaEntry(count, offset + METAENTRYSIZE, size); + // return index of our new entry + return count; +} + + +const char *EntriesBlock::getEntry(int entryIndex) { + unsigned long offset; + unsigned 
long size; + static const char *empty = ""; + + getMetaEntry(entryIndex, &offset, &size); + return (offset) ? block+offset : empty; +} + + +unsigned long EntriesBlock::getEntrySize(int entryIndex) { + unsigned long offset; + unsigned long size; + + getMetaEntry(entryIndex, &offset, &size); + return (offset) ? size : 0; +} + + +void EntriesBlock::removeEntry(int entryIndex) { + unsigned long offset; + unsigned long size, size2; + unsigned long dataSize; + getRawData(&dataSize); + getMetaEntry(entryIndex, &offset, &size); + int count = getCount(); + + if (!offset) // already deleted + return; + + // shift left to retrieve space used for old entry + memmove(block + offset, block + offset + size, dataSize - (offset + size)); + + // fix offset for all entries after our entry that were shifted left + for (int loop = entryIndex + 1; loop < count; loop++) { + getMetaEntry(loop, &offset, &size2); + if (offset) { // if not a deleted entry + offset -= size; + setMetaEntry(loop, offset, size2); + } + } + + // zero out our meta entry + setMetaEntry(entryIndex, 0L, 0); +} + + +SWORD_NAMESPACE_END diff --git a/src/modules/common/lzsscomprs.cpp b/src/modules/common/lzsscomprs.cpp new file mode 100644 index 0000000..ef1bc8c --- /dev/null +++ b/src/modules/common/lzsscomprs.cpp @@ -0,0 +1,732 @@ +/****************************************************************************** + * + * lzssomprs.cpp - LZSSCompress: a driver class that provides LZSS + * compression + * + * $Id: lzsscomprs.cpp 2935 2013-08-02 11:06:30Z scribe $ + * + * Copyright 1996-2013 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + +#include <stdlib.h> +#include <string.h> +#include <lzsscomprs.h> + +// The following are constant sizes used by the compression algorithm. +// +// N - This is the size of the ring buffer. It is set +// to 4K. It is important to note that a position +// within the ring buffer requires 12 bits. +// +// F - This is the maximum length of a character sequence +// that can be taken from the ring buffer. It is set +// to 18. Note that a length must be 3 before it is +// worthwhile to store a position/length pair, so the +// length can be encoded in only 4 bits. Or, put yet +// another way, it is not necessary to encode a length +// of 0-18, it is necessary to encode a length of +// 3-18, which requires 4 bits. +// +// THRESHOLD - It takes 2 bytes to store an offset and +// a length. If a character sequence only +// requires 1 or 2 characters to store +// uncompressed, then it is better to store +// it uncompressed than as an offset into +// the ring buffer. +// +// Note that the 12 bits used to store the position and the 4 bits +// used to store the length equal a total of 16 bits, or 2 bytes. + +#define N 4096 +#define F 18 +#define THRESHOLD 3 +#define NOT_USED N + + +SWORD_NAMESPACE_START + +class LZSSCompress::Private { +public: + static unsigned char m_ring_buffer[N + F - 1]; + static short int m_match_position; + static short int m_match_length; + static short int m_lson[N + 1]; + static short int m_rson[N + 257]; + static short int m_dad[N + 1]; + void InitTree(); + void InsertNode(short int Pos); + void DeleteNode(short int Node); +}; + +/****************************************************************************** + * LZSSCompress Statics + */ + +// m_ring_buffer is a text buffer. 
It contains "nodes" of +// uncompressed text that can be indexed by position. That is, +// a substring of the ring buffer can be indexed by a position +// and a length. When decoding, the compressed text may contain +// a position in the ring buffer and a count of the number of +// bytes from the ring buffer that are to be moved into the +// uncompressed buffer. +// +// This ring buffer is not maintained as part of the compressed +// text. Instead, it is reconstructed dynamically. That is, +// it starts out empty and gets built as the text is decompressed. +// +// The ring buffer contains N bytes, with an additional F - 1 bytes +// to facilitate string comparison. + +unsigned char LZSSCompress::Private::m_ring_buffer[N + F - 1]; + +// m_match_position and m_match_length are set by InsertNode(). +// +// These variables indicate the position in the ring buffer +// and the number of characters at that position that match +// a given string. + +short int LZSSCompress::Private::m_match_position; +short int LZSSCompress::Private::m_match_length; + +// m_lson, m_rson, and m_dad are the Japanese way of referring to +// a tree structure. The dad is the parent and it has a right and +// left son (child). +// +// For i = 0 to N-1, m_rson[i] and m_lson[i] will be the right +// and left children of node i. +// +// For i = 0 to N-1, m_dad[i] is the parent of node i. +// +// For i = 0 to 255, m_rson[N + i + 1] is the root of the tree for +// strings that begin with the character i. Note that this requires +// one-byte characters. +// +// These nodes store values of 0...(N-1). Memory requirements +// can be reduced by using 2-byte integers instead of full 4-byte +// integers (for 32-bit applications). Therefore, these are +// defined as "short ints." 
+ +short int LZSSCompress::Private::m_lson[N + 1]; +short int LZSSCompress::Private::m_rson[N + 257]; +short int LZSSCompress::Private::m_dad[N + 1]; + + +/****************************************************************************** + * LZSSCompress Constructor - Initializes data for instance of LZSSCompress + * + */ + +LZSSCompress::LZSSCompress() : SWCompress() { + p = new Private(); +} + + +/****************************************************************************** + * LZSSCompress Destructor - Cleans up instance of LZSSCompress + */ + +LZSSCompress::~LZSSCompress() { + delete p; +} + + +/****************************************************************************** + * LZSSCompress::InitTree - This function initializes the tree nodes to + * "empty" states. + */ + +void LZSSCompress::Private::InitTree(void) { + int i; + + // For i = 0 to N - 1, m_rson[i] and m_lson[i] will be the right + // and left children of node i. These nodes need not be + // initialized. However, for debugging purposes, it is nice to + // have them initialized. Since this is only used for compression + // (not decompression), I don't mind spending the time to do it. + // + // For the same range of i, m_dad[i] is the parent of node i. + // These are initialized to a known value that can represent + // a "not used" state. + + for (i = 0; i < N; i++) { + m_lson[i] = NOT_USED; + m_rson[i] = NOT_USED; + m_dad[i] = NOT_USED; + } + + // For i = 0 to 255, m_rson[N + i + 1] is the root of the tree + // for strings that begin with the character i. This is why + // the right child array is larger than the left child array. + // These are also initialzied to a "not used" state. + // + // Note that there are 256 of these, one for each of the possible + // 256 characters. 
+ + for (i = N + 1; i <= (N + 256); i++) { + m_rson[i] = NOT_USED; + } +} + + +/****************************************************************************** + * LZSSCompress::InsertNode - This function inserts a string from the ring + * buffer into one of the trees. It loads the + * match position and length member variables + * for the longest match. + * + * The string to be inserted is identified by + * the parameter Pos, A full F bytes are + * inserted. So, + * m_ring_buffer[Pos ... Pos+F-1] + * are inserted. + * + * If the matched length is exactly F, then an + * old node is removed in favor of the new one + * (because the old one will be deleted + * sooner). + * + * Note that Pos plays a dual role. It is + * used as both a position in the ring buffer + * and also as a tree node. + * m_ring_buffer[Pos] defines a character that + * is used to identify a tree node. + * + * ENT: pos - position in the buffer + */ + +void LZSSCompress::Private::InsertNode(short int Pos) +{ + short int i; + short int p; + int cmp; + unsigned char * key; + +/* + ASSERT(Pos >= 0); + ASSERT(Pos < N); +*/ + + cmp = 1; + key = &(m_ring_buffer[Pos]); + + // The last 256 entries in m_rson contain the root nodes for + // strings that begin with a letter. Get an index for the + // first letter in this string. + + p = (short int) (N + 1 + key[0]); + + // Set the left and right tree nodes for this position to "not + // used." + + m_lson[Pos] = NOT_USED; + m_rson[Pos] = NOT_USED; + + // Haven't matched anything yet. + + m_match_length = 0; + + for ( ; ; ) { + if (cmp >= 0) { + if (m_rson[p] != NOT_USED) { + p = m_rson[p]; + } + else { + m_rson[p] = Pos; + m_dad[Pos] = p; + return; + } + } + else { + if (m_lson[p] != NOT_USED) { + p = m_lson[p]; + } + else { + m_lson[p] = Pos; + m_dad[Pos] = p; + return; + } + } + + // Should we go to the right or the left to look for the + // next match? 
+ + for (i = 1; i < F; i++) { + cmp = key[i] - m_ring_buffer[p + i]; + if (cmp != 0) + break; + } + + if (i > m_match_length) { + m_match_position = p; + m_match_length = i; + + if (i >= F) + break; + } + } + + m_dad[Pos] = m_dad[p]; + m_lson[Pos] = m_lson[p]; + m_rson[Pos] = m_rson[p]; + + m_dad[ m_lson[p] ] = Pos; + m_dad[ m_rson[p] ] = Pos; + + if (m_rson[ m_dad[p] ] == p) { + m_rson[ m_dad[p] ] = Pos; + } + else { + m_lson[ m_dad[p] ] = Pos; + } + + // Remove "p" + + m_dad[p] = NOT_USED; +} + + +/****************************************************************************** + * LZSSCompress::DeleteNode - This function removes the node "Node" from the + * tree. + * + * ENT: node - node to be removed + */ + +void LZSSCompress::Private::DeleteNode(short int Node) +{ + short int q; + +/* + ASSERT(Node >= 0); + ASSERT(Node < (N+1)); +*/ + + if (m_dad[Node] == NOT_USED) { // not in tree, nothing to do + return; + } + + if (m_rson[Node] == NOT_USED) { + q = m_lson[Node]; + } + else if (m_lson[Node] == NOT_USED) { + q = m_rson[Node]; + } + else { + q = m_lson[Node]; + if (m_rson[q] != NOT_USED) { + do { + q = m_rson[q]; + } while (m_rson[q] != NOT_USED); + + m_rson[ m_dad[q] ] = m_lson[q]; + m_dad[ m_lson[q] ] = m_dad[q]; + m_lson[q] = m_lson[Node]; + m_dad[ m_lson[Node] ] = q; + } + + m_rson[q] = m_rson[Node]; + m_dad[ m_rson[Node] ] = q; + } + + m_dad[q] = m_dad[Node]; + + if (m_rson[ m_dad[Node] ] == Node) { + m_rson[ m_dad[Node] ] = q; + } + else { + m_lson[ m_dad[Node] ] = q; + } + + m_dad[Node] = NOT_USED; +} + + +/****************************************************************************** + * LZSSCompress::Encode - This function "encodes" the input stream into the + * output stream. + * The GetChars() and SendChars() functions are + * used to separate this method from the actual + * i/o. + * NOTE: must set zlen for parent class to know length of + * compressed buffer. 
+ */ + +void LZSSCompress::Encode(void) +{ + short int i; // an iterator + short int r; // node number in the binary tree + short int s; // position in the ring buffer + unsigned short int len; // len of initial string + short int last_match_length; // length of last match + short int code_buf_pos; // position in the output buffer + unsigned char code_buf[17]; // the output buffer + unsigned char mask; // bit mask for byte 0 of out buf + unsigned char c; // character read from string + + // Start with a clean tree. + + p->InitTree(); + direct = 0; // set direction needed by parent [Get|Send]Chars() + + // code_buf[0] works as eight flags. A "1" represents that the + // unit is an unencoded letter (1 byte), and a "0" represents + // that the next unit is a <position,length> pair (2 bytes). + // + // code_buf[1..16] stores eight units of code. Since the best + // we can do is store eight <position,length> pairs, at most 16 + // bytes are needed to store this. + // + // This is why the maximum size of the code buffer is 17 bytes. + + code_buf[0] = 0; + code_buf_pos = 1; + + // Mask iterates over the 8 bits in the code buffer. The first + // character ends up being stored in the low bit. + // + // bit 8 7 6 5 4 3 2 1 + // | | + // | first sequence in code buffer + // | + // last sequence in code buffer + + mask = 1; + + s = 0; + r = (short int) N - (short int) F; + + // Initialize the ring buffer with spaces... + + // Note that the last F bytes of the ring buffer are not filled. + // This is because those F bytes will be filled in immediately + // with bytes from the input stream. + + memset(p->m_ring_buffer, ' ', N - F); + + // Read F bytes into the last F bytes of the ring buffer. + // + // This function loads the buffer with X characters and returns + // the actual amount loaded. + + len = GetChars((char *) &(p->m_ring_buffer[r]), F); + + // Make sure there is something to be compressed. 
+ + if (len == 0) + return; + + // Insert the F strings, each of which begins with one or more + // 'space' characters. Note the order in which these strings + // are inserted. This way, degenerate trees will be less likely + // to occur. + + for (i = 1; i <= F; i++) { + p->InsertNode((short int) (r - i)); + } + + // Finally, insert the whole string just read. The + // member variables match_length and match_position are set. + + p->InsertNode(r); + + // Now that we're preloaded, continue till done. + + do { + + // m_match_length may be spuriously long near the end of + // text. + + if (p->m_match_length > len) { + p->m_match_length = len; + } + + // Is it cheaper to store this as a single character? If so, + // make it so. + + if (p->m_match_length < THRESHOLD) { + // Send one character. Remember that code_buf[0] is the + // set of flags for the next eight items. + + p->m_match_length = 1; + code_buf[0] |= mask; + code_buf[code_buf_pos++] = p->m_ring_buffer[r]; + } + + // Otherwise, we do indeed have a string that can be stored + // compressed to save space. + + else { + // The next 16 bits need to contain the position (12 bits) + // and the length (4 bits). + + code_buf[code_buf_pos++] = (unsigned char) p->m_match_position; + code_buf[code_buf_pos++] = (unsigned char) ( + ((p->m_match_position >> 4) & 0xf0) | + (p->m_match_length - THRESHOLD) ); + } + + // Shift the mask one bit to the left so that it will be ready + // to store the new bit. + + mask = (unsigned char) (mask << 1); + + // If the mask is now 0, then we know that we have a full set + // of flags and items in the code buffer. These need to be + // output. + + if (!mask) { + // code_buf is the buffer of characters to be output. + // code_buf_pos is the number of characters it contains. + + SendChars((char *) code_buf, code_buf_pos); + + // Reset for next buffer... 
+ + code_buf[0] = 0; + code_buf_pos = 1; + mask = 1; + } + + last_match_length = p->m_match_length; + + // Delete old strings and read new bytes... + + for (i = 0; i < last_match_length; i++) { + // Get next character... + + if (GetChars((char *) &c, 1) != 1) + break; + + // Delete "old strings" + + p->DeleteNode(s); + + // Put this character into the ring buffer. + // + // The original comment here says "If the position is near + // the end of the buffer, extend the buffer to make + // string comparison easier." + // + // That's a little misleading, because the "end" of the + // buffer is really what we consider to be the "beginning" + // of the buffer, that is, positions 0 through F - 2. + // + // The idea is that the front end of the buffer is duplicated + // into the back end so that when you're looking at characters + // at the back end of the buffer, you can index ahead (beyond + // the normal end of the buffer) and see the characters + // that are at the front end of the buffer without having + // to adjust the index. + // + // That is... + // + // 1234xxxxxxxxxxxxxxxxxxxxxxxxxxxxx1234 + // | | | + // position 0 end of buffer | + // | + // duplicate of front of buffer + + p->m_ring_buffer[s] = c; + + if (s < F - 1) { + p->m_ring_buffer[s + N] = c; + } + + // Increment the position, and wrap around when we're at + // the end. Note that this relies on N being a power of 2. + + s = (short int) ( (s + 1) & (N - 1) ); + r = (short int) ( (r + 1) & (N - 1) ); + + // Register the string that is found in + // m_ring_buffer[r..r+F-1]. + + p->InsertNode(r); + } + + // If we didn't quit because we hit the last_match_length, + // then we must have quit because we ran out of characters + // to process. + + while (i++ < last_match_length) { + p->DeleteNode(s); + + s = (short int) ( (s + 1) & (N - 1) ); + r = (short int) ( (r + 1) & (N - 1) ); + + // Note that len hitting 0 is the key that causes the + // do...while() to terminate. 
This is the only place
+			// within the loop that len is modified.
+			//
+			// Its original value is F (or a number less than F for
+			// short strings).
+
+			if (--len) {
+				p->InsertNode(r);	/* buffer may not be empty. */
+			}
+		}
+
+		// End of do...while() loop. Continue processing until there
+		// are no more characters to be compressed. The variable
+		// "len" is used to signal this condition.
+	} while (len > 0);
+
+	// There could still be something in the output buffer. Send it
+	// now.
+
+	if (code_buf_pos > 1) {
+		// code_buf is the encoded string to send.
+		// code_buf_ptr is the number of characters.
+
+		SendChars((char *) code_buf, code_buf_pos);
+	}
+
+
+	// must set zlen for parent class to know length of compressed buffer
+	zlen = zpos;
+}
+
+
+/******************************************************************************
+ * LZSSCompress::Decode	- Expands the compressed input stream into the
+ *				output stream.
+ *				All i/o goes through GetChars() and SendChars(),
+ *				which keeps this routine independent of where
+ *				the bytes actually live.  On return, slen holds
+ *				the number of bytes successfully sent.
+ */
+
+void LZSSCompress::Decode(void)
+{
+	unsigned char chunk[F];		// one literal, or up to F bytes copied from the window
+	unsigned char flagByte = 0;	// current flag byte; bit 0 describes the next item
+	int shiftsLeft = 0;		// shifts still available before a new flag byte is read
+	int writeAt = N - F;		// next slot to fill in the ring buffer
+	unsigned long produced = 0;	// bytes successfully sent so far
+
+	direct = 1;	// set direction needed by parent [Get|Send]Chars()
+
+	// All but the last F bytes of the ring buffer are pre-filled with
+	// spaces; writing starts right after that pre-filled region.
+
+	memset(p->m_ring_buffer, ' ', N - F);
+
+	while (true) {
+
+		// Bit 0 of flagByte always describes the item about to be read.
+		// A freshly read flag byte serves eight items: the first uses it
+		// as read, and each of the next seven shifts it right once.
+
+		if (shiftsLeft <= 0) {
+			if (GetChars((char *) &flagByte, 1) != 1)
+				break;		// no flag byte could be read
+
+			shiftsLeft = 7;
+		}
+		else {
+			flagByte = (unsigned char) (flagByte >> 1);
+			--shiftsLeft;
+		}
+
+		if ((flagByte & 1) != 0) {
+
+			// Flag bit set: a single, unencoded byte follows.
+
+			if (GetChars((char *) chunk, 1) != 1)
+				break;
+
+			if (SendChars((char *) chunk, 1) != 1)
+				break;
+
+			++produced;
+
+			// Record the byte in the window.  N is a power of 2, so
+			// masking with N - 1 wraps the position at the end.
+
+			p->m_ring_buffer[writeAt] = chunk[0];
+			writeAt = (writeAt + 1) & (N - 1);
+			continue;
+		}
+
+		// Flag bit clear: a two-byte <position,length> pair follows.
+		// Byte 0 carries the low 8 bits of the position, the high nibble
+		// of byte 1 carries position bits 8-11, and the low nibble of
+		// byte 1 carries (length - THRESHOLD).
+
+		if (GetChars((char *) chunk, 2) != 2)
+			break;
+
+		const short int from = (short int) ( chunk[0] | ((chunk[1] & 0xf0) << 4) );
+		const short int count = (short int) ( (chunk[1] & 0x0f) + THRESHOLD );
+
+		// Pull "count" bytes out of the window starting at "from".  Each
+		// byte is stored back into the window before the next one is
+		// read.  count never exceeds F, so chunk is large enough.
+
+		for (int n = 0; n < count; ++n) {
+			const unsigned char value = p->m_ring_buffer[(from + n) & (N - 1)];
+
+			chunk[n] = value;
+			p->m_ring_buffer[writeAt] = value;
+			writeAt = (writeAt + 1) & (N - 1);
+		}
+
+		// Hand the copied bytes to the output stream.
+
+		if (SendChars((char *) chunk, count) != (unsigned int) count)
+			break;
+
+		produced += count;
+	}
+
+	slen = produced;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/common/lzsscomprs.txt b/src/modules/common/lzsscomprs.txt
new file mode 100644
index 0000000..b6817f2
--- /dev/null
+++ b/src/modules/common/lzsscomprs.txt
@@ -0,0 +1,802 @@
+The following is the original information send from Parson's Technologies via
+Craig Rairden.
+_______________________________________________________________________________
+Compression Info, 10-11-95
+Jeff Wheeler
+
+Source of Algorithm
+-------------------
+
+The compression algorithms used here are based upon the algorithms developed
+and published by Haruhiko Okumura in a paper entitled "Data Compression
+Algorithms of LARC and LHarc." This paper discusses three compression
+algorithms, LSZZ, LZARI, and LZHUF. LZSS is described as the "first" of
+these, and is described as providing moderate compression with good speed.
+LZARI is described as an improved LZSS, a combination of the LZSS algorithm
+with adaptive arithmetic compression. It is described as being slower than
+LZSS but with better compression. LZHUF (the basis of the common LHA
+compression program) was included in the paper, however, a free usage license
+was not included.
+
+The following are copies of the statements included at the beginning of each
+source code listing that was supplied in the working paper.
+
+	LZSS, dated 4/6/89, marked as "Use, distribute and
+	modify this program freely."
+
+	LZARI, dated 4/7/89, marked as "Use, distribute and
+	modify this program freely."
+
+	LZHUF, dated 11/20/88, written by Haruyasu Yoshizaki,
+	translated by Haruhiko Okumura on 4/7/89. Not
+	expressly marked as redistributable or modifiable.
+ +Since both LZSS and LZARI are marked as "use, distribute and modify freely" we +have felt at liberty to base our compression algorithm on either of these. + +Selection of Algorithm +---------------------- + +Working samples of three possible compression algorithms are supplied in +Okumura's paper. Which should be used? + +LZSS is the fastest at decompression, but does not generate as small a +compressed file as the other methods. The other two methods provided, perhaps, +a 15% improvement in compression. Or, put another way, on a 100K file, LZSS +might compress it to 50K while the others might approach 40-45K. For STEP +purposes, it was decided that decoding speed was of more importance than +tighter compression. For these reasons, the first compression algorithm +implemented is the LZSS algorithm. + +About LZSS Encoding +------------------- + +(adapted from Haruhiko Okumura's paper) + +This scheme was proposed by Ziv and Lempel [1]. A slightly modified version +is described by Storer and Szymanski [2]. An implementation using a binary +tree has been proposed by Bell [3]. + +The algorithm is quite simple. +1. Keep a ring buffer which initially contains all space characters. +2. Read several letters from the file to the buffer. +3. Search the buffer for the longest string that matches the letters just + read, and send its length and position into the buffer. + +If the ring buffer is 4096 bytes, the position can be stored in 12 bits. If the +length is represented in 4 bits, the <position, length> pair is two bytes +long. If the longest match is no more than two characters, then just one +character is sent without encoding. The process starts again with the next +character. An extra bit is sent each time to tell the decoder whether the +next item is a character or a <position, length> pair. + +[1] J. Ziv and A. Lempel, IEEE Transactions IT-23, 337-343 (1977). +[2] J. A. Storer and T. G. Szymanski, J. ACM, 29, 928-951 (1982). +[3] T.C.
Bell, IEEE Transactions COM-34, 1176-1182 (1986). + +class SWCompress { +public: +void InitTree( // no return value + void); // no parameters + +void InsertNode( // no return value + short int Pos); // position in the buffer + +void DeleteNode( // no return value + short int Node); // node to be removed + +void Encode( // no return value + void); // no parameters + +void Decode( // no return value + void); // no parameters +}; + +// The following are constant sizes used by the compression algorithm. +// +// N - This is the size of the ring buffer. It is set +// to 4K. It is important to note that a position +// within the ring buffer requires 12 bits. +// +// F - This is the maximum length of a character sequence +// that can be taken from the ring buffer. It is set +// to 18. Note that a length must be 3 before it is +// worthwhile to store a position/length pair, so the +// length can be encoded in only 4 bits. Or, put yet +// another way, it is not necessary to encode a length +// of 0-18, it is necessary to encode a length of +// 3-18, which requires 4 bits. +// +// THRESHOLD - It takes 2 bytes to store an offset and +// a length. If a character sequence only +// requires 1 or 2 characters to store +// uncompressed, then it is better to store +// it uncompressed than as an offset into +// the ring buffer. +// +// Note that the 12 bits used to store the position and the 4 bits +// used to store the length equal a total of 16 bits, or 2 bytes. + +#define N 4096 +#define F 18 +#define THRESHOLD 3 +#define NOT_USED N + +// m_ring_buffer is a text buffer. It contains "nodes" of +// uncompressed text that can be indexed by position. That is, +// a substring of the ring buffer can be indexed by a position +// and a length. When decoding, the compressed text may contain +// a position in the ring buffer and a count of the number of +// bytes from the ring buffer that are to be moved into the +// uncompressed buffer.
+// +// This ring buffer is not maintained as part of the compressed +// text. Instead, it is reconstructed dynamically. That is, +// it starts out empty and gets built as the text is decompressed. +// +// The ring buffer contain N bytes, with an additional F - 1 bytes +// to facilitate string comparison. + +unsigned char m_ring_buffer[N + F - 1]; + +// m_match_position and m_match_length are set by InsertNode(). +// +// These variables indicate the position in the ring buffer +// and the number of characters at that position that match +// a given string. + +short int m_match_position; +short int m_match_length; + +// m_lson, m_rson, and m_dad are the Japanese way of referring to +// a tree structure. The dad is the parent and it has a right and +// left son (child). +// +// For i = 0 to N-1, m_rson[i] and m_lson[i] will be the right +// and left children of node i. +// +// For i = 0 to N-1, m_dad[i] is the parent of node i. +// +// For i = 0 to 255, rson[N + i + 1] is the root of the tree for +// strings that begin with the character i. Note that this requires +// one byte characters. +// +// These nodes store values of 0...(N-1). Memory requirements +// can be reduces by using 2-byte integers instead of full 4-byte +// integers (for 32-bit applications). Therefore, these are +// defined as "short ints." + +short int m_lson[N + 1]; +short int m_rson[N + 257]; +short int m_dad[N + 1]; + + + + +/* + ------------------------------------------------------------------------- + cLZSS::InitTree + + This function initializes the tree nodes to "empty" states. + ------------------------------------------------------------------------- +*/ + +void cLZSS::InitTree( // no return value + void) // no parameters + throw() // exception list + + { + int i; + + // For i = 0 to N - 1, m_rson[i] and m_lson[i] will be the right + // and left children of node i. These nodes need not be + // initialized. However, for debugging purposes, it is nice to + // have them initialized. 
Since this is only used for compression + // (not decompression), I don't mind spending the time to do it. + // + // For the same range of i, m_dad[i] is the parent of node i. + // These are initialized to a known value that can represent + // a "not used" state. + + for (i = 0; i < N; i++) + { + m_lson[i] = NOT_USED; + m_rson[i] = NOT_USED; + m_dad[i] = NOT_USED; + } + + // For i = 0 to 255, m_rson[N + i + 1] is the root of the tree + // for strings that begin with the character i. This is why + // the right child array is larger than the left child array. + // These are also initialzied to a "not used" state. + // + // Note that there are 256 of these, one for each of the possible + // 256 characters. + + for (i = N + 1; i <= (N + 256); i++) + { + m_rson[i] = NOT_USED; + } + + // Done. + } + +/* + ------------------------------------------------------------------------- + cLZSS::InsertNode + + This function inserts a string from the ring buffer into one of + the trees. It loads the match position and length member variables + for the longest match. + + The string to be inserted is identified by the parameter Pos, + A full F bytes are inserted. So, m_ring_buffer[Pos ... Pos+F-1] + are inserted. + + If the matched length is exactly F, then an old node is removed + in favor of the new one (because the old one will be deleted + sooner). + + Note that Pos plays a dual role. It is used as both a position + in the ring buffer and also as a tree node. m_ring_buffer[Pos] + defines a character that is used to identify a tree node. 
+ ------------------------------------------------------------------------- +*/ + +void cLZSS::InsertNode( // no return value + short int Pos) // position in the buffer + throw() // exception list + + { + short int i; + short int p; + int cmp; + unsigned char * key; + + ASSERT(Pos >= 0); + ASSERT(Pos < N); + + cmp = 1; + key = &(m_ring_buffer[Pos]); + + // The last 256 entries in m_rson contain the root nodes for + // strings that begin with a letter. Get an index for the + // first letter in this string. + + p = (short int) (N + 1 + key[0]); + + // Set the left and right tree nodes for this position to "not + // used." + + m_lson[Pos] = NOT_USED; + m_rson[Pos] = NOT_USED; + + // Haven't matched anything yet. + + m_match_length = 0; + + for ( ; ; ) + { + if (cmp >= 0) + { + if (m_rson[p] != NOT_USED) + { + p = m_rson[p]; + } + else + { + m_rson[p] = Pos; + m_dad[Pos] = p; + return; + } + } + else + { + if (m_lson[p] != NOT_USED) + { + p = m_lson[p]; + } + else + { + m_lson[p] = Pos; + m_dad[Pos] = p; + return; + } + } + + // Should we go to the right or the left to look for the + // next match? + + for (i = 1; i < F; i++) + { + cmp = key[i] - m_ring_buffer[p + i]; + if (cmp != 0) + break; + } + + if (i > m_match_length) + { + m_match_position = p; + m_match_length = i; + + if (i >= F) + break; + } + } + + m_dad[Pos] = m_dad[p]; + m_lson[Pos] = m_lson[p]; + m_rson[Pos] = m_rson[p]; + + m_dad[ m_lson[p] ] = Pos; + m_dad[ m_rson[p] ] = Pos; + + if (m_rson[ m_dad[p] ] == p) + { + m_rson[ m_dad[p] ] = Pos; + } + else + { + m_lson[ m_dad[p] ] = Pos; + } + + // Remove "p" + + m_dad[p] = NOT_USED; + } + +/* + ------------------------------------------------------------------------- + cLZSS::DeleteNode + + This function removes the node "Node" from the tree. 
+ ------------------------------------------------------------------------- +*/ + +void cLZSS::DeleteNode( // no return value + short int Node) // node to be removed + throw() // exception list + + { + short int q; + + ASSERT(Node >= 0); + ASSERT(Node < (N+1)); + + if (m_dad[Node] == NOT_USED) + { + // not in tree, nothing to do + return; + } + + if (m_rson[Node] == NOT_USED) + { + q = m_lson[Node]; + } + else if (m_lson[Node] == NOT_USED) + { + q = m_rson[Node]; + } + else + { + q = m_lson[Node]; + if (m_rson[q] != NOT_USED) + { + do + { + q = m_rson[q]; + } + while (m_rson[q] != NOT_USED); + + m_rson[ m_dad[q] ] = m_lson[q]; + m_dad[ m_lson[q] ] = m_dad[q]; + m_lson[q] = m_lson[Node]; + m_dad[ m_lson[Node] ] = q; + } + + m_rson[q] = m_rson[Node]; + m_dad[ m_rson[Node] ] = q; + } + + m_dad[q] = m_dad[Node]; + + if (m_rson[ m_dad[Node] ] == Node) + { + m_rson[ m_dad[Node] ] = q; + } + else + { + m_lson[ m_dad[Node] ] = q; + } + + m_dad[Node] = NOT_USED; + } + +/* + ------------------------------------------------------------------------- + cLZSS::Encode + + This function "encodes" the input stream into the output stream. + The GetChars() and SendChars() functions are used to separate + this method from the actual i/o. + ------------------------------------------------------------------------- +*/ + +void cLZSS::Encode( // no return value + void) // no parameters + + { + short int i; // an iterator + short int r; // node number in the binary tree + short int s; // position in the ring buffer + unsigned short int len; // len of initial string + short int last_match_length; // length of last match + short int code_buf_pos; // position in the output buffer + unsigned char code_buf[17]; // the output buffer + unsigned char mask; // bit mask for byte 0 of out buf + unsigned char c; // character read from string + + // Start with a clean tree. + + InitTree(); + + // code_buf[0] works as eight flags. 
A "1" represents that the + // unit is an unencoded letter (1 byte), and a "0" represents + // that the next unit is a <position,length> pair (2 bytes). + // + // code_buf[1..16] stores eight units of code. Since the best + // we can do is store eight <position,length> pairs, at most 16 + // bytes are needed to store this. + // + // This is why the maximum size of the code buffer is 17 bytes. + + code_buf[0] = 0; + code_buf_pos = 1; + + // Mask iterates over the 8 bits in the code buffer. The first + // character ends up being stored in the low bit. + // + // bit 8 7 6 5 4 3 2 1 + // | | + // | first sequence in code buffer + // | + // last sequence in code buffer + + mask = 1; + + s = 0; + r = (short int) N - (short int) F; + + // Initialize the ring buffer with spaces... + + // Note that the last F bytes of the ring buffer are not filled. + // This is because those F bytes will be filled in immediately + // with bytes from the input stream. + + memset(m_ring_buffer, ' ', N - F); + + // Read F bytes into the last F bytes of the ring buffer. + // + // This function loads the buffer with X characters and returns + // the actual amount loaded. + + len = GetChars(&(m_ring_buffer[r]), F); + + // Make sure there is something to be compressed. + + if (len == 0) + return; + + // Insert the F strings, each of which begins with one or more + // 'space' characters. Note the order in which these strings + // are inserted. This way, degenerate trees will be less likely + // to occur. + + for (i = 1; i <= F; i++) + { + InsertNode((short int) (r - i)); + } + + // Finally, insert the whole string just read. The + // member variables match_length and match_position are set. + + InsertNode(r); + + // Now that we're preloaded, continue till done. + + do + { + + // m_match_length may be spuriously long near the end of + // text. + + if (m_match_length > len) + { + m_match_length = len; + } + + // Is it cheaper to store this as a single character? If so, + // make it so. 
+ + if (m_match_length < THRESHOLD) + { + // Send one character. Remember that code_buf[0] is the + // set of flags for the next eight items. + + m_match_length = 1; + code_buf[0] |= mask; + code_buf[code_buf_pos++] = m_ring_buffer[r]; + } + + // Otherwise, we do indeed have a string that can be stored + // compressed to save space. + + else + { + // The next 16 bits need to contain the position (12 bits) + // and the length (4 bits). + + code_buf[code_buf_pos++] = (unsigned char) m_match_position; + code_buf[code_buf_pos++] = (unsigned char) ( + ((m_match_position >> 4) & 0xf0) | + (m_match_length - THRESHOLD) ); + } + + // Shift the mask one bit to the left so that it will be ready + // to store the new bit. + + mask = (unsigned char) (mask << 1); + + // If the mask is now 0, then we know that we have a full set + // of flags and items in the code buffer. These need to be + // output. + + if (mask == 0) + { + // code_buf is the buffer of characters to be output. + // code_buf_pos is the number of characters it contains. + + SendChars(code_buf, code_buf_pos); + + // Reset for next buffer... + + code_buf[0] = 0; + code_buf_pos = 1; + mask = 1; + } + + last_match_length = m_match_length; + + // Delete old strings and read new bytes... + + for (i = 0; i < last_match_length; i++) + { + + // Get next character... + + if (GetChars(&c, 1) != 1) + break; + + // Delete "old strings" + + DeleteNode(s); + + // Put this character into the ring buffer. + // + // The original comment here says "If the position is near + // the end of the buffer, extend the buffer to make + // string comparison easier." + // + // That's a little misleading, because the "end" of the + // buffer is really what we consider to be the "beginning" + // of the buffer, that is, positions 0 through F. 
+ // + // The idea is that the front end of the buffer is duplicated + // into the back end so that when you're looking at characters + // at the back end of the buffer, you can index ahead (beyond + // the normal end of the buffer) and see the characters + // that are at the front end of the buffer wihtout having + // to adjust the index. + // + // That is... + // + // 1234xxxxxxxxxxxxxxxxxxxxxxxxxxxxx1234 + // | | | + // position 0 end of buffer | + // | + // duplicate of front of buffer + + m_ring_buffer[s] = c; + + if (s < F - 1) + { + m_ring_buffer[s + N] = c; + } + + // Increment the position, and wrap around when we're at + // the end. Note that this relies on N being a power of 2. + + s = (short int) ( (s + 1) & (N - 1) ); + r = (short int) ( (r + 1) & (N - 1) ); + + // Register the string that is found in + // m_ring_buffer[r..r+F-1]. + + InsertNode(r); + } + + // If we didn't quit because we hit the last_match_length, + // then we must have quit because we ran out of characters + // to process. + + while (i++ < last_match_length) + { + DeleteNode(s); + + s = (short int) ( (s + 1) & (N - 1) ); + r = (short int) ( (r + 1) & (N - 1) ); + + // Note that len hitting 0 is the key that causes the + // do...while() to terminate. This is the only place + // within the loop that len is modified. + // + // Its original value is F (or a number less than F for + // short strings). + + if (--len) + { + InsertNode(r); /* buffer may not be empty. */ + } + } + + // End of do...while() loop. Continue processing until there + // are no more characters to be compressed. The variable + // "len" is used to signal this condition. + } + while (len > 0); + + // There could still be something in the output buffer. Send it + // now. + + if (code_buf_pos > 1) + { + // code_buf is the encoded string to send. + // code_buf_ptr is the number of characters. + + SendChars(code_buf, code_buf_pos); + } + + // Done! 
+ } + +/* + ------------------------------------------------------------------------- + cLZSS::Decode + + This function "decodes" the input stream into the output stream. + The GetChars() and SendChars() functions are used to separate + this method from the actual i/o. + ------------------------------------------------------------------------- +*/ + +void cLZSS::Decode( // no return value + void) // no parameters + + { + int k; + int r; // node number + unsigned char c[F]; // an array of chars + unsigned char flags; // 8 bits of flags + int flag_count; // which flag we're on + short int pos; // position in the ring buffer + short int len; // number of chars in ring buffer + + // Initialize the ring buffer with a common string. + // + // Note that the last F bytes of the ring buffer are not filled. + + memset(m_ring_buffer, ' ', N - F); + + r = N - F; + + flags = (char) 0; + flag_count = 0; + + for ( ; ; ) + { + + // If there are more bits of interest in this flag, then + // shift that next interesting bit into the 1's position. + // + // If this flag has been exhausted, the next byte must + // be a flag. + + if (flag_count > 0) + { + flags = (unsigned char) (flags >> 1); + flag_count--; + } + else + { + // Next byte must be a flag. + + if (GetChars(&flags, 1) != 1) + break; + + // Set the flag counter. While at first it might appear + // that this should be an 8 since there are 8 bits in the + // flag, it should really be a 7 because the shift must + // be performed 7 times in order to see all 8 bits. + + flag_count = 7; + } + + // If the low order bit of the flag is now set, then we know + // that the next byte is a single, unencoded character. + + if (flags & 1) + { + if (GetChars(c, 1) != 1) + break; + + if (SendChars(c, 1) != 1) + break; + + // Add to buffer, and increment to next spot. Wrap at end. + + m_ring_buffer[r] = c[0]; + r = (short int) ( (r + 1) & (N - 1) ); + } + + // Otherwise, we know that the next two bytes are a + // <position,length> pair. 
The position is in 12 bits and + // the length is in 4 bits. + + else + { + // Original code: + // if ((i = getc(infile)) == EOF) + // break; + // if ((j = getc(infile)) == EOF) + // break; + // i |= ((j & 0xf0) << 4); + // j = (j & 0x0f) + THRESHOLD; + // + // I've modified this to only make one input call, and + // have changed the variable names to something more + // obvious. + + if (GetChars(c, 2) != 2) + break; + + // Convert these two characters into the position and + // length. Note that the length is always at least + // THRESHOLD, which is why we're able to get a length + // of 18 out of only 4 bits. + + pos = (short int) ( c[0] | ((c[1] & 0xf0) << 4) ); + + len = (short int) ( (c[1] & 0x0f) + THRESHOLD ); + + // There are now "len" characters at position "pos" in + // the ring buffer that can be pulled out. Note that + // len is never more than F. + + for (k = 0; k < len; k++) + { + c[k] = m_ring_buffer[(pos + k) & (N - 1)]; + + // Add to buffer, and increment to next spot. Wrap at end. + + m_ring_buffer[r] = c[k]; + r = (short int) ( (r + 1) & (N - 1) ); + } + + // Add the "len" characters to the output stream. + + if (SendChars(c, len) != len) + break; + } + } + } + diff --git a/src/modules/common/rawstr.cpp b/src/modules/common/rawstr.cpp new file mode 100644 index 0000000..788ab6e --- /dev/null +++ b/src/modules/common/rawstr.cpp @@ -0,0 +1,529 @@ +/****************************************************************************** + * + * rawstr.cpp - code for class 'RawStr'- a module that reads raw text + * files: ot and nt using indexs ??.bks ??.cps ??.vss + * and provides lookup and parsing functions based on + * class StrKey + * + * $Id: rawstr.cpp 2833 2013-06-29 06:40:28Z chrislit $ + * + * Copyright 1998-2013 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. 
Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + +#include <stdio.h> +#include <fcntl.h> +#include <errno.h> + +#include <stdlib.h> +#include <utilstr.h> +#include <rawstr.h> +#include <sysdata.h> +#include <swlog.h> +#include <filemgr.h> +#include <swbuf.h> +#include <stringmgr.h> + +SWORD_NAMESPACE_START + +/****************************************************************************** + * RawStr Statics + */ + +int RawStr::instance = 0; +char RawStr::nl = '\n'; +const int RawStr::IDXENTRYSIZE = 6; + + + +/****************************************************************************** + * RawStr Constructor - Initializes data for instance of RawStr + * + * ENT: ipath - path of the directory where data and index files are located. + * be sure to include the trailing separator (e.g. '/' or '\') + * (e.g. 
'modules/texts/rawtext/webster/')
+ */
+
+RawStr::RawStr(const char *ipath, int fileMode, bool caseSensitive) : caseSensitive(caseSensitive)
+{
+	SWBuf buf;
+
+	lastoff = -1;
+	path = 0;
+	stdstr(&path, ipath);
+
+	if (fileMode == -1) { // try read/write if possible
+		fileMode = FileMgr::RDWR;
+	}
+
+	// index file: <path>.idx
+	buf.setFormatted("%s.idx", path);
+	idxfd = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
+
+	// data file: <path>.dat
+	buf.setFormatted("%s.dat", path);
+	datfd = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
+
+	// datfd is a descriptor object (it is dereferenced with -> throughout
+	// this file), so an ordered comparison of the pointer against 0 can
+	// never detect a failed open.  Test the underlying descriptor instead,
+	// the same way findOffset() tests idxfd.
+	if (!datfd || datfd->getFd() < 0) {
+		SWLog::getSystemLog()->logError("%d", errno);
+	}
+
+	instance++;
+}
+
+
+/******************************************************************************
+ * RawStr Destructor - Cleans up instance of RawStr
+ *
+ * Releases the path copy made by the constructor and hands both file
+ * descriptors back to the system FileMgr.
+ */
+
+RawStr::~RawStr()
+{
+	if (path)
+		delete [] path;
+
+	--instance;
+
+	FileMgr::getSystemFileMgr()->close(idxfd);
+	FileMgr::getSystemFileMgr()->close(datfd);
+}
+
+
+/******************************************************************************
+ * RawStr::getIDXBufDat	- Gets the index (key) string stored at the given
+ *				offset of the dat file
+ *		NOTE: buf is allocated with malloc()/realloc() and must be
+ *			released with free() by the calling function
+ *
+ * ENT:	ioffset	- offset in dat file to lookup
+ *	buf	- address of pointer to allocate for storage of string;
+ *			*buf must be 0 or a block previously obtained from
+ *			malloc()/realloc(), because it is passed to realloc()
+ *
+ * When the data file is not usable, *buf is set to an empty string.
+ */
+
+void RawStr::getIDXBufDat(long ioffset, char **buf) const
+{
+	int size;
+	char ch;
+	// a non-null descriptor object is not enough; its file must be open too
+	if (datfd && datfd->getFd() >= 0) {
+		datfd->seek(ioffset, SEEK_SET);
+		// first pass: measure the key, which ends at '\', LF, CR or when
+		// no further byte can be read
+		for (size = 0; datfd->read(&ch, 1) == 1; size++) {
+			if ((ch == '\\') || (ch == 10) || (ch == 13))
+				break;
+		}
+		// twice the key length is reserved; the same bound is handed to
+		// toupperstr_utf8() below
+		*buf = (*buf) ? (char *)realloc(*buf, size*2 + 1) : (char *)malloc(size*2 + 1);
+		if (size) {
+			// second pass: read the key itself
+			datfd->seek(ioffset, SEEK_SET);
+			datfd->read(*buf, size);
+		}
+		(*buf)[size] = 0;
+		if (!caseSensitive) toupperstr_utf8(*buf, size*2);
+	}
+	else {
+		// no usable data file: hand back an empty string
+		*buf = (*buf) ? (char *)realloc(*buf, 1) : (char *)malloc(1);
+		**buf = 0;
+	}
+}
+
+
+/******************************************************************************
+ * RawStr::getIDXBuf	- Gets the index string for the idx entry at the given
+ *				idx offset
+ *		NOTE: buf is allocated and must be freed by
+ *			calling function
+ *
+ * ENT:	ioffset	- offset in idx file to lookup; the first 4 bytes found
+ *			there are taken as the dat offset of the entry
+ *	buf	- address of pointer to allocate for storage of string
+ *
+ * When the index file is not usable, *buf is left untouched.
+ */
+
+void RawStr::getIDXBuf(long ioffset, char **buf) const
+{
+	// initialised so that a failed or short read cannot pass an
+	// indeterminate offset on to getIDXBufDat()
+	__u32 offset = 0;
+
+	// a non-null descriptor object is not enough; its file must be open too
+	if (idxfd && idxfd->getFd() >= 0) {
+		idxfd->seek(ioffset, SEEK_SET);
+		idxfd->read(&offset, 4);
+
+		offset = swordtoarch32(offset);
+
+		getIDXBufDat(offset, buf);
+	}
+}
+
+
+/******************************************************************************
+ * RawStr::findOffset	- Finds the offset of the key string from the indexes
+ *
+ * ENT:	ikey	- key string to lookup
+ *	start	- address to store the starting offset (used by readText()
+ *			as a dat file offset)
+ *	size	- address to store the size of the entry
+ *	away	- number of entries before or after to jump
+ *			(default = 0)
+ *	idxoff	- if not 0, address to store the idx offset of the entry
+ *			that start/size were read from
+ *
+ * RET: error status: 0 no error; -1 general error; -2 new file
+ *
+ * Each idx entry read here is 6 bytes: a 4-byte start followed by a 2-byte
+ * size, converted with swordtoarch32() and swordtoarch16().  The idx offset
+ * finally settled on is remembered in lastoff and used as the first probe of
+ * the next search.
+ */
+
+signed char RawStr::findOffset(const char *ikey, __u32 *start, __u16 *size, long away, __u32 *idxoff) const
+{
+	char *trybuf, *maxbuf, *key = 0, quitflag = 0;
+	signed char retval = -1;
+	long headoff, tailoff, tryoff = 0, maxoff = 0;
+	int diff = 0;
+	bool awayFromSubstrCheck = false;
+
+	if (idxfd->getFd() >=0) {
+		// offset of the last 6-byte entry; negative when the idx file is empty
+		tailoff = maxoff = idxfd->seek(0, SEEK_END) - 6;
+		retval = (tailoff >= 0) ? 0 : -2;	// if NOT new file
+		if (*ikey && retval != -2) {
+			headoff = 0;
+
+			stdstr(&key, ikey, 3);
+			if (!caseSensitive) toupperstr_utf8(key, strlen(key)*3);
+
+			int keylen = strlen(key);
+			bool substr = false;
+
+			trybuf = maxbuf = 0;
+			getIDXBuf(maxoff, &maxbuf);
+
+			// bisect the index between headoff and tailoff; the first
+			// probe is lastoff when a previous search left one behind
+			while (headoff < tailoff) {
+				tryoff = (lastoff == -1) ? headoff + ((((tailoff / 6) - (headoff / 6))) / 2) * 6 : lastoff;
+				lastoff = -1;
+				getIDXBuf(tryoff, &trybuf);
+
+				if (!*trybuf && tryoff) {		// In case of extra entry at end of idx (not first entry)
+					tryoff += (tryoff > (maxoff / 2))?-6:6;
+					retval = -1;
+					break;
+				}
+
+				diff = strcmp(key, trybuf);
+
+				if (!diff)
+					break;
+
+				// remember whether any probed entry began with our key
+				if (!strncmp(trybuf, key, keylen)) substr = true;
+
+				if (diff < 0)
+					tailoff = (tryoff == headoff) ? headoff : tryoff;
+				else headoff = tryoff;
+
+				// head and tail are adjacent entries: allow one more
+				// pass, then force the loop to end
+				if (tailoff == headoff + 6) {
+					if (quitflag++)
+						headoff = tailoff;
+				}
+			}
+
+			// didn't find exact match
+			if (headoff >= tailoff) {
+				tryoff = headoff;
+				if (!substr && ((tryoff != maxoff)||(strncmp(key, maxbuf, keylen)<0))) {
+					awayFromSubstrCheck = true;
+					away--;	// if our entry doesn't startwith our key, prefer the previous entry over the next
+				}
+			}
+			// trybuf/maxbuf come from malloc()/realloc() in
+			// getIDXBufDat(); key comes from stdstr()
+			if (trybuf)
+				free(trybuf);
+			delete [] key;
+			if (maxbuf)
+				free(maxbuf);
+		}
+		else	tryoff = 0;
+
+		idxfd->seek(tryoff, SEEK_SET);
+
+		__u32 tmpStart;
+		__u16 tmpSize;
+		*start = *size = tmpStart = tmpSize = 0;
+		idxfd->read(&tmpStart, 4);
+		idxfd->read(&tmpSize, 2);
+		if (idxoff)
+			*idxoff = tryoff;
+
+		*start = swordtoarch32(tmpStart);
+		*size = swordtoarch16(tmpSize);
+
+		// step one idx entry at a time in the direction of away; a step
+		// only counts when the entry read differs from the previous one
+		// in start or size and has a non-zero size
+		while (away) {
+			unsigned long laststart = *start;
+			unsigned short lastsize = *size;
+			long lasttry = tryoff;
+			tryoff += (away > 0) ? 6 : -6;
+
+			bool bad = false;
+			if (((tryoff + (away*6)) < -6) || (tryoff + (away*6) > (maxoff+6)))
+				bad = true;
+			else if (idxfd->seek(tryoff, SEEK_SET) < 0)
+				bad = true;
+			if (bad) {
+				// out of range: report the last good entry instead
+				if(!awayFromSubstrCheck)
+					retval = -1;
+				*start = laststart;
+				*size = lastsize;
+				tryoff = lasttry;
+				if (idxoff)
+					*idxoff = tryoff;
+				break;
+			}
+			idxfd->read(&tmpStart, 4);
+			idxfd->read(&tmpSize, 2);
+			if (idxoff)
+				*idxoff = tryoff;
+
+			*start = swordtoarch32(tmpStart);
+			*size = swordtoarch16(tmpSize);
+
+			if (((laststart != *start) || (lastsize != *size)) && (*size))
+				away += (away < 0) ? 1 : -1;
+		}
+
+		lastoff = tryoff;
+	}
+	else {
+		*start = 0;
+		*size = 0;
+		if (idxoff)
+			*idxoff = 0;
+		retval = -1;
+	}
+	return retval;
+}
+
+
+/******************************************************************************
+ * RawStr::readText	- gets text at a given offset
+ *
+ * ENT:	istart	- starting offset where the text is located in the dat file
+ *	isize	- address of the size of the text entry; it is incremented
+ *			by one on every pass and overwritten by findOffset()
+ *			whenever an @LINK entry is followed
+ *	idxbuf	- address of pointer that receives a new[]-allocated copy
+ *			of the index string found at istart; a previous
+ *			non-null *idxbuf is delete[]'d first
+ *	buf	- buffer to store text
+ *
+ */
+
+void RawStr::readText(__u32 istart, __u16 *isize, char **idxbuf, SWBuf &buf) const
+{
+	unsigned int ch;
+	char *idxbuflocal = 0;
+	// index string of the entry originally asked for; kept even when links
+	// redirect the read to another entry
+	getIDXBufDat(istart, &idxbuflocal);
+	__u32 start = istart;
+
+	do {
+		if (*idxbuf)
+			delete [] *idxbuf;
+
+		buf = "";
+		buf.setFillByte(0);
+		buf.setSize(++(*isize));
+
+		*idxbuf = new char [ (*isize) ];
+
+		datfd->seek(start, SEEK_SET);
+		datfd->read(buf.getRawData(), (int)((*isize) - 1));
+
+		for (ch = 0; buf[ch]; ch++) {		// skip over index string
+			if (buf[ch] == 10) {
+				ch++;
+				break;
+			}
+		}
+		buf = SWBuf(buf.c_str()+ch);
+		// resolve link
+		if (!strncmp(buf.c_str(), "@LINK", 5)) {
+			for (ch = 0; buf[ch]; ch++) {		// null before nl
+				if (buf[ch] == 10) {
+					buf[ch] = 0;
+					break;
+				}
+			}
+			// the link target key starts 6 characters in, after "@LINK"
+			// and one separator character
+			findOffset(buf.c_str() + 6, &start, isize);
+		}
+		else break;
+	}
+	while (true);	// while we're resolving links
+
+	if (idxbuflocal) {
+		// copy no more than fits in the *isize bytes allocated for *idxbuf
+		int localsize = strlen(idxbuflocal);
+		localsize = (localsize < (*isize - 1)) ? localsize : (*isize - 1);
+		strncpy(*idxbuf, idxbuflocal, localsize);
+		(*idxbuf)[localsize] = 0;
+		free(idxbuflocal);
+	}
+}
+
+
+/******************************************************************************
+ * RawLD::settext	- Sets text for current offset
+ *
+ * ENT: key	- key for this entry
+ *	buf	- buffer to store
+ *	len	- length of buffer (0 - null terminated)
+ */
+
+void RawStr::doSetText(const char *ikey, const char *buf, long len)
+{
+
+	__u32 start, outstart;
+	__u32 idxoff;
+	__u32 endoff;
+	__s32 shiftSize;
+	__u16 size;
+	__u16 outsize;
+	static const char nl[] = {13, 10};
+	char *tmpbuf = 0;
+	char *key = 0;
+	char *dbKey = 0;
+	char *idxBytes = 0;
+	char *outbuf = 0;
+	char *ch = 0;
+
+	char errorStatus = findOffset(ikey, &start, &size, 0, &idxoff);
+	stdstr(&key, ikey, 2);
+	if (!caseSensitive) toupperstr_utf8(key, strlen(key)*2);
+
+	len = (len < 0) ? strlen(buf) : len;
+
+	getIDXBufDat(start, &dbKey);
+
+	if (strcmp(key, dbKey) < 0) {
+	}
+	else if (strcmp(key, dbKey) > 0) {
+		if (errorStatus != (char)-2)	// not a new file
+			idxoff += 6;
+		else	idxoff = 0;
+	}
+	else if ((!strcmp(key, dbKey)) && (len>0 /*we're not deleting*/)) { // got absolute entry
+		do {
+			tmpbuf = new char [ size + 2 ];
+			memset(tmpbuf, 0, size + 2);
+			datfd->seek(start, SEEK_SET);
+			datfd->read(tmpbuf, (int)(size - 1));
+
+			for (ch = tmpbuf; *ch; ch++) {		// skip over index string
+				if (*ch == 10) {
+					ch++;
+					break;
+				}
+			}
+			memmove(tmpbuf, ch, size - (unsigned short)(ch-tmpbuf));
+
+			// resolve link
+			if (!strncmp(tmpbuf, "@LINK", 5) && (len)) {
+				for (ch = tmpbuf; *ch; ch++) {		// null before nl
+					if (*ch == 10) {
+						*ch = 0;
+						break;
+					}
+				}
+				findOffset(tmpbuf + 6, &start, &size, 0, &idxoff);
+			}
+			else break;
+		}
+		while (true);	// while we're resolving links
+	}
+
+	endoff = idxfd->seek(0, SEEK_END);
+
+	shiftSize = endoff - idxoff;
+
+	if (shiftSize > 0) {
+		idxBytes = new char [ shiftSize ];
+		idxfd->seek(idxoff, SEEK_SET);
+		idxfd->read(idxBytes, shiftSize);
+	}
+ + outbuf = new char [ len + strlen(key) + 5 ]; + sprintf(outbuf, "%s%c%c", key, 13, 10); + size = strlen(outbuf); + memcpy(outbuf + size, buf, len); + size = outsize = size + (len); + + start = outstart = datfd->seek(0, SEEK_END); + + outstart = archtosword32(start); + outsize = archtosword16(size); + + idxfd->seek(idxoff, SEEK_SET); + if (len > 0) { + datfd->seek(start, SEEK_SET); + datfd->write(outbuf, (int)size); + + // add a new line to make data file easier to read in an editor + datfd->write(&nl, 2); + + idxfd->write(&outstart, 4); + idxfd->write(&outsize, 2); + if (idxBytes) { + idxfd->write(idxBytes, shiftSize); + delete [] idxBytes; + } + } + else { // delete entry + if (idxBytes) { + idxfd->write(idxBytes+6, shiftSize-6); + idxfd->seek(-1, SEEK_CUR); // last valid byte + FileMgr::getSystemFileMgr()->trunc(idxfd); // truncate index + delete [] idxBytes; + } + } + + delete [] key; + delete [] outbuf; + free(dbKey); +} + + +/****************************************************************************** + * RawLD::linkentry - links one entry to another + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * destidxoff - dest offset into .vss + * srcidxoff - source offset into .vss + */ + +void RawStr::doLinkEntry(const char *destkey, const char *srckey) { + char *text = new char [ strlen(destkey) + 7 ]; + sprintf(text, "@LINK %s", destkey); + doSetText(srckey, text); + delete [] text; +} + +/****************************************************************************** + * RawLD::CreateModule - Creates new module files + * + * ENT: path - directory to store module files + * RET: error status + */ + +signed char RawStr::createModule(const char *ipath) +{ + char *path = 0; + char *buf = new char [ strlen (ipath) + 20 ]; + FileDesc *fd, *fd2; + + stdstr(&path, ipath); + + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) + path[strlen(path)-1] = 0; + + sprintf(buf, "%s.dat", path); + FileMgr::removeFile(buf); + fd = 
FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE); + fd->getFd(); + FileMgr::getSystemFileMgr()->close(fd); + + sprintf(buf, "%s.idx", path); + FileMgr::removeFile(buf); + fd2 = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE); + fd2->getFd(); + FileMgr::getSystemFileMgr()->close(fd2); + + delete [] path; + + return 0; +} + +SWORD_NAMESPACE_END diff --git a/src/modules/common/rawstr4.cpp b/src/modules/common/rawstr4.cpp new file mode 100644 index 0000000..e2ce899 --- /dev/null +++ b/src/modules/common/rawstr4.cpp @@ -0,0 +1,538 @@ +/****************************************************************************** + * + * rawstr4.cpp - code for class 'RawStr'- a module that reads raw text + * files: ot and nt using indexs ??.bks ??.cps ??.vss + * and provides lookup and parsing functions based on + * class StrKey + * + * $Id: rawstr4.cpp 2833 2013-06-29 06:40:28Z chrislit $ + * + * Copyright 2001-2013 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + */ + +#include <stdio.h> +#include <fcntl.h> +#include <errno.h> +#include <stdlib.h> +#include <sys/types.h> + +#include <utilstr.h> +#include <rawstr4.h> +#include <sysdata.h> +#include <swlog.h> +#include <filemgr.h> +#include <swbuf.h> +#include <stringmgr.h> + +SWORD_NAMESPACE_START + +/****************************************************************************** + * RawStr Statics + */ + +int RawStr4::instance = 0; +const int RawStr4::IDXENTRYSIZE = 8; + + +/****************************************************************************** + * RawStr Constructor - Initializes data for instance of RawStr + * + * ENT: ipath - path of the directory where data and index files are located. + * be sure to include the trailing separator (e.g. '/' or '\') + * (e.g. 'modules/texts/rawtext/webster/') + */ + +RawStr4::RawStr4(const char *ipath, int fileMode, bool caseSensitive) : caseSensitive(caseSensitive) +{ + SWBuf buf; + + nl = '\n'; + lastoff = -1; + path = 0; + stdstr(&path, ipath); + + if (fileMode == -1) { // try read/write if possible + fileMode = FileMgr::RDWR; + } + + buf.setFormatted("%s.idx", path); + idxfd = FileMgr::getSystemFileMgr()->open(buf, fileMode, true); + + buf.setFormatted("%s.dat", path); + datfd = FileMgr::getSystemFileMgr()->open(buf, fileMode, true); + + if (datfd < 0) { + SWLog::getSystemLog()->logError("%d", errno); + } + + instance++; +} + + +/****************************************************************************** + * RawStr Destructor - Cleans up instance of RawStr + */ + +RawStr4::~RawStr4() +{ + if (path) + delete [] path; + + --instance; + + FileMgr::getSystemFileMgr()->close(idxfd); + FileMgr::getSystemFileMgr()->close(datfd); +} + + +/****************************************************************************** + * RawStr4::getidxbufdat - Gets the index string at the given idx offset + * NOTE: buf is allocated and must be freed by + * calling function + * + * ENT: ioffset - offset in dat file to lookup + * buf - address 
of pointer to allocate for storage of string + */ + +void RawStr4::getIDXBufDat(long ioffset, char **buf) const +{ + int size; + char ch; + if (datfd > 0) { + datfd->seek(ioffset, SEEK_SET); + for (size = 0; datfd->read(&ch, 1) == 1; size++) { + if ((ch == '\\') || (ch == 10) || (ch == 13)) + break; + } + *buf = (*buf) ? (char *)realloc(*buf, size*2 + 1) : (char *)malloc(size*2 + 1); + if (size) { + datfd->seek(ioffset, SEEK_SET); + datfd->read(*buf, size); + } + (*buf)[size] = 0; + if (!caseSensitive) toupperstr_utf8(*buf, size*2); + } + else { + *buf = (*buf) ? (char *)realloc(*buf, 1) : (char *)malloc(1); + **buf = 0; + } +} + + +/****************************************************************************** + * RawStr4::getidxbuf - Gets the index string at the given idx offset + * NOTE: buf is allocated and must be freed by + * calling function + * + * ENT: ioffset - offset in idx file to lookup + * buf - address of pointer to allocate for storage of string + */ + +void RawStr4::getIDXBuf(long ioffset, char **buf) const +{ + __u32 offset; + + if (idxfd > 0) { + idxfd->seek(ioffset, SEEK_SET); + + idxfd->read(&offset, 4); + offset = swordtoarch32(offset); + + getIDXBufDat(offset, buf); + +/* What the heck is this supposed to do?????? 
+ for (trybuf = targetbuf = *buf; *trybuf; trybuf++, targetbuf++) { + *targetbuf = *trybuf; + } + *targetbuf = 0; + trybuf = 0; + if (!caseSensitive) toupperstr_utf8(targetbuf); +*/ + } +} + + +/****************************************************************************** + * RawStr4::findoffset - Finds the offset of the key string from the indexes + * + * ENT: key - key string to lookup + * start - address to store the starting offset + * size - address to store the size of the entry + * away - number of entries before of after to jump + * (default = 0) + * + * RET: error status -1 general error; -2 new file + */ + +signed char RawStr4::findOffset(const char *ikey, __u32 *start, __u32 *size, long away, __u32 *idxoff) const +{ + char *trybuf, *maxbuf, *key = 0, quitflag = 0; + signed char retval = -1; + long headoff, tailoff, tryoff = 0, maxoff = 0; + int diff = 0; + bool awayFromSubstrCheck = false; + + if (idxfd->getFd() >=0) { + tailoff = maxoff = idxfd->seek(0, SEEK_END) - 8; + + retval = (tailoff >= 0) ? 0 : -2; // if NOT new file + if (*ikey && retval != -2) { + headoff = 0; + + stdstr(&key, ikey, 3); + if (!caseSensitive) toupperstr_utf8(key, strlen(key)*3); + + int keylen = strlen(key); + bool substr = false; + + trybuf = maxbuf = 0; + getIDXBuf(maxoff, &maxbuf); + + while (headoff < tailoff) { + tryoff = (lastoff == -1) ? headoff + ((((tailoff / 8) - (headoff / 8))) / 2) * 8 : lastoff; + lastoff = -1; + getIDXBuf(tryoff, &trybuf); + + if (!*trybuf && tryoff) { // In case of extra entry at end of idx (not first entry) + tryoff += (tryoff > (maxoff / 2))?-8:8; + retval = -1; + break; + } + + diff = strcmp(key, trybuf); + + if (!diff) + break; + + if (!strncmp(trybuf, key, keylen)) substr = true; + + if (diff < 0) + tailoff = (tryoff == headoff) ? 
headoff : tryoff; + else headoff = tryoff; + + if (tailoff == headoff + 8) { + if (quitflag++) + headoff = tailoff; + } + } + + // didn't find exact match + if (headoff >= tailoff) { + tryoff = headoff; + if (!substr && ((tryoff != maxoff)||(strncmp(key, maxbuf, keylen)<0))) { + awayFromSubstrCheck = true; + away--; // if our entry doesn't startwith our key, prefer the previous entry over the next + } + } + if (trybuf) + free(trybuf); + delete [] key; + if (maxbuf) + free(maxbuf); + } + else tryoff = 0; + + idxfd->seek(tryoff, SEEK_SET); + + __u32 tmpStart, tmpSize; + *start = *size = tmpStart = tmpSize = 0; + idxfd->read(&tmpStart, 4); + idxfd->read(&tmpSize, 4); + if (idxoff) + *idxoff = tryoff; + + *start = swordtoarch32(tmpStart); + *size = swordtoarch32(tmpSize); + + while (away) { + unsigned long laststart = *start; + unsigned long lastsize = *size; + long lasttry = tryoff; + tryoff += (away > 0) ? 8 : -8; + + bool bad = false; + if (((tryoff + (away*8)) < -8) || (tryoff + (away*8) > (maxoff+8))) + bad = true; + else if (idxfd->seek(tryoff, SEEK_SET) < 0) + bad = true; + if (bad) { + if(!awayFromSubstrCheck) + retval = -1; + *start = laststart; + *size = lastsize; + tryoff = lasttry; + if (idxoff) + *idxoff = tryoff; + break; + } + idxfd->read(&tmpStart, 4); + idxfd->read(&tmpSize, 4); + if (idxoff) + *idxoff = tryoff; + + *start = swordtoarch32(tmpStart); + *size = swordtoarch32(tmpSize); + + if (((laststart != *start) || (lastsize != *size)) && (*size)) + away += (away < 0) ? 
1 : -1; + } + + lastoff = tryoff; + } + else { + *start = 0; + *size = 0; + if (idxoff) + *idxoff = 0; + retval = -1; + } + return retval; +} + + +/****************************************************************************** + * RawStr4::readtext - gets text at a given offset + * + * ENT: + * start - starting offset where the text is located in the file + * size - size of text entry + * buf - buffer to store text + * + */ + +void RawStr4::readText(__u32 istart, __u32 *isize, char **idxbuf, SWBuf &buf) const +{ + unsigned int ch; + char *idxbuflocal = 0; + getIDXBufDat(istart, &idxbuflocal); + __u32 start = istart; + + do { + if (*idxbuf) + delete [] *idxbuf; + + buf = ""; + buf.setFillByte(0); + buf.setSize(++(*isize)); + + *idxbuf = new char [ (*isize) ]; + + datfd->seek(start, SEEK_SET); + datfd->read(buf.getRawData(), (int)((*isize) - 1)); + + for (ch = 0; buf[ch]; ch++) { // skip over index string + if (buf[ch] == 10) { + ch++; + break; + } + } + buf = SWBuf(buf.c_str()+ch); + // resolve link + if (!strncmp(buf.c_str(), "@LINK", 5)) { + for (ch = 0; buf[ch]; ch++) { // null before nl + if (buf[ch] == 10) { + buf[ch] = 0; + break; + } + } + findOffset(buf.c_str() + 6, &start, isize); + } + else break; + } + while (true); // while we're resolving links + + if (idxbuflocal) { + unsigned int localsize = strlen(idxbuflocal); + localsize = (localsize < (*isize - 1)) ? 
localsize : (*isize - 1); + strncpy(*idxbuf, idxbuflocal, localsize); + (*idxbuf)[localsize] = 0; + free(idxbuflocal); + } +} + + +/****************************************************************************** + * RawLD::settext - Sets text for current offset + * + * ENT: key - key for this entry + * buf - buffer to store + * len - length of buffer (0 - null terminated) + */ + +void RawStr4::doSetText(const char *ikey, const char *buf, long len) { + + __u32 start, outstart; + __u32 idxoff; + __u32 endoff; + __s32 shiftSize; + __u32 size; + __u32 outsize; + static const char nl[] = {13, 10}; + char *tmpbuf = 0; + char *key = 0; + char *dbKey = 0; + char *idxBytes = 0; + char *outbuf = 0; + char *ch = 0; + + char errorStatus = findOffset(ikey, &start, &size, 0, &idxoff); + stdstr(&key, ikey, 3); + if (!caseSensitive) toupperstr_utf8(key, strlen(key)*3); + + len = (len < 0) ? strlen(buf) : len; + getIDXBufDat(start, &dbKey); + + if (strcmp(key, dbKey) < 0) { + } + else if (strcmp(key, dbKey) > 0) { + if (errorStatus != (char)-2) // not a new file + idxoff += 8; + else idxoff = 0; + } + else if ((!strcmp(key, dbKey)) && (len>0/*we're not deleting*/)) { // got absolute entry + do { + tmpbuf = new char [ size + 2 ]; + memset(tmpbuf, 0, size + 2); + datfd->seek(start, SEEK_SET); + datfd->read(tmpbuf, (int)(size - 1)); + + for (ch = tmpbuf; *ch; ch++) { // skip over index string + if (*ch == 10) { + ch++; + break; + } + } + memmove(tmpbuf, ch, size - (unsigned long)(ch-tmpbuf)); + + // resolve link + if (!strncmp(tmpbuf, "@LINK", 5) && (len > 0)) { + for (ch = tmpbuf; *ch; ch++) { // null before nl + if (*ch == 10) { + *ch = 0; + break; + } + } + findOffset(tmpbuf + 8, &start, &size, 0, &idxoff); + ++size; + } + else break; + } + while (true); // while we're resolving links + } + + endoff = idxfd->seek(0, SEEK_END); + + shiftSize = endoff - idxoff; + + if (shiftSize > 0) { + idxBytes = new char [ shiftSize ]; + idxfd->seek(idxoff, SEEK_SET); + idxfd->read(idxBytes, 
shiftSize); + } + + outbuf = new char [ len + strlen(key) + 5 ]; + sprintf(outbuf, "%s%c%c", key, 13, 10); + size = strlen(outbuf); + memcpy(outbuf + size, buf, len); + size = outsize = size + len; + + start = outstart = datfd->seek(0, SEEK_END); + + outstart = archtosword32(start); + outsize = archtosword32(size); + + idxfd->seek(idxoff, SEEK_SET); + if (len>0) { + datfd->seek(start, SEEK_SET); + datfd->write(outbuf, (long)size); + + // add a new line to make data file easier to read in an editor + datfd->write(&nl, 2); + + idxfd->write(&outstart, 4); + idxfd->write(&outsize, 4); + if (idxBytes) { + idxfd->write(idxBytes, shiftSize); + delete [] idxBytes; + } + } + else { // delete entry + if (idxBytes) { + idxfd->write(idxBytes+8, shiftSize-8); + idxfd->seek(-1, SEEK_CUR); // last valid byte + FileMgr::getSystemFileMgr()->trunc(idxfd); // truncate index + delete [] idxBytes; + } + } + + delete [] key; + delete [] outbuf; + free(dbKey); +} + + +/****************************************************************************** + * RawLD::linkentry - links one entry to another + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * destidxoff - dest offset into .vss + * srcidxoff - source offset into .vss + */ + +void RawStr4::doLinkEntry(const char *destkey, const char *srckey) { + char *text = new char [ strlen(destkey) + 7 ]; + sprintf(text, "@LINK %s", destkey); + doSetText(srckey, text); + delete [] text; +} + + +/****************************************************************************** + * RawLD::CreateModule - Creates new module files + * + * ENT: path - directory to store module files + * RET: error status + */ + +signed char RawStr4::createModule(const char *ipath) +{ + char *path = 0; + char *buf = new char [ strlen (ipath) + 20 ]; + FileDesc *fd, *fd2; + + stdstr(&path, ipath); + + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) + path[strlen(path)-1] = 0; + + sprintf(buf, "%s.dat", path); + 
FileMgr::removeFile(buf); + fd = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE); + fd->getFd(); + FileMgr::getSystemFileMgr()->close(fd); + + sprintf(buf, "%s.idx", path); + FileMgr::removeFile(buf); + fd2 = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE); + fd2->getFd(); + FileMgr::getSystemFileMgr()->close(fd2); + + delete [] path; + + return 0; +} + +SWORD_NAMESPACE_END diff --git a/src/modules/common/rawverse.cpp b/src/modules/common/rawverse.cpp new file mode 100644 index 0000000..5527d38 --- /dev/null +++ b/src/modules/common/rawverse.cpp @@ -0,0 +1,311 @@ +/****************************************************************************** + * + * rawverse.cpp - code for class 'RawVerse'- a module that reads raw text + * files: ot and nt using indexs ??.bks ??.cps ??.vss + * and provides lookup and parsing functions based on + * class VerseKey + * + * + * Copyright 1997-2013 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + */ + + + +#include <ctype.h> +#include <stdio.h> +#include <fcntl.h> +#include <errno.h> + +#include <utilstr.h> +#include <rawverse.h> +#include <versekey.h> +#include <sysdata.h> +#include <filemgr.h> +#include <swbuf.h> + + +SWORD_NAMESPACE_START + +/****************************************************************************** + * RawVerse Statics + */ + +int RawVerse::instance = 0; +const char *RawVerse::nl = "\r\n"; + + +/****************************************************************************** + * RawVerse Constructor - Initializes data for instance of RawVerse + * + * ENT: ipath - path of the directory where data and index files are located. + * be sure to include the trailing separator (e.g. '/' or '\') + * (e.g. 'modules/texts/rawtext/webster/') + */ + +RawVerse::RawVerse(const char *ipath, int fileMode) +{ + SWBuf buf; + + path = 0; + stdstr(&path, ipath); + + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) + path[strlen(path)-1] = 0; + + if (fileMode == -1) { // try read/write if possible + fileMode = FileMgr::RDWR; + } + + buf.setFormatted("%s/ot.vss", path); + idxfp[0] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true); + + buf.setFormatted("%s/nt.vss", path); + idxfp[1] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true); + + buf.setFormatted("%s/ot", path); + textfp[0] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true); + + buf.setFormatted("%s/nt", path); + textfp[1] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true); + + instance++; +} + + +/****************************************************************************** + * RawVerse Destructor - Cleans up instance of RawVerse + */ + +RawVerse::~RawVerse() +{ + int loop1; + + if (path) + delete [] path; + + --instance; + + for (loop1 = 0; loop1 < 2; loop1++) { + FileMgr::getSystemFileMgr()->close(idxfp[loop1]); + FileMgr::getSystemFileMgr()->close(textfp[loop1]); + } +} + + 
+/****************************************************************************** + * RawVerse::findoffset - Finds the offset of the key verse from the indexes + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * idxoff - offset into .vss + * start - address to store the starting offset + * size - address to store the size of the entry + */ + +void RawVerse::findOffset(char testmt, long idxoff, long *start, unsigned short *size) const { + idxoff *= 6; + if (!testmt) + testmt = ((idxfp[1]) ? 1:2); + + if (idxfp[testmt-1]->getFd() >= 0) { + idxfp[testmt-1]->seek(idxoff, SEEK_SET); + __s32 tmpStart; + __u16 tmpSize; + idxfp[testmt-1]->read(&tmpStart, 4); + long len = idxfp[testmt-1]->read(&tmpSize, 2); // read size + + *start = swordtoarch32(tmpStart); + *size = swordtoarch16(tmpSize); + + if (len < 2) { + *size = (unsigned short)((*start) ? (textfp[testmt-1]->seek(0, SEEK_END) - (long)*start) : 0); // if for some reason we get an error reading size, make size to end of file + } + } + else { + *start = 0; + *size = 0; + } +} + + +/****************************************************************************** + * RawVerse::readtext - gets text at a given offset + * + * ENT: testmt - testament file to search in (0 - Old; 1 - New) + * start - starting offset where the text is located in the file + * size - size of text entry + 2 (null)(null) + * buf - buffer to store text + * + */ + +void RawVerse::readText(char testmt, long start, unsigned short size, SWBuf &buf) const { + buf = ""; + buf.setFillByte(0); + buf.setSize(size + 1); + if (!testmt) + testmt = ((idxfp[1]) ? 
1:2); + if (size) { + if (textfp[testmt-1]->getFd() >= 0) { + textfp[testmt-1]->seek(start, SEEK_SET); + textfp[testmt-1]->read(buf.getRawData(), (int)size); + } + } +} + + +/****************************************************************************** + * RawVerse::settext - Sets text for current offset + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * idxoff - offset into .vss + * buf - buffer to store + * len - length of buffer (0 - null terminated) + */ + +void RawVerse::doSetText(char testmt, long idxoff, const char *buf, long len) +{ + __s32 start; + __u16 size; + + idxoff *= 6; + if (!testmt) + testmt = ((idxfp[1]) ? 1:2); + + size = (len < 0) ? strlen(buf) : len; + + start = textfp[testmt-1]->seek(0, SEEK_END); + idxfp[testmt-1]->seek(idxoff, SEEK_SET); + + if (size) { + textfp[testmt-1]->seek(start, SEEK_SET); + textfp[testmt-1]->write(buf, (int)size); + + // add a new line to make data file easier to read in an editor + textfp[testmt-1]->write(nl, 2); + } + else { + start = 0; + } + + start = archtosword32(start); + size = archtosword16(size); + + idxfp[testmt-1]->write(&start, 4); + idxfp[testmt-1]->write(&size, 2); +} + + +/****************************************************************************** + * RawVerse::linkentry - links one entry to another + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * destidxoff - dest offset into .vss + * srcidxoff - source offset into .vss + */ + +void RawVerse::doLinkEntry(char testmt, long destidxoff, long srcidxoff) { + __s32 start; + __u16 size; + + destidxoff *= 6; + srcidxoff *= 6; + + if (!testmt) + testmt = ((idxfp[1]) ? 
1:2); + + // get source + idxfp[testmt-1]->seek(srcidxoff, SEEK_SET); + idxfp[testmt-1]->read(&start, 4); + idxfp[testmt-1]->read(&size, 2); + + // write dest + idxfp[testmt-1]->seek(destidxoff, SEEK_SET); + idxfp[testmt-1]->write(&start, 4); + idxfp[testmt-1]->write(&size, 2); +} + + +/****************************************************************************** + * RawVerse::createModule - Creates new module files + * + * ENT: path - directory to store module files + * RET: error status + */ + +char RawVerse::createModule(const char *ipath, const char *v11n) +{ + char *path = 0; + char *buf = new char [ strlen (ipath) + 20 ]; + FileDesc *fd, *fd2; + + stdstr(&path, ipath); + + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) + path[strlen(path)-1] = 0; + + sprintf(buf, "%s/ot", path); + FileMgr::removeFile(buf); + fd = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE); + fd->getFd(); + FileMgr::getSystemFileMgr()->close(fd); + + sprintf(buf, "%s/nt", path); + FileMgr::removeFile(buf); + fd = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE); + fd->getFd(); + FileMgr::getSystemFileMgr()->close(fd); + + sprintf(buf, "%s/ot.vss", path); + FileMgr::removeFile(buf); + fd = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE); + fd->getFd(); + + sprintf(buf, "%s/nt.vss", path); + FileMgr::removeFile(buf); + fd2 = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE); + fd2->getFd(); + + VerseKey vk; + vk.setVersificationSystem(v11n); + vk.setIntros(1); + + __s32 offset = 0; + __u16 size = 0; + offset = archtosword32(offset); + size = archtosword16(size); + + for (vk = TOP; !vk.popError(); vk++) { + if (vk.getTestament() < 2) { + fd->write(&offset, 4); + fd->write(&size, 2); + } + else { + fd2->write(&offset, 4); + fd2->write(&size, 2); + } + } + 
fd2->write(&offset, 4); + fd2->write(&size, 2); + + FileMgr::getSystemFileMgr()->close(fd); + FileMgr::getSystemFileMgr()->close(fd2); + + delete [] path; + delete [] buf; + + return 0; +} + +SWORD_NAMESPACE_END diff --git a/src/modules/common/rawverse4.cpp b/src/modules/common/rawverse4.cpp new file mode 100644 index 0000000..b87ea0d --- /dev/null +++ b/src/modules/common/rawverse4.cpp @@ -0,0 +1,312 @@ +/****************************************************************************** + * + * rawverse4.cpp - code for class 'RawVerse4'- a module that reads raw + * text files: + * ot and nt using indexs ??.bks ??.cps ??.vss + * and provides lookup and parsing functions based on + * class VerseKey + * + * $Id: rawverse4.cpp 2833 2013-06-29 06:40:28Z chrislit $ + * + * Copyright 2007-2013 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + */ + + + +#include <ctype.h> +#include <stdio.h> +#include <fcntl.h> +#include <errno.h> + +#include <utilstr.h> +#include <rawverse4.h> +#include <versekey.h> +#include <sysdata.h> +#include <filemgr.h> +#include <swbuf.h> + + +SWORD_NAMESPACE_START + +/****************************************************************************** + * RawVerse4 Statics + */ + +int RawVerse4::instance = 0; +const char *RawVerse4::nl = "\r\n"; + + +/****************************************************************************** + * RawVerse4 Constructor - Initializes data for instance of RawVerse4 + * + * ENT: ipath - path of the directory where data and index files are located. + * be sure to include the trailing separator (e.g. '/' or '\') + * (e.g. 'modules/texts/rawtext/webster/') + */ + +RawVerse4::RawVerse4(const char *ipath, int fileMode) +{ + SWBuf buf; + + path = 0; + stdstr(&path, ipath); + + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) + path[strlen(path)-1] = 0; + + if (fileMode == -1) { // try read/write if possible + fileMode = FileMgr::RDWR; + } + + buf.setFormatted("%s/ot.vss", path); + idxfp[0] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true); + + buf.setFormatted("%s/nt.vss", path); + idxfp[1] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true); + + buf.setFormatted("%s/ot", path); + textfp[0] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true); + + buf.setFormatted("%s/nt", path); + textfp[1] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true); + + instance++; +} + + +/****************************************************************************** + * RawVerse4 Destructor - Cleans up instance of RawVerse4 + */ + +RawVerse4::~RawVerse4() +{ + int loop1; + + if (path) + delete [] path; + + --instance; + + for (loop1 = 0; loop1 < 2; loop1++) { + FileMgr::getSystemFileMgr()->close(idxfp[loop1]); + FileMgr::getSystemFileMgr()->close(textfp[loop1]); + } +} + + 
+/****************************************************************************** + * RawVerse4::findoffset - Finds the offset of the key verse from the indexes + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * idxoff - offset into .vss + * start - address to store the starting offset + * size - address to store the size of the entry + */ + +void RawVerse4::findOffset(char testmt, long idxoff, long *start, unsigned long *size) const { + idxoff *= 8; + if (!testmt) + testmt = ((idxfp[1]) ? 1:2); + + if (idxfp[testmt-1]->getFd() >= 0) { + idxfp[testmt-1]->seek(idxoff, SEEK_SET); + __u32 tmpStart; + __u32 tmpSize; + idxfp[testmt-1]->read(&tmpStart, 4); + long len = idxfp[testmt-1]->read(&tmpSize, 4); // read size + + *start = swordtoarch32(tmpStart); + *size = swordtoarch32(tmpSize); + + if (len < 2) { + *size = (unsigned long)((*start) ? (textfp[testmt-1]->seek(0, SEEK_END) - (long)*start) : 0); // if for some reason we get an error reading size, make size to end of file + } + } + else { + *start = 0; + *size = 0; + } +} + + +/****************************************************************************** + * RawVerse4::readtext - gets text at a given offset + * + * ENT: testmt - testament file to search in (0 - Old; 1 - New) + * start - starting offset where the text is located in the file + * size - size of text entry + 2 (null)(null) + * buf - buffer to store text + * + */ + +void RawVerse4::readText(char testmt, long start, unsigned long size, SWBuf &buf) const { + buf = ""; + buf.setFillByte(0); + buf.setSize(size + 1); + if (!testmt) + testmt = ((idxfp[1]) ? 
1:2); + if (size) { + if (textfp[testmt-1]->getFd() >= 0) { + textfp[testmt-1]->seek(start, SEEK_SET); + textfp[testmt-1]->read(buf.getRawData(), (int)size); + } + } +} + + +/****************************************************************************** + * RawVerse4::settext - Sets text for current offset + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * idxoff - offset into .vss + * buf - buffer to store + * len - length of buffer (0 - null terminated) + */ + +void RawVerse4::doSetText(char testmt, long idxoff, const char *buf, long len) +{ + __u32 start; + __u32 size; + + idxoff *= 8; + if (!testmt) + testmt = ((idxfp[1]) ? 1:2); + + size = (len < 0) ? strlen(buf) : len; + + start = textfp[testmt-1]->seek(0, SEEK_END); + idxfp[testmt-1]->seek(idxoff, SEEK_SET); + + if (size) { + textfp[testmt-1]->seek(start, SEEK_SET); + textfp[testmt-1]->write(buf, (int)size); + + // add a new line to make data file easier to read in an editor + textfp[testmt-1]->write(nl, 2); + } + else { + start = 0; + } + + start = archtosword32(start); + size = archtosword32(size); + + idxfp[testmt-1]->write(&start, 4); + idxfp[testmt-1]->write(&size, 4); +} + + +/****************************************************************************** + * RawVerse4::linkentry - links one entry to another + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * destidxoff - dest offset into .vss + * srcidxoff - source offset into .vss + */ + +void RawVerse4::doLinkEntry(char testmt, long destidxoff, long srcidxoff) { + __u32 start; + __u32 size; + + destidxoff *= 8; + srcidxoff *= 8; + + if (!testmt) + testmt = ((idxfp[1]) ? 
1:2); + + // get source + idxfp[testmt-1]->seek(srcidxoff, SEEK_SET); + idxfp[testmt-1]->read(&start, 4); + idxfp[testmt-1]->read(&size, 4); + + // write dest + idxfp[testmt-1]->seek(destidxoff, SEEK_SET); + idxfp[testmt-1]->write(&start, 4); + idxfp[testmt-1]->write(&size, 4); +} + + +/****************************************************************************** + * RawVerse4::CreateModule - Creates new module files + * + * ENT: path - directory to store module files + * RET: error status + */ + +char RawVerse4::createModule(const char *ipath, const char *v11n) +{ + char *path = 0; + char *buf = new char [ strlen (ipath) + 20 ]; + FileDesc *fd, *fd2; + + stdstr(&path, ipath); + + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) + path[strlen(path)-1] = 0; + + sprintf(buf, "%s/ot", path); + FileMgr::removeFile(buf); + fd = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE); + fd->getFd(); + FileMgr::getSystemFileMgr()->close(fd); + + sprintf(buf, "%s/nt", path); + FileMgr::removeFile(buf); + fd = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE); + fd->getFd(); + FileMgr::getSystemFileMgr()->close(fd); + + sprintf(buf, "%s/ot.vss", path); + FileMgr::removeFile(buf); + fd = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE); + fd->getFd(); + + sprintf(buf, "%s/nt.vss", path); + FileMgr::removeFile(buf); + fd2 = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE); + fd2->getFd(); + + VerseKey vk; + vk.setVersificationSystem(v11n); + vk.setIntros(1); + __u32 offset = 0; + __u32 size = 0; + offset = archtosword32(offset); + size = archtosword32(size); + + for (vk = TOP; !vk.popError(); vk++) { + if (vk.getTestament() < 2) { + fd->write(&offset, 4); + fd->write(&size, 4); + } + else { + fd2->write(&offset, 4); + fd2->write(&size, 4); + } + } + 
fd2->write(&offset, 4); + fd2->write(&size, 4); + + FileMgr::getSystemFileMgr()->close(fd); + FileMgr::getSystemFileMgr()->close(fd2); + + delete [] path; + delete [] buf; + + return 0; +} + +SWORD_NAMESPACE_END diff --git a/src/modules/common/sapphire.cpp b/src/modules/common/sapphire.cpp new file mode 100644 index 0000000..8cc3e15 --- /dev/null +++ b/src/modules/common/sapphire.cpp @@ -0,0 +1,236 @@ +/****************************************************************************** + * + * sapphire.cpp - the Saphire II stream cipher class + * + * $Id: sapphire.cpp 2833 2013-06-29 06:40:28Z chrislit $ + * + * Copyright 1999-2013 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + +/****************************************************************************** + * + * Original license notice & credits: + * Dedicated to the Public Domain the author and inventor: + * (Michael Paul Johnson). This code comes with no warranty. + * Use it at your own risk. + * Ported from the Pascal implementation of the Sapphire Stream + * Cipher 9 December 1994. + * Added hash pre- and post-processing 27 December 1994. 
+ * Modified initialization to make index variables key dependent, + * made the output function more resistant to cryptanalysis, + * and renamed to Sapphire II 2 January 1995 + * + */ + +#include <string.h> + +#include "sapphire.h" + +SWORD_NAMESPACE_START + +unsigned char sapphire::keyrand(int limit, + unsigned char *user_key, + unsigned char keysize, + unsigned char *rsum, + unsigned *keypos) + { + unsigned u, // Value from 0 to limit to return. + retry_limiter, // No infinite loops allowed. + mask; // Select just enough bits. + + if (!limit) return 0; // Avoid divide by zero error. + retry_limiter = 0; + mask = 1; // Fill mask with enough bits to cover + while (mask < (unsigned)limit) // the desired range. + mask = (mask << 1) + 1; + do + { + *rsum = cards[*rsum] + user_key[(*keypos)++]; + if (*keypos >= keysize) + { + *keypos = 0; // Recycle the user key. + *rsum += keysize; // key "aaaa" != key "aaaaaaaa" + } + u = mask & *rsum; + if (++retry_limiter > 11) + u %= limit; // Prevent very rare long loops. + } + while (u > (unsigned)limit); + return u; + } + +void sapphire::initialize(unsigned char *key, unsigned char keysize) + { + // Key size may be up to 256 bytes. + // Pass phrases may be used directly, with longer length + // compensating for the low entropy expected in such keys. + // Alternatively, shorter keys hashed from a pass phrase or + // generated randomly may be used. For random keys, lengths + // of from 4 to 16 bytes are recommended, depending on how + // secure you want this to be. + + int i; + unsigned char toswap, swaptemp, rsum; + unsigned keypos; + + // If we have been given no key, assume the default hash setup. + + if (keysize < 1) + { + hash_init(); + return; + } + + // Start with cards all in order, one of each. + + for (i=0;i<256;i++) + cards[i] = i; + + // Swap the card at each position with some other card. + + toswap = 0; + keypos = 0; // Start with first byte of user key. 
+ rsum = 0; + for (i=255;i>=0;i--) + { + toswap = keyrand(i, key, keysize, &rsum, &keypos); + swaptemp = cards[i]; + cards[i] = cards[toswap]; + cards[toswap] = swaptemp; + } + + // Initialize the indices and data dependencies. + // Indices are set to different values instead of all 0 + // to reduce what is known about the state of the cards + // when the first byte is emitted. + + rotor = cards[1]; + ratchet = cards[3]; + avalanche = cards[5]; + last_plain = cards[7]; + last_cipher = cards[rsum]; + + toswap = swaptemp = rsum = 0; + keypos = 0; + } + +void sapphire::hash_init(void) + { + // This function is used to initialize non-keyed hash + // computation. + + int i, j; + + // Initialize the indices and data dependencies. + + rotor = 1; + ratchet = 3; + avalanche = 5; + last_plain = 7; + last_cipher = 11; + + // Start with cards all in inverse order. + + for (i=0, j=255;i<256;i++,j--) + cards[i] = (unsigned char) j; + } + +sapphire::sapphire(unsigned char *key, unsigned char keysize) + { + if (key && keysize) + initialize(key, keysize); + } + +void sapphire::burn(void) + { + // Destroy the key and state information in RAM. + memset(cards, 0, 256); + rotor = ratchet = avalanche = last_plain = last_cipher = 0; + } + +sapphire::~sapphire() + { + burn(); + } + +unsigned char sapphire::encrypt(unsigned char b) + { + // Picture a single enigma rotor with 256 positions, rewired + // on the fly by card-shuffling. + + // This cipher is a variant of one invented and written + // by Michael Paul Johnson in November, 1993. + + unsigned char swaptemp; + + // Shuffle the deck a little more. + + ratchet += cards[rotor++]; + swaptemp = cards[last_cipher]; + cards[last_cipher] = cards[ratchet]; + cards[ratchet] = cards[last_plain]; + cards[last_plain] = cards[rotor]; + cards[rotor] = swaptemp; + avalanche += cards[swaptemp]; + + // Output one byte from the state in such a way as to make it + // very hard to figure out which one you are looking at. 
+ + last_cipher = b^cards[(cards[ratchet] + cards[rotor]) & 0xFF] ^ + cards[cards[(cards[last_plain] + + cards[last_cipher] + + cards[avalanche])&0xFF]]; + last_plain = b; + return last_cipher; + } + +unsigned char sapphire::decrypt(unsigned char b) + { + unsigned char swaptemp; + + // Shuffle the deck a little more. + + ratchet += cards[rotor++]; + swaptemp = cards[last_cipher]; + cards[last_cipher] = cards[ratchet]; + cards[ratchet] = cards[last_plain]; + cards[last_plain] = cards[rotor]; + cards[rotor] = swaptemp; + avalanche += cards[swaptemp]; + + // Output one byte from the state in such a way as to make it + // very hard to figure out which one you are looking at. + + last_plain = b^cards[(cards[ratchet] + cards[rotor]) & 0xFF] ^ + cards[cards[(cards[last_plain] + + cards[last_cipher] + + cards[avalanche])&0xFF]]; + last_cipher = b; + return last_plain; + } + +void sapphire::hash_final(unsigned char *hash, // Destination + unsigned char hashlength) // Size of hash. + { + int i; + + for (i=255;i>=0;i--) + encrypt((unsigned char) i); + for (i=0;i<hashlength;i++) + hash[i] = encrypt(0); + } + +SWORD_NAMESPACE_END diff --git a/src/modules/common/swcipher.cpp b/src/modules/common/swcipher.cpp new file mode 100644 index 0000000..16279dc --- /dev/null +++ b/src/modules/common/swcipher.cpp @@ -0,0 +1,147 @@ +/****************************************************************************** + * + * swcipher.cpp - code for class 'SWCipher'- a driver class that + * provides cipher utilities + * + * $Id: swcipher.cpp 2833 2013-06-29 06:40:28Z chrislit $ + * + * Copyright 1999-2013 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + + +#include <stdlib.h> +#include <string.h> +#include <swcipher.h> + +SWORD_NAMESPACE_START + +/****************************************************************************** + * SWCipher Constructor - Initializes data for instance of SWCipher + * + */ + +SWCipher::SWCipher(unsigned char *key) { + master.initialize(key, strlen((char *)key)); + buf = 0; +} + + +/****************************************************************************** + * SWCipher Destructor - Cleans up instance of SWCipher + */ + +SWCipher::~SWCipher() +{ + if (buf) + free(buf); +} + + +char *SWCipher::Buf(const char *ibuf, unsigned long ilen) +{ + if (ibuf) { + + if (buf) + free(buf); + + if (!ilen) { + len = strlen(buf); + ilen = len + 1; + } + else len = ilen; + + buf = (char *) malloc(ilen); + memcpy(buf, ibuf, ilen); + cipher = false; + } + + Decode(); + + return buf; +} + + +char *SWCipher::cipherBuf(unsigned long *ilen, const char *ibuf) +{ + if (ibuf) { + + if (buf) + free(buf); + + buf = (char *) malloc(*ilen+1); + memcpy(buf, ibuf, *ilen); + len = *ilen; + cipher = true; + } + + Encode(); + + *ilen = len; + return buf; +} + + +/****************************************************************************** + * SWCipher::Encode - This function "encodes" the input stream into the + * output stream. + * The GetChars() and SendChars() functions are + * used to separate this method from the actual + * i/o. 
+ */ + +void SWCipher::Encode(void) +{ + if (!cipher) { + work = master; + for (unsigned long i = 0; i < len; i++) + buf[i] = work.encrypt(buf[i]); + cipher = true; + } +} + + +/****************************************************************************** + * SWCipher::Decode - This function "decodes" the input stream into the + * output stream. + * The GetChars() and SendChars() functions are + * used to separate this method from the actual + * i/o. + */ + +void SWCipher::Decode(void) +{ + if (cipher) { + work = master; + unsigned long i; + for (i = 0; i < len; i++) + buf[i] = work.decrypt(buf[i]); + buf[i] = 0; + cipher = false; + } +} + + +/****************************************************************************** + * SWCipher::setCipherKey - setter for a new CipherKey + * + */ + +void SWCipher::setCipherKey(const char *ikey) { + unsigned char *key = (unsigned char *)ikey; + master.initialize(key, strlen((char *)key)); +} + +SWORD_NAMESPACE_END diff --git a/src/modules/common/swcomprs.cpp b/src/modules/common/swcomprs.cpp new file mode 100644 index 0000000..9df8e7d --- /dev/null +++ b/src/modules/common/swcomprs.cpp @@ -0,0 +1,211 @@ +/****************************************************************************** + * + * swcomprs.cpp - a driver class that provides compression utilities + * + * $Id: swcomprs.cpp 2833 2013-06-29 06:40:28Z chrislit $ + * + * Copyright 1996-2013 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + */ + + +#include <stdlib.h> +#include <string.h> +#include <swcomprs.h> + +SWORD_NAMESPACE_START + +/****************************************************************************** + * SWCompress Constructor - Initializes data for instance of SWCompress + * + */ + +SWCompress::SWCompress() +{ + buf = zbuf = 0; + Init(); +} + + +/****************************************************************************** + * SWCompress Destructor - Cleans up instance of SWCompress + */ + +SWCompress::~SWCompress() +{ + if (zbuf) + free(zbuf); + + if (buf) + free(buf); +} + + +void SWCompress::Init() +{ + if (buf) + free(buf); + + if (zbuf) + free(zbuf); + + buf = 0; + zbuf = 0; + direct = 0; + zlen = 0; + slen = 0; + zpos = 0; + pos = 0; +} + + +char *SWCompress::Buf(const char *ibuf, unsigned long *len) { + // setting an uncompressed buffer + if (ibuf) { + Init(); + slen = (len) ? *len : strlen(ibuf); + buf = (char *) calloc(slen + 1, 1); + memcpy(buf, ibuf, slen); + } + + // getting an uncompressed buffer + if (!buf) { + buf = (char *)calloc(1,1); // be sure we at least allocate an empty buf for return; + direct = 1; + Decode(); +// slen = strlen(buf); + if (len) + *len = slen; + } + return buf; +} + + +char *SWCompress::zBuf(unsigned long *len, char *ibuf) +{ + // setting a compressed buffer + if (ibuf) { + Init(); + zbuf = (char *) malloc(*len); + memcpy(zbuf, ibuf, *len); + zlen = *len; + } + + // getting a compressed buffer + if (!zbuf) { + direct = 0; + Encode(); + } + + *len = zlen; + return zbuf; +} + + +unsigned long SWCompress::GetChars(char *ibuf, unsigned long len) +{ + if (direct) { + len = (((zlen - zpos) > (unsigned)len) ? len : zlen - zpos); + if (len > 0) { + memmove(ibuf, &zbuf[zpos], len); + zpos += len; + } + } + else { +// slen = strlen(buf); + len = (((slen - pos) > (unsigned)len) ? 
len : slen - pos); + if (len > 0) { + memmove(ibuf, &buf[pos], len); + pos += len; + } + } + return len; +} + + +unsigned long SWCompress::SendChars(char *ibuf, unsigned long len) +{ + if (direct) { + if (buf) { +// slen = strlen(buf); + if ((pos + len) > (unsigned)slen) { + buf = (char *) realloc(buf, pos + len + 1024); + memset(&buf[pos], 0, len + 1024); + } + } + else buf = (char *)calloc(1, len + 1024); + memmove(&buf[pos], ibuf, len); + pos += len; + } + else { + if (zbuf) { + if ((zpos + len) > zlen) { + zbuf = (char *) realloc(zbuf, zpos + len + 1024); + zlen = zpos + len + 1024; + } + } + else { + zbuf = (char *)calloc(1, len + 1024); + zlen = len + 1024; + } + memmove(&zbuf[zpos], ibuf, len); + zpos += len; + } + return len; +} + + +/****************************************************************************** + * SWCompress::Encode - This function "encodes" the input stream into the + * output stream. + * The GetChars() and SendChars() functions are + * used to separate this method from the actual + * i/o. + */ + +void SWCompress::Encode(void) +{ + cycleStream(); +} + + +/****************************************************************************** + * SWCompress::Decode - This function "decodes" the input stream into the + * output stream. + * The GetChars() and SendChars() functions are + * used to separate this method from the actual + * i/o. 
+ */ + +void SWCompress::Decode(void) +{ + cycleStream(); +} + + +void SWCompress::cycleStream() { + char buf[1024]; + unsigned long len, totlen = 0; + + do { + len = GetChars(buf, 1024); + if (len) + totlen += SendChars(buf, len); + } while (len == 1024); + + zlen = slen = totlen; +} + +SWORD_NAMESPACE_END diff --git a/src/modules/common/xzcomprs.cpp b/src/modules/common/xzcomprs.cpp new file mode 100644 index 0000000..db8a4a8 --- /dev/null +++ b/src/modules/common/xzcomprs.cpp @@ -0,0 +1,181 @@ +/****************************************************************************** + * + * xzcomprs.cpp - XzCompress, a driver class that provides xz (LZMA2) + * compression + * + * $Id: xzcomprs.cpp 2850 2013-07-02 09:57:20Z chrislit $ + * + * Copyright 2013 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + */ + + +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <xzcomprs.h> +#include <zlib.h> + +SWORD_NAMESPACE_START + +/****************************************************************************** + * XzCompress Constructor - Initializes data for instance of XzCompress + * + */ + +XzCompress::XzCompress() : SWCompress() { +} + + +/****************************************************************************** + * XzCompress Destructor - Cleans up instance of XzCompress + */ + +XzCompress::~XzCompress() { +} + + +/****************************************************************************** + * XzCompress::Encode - This function "encodes" the input stream into the + * output stream. + * The GetChars() and SendChars() functions are + * used to separate this method from the actual + * i/o. + * NOTE: must set zlen for parent class to know length of + * compressed buffer. + */ + +void XzCompress::Encode(void) +{ +/* +ZEXTERN int ZEXPORT compress OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen)); + Compresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total + size of the destination buffer, which must be at least 0.1% larger than + sourceLen plus 12 bytes. Upon exit, destLen is the actual size of the + compressed buffer. + This function can be used to compress a whole file at once if the + input file is mmap'ed. + compress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer. 
+*/ + direct = 0; // set direction needed by parent [Get|Send]Chars() + + // get buffer + char chunk[1024]; + char *buf = (char *)calloc(1, 1024); + char *chunkbuf = buf; + unsigned long chunklen; + unsigned long len = 0; + while((chunklen = GetChars(chunk, 1023))) { + memcpy(chunkbuf, chunk, chunklen); + len += chunklen; + if (chunklen < 1023) + break; + else buf = (char *)realloc(buf, len + 1024); + chunkbuf = buf+len; + } + + + zlen = (long) (len*1.001)+15; + char *zbuf = new char[zlen+1]; + if (len) + { + //printf("Doing compress\n"); + if (compress((Bytef*)zbuf, &zlen, (const Bytef*)buf, len) != Z_OK) + { + printf("ERROR in compression\n"); + } + else { + SendChars(zbuf, zlen); + } + } + else + { + fprintf(stderr, "ERROR: no buffer to compress\n"); + } + delete [] zbuf; + free (buf); +} + + +/****************************************************************************** + * XzCompress::Decode - This function "decodes" the input stream into the + * output stream. + * The GetChars() and SendChars() functions are + * used to separate this method from the actual + * i/o. + */ + +void XzCompress::Decode(void) +{ +/* +ZEXTERN int ZEXPORT uncompress OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen)); + Decompresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total + size of the destination buffer, which must be large enough to hold the + entire uncompressed data. (The size of the uncompressed data must have + been saved previously by the compressor and transmitted to the decompressor + by some mechanism outside the scope of this compression library.) + Upon exit, destLen is the actual size of the compressed buffer. + This function can be used to decompress a whole file at once if the + input file is mmap'ed. 
+ + uncompress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer, or Z_DATA_ERROR if the input data was corrupted. +*/ + + // get buffer + char chunk[1024]; + char *zbuf = (char *)calloc(1, 1024); + char *chunkbuf = zbuf; + int chunklen; + unsigned long zlen = 0; + while((chunklen = GetChars(chunk, 1023))) { + memcpy(chunkbuf, chunk, chunklen); + zlen += chunklen; + if (chunklen < 1023) + break; + else zbuf = (char *)realloc(zbuf, zlen + 1024); + chunkbuf = zbuf + zlen; + } + + //printf("Decoding complength{%ld} uncomp{%ld}\n", zlen, blen); + if (zlen) { + unsigned long blen = zlen*20; // trust compression is less than 1000% + char *buf = new char[blen]; + //printf("Doing decompress {%s}\n", zbuf); + slen = 0; + switch (uncompress((Bytef*)buf, &blen, (Bytef*)zbuf, zlen)){ + case Z_OK: SendChars(buf, blen); slen = blen; break; + case Z_MEM_ERROR: fprintf(stderr, "ERROR: not enough memory during decompression.\n"); break; + case Z_BUF_ERROR: fprintf(stderr, "ERROR: not enough room in the out buffer during decompression.\n"); break; + case Z_DATA_ERROR: fprintf(stderr, "ERROR: corrupt data during decompression.\n"); break; + default: fprintf(stderr, "ERROR: an unknown error occured during decompression.\n"); break; + } + delete [] buf; + } + else { + fprintf(stderr, "ERROR: no buffer to decompress!\n"); + } + //printf("Finished decoding\n"); + free (zbuf); +} + +SWORD_NAMESPACE_END diff --git a/src/modules/common/zipcomprs.cpp b/src/modules/common/zipcomprs.cpp new file mode 100644 index 0000000..3e44abd --- /dev/null +++ b/src/modules/common/zipcomprs.cpp @@ -0,0 +1,183 @@ +/****************************************************************************** + * + * zipcomprs.cpp - ZipCompress, a driver class that provides zlib + * compression + * + * $Id: zipcomprs.cpp 2833 2013-06-29 06:40:28Z chrislit $ + * + * Copyright 2000-2013 CrossWire Bible Society (http://www.crosswire.org) + * 
CrossWire Bible Society + * P. O. Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + + +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <zipcomprs.h> +#include <zlib.h> + +SWORD_NAMESPACE_START + +/****************************************************************************** + * ZipCompress Constructor - Initializes data for instance of ZipCompress + * + */ + +ZipCompress::ZipCompress() : SWCompress() +{ +// fprintf(stderr, "init compress\n"); +} + + +/****************************************************************************** + * ZipCompress Destructor - Cleans up instance of ZipCompress + */ + +ZipCompress::~ZipCompress() { +} + + +/****************************************************************************** + * ZipCompress::Encode - This function "encodes" the input stream into the + * output stream. + * The GetChars() and SendChars() functions are + * used to separate this method from the actual + * i/o. + * NOTE: must set zlen for parent class to know length of + * compressed buffer. + */ + +void ZipCompress::Encode(void) +{ +/* +ZEXTERN int ZEXPORT compress OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen)); + Compresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total + size of the destination buffer, which must be at least 0.1% larger than + sourceLen plus 12 bytes. Upon exit, destLen is the actual size of the + compressed buffer. 
+ This function can be used to compress a whole file at once if the + input file is mmap'ed. + compress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer. +*/ + direct = 0; // set direction needed by parent [Get|Send]Chars() + + // get buffer + char chunk[1024]; + char *buf = (char *)calloc(1, 1024); + char *chunkbuf = buf; + unsigned long chunklen; + unsigned long len = 0; + while((chunklen = GetChars(chunk, 1023))) { + memcpy(chunkbuf, chunk, chunklen); + len += chunklen; + if (chunklen < 1023) + break; + else buf = (char *)realloc(buf, len + 1024); + chunkbuf = buf+len; + } + + + zlen = (long) (len*1.001)+15; + char *zbuf = new char[zlen+1]; + if (len) + { + //printf("Doing compress\n"); + if (compress((Bytef*)zbuf, &zlen, (const Bytef*)buf, len) != Z_OK) + { + printf("ERROR in compression\n"); + } + else { + SendChars(zbuf, zlen); + } + } + else + { + fprintf(stderr, "ERROR: no buffer to compress\n"); + } + delete [] zbuf; + free (buf); +} + + +/****************************************************************************** + * ZipCompress::Decode - This function "decodes" the input stream into the + * output stream. + * The GetChars() and SendChars() functions are + * used to separate this method from the actual + * i/o. + */ + +void ZipCompress::Decode(void) +{ +/* +ZEXTERN int ZEXPORT uncompress OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen)); + Decompresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total + size of the destination buffer, which must be large enough to hold the + entire uncompressed data. (The size of the uncompressed data must have + been saved previously by the compressor and transmitted to the decompressor + by some mechanism outside the scope of this compression library.) + Upon exit, destLen is the actual size of the compressed buffer. 
+ This function can be used to decompress a whole file at once if the + input file is mmap'ed. + + uncompress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer, or Z_DATA_ERROR if the input data was corrupted. +*/ + + // get buffer + char chunk[1024]; + char *zbuf = (char *)calloc(1, 1024); + char *chunkbuf = zbuf; + int chunklen; + unsigned long zlen = 0; + while((chunklen = GetChars(chunk, 1023))) { + memcpy(chunkbuf, chunk, chunklen); + zlen += chunklen; + if (chunklen < 1023) + break; + else zbuf = (char *)realloc(zbuf, zlen + 1024); + chunkbuf = zbuf + zlen; + } + + //printf("Decoding complength{%ld} uncomp{%ld}\n", zlen, blen); + if (zlen) { + unsigned long blen = zlen*20; // trust compression is less than 1000% + char *buf = new char[blen]; + //printf("Doing decompress {%s}\n", zbuf); + slen = 0; + switch (uncompress((Bytef*)buf, &blen, (Bytef*)zbuf, zlen)){ + case Z_OK: SendChars(buf, blen); slen = blen; break; + case Z_MEM_ERROR: fprintf(stderr, "ERROR: not enough memory during decompression.\n"); break; + case Z_BUF_ERROR: fprintf(stderr, "ERROR: not enough room in the out buffer during decompression.\n"); break; + case Z_DATA_ERROR: fprintf(stderr, "ERROR: corrupt data during decompression.\n"); break; + default: fprintf(stderr, "ERROR: an unknown error occured during decompression.\n"); break; + } + delete [] buf; + } + else { + fprintf(stderr, "ERROR: no buffer to decompress!\n"); + } + //printf("Finished decoding\n"); + free (zbuf); +} + +SWORD_NAMESPACE_END diff --git a/src/modules/common/zstr.cpp b/src/modules/common/zstr.cpp new file mode 100644 index 0000000..a745502 --- /dev/null +++ b/src/modules/common/zstr.cpp @@ -0,0 +1,700 @@ +/****************************************************************************** + * + * zstr.cpp - code for class 'zStr'- a module that reads compressed text + * files and provides lookup and parsing functions based on + * class StrKey 
+ * + * $Id: zstr.cpp 2980 2013-09-14 21:51:47Z scribe $ + * + * Copyright 2001-2013 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + + +#include <stdio.h> +#include <fcntl.h> +#include <errno.h> + +#include <stdlib.h> +#include <utilstr.h> +#include <zstr.h> +#include <swcomprs.h> + +#include <sysdata.h> +#include <entriesblk.h> +#include <swlog.h> +#include <stringmgr.h> +#include <filemgr.h> +#include <swbuf.h> + +SWORD_NAMESPACE_START + +/****************************************************************************** + * zStr Statics + */ + +int zStr::instance = 0; +const int zStr::IDXENTRYSIZE = 8; +const int zStr::ZDXENTRYSIZE = 8; + + +/****************************************************************************** + * zStr Constructor - Initializes data for instance of zStr + * + * ENT: ipath - path of the directory where data and index files are located. + */ + +zStr::zStr(const char *ipath, int fileMode, long blockCount, SWCompress *icomp, bool caseSensitive) : caseSensitive(caseSensitive) +{ + SWBuf buf; + + lastoff = -1; + path = 0; + stdstr(&path, ipath); + + compressor = (icomp) ? 
icomp : new SWCompress(); + this->blockCount = blockCount; + + if (fileMode == -1) { // try read/write if possible + fileMode = FileMgr::RDWR; + } + + buf.setFormatted("%s.idx", path); + idxfd = FileMgr::getSystemFileMgr()->open(buf, fileMode, true); + + buf.setFormatted("%s.dat", path); + datfd = FileMgr::getSystemFileMgr()->open(buf, fileMode, true); + + buf.setFormatted("%s.zdx", path); + zdxfd = FileMgr::getSystemFileMgr()->open(buf, fileMode, true); + + buf.setFormatted("%s.zdt", path); + zdtfd = FileMgr::getSystemFileMgr()->open(buf, fileMode, true); + + if (datfd <= 0) { + SWLog::getSystemLog()->logError("%d", errno); + } + + cacheBlock = 0; + cacheBlockIndex = -1; + cacheDirty = false; + + instance++; +} + + +/****************************************************************************** + * zStr Destructor - Cleans up instance of zStr + */ + +zStr::~zStr() { + + flushCache(); + + if (path) + delete [] path; + + --instance; + + FileMgr::getSystemFileMgr()->close(idxfd); + FileMgr::getSystemFileMgr()->close(datfd); + FileMgr::getSystemFileMgr()->close(zdxfd); + FileMgr::getSystemFileMgr()->close(zdtfd); + + + if (compressor) + delete compressor; + +} + + +/****************************************************************************** + * zStr::getidxbufdat - Gets the index string at the given dat offset + * NOTE: buf is calloc'd, or if not null, realloc'd and must + * be free'd by calling function + * + * ENT: ioffset - offset in dat file to lookup + * buf - address of pointer to allocate for storage of string + */ + +void zStr::getKeyFromDatOffset(long ioffset, char **buf) const +{ + int size; + char ch; + if (datfd > 0) { + datfd->seek(ioffset, SEEK_SET); + for (size = 0; datfd->read(&ch, 1) == 1; size++) { + if ((ch == '\\') || (ch == 10) || (ch == 13)) + break; + } + *buf = (*buf) ? 
(char *)realloc(*buf, size*2 + 1) : (char *)malloc(size*2 + 1); + if (size) { + datfd->seek(ioffset, SEEK_SET); + datfd->read(*buf, size); + } + (*buf)[size] = 0; + if (!caseSensitive) toupperstr_utf8(*buf, size*2); + } + else { + *buf = (*buf) ? (char *)realloc(*buf, 1) : (char *)malloc(1); + **buf = 0; + } +} + + +/****************************************************************************** + * zStr::getidxbuf - Gets the index string at the given idx offset + * NOTE: buf is calloc'd, or if not null, realloc'd + * and must be freed by calling function + * + * ENT: ioffset - offset in idx file to lookup + * buf - address of pointer to allocate for storage of string + */ + +void zStr::getKeyFromIdxOffset(long ioffset, char **buf) const +{ + __u32 offset; + + if (idxfd > 0) { + idxfd->seek(ioffset, SEEK_SET); + idxfd->read(&offset, 4); + offset = swordtoarch32(offset); + getKeyFromDatOffset(offset, buf); + } +} + + +/****************************************************************************** + * zStr::findoffset - Finds the offset of the key string from the indexes + * + * ENT: key - key string to lookup + * offset - address to store the starting offset + * size - address to store the size of the entry + * away - number of entries before of after to jump + * (default = 0) + * + * RET: error status + */ + +signed char zStr::findKeyIndex(const char *ikey, long *idxoff, long away) const +{ + char *maxbuf = 0, *trybuf = 0, *key = 0, quitflag = 0; + signed char retval = 0; + __s32 headoff, tailoff, tryoff = 0, maxoff = 0; + __u32 start, size; + int diff = 0; + bool awayFromSubstrCheck = false; + + if (idxfd->getFd() >= 0) { + tailoff = maxoff = idxfd->seek(0, SEEK_END) - IDXENTRYSIZE; + if (*ikey) { + headoff = 0; + stdstr(&key, ikey, 3); + if (!caseSensitive) toupperstr_utf8(key, strlen(key)*3); + + int keylen = strlen(key); + bool substr = false; + + getKeyFromIdxOffset(maxoff, &maxbuf); + + while (headoff < tailoff) { + tryoff = (lastoff == -1) ? 
headoff + (((((tailoff / IDXENTRYSIZE) - (headoff / IDXENTRYSIZE))) / 2) * IDXENTRYSIZE) : lastoff; + lastoff = -1; + + getKeyFromIdxOffset(tryoff, &trybuf); + + if (!*trybuf && tryoff) { // In case of extra entry at end of idx (not first entry) + tryoff += (tryoff > (maxoff / 2))?-IDXENTRYSIZE:IDXENTRYSIZE; + retval = -1; + break; + } + + diff = strcmp(key, trybuf); + + if (!diff) + break; + + if (!strncmp(trybuf, key, keylen)) substr = true; + + if (diff < 0) + tailoff = (tryoff == headoff) ? headoff : tryoff; + else headoff = tryoff; + + if (tailoff == headoff + IDXENTRYSIZE) { + if (quitflag++) + headoff = tailoff; + } + } + + // didn't find exact match + if (headoff >= tailoff) { + tryoff = headoff; + if (!substr && ((tryoff != maxoff)||(strncmp(key, maxbuf, keylen)<0))) { + awayFromSubstrCheck = true; + away--; // if our entry doesn't startwith our key, prefer the previous entry over the next + } + } + if (trybuf) + free(trybuf); + delete [] key; + if (maxbuf) + free(maxbuf); + } + else { tryoff = 0; } + + idxfd->seek(tryoff, SEEK_SET); + + start = size = 0; + retval = (idxfd->read(&start, 4) == 4) ? retval : -1; + retval = (idxfd->read(&size, 4) == 4) ? retval : -1; + start = swordtoarch32(start); + size = swordtoarch32(size); + + if (idxoff) + *idxoff = tryoff; + + while (away) { + __u32 laststart = start; + __u32 lastsize = size; + __s32 lasttry = tryoff; + tryoff += (away > 0) ? 
IDXENTRYSIZE : -IDXENTRYSIZE; + + bool bad = false; + if (((long)(tryoff + (away*IDXENTRYSIZE)) < -IDXENTRYSIZE) || (tryoff + (away*IDXENTRYSIZE) > (maxoff+IDXENTRYSIZE))) + bad = true; + else if (idxfd->seek(tryoff, SEEK_SET) < 0) + bad = true; + if (bad) { + if(!awayFromSubstrCheck) + retval = -1; + start = laststart; + size = lastsize; + tryoff = lasttry; + if (idxoff) + *idxoff = tryoff; + break; + } + idxfd->read(&start, 4); + idxfd->read(&size, 4); + start = swordtoarch32(start); + size = swordtoarch32(size); + + if (idxoff) + *idxoff = tryoff; + + + if (((laststart != start) || (lastsize != size)) && (start >= 0) && (size)) + away += (away < 0) ? 1 : -1; + } + + lastoff = tryoff; + } + else { + if (idxoff) + *idxoff = 0; + retval = -1; + } + return retval; +} + + +/****************************************************************************** + * zStr::getText - gets text at a given offset + * + * ENT: + * offset - idxoffset where the key is located. + * buf - buffer to store text + * idxbuf - buffer to store index key + * NOTE: buffer will be alloc'd / realloc'd and + * should be free'd by the client + * + */ + +void zStr::getText(long offset, char **idxbuf, char **buf) const { + char *ch; + char *idxbuflocal = 0; + getKeyFromIdxOffset(offset, &idxbuflocal); + __u32 start; + __u32 size; + + do { + idxfd->seek(offset, SEEK_SET); + idxfd->read(&start, 4); + idxfd->read(&size, 4); + start = swordtoarch32(start); + size = swordtoarch32(size); + + *buf = (*buf) ? (char *)realloc(*buf, size*2 + 1) : (char *)malloc(size*2 + 1); + *idxbuf = (*idxbuf) ? 
(char *)realloc(*idxbuf, size*2 + 1) : (char *)malloc(size*2 + 1); + memset(*buf, 0, size + 1); + memset(*idxbuf, 0, size + 1); + datfd->seek(start, SEEK_SET); + datfd->read(*buf, (int)(size)); + + for (ch = *buf; *ch; ch++) { // skip over index string + if (*ch == 10) { + ch++; + break; + } + } + memmove(*buf, ch, size - (unsigned long)(ch-*buf)); + + // resolve link + if (!strncmp(*buf, "@LINK", 5)) { + for (ch = *buf; *ch; ch++) { // null before nl + if (*ch == 10) { + *ch = 0; + break; + } + } + findKeyIndex(*buf + 6, &offset); + } + else break; + } + while (true); // while we're resolving links + + if (idxbuflocal) { + __u32 localsize = strlen(idxbuflocal); + localsize = (localsize < (size - 1)) ? localsize : (size - 1); + strncpy(*idxbuf, idxbuflocal, localsize); + (*idxbuf)[localsize] = 0; + free(idxbuflocal); + } + __u32 block = 0; + __u32 entry = 0; + memmove(&block, *buf, sizeof(__u32)); + memmove(&entry, *buf + sizeof(__u32), sizeof(__u32)); + block = swordtoarch32(block); + entry = swordtoarch32(entry); + getCompressedText(block, entry, buf); +} + + +/****************************************************************************** + * zStr::getCompressedText - Get text entry from a compressed index / zdata + * file. 
+ */ + +void zStr::getCompressedText(long block, long entry, char **buf) const { + + __u32 size = 0; + + if (cacheBlockIndex != block) { + __u32 start = 0; + + zdxfd->seek(block * ZDXENTRYSIZE, SEEK_SET); + zdxfd->read(&start, 4); + zdxfd->read(&size, 4); + start = swordtoarch32(start); + size = swordtoarch32(size); + + SWBuf buf; + buf.setSize(size + 5); + zdtfd->seek(start, SEEK_SET); + zdtfd->read(buf.getRawData(), size); + + flushCache(); + + unsigned long len = size; + buf.setSize(size); + rawZFilter(buf, 0); // 0 = decipher + + compressor->zBuf(&len, buf.getRawData()); + char *rawBuf = compressor->Buf(0, &len); + cacheBlock = new EntriesBlock(rawBuf, len); + cacheBlockIndex = block; + } + size = cacheBlock->getEntrySize(entry); + *buf = (*buf) ? (char *)realloc(*buf, size*2 + 1) : (char *)malloc(size*2 + 1); + strcpy(*buf, cacheBlock->getEntry(entry)); +} + + +/****************************************************************************** + * zLD::settext - Sets text for current offset + * + * ENT: key - key for this entry + * buf - buffer to store + * len - length of buffer (0 - null terminated) + */ + +void zStr::setText(const char *ikey, const char *buf, long len) { + + static const char nl[] = {13, 10}; + + __u32 start, outstart; + __u32 size, outsize; + __s32 endoff; + long idxoff = 0; + __s32 shiftSize; + char *tmpbuf = 0; + char *key = 0; + char *dbKey = 0; + char *idxBytes = 0; + char *outbuf = 0; + char *ch = 0; + + len = (len < 0) ? 
strlen(buf) : len; + stdstr(&key, ikey, 3); + if (!caseSensitive) toupperstr_utf8(key, strlen(key)*3); + + char notFound = findKeyIndex(ikey, &idxoff, 0); + if (!notFound) { + getKeyFromIdxOffset(idxoff, &dbKey); + int diff = strcmp(key, dbKey); + if (diff < 0) { + } + else if (diff > 0) { + idxoff += IDXENTRYSIZE; + } + else if ((!diff) && (len > 0 /*we're not deleting*/)) { // got absolute entry + do { + idxfd->seek(idxoff, SEEK_SET); + idxfd->read(&start, 4); + idxfd->read(&size, 4); + start = swordtoarch32(start); + size = swordtoarch32(size); + + tmpbuf = new char [ size + 2 ]; + memset(tmpbuf, 0, size + 2); + datfd->seek(start, SEEK_SET); + datfd->read(tmpbuf, size); + + for (ch = tmpbuf; *ch; ch++) { // skip over index string + if (*ch == 10) { + ch++; + break; + } + } + memmove(tmpbuf, ch, size - (unsigned long)(ch-tmpbuf)); + + // resolve link + if (!strncmp(tmpbuf, "@LINK", 5) && (len)) { + for (ch = tmpbuf; *ch; ch++) { // null before nl + if (*ch == 10) { + *ch = 0; + break; + } + } + findKeyIndex(tmpbuf + IDXENTRYSIZE, &idxoff); + delete [] tmpbuf; + } + else break; + } + while (true); // while we're resolving links + } + } + + endoff = idxfd->seek(0, SEEK_END); + + shiftSize = endoff - idxoff; + + if (shiftSize > 0) { + idxBytes = new char [ shiftSize ]; + idxfd->seek(idxoff, SEEK_SET); + idxfd->read(idxBytes, shiftSize); + } + + outbuf = new char [ len + strlen(key) + 5 ]; + sprintf(outbuf, "%s%c%c", key, 13, 10); + size = strlen(outbuf); + if (len > 0) { // NOT a link + if (!cacheBlock) { + flushCache(); + cacheBlock = new EntriesBlock(); + cacheBlockIndex = (zdxfd->seek(0, SEEK_END) / ZDXENTRYSIZE); + } + else if (cacheBlock->getCount() >= blockCount) { + flushCache(); + cacheBlock = new EntriesBlock(); + cacheBlockIndex = (zdxfd->seek(0, SEEK_END) / ZDXENTRYSIZE); + } + __u32 entry = cacheBlock->addEntry(buf); + cacheDirty = true; + outstart = archtosword32(cacheBlockIndex); + outsize = archtosword32(entry); + memcpy (outbuf + size, &outstart, 
sizeof(__u32)); + memcpy (outbuf + size + sizeof(__u32), &outsize, sizeof(__u32)); + size += (sizeof(__u32) * 2); + } + else { // link + memcpy(outbuf + size, buf, len); + size += len; + } + + start = datfd->seek(0, SEEK_END); + + outstart = archtosword32(start); + outsize = archtosword32(size); + + idxfd->seek(idxoff, SEEK_SET); + if (len > 0) { + datfd->seek(start, SEEK_SET); + datfd->write(outbuf, size); + + // add a new line to make data file easier to read in an editor + datfd->write(&nl, 2); + + idxfd->write(&outstart, 4); + idxfd->write(&outsize, 4); + if (idxBytes) { + idxfd->write(idxBytes, shiftSize); + } + } + else { // delete entry + if (idxBytes) { + idxfd->write(idxBytes+IDXENTRYSIZE, shiftSize-IDXENTRYSIZE); + idxfd->seek(-1, SEEK_CUR); // last valid byte + FileMgr::getSystemFileMgr()->trunc(idxfd); // truncate index + } + } + + if (idxBytes) + delete [] idxBytes; + delete [] key; + delete [] outbuf; + free(dbKey); +} + + +/****************************************************************************** + * zLD::linkentry - links one entry to another + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * destidxoff - dest offset into .vss + * srcidxoff - source offset into .vss + */ + +void zStr::linkEntry(const char *destkey, const char *srckey) { + char *text = new char [ strlen(destkey) + 7 ]; + sprintf(text, "@LINK %s", destkey); + setText(srckey, text); + delete [] text; +} + + +void zStr::flushCache() const { + + static const char nl[] = {13, 10}; + + if (cacheBlock) { + if (cacheDirty) { + __u32 start = 0; + unsigned long size = 0; + __u32 outstart = 0, outsize = 0; + + const char *rawBuf = cacheBlock->getRawData(&size); + compressor->Buf(rawBuf, &size); + compressor->zBuf(&size); + + SWBuf buf; + buf.setSize(size + 5); + memcpy(buf.getRawData(), compressor->zBuf(&size), size); // 1 = encipher + buf.setSize(size); + rawZFilter(buf, 1); // 1 = encipher + + long zdxSize = zdxfd->seek(0, SEEK_END); + unsigned long zdtSize = 
zdtfd->seek(0, SEEK_END); + + if ((cacheBlockIndex * ZDXENTRYSIZE) > (zdxSize - ZDXENTRYSIZE)) { // New Block + start = zdtSize; + } + else { + zdxfd->seek(cacheBlockIndex * ZDXENTRYSIZE, SEEK_SET); + zdxfd->read(&start, 4); + zdxfd->read(&outsize, 4); + start = swordtoarch32(start); + outsize = swordtoarch32(outsize); + if (start + outsize >= zdtSize) { // last entry, just overwrite + // start is already set + } + else if (size < outsize) { // middle entry, but smaller, that's fine and let's preserve bigger size + size = outsize; + } + else { // middle and bigger-- we have serious problems, for now let's put it at the end = lots of wasted space + start = zdtSize; + } + } + + + + outstart = archtosword32(start); + outsize = archtosword32((__u32)size); + + zdxfd->seek(cacheBlockIndex * ZDXENTRYSIZE, SEEK_SET); + zdtfd->seek(start, SEEK_SET); + zdtfd->write(buf, size); + + // add a new line to make data file easier to read in an editor + zdtfd->write(&nl, 2); + + zdxfd->write(&outstart, 4); + zdxfd->write(&outsize, 4); + } + delete cacheBlock; + cacheBlock = 0; + } + cacheBlockIndex = -1; + cacheDirty = false; +} + + +/****************************************************************************** + * zLD::CreateModule - Creates new module files + * + * ENT: path - directory to store module files + * RET: error status + */ + +signed char zStr::createModule(const char *ipath) { + char *path = 0; + char *buf = new char [ strlen (ipath) + 20 ]; + FileDesc *fd, *fd2; + + stdstr(&path, ipath); + + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) + path[strlen(path)-1] = 0; + + sprintf(buf, "%s.dat", path); + FileMgr::removeFile(buf); + fd = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE); + fd->getFd(); + FileMgr::getSystemFileMgr()->close(fd); + + sprintf(buf, "%s.idx", path); + FileMgr::removeFile(buf); + fd2 = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, 
FileMgr::IREAD|FileMgr::IWRITE); + fd2->getFd(); + FileMgr::getSystemFileMgr()->close(fd2); + + sprintf(buf, "%s.zdt", path); + FileMgr::removeFile(buf); + fd2 = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE); + fd2->getFd(); + FileMgr::getSystemFileMgr()->close(fd2); + + sprintf(buf, "%s.zdx", path); + FileMgr::removeFile(buf); + fd2 = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE); + fd2->getFd(); + FileMgr::getSystemFileMgr()->close(fd2); + + delete [] path; + + return 0; +} + +SWORD_NAMESPACE_END diff --git a/src/modules/common/zverse.cpp b/src/modules/common/zverse.cpp new file mode 100644 index 0000000..c280d98 --- /dev/null +++ b/src/modules/common/zverse.cpp @@ -0,0 +1,507 @@ +/****************************************************************************** + * + * zverse.cpp - code for class 'zVerse'- a module that reads raw text + * files: ot and nt using indexs ??.bks ??.cps ??.vss + * and provides lookup and parsing functions based on + * class VerseKey for compressed modules + * + * $Id: zverse.cpp 2833 2013-06-29 06:40:28Z chrislit $ + * + * Copyright 1996-2013 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + */ + + + +#include <ctype.h> +#include <stdio.h> +#include <fcntl.h> +#include <errno.h> +#include <stdlib.h> + +#include <utilstr.h> +#include <versekey.h> +#include <zverse.h> +#include <sysdata.h> +#include <swbuf.h> +#include <filemgr.h> +#include <swcomprs.h> + + +SWORD_NAMESPACE_START + +/****************************************************************************** + * zVerse Statics + */ + +int zVerse::instance = 0; + +const char zVerse::uniqueIndexID[] = {'X', 'r', 'v', 'c', 'b'}; + +/****************************************************************************** + * zVerse Constructor - Initializes data for instance of zVerse + * + * ENT: ipath - path of the directory where data and index files are located. + * be sure to include the trailing separator (e.g. '/' or '\') + * (e.g. 'modules/texts/rawtext/webster/') + * fileMode - open mode for the files (FileMgr::RDONLY, etc.) + * blockType - verse, chapter, book, etc. + */ + +zVerse::zVerse(const char *ipath, int fileMode, int blockType, SWCompress *icomp) +{ + // this line, instead of just defaulting, to keep FileMgr out of header + if (fileMode == -1) fileMode = FileMgr::RDONLY; + + SWBuf buf; + + nl = '\n'; + path = 0; + cacheBufIdx = -1; + cacheTestament = 0; + cacheBuf = 0; + dirtyCache = false; + stdstr(&path, ipath); + + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) + path[strlen(path)-1] = 0; + + compressor = (icomp) ? 
icomp : new SWCompress(); + + if (fileMode == -1) { // try read/write if possible + fileMode = FileMgr::RDWR; + } + + buf.setFormatted("%s/ot.%czs", path, uniqueIndexID[blockType]); + idxfp[0] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true); + + buf.setFormatted("%s/nt.%czs", path, uniqueIndexID[blockType]); + idxfp[1] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true); + + buf.setFormatted("%s/ot.%czz", path, uniqueIndexID[blockType]); + textfp[0] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true); + + buf.setFormatted("%s/nt.%czz", path, uniqueIndexID[blockType]); + textfp[1] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true); + + buf.setFormatted("%s/ot.%czv", path, uniqueIndexID[blockType]); + compfp[0] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true); + + buf.setFormatted("%s/nt.%czv", path, uniqueIndexID[blockType]); + compfp[1] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true); + + instance++; +} + + +/****************************************************************************** + * zVerse Destructor - Cleans up instance of zVerse + */ + +zVerse::~zVerse() +{ + int loop1; + + if (cacheBuf) { + flushCache(); + free(cacheBuf); + } + + if (path) + delete [] path; + + if (compressor) + delete compressor; + + --instance; + + for (loop1 = 0; loop1 < 2; loop1++) { + FileMgr::getSystemFileMgr()->close(idxfp[loop1]); + FileMgr::getSystemFileMgr()->close(textfp[loop1]); + FileMgr::getSystemFileMgr()->close(compfp[loop1]); + } +} + + +/****************************************************************************** + * zVerse::findoffset - Finds the offset of the key verse from the indexes + * + * + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * book - book to find (0 - testament introduction) + * chapter - chapter to find (0 - book introduction) + * verse - verse to find (0 - chapter introduction) + * start - address to store the starting offset + * size - address to store the size of the entry + */ + 
+void zVerse::findOffset(char testmt, long idxoff, long *start, unsigned short *size, unsigned long *buffnum) const +{ + __u32 ulBuffNum = 0; // buffer number + __u32 ulVerseStart = 0; // verse offset within buffer + __u16 usVerseSize = 0; // verse size + // set start to offset in + // set size to + // set + *start = *size = *buffnum = 0; + //fprintf(stderr, "Finding offset %ld\n", idxoff); + idxoff *= 10; + if (!testmt) { + testmt = ((idxfp[0]) ? 1:2); + } + + // assert we have and valid file descriptor + if (compfp[testmt-1]->getFd() < 1) + return; + + long newOffset = compfp[testmt-1]->seek(idxoff, SEEK_SET); + if (newOffset == idxoff) { + if (compfp[testmt-1]->read(&ulBuffNum, 4) != 4) { + fprintf(stderr, "Error reading ulBuffNum\n"); + return; + } + } + else return; + + if (compfp[testmt-1]->read(&ulVerseStart, 4) < 2) + { + fprintf(stderr, "Error reading ulVerseStart\n"); + return; + } + if (compfp[testmt-1]->read(&usVerseSize, 2) < 2) + { + fprintf(stderr, "Error reading usVerseSize\n"); + return; + } + + *buffnum = swordtoarch32(ulBuffNum); + *start = swordtoarch32(ulVerseStart); + *size = swordtoarch16(usVerseSize); + +} + + +/****************************************************************************** + * zVerse::zreadtext - gets text at a given offset + * + * ENT: testmt - testament file to search in (0 - Old; 1 - New) + * start - starting offset where the text is located in the file + * size - size of text entry + 1 (null) + * buf - buffer to store text + * + */ + +void zVerse::zReadText(char testmt, long start, unsigned short size, unsigned long ulBuffNum, SWBuf &inBuf) const { + __u32 ulCompOffset = 0; // compressed buffer start + __u32 ulCompSize = 0; // buffer size compressed + __u32 ulUnCompSize = 0; // buffer size uncompressed + + if (!testmt) { + testmt = ((idxfp[0]) ? 
1:2); + } + + // assert we have and valid file descriptor + if (compfp[testmt-1]->getFd() < 1) + return; + + if (size && + !(((long) ulBuffNum == cacheBufIdx) && (testmt == cacheTestament) && (cacheBuf))) { + //fprintf(stderr, "Got buffer number{%ld} versestart{%ld} versesize{%d}\n", ulBuffNum, ulVerseStart, usVerseSize); + + if (idxfp[testmt-1]->seek(ulBuffNum*12, SEEK_SET)!=(long) ulBuffNum*12) + { + fprintf(stderr, "Error seeking compressed file index\n"); + return; + } + if (idxfp[testmt-1]->read(&ulCompOffset, 4)<4) + { + fprintf(stderr, "Error reading ulCompOffset\n"); + return; + } + if (idxfp[testmt-1]->read(&ulCompSize, 4)<4) + { + fprintf(stderr, "Error reading ulCompSize\n"); + return; + } + if (idxfp[testmt-1]->read(&ulUnCompSize, 4)<4) + { + fprintf(stderr, "Error reading ulUnCompSize\n"); + return; + } + + ulCompOffset = swordtoarch32(ulCompOffset); + ulCompSize = swordtoarch32(ulCompSize); + ulUnCompSize = swordtoarch32(ulUnCompSize); + + if (textfp[testmt-1]->seek(ulCompOffset, SEEK_SET)!=(long)ulCompOffset) + { + fprintf(stderr, "Error: could not seek to right place in compressed text\n"); + return; + } + SWBuf pcCompText; + pcCompText.setSize(ulCompSize+5); + + if (textfp[testmt-1]->read(pcCompText.getRawData(), ulCompSize)<(long)ulCompSize) { + fprintf(stderr, "Error reading compressed text\n"); + return; + } + pcCompText.setSize(ulCompSize); + rawZFilter(pcCompText, 0); // 0 = decipher + + unsigned long bufSize = ulCompSize; + compressor->zBuf(&bufSize, pcCompText.getRawData()); + + if (cacheBuf) { + flushCache(); + free(cacheBuf); + } + + unsigned long len = 0; + compressor->Buf(0, &len); + cacheBuf = (char *)calloc(len + 1, 1); + memcpy(cacheBuf, compressor->Buf(), len); + cacheBufSize = strlen(cacheBuf); // TODO: can we just use len? 
+ cacheTestament = testmt; + cacheBufIdx = ulBuffNum; + } + + inBuf = ""; + if ((size > 0) && cacheBuf && ((unsigned)start < cacheBufSize)) { + inBuf.setFillByte(0); + inBuf.setSize(size+1); + strncpy(inBuf.getRawData(), &(cacheBuf[start]), size); + inBuf.setSize(strlen(inBuf.c_str())); + } +} + + +/****************************************************************************** + * zVerse::settext - Sets text for current offset + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * idxoff - offset into .vss + * buf - buffer to store + * len - length of buffer (0 - null terminated) + */ + +void zVerse::doSetText(char testmt, long idxoff, const char *buf, long len) { + + len = (len < 0) ? strlen(buf) : len; + if (!testmt) + testmt = ((idxfp[0]) ? 1:2); + if ((!dirtyCache) || (cacheBufIdx < 0)) { + cacheBufIdx = idxfp[testmt-1]->seek(0, SEEK_END) / 12; + cacheTestament = testmt; + if (cacheBuf) + free(cacheBuf); + cacheBuf = (char *)calloc(len + 1, 1); + } + else cacheBuf = (char *)((cacheBuf)?realloc(cacheBuf, strlen(cacheBuf)+(len + 1)):calloc((len + 1), 1)); + + dirtyCache = true; + + __u32 start; + __u16 size; + __u32 outBufIdx = cacheBufIdx; + + idxoff *= 10; + size = len; + + start = strlen(cacheBuf); + + if (!size) + start = outBufIdx = 0; + + outBufIdx = archtosword32(outBufIdx); + start = archtosword32(start); + size = archtosword16(size); + + compfp[testmt-1]->seek(idxoff, SEEK_SET); + compfp[testmt-1]->write(&outBufIdx, 4); + compfp[testmt-1]->write(&start, 4); + compfp[testmt-1]->write(&size, 2); + strcat(cacheBuf, buf); +} + + +void zVerse::flushCache() const { + if (dirtyCache) { + __u32 idxoff; + __u32 start, outstart; + __u32 size, outsize; + __u32 zsize, outzsize; + + idxoff = cacheBufIdx * 12; + if (cacheBuf) { + size = outsize = zsize = outzsize = strlen(cacheBuf); + if (size) { + // if (compressor) { + // delete compressor; + // compressor = new LZSSCompress(); + // } + compressor->Buf(cacheBuf); + unsigned long tmpSize; + 
compressor->zBuf(&tmpSize); + outzsize = zsize = tmpSize; + + SWBuf buf; + buf.setSize(zsize + 5); + memcpy(buf.getRawData(), compressor->zBuf(&tmpSize), tmpSize); + outzsize = zsize = tmpSize; + buf.setSize(zsize); + rawZFilter(buf, 1); // 1 = encipher + + start = outstart = textfp[cacheTestament-1]->seek(0, SEEK_END); + + outstart = archtosword32(start); + outsize = archtosword32(size); + outzsize = archtosword32(zsize); + + textfp[cacheTestament-1]->write(buf, zsize); + + idxfp[cacheTestament-1]->seek(idxoff, SEEK_SET); + idxfp[cacheTestament-1]->write(&outstart, 4); + idxfp[cacheTestament-1]->write(&outzsize, 4); + idxfp[cacheTestament-1]->write(&outsize, 4); + } + free(cacheBuf); + cacheBuf = 0; + } + dirtyCache = false; + } +} + +/****************************************************************************** + * RawVerse::linkentry - links one entry to another + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * destidxoff - dest offset into .vss + * srcidxoff - source offset into .vss + */ + +void zVerse::doLinkEntry(char testmt, long destidxoff, long srcidxoff) { + __s32 bufidx; + __s32 start; + __u16 size; + + destidxoff *= 10; + srcidxoff *= 10; + + if (!testmt) + testmt = ((idxfp[1]) ? 
1:2); + + // get source + compfp[testmt-1]->seek(srcidxoff, SEEK_SET); + compfp[testmt-1]->read(&bufidx, 4); + compfp[testmt-1]->read(&start, 4); + compfp[testmt-1]->read(&size, 2); + + // write dest + compfp[testmt-1]->seek(destidxoff, SEEK_SET); + compfp[testmt-1]->write(&bufidx, 4); + compfp[testmt-1]->write(&start, 4); + compfp[testmt-1]->write(&size, 2); +} + + +/****************************************************************************** + * RawVerse::CreateModule - Creates new module files + * + * ENT: path - directory to store module files + * RET: error status + */ + +char zVerse::createModule(const char *ipath, int blockBound, const char *v11n) +{ + char *path = 0; + char *buf = new char [ strlen (ipath) + 20 ]; + FileDesc *fd, *fd2; + + stdstr(&path, ipath); + + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) + path[strlen(path)-1] = 0; + + sprintf(buf, "%s/ot.%czs", path, uniqueIndexID[blockBound]); + FileMgr::removeFile(buf); + fd = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE); + fd->getFd(); + FileMgr::getSystemFileMgr()->close(fd); + + sprintf(buf, "%s/nt.%czs", path, uniqueIndexID[blockBound]); + FileMgr::removeFile(buf); + fd = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE); + fd->getFd(); + FileMgr::getSystemFileMgr()->close(fd); + + sprintf(buf, "%s/ot.%czz", path, uniqueIndexID[blockBound]); + FileMgr::removeFile(buf); + fd = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE); + fd->getFd(); + FileMgr::getSystemFileMgr()->close(fd); + + sprintf(buf, "%s/nt.%czz", path, uniqueIndexID[blockBound]); + FileMgr::removeFile(buf); + fd2 = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE); + fd2->getFd(); + FileMgr::getSystemFileMgr()->close(fd); + + sprintf(buf, "%s/ot.%czv", path, uniqueIndexID[blockBound]); + 
FileMgr::removeFile(buf); + fd = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE); + fd->getFd(); + + sprintf(buf, "%s/nt.%czv", path, uniqueIndexID[blockBound]); + FileMgr::removeFile(buf); + fd2 = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE); + fd2->getFd(); + + VerseKey vk; + vk.setVersificationSystem(v11n); + vk.setIntros(true); + + __s32 offset = 0; + __s16 size = 0; + offset = archtosword32(offset); + size = archtosword16(size); + + for (vk = TOP; !vk.popError(); vk++) { + if (vk.getTestament() < 2) { + fd->write(&offset, 4); //compBufIdxOffset + fd->write(&offset, 4); + fd->write(&size, 2); + } + else { + fd2->write(&offset, 4); //compBufIdxOffset + fd2->write(&offset, 4); + fd2->write(&size, 2); + } + } + fd2->write(&offset, 4); //compBufIdxOffset + fd2->write(&offset, 4); + fd2->write(&size, 2); + + FileMgr::getSystemFileMgr()->close(fd); + FileMgr::getSystemFileMgr()->close(fd2); + + delete [] path; + delete [] buf; + + return 0; +} + + +SWORD_NAMESPACE_END |