summaryrefslogtreecommitdiff
path: root/src/modules/common/lzsscomprs.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/modules/common/lzsscomprs.cpp')
-rw-r--r--src/modules/common/lzsscomprs.cpp668
1 files changed, 0 insertions, 668 deletions
diff --git a/src/modules/common/lzsscomprs.cpp b/src/modules/common/lzsscomprs.cpp
deleted file mode 100644
index bd8f768..0000000
--- a/src/modules/common/lzsscomprs.cpp
+++ /dev/null
@@ -1,668 +0,0 @@
-/******************************************************************************
- * lzsscomprs.cpp - code for class 'LZSSCompress'- a driver class that
- * provides LZSS compression
- */
-
-#include <stdlib.h>
-#include <string.h>
-#include <lzsscomprs.h>
-
-SWORD_NAMESPACE_START
-
-/******************************************************************************
- * LZSSCompress Statics
- */
-
-// m_ring_buffer is a text buffer. It contains "nodes" of
-// uncompressed text that can be indexed by position. That is,
-// a substring of the ring buffer can be indexed by a position
-// and a length. When decoding, the compressed text may contain
-// a position in the ring buffer and a count of the number of
-// bytes from the ring buffer that are to be moved into the
-// uncompressed buffer.
-//
-// This ring buffer is not maintained as part of the compressed
-// text. Instead, it is reconstructed dynamically. That is,
-// it starts out empty and gets built as the text is decompressed.
-//
-// The ring buffer contain N bytes, with an additional F - 1 bytes
-// to facilitate string comparison.
-
-unsigned char LZSSCompress::m_ring_buffer[N + F - 1];
-
-// m_match_position and m_match_length are set by InsertNode().
-//
-// These variables indicate the position in the ring buffer
-// and the number of characters at that position that match
-// a given string.
-
-short int LZSSCompress::m_match_position;
-short int LZSSCompress::m_match_length;
-
-// m_lson, m_rson, and m_dad are the Japanese way of referring to
-// a tree structure. The dad is the parent and it has a right and
-// left son (child).
-//
-// For i = 0 to N-1, m_rson[i] and m_lson[i] will be the right
-// and left children of node i.
-//
-// For i = 0 to N-1, m_dad[i] is the parent of node i.
-//
-// For i = 0 to 255, rson[N + i + 1] is the root of the tree for
-// strings that begin with the character i. Note that this requires
-// one byte characters.
-//
-// These nodes store values of 0...(N-1). Memory requirements
-// can be reduces by using 2-byte integers instead of full 4-byte
-// integers (for 32-bit applications). Therefore, these are
-// defined as "short ints."
-
-short int LZSSCompress::m_lson[N + 1];
-short int LZSSCompress::m_rson[N + 257];
-short int LZSSCompress::m_dad[N + 1];
-
-
-/******************************************************************************
- * LZSSCompress Constructor - Initializes data for instance of LZSSCompress
- *
- */
-
-LZSSCompress::LZSSCompress() : SWCompress() {
-}
-
-
-/******************************************************************************
- * LZSSCompress Destructor - Cleans up instance of LZSSCompress
- */
-
-LZSSCompress::~LZSSCompress() {
-}
-
-
-/******************************************************************************
- * LZSSCompress::InitTree - This function initializes the tree nodes to
- * "empty" states.
- */
-
-void LZSSCompress::InitTree(void) {
- int i;
-
- // For i = 0 to N - 1, m_rson[i] and m_lson[i] will be the right
- // and left children of node i. These nodes need not be
- // initialized. However, for debugging purposes, it is nice to
- // have them initialized. Since this is only used for compression
- // (not decompression), I don't mind spending the time to do it.
- //
- // For the same range of i, m_dad[i] is the parent of node i.
- // These are initialized to a known value that can represent
- // a "not used" state.
-
- for (i = 0; i < N; i++) {
- m_lson[i] = NOT_USED;
- m_rson[i] = NOT_USED;
- m_dad[i] = NOT_USED;
- }
-
- // For i = 0 to 255, m_rson[N + i + 1] is the root of the tree
- // for strings that begin with the character i. This is why
- // the right child array is larger than the left child array.
- // These are also initialzied to a "not used" state.
- //
- // Note that there are 256 of these, one for each of the possible
- // 256 characters.
-
- for (i = N + 1; i <= (N + 256); i++) {
- m_rson[i] = NOT_USED;
- }
-}
-
-
-/******************************************************************************
- * LZSSCompress::InsertNode - This function inserts a string from the ring
- * buffer into one of the trees. It loads the
- * match position and length member variables
- * for the longest match.
- *
- * The string to be inserted is identified by
- * the parameter Pos, A full F bytes are
- * inserted. So,
- * m_ring_buffer[Pos ... Pos+F-1]
- * are inserted.
- *
- * If the matched length is exactly F, then an
- * old node is removed in favor of the new one
- * (because the old one will be deleted
- * sooner).
- *
- * Note that Pos plays a dual role. It is
- * used as both a position in the ring buffer
- * and also as a tree node.
- * m_ring_buffer[Pos] defines a character that
- * is used to identify a tree node.
- *
- * ENT: pos - position in the buffer
- */
-
-void LZSSCompress::InsertNode(short int Pos)
-{
- short int i;
- short int p;
- int cmp;
- unsigned char * key;
-
-/*
- ASSERT(Pos >= 0);
- ASSERT(Pos < N);
-*/
-
- cmp = 1;
- key = &(m_ring_buffer[Pos]);
-
- // The last 256 entries in m_rson contain the root nodes for
- // strings that begin with a letter. Get an index for the
- // first letter in this string.
-
- p = (short int) (N + 1 + key[0]);
-
- // Set the left and right tree nodes for this position to "not
- // used."
-
- m_lson[Pos] = NOT_USED;
- m_rson[Pos] = NOT_USED;
-
- // Haven't matched anything yet.
-
- m_match_length = 0;
-
- for ( ; ; ) {
- if (cmp >= 0) {
- if (m_rson[p] != NOT_USED) {
- p = m_rson[p];
- }
- else {
- m_rson[p] = Pos;
- m_dad[Pos] = p;
- return;
- }
- }
- else {
- if (m_lson[p] != NOT_USED) {
- p = m_lson[p];
- }
- else {
- m_lson[p] = Pos;
- m_dad[Pos] = p;
- return;
- }
- }
-
- // Should we go to the right or the left to look for the
- // next match?
-
- for (i = 1; i < F; i++) {
- cmp = key[i] - m_ring_buffer[p + i];
- if (cmp != 0)
- break;
- }
-
- if (i > m_match_length) {
- m_match_position = p;
- m_match_length = i;
-
- if (i >= F)
- break;
- }
- }
-
- m_dad[Pos] = m_dad[p];
- m_lson[Pos] = m_lson[p];
- m_rson[Pos] = m_rson[p];
-
- m_dad[ m_lson[p] ] = Pos;
- m_dad[ m_rson[p] ] = Pos;
-
- if (m_rson[ m_dad[p] ] == p) {
- m_rson[ m_dad[p] ] = Pos;
- }
- else {
- m_lson[ m_dad[p] ] = Pos;
- }
-
- // Remove "p"
-
- m_dad[p] = NOT_USED;
-}
-
-
-/******************************************************************************
- * LZSSCompress::DeleteNode - This function removes the node "Node" from the
- * tree.
- *
- * ENT: node - node to be removed
- */
-
-void LZSSCompress::DeleteNode(short int Node)
-{
- short int q;
-
-/*
- ASSERT(Node >= 0);
- ASSERT(Node < (N+1));
-*/
-
- if (m_dad[Node] == NOT_USED) { // not in tree, nothing to do
- return;
- }
-
- if (m_rson[Node] == NOT_USED) {
- q = m_lson[Node];
- }
- else if (m_lson[Node] == NOT_USED) {
- q = m_rson[Node];
- }
- else {
- q = m_lson[Node];
- if (m_rson[q] != NOT_USED) {
- do {
- q = m_rson[q];
- } while (m_rson[q] != NOT_USED);
-
- m_rson[ m_dad[q] ] = m_lson[q];
- m_dad[ m_lson[q] ] = m_dad[q];
- m_lson[q] = m_lson[Node];
- m_dad[ m_lson[Node] ] = q;
- }
-
- m_rson[q] = m_rson[Node];
- m_dad[ m_rson[Node] ] = q;
- }
-
- m_dad[q] = m_dad[Node];
-
- if (m_rson[ m_dad[Node] ] == Node) {
- m_rson[ m_dad[Node] ] = q;
- }
- else {
- m_lson[ m_dad[Node] ] = q;
- }
-
- m_dad[Node] = NOT_USED;
-}
-
-
-/******************************************************************************
- * LZSSCompress::Encode - This function "encodes" the input stream into the
- * output stream.
- * The GetChars() and SendChars() functions are
- * used to separate this method from the actual
- * i/o.
- * NOTE: must set zlen for parent class to know length of
- * compressed buffer.
- */
-
-void LZSSCompress::Encode(void)
-{
- short int i; // an iterator
- short int r; // node number in the binary tree
- short int s; // position in the ring buffer
- unsigned short int len; // len of initial string
- short int last_match_length; // length of last match
- short int code_buf_pos; // position in the output buffer
- unsigned char code_buf[17]; // the output buffer
- unsigned char mask; // bit mask for byte 0 of out buf
- unsigned char c; // character read from string
-
- // Start with a clean tree.
-
- InitTree();
- direct = 0; // set direction needed by parent [Get|Send]Chars()
-
- // code_buf[0] works as eight flags. A "1" represents that the
- // unit is an unencoded letter (1 byte), and a "0" represents
- // that the next unit is a <position,length> pair (2 bytes).
- //
- // code_buf[1..16] stores eight units of code. Since the best
- // we can do is store eight <position,length> pairs, at most 16
- // bytes are needed to store this.
- //
- // This is why the maximum size of the code buffer is 17 bytes.
-
- code_buf[0] = 0;
- code_buf_pos = 1;
-
- // Mask iterates over the 8 bits in the code buffer. The first
- // character ends up being stored in the low bit.
- //
- // bit 8 7 6 5 4 3 2 1
- // | |
- // | first sequence in code buffer
- // |
- // last sequence in code buffer
-
- mask = 1;
-
- s = 0;
- r = (short int) N - (short int) F;
-
- // Initialize the ring buffer with spaces...
-
- // Note that the last F bytes of the ring buffer are not filled.
- // This is because those F bytes will be filled in immediately
- // with bytes from the input stream.
-
- memset(m_ring_buffer, ' ', N - F);
-
- // Read F bytes into the last F bytes of the ring buffer.
- //
- // This function loads the buffer with X characters and returns
- // the actual amount loaded.
-
- len = GetChars((char *) &(m_ring_buffer[r]), F);
-
- // Make sure there is something to be compressed.
-
- if (len == 0)
- return;
-
- // Insert the F strings, each of which begins with one or more
- // 'space' characters. Note the order in which these strings
- // are inserted. This way, degenerate trees will be less likely
- // to occur.
-
- for (i = 1; i <= F; i++) {
- InsertNode((short int) (r - i));
- }
-
- // Finally, insert the whole string just read. The
- // member variables match_length and match_position are set.
-
- InsertNode(r);
-
- // Now that we're preloaded, continue till done.
-
- do {
-
- // m_match_length may be spuriously long near the end of
- // text.
-
- if (m_match_length > len) {
- m_match_length = len;
- }
-
- // Is it cheaper to store this as a single character? If so,
- // make it so.
-
- if (m_match_length < THRESHOLD) {
- // Send one character. Remember that code_buf[0] is the
- // set of flags for the next eight items.
-
- m_match_length = 1;
- code_buf[0] |= mask;
- code_buf[code_buf_pos++] = m_ring_buffer[r];
- }
-
- // Otherwise, we do indeed have a string that can be stored
- // compressed to save space.
-
- else {
- // The next 16 bits need to contain the position (12 bits)
- // and the length (4 bits).
-
- code_buf[code_buf_pos++] = (unsigned char) m_match_position;
- code_buf[code_buf_pos++] = (unsigned char) (
- ((m_match_position >> 4) & 0xf0) |
- (m_match_length - THRESHOLD) );
- }
-
- // Shift the mask one bit to the left so that it will be ready
- // to store the new bit.
-
- mask = (unsigned char) (mask << 1);
-
- // If the mask is now 0, then we know that we have a full set
- // of flags and items in the code buffer. These need to be
- // output.
-
- if (!mask) {
- // code_buf is the buffer of characters to be output.
- // code_buf_pos is the number of characters it contains.
-
- SendChars((char *) code_buf, code_buf_pos);
-
- // Reset for next buffer...
-
- code_buf[0] = 0;
- code_buf_pos = 1;
- mask = 1;
- }
-
- last_match_length = m_match_length;
-
- // Delete old strings and read new bytes...
-
- for (i = 0; i < last_match_length; i++) {
- // Get next character...
-
- if (GetChars((char *) &c, 1) != 1)
- break;
-
- // Delete "old strings"
-
- DeleteNode(s);
-
- // Put this character into the ring buffer.
- //
- // The original comment here says "If the position is near
- // the end of the buffer, extend the buffer to make
- // string comparison easier."
- //
- // That's a little misleading, because the "end" of the
- // buffer is really what we consider to be the "beginning"
- // of the buffer, that is, positions 0 through F.
- //
- // The idea is that the front end of the buffer is duplicated
- // into the back end so that when you're looking at characters
- // at the back end of the buffer, you can index ahead (beyond
- // the normal end of the buffer) and see the characters
- // that are at the front end of the buffer wihtout having
- // to adjust the index.
- //
- // That is...
- //
- // 1234xxxxxxxxxxxxxxxxxxxxxxxxxxxxx1234
- // | | |
- // position 0 end of buffer |
- // |
- // duplicate of front of buffer
-
- m_ring_buffer[s] = c;
-
- if (s < F - 1) {
- m_ring_buffer[s + N] = c;
- }
-
- // Increment the position, and wrap around when we're at
- // the end. Note that this relies on N being a power of 2.
-
- s = (short int) ( (s + 1) & (N - 1) );
- r = (short int) ( (r + 1) & (N - 1) );
-
- // Register the string that is found in
- // m_ring_buffer[r..r+F-1].
-
- InsertNode(r);
- }
-
- // If we didn't quit because we hit the last_match_length,
- // then we must have quit because we ran out of characters
- // to process.
-
- while (i++ < last_match_length) {
- DeleteNode(s);
-
- s = (short int) ( (s + 1) & (N - 1) );
- r = (short int) ( (r + 1) & (N - 1) );
-
- // Note that len hitting 0 is the key that causes the
- // do...while() to terminate. This is the only place
- // within the loop that len is modified.
- //
- // Its original value is F (or a number less than F for
- // short strings).
-
- if (--len) {
- InsertNode(r); /* buffer may not be empty. */
- }
- }
-
- // End of do...while() loop. Continue processing until there
- // are no more characters to be compressed. The variable
- // "len" is used to signal this condition.
- } while (len > 0);
-
- // There could still be something in the output buffer. Send it
- // now.
-
- if (code_buf_pos > 1) {
- // code_buf is the encoded string to send.
- // code_buf_ptr is the number of characters.
-
- SendChars((char *) code_buf, code_buf_pos);
- }
-
-
- // must set zlen for parent class to know length of compressed buffer
- zlen = zpos;
-}
-
-
-/******************************************************************************
- * LZSSCompress::Decode - This function "decodes" the input stream into the
- * output stream.
- * The GetChars() and SendChars() functions are
- * used to separate this method from the actual
- * i/o.
- */
-
-void LZSSCompress::Decode(void)
-{
- int k;
- int r; // node number
- unsigned char c[F]; // an array of chars
- unsigned char flags; // 8 bits of flags
- int flag_count; // which flag we're on
- short int pos; // position in the ring buffer
- short int len; // number of chars in ring buffer
- unsigned long totalLen = 0;
-
- direct = 1; // set direction needed by parent [Get|Send]Chars()
-
- // Initialize the ring buffer with a common string.
- //
- // Note that the last F bytes of the ring buffer are not filled.
-
- memset(m_ring_buffer, ' ', N - F);
-
- r = N - F;
-
- flags = (char) 0;
- flag_count = 0;
-
- for ( ; ; ) {
-
- // If there are more bits of interest in this flag, then
- // shift that next interesting bit into the 1's position.
- //
- // If this flag has been exhausted, the next byte must
- // be a flag.
-
- if (flag_count > 0) {
- flags = (unsigned char) (flags >> 1);
- flag_count--;
- }
- else {
- // Next byte must be a flag.
-
- if (GetChars((char *) &flags, 1) != 1)
- break;
-
- // Set the flag counter. While at first it might appear
- // that this should be an 8 since there are 8 bits in the
- // flag, it should really be a 7 because the shift must
- // be performed 7 times in order to see all 8 bits.
-
- flag_count = 7;
- }
-
- // If the low order bit of the flag is now set, then we know
- // that the next byte is a single, unencoded character.
-
- if (flags & 1) {
- if (GetChars((char *) c, 1) != 1)
- break;
-
- if (SendChars((char *) c, 1) != 1) {
- totalLen++;
- break;
- }
-
- // Add to buffer, and increment to next spot. Wrap at end.
-
- m_ring_buffer[r] = c[0];
- r = (short int) ( (r + 1) & (N - 1) );
- }
-
- // Otherwise, we know that the next two bytes are a
- // <position,length> pair. The position is in 12 bits and
- // the length is in 4 bits.
-
- else {
- // Original code:
- // if ((i = getc(infile)) == EOF)
- // break;
- // if ((j = getc(infile)) == EOF)
- // break;
- // i |= ((j & 0xf0) << 4);
- // j = (j & 0x0f) + THRESHOLD;
- //
- // I've modified this to only make one input call, and
- // have changed the variable names to something more
- // obvious.
-
- if (GetChars((char *) c, 2) != 2)
- break;
-
- // Convert these two characters into the position and
- // length. Note that the length is always at least
- // THRESHOLD, which is why we're able to get a length
- // of 18 out of only 4 bits.
-
- pos = (short int) ( c[0] | ((c[1] & 0xf0) << 4) );
-
- len = (short int) ( (c[1] & 0x0f) + THRESHOLD );
-
- // There are now "len" characters at position "pos" in
- // the ring buffer that can be pulled out. Note that
- // len is never more than F.
-
- for (k = 0; k < len; k++) {
- c[k] = m_ring_buffer[(pos + k) & (N - 1)];
-
- // Add to buffer, and increment to next spot. Wrap at end.
-
- m_ring_buffer[r] = c[k];
- r = (short int) ( (r + 1) & (N - 1) );
- }
-
- // Add the "len" :characters to the output stream.
-
- if (SendChars((char *) c, len) != (unsigned int)len) {
- totalLen += len;
- break;
- }
- }
- }
- slen = totalLen;
-}
-
-SWORD_NAMESPACE_END